diff options
685 files changed, 7469 insertions, 3909 deletions
diff --git a/.clang-format b/.clang-format index 1247d54f9e49..8d01225bfcb7 100644 --- a/.clang-format +++ b/.clang-format @@ -535,6 +535,7 @@ ForEachMacros: - 'perf_hpp_list__for_each_sort_list_safe' - 'perf_pmu__for_each_hybrid_pmu' - 'ping_portaddr_for_each_entry' + - 'ping_portaddr_for_each_entry_rcu' - 'plist_for_each' - 'plist_for_each_continue' - 'plist_for_each_entry' @@ -29,6 +29,7 @@ Alexandre Belloni <alexandre.belloni@bootlin.com> <alexandre.belloni@free-electr Alexei Starovoitov <ast@kernel.org> <alexei.starovoitov@gmail.com> Alexei Starovoitov <ast@kernel.org> <ast@fb.com> Alexei Starovoitov <ast@kernel.org> <ast@plumgrid.com> +Alex Hung <alexhung@gmail.com> <alex.hung@canonical.com> Alex Shi <alexs@kernel.org> <alex.shi@intel.com> Alex Shi <alexs@kernel.org> <alex.shi@linaro.org> Alex Shi <alexs@kernel.org> <alex.shi@linux.alibaba.com> @@ -286,6 +287,7 @@ Matthew Wilcox <willy@infradead.org> <willy@linux.intel.com> Matthew Wilcox <willy@infradead.org> <willy@parisc-linux.org> Matthias Fuchs <socketcan@esd.eu> <matthias.fuchs@esd.eu> Matthieu CASTET <castet.matthieu@free.fr> +Matti Vaittinen <mazziesaccount@gmail.com> <matti.vaittinen@fi.rohmeurope.com> Matt Ranostay <matt.ranostay@konsulko.com> <matt@ranostay.consulting> Matt Ranostay <mranostay@gmail.com> Matthew Ranostay <mranostay@embeddedalley.com> Matt Ranostay <mranostay@gmail.com> <matt.ranostay@intel.com> @@ -371,6 +373,8 @@ Ricardo Ribalda <ribalda@kernel.org> <ricardo.ribalda@gmail.com> Roman Gushchin <roman.gushchin@linux.dev> <guro@fb.com> Roman Gushchin <roman.gushchin@linux.dev> <guroan@gmail.com> Roman Gushchin <roman.gushchin@linux.dev> <klamm@yandex-team.ru> +Muchun Song <muchun.song@linux.dev> <songmuchun@bytedance.com> +Muchun Song <muchun.song@linux.dev> <smuchun@gmail.com> Ross Zwisler <zwisler@kernel.org> <ross.zwisler@linux.intel.com> Rudolf Marek <R.Marek@sh.cvut.cz> Rui Saraiva <rmps@joel.ist.utl.pt> @@ -382,6 +386,7 @@ Santosh Shilimkar <santosh.shilimkar@oracle.org> Santosh Shilimkar <ssantosh@kernel.org> Sarangdhar Joshi <spjoshi@codeaurora.org> Sascha Hauer <s.hauer@pengutronix.de> +Satya Priya <quic_c_skakit@quicinc.com> <skakit@codeaurora.org> S.Çağlar Onur <caglar@pardus.org.tr> Sean Christopherson <seanjc@google.com> <sean.j.christopherson@intel.com> Sean Nyekjaer <sean@geanix.com> <sean.nyekjaer@prevas.dk> @@ -389,6 +394,7 @@ Sebastian Reichel <sre@kernel.org> <sebastian.reichel@collabora.co.uk> Sebastian Reichel <sre@kernel.org> <sre@debian.org> Sedat Dilek <sedat.dilek@gmail.com> <sedat.dilek@credativ.de> Seth Forshee <sforshee@kernel.org> <seth.forshee@canonical.com> +Shannon Nelson <shannon.nelson@amd.com> <snelson@pensando.io> Shiraz Hashim <shiraz.linux.kernel@gmail.com> <shiraz.hashim@st.com> Shuah Khan <shuah@kernel.org> <shuahkhan@gmail.com> Shuah Khan <shuah@kernel.org> <shuah.khan@hp.com> diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index a465d5242774..6b838869554b 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2300,7 +2300,13 @@ Provide an override to the IOAPIC-ID<->DEVICE-ID mapping provided in the IVRS ACPI table. By default, PCI segment is 0, and can be omitted. - For example: + + For example, to map IOAPIC-ID decimal 10 to + PCI segment 0x1 and PCI device 00:14.0, + write the parameter as: + ivrs_ioapic=10@0001:00:14.0 + + Deprecated formats: * To map IOAPIC-ID decimal 10 to PCI device 00:14.0 write the parameter as: ivrs_ioapic[10]=00:14.0 @@ -2312,7 +2318,13 @@ Provide an override to the HPET-ID<->DEVICE-ID mapping provided in the IVRS ACPI table. By default, PCI segment is 0, and can be omitted. - For example: + + For example, to map HPET-ID decimal 10 to + PCI segment 0x1 and PCI device 00:14.0, + write the parameter as: + ivrs_hpet=10@0001:00:14.0 + + Deprecated formats: * To map HPET-ID decimal 0 to PCI device 00:14.0 write the parameter as: ivrs_hpet[0]=00:14.0 @@ -2323,15 +2335,20 @@ ivrs_acpihid [HW,X86-64] Provide an override to the ACPI-HID:UID<->DEVICE-ID mapping provided in the IVRS ACPI table. + By default, PCI segment is 0, and can be omitted. For example, to map UART-HID:UID AMD0020:0 to PCI segment 0x1 and PCI device ID 00:14.5, write the parameter as: - ivrs_acpihid[0001:00:14.5]=AMD0020:0 + ivrs_acpihid=AMD0020:0@0001:00:14.5 - By default, PCI segment is 0, and can be omitted. - For example, PCI device 00:14.5 write the parameter as: + Deprecated formats: + * To map UART-HID:UID AMD0020:0 to PCI segment is 0, + PCI device ID 00:14.5, write the parameter as: ivrs_acpihid[00:14.5]=AMD0020:0 + * To map UART-HID:UID AMD0020:0 to PCI segment 0x1 and + PCI device ID 00:14.5, write the parameter as: + ivrs_acpihid[0001:00:14.5]=AMD0020:0 js= [HW,JOY] Analog joystick See Documentation/input/joydev/joystick.rst. @@ -6959,3 +6976,14 @@ memory, and other data can't be written using xmon commands. off xmon is disabled. + + amd_pstate= [X86] + disable + Do not enable amd_pstate as the default + scaling driver for the supported processors + passive + Use amd_pstate as a scaling driver, driver requests a + desired performance on this abstract scale and the power + management firmware translates the requests into actual + hardware states (core frequency, data fabric and memory + clocks etc.) diff --git a/Documentation/admin-guide/pm/amd-pstate.rst b/Documentation/admin-guide/pm/amd-pstate.rst index 8f3d30c5a0d8..06e23538f79c 100644 --- a/Documentation/admin-guide/pm/amd-pstate.rst +++ b/Documentation/admin-guide/pm/amd-pstate.rst @@ -283,23 +283,19 @@ efficiency frequency management method on AMD processors. Kernel Module Options for ``amd-pstate`` ========================================= -.. _shared_mem: - -``shared_mem`` -Use a module param (shared_mem) to enable related processors manually with -**amd_pstate.shared_mem=1**. -Due to the performance issue on the processors with `Shared Memory Support -<perf_cap_>`_, we disable it presently and will re-enable this by default -once we address performance issue with this solution. - -To check whether the current processor is using `Full MSR Support <perf_cap_>`_ -or `Shared Memory Support <perf_cap_>`_ : :: - - ray@hr-test1:~$ lscpu | grep cppc - Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf rapl pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate ssbd mba ibrs ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 erms invpcid cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr rdpru wbnoinvd cppc arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif v_spec_ctrl umip pku ospke vaes vpclmulqdq rdpid overflow_recov succor smca fsrm - -If the CPU flags have ``cppc``, then this processor supports `Full MSR Support -<perf_cap_>`_. Otherwise, it supports `Shared Memory Support <perf_cap_>`_. +Passive Mode +------------ + +``amd_pstate=passive`` + +It will be enabled if the ``amd_pstate=passive`` is passed to the kernel in the command line. +In this mode, ``amd_pstate`` driver software specifies a desired QoS target in the CPPC +performance scale as a relative number. This can be expressed as percentage of nominal +performance (infrastructure max). Below the nominal sustained performance level, +desired performance expresses the average performance level of the processor subject +to the Performance Reduction Tolerance register. Above the nominal performance level, +processor must provide at least nominal performance requested and go higher if current +operating conditions allow. ``cpupower`` tool support for ``amd-pstate`` diff --git a/Documentation/devicetree/bindings/clock/samsung,exynosautov9-clock.yaml b/Documentation/devicetree/bindings/clock/samsung,exynosautov9-clock.yaml index 2ab4642679c0..55c4f94a14d1 100644 --- a/Documentation/devicetree/bindings/clock/samsung,exynosautov9-clock.yaml +++ b/Documentation/devicetree/bindings/clock/samsung,exynosautov9-clock.yaml @@ -148,7 +148,7 @@ allOf: items: - const: oscclk - const: dout_clkcmu_fsys1_bus - - const: dout_clkcmu_fsys1_mmc_card + - const: gout_clkcmu_fsys1_mmc_card - const: dout_clkcmu_fsys1_usbdrd - if: diff --git a/Documentation/devicetree/bindings/iio/adc/aspeed,ast2600-adc.yaml b/Documentation/devicetree/bindings/iio/adc/aspeed,ast2600-adc.yaml index b283c8ca2bbf..5c08d8b6e995 100644 --- a/Documentation/devicetree/bindings/iio/adc/aspeed,ast2600-adc.yaml +++ b/Documentation/devicetree/bindings/iio/adc/aspeed,ast2600-adc.yaml @@ -62,13 +62,6 @@ properties: description: Inform the driver that last channel will be used to sensor battery. - aspeed,trim-data-valid: - type: boolean - description: | - The ADC reference voltage can be calibrated to obtain the trimming - data which will be stored in otp. This property informs the driver that - the data store in the otp is valid. - required: - compatible - reg diff --git a/Documentation/devicetree/bindings/interconnect/qcom,msm8998-bwmon.yaml b/Documentation/devicetree/bindings/interconnect/qcom,msm8998-bwmon.yaml index 2684562df4d9..be29e0b80995 100644 --- a/Documentation/devicetree/bindings/interconnect/qcom,msm8998-bwmon.yaml +++ b/Documentation/devicetree/bindings/interconnect/qcom,msm8998-bwmon.yaml @@ -24,7 +24,7 @@ properties: oneOf: - items: - enum: - - qcom,sc7280-bwmon + - qcom,sc7280-cpu-bwmon - qcom,sdm845-bwmon - const: qcom,msm8998-bwmon - const: qcom,msm8998-bwmon # BWMON v4 diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml index 9066e6df1ba1..b28c5c2b0ff2 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml +++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml @@ -28,19 +28,50 @@ properties: - enum: - qcom,msm8996-smmu-v2 - qcom,msm8998-smmu-v2 + - qcom,sdm630-smmu-v2 - const: qcom,smmu-v2 - - description: Qcom SoCs implementing "arm,mmu-500" + - description: Qcom SoCs implementing "qcom,smmu-500" and "arm,mmu-500" items: - enum: - qcom,qcm2290-smmu-500 + - qcom,qdu1000-smmu-500 - qcom,sc7180-smmu-500 - qcom,sc7280-smmu-500 - qcom,sc8180x-smmu-500 - qcom,sc8280xp-smmu-500 + - qcom,sdm670-smmu-500 - qcom,sdm845-smmu-500 + - qcom,sm6115-smmu-500 + - qcom,sm6350-smmu-500 + - qcom,sm6375-smmu-500 + - qcom,sm8150-smmu-500 + - qcom,sm8250-smmu-500 + - qcom,sm8350-smmu-500 + - qcom,sm8450-smmu-500 + - const: qcom,smmu-500 + - const: arm,mmu-500 + + - description: Qcom SoCs implementing "arm,mmu-500" (non-qcom implementation) + deprecated: true + items: + - enum: - qcom,sdx55-smmu-500 - qcom,sdx65-smmu-500 + - const: arm,mmu-500 + + - description: Qcom SoCs implementing "arm,mmu-500" (legacy binding) + deprecated: true + items: + # Do not add additional SoC to this list. Instead use two previous lists. + - enum: + - qcom,qcm2290-smmu-500 + - qcom,sc7180-smmu-500 + - qcom,sc7280-smmu-500 + - qcom,sc8180x-smmu-500 + - qcom,sc8280xp-smmu-500 + - qcom,sdm845-smmu-500 + - qcom,sm6115-smmu-500 - qcom,sm6350-smmu-500 - qcom,sm6375-smmu-500 - qcom,sm8150-smmu-500 @@ -48,13 +79,28 @@ properties: - qcom,sm8350-smmu-500 - qcom,sm8450-smmu-500 - const: arm,mmu-500 + + - description: Qcom Adreno GPUs implementing "arm,smmu-500" + items: + - enum: + - qcom,sc7280-smmu-500 + - qcom,sm8250-smmu-500 + - const: qcom,adreno-smmu + - const: arm,mmu-500 - description: Qcom Adreno GPUs implementing "arm,smmu-v2" items: - enum: + - qcom,msm8996-smmu-v2 - qcom,sc7180-smmu-v2 + - qcom,sdm630-smmu-v2 - qcom,sdm845-smmu-v2 + - qcom,sm6350-smmu-v2 - const: qcom,adreno-smmu - const: qcom,smmu-v2 + - description: Qcom Adreno GPUs on Google Cheza platform + items: + - const: qcom,sdm845-smmu-v2 + - const: qcom,smmu-v2 - description: Marvell SoCs implementing "arm,mmu-500" items: - const: marvell,ap806-smmu-500 @@ -147,16 +193,12 @@ properties: present in such cases. clock-names: - items: - - const: bus - - const: iface + minItems: 1 + maxItems: 7 clocks: - items: - - description: bus clock required for downstream bus access and for the - smmu ptw - - description: interface clock required to access smmu's registers - through the TCU's programming interface. + minItems: 1 + maxItems: 7 power-domains: maxItems: 1 @@ -206,6 +248,124 @@ allOf: reg: maxItems: 1 + - if: + properties: + compatible: + contains: + enum: + - qcom,msm8998-smmu-v2 + - qcom,sdm630-smmu-v2 + then: + anyOf: + - properties: + clock-names: + items: + - const: bus + clocks: + items: + - description: bus clock required for downstream bus access and for + the smmu ptw + - properties: + clock-names: + items: + - const: iface + - const: mem + - const: mem_iface + clocks: + items: + - description: interface clock required to access smmu's registers + through the TCU's programming interface. + - description: bus clock required for memory access + - description: bus clock required for GPU memory access + - properties: + clock-names: + items: + - const: iface-mm + - const: iface-smmu + - const: bus-mm + - const: bus-smmu + clocks: + items: + - description: interface clock required to access mnoc's registers + through the TCU's programming interface. + - description: interface clock required to access smmu's registers + through the TCU's programming interface. + - description: bus clock required for downstream bus access + - description: bus clock required for the smmu ptw + + - if: + properties: + compatible: + contains: + enum: + - qcom,msm8996-smmu-v2 + - qcom,sc7180-smmu-v2 + - qcom,sdm845-smmu-v2 + then: + properties: + clock-names: + items: + - const: bus + - const: iface + + clocks: + items: + - description: bus clock required for downstream bus access and for + the smmu ptw + - description: interface clock required to access smmu's registers + through the TCU's programming interface. + + - if: + properties: + compatible: + contains: + const: qcom,sc7280-smmu-500 + then: + properties: + clock-names: + items: + - const: gcc_gpu_memnoc_gfx_clk + - const: gcc_gpu_snoc_dvm_gfx_clk + - const: gpu_cc_ahb_clk + - const: gpu_cc_hlos1_vote_gpu_smmu_clk + - const: gpu_cc_cx_gmu_clk + - const: gpu_cc_hub_cx_int_clk + - const: gpu_cc_hub_aon_clk + + clocks: + items: + - description: GPU memnoc_gfx clock + - description: GPU snoc_dvm_gfx clock + - description: GPU ahb clock + - description: GPU hlos1_vote_GPU smmu clock + - description: GPU cx_gmu clock + - description: GPU hub_cx_int clock + - description: GPU hub_aon clock + + - if: + properties: + compatible: + contains: + enum: + - qcom,sm6350-smmu-v2 + - qcom,sm8150-smmu-500 + - qcom,sm8250-smmu-500 + then: + properties: + clock-names: + items: + - const: ahb + - const: bus + - const: iface + + clocks: + items: + - description: bus clock required for AHB bus access + - description: bus clock required for downstream bus access and for + the smmu ptw + - description: interface clock required to access smmu's registers + through the TCU's programming interface. + examples: - |+ /* SMMU with stream matching or stream indexing */ diff --git a/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml b/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml index 839e3be0bf3c..5b6395bc10e0 100644 --- a/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml +++ b/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml @@ -82,6 +82,7 @@ properties: - mediatek,mt8195-iommu-vdo # generation two - mediatek,mt8195-iommu-vpp # generation two - mediatek,mt8195-iommu-infra # generation two + - mediatek,mt8365-m4u # generation two - description: mt7623 generation one items: @@ -132,6 +133,7 @@ properties: dt-binding/memory/mt8186-memory-port.h for mt8186, dt-binding/memory/mt8192-larb-port.h for mt8192. dt-binding/memory/mt8195-memory-port.h for mt8195. + dt-binding/memory/mediatek,mt8365-larb-port.h for mt8365. power-domains: maxItems: 1 diff --git a/Documentation/devicetree/bindings/media/allwinner,sun50i-h6-vpu-g2.yaml b/Documentation/devicetree/bindings/media/allwinner,sun50i-h6-vpu-g2.yaml index 24d7bf21499e..9d44236f2deb 100644 --- a/Documentation/devicetree/bindings/media/allwinner,sun50i-h6-vpu-g2.yaml +++ b/Documentation/devicetree/bindings/media/allwinner,sun50i-h6-vpu-g2.yaml @@ -36,6 +36,9 @@ properties: resets: maxItems: 1 + iommus: + maxItems: 1 + required: - compatible - reg @@ -43,6 +46,7 @@ required: - clocks - clock-names - resets + - iommus additionalProperties: false @@ -59,6 +63,7 @@ examples: clocks = <&ccu CLK_BUS_VP9>, <&ccu CLK_VP9>; clock-names = "bus", "mod"; resets = <&ccu RST_BUS_VP9>; + iommus = <&iommu 5>; }; ... diff --git a/Documentation/loongarch/booting.rst b/Documentation/loongarch/booting.rst new file mode 100644 index 000000000000..91eccd410478 --- /dev/null +++ b/Documentation/loongarch/booting.rst @@ -0,0 +1,42 @@ +.. SPDX-License-Identifier: GPL-2.0 + +======================= +Booting Linux/LoongArch +======================= + +:Author: Yanteng Si <siyanteng@loongson.cn> +:Date: 18 Nov 2022 + +Information passed from BootLoader to kernel +============================================ + +LoongArch supports ACPI and FDT. The information that needs to be passed +to the kernel includes the memmap, the initrd, the command line, optionally +the ACPI/FDT tables, and so on. + +The kernel is passed the following arguments on `kernel_entry` : + + - a0 = efi_boot: `efi_boot` is a flag indicating whether + this boot environment is fully UEFI-compliant. + + - a1 = cmdline: `cmdline` is a pointer to the kernel command line. + + - a2 = systemtable: `systemtable` points to the EFI system table. + All pointers involved at this stage are in physical addresses. + +Header of Linux/LoongArch kernel images +======================================= + +Linux/LoongArch kernel images are EFI images. Being PE files, they have +a 64-byte header structured like:: + + u32 MZ_MAGIC /* "MZ", MS-DOS header */ + u32 res0 = 0 /* Reserved */ + u64 kernel_entry /* Kernel entry point */ + u64 _end - _text /* Kernel image effective size */ + u64 load_offset /* Kernel image load offset from start of RAM */ + u64 res1 = 0 /* Reserved */ + u64 res2 = 0 /* Reserved */ + u64 res3 = 0 /* Reserved */ + u32 LINUX_PE_MAGIC /* Magic number */ + u32 pe_header - _head /* Offset to the PE header */ diff --git a/Documentation/loongarch/index.rst b/Documentation/loongarch/index.rst index aaba648db907..c779bfa00c05 100644 --- a/Documentation/loongarch/index.rst +++ b/Documentation/loongarch/index.rst @@ -9,6 +9,7 @@ LoongArch Architecture :numbered: introduction + booting irq-chip-model features diff --git a/Documentation/networking/generic_netlink.rst b/Documentation/networking/generic_netlink.rst index 59e04ccf80c1..d960dbd7e80e 100644 --- a/Documentation/networking/generic_netlink.rst +++ b/Documentation/networking/generic_netlink.rst @@ -6,4 +6,4 @@ Generic Netlink A wiki document on how to use Generic Netlink can be found here: - * http://www.linuxfoundation.org/collaborate/workgroups/networking/generic_netlink_howto + * https://wiki.linuxfoundation.org/networking/generic_netlink_howto diff --git a/Documentation/translations/zh_CN/loongarch/booting.rst b/Documentation/translations/zh_CN/loongarch/booting.rst new file mode 100644 index 000000000000..fb6440c438f0 --- /dev/null +++ b/Documentation/translations/zh_CN/loongarch/booting.rst @@ -0,0 +1,48 @@ +.. SPDX-License-Identifier: GPL-2.0 + +.. include:: ../disclaimer-zh_CN.rst + +:Original: Documentation/loongarch/booting.rst + +:翻译: + + 司延腾 Yanteng Si <siyanteng@loongson.cn> + +==================== +启动 Linux/LoongArch +==================== + +:作者: 司延腾 <siyanteng@loongson.cn> +:日期: 2022年11月18日 + +BootLoader传递给内核的信息 +========================== + +LoongArch支持ACPI和FDT启动,需要传递给内核的信息包括memmap、initrd、cmdline、可 +选的ACPI/FDT表等。 + +内核在 `kernel_entry` 入口处被传递以下参数: + + - a0 = efi_boot: `efi_boot` 是一个标志,表示这个启动环境是否完全符合UEFI + 的要求。 + + - a1 = cmdline: `cmdline` 是一个指向内核命令行的指针。 + + - a2 = systemtable: `systemtable` 指向EFI的系统表,在这个阶段涉及的所有 + 指针都是物理地址。 + +Linux/LoongArch内核镜像文件头 +============================= + +内核镜像是EFI镜像。作为PE文件,它们有一个64字节的头部结构体,如下所示:: + + u32 MZ_MAGIC /* "MZ", MS-DOS 头 */ + u32 res0 = 0 /* 保留 */ + u64 kernel_entry /* 内核入口点 */ + u64 _end - _text /* 内核镜像有效大小 */ + u64 load_offset /* 加载内核镜像相对内存起始地址的偏移量 */ + u64 res1 = 0 /* 保留 */ + u64 res2 = 0 /* 保留 */ + u64 res3 = 0 /* 保留 */ + u32 LINUX_PE_MAGIC /* 魔术数 */ + u32 pe_header - _head /* 到PE头的偏移量 */ diff --git a/Documentation/translations/zh_CN/loongarch/index.rst b/Documentation/translations/zh_CN/loongarch/index.rst index 7d23eb78379d..0273a08342f7 100644 --- a/Documentation/translations/zh_CN/loongarch/index.rst +++ b/Documentation/translations/zh_CN/loongarch/index.rst @@ -14,6 +14,7 @@ LoongArch体系结构 :numbered: introduction + booting irq-chip-model features diff --git a/Documentation/translations/zh_CN/loongarch/introduction.rst b/Documentation/translations/zh_CN/loongarch/introduction.rst index 128878f5bb70..f3ec25b163d7 100644 --- a/Documentation/translations/zh_CN/loongarch/introduction.rst +++ b/Documentation/translations/zh_CN/loongarch/introduction.rst @@ -70,8 +70,8 @@ LA64中每个寄存器为64位宽。 ``$r0`` 的内容总是固定为0,而其 ================= ================== =================== ========== .. note:: - 注意:在一些遗留代码中有时可能见到 ``$v0`` 和 ``$v1`` ,它们是 - ``$a0`` 和 ``$a1`` 的别名,属于已经废弃的用法。 + 注意:在一些遗留代码中有时可能见到 ``$fv0`` 和 ``$fv1`` ,它们是 + ``$fa0`` 和 ``$fa1`` 的别名,属于已经废弃的用法。 向量寄存器 diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index eee9f857a986..896914e3a847 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -7213,14 +7213,13 @@ veto the transition. :Parameters: args[0] is the maximum poll time in nanoseconds :Returns: 0 on success; -1 on error -This capability overrides the kvm module parameter halt_poll_ns for the -target VM. - -VCPU polling allows a VCPU to poll for wakeup events instead of immediately -scheduling during guest halts. The maximum time a VCPU can spend polling is -controlled by the kvm module parameter halt_poll_ns. This capability allows -the maximum halt time to specified on a per-VM basis, effectively overriding -the module parameter for the target VM. +KVM_CAP_HALT_POLL overrides the kvm.halt_poll_ns module parameter to set the +maximum halt-polling time for all vCPUs in the target VM. This capability can +be invoked at any time and any number of times to dynamically change the +maximum halt-polling time. + +See Documentation/virt/kvm/halt-polling.rst for more information on halt +polling. 7.21 KVM_CAP_X86_USER_SPACE_MSR ------------------------------- diff --git a/Documentation/virt/kvm/x86/halt-polling.rst b/Documentation/virt/kvm/halt-polling.rst index 4922e4a15f18..3fae39b1a5ba 100644 --- a/Documentation/virt/kvm/x86/halt-polling.rst +++ b/Documentation/virt/kvm/halt-polling.rst @@ -119,6 +119,19 @@ These module parameters can be set from the debugfs files in: Note: that these module parameters are system wide values and are not able to be tuned on a per vm basis. +Any changes to these parameters will be picked up by new and existing vCPUs the +next time they halt, with the notable exception of VMs using KVM_CAP_HALT_POLL +(see next section). + +KVM_CAP_HALT_POLL +================= + +KVM_CAP_HALT_POLL is a VM capability that allows userspace to override halt_poll_ns +on a per-VM basis. VMs using KVM_CAP_HALT_POLL ignore halt_poll_ns completely (but +still obey halt_poll_ns_grow, halt_poll_ns_grow_start, and halt_poll_ns_shrink). + +See Documentation/virt/kvm/api.rst for more information on this capability. + Further Notes ============= diff --git a/Documentation/virt/kvm/index.rst b/Documentation/virt/kvm/index.rst index e0a2c74e1043..ad13ec55ddfe 100644 --- a/Documentation/virt/kvm/index.rst +++ b/Documentation/virt/kvm/index.rst @@ -17,4 +17,5 @@ KVM locking vcpu-requests + halt-polling review-checklist diff --git a/Documentation/virt/kvm/x86/index.rst b/Documentation/virt/kvm/x86/index.rst index 7ff588826b9f..9ece6b8dc817 100644 --- a/Documentation/virt/kvm/x86/index.rst +++ b/Documentation/virt/kvm/x86/index.rst @@ -10,7 +10,6 @@ KVM for x86 systems amd-memory-encryption cpuid errata - halt-polling hypercalls mmu msr diff --git a/MAINTAINERS b/MAINTAINERS index 2585e7edc335..886d3f69ee64 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5299,7 +5299,7 @@ M: Johannes Weiner <hannes@cmpxchg.org> M: Michal Hocko <mhocko@kernel.org> M: Roman Gushchin <roman.gushchin@linux.dev> M: Shakeel Butt <shakeelb@google.com> -R: Muchun Song <songmuchun@bytedance.com> +R: Muchun Song <muchun.song@linux.dev> L: cgroups@vger.kernel.org L: linux-mm@kvack.org S: Maintained @@ -5585,8 +5585,6 @@ F: drivers/scsi/cxgbi/cxgb3i CXGB4 CRYPTO DRIVER (chcr) M: Ayush Sawal <ayush.sawal@chelsio.com> -M: Vinay Kumar Yadav <vinay.yadav@chelsio.com> -M: Rohit Maheshwari <rohitm@chelsio.com> L: linux-crypto@vger.kernel.org S: Supported W: http://www.chelsio.com @@ -5594,8 +5592,6 @@ F: drivers/crypto/chelsio CXGB4 INLINE CRYPTO DRIVER M: Ayush Sawal <ayush.sawal@chelsio.com> -M: Vinay Kumar Yadav <vinay.yadav@chelsio.com> -M: Rohit Maheshwari <rohitm@chelsio.com> L: netdev@vger.kernel.org S: Supported W: http://www.chelsio.com @@ -9443,7 +9439,7 @@ F: drivers/net/ethernet/huawei/hinic/ HUGETLB SUBSYSTEM M: Mike Kravetz <mike.kravetz@oracle.com> -M: Muchun Song <songmuchun@bytedance.com> +M: Muchun Song <muchun.song@linux.dev> L: linux-mm@kvack.org S: Maintained F: Documentation/ABI/testing/sysfs-kernel-mm-hugepages @@ -10287,7 +10283,7 @@ T: git https://github.com/intel/gvt-linux.git F: drivers/gpu/drm/i915/gvt/ INTEL HID EVENT DRIVER -M: Alex Hung <alex.hung@canonical.com> +M: Alex Hung <alexhung@gmail.com> L: platform-driver-x86@vger.kernel.org S: Maintained F: drivers/platform/x86/intel/hid.c @@ -15952,6 +15948,7 @@ Q: https://patchwork.kernel.org/project/linux-pci/list/ B: https://bugzilla.kernel.org C: irc://irc.oftc.net/linux-pci T: git git://git.kernel.org/pub/scm/linux/kernel/git/lpieralisi/pci.git +F: Documentation/devicetree/bindings/pci/ F: drivers/pci/controller/ F: drivers/pci/pci-bridge-emul.c F: drivers/pci/pci-bridge-emul.h @@ -16058,7 +16055,7 @@ F: Documentation/devicetree/bindings/pci/microchip* F: drivers/pci/controller/*microchip* PCIE DRIVER FOR QUALCOMM MSM -M: Stanimir Varbanov <svarbanov@mm-sol.com> +M: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org> L: linux-pci@vger.kernel.org L: linux-arm-msm@vger.kernel.org S: Maintained @@ -16148,7 +16145,8 @@ F: include/linux/peci-cpu.h F: include/linux/peci.h PENSANDO ETHERNET DRIVERS -M: Shannon Nelson <snelson@pensando.io> +M: Shannon Nelson <shannon.nelson@amd.com> +M: Brett Creeley <brett.creeley@amd.com> M: drivers@pensando.io L: netdev@vger.kernel.org S: Supported @@ -17484,10 +17482,8 @@ S: Maintained F: drivers/net/wireless/realtek/rtw89/ REDPINE WIRELESS DRIVER -M: Amitkumar Karwar <amitkarwar@gmail.com> -M: Siva Rebbagondla <siva8118@gmail.com> L: linux-wireless@vger.kernel.org -S: Maintained +S: Orphan F: drivers/net/wireless/rsi/ REGISTER MAP ABSTRACTION @@ -18010,7 +18006,7 @@ L: linux-fbdev@vger.kernel.org S: Maintained F: drivers/video/fbdev/savage/ -S390 +S390 ARCHITECTURE M: Heiko Carstens <hca@linux.ibm.com> M: Vasily Gorbik <gor@linux.ibm.com> M: Alexander Gordeev <agordeev@linux.ibm.com> @@ -18065,6 +18061,15 @@ L: netdev@vger.kernel.org S: Supported F: drivers/s390/net/ +S390 MM +M: Alexander Gordeev <agordeev@linux.ibm.com> +M: Gerald Schaefer <gerald.schaefer@linux.ibm.com> +L: linux-s390@vger.kernel.org +S: Supported +T: git git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux.git +F: arch/s390/include/asm/pgtable.h +F: arch/s390/mm + S390 PCI SUBSYSTEM M: Niklas Schnelle <schnelle@linux.ibm.com> M: Gerald Schaefer <gerald.schaefer@linux.ibm.com> @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 1 SUBLEVEL = 0 -EXTRAVERSION = -rc6 +EXTRAVERSION = NAME = Hurr durr I'ma ninja sloth # *DOCUMENTATION* diff --git a/arch/arm/boot/dts/am335x-pcm-953.dtsi b/arch/arm/boot/dts/am335x-pcm-953.dtsi index dae448040a97..947497413977 100644 --- a/arch/arm/boot/dts/am335x-pcm-953.dtsi +++ b/arch/arm/boot/dts/am335x-pcm-953.dtsi @@ -12,22 +12,20 @@ compatible = "phytec,am335x-pcm-953", "phytec,am335x-phycore-som", "ti,am33xx"; /* Power */ - regulators { - vcc3v3: fixedregulator@1 { - compatible = "regulator-fixed"; - regulator-name = "vcc3v3"; - regulator-min-microvolt = <3300000>; - regulator-max-microvolt = <3300000>; - regulator-boot-on; - }; + vcc3v3: fixedregulator1 { + compatible = "regulator-fixed"; + regulator-name = "vcc3v3"; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + regulator-boot-on; + }; - vcc1v8: fixedregulator@2 { - compatible = "regulator-fixed"; - regulator-name = "vcc1v8"; - regulator-min-microvolt = <1800000>; - regulator-max-microvolt = <1800000>; - regulator-boot-on; - }; + vcc1v8: fixedregulator2 { + compatible = "regulator-fixed"; + regulator-name = "vcc1v8"; + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <1800000>; + regulator-boot-on; }; /* User IO */ diff --git a/arch/arm/boot/dts/at91rm9200.dtsi b/arch/arm/boot/dts/at91rm9200.dtsi index 7a113325abb9..6f9004ebf424 100644 --- a/arch/arm/boot/dts/at91rm9200.dtsi +++ b/arch/arm/boot/dts/at91rm9200.dtsi @@ -666,7 +666,7 @@ compatible = "atmel,at91rm9200-udc"; reg = <0xfffb0000 0x4000>; interrupts = <11 IRQ_TYPE_LEVEL_HIGH 2>; - clocks = <&pmc PMC_TYPE_PERIPHERAL 11>, <&pmc PMC_TYPE_SYSTEM 2>; + clocks = <&pmc PMC_TYPE_PERIPHERAL 11>, <&pmc PMC_TYPE_SYSTEM 1>; clock-names = "pclk", "hclk"; status = "disabled"; }; diff --git a/arch/arm/boot/dts/at91sam9g20ek_common.dtsi b/arch/arm/boot/dts/at91sam9g20ek_common.dtsi index 60d61291f344..024af2db638e 100644 --- a/arch/arm/boot/dts/at91sam9g20ek_common.dtsi +++ b/arch/arm/boot/dts/at91sam9g20ek_common.dtsi @@ -39,6 +39,13 @@ }; + usb1 { + pinctrl_usb1_vbus_gpio: usb1_vbus_gpio { + atmel,pins = + <AT91_PIOC 5 AT91_PERIPH_GPIO AT91_PINCTRL_DEGLITCH>; /* PC5 GPIO */ + }; + }; + mmc0_slot1 { pinctrl_board_mmc0_slot1: mmc0_slot1-board { atmel,pins = @@ -84,6 +91,8 @@ }; usb1: gadget@fffa4000 { + pinctrl-0 = <&pinctrl_usb1_vbus_gpio>; + pinctrl-names = "default"; atmel,vbus-gpio = <&pioC 5 GPIO_ACTIVE_HIGH>; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx6q-prti6q.dts b/arch/arm/boot/dts/imx6q-prti6q.dts index b4605edfd2ab..d8fa83effd63 100644 --- a/arch/arm/boot/dts/imx6q-prti6q.dts +++ b/arch/arm/boot/dts/imx6q-prti6q.dts @@ -364,8 +364,8 @@ pinctrl-names = "default"; pinctrl-0 = <&pinctrl_wifi>; interrupts-extended = <&gpio1 30 IRQ_TYPE_LEVEL_HIGH>; - ref-clock-frequency = "38400000"; - tcxo-clock-frequency = "19200000"; + ref-clock-frequency = <38400000>; + tcxo-clock-frequency = <19200000>; }; }; diff --git a/arch/arm/boot/dts/imx7s.dtsi b/arch/arm/boot/dts/imx7s.dtsi index 03d2e8544a4e..0fc9e6b8b05d 100644 --- a/arch/arm/boot/dts/imx7s.dtsi +++ b/arch/arm/boot/dts/imx7s.dtsi @@ -1270,10 +1270,10 @@ clocks = <&clks IMX7D_NAND_USDHC_BUS_RAWNAND_CLK>; }; - gpmi: nand-controller@33002000 { + gpmi: nand-controller@33002000{ compatible = "fsl,imx7d-gpmi-nand"; #address-cells = <1>; - #size-cells = <0>; + #size-cells = <1>; reg = <0x33002000 0x2000>, <0x33004000 0x4000>; reg-names = "gpmi-nand", "bch"; interrupts = <GIC_SPI 14 IRQ_TYPE_LEVEL_HIGH>; diff --git a/arch/arm/boot/dts/rk3036-evb.dts b/arch/arm/boot/dts/rk3036-evb.dts index 9fd4d9db9f8f..becdc0b664bf 100644 --- a/arch/arm/boot/dts/rk3036-evb.dts +++ b/arch/arm/boot/dts/rk3036-evb.dts @@ -35,11 +35,10 @@ &i2c1 { status = "okay"; - hym8563: hym8563@51 { + hym8563: rtc@51 { compatible = "haoyu,hym8563"; reg = <0x51>; #clock-cells = <0>; - clock-frequency = <32768>; clock-output-names = "xin32k"; }; }; diff --git a/arch/arm/boot/dts/rk3066a-mk808.dts b/arch/arm/boot/dts/rk3066a-mk808.dts index cfa318a506eb..2db5ba706208 100644 --- a/arch/arm/boot/dts/rk3066a-mk808.dts +++ b/arch/arm/boot/dts/rk3066a-mk808.dts @@ -32,7 +32,7 @@ keyup-threshold-microvolt = <2500000>; poll-interval = <100>; - recovery { + button-recovery { label = "recovery"; linux,code = <KEY_VENDOR>; press-threshold-microvolt = <0>; diff --git a/arch/arm/boot/dts/rk3188-radxarock.dts b/arch/arm/boot/dts/rk3188-radxarock.dts index e7cf18823558..118deacd38c4 100644 --- a/arch/arm/boot/dts/rk3188-radxarock.dts +++ b/arch/arm/boot/dts/rk3188-radxarock.dts @@ -71,7 +71,7 @@ #sound-dai-cells = <0>; }; - ir_recv: gpio-ir-receiver { + ir_recv: ir-receiver { compatible = "gpio-ir-receiver"; gpios = <&gpio0 RK_PB2 GPIO_ACTIVE_LOW>; pinctrl-names = "default"; diff --git a/arch/arm/boot/dts/rk3188.dtsi b/arch/arm/boot/dts/rk3188.dtsi index cdd4a0bd5133..44b54af0bbf9 100644 --- a/arch/arm/boot/dts/rk3188.dtsi +++ b/arch/arm/boot/dts/rk3188.dtsi @@ -379,7 +379,7 @@ rockchip,pins = <2 RK_PD3 1 &pcfg_pull_none>; }; - lcdc1_rgb24: ldcd1-rgb24 { + lcdc1_rgb24: lcdc1-rgb24 { rockchip,pins = <2 RK_PA0 1 &pcfg_pull_none>, <2 RK_PA1 1 &pcfg_pull_none>, <2 RK_PA2 1 &pcfg_pull_none>, @@ -607,7 +607,6 @@ &global_timer { interrupts = <GIC_PPI 11 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_EDGE_RISING)>; - status = "disabled"; }; &local_timer { diff --git a/arch/arm/boot/dts/rk3288-evb-act8846.dts b/arch/arm/boot/dts/rk3288-evb-act8846.dts index be695b8c1f67..8a635c243127 100644 --- a/arch/arm/boot/dts/rk3288-evb-act8846.dts +++ b/arch/arm/boot/dts/rk3288-evb-act8846.dts @@ -54,7 +54,7 @@ vin-supply = <&vcc_sys>; }; - hym8563@51 { + rtc@51 { compatible = "haoyu,hym8563"; reg = <0x51>; diff --git a/arch/arm/boot/dts/rk3288-evb.dtsi b/arch/arm/boot/dts/rk3288-evb.dtsi index 399d6b9c5fd4..382d2839cf47 100644 --- a/arch/arm/boot/dts/rk3288-evb.dtsi +++ b/arch/arm/boot/dts/rk3288-evb.dtsi @@ -28,19 +28,19 @@ press-threshold-microvolt = <300000>; }; - menu { + button-menu { label = "Menu"; linux,code = <KEY_MENU>; press-threshold-microvolt = <640000>; }; - esc { + button-esc { label = "Esc"; linux,code = <KEY_ESC>; press-threshold-microvolt = <1000000>; }; - home { + button-home { label = "Home"; linux,code = <KEY_HOME>; press-threshold-microvolt = <1300000>; diff --git a/arch/arm/boot/dts/rk3288-firefly.dtsi b/arch/arm/boot/dts/rk3288-firefly.dtsi index 052afe5543e2..3836c61cfb76 100644 --- a/arch/arm/boot/dts/rk3288-firefly.dtsi +++ b/arch/arm/boot/dts/rk3288-firefly.dtsi @@ -233,11 +233,10 @@ vin-supply = <&vcc_sys>; }; - hym8563: hym8563@51 { + hym8563: rtc@51 { compatible = "haoyu,hym8563"; reg = <0x51>; #clock-cells = <0>; - clock-frequency = <32768>; clock-output-names = "xin32k"; interrupt-parent = <&gpio7>; interrupts = <RK_PA4 IRQ_TYPE_EDGE_FALLING>; diff --git a/arch/arm/boot/dts/rk3288-miqi.dts b/arch/arm/boot/dts/rk3288-miqi.dts index 713f55e143c6..db1eb648e0e1 100644 --- a/arch/arm/boot/dts/rk3288-miqi.dts +++ b/arch/arm/boot/dts/rk3288-miqi.dts @@ -162,11 +162,10 @@ vin-supply = <&vcc_sys>; }; - hym8563: hym8563@51 { + hym8563: rtc@51 { compatible = "haoyu,hym8563"; reg = <0x51>; #clock-cells = <0>; - clock-frequency = <32768>; clock-output-names = "xin32k"; }; diff --git a/arch/arm/boot/dts/rk3288-rock2-square.dts b/arch/arm/boot/dts/rk3288-rock2-square.dts index 80e0f07c8e87..13cfdaa95cc7 100644 --- a/arch/arm/boot/dts/rk3288-rock2-square.dts +++ b/arch/arm/boot/dts/rk3288-rock2-square.dts @@ -165,11 +165,10 @@ }; &i2c0 { - hym8563: hym8563@51 { + hym8563: rtc@51 { compatible = "haoyu,hym8563"; reg = <0x51>; #clock-cells = <0>; - clock-frequency = <32768>; clock-output-names = "xin32k"; interrupt-parent = <&gpio0>; interrupts = <RK_PA4 IRQ_TYPE_EDGE_FALLING>; diff --git a/arch/arm/boot/dts/rk3288-vmarc-som.dtsi b/arch/arm/boot/dts/rk3288-vmarc-som.dtsi index 0ae2bd150e37..793951655b73 100644 --- a/arch/arm/boot/dts/rk3288-vmarc-som.dtsi +++ b/arch/arm/boot/dts/rk3288-vmarc-som.dtsi @@ -241,7 +241,6 @@ interrupt-parent = <&gpio5>; interrupts = <RK_PC3 IRQ_TYPE_LEVEL_LOW>; #clock-cells = <0>; - clock-frequency = <32768>; clock-output-names = "hym8563"; pinctrl-names = "default"; pinctrl-0 = <&hym8563_int>; diff --git a/arch/arm/boot/dts/rk3xxx.dtsi b/arch/arm/boot/dts/rk3xxx.dtsi index bf285091a9eb..cb4e42ede56a 100644 --- a/arch/arm/boot/dts/rk3xxx.dtsi +++ b/arch/arm/boot/dts/rk3xxx.dtsi @@ -76,6 +76,13 @@ reg = <0x1013c200 0x20>; interrupts = <GIC_PPI 11 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_EDGE_RISING)>; clocks = <&cru CORE_PERI>; + status = "disabled"; + /* The clock source and the sched_clock provided by the arm_global_timer + * on Rockchip rk3066a/rk3188 are quite unstable because their rates + * depend on the CPU frequency. + * Keep the arm_global_timer disabled in order to have the + * DW_APB_TIMER (rk3066a) or ROCKCHIP_TIMER (rk3188) selected by default. + */ }; local_timer: local-timer@1013c600 { diff --git a/arch/arm/include/asm/perf_event.h b/arch/arm/include/asm/perf_event.h index fe87397c3d8c..bdbc1e590891 100644 --- a/arch/arm/include/asm/perf_event.h +++ b/arch/arm/include/asm/perf_event.h @@ -17,7 +17,7 @@ extern unsigned long perf_misc_flags(struct pt_regs *regs); #define perf_arch_fetch_caller_regs(regs, __ip) { \ (regs)->ARM_pc = (__ip); \ - (regs)->ARM_fp = (unsigned long) __builtin_frame_address(0); \ + frame_pointer((regs)) = (unsigned long) __builtin_frame_address(0); \ (regs)->ARM_sp = current_stack_pointer; \ (regs)->ARM_cpsr = SVC_MODE; \ } diff --git a/arch/arm/include/asm/pgtable-nommu.h b/arch/arm/include/asm/pgtable-nommu.h index d16aba48fa0a..090011394477 100644 --- a/arch/arm/include/asm/pgtable-nommu.h +++ b/arch/arm/include/asm/pgtable-nommu.h @@ -45,12 +45,6 @@ typedef pte_t *pte_addr_t; /* - * ZERO_PAGE is a global shared page that is always zero: used - * for zero-mapped memory areas etc.. - */ -#define ZERO_PAGE(vaddr) (virt_to_page(0)) - -/* * Mark the prot value as uncacheable and unbufferable. */ #define pgprot_noncached(prot) (prot) diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h index 78a532068fec..ef48a55e9af8 100644 --- a/arch/arm/include/asm/pgtable.h +++ b/arch/arm/include/asm/pgtable.h @@ -10,6 +10,15 @@ #include <linux/const.h> #include <asm/proc-fns.h> +#ifndef __ASSEMBLY__ +/* + * ZERO_PAGE is a global shared page that is always zero: used + * for zero-mapped memory areas etc.. + */ +extern struct page *empty_zero_page; +#define ZERO_PAGE(vaddr) (empty_zero_page) +#endif + #ifndef CONFIG_MMU #include <asm-generic/pgtable-nopud.h> @@ -139,13 +148,6 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, */ #ifndef __ASSEMBLY__ -/* - * ZERO_PAGE is a global shared page that is always zero: used - * for zero-mapped memory areas etc.. - */ -extern struct page *empty_zero_page; -#define ZERO_PAGE(vaddr) (empty_zero_page) - extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; diff --git a/arch/arm/mach-at91/sama5.c b/arch/arm/mach-at91/sama5.c index 67ed68fbe3a5..bf2b5c6a18c6 100644 --- a/arch/arm/mach-at91/sama5.c +++ b/arch/arm/mach-at91/sama5.c @@ -26,7 +26,7 @@ static void sama5_l2c310_write_sec(unsigned long val, unsigned reg) static void __init sama5_secure_cache_init(void) { sam_secure_init(); - if (sam_linux_is_optee_available()) + if (IS_ENABLED(CONFIG_OUTER_CACHE) && sam_linux_is_optee_available()) outer_cache.write_sec = sama5_l2c310_write_sec; } diff --git a/arch/arm/mach-mxs/mach-mxs.c b/arch/arm/mach-mxs/mach-mxs.c index 25c9d184fa4c..1c57ac401649 100644 --- a/arch/arm/mach-mxs/mach-mxs.c +++ b/arch/arm/mach-mxs/mach-mxs.c @@ -393,8 +393,10 @@ static void __init mxs_machine_init(void) root = of_find_node_by_path("/"); ret = of_property_read_string(root, "model", &soc_dev_attr->machine); - if (ret) + if (ret) { + kfree(soc_dev_attr); return; + } soc_dev_attr->family = "Freescale MXS Family"; soc_dev_attr->soc_id = mxs_get_soc_id(); diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index 46cccd6bf705..de988cba9a4b 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -105,6 +105,19 @@ static inline bool is_write_fault(unsigned int fsr) return (fsr & FSR_WRITE) && !(fsr & FSR_CM); } +static inline bool is_translation_fault(unsigned int fsr) +{ + int fs = fsr_fs(fsr); +#ifdef CONFIG_ARM_LPAE + if ((fs & FS_MMU_NOLL_MASK) == FS_TRANS_NOLL) + return true; +#else + if (fs == FS_L1_TRANS || fs == FS_L2_TRANS) + return true; +#endif + return false; +} + static void die_kernel_fault(const char *msg, struct mm_struct *mm, unsigned long addr, unsigned int fsr, struct pt_regs *regs) @@ -140,7 +153,8 @@ __do_kernel_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr, if (addr < PAGE_SIZE) { msg = "NULL pointer dereference"; } else { - if (kfence_handle_page_fault(addr, is_write_fault(fsr), regs)) + if (is_translation_fault(fsr) && + kfence_handle_page_fault(addr, is_write_fault(fsr), regs)) return; msg = "paging request"; @@ -208,7 +222,7 @@ static inline bool is_permission_fault(unsigned int fsr) { int fs = fsr_fs(fsr); #ifdef CONFIG_ARM_LPAE - if ((fs & FS_PERM_NOLL_MASK) == FS_PERM_NOLL) + if ((fs & FS_MMU_NOLL_MASK) == FS_PERM_NOLL) return true; #else if (fs == FS_L1_PERM || fs == FS_L2_PERM) diff --git a/arch/arm/mm/fault.h b/arch/arm/mm/fault.h index 83b5ab32d7a4..54927ba1fa6e 100644 --- a/arch/arm/mm/fault.h +++ b/arch/arm/mm/fault.h @@ -14,8 +14,9 @@ #ifdef CONFIG_ARM_LPAE #define FSR_FS_AEA 17 +#define FS_TRANS_NOLL 0x4 #define FS_PERM_NOLL 0xC -#define FS_PERM_NOLL_MASK 0x3C +#define FS_MMU_NOLL_MASK 0x3C static inline int fsr_fs(unsigned int fsr) { @@ -23,8 +24,10 @@ static inline int fsr_fs(unsigned int fsr) } #else #define FSR_FS_AEA 22 -#define FS_L1_PERM 0xD -#define FS_L2_PERM 0xF +#define FS_L1_TRANS 0x5 +#define FS_L2_TRANS 0x7 +#define FS_L1_PERM 0xD +#define FS_L2_PERM 0xF static inline int fsr_fs(unsigned int fsr) { diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c index c42debaded95..c1494a4dee25 100644 --- a/arch/arm/mm/nommu.c +++ b/arch/arm/mm/nommu.c @@ -26,6 +26,13 @@ unsigned long vectors_base; +/* + * empty_zero_page is a special page that is used for + * zero-initialized data and COW. + */ +struct page *empty_zero_page; +EXPORT_SYMBOL(empty_zero_page); + #ifdef CONFIG_ARM_MPU struct mpu_rgn_info mpu_rgn_info; #endif @@ -148,9 +155,21 @@ void __init adjust_lowmem_bounds(void) */ void __init paging_init(const struct machine_desc *mdesc) { + void *zero_page; + early_trap_init((void *)vectors_base); mpu_setup(); + + /* allocate the zero page. */ + zero_page = memblock_alloc(PAGE_SIZE, PAGE_SIZE); + if (!zero_page) + panic("%s: Failed to allocate %lu bytes align=0x%lx\n", + __func__, PAGE_SIZE, PAGE_SIZE); + bootmem_init(); + + empty_zero_page = virt_to_page(zero_page); + flush_dcache_page(empty_zero_page); } /* diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h6.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-h6.dtsi index 53f6660656ac..ca1d287a0a01 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h6.dtsi +++ b/arch/arm64/boot/dts/allwinner/sun50i-h6.dtsi @@ -161,6 +161,7 @@ clocks = <&ccu CLK_BUS_VP9>, <&ccu CLK_VP9>; clock-names = "bus", "mod"; resets = <&ccu RST_BUS_VP9>; + iommus = <&iommu 5>; }; video-codec@1c0e000 { diff --git a/arch/arm64/boot/dts/freescale/imx8mp-evk.dts b/arch/arm64/boot/dts/freescale/imx8mp-evk.dts index 9f1469db554d..b4c1ef2559f2 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-evk.dts +++ b/arch/arm64/boot/dts/freescale/imx8mp-evk.dts @@ -544,14 +544,14 @@ pinctrl_pcie0: pcie0grp { fsl,pins = < - MX8MP_IOMUXC_I2C4_SCL__PCIE_CLKREQ_B 0x61 /* open drain, pull up */ - MX8MP_IOMUXC_SD1_DATA5__GPIO2_IO07 0x41 + MX8MP_IOMUXC_I2C4_SCL__PCIE_CLKREQ_B 0x60 /* open drain, pull up */ + MX8MP_IOMUXC_SD1_DATA5__GPIO2_IO07 0x40 >; }; pinctrl_pcie0_reg: pcie0reggrp { fsl,pins = < - MX8MP_IOMUXC_SD1_DATA4__GPIO2_IO06 0x41 + MX8MP_IOMUXC_SD1_DATA4__GPIO2_IO06 0x40 >; }; diff --git a/arch/arm64/boot/dts/rockchip/px30-evb.dts b/arch/arm64/boot/dts/rockchip/px30-evb.dts index 07008d84434c..c1bbd555f5f5 100644 --- a/arch/arm64/boot/dts/rockchip/px30-evb.dts +++ b/arch/arm64/boot/dts/rockchip/px30-evb.dts @@ -30,31 +30,31 @@ keyup-threshold-microvolt = <1800000>; poll-interval = <100>; - esc-key { + button-esc { label = "esc"; linux,code = <KEY_ESC>; press-threshold-microvolt = <1310000>; }; - home-key { + button-home { label = "home"; linux,code = <KEY_HOME>; press-threshold-microvolt = <624000>; }; - menu-key { + button-menu { label = "menu"; linux,code = <KEY_MENU>; press-threshold-microvolt = <987000>; }; - vol-down-key { + button-down { label = "volume down"; linux,code = <KEY_VOLUMEDOWN>; press-threshold-microvolt = <300000>; }; - vol-up-key { + button-up { label = "volume up"; linux,code = <KEY_VOLUMEUP>; press-threshold-microvolt = <17000>; diff --git a/arch/arm64/boot/dts/rockchip/rk3308-evb.dts b/arch/arm64/boot/dts/rockchip/rk3308-evb.dts index 9fe9b0d11003..184b84fdde07 100644 --- a/arch/arm64/boot/dts/rockchip/rk3308-evb.dts +++ b/arch/arm64/boot/dts/rockchip/rk3308-evb.dts @@ -23,7 +23,7 @@ poll-interval = <100>; keyup-threshold-microvolt = <1800000>; - func-key { + button-func { linux,code = <KEY_FN>; label = "function"; press-threshold-microvolt = <18000>; @@ -37,31 +37,31 @@ poll-interval = <100>; keyup-threshold-microvolt = <1800000>; - esc-key { + button-esc { linux,code = <KEY_MICMUTE>; label = "micmute"; press-threshold-microvolt = <1130000>; }; - home-key { + button-home { linux,code = <KEY_MODE>; label = "mode"; press-threshold-microvolt = <901000>; }; - menu-key { + button-menu { linux,code = <KEY_PLAY>; label = "play"; press-threshold-microvolt = <624000>; }; - vol-down-key { + button-down { linux,code = <KEY_VOLUMEDOWN>; label = "volume down"; press-threshold-microvolt = <300000>; }; - vol-up-key { + button-up { linux,code = <KEY_VOLUMEUP>; label = "volume up"; press-threshold-microvolt = <18000>; diff --git a/arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts b/arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts index ea6820902ede..7ea48167747c 100644 --- a/arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts +++ b/arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts @@ -19,7 +19,7 @@ stdout-path = "serial2:1500000n8"; }; - ir_rx { + ir-receiver { compatible = "gpio-ir-receiver"; gpios = <&gpio0 RK_PC0 GPIO_ACTIVE_HIGH>; pinctrl-names = "default"; diff --git a/arch/arm64/boot/dts/rockchip/rk3318-a95x-z2.dts b/arch/arm64/boot/dts/rockchip/rk3318-a95x-z2.dts index 43c928ac98f0..1deef53a4c94 100644 --- a/arch/arm64/boot/dts/rockchip/rk3318-a95x-z2.dts +++ b/arch/arm64/boot/dts/rockchip/rk3318-a95x-z2.dts @@ -25,7 +25,7 @@ keyup-threshold-microvolt = <1800000>; poll-interval = <100>; - recovery { + button-recovery { label = "recovery"; linux,code = <KEY_VENDOR>; press-threshold-microvolt = <17000>; diff --git a/arch/arm64/boot/dts/rockchip/rk3368-orion-r68-meta.dts b/arch/arm64/boot/dts/rockchip/rk3368-orion-r68-meta.dts index 7f5bba0c6001..81d1064fdb21 100644 --- a/arch/arm64/boot/dts/rockchip/rk3368-orion-r68-meta.dts +++ b/arch/arm64/boot/dts/rockchip/rk3368-orion-r68-meta.dts @@ -208,11 +208,10 @@ vin-supply = <&vcc_sys>; }; - hym8563: hym8563@51 { + hym8563: rtc@51 { compatible = "haoyu,hym8563"; reg = <0x51>; #clock-cells = <0>; - clock-frequency = <32768>; clock-output-names = "xin32k"; /* rtc_int is not connected */ }; diff --git a/arch/arm64/boot/dts/rockchip/rk3368-r88.dts b/arch/arm64/boot/dts/rockchip/rk3368-r88.dts index 38d757c00548..5589f3db6b36 100644 --- a/arch/arm64/boot/dts/rockchip/rk3368-r88.dts +++ b/arch/arm64/boot/dts/rockchip/rk3368-r88.dts @@ -192,11 +192,10 @@ vin-supply = <&vcc_sys>; }; - hym8563: hym8563@51 { + hym8563: rtc@51 { compatible = "haoyu,hym8563"; reg = <0x51>; #clock-cells = <0>; - clock-frequency = <32768>; clock-output-names = "xin32k"; /* rtc_int is not connected */ }; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi index ed3348b558f8..a47d9f758611 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi @@ -734,10 +734,6 @@ camera: &i2c7 { }; /* PINCTRL OVERRIDES */ -&ec_ap_int_l { - rockchip,pins = <1 RK_PC2 RK_FUNC_GPIO &pcfg_pull_up>; -}; - &ap_fw_wp { rockchip,pins = <0 RK_PB5 RK_FUNC_GPIO &pcfg_pull_none>; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-khadas-edge.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-khadas-edge.dtsi index 2a332763c35c..9d9297bc5f04 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-khadas-edge.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-khadas-edge.dtsi @@ -123,7 +123,7 @@ keyup-threshold-microvolt = <1800000>; poll-interval = <100>; - recovery { + button-recovery { label = "Recovery"; linux,code = <KEY_VENDOR>; press-threshold-microvolt = <18000>; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-nanopc-t4.dts b/arch/arm64/boot/dts/rockchip/rk3399-nanopc-t4.dts index 452728b82e42..3bf8f959e42c 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-nanopc-t4.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-nanopc-t4.dts @@ -39,7 +39,7 @@ keyup-threshold-microvolt = <1800000>; poll-interval = <100>; - recovery { + button-recovery { label = "Recovery"; linux,code = <KEY_VENDOR>; press-threshold-microvolt = <18000>; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-nanopi-m4b.dts b/arch/arm64/boot/dts/rockchip/rk3399-nanopi-m4b.dts index 72182c58cc46..65cb21837b0c 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-nanopi-m4b.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-nanopi-m4b.dts @@ -19,7 +19,7 @@ keyup-threshold-microvolt = <1500000>; poll-interval = <100>; - recovery { + button-recovery { label = "Recovery"; linux,code = <KEY_VENDOR>; press-threshold-microvolt = <18000>; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-nanopi4.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-nanopi4.dtsi index 278123b4f911..b6e082f1f6d9 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-nanopi4.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-nanopi4.dtsi @@ -167,6 +167,7 @@ }; &emmc_phy { + rockchip,enable-strobe-pulldown; status = "okay"; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-orangepi.dts b/arch/arm64/boot/dts/rockchip/rk3399-orangepi.dts index 9e2e246e0bab..dba4d03bfc2b 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-orangepi.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-orangepi.dts @@ -52,13 +52,13 @@ press-threshold-microvolt = <300000>; }; - back { + button-back { label = "Back"; linux,code = <KEY_BACK>; press-threshold-microvolt = <985000>; }; - menu { + button-menu { label = "Menu"; linux,code = <KEY_MENU>; press-threshold-microvolt = <1314000>; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts b/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts index 04c752f49be9..115c14c0a3c6 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts @@ -207,7 +207,7 @@ cap-sd-highspeed; cd-gpios = <&gpio0 RK_PA7 GPIO_ACTIVE_LOW>; disable-wp; - max-frequency = <150000000>; + max-frequency = <40000000>; pinctrl-names = "default"; pinctrl-0 = <&sdmmc_clk &sdmmc_cmd &sdmmc_cd &sdmmc_bus4>; vmmc-supply = <&vcc3v3_baseboard>; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-roc-pc-plus.dts b/arch/arm64/boot/dts/rockchip/rk3399-roc-pc-plus.dts index 5a2661ae0131..7ba1c28f70a9 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-roc-pc-plus.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-roc-pc-plus.dts @@ -98,13 +98,12 @@ }; &i2c0 { - hym8563: hym8563@51 { + hym8563: rtc@51 { compatible = "haoyu,hym8563"; reg = <0x51>; interrupt-parent = <&gpio0>; interrupts = <RK_PA5 IRQ_TYPE_EDGE_FALLING>; #clock-cells = <0>; - clock-frequency = <32768>; clock-output-names = "xin32k"; pinctrl-names = "default"; pinctrl-0 = <&hym8563_int>; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-roc-pc.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-roc-pc.dtsi index 2f4b1b2e3ac7..bbf1e3f24585 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-roc-pc.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-roc-pc.dtsi @@ -41,7 +41,7 @@ keyup-threshold-microvolt = <1500000>; poll-interval = <100>; - recovery { + button-recovery { label = "Recovery"; linux,code = <KEY_VENDOR>; press-threshold-microvolt = <18000>; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi index 645ced6617a6..1f76d3501bda 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi @@ -509,7 +509,6 @@ &i2s1 { rockchip,playback-channels = <2>; rockchip,capture-channels = <2>; - status = "okay"; }; &i2s2 { diff --git a/arch/arm64/boot/dts/rockchip/rk3399-sapphire-excavator.dts b/arch/arm64/boot/dts/rockchip/rk3399-sapphire-excavator.dts index 13927e7d0724..dbec2b7173a0 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-sapphire-excavator.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-sapphire-excavator.dts @@ -33,13 +33,13 @@ press-threshold-microvolt = <300000>; }; - back { + button-back { label = "Back"; linux,code = <KEY_BACK>; press-threshold-microvolt = <985000>; }; - menu { + button-menu { label = "Menu"; linux,code = <KEY_MENU>; press-threshold-microvolt = <1314000>; diff --git a/arch/arm64/boot/dts/rockchip/rk3399pro-vmarc-som.dtsi b/arch/arm64/boot/dts/rockchip/rk3399pro-vmarc-som.dtsi index 935b8c68a71d..bf9eb0405b62 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399pro-vmarc-som.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399pro-vmarc-som.dtsi @@ -297,11 +297,10 @@ clock-frequency = <400000>; status = "okay"; - hym8563: hym8563@51 { + hym8563: rtc@51 { compatible = "haoyu,hym8563"; reg = <0x51>; #clock-cells = <0>; - clock-frequency = <32768>; clock-output-names = "hym8563"; pinctrl-names = "default"; pinctrl-0 = <&hym8563_int>; diff --git a/arch/arm64/boot/dts/rockchip/rk3566-pinenote.dtsi b/arch/arm64/boot/dts/rockchip/rk3566-pinenote.dtsi index 0d45868132b9..8d61f824c12d 100644 --- a/arch/arm64/boot/dts/rockchip/rk3566-pinenote.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3566-pinenote.dtsi @@ -23,7 +23,7 @@ io-channel-names = "buttons"; keyup-threshold-microvolt = <1750000>; - recovery { + button-recovery { label = "recovery"; linux,code = <KEY_VENDOR>; press-threshold-microvolt = <0>; diff --git a/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts b/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts index a05460b92415..25a8c781f4e7 100644 --- a/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts +++ b/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts @@ -740,7 +740,7 @@ &uart1 { pinctrl-names = "default"; - pinctrl-0 = <&uart1m0_xfer &uart1m0_ctsn>; + pinctrl-0 = <&uart1m0_xfer &uart1m0_ctsn &uart1m0_rtsn>; status = "okay"; uart-has-rtscts; @@ -748,13 +748,14 @@ compatible = "brcm,bcm43438-bt"; clocks = <&rk817 1>; clock-names = "lpo"; - device-wakeup-gpios = <&gpio2 RK_PC1 GPIO_ACTIVE_HIGH>; - host-wakeup-gpios = <&gpio2 RK_PC0 GPIO_ACTIVE_HIGH>; + host-wakeup-gpios = <&gpio2 RK_PC1 GPIO_ACTIVE_HIGH>; + device-wakeup-gpios = <&gpio2 RK_PC0 GPIO_ACTIVE_HIGH>; shutdown-gpios = <&gpio2 RK_PB7 GPIO_ACTIVE_HIGH>; pinctrl-names = "default"; pinctrl-0 = <&bt_host_wake_l &bt_wake_l &bt_enable_h>; vbat-supply = <&vcc_sys>; vddio-supply = <&vcca1v8_pmu>; + max-speed = <3000000>; }; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3566-quartz64-b.dts b/arch/arm64/boot/dts/rockchip/rk3566-quartz64-b.dts index 77b179cd20e7..b276eb0810c7 100644 --- a/arch/arm64/boot/dts/rockchip/rk3566-quartz64-b.dts +++ b/arch/arm64/boot/dts/rockchip/rk3566-quartz64-b.dts @@ -246,7 +246,7 @@ compatible = "rockchip,rk809"; reg = <0x20>; interrupt-parent = <&gpio0>; - interrupts = <RK_PA7 IRQ_TYPE_LEVEL_LOW>; + interrupts = <RK_PA3 IRQ_TYPE_LEVEL_LOW>; assigned-clocks = <&cru I2S1_MCLKOUT_TX>; assigned-clock-parents = <&cru CLK_I2S1_8CH_TX>; clock-names = "mclk"; diff --git a/arch/arm64/boot/dts/rockchip/rk3566-roc-pc.dts b/arch/arm64/boot/dts/rockchip/rk3566-roc-pc.dts index dba648c2f57e..9fd262334d77 100644 --- a/arch/arm64/boot/dts/rockchip/rk3566-roc-pc.dts +++ b/arch/arm64/boot/dts/rockchip/rk3566-roc-pc.dts @@ -142,7 +142,7 @@ assigned-clocks = <&cru SCLK_GMAC1_RX_TX>, <&cru SCLK_GMAC1_RGMII_SPEED>, <&cru SCLK_GMAC1>; assigned-clock-parents = <&cru SCLK_GMAC1_RGMII_SPEED>, <&cru SCLK_GMAC1>, <&gmac1_clkin>; clock_in_out = "input"; - phy-mode = "rgmii-id"; + phy-mode = "rgmii"; phy-supply = <&vcc_3v3>; pinctrl-names = "default"; pinctrl-0 = <&gmac1m0_miim @@ -432,11 +432,7 @@ &i2c3 { pinctrl-names = "default"; - pinctrl-0 = <&i2c3m1_xfer>; - status = "okay"; -}; - -&i2c5 { + pinctrl-0 = <&i2c3m0_xfer>; status = "okay"; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts b/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts index c282f6e79960..26d7fda275ed 100644 --- a/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts +++ b/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts @@ -500,7 +500,6 @@ interrupt-parent = <&gpio0>; interrupts = <RK_PD3 IRQ_TYPE_EDGE_FALLING>; #clock-cells = <0>; - clock-frequency = <32768>; clock-output-names = "rtcic_32kout"; pinctrl-names = "default"; pinctrl-0 = <&hym8563_int>; diff --git a/arch/arm64/boot/dts/rockchip/rk3568-rock-3a.dts b/arch/arm64/boot/dts/rockchip/rk3568-rock-3a.dts index fb87a168fe96..539ef8cc7792 100644 --- a/arch/arm64/boot/dts/rockchip/rk3568-rock-3a.dts +++ b/arch/arm64/boot/dts/rockchip/rk3568-rock-3a.dts @@ -509,7 +509,6 @@ interrupt-parent = <&gpio0>; interrupts = <RK_PD3 IRQ_TYPE_LEVEL_LOW>; #clock-cells = <0>; - clock-frequency = <32768>; clock-output-names = "rtcic_32kout"; pinctrl-names = "default"; pinctrl-0 = <&hym8563_int>; diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index d6cf535d8352..439e2bc5d5d8 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -14,16 +14,8 @@ #ifdef CONFIG_EFI extern void efi_init(void); - -bool efi_runtime_fixup_exception(struct pt_regs *regs, const char *msg); #else #define efi_init() - -static inline -bool efi_runtime_fixup_exception(struct pt_regs *regs, const char *msg) -{ - return false; -} #endif int efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md); diff --git a/arch/arm64/kernel/efi-rt-wrapper.S b/arch/arm64/kernel/efi-rt-wrapper.S index 67babd5f04c2..75691a2641c1 100644 --- a/arch/arm64/kernel/efi-rt-wrapper.S +++ b/arch/arm64/kernel/efi-rt-wrapper.S @@ -6,7 +6,7 @@ #include <linux/linkage.h> SYM_FUNC_START(__efi_rt_asm_wrapper) - stp x29, x30, [sp, #-112]! + stp x29, x30, [sp, #-32]! mov x29, sp /* @@ -17,20 +17,6 @@ SYM_FUNC_START(__efi_rt_asm_wrapper) stp x1, x18, [sp, #16] /* - * Preserve all callee saved registers and record the stack pointer - * value in a per-CPU variable so we can recover from synchronous - * exceptions occurring while running the firmware routines. - */ - stp x19, x20, [sp, #32] - stp x21, x22, [sp, #48] - stp x23, x24, [sp, #64] - stp x25, x26, [sp, #80] - stp x27, x28, [sp, #96] - - adr_this_cpu x8, __efi_rt_asm_recover_sp, x9 - str x29, [x8] - - /* * We are lucky enough that no EFI runtime services take more than * 5 arguments, so all are passed in registers rather than via the * stack. @@ -45,7 +31,7 @@ SYM_FUNC_START(__efi_rt_asm_wrapper) ldp x1, x2, [sp, #16] cmp x2, x18 - ldp x29, x30, [sp], #112 + ldp x29, x30, [sp], #32 b.ne 0f ret 0: @@ -59,18 +45,3 @@ SYM_FUNC_START(__efi_rt_asm_wrapper) mov x18, x2 b efi_handle_corrupted_x18 // tail call SYM_FUNC_END(__efi_rt_asm_wrapper) - -SYM_FUNC_START(__efi_rt_asm_recover) - ldr_this_cpu x8, __efi_rt_asm_recover_sp, x9 - mov sp, x8 - - ldp x0, x18, [sp, #16] - ldp x19, x20, [sp, #32] - ldp x21, x22, [sp, #48] - ldp x23, x24, [sp, #64] - ldp x25, x26, [sp, #80] - ldp x27, x28, [sp, #96] - ldp x29, x30, [sp], #112 - - b efi_handle_runtime_exception -SYM_FUNC_END(__efi_rt_asm_recover) diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index ee53f2a0aa03..a908a37f0367 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -9,7 +9,6 @@ #include <linux/efi.h> #include <linux/init.h> -#include <linux/percpu.h> #include <asm/efi.h> @@ -145,28 +144,3 @@ asmlinkage efi_status_t efi_handle_corrupted_x18(efi_status_t s, const char *f) pr_err_ratelimited(FW_BUG "register x18 corrupted by EFI %s\n", f); return s; } - -asmlinkage DEFINE_PER_CPU(u64, __efi_rt_asm_recover_sp); - -asmlinkage efi_status_t __efi_rt_asm_recover(void); - -asmlinkage efi_status_t efi_handle_runtime_exception(const char *f) -{ - pr_err(FW_BUG "Synchronous exception occurred in EFI runtime service %s()\n", f); - clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); - return EFI_ABORTED; -} - -bool efi_runtime_fixup_exception(struct pt_regs *regs, const char *msg) -{ - /* Check whether the exception occurred while running the firmware */ - if (current_work() != &efi_rts_work.work || regs->pc >= TASK_SIZE_64) - return false; - - pr_err(FW_BUG "Unable to handle %s in EFI runtime service\n", msg); - add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK); - dump_stack(); - - regs->pc = (u64)__efi_rt_asm_recover; - return true; -} diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 3cb101e8cb29..5240f6acad64 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -36,7 +36,22 @@ void arch_dma_prep_coherent(struct page *page, size_t size) { unsigned long start = (unsigned long)page_address(page); - dcache_clean_poc(start, start + size); + /* + * The architecture only requires a clean to the PoC here in order to + * meet the requirements of the DMA API. However, some vendors (i.e. + * Qualcomm) abuse the DMA API for transferring buffers from the + * non-secure to the secure world, resetting the system if a non-secure + * access shows up after the buffer has been transferred: + * + * https://lore.kernel.org/r/20221114110329.68413-1-manivannan.sadhasivam@linaro.org + * + * Using clean+invalidate appears to make this issue less likely, but + * the drivers themselves still need fixing as the CPU could issue a + * speculative read from the buffer via the linear mapping irrespective + * of the cache maintenance we use. Once the drivers are fixed, we can + * relax this to a clean operation. + */ + dcache_clean_inval_poc(start, start + size); } #ifdef CONFIG_IOMMU_DMA diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 3e9cf9826417..5b391490e045 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -30,7 +30,6 @@ #include <asm/bug.h> #include <asm/cmpxchg.h> #include <asm/cpufeature.h> -#include <asm/efi.h> #include <asm/exception.h> #include <asm/daifflags.h> #include <asm/debug-monitors.h> @@ -392,9 +391,6 @@ static void __do_kernel_fault(unsigned long addr, unsigned long esr, msg = "paging request"; } - if (efi_runtime_fixup_exception(regs, msg)) - return; - die_kernel_fault(msg, addr, esr, regs); } diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile index f4cb54d5afd6..01b57b726322 100644 --- a/arch/loongarch/Makefile +++ b/arch/loongarch/Makefile @@ -97,7 +97,7 @@ KBUILD_LDFLAGS += -m $(ld-emul) ifdef CONFIG_LOONGARCH CHECKFLAGS += $(shell $(CC) $(KBUILD_CFLAGS) -dM -E -x c /dev/null | \ - egrep -vw '__GNUC_(MINOR_|PATCHLEVEL_)?_' | \ + grep -E -vw '__GNUC_(MINOR_|PATCHLEVEL_)?_' | \ sed -e "s/^\#define /-D'/" -e "s/ /'='/" -e "s/$$/'/" -e 's/\$$/&&/g') endif diff --git a/arch/loongarch/include/asm/irq.h b/arch/loongarch/include/asm/irq.h index d06d4542b634..5332b1433f38 100644 --- a/arch/loongarch/include/asm/irq.h +++ b/arch/loongarch/include/asm/irq.h @@ -117,7 +117,7 @@ extern struct fwnode_handle *liointc_handle; extern struct fwnode_handle *pch_lpc_handle; extern struct fwnode_handle *pch_pic_handle[MAX_IO_PICS]; -extern irqreturn_t loongson3_ipi_interrupt(int irq, void *dev); +extern irqreturn_t loongson_ipi_interrupt(int irq, void *dev); #include <asm-generic/irq.h> diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h index 946704bee599..79d5bfd913e0 100644 --- a/arch/loongarch/include/asm/pgtable.h +++ b/arch/loongarch/include/asm/pgtable.h @@ -349,13 +349,17 @@ static inline pte_t pte_mkclean(pte_t pte) static inline pte_t pte_mkdirty(pte_t pte) { - pte_val(pte) |= (_PAGE_DIRTY | _PAGE_MODIFIED); + pte_val(pte) |= _PAGE_MODIFIED; + if (pte_val(pte) & _PAGE_WRITE) + pte_val(pte) |= _PAGE_DIRTY; return pte; } static inline pte_t pte_mkwrite(pte_t pte) { - pte_val(pte) |= (_PAGE_WRITE | _PAGE_DIRTY); + pte_val(pte) |= _PAGE_WRITE; + if (pte_val(pte) & _PAGE_MODIFIED) + pte_val(pte) |= _PAGE_DIRTY; return pte; } @@ -455,7 +459,9 @@ static inline int pmd_write(pmd_t pmd) static inline pmd_t pmd_mkwrite(pmd_t pmd) { - pmd_val(pmd) |= (_PAGE_WRITE | _PAGE_DIRTY); + pmd_val(pmd) |= _PAGE_WRITE; + if (pmd_val(pmd) & _PAGE_MODIFIED) + pmd_val(pmd) |= _PAGE_DIRTY; return pmd; } @@ -478,10 +484,13 @@ static inline pmd_t pmd_mkclean(pmd_t pmd) static inline pmd_t pmd_mkdirty(pmd_t pmd) { - pmd_val(pmd) |= (_PAGE_DIRTY | _PAGE_MODIFIED); + pmd_val(pmd) |= _PAGE_MODIFIED; + if (pmd_val(pmd) & _PAGE_WRITE) + pmd_val(pmd) |= _PAGE_DIRTY; return pmd; } +#define pmd_young pmd_young static inline int pmd_young(pmd_t pmd) { return !!(pmd_val(pmd) & _PAGE_ACCESSED); diff --git a/arch/loongarch/include/asm/smp.h b/arch/loongarch/include/asm/smp.h index 71189b28bfb2..d82687390b4a 100644 --- a/arch/loongarch/include/asm/smp.h +++ b/arch/loongarch/include/asm/smp.h @@ -19,21 +19,21 @@ extern cpumask_t cpu_sibling_map[]; extern cpumask_t cpu_core_map[]; extern cpumask_t cpu_foreign_map[]; -void loongson3_smp_setup(void); -void loongson3_prepare_cpus(unsigned int max_cpus); -void loongson3_boot_secondary(int cpu, struct task_struct *idle); -void loongson3_init_secondary(void); -void loongson3_smp_finish(void); -void loongson3_send_ipi_single(int cpu, unsigned int action); -void loongson3_send_ipi_mask(const struct cpumask *mask, unsigned int action); +void loongson_smp_setup(void); +void loongson_prepare_cpus(unsigned int max_cpus); +void loongson_boot_secondary(int cpu, struct task_struct *idle); +void loongson_init_secondary(void); +void loongson_smp_finish(void); +void loongson_send_ipi_single(int cpu, unsigned int action); +void loongson_send_ipi_mask(const struct cpumask *mask, unsigned int action); #ifdef CONFIG_HOTPLUG_CPU -int loongson3_cpu_disable(void); -void loongson3_cpu_die(unsigned int cpu); +int loongson_cpu_disable(void); +void loongson_cpu_die(unsigned int cpu); #endif static inline void plat_smp_setup(void) { - loongson3_smp_setup(); + loongson_smp_setup(); } static inline int raw_smp_processor_id(void) @@ -78,35 +78,25 @@ extern void calculate_cpu_foreign_map(void); */ extern void show_ipi_list(struct seq_file *p, int prec); -/* - * This function sends a 'reschedule' IPI to another CPU. - * it goes straight through and wastes no time serializing - * anything. Worst case is that we lose a reschedule ... - */ -static inline void smp_send_reschedule(int cpu) -{ - loongson3_send_ipi_single(cpu, SMP_RESCHEDULE); -} - static inline void arch_send_call_function_single_ipi(int cpu) { - loongson3_send_ipi_single(cpu, SMP_CALL_FUNCTION); + loongson_send_ipi_single(cpu, SMP_CALL_FUNCTION); } static inline void arch_send_call_function_ipi_mask(const struct cpumask *mask) { - loongson3_send_ipi_mask(mask, SMP_CALL_FUNCTION); + loongson_send_ipi_mask(mask, SMP_CALL_FUNCTION); } #ifdef CONFIG_HOTPLUG_CPU static inline int __cpu_disable(void) { - return loongson3_cpu_disable(); + return loongson_cpu_disable(); } static inline void __cpu_die(unsigned int cpu) { - loongson3_cpu_die(cpu); + loongson_cpu_die(cpu); } extern void play_dead(void); diff --git a/arch/loongarch/kernel/acpi.c b/arch/loongarch/kernel/acpi.c index 335398482038..8319cc409009 100644 --- a/arch/loongarch/kernel/acpi.c +++ b/arch/loongarch/kernel/acpi.c @@ -56,23 +56,6 @@ void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size) return ioremap_cache(phys, size); } -void __init acpi_boot_table_init(void) -{ - /* - * If acpi_disabled, bail out - */ - if (acpi_disabled) - return; - - /* - * Initialize the ACPI boot-time table parser. - */ - if (acpi_table_init()) { - disable_acpi(); - return; - } -} - #ifdef CONFIG_SMP static int set_processor_mask(u32 id, u32 flags) { @@ -156,13 +139,21 @@ static void __init acpi_process_madt(void) loongson_sysconf.nr_cpus = num_processors; } -int __init acpi_boot_init(void) +void __init acpi_boot_table_init(void) { /* * If acpi_disabled, bail out */ if (acpi_disabled) - return -1; + return; + + /* + * Initialize the ACPI boot-time table parser. + */ + if (acpi_table_init()) { + disable_acpi(); + return; + } loongson_sysconf.boot_cpu_id = read_csr_cpuid(); @@ -173,8 +164,6 @@ int __init acpi_boot_init(void) /* Do not enable ACPI SPCR console by default */ acpi_parse_spcr(earlycon_acpi_spcr_enable, false); - - return 0; } #ifdef CONFIG_ACPI_NUMA diff --git a/arch/loongarch/kernel/irq.c b/arch/loongarch/kernel/irq.c index 1ba19c76563e..0524bf1169b7 100644 --- a/arch/loongarch/kernel/irq.c +++ b/arch/loongarch/kernel/irq.c @@ -117,7 +117,7 @@ void __init init_IRQ(void) if (ipi_irq < 0) panic("IPI IRQ mapping failed\n"); irq_set_percpu_devid(ipi_irq); - r = request_percpu_irq(ipi_irq, loongson3_ipi_interrupt, "IPI", &ipi_dummy_dev); + r = request_percpu_irq(ipi_irq, loongson_ipi_interrupt, "IPI", &ipi_dummy_dev); if (r < 0) panic("IPI IRQ request failed\n"); #endif diff --git a/arch/loongarch/kernel/process.c b/arch/loongarch/kernel/process.c index 2526b68f1c0f..ddb8ba4eb399 100644 --- a/arch/loongarch/kernel/process.c +++ b/arch/loongarch/kernel/process.c @@ -152,7 +152,7 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) childregs->csr_crmd = p->thread.csr_crmd; childregs->csr_prmd = p->thread.csr_prmd; childregs->csr_ecfg = p->thread.csr_ecfg; - return 0; + goto out; } /* user thread */ @@ -171,14 +171,15 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) */ childregs->csr_euen = 0; + if (clone_flags & CLONE_SETTLS) + childregs->regs[2] = tls; + +out: clear_tsk_thread_flag(p, TIF_USEDFPU); clear_tsk_thread_flag(p, TIF_USEDSIMD); clear_tsk_thread_flag(p, TIF_LSX_CTX_LIVE); clear_tsk_thread_flag(p, TIF_LASX_CTX_LIVE); - if (clone_flags & CLONE_SETTLS) - childregs->regs[2] = tls; - return 0; } diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c index 1eb63fa9bc81..ae436def7ee9 100644 --- a/arch/loongarch/kernel/setup.c +++ b/arch/loongarch/kernel/setup.c @@ -257,7 +257,6 @@ void __init platform_init(void) #ifdef CONFIG_ACPI acpi_gbl_use_default_register_widths = false; acpi_boot_table_init(); - acpi_boot_init(); #endif #ifdef CONFIG_NUMA diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c index 781a4d4bdddc..14508d429ffa 100644 --- a/arch/loongarch/kernel/smp.c +++ b/arch/loongarch/kernel/smp.c @@ -136,12 +136,12 @@ static void ipi_write_action(int cpu, u32 action) } } -void loongson3_send_ipi_single(int cpu, unsigned int action) +void loongson_send_ipi_single(int cpu, unsigned int action) { ipi_write_action(cpu_logical_map(cpu), (u32)action); } -void loongson3_send_ipi_mask(const struct cpumask *mask, unsigned int action) +void loongson_send_ipi_mask(const struct cpumask *mask, unsigned int action) { unsigned int i; @@ -149,7 +149,18 @@ void loongson3_send_ipi_mask(const struct cpumask *mask, unsigned int action) ipi_write_action(cpu_logical_map(i), (u32)action); } -irqreturn_t loongson3_ipi_interrupt(int irq, void *dev) +/* + * This function sends a 'reschedule' IPI to another CPU. + * it goes straight through and wastes no time serializing + * anything. Worst case is that we lose a reschedule ... + */ +void smp_send_reschedule(int cpu) +{ + loongson_send_ipi_single(cpu, SMP_RESCHEDULE); +} +EXPORT_SYMBOL_GPL(smp_send_reschedule); + +irqreturn_t loongson_ipi_interrupt(int irq, void *dev) { unsigned int action; unsigned int cpu = smp_processor_id(); @@ -169,7 +180,7 @@ irqreturn_t loongson3_ipi_interrupt(int irq, void *dev) return IRQ_HANDLED; } -void __init loongson3_smp_setup(void) +void __init loongson_smp_setup(void) { cpu_data[0].core = cpu_logical_map(0) % loongson_sysconf.cores_per_package; cpu_data[0].package = cpu_logical_map(0) / loongson_sysconf.cores_per_package; @@ -178,7 +189,7 @@ void __init loongson3_smp_setup(void) pr_info("Detected %i available CPU(s)\n", loongson_sysconf.nr_cpus); } -void __init loongson3_prepare_cpus(unsigned int max_cpus) +void __init loongson_prepare_cpus(unsigned int max_cpus) { int i = 0; @@ -193,7 +204,7 @@ void __init loongson3_prepare_cpus(unsigned int max_cpus) /* * Setup the PC, SP, and TP of a secondary processor and start it running! */ -void loongson3_boot_secondary(int cpu, struct task_struct *idle) +void loongson_boot_secondary(int cpu, struct task_struct *idle) { unsigned long entry; @@ -205,13 +216,13 @@ void loongson3_boot_secondary(int cpu, struct task_struct *idle) csr_mail_send(entry, cpu_logical_map(cpu), 0); - loongson3_send_ipi_single(cpu, SMP_BOOT_CPU); + loongson_send_ipi_single(cpu, SMP_BOOT_CPU); } /* * SMP init and finish on secondary CPUs */ -void loongson3_init_secondary(void) +void loongson_init_secondary(void) { unsigned int cpu = smp_processor_id(); unsigned int imask = ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 | @@ -231,7 +242,7 @@ void loongson3_init_secondary(void) cpu_logical_map(cpu) / loongson_sysconf.cores_per_package; } -void loongson3_smp_finish(void) +void loongson_smp_finish(void) { local_irq_enable(); iocsr_write64(0, LOONGARCH_IOCSR_MBUF0); @@ -240,7 +251,7 @@ void loongson3_smp_finish(void) #ifdef CONFIG_HOTPLUG_CPU -int loongson3_cpu_disable(void) +int loongson_cpu_disable(void) { unsigned long flags; unsigned int cpu = smp_processor_id(); @@ -262,7 +273,7 @@ int loongson3_cpu_disable(void) return 0; } -void loongson3_cpu_die(unsigned int cpu) +void loongson_cpu_die(unsigned int cpu) { while (per_cpu(cpu_state, cpu) != CPU_DEAD) cpu_relax(); @@ -300,19 +311,19 @@ void play_dead(void) */ #ifdef CONFIG_PM -static int loongson3_ipi_suspend(void) +static int loongson_ipi_suspend(void) { return 0; } -static void loongson3_ipi_resume(void) +static void loongson_ipi_resume(void) { iocsr_write32(0xffffffff, LOONGARCH_IOCSR_IPI_EN); } -static struct syscore_ops loongson3_ipi_syscore_ops = { - .resume = loongson3_ipi_resume, - .suspend = loongson3_ipi_suspend, +static struct syscore_ops loongson_ipi_syscore_ops = { + .resume = loongson_ipi_resume, + .suspend = loongson_ipi_suspend, }; /* @@ -321,7 +332,7 @@ static struct syscore_ops loongson3_ipi_syscore_ops = { */ static int __init ipi_pm_init(void) { - register_syscore_ops(&loongson3_ipi_syscore_ops); + register_syscore_ops(&loongson_ipi_syscore_ops); return 0; } @@ -425,7 +436,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus) { init_new_context(current, &init_mm); current_thread_info()->cpu = 0; - loongson3_prepare_cpus(max_cpus); + loongson_prepare_cpus(max_cpus); set_cpu_sibling_map(0); set_cpu_core_map(0); calculate_cpu_foreign_map(); @@ -436,7 +447,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus) int __cpu_up(unsigned int cpu, struct task_struct *tidle) { - loongson3_boot_secondary(cpu, tidle); + loongson_boot_secondary(cpu, tidle); /* Wait for CPU to start and be ready to sync counters */ if (!wait_for_completion_timeout(&cpu_starting, @@ -465,7 +476,7 @@ asmlinkage void start_secondary(void) cpu_probe(); constant_clockevent_init(); - loongson3_init_secondary(); + loongson_init_secondary(); set_cpu_sibling_map(cpu); set_cpu_core_map(cpu); @@ -487,11 +498,11 @@ asmlinkage void start_secondary(void) complete(&cpu_running); /* - * irq will be enabled in loongson3_smp_finish(), enabling it too + * irq will be enabled in loongson_smp_finish(), enabling it too * early is dangerous. */ WARN_ON_ONCE(!irqs_disabled()); - loongson3_smp_finish(); + loongson_smp_finish(); cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); } diff --git a/arch/loongarch/kernel/unwind_prologue.c b/arch/loongarch/kernel/unwind_prologue.c index b206d9159205..4571c3c87cd4 100644 --- a/arch/loongarch/kernel/unwind_prologue.c +++ b/arch/loongarch/kernel/unwind_prologue.c @@ -43,7 +43,8 @@ static bool unwind_by_prologue(struct unwind_state *state) { struct stack_info *info = &state->stack_info; union loongarch_instruction *ip, *ip_end; - unsigned long frame_size = 0, frame_ra = -1; + long frame_ra = -1; + unsigned long frame_size = 0; unsigned long size, offset, pc = state->pc; if (state->sp >= info->end || state->sp < info->begin) diff --git a/arch/loongarch/mm/tlbex.S b/arch/loongarch/mm/tlbex.S index d8ee8fbc8c67..58781c6e4191 100644 --- a/arch/loongarch/mm/tlbex.S +++ b/arch/loongarch/mm/tlbex.S @@ -10,6 +10,8 @@ #include <asm/regdef.h> #include <asm/stackframe.h> +#define INVTLB_ADDR_GFALSE_AND_ASID 5 + #define PTRS_PER_PGD_BITS (PAGE_SHIFT - 3) #define PTRS_PER_PUD_BITS (PAGE_SHIFT - 3) #define PTRS_PER_PMD_BITS (PAGE_SHIFT - 3) @@ -136,13 +138,10 @@ tlb_huge_update_load: ori t0, ra, _PAGE_VALID st.d t0, t1, 0 #endif - tlbsrch - addu16i.d t1, zero, -(CSR_TLBIDX_EHINV >> 16) - addi.d ra, t1, 0 - csrxchg ra, t1, LOONGARCH_CSR_TLBIDX - tlbwr - - csrxchg zero, t1, LOONGARCH_CSR_TLBIDX + csrrd ra, LOONGARCH_CSR_ASID + csrrd t1, LOONGARCH_CSR_BADV + andi ra, ra, CSR_ASID_ASID + invtlb INVTLB_ADDR_GFALSE_AND_ASID, ra, t1 /* * A huge PTE describes an area the size of the @@ -287,13 +286,11 @@ tlb_huge_update_store: ori t0, ra, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED) st.d t0, t1, 0 #endif - tlbsrch - addu16i.d t1, zero, -(CSR_TLBIDX_EHINV >> 16) - addi.d ra, t1, 0 - csrxchg ra, t1, LOONGARCH_CSR_TLBIDX - tlbwr + csrrd ra, LOONGARCH_CSR_ASID + csrrd t1, LOONGARCH_CSR_BADV + andi ra, ra, CSR_ASID_ASID + invtlb INVTLB_ADDR_GFALSE_AND_ASID, ra, t1 - csrxchg zero, t1, LOONGARCH_CSR_TLBIDX /* * A huge PTE describes an area the size of the * configured huge page size. This is twice the @@ -436,6 +433,11 @@ tlb_huge_update_modify: ori t0, ra, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED) st.d t0, t1, 0 #endif + csrrd ra, LOONGARCH_CSR_ASID + csrrd t1, LOONGARCH_CSR_BADV + andi ra, ra, CSR_ASID_ASID + invtlb INVTLB_ADDR_GFALSE_AND_ASID, ra, t1 + /* * A huge PTE describes an area the size of the * configured huge page size. This is twice the @@ -466,7 +468,7 @@ tlb_huge_update_modify: addu16i.d t1, zero, (PS_HUGE_SIZE << (CSR_TLBIDX_PS_SHIFT - 16)) csrxchg t1, t0, LOONGARCH_CSR_TLBIDX - tlbwr + tlbfill /* Reset default page size */ addu16i.d t0, zero, (CSR_TLBIDX_PS >> 16) diff --git a/arch/microblaze/Makefile b/arch/microblaze/Makefile index 3f8a86c4336a..02e6be9c5b0d 100644 --- a/arch/microblaze/Makefile +++ b/arch/microblaze/Makefile @@ -67,12 +67,12 @@ linux.bin.ub linux.bin.gz: linux.bin linux.bin: vmlinux linux.bin linux.bin.gz linux.bin.ub: $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ - @echo 'Kernel: $(boot)/$@ is ready' ' (#'`cat .version`')' + @echo 'Kernel: $(boot)/$@ is ready' ' (#'$(or $(KBUILD_BUILD_VERSION),`cat .version`)')' PHONY += simpleImage.$(DTB) simpleImage.$(DTB): vmlinux $(Q)$(MAKE) $(build)=$(boot) $(addprefix $(boot)/$@., ub unstrip strip) - @echo 'Kernel: $(boot)/$@ is ready' ' (#'`cat .version`')' + @echo 'Kernel: $(boot)/$@ is ready' ' (#'$(or $(KBUILD_BUILD_VERSION),`cat .version`)')' define archhelp echo '* linux.bin - Create raw binary' diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h index 6caec386ad2f..4678627673df 100644 --- a/arch/mips/include/asm/pgtable.h +++ b/arch/mips/include/asm/pgtable.h @@ -622,6 +622,7 @@ static inline pmd_t pmd_mkdirty(pmd_t pmd) return pmd; } +#define pmd_young pmd_young static inline int pmd_young(pmd_t pmd) { return !!(pmd_val(pmd) & _PAGE_ACCESSED); diff --git a/arch/nios2/boot/Makefile b/arch/nios2/boot/Makefile index 8c3ad76602f3..29c11a06b750 100644 --- a/arch/nios2/boot/Makefile +++ b/arch/nios2/boot/Makefile @@ -20,7 +20,7 @@ $(obj)/vmlinux.bin: vmlinux FORCE $(obj)/vmlinux.gz: $(obj)/vmlinux.bin FORCE $(call if_changed,gzip) -$(obj)/vmImage: $(obj)/vmlinux.gz +$(obj)/vmImage: $(obj)/vmlinux.gz FORCE $(call if_changed,uimage) @$(kecho) 'Kernel: $@ is ready' diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h index 4745bb9998bd..6d8492b6e2b8 100644 --- a/arch/powerpc/include/asm/interrupt.h +++ b/arch/powerpc/include/asm/interrupt.h @@ -602,6 +602,7 @@ ____##func(struct pt_regs *regs) /* kernel/traps.c */ DECLARE_INTERRUPT_HANDLER_NMI(system_reset_exception); #ifdef CONFIG_PPC_BOOK3S_64 +DECLARE_INTERRUPT_HANDLER_RAW(machine_check_early_boot); DECLARE_INTERRUPT_HANDLER_ASYNC(machine_check_exception_async); #endif DECLARE_INTERRUPT_HANDLER_NMI(machine_check_exception); diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c index 43f1c76d48ce..a379b0ce19ff 100644 --- a/arch/powerpc/net/bpf_jit_comp32.c +++ b/arch/powerpc/net/bpf_jit_comp32.c @@ -113,23 +113,19 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) { int i; - /* First arg comes in as a 32 bits pointer. */ - EMIT(PPC_RAW_MR(bpf_to_ppc(BPF_REG_1), _R3)); - EMIT(PPC_RAW_LI(bpf_to_ppc(BPF_REG_1) - 1, 0)); + /* Initialize tail_call_cnt, to be skipped if we do tail calls. */ + EMIT(PPC_RAW_LI(_R4, 0)); + +#define BPF_TAILCALL_PROLOGUE_SIZE 4 + EMIT(PPC_RAW_STWU(_R1, _R1, -BPF_PPC_STACKFRAME(ctx))); - /* - * Initialize tail_call_cnt in stack frame if we do tail calls. - * Otherwise, put in NOPs so that it can be skipped when we are - * invoked through a tail call. - */ if (ctx->seen & SEEN_TAILCALL) - EMIT(PPC_RAW_STW(bpf_to_ppc(BPF_REG_1) - 1, _R1, - bpf_jit_stack_offsetof(ctx, BPF_PPC_TC))); - else - EMIT(PPC_RAW_NOP()); + EMIT(PPC_RAW_STW(_R4, _R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC))); -#define BPF_TAILCALL_PROLOGUE_SIZE 16 + /* First arg comes in as a 32 bits pointer. */ + EMIT(PPC_RAW_MR(bpf_to_ppc(BPF_REG_1), _R3)); + EMIT(PPC_RAW_LI(bpf_to_ppc(BPF_REG_1) - 1, 0)); /* * We need a stack frame, but we don't necessarily need to @@ -170,24 +166,24 @@ static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx for (i = BPF_PPC_NVR_MIN; i <= 31; i++) if (bpf_is_seen_register(ctx, i)) EMIT(PPC_RAW_LWZ(i, _R1, bpf_jit_stack_offsetof(ctx, i))); -} - -void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) -{ - EMIT(PPC_RAW_MR(_R3, bpf_to_ppc(BPF_REG_0))); - - bpf_jit_emit_common_epilogue(image, ctx); - - /* Tear down our stack frame */ if (ctx->seen & SEEN_FUNC) EMIT(PPC_RAW_LWZ(_R0, _R1, BPF_PPC_STACKFRAME(ctx) + PPC_LR_STKOFF)); + /* Tear down our stack frame */ EMIT(PPC_RAW_ADDI(_R1, _R1, BPF_PPC_STACKFRAME(ctx))); if (ctx->seen & SEEN_FUNC) EMIT(PPC_RAW_MTLR(_R0)); +} + +void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) +{ + EMIT(PPC_RAW_MR(_R3, bpf_to_ppc(BPF_REG_0))); + + bpf_jit_emit_common_epilogue(image, ctx); + EMIT(PPC_RAW_BLR()); } @@ -244,7 +240,6 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o EMIT(PPC_RAW_RLWINM(_R3, b2p_index, 2, 0, 29)); EMIT(PPC_RAW_ADD(_R3, _R3, b2p_bpf_array)); EMIT(PPC_RAW_LWZ(_R3, _R3, offsetof(struct bpf_array, ptrs))); - EMIT(PPC_RAW_STW(_R0, _R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC))); /* * if (prog == NULL) @@ -255,19 +250,14 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o /* goto *(prog->bpf_func + prologue_size); */ EMIT(PPC_RAW_LWZ(_R3, _R3, offsetof(struct bpf_prog, bpf_func))); - - if (ctx->seen & SEEN_FUNC) - EMIT(PPC_RAW_LWZ(_R0, _R1, BPF_PPC_STACKFRAME(ctx) + PPC_LR_STKOFF)); - EMIT(PPC_RAW_ADDIC(_R3, _R3, BPF_TAILCALL_PROLOGUE_SIZE)); - - if (ctx->seen & SEEN_FUNC) - EMIT(PPC_RAW_MTLR(_R0)); - EMIT(PPC_RAW_MTCTR(_R3)); EMIT(PPC_RAW_MR(_R3, bpf_to_ppc(BPF_REG_1))); + /* Put tail_call_cnt in r4 */ + EMIT(PPC_RAW_MR(_R4, _R0)); + /* tear restore NVRs, ... */ bpf_jit_emit_common_epilogue(image, ctx); diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index fa78595a6089..593cf09264d8 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -317,9 +317,9 @@ config SMP config NR_CPUS int "Maximum number of CPUs (2-512)" depends on SMP - range 2 512 if !SBI_V01 - range 2 32 if SBI_V01 && 32BIT - range 2 64 if SBI_V01 && 64BIT + range 2 512 if !RISCV_SBI_V01 + range 2 32 if RISCV_SBI_V01 && 32BIT + range 2 64 if RISCV_SBI_V01 && 64BIT default "32" if 32BIT default "64" if 64BIT diff --git a/arch/riscv/include/asm/asm.h b/arch/riscv/include/asm/asm.h index 1b471ff73178..816e753de636 100644 --- a/arch/riscv/include/asm/asm.h +++ b/arch/riscv/include/asm/asm.h @@ -23,6 +23,7 @@ #define REG_L __REG_SEL(ld, lw) #define REG_S __REG_SEL(sd, sw) #define REG_SC __REG_SEL(sc.d, sc.w) +#define REG_AMOSWAP_AQ __REG_SEL(amoswap.d.aq, amoswap.w.aq) #define REG_ASM __REG_SEL(.dword, .word) #define SZREG __REG_SEL(8, 4) #define LGREG __REG_SEL(3, 2) diff --git a/arch/riscv/include/asm/efi.h b/arch/riscv/include/asm/efi.h index f74879a8f1ea..e229d7be4b66 100644 --- a/arch/riscv/include/asm/efi.h +++ b/arch/riscv/include/asm/efi.h @@ -10,6 +10,7 @@ #include <asm/mmu_context.h> #include <asm/ptrace.h> #include <asm/tlbflush.h> +#include <asm/pgalloc.h> #ifdef CONFIG_EFI extern void efi_init(void); @@ -20,7 +21,10 @@ extern void efi_init(void); int efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md); int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md); -#define arch_efi_call_virt_setup() efi_virtmap_load() +#define arch_efi_call_virt_setup() ({ \ + sync_kernel_mappings(efi_mm.pgd); \ + efi_virtmap_load(); \ + }) #define arch_efi_call_virt_teardown() efi_virtmap_unload() #define ARCH_EFI_IRQ_FLAGS_MASK (SR_IE | SR_SPIE) diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h index 947f23d7b6af..59dc12b5b7e8 100644 --- a/arch/riscv/include/asm/pgalloc.h +++ b/arch/riscv/include/asm/pgalloc.h @@ -127,6 +127,13 @@ static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d) #define __p4d_free_tlb(tlb, p4d, addr) p4d_free((tlb)->mm, p4d) #endif /* __PAGETABLE_PMD_FOLDED */ +static inline void sync_kernel_mappings(pgd_t *pgd) +{ + memcpy(pgd + USER_PTRS_PER_PGD, + init_mm.pgd + USER_PTRS_PER_PGD, + (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t)); +} + static inline pgd_t *pgd_alloc(struct mm_struct *mm) { pgd_t *pgd; @@ -135,9 +142,7 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm) if (likely(pgd != NULL)) { memset(pgd, 0, USER_PTRS_PER_PGD * sizeof(pgd_t)); /* Copy kernel mappings */ - memcpy(pgd + USER_PTRS_PER_PGD, - init_mm.pgd + USER_PTRS_PER_PGD, - (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t)); + sync_kernel_mappings(pgd); } return pgd; } diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 7ec936910a96..92ec2d9d7273 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -600,6 +600,7 @@ static inline int pmd_dirty(pmd_t pmd) return pte_dirty(pmd_pte(pmd)); } +#define pmd_young pmd_young static inline int pmd_young(pmd_t pmd) { return pte_young(pmd_pte(pmd)); diff --git a/arch/riscv/include/asm/smp.h b/arch/riscv/include/asm/smp.h index d3443be7eedc..3831b638ecab 100644 --- a/arch/riscv/include/asm/smp.h +++ b/arch/riscv/include/asm/smp.h @@ -50,6 +50,9 @@ void riscv_set_ipi_ops(const struct riscv_ipi_ops *ops); /* Clear IPI for current CPU */ void riscv_clear_ipi(void); +/* Check other CPUs stop or not */ +bool smp_crash_stop_failed(void); + /* Secondary hart entry */ asmlinkage void smp_callin(void); diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index b9eda3fcbd6d..186abd146eaf 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -404,6 +404,19 @@ handle_syscall_trace_exit: #ifdef CONFIG_VMAP_STACK handle_kernel_stack_overflow: + /* + * Takes the psuedo-spinlock for the shadow stack, in case multiple + * harts are concurrently overflowing their kernel stacks. We could + * store any value here, but since we're overflowing the kernel stack + * already we only have SP to use as a scratch register. So we just + * swap in the address of the spinlock, as that's definately non-zero. + * + * Pairs with a store_release in handle_bad_stack(). + */ +1: la sp, spin_shadow_stack + REG_AMOSWAP_AQ sp, sp, (sp) + bnez sp, 1b + la sp, shadow_stack addi sp, sp, SHADOW_OVERFLOW_STACK_SIZE diff --git a/arch/riscv/kernel/machine_kexec.c b/arch/riscv/kernel/machine_kexec.c index ee79e6839b86..2d139b724bc8 100644 --- a/arch/riscv/kernel/machine_kexec.c +++ b/arch/riscv/kernel/machine_kexec.c @@ -15,6 +15,8 @@ #include <linux/compiler.h> /* For unreachable() */ #include <linux/cpu.h> /* For cpu_down() */ #include <linux/reboot.h> +#include <linux/interrupt.h> +#include <linux/irq.h> /* * kexec_image_info - Print received image details @@ -138,20 +140,35 @@ void machine_shutdown(void) #endif } -/* Override the weak function in kernel/panic.c */ -void crash_smp_send_stop(void) +static void machine_kexec_mask_interrupts(void) { - static int cpus_stopped; + unsigned int i; + struct irq_desc *desc; - /* - * This function can be called twice in panic path, but obviously - * we execute this only once. - */ - if (cpus_stopped) - return; + for_each_irq_desc(i, desc) { + struct irq_chip *chip; + int ret; + + chip = irq_desc_get_chip(desc); + if (!chip) + continue; + + /* + * First try to remove the active state. If this + * fails, try to EOI the interrupt. + */ + ret = irq_set_irqchip_state(i, IRQCHIP_STATE_ACTIVE, false); + + if (ret && irqd_irq_inprogress(&desc->irq_data) && + chip->irq_eoi) + chip->irq_eoi(&desc->irq_data); - smp_send_stop(); - cpus_stopped = 1; + if (chip->irq_mask) + chip->irq_mask(&desc->irq_data); + + if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data)) + chip->irq_disable(&desc->irq_data); + } } /* @@ -169,6 +186,8 @@ machine_crash_shutdown(struct pt_regs *regs) crash_smp_send_stop(); crash_save_cpu(regs, smp_processor_id()); + machine_kexec_mask_interrupts(); + pr_info("Starting crashdump kernel...\n"); } @@ -195,6 +214,11 @@ machine_kexec(struct kimage *image) void *control_code_buffer = page_address(image->control_code_page); riscv_kexec_method kexec_method = NULL; +#ifdef CONFIG_SMP + WARN(smp_crash_stop_failed(), + "Some CPUs may be stale, kdump will be unreliable.\n"); +#endif + if (image->type != KEXEC_TYPE_CRASH) kexec_method = control_code_buffer; else diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index 67ec1fadcfe2..86acd690d529 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -322,10 +322,11 @@ subsys_initcall(topology_init); void free_initmem(void) { - if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX)) - set_kernel_memory(lm_alias(__init_begin), lm_alias(__init_end), - IS_ENABLED(CONFIG_64BIT) ? - set_memory_rw : set_memory_rw_nx); + if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX)) { + set_kernel_memory(lm_alias(__init_begin), lm_alias(__init_end), set_memory_rw_nx); + if (IS_ENABLED(CONFIG_64BIT)) + set_kernel_memory(__init_begin, __init_end, set_memory_nx); + } free_initmem_default(POISON_FREE_INITMEM); } diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c index 760a64518c58..8c3b59f1f9b8 100644 --- a/arch/riscv/kernel/smp.c +++ b/arch/riscv/kernel/smp.c @@ -12,6 +12,7 @@ #include <linux/clockchips.h> #include <linux/interrupt.h> #include <linux/module.h> +#include <linux/kexec.h> #include <linux/profile.h> #include <linux/smp.h> #include <linux/sched.h> @@ -22,11 +23,13 @@ #include <asm/sbi.h> #include <asm/tlbflush.h> #include <asm/cacheflush.h> +#include <asm/cpu_ops.h> enum ipi_message_type { IPI_RESCHEDULE, IPI_CALL_FUNC, IPI_CPU_STOP, + IPI_CPU_CRASH_STOP, IPI_IRQ_WORK, IPI_TIMER, IPI_MAX @@ -71,6 +74,32 @@ static void ipi_stop(void) wait_for_interrupt(); } +#ifdef CONFIG_KEXEC_CORE +static atomic_t waiting_for_crash_ipi = ATOMIC_INIT(0); + +static inline void ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs) +{ + crash_save_cpu(regs, cpu); + + atomic_dec(&waiting_for_crash_ipi); + + local_irq_disable(); + +#ifdef CONFIG_HOTPLUG_CPU + if (cpu_has_hotplug(cpu)) + cpu_ops[cpu]->cpu_stop(); +#endif + + for(;;) + wait_for_interrupt(); +} +#else +static inline void ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs) +{ + unreachable(); +} +#endif + static const struct riscv_ipi_ops *ipi_ops __ro_after_init; void riscv_set_ipi_ops(const struct riscv_ipi_ops *ops) @@ -124,8 +153,9 @@ void arch_irq_work_raise(void) void handle_IPI(struct pt_regs *regs) { - unsigned long *pending_ipis = &ipi_data[smp_processor_id()].bits; - unsigned long *stats = ipi_data[smp_processor_id()].stats; + unsigned int cpu = smp_processor_id(); + unsigned long *pending_ipis = &ipi_data[cpu].bits; + unsigned long *stats = ipi_data[cpu].stats; riscv_clear_ipi(); @@ -154,6 +184,10 @@ void handle_IPI(struct pt_regs *regs) ipi_stop(); } + if (ops & (1 << IPI_CPU_CRASH_STOP)) { + ipi_cpu_crash_stop(cpu, get_irq_regs()); + } + if (ops & (1 << IPI_IRQ_WORK)) { stats[IPI_IRQ_WORK]++; irq_work_run(); @@ -176,6 +210,7 @@ static const char * const ipi_names[] = { [IPI_RESCHEDULE] = "Rescheduling interrupts", [IPI_CALL_FUNC] = "Function call interrupts", [IPI_CPU_STOP] = "CPU stop interrupts", + [IPI_CPU_CRASH_STOP] = "CPU stop (for crash dump) interrupts", [IPI_IRQ_WORK] = "IRQ work interrupts", [IPI_TIMER] = "Timer broadcast interrupts", }; @@ -235,6 +270,64 @@ void smp_send_stop(void) cpumask_pr_args(cpu_online_mask)); } +#ifdef CONFIG_KEXEC_CORE +/* + * The number of CPUs online, not counting this CPU (which may not be + * fully online and so not counted in num_online_cpus()). + */ +static inline unsigned int num_other_online_cpus(void) +{ + unsigned int this_cpu_online = cpu_online(smp_processor_id()); + + return num_online_cpus() - this_cpu_online; +} + +void crash_smp_send_stop(void) +{ + static int cpus_stopped; + cpumask_t mask; + unsigned long timeout; + + /* + * This function can be called twice in panic path, but obviously + * we execute this only once. + */ + if (cpus_stopped) + return; + + cpus_stopped = 1; + + /* + * If this cpu is the only one alive at this point in time, online or + * not, there are no stop messages to be sent around, so just back out. + */ + if (num_other_online_cpus() == 0) + return; + + cpumask_copy(&mask, cpu_online_mask); + cpumask_clear_cpu(smp_processor_id(), &mask); + + atomic_set(&waiting_for_crash_ipi, num_other_online_cpus()); + + pr_crit("SMP: stopping secondary CPUs\n"); + send_ipi_mask(&mask, IPI_CPU_CRASH_STOP); + + /* Wait up to one second for other CPUs to stop */ + timeout = USEC_PER_SEC; + while ((atomic_read(&waiting_for_crash_ipi) > 0) && timeout--) + udelay(1); + + if (atomic_read(&waiting_for_crash_ipi) > 0) + pr_warn("SMP: failed to stop secondary CPUs %*pbl\n", + cpumask_pr_args(&mask)); +} + +bool smp_crash_stop_failed(void) +{ + return (atomic_read(&waiting_for_crash_ipi) > 0); +} +#endif + void smp_send_reschedule(int cpu) { send_ipi_single(cpu, IPI_RESCHEDULE); diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index f3e96d60a2ff..7abd8e4c4df6 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -221,11 +221,29 @@ asmlinkage unsigned long get_overflow_stack(void) OVERFLOW_STACK_SIZE; } +/* + * A pseudo spinlock to protect the shadow stack from being used by multiple + * harts concurrently. This isn't a real spinlock because the lock side must + * be taken without a valid stack and only a single register, it's only taken + * while in the process of panicing anyway so the performance and error + * checking a proper spinlock gives us doesn't matter. + */ +unsigned long spin_shadow_stack; + asmlinkage void handle_bad_stack(struct pt_regs *regs) { unsigned long tsk_stk = (unsigned long)current->stack; unsigned long ovf_stk = (unsigned long)this_cpu_ptr(overflow_stack); + /* + * We're done with the shadow stack by this point, as we're on the + * overflow stack. Tell any other concurrent overflowing harts that + * they can proceed with panicing by releasing the pseudo-spinlock. + * + * This pairs with an amoswap.aq in handle_kernel_stack_overflow. + */ + smp_store_release(&spin_shadow_stack, 0); + console_verbose(); pr_emerg("Insufficient stack space to handle exception!\n"); diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile index db6548509bb3..06e6b27f3bcc 100644 --- a/arch/riscv/kernel/vdso/Makefile +++ b/arch/riscv/kernel/vdso/Makefile @@ -17,6 +17,7 @@ vdso-syms += flush_icache obj-vdso = $(patsubst %, %.o, $(vdso-syms)) note.o ccflags-y := -fno-stack-protector +ccflags-y += -DDISABLE_BRANCH_PROFILING ifneq ($(c-gettimeofday-y),) CFLAGS_vgettimeofday.o += -fPIC -include $(c-gettimeofday-y) diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 108e732d7b14..b248694e0024 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -117,7 +117,9 @@ struct zpci_bus { struct zpci_dev { struct zpci_bus *zbus; struct list_head entry; /* list of all zpci_devices, needed for hotplug, etc. */ + struct list_head iommu_list; struct kref kref; + struct rcu_head rcu; struct hotplug_slot hotplug_slot; enum zpci_state state; @@ -155,7 +157,6 @@ struct zpci_dev { /* DMA stuff */ unsigned long *dma_table; - spinlock_t dma_table_lock; int tlb_refresh; spinlock_t iommu_bitmap_lock; @@ -220,7 +221,7 @@ void zpci_device_reserved(struct zpci_dev *zdev); bool zpci_is_device_configured(struct zpci_dev *zdev); int zpci_hot_reset_device(struct zpci_dev *zdev); -int zpci_register_ioat(struct zpci_dev *, u8, u64, u64, u64); +int zpci_register_ioat(struct zpci_dev *, u8, u64, u64, u64, u8 *); int zpci_unregister_ioat(struct zpci_dev *, u8); void zpci_remove_reserved_devices(void); void zpci_update_fh(struct zpci_dev *zdev, u32 fh); diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index f1cb9391190d..11e901286414 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -763,6 +763,7 @@ static inline int pmd_dirty(pmd_t pmd) return (pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY) != 0; } +#define pmd_young pmd_young static inline int pmd_young(pmd_t pmd) { return (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG) != 0; diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index dd74fe664ed1..e4ef67e4da0a 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -46,7 +46,7 @@ struct save_area { u64 fprs[16]; u32 fpc; u32 prefix; - u64 todpreg; + u32 todpreg; u64 timer; u64 todcmp; u64 vxrs_low[16]; diff --git a/arch/s390/kvm/pci.c b/arch/s390/kvm/pci.c index ded1af2ddae9..ec51e810e381 100644 --- a/arch/s390/kvm/pci.c +++ b/arch/s390/kvm/pci.c @@ -434,6 +434,7 @@ static void kvm_s390_pci_dev_release(struct zpci_dev *zdev) static int kvm_s390_pci_register_kvm(void *opaque, struct kvm *kvm) { struct zpci_dev *zdev = opaque; + u8 status; int rc; if (!zdev) @@ -486,7 +487,7 @@ static int kvm_s390_pci_register_kvm(void *opaque, struct kvm *kvm) /* Re-register the IOMMU that was already created */ rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, - virt_to_phys(zdev->dma_table)); + virt_to_phys(zdev->dma_table), &status); if (rc) goto clear_gisa; @@ -516,6 +517,7 @@ static void kvm_s390_pci_unregister_kvm(void *opaque) { struct zpci_dev *zdev = opaque; struct kvm *kvm; + u8 status; if (!zdev) return; @@ -554,7 +556,7 @@ static void kvm_s390_pci_unregister_kvm(void *opaque) /* Re-register the IOMMU that was already created */ zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, - virt_to_phys(zdev->dma_table)); + virt_to_phys(zdev->dma_table), &status); out: spin_lock(&kvm->arch.kzdev_list_lock); diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index 94138f8f0c1c..ace2541ababd 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -546,8 +546,10 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_CEI)) scb_s->eca |= scb_o->eca & ECA_CEI; /* Epoch Extension */ - if (test_kvm_facility(vcpu->kvm, 139)) + if (test_kvm_facility(vcpu->kvm, 139)) { scb_s->ecd |= scb_o->ecd & ECD_MEF; + scb_s->epdx = scb_o->epdx; + } /* etoken */ if (test_kvm_facility(vcpu->kvm, 156)) diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 73cdc5539384..ef38b1514c77 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -116,20 +116,20 @@ EXPORT_SYMBOL_GPL(pci_proc_domain); /* Modify PCI: Register I/O address translation parameters */ int zpci_register_ioat(struct zpci_dev *zdev, u8 dmaas, - u64 base, u64 limit, u64 iota) + u64 base, u64 limit, u64 iota, u8 *status) { u64 req = ZPCI_CREATE_REQ(zdev->fh, dmaas, ZPCI_MOD_FC_REG_IOAT); struct zpci_fib fib = {0}; - u8 cc, status; + u8 cc; WARN_ON_ONCE(iota & 0x3fff); fib.pba = base; fib.pal = limit; fib.iota = iota | ZPCI_IOTA_RTTO_FLAG; fib.gd = zdev->gisa; - cc = zpci_mod_fc(req, &fib, &status); + cc = zpci_mod_fc(req, &fib, status); if (cc) - zpci_dbg(3, "reg ioat fid:%x, cc:%d, status:%d\n", zdev->fid, cc, status); + zpci_dbg(3, "reg ioat fid:%x, cc:%d, status:%d\n", zdev->fid, cc, *status); return cc; } EXPORT_SYMBOL_GPL(zpci_register_ioat); @@ -764,6 +764,7 @@ EXPORT_SYMBOL_GPL(zpci_disable_device); */ int zpci_hot_reset_device(struct zpci_dev *zdev) { + u8 status; int rc; zpci_dbg(3, "rst fid:%x, fh:%x\n", zdev->fid, zdev->fh); @@ -787,7 +788,7 @@ int zpci_hot_reset_device(struct zpci_dev *zdev) if (zdev->dma_table) rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, - virt_to_phys(zdev->dma_table)); + virt_to_phys(zdev->dma_table), &status); else rc = zpci_dma_init_device(zdev); if (rc) { @@ -995,7 +996,7 @@ void zpci_release_device(struct kref *kref) break; } zpci_dbg(3, "rem fid:%x\n", zdev->fid); - kfree(zdev); + kfree_rcu(zdev, rcu); } int zpci_report_error(struct pci_dev *pdev, diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c index 227cf0a62800..ea478d11fbd1 100644 --- a/arch/s390/pci/pci_dma.c +++ b/arch/s390/pci/pci_dma.c @@ -63,37 +63,55 @@ static void dma_free_page_table(void *table) kmem_cache_free(dma_page_table_cache, table); } -static unsigned long *dma_get_seg_table_origin(unsigned long *entry) +static unsigned long *dma_get_seg_table_origin(unsigned long *rtep) { + unsigned long old_rte, rte; unsigned long *sto; - if (reg_entry_isvalid(*entry)) - sto = get_rt_sto(*entry); - else { + rte = READ_ONCE(*rtep); + if (reg_entry_isvalid(rte)) { + sto = get_rt_sto(rte); + } else { sto = dma_alloc_cpu_table(); if (!sto) return NULL; - set_rt_sto(entry, virt_to_phys(sto)); - validate_rt_entry(entry); - entry_clr_protected(entry); + set_rt_sto(&rte, virt_to_phys(sto)); + validate_rt_entry(&rte); + entry_clr_protected(&rte); + + old_rte = cmpxchg(rtep, ZPCI_TABLE_INVALID, rte); + if (old_rte != ZPCI_TABLE_INVALID) { + /* Somone else was faster, use theirs */ + dma_free_cpu_table(sto); + sto = get_rt_sto(old_rte); + } } return sto; } -static unsigned long *dma_get_page_table_origin(unsigned long *entry) +static unsigned long *dma_get_page_table_origin(unsigned long *step) { + unsigned long old_ste, ste; unsigned long *pto; - if (reg_entry_isvalid(*entry)) - pto = get_st_pto(*entry); - else { + ste = READ_ONCE(*step); + if (reg_entry_isvalid(ste)) { + pto = get_st_pto(ste); + } else { pto = dma_alloc_page_table(); if (!pto) return NULL; - set_st_pto(entry, virt_to_phys(pto)); - validate_st_entry(entry); - entry_clr_protected(entry); + set_st_pto(&ste, virt_to_phys(pto)); + validate_st_entry(&ste); + entry_clr_protected(&ste); + + old_ste = cmpxchg(step, ZPCI_TABLE_INVALID, ste); + if (old_ste != ZPCI_TABLE_INVALID) { + /* Somone else was faster, use theirs */ + dma_free_page_table(pto); + pto = get_st_pto(old_ste); + } } return pto; } @@ -117,19 +135,24 @@ unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr) return &pto[px]; } -void dma_update_cpu_trans(unsigned long *entry, phys_addr_t page_addr, int flags) +void dma_update_cpu_trans(unsigned long *ptep, phys_addr_t page_addr, int flags) { + unsigned long pte; + + pte = READ_ONCE(*ptep); if (flags & ZPCI_PTE_INVALID) { - invalidate_pt_entry(entry); + invalidate_pt_entry(&pte); } else { - set_pt_pfaa(entry, page_addr); - validate_pt_entry(entry); + set_pt_pfaa(&pte, page_addr); + validate_pt_entry(&pte); } if (flags & ZPCI_TABLE_PROTECTED) - entry_set_protected(entry); + entry_set_protected(&pte); else - entry_clr_protected(entry); + entry_clr_protected(&pte); + + xchg(ptep, pte); } static int __dma_update_trans(struct zpci_dev *zdev, phys_addr_t pa, @@ -137,18 +160,14 @@ static int __dma_update_trans(struct zpci_dev *zdev, phys_addr_t pa, { unsigned int nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT; phys_addr_t page_addr = (pa & PAGE_MASK); - unsigned long irq_flags; unsigned long *entry; int i, rc = 0; if (!nr_pages) return -EINVAL; - spin_lock_irqsave(&zdev->dma_table_lock, irq_flags); - if (!zdev->dma_table) { - rc = -EINVAL; - goto out_unlock; - } + if (!zdev->dma_table) + return -EINVAL; for (i = 0; i < nr_pages; i++) { entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr); @@ -173,8 +192,6 @@ undo_cpu_trans: dma_update_cpu_trans(entry, page_addr, flags); } } -out_unlock: - spin_unlock_irqrestore(&zdev->dma_table_lock, irq_flags); return rc; } @@ -547,6 +564,7 @@ static void s390_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int zpci_dma_init_device(struct zpci_dev *zdev) { + u8 status; int rc; /* @@ -557,7 +575,6 @@ int zpci_dma_init_device(struct zpci_dev *zdev) WARN_ON(zdev->s390_domain); spin_lock_init(&zdev->iommu_bitmap_lock); - spin_lock_init(&zdev->dma_table_lock); zdev->dma_table = dma_alloc_cpu_table(); if (!zdev->dma_table) { @@ -598,7 +615,7 @@ int zpci_dma_init_device(struct zpci_dev *zdev) } if (zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, - virt_to_phys(zdev->dma_table))) { + virt_to_phys(zdev->dma_table), &status)) { rc = -EIO; goto free_bitmap; } diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index a779418ceba9..3bc9736bddb1 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -693,6 +693,7 @@ static inline unsigned long pmd_dirty(pmd_t pmd) return pte_dirty(pte); } +#define pmd_young pmd_young static inline unsigned long pmd_young(pmd_t pmd) { pte_t pte = __pte(pmd_val(pmd)); diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile index 9860ca5979f8..9e38ffaadb5d 100644 --- a/arch/x86/boot/Makefile +++ b/arch/x86/boot/Makefile @@ -83,7 +83,7 @@ cmd_image = $(obj)/tools/build $(obj)/setup.bin $(obj)/vmlinux.bin \ $(obj)/bzImage: $(obj)/setup.bin $(obj)/vmlinux.bin $(obj)/tools/build FORCE $(call if_changed,image) - @$(kecho) 'Kernel: $@ is ready' ' (#'`cat .version`')' + @$(kecho) 'Kernel: $@ is ready' ' (#'$(or $(KBUILD_BUILD_VERSION),`cat .version`)')' OBJCOPYFLAGS_vmlinux.bin := -O binary -R .note -R .comment -S $(obj)/vmlinux.bin: $(obj)/compressed/vmlinux FORCE diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index f49bc3ec76e6..a269049a43ce 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -77,7 +77,7 @@ static int hyperv_init_ghcb(void) static int hv_cpu_init(unsigned int cpu) { union hv_vp_assist_msr_contents msr = { 0 }; - struct hv_vp_assist_page **hvp = &hv_vp_assist_page[smp_processor_id()]; + struct hv_vp_assist_page **hvp = &hv_vp_assist_page[cpu]; int ret; ret = hv_common_cpu_init(cpu); @@ -87,34 +87,32 @@ static int hv_cpu_init(unsigned int cpu) if (!hv_vp_assist_page) return 0; - if (!*hvp) { - if (hv_root_partition) { - /* - * For root partition we get the hypervisor provided VP assist - * page, instead of allocating a new page. - */ - rdmsrl(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64); - *hvp = memremap(msr.pfn << - HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT, - PAGE_SIZE, MEMREMAP_WB); - } else { - /* - * The VP assist page is an "overlay" page (see Hyper-V TLFS's - * Section 5.2.1 "GPA Overlay Pages"). Here it must be zeroed - * out to make sure we always write the EOI MSR in - * hv_apic_eoi_write() *after* the EOI optimization is disabled - * in hv_cpu_die(), otherwise a CPU may not be stopped in the - * case of CPU offlining and the VM will hang. - */ + if (hv_root_partition) { + /* + * For root partition we get the hypervisor provided VP assist + * page, instead of allocating a new page. + */ + rdmsrl(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64); + *hvp = memremap(msr.pfn << HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT, + PAGE_SIZE, MEMREMAP_WB); + } else { + /* + * The VP assist page is an "overlay" page (see Hyper-V TLFS's + * Section 5.2.1 "GPA Overlay Pages"). Here it must be zeroed + * out to make sure we always write the EOI MSR in + * hv_apic_eoi_write() *after* the EOI optimization is disabled + * in hv_cpu_die(), otherwise a CPU may not be stopped in the + * case of CPU offlining and the VM will hang. + */ + if (!*hvp) *hvp = __vmalloc(PAGE_SIZE, GFP_KERNEL | __GFP_ZERO); - if (*hvp) - msr.pfn = vmalloc_to_pfn(*hvp); - } - WARN_ON(!(*hvp)); - if (*hvp) { - msr.enable = 1; - wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64); - } + if (*hvp) + msr.pfn = vmalloc_to_pfn(*hvp); + + } + if (!WARN_ON(!(*hvp))) { + msr.enable = 1; + wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64); } return hyperv_init_ghcb(); diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index b71f4f2ecdd5..b2da7cb64b31 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -305,6 +305,9 @@ #define X86_FEATURE_USE_IBPB_FW (11*32+16) /* "" Use IBPB during runtime firmware calls */ #define X86_FEATURE_RSB_VMEXIT_LITE (11*32+17) /* "" Fill RSB on VM exit when EIBRS is enabled */ + +#define X86_FEATURE_MSR_TSX_CTRL (11*32+20) /* "" MSR IA32_TSX_CTRL (Intel) implemented */ + /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ #define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */ diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index c936ce9f0c47..dfdb103ae4f6 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -321,7 +321,7 @@ static inline void indirect_branch_prediction_barrier(void) /* The Intel SPEC CTRL MSR base value cache */ extern u64 x86_spec_ctrl_base; DECLARE_PER_CPU(u64, x86_spec_ctrl_current); -extern void write_spec_ctrl_current(u64 val, bool force); +extern void update_spec_ctrl_cond(u64 val); extern u64 spec_ctrl_current(void); /* diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 5059799bebe3..286a71810f9e 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -139,6 +139,7 @@ static inline int pmd_dirty(pmd_t pmd) return pmd_flags(pmd) & _PAGE_DIRTY; } +#define pmd_young pmd_young static inline int pmd_young(pmd_t pmd) { return pmd_flags(pmd) & _PAGE_ACCESSED; @@ -1438,6 +1439,14 @@ static inline bool arch_has_hw_pte_young(void) return true; } +#ifdef CONFIG_XEN_PV +#define arch_has_hw_nonleaf_pmd_young arch_has_hw_nonleaf_pmd_young +static inline bool arch_has_hw_nonleaf_pmd_young(void) +{ + return !cpu_feature_enabled(X86_FEATURE_XENPV); +} +#endif + #ifdef CONFIG_PAGE_TABLE_CHECK static inline bool pte_user_accessible_page(pte_t pte) { diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 3e3230cccaa7..6daf84229548 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -60,11 +60,18 @@ EXPORT_SYMBOL_GPL(x86_spec_ctrl_current); static DEFINE_MUTEX(spec_ctrl_mutex); +/* Update SPEC_CTRL MSR and its cached copy unconditionally */ +static void update_spec_ctrl(u64 val) +{ + this_cpu_write(x86_spec_ctrl_current, val); + wrmsrl(MSR_IA32_SPEC_CTRL, val); +} + /* * Keep track of the SPEC_CTRL MSR value for the current task, which may differ * from x86_spec_ctrl_base due to STIBP/SSB in __speculation_ctrl_update(). */ -void write_spec_ctrl_current(u64 val, bool force) +void update_spec_ctrl_cond(u64 val) { if (this_cpu_read(x86_spec_ctrl_current) == val) return; @@ -75,7 +82,7 @@ void write_spec_ctrl_current(u64 val, bool force) * When KERNEL_IBRS this MSR is written on return-to-user, unless * forced the update can be delayed until that time. */ - if (force || !cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS)) + if (!cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS)) wrmsrl(MSR_IA32_SPEC_CTRL, val); } @@ -1328,7 +1335,7 @@ static void __init spec_ctrl_disable_kernel_rrsba(void) if (ia32_cap & ARCH_CAP_RRSBA) { x86_spec_ctrl_base |= SPEC_CTRL_RRSBA_DIS_S; - write_spec_ctrl_current(x86_spec_ctrl_base, true); + update_spec_ctrl(x86_spec_ctrl_base); } } @@ -1450,7 +1457,7 @@ static void __init spectre_v2_select_mitigation(void) if (spectre_v2_in_ibrs_mode(mode)) { x86_spec_ctrl_base |= SPEC_CTRL_IBRS; - write_spec_ctrl_current(x86_spec_ctrl_base, true); + update_spec_ctrl(x86_spec_ctrl_base); } switch (mode) { @@ -1564,7 +1571,7 @@ static void __init spectre_v2_select_mitigation(void) static void update_stibp_msr(void * __unused) { u64 val = spec_ctrl_current() | (x86_spec_ctrl_base & SPEC_CTRL_STIBP); - write_spec_ctrl_current(val, true); + update_spec_ctrl(val); } /* Update x86_spec_ctrl_base in case SMT state changed. */ @@ -1797,7 +1804,7 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void) x86_amd_ssb_disable(); } else { x86_spec_ctrl_base |= SPEC_CTRL_SSBD; - write_spec_ctrl_current(x86_spec_ctrl_base, true); + update_spec_ctrl(x86_spec_ctrl_base); } } @@ -2048,7 +2055,7 @@ int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) void x86_spec_ctrl_setup_ap(void) { if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) - write_spec_ctrl_current(x86_spec_ctrl_base, true); + update_spec_ctrl(x86_spec_ctrl_base); if (ssb_mode == SPEC_STORE_BYPASS_DISABLE) x86_amd_ssb_disable(); diff --git a/arch/x86/kernel/cpu/tsx.c b/arch/x86/kernel/cpu/tsx.c index ec7bbac3a9f2..8009c8346d8f 100644 --- a/arch/x86/kernel/cpu/tsx.c +++ b/arch/x86/kernel/cpu/tsx.c @@ -58,24 +58,6 @@ static void tsx_enable(void) wrmsrl(MSR_IA32_TSX_CTRL, tsx); } -static bool tsx_ctrl_is_supported(void) -{ - u64 ia32_cap = x86_read_arch_cap_msr(); - - /* - * TSX is controlled via MSR_IA32_TSX_CTRL. However, support for this - * MSR is enumerated by ARCH_CAP_TSX_MSR bit in MSR_IA32_ARCH_CAPABILITIES. - * - * TSX control (aka MSR_IA32_TSX_CTRL) is only available after a - * microcode update on CPUs that have their MSR_IA32_ARCH_CAPABILITIES - * bit MDS_NO=1. CPUs with MDS_NO=0 are not planned to get - * MSR_IA32_TSX_CTRL support even after a microcode update. Thus, - * tsx= cmdline requests will do nothing on CPUs without - * MSR_IA32_TSX_CTRL support. - */ - return !!(ia32_cap & ARCH_CAP_TSX_CTRL_MSR); -} - static enum tsx_ctrl_states x86_get_tsx_auto_mode(void) { if (boot_cpu_has_bug(X86_BUG_TAA)) @@ -135,7 +117,7 @@ static void tsx_clear_cpuid(void) rdmsrl(MSR_TSX_FORCE_ABORT, msr); msr |= MSR_TFA_TSX_CPUID_CLEAR; wrmsrl(MSR_TSX_FORCE_ABORT, msr); - } else if (tsx_ctrl_is_supported()) { + } else if (cpu_feature_enabled(X86_FEATURE_MSR_TSX_CTRL)) { rdmsrl(MSR_IA32_TSX_CTRL, msr); msr |= TSX_CTRL_CPUID_CLEAR; wrmsrl(MSR_IA32_TSX_CTRL, msr); @@ -158,7 +140,8 @@ static void tsx_dev_mode_disable(void) u64 mcu_opt_ctrl; /* Check if RTM_ALLOW exists */ - if (!boot_cpu_has_bug(X86_BUG_TAA) || !tsx_ctrl_is_supported() || + if (!boot_cpu_has_bug(X86_BUG_TAA) || + !cpu_feature_enabled(X86_FEATURE_MSR_TSX_CTRL) || !cpu_feature_enabled(X86_FEATURE_SRBDS_CTRL)) return; @@ -191,7 +174,20 @@ void __init tsx_init(void) return; } - if (!tsx_ctrl_is_supported()) { + /* + * TSX is controlled via MSR_IA32_TSX_CTRL. However, support for this + * MSR is enumerated by ARCH_CAP_TSX_MSR bit in MSR_IA32_ARCH_CAPABILITIES. + * + * TSX control (aka MSR_IA32_TSX_CTRL) is only available after a + * microcode update on CPUs that have their MSR_IA32_ARCH_CAPABILITIES + * bit MDS_NO=1. CPUs with MDS_NO=0 are not planned to get + * MSR_IA32_TSX_CTRL support even after a microcode update. Thus, + * tsx= cmdline requests will do nothing on CPUs without + * MSR_IA32_TSX_CTRL support. + */ + if (x86_read_arch_cap_msr() & ARCH_CAP_TSX_CTRL_MSR) { + setup_force_cpu_cap(X86_FEATURE_MSR_TSX_CTRL); + } else { tsx_ctrl_state = TSX_CTRL_NOT_SUPPORTED; return; } diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index c21b7347a26d..e436c9c1ef3b 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -600,7 +600,7 @@ static __always_inline void __speculation_ctrl_update(unsigned long tifp, } if (updmsr) - write_spec_ctrl_current(msr, false); + update_spec_ctrl_cond(msr); } static unsigned long speculation_ctrl_update_tif(struct task_struct *tsk) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 1ccb769f62af..b6f96d47e596 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -2443,6 +2443,7 @@ static bool __kvm_mmu_prepare_zap_page(struct kvm *kvm, { bool list_unstable, zapped_root = false; + lockdep_assert_held_write(&kvm->mmu_lock); trace_kvm_mmu_prepare_zap_page(sp); ++kvm->stat.mmu_shadow_zapped; *nr_zapped = mmu_zap_unsync_children(kvm, sp, invalid_list); @@ -4262,14 +4263,14 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault if (is_page_fault_stale(vcpu, fault, mmu_seq)) goto out_unlock; - r = make_mmu_pages_available(vcpu); - if (r) - goto out_unlock; - - if (is_tdp_mmu_fault) + if (is_tdp_mmu_fault) { r = kvm_tdp_mmu_map(vcpu, fault); - else + } else { + r = make_mmu_pages_available(vcpu); + if (r) + goto out_unlock; r = __direct_map(vcpu, fault); + } out_unlock: if (is_tdp_mmu_fault) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 4c620999d230..995bc0f90759 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -1091,6 +1091,12 @@ int nested_svm_vmexit(struct vcpu_svm *svm) static void nested_svm_triple_fault(struct kvm_vcpu *vcpu) { + struct vcpu_svm *svm = to_svm(vcpu); + + if (!vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_SHUTDOWN)) + return; + + kvm_clear_request(KVM_REQ_TRIPLE_FAULT, vcpu); nested_svm_simple_vmexit(to_svm(vcpu), SVM_EXIT_SHUTDOWN); } @@ -1125,6 +1131,9 @@ void svm_free_nested(struct vcpu_svm *svm) if (!svm->nested.initialized) return; + if (WARN_ON_ONCE(svm->vmcb != svm->vmcb01.ptr)) + svm_switch_vmcb(svm, &svm->vmcb01); + svm_vcpu_free_msrpm(svm->nested.msrpm); svm->nested.msrpm = NULL; @@ -1143,9 +1152,6 @@ void svm_free_nested(struct vcpu_svm *svm) svm->nested.initialized = false; } -/* - * Forcibly leave nested mode in order to be able to reset the VCPU later on. - */ void svm_leave_nested(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 4b6d2b050e57..ce362e88a567 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -346,12 +346,6 @@ int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer) return 0; } -static int is_external_interrupt(u32 info) -{ - info &= SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID; - return info == (SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR); -} - static u32 svm_get_interrupt_shadow(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); @@ -1438,6 +1432,7 @@ static void svm_vcpu_free(struct kvm_vcpu *vcpu) */ svm_clear_current_vmcb(svm->vmcb); + svm_leave_nested(vcpu); svm_free_nested(svm); sev_free_vcpu(vcpu); @@ -3425,15 +3420,6 @@ static int svm_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) return 0; } - if (is_external_interrupt(svm->vmcb->control.exit_int_info) && - exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR && - exit_code != SVM_EXIT_NPF && exit_code != SVM_EXIT_TASK_SWITCH && - exit_code != SVM_EXIT_INTR && exit_code != SVM_EXIT_NMI) - printk(KERN_ERR "%s: unexpected exit_int_info 0x%x " - "exit_code 0x%x\n", - __func__, svm->vmcb->control.exit_int_info, - exit_code); - if (exit_fastpath != EXIT_FASTPATH_NONE) return 1; diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 0c62352dda6a..5b0d4859e4b7 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -4854,6 +4854,7 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason, static void nested_vmx_triple_fault(struct kvm_vcpu *vcpu) { + kvm_clear_request(KVM_REQ_TRIPLE_FAULT, vcpu); nested_vmx_vmexit(vcpu, EXIT_REASON_TRIPLE_FAULT, 0, 0); } @@ -6440,9 +6441,6 @@ out: return kvm_state.size; } -/* - * Forcibly leave nested mode in order to be able to reset the VCPU later on. - */ void vmx_leave_nested(struct kvm_vcpu *vcpu) { if (is_guest_mode(vcpu)) { diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 490ec23c8450..69227f77b201 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -628,6 +628,12 @@ static void kvm_queue_exception_vmexit(struct kvm_vcpu *vcpu, unsigned int vecto ex->payload = payload; } +/* Forcibly leave the nested mode in cases like a vCPU reset */ +static void kvm_leave_nested(struct kvm_vcpu *vcpu) +{ + kvm_x86_ops.nested_ops->leave_nested(vcpu); +} + static void kvm_multiple_exception(struct kvm_vcpu *vcpu, unsigned nr, bool has_error, u32 error_code, bool has_payload, unsigned long payload, bool reinject) @@ -5195,7 +5201,7 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, if (events->flags & KVM_VCPUEVENT_VALID_SMM) { if (!!(vcpu->arch.hflags & HF_SMM_MASK) != events->smi.smm) { - kvm_x86_ops.nested_ops->leave_nested(vcpu); + kvm_leave_nested(vcpu); kvm_smm_changed(vcpu, events->smi.smm); } @@ -9805,7 +9811,7 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu) int kvm_check_nested_events(struct kvm_vcpu *vcpu) { - if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) { + if (kvm_test_request(KVM_REQ_TRIPLE_FAULT, vcpu)) { kvm_x86_ops.nested_ops->triple_fault(vcpu); return 1; } @@ -10560,10 +10566,11 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) r = 0; goto out; } - if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) { - if (is_guest_mode(vcpu)) { + if (kvm_test_request(KVM_REQ_TRIPLE_FAULT, vcpu)) { + if (is_guest_mode(vcpu)) kvm_x86_ops.nested_ops->triple_fault(vcpu); - } else { + + if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) { vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN; vcpu->mmio_needed = 0; r = 0; @@ -11997,8 +12004,18 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) WARN_ON_ONCE(!init_event && (old_cr0 || kvm_read_cr3(vcpu) || kvm_read_cr4(vcpu))); + /* + * SVM doesn't unconditionally VM-Exit on INIT and SHUTDOWN, thus it's + * possible to INIT the vCPU while L2 is active. Force the vCPU back + * into L1 as EFER.SVME is cleared on INIT (along with all other EFER + * bits), i.e. virtualization is disabled. + */ + if (is_guest_mode(vcpu)) + kvm_leave_nested(vcpu); + kvm_lapic_reset(vcpu, init_event); + WARN_ON_ONCE(is_guest_mode(vcpu) || is_smm(vcpu)); vcpu->arch.hflags = 0; vcpu->arch.smi_pending = 0; diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c index 2dae413bd62a..f3098c0e386a 100644 --- a/arch/x86/kvm/xen.c +++ b/arch/x86/kvm/xen.c @@ -954,6 +954,14 @@ static int kvm_xen_hypercall_complete_userspace(struct kvm_vcpu *vcpu) return kvm_xen_hypercall_set_result(vcpu, run->xen.u.hcall.result); } +static inline int max_evtchn_port(struct kvm *kvm) +{ + if (IS_ENABLED(CONFIG_64BIT) && kvm->arch.xen.long_mode) + return EVTCHN_2L_NR_CHANNELS; + else + return COMPAT_EVTCHN_2L_NR_CHANNELS; +} + static bool wait_pending_event(struct kvm_vcpu *vcpu, int nr_ports, evtchn_port_t *ports) { @@ -1042,6 +1050,10 @@ static bool kvm_xen_schedop_poll(struct kvm_vcpu *vcpu, bool longmode, *r = -EFAULT; goto out; } + if (ports[i] >= max_evtchn_port(vcpu->kvm)) { + *r = -EINVAL; + goto out; + } } if (sched_poll.nr_ports == 1) @@ -1215,6 +1227,7 @@ int kvm_xen_hypercall(struct kvm_vcpu *vcpu) bool longmode; u64 input, params[6], r = -ENOSYS; bool handled = false; + u8 cpl; input = (u64)kvm_register_read(vcpu, VCPU_REGS_RAX); @@ -1242,9 +1255,17 @@ int kvm_xen_hypercall(struct kvm_vcpu *vcpu) params[5] = (u64)kvm_r9_read(vcpu); } #endif + cpl = static_call(kvm_x86_get_cpl)(vcpu); trace_kvm_xen_hypercall(input, params[0], params[1], params[2], params[3], params[4], params[5]); + /* + * Only allow hypercall acceleration for CPL0. The rare hypercalls that + * are permitted in guest userspace can be handled by the VMM. + */ + if (unlikely(cpl > 0)) + goto handle_in_userspace; + switch (input) { case __HYPERVISOR_xen_version: if (params[0] == XENVER_version && vcpu->kvm->arch.xen.xen_version) { @@ -1279,10 +1300,11 @@ int kvm_xen_hypercall(struct kvm_vcpu *vcpu) if (handled) return kvm_xen_hypercall_set_result(vcpu, r); +handle_in_userspace: vcpu->run->exit_reason = KVM_EXIT_XEN; vcpu->run->xen.type = KVM_EXIT_XEN_HCALL; vcpu->run->xen.u.hcall.longmode = longmode; - vcpu->run->xen.u.hcall.cpl = static_call(kvm_x86_get_cpl)(vcpu); + vcpu->run->xen.u.hcall.cpl = cpl; vcpu->run->xen.u.hcall.input = input; vcpu->run->xen.u.hcall.params[0] = params[0]; vcpu->run->xen.u.hcall.params[1] = params[1]; @@ -1297,14 +1319,6 @@ int kvm_xen_hypercall(struct kvm_vcpu *vcpu) return 0; } -static inline int max_evtchn_port(struct kvm *kvm) -{ - if (IS_ENABLED(CONFIG_64BIT) && kvm->arch.xen.long_mode) - return EVTCHN_2L_NR_CHANNELS; - else - return COMPAT_EVTCHN_2L_NR_CHANNELS; -} - static void kvm_xen_check_poller(struct kvm_vcpu *vcpu, int port) { int poll_evtchn = vcpu->arch.xen.poll_evtchn; diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 78c5bc654cff..6453fbaedb08 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -217,9 +217,15 @@ __ioremap_caller(resource_size_t phys_addr, unsigned long size, * Mappings have to be page-aligned */ offset = phys_addr & ~PAGE_MASK; - phys_addr &= PHYSICAL_PAGE_MASK; + phys_addr &= PAGE_MASK; size = PAGE_ALIGN(last_addr+1) - phys_addr; + /* + * Mask out any bits not part of the actual physical + * address, like memory encryption bits. + */ + phys_addr &= PHYSICAL_PAGE_MASK; + retval = memtype_reserve(phys_addr, (u64)phys_addr + size, pcm, &new_pcm); if (retval) { diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index 4cd39f304e20..93ae33248f42 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -513,16 +513,23 @@ static int pm_cpu_check(const struct x86_cpu_id *c) static void pm_save_spec_msr(void) { - u32 spec_msr_id[] = { - MSR_IA32_SPEC_CTRL, - MSR_IA32_TSX_CTRL, - MSR_TSX_FORCE_ABORT, - MSR_IA32_MCU_OPT_CTRL, - MSR_AMD64_LS_CFG, - MSR_AMD64_DE_CFG, + struct msr_enumeration { + u32 msr_no; + u32 feature; + } msr_enum[] = { + { MSR_IA32_SPEC_CTRL, X86_FEATURE_MSR_SPEC_CTRL }, + { MSR_IA32_TSX_CTRL, X86_FEATURE_MSR_TSX_CTRL }, + { MSR_TSX_FORCE_ABORT, X86_FEATURE_TSX_FORCE_ABORT }, + { MSR_IA32_MCU_OPT_CTRL, X86_FEATURE_SRBDS_CTRL }, + { MSR_AMD64_LS_CFG, X86_FEATURE_LS_CFG_SSBD }, + { MSR_AMD64_DE_CFG, X86_FEATURE_LFENCE_RDTSC }, }; + int i; - msr_build_context(spec_msr_id, ARRAY_SIZE(spec_msr_id)); + for (i = 0; i < ARRAY_SIZE(msr_enum); i++) { + if (boot_cpu_has(msr_enum[i].feature)) + msr_build_context(&msr_enum[i].msr_no, 1); + } } static int pm_check_save_msr(void) diff --git a/block/blk-mq.c b/block/blk-mq.c index 6a789cda68a5..228a6696d835 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -4045,9 +4045,14 @@ EXPORT_SYMBOL(__blk_mq_alloc_disk); struct gendisk *blk_mq_alloc_disk_for_queue(struct request_queue *q, struct lock_class_key *lkclass) { + struct gendisk *disk; + if (!blk_get_queue(q)) return NULL; - return __alloc_disk_node(q, NUMA_NO_NODE, lkclass); + disk = __alloc_disk_node(q, NUMA_NO_NODE, lkclass); + if (!disk) + blk_put_queue(q); + return disk; } EXPORT_SYMBOL(blk_mq_alloc_disk_for_queue); diff --git a/drivers/acpi/numa/hmat.c b/drivers/acpi/numa/hmat.c index 23f49a2f4d14..6cceca64a6bc 100644 --- a/drivers/acpi/numa/hmat.c +++ b/drivers/acpi/numa/hmat.c @@ -562,17 +562,26 @@ static int initiator_cmp(void *priv, const struct list_head *a, { struct memory_initiator *ia; struct memory_initiator *ib; - unsigned long *p_nodes = priv; ia = list_entry(a, struct memory_initiator, node); ib = list_entry(b, struct memory_initiator, node); - set_bit(ia->processor_pxm, p_nodes); - set_bit(ib->processor_pxm, p_nodes); - return ia->processor_pxm - ib->processor_pxm; } +static int initiators_to_nodemask(unsigned long *p_nodes) +{ + struct memory_initiator *initiator; + + if (list_empty(&initiators)) + return -ENXIO; + + list_for_each_entry(initiator, &initiators, node) + set_bit(initiator->processor_pxm, p_nodes); + + return 0; +} + static void hmat_register_target_initiators(struct memory_target *target) { static DECLARE_BITMAP(p_nodes, MAX_NUMNODES); @@ -609,7 +618,10 @@ static void hmat_register_target_initiators(struct memory_target *target) * initiators. */ bitmap_zero(p_nodes, MAX_NUMNODES); - list_sort(p_nodes, &initiators, initiator_cmp); + list_sort(NULL, &initiators, initiator_cmp); + if (initiators_to_nodemask(p_nodes) < 0) + return; + if (!access0done) { for (i = WRITE_LATENCY; i <= READ_BANDWIDTH; i++) { loc = localities_types[i]; @@ -643,8 +655,9 @@ static void hmat_register_target_initiators(struct memory_target *target) /* Access 1 ignores Generic Initiators */ bitmap_zero(p_nodes, MAX_NUMNODES); - list_sort(p_nodes, &initiators, initiator_cmp); - best = 0; + if (initiators_to_nodemask(p_nodes) < 0) + return; + for (i = WRITE_LATENCY; i <= READ_BANDWIDTH; i++) { loc = localities_types[i]; if (!loc) diff --git a/drivers/ata/libahci_platform.c b/drivers/ata/libahci_platform.c index ddf17e2d266c..b9e336bacf17 100644 --- a/drivers/ata/libahci_platform.c +++ b/drivers/ata/libahci_platform.c @@ -109,7 +109,7 @@ struct clk *ahci_platform_find_clk(struct ahci_host_priv *hpriv, const char *con int i; for (i = 0; i < hpriv->n_clks; i++) { - if (!strcmp(hpriv->clks[i].id, con_id)) + if (hpriv->clks[i].id && !strcmp(hpriv->clks[i].id, con_id)) return hpriv->clks[i].clk; } diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index f96cb01e9604..e9de9d846b73 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -57,10 +57,8 @@ #define UBLK_PARAM_TYPE_ALL (UBLK_PARAM_TYPE_BASIC | UBLK_PARAM_TYPE_DISCARD) struct ublk_rq_data { - union { - struct callback_head work; - struct llist_node node; - }; + struct llist_node node; + struct callback_head work; }; struct ublk_uring_cmd_pdu { @@ -766,15 +764,31 @@ static inline void __ublk_rq_task_work(struct request *req) ubq_complete_io_cmd(io, UBLK_IO_RES_OK); } +static inline void ublk_forward_io_cmds(struct ublk_queue *ubq) +{ + struct llist_node *io_cmds = llist_del_all(&ubq->io_cmds); + struct ublk_rq_data *data, *tmp; + + io_cmds = llist_reverse_order(io_cmds); + llist_for_each_entry_safe(data, tmp, io_cmds, node) + __ublk_rq_task_work(blk_mq_rq_from_pdu(data)); +} + +static inline void ublk_abort_io_cmds(struct ublk_queue *ubq) +{ + struct llist_node *io_cmds = llist_del_all(&ubq->io_cmds); + struct ublk_rq_data *data, *tmp; + + llist_for_each_entry_safe(data, tmp, io_cmds, node) + __ublk_abort_rq(ubq, blk_mq_rq_from_pdu(data)); +} + static void ublk_rq_task_work_cb(struct io_uring_cmd *cmd) { struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd); struct ublk_queue *ubq = pdu->ubq; - struct llist_node *io_cmds = llist_del_all(&ubq->io_cmds); - struct ublk_rq_data *data; - llist_for_each_entry(data, io_cmds, node) - __ublk_rq_task_work(blk_mq_rq_from_pdu(data)); + ublk_forward_io_cmds(ubq); } static void ublk_rq_task_work_fn(struct callback_head *work) @@ -782,14 +796,20 @@ static void ublk_rq_task_work_fn(struct callback_head *work) struct ublk_rq_data *data = container_of(work, struct ublk_rq_data, work); struct request *req = blk_mq_rq_from_pdu(data); + struct ublk_queue *ubq = req->mq_hctx->driver_data; - __ublk_rq_task_work(req); + ublk_forward_io_cmds(ubq); } -static void ublk_submit_cmd(struct ublk_queue *ubq, const struct request *rq) +static void ublk_queue_cmd(struct ublk_queue *ubq, struct request *rq) { - struct ublk_io *io = &ubq->ios[rq->tag]; + struct ublk_rq_data *data = blk_mq_rq_to_pdu(rq); + struct ublk_io *io; + if (!llist_add(&data->node, &ubq->io_cmds)) + return; + + io = &ubq->ios[rq->tag]; /* * If the check pass, we know that this is a re-issued request aborted * previously in monitor_work because the ubq_daemon(cmd's task) is @@ -803,11 +823,11 @@ static void ublk_submit_cmd(struct ublk_queue *ubq, const struct request *rq) * guarantees that here is a re-issued request aborted previously. */ if (unlikely(io->flags & UBLK_IO_FLAG_ABORTED)) { - struct llist_node *io_cmds = llist_del_all(&ubq->io_cmds); - struct ublk_rq_data *data; - - llist_for_each_entry(data, io_cmds, node) - __ublk_abort_rq(ubq, blk_mq_rq_from_pdu(data)); + ublk_abort_io_cmds(ubq); + } else if (ublk_can_use_task_work(ubq)) { + if (task_work_add(ubq->ubq_daemon, &data->work, + TWA_SIGNAL_NO_IPI)) + ublk_abort_io_cmds(ubq); } else { struct io_uring_cmd *cmd = io->cmd; struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd); @@ -817,23 +837,6 @@ static void ublk_submit_cmd(struct ublk_queue *ubq, const struct request *rq) } } -static void ublk_queue_cmd(struct ublk_queue *ubq, struct request *rq, - bool last) -{ - struct ublk_rq_data *data = blk_mq_rq_to_pdu(rq); - - if (ublk_can_use_task_work(ubq)) { - enum task_work_notify_mode notify_mode = last ? - TWA_SIGNAL_NO_IPI : TWA_NONE; - - if (task_work_add(ubq->ubq_daemon, &data->work, notify_mode)) - __ublk_abort_rq(ubq, rq); - } else { - if (llist_add(&data->node, &ubq->io_cmds)) - ublk_submit_cmd(ubq, rq); - } -} - static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { @@ -865,19 +868,11 @@ static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx, return BLK_STS_OK; } - ublk_queue_cmd(ubq, rq, bd->last); + ublk_queue_cmd(ubq, rq); return BLK_STS_OK; } -static void ublk_commit_rqs(struct blk_mq_hw_ctx *hctx) -{ - struct ublk_queue *ubq = hctx->driver_data; - - if (ublk_can_use_task_work(ubq)) - __set_notify_signal(ubq->ubq_daemon); -} - static int ublk_init_hctx(struct blk_mq_hw_ctx *hctx, void *driver_data, unsigned int hctx_idx) { @@ -899,7 +894,6 @@ static int ublk_init_rq(struct blk_mq_tag_set *set, struct request *req, static const struct blk_mq_ops ublk_mq_ops = { .queue_rq = ublk_queue_rq, - .commit_rqs = ublk_commit_rqs, .init_hctx = ublk_init_hctx, .init_request = ublk_init_rq, }; @@ -1197,7 +1191,7 @@ static void ublk_handle_need_get_data(struct ublk_device *ub, int q_id, struct ublk_queue *ubq = ublk_get_queue(ub, q_id); struct request *req = blk_mq_tag_to_rq(ub->tag_set.tags[q_id], tag); - ublk_queue_cmd(ubq, req, true); + ublk_queue_cmd(ubq, req); } static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 271963805a38..f05018988a17 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -2056,6 +2056,11 @@ static int btusb_setup_csr(struct hci_dev *hdev) rp = (struct hci_rp_read_local_version *)skb->data; + bt_dev_info(hdev, "CSR: Setting up dongle with HCI ver=%u rev=%04x; LMP ver=%u subver=%04x; manufacturer=%u", + le16_to_cpu(rp->hci_ver), le16_to_cpu(rp->hci_rev), + le16_to_cpu(rp->lmp_ver), le16_to_cpu(rp->lmp_subver), + le16_to_cpu(rp->manufacturer)); + /* Detect a wide host of Chinese controllers that aren't CSR. * * Known fake bcdDevices: 0x0100, 0x0134, 0x1915, 0x2520, 0x7558, 0x8891 @@ -2118,6 +2123,7 @@ static int btusb_setup_csr(struct hci_dev *hdev) * without these the controller will lock up. */ set_bit(HCI_QUIRK_BROKEN_STORED_LINK_KEY, &hdev->quirks); + set_bit(HCI_QUIRK_BROKEN_ERR_DATA_REPORTING, &hdev->quirks); set_bit(HCI_QUIRK_BROKEN_FILTER_CLEAR_ALL, &hdev->quirks); set_bit(HCI_QUIRK_NO_SUSPEND_NOTIFIER, &hdev->quirks); diff --git a/drivers/bus/intel-ixp4xx-eb.c b/drivers/bus/intel-ixp4xx-eb.c index a4388440aca7..91db001eb69a 100644 --- a/drivers/bus/intel-ixp4xx-eb.c +++ b/drivers/bus/intel-ixp4xx-eb.c @@ -49,7 +49,7 @@ #define IXP4XX_EXP_SIZE_SHIFT 10 #define IXP4XX_EXP_CNFG_0 BIT(9) /* Always zero */ #define IXP43X_EXP_SYNC_INTEL BIT(8) /* Only on IXP43x */ -#define IXP43X_EXP_EXP_CHIP BIT(7) /* Only on IXP43x */ +#define IXP43X_EXP_EXP_CHIP BIT(7) /* Only on IXP43x, dangerous to touch on IXP42x */ #define IXP4XX_EXP_BYTE_RD16 BIT(6) #define IXP4XX_EXP_HRDY_POL BIT(5) /* Only on IXP42x */ #define IXP4XX_EXP_MUX_EN BIT(4) @@ -57,8 +57,6 @@ #define IXP4XX_EXP_WORD BIT(2) /* Always zero */ #define IXP4XX_EXP_WR_EN BIT(1) #define IXP4XX_EXP_BYTE_EN BIT(0) -#define IXP42X_RESERVED (BIT(30)|IXP4XX_EXP_CNFG_0|BIT(8)|BIT(7)|IXP4XX_EXP_WORD) -#define IXP43X_RESERVED (BIT(30)|IXP4XX_EXP_CNFG_0|BIT(5)|IXP4XX_EXP_WORD) #define IXP4XX_EXP_CNFG0 0x20 #define IXP4XX_EXP_CNFG0_MEM_MAP BIT(31) @@ -252,10 +250,9 @@ static void ixp4xx_exp_setup_chipselect(struct ixp4xx_eb *eb, cs_cfg |= val << IXP4XX_EXP_CYC_TYPE_SHIFT; } - if (eb->is_42x) - cs_cfg &= ~IXP42X_RESERVED; if (eb->is_43x) { - cs_cfg &= ~IXP43X_RESERVED; + /* Should always be zero */ + cs_cfg &= ~IXP4XX_EXP_WORD; /* * This bit for Intel strata flash is currently unused, but let's * report it if we find one. diff --git a/drivers/bus/sunxi-rsb.c b/drivers/bus/sunxi-rsb.c index 4cd2e127946e..3aa91aed3bf7 100644 --- a/drivers/bus/sunxi-rsb.c +++ b/drivers/bus/sunxi-rsb.c @@ -267,6 +267,9 @@ EXPORT_SYMBOL_GPL(sunxi_rsb_driver_register); /* common code that starts a transfer */ static int _sunxi_rsb_run_xfer(struct sunxi_rsb *rsb) { + u32 int_mask, status; + bool timeout; + if (readl(rsb->regs + RSB_CTRL) & RSB_CTRL_START_TRANS) { dev_dbg(rsb->dev, "RSB transfer still in progress\n"); return -EBUSY; @@ -274,13 +277,23 @@ static int _sunxi_rsb_run_xfer(struct sunxi_rsb *rsb) reinit_completion(&rsb->complete); - writel(RSB_INTS_LOAD_BSY | RSB_INTS_TRANS_ERR | RSB_INTS_TRANS_OVER, - rsb->regs + RSB_INTE); + int_mask = RSB_INTS_LOAD_BSY | RSB_INTS_TRANS_ERR | RSB_INTS_TRANS_OVER; + writel(int_mask, rsb->regs + RSB_INTE); writel(RSB_CTRL_START_TRANS | RSB_CTRL_GLOBAL_INT_ENB, rsb->regs + RSB_CTRL); - if (!wait_for_completion_io_timeout(&rsb->complete, - msecs_to_jiffies(100))) { + if (irqs_disabled()) { + timeout = readl_poll_timeout_atomic(rsb->regs + RSB_INTS, + status, (status & int_mask), + 10, 100000); + writel(status, rsb->regs + RSB_INTS); + } else { + timeout = !wait_for_completion_io_timeout(&rsb->complete, + msecs_to_jiffies(100)); + status = rsb->status; + } + + if (timeout) { dev_dbg(rsb->dev, "RSB timeout\n"); /* abort the transfer */ @@ -292,18 +305,18 @@ static int _sunxi_rsb_run_xfer(struct sunxi_rsb *rsb) return -ETIMEDOUT; } - if (rsb->status & RSB_INTS_LOAD_BSY) { + if (status & RSB_INTS_LOAD_BSY) { dev_dbg(rsb->dev, "RSB busy\n"); return -EBUSY; } - if (rsb->status & RSB_INTS_TRANS_ERR) { - if (rsb->status & RSB_INTS_TRANS_ERR_ACK) { + if (status & RSB_INTS_TRANS_ERR) { + if (status & RSB_INTS_TRANS_ERR_ACK) { dev_dbg(rsb->dev, "RSB slave nack\n"); return -EINVAL; } - if (rsb->status & RSB_INTS_TRANS_ERR_DATA) { + if (status & RSB_INTS_TRANS_ERR_DATA) { dev_dbg(rsb->dev, "RSB transfer data error\n"); return -EIO; } @@ -812,14 +825,6 @@ static int sunxi_rsb_remove(struct platform_device *pdev) return 0; } -static void sunxi_rsb_shutdown(struct platform_device *pdev) -{ - struct sunxi_rsb *rsb = platform_get_drvdata(pdev); - - pm_runtime_disable(&pdev->dev); - sunxi_rsb_hw_exit(rsb); -} - static const struct dev_pm_ops sunxi_rsb_dev_pm_ops = { SET_RUNTIME_PM_OPS(sunxi_rsb_runtime_suspend, sunxi_rsb_runtime_resume, NULL) @@ -835,7 +840,6 @@ MODULE_DEVICE_TABLE(of, sunxi_rsb_of_match_table); static struct platform_driver sunxi_rsb_driver = { .probe = sunxi_rsb_probe, .remove = sunxi_rsb_remove, - .shutdown = sunxi_rsb_shutdown, .driver = { .name = RSB_CTRL_NAME, .of_match_table = sunxi_rsb_of_match_table, diff --git a/drivers/char/tpm/tpm-interface.c b/drivers/char/tpm/tpm-interface.c index 1621ce818705..d69905233aff 100644 --- a/drivers/char/tpm/tpm-interface.c +++ b/drivers/char/tpm/tpm-interface.c @@ -401,13 +401,14 @@ int tpm_pm_suspend(struct device *dev) !pm_suspend_via_firmware()) goto suspended; - if (!tpm_chip_start(chip)) { + rc = tpm_try_get_ops(chip); + if (!rc) { if (chip->flags & TPM_CHIP_FLAG_TPM2) tpm2_shutdown(chip, TPM2_SU_STATE); else rc = tpm1_pm_suspend(chip, tpm_suspend_pcr); - tpm_chip_stop(chip); + tpm_put_ops(chip); } suspended: diff --git a/drivers/clk/at91/at91rm9200.c b/drivers/clk/at91/at91rm9200.c index b174f727a8ef..16870943a13e 100644 --- a/drivers/clk/at91/at91rm9200.c +++ b/drivers/clk/at91/at91rm9200.c @@ -40,7 +40,7 @@ static const struct clk_pll_characteristics rm9200_pll_characteristics = { }; static const struct sck at91rm9200_systemck[] = { - { .n = "udpck", .p = "usbck", .id = 2 }, + { .n = "udpck", .p = "usbck", .id = 1 }, { .n = "uhpck", .p = "usbck", .id = 4 }, { .n = "pck0", .p = "prog0", .id = 8 }, { .n = "pck1", .p = "prog1", .id = 9 }, diff --git a/drivers/clk/qcom/gcc-sc8280xp.c b/drivers/clk/qcom/gcc-sc8280xp.c index a18ed88f3b82..b3198784e1c3 100644 --- a/drivers/clk/qcom/gcc-sc8280xp.c +++ b/drivers/clk/qcom/gcc-sc8280xp.c @@ -5364,6 +5364,8 @@ static struct clk_branch gcc_ufs_1_card_clkref_clk = { .enable_mask = BIT(0), .hw.init = &(const struct clk_init_data) { .name = "gcc_ufs_1_card_clkref_clk", + .parent_data = &gcc_parent_data_tcxo, + .num_parents = 1, .ops = &clk_branch2_ops, }, }, @@ -5432,6 +5434,8 @@ static struct clk_branch gcc_ufs_card_clkref_clk = { .enable_mask = BIT(0), .hw.init = &(const struct clk_init_data) { .name = "gcc_ufs_card_clkref_clk", + .parent_data = &gcc_parent_data_tcxo, + .num_parents = 1, .ops = &clk_branch2_ops, }, }, @@ -5848,6 +5852,8 @@ static struct clk_branch gcc_ufs_ref_clkref_clk = { .enable_mask = BIT(0), .hw.init = &(const struct clk_init_data) { .name = "gcc_ufs_ref_clkref_clk", + .parent_data = &gcc_parent_data_tcxo, + .num_parents = 1, .ops = &clk_branch2_ops, }, }, diff --git a/drivers/clk/qcom/gdsc.c b/drivers/clk/qcom/gdsc.c index 7cf5e130e92f..0f21a8a767ac 100644 --- a/drivers/clk/qcom/gdsc.c +++ b/drivers/clk/qcom/gdsc.c @@ -11,7 +11,6 @@ #include <linux/kernel.h> #include <linux/ktime.h> #include <linux/pm_domain.h> -#include <linux/pm_runtime.h> #include <linux/regmap.h> #include <linux/regulator/consumer.h> #include <linux/reset-controller.h> @@ -56,22 +55,6 @@ enum gdsc_status { GDSC_ON }; -static int gdsc_pm_runtime_get(struct gdsc *sc) -{ - if (!sc->dev) - return 0; - - return pm_runtime_resume_and_get(sc->dev); -} - -static int gdsc_pm_runtime_put(struct gdsc *sc) -{ - if (!sc->dev) - return 0; - - return pm_runtime_put_sync(sc->dev); -} - /* Returns 1 if GDSC status is status, 0 if not, and < 0 on error */ static int gdsc_check_status(struct gdsc *sc, enum gdsc_status status) { @@ -271,8 +254,9 @@ static void gdsc_retain_ff_on(struct gdsc *sc) regmap_update_bits(sc->regmap, sc->gdscr, mask, mask); } -static int _gdsc_enable(struct gdsc *sc) +static int gdsc_enable(struct generic_pm_domain *domain) { + struct gdsc *sc = domain_to_gdsc(domain); int ret; if (sc->pwrsts == PWRSTS_ON) @@ -328,22 +312,11 @@ static int _gdsc_enable(struct gdsc *sc) return 0; } -static int gdsc_enable(struct generic_pm_domain *domain) +static int gdsc_disable(struct generic_pm_domain *domain) { struct gdsc *sc = domain_to_gdsc(domain); int ret; - ret = gdsc_pm_runtime_get(sc); - if (ret) - return ret; - - return _gdsc_enable(sc); -} - -static int _gdsc_disable(struct gdsc *sc) -{ - int ret; - if (sc->pwrsts == PWRSTS_ON) return gdsc_assert_reset(sc); @@ -388,18 +361,6 @@ static int _gdsc_disable(struct gdsc *sc) return 0; } -static int gdsc_disable(struct generic_pm_domain *domain) -{ - struct gdsc *sc = domain_to_gdsc(domain); - int ret; - - ret = _gdsc_disable(sc); - - gdsc_pm_runtime_put(sc); - - return ret; -} - static int gdsc_init(struct gdsc *sc) { u32 mask, val; @@ -447,11 +408,6 @@ static int gdsc_init(struct gdsc *sc) return ret; } - /* ...and the power-domain */ - ret = gdsc_pm_runtime_get(sc); - if (ret) - goto err_disable_supply; - /* * Votable GDSCs can be ON due to Vote from other masters. * If a Votable GDSC is ON, make sure we have a Vote. @@ -459,14 +415,14 @@ static int gdsc_init(struct gdsc *sc) if (sc->flags & VOTABLE) { ret = gdsc_update_collapse_bit(sc, false); if (ret) - goto err_put_rpm; + goto err_disable_supply; } /* Turn on HW trigger mode if supported */ if (sc->flags & HW_CTRL) { ret = gdsc_hwctrl(sc, true); if (ret < 0) - goto err_put_rpm; + goto err_disable_supply; } /* @@ -496,13 +452,10 @@ static int gdsc_init(struct gdsc *sc) ret = pm_genpd_init(&sc->pd, NULL, !on); if (ret) - goto err_put_rpm; + goto err_disable_supply; return 0; -err_put_rpm: - if (on) - gdsc_pm_runtime_put(sc); err_disable_supply: if (on && sc->rsupply) regulator_disable(sc->rsupply); @@ -541,8 +494,6 @@ int gdsc_register(struct gdsc_desc *desc, for (i = 0; i < num; i++) { if (!scs[i]) continue; - if (pm_runtime_enabled(dev)) - scs[i]->dev = dev; scs[i]->regmap = regmap; scs[i]->rcdev = rcdev; ret = gdsc_init(scs[i]); diff --git a/drivers/clk/qcom/gdsc.h b/drivers/clk/qcom/gdsc.h index 981a12c8502d..803512688336 100644 --- a/drivers/clk/qcom/gdsc.h +++ b/drivers/clk/qcom/gdsc.h @@ -30,7 +30,6 @@ struct reset_controller_dev; * @resets: ids of resets associated with this gdsc * @reset_count: number of @resets * @rcdev: reset controller - * @dev: the device holding the GDSC, used for pm_runtime calls */ struct gdsc { struct generic_pm_domain pd; @@ -74,7 +73,6 @@ struct gdsc { const char *supply; struct regulator *rsupply; - struct device *dev; }; struct gdsc_desc { diff --git a/drivers/clk/samsung/clk-exynos-clkout.c b/drivers/clk/samsung/clk-exynos-clkout.c index 273f77d54dab..e6d6cbf8c4e6 100644 --- a/drivers/clk/samsung/clk-exynos-clkout.c +++ b/drivers/clk/samsung/clk-exynos-clkout.c @@ -81,17 +81,19 @@ MODULE_DEVICE_TABLE(of, exynos_clkout_ids); static int exynos_clkout_match_parent_dev(struct device *dev, u32 *mux_mask) { const struct exynos_clkout_variant *variant; + const struct of_device_id *match; if (!dev->parent) { dev_err(dev, "not instantiated from MFD\n"); return -EINVAL; } - variant = of_device_get_match_data(dev->parent); - if (!variant) { + match = of_match_device(exynos_clkout_ids, dev->parent); + if (!match) { dev_err(dev, "cannot match parent device\n"); return -EINVAL; } + variant = match->data; *mux_mask = variant->mux_mask; diff --git a/drivers/clk/samsung/clk-exynos7885.c b/drivers/clk/samsung/clk-exynos7885.c index 62ce6814f141..0d2a950ed184 100644 --- a/drivers/clk/samsung/clk-exynos7885.c +++ b/drivers/clk/samsung/clk-exynos7885.c @@ -231,7 +231,7 @@ static const struct samsung_div_clock top_div_clks[] __initconst = { CLK_CON_DIV_PLL_SHARED0_DIV2, 0, 1), DIV(CLK_DOUT_SHARED0_DIV3, "dout_shared0_div3", "fout_shared0_pll", CLK_CON_DIV_PLL_SHARED0_DIV3, 0, 2), - DIV(CLK_DOUT_SHARED0_DIV4, "dout_shared0_div4", "fout_shared0_pll", + DIV(CLK_DOUT_SHARED0_DIV4, "dout_shared0_div4", "dout_shared0_div2", CLK_CON_DIV_PLL_SHARED0_DIV4, 0, 1), DIV(CLK_DOUT_SHARED0_DIV5, "dout_shared0_div5", "fout_shared0_pll", CLK_CON_DIV_PLL_SHARED0_DIV5, 0, 3), @@ -239,7 +239,7 @@ static const struct samsung_div_clock top_div_clks[] __initconst = { CLK_CON_DIV_PLL_SHARED1_DIV2, 0, 1), DIV(CLK_DOUT_SHARED1_DIV3, "dout_shared1_div3", "fout_shared1_pll", CLK_CON_DIV_PLL_SHARED1_DIV3, 0, 2), - DIV(CLK_DOUT_SHARED1_DIV4, "dout_shared1_div4", "fout_shared1_pll", + DIV(CLK_DOUT_SHARED1_DIV4, "dout_shared1_div4", "dout_shared1_div2", CLK_CON_DIV_PLL_SHARED1_DIV4, 0, 1), /* CORE */ diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index a7ff77550e17..933bb960490d 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -806,6 +806,9 @@ static u64 __arch_timer_check_delta(void) /* * XGene-1 implements CVAL in terms of TVAL, meaning * that the maximum timer range is 32bit. Shame on them. + * + * Note that TVAL is signed, thus has only 31 of its + * 32 bits to express magnitude. */ MIDR_ALL_VERSIONS(MIDR_CPU_MODEL(ARM_CPU_IMP_APM, APM_CPU_PART_POTENZA)), @@ -813,8 +816,8 @@ static u64 __arch_timer_check_delta(void) }; if (is_midr_in_range_list(read_cpuid_id(), broken_cval_midrs)) { - pr_warn_once("Broken CNTx_CVAL_EL1, limiting width to 32bits"); - return CLOCKSOURCE_MASK(32); + pr_warn_once("Broken CNTx_CVAL_EL1, using 31 bit TVAL instead.\n"); + return CLOCKSOURCE_MASK(31); } #endif return CLOCKSOURCE_MASK(arch_counter_get_width()); diff --git a/drivers/clocksource/timer-riscv.c b/drivers/clocksource/timer-riscv.c index 969a552da8d2..a0d66fabf073 100644 --- a/drivers/clocksource/timer-riscv.c +++ b/drivers/clocksource/timer-riscv.c @@ -51,7 +51,7 @@ static int riscv_clock_next_event(unsigned long delta, static unsigned int riscv_clock_event_irq; static DEFINE_PER_CPU(struct clock_event_device, riscv_clock_event) = { .name = "riscv_timer_clockevent", - .features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_C3STOP, + .features = CLOCK_EVT_FEAT_ONESHOT, .rating = 100, .set_next_event = riscv_clock_next_event, }; diff --git a/drivers/cpufreq/Kconfig.x86 b/drivers/cpufreq/Kconfig.x86 index 310779b07daf..00476e94db90 100644 --- a/drivers/cpufreq/Kconfig.x86 +++ b/drivers/cpufreq/Kconfig.x86 @@ -35,7 +35,7 @@ config X86_PCC_CPUFREQ If in doubt, say N. config X86_AMD_PSTATE - tristate "AMD Processor P-State driver" + bool "AMD Processor P-State driver" depends on X86 && ACPI select ACPI_PROCESSOR select ACPI_CPPC_LIB if X86_64 diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index ace7d50cf2ac..204e39006dda 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -59,12 +59,8 @@ * we disable it by default to go acpi-cpufreq on these processors and add a * module parameter to be able to enable it manually for debugging. */ -static bool shared_mem = false; -module_param(shared_mem, bool, 0444); -MODULE_PARM_DESC(shared_mem, - "enable amd-pstate on processors with shared memory solution (false = disabled (default), true = enabled)"); - static struct cpufreq_driver amd_pstate_driver; +static int cppc_load __initdata; static inline int pstate_enable(bool enable) { @@ -424,12 +420,22 @@ static void amd_pstate_boost_init(struct amd_cpudata *cpudata) amd_pstate_driver.boost_enabled = true; } +static void amd_perf_ctl_reset(unsigned int cpu) +{ + wrmsrl_on_cpu(cpu, MSR_AMD_PERF_CTL, 0); +} + static int amd_pstate_cpu_init(struct cpufreq_policy *policy) { int min_freq, max_freq, nominal_freq, lowest_nonlinear_freq, ret; struct device *dev; struct amd_cpudata *cpudata; + /* + * Resetting PERF_CTL_MSR will put the CPU in P0 frequency, + * which is ideal for initialization process. + */ + amd_perf_ctl_reset(policy->cpu); dev = get_cpu_device(policy->cpu); if (!dev) return -ENODEV; @@ -616,6 +622,15 @@ static int __init amd_pstate_init(void) if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) return -ENODEV; + /* + * by default the pstate driver is disabled to load + * enable the amd_pstate passive mode driver explicitly + * with amd_pstate=passive in kernel command line + */ + if (!cppc_load) { + pr_debug("driver load is disabled, boot with amd_pstate=passive to enable this\n"); + return -ENODEV; + } if (!acpi_cpc_valid()) { pr_warn_once("the _CPC object is not present in SBIOS or ACPI disabled\n"); @@ -630,13 +645,11 @@ static int __init amd_pstate_init(void) if (boot_cpu_has(X86_FEATURE_CPPC)) { pr_debug("AMD CPPC MSR based functionality is supported\n"); amd_pstate_driver.adjust_perf = amd_pstate_adjust_perf; - } else if (shared_mem) { + } else { + pr_debug("AMD CPPC shared memory based functionality is supported\n"); static_call_update(amd_pstate_enable, cppc_enable); static_call_update(amd_pstate_init_perf, cppc_init_perf); static_call_update(amd_pstate_update_perf, cppc_update_perf); - } else { - pr_info("This processor supports shared memory solution, you can enable it with amd_pstate.shared_mem=1\n"); - return -ENODEV; } /* enable amd pstate feature */ @@ -653,16 +666,22 @@ static int __init amd_pstate_init(void) return ret; } +device_initcall(amd_pstate_init); -static void __exit amd_pstate_exit(void) +static int __init amd_pstate_param(char *str) { - cpufreq_unregister_driver(&amd_pstate_driver); + if (!str) + return -EINVAL; - amd_pstate_enable(false); -} + if (!strcmp(str, "disable")) { + cppc_load = 0; + pr_info("driver is explicitly disabled\n"); + } else if (!strcmp(str, "passive")) + cppc_load = 1; -module_init(amd_pstate_init); -module_exit(amd_pstate_exit); + return 0; +} +early_param("amd_pstate", amd_pstate_param); MODULE_AUTHOR("Huang Rui <ray.huang@amd.com>"); MODULE_DESCRIPTION("AMD Processor P-state Frequency Driver"); diff --git a/drivers/dax/hmem/device.c b/drivers/dax/hmem/device.c index 97086fab698e..903325aac991 100644 --- a/drivers/dax/hmem/device.c +++ b/drivers/dax/hmem/device.c @@ -8,6 +8,13 @@ static bool nohmem; module_param_named(disable, nohmem, bool, 0444); +static struct resource hmem_active = { + .name = "HMEM devices", + .start = 0, + .end = -1, + .flags = IORESOURCE_MEM, +}; + void hmem_register_device(int target_nid, struct resource *r) { /* define a clean / non-busy resource for the platform device */ @@ -41,6 +48,12 @@ void hmem_register_device(int target_nid, struct resource *r) goto out_pdev; } + if (!__request_region(&hmem_active, res.start, resource_size(&res), + dev_name(&pdev->dev), 0)) { + dev_dbg(&pdev->dev, "hmem range %pr already active\n", &res); + goto out_active; + } + pdev->dev.numa_node = numa_map_to_online_node(target_nid); info = (struct memregion_info) { .target_node = target_nid, @@ -66,6 +79,8 @@ void hmem_register_device(int target_nid, struct resource *r) return; out_resource: + __release_region(&hmem_active, res.start, resource_size(&res)); +out_active: platform_device_put(pdev); out_pdev: memregion_free(id); @@ -73,15 +88,6 @@ out_pdev: static __init int hmem_register_one(struct resource *res, void *data) { - /* - * If the resource is not a top-level resource it was already - * assigned to a device by the HMAT parsing. - */ - if (res->parent != &iomem_resource) { - pr_info("HMEM: skip %pr, already claimed\n", res); - return 0; - } - hmem_register_device(phys_to_target_node(res->start), res); return 0; diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index dd0f83ee505b..e6f36c014c4c 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -15,6 +15,7 @@ #include <linux/slab.h> #include <linux/dma-buf.h> #include <linux/dma-fence.h> +#include <linux/dma-fence-unwrap.h> #include <linux/anon_inodes.h> #include <linux/export.h> #include <linux/debugfs.h> @@ -391,8 +392,10 @@ static long dma_buf_import_sync_file(struct dma_buf *dmabuf, const void __user *user_data) { struct dma_buf_import_sync_file arg; - struct dma_fence *fence; + struct dma_fence *fence, *f; enum dma_resv_usage usage; + struct dma_fence_unwrap iter; + unsigned int num_fences; int ret = 0; if (copy_from_user(&arg, user_data, sizeof(arg))) @@ -411,13 +414,21 @@ static long dma_buf_import_sync_file(struct dma_buf *dmabuf, usage = (arg.flags & DMA_BUF_SYNC_WRITE) ? DMA_RESV_USAGE_WRITE : DMA_RESV_USAGE_READ; - dma_resv_lock(dmabuf->resv, NULL); + num_fences = 0; + dma_fence_unwrap_for_each(f, &iter, fence) + ++num_fences; - ret = dma_resv_reserve_fences(dmabuf->resv, 1); - if (!ret) - dma_resv_add_fence(dmabuf->resv, fence, usage); + if (num_fences > 0) { + dma_resv_lock(dmabuf->resv, NULL); - dma_resv_unlock(dmabuf->resv); + ret = dma_resv_reserve_fences(dmabuf->resv, num_fences); + if (!ret) { + dma_fence_unwrap_for_each(f, &iter, fence) + dma_resv_add_fence(dmabuf->resv, f, usage); + } + + dma_resv_unlock(dmabuf->resv); + } dma_fence_put(fence); diff --git a/drivers/dma-buf/dma-heap.c b/drivers/dma-buf/dma-heap.c index 8f5848aa144f..59d158873f4c 100644 --- a/drivers/dma-buf/dma-heap.c +++ b/drivers/dma-buf/dma-heap.c @@ -233,18 +233,6 @@ struct dma_heap *dma_heap_add(const struct dma_heap_export_info *exp_info) return ERR_PTR(-EINVAL); } - /* check the name is unique */ - mutex_lock(&heap_list_lock); - list_for_each_entry(h, &heap_list, list) { - if (!strcmp(h->name, exp_info->name)) { - mutex_unlock(&heap_list_lock); - pr_err("dma_heap: Already registered heap named %s\n", - exp_info->name); - return ERR_PTR(-EINVAL); - } - } - mutex_unlock(&heap_list_lock); - heap = kzalloc(sizeof(*heap), GFP_KERNEL); if (!heap) return ERR_PTR(-ENOMEM); @@ -283,13 +271,27 @@ struct dma_heap *dma_heap_add(const struct dma_heap_export_info *exp_info) err_ret = ERR_CAST(dev_ret); goto err2; } - /* Add heap to the list */ + mutex_lock(&heap_list_lock); + /* check the name is unique */ + list_for_each_entry(h, &heap_list, list) { + if (!strcmp(h->name, exp_info->name)) { + mutex_unlock(&heap_list_lock); + pr_err("dma_heap: Already registered heap named %s\n", + exp_info->name); + err_ret = ERR_PTR(-EINVAL); + goto err3; + } + } + + /* Add heap to the list */ list_add(&heap->list, &heap_list); mutex_unlock(&heap_list_lock); return heap; +err3: + device_destroy(dma_heap_class, heap->heap_devt); err2: cdev_del(&heap->heap_cdev); err1: diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c index a9b96b18772f..e13e92609943 100644 --- a/drivers/dma/idxd/cdev.c +++ b/drivers/dma/idxd/cdev.c @@ -6,7 +6,6 @@ #include <linux/pci.h> #include <linux/device.h> #include <linux/sched/task.h> -#include <linux/intel-svm.h> #include <linux/io-64-nonatomic-lo-hi.h> #include <linux/cdev.h> #include <linux/fs.h> @@ -100,7 +99,7 @@ static int idxd_cdev_open(struct inode *inode, struct file *filp) filp->private_data = ctx; if (device_user_pasid_enabled(idxd)) { - sva = iommu_sva_bind_device(dev, current->mm, NULL); + sva = iommu_sva_bind_device(dev, current->mm); if (IS_ERR(sva)) { rc = PTR_ERR(sva); dev_err(dev, "pasid allocation failed: %d\n", rc); diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 09cbf0c179ba..529ea09c9094 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -14,7 +14,6 @@ #include <linux/io-64-nonatomic-lo-hi.h> #include <linux/device.h> #include <linux/idr.h> -#include <linux/intel-svm.h> #include <linux/iommu.h> #include <uapi/linux/idxd.h> #include <linux/dmaengine.h> @@ -502,29 +501,7 @@ static struct idxd_device *idxd_alloc(struct pci_dev *pdev, struct idxd_driver_d static int idxd_enable_system_pasid(struct idxd_device *idxd) { - int flags; - unsigned int pasid; - struct iommu_sva *sva; - - flags = SVM_FLAG_SUPERVISOR_MODE; - - sva = iommu_sva_bind_device(&idxd->pdev->dev, NULL, &flags); - if (IS_ERR(sva)) { - dev_warn(&idxd->pdev->dev, - "iommu sva bind failed: %ld\n", PTR_ERR(sva)); - return PTR_ERR(sva); - } - - pasid = iommu_sva_get_pasid(sva); - if (pasid == IOMMU_PASID_INVALID) { - iommu_sva_unbind_device(sva); - return -ENODEV; - } - - idxd->sva = sva; - idxd->pasid = pasid; - dev_dbg(&idxd->pdev->dev, "system pasid: %u\n", pasid); - return 0; + return -EOPNOTSUPP; } static void idxd_disable_system_pasid(struct idxd_device *idxd) diff --git a/drivers/fpga/Kconfig b/drivers/fpga/Kconfig index 6c416955da53..bbe0a7cabb75 100644 --- a/drivers/fpga/Kconfig +++ b/drivers/fpga/Kconfig @@ -246,7 +246,9 @@ config FPGA_MGR_VERSAL_FPGA config FPGA_M10_BMC_SEC_UPDATE tristate "Intel MAX10 BMC Secure Update driver" - depends on MFD_INTEL_M10_BMC && FW_UPLOAD + depends on MFD_INTEL_M10_BMC + select FW_LOADER + select FW_UPLOAD help Secure update support for the Intel MAX10 board management controller. diff --git a/drivers/gpio/gpio-amd8111.c b/drivers/gpio/gpio-amd8111.c index 14e6b3e64add..6f3ded619c8b 100644 --- a/drivers/gpio/gpio-amd8111.c +++ b/drivers/gpio/gpio-amd8111.c @@ -226,7 +226,10 @@ found: ioport_unmap(gp.pm); goto out; } + return 0; + out: + pci_dev_put(pdev); return err; } @@ -234,6 +237,7 @@ static void __exit amd_gpio_exit(void) { gpiochip_remove(&gp.chip); ioport_unmap(gp.pm); + pci_dev_put(gp.pdev); } module_init(amd_gpio_init); diff --git a/drivers/gpio/gpio-rockchip.c b/drivers/gpio/gpio-rockchip.c index 870910bb9dd3..200e43a6f4b4 100644 --- a/drivers/gpio/gpio-rockchip.c +++ b/drivers/gpio/gpio-rockchip.c @@ -610,6 +610,7 @@ static int rockchip_gpiolib_register(struct rockchip_pin_bank *bank) return -ENODATA; pctldev = of_pinctrl_get(pctlnp); + of_node_put(pctlnp); if (!pctldev) return -ENODEV; diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 4756ea08894f..a70522aef355 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -526,12 +526,13 @@ static int gpiochip_setup_dev(struct gpio_device *gdev) if (ret) return ret; + /* From this point, the .release() function cleans up gpio_device */ + gdev->dev.release = gpiodevice_release; + ret = gpiochip_sysfs_register(gdev); if (ret) goto err_remove_device; - /* From this point, the .release() function cleans up gpio_device */ - gdev->dev.release = gpiodevice_release; dev_dbg(&gdev->dev, "registered GPIOs %d to %d on %s\n", gdev->base, gdev->base + gdev->ngpio - 1, gdev->chip->label ? : "generic"); @@ -597,10 +598,10 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data, struct fwnode_handle *fwnode = NULL; struct gpio_device *gdev; unsigned long flags; - int base = gc->base; unsigned int i; + u32 ngpios = 0; + int base = 0; int ret = 0; - u32 ngpios; if (gc->fwnode) fwnode = gc->fwnode; @@ -647,17 +648,12 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data, else gdev->owner = THIS_MODULE; - gdev->descs = kcalloc(gc->ngpio, sizeof(gdev->descs[0]), GFP_KERNEL); - if (!gdev->descs) { - ret = -ENOMEM; - goto err_free_dev_name; - } - /* * Try the device properties if the driver didn't supply the number * of GPIO lines. */ - if (gc->ngpio == 0) { + ngpios = gc->ngpio; + if (ngpios == 0) { ret = device_property_read_u32(&gdev->dev, "ngpios", &ngpios); if (ret == -ENODATA) /* @@ -668,7 +664,7 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data, */ ngpios = 0; else if (ret) - goto err_free_descs; + goto err_free_dev_name; gc->ngpio = ngpios; } @@ -676,13 +672,19 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data, if (gc->ngpio == 0) { chip_err(gc, "tried to insert a GPIO chip with zero lines\n"); ret = -EINVAL; - goto err_free_descs; + goto err_free_dev_name; } if (gc->ngpio > FASTPATH_NGPIO) chip_warn(gc, "line cnt %u is greater than fast path cnt %u\n", gc->ngpio, FASTPATH_NGPIO); + gdev->descs = kcalloc(gc->ngpio, sizeof(*gdev->descs), GFP_KERNEL); + if (!gdev->descs) { + ret = -ENOMEM; + goto err_free_dev_name; + } + gdev->label = kstrdup_const(gc->label ?: "unknown", GFP_KERNEL); if (!gdev->label) { ret = -ENOMEM; @@ -701,11 +703,13 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data, * it may be a pipe dream. It will not happen before we get rid * of the sysfs interface anyways. */ + base = gc->base; if (base < 0) { base = gpiochip_find_base(gc->ngpio); if (base < 0) { - ret = base; spin_unlock_irqrestore(&gpio_lock, flags); + ret = base; + base = 0; goto err_free_label; } /* @@ -816,6 +820,11 @@ err_remove_of_chip: err_free_gpiochip_mask: gpiochip_remove_pin_ranges(gc); gpiochip_free_valid_mask(gc); + if (gdev->dev.release) { + /* release() has been registered by gpiochip_setup_dev() */ + put_device(&gdev->dev); + goto err_print_message; + } err_remove_from_list: spin_lock_irqsave(&gpio_lock, flags); list_del(&gdev->list); @@ -829,13 +838,14 @@ err_free_dev_name: err_free_ida: ida_free(&gpio_ida, gdev->id); err_free_gdev: + kfree(gdev); +err_print_message: /* failures here can mean systems won't boot... */ if (ret != -EPROBE_DEFER) { pr_err("%s: GPIOs %d..%d (%s) failed to register, %d\n", __func__, - gdev->base, gdev->base + gdev->ngpio - 1, + base, base + (int)ngpios - 1, gc->label ? : "generic", ret); } - kfree(gdev); return ret; } EXPORT_SYMBOL_GPL(gpiochip_add_data_with_key); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c index c8935d718207..4485bb29bec9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c @@ -41,5 +41,6 @@ const struct kfd2kgd_calls aldebaran_kfd2kgd = { .get_atc_vmid_pasid_mapping_info = kgd_gfx_v9_get_atc_vmid_pasid_mapping_info, .set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base, + .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy, .program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 6d1ff7b9258d..1f76e27f1a35 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -986,6 +986,7 @@ static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr, struct amdkfd_process_info *process_info = mem->process_info; struct amdgpu_bo *bo = mem->bo; struct ttm_operation_ctx ctx = { true, false }; + struct hmm_range *range; int ret = 0; mutex_lock(&process_info->lock); @@ -1015,7 +1016,7 @@ static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr, return 0; } - ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages); + ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages, &range); if (ret) { pr_err("%s: Failed to get user pages: %d\n", __func__, ret); goto unregister_out; @@ -1033,7 +1034,7 @@ static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr, amdgpu_bo_unreserve(bo); release_out: - amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); + amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm, range); unregister_out: if (ret) amdgpu_mn_unregister(bo); @@ -2370,6 +2371,8 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info, /* Go through userptr_inval_list and update any invalid user_pages */ list_for_each_entry(mem, &process_info->userptr_inval_list, validate_list.head) { + struct hmm_range *range; + invalid = atomic_read(&mem->invalid); if (!invalid) /* BO hasn't been invalidated since the last @@ -2380,7 +2383,8 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info, bo = mem->bo; /* Get updated user pages */ - ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages); + ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages, + &range); if (ret) { pr_debug("Failed %d to get user pages\n", ret); @@ -2399,7 +2403,7 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info, * FIXME: Cannot ignore the return code, must hold * notifier_lock */ - amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); + amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm, range); } /* Mark the BO as valid unless it was invalidated diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c index 2168163aad2d..252a876b0725 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c @@ -209,6 +209,7 @@ void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list, list_add_tail(&e->tv.head, &bucket[priority]); e->user_pages = NULL; + e->range = NULL; } /* Connect the sorted buckets in the output list. */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h index 9caea1688fc3..e4d78491bcc7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h @@ -26,6 +26,8 @@ #include <drm/ttm/ttm_execbuf_util.h> #include <drm/amdgpu_drm.h> +struct hmm_range; + struct amdgpu_device; struct amdgpu_bo; struct amdgpu_bo_va; @@ -36,6 +38,7 @@ struct amdgpu_bo_list_entry { struct amdgpu_bo_va *bo_va; uint32_t priority; struct page **user_pages; + struct hmm_range *range; bool user_invalidated; }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c index 491d4846fc02..cfb262911bfc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c @@ -328,7 +328,6 @@ static void amdgpu_connector_free_edid(struct drm_connector *connector) kfree(amdgpu_connector->edid); amdgpu_connector->edid = NULL; - drm_connector_update_edid_property(connector, NULL); } static int amdgpu_connector_ddc_get_modes(struct drm_connector *connector) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index d038b258cc92..365e3fb6a9e5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -913,7 +913,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, goto out_free_user_pages; } - r = amdgpu_ttm_tt_get_user_pages(bo, e->user_pages); + r = amdgpu_ttm_tt_get_user_pages(bo, e->user_pages, &e->range); if (r) { kvfree(e->user_pages); e->user_pages = NULL; @@ -991,9 +991,10 @@ out_free_user_pages: if (!e->user_pages) continue; - amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); + amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm, e->range); kvfree(e->user_pages); e->user_pages = NULL; + e->range = NULL; } mutex_unlock(&p->bo_list->bo_list_mutex); return r; @@ -1273,7 +1274,8 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); - r |= !amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); + r |= !amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm, e->range); + e->range = NULL; } if (r) { r = -EAGAIN; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 8ef31d687ef3..91571b1324f2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -378,6 +378,7 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, struct amdgpu_device *adev = drm_to_adev(dev); struct drm_amdgpu_gem_userptr *args = data; struct drm_gem_object *gobj; + struct hmm_range *range; struct amdgpu_bo *bo; uint32_t handle; int r; @@ -413,14 +414,13 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, if (r) goto release_object; - if (args->flags & AMDGPU_GEM_USERPTR_REGISTER) { - r = amdgpu_mn_register(bo, args->addr); - if (r) - goto release_object; - } + r = amdgpu_mn_register(bo, args->addr); + if (r) + goto release_object; if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE) { - r = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages); + r = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages, + &range); if (r) goto release_object; @@ -443,7 +443,7 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, user_pages_done: if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE) - amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); + amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm, range); release_object: drm_gem_object_put(gobj); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index b06cb0bb166c..28612e56d0d4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -479,6 +479,12 @@ int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev) unsigned i; unsigned vmhub, inv_eng; + if (adev->enable_mes) { + /* reserve engine 5 for firmware */ + for (vmhub = 0; vmhub < AMDGPU_MAX_VMHUBS; vmhub++) + vm_inv_engs[vmhub] &= ~(1 << 5); + } + for (i = 0; i < adev->num_rings; ++i) { ring = adev->rings[i]; vmhub = ring->funcs->vmhub; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index cd968e781077..adac650cf544 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -169,7 +169,11 @@ static void amdgpu_job_free_cb(struct drm_sched_job *s_job) amdgpu_sync_free(&job->sync); amdgpu_sync_free(&job->sched_sync); - dma_fence_put(&job->hw_fence); + /* only put the hw fence if has embedded fence */ + if (!job->hw_fence.ops) + kfree(job); + else + dma_fence_put(&job->hw_fence); } void amdgpu_job_set_gang_leader(struct amdgpu_job *job, @@ -254,6 +258,9 @@ static struct dma_fence *amdgpu_job_dependency(struct drm_sched_job *sched_job, DRM_ERROR("Error adding fence (%d)\n", r); } + if (!fence && job->gang_submit) + fence = amdgpu_device_switch_gang(ring->adev, job->gang_submit); + while (fence == NULL && vm && !job->vmid) { r = amdgpu_vmid_grab(vm, ring, &job->sync, &job->base.s_fence->finished, @@ -264,9 +271,6 @@ static struct dma_fence *amdgpu_job_dependency(struct drm_sched_job *sched_job, fence = amdgpu_sync_get_fence(&job->sync); } - if (!fence && job->gang_submit) - fence = amdgpu_device_switch_gang(ring->adev, job->gang_submit); - return fence; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index effa7df3ddbf..7978307e1d6d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -172,6 +172,7 @@ void psp_ta_free_shared_buf(struct ta_mem_context *mem_ctx) { amdgpu_bo_free_kernel(&mem_ctx->shared_bo, &mem_ctx->shared_mc_addr, &mem_ctx->shared_buf); + mem_ctx->shared_bo = NULL; } static void psp_free_shared_bufs(struct psp_context *psp) @@ -182,6 +183,7 @@ static void psp_free_shared_bufs(struct psp_context *psp) /* free TMR memory buffer */ pptr = amdgpu_sriov_vf(psp->adev) ? &tmr_buf : NULL; amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, pptr); + psp->tmr_bo = NULL; /* free xgmi shared memory */ psp_ta_free_shared_buf(&psp->xgmi_context.context.mem_context); @@ -743,7 +745,7 @@ static int psp_load_toc(struct psp_context *psp, /* Set up Trusted Memory Region */ static int psp_tmr_init(struct psp_context *psp) { - int ret; + int ret = 0; int tmr_size; void *tmr_buf; void **pptr; @@ -770,10 +772,12 @@ static int psp_tmr_init(struct psp_context *psp) } } - pptr = amdgpu_sriov_vf(psp->adev) ? &tmr_buf : NULL; - ret = amdgpu_bo_create_kernel(psp->adev, tmr_size, PSP_TMR_ALIGNMENT, - AMDGPU_GEM_DOMAIN_VRAM, - &psp->tmr_bo, &psp->tmr_mc_addr, pptr); + if (!psp->tmr_bo) { + pptr = amdgpu_sriov_vf(psp->adev) ? &tmr_buf : NULL; + ret = amdgpu_bo_create_kernel(psp->adev, tmr_size, PSP_TMR_ALIGNMENT, + AMDGPU_GEM_DOMAIN_VRAM, + &psp->tmr_bo, &psp->tmr_mc_addr, pptr); + } return ret; } @@ -2732,8 +2736,6 @@ static int psp_suspend(void *handle) } out: - psp_free_shared_bufs(psp); - return ret; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 57277b1cf183..b64938ed8cb6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -643,9 +643,6 @@ struct amdgpu_ttm_tt { struct task_struct *usertask; uint32_t userflags; bool bound; -#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR) - struct hmm_range *range; -#endif }; #define ttm_to_amdgpu_ttm_tt(ptr) container_of(ptr, struct amdgpu_ttm_tt, ttm) @@ -658,7 +655,8 @@ struct amdgpu_ttm_tt { * Calling function must call amdgpu_ttm_tt_userptr_range_done() once and only * once afterwards to stop HMM tracking */ -int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages) +int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages, + struct hmm_range **range) { struct ttm_tt *ttm = bo->tbo.ttm; struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); @@ -668,16 +666,15 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages) bool readonly; int r = 0; + /* Make sure get_user_pages_done() can cleanup gracefully */ + *range = NULL; + mm = bo->notifier.mm; if (unlikely(!mm)) { DRM_DEBUG_DRIVER("BO is not registered?\n"); return -EFAULT; } - /* Another get_user_pages is running at the same time?? */ - if (WARN_ON(gtt->range)) - return -EFAULT; - if (!mmget_not_zero(mm)) /* Happens during process shutdown */ return -ESRCH; @@ -695,7 +692,7 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages) readonly = amdgpu_ttm_tt_is_readonly(ttm); r = amdgpu_hmm_range_get_pages(&bo->notifier, mm, pages, start, - ttm->num_pages, >t->range, readonly, + ttm->num_pages, range, readonly, true, NULL); out_unlock: mmap_read_unlock(mm); @@ -713,30 +710,24 @@ out_unlock: * * Returns: true if pages are still valid */ -bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm) +bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm, + struct hmm_range *range) { struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); - bool r = false; - if (!gtt || !gtt->userptr) + if (!gtt || !gtt->userptr || !range) return false; DRM_DEBUG_DRIVER("user_pages_done 0x%llx pages 0x%x\n", gtt->userptr, ttm->num_pages); - WARN_ONCE(!gtt->range || !gtt->range->hmm_pfns, - "No user pages to check\n"); + WARN_ONCE(!range->hmm_pfns, "No user pages to check\n"); - if (gtt->range) { - /* - * FIXME: Must always hold notifier_lock for this, and must - * not ignore the return code. - */ - r = amdgpu_hmm_range_get_pages_done(gtt->range); - gtt->range = NULL; - } - - return !r; + /* + * FIXME: Must always hold notifier_lock for this, and must + * not ignore the return code. + */ + return !amdgpu_hmm_range_get_pages_done(range); } #endif @@ -813,20 +804,6 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_device *bdev, /* unmap the pages mapped to the device */ dma_unmap_sgtable(adev->dev, ttm->sg, direction, 0); sg_free_table(ttm->sg); - -#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR) - if (gtt->range) { - unsigned long i; - - for (i = 0; i < ttm->num_pages; i++) { - if (ttm->pages[i] != - hmm_pfn_to_page(gtt->range->hmm_pfns[i])) - break; - } - - WARN((i == ttm->num_pages), "Missing get_user_page_done\n"); - } -#endif } static void amdgpu_ttm_gart_bind(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index 6a70818039dd..a37207011a69 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -39,6 +39,8 @@ #define AMDGPU_POISON 0xd0bed0be +struct hmm_range; + struct amdgpu_gtt_mgr { struct ttm_resource_manager manager; struct drm_mm mm; @@ -149,15 +151,19 @@ void amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo); uint64_t amdgpu_ttm_domain_start(struct amdgpu_device *adev, uint32_t type); #if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR) -int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages); -bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm); +int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages, + struct hmm_range **range); +bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm, + struct hmm_range *range); #else static inline int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, - struct page **pages) + struct page **pages, + struct hmm_range **range) { return -EPERM; } -static inline bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm) +static inline bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm, + struct hmm_range *range) { return false; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index 0b52af415b28..ce64ca1c6e66 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -156,6 +156,9 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) break; case IP_VERSION(3, 0, 2): fw_name = FIRMWARE_VANGOGH; + if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) && + (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)) + adev->vcn.indirect_sram = true; break; case IP_VERSION(3, 0, 16): fw_name = FIRMWARE_DIMGREY_CAVEFISH; diff --git a/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0.h b/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0.h index f772bb499f3e..0312c71c3af9 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0.h +++ b/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0.h @@ -32,7 +32,6 @@ #define RB_ENABLED (1 << 0) #define RB4_ENABLED (1 << 1) -#define MMSCH_DOORBELL_OFFSET 0x8 #define MMSCH_VF_ENGINE_STATUS__PASS 0x1 diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 1122bd4eae98..4d780e4430e7 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -907,13 +907,13 @@ static void sdma_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se /** - * sdma_v4_0_gfx_stop - stop the gfx async dma engines + * sdma_v4_0_gfx_enable - enable the gfx async dma engines * * @adev: amdgpu_device pointer - * - * Stop the gfx async dma ring buffers (VEGA10). + * @enable: enable SDMA RB/IB + * control the gfx async dma ring buffers (VEGA10). */ -static void sdma_v4_0_gfx_stop(struct amdgpu_device *adev) +static void sdma_v4_0_gfx_enable(struct amdgpu_device *adev, bool enable) { u32 rb_cntl, ib_cntl; int i; @@ -922,10 +922,10 @@ static void sdma_v4_0_gfx_stop(struct amdgpu_device *adev) for (i = 0; i < adev->sdma.num_instances; i++) { rb_cntl = RREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL); - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0); + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, enable ? 1 : 0); WREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL, rb_cntl); ib_cntl = RREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL); - ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0); + ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, enable ? 1 : 0); WREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL, ib_cntl); } } @@ -1044,7 +1044,7 @@ static void sdma_v4_0_enable(struct amdgpu_device *adev, bool enable) int i; if (!enable) { - sdma_v4_0_gfx_stop(adev); + sdma_v4_0_gfx_enable(adev, enable); sdma_v4_0_rlc_stop(adev); if (adev->sdma.has_page_queue) sdma_v4_0_page_stop(adev); @@ -1960,8 +1960,10 @@ static int sdma_v4_0_suspend(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; /* SMU saves SDMA state for us */ - if (adev->in_s0ix) + if (adev->in_s0ix) { + sdma_v4_0_gfx_enable(adev, false); return 0; + } return sdma_v4_0_hw_fini(adev); } @@ -1971,8 +1973,12 @@ static int sdma_v4_0_resume(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; /* SMU restores SDMA state for us */ - if (adev->in_s0ix) + if (adev->in_s0ix) { + sdma_v4_0_enable(adev, true); + sdma_v4_0_gfx_enable(adev, true); + amdgpu_ttm_set_buffer_funcs_status(adev, true); return 0; + } return sdma_v4_0_hw_init(adev); } diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index 897a5ce9c9da..dcc49b01bd59 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -100,7 +100,6 @@ static int vcn_v4_0_sw_init(void *handle) struct amdgpu_ring *ring; struct amdgpu_device *adev = (struct amdgpu_device *)handle; int i, r; - int vcn_doorbell_index = 0; r = amdgpu_vcn_sw_init(adev); if (r) @@ -112,12 +111,6 @@ static int vcn_v4_0_sw_init(void *handle) if (r) return r; - if (amdgpu_sriov_vf(adev)) { - vcn_doorbell_index = adev->doorbell_index.vcn.vcn_ring0_1 - MMSCH_DOORBELL_OFFSET; - /* get DWORD offset */ - vcn_doorbell_index = vcn_doorbell_index << 1; - } - for (i = 0; i < adev->vcn.num_vcn_inst; i++) { volatile struct amdgpu_vcn4_fw_shared *fw_shared; @@ -135,7 +128,7 @@ static int vcn_v4_0_sw_init(void *handle) ring = &adev->vcn.inst[i].ring_enc[0]; ring->use_doorbell = true; if (amdgpu_sriov_vf(adev)) - ring->doorbell_index = vcn_doorbell_index + i * (adev->vcn.num_enc_rings + 1) + 1; + ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + i * (adev->vcn.num_enc_rings + 1) + 1; else ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + 8 * i; diff --git a/drivers/gpu/drm/amd/display/Kconfig b/drivers/gpu/drm/amd/display/Kconfig index 6925e0280dbe..f4f3d2665a6b 100644 --- a/drivers/gpu/drm/amd/display/Kconfig +++ b/drivers/gpu/drm/amd/display/Kconfig @@ -5,6 +5,7 @@ menu "Display Engine Configuration" config DRM_AMD_DC bool "AMD DC - Enable new display engine" default y + depends on BROKEN || !CC_IS_CLANG || X86_64 || SPARC64 || ARM64 select SND_HDA_COMPONENT if SND_HDA_CORE select DRM_AMD_DC_DCN if (X86 || PPC_LONG_DOUBLE_128) help @@ -12,6 +13,12 @@ config DRM_AMD_DC support for AMDGPU. This adds required support for Vega and Raven ASICs. + calculate_bandwidth() is presently broken on all !(X86_64 || SPARC64 || ARM64) + architectures built with Clang (all released versions), whereby the stack + frame gets blown up to well over 5k. This would cause an immediate kernel + panic on most architectures. We'll revert this when the following bug report + has been resolved: https://github.com/llvm/llvm-project/issues/41896. + config DRM_AMD_DC_DCN def_bool n help diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 3e1ecca72430..512c32327eb1 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1372,7 +1372,44 @@ static const struct dmi_system_id hpd_disconnect_quirk_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Precision 3460"), }, }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex Tower Plus 7010"), + }, + }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex Tower 7010"), + }, + }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex SFF Plus 7010"), + }, + }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex SFF 7010"), + }, + }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex Micro Plus 7010"), + }, + }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex Micro 7010"), + }, + }, {} + /* TODO: refactor this from a fixed table to a dynamic option */ }; static void retrieve_dmi_info(struct amdgpu_display_manager *dm) @@ -6475,7 +6512,7 @@ static int dm_update_mst_vcpi_slots_for_dsc(struct drm_atomic_state *state, struct drm_connector_state *new_con_state; struct amdgpu_dm_connector *aconnector; struct dm_connector_state *dm_conn_state; - int i, j; + int i, j, ret; int vcpi, pbn_div, pbn, slot_num = 0; for_each_new_connector_in_state(state, connector, new_con_state, i) { @@ -6522,8 +6559,11 @@ static int dm_update_mst_vcpi_slots_for_dsc(struct drm_atomic_state *state, dm_conn_state->pbn = pbn; dm_conn_state->vcpi_slots = slot_num; - drm_dp_mst_atomic_enable_dsc(state, aconnector->port, dm_conn_state->pbn, - false); + ret = drm_dp_mst_atomic_enable_dsc(state, aconnector->port, + dm_conn_state->pbn, false); + if (ret < 0) + return ret; + continue; } @@ -9537,10 +9577,9 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, #if defined(CONFIG_DRM_AMD_DC_DCN) if (dc_resource_is_dsc_encoding_supported(dc)) { - if (!pre_validate_dsc(state, &dm_state, vars)) { - ret = -EINVAL; + ret = pre_validate_dsc(state, &dm_state, vars); + if (ret != 0) goto fail; - } } #endif @@ -9635,9 +9674,9 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, } #if defined(CONFIG_DRM_AMD_DC_DCN) - if (!compute_mst_dsc_configs_for_state(state, dm_state->context, vars)) { + ret = compute_mst_dsc_configs_for_state(state, dm_state->context, vars); + if (ret) { DRM_DEBUG_DRIVER("compute_mst_dsc_configs_for_state() failed\n"); - ret = -EINVAL; goto fail; } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 6ff96b4bdda5..6483ba266893 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -703,13 +703,13 @@ static int bpp_x16_from_pbn(struct dsc_mst_fairness_params param, int pbn) return dsc_config.bits_per_pixel; } -static bool increase_dsc_bpp(struct drm_atomic_state *state, - struct drm_dp_mst_topology_state *mst_state, - struct dc_link *dc_link, - struct dsc_mst_fairness_params *params, - struct dsc_mst_fairness_vars *vars, - int count, - int k) +static int increase_dsc_bpp(struct drm_atomic_state *state, + struct drm_dp_mst_topology_state *mst_state, + struct dc_link *dc_link, + struct dsc_mst_fairness_params *params, + struct dsc_mst_fairness_vars *vars, + int count, + int k) { int i; bool bpp_increased[MAX_PIPES]; @@ -719,6 +719,7 @@ static bool increase_dsc_bpp(struct drm_atomic_state *state, int remaining_to_increase = 0; int link_timeslots_used; int fair_pbn_alloc; + int ret = 0; for (i = 0; i < count; i++) { if (vars[i + k].dsc_enabled) { @@ -757,52 +758,60 @@ static bool increase_dsc_bpp(struct drm_atomic_state *state, if (initial_slack[next_index] > fair_pbn_alloc) { vars[next_index].pbn += fair_pbn_alloc; - if (drm_dp_atomic_find_time_slots(state, - params[next_index].port->mgr, - params[next_index].port, - vars[next_index].pbn) < 0) - return false; - if (!drm_dp_mst_atomic_check(state)) { + ret = drm_dp_atomic_find_time_slots(state, + params[next_index].port->mgr, + params[next_index].port, + vars[next_index].pbn); + if (ret < 0) + return ret; + + ret = drm_dp_mst_atomic_check(state); + if (ret == 0) { vars[next_index].bpp_x16 = bpp_x16_from_pbn(params[next_index], vars[next_index].pbn); } else { vars[next_index].pbn -= fair_pbn_alloc; - if (drm_dp_atomic_find_time_slots(state, - params[next_index].port->mgr, - params[next_index].port, - vars[next_index].pbn) < 0) - return false; + ret = drm_dp_atomic_find_time_slots(state, + params[next_index].port->mgr, + params[next_index].port, + vars[next_index].pbn); + if (ret < 0) + return ret; } } else { vars[next_index].pbn += initial_slack[next_index]; - if (drm_dp_atomic_find_time_slots(state, - params[next_index].port->mgr, - params[next_index].port, - vars[next_index].pbn) < 0) - return false; - if (!drm_dp_mst_atomic_check(state)) { + ret = drm_dp_atomic_find_time_slots(state, + params[next_index].port->mgr, + params[next_index].port, + vars[next_index].pbn); + if (ret < 0) + return ret; + + ret = drm_dp_mst_atomic_check(state); + if (ret == 0) { vars[next_index].bpp_x16 = params[next_index].bw_range.max_target_bpp_x16; } else { vars[next_index].pbn -= initial_slack[next_index]; - if (drm_dp_atomic_find_time_slots(state, - params[next_index].port->mgr, - params[next_index].port, - vars[next_index].pbn) < 0) - return false; + ret = drm_dp_atomic_find_time_slots(state, + params[next_index].port->mgr, + params[next_index].port, + vars[next_index].pbn); + if (ret < 0) + return ret; } } bpp_increased[next_index] = true; remaining_to_increase--; } - return true; + return 0; } -static bool try_disable_dsc(struct drm_atomic_state *state, - struct dc_link *dc_link, - struct dsc_mst_fairness_params *params, - struct dsc_mst_fairness_vars *vars, - int count, - int k) +static int try_disable_dsc(struct drm_atomic_state *state, + struct dc_link *dc_link, + struct dsc_mst_fairness_params *params, + struct dsc_mst_fairness_vars *vars, + int count, + int k) { int i; bool tried[MAX_PIPES]; @@ -810,6 +819,7 @@ static bool try_disable_dsc(struct drm_atomic_state *state, int max_kbps_increase; int next_index; int remaining_to_try = 0; + int ret; for (i = 0; i < count; i++) { if (vars[i + k].dsc_enabled @@ -840,49 +850,52 @@ static bool try_disable_dsc(struct drm_atomic_state *state, break; vars[next_index].pbn = kbps_to_peak_pbn(params[next_index].bw_range.stream_kbps); - if (drm_dp_atomic_find_time_slots(state, - params[next_index].port->mgr, - params[next_index].port, - vars[next_index].pbn) < 0) - return false; + ret = drm_dp_atomic_find_time_slots(state, + params[next_index].port->mgr, + params[next_index].port, + vars[next_index].pbn); + if (ret < 0) + return ret; - if (!drm_dp_mst_atomic_check(state)) { + ret = drm_dp_mst_atomic_check(state); + if (ret == 0) { vars[next_index].dsc_enabled = false; vars[next_index].bpp_x16 = 0; } else { vars[next_index].pbn = kbps_to_peak_pbn(params[next_index].bw_range.max_kbps); - if (drm_dp_atomic_find_time_slots(state, - params[next_index].port->mgr, - params[next_index].port, - vars[next_index].pbn) < 0) - return false; + ret = drm_dp_atomic_find_time_slots(state, + params[next_index].port->mgr, + params[next_index].port, + vars[next_index].pbn); + if (ret < 0) + return ret; } tried[next_index] = true; remaining_to_try--; } - return true; + return 0; } -static bool compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, - struct dc_state *dc_state, - struct dc_link *dc_link, - struct dsc_mst_fairness_vars *vars, - struct drm_dp_mst_topology_mgr *mgr, - int *link_vars_start_index) +static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, + struct dc_state *dc_state, + struct dc_link *dc_link, + struct dsc_mst_fairness_vars *vars, + struct drm_dp_mst_topology_mgr *mgr, + int *link_vars_start_index) { struct dc_stream_state *stream; struct dsc_mst_fairness_params params[MAX_PIPES]; struct amdgpu_dm_connector *aconnector; struct drm_dp_mst_topology_state *mst_state = drm_atomic_get_mst_topology_state(state, mgr); int count = 0; - int i, k; + int i, k, ret; bool debugfs_overwrite = false; memset(params, 0, sizeof(params)); if (IS_ERR(mst_state)) - return false; + return PTR_ERR(mst_state); mst_state->pbn_div = dm_mst_get_pbn_divider(dc_link); #if defined(CONFIG_DRM_AMD_DC_DCN) @@ -933,7 +946,7 @@ static bool compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, if (count == 0) { ASSERT(0); - return true; + return 0; } /* k is start index of vars for current phy link used by mst hub */ @@ -947,13 +960,17 @@ static bool compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.stream_kbps); vars[i + k].dsc_enabled = false; vars[i + k].bpp_x16 = 0; - if (drm_dp_atomic_find_time_slots(state, params[i].port->mgr, params[i].port, - vars[i + k].pbn) < 0) - return false; + ret = drm_dp_atomic_find_time_slots(state, params[i].port->mgr, params[i].port, + vars[i + k].pbn); + if (ret < 0) + return ret; } - if (!drm_dp_mst_atomic_check(state) && !debugfs_overwrite) { + ret = drm_dp_mst_atomic_check(state); + if (ret == 0 && !debugfs_overwrite) { set_dsc_configs_from_fairness_vars(params, vars, count, k); - return true; + return 0; + } else if (ret != -ENOSPC) { + return ret; } /* Try max compression */ @@ -962,31 +979,36 @@ static bool compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.min_kbps); vars[i + k].dsc_enabled = true; vars[i + k].bpp_x16 = params[i].bw_range.min_target_bpp_x16; - if (drm_dp_atomic_find_time_slots(state, params[i].port->mgr, - params[i].port, vars[i + k].pbn) < 0) - return false; + ret = drm_dp_atomic_find_time_slots(state, params[i].port->mgr, + params[i].port, vars[i + k].pbn); + if (ret < 0) + return ret; } else { vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.stream_kbps); vars[i + k].dsc_enabled = false; vars[i + k].bpp_x16 = 0; - if (drm_dp_atomic_find_time_slots(state, params[i].port->mgr, - params[i].port, vars[i + k].pbn) < 0) - return false; + ret = drm_dp_atomic_find_time_slots(state, params[i].port->mgr, + params[i].port, vars[i + k].pbn); + if (ret < 0) + return ret; } } - if (drm_dp_mst_atomic_check(state)) - return false; + ret = drm_dp_mst_atomic_check(state); + if (ret != 0) + return ret; /* Optimize degree of compression */ - if (!increase_dsc_bpp(state, mst_state, dc_link, params, vars, count, k)) - return false; + ret = increase_dsc_bpp(state, mst_state, dc_link, params, vars, count, k); + if (ret < 0) + return ret; - if (!try_disable_dsc(state, dc_link, params, vars, count, k)) - return false; + ret = try_disable_dsc(state, dc_link, params, vars, count, k); + if (ret < 0) + return ret; set_dsc_configs_from_fairness_vars(params, vars, count, k); - return true; + return 0; } static bool is_dsc_need_re_compute( @@ -1087,15 +1109,17 @@ static bool is_dsc_need_re_compute( return is_dsc_need_re_compute; } -bool compute_mst_dsc_configs_for_state(struct drm_atomic_state *state, - struct dc_state *dc_state, - struct dsc_mst_fairness_vars *vars) +int compute_mst_dsc_configs_for_state(struct drm_atomic_state *state, + struct dc_state *dc_state, + struct dsc_mst_fairness_vars *vars) { int i, j; struct dc_stream_state *stream; bool computed_streams[MAX_PIPES]; struct amdgpu_dm_connector *aconnector; + struct drm_dp_mst_topology_mgr *mst_mgr; int link_vars_start_index = 0; + int ret = 0; for (i = 0; i < dc_state->stream_count; i++) computed_streams[i] = false; @@ -1108,7 +1132,7 @@ bool compute_mst_dsc_configs_for_state(struct drm_atomic_state *state, aconnector = (struct amdgpu_dm_connector *)stream->dm_stream_context; - if (!aconnector || !aconnector->dc_sink) + if (!aconnector || !aconnector->dc_sink || !aconnector->port) continue; if (!aconnector->dc_sink->dsc_caps.dsc_dec_caps.is_dsc_supported) @@ -1118,19 +1142,16 @@ bool compute_mst_dsc_configs_for_state(struct drm_atomic_state *state, continue; if (dcn20_remove_stream_from_ctx(stream->ctx->dc, dc_state, stream) != DC_OK) - return false; + return -EINVAL; if (!is_dsc_need_re_compute(state, dc_state, stream->link)) continue; - mutex_lock(&aconnector->mst_mgr.lock); - if (!compute_mst_dsc_configs_for_link(state, dc_state, stream->link, vars, - &aconnector->mst_mgr, - &link_vars_start_index)) { - mutex_unlock(&aconnector->mst_mgr.lock); - return false; - } - mutex_unlock(&aconnector->mst_mgr.lock); + mst_mgr = aconnector->port->mgr; + ret = compute_mst_dsc_configs_for_link(state, dc_state, stream->link, vars, mst_mgr, + &link_vars_start_index); + if (ret != 0) + return ret; for (j = 0; j < dc_state->stream_count; j++) { if (dc_state->streams[j]->link == stream->link) @@ -1143,22 +1164,23 @@ bool compute_mst_dsc_configs_for_state(struct drm_atomic_state *state, if (stream->timing.flags.DSC == 1) if (dc_stream_add_dsc_to_resource(stream->ctx->dc, dc_state, stream) != DC_OK) - return false; + return -EINVAL; } - return true; + return ret; } -static bool - pre_compute_mst_dsc_configs_for_state(struct drm_atomic_state *state, - struct dc_state *dc_state, - struct dsc_mst_fairness_vars *vars) +static int pre_compute_mst_dsc_configs_for_state(struct drm_atomic_state *state, + struct dc_state *dc_state, + struct dsc_mst_fairness_vars *vars) { int i, j; struct dc_stream_state *stream; bool computed_streams[MAX_PIPES]; struct amdgpu_dm_connector *aconnector; + struct drm_dp_mst_topology_mgr *mst_mgr; int link_vars_start_index = 0; + int ret = 0; for (i = 0; i < dc_state->stream_count; i++) computed_streams[i] = false; @@ -1171,7 +1193,7 @@ static bool aconnector = (struct amdgpu_dm_connector *)stream->dm_stream_context; - if (!aconnector || !aconnector->dc_sink) + if (!aconnector || !aconnector->dc_sink || !aconnector->port) continue; if (!aconnector->dc_sink->dsc_caps.dsc_dec_caps.is_dsc_supported) @@ -1183,14 +1205,11 @@ static bool if (!is_dsc_need_re_compute(state, dc_state, stream->link)) continue; - mutex_lock(&aconnector->mst_mgr.lock); - if (!compute_mst_dsc_configs_for_link(state, dc_state, stream->link, vars, - &aconnector->mst_mgr, - &link_vars_start_index)) { - mutex_unlock(&aconnector->mst_mgr.lock); - return false; - } - mutex_unlock(&aconnector->mst_mgr.lock); + mst_mgr = aconnector->port->mgr; + ret = compute_mst_dsc_configs_for_link(state, dc_state, stream->link, vars, mst_mgr, + &link_vars_start_index); + if (ret != 0) + return ret; for (j = 0; j < dc_state->stream_count; j++) { if (dc_state->streams[j]->link == stream->link) @@ -1198,7 +1217,7 @@ static bool } } - return true; + return ret; } static int find_crtc_index_in_state_by_stream(struct drm_atomic_state *state, @@ -1253,9 +1272,9 @@ static bool is_dsc_precompute_needed(struct drm_atomic_state *state) return ret; } -bool pre_validate_dsc(struct drm_atomic_state *state, - struct dm_atomic_state **dm_state_ptr, - struct dsc_mst_fairness_vars *vars) +int pre_validate_dsc(struct drm_atomic_state *state, + struct dm_atomic_state **dm_state_ptr, + struct dsc_mst_fairness_vars *vars) { int i; struct dm_atomic_state *dm_state; @@ -1264,11 +1283,12 @@ bool pre_validate_dsc(struct drm_atomic_state *state, if (!is_dsc_precompute_needed(state)) { DRM_INFO_ONCE("DSC precompute is not needed.\n"); - return true; + return 0; } - if (dm_atomic_get_state(state, dm_state_ptr)) { + ret = dm_atomic_get_state(state, dm_state_ptr); + if (ret != 0) { DRM_INFO_ONCE("dm_atomic_get_state() failed\n"); - return false; + return ret; } dm_state = *dm_state_ptr; @@ -1280,7 +1300,7 @@ bool pre_validate_dsc(struct drm_atomic_state *state, local_dc_state = kmemdup(dm_state->context, sizeof(struct dc_state), GFP_KERNEL); if (!local_dc_state) - return false; + return -ENOMEM; for (i = 0; i < local_dc_state->stream_count; i++) { struct dc_stream_state *stream = dm_state->context->streams[i]; @@ -1316,9 +1336,9 @@ bool pre_validate_dsc(struct drm_atomic_state *state, if (ret != 0) goto clean_exit; - if (!pre_compute_mst_dsc_configs_for_state(state, local_dc_state, vars)) { + ret = pre_compute_mst_dsc_configs_for_state(state, local_dc_state, vars); + if (ret != 0) { DRM_INFO_ONCE("pre_compute_mst_dsc_configs_for_state() failed\n"); - ret = -EINVAL; goto clean_exit; } @@ -1349,7 +1369,7 @@ clean_exit: kfree(local_dc_state); - return (ret == 0); + return ret; } static unsigned int kbps_from_pbn(unsigned int pbn) @@ -1392,6 +1412,7 @@ enum dc_status dm_dp_mst_is_port_support_mode( unsigned int upper_link_bw_in_kbps = 0, down_link_bw_in_kbps = 0; unsigned int max_compressed_bw_in_kbps = 0; struct dc_dsc_bw_range bw_range = {0}; + struct drm_dp_mst_topology_mgr *mst_mgr; /* * check if the mode could be supported if DSC pass-through is supported @@ -1400,7 +1421,8 @@ enum dc_status dm_dp_mst_is_port_support_mode( */ if (is_dsc_common_config_possible(stream, &bw_range) && aconnector->port->passthrough_aux) { - mutex_lock(&aconnector->mst_mgr.lock); + mst_mgr = aconnector->port->mgr; + mutex_lock(&mst_mgr->lock); cur_link_settings = stream->link->verified_link_cap; @@ -1413,7 +1435,7 @@ enum dc_status dm_dp_mst_is_port_support_mode( end_to_end_bw_in_kbps = min(upper_link_bw_in_kbps, down_link_bw_in_kbps); - mutex_unlock(&aconnector->mst_mgr.lock); + mutex_unlock(&mst_mgr->lock); /* * use the maximum dsc compression bandwidth as the required diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h index b92a7c5671aa..97fd70df531b 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h @@ -53,15 +53,15 @@ struct dsc_mst_fairness_vars { struct amdgpu_dm_connector *aconnector; }; -bool compute_mst_dsc_configs_for_state(struct drm_atomic_state *state, - struct dc_state *dc_state, - struct dsc_mst_fairness_vars *vars); +int compute_mst_dsc_configs_for_state(struct drm_atomic_state *state, + struct dc_state *dc_state, + struct dsc_mst_fairness_vars *vars); bool needs_dsc_aux_workaround(struct dc_link *link); -bool pre_validate_dsc(struct drm_atomic_state *state, - struct dm_atomic_state **dm_state_ptr, - struct dsc_mst_fairness_vars *vars); +int pre_validate_dsc(struct drm_atomic_state *state, + struct dm_atomic_state **dm_state_ptr, + struct dsc_mst_fairness_vars *vars); enum dc_status dm_dp_mst_is_port_support_mode( struct amdgpu_dm_connector *aconnector, diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_smu.c index ef0795b14a1f..2db595672a46 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_smu.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_smu.c @@ -123,9 +123,10 @@ static int dcn314_smu_send_msg_with_param(struct clk_mgr_internal *clk_mgr, uint32_t result; result = dcn314_smu_wait_for_response(clk_mgr, 10, 200000); - ASSERT(result == VBIOSSMC_Result_OK); - smu_print("SMU response after wait: %d\n", result); + if (result != VBIOSSMC_Result_OK) + smu_print("SMU Response was not OK. SMU response after wait received is: %d\n", + result); if (result == VBIOSSMC_Status_BUSY) return -1; @@ -216,6 +217,12 @@ int dcn314_smu_set_hard_min_dcfclk(struct clk_mgr_internal *clk_mgr, int request VBIOSSMC_MSG_SetHardMinDcfclkByFreq, khz_to_mhz_ceil(requested_dcfclk_khz)); +#ifdef DBG + smu_print("actual_dcfclk_set_mhz %d is set to : %d\n", + actual_dcfclk_set_mhz, + actual_dcfclk_set_mhz * 1000); +#endif + return actual_dcfclk_set_mhz * 1000; } diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c index 1b70b78e2fa1..af631085e88c 100644 --- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c @@ -359,7 +359,8 @@ static const struct dce_audio_registers audio_regs[] = { audio_regs(2), audio_regs(3), audio_regs(4), - audio_regs(5) + audio_regs(5), + audio_regs(6), }; #define DCE120_AUD_COMMON_MASK_SH_LIST(mask_sh)\ diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c index b9765b3899e1..ef52e6b6eccf 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c @@ -436,34 +436,48 @@ void dpp1_set_cursor_position( uint32_t height) { struct dcn10_dpp *dpp = TO_DCN10_DPP(dpp_base); - int src_x_offset = pos->x - pos->x_hotspot - param->viewport.x; - int src_y_offset = pos->y - pos->y_hotspot - param->viewport.y; + int x_pos = pos->x - param->viewport.x; + int y_pos = pos->y - param->viewport.y; + int x_hotspot = pos->x_hotspot; + int y_hotspot = pos->y_hotspot; + int src_x_offset = x_pos - pos->x_hotspot; + int src_y_offset = y_pos - pos->y_hotspot; + int cursor_height = (int)height; + int cursor_width = (int)width; uint32_t cur_en = pos->enable ? 1 : 0; - // Cursor width/height and hotspots need to be rotated for offset calculation + // Transform cursor width / height and hotspots for offset calculations if (param->rotation == ROTATION_ANGLE_90 || param->rotation == ROTATION_ANGLE_270) { - swap(width, height); + swap(cursor_height, cursor_width); + swap(x_hotspot, y_hotspot); + if (param->rotation == ROTATION_ANGLE_90) { - src_x_offset = pos->x - pos->y_hotspot - param->viewport.x; - src_y_offset = pos->y - pos->x_hotspot - param->viewport.y; + // hotspot = (-y, x) + src_x_offset = x_pos - (cursor_width - x_hotspot); + src_y_offset = y_pos - y_hotspot; + } else if (param->rotation == ROTATION_ANGLE_270) { + // hotspot = (y, -x) + src_x_offset = x_pos - x_hotspot; + src_y_offset = y_pos - (cursor_height - y_hotspot); } } else if (param->rotation == ROTATION_ANGLE_180) { + // hotspot = (-x, -y) if (!param->mirror) - src_x_offset = pos->x - param->viewport.x; + src_x_offset = x_pos - (cursor_width - x_hotspot); - src_y_offset = pos->y - param->viewport.y; + src_y_offset = y_pos - (cursor_height - y_hotspot); } if (src_x_offset >= (int)param->viewport.width) cur_en = 0; /* not visible beyond right edge*/ - if (src_x_offset + (int)width <= 0) + if (src_x_offset + cursor_width <= 0) cur_en = 0; /* not visible beyond left edge*/ if (src_y_offset >= (int)param->viewport.height) cur_en = 0; /* not visible beyond bottom edge*/ - if (src_y_offset + (int)height <= 0) + if (src_y_offset + cursor_height <= 0) cur_en = 0; /* not visible beyond top edge*/ REG_UPDATE(CURSOR0_CONTROL, diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c index 52e201e9b091..a142a00bc432 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c @@ -1179,10 +1179,12 @@ void hubp1_cursor_set_position( const struct dc_cursor_mi_param *param) { struct dcn10_hubp *hubp1 = TO_DCN10_HUBP(hubp); - int src_x_offset = pos->x - pos->x_hotspot - param->viewport.x; - int src_y_offset = pos->y - pos->y_hotspot - param->viewport.y; + int x_pos = pos->x - param->viewport.x; + int y_pos = pos->y - param->viewport.y; int x_hotspot = pos->x_hotspot; int y_hotspot = pos->y_hotspot; + int src_x_offset = x_pos - pos->x_hotspot; + int src_y_offset = y_pos - pos->y_hotspot; int cursor_height = (int)hubp->curs_attr.height; int cursor_width = (int)hubp->curs_attr.width; uint32_t dst_x_offset; @@ -1200,18 +1202,26 @@ void hubp1_cursor_set_position( if (hubp->curs_attr.address.quad_part == 0) return; - // Rotated cursor width/height and hotspots tweaks for offset calculation + // Transform cursor width / height and hotspots for offset calculations if (param->rotation == ROTATION_ANGLE_90 || param->rotation == ROTATION_ANGLE_270) { swap(cursor_height, cursor_width); + swap(x_hotspot, y_hotspot); + if (param->rotation == ROTATION_ANGLE_90) { - src_x_offset = pos->x - pos->y_hotspot - param->viewport.x; - src_y_offset = pos->y - pos->x_hotspot - param->viewport.y; + // hotspot = (-y, x) + src_x_offset = x_pos - (cursor_width - x_hotspot); + src_y_offset = y_pos - y_hotspot; + } else if (param->rotation == ROTATION_ANGLE_270) { + // hotspot = (y, -x) + src_x_offset = x_pos - x_hotspot; + src_y_offset = y_pos - (cursor_height - y_hotspot); } } else if (param->rotation == ROTATION_ANGLE_180) { + // hotspot = (-x, -y) if (!param->mirror) - src_x_offset = pos->x - param->viewport.x; + src_x_offset = x_pos - (cursor_width - x_hotspot); - src_y_offset = pos->y - param->viewport.y; + src_y_offset = y_pos - (cursor_height - y_hotspot); } dst_x_offset = (src_x_offset >= 0) ? src_x_offset : 0; @@ -1248,8 +1258,8 @@ void hubp1_cursor_set_position( CURSOR_Y_POSITION, pos->y); REG_SET_2(CURSOR_HOT_SPOT, 0, - CURSOR_HOT_SPOT_X, x_hotspot, - CURSOR_HOT_SPOT_Y, y_hotspot); + CURSOR_HOT_SPOT_X, pos->x_hotspot, + CURSOR_HOT_SPOT_Y, pos->y_hotspot); REG_SET(CURSOR_DST_OFFSET, 0, CURSOR_DST_X_OFFSET, dst_x_offset); diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c index 938dba5249d4..4566bc7abf17 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c @@ -973,10 +973,12 @@ void hubp2_cursor_set_position( const struct dc_cursor_mi_param *param) { struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); - int src_x_offset = pos->x - pos->x_hotspot - param->viewport.x; - int src_y_offset = pos->y - pos->y_hotspot - param->viewport.y; + int x_pos = pos->x - param->viewport.x; + int y_pos = pos->y - param->viewport.y; int x_hotspot = pos->x_hotspot; int y_hotspot = pos->y_hotspot; + int src_x_offset = x_pos - pos->x_hotspot; + int src_y_offset = y_pos - pos->y_hotspot; int cursor_height = (int)hubp->curs_attr.height; int cursor_width = (int)hubp->curs_attr.width; uint32_t dst_x_offset; @@ -994,18 +996,26 @@ void hubp2_cursor_set_position( if (hubp->curs_attr.address.quad_part == 0) return; - // Rotated cursor width/height and hotspots tweaks for offset calculation + // Transform cursor width / height and hotspots for offset calculations if (param->rotation == ROTATION_ANGLE_90 || param->rotation == ROTATION_ANGLE_270) { swap(cursor_height, cursor_width); + swap(x_hotspot, y_hotspot); + if (param->rotation == ROTATION_ANGLE_90) { - src_x_offset = pos->x - pos->y_hotspot - param->viewport.x; - src_y_offset = pos->y - pos->x_hotspot - param->viewport.y; + // hotspot = (-y, x) + src_x_offset = x_pos - (cursor_width - x_hotspot); + src_y_offset = y_pos - y_hotspot; + } else if (param->rotation == ROTATION_ANGLE_270) { + // hotspot = (y, -x) + src_x_offset = x_pos - x_hotspot; + src_y_offset = y_pos - (cursor_height - y_hotspot); } } else if (param->rotation == ROTATION_ANGLE_180) { + // hotspot = (-x, -y) if (!param->mirror) - src_x_offset = pos->x - param->viewport.x; + src_x_offset = x_pos - (cursor_width - x_hotspot); - src_y_offset = pos->y - param->viewport.y; + src_y_offset = y_pos - (cursor_height - y_hotspot); } dst_x_offset = (src_x_offset >= 0) ? src_x_offset : 0; @@ -1042,8 +1052,8 @@ void hubp2_cursor_set_position( CURSOR_Y_POSITION, pos->y); REG_SET_2(CURSOR_HOT_SPOT, 0, - CURSOR_HOT_SPOT_X, x_hotspot, - CURSOR_HOT_SPOT_Y, y_hotspot); + CURSOR_HOT_SPOT_X, pos->x_hotspot, + CURSOR_HOT_SPOT_Y, pos->y_hotspot); REG_SET(CURSOR_DST_OFFSET, 0, CURSOR_DST_X_OFFSET, dst_x_offset); @@ -1052,8 +1062,8 @@ void hubp2_cursor_set_position( hubp->pos.cur_ctl.bits.cur_enable = cur_en; hubp->pos.position.bits.x_pos = pos->x; hubp->pos.position.bits.y_pos = pos->y; - hubp->pos.hot_spot.bits.x_hot = x_hotspot; - hubp->pos.hot_spot.bits.y_hot = y_hotspot; + hubp->pos.hot_spot.bits.x_hot = pos->x_hotspot; + hubp->pos.hot_spot.bits.y_hot = pos->y_hotspot; hubp->pos.dst_offset.bits.dst_x_offset = dst_x_offset; /* Cursor Rectangle Cache * Cursor bitmaps have different hotspot values diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.c index 1bd7e0f327d8..389a8938ee45 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.c @@ -96,6 +96,13 @@ static void dccg314_set_pixel_rate_div( struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); enum pixel_rate_div cur_k1 = PIXEL_RATE_DIV_NA, cur_k2 = PIXEL_RATE_DIV_NA; + // Don't program 0xF into the register field. Not valid since + // K1 / K2 field is only 1 / 2 bits wide + if (k1 == PIXEL_RATE_DIV_NA || k2 == PIXEL_RATE_DIV_NA) { + BREAK_TO_DEBUGGER(); + return; + } + dccg314_get_pixel_rate_div(dccg, otg_inst, &cur_k1, &cur_k2); if (k1 == PIXEL_RATE_DIV_NA || k2 == PIXEL_RATE_DIV_NA || (k1 == cur_k1 && k2 == cur_k2)) return; diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c index 588c1c71241f..a0741794db62 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c @@ -348,10 +348,8 @@ unsigned int dcn314_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsig two_pix_per_container = optc2_is_two_pixels_per_containter(&stream->timing); odm_combine_factor = get_odm_config(pipe_ctx, NULL); - if (pipe_ctx->stream->signal == SIGNAL_TYPE_VIRTUAL) - return odm_combine_factor; - if (is_dp_128b_132b_signal(pipe_ctx)) { + *k1_div = PIXEL_RATE_DIV_BY_1; *k2_div = PIXEL_RATE_DIV_BY_1; } else if (dc_is_hdmi_tmds_signal(pipe_ctx->stream->signal) || dc_is_dvi_signal(pipe_ctx->stream->signal)) { *k1_div = PIXEL_RATE_DIV_BY_1; @@ -359,7 +357,7 @@ unsigned int dcn314_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsig *k2_div = PIXEL_RATE_DIV_BY_2; else *k2_div = PIXEL_RATE_DIV_BY_4; - } else if (dc_is_dp_signal(pipe_ctx->stream->signal)) { + } else if (dc_is_dp_signal(pipe_ctx->stream->signal) || dc_is_virtual_signal(pipe_ctx->stream->signal)) { if (two_pix_per_container) { *k1_div = PIXEL_RATE_DIV_BY_1; *k2_div = PIXEL_RATE_DIV_BY_2; diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c index e4daed44ef5f..df4f25119142 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c @@ -96,8 +96,10 @@ static void dccg32_set_pixel_rate_div( // Don't program 0xF into the register field. Not valid since // K1 / K2 field is only 1 / 2 bits wide - if (k1 == PIXEL_RATE_DIV_NA || k2 == PIXEL_RATE_DIV_NA) + if (k1 == PIXEL_RATE_DIV_NA || k2 == PIXEL_RATE_DIV_NA) { + BREAK_TO_DEBUGGER(); return; + } dccg32_get_pixel_rate_div(dccg, otg_inst, &cur_k1, &cur_k2); if (k1 == cur_k1 && k2 == cur_k2) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c index ac41a763bb1d..d0b46a3e0155 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c @@ -1171,10 +1171,8 @@ unsigned int dcn32_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsign two_pix_per_container = optc2_is_two_pixels_per_containter(&stream->timing); odm_combine_factor = get_odm_config(pipe_ctx, NULL); - if (pipe_ctx->stream->signal == SIGNAL_TYPE_VIRTUAL) - return odm_combine_factor; - if (is_dp_128b_132b_signal(pipe_ctx)) { + *k1_div = PIXEL_RATE_DIV_BY_1; *k2_div = PIXEL_RATE_DIV_BY_1; } else if (dc_is_hdmi_tmds_signal(pipe_ctx->stream->signal) || dc_is_dvi_signal(pipe_ctx->stream->signal)) { *k1_div = PIXEL_RATE_DIV_BY_1; diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c index b03a7814e96d..fa3778849db1 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c @@ -111,7 +111,7 @@ uint32_t dcn32_helper_calculate_num_ways_for_subvp(struct dc *dc, struct dc_stat mall_alloc_width_blk_aligned = full_vp_width_blk_aligned; /* mall_alloc_height_blk_aligned_l/c = CEILING(sub_vp_height_l/c - 1, blk_height_l/c) + blk_height_l/c */ - mall_alloc_height_blk_aligned = (pipe->stream->timing.v_addressable - 1 + mblk_height - 1) / + mall_alloc_height_blk_aligned = (pipe->plane_res.scl_data.viewport.height - 1 + mblk_height - 1) / mblk_height * mblk_height + mblk_height; /* full_mblk_width_ub_l/c = mall_alloc_width_blk_aligned_l/c; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index 12e17bcfca3f..2abe3967f7fb 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -157,7 +157,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_2_soc = { .dispclk_dppclk_vco_speed_mhz = 4300.0, .do_urgent_latency_adjustment = true, .urgent_latency_adjustment_fabric_clock_component_us = 1.0, - .urgent_latency_adjustment_fabric_clock_reference_mhz = 1000, + .urgent_latency_adjustment_fabric_clock_reference_mhz = 3000, }; void dcn32_build_wm_range_table_fpu(struct clk_mgr_internal *clk_mgr) @@ -211,7 +211,7 @@ void dcn32_build_wm_range_table_fpu(struct clk_mgr_internal *clk_mgr) /* 'DalDummyClockChangeLatencyNs' registry key option set to 0x7FFFFFFF can be used to disable Set C for dummy p-state */ if (clk_mgr->base.ctx->dc->bb_overrides.dummy_clock_change_latency_ns != 0x7FFFFFFF) { clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].valid = true; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.pstate_latency_us = 38; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.pstate_latency_us = 50; clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.fclk_change_latency_us = fclk_change_latency_us; clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.sr_exit_time_us = sr_exit_time_us; clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us; @@ -221,7 +221,7 @@ void dcn32_build_wm_range_table_fpu(struct clk_mgr_internal *clk_mgr) clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.min_uclk = min_uclk_mhz; clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.max_uclk = 0xFFFF; clk_mgr->base.bw_params->dummy_pstate_table[0].dram_speed_mts = clk_mgr->base.bw_params->clk_table.entries[0].memclk_mhz * 16; - clk_mgr->base.bw_params->dummy_pstate_table[0].dummy_pstate_latency_us = 38; + clk_mgr->base.bw_params->dummy_pstate_table[0].dummy_pstate_latency_us = 50; clk_mgr->base.bw_params->dummy_pstate_table[1].dram_speed_mts = clk_mgr->base.bw_params->clk_table.entries[1].memclk_mhz * 16; clk_mgr->base.bw_params->dummy_pstate_table[1].dummy_pstate_latency_us = 9; clk_mgr->base.bw_params->dummy_pstate_table[2].dram_speed_mts = clk_mgr->base.bw_params->clk_table.entries[2].memclk_mhz * 16; @@ -1910,7 +1910,7 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context, if (context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][maxMpcComb] == dm_dram_clock_change_unsupported) { - int min_dram_speed_mts_offset = dc->clk_mgr->bw_params->clk_table.num_entries - 1; + int min_dram_speed_mts_offset = dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_memclk_levels - 1; min_dram_speed_mts = dc->clk_mgr->bw_params->clk_table.entries[min_dram_speed_mts_offset].memclk_mhz * 16; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c index 432b4ecd01a7..f4b176599be7 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c @@ -126,9 +126,9 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_21_soc = { .sr_enter_plus_exit_z8_time_us = 320, .writeback_latency_us = 12.0, .round_trip_ping_latency_dcfclk_cycles = 263, - .urgent_latency_pixel_data_only_us = 9.35, - .urgent_latency_pixel_mixed_with_vm_data_us = 9.35, - .urgent_latency_vm_data_only_us = 9.35, + .urgent_latency_pixel_data_only_us = 4, + .urgent_latency_pixel_mixed_with_vm_data_us = 4, + .urgent_latency_vm_data_only_us = 4, .fclk_change_latency_us = 20, .usr_retraining_latency_us = 2, .smn_latency_us = 2, @@ -156,7 +156,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_21_soc = { .dispclk_dppclk_vco_speed_mhz = 4300.0, .do_urgent_latency_adjustment = true, .urgent_latency_adjustment_fabric_clock_component_us = 1.0, - .urgent_latency_adjustment_fabric_clock_reference_mhz = 1000, + .urgent_latency_adjustment_fabric_clock_reference_mhz = 3000, }; static void get_optimal_ntuple(struct _vcs_dpi_voltage_scaling_st *entry) diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h index 630f3395e90a..a0207a8f8756 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h @@ -1153,7 +1153,7 @@ struct vba_vars_st { double UrgBurstFactorLumaPre[DC__NUM_DPP__MAX]; double UrgBurstFactorChromaPre[DC__NUM_DPP__MAX]; bool NotUrgentLatencyHidingPre[DC__NUM_DPP__MAX]; - bool LinkCapacitySupport[DC__NUM_DPP__MAX]; + bool LinkCapacitySupport[DC__VOLTAGE_STATES]; bool VREADY_AT_OR_AFTER_VSYNC[DC__NUM_DPP__MAX]; unsigned int MIN_DST_Y_NEXT_START[DC__NUM_DPP__MAX]; unsigned int VFrontPorch[DC__NUM_DPP__MAX]; diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h index 25c08f963f49..d6b13933a98f 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h @@ -25,10 +25,10 @@ // *** IMPORTANT *** // PMFW TEAM: Always increment the interface version on any change to this file -#define SMU13_DRIVER_IF_VERSION 0x2C +#define SMU13_DRIVER_IF_VERSION 0x35 //Increment this version if SkuTable_t or BoardTable_t change -#define PPTABLE_VERSION 0x20 +#define PPTABLE_VERSION 0x27 #define NUM_GFXCLK_DPM_LEVELS 16 #define NUM_SOCCLK_DPM_LEVELS 8 @@ -96,7 +96,7 @@ #define FEATURE_MEM_TEMP_READ_BIT 47 #define FEATURE_ATHUB_MMHUB_PG_BIT 48 #define FEATURE_SOC_PCC_BIT 49 -#define FEATURE_SPARE_50_BIT 50 +#define FEATURE_EDC_PWRBRK_BIT 50 #define FEATURE_SPARE_51_BIT 51 #define FEATURE_SPARE_52_BIT 52 #define FEATURE_SPARE_53_BIT 53 @@ -282,15 +282,15 @@ typedef enum { } I2cControllerPort_e; typedef enum { - I2C_CONTROLLER_NAME_VR_GFX = 0, - I2C_CONTROLLER_NAME_VR_SOC, - I2C_CONTROLLER_NAME_VR_VMEMP, - I2C_CONTROLLER_NAME_VR_VDDIO, - I2C_CONTROLLER_NAME_LIQUID0, - I2C_CONTROLLER_NAME_LIQUID1, - I2C_CONTROLLER_NAME_PLX, - I2C_CONTROLLER_NAME_OTHER, - I2C_CONTROLLER_NAME_COUNT, + I2C_CONTROLLER_NAME_VR_GFX = 0, + I2C_CONTROLLER_NAME_VR_SOC, + I2C_CONTROLLER_NAME_VR_VMEMP, + I2C_CONTROLLER_NAME_VR_VDDIO, + I2C_CONTROLLER_NAME_LIQUID0, + I2C_CONTROLLER_NAME_LIQUID1, + I2C_CONTROLLER_NAME_PLX, + I2C_CONTROLLER_NAME_FAN_INTAKE, + I2C_CONTROLLER_NAME_COUNT, } I2cControllerName_e; typedef enum { @@ -302,6 +302,7 @@ typedef enum { I2C_CONTROLLER_THROTTLER_LIQUID0, I2C_CONTROLLER_THROTTLER_LIQUID1, I2C_CONTROLLER_THROTTLER_PLX, + I2C_CONTROLLER_THROTTLER_FAN_INTAKE, I2C_CONTROLLER_THROTTLER_INA3221, I2C_CONTROLLER_THROTTLER_COUNT, } I2cControllerThrottler_e; @@ -309,8 +310,9 @@ typedef enum { typedef enum { I2C_CONTROLLER_PROTOCOL_VR_XPDE132G5, I2C_CONTROLLER_PROTOCOL_VR_IR35217, - I2C_CONTROLLER_PROTOCOL_TMP_TMP102A, + I2C_CONTROLLER_PROTOCOL_TMP_MAX31875, I2C_CONTROLLER_PROTOCOL_INA3221, + I2C_CONTROLLER_PROTOCOL_TMP_MAX6604, I2C_CONTROLLER_PROTOCOL_COUNT, } I2cControllerProtocol_e; @@ -690,6 +692,9 @@ typedef struct { #define PP_OD_FEATURE_UCLK_BIT 8 #define PP_OD_FEATURE_ZERO_FAN_BIT 9 #define PP_OD_FEATURE_TEMPERATURE_BIT 10 +#define PP_OD_FEATURE_POWER_FEATURE_CTRL_BIT 11 +#define PP_OD_FEATURE_ASIC_TDC_BIT 12 +#define PP_OD_FEATURE_COUNT 13 typedef enum { PP_OD_POWER_FEATURE_ALWAYS_ENABLED, @@ -697,6 +702,11 @@ typedef enum { PP_OD_POWER_FEATURE_ALWAYS_DISABLED, } PP_OD_POWER_FEATURE_e; +typedef enum { + FAN_MODE_AUTO = 0, + FAN_MODE_MANUAL_LINEAR, +} FanMode_e; + typedef struct { uint32_t FeatureCtrlMask; @@ -708,8 +718,8 @@ typedef struct { uint8_t RuntimePwrSavingFeaturesCtrl; //Frequency changes - int16_t GfxclkFmin; // MHz - int16_t GfxclkFmax; // MHz + int16_t GfxclkFmin; // MHz + int16_t GfxclkFmax; // MHz uint16_t UclkFmin; // MHz uint16_t UclkFmax; // MHz @@ -730,7 +740,12 @@ typedef struct { uint8_t MaxOpTemp; uint8_t Padding[4]; - uint32_t Spare[12]; + uint16_t GfxVoltageFullCtrlMode; + uint16_t GfxclkFullCtrlMode; + uint16_t UclkFullCtrlMode; + int16_t AsicTdc; + + uint32_t Spare[10]; uint32_t MmHubPadding[8]; // SMU internal use. Adding here instead of external as a workaround } OverDriveTable_t; @@ -748,8 +763,8 @@ typedef struct { uint8_t IdlePwrSavingFeaturesCtrl; uint8_t RuntimePwrSavingFeaturesCtrl; - uint16_t GfxclkFmin; // MHz - uint16_t GfxclkFmax; // MHz + int16_t GfxclkFmin; // MHz + int16_t GfxclkFmax; // MHz uint16_t UclkFmin; // MHz uint16_t UclkFmax; // MHz @@ -769,7 +784,12 @@ typedef struct { uint8_t MaxOpTemp; uint8_t Padding[4]; - uint32_t Spare[12]; + uint16_t GfxVoltageFullCtrlMode; + uint16_t GfxclkFullCtrlMode; + uint16_t UclkFullCtrlMode; + int16_t AsicTdc; + + uint32_t Spare[10]; } OverDriveLimits_t; @@ -903,7 +923,8 @@ typedef struct { uint16_t FanStartTempMin; uint16_t FanStartTempMax; - uint32_t Spare[12]; + uint16_t PowerMinPpt0[POWER_SOURCE_COUNT]; + uint32_t Spare[11]; } MsgLimits_t; @@ -1086,11 +1107,13 @@ typedef struct { uint32_t GfxoffSpare[15]; // GFX GPO - float DfllBtcMasterScalerM; + uint32_t DfllBtcMasterScalerM; int32_t DfllBtcMasterScalerB; - float DfllBtcSlaveScalerM; + uint32_t DfllBtcSlaveScalerM; int32_t DfllBtcSlaveScalerB; - uint32_t GfxGpoSpare[12]; + uint32_t DfllPccAsWaitCtrl; //GDFLL_AS_WAIT_CTRL_PCC register value to be passed to RLC msg + uint32_t DfllPccAsStepCtrl; //GDFLL_AS_STEP_CTRL_PCC register value to be passed to RLC msg + uint32_t GfxGpoSpare[10]; // GFX DCS @@ -1106,7 +1129,10 @@ typedef struct { uint16_t DcsTimeout; //This is the amount of time SMU FW waits for RLC to put GFX into GFXOFF before reverting to the fallback mechanism of throttling GFXCLK to Fmin. - uint32_t DcsSpare[16]; + uint32_t DcsSpare[14]; + + // UCLK section + uint16_t ShadowFreqTableUclk[NUM_UCLK_DPM_LEVELS]; // In MHz // UCLK section uint8_t UseStrobeModeOptimizations; //Set to indicate that FW should use strobe mode optimizations @@ -1163,13 +1189,14 @@ typedef struct { uint16_t IntakeTempHighIntakeAcousticLimit; uint16_t IntakeTempAcouticLimitReleaseRate; - uint16_t FanStalledTempLimitOffset; + int16_t FanAbnormalTempLimitOffset; uint16_t FanStalledTriggerRpm; - uint16_t FanAbnormalTriggerRpm; - uint16_t FanPadding; - - uint32_t FanSpare[14]; + uint16_t FanAbnormalTriggerRpmCoeff; + uint16_t FanAbnormalDetectionEnable; + uint8_t FanIntakeSensorSupport; + uint8_t FanIntakePadding[3]; + uint32_t FanSpare[13]; // SECTION: VDD_GFX AVFS uint8_t OverrideGfxAvfsFuses; @@ -1193,7 +1220,6 @@ typedef struct { uint32_t dGbV_dT_vmin; uint32_t dGbV_dT_vmax; - //Unused: PMFW-9370 uint32_t V2F_vmin_range_low; uint32_t V2F_vmin_range_high; uint32_t V2F_vmax_range_low; @@ -1238,8 +1264,21 @@ typedef struct { // SECTION: Advanced Options uint32_t DebugOverrides; + // Section: Total Board Power idle vs active coefficients + uint8_t TotalBoardPowerSupport; + uint8_t TotalBoardPowerPadding[3]; + + int16_t TotalIdleBoardPowerM; + int16_t TotalIdleBoardPowerB; + int16_t TotalBoardPowerM; + int16_t TotalBoardPowerB; + + QuadraticInt_t qFeffCoeffGameClock[POWER_SOURCE_COUNT]; + QuadraticInt_t qFeffCoeffBaseClock[POWER_SOURCE_COUNT]; + QuadraticInt_t qFeffCoeffBoostClock[POWER_SOURCE_COUNT]; + // SECTION: Sku Reserved - uint32_t Spare[64]; + uint32_t Spare[43]; // Padding for MMHUB - do not modify this uint32_t MmHubPadding[8]; @@ -1304,7 +1343,8 @@ typedef struct { // SECTION: Clock Spread Spectrum // UCLK Spread Spectrum - uint16_t UclkSpreadPadding; + uint8_t UclkTrainingModeSpreadPercent; // Q4.4 + uint8_t UclkSpreadPadding; uint16_t UclkSpreadFreq; // kHz // UCLK Spread Spectrum @@ -1317,11 +1357,7 @@ typedef struct { // Section: Memory Config uint8_t DramWidth; // Width of interface to the channel for each DRAM module. See DRAM_BIT_WIDTH_TYPE_e - uint8_t PaddingMem1[3]; - - // Section: Total Board Power - uint16_t TotalBoardPower; //Only needed for TCP Estimated case, where TCP = TGP+Total Board Power - uint16_t BoardPowerPadding; + uint8_t PaddingMem1[7]; // SECTION: UMC feature flags uint8_t HsrEnabled; @@ -1423,8 +1459,11 @@ typedef struct { uint16_t Vcn1ActivityPercentage ; uint32_t EnergyAccumulator; - uint16_t AverageSocketPower ; + uint16_t AverageSocketPower; + uint16_t AverageTotalBoardPower; + uint16_t AvgTemperature[TEMP_COUNT]; + uint16_t AvgTemperatureFanIntake; uint8_t PcieRate ; uint8_t PcieWidth ; @@ -1592,5 +1631,7 @@ typedef struct { #define IH_INTERRUPT_CONTEXT_ID_AUDIO_D0 0x5 #define IH_INTERRUPT_CONTEXT_ID_AUDIO_D3 0x6 #define IH_INTERRUPT_CONTEXT_ID_THERMAL_THROTTLING 0x7 +#define IH_INTERRUPT_CONTEXT_ID_FAN_ABNORMAL 0x8 +#define IH_INTERRUPT_CONTEXT_ID_FAN_RECOVERY 0x9 #endif diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h index b7f4569aff2a..865d6358918d 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h @@ -31,7 +31,7 @@ #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_4 0x07 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_5 0x04 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_0_10 0x32 -#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_7 0x2C +#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_7 0x35 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_10 0x1D #define SMU13_MODE1_RESET_WAIT_TIME_IN_MS 500 //500ms diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c index 40d8ca37f5bc..aa51c61a78c7 100644 --- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c +++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c @@ -2720,6 +2720,9 @@ static u32 *dw_hdmi_bridge_atomic_get_output_bus_fmts(struct drm_bridge *bridge, * if supported. In any case the default RGB888 format is added */ + /* Default 8bit RGB fallback */ + output_fmts[i++] = MEDIA_BUS_FMT_RGB888_1X24; + if (max_bpc >= 16 && info->bpc == 16) { if (info->color_formats & DRM_COLOR_FORMAT_YCBCR444) output_fmts[i++] = MEDIA_BUS_FMT_YUV16_1X48; @@ -2753,9 +2756,6 @@ static u32 *dw_hdmi_bridge_atomic_get_output_bus_fmts(struct drm_bridge *bridge, if (info->color_formats & DRM_COLOR_FORMAT_YCBCR444) output_fmts[i++] = MEDIA_BUS_FMT_YUV8_1X24; - /* Default 8bit RGB fallback */ - output_fmts[i++] = MEDIA_BUS_FMT_RGB888_1X24; - *num_output_fmts = i; return output_fmts; diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi86.c b/drivers/gpu/drm/bridge/ti-sn65dsi86.c index 3c3561942eb6..eb24322df721 100644 --- a/drivers/gpu/drm/bridge/ti-sn65dsi86.c +++ b/drivers/gpu/drm/bridge/ti-sn65dsi86.c @@ -931,9 +931,9 @@ static void ti_sn_bridge_set_video_timings(struct ti_sn65dsi86 *pdata) &pdata->bridge.encoder->crtc->state->adjusted_mode; u8 hsync_polarity = 0, vsync_polarity = 0; - if (mode->flags & DRM_MODE_FLAG_PHSYNC) + if (mode->flags & DRM_MODE_FLAG_NHSYNC) hsync_polarity = CHA_HSYNC_POLARITY; - if (mode->flags & DRM_MODE_FLAG_PVSYNC) + if (mode->flags & DRM_MODE_FLAG_NVSYNC) vsync_polarity = CHA_VSYNC_POLARITY; ti_sn65dsi86_write_u16(pdata, SN_CHA_ACTIVE_LINE_LENGTH_LOW_REG, diff --git a/drivers/gpu/drm/display/drm_dp_mst_topology.c b/drivers/gpu/drm/display/drm_dp_mst_topology.c index ecd22c038c8c..51a46689cda7 100644 --- a/drivers/gpu/drm/display/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/display/drm_dp_mst_topology.c @@ -5186,7 +5186,7 @@ int drm_dp_mst_add_affected_dsc_crtcs(struct drm_atomic_state *state, struct drm mst_state = drm_atomic_get_mst_topology_state(state, mgr); if (IS_ERR(mst_state)) - return -EINVAL; + return PTR_ERR(mst_state); list_for_each_entry(pos, &mst_state->payloads, next) { diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c index 35138f8a375c..b602cd72a120 100644 --- a/drivers/gpu/drm/drm_gem_shmem_helper.c +++ b/drivers/gpu/drm/drm_gem_shmem_helper.c @@ -571,12 +571,20 @@ static void drm_gem_shmem_vm_open(struct vm_area_struct *vma) { struct drm_gem_object *obj = vma->vm_private_data; struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj); - int ret; WARN_ON(shmem->base.import_attach); - ret = drm_gem_shmem_get_pages(shmem); - WARN_ON_ONCE(ret != 0); + mutex_lock(&shmem->pages_lock); + + /* + * We should have already pinned the pages when the buffer was first + * mmap'd, vm_open() just grabs an additional reference for the new + * mm the vma is getting copied into (ie. on fork()). + */ + if (!WARN_ON_ONCE(!shmem->pages_use_count)) + shmem->pages_use_count++; + + mutex_unlock(&shmem->pages_lock); drm_gem_vm_open(vma); } @@ -622,10 +630,8 @@ int drm_gem_shmem_mmap(struct drm_gem_shmem_object *shmem, struct vm_area_struct } ret = drm_gem_shmem_get_pages(shmem); - if (ret) { - drm_gem_vm_close(vma); + if (ret) return ret; - } vma->vm_flags |= VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP; vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 461c62c88413..de77054195c6 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -3723,12 +3723,16 @@ out: static u8 bigjoiner_pipes(struct drm_i915_private *i915) { + u8 pipes; + if (DISPLAY_VER(i915) >= 12) - return BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C) | BIT(PIPE_D); + pipes = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C) | BIT(PIPE_D); else if (DISPLAY_VER(i915) >= 11) - return BIT(PIPE_B) | BIT(PIPE_C); + pipes = BIT(PIPE_B) | BIT(PIPE_C); else - return 0; + pipes = 0; + + return pipes & RUNTIME_INFO(i915)->pipe_mask; } static bool transcoder_ddi_func_is_enabled(struct drm_i915_private *dev_priv, diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c index 1e608b9e5055..1a63da28f330 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.c +++ b/drivers/gpu/drm/i915/display/intel_display_power.c @@ -2434,7 +2434,7 @@ intel_display_power_ddi_io_domain(struct drm_i915_private *i915, enum port port) { const struct intel_ddi_port_domains *domains = intel_port_domains_for_port(i915, port); - if (drm_WARN_ON(&i915->drm, !domains) || domains->ddi_io == POWER_DOMAIN_INVALID) + if (drm_WARN_ON(&i915->drm, !domains || domains->ddi_io == POWER_DOMAIN_INVALID)) return POWER_DOMAIN_PORT_DDI_IO_A; return domains->ddi_io + (int)(port - domains->port_start); @@ -2445,7 +2445,7 @@ intel_display_power_ddi_lanes_domain(struct drm_i915_private *i915, enum port po { const struct intel_ddi_port_domains *domains = intel_port_domains_for_port(i915, port); - if (drm_WARN_ON(&i915->drm, !domains) || domains->ddi_lanes == POWER_DOMAIN_INVALID) + if (drm_WARN_ON(&i915->drm, !domains || domains->ddi_lanes == POWER_DOMAIN_INVALID)) return POWER_DOMAIN_PORT_DDI_LANES_A; return domains->ddi_lanes + (int)(port - domains->port_start); @@ -2471,7 +2471,7 @@ intel_display_power_legacy_aux_domain(struct drm_i915_private *i915, enum aux_ch { const struct intel_ddi_port_domains *domains = intel_port_domains_for_aux_ch(i915, aux_ch); - if (drm_WARN_ON(&i915->drm, !domains) || domains->aux_legacy_usbc == POWER_DOMAIN_INVALID) + if (drm_WARN_ON(&i915->drm, !domains || domains->aux_legacy_usbc == POWER_DOMAIN_INVALID)) return POWER_DOMAIN_AUX_A; return domains->aux_legacy_usbc + (int)(aux_ch - domains->aux_ch_start); @@ -2482,7 +2482,7 @@ intel_display_power_tbt_aux_domain(struct drm_i915_private *i915, enum aux_ch au { const struct intel_ddi_port_domains *domains = intel_port_domains_for_aux_ch(i915, aux_ch); - if (drm_WARN_ON(&i915->drm, !domains) || domains->aux_tbt == POWER_DOMAIN_INVALID) + if (drm_WARN_ON(&i915->drm, !domains || domains->aux_tbt == POWER_DOMAIN_INVALID)) return POWER_DOMAIN_AUX_TBT1; return domains->aux_tbt + (int)(aux_ch - domains->aux_ch_start); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index 3d4305eea1aa..0d6d640225fc 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -612,6 +612,10 @@ static int i915_ttm_truncate(struct drm_i915_gem_object *obj) WARN_ON_ONCE(obj->mm.madv == I915_MADV_WILLNEED); + err = ttm_bo_wait(bo, true, false); + if (err) + return err; + err = i915_ttm_move_notify(bo); if (err) return err; diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index d0b03a928b9a..7caa3412a244 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -625,8 +625,13 @@ int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout) return -EINTR; } - return timeout ? timeout : intel_uc_wait_for_idle(>->uc, - remaining_timeout); + if (timeout) + return timeout; + + if (remaining_timeout < 0) + remaining_timeout = 0; + + return intel_uc_wait_for_idle(>->uc, remaining_timeout); } int intel_gt_init(struct intel_gt *gt) @@ -1017,6 +1022,11 @@ static void mmio_invalidate_full(struct intel_gt *gt) if (!i915_mmio_reg_offset(rb.reg)) continue; + if (GRAPHICS_VER(i915) == 12 && (engine->class == VIDEO_DECODE_CLASS || + engine->class == VIDEO_ENHANCEMENT_CLASS || + engine->class == COMPUTE_CLASS)) + rb.bit = _MASKED_BIT_ENABLE(rb.bit); + intel_uncore_write_fw(uncore, rb.reg, rb.bit); awake |= engine->mask; } diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c index edb881d75630..1dfd01668c79 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_requests.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c @@ -199,7 +199,7 @@ out_active: spin_lock(&timelines->lock); if (remaining_timeout) *remaining_timeout = timeout; - return active_count ? timeout : 0; + return active_count ? timeout ?: -ETIME : 0; } static void retire_work_handler(struct work_struct *work) diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index 7a45e5360caf..714221f9a131 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -664,8 +664,6 @@ static int intel_vgpu_open_device(struct vfio_device *vfio_dev) return -ESRCH; } - kvm_get_kvm(vgpu->vfio_device.kvm); - if (__kvmgt_vgpu_exist(vgpu)) return -EEXIST; @@ -676,6 +674,7 @@ static int intel_vgpu_open_device(struct vfio_device *vfio_dev) vgpu->track_node.track_write = kvmgt_page_track_write; vgpu->track_node.track_flush_slot = kvmgt_page_track_flush_slot; + kvm_get_kvm(vgpu->vfio_device.kvm); kvm_page_track_register_notifier(vgpu->vfio_device.kvm, &vgpu->track_node); @@ -715,15 +714,14 @@ static void intel_vgpu_close_device(struct vfio_device *vfio_dev) kvm_page_track_unregister_notifier(vgpu->vfio_device.kvm, &vgpu->track_node); + kvm_put_kvm(vgpu->vfio_device.kvm); + kvmgt_protect_table_destroy(vgpu); gvt_cache_destroy(vgpu); intel_vgpu_release_msi_eventfd_ctx(vgpu); vgpu->attached = false; - - if (vgpu->vfio_device.kvm) - kvm_put_kvm(vgpu->vfio_device.kvm); } static u64 intel_vgpu_get_bar_addr(struct intel_vgpu *vgpu, int bar) diff --git a/drivers/gpu/drm/i915/intel_dram.c b/drivers/gpu/drm/i915/intel_dram.c index 2403ccd52c74..bba8cb6e8ae4 100644 --- a/drivers/gpu/drm/i915/intel_dram.c +++ b/drivers/gpu/drm/i915/intel_dram.c @@ -471,8 +471,7 @@ static int xelpdp_get_dram_info(struct drm_i915_private *i915) u32 val = intel_uncore_read(&i915->uncore, MTL_MEM_SS_INFO_GLOBAL); struct dram_info *dram_info = &i915->dram_info; - val = REG_FIELD_GET(MTL_DDR_TYPE_MASK, val); - switch (val) { + switch (REG_FIELD_GET(MTL_DDR_TYPE_MASK, val)) { case 0: dram_info->type = INTEL_DRAM_DDR4; break; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c b/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c index 089046fa21be..50fa3df0bc0c 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c @@ -1085,21 +1085,21 @@ int vmw_mksstat_add_ioctl(struct drm_device *dev, void *data, reset_ppn_array(pdesc->strsPPNs, ARRAY_SIZE(pdesc->strsPPNs)); /* Pin mksGuestStat user pages and store those in the instance descriptor */ - nr_pinned_stat = pin_user_pages(arg->stat, num_pages_stat, FOLL_LONGTERM, pages_stat, NULL); + nr_pinned_stat = pin_user_pages_fast(arg->stat, num_pages_stat, FOLL_LONGTERM, pages_stat); if (num_pages_stat != nr_pinned_stat) goto err_pin_stat; for (i = 0; i < num_pages_stat; ++i) pdesc->statPPNs[i] = page_to_pfn(pages_stat[i]); - nr_pinned_info = pin_user_pages(arg->info, num_pages_info, FOLL_LONGTERM, pages_info, NULL); + nr_pinned_info = pin_user_pages_fast(arg->info, num_pages_info, FOLL_LONGTERM, pages_info); if (num_pages_info != nr_pinned_info) goto err_pin_info; for (i = 0; i < num_pages_info; ++i) pdesc->infoPPNs[i] = page_to_pfn(pages_info[i]); - nr_pinned_strs = pin_user_pages(arg->strs, num_pages_strs, FOLL_LONGTERM, pages_strs, NULL); + nr_pinned_strs = pin_user_pages_fast(arg->strs, num_pages_strs, FOLL_LONGTERM, pages_strs); if (num_pages_strs != nr_pinned_strs) goto err_pin_strs; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c index ecd3c2fc978b..9c79873f62f0 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c @@ -949,6 +949,10 @@ int vmw_kms_sou_init_display(struct vmw_private *dev_priv) struct drm_device *dev = &dev_priv->drm; int i, ret; + /* Screen objects won't work if GMR's aren't available */ + if (!dev_priv->has_gmr) + return -ENOSYS; + if (!(dev_priv->capabilities & SVGA_CAP_SCREEN_OBJECT_2)) { return -ENOSYS; } diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 9c1d31f63f85..bd47628da6be 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -1315,6 +1315,9 @@ static s32 snto32(__u32 value, unsigned n) if (!value || !n) return 0; + if (n > 32) + n = 32; + switch (n) { case 8: return ((__s8)value); case 16: return ((__s16)value); diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index dad953f66996..8f58c3c1bec3 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -274,6 +274,7 @@ #define USB_DEVICE_ID_CH_AXIS_295 0x001c #define USB_VENDOR_ID_CHERRY 0x046a +#define USB_DEVICE_ID_CHERRY_MOUSE_000C 0x000c #define USB_DEVICE_ID_CHERRY_CYMOTION 0x0023 #define USB_DEVICE_ID_CHERRY_CYMOTION_SOLAR 0x0027 @@ -917,6 +918,7 @@ #define USB_DEVICE_ID_MS_XBOX_ONE_S_CONTROLLER 0x02fd #define USB_DEVICE_ID_MS_PIXART_MOUSE 0x00cb #define USB_DEVICE_ID_8BITDO_SN30_PRO_PLUS 0x02e0 +#define USB_DEVICE_ID_MS_MOUSE_0783 0x0783 #define USB_VENDOR_ID_MOJO 0x8282 #define USB_DEVICE_ID_RETRO_ADAPTER 0x3201 @@ -1215,6 +1217,7 @@ #define USB_DEVICE_ID_SYNAPTICS_DELL_K15A 0x6e21 #define USB_DEVICE_ID_SYNAPTICS_ACER_ONE_S1002 0x73f4 #define USB_DEVICE_ID_SYNAPTICS_ACER_ONE_S1003 0x73f5 +#define USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5_017 0x73f6 #define USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5 0x81a7 #define USB_VENDOR_ID_TEXAS_INSTRUMENTS 0x2047 @@ -1381,6 +1384,7 @@ #define USB_VENDOR_ID_PRIMAX 0x0461 #define USB_DEVICE_ID_PRIMAX_MOUSE_4D22 0x4d22 +#define USB_DEVICE_ID_PRIMAX_MOUSE_4E2A 0x4e2a #define USB_DEVICE_ID_PRIMAX_KEYBOARD 0x4e05 #define USB_DEVICE_ID_PRIMAX_REZEL 0x4e72 #define USB_DEVICE_ID_PRIMAX_PIXART_MOUSE_4D0F 0x4d0f diff --git a/drivers/hid/hid-ite.c b/drivers/hid/hid-ite.c index 430fa4f52ed3..75ebfcf31889 100644 --- a/drivers/hid/hid-ite.c +++ b/drivers/hid/hid-ite.c @@ -121,6 +121,11 @@ static const struct hid_device_id ite_devices[] = { USB_VENDOR_ID_SYNAPTICS, USB_DEVICE_ID_SYNAPTICS_ACER_ONE_S1003), .driver_data = QUIRK_TOUCHPAD_ON_OFF_REPORT }, + /* ITE8910 USB kbd ctlr, with Synaptics touchpad connected to it. */ + { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC, + USB_VENDOR_ID_SYNAPTICS, + USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5_017), + .driver_data = QUIRK_TOUCHPAD_ON_OFF_REPORT }, { } }; MODULE_DEVICE_TABLE(hid, ite_devices); diff --git a/drivers/hid/hid-lg4ff.c b/drivers/hid/hid-lg4ff.c index 5e6a0cef2a06..e3fcf1353fb3 100644 --- a/drivers/hid/hid-lg4ff.c +++ b/drivers/hid/hid-lg4ff.c @@ -872,6 +872,12 @@ static ssize_t lg4ff_alternate_modes_store(struct device *dev, struct device_att return -ENOMEM; i = strlen(lbuf); + + if (i == 0) { + kfree(lbuf); + return -EINVAL; + } + if (lbuf[i-1] == '\n') { if (i == 1) { kfree(lbuf); diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c index 71a9c258a20b..8a2aac18dcc5 100644 --- a/drivers/hid/hid-logitech-hidpp.c +++ b/drivers/hid/hid-logitech-hidpp.c @@ -4269,21 +4269,6 @@ static void hidpp_remove(struct hid_device *hdev) mutex_destroy(&hidpp->send_mutex); } -static const struct hid_device_id unhandled_hidpp_devices[] = { - /* Logitech Harmony Adapter for PS3, handled in hid-sony */ - { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_HARMONY_PS3) }, - /* Handled in hid-generic */ - { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_DINOVO_EDGE_KBD) }, - {} -}; - -static bool hidpp_match(struct hid_device *hdev, - bool ignore_special_driver) -{ - /* Refuse to handle devices handled by other HID drivers */ - return !hid_match_id(hdev, unhandled_hidpp_devices); -} - #define LDJ_DEVICE(product) \ HID_DEVICE(BUS_USB, HID_GROUP_LOGITECH_DJ_DEVICE, \ USB_VENDOR_ID_LOGITECH, (product)) @@ -4367,9 +4352,15 @@ static const struct hid_device_id hidpp_devices[] = { { /* MX5500 keyboard over Bluetooth */ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb30b), .driver_data = HIDPP_QUIRK_HIDPP_CONSUMER_VENDOR_KEYS }, - - { /* And try to enable HID++ for all the Logitech Bluetooth devices */ - HID_DEVICE(BUS_BLUETOOTH, HID_GROUP_ANY, USB_VENDOR_ID_LOGITECH, HID_ANY_ID) }, + { /* M-RCQ142 V470 Cordless Laser Mouse over Bluetooth */ + HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb008) }, + { /* MX Master mouse over Bluetooth */ + HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb012) }, + { /* MX Ergo trackball over Bluetooth */ + HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb01d) }, + { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb01e) }, + { /* MX Master 3 mouse over Bluetooth */ + HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb023) }, {} }; @@ -4383,7 +4374,6 @@ static const struct hid_usage_id hidpp_usages[] = { static struct hid_driver hidpp_driver = { .name = "logitech-hidpp-device", .id_table = hidpp_devices, - .match = hidpp_match, .report_fixup = hidpp_report_fixup, .probe = hidpp_probe, .remove = hidpp_remove, diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c index 50e1c717fc0a..0e9702c7f7d6 100644 --- a/drivers/hid/hid-quirks.c +++ b/drivers/hid/hid-quirks.c @@ -54,6 +54,7 @@ static const struct hid_device_id hid_quirks[] = { { HID_USB_DEVICE(USB_VENDOR_ID_CH, USB_DEVICE_ID_CH_FLIGHT_SIM_YOKE), HID_QUIRK_NOGET }, { HID_USB_DEVICE(USB_VENDOR_ID_CH, USB_DEVICE_ID_CH_PRO_PEDALS), HID_QUIRK_NOGET }, { HID_USB_DEVICE(USB_VENDOR_ID_CH, USB_DEVICE_ID_CH_PRO_THROTTLE), HID_QUIRK_NOGET }, + { HID_USB_DEVICE(USB_VENDOR_ID_CHERRY, USB_DEVICE_ID_CHERRY_MOUSE_000C), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K65RGB), HID_QUIRK_NO_INIT_REPORTS }, { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K65RGB_RAPIDFIRE), HID_QUIRK_NO_INIT_REPORTS | HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K70RGB), HID_QUIRK_NO_INIT_REPORTS }, @@ -122,6 +123,7 @@ static const struct hid_device_id hid_quirks[] = { { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_MOUSE_C05A), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_MOUSE_C06A), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_MCS, USB_DEVICE_ID_MCS_GAMEPADBLOCK), HID_QUIRK_MULTI_INPUT }, + { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_MOUSE_0783), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_PIXART_MOUSE), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_POWER_COVER), HID_QUIRK_NO_INIT_REPORTS }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_SURFACE3_COVER), HID_QUIRK_NO_INIT_REPORTS }, @@ -146,6 +148,7 @@ static const struct hid_device_id hid_quirks[] = { { HID_USB_DEVICE(USB_VENDOR_ID_PIXART, USB_DEVICE_ID_PIXART_OPTICAL_TOUCH_SCREEN), HID_QUIRK_NO_INIT_REPORTS }, { HID_USB_DEVICE(USB_VENDOR_ID_PIXART, USB_DEVICE_ID_PIXART_USB_OPTICAL_MOUSE), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_PRIMAX, USB_DEVICE_ID_PRIMAX_MOUSE_4D22), HID_QUIRK_ALWAYS_POLL }, + { HID_USB_DEVICE(USB_VENDOR_ID_PRIMAX, USB_DEVICE_ID_PRIMAX_MOUSE_4E2A), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_PRIMAX, USB_DEVICE_ID_PRIMAX_PIXART_MOUSE_4D0F), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_PRIMAX, USB_DEVICE_ID_PRIMAX_PIXART_MOUSE_4D65), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_PRIMAX, USB_DEVICE_ID_PRIMAX_PIXART_MOUSE_4E22), HID_QUIRK_ALWAYS_POLL }, diff --git a/drivers/hid/hid-uclogic-core.c b/drivers/hid/hid-uclogic-core.c index 0fbc408c2607..7fa6fe04f1b2 100644 --- a/drivers/hid/hid-uclogic-core.c +++ b/drivers/hid/hid-uclogic-core.c @@ -192,6 +192,7 @@ static int uclogic_probe(struct hid_device *hdev, * than the pen, so use QUIRK_MULTI_INPUT for all tablets. */ hdev->quirks |= HID_QUIRK_MULTI_INPUT; + hdev->quirks |= HID_QUIRK_HIDINPUT_FORCE; /* Allocate and assign driver data */ drvdata = devm_kzalloc(&hdev->dev, sizeof(*drvdata), GFP_KERNEL); diff --git a/drivers/hid/hid-uclogic-rdesc.c b/drivers/hid/hid-uclogic-rdesc.c index 4bd54c4fb5b0..6b73eb0df6bd 100644 --- a/drivers/hid/hid-uclogic-rdesc.c +++ b/drivers/hid/hid-uclogic-rdesc.c @@ -1193,7 +1193,7 @@ __u8 *uclogic_rdesc_template_apply(const __u8 *template_ptr, p[sizeof(btn_head)] < param_num) { v = param_list[p[sizeof(btn_head)]]; put_unaligned((__u8)0x2A, p); /* Usage Maximum */ - put_unaligned_le16((__force u16)cpu_to_le16(v), p + 1); + put_unaligned((__force u16)cpu_to_le16(v), (s16 *)(p + 1)); p += sizeof(btn_head) + 1; } else { p++; diff --git a/drivers/hid/i2c-hid/Kconfig b/drivers/hid/i2c-hid/Kconfig index 5273ee2bb134..d65abe65ce73 100644 --- a/drivers/hid/i2c-hid/Kconfig +++ b/drivers/hid/i2c-hid/Kconfig @@ -66,6 +66,6 @@ endmenu config I2C_HID_CORE tristate - default y if I2C_HID_ACPI=y || I2C_HID_OF=y || I2C_HID_OF_GOODIX=y - default m if I2C_HID_ACPI=m || I2C_HID_OF=m || I2C_HID_OF_GOODIX=m + default y if I2C_HID_ACPI=y || I2C_HID_OF=y || I2C_HID_OF_ELAN=y || I2C_HID_OF_GOODIX=y + default m if I2C_HID_ACPI=m || I2C_HID_OF=m || I2C_HID_OF_ELAN=m || I2C_HID_OF_GOODIX=m select HID diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index 5b120402d405..cc23b90cae02 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -533,13 +533,17 @@ static void vmbus_add_channel_work(struct work_struct *work) * Add the new device to the bus. This will kick off device-driver * binding which eventually invokes the device driver's AddDevice() * method. + * + * If vmbus_device_register() fails, the 'device_obj' is freed in + * vmbus_device_release() as called by device_unregister() in the + * error path of vmbus_device_register(). In the outside error + * path, there's no need to free it. */ ret = vmbus_device_register(newchannel->device_obj); if (ret != 0) { pr_err("unable to add child device object (relid %d)\n", newchannel->offermsg.child_relid); - kfree(newchannel->device_obj); goto err_deq_chan; } diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 8b2e413bf19c..e592c481f7ae 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -2082,6 +2082,7 @@ int vmbus_device_register(struct hv_device *child_device_obj) ret = device_register(&child_device_obj->device); if (ret) { pr_err("Unable to register child device\n"); + put_device(&child_device_obj->device); return ret; } diff --git a/drivers/hwmon/asus-ec-sensors.c b/drivers/hwmon/asus-ec-sensors.c index 81e688975c6a..a901e4e33d81 100644 --- a/drivers/hwmon/asus-ec-sensors.c +++ b/drivers/hwmon/asus-ec-sensors.c @@ -938,6 +938,8 @@ static int asus_ec_probe(struct platform_device *pdev) ec_data->nr_sensors = hweight_long(ec_data->board_info->sensors); ec_data->sensors = devm_kcalloc(dev, ec_data->nr_sensors, sizeof(struct ec_sensor), GFP_KERNEL); + if (!ec_data->sensors) + return -ENOMEM; status = setup_lock_data(dev); if (status) { diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c index 8bf32c6c85d9..9bee4d33fbdf 100644 --- a/drivers/hwmon/coretemp.c +++ b/drivers/hwmon/coretemp.c @@ -242,10 +242,13 @@ static int adjust_tjmax(struct cpuinfo_x86 *c, u32 id, struct device *dev) */ if (host_bridge && host_bridge->vendor == PCI_VENDOR_ID_INTEL) { for (i = 0; i < ARRAY_SIZE(tjmax_pci_table); i++) { - if (host_bridge->device == tjmax_pci_table[i].device) + if (host_bridge->device == tjmax_pci_table[i].device) { + pci_dev_put(host_bridge); return tjmax_pci_table[i].tjmax; + } } } + pci_dev_put(host_bridge); for (i = 0; i < ARRAY_SIZE(tjmax_table); i++) { if (strstr(c->x86_model_id, tjmax_table[i].id)) @@ -533,6 +536,10 @@ static void coretemp_remove_core(struct platform_data *pdata, int indx) { struct temp_data *tdata = pdata->core_data[indx]; + /* if we errored on add then this is already gone */ + if (!tdata) + return; + /* Remove the sysfs attributes */ sysfs_remove_group(&pdata->hwmon_dev->kobj, &tdata->attr_group); diff --git a/drivers/hwmon/i5500_temp.c b/drivers/hwmon/i5500_temp.c index 05f68e9c9477..23b9f94fe0a9 100644 --- a/drivers/hwmon/i5500_temp.c +++ b/drivers/hwmon/i5500_temp.c @@ -117,7 +117,7 @@ static int i5500_temp_probe(struct pci_dev *pdev, u32 tstimer; s8 tsfsc; - err = pci_enable_device(pdev); + err = pcim_enable_device(pdev); if (err) { dev_err(&pdev->dev, "Failed to enable device\n"); return err; diff --git a/drivers/hwmon/ibmpex.c b/drivers/hwmon/ibmpex.c index f6ec165c0fa8..1837cccd993c 100644 --- a/drivers/hwmon/ibmpex.c +++ b/drivers/hwmon/ibmpex.c @@ -502,6 +502,7 @@ static void ibmpex_register_bmc(int iface, struct device *dev) return; out_register: + list_del(&data->list); hwmon_device_unregister(data->hwmon_dev); out_user: ipmi_destroy_user(data->user); diff --git a/drivers/hwmon/ina3221.c b/drivers/hwmon/ina3221.c index 2a57f4b60c29..e06186986444 100644 --- a/drivers/hwmon/ina3221.c +++ b/drivers/hwmon/ina3221.c @@ -228,7 +228,7 @@ static int ina3221_read_value(struct ina3221_data *ina, unsigned int reg, * Shunt Voltage Sum register has 14-bit value with 1-bit shift * Other Shunt Voltage registers have 12 bits with 3-bit shift */ - if (reg == INA3221_SHUNT_SUM) + if (reg == INA3221_SHUNT_SUM || reg == INA3221_CRIT_SUM) *val = sign_extend32(regval >> 1, 14); else *val = sign_extend32(regval >> 3, 12); @@ -465,7 +465,7 @@ static int ina3221_write_curr(struct device *dev, u32 attr, * SHUNT_SUM: (1 / 40uV) << 1 = 1 / 20uV * SHUNT[1-3]: (1 / 40uV) << 3 = 1 / 5uV */ - if (reg == INA3221_SHUNT_SUM) + if (reg == INA3221_SHUNT_SUM || reg == INA3221_CRIT_SUM) regval = DIV_ROUND_CLOSEST(voltage_uv, 20) & 0xfffe; else regval = DIV_ROUND_CLOSEST(voltage_uv, 5) & 0xfff8; diff --git a/drivers/hwmon/ltc2947-core.c b/drivers/hwmon/ltc2947-core.c index 7404e974762f..2dbbbac9de09 100644 --- a/drivers/hwmon/ltc2947-core.c +++ b/drivers/hwmon/ltc2947-core.c @@ -396,7 +396,7 @@ static int ltc2947_read_temp(struct device *dev, const u32 attr, long *val, return ret; /* in milidegrees celcius, temp is given by: */ - *val = (__val * 204) + 550; + *val = (__val * 204) + 5500; return 0; } diff --git a/drivers/i2c/busses/i2c-cadence.c b/drivers/i2c/busses/i2c-cadence.c index fe0cd205502d..f58943cb1341 100644 --- a/drivers/i2c/busses/i2c-cadence.c +++ b/drivers/i2c/busses/i2c-cadence.c @@ -852,7 +852,8 @@ static int cdns_i2c_master_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, CDNS_I2C_POLL_US, CDNS_I2C_TIMEOUT_US); if (ret) { ret = -EAGAIN; - i2c_recover_bus(adap); + if (id->adap.bus_recovery_info) + i2c_recover_bus(adap); goto out; } @@ -1263,8 +1264,13 @@ static int cdns_i2c_probe(struct platform_device *pdev) id->rinfo.pinctrl = devm_pinctrl_get(&pdev->dev); if (IS_ERR(id->rinfo.pinctrl)) { + int err = PTR_ERR(id->rinfo.pinctrl); + dev_info(&pdev->dev, "can't get pinctrl, bus recovery not supported\n"); - return PTR_ERR(id->rinfo.pinctrl); + if (err != -ENODEV) + return err; + } else { + id->adap.bus_recovery_info = &id->rinfo; } id->membase = devm_platform_get_and_ioremap_resource(pdev, 0, &r_mem); @@ -1283,7 +1289,6 @@ static int cdns_i2c_probe(struct platform_device *pdev) id->adap.retries = 3; /* Default retry value. */ id->adap.algo_data = id; id->adap.dev.parent = &pdev->dev; - id->adap.bus_recovery_info = &id->rinfo; init_completion(&id->xfer_done); snprintf(id->adap.name, sizeof(id->adap.name), "Cadence I2C at %08lx", (unsigned long)r_mem->start); diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c index 3082183bd66a..fc70920c4dda 100644 --- a/drivers/i2c/busses/i2c-imx.c +++ b/drivers/i2c/busses/i2c-imx.c @@ -1132,7 +1132,8 @@ static int i2c_imx_read(struct imx_i2c_struct *i2c_imx, struct i2c_msg *msgs, int i, result; unsigned int temp; int block_data = msgs->flags & I2C_M_RECV_LEN; - int use_dma = i2c_imx->dma && msgs->len >= DMA_THRESHOLD && !block_data; + int use_dma = i2c_imx->dma && msgs->flags & I2C_M_DMA_SAFE && + msgs->len >= DMA_THRESHOLD && !block_data; dev_dbg(&i2c_imx->adapter.dev, "<%s> write slave address: addr=0x%x\n", @@ -1298,7 +1299,8 @@ static int i2c_imx_xfer_common(struct i2c_adapter *adapter, result = i2c_imx_read(i2c_imx, &msgs[i], is_lastmsg, atomic); } else { if (!atomic && - i2c_imx->dma && msgs[i].len >= DMA_THRESHOLD) + i2c_imx->dma && msgs[i].len >= DMA_THRESHOLD && + msgs[i].flags & I2C_M_DMA_SAFE) result = i2c_imx_dma_write(i2c_imx, &msgs[i]); else result = i2c_imx_write(i2c_imx, &msgs[i], atomic); diff --git a/drivers/i2c/busses/i2c-npcm7xx.c b/drivers/i2c/busses/i2c-npcm7xx.c index 0c365b57d957..83457359ec45 100644 --- a/drivers/i2c/busses/i2c-npcm7xx.c +++ b/drivers/i2c/busses/i2c-npcm7xx.c @@ -2393,8 +2393,17 @@ static struct platform_driver npcm_i2c_bus_driver = { static int __init npcm_i2c_init(void) { + int ret; + npcm_i2c_debugfs_dir = debugfs_create_dir("npcm_i2c", NULL); - return platform_driver_register(&npcm_i2c_bus_driver); + + ret = platform_driver_register(&npcm_i2c_bus_driver); + if (ret) { + debugfs_remove_recursive(npcm_i2c_debugfs_dir); + return ret; + } + + return 0; } module_init(npcm_i2c_init); diff --git a/drivers/i2c/busses/i2c-qcom-geni.c b/drivers/i2c/busses/i2c-qcom-geni.c index 84a77512614d..8fce98bb77ff 100644 --- a/drivers/i2c/busses/i2c-qcom-geni.c +++ b/drivers/i2c/busses/i2c-qcom-geni.c @@ -626,7 +626,6 @@ static int geni_i2c_gpi_xfer(struct geni_i2c_dev *gi2c, struct i2c_msg msgs[], i dev_err(gi2c->se.dev, "I2C timeout gpi flags:%d addr:0x%x\n", gi2c->cur->flags, gi2c->cur->addr); gi2c->err = -ETIMEDOUT; - goto err; } if (gi2c->err) { diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c index b4edf10e8fd0..7539b0740351 100644 --- a/drivers/i2c/i2c-core-base.c +++ b/drivers/i2c/i2c-core-base.c @@ -467,6 +467,7 @@ static int i2c_device_probe(struct device *dev) { struct i2c_client *client = i2c_verify_client(dev); struct i2c_driver *driver; + bool do_power_on; int status; if (!client) @@ -545,8 +546,8 @@ static int i2c_device_probe(struct device *dev) if (status < 0) goto err_clear_wakeup_irq; - status = dev_pm_domain_attach(&client->dev, - !i2c_acpi_waive_d0_probe(dev)); + do_power_on = !i2c_acpi_waive_d0_probe(dev); + status = dev_pm_domain_attach(&client->dev, do_power_on); if (status) goto err_clear_wakeup_irq; @@ -585,7 +586,7 @@ static int i2c_device_probe(struct device *dev) err_release_driver_resources: devres_release_group(&client->dev, client->devres_group_id); err_detach_pm_domain: - dev_pm_domain_detach(&client->dev, !i2c_acpi_waive_d0_probe(dev)); + dev_pm_domain_detach(&client->dev, do_power_on); err_clear_wakeup_irq: dev_pm_clear_wake_irq(&client->dev); device_init_wakeup(&client->dev, false); @@ -610,7 +611,7 @@ static void i2c_device_remove(struct device *dev) devres_release_group(&client->dev, client->devres_group_id); - dev_pm_domain_detach(&client->dev, !i2c_acpi_waive_d0_probe(dev)); + dev_pm_domain_detach(&client->dev, true); dev_pm_clear_wake_irq(&client->dev); device_init_wakeup(&client->dev, false); diff --git a/drivers/iio/accel/bma400_core.c b/drivers/iio/accel/bma400_core.c index 490c342ef72a..6e4d10a7cd32 100644 --- a/drivers/iio/accel/bma400_core.c +++ b/drivers/iio/accel/bma400_core.c @@ -805,8 +805,10 @@ static int bma400_get_steps_reg(struct bma400_data *data, int *val) ret = regmap_bulk_read(data->regmap, BMA400_STEP_CNT0_REG, steps_raw, BMA400_STEP_RAW_LEN); - if (ret) + if (ret) { + kfree(steps_raw); return ret; + } *val = get_unaligned_le24(steps_raw); kfree(steps_raw); return IIO_VAL_INT; diff --git a/drivers/iio/adc/aspeed_adc.c b/drivers/iio/adc/aspeed_adc.c index 9341e0e0eb55..998e8bcc06e1 100644 --- a/drivers/iio/adc/aspeed_adc.c +++ b/drivers/iio/adc/aspeed_adc.c @@ -202,6 +202,8 @@ static int aspeed_adc_set_trim_data(struct iio_dev *indio_dev) ((scu_otp) & (data->model_data->trim_locate->field)) >> __ffs(data->model_data->trim_locate->field); + if (!trimming_val) + trimming_val = 0x8; } dev_dbg(data->dev, "trimming val = %d, offset = %08x, fields = %08x\n", @@ -563,12 +565,9 @@ static int aspeed_adc_probe(struct platform_device *pdev) if (ret) return ret; - if (of_find_property(data->dev->of_node, "aspeed,trim-data-valid", - NULL)) { - ret = aspeed_adc_set_trim_data(indio_dev); - if (ret) - return ret; - } + ret = aspeed_adc_set_trim_data(indio_dev); + if (ret) + return ret; if (of_find_property(data->dev->of_node, "aspeed,battery-sensing", NULL)) { diff --git a/drivers/iio/health/afe4403.c b/drivers/iio/health/afe4403.c index 3bb4028c5d74..df3bc5c3d378 100644 --- a/drivers/iio/health/afe4403.c +++ b/drivers/iio/health/afe4403.c @@ -245,14 +245,14 @@ static int afe4403_read_raw(struct iio_dev *indio_dev, int *val, int *val2, long mask) { struct afe4403_data *afe = iio_priv(indio_dev); - unsigned int reg = afe4403_channel_values[chan->address]; - unsigned int field = afe4403_channel_leds[chan->address]; + unsigned int reg, field; int ret; switch (chan->type) { case IIO_INTENSITY: switch (mask) { case IIO_CHAN_INFO_RAW: + reg = afe4403_channel_values[chan->address]; ret = afe4403_read(afe, reg, val); if (ret) return ret; @@ -262,6 +262,7 @@ static int afe4403_read_raw(struct iio_dev *indio_dev, case IIO_CURRENT: switch (mask) { case IIO_CHAN_INFO_RAW: + field = afe4403_channel_leds[chan->address]; ret = regmap_field_read(afe->fields[field], val); if (ret) return ret; diff --git a/drivers/iio/health/afe4404.c b/drivers/iio/health/afe4404.c index 8fca787b2524..836da31b7e30 100644 --- a/drivers/iio/health/afe4404.c +++ b/drivers/iio/health/afe4404.c @@ -250,20 +250,20 @@ static int afe4404_read_raw(struct iio_dev *indio_dev, int *val, int *val2, long mask) { struct afe4404_data *afe = iio_priv(indio_dev); - unsigned int value_reg = afe4404_channel_values[chan->address]; - unsigned int led_field = afe4404_channel_leds[chan->address]; - unsigned int offdac_field = afe4404_channel_offdacs[chan->address]; + unsigned int value_reg, led_field, offdac_field; int ret; switch (chan->type) { case IIO_INTENSITY: switch (mask) { case IIO_CHAN_INFO_RAW: + value_reg = afe4404_channel_values[chan->address]; ret = regmap_read(afe->regmap, value_reg, val); if (ret) return ret; return IIO_VAL_INT; case IIO_CHAN_INFO_OFFSET: + offdac_field = afe4404_channel_offdacs[chan->address]; ret = regmap_field_read(afe->fields[offdac_field], val); if (ret) return ret; @@ -273,6 +273,7 @@ static int afe4404_read_raw(struct iio_dev *indio_dev, case IIO_CURRENT: switch (mask) { case IIO_CHAN_INFO_RAW: + led_field = afe4404_channel_leds[chan->address]; ret = regmap_field_read(afe->fields[led_field], val); if (ret) return ret; @@ -295,19 +296,20 @@ static int afe4404_write_raw(struct iio_dev *indio_dev, int val, int val2, long mask) { struct afe4404_data *afe = iio_priv(indio_dev); - unsigned int led_field = afe4404_channel_leds[chan->address]; - unsigned int offdac_field = afe4404_channel_offdacs[chan->address]; + unsigned int led_field, offdac_field; switch (chan->type) { case IIO_INTENSITY: switch (mask) { case IIO_CHAN_INFO_OFFSET: + offdac_field = afe4404_channel_offdacs[chan->address]; return regmap_field_write(afe->fields[offdac_field], val); } break; case IIO_CURRENT: switch (mask) { case IIO_CHAN_INFO_RAW: + led_field = afe4404_channel_leds[chan->address]; return regmap_field_write(afe->fields[led_field], val); } break; diff --git a/drivers/iio/industrialio-sw-trigger.c b/drivers/iio/industrialio-sw-trigger.c index 994f03a71520..d86a3305d9e8 100644 --- a/drivers/iio/industrialio-sw-trigger.c +++ b/drivers/iio/industrialio-sw-trigger.c @@ -58,8 +58,12 @@ int iio_register_sw_trigger_type(struct iio_sw_trigger_type *t) t->group = configfs_register_default_group(iio_triggers_group, t->name, &iio_trigger_type_group_type); - if (IS_ERR(t->group)) + if (IS_ERR(t->group)) { + mutex_lock(&iio_trigger_types_lock); + list_del(&t->list); + mutex_unlock(&iio_trigger_types_lock); ret = PTR_ERR(t->group); + } return ret; } diff --git a/drivers/iio/light/Kconfig b/drivers/iio/light/Kconfig index 7cf6e8490123..0d4447df7200 100644 --- a/drivers/iio/light/Kconfig +++ b/drivers/iio/light/Kconfig @@ -293,6 +293,8 @@ config RPR0521 tristate "ROHM RPR0521 ALS and proximity sensor driver" depends on I2C select REGMAP_I2C + select IIO_BUFFER + select IIO_TRIGGERED_BUFFER help Say Y here if you want to build support for ROHM's RPR0521 ambient light and proximity sensor device. diff --git a/drivers/iio/light/apds9960.c b/drivers/iio/light/apds9960.c index b62c139baf41..38d4c7644bef 100644 --- a/drivers/iio/light/apds9960.c +++ b/drivers/iio/light/apds9960.c @@ -54,9 +54,6 @@ #define APDS9960_REG_CONTROL_PGAIN_MASK_SHIFT 2 #define APDS9960_REG_CONFIG_2 0x90 -#define APDS9960_REG_CONFIG_2_GGAIN_MASK 0x60 -#define APDS9960_REG_CONFIG_2_GGAIN_MASK_SHIFT 5 - #define APDS9960_REG_ID 0x92 #define APDS9960_REG_STATUS 0x93 @@ -77,6 +74,9 @@ #define APDS9960_REG_GCONF_1_GFIFO_THRES_MASK_SHIFT 6 #define APDS9960_REG_GCONF_2 0xa3 +#define APDS9960_REG_GCONF_2_GGAIN_MASK 0x60 +#define APDS9960_REG_GCONF_2_GGAIN_MASK_SHIFT 5 + #define APDS9960_REG_GOFFSET_U 0xa4 #define APDS9960_REG_GOFFSET_D 0xa5 #define APDS9960_REG_GPULSE 0xa6 @@ -396,9 +396,9 @@ static int apds9960_set_pxs_gain(struct apds9960_data *data, int val) } ret = regmap_update_bits(data->regmap, - APDS9960_REG_CONFIG_2, - APDS9960_REG_CONFIG_2_GGAIN_MASK, - idx << APDS9960_REG_CONFIG_2_GGAIN_MASK_SHIFT); + APDS9960_REG_GCONF_2, + APDS9960_REG_GCONF_2_GGAIN_MASK, + idx << APDS9960_REG_GCONF_2_GGAIN_MASK_SHIFT); if (!ret) data->pxs_gain = idx; mutex_unlock(&data->lock); diff --git a/drivers/input/touchscreen/raydium_i2c_ts.c b/drivers/input/touchscreen/raydium_i2c_ts.c index 3a4952935366..3d9c5758d8a4 100644 --- a/drivers/input/touchscreen/raydium_i2c_ts.c +++ b/drivers/input/touchscreen/raydium_i2c_ts.c @@ -211,12 +211,14 @@ static int raydium_i2c_send(struct i2c_client *client, error = raydium_i2c_xfer(client, addr, xfer, ARRAY_SIZE(xfer)); if (likely(!error)) - return 0; + goto out; msleep(RM_RETRY_DELAY_MS); } while (++tries < RM_MAX_RETRIES); dev_err(&client->dev, "%s failed: %d\n", __func__, error); +out: + kfree(tx_buf); return error; } diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile index cc9f381013c3..7fbf6a337662 100644 --- a/drivers/iommu/Makefile +++ b/drivers/iommu/Makefile @@ -28,6 +28,6 @@ obj-$(CONFIG_FSL_PAMU) += fsl_pamu.o fsl_pamu_domain.o obj-$(CONFIG_S390_IOMMU) += s390-iommu.o obj-$(CONFIG_HYPERV_IOMMU) += hyperv-iommu.o obj-$(CONFIG_VIRTIO_IOMMU) += virtio-iommu.o -obj-$(CONFIG_IOMMU_SVA) += iommu-sva-lib.o io-pgfault.o +obj-$(CONFIG_IOMMU_SVA) += iommu-sva.o io-pgfault.o obj-$(CONFIG_SPRD_IOMMU) += sprd-iommu.o obj-$(CONFIG_APPLE_DART) += apple-dart.o diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 1a2d425bf568..467b194975b3 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -85,7 +85,7 @@ #define LOOP_TIMEOUT 2000000 -#define IVRS_GET_SBDF_ID(seg, bus, dev, fd) (((seg & 0xffff) << 16) | ((bus & 0xff) << 8) \ +#define IVRS_GET_SBDF_ID(seg, bus, dev, fn) (((seg & 0xffff) << 16) | ((bus & 0xff) << 8) \ | ((dev & 0x1f) << 3) | (fn & 0x7)) /* @@ -3402,18 +3402,24 @@ static int __init parse_amd_iommu_options(char *str) static int __init parse_ivrs_ioapic(char *str) { u32 seg = 0, bus, dev, fn; - int ret, id, i; + int id, i; u32 devid; - ret = sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn); - if (ret != 4) { - ret = sscanf(str, "[%d]=%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn); - if (ret != 5) { - pr_err("Invalid command line: ivrs_ioapic%s\n", str); - return 1; - } + if (sscanf(str, "=%d@%x:%x.%x", &id, &bus, &dev, &fn) == 4 || + sscanf(str, "=%d@%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5) + goto found; + + if (sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn) == 4 || + sscanf(str, "[%d]=%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5) { + pr_warn("ivrs_ioapic%s option format deprecated; use ivrs_ioapic=%d@%04x:%02x:%02x.%d instead\n", + str, id, seg, bus, dev, fn); + goto found; } + pr_err("Invalid command line: ivrs_ioapic%s\n", str); + return 1; + +found: if (early_ioapic_map_size == EARLY_MAP_SIZE) { pr_err("Early IOAPIC map overflow - ignoring ivrs_ioapic%s\n", str); @@ -3434,18 +3440,24 @@ static int __init parse_ivrs_ioapic(char *str) static int __init parse_ivrs_hpet(char *str) { u32 seg = 0, bus, dev, fn; - int ret, id, i; + int id, i; u32 devid; - ret = sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn); - if (ret != 4) { - ret = sscanf(str, "[%d]=%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn); - if (ret != 5) { - pr_err("Invalid command line: ivrs_hpet%s\n", str); - return 1; - } + if (sscanf(str, "=%d@%x:%x.%x", &id, &bus, &dev, &fn) == 4 || + sscanf(str, "=%d@%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5) + goto found; + + if (sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn) == 4 || + sscanf(str, "[%d]=%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5) { + pr_warn("ivrs_hpet%s option format deprecated; use ivrs_hpet=%d@%04x:%02x:%02x.%d instead\n", + str, id, seg, bus, dev, fn); + goto found; } + pr_err("Invalid command line: ivrs_hpet%s\n", str); + return 1; + +found: if (early_hpet_map_size == EARLY_MAP_SIZE) { pr_err("Early HPET map overflow - ignoring ivrs_hpet%s\n", str); @@ -3466,19 +3478,36 @@ static int __init parse_ivrs_hpet(char *str) static int __init parse_ivrs_acpihid(char *str) { u32 seg = 0, bus, dev, fn; - char *hid, *uid, *p; + char *hid, *uid, *p, *addr; char acpiid[ACPIHID_UID_LEN + ACPIHID_HID_LEN] = {0}; - int ret, i; - - ret = sscanf(str, "[%x:%x.%x]=%s", &bus, &dev, &fn, acpiid); - if (ret != 4) { - ret = sscanf(str, "[%x:%x:%x.%x]=%s", &seg, &bus, &dev, &fn, acpiid); - if (ret != 5) { - pr_err("Invalid command line: ivrs_acpihid(%s)\n", str); - return 1; + int i; + + addr = strchr(str, '@'); + if (!addr) { + if (sscanf(str, "[%x:%x.%x]=%s", &bus, &dev, &fn, acpiid) == 4 || + sscanf(str, "[%x:%x:%x.%x]=%s", &seg, &bus, &dev, &fn, acpiid) == 5) { + pr_warn("ivrs_acpihid%s option format deprecated; use ivrs_acpihid=%s@%04x:%02x:%02x.%d instead\n", + str, acpiid, seg, bus, dev, fn); + goto found; } + goto not_found; } + /* We have the '@', make it the terminator to get just the acpiid */ + *addr++ = 0; + + if (sscanf(str, "=%s", acpiid) != 1) + goto not_found; + + if (sscanf(addr, "%x:%x.%x", &bus, &dev, &fn) == 3 || + sscanf(addr, "%x:%x:%x.%x", &seg, &bus, &dev, &fn) == 4) + goto found; + +not_found: + pr_err("Invalid command line: ivrs_acpihid%s\n", str); + return 1; + +found: p = acpiid; hid = strsep(&p, ":"); uid = p; @@ -3488,6 +3517,13 @@ static int __init parse_ivrs_acpihid(char *str) return 1; } + /* + * Ignore leading zeroes after ':', so e.g., AMDI0095:00 + * will match AMDI0095:0 in the second strcmp in acpi_dev_hid_uid_match + */ + while (*uid == '0' && *(uid + 1)) + uid++; + i = early_acpihid_map_size++; memcpy(early_acpihid_map[i].hid, hid, strlen(hid)); memcpy(early_acpihid_map[i].uid, uid, strlen(uid)); diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index d3b39d0416fa..5c381cc3658e 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -767,7 +767,7 @@ EXPORT_SYMBOL(amd_iommu_register_ga_log_notifier); static void iommu_poll_ga_log(struct amd_iommu *iommu) { - u32 head, tail, cnt = 0; + u32 head, tail; if (iommu->ga_log == NULL) return; @@ -780,7 +780,6 @@ static void iommu_poll_ga_log(struct amd_iommu *iommu) u64 log_entry; raw = (u64 *)(iommu->ga_log + head); - cnt++; /* Avoid memcpy function-call overhead */ log_entry = *raw; @@ -2155,21 +2154,13 @@ static void amd_iommu_detach_device(struct iommu_domain *dom, static int amd_iommu_attach_device(struct iommu_domain *dom, struct device *dev) { + struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev); struct protection_domain *domain = to_pdomain(dom); - struct iommu_dev_data *dev_data; - struct amd_iommu *iommu; + struct amd_iommu *iommu = rlookup_amd_iommu(dev); int ret; - if (!check_device(dev)) - return -EINVAL; - - dev_data = dev_iommu_priv_get(dev); dev_data->defer_attach = false; - iommu = rlookup_amd_iommu(dev); - if (!iommu) - return -EINVAL; - if (dev_data->domain) detach_device(dev); diff --git a/drivers/iommu/amd/iommu_v2.c b/drivers/iommu/amd/iommu_v2.c index 6a1f02c62dff..864e4ffb6aa9 100644 --- a/drivers/iommu/amd/iommu_v2.c +++ b/drivers/iommu/amd/iommu_v2.c @@ -587,6 +587,7 @@ out_drop_state: put_device_state(dev_state); out: + pci_dev_put(pdev); return ret; } @@ -639,7 +640,9 @@ int amd_iommu_bind_pasid(struct pci_dev *pdev, u32 pasid, if (pasid_state->mm == NULL) goto out_free; - mmu_notifier_register(&pasid_state->mn, mm); + ret = mmu_notifier_register(&pasid_state->mn, mm); + if (ret) + goto out_free; ret = set_pasid_state(dev_state, pasid_state, pasid); if (ret) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c index 5968a568aae2..a5a63b1c947e 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c @@ -10,7 +10,7 @@ #include <linux/slab.h> #include "arm-smmu-v3.h" -#include "../../iommu-sva-lib.h" +#include "../../iommu-sva.h" #include "../../io-pgtable-arm.h" struct arm_smmu_mmu_notifier { @@ -344,11 +344,6 @@ __arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm) if (!bond) return ERR_PTR(-ENOMEM); - /* Allocate a PASID for this mm if necessary */ - ret = iommu_sva_alloc_pasid(mm, 1, (1U << master->ssid_bits) - 1); - if (ret) - goto err_free_bond; - bond->mm = mm; bond->sva.dev = dev; refcount_set(&bond->refs, 1); @@ -367,42 +362,6 @@ err_free_bond: return ERR_PTR(ret); } -struct iommu_sva * -arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm, void *drvdata) -{ - struct iommu_sva *handle; - struct iommu_domain *domain = iommu_get_domain_for_dev(dev); - struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); - - if (smmu_domain->stage != ARM_SMMU_DOMAIN_S1) - return ERR_PTR(-EINVAL); - - mutex_lock(&sva_lock); - handle = __arm_smmu_sva_bind(dev, mm); - mutex_unlock(&sva_lock); - return handle; -} - -void arm_smmu_sva_unbind(struct iommu_sva *handle) -{ - struct arm_smmu_bond *bond = sva_to_bond(handle); - - mutex_lock(&sva_lock); - if (refcount_dec_and_test(&bond->refs)) { - list_del(&bond->list); - arm_smmu_mmu_notifier_put(bond->smmu_mn); - kfree(bond); - } - mutex_unlock(&sva_lock); -} - -u32 arm_smmu_sva_get_pasid(struct iommu_sva *handle) -{ - struct arm_smmu_bond *bond = sva_to_bond(handle); - - return bond->mm->pasid; -} - bool arm_smmu_sva_supported(struct arm_smmu_device *smmu) { unsigned long reg, fld; @@ -550,3 +509,64 @@ void arm_smmu_sva_notifier_synchronize(void) */ mmu_notifier_synchronize(); } + +void arm_smmu_sva_remove_dev_pasid(struct iommu_domain *domain, + struct device *dev, ioasid_t id) +{ + struct mm_struct *mm = domain->mm; + struct arm_smmu_bond *bond = NULL, *t; + struct arm_smmu_master *master = dev_iommu_priv_get(dev); + + mutex_lock(&sva_lock); + list_for_each_entry(t, &master->bonds, list) { + if (t->mm == mm) { + bond = t; + break; + } + } + + if (!WARN_ON(!bond) && refcount_dec_and_test(&bond->refs)) { + list_del(&bond->list); + arm_smmu_mmu_notifier_put(bond->smmu_mn); + kfree(bond); + } + mutex_unlock(&sva_lock); +} + +static int arm_smmu_sva_set_dev_pasid(struct iommu_domain *domain, + struct device *dev, ioasid_t id) +{ + int ret = 0; + struct iommu_sva *handle; + struct mm_struct *mm = domain->mm; + + mutex_lock(&sva_lock); + handle = __arm_smmu_sva_bind(dev, mm); + if (IS_ERR(handle)) + ret = PTR_ERR(handle); + mutex_unlock(&sva_lock); + + return ret; +} + +static void arm_smmu_sva_domain_free(struct iommu_domain *domain) +{ + kfree(domain); +} + +static const struct iommu_domain_ops arm_smmu_sva_domain_ops = { + .set_dev_pasid = arm_smmu_sva_set_dev_pasid, + .free = arm_smmu_sva_domain_free +}; + +struct iommu_domain *arm_smmu_sva_domain_alloc(void) +{ + struct iommu_domain *domain; + + domain = kzalloc(sizeof(*domain), GFP_KERNEL); + if (!domain) + return NULL; + domain->ops = &arm_smmu_sva_domain_ops; + + return domain; +} diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 6d5df91c5c46..ab160198edd6 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -29,7 +29,7 @@ #include "arm-smmu-v3.h" #include "../../dma-iommu.h" -#include "../../iommu-sva-lib.h" +#include "../../iommu-sva.h" static bool disable_bypass = true; module_param(disable_bypass, bool, 0444); @@ -2009,6 +2009,9 @@ static struct iommu_domain *arm_smmu_domain_alloc(unsigned type) { struct arm_smmu_domain *smmu_domain; + if (type == IOMMU_DOMAIN_SVA) + return arm_smmu_sva_domain_alloc(); + if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_DMA && type != IOMMU_DOMAIN_DMA_FQ && @@ -2430,23 +2433,14 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) goto out_unlock; } } else if (smmu_domain->smmu != smmu) { - dev_err(dev, - "cannot attach to SMMU %s (upstream of %s)\n", - dev_name(smmu_domain->smmu->dev), - dev_name(smmu->dev)); - ret = -ENXIO; + ret = -EINVAL; goto out_unlock; } else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 && master->ssid_bits != smmu_domain->s1_cfg.s1cdmax) { - dev_err(dev, - "cannot attach to incompatible domain (%u SSID bits != %u)\n", - smmu_domain->s1_cfg.s1cdmax, master->ssid_bits); ret = -EINVAL; goto out_unlock; } else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 && smmu_domain->stall_enabled != master->stall_enabled) { - dev_err(dev, "cannot attach to stall-%s domain\n", - smmu_domain->stall_enabled ? "enabled" : "disabled"); ret = -EINVAL; goto out_unlock; } @@ -2838,6 +2832,17 @@ static int arm_smmu_def_domain_type(struct device *dev) return 0; } +static void arm_smmu_remove_dev_pasid(struct device *dev, ioasid_t pasid) +{ + struct iommu_domain *domain; + + domain = iommu_get_domain_for_dev_pasid(dev, pasid, IOMMU_DOMAIN_SVA); + if (WARN_ON(IS_ERR(domain)) || !domain) + return; + + arm_smmu_sva_remove_dev_pasid(domain, dev, pasid); +} + static struct iommu_ops arm_smmu_ops = { .capable = arm_smmu_capable, .domain_alloc = arm_smmu_domain_alloc, @@ -2846,11 +2851,9 @@ static struct iommu_ops arm_smmu_ops = { .device_group = arm_smmu_device_group, .of_xlate = arm_smmu_of_xlate, .get_resv_regions = arm_smmu_get_resv_regions, + .remove_dev_pasid = arm_smmu_remove_dev_pasid, .dev_enable_feat = arm_smmu_dev_enable_feature, .dev_disable_feat = arm_smmu_dev_disable_feature, - .sva_bind = arm_smmu_sva_bind, - .sva_unbind = arm_smmu_sva_unbind, - .sva_get_pasid = arm_smmu_sva_get_pasid, .page_response = arm_smmu_page_response, .def_domain_type = arm_smmu_def_domain_type, .pgsize_bitmap = -1UL, /* Restricted during device attach */ @@ -3543,6 +3546,7 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu) /* SID/SSID sizes */ smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg); smmu->sid_bits = FIELD_GET(IDR1_SIDSIZE, reg); + smmu->iommu.max_pasids = 1UL << smmu->ssid_bits; /* * If the SMMU supports fewer bits than would fill a single L2 stream diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index cd48590ada30..8d772ea8a583 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -754,11 +754,10 @@ bool arm_smmu_master_sva_enabled(struct arm_smmu_master *master); int arm_smmu_master_enable_sva(struct arm_smmu_master *master); int arm_smmu_master_disable_sva(struct arm_smmu_master *master); bool arm_smmu_master_iopf_supported(struct arm_smmu_master *master); -struct iommu_sva *arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm, - void *drvdata); -void arm_smmu_sva_unbind(struct iommu_sva *handle); -u32 arm_smmu_sva_get_pasid(struct iommu_sva *handle); void arm_smmu_sva_notifier_synchronize(void); +struct iommu_domain *arm_smmu_sva_domain_alloc(void); +void arm_smmu_sva_remove_dev_pasid(struct iommu_domain *domain, + struct device *dev, ioasid_t id); #else /* CONFIG_ARM_SMMU_V3_SVA */ static inline bool arm_smmu_sva_supported(struct arm_smmu_device *smmu) { @@ -790,19 +789,17 @@ static inline bool arm_smmu_master_iopf_supported(struct arm_smmu_master *master return false; } -static inline struct iommu_sva * -arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm, void *drvdata) +static inline void arm_smmu_sva_notifier_synchronize(void) {} + +static inline struct iommu_domain *arm_smmu_sva_domain_alloc(void) { - return ERR_PTR(-ENODEV); + return NULL; } -static inline void arm_smmu_sva_unbind(struct iommu_sva *handle) {} - -static inline u32 arm_smmu_sva_get_pasid(struct iommu_sva *handle) +static inline void arm_smmu_sva_remove_dev_pasid(struct iommu_domain *domain, + struct device *dev, + ioasid_t id) { - return IOMMU_PASID_INVALID; } - -static inline void arm_smmu_sva_notifier_synchronize(void) {} #endif /* CONFIG_ARM_SMMU_V3_SVA */ #endif /* _ARM_SMMU_V3_H */ diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c b/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c index 658f3cc83278..9dc772f2cbb2 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c @@ -136,6 +136,9 @@ int arm_mmu500_reset(struct arm_smmu_device *smmu) reg = arm_smmu_cb_read(smmu, i, ARM_SMMU_CB_ACTLR); reg &= ~ARM_MMU500_ACTLR_CPRE; arm_smmu_cb_write(smmu, i, ARM_SMMU_CB_ACTLR, reg); + reg = arm_smmu_cb_read(smmu, i, ARM_SMMU_CB_ACTLR); + if (reg & ARM_MMU500_ACTLR_CPRE) + dev_warn_once(smmu->dev, "Failed to disable prefetcher [errata #841119 and #826419], check ACR.CACHE_LOCK\n"); } return 0; diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c index 6eed8e67a0ca..74e9ef2fd580 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c @@ -10,16 +10,6 @@ #include "arm-smmu.h" #include "arm-smmu-qcom.h" -enum qcom_smmu_impl_reg_offset { - QCOM_SMMU_TBU_PWR_STATUS, - QCOM_SMMU_STATS_SYNC_INV_TBU_ACK, - QCOM_SMMU_MMU2QSS_AND_SAFE_WAIT_CNTR, -}; - -struct qcom_smmu_config { - const u32 *reg_offset; -}; - void qcom_smmu_tlb_sync_debug(struct arm_smmu_device *smmu) { int ret; @@ -59,84 +49,3 @@ void qcom_smmu_tlb_sync_debug(struct arm_smmu_device *smmu) tbu_pwr_status, sync_inv_ack, sync_inv_progress); } } - -/* Implementation Defined Register Space 0 register offsets */ -static const u32 qcom_smmu_impl0_reg_offset[] = { - [QCOM_SMMU_TBU_PWR_STATUS] = 0x2204, - [QCOM_SMMU_STATS_SYNC_INV_TBU_ACK] = 0x25dc, - [QCOM_SMMU_MMU2QSS_AND_SAFE_WAIT_CNTR] = 0x2670, -}; - -static const struct qcom_smmu_config qcm2290_smmu_cfg = { - .reg_offset = qcom_smmu_impl0_reg_offset, -}; - -static const struct qcom_smmu_config sc7180_smmu_cfg = { - .reg_offset = qcom_smmu_impl0_reg_offset, -}; - -static const struct qcom_smmu_config sc7280_smmu_cfg = { - .reg_offset = qcom_smmu_impl0_reg_offset, -}; - -static const struct qcom_smmu_config sc8180x_smmu_cfg = { - .reg_offset = qcom_smmu_impl0_reg_offset, -}; - -static const struct qcom_smmu_config sc8280xp_smmu_cfg = { - .reg_offset = qcom_smmu_impl0_reg_offset, -}; - -static const struct qcom_smmu_config sm6125_smmu_cfg = { - .reg_offset = qcom_smmu_impl0_reg_offset, -}; - -static const struct qcom_smmu_config sm6350_smmu_cfg = { - .reg_offset = qcom_smmu_impl0_reg_offset, -}; - -static const struct qcom_smmu_config sm8150_smmu_cfg = { - .reg_offset = qcom_smmu_impl0_reg_offset, -}; - -static const struct qcom_smmu_config sm8250_smmu_cfg = { - .reg_offset = qcom_smmu_impl0_reg_offset, -}; - -static const struct qcom_smmu_config sm8350_smmu_cfg = { - .reg_offset = qcom_smmu_impl0_reg_offset, -}; - -static const struct qcom_smmu_config sm8450_smmu_cfg = { - .reg_offset = qcom_smmu_impl0_reg_offset, -}; - -static const struct of_device_id __maybe_unused qcom_smmu_impl_debug_match[] = { - { .compatible = "qcom,msm8998-smmu-v2" }, - { .compatible = "qcom,qcm2290-smmu-500", .data = &qcm2290_smmu_cfg }, - { .compatible = "qcom,sc7180-smmu-500", .data = &sc7180_smmu_cfg }, - { .compatible = "qcom,sc7280-smmu-500", .data = &sc7280_smmu_cfg}, - { .compatible = "qcom,sc8180x-smmu-500", .data = &sc8180x_smmu_cfg }, - { .compatible = "qcom,sc8280xp-smmu-500", .data = &sc8280xp_smmu_cfg }, - { .compatible = "qcom,sdm630-smmu-v2" }, - { .compatible = "qcom,sdm845-smmu-500" }, - { .compatible = "qcom,sm6125-smmu-500", .data = &sm6125_smmu_cfg}, - { .compatible = "qcom,sm6350-smmu-500", .data = &sm6350_smmu_cfg}, - { .compatible = "qcom,sm8150-smmu-500", .data = &sm8150_smmu_cfg }, - { .compatible = "qcom,sm8250-smmu-500", .data = &sm8250_smmu_cfg }, - { .compatible = "qcom,sm8350-smmu-500", .data = &sm8350_smmu_cfg }, - { .compatible = "qcom,sm8450-smmu-500", .data = &sm8450_smmu_cfg }, - { } -}; - -const void *qcom_smmu_impl_data(struct arm_smmu_device *smmu) -{ - const struct of_device_id *match; - const struct device_node *np = smmu->dev->of_node; - - match = of_match_node(qcom_smmu_impl_debug_match, np); - if (!match) - return NULL; - - return match->data; -} diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c index b2708de25ea3..91d404deb115 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c @@ -361,6 +361,8 @@ static int qcom_sdm845_smmu500_reset(struct arm_smmu_device *smmu) { int ret; + arm_mmu500_reset(smmu); + /* * To address performance degradation in non-real time clients, * such as USB and UFS, turn off wait-for-safe on sdm845 based boards, @@ -374,41 +376,67 @@ static int qcom_sdm845_smmu500_reset(struct arm_smmu_device *smmu) return ret; } -static int qcom_smmu500_reset(struct arm_smmu_device *smmu) -{ - const struct device_node *np = smmu->dev->of_node; - - arm_mmu500_reset(smmu); - - if (of_device_is_compatible(np, "qcom,sdm845-smmu-500")) - return qcom_sdm845_smmu500_reset(smmu); +static const struct arm_smmu_impl qcom_smmu_v2_impl = { + .init_context = qcom_smmu_init_context, + .cfg_probe = qcom_smmu_cfg_probe, + .def_domain_type = qcom_smmu_def_domain_type, + .write_s2cr = qcom_smmu_write_s2cr, + .tlb_sync = qcom_smmu_tlb_sync, +}; - return 0; -} +static const struct arm_smmu_impl qcom_smmu_500_impl = { + .init_context = qcom_smmu_init_context, + .cfg_probe = qcom_smmu_cfg_probe, + .def_domain_type = qcom_smmu_def_domain_type, + .reset = arm_mmu500_reset, + .write_s2cr = qcom_smmu_write_s2cr, + .tlb_sync = qcom_smmu_tlb_sync, +}; -static const struct arm_smmu_impl qcom_smmu_impl = { +static const struct arm_smmu_impl sdm845_smmu_500_impl = { .init_context = qcom_smmu_init_context, .cfg_probe = qcom_smmu_cfg_probe, .def_domain_type = qcom_smmu_def_domain_type, - .reset = qcom_smmu500_reset, + .reset = qcom_sdm845_smmu500_reset, .write_s2cr = qcom_smmu_write_s2cr, .tlb_sync = qcom_smmu_tlb_sync, }; -static const struct arm_smmu_impl qcom_adreno_smmu_impl = { +static const struct arm_smmu_impl qcom_adreno_smmu_v2_impl = { + .init_context = qcom_adreno_smmu_init_context, + .def_domain_type = qcom_smmu_def_domain_type, + .alloc_context_bank = qcom_adreno_smmu_alloc_context_bank, + .write_sctlr = qcom_adreno_smmu_write_sctlr, + .tlb_sync = qcom_smmu_tlb_sync, +}; + +static const struct arm_smmu_impl qcom_adreno_smmu_500_impl = { .init_context = qcom_adreno_smmu_init_context, .def_domain_type = qcom_smmu_def_domain_type, - .reset = qcom_smmu500_reset, + .reset = arm_mmu500_reset, .alloc_context_bank = qcom_adreno_smmu_alloc_context_bank, .write_sctlr = qcom_adreno_smmu_write_sctlr, .tlb_sync = qcom_smmu_tlb_sync, }; static struct arm_smmu_device *qcom_smmu_create(struct arm_smmu_device *smmu, - const struct arm_smmu_impl *impl) + const struct qcom_smmu_match_data *data) { + const struct device_node *np = smmu->dev->of_node; + const struct arm_smmu_impl *impl; struct qcom_smmu *qsmmu; + if (!data) + return ERR_PTR(-EINVAL); + + if (np && of_device_is_compatible(np, "qcom,adreno-smmu")) + impl = data->adreno_impl; + else + impl = data->impl; + + if (!impl) + return smmu; + /* Check to make sure qcom_scm has finished probing */ if (!qcom_scm_is_available()) return ERR_PTR(-EPROBE_DEFER); @@ -418,27 +446,77 @@ static struct arm_smmu_device *qcom_smmu_create(struct arm_smmu_device *smmu, return ERR_PTR(-ENOMEM); qsmmu->smmu.impl = impl; - qsmmu->cfg = qcom_smmu_impl_data(smmu); + qsmmu->cfg = data->cfg; return &qsmmu->smmu; } +/* Implementation Defined Register Space 0 register offsets */ +static const u32 qcom_smmu_impl0_reg_offset[] = { + [QCOM_SMMU_TBU_PWR_STATUS] = 0x2204, + [QCOM_SMMU_STATS_SYNC_INV_TBU_ACK] = 0x25dc, + [QCOM_SMMU_MMU2QSS_AND_SAFE_WAIT_CNTR] = 0x2670, +}; + +static const struct qcom_smmu_config qcom_smmu_impl0_cfg = { + .reg_offset = qcom_smmu_impl0_reg_offset, +}; + +/* + * It is not yet possible to use MDP SMMU with the bypass quirk on the msm8996, + * there are not enough context banks. + */ +static const struct qcom_smmu_match_data msm8996_smmu_data = { + .impl = NULL, + .adreno_impl = &qcom_adreno_smmu_v2_impl, +}; + +static const struct qcom_smmu_match_data qcom_smmu_v2_data = { + .impl = &qcom_smmu_v2_impl, + .adreno_impl = &qcom_adreno_smmu_v2_impl, +}; + +static const struct qcom_smmu_match_data sdm845_smmu_500_data = { + .impl = &sdm845_smmu_500_impl, + /* + * No need for adreno impl here. On sdm845 the Adreno SMMU is handled + * by the separate sdm845-smmu-v2 device. + */ + /* Also no debug configuration. */ +}; + +static const struct qcom_smmu_match_data qcom_smmu_500_impl0_data = { + .impl = &qcom_smmu_500_impl, + .adreno_impl = &qcom_adreno_smmu_500_impl, + .cfg = &qcom_smmu_impl0_cfg, +}; + +/* + * Do not add any more qcom,SOC-smmu-500 entries to this list, unless they need + * special handling and can not be covered by the qcom,smmu-500 entry. + */ static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = { - { .compatible = "qcom,msm8998-smmu-v2" }, - { .compatible = "qcom,qcm2290-smmu-500" }, - { .compatible = "qcom,sc7180-smmu-500" }, - { .compatible = "qcom,sc7280-smmu-500" }, - { .compatible = "qcom,sc8180x-smmu-500" }, - { .compatible = "qcom,sc8280xp-smmu-500" }, - { .compatible = "qcom,sdm630-smmu-v2" }, - { .compatible = "qcom,sdm845-smmu-500" }, - { .compatible = "qcom,sm6125-smmu-500" }, - { .compatible = "qcom,sm6350-smmu-500" }, - { .compatible = "qcom,sm6375-smmu-500" }, - { .compatible = "qcom,sm8150-smmu-500" }, - { .compatible = "qcom,sm8250-smmu-500" }, - { .compatible = "qcom,sm8350-smmu-500" }, - { .compatible = "qcom,sm8450-smmu-500" }, + { .compatible = "qcom,msm8996-smmu-v2", .data = &msm8996_smmu_data }, + { .compatible = "qcom,msm8998-smmu-v2", .data = &qcom_smmu_v2_data }, + { .compatible = "qcom,qcm2290-smmu-500", .data = &qcom_smmu_500_impl0_data }, + { .compatible = "qcom,qdu1000-smmu-500", .data = &qcom_smmu_500_impl0_data }, + { .compatible = "qcom,sc7180-smmu-500", .data = &qcom_smmu_500_impl0_data }, + { .compatible = "qcom,sc7280-smmu-500", .data = &qcom_smmu_500_impl0_data }, + { .compatible = "qcom,sc8180x-smmu-500", .data = &qcom_smmu_500_impl0_data }, + { .compatible = "qcom,sc8280xp-smmu-500", .data = &qcom_smmu_500_impl0_data }, + { .compatible = "qcom,sdm630-smmu-v2", .data = &qcom_smmu_v2_data }, + { .compatible = "qcom,sdm845-smmu-v2", .data = &qcom_smmu_v2_data }, + { .compatible = "qcom,sdm845-smmu-500", .data = &sdm845_smmu_500_data }, + { .compatible = "qcom,sm6115-smmu-500", .data = &qcom_smmu_500_impl0_data}, + { .compatible = "qcom,sm6125-smmu-500", .data = &qcom_smmu_500_impl0_data }, + { .compatible = "qcom,sm6350-smmu-v2", .data = &qcom_smmu_v2_data }, + { .compatible = "qcom,sm6350-smmu-500", .data = &qcom_smmu_500_impl0_data }, + { .compatible = "qcom,sm6375-smmu-500", .data = &qcom_smmu_500_impl0_data }, + { .compatible = "qcom,sm8150-smmu-500", .data = &qcom_smmu_500_impl0_data }, + { .compatible = "qcom,sm8250-smmu-500", .data = &qcom_smmu_500_impl0_data }, + { .compatible = "qcom,sm8350-smmu-500", .data = &qcom_smmu_500_impl0_data }, + { .compatible = "qcom,sm8450-smmu-500", .data = &qcom_smmu_500_impl0_data }, + { .compatible = "qcom,smmu-500", .data = &qcom_smmu_500_impl0_data }, { } }; @@ -453,26 +531,19 @@ static struct acpi_platform_list qcom_acpi_platlist[] = { struct arm_smmu_device *qcom_smmu_impl_init(struct arm_smmu_device *smmu) { const struct device_node *np = smmu->dev->of_node; + const struct of_device_id *match; #ifdef CONFIG_ACPI if (np == NULL) { /* Match platform for ACPI boot */ if (acpi_match_platform_list(qcom_acpi_platlist) >= 0) - return qcom_smmu_create(smmu, &qcom_smmu_impl); + return qcom_smmu_create(smmu, &qcom_smmu_500_impl0_data); } #endif - /* - * Do not change this order of implementation, i.e., first adreno - * smmu impl and then apss smmu since we can have both implementing - * arm,mmu-500 in which case we will miss setting adreno smmu specific - * features if the order is changed. - */ - if (of_device_is_compatible(np, "qcom,adreno-smmu")) - return qcom_smmu_create(smmu, &qcom_adreno_smmu_impl); - - if (of_match_node(qcom_smmu_impl_of_match, np)) - return qcom_smmu_create(smmu, &qcom_smmu_impl); + match = of_match_node(qcom_smmu_impl_of_match, np); + if (match) + return qcom_smmu_create(smmu, match->data); return smmu; } diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h index 99ec8f8629a0..593910567b88 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h @@ -14,15 +14,26 @@ struct qcom_smmu { u32 stall_enabled; }; +enum qcom_smmu_impl_reg_offset { + QCOM_SMMU_TBU_PWR_STATUS, + QCOM_SMMU_STATS_SYNC_INV_TBU_ACK, + QCOM_SMMU_MMU2QSS_AND_SAFE_WAIT_CNTR, +}; + +struct qcom_smmu_config { + const u32 *reg_offset; +}; + +struct qcom_smmu_match_data { + const struct qcom_smmu_config *cfg; + const struct arm_smmu_impl *impl; + const struct arm_smmu_impl *adreno_impl; +}; + #ifdef CONFIG_ARM_SMMU_QCOM_DEBUG void qcom_smmu_tlb_sync_debug(struct arm_smmu_device *smmu); -const void *qcom_smmu_impl_data(struct arm_smmu_device *smmu); #else static inline void qcom_smmu_tlb_sync_debug(struct arm_smmu_device *smmu) { } -static inline const void *qcom_smmu_impl_data(struct arm_smmu_device *smmu) -{ - return NULL; -} #endif #endif /* _ARM_SMMU_QCOM_H */ diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c index 30dab1418e3f..719fbca1fe52 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c @@ -1150,9 +1150,6 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) * different SMMUs. */ if (smmu_domain->smmu != smmu) { - dev_err(dev, - "cannot attach to SMMU %s whilst already attached to domain on SMMU %s\n", - dev_name(smmu_domain->smmu->dev), dev_name(smmu->dev)); ret = -EINVAL; goto rpm_put; } diff --git a/drivers/iommu/arm/arm-smmu/qcom_iommu.c b/drivers/iommu/arm/arm-smmu/qcom_iommu.c index 3869c3ecda8c..270c3d9128ba 100644 --- a/drivers/iommu/arm/arm-smmu/qcom_iommu.c +++ b/drivers/iommu/arm/arm-smmu/qcom_iommu.c @@ -381,13 +381,8 @@ static int qcom_iommu_attach_dev(struct iommu_domain *domain, struct device *dev * Sanity check the domain. We don't support domains across * different IOMMUs. */ - if (qcom_domain->iommu != qcom_iommu) { - dev_err(dev, "cannot attach to IOMMU %s while already " - "attached to domain on IOMMU %s\n", - dev_name(qcom_domain->iommu->dev), - dev_name(qcom_iommu->dev)); + if (qcom_domain->iommu != qcom_iommu) return -EINVAL; - } return 0; } @@ -415,7 +410,8 @@ static void qcom_iommu_detach_dev(struct iommu_domain *domain, struct device *de } static int qcom_iommu_map(struct iommu_domain *domain, unsigned long iova, - phys_addr_t paddr, size_t size, int prot, gfp_t gfp) + phys_addr_t paddr, size_t pgsize, size_t pgcount, + int prot, gfp_t gfp, size_t *mapped) { int ret; unsigned long flags; @@ -426,13 +422,14 @@ static int qcom_iommu_map(struct iommu_domain *domain, unsigned long iova, return -ENODEV; spin_lock_irqsave(&qcom_domain->pgtbl_lock, flags); - ret = ops->map(ops, iova, paddr, size, prot, GFP_ATOMIC); + ret = ops->map_pages(ops, iova, paddr, pgsize, pgcount, prot, GFP_ATOMIC, mapped); spin_unlock_irqrestore(&qcom_domain->pgtbl_lock, flags); return ret; } static size_t qcom_iommu_unmap(struct iommu_domain *domain, unsigned long iova, - size_t size, struct iommu_iotlb_gather *gather) + size_t pgsize, size_t pgcount, + struct iommu_iotlb_gather *gather) { size_t ret; unsigned long flags; @@ -449,7 +446,7 @@ static size_t qcom_iommu_unmap(struct iommu_domain *domain, unsigned long iova, */ pm_runtime_get_sync(qcom_domain->iommu->dev); spin_lock_irqsave(&qcom_domain->pgtbl_lock, flags); - ret = ops->unmap(ops, iova, size, gather); + ret = ops->unmap_pages(ops, iova, pgsize, pgcount, gather); spin_unlock_irqrestore(&qcom_domain->pgtbl_lock, flags); pm_runtime_put_sync(qcom_domain->iommu->dev); @@ -587,8 +584,8 @@ static const struct iommu_ops qcom_iommu_ops = { .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = qcom_iommu_attach_dev, .detach_dev = qcom_iommu_detach_dev, - .map = qcom_iommu_map, - .unmap = qcom_iommu_unmap, + .map_pages = qcom_iommu_map, + .unmap_pages = qcom_iommu_unmap, .flush_iotlb_all = qcom_iommu_flush_iotlb_all, .iotlb_sync = qcom_iommu_iotlb_sync, .iova_to_phys = qcom_iommu_iova_to_phys, diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index 45fd4850bacb..b0cde2211987 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -708,10 +708,6 @@ static int exynos_sysmmu_probe(struct platform_device *pdev) if (ret) return ret; - ret = iommu_device_register(&data->iommu, &exynos_iommu_ops, dev); - if (ret) - goto err_iommu_register; - platform_set_drvdata(pdev, data); if (PG_ENT_SHIFT < 0) { @@ -743,11 +739,13 @@ static int exynos_sysmmu_probe(struct platform_device *pdev) pm_runtime_enable(dev); + ret = iommu_device_register(&data->iommu, &exynos_iommu_ops, dev); + if (ret) + goto err_dma_set_mask; + return 0; err_dma_set_mask: - iommu_device_unregister(&data->iommu); -err_iommu_register: iommu_device_sysfs_remove(&data->iommu); return ret; } @@ -1432,12 +1430,6 @@ static int __init exynos_iommu_init(void) return -ENOMEM; } - ret = platform_driver_register(&exynos_sysmmu_driver); - if (ret) { - pr_err("%s: Failed to register driver\n", __func__); - goto err_reg_driver; - } - zero_lv2_table = kmem_cache_zalloc(lv2table_kmem_cache, GFP_KERNEL); if (zero_lv2_table == NULL) { pr_err("%s: Failed to allocate zero level2 page table\n", @@ -1446,10 +1438,16 @@ static int __init exynos_iommu_init(void) goto err_zero_lv2; } + ret = platform_driver_register(&exynos_sysmmu_driver); + if (ret) { + pr_err("%s: Failed to register driver\n", __func__); + goto err_reg_driver; + } + return 0; -err_zero_lv2: - platform_driver_unregister(&exynos_sysmmu_driver); err_reg_driver: + platform_driver_unregister(&exynos_sysmmu_driver); +err_zero_lv2: kmem_cache_destroy(lv2table_kmem_cache); return ret; } diff --git a/drivers/iommu/fsl_pamu.c b/drivers/iommu/fsl_pamu.c index 0d03f837a5d4..05d820fb1d0b 100644 --- a/drivers/iommu/fsl_pamu.c +++ b/drivers/iommu/fsl_pamu.c @@ -211,7 +211,7 @@ int pamu_config_ppaace(int liodn, u32 omi, u32 stashid, int prot) ppaace->op_encode.index_ot.omi = omi; } else if (~omi != 0) { pr_debug("bad operation mapping index: %d\n", omi); - return -EINVAL; + return -ENODEV; } /* configure stash id */ @@ -779,7 +779,7 @@ static int fsl_pamu_probe(struct platform_device *pdev) of_get_address(dev->of_node, 0, &size, NULL); irq = irq_of_parse_and_map(dev->of_node, 0); - if (irq == NO_IRQ) { + if (!irq) { dev_warn(dev, "no interrupts listed in PAMU node\n"); goto error; } @@ -868,7 +868,7 @@ static int fsl_pamu_probe(struct platform_device *pdev) ret = create_csd(ppaact_phys, mem_size, csd_port_id); if (ret) { dev_err(dev, "could not create coherence subdomain\n"); - return ret; + goto error; } } @@ -903,7 +903,7 @@ static int fsl_pamu_probe(struct platform_device *pdev) return 0; error: - if (irq != NO_IRQ) + if (irq) free_irq(irq, data); kfree_sensitive(data); diff --git a/drivers/iommu/fsl_pamu_domain.c b/drivers/iommu/fsl_pamu_domain.c index fa20f4b03e12..4408ac3c49b6 100644 --- a/drivers/iommu/fsl_pamu_domain.c +++ b/drivers/iommu/fsl_pamu_domain.c @@ -258,7 +258,7 @@ static int fsl_pamu_attach_device(struct iommu_domain *domain, liodn = of_get_property(dev->of_node, "fsl,liodn", &len); if (!liodn) { pr_debug("missing fsl,liodn property at %pOF\n", dev->of_node); - return -EINVAL; + return -ENODEV; } spin_lock_irqsave(&dma_domain->domain_lock, flags); @@ -267,7 +267,7 @@ static int fsl_pamu_attach_device(struct iommu_domain *domain, if (liodn[i] >= PAACE_NUMBER_ENTRIES) { pr_debug("Invalid liodn %d, attach device failed for %pOF\n", liodn[i], dev->of_node); - ret = -EINVAL; + ret = -ENODEV; break; } diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index 5a8f780e7ffd..b00a0ceb2d13 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -820,6 +820,7 @@ int __init dmar_dev_scope_init(void) info = dmar_alloc_pci_notify_info(dev, BUS_NOTIFY_ADD_DEVICE); if (!info) { + pci_dev_put(dev); return dmar_dev_scope_status; } else { dmar_pci_bus_add_dev(info); @@ -1105,6 +1106,13 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd) raw_spin_lock_init(&iommu->register_lock); /* + * A value of N in PSS field of eCap register indicates hardware + * supports PASID field of N+1 bits. + */ + if (pasid_supported(iommu)) + iommu->iommu.max_pasids = 2UL << ecap_pss(iommu->ecap); + + /* * This is only for hotplug; at boot time intel_iommu_enabled won't * be set yet. When intel_iommu_init() runs, it registers the units * present at boot time, then sets intel_iommu_enabled. diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index a3db7ac3d60c..5fa52a03069f 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -27,7 +27,7 @@ #include "iommu.h" #include "../dma-iommu.h" #include "../irq_remapping.h" -#include "../iommu-sva-lib.h" +#include "../iommu-sva.h" #include "pasid.h" #include "cap_audit.h" @@ -1379,6 +1379,24 @@ static void domain_update_iotlb(struct dmar_domain *domain) spin_unlock_irqrestore(&domain->lock, flags); } +/* + * The extra devTLB flush quirk impacts those QAT devices with PCI device + * IDs ranging from 0x4940 to 0x4943. It is exempted from risky_device() + * check because it applies only to the built-in QAT devices and it doesn't + * grant additional privileges. + */ +#define BUGGY_QAT_DEVID_MASK 0x4940 +static bool dev_needs_extra_dtlb_flush(struct pci_dev *pdev) +{ + if (pdev->vendor != PCI_VENDOR_ID_INTEL) + return false; + + if ((pdev->device & 0xfffc) != BUGGY_QAT_DEVID_MASK) + return false; + + return true; +} + static void iommu_enable_pci_caps(struct device_domain_info *info) { struct pci_dev *pdev; @@ -1461,6 +1479,7 @@ static void __iommu_flush_dev_iotlb(struct device_domain_info *info, qdep = info->ats_qdep; qi_flush_dev_iotlb(info->iommu, sid, info->pfsid, qdep, addr, mask); + quirk_extra_dev_tlb_flush(info, addr, mask, PASID_RID2PASID, qdep); } static void iommu_flush_dev_iotlb(struct dmar_domain *domain, @@ -3808,8 +3827,10 @@ static inline bool has_external_pci(void) struct pci_dev *pdev = NULL; for_each_pci_dev(pdev) - if (pdev->external_facing) + if (pdev->external_facing) { + pci_dev_put(pdev); return true; + } return false; } @@ -4167,6 +4188,8 @@ static struct iommu_domain *intel_iommu_domain_alloc(unsigned type) return domain; case IOMMU_DOMAIN_IDENTITY: return &si_domain->domain; + case IOMMU_DOMAIN_SVA: + return intel_svm_domain_alloc(); default: return NULL; } @@ -4192,19 +4215,15 @@ static int prepare_domain_attach_device(struct iommu_domain *domain, return -ENODEV; if (dmar_domain->force_snooping && !ecap_sc_support(iommu->ecap)) - return -EOPNOTSUPP; + return -EINVAL; /* check if this iommu agaw is sufficient for max mapped address */ addr_width = agaw_to_width(iommu->agaw); if (addr_width > cap_mgaw(iommu->cap)) addr_width = cap_mgaw(iommu->cap); - if (dmar_domain->max_addr > (1LL << addr_width)) { - dev_err(dev, "%s: iommu width (%d) is not " - "sufficient for the mapped address (%llx)\n", - __func__, addr_width, dmar_domain->max_addr); - return -EFAULT; - } + if (dmar_domain->max_addr > (1LL << addr_width)) + return -EINVAL; dmar_domain->gaw = addr_width; /* @@ -4481,9 +4500,10 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev) if (dev_is_pci(dev)) { if (ecap_dev_iotlb_support(iommu->ecap) && pci_ats_supported(pdev) && - dmar_ats_supported(pdev, iommu)) + dmar_ats_supported(pdev, iommu)) { info->ats_supported = 1; - + info->dtlb_extra_inval = dev_needs_extra_dtlb_flush(pdev); + } if (sm_supported(iommu)) { if (pasid_supported(iommu)) { int features = pci_pasid_features(pdev); @@ -4712,6 +4732,28 @@ static void intel_iommu_iotlb_sync_map(struct iommu_domain *domain, __mapping_notify_one(info->iommu, dmar_domain, pfn, pages); } +static void intel_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid) +{ + struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL); + struct iommu_domain *domain; + + /* Domain type specific cleanup: */ + domain = iommu_get_domain_for_dev_pasid(dev, pasid, 0); + if (domain) { + switch (domain->type) { + case IOMMU_DOMAIN_SVA: + intel_svm_remove_dev_pasid(dev, pasid); + break; + default: + /* should never reach here */ + WARN_ON(1); + break; + } + } + + intel_pasid_tear_down_entry(iommu, dev, pasid, false); +} + const struct iommu_ops intel_iommu_ops = { .capable = intel_iommu_capable, .domain_alloc = intel_iommu_domain_alloc, @@ -4724,11 +4766,9 @@ const struct iommu_ops intel_iommu_ops = { .dev_disable_feat = intel_iommu_dev_disable_feat, .is_attach_deferred = intel_iommu_is_attach_deferred, .def_domain_type = device_def_domain_type, + .remove_dev_pasid = intel_iommu_remove_dev_pasid, .pgsize_bitmap = SZ_4K, #ifdef CONFIG_INTEL_IOMMU_SVM - .sva_bind = intel_svm_bind, - .sva_unbind = intel_svm_unbind, - .sva_get_pasid = intel_svm_get_pasid, .page_response = intel_svm_page_response, #endif .default_domain_ops = &(const struct iommu_domain_ops) { @@ -4932,3 +4972,48 @@ static void __init check_tylersburg_isoch(void) pr_warn("Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n", vtisochctrl); } + +/* + * Here we deal with a device TLB defect where device may inadvertently issue ATS + * invalidation completion before posted writes initiated with translated address + * that utilized translations matching the invalidation address range, violating + * the invalidation completion ordering. + * Therefore, any use cases that cannot guarantee DMA is stopped before unmap is + * vulnerable to this defect. In other words, any dTLB invalidation initiated not + * under the control of the trusted/privileged host device driver must use this + * quirk. + * Device TLBs are invalidated under the following six conditions: + * 1. Device driver does DMA API unmap IOVA + * 2. Device driver unbind a PASID from a process, sva_unbind_device() + * 3. PASID is torn down, after PASID cache is flushed. e.g. process + * exit_mmap() due to crash + * 4. Under SVA usage, called by mmu_notifier.invalidate_range() where + * VM has to free pages that were unmapped + * 5. Userspace driver unmaps a DMA buffer + * 6. Cache invalidation in vSVA usage (upcoming) + * + * For #1 and #2, device drivers are responsible for stopping DMA traffic + * before unmap/unbind. For #3, iommu driver gets mmu_notifier to + * invalidate TLB the same way as normal user unmap which will use this quirk. + * The dTLB invalidation after PASID cache flush does not need this quirk. + * + * As a reminder, #6 will *NEED* this quirk as we enable nested translation. + */ +void quirk_extra_dev_tlb_flush(struct device_domain_info *info, + unsigned long address, unsigned long mask, + u32 pasid, u16 qdep) +{ + u16 sid; + + if (likely(!info->dtlb_extra_inval)) + return; + + sid = PCI_DEVID(info->bus, info->devfn); + if (pasid == PASID_RID2PASID) { + qi_flush_dev_iotlb(info->iommu, sid, info->pfsid, + qdep, address, mask); + } else { + qi_flush_dev_iotlb_pasid(info->iommu, sid, info->pfsid, + pasid, qdep, address, mask); + } +} diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index 30b0d72aeb6c..9ab4b1b27d2a 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -480,8 +480,6 @@ enum { #define VTD_FLAG_IRQ_REMAP_PRE_ENABLED (1 << 1) #define VTD_FLAG_SVM_CAPABLE (1 << 2) -extern int intel_iommu_sm; - #define sm_supported(iommu) (intel_iommu_sm && ecap_smts((iommu)->ecap)) #define pasid_supported(iommu) (sm_supported(iommu) && \ ecap_pasid((iommu)->ecap)) @@ -618,6 +616,7 @@ struct device_domain_info { u8 pri_enabled:1; u8 ats_supported:1; u8 ats_enabled:1; + u8 dtlb_extra_inval:1; /* Quirk for devices need extra flush */ u8 ats_qdep; struct device *dev; /* it's NULL for PCIe-to-PCI bridge */ struct intel_iommu *iommu; /* IOMMU used by this device */ @@ -723,6 +722,9 @@ void qi_flush_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid, u64 addr, void qi_flush_dev_iotlb_pasid(struct intel_iommu *iommu, u16 sid, u16 pfsid, u32 pasid, u16 qdep, u64 addr, unsigned int size_order); +void quirk_extra_dev_tlb_flush(struct device_domain_info *info, + unsigned long address, unsigned long pages, + u32 pasid, u16 qdep); void qi_flush_pasid_cache(struct intel_iommu *iommu, u16 did, u64 granu, u32 pasid); @@ -745,12 +747,10 @@ struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn); extern void intel_svm_check(struct intel_iommu *iommu); extern int intel_svm_enable_prq(struct intel_iommu *iommu); extern int intel_svm_finish_prq(struct intel_iommu *iommu); -struct iommu_sva *intel_svm_bind(struct device *dev, struct mm_struct *mm, - void *drvdata); -void intel_svm_unbind(struct iommu_sva *handle); -u32 intel_svm_get_pasid(struct iommu_sva *handle); int intel_svm_page_response(struct device *dev, struct iommu_fault_event *evt, struct iommu_page_response *msg); +struct iommu_domain *intel_svm_domain_alloc(void); +void intel_svm_remove_dev_pasid(struct device *dev, ioasid_t pasid); struct intel_svm_dev { struct list_head list; @@ -775,6 +775,14 @@ struct intel_svm { }; #else static inline void intel_svm_check(struct intel_iommu *iommu) {} +static inline struct iommu_domain *intel_svm_domain_alloc(void) +{ + return NULL; +} + +static inline void intel_svm_remove_dev_pasid(struct device *dev, ioasid_t pasid) +{ +} #endif #ifdef CONFIG_INTEL_IOMMU_DEBUGFS @@ -790,6 +798,7 @@ struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus, extern const struct iommu_ops intel_iommu_ops; #ifdef CONFIG_INTEL_IOMMU +extern int intel_iommu_sm; extern int iommu_calculate_agaw(struct intel_iommu *iommu); extern int iommu_calculate_max_sagaw(struct intel_iommu *iommu); extern int dmar_disabled; @@ -805,6 +814,7 @@ static inline int iommu_calculate_max_sagaw(struct intel_iommu *iommu) } #define dmar_disabled (1) #define intel_iommu_enabled (0) +#define intel_iommu_sm (0) #endif static inline const char *decode_prq_descriptor(char *str, size_t size, diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c index e13d7e5273e1..fb3c7020028d 100644 --- a/drivers/iommu/intel/pasid.c +++ b/drivers/iommu/intel/pasid.c @@ -101,8 +101,10 @@ int intel_pasid_alloc_table(struct device *dev) might_sleep(); info = dev_iommu_priv_get(dev); - if (WARN_ON(!info || !dev_is_pci(dev) || info->pasid_table)) - return -EINVAL; + if (WARN_ON(!info || !dev_is_pci(dev))) + return -ENODEV; + if (WARN_ON(info->pasid_table)) + return -EEXIST; pasid_table = kzalloc(sizeof(*pasid_table), GFP_KERNEL); if (!pasid_table) diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 7d08eb034f2d..c76b66263467 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -24,7 +24,7 @@ #include "iommu.h" #include "pasid.h" #include "perf.h" -#include "../iommu-sva-lib.h" +#include "../iommu-sva.h" #include "trace.h" static irqreturn_t prq_event_thread(int irq, void *d); @@ -184,10 +184,13 @@ static void __flush_svm_range_dev(struct intel_svm *svm, return; qi_flush_piotlb(sdev->iommu, sdev->did, svm->pasid, address, pages, ih); - if (info->ats_enabled) + if (info->ats_enabled) { qi_flush_dev_iotlb_pasid(sdev->iommu, sdev->sid, info->pfsid, svm->pasid, sdev->qdep, address, order_base_2(pages)); + quirk_extra_dev_tlb_flush(info, address, order_base_2(pages), + svm->pasid, sdev->qdep); + } } static void intel_flush_svm_range_dev(struct intel_svm *svm, @@ -296,19 +299,9 @@ out: return 0; } -static int intel_svm_alloc_pasid(struct device *dev, struct mm_struct *mm, - unsigned int flags) -{ - ioasid_t max_pasid = dev_is_pci(dev) ? - pci_max_pasids(to_pci_dev(dev)) : intel_pasid_max_id; - - return iommu_sva_alloc_pasid(mm, PASID_MIN, max_pasid - 1); -} - static struct iommu_sva *intel_svm_bind_mm(struct intel_iommu *iommu, struct device *dev, - struct mm_struct *mm, - unsigned int flags) + struct mm_struct *mm) { struct device_domain_info *info = dev_iommu_priv_get(dev); struct intel_svm_dev *sdev; @@ -324,22 +317,18 @@ static struct iommu_sva *intel_svm_bind_mm(struct intel_iommu *iommu, svm->pasid = mm->pasid; svm->mm = mm; - svm->flags = flags; INIT_LIST_HEAD_RCU(&svm->devs); - if (!(flags & SVM_FLAG_SUPERVISOR_MODE)) { - svm->notifier.ops = &intel_mmuops; - ret = mmu_notifier_register(&svm->notifier, mm); - if (ret) { - kfree(svm); - return ERR_PTR(ret); - } + svm->notifier.ops = &intel_mmuops; + ret = mmu_notifier_register(&svm->notifier, mm); + if (ret) { + kfree(svm); + return ERR_PTR(ret); } ret = pasid_private_add(svm->pasid, svm); if (ret) { - if (svm->notifier.ops) - mmu_notifier_unregister(&svm->notifier, mm); + mmu_notifier_unregister(&svm->notifier, mm); kfree(svm); return ERR_PTR(ret); } @@ -374,9 +363,7 @@ static struct iommu_sva *intel_svm_bind_mm(struct intel_iommu *iommu, } /* Setup the pasid table: */ - sflags = (flags & SVM_FLAG_SUPERVISOR_MODE) ? - PASID_FLAG_SUPERVISOR_MODE : 0; - sflags |= cpu_feature_enabled(X86_FEATURE_LA57) ? PASID_FLAG_FL5LP : 0; + sflags = cpu_feature_enabled(X86_FEATURE_LA57) ? PASID_FLAG_FL5LP : 0; ret = intel_pasid_setup_first_level(iommu, dev, mm->pgd, mm->pasid, FLPT_DEFAULT_DID, sflags); if (ret) @@ -390,8 +377,7 @@ free_sdev: kfree(sdev); free_svm: if (list_empty(&svm->devs)) { - if (svm->notifier.ops) - mmu_notifier_unregister(&svm->notifier, mm); + mmu_notifier_unregister(&svm->notifier, mm); pasid_private_remove(mm->pasid); kfree(svm); } @@ -745,12 +731,16 @@ bad_req: * If prq is to be handled outside iommu driver via receiver of * the fault notifiers, we skip the page response here. */ - if (!pdev || intel_svm_prq_report(iommu, &pdev->dev, req)) - handle_bad_prq_event(iommu, req, QI_RESP_INVALID); + if (!pdev) + goto bad_req; - trace_prq_report(iommu, &pdev->dev, req->qw_0, req->qw_1, - req->priv_data[0], req->priv_data[1], - iommu->prq_seq_number++); + if (intel_svm_prq_report(iommu, &pdev->dev, req)) + handle_bad_prq_event(iommu, req, QI_RESP_INVALID); + else + trace_prq_report(iommu, &pdev->dev, req->qw_0, req->qw_1, + req->priv_data[0], req->priv_data[1], + iommu->prq_seq_number++); + pci_dev_put(pdev); prq_advance: head = (head + sizeof(*req)) & PRQ_RING_MASK; } @@ -780,67 +770,6 @@ prq_advance: return IRQ_RETVAL(handled); } -struct iommu_sva *intel_svm_bind(struct device *dev, struct mm_struct *mm, void *drvdata) -{ - struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL); - unsigned int flags = 0; - struct iommu_sva *sva; - int ret; - - if (drvdata) - flags = *(unsigned int *)drvdata; - - if (flags & SVM_FLAG_SUPERVISOR_MODE) { - if (!ecap_srs(iommu->ecap)) { - dev_err(dev, "%s: Supervisor PASID not supported\n", - iommu->name); - return ERR_PTR(-EOPNOTSUPP); - } - - if (mm) { - dev_err(dev, "%s: Supervisor PASID with user provided mm\n", - iommu->name); - return ERR_PTR(-EINVAL); - } - - mm = &init_mm; - } - - mutex_lock(&pasid_mutex); - ret = intel_svm_alloc_pasid(dev, mm, flags); - if (ret) { - mutex_unlock(&pasid_mutex); - return ERR_PTR(ret); - } - - sva = intel_svm_bind_mm(iommu, dev, mm, flags); - mutex_unlock(&pasid_mutex); - - return sva; -} - -void intel_svm_unbind(struct iommu_sva *sva) -{ - struct intel_svm_dev *sdev = to_intel_svm_dev(sva); - - mutex_lock(&pasid_mutex); - intel_svm_unbind_mm(sdev->dev, sdev->pasid); - mutex_unlock(&pasid_mutex); -} - -u32 intel_svm_get_pasid(struct iommu_sva *sva) -{ - struct intel_svm_dev *sdev; - u32 pasid; - - mutex_lock(&pasid_mutex); - sdev = to_intel_svm_dev(sva); - pasid = sdev->pasid; - mutex_unlock(&pasid_mutex); - - return pasid; -} - int intel_svm_page_response(struct device *dev, struct iommu_fault_event *evt, struct iommu_page_response *msg) @@ -911,3 +840,50 @@ int intel_svm_page_response(struct device *dev, out: return ret; } + +void intel_svm_remove_dev_pasid(struct device *dev, ioasid_t pasid) +{ + mutex_lock(&pasid_mutex); + intel_svm_unbind_mm(dev, pasid); + mutex_unlock(&pasid_mutex); +} + +static int intel_svm_set_dev_pasid(struct iommu_domain *domain, + struct device *dev, ioasid_t pasid) +{ + struct device_domain_info *info = dev_iommu_priv_get(dev); + struct intel_iommu *iommu = info->iommu; + struct mm_struct *mm = domain->mm; + struct iommu_sva *sva; + int ret = 0; + + mutex_lock(&pasid_mutex); + sva = intel_svm_bind_mm(iommu, dev, mm); + if (IS_ERR(sva)) + ret = PTR_ERR(sva); + mutex_unlock(&pasid_mutex); + + return ret; +} + +static void intel_svm_domain_free(struct iommu_domain *domain) +{ + kfree(to_dmar_domain(domain)); +} + +static const struct iommu_domain_ops intel_svm_domain_ops = { + .set_dev_pasid = intel_svm_set_dev_pasid, + .free = intel_svm_domain_free +}; + +struct iommu_domain *intel_svm_domain_alloc(void) +{ + struct dmar_domain *domain; + + domain = kzalloc(sizeof(*domain), GFP_KERNEL); + if (!domain) + return NULL; + domain->domain.ops = &intel_svm_domain_ops; + + return &domain->domain; +} diff --git a/drivers/iommu/io-pgfault.c b/drivers/iommu/io-pgfault.c index 1df8c1dcae77..e5b8b9110c13 100644 --- a/drivers/iommu/io-pgfault.c +++ b/drivers/iommu/io-pgfault.c @@ -11,7 +11,7 @@ #include <linux/slab.h> #include <linux/workqueue.h> -#include "iommu-sva-lib.h" +#include "iommu-sva.h" /** * struct iopf_queue - IO Page Fault queue @@ -69,69 +69,18 @@ static int iopf_complete_group(struct device *dev, struct iopf_fault *iopf, return iommu_page_response(dev, &resp); } -static enum iommu_page_response_code -iopf_handle_single(struct iopf_fault *iopf) -{ - vm_fault_t ret; - struct mm_struct *mm; - struct vm_area_struct *vma; - unsigned int access_flags = 0; - unsigned int fault_flags = FAULT_FLAG_REMOTE; - struct iommu_fault_page_request *prm = &iopf->fault.prm; - enum iommu_page_response_code status = IOMMU_PAGE_RESP_INVALID; - - if (!(prm->flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID)) - return status; - - mm = iommu_sva_find(prm->pasid); - if (IS_ERR_OR_NULL(mm)) - return status; - - mmap_read_lock(mm); - - vma = find_extend_vma(mm, prm->addr); - if (!vma) - /* Unmapped area */ - goto out_put_mm; - - if (prm->perm & IOMMU_FAULT_PERM_READ) - access_flags |= VM_READ; - - if (prm->perm & IOMMU_FAULT_PERM_WRITE) { - access_flags |= VM_WRITE; - fault_flags |= FAULT_FLAG_WRITE; - } - - if (prm->perm & IOMMU_FAULT_PERM_EXEC) { - access_flags |= VM_EXEC; - fault_flags |= FAULT_FLAG_INSTRUCTION; - } - - if (!(prm->perm & IOMMU_FAULT_PERM_PRIV)) - fault_flags |= FAULT_FLAG_USER; - - if (access_flags & ~vma->vm_flags) - /* Access fault */ - goto out_put_mm; - - ret = handle_mm_fault(vma, prm->addr, fault_flags, NULL); - status = ret & VM_FAULT_ERROR ? IOMMU_PAGE_RESP_INVALID : - IOMMU_PAGE_RESP_SUCCESS; - -out_put_mm: - mmap_read_unlock(mm); - mmput(mm); - - return status; -} - -static void iopf_handle_group(struct work_struct *work) +static void iopf_handler(struct work_struct *work) { struct iopf_group *group; + struct iommu_domain *domain; struct iopf_fault *iopf, *next; enum iommu_page_response_code status = IOMMU_PAGE_RESP_SUCCESS; group = container_of(work, struct iopf_group, work); + domain = iommu_get_domain_for_dev_pasid(group->dev, + group->last_fault.fault.prm.pasid, 0); + if (!domain || !domain->iopf_handler) + status = IOMMU_PAGE_RESP_INVALID; list_for_each_entry_safe(iopf, next, &group->faults, list) { /* @@ -139,7 +88,8 @@ static void iopf_handle_group(struct work_struct *work) * faults in the group if there is an error. */ if (status == IOMMU_PAGE_RESP_SUCCESS) - status = iopf_handle_single(iopf); + status = domain->iopf_handler(&iopf->fault, + domain->fault_data); if (!(iopf->fault.prm.flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE)) @@ -181,6 +131,13 @@ static void iopf_handle_group(struct work_struct *work) * request completes, outstanding faults will have been dealt with by the time * the PASID is freed. * + * Any valid page fault will be eventually routed to an iommu domain and the + * page fault handler installed there will get called. The users of this + * handling framework should guarantee that the iommu domain could only be + * freed after the device has stopped generating page faults (or the iommu + * hardware has been set to block the page faults) and the pending page faults + * have been flushed. + * * Return: 0 on success and <0 on error. */ int iommu_queue_iopf(struct iommu_fault *fault, void *cookie) @@ -235,7 +192,7 @@ int iommu_queue_iopf(struct iommu_fault *fault, void *cookie) group->last_fault.fault = *fault; INIT_LIST_HEAD(&group->faults); list_add(&group->last_fault.list, &group->faults); - INIT_WORK(&group->work, iopf_handle_group); + INIT_WORK(&group->work, iopf_handler); /* See if we have partial faults for this group */ list_for_each_entry_safe(iopf, next, &iopf_param->partial, list) { diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c index ba3115fd0f86..75f244a3e12d 100644 --- a/drivers/iommu/io-pgtable-arm-v7s.c +++ b/drivers/iommu/io-pgtable-arm-v7s.c @@ -564,8 +564,7 @@ static int arm_v7s_map_pages(struct io_pgtable_ops *ops, unsigned long iova, iova += pgsize; paddr += pgsize; - if (mapped) - *mapped += pgsize; + *mapped += pgsize; } /* * Synchronise all PTE updates for the new mapping before there's @@ -576,12 +575,6 @@ static int arm_v7s_map_pages(struct io_pgtable_ops *ops, unsigned long iova, return ret; } -static int arm_v7s_map(struct io_pgtable_ops *ops, unsigned long iova, - phys_addr_t paddr, size_t size, int prot, gfp_t gfp) -{ - return arm_v7s_map_pages(ops, iova, paddr, size, 1, prot, gfp, NULL); -} - static void arm_v7s_free_pgtable(struct io_pgtable *iop) { struct arm_v7s_io_pgtable *data = io_pgtable_to_data(iop); @@ -764,12 +757,6 @@ static size_t arm_v7s_unmap_pages(struct io_pgtable_ops *ops, unsigned long iova return unmapped; } -static size_t arm_v7s_unmap(struct io_pgtable_ops *ops, unsigned long iova, - size_t size, struct iommu_iotlb_gather *gather) -{ - return arm_v7s_unmap_pages(ops, iova, size, 1, gather); -} - static phys_addr_t arm_v7s_iova_to_phys(struct io_pgtable_ops *ops, unsigned long iova) { @@ -842,9 +829,7 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg, goto out_free_data; data->iop.ops = (struct io_pgtable_ops) { - .map = arm_v7s_map, .map_pages = arm_v7s_map_pages, - .unmap = arm_v7s_unmap, .unmap_pages = arm_v7s_unmap_pages, .iova_to_phys = arm_v7s_iova_to_phys, }; @@ -954,6 +939,7 @@ static int __init arm_v7s_do_selftests(void) }; unsigned int iova, size, iova_start; unsigned int i, loopnr = 0; + size_t mapped; selftest_running = true; @@ -984,15 +970,16 @@ static int __init arm_v7s_do_selftests(void) iova = 0; for_each_set_bit(i, &cfg.pgsize_bitmap, BITS_PER_LONG) { size = 1UL << i; - if (ops->map(ops, iova, iova, size, IOMMU_READ | - IOMMU_WRITE | - IOMMU_NOEXEC | - IOMMU_CACHE, GFP_KERNEL)) + if (ops->map_pages(ops, iova, iova, size, 1, + IOMMU_READ | IOMMU_WRITE | + IOMMU_NOEXEC | IOMMU_CACHE, + GFP_KERNEL, &mapped)) return __FAIL(ops); /* Overlapping mappings */ - if (!ops->map(ops, iova, iova + size, size, - IOMMU_READ | IOMMU_NOEXEC, GFP_KERNEL)) + if (!ops->map_pages(ops, iova, iova + size, size, 1, + IOMMU_READ | IOMMU_NOEXEC, GFP_KERNEL, + &mapped)) return __FAIL(ops); if (ops->iova_to_phys(ops, iova + 42) != (iova + 42)) @@ -1007,11 +994,12 @@ static int __init arm_v7s_do_selftests(void) size = 1UL << __ffs(cfg.pgsize_bitmap); while (i < loopnr) { iova_start = i * SZ_16M; - if (ops->unmap(ops, iova_start + size, size, NULL) != size) + if (ops->unmap_pages(ops, iova_start + size, size, 1, NULL) != size) return __FAIL(ops); /* Remap of partial unmap */ - if (ops->map(ops, iova_start + size, size, size, IOMMU_READ, GFP_KERNEL)) + if (ops->map_pages(ops, iova_start + size, size, size, 1, + IOMMU_READ, GFP_KERNEL, &mapped)) return __FAIL(ops); if (ops->iova_to_phys(ops, iova_start + size + 42) @@ -1025,14 +1013,15 @@ static int __init arm_v7s_do_selftests(void) for_each_set_bit(i, &cfg.pgsize_bitmap, BITS_PER_LONG) { size = 1UL << i; - if (ops->unmap(ops, iova, size, NULL) != size) + if (ops->unmap_pages(ops, iova, size, 1, NULL) != size) return __FAIL(ops); if (ops->iova_to_phys(ops, iova + 42)) return __FAIL(ops); /* Remap full block */ - if (ops->map(ops, iova, iova, size, IOMMU_WRITE, GFP_KERNEL)) + if (ops->map_pages(ops, iova, iova, size, 1, IOMMU_WRITE, + GFP_KERNEL, &mapped)) return __FAIL(ops); if (ops->iova_to_phys(ops, iova + 42) != (iova + 42)) diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index 0ba817e86346..72dcdd468cf3 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -360,7 +360,7 @@ static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova, max_entries = ARM_LPAE_PTES_PER_TABLE(data) - map_idx_start; num_entries = min_t(int, pgcount, max_entries); ret = arm_lpae_init_pte(data, iova, paddr, prot, lvl, num_entries, ptep); - if (!ret && mapped) + if (!ret) *mapped += num_entries * size; return ret; @@ -496,13 +496,6 @@ static int arm_lpae_map_pages(struct io_pgtable_ops *ops, unsigned long iova, return ret; } -static int arm_lpae_map(struct io_pgtable_ops *ops, unsigned long iova, - phys_addr_t paddr, size_t size, int iommu_prot, gfp_t gfp) -{ - return arm_lpae_map_pages(ops, iova, paddr, size, 1, iommu_prot, gfp, - NULL); -} - static void __arm_lpae_free_pgtable(struct arm_lpae_io_pgtable *data, int lvl, arm_lpae_iopte *ptep) { @@ -682,12 +675,6 @@ static size_t arm_lpae_unmap_pages(struct io_pgtable_ops *ops, unsigned long iov data->start_level, ptep); } -static size_t arm_lpae_unmap(struct io_pgtable_ops *ops, unsigned long iova, - size_t size, struct iommu_iotlb_gather *gather) -{ - return arm_lpae_unmap_pages(ops, iova, size, 1, gather); -} - static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops, unsigned long iova) { @@ -799,9 +786,7 @@ arm_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg) data->pgd_bits = va_bits - (data->bits_per_level * (levels - 1)); data->iop.ops = (struct io_pgtable_ops) { - .map = arm_lpae_map, .map_pages = arm_lpae_map_pages, - .unmap = arm_lpae_unmap, .unmap_pages = arm_lpae_unmap_pages, .iova_to_phys = arm_lpae_iova_to_phys, }; @@ -1176,7 +1161,7 @@ static int __init arm_lpae_run_tests(struct io_pgtable_cfg *cfg) int i, j; unsigned long iova; - size_t size; + size_t size, mapped; struct io_pgtable_ops *ops; selftest_running = true; @@ -1209,15 +1194,16 @@ static int __init arm_lpae_run_tests(struct io_pgtable_cfg *cfg) for_each_set_bit(j, &cfg->pgsize_bitmap, BITS_PER_LONG) { size = 1UL << j; - if (ops->map(ops, iova, iova, size, IOMMU_READ | - IOMMU_WRITE | - IOMMU_NOEXEC | - IOMMU_CACHE, GFP_KERNEL)) + if (ops->map_pages(ops, iova, iova, size, 1, + IOMMU_READ | IOMMU_WRITE | + IOMMU_NOEXEC | IOMMU_CACHE, + GFP_KERNEL, &mapped)) return __FAIL(ops, i); /* Overlapping mappings */ - if (!ops->map(ops, iova, iova + size, size, - IOMMU_READ | IOMMU_NOEXEC, GFP_KERNEL)) + if (!ops->map_pages(ops, iova, iova + size, size, 1, + IOMMU_READ | IOMMU_NOEXEC, + GFP_KERNEL, &mapped)) return __FAIL(ops, i); if (ops->iova_to_phys(ops, iova + 42) != (iova + 42)) @@ -1228,11 +1214,12 @@ static int __init arm_lpae_run_tests(struct io_pgtable_cfg *cfg) /* Partial unmap */ size = 1UL << __ffs(cfg->pgsize_bitmap); - if (ops->unmap(ops, SZ_1G + size, size, NULL) != size) + if (ops->unmap_pages(ops, SZ_1G + size, size, 1, NULL) != size) return __FAIL(ops, i); /* Remap of partial unmap */ - if (ops->map(ops, SZ_1G + size, size, size, IOMMU_READ, GFP_KERNEL)) + if (ops->map_pages(ops, SZ_1G + size, size, size, 1, + IOMMU_READ, GFP_KERNEL, &mapped)) return __FAIL(ops, i); if (ops->iova_to_phys(ops, SZ_1G + size + 42) != (size + 42)) @@ -1243,14 +1230,15 @@ static int __init arm_lpae_run_tests(struct io_pgtable_cfg *cfg) for_each_set_bit(j, &cfg->pgsize_bitmap, BITS_PER_LONG) { size = 1UL << j; - if (ops->unmap(ops, iova, size, NULL) != size) + if (ops->unmap_pages(ops, iova, size, 1, NULL) != size) return __FAIL(ops, i); if (ops->iova_to_phys(ops, iova + 42)) return __FAIL(ops, i); /* Remap full block */ - if (ops->map(ops, iova, iova, size, IOMMU_WRITE, GFP_KERNEL)) + if (ops->map_pages(ops, iova, iova, size, 1, + IOMMU_WRITE, GFP_KERNEL, &mapped)) return __FAIL(ops, i); if (ops->iova_to_phys(ops, iova + 42) != (iova + 42)) diff --git a/drivers/iommu/iommu-sva-lib.c b/drivers/iommu/iommu-sva-lib.c deleted file mode 100644 index 106506143896..000000000000 --- a/drivers/iommu/iommu-sva-lib.c +++ /dev/null @@ -1,71 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Helpers for IOMMU drivers implementing SVA - */ -#include <linux/mutex.h> -#include <linux/sched/mm.h> - -#include "iommu-sva-lib.h" - -static DEFINE_MUTEX(iommu_sva_lock); -static DECLARE_IOASID_SET(iommu_sva_pasid); - -/** - * iommu_sva_alloc_pasid - Allocate a PASID for the mm - * @mm: the mm - * @min: minimum PASID value (inclusive) - * @max: maximum PASID value (inclusive) - * - * Try to allocate a PASID for this mm, or take a reference to the existing one - * provided it fits within the [@min, @max] range. On success the PASID is - * available in mm->pasid and will be available for the lifetime of the mm. - * - * Returns 0 on success and < 0 on error. - */ -int iommu_sva_alloc_pasid(struct mm_struct *mm, ioasid_t min, ioasid_t max) -{ - int ret = 0; - ioasid_t pasid; - - if (min == INVALID_IOASID || max == INVALID_IOASID || - min == 0 || max < min) - return -EINVAL; - - mutex_lock(&iommu_sva_lock); - /* Is a PASID already associated with this mm? */ - if (pasid_valid(mm->pasid)) { - if (mm->pasid < min || mm->pasid >= max) - ret = -EOVERFLOW; - goto out; - } - - pasid = ioasid_alloc(&iommu_sva_pasid, min, max, mm); - if (!pasid_valid(pasid)) - ret = -ENOMEM; - else - mm_pasid_set(mm, pasid); -out: - mutex_unlock(&iommu_sva_lock); - return ret; -} -EXPORT_SYMBOL_GPL(iommu_sva_alloc_pasid); - -/* ioasid_find getter() requires a void * argument */ -static bool __mmget_not_zero(void *mm) -{ - return mmget_not_zero(mm); -} - -/** - * iommu_sva_find() - Find mm associated to the given PASID - * @pasid: Process Address Space ID assigned to the mm - * - * On success a reference to the mm is taken, and must be released with mmput(). - * - * Returns the mm corresponding to this PASID, or an error if not found. - */ -struct mm_struct *iommu_sva_find(ioasid_t pasid) -{ - return ioasid_find(&iommu_sva_pasid, pasid, __mmget_not_zero); -} -EXPORT_SYMBOL_GPL(iommu_sva_find); diff --git a/drivers/iommu/iommu-sva.c b/drivers/iommu/iommu-sva.c new file mode 100644 index 000000000000..24bf9b2b58aa --- /dev/null +++ b/drivers/iommu/iommu-sva.c @@ -0,0 +1,240 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Helpers for IOMMU drivers implementing SVA + */ +#include <linux/mutex.h> +#include <linux/sched/mm.h> +#include <linux/iommu.h> + +#include "iommu-sva.h" + +static DEFINE_MUTEX(iommu_sva_lock); +static DECLARE_IOASID_SET(iommu_sva_pasid); + +/** + * iommu_sva_alloc_pasid - Allocate a PASID for the mm + * @mm: the mm + * @min: minimum PASID value (inclusive) + * @max: maximum PASID value (inclusive) + * + * Try to allocate a PASID for this mm, or take a reference to the existing one + * provided it fits within the [@min, @max] range. On success the PASID is + * available in mm->pasid and will be available for the lifetime of the mm. + * + * Returns 0 on success and < 0 on error. + */ +int iommu_sva_alloc_pasid(struct mm_struct *mm, ioasid_t min, ioasid_t max) +{ + int ret = 0; + ioasid_t pasid; + + if (min == INVALID_IOASID || max == INVALID_IOASID || + min == 0 || max < min) + return -EINVAL; + + mutex_lock(&iommu_sva_lock); + /* Is a PASID already associated with this mm? */ + if (pasid_valid(mm->pasid)) { + if (mm->pasid < min || mm->pasid >= max) + ret = -EOVERFLOW; + goto out; + } + + pasid = ioasid_alloc(&iommu_sva_pasid, min, max, mm); + if (!pasid_valid(pasid)) + ret = -ENOMEM; + else + mm_pasid_set(mm, pasid); +out: + mutex_unlock(&iommu_sva_lock); + return ret; +} +EXPORT_SYMBOL_GPL(iommu_sva_alloc_pasid); + +/* ioasid_find getter() requires a void * argument */ +static bool __mmget_not_zero(void *mm) +{ + return mmget_not_zero(mm); +} + +/** + * iommu_sva_find() - Find mm associated to the given PASID + * @pasid: Process Address Space ID assigned to the mm + * + * On success a reference to the mm is taken, and must be released with mmput(). + * + * Returns the mm corresponding to this PASID, or an error if not found. + */ +struct mm_struct *iommu_sva_find(ioasid_t pasid) +{ + return ioasid_find(&iommu_sva_pasid, pasid, __mmget_not_zero); +} +EXPORT_SYMBOL_GPL(iommu_sva_find); + +/** + * iommu_sva_bind_device() - Bind a process address space to a device + * @dev: the device + * @mm: the mm to bind, caller must hold a reference to mm_users + * + * Create a bond between device and address space, allowing the device to + * access the mm using the PASID returned by iommu_sva_get_pasid(). If a + * bond already exists between @device and @mm, an additional internal + * reference is taken. Caller must call iommu_sva_unbind_device() + * to release each reference. + * + * iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_SVA) must be called first, to + * initialize the required SVA features. + * + * On error, returns an ERR_PTR value. + */ +struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm) +{ + struct iommu_domain *domain; + struct iommu_sva *handle; + ioasid_t max_pasids; + int ret; + + max_pasids = dev->iommu->max_pasids; + if (!max_pasids) + return ERR_PTR(-EOPNOTSUPP); + + /* Allocate mm->pasid if necessary. */ + ret = iommu_sva_alloc_pasid(mm, 1, max_pasids - 1); + if (ret) + return ERR_PTR(ret); + + handle = kzalloc(sizeof(*handle), GFP_KERNEL); + if (!handle) + return ERR_PTR(-ENOMEM); + + mutex_lock(&iommu_sva_lock); + /* Search for an existing domain. */ + domain = iommu_get_domain_for_dev_pasid(dev, mm->pasid, + IOMMU_DOMAIN_SVA); + if (IS_ERR(domain)) { + ret = PTR_ERR(domain); + goto out_unlock; + } + + if (domain) { + domain->users++; + goto out; + } + + /* Allocate a new domain and set it on device pasid. */ + domain = iommu_sva_domain_alloc(dev, mm); + if (!domain) { + ret = -ENOMEM; + goto out_unlock; + } + + ret = iommu_attach_device_pasid(domain, dev, mm->pasid); + if (ret) + goto out_free_domain; + domain->users = 1; +out: + mutex_unlock(&iommu_sva_lock); + handle->dev = dev; + handle->domain = domain; + + return handle; + +out_free_domain: + iommu_domain_free(domain); +out_unlock: + mutex_unlock(&iommu_sva_lock); + kfree(handle); + + return ERR_PTR(ret); +} +EXPORT_SYMBOL_GPL(iommu_sva_bind_device); + +/** + * iommu_sva_unbind_device() - Remove a bond created with iommu_sva_bind_device + * @handle: the handle returned by iommu_sva_bind_device() + * + * Put reference to a bond between device and address space. The device should + * not be issuing any more transaction for this PASID. All outstanding page + * requests for this PASID must have been flushed to the IOMMU. + */ +void iommu_sva_unbind_device(struct iommu_sva *handle) +{ + struct iommu_domain *domain = handle->domain; + ioasid_t pasid = domain->mm->pasid; + struct device *dev = handle->dev; + + mutex_lock(&iommu_sva_lock); + if (--domain->users == 0) { + iommu_detach_device_pasid(domain, dev, pasid); + iommu_domain_free(domain); + } + mutex_unlock(&iommu_sva_lock); + kfree(handle); +} +EXPORT_SYMBOL_GPL(iommu_sva_unbind_device); + +u32 iommu_sva_get_pasid(struct iommu_sva *handle) +{ + struct iommu_domain *domain = handle->domain; + + return domain->mm->pasid; +} +EXPORT_SYMBOL_GPL(iommu_sva_get_pasid); + +/* + * I/O page fault handler for SVA + */ +enum iommu_page_response_code +iommu_sva_handle_iopf(struct iommu_fault *fault, void *data) +{ + vm_fault_t ret; + struct vm_area_struct *vma; + struct mm_struct *mm = data; + unsigned int access_flags = 0; + unsigned int fault_flags = FAULT_FLAG_REMOTE; + struct iommu_fault_page_request *prm = &fault->prm; + enum iommu_page_response_code status = IOMMU_PAGE_RESP_INVALID; + + if (!(prm->flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID)) + return status; + + if (!mmget_not_zero(mm)) + return status; + + mmap_read_lock(mm); + + vma = find_extend_vma(mm, prm->addr); + if (!vma) + /* Unmapped area */ + goto out_put_mm; + + if (prm->perm & IOMMU_FAULT_PERM_READ) + access_flags |= VM_READ; + + if (prm->perm & IOMMU_FAULT_PERM_WRITE) { + access_flags |= VM_WRITE; + fault_flags |= FAULT_FLAG_WRITE; + } + + if (prm->perm & IOMMU_FAULT_PERM_EXEC) { + access_flags |= VM_EXEC; + fault_flags |= FAULT_FLAG_INSTRUCTION; + } + + if (!(prm->perm & IOMMU_FAULT_PERM_PRIV)) + fault_flags |= FAULT_FLAG_USER; + + if (access_flags & ~vma->vm_flags) + /* Access fault */ + goto out_put_mm; + + ret = handle_mm_fault(vma, prm->addr, fault_flags, NULL); + status = ret & VM_FAULT_ERROR ? IOMMU_PAGE_RESP_INVALID : + IOMMU_PAGE_RESP_SUCCESS; + +out_put_mm: + mmap_read_unlock(mm); + mmput(mm); + + return status; +} diff --git a/drivers/iommu/iommu-sva-lib.h b/drivers/iommu/iommu-sva.h index 8909ea1094e3..7215a761b962 100644 --- a/drivers/iommu/iommu-sva-lib.h +++ b/drivers/iommu/iommu-sva.h @@ -2,8 +2,8 @@ /* * SVA library for IOMMU drivers */ -#ifndef _IOMMU_SVA_LIB_H -#define _IOMMU_SVA_LIB_H +#ifndef _IOMMU_SVA_H +#define _IOMMU_SVA_H #include <linux/ioasid.h> #include <linux/mm_types.h> @@ -26,6 +26,8 @@ int iopf_queue_flush_dev(struct device *dev); struct iopf_queue *iopf_queue_alloc(const char *name); void iopf_queue_free(struct iopf_queue *queue); int iopf_queue_discard_partial(struct iopf_queue *queue); +enum iommu_page_response_code +iommu_sva_handle_iopf(struct iommu_fault *fault, void *data); #else /* CONFIG_IOMMU_SVA */ static inline int iommu_queue_iopf(struct iommu_fault *fault, void *cookie) @@ -63,5 +65,11 @@ static inline int iopf_queue_discard_partial(struct iopf_queue *queue) { return -ENODEV; } + +static inline enum iommu_page_response_code +iommu_sva_handle_iopf(struct iommu_fault *fault, void *data) +{ + return IOMMU_PAGE_RESP_INVALID; +} #endif /* CONFIG_IOMMU_SVA */ -#endif /* _IOMMU_SVA_LIB_H */ +#endif /* _IOMMU_SVA_H */ diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 65a3b3d886dc..7c99d8eb3182 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -21,6 +21,7 @@ #include <linux/idr.h> #include <linux/err.h> #include <linux/pci.h> +#include <linux/pci-ats.h> #include <linux/bitops.h> #include <linux/platform_device.h> #include <linux/property.h> @@ -28,9 +29,12 @@ #include <linux/module.h> #include <linux/cc_platform.h> #include <trace/events/iommu.h> +#include <linux/sched/mm.h> #include "dma-iommu.h" +#include "iommu-sva.h" + static struct kset *iommu_group_kset; static DEFINE_IDA(iommu_group_ida); @@ -42,6 +46,7 @@ struct iommu_group { struct kobject kobj; struct kobject *devices_kobj; struct list_head devices; + struct xarray pasid_array; struct mutex mutex; void *iommu_data; void (*iommu_data_release)(void *iommu_data); @@ -278,18 +283,46 @@ static void dev_iommu_free(struct device *dev) kfree(param); } +static u32 dev_iommu_get_max_pasids(struct device *dev) +{ + u32 max_pasids = 0, bits = 0; + int ret; + + if (dev_is_pci(dev)) { + ret = pci_max_pasids(to_pci_dev(dev)); + if (ret > 0) + max_pasids = ret; + } else { + ret = device_property_read_u32(dev, "pasid-num-bits", &bits); + if (!ret) + max_pasids = 1UL << bits; + } + + return min_t(u32, max_pasids, dev->iommu->iommu_dev->max_pasids); +} + static int __iommu_probe_device(struct device *dev, struct list_head *group_list) { const struct iommu_ops *ops = dev->bus->iommu_ops; struct iommu_device *iommu_dev; struct iommu_group *group; + static DEFINE_MUTEX(iommu_probe_device_lock); int ret; if (!ops) return -ENODEV; - - if (!dev_iommu_get(dev)) - return -ENOMEM; + /* + * Serialise to avoid races between IOMMU drivers registering in + * parallel and/or the "replay" calls from ACPI/OF code via client + * driver probe. Once the latter have been cleaned up we should + * probably be able to use device_lock() here to minimise the scope, + * but for now enforcing a simple global ordering is fine. + */ + mutex_lock(&iommu_probe_device_lock); + if (!dev_iommu_get(dev)) { + ret = -ENOMEM; + goto err_unlock; + } if (!try_module_get(ops->owner)) { ret = -EINVAL; @@ -303,17 +336,21 @@ static int __iommu_probe_device(struct device *dev, struct list_head *group_list } dev->iommu->iommu_dev = iommu_dev; + dev->iommu->max_pasids = dev_iommu_get_max_pasids(dev); group = iommu_group_get_for_dev(dev); if (IS_ERR(group)) { ret = PTR_ERR(group); goto out_release; } - iommu_group_put(group); + mutex_lock(&group->mutex); if (group_list && !group->default_domain && list_empty(&group->entry)) list_add_tail(&group->entry, group_list); + mutex_unlock(&group->mutex); + iommu_group_put(group); + mutex_unlock(&iommu_probe_device_lock); iommu_device_link(iommu_dev, dev); return 0; @@ -328,6 +365,9 @@ out_module_put: err_free: dev_iommu_free(dev); +err_unlock: + mutex_unlock(&iommu_probe_device_lock); + return ret; } @@ -703,6 +743,7 @@ struct iommu_group *iommu_group_alloc(void) mutex_init(&group->mutex); INIT_LIST_HEAD(&group->devices); INIT_LIST_HEAD(&group->entry); + xa_init(&group->pasid_array); ret = ida_alloc(&iommu_group_ida, GFP_KERNEL); if (ret < 0) { @@ -1799,11 +1840,11 @@ int bus_iommu_probe(struct bus_type *bus) return ret; list_for_each_entry_safe(group, next, &group_list, entry) { + mutex_lock(&group->mutex); + /* Remove item from the list */ list_del_init(&group->entry); - mutex_lock(&group->mutex); - /* Try to allocate default domain */ probe_alloc_default_domain(bus, group); @@ -1912,6 +1953,8 @@ EXPORT_SYMBOL_GPL(iommu_domain_alloc); void iommu_domain_free(struct iommu_domain *domain) { + if (domain->type == IOMMU_DOMAIN_SVA) + mmdrop(domain->mm); iommu_put_dma_cookie(domain); domain->ops->free(domain); } @@ -1949,6 +1992,18 @@ static int __iommu_attach_device(struct iommu_domain *domain, return ret; } +/** + * iommu_attach_device - Attach an IOMMU domain to a device + * @domain: IOMMU domain to attach + * @dev: Device that will be attached + * + * Returns 0 on success and error code on failure + * + * Note that EINVAL can be treated as a soft failure, indicating + * that certain configuration of the domain is incompatible with + * the device. In this case attaching a different domain to the + * device may succeed. + */ int iommu_attach_device(struct iommu_domain *domain, struct device *dev) { struct iommu_group *group; @@ -2075,6 +2130,18 @@ static int __iommu_attach_group(struct iommu_domain *domain, return ret; } +/** + * iommu_attach_group - Attach an IOMMU domain to an IOMMU group + * @domain: IOMMU domain to attach + * @group: IOMMU group that will be attached + * + * Returns 0 on success and error code on failure + * + * Note that EINVAL can be treated as a soft failure, indicating + * that certain configuration of the domain is incompatible with + * the group. In this case attaching a different domain to the + * group may succeed. + */ int iommu_attach_group(struct iommu_domain *domain, struct iommu_group *group) { int ret; @@ -2726,98 +2793,6 @@ int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features feat) } EXPORT_SYMBOL_GPL(iommu_dev_disable_feature); -/** - * iommu_sva_bind_device() - Bind a process address space to a device - * @dev: the device - * @mm: the mm to bind, caller must hold a reference to it - * @drvdata: opaque data pointer to pass to bind callback - * - * Create a bond between device and address space, allowing the device to access - * the mm using the returned PASID. If a bond already exists between @device and - * @mm, it is returned and an additional reference is taken. Caller must call - * iommu_sva_unbind_device() to release each reference. - * - * iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_SVA) must be called first, to - * initialize the required SVA features. - * - * On error, returns an ERR_PTR value. - */ -struct iommu_sva * -iommu_sva_bind_device(struct device *dev, struct mm_struct *mm, void *drvdata) -{ - struct iommu_group *group; - struct iommu_sva *handle = ERR_PTR(-EINVAL); - const struct iommu_ops *ops = dev_iommu_ops(dev); - - if (!ops->sva_bind) - return ERR_PTR(-ENODEV); - - group = iommu_group_get(dev); - if (!group) - return ERR_PTR(-ENODEV); - - /* Ensure device count and domain don't change while we're binding */ - mutex_lock(&group->mutex); - - /* - * To keep things simple, SVA currently doesn't support IOMMU groups - * with more than one device. Existing SVA-capable systems are not - * affected by the problems that required IOMMU groups (lack of ACS - * isolation, device ID aliasing and other hardware issues). - */ - if (iommu_group_device_count(group) != 1) - goto out_unlock; - - handle = ops->sva_bind(dev, mm, drvdata); - -out_unlock: - mutex_unlock(&group->mutex); - iommu_group_put(group); - - return handle; -} -EXPORT_SYMBOL_GPL(iommu_sva_bind_device); - -/** - * iommu_sva_unbind_device() - Remove a bond created with iommu_sva_bind_device - * @handle: the handle returned by iommu_sva_bind_device() - * - * Put reference to a bond between device and address space. The device should - * not be issuing any more transaction for this PASID. All outstanding page - * requests for this PASID must have been flushed to the IOMMU. - */ -void iommu_sva_unbind_device(struct iommu_sva *handle) -{ - struct iommu_group *group; - struct device *dev = handle->dev; - const struct iommu_ops *ops = dev_iommu_ops(dev); - - if (!ops->sva_unbind) - return; - - group = iommu_group_get(dev); - if (!group) - return; - - mutex_lock(&group->mutex); - ops->sva_unbind(handle); - mutex_unlock(&group->mutex); - - iommu_group_put(group); -} -EXPORT_SYMBOL_GPL(iommu_sva_unbind_device); - -u32 iommu_sva_get_pasid(struct iommu_sva *handle) -{ - const struct iommu_ops *ops = dev_iommu_ops(handle->dev); - - if (!ops->sva_get_pasid) - return IOMMU_PASID_INVALID; - - return ops->sva_get_pasid(handle); -} -EXPORT_SYMBOL_GPL(iommu_sva_get_pasid); - /* * Changes the default domain of an iommu group that has *only* one device * @@ -3087,7 +3062,8 @@ int iommu_device_use_default_domain(struct device *dev) mutex_lock(&group->mutex); if (group->owner_cnt) { - if (group->owner || !iommu_is_default_domain(group)) { + if (group->owner || !iommu_is_default_domain(group) || + !xa_empty(&group->pasid_array)) { ret = -EBUSY; goto unlock_out; } @@ -3118,7 +3094,7 @@ void iommu_device_unuse_default_domain(struct device *dev) return; mutex_lock(&group->mutex); - if (!WARN_ON(!group->owner_cnt)) + if (!WARN_ON(!group->owner_cnt || !xa_empty(&group->pasid_array))) group->owner_cnt--; mutex_unlock(&group->mutex); @@ -3166,7 +3142,8 @@ int iommu_group_claim_dma_owner(struct iommu_group *group, void *owner) ret = -EPERM; goto unlock_out; } else { - if (group->domain && group->domain != group->default_domain) { + if ((group->domain && group->domain != group->default_domain) || + !xa_empty(&group->pasid_array)) { ret = -EBUSY; goto unlock_out; } @@ -3200,7 +3177,8 @@ void iommu_group_release_dma_owner(struct iommu_group *group) int ret; mutex_lock(&group->mutex); - if (WARN_ON(!group->owner_cnt || !group->owner)) + if (WARN_ON(!group->owner_cnt || !group->owner || + !xa_empty(&group->pasid_array))) goto unlock_out; group->owner_cnt = 0; @@ -3231,3 +3209,150 @@ bool iommu_group_dma_owner_claimed(struct iommu_group *group) return user; } EXPORT_SYMBOL_GPL(iommu_group_dma_owner_claimed); + +static int __iommu_set_group_pasid(struct iommu_domain *domain, + struct iommu_group *group, ioasid_t pasid) +{ + struct group_device *device; + int ret = 0; + + list_for_each_entry(device, &group->devices, list) { + ret = domain->ops->set_dev_pasid(domain, device->dev, pasid); + if (ret) + break; + } + + return ret; +} + +static void __iommu_remove_group_pasid(struct iommu_group *group, + ioasid_t pasid) +{ + struct group_device *device; + const struct iommu_ops *ops; + + list_for_each_entry(device, &group->devices, list) { + ops = dev_iommu_ops(device->dev); + ops->remove_dev_pasid(device->dev, pasid); + } +} + +/* + * iommu_attach_device_pasid() - Attach a domain to pasid of device + * @domain: the iommu domain. + * @dev: the attached device. + * @pasid: the pasid of the device. + * + * Return: 0 on success, or an error. + */ +int iommu_attach_device_pasid(struct iommu_domain *domain, + struct device *dev, ioasid_t pasid) +{ + struct iommu_group *group; + void *curr; + int ret; + + if (!domain->ops->set_dev_pasid) + return -EOPNOTSUPP; + + group = iommu_group_get(dev); + if (!group) + return -ENODEV; + + mutex_lock(&group->mutex); + curr = xa_cmpxchg(&group->pasid_array, pasid, NULL, domain, GFP_KERNEL); + if (curr) { + ret = xa_err(curr) ? : -EBUSY; + goto out_unlock; + } + + ret = __iommu_set_group_pasid(domain, group, pasid); + if (ret) { + __iommu_remove_group_pasid(group, pasid); + xa_erase(&group->pasid_array, pasid); + } +out_unlock: + mutex_unlock(&group->mutex); + iommu_group_put(group); + + return ret; +} +EXPORT_SYMBOL_GPL(iommu_attach_device_pasid); + +/* + * iommu_detach_device_pasid() - Detach the domain from pasid of device + * @domain: the iommu domain. + * @dev: the attached device. + * @pasid: the pasid of the device. + * + * The @domain must have been attached to @pasid of the @dev with + * iommu_attach_device_pasid(). + */ +void iommu_detach_device_pasid(struct iommu_domain *domain, struct device *dev, + ioasid_t pasid) +{ + struct iommu_group *group = iommu_group_get(dev); + + mutex_lock(&group->mutex); + __iommu_remove_group_pasid(group, pasid); + WARN_ON(xa_erase(&group->pasid_array, pasid) != domain); + mutex_unlock(&group->mutex); + + iommu_group_put(group); +} +EXPORT_SYMBOL_GPL(iommu_detach_device_pasid); + +/* + * iommu_get_domain_for_dev_pasid() - Retrieve domain for @pasid of @dev + * @dev: the queried device + * @pasid: the pasid of the device + * @type: matched domain type, 0 for any match + * + * This is a variant of iommu_get_domain_for_dev(). It returns the existing + * domain attached to pasid of a device. Callers must hold a lock around this + * function, and both iommu_attach/detach_dev_pasid() whenever a domain of + * type is being manipulated. This API does not internally resolve races with + * attach/detach. + * + * Return: attached domain on success, NULL otherwise. + */ +struct iommu_domain *iommu_get_domain_for_dev_pasid(struct device *dev, + ioasid_t pasid, + unsigned int type) +{ + struct iommu_domain *domain; + struct iommu_group *group; + + group = iommu_group_get(dev); + if (!group) + return NULL; + + xa_lock(&group->pasid_array); + domain = xa_load(&group->pasid_array, pasid); + if (type && domain && domain->type != type) + domain = ERR_PTR(-EBUSY); + xa_unlock(&group->pasid_array); + iommu_group_put(group); + + return domain; +} +EXPORT_SYMBOL_GPL(iommu_get_domain_for_dev_pasid); + +struct iommu_domain *iommu_sva_domain_alloc(struct device *dev, + struct mm_struct *mm) +{ + const struct iommu_ops *ops = dev_iommu_ops(dev); + struct iommu_domain *domain; + + domain = ops->domain_alloc(IOMMU_DOMAIN_SVA); + if (!domain) + return NULL; + + domain->type = IOMMU_DOMAIN_SVA; + mmgrab(mm); + domain->mm = mm; + domain->iopf_handler = iommu_sva_handle_iopf; + domain->fault_data = mm; + + return domain; +} diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index 3b30c0752274..a003bd5fc65c 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -628,8 +628,6 @@ static int ipmmu_attach_device(struct iommu_domain *io_domain, * Something is wrong, we can't attach two devices using * different IOMMUs to the same domain. */ - dev_err(dev, "Can't attach IPMMU %s to domain on IPMMU %s\n", - dev_name(mmu->dev), dev_name(domain->mmu->dev)); ret = -EINVAL; } else dev_info(dev, "Reusing IPMMU context %u\n", domain->context_id); @@ -661,22 +659,22 @@ static void ipmmu_detach_device(struct iommu_domain *io_domain, } static int ipmmu_map(struct iommu_domain *io_domain, unsigned long iova, - phys_addr_t paddr, size_t size, int prot, gfp_t gfp) + phys_addr_t paddr, size_t pgsize, size_t pgcount, + int prot, gfp_t gfp, size_t *mapped) { struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain); - if (!domain) - return -ENODEV; - - return domain->iop->map(domain->iop, iova, paddr, size, prot, gfp); + return domain->iop->map_pages(domain->iop, iova, paddr, pgsize, pgcount, + prot, gfp, mapped); } static size_t ipmmu_unmap(struct iommu_domain *io_domain, unsigned long iova, - size_t size, struct iommu_iotlb_gather *gather) + size_t pgsize, size_t pgcount, + struct iommu_iotlb_gather *gather) { struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain); - return domain->iop->unmap(domain->iop, iova, size, gather); + return domain->iop->unmap_pages(domain->iop, iova, pgsize, pgcount, gather); } static void ipmmu_flush_iotlb_all(struct iommu_domain *io_domain) @@ -879,8 +877,8 @@ static const struct iommu_ops ipmmu_ops = { .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = ipmmu_attach_device, .detach_dev = ipmmu_detach_device, - .map = ipmmu_map, - .unmap = ipmmu_unmap, + .map_pages = ipmmu_map, + .unmap_pages = ipmmu_unmap, .flush_iotlb_all = ipmmu_flush_iotlb_all, .iotlb_sync = ipmmu_iotlb_sync, .iova_to_phys = ipmmu_iova_to_phys, diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c index 16179a9a7283..c60624910872 100644 --- a/drivers/iommu/msm_iommu.c +++ b/drivers/iommu/msm_iommu.c @@ -471,14 +471,16 @@ fail: } static int msm_iommu_map(struct iommu_domain *domain, unsigned long iova, - phys_addr_t pa, size_t len, int prot, gfp_t gfp) + phys_addr_t pa, size_t pgsize, size_t pgcount, + int prot, gfp_t gfp, size_t *mapped) { struct msm_priv *priv = to_msm_priv(domain); unsigned long flags; int ret; spin_lock_irqsave(&priv->pgtlock, flags); - ret = priv->iop->map(priv->iop, iova, pa, len, prot, GFP_ATOMIC); + ret = priv->iop->map_pages(priv->iop, iova, pa, pgsize, pgcount, prot, + GFP_ATOMIC, mapped); spin_unlock_irqrestore(&priv->pgtlock, flags); return ret; @@ -493,16 +495,18 @@ static void msm_iommu_sync_map(struct iommu_domain *domain, unsigned long iova, } static size_t msm_iommu_unmap(struct iommu_domain *domain, unsigned long iova, - size_t len, struct iommu_iotlb_gather *gather) + size_t pgsize, size_t pgcount, + struct iommu_iotlb_gather *gather) { struct msm_priv *priv = to_msm_priv(domain); unsigned long flags; + size_t ret; spin_lock_irqsave(&priv->pgtlock, flags); - len = priv->iop->unmap(priv->iop, iova, len, gather); + ret = priv->iop->unmap_pages(priv->iop, iova, pgsize, pgcount, gather); spin_unlock_irqrestore(&priv->pgtlock, flags); - return len; + return ret; } static phys_addr_t msm_iommu_iova_to_phys(struct iommu_domain *domain, @@ -679,8 +683,8 @@ static struct iommu_ops msm_iommu_ops = { .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = msm_iommu_attach_dev, .detach_dev = msm_iommu_detach_dev, - .map = msm_iommu_map, - .unmap = msm_iommu_unmap, + .map_pages = msm_iommu_map, + .unmap_pages = msm_iommu_unmap, /* * Nothing is needed here, the barrier to guarantee * completion of the tlb sync operation is implicitly diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 2ab2ecfe01f8..2badd6acfb23 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -108,8 +108,12 @@ #define F_MMU_INT_ID_SUB_COMM_ID(a) (((a) >> 7) & 0x3) #define F_MMU_INT_ID_COMM_ID_EXT(a) (((a) >> 10) & 0x7) #define F_MMU_INT_ID_SUB_COMM_ID_EXT(a) (((a) >> 7) & 0x7) +/* Macro for 5 bits length port ID field (default) */ #define F_MMU_INT_ID_LARB_ID(a) (((a) >> 7) & 0x7) #define F_MMU_INT_ID_PORT_ID(a) (((a) >> 2) & 0x1f) +/* Macro for 6 bits length port ID field */ +#define F_MMU_INT_ID_LARB_ID_WID_6(a) (((a) >> 8) & 0x7) +#define F_MMU_INT_ID_PORT_ID_WID_6(a) (((a) >> 2) & 0x3f) #define MTK_PROTECT_PA_ALIGN 256 #define MTK_IOMMU_BANK_SZ 0x1000 @@ -139,6 +143,7 @@ #define IFA_IOMMU_PCIE_SUPPORT BIT(16) #define PGTABLE_PA_35_EN BIT(17) #define TF_PORT_TO_ADDR_MT8173 BIT(18) +#define INT_ID_PORT_WIDTH_6 BIT(19) #define MTK_IOMMU_HAS_FLAG_MASK(pdata, _x, mask) \ ((((pdata)->flags) & (mask)) == (_x)) @@ -165,6 +170,7 @@ enum mtk_iommu_plat { M4U_MT8186, M4U_MT8192, M4U_MT8195, + M4U_MT8365, }; struct mtk_iommu_iova_region { @@ -223,10 +229,7 @@ struct mtk_iommu_data { struct device *smicomm_dev; struct mtk_iommu_bank_data *bank; - - struct dma_iommu_mapping *mapping; /* For mtk_iommu_v1.c */ struct regmap *pericfg; - struct mutex mutex; /* Protect m4u_group/m4u_dom above */ /* @@ -441,20 +444,25 @@ static irqreturn_t mtk_iommu_isr(int irq, void *dev_id) fault_pa |= (u64)pa34_32 << 32; if (MTK_IOMMU_IS_TYPE(plat_data, MTK_IOMMU_TYPE_MM)) { - fault_port = F_MMU_INT_ID_PORT_ID(regval); if (MTK_IOMMU_HAS_FLAG(plat_data, HAS_SUB_COMM_2BITS)) { fault_larb = F_MMU_INT_ID_COMM_ID(regval); sub_comm = F_MMU_INT_ID_SUB_COMM_ID(regval); + fault_port = F_MMU_INT_ID_PORT_ID(regval); } else if (MTK_IOMMU_HAS_FLAG(plat_data, HAS_SUB_COMM_3BITS)) { fault_larb = F_MMU_INT_ID_COMM_ID_EXT(regval); sub_comm = F_MMU_INT_ID_SUB_COMM_ID_EXT(regval); + fault_port = F_MMU_INT_ID_PORT_ID(regval); + } else if (MTK_IOMMU_HAS_FLAG(plat_data, INT_ID_PORT_WIDTH_6)) { + fault_port = F_MMU_INT_ID_PORT_ID_WID_6(regval); + fault_larb = F_MMU_INT_ID_LARB_ID_WID_6(regval); } else { + fault_port = F_MMU_INT_ID_PORT_ID(regval); fault_larb = F_MMU_INT_ID_LARB_ID(regval); } fault_larb = data->plat_data->larbid_remap[fault_larb][sub_comm]; } - if (report_iommu_fault(&dom->domain, bank->parent_dev, fault_iova, + if (!dom || report_iommu_fault(&dom->domain, bank->parent_dev, fault_iova, write ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ)) { dev_err_ratelimited( bank->parent_dev, @@ -609,7 +617,7 @@ static int mtk_iommu_domain_finalise(struct mtk_iommu_domain *dom, dom->iop = alloc_io_pgtable_ops(ARM_V7S, &dom->cfg, data); if (!dom->iop) { dev_err(data->dev, "Failed to alloc io pgtable\n"); - return -EINVAL; + return -ENOMEM; } /* Update our support page sizes bitmap */ @@ -668,7 +676,7 @@ static int mtk_iommu_attach_device(struct iommu_domain *domain, ret = mtk_iommu_domain_finalise(dom, frstdata, region_id); if (ret) { mutex_unlock(&dom->mutex); - return -ENODEV; + return ret; } dom->bank = &data->bank[bankid]; } @@ -711,7 +719,8 @@ static void mtk_iommu_detach_device(struct iommu_domain *domain, } static int mtk_iommu_map(struct iommu_domain *domain, unsigned long iova, - phys_addr_t paddr, size_t size, int prot, gfp_t gfp) + phys_addr_t paddr, size_t pgsize, size_t pgcount, + int prot, gfp_t gfp, size_t *mapped) { struct mtk_iommu_domain *dom = to_mtk_domain(domain); @@ -720,17 +729,17 @@ static int mtk_iommu_map(struct iommu_domain *domain, unsigned long iova, paddr |= BIT_ULL(32); /* Synchronize with the tlb_lock */ - return dom->iop->map(dom->iop, iova, paddr, size, prot, gfp); + return dom->iop->map_pages(dom->iop, iova, paddr, pgsize, pgcount, prot, gfp, mapped); } static size_t mtk_iommu_unmap(struct iommu_domain *domain, - unsigned long iova, size_t size, + unsigned long iova, size_t pgsize, size_t pgcount, struct iommu_iotlb_gather *gather) { struct mtk_iommu_domain *dom = to_mtk_domain(domain); - iommu_iotlb_gather_add_range(gather, iova, size); - return dom->iop->unmap(dom->iop, iova, size, gather); + iommu_iotlb_gather_add_range(gather, iova, pgsize * pgcount); + return dom->iop->unmap_pages(dom->iop, iova, pgsize, pgcount, gather); } static void mtk_iommu_flush_iotlb_all(struct iommu_domain *domain) @@ -938,8 +947,8 @@ static const struct iommu_ops mtk_iommu_ops = { .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = mtk_iommu_attach_device, .detach_dev = mtk_iommu_detach_device, - .map = mtk_iommu_map, - .unmap = mtk_iommu_unmap, + .map_pages = mtk_iommu_map, + .unmap_pages = mtk_iommu_unmap, .flush_iotlb_all = mtk_iommu_flush_iotlb_all, .iotlb_sync = mtk_iommu_iotlb_sync, .iotlb_sync_map = mtk_iommu_sync_map, @@ -1043,21 +1052,26 @@ static const struct component_master_ops mtk_iommu_com_ops = { static int mtk_iommu_mm_dts_parse(struct device *dev, struct component_match **match, struct mtk_iommu_data *data) { - struct device_node *larbnode, *smicomm_node, *smi_subcomm_node; - struct platform_device *plarbdev; + struct device_node *larbnode, *frst_avail_smicomm_node = NULL; + struct platform_device *plarbdev, *pcommdev; struct device_link *link; int i, larb_nr, ret; larb_nr = of_count_phandle_with_args(dev->of_node, "mediatek,larbs", NULL); if (larb_nr < 0) return larb_nr; + if (larb_nr == 0 || larb_nr > MTK_LARB_NR_MAX) + return -EINVAL; for (i = 0; i < larb_nr; i++) { + struct device_node *smicomm_node, *smi_subcomm_node; u32 id; larbnode = of_parse_phandle(dev->of_node, "mediatek,larbs", i); - if (!larbnode) - return -EINVAL; + if (!larbnode) { + ret = -EINVAL; + goto err_larbdev_put; + } if (!of_device_is_available(larbnode)) { of_node_put(larbnode); @@ -1067,48 +1081,91 @@ static int mtk_iommu_mm_dts_parse(struct device *dev, struct component_match **m ret = of_property_read_u32(larbnode, "mediatek,larb-id", &id); if (ret)/* The id is consecutive if there is no this property */ id = i; + if (id >= MTK_LARB_NR_MAX) { + of_node_put(larbnode); + ret = -EINVAL; + goto err_larbdev_put; + } plarbdev = of_find_device_by_node(larbnode); + of_node_put(larbnode); if (!plarbdev) { - of_node_put(larbnode); - return -ENODEV; + ret = -ENODEV; + goto err_larbdev_put; } - if (!plarbdev->dev.driver) { - of_node_put(larbnode); - return -EPROBE_DEFER; + if (data->larb_imu[id].dev) { + platform_device_put(plarbdev); + ret = -EEXIST; + goto err_larbdev_put; } data->larb_imu[id].dev = &plarbdev->dev; - component_match_add_release(dev, match, component_release_of, - component_compare_of, larbnode); + if (!plarbdev->dev.driver) { + ret = -EPROBE_DEFER; + goto err_larbdev_put; + } + + /* Get smi-(sub)-common dev from the last larb. */ + smi_subcomm_node = of_parse_phandle(larbnode, "mediatek,smi", 0); + if (!smi_subcomm_node) { + ret = -EINVAL; + goto err_larbdev_put; + } + + /* + * It may have two level smi-common. the node is smi-sub-common if it + * has a new mediatek,smi property. otherwise it is smi-commmon. + */ + smicomm_node = of_parse_phandle(smi_subcomm_node, "mediatek,smi", 0); + if (smicomm_node) + of_node_put(smi_subcomm_node); + else + smicomm_node = smi_subcomm_node; + + /* + * All the larbs that connect to one IOMMU must connect with the same + * smi-common. + */ + if (!frst_avail_smicomm_node) { + frst_avail_smicomm_node = smicomm_node; + } else if (frst_avail_smicomm_node != smicomm_node) { + dev_err(dev, "mediatek,smi property is not right @larb%d.", id); + of_node_put(smicomm_node); + ret = -EINVAL; + goto err_larbdev_put; + } else { + of_node_put(smicomm_node); + } + + component_match_add(dev, match, component_compare_dev, &plarbdev->dev); + platform_device_put(plarbdev); } - /* Get smi-(sub)-common dev from the last larb. */ - smi_subcomm_node = of_parse_phandle(larbnode, "mediatek,smi", 0); - if (!smi_subcomm_node) + if (!frst_avail_smicomm_node) return -EINVAL; - /* - * It may have two level smi-common. the node is smi-sub-common if it - * has a new mediatek,smi property. otherwise it is smi-commmon. - */ - smicomm_node = of_parse_phandle(smi_subcomm_node, "mediatek,smi", 0); - if (smicomm_node) - of_node_put(smi_subcomm_node); - else - smicomm_node = smi_subcomm_node; - - plarbdev = of_find_device_by_node(smicomm_node); - of_node_put(smicomm_node); - data->smicomm_dev = &plarbdev->dev; + pcommdev = of_find_device_by_node(frst_avail_smicomm_node); + of_node_put(frst_avail_smicomm_node); + if (!pcommdev) + return -ENODEV; + data->smicomm_dev = &pcommdev->dev; link = device_link_add(data->smicomm_dev, dev, DL_FLAG_STATELESS | DL_FLAG_PM_RUNTIME); + platform_device_put(pcommdev); if (!link) { dev_err(dev, "Unable to link %s.\n", dev_name(data->smicomm_dev)); return -EINVAL; } return 0; + +err_larbdev_put: + for (i = MTK_LARB_NR_MAX - 1; i >= 0; i--) { + if (!data->larb_imu[i].dev) + continue; + put_device(data->larb_imu[i].dev); + } + return ret; } static int mtk_iommu_probe(struct platform_device *pdev) @@ -1173,6 +1230,8 @@ static int mtk_iommu_probe(struct platform_device *pdev) banks_num = data->plat_data->banks_num; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) + return -EINVAL; if (resource_size(res) < banks_num * MTK_IOMMU_BANK_SZ) { dev_err(dev, "banknr %d. res %pR is not enough.\n", banks_num, res); return -EINVAL; @@ -1516,6 +1575,17 @@ static const struct mtk_iommu_plat_data mt8195_data_vpp = { {4, MTK_INVALID_LARBID, MTK_INVALID_LARBID, MTK_INVALID_LARBID, 6}}, }; +static const struct mtk_iommu_plat_data mt8365_data = { + .m4u_plat = M4U_MT8365, + .flags = RESET_AXI | INT_ID_PORT_WIDTH_6, + .inv_sel_reg = REG_MMU_INV_SEL_GEN1, + .banks_num = 1, + .banks_enable = {true}, + .iova_region = single_domain, + .iova_region_nr = ARRAY_SIZE(single_domain), + .larbid_remap = {{0}, {1}, {2}, {3}, {4}, {5}}, /* Linear mapping. */ +}; + static const struct of_device_id mtk_iommu_of_ids[] = { { .compatible = "mediatek,mt2712-m4u", .data = &mt2712_data}, { .compatible = "mediatek,mt6779-m4u", .data = &mt6779_data}, @@ -1528,6 +1598,7 @@ static const struct of_device_id mtk_iommu_of_ids[] = { { .compatible = "mediatek,mt8195-iommu-infra", .data = &mt8195_data_infra}, { .compatible = "mediatek,mt8195-iommu-vdo", .data = &mt8195_data_vdo}, { .compatible = "mediatek,mt8195-iommu-vpp", .data = &mt8195_data_vpp}, + { .compatible = "mediatek,mt8365-m4u", .data = &mt8365_data}, {} }; diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c index 6e0e65831eb7..69682ee068d2 100644 --- a/drivers/iommu/mtk_iommu_v1.c +++ b/drivers/iommu/mtk_iommu_v1.c @@ -327,44 +327,42 @@ static void mtk_iommu_v1_detach_device(struct iommu_domain *domain, struct devic } static int mtk_iommu_v1_map(struct iommu_domain *domain, unsigned long iova, - phys_addr_t paddr, size_t size, int prot, gfp_t gfp) + phys_addr_t paddr, size_t pgsize, size_t pgcount, + int prot, gfp_t gfp, size_t *mapped) { struct mtk_iommu_v1_domain *dom = to_mtk_domain(domain); - unsigned int page_num = size >> MT2701_IOMMU_PAGE_SHIFT; unsigned long flags; unsigned int i; u32 *pgt_base_iova = dom->pgt_va + (iova >> MT2701_IOMMU_PAGE_SHIFT); u32 pabase = (u32)paddr; - int map_size = 0; spin_lock_irqsave(&dom->pgtlock, flags); - for (i = 0; i < page_num; i++) { - if (pgt_base_iova[i]) { - memset(pgt_base_iova, 0, i * sizeof(u32)); + for (i = 0; i < pgcount; i++) { + if (pgt_base_iova[i]) break; - } pgt_base_iova[i] = pabase | F_DESC_VALID | F_DESC_NONSEC; pabase += MT2701_IOMMU_PAGE_SIZE; - map_size += MT2701_IOMMU_PAGE_SIZE; } spin_unlock_irqrestore(&dom->pgtlock, flags); - mtk_iommu_v1_tlb_flush_range(dom->data, iova, size); + *mapped = i * MT2701_IOMMU_PAGE_SIZE; + mtk_iommu_v1_tlb_flush_range(dom->data, iova, *mapped); - return map_size == size ? 0 : -EEXIST; + return i == pgcount ? 0 : -EEXIST; } static size_t mtk_iommu_v1_unmap(struct iommu_domain *domain, unsigned long iova, - size_t size, struct iommu_iotlb_gather *gather) + size_t pgsize, size_t pgcount, + struct iommu_iotlb_gather *gather) { struct mtk_iommu_v1_domain *dom = to_mtk_domain(domain); unsigned long flags; u32 *pgt_base_iova = dom->pgt_va + (iova >> MT2701_IOMMU_PAGE_SHIFT); - unsigned int page_num = size >> MT2701_IOMMU_PAGE_SHIFT; + size_t size = pgcount * MT2701_IOMMU_PAGE_SIZE; spin_lock_irqsave(&dom->pgtlock, flags); - memset(pgt_base_iova, 0, page_num * sizeof(u32)); + memset(pgt_base_iova, 0, pgcount * sizeof(u32)); spin_unlock_irqrestore(&dom->pgtlock, flags); mtk_iommu_v1_tlb_flush_range(dom->data, iova, size); @@ -586,13 +584,13 @@ static const struct iommu_ops mtk_iommu_v1_ops = { .release_device = mtk_iommu_v1_release_device, .def_domain_type = mtk_iommu_v1_def_domain_type, .device_group = generic_device_group, - .pgsize_bitmap = ~0UL << MT2701_IOMMU_PAGE_SHIFT, + .pgsize_bitmap = MT2701_IOMMU_PAGE_SIZE, .owner = THIS_MODULE, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = mtk_iommu_v1_attach_device, .detach_dev = mtk_iommu_v1_detach_device, - .map = mtk_iommu_v1_map, - .unmap = mtk_iommu_v1_unmap, + .map_pages = mtk_iommu_v1_map, + .unmap_pages = mtk_iommu_v1_unmap, .iova_to_phys = mtk_iommu_v1_iova_to_phys, .free = mtk_iommu_v1_domain_free, } diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c index 07ee2600113c..2fd7702c6709 100644 --- a/drivers/iommu/omap-iommu.c +++ b/drivers/iommu/omap-iommu.c @@ -1414,7 +1414,7 @@ static int omap_iommu_attach_init(struct device *dev, odomain->num_iommus = omap_iommu_count(dev); if (!odomain->num_iommus) - return -EINVAL; + return -ENODEV; odomain->iommus = kcalloc(odomain->num_iommus, sizeof(*iommu), GFP_ATOMIC); @@ -1464,7 +1464,7 @@ omap_iommu_attach_dev(struct iommu_domain *domain, struct device *dev) if (!arch_data || !arch_data->iommu_dev) { dev_err(dev, "device doesn't have an associated iommu\n"); - return -EINVAL; + return -ENODEV; } spin_lock(&omap_domain->lock); @@ -1472,7 +1472,7 @@ omap_iommu_attach_dev(struct iommu_domain *domain, struct device *dev) /* only a single client device can be attached to a domain */ if (omap_domain->dev) { dev_err(dev, "iommu domain is already attached\n"); - ret = -EBUSY; + ret = -EINVAL; goto out; } diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c index a3fc59b814ab..a68eadd64f38 100644 --- a/drivers/iommu/rockchip-iommu.c +++ b/drivers/iommu/rockchip-iommu.c @@ -280,19 +280,17 @@ static u32 rk_mk_pte(phys_addr_t page, int prot) * 11:9 - Page address bit 34:32 * 8:4 - Page address bit 39:35 * 3 - Security - * 2 - Readable - * 1 - Writable + * 2 - Writable + * 1 - Readable * 0 - 1 if Page @ Page address is valid */ -#define RK_PTE_PAGE_READABLE_V2 BIT(2) -#define RK_PTE_PAGE_WRITABLE_V2 BIT(1) static u32 rk_mk_pte_v2(phys_addr_t page, int prot) { u32 flags = 0; - flags |= (prot & IOMMU_READ) ? RK_PTE_PAGE_READABLE_V2 : 0; - flags |= (prot & IOMMU_WRITE) ? RK_PTE_PAGE_WRITABLE_V2 : 0; + flags |= (prot & IOMMU_READ) ? RK_PTE_PAGE_READABLE : 0; + flags |= (prot & IOMMU_WRITE) ? RK_PTE_PAGE_WRITABLE : 0; return rk_mk_dte_v2(page) | flags; } diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c index 3c071782f6f1..ed33c6cce083 100644 --- a/drivers/iommu/s390-iommu.c +++ b/drivers/iommu/s390-iommu.c @@ -10,28 +10,18 @@ #include <linux/iommu.h> #include <linux/iommu-helper.h> #include <linux/sizes.h> +#include <linux/rculist.h> +#include <linux/rcupdate.h> #include <asm/pci_dma.h> -/* - * Physically contiguous memory regions can be mapped with 4 KiB alignment, - * we allow all page sizes that are an order of 4KiB (no special large page - * support so far). - */ -#define S390_IOMMU_PGSIZES (~0xFFFUL) - static const struct iommu_ops s390_iommu_ops; struct s390_domain { struct iommu_domain domain; struct list_head devices; unsigned long *dma_table; - spinlock_t dma_table_lock; spinlock_t list_lock; -}; - -struct s390_domain_device { - struct list_head list; - struct zpci_dev *zdev; + struct rcu_head rcu; }; static struct s390_domain *to_s390_domain(struct iommu_domain *dom) @@ -67,119 +57,125 @@ static struct iommu_domain *s390_domain_alloc(unsigned domain_type) kfree(s390_domain); return NULL; } + s390_domain->domain.geometry.force_aperture = true; + s390_domain->domain.geometry.aperture_start = 0; + s390_domain->domain.geometry.aperture_end = ZPCI_TABLE_SIZE_RT - 1; - spin_lock_init(&s390_domain->dma_table_lock); spin_lock_init(&s390_domain->list_lock); - INIT_LIST_HEAD(&s390_domain->devices); + INIT_LIST_HEAD_RCU(&s390_domain->devices); return &s390_domain->domain; } -static void s390_domain_free(struct iommu_domain *domain) +static void s390_iommu_rcu_free_domain(struct rcu_head *head) { - struct s390_domain *s390_domain = to_s390_domain(domain); + struct s390_domain *s390_domain = container_of(head, struct s390_domain, rcu); dma_cleanup_tables(s390_domain->dma_table); kfree(s390_domain); } +static void s390_domain_free(struct iommu_domain *domain) +{ + struct s390_domain *s390_domain = to_s390_domain(domain); + + rcu_read_lock(); + WARN_ON(!list_empty(&s390_domain->devices)); + rcu_read_unlock(); + + call_rcu(&s390_domain->rcu, s390_iommu_rcu_free_domain); +} + +static void __s390_iommu_detach_device(struct zpci_dev *zdev) +{ + struct s390_domain *s390_domain = zdev->s390_domain; + unsigned long flags; + + if (!s390_domain) + return; + + spin_lock_irqsave(&s390_domain->list_lock, flags); + list_del_rcu(&zdev->iommu_list); + spin_unlock_irqrestore(&s390_domain->list_lock, flags); + + zpci_unregister_ioat(zdev, 0); + zdev->s390_domain = NULL; + zdev->dma_table = NULL; +} + static int s390_iommu_attach_device(struct iommu_domain *domain, struct device *dev) { struct s390_domain *s390_domain = to_s390_domain(domain); struct zpci_dev *zdev = to_zpci_dev(dev); - struct s390_domain_device *domain_device; unsigned long flags; - int cc, rc; + u8 status; + int cc; if (!zdev) return -ENODEV; - domain_device = kzalloc(sizeof(*domain_device), GFP_KERNEL); - if (!domain_device) - return -ENOMEM; - - if (zdev->dma_table && !zdev->s390_domain) { - cc = zpci_dma_exit_device(zdev); - if (cc) { - rc = -EIO; - goto out_free; - } - } + if (WARN_ON(domain->geometry.aperture_start > zdev->end_dma || + domain->geometry.aperture_end < zdev->start_dma)) + return -EINVAL; if (zdev->s390_domain) - zpci_unregister_ioat(zdev, 0); + __s390_iommu_detach_device(zdev); + else if (zdev->dma_table) + zpci_dma_exit_device(zdev); - zdev->dma_table = s390_domain->dma_table; cc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, - virt_to_phys(zdev->dma_table)); - if (cc) { - rc = -EIO; - goto out_restore; - } + virt_to_phys(s390_domain->dma_table), &status); + /* + * If the device is undergoing error recovery the reset code + * will re-establish the new domain. + */ + if (cc && status != ZPCI_PCI_ST_FUNC_NOT_AVAIL) + return -EIO; + zdev->dma_table = s390_domain->dma_table; - spin_lock_irqsave(&s390_domain->list_lock, flags); - /* First device defines the DMA range limits */ - if (list_empty(&s390_domain->devices)) { - domain->geometry.aperture_start = zdev->start_dma; - domain->geometry.aperture_end = zdev->end_dma; - domain->geometry.force_aperture = true; - /* Allow only devices with identical DMA range limits */ - } else if (domain->geometry.aperture_start != zdev->start_dma || - domain->geometry.aperture_end != zdev->end_dma) { - rc = -EINVAL; - spin_unlock_irqrestore(&s390_domain->list_lock, flags); - goto out_restore; - } - domain_device->zdev = zdev; + zdev->dma_table = s390_domain->dma_table; zdev->s390_domain = s390_domain; - list_add(&domain_device->list, &s390_domain->devices); + + spin_lock_irqsave(&s390_domain->list_lock, flags); + list_add_rcu(&zdev->iommu_list, &s390_domain->devices); spin_unlock_irqrestore(&s390_domain->list_lock, flags); return 0; - -out_restore: - if (!zdev->s390_domain) { - zpci_dma_init_device(zdev); - } else { - zdev->dma_table = zdev->s390_domain->dma_table; - zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, - virt_to_phys(zdev->dma_table)); - } -out_free: - kfree(domain_device); - - return rc; } static void s390_iommu_detach_device(struct iommu_domain *domain, struct device *dev) { - struct s390_domain *s390_domain = to_s390_domain(domain); struct zpci_dev *zdev = to_zpci_dev(dev); - struct s390_domain_device *domain_device, *tmp; - unsigned long flags; - int found = 0; - if (!zdev) - return; + WARN_ON(zdev->s390_domain != to_s390_domain(domain)); - spin_lock_irqsave(&s390_domain->list_lock, flags); - list_for_each_entry_safe(domain_device, tmp, &s390_domain->devices, - list) { - if (domain_device->zdev == zdev) { - list_del(&domain_device->list); - kfree(domain_device); - found = 1; - break; - } + __s390_iommu_detach_device(zdev); + zpci_dma_init_device(zdev); +} + +static void s390_iommu_get_resv_regions(struct device *dev, + struct list_head *list) +{ + struct zpci_dev *zdev = to_zpci_dev(dev); + struct iommu_resv_region *region; + + if (zdev->start_dma) { + region = iommu_alloc_resv_region(0, zdev->start_dma, 0, + IOMMU_RESV_RESERVED, GFP_KERNEL); + if (!region) + return; + list_add_tail(®ion->list, list); } - spin_unlock_irqrestore(&s390_domain->list_lock, flags); - if (found && (zdev->s390_domain == s390_domain)) { - zdev->s390_domain = NULL; - zpci_unregister_ioat(zdev, 0); - zpci_dma_init_device(zdev); + if (zdev->end_dma < ZPCI_TABLE_SIZE_RT - 1) { + region = iommu_alloc_resv_region(zdev->end_dma + 1, + ZPCI_TABLE_SIZE_RT - zdev->end_dma - 1, + 0, IOMMU_RESV_RESERVED, GFP_KERNEL); + if (!region) + return; + list_add_tail(®ion->list, list); } } @@ -192,55 +188,88 @@ static struct iommu_device *s390_iommu_probe_device(struct device *dev) zdev = to_zpci_dev(dev); + if (zdev->start_dma > zdev->end_dma || + zdev->start_dma > ZPCI_TABLE_SIZE_RT - 1) + return ERR_PTR(-EINVAL); + + if (zdev->end_dma > ZPCI_TABLE_SIZE_RT - 1) + zdev->end_dma = ZPCI_TABLE_SIZE_RT - 1; + return &zdev->iommu_dev; } static void s390_iommu_release_device(struct device *dev) { struct zpci_dev *zdev = to_zpci_dev(dev); - struct iommu_domain *domain; /* - * This is a workaround for a scenario where the IOMMU API common code - * "forgets" to call the detach_dev callback: After binding a device - * to vfio-pci and completing the VFIO_SET_IOMMU ioctl (which triggers - * the attach_dev), removing the device via - * "echo 1 > /sys/bus/pci/devices/.../remove" won't trigger detach_dev, - * only release_device will be called via the BUS_NOTIFY_REMOVED_DEVICE - * notifier. - * - * So let's call detach_dev from here if it hasn't been called before. + * release_device is expected to detach any domain currently attached + * to the device, but keep it attached to other devices in the group. */ - if (zdev && zdev->s390_domain) { - domain = iommu_get_domain_for_dev(dev); - if (domain) - s390_iommu_detach_device(domain, dev); + if (zdev) + __s390_iommu_detach_device(zdev); +} + +static void s390_iommu_flush_iotlb_all(struct iommu_domain *domain) +{ + struct s390_domain *s390_domain = to_s390_domain(domain); + struct zpci_dev *zdev; + + rcu_read_lock(); + list_for_each_entry_rcu(zdev, &s390_domain->devices, iommu_list) { + zpci_refresh_trans((u64)zdev->fh << 32, zdev->start_dma, + zdev->end_dma - zdev->start_dma + 1); } + rcu_read_unlock(); } -static int s390_iommu_update_trans(struct s390_domain *s390_domain, - phys_addr_t pa, dma_addr_t dma_addr, - size_t size, int flags) +static void s390_iommu_iotlb_sync(struct iommu_domain *domain, + struct iommu_iotlb_gather *gather) { - struct s390_domain_device *domain_device; - phys_addr_t page_addr = pa & PAGE_MASK; - dma_addr_t start_dma_addr = dma_addr; - unsigned long irq_flags, nr_pages, i; - unsigned long *entry; - int rc = 0; + struct s390_domain *s390_domain = to_s390_domain(domain); + size_t size = gather->end - gather->start + 1; + struct zpci_dev *zdev; - if (dma_addr < s390_domain->domain.geometry.aperture_start || - dma_addr + size > s390_domain->domain.geometry.aperture_end) - return -EINVAL; + /* If gather was never added to there is nothing to flush */ + if (!gather->end) + return; - nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT; - if (!nr_pages) - return 0; + rcu_read_lock(); + list_for_each_entry_rcu(zdev, &s390_domain->devices, iommu_list) { + zpci_refresh_trans((u64)zdev->fh << 32, gather->start, + size); + } + rcu_read_unlock(); +} + +static void s390_iommu_iotlb_sync_map(struct iommu_domain *domain, + unsigned long iova, size_t size) +{ + struct s390_domain *s390_domain = to_s390_domain(domain); + struct zpci_dev *zdev; + + rcu_read_lock(); + list_for_each_entry_rcu(zdev, &s390_domain->devices, iommu_list) { + if (!zdev->tlb_refresh) + continue; + zpci_refresh_trans((u64)zdev->fh << 32, + iova, size); + } + rcu_read_unlock(); +} + +static int s390_iommu_validate_trans(struct s390_domain *s390_domain, + phys_addr_t pa, dma_addr_t dma_addr, + unsigned long nr_pages, int flags) +{ + phys_addr_t page_addr = pa & PAGE_MASK; + unsigned long *entry; + unsigned long i; + int rc; - spin_lock_irqsave(&s390_domain->dma_table_lock, irq_flags); for (i = 0; i < nr_pages; i++) { entry = dma_walk_cpu_trans(s390_domain->dma_table, dma_addr); - if (!entry) { + if (unlikely(!entry)) { rc = -ENOMEM; goto undo_cpu_trans; } @@ -249,47 +278,70 @@ static int s390_iommu_update_trans(struct s390_domain *s390_domain, dma_addr += PAGE_SIZE; } - spin_lock(&s390_domain->list_lock); - list_for_each_entry(domain_device, &s390_domain->devices, list) { - rc = zpci_refresh_trans((u64) domain_device->zdev->fh << 32, - start_dma_addr, nr_pages * PAGE_SIZE); - if (rc) + return 0; + +undo_cpu_trans: + while (i-- > 0) { + dma_addr -= PAGE_SIZE; + entry = dma_walk_cpu_trans(s390_domain->dma_table, + dma_addr); + if (!entry) break; + dma_update_cpu_trans(entry, 0, ZPCI_PTE_INVALID); } - spin_unlock(&s390_domain->list_lock); -undo_cpu_trans: - if (rc && ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)) { - flags = ZPCI_PTE_INVALID; - while (i-- > 0) { - page_addr -= PAGE_SIZE; - dma_addr -= PAGE_SIZE; - entry = dma_walk_cpu_trans(s390_domain->dma_table, - dma_addr); - if (!entry) - break; - dma_update_cpu_trans(entry, page_addr, flags); + return rc; +} + +static int s390_iommu_invalidate_trans(struct s390_domain *s390_domain, + dma_addr_t dma_addr, unsigned long nr_pages) +{ + unsigned long *entry; + unsigned long i; + int rc = 0; + + for (i = 0; i < nr_pages; i++) { + entry = dma_walk_cpu_trans(s390_domain->dma_table, dma_addr); + if (unlikely(!entry)) { + rc = -EINVAL; + break; } + dma_update_cpu_trans(entry, 0, ZPCI_PTE_INVALID); + dma_addr += PAGE_SIZE; } - spin_unlock_irqrestore(&s390_domain->dma_table_lock, irq_flags); return rc; } -static int s390_iommu_map(struct iommu_domain *domain, unsigned long iova, - phys_addr_t paddr, size_t size, int prot, gfp_t gfp) +static int s390_iommu_map_pages(struct iommu_domain *domain, + unsigned long iova, phys_addr_t paddr, + size_t pgsize, size_t pgcount, + int prot, gfp_t gfp, size_t *mapped) { struct s390_domain *s390_domain = to_s390_domain(domain); + size_t size = pgcount << __ffs(pgsize); int flags = ZPCI_PTE_VALID, rc = 0; + if (pgsize != SZ_4K) + return -EINVAL; + + if (iova < s390_domain->domain.geometry.aperture_start || + (iova + size - 1) > s390_domain->domain.geometry.aperture_end) + return -EINVAL; + + if (!IS_ALIGNED(iova | paddr, pgsize)) + return -EINVAL; + if (!(prot & IOMMU_READ)) return -EINVAL; if (!(prot & IOMMU_WRITE)) flags |= ZPCI_TABLE_PROTECTED; - rc = s390_iommu_update_trans(s390_domain, paddr, iova, - size, flags); + rc = s390_iommu_validate_trans(s390_domain, paddr, iova, + pgcount, flags); + if (!rc) + *mapped = size; return rc; } @@ -298,7 +350,8 @@ static phys_addr_t s390_iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova) { struct s390_domain *s390_domain = to_s390_domain(domain); - unsigned long *sto, *pto, *rto, flags; + unsigned long *rto, *sto, *pto; + unsigned long ste, pte, rte; unsigned int rtx, sx, px; phys_addr_t phys = 0; @@ -311,38 +364,40 @@ static phys_addr_t s390_iommu_iova_to_phys(struct iommu_domain *domain, px = calc_px(iova); rto = s390_domain->dma_table; - spin_lock_irqsave(&s390_domain->dma_table_lock, flags); - if (rto && reg_entry_isvalid(rto[rtx])) { - sto = get_rt_sto(rto[rtx]); - if (sto && reg_entry_isvalid(sto[sx])) { - pto = get_st_pto(sto[sx]); - if (pto && pt_entry_isvalid(pto[px])) - phys = pto[px] & ZPCI_PTE_ADDR_MASK; + rte = READ_ONCE(rto[rtx]); + if (reg_entry_isvalid(rte)) { + sto = get_rt_sto(rte); + ste = READ_ONCE(sto[sx]); + if (reg_entry_isvalid(ste)) { + pto = get_st_pto(ste); + pte = READ_ONCE(pto[px]); + if (pt_entry_isvalid(pte)) + phys = pte & ZPCI_PTE_ADDR_MASK; } } - spin_unlock_irqrestore(&s390_domain->dma_table_lock, flags); return phys; } -static size_t s390_iommu_unmap(struct iommu_domain *domain, - unsigned long iova, size_t size, - struct iommu_iotlb_gather *gather) +static size_t s390_iommu_unmap_pages(struct iommu_domain *domain, + unsigned long iova, + size_t pgsize, size_t pgcount, + struct iommu_iotlb_gather *gather) { struct s390_domain *s390_domain = to_s390_domain(domain); - int flags = ZPCI_PTE_INVALID; - phys_addr_t paddr; + size_t size = pgcount << __ffs(pgsize); int rc; - paddr = s390_iommu_iova_to_phys(domain, iova); - if (!paddr) + if (WARN_ON(iova < s390_domain->domain.geometry.aperture_start || + (iova + size - 1) > s390_domain->domain.geometry.aperture_end)) return 0; - rc = s390_iommu_update_trans(s390_domain, paddr, iova, - size, flags); + rc = s390_iommu_invalidate_trans(s390_domain, iova, pgcount); if (rc) return 0; + iommu_iotlb_gather_add_range(gather, iova, size); + return size; } @@ -380,12 +435,16 @@ static const struct iommu_ops s390_iommu_ops = { .probe_device = s390_iommu_probe_device, .release_device = s390_iommu_release_device, .device_group = generic_device_group, - .pgsize_bitmap = S390_IOMMU_PGSIZES, + .pgsize_bitmap = SZ_4K, + .get_resv_regions = s390_iommu_get_resv_regions, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = s390_iommu_attach_device, .detach_dev = s390_iommu_detach_device, - .map = s390_iommu_map, - .unmap = s390_iommu_unmap, + .map_pages = s390_iommu_map_pages, + .unmap_pages = s390_iommu_unmap_pages, + .flush_iotlb_all = s390_iommu_flush_iotlb_all, + .iotlb_sync = s390_iommu_iotlb_sync, + .iotlb_sync_map = s390_iommu_iotlb_sync_map, .iova_to_phys = s390_iommu_iova_to_phys, .free = s390_domain_free, } diff --git a/drivers/iommu/sprd-iommu.c b/drivers/iommu/sprd-iommu.c index fadd2c907222..219bfa11f7f4 100644 --- a/drivers/iommu/sprd-iommu.c +++ b/drivers/iommu/sprd-iommu.c @@ -237,10 +237,8 @@ static int sprd_iommu_attach_device(struct iommu_domain *domain, struct sprd_iommu_domain *dom = to_sprd_domain(domain); size_t pgt_size = sprd_iommu_pgt_size(domain); - if (dom->sdev) { - pr_err("There's already a device attached to this domain.\n"); + if (dom->sdev) return -EINVAL; - } dom->pgt_va = dma_alloc_coherent(sdev->dev, pgt_size, &dom->pgt_pa, GFP_KERNEL); if (!dom->pgt_va) @@ -273,10 +271,11 @@ static void sprd_iommu_detach_device(struct iommu_domain *domain, } static int sprd_iommu_map(struct iommu_domain *domain, unsigned long iova, - phys_addr_t paddr, size_t size, int prot, gfp_t gfp) + phys_addr_t paddr, size_t pgsize, size_t pgcount, + int prot, gfp_t gfp, size_t *mapped) { struct sprd_iommu_domain *dom = to_sprd_domain(domain); - unsigned int page_num = size >> SPRD_IOMMU_PAGE_SHIFT; + size_t size = pgcount * SPRD_IOMMU_PAGE_SIZE; unsigned long flags; unsigned int i; u32 *pgt_base_iova; @@ -298,35 +297,37 @@ static int sprd_iommu_map(struct iommu_domain *domain, unsigned long iova, pgt_base_iova = dom->pgt_va + ((iova - start) >> SPRD_IOMMU_PAGE_SHIFT); spin_lock_irqsave(&dom->pgtlock, flags); - for (i = 0; i < page_num; i++) { + for (i = 0; i < pgcount; i++) { pgt_base_iova[i] = pabase >> SPRD_IOMMU_PAGE_SHIFT; pabase += SPRD_IOMMU_PAGE_SIZE; } spin_unlock_irqrestore(&dom->pgtlock, flags); + *mapped = size; return 0; } static size_t sprd_iommu_unmap(struct iommu_domain *domain, unsigned long iova, - size_t size, struct iommu_iotlb_gather *iotlb_gather) + size_t pgsize, size_t pgcount, + struct iommu_iotlb_gather *iotlb_gather) { struct sprd_iommu_domain *dom = to_sprd_domain(domain); unsigned long flags; u32 *pgt_base_iova; - unsigned int page_num = size >> SPRD_IOMMU_PAGE_SHIFT; + size_t size = pgcount * SPRD_IOMMU_PAGE_SIZE; unsigned long start = domain->geometry.aperture_start; unsigned long end = domain->geometry.aperture_end; if (iova < start || (iova + size) > (end + 1)) - return -EINVAL; + return 0; pgt_base_iova = dom->pgt_va + ((iova - start) >> SPRD_IOMMU_PAGE_SHIFT); spin_lock_irqsave(&dom->pgtlock, flags); - memset(pgt_base_iova, 0, page_num * sizeof(u32)); + memset(pgt_base_iova, 0, pgcount * sizeof(u32)); spin_unlock_irqrestore(&dom->pgtlock, flags); - return 0; + return size; } static void sprd_iommu_sync_map(struct iommu_domain *domain, @@ -409,13 +410,13 @@ static const struct iommu_ops sprd_iommu_ops = { .probe_device = sprd_iommu_probe_device, .device_group = sprd_iommu_device_group, .of_xlate = sprd_iommu_of_xlate, - .pgsize_bitmap = ~0UL << SPRD_IOMMU_PAGE_SHIFT, + .pgsize_bitmap = SPRD_IOMMU_PAGE_SIZE, .owner = THIS_MODULE, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = sprd_iommu_attach_device, .detach_dev = sprd_iommu_detach_device, - .map = sprd_iommu_map, - .unmap = sprd_iommu_unmap, + .map_pages = sprd_iommu_map, + .unmap_pages = sprd_iommu_unmap, .iotlb_sync_map = sprd_iommu_sync_map, .iotlb_sync = sprd_iommu_sync, .iova_to_phys = sprd_iommu_iova_to_phys, diff --git a/drivers/iommu/sun50i-iommu.c b/drivers/iommu/sun50i-iommu.c index cd9b74ee24de..5b585eace3d4 100644 --- a/drivers/iommu/sun50i-iommu.c +++ b/drivers/iommu/sun50i-iommu.c @@ -27,6 +27,7 @@ #include <linux/types.h> #define IOMMU_RESET_REG 0x010 +#define IOMMU_RESET_RELEASE_ALL 0xffffffff #define IOMMU_ENABLE_REG 0x020 #define IOMMU_ENABLE_ENABLE BIT(0) @@ -92,6 +93,8 @@ #define NUM_PT_ENTRIES 256 #define PT_SIZE (NUM_PT_ENTRIES * PT_ENTRY_SIZE) +#define SPAGE_SIZE 4096 + struct sun50i_iommu { struct iommu_device iommu; @@ -270,7 +273,7 @@ static u32 sun50i_mk_pte(phys_addr_t page, int prot) enum sun50i_iommu_aci aci; u32 flags = 0; - if (prot & (IOMMU_READ | IOMMU_WRITE)) + if ((prot & (IOMMU_READ | IOMMU_WRITE)) == (IOMMU_READ | IOMMU_WRITE)) aci = SUN50I_IOMMU_ACI_RD_WR; else if (prot & IOMMU_READ) aci = SUN50I_IOMMU_ACI_RD; @@ -294,6 +297,62 @@ static void sun50i_table_flush(struct sun50i_iommu_domain *sun50i_domain, dma_sync_single_for_device(iommu->dev, dma, size, DMA_TO_DEVICE); } +static void sun50i_iommu_zap_iova(struct sun50i_iommu *iommu, + unsigned long iova) +{ + u32 reg; + int ret; + + iommu_write(iommu, IOMMU_TLB_IVLD_ADDR_REG, iova); + iommu_write(iommu, IOMMU_TLB_IVLD_ADDR_MASK_REG, GENMASK(31, 12)); + iommu_write(iommu, IOMMU_TLB_IVLD_ENABLE_REG, + IOMMU_TLB_IVLD_ENABLE_ENABLE); + + ret = readl_poll_timeout_atomic(iommu->base + IOMMU_TLB_IVLD_ENABLE_REG, + reg, !reg, 1, 2000); + if (ret) + dev_warn(iommu->dev, "TLB invalidation timed out!\n"); +} + +static void sun50i_iommu_zap_ptw_cache(struct sun50i_iommu *iommu, + unsigned long iova) +{ + u32 reg; + int ret; + + iommu_write(iommu, IOMMU_PC_IVLD_ADDR_REG, iova); + iommu_write(iommu, IOMMU_PC_IVLD_ENABLE_REG, + IOMMU_PC_IVLD_ENABLE_ENABLE); + + ret = readl_poll_timeout_atomic(iommu->base + IOMMU_PC_IVLD_ENABLE_REG, + reg, !reg, 1, 2000); + if (ret) + dev_warn(iommu->dev, "PTW cache invalidation timed out!\n"); +} + +static void sun50i_iommu_zap_range(struct sun50i_iommu *iommu, + unsigned long iova, size_t size) +{ + assert_spin_locked(&iommu->iommu_lock); + + iommu_write(iommu, IOMMU_AUTO_GATING_REG, 0); + + sun50i_iommu_zap_iova(iommu, iova); + sun50i_iommu_zap_iova(iommu, iova + SPAGE_SIZE); + if (size > SPAGE_SIZE) { + sun50i_iommu_zap_iova(iommu, iova + size); + sun50i_iommu_zap_iova(iommu, iova + size + SPAGE_SIZE); + } + sun50i_iommu_zap_ptw_cache(iommu, iova); + sun50i_iommu_zap_ptw_cache(iommu, iova + SZ_1M); + if (size > SZ_1M) { + sun50i_iommu_zap_ptw_cache(iommu, iova + size); + sun50i_iommu_zap_ptw_cache(iommu, iova + size + SZ_1M); + } + + iommu_write(iommu, IOMMU_AUTO_GATING_REG, IOMMU_AUTO_GATING_ENABLE); +} + static int sun50i_iommu_flush_all_tlb(struct sun50i_iommu *iommu) { u32 reg; @@ -343,6 +402,18 @@ static void sun50i_iommu_flush_iotlb_all(struct iommu_domain *domain) spin_unlock_irqrestore(&iommu->iommu_lock, flags); } +static void sun50i_iommu_iotlb_sync_map(struct iommu_domain *domain, + unsigned long iova, size_t size) +{ + struct sun50i_iommu_domain *sun50i_domain = to_sun50i_domain(domain); + struct sun50i_iommu *iommu = sun50i_domain->iommu; + unsigned long flags; + + spin_lock_irqsave(&iommu->iommu_lock, flags); + sun50i_iommu_zap_range(iommu, iova, size); + spin_unlock_irqrestore(&iommu->iommu_lock, flags); +} + static void sun50i_iommu_iotlb_sync(struct iommu_domain *domain, struct iommu_iotlb_gather *gather) { @@ -511,7 +582,7 @@ static u32 *sun50i_dte_get_page_table(struct sun50i_iommu_domain *sun50i_domain, sun50i_iommu_free_page_table(iommu, drop_pt); } - sun50i_table_flush(sun50i_domain, page_table, PT_SIZE); + sun50i_table_flush(sun50i_domain, page_table, NUM_PT_ENTRIES); sun50i_table_flush(sun50i_domain, dte_addr, 1); return page_table; @@ -601,7 +672,6 @@ static struct iommu_domain *sun50i_iommu_domain_alloc(unsigned type) struct sun50i_iommu_domain *sun50i_domain; if (type != IOMMU_DOMAIN_DMA && - type != IOMMU_DOMAIN_IDENTITY && type != IOMMU_DOMAIN_UNMANAGED) return NULL; @@ -766,6 +836,7 @@ static const struct iommu_ops sun50i_iommu_ops = { .attach_dev = sun50i_iommu_attach_device, .detach_dev = sun50i_iommu_detach_device, .flush_iotlb_all = sun50i_iommu_flush_iotlb_all, + .iotlb_sync_map = sun50i_iommu_iotlb_sync_map, .iotlb_sync = sun50i_iommu_iotlb_sync, .iova_to_phys = sun50i_iommu_iova_to_phys, .map = sun50i_iommu_map, @@ -785,6 +856,8 @@ static void sun50i_iommu_report_fault(struct sun50i_iommu *iommu, report_iommu_fault(iommu->domain, iommu->dev, iova, prot); else dev_err(iommu->dev, "Page fault while iommu not attached to any domain?\n"); + + sun50i_iommu_zap_range(iommu, iova, SPAGE_SIZE); } static phys_addr_t sun50i_iommu_handle_pt_irq(struct sun50i_iommu *iommu, @@ -868,8 +941,8 @@ static phys_addr_t sun50i_iommu_handle_perm_irq(struct sun50i_iommu *iommu) static irqreturn_t sun50i_iommu_irq(int irq, void *dev_id) { + u32 status, l1_status, l2_status, resets; struct sun50i_iommu *iommu = dev_id; - u32 status; spin_lock(&iommu->iommu_lock); @@ -879,6 +952,9 @@ static irqreturn_t sun50i_iommu_irq(int irq, void *dev_id) return IRQ_NONE; } + l1_status = iommu_read(iommu, IOMMU_L1PG_INT_REG); + l2_status = iommu_read(iommu, IOMMU_L2PG_INT_REG); + if (status & IOMMU_INT_INVALID_L2PG) sun50i_iommu_handle_pt_irq(iommu, IOMMU_INT_ERR_ADDR_L2_REG, @@ -892,8 +968,9 @@ static irqreturn_t sun50i_iommu_irq(int irq, void *dev_id) iommu_write(iommu, IOMMU_INT_CLR_REG, status); - iommu_write(iommu, IOMMU_RESET_REG, ~status); - iommu_write(iommu, IOMMU_RESET_REG, status); + resets = (status | l1_status | l2_status) & IOMMU_INT_MASTER_MASK; + iommu_write(iommu, IOMMU_RESET_REG, ~resets); + iommu_write(iommu, IOMMU_RESET_REG, IOMMU_RESET_RELEASE_ALL); spin_unlock(&iommu->iommu_lock); diff --git a/drivers/iommu/tegra-gart.c b/drivers/iommu/tegra-gart.c index e5ca3cf1a949..ed53279d1106 100644 --- a/drivers/iommu/tegra-gart.c +++ b/drivers/iommu/tegra-gart.c @@ -112,7 +112,7 @@ static int gart_iommu_attach_dev(struct iommu_domain *domain, spin_lock(&gart->dom_lock); if (gart->active_domain && gart->active_domain != domain) { - ret = -EBUSY; + ret = -EINVAL; } else if (dev_iommu_priv_get(dev) != domain) { dev_iommu_priv_set(dev, domain); gart->active_domain = domain; diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c index 8b1b5c270e50..5b8fe9bfa9a5 100644 --- a/drivers/iommu/virtio-iommu.c +++ b/drivers/iommu/virtio-iommu.c @@ -670,7 +670,7 @@ static int viommu_domain_finalise(struct viommu_endpoint *vdev, dev_err(vdev->dev, "granule 0x%lx larger than system page size 0x%lx\n", viommu_page_size, PAGE_SIZE); - return -EINVAL; + return -ENODEV; } ret = ida_alloc_range(&viommu->domain_ids, viommu->first_domain, @@ -697,7 +697,7 @@ static int viommu_domain_finalise(struct viommu_endpoint *vdev, if (ret) { ida_free(&viommu->domain_ids, vdomain->id); vdomain->viommu = NULL; - return -EOPNOTSUPP; + return ret; } } @@ -734,8 +734,7 @@ static int viommu_attach_dev(struct iommu_domain *domain, struct device *dev) */ ret = viommu_domain_finalise(vdev, domain); } else if (vdomain->viommu != vdev->viommu) { - dev_err(dev, "cannot attach to foreign vIOMMU\n"); - ret = -EXDEV; + ret = -EINVAL; } mutex_unlock(&vdomain->mutex); diff --git a/drivers/media/common/videobuf2/frame_vector.c b/drivers/media/common/videobuf2/frame_vector.c index 542dde9d2609..144027035892 100644 --- a/drivers/media/common/videobuf2/frame_vector.c +++ b/drivers/media/common/videobuf2/frame_vector.c @@ -35,11 +35,7 @@ int get_vaddr_frames(unsigned long start, unsigned int nr_frames, struct frame_vector *vec) { - struct mm_struct *mm = current->mm; - struct vm_area_struct *vma; - int ret_pin_user_pages_fast = 0; - int ret = 0; - int err; + int ret; if (nr_frames == 0) return 0; @@ -52,57 +48,17 @@ int get_vaddr_frames(unsigned long start, unsigned int nr_frames, ret = pin_user_pages_fast(start, nr_frames, FOLL_FORCE | FOLL_WRITE | FOLL_LONGTERM, (struct page **)(vec->ptrs)); - if (ret > 0) { - vec->got_ref = true; - vec->is_pfns = false; - goto out_unlocked; - } - ret_pin_user_pages_fast = ret; - - mmap_read_lock(mm); - vec->got_ref = false; - vec->is_pfns = true; - ret = 0; - do { - unsigned long *nums = frame_vector_pfns(vec); - - vma = vma_lookup(mm, start); - if (!vma) - break; - - while (ret < nr_frames && start + PAGE_SIZE <= vma->vm_end) { - err = follow_pfn(vma, start, &nums[ret]); - if (err) { - if (ret) - goto out; - // If follow_pfn() returns -EINVAL, then this - // is not an IO mapping or a raw PFN mapping. - // In that case, return the original error from - // pin_user_pages_fast(). Otherwise this - // function would return -EINVAL when - // pin_user_pages_fast() returned -ENOMEM, - // which makes debugging hard. - if (err == -EINVAL && ret_pin_user_pages_fast) - ret = ret_pin_user_pages_fast; - else - ret = err; - goto out; - } - start += PAGE_SIZE; - ret++; - } - /* Bail out if VMA doesn't completely cover the tail page. */ - if (start < vma->vm_end) - break; - } while (ret < nr_frames); -out: - mmap_read_unlock(mm); -out_unlocked: - if (!ret) - ret = -EFAULT; - if (ret > 0) - vec->nr_frames = ret; - return ret; + vec->got_ref = true; + vec->is_pfns = false; + vec->nr_frames = ret; + + if (likely(ret > 0)) + return ret; + + /* This used to (racily) return non-refcounted pfns. Let people know */ + WARN_ONCE(1, "get_vaddr_frames() cannot follow VM_IO mapping"); + vec->nr_frames = 0; + return ret ? ret : -EFAULT; } EXPORT_SYMBOL(get_vaddr_frames); diff --git a/drivers/media/common/videobuf2/videobuf2-core.c b/drivers/media/common/videobuf2/videobuf2-core.c index ab9697f3b5f1..92efc4676df6 100644 --- a/drivers/media/common/videobuf2/videobuf2-core.c +++ b/drivers/media/common/videobuf2/videobuf2-core.c @@ -813,7 +813,13 @@ int vb2_core_reqbufs(struct vb2_queue *q, enum vb2_memory memory, num_buffers = max_t(unsigned int, *count, q->min_buffers_needed); num_buffers = min_t(unsigned int, num_buffers, VB2_MAX_FRAME); memset(q->alloc_devs, 0, sizeof(q->alloc_devs)); + /* + * Set this now to ensure that drivers see the correct q->memory value + * in the queue_setup op. + */ + mutex_lock(&q->mmap_lock); q->memory = memory; + mutex_unlock(&q->mmap_lock); set_queue_coherency(q, non_coherent_mem); /* @@ -823,22 +829,27 @@ int vb2_core_reqbufs(struct vb2_queue *q, enum vb2_memory memory, ret = call_qop(q, queue_setup, q, &num_buffers, &num_planes, plane_sizes, q->alloc_devs); if (ret) - return ret; + goto error; /* Check that driver has set sane values */ - if (WARN_ON(!num_planes)) - return -EINVAL; + if (WARN_ON(!num_planes)) { + ret = -EINVAL; + goto error; + } for (i = 0; i < num_planes; i++) - if (WARN_ON(!plane_sizes[i])) - return -EINVAL; + if (WARN_ON(!plane_sizes[i])) { + ret = -EINVAL; + goto error; + } /* Finally, allocate buffers and video memory */ allocated_buffers = __vb2_queue_alloc(q, memory, num_buffers, num_planes, plane_sizes); if (allocated_buffers == 0) { dprintk(q, 1, "memory allocation failed\n"); - return -ENOMEM; + ret = -ENOMEM; + goto error; } /* @@ -879,7 +890,8 @@ int vb2_core_reqbufs(struct vb2_queue *q, enum vb2_memory memory, if (ret < 0) { /* * Note: __vb2_queue_free() will subtract 'allocated_buffers' - * from q->num_buffers. + * from q->num_buffers and it will reset q->memory to + * VB2_MEMORY_UNKNOWN. */ __vb2_queue_free(q, allocated_buffers); mutex_unlock(&q->mmap_lock); @@ -895,6 +907,12 @@ int vb2_core_reqbufs(struct vb2_queue *q, enum vb2_memory memory, q->waiting_for_buffers = !q->is_output; return 0; + +error: + mutex_lock(&q->mmap_lock); + q->memory = VB2_MEMORY_UNKNOWN; + mutex_unlock(&q->mmap_lock); + return ret; } EXPORT_SYMBOL_GPL(vb2_core_reqbufs); @@ -906,6 +924,7 @@ int vb2_core_create_bufs(struct vb2_queue *q, enum vb2_memory memory, unsigned int num_planes = 0, num_buffers, allocated_buffers; unsigned plane_sizes[VB2_MAX_PLANES] = { }; bool non_coherent_mem = flags & V4L2_MEMORY_FLAG_NON_COHERENT; + bool no_previous_buffers = !q->num_buffers; int ret; if (q->num_buffers == VB2_MAX_FRAME) { @@ -913,13 +932,19 @@ int vb2_core_create_bufs(struct vb2_queue *q, enum vb2_memory memory, return -ENOBUFS; } - if (!q->num_buffers) { + if (no_previous_buffers) { if (q->waiting_in_dqbuf && *count) { dprintk(q, 1, "another dup()ped fd is waiting for a buffer\n"); return -EBUSY; } memset(q->alloc_devs, 0, sizeof(q->alloc_devs)); + /* + * Set this now to ensure that drivers see the correct q->memory + * value in the queue_setup op. + */ + mutex_lock(&q->mmap_lock); q->memory = memory; + mutex_unlock(&q->mmap_lock); q->waiting_for_buffers = !q->is_output; set_queue_coherency(q, non_coherent_mem); } else { @@ -945,14 +970,15 @@ int vb2_core_create_bufs(struct vb2_queue *q, enum vb2_memory memory, ret = call_qop(q, queue_setup, q, &num_buffers, &num_planes, plane_sizes, q->alloc_devs); if (ret) - return ret; + goto error; /* Finally, allocate buffers and video memory */ allocated_buffers = __vb2_queue_alloc(q, memory, num_buffers, num_planes, plane_sizes); if (allocated_buffers == 0) { dprintk(q, 1, "memory allocation failed\n"); - return -ENOMEM; + ret = -ENOMEM; + goto error; } /* @@ -983,7 +1009,8 @@ int vb2_core_create_bufs(struct vb2_queue *q, enum vb2_memory memory, if (ret < 0) { /* * Note: __vb2_queue_free() will subtract 'allocated_buffers' - * from q->num_buffers. + * from q->num_buffers and it will reset q->memory to + * VB2_MEMORY_UNKNOWN. */ __vb2_queue_free(q, allocated_buffers); mutex_unlock(&q->mmap_lock); @@ -998,6 +1025,14 @@ int vb2_core_create_bufs(struct vb2_queue *q, enum vb2_memory memory, *count = allocated_buffers; return 0; + +error: + if (no_previous_buffers) { + mutex_lock(&q->mmap_lock); + q->memory = VB2_MEMORY_UNKNOWN; + mutex_unlock(&q->mmap_lock); + } + return ret; } EXPORT_SYMBOL_GPL(vb2_core_create_bufs); @@ -2165,6 +2200,22 @@ static int __find_plane_by_offset(struct vb2_queue *q, unsigned long off, unsigned int buffer, plane; /* + * Sanity checks to ensure the lock is held, MEMORY_MMAP is + * used and fileio isn't active. + */ + lockdep_assert_held(&q->mmap_lock); + + if (q->memory != VB2_MEMORY_MMAP) { + dprintk(q, 1, "queue is not currently set up for mmap\n"); + return -EINVAL; + } + + if (vb2_fileio_is_active(q)) { + dprintk(q, 1, "file io in progress\n"); + return -EBUSY; + } + + /* * Go over all buffers and their planes, comparing the given offset * with an offset assigned to each plane. If a match is found, * return its buffer and plane numbers. @@ -2265,11 +2316,6 @@ int vb2_mmap(struct vb2_queue *q, struct vm_area_struct *vma) int ret; unsigned long length; - if (q->memory != VB2_MEMORY_MMAP) { - dprintk(q, 1, "queue is not currently set up for mmap\n"); - return -EINVAL; - } - /* * Check memory area access mode. */ @@ -2291,14 +2337,9 @@ int vb2_mmap(struct vb2_queue *q, struct vm_area_struct *vma) mutex_lock(&q->mmap_lock); - if (vb2_fileio_is_active(q)) { - dprintk(q, 1, "mmap: file io in progress\n"); - ret = -EBUSY; - goto unlock; - } - /* - * Find the plane corresponding to the offset passed by userspace. + * Find the plane corresponding to the offset passed by userspace. This + * will return an error if not MEMORY_MMAP or file I/O is in progress. */ ret = __find_plane_by_offset(q, off, &buffer, &plane); if (ret) @@ -2351,22 +2392,25 @@ unsigned long vb2_get_unmapped_area(struct vb2_queue *q, void *vaddr; int ret; - if (q->memory != VB2_MEMORY_MMAP) { - dprintk(q, 1, "queue is not currently set up for mmap\n"); - return -EINVAL; - } + mutex_lock(&q->mmap_lock); /* - * Find the plane corresponding to the offset passed by userspace. + * Find the plane corresponding to the offset passed by userspace. This + * will return an error if not MEMORY_MMAP or file I/O is in progress. */ ret = __find_plane_by_offset(q, off, &buffer, &plane); if (ret) - return ret; + goto unlock; vb = q->bufs[buffer]; vaddr = vb2_plane_vaddr(vb, plane); + mutex_unlock(&q->mmap_lock); return vaddr ? (unsigned long)vaddr : -EINVAL; + +unlock: + mutex_unlock(&q->mmap_lock); + return ret; } EXPORT_SYMBOL_GPL(vb2_get_unmapped_area); #endif diff --git a/drivers/media/v4l2-core/v4l2-dv-timings.c b/drivers/media/v4l2-core/v4l2-dv-timings.c index 003c32fed3f7..942d0005c55e 100644 --- a/drivers/media/v4l2-core/v4l2-dv-timings.c +++ b/drivers/media/v4l2-core/v4l2-dv-timings.c @@ -145,6 +145,8 @@ bool v4l2_valid_dv_timings(const struct v4l2_dv_timings *t, const struct v4l2_bt_timings *bt = &t->bt; const struct v4l2_bt_timings_cap *cap = &dvcap->bt; u32 caps = cap->capabilities; + const u32 max_vert = 10240; + u32 max_hor = 3 * bt->width; if (t->type != V4L2_DV_BT_656_1120) return false; @@ -166,14 +168,20 @@ bool v4l2_valid_dv_timings(const struct v4l2_dv_timings *t, if (!bt->interlaced && (bt->il_vbackporch || bt->il_vsync || bt->il_vfrontporch)) return false; - if (bt->hfrontporch > 2 * bt->width || - bt->hsync > 1024 || bt->hbackporch > 1024) + /* + * Some video receivers cannot properly separate the frontporch, + * backporch and sync values, and instead they only have the total + * blanking. That can be assigned to any of these three fields. + * So just check that none of these are way out of range. + */ + if (bt->hfrontporch > max_hor || + bt->hsync > max_hor || bt->hbackporch > max_hor) return false; - if (bt->vfrontporch > 4096 || - bt->vsync > 128 || bt->vbackporch > 4096) + if (bt->vfrontporch > max_vert || + bt->vsync > max_vert || bt->vbackporch > max_vert) return false; - if (bt->interlaced && (bt->il_vfrontporch > 4096 || - bt->il_vsync > 128 || bt->il_vbackporch > 4096)) + if (bt->interlaced && (bt->il_vfrontporch > max_vert || + bt->il_vsync > max_vert || bt->il_vbackporch > max_vert)) return false; return fnc == NULL || fnc(t, fnc_handle); } diff --git a/drivers/misc/uacce/uacce.c b/drivers/misc/uacce/uacce.c index b70a013139c7..905eff1f840e 100644 --- a/drivers/misc/uacce/uacce.c +++ b/drivers/misc/uacce/uacce.c @@ -108,7 +108,7 @@ static int uacce_bind_queue(struct uacce_device *uacce, struct uacce_queue *q) if (!(uacce->flags & UACCE_DEV_SVA)) return 0; - handle = iommu_sva_bind_device(uacce->parent, current->mm, NULL); + handle = iommu_sva_bind_device(uacce->parent, current->mm); if (IS_ERR(handle)) return PTR_ERR(handle); diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index c5de202f530a..de1cc9e1ae57 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -1484,6 +1484,11 @@ void mmc_init_erase(struct mmc_card *card) card->pref_erase = 0; } +static bool is_trim_arg(unsigned int arg) +{ + return (arg & MMC_TRIM_OR_DISCARD_ARGS) && arg != MMC_DISCARD_ARG; +} + static unsigned int mmc_mmc_erase_timeout(struct mmc_card *card, unsigned int arg, unsigned int qty) { @@ -1766,7 +1771,7 @@ int mmc_erase(struct mmc_card *card, unsigned int from, unsigned int nr, !(card->ext_csd.sec_feature_support & EXT_CSD_SEC_ER_EN)) return -EOPNOTSUPP; - if (mmc_card_mmc(card) && (arg & MMC_TRIM_ARGS) && + if (mmc_card_mmc(card) && is_trim_arg(arg) && !(card->ext_csd.sec_feature_support & EXT_CSD_SEC_GB_CL_EN)) return -EOPNOTSUPP; @@ -1796,7 +1801,7 @@ int mmc_erase(struct mmc_card *card, unsigned int from, unsigned int nr, * identified by the card->eg_boundary flag. */ rem = card->erase_size - (from % card->erase_size); - if ((arg & MMC_TRIM_ARGS) && (card->eg_boundary) && (nr > rem)) { + if ((arg & MMC_TRIM_OR_DISCARD_ARGS) && card->eg_boundary && nr > rem) { err = mmc_do_erase(card, from, from + rem - 1, arg); from += rem; if ((err) || (to <= from)) diff --git a/drivers/mmc/core/mmc_test.c b/drivers/mmc/core/mmc_test.c index 8d9bceeff986..155ce2bdfe62 100644 --- a/drivers/mmc/core/mmc_test.c +++ b/drivers/mmc/core/mmc_test.c @@ -3179,7 +3179,8 @@ static int __mmc_test_register_dbgfs_file(struct mmc_card *card, struct mmc_test_dbgfs_file *df; if (card->debugfs_root) - debugfs_create_file(name, mode, card->debugfs_root, card, fops); + file = debugfs_create_file(name, mode, card->debugfs_root, + card, fops); df = kmalloc(sizeof(*df), GFP_KERNEL); if (!df) { diff --git a/drivers/mmc/host/mtk-sd.c b/drivers/mmc/host/mtk-sd.c index df941438aef5..26bc59b5a7cc 100644 --- a/drivers/mmc/host/mtk-sd.c +++ b/drivers/mmc/host/mtk-sd.c @@ -2588,13 +2588,11 @@ static int msdc_of_clock_parse(struct platform_device *pdev, return PTR_ERR(host->src_clk_cg); } - host->sys_clk_cg = devm_clk_get_optional(&pdev->dev, "sys_cg"); + /* If present, always enable for this clock gate */ + host->sys_clk_cg = devm_clk_get_optional_enabled(&pdev->dev, "sys_cg"); if (IS_ERR(host->sys_clk_cg)) host->sys_clk_cg = NULL; - /* If present, always enable for this clock gate */ - clk_prepare_enable(host->sys_clk_cg); - host->bulk_clks[0].id = "pclk_cg"; host->bulk_clks[1].id = "axi_cg"; host->bulk_clks[2].id = "ahb_cg"; diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c index 31ea0a2fce35..ffeb5759830f 100644 --- a/drivers/mmc/host/sdhci-esdhc-imx.c +++ b/drivers/mmc/host/sdhci-esdhc-imx.c @@ -1512,7 +1512,7 @@ static void esdhc_cqe_enable(struct mmc_host *mmc) * system resume back. */ cqhci_writel(cq_host, 0, CQHCI_CTL); - if (cqhci_readl(cq_host, CQHCI_CTL) && CQHCI_HALT) + if (cqhci_readl(cq_host, CQHCI_CTL) & CQHCI_HALT) dev_err(mmc_dev(host->mmc), "failed to exit halt state when enable CQE\n"); diff --git a/drivers/mmc/host/sdhci-sprd.c b/drivers/mmc/host/sdhci-sprd.c index b92a408f138d..bec3f9e3cd3f 100644 --- a/drivers/mmc/host/sdhci-sprd.c +++ b/drivers/mmc/host/sdhci-sprd.c @@ -470,7 +470,7 @@ static int sdhci_sprd_voltage_switch(struct mmc_host *mmc, struct mmc_ios *ios) } if (IS_ERR(sprd_host->pinctrl)) - return 0; + goto reset; switch (ios->signal_voltage) { case MMC_SIGNAL_VOLTAGE_180: @@ -498,6 +498,8 @@ static int sdhci_sprd_voltage_switch(struct mmc_host *mmc, struct mmc_ios *ios) /* Wait for 300 ~ 500 us for pin state stable */ usleep_range(300, 500); + +reset: sdhci_reset(host, SDHCI_RESET_CMD | SDHCI_RESET_DATA); return 0; diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index fef03de85b99..c7ad32a75b57 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -373,6 +373,7 @@ static void sdhci_init(struct sdhci_host *host, int soft) if (soft) { /* force clock reconfiguration */ host->clock = 0; + host->reinit_uhs = true; mmc->ops->set_ios(mmc, &mmc->ios); } } @@ -2293,11 +2294,46 @@ void sdhci_set_uhs_signaling(struct sdhci_host *host, unsigned timing) } EXPORT_SYMBOL_GPL(sdhci_set_uhs_signaling); +static bool sdhci_timing_has_preset(unsigned char timing) +{ + switch (timing) { + case MMC_TIMING_UHS_SDR12: + case MMC_TIMING_UHS_SDR25: + case MMC_TIMING_UHS_SDR50: + case MMC_TIMING_UHS_SDR104: + case MMC_TIMING_UHS_DDR50: + case MMC_TIMING_MMC_DDR52: + return true; + }; + return false; +} + +static bool sdhci_preset_needed(struct sdhci_host *host, unsigned char timing) +{ + return !(host->quirks2 & SDHCI_QUIRK2_PRESET_VALUE_BROKEN) && + sdhci_timing_has_preset(timing); +} + +static bool sdhci_presetable_values_change(struct sdhci_host *host, struct mmc_ios *ios) +{ + /* + * Preset Values are: Driver Strength, Clock Generator and SDCLK/RCLK + * Frequency. Check if preset values need to be enabled, or the Driver + * Strength needs updating. Note, clock changes are handled separately. + */ + return !host->preset_enabled && + (sdhci_preset_needed(host, ios->timing) || host->drv_type != ios->drv_type); +} + void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) { struct sdhci_host *host = mmc_priv(mmc); + bool reinit_uhs = host->reinit_uhs; + bool turning_on_clk = false; u8 ctrl; + host->reinit_uhs = false; + if (ios->power_mode == MMC_POWER_UNDEFINED) return; @@ -2323,6 +2359,8 @@ void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) sdhci_enable_preset_value(host, false); if (!ios->clock || ios->clock != host->clock) { + turning_on_clk = ios->clock && !host->clock; + host->ops->set_clock(host, ios->clock); host->clock = ios->clock; @@ -2349,6 +2387,17 @@ void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) host->ops->set_bus_width(host, ios->bus_width); + /* + * Special case to avoid multiple clock changes during voltage + * switching. + */ + if (!reinit_uhs && + turning_on_clk && + host->timing == ios->timing && + host->version >= SDHCI_SPEC_300 && + !sdhci_presetable_values_change(host, ios)) + return; + ctrl = sdhci_readb(host, SDHCI_HOST_CONTROL); if (!(host->quirks & SDHCI_QUIRK_NO_HISPD_BIT)) { @@ -2392,6 +2441,7 @@ void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) } sdhci_writew(host, ctrl_2, SDHCI_HOST_CONTROL2); + host->drv_type = ios->drv_type; } else { /* * According to SDHC Spec v3.00, if the Preset Value @@ -2419,19 +2469,14 @@ void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) host->ops->set_uhs_signaling(host, ios->timing); host->timing = ios->timing; - if (!(host->quirks2 & SDHCI_QUIRK2_PRESET_VALUE_BROKEN) && - ((ios->timing == MMC_TIMING_UHS_SDR12) || - (ios->timing == MMC_TIMING_UHS_SDR25) || - (ios->timing == MMC_TIMING_UHS_SDR50) || - (ios->timing == MMC_TIMING_UHS_SDR104) || - (ios->timing == MMC_TIMING_UHS_DDR50) || - (ios->timing == MMC_TIMING_MMC_DDR52))) { + if (sdhci_preset_needed(host, ios->timing)) { u16 preset; sdhci_enable_preset_value(host, true); preset = sdhci_get_preset_value(host); ios->drv_type = FIELD_GET(SDHCI_PRESET_DRV_MASK, preset); + host->drv_type = ios->drv_type; } /* Re-enable SD Clock */ @@ -3768,6 +3813,7 @@ int sdhci_resume_host(struct sdhci_host *host) sdhci_init(host, 0); host->pwr = 0; host->clock = 0; + host->reinit_uhs = true; mmc->ops->set_ios(mmc, &mmc->ios); } else { sdhci_init(host, (mmc->pm_flags & MMC_PM_KEEP_POWER)); @@ -3830,6 +3876,7 @@ int sdhci_runtime_resume_host(struct sdhci_host *host, int soft_reset) /* Force clock and power re-program */ host->pwr = 0; host->clock = 0; + host->reinit_uhs = true; mmc->ops->start_signal_voltage_switch(mmc, &mmc->ios); mmc->ops->set_ios(mmc, &mmc->ios); diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h index d750c464bd1e..87a3aaa07438 100644 --- a/drivers/mmc/host/sdhci.h +++ b/drivers/mmc/host/sdhci.h @@ -524,6 +524,8 @@ struct sdhci_host { unsigned int clock; /* Current clock (MHz) */ u8 pwr; /* Current voltage */ + u8 drv_type; /* Current UHS-I driver type */ + bool reinit_uhs; /* Force UHS-related re-initialization */ bool runtime_suspended; /* Host is runtime suspended */ bool bus_on; /* Bus power prevents runtime suspend */ diff --git a/drivers/net/arcnet/com20020_cs.c b/drivers/net/arcnet/com20020_cs.c index 24150c933fcb..dc3253b318da 100644 --- a/drivers/net/arcnet/com20020_cs.c +++ b/drivers/net/arcnet/com20020_cs.c @@ -113,6 +113,7 @@ static int com20020_probe(struct pcmcia_device *p_dev) struct com20020_dev *info; struct net_device *dev; struct arcnet_local *lp; + int ret = -ENOMEM; dev_dbg(&p_dev->dev, "com20020_attach()\n"); @@ -142,12 +143,18 @@ static int com20020_probe(struct pcmcia_device *p_dev) info->dev = dev; p_dev->priv = info; - return com20020_config(p_dev); + ret = com20020_config(p_dev); + if (ret) + goto fail_config; + + return 0; +fail_config: + free_arcdev(dev); fail_alloc_dev: kfree(info); fail_alloc_info: - return -ENOMEM; + return ret; } /* com20020_attach */ static void com20020_detach(struct pcmcia_device *link) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index e84c49bf4d0c..b9a882f182d2 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -3231,16 +3231,23 @@ static int bond_na_rcv(const struct sk_buff *skb, struct bonding *bond, struct slave *slave) { struct slave *curr_active_slave, *curr_arp_slave; - struct icmp6hdr *hdr = icmp6_hdr(skb); struct in6_addr *saddr, *daddr; + struct { + struct ipv6hdr ip6; + struct icmp6hdr icmp6; + } *combined, _combined; if (skb->pkt_type == PACKET_OTHERHOST || - skb->pkt_type == PACKET_LOOPBACK || - hdr->icmp6_type != NDISC_NEIGHBOUR_ADVERTISEMENT) + skb->pkt_type == PACKET_LOOPBACK) + goto out; + + combined = skb_header_pointer(skb, 0, sizeof(_combined), &_combined); + if (!combined || combined->ip6.nexthdr != NEXTHDR_ICMP || + combined->icmp6.icmp6_type != NDISC_NEIGHBOUR_ADVERTISEMENT) goto out; - saddr = &ipv6_hdr(skb)->saddr; - daddr = &ipv6_hdr(skb)->daddr; + saddr = &combined->ip6.saddr; + daddr = &combined->ip6.daddr; slave_dbg(bond->dev, slave->dev, "%s: %s/%d av %d sv %d sip %pI6c tip %pI6c\n", __func__, slave->dev->name, bond_slave_state(slave), diff --git a/drivers/net/can/can327.c b/drivers/net/can/can327.c index 094197780776..dc7192ecb001 100644 --- a/drivers/net/can/can327.c +++ b/drivers/net/can/can327.c @@ -263,8 +263,10 @@ static void can327_feed_frame_to_netdev(struct can327 *elm, struct sk_buff *skb) { lockdep_assert_held(&elm->lock); - if (!netif_running(elm->dev)) + if (!netif_running(elm->dev)) { + kfree_skb(skb); return; + } /* Queue for NAPI pickup. * rx-offload will update stats and LEDs for us. @@ -794,9 +796,9 @@ static int can327_netdev_close(struct net_device *dev) netif_stop_queue(dev); - /* Give UART one final chance to flush. */ - clear_bit(TTY_DO_WRITE_WAKEUP, &elm->tty->flags); - flush_work(&elm->tx_work); + /* We don't flush the UART TX queue here, as we want final stop + * commands (like the above dummy char) to be flushed out. + */ can_rx_offload_disable(&elm->offload); elm->can.state = CAN_STATE_STOPPED; @@ -1067,12 +1069,15 @@ static void can327_ldisc_close(struct tty_struct *tty) { struct can327 *elm = (struct can327 *)tty->disc_data; - /* unregister_netdev() calls .ndo_stop() so we don't have to. - * Our .ndo_stop() also flushes the TTY write wakeup handler, - * so we can safely set elm->tty = NULL after this. - */ + /* unregister_netdev() calls .ndo_stop() so we don't have to. */ unregister_candev(elm->dev); + /* Give UART one final chance to flush. + * No need to clear TTY_DO_WRITE_WAKEUP since .write_wakeup() is + * serialised against .close() and will not be called once we return. + */ + flush_work(&elm->tx_work); + /* Mark channel as dead */ spin_lock_bh(&elm->lock); tty->disc_data = NULL; diff --git a/drivers/net/can/cc770/cc770_isa.c b/drivers/net/can/cc770/cc770_isa.c index 194c86e0f340..8f6dccd5a587 100644 --- a/drivers/net/can/cc770/cc770_isa.c +++ b/drivers/net/can/cc770/cc770_isa.c @@ -264,22 +264,24 @@ static int cc770_isa_probe(struct platform_device *pdev) if (err) { dev_err(&pdev->dev, "couldn't register device (err=%d)\n", err); - goto exit_unmap; + goto exit_free; } dev_info(&pdev->dev, "device registered (reg_base=0x%p, irq=%d)\n", priv->reg_base, dev->irq); return 0; - exit_unmap: +exit_free: + free_cc770dev(dev); +exit_unmap: if (mem[idx]) iounmap(base); - exit_release: +exit_release: if (mem[idx]) release_mem_region(mem[idx], iosize); else release_region(port[idx], iosize); - exit: +exit: return err; } diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index 00d11e95fd98..e5575d2755e4 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -1909,7 +1909,7 @@ int m_can_class_get_clocks(struct m_can_classdev *cdev) cdev->hclk = devm_clk_get(cdev->dev, "hclk"); cdev->cclk = devm_clk_get(cdev->dev, "cclk"); - if (IS_ERR(cdev->cclk)) { + if (IS_ERR(cdev->hclk) || IS_ERR(cdev->cclk)) { dev_err(cdev->dev, "no clock found\n"); ret = -ENODEV; } diff --git a/drivers/net/can/m_can/m_can_pci.c b/drivers/net/can/m_can/m_can_pci.c index 8f184a852a0a..f2219aa2824b 100644 --- a/drivers/net/can/m_can/m_can_pci.c +++ b/drivers/net/can/m_can/m_can_pci.c @@ -120,7 +120,7 @@ static int m_can_pci_probe(struct pci_dev *pci, const struct pci_device_id *id) ret = pci_alloc_irq_vectors(pci, 1, 1, PCI_IRQ_ALL_TYPES); if (ret < 0) - return ret; + goto err_free_dev; mcan_class->dev = &pci->dev; mcan_class->net->irq = pci_irq_vector(pci, 0); @@ -132,7 +132,7 @@ static int m_can_pci_probe(struct pci_dev *pci, const struct pci_device_id *id) ret = m_can_class_register(mcan_class); if (ret) - goto err; + goto err_free_irq; /* Enable interrupt control at CAN wrapper IP */ writel(0x1, base + CTL_CSR_INT_CTL_OFFSET); @@ -144,8 +144,10 @@ static int m_can_pci_probe(struct pci_dev *pci, const struct pci_device_id *id) return 0; -err: +err_free_irq: pci_free_irq_vectors(pci); +err_free_dev: + m_can_class_free_dev(mcan_class->net); return ret; } @@ -161,6 +163,7 @@ static void m_can_pci_remove(struct pci_dev *pci) writel(0x0, priv->base + CTL_CSR_INT_CTL_OFFSET); m_can_class_unregister(mcan_class); + m_can_class_free_dev(mcan_class->net); pci_free_irq_vectors(pci); } diff --git a/drivers/net/can/sja1000/sja1000_isa.c b/drivers/net/can/sja1000/sja1000_isa.c index d513fac50718..db3e767d5320 100644 --- a/drivers/net/can/sja1000/sja1000_isa.c +++ b/drivers/net/can/sja1000/sja1000_isa.c @@ -202,22 +202,24 @@ static int sja1000_isa_probe(struct platform_device *pdev) if (err) { dev_err(&pdev->dev, "registering %s failed (err=%d)\n", DRV_NAME, err); - goto exit_unmap; + goto exit_free; } dev_info(&pdev->dev, "%s device registered (reg_base=0x%p, irq=%d)\n", DRV_NAME, priv->reg_base, dev->irq); return 0; - exit_unmap: +exit_free: + free_sja1000dev(dev); +exit_unmap: if (mem[idx]) iounmap(base); - exit_release: +exit_release: if (mem[idx]) release_mem_region(mem[idx], iosize); else release_region(port[idx], iosize); - exit: +exit: return err; } diff --git a/drivers/net/can/slcan/slcan-core.c b/drivers/net/can/slcan/slcan-core.c index fbb34139daa1..f4db77007c13 100644 --- a/drivers/net/can/slcan/slcan-core.c +++ b/drivers/net/can/slcan/slcan-core.c @@ -864,12 +864,14 @@ static void slcan_close(struct tty_struct *tty) { struct slcan *sl = (struct slcan *)tty->disc_data; - /* unregister_netdev() calls .ndo_stop() so we don't have to. - * Our .ndo_stop() also flushes the TTY write wakeup handler, - * so we can safely set sl->tty = NULL after this. - */ unregister_candev(sl->dev); + /* + * The netdev needn't be UP (so .ndo_stop() is not called). Hence make + * sure this is not running before freeing it up. + */ + flush_work(&sl->tx_work); + /* Mark channel as dead */ spin_lock_bh(&sl->lock); tty->disc_data = NULL; diff --git a/drivers/net/can/usb/esd_usb.c b/drivers/net/can/usb/esd_usb.c index 81b88e9e5bdc..42323f5e6f3a 100644 --- a/drivers/net/can/usb/esd_usb.c +++ b/drivers/net/can/usb/esd_usb.c @@ -234,6 +234,10 @@ static void esd_usb_rx_event(struct esd_usb_net_priv *priv, u8 rxerr = msg->msg.rx.data[2]; u8 txerr = msg->msg.rx.data[3]; + netdev_dbg(priv->netdev, + "CAN_ERR_EV_EXT: dlc=%#02x state=%02x ecc=%02x rec=%02x tec=%02x\n", + msg->msg.rx.dlc, state, ecc, rxerr, txerr); + skb = alloc_can_err_skb(priv->netdev, &cf); if (skb == NULL) { stats->rx_dropped++; @@ -260,6 +264,8 @@ static void esd_usb_rx_event(struct esd_usb_net_priv *priv, break; default: priv->can.state = CAN_STATE_ERROR_ACTIVE; + txerr = 0; + rxerr = 0; break; } } else { diff --git a/drivers/net/can/usb/etas_es58x/es58x_core.c b/drivers/net/can/usb/etas_es58x/es58x_core.c index 25f863b4f5f0..ddb7c5735c9a 100644 --- a/drivers/net/can/usb/etas_es58x/es58x_core.c +++ b/drivers/net/can/usb/etas_es58x/es58x_core.c @@ -2091,8 +2091,11 @@ static int es58x_init_netdev(struct es58x_device *es58x_dev, int channel_idx) netdev->dev_port = channel_idx; ret = register_candev(netdev); - if (ret) + if (ret) { + es58x_dev->netdev[channel_idx] = NULL; + free_candev(netdev); return ret; + } netdev_queue_set_dql_min_limit(netdev_get_tx_queue(netdev, 0), es58x_dev->param->dql_min_limit); diff --git a/drivers/net/can/usb/mcba_usb.c b/drivers/net/can/usb/mcba_usb.c index 218b098b261d..47619e9cb005 100644 --- a/drivers/net/can/usb/mcba_usb.c +++ b/drivers/net/can/usb/mcba_usb.c @@ -47,6 +47,10 @@ #define MCBA_VER_REQ_USB 1 #define MCBA_VER_REQ_CAN 2 +/* Drive the CAN_RES signal LOW "0" to activate R24 and R25 */ +#define MCBA_VER_TERMINATION_ON 0 +#define MCBA_VER_TERMINATION_OFF 1 + #define MCBA_SIDL_EXID_MASK 0x8 #define MCBA_DLC_MASK 0xf #define MCBA_DLC_RTR_MASK 0x40 @@ -463,7 +467,7 @@ static void mcba_usb_process_ka_usb(struct mcba_priv *priv, priv->usb_ka_first_pass = false; } - if (msg->termination_state) + if (msg->termination_state == MCBA_VER_TERMINATION_ON) priv->can.termination = MCBA_TERMINATION_ENABLED; else priv->can.termination = MCBA_TERMINATION_DISABLED; @@ -785,9 +789,9 @@ static int mcba_set_termination(struct net_device *netdev, u16 term) }; if (term == MCBA_TERMINATION_ENABLED) - usb_msg.termination = 1; + usb_msg.termination = MCBA_VER_TERMINATION_ON; else - usb_msg.termination = 0; + usb_msg.termination = MCBA_VER_TERMINATION_OFF; mcba_usb_xmit_cmd(priv, (struct mcba_usb_msg *)&usb_msg); diff --git a/drivers/net/dsa/lan9303-core.c b/drivers/net/dsa/lan9303-core.c index 438e46af03e9..80f07bd20593 100644 --- a/drivers/net/dsa/lan9303-core.c +++ b/drivers/net/dsa/lan9303-core.c @@ -961,7 +961,7 @@ static const struct lan9303_mib_desc lan9303_mib[] = { { .offset = LAN9303_MAC_TX_BRDCST_CNT_0, .name = "TxBroad", }, { .offset = LAN9303_MAC_TX_PAUSE_CNT_0, .name = "TxPause", }, { .offset = LAN9303_MAC_TX_MULCST_CNT_0, .name = "TxMulti", }, - { .offset = LAN9303_MAC_RX_UNDSZE_CNT_0, .name = "TxUnderRun", }, + { .offset = LAN9303_MAC_RX_UNDSZE_CNT_0, .name = "RxShort", }, { .offset = LAN9303_MAC_TX_64_CNT_0, .name = "Tx64Byte", }, { .offset = LAN9303_MAC_TX_127_CNT_0, .name = "Tx128Byte", }, { .offset = LAN9303_MAC_TX_255_CNT_0, .name = "Tx256Byte", }, diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 2479be3a1e35..937cb22cb3d4 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -833,10 +833,13 @@ static void mv88e6xxx_get_caps(struct dsa_switch *ds, int port, chip->info->ops->phylink_get_caps(chip, port, config); - /* Internal ports need GMII for PHYLIB */ - if (mv88e6xxx_phy_is_internal(ds, port)) + if (mv88e6xxx_phy_is_internal(ds, port)) { + __set_bit(PHY_INTERFACE_MODE_INTERNAL, + config->supported_interfaces); + /* Internal ports with no phy-mode need GMII for PHYLIB */ __set_bit(PHY_INTERFACE_MODE_GMII, config->supported_interfaces); + } } static void mv88e6xxx_mac_config(struct dsa_switch *ds, int port, diff --git a/drivers/net/dsa/sja1105/sja1105_devlink.c b/drivers/net/dsa/sja1105/sja1105_devlink.c index 10c6fea1227f..bdbbff2a7909 100644 --- a/drivers/net/dsa/sja1105/sja1105_devlink.c +++ b/drivers/net/dsa/sja1105/sja1105_devlink.c @@ -95,6 +95,8 @@ static int sja1105_setup_devlink_regions(struct dsa_switch *ds) if (IS_ERR(region)) { while (--i >= 0) dsa_devlink_region_destroy(priv->regions[i]); + + kfree(priv->regions); return PTR_ERR(region); } diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index 412666111b0c..b70dcf32a26d 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -1038,7 +1038,7 @@ static int sja1105_init_l2_policing(struct sja1105_private *priv) policing[bcast].sharindx = port; /* Only SJA1110 has multicast policers */ - if (mcast <= table->ops->max_entry_count) + if (mcast < table->ops->max_entry_count) policing[mcast].sharindx = port; } diff --git a/drivers/net/dsa/sja1105/sja1105_mdio.c b/drivers/net/dsa/sja1105/sja1105_mdio.c index 215dd17ca790..4059fcc8c832 100644 --- a/drivers/net/dsa/sja1105/sja1105_mdio.c +++ b/drivers/net/dsa/sja1105/sja1105_mdio.c @@ -256,6 +256,9 @@ static int sja1105_base_tx_mdio_read(struct mii_bus *bus, int phy, int reg) u32 tmp; int rc; + if (reg & MII_ADDR_C45) + return -EOPNOTSUPP; + rc = sja1105_xfer_u32(priv, SPI_READ, regs->mdio_100base_tx + reg, &tmp, NULL); if (rc < 0) @@ -272,6 +275,9 @@ static int sja1105_base_tx_mdio_write(struct mii_bus *bus, int phy, int reg, const struct sja1105_regs *regs = priv->info->regs; u32 tmp = val; + if (reg & MII_ADDR_C45) + return -EOPNOTSUPP; + return sja1105_xfer_u32(priv, SPI_WRITE, regs->mdio_100base_tx + reg, &tmp, NULL); } diff --git a/drivers/net/ethernet/aeroflex/greth.c b/drivers/net/ethernet/aeroflex/greth.c index e104fb02817d..aa0d2f3aaeaa 100644 --- a/drivers/net/ethernet/aeroflex/greth.c +++ b/drivers/net/ethernet/aeroflex/greth.c @@ -258,6 +258,7 @@ static int greth_init_rings(struct greth_private *greth) if (dma_mapping_error(greth->dev, dma_addr)) { if (netif_msg_ifup(greth)) dev_err(greth->dev, "Could not create initial DMA mapping\n"); + dev_kfree_skb(skb); goto cleanup; } greth->rx_skbuff[i] = skb; diff --git a/drivers/net/ethernet/altera/altera_tse_main.c b/drivers/net/ethernet/altera/altera_tse_main.c index 7633b227b2ca..711d5b5a4c49 100644 --- a/drivers/net/ethernet/altera/altera_tse_main.c +++ b/drivers/net/ethernet/altera/altera_tse_main.c @@ -990,6 +990,7 @@ static int tse_shutdown(struct net_device *dev) int ret; phylink_stop(priv->phylink); + phylink_disconnect_phy(priv->phylink); netif_stop_queue(dev); napi_disable(&priv->napi); diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c index a08f221e30d4..ac4ea93bd8dd 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c @@ -13,6 +13,7 @@ #include "aq_ptp.h" #include "aq_filters.h" #include "aq_macsec.h" +#include "aq_main.h" #include <linux/ptp_clock_kernel.h> @@ -858,7 +859,7 @@ static int aq_set_ringparam(struct net_device *ndev, if (netif_running(ndev)) { ndev_running = true; - dev_close(ndev); + aq_ndev_close(ndev); } cfg->rxds = max(ring->rx_pending, hw_caps->rxds_min); @@ -874,7 +875,7 @@ static int aq_set_ringparam(struct net_device *ndev, goto err_exit; if (ndev_running) - err = dev_open(ndev, NULL); + err = aq_ndev_open(ndev); err_exit: return err; diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_main.c b/drivers/net/ethernet/aquantia/atlantic/aq_main.c index 8a0af371e7dc..77609dc0a08d 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_main.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_main.c @@ -58,7 +58,7 @@ struct net_device *aq_ndev_alloc(void) return ndev; } -static int aq_ndev_open(struct net_device *ndev) +int aq_ndev_open(struct net_device *ndev) { struct aq_nic_s *aq_nic = netdev_priv(ndev); int err = 0; @@ -88,7 +88,7 @@ err_exit: return err; } -static int aq_ndev_close(struct net_device *ndev) +int aq_ndev_close(struct net_device *ndev) { struct aq_nic_s *aq_nic = netdev_priv(ndev); int err = 0; diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_main.h b/drivers/net/ethernet/aquantia/atlantic/aq_main.h index 99870865f66d..a78c1a168d8e 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_main.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_main.h @@ -16,5 +16,7 @@ DECLARE_STATIC_KEY_FALSE(aq_xdp_locking_key); void aq_ndev_schedule_work(struct work_struct *work); struct net_device *aq_ndev_alloc(void); +int aq_ndev_open(struct net_device *ndev); +int aq_ndev_close(struct net_device *ndev); #endif /* AQ_MAIN_H */ diff --git a/drivers/net/ethernet/broadcom/Kconfig b/drivers/net/ethernet/broadcom/Kconfig index 55dfdb34e37b..f4ca0c6c0f51 100644 --- a/drivers/net/ethernet/broadcom/Kconfig +++ b/drivers/net/ethernet/broadcom/Kconfig @@ -71,13 +71,14 @@ config BCM63XX_ENET config BCMGENET tristate "Broadcom GENET internal MAC support" depends on HAS_IOMEM + depends on PTP_1588_CLOCK_OPTIONAL || !ARCH_BCM2835 select MII select PHYLIB select FIXED_PHY select BCM7XXX_PHY select MDIO_BCM_UNIMAC select DIMLIB - select BROADCOM_PHY if (ARCH_BCM2835 && PTP_1588_CLOCK_OPTIONAL) + select BROADCOM_PHY if ARCH_BCM2835 help This driver supports the built-in Ethernet MACs found in the Broadcom BCM7xxx Set Top Box family chipset. diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c index 11d15cd03600..77d4cb4ad782 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c @@ -795,16 +795,20 @@ static void bnx2x_vf_enable_traffic(struct bnx2x *bp, struct bnx2x_virtf *vf) static u8 bnx2x_vf_is_pcie_pending(struct bnx2x *bp, u8 abs_vfid) { - struct pci_dev *dev; struct bnx2x_virtf *vf = bnx2x_vf_by_abs_fid(bp, abs_vfid); + struct pci_dev *dev; + bool pending; if (!vf) return false; dev = pci_get_domain_bus_and_slot(vf->domain, vf->bus, vf->devfn); - if (dev) - return bnx2x_is_pcie_pending(dev); - return false; + if (!dev) + return false; + pending = bnx2x_is_pcie_pending(dev); + pci_dev_put(dev); + + return pending; } int bnx2x_vf_flr_clnup_epilog(struct bnx2x *bp, u8 abs_vfid) diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c index 75771825c3f9..98793b2ac2c7 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c @@ -1794,7 +1794,7 @@ static int liquidio_open(struct net_device *netdev) ifstate_set(lio, LIO_IFSTATE_RUNNING); - if (!OCTEON_CN23XX_PF(oct) || (OCTEON_CN23XX_PF(oct) && !oct->msix_on)) { + if (!OCTEON_CN23XX_PF(oct) || !oct->msix_on) { ret = setup_tx_poll_fn(netdev); if (ret) goto err_poll; @@ -1824,7 +1824,7 @@ static int liquidio_open(struct net_device *netdev) return 0; err_rx_ctrl: - if (!OCTEON_CN23XX_PF(oct) || (OCTEON_CN23XX_PF(oct) && !oct->msix_on)) + if (!OCTEON_CN23XX_PF(oct) || !oct->msix_on) cleanup_tx_poll_fn(netdev); err_poll: if (lio->ptp_clock) { diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index 98f3dc460ca7..f2f95493ec89 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -2239,7 +2239,7 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) err = register_netdev(netdev); if (err) { dev_err(dev, "Failed to register netdevice\n"); - goto err_unregister_interrupts; + goto err_destroy_workqueue; } nic->msg_enable = debug; @@ -2248,6 +2248,8 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) return 0; +err_destroy_workqueue: + destroy_workqueue(nic->nicvf_rx_mode_wq); err_unregister_interrupts: nicvf_unregister_interrupts(nic); err_free_netdev: diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c index 2f6484dc186a..7eb2ddbe9bad 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c @@ -1436,8 +1436,10 @@ static acpi_status bgx_acpi_match_id(acpi_handle handle, u32 lvl, return AE_OK; } - if (strncmp(string.pointer, bgx_sel, 4)) + if (strncmp(string.pointer, bgx_sel, 4)) { + kfree(string.pointer); return AE_OK; + } acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, 1, bgx_acpi_register_phy, NULL, bgx, NULL); diff --git a/drivers/net/ethernet/davicom/dm9051.c b/drivers/net/ethernet/davicom/dm9051.c index a523ddda7609..de7105a84747 100644 --- a/drivers/net/ethernet/davicom/dm9051.c +++ b/drivers/net/ethernet/davicom/dm9051.c @@ -798,8 +798,10 @@ static int dm9051_loop_rx(struct board_info *db) } ret = dm9051_stop_mrcmd(db); - if (ret) + if (ret) { + dev_kfree_skb(skb); return ret; + } skb->protocol = eth_type_trans(skb, db->ndev); if (db->ndev->features & NETIF_F_RXCSUM) diff --git a/drivers/net/ethernet/engleder/tsnep_main.c b/drivers/net/ethernet/engleder/tsnep_main.c index 48fb391951dd..13d5ff4e0e02 100644 --- a/drivers/net/ethernet/engleder/tsnep_main.c +++ b/drivers/net/ethernet/engleder/tsnep_main.c @@ -542,6 +542,27 @@ static bool tsnep_tx_poll(struct tsnep_tx *tx, int napi_budget) return (budget != 0); } +static bool tsnep_tx_pending(struct tsnep_tx *tx) +{ + unsigned long flags; + struct tsnep_tx_entry *entry; + bool pending = false; + + spin_lock_irqsave(&tx->lock, flags); + + if (tx->read != tx->write) { + entry = &tx->entry[tx->read]; + if ((__le32_to_cpu(entry->desc_wb->properties) & + TSNEP_TX_DESC_OWNER_MASK) == + (entry->properties & TSNEP_TX_DESC_OWNER_MASK)) + pending = true; + } + + spin_unlock_irqrestore(&tx->lock, flags); + + return pending; +} + static int tsnep_tx_open(struct tsnep_adapter *adapter, void __iomem *addr, int queue_index, struct tsnep_tx *tx) { @@ -821,6 +842,19 @@ static int tsnep_rx_poll(struct tsnep_rx *rx, struct napi_struct *napi, return done; } +static bool tsnep_rx_pending(struct tsnep_rx *rx) +{ + struct tsnep_rx_entry *entry; + + entry = &rx->entry[rx->read]; + if ((__le32_to_cpu(entry->desc_wb->properties) & + TSNEP_DESC_OWNER_COUNTER_MASK) == + (entry->properties & TSNEP_DESC_OWNER_COUNTER_MASK)) + return true; + + return false; +} + static int tsnep_rx_open(struct tsnep_adapter *adapter, void __iomem *addr, int queue_index, struct tsnep_rx *rx) { @@ -866,6 +900,17 @@ static void tsnep_rx_close(struct tsnep_rx *rx) tsnep_rx_ring_cleanup(rx); } +static bool tsnep_pending(struct tsnep_queue *queue) +{ + if (queue->tx && tsnep_tx_pending(queue->tx)) + return true; + + if (queue->rx && tsnep_rx_pending(queue->rx)) + return true; + + return false; +} + static int tsnep_poll(struct napi_struct *napi, int budget) { struct tsnep_queue *queue = container_of(napi, struct tsnep_queue, @@ -886,9 +931,19 @@ static int tsnep_poll(struct napi_struct *napi, int budget) if (!complete) return budget; - if (likely(napi_complete_done(napi, done))) + if (likely(napi_complete_done(napi, done))) { tsnep_enable_irq(queue->adapter, queue->irq_mask); + /* reschedule if work is already pending, prevent rotten packets + * which are transmitted or received after polling but before + * interrupt enable + */ + if (tsnep_pending(queue)) { + tsnep_disable_irq(queue->adapter, queue->irq_mask); + napi_schedule(napi); + } + } + return min(done, budget - 1); } diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c index cacd454ac696..c39b866e2582 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c @@ -132,6 +132,7 @@ int dpaa2_switch_acl_entry_add(struct dpaa2_switch_filter_block *filter_block, DMA_TO_DEVICE); if (unlikely(dma_mapping_error(dev, acl_entry_cfg->key_iova))) { dev_err(dev, "DMA mapping failed\n"); + kfree(cmd_buff); return -EFAULT; } @@ -142,6 +143,7 @@ int dpaa2_switch_acl_entry_add(struct dpaa2_switch_filter_block *filter_block, DMA_TO_DEVICE); if (err) { dev_err(dev, "dpsw_acl_add_entry() failed %d\n", err); + kfree(cmd_buff); return err; } @@ -172,6 +174,7 @@ dpaa2_switch_acl_entry_remove(struct dpaa2_switch_filter_block *block, DMA_TO_DEVICE); if (unlikely(dma_mapping_error(dev, acl_entry_cfg->key_iova))) { dev_err(dev, "DMA mapping failed\n"); + kfree(cmd_buff); return -EFAULT; } @@ -182,6 +185,7 @@ dpaa2_switch_acl_entry_remove(struct dpaa2_switch_filter_block *block, DMA_TO_DEVICE); if (err) { dev_err(dev, "dpsw_acl_remove_entry() failed %d\n", err); + kfree(cmd_buff); return err; } diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index f8c06c3f9464..8671591cb750 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -2058,7 +2058,7 @@ static void enetc_setup_txbdr(struct enetc_hw *hw, struct enetc_bdr *tx_ring) /* enable Tx ints by setting pkt thr to 1 */ enetc_txbdr_wr(hw, idx, ENETC_TBICR0, ENETC_TBICR0_ICEN | 0x1); - tbmr = ENETC_TBMR_EN; + tbmr = ENETC_TBMR_EN | ENETC_TBMR_SET_PRIO(tx_ring->prio); if (tx_ring->ndev->features & NETIF_F_HW_VLAN_CTAG_TX) tbmr |= ENETC_TBMR_VIH; @@ -2461,7 +2461,8 @@ int enetc_setup_tc_mqprio(struct net_device *ndev, void *type_data) /* Reset all ring priorities to 0 */ for (i = 0; i < priv->num_tx_rings; i++) { tx_ring = priv->tx_ring[i]; - enetc_set_bdr_prio(hw, tx_ring->index, 0); + tx_ring->prio = 0; + enetc_set_bdr_prio(hw, tx_ring->index, tx_ring->prio); } return 0; @@ -2480,7 +2481,8 @@ int enetc_setup_tc_mqprio(struct net_device *ndev, void *type_data) */ for (i = 0; i < num_tc; i++) { tx_ring = priv->tx_ring[i]; - enetc_set_bdr_prio(hw, tx_ring->index, i); + tx_ring->prio = i; + enetc_set_bdr_prio(hw, tx_ring->index, tx_ring->prio); } /* Reset the number of netdev queues based on the TC count */ diff --git a/drivers/net/ethernet/freescale/enetc/enetc.h b/drivers/net/ethernet/freescale/enetc/enetc.h index 161930a65f61..c6d8cc15c270 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.h +++ b/drivers/net/ethernet/freescale/enetc/enetc.h @@ -95,6 +95,7 @@ struct enetc_bdr { void __iomem *rcir; }; u16 index; + u16 prio; int bd_count; /* # of BDs */ int next_to_use; int next_to_clean; diff --git a/drivers/net/ethernet/freescale/enetc/enetc_qos.c b/drivers/net/ethernet/freescale/enetc/enetc_qos.c index a842e1999122..fcebb54224c0 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_qos.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_qos.c @@ -137,6 +137,7 @@ int enetc_setup_tc_taprio(struct net_device *ndev, void *type_data) struct tc_taprio_qopt_offload *taprio = type_data; struct enetc_ndev_priv *priv = netdev_priv(ndev); struct enetc_hw *hw = &priv->si->hw; + struct enetc_bdr *tx_ring; int err; int i; @@ -145,16 +146,20 @@ int enetc_setup_tc_taprio(struct net_device *ndev, void *type_data) if (priv->tx_ring[i]->tsd_enable) return -EBUSY; - for (i = 0; i < priv->num_tx_rings; i++) - enetc_set_bdr_prio(hw, priv->tx_ring[i]->index, - taprio->enable ? i : 0); + for (i = 0; i < priv->num_tx_rings; i++) { + tx_ring = priv->tx_ring[i]; + tx_ring->prio = taprio->enable ? i : 0; + enetc_set_bdr_prio(hw, tx_ring->index, tx_ring->prio); + } err = enetc_setup_taprio(ndev, taprio); - - if (err) - for (i = 0; i < priv->num_tx_rings; i++) - enetc_set_bdr_prio(hw, priv->tx_ring[i]->index, - taprio->enable ? 0 : i); + if (err) { + for (i = 0; i < priv->num_tx_rings; i++) { + tx_ring = priv->tx_ring[i]; + tx_ring->prio = taprio->enable ? 0 : i; + enetc_set_bdr_prio(hw, tx_ring->index, tx_ring->prio); + } + } return err; } diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index f623c12eaf95..23e1a94b9ce4 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -74,7 +74,7 @@ #include "fec.h" static void set_multicast_list(struct net_device *ndev); -static void fec_enet_itr_coal_init(struct net_device *ndev); +static void fec_enet_itr_coal_set(struct net_device *ndev); #define DRIVER_NAME "fec" @@ -1220,8 +1220,8 @@ fec_restart(struct net_device *ndev) writel(0, fep->hwp + FEC_IMASK); /* Init the interrupt coalescing */ - fec_enet_itr_coal_init(ndev); - + if (fep->quirks & FEC_QUIRK_HAS_COALESCE) + fec_enet_itr_coal_set(ndev); } static int fec_enet_ipc_handle_init(struct fec_enet_private *fep) @@ -2856,19 +2856,6 @@ static int fec_enet_set_coalesce(struct net_device *ndev, return 0; } -static void fec_enet_itr_coal_init(struct net_device *ndev) -{ - struct ethtool_coalesce ec; - - ec.rx_coalesce_usecs = FEC_ITR_ICTT_DEFAULT; - ec.rx_max_coalesced_frames = FEC_ITR_ICFT_DEFAULT; - - ec.tx_coalesce_usecs = FEC_ITR_ICTT_DEFAULT; - ec.tx_max_coalesced_frames = FEC_ITR_ICFT_DEFAULT; - - fec_enet_set_coalesce(ndev, &ec, NULL, NULL); -} - static int fec_enet_get_tunable(struct net_device *netdev, const struct ethtool_tunable *tuna, void *data) @@ -3623,6 +3610,10 @@ static int fec_enet_init(struct net_device *ndev) fep->rx_align = 0x3; fep->tx_align = 0x3; #endif + fep->rx_pkts_itr = FEC_ITR_ICFT_DEFAULT; + fep->tx_pkts_itr = FEC_ITR_ICFT_DEFAULT; + fep->rx_time_itr = FEC_ITR_ICTT_DEFAULT; + fep->tx_time_itr = FEC_ITR_ICTT_DEFAULT; /* Check mask of the streaming and coherent API */ ret = dma_set_mask_and_coherent(&fep->pdev->dev, DMA_BIT_MASK(32)); diff --git a/drivers/net/ethernet/hisilicon/hisi_femac.c b/drivers/net/ethernet/hisilicon/hisi_femac.c index 93846bace028..ce2571c16e43 100644 --- a/drivers/net/ethernet/hisilicon/hisi_femac.c +++ b/drivers/net/ethernet/hisilicon/hisi_femac.c @@ -283,7 +283,7 @@ static int hisi_femac_rx(struct net_device *dev, int limit) skb->protocol = eth_type_trans(skb, dev); napi_gro_receive(&priv->napi, skb); dev->stats.rx_packets++; - dev->stats.rx_bytes += skb->len; + dev->stats.rx_bytes += len; next: pos = (pos + 1) % rxq->num; if (rx_pkts_num >= limit) diff --git a/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c b/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c index ffcf797dfa90..f867e9531117 100644 --- a/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c +++ b/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c @@ -550,7 +550,7 @@ static int hix5hd2_rx(struct net_device *dev, int limit) skb->protocol = eth_type_trans(skb, dev); napi_gro_receive(&priv->napi, skb); dev->stats.rx_packets++; - dev->stats.rx_bytes += skb->len; + dev->stats.rx_bytes += len; next: pos = dma_ring_incr(pos, RX_DESC_NUM); } diff --git a/drivers/net/ethernet/intel/e100.c b/drivers/net/ethernet/intel/e100.c index 560d1d442232..d3fdc290937f 100644 --- a/drivers/net/ethernet/intel/e100.c +++ b/drivers/net/ethernet/intel/e100.c @@ -1741,11 +1741,8 @@ static int e100_xmit_prepare(struct nic *nic, struct cb *cb, dma_addr = dma_map_single(&nic->pdev->dev, skb->data, skb->len, DMA_TO_DEVICE); /* If we can't map the skb, have the upper layer try later */ - if (dma_mapping_error(&nic->pdev->dev, dma_addr)) { - dev_kfree_skb_any(skb); - skb = NULL; + if (dma_mapping_error(&nic->pdev->dev, dma_addr)) return -ENOMEM; - } /* * Use the last 4 bytes of the SKB payload packet as the CRC, used for diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 49e926959ad3..55cf2f62bb30 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -5936,9 +5936,9 @@ static netdev_tx_t e1000_xmit_frame(struct sk_buff *skb, e1000_tx_queue(tx_ring, tx_flags, count); /* Make sure there is space in the ring for the next send. */ e1000_maybe_stop_tx(tx_ring, - (MAX_SKB_FRAGS * + ((MAX_SKB_FRAGS + 1) * DIV_ROUND_UP(PAGE_SIZE, - adapter->tx_fifo_limit) + 2)); + adapter->tx_fifo_limit) + 4)); if (!netdev_xmit_more() || netif_xmit_stopped(netdev_get_tx_queue(netdev, 0))) { diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c index 4a6630586ec9..fc373472e4e1 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c @@ -32,6 +32,8 @@ struct workqueue_struct *fm10k_workqueue; **/ static int __init fm10k_init_module(void) { + int ret; + pr_info("%s\n", fm10k_driver_string); pr_info("%s\n", fm10k_copyright); @@ -43,7 +45,13 @@ static int __init fm10k_init_module(void) fm10k_dbg_init(); - return fm10k_register_pci_driver(); + ret = fm10k_register_pci_driver(); + if (ret) { + fm10k_dbg_exit(); + destroy_workqueue(fm10k_workqueue); + } + + return ret; } module_init(fm10k_init_module); diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 4a6a6e48c615..f6fa63e4253c 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -4464,11 +4464,7 @@ static int i40e_check_fdir_input_set(struct i40e_vsi *vsi, return -EOPNOTSUPP; /* First 4 bytes of L4 header */ - if (usr_ip4_spec->l4_4_bytes == htonl(0xFFFFFFFF)) - new_mask |= I40E_L4_SRC_MASK | I40E_L4_DST_MASK; - else if (!usr_ip4_spec->l4_4_bytes) - new_mask &= ~(I40E_L4_SRC_MASK | I40E_L4_DST_MASK); - else + if (usr_ip4_spec->l4_4_bytes) return -EOPNOTSUPP; /* Filtering on Type of Service is not supported. */ @@ -4507,11 +4503,7 @@ static int i40e_check_fdir_input_set(struct i40e_vsi *vsi, else return -EOPNOTSUPP; - if (usr_ip6_spec->l4_4_bytes == htonl(0xFFFFFFFF)) - new_mask |= I40E_L4_SRC_MASK | I40E_L4_DST_MASK; - else if (!usr_ip6_spec->l4_4_bytes) - new_mask &= ~(I40E_L4_SRC_MASK | I40E_L4_DST_MASK); - else + if (usr_ip6_spec->l4_4_bytes) return -EOPNOTSUPP; /* Filtering on Traffic class is not supported. */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index b5dcd15ced36..6416322d7c18 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -10655,6 +10655,21 @@ static int i40e_rebuild_channels(struct i40e_vsi *vsi) } /** + * i40e_clean_xps_state - clean xps state for every tx_ring + * @vsi: ptr to the VSI + **/ +static void i40e_clean_xps_state(struct i40e_vsi *vsi) +{ + int i; + + if (vsi->tx_rings) + for (i = 0; i < vsi->num_queue_pairs; i++) + if (vsi->tx_rings[i]) + clear_bit(__I40E_TX_XPS_INIT_DONE, + vsi->tx_rings[i]->state); +} + +/** * i40e_prep_for_reset - prep for the core to reset * @pf: board private structure * @@ -10678,8 +10693,10 @@ static void i40e_prep_for_reset(struct i40e_pf *pf) i40e_pf_quiesce_all_vsi(pf); for (v = 0; v < pf->num_alloc_vsi; v++) { - if (pf->vsi[v]) + if (pf->vsi[v]) { + i40e_clean_xps_state(pf->vsi[v]); pf->vsi[v]->seid = 0; + } } i40e_shutdown_adminq(&pf->hw); @@ -16644,6 +16661,8 @@ static struct pci_driver i40e_driver = { **/ static int __init i40e_init_module(void) { + int err; + pr_info("%s: %s\n", i40e_driver_name, i40e_driver_string); pr_info("%s: %s\n", i40e_driver_name, i40e_copyright); @@ -16661,7 +16680,14 @@ static int __init i40e_init_module(void) } i40e_dbg_init(); - return pci_register_driver(&i40e_driver); + err = pci_register_driver(&i40e_driver); + if (err) { + destroy_workqueue(i40e_wq); + i40e_dbg_exit(); + return err; + } + + return 0; } module_init(i40e_init_module); diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 72ddcefc45b1..635f93d60318 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -1578,6 +1578,7 @@ bool i40e_reset_vf(struct i40e_vf *vf, bool flr) i40e_cleanup_reset_vf(vf); i40e_flush(hw); + usleep_range(20000, 40000); clear_bit(I40E_VF_STATE_RESETTING, &vf->vf_states); return true; @@ -1701,6 +1702,7 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr) } i40e_flush(hw); + usleep_range(20000, 40000); clear_bit(__I40E_VF_DISABLE, pf->state); return true; diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h index 3f6187c16424..0d1bab4ac1b0 100644 --- a/drivers/net/ethernet/intel/iavf/iavf.h +++ b/drivers/net/ethernet/intel/iavf/iavf.h @@ -298,7 +298,6 @@ struct iavf_adapter { #define IAVF_FLAG_QUEUES_DISABLED BIT(17) #define IAVF_FLAG_SETUP_NETDEV_FEATURES BIT(18) #define IAVF_FLAG_REINIT_MSIX_NEEDED BIT(20) -#define IAVF_FLAG_INITIAL_MAC_SET BIT(23) /* duplicates for common code */ #define IAVF_FLAG_DCB_ENABLED 0 /* flags for admin queue service task */ diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 3fc572341781..f71e132ede09 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -1087,12 +1087,6 @@ static int iavf_set_mac(struct net_device *netdev, void *p) if (ret) return ret; - /* If this is an initial set MAC during VF spawn do not wait */ - if (adapter->flags & IAVF_FLAG_INITIAL_MAC_SET) { - adapter->flags &= ~IAVF_FLAG_INITIAL_MAC_SET; - return 0; - } - ret = wait_event_interruptible_timeout(adapter->vc_waitqueue, iavf_is_mac_set_handled(netdev, addr->sa_data), msecs_to_jiffies(2500)); @@ -2605,8 +2599,6 @@ static void iavf_init_config_adapter(struct iavf_adapter *adapter) ether_addr_copy(netdev->perm_addr, adapter->hw.mac.addr); } - adapter->flags |= IAVF_FLAG_INITIAL_MAC_SET; - adapter->tx_desc_count = IAVF_DEFAULT_TXD; adapter->rx_desc_count = IAVF_DEFAULT_RXD; err = iavf_init_interrupt_scheme(adapter); @@ -2921,7 +2913,6 @@ static void iavf_disable_vf(struct iavf_adapter *adapter) iavf_free_queues(adapter); memset(adapter->vf_res, 0, IAVF_VIRTCHNL_VF_RESOURCE_SIZE); iavf_shutdown_adminq(&adapter->hw); - adapter->netdev->flags &= ~IFF_UP; adapter->flags &= ~IAVF_FLAG_RESET_PENDING; iavf_change_state(adapter, __IAVF_DOWN); wake_up(&adapter->down_waitqueue); @@ -3021,6 +3012,11 @@ static void iavf_reset_task(struct work_struct *work) iavf_disable_vf(adapter); mutex_unlock(&adapter->client_lock); mutex_unlock(&adapter->crit_lock); + if (netif_running(netdev)) { + rtnl_lock(); + dev_close(netdev); + rtnl_unlock(); + } return; /* Do not attempt to reinit. It's dead, Jim. */ } @@ -3033,6 +3029,7 @@ continue_reset: if (running) { netif_carrier_off(netdev); + netif_tx_stop_all_queues(netdev); adapter->link_up = false; iavf_napi_disable_all(adapter); } @@ -3172,6 +3169,16 @@ reset_err: mutex_unlock(&adapter->client_lock); mutex_unlock(&adapter->crit_lock); + + if (netif_running(netdev)) { + /* Close device to ensure that Tx queues will not be started + * during netif_device_attach() at the end of the reset task. + */ + rtnl_lock(); + dev_close(netdev); + rtnl_unlock(); + } + dev_err(&adapter->pdev->dev, "failed to allocate resources during reinit\n"); reset_finish: rtnl_lock(); @@ -5035,23 +5042,21 @@ static int __maybe_unused iavf_resume(struct device *dev_d) static void iavf_remove(struct pci_dev *pdev) { struct iavf_adapter *adapter = iavf_pdev_to_adapter(pdev); - struct net_device *netdev = adapter->netdev; struct iavf_fdir_fltr *fdir, *fdirtmp; struct iavf_vlan_filter *vlf, *vlftmp; + struct iavf_cloud_filter *cf, *cftmp; struct iavf_adv_rss *rss, *rsstmp; struct iavf_mac_filter *f, *ftmp; - struct iavf_cloud_filter *cf, *cftmp; - struct iavf_hw *hw = &adapter->hw; + struct net_device *netdev; + struct iavf_hw *hw; int err; - /* When reboot/shutdown is in progress no need to do anything - * as the adapter is already REMOVE state that was set during - * iavf_shutdown() callback. - */ - if (adapter->state == __IAVF_REMOVE) + netdev = adapter->netdev; + hw = &adapter->hw; + + if (test_and_set_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section)) return; - set_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section); /* Wait until port initialization is complete. * There are flows where register/unregister netdev may race. */ @@ -5191,6 +5196,8 @@ static struct pci_driver iavf_driver = { **/ static int __init iavf_init_module(void) { + int ret; + pr_info("iavf: %s\n", iavf_driver_string); pr_info("%s\n", iavf_copyright); @@ -5201,7 +5208,12 @@ static int __init iavf_init_module(void) pr_err("%s: Failed to create workqueue\n", iavf_driver_name); return -ENOMEM; } - return pci_register_driver(&iavf_driver); + + ret = pci_register_driver(&iavf_driver); + if (ret) + destroy_workqueue(iavf_wq); + + return ret; } module_init(iavf_init_module); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 0f6718719453..ca2898467dcb 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -3145,15 +3145,15 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data) */ static irqreturn_t ice_misc_intr_thread_fn(int __always_unused irq, void *data) { - irqreturn_t ret = IRQ_HANDLED; struct ice_pf *pf = data; - bool irq_handled; - irq_handled = ice_ptp_process_ts(pf); - if (!irq_handled) - ret = IRQ_WAKE_THREAD; + if (ice_is_reset_in_progress(pf->state)) + return IRQ_HANDLED; - return ret; + while (!ice_ptp_process_ts(pf)) + usleep_range(50, 100); + + return IRQ_HANDLED; } /** diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c index 011b727ab190..0f668468d141 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c @@ -614,11 +614,14 @@ static u64 ice_ptp_extend_40b_ts(struct ice_pf *pf, u64 in_tstamp) * 2) extend the 40b timestamp value to get a 64bit timestamp * 3) send that timestamp to the stack * - * After looping, if we still have waiting SKBs, return true. This may cause us - * effectively poll even when not strictly necessary. We do this because it's - * possible a new timestamp was requested around the same time as the interrupt. - * In some cases hardware might not interrupt us again when the timestamp is - * captured. + * Returns true if all timestamps were handled, and false if any slots remain + * without a timestamp. + * + * After looping, if we still have waiting SKBs, return false. This may cause + * us effectively poll even when not strictly necessary. We do this because + * it's possible a new timestamp was requested around the same time as the + * interrupt. In some cases hardware might not interrupt us again when the + * timestamp is captured. * * Note that we only take the tracking lock when clearing the bit and when * checking if we need to re-queue this task. The only place where bits can be @@ -641,7 +644,7 @@ static bool ice_ptp_tx_tstamp(struct ice_ptp_tx *tx) u8 idx; if (!tx->init) - return false; + return true; ptp_port = container_of(tx, struct ice_ptp_port, tx); pf = ptp_port_to_pf(ptp_port); @@ -2381,10 +2384,7 @@ s8 ice_ptp_request_ts(struct ice_ptp_tx *tx, struct sk_buff *skb) */ bool ice_ptp_process_ts(struct ice_pf *pf) { - if (pf->ptp.port.tx.init) - return ice_ptp_tx_tstamp(&pf->ptp.port.tx); - - return false; + return ice_ptp_tx_tstamp(&pf->ptp.port.tx); } static void ice_ptp_periodic_work(struct kthread_work *work) diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index e5f3e7680dc6..ff911af16a4b 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -1413,6 +1413,8 @@ static int igb_intr_test(struct igb_adapter *adapter, u64 *data) *data = 1; return -1; } + wr32(E1000_IVAR_MISC, E1000_IVAR_VALID << 8); + wr32(E1000_EIMS, BIT(0)); } else if (adapter->flags & IGB_FLAG_HAS_MSI) { shared_int = false; if (request_irq(irq, diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 99933e89717a..e338fa572793 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -4869,6 +4869,8 @@ static struct pci_driver ixgbevf_driver = { **/ static int __init ixgbevf_init_module(void) { + int err; + pr_info("%s\n", ixgbevf_driver_string); pr_info("%s\n", ixgbevf_copyright); ixgbevf_wq = create_singlethread_workqueue(ixgbevf_driver_name); @@ -4877,7 +4879,13 @@ static int __init ixgbevf_init_module(void) return -ENOMEM; } - return pci_register_driver(&ixgbevf_driver); + err = pci_register_driver(&ixgbevf_driver); + if (err) { + destroy_workqueue(ixgbevf_wq); + return err; + } + + return 0; } module_init(ixgbevf_init_module); diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index ff3e361e06e7..5aefaaff0871 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -4271,7 +4271,7 @@ static void mvneta_percpu_elect(struct mvneta_port *pp) /* Use the cpu associated to the rxq when it is online, in all * the other cases, use the cpu 0 which can't be offline. */ - if (cpu_online(pp->rxq_def)) + if (pp->rxq_def < nr_cpu_ids && cpu_online(pp->rxq_def)) elected_cpu = pp->rxq_def; max_cpu = num_present_cpus(); diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index eb0fb8128096..b399bdb1ca36 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -7350,6 +7350,7 @@ static int mvpp2_get_sram(struct platform_device *pdev, struct mvpp2 *priv) { struct resource *res; + void __iomem *base; res = platform_get_resource(pdev, IORESOURCE_MEM, 2); if (!res) { @@ -7360,9 +7361,12 @@ static int mvpp2_get_sram(struct platform_device *pdev, return 0; } - priv->cm3_base = devm_ioremap_resource(&pdev->dev, res); + base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(base)) + return PTR_ERR(base); - return PTR_ERR_OR_ZERO(priv->cm3_base); + priv->cm3_base = base; + return 0; } static int mvpp2_probe(struct platform_device *pdev) diff --git a/drivers/net/ethernet/marvell/octeontx2/Kconfig b/drivers/net/ethernet/marvell/octeontx2/Kconfig index 6b4f640163f7..993ac180a5db 100644 --- a/drivers/net/ethernet/marvell/octeontx2/Kconfig +++ b/drivers/net/ethernet/marvell/octeontx2/Kconfig @@ -32,7 +32,6 @@ config OCTEONTX2_PF tristate "Marvell OcteonTX2 NIC Physical Function driver" select OCTEONTX2_MBOX select NET_DEVLINK - depends on MACSEC || !MACSEC depends on (64BIT && COMPILE_TEST) || ARM64 select DIMLIB depends on PCI diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mcs.c b/drivers/net/ethernet/marvell/octeontx2/af/mcs.c index 4a343f853b28..c0bedf402da9 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mcs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/mcs.c @@ -951,7 +951,7 @@ static void mcs_bbe_intr_handler(struct mcs *mcs, u64 intr, enum mcs_direction d else event.intr_mask = (dir == MCS_RX) ? MCS_BBE_RX_PLFIFO_OVERFLOW_INT : - MCS_BBE_RX_PLFIFO_OVERFLOW_INT; + MCS_BBE_TX_PLFIFO_OVERFLOW_INT; /* Notify the lmac_id info which ran into BBE fatal error */ event.lmac_id = i & 0x3ULL; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c index a1970ebedf95..f66dde2b0f92 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c @@ -880,6 +880,8 @@ static int rvu_dbg_rvu_pf_cgx_map_display(struct seq_file *filp, void *unused) sprintf(lmac, "LMAC%d", lmac_id); seq_printf(filp, "%s\t0x%x\t\tNIX%d\t\t%s\t%s\n", dev_name(&pdev->dev), pcifunc, blkid, cgx, lmac); + + pci_dev_put(pdev); } return 0; } @@ -2566,6 +2568,7 @@ static int cgx_print_dmac_flt(struct seq_file *s, int lmac_id) } } + pci_dev_put(pdev); return 0; } diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index 7646bb2ec89b..a62c1b322012 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -4985,6 +4985,8 @@ static int nix_setup_ipolicers(struct rvu *rvu, ipolicer->ref_count = devm_kcalloc(rvu->dev, ipolicer->band_prof.max, sizeof(u16), GFP_KERNEL); + if (!ipolicer->ref_count) + return -ENOMEM; } /* Set policer timeunit to 2us ie (19 + 1) * 100 nsec = 2us */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_sdp.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_sdp.c index b04fb226f708..ae50d56258ec 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_sdp.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_sdp.c @@ -62,15 +62,18 @@ int rvu_sdp_init(struct rvu *rvu) pfvf->sdp_info = devm_kzalloc(rvu->dev, sizeof(struct sdp_node_info), GFP_KERNEL); - if (!pfvf->sdp_info) + if (!pfvf->sdp_info) { + pci_dev_put(pdev); return -ENOMEM; + } dev_info(rvu->dev, "SDP PF number:%d\n", sdp_pf_num[i]); - put_device(&pdev->dev); i++; } + pci_dev_put(pdev); + return 0; } diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h index 282db6fe3b08..67aa02bb2b85 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h @@ -884,7 +884,7 @@ static inline void otx2_dma_unmap_page(struct otx2_nic *pfvf, static inline u16 otx2_get_smq_idx(struct otx2_nic *pfvf, u16 qidx) { #ifdef CONFIG_DCB - if (pfvf->pfc_alloc_status[qidx]) + if (qidx < NIX_PF_PFC_PRIO_MAX && pfvf->pfc_alloc_status[qidx]) return pfvf->pfc_schq_list[NIX_TXSCH_LVL_SMQ][qidx]; #endif diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c index e64318c110fd..6a01ab1a6e6f 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c @@ -1134,7 +1134,12 @@ int otx2_init_tc(struct otx2_nic *nic) return err; tc->flow_ht_params = tc_flow_ht_params; - return rhashtable_init(&tc->flow_table, &tc->flow_ht_params); + err = rhashtable_init(&tc->flow_table, &tc->flow_ht_params); + if (err) { + kfree(tc->tc_entries_bitmap); + tc->tc_entries_bitmap = NULL; + } + return err; } EXPORT_SYMBOL(otx2_init_tc); diff --git a/drivers/net/ethernet/marvell/prestera/prestera_main.c b/drivers/net/ethernet/marvell/prestera/prestera_main.c index 24f9d6024745..47796e4d900c 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_main.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_main.c @@ -746,6 +746,7 @@ static int prestera_port_create(struct prestera_switch *sw, u32 id) return 0; err_sfp_bind: + unregister_netdev(dev); err_register_netdev: prestera_port_list_del(port); err_port_init: diff --git a/drivers/net/ethernet/marvell/prestera/prestera_router.c b/drivers/net/ethernet/marvell/prestera/prestera_router.c index 4046be0e86ff..a9a1028cb17b 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_router.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_router.c @@ -457,7 +457,7 @@ prestera_kern_neigh_cache_find(struct prestera_switch *sw, n_cache = rhashtable_lookup_fast(&sw->router->kern_neigh_cache_ht, key, __prestera_kern_neigh_cache_ht_params); - return IS_ERR(n_cache) ? NULL : n_cache; + return n_cache; } static void diff --git a/drivers/net/ethernet/marvell/prestera/prestera_router_hw.c b/drivers/net/ethernet/marvell/prestera/prestera_router_hw.c index aa080dc57ff0..02faaea2aefa 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_router_hw.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_router_hw.c @@ -330,7 +330,7 @@ prestera_nh_neigh_find(struct prestera_switch *sw, nh_neigh = rhashtable_lookup_fast(&sw->router->nh_neigh_ht, key, __prestera_nh_neigh_ht_params); - return IS_ERR(nh_neigh) ? NULL : nh_neigh; + return nh_neigh; } struct prestera_nh_neigh * @@ -484,7 +484,7 @@ __prestera_nexthop_group_find(struct prestera_switch *sw, nh_grp = rhashtable_lookup_fast(&sw->router->nexthop_group_ht, key, __prestera_nexthop_group_ht_params); - return IS_ERR(nh_grp) ? NULL : nh_grp; + return nh_grp; } static struct prestera_nexthop_group * diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 7cd381530aa4..1d36619c5ec9 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -2378,8 +2378,10 @@ static int mtk_rx_alloc(struct mtk_eth *eth, int ring_no, int rx_flag) data + NET_SKB_PAD + eth->ip_align, ring->buf_size, DMA_FROM_DEVICE); if (unlikely(dma_mapping_error(eth->dma_dev, - dma_addr))) + dma_addr))) { + skb_free_frag(data); return -ENOMEM; + } } rxd->rxd1 = (unsigned int)dma_addr; ring->data[i] = data; @@ -2996,8 +2998,10 @@ static int mtk_open(struct net_device *dev) int i; err = mtk_start_dma(eth); - if (err) + if (err) { + phylink_disconnect_phy(mac->phylink); return err; + } for (i = 0; i < ARRAY_SIZE(eth->ppe); i++) mtk_ppe_start(eth->ppe[i]); @@ -4143,13 +4147,13 @@ static int mtk_probe(struct platform_device *pdev) eth->soc->offload_version, i); if (!eth->ppe[i]) { err = -ENOMEM; - goto err_free_dev; + goto err_deinit_ppe; } } err = mtk_eth_offload_init(eth); if (err) - goto err_free_dev; + goto err_deinit_ppe; } for (i = 0; i < MTK_MAX_DEVS; i++) { @@ -4159,7 +4163,7 @@ static int mtk_probe(struct platform_device *pdev) err = register_netdev(eth->netdev[i]); if (err) { dev_err(eth->dev, "error bringing up device\n"); - goto err_deinit_mdio; + goto err_deinit_ppe; } else netif_info(eth, probe, eth->netdev[i], "mediatek frame engine at 0x%08lx, irq %d\n", @@ -4177,7 +4181,8 @@ static int mtk_probe(struct platform_device *pdev) return 0; -err_deinit_mdio: +err_deinit_ppe: + mtk_ppe_deinit(eth); mtk_mdio_cleanup(eth); err_free_dev: mtk_free_dev(eth); diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.c b/drivers/net/ethernet/mediatek/mtk_ppe.c index 2d8ca99f2467..784ecb2dc9fb 100644 --- a/drivers/net/ethernet/mediatek/mtk_ppe.c +++ b/drivers/net/ethernet/mediatek/mtk_ppe.c @@ -737,7 +737,7 @@ struct mtk_ppe *mtk_ppe_init(struct mtk_eth *eth, void __iomem *base, MTK_PPE_ENTRIES * soc->foe_entry_size, &ppe->foe_phys, GFP_KERNEL); if (!foe) - return NULL; + goto err_free_l2_flows; ppe->foe_table = foe; @@ -745,11 +745,26 @@ struct mtk_ppe *mtk_ppe_init(struct mtk_eth *eth, void __iomem *base, sizeof(*ppe->foe_flow); ppe->foe_flow = devm_kzalloc(dev, foe_flow_size, GFP_KERNEL); if (!ppe->foe_flow) - return NULL; + goto err_free_l2_flows; mtk_ppe_debugfs_init(ppe, index); return ppe; + +err_free_l2_flows: + rhashtable_destroy(&ppe->l2_flows); + return NULL; +} + +void mtk_ppe_deinit(struct mtk_eth *eth) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(eth->ppe); i++) { + if (!eth->ppe[i]) + return; + rhashtable_destroy(ð->ppe[i]->l2_flows); + } } static void mtk_ppe_init_foe_table(struct mtk_ppe *ppe) diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.h b/drivers/net/ethernet/mediatek/mtk_ppe.h index 0b7a67a958e4..a09c32539bcc 100644 --- a/drivers/net/ethernet/mediatek/mtk_ppe.h +++ b/drivers/net/ethernet/mediatek/mtk_ppe.h @@ -304,6 +304,7 @@ struct mtk_ppe { struct mtk_ppe *mtk_ppe_init(struct mtk_eth *eth, void __iomem *base, int version, int index); +void mtk_ppe_deinit(struct mtk_eth *eth); void mtk_ppe_start(struct mtk_ppe *ppe); int mtk_ppe_stop(struct mtk_ppe *ppe); diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c index b149e601f673..48cfaa7eaf50 100644 --- a/drivers/net/ethernet/mellanox/mlx4/qp.c +++ b/drivers/net/ethernet/mellanox/mlx4/qp.c @@ -697,7 +697,8 @@ static int mlx4_create_zones(struct mlx4_dev *dev, err = mlx4_bitmap_init(*bitmap + k, 1, MLX4_QP_TABLE_RAW_ETH_SIZE - 1, 0, 0); - mlx4_bitmap_alloc_range(*bitmap + k, 1, 1, 0); + if (!err) + mlx4_bitmap_alloc_range(*bitmap + k, 1, 1, 0); } if (err) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 2e0d59ca62b5..e7a894ba5c3e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -45,6 +45,8 @@ #include "mlx5_core.h" #include "lib/eq.h" #include "lib/tout.h" +#define CREATE_TRACE_POINTS +#include "diag/cmd_tracepoint.h" enum { CMD_IF_REV = 5, @@ -785,27 +787,14 @@ EXPORT_SYMBOL(mlx5_cmd_out_err); static void cmd_status_print(struct mlx5_core_dev *dev, void *in, void *out) { u16 opcode, op_mod; - u32 syndrome; - u8 status; u16 uid; - int err; - - syndrome = MLX5_GET(mbox_out, out, syndrome); - status = MLX5_GET(mbox_out, out, status); opcode = MLX5_GET(mbox_in, in, opcode); op_mod = MLX5_GET(mbox_in, in, op_mod); uid = MLX5_GET(mbox_in, in, uid); - err = cmd_status_to_err(status); - if (!uid && opcode != MLX5_CMD_OP_DESTROY_MKEY) mlx5_cmd_out_err(dev, opcode, op_mod, out); - else - mlx5_core_dbg(dev, - "%s(0x%x) op_mod(0x%x) uid(%d) failed, status %s(0x%x), syndrome (0x%x), err(%d)\n", - mlx5_command_str(opcode), opcode, op_mod, uid, - cmd_status_str(status), status, syndrome, err); } int mlx5_cmd_check(struct mlx5_core_dev *dev, int err, void *in, void *out) @@ -1016,6 +1005,7 @@ static void cmd_work_handler(struct work_struct *work) cmd_ent_get(ent); set_bit(MLX5_CMD_ENT_STATE_PENDING_COMP, &ent->state); + cmd_ent_get(ent); /* for the _real_ FW event on completion */ /* Skip sending command to fw if internal error */ if (mlx5_cmd_is_down(dev) || !opcode_allowed(&dev->cmd, ent->op)) { ent->ret = -ENXIO; @@ -1023,7 +1013,6 @@ static void cmd_work_handler(struct work_struct *work) return; } - cmd_ent_get(ent); /* for the _real_ FW event on completion */ /* ring doorbell after the descriptor is valid */ mlx5_core_dbg(dev, "writing 0x%x to command doorbell\n", 1 << ent->idx); wmb(); @@ -1508,8 +1497,8 @@ static ssize_t outlen_write(struct file *filp, const char __user *buf, return -EFAULT; err = sscanf(outlen_str, "%d", &outlen); - if (err < 0) - return err; + if (err != 1) + return -EINVAL; ptr = kzalloc(outlen, GFP_KERNEL); if (!ptr) @@ -1672,8 +1661,8 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force cmd_ent_put(ent); /* timeout work was canceled */ if (!forced || /* Real FW completion */ - pci_channel_offline(dev->pdev) || /* FW is inaccessible */ - dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) + mlx5_cmd_is_down(dev) || /* No real FW completion is expected */ + !opcode_allowed(cmd, ent->op)) cmd_ent_put(ent); ent->ts2 = ktime_get_ns(); @@ -1892,6 +1881,16 @@ out_in: return err; } +static void mlx5_cmd_err_trace(struct mlx5_core_dev *dev, u16 opcode, u16 op_mod, void *out) +{ + u32 syndrome = MLX5_GET(mbox_out, out, syndrome); + u8 status = MLX5_GET(mbox_out, out, status); + + trace_mlx5_cmd(mlx5_command_str(opcode), opcode, op_mod, + cmd_status_str(status), status, syndrome, + cmd_status_to_err(status)); +} + static void cmd_status_log(struct mlx5_core_dev *dev, u16 opcode, u8 status, u32 syndrome, int err) { @@ -1914,7 +1913,7 @@ static void cmd_status_log(struct mlx5_core_dev *dev, u16 opcode, u8 status, } /* preserve -EREMOTEIO for outbox.status != OK, otherwise return err as is */ -static int cmd_status_err(struct mlx5_core_dev *dev, int err, u16 opcode, void *out) +static int cmd_status_err(struct mlx5_core_dev *dev, int err, u16 opcode, u16 op_mod, void *out) { u32 syndrome = MLX5_GET(mbox_out, out, syndrome); u8 status = MLX5_GET(mbox_out, out, status); @@ -1922,8 +1921,10 @@ static int cmd_status_err(struct mlx5_core_dev *dev, int err, u16 opcode, void * if (err == -EREMOTEIO) /* -EREMOTEIO is preserved */ err = -EIO; - if (!err && status != MLX5_CMD_STAT_OK) + if (!err && status != MLX5_CMD_STAT_OK) { err = -EREMOTEIO; + mlx5_cmd_err_trace(dev, opcode, op_mod, out); + } cmd_status_log(dev, opcode, status, syndrome, err); return err; @@ -1951,9 +1952,9 @@ int mlx5_cmd_do(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int { int err = cmd_exec(dev, in, in_size, out, out_size, NULL, NULL, false); u16 opcode = MLX5_GET(mbox_in, in, opcode); + u16 op_mod = MLX5_GET(mbox_in, in, op_mod); - err = cmd_status_err(dev, err, opcode, out); - return err; + return cmd_status_err(dev, err, opcode, op_mod, out); } EXPORT_SYMBOL(mlx5_cmd_do); @@ -1997,8 +1998,9 @@ int mlx5_cmd_exec_polling(struct mlx5_core_dev *dev, void *in, int in_size, { int err = cmd_exec(dev, in, in_size, out, out_size, NULL, NULL, true); u16 opcode = MLX5_GET(mbox_in, in, opcode); + u16 op_mod = MLX5_GET(mbox_in, in, op_mod); - err = cmd_status_err(dev, err, opcode, out); + err = cmd_status_err(dev, err, opcode, op_mod, out); return mlx5_cmd_check(dev, err, in, out); } EXPORT_SYMBOL(mlx5_cmd_exec_polling); @@ -2034,7 +2036,7 @@ static void mlx5_cmd_exec_cb_handler(int status, void *_work) struct mlx5_async_ctx *ctx; ctx = work->ctx; - status = cmd_status_err(ctx->dev, status, work->opcode, work->out); + status = cmd_status_err(ctx->dev, status, work->opcode, work->op_mod, work->out); work->user_callback(status, work); if (atomic_dec_and_test(&ctx->num_inflight)) complete(&ctx->inflight_done); @@ -2049,6 +2051,7 @@ int mlx5_cmd_exec_cb(struct mlx5_async_ctx *ctx, void *in, int in_size, work->ctx = ctx; work->user_callback = callback; work->opcode = MLX5_GET(mbox_in, in, opcode); + work->op_mod = MLX5_GET(mbox_in, in, op_mod); work->out = out; if (WARN_ON(!atomic_inc_not_zero(&ctx->num_inflight))) return -EIO; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/cmd_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/cmd_tracepoint.h new file mode 100644 index 000000000000..406ebe17405f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/cmd_tracepoint.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM mlx5 + +#if !defined(_MLX5_CMD_TP_H_) || defined(TRACE_HEADER_MULTI_READ) +#define _MLX5_CMD_TP_H_ + +#include <linux/tracepoint.h> +#include <linux/trace_seq.h> + +TRACE_EVENT(mlx5_cmd, + TP_PROTO(const char *command_str, u16 opcode, u16 op_mod, + const char *status_str, u8 status, u32 syndrome, int err), + TP_ARGS(command_str, opcode, op_mod, status_str, status, syndrome, err), + TP_STRUCT__entry(__string(command_str, command_str) + __field(u16, opcode) + __field(u16, op_mod) + __string(status_str, status_str) + __field(u8, status) + __field(u32, syndrome) + __field(int, err) + ), + TP_fast_assign(__assign_str(command_str, command_str); + __entry->opcode = opcode; + __entry->op_mod = op_mod; + __assign_str(status_str, status_str); + __entry->status = status; + __entry->syndrome = syndrome; + __entry->err = err; + ), + TP_printk("%s(0x%x) op_mod(0x%x) failed, status %s(0x%x), syndrome (0x%x), err(%d)", + __get_str(command_str), __entry->opcode, __entry->op_mod, + __get_str(status_str), __entry->status, __entry->syndrome, + __entry->err) +); + +#endif /* _MLX5_CMD_TP_H_ */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH ./diag +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE cmd_tracepoint +#include <trace/define_trace.h> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c index 978a2bb8e122..21831386b26e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c @@ -638,7 +638,7 @@ static void mlx5_tracer_handle_timestamp_trace(struct mlx5_fw_tracer *tracer, trace_timestamp = (timestamp_event.timestamp & MASK_52_7) | (str_frmt->timestamp & MASK_6_0); else - trace_timestamp = ((timestamp_event.timestamp & MASK_52_7) - 1) | + trace_timestamp = ((timestamp_event.timestamp - 1) & MASK_52_7) | (str_frmt->timestamp & MASK_6_0); mlx5_tracer_print_trace(str_frmt, dev, trace_timestamp); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c index 5aff97914367..ff73d25bc6eb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c @@ -224,15 +224,16 @@ void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv, list_for_each_entry(flow, flow_list, tmp_list) { if (!mlx5e_is_offloaded_flow(flow) || flow_flag_test(flow, SLOW)) continue; - spec = &flow->attr->parse_attr->spec; - - /* update from encap rule to slow path rule */ - rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec); attr = mlx5e_tc_get_encap_attr(flow); esw_attr = attr->esw_attr; /* mark the flow's encap dest as non-valid */ esw_attr->dests[flow->tmp_entry_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID; + esw_attr->dests[flow->tmp_entry_index].pkt_reformat = NULL; + + /* update from encap rule to slow path rule */ + spec = &flow->attr->parse_attr->spec; + rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec); if (IS_ERR(rule)) { err = PTR_ERR(rule); @@ -251,6 +252,7 @@ void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv, /* we know that the encap is valid */ e->flags &= ~MLX5_ENCAP_ENTRY_VALID; mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat); + e->pkt_reformat = NULL; } static void mlx5e_take_tmp_flow(struct mlx5e_tc_flow *flow, @@ -762,8 +764,7 @@ int mlx5e_attach_encap(struct mlx5e_priv *priv, struct net_device *mirred_dev, int out_index, struct netlink_ext_ack *extack, - struct net_device **encap_dev, - bool *encap_valid) + struct net_device **encap_dev) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5e_tc_flow_parse_attr *parse_attr; @@ -878,9 +879,8 @@ attach_flow: if (e->flags & MLX5_ENCAP_ENTRY_VALID) { attr->esw_attr->dests[out_index].pkt_reformat = e->pkt_reformat; attr->esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_ENCAP_VALID; - *encap_valid = true; } else { - *encap_valid = false; + flow_flag_set(flow, SLOW); } mutex_unlock(&esw->offloads.encap_tbl_lock); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h index d542b8476491..8ad273dde40e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h @@ -17,8 +17,7 @@ int mlx5e_attach_encap(struct mlx5e_priv *priv, struct net_device *mirred_dev, int out_index, struct netlink_ext_ack *extack, - struct net_device **encap_dev, - bool *encap_valid); + struct net_device **encap_dev); int mlx5e_attach_decap(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c index 285d32d2fd08..d7c020f72401 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c @@ -365,7 +365,7 @@ void mlx5e_accel_fs_tcp_destroy(struct mlx5e_flow_steering *fs) for (i = 0; i < ACCEL_FS_TCP_NUM_TYPES; i++) accel_fs_tcp_destroy_table(fs, i); - kfree(accel_tcp); + kvfree(accel_tcp); mlx5e_fs_set_accel_tcp(fs, NULL); } @@ -397,7 +397,7 @@ int mlx5e_accel_fs_tcp_create(struct mlx5e_flow_steering *fs) err_destroy_tables: while (--i >= 0) accel_fs_tcp_destroy_table(fs, i); - kfree(accel_tcp); + kvfree(accel_tcp); mlx5e_fs_set_accel_tcp(fs, NULL); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c index 2ef36cb9555a..f900709639f6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c @@ -368,15 +368,15 @@ static int mlx5e_macsec_init_sa(struct macsec_context *ctx, obj_attrs.aso_pdn = macsec->aso.pdn; obj_attrs.epn_state = sa->epn_state; - if (is_tx) { - obj_attrs.ssci = cpu_to_be32((__force u32)ctx->sa.tx_sa->ssci); - key = &ctx->sa.tx_sa->key; - } else { - obj_attrs.ssci = cpu_to_be32((__force u32)ctx->sa.rx_sa->ssci); - key = &ctx->sa.rx_sa->key; + key = (is_tx) ? &ctx->sa.tx_sa->key : &ctx->sa.rx_sa->key; + + if (sa->epn_state.epn_enabled) { + obj_attrs.ssci = (is_tx) ? cpu_to_be32((__force u32)ctx->sa.tx_sa->ssci) : + cpu_to_be32((__force u32)ctx->sa.rx_sa->ssci); + + memcpy(&obj_attrs.salt, &key->salt, sizeof(key->salt)); } - memcpy(&obj_attrs.salt, &key->salt, sizeof(key->salt)); obj_attrs.replay_window = ctx->secy->replay_window; obj_attrs.replay_protect = ctx->secy->replay_protect; @@ -427,15 +427,15 @@ mlx5e_macsec_get_rx_sc_from_sc_list(const struct list_head *list, sci_t sci) return NULL; } -static int mlx5e_macsec_update_rx_sa(struct mlx5e_macsec *macsec, - struct mlx5e_macsec_sa *rx_sa, - bool active) +static int macsec_rx_sa_active_update(struct macsec_context *ctx, + struct mlx5e_macsec_sa *rx_sa, + bool active) { - struct mlx5_core_dev *mdev = macsec->mdev; - struct mlx5_macsec_obj_attrs attrs = {}; + struct mlx5e_priv *priv = netdev_priv(ctx->netdev); + struct mlx5e_macsec *macsec = priv->macsec; int err = 0; - if (rx_sa->active != active) + if (rx_sa->active == active) return 0; rx_sa->active = active; @@ -444,13 +444,11 @@ static int mlx5e_macsec_update_rx_sa(struct mlx5e_macsec *macsec, return 0; } - attrs.sci = cpu_to_be64((__force u64)rx_sa->sci); - attrs.enc_key_id = rx_sa->enc_key_id; - err = mlx5e_macsec_create_object(mdev, &attrs, false, &rx_sa->macsec_obj_id); + err = mlx5e_macsec_init_sa(ctx, rx_sa, true, false); if (err) - return err; + rx_sa->active = false; - return 0; + return err; } static bool mlx5e_macsec_secy_features_validate(struct macsec_context *ctx) @@ -476,6 +474,11 @@ static bool mlx5e_macsec_secy_features_validate(struct macsec_context *ctx) return false; } + if (!ctx->secy->tx_sc.encrypt) { + netdev_err(netdev, "MACsec offload: encrypt off isn't supported\n"); + return false; + } + return true; } @@ -620,6 +623,7 @@ static int mlx5e_macsec_upd_txsa(struct macsec_context *ctx) if (tx_sa->active == ctx_tx_sa->active) goto out; + tx_sa->active = ctx_tx_sa->active; if (tx_sa->assoc_num != tx_sc->encoding_sa) goto out; @@ -635,8 +639,6 @@ static int mlx5e_macsec_upd_txsa(struct macsec_context *ctx) mlx5e_macsec_cleanup_sa(macsec, tx_sa, true); } - - tx_sa->active = ctx_tx_sa->active; out: mutex_unlock(&macsec->lock); @@ -736,9 +738,14 @@ static int mlx5e_macsec_add_rxsc(struct macsec_context *ctx) sc_xarray_element->rx_sc = rx_sc; err = xa_alloc(&macsec->sc_xarray, &sc_xarray_element->fs_id, sc_xarray_element, - XA_LIMIT(1, USHRT_MAX), GFP_KERNEL); - if (err) + XA_LIMIT(1, MLX5_MACEC_RX_FS_ID_MAX), GFP_KERNEL); + if (err) { + if (err == -EBUSY) + netdev_err(ctx->netdev, + "MACsec offload: unable to create entry for RX SC (%d Rx SCs already allocated)\n", + MLX5_MACEC_RX_FS_ID_MAX); goto destroy_sc_xarray_elemenet; + } rx_sc->md_dst = metadata_dst_alloc(0, METADATA_MACSEC, GFP_KERNEL); if (!rx_sc->md_dst) { @@ -798,16 +805,16 @@ static int mlx5e_macsec_upd_rxsc(struct macsec_context *ctx) goto out; } - rx_sc->active = ctx_rx_sc->active; if (rx_sc->active == ctx_rx_sc->active) goto out; + rx_sc->active = ctx_rx_sc->active; for (i = 0; i < MACSEC_NUM_AN; ++i) { rx_sa = rx_sc->rx_sa[i]; if (!rx_sa) continue; - err = mlx5e_macsec_update_rx_sa(macsec, rx_sa, rx_sa->active && ctx_rx_sc->active); + err = macsec_rx_sa_active_update(ctx, rx_sa, rx_sa->active && ctx_rx_sc->active); if (err) goto out; } @@ -818,16 +825,43 @@ out: return err; } +static void macsec_del_rxsc_ctx(struct mlx5e_macsec *macsec, struct mlx5e_macsec_rx_sc *rx_sc) +{ + struct mlx5e_macsec_sa *rx_sa; + int i; + + for (i = 0; i < MACSEC_NUM_AN; ++i) { + rx_sa = rx_sc->rx_sa[i]; + if (!rx_sa) + continue; + + mlx5e_macsec_cleanup_sa(macsec, rx_sa, false); + mlx5_destroy_encryption_key(macsec->mdev, rx_sa->enc_key_id); + + kfree(rx_sa); + rx_sc->rx_sa[i] = NULL; + } + + /* At this point the relevant MACsec offload Rx rule already removed at + * mlx5e_macsec_cleanup_sa need to wait for datapath to finish current + * Rx related data propagating using xa_erase which uses rcu to sync, + * once fs_id is erased then this rx_sc is hidden from datapath. + */ + list_del_rcu(&rx_sc->rx_sc_list_element); + xa_erase(&macsec->sc_xarray, rx_sc->sc_xarray_element->fs_id); + metadata_dst_free(rx_sc->md_dst); + kfree(rx_sc->sc_xarray_element); + kfree_rcu(rx_sc); +} + static int mlx5e_macsec_del_rxsc(struct macsec_context *ctx) { struct mlx5e_priv *priv = netdev_priv(ctx->netdev); struct mlx5e_macsec_device *macsec_device; struct mlx5e_macsec_rx_sc *rx_sc; - struct mlx5e_macsec_sa *rx_sa; struct mlx5e_macsec *macsec; struct list_head *list; int err = 0; - int i; mutex_lock(&priv->macsec->lock); @@ -849,31 +883,7 @@ static int mlx5e_macsec_del_rxsc(struct macsec_context *ctx) goto out; } - for (i = 0; i < MACSEC_NUM_AN; ++i) { - rx_sa = rx_sc->rx_sa[i]; - if (!rx_sa) - continue; - - mlx5e_macsec_cleanup_sa(macsec, rx_sa, false); - mlx5_destroy_encryption_key(macsec->mdev, rx_sa->enc_key_id); - - kfree(rx_sa); - rx_sc->rx_sa[i] = NULL; - } - -/* - * At this point the relevant MACsec offload Rx rule already removed at - * mlx5e_macsec_cleanup_sa need to wait for datapath to finish current - * Rx related data propagating using xa_erase which uses rcu to sync, - * once fs_id is erased then this rx_sc is hidden from datapath. - */ - list_del_rcu(&rx_sc->rx_sc_list_element); - xa_erase(&macsec->sc_xarray, rx_sc->sc_xarray_element->fs_id); - metadata_dst_free(rx_sc->md_dst); - kfree(rx_sc->sc_xarray_element); - - kfree_rcu(rx_sc); - + macsec_del_rxsc_ctx(macsec, rx_sc); out: mutex_unlock(&macsec->lock); @@ -1015,7 +1025,7 @@ static int mlx5e_macsec_upd_rxsa(struct macsec_context *ctx) goto out; } - err = mlx5e_macsec_update_rx_sa(macsec, rx_sa, ctx_rx_sa->active); + err = macsec_rx_sa_active_update(ctx, rx_sa, ctx_rx_sa->active); out: mutex_unlock(&macsec->lock); @@ -1155,7 +1165,7 @@ static int macsec_upd_secy_hw_address(struct macsec_context *ctx, continue; if (rx_sa->active) { - err = mlx5e_macsec_init_sa(ctx, rx_sa, false, false); + err = mlx5e_macsec_init_sa(ctx, rx_sa, true, false); if (err) goto out; } @@ -1234,7 +1244,6 @@ static int mlx5e_macsec_del_secy(struct macsec_context *ctx) struct mlx5e_priv *priv = netdev_priv(ctx->netdev); struct mlx5e_macsec_device *macsec_device; struct mlx5e_macsec_rx_sc *rx_sc, *tmp; - struct mlx5e_macsec_sa *rx_sa; struct mlx5e_macsec_sa *tx_sa; struct mlx5e_macsec *macsec; struct list_head *list; @@ -1263,28 +1272,15 @@ static int mlx5e_macsec_del_secy(struct macsec_context *ctx) } list = &macsec_device->macsec_rx_sc_list_head; - list_for_each_entry_safe(rx_sc, tmp, list, rx_sc_list_element) { - for (i = 0; i < MACSEC_NUM_AN; ++i) { - rx_sa = rx_sc->rx_sa[i]; - if (!rx_sa) - continue; - - mlx5e_macsec_cleanup_sa(macsec, rx_sa, false); - mlx5_destroy_encryption_key(macsec->mdev, rx_sa->enc_key_id); - kfree(rx_sa); - rx_sc->rx_sa[i] = NULL; - } - - list_del_rcu(&rx_sc->rx_sc_list_element); - - kfree_rcu(rx_sc); - } + list_for_each_entry_safe(rx_sc, tmp, list, rx_sc_list_element) + macsec_del_rxsc_ctx(macsec, rx_sc); kfree(macsec_device->dev_addr); macsec_device->dev_addr = NULL; list_del_rcu(&macsec_device->macsec_device_list_element); --macsec->num_of_devices; + kfree(macsec_device); out: mutex_unlock(&macsec->lock); @@ -1536,6 +1532,8 @@ static void macsec_async_event(struct work_struct *work) async_work = container_of(work, struct mlx5e_macsec_async_work, work); macsec = async_work->macsec; + mutex_lock(&macsec->lock); + mdev = async_work->mdev; obj_id = async_work->obj_id; macsec_sa = get_macsec_tx_sa_from_obj_id(macsec, obj_id); @@ -1557,6 +1555,7 @@ static void macsec_async_event(struct work_struct *work) out_async_work: kfree(async_work); + mutex_unlock(&macsec->lock); } static int macsec_obj_change_event(struct notifier_block *nb, unsigned long event, void *data) @@ -1745,7 +1744,7 @@ void mlx5e_macsec_offload_handle_rx_skb(struct net_device *netdev, if (!macsec) return; - fs_id = MLX5_MACSEC_METADATA_HANDLE(macsec_meta_data); + fs_id = MLX5_MACSEC_RX_METADAT_HANDLE(macsec_meta_data); rcu_read_lock(); sc_xarray_element = xa_load(&macsec->sc_xarray, fs_id); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.h index d580b4a91253..347380a2cd9c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.h @@ -10,9 +10,11 @@ #include <net/macsec.h> #include <net/dst_metadata.h> -/* Bit31 - 30: MACsec marker, Bit3-0: MACsec id */ +/* Bit31 - 30: MACsec marker, Bit15-0: MACsec id */ +#define MLX5_MACEC_RX_FS_ID_MAX USHRT_MAX /* Must be power of two */ +#define MLX5_MACSEC_RX_FS_ID_MASK MLX5_MACEC_RX_FS_ID_MAX #define MLX5_MACSEC_METADATA_MARKER(metadata) ((((metadata) >> 30) & 0x3) == 0x1) -#define MLX5_MACSEC_METADATA_HANDLE(metadata) ((metadata) & GENMASK(3, 0)) +#define MLX5_MACSEC_RX_METADAT_HANDLE(metadata) ((metadata) & MLX5_MACSEC_RX_FS_ID_MASK) struct mlx5e_priv; struct mlx5e_macsec; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c index 1ac0cf04e811..5b658a5588c6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c @@ -250,7 +250,7 @@ static int macsec_fs_tx_create(struct mlx5e_macsec_fs *macsec_fs) struct mlx5_flow_handle *rule; struct mlx5_flow_spec *spec; u32 *flow_group_in; - int err = 0; + int err; ns = mlx5_get_flow_namespace(macsec_fs->mdev, MLX5_FLOW_NAMESPACE_EGRESS_MACSEC); if (!ns) @@ -261,8 +261,10 @@ static int macsec_fs_tx_create(struct mlx5e_macsec_fs *macsec_fs) return -ENOMEM; flow_group_in = kvzalloc(inlen, GFP_KERNEL); - if (!flow_group_in) + if (!flow_group_in) { + err = -ENOMEM; goto out_spec; + } tx_tables = &tx_fs->tables; ft_crypto = &tx_tables->ft_crypto; @@ -898,7 +900,7 @@ static int macsec_fs_rx_create(struct mlx5e_macsec_fs *macsec_fs) struct mlx5_flow_handle *rule; struct mlx5_flow_spec *spec; u32 *flow_group_in; - int err = 0; + int err; ns = mlx5_get_flow_namespace(macsec_fs->mdev, MLX5_FLOW_NAMESPACE_KERNEL_RX_MACSEC); if (!ns) @@ -909,8 +911,10 @@ static int macsec_fs_rx_create(struct mlx5e_macsec_fs *macsec_fs) return -ENOMEM; flow_group_in = kvzalloc(inlen, GFP_KERNEL); - if (!flow_group_in) + if (!flow_group_in) { + err = -ENOMEM; goto free_spec; + } rx_tables = &rx_fs->tables; ft_crypto = &rx_tables->ft_crypto; @@ -1142,10 +1146,10 @@ macsec_fs_rx_add_rule(struct mlx5e_macsec_fs *macsec_fs, ft_crypto = &rx_tables->ft_crypto; /* Set bit[31 - 30] macsec marker - 0x01 */ - /* Set bit[3-0] fs id */ + /* Set bit[15-0] fs id */ MLX5_SET(set_action_in, action, action_type, MLX5_ACTION_TYPE_SET); MLX5_SET(set_action_in, action, field, MLX5_ACTION_IN_FIELD_METADATA_REG_B); - MLX5_SET(set_action_in, action, data, fs_id | BIT(30)); + MLX5_SET(set_action_in, action, data, MLX5_MACSEC_RX_METADAT_HANDLE(fs_id) | BIT(30)); MLX5_SET(set_action_in, action, offset, 0); MLX5_SET(set_action_in, action, length, 32); @@ -1205,6 +1209,7 @@ macsec_fs_rx_add_rule(struct mlx5e_macsec_fs *macsec_fs, rx_rule->rule[1] = rule; } + kvfree(spec); return macsec_rule; err: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 24aa25da482b..1728e197558d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -35,7 +35,6 @@ #include "en.h" #include "en/port.h" #include "en/params.h" -#include "en/xsk/pool.h" #include "en/ptp.h" #include "lib/clock.h" #include "en/fs_ethtool.h" @@ -412,15 +411,8 @@ void mlx5e_ethtool_get_channels(struct mlx5e_priv *priv, struct ethtool_channels *ch) { mutex_lock(&priv->state_lock); - ch->max_combined = priv->max_nch; ch->combined_count = priv->channels.params.num_channels; - if (priv->xsk.refcnt) { - /* The upper half are XSK queues. */ - ch->max_combined *= 2; - ch->combined_count *= 2; - } - mutex_unlock(&priv->state_lock); } @@ -454,16 +446,6 @@ int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv, mutex_lock(&priv->state_lock); - /* Don't allow changing the number of channels if there is an active - * XSK, because the numeration of the XSK and regular RQs will change. - */ - if (priv->xsk.refcnt) { - err = -EINVAL; - netdev_err(priv->netdev, "%s: AF_XDP is active, cannot change the number of channels\n", - __func__); - goto out; - } - /* Don't allow changing the number of channels if HTB offload is active, * because the numeration of the QoS SQs will change, while per-queue * qdiscs are attached. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index e3a4f01bcceb..5e41dfdf79c8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -206,10 +206,11 @@ static void mlx5e_disable_blocking_events(struct mlx5e_priv *priv) static u16 mlx5e_mpwrq_umr_octowords(u32 entries, enum mlx5e_mpwrq_umr_mode umr_mode) { u8 umr_entry_size = mlx5e_mpwrq_umr_entry_size(umr_mode); + u32 sz; - WARN_ON_ONCE(entries * umr_entry_size % MLX5_OCTWORD); + sz = ALIGN(entries * umr_entry_size, MLX5_UMR_MTT_ALIGNMENT); - return entries * umr_entry_size / MLX5_OCTWORD; + return sz / MLX5_OCTWORD; } static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 5a6aa61ec82a..bd9936af4582 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1634,7 +1634,6 @@ set_encap_dests(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr, struct netlink_ext_ack *extack, - bool *encap_valid, bool *vf_tun) { struct mlx5e_tc_flow_parse_attr *parse_attr; @@ -1651,7 +1650,6 @@ set_encap_dests(struct mlx5e_priv *priv, parse_attr = attr->parse_attr; esw_attr = attr->esw_attr; *vf_tun = false; - *encap_valid = true; for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) { struct net_device *out_dev; @@ -1668,7 +1666,7 @@ set_encap_dests(struct mlx5e_priv *priv, goto out; } err = mlx5e_attach_encap(priv, flow, attr, out_dev, out_index, - extack, &encap_dev, encap_valid); + extack, &encap_dev); dev_put(out_dev); if (err) goto out; @@ -1732,8 +1730,8 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow_parse_attr *parse_attr; struct mlx5_flow_attr *attr = flow->attr; struct mlx5_esw_flow_attr *esw_attr; - bool vf_tun, encap_valid; u32 max_prio, max_chain; + bool vf_tun; int err = 0; parse_attr = attr->parse_attr; @@ -1823,7 +1821,7 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, esw_attr->int_port = int_port; } - err = set_encap_dests(priv, flow, attr, extack, &encap_valid, &vf_tun); + err = set_encap_dests(priv, flow, attr, extack, &vf_tun); if (err) goto err_out; @@ -1853,7 +1851,7 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, * (1) there's no error * (2) there's an encap action and we don't have valid neigh */ - if (!encap_valid || flow_flag_test(flow, SLOW)) + if (flow_flag_test(flow, SLOW)) flow->rule[0] = mlx5e_tc_offload_to_slow_path(esw, flow, &parse_attr->spec); else flow->rule[0] = mlx5e_tc_offload_fdb_rules(esw, flow, &parse_attr->spec, attr); @@ -3759,7 +3757,7 @@ alloc_flow_post_acts(struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack) struct mlx5e_post_act *post_act = get_post_action(flow->priv); struct mlx5_flow_attr *attr, *next_attr = NULL; struct mlx5e_post_act_handle *handle; - bool vf_tun, encap_valid = true; + bool vf_tun; int err; /* This is going in reverse order as needed. @@ -3781,13 +3779,10 @@ alloc_flow_post_acts(struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack) if (list_is_last(&attr->list, &flow->attrs)) break; - err = set_encap_dests(flow->priv, flow, attr, extack, &encap_valid, &vf_tun); + err = set_encap_dests(flow->priv, flow, attr, extack, &vf_tun); if (err) goto out_free; - if (!encap_valid) - flow_flag_set(flow, SLOW); - err = actions_prepare_mod_hdr_actions(flow->priv, flow, attr, extack); if (err) goto out_free; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 2169486c4bfb..374e3fbdc2cf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1362,6 +1362,9 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw, bool clear_vf) devl_rate_nodes_destroy(devlink); } + /* Destroy legacy fdb when disabling sriov in legacy mode. */ + if (esw->mode == MLX5_ESWITCH_LEGACY) + mlx5_eswitch_disable_locked(esw); esw->esw_funcs.num_vfs = 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index f68dc2d0dbe6..3029bc1c0dd0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -736,6 +736,14 @@ void mlx5_eswitch_offloads_destroy_single_fdb(struct mlx5_eswitch *master_esw, struct mlx5_eswitch *slave_esw); int mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw); +static inline int mlx5_eswitch_num_vfs(struct mlx5_eswitch *esw) +{ + if (mlx5_esw_allowed(esw)) + return esw->esw_funcs.num_vfs; + + return 0; +} + #else /* CONFIG_MLX5_ESWITCH */ /* eswitch API stubs */ static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 728ca9f2bb9d..8c6c9bcb3dc3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -433,7 +433,7 @@ esw_setup_vport_dest(struct mlx5_flow_destination *dest, struct mlx5_flow_act *f mlx5_lag_mpesw_is_activated(esw->dev)) dest[dest_idx].type = MLX5_FLOW_DESTINATION_TYPE_UPLINK; } - if (esw_attr->dests[attr_idx].flags & MLX5_ESW_DEST_ENCAP) { + if (esw_attr->dests[attr_idx].flags & MLX5_ESW_DEST_ENCAP_VALID) { if (pkt_reformat) { flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; flow_act->pkt_reformat = esw_attr->dests[attr_idx].pkt_reformat; @@ -3387,6 +3387,13 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw, int err; esw->mode = MLX5_ESWITCH_LEGACY; + + /* If changing from switchdev to legacy mode without sriov enabled, + * no need to create legacy fdb. + */ + if (!mlx5_sriov_is_enabled(esw->dev)) + return 0; + err = mlx5_eswitch_enable_locked(esw, MLX5_ESWITCH_IGNORE_NUM_VFS); if (err) NL_SET_ERR_MSG_MOD(extack, "Failed setting eswitch to legacy"); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c index 108a3503f413..edd910258314 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c @@ -312,6 +312,8 @@ revert_changes: for (curr_dest = 0; curr_dest < num_vport_dests; curr_dest++) { struct mlx5_termtbl_handle *tt = attr->dests[curr_dest].termtbl; + attr->dests[curr_dest].termtbl = NULL; + /* search for the destination associated with the * current term table */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c index 9d908a0ccfef..1e46f9afa40e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c @@ -9,7 +9,8 @@ enum { MLX5_FW_RESET_FLAGS_RESET_REQUESTED, MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST, MLX5_FW_RESET_FLAGS_PENDING_COMP, - MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS + MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS, + MLX5_FW_RESET_FLAGS_RELOAD_REQUIRED }; struct mlx5_fw_reset { @@ -406,7 +407,7 @@ static void mlx5_sync_reset_now_event(struct work_struct *work) err = mlx5_pci_link_toggle(dev); if (err) { mlx5_core_warn(dev, "mlx5_pci_link_toggle failed, no reset done, err %d\n", err); - goto done; + set_bit(MLX5_FW_RESET_FLAGS_RELOAD_REQUIRED, &fw_reset->reset_flags); } mlx5_enter_error_state(dev, true); @@ -482,6 +483,10 @@ int mlx5_fw_reset_wait_reset_done(struct mlx5_core_dev *dev) goto out; } err = fw_reset->ret; + if (test_and_clear_bit(MLX5_FW_RESET_FLAGS_RELOAD_REQUIRED, &fw_reset->reset_flags)) { + mlx5_unload_one_devl_locked(dev); + mlx5_load_one_devl_locked(dev, false); + } out: clear_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags); return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c index a9f4ede4a9bf..32c3e0a649a7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c @@ -228,9 +228,8 @@ static void mlx5_ldev_free(struct kref *ref) if (ldev->nb.notifier_call) unregister_netdevice_notifier_net(&init_net, &ldev->nb); mlx5_lag_mp_cleanup(ldev); - mlx5_lag_mpesw_cleanup(ldev); - cancel_work_sync(&ldev->mpesw_work); destroy_workqueue(ldev->wq); + mlx5_lag_mpesw_cleanup(ldev); mutex_destroy(&ldev->lock); kfree(ldev); } @@ -701,10 +700,13 @@ static bool mlx5_lag_check_prereq(struct mlx5_lag *ldev) return false; #ifdef CONFIG_MLX5_ESWITCH - dev = ldev->pf[MLX5_LAG_P1].dev; - if ((mlx5_sriov_is_enabled(dev)) && !is_mdev_switchdev_mode(dev)) - return false; + for (i = 0; i < ldev->ports; i++) { + dev = ldev->pf[i].dev; + if (mlx5_eswitch_num_vfs(dev->priv.eswitch) && !is_mdev_switchdev_mode(dev)) + return false; + } + dev = ldev->pf[MLX5_LAG_P1].dev; mode = mlx5_eswitch_mode(dev); for (i = 0; i < ldev->ports; i++) if (mlx5_eswitch_mode(ldev->pf[i].dev) != mode) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h index ce2ce8ccbd70..f30ac2de639f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h @@ -50,6 +50,19 @@ struct lag_tracker { enum netdev_lag_hash hash_type; }; +enum mpesw_op { + MLX5_MPESW_OP_ENABLE, + MLX5_MPESW_OP_DISABLE, +}; + +struct mlx5_mpesw_work_st { + struct work_struct work; + struct mlx5_lag *lag; + enum mpesw_op op; + struct completion comp; + int result; +}; + /* LAG data of a ConnectX card. * It serves both its phys functions. */ @@ -66,7 +79,6 @@ struct mlx5_lag { struct lag_tracker tracker; struct workqueue_struct *wq; struct delayed_work bond_work; - struct work_struct mpesw_work; struct notifier_block nb; struct lag_mp lag_mp; struct mlx5_lag_port_sel port_sel; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c index f643202b29c6..c17e8f1ec914 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c @@ -7,63 +7,95 @@ #include "eswitch.h" #include "lib/mlx5.h" -void mlx5_mpesw_work(struct work_struct *work) +static int add_mpesw_rule(struct mlx5_lag *ldev) { - struct mlx5_lag *ldev = container_of(work, struct mlx5_lag, mpesw_work); + struct mlx5_core_dev *dev = ldev->pf[MLX5_LAG_P1].dev; + int err; - mutex_lock(&ldev->lock); - mlx5_disable_lag(ldev); - mutex_unlock(&ldev->lock); -} + if (atomic_add_return(1, &ldev->lag_mpesw.mpesw_rule_count) != 1) + return 0; -static void mlx5_lag_disable_mpesw(struct mlx5_core_dev *dev) -{ - struct mlx5_lag *ldev = dev->priv.lag; + if (ldev->mode != MLX5_LAG_MODE_NONE) { + err = -EINVAL; + goto out_err; + } - if (!queue_work(ldev->wq, &ldev->mpesw_work)) - mlx5_core_warn(dev, "failed to queue work\n"); + err = mlx5_activate_lag(ldev, NULL, MLX5_LAG_MODE_MPESW, false); + if (err) { + mlx5_core_warn(dev, "Failed to create LAG in MPESW mode (%d)\n", err); + goto out_err; + } + + return 0; + +out_err: + atomic_dec(&ldev->lag_mpesw.mpesw_rule_count); + return err; } -void mlx5_lag_del_mpesw_rule(struct mlx5_core_dev *dev) +static void del_mpesw_rule(struct mlx5_lag *ldev) { - struct mlx5_lag *ldev = dev->priv.lag; + if (!atomic_dec_return(&ldev->lag_mpesw.mpesw_rule_count) && + ldev->mode == MLX5_LAG_MODE_MPESW) + mlx5_disable_lag(ldev); +} - if (!ldev) - return; +static void mlx5_mpesw_work(struct work_struct *work) +{ + struct mlx5_mpesw_work_st *mpesww = container_of(work, struct mlx5_mpesw_work_st, work); + struct mlx5_lag *ldev = mpesww->lag; mutex_lock(&ldev->lock); - if (!atomic_dec_return(&ldev->lag_mpesw.mpesw_rule_count) && - ldev->mode == MLX5_LAG_MODE_MPESW) - mlx5_lag_disable_mpesw(dev); + if (mpesww->op == MLX5_MPESW_OP_ENABLE) + mpesww->result = add_mpesw_rule(ldev); + else if (mpesww->op == MLX5_MPESW_OP_DISABLE) + del_mpesw_rule(ldev); mutex_unlock(&ldev->lock); + + complete(&mpesww->comp); } -int mlx5_lag_add_mpesw_rule(struct mlx5_core_dev *dev) +static int mlx5_lag_mpesw_queue_work(struct mlx5_core_dev *dev, + enum mpesw_op op) { struct mlx5_lag *ldev = dev->priv.lag; + struct mlx5_mpesw_work_st *work; int err = 0; if (!ldev) return 0; - mutex_lock(&ldev->lock); - if (atomic_add_return(1, &ldev->lag_mpesw.mpesw_rule_count) != 1) - goto out; + work = kzalloc(sizeof(*work), GFP_KERNEL); + if (!work) + return -ENOMEM; - if (ldev->mode != MLX5_LAG_MODE_NONE) { + INIT_WORK(&work->work, mlx5_mpesw_work); + init_completion(&work->comp); + work->op = op; + work->lag = ldev; + + if (!queue_work(ldev->wq, &work->work)) { + mlx5_core_warn(dev, "failed to queue mpesw work\n"); err = -EINVAL; goto out; } - - err = mlx5_activate_lag(ldev, NULL, MLX5_LAG_MODE_MPESW, false); - if (err) - mlx5_core_warn(dev, "Failed to create LAG in MPESW mode (%d)\n", err); - + wait_for_completion(&work->comp); + err = work->result; out: - mutex_unlock(&ldev->lock); + kfree(work); return err; } +void mlx5_lag_del_mpesw_rule(struct mlx5_core_dev *dev) +{ + mlx5_lag_mpesw_queue_work(dev, MLX5_MPESW_OP_DISABLE); +} + +int mlx5_lag_add_mpesw_rule(struct mlx5_core_dev *dev) +{ + return mlx5_lag_mpesw_queue_work(dev, MLX5_MPESW_OP_ENABLE); +} + int mlx5_lag_do_mirred(struct mlx5_core_dev *mdev, struct net_device *out_dev) { struct mlx5_lag *ldev = mdev->priv.lag; @@ -71,12 +103,9 @@ int mlx5_lag_do_mirred(struct mlx5_core_dev *mdev, struct net_device *out_dev) if (!netif_is_bond_master(out_dev) || !ldev) return 0; - mutex_lock(&ldev->lock); - if (ldev->mode == MLX5_LAG_MODE_MPESW) { - mutex_unlock(&ldev->lock); + if (ldev->mode == MLX5_LAG_MODE_MPESW) return -EOPNOTSUPP; - } - mutex_unlock(&ldev->lock); + return 0; } @@ -90,11 +119,10 @@ bool mlx5_lag_mpesw_is_activated(struct mlx5_core_dev *dev) void mlx5_lag_mpesw_init(struct mlx5_lag *ldev) { - INIT_WORK(&ldev->mpesw_work, mlx5_mpesw_work); atomic_set(&ldev->lag_mpesw.mpesw_rule_count, 0); } void mlx5_lag_mpesw_cleanup(struct mlx5_lag *ldev) { - cancel_delayed_work_sync(&ldev->bond_work); + WARN_ON(atomic_read(&ldev->lag_mpesw.mpesw_rule_count)); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h index be4abcb8fcd5..88e8daffcf92 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h @@ -12,7 +12,6 @@ struct lag_mpesw { atomic_t mpesw_rule_count; }; -void mlx5_mpesw_work(struct work_struct *work); int mlx5_lag_do_mirred(struct mlx5_core_dev *mdev, struct net_device *out_dev); bool mlx5_lag_mpesw_is_activated(struct mlx5_core_dev *dev); #if IS_ENABLED(CONFIG_MLX5_ESWITCH) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 283c4cc28944..e58775a7d955 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1798,7 +1798,8 @@ static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev, res = state == pci_channel_io_perm_failure ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET; - mlx5_pci_trace(dev, "Exit, result = %d, %s\n", res, result2str(res)); + mlx5_core_info(dev, "%s Device state = %d pci_status: %d. Exit, result = %d, %s\n", + __func__, dev->state, dev->pci_status, res, result2str(res)); return res; } @@ -1837,7 +1838,8 @@ static pci_ers_result_t mlx5_pci_slot_reset(struct pci_dev *pdev) struct mlx5_core_dev *dev = pci_get_drvdata(pdev); int err; - mlx5_pci_trace(dev, "Enter\n"); + mlx5_core_info(dev, "%s Device state = %d pci_status: %d. Enter\n", + __func__, dev->state, dev->pci_status); err = mlx5_pci_enable_device(dev); if (err) { @@ -1859,7 +1861,8 @@ static pci_ers_result_t mlx5_pci_slot_reset(struct pci_dev *pdev) res = PCI_ERS_RESULT_RECOVERED; out: - mlx5_pci_trace(dev, "Exit, err = %d, result = %d, %s\n", err, res, result2str(res)); + mlx5_core_info(dev, "%s Device state = %d pci_status: %d. Exit, err = %d, result = %d, %s\n", + __func__, dev->state, dev->pci_status, err, res, result2str(res)); return res; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c index 7da012ff0d41..8e2abbab05f0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c @@ -18,6 +18,10 @@ struct mlx5_sf_dev_table { phys_addr_t base_address; u64 sf_bar_length; struct notifier_block nb; + struct mutex table_lock; /* Serializes sf life cycle and vhca state change handler */ + struct workqueue_struct *active_wq; + struct work_struct work; + u8 stop_active_wq:1; struct mlx5_core_dev *dev; }; @@ -168,6 +172,7 @@ mlx5_sf_dev_state_change_handler(struct notifier_block *nb, unsigned long event_ return 0; sf_index = event->function_id - base_id; + mutex_lock(&table->table_lock); sf_dev = xa_load(&table->devices, sf_index); switch (event->new_vhca_state) { case MLX5_VHCA_STATE_INVALID: @@ -191,6 +196,7 @@ mlx5_sf_dev_state_change_handler(struct notifier_block *nb, unsigned long event_ default: break; } + mutex_unlock(&table->table_lock); return 0; } @@ -215,6 +221,78 @@ static int mlx5_sf_dev_vhca_arm_all(struct mlx5_sf_dev_table *table) return 0; } +static void mlx5_sf_dev_add_active_work(struct work_struct *work) +{ + struct mlx5_sf_dev_table *table = container_of(work, struct mlx5_sf_dev_table, work); + u32 out[MLX5_ST_SZ_DW(query_vhca_state_out)] = {}; + struct mlx5_core_dev *dev = table->dev; + u16 max_functions; + u16 function_id; + u16 sw_func_id; + int err = 0; + u8 state; + int i; + + max_functions = mlx5_sf_max_functions(dev); + function_id = MLX5_CAP_GEN(dev, sf_base_id); + for (i = 0; i < max_functions; i++, function_id++) { + if (table->stop_active_wq) + return; + err = mlx5_cmd_query_vhca_state(dev, function_id, out, sizeof(out)); + if (err) + /* A failure of specific vhca doesn't mean others will + * fail as well. + */ + continue; + state = MLX5_GET(query_vhca_state_out, out, vhca_state_context.vhca_state); + if (state != MLX5_VHCA_STATE_ACTIVE) + continue; + + sw_func_id = MLX5_GET(query_vhca_state_out, out, vhca_state_context.sw_function_id); + mutex_lock(&table->table_lock); + /* Don't probe device which is already probe */ + if (!xa_load(&table->devices, i)) + mlx5_sf_dev_add(dev, i, function_id, sw_func_id); + /* There is a race where SF got inactive after the query + * above. e.g.: the query returns that the state of the + * SF is active, and after that the eswitch manager set it to + * inactive. + * This case cannot be managed in SW, since the probing of the + * SF is on one system, and the inactivation is on a different + * system. + * If the inactive is done after the SF perform init_hca(), + * the SF will fully probe and then removed. If it was + * done before init_hca(), the SF probe will fail. + */ + mutex_unlock(&table->table_lock); + } +} + +/* In case SFs are generated externally, probe active SFs */ +static int mlx5_sf_dev_queue_active_work(struct mlx5_sf_dev_table *table) +{ + if (MLX5_CAP_GEN(table->dev, eswitch_manager)) + return 0; /* the table is local */ + + /* Use a workqueue to probe active SFs, which are in large + * quantity and may take up to minutes to probe. + */ + table->active_wq = create_singlethread_workqueue("mlx5_active_sf"); + if (!table->active_wq) + return -ENOMEM; + INIT_WORK(&table->work, &mlx5_sf_dev_add_active_work); + queue_work(table->active_wq, &table->work); + return 0; +} + +static void mlx5_sf_dev_destroy_active_work(struct mlx5_sf_dev_table *table) +{ + if (table->active_wq) { + table->stop_active_wq = true; + destroy_workqueue(table->active_wq); + } +} + void mlx5_sf_dev_table_create(struct mlx5_core_dev *dev) { struct mlx5_sf_dev_table *table; @@ -240,11 +318,17 @@ void mlx5_sf_dev_table_create(struct mlx5_core_dev *dev) table->base_address = pci_resource_start(dev->pdev, 2); table->max_sfs = max_sfs; xa_init(&table->devices); + mutex_init(&table->table_lock); dev->priv.sf_dev_table = table; err = mlx5_vhca_event_notifier_register(dev, &table->nb); if (err) goto vhca_err; + + err = mlx5_sf_dev_queue_active_work(table); + if (err) + goto add_active_err; + err = mlx5_sf_dev_vhca_arm_all(table); if (err) goto arm_err; @@ -252,6 +336,8 @@ void mlx5_sf_dev_table_create(struct mlx5_core_dev *dev) return; arm_err: + mlx5_sf_dev_destroy_active_work(table); +add_active_err: mlx5_vhca_event_notifier_unregister(dev, &table->nb); vhca_err: table->max_sfs = 0; @@ -279,7 +365,9 @@ void mlx5_sf_dev_table_destroy(struct mlx5_core_dev *dev) if (!table) return; + mlx5_sf_dev_destroy_active_work(table); mlx5_vhca_event_notifier_unregister(dev, &table->nb); + mutex_destroy(&table->table_lock); /* Now that event handler is not running, it is safe to destroy * the sf device without race. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c index 31d443dd8386..f68461b13391 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c @@ -46,7 +46,7 @@ static int dr_table_set_miss_action_nic(struct mlx5dr_domain *dmn, int mlx5dr_table_set_miss_action(struct mlx5dr_table *tbl, struct mlx5dr_action *action) { - int ret; + int ret = -EOPNOTSUPP; if (action && action->action_type != DR_ACTION_TYP_FT) return -EOPNOTSUPP; @@ -67,6 +67,9 @@ int mlx5dr_table_set_miss_action(struct mlx5dr_table *tbl, goto out; } + if (ret) + goto out; + /* Release old action */ if (tbl->miss_action) refcount_dec(&tbl->miss_action->refcount); diff --git a/drivers/net/ethernet/microchip/encx24j600-regmap.c b/drivers/net/ethernet/microchip/encx24j600-regmap.c index 81a8ccca7e5e..5693784eec5b 100644 --- a/drivers/net/ethernet/microchip/encx24j600-regmap.c +++ b/drivers/net/ethernet/microchip/encx24j600-regmap.c @@ -359,7 +359,7 @@ static int regmap_encx24j600_phy_reg_read(void *context, unsigned int reg, goto err_out; usleep_range(26, 100); - while ((ret = regmap_read(ctx->regmap, MISTAT, &mistat) != 0) && + while (((ret = regmap_read(ctx->regmap, MISTAT, &mistat)) == 0) && (mistat & BUSY)) cpu_relax(); @@ -397,7 +397,7 @@ static int regmap_encx24j600_phy_reg_write(void *context, unsigned int reg, goto err_out; usleep_range(26, 100); - while ((ret = regmap_read(ctx->regmap, MISTAT, &mistat) != 0) && + while (((ret = regmap_read(ctx->regmap, MISTAT, &mistat)) == 0) && (mistat & BUSY)) cpu_relax(); diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_fdma.c b/drivers/net/ethernet/microchip/sparx5/sparx5_fdma.c index 66360c8c5a38..141897dfe388 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_fdma.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_fdma.c @@ -317,7 +317,7 @@ int sparx5_fdma_xmit(struct sparx5 *sparx5, u32 *ifh, struct sk_buff *skb) next_dcb_hw = sparx5_fdma_next_dcb(tx, tx->curr_entry); db_hw = &next_dcb_hw->db[0]; if (!(db_hw->status & FDMA_DCB_STATUS_DONE)) - tx->dropped++; + return -EINVAL; db = list_first_entry(&tx->db_list, struct sparx5_db, list); list_move_tail(&db->list, &tx->db_list); next_dcb_hw->nextptr = FDMA_DCB_INVALID_DATA; diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c index eeac04b84638..b6bbb3c9bd7a 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c @@ -887,6 +887,8 @@ static int mchp_sparx5_probe(struct platform_device *pdev) cleanup_ports: sparx5_cleanup_ports(sparx5); + if (sparx5->mact_queue) + destroy_workqueue(sparx5->mact_queue); cleanup_config: kfree(configs); cleanup_pnode: @@ -911,6 +913,7 @@ static int mchp_sparx5_remove(struct platform_device *pdev) sparx5_cleanup_ports(sparx5); /* Unregister netdevs */ sparx5_unregister_notifier_blocks(sparx5); + destroy_workqueue(sparx5->mact_queue); return 0; } diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c b/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c index 19516ccad533..d078156581d5 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c @@ -104,7 +104,7 @@ static int sparx5_port_open(struct net_device *ndev) err = phylink_of_phy_connect(port->phylink, port->of_node, 0); if (err) { netdev_err(ndev, "Could not attach to PHY\n"); - return err; + goto err_connect; } phylink_start(port->phylink); @@ -116,10 +116,20 @@ static int sparx5_port_open(struct net_device *ndev) err = sparx5_serdes_set(port->sparx5, port, &port->conf); else err = phy_power_on(port->serdes); - if (err) + if (err) { netdev_err(ndev, "%s failed\n", __func__); + goto out_power; + } } + return 0; + +out_power: + phylink_stop(port->phylink); + phylink_disconnect_phy(port->phylink); +err_connect: + sparx5_port_enable(port, false); + return err; } diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c b/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c index 83c16ca5b30f..6db6ac6a3bbc 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c @@ -234,9 +234,8 @@ netdev_tx_t sparx5_port_xmit_impl(struct sk_buff *skb, struct net_device *dev) sparx5_set_port_ifh(ifh, port->portno); if (sparx5->ptp && skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) { - ret = sparx5_ptp_txtstamp_request(port, skb); - if (ret) - return ret; + if (sparx5_ptp_txtstamp_request(port, skb) < 0) + return NETDEV_TX_BUSY; sparx5_set_port_ifh_rew_op(ifh, SPARX5_SKB_CB(skb)->rew_op); sparx5_set_port_ifh_pdu_type(ifh, SPARX5_SKB_CB(skb)->pdu_type); @@ -250,23 +249,31 @@ netdev_tx_t sparx5_port_xmit_impl(struct sk_buff *skb, struct net_device *dev) else ret = sparx5_inject(sparx5, ifh, skb, dev); - if (ret == NETDEV_TX_OK) { - stats->tx_bytes += skb->len; - stats->tx_packets++; + if (ret == -EBUSY) + goto busy; + if (ret < 0) + goto drop; - if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP && - SPARX5_SKB_CB(skb)->rew_op == IFH_REW_OP_TWO_STEP_PTP) - return ret; + stats->tx_bytes += skb->len; + stats->tx_packets++; + sparx5->tx.packets++; - dev_kfree_skb_any(skb); - } else { - stats->tx_dropped++; + if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP && + SPARX5_SKB_CB(skb)->rew_op == IFH_REW_OP_TWO_STEP_PTP) + return NETDEV_TX_OK; - if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP && - SPARX5_SKB_CB(skb)->rew_op == IFH_REW_OP_TWO_STEP_PTP) - sparx5_ptp_txtstamp_release(port, skb); - } - return ret; + dev_consume_skb_any(skb); + return NETDEV_TX_OK; +drop: + stats->tx_dropped++; + sparx5->tx.dropped++; + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; +busy: + if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP && + SPARX5_SKB_CB(skb)->rew_op == IFH_REW_OP_TWO_STEP_PTP) + sparx5_ptp_txtstamp_release(port, skb); + return NETDEV_TX_BUSY; } static enum hrtimer_restart sparx5_injection_timeout(struct hrtimer *tmr) diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_tc.c b/drivers/net/ethernet/microchip/sparx5/sparx5_tc.c index e05429c751ee..dc2c3756e3a2 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_tc.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_tc.c @@ -90,13 +90,10 @@ static int sparx5_tc_setup_qdisc_ets(struct net_device *ndev, } } - sparx5_tc_ets_add(port, params); - break; + return sparx5_tc_ets_add(port, params); case TC_ETS_DESTROY: - sparx5_tc_ets_del(port); - - break; + return sparx5_tc_ets_del(port); case TC_ETS_GRAFT: return -EOPNOTSUPP; diff --git a/drivers/net/ethernet/microsoft/mana/gdma.h b/drivers/net/ethernet/microsoft/mana/gdma.h index 4a6efe6ada08..65c24ee49efd 100644 --- a/drivers/net/ethernet/microsoft/mana/gdma.h +++ b/drivers/net/ethernet/microsoft/mana/gdma.h @@ -498,7 +498,14 @@ enum { #define GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT BIT(0) -#define GDMA_DRV_CAP_FLAGS1 GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT +/* Advertise to the NIC firmware: the NAPI work_done variable race is fixed, + * so the driver is able to reliably support features like busy_poll. + */ +#define GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX BIT(2) + +#define GDMA_DRV_CAP_FLAGS1 \ + (GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT | \ + GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX) #define GDMA_DRV_CAP_FLAGS2 0 diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index 9259a74eca40..27a0f3af8aab 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -1303,10 +1303,11 @@ static void mana_poll_rx_cq(struct mana_cq *cq) xdp_do_flush(); } -static void mana_cq_handler(void *context, struct gdma_queue *gdma_queue) +static int mana_cq_handler(void *context, struct gdma_queue *gdma_queue) { struct mana_cq *cq = context; u8 arm_bit; + int w; WARN_ON_ONCE(cq->gdma_cq != gdma_queue); @@ -1315,26 +1316,31 @@ static void mana_cq_handler(void *context, struct gdma_queue *gdma_queue) else mana_poll_tx_cq(cq); - if (cq->work_done < cq->budget && - napi_complete_done(&cq->napi, cq->work_done)) { + w = cq->work_done; + + if (w < cq->budget && + napi_complete_done(&cq->napi, w)) { arm_bit = SET_ARM_BIT; } else { arm_bit = 0; } mana_gd_ring_cq(gdma_queue, arm_bit); + + return w; } static int mana_poll(struct napi_struct *napi, int budget) { struct mana_cq *cq = container_of(napi, struct mana_cq, napi); + int w; cq->work_done = 0; cq->budget = budget; - mana_cq_handler(cq, cq->gdma_cq); + w = mana_cq_handler(cq, cq->gdma_cq); - return min(cq->work_done, budget); + return min(w, budget); } static void mana_schedule_napi(void *context, struct gdma_queue *gdma_queue) diff --git a/drivers/net/ethernet/netronome/nfp/nfdk/dp.c b/drivers/net/ethernet/netronome/nfp/nfdk/dp.c index 2b427d8ccb2f..ccacb6ab6c39 100644 --- a/drivers/net/ethernet/netronome/nfp/nfdk/dp.c +++ b/drivers/net/ethernet/netronome/nfp/nfdk/dp.c @@ -282,7 +282,7 @@ netdev_tx_t nfp_nfdk_tx(struct sk_buff *skb, struct net_device *netdev) dma_len = skb_headlen(skb); if (skb_is_gso(skb)) type = NFDK_DESC_TX_TYPE_TSO; - else if (!nr_frags && dma_len < NFDK_TX_MAX_DATA_PER_HEAD) + else if (!nr_frags && dma_len <= NFDK_TX_MAX_DATA_PER_HEAD) type = NFDK_DESC_TX_TYPE_SIMPLE; else type = NFDK_DESC_TX_TYPE_GATHER; @@ -927,7 +927,7 @@ nfp_nfdk_tx_xdp_buf(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring, dma_len = pkt_len; dma_addr = rxbuf->dma_addr + dma_off; - if (dma_len < NFDK_TX_MAX_DATA_PER_HEAD) + if (dma_len <= NFDK_TX_MAX_DATA_PER_HEAD) type = NFDK_DESC_TX_TYPE_SIMPLE; else type = NFDK_DESC_TX_TYPE_GATHER; @@ -1325,7 +1325,7 @@ nfp_nfdk_ctrl_tx_one(struct nfp_net *nn, struct nfp_net_r_vector *r_vec, txbuf = &tx_ring->ktxbufs[wr_idx]; dma_len = skb_headlen(skb); - if (dma_len < NFDK_TX_MAX_DATA_PER_HEAD) + if (dma_len <= NFDK_TX_MAX_DATA_PER_HEAD) type = NFDK_DESC_TX_TYPE_SIMPLE; else type = NFDK_DESC_TX_TYPE_GATHER; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_devlink.c b/drivers/net/ethernet/netronome/nfp/nfp_devlink.c index 405786c00334..cb08d7bf9524 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_devlink.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_devlink.c @@ -341,7 +341,7 @@ int nfp_devlink_port_register(struct nfp_app *app, struct nfp_port *port) return ret; attrs.split = eth_port.is_split; - attrs.splittable = !attrs.split; + attrs.splittable = eth_port.port_lanes > 1 && !attrs.split; attrs.lanes = eth_port.port_lanes; attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL; attrs.phys.port_number = eth_port.label_port; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c index 1775997f9c69..991059d6cb32 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c @@ -1432,6 +1432,9 @@ nfp_port_get_module_info(struct net_device *netdev, u8 data; port = nfp_port_from_netdev(netdev); + if (!port) + return -EOPNOTSUPP; + /* update port state to get latest interface */ set_bit(NFP_PORT_CHANGED, &port->flags); eth_port = nfp_port_get_eth_port(port); diff --git a/drivers/net/ethernet/ni/nixge.c b/drivers/net/ethernet/ni/nixge.c index 19d043b593cc..62320be4de5a 100644 --- a/drivers/net/ethernet/ni/nixge.c +++ b/drivers/net/ethernet/ni/nixge.c @@ -249,25 +249,26 @@ static void nixge_hw_dma_bd_release(struct net_device *ndev) struct sk_buff *skb; int i; - for (i = 0; i < RX_BD_NUM; i++) { - phys_addr = nixge_hw_dma_bd_get_addr(&priv->rx_bd_v[i], - phys); - - dma_unmap_single(ndev->dev.parent, phys_addr, - NIXGE_MAX_JUMBO_FRAME_SIZE, - DMA_FROM_DEVICE); - - skb = (struct sk_buff *)(uintptr_t) - nixge_hw_dma_bd_get_addr(&priv->rx_bd_v[i], - sw_id_offset); - dev_kfree_skb(skb); - } + if (priv->rx_bd_v) { + for (i = 0; i < RX_BD_NUM; i++) { + phys_addr = nixge_hw_dma_bd_get_addr(&priv->rx_bd_v[i], + phys); + + dma_unmap_single(ndev->dev.parent, phys_addr, + NIXGE_MAX_JUMBO_FRAME_SIZE, + DMA_FROM_DEVICE); + + skb = (struct sk_buff *)(uintptr_t) + nixge_hw_dma_bd_get_addr(&priv->rx_bd_v[i], + sw_id_offset); + dev_kfree_skb(skb); + } - if (priv->rx_bd_v) dma_free_coherent(ndev->dev.parent, sizeof(*priv->rx_bd_v) * RX_BD_NUM, priv->rx_bd_v, priv->rx_bd_p); + } if (priv->tx_skb) devm_kfree(ndev->dev.parent, priv->tx_skb); diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c index 3f2c30184752..28b7cec485ef 100644 --- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c +++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c @@ -1143,6 +1143,7 @@ static void pch_gbe_tx_queue(struct pch_gbe_adapter *adapter, buffer_info->dma = 0; buffer_info->time_stamp = 0; tx_ring->next_to_use = ring_num; + dev_kfree_skb_any(skb); return; } buffer_info->mapped = true; @@ -2459,6 +2460,7 @@ static void pch_gbe_remove(struct pci_dev *pdev) unregister_netdev(netdev); pch_gbe_phy_hw_reset(&adapter->hw); + pci_dev_put(adapter->ptp_pdev); free_netdev(netdev); } @@ -2533,7 +2535,7 @@ static int pch_gbe_probe(struct pci_dev *pdev, /* setup the private structure */ ret = pch_gbe_sw_init(adapter); if (ret) - goto err_free_netdev; + goto err_put_dev; /* Initialize PHY */ ret = pch_gbe_init_phy(adapter); @@ -2591,6 +2593,8 @@ static int pch_gbe_probe(struct pci_dev *pdev, err_free_adapter: pch_gbe_phy_hw_reset(&adapter->hw); +err_put_dev: + pci_dev_put(adapter->ptp_pdev); err_free_netdev: free_netdev(netdev); return ret; diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index 9fb1fa479d4b..16e6bd466143 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -767,34 +767,34 @@ static int qed_mcp_cancel_load_req(struct qed_hwfn *p_hwfn, return rc; } -#define CONFIG_QEDE_BITMAP_IDX BIT(0) -#define CONFIG_QED_SRIOV_BITMAP_IDX BIT(1) -#define CONFIG_QEDR_BITMAP_IDX BIT(2) -#define CONFIG_QEDF_BITMAP_IDX BIT(4) -#define CONFIG_QEDI_BITMAP_IDX BIT(5) -#define CONFIG_QED_LL2_BITMAP_IDX BIT(6) +#define BITMAP_IDX_FOR_CONFIG_QEDE BIT(0) +#define BITMAP_IDX_FOR_CONFIG_QED_SRIOV BIT(1) +#define BITMAP_IDX_FOR_CONFIG_QEDR BIT(2) +#define BITMAP_IDX_FOR_CONFIG_QEDF BIT(4) +#define BITMAP_IDX_FOR_CONFIG_QEDI BIT(5) +#define BITMAP_IDX_FOR_CONFIG_QED_LL2 BIT(6) static u32 qed_get_config_bitmap(void) { u32 config_bitmap = 0x0; if (IS_ENABLED(CONFIG_QEDE)) - config_bitmap |= CONFIG_QEDE_BITMAP_IDX; + config_bitmap |= BITMAP_IDX_FOR_CONFIG_QEDE; if (IS_ENABLED(CONFIG_QED_SRIOV)) - config_bitmap |= CONFIG_QED_SRIOV_BITMAP_IDX; + config_bitmap |= BITMAP_IDX_FOR_CONFIG_QED_SRIOV; if (IS_ENABLED(CONFIG_QED_RDMA)) - config_bitmap |= CONFIG_QEDR_BITMAP_IDX; + config_bitmap |= BITMAP_IDX_FOR_CONFIG_QEDR; if (IS_ENABLED(CONFIG_QED_FCOE)) - config_bitmap |= CONFIG_QEDF_BITMAP_IDX; + config_bitmap |= BITMAP_IDX_FOR_CONFIG_QEDF; if (IS_ENABLED(CONFIG_QED_ISCSI)) - config_bitmap |= CONFIG_QEDI_BITMAP_IDX; + config_bitmap |= BITMAP_IDX_FOR_CONFIG_QEDI; if (IS_ENABLED(CONFIG_QED_LL2)) - config_bitmap |= CONFIG_QED_LL2_BITMAP_IDX; + config_bitmap |= BITMAP_IDX_FOR_CONFIG_QED_LL2; return config_bitmap; } diff --git a/drivers/net/ethernet/qlogic/qla3xxx.c b/drivers/net/ethernet/qlogic/qla3xxx.c index 76072f8c3d2f..0d57ffcedf0c 100644 --- a/drivers/net/ethernet/qlogic/qla3xxx.c +++ b/drivers/net/ethernet/qlogic/qla3xxx.c @@ -2471,6 +2471,7 @@ static netdev_tx_t ql3xxx_send(struct sk_buff *skb, skb_shinfo(skb)->nr_frags); if (tx_cb->seg_count == -1) { netdev_err(ndev, "%s: invalid segment count!\n", __func__); + dev_kfree_skb_any(skb); return NETDEV_TX_OK; } diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c index bd0607680329..2fd5c6fdb500 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c @@ -2991,7 +2991,7 @@ static void qlcnic_83xx_recover_driver_lock(struct qlcnic_adapter *adapter) QLCWRX(adapter->ahw, QLC_83XX_RECOVER_DRV_LOCK, val); dev_info(&adapter->pdev->dev, "%s: lock recovery initiated\n", __func__); - msleep(QLC_83XX_DRV_LOCK_RECOVERY_DELAY); + mdelay(QLC_83XX_DRV_LOCK_RECOVERY_DELAY); val = QLCRDX(adapter->ahw, QLC_83XX_RECOVER_DRV_LOCK); id = ((val >> 2) & 0xF); if (id == adapter->portnum) { @@ -3027,7 +3027,7 @@ int qlcnic_83xx_lock_driver(struct qlcnic_adapter *adapter) if (status) break; - msleep(QLC_83XX_DRV_LOCK_WAIT_DELAY); + mdelay(QLC_83XX_DRV_LOCK_WAIT_DELAY); i++; if (i == 1) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 36324126db6d..33f723a9f471 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -841,7 +841,7 @@ static bool ravb_rx_gbeth(struct net_device *ndev, int *quota, int q) napi_gro_receive(&priv->napi[q], priv->rx_1st_skb); stats->rx_packets++; - stats->rx_bytes += priv->rx_1st_skb->len; + stats->rx_bytes += pkt_len; break; } } @@ -3020,6 +3020,7 @@ static int __maybe_unused ravb_resume(struct device *dev) ret = ravb_open(ndev); if (ret < 0) return ret; + ravb_set_rx_mode(ndev); netif_device_attach(ndev); } diff --git a/drivers/net/ethernet/sfc/ef100_netdev.c b/drivers/net/ethernet/sfc/ef100_netdev.c index 88fa29572e23..ddcc325ed570 100644 --- a/drivers/net/ethernet/sfc/ef100_netdev.c +++ b/drivers/net/ethernet/sfc/ef100_netdev.c @@ -218,6 +218,7 @@ netdev_tx_t __ef100_hard_start_xmit(struct sk_buff *skb, skb->len, skb->data_len, channel->channel); if (!efx->n_channels || !efx->n_tx_channels || !channel) { netif_stop_queue(net_dev); + dev_kfree_skb_any(skb); goto err; } diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index c25bfecb4a2d..e5cfde1cbd5c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -748,6 +748,8 @@ static void dwmac4_flow_ctrl(struct mac_device_info *hw, unsigned int duplex, if (fc & FLOW_RX) { pr_debug("\tReceive Flow-Control ON\n"); flow |= GMAC_RX_FLOW_CTRL_RFE; + } else { + pr_debug("\tReceive Flow-Control OFF\n"); } writel(flow, ioaddr + GMAC_RX_FLOW_CTRL); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 6b43da78cdf0..23ec0a9e396c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1061,8 +1061,16 @@ static void stmmac_mac_link_up(struct phylink_config *config, ctrl |= priv->hw->link.duplex; /* Flow Control operation */ - if (tx_pause && rx_pause) - stmmac_mac_flow_ctrl(priv, duplex); + if (rx_pause && tx_pause) + priv->flow_ctrl = FLOW_AUTO; + else if (rx_pause && !tx_pause) + priv->flow_ctrl = FLOW_RX; + else if (!rx_pause && tx_pause) + priv->flow_ctrl = FLOW_TX; + else + priv->flow_ctrl = FLOW_OFF; + + stmmac_mac_flow_ctrl(priv, duplex); if (ctrl != old_ctrl) writel(ctrl, priv->ioaddr + MAC_CTRL_REG); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index 50f6b4a14be4..eb6d9cd8e93f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -108,10 +108,10 @@ static struct stmmac_axi *stmmac_axi_setup(struct platform_device *pdev) axi->axi_lpi_en = of_property_read_bool(np, "snps,lpi_en"); axi->axi_xit_frm = of_property_read_bool(np, "snps,xit_frm"); - axi->axi_kbbe = of_property_read_bool(np, "snps,axi_kbbe"); - axi->axi_fb = of_property_read_bool(np, "snps,axi_fb"); - axi->axi_mb = of_property_read_bool(np, "snps,axi_mb"); - axi->axi_rb = of_property_read_bool(np, "snps,axi_rb"); + axi->axi_kbbe = of_property_read_bool(np, "snps,kbbe"); + axi->axi_fb = of_property_read_bool(np, "snps,fb"); + axi->axi_mb = of_property_read_bool(np, "snps,mb"); + axi->axi_rb = of_property_read_bool(np, "snps,rb"); if (of_property_read_u32(np, "snps,wr_osr_lmt", &axi->axi_wr_osr_lmt)) axi->axi_wr_osr_lmt = 1; diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index c50b137f92d7..b3b0ba842541 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -1454,7 +1454,7 @@ static void am65_cpsw_nuss_mac_link_up(struct phylink_config *config, struct phy if (speed == SPEED_1000) mac_control |= CPSW_SL_CTL_GIG; - if (speed == SPEED_10 && interface == PHY_INTERFACE_MODE_RGMII) + if (speed == SPEED_10 && phy_interface_mode_is_rgmii(interface)) /* Can be used with in band mode only */ mac_control |= CPSW_SL_CTL_EXT_EN; if (speed == SPEED_100 && interface == PHY_INTERFACE_MODE_RMII) @@ -2082,7 +2082,7 @@ static void am65_cpsw_nuss_cleanup_ndev(struct am65_cpsw_common *common) for (i = 0; i < common->port_num; i++) { port = &common->ports[i]; - if (port->ndev) + if (port->ndev && port->ndev->reg_state == NETREG_REGISTERED) unregister_netdev(port->ndev); } } diff --git a/drivers/net/ieee802154/ca8210.c b/drivers/net/ieee802154/ca8210.c index 450b16ad40a4..e1a569b99e4a 100644 --- a/drivers/net/ieee802154/ca8210.c +++ b/drivers/net/ieee802154/ca8210.c @@ -885,7 +885,7 @@ static int ca8210_spi_transfer( dev_dbg(&spi->dev, "%s called\n", __func__); - cas_ctl = kmalloc(sizeof(*cas_ctl), GFP_ATOMIC); + cas_ctl = kzalloc(sizeof(*cas_ctl), GFP_ATOMIC); if (!cas_ctl) return -ENOMEM; diff --git a/drivers/net/ieee802154/cc2520.c b/drivers/net/ieee802154/cc2520.c index c69b87d3837d..edc769daad07 100644 --- a/drivers/net/ieee802154/cc2520.c +++ b/drivers/net/ieee802154/cc2520.c @@ -970,7 +970,7 @@ static int cc2520_hw_init(struct cc2520_private *priv) if (timeout-- <= 0) { dev_err(&priv->spi->dev, "oscillator start failed!\n"); - return ret; + return -ETIMEDOUT; } udelay(1); } while (!(status & CC2520_STATUS_XOSC32M_STABLE)); diff --git a/drivers/net/ipvlan/ipvlan.h b/drivers/net/ipvlan/ipvlan.h index de94921cbef9..025e0c19ec25 100644 --- a/drivers/net/ipvlan/ipvlan.h +++ b/drivers/net/ipvlan/ipvlan.h @@ -98,6 +98,7 @@ struct ipvl_port { struct sk_buff_head backlog; int count; struct ida ida; + netdevice_tracker dev_tracker; }; struct ipvl_skb_cb { diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index 54c94a69c2bb..796a38f9d7b2 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -83,6 +83,7 @@ static int ipvlan_port_create(struct net_device *dev) if (err) goto err; + netdev_hold(dev, &port->dev_tracker, GFP_KERNEL); return 0; err: @@ -95,6 +96,7 @@ static void ipvlan_port_destroy(struct net_device *dev) struct ipvl_port *port = ipvlan_port_get_rtnl(dev); struct sk_buff *skb; + netdev_put(dev, &port->dev_tracker); if (port->mode == IPVLAN_MODE_L3S) ipvlan_l3s_unregister(port); netdev_rx_handler_unregister(dev); diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c index 14e8d04cb434..2e9742952c4e 100644 --- a/drivers/net/loopback.c +++ b/drivers/net/loopback.c @@ -211,7 +211,7 @@ static __net_init int loopback_net_init(struct net *net) int err; err = -ENOMEM; - dev = alloc_netdev(0, "lo", NET_NAME_UNKNOWN, loopback_setup); + dev = alloc_netdev(0, "lo", NET_NAME_PREDICTABLE, loopback_setup); if (!dev) goto out; diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 85376d2f24ca..2fbac51b9b19 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -3698,6 +3698,7 @@ static const struct nla_policy macsec_rtnl_policy[IFLA_MACSEC_MAX + 1] = { [IFLA_MACSEC_SCB] = { .type = NLA_U8 }, [IFLA_MACSEC_REPLAY_PROTECT] = { .type = NLA_U8 }, [IFLA_MACSEC_VALIDATION] = { .type = NLA_U8 }, + [IFLA_MACSEC_OFFLOAD] = { .type = NLA_U8 }, }; static void macsec_free_netdev(struct net_device *dev) @@ -3835,7 +3836,6 @@ static int macsec_changelink(struct net_device *dev, struct nlattr *tb[], if (macsec_is_offloaded(macsec)) { const struct macsec_ops *ops; struct macsec_context ctx; - int ret; ops = macsec_get_ops(netdev_priv(dev), &ctx); if (!ops) { diff --git a/drivers/net/mdio/fwnode_mdio.c b/drivers/net/mdio/fwnode_mdio.c index 689e728345ce..b782c35c4ac1 100644 --- a/drivers/net/mdio/fwnode_mdio.c +++ b/drivers/net/mdio/fwnode_mdio.c @@ -98,6 +98,7 @@ int fwnode_mdiobus_phy_device_register(struct mii_bus *mdio, */ rc = phy_device_register(phy); if (rc) { + device_set_node(&phy->mdio.dev, NULL); fwnode_handle_put(child); return rc; } @@ -148,12 +149,13 @@ int fwnode_mdiobus_register_phy(struct mii_bus *bus, /* Associate the fwnode with the device structure so it * can be looked up later. */ - phy->mdio.dev.fwnode = child; + phy->mdio.dev.fwnode = fwnode_handle_get(child); /* All data is now stored in the phy struct, so register it */ rc = phy_device_register(phy); if (rc) { - fwnode_handle_put(phy->mdio.dev.fwnode); + phy->mdio.dev.fwnode = NULL; + fwnode_handle_put(child); goto clean_phy; } } else if (is_of_node(child)) { diff --git a/drivers/net/mdio/of_mdio.c b/drivers/net/mdio/of_mdio.c index 796e9c7857d0..510822d6d0d9 100644 --- a/drivers/net/mdio/of_mdio.c +++ b/drivers/net/mdio/of_mdio.c @@ -68,8 +68,9 @@ static int of_mdiobus_register_device(struct mii_bus *mdio, /* All data is now stored in the mdiodev struct; register it. */ rc = mdio_device_register(mdiodev); if (rc) { + device_set_node(&mdiodev->dev, NULL); + fwnode_handle_put(fwnode); mdio_device_free(mdiodev); - of_node_put(child); return rc; } diff --git a/drivers/net/ntb_netdev.c b/drivers/net/ntb_netdev.c index 464d88ca8ab0..a4abea921046 100644 --- a/drivers/net/ntb_netdev.c +++ b/drivers/net/ntb_netdev.c @@ -484,7 +484,14 @@ static int __init ntb_netdev_init_module(void) rc = ntb_transport_register_client_dev(KBUILD_MODNAME); if (rc) return rc; - return ntb_transport_register_client(&ntb_netdev_client); + + rc = ntb_transport_register_client(&ntb_netdev_client); + if (rc) { + ntb_transport_unregister_client_dev(KBUILD_MODNAME); + return rc; + } + + return 0; } module_init(ntb_netdev_init_module); diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c index 349b7b1dbbf2..d49965907561 100644 --- a/drivers/net/phy/at803x.c +++ b/drivers/net/phy/at803x.c @@ -870,8 +870,10 @@ static int at803x_probe(struct phy_device *phydev) .wolopts = 0, }; - if (ccr < 0) + if (ccr < 0) { + ret = ccr; goto err; + } mode_cfg = ccr & AT803X_MODE_CFG_MASK; switch (mode_cfg) { diff --git a/drivers/net/phy/mdio_device.c b/drivers/net/phy/mdio_device.c index 250742ffdfd9..044828d081d2 100644 --- a/drivers/net/phy/mdio_device.c +++ b/drivers/net/phy/mdio_device.c @@ -21,6 +21,7 @@ #include <linux/slab.h> #include <linux/string.h> #include <linux/unistd.h> +#include <linux/property.h> void mdio_device_free(struct mdio_device *mdiodev) { @@ -30,6 +31,7 @@ EXPORT_SYMBOL(mdio_device_free); static void mdio_device_release(struct device *dev) { + fwnode_handle_put(dev->fwnode); kfree(to_mdio_device(dev)); } diff --git a/drivers/net/phy/mxl-gpy.c b/drivers/net/phy/mxl-gpy.c index 24bae27eedef..cae24091fb6f 100644 --- a/drivers/net/phy/mxl-gpy.c +++ b/drivers/net/phy/mxl-gpy.c @@ -9,6 +9,7 @@ #include <linux/module.h> #include <linux/bitfield.h> #include <linux/hwmon.h> +#include <linux/mutex.h> #include <linux/phy.h> #include <linux/polynomial.h> #include <linux/netdevice.h> @@ -70,6 +71,14 @@ #define VPSPEC1_TEMP_STA 0x0E #define VPSPEC1_TEMP_STA_DATA GENMASK(9, 0) +/* Mailbox */ +#define VSPEC1_MBOX_DATA 0x5 +#define VSPEC1_MBOX_ADDRLO 0x6 +#define VSPEC1_MBOX_CMD 0x7 +#define VSPEC1_MBOX_CMD_ADDRHI GENMASK(7, 0) +#define VSPEC1_MBOX_CMD_RD (0 << 8) +#define VSPEC1_MBOX_CMD_READY BIT(15) + /* WoL */ #define VPSPEC2_WOL_CTL 0x0E06 #define VPSPEC2_WOL_AD01 0x0E08 @@ -77,7 +86,13 @@ #define VPSPEC2_WOL_AD45 0x0E0A #define WOL_EN BIT(0) +/* Internal registers, access via mbox */ +#define REG_GPIO0_OUT 0xd3ce00 + struct gpy_priv { + /* serialize mailbox acesses */ + struct mutex mbox_lock; + u8 fw_major; u8 fw_minor; }; @@ -187,6 +202,45 @@ static int gpy_hwmon_register(struct phy_device *phydev) } #endif +static int gpy_mbox_read(struct phy_device *phydev, u32 addr) +{ + struct gpy_priv *priv = phydev->priv; + int val, ret; + u16 cmd; + + mutex_lock(&priv->mbox_lock); + + ret = phy_write_mmd(phydev, MDIO_MMD_VEND1, VSPEC1_MBOX_ADDRLO, + addr); + if (ret) + goto out; + + cmd = VSPEC1_MBOX_CMD_RD; + cmd |= FIELD_PREP(VSPEC1_MBOX_CMD_ADDRHI, addr >> 16); + + ret = phy_write_mmd(phydev, MDIO_MMD_VEND1, VSPEC1_MBOX_CMD, cmd); + if (ret) + goto out; + + /* The mbox read is used in the interrupt workaround. It was observed + * that a read might take up to 2.5ms. This is also the time for which + * the interrupt line is stuck low. To be on the safe side, poll the + * ready bit for 10ms. + */ + ret = phy_read_mmd_poll_timeout(phydev, MDIO_MMD_VEND1, + VSPEC1_MBOX_CMD, val, + (val & VSPEC1_MBOX_CMD_READY), + 500, 10000, false); + if (ret) + goto out; + + ret = phy_read_mmd(phydev, MDIO_MMD_VEND1, VSPEC1_MBOX_DATA); + +out: + mutex_unlock(&priv->mbox_lock); + return ret; +} + static int gpy_config_init(struct phy_device *phydev) { int ret; @@ -201,6 +255,13 @@ static int gpy_config_init(struct phy_device *phydev) return ret < 0 ? ret : 0; } +static bool gpy_has_broken_mdint(struct phy_device *phydev) +{ + /* At least these PHYs are known to have broken interrupt handling */ + return phydev->drv->phy_id == PHY_ID_GPY215B || + phydev->drv->phy_id == PHY_ID_GPY215C; +} + static int gpy_probe(struct phy_device *phydev) { struct device *dev = &phydev->mdio.dev; @@ -218,6 +279,7 @@ static int gpy_probe(struct phy_device *phydev) if (!priv) return -ENOMEM; phydev->priv = priv; + mutex_init(&priv->mbox_lock); fw_version = phy_read(phydev, PHY_FWV); if (fw_version < 0) @@ -492,6 +554,29 @@ static irqreturn_t gpy_handle_interrupt(struct phy_device *phydev) if (!(reg & PHY_IMASK_MASK)) return IRQ_NONE; + /* The PHY might leave the interrupt line asserted even after PHY_ISTAT + * is read. To avoid interrupt storms, delay the interrupt handling as + * long as the PHY drives the interrupt line. An internal bus read will + * stall as long as the interrupt line is asserted, thus just read a + * random register here. + * Because we cannot access the internal bus at all while the interrupt + * is driven by the PHY, there is no way to make the interrupt line + * unstuck (e.g. by changing the pinmux to GPIO input) during that time + * frame. Therefore, polling is the best we can do and won't do any more + * harm. + * It was observed that this bug happens on link state and link speed + * changes on a GPY215B and GYP215C independent of the firmware version + * (which doesn't mean that this list is exhaustive). + */ + if (gpy_has_broken_mdint(phydev) && + (reg & (PHY_IMASK_LSTC | PHY_IMASK_LSPC))) { + reg = gpy_mbox_read(phydev, REG_GPIO0_OUT); + if (reg < 0) { + phy_error(phydev); + return IRQ_NONE; + } + } + phy_trigger_machine(phydev); return IRQ_HANDLED; diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 57849ac0384e..8cff61dbc4b5 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -217,6 +217,7 @@ static void phy_mdio_device_free(struct mdio_device *mdiodev) static void phy_device_release(struct device *dev) { + fwnode_handle_put(dev->fwnode); kfree(to_phy_device(dev)); } @@ -1520,6 +1521,7 @@ error: error_module_put: module_put(d->driver->owner); + d->driver = NULL; error_put_device: put_device(d); if (ndev_owner != bus->owner) diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 6547b6cc6cbe..2805b04d6402 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -1603,19 +1603,29 @@ static int phylink_bringup_phy(struct phylink *pl, struct phy_device *phy, linkmode_copy(supported, phy->supported); linkmode_copy(config.advertising, phy->advertising); - /* Clause 45 PHYs switch their Serdes lane between several different - * modes, normally 10GBASE-R, SGMII. Some use 2500BASE-X for 2.5G - * speeds. We really need to know which interface modes the PHY and - * MAC supports to properly work out which linkmodes can be supported. + /* Check whether we would use rate matching for the proposed interface + * mode. */ - if (phy->is_c45 && + config.rate_matching = phy_get_rate_matching(phy, interface); + + /* Clause 45 PHYs may switch their Serdes lane between, e.g. 10GBASE-R, + * 5GBASE-R, 2500BASE-X and SGMII if they are not using rate matching. + * For some interface modes (e.g. RXAUI, XAUI and USXGMII) switching + * their Serdes is either unnecessary or not reasonable. + * + * For these which switch interface modes, we really need to know which + * interface modes the PHY supports to properly work out which ethtool + * linkmodes can be supported. For now, as a work-around, we validate + * against all interface modes, which may lead to more ethtool link + * modes being advertised than are actually supported. + */ + if (phy->is_c45 && config.rate_matching == RATE_MATCH_NONE && interface != PHY_INTERFACE_MODE_RXAUI && interface != PHY_INTERFACE_MODE_XAUI && interface != PHY_INTERFACE_MODE_USXGMII) config.interface = PHY_INTERFACE_MODE_NA; else config.interface = interface; - config.rate_matching = phy_get_rate_matching(phy, config.interface); ret = phylink_validate(pl, supported, &config); if (ret) { diff --git a/drivers/net/plip/plip.c b/drivers/net/plip/plip.c index c8791e9b451d..40ce8abe6999 100644 --- a/drivers/net/plip/plip.c +++ b/drivers/net/plip/plip.c @@ -450,12 +450,12 @@ plip_bh_timeout_error(struct net_device *dev, struct net_local *nl, } rcv->state = PLIP_PK_DONE; if (rcv->skb) { - kfree_skb(rcv->skb); + dev_kfree_skb_irq(rcv->skb); rcv->skb = NULL; } snd->state = PLIP_PK_DONE; if (snd->skb) { - dev_kfree_skb(snd->skb); + dev_consume_skb_irq(snd->skb); snd->skb = NULL; } spin_unlock_irq(&nl->lock); diff --git a/drivers/net/thunderbolt.c b/drivers/net/thunderbolt.c index a52ee2bf5575..6312f67f260e 100644 --- a/drivers/net/thunderbolt.c +++ b/drivers/net/thunderbolt.c @@ -914,6 +914,7 @@ static int tbnet_open(struct net_device *dev) eof_mask, tbnet_start_poll, net); if (!ring) { netdev_err(dev, "failed to allocate Rx ring\n"); + tb_xdomain_release_out_hopid(xd, hopid); tb_ring_free(net->tx_ring.ring); net->tx_ring.ring = NULL; return -ENOMEM; diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 7a3ab3427369..24001112c323 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -686,7 +686,6 @@ static void __tun_detach(struct tun_file *tfile, bool clean) if (tun) xdp_rxq_info_unreg(&tfile->xdp_rxq); ptr_ring_cleanup(&tfile->tx_ring, tun_ptr_free); - sock_put(&tfile->sk); } } @@ -702,6 +701,9 @@ static void tun_detach(struct tun_file *tfile, bool clean) if (dev) netdev_state_change(dev); rtnl_unlock(); + + if (clean) + sock_put(&tfile->sk); } static void tun_detach_all(struct net_device *dev) diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index 8d5cbda33f66..0897fdb6254b 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -1915,6 +1915,7 @@ static const struct driver_info cdc_ncm_zlp_info = { .status = cdc_ncm_status, .rx_fixup = cdc_ncm_rx_fixup, .tx_fixup = cdc_ncm_tx_fixup, + .set_rx_mode = usbnet_cdc_update_filter, }; /* Same as cdc_ncm_info, but with FLAG_WWAN */ diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index afd6faa4c2ec..554d4e2a84a4 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1423,6 +1423,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x0489, 0xe0b4, 0)}, /* Foxconn T77W968 LTE */ {QMI_FIXED_INTF(0x0489, 0xe0b5, 0)}, /* Foxconn T77W968 LTE with eSIM support*/ {QMI_FIXED_INTF(0x2692, 0x9025, 4)}, /* Cellient MPL200 (rebranded Qualcomm 05c6:9025) */ + {QMI_QUIRK_SET_DTR(0x1546, 0x1342, 4)}, /* u-blox LARA-L6 */ /* 4. Gobi 1000 devices */ {QMI_GOBI1K_DEVICE(0x05c6, 0x9212)}, /* Acer Gobi Modem Device */ diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 7106932c6f88..86e52454b5b5 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -3949,12 +3949,11 @@ static int virtnet_probe(struct virtio_device *vdev) return 0; free_unregister_netdev: - virtio_reset_device(vdev); - unregister_netdev(dev); free_failover: net_failover_destroy(vi->failover); free_vqs: + virtio_reset_device(vdev); cancel_delayed_work_sync(&vi->refill); free_receive_page_frags(vi); virtnet_del_vqs(vi); diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index d3e7b27eb933..6f1e560fb15c 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -75,8 +75,14 @@ vmxnet3_enable_all_intrs(struct vmxnet3_adapter *adapter) for (i = 0; i < adapter->intr.num_intrs; i++) vmxnet3_enable_intr(adapter, i); - adapter->shared->devRead.intrConf.intrCtrl &= + if (!VMXNET3_VERSION_GE_6(adapter) || + !adapter->queuesExtEnabled) { + adapter->shared->devRead.intrConf.intrCtrl &= + cpu_to_le32(~VMXNET3_IC_DISABLE_ALL); + } else { + adapter->shared->devReadExt.intrConfExt.intrCtrl &= cpu_to_le32(~VMXNET3_IC_DISABLE_ALL); + } } @@ -85,8 +91,14 @@ vmxnet3_disable_all_intrs(struct vmxnet3_adapter *adapter) { int i; - adapter->shared->devRead.intrConf.intrCtrl |= + if (!VMXNET3_VERSION_GE_6(adapter) || + !adapter->queuesExtEnabled) { + adapter->shared->devRead.intrConf.intrCtrl |= + cpu_to_le32(VMXNET3_IC_DISABLE_ALL); + } else { + adapter->shared->devReadExt.intrConfExt.intrCtrl |= cpu_to_le32(VMXNET3_IC_DISABLE_ALL); + } for (i = 0; i < adapter->intr.num_intrs; i++) vmxnet3_disable_intr(adapter, i); } @@ -1396,6 +1408,7 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq, }; u32 num_pkts = 0; bool skip_page_frags = false; + bool encap_lro = false; struct Vmxnet3_RxCompDesc *rcd; struct vmxnet3_rx_ctx *ctx = &rq->rx_ctx; u16 segCnt = 0, mss = 0; @@ -1556,13 +1569,18 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq, if (VMXNET3_VERSION_GE_2(adapter) && rcd->type == VMXNET3_CDTYPE_RXCOMP_LRO) { struct Vmxnet3_RxCompDescExt *rcdlro; + union Vmxnet3_GenericDesc *gdesc; + rcdlro = (struct Vmxnet3_RxCompDescExt *)rcd; + gdesc = (union Vmxnet3_GenericDesc *)rcd; segCnt = rcdlro->segCnt; WARN_ON_ONCE(segCnt == 0); mss = rcdlro->mss; if (unlikely(segCnt <= 1)) segCnt = 0; + encap_lro = (le32_to_cpu(gdesc->dword[0]) & + (1UL << VMXNET3_RCD_HDR_INNER_SHIFT)); } else { segCnt = 0; } @@ -1630,7 +1648,7 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq, vmxnet3_rx_csum(adapter, skb, (union Vmxnet3_GenericDesc *)rcd); skb->protocol = eth_type_trans(skb, adapter->netdev); - if (!rcd->tcp || + if ((!rcd->tcp && !encap_lro) || !(adapter->netdev->features & NETIF_F_LRO)) goto not_lro; @@ -1639,7 +1657,7 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq, SKB_GSO_TCPV4 : SKB_GSO_TCPV6; skb_shinfo(skb)->gso_size = mss; skb_shinfo(skb)->gso_segs = segCnt; - } else if (segCnt != 0 || skb->len > mtu) { + } else if ((segCnt != 0 || skb->len > mtu) && !encap_lro) { u32 hlen; hlen = vmxnet3_get_hdr_len(adapter, skb, @@ -1668,6 +1686,7 @@ not_lro: napi_gro_receive(&rq->napi, skb); ctx->skb = NULL; + encap_lro = false; num_pkts++; } diff --git a/drivers/net/wireless/microchip/wilc1000/cfg80211.c b/drivers/net/wireless/microchip/wilc1000/cfg80211.c index 9bbfff803357..b545d93c6e37 100644 --- a/drivers/net/wireless/microchip/wilc1000/cfg80211.c +++ b/drivers/net/wireless/microchip/wilc1000/cfg80211.c @@ -959,30 +959,51 @@ static inline void wilc_wfi_cfg_parse_ch_attr(u8 *buf, u32 len, u8 sta_ch) return; while (index + sizeof(*e) <= len) { + u16 attr_size; + e = (struct wilc_attr_entry *)&buf[index]; - if (e->attr_type == IEEE80211_P2P_ATTR_CHANNEL_LIST) + attr_size = le16_to_cpu(e->attr_len); + + if (index + sizeof(*e) + attr_size > len) + return; + + if (e->attr_type == IEEE80211_P2P_ATTR_CHANNEL_LIST && + attr_size >= (sizeof(struct wilc_attr_ch_list) - sizeof(*e))) ch_list_idx = index; - else if (e->attr_type == IEEE80211_P2P_ATTR_OPER_CHANNEL) + else if (e->attr_type == IEEE80211_P2P_ATTR_OPER_CHANNEL && + attr_size == (sizeof(struct wilc_attr_oper_ch) - sizeof(*e))) op_ch_idx = index; + if (ch_list_idx && op_ch_idx) break; - index += le16_to_cpu(e->attr_len) + sizeof(*e); + + index += sizeof(*e) + attr_size; } if (ch_list_idx) { - u16 attr_size; - struct wilc_ch_list_elem *e; - int i; + u16 elem_size; ch_list = (struct wilc_attr_ch_list *)&buf[ch_list_idx]; - attr_size = le16_to_cpu(ch_list->attr_len); - for (i = 0; i < attr_size;) { + /* the number of bytes following the final 'elem' member */ + elem_size = le16_to_cpu(ch_list->attr_len) - + (sizeof(*ch_list) - sizeof(struct wilc_attr_entry)); + for (unsigned int i = 0; i < elem_size;) { + struct wilc_ch_list_elem *e; + e = (struct wilc_ch_list_elem *)(ch_list->elem + i); + + i += sizeof(*e); + if (i > elem_size) + break; + + i += e->no_of_channels; + if (i > elem_size) + break; + if (e->op_class == WILC_WLAN_OPERATING_CLASS_2_4GHZ) { memset(e->ch_list, sta_ch, e->no_of_channels); break; } - i += e->no_of_channels; } } diff --git a/drivers/net/wireless/microchip/wilc1000/hif.c b/drivers/net/wireless/microchip/wilc1000/hif.c index eb1d1ba3a443..67df8221b5ae 100644 --- a/drivers/net/wireless/microchip/wilc1000/hif.c +++ b/drivers/net/wireless/microchip/wilc1000/hif.c @@ -482,14 +482,25 @@ void *wilc_parse_join_bss_param(struct cfg80211_bss *bss, rsn_ie = cfg80211_find_ie(WLAN_EID_RSN, ies->data, ies->len); if (rsn_ie) { + int rsn_ie_len = sizeof(struct element) + rsn_ie[1]; int offset = 8; - param->mode_802_11i = 2; - param->rsn_found = true; /* extract RSN capabilities */ - offset += (rsn_ie[offset] * 4) + 2; - offset += (rsn_ie[offset] * 4) + 2; - memcpy(param->rsn_cap, &rsn_ie[offset], 2); + if (offset < rsn_ie_len) { + /* skip over pairwise suites */ + offset += (rsn_ie[offset] * 4) + 2; + + if (offset < rsn_ie_len) { + /* skip over authentication suites */ + offset += (rsn_ie[offset] * 4) + 2; + + if (offset + 1 < rsn_ie_len) { + param->mode_802_11i = 2; + param->rsn_found = true; + memcpy(param->rsn_cap, &rsn_ie[offset], 2); + } + } + } } if (param->rsn_found) { diff --git a/drivers/net/wwan/iosm/iosm_ipc_mux.c b/drivers/net/wwan/iosm/iosm_ipc_mux.c index 9c7a9a2a1f25..fc928b298a98 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_mux.c +++ b/drivers/net/wwan/iosm/iosm_ipc_mux.c @@ -332,6 +332,7 @@ struct iosm_mux *ipc_mux_init(struct ipc_mux_config *mux_cfg, if (!ipc_mux->ul_adb.pp_qlt[i]) { for (j = i - 1; j >= 0; j--) kfree(ipc_mux->ul_adb.pp_qlt[j]); + kfree(ipc_mux); return NULL; } } diff --git a/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c b/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c index d41e373f9c0a..d6b166fc5c0e 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c +++ b/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c @@ -365,7 +365,8 @@ static void ipc_mux_dl_cmd_decode(struct iosm_mux *ipc_mux, struct sk_buff *skb) /* Pass the DL packet to the netif layer. */ static int ipc_mux_net_receive(struct iosm_mux *ipc_mux, int if_id, struct iosm_wwan *wwan, u32 offset, - u8 service_class, struct sk_buff *skb) + u8 service_class, struct sk_buff *skb, + u32 pkt_len) { struct sk_buff *dest_skb = skb_clone(skb, GFP_ATOMIC); @@ -373,7 +374,7 @@ static int ipc_mux_net_receive(struct iosm_mux *ipc_mux, int if_id, return -ENOMEM; skb_pull(dest_skb, offset); - skb_set_tail_pointer(dest_skb, dest_skb->len); + skb_trim(dest_skb, pkt_len); /* Pass the packet to the netif layer. */ dest_skb->priority = service_class; @@ -429,7 +430,7 @@ static void ipc_mux_dl_fcth_decode(struct iosm_mux *ipc_mux, static void ipc_mux_dl_adgh_decode(struct iosm_mux *ipc_mux, struct sk_buff *skb) { - u32 pad_len, packet_offset; + u32 pad_len, packet_offset, adgh_len; struct iosm_wwan *wwan; struct mux_adgh *adgh; u8 *block = skb->data; @@ -470,10 +471,12 @@ static void ipc_mux_dl_adgh_decode(struct iosm_mux *ipc_mux, packet_offset = sizeof(*adgh) + pad_len; if_id += ipc_mux->wwan_q_offset; + adgh_len = le16_to_cpu(adgh->length); /* Pass the packet to the netif layer */ rc = ipc_mux_net_receive(ipc_mux, if_id, wwan, packet_offset, - adgh->service_class, skb); + adgh->service_class, skb, + adgh_len - packet_offset); if (rc) { dev_err(ipc_mux->dev, "mux adgh decoding error"); return; @@ -547,7 +550,7 @@ static int mux_dl_process_dg(struct iosm_mux *ipc_mux, struct mux_adbh *adbh, int if_id, int nr_of_dg) { u32 dl_head_pad_len = ipc_mux->session[if_id].dl_head_pad_len; - u32 packet_offset, i, rc; + u32 packet_offset, i, rc, dg_len; for (i = 0; i < nr_of_dg; i++, dg++) { if (le32_to_cpu(dg->datagram_index) @@ -562,11 +565,12 @@ static int mux_dl_process_dg(struct iosm_mux *ipc_mux, struct mux_adbh *adbh, packet_offset = le32_to_cpu(dg->datagram_index) + dl_head_pad_len; + dg_len = le16_to_cpu(dg->datagram_length); /* Pass the packet to the netif layer. */ rc = ipc_mux_net_receive(ipc_mux, if_id, ipc_mux->wwan, packet_offset, - dg->service_class, - skb); + dg->service_class, skb, + dg_len - dl_head_pad_len); if (rc) goto dg_error; } @@ -1207,10 +1211,9 @@ static int mux_ul_dg_update_tbl_index(struct iosm_mux *ipc_mux, qlth_n_ql_size, ul_list); ipc_mux_ul_adb_finish(ipc_mux); if (ipc_mux_ul_adb_allocate(ipc_mux, adb, &ipc_mux->size_needed, - IOSM_AGGR_MUX_SIG_ADBH)) { - dev_kfree_skb(src_skb); + IOSM_AGGR_MUX_SIG_ADBH)) return -ENOMEM; - } + ipc_mux->size_needed = le32_to_cpu(adb->adbh->block_length); ipc_mux->size_needed += offsetof(struct mux_adth, dg); @@ -1471,8 +1474,7 @@ void ipc_mux_ul_encoded_process(struct iosm_mux *ipc_mux, struct sk_buff *skb) ipc_mux->ul_data_pend_bytes); /* Reset the skb settings. */ - skb->tail = 0; - skb->len = 0; + skb_trim(skb, 0); /* Add the consumed ADB to the free list. */ skb_queue_tail((&ipc_mux->ul_adb.free_list), skb); diff --git a/drivers/net/wwan/iosm/iosm_ipc_pcie.c b/drivers/net/wwan/iosm/iosm_ipc_pcie.c index d3d34d1c4704..5bf5a93937c9 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_pcie.c +++ b/drivers/net/wwan/iosm/iosm_ipc_pcie.c @@ -249,7 +249,7 @@ static enum ipc_pcie_sleep_state ipc_pcie_read_bios_cfg(struct device *dev) if (object->integer.value == 3) sleep_state = IPC_PCIE_D3L2; - kfree(object); + ACPI_FREE(object); default_ret: return sleep_state; diff --git a/drivers/net/wwan/iosm/iosm_ipc_protocol.h b/drivers/net/wwan/iosm/iosm_ipc_protocol.h index 9b3a6d86ece7..289397c4ea6c 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_protocol.h +++ b/drivers/net/wwan/iosm/iosm_ipc_protocol.h @@ -122,7 +122,7 @@ struct iosm_protocol { struct iosm_imem *imem; struct ipc_rsp *rsp_ring[IPC_MEM_MSG_ENTRIES]; struct device *dev; - phys_addr_t phy_ap_shm; + dma_addr_t phy_ap_shm; u32 old_msg_tail; }; diff --git a/drivers/net/wwan/t7xx/t7xx_modem_ops.c b/drivers/net/wwan/t7xx/t7xx_modem_ops.c index 3458af31e864..7d0f5e4f0a78 100644 --- a/drivers/net/wwan/t7xx/t7xx_modem_ops.c +++ b/drivers/net/wwan/t7xx/t7xx_modem_ops.c @@ -165,6 +165,8 @@ static int t7xx_acpi_reset(struct t7xx_pci_dev *t7xx_dev, char *fn_name) return -EFAULT; } + kfree(buffer.pointer); + #endif return 0; } diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h index 1545cbee77a4..3dbfc8a6924e 100644 --- a/drivers/net/xen-netback/common.h +++ b/drivers/net/xen-netback/common.h @@ -386,7 +386,7 @@ int xenvif_dealloc_kthread(void *data); irqreturn_t xenvif_ctrl_irq_fn(int irq, void *data); bool xenvif_have_rx_work(struct xenvif_queue *queue, bool test_kthread); -void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb); +bool xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb); void xenvif_carrier_on(struct xenvif *vif); diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index 650fa180220f..f3f2c07423a6 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -254,14 +254,16 @@ xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev) if (vif->hash.alg == XEN_NETIF_CTRL_HASH_ALGORITHM_NONE) skb_clear_hash(skb); - xenvif_rx_queue_tail(queue, skb); + if (!xenvif_rx_queue_tail(queue, skb)) + goto drop; + xenvif_kick_thread(queue); return NETDEV_TX_OK; drop: vif->dev->stats.tx_dropped++; - dev_kfree_skb(skb); + dev_kfree_skb_any(skb); return NETDEV_TX_OK; } diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 3d2081bbbc86..bf627af723bf 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -332,10 +332,13 @@ static int xenvif_count_requests(struct xenvif_queue *queue, struct xenvif_tx_cb { - u16 pending_idx; + u16 copy_pending_idx[XEN_NETBK_LEGACY_SLOTS_MAX + 1]; + u8 copy_count; }; #define XENVIF_TX_CB(skb) ((struct xenvif_tx_cb *)(skb)->cb) +#define copy_pending_idx(skb, i) (XENVIF_TX_CB(skb)->copy_pending_idx[i]) +#define copy_count(skb) (XENVIF_TX_CB(skb)->copy_count) static inline void xenvif_tx_create_map_op(struct xenvif_queue *queue, u16 pending_idx, @@ -370,31 +373,93 @@ static inline struct sk_buff *xenvif_alloc_skb(unsigned int size) return skb; } -static struct gnttab_map_grant_ref *xenvif_get_requests(struct xenvif_queue *queue, - struct sk_buff *skb, - struct xen_netif_tx_request *txp, - struct gnttab_map_grant_ref *gop, - unsigned int frag_overflow, - struct sk_buff *nskb) +static void xenvif_get_requests(struct xenvif_queue *queue, + struct sk_buff *skb, + struct xen_netif_tx_request *first, + struct xen_netif_tx_request *txfrags, + unsigned *copy_ops, + unsigned *map_ops, + unsigned int frag_overflow, + struct sk_buff *nskb, + unsigned int extra_count, + unsigned int data_len) { struct skb_shared_info *shinfo = skb_shinfo(skb); skb_frag_t *frags = shinfo->frags; - u16 pending_idx = XENVIF_TX_CB(skb)->pending_idx; - int start; + u16 pending_idx; pending_ring_idx_t index; unsigned int nr_slots; + struct gnttab_copy *cop = queue->tx_copy_ops + *copy_ops; + struct gnttab_map_grant_ref *gop = queue->tx_map_ops + *map_ops; + struct xen_netif_tx_request *txp = first; + + nr_slots = shinfo->nr_frags + 1; + + copy_count(skb) = 0; + + /* Create copy ops for exactly data_len bytes into the skb head. */ + __skb_put(skb, data_len); + while (data_len > 0) { + int amount = data_len > txp->size ? txp->size : data_len; + + cop->source.u.ref = txp->gref; + cop->source.domid = queue->vif->domid; + cop->source.offset = txp->offset; + + cop->dest.domid = DOMID_SELF; + cop->dest.offset = (offset_in_page(skb->data + + skb_headlen(skb) - + data_len)) & ~XEN_PAGE_MASK; + cop->dest.u.gmfn = virt_to_gfn(skb->data + skb_headlen(skb) + - data_len); + + cop->len = amount; + cop->flags = GNTCOPY_source_gref; - nr_slots = shinfo->nr_frags; + index = pending_index(queue->pending_cons); + pending_idx = queue->pending_ring[index]; + callback_param(queue, pending_idx).ctx = NULL; + copy_pending_idx(skb, copy_count(skb)) = pending_idx; + copy_count(skb)++; + + cop++; + data_len -= amount; - /* Skip first skb fragment if it is on same page as header fragment. */ - start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx); + if (amount == txp->size) { + /* The copy op covered the full tx_request */ + + memcpy(&queue->pending_tx_info[pending_idx].req, + txp, sizeof(*txp)); + queue->pending_tx_info[pending_idx].extra_count = + (txp == first) ? extra_count : 0; + + if (txp == first) + txp = txfrags; + else + txp++; + queue->pending_cons++; + nr_slots--; + } else { + /* The copy op partially covered the tx_request. + * The remainder will be mapped. + */ + txp->offset += amount; + txp->size -= amount; + } + } - for (shinfo->nr_frags = start; shinfo->nr_frags < nr_slots; - shinfo->nr_frags++, txp++, gop++) { + for (shinfo->nr_frags = 0; shinfo->nr_frags < nr_slots; + shinfo->nr_frags++, gop++) { index = pending_index(queue->pending_cons++); pending_idx = queue->pending_ring[index]; - xenvif_tx_create_map_op(queue, pending_idx, txp, 0, gop); + xenvif_tx_create_map_op(queue, pending_idx, txp, + txp == first ? extra_count : 0, gop); frag_set_pending_idx(&frags[shinfo->nr_frags], pending_idx); + + if (txp == first) + txp = txfrags; + else + txp++; } if (frag_overflow) { @@ -415,7 +480,8 @@ static struct gnttab_map_grant_ref *xenvif_get_requests(struct xenvif_queue *que skb_shinfo(skb)->frag_list = nskb; } - return gop; + (*copy_ops) = cop - queue->tx_copy_ops; + (*map_ops) = gop - queue->tx_map_ops; } static inline void xenvif_grant_handle_set(struct xenvif_queue *queue, @@ -451,7 +517,7 @@ static int xenvif_tx_check_gop(struct xenvif_queue *queue, struct gnttab_copy **gopp_copy) { struct gnttab_map_grant_ref *gop_map = *gopp_map; - u16 pending_idx = XENVIF_TX_CB(skb)->pending_idx; + u16 pending_idx; /* This always points to the shinfo of the skb being checked, which * could be either the first or the one on the frag_list */ @@ -462,24 +528,37 @@ static int xenvif_tx_check_gop(struct xenvif_queue *queue, struct skb_shared_info *first_shinfo = NULL; int nr_frags = shinfo->nr_frags; const bool sharedslot = nr_frags && - frag_get_pending_idx(&shinfo->frags[0]) == pending_idx; - int i, err; + frag_get_pending_idx(&shinfo->frags[0]) == + copy_pending_idx(skb, copy_count(skb) - 1); + int i, err = 0; - /* Check status of header. */ - err = (*gopp_copy)->status; - if (unlikely(err)) { - if (net_ratelimit()) - netdev_dbg(queue->vif->dev, - "Grant copy of header failed! status: %d pending_idx: %u ref: %u\n", - (*gopp_copy)->status, - pending_idx, - (*gopp_copy)->source.u.ref); - /* The first frag might still have this slot mapped */ - if (!sharedslot) - xenvif_idx_release(queue, pending_idx, - XEN_NETIF_RSP_ERROR); + for (i = 0; i < copy_count(skb); i++) { + int newerr; + + /* Check status of header. */ + pending_idx = copy_pending_idx(skb, i); + + newerr = (*gopp_copy)->status; + if (likely(!newerr)) { + /* The first frag might still have this slot mapped */ + if (i < copy_count(skb) - 1 || !sharedslot) + xenvif_idx_release(queue, pending_idx, + XEN_NETIF_RSP_OKAY); + } else { + err = newerr; + if (net_ratelimit()) + netdev_dbg(queue->vif->dev, + "Grant copy of header failed! status: %d pending_idx: %u ref: %u\n", + (*gopp_copy)->status, + pending_idx, + (*gopp_copy)->source.u.ref); + /* The first frag might still have this slot mapped */ + if (i < copy_count(skb) - 1 || !sharedslot) + xenvif_idx_release(queue, pending_idx, + XEN_NETIF_RSP_ERROR); + } + (*gopp_copy)++; } - (*gopp_copy)++; check_frags: for (i = 0; i < nr_frags; i++, gop_map++) { @@ -526,14 +605,6 @@ check_frags: if (err) continue; - /* First error: if the header haven't shared a slot with the - * first frag, release it as well. - */ - if (!sharedslot) - xenvif_idx_release(queue, - XENVIF_TX_CB(skb)->pending_idx, - XEN_NETIF_RSP_OKAY); - /* Invalidate preceding fragments of this skb. */ for (j = 0; j < i; j++) { pending_idx = frag_get_pending_idx(&shinfo->frags[j]); @@ -803,7 +874,6 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, unsigned *copy_ops, unsigned *map_ops) { - struct gnttab_map_grant_ref *gop = queue->tx_map_ops; struct sk_buff *skb, *nskb; int ret; unsigned int frag_overflow; @@ -885,8 +955,12 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, continue; } + data_len = (txreq.size > XEN_NETBACK_TX_COPY_LEN) ? + XEN_NETBACK_TX_COPY_LEN : txreq.size; + ret = xenvif_count_requests(queue, &txreq, extra_count, txfrags, work_to_do); + if (unlikely(ret < 0)) break; @@ -912,9 +986,8 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, index = pending_index(queue->pending_cons); pending_idx = queue->pending_ring[index]; - data_len = (txreq.size > XEN_NETBACK_TX_COPY_LEN && - ret < XEN_NETBK_LEGACY_SLOTS_MAX) ? - XEN_NETBACK_TX_COPY_LEN : txreq.size; + if (ret >= XEN_NETBK_LEGACY_SLOTS_MAX - 1 && data_len < txreq.size) + data_len = txreq.size; skb = xenvif_alloc_skb(data_len); if (unlikely(skb == NULL)) { @@ -925,8 +998,6 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, } skb_shinfo(skb)->nr_frags = ret; - if (data_len < txreq.size) - skb_shinfo(skb)->nr_frags++; /* At this point shinfo->nr_frags is in fact the number of * slots, which can be as large as XEN_NETBK_LEGACY_SLOTS_MAX. */ @@ -988,54 +1059,19 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, type); } - XENVIF_TX_CB(skb)->pending_idx = pending_idx; - - __skb_put(skb, data_len); - queue->tx_copy_ops[*copy_ops].source.u.ref = txreq.gref; - queue->tx_copy_ops[*copy_ops].source.domid = queue->vif->domid; - queue->tx_copy_ops[*copy_ops].source.offset = txreq.offset; - - queue->tx_copy_ops[*copy_ops].dest.u.gmfn = - virt_to_gfn(skb->data); - queue->tx_copy_ops[*copy_ops].dest.domid = DOMID_SELF; - queue->tx_copy_ops[*copy_ops].dest.offset = - offset_in_page(skb->data) & ~XEN_PAGE_MASK; - - queue->tx_copy_ops[*copy_ops].len = data_len; - queue->tx_copy_ops[*copy_ops].flags = GNTCOPY_source_gref; - - (*copy_ops)++; - - if (data_len < txreq.size) { - frag_set_pending_idx(&skb_shinfo(skb)->frags[0], - pending_idx); - xenvif_tx_create_map_op(queue, pending_idx, &txreq, - extra_count, gop); - gop++; - } else { - frag_set_pending_idx(&skb_shinfo(skb)->frags[0], - INVALID_PENDING_IDX); - memcpy(&queue->pending_tx_info[pending_idx].req, - &txreq, sizeof(txreq)); - queue->pending_tx_info[pending_idx].extra_count = - extra_count; - } - - queue->pending_cons++; - - gop = xenvif_get_requests(queue, skb, txfrags, gop, - frag_overflow, nskb); + xenvif_get_requests(queue, skb, &txreq, txfrags, copy_ops, + map_ops, frag_overflow, nskb, extra_count, + data_len); __skb_queue_tail(&queue->tx_queue, skb); queue->tx.req_cons = idx; - if (((gop-queue->tx_map_ops) >= ARRAY_SIZE(queue->tx_map_ops)) || + if ((*map_ops >= ARRAY_SIZE(queue->tx_map_ops)) || (*copy_ops >= ARRAY_SIZE(queue->tx_copy_ops))) break; } - (*map_ops) = gop - queue->tx_map_ops; return; } @@ -1114,9 +1150,8 @@ static int xenvif_tx_submit(struct xenvif_queue *queue) while ((skb = __skb_dequeue(&queue->tx_queue)) != NULL) { struct xen_netif_tx_request *txp; u16 pending_idx; - unsigned data_len; - pending_idx = XENVIF_TX_CB(skb)->pending_idx; + pending_idx = copy_pending_idx(skb, 0); txp = &queue->pending_tx_info[pending_idx].req; /* Check the remap error code. */ @@ -1135,18 +1170,6 @@ static int xenvif_tx_submit(struct xenvif_queue *queue) continue; } - data_len = skb->len; - callback_param(queue, pending_idx).ctx = NULL; - if (data_len < txp->size) { - /* Append the packet payload as a fragment. */ - txp->offset += data_len; - txp->size -= data_len; - } else { - /* Schedule a response immediately. */ - xenvif_idx_release(queue, pending_idx, - XEN_NETIF_RSP_OKAY); - } - if (txp->flags & XEN_NETTXF_csum_blank) skb->ip_summed = CHECKSUM_PARTIAL; else if (txp->flags & XEN_NETTXF_data_validated) @@ -1332,7 +1355,7 @@ static inline void xenvif_tx_dealloc_action(struct xenvif_queue *queue) /* Called after netfront has transmitted */ int xenvif_tx_action(struct xenvif_queue *queue, int budget) { - unsigned nr_mops, nr_cops = 0; + unsigned nr_mops = 0, nr_cops = 0; int work_done, ret; if (unlikely(!tx_work_todo(queue))) diff --git a/drivers/net/xen-netback/rx.c b/drivers/net/xen-netback/rx.c index 932762177110..0ba754ebc5ba 100644 --- a/drivers/net/xen-netback/rx.c +++ b/drivers/net/xen-netback/rx.c @@ -82,9 +82,10 @@ static bool xenvif_rx_ring_slots_available(struct xenvif_queue *queue) return false; } -void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb) +bool xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb) { unsigned long flags; + bool ret = true; spin_lock_irqsave(&queue->rx_queue.lock, flags); @@ -92,8 +93,7 @@ void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb) struct net_device *dev = queue->vif->dev; netif_tx_stop_queue(netdev_get_tx_queue(dev, queue->id)); - kfree_skb(skb); - queue->vif->dev->stats.rx_dropped++; + ret = false; } else { if (skb_queue_empty(&queue->rx_queue)) xenvif_update_needed_slots(queue, skb); @@ -104,6 +104,8 @@ void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb) } spin_unlock_irqrestore(&queue->rx_queue.lock, flags); + + return ret; } static struct sk_buff *xenvif_rx_dequeue(struct xenvif_queue *queue) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 9af2b027c19c..dc404e05970c 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -1862,6 +1862,12 @@ static int netfront_resume(struct xenbus_device *dev) netif_tx_unlock_bh(info->netdev); xennet_disconnect_backend(info); + + rtnl_lock(); + if (info->queues) + xennet_destroy_queues(info); + rtnl_unlock(); + return 0; } diff --git a/drivers/nfc/nfcmrvl/i2c.c b/drivers/nfc/nfcmrvl/i2c.c index 24436c9e54c9..97600826af69 100644 --- a/drivers/nfc/nfcmrvl/i2c.c +++ b/drivers/nfc/nfcmrvl/i2c.c @@ -112,8 +112,10 @@ static int nfcmrvl_i2c_nci_send(struct nfcmrvl_private *priv, struct nfcmrvl_i2c_drv_data *drv_data = priv->drv_data; int ret; - if (test_bit(NFCMRVL_PHY_ERROR, &priv->flags)) + if (test_bit(NFCMRVL_PHY_ERROR, &priv->flags)) { + kfree_skb(skb); return -EREMOTEIO; + } ret = i2c_master_send(drv_data->i2c, skb->data, skb->len); diff --git a/drivers/nfc/nxp-nci/core.c b/drivers/nfc/nxp-nci/core.c index 580cb6ecffee..66b198663387 100644 --- a/drivers/nfc/nxp-nci/core.c +++ b/drivers/nfc/nxp-nci/core.c @@ -73,11 +73,15 @@ static int nxp_nci_send(struct nci_dev *ndev, struct sk_buff *skb) struct nxp_nci_info *info = nci_get_drvdata(ndev); int r; - if (!info->phy_ops->write) + if (!info->phy_ops->write) { + kfree_skb(skb); return -EOPNOTSUPP; + } - if (info->mode != NXP_NCI_MODE_NCI) + if (info->mode != NXP_NCI_MODE_NCI) { + kfree_skb(skb); return -EINVAL; + } r = info->phy_ops->write(info->phy_id, skb); if (r < 0) { diff --git a/drivers/nfc/s3fwrn5/core.c b/drivers/nfc/s3fwrn5/core.c index 0270e05b68df..aec356880adf 100644 --- a/drivers/nfc/s3fwrn5/core.c +++ b/drivers/nfc/s3fwrn5/core.c @@ -105,6 +105,7 @@ static int s3fwrn5_nci_send(struct nci_dev *ndev, struct sk_buff *skb) mutex_lock(&info->mutex); if (s3fwrn5_get_mode(info) != S3FWRN5_MODE_NCI) { + kfree_skb(skb); mutex_unlock(&info->mutex); return -EINVAL; } diff --git a/drivers/nfc/st-nci/se.c b/drivers/nfc/st-nci/se.c index 7764b1a4c3cf..ec87dd21e054 100644 --- a/drivers/nfc/st-nci/se.c +++ b/drivers/nfc/st-nci/se.c @@ -312,6 +312,8 @@ static int st_nci_hci_connectivity_event_received(struct nci_dev *ndev, int r = 0; struct device *dev = &ndev->nfc_dev->dev; struct nfc_evt_transaction *transaction; + u32 aid_len; + u8 params_len; pr_debug("connectivity gate event: %x\n", event); @@ -325,26 +327,47 @@ static int st_nci_hci_connectivity_event_received(struct nci_dev *ndev, * Description Tag Length * AID 81 5 to 16 * PARAMETERS 82 0 to 255 + * + * The key differences are aid storage length is variably sized + * in the packet, but fixed in nfc_evt_transaction, and that + * the aid_len is u8 in the packet, but u32 in the structure, + * and the tags in the packet are not included in + * nfc_evt_transaction. + * + * size(b): 1 1 5-16 1 1 0-255 + * offset: 0 1 2 aid_len + 2 aid_len + 3 aid_len + 4 + * mem name: aid_tag(M) aid_len aid params_tag(M) params_len params + * example: 0x81 5-16 X 0x82 0-255 X */ - if (skb->len < NFC_MIN_AID_LENGTH + 2 && - skb->data[0] != NFC_EVT_TRANSACTION_AID_TAG) + if (skb->len < 2 || skb->data[0] != NFC_EVT_TRANSACTION_AID_TAG) return -EPROTO; - transaction = devm_kzalloc(dev, skb->len - 2, GFP_KERNEL); - if (!transaction) - return -ENOMEM; + aid_len = skb->data[1]; - transaction->aid_len = skb->data[1]; - memcpy(transaction->aid, &skb->data[2], transaction->aid_len); + if (skb->len < aid_len + 4 || + aid_len > sizeof(transaction->aid)) + return -EPROTO; - /* Check next byte is PARAMETERS tag (82) */ - if (skb->data[transaction->aid_len + 2] != - NFC_EVT_TRANSACTION_PARAMS_TAG) + params_len = skb->data[aid_len + 3]; + + /* Verify PARAMETERS tag is (82), and final check that there is + * enough space in the packet to read everything. + */ + if (skb->data[aid_len + 2] != NFC_EVT_TRANSACTION_PARAMS_TAG || + skb->len < aid_len + 4 + params_len) return -EPROTO; - transaction->params_len = skb->data[transaction->aid_len + 3]; - memcpy(transaction->params, skb->data + - transaction->aid_len + 4, transaction->params_len); + transaction = devm_kzalloc(dev, sizeof(*transaction) + + params_len, GFP_KERNEL); + if (!transaction) + return -ENOMEM; + + transaction->aid_len = aid_len; + transaction->params_len = params_len; + + memcpy(transaction->aid, &skb->data[2], aid_len); + memcpy(transaction->params, &skb->data[aid_len + 4], + params_len); r = nfc_se_transaction(ndev->nfc_dev, host, transaction); break; diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index da55ce45ac70..7e3893d06bab 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3095,10 +3095,6 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl) if (!ctrl->identified) { unsigned int i; - ret = nvme_init_subsystem(ctrl, id); - if (ret) - goto out_free; - /* * Check for quirks. Quirk can depend on firmware version, * so, in principle, the set of quirks present can change @@ -3111,6 +3107,10 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl) if (quirk_matches(id, &core_quirks[i])) ctrl->quirks |= core_quirks[i].quirks; } + + ret = nvme_init_subsystem(ctrl, id); + if (ret) + goto out_free; } memcpy(ctrl->subsys->firmware_rev, id->fr, sizeof(ctrl->subsys->firmware_rev)); @@ -4304,7 +4304,7 @@ static void nvme_ns_remove(struct nvme_ns *ns) mutex_unlock(&ns->ctrl->subsys->lock); /* guarantee not available in head->list */ - synchronize_rcu(); + synchronize_srcu(&ns->head->srcu); if (!nvme_ns_head_multipath(ns->head)) nvme_cdev_del(&ns->cdev, &ns->cdev_device); diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 93e2138a8b42..7e025b8948cb 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -174,11 +174,14 @@ void nvme_mpath_revalidate_paths(struct nvme_ns *ns) struct nvme_ns_head *head = ns->head; sector_t capacity = get_capacity(head->disk); int node; + int srcu_idx; + srcu_idx = srcu_read_lock(&head->srcu); list_for_each_entry_rcu(ns, &head->list, siblings) { if (capacity != get_capacity(ns->disk)) clear_bit(NVME_NS_READY, &ns->flags); } + srcu_read_unlock(&head->srcu, srcu_idx); for_each_node(node) rcu_assign_pointer(head->current_path[node], NULL); diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index f4335519399d..488ad7dabeb8 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -797,6 +797,8 @@ static blk_status_t nvme_setup_prp_simple(struct nvme_dev *dev, cmnd->dptr.prp1 = cpu_to_le64(iod->first_dma); if (bv->bv_len > first_prp_len) cmnd->dptr.prp2 = cpu_to_le64(iod->first_dma + first_prp_len); + else + cmnd->dptr.prp2 = 0; return BLK_STS_OK; } diff --git a/drivers/nvmem/lan9662-otpc.c b/drivers/nvmem/lan9662-otpc.c index 377bf34c2946..56fc19f092a7 100644 --- a/drivers/nvmem/lan9662-otpc.c +++ b/drivers/nvmem/lan9662-otpc.c @@ -36,7 +36,7 @@ struct lan9662_otp { void __iomem *base; }; -static bool lan9662_otp_wait_flag_clear(void __iomem *reg, u32 flag) +static int lan9662_otp_wait_flag_clear(void __iomem *reg, u32 flag) { u32 val; diff --git a/drivers/nvmem/rmem.c b/drivers/nvmem/rmem.c index b11c3c974b3d..80cb187f1481 100644 --- a/drivers/nvmem/rmem.c +++ b/drivers/nvmem/rmem.c @@ -37,9 +37,9 @@ static int rmem_read(void *context, unsigned int offset, * but as of Dec 2020 this isn't possible on arm64. */ addr = memremap(priv->mem->base, available, MEMREMAP_WB); - if (IS_ERR(addr)) { + if (!addr) { dev_err(priv->dev, "Failed to remap memory region\n"); - return PTR_ERR(addr); + return -ENOMEM; } count = memory_read_from_buffer(val, bytes, &off, addr, available); diff --git a/drivers/of/property.c b/drivers/of/property.c index 967f79b59016..134cfc980b70 100644 --- a/drivers/of/property.c +++ b/drivers/of/property.c @@ -993,8 +993,10 @@ of_fwnode_get_reference_args(const struct fwnode_handle *fwnode, nargs, index, &of_args); if (ret < 0) return ret; - if (!args) + if (!args) { + of_node_put(of_args.np); return 0; + } args->nargs = of_args.args_count; args->fwnode = of_fwnode_handle(of_args.np); diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c index c967ad6e2626..f9cc2e10b676 100644 --- a/drivers/pci/ats.c +++ b/drivers/pci/ats.c @@ -382,6 +382,9 @@ int pci_enable_pasid(struct pci_dev *pdev, int features) if (!pasid) return -EINVAL; + if (!pci_acs_path_enabled(pdev, NULL, PCI_ACS_RR | PCI_ACS_UF)) + return -EINVAL; + pci_read_config_word(pdev, pasid + PCI_PASID_CAP, &supported); supported &= PCI_PASID_CAP_EXEC | PCI_PASID_CAP_PRIV; diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c index ba64284eaf9f..f1ec8931dfbc 100644 --- a/drivers/pci/controller/pci-hyperv.c +++ b/drivers/pci/controller/pci-hyperv.c @@ -1613,7 +1613,7 @@ out: } static u32 hv_compose_msi_req_v1( - struct pci_create_interrupt *int_pkt, const struct cpumask *affinity, + struct pci_create_interrupt *int_pkt, u32 slot, u8 vector, u16 vector_count) { int_pkt->message_type.type = PCI_CREATE_INTERRUPT_MESSAGE; @@ -1632,6 +1632,35 @@ static u32 hv_compose_msi_req_v1( } /* + * The vCPU selected by hv_compose_multi_msi_req_get_cpu() and + * hv_compose_msi_req_get_cpu() is a "dummy" vCPU because the final vCPU to be + * interrupted is specified later in hv_irq_unmask() and communicated to Hyper-V + * via the HVCALL_RETARGET_INTERRUPT hypercall. But the choice of dummy vCPU is + * not irrelevant because Hyper-V chooses the physical CPU to handle the + * interrupts based on the vCPU specified in message sent to the vPCI VSP in + * hv_compose_msi_msg(). Hyper-V's choice of pCPU is not visible to the guest, + * but assigning too many vPCI device interrupts to the same pCPU can cause a + * performance bottleneck. So we spread out the dummy vCPUs to influence Hyper-V + * to spread out the pCPUs that it selects. + * + * For the single-MSI and MSI-X cases, it's OK for hv_compose_msi_req_get_cpu() + * to always return the same dummy vCPU, because a second call to + * hv_compose_msi_msg() contains the "real" vCPU, causing Hyper-V to choose a + * new pCPU for the interrupt. But for the multi-MSI case, the second call to + * hv_compose_msi_msg() exits without sending a message to the vPCI VSP, so the + * original dummy vCPU is used. This dummy vCPU must be round-robin'ed so that + * the pCPUs are spread out. All interrupts for a multi-MSI device end up using + * the same pCPU, even though the vCPUs will be spread out by later calls + * to hv_irq_unmask(), but that is the best we can do now. + * + * With Hyper-V in Nov 2022, the HVCALL_RETARGET_INTERRUPT hypercall does *not* + * cause Hyper-V to reselect the pCPU based on the specified vCPU. Such an + * enhancement is planned for a future version. With that enhancement, the + * dummy vCPU selection won't matter, and interrupts for the same multi-MSI + * device will be spread across multiple pCPUs. + */ + +/* * Create MSI w/ dummy vCPU set targeting just one vCPU, overwritten * by subsequent retarget in hv_irq_unmask(). */ @@ -1640,18 +1669,39 @@ static int hv_compose_msi_req_get_cpu(const struct cpumask *affinity) return cpumask_first_and(affinity, cpu_online_mask); } -static u32 hv_compose_msi_req_v2( - struct pci_create_interrupt2 *int_pkt, const struct cpumask *affinity, - u32 slot, u8 vector, u16 vector_count) +/* + * Make sure the dummy vCPU values for multi-MSI don't all point to vCPU0. + */ +static int hv_compose_multi_msi_req_get_cpu(void) { + static DEFINE_SPINLOCK(multi_msi_cpu_lock); + + /* -1 means starting with CPU 0 */ + static int cpu_next = -1; + + unsigned long flags; int cpu; + spin_lock_irqsave(&multi_msi_cpu_lock, flags); + + cpu_next = cpumask_next_wrap(cpu_next, cpu_online_mask, nr_cpu_ids, + false); + cpu = cpu_next; + + spin_unlock_irqrestore(&multi_msi_cpu_lock, flags); + + return cpu; +} + +static u32 hv_compose_msi_req_v2( + struct pci_create_interrupt2 *int_pkt, int cpu, + u32 slot, u8 vector, u16 vector_count) +{ int_pkt->message_type.type = PCI_CREATE_INTERRUPT_MESSAGE2; int_pkt->wslot.slot = slot; int_pkt->int_desc.vector = vector; int_pkt->int_desc.vector_count = vector_count; int_pkt->int_desc.delivery_mode = DELIVERY_MODE; - cpu = hv_compose_msi_req_get_cpu(affinity); int_pkt->int_desc.processor_array[0] = hv_cpu_number_to_vp_number(cpu); int_pkt->int_desc.processor_count = 1; @@ -1660,18 +1710,15 @@ static u32 hv_compose_msi_req_v2( } static u32 hv_compose_msi_req_v3( - struct pci_create_interrupt3 *int_pkt, const struct cpumask *affinity, + struct pci_create_interrupt3 *int_pkt, int cpu, u32 slot, u32 vector, u16 vector_count) { - int cpu; - int_pkt->message_type.type = PCI_CREATE_INTERRUPT_MESSAGE3; int_pkt->wslot.slot = slot; int_pkt->int_desc.vector = vector; int_pkt->int_desc.reserved = 0; int_pkt->int_desc.vector_count = vector_count; int_pkt->int_desc.delivery_mode = DELIVERY_MODE; - cpu = hv_compose_msi_req_get_cpu(affinity); int_pkt->int_desc.processor_array[0] = hv_cpu_number_to_vp_number(cpu); int_pkt->int_desc.processor_count = 1; @@ -1715,12 +1762,18 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) struct pci_create_interrupt3 v3; } int_pkts; } __packed ctxt; + bool multi_msi; u64 trans_id; u32 size; int ret; + int cpu; + + msi_desc = irq_data_get_msi_desc(data); + multi_msi = !msi_desc->pci.msi_attrib.is_msix && + msi_desc->nvec_used > 1; /* Reuse the previous allocation */ - if (data->chip_data) { + if (data->chip_data && multi_msi) { int_desc = data->chip_data; msg->address_hi = int_desc->address >> 32; msg->address_lo = int_desc->address & 0xffffffff; @@ -1728,7 +1781,6 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) return; } - msi_desc = irq_data_get_msi_desc(data); pdev = msi_desc_to_pci_dev(msi_desc); dest = irq_data_get_effective_affinity_mask(data); pbus = pdev->bus; @@ -1738,11 +1790,18 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) if (!hpdev) goto return_null_message; + /* Free any previous message that might have already been composed. */ + if (data->chip_data && !multi_msi) { + int_desc = data->chip_data; + data->chip_data = NULL; + hv_int_desc_free(hpdev, int_desc); + } + int_desc = kzalloc(sizeof(*int_desc), GFP_ATOMIC); if (!int_desc) goto drop_reference; - if (!msi_desc->pci.msi_attrib.is_msix && msi_desc->nvec_used > 1) { + if (multi_msi) { /* * If this is not the first MSI of Multi MSI, we already have * a mapping. Can exit early. @@ -1767,9 +1826,11 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) */ vector = 32; vector_count = msi_desc->nvec_used; + cpu = hv_compose_multi_msi_req_get_cpu(); } else { vector = hv_msi_get_int_vector(data); vector_count = 1; + cpu = hv_compose_msi_req_get_cpu(dest); } /* @@ -1785,7 +1846,6 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) switch (hbus->protocol_version) { case PCI_PROTOCOL_VERSION_1_1: size = hv_compose_msi_req_v1(&ctxt.int_pkts.v1, - dest, hpdev->desc.win_slot.slot, (u8)vector, vector_count); @@ -1794,7 +1854,7 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) case PCI_PROTOCOL_VERSION_1_2: case PCI_PROTOCOL_VERSION_1_3: size = hv_compose_msi_req_v2(&ctxt.int_pkts.v2, - dest, + cpu, hpdev->desc.win_slot.slot, (u8)vector, vector_count); @@ -1802,7 +1862,7 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) case PCI_PROTOCOL_VERSION_1_4: size = hv_compose_msi_req_v3(&ctxt.int_pkts.v3, - dest, + cpu, hpdev->desc.win_slot.slot, vector, vector_count); diff --git a/drivers/pinctrl/intel/pinctrl-intel.c b/drivers/pinctrl/intel/pinctrl-intel.c index 52ecd66ce357..047a8374b4fd 100644 --- a/drivers/pinctrl/intel/pinctrl-intel.c +++ b/drivers/pinctrl/intel/pinctrl-intel.c @@ -436,9 +436,14 @@ static void __intel_gpio_set_direction(void __iomem *padcfg0, bool input) writel(value, padcfg0); } +static int __intel_gpio_get_gpio_mode(u32 value) +{ + return (value & PADCFG0_PMODE_MASK) >> PADCFG0_PMODE_SHIFT; +} + static int intel_gpio_get_gpio_mode(void __iomem *padcfg0) { - return (readl(padcfg0) & PADCFG0_PMODE_MASK) >> PADCFG0_PMODE_SHIFT; + return __intel_gpio_get_gpio_mode(readl(padcfg0)); } static void intel_gpio_set_gpio_mode(void __iomem *padcfg0) @@ -1674,6 +1679,7 @@ EXPORT_SYMBOL_GPL(intel_pinctrl_get_soc_data); static bool intel_pinctrl_should_save(struct intel_pinctrl *pctrl, unsigned int pin) { const struct pin_desc *pd = pin_desc_get(pctrl->pctldev, pin); + u32 value; if (!pd || !intel_pad_usable(pctrl, pin)) return false; @@ -1688,6 +1694,25 @@ static bool intel_pinctrl_should_save(struct intel_pinctrl *pctrl, unsigned int gpiochip_line_is_irq(&pctrl->chip, intel_pin_to_gpio(pctrl, pin))) return true; + /* + * The firmware on some systems may configure GPIO pins to be + * an interrupt source in so called "direct IRQ" mode. In such + * cases the GPIO controller driver has no idea if those pins + * are being used or not. At the same time, there is a known bug + * in the firmwares that don't restore the pin settings correctly + * after suspend, i.e. by an unknown reason the Rx value becomes + * inverted. + * + * Hence, let's save and restore the pins that are configured + * as GPIOs in the input mode with GPIROUTIOXAPIC bit set. + * + * See https://bugzilla.kernel.org/show_bug.cgi?id=214749. + */ + value = readl(intel_get_padcfg(pctrl, pin, PADCFG0)); + if ((value & PADCFG0_GPIROUTIOXAPIC) && (value & PADCFG0_GPIOTXDIS) && + (__intel_gpio_get_gpio_mode(value) == PADCFG0_PMODE_GPIO)) + return true; + return false; } diff --git a/drivers/pinctrl/mediatek/mtk-eint.c b/drivers/pinctrl/mediatek/mtk-eint.c index 65d312967619..27f0a54e12bf 100644 --- a/drivers/pinctrl/mediatek/mtk-eint.c +++ b/drivers/pinctrl/mediatek/mtk-eint.c @@ -303,12 +303,15 @@ static struct irq_chip mtk_eint_irq_chip = { static unsigned int mtk_eint_hw_init(struct mtk_eint *eint) { - void __iomem *reg = eint->base + eint->regs->dom_en; + void __iomem *dom_en = eint->base + eint->regs->dom_en; + void __iomem *mask_set = eint->base + eint->regs->mask_set; unsigned int i; for (i = 0; i < eint->hw->ap_num; i += 32) { - writel(0xffffffff, reg); - reg += 4; + writel(0xffffffff, dom_en); + writel(0xffffffff, mask_set); + dom_en += 4; + mask_set += 4; } return 0; diff --git a/drivers/pinctrl/pinctrl-single.c b/drivers/pinctrl/pinctrl-single.c index 67bec7ea0f8b..414ee6bb8ac9 100644 --- a/drivers/pinctrl/pinctrl-single.c +++ b/drivers/pinctrl/pinctrl-single.c @@ -727,7 +727,7 @@ static int pcs_allocate_pin_table(struct pcs_device *pcs) mux_bytes = pcs->width / BITS_PER_BYTE; - if (pcs->bits_per_mux) { + if (pcs->bits_per_mux && pcs->fmask) { pcs->bits_per_pin = fls(pcs->fmask); nr_pins = (pcs->size * BITS_PER_BYTE) / pcs->bits_per_pin; } else { diff --git a/drivers/platform/x86/amd/pmc.c b/drivers/platform/x86/amd/pmc.c index ef4ae977b8e0..439d282aafd1 100644 --- a/drivers/platform/x86/amd/pmc.c +++ b/drivers/platform/x86/amd/pmc.c @@ -739,8 +739,14 @@ static void amd_pmc_s2idle_prepare(void) static void amd_pmc_s2idle_check(void) { struct amd_pmc_dev *pdev = &pmc; + struct smu_metrics table; int rc; + /* CZN: Ensure that future s0i3 entry attempts at least 10ms passed */ + if (pdev->cpu_id == AMD_CPU_ID_CZN && !get_metrics_table(pdev, &table) && + table.s0i3_last_entry_status) + usleep_range(10000, 20000); + /* Dump the IdleMask before we add to the STB */ amd_pmc_idlemask_read(pdev, pdev->dev, NULL); diff --git a/drivers/power/supply/ab8500_btemp.c b/drivers/power/supply/ab8500_btemp.c index 863fabe05bdc..307ee6f71042 100644 --- a/drivers/power/supply/ab8500_btemp.c +++ b/drivers/power/supply/ab8500_btemp.c @@ -725,7 +725,14 @@ static int ab8500_btemp_probe(struct platform_device *pdev) /* Get thermal zone and ADC */ di->tz = thermal_zone_get_zone_by_name("battery-thermal"); if (IS_ERR(di->tz)) { - return dev_err_probe(dev, PTR_ERR(di->tz), + ret = PTR_ERR(di->tz); + /* + * This usually just means we are probing before the thermal + * zone, so just defer. + */ + if (ret == -ENODEV) + ret = -EPROBE_DEFER; + return dev_err_probe(dev, ret, "failed to get battery thermal zone\n"); } di->bat_ctrl = devm_iio_channel_get(dev, "bat_ctrl"); diff --git a/drivers/power/supply/ip5xxx_power.c b/drivers/power/supply/ip5xxx_power.c index 218e8e689a3f..00221e9c0bfc 100644 --- a/drivers/power/supply/ip5xxx_power.c +++ b/drivers/power/supply/ip5xxx_power.c @@ -352,7 +352,7 @@ static int ip5xxx_battery_get_property(struct power_supply *psy, ret = ip5xxx_battery_read_adc(ip5xxx, IP5XXX_BATIADC_DAT0, IP5XXX_BATIADC_DAT1, &raw); - val->intval = DIV_ROUND_CLOSEST(raw * 745985, 1000); + val->intval = DIV_ROUND_CLOSEST(raw * 149197, 200); return 0; case POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT: diff --git a/drivers/power/supply/rk817_charger.c b/drivers/power/supply/rk817_charger.c index 635f051b0821..f20a6ac584cc 100644 --- a/drivers/power/supply/rk817_charger.c +++ b/drivers/power/supply/rk817_charger.c @@ -121,7 +121,7 @@ struct rk817_charger { #define ADC_TO_CHARGE_UAH(adc_value, res_div) \ (adc_value / 3600 * 172 / res_div) -static u8 rk817_chg_cur_to_reg(u32 chg_cur_ma) +static int rk817_chg_cur_to_reg(u32 chg_cur_ma) { if (chg_cur_ma >= 3500) return CHG_3_5A; @@ -864,8 +864,8 @@ static int rk817_battery_init(struct rk817_charger *charger, { struct rk808 *rk808 = charger->rk808; u32 tmp, max_chg_vol_mv, max_chg_cur_ma; - u8 max_chg_vol_reg, chg_term_i_reg, max_chg_cur_reg; - int ret, chg_term_ma; + u8 max_chg_vol_reg, chg_term_i_reg; + int ret, chg_term_ma, max_chg_cur_reg; u8 bulk_reg[2]; /* Get initial plug state */ @@ -1116,14 +1116,12 @@ static int rk817_charger_probe(struct platform_device *pdev) charger->bat_ps = devm_power_supply_register(&pdev->dev, &rk817_bat_desc, &pscfg); - - charger->chg_ps = devm_power_supply_register(&pdev->dev, - &rk817_chg_desc, &pscfg); - - if (IS_ERR(charger->chg_ps)) + if (IS_ERR(charger->bat_ps)) return dev_err_probe(dev, -EINVAL, "Battery failed to probe\n"); + charger->chg_ps = devm_power_supply_register(&pdev->dev, + &rk817_chg_desc, &pscfg); if (IS_ERR(charger->chg_ps)) return dev_err_probe(dev, -EINVAL, "Charger failed to probe\n"); diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index bcccad8f7516..e8c00a884f1f 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -5154,6 +5154,7 @@ static void regulator_dev_release(struct device *dev) { struct regulator_dev *rdev = dev_get_drvdata(dev); + debugfs_remove_recursive(rdev->debugfs); kfree(rdev->constraints); of_node_put(rdev->dev.of_node); kfree(rdev); @@ -5644,11 +5645,15 @@ wash: mutex_lock(®ulator_list_mutex); regulator_ena_gpio_free(rdev); mutex_unlock(®ulator_list_mutex); + put_device(&rdev->dev); + rdev = NULL; clean: if (dangling_of_gpiod) gpiod_put(config->ena_gpiod); + if (rdev && rdev->dev.of_node) + of_node_put(rdev->dev.of_node); + kfree(rdev); kfree(config); - put_device(&rdev->dev); rinse: if (dangling_cfg_gpiod) gpiod_put(cfg->ena_gpiod); @@ -5677,7 +5682,6 @@ void regulator_unregister(struct regulator_dev *rdev) mutex_lock(®ulator_list_mutex); - debugfs_remove_recursive(rdev->debugfs); WARN_ON(rdev->open_count); regulator_remove_coupling(rdev); unset_regulator_supplies(rdev); diff --git a/drivers/regulator/rt5759-regulator.c b/drivers/regulator/rt5759-regulator.c index 6b96899eb27e..8488417f4b2c 100644 --- a/drivers/regulator/rt5759-regulator.c +++ b/drivers/regulator/rt5759-regulator.c @@ -243,6 +243,7 @@ static int rt5759_regulator_register(struct rt5759_priv *priv) if (priv->chip_type == CHIP_TYPE_RT5759A) reg_desc->uV_step = RT5759A_STEP_UV; + memset(®_cfg, 0, sizeof(reg_cfg)); reg_cfg.dev = priv->dev; reg_cfg.of_node = np; reg_cfg.init_data = of_get_regulator_init_data(priv->dev, np, reg_desc); diff --git a/drivers/regulator/slg51000-regulator.c b/drivers/regulator/slg51000-regulator.c index 75a941fb3c2b..1b2eee95ad3f 100644 --- a/drivers/regulator/slg51000-regulator.c +++ b/drivers/regulator/slg51000-regulator.c @@ -457,6 +457,8 @@ static int slg51000_i2c_probe(struct i2c_client *client) chip->cs_gpiod = cs_gpiod; } + usleep_range(10000, 11000); + i2c_set_clientdata(client, chip); chip->chip_irq = client->irq; chip->dev = dev; diff --git a/drivers/regulator/twl6030-regulator.c b/drivers/regulator/twl6030-regulator.c index 430265c404d6..f3856750944f 100644 --- a/drivers/regulator/twl6030-regulator.c +++ b/drivers/regulator/twl6030-regulator.c @@ -67,6 +67,7 @@ struct twlreg_info { #define TWL6030_CFG_STATE_SLEEP 0x03 #define TWL6030_CFG_STATE_GRP_SHIFT 5 #define TWL6030_CFG_STATE_APP_SHIFT 2 +#define TWL6030_CFG_STATE_MASK 0x03 #define TWL6030_CFG_STATE_APP_MASK (0x03 << TWL6030_CFG_STATE_APP_SHIFT) #define TWL6030_CFG_STATE_APP(v) (((v) & TWL6030_CFG_STATE_APP_MASK) >>\ TWL6030_CFG_STATE_APP_SHIFT) @@ -128,13 +129,14 @@ static int twl6030reg_is_enabled(struct regulator_dev *rdev) if (grp < 0) return grp; grp &= P1_GRP_6030; + val = twlreg_read(info, TWL_MODULE_PM_RECEIVER, VREG_STATE); + val = TWL6030_CFG_STATE_APP(val); } else { + val = twlreg_read(info, TWL_MODULE_PM_RECEIVER, VREG_STATE); + val &= TWL6030_CFG_STATE_MASK; grp = 1; } - val = twlreg_read(info, TWL_MODULE_PM_RECEIVER, VREG_STATE); - val = TWL6030_CFG_STATE_APP(val); - return grp && (val == TWL6030_CFG_STATE_ON); } @@ -187,7 +189,12 @@ static int twl6030reg_get_status(struct regulator_dev *rdev) val = twlreg_read(info, TWL_MODULE_PM_RECEIVER, VREG_STATE); - switch (TWL6030_CFG_STATE_APP(val)) { + if (info->features & TWL6032_SUBCLASS) + val &= TWL6030_CFG_STATE_MASK; + else + val = TWL6030_CFG_STATE_APP(val); + + switch (val) { case TWL6030_CFG_STATE_ON: return REGULATOR_STATUS_NORMAL; @@ -530,6 +537,7 @@ static const struct twlreg_info TWL6030_INFO_##label = { \ #define TWL6032_ADJUSTABLE_LDO(label, offset) \ static const struct twlreg_info TWL6032_INFO_##label = { \ .base = offset, \ + .features = TWL6032_SUBCLASS, \ .desc = { \ .name = #label, \ .id = TWL6032_REG_##label, \ @@ -562,6 +570,7 @@ static const struct twlreg_info TWLFIXED_INFO_##label = { \ #define TWL6032_ADJUSTABLE_SMPS(label, offset) \ static const struct twlreg_info TWLSMPS_INFO_##label = { \ .base = offset, \ + .features = TWL6032_SUBCLASS, \ .desc = { \ .name = #label, \ .id = TWL6032_REG_##label, \ diff --git a/drivers/s390/block/dasd_devmap.c b/drivers/s390/block/dasd_devmap.c index cb83f81da416..df17f0f9cb0f 100644 --- a/drivers/s390/block/dasd_devmap.c +++ b/drivers/s390/block/dasd_devmap.c @@ -1954,7 +1954,7 @@ dasd_copy_pair_show(struct device *dev, break; } } - if (!copy->entry[i].primary) + if (i == DASD_CP_ENTRIES) goto out; /* print all secondary */ diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 662730f3b027..5d0b9991e91a 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -4722,7 +4722,6 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_raw(struct dasd_device *startdev, struct dasd_device *basedev; struct req_iterator iter; struct dasd_ccw_req *cqr; - unsigned int first_offs; unsigned int trkcount; unsigned long *idaws; unsigned int size; @@ -4756,7 +4755,6 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_raw(struct dasd_device *startdev, last_trk = (blk_rq_pos(req) + blk_rq_sectors(req) - 1) / DASD_RAW_SECTORS_PER_TRACK; trkcount = last_trk - first_trk + 1; - first_offs = 0; if (rq_data_dir(req) == READ) cmd = DASD_ECKD_CCW_READ_TRACK; @@ -4800,13 +4798,13 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_raw(struct dasd_device *startdev, if (use_prefix) { prefix_LRE(ccw++, data, first_trk, last_trk, cmd, basedev, - startdev, 1, first_offs + 1, trkcount, 0, 0); + startdev, 1, 0, trkcount, 0, 0); } else { define_extent(ccw++, data, first_trk, last_trk, cmd, basedev, 0); ccw[-1].flags |= CCW_FLAG_CC; data += sizeof(struct DE_eckd_data); - locate_record_ext(ccw++, data, first_trk, first_offs + 1, + locate_record_ext(ccw++, data, first_trk, 0, trkcount, cmd, basedev, 0, 0); } @@ -5500,7 +5498,7 @@ dasd_eckd_ioctl(struct dasd_block *block, unsigned int cmd, void __user *argp) * Dump the range of CCWs into 'page' buffer * and return number of printed chars. */ -static int +static void dasd_eckd_dump_ccw_range(struct ccw1 *from, struct ccw1 *to, char *page) { int len, count; @@ -5518,16 +5516,21 @@ dasd_eckd_dump_ccw_range(struct ccw1 *from, struct ccw1 *to, char *page) else datap = (char *) ((addr_t) from->cda); - /* dump data (max 32 bytes) */ - for (count = 0; count < from->count && count < 32; count++) { - if (count % 8 == 0) len += sprintf(page + len, " "); - if (count % 4 == 0) len += sprintf(page + len, " "); + /* dump data (max 128 bytes) */ + for (count = 0; count < from->count && count < 128; count++) { + if (count % 32 == 0) + len += sprintf(page + len, "\n"); + if (count % 8 == 0) + len += sprintf(page + len, " "); + if (count % 4 == 0) + len += sprintf(page + len, " "); len += sprintf(page + len, "%02x", datap[count]); } len += sprintf(page + len, "\n"); from++; } - return len; + if (len > 0) + printk(KERN_ERR "%s", page); } static void @@ -5619,37 +5622,33 @@ static void dasd_eckd_dump_sense_ccw(struct dasd_device *device, if (req) { /* req == NULL for unsolicited interrupts */ /* dump the Channel Program (max 140 Bytes per line) */ - /* Count CCW and print first CCWs (maximum 1024 % 140 = 7) */ + /* Count CCW and print first CCWs (maximum 7) */ first = req->cpaddr; for (last = first; last->flags & (CCW_FLAG_CC | CCW_FLAG_DC); last++); to = min(first + 6, last); - len = sprintf(page, PRINTK_HEADER - " Related CP in req: %p\n", req); - dasd_eckd_dump_ccw_range(first, to, page + len); - printk(KERN_ERR "%s", page); + printk(KERN_ERR PRINTK_HEADER " Related CP in req: %p\n", req); + dasd_eckd_dump_ccw_range(first, to, page); /* print failing CCW area (maximum 4) */ /* scsw->cda is either valid or zero */ - len = 0; from = ++to; fail = (struct ccw1 *)(addr_t) irb->scsw.cmd.cpa; /* failing CCW */ if (from < fail - 2) { from = fail - 2; /* there is a gap - print header */ - len += sprintf(page, PRINTK_HEADER "......\n"); + printk(KERN_ERR PRINTK_HEADER "......\n"); } to = min(fail + 1, last); - len += dasd_eckd_dump_ccw_range(from, to, page + len); + dasd_eckd_dump_ccw_range(from, to, page + len); /* print last CCWs (maximum 2) */ + len = 0; from = max(from, ++to); if (from < last - 1) { from = last - 1; /* there is a gap - print header */ - len += sprintf(page + len, PRINTK_HEADER "......\n"); + printk(KERN_ERR PRINTK_HEADER "......\n"); } - len += dasd_eckd_dump_ccw_range(from, last, page + len); - if (len > 0) - printk(KERN_ERR "%s", page); + dasd_eckd_dump_ccw_range(from, last, page + len); } free_page((unsigned long) page); } diff --git a/drivers/s390/block/dasd_ioctl.c b/drivers/s390/block/dasd_ioctl.c index d0ddf2cc9786..9327dcdd6e5e 100644 --- a/drivers/s390/block/dasd_ioctl.c +++ b/drivers/s390/block/dasd_ioctl.c @@ -401,7 +401,7 @@ dasd_ioctl_copy_pair_swap(struct block_device *bdev, void __user *argp) return -EFAULT; } if (memchr_inv(data.reserved, 0, sizeof(data.reserved))) { - pr_warn("%s: Ivalid swap data specified.\n", + pr_warn("%s: Invalid swap data specified\n", dev_name(&device->cdev->dev)); dasd_put_device(device); return DASD_COPYPAIRSWAP_INVALID; diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index 59ac98f2bd27..b02c631f3b71 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -233,8 +233,11 @@ static void __init ap_init_qci_info(void) if (!ap_qci_info) return; ap_qci_info_old = kzalloc(sizeof(*ap_qci_info_old), GFP_KERNEL); - if (!ap_qci_info_old) + if (!ap_qci_info_old) { + kfree(ap_qci_info); + ap_qci_info = NULL; return; + } if (ap_fetch_qci_info(ap_qci_info) != 0) { kfree(ap_qci_info); kfree(ap_qci_info_old); diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 9dc935886e9f..c6ded3fdd715 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -758,7 +758,6 @@ static void qeth_l2_br2dev_worker(struct work_struct *work) struct list_head *iter; int err = 0; - kfree(br2dev_event_work); QETH_CARD_TEXT_(card, 4, "b2dw%04lx", event); QETH_CARD_TEXT_(card, 4, "ma%012llx", ether_addr_to_u64(addr)); @@ -815,6 +814,7 @@ unlock: dev_put(brdev); dev_put(lsyncdev); dev_put(dstdev); + kfree(br2dev_event_work); } static int qeth_l2_br2dev_queue_work(struct net_device *brdev, diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index bc46721aa01c..3c5b7e4227b2 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -303,16 +303,21 @@ enum storvsc_request_type { }; /* - * SRB status codes and masks; a subset of the codes used here. + * SRB status codes and masks. In the 8-bit field, the two high order bits + * are flags, while the remaining 6 bits are an integer status code. The + * definitions here include only the subset of the integer status codes that + * are tested for in this driver. */ - #define SRB_STATUS_AUTOSENSE_VALID 0x80 #define SRB_STATUS_QUEUE_FROZEN 0x40 -#define SRB_STATUS_INVALID_LUN 0x20 -#define SRB_STATUS_SUCCESS 0x01 -#define SRB_STATUS_ABORTED 0x02 -#define SRB_STATUS_ERROR 0x04 -#define SRB_STATUS_DATA_OVERRUN 0x12 + +/* SRB status integer codes */ +#define SRB_STATUS_SUCCESS 0x01 +#define SRB_STATUS_ABORTED 0x02 +#define SRB_STATUS_ERROR 0x04 +#define SRB_STATUS_INVALID_REQUEST 0x06 +#define SRB_STATUS_DATA_OVERRUN 0x12 +#define SRB_STATUS_INVALID_LUN 0x20 #define SRB_STATUS(status) \ (status & ~(SRB_STATUS_AUTOSENSE_VALID | SRB_STATUS_QUEUE_FROZEN)) @@ -969,38 +974,25 @@ static void storvsc_handle_error(struct vmscsi_request *vm_srb, void (*process_err_fn)(struct work_struct *work); struct hv_host_device *host_dev = shost_priv(host); - /* - * In some situations, Hyper-V sets multiple bits in the - * srb_status, such as ABORTED and ERROR. So process them - * individually, with the most specific bits first. - */ - - if (vm_srb->srb_status & SRB_STATUS_INVALID_LUN) { - set_host_byte(scmnd, DID_NO_CONNECT); - process_err_fn = storvsc_remove_lun; - goto do_work; - } + switch (SRB_STATUS(vm_srb->srb_status)) { + case SRB_STATUS_ERROR: + case SRB_STATUS_ABORTED: + case SRB_STATUS_INVALID_REQUEST: + if (vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID) { + /* Check for capacity change */ + if ((asc == 0x2a) && (ascq == 0x9)) { + process_err_fn = storvsc_device_scan; + /* Retry the I/O that triggered this. */ + set_host_byte(scmnd, DID_REQUEUE); + goto do_work; + } - if (vm_srb->srb_status & SRB_STATUS_ABORTED) { - if (vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID && - /* Capacity data has changed */ - (asc == 0x2a) && (ascq == 0x9)) { - process_err_fn = storvsc_device_scan; /* - * Retry the I/O that triggered this. + * Otherwise, let upper layer deal with the + * error when sense message is present */ - set_host_byte(scmnd, DID_REQUEUE); - goto do_work; - } - } - - if (vm_srb->srb_status & SRB_STATUS_ERROR) { - /* - * Let upper layer deal with error when - * sense message is present. - */ - if (vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID) return; + } /* * If there is an error; offline the device since all @@ -1023,6 +1015,13 @@ static void storvsc_handle_error(struct vmscsi_request *vm_srb, default: set_host_byte(scmnd, DID_ERROR); } + return; + + case SRB_STATUS_INVALID_LUN: + set_host_byte(scmnd, DID_NO_CONNECT); + process_err_fn = storvsc_remove_lun; + goto do_work; + } return; diff --git a/drivers/spi/spi-dw-dma.c b/drivers/spi/spi-dw-dma.c index 1322b8cce5b7..ababb910b391 100644 --- a/drivers/spi/spi-dw-dma.c +++ b/drivers/spi/spi-dw-dma.c @@ -128,12 +128,15 @@ static int dw_spi_dma_init_mfld(struct device *dev, struct dw_spi *dws) dw_spi_dma_sg_burst_init(dws); + pci_dev_put(dma_dev); + return 0; free_rxchan: dma_release_channel(dws->rxchan); dws->rxchan = NULL; err_exit: + pci_dev_put(dma_dev); return -EBUSY; } diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c index 30d82cc7300b..d209930069cf 100644 --- a/drivers/spi/spi-imx.c +++ b/drivers/spi/spi-imx.c @@ -444,8 +444,7 @@ static unsigned int mx51_ecspi_clkdiv(struct spi_imx_data *spi_imx, unsigned int pre, post; unsigned int fin = spi_imx->spi_clk; - if (unlikely(fspi > fin)) - return 0; + fspi = min(fspi, fin); post = fls(fin) - fls(fspi); if (fin > fspi << post) @@ -1608,6 +1607,13 @@ static int spi_imx_transfer_one(struct spi_controller *controller, return spi_imx_pio_transfer_slave(spi, transfer); /* + * If we decided in spi_imx_can_dma() that we want to do a DMA + * transfer, the SPI transfer has already been mapped, so we + * have to do the DMA transfer here. + */ + if (spi_imx->usedma) + return spi_imx_dma_transfer(spi_imx, transfer); + /* * Calculate the estimated time in us the transfer runs. Find * the number of Hz per byte per polling limit. */ @@ -1618,9 +1624,6 @@ static int spi_imx_transfer_one(struct spi_controller *controller, if (transfer->len < byte_limit) return spi_imx_poll_transfer(spi, transfer); - if (spi_imx->usedma) - return spi_imx_dma_transfer(spi_imx, transfer); - return spi_imx_pio_transfer(spi, transfer); } diff --git a/drivers/spi/spi-mt65xx.c b/drivers/spi/spi-mt65xx.c index a33c9a3de395..d6aff909fc36 100644 --- a/drivers/spi/spi-mt65xx.c +++ b/drivers/spi/spi-mt65xx.c @@ -1273,8 +1273,11 @@ static int mtk_spi_remove(struct platform_device *pdev) { struct spi_master *master = platform_get_drvdata(pdev); struct mtk_spi *mdata = spi_master_get_devdata(master); + int ret; - pm_runtime_disable(&pdev->dev); + ret = pm_runtime_resume_and_get(&pdev->dev); + if (ret < 0) + return ret; mtk_spi_reset(mdata); @@ -1283,6 +1286,9 @@ static int mtk_spi_remove(struct platform_device *pdev) clk_unprepare(mdata->spi_hclk); } + pm_runtime_put_noidle(&pdev->dev); + pm_runtime_disable(&pdev->dev); + return 0; } diff --git a/drivers/spi/spi-tegra210-quad.c b/drivers/spi/spi-tegra210-quad.c index 10f0c5a6e0dc..9f356612ba7e 100644 --- a/drivers/spi/spi-tegra210-quad.c +++ b/drivers/spi/spi-tegra210-quad.c @@ -924,8 +924,9 @@ static int tegra_qspi_start_transfer_one(struct spi_device *spi, static struct tegra_qspi_client_data *tegra_qspi_parse_cdata_dt(struct spi_device *spi) { struct tegra_qspi_client_data *cdata; + struct tegra_qspi *tqspi = spi_master_get_devdata(spi->master); - cdata = devm_kzalloc(&spi->dev, sizeof(*cdata), GFP_KERNEL); + cdata = devm_kzalloc(tqspi->dev, sizeof(*cdata), GFP_KERNEL); if (!cdata) return NULL; diff --git a/drivers/tee/optee/device.c b/drivers/tee/optee/device.c index f3947be13e2e..64f0e047c23d 100644 --- a/drivers/tee/optee/device.c +++ b/drivers/tee/optee/device.c @@ -80,7 +80,7 @@ static int optee_register_device(const uuid_t *device_uuid) rc = device_register(&optee_device->dev); if (rc) { pr_err("device registration failed, err: %d\n", rc); - kfree(optee_device); + put_device(&optee_device->dev); } return rc; diff --git a/drivers/usb/cdns3/cdnsp-gadget.c b/drivers/usb/cdns3/cdnsp-gadget.c index c67715f6f756..f9aa50ff14d4 100644 --- a/drivers/usb/cdns3/cdnsp-gadget.c +++ b/drivers/usb/cdns3/cdnsp-gadget.c @@ -600,11 +600,11 @@ int cdnsp_halt_endpoint(struct cdnsp_device *pdev, trace_cdnsp_ep_halt(value ? "Set" : "Clear"); - if (value) { - ret = cdnsp_cmd_stop_ep(pdev, pep); - if (ret) - return ret; + ret = cdnsp_cmd_stop_ep(pdev, pep); + if (ret) + return ret; + if (value) { if (GET_EP_CTX_STATE(pep->out_ctx) == EP_STATE_STOPPED) { cdnsp_queue_halt_endpoint(pdev, pep->idx); cdnsp_ring_cmd_db(pdev); @@ -613,10 +613,6 @@ int cdnsp_halt_endpoint(struct cdnsp_device *pdev, pep->ep_state |= EP_HALTED; } else { - /* - * In device mode driver can call reset endpoint command - * from any endpoint state. - */ cdnsp_queue_reset_ep(pdev, pep->idx); cdnsp_ring_cmd_db(pdev); ret = cdnsp_wait_for_cmd_compl(pdev); diff --git a/drivers/usb/cdns3/cdnsp-ring.c b/drivers/usb/cdns3/cdnsp-ring.c index 794e413800ae..2f29431f612e 100644 --- a/drivers/usb/cdns3/cdnsp-ring.c +++ b/drivers/usb/cdns3/cdnsp-ring.c @@ -1763,10 +1763,15 @@ static u32 cdnsp_td_remainder(struct cdnsp_device *pdev, int trb_buff_len, unsigned int td_total_len, struct cdnsp_request *preq, - bool more_trbs_coming) + bool more_trbs_coming, + bool zlp) { u32 maxp, total_packet_count; + /* Before ZLP driver needs set TD_SIZE = 1. */ + if (zlp) + return 1; + /* One TRB with a zero-length data packet. */ if (!more_trbs_coming || (transferred == 0 && trb_buff_len == 0) || trb_buff_len == td_total_len) @@ -1960,7 +1965,8 @@ int cdnsp_queue_bulk_tx(struct cdnsp_device *pdev, struct cdnsp_request *preq) /* Set the TRB length, TD size, and interrupter fields. */ remainder = cdnsp_td_remainder(pdev, enqd_len, trb_buff_len, full_len, preq, - more_trbs_coming); + more_trbs_coming, + zero_len_trb); length_field = TRB_LEN(trb_buff_len) | TRB_TD_SIZE(remainder) | TRB_INTR_TARGET(0); @@ -2025,7 +2031,7 @@ int cdnsp_queue_ctrl_tx(struct cdnsp_device *pdev, struct cdnsp_request *preq) if (preq->request.length > 0) { remainder = cdnsp_td_remainder(pdev, 0, preq->request.length, - preq->request.length, preq, 1); + preq->request.length, preq, 1, 0); length_field = TRB_LEN(preq->request.length) | TRB_TD_SIZE(remainder) | TRB_INTR_TARGET(0); @@ -2076,7 +2082,8 @@ int cdnsp_cmd_stop_ep(struct cdnsp_device *pdev, struct cdnsp_ep *pep) u32 ep_state = GET_EP_CTX_STATE(pep->out_ctx); int ret = 0; - if (ep_state == EP_STATE_STOPPED || ep_state == EP_STATE_DISABLED) { + if (ep_state == EP_STATE_STOPPED || ep_state == EP_STATE_DISABLED || + ep_state == EP_STATE_HALTED) { trace_cdnsp_ep_stopped_or_disabled(pep->out_ctx); goto ep_stopped; } @@ -2225,7 +2232,7 @@ static int cdnsp_queue_isoc_tx(struct cdnsp_device *pdev, /* Set the TRB length, TD size, & interrupter fields. */ remainder = cdnsp_td_remainder(pdev, running_total, trb_buff_len, td_len, preq, - more_trbs_coming); + more_trbs_coming, 0); length_field = TRB_LEN(trb_buff_len) | TRB_INTR_TARGET(0); diff --git a/drivers/usb/dwc3/dwc3-exynos.c b/drivers/usb/dwc3/dwc3-exynos.c index 0ecf20eeceee..4be6a873bd07 100644 --- a/drivers/usb/dwc3/dwc3-exynos.c +++ b/drivers/usb/dwc3/dwc3-exynos.c @@ -37,15 +37,6 @@ struct dwc3_exynos { struct regulator *vdd10; }; -static int dwc3_exynos_remove_child(struct device *dev, void *unused) -{ - struct platform_device *pdev = to_platform_device(dev); - - platform_device_unregister(pdev); - - return 0; -} - static int dwc3_exynos_probe(struct platform_device *pdev) { struct dwc3_exynos *exynos; @@ -142,7 +133,7 @@ static int dwc3_exynos_remove(struct platform_device *pdev) struct dwc3_exynos *exynos = platform_get_drvdata(pdev); int i; - device_for_each_child(&pdev->dev, NULL, dwc3_exynos_remove_child); + of_platform_depopulate(&pdev->dev); for (i = exynos->num_clks - 1; i >= 0; i--) clk_disable_unprepare(exynos->clks[i]); diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 026d4029bda6..6d524fa76443 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -291,7 +291,8 @@ int dwc3_send_gadget_ep_cmd(struct dwc3_ep *dep, unsigned int cmd, * * DWC_usb3 3.30a and DWC_usb31 1.90a programming guide section 3.2.2 */ - if (dwc->gadget->speed <= USB_SPEED_HIGH) { + if (dwc->gadget->speed <= USB_SPEED_HIGH || + DWC3_DEPCMD_CMD(cmd) == DWC3_DEPCMD_ENDTRANSFER) { reg = dwc3_readl(dwc->regs, DWC3_GUSB2PHYCFG(0)); if (unlikely(reg & DWC3_GUSB2PHYCFG_SUSPHY)) { saved_config |= DWC3_GUSB2PHYCFG_SUSPHY; @@ -1023,12 +1024,6 @@ static int __dwc3_gadget_ep_disable(struct dwc3_ep *dep) reg &= ~DWC3_DALEPENA_EP(dep->number); dwc3_writel(dwc->regs, DWC3_DALEPENA, reg); - /* Clear out the ep descriptors for non-ep0 */ - if (dep->number > 1) { - dep->endpoint.comp_desc = NULL; - dep->endpoint.desc = NULL; - } - dwc3_remove_requests(dwc, dep, -ESHUTDOWN); dep->stream_capable = false; @@ -1043,6 +1038,12 @@ static int __dwc3_gadget_ep_disable(struct dwc3_ep *dep) mask |= (DWC3_EP_DELAY_STOP | DWC3_EP_TRANSFER_STARTED); dep->flags &= mask; + /* Clear out the ep descriptors for non-ep0 */ + if (dep->number > 1) { + dep->endpoint.comp_desc = NULL; + dep->endpoint.desc = NULL; + } + return 0; } diff --git a/drivers/usb/gadget/function/uvc_v4l2.c b/drivers/usb/gadget/function/uvc_v4l2.c index c4ed48d6b8a4..a189b08bba80 100644 --- a/drivers/usb/gadget/function/uvc_v4l2.c +++ b/drivers/usb/gadget/function/uvc_v4l2.c @@ -199,16 +199,6 @@ uvc_send_response(struct uvc_device *uvc, struct uvc_request_data *data) * V4L2 ioctls */ -struct uvc_format { - u8 bpp; - u32 fcc; -}; - -static struct uvc_format uvc_formats[] = { - { 16, V4L2_PIX_FMT_YUYV }, - { 0, V4L2_PIX_FMT_MJPEG }, -}; - static int uvc_v4l2_querycap(struct file *file, void *fh, struct v4l2_capability *cap) { @@ -243,47 +233,6 @@ uvc_v4l2_get_format(struct file *file, void *fh, struct v4l2_format *fmt) } static int -uvc_v4l2_set_format(struct file *file, void *fh, struct v4l2_format *fmt) -{ - struct video_device *vdev = video_devdata(file); - struct uvc_device *uvc = video_get_drvdata(vdev); - struct uvc_video *video = &uvc->video; - struct uvc_format *format; - unsigned int imagesize; - unsigned int bpl; - unsigned int i; - - for (i = 0; i < ARRAY_SIZE(uvc_formats); ++i) { - format = &uvc_formats[i]; - if (format->fcc == fmt->fmt.pix.pixelformat) - break; - } - - if (i == ARRAY_SIZE(uvc_formats)) { - uvcg_info(&uvc->func, "Unsupported format 0x%08x.\n", - fmt->fmt.pix.pixelformat); - return -EINVAL; - } - - bpl = format->bpp * fmt->fmt.pix.width / 8; - imagesize = bpl ? bpl * fmt->fmt.pix.height : fmt->fmt.pix.sizeimage; - - video->fcc = format->fcc; - video->bpp = format->bpp; - video->width = fmt->fmt.pix.width; - video->height = fmt->fmt.pix.height; - video->imagesize = imagesize; - - fmt->fmt.pix.field = V4L2_FIELD_NONE; - fmt->fmt.pix.bytesperline = bpl; - fmt->fmt.pix.sizeimage = imagesize; - fmt->fmt.pix.colorspace = V4L2_COLORSPACE_SRGB; - fmt->fmt.pix.priv = 0; - - return 0; -} - -static int uvc_v4l2_try_format(struct file *file, void *fh, struct v4l2_format *fmt) { struct video_device *vdev = video_devdata(file); @@ -324,6 +273,27 @@ uvc_v4l2_try_format(struct file *file, void *fh, struct v4l2_format *fmt) } static int +uvc_v4l2_set_format(struct file *file, void *fh, struct v4l2_format *fmt) +{ + struct video_device *vdev = video_devdata(file); + struct uvc_device *uvc = video_get_drvdata(vdev); + struct uvc_video *video = &uvc->video; + int ret; + + ret = uvc_v4l2_try_format(file, fh, fmt); + if (ret) + return ret; + + video->fcc = fmt->fmt.pix.pixelformat; + video->bpp = fmt->fmt.pix.bytesperline * 8 / video->width; + video->width = fmt->fmt.pix.width; + video->height = fmt->fmt.pix.height; + video->imagesize = fmt->fmt.pix.sizeimage; + + return ret; +} + +static int uvc_v4l2_enum_frameintervals(struct file *file, void *fh, struct v4l2_frmivalenum *fival) { diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c index 098b62f7b701..c0143d38df83 100644 --- a/drivers/video/fbdev/core/fbcon.c +++ b/drivers/video/fbdev/core/fbcon.c @@ -577,7 +577,7 @@ static void fbcon_prepare_logo(struct vc_data *vc, struct fb_info *info, if (scr_readw(r) != vc->vc_video_erase_char) break; if (r != q && new_rows >= rows + logo_lines) { - save = kmalloc(array3_size(logo_lines, new_cols, 2), + save = kzalloc(array3_size(logo_lines, new_cols, 2), GFP_KERNEL); if (save) { int i = min(cols, new_cols); diff --git a/drivers/virt/coco/sev-guest/sev-guest.c b/drivers/virt/coco/sev-guest/sev-guest.c index f422f9c58ba7..1ea6d2e5b218 100644 --- a/drivers/virt/coco/sev-guest/sev-guest.c +++ b/drivers/virt/coco/sev-guest/sev-guest.c @@ -67,8 +67,27 @@ static bool is_vmpck_empty(struct snp_guest_dev *snp_dev) return true; } +/* + * If an error is received from the host or AMD Secure Processor (ASP) there + * are two options. Either retry the exact same encrypted request or discontinue + * using the VMPCK. + * + * This is because in the current encryption scheme GHCB v2 uses AES-GCM to + * encrypt the requests. The IV for this scheme is the sequence number. GCM + * cannot tolerate IV reuse. + * + * The ASP FW v1.51 only increments the sequence numbers on a successful + * guest<->ASP back and forth and only accepts messages at its exact sequence + * number. + * + * So if the sequence number were to be reused the encryption scheme is + * vulnerable. If the sequence number were incremented for a fresh IV the ASP + * will reject the request. + */ static void snp_disable_vmpck(struct snp_guest_dev *snp_dev) { + dev_alert(snp_dev->dev, "Disabling vmpck_id %d to prevent IV reuse.\n", + vmpck_id); memzero_explicit(snp_dev->vmpck, VMPCK_KEY_LEN); snp_dev->vmpck = NULL; } @@ -321,34 +340,71 @@ static int handle_guest_request(struct snp_guest_dev *snp_dev, u64 exit_code, in if (rc) return rc; - /* Call firmware to process the request */ + /* + * Call firmware to process the request. In this function the encrypted + * message enters shared memory with the host. So after this call the + * sequence number must be incremented or the VMPCK must be deleted to + * prevent reuse of the IV. + */ rc = snp_issue_guest_request(exit_code, &snp_dev->input, &err); + + /* + * If the extended guest request fails due to having too small of a + * certificate data buffer, retry the same guest request without the + * extended data request in order to increment the sequence number + * and thus avoid IV reuse. + */ + if (exit_code == SVM_VMGEXIT_EXT_GUEST_REQUEST && + err == SNP_GUEST_REQ_INVALID_LEN) { + const unsigned int certs_npages = snp_dev->input.data_npages; + + exit_code = SVM_VMGEXIT_GUEST_REQUEST; + + /* + * If this call to the firmware succeeds, the sequence number can + * be incremented allowing for continued use of the VMPCK. If + * there is an error reflected in the return value, this value + * is checked further down and the result will be the deletion + * of the VMPCK and the error code being propagated back to the + * user as an ioctl() return code. + */ + rc = snp_issue_guest_request(exit_code, &snp_dev->input, &err); + + /* + * Override the error to inform callers the given extended + * request buffer size was too small and give the caller the + * required buffer size. + */ + err = SNP_GUEST_REQ_INVALID_LEN; + snp_dev->input.data_npages = certs_npages; + } + if (fw_err) *fw_err = err; - if (rc) - return rc; + if (rc) { + dev_alert(snp_dev->dev, + "Detected error from ASP request. rc: %d, fw_err: %llu\n", + rc, *fw_err); + goto disable_vmpck; + } - /* - * The verify_and_dec_payload() will fail only if the hypervisor is - * actively modifying the message header or corrupting the encrypted payload. - * This hints that hypervisor is acting in a bad faith. Disable the VMPCK so that - * the key cannot be used for any communication. The key is disabled to ensure - * that AES-GCM does not use the same IV while encrypting the request payload. - */ rc = verify_and_dec_payload(snp_dev, resp_buf, resp_sz); if (rc) { dev_alert(snp_dev->dev, - "Detected unexpected decode failure, disabling the vmpck_id %d\n", - vmpck_id); - snp_disable_vmpck(snp_dev); - return rc; + "Detected unexpected decode failure from ASP. rc: %d\n", + rc); + goto disable_vmpck; } /* Increment to new message sequence after payload decryption was successful. */ snp_inc_msg_seqno(snp_dev); return 0; + +disable_vmpck: + snp_disable_vmpck(snp_dev); + return rc; } static int get_report(struct snp_guest_dev *snp_dev, struct snp_guest_request_ioctl *arg) diff --git a/fs/afs/fs_probe.c b/fs/afs/fs_probe.c index c0031a3ab42f..3ac5fcf98d0d 100644 --- a/fs/afs/fs_probe.c +++ b/fs/afs/fs_probe.c @@ -167,8 +167,8 @@ responded: clear_bit(AFS_SERVER_FL_HAS_FS64, &server->flags); } - if (rxrpc_kernel_get_srtt(call->net->socket, call->rxcall, &rtt_us) && - rtt_us < server->probe.rtt) { + rxrpc_kernel_get_srtt(call->net->socket, call->rxcall, &rtt_us); + if (rtt_us < server->probe.rtt) { server->probe.rtt = rtt_us; server->rtt = rtt_us; alist->preferred = index; diff --git a/fs/afs/server.c b/fs/afs/server.c index 4981baf97835..b5237206eac3 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c @@ -406,7 +406,7 @@ void afs_put_server(struct afs_net *net, struct afs_server *server, if (!server) return; - a = atomic_inc_return(&server->active); + a = atomic_read(&server->active); zero = __refcount_dec_and_test(&server->ref, &r); trace_afs_server(debug_id, r - 1, a, reason); if (unlikely(zero)) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index a9543f01184c..dcb510f38dda 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -4663,7 +4663,12 @@ int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path, int ret; int i; - ASSERT(!path->nowait); + /* + * The nowait semantics are used only for write paths, where we don't + * use the tree mod log and sequence numbers. + */ + if (time_seq) + ASSERT(!path->nowait); nritems = btrfs_header_nritems(path->nodes[0]); if (nritems == 0) @@ -4683,7 +4688,14 @@ again: if (path->need_commit_sem) { path->need_commit_sem = 0; need_commit_sem = true; - down_read(&fs_info->commit_root_sem); + if (path->nowait) { + if (!down_read_trylock(&fs_info->commit_root_sem)) { + ret = -EAGAIN; + goto done; + } + } else { + down_read(&fs_info->commit_root_sem); + } } ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); } @@ -4759,7 +4771,7 @@ again: next = c; ret = read_block_for_search(root, path, &next, level, slot, &key); - if (ret == -EAGAIN) + if (ret == -EAGAIN && !path->nowait) goto again; if (ret < 0) { @@ -4769,6 +4781,10 @@ again: if (!path->skip_locking) { ret = btrfs_try_tree_read_lock(next); + if (!ret && path->nowait) { + ret = -EAGAIN; + goto done; + } if (!ret && time_seq) { /* * If we don't get the lock, we may be racing @@ -4799,7 +4815,7 @@ again: ret = read_block_for_search(root, path, &next, level, 0, &key); - if (ret == -EAGAIN) + if (ret == -EAGAIN && !path->nowait) goto again; if (ret < 0) { @@ -4807,8 +4823,16 @@ again: goto done; } - if (!path->skip_locking) - btrfs_tree_read_lock(next); + if (!path->skip_locking) { + if (path->nowait) { + if (!btrfs_try_tree_read_lock(next)) { + ret = -EAGAIN; + goto done; + } + } else { + btrfs_tree_read_lock(next); + } + } } ret = 0; done: diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index d5dd8bed1488..5ba2e810dc6e 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3105,6 +3105,8 @@ static int btrfs_ioctl_get_subvol_info(struct inode *inode, void __user *argp) } } + btrfs_free_path(path); + path = NULL; if (copy_to_user(argp, subvol_info, sizeof(*subvol_info))) ret = -EFAULT; @@ -3194,6 +3196,8 @@ static int btrfs_ioctl_get_subvol_rootref(struct btrfs_root *root, } out: + btrfs_free_path(path); + if (!ret || ret == -EOVERFLOW) { rootrefs->num_items = found; /* update min_treeid for next search */ @@ -3205,7 +3209,6 @@ out: } kfree(rootrefs); - btrfs_free_path(path); return ret; } @@ -4231,6 +4234,8 @@ static long btrfs_ioctl_ino_to_path(struct btrfs_root *root, void __user *arg) ipath->fspath->val[i] = rel_ptr; } + btrfs_free_path(path); + path = NULL; ret = copy_to_user((void __user *)(unsigned long)ipa->fspath, ipath->fspath, size); if (ret) { @@ -4281,21 +4286,20 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_fs_info *fs_info, size = min_t(u32, loi->size, SZ_16M); } - path = btrfs_alloc_path(); - if (!path) { - ret = -ENOMEM; - goto out; - } - inodes = init_data_container(size); if (IS_ERR(inodes)) { ret = PTR_ERR(inodes); - inodes = NULL; - goto out; + goto out_loi; } + path = btrfs_alloc_path(); + if (!path) { + ret = -ENOMEM; + goto out; + } ret = iterate_inodes_from_logical(loi->logical, fs_info, path, inodes, ignore_offset); + btrfs_free_path(path); if (ret == -EINVAL) ret = -ENOENT; if (ret < 0) @@ -4307,7 +4311,6 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_fs_info *fs_info, ret = -EFAULT; out: - btrfs_free_path(path); kvfree(inodes); out_loi: kfree(loi); diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 9334c3157c22..b74105a10f16 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -2951,14 +2951,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid, dstgroup->rsv_rfer = inherit->lim.rsv_rfer; dstgroup->rsv_excl = inherit->lim.rsv_excl; - ret = update_qgroup_limit_item(trans, dstgroup); - if (ret) { - qgroup_mark_inconsistent(fs_info); - btrfs_info(fs_info, - "unable to update quota limit for %llu", - dstgroup->qgroupid); - goto unlock; - } + qgroup_dirty(fs_info, dstgroup); } if (srcid) { diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 145c84b44fd0..1c4b693ee4a3 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -5702,6 +5702,7 @@ static int clone_range(struct send_ctx *sctx, struct btrfs_path *dst_path, u64 ext_len; u64 clone_len; u64 clone_data_offset; + bool crossed_src_i_size = false; if (slot >= btrfs_header_nritems(leaf)) { ret = btrfs_next_leaf(clone_root->root, path); @@ -5759,8 +5760,10 @@ static int clone_range(struct send_ctx *sctx, struct btrfs_path *dst_path, if (key.offset >= clone_src_i_size) break; - if (key.offset + ext_len > clone_src_i_size) + if (key.offset + ext_len > clone_src_i_size) { ext_len = clone_src_i_size - key.offset; + crossed_src_i_size = true; + } clone_data_offset = btrfs_file_extent_offset(leaf, ei); if (btrfs_file_extent_disk_bytenr(leaf, ei) == disk_byte) { @@ -5821,6 +5824,25 @@ static int clone_range(struct send_ctx *sctx, struct btrfs_path *dst_path, ret = send_clone(sctx, offset, clone_len, clone_root); } + } else if (crossed_src_i_size && clone_len < len) { + /* + * If we are at i_size of the clone source inode and we + * can not clone from it, terminate the loop. This is + * to avoid sending two write operations, one with a + * length matching clone_len and the final one after + * this loop with a length of len - clone_len. + * + * When using encoded writes (BTRFS_SEND_FLAG_COMPRESSED + * was passed to the send ioctl), this helps avoid + * sending an encoded write for an offset that is not + * sector size aligned, in case the i_size of the source + * inode is not sector size aligned. That will make the + * receiver fallback to decompression of the data and + * writing it using regular buffered IO, therefore while + * not incorrect, it's not optimal due decompression and + * possible re-compression at the receiver. + */ + break; } else { ret = send_extent_data(sctx, dst_path, offset, clone_len); diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index 699b54b3acaa..74fef1f49c35 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -2321,8 +2321,11 @@ int __init btrfs_init_sysfs(void) #ifdef CONFIG_BTRFS_DEBUG ret = sysfs_create_group(&btrfs_kset->kobj, &btrfs_debug_feature_attr_group); - if (ret) - goto out2; + if (ret) { + sysfs_unmerge_group(&btrfs_kset->kobj, + &btrfs_static_feature_attr_group); + goto out_remove_group; + } #endif return 0; diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 813986e38258..c3cf3dabe0b1 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -3694,15 +3694,29 @@ static int process_dir_items_leaf(struct btrfs_trans_handle *trans, u64 *last_old_dentry_offset) { struct btrfs_root *log = inode->root->log_root; - struct extent_buffer *src = path->nodes[0]; - const int nritems = btrfs_header_nritems(src); + struct extent_buffer *src; + const int nritems = btrfs_header_nritems(path->nodes[0]); const u64 ino = btrfs_ino(inode); bool last_found = false; int batch_start = 0; int batch_size = 0; int i; - for (i = path->slots[0]; i < nritems; i++) { + /* + * We need to clone the leaf, release the read lock on it, and use the + * clone before modifying the log tree. See the comment at copy_items() + * about why we need to do this. + */ + src = btrfs_clone_extent_buffer(path->nodes[0]); + if (!src) + return -ENOMEM; + + i = path->slots[0]; + btrfs_release_path(path); + path->nodes[0] = src; + path->slots[0] = i; + + for (; i < nritems; i++) { struct btrfs_dir_item *di; struct btrfs_key key; int ret; @@ -4303,7 +4317,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, { struct btrfs_root *log = inode->root->log_root; struct btrfs_file_extent_item *extent; - struct extent_buffer *src = src_path->nodes[0]; + struct extent_buffer *src; int ret = 0; struct btrfs_key *ins_keys; u32 *ins_sizes; @@ -4314,6 +4328,43 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, const bool skip_csum = (inode->flags & BTRFS_INODE_NODATASUM); const u64 i_size = i_size_read(&inode->vfs_inode); + /* + * To keep lockdep happy and avoid deadlocks, clone the source leaf and + * use the clone. This is because otherwise we would be changing the log + * tree, to insert items from the subvolume tree or insert csum items, + * while holding a read lock on a leaf from the subvolume tree, which + * creates a nasty lock dependency when COWing log tree nodes/leaves: + * + * 1) Modifying the log tree triggers an extent buffer allocation while + * holding a write lock on a parent extent buffer from the log tree. + * Allocating the pages for an extent buffer, or the extent buffer + * struct, can trigger inode eviction and finally the inode eviction + * will trigger a release/remove of a delayed node, which requires + * taking the delayed node's mutex; + * + * 2) Allocating a metadata extent for a log tree can trigger the async + * reclaim thread and make us wait for it to release enough space and + * unblock our reservation ticket. The reclaim thread can start + * flushing delayed items, and that in turn results in the need to + * lock delayed node mutexes and in the need to write lock extent + * buffers of a subvolume tree - all this while holding a write lock + * on the parent extent buffer in the log tree. + * + * So one task in scenario 1) running in parallel with another task in + * scenario 2) could lead to a deadlock, one wanting to lock a delayed + * node mutex while having a read lock on a leaf from the subvolume, + * while the other is holding the delayed node's mutex and wants to + * write lock the same subvolume leaf for flushing delayed items. + */ + src = btrfs_clone_extent_buffer(src_path->nodes[0]); + if (!src) + return -ENOMEM; + + i = src_path->slots[0]; + btrfs_release_path(src_path); + src_path->nodes[0] = src; + src_path->slots[0] = i; + ins_data = kmalloc(nr * sizeof(struct btrfs_key) + nr * sizeof(u32), GFP_NOFS); if (!ins_data) diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 1912abf6d020..c9e2b0c85309 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -134,7 +134,8 @@ static int sb_write_pointer(struct block_device *bdev, struct blk_zone *zones, super[i] = page_address(page[i]); } - if (super[0]->generation > super[1]->generation) + if (btrfs_super_generation(super[0]) > + btrfs_super_generation(super[1])) sector = zones[1].start; else sector = zones[0].start; @@ -466,7 +467,7 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache) goto out; } - zones = kcalloc(BTRFS_REPORT_NR_ZONES, sizeof(struct blk_zone), GFP_KERNEL); + zones = kvcalloc(BTRFS_REPORT_NR_ZONES, sizeof(struct blk_zone), GFP_KERNEL); if (!zones) { ret = -ENOMEM; goto out; @@ -585,7 +586,7 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache) } - kfree(zones); + kvfree(zones); switch (bdev_zoned_model(bdev)) { case BLK_ZONED_HM: @@ -617,7 +618,7 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache) return 0; out: - kfree(zones); + kvfree(zones); out_free_zone_info: btrfs_destroy_dev_zone_info(device); diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index fe220686bba4..712a43161448 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -1281,7 +1281,7 @@ ssize_t cifs_file_copychunk_range(unsigned int xid, rc = filemap_write_and_wait_range(src_inode->i_mapping, off, off + len - 1); if (rc) - goto out; + goto unlock; /* should we flush first and last page first */ truncate_inode_pages(&target_inode->i_data, 0); @@ -1297,6 +1297,8 @@ ssize_t cifs_file_copychunk_range(unsigned int xid, * that target is updated on the server */ CIFS_I(target_inode)->time = 0; + +unlock: /* although unlocking in the reverse order from locking is not * strictly necessary here it is a little cleaner to be consistent */ diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index 92e4278ec35d..9e7d9f0baa18 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -302,14 +302,14 @@ cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server) /* now drop the ref to the current iface */ if (old_iface && iface) { - kref_put(&old_iface->refcount, release_iface); cifs_dbg(FYI, "replacing iface: %pIS with %pIS\n", &old_iface->sockaddr, &iface->sockaddr); - } else if (old_iface) { kref_put(&old_iface->refcount, release_iface); + } else if (old_iface) { cifs_dbg(FYI, "releasing ref to iface: %pIS\n", &old_iface->sockaddr); + kref_put(&old_iface->refcount, release_iface); } else { WARN_ON(!iface); cifs_dbg(FYI, "adding new iface: %pIS\n", &iface->sockaddr); diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index f1956288307f..6c399a8b22b3 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -5184,6 +5184,7 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle, * and it is decreased till we reach start. */ again: + ret = 0; if (SHIFT == SHIFT_LEFT) iterator = &start; else @@ -5227,14 +5228,21 @@ again: ext4_ext_get_actual_len(extent); } else { extent = EXT_FIRST_EXTENT(path[depth].p_hdr); - if (le32_to_cpu(extent->ee_block) > 0) + if (le32_to_cpu(extent->ee_block) > start) *iterator = le32_to_cpu(extent->ee_block) - 1; - else - /* Beginning is reached, end of the loop */ + else if (le32_to_cpu(extent->ee_block) == start) iterator = NULL; - /* Update path extent in case we need to stop */ - while (le32_to_cpu(extent->ee_block) < start) + else { + extent = EXT_LAST_EXTENT(path[depth].p_hdr); + while (le32_to_cpu(extent->ee_block) >= start) + extent--; + + if (extent == EXT_LAST_EXTENT(path[depth].p_hdr)) + break; + extent++; + iterator = NULL; + } path[depth].p_ext = extent; } ret = ext4_ext_shift_path_extents(path, shift, inode, diff --git a/fs/file.c b/fs/file.c index 5f9c802a5d8d..c942c89ca4cd 100644 --- a/fs/file.c +++ b/fs/file.c @@ -1003,7 +1003,16 @@ static unsigned long __fget_light(unsigned int fd, fmode_t mask) struct files_struct *files = current->files; struct file *file; - if (atomic_read(&files->count) == 1) { + /* + * If another thread is concurrently calling close_fd() followed + * by put_files_struct(), we must not observe the old table + * entry combined with the new refcount - otherwise we could + * return a file that is concurrently being freed. + * + * atomic_read_acquire() pairs with atomic_dec_and_test() in + * put_files_struct(). + */ + if (atomic_read_acquire(&files->count) == 1) { file = files_lookup_fd_raw(files, fd); if (!file || unlikely(file->f_mode & mask)) return 0; diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 443f83382b9b..9958d4020771 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -1712,18 +1712,26 @@ static int writeback_single_inode(struct inode *inode, wb = inode_to_wb_and_lock_list(inode); spin_lock(&inode->i_lock); /* - * If the inode is now fully clean, then it can be safely removed from - * its writeback list (if any). Otherwise the flusher threads are - * responsible for the writeback lists. + * If the inode is freeing, its i_io_list shoudn't be updated + * as it can be finally deleted at this moment. */ - if (!(inode->i_state & I_DIRTY_ALL)) - inode_cgwb_move_to_attached(inode, wb); - else if (!(inode->i_state & I_SYNC_QUEUED)) { - if ((inode->i_state & I_DIRTY)) - redirty_tail_locked(inode, wb); - else if (inode->i_state & I_DIRTY_TIME) { - inode->dirtied_when = jiffies; - inode_io_list_move_locked(inode, wb, &wb->b_dirty_time); + if (!(inode->i_state & I_FREEING)) { + /* + * If the inode is now fully clean, then it can be safely + * removed from its writeback list (if any). Otherwise the + * flusher threads are responsible for the writeback lists. + */ + if (!(inode->i_state & I_DIRTY_ALL)) + inode_cgwb_move_to_attached(inode, wb); + else if (!(inode->i_state & I_SYNC_QUEUED)) { + if ((inode->i_state & I_DIRTY)) + redirty_tail_locked(inode, wb); + else if (inode->i_state & I_DIRTY_TIME) { + inode->dirtied_when = jiffies; + inode_io_list_move_locked(inode, + wb, + &wb->b_dirty_time); + } } } diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c index 451d8a077e12..bce2492186d0 100644 --- a/fs/fscache/cookie.c +++ b/fs/fscache/cookie.c @@ -605,6 +605,14 @@ again: set_bit(FSCACHE_COOKIE_DO_PREP_TO_WRITE, &cookie->flags); queue = true; } + /* + * We could race with cookie_lru which may set LRU_DISCARD bit + * but has yet to run the cookie state machine. If this happens + * and another thread tries to use the cookie, clear LRU_DISCARD + * so we don't end up withdrawing the cookie while in use. + */ + if (test_and_clear_bit(FSCACHE_COOKIE_DO_LRU_DISCARD, &cookie->flags)) + fscache_see_cookie(cookie, fscache_cookie_see_lru_discard_clear); break; case FSCACHE_COOKIE_STATE_FAILED: diff --git a/fs/fscache/volume.c b/fs/fscache/volume.c index a058e0136bfe..ab8ceddf9efa 100644 --- a/fs/fscache/volume.c +++ b/fs/fscache/volume.c @@ -203,7 +203,11 @@ static struct fscache_volume *fscache_alloc_volume(const char *volume_key, struct fscache_volume *volume; struct fscache_cache *cache; size_t klen, hlen; - char *key; + u8 *key; + + klen = strlen(volume_key); + if (klen > NAME_MAX) + return NULL; if (!coherency_data) coherency_len = 0; @@ -229,7 +233,6 @@ static struct fscache_volume *fscache_alloc_volume(const char *volume_key, /* Stick the length on the front of the key and pad it out to make * hashing easier. */ - klen = strlen(volume_key); hlen = round_up(1 + klen + 1, sizeof(__le32)); key = kzalloc(hlen, GFP_KERNEL); if (!key) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 71bfb663aac5..89f4741728ba 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -2963,11 +2963,9 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, .mode = mode }; int err; - bool lock_inode = !(mode & FALLOC_FL_KEEP_SIZE) || - (mode & (FALLOC_FL_PUNCH_HOLE | - FALLOC_FL_ZERO_RANGE)); - - bool block_faults = FUSE_IS_DAX(inode) && lock_inode; + bool block_faults = FUSE_IS_DAX(inode) && + (!(mode & FALLOC_FL_KEEP_SIZE) || + (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE))); if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE)) @@ -2976,22 +2974,20 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, if (fm->fc->no_fallocate) return -EOPNOTSUPP; - if (lock_inode) { - inode_lock(inode); - if (block_faults) { - filemap_invalidate_lock(inode->i_mapping); - err = fuse_dax_break_layouts(inode, 0, 0); - if (err) - goto out; - } + inode_lock(inode); + if (block_faults) { + filemap_invalidate_lock(inode->i_mapping); + err = fuse_dax_break_layouts(inode, 0, 0); + if (err) + goto out; + } - if (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE)) { - loff_t endbyte = offset + length - 1; + if (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE)) { + loff_t endbyte = offset + length - 1; - err = fuse_writeback_range(inode, offset, endbyte); - if (err) - goto out; - } + err = fuse_writeback_range(inode, offset, endbyte); + if (err) + goto out; } if (!(mode & FALLOC_FL_KEEP_SIZE) && @@ -3039,8 +3035,7 @@ out: if (block_faults) filemap_invalidate_unlock(inode->i_mapping); - if (lock_inode) - inode_unlock(inode); + inode_unlock(inode); fuse_flush_time_update(inode); diff --git a/fs/ksmbd/vfs.c b/fs/ksmbd/vfs.c index 8de970d6146f..94b8ed4ef870 100644 --- a/fs/ksmbd/vfs.c +++ b/fs/ksmbd/vfs.c @@ -1794,9 +1794,9 @@ int ksmbd_vfs_copy_file_ranges(struct ksmbd_work *work, ret = vfs_copy_file_range(src_fp->filp, src_off, dst_fp->filp, dst_off, len, 0); if (ret == -EOPNOTSUPP || ret == -EXDEV) - ret = generic_copy_file_range(src_fp->filp, src_off, - dst_fp->filp, dst_off, - len, 0); + ret = vfs_copy_file_range(src_fp->filp, src_off, + dst_fp->filp, dst_off, len, + COPY_FILE_SPLICE); if (ret < 0) return ret; diff --git a/fs/namei.c b/fs/namei.c index 578c2110df02..9155ecb547ce 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3591,6 +3591,7 @@ static int vfs_tmpfile(struct user_namespace *mnt_userns, struct inode *dir = d_inode(parentpath->dentry); struct inode *inode; int error; + int open_flag = file->f_flags; /* we want directory to be writable */ error = inode_permission(mnt_userns, dir, MAY_WRITE | MAY_EXEC); @@ -3613,7 +3614,7 @@ static int vfs_tmpfile(struct user_namespace *mnt_userns, if (error) return error; inode = file_inode(file); - if (!(file->f_flags & O_EXCL)) { + if (!(open_flag & O_EXCL)) { spin_lock(&inode->i_lock); inode->i_state |= I_LINKABLE; spin_unlock(&inode->i_lock); diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index f650afedd67f..849a720ab43f 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -596,8 +596,8 @@ ssize_t nfsd_copy_file_range(struct file *src, u64 src_pos, struct file *dst, ret = vfs_copy_file_range(src, src_pos, dst, dst_pos, count, 0); if (ret == -EOPNOTSUPP || ret == -EXDEV) - ret = generic_copy_file_range(src, src_pos, dst, dst_pos, - count, 0); + ret = vfs_copy_file_range(src, src_pos, dst, dst_pos, count, + COPY_FILE_SPLICE); return ret; } @@ -871,10 +871,11 @@ nfsd_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf, struct svc_rqst *rqstp = sd->u.data; struct page *page = buf->page; // may be a compound one unsigned offset = buf->offset; + struct page *last_page; - page += offset / PAGE_SIZE; - for (int i = sd->len; i > 0; i -= PAGE_SIZE) - svc_rqst_replace_page(rqstp, page++); + last_page = page + (offset + sd->len - 1) / PAGE_SIZE; + for (page += offset / PAGE_SIZE; page <= last_page; page++) + svc_rqst_replace_page(rqstp, page); if (rqstp->rq_res.page_len == 0) // first call rqstp->rq_res.page_base = offset % PAGE_SIZE; rqstp->rq_res.page_len += sd->len; diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c index 3b55e239705f..9930fa901039 100644 --- a/fs/nilfs2/dat.c +++ b/fs/nilfs2/dat.c @@ -111,6 +111,13 @@ static void nilfs_dat_commit_free(struct inode *dat, kunmap_atomic(kaddr); nilfs_dat_commit_entry(dat, req); + + if (unlikely(req->pr_desc_bh == NULL || req->pr_bitmap_bh == NULL)) { + nilfs_error(dat->i_sb, + "state inconsistency probably due to duplicate use of vblocknr = %llu", + (unsigned long long)req->pr_entry_nr); + return; + } nilfs_palloc_commit_free_entry(dat, req); } diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c index 77ff8e95421f..dc359b56fdfa 100644 --- a/fs/nilfs2/sufile.c +++ b/fs/nilfs2/sufile.c @@ -495,14 +495,22 @@ void nilfs_sufile_do_free(struct inode *sufile, __u64 segnum, int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum) { struct buffer_head *bh; + void *kaddr; + struct nilfs_segment_usage *su; int ret; + down_write(&NILFS_MDT(sufile)->mi_sem); ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &bh); if (!ret) { mark_buffer_dirty(bh); nilfs_mdt_mark_dirty(sufile); + kaddr = kmap_atomic(bh->b_page); + su = nilfs_sufile_block_get_segment_usage(sufile, segnum, bh, kaddr); + nilfs_segment_usage_set_dirty(su); + kunmap_atomic(kaddr); brelse(bh); } + up_write(&NILFS_MDT(sufile)->mi_sem); return ret; } diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index 5101131e6047..440960110a42 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c @@ -115,7 +115,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v) #endif show_val_kb(m, "PageTables: ", global_node_page_state(NR_PAGETABLE)); - show_val_kb(m, "SecPageTables: ", + show_val_kb(m, "SecPageTables: ", global_node_page_state(NR_SECONDARY_PAGETABLE)); show_val_kb(m, "NFS_Unstable: ", 0); diff --git a/fs/read_write.c b/fs/read_write.c index 328ce8cf9a85..24b9668d6377 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1388,6 +1388,8 @@ ssize_t generic_copy_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, size_t len, unsigned int flags) { + lockdep_assert(sb_write_started(file_inode(file_out)->i_sb)); + return do_splice_direct(file_in, &pos_in, file_out, &pos_out, len > MAX_RW_COUNT ? MAX_RW_COUNT : len, 0); } @@ -1424,7 +1426,9 @@ static int generic_copy_file_checks(struct file *file_in, loff_t pos_in, * and several different sets of file_operations, but they all end up * using the same ->copy_file_range() function pointer. */ - if (file_out->f_op->copy_file_range) { + if (flags & COPY_FILE_SPLICE) { + /* cross sb splice is allowed */ + } else if (file_out->f_op->copy_file_range) { if (file_in->f_op->copy_file_range != file_out->f_op->copy_file_range) return -EXDEV; @@ -1474,8 +1478,9 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, size_t len, unsigned int flags) { ssize_t ret; + bool splice = flags & COPY_FILE_SPLICE; - if (flags != 0) + if (flags & ~COPY_FILE_SPLICE) return -EINVAL; ret = generic_copy_file_checks(file_in, pos_in, file_out, pos_out, &len, @@ -1501,14 +1506,14 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, * same sb using clone, but for filesystems where both clone and copy * are supported (e.g. nfs,cifs), we only call the copy method. */ - if (file_out->f_op->copy_file_range) { + if (!splice && file_out->f_op->copy_file_range) { ret = file_out->f_op->copy_file_range(file_in, pos_in, file_out, pos_out, len, flags); goto done; } - if (file_in->f_op->remap_file_range && + if (!splice && file_in->f_op->remap_file_range && file_inode(file_in)->i_sb == file_inode(file_out)->i_sb) { ret = file_in->f_op->remap_file_range(file_in, pos_in, file_out, pos_out, @@ -1528,6 +1533,8 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, * consistent story about which filesystems support copy_file_range() * and which filesystems do not, that will allow userspace tools to * make consistent desicions w.r.t using copy_file_range(). + * + * We also get here if caller (e.g. nfsd) requested COPY_FILE_SPLICE. */ ret = generic_copy_file_range(file_in, pos_in, file_out, pos_out, len, flags); @@ -1582,6 +1589,10 @@ SYSCALL_DEFINE6(copy_file_range, int, fd_in, loff_t __user *, off_in, pos_out = f_out.file->f_pos; } + ret = -EINVAL; + if (flags != 0) + goto out; + ret = vfs_copy_file_range(f_in.file, pos_in, f_out.file, pos_out, len, flags); if (ret > 0) { diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c index abc9a85106f2..2c53fbb8d918 100644 --- a/fs/zonefs/super.c +++ b/fs/zonefs/super.c @@ -41,6 +41,13 @@ static void zonefs_account_active(struct inode *inode) return; /* + * For zones that transitioned to the offline or readonly condition, + * we only need to clear the active state. + */ + if (zi->i_flags & (ZONEFS_ZONE_OFFLINE | ZONEFS_ZONE_READONLY)) + goto out; + + /* * If the zone is active, that is, if it is explicitly open or * partially written, check if it was already accounted as active. */ @@ -53,6 +60,7 @@ static void zonefs_account_active(struct inode *inode) return; } +out: /* The zone is not active. If it was, update the active count */ if (zi->i_flags & ZONEFS_ZONE_ACTIVE) { zi->i_flags &= ~ZONEFS_ZONE_ACTIVE; @@ -324,6 +332,7 @@ static loff_t zonefs_check_zone_condition(struct inode *inode, inode->i_flags |= S_IMMUTABLE; inode->i_mode &= ~0777; zone->wp = zone->start; + zi->i_flags |= ZONEFS_ZONE_OFFLINE; return 0; case BLK_ZONE_COND_READONLY: /* @@ -342,8 +351,10 @@ static loff_t zonefs_check_zone_condition(struct inode *inode, zone->cond = BLK_ZONE_COND_OFFLINE; inode->i_mode &= ~0777; zone->wp = zone->start; + zi->i_flags |= ZONEFS_ZONE_OFFLINE; return 0; } + zi->i_flags |= ZONEFS_ZONE_READONLY; inode->i_mode &= ~0222; return i_size_read(inode); case BLK_ZONE_COND_FULL: @@ -1922,18 +1933,18 @@ static int __init zonefs_init(void) if (ret) return ret; - ret = register_filesystem(&zonefs_type); + ret = zonefs_sysfs_init(); if (ret) goto destroy_inodecache; - ret = zonefs_sysfs_init(); + ret = register_filesystem(&zonefs_type); if (ret) - goto unregister_fs; + goto sysfs_exit; return 0; -unregister_fs: - unregister_filesystem(&zonefs_type); +sysfs_exit: + zonefs_sysfs_exit(); destroy_inodecache: zonefs_destroy_inodecache(); @@ -1942,9 +1953,9 @@ destroy_inodecache: static void __exit zonefs_exit(void) { + unregister_filesystem(&zonefs_type); zonefs_sysfs_exit(); zonefs_destroy_inodecache(); - unregister_filesystem(&zonefs_type); } MODULE_AUTHOR("Damien Le Moal"); diff --git a/fs/zonefs/zonefs.h b/fs/zonefs/zonefs.h index 4b3de66c3233..1dbe78119ff1 100644 --- a/fs/zonefs/zonefs.h +++ b/fs/zonefs/zonefs.h @@ -39,8 +39,10 @@ static inline enum zonefs_ztype zonefs_zone_type(struct blk_zone *zone) return ZONEFS_ZTYPE_SEQ; } -#define ZONEFS_ZONE_OPEN (1 << 0) -#define ZONEFS_ZONE_ACTIVE (1 << 1) +#define ZONEFS_ZONE_OPEN (1U << 0) +#define ZONEFS_ZONE_ACTIVE (1U << 1) +#define ZONEFS_ZONE_OFFLINE (1U << 2) +#define ZONEFS_ZONE_READONLY (1U << 3) /* * In-memory inode data. diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index 492dce43236e..cab7cfebf40b 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -222,12 +222,16 @@ extern void tlb_remove_table(struct mmu_gather *tlb, void *table); #define tlb_needs_table_invalidate() (true) #endif +void tlb_remove_table_sync_one(void); + #else #ifdef tlb_needs_table_invalidate #error tlb_needs_table_invalidate() requires MMU_GATHER_RCU_TABLE_FREE #endif +static inline void tlb_remove_table_sync_one(void) { } + #endif /* CONFIG_MMU_GATHER_RCU_TABLE_FREE */ diff --git a/include/dt-bindings/memory/mediatek,mt8365-larb-port.h b/include/dt-bindings/memory/mediatek,mt8365-larb-port.h new file mode 100644 index 000000000000..56d5a5dd519e --- /dev/null +++ b/include/dt-bindings/memory/mediatek,mt8365-larb-port.h @@ -0,0 +1,90 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2022 MediaTek Inc. + * Author: Yong Wu <yong.wu@mediatek.com> + */ +#ifndef _DT_BINDINGS_MEMORY_MT8365_LARB_PORT_H_ +#define _DT_BINDINGS_MEMORY_MT8365_LARB_PORT_H_ + +#include <dt-bindings/memory/mtk-memory-port.h> + +#define M4U_LARB0_ID 0 +#define M4U_LARB1_ID 1 +#define M4U_LARB2_ID 2 +#define M4U_LARB3_ID 3 + +/* larb0 */ +#define M4U_PORT_DISP_OVL0 MTK_M4U_ID(M4U_LARB0_ID, 0) +#define M4U_PORT_DISP_OVL0_2L MTK_M4U_ID(M4U_LARB0_ID, 1) +#define M4U_PORT_DISP_RDMA0 MTK_M4U_ID(M4U_LARB0_ID, 2) +#define M4U_PORT_DISP_WDMA0 MTK_M4U_ID(M4U_LARB0_ID, 3) +#define M4U_PORT_DISP_RDMA1 MTK_M4U_ID(M4U_LARB0_ID, 4) +#define M4U_PORT_MDP_RDMA0 MTK_M4U_ID(M4U_LARB0_ID, 5) +#define M4U_PORT_MDP_WROT1 MTK_M4U_ID(M4U_LARB0_ID, 6) +#define M4U_PORT_MDP_WROT0 MTK_M4U_ID(M4U_LARB0_ID, 7) +#define M4U_PORT_MDP_RDMA1 MTK_M4U_ID(M4U_LARB0_ID, 8) +#define M4U_PORT_DISP_FAKE0 MTK_M4U_ID(M4U_LARB0_ID, 9) +#define M4U_PORT_APU_READ MTK_M4U_ID(M4U_LARB0_ID, 10) +#define M4U_PORT_APU_WRITE MTK_M4U_ID(M4U_LARB0_ID, 11) + +/* larb1 */ +#define M4U_PORT_VENC_RCPU MTK_M4U_ID(M4U_LARB1_ID, 0) +#define M4U_PORT_VENC_REC MTK_M4U_ID(M4U_LARB1_ID, 1) +#define M4U_PORT_VENC_BSDMA MTK_M4U_ID(M4U_LARB1_ID, 2) +#define M4U_PORT_VENC_SV_COMV MTK_M4U_ID(M4U_LARB1_ID, 3) +#define M4U_PORT_VENC_RD_COMV MTK_M4U_ID(M4U_LARB1_ID, 4) +#define M4U_PORT_VENC_NBM_RDMA MTK_M4U_ID(M4U_LARB1_ID, 5) +#define M4U_PORT_VENC_NBM_RDMA_LITE MTK_M4U_ID(M4U_LARB1_ID, 6) +#define M4U_PORT_JPGENC_Y_RDMA MTK_M4U_ID(M4U_LARB1_ID, 7) +#define M4U_PORT_JPGENC_C_RDMA MTK_M4U_ID(M4U_LARB1_ID, 8) +#define M4U_PORT_JPGENC_Q_TABLE MTK_M4U_ID(M4U_LARB1_ID, 9) +#define M4U_PORT_JPGENC_BSDMA MTK_M4U_ID(M4U_LARB1_ID, 10) +#define M4U_PORT_JPGDEC_WDMA MTK_M4U_ID(M4U_LARB1_ID, 11) +#define M4U_PORT_JPGDEC_BSDMA MTK_M4U_ID(M4U_LARB1_ID, 12) +#define M4U_PORT_VENC_NBM_WDMA MTK_M4U_ID(M4U_LARB1_ID, 13) +#define M4U_PORT_VENC_NBM_WDMA_LITE MTK_M4U_ID(M4U_LARB1_ID, 14) +#define M4U_PORT_VENC_CUR_LUMA MTK_M4U_ID(M4U_LARB1_ID, 15) +#define M4U_PORT_VENC_CUR_CHROMA MTK_M4U_ID(M4U_LARB1_ID, 16) +#define M4U_PORT_VENC_REF_LUMA MTK_M4U_ID(M4U_LARB1_ID, 17) +#define M4U_PORT_VENC_REF_CHROMA MTK_M4U_ID(M4U_LARB1_ID, 18) + +/* larb2 */ +#define M4U_PORT_CAM_IMGO MTK_M4U_ID(M4U_LARB2_ID, 0) +#define M4U_PORT_CAM_RRZO MTK_M4U_ID(M4U_LARB2_ID, 1) +#define M4U_PORT_CAM_AAO MTK_M4U_ID(M4U_LARB2_ID, 2) +#define M4U_PORT_CAM_LCS MTK_M4U_ID(M4U_LARB2_ID, 3) +#define M4U_PORT_CAM_ESFKO MTK_M4U_ID(M4U_LARB2_ID, 4) +#define M4U_PORT_CAM_CAM_SV0 MTK_M4U_ID(M4U_LARB2_ID, 5) +#define M4U_PORT_CAM_CAM_SV1 MTK_M4U_ID(M4U_LARB2_ID, 6) +#define M4U_PORT_CAM_LSCI MTK_M4U_ID(M4U_LARB2_ID, 7) +#define M4U_PORT_CAM_LSCI_D MTK_M4U_ID(M4U_LARB2_ID, 8) +#define M4U_PORT_CAM_AFO MTK_M4U_ID(M4U_LARB2_ID, 9) +#define M4U_PORT_CAM_SPARE MTK_M4U_ID(M4U_LARB2_ID, 10) +#define M4U_PORT_CAM_BPCI MTK_M4U_ID(M4U_LARB2_ID, 11) +#define M4U_PORT_CAM_BPCI_D MTK_M4U_ID(M4U_LARB2_ID, 12) +#define M4U_PORT_CAM_UFDI MTK_M4U_ID(M4U_LARB2_ID, 13) +#define M4U_PORT_CAM_IMGI MTK_M4U_ID(M4U_LARB2_ID, 14) +#define M4U_PORT_CAM_IMG2O MTK_M4U_ID(M4U_LARB2_ID, 15) +#define M4U_PORT_CAM_IMG3O MTK_M4U_ID(M4U_LARB2_ID, 16) +#define M4U_PORT_CAM_WPE0_I MTK_M4U_ID(M4U_LARB2_ID, 17) +#define M4U_PORT_CAM_WPE1_I MTK_M4U_ID(M4U_LARB2_ID, 18) +#define M4U_PORT_CAM_WPE_O MTK_M4U_ID(M4U_LARB2_ID, 19) +#define M4U_PORT_CAM_FD0_I MTK_M4U_ID(M4U_LARB2_ID, 20) +#define M4U_PORT_CAM_FD1_I MTK_M4U_ID(M4U_LARB2_ID, 21) +#define M4U_PORT_CAM_FD0_O MTK_M4U_ID(M4U_LARB2_ID, 22) +#define M4U_PORT_CAM_FD1_O MTK_M4U_ID(M4U_LARB2_ID, 23) + +/* larb3 */ +#define M4U_PORT_HW_VDEC_MC_EXT MTK_M4U_ID(M4U_LARB3_ID, 0) +#define M4U_PORT_HW_VDEC_UFO_EXT MTK_M4U_ID(M4U_LARB3_ID, 1) +#define M4U_PORT_HW_VDEC_PP_EXT MTK_M4U_ID(M4U_LARB3_ID, 2) +#define M4U_PORT_HW_VDEC_PRED_RD_EXT MTK_M4U_ID(M4U_LARB3_ID, 3) +#define M4U_PORT_HW_VDEC_PRED_WR_EXT MTK_M4U_ID(M4U_LARB3_ID, 4) +#define M4U_PORT_HW_VDEC_PPWRAP_EXT MTK_M4U_ID(M4U_LARB3_ID, 5) +#define M4U_PORT_HW_VDEC_TILE_EXT MTK_M4U_ID(M4U_LARB3_ID, 6) +#define M4U_PORT_HW_VDEC_VLD_EXT MTK_M4U_ID(M4U_LARB3_ID, 7) +#define M4U_PORT_HW_VDEC_VLD2_EXT MTK_M4U_ID(M4U_LARB3_ID, 8) +#define M4U_PORT_HW_VDEC_AVC_MV_EXT MTK_M4U_ID(M4U_LARB3_ID, 9) +#define M4U_PORT_HW_VDEC_RG_CTRL_DMA_EXT MTK_M4U_ID(M4U_LARB3_ID, 10) + +#endif diff --git a/include/linux/can/platform/sja1000.h b/include/linux/can/platform/sja1000.h index 5755ae5a4712..6a869682c120 100644 --- a/include/linux/can/platform/sja1000.h +++ b/include/linux/can/platform/sja1000.h @@ -14,7 +14,7 @@ #define OCR_MODE_TEST 0x01 #define OCR_MODE_NORMAL 0x02 #define OCR_MODE_CLOCK 0x03 -#define OCR_MODE_MASK 0x07 +#define OCR_MODE_MASK 0x03 #define OCR_TX0_INVERT 0x04 #define OCR_TX0_PULLDOWN 0x08 #define OCR_TX0_PULLUP 0x10 diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 528bd44b59e2..2b7d077de7ef 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -68,6 +68,7 @@ struct css_task_iter { struct list_head iters_node; /* css_set->task_iters */ }; +extern struct file_system_type cgroup_fs_type; extern struct cgroup_root cgrp_dfl_root; extern struct css_set init_css_set; diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h index 9f6e25467844..444236dadcf0 100644 --- a/include/linux/fault-inject.h +++ b/include/linux/fault-inject.h @@ -20,7 +20,6 @@ struct fault_attr { atomic_t space; unsigned long verbose; bool task_filter; - bool no_warn; unsigned long stacktrace_depth; unsigned long require_start; unsigned long require_end; @@ -32,6 +31,10 @@ struct fault_attr { struct dentry *dname; }; +enum fault_flags { + FAULT_NOWARN = 1 << 0, +}; + #define FAULT_ATTR_INITIALIZER { \ .interval = 1, \ .times = ATOMIC_INIT(1), \ @@ -40,11 +43,11 @@ struct fault_attr { .ratelimit_state = RATELIMIT_STATE_INIT_DISABLED, \ .verbose = 2, \ .dname = NULL, \ - .no_warn = false, \ } #define DECLARE_FAULT_ATTR(name) struct fault_attr name = FAULT_ATTR_INITIALIZER int setup_fault_attr(struct fault_attr *attr, char *str); +bool should_fail_ex(struct fault_attr *attr, ssize_t size, int flags); bool should_fail(struct fault_attr *attr, ssize_t size); #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS diff --git a/include/linux/fs.h b/include/linux/fs.h index e654435f1651..59ae95ddb679 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2089,6 +2089,14 @@ struct dir_context { */ #define REMAP_FILE_ADVISORY (REMAP_FILE_CAN_SHORTEN) +/* + * These flags control the behavior of vfs_copy_file_range(). + * They are not available to the user via syscall. + * + * COPY_FILE_SPLICE: call splice direct instead of fs clone/copy ops + */ +#define COPY_FILE_SPLICE (1 << 0) + struct iov_iter; struct io_uring_cmd; diff --git a/include/linux/fscache.h b/include/linux/fscache.h index 36e5dd84cf59..8e312c8323a8 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ -75,7 +75,7 @@ struct fscache_volume { atomic_t n_accesses; /* Number of cache accesses in progress */ unsigned int debug_id; unsigned int key_hash; /* Hash of key string */ - char *key; /* Volume ID, eg. "afs@example.com@1234" */ + u8 *key; /* Volume ID, eg. "afs@example.com@1234" */ struct list_head proc_link; /* Link in /proc/fs/fscache/volumes */ struct hlist_bl_node hash_link; /* Link in hash table */ struct work_struct work; diff --git a/include/linux/gfp.h b/include/linux/gfp.h index ef4aea3b356e..65a78773dcca 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -210,6 +210,20 @@ alloc_pages_bulk_array_node(gfp_t gfp, int nid, unsigned long nr_pages, struct p return __alloc_pages_bulk(gfp, nid, NULL, nr_pages, NULL, page_array); } +static inline void warn_if_node_offline(int this_node, gfp_t gfp_mask) +{ + gfp_t warn_gfp = gfp_mask & (__GFP_THISNODE|__GFP_NOWARN); + + if (warn_gfp != (__GFP_THISNODE|__GFP_NOWARN)) + return; + + if (node_online(this_node)) + return; + + pr_warn("%pGg allocation from offline node %d\n", &gfp_mask, this_node); + dump_stack(); +} + /* * Allocate pages, preferring the node given as nid. The node must be valid and * online. For more general interface, see alloc_pages_node(). @@ -218,7 +232,7 @@ static inline struct page * __alloc_pages_node(int nid, gfp_t gfp_mask, unsigned int order) { VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES); - VM_WARN_ON((gfp_mask & __GFP_THISNODE) && !node_online(nid)); + warn_if_node_offline(nid, gfp_mask); return __alloc_pages(gfp_mask, order, nid, NULL); } @@ -227,7 +241,7 @@ static inline struct folio *__folio_alloc_node(gfp_t gfp, unsigned int order, int nid) { VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES); - VM_WARN_ON((gfp & __GFP_THISNODE) && !node_online(nid)); + warn_if_node_offline(nid, gfp); return __folio_alloc(gfp, order, nid, NULL); } diff --git a/include/linux/intel-svm.h b/include/linux/intel-svm.h index 207ef06ba3e1..f9a0d44f6fdb 100644 --- a/include/linux/intel-svm.h +++ b/include/linux/intel-svm.h @@ -13,17 +13,4 @@ #define PRQ_RING_MASK ((0x1000 << PRQ_ORDER) - 0x20) #define PRQ_DEPTH ((0x1000 << PRQ_ORDER) >> 5) -/* - * The SVM_FLAG_SUPERVISOR_MODE flag requests a PASID which can be used only - * for access to kernel addresses. No IOTLB flushes are automatically done - * for kernel mappings; it is valid only for access to the kernel's static - * 1:1 mapping of physical memory — not to vmalloc or even module mappings. - * A future API addition may permit the use of such ranges, by means of an - * explicit IOTLB flush call (akin to the DMA API's unmap method). - * - * It is unlikely that we will ever hook into flush_tlb_kernel_range() to - * do such IOTLB flushes automatically. - */ -#define SVM_FLAG_SUPERVISOR_MODE BIT(0) - #endif /* __INTEL_SVM_H__ */ diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index 1f068dfdb140..1b7a44b35616 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -150,9 +150,7 @@ struct io_pgtable_cfg { /** * struct io_pgtable_ops - Page table manipulation API for IOMMU drivers. * - * @map: Map a physically contiguous memory region. * @map_pages: Map a physically contiguous range of pages of the same size. - * @unmap: Unmap a physically contiguous memory region. * @unmap_pages: Unmap a range of virtually contiguous pages of the same size. * @iova_to_phys: Translate iova to physical address. * @@ -160,13 +158,9 @@ struct io_pgtable_cfg { * the same names. */ struct io_pgtable_ops { - int (*map)(struct io_pgtable_ops *ops, unsigned long iova, - phys_addr_t paddr, size_t size, int prot, gfp_t gfp); int (*map_pages)(struct io_pgtable_ops *ops, unsigned long iova, phys_addr_t paddr, size_t pgsize, size_t pgcount, int prot, gfp_t gfp, size_t *mapped); - size_t (*unmap)(struct io_pgtable_ops *ops, unsigned long iova, - size_t size, struct iommu_iotlb_gather *gather); size_t (*unmap_pages)(struct io_pgtable_ops *ops, unsigned long iova, size_t pgsize, size_t pgcount, struct iommu_iotlb_gather *gather); diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 3c9da1f8979e..68d7d304cdb7 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -64,6 +64,8 @@ struct iommu_domain_geometry { #define __IOMMU_DOMAIN_PT (1U << 2) /* Domain is identity mapped */ #define __IOMMU_DOMAIN_DMA_FQ (1U << 3) /* DMA-API uses flush queue */ +#define __IOMMU_DOMAIN_SVA (1U << 4) /* Shared process address space */ + /* * This are the possible domain-types * @@ -77,6 +79,8 @@ struct iommu_domain_geometry { * certain optimizations for these domains * IOMMU_DOMAIN_DMA_FQ - As above, but definitely using batched TLB * invalidation. + * IOMMU_DOMAIN_SVA - DMA addresses are shared process addresses + * represented by mm_struct's. */ #define IOMMU_DOMAIN_BLOCKED (0U) #define IOMMU_DOMAIN_IDENTITY (__IOMMU_DOMAIN_PT) @@ -86,15 +90,27 @@ struct iommu_domain_geometry { #define IOMMU_DOMAIN_DMA_FQ (__IOMMU_DOMAIN_PAGING | \ __IOMMU_DOMAIN_DMA_API | \ __IOMMU_DOMAIN_DMA_FQ) +#define IOMMU_DOMAIN_SVA (__IOMMU_DOMAIN_SVA) struct iommu_domain { unsigned type; const struct iommu_domain_ops *ops; unsigned long pgsize_bitmap; /* Bitmap of page sizes in use */ - iommu_fault_handler_t handler; - void *handler_token; struct iommu_domain_geometry geometry; struct iommu_dma_cookie *iova_cookie; + enum iommu_page_response_code (*iopf_handler)(struct iommu_fault *fault, + void *data); + void *fault_data; + union { + struct { + iommu_fault_handler_t handler; + void *handler_token; + }; + struct { /* IOMMU_DOMAIN_SVA */ + struct mm_struct *mm; + int users; + }; + }; }; static inline bool iommu_is_dma_domain(struct iommu_domain *domain) @@ -214,15 +230,15 @@ struct iommu_iotlb_gather { * driver init to device driver init (default no) * @dev_enable/disable_feat: per device entries to enable/disable * iommu specific features. - * @sva_bind: Bind process address space to device - * @sva_unbind: Unbind process address space from device - * @sva_get_pasid: Get PASID associated to a SVA handle * @page_response: handle page request response * @def_domain_type: device default domain type, return value: * - IOMMU_DOMAIN_IDENTITY: must use an identity domain * - IOMMU_DOMAIN_DMA: must use a dma domain * - 0: use the default setting * @default_domain_ops: the default ops for domains + * @remove_dev_pasid: Remove any translation configurations of a specific + * pasid, so that any DMA transactions with this pasid + * will be blocked by the hardware. * @pgsize_bitmap: bitmap of all possible supported page sizes * @owner: Driver module providing these ops */ @@ -247,16 +263,12 @@ struct iommu_ops { int (*dev_enable_feat)(struct device *dev, enum iommu_dev_features f); int (*dev_disable_feat)(struct device *dev, enum iommu_dev_features f); - struct iommu_sva *(*sva_bind)(struct device *dev, struct mm_struct *mm, - void *drvdata); - void (*sva_unbind)(struct iommu_sva *handle); - u32 (*sva_get_pasid)(struct iommu_sva *handle); - int (*page_response)(struct device *dev, struct iommu_fault_event *evt, struct iommu_page_response *msg); int (*def_domain_type)(struct device *dev); + void (*remove_dev_pasid)(struct device *dev, ioasid_t pasid); const struct iommu_domain_ops *default_domain_ops; unsigned long pgsize_bitmap; @@ -266,7 +278,20 @@ struct iommu_ops { /** * struct iommu_domain_ops - domain specific operations * @attach_dev: attach an iommu domain to a device + * Return: + * * 0 - success + * * EINVAL - can indicate that device and domain are incompatible due to + * some previous configuration of the domain, in which case the + * driver shouldn't log an error, since it is legitimate for a + * caller to test reuse of existing domains. Otherwise, it may + * still represent some other fundamental problem + * * ENOMEM - out of memory + * * ENOSPC - non-ENOMEM type of resource allocation failures + * * EBUSY - device is attached to a domain and cannot be changed + * * ENODEV - device specific errors, not able to be attached + * * <others> - treated as ENODEV by the caller. Use is discouraged * @detach_dev: detach an iommu domain from a device + * @set_dev_pasid: set an iommu domain to a pasid of device * @map: map a physically contiguous memory region to an iommu domain * @map_pages: map a physically contiguous set of pages of the same size to * an iommu domain. @@ -287,6 +312,8 @@ struct iommu_ops { struct iommu_domain_ops { int (*attach_dev)(struct iommu_domain *domain, struct device *dev); void (*detach_dev)(struct iommu_domain *domain, struct device *dev); + int (*set_dev_pasid)(struct iommu_domain *domain, struct device *dev, + ioasid_t pasid); int (*map)(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t size, int prot, gfp_t gfp); @@ -322,12 +349,14 @@ struct iommu_domain_ops { * @list: Used by the iommu-core to keep a list of registered iommus * @ops: iommu-ops for talking to this iommu * @dev: struct device for sysfs handling + * @max_pasids: number of supported PASIDs */ struct iommu_device { struct list_head list; const struct iommu_ops *ops; struct fwnode_handle *fwnode; struct device *dev; + u32 max_pasids; }; /** @@ -366,6 +395,7 @@ struct iommu_fault_param { * @fwspec: IOMMU fwspec data * @iommu_dev: IOMMU device this device is linked to * @priv: IOMMU Driver private data + * @max_pasids: number of PASIDs this device can consume * * TODO: migrate other per device data pointers under iommu_dev_data, e.g. * struct iommu_group *iommu_group; @@ -377,6 +407,7 @@ struct dev_iommu { struct iommu_fwspec *fwspec; struct iommu_device *iommu_dev; void *priv; + u32 max_pasids; }; int iommu_device_register(struct iommu_device *iommu, @@ -622,6 +653,7 @@ struct iommu_fwspec { */ struct iommu_sva { struct device *dev; + struct iommu_domain *domain; }; int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode, @@ -663,12 +695,6 @@ void iommu_release_device(struct device *dev); int iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features f); int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features f); -struct iommu_sva *iommu_sva_bind_device(struct device *dev, - struct mm_struct *mm, - void *drvdata); -void iommu_sva_unbind_device(struct iommu_sva *handle); -u32 iommu_sva_get_pasid(struct iommu_sva *handle); - int iommu_device_use_default_domain(struct device *dev); void iommu_device_unuse_default_domain(struct device *dev); @@ -676,6 +702,15 @@ int iommu_group_claim_dma_owner(struct iommu_group *group, void *owner); void iommu_group_release_dma_owner(struct iommu_group *group); bool iommu_group_dma_owner_claimed(struct iommu_group *group); +struct iommu_domain *iommu_sva_domain_alloc(struct device *dev, + struct mm_struct *mm); +int iommu_attach_device_pasid(struct iommu_domain *domain, + struct device *dev, ioasid_t pasid); +void iommu_detach_device_pasid(struct iommu_domain *domain, + struct device *dev, ioasid_t pasid); +struct iommu_domain * +iommu_get_domain_for_dev_pasid(struct device *dev, ioasid_t pasid, + unsigned int type); #else /* CONFIG_IOMMU_API */ struct iommu_ops {}; @@ -995,21 +1030,6 @@ iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features feat) return -ENODEV; } -static inline struct iommu_sva * -iommu_sva_bind_device(struct device *dev, struct mm_struct *mm, void *drvdata) -{ - return NULL; -} - -static inline void iommu_sva_unbind_device(struct iommu_sva *handle) -{ -} - -static inline u32 iommu_sva_get_pasid(struct iommu_sva *handle) -{ - return IOMMU_PASID_INVALID; -} - static inline struct iommu_fwspec *dev_iommu_fwspec_get(struct device *dev) { return NULL; @@ -1038,6 +1058,30 @@ static inline bool iommu_group_dma_owner_claimed(struct iommu_group *group) { return false; } + +static inline struct iommu_domain * +iommu_sva_domain_alloc(struct device *dev, struct mm_struct *mm) +{ + return NULL; +} + +static inline int iommu_attach_device_pasid(struct iommu_domain *domain, + struct device *dev, ioasid_t pasid) +{ + return -ENODEV; +} + +static inline void iommu_detach_device_pasid(struct iommu_domain *domain, + struct device *dev, ioasid_t pasid) +{ +} + +static inline struct iommu_domain * +iommu_get_domain_for_dev_pasid(struct device *dev, ioasid_t pasid, + unsigned int type) +{ + return NULL; +} #endif /* CONFIG_IOMMU_API */ /** @@ -1099,4 +1143,26 @@ static inline void iommu_dma_compose_msi_msg(struct msi_desc *desc, struct msi_m #endif /* CONFIG_IOMMU_DMA */ +#ifdef CONFIG_IOMMU_SVA +struct iommu_sva *iommu_sva_bind_device(struct device *dev, + struct mm_struct *mm); +void iommu_sva_unbind_device(struct iommu_sva *handle); +u32 iommu_sva_get_pasid(struct iommu_sva *handle); +#else +static inline struct iommu_sva * +iommu_sva_bind_device(struct device *dev, struct mm_struct *mm) +{ + return NULL; +} + +static inline void iommu_sva_unbind_device(struct iommu_sva *handle) +{ +} + +static inline u32 iommu_sva_get_pasid(struct iommu_sva *handle) +{ + return IOMMU_PASID_INVALID; +} +#endif /* CONFIG_IOMMU_SVA */ + #endif /* __LINUX_IOMMU_H */ diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 18592bdf4c1b..637a60607c7d 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -776,6 +776,7 @@ struct kvm { struct srcu_struct srcu; struct srcu_struct irq_srcu; pid_t userspace_pid; + bool override_halt_poll_ns; unsigned int max_halt_poll_ns; u32 dirty_ring_size; bool vm_bugged; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index af2ceb4160bc..06cbad166225 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -981,6 +981,7 @@ struct mlx5_async_work { struct mlx5_async_ctx *ctx; mlx5_async_cbk_t user_callback; u16 opcode; /* cmd opcode */ + u16 op_mod; /* cmd op_mod */ void *out; /* pointer to the cmd output buffer */ }; diff --git a/include/linux/mm.h b/include/linux/mm.h index 8bbcccbc5565..974ccca609d2 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1852,6 +1852,25 @@ static void __maybe_unused show_free_areas(unsigned int flags, nodemask_t *nodem __show_free_areas(flags, nodemask, MAX_NR_ZONES - 1); } +/* + * Parameter block passed down to zap_pte_range in exceptional cases. + */ +struct zap_details { + struct folio *single_folio; /* Locked folio to be unmapped */ + bool even_cows; /* Zap COWed private pages too? */ + zap_flags_t zap_flags; /* Extra flags for zapping */ +}; + +/* + * Whether to drop the pte markers, for example, the uffd-wp information for + * file-backed memory. This should only be specified when we will completely + * drop the page in the mm, either by truncation or unmapping of the vma. By + * default, the flag is not set. + */ +#define ZAP_FLAG_DROP_MARKER ((__force zap_flags_t) BIT(0)) +/* Set in unmap_vmas() to indicate a final unmap call. Only used by hugetlb */ +#define ZAP_FLAG_UNMAP ((__force zap_flags_t) BIT(1)) + #ifdef CONFIG_MMU extern bool can_do_mlock(void); #else @@ -1869,6 +1888,8 @@ void zap_vma_ptes(struct vm_area_struct *vma, unsigned long address, unsigned long size); void zap_page_range(struct vm_area_struct *vma, unsigned long address, unsigned long size); +void zap_page_range_single(struct vm_area_struct *vma, unsigned long address, + unsigned long size, struct zap_details *details); void unmap_vmas(struct mmu_gather *tlb, struct maple_tree *mt, struct vm_area_struct *start_vma, unsigned long start, unsigned long end); @@ -3467,12 +3488,4 @@ madvise_set_anon_name(struct mm_struct *mm, unsigned long start, } #endif -/* - * Whether to drop the pte markers, for example, the uffd-wp information for - * file-backed memory. This should only be specified when we will completely - * drop the page in the mm, either by truncation or unmapping of the vma. By - * default, the flag is not set. - */ -#define ZAP_FLAG_DROP_MARKER ((__force zap_flags_t) BIT(0)) - #endif /* _LINUX_MM_H */ diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h index 9c50bc40f8ff..6f7993803ee7 100644 --- a/include/linux/mmc/mmc.h +++ b/include/linux/mmc/mmc.h @@ -451,7 +451,7 @@ static inline bool mmc_ready_for_data(u32 status) #define MMC_SECURE_TRIM1_ARG 0x80000001 #define MMC_SECURE_TRIM2_ARG 0x80008000 #define MMC_SECURE_ARGS 0x80000000 -#define MMC_TRIM_ARGS 0x00008001 +#define MMC_TRIM_OR_DISCARD_ARGS 0x00008003 #define mmc_driver_type_mask(n) (1 << (n)) diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index a108b60a6962..5f0d7d0b9471 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -165,6 +165,13 @@ static inline pte_t *virt_to_kpte(unsigned long vaddr) return pmd_none(*pmd) ? NULL : pte_offset_kernel(pmd, vaddr); } +#ifndef pmd_young +static inline int pmd_young(pmd_t pmd) +{ + return 0; +} +#endif + #ifndef __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS extern int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address, pte_t *ptep, @@ -260,6 +267,17 @@ static inline int pmdp_clear_flush_young(struct vm_area_struct *vma, #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif +#ifndef arch_has_hw_nonleaf_pmd_young +/* + * Return whether the accessed bit in non-leaf PMD entries is supported on the + * local CPU. + */ +static inline bool arch_has_hw_nonleaf_pmd_young(void) +{ + return IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG); +} +#endif + #ifndef arch_has_hw_pte_young /* * Return whether the accessed bit is supported on the local CPU. diff --git a/include/linux/swapops.h b/include/linux/swapops.h index 86b95ccb81bb..b07b277d6a16 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -33,11 +33,13 @@ * can use the extra bits to store other information besides PFN. */ #ifdef MAX_PHYSMEM_BITS -#define SWP_PFN_BITS (MAX_PHYSMEM_BITS - PAGE_SHIFT) +#define SWP_PFN_BITS (MAX_PHYSMEM_BITS - PAGE_SHIFT) #else /* MAX_PHYSMEM_BITS */ -#define SWP_PFN_BITS (BITS_PER_LONG - PAGE_SHIFT) +#define SWP_PFN_BITS min_t(int, \ + sizeof(phys_addr_t) * 8 - PAGE_SHIFT, \ + SWP_TYPE_SHIFT) #endif /* MAX_PHYSMEM_BITS */ -#define SWP_PFN_MASK (BIT(SWP_PFN_BITS) - 1) +#define SWP_PFN_MASK (BIT(SWP_PFN_BITS) - 1) /** * Migration swap entry specific bitfield definitions. Layout: diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index e004ba04a9ae..684f1cd28730 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -228,6 +228,17 @@ enum { */ HCI_QUIRK_VALID_LE_STATES, + /* When this quirk is set, then erroneous data reporting + * is ignored. This is mainly due to the fact that the HCI + * Read Default Erroneous Data Reporting command is advertised, + * but not supported; these controllers often reply with unknown + * command and tend to lock up randomly. Needing a hard reset. + * + * This quirk can be set before hci_register_dev is called or + * during the hdev->setup vendor callback. + */ + HCI_QUIRK_BROKEN_ERR_DATA_REPORTING, + /* * When this quirk is set, then the hci_suspend_notifier is not * registered. This is intended for devices which drop completely @@ -1424,7 +1435,6 @@ struct hci_std_codecs_v2 { } __packed; struct hci_vnd_codec_v2 { - __u8 id; __le16 cid; __le16 vid; __u8 transport; diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 3af1e927247d..69174093078f 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -281,7 +281,8 @@ inet_bhash2_addr_any_hashbucket(const struct sock *sk, const struct net *net, in * sk_v6_rcv_saddr (ipv6) changes after it has been binded. The socket's * rcv_saddr field should already have been updated when this is called. */ -int inet_bhash2_update_saddr(struct inet_bind_hashbucket *prev_saddr, struct sock *sk); +int inet_bhash2_update_saddr(struct sock *sk, void *saddr, int family); +void inet_bhash2_reset_saddr(struct sock *sk); void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, struct inet_bind2_bucket *tb2, unsigned short port); diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 20745cf7ae1a..2f2a6023fb0e 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -83,7 +83,7 @@ struct neigh_parms { struct rcu_head rcu_head; int reachable_time; - int qlen; + u32 qlen; int data[NEIGH_VAR_DATA_MAX]; DECLARE_BITMAP(data_state, NEIGH_VAR_DATA_MAX); }; diff --git a/include/net/ping.h b/include/net/ping.h index e4ff3911cbf5..9233ad3de0ad 100644 --- a/include/net/ping.h +++ b/include/net/ping.h @@ -16,9 +16,6 @@ #define PING_HTABLE_SIZE 64 #define PING_HTABLE_MASK (PING_HTABLE_SIZE-1) -#define ping_portaddr_for_each_entry(__sk, node, list) \ - hlist_nulls_for_each_entry(__sk, node, list, sk_nulls_node) - /* * gid_t is either uint or ushort. We want to pass it to * proc_dointvec_minmax(), so it must not be larger than MAX_INT diff --git a/include/net/sctp/stream_sched.h b/include/net/sctp/stream_sched.h index 01a70b27e026..65058faea4db 100644 --- a/include/net/sctp/stream_sched.h +++ b/include/net/sctp/stream_sched.h @@ -26,6 +26,8 @@ struct sctp_sched_ops { int (*init)(struct sctp_stream *stream); /* Init a stream */ int (*init_sid)(struct sctp_stream *stream, __u16 sid, gfp_t gfp); + /* free a stream */ + void (*free_sid)(struct sctp_stream *stream, __u16 sid); /* Frees the entire thing */ void (*free)(struct sctp_stream *stream); diff --git a/include/sound/sof/dai.h b/include/sound/sof/dai.h index 83fd81c82e4c..9fbd3832bcdc 100644 --- a/include/sound/sof/dai.h +++ b/include/sound/sof/dai.h @@ -84,8 +84,8 @@ enum sof_ipc_dai_type { SOF_DAI_AMD_BT, /**< AMD ACP BT*/ SOF_DAI_AMD_SP, /**< AMD ACP SP */ SOF_DAI_AMD_DMIC, /**< AMD ACP DMIC */ - SOF_DAI_AMD_HS, /**< Amd HS */ SOF_DAI_MEDIATEK_AFE, /**< Mediatek AFE */ + SOF_DAI_AMD_HS, /**< Amd HS */ }; /* general purpose DAI configuration */ diff --git a/include/trace/events/fscache.h b/include/trace/events/fscache.h index c078c48a8e6d..a6190aa1b406 100644 --- a/include/trace/events/fscache.h +++ b/include/trace/events/fscache.h @@ -66,6 +66,7 @@ enum fscache_cookie_trace { fscache_cookie_put_work, fscache_cookie_see_active, fscache_cookie_see_lru_discard, + fscache_cookie_see_lru_discard_clear, fscache_cookie_see_lru_do_one, fscache_cookie_see_relinquish, fscache_cookie_see_withdraw, @@ -149,6 +150,7 @@ enum fscache_access_trace { EM(fscache_cookie_put_work, "PQ work ") \ EM(fscache_cookie_see_active, "- activ") \ EM(fscache_cookie_see_lru_discard, "- x-lru") \ + EM(fscache_cookie_see_lru_discard_clear,"- lrudc") \ EM(fscache_cookie_see_lru_do_one, "- lrudo") \ EM(fscache_cookie_see_relinquish, "- x-rlq") \ EM(fscache_cookie_see_withdraw, "- x-wth") \ diff --git a/include/trace/events/huge_memory.h b/include/trace/events/huge_memory.h index 935af4947917..760455dfa860 100644 --- a/include/trace/events/huge_memory.h +++ b/include/trace/events/huge_memory.h @@ -171,15 +171,15 @@ TRACE_EVENT(mm_collapse_huge_page_swapin, TRACE_EVENT(mm_khugepaged_scan_file, - TP_PROTO(struct mm_struct *mm, struct page *page, const char *filename, + TP_PROTO(struct mm_struct *mm, struct page *page, struct file *file, int present, int swap, int result), - TP_ARGS(mm, page, filename, present, swap, result), + TP_ARGS(mm, page, file, present, swap, result), TP_STRUCT__entry( __field(struct mm_struct *, mm) __field(unsigned long, pfn) - __string(filename, filename) + __string(filename, file->f_path.dentry->d_iname) __field(int, present) __field(int, swap) __field(int, result) @@ -188,7 +188,7 @@ TRACE_EVENT(mm_khugepaged_scan_file, TP_fast_assign( __entry->mm = mm; __entry->pfn = page ? page_to_pfn(page) : -1; - __assign_str(filename, filename); + __assign_str(filename, file->f_path.dentry->d_iname); __entry->present = present; __entry->swap = swap; __entry->result = result; diff --git a/init/Kconfig b/init/Kconfig index abf65098f1b6..94125d3b6893 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -87,7 +87,7 @@ config CC_HAS_ASM_GOTO_OUTPUT config CC_HAS_ASM_GOTO_TIED_OUTPUT depends on CC_HAS_ASM_GOTO_OUTPUT # Detect buggy gcc and clang, fixed in gcc-11 clang-14. - def_bool $(success,echo 'int foo(int *x) { asm goto (".long (%l[bar]) - .\n": "+m"(*x) ::: bar); return *x; bar: return 0; }' | $CC -x c - -c -o /dev/null) + def_bool $(success,echo 'int foo(int *x) { asm goto (".long (%l[bar]) - .": "+m"(*x) ::: bar); return *x; bar: return 0; }' | $CC -x c - -c -o /dev/null) config TOOLS_SUPPORT_RELR def_bool $(success,env "CC=$(CC)" "LD=$(LD)" "NM=$(NM)" "OBJCOPY=$(OBJCOPY)" $(srctree)/scripts/tools-support-relr.sh) diff --git a/io_uring/filetable.c b/io_uring/filetable.c index 7b473259f3f4..68dfc6936aa7 100644 --- a/io_uring/filetable.c +++ b/io_uring/filetable.c @@ -101,8 +101,6 @@ static int io_install_fixed_file(struct io_ring_ctx *ctx, struct file *file, err: if (needs_switch) io_rsrc_node_switch(ctx, ctx->file_data); - if (ret) - fput(file); return ret; } diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 8840cf3e20f2..61cd7ffd0f6a 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -2707,8 +2707,10 @@ static __cold void io_tctx_exit_cb(struct callback_head *cb) /* * When @in_idle, we're in cancellation and it's racy to remove the * node. It'll be removed by the end of cancellation, just ignore it. + * tctx can be NULL if the queueing of this task_work raced with + * work cancelation off the exec path. */ - if (!atomic_read(&tctx->in_idle)) + if (tctx && !atomic_read(&tctx->in_idle)) io_uring_del_tctx_node((unsigned long)work->ctx); complete(&work->completion); } diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h index cef5ff924e63..50bc3af44953 100644 --- a/io_uring/io_uring.h +++ b/io_uring/io_uring.h @@ -238,9 +238,14 @@ static inline unsigned int io_sqring_entries(struct io_ring_ctx *ctx) static inline int io_run_task_work(void) { + /* + * Always check-and-clear the task_work notification signal. With how + * signaling works for task_work, we can find it set with nothing to + * run. We need to clear it for that case, like get_signal() does. + */ + if (test_thread_flag(TIF_NOTIFY_SIGNAL)) + clear_notify_signal(); if (task_work_pending(current)) { - if (test_thread_flag(TIF_NOTIFY_SIGNAL)) - clear_notify_signal(); __set_current_state(TASK_RUNNING); task_work_run(); return 1; diff --git a/io_uring/poll.c b/io_uring/poll.c index 055632e9092a..d9bf1767867e 100644 --- a/io_uring/poll.c +++ b/io_uring/poll.c @@ -40,7 +40,14 @@ struct io_poll_table { }; #define IO_POLL_CANCEL_FLAG BIT(31) -#define IO_POLL_REF_MASK GENMASK(30, 0) +#define IO_POLL_RETRY_FLAG BIT(30) +#define IO_POLL_REF_MASK GENMASK(29, 0) + +/* + * We usually have 1-2 refs taken, 128 is more than enough and we want to + * maximise the margin between this amount and the moment when it overflows. + */ +#define IO_POLL_REF_BIAS 128 #define IO_WQE_F_DOUBLE 1 @@ -58,6 +65,21 @@ static inline bool wqe_is_double(struct wait_queue_entry *wqe) return priv & IO_WQE_F_DOUBLE; } +static bool io_poll_get_ownership_slowpath(struct io_kiocb *req) +{ + int v; + + /* + * poll_refs are already elevated and we don't have much hope for + * grabbing the ownership. Instead of incrementing set a retry flag + * to notify the loop that there might have been some change. + */ + v = atomic_fetch_or(IO_POLL_RETRY_FLAG, &req->poll_refs); + if (v & IO_POLL_REF_MASK) + return false; + return !(atomic_fetch_inc(&req->poll_refs) & IO_POLL_REF_MASK); +} + /* * If refs part of ->poll_refs (see IO_POLL_REF_MASK) is 0, it's free. We can * bump it and acquire ownership. It's disallowed to modify requests while not @@ -66,6 +88,8 @@ static inline bool wqe_is_double(struct wait_queue_entry *wqe) */ static inline bool io_poll_get_ownership(struct io_kiocb *req) { + if (unlikely(atomic_read(&req->poll_refs) >= IO_POLL_REF_BIAS)) + return io_poll_get_ownership_slowpath(req); return !(atomic_fetch_inc(&req->poll_refs) & IO_POLL_REF_MASK); } @@ -235,6 +259,16 @@ static int io_poll_check_events(struct io_kiocb *req, bool *locked) */ if ((v & IO_POLL_REF_MASK) != 1) req->cqe.res = 0; + if (v & IO_POLL_RETRY_FLAG) { + req->cqe.res = 0; + /* + * We won't find new events that came in between + * vfs_poll and the ref put unless we clear the flag + * in advance. + */ + atomic_andnot(IO_POLL_RETRY_FLAG, &req->poll_refs); + v &= ~IO_POLL_RETRY_FLAG; + } /* the mask was stashed in __io_poll_execute */ if (!req->cqe.res) { @@ -274,7 +308,8 @@ static int io_poll_check_events(struct io_kiocb *req, bool *locked) * Release all references, retry if someone tried to restart * task_work while we were executing it. */ - } while (atomic_sub_return(v & IO_POLL_REF_MASK, &req->poll_refs)); + } while (atomic_sub_return(v & IO_POLL_REF_MASK, &req->poll_refs) & + IO_POLL_REF_MASK); return IOU_POLL_NO_ACTION; } @@ -518,7 +553,6 @@ static int __io_arm_poll_handler(struct io_kiocb *req, unsigned issue_flags) { struct io_ring_ctx *ctx = req->ctx; - int v; INIT_HLIST_NODE(&req->hash_node); req->work.cancel_seq = atomic_read(&ctx->cancel_seq); @@ -586,11 +620,10 @@ static int __io_arm_poll_handler(struct io_kiocb *req, if (ipt->owning) { /* - * Release ownership. If someone tried to queue a tw while it was - * locked, kick it off for them. + * Try to release ownership. If we see a change of state, e.g. + * poll was waken up, queue up a tw, it'll deal with it. */ - v = atomic_dec_return(&req->poll_refs); - if (unlikely(v & IO_POLL_REF_MASK)) + if (atomic_cmpxchg(&req->poll_refs, 1, 0) != 1) __io_poll_execute(req, 0); } return 0; diff --git a/ipc/sem.c b/ipc/sem.c index c8496f98b139..00f88aa01ac5 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -2179,14 +2179,15 @@ long __do_semtimedop(int semid, struct sembuf *sops, * scenarios where we were awakened externally, during the * window between wake_q_add() and wake_up_q(). */ + rcu_read_lock(); error = READ_ONCE(queue.status); if (error != -EINTR) { /* see SEM_BARRIER_2 for purpose/pairing */ smp_acquire__after_ctrl_dep(); + rcu_read_unlock(); goto out; } - rcu_read_lock(); locknum = sem_lock(sma, sops, nsops); if (!ipc_valid_object(&sma->sem_perm)) diff --git a/ipc/shm.c b/ipc/shm.c index 7d86f058fb86..bd2fcc4d454e 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -275,10 +275,8 @@ static inline void shm_rmid(struct shmid_kernel *s) } -static int __shm_open(struct vm_area_struct *vma) +static int __shm_open(struct shm_file_data *sfd) { - struct file *file = vma->vm_file; - struct shm_file_data *sfd = shm_file_data(file); struct shmid_kernel *shp; shp = shm_lock(sfd->ns, sfd->id); @@ -302,7 +300,15 @@ static int __shm_open(struct vm_area_struct *vma) /* This is called by fork, once for every shm attach. */ static void shm_open(struct vm_area_struct *vma) { - int err = __shm_open(vma); + struct file *file = vma->vm_file; + struct shm_file_data *sfd = shm_file_data(file); + int err; + + /* Always call underlying open if present */ + if (sfd->vm_ops->open) + sfd->vm_ops->open(vma); + + err = __shm_open(sfd); /* * We raced in the idr lookup or with shm_destroy(). * Either way, the ID is busted. @@ -359,10 +365,8 @@ static bool shm_may_destroy(struct shmid_kernel *shp) * The descriptor has already been removed from the current->mm->mmap list * and will later be kfree()d. */ -static void shm_close(struct vm_area_struct *vma) +static void __shm_close(struct shm_file_data *sfd) { - struct file *file = vma->vm_file; - struct shm_file_data *sfd = shm_file_data(file); struct shmid_kernel *shp; struct ipc_namespace *ns = sfd->ns; @@ -388,6 +392,18 @@ done: up_write(&shm_ids(ns).rwsem); } +static void shm_close(struct vm_area_struct *vma) +{ + struct file *file = vma->vm_file; + struct shm_file_data *sfd = shm_file_data(file); + + /* Always call underlying close if present */ + if (sfd->vm_ops->close) + sfd->vm_ops->close(vma); + + __shm_close(sfd); +} + /* Called with ns->shm_ids(ns).rwsem locked */ static int shm_try_destroy_orphaned(int id, void *p, void *data) { @@ -583,13 +599,13 @@ static int shm_mmap(struct file *file, struct vm_area_struct *vma) * IPC ID that was removed, and possibly even reused by another shm * segment already. Propagate this case as an error to caller. */ - ret = __shm_open(vma); + ret = __shm_open(sfd); if (ret) return ret; ret = call_mmap(sfd->file, vma); if (ret) { - shm_close(vma); + __shm_close(sfd); return ret; } sfd->vm_ops = vma->vm_ops; diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c index 802fc15b0d73..f27fa5ba7d72 100644 --- a/kernel/bpf/bpf_local_storage.c +++ b/kernel/bpf/bpf_local_storage.c @@ -74,7 +74,7 @@ bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner, gfp_flags | __GFP_NOWARN); if (selem) { if (value) - memcpy(SDATA(selem)->data, value, smap->map.value_size); + copy_map_value(&smap->map, SDATA(selem)->data, value); return selem; } diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h index fd4020835ec6..367b0a42ada9 100644 --- a/kernel/cgroup/cgroup-internal.h +++ b/kernel/cgroup/cgroup-internal.h @@ -167,7 +167,6 @@ struct cgroup_mgctx { extern spinlock_t css_set_lock; extern struct cgroup_subsys *cgroup_subsys[]; extern struct list_head cgroup_roots; -extern struct file_system_type cgroup_fs_type; /* iterate across the hierarchies */ #define for_each_root(root) \ diff --git a/kernel/events/core.c b/kernel/events/core.c index 884871427a94..7f04f995c975 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -2291,6 +2291,7 @@ event_sched_out(struct perf_event *event, !event->pending_work) { event->pending_work = 1; dec = false; + WARN_ON_ONCE(!atomic_long_inc_not_zero(&event->refcount)); task_work_add(current, &event->pending_task, TWA_RESUME); } if (dec) @@ -2336,6 +2337,7 @@ group_sched_out(struct perf_event *group_event, #define DETACH_GROUP 0x01UL #define DETACH_CHILD 0x02UL +#define DETACH_DEAD 0x04UL /* * Cross CPU call to remove a performance event @@ -2356,12 +2358,20 @@ __perf_remove_from_context(struct perf_event *event, update_cgrp_time_from_cpuctx(cpuctx, false); } + /* + * Ensure event_sched_out() switches to OFF, at the very least + * this avoids raising perf_pending_task() at this time. + */ + if (flags & DETACH_DEAD) + event->pending_disable = 1; event_sched_out(event, cpuctx, ctx); if (flags & DETACH_GROUP) perf_group_detach(event); if (flags & DETACH_CHILD) perf_child_detach(event); list_del_event(event, ctx); + if (flags & DETACH_DEAD) + event->state = PERF_EVENT_STATE_DEAD; if (!ctx->nr_events && ctx->is_active) { if (ctx == &cpuctx->ctx) @@ -5121,9 +5131,7 @@ int perf_event_release_kernel(struct perf_event *event) ctx = perf_event_ctx_lock(event); WARN_ON_ONCE(ctx->parent_ctx); - perf_remove_from_context(event, DETACH_GROUP); - raw_spin_lock_irq(&ctx->lock); /* * Mark this event as STATE_DEAD, there is no external reference to it * anymore. @@ -5135,8 +5143,7 @@ int perf_event_release_kernel(struct perf_event *event) * Thus this guarantees that we will in fact observe and kill _ALL_ * child events. */ - event->state = PERF_EVENT_STATE_DEAD; - raw_spin_unlock_irq(&ctx->lock); + perf_remove_from_context(event, DETACH_GROUP|DETACH_DEAD); perf_event_ctx_unlock(event, ctx); @@ -6577,6 +6584,8 @@ static void perf_pending_task(struct callback_head *head) if (rctx >= 0) perf_swevent_put_recursion_context(rctx); preempt_enable_notrace(); + + put_event(event); } #ifdef CONFIG_GUEST_PERF_EVENTS @@ -9030,7 +9039,7 @@ static void perf_event_bpf_emit_ksymbols(struct bpf_prog *prog, PERF_RECORD_KSYMBOL_TYPE_BPF, (u64)(unsigned long)subprog->bpf_func, subprog->jited_len, unregister, - prog->aux->ksym.name); + subprog->aux->ksym.name); } } } @@ -9273,6 +9282,19 @@ int perf_event_account_interrupt(struct perf_event *event) return __perf_event_account_interrupt(event, 1); } +static inline bool sample_is_allowed(struct perf_event *event, struct pt_regs *regs) +{ + /* + * Due to interrupt latency (AKA "skid"), we may enter the + * kernel before taking an overflow, even if the PMU is only + * counting user events. + */ + if (event->attr.exclude_kernel && !user_mode(regs)) + return false; + + return true; +} + /* * Generic event overflow handling, sampling. */ @@ -9306,6 +9328,13 @@ static int __perf_event_overflow(struct perf_event *event, } if (event->attr.sigtrap) { + /* + * The desired behaviour of sigtrap vs invalid samples is a bit + * tricky; on the one hand, one should not loose the SIGTRAP if + * it is the first event, on the other hand, we should also not + * trigger the WARN or override the data address. + */ + bool valid_sample = sample_is_allowed(event, regs); unsigned int pending_id = 1; if (regs) @@ -9313,7 +9342,7 @@ static int __perf_event_overflow(struct perf_event *event, if (!event->pending_sigtrap) { event->pending_sigtrap = pending_id; local_inc(&event->ctx->nr_pending); - } else if (event->attr.exclude_kernel) { + } else if (event->attr.exclude_kernel && valid_sample) { /* * Should not be able to return to user space without * consuming pending_sigtrap; with exceptions: @@ -9328,7 +9357,10 @@ static int __perf_event_overflow(struct perf_event *event, */ WARN_ON_ONCE(event->pending_sigtrap != pending_id); } - event->pending_addr = data->addr; + + event->pending_addr = 0; + if (valid_sample && (data->sample_flags & PERF_SAMPLE_ADDR)) + event->pending_addr = data->addr; irq_work_queue(&event->pending_irq); } diff --git a/kernel/gcov/clang.c b/kernel/gcov/clang.c index cbb0bed958ab..7670a811a565 100644 --- a/kernel/gcov/clang.c +++ b/kernel/gcov/clang.c @@ -280,6 +280,8 @@ void gcov_info_add(struct gcov_info *dst, struct gcov_info *src) for (i = 0; i < sfn_ptr->num_counters; i++) dfn_ptr->counters[i] += sfn_ptr->counters[i]; + + sfn_ptr = list_next_entry(sfn_ptr, head); } } diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 9161d1136d01..1207c78f85c1 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -25,9 +25,6 @@ struct sugov_policy { unsigned int next_freq; unsigned int cached_raw_freq; - /* max CPU capacity, which is equal for all CPUs in freq. domain */ - unsigned long max; - /* The next fields are only needed if fast switch cannot be used: */ struct irq_work irq_work; struct kthread_work work; @@ -51,6 +48,7 @@ struct sugov_cpu { unsigned long util; unsigned long bw_dl; + unsigned long max; /* The field below is for single-CPU policies only: */ #ifdef CONFIG_NO_HZ_COMMON @@ -160,6 +158,7 @@ static void sugov_get_util(struct sugov_cpu *sg_cpu) { struct rq *rq = cpu_rq(sg_cpu->cpu); + sg_cpu->max = arch_scale_cpu_capacity(sg_cpu->cpu); sg_cpu->bw_dl = cpu_bw_dl(rq); sg_cpu->util = effective_cpu_util(sg_cpu->cpu, cpu_util_cfs(sg_cpu->cpu), FREQUENCY_UTIL, NULL); @@ -254,7 +253,6 @@ static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, u64 time, */ static void sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time) { - struct sugov_policy *sg_policy = sg_cpu->sg_policy; unsigned long boost; /* No boost currently required */ @@ -282,8 +280,7 @@ static void sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time) * sg_cpu->util is already in capacity scale; convert iowait_boost * into the same scale so we can compare. */ - boost = sg_cpu->iowait_boost * sg_policy->max; - boost >>= SCHED_CAPACITY_SHIFT; + boost = (sg_cpu->iowait_boost * sg_cpu->max) >> SCHED_CAPACITY_SHIFT; boost = uclamp_rq_util_with(cpu_rq(sg_cpu->cpu), boost, NULL); if (sg_cpu->util < boost) sg_cpu->util = boost; @@ -340,7 +337,7 @@ static void sugov_update_single_freq(struct update_util_data *hook, u64 time, if (!sugov_update_single_common(sg_cpu, time, flags)) return; - next_f = get_next_freq(sg_policy, sg_cpu->util, sg_policy->max); + next_f = get_next_freq(sg_policy, sg_cpu->util, sg_cpu->max); /* * Do not reduce the frequency if the CPU has not been idle * recently, as the reduction is likely to be premature then. @@ -376,7 +373,6 @@ static void sugov_update_single_perf(struct update_util_data *hook, u64 time, unsigned int flags) { struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util); - struct sugov_policy *sg_policy = sg_cpu->sg_policy; unsigned long prev_util = sg_cpu->util; /* @@ -403,8 +399,7 @@ static void sugov_update_single_perf(struct update_util_data *hook, u64 time, sg_cpu->util = prev_util; cpufreq_driver_adjust_perf(sg_cpu->cpu, map_util_perf(sg_cpu->bw_dl), - map_util_perf(sg_cpu->util), - sg_policy->max); + map_util_perf(sg_cpu->util), sg_cpu->max); sg_cpu->sg_policy->last_freq_update_time = time; } @@ -413,19 +408,25 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time) { struct sugov_policy *sg_policy = sg_cpu->sg_policy; struct cpufreq_policy *policy = sg_policy->policy; - unsigned long util = 0; + unsigned long util = 0, max = 1; unsigned int j; for_each_cpu(j, policy->cpus) { struct sugov_cpu *j_sg_cpu = &per_cpu(sugov_cpu, j); + unsigned long j_util, j_max; sugov_get_util(j_sg_cpu); sugov_iowait_apply(j_sg_cpu, time); + j_util = j_sg_cpu->util; + j_max = j_sg_cpu->max; - util = max(j_sg_cpu->util, util); + if (j_util * max > j_max * util) { + util = j_util; + max = j_max; + } } - return get_next_freq(sg_policy, util, sg_policy->max); + return get_next_freq(sg_policy, util, max); } static void @@ -751,7 +752,7 @@ static int sugov_start(struct cpufreq_policy *policy) { struct sugov_policy *sg_policy = policy->governor_data; void (*uu)(struct update_util_data *data, u64 time, unsigned int flags); - unsigned int cpu = cpumask_first(policy->cpus); + unsigned int cpu; sg_policy->freq_update_delay_ns = sg_policy->tunables->rate_limit_us * NSEC_PER_USEC; sg_policy->last_freq_update_time = 0; @@ -759,7 +760,6 @@ static int sugov_start(struct cpufreq_policy *policy) sg_policy->work_in_progress = false; sg_policy->limits_changed = false; sg_policy->cached_raw_freq = 0; - sg_policy->max = arch_scale_cpu_capacity(cpu); sg_policy->need_freq_update = cpufreq_driver_test_flags(CPUFREQ_NEED_UPDATE_LIMITS); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 188c305aeb8b..c6d9dec11b74 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -267,13 +267,14 @@ int proc_dostring(struct ctl_table *table, int write, ppos); } -static size_t proc_skip_spaces(char **buf) +static void proc_skip_spaces(char **buf, size_t *size) { - size_t ret; - char *tmp = skip_spaces(*buf); - ret = tmp - *buf; - *buf = tmp; - return ret; + while (*size) { + if (!isspace(**buf)) + break; + (*size)--; + (*buf)++; + } } static void proc_skip_char(char **buf, size_t *size, const char v) @@ -342,13 +343,12 @@ static int proc_get_long(char **buf, size_t *size, unsigned long *val, bool *neg, const char *perm_tr, unsigned perm_tr_len, char *tr) { - int len; char *p, tmp[TMPBUFLEN]; + ssize_t len = *size; - if (!*size) + if (len <= 0) return -EINVAL; - len = *size; if (len > TMPBUFLEN - 1) len = TMPBUFLEN - 1; @@ -521,7 +521,7 @@ static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table, bool neg; if (write) { - left -= proc_skip_spaces(&p); + proc_skip_spaces(&p, &left); if (!left) break; @@ -548,7 +548,7 @@ static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table, if (!write && !first && left && !err) proc_put_char(&buffer, &left, '\n'); if (write && !err && left) - left -= proc_skip_spaces(&p); + proc_skip_spaces(&p, &left); if (write && first) return err ? : -EINVAL; *lenp -= left; @@ -590,7 +590,7 @@ static int do_proc_douintvec_w(unsigned int *tbl_data, if (left > PAGE_SIZE - 1) left = PAGE_SIZE - 1; - left -= proc_skip_spaces(&p); + proc_skip_spaces(&p, &left); if (!left) { err = -EINVAL; goto out_free; @@ -610,7 +610,7 @@ static int do_proc_douintvec_w(unsigned int *tbl_data, } if (!err && left) - left -= proc_skip_spaces(&p); + proc_skip_spaces(&p, &left); out_free: if (err) @@ -1075,7 +1075,7 @@ static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, if (write) { bool neg; - left -= proc_skip_spaces(&p); + proc_skip_spaces(&p, &left); if (!left) break; @@ -1104,7 +1104,7 @@ static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, if (!write && !first && left && !err) proc_put_char(&buffer, &left, '\n'); if (write && !err) - left -= proc_skip_spaces(&p); + proc_skip_spaces(&p, &left); if (write && first) return err ? : -EINVAL; *lenp -= left; diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index a7fe0e115272..5cfc95a52bc3 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2180,10 +2180,12 @@ void tracing_reset_online_cpus(struct array_buffer *buf) } /* Must have trace_types_lock held */ -void tracing_reset_all_online_cpus(void) +void tracing_reset_all_online_cpus_unlocked(void) { struct trace_array *tr; + lockdep_assert_held(&trace_types_lock); + list_for_each_entry(tr, &ftrace_trace_arrays, list) { if (!tr->clear_trace) continue; @@ -2195,6 +2197,13 @@ void tracing_reset_all_online_cpus(void) } } +void tracing_reset_all_online_cpus(void) +{ + mutex_lock(&trace_types_lock); + tracing_reset_all_online_cpus_unlocked(); + mutex_unlock(&trace_types_lock); +} + /* * The tgid_map array maps from pid to tgid; i.e. the value stored at index i * is the tgid last observed corresponding to pid=i. diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 54ee5711c729..d42e24507152 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -580,6 +580,7 @@ int tracing_is_enabled(void); void tracing_reset_online_cpus(struct array_buffer *buf); void tracing_reset_current(int cpu); void tracing_reset_all_online_cpus(void); +void tracing_reset_all_online_cpus_unlocked(void); int tracing_open_generic(struct inode *inode, struct file *filp); int tracing_open_generic_tr(struct inode *inode, struct file *filp); bool tracing_is_disabled(void); diff --git a/kernel/trace/trace_dynevent.c b/kernel/trace/trace_dynevent.c index 154996684fb5..4376887e0d8a 100644 --- a/kernel/trace/trace_dynevent.c +++ b/kernel/trace/trace_dynevent.c @@ -118,6 +118,7 @@ int dyn_event_release(const char *raw_command, struct dyn_event_operations *type if (ret) break; } + tracing_reset_all_online_cpus(); mutex_unlock(&event_mutex); out: argv_free(argv); @@ -214,6 +215,7 @@ int dyn_events_release_all(struct dyn_event_operations *type) break; } out: + tracing_reset_all_online_cpus(); mutex_unlock(&event_mutex); return ret; diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 0356cae0cf74..f71ea6e79b3c 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -2880,7 +2880,10 @@ static int probe_remove_event_call(struct trace_event_call *call) * TRACE_REG_UNREGISTER. */ if (file->flags & EVENT_FILE_FL_ENABLED) - return -EBUSY; + goto busy; + + if (file->flags & EVENT_FILE_FL_WAS_ENABLED) + tr->clear_trace = true; /* * The do_for_each_event_file_safe() is * a double loop. After finding the call for this @@ -2893,6 +2896,12 @@ static int probe_remove_event_call(struct trace_event_call *call) __trace_remove_event_call(call); return 0; + busy: + /* No need to clear the trace now */ + list_for_each_entry(tr, &ftrace_trace_arrays, list) { + tr->clear_trace = false; + } + return -EBUSY; } /* Remove an event_call */ @@ -2972,7 +2981,7 @@ static void trace_module_remove_events(struct module *mod) * over from this module may be passed to the new module events and * unexpected results may occur. */ - tracing_reset_all_online_cpus(); + tracing_reset_all_online_cpus_unlocked(); } static int trace_module_notify(struct notifier_block *self, diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 48465f7e97b4..1c82478e8dff 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -983,7 +983,7 @@ static struct hist_field *find_any_var_ref(struct hist_trigger_data *hist_data, * A trigger can define one or more variables. If any one of them is * currently referenced by any other trigger, this function will * determine that. - + * * Typically used to determine whether or not a trigger can be removed * - if there are any references to a trigger's variables, it cannot. * @@ -3226,7 +3226,7 @@ static struct field_var *create_field_var(struct hist_trigger_data *hist_data, * events. However, for convenience, users are allowed to directly * specify an event field in an action, which will be automatically * converted into a variable on their behalf. - + * * This function creates a field variable with the name var_name on * the hist trigger currently being defined on the target event. If * subsys_name and event_name are specified, this function simply @@ -5143,6 +5143,9 @@ static void event_hist_trigger(struct event_trigger_data *data, void *key = NULL; unsigned int i; + if (unlikely(!rbe)) + return; + memset(compound_key, 0, hist_data->key_size); for_each_hist_key_field(i, hist_data) { diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c index 29fbfb27c2b2..c3b582d19b62 100644 --- a/kernel/trace/trace_events_synth.c +++ b/kernel/trace/trace_events_synth.c @@ -1425,7 +1425,6 @@ int synth_event_delete(const char *event_name) mutex_unlock(&event_mutex); if (mod) { - mutex_lock(&trace_types_lock); /* * It is safest to reset the ring buffer if the module * being unloaded registered any events that were @@ -1437,7 +1436,6 @@ int synth_event_delete(const char *event_name) * occur. */ tracing_reset_all_online_cpus(); - mutex_unlock(&trace_types_lock); } return ret; diff --git a/kernel/trace/trace_events_user.c b/kernel/trace/trace_events_user.c index ae78c2d53c8a..539b08ae7020 100644 --- a/kernel/trace/trace_events_user.c +++ b/kernel/trace/trace_events_user.c @@ -1100,8 +1100,10 @@ static int user_event_create(const char *raw_command) group = current_user_event_group(); - if (!group) + if (!group) { + kfree(name); return -ENOENT; + } mutex_lock(&group->reg_mutex); diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c index 78d536d3ff3d..4300c5dc4e5d 100644 --- a/kernel/trace/trace_osnoise.c +++ b/kernel/trace/trace_osnoise.c @@ -917,7 +917,7 @@ void osnoise_trace_irq_entry(int id) void osnoise_trace_irq_exit(int id, const char *desc) { struct osnoise_variables *osn_var = this_cpu_osn_var(); - int duration; + s64 duration; if (!osn_var->sampling) return; @@ -1048,7 +1048,7 @@ static void trace_softirq_entry_callback(void *data, unsigned int vec_nr) static void trace_softirq_exit_callback(void *data, unsigned int vec_nr) { struct osnoise_variables *osn_var = this_cpu_osn_var(); - int duration; + s64 duration; if (!osn_var->sampling) return; @@ -1144,7 +1144,7 @@ thread_entry(struct osnoise_variables *osn_var, struct task_struct *t) static void thread_exit(struct osnoise_variables *osn_var, struct task_struct *t) { - int duration; + s64 duration; if (!osn_var->sampling) return; diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index c3c0b077ade3..3638b3424be5 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -399,6 +399,7 @@ config FRAME_WARN default 2048 if GCC_PLUGIN_LATENT_ENTROPY default 2048 if PARISC default 1536 if (!64BIT && XTENSA) + default 1280 if KASAN && !64BIT default 1024 if !64BIT default 2048 if 64BIT help @@ -1874,8 +1875,14 @@ config NETDEV_NOTIFIER_ERROR_INJECT If unsure, say N. config FUNCTION_ERROR_INJECTION - def_bool y + bool "Fault-injections of functions" depends on HAVE_FUNCTION_ERROR_INJECTION && KPROBES + help + Add fault injections into various functions that are annotated with + ALLOW_ERROR_INJECTION() in the kernel. BPF may also modify the return + value of theses functions. This is useful to test error paths of code. + + If unsure, say N config FAULT_INJECTION bool "Fault-injection framework" @@ -2107,6 +2114,7 @@ config KPROBES_SANITY_TEST depends on DEBUG_KERNEL depends on KPROBES depends on KUNIT + select STACKTRACE if ARCH_CORRECT_STACKTRACE_ON_KRETPROBE default KUNIT_ALL_TESTS help This option provides for testing basic kprobes functionality on diff --git a/lib/fault-inject.c b/lib/fault-inject.c index 96e092de5b72..adb2f9355ee6 100644 --- a/lib/fault-inject.c +++ b/lib/fault-inject.c @@ -41,9 +41,6 @@ EXPORT_SYMBOL_GPL(setup_fault_attr); static void fail_dump(struct fault_attr *attr) { - if (attr->no_warn) - return; - if (attr->verbose > 0 && __ratelimit(&attr->ratelimit_state)) { printk(KERN_NOTICE "FAULT_INJECTION: forcing a failure.\n" "name %pd, interval %lu, probability %lu, " @@ -103,7 +100,7 @@ static inline bool fail_stacktrace(struct fault_attr *attr) * http://www.nongnu.org/failmalloc/ */ -bool should_fail(struct fault_attr *attr, ssize_t size) +bool should_fail_ex(struct fault_attr *attr, ssize_t size, int flags) { if (in_task()) { unsigned int fail_nth = READ_ONCE(current->fail_nth); @@ -146,13 +143,19 @@ bool should_fail(struct fault_attr *attr, ssize_t size) return false; fail: - fail_dump(attr); + if (!(flags & FAULT_NOWARN)) + fail_dump(attr); if (atomic_read(&attr->times) != -1) atomic_dec_not_zero(&attr->times); return true; } + +bool should_fail(struct fault_attr *attr, ssize_t size) +{ + return should_fail_ex(attr, size, 0); +} EXPORT_SYMBOL_GPL(should_fail); #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS diff --git a/lib/vdso/Makefile b/lib/vdso/Makefile index c415a685d61b..e814061d6aa0 100644 --- a/lib/vdso/Makefile +++ b/lib/vdso/Makefile @@ -17,6 +17,6 @@ $(error ARCH_REL_TYPE_ABS is not set) endif quiet_cmd_vdso_check = VDSOCHK $@ - cmd_vdso_check = if $(OBJDUMP) -R $@ | egrep -h "$(ARCH_REL_TYPE_ABS)"; \ + cmd_vdso_check = if $(OBJDUMP) -R $@ | grep -E -h "$(ARCH_REL_TYPE_ABS)"; \ then (echo >&2 "$@: dynamic relocations are not supported"; \ rm -f $@; /bin/false); fi diff --git a/mm/compaction.c b/mm/compaction.c index c51f7f545afe..1f6da31dd9a5 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -985,28 +985,28 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, } /* + * Be careful not to clear PageLRU until after we're + * sure the page is not being freed elsewhere -- the + * page release code relies on it. + */ + if (unlikely(!get_page_unless_zero(page))) + goto isolate_fail; + + /* * Migration will fail if an anonymous page is pinned in memory, * so avoid taking lru_lock and isolating it unnecessarily in an * admittedly racy check. */ mapping = page_mapping(page); - if (!mapping && page_count(page) > page_mapcount(page)) - goto isolate_fail; + if (!mapping && (page_count(page) - 1) > total_mapcount(page)) + goto isolate_fail_put; /* * Only allow to migrate anonymous pages in GFP_NOFS context * because those do not depend on fs locks. */ if (!(cc->gfp_mask & __GFP_FS) && mapping) - goto isolate_fail; - - /* - * Be careful not to clear PageLRU until after we're - * sure the page is not being freed elsewhere -- the - * page release code relies on it. - */ - if (unlikely(!get_page_unless_zero(page))) - goto isolate_fail; + goto isolate_fail_put; /* Only take pages on LRU: a check now makes later tests safe */ if (!PageLRU(page)) diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c index 9f1219a67e3f..07e5f1bdf025 100644 --- a/mm/damon/sysfs.c +++ b/mm/damon/sysfs.c @@ -2283,12 +2283,54 @@ static struct damos *damon_sysfs_mk_scheme( &wmarks); } +static void damon_sysfs_update_scheme(struct damos *scheme, + struct damon_sysfs_scheme *sysfs_scheme) +{ + struct damon_sysfs_access_pattern *access_pattern = + sysfs_scheme->access_pattern; + struct damon_sysfs_quotas *sysfs_quotas = sysfs_scheme->quotas; + struct damon_sysfs_weights *sysfs_weights = sysfs_quotas->weights; + struct damon_sysfs_watermarks *sysfs_wmarks = sysfs_scheme->watermarks; + + scheme->pattern.min_sz_region = access_pattern->sz->min; + scheme->pattern.max_sz_region = access_pattern->sz->max; + scheme->pattern.min_nr_accesses = access_pattern->nr_accesses->min; + scheme->pattern.max_nr_accesses = access_pattern->nr_accesses->max; + scheme->pattern.min_age_region = access_pattern->age->min; + scheme->pattern.max_age_region = access_pattern->age->max; + + scheme->action = sysfs_scheme->action; + + scheme->quota.ms = sysfs_quotas->ms; + scheme->quota.sz = sysfs_quotas->sz; + scheme->quota.reset_interval = sysfs_quotas->reset_interval_ms; + scheme->quota.weight_sz = sysfs_weights->sz; + scheme->quota.weight_nr_accesses = sysfs_weights->nr_accesses; + scheme->quota.weight_age = sysfs_weights->age; + + scheme->wmarks.metric = sysfs_wmarks->metric; + scheme->wmarks.interval = sysfs_wmarks->interval_us; + scheme->wmarks.high = sysfs_wmarks->high; + scheme->wmarks.mid = sysfs_wmarks->mid; + scheme->wmarks.low = sysfs_wmarks->low; +} + static int damon_sysfs_set_schemes(struct damon_ctx *ctx, struct damon_sysfs_schemes *sysfs_schemes) { - int i; + struct damos *scheme, *next; + int i = 0; - for (i = 0; i < sysfs_schemes->nr; i++) { + damon_for_each_scheme_safe(scheme, next, ctx) { + if (i < sysfs_schemes->nr) + damon_sysfs_update_scheme(scheme, + sysfs_schemes->schemes_arr[i]); + else + damon_destroy_scheme(scheme); + i++; + } + + for (; i < sysfs_schemes->nr; i++) { struct damos *scheme, *next; scheme = damon_sysfs_mk_scheme(sysfs_schemes->schemes_arr[i]); @@ -2339,6 +2381,10 @@ static int damon_sysfs_upd_schemes_stats(struct damon_sysfs_kdamond *kdamond) damon_for_each_scheme(scheme, ctx) { struct damon_sysfs_stats *sysfs_stats; + /* user could have removed the scheme sysfs dir */ + if (schemes_idx >= sysfs_schemes->nr) + break; + sysfs_stats = sysfs_schemes->schemes_arr[schemes_idx++]->stats; sysfs_stats->nr_tried = scheme->stat.nr_tried; sysfs_stats->sz_tried = scheme->stat.sz_tried; diff --git a/mm/failslab.c b/mm/failslab.c index 58df9789f1d2..ffc420c0e767 100644 --- a/mm/failslab.c +++ b/mm/failslab.c @@ -16,6 +16,8 @@ static struct { bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags) { + int flags = 0; + /* No fault-injection for bootstrap cache */ if (unlikely(s == kmem_cache)) return false; @@ -30,10 +32,16 @@ bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags) if (failslab.cache_filter && !(s->flags & SLAB_FAILSLAB)) return false; + /* + * In some cases, it expects to specify __GFP_NOWARN + * to avoid printing any information(not just a warning), + * thus avoiding deadlocks. See commit 6b9dbedbe349 for + * details. + */ if (gfpflags & __GFP_NOWARN) - failslab.attr.no_warn = true; + flags |= FAULT_NOWARN; - return should_fail(&failslab.attr, s->object_size); + return should_fail_ex(&failslab.attr, s->object_size, flags); } static int __init setup_failslab(char *str) @@ -2852,7 +2852,7 @@ static int gup_pud_range(p4d_t *p4dp, p4d_t p4d, unsigned long addr, unsigned lo next = pud_addr_end(addr, end); if (unlikely(!pud_present(pud))) return 0; - if (unlikely(pud_huge(pud))) { + if (unlikely(pud_huge(pud) || pud_devmap(pud))) { if (!gup_huge_pud(pud, pudp, addr, next, flags, pages, nr)) return 0; diff --git a/mm/hugetlb.c b/mm/hugetlb.c index e48f8ef45b17..e36ca75311a5 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1800,6 +1800,7 @@ static bool __prep_compound_gigantic_page(struct page *page, unsigned int order, /* we rely on prep_new_huge_page to set the destructor */ set_compound_order(page, order); + __ClearPageReserved(page); __SetPageHead(page); for (i = 0; i < nr_pages; i++) { p = nth_page(page, i); @@ -1816,7 +1817,8 @@ static bool __prep_compound_gigantic_page(struct page *page, unsigned int order, * on the head page when they need know if put_page() is needed * after get_user_pages(). */ - __ClearPageReserved(p); + if (i != 0) /* head page cleared above */ + __ClearPageReserved(p); /* * Subtle and very unlikely * @@ -5204,17 +5206,22 @@ void __unmap_hugepage_range_final(struct mmu_gather *tlb, __unmap_hugepage_range(tlb, vma, start, end, ref_page, zap_flags); - /* - * Unlock and free the vma lock before releasing i_mmap_rwsem. When - * the vma_lock is freed, this makes the vma ineligible for pmd - * sharing. And, i_mmap_rwsem is required to set up pmd sharing. - * This is important as page tables for this unmapped range will - * be asynchrously deleted. If the page tables are shared, there - * will be issues when accessed by someone else. - */ - __hugetlb_vma_unlock_write_free(vma); - - i_mmap_unlock_write(vma->vm_file->f_mapping); + if (zap_flags & ZAP_FLAG_UNMAP) { /* final unmap */ + /* + * Unlock and free the vma lock before releasing i_mmap_rwsem. + * When the vma_lock is freed, this makes the vma ineligible + * for pmd sharing. And, i_mmap_rwsem is required to set up + * pmd sharing. This is important as page tables for this + * unmapped range will be asynchrously deleted. If the page + * tables are shared, there will be issues when accessed by + * someone else. + */ + __hugetlb_vma_unlock_write_free(vma); + i_mmap_unlock_write(vma->vm_file->f_mapping); + } else { + i_mmap_unlock_write(vma->vm_file->f_mapping); + hugetlb_vma_unlock_write(vma); + } } void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, diff --git a/mm/kfence/report.c b/mm/kfence/report.c index 7e496856c2eb..46ecea18c4ca 100644 --- a/mm/kfence/report.c +++ b/mm/kfence/report.c @@ -75,18 +75,23 @@ static int get_stack_skipnr(const unsigned long stack_entries[], int num_entries if (str_has_prefix(buf, ARCH_FUNC_PREFIX "kfence_") || str_has_prefix(buf, ARCH_FUNC_PREFIX "__kfence_") || + str_has_prefix(buf, ARCH_FUNC_PREFIX "__kmem_cache_free") || !strncmp(buf, ARCH_FUNC_PREFIX "__slab_free", len)) { /* - * In case of tail calls from any of the below - * to any of the above. + * In case of tail calls from any of the below to any of + * the above, optimized by the compiler such that the + * stack trace would omit the initial entry point below. */ fallback = skipnr + 1; } - /* Also the *_bulk() variants by only checking prefixes. */ + /* + * The below list should only include the initial entry points + * into the slab allocators. Includes the *_bulk() variants by + * checking prefixes. + */ if (str_has_prefix(buf, ARCH_FUNC_PREFIX "kfree") || str_has_prefix(buf, ARCH_FUNC_PREFIX "kmem_cache_free") || - str_has_prefix(buf, ARCH_FUNC_PREFIX "__kmem_cache_free") || str_has_prefix(buf, ARCH_FUNC_PREFIX "__kmalloc") || str_has_prefix(buf, ARCH_FUNC_PREFIX "kmem_cache_alloc")) goto found; diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 4734315f7940..3703a56571c1 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -97,8 +97,8 @@ struct collapse_control { /* Num pages scanned per node */ u32 node_load[MAX_NUMNODES]; - /* Last target selected in hpage_collapse_find_target_node() */ - int last_target_node; + /* nodemask for allocation fallback */ + nodemask_t alloc_nmask; }; /** @@ -734,7 +734,6 @@ static void khugepaged_alloc_sleep(void) struct collapse_control khugepaged_collapse_control = { .is_khugepaged = true, - .last_target_node = NUMA_NO_NODE, }; static bool hpage_collapse_scan_abort(int nid, struct collapse_control *cc) @@ -783,16 +782,11 @@ static int hpage_collapse_find_target_node(struct collapse_control *cc) target_node = nid; } - /* do some balance if several nodes have the same hit record */ - if (target_node <= cc->last_target_node) - for (nid = cc->last_target_node + 1; nid < MAX_NUMNODES; - nid++) - if (max_value == cc->node_load[nid]) { - target_node = nid; - break; - } + for_each_online_node(nid) { + if (max_value == cc->node_load[nid]) + node_set(nid, cc->alloc_nmask); + } - cc->last_target_node = target_node; return target_node; } #else @@ -802,9 +796,10 @@ static int hpage_collapse_find_target_node(struct collapse_control *cc) } #endif -static bool hpage_collapse_alloc_page(struct page **hpage, gfp_t gfp, int node) +static bool hpage_collapse_alloc_page(struct page **hpage, gfp_t gfp, int node, + nodemask_t *nmask) { - *hpage = __alloc_pages_node(node, gfp, HPAGE_PMD_ORDER); + *hpage = __alloc_pages(gfp, HPAGE_PMD_ORDER, node, nmask); if (unlikely(!*hpage)) { count_vm_event(THP_COLLAPSE_ALLOC_FAILED); return false; @@ -955,12 +950,11 @@ static int __collapse_huge_page_swapin(struct mm_struct *mm, static int alloc_charge_hpage(struct page **hpage, struct mm_struct *mm, struct collapse_control *cc) { - /* Only allocate from the target node */ gfp_t gfp = (cc->is_khugepaged ? alloc_hugepage_khugepaged_gfpmask() : - GFP_TRANSHUGE) | __GFP_THISNODE; + GFP_TRANSHUGE); int node = hpage_collapse_find_target_node(cc); - if (!hpage_collapse_alloc_page(hpage, gfp, node)) + if (!hpage_collapse_alloc_page(hpage, gfp, node, &cc->alloc_nmask)) return SCAN_ALLOC_HUGE_PAGE_FAIL; if (unlikely(mem_cgroup_charge(page_folio(*hpage), mm, gfp))) return SCAN_CGROUP_CHARGE_FAIL; @@ -1057,6 +1051,7 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address, _pmd = pmdp_collapse_flush(vma, address, pmd); spin_unlock(pmd_ptl); mmu_notifier_invalidate_range_end(&range); + tlb_remove_table_sync_one(); spin_lock(pte_ptl); result = __collapse_huge_page_isolate(vma, address, pte, cc, @@ -1144,6 +1139,7 @@ static int hpage_collapse_scan_pmd(struct mm_struct *mm, goto out; memset(cc->node_load, 0, sizeof(cc->node_load)); + nodes_clear(cc->alloc_nmask); pte = pte_offset_map_lock(mm, pmd, address, &ptl); for (_address = address, _pte = pte; _pte < pte + HPAGE_PMD_NR; _pte++, _address += PAGE_SIZE) { @@ -1384,16 +1380,43 @@ static int set_huge_pmd(struct vm_area_struct *vma, unsigned long addr, return SCAN_SUCCEED; } +/* + * A note about locking: + * Trying to take the page table spinlocks would be useless here because those + * are only used to synchronize: + * + * - modifying terminal entries (ones that point to a data page, not to another + * page table) + * - installing *new* non-terminal entries + * + * Instead, we need roughly the same kind of protection as free_pgtables() or + * mm_take_all_locks() (but only for a single VMA): + * The mmap lock together with this VMA's rmap locks covers all paths towards + * the page table entries we're messing with here, except for hardware page + * table walks and lockless_pages_from_mm(). + */ static void collapse_and_free_pmd(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, pmd_t *pmdp) { - spinlock_t *ptl; pmd_t pmd; + struct mmu_notifier_range range; mmap_assert_write_locked(mm); - ptl = pmd_lock(vma->vm_mm, pmdp); + if (vma->vm_file) + lockdep_assert_held_write(&vma->vm_file->f_mapping->i_mmap_rwsem); + /* + * All anon_vmas attached to the VMA have the same root and are + * therefore locked by the same lock. + */ + if (vma->anon_vma) + lockdep_assert_held_write(&vma->anon_vma->root->rwsem); + + mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, NULL, mm, addr, + addr + HPAGE_PMD_SIZE); + mmu_notifier_invalidate_range_start(&range); pmd = pmdp_collapse_flush(vma, addr, pmdp); - spin_unlock(ptl); + tlb_remove_table_sync_one(); + mmu_notifier_invalidate_range_end(&range); mm_dec_nr_ptes(mm); page_table_check_pte_clear_range(mm, addr, pmd); pte_free(mm, pmd_pgtable(pmd)); @@ -1444,6 +1467,14 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr, if (!hugepage_vma_check(vma, vma->vm_flags, false, false, false)) return SCAN_VMA_CHECK; + /* + * Symmetry with retract_page_tables(): Exclude MAP_PRIVATE mappings + * that got written to. Without this, we'd have to also lock the + * anon_vma if one exists. + */ + if (vma->anon_vma) + return SCAN_VMA_CHECK; + /* Keep pmd pgtable for uffd-wp; see comment in retract_page_tables() */ if (userfaultfd_wp(vma)) return SCAN_PTE_UFFD_WP; @@ -1477,6 +1508,20 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr, goto drop_hpage; } + /* + * We need to lock the mapping so that from here on, only GUP-fast and + * hardware page walks can access the parts of the page tables that + * we're operating on. + * See collapse_and_free_pmd(). + */ + i_mmap_lock_write(vma->vm_file->f_mapping); + + /* + * This spinlock should be unnecessary: Nobody else should be accessing + * the page tables under spinlock protection here, only + * lockless_pages_from_mm() and the hardware page walker can access page + * tables while all the high-level locks are held in write mode. + */ start_pte = pte_offset_map_lock(mm, pmd, haddr, &ptl); result = SCAN_FAIL; @@ -1531,6 +1576,8 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr, /* step 4: remove pte entries */ collapse_and_free_pmd(mm, vma, haddr, pmd); + i_mmap_unlock_write(vma->vm_file->f_mapping); + maybe_install_pmd: /* step 5: install pmd entry */ result = install_pmd @@ -1544,6 +1591,7 @@ drop_hpage: abort: pte_unmap_unlock(start_pte, ptl); + i_mmap_unlock_write(vma->vm_file->f_mapping); goto drop_hpage; } @@ -1600,7 +1648,8 @@ static int retract_page_tables(struct address_space *mapping, pgoff_t pgoff, * An alternative would be drop the check, but check that page * table is clear before calling pmdp_collapse_flush() under * ptl. It has higher chance to recover THP for the VMA, but - * has higher cost too. + * has higher cost too. It would also probably require locking + * the anon_vma. */ if (vma->anon_vma) { result = SCAN_PAGE_ANON; @@ -2077,6 +2126,7 @@ static int hpage_collapse_scan_file(struct mm_struct *mm, unsigned long addr, present = 0; swap = 0; memset(cc->node_load, 0, sizeof(cc->node_load)); + nodes_clear(cc->alloc_nmask); rcu_read_lock(); xas_for_each(&xas, page, start + HPAGE_PMD_NR - 1) { if (xas_retry(&xas, page)) @@ -2157,8 +2207,7 @@ static int hpage_collapse_scan_file(struct mm_struct *mm, unsigned long addr, } } - trace_mm_khugepaged_scan_file(mm, page, file->f_path.dentry->d_iname, - present, swap, result); + trace_mm_khugepaged_scan_file(mm, page, file, present, swap, result); return result; } #else @@ -2576,7 +2625,6 @@ int madvise_collapse(struct vm_area_struct *vma, struct vm_area_struct **prev, if (!cc) return -ENOMEM; cc->is_khugepaged = false; - cc->last_target_node = NUMA_NO_NODE; mmgrab(mm); lru_add_drain_all(); @@ -2602,6 +2650,7 @@ int madvise_collapse(struct vm_area_struct *vma, struct vm_area_struct **prev, } mmap_assert_locked(mm); memset(cc->node_load, 0, sizeof(cc->node_load)); + nodes_clear(cc->alloc_nmask); if (IS_ENABLED(CONFIG_SHMEM) && vma->vm_file) { struct file *file = get_file(vma->vm_file); pgoff_t pgoff = linear_page_index(vma, addr); diff --git a/mm/madvise.c b/mm/madvise.c index c7105ec6d08c..b913ba6efc10 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -772,8 +772,8 @@ static int madvise_free_single_vma(struct vm_area_struct *vma, * Application no longer needs these pages. If the pages are dirty, * it's OK to just throw them away. The app will be more careful about * data it wants to keep. Be sure to free swap resources too. The - * zap_page_range call sets things up for shrink_active_list to actually free - * these pages later if no one else has touched them in the meantime, + * zap_page_range_single call sets things up for shrink_active_list to actually + * free these pages later if no one else has touched them in the meantime, * although we could add these pages to a global reuse list for * shrink_active_list to pick up before reclaiming other pages. * @@ -790,7 +790,7 @@ static int madvise_free_single_vma(struct vm_area_struct *vma, static long madvise_dontneed_single_vma(struct vm_area_struct *vma, unsigned long start, unsigned long end) { - zap_page_range(vma, start, end - start); + zap_page_range_single(vma, start, end - start, NULL); return 0; } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 2d8549ae1b30..266a1ab05434 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3026,7 +3026,7 @@ struct obj_cgroup *get_obj_cgroup_from_page(struct page *page) { struct obj_cgroup *objcg; - if (!memcg_kmem_enabled() || memcg_kmem_bypass()) + if (!memcg_kmem_enabled()) return NULL; if (PageMemcgKmem(page)) { @@ -4832,6 +4832,7 @@ static ssize_t memcg_write_event_control(struct kernfs_open_file *of, unsigned int efd, cfd; struct fd efile; struct fd cfile; + struct dentry *cdentry; const char *name; char *endp; int ret; @@ -4886,6 +4887,16 @@ static ssize_t memcg_write_event_control(struct kernfs_open_file *of, goto out_put_cfile; /* + * The control file must be a regular cgroup1 file. As a regular cgroup + * file can't be renamed, it's safe to access its name afterwards. + */ + cdentry = cfile.file->f_path.dentry; + if (cdentry->d_sb->s_type != &cgroup_fs_type || !d_is_reg(cdentry)) { + ret = -EINVAL; + goto out_put_cfile; + } + + /* * Determine the event callbacks and set them in @event. This used * to be done via struct cftype but cgroup core no longer knows * about these events. The following is crude but the whole thing @@ -4893,7 +4904,7 @@ static ssize_t memcg_write_event_control(struct kernfs_open_file *of, * * DO NOT ADD NEW FILES. */ - name = cfile.file->f_path.dentry->d_name.name; + name = cdentry->d_name.name; if (!strcmp(name, "memory.usage_in_bytes")) { event->register_event = mem_cgroup_usage_register_event; @@ -4917,7 +4928,7 @@ static ssize_t memcg_write_event_control(struct kernfs_open_file *of, * automatically removed on cgroup destruction but the removal is * asynchronous, so take an extra ref on @css. */ - cfile_css = css_tryget_online_from_dir(cfile.file->f_path.dentry->d_parent, + cfile_css = css_tryget_online_from_dir(cdentry->d_parent, &memory_cgrp_subsys); ret = -EINVAL; if (IS_ERR(cfile_css)) diff --git a/mm/memory.c b/mm/memory.c index f88c351aecd4..8c8420934d60 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1341,15 +1341,6 @@ copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma) return ret; } -/* - * Parameter block passed down to zap_pte_range in exceptional cases. - */ -struct zap_details { - struct folio *single_folio; /* Locked folio to be unmapped */ - bool even_cows; /* Zap COWed private pages too? */ - zap_flags_t zap_flags; /* Extra flags for zapping */ -}; - /* Whether we should zap all COWed (private) pages too */ static inline bool should_zap_cows(struct zap_details *details) { @@ -1720,7 +1711,7 @@ void unmap_vmas(struct mmu_gather *tlb, struct maple_tree *mt, { struct mmu_notifier_range range; struct zap_details details = { - .zap_flags = ZAP_FLAG_DROP_MARKER, + .zap_flags = ZAP_FLAG_DROP_MARKER | ZAP_FLAG_UNMAP, /* Careful - we need to zap private pages too! */ .even_cows = true, }; @@ -1774,19 +1765,27 @@ void zap_page_range(struct vm_area_struct *vma, unsigned long start, * * The range must fit into one VMA. */ -static void zap_page_range_single(struct vm_area_struct *vma, unsigned long address, +void zap_page_range_single(struct vm_area_struct *vma, unsigned long address, unsigned long size, struct zap_details *details) { + const unsigned long end = address + size; struct mmu_notifier_range range; struct mmu_gather tlb; lru_add_drain(); mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm, - address, address + size); + address, end); + if (is_vm_hugetlb_page(vma)) + adjust_range_if_pmd_sharing_possible(vma, &range.start, + &range.end); tlb_gather_mmu(&tlb, vma->vm_mm); update_hiwater_rss(vma->vm_mm); mmu_notifier_invalidate_range_start(&range); - unmap_single_vma(&tlb, vma, address, range.end, details); + /* + * unmap 'address-end' not 'range.start-range.end' as range + * could have been expanded for hugetlb pmd sharing. + */ + unmap_single_vma(&tlb, vma, address, end, details); mmu_notifier_invalidate_range_end(&range); tlb_finish_mmu(&tlb); } @@ -3763,7 +3762,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) */ get_page(vmf->page); pte_unmap_unlock(vmf->pte, vmf->ptl); - vmf->page->pgmap->ops->migrate_to_ram(vmf); + ret = vmf->page->pgmap->ops->migrate_to_ram(vmf); put_page(vmf->page); } else if (is_hwpoison_entry(entry)) { ret = VM_FAULT_HWPOISON; diff --git a/mm/migrate_device.c b/mm/migrate_device.c index 6fa682eef7a0..721b2365dbca 100644 --- a/mm/migrate_device.c +++ b/mm/migrate_device.c @@ -357,7 +357,8 @@ static bool migrate_vma_check_page(struct page *page, struct page *fault_page) } /* - * Unmaps pages for migration. Returns number of unmapped pages. + * Unmaps pages for migration. Returns number of source pfns marked as + * migrating. */ static unsigned long migrate_device_unmap(unsigned long *src_pfns, unsigned long npages, @@ -373,8 +374,11 @@ static unsigned long migrate_device_unmap(unsigned long *src_pfns, struct page *page = migrate_pfn_to_page(src_pfns[i]); struct folio *folio; - if (!page) + if (!page) { + if (src_pfns[i] & MIGRATE_PFN_MIGRATE) + unmapped++; continue; + } /* ZONE_DEVICE pages are not on LRU */ if (!is_zone_device_page(page)) { diff --git a/mm/mmap.c b/mm/mmap.c index c3c5c1d6103d..54abd46e6007 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -226,8 +226,7 @@ SYSCALL_DEFINE1(brk, unsigned long, brk) /* Search one past newbrk */ mas_set(&mas, newbrk); brkvma = mas_find(&mas, oldbrk); - BUG_ON(brkvma == NULL); - if (brkvma->vm_start >= oldbrk) + if (!brkvma || brkvma->vm_start >= oldbrk) goto out; /* mapping intersects with an existing non-brk vma. */ /* * mm->brk must be protected by write mmap_lock. @@ -456,7 +455,7 @@ void vma_mas_remove(struct vm_area_struct *vma, struct ma_state *mas) * vma_mas_szero() - Set a given range to zero. Used when modifying a * vm_area_struct start or end. * - * @mm: The struct_mm + * @mas: The maple tree ma_state * @start: The start address to zero * @end: The end address to zero. */ @@ -1779,9 +1778,6 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, */ pgoff = 0; get_area = shmem_get_unmapped_area; - } else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) { - /* Ensures that larger anonymous mappings are THP aligned. */ - get_area = thp_get_unmapped_area; } addr = get_area(file, addr, len, pgoff, flags); @@ -2949,9 +2945,9 @@ static int do_brk_flags(struct ma_state *mas, struct vm_area_struct *vma, * Expand the existing vma if possible; Note that singular lists do not * occur after forking, so the expand will only happen on new VMAs. */ - if (vma && - (!vma->anon_vma || list_is_singular(&vma->anon_vma_chain)) && - ((vma->vm_flags & ~VM_SOFTDIRTY) == flags)) { + if (vma && vma->vm_end == addr && !vma_policy(vma) && + can_vma_merge_after(vma, flags, NULL, NULL, + addr >> PAGE_SHIFT, NULL_VM_UFFD_CTX, NULL)) { mas_set_range(mas, vma->vm_start, addr + len - 1); if (mas_preallocate(mas, vma, GFP_KERNEL)) return -ENOMEM; @@ -3038,11 +3034,6 @@ int vm_brk_flags(unsigned long addr, unsigned long request, unsigned long flags) goto munmap_failed; vma = mas_prev(&mas, 0); - if (!vma || vma->vm_end != addr || vma_policy(vma) || - !can_vma_merge_after(vma, flags, NULL, NULL, - addr >> PAGE_SHIFT, NULL_VM_UFFD_CTX, NULL)) - vma = NULL; - ret = do_brk_flags(&mas, vma, addr, len, flags); populate = ((mm->def_flags & VM_LOCKED) != 0); mmap_write_unlock(mm); diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c index add4244e5790..3a2c3f8cad2f 100644 --- a/mm/mmu_gather.c +++ b/mm/mmu_gather.c @@ -153,7 +153,7 @@ static void tlb_remove_table_smp_sync(void *arg) /* Simply deliver the interrupt */ } -static void tlb_remove_table_sync_one(void) +void tlb_remove_table_sync_one(void) { /* * This isn't an RCU grace period and hence the page-tables cannot be @@ -177,8 +177,6 @@ static void tlb_remove_table_free(struct mmu_table_batch *batch) #else /* !CONFIG_MMU_GATHER_RCU_TABLE_FREE */ -static void tlb_remove_table_sync_one(void) { } - static void tlb_remove_table_free(struct mmu_table_batch *batch) { __tlb_remove_table_free(batch); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 218b28ee49ed..6e60657875d3 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3887,6 +3887,8 @@ __setup("fail_page_alloc=", setup_fail_page_alloc); static bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order) { + int flags = 0; + if (order < fail_page_alloc.min_order) return false; if (gfp_mask & __GFP_NOFAIL) @@ -3897,10 +3899,11 @@ static bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order) (gfp_mask & __GFP_DIRECT_RECLAIM)) return false; + /* See comment in __should_failslab() */ if (gfp_mask & __GFP_NOWARN) - fail_page_alloc.attr.no_warn = true; + flags |= FAULT_NOWARN; - return should_fail(&fail_page_alloc.attr, 1 << order); + return should_fail_ex(&fail_page_alloc.attr, 1 << order, flags); } #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS diff --git a/mm/page_ext.c b/mm/page_ext.c index affe80243b6d..ddf1968560f0 100644 --- a/mm/page_ext.c +++ b/mm/page_ext.c @@ -166,7 +166,7 @@ struct page_ext *page_ext_get(struct page *page) /** * page_ext_put() - Working with page extended information is done. - * @page_ext - Page extended information received from page_ext_get(). + * @page_ext: Page extended information received from page_ext_get(). * * The page extended information of the page may not be valid after this * function is called. diff --git a/mm/shmem.c b/mm/shmem.c index c1d8b8a1aa3b..82911fefc2d5 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -948,6 +948,15 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend, index++; } + /* + * When undoing a failed fallocate, we want none of the partial folio + * zeroing and splitting below, but shall want to truncate the whole + * folio when !uptodate indicates that it was added by this fallocate, + * even when [lstart, lend] covers only a part of the folio. + */ + if (unfalloc) + goto whole_folios; + same_folio = (lstart >> PAGE_SHIFT) == (lend >> PAGE_SHIFT); folio = shmem_get_partial_folio(inode, lstart >> PAGE_SHIFT); if (folio) { @@ -973,6 +982,8 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend, folio_put(folio); } +whole_folios: + index = start; while (index < end) { cond_resched(); diff --git a/mm/swapfile.c b/mm/swapfile.c index 5fc1237a9f21..72e481aacd5d 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -973,23 +973,23 @@ done: scan: spin_unlock(&si->lock); while (++offset <= READ_ONCE(si->highest_bit)) { - if (swap_offset_available_and_locked(si, offset)) - goto checks; if (unlikely(--latency_ration < 0)) { cond_resched(); latency_ration = LATENCY_LIMIT; scanned_many = true; } + if (swap_offset_available_and_locked(si, offset)) + goto checks; } offset = si->lowest_bit; while (offset < scan_base) { - if (swap_offset_available_and_locked(si, offset)) - goto checks; if (unlikely(--latency_ration < 0)) { cond_resched(); latency_ration = LATENCY_LIMIT; scanned_many = true; } + if (swap_offset_available_and_locked(si, offset)) + goto checks; offset++; } spin_lock(&si->lock); diff --git a/mm/vmscan.c b/mm/vmscan.c index 04d8b88e5216..8fcc5fa768c0 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2514,8 +2514,20 @@ static unsigned long shrink_inactive_list(unsigned long nr_to_scan, * the flushers simply cannot keep up with the allocation * rate. Nudge the flusher threads in case they are asleep. */ - if (stat.nr_unqueued_dirty == nr_taken) + if (stat.nr_unqueued_dirty == nr_taken) { wakeup_flusher_threads(WB_REASON_VMSCAN); + /* + * For cgroupv1 dirty throttling is achieved by waking up + * the kernel flusher here and later waiting on folios + * which are in writeback to finish (see shrink_folio_list()). + * + * Flusher may not be able to issue writeback quickly + * enough for cgroupv1 writeback throttling to work + * on a large system. + */ + if (!writeback_throttling_sane(sc)) + reclaim_throttle(pgdat, VMSCAN_THROTTLE_WRITEBACK); + } sc->nr.dirty += stat.nr_dirty; sc->nr.congested += stat.nr_congested; @@ -3975,7 +3987,7 @@ static void walk_pmd_range_locked(pud_t *pud, unsigned long next, struct vm_area goto next; if (!pmd_trans_huge(pmd[i])) { - if (IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) && + if (arch_has_hw_nonleaf_pmd_young() && get_cap(LRU_GEN_NONLEAF_YOUNG)) pmdp_test_and_clear_young(vma, addr, pmd + i); goto next; @@ -4073,14 +4085,14 @@ restart: #endif walk->mm_stats[MM_NONLEAF_TOTAL]++; -#ifdef CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG - if (get_cap(LRU_GEN_NONLEAF_YOUNG)) { + if (arch_has_hw_nonleaf_pmd_young() && + get_cap(LRU_GEN_NONLEAF_YOUNG)) { if (!pmd_young(val)) continue; walk_pmd_range_locked(pud, addr, vma, args, bitmap, &pos); } -#endif + if (!walk->force_scan && !test_bloom_filter(walk->lruvec, walk->max_seq, pmd + i)) continue; @@ -4971,10 +4983,13 @@ static int evict_folios(struct lruvec *lruvec, struct scan_control *sc, int swap int scanned; int reclaimed; LIST_HEAD(list); + LIST_HEAD(clean); struct folio *folio; + struct folio *next; enum vm_event_item item; struct reclaim_stat stat; struct lru_gen_mm_walk *walk; + bool skip_retry = false; struct mem_cgroup *memcg = lruvec_memcg(lruvec); struct pglist_data *pgdat = lruvec_pgdat(lruvec); @@ -4991,20 +5006,37 @@ static int evict_folios(struct lruvec *lruvec, struct scan_control *sc, int swap if (list_empty(&list)) return scanned; - +retry: reclaimed = shrink_folio_list(&list, pgdat, sc, &stat, false); + sc->nr_reclaimed += reclaimed; - list_for_each_entry(folio, &list, lru) { - /* restore LRU_REFS_FLAGS cleared by isolate_folio() */ - if (folio_test_workingset(folio)) - folio_set_referenced(folio); + list_for_each_entry_safe_reverse(folio, next, &list, lru) { + if (!folio_evictable(folio)) { + list_del(&folio->lru); + folio_putback_lru(folio); + continue; + } - /* don't add rejected pages to the oldest generation */ if (folio_test_reclaim(folio) && - (folio_test_dirty(folio) || folio_test_writeback(folio))) - folio_clear_active(folio); - else - folio_set_active(folio); + (folio_test_dirty(folio) || folio_test_writeback(folio))) { + /* restore LRU_REFS_FLAGS cleared by isolate_folio() */ + if (folio_test_workingset(folio)) + folio_set_referenced(folio); + continue; + } + + if (skip_retry || folio_test_active(folio) || folio_test_referenced(folio) || + folio_mapped(folio) || folio_test_locked(folio) || + folio_test_dirty(folio) || folio_test_writeback(folio)) { + /* don't add rejected folios to the oldest generation */ + set_mask_bits(&folio->flags, LRU_REFS_MASK | LRU_REFS_FLAGS, + BIT(PG_active)); + continue; + } + + /* retry folios that may have missed folio_rotate_reclaimable() */ + list_move(&folio->lru, &clean); + sc->nr_scanned -= folio_nr_pages(folio); } spin_lock_irq(&lruvec->lru_lock); @@ -5026,7 +5058,13 @@ static int evict_folios(struct lruvec *lruvec, struct scan_control *sc, int swap mem_cgroup_uncharge_list(&list); free_unref_page_list(&list); - sc->nr_reclaimed += reclaimed; + INIT_LIST_HEAD(&list); + list_splice_init(&clean, &list); + + if (!list_empty(&list)) { + skip_retry = true; + goto retry; + } if (need_swapping && type == LRU_GEN_ANON) *need_swapping = true; @@ -5354,7 +5392,7 @@ static ssize_t show_enabled(struct kobject *kobj, struct kobj_attribute *attr, c if (arch_has_hw_pte_young() && get_cap(LRU_GEN_MM_WALK)) caps |= BIT(LRU_GEN_MM_WALK); - if (IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) && get_cap(LRU_GEN_NONLEAF_YOUNG)) + if (arch_has_hw_nonleaf_pmd_young() && get_cap(LRU_GEN_NONLEAF_YOUNG)) caps |= BIT(LRU_GEN_NONLEAF_YOUNG); return snprintf(buf, PAGE_SIZE, "0x%04x\n", caps); @@ -5844,8 +5882,8 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) enum lru_list lru; unsigned long nr_reclaimed = 0; unsigned long nr_to_reclaim = sc->nr_to_reclaim; + bool proportional_reclaim; struct blk_plug plug; - bool scan_adjusted; if (lru_gen_enabled()) { lru_gen_shrink_lruvec(lruvec, sc); @@ -5868,8 +5906,8 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) * abort proportional reclaim if either the file or anon lru has already * dropped to zero at the first pass. */ - scan_adjusted = (!cgroup_reclaim(sc) && !current_is_kswapd() && - sc->priority == DEF_PRIORITY); + proportional_reclaim = (!cgroup_reclaim(sc) && !current_is_kswapd() && + sc->priority == DEF_PRIORITY); blk_start_plug(&plug); while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] || @@ -5889,7 +5927,7 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) cond_resched(); - if (nr_reclaimed < nr_to_reclaim || scan_adjusted) + if (nr_reclaimed < nr_to_reclaim || proportional_reclaim) continue; /* @@ -5940,8 +5978,6 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) nr_scanned = targets[lru] - nr[lru]; nr[lru] = targets[lru] * (100 - percentage) / 100; nr[lru] -= min(nr[lru], nr_scanned); - - scan_adjusted = true; } blk_finish_plug(&plug); sc->nr_reclaimed += nr_reclaimed; diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index 56a186768750..07db2f436d44 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -120,7 +120,7 @@ struct p9_conn { struct list_head unsent_req_list; struct p9_req_t *rreq; struct p9_req_t *wreq; - char tmp_buf[7]; + char tmp_buf[P9_HDRSZ]; struct p9_fcall rc; int wpos; int wsize; @@ -202,9 +202,11 @@ static void p9_conn_cancel(struct p9_conn *m, int err) list_for_each_entry_safe(req, rtmp, &m->req_list, req_list) { list_move(&req->req_list, &cancel_list); + req->status = REQ_STATUS_ERROR; } list_for_each_entry_safe(req, rtmp, &m->unsent_req_list, req_list) { list_move(&req->req_list, &cancel_list); + req->status = REQ_STATUS_ERROR; } spin_unlock(&m->req_lock); @@ -291,7 +293,7 @@ static void p9_read_work(struct work_struct *work) if (!m->rc.sdata) { m->rc.sdata = m->tmp_buf; m->rc.offset = 0; - m->rc.capacity = 7; /* start by reading header */ + m->rc.capacity = P9_HDRSZ; /* start by reading header */ } clear_bit(Rpending, &m->wsched); @@ -314,7 +316,7 @@ static void p9_read_work(struct work_struct *work) p9_debug(P9_DEBUG_TRANS, "got new header\n"); /* Header size */ - m->rc.size = 7; + m->rc.size = P9_HDRSZ; err = p9_parse_header(&m->rc, &m->rc.size, NULL, NULL, 0); if (err) { p9_debug(P9_DEBUG_ERROR, @@ -322,14 +324,6 @@ static void p9_read_work(struct work_struct *work) goto error; } - if (m->rc.size >= m->client->msize) { - p9_debug(P9_DEBUG_ERROR, - "requested packet size too big: %d\n", - m->rc.size); - err = -EIO; - goto error; - } - p9_debug(P9_DEBUG_TRANS, "mux %p pkt: size: %d bytes tag: %d\n", m, m->rc.size, m->rc.tag); @@ -342,6 +336,14 @@ static void p9_read_work(struct work_struct *work) goto error; } + if (m->rc.size > m->rreq->rc.capacity) { + p9_debug(P9_DEBUG_ERROR, + "requested packet size too big: %d for tag %d with capacity %zd\n", + m->rc.size, m->rc.tag, m->rreq->rc.capacity); + err = -EIO; + goto error; + } + if (!m->rreq->rc.sdata) { p9_debug(P9_DEBUG_ERROR, "No recv fcall for tag %d (req %p), disconnecting!\n", @@ -860,8 +862,10 @@ static int p9_socket_open(struct p9_client *client, struct socket *csocket) struct file *file; p = kzalloc(sizeof(struct p9_trans_fd), GFP_KERNEL); - if (!p) + if (!p) { + sock_release(csocket); return -ENOMEM; + } csocket->sk->sk_allocation = GFP_NOIO; file = sock_alloc_file(csocket, 0, NULL); diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c index b15c64128c3e..aaa5fd364691 100644 --- a/net/9p/trans_xen.c +++ b/net/9p/trans_xen.c @@ -208,6 +208,14 @@ static void p9_xen_response(struct work_struct *work) continue; } + if (h.size > req->rc.capacity) { + dev_warn(&priv->dev->dev, + "requested packet size too big: %d for tag %d with capacity %zd\n", + h.size, h.tag, req->rc.capacity); + req->status = REQ_STATUS_ERROR; + goto recv_error; + } + memcpy(&req->rc, &h, sizeof(h)); req->rc.offset = 0; @@ -217,6 +225,7 @@ static void p9_xen_response(struct work_struct *work) masked_prod, &masked_cons, XEN_9PFS_RING_SIZE(ring)); +recv_error: virt_mb(); cons += h.size; ring->intf->in_cons = cons; diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c index 215af9b3b589..c57d643afb10 100644 --- a/net/bluetooth/6lowpan.c +++ b/net/bluetooth/6lowpan.c @@ -972,6 +972,7 @@ static int get_l2cap_conn(char *buf, bdaddr_t *addr, u8 *addr_type, hci_dev_lock(hdev); hcon = hci_conn_hash_lookup_le(hdev, addr, *addr_type); hci_dev_unlock(hdev); + hci_dev_put(hdev); if (!hcon) return -ENOENT; diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index dc65974f5adb..1c3c7ff5c3c6 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -737,7 +737,7 @@ static int __init bt_init(void) err = bt_sysfs_init(); if (err < 0) - return err; + goto cleanup_led; err = sock_register(&bt_sock_family_ops); if (err) @@ -773,6 +773,8 @@ unregister_socket: sock_unregister(PF_BLUETOOTH); cleanup_sysfs: bt_sysfs_cleanup(); +cleanup_led: + bt_leds_cleanup(); return err; } diff --git a/net/bluetooth/hci_codec.c b/net/bluetooth/hci_codec.c index 38201532f58e..3cc135bb1d30 100644 --- a/net/bluetooth/hci_codec.c +++ b/net/bluetooth/hci_codec.c @@ -72,9 +72,8 @@ static void hci_read_codec_capabilities(struct hci_dev *hdev, __u8 transport, continue; } - skb = __hci_cmd_sync(hdev, HCI_OP_READ_LOCAL_CODEC_CAPS, - sizeof(*cmd), cmd, - HCI_CMD_TIMEOUT); + skb = __hci_cmd_sync_sk(hdev, HCI_OP_READ_LOCAL_CODEC_CAPS, + sizeof(*cmd), cmd, 0, HCI_CMD_TIMEOUT, NULL); if (IS_ERR(skb)) { bt_dev_err(hdev, "Failed to read codec capabilities (%ld)", PTR_ERR(skb)); @@ -127,8 +126,8 @@ void hci_read_supported_codecs(struct hci_dev *hdev) struct hci_op_read_local_codec_caps caps; __u8 i; - skb = __hci_cmd_sync(hdev, HCI_OP_READ_LOCAL_CODECS, 0, NULL, - HCI_CMD_TIMEOUT); + skb = __hci_cmd_sync_sk(hdev, HCI_OP_READ_LOCAL_CODECS, 0, NULL, + 0, HCI_CMD_TIMEOUT, NULL); if (IS_ERR(skb)) { bt_dev_err(hdev, "Failed to read local supported codecs (%ld)", @@ -158,7 +157,8 @@ void hci_read_supported_codecs(struct hci_dev *hdev) for (i = 0; i < std_codecs->num; i++) { caps.id = std_codecs->codec[i]; caps.direction = 0x00; - hci_read_codec_capabilities(hdev, LOCAL_CODEC_ACL_MASK, &caps); + hci_read_codec_capabilities(hdev, + LOCAL_CODEC_ACL_MASK | LOCAL_CODEC_SCO_MASK, &caps); } skb_pull(skb, flex_array_size(std_codecs, codec, std_codecs->num) @@ -178,7 +178,8 @@ void hci_read_supported_codecs(struct hci_dev *hdev) caps.cid = vnd_codecs->codec[i].cid; caps.vid = vnd_codecs->codec[i].vid; caps.direction = 0x00; - hci_read_codec_capabilities(hdev, LOCAL_CODEC_ACL_MASK, &caps); + hci_read_codec_capabilities(hdev, + LOCAL_CODEC_ACL_MASK | LOCAL_CODEC_SCO_MASK, &caps); } error: @@ -194,8 +195,8 @@ void hci_read_supported_codecs_v2(struct hci_dev *hdev) struct hci_op_read_local_codec_caps caps; __u8 i; - skb = __hci_cmd_sync(hdev, HCI_OP_READ_LOCAL_CODECS_V2, 0, NULL, - HCI_CMD_TIMEOUT); + skb = __hci_cmd_sync_sk(hdev, HCI_OP_READ_LOCAL_CODECS_V2, 0, NULL, + 0, HCI_CMD_TIMEOUT, NULL); if (IS_ERR(skb)) { bt_dev_err(hdev, "Failed to read local supported codecs (%ld)", diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 0540555b3704..d97fac4f7130 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -2764,7 +2764,8 @@ int hci_register_suspend_notifier(struct hci_dev *hdev) { int ret = 0; - if (!test_bit(HCI_QUIRK_NO_SUSPEND_NOTIFIER, &hdev->quirks)) { + if (!hdev->suspend_notifier.notifier_call && + !test_bit(HCI_QUIRK_NO_SUSPEND_NOTIFIER, &hdev->quirks)) { hdev->suspend_notifier.notifier_call = hci_suspend_notifier; ret = register_pm_notifier(&hdev->suspend_notifier); } @@ -2776,8 +2777,11 @@ int hci_unregister_suspend_notifier(struct hci_dev *hdev) { int ret = 0; - if (!test_bit(HCI_QUIRK_NO_SUSPEND_NOTIFIER, &hdev->quirks)) + if (hdev->suspend_notifier.notifier_call) { ret = unregister_pm_notifier(&hdev->suspend_notifier); + if (!ret) + hdev->suspend_notifier.notifier_call = NULL; + } return ret; } diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 5a0296a4352e..f7e006a36382 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -269,7 +269,7 @@ void hci_req_add_ev(struct hci_request *req, u16 opcode, u32 plen, void hci_req_add(struct hci_request *req, u16 opcode, u32 plen, const void *param) { - bt_dev_err(req->hdev, "HCI_REQ-0x%4.4x", opcode); + bt_dev_dbg(req->hdev, "HCI_REQ-0x%4.4x", opcode); hci_req_add_ev(req, opcode, plen, param, 0); } diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 76c3107c9f91..1fc693122a47 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -12,6 +12,7 @@ #include <net/bluetooth/mgmt.h> #include "hci_request.h" +#include "hci_codec.h" #include "hci_debugfs.h" #include "smp.h" #include "eir.h" @@ -3780,7 +3781,8 @@ static int hci_read_page_scan_activity_sync(struct hci_dev *hdev) static int hci_read_def_err_data_reporting_sync(struct hci_dev *hdev) { if (!(hdev->commands[18] & 0x04) || - !(hdev->features[0][6] & LMP_ERR_DATA_REPORTING)) + !(hdev->features[0][6] & LMP_ERR_DATA_REPORTING) || + test_bit(HCI_QUIRK_BROKEN_ERR_DATA_REPORTING, &hdev->quirks)) return 0; return __hci_cmd_sync_status(hdev, HCI_OP_READ_DEF_ERR_DATA_REPORTING, @@ -4238,11 +4240,12 @@ static int hci_set_event_mask_page_2_sync(struct hci_dev *hdev) /* Read local codec list if the HCI command is supported */ static int hci_read_local_codecs_sync(struct hci_dev *hdev) { - if (!(hdev->commands[29] & 0x20)) - return 0; + if (hdev->commands[45] & 0x04) + hci_read_supported_codecs_v2(hdev); + else if (hdev->commands[29] & 0x20) + hci_read_supported_codecs(hdev); - return __hci_cmd_sync_status(hdev, HCI_OP_READ_LOCAL_CODECS, 0, NULL, - HCI_CMD_TIMEOUT); + return 0; } /* Read local pairing options if the HCI command is supported */ @@ -4298,7 +4301,8 @@ static int hci_set_err_data_report_sync(struct hci_dev *hdev) bool enabled = hci_dev_test_flag(hdev, HCI_WIDEBAND_SPEECH_ENABLED); if (!(hdev->commands[18] & 0x08) || - !(hdev->features[0][6] & LMP_ERR_DATA_REPORTING)) + !(hdev->features[0][6] & LMP_ERR_DATA_REPORTING) || + test_bit(HCI_QUIRK_BROKEN_ERR_DATA_REPORTING, &hdev->quirks)) return 0; if (enabled == hdev->err_data_reporting) @@ -4457,6 +4461,9 @@ static const struct { HCI_QUIRK_BROKEN(STORED_LINK_KEY, "HCI Delete Stored Link Key command is advertised, " "but not supported."), + HCI_QUIRK_BROKEN(ERR_DATA_REPORTING, + "HCI Read Default Erroneous Data Reporting command is " + "advertised, but not supported."), HCI_QUIRK_BROKEN(READ_TRANSMIT_POWER, "HCI Read Transmit Power Level command is advertised, " "but not supported."), diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c index f825857db6d0..26db929b97c4 100644 --- a/net/bluetooth/iso.c +++ b/net/bluetooth/iso.c @@ -879,6 +879,7 @@ static int iso_listen_bis(struct sock *sk) iso_pi(sk)->bc_sid); hci_dev_unlock(hdev); + hci_dev_put(hdev); return err; } diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 9c24947aa41e..9fdede5fe71c 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -4453,7 +4453,8 @@ static inline int l2cap_config_req(struct l2cap_conn *conn, chan->ident = cmd->ident; l2cap_send_cmd(conn, cmd->ident, L2CAP_CONF_RSP, len, rsp); - chan->num_conf_rsp++; + if (chan->num_conf_rsp < L2CAP_CONF_MAX_CONF_RSP) + chan->num_conf_rsp++; /* Reset config buffer. */ chan->conf_len = 0; diff --git a/net/can/af_can.c b/net/can/af_can.c index 27dcdcc0b808..c69168f11e44 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -677,7 +677,7 @@ static void can_receive(struct sk_buff *skb, struct net_device *dev) static int can_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { - if (unlikely(dev->type != ARPHRD_CAN || (!can_is_can_skb(skb)))) { + if (unlikely(dev->type != ARPHRD_CAN || !can_get_ml_priv(dev) || !can_is_can_skb(skb))) { pr_warn_once("PF_CAN: dropped non conform CAN skbuff: dev type %d, len %d\n", dev->type, skb->len); @@ -692,7 +692,7 @@ static int can_rcv(struct sk_buff *skb, struct net_device *dev, static int canfd_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { - if (unlikely(dev->type != ARPHRD_CAN || (!can_is_canfd_skb(skb)))) { + if (unlikely(dev->type != ARPHRD_CAN || !can_get_ml_priv(dev) || !can_is_canfd_skb(skb))) { pr_warn_once("PF_CAN: dropped non conform CAN FD skbuff: dev type %d, len %d\n", dev->type, skb->len); @@ -707,7 +707,7 @@ static int canfd_rcv(struct sk_buff *skb, struct net_device *dev, static int canxl_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { - if (unlikely(dev->type != ARPHRD_CAN || (!can_is_canxl_skb(skb)))) { + if (unlikely(dev->type != ARPHRD_CAN || !can_get_ml_priv(dev) || !can_is_canxl_skb(skb))) { pr_warn_once("PF_CAN: dropped non conform CAN XL skbuff: dev type %d, len %d\n", dev->type, skb->len); diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 25cd35f5922e..007730412947 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -296,7 +296,7 @@ skb_flow_dissect_ct(const struct sk_buff *skb, key->ct_zone = ct->zone.id; #endif #if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) - key->ct_mark = ct->mark; + key->ct_mark = READ_ONCE(ct->mark); #endif cl = nf_ct_labels_find(ct); diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c index 6fac2f0ef074..711cd3b4347a 100644 --- a/net/core/lwtunnel.c +++ b/net/core/lwtunnel.c @@ -48,9 +48,11 @@ static const char *lwtunnel_encap_str(enum lwtunnel_encap_types encap_type) return "RPL"; case LWTUNNEL_ENCAP_IOAM6: return "IOAM6"; + case LWTUNNEL_ENCAP_XFRM: + /* module autoload not supported for encap type */ + return NULL; case LWTUNNEL_ENCAP_IP6: case LWTUNNEL_ENCAP_IP: - case LWTUNNEL_ENCAP_XFRM: case LWTUNNEL_ENCAP_NONE: case __LWTUNNEL_ENCAP_MAX: /* should not have got here */ diff --git a/net/core/neighbour.c b/net/core/neighbour.c index a77a85e357e0..952a54763358 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -307,7 +307,31 @@ static int neigh_del_timer(struct neighbour *n) return 0; } -static void pneigh_queue_purge(struct sk_buff_head *list, struct net *net) +static struct neigh_parms *neigh_get_dev_parms_rcu(struct net_device *dev, + int family) +{ + switch (family) { + case AF_INET: + return __in_dev_arp_parms_get_rcu(dev); + case AF_INET6: + return __in6_dev_nd_parms_get_rcu(dev); + } + return NULL; +} + +static void neigh_parms_qlen_dec(struct net_device *dev, int family) +{ + struct neigh_parms *p; + + rcu_read_lock(); + p = neigh_get_dev_parms_rcu(dev, family); + if (p) + p->qlen--; + rcu_read_unlock(); +} + +static void pneigh_queue_purge(struct sk_buff_head *list, struct net *net, + int family) { struct sk_buff_head tmp; unsigned long flags; @@ -321,13 +345,7 @@ static void pneigh_queue_purge(struct sk_buff_head *list, struct net *net) struct net_device *dev = skb->dev; if (net == NULL || net_eq(dev_net(dev), net)) { - struct in_device *in_dev; - - rcu_read_lock(); - in_dev = __in_dev_get_rcu(dev); - if (in_dev) - in_dev->arp_parms->qlen--; - rcu_read_unlock(); + neigh_parms_qlen_dec(dev, family); __skb_unlink(skb, list); __skb_queue_tail(&tmp, skb); } @@ -409,7 +427,8 @@ static int __neigh_ifdown(struct neigh_table *tbl, struct net_device *dev, write_lock_bh(&tbl->lock); neigh_flush_dev(tbl, dev, skip_perm); pneigh_ifdown_and_unlock(tbl, dev); - pneigh_queue_purge(&tbl->proxy_queue, dev ? dev_net(dev) : NULL); + pneigh_queue_purge(&tbl->proxy_queue, dev ? dev_net(dev) : NULL, + tbl->family); if (skb_queue_empty_lockless(&tbl->proxy_queue)) del_timer_sync(&tbl->proxy_timer); return 0; @@ -1621,13 +1640,8 @@ static void neigh_proxy_process(struct timer_list *t) if (tdif <= 0) { struct net_device *dev = skb->dev; - struct in_device *in_dev; - rcu_read_lock(); - in_dev = __in_dev_get_rcu(dev); - if (in_dev) - in_dev->arp_parms->qlen--; - rcu_read_unlock(); + neigh_parms_qlen_dec(dev, tbl->family); __skb_unlink(skb, &tbl->proxy_queue); if (tbl->proxy_redo && netif_running(dev)) { @@ -1821,7 +1835,7 @@ int neigh_table_clear(int index, struct neigh_table *tbl) cancel_delayed_work_sync(&tbl->managed_work); cancel_delayed_work_sync(&tbl->gc_work); del_timer_sync(&tbl->proxy_timer); - pneigh_queue_purge(&tbl->proxy_queue, NULL); + pneigh_queue_purge(&tbl->proxy_queue, NULL, tbl->family); neigh_ifdown(tbl, NULL); if (atomic_read(&tbl->entries)) pr_crit("neighbour leakage\n"); @@ -3539,18 +3553,6 @@ static int proc_unres_qlen(struct ctl_table *ctl, int write, return ret; } -static struct neigh_parms *neigh_get_dev_parms_rcu(struct net_device *dev, - int family) -{ - switch (family) { - case AF_INET: - return __in_dev_arp_parms_get_rcu(dev); - case AF_INET6: - return __in6_dev_nd_parms_get_rcu(dev); - } - return NULL; -} - static void neigh_copy_dflt_parms(struct net *net, struct neigh_parms *p, int index) { diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 713b7b8dad7e..b780827f5e0a 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -45,11 +45,10 @@ static unsigned int dccp_v4_pernet_id __read_mostly; int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { const struct sockaddr_in *usin = (struct sockaddr_in *)uaddr; - struct inet_bind_hashbucket *prev_addr_hashbucket = NULL; - __be32 daddr, nexthop, prev_sk_rcv_saddr; struct inet_sock *inet = inet_sk(sk); struct dccp_sock *dp = dccp_sk(sk); __be16 orig_sport, orig_dport; + __be32 daddr, nexthop; struct flowi4 *fl4; struct rtable *rt; int err; @@ -91,26 +90,13 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) daddr = fl4->daddr; if (inet->inet_saddr == 0) { - if (inet_csk(sk)->icsk_bind2_hash) { - prev_addr_hashbucket = - inet_bhashfn_portaddr(&dccp_hashinfo, sk, - sock_net(sk), - inet->inet_num); - prev_sk_rcv_saddr = sk->sk_rcv_saddr; - } - inet->inet_saddr = fl4->saddr; - } - - sk_rcv_saddr_set(sk, inet->inet_saddr); - - if (prev_addr_hashbucket) { - err = inet_bhash2_update_saddr(prev_addr_hashbucket, sk); + err = inet_bhash2_update_saddr(sk, &fl4->saddr, AF_INET); if (err) { - inet->inet_saddr = 0; - sk_rcv_saddr_set(sk, prev_sk_rcv_saddr); ip_rt_put(rt); return err; } + } else { + sk_rcv_saddr_set(sk, inet->inet_saddr); } inet->inet_dport = usin->sin_port; @@ -157,6 +143,7 @@ failure: * This unhashes the socket and releases the local port, if necessary. */ dccp_set_state(sk, DCCP_CLOSED); + inet_bhash2_reset_saddr(sk); ip_rt_put(rt); sk->sk_route_caps = 0; inet->inet_dport = 0; diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index e57b43006074..602f3432d80b 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -934,26 +934,11 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, } if (saddr == NULL) { - struct inet_bind_hashbucket *prev_addr_hashbucket = NULL; - struct in6_addr prev_v6_rcv_saddr; - - if (icsk->icsk_bind2_hash) { - prev_addr_hashbucket = inet_bhashfn_portaddr(&dccp_hashinfo, - sk, sock_net(sk), - inet->inet_num); - prev_v6_rcv_saddr = sk->sk_v6_rcv_saddr; - } - saddr = &fl6.saddr; - sk->sk_v6_rcv_saddr = *saddr; - - if (prev_addr_hashbucket) { - err = inet_bhash2_update_saddr(prev_addr_hashbucket, sk); - if (err) { - sk->sk_v6_rcv_saddr = prev_v6_rcv_saddr; - goto failure; - } - } + + err = inet_bhash2_update_saddr(sk, saddr, AF_INET6); + if (err) + goto failure; } /* set the source address */ @@ -985,6 +970,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, late_failure: dccp_set_state(sk, DCCP_CLOSED); + inet_bhash2_reset_saddr(sk); __sk_dst_reset(sk); failure: inet->inet_dport = 0; diff --git a/net/dccp/proto.c b/net/dccp/proto.c index c548ca3e9b0e..85e35c5e8890 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -279,8 +279,7 @@ int dccp_disconnect(struct sock *sk, int flags) inet->inet_dport = 0; - if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) - inet_reset_saddr(sk); + inet_bhash2_reset_saddr(sk); sk->sk_shutdown = 0; sock_reset_flag(sk, SOCK_DONE); diff --git a/net/dsa/tag_hellcreek.c b/net/dsa/tag_hellcreek.c index 846588c0070a..53a206d11685 100644 --- a/net/dsa/tag_hellcreek.c +++ b/net/dsa/tag_hellcreek.c @@ -49,7 +49,8 @@ static struct sk_buff *hellcreek_rcv(struct sk_buff *skb, return NULL; } - pskb_trim_rcsum(skb, skb->len - HELLCREEK_TAG_LEN); + if (pskb_trim_rcsum(skb, skb->len - HELLCREEK_TAG_LEN)) + return NULL; dsa_default_offload_fwd_mark(skb); diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c index 38fa19c1e2d5..429250298ac4 100644 --- a/net/dsa/tag_ksz.c +++ b/net/dsa/tag_ksz.c @@ -21,7 +21,8 @@ static struct sk_buff *ksz_common_rcv(struct sk_buff *skb, if (!skb->dev) return NULL; - pskb_trim_rcsum(skb, skb->len - len); + if (pskb_trim_rcsum(skb, skb->len - len)) + return NULL; dsa_default_offload_fwd_mark(skb); diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c index 83e4136516b0..1a85125bda6d 100644 --- a/net/dsa/tag_sja1105.c +++ b/net/dsa/tag_sja1105.c @@ -665,7 +665,8 @@ static struct sk_buff *sja1110_rcv_inband_control_extension(struct sk_buff *skb, * padding and trailer we need to account for the fact that * skb->data points to skb_mac_header(skb) + ETH_HLEN. */ - pskb_trim_rcsum(skb, start_of_padding - ETH_HLEN); + if (pskb_trim_rcsum(skb, start_of_padding - ETH_HLEN)) + return NULL; /* Trap-to-host frame, no timestamp trailer */ } else { *source_port = SJA1110_RX_HEADER_SRC_PORT(rx_header); diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c index a50429a62f74..56bb27d67a2e 100644 --- a/net/hsr/hsr_forward.c +++ b/net/hsr/hsr_forward.c @@ -351,17 +351,18 @@ static void hsr_deliver_master(struct sk_buff *skb, struct net_device *dev, struct hsr_node *node_src) { bool was_multicast_frame; - int res; + int res, recv_len; was_multicast_frame = (skb->pkt_type == PACKET_MULTICAST); hsr_addr_subst_source(node_src, skb); skb_pull(skb, ETH_HLEN); + recv_len = skb->len; res = netif_rx(skb); if (res == NET_RX_DROP) { dev->stats.rx_dropped++; } else { dev->stats.rx_packets++; - dev->stats.rx_bytes += skb->len; + dev->stats.rx_bytes += recv_len; if (was_multicast_frame) dev->stats.multicast++; } diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 4728087c42a5..0da679411330 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1230,7 +1230,6 @@ EXPORT_SYMBOL(inet_unregister_protosw); static int inet_sk_reselect_saddr(struct sock *sk) { - struct inet_bind_hashbucket *prev_addr_hashbucket; struct inet_sock *inet = inet_sk(sk); __be32 old_saddr = inet->inet_saddr; __be32 daddr = inet->inet_daddr; @@ -1260,16 +1259,8 @@ static int inet_sk_reselect_saddr(struct sock *sk) return 0; } - prev_addr_hashbucket = - inet_bhashfn_portaddr(tcp_or_dccp_get_hashinfo(sk), sk, - sock_net(sk), inet->inet_num); - - inet->inet_saddr = inet->inet_rcv_saddr = new_saddr; - - err = inet_bhash2_update_saddr(prev_addr_hashbucket, sk); + err = inet_bhash2_update_saddr(sk, &new_saddr, AF_INET); if (err) { - inet->inet_saddr = old_saddr; - inet->inet_rcv_saddr = old_saddr; ip_rt_put(rt); return err; } diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c index 170152772d33..3969fa805679 100644 --- a/net/ipv4/esp4_offload.c +++ b/net/ipv4/esp4_offload.c @@ -314,6 +314,9 @@ static int esp_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features_ xo->seq.low += skb_shinfo(skb)->gso_segs; } + if (xo->seq.low < seq) + xo->seq.hi++; + esp.seqno = cpu_to_be64(seq + ((u64)xo->seq.hi << 32)); ip_hdr(skb)->tot_len = htons(skb->len); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index f361d3d56be2..b5736ef16ed2 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -841,6 +841,9 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, return -EINVAL; } + if (!cfg->fc_table) + cfg->fc_table = RT_TABLE_MAIN; + return 0; errout: return err; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index f721c308248b..ce9ff3c62e84 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -423,6 +423,7 @@ static struct fib_info *fib_find_info(struct fib_info *nfi) nfi->fib_prefsrc == fi->fib_prefsrc && nfi->fib_priority == fi->fib_priority && nfi->fib_type == fi->fib_type && + nfi->fib_tb_id == fi->fib_tb_id && memcmp(nfi->fib_metrics, fi->fib_metrics, sizeof(u32) * RTAX_MAX) == 0 && !((nfi->fib_flags ^ fi->fib_flags) & ~RTNH_COMPARE_MASK) && @@ -888,9 +889,11 @@ int fib_nh_match(struct net *net, struct fib_config *cfg, struct fib_info *fi, return 1; } - /* cannot match on nexthop object attributes */ - if (fi->nh) - return 1; + if (fi->nh) { + if (cfg->fc_oif || cfg->fc_gw_family || cfg->fc_mp) + return 1; + return 0; + } if (cfg->fc_oif || cfg->fc_gw_family) { struct fib_nh *nh; diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 452ff177e4da..74d403dbd2b4 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -126,7 +126,7 @@ struct key_vector { /* This list pointer if valid if (pos | bits) == 0 (LEAF) */ struct hlist_head leaf; /* This array is valid if (pos | bits) > 0 (TNODE) */ - struct key_vector __rcu *tnode[0]; + DECLARE_FLEX_ARRAY(struct key_vector __rcu *, tnode); }; }; @@ -1381,8 +1381,10 @@ int fib_table_insert(struct net *net, struct fib_table *tb, /* The alias was already inserted, so the node must exist. */ l = l ? l : fib_find_node(t, &tp, key); - if (WARN_ON_ONCE(!l)) + if (WARN_ON_ONCE(!l)) { + err = -ENOENT; goto out_free_new_fa; + } if (fib_find_alias(&l->leaf, new_fa->fa_slen, 0, 0, tb->tb_id, true) == new_fa) { diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 033bf3c2538f..3cec471a2cd2 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -858,34 +858,80 @@ inet_bhash2_addr_any_hashbucket(const struct sock *sk, const struct net *net, in return &hinfo->bhash2[hash & (hinfo->bhash_size - 1)]; } -int inet_bhash2_update_saddr(struct inet_bind_hashbucket *prev_saddr, struct sock *sk) +static void inet_update_saddr(struct sock *sk, void *saddr, int family) +{ + if (family == AF_INET) { + inet_sk(sk)->inet_saddr = *(__be32 *)saddr; + sk_rcv_saddr_set(sk, inet_sk(sk)->inet_saddr); + } +#if IS_ENABLED(CONFIG_IPV6) + else { + sk->sk_v6_rcv_saddr = *(struct in6_addr *)saddr; + } +#endif +} + +static int __inet_bhash2_update_saddr(struct sock *sk, void *saddr, int family, bool reset) { struct inet_hashinfo *hinfo = tcp_or_dccp_get_hashinfo(sk); + struct inet_bind_hashbucket *head, *head2; struct inet_bind2_bucket *tb2, *new_tb2; int l3mdev = inet_sk_bound_l3mdev(sk); - struct inet_bind_hashbucket *head2; int port = inet_sk(sk)->inet_num; struct net *net = sock_net(sk); + int bhash; + + if (!inet_csk(sk)->icsk_bind2_hash) { + /* Not bind()ed before. */ + if (reset) + inet_reset_saddr(sk); + else + inet_update_saddr(sk, saddr, family); + + return 0; + } /* Allocate a bind2 bucket ahead of time to avoid permanently putting * the bhash2 table in an inconsistent state if a new tb2 bucket * allocation fails. */ new_tb2 = kmem_cache_alloc(hinfo->bind2_bucket_cachep, GFP_ATOMIC); - if (!new_tb2) + if (!new_tb2) { + if (reset) { + /* The (INADDR_ANY, port) bucket might have already + * been freed, then we cannot fixup icsk_bind2_hash, + * so we give up and unlink sk from bhash/bhash2 not + * to leave inconsistency in bhash2. + */ + inet_put_port(sk); + inet_reset_saddr(sk); + } + return -ENOMEM; + } + bhash = inet_bhashfn(net, port, hinfo->bhash_size); + head = &hinfo->bhash[bhash]; head2 = inet_bhashfn_portaddr(hinfo, sk, net, port); - if (prev_saddr) { - spin_lock_bh(&prev_saddr->lock); - __sk_del_bind2_node(sk); - inet_bind2_bucket_destroy(hinfo->bind2_bucket_cachep, - inet_csk(sk)->icsk_bind2_hash); - spin_unlock_bh(&prev_saddr->lock); - } + /* If we change saddr locklessly, another thread + * iterating over bhash might see corrupted address. + */ + spin_lock_bh(&head->lock); - spin_lock_bh(&head2->lock); + spin_lock(&head2->lock); + __sk_del_bind2_node(sk); + inet_bind2_bucket_destroy(hinfo->bind2_bucket_cachep, inet_csk(sk)->icsk_bind2_hash); + spin_unlock(&head2->lock); + + if (reset) + inet_reset_saddr(sk); + else + inet_update_saddr(sk, saddr, family); + + head2 = inet_bhashfn_portaddr(hinfo, sk, net, port); + + spin_lock(&head2->lock); tb2 = inet_bind2_bucket_find(head2, net, port, l3mdev, sk); if (!tb2) { tb2 = new_tb2; @@ -893,15 +939,29 @@ int inet_bhash2_update_saddr(struct inet_bind_hashbucket *prev_saddr, struct soc } sk_add_bind2_node(sk, &tb2->owners); inet_csk(sk)->icsk_bind2_hash = tb2; - spin_unlock_bh(&head2->lock); + spin_unlock(&head2->lock); + + spin_unlock_bh(&head->lock); if (tb2 != new_tb2) kmem_cache_free(hinfo->bind2_bucket_cachep, new_tb2); return 0; } + +int inet_bhash2_update_saddr(struct sock *sk, void *saddr, int family) +{ + return __inet_bhash2_update_saddr(sk, saddr, family, false); +} EXPORT_SYMBOL_GPL(inet_bhash2_update_saddr); +void inet_bhash2_reset_saddr(struct sock *sk) +{ + if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) + __inet_bhash2_update_saddr(sk, NULL, 0, true); +} +EXPORT_SYMBOL_GPL(inet_bhash2_reset_saddr); + /* RFC 6056 3.3.4. Algorithm 4: Double-Hash Port Selection Algorithm * Note that we use 32bit integers (vs RFC 'short integers') * because 2^16 is not a multiple of num_ephemeral and this diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index f866d6282b2b..cae9f1a4e059 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -1492,24 +1492,6 @@ static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev) struct ip_tunnel_parm *p = &t->parms; __be16 o_flags = p->o_flags; - if (t->erspan_ver <= 2) { - if (t->erspan_ver != 0 && !t->collect_md) - o_flags |= TUNNEL_KEY; - - if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, t->erspan_ver)) - goto nla_put_failure; - - if (t->erspan_ver == 1) { - if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index)) - goto nla_put_failure; - } else if (t->erspan_ver == 2) { - if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, t->dir)) - goto nla_put_failure; - if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, t->hwid)) - goto nla_put_failure; - } - } - if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) || nla_put_be16(skb, IFLA_GRE_IFLAGS, gre_tnl_flags_to_gre_flags(p->i_flags)) || @@ -1550,6 +1532,34 @@ nla_put_failure: return -EMSGSIZE; } +static int erspan_fill_info(struct sk_buff *skb, const struct net_device *dev) +{ + struct ip_tunnel *t = netdev_priv(dev); + + if (t->erspan_ver <= 2) { + if (t->erspan_ver != 0 && !t->collect_md) + t->parms.o_flags |= TUNNEL_KEY; + + if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, t->erspan_ver)) + goto nla_put_failure; + + if (t->erspan_ver == 1) { + if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index)) + goto nla_put_failure; + } else if (t->erspan_ver == 2) { + if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, t->dir)) + goto nla_put_failure; + if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, t->hwid)) + goto nla_put_failure; + } + } + + return ipgre_fill_info(skb, dev); + +nla_put_failure: + return -EMSGSIZE; +} + static void erspan_setup(struct net_device *dev) { struct ip_tunnel *t = netdev_priv(dev); @@ -1628,7 +1638,7 @@ static struct rtnl_link_ops erspan_link_ops __read_mostly = { .changelink = erspan_changelink, .dellink = ip_tunnel_dellink, .get_size = ipgre_get_size, - .fill_info = ipgre_fill_info, + .fill_info = erspan_fill_info, .get_link_net = ip_tunnel_get_link_net, }; diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 1b512390b3cf..e880ce77322a 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -366,6 +366,11 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk, iph->tos, dev); if (unlikely(err)) goto drop_error; + } else { + struct in_device *in_dev = __in_dev_get_rcu(dev); + + if (in_dev && IN_DEV_ORCONF(in_dev, NOPOLICY)) + IPCB(skb)->flags |= IPSKB_NOPOLICY; } #ifdef CONFIG_IP_ROUTE_CLASSID diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index f8e176c77d1c..b3cc416ed292 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -435,7 +435,7 @@ clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par) switch (ctinfo) { case IP_CT_NEW: - ct->mark = hash; + WRITE_ONCE(ct->mark, hash); break; case IP_CT_RELATED: case IP_CT_RELATED_REPLY: @@ -452,7 +452,7 @@ clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par) #ifdef DEBUG nf_ct_dump_tuple_ip(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); #endif - pr_debug("hash=%u ct_hash=%u ", hash, ct->mark); + pr_debug("hash=%u ct_hash=%u ", hash, READ_ONCE(ct->mark)); if (!clusterip_responsible(cipinfo->config, hash)) { pr_debug("not responsible\n"); return NF_DROP; diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index bde333b24837..04b4ec07bb06 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -49,6 +49,11 @@ #include <net/transp_v6.h> #endif +#define ping_portaddr_for_each_entry(__sk, node, list) \ + hlist_nulls_for_each_entry(__sk, node, list, sk_nulls_node) +#define ping_portaddr_for_each_entry_rcu(__sk, node, list) \ + hlist_nulls_for_each_entry_rcu(__sk, node, list, sk_nulls_node) + struct ping_table { struct hlist_nulls_head hash[PING_HTABLE_SIZE]; spinlock_t lock; @@ -192,7 +197,7 @@ static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident) return NULL; } - ping_portaddr_for_each_entry(sk, hnode, hslot) { + ping_portaddr_for_each_entry_rcu(sk, hnode, hslot) { isk = inet_sk(sk); pr_debug("iterate\n"); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 54836a6b81d6..4f2205756cfe 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3114,8 +3114,7 @@ int tcp_disconnect(struct sock *sk, int flags) inet->inet_dport = 0; - if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) - inet_reset_saddr(sk); + inet_bhash2_reset_saddr(sk); sk->sk_shutdown = 0; sock_reset_flag(sk, SOCK_DONE); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 87d440f47a70..da46357f501b 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -199,15 +199,14 @@ static int tcp_v4_pre_connect(struct sock *sk, struct sockaddr *uaddr, /* This will initiate an outgoing connection. */ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { - struct inet_bind_hashbucket *prev_addr_hashbucket = NULL; struct sockaddr_in *usin = (struct sockaddr_in *)uaddr; struct inet_timewait_death_row *tcp_death_row; - __be32 daddr, nexthop, prev_sk_rcv_saddr; struct inet_sock *inet = inet_sk(sk); struct tcp_sock *tp = tcp_sk(sk); struct ip_options_rcu *inet_opt; struct net *net = sock_net(sk); __be16 orig_sport, orig_dport; + __be32 daddr, nexthop; struct flowi4 *fl4; struct rtable *rt; int err; @@ -251,24 +250,13 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row; if (!inet->inet_saddr) { - if (inet_csk(sk)->icsk_bind2_hash) { - prev_addr_hashbucket = inet_bhashfn_portaddr(tcp_death_row->hashinfo, - sk, net, inet->inet_num); - prev_sk_rcv_saddr = sk->sk_rcv_saddr; - } - inet->inet_saddr = fl4->saddr; - } - - sk_rcv_saddr_set(sk, inet->inet_saddr); - - if (prev_addr_hashbucket) { - err = inet_bhash2_update_saddr(prev_addr_hashbucket, sk); + err = inet_bhash2_update_saddr(sk, &fl4->saddr, AF_INET); if (err) { - inet->inet_saddr = 0; - sk_rcv_saddr_set(sk, prev_sk_rcv_saddr); ip_rt_put(rt); return err; } + } else { + sk_rcv_saddr_set(sk, inet->inet_saddr); } if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) { @@ -343,6 +331,7 @@ failure: * if necessary. */ tcp_set_state(sk, TCP_CLOSE); + inet_bhash2_reset_saddr(sk); ip_rt_put(rt); sk->sk_route_caps = 0; inet->inet_dport = 0; diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c index 79d43548279c..242f4295940e 100644 --- a/net/ipv6/esp6_offload.c +++ b/net/ipv6/esp6_offload.c @@ -346,6 +346,9 @@ static int esp6_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features xo->seq.low += skb_shinfo(skb)->gso_segs; } + if (xo->seq.low < seq) + xo->seq.hi++; + esp.seqno = cpu_to_be64(xo->seq.low + ((u64)xo->seq.hi << 32)); len = skb->len - sizeof(struct ipv6hdr); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index e19507614f64..60fd91bb5171 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -920,6 +920,9 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, if (err < 0) goto fail; + /* We prevent @rt from being freed. */ + rcu_read_lock(); + for (;;) { /* Prepare header of the next frame, * before previous one went down. */ @@ -943,6 +946,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, if (err == 0) { IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), IPSTATS_MIB_FRAGOKS); + rcu_read_unlock(); return 0; } @@ -950,6 +954,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), IPSTATS_MIB_FRAGFAILS); + rcu_read_unlock(); return err; slow_path_clean: diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 2a3f9296df1e..f0548dbcabd2 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -292,24 +292,11 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row; if (!saddr) { - struct inet_bind_hashbucket *prev_addr_hashbucket = NULL; - struct in6_addr prev_v6_rcv_saddr; - - if (icsk->icsk_bind2_hash) { - prev_addr_hashbucket = inet_bhashfn_portaddr(tcp_death_row->hashinfo, - sk, net, inet->inet_num); - prev_v6_rcv_saddr = sk->sk_v6_rcv_saddr; - } saddr = &fl6.saddr; - sk->sk_v6_rcv_saddr = *saddr; - if (prev_addr_hashbucket) { - err = inet_bhash2_update_saddr(prev_addr_hashbucket, sk); - if (err) { - sk->sk_v6_rcv_saddr = prev_v6_rcv_saddr; - goto failure; - } - } + err = inet_bhash2_update_saddr(sk, saddr, AF_INET6); + if (err) + goto failure; } /* set the source address */ @@ -359,6 +346,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, late_failure: tcp_set_state(sk, TCP_CLOSE); + inet_bhash2_reset_saddr(sk); failure: inet->inet_dport = 0; sk->sk_route_caps = 0; diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 4a4b0e49ec92..ea435eba3053 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -287,9 +287,13 @@ int __init xfrm6_init(void) if (ret) goto out_state; - register_pernet_subsys(&xfrm6_net_ops); + ret = register_pernet_subsys(&xfrm6_net_ops); + if (ret) + goto out_protocol; out: return ret; +out_protocol: + xfrm6_protocol_fini(); out_state: xfrm6_state_fini(); out_policy: diff --git a/net/key/af_key.c b/net/key/af_key.c index c85df5b958d2..95edcbedf6ef 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -2905,7 +2905,7 @@ static int count_ah_combs(const struct xfrm_tmpl *t) break; if (!aalg->pfkey_supported) continue; - if (aalg_tmpl_set(t, aalg) && aalg->available) + if (aalg_tmpl_set(t, aalg)) sz += sizeof(struct sadb_comb); } return sz + sizeof(struct sadb_prop); @@ -2923,7 +2923,7 @@ static int count_esp_combs(const struct xfrm_tmpl *t) if (!ealg->pfkey_supported) continue; - if (!(ealg_tmpl_set(t, ealg) && ealg->available)) + if (!(ealg_tmpl_set(t, ealg))) continue; for (k = 1; ; k++) { @@ -2934,16 +2934,17 @@ static int count_esp_combs(const struct xfrm_tmpl *t) if (!aalg->pfkey_supported) continue; - if (aalg_tmpl_set(t, aalg) && aalg->available) + if (aalg_tmpl_set(t, aalg)) sz += sizeof(struct sadb_comb); } } return sz + sizeof(struct sadb_prop); } -static void dump_ah_combs(struct sk_buff *skb, const struct xfrm_tmpl *t) +static int dump_ah_combs(struct sk_buff *skb, const struct xfrm_tmpl *t) { struct sadb_prop *p; + int sz = 0; int i; p = skb_put(skb, sizeof(struct sadb_prop)); @@ -2971,13 +2972,17 @@ static void dump_ah_combs(struct sk_buff *skb, const struct xfrm_tmpl *t) c->sadb_comb_soft_addtime = 20*60*60; c->sadb_comb_hard_usetime = 8*60*60; c->sadb_comb_soft_usetime = 7*60*60; + sz += sizeof(*c); } } + + return sz + sizeof(*p); } -static void dump_esp_combs(struct sk_buff *skb, const struct xfrm_tmpl *t) +static int dump_esp_combs(struct sk_buff *skb, const struct xfrm_tmpl *t) { struct sadb_prop *p; + int sz = 0; int i, k; p = skb_put(skb, sizeof(struct sadb_prop)); @@ -3019,8 +3024,11 @@ static void dump_esp_combs(struct sk_buff *skb, const struct xfrm_tmpl *t) c->sadb_comb_soft_addtime = 20*60*60; c->sadb_comb_hard_usetime = 8*60*60; c->sadb_comb_soft_usetime = 7*60*60; + sz += sizeof(*c); } } + + return sz + sizeof(*p); } static int key_notify_policy_expire(struct xfrm_policy *xp, const struct km_event *c) @@ -3150,6 +3158,7 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct struct sadb_x_sec_ctx *sec_ctx; struct xfrm_sec_ctx *xfrm_ctx; int ctx_size = 0; + int alg_size = 0; sockaddr_size = pfkey_sockaddr_size(x->props.family); if (!sockaddr_size) @@ -3161,16 +3170,16 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct sizeof(struct sadb_x_policy); if (x->id.proto == IPPROTO_AH) - size += count_ah_combs(t); + alg_size = count_ah_combs(t); else if (x->id.proto == IPPROTO_ESP) - size += count_esp_combs(t); + alg_size = count_esp_combs(t); if ((xfrm_ctx = x->security)) { ctx_size = PFKEY_ALIGN8(xfrm_ctx->ctx_len); size += sizeof(struct sadb_x_sec_ctx) + ctx_size; } - skb = alloc_skb(size + 16, GFP_ATOMIC); + skb = alloc_skb(size + alg_size + 16, GFP_ATOMIC); if (skb == NULL) return -ENOMEM; @@ -3224,10 +3233,13 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct pol->sadb_x_policy_priority = xp->priority; /* Set sadb_comb's. */ + alg_size = 0; if (x->id.proto == IPPROTO_AH) - dump_ah_combs(skb, t); + alg_size = dump_ah_combs(skb, t); else if (x->id.proto == IPPROTO_ESP) - dump_esp_combs(skb, t); + alg_size = dump_esp_combs(skb, t); + + hdr->sadb_msg_len += alg_size / 8; /* security context */ if (xfrm_ctx) { @@ -3382,7 +3394,7 @@ static int pfkey_send_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, hdr->sadb_msg_len = size / sizeof(uint64_t); hdr->sadb_msg_errno = 0; hdr->sadb_msg_reserved = 0; - hdr->sadb_msg_seq = x->km.seq = get_acqseq(); + hdr->sadb_msg_seq = x->km.seq; hdr->sadb_msg_pid = 0; /* SA */ diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 754fdda8a5f5..9a1415fe3fa7 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1474,11 +1474,12 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net, } sk = sock->sk; - write_lock(&sk->sk_callback_lock); - + write_lock_bh(&sk->sk_callback_lock); ret = l2tp_validate_socket(sk, net, tunnel->encap); if (ret < 0) - goto err_sock; + goto err_inval_sock; + rcu_assign_sk_user_data(sk, tunnel); + write_unlock_bh(&sk->sk_callback_lock); tunnel->l2tp_net = net; pn = l2tp_pernet(net); @@ -1507,8 +1508,6 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net, }; setup_udp_tunnel_sock(net, sock, &udp_cfg); - } else { - rcu_assign_sk_user_data(sk, tunnel); } tunnel->old_sk_destruct = sk->sk_destruct; @@ -1522,16 +1521,18 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net, if (tunnel->fd >= 0) sockfd_put(sock); - write_unlock(&sk->sk_callback_lock); return 0; err_sock: + write_lock_bh(&sk->sk_callback_lock); + rcu_assign_sk_user_data(sk, NULL); +err_inval_sock: + write_unlock_bh(&sk->sk_callback_lock); + if (tunnel->fd < 0) sock_release(sock); else sockfd_put(sock); - - write_unlock(&sk->sk_callback_lock); err: return ret; } diff --git a/net/mac80211/airtime.c b/net/mac80211/airtime.c index 2e66598fac79..e8ebd343e2bf 100644 --- a/net/mac80211/airtime.c +++ b/net/mac80211/airtime.c @@ -452,6 +452,9 @@ static u32 ieee80211_get_rate_duration(struct ieee80211_hw *hw, (status->encoding == RX_ENC_HE && streams > 8))) return 0; + if (idx >= MCS_GROUP_RATES) + return 0; + duration = airtime_mcs_groups[group].duration[idx]; duration <<= airtime_mcs_groups[group].shift; *overhead = 36 + (streams << 2); diff --git a/net/mac802154/iface.c b/net/mac802154/iface.c index 500ed1b81250..7e2065e72915 100644 --- a/net/mac802154/iface.c +++ b/net/mac802154/iface.c @@ -662,6 +662,7 @@ ieee802154_if_add(struct ieee802154_local *local, const char *name, sdata->dev = ndev; sdata->wpan_dev.wpan_phy = local->hw.phy; sdata->local = local; + INIT_LIST_HEAD(&sdata->wpan_dev.list); /* setup type-dependent data */ ret = ieee802154_setup_sdata(sdata, type); diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index b6dc6e260334..1dbc62537259 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2354,12 +2354,7 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, goto out; } - /* if we are invoked by the msk cleanup code, the subflow is - * already orphaned - */ - if (ssk->sk_socket) - sock_orphan(ssk); - + sock_orphan(ssk); subflow->disposable = 1; /* if ssk hit tcp_done(), tcp_cleanup_ulp() cleared the related ops @@ -2940,7 +2935,11 @@ cleanup: if (ssk == msk->first) subflow->fail_tout = 0; - sock_orphan(ssk); + /* detach from the parent socket, but allow data_ready to + * push incoming data into the mptcp stack, to properly ack it + */ + ssk->sk_socket = NULL; + ssk->sk_wq = NULL; unlock_sock_fast(ssk, slow); } sock_orphan(sk); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 02a54d59697b..2159b5f9988f 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -1745,16 +1745,16 @@ void mptcp_subflow_queue_clean(struct sock *listener_ssk) for (msk = head; msk; msk = next) { struct sock *sk = (struct sock *)msk; - bool slow, do_cancel_work; + bool do_cancel_work; sock_hold(sk); - slow = lock_sock_fast_nested(sk); + lock_sock_nested(sk, SINGLE_DEPTH_NESTING); next = msk->dl_next; msk->first = NULL; msk->dl_next = NULL; do_cancel_work = __mptcp_close(sk, 0); - unlock_sock_fast(sk, slow); + release_sock(sk); if (do_cancel_work) mptcp_cancel_work(sk); sock_put(sk); diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 3adc291d9ce1..7499192af586 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -916,7 +916,7 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, #ifdef IP_SET_HASH_WITH_MULTI if (h->bucketsize >= AHASH_MAX_TUNED) goto set_full; - else if (h->bucketsize < multi) + else if (h->bucketsize <= multi) h->bucketsize += AHASH_INIT_SIZE; #endif if (n->size >= AHASH_MAX(h)) { diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c index dd30c03d5a23..75d556d71652 100644 --- a/net/netfilter/ipset/ip_set_hash_ip.c +++ b/net/netfilter/ipset/ip_set_hash_ip.c @@ -151,18 +151,16 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[], if (((u64)ip_to - ip + 1) >> (32 - h->netmask) > IPSET_MAX_RANGE) return -ERANGE; - if (retried) { + if (retried) ip = ntohl(h->next.ip); - e.ip = htonl(ip); - } for (; ip <= ip_to;) { + e.ip = htonl(ip); ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) return ret; ip += hosts; - e.ip = htonl(ip); - if (e.ip == 0) + if (ip == 0) return 0; ret = 0; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index f97bda06d2a9..23b3fedd619a 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -891,7 +891,7 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct) zone = nf_ct_zone(ct); if (!nf_ct_ext_valid_pre(ct->ext)) { - NF_CT_STAT_INC(net, insert_failed); + NF_CT_STAT_INC_ATOMIC(net, insert_failed); return -ETIMEDOUT; } @@ -938,7 +938,7 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct) if (!nf_ct_ext_valid_post(ct->ext)) { nf_ct_kill(ct); - NF_CT_STAT_INC(net, drop); + NF_CT_STAT_INC_ATOMIC(net, drop); return -ETIMEDOUT; } @@ -1275,7 +1275,7 @@ chaintoolong: */ if (!nf_ct_ext_valid_post(ct->ext)) { nf_ct_kill(ct); - NF_CT_STAT_INC(net, drop); + NF_CT_STAT_INC_ATOMIC(net, drop); return NF_DROP; } @@ -1781,7 +1781,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, } #ifdef CONFIG_NF_CONNTRACK_MARK - ct->mark = exp->master->mark; + ct->mark = READ_ONCE(exp->master->mark); #endif #ifdef CONFIG_NF_CONNTRACK_SECMARK ct->secmark = exp->master->secmark; diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 7562b215b932..1286ae7d4609 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -330,7 +330,12 @@ nla_put_failure: #ifdef CONFIG_NF_CONNTRACK_MARK static int ctnetlink_dump_mark(struct sk_buff *skb, const struct nf_conn *ct) { - if (nla_put_be32(skb, CTA_MARK, htonl(ct->mark))) + u32 mark = READ_ONCE(ct->mark); + + if (!mark) + return 0; + + if (nla_put_be32(skb, CTA_MARK, htonl(mark))) goto nla_put_failure; return 0; @@ -826,8 +831,8 @@ ctnetlink_conntrack_event(unsigned int events, const struct nf_ct_event *item) } #ifdef CONFIG_NF_CONNTRACK_MARK - if ((events & (1 << IPCT_MARK) || ct->mark) - && ctnetlink_dump_mark(skb, ct) < 0) + if (events & (1 << IPCT_MARK) && + ctnetlink_dump_mark(skb, ct) < 0) goto nla_put_failure; #endif nlmsg_end(skb, nlh); @@ -1154,7 +1159,7 @@ static int ctnetlink_filter_match(struct nf_conn *ct, void *data) } #ifdef CONFIG_NF_CONNTRACK_MARK - if ((ct->mark & filter->mark.mask) != filter->mark.val) + if ((READ_ONCE(ct->mark) & filter->mark.mask) != filter->mark.val) goto ignore_entry; #endif status = (u32)READ_ONCE(ct->status); @@ -2002,9 +2007,9 @@ static void ctnetlink_change_mark(struct nf_conn *ct, mask = ~ntohl(nla_get_be32(cda[CTA_MARK_MASK])); mark = ntohl(nla_get_be32(cda[CTA_MARK])); - newmark = (ct->mark & mask) ^ mark; - if (newmark != ct->mark) - ct->mark = newmark; + newmark = (READ_ONCE(ct->mark) & mask) ^ mark; + if (newmark != READ_ONCE(ct->mark)) + WRITE_ONCE(ct->mark, newmark); } #endif @@ -2730,7 +2735,7 @@ static int __ctnetlink_glue_build(struct sk_buff *skb, struct nf_conn *ct) goto nla_put_failure; #ifdef CONFIG_NF_CONNTRACK_MARK - if (ct->mark && ctnetlink_dump_mark(skb, ct) < 0) + if (ctnetlink_dump_mark(skb, ct) < 0) goto nla_put_failure; #endif if (ctnetlink_dump_labels(skb, ct) < 0) diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 4ffe84c5a82c..bca839ab1ae8 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -366,7 +366,7 @@ static int ct_seq_show(struct seq_file *s, void *v) goto release; #if defined(CONFIG_NF_CONNTRACK_MARK) - seq_printf(s, "mark=%u ", ct->mark); + seq_printf(s, "mark=%u ", READ_ONCE(ct->mark)); #endif ct_show_secctx(s, ct); diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c index b04645ced89b..0fdcdb2c9ae4 100644 --- a/net/netfilter/nf_flow_table_offload.c +++ b/net/netfilter/nf_flow_table_offload.c @@ -997,13 +997,13 @@ static void flow_offload_queue_work(struct flow_offload_work *offload) struct net *net = read_pnet(&offload->flowtable->net); if (offload->cmd == FLOW_CLS_REPLACE) { - NF_FLOW_TABLE_STAT_INC(net, count_wq_add); + NF_FLOW_TABLE_STAT_INC_ATOMIC(net, count_wq_add); queue_work(nf_flow_offload_add_wq, &offload->work); } else if (offload->cmd == FLOW_CLS_DESTROY) { - NF_FLOW_TABLE_STAT_INC(net, count_wq_del); + NF_FLOW_TABLE_STAT_INC_ATOMIC(net, count_wq_del); queue_work(nf_flow_offload_del_wq, &offload->work); } else { - NF_FLOW_TABLE_STAT_INC(net, count_wq_stats); + NF_FLOW_TABLE_STAT_INC_ATOMIC(net, count_wq_stats); queue_work(nf_flow_offload_stats_wq, &offload->work); } } @@ -1098,6 +1098,7 @@ static int nf_flow_table_block_setup(struct nf_flowtable *flowtable, struct flow_block_cb *block_cb, *next; int err = 0; + down_write(&flowtable->flow_block_lock); switch (cmd) { case FLOW_BLOCK_BIND: list_splice(&bo->cb_list, &flowtable->flow_block.cb_list); @@ -1112,6 +1113,7 @@ static int nf_flow_table_block_setup(struct nf_flowtable *flowtable, WARN_ON_ONCE(1); err = -EOPNOTSUPP; } + up_write(&flowtable->flow_block_lock); return err; } @@ -1168,7 +1170,9 @@ static int nf_flow_table_offload_cmd(struct flow_block_offload *bo, nf_flow_table_block_offload_init(bo, dev_net(dev), cmd, flowtable, extack); + down_write(&flowtable->flow_block_lock); err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_FT, bo); + up_write(&flowtable->flow_block_lock); if (err < 0) return err; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index e7152d599d73..7a09421f19e1 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -5958,7 +5958,8 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, &timeout); if (err) return err; - } else if (set->flags & NFT_SET_TIMEOUT) { + } else if (set->flags & NFT_SET_TIMEOUT && + !(flags & NFT_SET_ELEM_INTERVAL_END)) { timeout = set->timeout; } @@ -6024,7 +6025,8 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, err = -EOPNOTSUPP; goto err_set_elem_expr; } - } else if (set->num_exprs > 0) { + } else if (set->num_exprs > 0 && + !(flags & NFT_SET_ELEM_INTERVAL_END)) { err = nft_set_elem_expr_clone(ctx, set, expr_array); if (err < 0) goto err_set_elem_expr_clone; diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index a3f01f209a53..641dc21f92b4 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -98,7 +98,7 @@ static void nft_ct_get_eval(const struct nft_expr *expr, return; #ifdef CONFIG_NF_CONNTRACK_MARK case NFT_CT_MARK: - *dest = ct->mark; + *dest = READ_ONCE(ct->mark); return; #endif #ifdef CONFIG_NF_CONNTRACK_SECMARK @@ -297,8 +297,8 @@ static void nft_ct_set_eval(const struct nft_expr *expr, switch (priv->key) { #ifdef CONFIG_NF_CONNTRACK_MARK case NFT_CT_MARK: - if (ct->mark != value) { - ct->mark = value; + if (READ_ONCE(ct->mark) != value) { + WRITE_ONCE(ct->mark, value); nf_conntrack_event_cache(IPCT_MARK, ct); } break; diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 4f9299b9dcdd..06d46d182634 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -1162,6 +1162,7 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set, struct nft_pipapo_match *m = priv->clone; u8 genmask = nft_genmask_next(net); struct nft_pipapo_field *f; + const u8 *start_p, *end_p; int i, bsize_max, err = 0; if (nft_set_ext_exists(ext, NFT_SET_EXT_KEY_END)) @@ -1202,9 +1203,9 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set, } /* Validate */ + start_p = start; + end_p = end; nft_pipapo_for_each_field(f, i, m) { - const u8 *start_p = start, *end_p = end; - if (f->rules >= (unsigned long)NFT_PIPAPO_RULE0_MAX) return -ENOSPC; diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c index e5ebc0810675..ad3c033db64e 100644 --- a/net/netfilter/xt_connmark.c +++ b/net/netfilter/xt_connmark.c @@ -30,6 +30,7 @@ connmark_tg_shift(struct sk_buff *skb, const struct xt_connmark_tginfo2 *info) u_int32_t new_targetmark; struct nf_conn *ct; u_int32_t newmark; + u_int32_t oldmark; ct = nf_ct_get(skb, &ctinfo); if (ct == NULL) @@ -37,14 +38,15 @@ connmark_tg_shift(struct sk_buff *skb, const struct xt_connmark_tginfo2 *info) switch (info->mode) { case XT_CONNMARK_SET: - newmark = (ct->mark & ~info->ctmask) ^ info->ctmark; + oldmark = READ_ONCE(ct->mark); + newmark = (oldmark & ~info->ctmask) ^ info->ctmark; if (info->shift_dir == D_SHIFT_RIGHT) newmark >>= info->shift_bits; else newmark <<= info->shift_bits; - if (ct->mark != newmark) { - ct->mark = newmark; + if (READ_ONCE(ct->mark) != newmark) { + WRITE_ONCE(ct->mark, newmark); nf_conntrack_event_cache(IPCT_MARK, ct); } break; @@ -55,15 +57,15 @@ connmark_tg_shift(struct sk_buff *skb, const struct xt_connmark_tginfo2 *info) else new_targetmark <<= info->shift_bits; - newmark = (ct->mark & ~info->ctmask) ^ + newmark = (READ_ONCE(ct->mark) & ~info->ctmask) ^ new_targetmark; - if (ct->mark != newmark) { - ct->mark = newmark; + if (READ_ONCE(ct->mark) != newmark) { + WRITE_ONCE(ct->mark, newmark); nf_conntrack_event_cache(IPCT_MARK, ct); } break; case XT_CONNMARK_RESTORE: - new_targetmark = (ct->mark & info->ctmask); + new_targetmark = (READ_ONCE(ct->mark) & info->ctmask); if (info->shift_dir == D_SHIFT_RIGHT) new_targetmark >>= info->shift_bits; else @@ -126,7 +128,7 @@ connmark_mt(const struct sk_buff *skb, struct xt_action_param *par) if (ct == NULL) return false; - return ((ct->mark & info->mask) == info->mark) ^ info->invert; + return ((READ_ONCE(ct->mark) & info->mask) == info->mark) ^ info->invert; } static int connmark_mt_check(const struct xt_mtchk_param *par) diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index 6a193cce2a75..4ffdf2f45c44 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -542,7 +542,7 @@ static int nci_open_device(struct nci_dev *ndev) skb_queue_purge(&ndev->tx_q); ndev->ops->close(ndev); - ndev->flags = 0; + ndev->flags &= BIT(NCI_UNREG); } done: diff --git a/net/nfc/nci/data.c b/net/nfc/nci/data.c index aa5e712adf07..3d36ea5701f0 100644 --- a/net/nfc/nci/data.c +++ b/net/nfc/nci/data.c @@ -279,8 +279,10 @@ void nci_rx_data_packet(struct nci_dev *ndev, struct sk_buff *skb) nci_plen(skb->data)); conn_info = nci_get_conn_info_by_conn_id(ndev, nci_conn_id(skb->data)); - if (!conn_info) + if (!conn_info) { + kfree_skb(skb); return; + } /* strip the nci data header */ skb_pull(skb, NCI_DATA_HDR_SIZE); diff --git a/net/nfc/nci/ntf.c b/net/nfc/nci/ntf.c index 282c51051dcc..994a0a1efb58 100644 --- a/net/nfc/nci/ntf.c +++ b/net/nfc/nci/ntf.c @@ -240,6 +240,8 @@ static int nci_add_new_protocol(struct nci_dev *ndev, target->sens_res = nfca_poll->sens_res; target->sel_res = nfca_poll->sel_res; target->nfcid1_len = nfca_poll->nfcid1_len; + if (target->nfcid1_len > ARRAY_SIZE(target->nfcid1)) + return -EPROTO; if (target->nfcid1_len > 0) { memcpy(target->nfcid1, nfca_poll->nfcid1, target->nfcid1_len); @@ -248,6 +250,8 @@ static int nci_add_new_protocol(struct nci_dev *ndev, nfcb_poll = (struct rf_tech_specific_params_nfcb_poll *)params; target->sensb_res_len = nfcb_poll->sensb_res_len; + if (target->sensb_res_len > ARRAY_SIZE(target->sensb_res)) + return -EPROTO; if (target->sensb_res_len > 0) { memcpy(target->sensb_res, nfcb_poll->sensb_res, target->sensb_res_len); @@ -256,6 +260,8 @@ static int nci_add_new_protocol(struct nci_dev *ndev, nfcf_poll = (struct rf_tech_specific_params_nfcf_poll *)params; target->sensf_res_len = nfcf_poll->sensf_res_len; + if (target->sensf_res_len > ARRAY_SIZE(target->sensf_res)) + return -EPROTO; if (target->sensf_res_len > 0) { memcpy(target->sensf_res, nfcf_poll->sensf_res, target->sensf_res_len); diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index c7b10234cf7c..c8eaf4234b2e 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -152,7 +152,7 @@ static u8 ovs_ct_get_state(enum ip_conntrack_info ctinfo) static u32 ovs_ct_get_mark(const struct nf_conn *ct) { #if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) - return ct ? ct->mark : 0; + return ct ? READ_ONCE(ct->mark) : 0; #else return 0; #endif @@ -340,9 +340,9 @@ static int ovs_ct_set_mark(struct nf_conn *ct, struct sw_flow_key *key, #if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) u32 new_mark; - new_mark = ct_mark | (ct->mark & ~(mask)); - if (ct->mark != new_mark) { - ct->mark = new_mark; + new_mark = ct_mark | (READ_ONCE(ct->mark) & ~(mask)); + if (READ_ONCE(ct->mark) != new_mark) { + WRITE_ONCE(ct->mark, new_mark); if (nf_ct_is_confirmed(ct)) nf_conntrack_event_cache(IPCT_MARK, ct); key->ct.mark = new_mark; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 6ce8dd19f33c..1ab65f7f2a0a 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2293,8 +2293,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, if (skb->ip_summed == CHECKSUM_PARTIAL) status |= TP_STATUS_CSUMNOTREADY; else if (skb->pkt_type != PACKET_OUTGOING && - (skb->ip_summed == CHECKSUM_COMPLETE || - skb_csum_unnecessary(skb))) + skb_csum_unnecessary(skb)) status |= TP_STATUS_CSUM_VALID; if (snaplen > res) @@ -3520,8 +3519,7 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, if (skb->ip_summed == CHECKSUM_PARTIAL) aux.tp_status |= TP_STATUS_CSUMNOTREADY; else if (skb->pkt_type != PACKET_OUTGOING && - (skb->ip_summed == CHECKSUM_COMPLETE || - skb_csum_unnecessary(skb))) + skb_csum_unnecessary(skb)) aux.tp_status |= TP_STATUS_CSUM_VALID; aux.tp_len = origlen; diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 1ad0ec5afb50..8499ceb7719c 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -399,6 +399,7 @@ enum rxrpc_conn_proto_state { struct rxrpc_bundle { struct rxrpc_conn_parameters params; refcount_t ref; + atomic_t active; /* Number of active users */ unsigned int debug_id; bool try_upgrade; /* True if the bundle is attempting upgrade */ bool alloc_conn; /* True if someone's getting a conn */ diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index 3c9eeb5b750c..bdb335cb2d05 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -40,6 +40,8 @@ __read_mostly unsigned long rxrpc_conn_idle_client_fast_expiry = 2 * HZ; DEFINE_IDR(rxrpc_client_conn_ids); static DEFINE_SPINLOCK(rxrpc_conn_id_lock); +static void rxrpc_deactivate_bundle(struct rxrpc_bundle *bundle); + /* * Get a connection ID and epoch for a client connection from the global pool. * The connection struct pointer is then recorded in the idr radix tree. The @@ -123,6 +125,7 @@ static struct rxrpc_bundle *rxrpc_alloc_bundle(struct rxrpc_conn_parameters *cp, bundle->params = *cp; rxrpc_get_peer(bundle->params.peer); refcount_set(&bundle->ref, 1); + atomic_set(&bundle->active, 1); spin_lock_init(&bundle->channel_lock); INIT_LIST_HEAD(&bundle->waiting_calls); } @@ -149,7 +152,7 @@ void rxrpc_put_bundle(struct rxrpc_bundle *bundle) dead = __refcount_dec_and_test(&bundle->ref, &r); - _debug("PUT B=%x %d", d, r); + _debug("PUT B=%x %d", d, r - 1); if (dead) rxrpc_free_bundle(bundle); } @@ -338,6 +341,7 @@ found_bundle_free: rxrpc_free_bundle(candidate); found_bundle: rxrpc_get_bundle(bundle); + atomic_inc(&bundle->active); spin_unlock(&local->client_bundles_lock); _leave(" = %u [found]", bundle->debug_id); return bundle; @@ -435,6 +439,7 @@ static void rxrpc_add_conn_to_bundle(struct rxrpc_bundle *bundle, gfp_t gfp) if (old) trace_rxrpc_client(old, -1, rxrpc_client_replace); candidate->bundle_shift = shift; + atomic_inc(&bundle->active); bundle->conns[i] = candidate; for (j = 0; j < RXRPC_MAXCALLS; j++) set_bit(shift + j, &bundle->avail_chans); @@ -725,6 +730,7 @@ granted_channel: smp_rmb(); out_put_bundle: + rxrpc_deactivate_bundle(bundle); rxrpc_put_bundle(bundle); out: _leave(" = %d", ret); @@ -900,9 +906,8 @@ out: static void rxrpc_unbundle_conn(struct rxrpc_connection *conn) { struct rxrpc_bundle *bundle = conn->bundle; - struct rxrpc_local *local = bundle->params.local; unsigned int bindex; - bool need_drop = false, need_put = false; + bool need_drop = false; int i; _enter("C=%x", conn->debug_id); @@ -921,15 +926,22 @@ static void rxrpc_unbundle_conn(struct rxrpc_connection *conn) } spin_unlock(&bundle->channel_lock); - /* If there are no more connections, remove the bundle */ - if (!bundle->avail_chans) { - _debug("maybe unbundle"); - spin_lock(&local->client_bundles_lock); + if (need_drop) { + rxrpc_deactivate_bundle(bundle); + rxrpc_put_connection(conn); + } +} - for (i = 0; i < ARRAY_SIZE(bundle->conns); i++) - if (bundle->conns[i]) - break; - if (i == ARRAY_SIZE(bundle->conns) && !bundle->params.exclusive) { +/* + * Drop the active count on a bundle. + */ +static void rxrpc_deactivate_bundle(struct rxrpc_bundle *bundle) +{ + struct rxrpc_local *local = bundle->params.local; + bool need_put = false; + + if (atomic_dec_and_lock(&bundle->active, &local->client_bundles_lock)) { + if (!bundle->params.exclusive) { _debug("erase bundle"); rb_erase(&bundle->local_node, &local->client_bundles); need_put = true; @@ -939,10 +951,6 @@ static void rxrpc_unbundle_conn(struct rxrpc_connection *conn) if (need_put) rxrpc_put_bundle(bundle); } - - if (need_drop) - rxrpc_put_connection(conn); - _leave(""); } /* diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 1e8ab4749c6c..4662a6ce8a7e 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -976,7 +976,7 @@ config NET_ACT_TUNNEL_KEY config NET_ACT_CT tristate "connection tracking tc action" - depends on NET_CLS_ACT && NF_CONNTRACK && NF_NAT && NF_FLOW_TABLE + depends on NET_CLS_ACT && NF_CONNTRACK && (!NF_NAT || NF_NAT) && NF_FLOW_TABLE help Say Y here to allow sending the packets to conntrack module. diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index 66b143bb04ac..d41002e4613f 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -61,7 +61,7 @@ static int tcf_connmark_act(struct sk_buff *skb, const struct tc_action *a, c = nf_ct_get(skb, &ctinfo); if (c) { - skb->mark = c->mark; + skb->mark = READ_ONCE(c->mark); /* using overlimits stats to count how many packets marked */ ca->tcf_qstats.overlimits++; goto out; @@ -81,7 +81,7 @@ static int tcf_connmark_act(struct sk_buff *skb, const struct tc_action *a, c = nf_ct_tuplehash_to_ctrack(thash); /* using overlimits stats to count how many packets marked */ ca->tcf_qstats.overlimits++; - skb->mark = c->mark; + skb->mark = READ_ONCE(c->mark); nf_ct_put(c); out: diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index b38d91d6b249..4c7f7861ea96 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -178,7 +178,7 @@ static void tcf_ct_flow_table_add_action_meta(struct nf_conn *ct, entry = tcf_ct_flow_table_flow_action_get_next(action); entry->id = FLOW_ACTION_CT_METADATA; #if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) - entry->ct_metadata.mark = ct->mark; + entry->ct_metadata.mark = READ_ONCE(ct->mark); #endif ctinfo = dir == IP_CT_DIR_ORIGINAL ? IP_CT_ESTABLISHED : IP_CT_ESTABLISHED_REPLY; @@ -936,9 +936,9 @@ static void tcf_ct_act_set_mark(struct nf_conn *ct, u32 mark, u32 mask) if (!mask) return; - new_mark = mark | (ct->mark & ~(mask)); - if (ct->mark != new_mark) { - ct->mark = new_mark; + new_mark = mark | (READ_ONCE(ct->mark) & ~(mask)); + if (READ_ONCE(ct->mark) != new_mark) { + WRITE_ONCE(ct->mark, new_mark); if (nf_ct_is_confirmed(ct)) nf_conntrack_event_cache(IPCT_MARK, ct); } diff --git a/net/sched/act_ctinfo.c b/net/sched/act_ctinfo.c index d4102f0a9abd..eaa02f098d1c 100644 --- a/net/sched/act_ctinfo.c +++ b/net/sched/act_ctinfo.c @@ -32,7 +32,7 @@ static void tcf_ctinfo_dscp_set(struct nf_conn *ct, struct tcf_ctinfo *ca, { u8 dscp, newdscp; - newdscp = (((ct->mark & cp->dscpmask) >> cp->dscpmaskshift) << 2) & + newdscp = (((READ_ONCE(ct->mark) & cp->dscpmask) >> cp->dscpmaskshift) << 2) & ~INET_ECN_MASK; switch (proto) { @@ -72,7 +72,7 @@ static void tcf_ctinfo_cpmark_set(struct nf_conn *ct, struct tcf_ctinfo *ca, struct sk_buff *skb) { ca->stats_cpmark_set++; - skb->mark = ct->mark & cp->cpmarkmask; + skb->mark = READ_ONCE(ct->mark) & cp->cpmarkmask; } static int tcf_ctinfo_act(struct sk_buff *skb, const struct tc_action *a, @@ -130,7 +130,7 @@ static int tcf_ctinfo_act(struct sk_buff *skb, const struct tc_action *a, } if (cp->mode & CTINFO_MODE_DSCP) - if (!cp->dscpstatemask || (ct->mark & cp->dscpstatemask)) + if (!cp->dscpstatemask || (READ_ONCE(ct->mark) & cp->dscpstatemask)) tcf_ctinfo_dscp_set(ct, ca, cp, skb, wlen, proto); if (cp->mode & CTINFO_MODE_CPMARK) diff --git a/net/sctp/stream.c b/net/sctp/stream.c index ef9fceadef8d..ee6514af830f 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -52,6 +52,19 @@ static void sctp_stream_shrink_out(struct sctp_stream *stream, __u16 outcnt) } } +static void sctp_stream_free_ext(struct sctp_stream *stream, __u16 sid) +{ + struct sctp_sched_ops *sched; + + if (!SCTP_SO(stream, sid)->ext) + return; + + sched = sctp_sched_ops_from_stream(stream); + sched->free_sid(stream, sid); + kfree(SCTP_SO(stream, sid)->ext); + SCTP_SO(stream, sid)->ext = NULL; +} + /* Migrates chunks from stream queues to new stream queues if needed, * but not across associations. Also, removes those chunks to streams * higher than the new max. @@ -70,16 +83,14 @@ static void sctp_stream_outq_migrate(struct sctp_stream *stream, * sctp_stream_update will swap ->out pointers. */ for (i = 0; i < outcnt; i++) { - kfree(SCTP_SO(new, i)->ext); + sctp_stream_free_ext(new, i); SCTP_SO(new, i)->ext = SCTP_SO(stream, i)->ext; SCTP_SO(stream, i)->ext = NULL; } } - for (i = outcnt; i < stream->outcnt; i++) { - kfree(SCTP_SO(stream, i)->ext); - SCTP_SO(stream, i)->ext = NULL; - } + for (i = outcnt; i < stream->outcnt; i++) + sctp_stream_free_ext(stream, i); } static int sctp_stream_alloc_out(struct sctp_stream *stream, __u16 outcnt, @@ -174,9 +185,9 @@ void sctp_stream_free(struct sctp_stream *stream) struct sctp_sched_ops *sched = sctp_sched_ops_from_stream(stream); int i; - sched->free(stream); + sched->unsched_all(stream); for (i = 0; i < stream->outcnt; i++) - kfree(SCTP_SO(stream, i)->ext); + sctp_stream_free_ext(stream, i); genradix_free(&stream->out); genradix_free(&stream->in); } diff --git a/net/sctp/stream_sched.c b/net/sctp/stream_sched.c index 1ad565ed5627..7c8f9d89e16a 100644 --- a/net/sctp/stream_sched.c +++ b/net/sctp/stream_sched.c @@ -46,6 +46,10 @@ static int sctp_sched_fcfs_init_sid(struct sctp_stream *stream, __u16 sid, return 0; } +static void sctp_sched_fcfs_free_sid(struct sctp_stream *stream, __u16 sid) +{ +} + static void sctp_sched_fcfs_free(struct sctp_stream *stream) { } @@ -96,6 +100,7 @@ static struct sctp_sched_ops sctp_sched_fcfs = { .get = sctp_sched_fcfs_get, .init = sctp_sched_fcfs_init, .init_sid = sctp_sched_fcfs_init_sid, + .free_sid = sctp_sched_fcfs_free_sid, .free = sctp_sched_fcfs_free, .enqueue = sctp_sched_fcfs_enqueue, .dequeue = sctp_sched_fcfs_dequeue, diff --git a/net/sctp/stream_sched_prio.c b/net/sctp/stream_sched_prio.c index 80b5a2c4cbc7..4fc9f2923ed1 100644 --- a/net/sctp/stream_sched_prio.c +++ b/net/sctp/stream_sched_prio.c @@ -204,6 +204,24 @@ static int sctp_sched_prio_init_sid(struct sctp_stream *stream, __u16 sid, return sctp_sched_prio_set(stream, sid, 0, gfp); } +static void sctp_sched_prio_free_sid(struct sctp_stream *stream, __u16 sid) +{ + struct sctp_stream_priorities *prio = SCTP_SO(stream, sid)->ext->prio_head; + int i; + + if (!prio) + return; + + SCTP_SO(stream, sid)->ext->prio_head = NULL; + for (i = 0; i < stream->outcnt; i++) { + if (SCTP_SO(stream, i)->ext && + SCTP_SO(stream, i)->ext->prio_head == prio) + return; + } + + kfree(prio); +} + static void sctp_sched_prio_free(struct sctp_stream *stream) { struct sctp_stream_priorities *prio, *n; @@ -323,6 +341,7 @@ static struct sctp_sched_ops sctp_sched_prio = { .get = sctp_sched_prio_get, .init = sctp_sched_prio_init, .init_sid = sctp_sched_prio_init_sid, + .free_sid = sctp_sched_prio_free_sid, .free = sctp_sched_prio_free, .enqueue = sctp_sched_prio_enqueue, .dequeue = sctp_sched_prio_dequeue, diff --git a/net/sctp/stream_sched_rr.c b/net/sctp/stream_sched_rr.c index ff425aed62c7..cc444fe0d67c 100644 --- a/net/sctp/stream_sched_rr.c +++ b/net/sctp/stream_sched_rr.c @@ -90,6 +90,10 @@ static int sctp_sched_rr_init_sid(struct sctp_stream *stream, __u16 sid, return 0; } +static void sctp_sched_rr_free_sid(struct sctp_stream *stream, __u16 sid) +{ +} + static void sctp_sched_rr_free(struct sctp_stream *stream) { sctp_sched_rr_unsched_all(stream); @@ -177,6 +181,7 @@ static struct sctp_sched_ops sctp_sched_rr = { .get = sctp_sched_rr_get, .init = sctp_sched_rr_init, .init_sid = sctp_sched_rr_init_sid, + .free_sid = sctp_sched_rr_free_sid, .free = sctp_sched_rr_free, .enqueue = sctp_sched_rr_enqueue, .dequeue = sctp_sched_rr_dequeue, diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c index f09316a9035f..d67440de011e 100644 --- a/net/tipc/crypto.c +++ b/net/tipc/crypto.c @@ -1971,6 +1971,9 @@ rcv: /* Ok, everything's fine, try to synch own keys according to peers' */ tipc_crypto_key_synch(rx, *skb); + /* Re-fetch skb cb as skb might be changed in tipc_msg_validate */ + skb_cb = TIPC_SKB_CB(*skb); + /* Mark skb decrypted */ skb_cb->decrypted = 1; diff --git a/net/tipc/discover.c b/net/tipc/discover.c index e8630707901e..e8dcdf267c0c 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -211,7 +211,10 @@ void tipc_disc_rcv(struct net *net, struct sk_buff *skb, u32 self; int err; - skb_linearize(skb); + if (skb_linearize(skb)) { + kfree_skb(skb); + return; + } hdr = buf_msg(skb); if (caps & TIPC_NODE_ID128) diff --git a/net/tipc/link.c b/net/tipc/link.c index e260c0d557f5..b3ce24823f50 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -2224,7 +2224,9 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, if (tipc_own_addr(l->net) > msg_prevnode(hdr)) l->net_plane = msg_net_plane(hdr); - skb_linearize(skb); + if (skb_linearize(skb)) + goto exit; + hdr = buf_msg(skb); data = msg_data(hdr); diff --git a/net/tipc/node.c b/net/tipc/node.c index b48d97cbbe29..49ddc484c4fe 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1689,6 +1689,7 @@ int tipc_node_xmit(struct net *net, struct sk_buff_head *list, struct tipc_node *n; struct sk_buff_head xmitq; bool node_up = false; + struct net *peer_net; int bearer_id; int rc; @@ -1705,18 +1706,23 @@ int tipc_node_xmit(struct net *net, struct sk_buff_head *list, return -EHOSTUNREACH; } + rcu_read_lock(); tipc_node_read_lock(n); node_up = node_is_up(n); - if (node_up && n->peer_net && check_net(n->peer_net)) { + peer_net = n->peer_net; + tipc_node_read_unlock(n); + if (node_up && peer_net && check_net(peer_net)) { /* xmit inner linux container */ - tipc_lxc_xmit(n->peer_net, list); + tipc_lxc_xmit(peer_net, list); if (likely(skb_queue_empty(list))) { - tipc_node_read_unlock(n); + rcu_read_unlock(); tipc_node_put(n); return 0; } } + rcu_read_unlock(); + tipc_node_read_lock(n); bearer_id = n->active_links[selector & 1]; if (unlikely(bearer_id == INVALID_BEARER_ID)) { tipc_node_read_unlock(n); diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c index d92ec92f0b71..e3b427a70398 100644 --- a/net/tipc/topsrv.c +++ b/net/tipc/topsrv.c @@ -176,7 +176,7 @@ static void tipc_conn_close(struct tipc_conn *con) conn_put(con); } -static struct tipc_conn *tipc_conn_alloc(struct tipc_topsrv *s) +static struct tipc_conn *tipc_conn_alloc(struct tipc_topsrv *s, struct socket *sock) { struct tipc_conn *con; int ret; @@ -202,10 +202,12 @@ static struct tipc_conn *tipc_conn_alloc(struct tipc_topsrv *s) } con->conid = ret; s->idr_in_use++; - spin_unlock_bh(&s->idr_lock); set_bit(CF_CONNECTED, &con->flags); con->server = s; + con->sock = sock; + conn_get(con); + spin_unlock_bh(&s->idr_lock); return con; } @@ -467,7 +469,7 @@ static void tipc_topsrv_accept(struct work_struct *work) ret = kernel_accept(lsock, &newsock, O_NONBLOCK); if (ret < 0) return; - con = tipc_conn_alloc(srv); + con = tipc_conn_alloc(srv, newsock); if (IS_ERR(con)) { ret = PTR_ERR(con); sock_release(newsock); @@ -479,11 +481,11 @@ static void tipc_topsrv_accept(struct work_struct *work) newsk->sk_data_ready = tipc_conn_data_ready; newsk->sk_write_space = tipc_conn_write_space; newsk->sk_user_data = con; - con->sock = newsock; write_unlock_bh(&newsk->sk_callback_lock); /* Wake up receive process in case of 'SYN+' message */ newsk->sk_data_ready(newsk); + conn_put(con); } } @@ -577,17 +579,17 @@ bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, u32 lower, sub.filter = filter; *(u64 *)&sub.usr_handle = (u64)port; - con = tipc_conn_alloc(tipc_topsrv(net)); + con = tipc_conn_alloc(tipc_topsrv(net), NULL); if (IS_ERR(con)) return false; *conid = con->conid; - con->sock = NULL; rc = tipc_conn_rcv_sub(tipc_topsrv(net), con, &sub); - if (rc >= 0) - return true; + if (rc) + conn_put(con); + conn_put(con); - return false; + return !rc; } void tipc_topsrv_kern_unsubscr(struct net *net, int conid) diff --git a/net/unix/diag.c b/net/unix/diag.c index 105f522a89fe..616b55c5b890 100644 --- a/net/unix/diag.c +++ b/net/unix/diag.c @@ -114,14 +114,16 @@ static int sk_diag_show_rqlen(struct sock *sk, struct sk_buff *nlskb) return nla_put(nlskb, UNIX_DIAG_RQLEN, sizeof(rql), &rql); } -static int sk_diag_dump_uid(struct sock *sk, struct sk_buff *nlskb) +static int sk_diag_dump_uid(struct sock *sk, struct sk_buff *nlskb, + struct user_namespace *user_ns) { - uid_t uid = from_kuid_munged(sk_user_ns(nlskb->sk), sock_i_uid(sk)); + uid_t uid = from_kuid_munged(user_ns, sock_i_uid(sk)); return nla_put(nlskb, UNIX_DIAG_UID, sizeof(uid_t), &uid); } static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_req *req, - u32 portid, u32 seq, u32 flags, int sk_ino) + struct user_namespace *user_ns, + u32 portid, u32 seq, u32 flags, int sk_ino) { struct nlmsghdr *nlh; struct unix_diag_msg *rep; @@ -167,7 +169,7 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_r goto out_nlmsg_trim; if ((req->udiag_show & UDIAG_SHOW_UID) && - sk_diag_dump_uid(sk, skb)) + sk_diag_dump_uid(sk, skb, user_ns)) goto out_nlmsg_trim; nlmsg_end(skb, nlh); @@ -179,7 +181,8 @@ out_nlmsg_trim: } static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, struct unix_diag_req *req, - u32 portid, u32 seq, u32 flags) + struct user_namespace *user_ns, + u32 portid, u32 seq, u32 flags) { int sk_ino; @@ -190,7 +193,7 @@ static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, struct unix_diag_r if (!sk_ino) return 0; - return sk_diag_fill(sk, skb, req, portid, seq, flags, sk_ino); + return sk_diag_fill(sk, skb, req, user_ns, portid, seq, flags, sk_ino); } static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) @@ -214,7 +217,7 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) goto next; if (!(req->udiag_states & (1 << sk->sk_state))) goto next; - if (sk_diag_dump(sk, skb, req, + if (sk_diag_dump(sk, skb, req, sk_user_ns(skb->sk), NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI) < 0) { @@ -282,7 +285,8 @@ again: if (!rep) goto out; - err = sk_diag_fill(sk, rep, req, NETLINK_CB(in_skb).portid, + err = sk_diag_fill(sk, rep, req, sk_user_ns(NETLINK_CB(in_skb).sk), + NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, 0, req->udiag_ino); if (err < 0) { nlmsg_free(rep); diff --git a/net/wireless/scan.c b/net/wireless/scan.c index da752b0cc752..3d86482e83f5 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -330,7 +330,8 @@ static size_t cfg80211_gen_new_ie(const u8 *ie, size_t ielen, * determine if they are the same ie. */ if (tmp_old[0] == WLAN_EID_VENDOR_SPECIFIC) { - if (!memcmp(tmp_old + 2, tmp + 2, 5)) { + if (tmp_old[1] >= 5 && tmp[1] >= 5 && + !memcmp(tmp_old + 2, tmp + 2, 5)) { /* same vendor ie, copy from * subelement */ @@ -2526,10 +2527,15 @@ cfg80211_inform_bss_frame_data(struct wiphy *wiphy, const struct cfg80211_bss_ies *ies1, *ies2; size_t ielen = len - offsetof(struct ieee80211_mgmt, u.probe_resp.variable); - struct cfg80211_non_tx_bss non_tx_data; + struct cfg80211_non_tx_bss non_tx_data = {}; res = cfg80211_inform_single_bss_frame_data(wiphy, data, mgmt, len, gfp); + + /* don't do any further MBSSID handling for S1G */ + if (ieee80211_is_s1g_beacon(mgmt->frame_control)) + return res; + if (!res || !wiphy->support_mbssid || !cfg80211_find_elem(WLAN_EID_MULTIPLE_BSSID, ie, ielen)) return res; diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index 5f5aafd418af..21269e8f2db4 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -97,6 +97,18 @@ static void xfrm_outer_mode_prep(struct xfrm_state *x, struct sk_buff *skb) } } +static inline bool xmit_xfrm_check_overflow(struct sk_buff *skb) +{ + struct xfrm_offload *xo = xfrm_offload(skb); + __u32 seq = xo->seq.low; + + seq += skb_shinfo(skb)->gso_segs; + if (unlikely(seq < xo->seq.low)) + return true; + + return false; +} + struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features, bool *again) { int err; @@ -134,7 +146,8 @@ struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t featur return skb; } - if (skb_is_gso(skb) && unlikely(x->xso.dev != dev)) { + if (skb_is_gso(skb) && (unlikely(x->xso.dev != dev) || + unlikely(xmit_xfrm_check_overflow(skb)))) { struct sk_buff *segs; /* Packet got rerouted, fixup features and segment it. */ diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c index 9f4d42eb090f..ce56d659c55a 100644 --- a/net/xfrm/xfrm_replay.c +++ b/net/xfrm/xfrm_replay.c @@ -714,7 +714,7 @@ static int xfrm_replay_overflow_offload_esn(struct xfrm_state *x, struct sk_buff oseq += skb_shinfo(skb)->gso_segs; } - if (unlikely(oseq < replay_esn->oseq)) { + if (unlikely(xo->seq.low < replay_esn->oseq)) { XFRM_SKB_CB(skb)->seq.output.hi = ++oseq_hi; xo->seq.hi = oseq_hi; replay_esn->oseq_hi = oseq_hi; diff --git a/scripts/Makefile.package b/scripts/Makefile.package index 8bbcced67c22..2a90139ecbe1 100644 --- a/scripts/Makefile.package +++ b/scripts/Makefile.package @@ -30,8 +30,8 @@ KBUILD_PKG_ROOTCMD ?="fakeroot -u" export KDEB_SOURCENAME # Include only those top-level files that are needed by make, plus the GPL copy TAR_CONTENT := Documentation LICENSES arch block certs crypto drivers fs \ - include init io_uring ipc kernel lib mm net samples scripts \ - security sound tools usr virt \ + include init io_uring ipc kernel lib mm net rust \ + samples scripts security sound tools usr virt \ .config .scmversion Makefile \ Kbuild Kconfig COPYING $(wildcard localversion*) MKSPEC := $(srctree)/scripts/package/mkspec diff --git a/scripts/faddr2line b/scripts/faddr2line index 5514c23f45c2..0e73aca4f908 100755 --- a/scripts/faddr2line +++ b/scripts/faddr2line @@ -74,7 +74,8 @@ command -v ${ADDR2LINE} >/dev/null 2>&1 || die "${ADDR2LINE} isn't installed" find_dir_prefix() { local objfile=$1 - local start_kernel_addr=$(${READELF} --symbols --wide $objfile | ${AWK} '$8 == "start_kernel" {printf "0x%s", $2}') + local start_kernel_addr=$(${READELF} --symbols --wide $objfile | sed 's/\[.*\]//' | + ${AWK} '$8 == "start_kernel" {printf "0x%s", $2}') [[ -z $start_kernel_addr ]] && return local file_line=$(${ADDR2LINE} -e $objfile $start_kernel_addr) @@ -178,7 +179,7 @@ __faddr2line() { found=2 break fi - done < <(${READELF} --symbols --wide $objfile | ${AWK} -v sec=$sym_sec '$7 == sec' | sort --key=2) + done < <(${READELF} --symbols --wide $objfile | sed 's/\[.*\]//' | ${AWK} -v sec=$sym_sec '$7 == sec' | sort --key=2) if [[ $found = 0 ]]; then warn "can't find symbol: sym_name: $sym_name sym_sec: $sym_sec sym_addr: $sym_addr sym_elf_size: $sym_elf_size" @@ -259,7 +260,7 @@ __faddr2line() { DONE=1 - done < <(${READELF} --symbols --wide $objfile | ${AWK} -v fn=$sym_name '$4 == "FUNC" && $8 == fn') + done < <(${READELF} --symbols --wide $objfile | sed 's/\[.*\]//' | ${AWK} -v fn=$sym_name '$4 == "FUNC" && $8 == fn') } [[ $# -lt 2 ]] && usage diff --git a/sound/core/seq/seq_memory.c b/sound/core/seq/seq_memory.c index b7aee23fc387..47ef6bc30c0e 100644 --- a/sound/core/seq/seq_memory.c +++ b/sound/core/seq/seq_memory.c @@ -113,15 +113,19 @@ EXPORT_SYMBOL(snd_seq_dump_var_event); * expand the variable length event to linear buffer space. */ -static int seq_copy_in_kernel(char **bufptr, const void *src, int size) +static int seq_copy_in_kernel(void *ptr, void *src, int size) { + char **bufptr = ptr; + memcpy(*bufptr, src, size); *bufptr += size; return 0; } -static int seq_copy_in_user(char __user **bufptr, const void *src, int size) +static int seq_copy_in_user(void *ptr, void *src, int size) { + char __user **bufptr = ptr; + if (copy_to_user(*bufptr, src, size)) return -EFAULT; *bufptr += size; @@ -151,8 +155,7 @@ int snd_seq_expand_var_event(const struct snd_seq_event *event, int count, char return newlen; } err = snd_seq_dump_var_event(event, - in_kernel ? (snd_seq_dump_func_t)seq_copy_in_kernel : - (snd_seq_dump_func_t)seq_copy_in_user, + in_kernel ? seq_copy_in_kernel : seq_copy_in_user, &buf); return err < 0 ? err : newlen; } diff --git a/sound/firewire/dice/dice-stream.c b/sound/firewire/dice/dice-stream.c index f99e00083141..4c677c8546c7 100644 --- a/sound/firewire/dice/dice-stream.c +++ b/sound/firewire/dice/dice-stream.c @@ -59,7 +59,7 @@ int snd_dice_stream_get_rate_mode(struct snd_dice *dice, unsigned int rate, static int select_clock(struct snd_dice *dice, unsigned int rate) { - __be32 reg; + __be32 reg, new; u32 data; int i; int err; @@ -83,15 +83,17 @@ static int select_clock(struct snd_dice *dice, unsigned int rate) if (completion_done(&dice->clock_accepted)) reinit_completion(&dice->clock_accepted); - reg = cpu_to_be32(data); + new = cpu_to_be32(data); err = snd_dice_transaction_write_global(dice, GLOBAL_CLOCK_SELECT, - ®, sizeof(reg)); + &new, sizeof(new)); if (err < 0) return err; if (wait_for_completion_timeout(&dice->clock_accepted, - msecs_to_jiffies(NOTIFICATION_TIMEOUT_MS)) == 0) - return -ETIMEDOUT; + msecs_to_jiffies(NOTIFICATION_TIMEOUT_MS)) == 0) { + if (reg != new) + return -ETIMEDOUT; + } return 0; } diff --git a/sound/soc/codecs/cs42l51.c b/sound/soc/codecs/cs42l51.c index 51721edd8f53..e88d9ff95cdf 100644 --- a/sound/soc/codecs/cs42l51.c +++ b/sound/soc/codecs/cs42l51.c @@ -143,7 +143,7 @@ static const struct snd_kcontrol_new cs42l51_snd_controls[] = { 0, 0xA0, 96, adc_att_tlv), SOC_DOUBLE_R_SX_TLV("PGA Volume", CS42L51_ALC_PGA_CTL, CS42L51_ALC_PGB_CTL, - 0, 0x19, 30, pga_tlv), + 0, 0x1A, 30, pga_tlv), SOC_SINGLE("Playback Deemphasis Switch", CS42L51_DAC_CTL, 3, 1, 0), SOC_SINGLE("Auto-Mute Switch", CS42L51_DAC_CTL, 2, 1, 0), SOC_SINGLE("Soft Ramp Switch", CS42L51_DAC_CTL, 1, 1, 0), diff --git a/sound/soc/codecs/hdac_hda.h b/sound/soc/codecs/hdac_hda.h index fc19c34ca00e..b65560981abb 100644 --- a/sound/soc/codecs/hdac_hda.h +++ b/sound/soc/codecs/hdac_hda.h @@ -14,7 +14,7 @@ enum { HDAC_HDMI_1_DAI_ID, HDAC_HDMI_2_DAI_ID, HDAC_HDMI_3_DAI_ID, - HDAC_LAST_DAI_ID = HDAC_HDMI_3_DAI_ID, + HDAC_DAI_ID_NUM }; struct hdac_hda_pcm { @@ -24,7 +24,7 @@ struct hdac_hda_pcm { struct hdac_hda_priv { struct hda_codec *codec; - struct hdac_hda_pcm pcm[HDAC_LAST_DAI_ID]; + struct hdac_hda_pcm pcm[HDAC_DAI_ID_NUM]; bool need_display_power; }; diff --git a/sound/soc/codecs/max98373-i2c.c b/sound/soc/codecs/max98373-i2c.c index 3e04c7f0cce4..ec0905df65d1 100644 --- a/sound/soc/codecs/max98373-i2c.c +++ b/sound/soc/codecs/max98373-i2c.c @@ -549,6 +549,10 @@ static int max98373_i2c_probe(struct i2c_client *i2c) max98373->cache = devm_kcalloc(&i2c->dev, max98373->cache_num, sizeof(*max98373->cache), GFP_KERNEL); + if (!max98373->cache) { + ret = -ENOMEM; + return ret; + } for (i = 0; i < max98373->cache_num; i++) max98373->cache[i].reg = max98373_i2c_cache_reg[i]; diff --git a/sound/soc/codecs/rt711-sdca-sdw.c b/sound/soc/codecs/rt711-sdca-sdw.c index 4120842fe699..88a8392a58ed 100644 --- a/sound/soc/codecs/rt711-sdca-sdw.c +++ b/sound/soc/codecs/rt711-sdca-sdw.c @@ -230,7 +230,7 @@ static int rt711_sdca_read_prop(struct sdw_slave *slave) } /* set the timeout values */ - prop->clk_stop_timeout = 20; + prop->clk_stop_timeout = 700; /* wake-up event */ prop->wake_capable = 1; diff --git a/sound/soc/codecs/sgtl5000.c b/sound/soc/codecs/sgtl5000.c index 4b2135eba74d..a916f4619ea3 100644 --- a/sound/soc/codecs/sgtl5000.c +++ b/sound/soc/codecs/sgtl5000.c @@ -1794,6 +1794,7 @@ static void sgtl5000_i2c_remove(struct i2c_client *client) { struct sgtl5000_priv *sgtl5000 = i2c_get_clientdata(client); + regmap_write(sgtl5000->regmap, SGTL5000_CHIP_CLK_CTRL, SGTL5000_CHIP_CLK_CTRL_DEFAULT); regmap_write(sgtl5000->regmap, SGTL5000_CHIP_DIG_POWER, SGTL5000_DIG_POWER_DEFAULT); regmap_write(sgtl5000->regmap, SGTL5000_CHIP_ANA_POWER, SGTL5000_ANA_POWER_DEFAULT); diff --git a/sound/soc/codecs/tlv320adc3xxx.c b/sound/soc/codecs/tlv320adc3xxx.c index a969547708d4..52bb55724724 100644 --- a/sound/soc/codecs/tlv320adc3xxx.c +++ b/sound/soc/codecs/tlv320adc3xxx.c @@ -14,6 +14,7 @@ #include <dt-bindings/sound/tlv320adc3xxx.h> #include <linux/clk.h> +#include <linux/gpio/consumer.h> #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/io.h> @@ -1025,7 +1026,9 @@ static const struct gpio_chip adc3xxx_gpio_chip = { static void adc3xxx_free_gpio(struct adc3xxx *adc3xxx) { +#ifdef CONFIG_GPIOLIB gpiochip_remove(&adc3xxx->gpio_chip); +#endif } static void adc3xxx_init_gpio(struct adc3xxx *adc3xxx) diff --git a/sound/soc/codecs/wm8962.c b/sound/soc/codecs/wm8962.c index b4b4355c6728..b901e4c65e8a 100644 --- a/sound/soc/codecs/wm8962.c +++ b/sound/soc/codecs/wm8962.c @@ -2503,6 +2503,14 @@ static void wm8962_configure_bclk(struct snd_soc_component *component) snd_soc_component_update_bits(component, WM8962_CLOCKING2, WM8962_SYSCLK_ENA_MASK, WM8962_SYSCLK_ENA); + /* DSPCLK_DIV field in WM8962_CLOCKING1 register is used to generate + * correct frequency of LRCLK and BCLK. Sometimes the read-only value + * can't be updated timely after enabling SYSCLK. This results in wrong + * calculation values. Delay is introduced here to wait for newest + * value from register. The time of the delay should be at least + * 500~1000us according to test. + */ + usleep_range(500, 1000); dspclk = snd_soc_component_read(component, WM8962_CLOCKING1); if (snd_soc_component_get_bias_level(component) != SND_SOC_BIAS_ON) diff --git a/sound/soc/fsl/fsl_micfil.c b/sound/soc/fsl/fsl_micfil.c index 79ef4e269bc9..4b86ef82fd93 100644 --- a/sound/soc/fsl/fsl_micfil.c +++ b/sound/soc/fsl/fsl_micfil.c @@ -194,6 +194,25 @@ static int fsl_micfil_reset(struct device *dev) if (ret) return ret; + /* + * SRES is self-cleared bit, but REG_MICFIL_CTRL1 is defined + * as non-volatile register, so SRES still remain in regmap + * cache after set, that every update of REG_MICFIL_CTRL1, + * software reset happens. so clear it explicitly. + */ + ret = regmap_clear_bits(micfil->regmap, REG_MICFIL_CTRL1, + MICFIL_CTRL1_SRES); + if (ret) + return ret; + + /* + * Set SRES should clear CHnF flags, But even add delay here + * the CHnF may not be cleared sometimes, so clear CHnF explicitly. + */ + ret = regmap_write_bits(micfil->regmap, REG_MICFIL_STAT, 0xFF, 0xFF); + if (ret) + return ret; + return 0; } diff --git a/sound/soc/soc-ops.c b/sound/soc/soc-ops.c index bd88de056358..55b009d3c681 100644 --- a/sound/soc/soc-ops.c +++ b/sound/soc/soc-ops.c @@ -452,7 +452,7 @@ int snd_soc_put_volsw_sx(struct snd_kcontrol *kcontrol, val = ucontrol->value.integer.value[0]; if (mc->platform_max && val > mc->platform_max) return -EINVAL; - if (val > max - min) + if (val > max) return -EINVAL; val_mask = mask << shift; val = (val + min) & mask; @@ -464,10 +464,15 @@ int snd_soc_put_volsw_sx(struct snd_kcontrol *kcontrol, ret = err; if (snd_soc_volsw_is_stereo(mc)) { - unsigned int val2; + unsigned int val2 = ucontrol->value.integer.value[1]; + + if (mc->platform_max && val2 > mc->platform_max) + return -EINVAL; + if (val2 > max) + return -EINVAL; val_mask = mask << rshift; - val2 = (ucontrol->value.integer.value[1] + min) & mask; + val2 = (val2 + min) & mask; val2 = val2 << rshift; err = snd_soc_component_update_bits(component, reg2, val_mask, diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c index fb87d6d23408..35a16c3f9591 100644 --- a/sound/soc/soc-pcm.c +++ b/sound/soc/soc-pcm.c @@ -822,11 +822,6 @@ static int __soc_pcm_open(struct snd_soc_pcm_runtime *rtd, ret = snd_soc_dai_startup(dai, substream); if (ret < 0) goto err; - - if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) - dai->tx_mask = 0; - else - dai->rx_mask = 0; } /* Dynamic PCM DAI links compat checks use dynamic capabilities */ @@ -1252,6 +1247,8 @@ static void dpcm_be_reparent(struct snd_soc_pcm_runtime *fe, return; be_substream = snd_soc_dpcm_get_substream(be, stream); + if (!be_substream) + return; for_each_dpcm_fe(be, stream, dpcm) { if (dpcm->fe == fe) diff --git a/sound/soc/stm/stm32_i2s.c b/sound/soc/stm/stm32_i2s.c index ce7f6942308f..f3dd9f8e621c 100644 --- a/sound/soc/stm/stm32_i2s.c +++ b/sound/soc/stm/stm32_i2s.c @@ -1077,7 +1077,7 @@ static int stm32_i2s_parse_dt(struct platform_device *pdev, if (irq < 0) return irq; - ret = devm_request_irq(&pdev->dev, irq, stm32_i2s_isr, IRQF_ONESHOT, + ret = devm_request_irq(&pdev->dev, irq, stm32_i2s_isr, 0, dev_name(&pdev->dev), i2s); if (ret) { dev_err(&pdev->dev, "irq request returned %d\n", ret); diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 184ce1684dcd..91b7106a4a73 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -11169,7 +11169,7 @@ static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf } *link = bpf_program__attach_raw_tracepoint(prog, tp_name); - return libbpf_get_error(link); + return libbpf_get_error(*link); } /* Common logic for all BPF program types that attach to a btf_id */ diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c index f3a8e8e74eb8..d504d96adc83 100644 --- a/tools/lib/bpf/libbpf_probes.c +++ b/tools/lib/bpf/libbpf_probes.c @@ -234,7 +234,7 @@ static int probe_map_create(enum bpf_map_type map_type) case BPF_MAP_TYPE_USER_RINGBUF: key_size = 0; value_size = 0; - max_entries = 4096; + max_entries = sysconf(_SC_PAGE_SIZE); break; case BPF_MAP_TYPE_STRUCT_OPS: /* we'll get -ENOTSUPP for invalid BTF type ID for struct_ops */ diff --git a/tools/lib/bpf/ringbuf.c b/tools/lib/bpf/ringbuf.c index d285171d4b69..6af142953a94 100644 --- a/tools/lib/bpf/ringbuf.c +++ b/tools/lib/bpf/ringbuf.c @@ -77,6 +77,7 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd, __u32 len = sizeof(info); struct epoll_event *e; struct ring *r; + __u64 mmap_sz; void *tmp; int err; @@ -115,8 +116,7 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd, r->mask = info.max_entries - 1; /* Map writable consumer page */ - tmp = mmap(NULL, rb->page_size, PROT_READ | PROT_WRITE, MAP_SHARED, - map_fd, 0); + tmp = mmap(NULL, rb->page_size, PROT_READ | PROT_WRITE, MAP_SHARED, map_fd, 0); if (tmp == MAP_FAILED) { err = -errno; pr_warn("ringbuf: failed to mmap consumer page for map fd=%d: %d\n", @@ -129,8 +129,12 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd, * data size to allow simple reading of samples that wrap around the * end of a ring buffer. See kernel implementation for details. * */ - tmp = mmap(NULL, rb->page_size + 2 * info.max_entries, PROT_READ, - MAP_SHARED, map_fd, rb->page_size); + mmap_sz = rb->page_size + 2 * (__u64)info.max_entries; + if (mmap_sz != (__u64)(size_t)mmap_sz) { + pr_warn("ringbuf: ring buffer size (%u) is too big\n", info.max_entries); + return libbpf_err(-E2BIG); + } + tmp = mmap(NULL, (size_t)mmap_sz, PROT_READ, MAP_SHARED, map_fd, rb->page_size); if (tmp == MAP_FAILED) { err = -errno; ringbuf_unmap_ring(rb, r); @@ -348,6 +352,7 @@ static int user_ringbuf_map(struct user_ring_buffer *rb, int map_fd) { struct bpf_map_info info; __u32 len = sizeof(info); + __u64 mmap_sz; void *tmp; struct epoll_event *rb_epoll; int err; @@ -384,8 +389,13 @@ static int user_ringbuf_map(struct user_ring_buffer *rb, int map_fd) * simple reading and writing of samples that wrap around the end of * the buffer. See the kernel implementation for details. */ - tmp = mmap(NULL, rb->page_size + 2 * info.max_entries, - PROT_READ | PROT_WRITE, MAP_SHARED, map_fd, rb->page_size); + mmap_sz = rb->page_size + 2 * (__u64)info.max_entries; + if (mmap_sz != (__u64)(size_t)mmap_sz) { + pr_warn("user ringbuf: ring buf size (%u) is too big\n", info.max_entries); + return -E2BIG; + } + tmp = mmap(NULL, (size_t)mmap_sz, PROT_READ | PROT_WRITE, MAP_SHARED, + map_fd, rb->page_size); if (tmp == MAP_FAILED) { err = -errno; pr_warn("user ringbuf: failed to mmap data pages for map fd=%d: %d\n", @@ -476,6 +486,10 @@ void *user_ring_buffer__reserve(struct user_ring_buffer *rb, __u32 size) __u64 cons_pos, prod_pos; struct ringbuf_hdr *hdr; + /* The top two bits are used as special flags */ + if (size & (BPF_RINGBUF_BUSY_BIT | BPF_RINGBUF_DISCARD_BIT)) + return errno = E2BIG, NULL; + /* Synchronizes with smp_store_release() in __bpf_user_ringbuf_peek() in * the kernel. */ diff --git a/tools/testing/selftests/bpf/map_tests/sk_storage_map.c b/tools/testing/selftests/bpf/map_tests/sk_storage_map.c index 099eb4dfd4f7..18405c3b7cee 100644 --- a/tools/testing/selftests/bpf/map_tests/sk_storage_map.c +++ b/tools/testing/selftests/bpf/map_tests/sk_storage_map.c @@ -458,7 +458,7 @@ static void test_sk_storage_map_basic(void) struct { int cnt; int lock; - } value = { .cnt = 0xeB9f, .lock = 0, }, lookup_value; + } value = { .cnt = 0xeB9f, .lock = 1, }, lookup_value; struct bpf_map_create_opts bad_xattr; int btf_fd, map_fd, sk_fd, err; @@ -483,38 +483,41 @@ static void test_sk_storage_map_basic(void) "err:%d errno:%d\n", err, errno); err = bpf_map_lookup_elem_flags(map_fd, &sk_fd, &lookup_value, BPF_F_LOCK); - CHECK(err || lookup_value.cnt != value.cnt, + CHECK(err || lookup_value.lock || lookup_value.cnt != value.cnt, "bpf_map_lookup_elem_flags(BPF_F_LOCK)", - "err:%d errno:%d cnt:%x(%x)\n", - err, errno, lookup_value.cnt, value.cnt); + "err:%d errno:%d lock:%x cnt:%x(%x)\n", + err, errno, lookup_value.lock, lookup_value.cnt, value.cnt); /* Bump the cnt and update with BPF_EXIST | BPF_F_LOCK */ value.cnt += 1; + value.lock = 2; err = bpf_map_update_elem(map_fd, &sk_fd, &value, BPF_EXIST | BPF_F_LOCK); CHECK(err, "bpf_map_update_elem(BPF_EXIST|BPF_F_LOCK)", "err:%d errno:%d\n", err, errno); err = bpf_map_lookup_elem_flags(map_fd, &sk_fd, &lookup_value, BPF_F_LOCK); - CHECK(err || lookup_value.cnt != value.cnt, + CHECK(err || lookup_value.lock || lookup_value.cnt != value.cnt, "bpf_map_lookup_elem_flags(BPF_F_LOCK)", - "err:%d errno:%d cnt:%x(%x)\n", - err, errno, lookup_value.cnt, value.cnt); + "err:%d errno:%d lock:%x cnt:%x(%x)\n", + err, errno, lookup_value.lock, lookup_value.cnt, value.cnt); /* Bump the cnt and update with BPF_EXIST */ value.cnt += 1; + value.lock = 2; err = bpf_map_update_elem(map_fd, &sk_fd, &value, BPF_EXIST); CHECK(err, "bpf_map_update_elem(BPF_EXIST)", "err:%d errno:%d\n", err, errno); err = bpf_map_lookup_elem_flags(map_fd, &sk_fd, &lookup_value, BPF_F_LOCK); - CHECK(err || lookup_value.cnt != value.cnt, + CHECK(err || lookup_value.lock || lookup_value.cnt != value.cnt, "bpf_map_lookup_elem_flags(BPF_F_LOCK)", - "err:%d errno:%d cnt:%x(%x)\n", - err, errno, lookup_value.cnt, value.cnt); + "err:%d errno:%d lock:%x cnt:%x(%x)\n", + err, errno, lookup_value.lock, lookup_value.cnt, value.cnt); /* Update with BPF_NOEXIST */ value.cnt += 1; + value.lock = 2; err = bpf_map_update_elem(map_fd, &sk_fd, &value, BPF_NOEXIST | BPF_F_LOCK); CHECK(!err || errno != EEXIST, @@ -526,22 +529,23 @@ static void test_sk_storage_map_basic(void) value.cnt -= 1; err = bpf_map_lookup_elem_flags(map_fd, &sk_fd, &lookup_value, BPF_F_LOCK); - CHECK(err || lookup_value.cnt != value.cnt, + CHECK(err || lookup_value.lock || lookup_value.cnt != value.cnt, "bpf_map_lookup_elem_flags(BPF_F_LOCK)", - "err:%d errno:%d cnt:%x(%x)\n", - err, errno, lookup_value.cnt, value.cnt); + "err:%d errno:%d lock:%x cnt:%x(%x)\n", + err, errno, lookup_value.lock, lookup_value.cnt, value.cnt); /* Bump the cnt again and update with map_flags == 0 */ value.cnt += 1; + value.lock = 2; err = bpf_map_update_elem(map_fd, &sk_fd, &value, 0); CHECK(err, "bpf_map_update_elem()", "err:%d errno:%d\n", err, errno); err = bpf_map_lookup_elem_flags(map_fd, &sk_fd, &lookup_value, BPF_F_LOCK); - CHECK(err || lookup_value.cnt != value.cnt, + CHECK(err || lookup_value.lock || lookup_value.cnt != value.cnt, "bpf_map_lookup_elem_flags(BPF_F_LOCK)", - "err:%d errno:%d cnt:%x(%x)\n", - err, errno, lookup_value.cnt, value.cnt); + "err:%d errno:%d lock:%x cnt:%x(%x)\n", + err, errno, lookup_value.lock, lookup_value.cnt, value.cnt); /* Test delete elem */ err = bpf_map_delete_elem(map_fd, &sk_fd); diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c index d457a55ff408..a4b4133d39e9 100644 --- a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c +++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c @@ -358,10 +358,12 @@ static int get_syms(char ***symsp, size_t *cntp) * We attach to almost all kernel functions and some of them * will cause 'suspicious RCU usage' when fprobe is attached * to them. Filter out the current culprits - arch_cpu_idle - * and rcu_* functions. + * default_idle and rcu_* functions. */ if (!strcmp(name, "arch_cpu_idle")) continue; + if (!strcmp(name, "default_idle")) + continue; if (!strncmp(name, "rcu_", 4)) continue; if (!strcmp(name, "bpf_dispatcher_xdp_func")) @@ -400,7 +402,7 @@ error: return err; } -static void test_bench_attach(void) +void serial_test_kprobe_multi_bench_attach(void) { LIBBPF_OPTS(bpf_kprobe_multi_opts, opts); struct kprobe_multi_empty *skel = NULL; @@ -468,6 +470,4 @@ void test_kprobe_multi_test(void) test_attach_api_syms(); if (test__start_subtest("attach_api_fails")) test_attach_api_fails(); - if (test__start_subtest("bench_attach")) - test_bench_attach(); } diff --git a/tools/testing/selftests/cgroup/test_kmem.c b/tools/testing/selftests/cgroup/test_kmem.c index 22b31ebb3513..258ddc565deb 100644 --- a/tools/testing/selftests/cgroup/test_kmem.c +++ b/tools/testing/selftests/cgroup/test_kmem.c @@ -19,12 +19,12 @@ /* - * Memory cgroup charging is performed using percpu batches 32 pages + * Memory cgroup charging is performed using percpu batches 64 pages * big (look at MEMCG_CHARGE_BATCH), whereas memory.stat is exact. So * the maximum discrepancy between charge and vmstat entries is number - * of cpus multiplied by 32 pages. + * of cpus multiplied by 64 pages. */ -#define MAX_VMSTAT_ERROR (4096 * 32 * get_nprocs()) +#define MAX_VMSTAT_ERROR (4096 * 64 * get_nprocs()) static int alloc_dcache(const char *cgroup, void *arg) diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index 2f0d705db9db..05d980fb083d 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -41,6 +41,7 @@ /x86_64/svm_vmcall_test /x86_64/svm_int_ctl_test /x86_64/svm_nested_soft_inject_test +/x86_64/svm_nested_shutdown_test /x86_64/sync_regs_test /x86_64/tsc_msrs_test /x86_64/tsc_scaling_sync diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 0172eb6cb6ee..4a2caef2c939 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -101,6 +101,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/state_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_preemption_timer_test TEST_GEN_PROGS_x86_64 += x86_64/svm_vmcall_test TEST_GEN_PROGS_x86_64 += x86_64/svm_int_ctl_test +TEST_GEN_PROGS_x86_64 += x86_64/svm_nested_shutdown_test TEST_GEN_PROGS_x86_64 += x86_64/svm_nested_soft_inject_test TEST_GEN_PROGS_x86_64 += x86_64/tsc_scaling_sync TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index e8ca0d8a6a7e..5da0c5e2a7af 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -748,6 +748,19 @@ struct ex_regs { uint64_t rflags; }; +struct idt_entry { + uint16_t offset0; + uint16_t selector; + uint16_t ist : 3; + uint16_t : 5; + uint16_t type : 4; + uint16_t : 1; + uint16_t dpl : 2; + uint16_t p : 1; + uint16_t offset1; + uint32_t offset2; uint32_t reserved; +}; + void vm_init_descriptor_tables(struct kvm_vm *vm); void vcpu_init_descriptor_tables(struct kvm_vcpu *vcpu); void vm_install_exception_handler(struct kvm_vm *vm, int vector, diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index 39c4409ef56a..41c1c73c464d 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -1074,19 +1074,6 @@ void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits) } } -struct idt_entry { - uint16_t offset0; - uint16_t selector; - uint16_t ist : 3; - uint16_t : 5; - uint16_t type : 4; - uint16_t : 1; - uint16_t dpl : 2; - uint16_t p : 1; - uint16_t offset1; - uint32_t offset2; uint32_t reserved; -}; - static void set_idt_entry(struct kvm_vm *vm, int vector, unsigned long addr, int dpl, unsigned short selector) { diff --git a/tools/testing/selftests/kvm/x86_64/svm_nested_shutdown_test.c b/tools/testing/selftests/kvm/x86_64/svm_nested_shutdown_test.c new file mode 100644 index 000000000000..e73fcdef47bb --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/svm_nested_shutdown_test.c @@ -0,0 +1,67 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * svm_nested_shutdown_test + * + * Copyright (C) 2022, Red Hat, Inc. + * + * Nested SVM testing: test that unintercepted shutdown in L2 doesn't crash the host + */ + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" +#include "svm_util.h" + +static void l2_guest_code(struct svm_test_data *svm) +{ + __asm__ __volatile__("ud2"); +} + +static void l1_guest_code(struct svm_test_data *svm, struct idt_entry *idt) +{ + #define L2_GUEST_STACK_SIZE 64 + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + struct vmcb *vmcb = svm->vmcb; + + generic_svm_setup(svm, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + vmcb->control.intercept &= ~(BIT(INTERCEPT_SHUTDOWN)); + + idt[6].p = 0; // #UD is intercepted but its injection will cause #NP + idt[11].p = 0; // #NP is not intercepted and will cause another + // #NP that will be converted to #DF + idt[8].p = 0; // #DF will cause #NP which will cause SHUTDOWN + + run_guest(vmcb, svm->vmcb_gpa); + + /* should not reach here */ + GUEST_ASSERT(0); +} + +int main(int argc, char *argv[]) +{ + struct kvm_vcpu *vcpu; + struct kvm_run *run; + vm_vaddr_t svm_gva; + struct kvm_vm *vm; + + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM)); + + vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(vcpu); + + vcpu_alloc_svm(vm, &svm_gva); + + vcpu_args_set(vcpu, 2, svm_gva, vm->idt); + run = vcpu->run; + + vcpu_run(vcpu); + TEST_ASSERT(run->exit_reason == KVM_EXIT_SHUTDOWN, + "Got exit_reason other than KVM_EXIT_SHUTDOWN: %u (%s)\n", + run->exit_reason, + exit_reason_str(run->exit_reason)); + + kvm_vm_free(vm); +} diff --git a/tools/testing/selftests/kvm/x86_64/triple_fault_event_test.c b/tools/testing/selftests/kvm/x86_64/triple_fault_event_test.c index 70b44f0b52fe..ead5d878a71c 100644 --- a/tools/testing/selftests/kvm/x86_64/triple_fault_event_test.c +++ b/tools/testing/selftests/kvm/x86_64/triple_fault_event_test.c @@ -3,6 +3,7 @@ #include "kvm_util.h" #include "processor.h" #include "vmx.h" +#include "svm_util.h" #include <string.h> #include <sys/ioctl.h> @@ -20,10 +21,11 @@ static void l2_guest_code(void) : : [port] "d" (ARBITRARY_IO_PORT) : "rax"); } -void l1_guest_code(struct vmx_pages *vmx) -{ #define L2_GUEST_STACK_SIZE 64 - unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; +unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + +void l1_guest_code_vmx(struct vmx_pages *vmx) +{ GUEST_ASSERT(vmx->vmcs_gpa); GUEST_ASSERT(prepare_for_vmx_operation(vmx)); @@ -38,24 +40,53 @@ void l1_guest_code(struct vmx_pages *vmx) GUEST_DONE(); } +void l1_guest_code_svm(struct svm_test_data *svm) +{ + struct vmcb *vmcb = svm->vmcb; + + generic_svm_setup(svm, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + /* don't intercept shutdown to test the case of SVM allowing to do so */ + vmcb->control.intercept &= ~(BIT(INTERCEPT_SHUTDOWN)); + + run_guest(vmcb, svm->vmcb_gpa); + + /* should not reach here, L1 should crash */ + GUEST_ASSERT(0); +} + int main(void) { struct kvm_vcpu *vcpu; struct kvm_run *run; struct kvm_vcpu_events events; - vm_vaddr_t vmx_pages_gva; struct ucall uc; - TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); + bool has_vmx = kvm_cpu_has(X86_FEATURE_VMX); + bool has_svm = kvm_cpu_has(X86_FEATURE_SVM); + + TEST_REQUIRE(has_vmx || has_svm); TEST_REQUIRE(kvm_has_cap(KVM_CAP_X86_TRIPLE_FAULT_EVENT)); - vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); - vm_enable_cap(vm, KVM_CAP_X86_TRIPLE_FAULT_EVENT, 1); + if (has_vmx) { + vm_vaddr_t vmx_pages_gva; + + vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code_vmx); + vcpu_alloc_vmx(vm, &vmx_pages_gva); + vcpu_args_set(vcpu, 1, vmx_pages_gva); + } else { + vm_vaddr_t svm_gva; + + vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code_svm); + vcpu_alloc_svm(vm, &svm_gva); + vcpu_args_set(vcpu, 1, svm_gva); + } + + vm_enable_cap(vm, KVM_CAP_X86_TRIPLE_FAULT_EVENT, 1); run = vcpu->run; - vcpu_alloc_vmx(vm, &vmx_pages_gva); - vcpu_args_set(vcpu, 1, vmx_pages_gva); vcpu_run(vcpu); TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, @@ -78,13 +109,21 @@ int main(void) "No triple fault pending"); vcpu_run(vcpu); - switch (get_ucall(vcpu, &uc)) { - case UCALL_DONE: - break; - case UCALL_ABORT: - REPORT_GUEST_ASSERT(uc); - default: - TEST_FAIL("Unexpected ucall: %lu", uc.cmd); - } + if (has_svm) { + TEST_ASSERT(run->exit_reason == KVM_EXIT_SHUTDOWN, + "Got exit_reason other than KVM_EXIT_SHUTDOWN: %u (%s)\n", + run->exit_reason, + exit_reason_str(run->exit_reason)); + } else { + switch (get_ucall(vcpu, &uc)) { + case UCALL_DONE: + break; + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + default: + TEST_FAIL("Unexpected ucall: %lu", uc.cmd); + } + } + return 0; } diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index 3d7adee7a3e6..ff8fe93f679c 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only bind_bhash cmsg_sender +diag_uid fin_ack_lat gro hwtstamp_config diff --git a/tools/testing/selftests/net/af_unix/Makefile b/tools/testing/selftests/net/af_unix/Makefile index 969620ae9928..1e4b397cece6 100644 --- a/tools/testing/selftests/net/af_unix/Makefile +++ b/tools/testing/selftests/net/af_unix/Makefile @@ -1,3 +1,3 @@ -TEST_GEN_PROGS := test_unix_oob unix_connect +TEST_GEN_PROGS := diag_uid test_unix_oob unix_connect include ../../lib.mk diff --git a/tools/testing/selftests/net/af_unix/diag_uid.c b/tools/testing/selftests/net/af_unix/diag_uid.c new file mode 100644 index 000000000000..5b88f7129fea --- /dev/null +++ b/tools/testing/selftests/net/af_unix/diag_uid.c @@ -0,0 +1,178 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright Amazon.com Inc. or its affiliates. */ + +#define _GNU_SOURCE +#include <sched.h> + +#include <unistd.h> +#include <linux/netlink.h> +#include <linux/rtnetlink.h> +#include <linux/sock_diag.h> +#include <linux/unix_diag.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/un.h> + +#include "../../kselftest_harness.h" + +FIXTURE(diag_uid) +{ + int netlink_fd; + int unix_fd; + __u32 inode; + __u64 cookie; +}; + +FIXTURE_VARIANT(diag_uid) +{ + int unshare; + int udiag_show; +}; + +FIXTURE_VARIANT_ADD(diag_uid, uid) +{ + .unshare = 0, + .udiag_show = UDIAG_SHOW_UID +}; + +FIXTURE_VARIANT_ADD(diag_uid, uid_unshare) +{ + .unshare = CLONE_NEWUSER, + .udiag_show = UDIAG_SHOW_UID +}; + +FIXTURE_SETUP(diag_uid) +{ + struct stat file_stat; + socklen_t optlen; + int ret; + + if (variant->unshare) + ASSERT_EQ(unshare(variant->unshare), 0); + + self->netlink_fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG); + ASSERT_NE(self->netlink_fd, -1); + + self->unix_fd = socket(AF_UNIX, SOCK_STREAM, 0); + ASSERT_NE(self->unix_fd, -1); + + ret = fstat(self->unix_fd, &file_stat); + ASSERT_EQ(ret, 0); + + self->inode = file_stat.st_ino; + + optlen = sizeof(self->cookie); + ret = getsockopt(self->unix_fd, SOL_SOCKET, SO_COOKIE, &self->cookie, &optlen); + ASSERT_EQ(ret, 0); +} + +FIXTURE_TEARDOWN(diag_uid) +{ + close(self->netlink_fd); + close(self->unix_fd); +} + +int send_request(struct __test_metadata *_metadata, + FIXTURE_DATA(diag_uid) *self, + const FIXTURE_VARIANT(diag_uid) *variant) +{ + struct { + struct nlmsghdr nlh; + struct unix_diag_req udr; + } req = { + .nlh = { + .nlmsg_len = sizeof(req), + .nlmsg_type = SOCK_DIAG_BY_FAMILY, + .nlmsg_flags = NLM_F_REQUEST + }, + .udr = { + .sdiag_family = AF_UNIX, + .udiag_ino = self->inode, + .udiag_cookie = { + (__u32)self->cookie, + (__u32)(self->cookie >> 32) + }, + .udiag_show = variant->udiag_show + } + }; + struct sockaddr_nl nladdr = { + .nl_family = AF_NETLINK + }; + struct iovec iov = { + .iov_base = &req, + .iov_len = sizeof(req) + }; + struct msghdr msg = { + .msg_name = &nladdr, + .msg_namelen = sizeof(nladdr), + .msg_iov = &iov, + .msg_iovlen = 1 + }; + + return sendmsg(self->netlink_fd, &msg, 0); +} + +void render_response(struct __test_metadata *_metadata, + struct unix_diag_req *udr, __u32 len) +{ + unsigned int rta_len = len - NLMSG_LENGTH(sizeof(*udr)); + struct rtattr *attr; + uid_t uid; + + ASSERT_GT(len, sizeof(*udr)); + ASSERT_EQ(udr->sdiag_family, AF_UNIX); + + attr = (struct rtattr *)(udr + 1); + ASSERT_NE(RTA_OK(attr, rta_len), 0); + ASSERT_EQ(attr->rta_type, UNIX_DIAG_UID); + + uid = *(uid_t *)RTA_DATA(attr); + ASSERT_EQ(uid, getuid()); +} + +void receive_response(struct __test_metadata *_metadata, + FIXTURE_DATA(diag_uid) *self) +{ + long buf[8192 / sizeof(long)]; + struct sockaddr_nl nladdr = { + .nl_family = AF_NETLINK + }; + struct iovec iov = { + .iov_base = buf, + .iov_len = sizeof(buf) + }; + struct msghdr msg = { + .msg_name = &nladdr, + .msg_namelen = sizeof(nladdr), + .msg_iov = &iov, + .msg_iovlen = 1 + }; + struct unix_diag_req *udr; + struct nlmsghdr *nlh; + int ret; + + ret = recvmsg(self->netlink_fd, &msg, 0); + ASSERT_GT(ret, 0); + + nlh = (struct nlmsghdr *)buf; + ASSERT_NE(NLMSG_OK(nlh, ret), 0); + ASSERT_EQ(nlh->nlmsg_type, SOCK_DIAG_BY_FAMILY); + + render_response(_metadata, NLMSG_DATA(nlh), nlh->nlmsg_len); + + nlh = NLMSG_NEXT(nlh, ret); + ASSERT_EQ(NLMSG_OK(nlh, ret), 0); +} + +TEST_F(diag_uid, 1) +{ + int ret; + + ret = send_request(_metadata, self, variant); + ASSERT_GT(ret, 0); + + receive_response(_metadata, self); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index ead7963b9bf0..bd89198cd817 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -43,5 +43,5 @@ CONFIG_NET_ACT_TUNNEL_KEY=m CONFIG_NET_ACT_MIRRED=m CONFIG_BAREUDP=m CONFIG_IPV6_IOAM6_LWTUNNEL=y -CONFIG_CRYPTO_SM4=y +CONFIG_CRYPTO_SM4_GENERIC=y CONFIG_AMT=m diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh index 31c3b6ebd388..21ca91473c09 100755 --- a/tools/testing/selftests/net/fcnal-test.sh +++ b/tools/testing/selftests/net/fcnal-test.sh @@ -4196,10 +4196,13 @@ elif [ "$TESTS" = "ipv6" ]; then TESTS="$TESTS_IPV6" fi -which nettest >/dev/null -if [ $? -ne 0 ]; then - echo "'nettest' command not found; skipping tests" - exit $ksft_skip +# nettest can be run from PATH or from same directory as this selftest +if ! which nettest >/dev/null; then + PATH=$PWD:$PATH + if ! which nettest >/dev/null; then + echo "'nettest' command not found; skipping tests" + exit $ksft_skip + fi fi declare -i nfail=0 diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh index ee5e98204d3d..a47b26ab48f2 100755 --- a/tools/testing/selftests/net/fib_nexthops.sh +++ b/tools/testing/selftests/net/fib_nexthops.sh @@ -1228,6 +1228,17 @@ ipv4_fcnal() run_cmd "$IP ro add 172.16.101.0/24 nhid 21" run_cmd "$IP ro del 172.16.101.0/24 nexthop via 172.16.1.7 dev veth1 nexthop via 172.16.1.8 dev veth1" log_test $? 2 "Delete multipath route with only nh id based entry" + + run_cmd "$IP nexthop add id 22 via 172.16.1.6 dev veth1" + run_cmd "$IP ro add 172.16.102.0/24 nhid 22" + run_cmd "$IP ro del 172.16.102.0/24 dev veth1" + log_test $? 2 "Delete route when specifying only nexthop device" + + run_cmd "$IP ro del 172.16.102.0/24 via 172.16.1.6" + log_test $? 2 "Delete route when specifying only gateway" + + run_cmd "$IP ro del 172.16.102.0/24" + log_test $? 0 "Delete route when not specifying nexthop attributes" } ipv4_grp_fcnal() diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index 2271a8727f62..5637b5dadabd 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -1711,13 +1711,21 @@ ipv4_del_addr_test() $IP addr add dev dummy1 172.16.104.1/24 $IP addr add dev dummy1 172.16.104.11/24 + $IP addr add dev dummy1 172.16.104.12/24 + $IP addr add dev dummy1 172.16.104.13/24 $IP addr add dev dummy2 172.16.104.1/24 $IP addr add dev dummy2 172.16.104.11/24 + $IP addr add dev dummy2 172.16.104.12/24 $IP route add 172.16.105.0/24 via 172.16.104.2 src 172.16.104.11 + $IP route add 172.16.106.0/24 dev lo src 172.16.104.12 + $IP route add table 0 172.16.107.0/24 via 172.16.104.2 src 172.16.104.13 $IP route add vrf red 172.16.105.0/24 via 172.16.104.2 src 172.16.104.11 + $IP route add vrf red 172.16.106.0/24 dev lo src 172.16.104.12 set +e # removing address from device in vrf should only remove route from vrf table + echo " Regular FIB info" + $IP addr del dev dummy2 172.16.104.11/24 $IP ro ls vrf red | grep -q 172.16.105.0/24 log_test $? 1 "Route removed from VRF when source address deleted" @@ -1735,6 +1743,35 @@ ipv4_del_addr_test() $IP ro ls vrf red | grep -q 172.16.105.0/24 log_test $? 0 "Route in VRF is not removed by address delete" + # removing address from device in vrf should only remove route from vrf + # table even when the associated fib info only differs in table ID + echo " Identical FIB info with different table ID" + + $IP addr del dev dummy2 172.16.104.12/24 + $IP ro ls vrf red | grep -q 172.16.106.0/24 + log_test $? 1 "Route removed from VRF when source address deleted" + + $IP ro ls | grep -q 172.16.106.0/24 + log_test $? 0 "Route in default VRF not removed" + + $IP addr add dev dummy2 172.16.104.12/24 + $IP route add vrf red 172.16.106.0/24 dev lo src 172.16.104.12 + + $IP addr del dev dummy1 172.16.104.12/24 + $IP ro ls | grep -q 172.16.106.0/24 + log_test $? 1 "Route removed in default VRF when source address deleted" + + $IP ro ls vrf red | grep -q 172.16.106.0/24 + log_test $? 0 "Route in VRF is not removed by address delete" + + # removing address from device in default vrf should remove route from + # the default vrf even when route was inserted with a table ID of 0. + echo " Table ID 0" + + $IP addr del dev dummy1 172.16.104.13/24 + $IP ro ls | grep -q 172.16.107.0/24 + log_test $? 1 "Route removed in default VRF when source address deleted" + $IP li del dummy1 $IP li del dummy2 cleanup diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index f3dd5f2a0272..2eeaf4aca644 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -2152,7 +2152,7 @@ remove_tests() pm_nl_set_limits $ns2 1 3 pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow - run_tests $ns1 $ns2 10.0.1.1 0 -1 -2 slow + run_tests $ns1 $ns2 10.0.1.1 0 -1 -2 speed_10 chk_join_nr 3 3 3 chk_add_nr 1 1 chk_rm_nr 2 2 @@ -2165,7 +2165,7 @@ remove_tests() pm_nl_add_endpoint $ns1 10.0.3.1 flags signal pm_nl_add_endpoint $ns1 10.0.4.1 flags signal pm_nl_set_limits $ns2 3 3 - run_tests $ns1 $ns2 10.0.1.1 0 -3 0 slow + run_tests $ns1 $ns2 10.0.1.1 0 -3 0 speed_10 chk_join_nr 3 3 3 chk_add_nr 3 3 chk_rm_nr 3 3 invert @@ -2178,7 +2178,7 @@ remove_tests() pm_nl_add_endpoint $ns1 10.0.3.1 flags signal pm_nl_add_endpoint $ns1 10.0.14.1 flags signal pm_nl_set_limits $ns2 3 3 - run_tests $ns1 $ns2 10.0.1.1 0 -3 0 slow + run_tests $ns1 $ns2 10.0.1.1 0 -3 0 speed_10 chk_join_nr 1 1 1 chk_add_nr 3 3 chk_rm_nr 3 1 invert diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh index 0879da915014..80d36f7cfee8 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh @@ -35,8 +35,9 @@ init() ns1="ns1-$rndh" ns2="ns2-$rndh" + ns_sbox="ns_sbox-$rndh" - for netns in "$ns1" "$ns2";do + for netns in "$ns1" "$ns2" "$ns_sbox";do ip netns add $netns || exit $ksft_skip ip -net $netns link set lo up ip netns exec $netns sysctl -q net.mptcp.enabled=1 @@ -73,7 +74,7 @@ init() cleanup() { - for netns in "$ns1" "$ns2"; do + for netns in "$ns1" "$ns2" "$ns_sbox"; do ip netns del $netns done rm -f "$cin" "$cout" @@ -243,7 +244,7 @@ do_mptcp_sockopt_tests() { local lret=0 - ./mptcp_sockopt + ip netns exec "$ns_sbox" ./mptcp_sockopt lret=$? if [ $lret -ne 0 ]; then @@ -252,7 +253,7 @@ do_mptcp_sockopt_tests() return fi - ./mptcp_sockopt -6 + ip netns exec "$ns_sbox" ./mptcp_sockopt -6 lret=$? if [ $lret -ne 0 ]; then diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh index ffa13a957a36..40aeb5a71a2a 100755 --- a/tools/testing/selftests/net/mptcp/simult_flows.sh +++ b/tools/testing/selftests/net/mptcp/simult_flows.sh @@ -247,9 +247,10 @@ run_test() tc -n $ns2 qdisc add dev ns2eth1 root netem rate ${rate1}mbit $delay1 tc -n $ns2 qdisc add dev ns2eth2 root netem rate ${rate2}mbit $delay2 - # time is measured in ms, account for transfer size, affegated link speed + # time is measured in ms, account for transfer size, aggregated link speed # and header overhead (10%) - local time=$((size * 8 * 1000 * 10 / (( $rate1 + $rate2) * 1024 *1024 * 9) )) + # ms byte -> bit 10% mbit -> kbit -> bit 10% + local time=$((1000 * size * 8 * 10 / ((rate1 + rate2) * 1000 * 1000 * 9) )) # mptcp_connect will do some sleeps to allow the mp_join handshake # completion (see mptcp_connect): 200ms on each side, add some slack diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh index 736e358dc549..dfe3d287f01d 100755 --- a/tools/testing/selftests/net/pmtu.sh +++ b/tools/testing/selftests/net/pmtu.sh @@ -686,10 +686,12 @@ setup_xfrm() { } setup_nettest_xfrm() { - which nettest >/dev/null - if [ $? -ne 0 ]; then - echo "'nettest' command not found; skipping tests" - return 1 + if ! which nettest >/dev/null; then + PATH=$PWD:$PATH + if ! which nettest >/dev/null; then + echo "'nettest' command not found; skipping tests" + return 1 + fi fi [ ${1} -eq 6 ] && proto="-6" || proto="" diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index 0900c5438fbb..275491be3da2 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -782,7 +782,7 @@ kci_test_ipsec_offload() tmpl proto esp src $srcip dst $dstip spi 9 \ mode transport reqid 42 check_err $? - ip x p add dir out src $dstip/24 dst $srcip/24 \ + ip x p add dir in src $dstip/24 dst $srcip/24 \ tmpl proto esp src $dstip dst $srcip spi 9 \ mode transport reqid 42 check_err $? diff --git a/tools/testing/selftests/net/toeplitz.sh b/tools/testing/selftests/net/toeplitz.sh index 0a49907cd4fe..da5bfd834eff 100755 --- a/tools/testing/selftests/net/toeplitz.sh +++ b/tools/testing/selftests/net/toeplitz.sh @@ -32,7 +32,7 @@ DEV="eth0" # This is determined by reading the RSS indirection table using ethtool. get_rss_cfg_num_rxqs() { echo $(ethtool -x "${DEV}" | - egrep [[:space:]]+[0-9]+:[[:space:]]+ | + grep -E [[:space:]]+[0-9]+:[[:space:]]+ | cut -d: -f2- | awk '{$1=$1};1' | tr ' ' '\n' | diff --git a/tools/testing/selftests/net/udpgro.sh b/tools/testing/selftests/net/udpgro.sh index 6a443ca3cd3a..0c743752669a 100755 --- a/tools/testing/selftests/net/udpgro.sh +++ b/tools/testing/selftests/net/udpgro.sh @@ -5,6 +5,8 @@ readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)" +BPF_FILE="../bpf/xdp_dummy.bpf.o" + # set global exit status, but never reset nonzero one. check_err() { @@ -34,7 +36,7 @@ cfg_veth() { ip -netns "${PEER_NS}" addr add dev veth1 192.168.1.1/24 ip -netns "${PEER_NS}" addr add dev veth1 2001:db8::1/64 nodad ip -netns "${PEER_NS}" link set dev veth1 up - ip -n "${PEER_NS}" link set veth1 xdp object ../bpf/xdp_dummy.o section xdp + ip -n "${PEER_NS}" link set veth1 xdp object ${BPF_FILE} section xdp } run_one() { @@ -195,8 +197,8 @@ run_all() { return $ret } -if [ ! -f ../bpf/xdp_dummy.o ]; then - echo "Missing xdp_dummy helper. Build bpf selftest first" +if [ ! -f ${BPF_FILE} ]; then + echo "Missing ${BPF_FILE}. Build bpf selftest first" exit -1 fi diff --git a/tools/testing/selftests/net/udpgro_bench.sh b/tools/testing/selftests/net/udpgro_bench.sh index 8a1109a545db..894972877e8b 100755 --- a/tools/testing/selftests/net/udpgro_bench.sh +++ b/tools/testing/selftests/net/udpgro_bench.sh @@ -5,6 +5,8 @@ readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)" +BPF_FILE="../bpf/xdp_dummy.bpf.o" + cleanup() { local -r jobs="$(jobs -p)" local -r ns="$(ip netns list|grep $PEER_NS)" @@ -34,7 +36,7 @@ run_one() { ip -netns "${PEER_NS}" addr add dev veth1 2001:db8::1/64 nodad ip -netns "${PEER_NS}" link set dev veth1 up - ip -n "${PEER_NS}" link set veth1 xdp object ../bpf/xdp_dummy.o section xdp + ip -n "${PEER_NS}" link set veth1 xdp object ${BPF_FILE} section xdp ip netns exec "${PEER_NS}" ./udpgso_bench_rx ${rx_args} -r & ip netns exec "${PEER_NS}" ./udpgso_bench_rx -t ${rx_args} -r & @@ -80,8 +82,8 @@ run_all() { run_udp "${ipv6_args}" } -if [ ! -f ../bpf/xdp_dummy.o ]; then - echo "Missing xdp_dummy helper. Build bpf selftest first" +if [ ! -f ${BPF_FILE} ]; then + echo "Missing ${BPF_FILE}. Build bpf selftest first" exit -1 fi diff --git a/tools/testing/selftests/net/udpgro_frglist.sh b/tools/testing/selftests/net/udpgro_frglist.sh index 7fe85ba51075..c9c4b9d65839 100755 --- a/tools/testing/selftests/net/udpgro_frglist.sh +++ b/tools/testing/selftests/net/udpgro_frglist.sh @@ -5,6 +5,8 @@ readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)" +BPF_FILE="../bpf/xdp_dummy.bpf.o" + cleanup() { local -r jobs="$(jobs -p)" local -r ns="$(ip netns list|grep $PEER_NS)" @@ -36,7 +38,7 @@ run_one() { ip netns exec "${PEER_NS}" ethtool -K veth1 rx-gro-list on - ip -n "${PEER_NS}" link set veth1 xdp object ../bpf/xdp_dummy.o section xdp + ip -n "${PEER_NS}" link set veth1 xdp object ${BPF_FILE} section xdp tc -n "${PEER_NS}" qdisc add dev veth1 clsact tc -n "${PEER_NS}" filter add dev veth1 ingress prio 4 protocol ipv6 bpf object-file ../bpf/nat6to4.o section schedcls/ingress6/nat_6 direct-action tc -n "${PEER_NS}" filter add dev veth1 egress prio 4 protocol ip bpf object-file ../bpf/nat6to4.o section schedcls/egress4/snat4 direct-action @@ -81,8 +83,8 @@ run_all() { run_udp "${ipv6_args}" } -if [ ! -f ../bpf/xdp_dummy.o ]; then - echo "Missing xdp_dummy helper. Build bpf selftest first" +if [ ! -f ${BPF_FILE} ]; then + echo "Missing ${BPF_FILE}. Build bpf selftest first" exit -1 fi diff --git a/tools/testing/selftests/net/udpgro_fwd.sh b/tools/testing/selftests/net/udpgro_fwd.sh index 1bcd82e1f662..c079565add39 100755 --- a/tools/testing/selftests/net/udpgro_fwd.sh +++ b/tools/testing/selftests/net/udpgro_fwd.sh @@ -1,6 +1,7 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +BPF_FILE="../bpf/xdp_dummy.bpf.o" readonly BASE="ns-$(mktemp -u XXXXXX)" readonly SRC=2 readonly DST=1 @@ -46,7 +47,7 @@ create_ns() { ip -n $BASE$ns addr add dev veth$ns $BM_NET_V4$ns/24 ip -n $BASE$ns addr add dev veth$ns $BM_NET_V6$ns/64 nodad done - ip -n $NS_DST link set veth$DST xdp object ../bpf/xdp_dummy.o section xdp 2>/dev/null + ip -n $NS_DST link set veth$DST xdp object ${BPF_FILE} section xdp 2>/dev/null } create_vxlan_endpoint() { diff --git a/tools/testing/selftests/net/veth.sh b/tools/testing/selftests/net/veth.sh index 430895d1a2b6..2d073595c620 100755 --- a/tools/testing/selftests/net/veth.sh +++ b/tools/testing/selftests/net/veth.sh @@ -1,6 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 +BPF_FILE="../bpf/xdp_dummy.bpf.o" readonly STATS="$(mktemp -p /tmp ns-XXXXXX)" readonly BASE=`basename $STATS` readonly SRC=2 @@ -216,8 +217,8 @@ while getopts "hs:" option; do esac done -if [ ! -f ../bpf/xdp_dummy.o ]; then - echo "Missing xdp_dummy helper. Build bpf selftest first" +if [ ! -f ${BPF_FILE} ]; then + echo "Missing ${BPF_FILE}. Build bpf selftest first" exit 1 fi @@ -288,14 +289,14 @@ if [ $CPUS -gt 1 ]; then ip netns exec $NS_DST ethtool -L veth$DST rx 1 tx 2 2>/dev/null ip netns exec $NS_SRC ethtool -L veth$SRC rx 1 tx 2 2>/dev/null printf "%-60s" "bad setting: XDP with RX nr less than TX" - ip -n $NS_DST link set dev veth$DST xdp object ../bpf/xdp_dummy.o \ + ip -n $NS_DST link set dev veth$DST xdp object ${BPF_FILE} \ section xdp 2>/dev/null &&\ echo "fail - set operation successful ?!?" || echo " ok " # the following tests will run with multiple channels active ip netns exec $NS_SRC ethtool -L veth$SRC rx 2 ip netns exec $NS_DST ethtool -L veth$DST rx 2 - ip -n $NS_DST link set dev veth$DST xdp object ../bpf/xdp_dummy.o \ + ip -n $NS_DST link set dev veth$DST xdp object ${BPF_FILE} \ section xdp 2>/dev/null printf "%-60s" "bad setting: reducing RX nr below peer TX with XDP set" ip netns exec $NS_DST ethtool -L veth$DST rx 1 2>/dev/null &&\ @@ -311,7 +312,7 @@ if [ $CPUS -gt 2 ]; then chk_channels "setting invalid channels nr" $DST 2 2 fi -ip -n $NS_DST link set dev veth$DST xdp object ../bpf/xdp_dummy.o section xdp 2>/dev/null +ip -n $NS_DST link set dev veth$DST xdp object ${BPF_FILE} section xdp 2>/dev/null chk_gro_flag "with xdp attached - gro flag" $DST on chk_gro_flag " - peer gro flag" $SRC off chk_tso_flag " - tso flag" $SRC off diff --git a/tools/vm/slabinfo-gnuplot.sh b/tools/vm/slabinfo-gnuplot.sh index 26e193ffd2a2..873a892147e5 100644 --- a/tools/vm/slabinfo-gnuplot.sh +++ b/tools/vm/slabinfo-gnuplot.sh @@ -150,7 +150,7 @@ do_preprocess() let lines=3 out=`basename "$in"`"-slabs-by-loss" `cat "$in" | grep -A "$lines" 'Slabs sorted by loss' |\ - egrep -iv '\-\-|Name|Slabs'\ + grep -E -iv '\-\-|Name|Slabs'\ | awk '{print $1" "$4+$2*$3" "$4}' > "$out"` if [ $? -eq 0 ]; then do_slabs_plotting "$out" @@ -159,7 +159,7 @@ do_preprocess() let lines=3 out=`basename "$in"`"-slabs-by-size" `cat "$in" | grep -A "$lines" 'Slabs sorted by size' |\ - egrep -iv '\-\-|Name|Slabs'\ + grep -E -iv '\-\-|Name|Slabs'\ | awk '{print $1" "$4" "$4-$2*$3}' > "$out"` if [ $? -eq 0 ]; then do_slabs_plotting "$out" diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 25d7872b29c1..fab4d3790578 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1198,8 +1198,6 @@ static struct kvm *kvm_create_vm(unsigned long type, const char *fdname) goto out_err_no_arch_destroy_vm; } - kvm->max_halt_poll_ns = halt_poll_ns; - r = kvm_arch_init_vm(kvm, type); if (r) goto out_err_no_arch_destroy_vm; @@ -3377,9 +3375,6 @@ static void grow_halt_poll_ns(struct kvm_vcpu *vcpu) if (val < grow_start) val = grow_start; - if (val > vcpu->kvm->max_halt_poll_ns) - val = vcpu->kvm->max_halt_poll_ns; - vcpu->halt_poll_ns = val; out: trace_kvm_halt_poll_ns_grow(vcpu->vcpu_id, val, old); @@ -3483,6 +3478,24 @@ static inline void update_halt_poll_stats(struct kvm_vcpu *vcpu, ktime_t start, } } +static unsigned int kvm_vcpu_max_halt_poll_ns(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = vcpu->kvm; + + if (kvm->override_halt_poll_ns) { + /* + * Ensure kvm->max_halt_poll_ns is not read before + * kvm->override_halt_poll_ns. + * + * Pairs with the smp_wmb() when enabling KVM_CAP_HALT_POLL. + */ + smp_rmb(); + return READ_ONCE(kvm->max_halt_poll_ns); + } + + return READ_ONCE(halt_poll_ns); +} + /* * Emulate a vCPU halt condition, e.g. HLT on x86, WFI on arm, etc... If halt * polling is enabled, busy wait for a short time before blocking to avoid the @@ -3491,12 +3504,18 @@ static inline void update_halt_poll_stats(struct kvm_vcpu *vcpu, ktime_t start, */ void kvm_vcpu_halt(struct kvm_vcpu *vcpu) { + unsigned int max_halt_poll_ns = kvm_vcpu_max_halt_poll_ns(vcpu); bool halt_poll_allowed = !kvm_arch_no_poll(vcpu); - bool do_halt_poll = halt_poll_allowed && vcpu->halt_poll_ns; ktime_t start, cur, poll_end; bool waited = false; + bool do_halt_poll; u64 halt_ns; + if (vcpu->halt_poll_ns > max_halt_poll_ns) + vcpu->halt_poll_ns = max_halt_poll_ns; + + do_halt_poll = halt_poll_allowed && vcpu->halt_poll_ns; + start = cur = poll_end = ktime_get(); if (do_halt_poll) { ktime_t stop = ktime_add_ns(start, vcpu->halt_poll_ns); @@ -3535,18 +3554,21 @@ out: update_halt_poll_stats(vcpu, start, poll_end, !waited); if (halt_poll_allowed) { + /* Recompute the max halt poll time in case it changed. */ + max_halt_poll_ns = kvm_vcpu_max_halt_poll_ns(vcpu); + if (!vcpu_valid_wakeup(vcpu)) { shrink_halt_poll_ns(vcpu); - } else if (vcpu->kvm->max_halt_poll_ns) { + } else if (max_halt_poll_ns) { if (halt_ns <= vcpu->halt_poll_ns) ; /* we had a long block, shrink polling */ else if (vcpu->halt_poll_ns && - halt_ns > vcpu->kvm->max_halt_poll_ns) + halt_ns > max_halt_poll_ns) shrink_halt_poll_ns(vcpu); /* we had a short halt and our poll time is too small */ - else if (vcpu->halt_poll_ns < vcpu->kvm->max_halt_poll_ns && - halt_ns < vcpu->kvm->max_halt_poll_ns) + else if (vcpu->halt_poll_ns < max_halt_poll_ns && + halt_ns < max_halt_poll_ns) grow_halt_poll_ns(vcpu); } else { vcpu->halt_poll_ns = 0; @@ -4581,6 +4603,16 @@ static int kvm_vm_ioctl_enable_cap_generic(struct kvm *kvm, return -EINVAL; kvm->max_halt_poll_ns = cap->args[0]; + + /* + * Ensure kvm->override_halt_poll_ns does not become visible + * before kvm->max_halt_poll_ns. + * + * Pairs with the smp_rmb() in kvm_vcpu_max_halt_poll_ns(). + */ + smp_wmb(); + kvm->override_halt_poll_ns = true; + return 0; } case KVM_CAP_DIRTY_LOG_RING: diff --git a/virt/kvm/pfncache.c b/virt/kvm/pfncache.c index 346e47f15572..7c248193ca26 100644 --- a/virt/kvm/pfncache.c +++ b/virt/kvm/pfncache.c @@ -297,7 +297,12 @@ int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc, if (!gpc->valid || old_uhva != gpc->uhva) { ret = hva_to_pfn_retry(kvm, gpc); } else { - /* If the HVA→PFN mapping was already valid, don't unmap it. */ + /* + * If the HVA→PFN mapping was already valid, don't unmap it. + * But do update gpc->khva because the offset within the page + * may have changed. + */ + gpc->khva = old_khva + page_offset; old_pfn = KVM_PFN_ERR_FAULT; old_khva = NULL; ret = 0; |