diff options
406 files changed, 11074 insertions, 6300 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index bdb22006f713..ee0569a040c6 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -4945,8 +4945,6 @@ sa1100ir [NET] See drivers/net/irda/sa1100_ir.c. - sbni= [NET] Granch SBNI12 leased line adapter - sched_verbose [KNL] Enables verbose scheduler debug messages. schedstats= [KNL,X86] Enable or disable scheduled statistics. diff --git a/Documentation/devicetree/bindings/net/can/bosch,m_can.yaml b/Documentation/devicetree/bindings/net/can/bosch,m_can.yaml index a7b5807c5543..fb547e26c676 100644 --- a/Documentation/devicetree/bindings/net/can/bosch,m_can.yaml +++ b/Documentation/devicetree/bindings/net/can/bosch,m_can.yaml @@ -104,6 +104,12 @@ properties: maximum: 32 maxItems: 1 + power-domains: + description: + Power domain provider node and an args specifier containing + the can device id value. + maxItems: 1 + can-transceiver: $ref: can-transceiver.yaml# diff --git a/Documentation/devicetree/bindings/net/qcom,ipa.yaml b/Documentation/devicetree/bindings/net/qcom,ipa.yaml index 4853ab7017bd..ed88ba4b94df 100644 --- a/Documentation/devicetree/bindings/net/qcom,ipa.yaml +++ b/Documentation/devicetree/bindings/net/qcom,ipa.yaml @@ -87,18 +87,16 @@ properties: - const: ipa-setup-ready interconnects: - minItems: 2 items: - - description: Path leading to system memory - - description: Path between the AP and IPA config space - - description: Path leading to internal memory + - description: Interconnect path between IPA and main memory + - description: Interconnect path between IPA and internal memory + - description: Interconnect path between IPA and the AP subsystem interconnect-names: - minItems: 2 items: - const: memory - - const: config - const: imem + - const: config qcom,smem-states: $ref: /schemas/types.yaml#/definitions/phandle-array @@ -209,11 +207,11 @@ examples: interconnects = <&rsc_hlos MASTER_IPA &rsc_hlos SLAVE_EBI1>, - <&rsc_hlos MASTER_APPSS_PROC &rsc_hlos SLAVE_IPA_CFG>, - <&rsc_hlos MASTER_IPA &rsc_hlos SLAVE_IMEM>; + <&rsc_hlos MASTER_IPA &rsc_hlos SLAVE_IMEM>, + <&rsc_hlos MASTER_APPSS_PROC &rsc_hlos SLAVE_IPA_CFG>; interconnect-names = "memory", - "config", - "imem"; + "imem", + "config"; qcom,smem-states = <&ipa_smp2p_out 0>, <&ipa_smp2p_out 1>; diff --git a/Documentation/networking/bonding.rst b/Documentation/networking/bonding.rst index 62f2aab8eaec..31cfd7d674a6 100644 --- a/Documentation/networking/bonding.rst +++ b/Documentation/networking/bonding.rst @@ -501,6 +501,18 @@ fail_over_mac This option was added in bonding version 3.2.0. The "follow" policy was added in bonding version 3.3.0. +lacp_active + Option specifying whether to send LACPDU frames periodically. + + off or 0 + LACPDU frames acts as "speak when spoken to". + + on or 1 + LACPDU frames are sent along the configured links + periodically. See lacp_rate for more details. + + The default is on. + lacp_rate Option specifying the rate in which we'll ask our link partner diff --git a/Documentation/networking/netdev-FAQ.rst b/Documentation/networking/netdev-FAQ.rst index 91b2cf712801..e26532f49760 100644 --- a/Documentation/networking/netdev-FAQ.rst +++ b/Documentation/networking/netdev-FAQ.rst @@ -228,6 +228,23 @@ before posting to the mailing list. The patchwork build bot instance gets overloaded very easily and netdev@vger really doesn't need more traffic if we can help it. +netdevsim is great, can I extend it for my out-of-tree tests? +------------------------------------------------------------- + +No, `netdevsim` is a test vehicle solely for upstream tests. +(Please add your tests under tools/testing/selftests/.) + +We also give no guarantees that `netdevsim` won't change in the future +in a way which would break what would normally be considered uAPI. + +Is netdevsim considered a "user" of an API? +------------------------------------------- + +Linux kernel has a long standing rule that no API should be added unless +it has a real, in-tree user. Mock-ups and tests based on `netdevsim` are +strongly encouraged when adding new APIs, but `netdevsim` in itself +is **not** considered a use case/user. + Any other tips to help ensure my net/net-next patch gets OK'd? -------------------------------------------------------------- Attention to detail. Re-read your own work as if you were the diff --git a/Documentation/networking/operstates.rst b/Documentation/networking/operstates.rst index 9c918f7cb0e8..1ee2141e8ef1 100644 --- a/Documentation/networking/operstates.rst +++ b/Documentation/networking/operstates.rst @@ -73,7 +73,9 @@ IF_OPER_LOWERLAYERDOWN (3): state (f.e. VLAN). IF_OPER_TESTING (4): - Unused in current kernel. + Interface is in testing mode, for example executing driver self-tests + or media (cable) test. It can't be used for normal traffic until tests + complete. IF_OPER_DORMANT (5): Interface is L1 up, but waiting for an external event, f.e. for a @@ -111,7 +113,7 @@ it as lower layer. Note that for certain kind of soft-devices, which are not managing any real hardware, it is possible to set this bit from userspace. One -should use TVL IFLA_CARRIER to do so. +should use TLV IFLA_CARRIER to do so. netif_carrier_ok() can be used to query that bit. @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 14 SUBLEVEL = 0 -EXTRAVERSION = -rc3 +EXTRAVERSION = -rc4 NAME = Opossums on Parade # *DOCUMENTATION* @@ -546,7 +546,6 @@ export RCS_TAR_IGNORE := --exclude SCCS --exclude BitKeeper --exclude .svn \ PHONY += scripts_basic scripts_basic: $(Q)$(MAKE) $(build)=scripts/basic - $(Q)rm -f .tmp_quiet_recordmcount PHONY += outputmakefile ifdef building_out_of_srctree diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h index 6b3daba60987..1dd9baf4a6c2 100644 --- a/arch/alpha/include/uapi/asm/socket.h +++ b/arch/alpha/include/uapi/asm/socket.h @@ -129,6 +129,8 @@ #define SO_NETNS_COOKIE 71 +#define SO_BUF_LOCK 72 + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 diff --git a/arch/arm64/boot/dts/qcom/sc7180.dtsi b/arch/arm64/boot/dts/qcom/sc7180.dtsi index 7e0ff917e548..a9a052f8c63c 100644 --- a/arch/arm64/boot/dts/qcom/sc7180.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7180.dtsi @@ -110,11 +110,6 @@ no-map; }; - ipa_fw_mem: memory@8b700000 { - reg = <0 0x8b700000 0 0x10000>; - no-map; - }; - rmtfs_mem: memory@94600000 { compatible = "qcom,rmtfs-mem"; reg = <0x0 0x94600000 0x0 0x200000>; diff --git a/arch/arm64/boot/dts/qcom/sc7280.dtsi b/arch/arm64/boot/dts/qcom/sc7280.dtsi index 5eb2b58ea23b..a8c274ad74c4 100644 --- a/arch/arm64/boot/dts/qcom/sc7280.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7280.dtsi @@ -7,7 +7,6 @@ #include <dt-bindings/clock/qcom,gcc-sc7280.h> #include <dt-bindings/clock/qcom,rpmh.h> -#include <dt-bindings/interconnect/qcom,sc7280.h> #include <dt-bindings/interrupt-controller/arm-gic.h> #include <dt-bindings/mailbox/qcom-ipcc.h> #include <dt-bindings/power/qcom-aoss-qmp.h> @@ -64,11 +63,6 @@ no-map; reg = <0x0 0x80b00000 0x0 0x100000>; }; - - ipa_fw_mem: memory@8b700000 { - reg = <0 0x8b700000 0 0x10000>; - no-map; - }; }; cpus { @@ -514,43 +508,6 @@ qcom,bcm-voters = <&apps_bcm_voter>; }; - ipa: ipa@1e40000 { - compatible = "qcom,sc7280-ipa"; - - iommus = <&apps_smmu 0x480 0x0>, - <&apps_smmu 0x482 0x0>; - reg = <0 0x1e40000 0 0x8000>, - <0 0x1e50000 0 0x4ad0>, - <0 0x1e04000 0 0x23000>; - reg-names = "ipa-reg", - "ipa-shared", - "gsi"; - - interrupts-extended = <&intc 0 654 IRQ_TYPE_EDGE_RISING>, - <&intc 0 432 IRQ_TYPE_LEVEL_HIGH>, - <&ipa_smp2p_in 0 IRQ_TYPE_EDGE_RISING>, - <&ipa_smp2p_in 1 IRQ_TYPE_EDGE_RISING>; - interrupt-names = "ipa", - "gsi", - "ipa-clock-query", - "ipa-setup-ready"; - - clocks = <&rpmhcc RPMH_IPA_CLK>; - clock-names = "core"; - - interconnects = <&aggre2_noc MASTER_IPA 0 &mc_virt SLAVE_EBI1 0>, - <&gem_noc MASTER_APPSS_PROC 0 &cnoc2 SLAVE_IPA_CFG 0>; - interconnect-names = "memory", - "config"; - - qcom,smem-states = <&ipa_smp2p_out 0>, - <&ipa_smp2p_out 1>; - qcom,smem-state-names = "ipa-clock-enabled-valid", - "ipa-clock-enabled"; - - status = "disabled"; - }; - tcsr_mutex: hwlock@1f40000 { compatible = "qcom,tcsr-mutex", "syscon"; reg = <0 0x01f40000 0 0x40000>; diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h index cdf404a831b2..1eaf6a1ca561 100644 --- a/arch/mips/include/uapi/asm/socket.h +++ b/arch/mips/include/uapi/asm/socket.h @@ -140,6 +140,8 @@ #define SO_NETNS_COOKIE 71 +#define SO_BUF_LOCK 72 + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h index 5b5351cdcb33..8baaad52d799 100644 --- a/arch/parisc/include/uapi/asm/socket.h +++ b/arch/parisc/include/uapi/asm/socket.h @@ -121,6 +121,8 @@ #define SO_NETNS_COOKIE 0x4045 +#define SO_BUF_LOCK 0x4046 + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 diff --git a/arch/powerpc/kernel/vdso64/Makefile b/arch/powerpc/kernel/vdso64/Makefile index 2813e3f98db6..3c5baaa6f1e7 100644 --- a/arch/powerpc/kernel/vdso64/Makefile +++ b/arch/powerpc/kernel/vdso64/Makefile @@ -27,6 +27,13 @@ KASAN_SANITIZE := n ccflags-y := -shared -fno-common -fno-builtin -nostdlib \ -Wl,-soname=linux-vdso64.so.1 -Wl,--hash-style=both + +# Go prior to 1.16.x assumes r30 is not clobbered by any VDSO code. That used to be true +# by accident when the VDSO was hand-written asm code, but may not be now that the VDSO is +# compiler generated. To avoid breaking Go tell GCC not to use r30. Impact on code +# generation is minimal, it will just use r29 instead. +ccflags-y += $(call cc-option, -ffixed-r30) + asflags-y := -D__VDSO64__ -s targets += vdso64.lds diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 631a0d57b6cd..6b0886668465 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -77,7 +77,7 @@ #include "../../../../drivers/pci/pci.h" DEFINE_STATIC_KEY_FALSE(shared_processor); -EXPORT_SYMBOL_GPL(shared_processor); +EXPORT_SYMBOL(shared_processor); int CMO_PrPSP = -1; int CMO_SecPSP = -1; diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile index 660c799d875d..e30d3fdbbc78 100644 --- a/arch/s390/boot/compressed/Makefile +++ b/arch/s390/boot/compressed/Makefile @@ -11,6 +11,7 @@ UBSAN_SANITIZE := n KASAN_SANITIZE := n obj-y := $(if $(CONFIG_KERNEL_UNCOMPRESSED),,decompressor.o) info.o +obj-$(CONFIG_KERNEL_ZSTD) += clz_ctz.o obj-all := $(obj-y) piggy.o syms.o targets := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 targets += vmlinux.bin.xz vmlinux.bin.lzma vmlinux.bin.lzo vmlinux.bin.lz4 diff --git a/arch/s390/boot/compressed/clz_ctz.c b/arch/s390/boot/compressed/clz_ctz.c new file mode 100644 index 000000000000..c3ebf248596b --- /dev/null +++ b/arch/s390/boot/compressed/clz_ctz.c @@ -0,0 +1,2 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "../../../../lib/clz_ctz.c" diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 7de253f766e8..b88184019af9 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -335,7 +335,7 @@ CONFIG_L2TP_DEBUGFS=m CONFIG_L2TP_V3=y CONFIG_L2TP_IP=m CONFIG_L2TP_ETH=m -CONFIG_BRIDGE=m +CONFIG_BRIDGE=y CONFIG_BRIDGE_MRP=y CONFIG_VLAN_8021Q=m CONFIG_VLAN_8021Q_GVRP=y diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index b671642967ba..1667a3cdcf0a 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -325,7 +325,7 @@ CONFIG_L2TP_DEBUGFS=m CONFIG_L2TP_V3=y CONFIG_L2TP_IP=m CONFIG_L2TP_ETH=m -CONFIG_BRIDGE=m +CONFIG_BRIDGE=y CONFIG_BRIDGE_MRP=y CONFIG_VLAN_8021Q=m CONFIG_VLAN_8021Q_GVRP=y diff --git a/arch/s390/kernel/vdso32/vdso32.lds.S b/arch/s390/kernel/vdso32/vdso32.lds.S index bff50b6acd6d..edf5ff1debe1 100644 --- a/arch/s390/kernel/vdso32/vdso32.lds.S +++ b/arch/s390/kernel/vdso32/vdso32.lds.S @@ -51,6 +51,7 @@ SECTIONS .rela.dyn ALIGN(8) : { *(.rela.dyn) } .got ALIGN(8) : { *(.got .toc) } + .got.plt ALIGN(8) : { *(.got.plt) } _end = .; PROVIDE(end = .); diff --git a/arch/s390/kernel/vdso64/vdso64.lds.S b/arch/s390/kernel/vdso64/vdso64.lds.S index d4fb336d747b..4461ea151e49 100644 --- a/arch/s390/kernel/vdso64/vdso64.lds.S +++ b/arch/s390/kernel/vdso64/vdso64.lds.S @@ -51,6 +51,7 @@ SECTIONS .rela.dyn ALIGN(8) : { *(.rela.dyn) } .got ALIGN(8) : { *(.got .toc) } + .got.plt ALIGN(8) : { *(.got.plt) } _end = .; PROVIDE(end = .); diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h index 92675dc380fa..e80ee8641ac3 100644 --- a/arch/sparc/include/uapi/asm/socket.h +++ b/arch/sparc/include/uapi/asm/socket.h @@ -122,6 +122,8 @@ #define SO_NETNS_COOKIE 0x0050 +#define SO_BUF_LOCK 0x0051 + #if !defined(__KERNEL__) diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index b07592ca92f0..0b38f944c6b6 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -2016,6 +2016,7 @@ static void kvm_hv_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result) static int kvm_hv_hypercall_complete(struct kvm_vcpu *vcpu, u64 result) { + trace_kvm_hv_hypercall_done(result); kvm_hv_hypercall_set_result(vcpu, result); ++vcpu->stat.hypercalls; return kvm_skip_emulated_instruction(vcpu); @@ -2139,6 +2140,7 @@ static bool hv_check_hypercall_access(struct kvm_vcpu_hv *hv_vcpu, u16 code) int kvm_hv_hypercall(struct kvm_vcpu *vcpu) { + struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu); struct kvm_hv_hcall hc; u64 ret = HV_STATUS_SUCCESS; @@ -2173,17 +2175,25 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) hc.rep_idx = (hc.param >> HV_HYPERCALL_REP_START_OFFSET) & 0xfff; hc.rep = !!(hc.rep_cnt || hc.rep_idx); - if (hc.fast && is_xmm_fast_hypercall(&hc)) - kvm_hv_hypercall_read_xmm(&hc); - trace_kvm_hv_hypercall(hc.code, hc.fast, hc.rep_cnt, hc.rep_idx, hc.ingpa, hc.outgpa); - if (unlikely(!hv_check_hypercall_access(to_hv_vcpu(vcpu), hc.code))) { + if (unlikely(!hv_check_hypercall_access(hv_vcpu, hc.code))) { ret = HV_STATUS_ACCESS_DENIED; goto hypercall_complete; } + if (hc.fast && is_xmm_fast_hypercall(&hc)) { + if (unlikely(hv_vcpu->enforce_cpuid && + !(hv_vcpu->cpuid_cache.features_edx & + HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE))) { + kvm_queue_exception(vcpu, UD_VECTOR); + return 1; + } + + kvm_hv_hypercall_read_xmm(&hc); + } + switch (hc.code) { case HVCALL_NOTIFY_LONG_SPIN_WAIT: if (unlikely(hc.rep)) { diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 66f7f5bc3482..c4f4fa23320e 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -1644,7 +1644,7 @@ static int is_empty_shadow_page(u64 *spt) * aggregate version in order to make the slab shrinker * faster */ -static inline void kvm_mod_used_mmu_pages(struct kvm *kvm, unsigned long nr) +static inline void kvm_mod_used_mmu_pages(struct kvm *kvm, long nr) { kvm->arch.n_used_mmu_pages += nr; percpu_counter_add(&kvm_total_used_mmu_pages, nr); diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 6710d9ee2e4b..7fbce342eec4 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -64,6 +64,7 @@ static DEFINE_MUTEX(sev_bitmap_lock); unsigned int max_sev_asid; static unsigned int min_sev_asid; static unsigned long sev_me_mask; +static unsigned int nr_asids; static unsigned long *sev_asid_bitmap; static unsigned long *sev_reclaim_asid_bitmap; @@ -78,11 +79,11 @@ struct enc_region { /* Called with the sev_bitmap_lock held, or on shutdown */ static int sev_flush_asids(int min_asid, int max_asid) { - int ret, pos, error = 0; + int ret, asid, error = 0; /* Check if there are any ASIDs to reclaim before performing a flush */ - pos = find_next_bit(sev_reclaim_asid_bitmap, max_asid, min_asid); - if (pos >= max_asid) + asid = find_next_bit(sev_reclaim_asid_bitmap, nr_asids, min_asid); + if (asid > max_asid) return -EBUSY; /* @@ -115,15 +116,15 @@ static bool __sev_recycle_asids(int min_asid, int max_asid) /* The flush process will flush all reclaimable SEV and SEV-ES ASIDs */ bitmap_xor(sev_asid_bitmap, sev_asid_bitmap, sev_reclaim_asid_bitmap, - max_sev_asid); - bitmap_zero(sev_reclaim_asid_bitmap, max_sev_asid); + nr_asids); + bitmap_zero(sev_reclaim_asid_bitmap, nr_asids); return true; } static int sev_asid_new(struct kvm_sev_info *sev) { - int pos, min_asid, max_asid, ret; + int asid, min_asid, max_asid, ret; bool retry = true; enum misc_res_type type; @@ -143,11 +144,11 @@ static int sev_asid_new(struct kvm_sev_info *sev) * SEV-enabled guests must use asid from min_sev_asid to max_sev_asid. * SEV-ES-enabled guest can use from 1 to min_sev_asid - 1. */ - min_asid = sev->es_active ? 0 : min_sev_asid - 1; + min_asid = sev->es_active ? 1 : min_sev_asid; max_asid = sev->es_active ? min_sev_asid - 1 : max_sev_asid; again: - pos = find_next_zero_bit(sev_asid_bitmap, max_sev_asid, min_asid); - if (pos >= max_asid) { + asid = find_next_zero_bit(sev_asid_bitmap, max_asid + 1, min_asid); + if (asid > max_asid) { if (retry && __sev_recycle_asids(min_asid, max_asid)) { retry = false; goto again; @@ -157,11 +158,11 @@ again: goto e_uncharge; } - __set_bit(pos, sev_asid_bitmap); + __set_bit(asid, sev_asid_bitmap); mutex_unlock(&sev_bitmap_lock); - return pos + 1; + return asid; e_uncharge: misc_cg_uncharge(type, sev->misc_cg, 1); put_misc_cg(sev->misc_cg); @@ -179,17 +180,16 @@ static int sev_get_asid(struct kvm *kvm) static void sev_asid_free(struct kvm_sev_info *sev) { struct svm_cpu_data *sd; - int cpu, pos; + int cpu; enum misc_res_type type; mutex_lock(&sev_bitmap_lock); - pos = sev->asid - 1; - __set_bit(pos, sev_reclaim_asid_bitmap); + __set_bit(sev->asid, sev_reclaim_asid_bitmap); for_each_possible_cpu(cpu) { sd = per_cpu(svm_data, cpu); - sd->sev_vmcbs[pos] = NULL; + sd->sev_vmcbs[sev->asid] = NULL; } mutex_unlock(&sev_bitmap_lock); @@ -1857,12 +1857,17 @@ void __init sev_hardware_setup(void) min_sev_asid = edx; sev_me_mask = 1UL << (ebx & 0x3f); - /* Initialize SEV ASID bitmaps */ - sev_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL); + /* + * Initialize SEV ASID bitmaps. Allocate space for ASID 0 in the bitmap, + * even though it's never used, so that the bitmap is indexed by the + * actual ASID. + */ + nr_asids = max_sev_asid + 1; + sev_asid_bitmap = bitmap_zalloc(nr_asids, GFP_KERNEL); if (!sev_asid_bitmap) goto out; - sev_reclaim_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL); + sev_reclaim_asid_bitmap = bitmap_zalloc(nr_asids, GFP_KERNEL); if (!sev_reclaim_asid_bitmap) { bitmap_free(sev_asid_bitmap); sev_asid_bitmap = NULL; @@ -1907,7 +1912,7 @@ void sev_hardware_teardown(void) return; /* No need to take sev_bitmap_lock, all VMs have been destroyed. */ - sev_flush_asids(0, max_sev_asid); + sev_flush_asids(1, max_sev_asid); bitmap_free(sev_asid_bitmap); bitmap_free(sev_reclaim_asid_bitmap); @@ -1921,7 +1926,7 @@ int sev_cpu_init(struct svm_cpu_data *sd) if (!sev_enabled) return 0; - sd->sev_vmcbs = kcalloc(max_sev_asid + 1, sizeof(void *), GFP_KERNEL); + sd->sev_vmcbs = kcalloc(nr_asids, sizeof(void *), GFP_KERNEL); if (!sd->sev_vmcbs) return -ENOMEM; diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index b484141ea15b..03ebe368333e 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -92,6 +92,21 @@ TRACE_EVENT(kvm_hv_hypercall, __entry->outgpa) ); +TRACE_EVENT(kvm_hv_hypercall_done, + TP_PROTO(u64 result), + TP_ARGS(result), + + TP_STRUCT__entry( + __field(__u64, result) + ), + + TP_fast_assign( + __entry->result = result; + ), + + TP_printk("result 0x%llx", __entry->result) +); + /* * Tracepoint for Xen hypercall. */ diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 4116567f3d44..e5d5c5ed7dd4 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4358,8 +4358,17 @@ static int kvm_cpu_accept_dm_intr(struct kvm_vcpu *vcpu) static int kvm_vcpu_ready_for_interrupt_injection(struct kvm_vcpu *vcpu) { - return kvm_arch_interrupt_allowed(vcpu) && - kvm_cpu_accept_dm_intr(vcpu); + /* + * Do not cause an interrupt window exit if an exception + * is pending or an event needs reinjection; userspace + * might want to inject the interrupt manually using KVM_SET_REGS + * or KVM_SET_SREGS. For that to work, we must be at an + * instruction boundary and with no events half-injected. + */ + return (kvm_arch_interrupt_allowed(vcpu) && + kvm_cpu_accept_dm_intr(vcpu) && + !kvm_event_needs_reinjection(vcpu) && + !vcpu->arch.exception.pending); } static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, diff --git a/drivers/atm/horizon.c b/drivers/atm/horizon.c index 4f2951cbe69c..d0e67ec46216 100644 --- a/drivers/atm/horizon.c +++ b/drivers/atm/horizon.c @@ -2167,10 +2167,10 @@ static int hrz_open (struct atm_vcc *atm_vcc) // Part of the job is done by atm_pcr_goal which gives us a PCR // specification which says: EITHER grab the maximum available PCR - // (and perhaps a lower bound which we musn't pass), OR grab this + // (and perhaps a lower bound which we must not pass), OR grab this // amount, rounding down if you have to (and perhaps a lower bound - // which we musn't pass) OR grab this amount, rounding up if you - // have to (and perhaps an upper bound which we musn't pass). If any + // which we must not pass) OR grab this amount, rounding up if you + // have to (and perhaps an upper bound which we must not pass). If any // bounds ARE passed we fail. Note that rounding is only rounding to // match device limitations, we do not round down to satisfy // bandwidth availability even if this would not violate any given diff --git a/drivers/bus/fsl-mc/fsl-mc-bus.c b/drivers/bus/fsl-mc/fsl-mc-bus.c index 09c8ab5e0959..b3691de8ac06 100644 --- a/drivers/bus/fsl-mc/fsl-mc-bus.c +++ b/drivers/bus/fsl-mc/fsl-mc-bus.c @@ -914,7 +914,8 @@ void fsl_mc_device_remove(struct fsl_mc_device *mc_dev) } EXPORT_SYMBOL_GPL(fsl_mc_device_remove); -struct fsl_mc_device *fsl_mc_get_endpoint(struct fsl_mc_device *mc_dev) +struct fsl_mc_device *fsl_mc_get_endpoint(struct fsl_mc_device *mc_dev, + u16 if_id) { struct fsl_mc_device *mc_bus_dev, *endpoint; struct fsl_mc_obj_desc endpoint_desc = {{ 0 }}; @@ -925,6 +926,7 @@ struct fsl_mc_device *fsl_mc_get_endpoint(struct fsl_mc_device *mc_dev) mc_bus_dev = to_fsl_mc_device(mc_dev->dev.parent); strcpy(endpoint1.type, mc_dev->obj_desc.type); endpoint1.id = mc_dev->obj_desc.id; + endpoint1.if_id = if_id; err = dprc_get_connection(mc_bus_dev->mc_io, 0, mc_bus_dev->mc_handle, diff --git a/drivers/bus/mhi/core/internal.h b/drivers/bus/mhi/core/internal.h index 5b9ea66b92dc..bc239a11aa69 100644 --- a/drivers/bus/mhi/core/internal.h +++ b/drivers/bus/mhi/core/internal.h @@ -682,7 +682,7 @@ void mhi_rddm_prepare(struct mhi_controller *mhi_cntrl, struct image_info *img_info); void mhi_fw_load_handler(struct mhi_controller *mhi_cntrl); int mhi_prepare_channel(struct mhi_controller *mhi_cntrl, - struct mhi_chan *mhi_chan); + struct mhi_chan *mhi_chan, unsigned int flags); int mhi_init_chan_ctxt(struct mhi_controller *mhi_cntrl, struct mhi_chan *mhi_chan); void mhi_deinit_chan_ctxt(struct mhi_controller *mhi_cntrl, diff --git a/drivers/bus/mhi/core/main.c b/drivers/bus/mhi/core/main.c index fc9196f11cb7..84448233f64c 100644 --- a/drivers/bus/mhi/core/main.c +++ b/drivers/bus/mhi/core/main.c @@ -1430,7 +1430,7 @@ exit_unprepare_channel: } int mhi_prepare_channel(struct mhi_controller *mhi_cntrl, - struct mhi_chan *mhi_chan) + struct mhi_chan *mhi_chan, unsigned int flags) { int ret = 0; struct device *dev = &mhi_chan->mhi_dev->dev; @@ -1455,6 +1455,9 @@ int mhi_prepare_channel(struct mhi_controller *mhi_cntrl, if (ret) goto error_pm_state; + if (mhi_chan->dir == DMA_FROM_DEVICE) + mhi_chan->pre_alloc = !!(flags & MHI_CH_INBOUND_ALLOC_BUFS); + /* Pre-allocate buffer for xfer ring */ if (mhi_chan->pre_alloc) { int nr_el = get_nr_avail_ring_elements(mhi_cntrl, @@ -1610,7 +1613,7 @@ void mhi_reset_chan(struct mhi_controller *mhi_cntrl, struct mhi_chan *mhi_chan) } /* Move channel to start state */ -int mhi_prepare_for_transfer(struct mhi_device *mhi_dev) +int mhi_prepare_for_transfer(struct mhi_device *mhi_dev, unsigned int flags) { int ret, dir; struct mhi_controller *mhi_cntrl = mhi_dev->mhi_cntrl; @@ -1621,7 +1624,7 @@ int mhi_prepare_for_transfer(struct mhi_device *mhi_dev) if (!mhi_chan) continue; - ret = mhi_prepare_channel(mhi_cntrl, mhi_chan); + ret = mhi_prepare_channel(mhi_cntrl, mhi_chan, flags); if (ret) goto error_open_chan; } diff --git a/drivers/clk/clk-devres.c b/drivers/clk/clk-devres.c index be160764911b..f9d5b7334341 100644 --- a/drivers/clk/clk-devres.c +++ b/drivers/clk/clk-devres.c @@ -92,13 +92,20 @@ int __must_check devm_clk_bulk_get_optional(struct device *dev, int num_clks, } EXPORT_SYMBOL_GPL(devm_clk_bulk_get_optional); +static void devm_clk_bulk_release_all(struct device *dev, void *res) +{ + struct clk_bulk_devres *devres = res; + + clk_bulk_put_all(devres->num_clks, devres->clks); +} + int __must_check devm_clk_bulk_get_all(struct device *dev, struct clk_bulk_data **clks) { struct clk_bulk_devres *devres; int ret; - devres = devres_alloc(devm_clk_bulk_release, + devres = devres_alloc(devm_clk_bulk_release_all, sizeof(*devres), GFP_KERNEL); if (!devres) return -ENOMEM; diff --git a/drivers/clk/clk-stm32f4.c b/drivers/clk/clk-stm32f4.c index 18117ce5ff85..5c75e3d906c2 100644 --- a/drivers/clk/clk-stm32f4.c +++ b/drivers/clk/clk-stm32f4.c @@ -526,7 +526,7 @@ struct stm32f4_pll { struct stm32f4_pll_post_div_data { int idx; - u8 pll_num; + int pll_idx; const char *name; const char *parent; u8 flag; @@ -557,13 +557,13 @@ static const struct clk_div_table post_divr_table[] = { #define MAX_POST_DIV 3 static const struct stm32f4_pll_post_div_data post_div_data[MAX_POST_DIV] = { - { CLK_I2SQ_PDIV, PLL_I2S, "plli2s-q-div", "plli2s-q", + { CLK_I2SQ_PDIV, PLL_VCO_I2S, "plli2s-q-div", "plli2s-q", CLK_SET_RATE_PARENT, STM32F4_RCC_DCKCFGR, 0, 5, 0, NULL}, - { CLK_SAIQ_PDIV, PLL_SAI, "pllsai-q-div", "pllsai-q", + { CLK_SAIQ_PDIV, PLL_VCO_SAI, "pllsai-q-div", "pllsai-q", CLK_SET_RATE_PARENT, STM32F4_RCC_DCKCFGR, 8, 5, 0, NULL }, - { NO_IDX, PLL_SAI, "pllsai-r-div", "pllsai-r", CLK_SET_RATE_PARENT, + { NO_IDX, PLL_VCO_SAI, "pllsai-r-div", "pllsai-r", CLK_SET_RATE_PARENT, STM32F4_RCC_DCKCFGR, 16, 2, 0, post_divr_table }, }; @@ -1774,7 +1774,7 @@ static void __init stm32f4_rcc_init(struct device_node *np) post_div->width, post_div->flag_div, post_div->div_table, - clks[post_div->pll_num], + clks[post_div->pll_idx], &stm32f4_clk_lock); if (post_div->idx != NO_IDX) diff --git a/drivers/clk/hisilicon/Kconfig b/drivers/clk/hisilicon/Kconfig index 5ecc37aaa118..c1ec75aa4ccd 100644 --- a/drivers/clk/hisilicon/Kconfig +++ b/drivers/clk/hisilicon/Kconfig @@ -18,6 +18,7 @@ config COMMON_CLK_HI3519 config COMMON_CLK_HI3559A bool "Hi3559A Clock Driver" depends on ARCH_HISI || COMPILE_TEST + select RESET_HISI default ARCH_HISI help Build the clock driver for hi3559a. diff --git a/drivers/clk/qcom/clk-smd-rpm.c b/drivers/clk/qcom/clk-smd-rpm.c index 800b2fef1887..b2c142f3a649 100644 --- a/drivers/clk/qcom/clk-smd-rpm.c +++ b/drivers/clk/qcom/clk-smd-rpm.c @@ -467,7 +467,7 @@ DEFINE_CLK_SMD_RPM(msm8936, sysmmnoc_clk, sysmmnoc_a_clk, QCOM_SMD_RPM_BUS_CLK, static struct clk_smd_rpm *msm8936_clks[] = { [RPM_SMD_PCNOC_CLK] = &msm8916_pcnoc_clk, - [RPM_SMD_PCNOC_A_CLK] = &msm8916_pcnoc_clk, + [RPM_SMD_PCNOC_A_CLK] = &msm8916_pcnoc_a_clk, [RPM_SMD_SNOC_CLK] = &msm8916_snoc_clk, [RPM_SMD_SNOC_A_CLK] = &msm8916_snoc_a_clk, [RPM_SMD_BIMC_CLK] = &msm8916_bimc_clk, diff --git a/drivers/clk/tegra/clk-sdmmc-mux.c b/drivers/clk/tegra/clk-sdmmc-mux.c index 316912d3b1a4..4f2c3309eea4 100644 --- a/drivers/clk/tegra/clk-sdmmc-mux.c +++ b/drivers/clk/tegra/clk-sdmmc-mux.c @@ -194,6 +194,15 @@ static void clk_sdmmc_mux_disable(struct clk_hw *hw) gate_ops->disable(gate_hw); } +static void clk_sdmmc_mux_disable_unused(struct clk_hw *hw) +{ + struct tegra_sdmmc_mux *sdmmc_mux = to_clk_sdmmc_mux(hw); + const struct clk_ops *gate_ops = sdmmc_mux->gate_ops; + struct clk_hw *gate_hw = &sdmmc_mux->gate.hw; + + gate_ops->disable_unused(gate_hw); +} + static void clk_sdmmc_mux_restore_context(struct clk_hw *hw) { struct clk_hw *parent = clk_hw_get_parent(hw); @@ -218,6 +227,7 @@ static const struct clk_ops tegra_clk_sdmmc_mux_ops = { .is_enabled = clk_sdmmc_mux_is_enabled, .enable = clk_sdmmc_mux_enable, .disable = clk_sdmmc_mux_disable, + .disable_unused = clk_sdmmc_mux_disable_unused, .restore_context = clk_sdmmc_mux_restore_context, }; diff --git a/drivers/gpio/gpio-mpc8xxx.c b/drivers/gpio/gpio-mpc8xxx.c index 4b9157a69fca..50b321a1ab1b 100644 --- a/drivers/gpio/gpio-mpc8xxx.c +++ b/drivers/gpio/gpio-mpc8xxx.c @@ -405,7 +405,7 @@ static int mpc8xxx_probe(struct platform_device *pdev) ret = devm_request_irq(&pdev->dev, mpc8xxx_gc->irqn, mpc8xxx_gpio_irq_cascade, - IRQF_SHARED, "gpio-cascade", + IRQF_NO_THREAD | IRQF_SHARED, "gpio-cascade", mpc8xxx_gc); if (ret) { dev_err(&pdev->dev, diff --git a/drivers/gpio/gpio-tqmx86.c b/drivers/gpio/gpio-tqmx86.c index 5022e0ad0fae..0f5d17f343f1 100644 --- a/drivers/gpio/gpio-tqmx86.c +++ b/drivers/gpio/gpio-tqmx86.c @@ -238,8 +238,8 @@ static int tqmx86_gpio_probe(struct platform_device *pdev) struct resource *res; int ret, irq; - irq = platform_get_irq(pdev, 0); - if (irq < 0) + irq = platform_get_irq_optional(pdev, 0); + if (irq < 0 && irq != -ENXIO) return irq; res = platform_get_resource(pdev, IORESOURCE_IO, 0); @@ -278,7 +278,7 @@ static int tqmx86_gpio_probe(struct platform_device *pdev) pm_runtime_enable(&pdev->dev); - if (irq) { + if (irq > 0) { struct irq_chip *irq_chip = &gpio->irq_chip; u8 irq_status; diff --git a/drivers/media/common/videobuf2/videobuf2-core.c b/drivers/media/common/videobuf2/videobuf2-core.c index 02281d13505f..508ac295eb06 100644 --- a/drivers/media/common/videobuf2/videobuf2-core.c +++ b/drivers/media/common/videobuf2/videobuf2-core.c @@ -1573,6 +1573,7 @@ int vb2_core_qbuf(struct vb2_queue *q, unsigned int index, void *pb, struct media_request *req) { struct vb2_buffer *vb; + enum vb2_buffer_state orig_state; int ret; if (q->error) { @@ -1673,6 +1674,7 @@ int vb2_core_qbuf(struct vb2_queue *q, unsigned int index, void *pb, * Add to the queued buffers list, a buffer will stay on it until * dequeued in dqbuf. */ + orig_state = vb->state; list_add_tail(&vb->queued_entry, &q->queued_list); q->queued_count++; q->waiting_for_buffers = false; @@ -1703,8 +1705,17 @@ int vb2_core_qbuf(struct vb2_queue *q, unsigned int index, void *pb, if (q->streaming && !q->start_streaming_called && q->queued_count >= q->min_buffers_needed) { ret = vb2_start_streaming(q); - if (ret) + if (ret) { + /* + * Since vb2_core_qbuf will return with an error, + * we should return it to state DEQUEUED since + * the error indicates that the buffer wasn't queued. + */ + list_del(&vb->queued_entry); + q->queued_count--; + vb->state = orig_state; return ret; + } } dprintk(q, 2, "qbuf of buffer %d succeeded\n", vb->index); diff --git a/drivers/media/platform/atmel/Kconfig b/drivers/media/platform/atmel/Kconfig index 99b51213f871..dda2f27da317 100644 --- a/drivers/media/platform/atmel/Kconfig +++ b/drivers/media/platform/atmel/Kconfig @@ -8,6 +8,7 @@ config VIDEO_ATMEL_ISC select VIDEOBUF2_DMA_CONTIG select REGMAP_MMIO select V4L2_FWNODE + select VIDEO_ATMEL_ISC_BASE help This module makes the ATMEL Image Sensor Controller available as a v4l2 device. @@ -19,10 +20,17 @@ config VIDEO_ATMEL_XISC select VIDEOBUF2_DMA_CONTIG select REGMAP_MMIO select V4L2_FWNODE + select VIDEO_ATMEL_ISC_BASE help This module makes the ATMEL eXtended Image Sensor Controller available as a v4l2 device. +config VIDEO_ATMEL_ISC_BASE + tristate + default n + help + ATMEL ISC and XISC common code base. + config VIDEO_ATMEL_ISI tristate "ATMEL Image Sensor Interface (ISI) support" depends on VIDEO_V4L2 && OF diff --git a/drivers/media/platform/atmel/Makefile b/drivers/media/platform/atmel/Makefile index c5c01556c653..46d264ab7948 100644 --- a/drivers/media/platform/atmel/Makefile +++ b/drivers/media/platform/atmel/Makefile @@ -1,7 +1,8 @@ # SPDX-License-Identifier: GPL-2.0-only -atmel-isc-objs = atmel-sama5d2-isc.o atmel-isc-base.o -atmel-xisc-objs = atmel-sama7g5-isc.o atmel-isc-base.o +atmel-isc-objs = atmel-sama5d2-isc.o +atmel-xisc-objs = atmel-sama7g5-isc.o obj-$(CONFIG_VIDEO_ATMEL_ISI) += atmel-isi.o +obj-$(CONFIG_VIDEO_ATMEL_ISC_BASE) += atmel-isc-base.o obj-$(CONFIG_VIDEO_ATMEL_ISC) += atmel-isc.o obj-$(CONFIG_VIDEO_ATMEL_XISC) += atmel-xisc.o diff --git a/drivers/media/platform/atmel/atmel-isc-base.c b/drivers/media/platform/atmel/atmel-isc-base.c index 19daa49bf604..136ab7cf36ed 100644 --- a/drivers/media/platform/atmel/atmel-isc-base.c +++ b/drivers/media/platform/atmel/atmel-isc-base.c @@ -378,6 +378,7 @@ int isc_clk_init(struct isc_device *isc) return 0; } +EXPORT_SYMBOL_GPL(isc_clk_init); void isc_clk_cleanup(struct isc_device *isc) { @@ -392,6 +393,7 @@ void isc_clk_cleanup(struct isc_device *isc) clk_unregister(isc_clk->clk); } } +EXPORT_SYMBOL_GPL(isc_clk_cleanup); static int isc_queue_setup(struct vb2_queue *vq, unsigned int *nbuffers, unsigned int *nplanes, @@ -1578,6 +1580,7 @@ irqreturn_t isc_interrupt(int irq, void *dev_id) return ret; } +EXPORT_SYMBOL_GPL(isc_interrupt); static void isc_hist_count(struct isc_device *isc, u32 *min, u32 *max) { @@ -2212,6 +2215,7 @@ const struct v4l2_async_notifier_operations isc_async_ops = { .unbind = isc_async_unbind, .complete = isc_async_complete, }; +EXPORT_SYMBOL_GPL(isc_async_ops); void isc_subdev_cleanup(struct isc_device *isc) { @@ -2224,6 +2228,7 @@ void isc_subdev_cleanup(struct isc_device *isc) INIT_LIST_HEAD(&isc->subdev_entities); } +EXPORT_SYMBOL_GPL(isc_subdev_cleanup); int isc_pipeline_init(struct isc_device *isc) { @@ -2264,6 +2269,7 @@ int isc_pipeline_init(struct isc_device *isc) return 0; } +EXPORT_SYMBOL_GPL(isc_pipeline_init); /* regmap configuration */ #define ATMEL_ISC_REG_MAX 0xd5c @@ -2273,4 +2279,9 @@ const struct regmap_config isc_regmap_config = { .val_bits = 32, .max_register = ATMEL_ISC_REG_MAX, }; +EXPORT_SYMBOL_GPL(isc_regmap_config); +MODULE_AUTHOR("Songjun Wu"); +MODULE_AUTHOR("Eugen Hristev"); +MODULE_DESCRIPTION("Atmel ISC common code base"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/media/usb/dvb-usb-v2/rtl28xxu.c b/drivers/media/usb/dvb-usb-v2/rtl28xxu.c index 83705730e37e..795a012d4020 100644 --- a/drivers/media/usb/dvb-usb-v2/rtl28xxu.c +++ b/drivers/media/usb/dvb-usb-v2/rtl28xxu.c @@ -37,7 +37,16 @@ static int rtl28xxu_ctrl_msg(struct dvb_usb_device *d, struct rtl28xxu_req *req) } else { /* read */ requesttype = (USB_TYPE_VENDOR | USB_DIR_IN); - pipe = usb_rcvctrlpipe(d->udev, 0); + + /* + * Zero-length transfers must use usb_sndctrlpipe() and + * rtl28xxu_identify_state() uses a zero-length i2c read + * command to determine the chip type. + */ + if (req->size) + pipe = usb_rcvctrlpipe(d->udev, 0); + else + pipe = usb_sndctrlpipe(d->udev, 0); } ret = usb_control_msg(d->udev, pipe, 0, requesttype, req->value, @@ -612,9 +621,8 @@ static int rtl28xxu_read_config(struct dvb_usb_device *d) static int rtl28xxu_identify_state(struct dvb_usb_device *d, const char **name) { struct rtl28xxu_dev *dev = d_to_priv(d); - u8 buf[1]; int ret; - struct rtl28xxu_req req_demod_i2c = {0x0020, CMD_I2C_DA_RD, 1, buf}; + struct rtl28xxu_req req_demod_i2c = {0x0020, CMD_I2C_DA_RD, 0, NULL}; dev_dbg(&d->intf->dev, "\n"); diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 56213a8a1ec5..995c613086aa 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -431,10 +431,10 @@ config VSOCKMON config MHI_NET tristate "MHI network driver" depends on MHI_BUS - select WWAN help This is the network driver for MHI bus. It can be used with - QCOM based WWAN modems (like SDX55). Say Y or M. + QCOM based WWAN modems for IP or QMAP/rmnet protocol (like SDX55). + Say Y or M. endif # NET_CORE @@ -606,4 +606,11 @@ config NET_FAILOVER a VM with direct attached VF by failing over to the paravirtual datapath when the VF is unplugged. +config NETDEV_LEGACY_INIT + bool + depends on ISA + help + Drivers that call netdev_boot_setup_check() should select this + symbol, everything else no longer needs it. + endif # NETDEVICES diff --git a/drivers/net/Makefile b/drivers/net/Makefile index a48a664605a3..739838623cf6 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -18,7 +18,8 @@ obj-$(CONFIG_MACVLAN) += macvlan.o obj-$(CONFIG_MACVTAP) += macvtap.o obj-$(CONFIG_MII) += mii.o obj-$(CONFIG_MDIO) += mdio.o -obj-$(CONFIG_NET) += Space.o loopback.o +obj-$(CONFIG_NET) += loopback.o +obj-$(CONFIG_NETDEV_LEGACY_INIT) += Space.o obj-$(CONFIG_NETCONSOLE) += netconsole.o obj-y += phy/ obj-y += mdio/ @@ -36,7 +37,7 @@ obj-$(CONFIG_GTP) += gtp.o obj-$(CONFIG_NLMON) += nlmon.o obj-$(CONFIG_NET_VRF) += vrf.o obj-$(CONFIG_VSOCKMON) += vsockmon.o -obj-$(CONFIG_MHI_NET) += mhi/ +obj-$(CONFIG_MHI_NET) += mhi_net.o # # Networking Drivers diff --git a/drivers/net/Space.c b/drivers/net/Space.c index df79e7370bcc..49e67c9fb5a4 100644 --- a/drivers/net/Space.c +++ b/drivers/net/Space.c @@ -30,6 +30,148 @@ #include <linux/netlink.h> #include <net/Space.h> +/* + * This structure holds boot-time configured netdevice settings. They + * are then used in the device probing. + */ +struct netdev_boot_setup { + char name[IFNAMSIZ]; + struct ifmap map; +}; +#define NETDEV_BOOT_SETUP_MAX 8 + + +/****************************************************************************** + * + * Device Boot-time Settings Routines + * + ******************************************************************************/ + +/* Boot time configuration table */ +static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX]; + +/** + * netdev_boot_setup_add - add new setup entry + * @name: name of the device + * @map: configured settings for the device + * + * Adds new setup entry to the dev_boot_setup list. The function + * returns 0 on error and 1 on success. This is a generic routine to + * all netdevices. + */ +static int netdev_boot_setup_add(char *name, struct ifmap *map) +{ + struct netdev_boot_setup *s; + int i; + + s = dev_boot_setup; + for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) { + if (s[i].name[0] == '\0' || s[i].name[0] == ' ') { + memset(s[i].name, 0, sizeof(s[i].name)); + strlcpy(s[i].name, name, IFNAMSIZ); + memcpy(&s[i].map, map, sizeof(s[i].map)); + break; + } + } + + return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1; +} + +/** + * netdev_boot_setup_check - check boot time settings + * @dev: the netdevice + * + * Check boot time settings for the device. + * The found settings are set for the device to be used + * later in the device probing. + * Returns 0 if no settings found, 1 if they are. + */ +int netdev_boot_setup_check(struct net_device *dev) +{ + struct netdev_boot_setup *s = dev_boot_setup; + int i; + + for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) { + if (s[i].name[0] != '\0' && s[i].name[0] != ' ' && + !strcmp(dev->name, s[i].name)) { + dev->irq = s[i].map.irq; + dev->base_addr = s[i].map.base_addr; + dev->mem_start = s[i].map.mem_start; + dev->mem_end = s[i].map.mem_end; + return 1; + } + } + return 0; +} +EXPORT_SYMBOL(netdev_boot_setup_check); + +/** + * netdev_boot_base - get address from boot time settings + * @prefix: prefix for network device + * @unit: id for network device + * + * Check boot time settings for the base address of device. + * The found settings are set for the device to be used + * later in the device probing. + * Returns 0 if no settings found. + */ +static unsigned long netdev_boot_base(const char *prefix, int unit) +{ + const struct netdev_boot_setup *s = dev_boot_setup; + char name[IFNAMSIZ]; + int i; + + sprintf(name, "%s%d", prefix, unit); + + /* + * If device already registered then return base of 1 + * to indicate not to probe for this interface + */ + if (__dev_get_by_name(&init_net, name)) + return 1; + + for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) + if (!strcmp(name, s[i].name)) + return s[i].map.base_addr; + return 0; +} + +/* + * Saves at boot time configured settings for any netdevice. + */ +static int __init netdev_boot_setup(char *str) +{ + int ints[5]; + struct ifmap map; + + str = get_options(str, ARRAY_SIZE(ints), ints); + if (!str || !*str) + return 0; + + /* Save settings */ + memset(&map, 0, sizeof(map)); + if (ints[0] > 0) + map.irq = ints[1]; + if (ints[0] > 1) + map.base_addr = ints[2]; + if (ints[0] > 2) + map.mem_start = ints[3]; + if (ints[0] > 3) + map.mem_end = ints[4]; + + /* Add new entry to the list */ + return netdev_boot_setup_add(str, &map); +} + +__setup("netdev=", netdev_boot_setup); + +static int __init ether_boot_setup(char *str) +{ + return netdev_boot_setup(str); +} +__setup("ether=", ether_boot_setup); + + /* A unified ethernet device probe. This is the easiest way to have every * ethernet adaptor have the name "eth[0123...]". */ @@ -77,39 +219,15 @@ static struct devprobe2 isa_probes[] __initdata = { #ifdef CONFIG_SMC9194 {smc_init, 0}, #endif -#ifdef CONFIG_CS89x0 -#ifndef CONFIG_CS89x0_PLATFORM +#ifdef CONFIG_CS89x0_ISA {cs89x0_probe, 0}, #endif -#endif -#if defined(CONFIG_MVME16x_NET) || defined(CONFIG_BVME6000_NET) /* Intel */ - {i82596_probe, 0}, /* I82596 */ -#endif #ifdef CONFIG_NI65 {ni65_probe, 0}, #endif {NULL, 0}, }; -static struct devprobe2 m68k_probes[] __initdata = { -#ifdef CONFIG_ATARILANCE /* Lance-based Atari ethernet boards */ - {atarilance_probe, 0}, -#endif -#ifdef CONFIG_SUN3LANCE /* sun3 onboard Lance chip */ - {sun3lance_probe, 0}, -#endif -#ifdef CONFIG_SUN3_82586 /* sun3 onboard Intel 82586 chip */ - {sun3_82586_probe, 0}, -#endif -#ifdef CONFIG_APNE /* A1200 PCMCIA NE2000 */ - {apne_probe, 0}, -#endif -#ifdef CONFIG_MVME147_NET /* MVME147 internal Ethernet */ - {mvme147lance_probe, 0}, -#endif - {NULL, 0}, -}; - /* Unified ethernet device probe, segmented per architecture and * per bus interface. This drives the legacy devices only for now. */ @@ -121,8 +239,7 @@ static void __init ethif_probe2(int unit) if (base_addr == 1) return; - (void)(probe_list2(unit, m68k_probes, base_addr == 0) && - probe_list2(unit, isa_probes, base_addr == 0)); + probe_list2(unit, isa_probes, base_addr == 0); } /* Statically configured drivers -- order matters here. */ @@ -130,10 +247,6 @@ static int __init net_olddevs_init(void) { int num; -#ifdef CONFIG_SBNI - for (num = 0; num < 8; ++num) - sbni_probe(num); -#endif for (num = 0; num < 8; ++num) ethif_probe2(num); @@ -142,9 +255,6 @@ static int __init net_olddevs_init(void) cops_probe(1); cops_probe(2); #endif -#ifdef CONFIG_LTPC - ltpc_probe(); -#endif return 0; } diff --git a/drivers/net/appletalk/Kconfig b/drivers/net/appletalk/Kconfig index 43918398f0d3..90b9f1d6eda9 100644 --- a/drivers/net/appletalk/Kconfig +++ b/drivers/net/appletalk/Kconfig @@ -52,7 +52,9 @@ config LTPC config COPS tristate "COPS LocalTalk PC support" - depends on DEV_APPLETALK && (ISA || EISA) + depends on DEV_APPLETALK && ISA + depends on NETDEVICES + select NETDEV_LEGACY_INIT help This allows you to use COPS AppleTalk cards to connect to LocalTalk networks. You also need version 1.3.3 or later of the netatalk diff --git a/drivers/net/appletalk/ltpc.c b/drivers/net/appletalk/ltpc.c index 69c270885ff0..1f8925e75b3f 100644 --- a/drivers/net/appletalk/ltpc.c +++ b/drivers/net/appletalk/ltpc.c @@ -1015,7 +1015,7 @@ static const struct net_device_ops ltpc_netdev = { .ndo_set_rx_mode = set_multicast_list, }; -struct net_device * __init ltpc_probe(void) +static struct net_device * __init ltpc_probe(void) { struct net_device *dev; int err = -ENOMEM; @@ -1221,12 +1221,10 @@ static int __init ltpc_setup(char *str) } __setup("ltpc=", ltpc_setup); -#endif /* MODULE */ +#endif static struct net_device *dev_ltpc; -#ifdef MODULE - MODULE_LICENSE("GPL"); module_param(debug, int, 0); module_param_hw(io, int, ioport, 0); @@ -1244,7 +1242,6 @@ static int __init ltpc_module_init(void) return PTR_ERR_OR_ZERO(dev_ltpc); } module_init(ltpc_module_init); -#endif static void __exit ltpc_cleanup(void) { diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index 6908822d9773..a4a202b9a0a2 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -96,7 +96,7 @@ static int ad_marker_send(struct port *port, struct bond_marker *marker); static void ad_mux_machine(struct port *port, bool *update_slave_arr); static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port); static void ad_tx_machine(struct port *port); -static void ad_periodic_machine(struct port *port); +static void ad_periodic_machine(struct port *port, struct bond_params bond_params); static void ad_port_selection_logic(struct port *port, bool *update_slave_arr); static void ad_agg_selection_logic(struct aggregator *aggregator, bool *update_slave_arr); @@ -1294,10 +1294,11 @@ static void ad_tx_machine(struct port *port) /** * ad_periodic_machine - handle a port's periodic state machine * @port: the port we're looking at + * @bond_params: bond parameters we will use * * Turn ntt flag on priodically to perform periodic transmission of lacpdu's. */ -static void ad_periodic_machine(struct port *port) +static void ad_periodic_machine(struct port *port, struct bond_params bond_params) { periodic_states_t last_state; @@ -1306,8 +1307,8 @@ static void ad_periodic_machine(struct port *port) /* check if port was reinitialized */ if (((port->sm_vars & AD_PORT_BEGIN) || !(port->sm_vars & AD_PORT_LACP_ENABLED) || !port->is_enabled) || - (!(port->actor_oper_port_state & LACP_STATE_LACP_ACTIVITY) && !(port->partner_oper.port_state & LACP_STATE_LACP_ACTIVITY)) - ) { + (!(port->actor_oper_port_state & LACP_STATE_LACP_ACTIVITY) && !(port->partner_oper.port_state & LACP_STATE_LACP_ACTIVITY)) || + !bond_params.lacp_active) { port->sm_periodic_state = AD_NO_PERIODIC; } /* check if state machine should change state */ @@ -2341,7 +2342,7 @@ void bond_3ad_state_machine_handler(struct work_struct *work) } ad_rx_machine(NULL, port); - ad_periodic_machine(port); + ad_periodic_machine(port, bond->params); ad_port_selection_logic(port, &update_slave_arr); ad_mux_machine(port, &update_slave_arr); ad_tx_machine(port); diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index 22e5632089ac..7d3752cbf761 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -17,7 +17,6 @@ #include <linux/if_bonding.h> #include <linux/if_vlan.h> #include <linux/in.h> -#include <net/ipx.h> #include <net/arp.h> #include <net/ipv6.h> #include <asm/byteorder.h> @@ -1351,8 +1350,6 @@ struct slave *bond_xmit_tlb_slave_get(struct bonding *bond, if (!is_multicast_ether_addr(eth_data->h_dest)) { switch (skb->protocol) { case htons(ETH_P_IP): - case htons(ETH_P_IPX): - /* In case of IPX, it will falback to L2 hash */ case htons(ETH_P_IPV6): hash_index = bond_xmit_hash(bond, skb); if (bond->params.tlb_dynamic_lb) { @@ -1454,35 +1451,6 @@ struct slave *bond_xmit_alb_slave_get(struct bonding *bond, hash_size = sizeof(ip6hdr->daddr); break; } - case ETH_P_IPX: { - const struct ipxhdr *ipxhdr; - - if (pskb_network_may_pull(skb, sizeof(*ipxhdr))) { - do_tx_balance = false; - break; - } - ipxhdr = (struct ipxhdr *)skb_network_header(skb); - - if (ipxhdr->ipx_checksum != IPX_NO_CHECKSUM) { - /* something is wrong with this packet */ - do_tx_balance = false; - break; - } - - if (ipxhdr->ipx_type != IPX_TYPE_NCP) { - /* The only protocol worth balancing in - * this family since it has an "ARP" like - * mechanism - */ - do_tx_balance = false; - break; - } - - eth_data = eth_hdr(skb); - hash_start = (char *)eth_data->h_dest; - hash_size = ETH_ALEN; - break; - } case ETH_P_ARP: do_tx_balance = false; if (bond_info->rlb_enabled) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 04cf78fa1721..365953e8013e 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -5818,6 +5818,7 @@ static int bond_check_params(struct bond_params *params) params->downdelay = downdelay; params->peer_notif_delay = 0; params->use_carrier = use_carrier; + params->lacp_active = 1; params->lacp_fast = lacp_fast; params->primary[0] = 0; params->primary_reselect = primary_reselect_value; diff --git a/drivers/net/bonding/bond_netlink.c b/drivers/net/bonding/bond_netlink.c index 0561ece1ba45..5d54e11d18fa 100644 --- a/drivers/net/bonding/bond_netlink.c +++ b/drivers/net/bonding/bond_netlink.c @@ -100,6 +100,7 @@ static const struct nla_policy bond_policy[IFLA_BOND_MAX + 1] = { [IFLA_BOND_MIN_LINKS] = { .type = NLA_U32 }, [IFLA_BOND_LP_INTERVAL] = { .type = NLA_U32 }, [IFLA_BOND_PACKETS_PER_SLAVE] = { .type = NLA_U32 }, + [IFLA_BOND_AD_LACP_ACTIVE] = { .type = NLA_U8 }, [IFLA_BOND_AD_LACP_RATE] = { .type = NLA_U8 }, [IFLA_BOND_AD_SELECT] = { .type = NLA_U8 }, [IFLA_BOND_AD_INFO] = { .type = NLA_NESTED }, @@ -387,6 +388,16 @@ static int bond_changelink(struct net_device *bond_dev, struct nlattr *tb[], if (err) return err; } + + if (data[IFLA_BOND_AD_LACP_ACTIVE]) { + int lacp_active = nla_get_u8(data[IFLA_BOND_AD_LACP_ACTIVE]); + + bond_opt_initval(&newval, lacp_active); + err = __bond_opt_set(bond, BOND_OPT_LACP_ACTIVE, &newval); + if (err) + return err; + } + if (data[IFLA_BOND_AD_LACP_RATE]) { int lacp_rate = nla_get_u8(data[IFLA_BOND_AD_LACP_RATE]); @@ -490,6 +501,7 @@ static size_t bond_get_size(const struct net_device *bond_dev) nla_total_size(sizeof(u32)) + /* IFLA_BOND_MIN_LINKS */ nla_total_size(sizeof(u32)) + /* IFLA_BOND_LP_INTERVAL */ nla_total_size(sizeof(u32)) + /* IFLA_BOND_PACKETS_PER_SLAVE */ + nla_total_size(sizeof(u8)) + /* IFLA_BOND_AD_LACP_ACTIVE */ nla_total_size(sizeof(u8)) + /* IFLA_BOND_AD_LACP_RATE */ nla_total_size(sizeof(u8)) + /* IFLA_BOND_AD_SELECT */ nla_total_size(sizeof(struct nlattr)) + /* IFLA_BOND_AD_INFO */ @@ -622,6 +634,10 @@ static int bond_fill_info(struct sk_buff *skb, packets_per_slave)) goto nla_put_failure; + if (nla_put_u8(skb, IFLA_BOND_AD_LACP_ACTIVE, + bond->params.lacp_active)) + goto nla_put_failure; + if (nla_put_u8(skb, IFLA_BOND_AD_LACP_RATE, bond->params.lacp_fast)) goto nla_put_failure; diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index 0cf25de6f46d..a8fde3bc458f 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -58,6 +58,8 @@ static int bond_option_lp_interval_set(struct bonding *bond, const struct bond_opt_value *newval); static int bond_option_pps_set(struct bonding *bond, const struct bond_opt_value *newval); +static int bond_option_lacp_active_set(struct bonding *bond, + const struct bond_opt_value *newval); static int bond_option_lacp_rate_set(struct bonding *bond, const struct bond_opt_value *newval); static int bond_option_ad_select_set(struct bonding *bond, @@ -135,6 +137,12 @@ static const struct bond_opt_value bond_intmax_tbl[] = { { NULL, -1, 0} }; +static const struct bond_opt_value bond_lacp_active[] = { + { "off", 0, 0}, + { "on", 1, BOND_VALFLAG_DEFAULT}, + { NULL, -1, 0} +}; + static const struct bond_opt_value bond_lacp_rate_tbl[] = { { "slow", AD_LACP_SLOW, 0}, { "fast", AD_LACP_FAST, 0}, @@ -283,6 +291,15 @@ static const struct bond_option bond_opts[BOND_OPT_LAST] = { .values = bond_intmax_tbl, .set = bond_option_updelay_set }, + [BOND_OPT_LACP_ACTIVE] = { + .id = BOND_OPT_LACP_ACTIVE, + .name = "lacp_active", + .desc = "Send LACPDU frames with configured lacp rate or acts as speak when spoken to", + .flags = BOND_OPTFLAG_IFDOWN, + .unsuppmodes = BOND_MODE_ALL_EX(BIT(BOND_MODE_8023AD)), + .values = bond_lacp_active, + .set = bond_option_lacp_active_set + }, [BOND_OPT_LACP_RATE] = { .id = BOND_OPT_LACP_RATE, .name = "lacp_rate", @@ -1333,6 +1350,16 @@ static int bond_option_pps_set(struct bonding *bond, return 0; } +static int bond_option_lacp_active_set(struct bonding *bond, + const struct bond_opt_value *newval) +{ + netdev_dbg(bond->dev, "Setting LACP active to %s (%llu)\n", + newval->string, newval->value); + bond->params.lacp_active = newval->value; + + return 0; +} + static int bond_option_lacp_rate_set(struct bonding *bond, const struct bond_opt_value *newval) { diff --git a/drivers/net/bonding/bond_procfs.c b/drivers/net/bonding/bond_procfs.c index 0fb1da361bb1..f3e3bfd72556 100644 --- a/drivers/net/bonding/bond_procfs.c +++ b/drivers/net/bonding/bond_procfs.c @@ -133,6 +133,8 @@ static void bond_info_show_master(struct seq_file *seq) struct ad_info ad_info; seq_puts(seq, "\n802.3ad info\n"); + seq_printf(seq, "LACP active: %s\n", + (bond->params.lacp_active) ? "on" : "off"); seq_printf(seq, "LACP rate: %s\n", (bond->params.lacp_fast) ? "fast" : "slow"); seq_printf(seq, "Min links: %d\n", bond->params.min_links); diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c index 5f9e9a240226..b9e9842fed94 100644 --- a/drivers/net/bonding/bond_sysfs.c +++ b/drivers/net/bonding/bond_sysfs.c @@ -339,10 +339,24 @@ static ssize_t bonding_show_peer_notif_delay(struct device *d, static DEVICE_ATTR(peer_notif_delay, 0644, bonding_show_peer_notif_delay, bonding_sysfs_store_option); -/* Show the LACP interval. */ -static ssize_t bonding_show_lacp(struct device *d, - struct device_attribute *attr, - char *buf) +/* Show the LACP activity and interval. */ +static ssize_t bonding_show_lacp_active(struct device *d, + struct device_attribute *attr, + char *buf) +{ + struct bonding *bond = to_bond(d); + const struct bond_opt_value *val; + + val = bond_opt_get_val(BOND_OPT_LACP_ACTIVE, bond->params.lacp_active); + + return sprintf(buf, "%s %d\n", val->string, bond->params.lacp_active); +} +static DEVICE_ATTR(lacp_active, 0644, + bonding_show_lacp_active, bonding_sysfs_store_option); + +static ssize_t bonding_show_lacp_rate(struct device *d, + struct device_attribute *attr, + char *buf) { struct bonding *bond = to_bond(d); const struct bond_opt_value *val; @@ -352,7 +366,7 @@ static ssize_t bonding_show_lacp(struct device *d, return sprintf(buf, "%s %d\n", val->string, bond->params.lacp_fast); } static DEVICE_ATTR(lacp_rate, 0644, - bonding_show_lacp, bonding_sysfs_store_option); + bonding_show_lacp_rate, bonding_sysfs_store_option); static ssize_t bonding_show_min_links(struct device *d, struct device_attribute *attr, @@ -738,6 +752,7 @@ static struct attribute *per_bond_attrs[] = { &dev_attr_downdelay.attr, &dev_attr_updelay.attr, &dev_attr_peer_notif_delay.attr, + &dev_attr_lacp_active.attr, &dev_attr_lacp_rate.attr, &dev_attr_ad_select.attr, &dev_attr_xmit_hash_policy.attr, diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index 54ffb796a320..7734229aa078 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -649,7 +649,7 @@ static inline void flexcan_error_irq_disable(const struct flexcan_priv *priv) static int flexcan_clks_enable(const struct flexcan_priv *priv) { - int err; + int err = 0; if (priv->clk_ipg) { err = clk_prepare_enable(priv->clk_ipg); diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index b23e3488695b..bd1417a66cbf 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -2016,15 +2016,6 @@ int b53_br_flags(struct dsa_switch *ds, int port, } EXPORT_SYMBOL(b53_br_flags); -int b53_set_mrouter(struct dsa_switch *ds, int port, bool mrouter, - struct netlink_ext_ack *extack) -{ - b53_port_set_mcast_flood(ds->priv, port, mrouter); - - return 0; -} -EXPORT_SYMBOL(b53_set_mrouter); - static bool b53_possible_cpu_port(struct dsa_switch *ds, int port) { /* Broadcom switches will accept enabling Broadcom tags on the @@ -2268,7 +2259,6 @@ static const struct dsa_switch_ops b53_switch_ops = { .port_bridge_leave = b53_br_leave, .port_pre_bridge_flags = b53_br_flags_pre, .port_bridge_flags = b53_br_flags, - .port_set_mrouter = b53_set_mrouter, .port_stp_state_set = b53_br_set_stp_state, .port_fast_age = b53_br_fast_age, .port_vlan_filtering = b53_vlan_filtering, diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h index 82700a5714c1..9bf8319342b0 100644 --- a/drivers/net/dsa/b53/b53_priv.h +++ b/drivers/net/dsa/b53/b53_priv.h @@ -328,8 +328,6 @@ int b53_br_flags_pre(struct dsa_switch *ds, int port, int b53_br_flags(struct dsa_switch *ds, int port, struct switchdev_brport_flags flags, struct netlink_ext_ack *extack); -int b53_set_mrouter(struct dsa_switch *ds, int port, bool mrouter, - struct netlink_ext_ack *extack); int b53_setup_devlink_resources(struct dsa_switch *ds); void b53_port_event(struct dsa_switch *ds, int port); void b53_phylink_validate(struct dsa_switch *ds, int port, diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 3b018fcf4412..6ce9ec1283e0 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -1199,7 +1199,6 @@ static const struct dsa_switch_ops bcm_sf2_ops = { .port_pre_bridge_flags = b53_br_flags_pre, .port_bridge_flags = b53_br_flags, .port_stp_state_set = b53_br_set_stp_state, - .port_set_mrouter = b53_set_mrouter, .port_fast_age = b53_br_fast_age, .port_vlan_filtering = b53_vlan_filtering, .port_vlan_add = b53_vlan_add, diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index b6e0b347947e..53e6150e95b6 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -366,8 +366,8 @@ mt7530_fdb_write(struct mt7530_priv *priv, u16 vid, int i; reg[1] |= vid & CVID_MASK; - if (vid > 1) - reg[1] |= ATA2_IVL; + reg[1] |= ATA2_IVL; + reg[1] |= ATA2_FID(FID_BRIDGED); reg[2] |= (aging & AGE_TIMER_MASK) << AGE_TIMER; reg[2] |= (port_mask & PORT_MAP_MASK) << PORT_MAP; /* STATIC_ENT indicate that entry is static wouldn't @@ -1021,6 +1021,10 @@ mt753x_cpu_port_enable(struct dsa_switch *ds, int port) mt7530_write(priv, MT7530_PCR_P(port), PCR_MATRIX(dsa_user_ports(priv->ds))); + /* Set to fallback mode for independent VLAN learning */ + mt7530_rmw(priv, MT7530_PCR_P(port), PCR_PORT_VLAN_MASK, + MT7530_PORT_FALLBACK_MODE); + return 0; } @@ -1143,7 +1147,8 @@ mt7530_stp_state_set(struct dsa_switch *ds, int port, u8 state) break; } - mt7530_rmw(priv, MT7530_SSP_P(port), FID_PST_MASK, stp_state); + mt7530_rmw(priv, MT7530_SSP_P(port), FID_PST_MASK(FID_BRIDGED), + FID_PST(FID_BRIDGED, stp_state)); } static int @@ -1185,18 +1190,6 @@ mt7530_port_bridge_flags(struct dsa_switch *ds, int port, } static int -mt7530_port_set_mrouter(struct dsa_switch *ds, int port, bool mrouter, - struct netlink_ext_ack *extack) -{ - struct mt7530_priv *priv = ds->priv; - - mt7530_rmw(priv, MT7530_MFC, UNM_FFP(BIT(port)), - mrouter ? UNM_FFP(BIT(port)) : 0); - - return 0; -} - -static int mt7530_port_bridge_join(struct dsa_switch *ds, int port, struct net_device *bridge) { @@ -1229,6 +1222,10 @@ mt7530_port_bridge_join(struct dsa_switch *ds, int port, PCR_MATRIX_MASK, PCR_MATRIX(port_bitmap)); priv->ports[port].pm |= PCR_MATRIX(port_bitmap); + /* Set to fallback mode for independent VLAN learning */ + mt7530_rmw(priv, MT7530_PCR_P(port), PCR_PORT_VLAN_MASK, + MT7530_PORT_FALLBACK_MODE); + mutex_unlock(&priv->reg_mutex); return 0; @@ -1241,15 +1238,22 @@ mt7530_port_set_vlan_unaware(struct dsa_switch *ds, int port) bool all_user_ports_removed = true; int i; - /* When a port is removed from the bridge, the port would be set up - * back to the default as is at initial boot which is a VLAN-unaware - * port. + /* This is called after .port_bridge_leave when leaving a VLAN-aware + * bridge. Don't set standalone ports to fallback mode. */ - mt7530_rmw(priv, MT7530_PCR_P(port), PCR_PORT_VLAN_MASK, - MT7530_PORT_MATRIX_MODE); - mt7530_rmw(priv, MT7530_PVC_P(port), VLAN_ATTR_MASK | PVC_EG_TAG_MASK, + if (dsa_to_port(ds, port)->bridge_dev) + mt7530_rmw(priv, MT7530_PCR_P(port), PCR_PORT_VLAN_MASK, + MT7530_PORT_FALLBACK_MODE); + + mt7530_rmw(priv, MT7530_PVC_P(port), + VLAN_ATTR_MASK | PVC_EG_TAG_MASK | ACC_FRM_MASK, VLAN_ATTR(MT7530_VLAN_TRANSPARENT) | - PVC_EG_TAG(MT7530_VLAN_EG_CONSISTENT)); + PVC_EG_TAG(MT7530_VLAN_EG_CONSISTENT) | + MT7530_VLAN_ACC_ALL); + + /* Set PVID to 0 */ + mt7530_rmw(priv, MT7530_PPBV1_P(port), G0_PORT_VID_MASK, + G0_PORT_VID_DEF); for (i = 0; i < MT7530_NUM_PORTS; i++) { if (dsa_is_user_port(ds, i) && @@ -1276,15 +1280,19 @@ mt7530_port_set_vlan_aware(struct dsa_switch *ds, int port) struct mt7530_priv *priv = ds->priv; /* Trapped into security mode allows packet forwarding through VLAN - * table lookup. CPU port is set to fallback mode to let untagged - * frames pass through. + * table lookup. */ - if (dsa_is_cpu_port(ds, port)) - mt7530_rmw(priv, MT7530_PCR_P(port), PCR_PORT_VLAN_MASK, - MT7530_PORT_FALLBACK_MODE); - else + if (dsa_is_user_port(ds, port)) { mt7530_rmw(priv, MT7530_PCR_P(port), PCR_PORT_VLAN_MASK, MT7530_PORT_SECURITY_MODE); + mt7530_rmw(priv, MT7530_PPBV1_P(port), G0_PORT_VID_MASK, + G0_PORT_VID(priv->ports[port].pvid)); + + /* Only accept tagged frames if PVID is not set */ + if (!priv->ports[port].pvid) + mt7530_rmw(priv, MT7530_PVC_P(port), ACC_FRM_MASK, + MT7530_VLAN_ACC_TAGGED); + } /* Set the port as a user port which is to be able to recognize VID * from incoming packets before fetching entry within the VLAN table. @@ -1329,6 +1337,13 @@ mt7530_port_bridge_leave(struct dsa_switch *ds, int port, PCR_MATRIX(BIT(MT7530_CPU_PORT))); priv->ports[port].pm = PCR_MATRIX(BIT(MT7530_CPU_PORT)); + /* When a port is removed from the bridge, the port would be set up + * back to the default as is at initial boot which is a VLAN-unaware + * port. + */ + mt7530_rmw(priv, MT7530_PCR_P(port), PCR_PORT_VLAN_MASK, + MT7530_PORT_MATRIX_MODE); + mutex_unlock(&priv->reg_mutex); } @@ -1511,7 +1526,8 @@ mt7530_hw_vlan_add(struct mt7530_priv *priv, /* Validate the entry with independent learning, create egress tag per * VLAN and joining the port as one of the port members. */ - val = IVL_MAC | VTAG_EN | PORT_MEM(new_members) | VLAN_VALID; + val = IVL_MAC | VTAG_EN | PORT_MEM(new_members) | FID(FID_BRIDGED) | + VLAN_VALID; mt7530_write(priv, MT7530_VAWD1, val); /* Decide whether adding tag or not for those outgoing packets from the @@ -1601,9 +1617,28 @@ mt7530_port_vlan_add(struct dsa_switch *ds, int port, mt7530_hw_vlan_update(priv, vlan->vid, &new_entry, mt7530_hw_vlan_add); if (pvid) { - mt7530_rmw(priv, MT7530_PPBV1_P(port), G0_PORT_VID_MASK, - G0_PORT_VID(vlan->vid)); priv->ports[port].pvid = vlan->vid; + + /* Accept all frames if PVID is set */ + mt7530_rmw(priv, MT7530_PVC_P(port), ACC_FRM_MASK, + MT7530_VLAN_ACC_ALL); + + /* Only configure PVID if VLAN filtering is enabled */ + if (dsa_port_is_vlan_filtering(dsa_to_port(ds, port))) + mt7530_rmw(priv, MT7530_PPBV1_P(port), + G0_PORT_VID_MASK, + G0_PORT_VID(vlan->vid)); + } else if (vlan->vid && priv->ports[port].pvid == vlan->vid) { + /* This VLAN is overwritten without PVID, so unset it */ + priv->ports[port].pvid = G0_PORT_VID_DEF; + + /* Only accept tagged frames if the port is VLAN-aware */ + if (dsa_port_is_vlan_filtering(dsa_to_port(ds, port))) + mt7530_rmw(priv, MT7530_PVC_P(port), ACC_FRM_MASK, + MT7530_VLAN_ACC_TAGGED); + + mt7530_rmw(priv, MT7530_PPBV1_P(port), G0_PORT_VID_MASK, + G0_PORT_VID_DEF); } mutex_unlock(&priv->reg_mutex); @@ -1617,11 +1652,9 @@ mt7530_port_vlan_del(struct dsa_switch *ds, int port, { struct mt7530_hw_vlan_entry target_entry; struct mt7530_priv *priv = ds->priv; - u16 pvid; mutex_lock(&priv->reg_mutex); - pvid = priv->ports[port].pvid; mt7530_hw_vlan_entry_init(&target_entry, port, 0); mt7530_hw_vlan_update(priv, vlan->vid, &target_entry, mt7530_hw_vlan_del); @@ -1629,11 +1662,18 @@ mt7530_port_vlan_del(struct dsa_switch *ds, int port, /* PVID is being restored to the default whenever the PVID port * is being removed from the VLAN. */ - if (pvid == vlan->vid) - pvid = G0_PORT_VID_DEF; + if (priv->ports[port].pvid == vlan->vid) { + priv->ports[port].pvid = G0_PORT_VID_DEF; + + /* Only accept tagged frames if the port is VLAN-aware */ + if (dsa_port_is_vlan_filtering(dsa_to_port(ds, port))) + mt7530_rmw(priv, MT7530_PVC_P(port), ACC_FRM_MASK, + MT7530_VLAN_ACC_TAGGED); + + mt7530_rmw(priv, MT7530_PPBV1_P(port), G0_PORT_VID_MASK, + G0_PORT_VID_DEF); + } - mt7530_rmw(priv, MT7530_PPBV1_P(port), G0_PORT_VID_MASK, pvid); - priv->ports[port].pvid = pvid; mutex_unlock(&priv->reg_mutex); @@ -2046,6 +2086,7 @@ mt7530_setup(struct dsa_switch *ds) * as two netdev instances. */ dn = dsa_to_port(ds, MT7530_CPU_PORT)->master->dev.of_node->parent; + ds->assisted_learning_on_cpu_port = true; ds->mtu_enforcement_ingress = true; if (priv->id == ID_MT7530) { @@ -2116,6 +2157,9 @@ mt7530_setup(struct dsa_switch *ds) mt7530_rmw(priv, MT7530_PCR_P(i), PCR_MATRIX_MASK, PCR_MATRIX_CLR); + /* Disable learning by default on all ports */ + mt7530_set(priv, MT7530_PSC_P(i), SA_DIS); + if (dsa_is_cpu_port(ds, i)) { ret = mt753x_cpu_port_enable(ds, i); if (ret) @@ -2123,8 +2167,9 @@ mt7530_setup(struct dsa_switch *ds) } else { mt7530_port_disable(ds, i); - /* Disable learning by default on all user ports */ - mt7530_set(priv, MT7530_PSC_P(i), SA_DIS); + /* Set default PVID to 0 on all user ports */ + mt7530_rmw(priv, MT7530_PPBV1_P(i), G0_PORT_VID_MASK, + G0_PORT_VID_DEF); } /* Enable consistent egress tag */ mt7530_rmw(priv, MT7530_PVC_P(i), PVC_EG_TAG_MASK, @@ -2281,6 +2326,9 @@ mt7531_setup(struct dsa_switch *ds) mt7530_rmw(priv, MT7530_PCR_P(i), PCR_MATRIX_MASK, PCR_MATRIX_CLR); + /* Disable learning by default on all ports */ + mt7530_set(priv, MT7530_PSC_P(i), SA_DIS); + mt7530_set(priv, MT7531_DBG_CNT(i), MT7531_DIS_CLR); if (dsa_is_cpu_port(ds, i)) { @@ -2290,8 +2338,9 @@ mt7531_setup(struct dsa_switch *ds) } else { mt7530_port_disable(ds, i); - /* Disable learning by default on all user ports */ - mt7530_set(priv, MT7530_PSC_P(i), SA_DIS); + /* Set default PVID to 0 on all user ports */ + mt7530_rmw(priv, MT7530_PPBV1_P(i), G0_PORT_VID_MASK, + G0_PORT_VID_DEF); } /* Enable consistent egress tag */ @@ -2299,6 +2348,7 @@ mt7531_setup(struct dsa_switch *ds) PVC_EG_TAG(MT7530_VLAN_EG_CONSISTENT)); } + ds->assisted_learning_on_cpu_port = true; ds->mtu_enforcement_ingress = true; /* Flush the FDB table */ @@ -3052,7 +3102,6 @@ static const struct dsa_switch_ops mt7530_switch_ops = { .port_stp_state_set = mt7530_stp_state_set, .port_pre_bridge_flags = mt7530_port_pre_bridge_flags, .port_bridge_flags = mt7530_port_bridge_flags, - .port_set_mrouter = mt7530_port_set_mrouter, .port_bridge_join = mt7530_port_bridge_join, .port_bridge_leave = mt7530_port_bridge_leave, .port_fdb_add = mt7530_port_fdb_add, diff --git a/drivers/net/dsa/mt7530.h b/drivers/net/dsa/mt7530.h index b19b389ff10a..fe4cd2ac26d0 100644 --- a/drivers/net/dsa/mt7530.h +++ b/drivers/net/dsa/mt7530.h @@ -80,6 +80,7 @@ enum mt753x_bpdu_port_fw { #define STATIC_ENT 3 #define MT7530_ATA2 0x78 #define ATA2_IVL BIT(15) +#define ATA2_FID(x) (((x) & 0x7) << 12) /* Register for address table write data */ #define MT7530_ATWD 0x7c @@ -148,11 +149,18 @@ enum mt7530_vlan_cmd { #define VTAG_EN BIT(28) /* VLAN Member Control */ #define PORT_MEM(x) (((x) & 0xff) << 16) +/* Filter ID */ +#define FID(x) (((x) & 0x7) << 1) /* VLAN Entry Valid */ #define VLAN_VALID BIT(0) #define PORT_MEM_SHFT 16 #define PORT_MEM_MASK 0xff +enum mt7530_fid { + FID_STANDALONE = 0, + FID_BRIDGED = 1, +}; + #define MT7530_VAWD2 0x98 /* Egress Tag Control */ #define ETAG_CTRL_P(p, x) (((x) & 0x3) << ((p) << 1)) @@ -179,8 +187,8 @@ enum mt7530_vlan_egress_attr { /* Register for port STP state control */ #define MT7530_SSP_P(x) (0x2000 + ((x) * 0x100)) -#define FID_PST(x) ((x) & 0x3) -#define FID_PST_MASK FID_PST(0x3) +#define FID_PST(fid, state) (((state) & 0x3) << ((fid) * 2)) +#define FID_PST_MASK(fid) FID_PST(fid, 0x3) enum mt7530_stp_state { MT7530_STP_DISABLED = 0, @@ -230,6 +238,7 @@ enum mt7530_port_mode { #define PVC_EG_TAG_MASK PVC_EG_TAG(7) #define VLAN_ATTR(x) (((x) & 0x3) << 6) #define VLAN_ATTR_MASK VLAN_ATTR(3) +#define ACC_FRM_MASK GENMASK(1, 0) enum mt7530_vlan_port_eg_tag { MT7530_VLAN_EG_DISABLED = 0, @@ -241,13 +250,19 @@ enum mt7530_vlan_port_attr { MT7530_VLAN_TRANSPARENT = 3, }; +enum mt7530_vlan_port_acc_frm { + MT7530_VLAN_ACC_ALL = 0, + MT7530_VLAN_ACC_TAGGED = 1, + MT7530_VLAN_ACC_UNTAGGED = 2, +}; + #define STAG_VPID (((x) & 0xffff) << 16) /* Register for port port-and-protocol based vlan 1 control */ #define MT7530_PPBV1_P(x) (0x2014 + ((x) * 0x100)) #define G0_PORT_VID(x) (((x) & 0xfff) << 0) #define G0_PORT_VID_MASK G0_PORT_VID(0xfff) -#define G0_PORT_VID_DEF G0_PORT_VID(1) +#define G0_PORT_VID_DEF G0_PORT_VID(0) /* Register for port MAC control register */ #define MT7530_PMCR_P(x) (0x3000 + ((x) * 0x100)) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index ddb51dd132ef..c45ca2473743 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -5797,7 +5797,6 @@ static int mv88e6xxx_port_bridge_flags(struct dsa_switch *ds, int port, struct netlink_ext_ack *extack) { struct mv88e6xxx_chip *chip = ds->priv; - bool do_fast_age = false; int err = -EOPNOTSUPP; mv88e6xxx_reg_lock(chip); @@ -5809,9 +5808,6 @@ static int mv88e6xxx_port_bridge_flags(struct dsa_switch *ds, int port, err = mv88e6xxx_port_set_assoc_vector(chip, port, pav); if (err) goto out; - - if (!learning) - do_fast_age = true; } if (flags.mask & BR_FLOOD) { @@ -5843,26 +5839,6 @@ static int mv88e6xxx_port_bridge_flags(struct dsa_switch *ds, int port, out: mv88e6xxx_reg_unlock(chip); - if (do_fast_age) - mv88e6xxx_port_fast_age(ds, port); - - return err; -} - -static int mv88e6xxx_port_set_mrouter(struct dsa_switch *ds, int port, - bool mrouter, - struct netlink_ext_ack *extack) -{ - struct mv88e6xxx_chip *chip = ds->priv; - int err; - - if (!chip->info->ops->port_set_mcast_flood) - return -EOPNOTSUPP; - - mv88e6xxx_reg_lock(chip); - err = chip->info->ops->port_set_mcast_flood(chip, port, mrouter); - mv88e6xxx_reg_unlock(chip); - return err; } @@ -6167,7 +6143,6 @@ static const struct dsa_switch_ops mv88e6xxx_switch_ops = { .port_bridge_leave = mv88e6xxx_port_bridge_leave, .port_pre_bridge_flags = mv88e6xxx_port_pre_bridge_flags, .port_bridge_flags = mv88e6xxx_port_bridge_flags, - .port_set_mrouter = mv88e6xxx_port_set_mrouter, .port_stp_state_set = mv88e6xxx_port_stp_state_set, .port_fast_age = mv88e6xxx_port_fast_age, .port_vlan_filtering = mv88e6xxx_port_vlan_filtering, diff --git a/drivers/net/dsa/qca/ar9331.c b/drivers/net/dsa/qca/ar9331.c index ca2ad77b71f1..6686192e1883 100644 --- a/drivers/net/dsa/qca/ar9331.c +++ b/drivers/net/dsa/qca/ar9331.c @@ -837,16 +837,24 @@ static int ar9331_mdio_write(void *ctx, u32 reg, u32 val) return 0; } - ret = __ar9331_mdio_write(sbus, AR9331_SW_MDIO_PHY_MODE_REG, reg, val); + /* In case of this switch we work with 32bit registers on top of 16bit + * bus. Some registers (for example access to forwarding database) have + * trigger bit on the first 16bit half of request, the result and + * configuration of request in the second half. + * To make it work properly, we should do the second part of transfer + * before the first one is done. + */ + ret = __ar9331_mdio_write(sbus, AR9331_SW_MDIO_PHY_MODE_REG, reg + 2, + val >> 16); if (ret < 0) goto error; - ret = __ar9331_mdio_write(sbus, AR9331_SW_MDIO_PHY_MODE_REG, reg + 2, - val >> 16); + ret = __ar9331_mdio_write(sbus, AR9331_SW_MDIO_PHY_MODE_REG, reg, val); if (ret < 0) goto error; return 0; + error: dev_err_ratelimited(&sbus->dev, "Bus error. Failed to write register.\n"); return ret; diff --git a/drivers/net/dsa/sja1105/sja1105.h b/drivers/net/dsa/sja1105/sja1105.h index 9cd7dbdd7db9..2e899c9f036d 100644 --- a/drivers/net/dsa/sja1105/sja1105.h +++ b/drivers/net/dsa/sja1105/sja1105.h @@ -233,7 +233,6 @@ struct sja1105_private { phy_interface_t phy_mode[SJA1105_MAX_NUM_PORTS]; bool fixed_link[SJA1105_MAX_NUM_PORTS]; bool vlan_aware; - unsigned long learn_ena; unsigned long ucast_egress_floods; unsigned long bcast_egress_floods; const struct sja1105_info *info; diff --git a/drivers/net/dsa/sja1105/sja1105_dynamic_config.c b/drivers/net/dsa/sja1105/sja1105_dynamic_config.c index bd3ad18c150e..f2049f52833c 100644 --- a/drivers/net/dsa/sja1105/sja1105_dynamic_config.c +++ b/drivers/net/dsa/sja1105/sja1105_dynamic_config.c @@ -304,6 +304,15 @@ sja1105pqrs_common_l2_lookup_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd, hostcmd = SJA1105_HOSTCMD_INVALIDATE; } sja1105_packing(p, &hostcmd, 25, 23, size, op); +} + +static void +sja1105pqrs_l2_lookup_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd, + enum packing_op op) +{ + int entry_size = SJA1105PQRS_SIZE_L2_LOOKUP_ENTRY; + + sja1105pqrs_common_l2_lookup_cmd_packing(buf, cmd, op, entry_size); /* Hack - The hardware takes the 'index' field within * struct sja1105_l2_lookup_entry as the index on which this command @@ -313,26 +322,18 @@ sja1105pqrs_common_l2_lookup_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd, * such that our API doesn't need to ask for a full-blown entry * structure when e.g. a delete is requested. */ - sja1105_packing(buf, &cmd->index, 15, 6, - SJA1105PQRS_SIZE_L2_LOOKUP_ENTRY, op); -} - -static void -sja1105pqrs_l2_lookup_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd, - enum packing_op op) -{ - int size = SJA1105PQRS_SIZE_L2_LOOKUP_ENTRY; - - return sja1105pqrs_common_l2_lookup_cmd_packing(buf, cmd, op, size); + sja1105_packing(buf, &cmd->index, 15, 6, entry_size, op); } static void sja1110_l2_lookup_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd, enum packing_op op) { - int size = SJA1110_SIZE_L2_LOOKUP_ENTRY; + int entry_size = SJA1110_SIZE_L2_LOOKUP_ENTRY; + + sja1105pqrs_common_l2_lookup_cmd_packing(buf, cmd, op, entry_size); - return sja1105pqrs_common_l2_lookup_cmd_packing(buf, cmd, op, size); + sja1105_packing(buf, &cmd->index, 10, 1, entry_size, op); } /* The switch is so retarded that it makes our command/entry abstraction diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index 5ab1676a7448..6a52db1ef24c 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -176,7 +176,7 @@ static int sja1105_init_mac_settings(struct sja1105_private *priv) struct sja1105_mac_config_entry *mac; struct dsa_switch *ds = priv->ds; struct sja1105_table *table; - int i; + struct dsa_port *dp; table = &priv->static_config.tables[BLK_IDX_MAC_CONFIG]; @@ -195,14 +195,21 @@ static int sja1105_init_mac_settings(struct sja1105_private *priv) mac = table->entries; - for (i = 0; i < ds->num_ports; i++) { - mac[i] = default_mac; + list_for_each_entry(dp, &ds->dst->ports, list) { + if (dp->ds != ds) + continue; + + mac[dp->index] = default_mac; /* Let sja1105_bridge_stp_state_set() keep address learning - * enabled for the CPU port. + * enabled for the DSA ports. CPU ports use software-assisted + * learning to ensure that only FDB entries belonging to the + * bridge are learned, and that they are learned towards all + * CPU ports in a cross-chip topology if multiple CPU ports + * exist. */ - if (dsa_is_cpu_port(ds, i)) - priv->learn_ena |= BIT(i); + if (dsa_port_is_dsa(dp)) + dp->learning = true; } return 0; @@ -460,7 +467,7 @@ static int sja1105_init_static_vlan(struct sja1105_private *priv) pvid.vlan_bc |= BIT(port); pvid.tag_port &= ~BIT(port); - if (dsa_is_cpu_port(ds, port)) { + if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)) { priv->tag_8021q_pvid[port] = SJA1105_DEFAULT_VLAN; priv->bridge_pvid[port] = SJA1105_DEFAULT_VLAN; } @@ -474,8 +481,11 @@ static int sja1105_init_l2_forwarding(struct sja1105_private *priv) { struct sja1105_l2_forwarding_entry *l2fwd; struct dsa_switch *ds = priv->ds; + struct dsa_switch_tree *dst; struct sja1105_table *table; - int i, j; + struct dsa_link *dl; + int port, tc; + int from, to; table = &priv->static_config.tables[BLK_IDX_L2_FORWARDING]; @@ -493,47 +503,109 @@ static int sja1105_init_l2_forwarding(struct sja1105_private *priv) l2fwd = table->entries; - /* First 5 entries define the forwarding rules */ - for (i = 0; i < ds->num_ports; i++) { - unsigned int upstream = dsa_upstream_port(priv->ds, i); + /* First 5 entries in the L2 Forwarding Table define the forwarding + * rules and the VLAN PCP to ingress queue mapping. + * Set up the ingress queue mapping first. + */ + for (port = 0; port < ds->num_ports; port++) { + if (dsa_is_unused_port(ds, port)) + continue; + + for (tc = 0; tc < SJA1105_NUM_TC; tc++) + l2fwd[port].vlan_pmap[tc] = tc; + } - if (dsa_is_unused_port(ds, i)) + /* Then manage the forwarding domain for user ports. These can forward + * only to the always-on domain (CPU port and DSA links) + */ + for (from = 0; from < ds->num_ports; from++) { + if (!dsa_is_user_port(ds, from)) continue; - for (j = 0; j < SJA1105_NUM_TC; j++) - l2fwd[i].vlan_pmap[j] = j; + for (to = 0; to < ds->num_ports; to++) { + if (!dsa_is_cpu_port(ds, to) && + !dsa_is_dsa_port(ds, to)) + continue; - /* All ports start up with egress flooding enabled, - * including the CPU port. - */ - priv->ucast_egress_floods |= BIT(i); - priv->bcast_egress_floods |= BIT(i); + l2fwd[from].bc_domain |= BIT(to); + l2fwd[from].fl_domain |= BIT(to); - if (i == upstream) + sja1105_port_allow_traffic(l2fwd, from, to, true); + } + } + + /* Then manage the forwarding domain for DSA links and CPU ports (the + * always-on domain). These can send packets to any enabled port except + * themselves. + */ + for (from = 0; from < ds->num_ports; from++) { + if (!dsa_is_cpu_port(ds, from) && !dsa_is_dsa_port(ds, from)) continue; - sja1105_port_allow_traffic(l2fwd, i, upstream, true); - sja1105_port_allow_traffic(l2fwd, upstream, i, true); + for (to = 0; to < ds->num_ports; to++) { + if (dsa_is_unused_port(ds, to)) + continue; - l2fwd[i].bc_domain = BIT(upstream); - l2fwd[i].fl_domain = BIT(upstream); + if (from == to) + continue; + + l2fwd[from].bc_domain |= BIT(to); + l2fwd[from].fl_domain |= BIT(to); + + sja1105_port_allow_traffic(l2fwd, from, to, true); + } + } + + /* In odd topologies ("H" connections where there is a DSA link to + * another switch which also has its own CPU port), TX packets can loop + * back into the system (they are flooded from CPU port 1 to the DSA + * link, and from there to CPU port 2). Prevent this from happening by + * cutting RX from DSA links towards our CPU port, if the remote switch + * has its own CPU port and therefore doesn't need ours for network + * stack termination. + */ + dst = ds->dst; + + list_for_each_entry(dl, &dst->rtable, list) { + if (dl->dp->ds != ds || dl->link_dp->cpu_dp == dl->dp->cpu_dp) + continue; + + from = dl->dp->index; + to = dsa_upstream_port(ds, from); + + dev_warn(ds->dev, + "H topology detected, cutting RX from DSA link %d to CPU port %d to prevent TX packet loops\n", + from, to); + + sja1105_port_allow_traffic(l2fwd, from, to, false); - l2fwd[upstream].bc_domain |= BIT(i); - l2fwd[upstream].fl_domain |= BIT(i); + l2fwd[from].bc_domain &= ~BIT(to); + l2fwd[from].fl_domain &= ~BIT(to); + } + + /* Finally, manage the egress flooding domain. All ports start up with + * flooding enabled, including the CPU port and DSA links. + */ + for (port = 0; port < ds->num_ports; port++) { + if (dsa_is_unused_port(ds, port)) + continue; + + priv->ucast_egress_floods |= BIT(port); + priv->bcast_egress_floods |= BIT(port); } /* Next 8 entries define VLAN PCP mapping from ingress to egress. * Create a one-to-one mapping. */ - for (i = 0; i < SJA1105_NUM_TC; i++) { - for (j = 0; j < ds->num_ports; j++) { - if (dsa_is_unused_port(ds, j)) + for (tc = 0; tc < SJA1105_NUM_TC; tc++) { + for (port = 0; port < ds->num_ports; port++) { + if (dsa_is_unused_port(ds, port)) continue; - l2fwd[ds->num_ports + i].vlan_pmap[j] = i; + l2fwd[ds->num_ports + tc].vlan_pmap[port] = tc; } - l2fwd[ds->num_ports + i].type_egrpcp2outputq = true; + l2fwd[ds->num_ports + tc].type_egrpcp2outputq = true; } return 0; @@ -688,6 +760,72 @@ static void sja1110_select_tdmaconfigidx(struct sja1105_private *priv) general_params->tdmaconfigidx = tdmaconfigidx; } +static int sja1105_init_topology(struct sja1105_private *priv, + struct sja1105_general_params_entry *general_params) +{ + struct dsa_switch *ds = priv->ds; + int port; + + /* The host port is the destination for traffic matching mac_fltres1 + * and mac_fltres0 on all ports except itself. Default to an invalid + * value. + */ + general_params->host_port = ds->num_ports; + + /* Link-local traffic received on casc_port will be forwarded + * to host_port without embedding the source port and device ID + * info in the destination MAC address, and no RX timestamps will be + * taken either (presumably because it is a cascaded port and a + * downstream SJA switch already did that). + * To disable the feature, we need to do different things depending on + * switch generation. On SJA1105 we need to set an invalid port, while + * on SJA1110 which support multiple cascaded ports, this field is a + * bitmask so it must be left zero. + */ + if (!priv->info->multiple_cascade_ports) + general_params->casc_port = ds->num_ports; + + for (port = 0; port < ds->num_ports; port++) { + bool is_upstream = dsa_is_upstream_port(ds, port); + bool is_dsa_link = dsa_is_dsa_port(ds, port); + + /* Upstream ports can be dedicated CPU ports or + * upstream-facing DSA links + */ + if (is_upstream) { + if (general_params->host_port == ds->num_ports) { + general_params->host_port = port; + } else { + dev_err(ds->dev, + "Port %llu is already a host port, configuring %d as one too is not supported\n", + general_params->host_port, port); + return -EINVAL; + } + } + + /* Cascade ports are downstream-facing DSA links */ + if (is_dsa_link && !is_upstream) { + if (priv->info->multiple_cascade_ports) { + general_params->casc_port |= BIT(port); + } else if (general_params->casc_port == ds->num_ports) { + general_params->casc_port = port; + } else { + dev_err(ds->dev, + "Port %llu is already a cascade port, configuring %d as one too is not supported\n", + general_params->casc_port, port); + return -EINVAL; + } + } + } + + if (general_params->host_port == ds->num_ports) { + dev_err(ds->dev, "No host port configured\n"); + return -EINVAL; + } + + return 0; +} + static int sja1105_init_general_params(struct sja1105_private *priv) { struct sja1105_general_params_entry default_general_params = { @@ -706,12 +844,6 @@ static int sja1105_init_general_params(struct sja1105_private *priv) .mac_flt0 = SJA1105_LINKLOCAL_FILTER_B_MASK, .incl_srcpt0 = false, .send_meta0 = false, - /* The destination for traffic matching mac_fltres1 and - * mac_fltres0 on all ports except host_port. Such traffic - * receieved on host_port itself would be dropped, except - * by installing a temporary 'management route' - */ - .host_port = priv->ds->num_ports, /* Default to an invalid value */ .mirr_port = priv->ds->num_ports, /* No TTEthernet */ @@ -731,16 +863,12 @@ static int sja1105_init_general_params(struct sja1105_private *priv) .header_type = ETH_P_SJA1110, }; struct sja1105_general_params_entry *general_params; - struct dsa_switch *ds = priv->ds; struct sja1105_table *table; - int port; + int rc; - for (port = 0; port < ds->num_ports; port++) { - if (dsa_is_cpu_port(ds, port)) { - default_general_params.host_port = port; - break; - } - } + rc = sja1105_init_topology(priv, &default_general_params); + if (rc) + return rc; table = &priv->static_config.tables[BLK_IDX_GENERAL_PARAMS]; @@ -763,19 +891,6 @@ static int sja1105_init_general_params(struct sja1105_private *priv) sja1110_select_tdmaconfigidx(priv); - /* Link-local traffic received on casc_port will be forwarded - * to host_port without embedding the source port and device ID - * info in the destination MAC address, and no RX timestamps will be - * taken either (presumably because it is a cascaded port and a - * downstream SJA switch already did that). - * To disable the feature, we need to do different things depending on - * switch generation. On SJA1105 we need to set an invalid port, while - * on SJA1110 which support multiple cascaded ports, this field is a - * bitmask so it must be left zero. - */ - if (!priv->info->multiple_cascade_ports) - general_params->casc_port = ds->num_ports; - return 0; } @@ -903,7 +1018,7 @@ static int sja1105_init_l2_policing(struct sja1105_private *priv) for (port = 0; port < ds->num_ports; port++) { int mtu = VLAN_ETH_FRAME_LEN + ETH_FCS_LEN; - if (dsa_is_cpu_port(priv->ds, port)) + if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)) mtu += VLAN_HLEN; policing[port].smax = 65535; /* Burst size in bytes */ @@ -1372,10 +1487,11 @@ static int sja1105et_is_fdb_entry_in_bin(struct sja1105_private *priv, int bin, int sja1105et_fdb_add(struct dsa_switch *ds, int port, const unsigned char *addr, u16 vid) { - struct sja1105_l2_lookup_entry l2_lookup = {0}; + struct sja1105_l2_lookup_entry l2_lookup = {0}, tmp; struct sja1105_private *priv = ds->priv; struct device *dev = ds->dev; int last_unused = -1; + int start, end, i; int bin, way, rc; bin = sja1105et_fdb_hash(priv, addr, vid); @@ -1387,7 +1503,7 @@ int sja1105et_fdb_add(struct dsa_switch *ds, int port, * mask? If yes, we need to do nothing. If not, we need * to rewrite the entry by adding this port to it. */ - if (l2_lookup.destports & BIT(port)) + if ((l2_lookup.destports & BIT(port)) && l2_lookup.lockeds) return 0; l2_lookup.destports |= BIT(port); } else { @@ -1418,6 +1534,7 @@ int sja1105et_fdb_add(struct dsa_switch *ds, int port, index, NULL, false); } } + l2_lookup.lockeds = true; l2_lookup.index = sja1105et_fdb_index(bin, way); rc = sja1105_dynamic_config_write(priv, BLK_IDX_L2_LOOKUP, @@ -1426,6 +1543,29 @@ int sja1105et_fdb_add(struct dsa_switch *ds, int port, if (rc < 0) return rc; + /* Invalidate a dynamically learned entry if that exists */ + start = sja1105et_fdb_index(bin, 0); + end = sja1105et_fdb_index(bin, way); + + for (i = start; i < end; i++) { + rc = sja1105_dynamic_config_read(priv, BLK_IDX_L2_LOOKUP, + i, &tmp); + if (rc == -ENOENT) + continue; + if (rc) + return rc; + + if (tmp.macaddr != ether_addr_to_u64(addr) || tmp.vlanid != vid) + continue; + + rc = sja1105_dynamic_config_write(priv, BLK_IDX_L2_LOOKUP, + i, NULL, false); + if (rc) + return rc; + + break; + } + return sja1105_static_fdb_change(priv, port, &l2_lookup, true); } @@ -1467,32 +1607,30 @@ int sja1105et_fdb_del(struct dsa_switch *ds, int port, int sja1105pqrs_fdb_add(struct dsa_switch *ds, int port, const unsigned char *addr, u16 vid) { - struct sja1105_l2_lookup_entry l2_lookup = {0}; + struct sja1105_l2_lookup_entry l2_lookup = {0}, tmp; struct sja1105_private *priv = ds->priv; int rc, i; /* Search for an existing entry in the FDB table */ l2_lookup.macaddr = ether_addr_to_u64(addr); l2_lookup.vlanid = vid; - l2_lookup.iotag = SJA1105_S_TAG; l2_lookup.mask_macaddr = GENMASK_ULL(ETH_ALEN * 8 - 1, 0); - if (priv->vlan_aware) { - l2_lookup.mask_vlanid = VLAN_VID_MASK; - l2_lookup.mask_iotag = BIT(0); - } else { - l2_lookup.mask_vlanid = 0; - l2_lookup.mask_iotag = 0; - } + l2_lookup.mask_vlanid = VLAN_VID_MASK; l2_lookup.destports = BIT(port); + tmp = l2_lookup; + rc = sja1105_dynamic_config_read(priv, BLK_IDX_L2_LOOKUP, - SJA1105_SEARCH, &l2_lookup); - if (rc == 0) { - /* Found and this port is already in the entry's + SJA1105_SEARCH, &tmp); + if (rc == 0 && tmp.index != SJA1105_MAX_L2_LOOKUP_COUNT - 1) { + /* Found a static entry and this port is already in the entry's * port mask => job done */ - if (l2_lookup.destports & BIT(port)) + if ((tmp.destports & BIT(port)) && tmp.lockeds) return 0; + + l2_lookup = tmp; + /* l2_lookup.index is populated by the switch in case it * found something. */ @@ -1514,16 +1652,46 @@ int sja1105pqrs_fdb_add(struct dsa_switch *ds, int port, dev_err(ds->dev, "FDB is full, cannot add entry.\n"); return -EINVAL; } - l2_lookup.lockeds = true; l2_lookup.index = i; skip_finding_an_index: + l2_lookup.lockeds = true; + rc = sja1105_dynamic_config_write(priv, BLK_IDX_L2_LOOKUP, l2_lookup.index, &l2_lookup, true); if (rc < 0) return rc; + /* The switch learns dynamic entries and looks up the FDB left to + * right. It is possible that our addition was concurrent with the + * dynamic learning of the same address, so now that the static entry + * has been installed, we are certain that address learning for this + * particular address has been turned off, so the dynamic entry either + * is in the FDB at an index smaller than the static one, or isn't (it + * can also be at a larger index, but in that case it is inactive + * because the static FDB entry will match first, and the dynamic one + * will eventually age out). Search for a dynamically learned address + * prior to our static one and invalidate it. + */ + tmp = l2_lookup; + + rc = sja1105_dynamic_config_read(priv, BLK_IDX_L2_LOOKUP, + SJA1105_SEARCH, &tmp); + if (rc < 0) { + dev_err(ds->dev, + "port %d failed to read back entry for %pM vid %d: %pe\n", + port, addr, vid, ERR_PTR(rc)); + return rc; + } + + if (tmp.index < l2_lookup.index) { + rc = sja1105_dynamic_config_write(priv, BLK_IDX_L2_LOOKUP, + tmp.index, NULL, false); + if (rc < 0) + return rc; + } + return sja1105_static_fdb_change(priv, port, &l2_lookup, true); } @@ -1537,15 +1705,8 @@ int sja1105pqrs_fdb_del(struct dsa_switch *ds, int port, l2_lookup.macaddr = ether_addr_to_u64(addr); l2_lookup.vlanid = vid; - l2_lookup.iotag = SJA1105_S_TAG; l2_lookup.mask_macaddr = GENMASK_ULL(ETH_ALEN * 8 - 1, 0); - if (priv->vlan_aware) { - l2_lookup.mask_vlanid = VLAN_VID_MASK; - l2_lookup.mask_iotag = BIT(0); - } else { - l2_lookup.mask_vlanid = 0; - l2_lookup.mask_iotag = 0; - } + l2_lookup.mask_vlanid = VLAN_VID_MASK; l2_lookup.destports = BIT(port); rc = sja1105_dynamic_config_read(priv, BLK_IDX_L2_LOOKUP, @@ -1633,6 +1794,46 @@ static int sja1105_fdb_dump(struct dsa_switch *ds, int port, return 0; } +static void sja1105_fast_age(struct dsa_switch *ds, int port) +{ + struct sja1105_private *priv = ds->priv; + int i; + + for (i = 0; i < SJA1105_MAX_L2_LOOKUP_COUNT; i++) { + struct sja1105_l2_lookup_entry l2_lookup = {0}; + u8 macaddr[ETH_ALEN]; + int rc; + + rc = sja1105_dynamic_config_read(priv, BLK_IDX_L2_LOOKUP, + i, &l2_lookup); + /* No fdb entry at i, not an issue */ + if (rc == -ENOENT) + continue; + if (rc) { + dev_err(ds->dev, "Failed to read FDB: %pe\n", + ERR_PTR(rc)); + return; + } + + if (!(l2_lookup.destports & BIT(port))) + continue; + + /* Don't delete static FDB entries */ + if (l2_lookup.lockeds) + continue; + + u64_to_ether_addr(l2_lookup.macaddr, macaddr); + + rc = sja1105_fdb_del(ds, port, macaddr, l2_lookup.vlanid); + if (rc) { + dev_err(ds->dev, + "Failed to delete FDB entry %pM vid %lld: %pe\n", + macaddr, l2_lookup.vlanid, ERR_PTR(rc)); + return; + } + } +} + static int sja1105_mdb_add(struct dsa_switch *ds, int port, const struct switchdev_obj_port_mdb *mdb) { @@ -1741,6 +1942,7 @@ static int sja1105_bridge_member(struct dsa_switch *ds, int port, static void sja1105_bridge_stp_state_set(struct dsa_switch *ds, int port, u8 state) { + struct dsa_port *dp = dsa_to_port(ds, port); struct sja1105_private *priv = ds->priv; struct sja1105_mac_config_entry *mac; @@ -1766,12 +1968,12 @@ static void sja1105_bridge_stp_state_set(struct dsa_switch *ds, int port, case BR_STATE_LEARNING: mac[port].ingress = true; mac[port].egress = false; - mac[port].dyn_learn = !!(priv->learn_ena & BIT(port)); + mac[port].dyn_learn = dp->learning; break; case BR_STATE_FORWARDING: mac[port].ingress = true; mac[port].egress = true; - mac[port].dyn_learn = !!(priv->learn_ena & BIT(port)); + mac[port].dyn_learn = dp->learning; break; default: dev_err(ds->dev, "invalid STP state: %d\n", state); @@ -2231,8 +2433,8 @@ static int sja1105_bridge_vlan_add(struct dsa_switch *ds, int port, return -EBUSY; } - /* Always install bridge VLANs as egress-tagged on the CPU port. */ - if (dsa_is_cpu_port(ds, port)) + /* Always install bridge VLANs as egress-tagged on CPU and DSA ports */ + if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)) flags = 0; rc = sja1105_vlan_add(priv, port, vlan->vid, flags); @@ -2401,6 +2603,7 @@ static int sja1105_setup(struct dsa_switch *ds) ds->num_tx_queues = SJA1105_NUM_TC; ds->mtu_enforcement_ingress = true; + ds->assisted_learning_on_cpu_port = true; rc = sja1105_devlink_setup(ds); if (rc < 0) @@ -2585,7 +2788,7 @@ static int sja1105_change_mtu(struct dsa_switch *ds, int port, int new_mtu) new_mtu += VLAN_ETH_HLEN + ETH_FCS_LEN; - if (dsa_is_cpu_port(ds, port)) + if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)) new_mtu += VLAN_HLEN; policing = priv->static_config.tables[BLK_IDX_L2_POLICING].entries; @@ -2732,23 +2935,13 @@ static int sja1105_port_set_learning(struct sja1105_private *priv, int port, bool enabled) { struct sja1105_mac_config_entry *mac; - int rc; mac = priv->static_config.tables[BLK_IDX_MAC_CONFIG].entries; mac[port].dyn_learn = enabled; - rc = sja1105_dynamic_config_write(priv, BLK_IDX_MAC_CONFIG, port, - &mac[port], true); - if (rc) - return rc; - - if (enabled) - priv->learn_ena |= BIT(port); - else - priv->learn_ena &= ~BIT(port); - - return 0; + return sja1105_dynamic_config_write(priv, BLK_IDX_MAC_CONFIG, port, + &mac[port], true); } static int sja1105_port_ucast_bcast_flood(struct sja1105_private *priv, int to, @@ -2883,6 +3076,7 @@ static const struct dsa_switch_ops sja1105_switch_ops = { .port_fdb_dump = sja1105_fdb_dump, .port_fdb_add = sja1105_fdb_add, .port_fdb_del = sja1105_fdb_del, + .port_fast_age = sja1105_fast_age, .port_bridge_join = sja1105_bridge_join, .port_bridge_leave = sja1105_bridge_leave, .port_pre_bridge_flags = sja1105_port_pre_bridge_flags, diff --git a/drivers/net/ethernet/3com/3c509.c b/drivers/net/ethernet/3com/3c509.c index df6927f66771..87c906e744fb 100644 --- a/drivers/net/ethernet/3com/3c509.c +++ b/drivers/net/ethernet/3com/3c509.c @@ -302,7 +302,6 @@ static int el3_isa_match(struct device *pdev, unsigned int ndev) return -ENOMEM; SET_NETDEV_DEV(dev, pdev); - netdev_boot_setup_check(dev); if (!request_region(ioaddr, EL3_IO_EXTENT, "3c509-isa")) { free_netdev(dev); @@ -421,7 +420,6 @@ static int el3_pnp_probe(struct pnp_dev *pdev, const struct pnp_device_id *id) return -ENOMEM; } SET_NETDEV_DEV(dev, &pdev->dev); - netdev_boot_setup_check(dev); el3_dev_fill(dev, phys_addr, ioaddr, irq, if_port, EL3_PNP); pnp_set_drvdata(pdev, dev); @@ -590,7 +588,6 @@ static int el3_eisa_probe(struct device *device) } SET_NETDEV_DEV(dev, device); - netdev_boot_setup_check(dev); el3_dev_fill(dev, phys_addr, ioaddr, irq, if_port, EL3_EISA); eisa_set_drvdata (edev, dev); diff --git a/drivers/net/ethernet/3com/3c515.c b/drivers/net/ethernet/3com/3c515.c index 47b4215bb93b..8d90fed5d33e 100644 --- a/drivers/net/ethernet/3com/3c515.c +++ b/drivers/net/ethernet/3com/3c515.c @@ -407,7 +407,7 @@ MODULE_PARM_DESC(max_interrupt_work, "3c515 maximum events handled per interrupt /* we will need locking (and refcounting) if we ever use it for more */ static LIST_HEAD(root_corkscrew_dev); -int init_module(void) +static int corkscrew_init_module(void) { int found = 0; if (debug >= 0) @@ -416,6 +416,7 @@ int init_module(void) found++; return found ? 0 : -ENODEV; } +module_init(corkscrew_init_module); #else struct net_device *tc515_probe(int unit) diff --git a/drivers/net/ethernet/3com/Kconfig b/drivers/net/ethernet/3com/Kconfig index a52a3740f0c9..706bd59bf645 100644 --- a/drivers/net/ethernet/3com/Kconfig +++ b/drivers/net/ethernet/3com/Kconfig @@ -34,6 +34,7 @@ config EL3 config 3C515 tristate "3c515 ISA \"Fast EtherLink\"" depends on ISA && ISA_DMA_API && !PPC32 + select NETDEV_LEGACY_INIT help If you have a 3Com ISA EtherLink XL "Corkscrew" 3c515 Fast Ethernet network card, say Y here. diff --git a/drivers/net/ethernet/8390/Kconfig b/drivers/net/ethernet/8390/Kconfig index 9f4b302fd2ce..a4130e643342 100644 --- a/drivers/net/ethernet/8390/Kconfig +++ b/drivers/net/ethernet/8390/Kconfig @@ -102,6 +102,7 @@ config MCF8390 config NE2000 tristate "NE2000/NE1000 support" depends on (ISA || (Q40 && m) || MACH_TX49XX || ATARI_ETHERNEC) + select NETDEV_LEGACY_INIT if ISA select CRC32 help If you have a network (Ethernet) card of this type, say Y here. @@ -169,6 +170,7 @@ config STNIC config ULTRA tristate "SMC Ultra support" depends on ISA + select NETDEV_LEGACY_INIT select CRC32 help If you have a network (Ethernet) card of this type, say Y here. @@ -186,6 +188,7 @@ config ULTRA config WD80x3 tristate "WD80*3 support" depends on ISA + select NETDEV_LEGACY_INIT select CRC32 help If you have a network (Ethernet) card of this type, say Y here. diff --git a/drivers/net/ethernet/8390/apne.c b/drivers/net/ethernet/8390/apne.c index fe6c834c422e..da1ae37a9d73 100644 --- a/drivers/net/ethernet/8390/apne.c +++ b/drivers/net/ethernet/8390/apne.c @@ -75,7 +75,6 @@ #define NESM_STOP_PG 0x80 /* Last page +1 of RX ring */ -struct net_device * __init apne_probe(int unit); static int apne_probe1(struct net_device *dev, int ioaddr); static void apne_reset_8390(struct net_device *dev); @@ -120,7 +119,7 @@ static u32 apne_msg_enable; module_param_named(msg_enable, apne_msg_enable, uint, 0444); MODULE_PARM_DESC(msg_enable, "Debug message level (see linux/netdevice.h for bitmap)"); -struct net_device * __init apne_probe(int unit) +static struct net_device * __init apne_probe(void) { struct net_device *dev; struct ei_device *ei_local; @@ -150,10 +149,6 @@ struct net_device * __init apne_probe(int unit) dev = alloc_ei_netdev(); if (!dev) return ERR_PTR(-ENOMEM); - if (unit >= 0) { - sprintf(dev->name, "eth%d", unit); - netdev_boot_setup_check(dev); - } ei_local = netdev_priv(dev); ei_local->msg_enable = apne_msg_enable; @@ -554,12 +549,11 @@ static irqreturn_t apne_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } -#ifdef MODULE static struct net_device *apne_dev; static int __init apne_module_init(void) { - apne_dev = apne_probe(-1); + apne_dev = apne_probe(); return PTR_ERR_OR_ZERO(apne_dev); } @@ -579,7 +573,6 @@ static void __exit apne_module_exit(void) } module_init(apne_module_init); module_exit(apne_module_exit); -#endif static int init_pcmcia(void) { diff --git a/drivers/net/ethernet/8390/ax88796.c b/drivers/net/ethernet/8390/ax88796.c index 9595dd1f32ca..6c6bdd5913ec 100644 --- a/drivers/net/ethernet/8390/ax88796.c +++ b/drivers/net/ethernet/8390/ax88796.c @@ -101,6 +101,13 @@ static inline struct ax_device *to_ax_dev(struct net_device *dev) return (struct ax_device *)(ei_local + 1); } +void ax_NS8390_reinit(struct net_device *dev) +{ + ax_NS8390_init(dev, 1); +} + +EXPORT_SYMBOL_GPL(ax_NS8390_reinit); + /* * ax_initial_check * diff --git a/drivers/net/ethernet/8390/ne.c b/drivers/net/ethernet/8390/ne.c index e9756d0ea5b8..53660bc8d6ff 100644 --- a/drivers/net/ethernet/8390/ne.c +++ b/drivers/net/ethernet/8390/ne.c @@ -923,7 +923,7 @@ static void __init ne_add_devices(void) } #ifdef MODULE -int __init init_module(void) +static int __init ne_init(void) { int retval; ne_add_devices(); @@ -940,6 +940,7 @@ int __init init_module(void) ne_loop_rm_unreg(0); return retval; } +module_init(ne_init); #else /* MODULE */ static int __init ne_init(void) { @@ -951,6 +952,7 @@ static int __init ne_init(void) } module_init(ne_init); +#ifdef CONFIG_NETDEV_LEGACY_INIT struct net_device * __init ne_probe(int unit) { int this_dev; @@ -991,6 +993,7 @@ struct net_device * __init ne_probe(int unit) return ERR_PTR(-ENODEV); } +#endif #endif /* MODULE */ static void __exit ne_exit(void) diff --git a/drivers/net/ethernet/8390/smc-ultra.c b/drivers/net/ethernet/8390/smc-ultra.c index 1d8ed7357b7f..0890fa493f70 100644 --- a/drivers/net/ethernet/8390/smc-ultra.c +++ b/drivers/net/ethernet/8390/smc-ultra.c @@ -522,7 +522,6 @@ static void ultra_pio_input(struct net_device *dev, int count, /* We know skbuffs are padded to at least word alignment. */ insw(ioaddr + IOPD, buf, (count+1)>>1); } - static void ultra_pio_output(struct net_device *dev, int count, const unsigned char *buf, const int start_page) { @@ -572,8 +571,7 @@ MODULE_LICENSE("GPL"); /* This is set up so that only a single autoprobe takes place per call. ISA device autoprobes on a running machine are not recommended. */ -int __init -init_module(void) +static int __init ultra_init_module(void) { struct net_device *dev; int this_dev, found = 0; @@ -600,6 +598,7 @@ init_module(void) return 0; return -ENXIO; } +module_init(ultra_init_module); static void cleanup_card(struct net_device *dev) { @@ -613,8 +612,7 @@ static void cleanup_card(struct net_device *dev) iounmap(ei_status.mem); } -void __exit -cleanup_module(void) +static void __exit ultra_cleanup_module(void) { int this_dev; @@ -627,4 +625,5 @@ cleanup_module(void) } } } +module_exit(ultra_cleanup_module); #endif /* MODULE */ diff --git a/drivers/net/ethernet/8390/wd.c b/drivers/net/ethernet/8390/wd.c index c834123560f1..263a942d81fa 100644 --- a/drivers/net/ethernet/8390/wd.c +++ b/drivers/net/ethernet/8390/wd.c @@ -519,7 +519,7 @@ MODULE_LICENSE("GPL"); /* This is set up so that only a single autoprobe takes place per call. ISA device autoprobes on a running machine are not recommended. */ -int __init init_module(void) +static int __init wd_init_module(void) { struct net_device *dev; int this_dev, found = 0; @@ -548,6 +548,7 @@ int __init init_module(void) return 0; return -ENXIO; } +module_init(wd_init_module); static void cleanup_card(struct net_device *dev) { @@ -556,8 +557,7 @@ static void cleanup_card(struct net_device *dev) iounmap(ei_status.mem); } -void __exit -cleanup_module(void) +static void __exit wd_cleanup_module(void) { int this_dev; @@ -570,4 +570,5 @@ cleanup_module(void) } } } +module_exit(wd_cleanup_module); #endif /* MODULE */ diff --git a/drivers/net/ethernet/8390/xsurf100.c b/drivers/net/ethernet/8390/xsurf100.c index e2c963821ffe..fe7a74707aa4 100644 --- a/drivers/net/ethernet/8390/xsurf100.c +++ b/drivers/net/ethernet/8390/xsurf100.c @@ -22,8 +22,6 @@ #define XS100_8390_DATA_WRITE32_BASE 0x0C80 #define XS100_8390_DATA_AREA_SIZE 0x80 -#define __NS8390_init ax_NS8390_init - /* force unsigned long back to 'void __iomem *' */ #define ax_convert_addr(_a) ((void __force __iomem *)(_a)) @@ -42,10 +40,7 @@ /* Ensure we have our RCR base value */ #define AX88796_PLATFORM -static unsigned char version[] = - "ax88796.c: Copyright 2005,2007 Simtec Electronics\n"; - -#include "lib8390.c" +#include "8390.h" /* from ne.c */ #define NE_CMD EI_SHIFT(0x00) @@ -232,7 +227,7 @@ static void xs100_block_output(struct net_device *dev, int count, if (jiffies - dma_start > 2 * HZ / 100) { /* 20ms */ netdev_warn(dev, "timeout waiting for Tx RDC.\n"); ei_local->reset_8390(dev); - ax_NS8390_init(dev, 1); + ax_NS8390_reinit(dev); break; } } diff --git a/drivers/net/ethernet/amd/Kconfig b/drivers/net/ethernet/amd/Kconfig index d0b0609bbe23..c6a3abec86f5 100644 --- a/drivers/net/ethernet/amd/Kconfig +++ b/drivers/net/ethernet/amd/Kconfig @@ -46,6 +46,7 @@ config AMD8111_ETH config LANCE tristate "AMD LANCE and PCnet (AT1500 and NE2100) support" depends on ISA && ISA_DMA_API && !ARM && !PPC32 + select NETDEV_LEGACY_INIT help If you have a network (Ethernet) card of this type, say Y here. Some LinkSys cards are of this type. @@ -132,6 +133,7 @@ config PCMCIA_NMCLAN config NI65 tristate "NI6510 support" depends on ISA && ISA_DMA_API && !ARM && !PPC32 + select NETDEV_LEGACY_INIT help If you have a network (Ethernet) card of this type, say Y here. diff --git a/drivers/net/ethernet/amd/atarilance.c b/drivers/net/ethernet/amd/atarilance.c index 36f54d13a2eb..9d2f49fd945e 100644 --- a/drivers/net/ethernet/amd/atarilance.c +++ b/drivers/net/ethernet/amd/atarilance.c @@ -367,7 +367,7 @@ static void *slow_memcpy( void *dst, const void *src, size_t len ) } -struct net_device * __init atarilance_probe(int unit) +struct net_device * __init atarilance_probe(void) { int i; static int found; @@ -382,10 +382,6 @@ struct net_device * __init atarilance_probe(int unit) dev = alloc_etherdev(sizeof(struct lance_private)); if (!dev) return ERR_PTR(-ENOMEM); - if (unit >= 0) { - sprintf(dev->name, "eth%d", unit); - netdev_boot_setup_check(dev); - } for( i = 0; i < N_LANCE_ADDR; ++i ) { if (lance_probe1( dev, &lance_addr_list[i] )) { @@ -1137,13 +1133,11 @@ static int lance_set_mac_address( struct net_device *dev, void *addr ) return 0; } - -#ifdef MODULE static struct net_device *atarilance_dev; static int __init atarilance_module_init(void) { - atarilance_dev = atarilance_probe(-1); + atarilance_dev = atarilance_probe(); return PTR_ERR_OR_ZERO(atarilance_dev); } @@ -1155,4 +1149,3 @@ static void __exit atarilance_module_exit(void) } module_init(atarilance_module_init); module_exit(atarilance_module_exit); -#endif /* MODULE */ diff --git a/drivers/net/ethernet/amd/lance.c b/drivers/net/ethernet/amd/lance.c index 2178e6b89dbd..945bf1d87507 100644 --- a/drivers/net/ethernet/amd/lance.c +++ b/drivers/net/ethernet/amd/lance.c @@ -327,7 +327,7 @@ MODULE_PARM_DESC(dma, "LANCE/PCnet ISA DMA channel (ignored for some devices)"); MODULE_PARM_DESC(irq, "LANCE/PCnet IRQ number (ignored for some devices)"); MODULE_PARM_DESC(lance_debug, "LANCE/PCnet debug level (0-7)"); -int __init init_module(void) +static int __init lance_init_module(void) { struct net_device *dev; int this_dev, found = 0; @@ -356,6 +356,7 @@ int __init init_module(void) return 0; return -ENXIO; } +module_init(lance_init_module); static void cleanup_card(struct net_device *dev) { @@ -368,7 +369,7 @@ static void cleanup_card(struct net_device *dev) kfree(lp); } -void __exit cleanup_module(void) +static void __exit lance_cleanup_module(void) { int this_dev; @@ -381,6 +382,7 @@ void __exit cleanup_module(void) } } } +module_exit(lance_cleanup_module); #endif /* MODULE */ MODULE_LICENSE("GPL"); diff --git a/drivers/net/ethernet/amd/mvme147.c b/drivers/net/ethernet/amd/mvme147.c index 3f2e4cdd0b83..da97fccea9ea 100644 --- a/drivers/net/ethernet/amd/mvme147.c +++ b/drivers/net/ethernet/amd/mvme147.c @@ -68,7 +68,7 @@ static const struct net_device_ops lance_netdev_ops = { }; /* Initialise the one and only on-board 7990 */ -struct net_device * __init mvme147lance_probe(int unit) +static struct net_device * __init mvme147lance_probe(void) { struct net_device *dev; static int called; @@ -86,9 +86,6 @@ struct net_device * __init mvme147lance_probe(int unit) if (!dev) return ERR_PTR(-ENOMEM); - if (unit >= 0) - sprintf(dev->name, "eth%d", unit); - /* Fill the dev fields */ dev->base_addr = (unsigned long)MVME147_LANCE_BASE; dev->netdev_ops = &lance_netdev_ops; @@ -179,22 +176,21 @@ static int m147lance_close(struct net_device *dev) return 0; } -#ifdef MODULE MODULE_LICENSE("GPL"); static struct net_device *dev_mvme147_lance; -int __init init_module(void) +static int __init m147lance_init(void) { - dev_mvme147_lance = mvme147lance_probe(-1); + dev_mvme147_lance = mvme147lance_probe(); return PTR_ERR_OR_ZERO(dev_mvme147_lance); } +module_init(m147lance_init); -void __exit cleanup_module(void) +static void __exit m147lance_exit(void) { struct m147lance_private *lp = netdev_priv(dev_mvme147_lance); unregister_netdev(dev_mvme147_lance); free_pages(lp->ram, 3); free_netdev(dev_mvme147_lance); } - -#endif /* MODULE */ +module_exit(m147lance_exit); diff --git a/drivers/net/ethernet/amd/ni65.c b/drivers/net/ethernet/amd/ni65.c index 5c1cfb0c4a42..b5df7ad5a83f 100644 --- a/drivers/net/ethernet/amd/ni65.c +++ b/drivers/net/ethernet/amd/ni65.c @@ -1230,18 +1230,20 @@ MODULE_PARM_DESC(irq, "ni6510 IRQ number (ignored for some cards)"); MODULE_PARM_DESC(io, "ni6510 I/O base address"); MODULE_PARM_DESC(dma, "ni6510 ISA DMA channel (ignored for some cards)"); -int __init init_module(void) +static int __init ni65_init_module(void) { dev_ni65 = ni65_probe(-1); return PTR_ERR_OR_ZERO(dev_ni65); } +module_init(ni65_init_module); -void __exit cleanup_module(void) +static void __exit ni65_cleanup_module(void) { unregister_netdev(dev_ni65); cleanup_card(dev_ni65); free_netdev(dev_ni65); } +module_exit(ni65_cleanup_module); #endif /* MODULE */ MODULE_LICENSE("GPL"); diff --git a/drivers/net/ethernet/amd/sun3lance.c b/drivers/net/ethernet/amd/sun3lance.c index f8d7a9387a56..4a845bc071b2 100644 --- a/drivers/net/ethernet/amd/sun3lance.c +++ b/drivers/net/ethernet/amd/sun3lance.c @@ -245,7 +245,7 @@ static void set_multicast_list( struct net_device *dev ); /************************* End of Prototypes **************************/ -struct net_device * __init sun3lance_probe(int unit) +static struct net_device * __init sun3lance_probe(void) { struct net_device *dev; static int found; @@ -272,10 +272,6 @@ struct net_device * __init sun3lance_probe(int unit) dev = alloc_etherdev(sizeof(struct lance_private)); if (!dev) return ERR_PTR(-ENOMEM); - if (unit >= 0) { - sprintf(dev->name, "eth%d", unit); - netdev_boot_setup_check(dev); - } if (!lance_probe(dev)) goto out; @@ -924,17 +920,16 @@ static void set_multicast_list( struct net_device *dev ) } -#ifdef MODULE - static struct net_device *sun3lance_dev; -int __init init_module(void) +static int __init sun3lance_init(void) { - sun3lance_dev = sun3lance_probe(-1); + sun3lance_dev = sun3lance_probe(); return PTR_ERR_OR_ZERO(sun3lance_dev); } +module_init(sun3lance_init); -void __exit cleanup_module(void) +static void __exit sun3lance_cleanup(void) { unregister_netdev(sun3lance_dev); #ifdef CONFIG_SUN3 @@ -942,6 +937,4 @@ void __exit cleanup_module(void) #endif free_netdev(sun3lance_dev); } - -#endif /* MODULE */ - +module_exit(sun3lance_cleanup); diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index 1a6ec1a12d53..b5d954cb409a 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -2669,7 +2669,8 @@ int bnx2x_nic_load(struct bnx2x *bp, int load_mode) } /* Allocated memory for FW statistics */ - if (bnx2x_alloc_fw_stats_mem(bp)) + rc = bnx2x_alloc_fw_stats_mem(bp); + if (rc) LOAD_ERROR_EXIT(bp, load_error0); /* request pf to initialize status blocks */ diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 03b821897cf3..865fcb8cf29f 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -3163,6 +3163,58 @@ static int bnxt_alloc_tx_rings(struct bnxt *bp) return 0; } +static void bnxt_free_cp_arrays(struct bnxt_cp_ring_info *cpr) +{ + kfree(cpr->cp_desc_ring); + cpr->cp_desc_ring = NULL; + kfree(cpr->cp_desc_mapping); + cpr->cp_desc_mapping = NULL; +} + +static int bnxt_alloc_cp_arrays(struct bnxt_cp_ring_info *cpr, int n) +{ + cpr->cp_desc_ring = kcalloc(n, sizeof(*cpr->cp_desc_ring), GFP_KERNEL); + if (!cpr->cp_desc_ring) + return -ENOMEM; + cpr->cp_desc_mapping = kcalloc(n, sizeof(*cpr->cp_desc_mapping), + GFP_KERNEL); + if (!cpr->cp_desc_mapping) + return -ENOMEM; + return 0; +} + +static void bnxt_free_all_cp_arrays(struct bnxt *bp) +{ + int i; + + if (!bp->bnapi) + return; + for (i = 0; i < bp->cp_nr_rings; i++) { + struct bnxt_napi *bnapi = bp->bnapi[i]; + + if (!bnapi) + continue; + bnxt_free_cp_arrays(&bnapi->cp_ring); + } +} + +static int bnxt_alloc_all_cp_arrays(struct bnxt *bp) +{ + int i, n = bp->cp_nr_pages; + + for (i = 0; i < bp->cp_nr_rings; i++) { + struct bnxt_napi *bnapi = bp->bnapi[i]; + int rc; + + if (!bnapi) + continue; + rc = bnxt_alloc_cp_arrays(&bnapi->cp_ring, n); + if (rc) + return rc; + } + return 0; +} + static void bnxt_free_cp_rings(struct bnxt *bp) { int i; @@ -3190,6 +3242,7 @@ static void bnxt_free_cp_rings(struct bnxt *bp) if (cpr2) { ring = &cpr2->cp_ring_struct; bnxt_free_ring(bp, &ring->ring_mem); + bnxt_free_cp_arrays(cpr2); kfree(cpr2); cpr->cp_ring_arr[j] = NULL; } @@ -3208,6 +3261,12 @@ static struct bnxt_cp_ring_info *bnxt_alloc_cp_sub_ring(struct bnxt *bp) if (!cpr) return NULL; + rc = bnxt_alloc_cp_arrays(cpr, bp->cp_nr_pages); + if (rc) { + bnxt_free_cp_arrays(cpr); + kfree(cpr); + return NULL; + } ring = &cpr->cp_ring_struct; rmem = &ring->ring_mem; rmem->nr_pages = bp->cp_nr_pages; @@ -3218,6 +3277,7 @@ static struct bnxt_cp_ring_info *bnxt_alloc_cp_sub_ring(struct bnxt *bp) rc = bnxt_alloc_ring(bp, rmem); if (rc) { bnxt_free_ring(bp, rmem); + bnxt_free_cp_arrays(cpr); kfree(cpr); cpr = NULL; } @@ -3650,9 +3710,15 @@ void bnxt_set_ring_params(struct bnxt *bp) if (jumbo_factor > agg_factor) agg_factor = jumbo_factor; } - agg_ring_size = ring_size * agg_factor; + if (agg_factor) { + if (ring_size > BNXT_MAX_RX_DESC_CNT_JUM_ENA) { + ring_size = BNXT_MAX_RX_DESC_CNT_JUM_ENA; + netdev_warn(bp->dev, "RX ring size reduced from %d to %d because the jumbo ring is now enabled\n", + bp->rx_ring_size, ring_size); + bp->rx_ring_size = ring_size; + } + agg_ring_size = ring_size * agg_factor; - if (agg_ring_size) { bp->rx_agg_nr_pages = bnxt_calc_nr_ring_pages(agg_ring_size, RX_DESC_CNT); if (bp->rx_agg_nr_pages > MAX_RX_AGG_PAGES) { @@ -4253,6 +4319,7 @@ static void bnxt_free_mem(struct bnxt *bp, bool irq_re_init) bnxt_free_tx_rings(bp); bnxt_free_rx_rings(bp); bnxt_free_cp_rings(bp); + bnxt_free_all_cp_arrays(bp); bnxt_free_ntp_fltrs(bp, irq_re_init); if (irq_re_init) { bnxt_free_ring_stats(bp); @@ -4373,6 +4440,10 @@ static int bnxt_alloc_mem(struct bnxt *bp, bool irq_re_init) goto alloc_mem_err; } + rc = bnxt_alloc_all_cp_arrays(bp); + if (rc) + goto alloc_mem_err; + bnxt_init_ring_struct(bp); rc = bnxt_alloc_rx_rings(bp); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index e379c48c1df9..9c3324e76ff7 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -596,15 +596,17 @@ struct nqe_cn { #define MAX_TPA_SEGS_P5 0x3f #if (BNXT_PAGE_SHIFT == 16) -#define MAX_RX_PAGES 1 +#define MAX_RX_PAGES_AGG_ENA 1 +#define MAX_RX_PAGES 4 #define MAX_RX_AGG_PAGES 4 #define MAX_TX_PAGES 1 -#define MAX_CP_PAGES 8 +#define MAX_CP_PAGES 16 #else -#define MAX_RX_PAGES 8 +#define MAX_RX_PAGES_AGG_ENA 8 +#define MAX_RX_PAGES 32 #define MAX_RX_AGG_PAGES 32 #define MAX_TX_PAGES 8 -#define MAX_CP_PAGES 64 +#define MAX_CP_PAGES 128 #endif #define RX_DESC_CNT (BNXT_PAGE_SIZE / sizeof(struct rx_bd)) @@ -622,6 +624,7 @@ struct nqe_cn { #define HW_CMPD_RING_SIZE (sizeof(struct tx_cmp) * CP_DESC_CNT) #define BNXT_MAX_RX_DESC_CNT (RX_DESC_CNT * MAX_RX_PAGES - 1) +#define BNXT_MAX_RX_DESC_CNT_JUM_ENA (RX_DESC_CNT * MAX_RX_PAGES_AGG_ENA - 1) #define BNXT_MAX_RX_JUM_DESC_CNT (RX_DESC_CNT * MAX_RX_AGG_PAGES - 1) #define BNXT_MAX_TX_DESC_CNT (TX_DESC_CNT * MAX_TX_PAGES - 1) @@ -972,11 +975,11 @@ struct bnxt_cp_ring_info { struct dim dim; union { - struct tx_cmp *cp_desc_ring[MAX_CP_PAGES]; - struct nqe_cn *nq_desc_ring[MAX_CP_PAGES]; + struct tx_cmp **cp_desc_ring; + struct nqe_cn **nq_desc_ring; }; - dma_addr_t cp_desc_mapping[MAX_CP_PAGES]; + dma_addr_t *cp_desc_mapping; struct bnxt_stats_mem stats; u32 hw_stats_ctx_id; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c index 64381be935a8..2cd8bb37e641 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c @@ -743,14 +743,17 @@ static void bnxt_dl_params_unregister(struct bnxt *bp) int bnxt_dl_register(struct bnxt *bp) { + const struct devlink_ops *devlink_ops; struct devlink_port_attrs attrs = {}; struct devlink *dl; int rc; if (BNXT_PF(bp)) - dl = devlink_alloc(&bnxt_dl_ops, sizeof(struct bnxt_dl)); + devlink_ops = &bnxt_dl_ops; else - dl = devlink_alloc(&bnxt_vf_dl_ops, sizeof(struct bnxt_dl)); + devlink_ops = &bnxt_vf_dl_ops; + + dl = devlink_alloc(devlink_ops, sizeof(struct bnxt_dl), &bp->pdev->dev); if (!dl) { netdev_warn(bp->dev, "devlink_alloc failed\n"); return -ENOMEM; @@ -763,7 +766,7 @@ int bnxt_dl_register(struct bnxt *bp) bp->hwrm_spec_code > 0x10803) bp->eswitch_mode = DEVLINK_ESWITCH_MODE_LEGACY; - rc = devlink_register(dl, &bp->pdev->dev); + rc = devlink_register(dl); if (rc) { netdev_warn(bp->dev, "devlink_register failed. rc=%d\n", rc); goto err_dl_free; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 786ca51e669b..485252d12245 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -768,8 +768,13 @@ static void bnxt_get_ringparam(struct net_device *dev, { struct bnxt *bp = netdev_priv(dev); - ering->rx_max_pending = BNXT_MAX_RX_DESC_CNT; - ering->rx_jumbo_max_pending = BNXT_MAX_RX_JUM_DESC_CNT; + if (bp->flags & BNXT_FLAG_AGG_RINGS) { + ering->rx_max_pending = BNXT_MAX_RX_DESC_CNT_JUM_ENA; + ering->rx_jumbo_max_pending = BNXT_MAX_RX_JUM_DESC_CNT; + } else { + ering->rx_max_pending = BNXT_MAX_RX_DESC_CNT; + ering->rx_jumbo_max_pending = 0; + } ering->tx_max_pending = BNXT_MAX_TX_DESC_CNT; ering->rx_pending = bp->rx_ring_size; diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 63e2237e0cb4..8507198992df 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -3972,8 +3972,6 @@ static int bcmgenet_probe(struct platform_device *pdev) */ dev->needed_headroom += 64; - netdev_boot_setup_check(dev); - priv->dev = dev; priv->pdev = pdev; diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c index af116ef83bad..2907e13b9df6 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c @@ -3750,7 +3750,8 @@ static int setup_nic_devices(struct octeon_device *octeon_dev) } devlink = devlink_alloc(&liquidio_devlink_ops, - sizeof(struct lio_devlink_priv)); + sizeof(struct lio_devlink_priv), + &octeon_dev->pci_dev->dev); if (!devlink) { dev_err(&octeon_dev->pci_dev->dev, "devlink alloc failed\n"); goto setup_nic_dev_free; @@ -3759,7 +3760,7 @@ static int setup_nic_devices(struct octeon_device *octeon_dev) lio_devlink = devlink_priv(devlink); lio_devlink->oct = octeon_dev; - if (devlink_register(devlink, &octeon_dev->pci_dev->dev)) { + if (devlink_register(devlink)) { devlink_free(devlink); dev_err(&octeon_dev->pci_dev->dev, "devlink registration failed\n"); diff --git a/drivers/net/ethernet/cirrus/Kconfig b/drivers/net/ethernet/cirrus/Kconfig index d8af9e64dd1e..dac1764ba740 100644 --- a/drivers/net/ethernet/cirrus/Kconfig +++ b/drivers/net/ethernet/cirrus/Kconfig @@ -6,7 +6,7 @@ config NET_VENDOR_CIRRUS bool "Cirrus devices" default y - depends on ISA || EISA || ARM || MAC + depends on ISA || EISA || ARM || MAC || COMPILE_TEST help If you have a network (Ethernet) card belonging to this class, say Y. @@ -18,9 +18,16 @@ config NET_VENDOR_CIRRUS if NET_VENDOR_CIRRUS config CS89x0 - tristate "CS89x0 support" - depends on ISA || EISA || ARM + tristate + +config CS89x0_ISA + tristate "CS89x0 ISA driver support" + depends on HAS_IOPORT_MAP + depends on ISA depends on !PPC32 + depends on CS89x0_PLATFORM=n + select NETDEV_LEGACY_INIT + select CS89x0 help Support for CS89x0 chipset based Ethernet cards. If you have a network (Ethernet) card of this type, say Y and read the file @@ -30,15 +37,15 @@ config CS89x0 will be called cs89x0. config CS89x0_PLATFORM - bool "CS89x0 platform driver support" if HAS_IOPORT_MAP - default !HAS_IOPORT_MAP - depends on CS89x0 + tristate "CS89x0 platform driver support" + depends on ARM || COMPILE_TEST + select CS89x0 help - Say Y to compile the cs89x0 driver as a platform driver. This - makes this driver suitable for use on certain evaluation boards - such as the iMX21ADS. + Say Y to compile the cs89x0 platform driver. This makes this driver + suitable for use on certain evaluation boards such as the iMX21ADS. - If you are unsure, say N. + To compile this driver as a module, choose M here. The module + will be called cs89x0. config EP93XX_ETH tristate "EP93xx Ethernet support" diff --git a/drivers/net/ethernet/cirrus/cs89x0.c b/drivers/net/ethernet/cirrus/cs89x0.c index 33ace3307059..d0c4c8b7a15a 100644 --- a/drivers/net/ethernet/cirrus/cs89x0.c +++ b/drivers/net/ethernet/cirrus/cs89x0.c @@ -104,7 +104,7 @@ static char version[] __initdata = * them to system IRQ numbers. This mapping is card specific and is set to * the configuration of the Cirrus Eval board for this chip. */ -#ifndef CONFIG_CS89x0_PLATFORM +#if IS_ENABLED(CONFIG_CS89x0_ISA) static unsigned int netcard_portlist[] __used __initdata = { 0x300, 0x320, 0x340, 0x360, 0x200, 0x220, 0x240, 0x260, 0x280, 0x2a0, 0x2c0, 0x2e0, 0 @@ -292,7 +292,7 @@ write_irq(struct net_device *dev, int chip_type, int irq) int i; if (chip_type == CS8900) { -#ifndef CONFIG_CS89x0_PLATFORM +#if IS_ENABLED(CONFIG_CS89x0_ISA) /* Search the mapping table for the corresponding IRQ pin. */ for (i = 0; i != ARRAY_SIZE(cs8900_irq_map); i++) if (cs8900_irq_map[i] == irq) @@ -859,7 +859,7 @@ net_open(struct net_device *dev) goto bad_out; } } else { -#if !defined(CONFIG_CS89x0_PLATFORM) +#if IS_ENABLED(CONFIG_CS89x0_ISA) if (((1 << dev->irq) & lp->irq_map) == 0) { pr_err("%s: IRQ %d is not in our map of allowable IRQs, which is %x\n", dev->name, dev->irq, lp->irq_map); @@ -1523,7 +1523,7 @@ cs89x0_probe1(struct net_device *dev, void __iomem *ioaddr, int modular) dev->irq = i; } else { i = lp->isa_config & INT_NO_MASK; -#ifndef CONFIG_CS89x0_PLATFORM +#if IS_ENABLED(CONFIG_CS89x0_ISA) if (lp->chip_type == CS8900) { /* Translate the IRQ using the IRQ mapping table. */ if (i >= ARRAY_SIZE(cs8900_irq_map)) @@ -1576,7 +1576,7 @@ out1: return retval; } -#ifndef CONFIG_CS89x0_PLATFORM +#if IS_ENABLED(CONFIG_CS89x0_ISA) /* * This function converts the I/O port address used by the cs89x0_probe() and * init_module() functions to the I/O memory address used by the @@ -1682,11 +1682,7 @@ out: pr_warn("no cs8900 or cs8920 detected. Be sure to disable PnP with SETUP\n"); return ERR_PTR(err); } -#endif -#endif - -#if defined(MODULE) && !defined(CONFIG_CS89x0_PLATFORM) - +#else static struct net_device *dev_cs89x0; /* Support the 'debug' module parm even if we're compiled for non-debug to @@ -1757,9 +1753,9 @@ MODULE_LICENSE("GPL"); * (hw or software util) */ -int __init init_module(void) +static int __init cs89x0_isa_init_module(void) { - struct net_device *dev = alloc_etherdev(sizeof(struct net_local)); + struct net_device *dev; struct net_local *lp; int ret = 0; @@ -1768,6 +1764,7 @@ int __init init_module(void) #else debug = 0; #endif + dev = alloc_etherdev(sizeof(struct net_local)); if (!dev) return -ENOMEM; @@ -1826,9 +1823,9 @@ out: free_netdev(dev); return ret; } +module_init(cs89x0_isa_init_module); -void __exit -cleanup_module(void) +static void __exit cs89x0_isa_cleanup_module(void) { struct net_local *lp = netdev_priv(dev_cs89x0); @@ -1838,9 +1835,11 @@ cleanup_module(void) release_region(dev_cs89x0->base_addr, NETCARD_IO_EXTENT); free_netdev(dev_cs89x0); } -#endif /* MODULE && !CONFIG_CS89x0_PLATFORM */ +module_exit(cs89x0_isa_cleanup_module); +#endif /* MODULE */ +#endif /* CONFIG_CS89x0_ISA */ -#ifdef CONFIG_CS89x0_PLATFORM +#if IS_ENABLED(CONFIG_CS89x0_PLATFORM) static int __init cs89x0_platform_probe(struct platform_device *pdev) { struct net_device *dev = alloc_etherdev(sizeof(struct net_local)); diff --git a/drivers/net/ethernet/dec/tulip/media.c b/drivers/net/ethernet/dec/tulip/media.c index 011604787b8e..55d6fc99f40b 100644 --- a/drivers/net/ethernet/dec/tulip/media.c +++ b/drivers/net/ethernet/dec/tulip/media.c @@ -362,7 +362,7 @@ void tulip_select_media(struct net_device *dev, int startup) iowrite32(0x33, ioaddr + CSR12); new_csr6 = 0x01860000; /* Trigger autonegotiation. */ - iowrite32(startup ? 0x0201F868 : 0x0001F868, ioaddr + 0xB8); + iowrite32(0x0001F868, ioaddr + 0xB8); } else { iowrite32(0x32, ioaddr + CSR12); new_csr6 = 0x00420000; diff --git a/drivers/net/ethernet/freescale/dpaa2/Makefile b/drivers/net/ethernet/freescale/dpaa2/Makefile index c2ef74052ef8..3d9842af7f10 100644 --- a/drivers/net/ethernet/freescale/dpaa2/Makefile +++ b/drivers/net/ethernet/freescale/dpaa2/Makefile @@ -11,7 +11,7 @@ fsl-dpaa2-eth-objs := dpaa2-eth.o dpaa2-ethtool.o dpni.o dpaa2-mac.o dpmac.o dpa fsl-dpaa2-eth-${CONFIG_FSL_DPAA2_ETH_DCB} += dpaa2-eth-dcb.o fsl-dpaa2-eth-${CONFIG_DEBUG_FS} += dpaa2-eth-debugfs.o fsl-dpaa2-ptp-objs := dpaa2-ptp.o dprtc.o -fsl-dpaa2-switch-objs := dpaa2-switch.o dpaa2-switch-ethtool.o dpsw.o dpaa2-switch-flower.o +fsl-dpaa2-switch-objs := dpaa2-switch.o dpaa2-switch-ethtool.o dpsw.o dpaa2-switch-flower.o dpaa2-mac.o dpmac.o # Needed by the tracing framework CFLAGS_dpaa2-eth.o := -I$(src) diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-devlink.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-devlink.c index 8e09f65ea295..605a39f892b9 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-devlink.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-devlink.c @@ -196,7 +196,8 @@ int dpaa2_eth_dl_register(struct dpaa2_eth_priv *priv) struct dpaa2_eth_devlink_priv *dl_priv; int err; - priv->devlink = devlink_alloc(&dpaa2_eth_devlink_ops, sizeof(*dl_priv)); + priv->devlink = + devlink_alloc(&dpaa2_eth_devlink_ops, sizeof(*dl_priv), dev); if (!priv->devlink) { dev_err(dev, "devlink_alloc failed\n"); return -ENOMEM; @@ -204,7 +205,7 @@ int dpaa2_eth_dl_register(struct dpaa2_eth_priv *priv) dl_priv = devlink_priv(priv->devlink); dl_priv->dpaa2_priv = priv; - err = devlink_register(priv->devlink, dev); + err = devlink_register(priv->devlink); if (err) { dev_err(dev, "devlink_register() = %d\n", err); goto devlink_free; diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c index f664021c3ad1..7065c71ed7b8 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c @@ -4138,7 +4138,7 @@ static int dpaa2_eth_connect_mac(struct dpaa2_eth_priv *priv) int err; dpni_dev = to_fsl_mc_device(priv->net_dev->dev.parent); - dpmac_dev = fsl_mc_get_endpoint(dpni_dev); + dpmac_dev = fsl_mc_get_endpoint(dpni_dev, 0); if (PTR_ERR(dpmac_dev) == -EPROBE_DEFER) return PTR_ERR(dpmac_dev); diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-ethtool.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-ethtool.c index 70e04321c420..720c9230cab5 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-ethtool.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-ethtool.c @@ -15,18 +15,18 @@ static struct { enum dpsw_counter id; char name[ETH_GSTRING_LEN]; } dpaa2_switch_ethtool_counters[] = { - {DPSW_CNT_ING_FRAME, "rx frames"}, - {DPSW_CNT_ING_BYTE, "rx bytes"}, - {DPSW_CNT_ING_FLTR_FRAME, "rx filtered frames"}, - {DPSW_CNT_ING_FRAME_DISCARD, "rx discarded frames"}, - {DPSW_CNT_ING_BCAST_FRAME, "rx b-cast frames"}, - {DPSW_CNT_ING_BCAST_BYTES, "rx b-cast bytes"}, - {DPSW_CNT_ING_MCAST_FRAME, "rx m-cast frames"}, - {DPSW_CNT_ING_MCAST_BYTE, "rx m-cast bytes"}, - {DPSW_CNT_EGR_FRAME, "tx frames"}, - {DPSW_CNT_EGR_BYTE, "tx bytes"}, - {DPSW_CNT_EGR_FRAME_DISCARD, "tx discarded frames"}, - {DPSW_CNT_ING_NO_BUFF_DISCARD, "rx discarded no buffer frames"}, + {DPSW_CNT_ING_FRAME, "[hw] rx frames"}, + {DPSW_CNT_ING_BYTE, "[hw] rx bytes"}, + {DPSW_CNT_ING_FLTR_FRAME, "[hw] rx filtered frames"}, + {DPSW_CNT_ING_FRAME_DISCARD, "[hw] rx discarded frames"}, + {DPSW_CNT_ING_BCAST_FRAME, "[hw] rx bcast frames"}, + {DPSW_CNT_ING_BCAST_BYTES, "[hw] rx bcast bytes"}, + {DPSW_CNT_ING_MCAST_FRAME, "[hw] rx mcast frames"}, + {DPSW_CNT_ING_MCAST_BYTE, "[hw] rx mcast bytes"}, + {DPSW_CNT_EGR_FRAME, "[hw] tx frames"}, + {DPSW_CNT_EGR_BYTE, "[hw] tx bytes"}, + {DPSW_CNT_EGR_FRAME_DISCARD, "[hw] tx discarded frames"}, + {DPSW_CNT_ING_NO_BUFF_DISCARD, "[hw] rx nobuffer discards"}, }; #define DPAA2_SWITCH_NUM_COUNTERS ARRAY_SIZE(dpaa2_switch_ethtool_counters) @@ -62,6 +62,10 @@ dpaa2_switch_get_link_ksettings(struct net_device *netdev, struct dpsw_link_state state = {0}; int err = 0; + if (dpaa2_switch_port_is_type_phy(port_priv)) + return phylink_ethtool_ksettings_get(port_priv->mac->phylink, + link_ksettings); + err = dpsw_if_get_link_state(port_priv->ethsw_data->mc_io, 0, port_priv->ethsw_data->dpsw_handle, port_priv->idx, @@ -95,6 +99,10 @@ dpaa2_switch_set_link_ksettings(struct net_device *netdev, bool if_running; int err = 0, ret; + if (dpaa2_switch_port_is_type_phy(port_priv)) + return phylink_ethtool_ksettings_set(port_priv->mac->phylink, + link_ksettings); + /* Interface needs to be down to change link settings */ if_running = netif_running(netdev); if (if_running) { @@ -134,11 +142,17 @@ dpaa2_switch_set_link_ksettings(struct net_device *netdev, return err; } -static int dpaa2_switch_ethtool_get_sset_count(struct net_device *dev, int sset) +static int +dpaa2_switch_ethtool_get_sset_count(struct net_device *netdev, int sset) { + struct ethsw_port_priv *port_priv = netdev_priv(netdev); + int num_ss_stats = DPAA2_SWITCH_NUM_COUNTERS; + switch (sset) { case ETH_SS_STATS: - return DPAA2_SWITCH_NUM_COUNTERS; + if (port_priv->mac) + num_ss_stats += dpaa2_mac_get_sset_count(); + return num_ss_stats; default: return -EOPNOTSUPP; } @@ -147,14 +161,19 @@ static int dpaa2_switch_ethtool_get_sset_count(struct net_device *dev, int sset) static void dpaa2_switch_ethtool_get_strings(struct net_device *netdev, u32 stringset, u8 *data) { + struct ethsw_port_priv *port_priv = netdev_priv(netdev); + u8 *p = data; int i; switch (stringset) { case ETH_SS_STATS: - for (i = 0; i < DPAA2_SWITCH_NUM_COUNTERS; i++) - memcpy(data + i * ETH_GSTRING_LEN, - dpaa2_switch_ethtool_counters[i].name, + for (i = 0; i < DPAA2_SWITCH_NUM_COUNTERS; i++) { + memcpy(p, dpaa2_switch_ethtool_counters[i].name, ETH_GSTRING_LEN); + p += ETH_GSTRING_LEN; + } + if (port_priv->mac) + dpaa2_mac_get_strings(p); break; } } @@ -176,6 +195,9 @@ static void dpaa2_switch_ethtool_get_stats(struct net_device *netdev, netdev_err(netdev, "dpsw_if_get_counter[%s] err %d\n", dpaa2_switch_ethtool_counters[i].name, err); } + + if (port_priv->mac) + dpaa2_mac_get_ethtool_stats(port_priv->mac, data + i); } const struct ethtool_ops dpaa2_switch_port_ethtool_ops = { diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c index 71129724d9ca..d260993ab2dc 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c @@ -594,12 +594,18 @@ static int dpaa2_switch_port_change_mtu(struct net_device *netdev, int mtu) return 0; } -static int dpaa2_switch_port_carrier_state_sync(struct net_device *netdev) +static int dpaa2_switch_port_link_state_update(struct net_device *netdev) { struct ethsw_port_priv *port_priv = netdev_priv(netdev); struct dpsw_link_state state; int err; + /* When we manage the MAC/PHY using phylink there is no need + * to manually update the netif_carrier. + */ + if (dpaa2_switch_port_is_type_phy(port_priv)) + return 0; + /* Interrupts are received even though no one issued an 'ifconfig up' * on the switch interface. Ignore these link state update interrupts */ @@ -677,12 +683,14 @@ static int dpaa2_switch_port_open(struct net_device *netdev) struct ethsw_core *ethsw = port_priv->ethsw_data; int err; - /* Explicitly set carrier off, otherwise - * netif_carrier_ok() will return true and cause 'ip link show' - * to report the LOWER_UP flag, even though the link - * notification wasn't even received. - */ - netif_carrier_off(netdev); + if (!dpaa2_switch_port_is_type_phy(port_priv)) { + /* Explicitly set carrier off, otherwise + * netif_carrier_ok() will return true and cause 'ip link show' + * to report the LOWER_UP flag, even though the link + * notification wasn't even received. + */ + netif_carrier_off(netdev); + } err = dpsw_if_enable(port_priv->ethsw_data->mc_io, 0, port_priv->ethsw_data->dpsw_handle, @@ -692,23 +700,12 @@ static int dpaa2_switch_port_open(struct net_device *netdev) return err; } - /* sync carrier state */ - err = dpaa2_switch_port_carrier_state_sync(netdev); - if (err) { - netdev_err(netdev, - "dpaa2_switch_port_carrier_state_sync err %d\n", err); - goto err_carrier_sync; - } - dpaa2_switch_enable_ctrl_if_napi(ethsw); - return 0; + if (dpaa2_switch_port_is_type_phy(port_priv)) + phylink_start(port_priv->mac->phylink); -err_carrier_sync: - dpsw_if_disable(port_priv->ethsw_data->mc_io, 0, - port_priv->ethsw_data->dpsw_handle, - port_priv->idx); - return err; + return 0; } static int dpaa2_switch_port_stop(struct net_device *netdev) @@ -717,6 +714,13 @@ static int dpaa2_switch_port_stop(struct net_device *netdev) struct ethsw_core *ethsw = port_priv->ethsw_data; int err; + if (dpaa2_switch_port_is_type_phy(port_priv)) { + phylink_stop(port_priv->mac->phylink); + } else { + netif_tx_stop_all_queues(netdev); + netif_carrier_off(netdev); + } + err = dpsw_if_disable(port_priv->ethsw_data->mc_io, 0, port_priv->ethsw_data->dpsw_handle, port_priv->idx); @@ -1419,41 +1423,103 @@ bool dpaa2_switch_port_dev_check(const struct net_device *netdev) return netdev->netdev_ops == &dpaa2_switch_port_ops; } -static void dpaa2_switch_links_state_update(struct ethsw_core *ethsw) +static int dpaa2_switch_port_connect_mac(struct ethsw_port_priv *port_priv) { - int i; + struct fsl_mc_device *dpsw_port_dev, *dpmac_dev; + struct dpaa2_mac *mac; + int err; - for (i = 0; i < ethsw->sw_attr.num_ifs; i++) { - dpaa2_switch_port_carrier_state_sync(ethsw->ports[i]->netdev); - dpaa2_switch_port_set_mac_addr(ethsw->ports[i]); + dpsw_port_dev = to_fsl_mc_device(port_priv->netdev->dev.parent); + dpmac_dev = fsl_mc_get_endpoint(dpsw_port_dev, port_priv->idx); + + if (PTR_ERR(dpmac_dev) == -EPROBE_DEFER) + return PTR_ERR(dpmac_dev); + + if (IS_ERR(dpmac_dev) || dpmac_dev->dev.type != &fsl_mc_bus_dpmac_type) + return 0; + + mac = kzalloc(sizeof(*mac), GFP_KERNEL); + if (!mac) + return -ENOMEM; + + mac->mc_dev = dpmac_dev; + mac->mc_io = port_priv->ethsw_data->mc_io; + mac->net_dev = port_priv->netdev; + + err = dpaa2_mac_open(mac); + if (err) + goto err_free_mac; + port_priv->mac = mac; + + if (dpaa2_switch_port_is_type_phy(port_priv)) { + err = dpaa2_mac_connect(mac); + if (err) { + netdev_err(port_priv->netdev, + "Error connecting to the MAC endpoint %pe\n", + ERR_PTR(err)); + goto err_close_mac; + } } + + return 0; + +err_close_mac: + dpaa2_mac_close(mac); + port_priv->mac = NULL; +err_free_mac: + kfree(mac); + return err; +} + +static void dpaa2_switch_port_disconnect_mac(struct ethsw_port_priv *port_priv) +{ + if (dpaa2_switch_port_is_type_phy(port_priv)) + dpaa2_mac_disconnect(port_priv->mac); + + if (!dpaa2_switch_port_has_mac(port_priv)) + return; + + dpaa2_mac_close(port_priv->mac); + kfree(port_priv->mac); + port_priv->mac = NULL; } static irqreturn_t dpaa2_switch_irq0_handler_thread(int irq_num, void *arg) { struct device *dev = (struct device *)arg; struct ethsw_core *ethsw = dev_get_drvdata(dev); - - /* Mask the events and the if_id reserved bits to be cleared on read */ - u32 status = DPSW_IRQ_EVENT_LINK_CHANGED | 0xFFFF0000; - int err; + struct ethsw_port_priv *port_priv; + u32 status = ~0; + int err, if_id; err = dpsw_get_irq_status(ethsw->mc_io, 0, ethsw->dpsw_handle, DPSW_IRQ_INDEX_IF, &status); if (err) { dev_err(dev, "Can't get irq status (err %d)\n", err); - - err = dpsw_clear_irq_status(ethsw->mc_io, 0, ethsw->dpsw_handle, - DPSW_IRQ_INDEX_IF, 0xFFFFFFFF); - if (err) - dev_err(dev, "Can't clear irq status (err %d)\n", err); goto out; } - if (status & DPSW_IRQ_EVENT_LINK_CHANGED) - dpaa2_switch_links_state_update(ethsw); + if_id = (status & 0xFFFF0000) >> 16; + port_priv = ethsw->ports[if_id]; + + if (status & DPSW_IRQ_EVENT_LINK_CHANGED) { + dpaa2_switch_port_link_state_update(port_priv->netdev); + dpaa2_switch_port_set_mac_addr(port_priv); + } + + if (status & DPSW_IRQ_EVENT_ENDPOINT_CHANGED) { + if (dpaa2_switch_port_has_mac(port_priv)) + dpaa2_switch_port_disconnect_mac(port_priv); + else + dpaa2_switch_port_connect_mac(port_priv); + } out: + err = dpsw_clear_irq_status(ethsw->mc_io, 0, ethsw->dpsw_handle, + DPSW_IRQ_INDEX_IF, status); + if (err) + dev_err(dev, "Can't clear irq status (err %d)\n", err); + return IRQ_HANDLED; } @@ -3133,6 +3199,7 @@ static int dpaa2_switch_remove(struct fsl_mc_device *sw_dev) for (i = 0; i < ethsw->sw_attr.num_ifs; i++) { port_priv = ethsw->ports[i]; unregister_netdev(port_priv->netdev); + dpaa2_switch_port_disconnect_mac(port_priv); free_netdev(port_priv->netdev); } @@ -3212,6 +3279,10 @@ static int dpaa2_switch_probe_port(struct ethsw_core *ethsw, goto err_port_probe; port_priv->learn_ena = false; + err = dpaa2_switch_port_connect_mac(port_priv); + if (err) + goto err_port_probe; + return 0; err_port_probe: @@ -3288,12 +3359,6 @@ static int dpaa2_switch_probe(struct fsl_mc_device *sw_dev) ðsw->fq[i].napi, dpaa2_switch_poll, NAPI_POLL_WEIGHT); - err = dpsw_enable(ethsw->mc_io, 0, ethsw->dpsw_handle); - if (err) { - dev_err(ethsw->dev, "dpsw_enable err %d\n", err); - goto err_free_netdev; - } - /* Setup IRQs */ err = dpaa2_switch_setup_irqs(sw_dev); if (err) diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h index f69d940f3c5b..0002dca4d417 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h @@ -21,6 +21,7 @@ #include <net/pkt_cls.h> #include <soc/fsl/dpaa2-io.h> +#include "dpaa2-mac.h" #include "dpsw.h" /* Number of IRQs supported */ @@ -159,6 +160,7 @@ struct ethsw_port_priv { bool learn_ena; struct dpaa2_switch_filter_block *filter_block; + struct dpaa2_mac *mac; }; /* Switch data */ @@ -225,6 +227,22 @@ static inline bool dpaa2_switch_supports_cpu_traffic(struct ethsw_core *ethsw) return true; } +static inline bool +dpaa2_switch_port_is_type_phy(struct ethsw_port_priv *port_priv) +{ + if (port_priv->mac && + (port_priv->mac->attr.link_type == DPMAC_LINK_TYPE_PHY || + port_priv->mac->attr.link_type == DPMAC_LINK_TYPE_BACKPLANE)) + return true; + + return false; +} + +static inline bool dpaa2_switch_port_has_mac(struct ethsw_port_priv *port_priv) +{ + return port_priv->mac ? true : false; +} + bool dpaa2_switch_port_dev_check(const struct net_device *netdev); int dpaa2_switch_port_vlans_add(struct net_device *netdev, diff --git a/drivers/net/ethernet/freescale/dpaa2/dpsw.h b/drivers/net/ethernet/freescale/dpaa2/dpsw.h index 892df905b876..b90bd363f47a 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpsw.h +++ b/drivers/net/ethernet/freescale/dpaa2/dpsw.h @@ -99,6 +99,11 @@ int dpsw_reset(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token); #define DPSW_IRQ_EVENT_LINK_CHANGED 0x0001 /** + * DPSW_IRQ_EVENT_ENDPOINT_CHANGED - Indicates a change in endpoint + */ +#define DPSW_IRQ_EVENT_ENDPOINT_CHANGED 0x0002 + +/** * struct dpsw_irq_cfg - IRQ configuration * @addr: Address that must be written to signal a message-based interrupt * @val: Value to write into irq_addr address diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h index ae3259164395..d2e9a6c02d1e 100644 --- a/drivers/net/ethernet/freescale/fec.h +++ b/drivers/net/ethernet/freescale/fec.h @@ -189,6 +189,8 @@ #define FEC_RXIC0 0xfff #define FEC_RXIC1 0xfff #define FEC_RXIC2 0xfff +#define FEC_LPI_SLEEP 0xfff +#define FEC_LPI_WAKE 0xfff #endif /* CONFIG_M5272 */ diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 40ea318d7396..fdff37b87de7 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -2042,6 +2042,34 @@ failed_clk_ptp: return ret; } +static int fec_enet_parse_rgmii_delay(struct fec_enet_private *fep, + struct device_node *np) +{ + u32 rgmii_tx_delay, rgmii_rx_delay; + + /* For rgmii tx internal delay, valid values are 0ps and 2000ps */ + if (!of_property_read_u32(np, "tx-internal-delay-ps", &rgmii_tx_delay)) { + if (rgmii_tx_delay != 0 && rgmii_tx_delay != 2000) { + dev_err(&fep->pdev->dev, "The only allowed RGMII TX delay values are: 0ps, 2000ps"); + return -EINVAL; + } else if (rgmii_tx_delay == 2000) { + fep->rgmii_txc_dly = true; + } + } + + /* For rgmii rx internal delay, valid values are 0ps and 2000ps */ + if (!of_property_read_u32(np, "rx-internal-delay-ps", &rgmii_rx_delay)) { + if (rgmii_rx_delay != 0 && rgmii_rx_delay != 2000) { + dev_err(&fep->pdev->dev, "The only allowed RGMII RX delay values are: 0ps, 2000ps"); + return -EINVAL; + } else if (rgmii_rx_delay == 2000) { + fep->rgmii_rxc_dly = true; + } + } + + return 0; +} + static int fec_enet_mii_probe(struct net_device *ndev) { struct fec_enet_private *fep = netdev_priv(ndev); @@ -3719,7 +3747,6 @@ fec_probe(struct platform_device *pdev) char irq_name[8]; int irq_cnt; struct fec_devinfo *dev_info; - u32 rgmii_delay; fec_enet_get_queue_num(pdev, &num_tx_qs, &num_rx_qs); @@ -3777,12 +3804,6 @@ fec_probe(struct platform_device *pdev) if (ret) goto failed_stop_mode; - /* For rgmii internal delay, valid values are 0ps and 2000ps */ - if (of_property_read_u32(np, "tx-internal-delay-ps", &rgmii_delay)) - fep->rgmii_txc_dly = true; - if (of_property_read_u32(np, "rx-internal-delay-ps", &rgmii_delay)) - fep->rgmii_rxc_dly = true; - phy_node = of_parse_phandle(np, "phy-handle", 0); if (!phy_node && of_phy_is_fixed_link(np)) { ret = of_phy_register_fixed_link(np); @@ -3806,6 +3827,10 @@ fec_probe(struct platform_device *pdev) fep->phy_interface = interface; } + ret = fec_enet_parse_rgmii_delay(fep, np); + if (ret) + goto failed_rgmii_delay; + fep->clk_ipg = devm_clk_get(&pdev->dev, "ipg"); if (IS_ERR(fep->clk_ipg)) { ret = PTR_ERR(fep->clk_ipg); @@ -3835,9 +3860,11 @@ fec_probe(struct platform_device *pdev) fep->clk_ref_rate = clk_get_rate(fep->clk_ref); /* clk_2x_txclk is optional, depends on board */ - fep->clk_2x_txclk = devm_clk_get(&pdev->dev, "enet_2x_txclk"); - if (IS_ERR(fep->clk_2x_txclk)) - fep->clk_2x_txclk = NULL; + if (fep->rgmii_txc_dly || fep->rgmii_rxc_dly) { + fep->clk_2x_txclk = devm_clk_get(&pdev->dev, "enet_2x_txclk"); + if (IS_ERR(fep->clk_2x_txclk)) + fep->clk_2x_txclk = NULL; + } fep->bufdesc_ex = fep->quirks & FEC_QUIRK_HAS_BUFDESC_EX; fep->clk_ptp = devm_clk_get(&pdev->dev, "ptp"); @@ -3955,6 +3982,7 @@ failed_clk_ahb: failed_clk_ipg: fec_enet_clk_enable(ndev, false); failed_clk: +failed_rgmii_delay: if (of_phy_is_fixed_link(np)) of_phy_deregister_fixed_link(np); of_node_put(phy_node); @@ -3989,13 +4017,13 @@ fec_drv_remove(struct platform_device *pdev) if (of_phy_is_fixed_link(np)) of_phy_deregister_fixed_link(np); of_node_put(fep->phy_node); - free_netdev(ndev); clk_disable_unprepare(fep->clk_ahb); clk_disable_unprepare(fep->clk_ipg); pm_runtime_put_noidle(&pdev->dev); pm_runtime_disable(&pdev->dev); + free_netdev(ndev); return 0; } diff --git a/drivers/net/ethernet/hisilicon/Kconfig b/drivers/net/ethernet/hisilicon/Kconfig index 094e4a37a295..2ba0e7bd3466 100644 --- a/drivers/net/ethernet/hisilicon/Kconfig +++ b/drivers/net/ethernet/hisilicon/Kconfig @@ -91,6 +91,7 @@ config HNS3 tristate "Hisilicon Network Subsystem Support HNS3 (Framework)" depends on PCI select NET_DEVLINK + select PAGE_POOL help This selects the framework support for Hisilicon Network Subsystem 3. This layer facilitates clients like ENET, RoCE and user-space ethernet diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index cb8d5da3654f..fcbeb1fbe5b8 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -3205,6 +3205,21 @@ static int hns3_alloc_buffer(struct hns3_enet_ring *ring, unsigned int order = hns3_page_order(ring); struct page *p; + if (ring->page_pool) { + p = page_pool_dev_alloc_frag(ring->page_pool, + &cb->page_offset, + hns3_buf_size(ring)); + if (unlikely(!p)) + return -ENOMEM; + + cb->priv = p; + cb->buf = page_address(p); + cb->dma = page_pool_get_dma_addr(p); + cb->type = DESC_TYPE_PP_FRAG; + cb->reuse_flag = 0; + return 0; + } + p = dev_alloc_pages(order); if (!p) return -ENOMEM; @@ -3227,8 +3242,13 @@ static void hns3_free_buffer(struct hns3_enet_ring *ring, if (cb->type & (DESC_TYPE_SKB | DESC_TYPE_BOUNCE_HEAD | DESC_TYPE_BOUNCE_ALL | DESC_TYPE_SGL_SKB)) napi_consume_skb(cb->priv, budget); - else if (!HNAE3_IS_TX_RING(ring) && cb->pagecnt_bias) - __page_frag_cache_drain(cb->priv, cb->pagecnt_bias); + else if (!HNAE3_IS_TX_RING(ring)) { + if (cb->type & DESC_TYPE_PAGE && cb->pagecnt_bias) + __page_frag_cache_drain(cb->priv, cb->pagecnt_bias); + else if (cb->type & DESC_TYPE_PP_FRAG) + page_pool_put_full_page(ring->page_pool, cb->priv, + false); + } memset(cb, 0, sizeof(*cb)); } @@ -3315,7 +3335,7 @@ static int hns3_alloc_and_map_buffer(struct hns3_enet_ring *ring, int ret; ret = hns3_alloc_buffer(ring, cb); - if (ret) + if (ret || ring->page_pool) goto out; ret = hns3_map_buffer(ring, cb); @@ -3337,7 +3357,8 @@ static int hns3_alloc_and_attach_buffer(struct hns3_enet_ring *ring, int i) if (ret) return ret; - ring->desc[i].addr = cpu_to_le64(ring->desc_cb[i].dma); + ring->desc[i].addr = cpu_to_le64(ring->desc_cb[i].dma + + ring->desc_cb[i].page_offset); return 0; } @@ -3367,7 +3388,8 @@ static void hns3_replace_buffer(struct hns3_enet_ring *ring, int i, { hns3_unmap_buffer(ring, &ring->desc_cb[i]); ring->desc_cb[i] = *res_cb; - ring->desc[i].addr = cpu_to_le64(ring->desc_cb[i].dma); + ring->desc[i].addr = cpu_to_le64(ring->desc_cb[i].dma + + ring->desc_cb[i].page_offset); ring->desc[i].rx.bd_base_info = 0; } @@ -3539,6 +3561,12 @@ static void hns3_nic_reuse_page(struct sk_buff *skb, int i, u32 frag_size = size - pull_len; bool reused; + if (ring->page_pool) { + skb_add_rx_frag(skb, i, desc_cb->priv, frag_offset, + frag_size, truesize); + return; + } + /* Avoid re-using remote or pfmem page */ if (unlikely(!dev_page_is_reusable(desc_cb->priv))) goto out; @@ -3856,6 +3884,9 @@ static int hns3_alloc_skb(struct hns3_enet_ring *ring, unsigned int length, /* We can reuse buffer as-is, just make sure it is reusable */ if (dev_page_is_reusable(desc_cb->priv)) desc_cb->reuse_flag = 1; + else if (desc_cb->type & DESC_TYPE_PP_FRAG) + page_pool_put_full_page(ring->page_pool, desc_cb->priv, + false); else /* This page cannot be reused so discard it */ __page_frag_cache_drain(desc_cb->priv, desc_cb->pagecnt_bias); @@ -3863,6 +3894,10 @@ static int hns3_alloc_skb(struct hns3_enet_ring *ring, unsigned int length, hns3_rx_ring_move_fw(ring); return 0; } + + if (ring->page_pool) + skb_mark_for_recycle(skb); + u64_stats_update_begin(&ring->syncp); ring->stats.seg_pkt_cnt++; u64_stats_update_end(&ring->syncp); @@ -3901,6 +3936,10 @@ static int hns3_add_frag(struct hns3_enet_ring *ring) "alloc rx fraglist skb fail\n"); return -ENXIO; } + + if (ring->page_pool) + skb_mark_for_recycle(new_skb); + ring->frag_num = 0; if (ring->tail_skb) { @@ -4705,6 +4744,29 @@ static void hns3_put_ring_config(struct hns3_nic_priv *priv) priv->ring = NULL; } +static void hns3_alloc_page_pool(struct hns3_enet_ring *ring) +{ + struct page_pool_params pp_params = { + .flags = PP_FLAG_DMA_MAP | PP_FLAG_PAGE_FRAG | + PP_FLAG_DMA_SYNC_DEV, + .order = hns3_page_order(ring), + .pool_size = ring->desc_num * hns3_buf_size(ring) / + (PAGE_SIZE << hns3_page_order(ring)), + .nid = dev_to_node(ring_to_dev(ring)), + .dev = ring_to_dev(ring), + .dma_dir = DMA_FROM_DEVICE, + .offset = 0, + .max_len = PAGE_SIZE << hns3_page_order(ring), + }; + + ring->page_pool = page_pool_create(&pp_params); + if (IS_ERR(ring->page_pool)) { + dev_warn(ring_to_dev(ring), "page pool creation failed: %ld\n", + PTR_ERR(ring->page_pool)); + ring->page_pool = NULL; + } +} + static int hns3_alloc_ring_memory(struct hns3_enet_ring *ring) { int ret; @@ -4724,6 +4786,8 @@ static int hns3_alloc_ring_memory(struct hns3_enet_ring *ring) goto out_with_desc_cb; if (!HNAE3_IS_TX_RING(ring)) { + hns3_alloc_page_pool(ring); + ret = hns3_alloc_ring_buffers(ring); if (ret) goto out_with_desc; @@ -4764,6 +4828,11 @@ void hns3_fini_ring(struct hns3_enet_ring *ring) devm_kfree(ring_to_dev(ring), tx_spare); ring->tx_spare = NULL; } + + if (!HNAE3_IS_TX_RING(ring) && ring->page_pool) { + page_pool_destroy(ring->page_pool); + ring->page_pool = NULL; + } } static int hns3_buf_size2type(u32 buf_size) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h index 15af3d93857b..27809d68d6ed 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h @@ -6,6 +6,7 @@ #include <linux/dim.h> #include <linux/if_vlan.h> +#include <net/page_pool.h> #include "hnae3.h" @@ -307,6 +308,7 @@ enum hns3_desc_type { DESC_TYPE_BOUNCE_ALL = 1 << 3, DESC_TYPE_BOUNCE_HEAD = 1 << 4, DESC_TYPE_SGL_SKB = 1 << 5, + DESC_TYPE_PP_FRAG = 1 << 6, }; struct hns3_desc_cb { @@ -451,6 +453,7 @@ struct hns3_enet_ring { struct hnae3_queue *tqp; int queue_index; struct device *dev; /* will be used for DMA mapping of descriptors */ + struct page_pool *page_pool; /* statistic */ struct ring_stats stats; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_devlink.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_devlink.c index 06d29945d4e1..448f29aa4e6b 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_devlink.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_devlink.c @@ -112,14 +112,14 @@ int hclge_devlink_init(struct hclge_dev *hdev) int ret; devlink = devlink_alloc(&hclge_devlink_ops, - sizeof(struct hclge_devlink_priv)); + sizeof(struct hclge_devlink_priv), &pdev->dev); if (!devlink) return -ENOMEM; priv = devlink_priv(devlink); priv->hdev = hdev; - ret = devlink_register(devlink, &pdev->dev); + ret = devlink_register(devlink); if (ret) { dev_err(&pdev->dev, "failed to register devlink, ret = %d\n", ret); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_devlink.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_devlink.c index 21a45279fd99..1e6061fb8ed4 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_devlink.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_devlink.c @@ -112,15 +112,16 @@ int hclgevf_devlink_init(struct hclgevf_dev *hdev) struct devlink *devlink; int ret; - devlink = devlink_alloc(&hclgevf_devlink_ops, - sizeof(struct hclgevf_devlink_priv)); + devlink = + devlink_alloc(&hclgevf_devlink_ops, + sizeof(struct hclgevf_devlink_priv), &pdev->dev); if (!devlink) return -ENOMEM; priv = devlink_priv(devlink); priv->hdev = hdev; - ret = devlink_register(devlink, &pdev->dev); + ret = devlink_register(devlink); if (ret) { dev_err(&pdev->dev, "failed to register devlink, ret = %d\n", ret); diff --git a/drivers/net/ethernet/huawei/hinic/hinic_devlink.c b/drivers/net/ethernet/huawei/hinic/hinic_devlink.c index 58d5646444b0..6e11ee339f12 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_devlink.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_devlink.c @@ -293,9 +293,9 @@ static const struct devlink_ops hinic_devlink_ops = { .flash_update = hinic_devlink_flash_update, }; -struct devlink *hinic_devlink_alloc(void) +struct devlink *hinic_devlink_alloc(struct device *dev) { - return devlink_alloc(&hinic_devlink_ops, sizeof(struct hinic_dev)); + return devlink_alloc(&hinic_devlink_ops, sizeof(struct hinic_dev), dev); } void hinic_devlink_free(struct devlink *devlink) @@ -303,11 +303,11 @@ void hinic_devlink_free(struct devlink *devlink) devlink_free(devlink); } -int hinic_devlink_register(struct hinic_devlink_priv *priv, struct device *dev) +int hinic_devlink_register(struct hinic_devlink_priv *priv) { struct devlink *devlink = priv_to_devlink(priv); - return devlink_register(devlink, dev); + return devlink_register(devlink); } void hinic_devlink_unregister(struct hinic_devlink_priv *priv) diff --git a/drivers/net/ethernet/huawei/hinic/hinic_devlink.h b/drivers/net/ethernet/huawei/hinic/hinic_devlink.h index a090ebcfaabb..9e315011015c 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_devlink.h +++ b/drivers/net/ethernet/huawei/hinic/hinic_devlink.h @@ -108,9 +108,9 @@ struct host_image_st { u32 device_id; }; -struct devlink *hinic_devlink_alloc(void); +struct devlink *hinic_devlink_alloc(struct device *dev); void hinic_devlink_free(struct devlink *devlink); -int hinic_devlink_register(struct hinic_devlink_priv *priv, struct device *dev); +int hinic_devlink_register(struct hinic_devlink_priv *priv); void hinic_devlink_unregister(struct hinic_devlink_priv *priv); int hinic_health_reporters_create(struct hinic_devlink_priv *priv); diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c index 428108eb10d2..56b6b04e209b 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c @@ -754,7 +754,7 @@ static int init_pfhwdev(struct hinic_pfhwdev *pfhwdev) return err; } - err = hinic_devlink_register(hwdev->devlink_dev, &pdev->dev); + err = hinic_devlink_register(hwdev->devlink_dev); if (err) { dev_err(&hwif->pdev->dev, "Failed to register devlink\n"); hinic_pf_to_mgmt_free(&pfhwdev->pf_to_mgmt); diff --git a/drivers/net/ethernet/huawei/hinic/hinic_main.c b/drivers/net/ethernet/huawei/hinic/hinic_main.c index 405ee4d2d2b1..881d0b247561 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_main.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_main.c @@ -1183,7 +1183,7 @@ static int nic_dev_init(struct pci_dev *pdev) struct devlink *devlink; int err, num_qps; - devlink = hinic_devlink_alloc(); + devlink = hinic_devlink_alloc(&pdev->dev); if (!devlink) { dev_err(&pdev->dev, "Hinic devlink alloc failed\n"); return -ENOMEM; diff --git a/drivers/net/ethernet/i825xx/82596.c b/drivers/net/ethernet/i825xx/82596.c index fc8c7cd67471..b8a40146b895 100644 --- a/drivers/net/ethernet/i825xx/82596.c +++ b/drivers/net/ethernet/i825xx/82596.c @@ -1110,9 +1110,6 @@ static void print_eth(unsigned char *add, char *str) add, add + 6, add, add[12], add[13], str); } -static int io = 0x300; -static int irq = 10; - static const struct net_device_ops i596_netdev_ops = { .ndo_open = i596_open, .ndo_stop = i596_close, @@ -1123,7 +1120,7 @@ static const struct net_device_ops i596_netdev_ops = { .ndo_validate_addr = eth_validate_addr, }; -struct net_device * __init i82596_probe(int unit) +static struct net_device * __init i82596_probe(void) { struct net_device *dev; int i; @@ -1140,14 +1137,6 @@ struct net_device * __init i82596_probe(int unit) if (!dev) return ERR_PTR(-ENOMEM); - if (unit >= 0) { - sprintf(dev->name, "eth%d", unit); - netdev_boot_setup_check(dev); - } else { - dev->base_addr = io; - dev->irq = irq; - } - #ifdef ENABLE_MVME16x_NET if (MACH_IS_MVME16x) { if (mvme16x_config & MVME16x_CONFIG_NO_ETHERNET) { @@ -1515,22 +1504,22 @@ static void set_multicast_list(struct net_device *dev) } } -#ifdef MODULE static struct net_device *dev_82596; static int debug = -1; module_param(debug, int, 0); MODULE_PARM_DESC(debug, "i82596 debug mask"); -int __init init_module(void) +static int __init i82596_init(void) { if (debug >= 0) i596_debug = debug; - dev_82596 = i82596_probe(-1); + dev_82596 = i82596_probe(); return PTR_ERR_OR_ZERO(dev_82596); } +module_init(i82596_init); -void __exit cleanup_module(void) +static void __exit i82596_cleanup(void) { unregister_netdev(dev_82596); #ifdef __mc68000__ @@ -1544,5 +1533,4 @@ void __exit cleanup_module(void) free_page ((u32)(dev_82596->mem_start)); free_netdev(dev_82596); } - -#endif /* MODULE */ +module_exit(i82596_cleanup); diff --git a/drivers/net/ethernet/i825xx/sun3_82586.c b/drivers/net/ethernet/i825xx/sun3_82586.c index 4564ee02c95f..893e0ddcb611 100644 --- a/drivers/net/ethernet/i825xx/sun3_82586.c +++ b/drivers/net/ethernet/i825xx/sun3_82586.c @@ -29,6 +29,7 @@ static int rfdadd = 0; /* rfdadd=1 may be better for 8K MEM cards */ static int fifo=0x8; /* don't change */ #include <linux/kernel.h> +#include <linux/module.h> #include <linux/string.h> #include <linux/errno.h> #include <linux/ioport.h> @@ -276,7 +277,7 @@ static void alloc586(struct net_device *dev) memset((char *)p->scb,0,sizeof(struct scb_struct)); } -struct net_device * __init sun3_82586_probe(int unit) +static int __init sun3_82586_probe(void) { struct net_device *dev; unsigned long ioaddr; @@ -291,25 +292,20 @@ struct net_device * __init sun3_82586_probe(int unit) break; default: - return ERR_PTR(-ENODEV); + return -ENODEV; } if (found) - return ERR_PTR(-ENODEV); + return -ENODEV; ioaddr = (unsigned long)ioremap(IE_OBIO, SUN3_82586_TOTAL_SIZE); if (!ioaddr) - return ERR_PTR(-ENOMEM); + return -ENOMEM; found = 1; dev = alloc_etherdev(sizeof(struct priv)); if (!dev) goto out; - if (unit >= 0) { - sprintf(dev->name, "eth%d", unit); - netdev_boot_setup_check(dev); - } - dev->irq = IE_IRQ; dev->base_addr = ioaddr; err = sun3_82586_probe1(dev, ioaddr); @@ -326,8 +322,9 @@ out1: free_netdev(dev); out: iounmap((void __iomem *)ioaddr); - return ERR_PTR(err); + return err; } +module_init(sun3_82586_probe); static const struct net_device_ops sun3_82586_netdev_ops = { .ndo_open = sun3_82586_open, diff --git a/drivers/net/ethernet/intel/ice/ice_devlink.c b/drivers/net/ethernet/intel/ice/ice_devlink.c index 91b545ab8b8f..8c863d64930b 100644 --- a/drivers/net/ethernet/intel/ice/ice_devlink.c +++ b/drivers/net/ethernet/intel/ice/ice_devlink.c @@ -475,7 +475,7 @@ struct ice_pf *ice_allocate_pf(struct device *dev) { struct devlink *devlink; - devlink = devlink_alloc(&ice_devlink_ops, sizeof(struct ice_pf)); + devlink = devlink_alloc(&ice_devlink_ops, sizeof(struct ice_pf), dev); if (!devlink) return NULL; @@ -502,7 +502,7 @@ int ice_devlink_register(struct ice_pf *pf) struct device *dev = ice_pf_to_dev(pf); int err; - err = devlink_register(devlink, dev); + err = devlink_register(devlink); if (err) { dev_err(dev, "devlink registration failed: %d\n", err); return err; diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index ff8db311963c..5d1007e1b5c9 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -2327,7 +2327,7 @@ mvneta_swbm_build_skb(struct mvneta_port *pp, struct page_pool *pool, if (!skb) return ERR_PTR(-ENOMEM); - skb_mark_for_recycle(skb, virt_to_page(xdp->data), pool); + skb_mark_for_recycle(skb); skb_reserve(skb, xdp->data - xdp->data_hard_start); skb_put(skb, xdp->data_end - xdp->data); @@ -2339,10 +2339,6 @@ mvneta_swbm_build_skb(struct mvneta_port *pp, struct page_pool *pool, skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, skb_frag_page(frag), skb_frag_off(frag), skb_frag_size(frag), PAGE_SIZE); - /* We don't need to reset pp_recycle here. It's already set, so - * just mark fragments for recycling. - */ - page_pool_store_mem_info(skb_frag_page(frag), pool); } return skb; diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index 99bd8b8aa0e2..744f58f41ecc 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -3995,7 +3995,7 @@ static int mvpp2_rx(struct mvpp2_port *port, struct napi_struct *napi, } if (pp) - skb_mark_for_recycle(skb, page, pp); + skb_mark_for_recycle(skb); else dma_unmap_single_attrs(dev->dev.parent, dma_addr, bm_pool->buf_size, DMA_FROM_DEVICE, diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c index f95573a66ed4..a55b46ad162d 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c @@ -1397,7 +1397,7 @@ static int rvu_af_dl_dwrr_mtu_validate(struct devlink *devlink, u32 id, NL_SET_ERR_MSG_MOD(extack, "Changing DWRR MTU is not supported when there are active NIXLFs"); NL_SET_ERR_MSG_MOD(extack, - "Makesure none of the PF/VF interfaces are initialized and retry"); + "Make sure none of the PF/VF interfaces are initialized and retry"); return -EOPNOTSUPP; } @@ -1503,13 +1503,14 @@ int rvu_register_dl(struct rvu *rvu) struct devlink *dl; int err; - dl = devlink_alloc(&rvu_devlink_ops, sizeof(struct rvu_devlink)); + dl = devlink_alloc(&rvu_devlink_ops, sizeof(struct rvu_devlink), + rvu->dev); if (!dl) { dev_warn(rvu->dev, "devlink_alloc failed\n"); return -ENOMEM; } - err = devlink_register(dl, rvu->dev); + err = devlink_register(dl); if (err) { dev_err(rvu->dev, "devlink register failed with error %d\n", err); devlink_free(dl); diff --git a/drivers/net/ethernet/marvell/prestera/prestera_devlink.c b/drivers/net/ethernet/marvell/prestera/prestera_devlink.c index d12e21db9fd6..68b442eb6d69 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_devlink.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_devlink.c @@ -390,11 +390,12 @@ static const struct devlink_ops prestera_dl_ops = { .trap_drop_counter_get = prestera_drop_counter_get, }; -struct prestera_switch *prestera_devlink_alloc(void) +struct prestera_switch *prestera_devlink_alloc(struct prestera_device *dev) { struct devlink *dl; - dl = devlink_alloc(&prestera_dl_ops, sizeof(struct prestera_switch)); + dl = devlink_alloc(&prestera_dl_ops, sizeof(struct prestera_switch), + dev->dev); return devlink_priv(dl); } @@ -411,7 +412,7 @@ int prestera_devlink_register(struct prestera_switch *sw) struct devlink *dl = priv_to_devlink(sw); int err; - err = devlink_register(dl, sw->dev->dev); + err = devlink_register(dl); if (err) { dev_err(prestera_dev(sw), "devlink_register failed: %d\n", err); return err; @@ -530,6 +531,8 @@ err_trap_register: prestera_trap = &prestera_trap_items_arr[i]; devlink_traps_unregister(devlink, &prestera_trap->trap, 1); } + devlink_trap_groups_unregister(devlink, prestera_trap_groups_arr, + groups_count); err_groups_register: kfree(trap_data->trap_items_arr); err_trap_items_alloc: diff --git a/drivers/net/ethernet/marvell/prestera/prestera_devlink.h b/drivers/net/ethernet/marvell/prestera/prestera_devlink.h index 5d73aa9db897..cc34c3db13a2 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_devlink.h +++ b/drivers/net/ethernet/marvell/prestera/prestera_devlink.h @@ -6,7 +6,7 @@ #include "prestera.h" -struct prestera_switch *prestera_devlink_alloc(void); +struct prestera_switch *prestera_devlink_alloc(struct prestera_device *dev); void prestera_devlink_free(struct prestera_switch *sw); int prestera_devlink_register(struct prestera_switch *sw); diff --git a/drivers/net/ethernet/marvell/prestera/prestera_main.c b/drivers/net/ethernet/marvell/prestera/prestera_main.c index 7c569c1abefc..44c670807fb3 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_main.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_main.c @@ -905,7 +905,7 @@ int prestera_device_register(struct prestera_device *dev) struct prestera_switch *sw; int err; - sw = prestera_devlink_alloc(); + sw = prestera_devlink_alloc(dev); if (!sw) return -ENOMEM; diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 28ac4693da3c..7267c6c6d2e2 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -4005,7 +4005,7 @@ static int mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id) printk_once(KERN_INFO "%s", mlx4_version); - devlink = devlink_alloc(&mlx4_devlink_ops, sizeof(*priv)); + devlink = devlink_alloc(&mlx4_devlink_ops, sizeof(*priv), &pdev->dev); if (!devlink) return -ENOMEM; priv = devlink_priv(devlink); @@ -4024,7 +4024,7 @@ static int mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id) mutex_init(&dev->persist->interface_state_mutex); mutex_init(&dev->persist->pci_status_mutex); - ret = devlink_register(devlink, &pdev->dev); + ret = devlink_register(devlink); if (ret) goto err_persist_free; ret = devlink_params_register(devlink, mlx4_devlink_params, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 6378dc815df7..33e550d77fa6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -15,7 +15,7 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ health.o mcg.o cq.o alloc.o port.o mr.o pd.o \ transobj.o vport.o sriov.o fs_cmd.o fs_core.o pci_irq.o \ fs_counters.o fs_ft_pool.o rl.o lag.o dev.o events.o wq.o lib/gid.o \ - lib/devcom.o lib/pci_vsc.o lib/dm.o diag/fs_tracepoint.o \ + lib/devcom.o lib/pci_vsc.o lib/dm.o lib/fs_ttc.o diag/fs_tracepoint.o \ diag/fw_tracer.o diag/crdump.o devlink.o diag/rsc_dump.o \ fw_reset.o qos.o @@ -28,7 +28,7 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \ en/reporter_tx.o en/reporter_rx.o en/params.o en/xsk/pool.o \ en/xsk/setup.o en/xsk/rx.o en/xsk/tx.o en/devlink.o en/ptp.o \ en/qos.o en/trap.o en/fs_tt_redirect.o en/rqt.o en/tir.o \ - en/rx_res.o + en/rx_res.o en/channels.o # # Netdev extra diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index d791d351b489..f38553ff538b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -359,9 +359,10 @@ int mlx5_devlink_traps_get_action(struct mlx5_core_dev *dev, int trap_id, return 0; } -struct devlink *mlx5_devlink_alloc(void) +struct devlink *mlx5_devlink_alloc(struct device *dev) { - return devlink_alloc(&mlx5_devlink_ops, sizeof(struct mlx5_core_dev)); + return devlink_alloc(&mlx5_devlink_ops, sizeof(struct mlx5_core_dev), + dev); } void mlx5_devlink_free(struct devlink *devlink) @@ -638,11 +639,11 @@ static void mlx5_devlink_traps_unregister(struct devlink *devlink) ARRAY_SIZE(mlx5_trap_groups_arr)); } -int mlx5_devlink_register(struct devlink *devlink, struct device *dev) +int mlx5_devlink_register(struct devlink *devlink) { int err; - err = devlink_register(devlink, dev); + err = devlink_register(devlink); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.h b/drivers/net/ethernet/mellanox/mlx5/core/devlink.h index 7318d44b774b..30bf4882779b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.h @@ -31,9 +31,9 @@ int mlx5_devlink_trap_get_num_active(struct mlx5_core_dev *dev); int mlx5_devlink_traps_get_action(struct mlx5_core_dev *dev, int trap_id, enum devlink_trap_action *action); -struct devlink *mlx5_devlink_alloc(void); +struct devlink *mlx5_devlink_alloc(struct device *dev); void mlx5_devlink_free(struct devlink *devlink); -int mlx5_devlink_register(struct devlink *devlink, struct device *dev); +int mlx5_devlink_register(struct devlink *devlink); void mlx5_devlink_unregister(struct devlink *devlink); #endif /* __MLX5_DEVLINK_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 35668986878a..4f6897c1ea8d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -66,8 +66,6 @@ struct page_pool; #define MLX5E_METADATA_ETHER_TYPE (0x8CE4) #define MLX5E_METADATA_ETHER_LEN 8 -#define MLX5_SET_CFG(p, f, v) MLX5_SET(create_flow_group_in, p, f, v) - #define MLX5E_ETH_HARD_MTU (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN) #define MLX5E_HW2SW_MTU(params, hwmtu) ((hwmtu) - ((params)->hard_mtu)) @@ -140,6 +138,7 @@ struct page_pool; #define MLX5E_PARAMS_DEFAULT_MIN_RX_WQES_MPW 0x2 #define MLX5E_MIN_NUM_CHANNELS 0x1 +#define MLX5E_MAX_NUM_CHANNELS (MLX5E_INDIR_RQT_SIZE / 2) #define MLX5E_MAX_NUM_SQS (MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC) #define MLX5E_TX_CQ_POLL_BUDGET 128 #define MLX5E_TX_XSK_POLL_BUDGET 64 @@ -921,8 +920,6 @@ int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __always_unused __be16 proto, u16 vid); void mlx5e_timestamp_init(struct mlx5e_priv *priv); -int mlx5e_modify_tirs_hash(struct mlx5e_priv *priv); - struct mlx5e_xsk_param; struct mlx5e_rq_param; @@ -984,9 +981,6 @@ void mlx5e_activate_priv_channels(struct mlx5e_priv *priv); void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv); int mlx5e_ptp_rx_manage_fs_ctx(struct mlx5e_priv *priv, void *ctx); -void mlx5e_build_default_indir_rqt(u32 *indirection_rqt, int len, - int num_channels); - int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, int next_state); void mlx5e_activate_rq(struct mlx5e_rq *rq); void mlx5e_deactivate_rq(struct mlx5e_rq *rq); @@ -1036,16 +1030,6 @@ void mlx5e_close_drop_rq(struct mlx5e_rq *drop_rq); int mlx5e_init_di_list(struct mlx5e_rq *rq, int wq_sz, int node); void mlx5e_free_di_list(struct mlx5e_rq *rq); -int mlx5e_create_indirect_rqt(struct mlx5e_priv *priv); - -int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv, bool inner_ttc); -void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv); - -int mlx5e_create_direct_rqts(struct mlx5e_priv *priv); -void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv); -int mlx5e_create_direct_tirs(struct mlx5e_priv *priv); -void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv); - int mlx5e_create_tis(struct mlx5_core_dev *mdev, void *in, u32 *tisn); void mlx5e_destroy_tis(struct mlx5_core_dev *mdev, u32 tisn); @@ -1133,8 +1117,6 @@ int mlx5e_netdev_change_profile(struct mlx5e_priv *priv, void mlx5e_netdev_attach_nic_profile(struct mlx5e_priv *priv); void mlx5e_set_netdev_mtu_boundaries(struct mlx5e_priv *priv); void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16 mtu); -void mlx5e_build_rss_params(struct mlx5e_rss_params *rss_params, - u16 num_channels); void mlx5e_rx_dim_work(struct work_struct *work); void mlx5e_tx_dim_work(struct work_struct *work); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/channels.c b/drivers/net/ethernet/mellanox/mlx5/core/en/channels.c new file mode 100644 index 000000000000..e7c14c0de0a7 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/channels.c @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. */ + +#include "channels.h" +#include "en.h" +#include "en/ptp.h" + +unsigned int mlx5e_channels_get_num(struct mlx5e_channels *chs) +{ + return chs->num; +} + +void mlx5e_channels_get_regular_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn) +{ + struct mlx5e_channel *c; + + WARN_ON(ix >= mlx5e_channels_get_num(chs)); + c = chs->c[ix]; + + *rqn = c->rq.rqn; +} + +bool mlx5e_channels_get_xsk_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn) +{ + struct mlx5e_channel *c; + + WARN_ON(ix >= mlx5e_channels_get_num(chs)); + c = chs->c[ix]; + + if (!test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)) + return false; + + *rqn = c->xskrq.rqn; + return true; +} + +bool mlx5e_channels_get_ptp_rqn(struct mlx5e_channels *chs, u32 *rqn) +{ + struct mlx5e_ptp *c = chs->ptp; + + if (!c || !test_bit(MLX5E_PTP_STATE_RX, c->state)) + return false; + + *rqn = c->rq.rqn; + return true; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/channels.h b/drivers/net/ethernet/mellanox/mlx5/core/en/channels.h new file mode 100644 index 000000000000..ca00cbc827cb --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/channels.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. */ + +#ifndef __MLX5_EN_CHANNELS_H__ +#define __MLX5_EN_CHANNELS_H__ + +#include <linux/kernel.h> + +struct mlx5e_channels; + +unsigned int mlx5e_channels_get_num(struct mlx5e_channels *chs); +void mlx5e_channels_get_regular_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn); +bool mlx5e_channels_get_xsk_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn); +bool mlx5e_channels_get_ptp_rqn(struct mlx5e_channels *chs, u32 *rqn); + +#endif /* __MLX5_EN_CHANNELS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h index 0e053aab12b5..e348c276eaa1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h @@ -5,6 +5,7 @@ #define __MLX5E_FLOW_STEER_H__ #include "mod_hdr.h" +#include "lib/fs_ttc.h" enum { MLX5E_TC_FT_LEVEL = 0, @@ -67,21 +68,7 @@ struct mlx5e_l2_table { bool promisc_enabled; }; -enum mlx5e_traffic_types { - MLX5E_TT_IPV4_TCP, - MLX5E_TT_IPV6_TCP, - MLX5E_TT_IPV4_UDP, - MLX5E_TT_IPV6_UDP, - MLX5E_TT_IPV4_IPSEC_AH, - MLX5E_TT_IPV6_IPSEC_AH, - MLX5E_TT_IPV4_IPSEC_ESP, - MLX5E_TT_IPV6_IPSEC_ESP, - MLX5E_TT_IPV4, - MLX5E_TT_IPV6, - MLX5E_TT_ANY, - MLX5E_NUM_TT, - MLX5E_NUM_INDIR_TIRS = MLX5E_TT_ANY, -}; +#define MLX5E_NUM_INDIR_TIRS (MLX5_NUM_TT - 1) #define MLX5_HASH_IP (MLX5_HASH_FIELD_SEL_SRC_IP |\ MLX5_HASH_FIELD_SEL_DST_IP) @@ -93,30 +80,6 @@ enum mlx5e_traffic_types { MLX5_HASH_FIELD_SEL_DST_IP |\ MLX5_HASH_FIELD_SEL_IPSEC_SPI) -enum mlx5e_tunnel_types { - MLX5E_TT_IPV4_GRE, - MLX5E_TT_IPV6_GRE, - MLX5E_TT_IPV4_IPIP, - MLX5E_TT_IPV6_IPIP, - MLX5E_TT_IPV4_IPV6, - MLX5E_TT_IPV6_IPV6, - MLX5E_NUM_TUNNEL_TT, -}; - -bool mlx5e_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev); - -struct mlx5e_ttc_rule { - struct mlx5_flow_handle *rule; - struct mlx5_flow_destination default_dest; -}; - -/* L3/L4 traffic type classifier */ -struct mlx5e_ttc_table { - struct mlx5e_flow_table ft; - struct mlx5e_ttc_rule rules[MLX5E_NUM_TT]; - struct mlx5_flow_handle *tunnel_rules[MLX5E_NUM_TUNNEL_TT]; -}; - /* NIC prio FTS */ enum { MLX5E_PROMISC_FT_LEVEL, @@ -138,22 +101,6 @@ enum { #endif }; -#define MLX5E_TTC_NUM_GROUPS 3 -#define MLX5E_TTC_GROUP1_SIZE (BIT(3) + MLX5E_NUM_TUNNEL_TT) -#define MLX5E_TTC_GROUP2_SIZE BIT(1) -#define MLX5E_TTC_GROUP3_SIZE BIT(0) -#define MLX5E_TTC_TABLE_SIZE (MLX5E_TTC_GROUP1_SIZE +\ - MLX5E_TTC_GROUP2_SIZE +\ - MLX5E_TTC_GROUP3_SIZE) - -#define MLX5E_INNER_TTC_NUM_GROUPS 3 -#define MLX5E_INNER_TTC_GROUP1_SIZE BIT(3) -#define MLX5E_INNER_TTC_GROUP2_SIZE BIT(1) -#define MLX5E_INNER_TTC_GROUP3_SIZE BIT(0) -#define MLX5E_INNER_TTC_TABLE_SIZE (MLX5E_INNER_TTC_GROUP1_SIZE +\ - MLX5E_INNER_TTC_GROUP2_SIZE +\ - MLX5E_INNER_TTC_GROUP3_SIZE) - struct mlx5e_priv; #ifdef CONFIG_MLX5_EN_RXNFC @@ -222,8 +169,8 @@ struct mlx5e_flow_steering { struct mlx5e_promisc_table promisc; struct mlx5e_vlan_table *vlan; struct mlx5e_l2_table l2; - struct mlx5e_ttc_table ttc; - struct mlx5e_ttc_table inner_ttc; + struct mlx5_ttc_table *ttc; + struct mlx5_ttc_table *inner_ttc; #ifdef CONFIG_MLX5_EN_ARFS struct mlx5e_arfs_tables *arfs; #endif @@ -235,27 +182,13 @@ struct mlx5e_flow_steering { struct mlx5e_ptp_fs *ptp_fs; }; -struct ttc_params { - struct mlx5_flow_table_attr ft_attr; - u32 any_tt_tirn; - u32 indir_tirn[MLX5E_NUM_INDIR_TIRS]; - struct mlx5e_ttc_table *inner_ttc; -}; - -void mlx5e_set_ttc_basic_params(struct mlx5e_priv *priv, struct ttc_params *ttc_params); -void mlx5e_set_ttc_ft_params(struct ttc_params *ttc_params); +void mlx5e_set_ttc_params(struct mlx5e_priv *priv, + struct ttc_params *ttc_params, bool tunnel); -int mlx5e_create_ttc_table(struct mlx5e_priv *priv, struct ttc_params *params, - struct mlx5e_ttc_table *ttc); -void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv, - struct mlx5e_ttc_table *ttc); +void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv); +int mlx5e_create_ttc_table(struct mlx5e_priv *priv); void mlx5e_destroy_flow_table(struct mlx5e_flow_table *ft); -int mlx5e_ttc_fwd_dest(struct mlx5e_priv *priv, enum mlx5e_traffic_types type, - struct mlx5_flow_destination *new_dest); -struct mlx5_flow_destination -mlx5e_ttc_get_default_dest(struct mlx5e_priv *priv, enum mlx5e_traffic_types type); -int mlx5e_ttc_fwd_default_dest(struct mlx5e_priv *priv, enum mlx5e_traffic_types type); void mlx5e_enable_cvlan_filter(struct mlx5e_priv *priv); void mlx5e_disable_cvlan_filter(struct mlx5e_priv *priv); @@ -263,7 +196,6 @@ void mlx5e_disable_cvlan_filter(struct mlx5e_priv *priv); int mlx5e_create_flow_steering(struct mlx5e_priv *priv); void mlx5e_destroy_flow_steering(struct mlx5e_priv *priv); -u8 mlx5e_get_proto_by_tunnel_type(enum mlx5e_tunnel_types tt); int mlx5e_add_vlan_trap(struct mlx5e_priv *priv, int trap_id, int tir_num); void mlx5e_remove_vlan_trap(struct mlx5e_priv *priv); int mlx5e_add_mac_trap(struct mlx5e_priv *priv, int trap_id, int tir_num); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c index 909faa6c89d7..7aa25a5e29d7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c @@ -33,22 +33,22 @@ static char *fs_udp_type2str(enum fs_udp_type i) } } -static enum mlx5e_traffic_types fs_udp2tt(enum fs_udp_type i) +static enum mlx5_traffic_types fs_udp2tt(enum fs_udp_type i) { switch (i) { case FS_IPV4_UDP: - return MLX5E_TT_IPV4_UDP; + return MLX5_TT_IPV4_UDP; default: /* FS_IPV6_UDP */ - return MLX5E_TT_IPV6_UDP; + return MLX5_TT_IPV6_UDP; } } -static enum fs_udp_type tt2fs_udp(enum mlx5e_traffic_types i) +static enum fs_udp_type tt2fs_udp(enum mlx5_traffic_types i) { switch (i) { - case MLX5E_TT_IPV4_UDP: + case MLX5_TT_IPV4_UDP: return FS_IPV4_UDP; - case MLX5E_TT_IPV6_UDP: + case MLX5_TT_IPV6_UDP: return FS_IPV6_UDP; default: return FS_UDP_NUM_TYPES; @@ -75,7 +75,7 @@ static void fs_udp_set_dport_flow(struct mlx5_flow_spec *spec, enum fs_udp_type struct mlx5_flow_handle * mlx5e_fs_tt_redirect_udp_add_rule(struct mlx5e_priv *priv, - enum mlx5e_traffic_types ttc_type, + enum mlx5_traffic_types ttc_type, u32 tir_num, u16 d_port) { enum fs_udp_type type = tt2fs_udp(ttc_type); @@ -124,7 +124,7 @@ static int fs_udp_add_default_rule(struct mlx5e_priv *priv, enum fs_udp_type typ fs_udp = priv->fs.udp; fs_udp_t = &fs_udp->tables[type]; - dest = mlx5e_ttc_get_default_dest(priv, fs_udp2tt(type)); + dest = mlx5_ttc_get_default_dest(priv->fs.ttc, fs_udp2tt(type)); rule = mlx5_add_flow_rules(fs_udp_t->t, NULL, &flow_act, &dest, 1); if (IS_ERR(rule)) { err = PTR_ERR(rule); @@ -259,7 +259,7 @@ static int fs_udp_disable(struct mlx5e_priv *priv) for (i = 0; i < FS_UDP_NUM_TYPES; i++) { /* Modify ttc rules destination to point back to the indir TIRs */ - err = mlx5e_ttc_fwd_default_dest(priv, fs_udp2tt(i)); + err = mlx5_ttc_fwd_default_dest(priv->fs.ttc, fs_udp2tt(i)); if (err) { netdev_err(priv->netdev, "%s: modify ttc[%d] default destination failed, err(%d)\n", @@ -281,7 +281,7 @@ static int fs_udp_enable(struct mlx5e_priv *priv) dest.ft = priv->fs.udp->tables[i].t; /* Modify ttc rules destination to point on the accel_fs FTs */ - err = mlx5e_ttc_fwd_dest(priv, fs_udp2tt(i), &dest); + err = mlx5_ttc_fwd_dest(priv->fs.ttc, fs_udp2tt(i), &dest); if (err) { netdev_err(priv->netdev, "%s: modify ttc[%d] destination to accel failed, err(%d)\n", @@ -401,7 +401,7 @@ static int fs_any_add_default_rule(struct mlx5e_priv *priv) fs_any = priv->fs.any; fs_any_t = &fs_any->table; - dest = mlx5e_ttc_get_default_dest(priv, MLX5E_TT_ANY); + dest = mlx5_ttc_get_default_dest(priv->fs.ttc, MLX5_TT_ANY); rule = mlx5_add_flow_rules(fs_any_t->t, NULL, &flow_act, &dest, 1); if (IS_ERR(rule)) { err = PTR_ERR(rule); @@ -514,11 +514,11 @@ static int fs_any_disable(struct mlx5e_priv *priv) int err; /* Modify ttc rules destination to point back to the indir TIRs */ - err = mlx5e_ttc_fwd_default_dest(priv, MLX5E_TT_ANY); + err = mlx5_ttc_fwd_default_dest(priv->fs.ttc, MLX5_TT_ANY); if (err) { netdev_err(priv->netdev, "%s: modify ttc[%d] default destination failed, err(%d)\n", - __func__, MLX5E_TT_ANY, err); + __func__, MLX5_TT_ANY, err); return err; } return 0; @@ -533,11 +533,11 @@ static int fs_any_enable(struct mlx5e_priv *priv) dest.ft = priv->fs.any->table.t; /* Modify ttc rules destination to point on the accel_fs FTs */ - err = mlx5e_ttc_fwd_dest(priv, MLX5E_TT_ANY, &dest); + err = mlx5_ttc_fwd_dest(priv->fs.ttc, MLX5_TT_ANY, &dest); if (err) { netdev_err(priv->netdev, "%s: modify ttc[%d] destination to accel failed, err(%d)\n", - __func__, MLX5E_TT_ANY, err); + __func__, MLX5_TT_ANY, err); return err; } return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.h index 8385df24eb99..7a70c4f38fda 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.h @@ -12,7 +12,7 @@ void mlx5e_fs_tt_redirect_del_rule(struct mlx5_flow_handle *rule); /* UDP traffic type redirect */ struct mlx5_flow_handle * mlx5e_fs_tt_redirect_udp_add_rule(struct mlx5e_priv *priv, - enum mlx5e_traffic_types ttc_type, + enum mlx5_traffic_types ttc_type, u32 tir_num, u16 d_port); void mlx5e_fs_tt_redirect_udp_destroy(struct mlx5e_priv *priv); int mlx5e_fs_tt_redirect_udp_create(struct mlx5e_priv *priv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c index 8ff8b02c056f..f479ef31ca40 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c @@ -605,8 +605,8 @@ static void mlx5e_ptp_rx_unset_fs(struct mlx5e_priv *priv) static int mlx5e_ptp_rx_set_fs(struct mlx5e_priv *priv) { + u32 tirn = mlx5e_rx_res_get_tirn_ptp(priv->rx_res); struct mlx5e_ptp_fs *ptp_fs = priv->fs.ptp_fs; - u32 tirn = priv->rx_res->ptp.tir.tirn; struct mlx5_flow_handle *rule; int err; @@ -617,7 +617,7 @@ static int mlx5e_ptp_rx_set_fs(struct mlx5e_priv *priv) if (err) goto out_free; - rule = mlx5e_fs_tt_redirect_udp_add_rule(priv, MLX5E_TT_IPV4_UDP, + rule = mlx5e_fs_tt_redirect_udp_add_rule(priv, MLX5_TT_IPV4_UDP, tirn, PTP_EV_PORT); if (IS_ERR(rule)) { err = PTR_ERR(rule); @@ -625,7 +625,7 @@ static int mlx5e_ptp_rx_set_fs(struct mlx5e_priv *priv) } ptp_fs->udp_v4_rule = rule; - rule = mlx5e_fs_tt_redirect_udp_add_rule(priv, MLX5E_TT_IPV6_UDP, + rule = mlx5e_fs_tt_redirect_udp_add_rule(priv, MLX5_TT_IPV6_UDP, tirn, PTP_EV_PORT); if (IS_ERR(rule)) { err = PTR_ERR(rule); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.c index 38d0e9dbd6bd..b915fb29dd2c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.c @@ -4,6 +4,15 @@ #include "rqt.h" #include <linux/mlx5/transobj.h> +void mlx5e_rss_params_indir_init_uniform(struct mlx5e_rss_params_indir *indir, + unsigned int num_channels) +{ + unsigned int i; + + for (i = 0; i < MLX5E_INDIR_RQT_SIZE; i++) + indir->table[i] = i % num_channels; +} + static int mlx5e_rqt_init(struct mlx5e_rqt *rqt, struct mlx5_core_dev *mdev, u16 max_size, u32 *init_rqns, u16 init_size) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.h index d2c76649efb0..60c985a12f24 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.h @@ -14,6 +14,9 @@ struct mlx5e_rss_params_indir { u32 table[MLX5E_INDIR_RQT_SIZE]; }; +void mlx5e_rss_params_indir_init_uniform(struct mlx5e_rss_params_indir *indir, + unsigned int num_channels); + struct mlx5e_rqt { struct mlx5_core_dev *mdev; u32 rqtn; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c index 8fc1dfc4e830..e2a8fe13f29d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c @@ -2,54 +2,56 @@ /* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. */ #include "rx_res.h" +#include "channels.h" +#include "params.h" static const struct mlx5e_rss_params_traffic_type rss_default_config[MLX5E_NUM_INDIR_TIRS] = { - [MLX5E_TT_IPV4_TCP] = { + [MLX5_TT_IPV4_TCP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4, .l4_prot_type = MLX5_L4_PROT_TYPE_TCP, .rx_hash_fields = MLX5_HASH_IP_L4PORTS, }, - [MLX5E_TT_IPV6_TCP] = { + [MLX5_TT_IPV6_TCP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6, .l4_prot_type = MLX5_L4_PROT_TYPE_TCP, .rx_hash_fields = MLX5_HASH_IP_L4PORTS, }, - [MLX5E_TT_IPV4_UDP] = { + [MLX5_TT_IPV4_UDP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4, .l4_prot_type = MLX5_L4_PROT_TYPE_UDP, .rx_hash_fields = MLX5_HASH_IP_L4PORTS, }, - [MLX5E_TT_IPV6_UDP] = { + [MLX5_TT_IPV6_UDP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6, .l4_prot_type = MLX5_L4_PROT_TYPE_UDP, .rx_hash_fields = MLX5_HASH_IP_L4PORTS, }, - [MLX5E_TT_IPV4_IPSEC_AH] = { + [MLX5_TT_IPV4_IPSEC_AH] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4, .l4_prot_type = 0, .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI, }, - [MLX5E_TT_IPV6_IPSEC_AH] = { + [MLX5_TT_IPV6_IPSEC_AH] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6, .l4_prot_type = 0, .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI, }, - [MLX5E_TT_IPV4_IPSEC_ESP] = { + [MLX5_TT_IPV4_IPSEC_ESP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4, .l4_prot_type = 0, .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI, }, - [MLX5E_TT_IPV6_IPSEC_ESP] = { + [MLX5_TT_IPV6_IPSEC_ESP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6, .l4_prot_type = 0, .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI, }, - [MLX5E_TT_IPV4] = { + [MLX5_TT_IPV4] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4, .l4_prot_type = 0, .rx_hash_fields = MLX5_HASH_IP, }, - [MLX5E_TT_IPV6] = { + [MLX5_TT_IPV6] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6, .l4_prot_type = 0, .rx_hash_fields = MLX5_HASH_IP, @@ -57,13 +59,556 @@ static const struct mlx5e_rss_params_traffic_type rss_default_config[MLX5E_NUM_I }; struct mlx5e_rss_params_traffic_type -mlx5e_rss_get_default_tt_config(enum mlx5e_traffic_types tt) +mlx5e_rss_get_default_tt_config(enum mlx5_traffic_types tt) { return rss_default_config[tt]; } +struct mlx5e_rx_res { + struct mlx5_core_dev *mdev; + enum mlx5e_rx_res_features features; + unsigned int max_nch; + u32 drop_rqn; + + struct { + struct mlx5e_rss_params_hash hash; + struct mlx5e_rss_params_indir indir; + u32 rx_hash_fields[MLX5E_NUM_INDIR_TIRS]; + } rss_params; + + struct mlx5e_rqt indir_rqt; + struct { + struct mlx5e_tir indir_tir; + struct mlx5e_tir inner_indir_tir; + } rss[MLX5E_NUM_INDIR_TIRS]; + + bool rss_active; + u32 rss_rqns[MLX5E_INDIR_RQT_SIZE]; + unsigned int rss_nch; + + struct { + struct mlx5e_rqt direct_rqt; + struct mlx5e_tir direct_tir; + struct mlx5e_rqt xsk_rqt; + struct mlx5e_tir xsk_tir; + } *channels; + + struct { + struct mlx5e_rqt rqt; + struct mlx5e_tir tir; + } ptp; +}; + +struct mlx5e_rx_res *mlx5e_rx_res_alloc(void) +{ + return kvzalloc(sizeof(struct mlx5e_rx_res), GFP_KERNEL); +} + +static void mlx5e_rx_res_rss_params_init(struct mlx5e_rx_res *res, unsigned int init_nch) +{ + enum mlx5_traffic_types tt; + + res->rss_params.hash.hfunc = ETH_RSS_HASH_TOP; + netdev_rss_key_fill(res->rss_params.hash.toeplitz_hash_key, + sizeof(res->rss_params.hash.toeplitz_hash_key)); + mlx5e_rss_params_indir_init_uniform(&res->rss_params.indir, init_nch); + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) + res->rss_params.rx_hash_fields[tt] = + mlx5e_rss_get_default_tt_config(tt).rx_hash_fields; +} + +static int mlx5e_rx_res_rss_init(struct mlx5e_rx_res *res, + const struct mlx5e_lro_param *init_lro_param) +{ + bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT; + enum mlx5_traffic_types tt, max_tt; + struct mlx5e_tir_builder *builder; + u32 indir_rqtn; + int err; + + builder = mlx5e_tir_builder_alloc(false); + if (!builder) + return -ENOMEM; + + err = mlx5e_rqt_init_direct(&res->indir_rqt, res->mdev, true, res->drop_rqn); + if (err) + goto out; + + indir_rqtn = mlx5e_rqt_get_rqtn(&res->indir_rqt); + + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { + struct mlx5e_rss_params_traffic_type rss_tt; + + mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn, + indir_rqtn, inner_ft_support); + mlx5e_tir_builder_build_lro(builder, init_lro_param); + rss_tt = mlx5e_rx_res_rss_get_current_tt_config(res, tt); + mlx5e_tir_builder_build_rss(builder, &res->rss_params.hash, &rss_tt, false); + + err = mlx5e_tir_init(&res->rss[tt].indir_tir, builder, res->mdev, true); + if (err) { + mlx5_core_warn(res->mdev, "Failed to create an indirect TIR: err = %d, tt = %d\n", + err, tt); + goto err_destroy_tirs; + } + + mlx5e_tir_builder_clear(builder); + } + + if (!inner_ft_support) + goto out; + + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { + struct mlx5e_rss_params_traffic_type rss_tt; + + mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn, + indir_rqtn, inner_ft_support); + mlx5e_tir_builder_build_lro(builder, init_lro_param); + rss_tt = mlx5e_rx_res_rss_get_current_tt_config(res, tt); + mlx5e_tir_builder_build_rss(builder, &res->rss_params.hash, &rss_tt, true); + + err = mlx5e_tir_init(&res->rss[tt].inner_indir_tir, builder, res->mdev, true); + if (err) { + mlx5_core_warn(res->mdev, "Failed to create an inner indirect TIR: err = %d, tt = %d\n", + err, tt); + goto err_destroy_inner_tirs; + } + + mlx5e_tir_builder_clear(builder); + } + + goto out; + +err_destroy_inner_tirs: + max_tt = tt; + for (tt = 0; tt < max_tt; tt++) + mlx5e_tir_destroy(&res->rss[tt].inner_indir_tir); + + tt = MLX5E_NUM_INDIR_TIRS; +err_destroy_tirs: + max_tt = tt; + for (tt = 0; tt < max_tt; tt++) + mlx5e_tir_destroy(&res->rss[tt].indir_tir); + + mlx5e_rqt_destroy(&res->indir_rqt); + +out: + mlx5e_tir_builder_free(builder); + + return err; +} + +static int mlx5e_rx_res_channels_init(struct mlx5e_rx_res *res, + const struct mlx5e_lro_param *init_lro_param) +{ + bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT; + struct mlx5e_tir_builder *builder; + int err = 0; + int ix; + + builder = mlx5e_tir_builder_alloc(false); + if (!builder) + return -ENOMEM; + + res->channels = kvcalloc(res->max_nch, sizeof(*res->channels), GFP_KERNEL); + if (!res->channels) { + err = -ENOMEM; + goto out; + } + + for (ix = 0; ix < res->max_nch; ix++) { + err = mlx5e_rqt_init_direct(&res->channels[ix].direct_rqt, + res->mdev, false, res->drop_rqn); + if (err) { + mlx5_core_warn(res->mdev, "Failed to create a direct RQT: err = %d, ix = %u\n", + err, ix); + goto err_destroy_direct_rqts; + } + } + + for (ix = 0; ix < res->max_nch; ix++) { + mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn, + mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt), + inner_ft_support); + mlx5e_tir_builder_build_lro(builder, init_lro_param); + mlx5e_tir_builder_build_direct(builder); + + err = mlx5e_tir_init(&res->channels[ix].direct_tir, builder, res->mdev, true); + if (err) { + mlx5_core_warn(res->mdev, "Failed to create a direct TIR: err = %d, ix = %u\n", + err, ix); + goto err_destroy_direct_tirs; + } + + mlx5e_tir_builder_clear(builder); + } + + if (!(res->features & MLX5E_RX_RES_FEATURE_XSK)) + goto out; + + for (ix = 0; ix < res->max_nch; ix++) { + err = mlx5e_rqt_init_direct(&res->channels[ix].xsk_rqt, + res->mdev, false, res->drop_rqn); + if (err) { + mlx5_core_warn(res->mdev, "Failed to create an XSK RQT: err = %d, ix = %u\n", + err, ix); + goto err_destroy_xsk_rqts; + } + } + + for (ix = 0; ix < res->max_nch; ix++) { + mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn, + mlx5e_rqt_get_rqtn(&res->channels[ix].xsk_rqt), + inner_ft_support); + mlx5e_tir_builder_build_lro(builder, init_lro_param); + mlx5e_tir_builder_build_direct(builder); + + err = mlx5e_tir_init(&res->channels[ix].xsk_tir, builder, res->mdev, true); + if (err) { + mlx5_core_warn(res->mdev, "Failed to create an XSK TIR: err = %d, ix = %u\n", + err, ix); + goto err_destroy_xsk_tirs; + } + + mlx5e_tir_builder_clear(builder); + } + + goto out; + +err_destroy_xsk_tirs: + while (--ix >= 0) + mlx5e_tir_destroy(&res->channels[ix].xsk_tir); + + ix = res->max_nch; +err_destroy_xsk_rqts: + while (--ix >= 0) + mlx5e_rqt_destroy(&res->channels[ix].xsk_rqt); + + ix = res->max_nch; +err_destroy_direct_tirs: + while (--ix >= 0) + mlx5e_tir_destroy(&res->channels[ix].direct_tir); + + ix = res->max_nch; +err_destroy_direct_rqts: + while (--ix >= 0) + mlx5e_rqt_destroy(&res->channels[ix].direct_rqt); + + kvfree(res->channels); + +out: + mlx5e_tir_builder_free(builder); + + return err; +} + +static int mlx5e_rx_res_ptp_init(struct mlx5e_rx_res *res) +{ + bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT; + struct mlx5e_tir_builder *builder; + int err; + + builder = mlx5e_tir_builder_alloc(false); + if (!builder) + return -ENOMEM; + + err = mlx5e_rqt_init_direct(&res->ptp.rqt, res->mdev, false, res->drop_rqn); + if (err) + goto out; + + mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn, + mlx5e_rqt_get_rqtn(&res->ptp.rqt), + inner_ft_support); + mlx5e_tir_builder_build_direct(builder); + + err = mlx5e_tir_init(&res->ptp.tir, builder, res->mdev, true); + if (err) + goto err_destroy_ptp_rqt; + + goto out; + +err_destroy_ptp_rqt: + mlx5e_rqt_destroy(&res->ptp.rqt); + +out: + mlx5e_tir_builder_free(builder); + return err; +} + +static void mlx5e_rx_res_rss_destroy(struct mlx5e_rx_res *res) +{ + enum mlx5_traffic_types tt; + + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) + mlx5e_tir_destroy(&res->rss[tt].indir_tir); + + if (res->features & MLX5E_RX_RES_FEATURE_INNER_FT) + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) + mlx5e_tir_destroy(&res->rss[tt].inner_indir_tir); + + mlx5e_rqt_destroy(&res->indir_rqt); +} + +static void mlx5e_rx_res_channels_destroy(struct mlx5e_rx_res *res) +{ + unsigned int ix; + + for (ix = 0; ix < res->max_nch; ix++) { + mlx5e_tir_destroy(&res->channels[ix].direct_tir); + mlx5e_rqt_destroy(&res->channels[ix].direct_rqt); + + if (!(res->features & MLX5E_RX_RES_FEATURE_XSK)) + continue; + + mlx5e_tir_destroy(&res->channels[ix].xsk_tir); + mlx5e_rqt_destroy(&res->channels[ix].xsk_rqt); + } + + kvfree(res->channels); +} + +static void mlx5e_rx_res_ptp_destroy(struct mlx5e_rx_res *res) +{ + mlx5e_tir_destroy(&res->ptp.tir); + mlx5e_rqt_destroy(&res->ptp.rqt); +} + +int mlx5e_rx_res_init(struct mlx5e_rx_res *res, struct mlx5_core_dev *mdev, + enum mlx5e_rx_res_features features, unsigned int max_nch, + u32 drop_rqn, const struct mlx5e_lro_param *init_lro_param, + unsigned int init_nch) +{ + int err; + + res->mdev = mdev; + res->features = features; + res->max_nch = max_nch; + res->drop_rqn = drop_rqn; + + mlx5e_rx_res_rss_params_init(res, init_nch); + + err = mlx5e_rx_res_rss_init(res, init_lro_param); + if (err) + return err; + + err = mlx5e_rx_res_channels_init(res, init_lro_param); + if (err) + goto err_rss_destroy; + + err = mlx5e_rx_res_ptp_init(res); + if (err) + goto err_channels_destroy; + + return 0; + +err_channels_destroy: + mlx5e_rx_res_channels_destroy(res); +err_rss_destroy: + mlx5e_rx_res_rss_destroy(res); + return err; +} + +void mlx5e_rx_res_destroy(struct mlx5e_rx_res *res) +{ + mlx5e_rx_res_ptp_destroy(res); + mlx5e_rx_res_channels_destroy(res); + mlx5e_rx_res_rss_destroy(res); +} + +void mlx5e_rx_res_free(struct mlx5e_rx_res *res) +{ + kvfree(res); +} + +u32 mlx5e_rx_res_get_tirn_direct(struct mlx5e_rx_res *res, unsigned int ix) +{ + return mlx5e_tir_get_tirn(&res->channels[ix].direct_tir); +} + +u32 mlx5e_rx_res_get_tirn_xsk(struct mlx5e_rx_res *res, unsigned int ix) +{ + WARN_ON(!(res->features & MLX5E_RX_RES_FEATURE_XSK)); + + return mlx5e_tir_get_tirn(&res->channels[ix].xsk_tir); +} + +u32 mlx5e_rx_res_get_tirn_rss(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt) +{ + return mlx5e_tir_get_tirn(&res->rss[tt].indir_tir); +} + +u32 mlx5e_rx_res_get_tirn_rss_inner(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt) +{ + WARN_ON(!(res->features & MLX5E_RX_RES_FEATURE_INNER_FT)); + return mlx5e_tir_get_tirn(&res->rss[tt].inner_indir_tir); +} + +u32 mlx5e_rx_res_get_tirn_ptp(struct mlx5e_rx_res *res) +{ + WARN_ON(!(res->features & MLX5E_RX_RES_FEATURE_PTP)); + return mlx5e_tir_get_tirn(&res->ptp.tir); +} + +u32 mlx5e_rx_res_get_rqtn_direct(struct mlx5e_rx_res *res, unsigned int ix) +{ + return mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt); +} + +static void mlx5e_rx_res_rss_enable(struct mlx5e_rx_res *res) +{ + int err; + + res->rss_active = true; + + err = mlx5e_rqt_redirect_indir(&res->indir_rqt, res->rss_rqns, res->rss_nch, + res->rss_params.hash.hfunc, + &res->rss_params.indir); + if (err) + mlx5_core_warn(res->mdev, "Failed to redirect indirect RQT %#x to channels: err = %d\n", + mlx5e_rqt_get_rqtn(&res->indir_rqt), err); +} + +static void mlx5e_rx_res_rss_disable(struct mlx5e_rx_res *res) +{ + int err; + + res->rss_active = false; + + err = mlx5e_rqt_redirect_direct(&res->indir_rqt, res->drop_rqn); + if (err) + mlx5_core_warn(res->mdev, "Failed to redirect indirect RQT %#x to drop RQ %#x: err = %d\n", + mlx5e_rqt_get_rqtn(&res->indir_rqt), res->drop_rqn, err); +} + +void mlx5e_rx_res_channels_activate(struct mlx5e_rx_res *res, struct mlx5e_channels *chs) +{ + unsigned int nch, ix; + int err; + + nch = mlx5e_channels_get_num(chs); + + for (ix = 0; ix < chs->num; ix++) + mlx5e_channels_get_regular_rqn(chs, ix, &res->rss_rqns[ix]); + res->rss_nch = chs->num; + + mlx5e_rx_res_rss_enable(res); + + for (ix = 0; ix < nch; ix++) { + u32 rqn; + + mlx5e_channels_get_regular_rqn(chs, ix, &rqn); + err = mlx5e_rqt_redirect_direct(&res->channels[ix].direct_rqt, rqn); + if (err) + mlx5_core_warn(res->mdev, "Failed to redirect direct RQT %#x to RQ %#x (channel %u): err = %d\n", + mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt), + rqn, ix, err); + + if (!(res->features & MLX5E_RX_RES_FEATURE_XSK)) + continue; + + if (!mlx5e_channels_get_xsk_rqn(chs, ix, &rqn)) + rqn = res->drop_rqn; + err = mlx5e_rqt_redirect_direct(&res->channels[ix].xsk_rqt, rqn); + if (err) + mlx5_core_warn(res->mdev, "Failed to redirect XSK RQT %#x to RQ %#x (channel %u): err = %d\n", + mlx5e_rqt_get_rqtn(&res->channels[ix].xsk_rqt), + rqn, ix, err); + } + for (ix = nch; ix < res->max_nch; ix++) { + err = mlx5e_rqt_redirect_direct(&res->channels[ix].direct_rqt, res->drop_rqn); + if (err) + mlx5_core_warn(res->mdev, "Failed to redirect direct RQT %#x to drop RQ %#x (channel %u): err = %d\n", + mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt), + res->drop_rqn, ix, err); + + if (!(res->features & MLX5E_RX_RES_FEATURE_XSK)) + continue; + + err = mlx5e_rqt_redirect_direct(&res->channels[ix].xsk_rqt, res->drop_rqn); + if (err) + mlx5_core_warn(res->mdev, "Failed to redirect XSK RQT %#x to drop RQ %#x (channel %u): err = %d\n", + mlx5e_rqt_get_rqtn(&res->channels[ix].xsk_rqt), + res->drop_rqn, ix, err); + } + + if (res->features & MLX5E_RX_RES_FEATURE_PTP) { + u32 rqn; + + if (mlx5e_channels_get_ptp_rqn(chs, &rqn)) + rqn = res->drop_rqn; + + err = mlx5e_rqt_redirect_direct(&res->ptp.rqt, rqn); + if (err) + mlx5_core_warn(res->mdev, "Failed to redirect direct RQT %#x to RQ %#x (PTP): err = %d\n", + mlx5e_rqt_get_rqtn(&res->ptp.rqt), + rqn, err); + } +} + +void mlx5e_rx_res_channels_deactivate(struct mlx5e_rx_res *res) +{ + unsigned int ix; + int err; + + mlx5e_rx_res_rss_disable(res); + + for (ix = 0; ix < res->max_nch; ix++) { + err = mlx5e_rqt_redirect_direct(&res->channels[ix].direct_rqt, res->drop_rqn); + if (err) + mlx5_core_warn(res->mdev, "Failed to redirect direct RQT %#x to drop RQ %#x (channel %u): err = %d\n", + mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt), + res->drop_rqn, ix, err); + + if (!(res->features & MLX5E_RX_RES_FEATURE_XSK)) + continue; + + err = mlx5e_rqt_redirect_direct(&res->channels[ix].xsk_rqt, res->drop_rqn); + if (err) + mlx5_core_warn(res->mdev, "Failed to redirect XSK RQT %#x to drop RQ %#x (channel %u): err = %d\n", + mlx5e_rqt_get_rqtn(&res->channels[ix].xsk_rqt), + res->drop_rqn, ix, err); + } + + if (res->features & MLX5E_RX_RES_FEATURE_PTP) { + err = mlx5e_rqt_redirect_direct(&res->ptp.rqt, res->drop_rqn); + if (err) + mlx5_core_warn(res->mdev, "Failed to redirect direct RQT %#x to drop RQ %#x (PTP): err = %d\n", + mlx5e_rqt_get_rqtn(&res->ptp.rqt), + res->drop_rqn, err); + } +} + +int mlx5e_rx_res_xsk_activate(struct mlx5e_rx_res *res, struct mlx5e_channels *chs, + unsigned int ix) +{ + u32 rqn; + int err; + + if (!mlx5e_channels_get_xsk_rqn(chs, ix, &rqn)) + return -EINVAL; + + err = mlx5e_rqt_redirect_direct(&res->channels[ix].xsk_rqt, rqn); + if (err) + mlx5_core_warn(res->mdev, "Failed to redirect XSK RQT %#x to XSK RQ %#x (channel %u): err = %d\n", + mlx5e_rqt_get_rqtn(&res->channels[ix].xsk_rqt), + rqn, ix, err); + return err; +} + +int mlx5e_rx_res_xsk_deactivate(struct mlx5e_rx_res *res, unsigned int ix) +{ + int err; + + err = mlx5e_rqt_redirect_direct(&res->channels[ix].xsk_rqt, res->drop_rqn); + if (err) + mlx5_core_warn(res->mdev, "Failed to redirect XSK RQT %#x to drop RQ %#x (channel %u): err = %d\n", + mlx5e_rqt_get_rqtn(&res->channels[ix].xsk_rqt), + res->drop_rqn, ix, err); + return err; +} + struct mlx5e_rss_params_traffic_type -mlx5e_rx_res_rss_get_current_tt_config(struct mlx5e_rx_res *res, enum mlx5e_traffic_types tt) +mlx5e_rx_res_rss_get_current_tt_config(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt) { struct mlx5e_rss_params_traffic_type rss_tt; @@ -71,3 +616,216 @@ mlx5e_rx_res_rss_get_current_tt_config(struct mlx5e_rx_res *res, enum mlx5e_traf rss_tt.rx_hash_fields = res->rss_params.rx_hash_fields[tt]; return rss_tt; } + +void mlx5e_rx_res_rss_set_indir_uniform(struct mlx5e_rx_res *res, unsigned int nch) +{ + mlx5e_rss_params_indir_init_uniform(&res->rss_params.indir, nch); + + if (!res->rss_active) + return; + + mlx5e_rx_res_rss_enable(res); +} + +void mlx5e_rx_res_rss_get_rxfh(struct mlx5e_rx_res *res, u32 *indir, u8 *key, u8 *hfunc) +{ + unsigned int i; + + if (indir) + for (i = 0; i < MLX5E_INDIR_RQT_SIZE; i++) + indir[i] = res->rss_params.indir.table[i]; + + if (key) + memcpy(key, res->rss_params.hash.toeplitz_hash_key, + sizeof(res->rss_params.hash.toeplitz_hash_key)); + + if (hfunc) + *hfunc = res->rss_params.hash.hfunc; +} + +static int mlx5e_rx_res_rss_update_tir(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt, + bool inner) +{ + struct mlx5e_rss_params_traffic_type rss_tt; + struct mlx5e_tir_builder *builder; + struct mlx5e_tir *tir; + int err; + + builder = mlx5e_tir_builder_alloc(true); + if (!builder) + return -ENOMEM; + + rss_tt = mlx5e_rx_res_rss_get_current_tt_config(res, tt); + + mlx5e_tir_builder_build_rss(builder, &res->rss_params.hash, &rss_tt, inner); + tir = inner ? &res->rss[tt].inner_indir_tir : &res->rss[tt].indir_tir; + err = mlx5e_tir_modify(tir, builder); + + mlx5e_tir_builder_free(builder); + return err; +} + +int mlx5e_rx_res_rss_set_rxfh(struct mlx5e_rx_res *res, const u32 *indir, + const u8 *key, const u8 *hfunc) +{ + enum mlx5_traffic_types tt; + bool changed_indir = false; + bool changed_hash = false; + int err; + + if (hfunc && *hfunc != res->rss_params.hash.hfunc) { + switch (*hfunc) { + case ETH_RSS_HASH_XOR: + case ETH_RSS_HASH_TOP: + break; + default: + return -EINVAL; + } + changed_hash = true; + changed_indir = true; + res->rss_params.hash.hfunc = *hfunc; + } + + if (key) { + if (res->rss_params.hash.hfunc == ETH_RSS_HASH_TOP) + changed_hash = true; + memcpy(res->rss_params.hash.toeplitz_hash_key, key, + sizeof(res->rss_params.hash.toeplitz_hash_key)); + } + + if (indir) { + unsigned int i; + + changed_indir = true; + + for (i = 0; i < MLX5E_INDIR_RQT_SIZE; i++) + res->rss_params.indir.table[i] = indir[i]; + } + + if (changed_indir && res->rss_active) { + err = mlx5e_rqt_redirect_indir(&res->indir_rqt, res->rss_rqns, res->rss_nch, + res->rss_params.hash.hfunc, + &res->rss_params.indir); + if (err) + mlx5_core_warn(res->mdev, "Failed to redirect indirect RQT %#x to channels: err = %d\n", + mlx5e_rqt_get_rqtn(&res->indir_rqt), err); + } + + if (changed_hash) + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { + err = mlx5e_rx_res_rss_update_tir(res, tt, false); + if (err) + mlx5_core_warn(res->mdev, "Failed to update RSS hash of indirect TIR for traffic type %d: err = %d\n", + tt, err); + + if (!(res->features & MLX5E_RX_RES_FEATURE_INNER_FT)) + continue; + + err = mlx5e_rx_res_rss_update_tir(res, tt, true); + if (err) + mlx5_core_warn(res->mdev, "Failed to update RSS hash of inner indirect TIR for traffic type %d: err = %d\n", + tt, err); + } + + return 0; +} + +u8 mlx5e_rx_res_rss_get_hash_fields(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt) +{ + return res->rss_params.rx_hash_fields[tt]; +} + +int mlx5e_rx_res_rss_set_hash_fields(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt, + u8 rx_hash_fields) +{ + u8 old_rx_hash_fields; + int err; + + old_rx_hash_fields = res->rss_params.rx_hash_fields[tt]; + + if (old_rx_hash_fields == rx_hash_fields) + return 0; + + res->rss_params.rx_hash_fields[tt] = rx_hash_fields; + + err = mlx5e_rx_res_rss_update_tir(res, tt, false); + if (err) { + res->rss_params.rx_hash_fields[tt] = old_rx_hash_fields; + mlx5_core_warn(res->mdev, "Failed to update RSS hash fields of indirect TIR for traffic type %d: err = %d\n", + tt, err); + return err; + } + + if (!(res->features & MLX5E_RX_RES_FEATURE_INNER_FT)) + return 0; + + err = mlx5e_rx_res_rss_update_tir(res, tt, true); + if (err) { + /* Partial update happened. Try to revert - it may fail too, but + * there is nothing more we can do. + */ + res->rss_params.rx_hash_fields[tt] = old_rx_hash_fields; + mlx5_core_warn(res->mdev, "Failed to update RSS hash fields of inner indirect TIR for traffic type %d: err = %d\n", + tt, err); + if (mlx5e_rx_res_rss_update_tir(res, tt, false)) + mlx5_core_warn(res->mdev, "Partial update of RSS hash fields happened: failed to revert indirect TIR for traffic type %d to the old values\n", + tt); + } + + return err; +} + +int mlx5e_rx_res_lro_set_param(struct mlx5e_rx_res *res, struct mlx5e_lro_param *lro_param) +{ + struct mlx5e_tir_builder *builder; + enum mlx5_traffic_types tt; + int err, final_err; + unsigned int ix; + + builder = mlx5e_tir_builder_alloc(true); + if (!builder) + return -ENOMEM; + + mlx5e_tir_builder_build_lro(builder, lro_param); + + final_err = 0; + + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { + err = mlx5e_tir_modify(&res->rss[tt].indir_tir, builder); + if (err) { + mlx5_core_warn(res->mdev, "Failed to update LRO state of indirect TIR %#x for traffic type %d: err = %d\n", + mlx5e_tir_get_tirn(&res->rss[tt].indir_tir), tt, err); + if (!final_err) + final_err = err; + } + + if (!(res->features & MLX5E_RX_RES_FEATURE_INNER_FT)) + continue; + + err = mlx5e_tir_modify(&res->rss[tt].inner_indir_tir, builder); + if (err) { + mlx5_core_warn(res->mdev, "Failed to update LRO state of inner indirect TIR %#x for traffic type %d: err = %d\n", + mlx5e_tir_get_tirn(&res->rss[tt].inner_indir_tir), tt, err); + if (!final_err) + final_err = err; + } + } + + for (ix = 0; ix < res->max_nch; ix++) { + err = mlx5e_tir_modify(&res->channels[ix].direct_tir, builder); + if (err) { + mlx5_core_warn(res->mdev, "Failed to update LRO state of direct TIR %#x for channel %u: err = %d\n", + mlx5e_tir_get_tirn(&res->channels[ix].direct_tir), ix, err); + if (!final_err) + final_err = err; + } + } + + mlx5e_tir_builder_free(builder); + return final_err; +} + +struct mlx5e_rss_params_hash mlx5e_rx_res_get_current_hash(struct mlx5e_rx_res *res) +{ + return res->rss_params.hash; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h index 068e48140a6f..1baeec5158a3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h @@ -9,39 +9,59 @@ #include "tir.h" #include "fs.h" -#define MLX5E_MAX_NUM_CHANNELS (MLX5E_INDIR_RQT_SIZE / 2) +struct mlx5e_rx_res; -struct mlx5e_rss_params { - struct mlx5e_rss_params_hash hash; - struct mlx5e_rss_params_indir indir; - u32 rx_hash_fields[MLX5E_NUM_INDIR_TIRS]; -}; +struct mlx5e_channels; +struct mlx5e_rss_params_hash; -struct mlx5e_rx_res { - struct mlx5e_rss_params rss_params; - - struct mlx5e_rqt indir_rqt; - struct { - struct mlx5e_tir indir_tir; - struct mlx5e_tir inner_indir_tir; - } rss[MLX5E_NUM_INDIR_TIRS]; - - struct { - struct mlx5e_rqt direct_rqt; - struct mlx5e_tir direct_tir; - struct mlx5e_rqt xsk_rqt; - struct mlx5e_tir xsk_tir; - } channels[MLX5E_MAX_NUM_CHANNELS]; - - struct { - struct mlx5e_rqt rqt; - struct mlx5e_tir tir; - } ptp; +enum mlx5e_rx_res_features { + MLX5E_RX_RES_FEATURE_INNER_FT = BIT(0), + MLX5E_RX_RES_FEATURE_XSK = BIT(1), + MLX5E_RX_RES_FEATURE_PTP = BIT(2), }; struct mlx5e_rss_params_traffic_type -mlx5e_rss_get_default_tt_config(enum mlx5e_traffic_types tt); +mlx5e_rss_get_default_tt_config(enum mlx5_traffic_types tt); + +/* Setup */ +struct mlx5e_rx_res *mlx5e_rx_res_alloc(void); +int mlx5e_rx_res_init(struct mlx5e_rx_res *res, struct mlx5_core_dev *mdev, + enum mlx5e_rx_res_features features, unsigned int max_nch, + u32 drop_rqn, const struct mlx5e_lro_param *init_lro_param, + unsigned int init_nch); +void mlx5e_rx_res_destroy(struct mlx5e_rx_res *res); +void mlx5e_rx_res_free(struct mlx5e_rx_res *res); + +/* TIRN getters for flow steering */ +u32 mlx5e_rx_res_get_tirn_direct(struct mlx5e_rx_res *res, unsigned int ix); +u32 mlx5e_rx_res_get_tirn_xsk(struct mlx5e_rx_res *res, unsigned int ix); +u32 mlx5e_rx_res_get_tirn_rss(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt); +u32 mlx5e_rx_res_get_tirn_rss_inner(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt); +u32 mlx5e_rx_res_get_tirn_ptp(struct mlx5e_rx_res *res); + +/* RQTN getters for modules that create their own TIRs */ +u32 mlx5e_rx_res_get_rqtn_direct(struct mlx5e_rx_res *res, unsigned int ix); + +/* Activate/deactivate API */ +void mlx5e_rx_res_channels_activate(struct mlx5e_rx_res *res, struct mlx5e_channels *chs); +void mlx5e_rx_res_channels_deactivate(struct mlx5e_rx_res *res); +int mlx5e_rx_res_xsk_activate(struct mlx5e_rx_res *res, struct mlx5e_channels *chs, + unsigned int ix); +int mlx5e_rx_res_xsk_deactivate(struct mlx5e_rx_res *res, unsigned int ix); + +/* Configuration API */ struct mlx5e_rss_params_traffic_type -mlx5e_rx_res_rss_get_current_tt_config(struct mlx5e_rx_res *res, enum mlx5e_traffic_types tt); +mlx5e_rx_res_rss_get_current_tt_config(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt); +void mlx5e_rx_res_rss_set_indir_uniform(struct mlx5e_rx_res *res, unsigned int nch); +void mlx5e_rx_res_rss_get_rxfh(struct mlx5e_rx_res *res, u32 *indir, u8 *key, u8 *hfunc); +int mlx5e_rx_res_rss_set_rxfh(struct mlx5e_rx_res *res, const u32 *indir, + const u8 *key, const u8 *hfunc); +u8 mlx5e_rx_res_rss_get_hash_fields(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt); +int mlx5e_rx_res_rss_set_hash_fields(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt, + u8 rx_hash_fields); +int mlx5e_rx_res_lro_set_param(struct mlx5e_rx_res *res, struct mlx5e_lro_param *lro_param); + +/* Workaround for hairpin */ +struct mlx5e_rss_params_hash mlx5e_rx_res_get_current_hash(struct mlx5e_rx_res *res); #endif /* __MLX5_EN_RX_RES_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c index 71e8d66fa150..7b562d2c8a19 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c @@ -122,7 +122,7 @@ static int mlx5e_xsk_enable_locked(struct mlx5e_priv *priv, * any Fill Ring entries at the setup stage. */ - err = mlx5e_xsk_redirect_rqt_to_channel(priv, priv->channels.c[ix]); + err = mlx5e_rx_res_xsk_activate(priv->rx_res, &priv->channels, ix); if (unlikely(err)) goto err_deactivate; @@ -169,7 +169,7 @@ static int mlx5e_xsk_disable_locked(struct mlx5e_priv *priv, u16 ix) goto remove_pool; c = priv->channels.c[ix]; - mlx5e_xsk_redirect_rqt_to_drop(priv, ix); + mlx5e_rx_res_xsk_deactivate(priv->rx_res, ix); mlx5e_deactivate_xsk(c); mlx5e_close_xsk(c); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c index ab485d082729..c06267477b27 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c @@ -183,59 +183,3 @@ void mlx5e_deactivate_xsk(struct mlx5e_channel *c) mlx5e_deactivate_rq(&c->xskrq); /* TX queue is disabled on close. */ } - -int mlx5e_xsk_redirect_rqt_to_channel(struct mlx5e_priv *priv, struct mlx5e_channel *c) -{ - return mlx5e_rqt_redirect_direct(&priv->rx_res->channels[c->ix].xsk_rqt, c->xskrq.rqn); -} - -int mlx5e_xsk_redirect_rqt_to_drop(struct mlx5e_priv *priv, u16 ix) -{ - return mlx5e_rqt_redirect_direct(&priv->rx_res->channels[ix].xsk_rqt, priv->drop_rq.rqn); -} - -int mlx5e_xsk_redirect_rqts_to_channels(struct mlx5e_priv *priv, struct mlx5e_channels *chs) -{ - int err, i; - - if (!priv->xsk.refcnt) - return 0; - - for (i = 0; i < chs->num; i++) { - struct mlx5e_channel *c = chs->c[i]; - - if (!test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)) - continue; - - err = mlx5e_xsk_redirect_rqt_to_channel(priv, c); - if (unlikely(err)) - goto err_stop; - } - - return 0; - -err_stop: - for (i--; i >= 0; i--) { - if (!test_bit(MLX5E_CHANNEL_STATE_XSK, chs->c[i]->state)) - continue; - - mlx5e_xsk_redirect_rqt_to_drop(priv, i); - } - - return err; -} - -void mlx5e_xsk_redirect_rqts_to_drop(struct mlx5e_priv *priv, struct mlx5e_channels *chs) -{ - int i; - - if (!priv->xsk.refcnt) - return; - - for (i = 0; i < chs->num; i++) { - if (!test_bit(MLX5E_CHANNEL_STATE_XSK, chs->c[i]->state)) - continue; - - mlx5e_xsk_redirect_rqt_to_drop(priv, i); - } -} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h index ca20f1ff5e39..50e111b85efd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h @@ -17,9 +17,5 @@ int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params, void mlx5e_close_xsk(struct mlx5e_channel *c); void mlx5e_activate_xsk(struct mlx5e_channel *c); void mlx5e_deactivate_xsk(struct mlx5e_channel *c); -int mlx5e_xsk_redirect_rqt_to_channel(struct mlx5e_priv *priv, struct mlx5e_channel *c); -int mlx5e_xsk_redirect_rqt_to_drop(struct mlx5e_priv *priv, u16 ix); -int mlx5e_xsk_redirect_rqts_to_channels(struct mlx5e_priv *priv, struct mlx5e_channels *chs); -void mlx5e_xsk_redirect_rqts_to_drop(struct mlx5e_priv *priv, struct mlx5e_channels *chs); #endif /* __MLX5_EN_XSK_SETUP_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c index e51f60b55daa..4c4ee524176c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c @@ -16,13 +16,13 @@ struct mlx5e_accel_fs_tcp { struct mlx5_flow_handle *default_rules[ACCEL_FS_TCP_NUM_TYPES]; }; -static enum mlx5e_traffic_types fs_accel2tt(enum accel_fs_tcp_type i) +static enum mlx5_traffic_types fs_accel2tt(enum accel_fs_tcp_type i) { switch (i) { case ACCEL_FS_IPV4_TCP: - return MLX5E_TT_IPV4_TCP; + return MLX5_TT_IPV4_TCP; default: /* ACCEL_FS_IPV6_TCP */ - return MLX5E_TT_IPV6_TCP; + return MLX5_TT_IPV6_TCP; } } @@ -161,7 +161,7 @@ static int accel_fs_tcp_add_default_rule(struct mlx5e_priv *priv, fs_tcp = priv->fs.accel_tcp; accel_fs_t = &fs_tcp->tables[type]; - dest = mlx5e_ttc_get_default_dest(priv, fs_accel2tt(type)); + dest = mlx5_ttc_get_default_dest(priv->fs.ttc, fs_accel2tt(type)); rule = mlx5_add_flow_rules(accel_fs_t->t, NULL, &flow_act, &dest, 1); if (IS_ERR(rule)) { err = PTR_ERR(rule); @@ -307,7 +307,7 @@ static int accel_fs_tcp_disable(struct mlx5e_priv *priv) for (i = 0; i < ACCEL_FS_TCP_NUM_TYPES; i++) { /* Modify ttc rules destination to point back to the indir TIRs */ - err = mlx5e_ttc_fwd_default_dest(priv, fs_accel2tt(i)); + err = mlx5_ttc_fwd_default_dest(priv->fs.ttc, fs_accel2tt(i)); if (err) { netdev_err(priv->netdev, "%s: modify ttc[%d] default destination failed, err(%d)\n", @@ -329,7 +329,7 @@ static int accel_fs_tcp_enable(struct mlx5e_priv *priv) dest.ft = priv->fs.accel_tcp->tables[i].t; /* Modify ttc rules destination to point on the accel_fs FTs */ - err = mlx5e_ttc_fwd_dest(priv, fs_accel2tt(i), &dest); + err = mlx5_ttc_fwd_dest(priv->fs.ttc, fs_accel2tt(i), &dest); if (err) { netdev_err(priv->netdev, "%s: modify ttc[%d] destination to accel failed, err(%d)\n", diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c index 34119ce92031..17da23dff0ed 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c @@ -41,11 +41,11 @@ struct mlx5e_ipsec_tx { }; /* IPsec RX flow steering */ -static enum mlx5e_traffic_types fs_esp2tt(enum accel_fs_esp_type i) +static enum mlx5_traffic_types fs_esp2tt(enum accel_fs_esp_type i) { if (i == ACCEL_FS_ESP4) - return MLX5E_TT_IPV4_IPSEC_ESP; - return MLX5E_TT_IPV6_IPSEC_ESP; + return MLX5_TT_IPV4_IPSEC_ESP; + return MLX5_TT_IPV6_IPSEC_ESP; } static int rx_err_add_rule(struct mlx5e_priv *priv, @@ -265,7 +265,8 @@ static int rx_create(struct mlx5e_priv *priv, enum accel_fs_esp_type type) accel_esp = priv->ipsec->rx_fs; fs_prot = &accel_esp->fs_prot[type]; - fs_prot->default_dest = mlx5e_ttc_get_default_dest(priv, fs_esp2tt(type)); + fs_prot->default_dest = + mlx5_ttc_get_default_dest(priv->fs.ttc, fs_esp2tt(type)); err = rx_err_create_ft(priv, fs_prot, &fs_prot->rx_err); if (err) @@ -301,7 +302,7 @@ static int rx_ft_get(struct mlx5e_priv *priv, enum accel_fs_esp_type type) /* connect */ dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; dest.ft = fs_prot->ft; - mlx5e_ttc_fwd_dest(priv, fs_esp2tt(type), &dest); + mlx5_ttc_fwd_dest(priv->fs.ttc, fs_esp2tt(type), &dest); out: mutex_unlock(&fs_prot->prot_mutex); @@ -320,7 +321,7 @@ static void rx_ft_put(struct mlx5e_priv *priv, enum accel_fs_esp_type type) goto out; /* disconnect */ - mlx5e_ttc_fwd_default_dest(priv, fs_esp2tt(type)); + mlx5_ttc_fwd_default_dest(priv->fs.ttc, fs_esp2tt(type)); /* remove FT */ rx_destroy(priv, type); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c index bfdbc3060755..62abce008c7b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c @@ -628,7 +628,7 @@ int mlx5e_ktls_add_rx(struct net_device *netdev, struct sock *sk, priv_rx->sw_stats = &priv->tls->sw_stats; mlx5e_set_ktls_rx_priv_ctx(tls_ctx, priv_rx); - rqtn = mlx5e_rqt_get_rqtn(&priv->rx_res->channels[rxq].direct_rqt); + rqtn = mlx5e_rx_res_get_rqtn_direct(priv->rx_res, rxq); err = mlx5e_ktls_create_tir(mdev, &priv_rx->tir, rqtn); if (err) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c index db6c6a96a6c9..fe5d82fa6e92 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c @@ -98,17 +98,17 @@ struct arfs_rule { for (j = 0; j < ARFS_HASH_SIZE; j++) \ hlist_for_each_entry_safe(hn, tmp, &hash[j], hlist) -static enum mlx5e_traffic_types arfs_get_tt(enum arfs_type type) +static enum mlx5_traffic_types arfs_get_tt(enum arfs_type type) { switch (type) { case ARFS_IPV4_TCP: - return MLX5E_TT_IPV4_TCP; + return MLX5_TT_IPV4_TCP; case ARFS_IPV4_UDP: - return MLX5E_TT_IPV4_UDP; + return MLX5_TT_IPV4_UDP; case ARFS_IPV6_TCP: - return MLX5E_TT_IPV6_TCP; + return MLX5_TT_IPV6_TCP; case ARFS_IPV6_UDP: - return MLX5E_TT_IPV6_UDP; + return MLX5_TT_IPV6_UDP; default: return -EINVAL; } @@ -120,7 +120,7 @@ static int arfs_disable(struct mlx5e_priv *priv) for (i = 0; i < ARFS_NUM_TYPES; i++) { /* Modify ttc rules destination back to their default */ - err = mlx5e_ttc_fwd_default_dest(priv, arfs_get_tt(i)); + err = mlx5_ttc_fwd_default_dest(priv->fs.ttc, arfs_get_tt(i)); if (err) { netdev_err(priv->netdev, "%s: modify ttc[%d] default destination failed, err(%d)\n", @@ -149,7 +149,7 @@ int mlx5e_arfs_enable(struct mlx5e_priv *priv) for (i = 0; i < ARFS_NUM_TYPES; i++) { dest.ft = priv->fs.arfs->arfs_tables[i].ft.t; /* Modify ttc rules destination to point on the aRFS FTs */ - err = mlx5e_ttc_fwd_dest(priv, arfs_get_tt(i), &dest); + err = mlx5_ttc_fwd_dest(priv->fs.ttc, arfs_get_tt(i), &dest); if (err) { netdev_err(priv->netdev, "%s: modify ttc[%d] dest to arfs, failed err(%d)\n", @@ -194,7 +194,7 @@ static int arfs_add_default_rule(struct mlx5e_priv *priv, struct arfs_table *arfs_t = &priv->fs.arfs->arfs_tables[type]; struct mlx5_flow_destination dest = {}; MLX5_DECLARE_FLOW_ACT(flow_act); - enum mlx5e_traffic_types tt; + enum mlx5_traffic_types tt; int err = 0; dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; @@ -205,10 +205,10 @@ static int arfs_add_default_rule(struct mlx5e_priv *priv, return -EINVAL; } - /* FIXME: Must use mlx5e_ttc_get_default_dest(), + /* FIXME: Must use mlx5_ttc_get_default_dest(), * but can't since TTC default is not setup yet ! */ - dest.tir_num = priv->rx_res->rss[tt].indir_tir.tirn; + dest.tir_num = mlx5e_rx_res_get_tirn_rss(priv->rx_res, tt); arfs_t->default_rule = mlx5_add_flow_rules(arfs_t->ft.t, NULL, &flow_act, &dest, 1); @@ -552,7 +552,7 @@ static struct mlx5_flow_handle *arfs_add_rule(struct mlx5e_priv *priv, 16); } dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; - dest.tir_num = priv->rx_res->channels[arfs_rule->rxq].direct_tir.tirn; + dest.tir_num = mlx5e_rx_res_get_tirn_direct(priv->rx_res, arfs_rule->rxq); rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); if (IS_ERR(rule)) { err = PTR_ERR(rule); @@ -575,7 +575,7 @@ static void arfs_modify_rule_rq(struct mlx5e_priv *priv, int err = 0; dst.type = MLX5_FLOW_DESTINATION_TYPE_TIR; - dst.tir_num = priv->rx_res->channels[rxq].direct_tir.tirn; + dst.tir_num = mlx5e_rx_res_get_tirn_direct(priv->rx_res, rxq); err = mlx5_modify_rule_destination(rule, &dst, NULL); if (err) netdev_warn(priv->netdev, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 9264d18b0964..2cf59bb5f898 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -1172,7 +1172,7 @@ static int mlx5e_set_link_ksettings(struct net_device *netdev, u32 mlx5e_ethtool_get_rxfh_key_size(struct mlx5e_priv *priv) { - return sizeof(priv->rx_res->rss_params.hash.toeplitz_hash_key); + return sizeof_field(struct mlx5e_rss_params_hash, toeplitz_hash_key); } static u32 mlx5e_get_rxfh_key_size(struct net_device *netdev) @@ -1198,18 +1198,10 @@ int mlx5e_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, u8 *hfunc) { struct mlx5e_priv *priv = netdev_priv(netdev); - struct mlx5e_rss_params *rss; - rss = &priv->rx_res->rss_params; - - if (indir) - memcpy(indir, rss->indir.table, sizeof(rss->indir.table)); - - if (key) - memcpy(key, rss->hash.toeplitz_hash_key, sizeof(rss->hash.toeplitz_hash_key)); - - if (hfunc) - *hfunc = rss->hash.hfunc; + mutex_lock(&priv->state_lock); + mlx5e_rx_res_rss_get_rxfh(priv->rx_res, indir, key, hfunc); + mutex_unlock(&priv->state_lock); return 0; } @@ -1218,58 +1210,13 @@ int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir, const u8 *key, const u8 hfunc) { struct mlx5e_priv *priv = netdev_priv(dev); - struct mlx5e_rss_params *rss; - bool refresh_tirs = false; - bool refresh_rqt = false; - - if ((hfunc != ETH_RSS_HASH_NO_CHANGE) && - (hfunc != ETH_RSS_HASH_XOR) && - (hfunc != ETH_RSS_HASH_TOP)) - return -EINVAL; + int err; mutex_lock(&priv->state_lock); - - rss = &priv->rx_res->rss_params; - - if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != rss->hash.hfunc) { - rss->hash.hfunc = hfunc; - refresh_rqt = true; - refresh_tirs = true; - } - - if (indir) { - memcpy(rss->indir.table, indir, sizeof(rss->indir.table)); - refresh_rqt = true; - } - - if (key) { - memcpy(rss->hash.toeplitz_hash_key, key, sizeof(rss->hash.toeplitz_hash_key)); - refresh_tirs = refresh_tirs || rss->hash.hfunc == ETH_RSS_HASH_TOP; - } - - if (refresh_rqt && test_bit(MLX5E_STATE_OPENED, &priv->state)) { - u32 *rqns; - - rqns = kvmalloc_array(priv->channels.num, sizeof(*rqns), GFP_KERNEL); - if (rqns) { - unsigned int ix; - - for (ix = 0; ix < priv->channels.num; ix++) - rqns[ix] = priv->channels.c[ix]->rq.rqn; - - mlx5e_rqt_redirect_indir(&priv->rx_res->indir_rqt, rqns, - priv->channels.num, - rss->hash.hfunc, &rss->indir); - kvfree(rqns); - } - } - - if (refresh_tirs) - mlx5e_modify_tirs_hash(priv); - + err = mlx5e_rx_res_rss_set_rxfh(priv->rx_res, indir, key, + hfunc == ETH_RSS_HASH_NO_CHANGE ? NULL : &hfunc); mutex_unlock(&priv->state_lock); - - return 0; + return err; } #define MLX5E_PFC_PREVEN_AUTO_TOUT_MSEC 100 diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index e79815763edf..5c754e9af669 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -718,7 +718,7 @@ static int mlx5e_add_promisc_rule(struct mlx5e_priv *priv) if (!spec) return -ENOMEM; dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; - dest.ft = priv->fs.ttc.ft.t; + dest.ft = mlx5_get_ttc_flow_table(priv->fs.ttc); rule_p = &priv->fs.promisc.rule; *rule_p = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); @@ -854,587 +854,59 @@ void mlx5e_destroy_flow_table(struct mlx5e_flow_table *ft) ft->t = NULL; } -static void mlx5e_cleanup_ttc_rules(struct mlx5e_ttc_table *ttc) -{ - int i; - - for (i = 0; i < MLX5E_NUM_TT; i++) { - if (!IS_ERR_OR_NULL(ttc->rules[i].rule)) { - mlx5_del_flow_rules(ttc->rules[i].rule); - ttc->rules[i].rule = NULL; - } - } - - for (i = 0; i < MLX5E_NUM_TUNNEL_TT; i++) { - if (!IS_ERR_OR_NULL(ttc->tunnel_rules[i])) { - mlx5_del_flow_rules(ttc->tunnel_rules[i]); - ttc->tunnel_rules[i] = NULL; - } - } -} - -struct mlx5e_etype_proto { - u16 etype; - u8 proto; -}; - -static struct mlx5e_etype_proto ttc_rules[] = { - [MLX5E_TT_IPV4_TCP] = { - .etype = ETH_P_IP, - .proto = IPPROTO_TCP, - }, - [MLX5E_TT_IPV6_TCP] = { - .etype = ETH_P_IPV6, - .proto = IPPROTO_TCP, - }, - [MLX5E_TT_IPV4_UDP] = { - .etype = ETH_P_IP, - .proto = IPPROTO_UDP, - }, - [MLX5E_TT_IPV6_UDP] = { - .etype = ETH_P_IPV6, - .proto = IPPROTO_UDP, - }, - [MLX5E_TT_IPV4_IPSEC_AH] = { - .etype = ETH_P_IP, - .proto = IPPROTO_AH, - }, - [MLX5E_TT_IPV6_IPSEC_AH] = { - .etype = ETH_P_IPV6, - .proto = IPPROTO_AH, - }, - [MLX5E_TT_IPV4_IPSEC_ESP] = { - .etype = ETH_P_IP, - .proto = IPPROTO_ESP, - }, - [MLX5E_TT_IPV6_IPSEC_ESP] = { - .etype = ETH_P_IPV6, - .proto = IPPROTO_ESP, - }, - [MLX5E_TT_IPV4] = { - .etype = ETH_P_IP, - .proto = 0, - }, - [MLX5E_TT_IPV6] = { - .etype = ETH_P_IPV6, - .proto = 0, - }, - [MLX5E_TT_ANY] = { - .etype = 0, - .proto = 0, - }, -}; - -static struct mlx5e_etype_proto ttc_tunnel_rules[] = { - [MLX5E_TT_IPV4_GRE] = { - .etype = ETH_P_IP, - .proto = IPPROTO_GRE, - }, - [MLX5E_TT_IPV6_GRE] = { - .etype = ETH_P_IPV6, - .proto = IPPROTO_GRE, - }, - [MLX5E_TT_IPV4_IPIP] = { - .etype = ETH_P_IP, - .proto = IPPROTO_IPIP, - }, - [MLX5E_TT_IPV6_IPIP] = { - .etype = ETH_P_IPV6, - .proto = IPPROTO_IPIP, - }, - [MLX5E_TT_IPV4_IPV6] = { - .etype = ETH_P_IP, - .proto = IPPROTO_IPV6, - }, - [MLX5E_TT_IPV6_IPV6] = { - .etype = ETH_P_IPV6, - .proto = IPPROTO_IPV6, - }, - -}; - -u8 mlx5e_get_proto_by_tunnel_type(enum mlx5e_tunnel_types tt) -{ - return ttc_tunnel_rules[tt].proto; -} - -static bool mlx5e_tunnel_proto_supported_rx(struct mlx5_core_dev *mdev, u8 proto_type) -{ - switch (proto_type) { - case IPPROTO_GRE: - return MLX5_CAP_ETH(mdev, tunnel_stateless_gre); - case IPPROTO_IPIP: - case IPPROTO_IPV6: - return (MLX5_CAP_ETH(mdev, tunnel_stateless_ip_over_ip) || - MLX5_CAP_ETH(mdev, tunnel_stateless_ip_over_ip_rx)); - default: - return false; - } -} - -static bool mlx5e_tunnel_any_rx_proto_supported(struct mlx5_core_dev *mdev) -{ - int tt; - - for (tt = 0; tt < MLX5E_NUM_TUNNEL_TT; tt++) { - if (mlx5e_tunnel_proto_supported_rx(mdev, ttc_tunnel_rules[tt].proto)) - return true; - } - return false; -} - -bool mlx5e_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev) -{ - return (mlx5e_tunnel_any_rx_proto_supported(mdev) && - MLX5_CAP_FLOWTABLE_NIC_RX(mdev, ft_field_support.inner_ip_version)); -} - -static u8 mlx5e_etype_to_ipv(u16 ethertype) -{ - if (ethertype == ETH_P_IP) - return 4; - - if (ethertype == ETH_P_IPV6) - return 6; - - return 0; -} - -static struct mlx5_flow_handle * -mlx5e_generate_ttc_rule(struct mlx5e_priv *priv, - struct mlx5_flow_table *ft, - struct mlx5_flow_destination *dest, - u16 etype, - u8 proto) -{ - int match_ipv_outer = MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ft_field_support.outer_ip_version); - MLX5_DECLARE_FLOW_ACT(flow_act); - struct mlx5_flow_handle *rule; - struct mlx5_flow_spec *spec; - int err = 0; - u8 ipv; - - spec = kvzalloc(sizeof(*spec), GFP_KERNEL); - if (!spec) - return ERR_PTR(-ENOMEM); - - if (proto) { - spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_protocol); - MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_protocol, proto); - } - - ipv = mlx5e_etype_to_ipv(etype); - if (match_ipv_outer && ipv) { - spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_version); - MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_version, ipv); - } else if (etype) { - spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ethertype); - MLX5_SET(fte_match_param, spec->match_value, outer_headers.ethertype, etype); - } - - rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, 1); - if (IS_ERR(rule)) { - err = PTR_ERR(rule); - netdev_err(priv->netdev, "%s: add rule failed\n", __func__); - } - - kvfree(spec); - return err ? ERR_PTR(err) : rule; -} - -static int mlx5e_generate_ttc_table_rules(struct mlx5e_priv *priv, - struct ttc_params *params, - struct mlx5e_ttc_table *ttc) -{ - struct mlx5_flow_destination dest = {}; - struct mlx5_flow_handle **trules; - struct mlx5e_ttc_rule *rules; - struct mlx5_flow_table *ft; - int tt; - int err; - - ft = ttc->ft.t; - rules = ttc->rules; - - dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; - for (tt = 0; tt < MLX5E_NUM_TT; tt++) { - struct mlx5e_ttc_rule *rule = &rules[tt]; - - if (tt == MLX5E_TT_ANY) - dest.tir_num = params->any_tt_tirn; - else - dest.tir_num = params->indir_tirn[tt]; - - rule->rule = mlx5e_generate_ttc_rule(priv, ft, &dest, - ttc_rules[tt].etype, - ttc_rules[tt].proto); - if (IS_ERR(rule->rule)) { - err = PTR_ERR(rule->rule); - rule->rule = NULL; - goto del_rules; - } - rule->default_dest = dest; - } - - if (!params->inner_ttc || !mlx5e_tunnel_inner_ft_supported(priv->mdev)) - return 0; - - trules = ttc->tunnel_rules; - dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; - dest.ft = params->inner_ttc->ft.t; - for (tt = 0; tt < MLX5E_NUM_TUNNEL_TT; tt++) { - if (!mlx5e_tunnel_proto_supported_rx(priv->mdev, - ttc_tunnel_rules[tt].proto)) - continue; - trules[tt] = mlx5e_generate_ttc_rule(priv, ft, &dest, - ttc_tunnel_rules[tt].etype, - ttc_tunnel_rules[tt].proto); - if (IS_ERR(trules[tt])) { - err = PTR_ERR(trules[tt]); - trules[tt] = NULL; - goto del_rules; - } - } - - return 0; - -del_rules: - mlx5e_cleanup_ttc_rules(ttc); - return err; -} - -static int mlx5e_create_ttc_table_groups(struct mlx5e_ttc_table *ttc, - bool use_ipv) -{ - int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); - struct mlx5e_flow_table *ft = &ttc->ft; - int ix = 0; - u32 *in; - int err; - u8 *mc; - - ft->g = kcalloc(MLX5E_TTC_NUM_GROUPS, - sizeof(*ft->g), GFP_KERNEL); - if (!ft->g) - return -ENOMEM; - in = kvzalloc(inlen, GFP_KERNEL); - if (!in) { - kfree(ft->g); - ft->g = NULL; - return -ENOMEM; - } - - /* L4 Group */ - mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); - MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_protocol); - if (use_ipv) - MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_version); - else - MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); - MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); - MLX5_SET_CFG(in, start_flow_index, ix); - ix += MLX5E_TTC_GROUP1_SIZE; - MLX5_SET_CFG(in, end_flow_index, ix - 1); - ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); - if (IS_ERR(ft->g[ft->num_groups])) - goto err; - ft->num_groups++; - - /* L3 Group */ - MLX5_SET(fte_match_param, mc, outer_headers.ip_protocol, 0); - MLX5_SET_CFG(in, start_flow_index, ix); - ix += MLX5E_TTC_GROUP2_SIZE; - MLX5_SET_CFG(in, end_flow_index, ix - 1); - ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); - if (IS_ERR(ft->g[ft->num_groups])) - goto err; - ft->num_groups++; - - /* Any Group */ - memset(in, 0, inlen); - MLX5_SET_CFG(in, start_flow_index, ix); - ix += MLX5E_TTC_GROUP3_SIZE; - MLX5_SET_CFG(in, end_flow_index, ix - 1); - ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); - if (IS_ERR(ft->g[ft->num_groups])) - goto err; - ft->num_groups++; - - kvfree(in); - return 0; - -err: - err = PTR_ERR(ft->g[ft->num_groups]); - ft->g[ft->num_groups] = NULL; - kvfree(in); - - return err; -} - -static struct mlx5_flow_handle * -mlx5e_generate_inner_ttc_rule(struct mlx5e_priv *priv, - struct mlx5_flow_table *ft, - struct mlx5_flow_destination *dest, - u16 etype, u8 proto) -{ - MLX5_DECLARE_FLOW_ACT(flow_act); - struct mlx5_flow_handle *rule; - struct mlx5_flow_spec *spec; - int err = 0; - u8 ipv; - - spec = kvzalloc(sizeof(*spec), GFP_KERNEL); - if (!spec) - return ERR_PTR(-ENOMEM); - - ipv = mlx5e_etype_to_ipv(etype); - if (etype && ipv) { - spec->match_criteria_enable = MLX5_MATCH_INNER_HEADERS; - MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, inner_headers.ip_version); - MLX5_SET(fte_match_param, spec->match_value, inner_headers.ip_version, ipv); - } - - if (proto) { - spec->match_criteria_enable = MLX5_MATCH_INNER_HEADERS; - MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, inner_headers.ip_protocol); - MLX5_SET(fte_match_param, spec->match_value, inner_headers.ip_protocol, proto); - } - - rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, 1); - if (IS_ERR(rule)) { - err = PTR_ERR(rule); - netdev_err(priv->netdev, "%s: add rule failed\n", __func__); - } - - kvfree(spec); - return err ? ERR_PTR(err) : rule; -} - -static int mlx5e_generate_inner_ttc_table_rules(struct mlx5e_priv *priv, - struct ttc_params *params, - struct mlx5e_ttc_table *ttc) -{ - struct mlx5_flow_destination dest = {}; - struct mlx5e_ttc_rule *rules; - struct mlx5_flow_table *ft; - int err; - int tt; - - ft = ttc->ft.t; - rules = ttc->rules; - dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; - - for (tt = 0; tt < MLX5E_NUM_TT; tt++) { - struct mlx5e_ttc_rule *rule = &rules[tt]; - - if (tt == MLX5E_TT_ANY) - dest.tir_num = params->any_tt_tirn; - else - dest.tir_num = params->indir_tirn[tt]; - - rule->rule = mlx5e_generate_inner_ttc_rule(priv, ft, &dest, - ttc_rules[tt].etype, - ttc_rules[tt].proto); - if (IS_ERR(rule->rule)) { - err = PTR_ERR(rule->rule); - rule->rule = NULL; - goto del_rules; - } - rule->default_dest = dest; - } - - return 0; - -del_rules: - - mlx5e_cleanup_ttc_rules(ttc); - return err; -} - -static int mlx5e_create_inner_ttc_table_groups(struct mlx5e_ttc_table *ttc) -{ - int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); - struct mlx5e_flow_table *ft = &ttc->ft; - int ix = 0; - u32 *in; - int err; - u8 *mc; - - ft->g = kcalloc(MLX5E_INNER_TTC_NUM_GROUPS, sizeof(*ft->g), GFP_KERNEL); - if (!ft->g) - return -ENOMEM; - in = kvzalloc(inlen, GFP_KERNEL); - if (!in) { - kfree(ft->g); - ft->g = NULL; - return -ENOMEM; - } - - /* L4 Group */ - mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); - MLX5_SET_TO_ONES(fte_match_param, mc, inner_headers.ip_protocol); - MLX5_SET_TO_ONES(fte_match_param, mc, inner_headers.ip_version); - MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_INNER_HEADERS); - MLX5_SET_CFG(in, start_flow_index, ix); - ix += MLX5E_INNER_TTC_GROUP1_SIZE; - MLX5_SET_CFG(in, end_flow_index, ix - 1); - ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); - if (IS_ERR(ft->g[ft->num_groups])) - goto err; - ft->num_groups++; - - /* L3 Group */ - MLX5_SET(fte_match_param, mc, inner_headers.ip_protocol, 0); - MLX5_SET_CFG(in, start_flow_index, ix); - ix += MLX5E_INNER_TTC_GROUP2_SIZE; - MLX5_SET_CFG(in, end_flow_index, ix - 1); - ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); - if (IS_ERR(ft->g[ft->num_groups])) - goto err; - ft->num_groups++; - - /* Any Group */ - memset(in, 0, inlen); - MLX5_SET_CFG(in, start_flow_index, ix); - ix += MLX5E_INNER_TTC_GROUP3_SIZE; - MLX5_SET_CFG(in, end_flow_index, ix - 1); - ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); - if (IS_ERR(ft->g[ft->num_groups])) - goto err; - ft->num_groups++; - - kvfree(in); - return 0; - -err: - err = PTR_ERR(ft->g[ft->num_groups]); - ft->g[ft->num_groups] = NULL; - kvfree(in); - - return err; -} - -void mlx5e_set_ttc_basic_params(struct mlx5e_priv *priv, - struct ttc_params *ttc_params) -{ - ttc_params->any_tt_tirn = priv->rx_res->channels[0].direct_tir.tirn; - ttc_params->inner_ttc = &priv->fs.inner_ttc; -} - -static void mlx5e_set_inner_ttc_ft_params(struct ttc_params *ttc_params) +static void mlx5e_set_inner_ttc_params(struct mlx5e_priv *priv, + struct ttc_params *ttc_params) { struct mlx5_flow_table_attr *ft_attr = &ttc_params->ft_attr; + int tt; - ft_attr->max_fte = MLX5E_INNER_TTC_TABLE_SIZE; + memset(ttc_params, 0, sizeof(*ttc_params)); + ttc_params->ns = mlx5_get_flow_namespace(priv->mdev, + MLX5_FLOW_NAMESPACE_KERNEL); ft_attr->level = MLX5E_INNER_TTC_FT_LEVEL; ft_attr->prio = MLX5E_NIC_PRIO; + + for (tt = 0; tt < MLX5_NUM_TT; tt++) { + ttc_params->dests[tt].type = MLX5_FLOW_DESTINATION_TYPE_TIR; + ttc_params->dests[tt].tir_num = + tt == MLX5_TT_ANY ? + mlx5e_rx_res_get_tirn_direct(priv->rx_res, 0) : + mlx5e_rx_res_get_tirn_rss_inner(priv->rx_res, + tt); + } } -void mlx5e_set_ttc_ft_params(struct ttc_params *ttc_params) +void mlx5e_set_ttc_params(struct mlx5e_priv *priv, + struct ttc_params *ttc_params, bool tunnel) { struct mlx5_flow_table_attr *ft_attr = &ttc_params->ft_attr; + int tt; - ft_attr->max_fte = MLX5E_TTC_TABLE_SIZE; + memset(ttc_params, 0, sizeof(*ttc_params)); + ttc_params->ns = mlx5_get_flow_namespace(priv->mdev, + MLX5_FLOW_NAMESPACE_KERNEL); ft_attr->level = MLX5E_TTC_FT_LEVEL; ft_attr->prio = MLX5E_NIC_PRIO; -} - -static int mlx5e_create_inner_ttc_table(struct mlx5e_priv *priv, struct ttc_params *params, - struct mlx5e_ttc_table *ttc) -{ - struct mlx5e_flow_table *ft = &ttc->ft; - int err; - ft->t = mlx5_create_flow_table(priv->fs.ns, ¶ms->ft_attr); - if (IS_ERR(ft->t)) { - err = PTR_ERR(ft->t); - ft->t = NULL; - return err; + for (tt = 0; tt < MLX5_NUM_TT; tt++) { + ttc_params->dests[tt].type = MLX5_FLOW_DESTINATION_TYPE_TIR; + ttc_params->dests[tt].tir_num = + tt == MLX5_TT_ANY ? + mlx5e_rx_res_get_tirn_direct(priv->rx_res, 0) : + mlx5e_rx_res_get_tirn_rss(priv->rx_res, tt); } - err = mlx5e_create_inner_ttc_table_groups(ttc); - if (err) - goto err; - - err = mlx5e_generate_inner_ttc_table_rules(priv, params, ttc); - if (err) - goto err; - - return 0; - -err: - mlx5e_destroy_flow_table(ft); - return err; -} - -static void mlx5e_destroy_inner_ttc_table(struct mlx5e_priv *priv, - struct mlx5e_ttc_table *ttc) -{ - mlx5e_cleanup_ttc_rules(ttc); - mlx5e_destroy_flow_table(&ttc->ft); -} - -void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv, - struct mlx5e_ttc_table *ttc) -{ - mlx5e_cleanup_ttc_rules(ttc); - mlx5e_destroy_flow_table(&ttc->ft); -} - -int mlx5e_create_ttc_table(struct mlx5e_priv *priv, struct ttc_params *params, - struct mlx5e_ttc_table *ttc) -{ - bool match_ipv_outer = MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ft_field_support.outer_ip_version); - struct mlx5e_flow_table *ft = &ttc->ft; - int err; + ttc_params->inner_ttc = tunnel; + if (!tunnel || !mlx5_tunnel_inner_ft_supported(priv->mdev)) + return; - ft->t = mlx5_create_flow_table(priv->fs.ns, ¶ms->ft_attr); - if (IS_ERR(ft->t)) { - err = PTR_ERR(ft->t); - ft->t = NULL; - return err; + for (tt = 0; tt < MLX5_NUM_TUNNEL_TT; tt++) { + ttc_params->tunnel_dests[tt].type = + MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + ttc_params->tunnel_dests[tt].ft = + mlx5_get_ttc_flow_table(priv->fs.inner_ttc); } - - err = mlx5e_create_ttc_table_groups(ttc, match_ipv_outer); - if (err) - goto err; - - err = mlx5e_generate_ttc_table_rules(priv, params, ttc); - if (err) - goto err; - - return 0; -err: - mlx5e_destroy_flow_table(ft); - return err; -} - -int mlx5e_ttc_fwd_dest(struct mlx5e_priv *priv, enum mlx5e_traffic_types type, - struct mlx5_flow_destination *new_dest) -{ - return mlx5_modify_rule_destination(priv->fs.ttc.rules[type].rule, new_dest, NULL); -} - -struct mlx5_flow_destination -mlx5e_ttc_get_default_dest(struct mlx5e_priv *priv, enum mlx5e_traffic_types type) -{ - struct mlx5_flow_destination *dest = &priv->fs.ttc.rules[type].default_dest; - - WARN_ONCE(dest->type != MLX5_FLOW_DESTINATION_TYPE_TIR, - "TTC[%d] default dest is not setup yet", type); - - return *dest; -} - -int mlx5e_ttc_fwd_default_dest(struct mlx5e_priv *priv, enum mlx5e_traffic_types type) -{ - struct mlx5_flow_destination dest = mlx5e_ttc_get_default_dest(priv, type); - - return mlx5e_ttc_fwd_dest(priv, type, &dest); } static void mlx5e_del_l2_flow_rule(struct mlx5e_priv *priv, @@ -1467,7 +939,7 @@ static int mlx5e_add_l2_flow_rule(struct mlx5e_priv *priv, outer_headers.dmac_47_16); dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; - dest.ft = priv->fs.ttc.ft.t; + dest.ft = mlx5_get_ttc_flow_table(priv->fs.ttc); switch (type) { case MLX5E_FULLMATCH: @@ -1763,10 +1235,46 @@ static void mlx5e_destroy_vlan_table(struct mlx5e_priv *priv) kvfree(priv->fs.vlan); } -int mlx5e_create_flow_steering(struct mlx5e_priv *priv) +static void mlx5e_destroy_inner_ttc_table(struct mlx5e_priv *priv) +{ + if (!mlx5_tunnel_inner_ft_supported(priv->mdev)) + return; + mlx5_destroy_ttc_table(priv->fs.inner_ttc); +} + +void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv) +{ + mlx5_destroy_ttc_table(priv->fs.ttc); +} + +static int mlx5e_create_inner_ttc_table(struct mlx5e_priv *priv) { struct ttc_params ttc_params = {}; - int tt, err; + + if (!mlx5_tunnel_inner_ft_supported(priv->mdev)) + return 0; + + mlx5e_set_inner_ttc_params(priv, &ttc_params); + priv->fs.inner_ttc = mlx5_create_ttc_table(priv->mdev, &ttc_params); + if (IS_ERR(priv->fs.inner_ttc)) + return PTR_ERR(priv->fs.inner_ttc); + return 0; +} + +int mlx5e_create_ttc_table(struct mlx5e_priv *priv) +{ + struct ttc_params ttc_params = {}; + + mlx5e_set_ttc_params(priv, &ttc_params, true); + priv->fs.ttc = mlx5_create_ttc_table(priv->mdev, &ttc_params); + if (IS_ERR(priv->fs.ttc)) + return PTR_ERR(priv->fs.ttc); + return 0; +} + +int mlx5e_create_flow_steering(struct mlx5e_priv *priv) +{ + int err; priv->fs.ns = mlx5_get_flow_namespace(priv->mdev, MLX5_FLOW_NAMESPACE_KERNEL); @@ -1781,26 +1289,15 @@ int mlx5e_create_flow_steering(struct mlx5e_priv *priv) priv->netdev->hw_features &= ~NETIF_F_NTUPLE; } - mlx5e_set_ttc_basic_params(priv, &ttc_params); - - if (mlx5e_tunnel_inner_ft_supported(priv->mdev)) { - mlx5e_set_inner_ttc_ft_params(&ttc_params); - for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) - ttc_params.indir_tirn[tt] = priv->rx_res->rss[tt].inner_indir_tir.tirn; - - err = mlx5e_create_inner_ttc_table(priv, &ttc_params, &priv->fs.inner_ttc); - if (err) { - netdev_err(priv->netdev, "Failed to create inner ttc table, err=%d\n", - err); - goto err_destroy_arfs_tables; - } + err = mlx5e_create_inner_ttc_table(priv); + if (err) { + netdev_err(priv->netdev, + "Failed to create inner ttc table, err=%d\n", + err); + goto err_destroy_arfs_tables; } - mlx5e_set_ttc_ft_params(&ttc_params); - for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) - ttc_params.indir_tirn[tt] = priv->rx_res->rss[tt].indir_tir.tirn; - - err = mlx5e_create_ttc_table(priv, &ttc_params, &priv->fs.ttc); + err = mlx5e_create_ttc_table(priv); if (err) { netdev_err(priv->netdev, "Failed to create ttc table, err=%d\n", err); @@ -1834,10 +1331,9 @@ err_destory_vlan_table: err_destroy_l2_table: mlx5e_destroy_l2_table(priv); err_destroy_ttc_table: - mlx5e_destroy_ttc_table(priv, &priv->fs.ttc); + mlx5e_destroy_ttc_table(priv); err_destroy_inner_ttc_table: - if (mlx5e_tunnel_inner_ft_supported(priv->mdev)) - mlx5e_destroy_inner_ttc_table(priv, &priv->fs.inner_ttc); + mlx5e_destroy_inner_ttc_table(priv); err_destroy_arfs_tables: mlx5e_arfs_destroy_tables(priv); @@ -1849,9 +1345,8 @@ void mlx5e_destroy_flow_steering(struct mlx5e_priv *priv) mlx5e_ptp_free_rx_fs(priv); mlx5e_destroy_vlan_table(priv); mlx5e_destroy_l2_table(priv); - mlx5e_destroy_ttc_table(priv, &priv->fs.ttc); - if (mlx5e_tunnel_inner_ft_supported(priv->mdev)) - mlx5e_destroy_inner_ttc_table(priv, &priv->fs.inner_ttc); + mlx5e_destroy_ttc_table(priv); + mlx5e_destroy_inner_ttc_table(priv); mlx5e_arfs_destroy_tables(priv); mlx5e_ethtool_cleanup_steering(priv); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c index 494f6f832407..3d8918f9399e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c @@ -433,9 +433,9 @@ add_ethtool_flow_rule(struct mlx5e_priv *priv, dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR; if (group == MLX5E_RQ_GROUP_XSK) - dst->tir_num = priv->rx_res->channels[ix].xsk_tir.tirn; + dst->tir_num = mlx5e_rx_res_get_tirn_xsk(priv->rx_res, ix); else - dst->tir_num = priv->rx_res->channels[ix].direct_tir.tirn; + dst->tir_num = mlx5e_rx_res_get_tirn_direct(priv->rx_res, ix); flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; } @@ -786,43 +786,44 @@ void mlx5e_ethtool_init_steering(struct mlx5e_priv *priv) INIT_LIST_HEAD(&priv->fs.ethtool.rules); } -static enum mlx5e_traffic_types flow_type_to_traffic_type(u32 flow_type) +static int flow_type_to_traffic_type(u32 flow_type) { switch (flow_type) { case TCP_V4_FLOW: - return MLX5E_TT_IPV4_TCP; + return MLX5_TT_IPV4_TCP; case TCP_V6_FLOW: - return MLX5E_TT_IPV6_TCP; + return MLX5_TT_IPV6_TCP; case UDP_V4_FLOW: - return MLX5E_TT_IPV4_UDP; + return MLX5_TT_IPV4_UDP; case UDP_V6_FLOW: - return MLX5E_TT_IPV6_UDP; + return MLX5_TT_IPV6_UDP; case AH_V4_FLOW: - return MLX5E_TT_IPV4_IPSEC_AH; + return MLX5_TT_IPV4_IPSEC_AH; case AH_V6_FLOW: - return MLX5E_TT_IPV6_IPSEC_AH; + return MLX5_TT_IPV6_IPSEC_AH; case ESP_V4_FLOW: - return MLX5E_TT_IPV4_IPSEC_ESP; + return MLX5_TT_IPV4_IPSEC_ESP; case ESP_V6_FLOW: - return MLX5E_TT_IPV6_IPSEC_ESP; + return MLX5_TT_IPV6_IPSEC_ESP; case IPV4_FLOW: - return MLX5E_TT_IPV4; + return MLX5_TT_IPV4; case IPV6_FLOW: - return MLX5E_TT_IPV6; + return MLX5_TT_IPV6; default: - return MLX5E_NUM_INDIR_TIRS; + return -EINVAL; } } static int mlx5e_set_rss_hash_opt(struct mlx5e_priv *priv, struct ethtool_rxnfc *nfc) { - enum mlx5e_traffic_types tt; u8 rx_hash_field = 0; + int err; + int tt; tt = flow_type_to_traffic_type(nfc->flow_type); - if (tt == MLX5E_NUM_INDIR_TIRS) - return -EINVAL; + if (tt < 0) + return tt; /* RSS does not support anything other than hashing to queues * on src IP, dest IP, TCP/UDP src port and TCP/UDP dest @@ -848,29 +849,23 @@ static int mlx5e_set_rss_hash_opt(struct mlx5e_priv *priv, rx_hash_field |= MLX5_HASH_FIELD_SEL_L4_DPORT; mutex_lock(&priv->state_lock); - - if (rx_hash_field == priv->rx_res->rss_params.rx_hash_fields[tt]) - goto out; - - priv->rx_res->rss_params.rx_hash_fields[tt] = rx_hash_field; - mlx5e_modify_tirs_hash(priv); - -out: + err = mlx5e_rx_res_rss_set_hash_fields(priv->rx_res, tt, rx_hash_field); mutex_unlock(&priv->state_lock); - return 0; + + return err; } static int mlx5e_get_rss_hash_opt(struct mlx5e_priv *priv, struct ethtool_rxnfc *nfc) { - enum mlx5e_traffic_types tt; u32 hash_field = 0; + int tt; tt = flow_type_to_traffic_type(nfc->flow_type); - if (tt == MLX5E_NUM_INDIR_TIRS) - return -EINVAL; + if (tt < 0) + return tt; - hash_field = priv->rx_res->rss_params.rx_hash_fields[tt]; + hash_field = mlx5e_rx_res_rss_get_hash_fields(priv->rx_res, tt); nfc->data = 0; if (hash_field & MLX5_HASH_FIELD_SEL_SRC_IP) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 3e644d3955a8..25a0b5f0984a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -2194,202 +2194,14 @@ void mlx5e_close_channels(struct mlx5e_channels *chs) chs->num = 0; } -int mlx5e_create_indirect_rqt(struct mlx5e_priv *priv) -{ - int err; - - err = mlx5e_rqt_init_direct(&priv->rx_res->indir_rqt, priv->mdev, true, - priv->drop_rq.rqn); - if (err) - mlx5_core_warn(priv->mdev, "create indirect rqts failed, %d\n", err); - return err; -} - -int mlx5e_create_direct_rqts(struct mlx5e_priv *priv) -{ - int err; - int ix; - - for (ix = 0; ix < priv->max_nch; ix++) { - err = mlx5e_rqt_init_direct(&priv->rx_res->channels[ix].direct_rqt, - priv->mdev, false, priv->drop_rq.rqn); - if (unlikely(err)) - goto err_destroy_rqts; - } - - return 0; - -err_destroy_rqts: - mlx5_core_warn(priv->mdev, "create direct rqts failed, %d\n", err); - while (--ix >= 0) - mlx5e_rqt_destroy(&priv->rx_res->channels[ix].direct_rqt); - - return err; -} - -static int mlx5e_create_xsk_rqts(struct mlx5e_priv *priv) -{ - int err; - int ix; - - for (ix = 0; ix < priv->max_nch; ix++) { - err = mlx5e_rqt_init_direct(&priv->rx_res->channels[ix].xsk_rqt, - priv->mdev, false, priv->drop_rq.rqn); - if (unlikely(err)) - goto err_destroy_rqts; - } - - return 0; - -err_destroy_rqts: - mlx5_core_warn(priv->mdev, "create xsk rqts failed, %d\n", err); - while (--ix >= 0) - mlx5e_rqt_destroy(&priv->rx_res->channels[ix].xsk_rqt); - - return err; -} - -void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv) -{ - unsigned int ix; - - for (ix = 0; ix < priv->max_nch; ix++) - mlx5e_rqt_destroy(&priv->rx_res->channels[ix].direct_rqt); -} - -static void mlx5e_destroy_xsk_rqts(struct mlx5e_priv *priv) -{ - unsigned int ix; - - for (ix = 0; ix < priv->max_nch; ix++) - mlx5e_rqt_destroy(&priv->rx_res->channels[ix].xsk_rqt); -} - -static void mlx5e_redirect_rqts_to_channels(struct mlx5e_priv *priv, - struct mlx5e_channels *chs) -{ - struct mlx5e_rx_res *res = priv->rx_res; - unsigned int ix; - u32 *rqns; - - rqns = kvmalloc_array(chs->num, sizeof(*rqns), GFP_KERNEL); - if (rqns) { - for (ix = 0; ix < chs->num; ix++) - rqns[ix] = chs->c[ix]->rq.rqn; - - mlx5e_rqt_redirect_indir(&res->indir_rqt, rqns, chs->num, - res->rss_params.hash.hfunc, - &res->rss_params.indir); - kvfree(rqns); - } - - for (ix = 0; ix < priv->max_nch; ix++) { - u32 rqn = priv->drop_rq.rqn; - - if (ix < chs->num) - rqn = chs->c[ix]->rq.rqn; - - mlx5e_rqt_redirect_direct(&res->channels[ix].direct_rqt, rqn); - } - - if (priv->profile->rx_ptp_support) { - u32 rqn; - - if (mlx5e_ptp_get_rqn(priv->channels.ptp, &rqn)) - rqn = priv->drop_rq.rqn; - - mlx5e_rqt_redirect_direct(&res->ptp.rqt, rqn); - } -} - -static void mlx5e_redirect_rqts_to_drop(struct mlx5e_priv *priv) -{ - struct mlx5e_rx_res *res = priv->rx_res; - unsigned int ix; - - mlx5e_rqt_redirect_direct(&res->indir_rqt, priv->drop_rq.rqn); - - for (ix = 0; ix < priv->max_nch; ix++) - mlx5e_rqt_redirect_direct(&res->channels[ix].direct_rqt, priv->drop_rq.rqn); - - if (priv->profile->rx_ptp_support) - mlx5e_rqt_redirect_direct(&res->ptp.rqt, priv->drop_rq.rqn); -} - -int mlx5e_modify_tirs_hash(struct mlx5e_priv *priv) -{ - struct mlx5e_rss_params_hash *rss_hash = &priv->rx_res->rss_params.hash; - struct mlx5e_rss_params_traffic_type rss_tt; - struct mlx5e_rx_res *res = priv->rx_res; - struct mlx5e_tir_builder *builder; - enum mlx5e_traffic_types tt; - - builder = mlx5e_tir_builder_alloc(true); - if (!builder) - return -ENOMEM; - - for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { - rss_tt = mlx5e_rx_res_rss_get_current_tt_config(res, tt); - mlx5e_tir_builder_build_rss(builder, rss_hash, &rss_tt, false); - mlx5e_tir_modify(&res->rss[tt].indir_tir, builder); - mlx5e_tir_builder_clear(builder); - } - - /* Verify inner tirs resources allocated */ - if (!res->rss[0].inner_indir_tir.tirn) - goto out; - - for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { - rss_tt = mlx5e_rx_res_rss_get_current_tt_config(res, tt); - mlx5e_tir_builder_build_rss(builder, rss_hash, &rss_tt, true); - mlx5e_tir_modify(&res->rss[tt].indir_tir, builder); - mlx5e_tir_builder_clear(builder); - } - -out: - mlx5e_tir_builder_free(builder); - return 0; -} - static int mlx5e_modify_tirs_lro(struct mlx5e_priv *priv) { struct mlx5e_rx_res *res = priv->rx_res; - struct mlx5e_tir_builder *builder; struct mlx5e_lro_param lro_param; - enum mlx5e_traffic_types tt; - int err; - int ix; - - builder = mlx5e_tir_builder_alloc(true); - if (!builder) - return -ENOMEM; lro_param = mlx5e_get_lro_param(&priv->channels.params); - mlx5e_tir_builder_build_lro(builder, &lro_param); - - for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { - err = mlx5e_tir_modify(&res->rss[tt].indir_tir, builder); - if (err) - goto err_free_builder; - /* Verify inner tirs resources allocated */ - if (!res->rss[0].inner_indir_tir.tirn) - continue; - - err = mlx5e_tir_modify(&res->rss[tt].inner_indir_tir, builder); - if (err) - goto err_free_builder; - } - - for (ix = 0; ix < priv->max_nch; ix++) { - err = mlx5e_tir_modify(&res->channels[ix].direct_tir, builder); - if (err) - goto err_free_builder; - } - -err_free_builder: - mlx5e_tir_builder_free(builder); - return err; + return mlx5e_rx_res_lro_set_param(res, &lro_param); } static MLX5E_DEFINE_PREACTIVATE_WRAPPER_CTX(mlx5e_modify_tirs_lro); @@ -2572,8 +2384,7 @@ int mlx5e_num_channels_changed(struct mlx5e_priv *priv) /* This function may be called on attach, before priv->rx_res is created. */ if (!netif_is_rxfh_configured(priv->netdev) && priv->rx_res) - mlx5e_build_default_indir_rqt(priv->rx_res->rss_params.indir.table, - MLX5E_INDIR_RQT_SIZE, count); + mlx5e_rx_res_rss_set_indir_uniform(priv->rx_res, count); return 0; } @@ -2633,18 +2444,14 @@ void mlx5e_activate_priv_channels(struct mlx5e_priv *priv) mlx5e_wait_channels_min_rx_wqes(&priv->channels); - if (priv->rx_res) { - mlx5e_redirect_rqts_to_channels(priv, &priv->channels); - mlx5e_xsk_redirect_rqts_to_channels(priv, &priv->channels); - } + if (priv->rx_res) + mlx5e_rx_res_channels_activate(priv->rx_res, &priv->channels); } void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv) { - if (priv->rx_res) { - mlx5e_xsk_redirect_rqts_to_drop(priv, &priv->channels); - mlx5e_redirect_rqts_to_drop(priv); - } + if (priv->rx_res) + mlx5e_rx_res_channels_deactivate(priv->rx_res); if (mlx5e_is_vport_rep(priv)) mlx5e_remove_sqs_fwd_rules(priv); @@ -3019,194 +2826,6 @@ static void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv) mlx5e_destroy_tises(priv); } -int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv, bool inner_ttc) -{ - struct mlx5e_rss_params_hash *rss_hash = &priv->rx_res->rss_params.hash; - bool inner_ft_support = priv->channels.params.tunneled_offload_en; - struct mlx5e_rss_params_traffic_type rss_tt; - struct mlx5e_rx_res *res = priv->rx_res; - enum mlx5e_traffic_types tt, max_tt; - struct mlx5e_tir_builder *builder; - struct mlx5e_lro_param lro_param; - u32 indir_rqtn; - int err = 0; - - builder = mlx5e_tir_builder_alloc(false); - if (!builder) - return -ENOMEM; - - lro_param = mlx5e_get_lro_param(&priv->channels.params); - indir_rqtn = mlx5e_rqt_get_rqtn(&res->indir_rqt); - - for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { - mlx5e_tir_builder_build_rqt(builder, priv->mdev->mlx5e_res.hw_objs.td.tdn, - indir_rqtn, inner_ft_support); - mlx5e_tir_builder_build_lro(builder, &lro_param); - rss_tt = mlx5e_rx_res_rss_get_current_tt_config(res, tt); - mlx5e_tir_builder_build_rss(builder, rss_hash, &rss_tt, false); - - err = mlx5e_tir_init(&res->rss[tt].indir_tir, builder, priv->mdev, true); - if (err) { - mlx5_core_warn(priv->mdev, "create indirect tirs failed, %d\n", err); - goto err_destroy_tirs; - } - - mlx5e_tir_builder_clear(builder); - } - - if (!inner_ttc || !mlx5e_tunnel_inner_ft_supported(priv->mdev)) - goto out; - - for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { - mlx5e_tir_builder_build_rqt(builder, priv->mdev->mlx5e_res.hw_objs.td.tdn, - indir_rqtn, inner_ft_support); - mlx5e_tir_builder_build_lro(builder, &lro_param); - rss_tt = mlx5e_rx_res_rss_get_current_tt_config(res, tt); - mlx5e_tir_builder_build_rss(builder, rss_hash, &rss_tt, true); - - err = mlx5e_tir_init(&res->rss[tt].inner_indir_tir, builder, priv->mdev, true); - if (err) { - mlx5_core_warn(priv->mdev, "create inner indirect tirs failed, %d\n", err); - goto err_destroy_inner_tirs; - } - - mlx5e_tir_builder_clear(builder); - } - - goto out; - -err_destroy_inner_tirs: - max_tt = tt; - for (tt = 0; tt < max_tt; tt++) - mlx5e_tir_destroy(&res->rss[tt].inner_indir_tir); - - tt = MLX5E_NUM_INDIR_TIRS; -err_destroy_tirs: - max_tt = tt; - for (tt = 0; tt < max_tt; tt++) - mlx5e_tir_destroy(&res->rss[tt].indir_tir); - -out: - mlx5e_tir_builder_free(builder); - - return err; -} - -static int mlx5e_create_direct_tir(struct mlx5e_priv *priv, struct mlx5e_tir *tir, - struct mlx5e_tir_builder *builder, struct mlx5e_rqt *rqt) -{ - bool inner_ft_support = priv->channels.params.tunneled_offload_en; - struct mlx5e_lro_param lro_param; - int err = 0; - - lro_param = mlx5e_get_lro_param(&priv->channels.params); - - mlx5e_tir_builder_build_rqt(builder, priv->mdev->mlx5e_res.hw_objs.td.tdn, - mlx5e_rqt_get_rqtn(rqt), inner_ft_support); - mlx5e_tir_builder_build_lro(builder, &lro_param); - mlx5e_tir_builder_build_direct(builder); - - err = mlx5e_tir_init(tir, builder, priv->mdev, true); - if (unlikely(err)) - mlx5_core_warn(priv->mdev, "create tirs failed, %d\n", err); - - mlx5e_tir_builder_clear(builder); - - return err; -} - -int mlx5e_create_direct_tirs(struct mlx5e_priv *priv) -{ - struct mlx5e_rx_res *res = priv->rx_res; - struct mlx5e_tir_builder *builder; - int err = 0; - int ix; - - builder = mlx5e_tir_builder_alloc(false); - if (!builder) - return -ENOMEM; - - for (ix = 0; ix < priv->max_nch; ix++) { - err = mlx5e_create_direct_tir(priv, &res->channels[ix].direct_tir, - builder, &res->channels[ix].direct_rqt); - if (err) - goto err_destroy_tirs; - } - - goto out; - -err_destroy_tirs: - while (--ix >= 0) - mlx5e_tir_destroy(&res->channels[ix].direct_tir); - -out: - mlx5e_tir_builder_free(builder); - - return err; -} - -static int mlx5e_create_xsk_tirs(struct mlx5e_priv *priv) -{ - struct mlx5e_rx_res *res = priv->rx_res; - struct mlx5e_tir_builder *builder; - int err; - int ix; - - builder = mlx5e_tir_builder_alloc(false); - if (!builder) - return -ENOMEM; - - for (ix = 0; ix < priv->max_nch; ix++) { - err = mlx5e_create_direct_tir(priv, &res->channels[ix].xsk_tir, - builder, &res->channels[ix].xsk_rqt); - if (err) - goto err_destroy_tirs; - } - - goto out; - -err_destroy_tirs: - while (--ix >= 0) - mlx5e_tir_destroy(&res->channels[ix].xsk_tir); - -out: - mlx5e_tir_builder_free(builder); - - return err; -} - -void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv) -{ - struct mlx5e_rx_res *res = priv->rx_res; - enum mlx5e_traffic_types tt; - - for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) - mlx5e_tir_destroy(&res->rss[tt].indir_tir); - - /* Verify inner tirs resources allocated */ - if (!res->rss[0].inner_indir_tir.tirn) - return; - - for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) - mlx5e_tir_destroy(&res->rss[tt].inner_indir_tir); -} - -void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv) -{ - unsigned int ix; - - for (ix = 0; ix < priv->max_nch; ix++) - mlx5e_tir_destroy(&priv->rx_res->channels[ix].direct_tir); -} - -static void mlx5e_destroy_xsk_tirs(struct mlx5e_priv *priv) -{ - unsigned int ix; - - for (ix = 0; ix < priv->max_nch; ix++) - mlx5e_tir_destroy(&priv->rx_res->channels[ix].xsk_tir); -} - static int mlx5e_modify_channels_scatter_fcs(struct mlx5e_channels *chs, bool enable) { int err = 0; @@ -4459,15 +4078,6 @@ const struct net_device_ops mlx5e_netdev_ops = { .ndo_get_devlink_port = mlx5e_get_devlink_port, }; -void mlx5e_build_default_indir_rqt(u32 *indirection_rqt, int len, - int num_channels) -{ - int i; - - for (i = 0; i < len; i++) - indirection_rqt[i] = i % num_channels; -} - static u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout) { int i; @@ -4480,21 +4090,6 @@ static u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeo return MLX5_CAP_ETH(mdev, lro_timer_supported_periods[i]); } -void mlx5e_build_rss_params(struct mlx5e_rss_params *rss_params, - u16 num_channels) -{ - enum mlx5e_traffic_types tt; - - rss_params->hash.hfunc = ETH_RSS_HASH_TOP; - netdev_rss_key_fill(rss_params->hash.toeplitz_hash_key, - sizeof(rss_params->hash.toeplitz_hash_key)); - mlx5e_build_default_indir_rqt(rss_params->indir.table, - MLX5E_INDIR_RQT_SIZE, num_channels); - for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) - rss_params->rx_hash_fields[tt] = - mlx5e_rss_get_default_tt_config(tt).rx_hash_fields; -} - void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16 mtu) { struct mlx5e_params *params = &priv->channels.params; @@ -4556,7 +4151,7 @@ void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16 /* TX inline */ mlx5_query_min_inline(mdev, ¶ms->tx_min_inline_mode); - params->tunneled_offload_en = mlx5e_tunnel_inner_ft_supported(mdev); + params->tunneled_offload_en = mlx5_tunnel_inner_ft_supported(mdev); /* AF_XDP */ params->xsk = xsk; @@ -4616,8 +4211,8 @@ static bool mlx5e_tunnel_any_tx_proto_supported(struct mlx5_core_dev *mdev) { int tt; - for (tt = 0; tt < MLX5E_NUM_TUNNEL_TT; tt++) { - if (mlx5e_tunnel_proto_supported_tx(mdev, mlx5e_get_proto_by_tunnel_type(tt))) + for (tt = 0; tt < MLX5_NUM_TUNNEL_TT; tt++) { + if (mlx5e_tunnel_proto_supported_tx(mdev, mlx5_get_proto_by_tunnel_type(tt))) return true; } return (mlx5_vxlan_allowed(mdev->vxlan) || mlx5_geneve_tx_allowed(mdev)); @@ -4819,15 +4414,14 @@ static void mlx5e_nic_cleanup(struct mlx5e_priv *priv) static int mlx5e_init_nic_rx(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; - struct mlx5e_tir_builder *tir_builder; + enum mlx5e_rx_res_features features; + struct mlx5e_lro_param lro_param; int err; - priv->rx_res = kvzalloc(sizeof(*priv->rx_res), GFP_KERNEL); + priv->rx_res = mlx5e_rx_res_alloc(); if (!priv->rx_res) return -ENOMEM; - mlx5e_build_rss_params(&priv->rx_res->rss_params, priv->channels.params.num_channels); - mlx5e_create_q_counters(priv); err = mlx5e_open_drop_rq(priv, &priv->drop_rq); @@ -4836,50 +4430,20 @@ static int mlx5e_init_nic_rx(struct mlx5e_priv *priv) goto err_destroy_q_counters; } - err = mlx5e_create_indirect_rqt(priv); + features = MLX5E_RX_RES_FEATURE_XSK | MLX5E_RX_RES_FEATURE_PTP; + if (priv->channels.params.tunneled_offload_en) + features |= MLX5E_RX_RES_FEATURE_INNER_FT; + lro_param = mlx5e_get_lro_param(&priv->channels.params); + err = mlx5e_rx_res_init(priv->rx_res, priv->mdev, features, + priv->max_nch, priv->drop_rq.rqn, &lro_param, + priv->channels.params.num_channels); if (err) goto err_close_drop_rq; - err = mlx5e_create_direct_rqts(priv); - if (err) - goto err_destroy_indirect_rqts; - - err = mlx5e_create_indirect_tirs(priv, true); - if (err) - goto err_destroy_direct_rqts; - - err = mlx5e_create_direct_tirs(priv); - if (err) - goto err_destroy_indirect_tirs; - - err = mlx5e_create_xsk_rqts(priv); - if (unlikely(err)) - goto err_destroy_direct_tirs; - - err = mlx5e_create_xsk_tirs(priv); - if (unlikely(err)) - goto err_destroy_xsk_rqts; - - err = mlx5e_rqt_init_direct(&priv->rx_res->ptp.rqt, priv->mdev, false, - priv->drop_rq.rqn); - if (err) - goto err_destroy_xsk_tirs; - - tir_builder = mlx5e_tir_builder_alloc(false); - if (!tir_builder) { - err = -ENOMEM; - goto err_destroy_ptp_rqt; - } - err = mlx5e_create_direct_tir(priv, &priv->rx_res->ptp.tir, tir_builder, - &priv->rx_res->ptp.rqt); - mlx5e_tir_builder_free(tir_builder); - if (err) - goto err_destroy_ptp_rqt; - err = mlx5e_create_flow_steering(priv); if (err) { mlx5_core_warn(mdev, "create flow steering failed, %d\n", err); - goto err_destroy_ptp_direct_tir; + goto err_destroy_rx_res; } err = mlx5e_tc_nic_init(priv); @@ -4900,27 +4464,13 @@ err_tc_nic_cleanup: mlx5e_tc_nic_cleanup(priv); err_destroy_flow_steering: mlx5e_destroy_flow_steering(priv); -err_destroy_ptp_direct_tir: - mlx5e_tir_destroy(&priv->rx_res->ptp.tir); -err_destroy_ptp_rqt: - mlx5e_rqt_destroy(&priv->rx_res->ptp.rqt); -err_destroy_xsk_tirs: - mlx5e_destroy_xsk_tirs(priv); -err_destroy_xsk_rqts: - mlx5e_destroy_xsk_rqts(priv); -err_destroy_direct_tirs: - mlx5e_destroy_direct_tirs(priv); -err_destroy_indirect_tirs: - mlx5e_destroy_indirect_tirs(priv); -err_destroy_direct_rqts: - mlx5e_destroy_direct_rqts(priv); -err_destroy_indirect_rqts: - mlx5e_rqt_destroy(&priv->rx_res->indir_rqt); +err_destroy_rx_res: + mlx5e_rx_res_destroy(priv->rx_res); err_close_drop_rq: mlx5e_close_drop_rq(&priv->drop_rq); err_destroy_q_counters: mlx5e_destroy_q_counters(priv); - kvfree(priv->rx_res); + mlx5e_rx_res_free(priv->rx_res); priv->rx_res = NULL; return err; } @@ -4930,17 +4480,10 @@ static void mlx5e_cleanup_nic_rx(struct mlx5e_priv *priv) mlx5e_accel_cleanup_rx(priv); mlx5e_tc_nic_cleanup(priv); mlx5e_destroy_flow_steering(priv); - mlx5e_tir_destroy(&priv->rx_res->ptp.tir); - mlx5e_rqt_destroy(&priv->rx_res->ptp.rqt); - mlx5e_destroy_xsk_tirs(priv); - mlx5e_destroy_xsk_rqts(priv); - mlx5e_destroy_direct_tirs(priv); - mlx5e_destroy_indirect_tirs(priv); - mlx5e_destroy_direct_rqts(priv); - mlx5e_rqt_destroy(&priv->rx_res->indir_rqt); + mlx5e_rx_res_destroy(priv->rx_res); mlx5e_close_drop_rq(&priv->drop_rq); mlx5e_destroy_q_counters(priv); - kvfree(priv->rx_res); + mlx5e_rx_res_free(priv->rx_res); priv->rx_res = NULL; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 2c54951c240d..1e520640f7e0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -647,27 +647,24 @@ static int mlx5e_create_rep_ttc_table(struct mlx5e_priv *priv) { struct mlx5e_rep_priv *rpriv = priv->ppriv; struct mlx5_eswitch_rep *rep = rpriv->rep; - struct mlx5e_rx_res *res = priv->rx_res; struct ttc_params ttc_params = {}; - int tt, err; + int err; priv->fs.ns = mlx5_get_flow_namespace(priv->mdev, MLX5_FLOW_NAMESPACE_KERNEL); /* The inner_ttc in the ttc params is intentionally not set */ - ttc_params.any_tt_tirn = res->channels[0].direct_tir.tirn; - mlx5e_set_ttc_ft_params(&ttc_params); + mlx5e_set_ttc_params(priv, &ttc_params, false); if (rep->vport != MLX5_VPORT_UPLINK) /* To give uplik rep TTC a lower level for chaining from root ft */ ttc_params.ft_attr.level = MLX5E_TTC_FT_LEVEL + 1; - for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) - ttc_params.indir_tirn[tt] = res->rss[tt].indir_tir.tirn; - - err = mlx5e_create_ttc_table(priv, &ttc_params, &priv->fs.ttc); - if (err) { - netdev_err(priv->netdev, "Failed to create rep ttc table, err=%d\n", err); + priv->fs.ttc = mlx5_create_ttc_table(priv->mdev, &ttc_params); + if (IS_ERR(priv->fs.ttc)) { + err = PTR_ERR(priv->fs.ttc); + netdev_err(priv->netdev, "Failed to create rep ttc table, err=%d\n", + err); return err; } return 0; @@ -685,7 +682,7 @@ static int mlx5e_create_rep_root_ft(struct mlx5e_priv *priv) /* non uplik reps will skip any bypass tables and go directly to * their own ttc */ - rpriv->root_ft = priv->fs.ttc.ft.t; + rpriv->root_ft = mlx5_get_ttc_flow_table(priv->fs.ttc); return 0; } @@ -758,14 +755,13 @@ int mlx5e_rep_bond_update(struct mlx5e_priv *priv, bool cleanup) static int mlx5e_init_rep_rx(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_lro_param lro_param; int err; - priv->rx_res = kvzalloc(sizeof(*priv->rx_res), GFP_KERNEL); + priv->rx_res = mlx5e_rx_res_alloc(); if (!priv->rx_res) return -ENOMEM; - mlx5e_build_rss_params(&priv->rx_res->rss_params, priv->channels.params.num_channels); - mlx5e_init_l2_addr(priv); err = mlx5e_open_drop_rq(priv, &priv->drop_rq); @@ -774,25 +770,16 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv) return err; } - err = mlx5e_create_indirect_rqt(priv); + lro_param = mlx5e_get_lro_param(&priv->channels.params); + err = mlx5e_rx_res_init(priv->rx_res, priv->mdev, 0, + priv->max_nch, priv->drop_rq.rqn, &lro_param, + priv->channels.params.num_channels); if (err) goto err_close_drop_rq; - err = mlx5e_create_direct_rqts(priv); - if (err) - goto err_destroy_indirect_rqts; - - err = mlx5e_create_indirect_tirs(priv, false); - if (err) - goto err_destroy_direct_rqts; - - err = mlx5e_create_direct_tirs(priv); - if (err) - goto err_destroy_indirect_tirs; - err = mlx5e_create_rep_ttc_table(priv); if (err) - goto err_destroy_direct_tirs; + goto err_destroy_rx_res; err = mlx5e_create_rep_root_ft(priv); if (err) @@ -809,18 +796,12 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv) err_destroy_root_ft: mlx5e_destroy_rep_root_ft(priv); err_destroy_ttc_table: - mlx5e_destroy_ttc_table(priv, &priv->fs.ttc); -err_destroy_direct_tirs: - mlx5e_destroy_direct_tirs(priv); -err_destroy_indirect_tirs: - mlx5e_destroy_indirect_tirs(priv); -err_destroy_direct_rqts: - mlx5e_destroy_direct_rqts(priv); -err_destroy_indirect_rqts: - mlx5e_rqt_destroy(&priv->rx_res->indir_rqt); + mlx5_destroy_ttc_table(priv->fs.ttc); +err_destroy_rx_res: + mlx5e_rx_res_destroy(priv->rx_res); err_close_drop_rq: mlx5e_close_drop_rq(&priv->drop_rq); - kvfree(priv->rx_res); + mlx5e_rx_res_free(priv->rx_res); priv->rx_res = NULL; return err; } @@ -830,13 +811,10 @@ static void mlx5e_cleanup_rep_rx(struct mlx5e_priv *priv) mlx5e_ethtool_cleanup_steering(priv); rep_vport_rx_rule_destroy(priv); mlx5e_destroy_rep_root_ft(priv); - mlx5e_destroy_ttc_table(priv, &priv->fs.ttc); - mlx5e_destroy_direct_tirs(priv); - mlx5e_destroy_indirect_tirs(priv); - mlx5e_destroy_direct_rqts(priv); - mlx5e_rqt_destroy(&priv->rx_res->indir_rqt); + mlx5_destroy_ttc_table(priv->fs.ttc); + mlx5e_rx_res_destroy(priv->rx_res); mlx5e_close_drop_rq(&priv->drop_rq); - kvfree(priv->rx_res); + mlx5e_rx_res_free(priv->rx_res); priv->rx_res = NULL; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 2ef02fea119a..349a93e0213d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -34,19 +34,13 @@ #include <net/flow_offload.h> #include <net/sch_generic.h> #include <net/pkt_cls.h> -#include <net/tc_act/tc_gact.h> -#include <net/tc_act/tc_skbedit.h> #include <linux/mlx5/fs.h> #include <linux/mlx5/device.h> #include <linux/rhashtable.h> #include <linux/refcount.h> #include <linux/completion.h> -#include <net/tc_act/tc_mirred.h> -#include <net/tc_act/tc_vlan.h> -#include <net/tc_act/tc_tunnel_key.h> #include <net/tc_act/tc_pedit.h> #include <net/tc_act/tc_csum.h> -#include <net/tc_act/tc_mpls.h> #include <net/psample.h> #include <net/arp.h> #include <net/ipv6_stubs.h> @@ -345,7 +339,7 @@ struct mlx5e_hairpin { int num_channels; struct mlx5e_rqt indir_rqt; struct mlx5e_tir indir_tir[MLX5E_NUM_INDIR_TIRS]; - struct mlx5e_ttc_table ttc; + struct mlx5_ttc_table *ttc; }; struct mlx5e_hairpin_entry { @@ -525,9 +519,10 @@ static int mlx5e_hairpin_create_indirect_rqt(struct mlx5e_hairpin *hp) if (!indir) return -ENOMEM; - mlx5e_build_default_indir_rqt(indir->table, MLX5E_INDIR_RQT_SIZE, hp->num_channels); + mlx5e_rss_params_indir_init_uniform(indir, hp->num_channels); err = mlx5e_rqt_init_indir(&hp->indir_rqt, mdev, hp->pair->rqn, hp->num_channels, - priv->rx_res->rss_params.hash.hfunc, indir); + mlx5e_rx_res_get_current_hash(priv->rx_res).hfunc, + indir); kvfree(indir); return err; @@ -536,8 +531,8 @@ static int mlx5e_hairpin_create_indirect_rqt(struct mlx5e_hairpin *hp) static int mlx5e_hairpin_create_indirect_tirs(struct mlx5e_hairpin *hp) { struct mlx5e_priv *priv = hp->func_priv; - struct mlx5e_rss_params_hash *rss_hash; - enum mlx5e_traffic_types tt, max_tt; + struct mlx5e_rss_params_hash rss_hash; + enum mlx5_traffic_types tt, max_tt; struct mlx5e_tir_builder *builder; int err = 0; @@ -545,7 +540,7 @@ static int mlx5e_hairpin_create_indirect_tirs(struct mlx5e_hairpin *hp) if (!builder) return -ENOMEM; - rss_hash = &priv->rx_res->rss_params.hash; + rss_hash = mlx5e_rx_res_get_current_hash(priv->rx_res); for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { struct mlx5e_rss_params_traffic_type rss_tt; @@ -555,7 +550,7 @@ static int mlx5e_hairpin_create_indirect_tirs(struct mlx5e_hairpin *hp) mlx5e_tir_builder_build_rqt(builder, hp->tdn, mlx5e_rqt_get_rqtn(&hp->indir_rqt), false); - mlx5e_tir_builder_build_rss(builder, rss_hash, &rss_tt, false); + mlx5e_tir_builder_build_rss(builder, &rss_hash, &rss_tt, false); err = mlx5e_tir_init(&hp->indir_tir[tt], builder, hp->func_mdev, false); if (err) { @@ -594,12 +589,16 @@ static void mlx5e_hairpin_set_ttc_params(struct mlx5e_hairpin *hp, memset(ttc_params, 0, sizeof(*ttc_params)); - ttc_params->any_tt_tirn = mlx5e_tir_get_tirn(&hp->direct_tir); - - for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) - ttc_params->indir_tirn[tt] = mlx5e_tir_get_tirn(&hp->indir_tir[tt]); + ttc_params->ns = mlx5_get_flow_namespace(hp->func_mdev, + MLX5_FLOW_NAMESPACE_KERNEL); + for (tt = 0; tt < MLX5_NUM_TT; tt++) { + ttc_params->dests[tt].type = MLX5_FLOW_DESTINATION_TYPE_TIR; + ttc_params->dests[tt].tir_num = + tt == MLX5_TT_ANY ? + mlx5e_tir_get_tirn(&hp->direct_tir) : + mlx5e_tir_get_tirn(&hp->indir_tir[tt]); + } - ft_attr->max_fte = MLX5E_TTC_TABLE_SIZE; ft_attr->level = MLX5E_TC_TTC_FT_LEVEL; ft_attr->prio = MLX5E_TC_PRIO; } @@ -619,12 +618,15 @@ static int mlx5e_hairpin_rss_init(struct mlx5e_hairpin *hp) goto err_create_indirect_tirs; mlx5e_hairpin_set_ttc_params(hp, &ttc_params); - err = mlx5e_create_ttc_table(priv, &ttc_params, &hp->ttc); - if (err) + hp->ttc = mlx5_create_ttc_table(priv->mdev, &ttc_params); + if (IS_ERR(hp->ttc)) { + err = PTR_ERR(hp->ttc); goto err_create_ttc_table; + } netdev_dbg(priv->netdev, "add hairpin: using %d channels rss ttc table id %x\n", - hp->num_channels, hp->ttc.ft.t->id); + hp->num_channels, + mlx5_get_ttc_flow_table(priv->fs.ttc)->id); return 0; @@ -638,9 +640,7 @@ err_create_indirect_tirs: static void mlx5e_hairpin_rss_cleanup(struct mlx5e_hairpin *hp) { - struct mlx5e_priv *priv = hp->func_priv; - - mlx5e_destroy_ttc_table(priv, &hp->ttc); + mlx5_destroy_ttc_table(hp->ttc); mlx5e_hairpin_destroy_indirect_tirs(hp); mlx5e_rqt_destroy(&hp->indir_rqt); } @@ -884,7 +884,8 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv, attach_flow: if (hpe->hp->num_channels > 1) { flow_flag_set(flow, HAIRPIN_RSS); - flow->attr->nic_attr->hairpin_ft = hpe->hp->ttc.ft.t; + flow->attr->nic_attr->hairpin_ft = + mlx5_get_ttc_flow_table(hpe->hp->ttc); } else { flow->attr->nic_attr->hairpin_tirn = mlx5e_tir_get_tirn(&hpe->hp->direct_tir); } @@ -1030,15 +1031,17 @@ err_ft_get: static int mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, - struct mlx5e_tc_flow_parse_attr *parse_attr, struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack) { + struct mlx5e_tc_flow_parse_attr *parse_attr; struct mlx5_flow_attr *attr = flow->attr; struct mlx5_core_dev *dev = priv->mdev; - struct mlx5_fc *counter = NULL; + struct mlx5_fc *counter; int err; + parse_attr = attr->parse_attr; + if (flow_flag_test(flow, HAIRPIN)) { err = mlx5e_hairpin_flow_add(priv, flow, parse_attr, extack); if (err) @@ -1358,9 +1361,9 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, bool vf_tun = false, encap_valid = true; struct net_device *encap_dev = NULL; struct mlx5_esw_flow_attr *esw_attr; - struct mlx5_fc *counter = NULL; struct mlx5e_rep_priv *rpriv; struct mlx5e_priv *out_priv; + struct mlx5_fc *counter; u32 max_prio, max_chain; int err = 0; int out_index; @@ -3326,10 +3329,10 @@ static int validate_goto_chain(struct mlx5e_priv *priv, static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct flow_action *flow_action, - struct mlx5e_tc_flow_parse_attr *parse_attr, struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack) { + struct mlx5e_tc_flow_parse_attr *parse_attr; struct mlx5_flow_attr *attr = flow->attr; struct pedit_headers_action hdrs[2] = {}; const struct flow_action_entry *act; @@ -3345,8 +3348,8 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, return -EOPNOTSUPP; nic_attr = attr->nic_attr; - nic_attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG; + parse_attr = attr->parse_attr; flow_action_for_each(i, act, flow_action) { switch (act->id) { @@ -3355,10 +3358,8 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, MLX5_FLOW_CONTEXT_ACTION_COUNT; break; case FLOW_ACTION_DROP: - action |= MLX5_FLOW_CONTEXT_ACTION_DROP; - if (MLX5_CAP_FLOWTABLE(priv->mdev, - flow_table_properties_nic_receive.flow_counter)) - action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; + action |= MLX5_FLOW_CONTEXT_ACTION_DROP | + MLX5_FLOW_CONTEXT_ACTION_COUNT; break; case FLOW_ACTION_MANGLE: case FLOW_ACTION_ADD: @@ -3399,7 +3400,7 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, "device is not on same HW, can't offload"); netdev_warn(priv->netdev, "device %s not on same HW, can't offload\n", peer_dev->name); - return -EINVAL; + return -EOPNOTSUPP; } } break; @@ -3409,7 +3410,7 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, if (mark & ~MLX5E_TC_FLOW_ID_MASK) { NL_SET_ERR_MSG_MOD(extack, "Bad flow mark - only 16 bit is supported"); - return -EINVAL; + return -EOPNOTSUPP; } nic_attr->flow_tag = mark; @@ -3706,8 +3707,7 @@ static int verify_uplink_forwarding(struct mlx5e_priv *priv, static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct flow_action *flow_action, struct mlx5e_tc_flow *flow, - struct netlink_ext_ack *extack, - struct net_device *filter_dev) + struct netlink_ext_ack *extack) { struct pedit_headers_action hdrs[2] = {}; struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; @@ -3772,7 +3772,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, "mpls pop supported only as first action"); return -EOPNOTSUPP; } - if (!netif_is_bareudp(filter_dev)) { + if (!netif_is_bareudp(parse_attr->filter_dev)) { NL_SET_ERR_MSG_MOD(extack, "mpls pop supported only on bareudp devices"); return -EOPNOTSUPP; @@ -3921,7 +3921,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, "devices %s %s not on same switch HW, can't offload forwarding\n", priv->netdev->name, out_dev->name); - return -EINVAL; + return -EOPNOTSUPP; } } break; @@ -4274,7 +4274,7 @@ __mlx5e_add_fdb_flow(struct mlx5e_priv *priv, if (err) goto err_free; - err = parse_tc_fdb_actions(priv, &rule->action, flow, extack, filter_dev); + err = parse_tc_fdb_actions(priv, &rule->action, flow, extack); if (err) goto err_free; @@ -4420,11 +4420,11 @@ mlx5e_add_nic_flow(struct mlx5e_priv *priv, if (err) goto err_free; - err = parse_tc_nic_actions(priv, &rule->action, parse_attr, flow, extack); + err = parse_tc_nic_actions(priv, &rule->action, flow, extack); if (err) goto err_free; - err = mlx5e_tc_add_nic_flow(priv, parse_attr, flow, extack); + err = mlx5e_tc_add_nic_flow(priv, flow, extack); if (err) goto err_free; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 97e6cb6f13c1..2b90388ef209 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1889,8 +1889,7 @@ is_port_function_supported(struct mlx5_eswitch *esw, u16 vport_num) mlx5_esw_is_sf_vport(esw, vport_num); } -int mlx5_devlink_port_function_hw_addr_get(struct devlink *devlink, - struct devlink_port *port, +int mlx5_devlink_port_function_hw_addr_get(struct devlink_port *port, u8 *hw_addr, int *hw_addr_len, struct netlink_ext_ack *extack) { @@ -1899,7 +1898,7 @@ int mlx5_devlink_port_function_hw_addr_get(struct devlink *devlink, int err = -EOPNOTSUPP; u16 vport_num; - esw = mlx5_devlink_eswitch_get(devlink); + esw = mlx5_devlink_eswitch_get(port->devlink); if (IS_ERR(esw)) return PTR_ERR(esw); @@ -1923,8 +1922,7 @@ int mlx5_devlink_port_function_hw_addr_get(struct devlink *devlink, return err; } -int mlx5_devlink_port_function_hw_addr_set(struct devlink *devlink, - struct devlink_port *port, +int mlx5_devlink_port_function_hw_addr_set(struct devlink_port *port, const u8 *hw_addr, int hw_addr_len, struct netlink_ext_ack *extack) { @@ -1933,7 +1931,7 @@ int mlx5_devlink_port_function_hw_addr_set(struct devlink *devlink, int err = -EOPNOTSUPP; u16 vport_num; - esw = mlx5_devlink_eswitch_get(devlink); + esw = mlx5_devlink_eswitch_get(port->devlink); if (IS_ERR(esw)) { NL_SET_ERR_MSG_MOD(extack, "Eswitch doesn't support set hw_addr"); return PTR_ERR(esw); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index d562edf5b0bc..41eff9dd1bf6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -475,12 +475,10 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, struct netlink_ext_ack *extack); int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, enum devlink_eswitch_encap_mode *encap); -int mlx5_devlink_port_function_hw_addr_get(struct devlink *devlink, - struct devlink_port *port, +int mlx5_devlink_port_function_hw_addr_get(struct devlink_port *port, u8 *hw_addr, int *hw_addr_len, struct netlink_ext_ack *extack); -int mlx5_devlink_port_function_hw_addr_set(struct devlink *devlink, - struct devlink_port *port, +int mlx5_devlink_port_function_hw_addr_set(struct devlink_port *port, const u8 *hw_addr, int hw_addr_len, struct netlink_ext_ack *extack); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 011e766e4f67..feecf44994a9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -3083,8 +3083,11 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode, switch (MLX5_CAP_ETH(dev, wqe_inline_mode)) { case MLX5_CAP_INLINE_MODE_NOT_REQUIRED: - if (mode == DEVLINK_ESWITCH_INLINE_MODE_NONE) + if (mode == DEVLINK_ESWITCH_INLINE_MODE_NONE) { + err = 0; goto out; + } + fallthrough; case MLX5_CAP_INLINE_MODE_L2: NL_SET_ERR_MSG_MOD(extack, "Inline mode can't be set"); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index a126cbc6f0d6..67571e5040d6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -314,8 +314,7 @@ static void mlx5i_cleanup_tx(struct mlx5e_priv *priv) static int mlx5i_create_flow_steering(struct mlx5e_priv *priv) { - struct ttc_params ttc_params = {}; - int tt, err; + int err; priv->fs.ns = mlx5_get_flow_namespace(priv->mdev, MLX5_FLOW_NAMESPACE_KERNEL); @@ -330,12 +329,7 @@ static int mlx5i_create_flow_steering(struct mlx5e_priv *priv) priv->netdev->hw_features &= ~NETIF_F_NTUPLE; } - mlx5e_set_ttc_basic_params(priv, &ttc_params); - mlx5e_set_ttc_ft_params(&ttc_params); - for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) - ttc_params.indir_tirn[tt] = priv->rx_res->rss[tt].indir_tir.tirn; - - err = mlx5e_create_ttc_table(priv, &ttc_params, &priv->fs.ttc); + err = mlx5e_create_ttc_table(priv); if (err) { netdev_err(priv->netdev, "Failed to create ttc table, err=%d\n", err); @@ -352,21 +346,20 @@ err_destroy_arfs_tables: static void mlx5i_destroy_flow_steering(struct mlx5e_priv *priv) { - mlx5e_destroy_ttc_table(priv, &priv->fs.ttc); + mlx5e_destroy_ttc_table(priv); mlx5e_arfs_destroy_tables(priv); } static int mlx5i_init_rx(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_lro_param lro_param; int err; - priv->rx_res = kvzalloc(sizeof(*priv->rx_res), GFP_KERNEL); + priv->rx_res = mlx5e_rx_res_alloc(); if (!priv->rx_res) return -ENOMEM; - mlx5e_build_rss_params(&priv->rx_res->rss_params, priv->channels.params.num_channels); - mlx5e_create_q_counters(priv); err = mlx5e_open_drop_rq(priv, &priv->drop_rq); @@ -375,41 +368,26 @@ static int mlx5i_init_rx(struct mlx5e_priv *priv) goto err_destroy_q_counters; } - err = mlx5e_create_indirect_rqt(priv); + lro_param = mlx5e_get_lro_param(&priv->channels.params); + err = mlx5e_rx_res_init(priv->rx_res, priv->mdev, 0, + priv->max_nch, priv->drop_rq.rqn, &lro_param, + priv->channels.params.num_channels); if (err) goto err_close_drop_rq; - err = mlx5e_create_direct_rqts(priv); - if (err) - goto err_destroy_indirect_rqts; - - err = mlx5e_create_indirect_tirs(priv, false); - if (err) - goto err_destroy_direct_rqts; - - err = mlx5e_create_direct_tirs(priv); - if (err) - goto err_destroy_indirect_tirs; - err = mlx5i_create_flow_steering(priv); if (err) - goto err_destroy_direct_tirs; + goto err_destroy_rx_res; return 0; -err_destroy_direct_tirs: - mlx5e_destroy_direct_tirs(priv); -err_destroy_indirect_tirs: - mlx5e_destroy_indirect_tirs(priv); -err_destroy_direct_rqts: - mlx5e_destroy_direct_rqts(priv); -err_destroy_indirect_rqts: - mlx5e_rqt_destroy(&priv->rx_res->indir_rqt); +err_destroy_rx_res: + mlx5e_rx_res_destroy(priv->rx_res); err_close_drop_rq: mlx5e_close_drop_rq(&priv->drop_rq); err_destroy_q_counters: mlx5e_destroy_q_counters(priv); - kvfree(priv->rx_res); + mlx5e_rx_res_free(priv->rx_res); priv->rx_res = NULL; return err; } @@ -417,13 +395,10 @@ err_destroy_q_counters: static void mlx5i_cleanup_rx(struct mlx5e_priv *priv) { mlx5i_destroy_flow_steering(priv); - mlx5e_destroy_direct_tirs(priv); - mlx5e_destroy_indirect_tirs(priv); - mlx5e_destroy_direct_rqts(priv); - mlx5e_rqt_destroy(&priv->rx_res->indir_rqt); + mlx5e_rx_res_destroy(priv->rx_res); mlx5e_close_drop_rq(&priv->drop_rq); mlx5e_destroy_q_counters(priv); - kvfree(priv->rx_res); + mlx5e_rx_res_free(priv->rx_res); priv->rx_res = NULL; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c new file mode 100644 index 000000000000..749d17c0057d --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c @@ -0,0 +1,602 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. + +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/tcp.h> +#include <linux/mlx5/fs.h> +#include <linux/mlx5/driver.h> +#include "mlx5_core.h" +#include "lib/fs_ttc.h" + +#define MLX5_TTC_NUM_GROUPS 3 +#define MLX5_TTC_GROUP1_SIZE (BIT(3) + MLX5_NUM_TUNNEL_TT) +#define MLX5_TTC_GROUP2_SIZE BIT(1) +#define MLX5_TTC_GROUP3_SIZE BIT(0) +#define MLX5_TTC_TABLE_SIZE (MLX5_TTC_GROUP1_SIZE +\ + MLX5_TTC_GROUP2_SIZE +\ + MLX5_TTC_GROUP3_SIZE) + +#define MLX5_INNER_TTC_NUM_GROUPS 3 +#define MLX5_INNER_TTC_GROUP1_SIZE BIT(3) +#define MLX5_INNER_TTC_GROUP2_SIZE BIT(1) +#define MLX5_INNER_TTC_GROUP3_SIZE BIT(0) +#define MLX5_INNER_TTC_TABLE_SIZE (MLX5_INNER_TTC_GROUP1_SIZE +\ + MLX5_INNER_TTC_GROUP2_SIZE +\ + MLX5_INNER_TTC_GROUP3_SIZE) + +/* L3/L4 traffic type classifier */ +struct mlx5_ttc_table { + int num_groups; + struct mlx5_flow_table *t; + struct mlx5_flow_group **g; + struct mlx5_ttc_rule rules[MLX5_NUM_TT]; + struct mlx5_flow_handle *tunnel_rules[MLX5_NUM_TUNNEL_TT]; +}; + +struct mlx5_flow_table *mlx5_get_ttc_flow_table(struct mlx5_ttc_table *ttc) +{ + return ttc->t; +} + +static void mlx5_cleanup_ttc_rules(struct mlx5_ttc_table *ttc) +{ + int i; + + for (i = 0; i < MLX5_NUM_TT; i++) { + if (!IS_ERR_OR_NULL(ttc->rules[i].rule)) { + mlx5_del_flow_rules(ttc->rules[i].rule); + ttc->rules[i].rule = NULL; + } + } + + for (i = 0; i < MLX5_NUM_TUNNEL_TT; i++) { + if (!IS_ERR_OR_NULL(ttc->tunnel_rules[i])) { + mlx5_del_flow_rules(ttc->tunnel_rules[i]); + ttc->tunnel_rules[i] = NULL; + } + } +} + +struct mlx5_etype_proto { + u16 etype; + u8 proto; +}; + +static struct mlx5_etype_proto ttc_rules[] = { + [MLX5_TT_IPV4_TCP] = { + .etype = ETH_P_IP, + .proto = IPPROTO_TCP, + }, + [MLX5_TT_IPV6_TCP] = { + .etype = ETH_P_IPV6, + .proto = IPPROTO_TCP, + }, + [MLX5_TT_IPV4_UDP] = { + .etype = ETH_P_IP, + .proto = IPPROTO_UDP, + }, + [MLX5_TT_IPV6_UDP] = { + .etype = ETH_P_IPV6, + .proto = IPPROTO_UDP, + }, + [MLX5_TT_IPV4_IPSEC_AH] = { + .etype = ETH_P_IP, + .proto = IPPROTO_AH, + }, + [MLX5_TT_IPV6_IPSEC_AH] = { + .etype = ETH_P_IPV6, + .proto = IPPROTO_AH, + }, + [MLX5_TT_IPV4_IPSEC_ESP] = { + .etype = ETH_P_IP, + .proto = IPPROTO_ESP, + }, + [MLX5_TT_IPV6_IPSEC_ESP] = { + .etype = ETH_P_IPV6, + .proto = IPPROTO_ESP, + }, + [MLX5_TT_IPV4] = { + .etype = ETH_P_IP, + .proto = 0, + }, + [MLX5_TT_IPV6] = { + .etype = ETH_P_IPV6, + .proto = 0, + }, + [MLX5_TT_ANY] = { + .etype = 0, + .proto = 0, + }, +}; + +static struct mlx5_etype_proto ttc_tunnel_rules[] = { + [MLX5_TT_IPV4_GRE] = { + .etype = ETH_P_IP, + .proto = IPPROTO_GRE, + }, + [MLX5_TT_IPV6_GRE] = { + .etype = ETH_P_IPV6, + .proto = IPPROTO_GRE, + }, + [MLX5_TT_IPV4_IPIP] = { + .etype = ETH_P_IP, + .proto = IPPROTO_IPIP, + }, + [MLX5_TT_IPV6_IPIP] = { + .etype = ETH_P_IPV6, + .proto = IPPROTO_IPIP, + }, + [MLX5_TT_IPV4_IPV6] = { + .etype = ETH_P_IP, + .proto = IPPROTO_IPV6, + }, + [MLX5_TT_IPV6_IPV6] = { + .etype = ETH_P_IPV6, + .proto = IPPROTO_IPV6, + }, + +}; + +u8 mlx5_get_proto_by_tunnel_type(enum mlx5_tunnel_types tt) +{ + return ttc_tunnel_rules[tt].proto; +} + +static bool mlx5_tunnel_proto_supported_rx(struct mlx5_core_dev *mdev, + u8 proto_type) +{ + switch (proto_type) { + case IPPROTO_GRE: + return MLX5_CAP_ETH(mdev, tunnel_stateless_gre); + case IPPROTO_IPIP: + case IPPROTO_IPV6: + return (MLX5_CAP_ETH(mdev, tunnel_stateless_ip_over_ip) || + MLX5_CAP_ETH(mdev, tunnel_stateless_ip_over_ip_rx)); + default: + return false; + } +} + +static bool mlx5_tunnel_any_rx_proto_supported(struct mlx5_core_dev *mdev) +{ + int tt; + + for (tt = 0; tt < MLX5_NUM_TUNNEL_TT; tt++) { + if (mlx5_tunnel_proto_supported_rx(mdev, + ttc_tunnel_rules[tt].proto)) + return true; + } + return false; +} + +bool mlx5_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev) +{ + return (mlx5_tunnel_any_rx_proto_supported(mdev) && + MLX5_CAP_FLOWTABLE_NIC_RX(mdev, + ft_field_support.inner_ip_version)); +} + +static u8 mlx5_etype_to_ipv(u16 ethertype) +{ + if (ethertype == ETH_P_IP) + return 4; + + if (ethertype == ETH_P_IPV6) + return 6; + + return 0; +} + +static struct mlx5_flow_handle * +mlx5_generate_ttc_rule(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft, + struct mlx5_flow_destination *dest, u16 etype, u8 proto) +{ + int match_ipv_outer = + MLX5_CAP_FLOWTABLE_NIC_RX(dev, + ft_field_support.outer_ip_version); + MLX5_DECLARE_FLOW_ACT(flow_act); + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; + int err = 0; + u8 ipv; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return ERR_PTR(-ENOMEM); + + if (proto) { + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_protocol); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_protocol, proto); + } + + ipv = mlx5_etype_to_ipv(etype); + if (match_ipv_outer && ipv) { + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_version); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_version, ipv); + } else if (etype) { + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ethertype); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.ethertype, etype); + } + + rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_err(dev, "%s: add rule failed\n", __func__); + } + + kvfree(spec); + return err ? ERR_PTR(err) : rule; +} + +static int mlx5_generate_ttc_table_rules(struct mlx5_core_dev *dev, + struct ttc_params *params, + struct mlx5_ttc_table *ttc) +{ + struct mlx5_flow_handle **trules; + struct mlx5_ttc_rule *rules; + struct mlx5_flow_table *ft; + int tt; + int err; + + ft = ttc->t; + rules = ttc->rules; + for (tt = 0; tt < MLX5_NUM_TT; tt++) { + struct mlx5_ttc_rule *rule = &rules[tt]; + + rule->rule = mlx5_generate_ttc_rule(dev, ft, ¶ms->dests[tt], + ttc_rules[tt].etype, + ttc_rules[tt].proto); + if (IS_ERR(rule->rule)) { + err = PTR_ERR(rule->rule); + rule->rule = NULL; + goto del_rules; + } + rule->default_dest = params->dests[tt]; + } + + if (!params->inner_ttc || !mlx5_tunnel_inner_ft_supported(dev)) + return 0; + + trules = ttc->tunnel_rules; + for (tt = 0; tt < MLX5_NUM_TUNNEL_TT; tt++) { + if (!mlx5_tunnel_proto_supported_rx(dev, + ttc_tunnel_rules[tt].proto)) + continue; + trules[tt] = mlx5_generate_ttc_rule(dev, ft, + ¶ms->tunnel_dests[tt], + ttc_tunnel_rules[tt].etype, + ttc_tunnel_rules[tt].proto); + if (IS_ERR(trules[tt])) { + err = PTR_ERR(trules[tt]); + trules[tt] = NULL; + goto del_rules; + } + } + + return 0; + +del_rules: + mlx5_cleanup_ttc_rules(ttc); + return err; +} + +static int mlx5_create_ttc_table_groups(struct mlx5_ttc_table *ttc, + bool use_ipv) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + int ix = 0; + u32 *in; + int err; + u8 *mc; + + ttc->g = kcalloc(MLX5_TTC_NUM_GROUPS, sizeof(*ttc->g), GFP_KERNEL); + if (!ttc->g) + return -ENOMEM; + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) { + kfree(ttc->g); + ttc->g = NULL; + return -ENOMEM; + } + + /* L4 Group */ + mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_protocol); + if (use_ipv) + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_version); + else + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5_TTC_GROUP1_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in); + if (IS_ERR(ttc->g[ttc->num_groups])) + goto err; + ttc->num_groups++; + + /* L3 Group */ + MLX5_SET(fte_match_param, mc, outer_headers.ip_protocol, 0); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5_TTC_GROUP2_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in); + if (IS_ERR(ttc->g[ttc->num_groups])) + goto err; + ttc->num_groups++; + + /* Any Group */ + memset(in, 0, inlen); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5_TTC_GROUP3_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in); + if (IS_ERR(ttc->g[ttc->num_groups])) + goto err; + ttc->num_groups++; + + kvfree(in); + return 0; + +err: + err = PTR_ERR(ttc->g[ttc->num_groups]); + ttc->g[ttc->num_groups] = NULL; + kvfree(in); + + return err; +} + +static struct mlx5_flow_handle * +mlx5_generate_inner_ttc_rule(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + struct mlx5_flow_destination *dest, + u16 etype, u8 proto) +{ + MLX5_DECLARE_FLOW_ACT(flow_act); + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; + int err = 0; + u8 ipv; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return ERR_PTR(-ENOMEM); + + ipv = mlx5_etype_to_ipv(etype); + if (etype && ipv) { + spec->match_criteria_enable = MLX5_MATCH_INNER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, inner_headers.ip_version); + MLX5_SET(fte_match_param, spec->match_value, inner_headers.ip_version, ipv); + } + + if (proto) { + spec->match_criteria_enable = MLX5_MATCH_INNER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, inner_headers.ip_protocol); + MLX5_SET(fte_match_param, spec->match_value, inner_headers.ip_protocol, proto); + } + + rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_err(dev, "%s: add inner TTC rule failed\n", __func__); + } + + kvfree(spec); + return err ? ERR_PTR(err) : rule; +} + +static int mlx5_generate_inner_ttc_table_rules(struct mlx5_core_dev *dev, + struct ttc_params *params, + struct mlx5_ttc_table *ttc) +{ + struct mlx5_ttc_rule *rules; + struct mlx5_flow_table *ft; + int err; + int tt; + + ft = ttc->t; + rules = ttc->rules; + + for (tt = 0; tt < MLX5_NUM_TT; tt++) { + struct mlx5_ttc_rule *rule = &rules[tt]; + + rule->rule = mlx5_generate_inner_ttc_rule(dev, ft, + ¶ms->dests[tt], + ttc_rules[tt].etype, + ttc_rules[tt].proto); + if (IS_ERR(rule->rule)) { + err = PTR_ERR(rule->rule); + rule->rule = NULL; + goto del_rules; + } + rule->default_dest = params->dests[tt]; + } + + return 0; + +del_rules: + + mlx5_cleanup_ttc_rules(ttc); + return err; +} + +static int mlx5_create_inner_ttc_table_groups(struct mlx5_ttc_table *ttc) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + int ix = 0; + u32 *in; + int err; + u8 *mc; + + ttc->g = kcalloc(MLX5_INNER_TTC_NUM_GROUPS, sizeof(*ttc->g), + GFP_KERNEL); + if (!ttc->g) + return -ENOMEM; + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) { + kfree(ttc->g); + ttc->g = NULL; + return -ENOMEM; + } + + /* L4 Group */ + mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + MLX5_SET_TO_ONES(fte_match_param, mc, inner_headers.ip_protocol); + MLX5_SET_TO_ONES(fte_match_param, mc, inner_headers.ip_version); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_INNER_HEADERS); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5_INNER_TTC_GROUP1_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in); + if (IS_ERR(ttc->g[ttc->num_groups])) + goto err; + ttc->num_groups++; + + /* L3 Group */ + MLX5_SET(fte_match_param, mc, inner_headers.ip_protocol, 0); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5_INNER_TTC_GROUP2_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in); + if (IS_ERR(ttc->g[ttc->num_groups])) + goto err; + ttc->num_groups++; + + /* Any Group */ + memset(in, 0, inlen); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5_INNER_TTC_GROUP3_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in); + if (IS_ERR(ttc->g[ttc->num_groups])) + goto err; + ttc->num_groups++; + + kvfree(in); + return 0; + +err: + err = PTR_ERR(ttc->g[ttc->num_groups]); + ttc->g[ttc->num_groups] = NULL; + kvfree(in); + + return err; +} + +struct mlx5_ttc_table *mlx5_create_inner_ttc_table(struct mlx5_core_dev *dev, + struct ttc_params *params) +{ + struct mlx5_ttc_table *ttc; + int err; + + ttc = kvzalloc(sizeof(*ttc), GFP_KERNEL); + if (!ttc) + return ERR_PTR(-ENOMEM); + + WARN_ON_ONCE(params->ft_attr.max_fte); + params->ft_attr.max_fte = MLX5_INNER_TTC_TABLE_SIZE; + ttc->t = mlx5_create_flow_table(params->ns, ¶ms->ft_attr); + if (IS_ERR(ttc->t)) { + err = PTR_ERR(ttc->t); + kvfree(ttc); + return ERR_PTR(err); + } + + err = mlx5_create_inner_ttc_table_groups(ttc); + if (err) + goto destroy_ft; + + err = mlx5_generate_inner_ttc_table_rules(dev, params, ttc); + if (err) + goto destroy_ft; + + return ttc; + +destroy_ft: + mlx5_destroy_ttc_table(ttc); + return ERR_PTR(err); +} + +void mlx5_destroy_ttc_table(struct mlx5_ttc_table *ttc) +{ + int i; + + mlx5_cleanup_ttc_rules(ttc); + for (i = ttc->num_groups - 1; i >= 0; i--) { + if (!IS_ERR_OR_NULL(ttc->g[i])) + mlx5_destroy_flow_group(ttc->g[i]); + ttc->g[i] = NULL; + } + + kfree(ttc->g); + mlx5_destroy_flow_table(ttc->t); + kvfree(ttc); +} + +struct mlx5_ttc_table *mlx5_create_ttc_table(struct mlx5_core_dev *dev, + struct ttc_params *params) +{ + bool match_ipv_outer = + MLX5_CAP_FLOWTABLE_NIC_RX(dev, + ft_field_support.outer_ip_version); + struct mlx5_ttc_table *ttc; + int err; + + ttc = kvzalloc(sizeof(*ttc), GFP_KERNEL); + if (!ttc) + return ERR_PTR(-ENOMEM); + + WARN_ON_ONCE(params->ft_attr.max_fte); + params->ft_attr.max_fte = MLX5_TTC_TABLE_SIZE; + ttc->t = mlx5_create_flow_table(params->ns, ¶ms->ft_attr); + if (IS_ERR(ttc->t)) { + err = PTR_ERR(ttc->t); + kvfree(ttc); + return ERR_PTR(err); + } + + err = mlx5_create_ttc_table_groups(ttc, match_ipv_outer); + if (err) + goto destroy_ft; + + err = mlx5_generate_ttc_table_rules(dev, params, ttc); + if (err) + goto destroy_ft; + + return ttc; + +destroy_ft: + mlx5_destroy_ttc_table(ttc); + return ERR_PTR(err); +} + +int mlx5_ttc_fwd_dest(struct mlx5_ttc_table *ttc, enum mlx5_traffic_types type, + struct mlx5_flow_destination *new_dest) +{ + return mlx5_modify_rule_destination(ttc->rules[type].rule, new_dest, + NULL); +} + +struct mlx5_flow_destination +mlx5_ttc_get_default_dest(struct mlx5_ttc_table *ttc, + enum mlx5_traffic_types type) +{ + struct mlx5_flow_destination *dest = &ttc->rules[type].default_dest; + + WARN_ONCE(dest->type != MLX5_FLOW_DESTINATION_TYPE_TIR, + "TTC[%d] default dest is not setup yet", type); + + return *dest; +} + +int mlx5_ttc_fwd_default_dest(struct mlx5_ttc_table *ttc, + enum mlx5_traffic_types type) +{ + struct mlx5_flow_destination dest = mlx5_ttc_get_default_dest(ttc, type); + + return mlx5_ttc_fwd_dest(ttc, type, &dest); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.h new file mode 100644 index 000000000000..ce95be8f8382 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.h @@ -0,0 +1,70 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020 Mellanox Technologies. */ + +#ifndef __ML5_FS_TTC_H__ +#define __ML5_FS_TTC_H__ + +#include <linux/mlx5/fs.h> + +enum mlx5_traffic_types { + MLX5_TT_IPV4_TCP, + MLX5_TT_IPV6_TCP, + MLX5_TT_IPV4_UDP, + MLX5_TT_IPV6_UDP, + MLX5_TT_IPV4_IPSEC_AH, + MLX5_TT_IPV6_IPSEC_AH, + MLX5_TT_IPV4_IPSEC_ESP, + MLX5_TT_IPV6_IPSEC_ESP, + MLX5_TT_IPV4, + MLX5_TT_IPV6, + MLX5_TT_ANY, + MLX5_NUM_TT, + MLX5_NUM_INDIR_TIRS = MLX5_TT_ANY, +}; + +enum mlx5_tunnel_types { + MLX5_TT_IPV4_GRE, + MLX5_TT_IPV6_GRE, + MLX5_TT_IPV4_IPIP, + MLX5_TT_IPV6_IPIP, + MLX5_TT_IPV4_IPV6, + MLX5_TT_IPV6_IPV6, + MLX5_NUM_TUNNEL_TT, +}; + +struct mlx5_ttc_rule { + struct mlx5_flow_handle *rule; + struct mlx5_flow_destination default_dest; +}; + +struct mlx5_ttc_table; + +struct ttc_params { + struct mlx5_flow_namespace *ns; + struct mlx5_flow_table_attr ft_attr; + struct mlx5_flow_destination dests[MLX5_NUM_TT]; + bool inner_ttc; + struct mlx5_flow_destination tunnel_dests[MLX5_NUM_TUNNEL_TT]; +}; + +struct mlx5_flow_table *mlx5_get_ttc_flow_table(struct mlx5_ttc_table *ttc); + +struct mlx5_ttc_table *mlx5_create_ttc_table(struct mlx5_core_dev *dev, + struct ttc_params *params); +void mlx5_destroy_ttc_table(struct mlx5_ttc_table *ttc); + +struct mlx5_ttc_table *mlx5_create_inner_ttc_table(struct mlx5_core_dev *dev, + struct ttc_params *params); + +int mlx5_ttc_fwd_dest(struct mlx5_ttc_table *ttc, enum mlx5_traffic_types type, + struct mlx5_flow_destination *new_dest); +struct mlx5_flow_destination +mlx5_ttc_get_default_dest(struct mlx5_ttc_table *ttc, + enum mlx5_traffic_types type); +int mlx5_ttc_fwd_default_dest(struct mlx5_ttc_table *ttc, + enum mlx5_traffic_types type); + +bool mlx5_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev); +u8 mlx5_get_proto_by_tunnel_type(enum mlx5_tunnel_types tt); + +#endif /* __MLX5_FS_TTC_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index eb1b316560a8..a8efd9f1af4c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1271,7 +1271,7 @@ int mlx5_init_one(struct mlx5_core_dev *dev) set_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state); - err = mlx5_devlink_register(priv_to_devlink(dev), dev->device); + err = mlx5_devlink_register(priv_to_devlink(dev)); if (err) goto err_devlink_reg; @@ -1452,7 +1452,7 @@ static int probe_one(struct pci_dev *pdev, const struct pci_device_id *id) struct devlink *devlink; int err; - devlink = mlx5_devlink_alloc(); + devlink = mlx5_devlink_alloc(&pdev->dev); if (!devlink) { dev_err(&pdev->dev, "devlink alloc failed\n"); return -ENOMEM; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c index 42c8ee03fe3e..052f48068dc1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c @@ -14,7 +14,7 @@ static int mlx5_sf_dev_probe(struct auxiliary_device *adev, const struct auxilia struct devlink *devlink; int err; - devlink = mlx5_devlink_alloc(); + devlink = mlx5_devlink_alloc(&adev->dev); if (!devlink) return -ENOMEM; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c index 1be048769309..720195c4be7c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c @@ -164,12 +164,12 @@ static bool mlx5_sf_is_active(const struct mlx5_sf *sf) return sf->hw_state == MLX5_VHCA_STATE_ACTIVE || sf->hw_state == MLX5_VHCA_STATE_IN_USE; } -int mlx5_devlink_sf_port_fn_state_get(struct devlink *devlink, struct devlink_port *dl_port, +int mlx5_devlink_sf_port_fn_state_get(struct devlink_port *dl_port, enum devlink_port_fn_state *state, enum devlink_port_fn_opstate *opstate, struct netlink_ext_ack *extack) { - struct mlx5_core_dev *dev = devlink_priv(devlink); + struct mlx5_core_dev *dev = devlink_priv(dl_port->devlink); struct mlx5_sf_table *table; struct mlx5_sf *sf; int err = 0; @@ -248,11 +248,11 @@ out: return err; } -int mlx5_devlink_sf_port_fn_state_set(struct devlink *devlink, struct devlink_port *dl_port, +int mlx5_devlink_sf_port_fn_state_set(struct devlink_port *dl_port, enum devlink_port_fn_state state, struct netlink_ext_ack *extack) { - struct mlx5_core_dev *dev = devlink_priv(devlink); + struct mlx5_core_dev *dev = devlink_priv(dl_port->devlink); struct mlx5_sf_table *table; struct mlx5_sf *sf; int err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h b/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h index 81ce13b19ee8..3a480e06ecc0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h @@ -24,11 +24,11 @@ int mlx5_devlink_sf_port_new(struct devlink *devlink, unsigned int *new_port_index); int mlx5_devlink_sf_port_del(struct devlink *devlink, unsigned int port_index, struct netlink_ext_ack *extack); -int mlx5_devlink_sf_port_fn_state_get(struct devlink *devlink, struct devlink_port *dl_port, +int mlx5_devlink_sf_port_fn_state_get(struct devlink_port *dl_port, enum devlink_port_fn_state *state, enum devlink_port_fn_opstate *opstate, struct netlink_ext_ack *extack); -int mlx5_devlink_sf_port_fn_state_set(struct devlink *devlink, struct devlink_port *dl_port, +int mlx5_devlink_sf_port_fn_state_set(struct devlink_port *dl_port, enum devlink_port_fn_state state, struct netlink_ext_ack *extack); #else diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index e775f08fb464..f080fab3de2b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -1927,7 +1927,8 @@ __mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, if (!reload) { alloc_size = sizeof(*mlxsw_core) + mlxsw_driver->priv_size; - devlink = devlink_alloc(&mlxsw_devlink_ops, alloc_size); + devlink = devlink_alloc(&mlxsw_devlink_ops, alloc_size, + mlxsw_bus_info->dev); if (!devlink) { err = -ENOMEM; goto err_devlink_alloc; @@ -1974,7 +1975,7 @@ __mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, goto err_emad_init; if (!reload) { - err = devlink_register(devlink, mlxsw_bus_info->dev); + err = devlink_register(devlink); if (err) goto err_devlink_register; } diff --git a/drivers/net/ethernet/microchip/sparx5/Kconfig b/drivers/net/ethernet/microchip/sparx5/Kconfig index d39ae2a6fb49..7bdbb2d09a14 100644 --- a/drivers/net/ethernet/microchip/sparx5/Kconfig +++ b/drivers/net/ethernet/microchip/sparx5/Kconfig @@ -1,6 +1,5 @@ config SPARX5_SWITCH tristate "Sparx5 switch driver" - depends on BRIDGE || BRIDGE=n depends on NET_SWITCHDEV depends on HAS_IOMEM depends on OF diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c b/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c index 9d485a9d1f1f..cb68eaaac881 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c @@ -13,19 +13,26 @@ */ #define VSTAX 73 -static void ifh_encode_bitfield(void *ifh, u64 value, u32 pos, u32 width) +#define ifh_encode_bitfield(ifh, value, pos, _width) \ + ({ \ + u32 width = (_width); \ + \ + /* Max width is 5 bytes - 40 bits. In worst case this will + * spread over 6 bytes - 48 bits + */ \ + compiletime_assert(width <= 40, \ + "Unsupported width, must be <= 40"); \ + __ifh_encode_bitfield((ifh), (value), (pos), width); \ + }) + +static void __ifh_encode_bitfield(void *ifh, u64 value, u32 pos, u32 width) { u8 *ifh_hdr = ifh; /* Calculate the Start IFH byte position of this IFH bit position */ u32 byte = (35 - (pos / 8)); /* Calculate the Start bit position in the Start IFH byte */ u32 bit = (pos % 8); - u64 encode = GENMASK(bit + width - 1, bit) & (value << bit); - - /* Max width is 5 bytes - 40 bits. In worst case this will - * spread over 6 bytes - 48 bits - */ - compiletime_assert(width <= 40, "Unsupported width, must be <= 40"); + u64 encode = GENMASK_ULL(bit + width - 1, bit) & (value << bit); /* The b0-b7 goes into the start IFH byte */ if (encode & 0xFF) diff --git a/drivers/net/ethernet/mscc/ocelot_vsc7514.c b/drivers/net/ethernet/mscc/ocelot_vsc7514.c index 4bd7e9d9ec61..aa41c9cde643 100644 --- a/drivers/net/ethernet/mscc/ocelot_vsc7514.c +++ b/drivers/net/ethernet/mscc/ocelot_vsc7514.c @@ -1103,7 +1103,8 @@ static int mscc_ocelot_probe(struct platform_device *pdev) if (!np && !pdev->dev.platform_data) return -ENODEV; - devlink = devlink_alloc(&ocelot_devlink_ops, sizeof(*ocelot)); + devlink = + devlink_alloc(&ocelot_devlink_ops, sizeof(*ocelot), &pdev->dev); if (!devlink) return -ENOMEM; @@ -1187,7 +1188,7 @@ static int mscc_ocelot_probe(struct platform_device *pdev) if (err) goto out_put_ports; - err = devlink_register(devlink, ocelot->dev); + err = devlink_register(devlink); if (err) goto out_ocelot_deinit; diff --git a/drivers/net/ethernet/natsemi/jazzsonic.c b/drivers/net/ethernet/natsemi/jazzsonic.c index ce3eca5d152b..d74a80f010c5 100644 --- a/drivers/net/ethernet/natsemi/jazzsonic.c +++ b/drivers/net/ethernet/natsemi/jazzsonic.c @@ -193,8 +193,6 @@ static int jazz_sonic_probe(struct platform_device *pdev) SET_NETDEV_DEV(dev, &pdev->dev); platform_set_drvdata(pdev, dev); - netdev_boot_setup_check(dev); - dev->base_addr = res->start; dev->irq = platform_get_irq(pdev, 0); err = sonic_probe1(dev); diff --git a/drivers/net/ethernet/natsemi/natsemi.c b/drivers/net/ethernet/natsemi/natsemi.c index bd9d026e609d..3f982033944b 100644 --- a/drivers/net/ethernet/natsemi/natsemi.c +++ b/drivers/net/ethernet/natsemi/natsemi.c @@ -819,7 +819,7 @@ static int natsemi_probe1(struct pci_dev *pdev, const struct pci_device_id *ent) printk(version); #endif - i = pci_enable_device(pdev); + i = pcim_enable_device(pdev); if (i) return i; /* natsemi has a non-standard PM control register @@ -852,7 +852,7 @@ static int natsemi_probe1(struct pci_dev *pdev, const struct pci_device_id *ent) ioaddr = ioremap(iostart, iosize); if (!ioaddr) { i = -ENOMEM; - goto err_ioremap; + goto err_pci_request_regions; } /* Work around the dropped serial bit. */ @@ -974,9 +974,6 @@ static int natsemi_probe1(struct pci_dev *pdev, const struct pci_device_id *ent) err_register_netdev: iounmap(ioaddr); - err_ioremap: - pci_release_regions(pdev); - err_pci_request_regions: free_netdev(dev); return i; @@ -3241,7 +3238,6 @@ static void natsemi_remove1(struct pci_dev *pdev) NATSEMI_REMOVE_FILE(pdev, dspcfg_workaround); unregister_netdev (dev); - pci_release_regions (pdev); iounmap(ioaddr); free_netdev (dev); } diff --git a/drivers/net/ethernet/natsemi/xtsonic.c b/drivers/net/ethernet/natsemi/xtsonic.c index 28d9e98db81a..ca4686094701 100644 --- a/drivers/net/ethernet/natsemi/xtsonic.c +++ b/drivers/net/ethernet/natsemi/xtsonic.c @@ -215,7 +215,6 @@ int xtsonic_probe(struct platform_device *pdev) lp->device = &pdev->dev; platform_set_drvdata(pdev, dev); SET_NETDEV_DEV(dev, &pdev->dev); - netdev_boot_setup_check(dev); dev->base_addr = resmem->start; dev->irq = resirq->start; diff --git a/drivers/net/ethernet/neterion/vxge/vxge-main.c b/drivers/net/ethernet/neterion/vxge/vxge-main.c index 20fb4ad29865..df4a3f3da83a 100644 --- a/drivers/net/ethernet/neterion/vxge/vxge-main.c +++ b/drivers/net/ethernet/neterion/vxge/vxge-main.c @@ -3512,13 +3512,13 @@ static void vxge_device_unregister(struct __vxge_hw_device *hldev) kfree(vdev->vpaths); - /* we are safe to free it now */ - free_netdev(dev); - vxge_debug_init(vdev->level_trace, "%s: ethernet device unregistered", buf); vxge_debug_entryexit(vdev->level_trace, "%s: %s:%d Exiting...", buf, __func__, __LINE__); + + /* we are safe to free it now */ + free_netdev(dev); } /* diff --git a/drivers/net/ethernet/netronome/nfp/nfp_main.c b/drivers/net/ethernet/netronome/nfp/nfp_main.c index 742a420152b3..bb3b8a7f6c5d 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_main.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_main.c @@ -692,7 +692,7 @@ static int nfp_pci_probe(struct pci_dev *pdev, goto err_pci_disable; } - devlink = devlink_alloc(&nfp_devlink_ops, sizeof(*pf)); + devlink = devlink_alloc(&nfp_devlink_ops, sizeof(*pf), &pdev->dev); if (!devlink) { err = -ENOMEM; goto err_rel_regions; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 15078f9dc9f1..5bfa22accf2c 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -3281,17 +3281,12 @@ static int nfp_net_dp_swap_enable(struct nfp_net *nn, struct nfp_net_dp *dp) for (r = 0; r < nn->max_r_vecs; r++) nfp_net_vector_assign_rings(&nn->dp, &nn->r_vecs[r], r); - err = netif_set_real_num_rx_queues(nn->dp.netdev, nn->dp.num_rx_rings); + err = netif_set_real_num_queues(nn->dp.netdev, + nn->dp.num_stack_tx_rings, + nn->dp.num_rx_rings); if (err) return err; - if (nn->dp.netdev->real_num_tx_queues != nn->dp.num_stack_tx_rings) { - err = netif_set_real_num_tx_queues(nn->dp.netdev, - nn->dp.num_stack_tx_rings); - if (err) - return err; - } - return nfp_net_set_config_and_enable(nn); } diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c index a213784ffa54..0bf2ff5717bc 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c @@ -286,6 +286,8 @@ nfp_net_get_link_ksettings(struct net_device *netdev, /* Init to unknowns */ ethtool_link_ksettings_add_link_mode(cmd, supported, FIBRE); + ethtool_link_ksettings_add_link_mode(cmd, supported, Pause); + ethtool_link_ksettings_add_link_mode(cmd, advertising, Pause); cmd->base.port = PORT_OTHER; cmd->base.speed = SPEED_UNKNOWN; cmd->base.duplex = DUPLEX_UNKNOWN; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c index 921db40047d7..d10a93801344 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c @@ -701,7 +701,7 @@ int nfp_net_pci_probe(struct nfp_pf *pf) if (err) goto err_unmap; - err = devlink_register(devlink, &pf->pdev->dev); + err = devlink_register(devlink); if (err) goto err_app_clean; diff --git a/drivers/net/ethernet/pensando/ionic/ionic_devlink.c b/drivers/net/ethernet/pensando/ionic/ionic_devlink.c index cd520e4c5522..c7d0e195d176 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_devlink.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_devlink.c @@ -64,7 +64,7 @@ struct ionic *ionic_devlink_alloc(struct device *dev) { struct devlink *dl; - dl = devlink_alloc(&ionic_dl_ops, sizeof(struct ionic)); + dl = devlink_alloc(&ionic_dl_ops, sizeof(struct ionic), dev); return devlink_priv(dl); } @@ -82,7 +82,7 @@ int ionic_devlink_register(struct ionic *ionic) struct devlink_port_attrs attrs = {}; int err; - err = devlink_register(dl, ionic->dev); + err = devlink_register(dl); if (err) { dev_warn(ionic->dev, "devlink_register failed: %d\n", err); return err; diff --git a/drivers/net/ethernet/qlogic/qed/qed_devlink.c b/drivers/net/ethernet/qlogic/qed/qed_devlink.c index cf7f4da68e69..4c7501b9c284 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_devlink.c +++ b/drivers/net/ethernet/qlogic/qed/qed_devlink.c @@ -207,14 +207,15 @@ struct devlink *qed_devlink_register(struct qed_dev *cdev) struct devlink *dl; int rc; - dl = devlink_alloc(&qed_dl_ops, sizeof(struct qed_devlink)); + dl = devlink_alloc(&qed_dl_ops, sizeof(struct qed_devlink), + &cdev->pdev->dev); if (!dl) return ERR_PTR(-ENOMEM); qdevlink = devlink_priv(dl); qdevlink->cdev = cdev; - rc = devlink_register(dl, &cdev->pdev->dev); + rc = devlink_register(dl); if (rc) goto err_free; diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.c b/drivers/net/ethernet/qlogic/qed/qed_int.c index 578935f643b8..ab6d4f737316 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_int.c +++ b/drivers/net/ethernet/qlogic/qed/qed_int.c @@ -464,12 +464,19 @@ static int qed_dorq_attn_int_sts(struct qed_hwfn *p_hwfn) u32 int_sts, first_drop_reason, details, address, all_drops_reason; struct qed_ptt *p_ptt = p_hwfn->p_dpc_ptt; + int_sts = qed_rd(p_hwfn, p_ptt, DORQ_REG_INT_STS); + if (int_sts == 0xdeadbeaf) { + DP_NOTICE(p_hwfn->cdev, + "DORQ is being reset, skipping int_sts handler\n"); + + return 0; + } + /* int_sts may be zero since all PFs were interrupted for doorbell * overflow but another one already handled it. Can abort here. If * This PF also requires overflow recovery we will be interrupted again. * The masked almost full indication may also be set. Ignoring. */ - int_sts = qed_rd(p_hwfn, p_ptt, DORQ_REG_INT_STS); if (!(int_sts & ~DORQ_REG_INT_STS_DORQ_FIFO_AFULL)) return 0; @@ -528,6 +535,9 @@ static int qed_dorq_attn_int_sts(struct qed_hwfn *p_hwfn) static int qed_dorq_attn_cb(struct qed_hwfn *p_hwfn) { + if (p_hwfn->cdev->recov_in_prog) + return 0; + p_hwfn->db_recovery_info.dorq_attn = true; qed_dorq_attn_overflow(p_hwfn); diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c index a99861124630..fc8b3e64f153 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c @@ -1624,8 +1624,6 @@ qed_iwarp_get_listener(struct qed_hwfn *p_hwfn, static const u32 ip_zero[4] = { 0, 0, 0, 0 }; bool found = false; - qed_iwarp_print_cm_info(p_hwfn, cm_info); - list_for_each_entry(listener, &p_hwfn->p_rdma_info->iwarp.listen_list, list_entry) { diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index aa48b1b7eddc..6871d892eabf 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -1215,6 +1215,10 @@ static void qed_slowpath_task(struct work_struct *work) if (test_and_clear_bit(QED_SLOWPATH_PERIODIC_DB_REC, &hwfn->slowpath_task_flags)) { + /* skip qed_db_rec_handler during recovery/unload */ + if (hwfn->cdev->recov_in_prog || !hwfn->slowpath_wq_active) + goto out; + qed_db_rec_handler(hwfn, ptt); if (hwfn->periodic_db_rec_count--) qed_slowpath_delayed_work(hwfn, @@ -1222,6 +1226,7 @@ static void qed_slowpath_task(struct work_struct *work) QED_PERIODIC_DB_REC_INTERVAL); } +out: qed_ptt_release(hwfn, ptt); } diff --git a/drivers/net/ethernet/qlogic/qed/qed_nvmetcp_fw_funcs.c b/drivers/net/ethernet/qlogic/qed/qed_nvmetcp_fw_funcs.c index c1dd71d19f3f..3b84d00cf987 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_nvmetcp_fw_funcs.c +++ b/drivers/net/ethernet/qlogic/qed/qed_nvmetcp_fw_funcs.c @@ -4,7 +4,6 @@ #include <linux/kernel.h> #include <linux/module.h> #include <linux/pci.h> -#include <linux/kernel.h> #include <linux/list.h> #include <linux/mm.h> #include <linux/types.h> diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h index 8693117a6180..66c69f0f9af1 100644 --- a/drivers/net/ethernet/qlogic/qede/qede.h +++ b/drivers/net/ethernet/qlogic/qede/qede.h @@ -492,6 +492,7 @@ struct qede_fastpath { #define QEDE_SP_HW_ERR 4 #define QEDE_SP_ARFS_CONFIG 5 #define QEDE_SP_AER 7 +#define QEDE_SP_DISABLE 8 #ifdef CONFIG_RFS_ACCEL int qede_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb, diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 033bf2c7f56c..d400e9b235bf 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -1005,6 +1005,13 @@ static void qede_sp_task(struct work_struct *work) struct qede_dev *edev = container_of(work, struct qede_dev, sp_task.work); + /* Disable execution of this deferred work once + * qede removal is in progress, this stop any future + * scheduling of sp_task. + */ + if (test_bit(QEDE_SP_DISABLE, &edev->sp_flags)) + return; + /* The locking scheme depends on the specific flag: * In case of QEDE_SP_RECOVERY, acquiring the RTNL lock is required to * ensure that ongoing flows are ended and new ones are not started. @@ -1292,6 +1299,7 @@ static void __qede_remove(struct pci_dev *pdev, enum qede_remove_mode mode) qede_rdma_dev_remove(edev, (mode == QEDE_REMOVE_RECOVERY)); if (mode != QEDE_REMOVE_RECOVERY) { + set_bit(QEDE_SP_DISABLE, &edev->sp_flags); unregister_netdev(ndev); cancel_delayed_work_sync(&edev->sp_task); diff --git a/drivers/net/ethernet/smsc/Kconfig b/drivers/net/ethernet/smsc/Kconfig index c52a38df0e0d..72e42a868346 100644 --- a/drivers/net/ethernet/smsc/Kconfig +++ b/drivers/net/ethernet/smsc/Kconfig @@ -23,6 +23,7 @@ config SMC9194 tristate "SMC 9194 support" depends on ISA select CRC32 + select NETDEV_LEGACY_INIT help This is support for the SMC9xxx based Ethernet cards. Choose this option if you have a DELL laptop with the docking station, or diff --git a/drivers/net/ethernet/smsc/smc9194.c b/drivers/net/ethernet/smsc/smc9194.c index bf7c8c8b1350..0ce403fa5f1a 100644 --- a/drivers/net/ethernet/smsc/smc9194.c +++ b/drivers/net/ethernet/smsc/smc9194.c @@ -1508,7 +1508,7 @@ MODULE_PARM_DESC(io, "SMC 99194 I/O base address"); MODULE_PARM_DESC(irq, "SMC 99194 IRQ number"); MODULE_PARM_DESC(ifport, "SMC 99194 interface port (0-default, 1-TP, 2-AUI)"); -int __init init_module(void) +static int __init smc_init_module(void) { if (io == 0) printk(KERN_WARNING @@ -1518,13 +1518,15 @@ int __init init_module(void) devSMC9194 = smc_init(-1); return PTR_ERR_OR_ZERO(devSMC9194); } +module_init(smc_init_module); -void __exit cleanup_module(void) +static void __exit smc_cleanup_module(void) { unregister_netdev(devSMC9194); free_irq(devSMC9194->irq, devSMC9194); release_region(devSMC9194->base_addr, SMC_IO_EXTENT); free_netdev(devSMC9194); } +module_exit(smc_cleanup_module); #endif /* MODULE */ diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c index 28dd0ed85a82..f7dc8458cde8 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c @@ -289,10 +289,7 @@ static int ipq806x_gmac_probe(struct platform_device *pdev) val &= ~NSS_COMMON_GMAC_CTL_PHY_IFACE_SEL; break; default: - dev_err(&pdev->dev, "Unsupported PHY mode: \"%s\"\n", - phy_modes(gmac->phy_mode)); - err = -EINVAL; - goto err_remove_config_dt; + goto err_unsupported_phy; } regmap_write(gmac->nss_common, NSS_COMMON_GMAC_CTL(gmac->id), val); @@ -309,10 +306,7 @@ static int ipq806x_gmac_probe(struct platform_device *pdev) NSS_COMMON_CLK_SRC_CTRL_OFFSET(gmac->id); break; default: - dev_err(&pdev->dev, "Unsupported PHY mode: \"%s\"\n", - phy_modes(gmac->phy_mode)); - err = -EINVAL; - goto err_remove_config_dt; + goto err_unsupported_phy; } regmap_write(gmac->nss_common, NSS_COMMON_CLK_SRC_CTRL, val); @@ -329,8 +323,7 @@ static int ipq806x_gmac_probe(struct platform_device *pdev) NSS_COMMON_CLK_GATE_GMII_TX_EN(gmac->id); break; default: - /* We don't get here; the switch above will have errored out */ - unreachable(); + goto err_unsupported_phy; } regmap_write(gmac->nss_common, NSS_COMMON_CLK_GATE, val); @@ -361,6 +354,11 @@ static int ipq806x_gmac_probe(struct platform_device *pdev) return 0; +err_unsupported_phy: + dev_err(&pdev->dev, "Unsupported PHY mode: \"%s\"\n", + phy_modes(gmac->phy_mode)); + err = -EINVAL; + err_remove_config_dt: stmmac_remove_config_dt(pdev, plat_dat); diff --git a/drivers/net/ethernet/ti/Kconfig b/drivers/net/ethernet/ti/Kconfig index 7ac8e5ecbe97..affcf92cd3aa 100644 --- a/drivers/net/ethernet/ti/Kconfig +++ b/drivers/net/ethernet/ti/Kconfig @@ -64,7 +64,6 @@ config TI_CPSW config TI_CPSW_SWITCHDEV tristate "TI CPSW Switch Support with switchdev" depends on ARCH_DAVINCI || ARCH_OMAP2PLUS || COMPILE_TEST - depends on BRIDGE || BRIDGE=n depends on NET_SWITCHDEV depends on TI_CPTS || !TI_CPTS select PAGE_POOL @@ -110,7 +109,6 @@ config TI_K3_AM65_CPSW_NUSS config TI_K3_AM65_CPSW_SWITCHDEV bool "TI K3 AM654x/J721E CPSW Switch mode support" depends on TI_K3_AM65_CPSW_NUSS - depends on BRIDGE || BRIDGE=n depends on NET_SWITCHDEV help This enables switchdev support for TI K3 CPSWxG Ethernet diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index 4f67d1a98c0d..130346f74ee8 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -7,7 +7,6 @@ #include <linux/clk.h> #include <linux/etherdevice.h> -#include <linux/if_bridge.h> #include <linux/if_vlan.h> #include <linux/interrupt.h> #include <linux/kernel.h> @@ -28,6 +27,7 @@ #include <linux/sys_soc.h> #include <linux/dma/ti-cppi5.h> #include <linux/dma/k3-udma-glue.h> +#include <net/switchdev.h> #include "cpsw_ale.h" #include "cpsw_sl.h" @@ -519,6 +519,10 @@ static int am65_cpsw_nuss_common_open(struct am65_cpsw_common *common, } napi_enable(&common->napi_rx); + if (common->rx_irq_disabled) { + common->rx_irq_disabled = false; + enable_irq(common->rx_chns.irq); + } dev_dbg(common->dev, "cpsw_nuss started\n"); return 0; @@ -872,8 +876,12 @@ static int am65_cpsw_nuss_rx_poll(struct napi_struct *napi_rx, int budget) dev_dbg(common->dev, "%s num_rx:%d %d\n", __func__, num_rx, budget); - if (num_rx < budget && napi_complete_done(napi_rx, num_rx)) - enable_irq(common->rx_chns.irq); + if (num_rx < budget && napi_complete_done(napi_rx, num_rx)) { + if (common->rx_irq_disabled) { + common->rx_irq_disabled = false; + enable_irq(common->rx_chns.irq); + } + } return num_rx; } @@ -1078,19 +1086,20 @@ static int am65_cpsw_nuss_tx_poll(struct napi_struct *napi_tx, int budget) else num_tx = am65_cpsw_nuss_tx_compl_packets(tx_chn->common, tx_chn->id, budget); - num_tx = min(num_tx, budget); - if (num_tx < budget) { - napi_complete(napi_tx); + if (num_tx >= budget) + return budget; + + if (napi_complete_done(napi_tx, num_tx)) enable_irq(tx_chn->irq); - } - return num_tx; + return 0; } static irqreturn_t am65_cpsw_nuss_rx_irq(int irq, void *dev_id) { struct am65_cpsw_common *common = dev_id; + common->rx_irq_disabled = true; disable_irq_nosync(irq); napi_schedule(&common->napi_rx); @@ -2061,8 +2070,12 @@ static void am65_cpsw_port_offload_fwd_mark_update(struct am65_cpsw_common *comm for (i = 1; i <= common->port_num; i++) { struct am65_cpsw_port *port = am65_common_get_port(common, i); - struct am65_cpsw_ndev_priv *priv = am65_ndev_to_priv(port->ndev); + struct am65_cpsw_ndev_priv *priv; + + if (!port->ndev) + continue; + priv = am65_ndev_to_priv(port->ndev); priv->offload_fwd_mark = set_val; } } @@ -2409,14 +2422,14 @@ static int am65_cpsw_nuss_register_devlink(struct am65_cpsw_common *common) int i; common->devlink = - devlink_alloc(&am65_cpsw_devlink_ops, sizeof(*dl_priv)); + devlink_alloc(&am65_cpsw_devlink_ops, sizeof(*dl_priv), dev); if (!common->devlink) return -ENOMEM; dl_priv = devlink_priv(common->devlink); dl_priv->common = common; - ret = devlink_register(common->devlink, dev); + ret = devlink_register(common->devlink); if (ret) { dev_err(dev, "devlink reg fail ret:%d\n", ret); goto dl_free; diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.h b/drivers/net/ethernet/ti/am65-cpsw-nuss.h index 5d93e346f05e..048ed10143c1 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.h +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.h @@ -126,6 +126,8 @@ struct am65_cpsw_common { struct am65_cpsw_rx_chn rx_chns; struct napi_struct napi_rx; + bool rx_irq_disabled; + u32 nuss_ver; u32 cpsw_ver; unsigned long bus_freq; diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index abf9a2a6f7eb..9f70e40779f6 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -431,7 +431,7 @@ static void cpsw_rx_handler(void *token, int len, int status) skb->protocol = eth_type_trans(skb, ndev); /* mark skb for recycling */ - skb_mark_for_recycle(skb, page, pool); + skb_mark_for_recycle(skb); netif_receive_skb(skb); ndev->stats.rx_bytes += len; @@ -905,7 +905,7 @@ static netdev_tx_t cpsw_ndo_start_xmit(struct sk_buff *skb, struct cpdma_chan *txch; int ret, q_idx; - if (skb_padto(skb, CPSW_MIN_PACKET_SIZE)) { + if (skb_put_padto(skb, CPSW_MIN_PACKET_SIZE)) { cpsw_err(priv, tx_err, "packet pad failed\n"); ndev->stats.tx_dropped++; return NET_XMIT_DROP; diff --git a/drivers/net/ethernet/ti/cpsw_new.c b/drivers/net/ethernet/ti/cpsw_new.c index b4f55ff4e84f..ff3a96b084ee 100644 --- a/drivers/net/ethernet/ti/cpsw_new.c +++ b/drivers/net/ethernet/ti/cpsw_new.c @@ -11,7 +11,6 @@ #include <linux/module.h> #include <linux/irqreturn.h> #include <linux/interrupt.h> -#include <linux/if_bridge.h> #include <linux/if_ether.h> #include <linux/etherdevice.h> #include <linux/net_tstamp.h> @@ -29,6 +28,7 @@ #include <linux/kmemleak.h> #include <linux/sys_soc.h> +#include <net/switchdev.h> #include <net/page_pool.h> #include <net/pkt_cls.h> #include <net/devlink.h> @@ -375,7 +375,7 @@ static void cpsw_rx_handler(void *token, int len, int status) skb->protocol = eth_type_trans(skb, ndev); /* mark skb for recycling */ - skb_mark_for_recycle(skb, page, pool); + skb_mark_for_recycle(skb); netif_receive_skb(skb); ndev->stats.rx_bytes += len; @@ -1800,14 +1800,14 @@ static int cpsw_register_devlink(struct cpsw_common *cpsw) struct cpsw_devlink *dl_priv; int ret = 0; - cpsw->devlink = devlink_alloc(&cpsw_devlink_ops, sizeof(*dl_priv)); + cpsw->devlink = devlink_alloc(&cpsw_devlink_ops, sizeof(*dl_priv), dev); if (!cpsw->devlink) return -ENOMEM; dl_priv = devlink_priv(cpsw->devlink); dl_priv->cpsw = cpsw; - ret = devlink_register(cpsw->devlink, dev); + ret = devlink_register(cpsw->devlink); if (ret) { dev_err(dev, "DL reg fail ret:%d\n", ret); goto dl_free; diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c index 637796670746..b1c5cbe7478b 100644 --- a/drivers/net/ethernet/ti/davinci_emac.c +++ b/drivers/net/ethernet/ti/davinci_emac.c @@ -943,7 +943,7 @@ static int emac_dev_xmit(struct sk_buff *skb, struct net_device *ndev) goto fail_tx; } - ret_code = skb_padto(skb, EMAC_DEF_MIN_ETHPKTSIZE); + ret_code = skb_put_padto(skb, EMAC_DEF_MIN_ETHPKTSIZE); if (unlikely(ret_code < 0)) { if (netif_msg_tx_err(priv) && net_ratelimit()) dev_err(emac_dev, "DaVinci EMAC: packet pad failed"); diff --git a/drivers/net/ipa/gsi.c b/drivers/net/ipa/gsi.c index 3de67ba066a6..a2fcdb1abdb9 100644 --- a/drivers/net/ipa/gsi.c +++ b/drivers/net/ipa/gsi.c @@ -198,77 +198,6 @@ static void gsi_irq_type_disable(struct gsi *gsi, enum gsi_irq_type_id type_id) gsi_irq_type_update(gsi, gsi->type_enabled_bitmap & ~BIT(type_id)); } -/* Turn off all GSI interrupts initially; there is no gsi_irq_teardown() */ -static void gsi_irq_setup(struct gsi *gsi) -{ - /* Disable all interrupt types */ - gsi_irq_type_update(gsi, 0); - - /* Clear all type-specific interrupt masks */ - iowrite32(0, gsi->virt + GSI_CNTXT_SRC_CH_IRQ_MSK_OFFSET); - iowrite32(0, gsi->virt + GSI_CNTXT_SRC_EV_CH_IRQ_MSK_OFFSET); - iowrite32(0, gsi->virt + GSI_CNTXT_GLOB_IRQ_EN_OFFSET); - iowrite32(0, gsi->virt + GSI_CNTXT_SRC_IEOB_IRQ_MSK_OFFSET); - - /* The inter-EE interrupts are not supported for IPA v3.0-v3.1 */ - if (gsi->version > IPA_VERSION_3_1) { - u32 offset; - - /* These registers are in the non-adjusted address range */ - offset = GSI_INTER_EE_SRC_CH_IRQ_MSK_OFFSET; - iowrite32(0, gsi->virt_raw + offset); - offset = GSI_INTER_EE_SRC_EV_CH_IRQ_MSK_OFFSET; - iowrite32(0, gsi->virt_raw + offset); - } - - iowrite32(0, gsi->virt + GSI_CNTXT_GSI_IRQ_EN_OFFSET); -} - -/* Get # supported channel and event rings; there is no gsi_ring_teardown() */ -static int gsi_ring_setup(struct gsi *gsi) -{ - struct device *dev = gsi->dev; - u32 count; - u32 val; - - if (gsi->version < IPA_VERSION_3_5_1) { - /* No HW_PARAM_2 register prior to IPA v3.5.1, assume the max */ - gsi->channel_count = GSI_CHANNEL_COUNT_MAX; - gsi->evt_ring_count = GSI_EVT_RING_COUNT_MAX; - - return 0; - } - - val = ioread32(gsi->virt + GSI_GSI_HW_PARAM_2_OFFSET); - - count = u32_get_bits(val, NUM_CH_PER_EE_FMASK); - if (!count) { - dev_err(dev, "GSI reports zero channels supported\n"); - return -EINVAL; - } - if (count > GSI_CHANNEL_COUNT_MAX) { - dev_warn(dev, "limiting to %u channels; hardware supports %u\n", - GSI_CHANNEL_COUNT_MAX, count); - count = GSI_CHANNEL_COUNT_MAX; - } - gsi->channel_count = count; - - count = u32_get_bits(val, NUM_EV_PER_EE_FMASK); - if (!count) { - dev_err(dev, "GSI reports zero event rings supported\n"); - return -EINVAL; - } - if (count > GSI_EVT_RING_COUNT_MAX) { - dev_warn(dev, - "limiting to %u event rings; hardware supports %u\n", - GSI_EVT_RING_COUNT_MAX, count); - count = GSI_EVT_RING_COUNT_MAX; - } - gsi->evt_ring_count = count; - - return 0; -} - /* Event ring commands are performed one at a time. Their completion * is signaled by the event ring control GSI interrupt type, which is * only enabled when we issue an event ring command. Only the event @@ -920,12 +849,13 @@ static void gsi_channel_program(struct gsi_channel *channel, bool doorbell) /* All done! */ } -static int __gsi_channel_start(struct gsi_channel *channel, bool start) +static int __gsi_channel_start(struct gsi_channel *channel, bool resume) { struct gsi *gsi = channel->gsi; int ret; - if (!start) + /* Prior to IPA v4.0 suspend/resume is not implemented by GSI */ + if (resume && gsi->version < IPA_VERSION_4_0) return 0; mutex_lock(&gsi->mutex); @@ -947,7 +877,7 @@ int gsi_channel_start(struct gsi *gsi, u32 channel_id) napi_enable(&channel->napi); gsi_irq_ieob_enable_one(gsi, channel->evt_ring_id); - ret = __gsi_channel_start(channel, true); + ret = __gsi_channel_start(channel, false); if (ret) { gsi_irq_ieob_disable_one(gsi, channel->evt_ring_id); napi_disable(&channel->napi); @@ -971,7 +901,7 @@ static int gsi_channel_stop_retry(struct gsi_channel *channel) return ret; } -static int __gsi_channel_stop(struct gsi_channel *channel, bool stop) +static int __gsi_channel_stop(struct gsi_channel *channel, bool suspend) { struct gsi *gsi = channel->gsi; int ret; @@ -979,7 +909,8 @@ static int __gsi_channel_stop(struct gsi_channel *channel, bool stop) /* Wait for any underway transactions to complete before stopping. */ gsi_channel_trans_quiesce(channel); - if (!stop) + /* Prior to IPA v4.0 suspend/resume is not implemented by GSI */ + if (suspend && gsi->version < IPA_VERSION_4_0) return 0; mutex_lock(&gsi->mutex); @@ -997,7 +928,7 @@ int gsi_channel_stop(struct gsi *gsi, u32 channel_id) struct gsi_channel *channel = &gsi->channel[channel_id]; int ret; - ret = __gsi_channel_stop(channel, true); + ret = __gsi_channel_stop(channel, false); if (ret) return ret; @@ -1026,13 +957,13 @@ void gsi_channel_reset(struct gsi *gsi, u32 channel_id, bool doorbell) mutex_unlock(&gsi->mutex); } -/* Stop a STARTED channel for suspend (using stop if requested) */ -int gsi_channel_suspend(struct gsi *gsi, u32 channel_id, bool stop) +/* Stop a started channel for suspend */ +int gsi_channel_suspend(struct gsi *gsi, u32 channel_id) { struct gsi_channel *channel = &gsi->channel[channel_id]; int ret; - ret = __gsi_channel_stop(channel, stop); + ret = __gsi_channel_stop(channel, true); if (ret) return ret; @@ -1042,12 +973,24 @@ int gsi_channel_suspend(struct gsi *gsi, u32 channel_id, bool stop) return 0; } -/* Resume a suspended channel (starting will be requested if STOPPED) */ -int gsi_channel_resume(struct gsi *gsi, u32 channel_id, bool start) +/* Resume a suspended channel (starting if stopped) */ +int gsi_channel_resume(struct gsi *gsi, u32 channel_id) { struct gsi_channel *channel = &gsi->channel[channel_id]; - return __gsi_channel_start(channel, start); + return __gsi_channel_start(channel, true); +} + +/* Prevent all GSI interrupts while suspended */ +void gsi_suspend(struct gsi *gsi) +{ + disable_irq(gsi->irq); +} + +/* Allow all GSI interrupts again when resuming */ +void gsi_resume(struct gsi *gsi) +{ + enable_irq(gsi->irq); } /** @@ -1372,33 +1315,20 @@ static irqreturn_t gsi_isr(int irq, void *dev_id) return IRQ_HANDLED; } +/* Init function for GSI IRQ lookup; there is no gsi_irq_exit() */ static int gsi_irq_init(struct gsi *gsi, struct platform_device *pdev) { - struct device *dev = &pdev->dev; - unsigned int irq; int ret; ret = platform_get_irq_byname(pdev, "gsi"); if (ret <= 0) return ret ? : -EINVAL; - irq = ret; - - ret = request_irq(irq, gsi_isr, 0, "gsi", gsi); - if (ret) { - dev_err(dev, "error %d requesting \"gsi\" IRQ\n", ret); - return ret; - } - gsi->irq = irq; + gsi->irq = ret; return 0; } -static void gsi_irq_exit(struct gsi *gsi) -{ - free_irq(gsi->irq, gsi); -} - /* Return the transaction associated with a transfer completion event */ static struct gsi_trans *gsi_event_trans(struct gsi_channel *channel, struct gsi_event *event) @@ -1876,6 +1806,93 @@ static void gsi_channel_teardown(struct gsi *gsi) gsi_irq_disable(gsi); } +/* Turn off all GSI interrupts initially */ +static int gsi_irq_setup(struct gsi *gsi) +{ + int ret; + + /* Writing 1 indicates IRQ interrupts; 0 would be MSI */ + iowrite32(1, gsi->virt + GSI_CNTXT_INTSET_OFFSET); + + /* Disable all interrupt types */ + gsi_irq_type_update(gsi, 0); + + /* Clear all type-specific interrupt masks */ + iowrite32(0, gsi->virt + GSI_CNTXT_SRC_CH_IRQ_MSK_OFFSET); + iowrite32(0, gsi->virt + GSI_CNTXT_SRC_EV_CH_IRQ_MSK_OFFSET); + iowrite32(0, gsi->virt + GSI_CNTXT_GLOB_IRQ_EN_OFFSET); + iowrite32(0, gsi->virt + GSI_CNTXT_SRC_IEOB_IRQ_MSK_OFFSET); + + /* The inter-EE interrupts are not supported for IPA v3.0-v3.1 */ + if (gsi->version > IPA_VERSION_3_1) { + u32 offset; + + /* These registers are in the non-adjusted address range */ + offset = GSI_INTER_EE_SRC_CH_IRQ_MSK_OFFSET; + iowrite32(0, gsi->virt_raw + offset); + offset = GSI_INTER_EE_SRC_EV_CH_IRQ_MSK_OFFSET; + iowrite32(0, gsi->virt_raw + offset); + } + + iowrite32(0, gsi->virt + GSI_CNTXT_GSI_IRQ_EN_OFFSET); + + ret = request_irq(gsi->irq, gsi_isr, 0, "gsi", gsi); + if (ret) + dev_err(gsi->dev, "error %d requesting \"gsi\" IRQ\n", ret); + + return ret; +} + +static void gsi_irq_teardown(struct gsi *gsi) +{ + free_irq(gsi->irq, gsi); +} + +/* Get # supported channel and event rings; there is no gsi_ring_teardown() */ +static int gsi_ring_setup(struct gsi *gsi) +{ + struct device *dev = gsi->dev; + u32 count; + u32 val; + + if (gsi->version < IPA_VERSION_3_5_1) { + /* No HW_PARAM_2 register prior to IPA v3.5.1, assume the max */ + gsi->channel_count = GSI_CHANNEL_COUNT_MAX; + gsi->evt_ring_count = GSI_EVT_RING_COUNT_MAX; + + return 0; + } + + val = ioread32(gsi->virt + GSI_GSI_HW_PARAM_2_OFFSET); + + count = u32_get_bits(val, NUM_CH_PER_EE_FMASK); + if (!count) { + dev_err(dev, "GSI reports zero channels supported\n"); + return -EINVAL; + } + if (count > GSI_CHANNEL_COUNT_MAX) { + dev_warn(dev, "limiting to %u channels; hardware supports %u\n", + GSI_CHANNEL_COUNT_MAX, count); + count = GSI_CHANNEL_COUNT_MAX; + } + gsi->channel_count = count; + + count = u32_get_bits(val, NUM_EV_PER_EE_FMASK); + if (!count) { + dev_err(dev, "GSI reports zero event rings supported\n"); + return -EINVAL; + } + if (count > GSI_EVT_RING_COUNT_MAX) { + dev_warn(dev, + "limiting to %u event rings; hardware supports %u\n", + GSI_EVT_RING_COUNT_MAX, count); + count = GSI_EVT_RING_COUNT_MAX; + } + gsi->evt_ring_count = count; + + return 0; +} + /* Setup function for GSI. GSI firmware must be loaded and initialized */ int gsi_setup(struct gsi *gsi) { @@ -1889,25 +1906,34 @@ int gsi_setup(struct gsi *gsi) return -EIO; } - gsi_irq_setup(gsi); /* No matching teardown required */ + ret = gsi_irq_setup(gsi); + if (ret) + return ret; ret = gsi_ring_setup(gsi); /* No matching teardown required */ if (ret) - return ret; + goto err_irq_teardown; /* Initialize the error log */ iowrite32(0, gsi->virt + GSI_ERROR_LOG_OFFSET); - /* Writing 1 indicates IRQ interrupts; 0 would be MSI */ - iowrite32(1, gsi->virt + GSI_CNTXT_INTSET_OFFSET); + ret = gsi_channel_setup(gsi); + if (ret) + goto err_irq_teardown; - return gsi_channel_setup(gsi); + return 0; + +err_irq_teardown: + gsi_irq_teardown(gsi); + + return ret; } /* Inverse of gsi_setup() */ void gsi_teardown(struct gsi *gsi) { gsi_channel_teardown(gsi); + gsi_irq_teardown(gsi); } /* Initialize a channel's event ring */ @@ -2204,20 +2230,18 @@ int gsi_init(struct gsi *gsi, struct platform_device *pdev, init_completion(&gsi->completion); - ret = gsi_irq_init(gsi, pdev); + ret = gsi_irq_init(gsi, pdev); /* No matching exit required */ if (ret) goto err_iounmap; ret = gsi_channel_init(gsi, count, data); if (ret) - goto err_irq_exit; + goto err_iounmap; mutex_init(&gsi->mutex); return 0; -err_irq_exit: - gsi_irq_exit(gsi); err_iounmap: iounmap(gsi->virt_raw); @@ -2229,7 +2253,6 @@ void gsi_exit(struct gsi *gsi) { mutex_destroy(&gsi->mutex); gsi_channel_exit(gsi); - gsi_irq_exit(gsi); iounmap(gsi->virt_raw); } diff --git a/drivers/net/ipa/gsi.h b/drivers/net/ipa/gsi.h index 81cd7b07f6e1..88b80dc3db79 100644 --- a/drivers/net/ipa/gsi.h +++ b/drivers/net/ipa/gsi.h @@ -232,8 +232,35 @@ int gsi_channel_stop(struct gsi *gsi, u32 channel_id); */ void gsi_channel_reset(struct gsi *gsi, u32 channel_id, bool doorbell); -int gsi_channel_suspend(struct gsi *gsi, u32 channel_id, bool stop); -int gsi_channel_resume(struct gsi *gsi, u32 channel_id, bool start); +/** + * gsi_suspend() - Prepare the GSI subsystem for suspend + * @gsi: GSI pointer + */ +void gsi_suspend(struct gsi *gsi); + +/** + * gsi_resume() - Resume the GSI subsystem following suspend + * @gsi: GSI pointer + */ +void gsi_resume(struct gsi *gsi); + +/** + * gsi_channel_suspend() - Suspend a GSI channel + * @gsi: GSI pointer + * @channel_id: Channel to suspend + * + * For IPA v4.0+, suspend is implemented by stopping the channel. + */ +int gsi_channel_suspend(struct gsi *gsi, u32 channel_id); + +/** + * gsi_channel_resume() - Resume a suspended GSI channel + * @gsi: GSI pointer + * @channel_id: Channel to resume + * + * For IPA v4.0+, the stopped channel is started again. + */ +int gsi_channel_resume(struct gsi *gsi, u32 channel_id); /** * gsi_init() - Initialize the GSI subsystem diff --git a/drivers/net/ipa/ipa.h b/drivers/net/ipa/ipa.h index 71ba996096bb..34152fe02963 100644 --- a/drivers/net/ipa/ipa.h +++ b/drivers/net/ipa/ipa.h @@ -28,19 +28,8 @@ struct ipa_smp2p; struct ipa_interrupt; /** - * enum ipa_flag - IPA state flags - * @IPA_FLAG_RESUMED: Whether resume from suspend has been signaled - * @IPA_FLAG_COUNT: Number of defined IPA flags - */ -enum ipa_flag { - IPA_FLAG_RESUMED, - IPA_FLAG_COUNT, /* Last; not a flag */ -}; - -/** * struct ipa - IPA information * @gsi: Embedded GSI structure - * @flags: Boolean state flags * @version: IPA hardware version * @pdev: Platform device * @completion: Used to signal pipeline clear transfer complete @@ -83,7 +72,6 @@ enum ipa_flag { */ struct ipa { struct gsi gsi; - DECLARE_BITMAP(flags, IPA_FLAG_COUNT); enum ipa_version version; struct platform_device *pdev; struct completion completion; diff --git a/drivers/net/ipa/ipa_clock.c b/drivers/net/ipa/ipa_clock.c index 69ef6ea41e61..a67b6136e3c0 100644 --- a/drivers/net/ipa/ipa_clock.c +++ b/drivers/net/ipa/ipa_clock.c @@ -9,9 +9,12 @@ #include <linux/clk.h> #include <linux/device.h> #include <linux/interconnect.h> +#include <linux/pm.h> +#include <linux/bitops.h> #include "ipa.h" #include "ipa_clock.h" +#include "ipa_endpoint.h" #include "ipa_modem.h" #include "ipa_data.h" @@ -43,10 +46,21 @@ struct ipa_interconnect { }; /** + * enum ipa_power_flag - IPA power flags + * @IPA_POWER_FLAG_RESUMED: Whether resume from suspend has been signaled + * @IPA_POWER_FLAG_COUNT: Number of defined power flags + */ +enum ipa_power_flag { + IPA_POWER_FLAG_RESUMED, + IPA_POWER_FLAG_COUNT, /* Last; not a flag */ +}; + +/** * struct ipa_clock - IPA clocking information * @count: Clocking reference count * @mutex: Protects clock enable/disable * @core: IPA core clock + * @flags: Boolean state flags * @interconnect_count: Number of elements in interconnect[] * @interconnect: Interconnect array */ @@ -54,6 +68,7 @@ struct ipa_clock { refcount_t count; struct mutex mutex; /* protects clock enable/disable */ struct clk *core; + DECLARE_BITMAP(flags, IPA_POWER_FLAG_COUNT); u32 interconnect_count; struct ipa_interconnect *interconnect; }; @@ -144,8 +159,12 @@ static int ipa_interconnect_enable(struct ipa *ipa) ret = icc_set_bw(interconnect->path, interconnect->average_bandwidth, interconnect->peak_bandwidth); - if (ret) + if (ret) { + dev_err(&ipa->pdev->dev, + "error %d enabling %s interconnect\n", + ret, icc_get_name(interconnect->path)); goto out_unwind; + } interconnect++; } @@ -159,10 +178,11 @@ out_unwind: } /* To disable an interconnect, we just its bandwidth to 0 */ -static void ipa_interconnect_disable(struct ipa *ipa) +static int ipa_interconnect_disable(struct ipa *ipa) { struct ipa_interconnect *interconnect; struct ipa_clock *clock = ipa->clock; + struct device *dev = &ipa->pdev->dev; int result = 0; u32 count; int ret; @@ -172,13 +192,16 @@ static void ipa_interconnect_disable(struct ipa *ipa) while (count--) { interconnect--; ret = icc_set_bw(interconnect->path, 0, 0); - if (ret && !result) - result = ret; + if (ret) { + dev_err(dev, "error %d disabling %s interconnect\n", + ret, icc_get_name(interconnect->path)); + /* Try to disable all; record only the first error */ + if (!result) + result = ret; + } } - if (result) - dev_err(&ipa->pdev->dev, - "error %d disabling IPA interconnects\n", ret); + return result; } /* Turn on IPA clocks, including interconnects */ @@ -191,8 +214,10 @@ static int ipa_clock_enable(struct ipa *ipa) return ret; ret = clk_prepare_enable(ipa->clock->core); - if (ret) - ipa_interconnect_disable(ipa); + if (ret) { + dev_err(&ipa->pdev->dev, "error %d enabling core clock\n", ret); + (void)ipa_interconnect_disable(ipa); + } return ret; } @@ -201,7 +226,7 @@ static int ipa_clock_enable(struct ipa *ipa) static void ipa_clock_disable(struct ipa *ipa) { clk_disable_unprepare(ipa->clock->core); - ipa_interconnect_disable(ipa); + (void)ipa_interconnect_disable(ipa); } /* Get an IPA clock reference, but only if the reference count is @@ -238,13 +263,8 @@ void ipa_clock_get(struct ipa *ipa) goto out_mutex_unlock; ret = ipa_clock_enable(ipa); - if (ret) { - dev_err(&ipa->pdev->dev, "error %d enabling IPA clock\n", ret); - goto out_mutex_unlock; - } - - refcount_set(&clock->count, 1); - + if (!ret) + refcount_set(&clock->count, 1); out_mutex_unlock: mutex_unlock(&clock->mutex); } @@ -271,6 +291,40 @@ u32 ipa_clock_rate(struct ipa *ipa) return ipa->clock ? (u32)clk_get_rate(ipa->clock->core) : 0; } +/** + * ipa_suspend_handler() - Handle the suspend IPA interrupt + * @ipa: IPA pointer + * @irq_id: IPA interrupt type (unused) + * + * If an RX endpoint is suspended, and the IPA has a packet destined for + * that endpoint, the IPA generates a SUSPEND interrupt to inform the AP + * that it should resume the endpoint. If we get one of these interrupts + * we just wake up the system. + */ +static void ipa_suspend_handler(struct ipa *ipa, enum ipa_irq_id irq_id) +{ + /* Just report the event, and let system resume handle the rest. + * More than one endpoint could signal this; if so, ignore + * all but the first. + */ + if (!test_and_set_bit(IPA_POWER_FLAG_RESUMED, ipa->clock->flags)) + pm_wakeup_dev_event(&ipa->pdev->dev, 0, true); + + /* Acknowledge/clear the suspend interrupt on all endpoints */ + ipa_interrupt_suspend_clear_all(ipa->interrupt); +} + +void ipa_power_setup(struct ipa *ipa) +{ + ipa_interrupt_add(ipa->interrupt, IPA_IRQ_TX_SUSPEND, + ipa_suspend_handler); +} + +void ipa_power_teardown(struct ipa *ipa) +{ + ipa_interrupt_remove(ipa->interrupt, IPA_IRQ_TX_SUSPEND); +} + /* Initialize IPA clocking */ struct ipa_clock * ipa_clock_init(struct device *dev, const struct ipa_clock_data *data) @@ -329,3 +383,62 @@ void ipa_clock_exit(struct ipa_clock *clock) kfree(clock); clk_put(clk); } + +/** + * ipa_suspend() - Power management system suspend callback + * @dev: IPA device structure + * + * Return: Always returns zero + * + * Called by the PM framework when a system suspend operation is invoked. + * Suspends endpoints and releases the clock reference held to keep + * the IPA clock running until this point. + */ +static int ipa_suspend(struct device *dev) +{ + struct ipa *ipa = dev_get_drvdata(dev); + + /* Endpoints aren't usable until setup is complete */ + if (ipa->setup_complete) { + __clear_bit(IPA_POWER_FLAG_RESUMED, ipa->clock->flags); + ipa_endpoint_suspend(ipa); + gsi_suspend(&ipa->gsi); + } + + ipa_clock_put(ipa); + + return 0; +} + +/** + * ipa_resume() - Power management system resume callback + * @dev: IPA device structure + * + * Return: Always returns 0 + * + * Called by the PM framework when a system resume operation is invoked. + * Takes an IPA clock reference to keep the clock running until suspend, + * and resumes endpoints. + */ +static int ipa_resume(struct device *dev) +{ + struct ipa *ipa = dev_get_drvdata(dev); + + /* This clock reference will keep the IPA out of suspend + * until we get a power management suspend request. + */ + ipa_clock_get(ipa); + + /* Endpoints aren't usable until setup is complete */ + if (ipa->setup_complete) { + gsi_resume(&ipa->gsi); + ipa_endpoint_resume(ipa); + } + + return 0; +} + +const struct dev_pm_ops ipa_pm_ops = { + .suspend = ipa_suspend, + .resume = ipa_resume, +}; diff --git a/drivers/net/ipa/ipa_clock.h b/drivers/net/ipa/ipa_clock.h index 1fe634760e59..2a0f7ff3c9e6 100644 --- a/drivers/net/ipa/ipa_clock.h +++ b/drivers/net/ipa/ipa_clock.h @@ -11,6 +11,9 @@ struct device; struct ipa; struct ipa_clock_data; +/* IPA device power management function block */ +extern const struct dev_pm_ops ipa_pm_ops; + /** * ipa_clock_rate() - Return the current IPA core clock rate * @ipa: IPA structure @@ -20,6 +23,18 @@ struct ipa_clock_data; u32 ipa_clock_rate(struct ipa *ipa); /** + * ipa_power_setup() - Set up IPA power management + * @ipa: IPA pointer + */ +void ipa_power_setup(struct ipa *ipa); + +/** + * ipa_power_teardown() - Inverse of ipa_power_setup() + * @ipa: IPA pointer + */ +void ipa_power_teardown(struct ipa *ipa); + +/** * ipa_clock_init() - Initialize IPA clocking * @dev: IPA device * @data: Clock configuration data diff --git a/drivers/net/ipa/ipa_data-v4.9.c b/drivers/net/ipa/ipa_data-v4.9.c index 6ab928266b5c..8d83e14819e2 100644 --- a/drivers/net/ipa/ipa_data-v4.9.c +++ b/drivers/net/ipa/ipa_data-v4.9.c @@ -418,18 +418,13 @@ static const struct ipa_mem_data ipa_mem_data = { /* Interconnect rates are in 1000 byte/second units */ static const struct ipa_interconnect_data ipa_interconnect_data[] = { { - .name = "ipa_to_llcc", + .name = "memory", .peak_bandwidth = 600000, /* 600 MBps */ .average_bandwidth = 150000, /* 150 MBps */ }, - { - .name = "llcc_to_ebi1", - .peak_bandwidth = 1804000, /* 1.804 GBps */ - .average_bandwidth = 150000, /* 150 MBps */ - }, /* Average rate is unused for the next interconnect */ { - .name = "appss_to_ipa", + .name = "config", .peak_bandwidth = 74000, /* 74 MBps */ .average_bandwidth = 0, /* unused */ }, diff --git a/drivers/net/ipa/ipa_endpoint.c b/drivers/net/ipa/ipa_endpoint.c index 8070d1a1d5df..08ee37ae2881 100644 --- a/drivers/net/ipa/ipa_endpoint.c +++ b/drivers/net/ipa/ipa_endpoint.c @@ -1587,7 +1587,6 @@ void ipa_endpoint_suspend_one(struct ipa_endpoint *endpoint) { struct device *dev = &endpoint->ipa->pdev->dev; struct gsi *gsi = &endpoint->ipa->gsi; - bool stop_channel; int ret; if (!(endpoint->ipa->enabled & BIT(endpoint->endpoint_id))) @@ -1598,11 +1597,7 @@ void ipa_endpoint_suspend_one(struct ipa_endpoint *endpoint) (void)ipa_endpoint_program_suspend(endpoint, true); } - /* Starting with IPA v4.0, endpoints are suspended by stopping the - * underlying GSI channel rather than using endpoint suspend mode. - */ - stop_channel = endpoint->ipa->version >= IPA_VERSION_4_0; - ret = gsi_channel_suspend(gsi, endpoint->channel_id, stop_channel); + ret = gsi_channel_suspend(gsi, endpoint->channel_id); if (ret) dev_err(dev, "error %d suspending channel %u\n", ret, endpoint->channel_id); @@ -1612,7 +1607,6 @@ void ipa_endpoint_resume_one(struct ipa_endpoint *endpoint) { struct device *dev = &endpoint->ipa->pdev->dev; struct gsi *gsi = &endpoint->ipa->gsi; - bool start_channel; int ret; if (!(endpoint->ipa->enabled & BIT(endpoint->endpoint_id))) @@ -1621,11 +1615,7 @@ void ipa_endpoint_resume_one(struct ipa_endpoint *endpoint) if (!endpoint->toward_ipa) (void)ipa_endpoint_program_suspend(endpoint, false); - /* Starting with IPA v4.0, the underlying GSI channel must be - * restarted for resume. - */ - start_channel = endpoint->ipa->version >= IPA_VERSION_4_0; - ret = gsi_channel_resume(gsi, endpoint->channel_id, start_channel); + ret = gsi_channel_resume(gsi, endpoint->channel_id); if (ret) dev_err(dev, "error %d resuming channel %u\n", ret, endpoint->channel_id); diff --git a/drivers/net/ipa/ipa_main.c b/drivers/net/ipa/ipa_main.c index 2e728d4914c8..25bbb456e007 100644 --- a/drivers/net/ipa/ipa_main.c +++ b/drivers/net/ipa/ipa_main.c @@ -80,29 +80,6 @@ #define IPA_XO_CLOCK_DIVIDER 192 /* 1 is subtracted where used */ /** - * ipa_suspend_handler() - Handle the suspend IPA interrupt - * @ipa: IPA pointer - * @irq_id: IPA interrupt type (unused) - * - * If an RX endpoint is in suspend state, and the IPA has a packet - * destined for that endpoint, the IPA generates a SUSPEND interrupt - * to inform the AP that it should resume the endpoint. If we get - * one of these interrupts we just resume everything. - */ -static void ipa_suspend_handler(struct ipa *ipa, enum ipa_irq_id irq_id) -{ - /* Just report the event, and let system resume handle the rest. - * More than one endpoint could signal this; if so, ignore - * all but the first. - */ - if (!test_and_set_bit(IPA_FLAG_RESUMED, ipa->flags)) - pm_wakeup_dev_event(&ipa->pdev->dev, 0, true); - - /* Acknowledge/clear the suspend interrupt on all endpoints */ - ipa_interrupt_suspend_clear_all(ipa->interrupt); -} - -/** * ipa_setup() - Set up IPA hardware * @ipa: IPA pointer * @@ -124,12 +101,11 @@ int ipa_setup(struct ipa *ipa) if (ret) return ret; - ipa_interrupt_add(ipa->interrupt, IPA_IRQ_TX_SUSPEND, - ipa_suspend_handler); + ipa_power_setup(ipa); ret = device_init_wakeup(dev, true); if (ret) - goto err_interrupt_remove; + goto err_gsi_teardown; ipa_endpoint_setup(ipa); @@ -177,9 +153,9 @@ err_command_disable: ipa_endpoint_disable_one(command_endpoint); err_endpoint_teardown: ipa_endpoint_teardown(ipa); + ipa_power_teardown(ipa); (void)device_init_wakeup(dev, false); -err_interrupt_remove: - ipa_interrupt_remove(ipa->interrupt, IPA_IRQ_TX_SUSPEND); +err_gsi_teardown: gsi_teardown(&ipa->gsi); return ret; @@ -204,8 +180,8 @@ static void ipa_teardown(struct ipa *ipa) command_endpoint = ipa->name_map[IPA_ENDPOINT_AP_COMMAND_TX]; ipa_endpoint_disable_one(command_endpoint); ipa_endpoint_teardown(ipa); + ipa_power_teardown(ipa); (void)device_init_wakeup(&ipa->pdev->dev, false); - ipa_interrupt_remove(ipa->interrupt, IPA_IRQ_TX_SUSPEND); gsi_teardown(&ipa->gsi); } @@ -474,7 +450,7 @@ static int ipa_config(struct ipa *ipa, const struct ipa_data *data) ret = ipa_endpoint_config(ipa); if (ret) - goto err_interrupt_deconfig; + goto err_uc_deconfig; ipa_table_config(ipa); /* No deconfig required */ @@ -491,7 +467,7 @@ static int ipa_config(struct ipa *ipa, const struct ipa_data *data) err_endpoint_deconfig: ipa_endpoint_deconfig(ipa); -err_interrupt_deconfig: +err_uc_deconfig: ipa_uc_deconfig(ipa); ipa_interrupt_deconfig(ipa->interrupt); ipa->interrupt = NULL; @@ -874,62 +850,6 @@ static void ipa_shutdown(struct platform_device *pdev) dev_err(&pdev->dev, "shutdown: remove returned %d\n", ret); } -/** - * ipa_suspend() - Power management system suspend callback - * @dev: IPA device structure - * - * Return: Always returns zero - * - * Called by the PM framework when a system suspend operation is invoked. - * Suspends endpoints and releases the clock reference held to keep - * the IPA clock running until this point. - */ -static int ipa_suspend(struct device *dev) -{ - struct ipa *ipa = dev_get_drvdata(dev); - - /* Endpoints aren't usable until setup is complete */ - if (ipa->setup_complete) { - __clear_bit(IPA_FLAG_RESUMED, ipa->flags); - ipa_endpoint_suspend(ipa); - } - - ipa_clock_put(ipa); - - return 0; -} - -/** - * ipa_resume() - Power management system resume callback - * @dev: IPA device structure - * - * Return: Always returns 0 - * - * Called by the PM framework when a system resume operation is invoked. - * Takes an IPA clock reference to keep the clock running until suspend, - * and resumes endpoints. - */ -static int ipa_resume(struct device *dev) -{ - struct ipa *ipa = dev_get_drvdata(dev); - - /* This clock reference will keep the IPA out of suspend - * until we get a power management suspend request. - */ - ipa_clock_get(ipa); - - /* Endpoints aren't usable until setup is complete */ - if (ipa->setup_complete) - ipa_endpoint_resume(ipa); - - return 0; -} - -static const struct dev_pm_ops ipa_pm_ops = { - .suspend = ipa_suspend, - .resume = ipa_resume, -}; - static const struct attribute_group *ipa_attribute_groups[] = { &ipa_attribute_group, &ipa_feature_attribute_group, diff --git a/drivers/net/ipa/ipa_modem.c b/drivers/net/ipa/ipa_modem.c index 4ea8287e9d23..ad4019e8016e 100644 --- a/drivers/net/ipa/ipa_modem.c +++ b/drivers/net/ipa/ipa_modem.c @@ -178,6 +178,9 @@ void ipa_modem_suspend(struct net_device *netdev) struct ipa_priv *priv = netdev_priv(netdev); struct ipa *ipa = priv->ipa; + if (!(netdev->flags & IFF_UP)) + return; + netif_stop_queue(netdev); ipa_endpoint_suspend_one(ipa->name_map[IPA_ENDPOINT_AP_MODEM_RX]); @@ -194,6 +197,9 @@ void ipa_modem_resume(struct net_device *netdev) struct ipa_priv *priv = netdev_priv(netdev); struct ipa *ipa = priv->ipa; + if (!(netdev->flags & IFF_UP)) + return; + ipa_endpoint_resume_one(ipa->name_map[IPA_ENDPOINT_AP_MODEM_TX]); ipa_endpoint_resume_one(ipa->name_map[IPA_ENDPOINT_AP_MODEM_RX]); @@ -225,13 +231,15 @@ int ipa_modem_start(struct ipa *ipa) SET_NETDEV_DEV(netdev, &ipa->pdev->dev); priv = netdev_priv(netdev); priv->ipa = ipa; + ipa->name_map[IPA_ENDPOINT_AP_MODEM_TX]->netdev = netdev; + ipa->name_map[IPA_ENDPOINT_AP_MODEM_RX]->netdev = netdev; + ipa->modem_netdev = netdev; ret = register_netdev(netdev); - if (!ret) { - ipa->modem_netdev = netdev; - ipa->name_map[IPA_ENDPOINT_AP_MODEM_TX]->netdev = netdev; - ipa->name_map[IPA_ENDPOINT_AP_MODEM_RX]->netdev = netdev; - } else { + if (ret) { + ipa->modem_netdev = NULL; + ipa->name_map[IPA_ENDPOINT_AP_MODEM_RX]->netdev = NULL; + ipa->name_map[IPA_ENDPOINT_AP_MODEM_TX]->netdev = NULL; free_netdev(netdev); } @@ -265,13 +273,15 @@ int ipa_modem_stop(struct ipa *ipa) /* Prevent the modem from triggering a call to ipa_setup() */ ipa_smp2p_disable(ipa); - /* Stop the queue and disable the endpoints if it's open */ + /* Clean up the netdev and endpoints if it was started */ if (netdev) { - (void)ipa_stop(netdev); + /* If it was opened, stop it first */ + if (netdev->flags & IFF_UP) + (void)ipa_stop(netdev); + unregister_netdev(netdev); + ipa->modem_netdev = NULL; ipa->name_map[IPA_ENDPOINT_AP_MODEM_RX]->netdev = NULL; ipa->name_map[IPA_ENDPOINT_AP_MODEM_TX]->netdev = NULL; - ipa->modem_netdev = NULL; - unregister_netdev(netdev); free_netdev(netdev); } diff --git a/drivers/net/mhi/Makefile b/drivers/net/mhi/Makefile deleted file mode 100644 index f71b9f8f3c4f..000000000000 --- a/drivers/net/mhi/Makefile +++ /dev/null @@ -1,3 +0,0 @@ -obj-$(CONFIG_MHI_NET) += mhi_net.o - -mhi_net-y := net.o proto_mbim.o diff --git a/drivers/net/mhi/mhi.h b/drivers/net/mhi/mhi.h deleted file mode 100644 index 1d0c499d27a3..000000000000 --- a/drivers/net/mhi/mhi.h +++ /dev/null @@ -1,41 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* MHI Network driver - Network over MHI bus - * - * Copyright (C) 2021 Linaro Ltd <loic.poulain@linaro.org> - */ - -struct mhi_net_stats { - u64_stats_t rx_packets; - u64_stats_t rx_bytes; - u64_stats_t rx_errors; - u64_stats_t rx_dropped; - u64_stats_t rx_length_errors; - u64_stats_t tx_packets; - u64_stats_t tx_bytes; - u64_stats_t tx_errors; - u64_stats_t tx_dropped; - struct u64_stats_sync tx_syncp; - struct u64_stats_sync rx_syncp; -}; - -struct mhi_net_dev { - struct mhi_device *mdev; - struct net_device *ndev; - struct sk_buff *skbagg_head; - struct sk_buff *skbagg_tail; - const struct mhi_net_proto *proto; - void *proto_data; - struct delayed_work rx_refill; - struct mhi_net_stats stats; - u32 rx_queue_sz; - int msg_enable; - unsigned int mru; -}; - -struct mhi_net_proto { - int (*init)(struct mhi_net_dev *mhi_netdev); - struct sk_buff * (*tx_fixup)(struct mhi_net_dev *mhi_netdev, struct sk_buff *skb); - void (*rx)(struct mhi_net_dev *mhi_netdev, struct sk_buff *skb); -}; - -extern const struct mhi_net_proto proto_mbim; diff --git a/drivers/net/mhi/proto_mbim.c b/drivers/net/mhi/proto_mbim.c deleted file mode 100644 index 761d90b28ee6..000000000000 --- a/drivers/net/mhi/proto_mbim.c +++ /dev/null @@ -1,310 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* MHI Network driver - Network over MHI bus - * - * Copyright (C) 2021 Linaro Ltd <loic.poulain@linaro.org> - * - * This driver copy some code from cdc_ncm, which is: - * Copyright (C) ST-Ericsson 2010-2012 - * and cdc_mbim, which is: - * Copyright (c) 2012 Smith Micro Software, Inc. - * Copyright (c) 2012 Bjørn Mork <bjorn@mork.no> - * - */ - -#include <linux/ethtool.h> -#include <linux/if_vlan.h> -#include <linux/ip.h> -#include <linux/mii.h> -#include <linux/netdevice.h> -#include <linux/wwan.h> -#include <linux/skbuff.h> -#include <linux/usb.h> -#include <linux/usb/cdc.h> -#include <linux/usb/usbnet.h> -#include <linux/usb/cdc_ncm.h> - -#include "mhi.h" - -#define MBIM_NDP16_SIGN_MASK 0x00ffffff - -/* Usual WWAN MTU */ -#define MHI_MBIM_DEFAULT_MTU 1500 - -/* 3500 allows to optimize skb allocation, the skbs will basically fit in - * one 4K page. Large MBIM packets will simply be split over several MHI - * transfers and chained by the MHI net layer (zerocopy). - */ -#define MHI_MBIM_DEFAULT_MRU 3500 - -struct mbim_context { - u16 rx_seq; - u16 tx_seq; -}; - -static void __mbim_length_errors_inc(struct mhi_net_dev *dev) -{ - u64_stats_update_begin(&dev->stats.rx_syncp); - u64_stats_inc(&dev->stats.rx_length_errors); - u64_stats_update_end(&dev->stats.rx_syncp); -} - -static void __mbim_errors_inc(struct mhi_net_dev *dev) -{ - u64_stats_update_begin(&dev->stats.rx_syncp); - u64_stats_inc(&dev->stats.rx_errors); - u64_stats_update_end(&dev->stats.rx_syncp); -} - -static int mbim_rx_verify_nth16(struct sk_buff *skb) -{ - struct mhi_net_dev *dev = wwan_netdev_drvpriv(skb->dev); - struct mbim_context *ctx = dev->proto_data; - struct usb_cdc_ncm_nth16 *nth16; - int len; - - if (skb->len < sizeof(struct usb_cdc_ncm_nth16) + - sizeof(struct usb_cdc_ncm_ndp16)) { - netif_dbg(dev, rx_err, dev->ndev, "frame too short\n"); - __mbim_length_errors_inc(dev); - return -EINVAL; - } - - nth16 = (struct usb_cdc_ncm_nth16 *)skb->data; - - if (nth16->dwSignature != cpu_to_le32(USB_CDC_NCM_NTH16_SIGN)) { - netif_dbg(dev, rx_err, dev->ndev, - "invalid NTH16 signature <%#010x>\n", - le32_to_cpu(nth16->dwSignature)); - __mbim_errors_inc(dev); - return -EINVAL; - } - - /* No limit on the block length, except the size of the data pkt */ - len = le16_to_cpu(nth16->wBlockLength); - if (len > skb->len) { - netif_dbg(dev, rx_err, dev->ndev, - "NTB does not fit into the skb %u/%u\n", len, - skb->len); - __mbim_length_errors_inc(dev); - return -EINVAL; - } - - if (ctx->rx_seq + 1 != le16_to_cpu(nth16->wSequence) && - (ctx->rx_seq || le16_to_cpu(nth16->wSequence)) && - !(ctx->rx_seq == 0xffff && !le16_to_cpu(nth16->wSequence))) { - netif_dbg(dev, rx_err, dev->ndev, - "sequence number glitch prev=%d curr=%d\n", - ctx->rx_seq, le16_to_cpu(nth16->wSequence)); - } - ctx->rx_seq = le16_to_cpu(nth16->wSequence); - - return le16_to_cpu(nth16->wNdpIndex); -} - -static int mbim_rx_verify_ndp16(struct sk_buff *skb, struct usb_cdc_ncm_ndp16 *ndp16) -{ - struct mhi_net_dev *dev = wwan_netdev_drvpriv(skb->dev); - int ret; - - if (le16_to_cpu(ndp16->wLength) < USB_CDC_NCM_NDP16_LENGTH_MIN) { - netif_dbg(dev, rx_err, dev->ndev, "invalid DPT16 length <%u>\n", - le16_to_cpu(ndp16->wLength)); - return -EINVAL; - } - - ret = ((le16_to_cpu(ndp16->wLength) - sizeof(struct usb_cdc_ncm_ndp16)) - / sizeof(struct usb_cdc_ncm_dpe16)); - ret--; /* Last entry is always a NULL terminator */ - - if (sizeof(struct usb_cdc_ncm_ndp16) + - ret * sizeof(struct usb_cdc_ncm_dpe16) > skb->len) { - netif_dbg(dev, rx_err, dev->ndev, - "Invalid nframes = %d\n", ret); - return -EINVAL; - } - - return ret; -} - -static void mbim_rx(struct mhi_net_dev *mhi_netdev, struct sk_buff *skb) -{ - struct net_device *ndev = mhi_netdev->ndev; - int ndpoffset; - - /* Check NTB header and retrieve first NDP offset */ - ndpoffset = mbim_rx_verify_nth16(skb); - if (ndpoffset < 0) { - net_err_ratelimited("%s: Incorrect NTB header\n", ndev->name); - goto error; - } - - /* Process each NDP */ - while (1) { - struct usb_cdc_ncm_ndp16 ndp16; - struct usb_cdc_ncm_dpe16 dpe16; - int nframes, n, dpeoffset; - - if (skb_copy_bits(skb, ndpoffset, &ndp16, sizeof(ndp16))) { - net_err_ratelimited("%s: Incorrect NDP offset (%u)\n", - ndev->name, ndpoffset); - __mbim_length_errors_inc(mhi_netdev); - goto error; - } - - /* Check NDP header and retrieve number of datagrams */ - nframes = mbim_rx_verify_ndp16(skb, &ndp16); - if (nframes < 0) { - net_err_ratelimited("%s: Incorrect NDP16\n", ndev->name); - __mbim_length_errors_inc(mhi_netdev); - goto error; - } - - /* Only IP data type supported, no DSS in MHI context */ - if ((ndp16.dwSignature & cpu_to_le32(MBIM_NDP16_SIGN_MASK)) - != cpu_to_le32(USB_CDC_MBIM_NDP16_IPS_SIGN)) { - net_err_ratelimited("%s: Unsupported NDP type\n", ndev->name); - __mbim_errors_inc(mhi_netdev); - goto next_ndp; - } - - /* Only primary IP session 0 (0x00) supported for now */ - if (ndp16.dwSignature & ~cpu_to_le32(MBIM_NDP16_SIGN_MASK)) { - net_err_ratelimited("%s: bad packet session\n", ndev->name); - __mbim_errors_inc(mhi_netdev); - goto next_ndp; - } - - /* de-aggregate and deliver IP packets */ - dpeoffset = ndpoffset + sizeof(struct usb_cdc_ncm_ndp16); - for (n = 0; n < nframes; n++, dpeoffset += sizeof(dpe16)) { - u16 dgram_offset, dgram_len; - struct sk_buff *skbn; - - if (skb_copy_bits(skb, dpeoffset, &dpe16, sizeof(dpe16))) - break; - - dgram_offset = le16_to_cpu(dpe16.wDatagramIndex); - dgram_len = le16_to_cpu(dpe16.wDatagramLength); - - if (!dgram_offset || !dgram_len) - break; /* null terminator */ - - skbn = netdev_alloc_skb(ndev, dgram_len); - if (!skbn) - continue; - - skb_put(skbn, dgram_len); - skb_copy_bits(skb, dgram_offset, skbn->data, dgram_len); - - switch (skbn->data[0] & 0xf0) { - case 0x40: - skbn->protocol = htons(ETH_P_IP); - break; - case 0x60: - skbn->protocol = htons(ETH_P_IPV6); - break; - default: - net_err_ratelimited("%s: unknown protocol\n", - ndev->name); - __mbim_errors_inc(mhi_netdev); - dev_kfree_skb_any(skbn); - continue; - } - - u64_stats_update_begin(&mhi_netdev->stats.rx_syncp); - u64_stats_inc(&mhi_netdev->stats.rx_packets); - u64_stats_add(&mhi_netdev->stats.rx_bytes, skbn->len); - u64_stats_update_end(&mhi_netdev->stats.rx_syncp); - netif_rx(skbn); - } -next_ndp: - /* Other NDP to process? */ - ndpoffset = (int)le16_to_cpu(ndp16.wNextNdpIndex); - if (!ndpoffset) - break; - } - - /* free skb */ - dev_consume_skb_any(skb); - return; -error: - dev_kfree_skb_any(skb); -} - -struct mbim_tx_hdr { - struct usb_cdc_ncm_nth16 nth16; - struct usb_cdc_ncm_ndp16 ndp16; - struct usb_cdc_ncm_dpe16 dpe16[2]; -} __packed; - -static struct sk_buff *mbim_tx_fixup(struct mhi_net_dev *mhi_netdev, - struct sk_buff *skb) -{ - struct mbim_context *ctx = mhi_netdev->proto_data; - unsigned int dgram_size = skb->len; - struct usb_cdc_ncm_nth16 *nth16; - struct usb_cdc_ncm_ndp16 *ndp16; - struct mbim_tx_hdr *mbim_hdr; - - /* For now, this is a partial implementation of CDC MBIM, only one NDP - * is sent, containing the IP packet (no aggregation). - */ - - /* Ensure we have enough headroom for crafting MBIM header */ - if (skb_cow_head(skb, sizeof(struct mbim_tx_hdr))) { - dev_kfree_skb_any(skb); - return NULL; - } - - mbim_hdr = skb_push(skb, sizeof(struct mbim_tx_hdr)); - - /* Fill NTB header */ - nth16 = &mbim_hdr->nth16; - nth16->dwSignature = cpu_to_le32(USB_CDC_NCM_NTH16_SIGN); - nth16->wHeaderLength = cpu_to_le16(sizeof(struct usb_cdc_ncm_nth16)); - nth16->wSequence = cpu_to_le16(ctx->tx_seq++); - nth16->wBlockLength = cpu_to_le16(skb->len); - nth16->wNdpIndex = cpu_to_le16(sizeof(struct usb_cdc_ncm_nth16)); - - /* Fill the unique NDP */ - ndp16 = &mbim_hdr->ndp16; - ndp16->dwSignature = cpu_to_le32(USB_CDC_MBIM_NDP16_IPS_SIGN); - ndp16->wLength = cpu_to_le16(sizeof(struct usb_cdc_ncm_ndp16) - + sizeof(struct usb_cdc_ncm_dpe16) * 2); - ndp16->wNextNdpIndex = 0; - - /* Datagram follows the mbim header */ - ndp16->dpe16[0].wDatagramIndex = cpu_to_le16(sizeof(struct mbim_tx_hdr)); - ndp16->dpe16[0].wDatagramLength = cpu_to_le16(dgram_size); - - /* null termination */ - ndp16->dpe16[1].wDatagramIndex = 0; - ndp16->dpe16[1].wDatagramLength = 0; - - return skb; -} - -static int mbim_init(struct mhi_net_dev *mhi_netdev) -{ - struct net_device *ndev = mhi_netdev->ndev; - - mhi_netdev->proto_data = devm_kzalloc(&ndev->dev, - sizeof(struct mbim_context), - GFP_KERNEL); - if (!mhi_netdev->proto_data) - return -ENOMEM; - - ndev->needed_headroom = sizeof(struct mbim_tx_hdr); - ndev->mtu = MHI_MBIM_DEFAULT_MTU; - - if (!mhi_netdev->mru) - mhi_netdev->mru = MHI_MBIM_DEFAULT_MRU; - - return 0; -} - -const struct mhi_net_proto proto_mbim = { - .init = mbim_init, - .rx = mbim_rx, - .tx_fixup = mbim_tx_fixup, -}; diff --git a/drivers/net/mhi/net.c b/drivers/net/mhi_net.c index 0cc7dcd0ff96..975f7f9bdf4c 100644 --- a/drivers/net/mhi/net.c +++ b/drivers/net/mhi_net.c @@ -11,28 +11,42 @@ #include <linux/netdevice.h> #include <linux/skbuff.h> #include <linux/u64_stats_sync.h> -#include <linux/wwan.h> - -#include "mhi.h" #define MHI_NET_MIN_MTU ETH_MIN_MTU #define MHI_NET_MAX_MTU 0xffff #define MHI_NET_DEFAULT_MTU 0x4000 -/* When set to false, the default netdev (link 0) is not created, and it's up - * to user to create the link (via wwan rtnetlink). - */ -static bool create_default_iface = true; -module_param(create_default_iface, bool, 0); +struct mhi_net_stats { + u64_stats_t rx_packets; + u64_stats_t rx_bytes; + u64_stats_t rx_errors; + u64_stats_t tx_packets; + u64_stats_t tx_bytes; + u64_stats_t tx_errors; + u64_stats_t tx_dropped; + struct u64_stats_sync tx_syncp; + struct u64_stats_sync rx_syncp; +}; + +struct mhi_net_dev { + struct mhi_device *mdev; + struct net_device *ndev; + struct sk_buff *skbagg_head; + struct sk_buff *skbagg_tail; + struct delayed_work rx_refill; + struct mhi_net_stats stats; + u32 rx_queue_sz; + int msg_enable; + unsigned int mru; +}; struct mhi_device_info { const char *netname; - const struct mhi_net_proto *proto; }; static int mhi_ndo_open(struct net_device *ndev) { - struct mhi_net_dev *mhi_netdev = wwan_netdev_drvpriv(ndev); + struct mhi_net_dev *mhi_netdev = netdev_priv(ndev); /* Feed the rx buffer pool */ schedule_delayed_work(&mhi_netdev->rx_refill, 0); @@ -47,7 +61,7 @@ static int mhi_ndo_open(struct net_device *ndev) static int mhi_ndo_stop(struct net_device *ndev) { - struct mhi_net_dev *mhi_netdev = wwan_netdev_drvpriv(ndev); + struct mhi_net_dev *mhi_netdev = netdev_priv(ndev); netif_stop_queue(ndev); netif_carrier_off(ndev); @@ -58,17 +72,10 @@ static int mhi_ndo_stop(struct net_device *ndev) static netdev_tx_t mhi_ndo_xmit(struct sk_buff *skb, struct net_device *ndev) { - struct mhi_net_dev *mhi_netdev = wwan_netdev_drvpriv(ndev); - const struct mhi_net_proto *proto = mhi_netdev->proto; + struct mhi_net_dev *mhi_netdev = netdev_priv(ndev); struct mhi_device *mdev = mhi_netdev->mdev; int err; - if (proto && proto->tx_fixup) { - skb = proto->tx_fixup(mhi_netdev, skb); - if (unlikely(!skb)) - goto exit_drop; - } - err = mhi_queue_skb(mdev, DMA_TO_DEVICE, skb, skb->len, MHI_EOT); if (unlikely(err)) { net_err_ratelimited("%s: Failed to queue TX buf (%d)\n", @@ -93,7 +100,7 @@ exit_drop: static void mhi_ndo_get_stats64(struct net_device *ndev, struct rtnl_link_stats64 *stats) { - struct mhi_net_dev *mhi_netdev = wwan_netdev_drvpriv(ndev); + struct mhi_net_dev *mhi_netdev = netdev_priv(ndev); unsigned int start; do { @@ -101,8 +108,6 @@ static void mhi_ndo_get_stats64(struct net_device *ndev, stats->rx_packets = u64_stats_read(&mhi_netdev->stats.rx_packets); stats->rx_bytes = u64_stats_read(&mhi_netdev->stats.rx_bytes); stats->rx_errors = u64_stats_read(&mhi_netdev->stats.rx_errors); - stats->rx_dropped = u64_stats_read(&mhi_netdev->stats.rx_dropped); - stats->rx_length_errors = u64_stats_read(&mhi_netdev->stats.rx_length_errors); } while (u64_stats_fetch_retry_irq(&mhi_netdev->stats.rx_syncp, start)); do { @@ -165,7 +170,6 @@ static void mhi_net_dl_callback(struct mhi_device *mhi_dev, struct mhi_result *mhi_res) { struct mhi_net_dev *mhi_netdev = dev_get_drvdata(&mhi_dev->dev); - const struct mhi_net_proto *proto = mhi_netdev->proto; struct sk_buff *skb = mhi_res->buf_addr; int free_desc_count; @@ -217,15 +221,11 @@ static void mhi_net_dl_callback(struct mhi_device *mhi_dev, break; } - if (proto && proto->rx) { - proto->rx(mhi_netdev, skb); - } else { - u64_stats_update_begin(&mhi_netdev->stats.rx_syncp); - u64_stats_inc(&mhi_netdev->stats.rx_packets); - u64_stats_add(&mhi_netdev->stats.rx_bytes, skb->len); - u64_stats_update_end(&mhi_netdev->stats.rx_syncp); - netif_rx(skb); - } + u64_stats_update_begin(&mhi_netdev->stats.rx_syncp); + u64_stats_inc(&mhi_netdev->stats.rx_packets); + u64_stats_add(&mhi_netdev->stats.rx_bytes, skb->len); + u64_stats_update_end(&mhi_netdev->stats.rx_syncp); + netif_rx(skb); } /* Refill if RX buffers queue becomes low */ @@ -248,7 +248,6 @@ static void mhi_net_ul_callback(struct mhi_device *mhi_dev, u64_stats_update_begin(&mhi_netdev->stats.tx_syncp); if (unlikely(mhi_res->transaction_status)) { - /* MHI layer stopping/resetting the UL channel */ if (mhi_res->transaction_status == -ENOTCONN) { u64_stats_update_end(&mhi_netdev->stats.tx_syncp); @@ -302,33 +301,17 @@ static void mhi_net_rx_refill_work(struct work_struct *work) schedule_delayed_work(&mhi_netdev->rx_refill, HZ / 2); } -static int mhi_net_newlink(void *ctxt, struct net_device *ndev, u32 if_id, - struct netlink_ext_ack *extack) +static int mhi_net_newlink(struct mhi_device *mhi_dev, struct net_device *ndev) { - const struct mhi_device_info *info; - struct mhi_device *mhi_dev = ctxt; struct mhi_net_dev *mhi_netdev; int err; - info = (struct mhi_device_info *)mhi_dev->id->driver_data; - - /* For now we only support one link (link context 0), driver must be - * reworked to break 1:1 relationship for net MBIM and to forward setup - * call to rmnet(QMAP) otherwise. - */ - if (if_id != 0) - return -EINVAL; - - if (dev_get_drvdata(&mhi_dev->dev)) - return -EBUSY; - - mhi_netdev = wwan_netdev_drvpriv(ndev); + mhi_netdev = netdev_priv(ndev); dev_set_drvdata(&mhi_dev->dev, mhi_netdev); mhi_netdev->ndev = ndev; mhi_netdev->mdev = mhi_dev; mhi_netdev->skbagg_head = NULL; - mhi_netdev->proto = info->proto; mhi_netdev->mru = mhi_dev->mhi_cntrl->mru; INIT_DELAYED_WORK(&mhi_netdev->rx_refill, mhi_net_rx_refill_work); @@ -336,45 +319,29 @@ static int mhi_net_newlink(void *ctxt, struct net_device *ndev, u32 if_id, u64_stats_init(&mhi_netdev->stats.tx_syncp); /* Start MHI channels */ - err = mhi_prepare_for_transfer(mhi_dev); + err = mhi_prepare_for_transfer(mhi_dev, 0); if (err) goto out_err; /* Number of transfer descriptors determines size of the queue */ mhi_netdev->rx_queue_sz = mhi_get_free_desc_count(mhi_dev, DMA_FROM_DEVICE); - if (extack) - err = register_netdevice(ndev); - else - err = register_netdev(ndev); + err = register_netdev(ndev); if (err) - goto out_err; - - if (mhi_netdev->proto) { - err = mhi_netdev->proto->init(mhi_netdev); - if (err) - goto out_err_proto; - } + return err; return 0; -out_err_proto: - unregister_netdevice(ndev); out_err: free_netdev(ndev); return err; } -static void mhi_net_dellink(void *ctxt, struct net_device *ndev, - struct list_head *head) +static void mhi_net_dellink(struct mhi_device *mhi_dev, struct net_device *ndev) { - struct mhi_net_dev *mhi_netdev = wwan_netdev_drvpriv(ndev); - struct mhi_device *mhi_dev = ctxt; + struct mhi_net_dev *mhi_netdev = netdev_priv(ndev); - if (head) - unregister_netdevice_queue(ndev, head); - else - unregister_netdev(ndev); + unregister_netdev(ndev); mhi_unprepare_from_transfer(mhi_dev); @@ -383,65 +350,34 @@ static void mhi_net_dellink(void *ctxt, struct net_device *ndev, dev_set_drvdata(&mhi_dev->dev, NULL); } -static const struct wwan_ops mhi_wwan_ops = { - .priv_size = sizeof(struct mhi_net_dev), - .setup = mhi_net_setup, - .newlink = mhi_net_newlink, - .dellink = mhi_net_dellink, -}; - static int mhi_net_probe(struct mhi_device *mhi_dev, const struct mhi_device_id *id) { const struct mhi_device_info *info = (struct mhi_device_info *)id->driver_data; - struct mhi_controller *cntrl = mhi_dev->mhi_cntrl; struct net_device *ndev; int err; - err = wwan_register_ops(&cntrl->mhi_dev->dev, &mhi_wwan_ops, mhi_dev, - WWAN_NO_DEFAULT_LINK); - if (err) - return err; - - if (!create_default_iface) - return 0; - - /* Create a default interface which is used as either RMNET real-dev, - * MBIM link 0 or ip link 0) - */ ndev = alloc_netdev(sizeof(struct mhi_net_dev), info->netname, NET_NAME_PREDICTABLE, mhi_net_setup); - if (!ndev) { - err = -ENOMEM; - goto err_unregister; - } + if (!ndev) + return -ENOMEM; SET_NETDEV_DEV(ndev, &mhi_dev->dev); - err = mhi_net_newlink(mhi_dev, ndev, 0, NULL); - if (err) - goto err_release; + err = mhi_net_newlink(mhi_dev, ndev); + if (err) { + free_netdev(ndev); + return err; + } return 0; - -err_release: - free_netdev(ndev); -err_unregister: - wwan_unregister_ops(&cntrl->mhi_dev->dev); - - return err; } static void mhi_net_remove(struct mhi_device *mhi_dev) { struct mhi_net_dev *mhi_netdev = dev_get_drvdata(&mhi_dev->dev); - struct mhi_controller *cntrl = mhi_dev->mhi_cntrl; - - /* WWAN core takes care of removing remaining links */ - wwan_unregister_ops(&cntrl->mhi_dev->dev); - if (create_default_iface) - mhi_net_dellink(mhi_dev, mhi_netdev->ndev, NULL); + mhi_net_dellink(mhi_dev, mhi_netdev->ndev); } static const struct mhi_device_info mhi_hwip0 = { @@ -452,18 +388,11 @@ static const struct mhi_device_info mhi_swip0 = { .netname = "mhi_swip%d", }; -static const struct mhi_device_info mhi_hwip0_mbim = { - .netname = "mhi_mbim%d", - .proto = &proto_mbim, -}; - static const struct mhi_device_id mhi_net_id_table[] = { /* Hardware accelerated data PATH (to modem IPA), protocol agnostic */ { .chan = "IP_HW0", .driver_data = (kernel_ulong_t)&mhi_hwip0 }, /* Software data PATH (to modem CPU) */ { .chan = "IP_SW0", .driver_data = (kernel_ulong_t)&mhi_swip0 }, - /* Hardware accelerated data PATH (to modem IPA), MBIM protocol */ - { .chan = "IP_HW0_MBIM", .driver_data = (kernel_ulong_t)&mhi_hwip0_mbim }, {} }; MODULE_DEVICE_TABLE(mhi, mhi_net_id_table); diff --git a/drivers/net/netdevsim/bus.c b/drivers/net/netdevsim/bus.c index ff01e5bdc72e..62d033a1a557 100644 --- a/drivers/net/netdevsim/bus.c +++ b/drivers/net/netdevsim/bus.c @@ -183,8 +183,6 @@ new_port_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct nsim_bus_dev *nsim_bus_dev = to_nsim_bus_dev(dev); - struct nsim_dev *nsim_dev = dev_get_drvdata(dev); - struct devlink *devlink; unsigned int port_index; int ret; @@ -195,12 +193,15 @@ new_port_store(struct device *dev, struct device_attribute *attr, if (ret) return ret; - devlink = priv_to_devlink(nsim_dev); + if (!mutex_trylock(&nsim_bus_dev->nsim_bus_reload_lock)) + return -EBUSY; + + if (nsim_bus_dev->in_reload) { + mutex_unlock(&nsim_bus_dev->nsim_bus_reload_lock); + return -EBUSY; + } - mutex_lock(&nsim_bus_dev->nsim_bus_reload_lock); - devlink_reload_disable(devlink); ret = nsim_dev_port_add(nsim_bus_dev, NSIM_DEV_PORT_TYPE_PF, port_index); - devlink_reload_enable(devlink); mutex_unlock(&nsim_bus_dev->nsim_bus_reload_lock); return ret ? ret : count; } @@ -212,8 +213,6 @@ del_port_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct nsim_bus_dev *nsim_bus_dev = to_nsim_bus_dev(dev); - struct nsim_dev *nsim_dev = dev_get_drvdata(dev); - struct devlink *devlink; unsigned int port_index; int ret; @@ -224,12 +223,15 @@ del_port_store(struct device *dev, struct device_attribute *attr, if (ret) return ret; - devlink = priv_to_devlink(nsim_dev); + if (!mutex_trylock(&nsim_bus_dev->nsim_bus_reload_lock)) + return -EBUSY; + + if (nsim_bus_dev->in_reload) { + mutex_unlock(&nsim_bus_dev->nsim_bus_reload_lock); + return -EBUSY; + } - mutex_lock(&nsim_bus_dev->nsim_bus_reload_lock); - devlink_reload_disable(devlink); ret = nsim_dev_port_del(nsim_bus_dev, NSIM_DEV_PORT_TYPE_PF, port_index); - devlink_reload_enable(devlink); mutex_unlock(&nsim_bus_dev->nsim_bus_reload_lock); return ret ? ret : count; } diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index d538a39d4225..54313bd57797 100644 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -864,16 +864,24 @@ static int nsim_dev_reload_down(struct devlink *devlink, bool netns_change, struct netlink_ext_ack *extack) { struct nsim_dev *nsim_dev = devlink_priv(devlink); + struct nsim_bus_dev *nsim_bus_dev; + + nsim_bus_dev = nsim_dev->nsim_bus_dev; + if (!mutex_trylock(&nsim_bus_dev->nsim_bus_reload_lock)) + return -EOPNOTSUPP; if (nsim_dev->dont_allow_reload) { /* For testing purposes, user set debugfs dont_allow_reload * value to true. So forbid it. */ NL_SET_ERR_MSG_MOD(extack, "User forbid the reload for testing purposes"); + mutex_unlock(&nsim_bus_dev->nsim_bus_reload_lock); return -EOPNOTSUPP; } + nsim_bus_dev->in_reload = true; nsim_dev_reload_destroy(nsim_dev); + mutex_unlock(&nsim_bus_dev->nsim_bus_reload_lock); return 0; } @@ -882,17 +890,26 @@ static int nsim_dev_reload_up(struct devlink *devlink, enum devlink_reload_actio struct netlink_ext_ack *extack) { struct nsim_dev *nsim_dev = devlink_priv(devlink); + struct nsim_bus_dev *nsim_bus_dev; + int ret; + + nsim_bus_dev = nsim_dev->nsim_bus_dev; + mutex_lock(&nsim_bus_dev->nsim_bus_reload_lock); + nsim_bus_dev->in_reload = false; if (nsim_dev->fail_reload) { /* For testing purposes, user set debugfs fail_reload * value to true. Fail right away. */ NL_SET_ERR_MSG_MOD(extack, "User setup the reload to fail for testing purposes"); + mutex_unlock(&nsim_bus_dev->nsim_bus_reload_lock); return -EINVAL; } *actions_performed = BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT); - return nsim_dev_reload_create(nsim_dev, extack); + ret = nsim_dev_reload_create(nsim_dev, extack); + mutex_unlock(&nsim_bus_dev->nsim_bus_reload_lock); + return ret; } static int nsim_dev_info_get(struct devlink *devlink, @@ -1432,7 +1449,7 @@ int nsim_dev_probe(struct nsim_bus_dev *nsim_bus_dev) int err; devlink = devlink_alloc_ns(&nsim_dev_devlink_ops, sizeof(*nsim_dev), - nsim_bus_dev->initial_net); + nsim_bus_dev->initial_net, &nsim_bus_dev->dev); if (!devlink) return -ENOMEM; nsim_dev = devlink_priv(devlink); @@ -1453,7 +1470,7 @@ int nsim_dev_probe(struct nsim_bus_dev *nsim_bus_dev) if (err) goto err_devlink_free; - err = devlink_register(devlink, &nsim_bus_dev->dev); + err = devlink_register(devlink); if (err) goto err_resources_unregister; diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h index 1c20bcbd9d91..793c86dc5a9c 100644 --- a/drivers/net/netdevsim/netdevsim.h +++ b/drivers/net/netdevsim/netdevsim.h @@ -362,6 +362,7 @@ struct nsim_bus_dev { struct nsim_vf_config *vfconfigs; /* Lock for devlink->reload_enabled in netdevsim module */ struct mutex nsim_bus_reload_lock; + bool in_reload; bool init; }; diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 4d53886f7d51..53bdd673ae56 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -401,11 +401,11 @@ static int ksz8041_config_aneg(struct phy_device *phydev) } static int ksz8051_ksz8795_match_phy_device(struct phy_device *phydev, - const u32 ksz_phy_id) + const bool ksz_8051) { int ret; - if ((phydev->phy_id & MICREL_PHY_ID_MASK) != ksz_phy_id) + if ((phydev->phy_id & MICREL_PHY_ID_MASK) != PHY_ID_KSZ8051) return 0; ret = phy_read(phydev, MII_BMSR); @@ -418,7 +418,7 @@ static int ksz8051_ksz8795_match_phy_device(struct phy_device *phydev, * the switch does not. */ ret &= BMSR_ERCAP; - if (ksz_phy_id == PHY_ID_KSZ8051) + if (ksz_8051) return ret; else return !ret; @@ -426,7 +426,7 @@ static int ksz8051_ksz8795_match_phy_device(struct phy_device *phydev, static int ksz8051_match_phy_device(struct phy_device *phydev) { - return ksz8051_ksz8795_match_phy_device(phydev, PHY_ID_KSZ8051); + return ksz8051_ksz8795_match_phy_device(phydev, true); } static int ksz8081_config_init(struct phy_device *phydev) @@ -535,7 +535,7 @@ static int ksz8061_config_init(struct phy_device *phydev) static int ksz8795_match_phy_device(struct phy_device *phydev) { - return ksz8051_ksz8795_match_phy_device(phydev, PHY_ID_KSZ87XX); + return ksz8051_ksz8795_match_phy_device(phydev, false); } static int ksz9021_load_values_from_of(struct phy_device *phydev, diff --git a/drivers/net/usb/ipheth.c b/drivers/net/usb/ipheth.c index 207e59e74935..06e2181e5810 100644 --- a/drivers/net/usb/ipheth.c +++ b/drivers/net/usb/ipheth.c @@ -443,7 +443,7 @@ static int ipheth_probe(struct usb_interface *intf, netdev->netdev_ops = &ipheth_netdev_ops; netdev->watchdog_timeo = IPHETH_TX_TIMEOUT; - strcpy(netdev->name, "eth%d"); + strscpy(netdev->name, "eth%d", sizeof(netdev->name)); dev = netdev_priv(netdev); dev->udev = udev; diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 13f86368b78a..4e8d3c28f73e 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -1154,7 +1154,7 @@ static int lan78xx_link_reset(struct lan78xx_net *dev) { struct phy_device *phydev = dev->net->phydev; struct ethtool_link_ksettings ecmd; - int ladv, radv, ret; + int ladv, radv, ret, link; u32 buf; /* clear LAN78xx interrupt status */ @@ -1162,9 +1162,12 @@ static int lan78xx_link_reset(struct lan78xx_net *dev) if (unlikely(ret < 0)) return -EIO; + mutex_lock(&phydev->lock); phy_read_status(phydev); + link = phydev->link; + mutex_unlock(&phydev->lock); - if (!phydev->link && dev->link_on) { + if (!link && dev->link_on) { dev->link_on = false; /* reset MAC */ @@ -1177,7 +1180,7 @@ static int lan78xx_link_reset(struct lan78xx_net *dev) return -EIO; del_timer(&dev->stat_monitor); - } else if (phydev->link && !dev->link_on) { + } else if (link && !dev->link_on) { dev->link_on = true; phy_ethtool_ksettings_get(phydev, &ecmd); @@ -1466,9 +1469,14 @@ static int lan78xx_set_eee(struct net_device *net, struct ethtool_eee *edata) static u32 lan78xx_get_link(struct net_device *net) { + u32 link; + + mutex_lock(&net->phydev->lock); phy_read_status(net->phydev); + link = net->phydev->link; + mutex_unlock(&net->phydev->lock); - return net->phydev->link; + return link; } static void lan78xx_get_drvinfo(struct net_device *net, diff --git a/drivers/net/usb/pegasus.c b/drivers/net/usb/pegasus.c index 0475ef0efdca..36dafcb3d04a 100644 --- a/drivers/net/usb/pegasus.c +++ b/drivers/net/usb/pegasus.c @@ -1,31 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (c) 1999-2013 Petko Manolov (petkan@nucleusys.com) + * Copyright (c) 1999-2021 Petko Manolov (petkan@nucleusys.com) * - * ChangeLog: - * .... Most of the time spent on reading sources & docs. - * v0.2.x First official release for the Linux kernel. - * v0.3.0 Beutified and structured, some bugs fixed. - * v0.3.x URBifying bulk requests and bugfixing. First relatively - * stable release. Still can touch device's registers only - * from top-halves. - * v0.4.0 Control messages remained unurbified are now URBs. - * Now we can touch the HW at any time. - * v0.4.9 Control urbs again use process context to wait. Argh... - * Some long standing bugs (enable_net_traffic) fixed. - * Also nasty trick about resubmiting control urb from - * interrupt context used. Please let me know how it - * behaves. Pegasus II support added since this version. - * TODO: suppressing HCD warnings spewage on disconnect. - * v0.4.13 Ethernet address is now set at probe(), not at open() - * time as this seems to break dhcpd. - * v0.5.0 branch to 2.5.x kernels - * v0.5.1 ethtool support added - * v0.5.5 rx socket buffers are in a pool and the their allocation - * is out of the interrupt routine. - * ... - * v0.9.3 simplified [get|set]_register(s), async update registers - * logic revisited, receive skb_pool removed. */ #include <linux/sched.h> @@ -45,7 +21,6 @@ /* * Version Information */ -#define DRIVER_VERSION "v0.9.3 (2013/04/25)" #define DRIVER_AUTHOR "Petko Manolov <petkan@nucleusys.com>" #define DRIVER_DESC "Pegasus/Pegasus II USB Ethernet driver" @@ -132,9 +107,15 @@ static int get_registers(pegasus_t *pegasus, __u16 indx, __u16 size, void *data) static int set_registers(pegasus_t *pegasus, __u16 indx, __u16 size, const void *data) { - return usb_control_msg_send(pegasus->usb, 0, PEGASUS_REQ_SET_REGS, + int ret; + + ret = usb_control_msg_send(pegasus->usb, 0, PEGASUS_REQ_SET_REGS, PEGASUS_REQT_WRITE, 0, indx, data, size, 1000, GFP_NOIO); + if (ret < 0) + netif_dbg(pegasus, drv, pegasus->net, "%s failed with %d\n", __func__, ret); + + return ret; } /* @@ -145,10 +126,15 @@ static int set_registers(pegasus_t *pegasus, __u16 indx, __u16 size, static int set_register(pegasus_t *pegasus, __u16 indx, __u8 data) { void *buf = &data; + int ret; - return usb_control_msg_send(pegasus->usb, 0, PEGASUS_REQ_SET_REG, + ret = usb_control_msg_send(pegasus->usb, 0, PEGASUS_REQ_SET_REG, PEGASUS_REQT_WRITE, data, indx, buf, 1, 1000, GFP_NOIO); + if (ret < 0) + netif_dbg(pegasus, drv, pegasus->net, "%s failed with %d\n", __func__, ret); + + return ret; } static int update_eth_regs_async(pegasus_t *pegasus) @@ -188,10 +174,9 @@ static int update_eth_regs_async(pegasus_t *pegasus) static int __mii_op(pegasus_t *p, __u8 phy, __u8 indx, __u16 *regd, __u8 cmd) { - int i; - __u8 data[4] = { phy, 0, 0, indx }; + int i, ret; __le16 regdi; - int ret = -ETIMEDOUT; + __u8 data[4] = { phy, 0, 0, indx }; if (cmd & PHY_WRITE) { __le16 *t = (__le16 *) & data[1]; @@ -207,12 +192,15 @@ static int __mii_op(pegasus_t *p, __u8 phy, __u8 indx, __u16 *regd, __u8 cmd) if (data[0] & PHY_DONE) break; } - if (i >= REG_TIMEOUT) + if (i >= REG_TIMEOUT) { + ret = -ETIMEDOUT; goto fail; + } if (cmd & PHY_READ) { ret = get_registers(p, PhyData, 2, ®di); + if (ret < 0) + goto fail; *regd = le16_to_cpu(regdi); - return ret; } return 0; fail: @@ -235,9 +223,13 @@ static int write_mii_word(pegasus_t *pegasus, __u8 phy, __u8 indx, __u16 *regd) static int mdio_read(struct net_device *dev, int phy_id, int loc) { pegasus_t *pegasus = netdev_priv(dev); + int ret; u16 res; - read_mii_word(pegasus, phy_id, loc, &res); + ret = read_mii_word(pegasus, phy_id, loc, &res); + if (ret < 0) + return ret; + return (int)res; } @@ -251,10 +243,9 @@ static void mdio_write(struct net_device *dev, int phy_id, int loc, int val) static int read_eprom_word(pegasus_t *pegasus, __u8 index, __u16 *retdata) { - int i; - __u8 tmp = 0; + int ret, i; __le16 retdatai; - int ret; + __u8 tmp = 0; set_register(pegasus, EpromCtrl, 0); set_register(pegasus, EpromOffset, index); @@ -262,21 +253,25 @@ static int read_eprom_word(pegasus_t *pegasus, __u8 index, __u16 *retdata) for (i = 0; i < REG_TIMEOUT; i++) { ret = get_registers(pegasus, EpromCtrl, 1, &tmp); + if (ret < 0) + goto fail; if (tmp & EPROM_DONE) break; - if (ret == -ESHUTDOWN) - goto fail; } - if (i >= REG_TIMEOUT) + if (i >= REG_TIMEOUT) { + ret = -ETIMEDOUT; goto fail; + } ret = get_registers(pegasus, EpromData, 2, &retdatai); + if (ret < 0) + goto fail; *retdata = le16_to_cpu(retdatai); return ret; fail: - netif_warn(pegasus, drv, pegasus->net, "%s failed\n", __func__); - return -ETIMEDOUT; + netif_dbg(pegasus, drv, pegasus->net, "%s failed\n", __func__); + return ret; } #ifdef PEGASUS_WRITE_EEPROM @@ -324,10 +319,10 @@ static int write_eprom_word(pegasus_t *pegasus, __u8 index, __u16 data) return ret; fail: - netif_warn(pegasus, drv, pegasus->net, "%s failed\n", __func__); + netif_dbg(pegasus, drv, pegasus->net, "%s failed\n", __func__); return -ETIMEDOUT; } -#endif /* PEGASUS_WRITE_EEPROM */ +#endif /* PEGASUS_WRITE_EEPROM */ static inline int get_node_id(pegasus_t *pegasus, u8 *id) { @@ -367,19 +362,21 @@ static void set_ethernet_addr(pegasus_t *pegasus) return; err: eth_hw_addr_random(pegasus->net); - dev_info(&pegasus->intf->dev, "software assigned MAC address.\n"); + netif_dbg(pegasus, drv, pegasus->net, "software assigned MAC address.\n"); return; } static inline int reset_mac(pegasus_t *pegasus) { + int ret, i; __u8 data = 0x8; - int i; set_register(pegasus, EthCtrl1, data); for (i = 0; i < REG_TIMEOUT; i++) { - get_registers(pegasus, EthCtrl1, 1, &data); + ret = get_registers(pegasus, EthCtrl1, 1, &data); + if (ret < 0) + goto fail; if (~data & 0x08) { if (loopback) break; @@ -402,22 +399,29 @@ static inline int reset_mac(pegasus_t *pegasus) } if (usb_dev_id[pegasus->dev_index].vendor == VENDOR_ELCON) { __u16 auxmode; - read_mii_word(pegasus, 3, 0x1b, &auxmode); + ret = read_mii_word(pegasus, 3, 0x1b, &auxmode); + if (ret < 0) + goto fail; auxmode |= 4; write_mii_word(pegasus, 3, 0x1b, &auxmode); } return 0; +fail: + netif_dbg(pegasus, drv, pegasus->net, "%s failed\n", __func__); + return ret; } static int enable_net_traffic(struct net_device *dev, struct usb_device *usb) { - __u16 linkpart; - __u8 data[4]; pegasus_t *pegasus = netdev_priv(dev); int ret; + __u16 linkpart; + __u8 data[4]; - read_mii_word(pegasus, pegasus->phy, MII_LPA, &linkpart); + ret = read_mii_word(pegasus, pegasus->phy, MII_LPA, &linkpart); + if (ret < 0) + goto fail; data[0] = 0xc8; /* TX & RX enable, append status, no CRC */ data[1] = 0; if (linkpart & (ADVERTISE_100FULL | ADVERTISE_10FULL)) @@ -435,11 +439,16 @@ static int enable_net_traffic(struct net_device *dev, struct usb_device *usb) usb_dev_id[pegasus->dev_index].vendor == VENDOR_LINKSYS2 || usb_dev_id[pegasus->dev_index].vendor == VENDOR_DLINK) { u16 auxmode; - read_mii_word(pegasus, 0, 0x1b, &auxmode); + ret = read_mii_word(pegasus, 0, 0x1b, &auxmode); + if (ret < 0) + goto fail; auxmode |= 4; write_mii_word(pegasus, 0, 0x1b, &auxmode); } + return 0; +fail: + netif_dbg(pegasus, drv, pegasus->net, "%s failed\n", __func__); return ret; } @@ -447,9 +456,9 @@ static void read_bulk_callback(struct urb *urb) { pegasus_t *pegasus = urb->context; struct net_device *net; + u8 *buf = urb->transfer_buffer; int rx_status, count = urb->actual_length; int status = urb->status; - u8 *buf = urb->transfer_buffer; __u16 pkt_len; if (!pegasus) @@ -735,12 +744,16 @@ static inline void disable_net_traffic(pegasus_t *pegasus) set_registers(pegasus, EthCtrl0, sizeof(tmp), &tmp); } -static inline void get_interrupt_interval(pegasus_t *pegasus) +static inline int get_interrupt_interval(pegasus_t *pegasus) { u16 data; u8 interval; + int ret; + + ret = read_eprom_word(pegasus, 4, &data); + if (ret < 0) + return ret; - read_eprom_word(pegasus, 4, &data); interval = data >> 8; if (pegasus->usb->speed != USB_SPEED_HIGH) { if (interval < 0x80) { @@ -755,6 +768,8 @@ static inline void get_interrupt_interval(pegasus_t *pegasus) } } pegasus->intr_interval = interval; + + return 0; } static void set_carrier(struct net_device *net) @@ -880,7 +895,6 @@ static void pegasus_get_drvinfo(struct net_device *dev, pegasus_t *pegasus = netdev_priv(dev); strlcpy(info->driver, driver_name, sizeof(info->driver)); - strlcpy(info->version, DRIVER_VERSION, sizeof(info->version)); usb_make_path(pegasus->usb, info->bus_info, sizeof(info->bus_info)); } @@ -999,8 +1013,7 @@ static int pegasus_siocdevprivate(struct net_device *net, struct ifreq *rq, data[0] = pegasus->phy; fallthrough; case SIOCDEVPRIVATE + 1: - read_mii_word(pegasus, data[0], data[1] & 0x1f, &data[3]); - res = 0; + res = read_mii_word(pegasus, data[0], data[1] & 0x1f, &data[3]); break; case SIOCDEVPRIVATE + 2: if (!capable(CAP_NET_ADMIN)) @@ -1034,22 +1047,25 @@ static void pegasus_set_multicast(struct net_device *net) static __u8 mii_phy_probe(pegasus_t *pegasus) { - int i; + int i, ret; __u16 tmp; for (i = 0; i < 32; i++) { - read_mii_word(pegasus, i, MII_BMSR, &tmp); + ret = read_mii_word(pegasus, i, MII_BMSR, &tmp); + if (ret < 0) + goto fail; if (tmp == 0 || tmp == 0xffff || (tmp & BMSR_MEDIA) == 0) continue; else return i; } - +fail: return 0xff; } static inline void setup_pegasus_II(pegasus_t *pegasus) { + int ret; __u8 data = 0xa5; set_register(pegasus, Reg1d, 0); @@ -1061,7 +1077,9 @@ static inline void setup_pegasus_II(pegasus_t *pegasus) set_register(pegasus, Reg7b, 2); set_register(pegasus, 0x83, data); - get_registers(pegasus, 0x83, 1, &data); + ret = get_registers(pegasus, 0x83, 1, &data); + if (ret < 0) + goto fail; if (data == 0xa5) pegasus->chip = 0x8513; @@ -1076,6 +1094,10 @@ static inline void setup_pegasus_II(pegasus_t *pegasus) set_register(pegasus, Reg81, 6); else set_register(pegasus, Reg81, 2); + + return; +fail: + netif_dbg(pegasus, drv, pegasus->net, "%s failed\n", __func__); } static void check_carrier(struct work_struct *work) @@ -1150,7 +1172,9 @@ static int pegasus_probe(struct usb_interface *intf, | NETIF_MSG_PROBE | NETIF_MSG_LINK); pegasus->features = usb_dev_id[dev_index].private; - get_interrupt_interval(pegasus); + res = get_interrupt_interval(pegasus); + if (res) + goto out2; if (reset_mac(pegasus)) { dev_err(&intf->dev, "can't reset MAC\n"); res = -EIO; @@ -1297,7 +1321,7 @@ static void __init parse_id(char *id) static int __init pegasus_init(void) { - pr_info("%s: %s, " DRIVER_DESC "\n", driver_name, DRIVER_VERSION); + pr_info("%s: " DRIVER_DESC "\n", driver_name); if (devid) parse_id(devid); return usb_register(&pegasus_driver); diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 470e1c1e6353..840c1c2ab16a 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -1725,7 +1725,7 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod) dev->interrupt_count = 0; dev->net = net; - strcpy (net->name, "usb%d"); + strscpy(net->name, "usb%d", sizeof(net->name)); memcpy (net->dev_addr, node_id, sizeof node_id); /* rx and tx sides can use different message sizes; @@ -1752,13 +1752,13 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod) if ((dev->driver_info->flags & FLAG_ETHER) != 0 && ((dev->driver_info->flags & FLAG_POINTTOPOINT) == 0 || (net->dev_addr [0] & 0x02) == 0)) - strcpy (net->name, "eth%d"); + strscpy(net->name, "eth%d", sizeof(net->name)); /* WLAN devices should always be named "wlan%d" */ if ((dev->driver_info->flags & FLAG_WLAN) != 0) - strcpy(net->name, "wlan%d"); + strscpy(net->name, "wlan%d", sizeof(net->name)); /* WWAN devices should always be named "wwan%d" */ if ((dev->driver_info->flags & FLAG_WWAN) != 0) - strcpy(net->name, "wwan%d"); + strscpy(net->name, "wwan%d", sizeof(net->name)); /* devices that cannot do ARP */ if ((dev->driver_info->flags & FLAG_NOARP) != 0) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 56c3f8519093..2e42210a6503 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -380,7 +380,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi, struct page *page, unsigned int offset, unsigned int len, unsigned int truesize, bool hdr_valid, unsigned int metasize, - bool whole_page) + unsigned int headroom) { struct sk_buff *skb; struct virtio_net_hdr_mrg_rxbuf *hdr; @@ -398,28 +398,16 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi, else hdr_padded_len = sizeof(struct padded_vnet_hdr); - /* If whole_page, there is an offset between the beginning of the + /* If headroom is not 0, there is an offset between the beginning of the * data and the allocated space, otherwise the data and the allocated * space are aligned. * * Buffers with headroom use PAGE_SIZE as alloc size, see * add_recvbuf_mergeable() + get_mergeable_buf_len() */ - if (whole_page) { - /* Buffers with whole_page use PAGE_SIZE as alloc size, - * see add_recvbuf_mergeable() + get_mergeable_buf_len() - */ - truesize = PAGE_SIZE; - - /* page maybe head page, so we should get the buf by p, not the - * page - */ - tailroom = truesize - len - offset_in_page(p); - buf = (char *)((unsigned long)p & PAGE_MASK); - } else { - tailroom = truesize - len; - buf = p; - } + truesize = headroom ? PAGE_SIZE : truesize; + tailroom = truesize - len - headroom; + buf = p - headroom; len -= hdr_len; offset += hdr_padded_len; @@ -978,7 +966,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, put_page(page); head_skb = page_to_skb(vi, rq, xdp_page, offset, len, PAGE_SIZE, false, - metasize, true); + metasize, + VIRTIO_XDP_HEADROOM); return head_skb; } break; @@ -1029,7 +1018,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, rcu_read_unlock(); head_skb = page_to_skb(vi, rq, page, offset, len, truesize, !xdp_prog, - metasize, !!headroom); + metasize, headroom); curr_skb = head_skb; if (unlikely(!curr_skb)) @@ -2208,14 +2197,14 @@ static int virtnet_set_channels(struct net_device *dev, if (vi->rq[0].xdp_prog) return -EINVAL; - get_online_cpus(); + cpus_read_lock(); err = _virtnet_set_queues(vi, queue_pairs); if (err) { - put_online_cpus(); + cpus_read_unlock(); goto err; } virtnet_set_affinity(vi); - put_online_cpus(); + cpus_read_unlock(); netif_set_real_num_tx_queues(dev, queue_pairs); netif_set_real_num_rx_queues(dev, queue_pairs); @@ -2970,9 +2959,9 @@ static int init_vqs(struct virtnet_info *vi) if (ret) goto err_free; - get_online_cpus(); + cpus_read_lock(); virtnet_set_affinity(vi); - put_online_cpus(); + cpus_read_unlock(); return 0; diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 2b1b944d4b28..662e26117353 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -857,30 +857,24 @@ static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *s unsigned int hh_len = LL_RESERVED_SPACE(dev); struct neighbour *neigh; bool is_v6gw = false; - int ret = -EINVAL; nf_reset_ct(skb); /* Be paranoid, rather than too clever. */ if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) { - struct sk_buff *skb2; - - skb2 = skb_realloc_headroom(skb, LL_RESERVED_SPACE(dev)); - if (!skb2) { - ret = -ENOMEM; - goto err; + skb = skb_expand_head(skb, hh_len); + if (!skb) { + dev->stats.tx_errors++; + return -ENOMEM; } - if (skb->sk) - skb_set_owner_w(skb2, skb->sk); - - consume_skb(skb); - skb = skb2; } rcu_read_lock_bh(); neigh = ip_neigh_for_gw(rt, skb, &is_v6gw); if (!IS_ERR(neigh)) { + int ret; + sock_confirm_neigh(skb, neigh); /* if crossing protocols, can not use the cached header */ ret = neigh_output(neigh, skb, is_v6gw); @@ -889,9 +883,8 @@ static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *s } rcu_read_unlock_bh(); -err: vrf_tx_error(skb->dev, skb); - return ret; + return -EINVAL; } static int vrf_output(struct net *net, struct sock *sk, struct sk_buff *skb) diff --git a/drivers/net/wan/Kconfig b/drivers/net/wan/Kconfig index 473df2505c8e..592a8389fc5a 100644 --- a/drivers/net/wan/Kconfig +++ b/drivers/net/wan/Kconfig @@ -290,30 +290,6 @@ config SLIC_DS26522 To compile this driver as a module, choose M here: the module will be called slic_ds26522. -config DSCC4_PCISYNC - bool "Etinc PCISYNC features" - depends on DSCC4 - help - Due to Etinc's design choice for its PCISYNC cards, some operations - are only allowed on specific ports of the DSCC4. This option is the - only way for the driver to know that it shouldn't return a success - code for these operations. - - Please say Y if your card is an Etinc's PCISYNC. - -config DSCC4_PCI_RST - bool "Hard reset support" - depends on DSCC4 - help - Various DSCC4 bugs forbid any reliable software reset of the ASIC. - As a replacement, some vendors provide a way to assert the PCI #RST - pin of DSCC4 through the GPIO port of the card. If you choose Y, - the driver will make use of this feature before module removal - (i.e. rmmod). The feature is known to be available on Commtech's - cards. Contact your manufacturer for details. - - Say Y if your card supports this feature. - config IXP4XX_HSS tristate "Intel IXP4xx HSS (synchronous serial port) support" depends on HDLC && IXP4XX_NPE && IXP4XX_QMGR @@ -337,33 +313,6 @@ config LAPBETHER To compile this driver as a module, choose M here: the module will be called lapbether. - If unsure, say N. - -config SBNI - tristate "Granch SBNI12 Leased Line adapter support" - depends on X86 - help - Driver for ISA SBNI12-xx cards which are low cost alternatives to - leased line modems. - - You can find more information and last versions of drivers and - utilities at <http://www.granch.ru/>. If you have any question you - can send email to <sbni@granch.ru>. - - To compile this driver as a module, choose M here: the - module will be called sbni. - - If unsure, say N. - -config SBNI_MULTILINE - bool "Multiple line feature support" - depends on SBNI - help - Schedule traffic for some parallel lines, via SBNI12 adapters. - - If you have two computers connected with two parallel lines it's - possible to increase transfer rate nearly twice. You should have - a program named 'sbniconfig' to configure adapters. If unsure, say N. diff --git a/drivers/net/wan/Makefile b/drivers/net/wan/Makefile index 081666c36ca2..f6b92efffc94 100644 --- a/drivers/net/wan/Makefile +++ b/drivers/net/wan/Makefile @@ -22,7 +22,6 @@ obj-$(CONFIG_FARSYNC) += farsync.o obj-$(CONFIG_LANMEDIA) += lmc/ obj-$(CONFIG_LAPBETHER) += lapbether.o -obj-$(CONFIG_SBNI) += sbni.o obj-$(CONFIG_N2) += n2.o obj-$(CONFIG_C101) += c101.o obj-$(CONFIG_WANXL) += wanxl.o diff --git a/drivers/net/wan/hostess_sv11.c b/drivers/net/wan/hostess_sv11.c index 15a754310fd7..e985e54ba75d 100644 --- a/drivers/net/wan/hostess_sv11.c +++ b/drivers/net/wan/hostess_sv11.c @@ -319,16 +319,18 @@ MODULE_DESCRIPTION("Modular driver for the Comtrol Hostess SV11"); static struct z8530_dev *sv11_unit; -int init_module(void) +static int sv11_module_init(void) { sv11_unit = sv11_init(io, irq); if (!sv11_unit) return -ENODEV; return 0; } +module_init(sv11_module_init); -void cleanup_module(void) +static void sv11_module_cleanup(void) { if (sv11_unit) sv11_shutdown(sv11_unit); } +module_exit(sv11_module_cleanup); diff --git a/drivers/net/wan/sbni.c b/drivers/net/wan/sbni.c deleted file mode 100644 index 469fe979d664..000000000000 --- a/drivers/net/wan/sbni.c +++ /dev/null @@ -1,1639 +0,0 @@ -/* sbni.c: Granch SBNI12 leased line adapters driver for linux - * - * Written 2001 by Denis I.Timofeev (timofeev@granch.ru) - * - * Previous versions were written by Yaroslav Polyakov, - * Alexey Zverev and Max Khon. - * - * Driver supports SBNI12-02,-04,-05,-10,-11 cards, single and - * double-channel, PCI and ISA modifications. - * More info and useful utilities to work with SBNI12 cards you can find - * at http://www.granch.com (English) or http://www.granch.ru (Russian) - * - * This software may be used and distributed according to the terms - * of the GNU General Public License. - * - * - * 5.0.1 Jun 22 2001 - * - Fixed bug in probe - * 5.0.0 Jun 06 2001 - * - Driver was completely redesigned by Denis I.Timofeev, - * - now PCI/Dual, ISA/Dual (with single interrupt line) models are - * - supported - * 3.3.0 Thu Feb 24 21:30:28 NOVT 2000 - * - PCI cards support - * 3.2.0 Mon Dec 13 22:26:53 NOVT 1999 - * - Completely rebuilt all the packet storage system - * - to work in Ethernet-like style. - * 3.1.1 just fixed some bugs (5 aug 1999) - * 3.1.0 added balancing feature (26 apr 1999) - * 3.0.1 just fixed some bugs (14 apr 1999). - * 3.0.0 Initial Revision, Yaroslav Polyakov (24 Feb 1999) - * - added pre-calculation for CRC, fixed bug with "len-2" frames, - * - removed outbound fragmentation (MTU=1000), written CRC-calculation - * - on asm, added work with hard_headers and now we have our own cache - * - for them, optionally supported word-interchange on some chipsets, - * - * Known problem: this driver wasn't tested on multiprocessor machine. - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/ptrace.h> -#include <linux/fcntl.h> -#include <linux/ioport.h> -#include <linux/interrupt.h> -#include <linux/string.h> -#include <linux/errno.h> -#include <linux/netdevice.h> -#include <linux/etherdevice.h> -#include <linux/pci.h> -#include <linux/skbuff.h> -#include <linux/timer.h> -#include <linux/init.h> -#include <linux/delay.h> - -#include <net/net_namespace.h> -#include <net/arp.h> -#include <net/Space.h> - -#include <asm/io.h> -#include <asm/types.h> -#include <asm/byteorder.h> -#include <asm/irq.h> -#include <linux/uaccess.h> - -#include "sbni.h" - -/* device private data */ - -struct net_local { - struct timer_list watchdog; - struct net_device *watchdog_dev; - - spinlock_t lock; - struct sk_buff *rx_buf_p; /* receive buffer ptr */ - struct sk_buff *tx_buf_p; /* transmit buffer ptr */ - - unsigned int framelen; /* current frame length */ - unsigned int maxframe; /* maximum valid frame length */ - unsigned int state; - unsigned int inppos, outpos; /* positions in rx/tx buffers */ - - /* transmitting frame number - from frames qty to 1 */ - unsigned int tx_frameno; - - /* expected number of next receiving frame */ - unsigned int wait_frameno; - - /* count of failed attempts to frame send - 32 attempts do before - error - while receiver tunes on opposite side of wire */ - unsigned int trans_errors; - - /* idle time; send pong when limit exceeded */ - unsigned int timer_ticks; - - /* fields used for receive level autoselection */ - int delta_rxl; - unsigned int cur_rxl_index, timeout_rxl; - unsigned long cur_rxl_rcvd, prev_rxl_rcvd; - - struct sbni_csr1 csr1; /* current value of CSR1 */ - struct sbni_in_stats in_stats; /* internal statistics */ - - struct net_device *second; /* for ISA/dual cards */ - -#ifdef CONFIG_SBNI_MULTILINE - struct net_device *master; - struct net_device *link; -#endif -}; - - -static int sbni_card_probe( unsigned long ); -static int sbni_pci_probe( struct net_device * ); -static struct net_device *sbni_probe1(struct net_device *, unsigned long, int); -static int sbni_open( struct net_device * ); -static int sbni_close( struct net_device * ); -static netdev_tx_t sbni_start_xmit(struct sk_buff *, - struct net_device * ); -static int sbni_siocdevprivate(struct net_device *, struct ifreq *, - void __user *, int); -static void set_multicast_list( struct net_device * ); - -static irqreturn_t sbni_interrupt( int, void * ); -static void handle_channel( struct net_device * ); -static int recv_frame( struct net_device * ); -static void send_frame( struct net_device * ); -static int upload_data( struct net_device *, - unsigned, unsigned, unsigned, u32 ); -static void download_data( struct net_device *, u32 * ); -static void sbni_watchdog(struct timer_list *); -static void interpret_ack( struct net_device *, unsigned ); -static int append_frame_to_pkt( struct net_device *, unsigned, u32 ); -static void indicate_pkt( struct net_device * ); -static void card_start( struct net_device * ); -static void prepare_to_send( struct sk_buff *, struct net_device * ); -static void drop_xmit_queue( struct net_device * ); -static void send_frame_header( struct net_device *, u32 * ); -static int skip_tail( unsigned int, unsigned int, u32 ); -static int check_fhdr( u32, u32 *, u32 *, u32 *, u32 *, u32 * ); -static void change_level( struct net_device * ); -static void timeout_change_level( struct net_device * ); -static u32 calc_crc32( u32, u8 *, u32 ); -static struct sk_buff * get_rx_buf( struct net_device * ); -static int sbni_init( struct net_device * ); - -#ifdef CONFIG_SBNI_MULTILINE -static int enslave( struct net_device *, struct net_device * ); -static int emancipate( struct net_device * ); -#endif - -static const char version[] = - "Granch SBNI12 driver ver 5.0.1 Jun 22 2001 Denis I.Timofeev.\n"; - -static bool skip_pci_probe __initdata = false; -static int scandone __initdata = 0; -static int num __initdata = 0; - -static unsigned char rxl_tab[]; -static u32 crc32tab[]; - -/* A list of all installed devices, for removing the driver module. */ -static struct net_device *sbni_cards[ SBNI_MAX_NUM_CARDS ]; - -/* Lists of device's parameters */ -static u32 io[ SBNI_MAX_NUM_CARDS ] __initdata = - { [0 ... SBNI_MAX_NUM_CARDS-1] = -1 }; -static u32 irq[ SBNI_MAX_NUM_CARDS ] __initdata; -static u32 baud[ SBNI_MAX_NUM_CARDS ] __initdata; -static u32 rxl[ SBNI_MAX_NUM_CARDS ] __initdata = - { [0 ... SBNI_MAX_NUM_CARDS-1] = -1 }; -static u32 mac[ SBNI_MAX_NUM_CARDS ] __initdata; - -#ifndef MODULE -typedef u32 iarr[]; -static iarr *dest[5] __initdata = { &io, &irq, &baud, &rxl, &mac }; -#endif - -/* A zero-terminated list of I/O addresses to be probed on ISA bus */ -static unsigned int netcard_portlist[ ] __initdata = { - 0x210, 0x214, 0x220, 0x224, 0x230, 0x234, 0x240, 0x244, 0x250, 0x254, - 0x260, 0x264, 0x270, 0x274, 0x280, 0x284, 0x290, 0x294, 0x2a0, 0x2a4, - 0x2b0, 0x2b4, 0x2c0, 0x2c4, 0x2d0, 0x2d4, 0x2e0, 0x2e4, 0x2f0, 0x2f4, - 0 }; - -#define NET_LOCAL_LOCK(dev) (((struct net_local *)netdev_priv(dev))->lock) - -/* - * Look for SBNI card which addr stored in dev->base_addr, if nonzero. - * Otherwise, look through PCI bus. If none PCI-card was found, scan ISA. - */ - -static inline int __init -sbni_isa_probe( struct net_device *dev ) -{ - if( dev->base_addr > 0x1ff && - request_region( dev->base_addr, SBNI_IO_EXTENT, dev->name ) && - sbni_probe1( dev, dev->base_addr, dev->irq ) ) - - return 0; - else { - pr_err("base address 0x%lx is busy, or adapter is malfunctional!\n", - dev->base_addr); - return -ENODEV; - } -} - -static const struct net_device_ops sbni_netdev_ops = { - .ndo_open = sbni_open, - .ndo_stop = sbni_close, - .ndo_start_xmit = sbni_start_xmit, - .ndo_set_rx_mode = set_multicast_list, - .ndo_siocdevprivate = sbni_siocdevprivate, - .ndo_set_mac_address = eth_mac_addr, - .ndo_validate_addr = eth_validate_addr, -}; - -static void __init sbni_devsetup(struct net_device *dev) -{ - ether_setup( dev ); - dev->netdev_ops = &sbni_netdev_ops; -} - -int __init sbni_probe(int unit) -{ - struct net_device *dev; - int err; - - dev = alloc_netdev(sizeof(struct net_local), "sbni", - NET_NAME_UNKNOWN, sbni_devsetup); - if (!dev) - return -ENOMEM; - - dev->netdev_ops = &sbni_netdev_ops; - - sprintf(dev->name, "sbni%d", unit); - netdev_boot_setup_check(dev); - - err = sbni_init(dev); - if (err) { - free_netdev(dev); - return err; - } - - err = register_netdev(dev); - if (err) { - release_region( dev->base_addr, SBNI_IO_EXTENT ); - free_netdev(dev); - return err; - } - pr_info_once("%s", version); - return 0; -} - -static int __init sbni_init(struct net_device *dev) -{ - int i; - if( dev->base_addr ) - return sbni_isa_probe( dev ); - /* otherwise we have to perform search our adapter */ - - if( io[ num ] != -1 ) { - dev->base_addr = io[ num ]; - dev->irq = irq[ num ]; - } else if( scandone || io[ 0 ] != -1 ) { - return -ENODEV; - } - - /* if io[ num ] contains non-zero address, then that is on ISA bus */ - if( dev->base_addr ) - return sbni_isa_probe( dev ); - - /* ...otherwise - scan PCI first */ - if( !skip_pci_probe && !sbni_pci_probe( dev ) ) - return 0; - - if( io[ num ] == -1 ) { - /* Auto-scan will be stopped when first ISA card were found */ - scandone = 1; - if( num > 0 ) - return -ENODEV; - } - - for( i = 0; netcard_portlist[ i ]; ++i ) { - int ioaddr = netcard_portlist[ i ]; - if( request_region( ioaddr, SBNI_IO_EXTENT, dev->name ) && - sbni_probe1( dev, ioaddr, 0 )) - return 0; - } - - return -ENODEV; -} - - -static int __init -sbni_pci_probe( struct net_device *dev ) -{ - struct pci_dev *pdev = NULL; - - while( (pdev = pci_get_class( PCI_CLASS_NETWORK_OTHER << 8, pdev )) - != NULL ) { - int pci_irq_line; - unsigned long pci_ioaddr; - - if( pdev->vendor != SBNI_PCI_VENDOR && - pdev->device != SBNI_PCI_DEVICE ) - continue; - - pci_ioaddr = pci_resource_start( pdev, 0 ); - pci_irq_line = pdev->irq; - - /* Avoid already found cards from previous calls */ - if( !request_region( pci_ioaddr, SBNI_IO_EXTENT, dev->name ) ) { - if (pdev->subsystem_device != 2) - continue; - - /* Dual adapter is present */ - if (!request_region(pci_ioaddr += 4, SBNI_IO_EXTENT, - dev->name ) ) - continue; - } - - if (pci_irq_line <= 0 || pci_irq_line >= nr_irqs) - pr_warn( -"WARNING: The PCI BIOS assigned this PCI card to IRQ %d, which is unlikely to work!.\n" -"You should use the PCI BIOS setup to assign a valid IRQ line.\n", - pci_irq_line ); - - /* avoiding re-enable dual adapters */ - if( (pci_ioaddr & 7) == 0 && pci_enable_device( pdev ) ) { - release_region( pci_ioaddr, SBNI_IO_EXTENT ); - pci_dev_put( pdev ); - return -EIO; - } - if( sbni_probe1( dev, pci_ioaddr, pci_irq_line ) ) { - SET_NETDEV_DEV(dev, &pdev->dev); - /* not the best thing to do, but this is all messed up - for hotplug systems anyway... */ - pci_dev_put( pdev ); - return 0; - } - } - return -ENODEV; -} - - -static struct net_device * __init -sbni_probe1( struct net_device *dev, unsigned long ioaddr, int irq ) -{ - struct net_local *nl; - - if( sbni_card_probe( ioaddr ) ) { - release_region( ioaddr, SBNI_IO_EXTENT ); - return NULL; - } - - outb( 0, ioaddr + CSR0 ); - - if( irq < 2 ) { - unsigned long irq_mask; - - irq_mask = probe_irq_on(); - outb( EN_INT | TR_REQ, ioaddr + CSR0 ); - outb( PR_RES, ioaddr + CSR1 ); - mdelay(50); - irq = probe_irq_off(irq_mask); - outb( 0, ioaddr + CSR0 ); - - if( !irq ) { - pr_err("%s: can't detect device irq!\n", dev->name); - release_region( ioaddr, SBNI_IO_EXTENT ); - return NULL; - } - } else if( irq == 2 ) - irq = 9; - - dev->irq = irq; - dev->base_addr = ioaddr; - - /* Fill in sbni-specific dev fields. */ - nl = netdev_priv(dev); - if( !nl ) { - pr_err("%s: unable to get memory!\n", dev->name); - release_region( ioaddr, SBNI_IO_EXTENT ); - return NULL; - } - - memset( nl, 0, sizeof(struct net_local) ); - spin_lock_init( &nl->lock ); - - /* store MAC address (generate if that isn't known) */ - *(__be16 *)dev->dev_addr = htons( 0x00ff ); - *(__be32 *)(dev->dev_addr + 2) = htonl( 0x01000000 | - ((mac[num] ? - mac[num] : - (u32)((long)netdev_priv(dev))) & 0x00ffffff)); - - /* store link settings (speed, receive level ) */ - nl->maxframe = DEFAULT_FRAME_LEN; - nl->csr1.rate = baud[ num ]; - - if( (nl->cur_rxl_index = rxl[ num ]) == -1 ) { - /* autotune rxl */ - nl->cur_rxl_index = DEF_RXL; - nl->delta_rxl = DEF_RXL_DELTA; - } else { - nl->delta_rxl = 0; - } - nl->csr1.rxl = rxl_tab[ nl->cur_rxl_index ]; - if( inb( ioaddr + CSR0 ) & 0x01 ) - nl->state |= FL_SLOW_MODE; - - pr_notice("%s: ioaddr %#lx, irq %d, MAC: 00:ff:01:%02x:%02x:%02x\n", - dev->name, dev->base_addr, dev->irq, - ((u8 *)dev->dev_addr)[3], - ((u8 *)dev->dev_addr)[4], - ((u8 *)dev->dev_addr)[5]); - - pr_notice("%s: speed %d", - dev->name, - ((nl->state & FL_SLOW_MODE) ? 500000 : 2000000) - / (1 << nl->csr1.rate)); - - if( nl->delta_rxl == 0 ) - pr_cont(", receive level 0x%x (fixed)\n", nl->cur_rxl_index); - else - pr_cont(", receive level (auto)\n"); - -#ifdef CONFIG_SBNI_MULTILINE - nl->master = dev; - nl->link = NULL; -#endif - - sbni_cards[ num++ ] = dev; - return dev; -} - -/* -------------------------------------------------------------------------- */ - -#ifdef CONFIG_SBNI_MULTILINE - -static netdev_tx_t -sbni_start_xmit( struct sk_buff *skb, struct net_device *dev ) -{ - struct net_device *p; - - netif_stop_queue( dev ); - - /* Looking for idle device in the list */ - for( p = dev; p; ) { - struct net_local *nl = netdev_priv(p); - spin_lock( &nl->lock ); - if( nl->tx_buf_p || (nl->state & FL_LINE_DOWN) ) { - p = nl->link; - spin_unlock( &nl->lock ); - } else { - /* Idle dev is found */ - prepare_to_send( skb, p ); - spin_unlock( &nl->lock ); - netif_start_queue( dev ); - return NETDEV_TX_OK; - } - } - - return NETDEV_TX_BUSY; -} - -#else /* CONFIG_SBNI_MULTILINE */ - -static netdev_tx_t -sbni_start_xmit( struct sk_buff *skb, struct net_device *dev ) -{ - struct net_local *nl = netdev_priv(dev); - - netif_stop_queue( dev ); - spin_lock( &nl->lock ); - - prepare_to_send( skb, dev ); - - spin_unlock( &nl->lock ); - return NETDEV_TX_OK; -} - -#endif /* CONFIG_SBNI_MULTILINE */ - -/* -------------------------------------------------------------------------- */ - -/* interrupt handler */ - -/* - * SBNI12D-10, -11/ISA boards within "common interrupt" mode could not - * be looked as two independent single-channel devices. Every channel seems - * as Ethernet interface but interrupt handler must be common. Really, first - * channel ("master") driver only registers the handler. In its struct net_local - * it has got pointer to "slave" channel's struct net_local and handles that's - * interrupts too. - * dev of successfully attached ISA SBNI boards is linked to list. - * While next board driver is initialized, it scans this list. If one - * has found dev with same irq and ioaddr different by 4 then it assumes - * this board to be "master". - */ - -static irqreturn_t -sbni_interrupt( int irq, void *dev_id ) -{ - struct net_device *dev = dev_id; - struct net_local *nl = netdev_priv(dev); - int repeat; - - spin_lock( &nl->lock ); - if( nl->second ) - spin_lock(&NET_LOCAL_LOCK(nl->second)); - - do { - repeat = 0; - if( inb( dev->base_addr + CSR0 ) & (RC_RDY | TR_RDY) ) { - handle_channel( dev ); - repeat = 1; - } - if( nl->second && /* second channel present */ - (inb( nl->second->base_addr+CSR0 ) & (RC_RDY | TR_RDY)) ) { - handle_channel( nl->second ); - repeat = 1; - } - } while( repeat ); - - if( nl->second ) - spin_unlock(&NET_LOCAL_LOCK(nl->second)); - spin_unlock( &nl->lock ); - return IRQ_HANDLED; -} - - -static void -handle_channel( struct net_device *dev ) -{ - struct net_local *nl = netdev_priv(dev); - unsigned long ioaddr = dev->base_addr; - - int req_ans; - unsigned char csr0; - -#ifdef CONFIG_SBNI_MULTILINE - /* Lock the master device because we going to change its local data */ - if( nl->state & FL_SLAVE ) - spin_lock(&NET_LOCAL_LOCK(nl->master)); -#endif - - outb( (inb( ioaddr + CSR0 ) & ~EN_INT) | TR_REQ, ioaddr + CSR0 ); - - nl->timer_ticks = CHANGE_LEVEL_START_TICKS; - for(;;) { - csr0 = inb( ioaddr + CSR0 ); - if( ( csr0 & (RC_RDY | TR_RDY) ) == 0 ) - break; - - req_ans = !(nl->state & FL_PREV_OK); - - if( csr0 & RC_RDY ) - req_ans = recv_frame( dev ); - - /* - * TR_RDY always equals 1 here because we have owned the marker, - * and we set TR_REQ when disabled interrupts - */ - csr0 = inb( ioaddr + CSR0 ); - if( !(csr0 & TR_RDY) || (csr0 & RC_RDY) ) - netdev_err(dev, "internal error!\n"); - - /* if state & FL_NEED_RESEND != 0 then tx_frameno != 0 */ - if( req_ans || nl->tx_frameno != 0 ) - send_frame( dev ); - else - /* send marker without any data */ - outb( inb( ioaddr + CSR0 ) & ~TR_REQ, ioaddr + CSR0 ); - } - - outb( inb( ioaddr + CSR0 ) | EN_INT, ioaddr + CSR0 ); - -#ifdef CONFIG_SBNI_MULTILINE - if( nl->state & FL_SLAVE ) - spin_unlock(&NET_LOCAL_LOCK(nl->master)); -#endif -} - - -/* - * Routine returns 1 if it needs to acknowledge received frame. - * Empty frame received without errors won't be acknowledged. - */ - -static int -recv_frame( struct net_device *dev ) -{ - struct net_local *nl = netdev_priv(dev); - unsigned long ioaddr = dev->base_addr; - - u32 crc = CRC32_INITIAL; - - unsigned framelen = 0, frameno, ack; - unsigned is_first, frame_ok = 0; - - if( check_fhdr( ioaddr, &framelen, &frameno, &ack, &is_first, &crc ) ) { - frame_ok = framelen > 4 - ? upload_data( dev, framelen, frameno, is_first, crc ) - : skip_tail( ioaddr, framelen, crc ); - if( frame_ok ) - interpret_ack( dev, ack ); - } - - outb( inb( ioaddr + CSR0 ) ^ CT_ZER, ioaddr + CSR0 ); - if( frame_ok ) { - nl->state |= FL_PREV_OK; - if( framelen > 4 ) - nl->in_stats.all_rx_number++; - } else { - nl->state &= ~FL_PREV_OK; - change_level( dev ); - nl->in_stats.all_rx_number++; - nl->in_stats.bad_rx_number++; - } - - return !frame_ok || framelen > 4; -} - - -static void -send_frame( struct net_device *dev ) -{ - struct net_local *nl = netdev_priv(dev); - - u32 crc = CRC32_INITIAL; - - if( nl->state & FL_NEED_RESEND ) { - - /* if frame was sended but not ACK'ed - resend it */ - if( nl->trans_errors ) { - --nl->trans_errors; - if( nl->framelen != 0 ) - nl->in_stats.resend_tx_number++; - } else { - /* cannot xmit with many attempts */ -#ifdef CONFIG_SBNI_MULTILINE - if( (nl->state & FL_SLAVE) || nl->link ) -#endif - nl->state |= FL_LINE_DOWN; - drop_xmit_queue( dev ); - goto do_send; - } - } else - nl->trans_errors = TR_ERROR_COUNT; - - send_frame_header( dev, &crc ); - nl->state |= FL_NEED_RESEND; - /* - * FL_NEED_RESEND will be cleared after ACK, but if empty - * frame sended then in prepare_to_send next frame - */ - - - if( nl->framelen ) { - download_data( dev, &crc ); - nl->in_stats.all_tx_number++; - nl->state |= FL_WAIT_ACK; - } - - outsb( dev->base_addr + DAT, (u8 *)&crc, sizeof crc ); - -do_send: - outb( inb( dev->base_addr + CSR0 ) & ~TR_REQ, dev->base_addr + CSR0 ); - - if( nl->tx_frameno ) - /* next frame exists - we request card to send it */ - outb( inb( dev->base_addr + CSR0 ) | TR_REQ, - dev->base_addr + CSR0 ); -} - - -/* - * Write the frame data into adapter's buffer memory, and calculate CRC. - * Do padding if necessary. - */ - -static void -download_data( struct net_device *dev, u32 *crc_p ) -{ - struct net_local *nl = netdev_priv(dev); - struct sk_buff *skb = nl->tx_buf_p; - - unsigned len = min_t(unsigned int, skb->len - nl->outpos, nl->framelen); - - outsb( dev->base_addr + DAT, skb->data + nl->outpos, len ); - *crc_p = calc_crc32( *crc_p, skb->data + nl->outpos, len ); - - /* if packet too short we should write some more bytes to pad */ - for( len = nl->framelen - len; len--; ) { - outb( 0, dev->base_addr + DAT ); - *crc_p = CRC32( 0, *crc_p ); - } -} - - -static int -upload_data( struct net_device *dev, unsigned framelen, unsigned frameno, - unsigned is_first, u32 crc ) -{ - struct net_local *nl = netdev_priv(dev); - - int frame_ok; - - if( is_first ) { - nl->wait_frameno = frameno; - nl->inppos = 0; - } - - if( nl->wait_frameno == frameno ) { - - if( nl->inppos + framelen <= ETHER_MAX_LEN ) - frame_ok = append_frame_to_pkt( dev, framelen, crc ); - - /* - * if CRC is right but framelen incorrect then transmitter - * error was occurred... drop entire packet - */ - else if( (frame_ok = skip_tail( dev->base_addr, framelen, crc )) - != 0 ) { - nl->wait_frameno = 0; - nl->inppos = 0; -#ifdef CONFIG_SBNI_MULTILINE - nl->master->stats.rx_errors++; - nl->master->stats.rx_missed_errors++; -#else - dev->stats.rx_errors++; - dev->stats.rx_missed_errors++; -#endif - } - /* now skip all frames until is_first != 0 */ - } else - frame_ok = skip_tail( dev->base_addr, framelen, crc ); - - if( is_first && !frame_ok ) { - /* - * Frame has been broken, but we had already stored - * is_first... Drop entire packet. - */ - nl->wait_frameno = 0; -#ifdef CONFIG_SBNI_MULTILINE - nl->master->stats.rx_errors++; - nl->master->stats.rx_crc_errors++; -#else - dev->stats.rx_errors++; - dev->stats.rx_crc_errors++; -#endif - } - - return frame_ok; -} - - -static inline void -send_complete( struct net_device *dev ) -{ - struct net_local *nl = netdev_priv(dev); - -#ifdef CONFIG_SBNI_MULTILINE - nl->master->stats.tx_packets++; - nl->master->stats.tx_bytes += nl->tx_buf_p->len; -#else - dev->stats.tx_packets++; - dev->stats.tx_bytes += nl->tx_buf_p->len; -#endif - dev_consume_skb_irq(nl->tx_buf_p); - - nl->tx_buf_p = NULL; - - nl->outpos = 0; - nl->state &= ~(FL_WAIT_ACK | FL_NEED_RESEND); - nl->framelen = 0; -} - - -static void -interpret_ack( struct net_device *dev, unsigned ack ) -{ - struct net_local *nl = netdev_priv(dev); - - if( ack == FRAME_SENT_OK ) { - nl->state &= ~FL_NEED_RESEND; - - if( nl->state & FL_WAIT_ACK ) { - nl->outpos += nl->framelen; - - if( --nl->tx_frameno ) { - nl->framelen = min_t(unsigned int, - nl->maxframe, - nl->tx_buf_p->len - nl->outpos); - } else { - send_complete( dev ); -#ifdef CONFIG_SBNI_MULTILINE - netif_wake_queue( nl->master ); -#else - netif_wake_queue( dev ); -#endif - } - } - } - - nl->state &= ~FL_WAIT_ACK; -} - - -/* - * Glue received frame with previous fragments of packet. - * Indicate packet when last frame would be accepted. - */ - -static int -append_frame_to_pkt( struct net_device *dev, unsigned framelen, u32 crc ) -{ - struct net_local *nl = netdev_priv(dev); - - u8 *p; - - if( nl->inppos + framelen > ETHER_MAX_LEN ) - return 0; - - if( !nl->rx_buf_p && !(nl->rx_buf_p = get_rx_buf( dev )) ) - return 0; - - p = nl->rx_buf_p->data + nl->inppos; - insb( dev->base_addr + DAT, p, framelen ); - if( calc_crc32( crc, p, framelen ) != CRC32_REMAINDER ) - return 0; - - nl->inppos += framelen - 4; - if( --nl->wait_frameno == 0 ) /* last frame received */ - indicate_pkt( dev ); - - return 1; -} - - -/* - * Prepare to start output on adapter. - * Transmitter will be actually activated when marker is accepted. - */ - -static void -prepare_to_send( struct sk_buff *skb, struct net_device *dev ) -{ - struct net_local *nl = netdev_priv(dev); - - unsigned int len; - - /* nl->tx_buf_p == NULL here! */ - if( nl->tx_buf_p ) - netdev_err(dev, "memory leak!\n"); - - nl->outpos = 0; - nl->state &= ~(FL_WAIT_ACK | FL_NEED_RESEND); - - len = skb->len; - if( len < SBNI_MIN_LEN ) - len = SBNI_MIN_LEN; - - nl->tx_buf_p = skb; - nl->tx_frameno = DIV_ROUND_UP(len, nl->maxframe); - nl->framelen = len < nl->maxframe ? len : nl->maxframe; - - outb( inb( dev->base_addr + CSR0 ) | TR_REQ, dev->base_addr + CSR0 ); -#ifdef CONFIG_SBNI_MULTILINE - netif_trans_update(nl->master); -#else - netif_trans_update(dev); -#endif -} - - -static void -drop_xmit_queue( struct net_device *dev ) -{ - struct net_local *nl = netdev_priv(dev); - - if( nl->tx_buf_p ) { - dev_kfree_skb_any( nl->tx_buf_p ); - nl->tx_buf_p = NULL; -#ifdef CONFIG_SBNI_MULTILINE - nl->master->stats.tx_errors++; - nl->master->stats.tx_carrier_errors++; -#else - dev->stats.tx_errors++; - dev->stats.tx_carrier_errors++; -#endif - } - - nl->tx_frameno = 0; - nl->framelen = 0; - nl->outpos = 0; - nl->state &= ~(FL_WAIT_ACK | FL_NEED_RESEND); -#ifdef CONFIG_SBNI_MULTILINE - netif_start_queue( nl->master ); - netif_trans_update(nl->master); -#else - netif_start_queue( dev ); - netif_trans_update(dev); -#endif -} - - -static void -send_frame_header( struct net_device *dev, u32 *crc_p ) -{ - struct net_local *nl = netdev_priv(dev); - - u32 crc = *crc_p; - u32 len_field = nl->framelen + 6; /* CRC + frameno + reserved */ - u8 value; - - if( nl->state & FL_NEED_RESEND ) - len_field |= FRAME_RETRY; /* non-first attempt... */ - - if( nl->outpos == 0 ) - len_field |= FRAME_FIRST; - - len_field |= (nl->state & FL_PREV_OK) ? FRAME_SENT_OK : FRAME_SENT_BAD; - outb( SBNI_SIG, dev->base_addr + DAT ); - - value = (u8) len_field; - outb( value, dev->base_addr + DAT ); - crc = CRC32( value, crc ); - value = (u8) (len_field >> 8); - outb( value, dev->base_addr + DAT ); - crc = CRC32( value, crc ); - - outb( nl->tx_frameno, dev->base_addr + DAT ); - crc = CRC32( nl->tx_frameno, crc ); - outb( 0, dev->base_addr + DAT ); - crc = CRC32( 0, crc ); - *crc_p = crc; -} - - -/* - * if frame tail not needed (incorrect number or received twice), - * it won't store, but CRC will be calculated - */ - -static int -skip_tail( unsigned int ioaddr, unsigned int tail_len, u32 crc ) -{ - while( tail_len-- ) - crc = CRC32( inb( ioaddr + DAT ), crc ); - - return crc == CRC32_REMAINDER; -} - - -/* - * Preliminary checks if frame header is correct, calculates its CRC - * and split it to simple fields - */ - -static int -check_fhdr( u32 ioaddr, u32 *framelen, u32 *frameno, u32 *ack, - u32 *is_first, u32 *crc_p ) -{ - u32 crc = *crc_p; - u8 value; - - if( inb( ioaddr + DAT ) != SBNI_SIG ) - return 0; - - value = inb( ioaddr + DAT ); - *framelen = (u32)value; - crc = CRC32( value, crc ); - value = inb( ioaddr + DAT ); - *framelen |= ((u32)value) << 8; - crc = CRC32( value, crc ); - - *ack = *framelen & FRAME_ACK_MASK; - *is_first = (*framelen & FRAME_FIRST) != 0; - - if( (*framelen &= FRAME_LEN_MASK) < 6 || - *framelen > SBNI_MAX_FRAME - 3 ) - return 0; - - value = inb( ioaddr + DAT ); - *frameno = (u32)value; - crc = CRC32( value, crc ); - - crc = CRC32( inb( ioaddr + DAT ), crc ); /* reserved byte */ - *framelen -= 2; - - *crc_p = crc; - return 1; -} - - -static struct sk_buff * -get_rx_buf( struct net_device *dev ) -{ - /* +2 is to compensate for the alignment fixup below */ - struct sk_buff *skb = dev_alloc_skb( ETHER_MAX_LEN + 2 ); - if( !skb ) - return NULL; - - skb_reserve( skb, 2 ); /* Align IP on longword boundaries */ - return skb; -} - - -static void -indicate_pkt( struct net_device *dev ) -{ - struct net_local *nl = netdev_priv(dev); - struct sk_buff *skb = nl->rx_buf_p; - - skb_put( skb, nl->inppos ); - -#ifdef CONFIG_SBNI_MULTILINE - skb->protocol = eth_type_trans( skb, nl->master ); - netif_rx( skb ); - ++nl->master->stats.rx_packets; - nl->master->stats.rx_bytes += nl->inppos; -#else - skb->protocol = eth_type_trans( skb, dev ); - netif_rx( skb ); - ++dev->stats.rx_packets; - dev->stats.rx_bytes += nl->inppos; -#endif - nl->rx_buf_p = NULL; /* protocol driver will clear this sk_buff */ -} - - -/* -------------------------------------------------------------------------- */ - -/* - * Routine checks periodically wire activity and regenerates marker if - * connect was inactive for a long time. - */ - -static void -sbni_watchdog(struct timer_list *t) -{ - struct net_local *nl = from_timer(nl, t, watchdog); - struct net_device *dev = nl->watchdog_dev; - unsigned long flags; - unsigned char csr0; - - spin_lock_irqsave( &nl->lock, flags ); - - csr0 = inb( dev->base_addr + CSR0 ); - if( csr0 & RC_CHK ) { - - if( nl->timer_ticks ) { - if( csr0 & (RC_RDY | BU_EMP) ) - /* receiving not active */ - nl->timer_ticks--; - } else { - nl->in_stats.timeout_number++; - if( nl->delta_rxl ) - timeout_change_level( dev ); - - outb( *(u_char *)&nl->csr1 | PR_RES, - dev->base_addr + CSR1 ); - csr0 = inb( dev->base_addr + CSR0 ); - } - } else - nl->state &= ~FL_LINE_DOWN; - - outb( csr0 | RC_CHK, dev->base_addr + CSR0 ); - - mod_timer(t, jiffies + SBNI_TIMEOUT); - - spin_unlock_irqrestore( &nl->lock, flags ); -} - - -static unsigned char rxl_tab[] = { - 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x08, - 0x0a, 0x0c, 0x0f, 0x16, 0x18, 0x1a, 0x1c, 0x1f -}; - -#define SIZE_OF_TIMEOUT_RXL_TAB 4 -static unsigned char timeout_rxl_tab[] = { - 0x03, 0x05, 0x08, 0x0b -}; - -/* -------------------------------------------------------------------------- */ - -static void -card_start( struct net_device *dev ) -{ - struct net_local *nl = netdev_priv(dev); - - nl->timer_ticks = CHANGE_LEVEL_START_TICKS; - nl->state &= ~(FL_WAIT_ACK | FL_NEED_RESEND); - nl->state |= FL_PREV_OK; - - nl->inppos = nl->outpos = 0; - nl->wait_frameno = 0; - nl->tx_frameno = 0; - nl->framelen = 0; - - outb( *(u_char *)&nl->csr1 | PR_RES, dev->base_addr + CSR1 ); - outb( EN_INT, dev->base_addr + CSR0 ); -} - -/* -------------------------------------------------------------------------- */ - -/* Receive level auto-selection */ - -static void -change_level( struct net_device *dev ) -{ - struct net_local *nl = netdev_priv(dev); - - if( nl->delta_rxl == 0 ) /* do not auto-negotiate RxL */ - return; - - if( nl->cur_rxl_index == 0 ) - nl->delta_rxl = 1; - else if( nl->cur_rxl_index == 15 ) - nl->delta_rxl = -1; - else if( nl->cur_rxl_rcvd < nl->prev_rxl_rcvd ) - nl->delta_rxl = -nl->delta_rxl; - - nl->csr1.rxl = rxl_tab[ nl->cur_rxl_index += nl->delta_rxl ]; - inb( dev->base_addr + CSR0 ); /* needs for PCI cards */ - outb( *(u8 *)&nl->csr1, dev->base_addr + CSR1 ); - - nl->prev_rxl_rcvd = nl->cur_rxl_rcvd; - nl->cur_rxl_rcvd = 0; -} - - -static void -timeout_change_level( struct net_device *dev ) -{ - struct net_local *nl = netdev_priv(dev); - - nl->cur_rxl_index = timeout_rxl_tab[ nl->timeout_rxl ]; - if( ++nl->timeout_rxl >= 4 ) - nl->timeout_rxl = 0; - - nl->csr1.rxl = rxl_tab[ nl->cur_rxl_index ]; - inb( dev->base_addr + CSR0 ); - outb( *(unsigned char *)&nl->csr1, dev->base_addr + CSR1 ); - - nl->prev_rxl_rcvd = nl->cur_rxl_rcvd; - nl->cur_rxl_rcvd = 0; -} - -/* -------------------------------------------------------------------------- */ - -/* - * Open/initialize the board. - */ - -static int -sbni_open( struct net_device *dev ) -{ - struct net_local *nl = netdev_priv(dev); - struct timer_list *w = &nl->watchdog; - - /* - * For double ISA adapters within "common irq" mode, we have to - * determine whether primary or secondary channel is initialized, - * and set the irq handler only in first case. - */ - if( dev->base_addr < 0x400 ) { /* ISA only */ - struct net_device **p = sbni_cards; - for( ; *p && p < sbni_cards + SBNI_MAX_NUM_CARDS; ++p ) - if( (*p)->irq == dev->irq && - ((*p)->base_addr == dev->base_addr + 4 || - (*p)->base_addr == dev->base_addr - 4) && - (*p)->flags & IFF_UP ) { - - ((struct net_local *) (netdev_priv(*p))) - ->second = dev; - netdev_notice(dev, "using shared irq with %s\n", - (*p)->name); - nl->state |= FL_SECONDARY; - goto handler_attached; - } - } - - if( request_irq(dev->irq, sbni_interrupt, IRQF_SHARED, dev->name, dev) ) { - netdev_err(dev, "unable to get IRQ %d\n", dev->irq); - return -EAGAIN; - } - -handler_attached: - - spin_lock( &nl->lock ); - memset( &dev->stats, 0, sizeof(struct net_device_stats) ); - memset( &nl->in_stats, 0, sizeof(struct sbni_in_stats) ); - - card_start( dev ); - - netif_start_queue( dev ); - - /* set timer watchdog */ - nl->watchdog_dev = dev; - timer_setup(w, sbni_watchdog, 0); - w->expires = jiffies + SBNI_TIMEOUT; - add_timer( w ); - - spin_unlock( &nl->lock ); - return 0; -} - - -static int -sbni_close( struct net_device *dev ) -{ - struct net_local *nl = netdev_priv(dev); - - if( nl->second && nl->second->flags & IFF_UP ) { - netdev_notice(dev, "Secondary channel (%s) is active!\n", - nl->second->name); - return -EBUSY; - } - -#ifdef CONFIG_SBNI_MULTILINE - if( nl->state & FL_SLAVE ) - emancipate( dev ); - else - while( nl->link ) /* it's master device! */ - emancipate( nl->link ); -#endif - - spin_lock( &nl->lock ); - - nl->second = NULL; - drop_xmit_queue( dev ); - netif_stop_queue( dev ); - - del_timer( &nl->watchdog ); - - outb( 0, dev->base_addr + CSR0 ); - - if( !(nl->state & FL_SECONDARY) ) - free_irq( dev->irq, dev ); - nl->state &= FL_SECONDARY; - - spin_unlock( &nl->lock ); - return 0; -} - - -/* - Valid combinations in CSR0 (for probing): - - VALID_DECODER 0000,0011,1011,1010 - - ; 0 ; - - TR_REQ ; 1 ; + - TR_RDY ; 2 ; - - TR_RDY TR_REQ ; 3 ; + - BU_EMP ; 4 ; + - BU_EMP TR_REQ ; 5 ; + - BU_EMP TR_RDY ; 6 ; - - BU_EMP TR_RDY TR_REQ ; 7 ; + - RC_RDY ; 8 ; + - RC_RDY TR_REQ ; 9 ; + - RC_RDY TR_RDY ; 10 ; - - RC_RDY TR_RDY TR_REQ ; 11 ; - - RC_RDY BU_EMP ; 12 ; - - RC_RDY BU_EMP TR_REQ ; 13 ; - - RC_RDY BU_EMP TR_RDY ; 14 ; - - RC_RDY BU_EMP TR_RDY TR_REQ ; 15 ; - -*/ - -#define VALID_DECODER (2 + 8 + 0x10 + 0x20 + 0x80 + 0x100 + 0x200) - - -static int -sbni_card_probe( unsigned long ioaddr ) -{ - unsigned char csr0; - - csr0 = inb( ioaddr + CSR0 ); - if( csr0 != 0xff && csr0 != 0x00 ) { - csr0 &= ~EN_INT; - if( csr0 & BU_EMP ) - csr0 |= EN_INT; - - if( VALID_DECODER & (1 << (csr0 >> 4)) ) - return 0; - } - - return -ENODEV; -} - -/* -------------------------------------------------------------------------- */ - -static int -sbni_siocdevprivate(struct net_device *dev, struct ifreq *ifr, void __user *data, int cmd) -{ - struct net_local *nl = netdev_priv(dev); - struct sbni_flags flags; - int error = 0; - -#ifdef CONFIG_SBNI_MULTILINE - struct net_device *slave_dev; - char slave_name[ 8 ]; -#endif - - switch( cmd ) { - case SIOCDEVGETINSTATS : - if (copy_to_user(data, &nl->in_stats, - sizeof(struct sbni_in_stats))) - error = -EFAULT; - break; - - case SIOCDEVRESINSTATS : - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - memset( &nl->in_stats, 0, sizeof(struct sbni_in_stats) ); - break; - - case SIOCDEVGHWSTATE : - flags.mac_addr = *(u32 *)(dev->dev_addr + 3); - flags.rate = nl->csr1.rate; - flags.slow_mode = (nl->state & FL_SLOW_MODE) != 0; - flags.rxl = nl->cur_rxl_index; - flags.fixed_rxl = nl->delta_rxl == 0; - - if (copy_to_user(data, &flags, sizeof(flags))) - error = -EFAULT; - break; - - case SIOCDEVSHWSTATE : - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - - spin_lock( &nl->lock ); - flags = *(struct sbni_flags*) &ifr->ifr_ifru; - if( flags.fixed_rxl ) { - nl->delta_rxl = 0; - nl->cur_rxl_index = flags.rxl; - } else { - nl->delta_rxl = DEF_RXL_DELTA; - nl->cur_rxl_index = DEF_RXL; - } - - nl->csr1.rxl = rxl_tab[ nl->cur_rxl_index ]; - nl->csr1.rate = flags.rate; - outb( *(u8 *)&nl->csr1 | PR_RES, dev->base_addr + CSR1 ); - spin_unlock( &nl->lock ); - break; - -#ifdef CONFIG_SBNI_MULTILINE - - case SIOCDEVENSLAVE : - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - - if (copy_from_user(slave_name, data, sizeof(slave_name))) - return -EFAULT; - slave_dev = dev_get_by_name(&init_net, slave_name ); - if( !slave_dev || !(slave_dev->flags & IFF_UP) ) { - netdev_err(dev, "trying to enslave non-active device %s\n", - slave_name); - if (slave_dev) - dev_put(slave_dev); - return -EPERM; - } - - return enslave( dev, slave_dev ); - - case SIOCDEVEMANSIPATE : - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - - return emancipate( dev ); - -#endif /* CONFIG_SBNI_MULTILINE */ - - default : - return -EOPNOTSUPP; - } - - return error; -} - - -#ifdef CONFIG_SBNI_MULTILINE - -static int -enslave( struct net_device *dev, struct net_device *slave_dev ) -{ - struct net_local *nl = netdev_priv(dev); - struct net_local *snl = netdev_priv(slave_dev); - - if( nl->state & FL_SLAVE ) /* This isn't master or free device */ - return -EBUSY; - - if( snl->state & FL_SLAVE ) /* That was already enslaved */ - return -EBUSY; - - spin_lock( &nl->lock ); - spin_lock( &snl->lock ); - - /* append to list */ - snl->link = nl->link; - nl->link = slave_dev; - snl->master = dev; - snl->state |= FL_SLAVE; - - /* Summary statistics of MultiLine operation will be stored - in master's counters */ - memset( &slave_dev->stats, 0, sizeof(struct net_device_stats) ); - netif_stop_queue( slave_dev ); - netif_wake_queue( dev ); /* Now we are able to transmit */ - - spin_unlock( &snl->lock ); - spin_unlock( &nl->lock ); - netdev_notice(dev, "slave device (%s) attached\n", slave_dev->name); - return 0; -} - - -static int -emancipate( struct net_device *dev ) -{ - struct net_local *snl = netdev_priv(dev); - struct net_device *p = snl->master; - struct net_local *nl = netdev_priv(p); - - if( !(snl->state & FL_SLAVE) ) - return -EINVAL; - - spin_lock( &nl->lock ); - spin_lock( &snl->lock ); - drop_xmit_queue( dev ); - - /* exclude from list */ - for(;;) { /* must be in list */ - struct net_local *t = netdev_priv(p); - if( t->link == dev ) { - t->link = snl->link; - break; - } - p = t->link; - } - - snl->link = NULL; - snl->master = dev; - snl->state &= ~FL_SLAVE; - - netif_start_queue( dev ); - - spin_unlock( &snl->lock ); - spin_unlock( &nl->lock ); - - dev_put( dev ); - return 0; -} - -#endif - -static void -set_multicast_list( struct net_device *dev ) -{ - return; /* sbni always operate in promiscuos mode */ -} - - -#ifdef MODULE -module_param_hw_array(io, int, ioport, NULL, 0); -module_param_hw_array(irq, int, irq, NULL, 0); -module_param_array(baud, int, NULL, 0); -module_param_array(rxl, int, NULL, 0); -module_param_array(mac, int, NULL, 0); -module_param(skip_pci_probe, bool, 0); - -MODULE_LICENSE("GPL"); - - -int __init init_module( void ) -{ - struct net_device *dev; - int err; - - while( num < SBNI_MAX_NUM_CARDS ) { - dev = alloc_netdev(sizeof(struct net_local), "sbni%d", - NET_NAME_UNKNOWN, sbni_devsetup); - if( !dev) - break; - - sprintf( dev->name, "sbni%d", num ); - - err = sbni_init(dev); - if (err) { - free_netdev(dev); - break; - } - - if( register_netdev( dev ) ) { - release_region( dev->base_addr, SBNI_IO_EXTENT ); - free_netdev( dev ); - break; - } - } - - return *sbni_cards ? 0 : -ENODEV; -} - -void -cleanup_module(void) -{ - int i; - - for (i = 0; i < SBNI_MAX_NUM_CARDS; ++i) { - struct net_device *dev = sbni_cards[i]; - if (dev != NULL) { - unregister_netdev(dev); - release_region(dev->base_addr, SBNI_IO_EXTENT); - free_netdev(dev); - } - } -} - -#else /* MODULE */ - -static int __init -sbni_setup( char *p ) -{ - int n, parm; - - if( *p++ != '(' ) - goto bad_param; - - for( n = 0, parm = 0; *p && n < 8; ) { - (*dest[ parm ])[ n ] = simple_strtoul( p, &p, 0 ); - if( !*p || *p == ')' ) - return 1; - if( *p == ';' ) { - ++p; - ++n; - parm = 0; - } else if( *p++ != ',' ) { - break; - } else { - if( ++parm >= 5 ) - break; - } - } -bad_param: - pr_err("Error in sbni kernel parameter!\n"); - return 0; -} - -__setup( "sbni=", sbni_setup ); - -#endif /* MODULE */ - -/* -------------------------------------------------------------------------- */ - -static u32 -calc_crc32( u32 crc, u8 *p, u32 len ) -{ - while( len-- ) - crc = CRC32( *p++, crc ); - - return crc; -} - -static u32 crc32tab[] __attribute__ ((aligned(8))) = { - 0xD202EF8D, 0xA505DF1B, 0x3C0C8EA1, 0x4B0BBE37, - 0xD56F2B94, 0xA2681B02, 0x3B614AB8, 0x4C667A2E, - 0xDCD967BF, 0xABDE5729, 0x32D70693, 0x45D03605, - 0xDBB4A3A6, 0xACB39330, 0x35BAC28A, 0x42BDF21C, - 0xCFB5FFE9, 0xB8B2CF7F, 0x21BB9EC5, 0x56BCAE53, - 0xC8D83BF0, 0xBFDF0B66, 0x26D65ADC, 0x51D16A4A, - 0xC16E77DB, 0xB669474D, 0x2F6016F7, 0x58672661, - 0xC603B3C2, 0xB1048354, 0x280DD2EE, 0x5F0AE278, - 0xE96CCF45, 0x9E6BFFD3, 0x0762AE69, 0x70659EFF, - 0xEE010B5C, 0x99063BCA, 0x000F6A70, 0x77085AE6, - 0xE7B74777, 0x90B077E1, 0x09B9265B, 0x7EBE16CD, - 0xE0DA836E, 0x97DDB3F8, 0x0ED4E242, 0x79D3D2D4, - 0xF4DBDF21, 0x83DCEFB7, 0x1AD5BE0D, 0x6DD28E9B, - 0xF3B61B38, 0x84B12BAE, 0x1DB87A14, 0x6ABF4A82, - 0xFA005713, 0x8D076785, 0x140E363F, 0x630906A9, - 0xFD6D930A, 0x8A6AA39C, 0x1363F226, 0x6464C2B0, - 0xA4DEAE1D, 0xD3D99E8B, 0x4AD0CF31, 0x3DD7FFA7, - 0xA3B36A04, 0xD4B45A92, 0x4DBD0B28, 0x3ABA3BBE, - 0xAA05262F, 0xDD0216B9, 0x440B4703, 0x330C7795, - 0xAD68E236, 0xDA6FD2A0, 0x4366831A, 0x3461B38C, - 0xB969BE79, 0xCE6E8EEF, 0x5767DF55, 0x2060EFC3, - 0xBE047A60, 0xC9034AF6, 0x500A1B4C, 0x270D2BDA, - 0xB7B2364B, 0xC0B506DD, 0x59BC5767, 0x2EBB67F1, - 0xB0DFF252, 0xC7D8C2C4, 0x5ED1937E, 0x29D6A3E8, - 0x9FB08ED5, 0xE8B7BE43, 0x71BEEFF9, 0x06B9DF6F, - 0x98DD4ACC, 0xEFDA7A5A, 0x76D32BE0, 0x01D41B76, - 0x916B06E7, 0xE66C3671, 0x7F6567CB, 0x0862575D, - 0x9606C2FE, 0xE101F268, 0x7808A3D2, 0x0F0F9344, - 0x82079EB1, 0xF500AE27, 0x6C09FF9D, 0x1B0ECF0B, - 0x856A5AA8, 0xF26D6A3E, 0x6B643B84, 0x1C630B12, - 0x8CDC1683, 0xFBDB2615, 0x62D277AF, 0x15D54739, - 0x8BB1D29A, 0xFCB6E20C, 0x65BFB3B6, 0x12B88320, - 0x3FBA6CAD, 0x48BD5C3B, 0xD1B40D81, 0xA6B33D17, - 0x38D7A8B4, 0x4FD09822, 0xD6D9C998, 0xA1DEF90E, - 0x3161E49F, 0x4666D409, 0xDF6F85B3, 0xA868B525, - 0x360C2086, 0x410B1010, 0xD80241AA, 0xAF05713C, - 0x220D7CC9, 0x550A4C5F, 0xCC031DE5, 0xBB042D73, - 0x2560B8D0, 0x52678846, 0xCB6ED9FC, 0xBC69E96A, - 0x2CD6F4FB, 0x5BD1C46D, 0xC2D895D7, 0xB5DFA541, - 0x2BBB30E2, 0x5CBC0074, 0xC5B551CE, 0xB2B26158, - 0x04D44C65, 0x73D37CF3, 0xEADA2D49, 0x9DDD1DDF, - 0x03B9887C, 0x74BEB8EA, 0xEDB7E950, 0x9AB0D9C6, - 0x0A0FC457, 0x7D08F4C1, 0xE401A57B, 0x930695ED, - 0x0D62004E, 0x7A6530D8, 0xE36C6162, 0x946B51F4, - 0x19635C01, 0x6E646C97, 0xF76D3D2D, 0x806A0DBB, - 0x1E0E9818, 0x6909A88E, 0xF000F934, 0x8707C9A2, - 0x17B8D433, 0x60BFE4A5, 0xF9B6B51F, 0x8EB18589, - 0x10D5102A, 0x67D220BC, 0xFEDB7106, 0x89DC4190, - 0x49662D3D, 0x3E611DAB, 0xA7684C11, 0xD06F7C87, - 0x4E0BE924, 0x390CD9B2, 0xA0058808, 0xD702B89E, - 0x47BDA50F, 0x30BA9599, 0xA9B3C423, 0xDEB4F4B5, - 0x40D06116, 0x37D75180, 0xAEDE003A, 0xD9D930AC, - 0x54D13D59, 0x23D60DCF, 0xBADF5C75, 0xCDD86CE3, - 0x53BCF940, 0x24BBC9D6, 0xBDB2986C, 0xCAB5A8FA, - 0x5A0AB56B, 0x2D0D85FD, 0xB404D447, 0xC303E4D1, - 0x5D677172, 0x2A6041E4, 0xB369105E, 0xC46E20C8, - 0x72080DF5, 0x050F3D63, 0x9C066CD9, 0xEB015C4F, - 0x7565C9EC, 0x0262F97A, 0x9B6BA8C0, 0xEC6C9856, - 0x7CD385C7, 0x0BD4B551, 0x92DDE4EB, 0xE5DAD47D, - 0x7BBE41DE, 0x0CB97148, 0x95B020F2, 0xE2B71064, - 0x6FBF1D91, 0x18B82D07, 0x81B17CBD, 0xF6B64C2B, - 0x68D2D988, 0x1FD5E91E, 0x86DCB8A4, 0xF1DB8832, - 0x616495A3, 0x1663A535, 0x8F6AF48F, 0xF86DC419, - 0x660951BA, 0x110E612C, 0x88073096, 0xFF000000 -}; - diff --git a/drivers/net/wan/sbni.h b/drivers/net/wan/sbni.h deleted file mode 100644 index 84264510a8ed..000000000000 --- a/drivers/net/wan/sbni.h +++ /dev/null @@ -1,147 +0,0 @@ -/* sbni.h: definitions for a Granch SBNI12 driver, version 5.0.0 - * Written 2001 Denis I.Timofeev (timofeev@granch.ru) - * This file is distributed under the GNU GPL - */ - -#ifndef SBNI_H -#define SBNI_H - -#ifdef SBNI_DEBUG -#define DP( A ) A -#else -#define DP( A ) -#endif - - -/* We don't have official vendor id yet... */ -#define SBNI_PCI_VENDOR 0x55 -#define SBNI_PCI_DEVICE 0x9f - -#define ISA_MODE 0x00 -#define PCI_MODE 0x01 - -#define SBNI_IO_EXTENT 4 - -enum sbni_reg { - CSR0 = 0, - CSR1 = 1, - DAT = 2 -}; - -/* CSR0 mapping */ -enum { - BU_EMP = 0x02, - RC_CHK = 0x04, - CT_ZER = 0x08, - TR_REQ = 0x10, - TR_RDY = 0x20, - EN_INT = 0x40, - RC_RDY = 0x80 -}; - - -/* CSR1 mapping */ -#define PR_RES 0x80 - -struct sbni_csr1 { -#ifdef __LITTLE_ENDIAN_BITFIELD - u8 rxl : 5; - u8 rate : 2; - u8 : 1; -#else - u8 : 1; - u8 rate : 2; - u8 rxl : 5; -#endif -}; - -/* fields in frame header */ -#define FRAME_ACK_MASK (unsigned short)0x7000 -#define FRAME_LEN_MASK (unsigned short)0x03FF -#define FRAME_FIRST (unsigned short)0x8000 -#define FRAME_RETRY (unsigned short)0x0800 - -#define FRAME_SENT_BAD (unsigned short)0x4000 -#define FRAME_SENT_OK (unsigned short)0x3000 - - -/* state flags */ -enum { - FL_WAIT_ACK = 0x01, - FL_NEED_RESEND = 0x02, - FL_PREV_OK = 0x04, - FL_SLOW_MODE = 0x08, - FL_SECONDARY = 0x10, -#ifdef CONFIG_SBNI_MULTILINE - FL_SLAVE = 0x20, -#endif - FL_LINE_DOWN = 0x40 -}; - - -enum { - DEFAULT_IOBASEADDR = 0x210, - DEFAULT_INTERRUPTNUMBER = 5, - DEFAULT_RATE = 0, - DEFAULT_FRAME_LEN = 1012 -}; - -#define DEF_RXL_DELTA -1 -#define DEF_RXL 0xf - -#define SBNI_SIG 0x5a - -#define SBNI_MIN_LEN 60 /* Shortest Ethernet frame without FCS */ -#define SBNI_MAX_FRAME 1023 -#define ETHER_MAX_LEN 1518 - -#define SBNI_TIMEOUT (HZ/10) - -#define TR_ERROR_COUNT 32 -#define CHANGE_LEVEL_START_TICKS 4 - -#define SBNI_MAX_NUM_CARDS 16 - -/* internal SBNI-specific statistics */ -struct sbni_in_stats { - u32 all_rx_number; - u32 bad_rx_number; - u32 timeout_number; - u32 all_tx_number; - u32 resend_tx_number; -}; - -/* SBNI ioctl params */ -#define SIOCDEVGETINSTATS SIOCDEVPRIVATE -#define SIOCDEVRESINSTATS SIOCDEVPRIVATE+1 -#define SIOCDEVGHWSTATE SIOCDEVPRIVATE+2 -#define SIOCDEVSHWSTATE SIOCDEVPRIVATE+3 -#define SIOCDEVENSLAVE SIOCDEVPRIVATE+4 -#define SIOCDEVEMANSIPATE SIOCDEVPRIVATE+5 - - -/* data packet for SIOCDEVGHWSTATE/SIOCDEVSHWSTATE ioctl requests */ -struct sbni_flags { - u32 rxl : 4; - u32 rate : 2; - u32 fixed_rxl : 1; - u32 slow_mode : 1; - u32 mac_addr : 24; -}; - -/* - * CRC-32 stuff - */ -#define CRC32(c,crc) (crc32tab[((size_t)(crc) ^ (c)) & 0xff] ^ (((crc) >> 8) & 0x00FFFFFF)) - /* CRC generator 0xEDB88320 */ - /* CRC remainder 0x2144DF1C */ - /* CRC initial value 0x00000000 */ -#define CRC32_REMAINDER 0x2144DF1C -#define CRC32_INITIAL 0x00000000 - -#ifndef __initdata -#define __initdata -#endif - -#endif - diff --git a/drivers/net/wwan/Kconfig b/drivers/net/wwan/Kconfig index de9384326bc8..77dbfc418bce 100644 --- a/drivers/net/wwan/Kconfig +++ b/drivers/net/wwan/Kconfig @@ -38,6 +38,18 @@ config MHI_WWAN_CTRL To compile this driver as a module, choose M here: the module will be called mhi_wwan_ctrl. +config MHI_WWAN_MBIM + tristate "MHI WWAN MBIM network driver for QCOM-based PCIe modems" + depends on MHI_BUS + help + MHI WWAN MBIM is a WWAN network driver for QCOM-based PCIe modems. + It implements MBIM over MHI, for IP data aggregation and muxing. + A default wwan0 network interface is created for MBIM data session + ID 0. Additional links can be created via wwan rtnetlink type. + + To compile this driver as a module, choose M here: the module will be + called mhi_wwan_mbim. + config RPMSG_WWAN_CTRL tristate "RPMSG WWAN control driver" depends on RPMSG diff --git a/drivers/net/wwan/Makefile b/drivers/net/wwan/Makefile index d90ac33abaef..fe51feedac21 100644 --- a/drivers/net/wwan/Makefile +++ b/drivers/net/wwan/Makefile @@ -9,5 +9,6 @@ wwan-objs += wwan_core.o obj-$(CONFIG_WWAN_HWSIM) += wwan_hwsim.o obj-$(CONFIG_MHI_WWAN_CTRL) += mhi_wwan_ctrl.o +obj-$(CONFIG_MHI_WWAN_MBIM) += mhi_wwan_mbim.o obj-$(CONFIG_RPMSG_WWAN_CTRL) += rpmsg_wwan_ctrl.o obj-$(CONFIG_IOSM) += iosm/ diff --git a/drivers/net/wwan/iosm/iosm_ipc_mmio.h b/drivers/net/wwan/iosm/iosm_ipc_mmio.h index 45e6923da78f..f861994a6d90 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_mmio.h +++ b/drivers/net/wwan/iosm/iosm_ipc_mmio.h @@ -10,10 +10,10 @@ #define IOSM_CP_VERSION 0x0100UL /* DL dir Aggregation support mask */ -#define DL_AGGR BIT(23) +#define DL_AGGR BIT(9) /* UL dir Aggregation support mask */ -#define UL_AGGR BIT(22) +#define UL_AGGR BIT(8) /* UL flow credit support mask */ #define UL_FLOW_CREDIT BIT(21) diff --git a/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c b/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c index 562de275797a..bdb2d32cdb6d 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c +++ b/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c @@ -320,7 +320,7 @@ static void ipc_mux_dl_fcth_decode(struct iosm_mux *ipc_mux, return; } - ul_credits = fct->vfl.nr_of_bytes; + ul_credits = le32_to_cpu(fct->vfl.nr_of_bytes); dev_dbg(ipc_mux->dev, "Flow_Credit:: if_id[%d] Old: %d Grants: %d", if_id, ipc_mux->session[if_id].ul_flow_credits, ul_credits); @@ -586,7 +586,7 @@ static bool ipc_mux_lite_send_qlt(struct iosm_mux *ipc_mux) qlt->reserved[0] = 0; qlt->reserved[1] = 0; - qlt->vfl.nr_of_bytes = session->ul_list.qlen; + qlt->vfl.nr_of_bytes = cpu_to_le32(session->ul_list.qlen); /* Add QLT to the transfer list. */ skb_queue_tail(&ipc_mux->channel->ul_list, diff --git a/drivers/net/wwan/iosm/iosm_ipc_mux_codec.h b/drivers/net/wwan/iosm/iosm_ipc_mux_codec.h index 4a74e3c9457f..aae83db5cbb8 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_mux_codec.h +++ b/drivers/net/wwan/iosm/iosm_ipc_mux_codec.h @@ -106,7 +106,7 @@ struct mux_lite_cmdh { * @nr_of_bytes: Number of bytes available to transmit in the queue. */ struct mux_lite_vfl { - u32 nr_of_bytes; + __le32 nr_of_bytes; }; /** diff --git a/drivers/net/wwan/iosm/iosm_ipc_protocol_ops.c b/drivers/net/wwan/iosm/iosm_ipc_protocol_ops.c index 91109e27efd3..35d590743d3a 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_protocol_ops.c +++ b/drivers/net/wwan/iosm/iosm_ipc_protocol_ops.c @@ -412,8 +412,8 @@ struct sk_buff *ipc_protocol_dl_td_process(struct iosm_protocol *ipc_protocol, } if (p_td->buffer.address != IPC_CB(skb)->mapping) { - dev_err(ipc_protocol->dev, "invalid buf=%p or skb=%p", - (void *)p_td->buffer.address, skb->data); + dev_err(ipc_protocol->dev, "invalid buf=%llx or skb=%p", + (unsigned long long)p_td->buffer.address, skb->data); ipc_pcie_kfree_skb(ipc_protocol->pcie, skb); skb = NULL; goto ret; diff --git a/drivers/net/wwan/iosm/iosm_ipc_wwan.c b/drivers/net/wwan/iosm/iosm_ipc_wwan.c index b2357ad5d517..b571d9cedba4 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_wwan.c +++ b/drivers/net/wwan/iosm/iosm_ipc_wwan.c @@ -228,7 +228,7 @@ static void ipc_wwan_dellink(void *ctxt, struct net_device *dev, RCU_INIT_POINTER(ipc_wwan->sub_netlist[if_id], NULL); /* unregistering includes synchronize_net() */ - unregister_netdevice(dev); + unregister_netdevice_queue(dev, head); unlock: mutex_unlock(&ipc_wwan->if_mutex); diff --git a/drivers/net/wwan/mhi_wwan_ctrl.c b/drivers/net/wwan/mhi_wwan_ctrl.c index 1bc6b69aa530..1e18420ce404 100644 --- a/drivers/net/wwan/mhi_wwan_ctrl.c +++ b/drivers/net/wwan/mhi_wwan_ctrl.c @@ -110,7 +110,7 @@ static int mhi_wwan_ctrl_start(struct wwan_port *port) int ret; /* Start mhi device's channel(s) */ - ret = mhi_prepare_for_transfer(mhiwwan->mhi_dev); + ret = mhi_prepare_for_transfer(mhiwwan->mhi_dev, 0); if (ret) return ret; diff --git a/drivers/net/wwan/mhi_wwan_mbim.c b/drivers/net/wwan/mhi_wwan_mbim.c new file mode 100644 index 000000000000..377529bbf124 --- /dev/null +++ b/drivers/net/wwan/mhi_wwan_mbim.c @@ -0,0 +1,658 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* MHI MBIM Network driver - Network/MBIM over MHI bus + * + * Copyright (C) 2021 Linaro Ltd <loic.poulain@linaro.org> + * + * This driver copy some code from cdc_ncm, which is: + * Copyright (C) ST-Ericsson 2010-2012 + * and cdc_mbim, which is: + * Copyright (c) 2012 Smith Micro Software, Inc. + * Copyright (c) 2012 Bjørn Mork <bjorn@mork.no> + * + */ + +#include <linux/ethtool.h> +#include <linux/if_arp.h> +#include <linux/if_vlan.h> +#include <linux/ip.h> +#include <linux/mhi.h> +#include <linux/mii.h> +#include <linux/mod_devicetable.h> +#include <linux/module.h> +#include <linux/netdevice.h> +#include <linux/skbuff.h> +#include <linux/u64_stats_sync.h> +#include <linux/usb.h> +#include <linux/usb/cdc.h> +#include <linux/usb/usbnet.h> +#include <linux/usb/cdc_ncm.h> +#include <linux/wwan.h> + +/* 3500 allows to optimize skb allocation, the skbs will basically fit in + * one 4K page. Large MBIM packets will simply be split over several MHI + * transfers and chained by the MHI net layer (zerocopy). + */ +#define MHI_DEFAULT_MRU 3500 + +#define MHI_MBIM_DEFAULT_MTU 1500 +#define MHI_MAX_BUF_SZ 0xffff + +#define MBIM_NDP16_SIGN_MASK 0x00ffffff + +#define MHI_MBIM_LINK_HASH_SIZE 8 +#define LINK_HASH(session) ((session) % MHI_MBIM_LINK_HASH_SIZE) + +struct mhi_mbim_link { + struct mhi_mbim_context *mbim; + struct net_device *ndev; + unsigned int session; + + /* stats */ + u64_stats_t rx_packets; + u64_stats_t rx_bytes; + u64_stats_t rx_errors; + u64_stats_t tx_packets; + u64_stats_t tx_bytes; + u64_stats_t tx_errors; + u64_stats_t tx_dropped; + struct u64_stats_sync tx_syncp; + struct u64_stats_sync rx_syncp; + + struct hlist_node hlnode; +}; + +struct mhi_mbim_context { + struct mhi_device *mdev; + struct sk_buff *skbagg_head; + struct sk_buff *skbagg_tail; + unsigned int mru; + u32 rx_queue_sz; + u16 rx_seq; + u16 tx_seq; + struct delayed_work rx_refill; + spinlock_t tx_lock; + struct hlist_head link_list[MHI_MBIM_LINK_HASH_SIZE]; +}; + +struct mbim_tx_hdr { + struct usb_cdc_ncm_nth16 nth16; + struct usb_cdc_ncm_ndp16 ndp16; + struct usb_cdc_ncm_dpe16 dpe16[2]; +} __packed; + +static struct mhi_mbim_link *mhi_mbim_get_link_rcu(struct mhi_mbim_context *mbim, + unsigned int session) +{ + struct mhi_mbim_link *link; + + hlist_for_each_entry_rcu(link, &mbim->link_list[LINK_HASH(session)], hlnode) { + if (link->session == session) + return link; + } + + return NULL; +} + +static struct sk_buff *mbim_tx_fixup(struct sk_buff *skb, unsigned int session, + u16 tx_seq) +{ + unsigned int dgram_size = skb->len; + struct usb_cdc_ncm_nth16 *nth16; + struct usb_cdc_ncm_ndp16 *ndp16; + struct mbim_tx_hdr *mbim_hdr; + + /* Only one NDP is sent, containing the IP packet (no aggregation) */ + + /* Ensure we have enough headroom for crafting MBIM header */ + if (skb_cow_head(skb, sizeof(struct mbim_tx_hdr))) { + dev_kfree_skb_any(skb); + return NULL; + } + + mbim_hdr = skb_push(skb, sizeof(struct mbim_tx_hdr)); + + /* Fill NTB header */ + nth16 = &mbim_hdr->nth16; + nth16->dwSignature = cpu_to_le32(USB_CDC_NCM_NTH16_SIGN); + nth16->wHeaderLength = cpu_to_le16(sizeof(struct usb_cdc_ncm_nth16)); + nth16->wSequence = cpu_to_le16(tx_seq); + nth16->wBlockLength = cpu_to_le16(skb->len); + nth16->wNdpIndex = cpu_to_le16(sizeof(struct usb_cdc_ncm_nth16)); + + /* Fill the unique NDP */ + ndp16 = &mbim_hdr->ndp16; + ndp16->dwSignature = cpu_to_le32(USB_CDC_MBIM_NDP16_IPS_SIGN | (session << 24)); + ndp16->wLength = cpu_to_le16(sizeof(struct usb_cdc_ncm_ndp16) + + sizeof(struct usb_cdc_ncm_dpe16) * 2); + ndp16->wNextNdpIndex = 0; + + /* Datagram follows the mbim header */ + ndp16->dpe16[0].wDatagramIndex = cpu_to_le16(sizeof(struct mbim_tx_hdr)); + ndp16->dpe16[0].wDatagramLength = cpu_to_le16(dgram_size); + + /* null termination */ + ndp16->dpe16[1].wDatagramIndex = 0; + ndp16->dpe16[1].wDatagramLength = 0; + + return skb; +} + +static netdev_tx_t mhi_mbim_ndo_xmit(struct sk_buff *skb, struct net_device *ndev) +{ + struct mhi_mbim_link *link = wwan_netdev_drvpriv(ndev); + struct mhi_mbim_context *mbim = link->mbim; + unsigned long flags; + int err = -ENOMEM; + + /* Serialize MHI channel queuing and MBIM seq */ + spin_lock_irqsave(&mbim->tx_lock, flags); + + skb = mbim_tx_fixup(skb, link->session, mbim->tx_seq); + if (unlikely(!skb)) + goto exit_unlock; + + err = mhi_queue_skb(mbim->mdev, DMA_TO_DEVICE, skb, skb->len, MHI_EOT); + + if (mhi_queue_is_full(mbim->mdev, DMA_TO_DEVICE)) + netif_stop_queue(ndev); + + if (!err) + mbim->tx_seq++; + +exit_unlock: + spin_unlock_irqrestore(&mbim->tx_lock, flags); + + if (unlikely(err)) { + net_err_ratelimited("%s: Failed to queue TX buf (%d)\n", + ndev->name, err); + dev_kfree_skb_any(skb); + goto exit_drop; + } + + return NETDEV_TX_OK; + +exit_drop: + u64_stats_update_begin(&link->tx_syncp); + u64_stats_inc(&link->tx_dropped); + u64_stats_update_end(&link->tx_syncp); + + return NETDEV_TX_OK; +} + +static int mbim_rx_verify_nth16(struct mhi_mbim_context *mbim, struct sk_buff *skb) +{ + struct usb_cdc_ncm_nth16 *nth16; + int len; + + if (skb->len < sizeof(struct usb_cdc_ncm_nth16) + + sizeof(struct usb_cdc_ncm_ndp16)) { + net_err_ratelimited("frame too short\n"); + return -EINVAL; + } + + nth16 = (struct usb_cdc_ncm_nth16 *)skb->data; + + if (nth16->dwSignature != cpu_to_le32(USB_CDC_NCM_NTH16_SIGN)) { + net_err_ratelimited("invalid NTH16 signature <%#010x>\n", + le32_to_cpu(nth16->dwSignature)); + return -EINVAL; + } + + /* No limit on the block length, except the size of the data pkt */ + len = le16_to_cpu(nth16->wBlockLength); + if (len > skb->len) { + net_err_ratelimited("NTB does not fit into the skb %u/%u\n", + len, skb->len); + return -EINVAL; + } + + if (mbim->rx_seq + 1 != le16_to_cpu(nth16->wSequence) && + (mbim->rx_seq || le16_to_cpu(nth16->wSequence)) && + !(mbim->rx_seq == 0xffff && !le16_to_cpu(nth16->wSequence))) { + net_err_ratelimited("sequence number glitch prev=%d curr=%d\n", + mbim->rx_seq, le16_to_cpu(nth16->wSequence)); + } + mbim->rx_seq = le16_to_cpu(nth16->wSequence); + + return le16_to_cpu(nth16->wNdpIndex); +} + +static int mbim_rx_verify_ndp16(struct sk_buff *skb, struct usb_cdc_ncm_ndp16 *ndp16) +{ + int ret; + + if (le16_to_cpu(ndp16->wLength) < USB_CDC_NCM_NDP16_LENGTH_MIN) { + net_err_ratelimited("invalid DPT16 length <%u>\n", + le16_to_cpu(ndp16->wLength)); + return -EINVAL; + } + + ret = ((le16_to_cpu(ndp16->wLength) - sizeof(struct usb_cdc_ncm_ndp16)) + / sizeof(struct usb_cdc_ncm_dpe16)); + ret--; /* Last entry is always a NULL terminator */ + + if (sizeof(struct usb_cdc_ncm_ndp16) + + ret * sizeof(struct usb_cdc_ncm_dpe16) > skb->len) { + net_err_ratelimited("Invalid nframes = %d\n", ret); + return -EINVAL; + } + + return ret; +} + +static void mhi_mbim_rx(struct mhi_mbim_context *mbim, struct sk_buff *skb) +{ + int ndpoffset; + + /* Check NTB header and retrieve first NDP offset */ + ndpoffset = mbim_rx_verify_nth16(mbim, skb); + if (ndpoffset < 0) { + net_err_ratelimited("mbim: Incorrect NTB header\n"); + goto error; + } + + /* Process each NDP */ + while (1) { + struct usb_cdc_ncm_ndp16 ndp16; + struct usb_cdc_ncm_dpe16 dpe16; + struct mhi_mbim_link *link; + int nframes, n, dpeoffset; + unsigned int session; + + if (skb_copy_bits(skb, ndpoffset, &ndp16, sizeof(ndp16))) { + net_err_ratelimited("mbim: Incorrect NDP offset (%u)\n", + ndpoffset); + goto error; + } + + /* Check NDP header and retrieve number of datagrams */ + nframes = mbim_rx_verify_ndp16(skb, &ndp16); + if (nframes < 0) { + net_err_ratelimited("mbim: Incorrect NDP16\n"); + goto error; + } + + /* Only IP data type supported, no DSS in MHI context */ + if ((ndp16.dwSignature & cpu_to_le32(MBIM_NDP16_SIGN_MASK)) + != cpu_to_le32(USB_CDC_MBIM_NDP16_IPS_SIGN)) { + net_err_ratelimited("mbim: Unsupported NDP type\n"); + goto next_ndp; + } + + session = (le32_to_cpu(ndp16.dwSignature) & ~MBIM_NDP16_SIGN_MASK) >> 24; + + rcu_read_lock(); + + link = mhi_mbim_get_link_rcu(mbim, session); + if (!link) { + net_err_ratelimited("mbim: bad packet session (%u)\n", session); + goto unlock; + } + + /* de-aggregate and deliver IP packets */ + dpeoffset = ndpoffset + sizeof(struct usb_cdc_ncm_ndp16); + for (n = 0; n < nframes; n++, dpeoffset += sizeof(dpe16)) { + u16 dgram_offset, dgram_len; + struct sk_buff *skbn; + + if (skb_copy_bits(skb, dpeoffset, &dpe16, sizeof(dpe16))) + break; + + dgram_offset = le16_to_cpu(dpe16.wDatagramIndex); + dgram_len = le16_to_cpu(dpe16.wDatagramLength); + + if (!dgram_offset || !dgram_len) + break; /* null terminator */ + + skbn = netdev_alloc_skb(link->ndev, dgram_len); + if (!skbn) + continue; + + skb_put(skbn, dgram_len); + skb_copy_bits(skb, dgram_offset, skbn->data, dgram_len); + + switch (skbn->data[0] & 0xf0) { + case 0x40: + skbn->protocol = htons(ETH_P_IP); + break; + case 0x60: + skbn->protocol = htons(ETH_P_IPV6); + break; + default: + net_err_ratelimited("%s: unknown protocol\n", + link->ndev->name); + dev_kfree_skb_any(skbn); + u64_stats_update_begin(&link->rx_syncp); + u64_stats_inc(&link->rx_errors); + u64_stats_update_end(&link->rx_syncp); + continue; + } + + u64_stats_update_begin(&link->rx_syncp); + u64_stats_inc(&link->rx_packets); + u64_stats_add(&link->rx_bytes, skbn->len); + u64_stats_update_end(&link->rx_syncp); + + netif_rx(skbn); + } +unlock: + rcu_read_unlock(); +next_ndp: + /* Other NDP to process? */ + ndpoffset = (int)le16_to_cpu(ndp16.wNextNdpIndex); + if (!ndpoffset) + break; + } + + /* free skb */ + dev_consume_skb_any(skb); + return; +error: + dev_kfree_skb_any(skb); +} + +static struct sk_buff *mhi_net_skb_agg(struct mhi_mbim_context *mbim, + struct sk_buff *skb) +{ + struct sk_buff *head = mbim->skbagg_head; + struct sk_buff *tail = mbim->skbagg_tail; + + /* This is non-paged skb chaining using frag_list */ + if (!head) { + mbim->skbagg_head = skb; + return skb; + } + + if (!skb_shinfo(head)->frag_list) + skb_shinfo(head)->frag_list = skb; + else + tail->next = skb; + + head->len += skb->len; + head->data_len += skb->len; + head->truesize += skb->truesize; + + mbim->skbagg_tail = skb; + + return mbim->skbagg_head; +} + +static void mhi_net_rx_refill_work(struct work_struct *work) +{ + struct mhi_mbim_context *mbim = container_of(work, struct mhi_mbim_context, + rx_refill.work); + struct mhi_device *mdev = mbim->mdev; + int err; + + while (!mhi_queue_is_full(mdev, DMA_FROM_DEVICE)) { + struct sk_buff *skb = alloc_skb(MHI_DEFAULT_MRU, GFP_KERNEL); + + if (unlikely(!skb)) + break; + + err = mhi_queue_skb(mdev, DMA_FROM_DEVICE, skb, + MHI_DEFAULT_MRU, MHI_EOT); + if (unlikely(err)) { + kfree_skb(skb); + break; + } + + /* Do not hog the CPU if rx buffers are consumed faster than + * queued (unlikely). + */ + cond_resched(); + } + + /* If we're still starved of rx buffers, reschedule later */ + if (mhi_get_free_desc_count(mdev, DMA_FROM_DEVICE) == mbim->rx_queue_sz) + schedule_delayed_work(&mbim->rx_refill, HZ / 2); +} + +static void mhi_mbim_dl_callback(struct mhi_device *mhi_dev, + struct mhi_result *mhi_res) +{ + struct mhi_mbim_context *mbim = dev_get_drvdata(&mhi_dev->dev); + struct sk_buff *skb = mhi_res->buf_addr; + int free_desc_count; + + free_desc_count = mhi_get_free_desc_count(mhi_dev, DMA_FROM_DEVICE); + + if (unlikely(mhi_res->transaction_status)) { + switch (mhi_res->transaction_status) { + case -EOVERFLOW: + /* Packet has been split over multiple transfers */ + skb_put(skb, mhi_res->bytes_xferd); + mhi_net_skb_agg(mbim, skb); + break; + case -ENOTCONN: + /* MHI layer stopping/resetting the DL channel */ + dev_kfree_skb_any(skb); + return; + default: + /* Unknown error, simply drop */ + dev_kfree_skb_any(skb); + } + } else { + skb_put(skb, mhi_res->bytes_xferd); + + if (mbim->skbagg_head) { + /* Aggregate the final fragment */ + skb = mhi_net_skb_agg(mbim, skb); + mbim->skbagg_head = NULL; + } + + mhi_mbim_rx(mbim, skb); + } + + /* Refill if RX buffers queue becomes low */ + if (free_desc_count >= mbim->rx_queue_sz / 2) + schedule_delayed_work(&mbim->rx_refill, 0); +} + +static void mhi_mbim_ndo_get_stats64(struct net_device *ndev, + struct rtnl_link_stats64 *stats) +{ + struct mhi_mbim_link *link = wwan_netdev_drvpriv(ndev); + unsigned int start; + + do { + start = u64_stats_fetch_begin_irq(&link->rx_syncp); + stats->rx_packets = u64_stats_read(&link->rx_packets); + stats->rx_bytes = u64_stats_read(&link->rx_bytes); + stats->rx_errors = u64_stats_read(&link->rx_errors); + } while (u64_stats_fetch_retry_irq(&link->rx_syncp, start)); + + do { + start = u64_stats_fetch_begin_irq(&link->tx_syncp); + stats->tx_packets = u64_stats_read(&link->tx_packets); + stats->tx_bytes = u64_stats_read(&link->tx_bytes); + stats->tx_errors = u64_stats_read(&link->tx_errors); + stats->tx_dropped = u64_stats_read(&link->tx_dropped); + } while (u64_stats_fetch_retry_irq(&link->tx_syncp, start)); +} + +static void mhi_mbim_ul_callback(struct mhi_device *mhi_dev, + struct mhi_result *mhi_res) +{ + struct mhi_mbim_context *mbim = dev_get_drvdata(&mhi_dev->dev); + struct sk_buff *skb = mhi_res->buf_addr; + struct net_device *ndev = skb->dev; + struct mhi_mbim_link *link = wwan_netdev_drvpriv(ndev); + + /* Hardware has consumed the buffer, so free the skb (which is not + * freed by the MHI stack) and perform accounting. + */ + dev_consume_skb_any(skb); + + u64_stats_update_begin(&link->tx_syncp); + if (unlikely(mhi_res->transaction_status)) { + /* MHI layer stopping/resetting the UL channel */ + if (mhi_res->transaction_status == -ENOTCONN) { + u64_stats_update_end(&link->tx_syncp); + return; + } + + u64_stats_inc(&link->tx_errors); + } else { + u64_stats_inc(&link->tx_packets); + u64_stats_add(&link->tx_bytes, mhi_res->bytes_xferd); + } + u64_stats_update_end(&link->tx_syncp); + + if (netif_queue_stopped(ndev) && !mhi_queue_is_full(mbim->mdev, DMA_TO_DEVICE)) + netif_wake_queue(ndev); +} + +static int mhi_mbim_ndo_open(struct net_device *ndev) +{ + struct mhi_mbim_link *link = wwan_netdev_drvpriv(ndev); + + /* Feed the MHI rx buffer pool */ + schedule_delayed_work(&link->mbim->rx_refill, 0); + + /* Carrier is established via out-of-band channel (e.g. qmi) */ + netif_carrier_on(ndev); + + netif_start_queue(ndev); + + return 0; +} + +static int mhi_mbim_ndo_stop(struct net_device *ndev) +{ + netif_stop_queue(ndev); + netif_carrier_off(ndev); + + return 0; +} + +static const struct net_device_ops mhi_mbim_ndo = { + .ndo_open = mhi_mbim_ndo_open, + .ndo_stop = mhi_mbim_ndo_stop, + .ndo_start_xmit = mhi_mbim_ndo_xmit, + .ndo_get_stats64 = mhi_mbim_ndo_get_stats64, +}; + +static int mhi_mbim_newlink(void *ctxt, struct net_device *ndev, u32 if_id, + struct netlink_ext_ack *extack) +{ + struct mhi_mbim_link *link = wwan_netdev_drvpriv(ndev); + struct mhi_mbim_context *mbim = ctxt; + + link->session = if_id; + link->mbim = mbim; + link->ndev = ndev; + u64_stats_init(&link->rx_syncp); + u64_stats_init(&link->tx_syncp); + + rcu_read_lock(); + if (mhi_mbim_get_link_rcu(mbim, if_id)) { + rcu_read_unlock(); + return -EEXIST; + } + rcu_read_unlock(); + + /* Already protected by RTNL lock */ + hlist_add_head_rcu(&link->hlnode, &mbim->link_list[LINK_HASH(if_id)]); + + return register_netdevice(ndev); +} + +static void mhi_mbim_dellink(void *ctxt, struct net_device *ndev, + struct list_head *head) +{ + struct mhi_mbim_link *link = wwan_netdev_drvpriv(ndev); + + hlist_del_init_rcu(&link->hlnode); + synchronize_rcu(); + + unregister_netdevice_queue(ndev, head); +} + +static void mhi_mbim_setup(struct net_device *ndev) +{ + ndev->header_ops = NULL; /* No header */ + ndev->type = ARPHRD_RAWIP; + ndev->needed_headroom = sizeof(struct mbim_tx_hdr); + ndev->hard_header_len = 0; + ndev->addr_len = 0; + ndev->flags = IFF_POINTOPOINT | IFF_NOARP; + ndev->netdev_ops = &mhi_mbim_ndo; + ndev->mtu = MHI_MBIM_DEFAULT_MTU; + ndev->min_mtu = ETH_MIN_MTU; + ndev->max_mtu = MHI_MAX_BUF_SZ - ndev->needed_headroom; + ndev->tx_queue_len = 1000; +} + +static const struct wwan_ops mhi_mbim_wwan_ops = { + .priv_size = sizeof(struct mhi_mbim_link), + .setup = mhi_mbim_setup, + .newlink = mhi_mbim_newlink, + .dellink = mhi_mbim_dellink, +}; + +static int mhi_mbim_probe(struct mhi_device *mhi_dev, const struct mhi_device_id *id) +{ + struct mhi_controller *cntrl = mhi_dev->mhi_cntrl; + struct mhi_mbim_context *mbim; + int err; + + mbim = devm_kzalloc(&mhi_dev->dev, sizeof(*mbim), GFP_KERNEL); + if (!mbim) + return -ENOMEM; + + spin_lock_init(&mbim->tx_lock); + dev_set_drvdata(&mhi_dev->dev, mbim); + mbim->mdev = mhi_dev; + mbim->mru = mhi_dev->mhi_cntrl->mru ? mhi_dev->mhi_cntrl->mru : MHI_DEFAULT_MRU; + + INIT_DELAYED_WORK(&mbim->rx_refill, mhi_net_rx_refill_work); + + /* Start MHI channels */ + err = mhi_prepare_for_transfer(mhi_dev, 0); + if (err) + return err; + + /* Number of transfer descriptors determines size of the queue */ + mbim->rx_queue_sz = mhi_get_free_desc_count(mhi_dev, DMA_FROM_DEVICE); + + /* Register wwan link ops with MHI controller representing WWAN instance */ + return wwan_register_ops(&cntrl->mhi_dev->dev, &mhi_mbim_wwan_ops, mbim, 0); +} + +static void mhi_mbim_remove(struct mhi_device *mhi_dev) +{ + struct mhi_mbim_context *mbim = dev_get_drvdata(&mhi_dev->dev); + struct mhi_controller *cntrl = mhi_dev->mhi_cntrl; + + mhi_unprepare_from_transfer(mhi_dev); + cancel_delayed_work_sync(&mbim->rx_refill); + wwan_unregister_ops(&cntrl->mhi_dev->dev); + kfree_skb(mbim->skbagg_head); + dev_set_drvdata(&mhi_dev->dev, NULL); +} + +static const struct mhi_device_id mhi_mbim_id_table[] = { + /* Hardware accelerated data PATH (to modem IPA), MBIM protocol */ + { .chan = "IP_HW0_MBIM", .driver_data = 0 }, + {} +}; +MODULE_DEVICE_TABLE(mhi, mhi_mbim_id_table); + +static struct mhi_driver mhi_mbim_driver = { + .probe = mhi_mbim_probe, + .remove = mhi_mbim_remove, + .dl_xfer_cb = mhi_mbim_dl_callback, + .ul_xfer_cb = mhi_mbim_ul_callback, + .id_table = mhi_mbim_id_table, + .driver = { + .name = "mhi_wwan_mbim", + .owner = THIS_MODULE, + }, +}; + +module_mhi_driver(mhi_mbim_driver); + +MODULE_AUTHOR("Loic Poulain <loic.poulain@linaro.org>"); +MODULE_DESCRIPTION("Network/MBIM over MHI"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/pcmcia/i82092.c b/drivers/pcmcia/i82092.c index 85887d885b5f..192c9049d654 100644 --- a/drivers/pcmcia/i82092.c +++ b/drivers/pcmcia/i82092.c @@ -112,6 +112,7 @@ static int i82092aa_pci_probe(struct pci_dev *dev, for (i = 0; i < socket_count; i++) { sockets[i].card_state = 1; /* 1 = present but empty */ sockets[i].io_base = pci_resource_start(dev, 0); + sockets[i].dev = dev; sockets[i].socket.features |= SS_CAP_PCCARD; sockets[i].socket.map_size = 0x1000; sockets[i].socket.irq_mask = 0; diff --git a/drivers/ptp/Kconfig b/drivers/ptp/Kconfig index 8c20e524e9ad..8b08745e1ca1 100644 --- a/drivers/ptp/Kconfig +++ b/drivers/ptp/Kconfig @@ -157,6 +157,13 @@ config PTP_1588_CLOCK_OCP tristate "OpenCompute TimeCard as PTP clock" depends on PTP_1588_CLOCK depends on HAS_IOMEM && PCI + depends on SPI && I2C && MTD + imply SPI_MEM + imply SPI_XILINX + imply MTD_SPI_NOR + imply I2C_XILINX + select SERIAL_8250 + default n help This driver adds support for an OpenCompute time card. diff --git a/drivers/ptp/ptp_ocp.c b/drivers/ptp/ptp_ocp.c index 0d1034e3ed0f..92edf772feed 100644 --- a/drivers/ptp/ptp_ocp.c +++ b/drivers/ptp/ptp_ocp.c @@ -6,15 +6,29 @@ #include <linux/module.h> #include <linux/init.h> #include <linux/pci.h> +#include <linux/serial_8250.h> +#include <linux/clkdev.h> +#include <linux/clk-provider.h> +#include <linux/platform_device.h> #include <linux/ptp_clock_kernel.h> +#include <linux/spi/spi.h> +#include <linux/spi/xilinx_spi.h> +#include <net/devlink.h> +#include <linux/i2c.h> +#include <linux/mtd/mtd.h> -static const struct pci_device_id ptp_ocp_pcidev_id[] = { - { PCI_DEVICE(0x1d9b, 0x0400) }, - { 0 } -}; -MODULE_DEVICE_TABLE(pci, ptp_ocp_pcidev_id); +#ifndef PCI_VENDOR_ID_FACEBOOK +#define PCI_VENDOR_ID_FACEBOOK 0x1d9b +#endif -#define OCP_REGISTER_OFFSET 0x01000000 +#ifndef PCI_DEVICE_ID_FACEBOOK_TIMECARD +#define PCI_DEVICE_ID_FACEBOOK_TIMECARD 0x0400 +#endif + +static struct class timecard_class = { + .owner = THIS_MODULE, + .name = "timecard", +}; struct ocp_reg { u32 ctrl; @@ -29,18 +43,29 @@ struct ocp_reg { u32 __pad1[2]; u32 offset_ns; u32 offset_window_ns; + u32 __pad2[2]; + u32 drift_ns; + u32 drift_window_ns; + u32 __pad3[6]; + u32 servo_offset_p; + u32 servo_offset_i; + u32 servo_drift_p; + u32 servo_drift_i; }; #define OCP_CTRL_ENABLE BIT(0) #define OCP_CTRL_ADJUST_TIME BIT(1) #define OCP_CTRL_ADJUST_OFFSET BIT(2) +#define OCP_CTRL_ADJUST_DRIFT BIT(3) +#define OCP_CTRL_ADJUST_SERVO BIT(8) #define OCP_CTRL_READ_TIME_REQ BIT(30) #define OCP_CTRL_READ_TIME_DONE BIT(31) #define OCP_STATUS_IN_SYNC BIT(0) +#define OCP_STATUS_IN_HOLDOVER BIT(1) #define OCP_SELECT_CLK_NONE 0 -#define OCP_SELECT_CLK_REG 6 +#define OCP_SELECT_CLK_REG 0xfe struct tod_reg { u32 ctrl; @@ -55,8 +80,6 @@ struct tod_reg { u32 leap; }; -#define TOD_REGISTER_OFFSET 0x01050000 - #define TOD_CTRL_PROTOCOL BIT(28) #define TOD_CTRL_DISABLE_FMT_A BIT(17) #define TOD_CTRL_DISABLE_FMT_B BIT(16) @@ -68,16 +91,264 @@ struct tod_reg { #define TOD_STATUS_UTC_VALID BIT(8) #define TOD_STATUS_LEAP_VALID BIT(16) +struct ts_reg { + u32 enable; + u32 error; + u32 polarity; + u32 version; + u32 __pad0[4]; + u32 cable_delay; + u32 __pad1[3]; + u32 intr; + u32 intr_mask; + u32 event_count; + u32 __pad2[1]; + u32 ts_count; + u32 time_ns; + u32 time_sec; + u32 data_width; + u32 data; +}; + +struct pps_reg { + u32 ctrl; + u32 status; + u32 __pad0[6]; + u32 cable_delay; +}; + +#define PPS_STATUS_FILTER_ERR BIT(0) +#define PPS_STATUS_SUPERV_ERR BIT(1) + +struct img_reg { + u32 version; +}; + +struct ptp_ocp_flash_info { + const char *name; + int pci_offset; + int data_size; + void *data; +}; + +struct ptp_ocp_ext_info { + const char *name; + int index; + irqreturn_t (*irq_fcn)(int irq, void *priv); + int (*enable)(void *priv, bool enable); +}; + +struct ptp_ocp_ext_src { + void __iomem *mem; + struct ptp_ocp *bp; + struct ptp_ocp_ext_info *info; + int irq_vec; +}; + struct ptp_ocp { struct pci_dev *pdev; + struct device dev; spinlock_t lock; - void __iomem *base; struct ocp_reg __iomem *reg; struct tod_reg __iomem *tod; + struct pps_reg __iomem *pps_to_ext; + struct pps_reg __iomem *pps_to_clk; + struct ptp_ocp_ext_src *pps; + struct ptp_ocp_ext_src *ts0; + struct ptp_ocp_ext_src *ts1; + struct img_reg __iomem *image; struct ptp_clock *ptp; struct ptp_clock_info ptp_info; + struct platform_device *i2c_ctrl; + struct platform_device *spi_flash; + struct clk_hw *i2c_clk; + struct timer_list watchdog; + time64_t gnss_lost; + int id; + int n_irqs; + int gnss_port; + int mac_port; /* miniature atomic clock */ + u8 serial[6]; + int flash_start; + bool has_serial; }; +struct ocp_resource { + unsigned long offset; + int size; + int irq_vec; + int (*setup)(struct ptp_ocp *bp, struct ocp_resource *r); + void *extra; + unsigned long bp_offset; +}; + +static int ptp_ocp_register_mem(struct ptp_ocp *bp, struct ocp_resource *r); +static int ptp_ocp_register_i2c(struct ptp_ocp *bp, struct ocp_resource *r); +static int ptp_ocp_register_spi(struct ptp_ocp *bp, struct ocp_resource *r); +static int ptp_ocp_register_serial(struct ptp_ocp *bp, struct ocp_resource *r); +static int ptp_ocp_register_ext(struct ptp_ocp *bp, struct ocp_resource *r); +static int ptp_ocp_fb_board_init(struct ptp_ocp *bp, struct ocp_resource *r); +static irqreturn_t ptp_ocp_ts_irq(int irq, void *priv); +static int ptp_ocp_ts_enable(void *priv, bool enable); + +#define bp_assign_entry(bp, res, val) ({ \ + uintptr_t addr = (uintptr_t)(bp) + (res)->bp_offset; \ + *(typeof(val) *)addr = val; \ +}) + +#define OCP_RES_LOCATION(member) \ + .bp_offset = offsetof(struct ptp_ocp, member) + +#define OCP_MEM_RESOURCE(member) \ + OCP_RES_LOCATION(member), .setup = ptp_ocp_register_mem + +#define OCP_SERIAL_RESOURCE(member) \ + OCP_RES_LOCATION(member), .setup = ptp_ocp_register_serial + +#define OCP_I2C_RESOURCE(member) \ + OCP_RES_LOCATION(member), .setup = ptp_ocp_register_i2c + +#define OCP_SPI_RESOURCE(member) \ + OCP_RES_LOCATION(member), .setup = ptp_ocp_register_spi + +#define OCP_EXT_RESOURCE(member) \ + OCP_RES_LOCATION(member), .setup = ptp_ocp_register_ext + +/* This is the MSI vector mapping used. + * 0: N/C + * 1: TS0 + * 2: TS1 + * 3: GPS + * 4: GPS2 (n/c) + * 5: MAC + * 6: SPI IMU (inertial measurement unit) + * 7: I2C oscillator + * 8: HWICAP + * 9: SPI Flash + */ + +static struct ocp_resource ocp_fb_resource[] = { + { + OCP_MEM_RESOURCE(reg), + .offset = 0x01000000, .size = 0x10000, + }, + { + OCP_EXT_RESOURCE(ts0), + .offset = 0x01010000, .size = 0x10000, .irq_vec = 1, + .extra = &(struct ptp_ocp_ext_info) { + .name = "ts0", .index = 0, + .irq_fcn = ptp_ocp_ts_irq, + .enable = ptp_ocp_ts_enable, + }, + }, + { + OCP_EXT_RESOURCE(ts1), + .offset = 0x01020000, .size = 0x10000, .irq_vec = 2, + .extra = &(struct ptp_ocp_ext_info) { + .name = "ts1", .index = 1, + .irq_fcn = ptp_ocp_ts_irq, + .enable = ptp_ocp_ts_enable, + }, + }, + { + OCP_MEM_RESOURCE(pps_to_ext), + .offset = 0x01030000, .size = 0x10000, + }, + { + OCP_MEM_RESOURCE(pps_to_clk), + .offset = 0x01040000, .size = 0x10000, + }, + { + OCP_MEM_RESOURCE(tod), + .offset = 0x01050000, .size = 0x10000, + }, + { + OCP_MEM_RESOURCE(image), + .offset = 0x00020000, .size = 0x1000, + }, + { + OCP_I2C_RESOURCE(i2c_ctrl), + .offset = 0x00150000, .size = 0x10000, .irq_vec = 7, + }, + { + OCP_SERIAL_RESOURCE(gnss_port), + .offset = 0x00160000 + 0x1000, .irq_vec = 3, + }, + { + OCP_SERIAL_RESOURCE(mac_port), + .offset = 0x00180000 + 0x1000, .irq_vec = 5, + }, + { + OCP_SPI_RESOURCE(spi_flash), + .offset = 0x00310000, .size = 0x10000, .irq_vec = 9, + .extra = &(struct ptp_ocp_flash_info) { + .name = "xilinx_spi", .pci_offset = 0, + .data_size = sizeof(struct xspi_platform_data), + .data = &(struct xspi_platform_data) { + .num_chipselect = 1, + .bits_per_word = 8, + .num_devices = 1, + .devices = &(struct spi_board_info) { + .modalias = "spi-nor", + }, + }, + }, + }, + { + .setup = ptp_ocp_fb_board_init, + }, + { } +}; + +static const struct pci_device_id ptp_ocp_pcidev_id[] = { + { PCI_DEVICE_DATA(FACEBOOK, TIMECARD, &ocp_fb_resource) }, + { 0 } +}; +MODULE_DEVICE_TABLE(pci, ptp_ocp_pcidev_id); + +static DEFINE_MUTEX(ptp_ocp_lock); +static DEFINE_IDR(ptp_ocp_idr); + +static struct { + const char *name; + int value; +} ptp_ocp_clock[] = { + { .name = "NONE", .value = 0 }, + { .name = "TOD", .value = 1 }, + { .name = "IRIG", .value = 2 }, + { .name = "PPS", .value = 3 }, + { .name = "PTP", .value = 4 }, + { .name = "RTC", .value = 5 }, + { .name = "DCF", .value = 6 }, + { .name = "REGS", .value = 0xfe }, + { .name = "EXT", .value = 0xff }, +}; + +static const char * +ptp_ocp_clock_name_from_val(int val) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(ptp_ocp_clock); i++) + if (ptp_ocp_clock[i].value == val) + return ptp_ocp_clock[i].name; + return NULL; +} + +static int +ptp_ocp_clock_val_from_name(const char *name) +{ + const char *clk; + int i; + + for (i = 0; i < ARRAY_SIZE(ptp_ocp_clock); i++) { + clk = ptp_ocp_clock[i].name; + if (!strncasecmp(name, clk, strlen(clk))) + return ptp_ocp_clock[i].value; + } + return -EINVAL; +} + static int __ptp_ocp_gettime_locked(struct ptp_ocp *bp, struct timespec64 *ts, struct ptp_system_timestamp *sts) @@ -192,6 +463,45 @@ ptp_ocp_null_adjfine(struct ptp_clock_info *ptp_info, long scaled_ppm) return -EOPNOTSUPP; } +static int +ptp_ocp_adjphase(struct ptp_clock_info *ptp_info, s32 phase_ns) +{ + return -EOPNOTSUPP; +} + +static int +ptp_ocp_enable(struct ptp_clock_info *ptp_info, struct ptp_clock_request *rq, + int on) +{ + struct ptp_ocp *bp = container_of(ptp_info, struct ptp_ocp, ptp_info); + struct ptp_ocp_ext_src *ext = NULL; + int err; + + switch (rq->type) { + case PTP_CLK_REQ_EXTTS: + switch (rq->extts.index) { + case 0: + ext = bp->ts0; + break; + case 1: + ext = bp->ts1; + break; + } + break; + case PTP_CLK_REQ_PPS: + ext = bp->pps; + break; + default: + return -EOPNOTSUPP; + } + + err = -ENXIO; + if (ext) + err = ext->info->enable(ext, on); + + return err; +} + static const struct ptp_clock_info ptp_ocp_clock_info = { .owner = THIS_MODULE, .name = KBUILD_MODNAME, @@ -200,10 +510,57 @@ static const struct ptp_clock_info ptp_ocp_clock_info = { .settime64 = ptp_ocp_settime, .adjtime = ptp_ocp_adjtime, .adjfine = ptp_ocp_null_adjfine, + .adjphase = ptp_ocp_adjphase, + .enable = ptp_ocp_enable, + .pps = true, + .n_ext_ts = 2, }; +static void +__ptp_ocp_clear_drift_locked(struct ptp_ocp *bp) +{ + u32 ctrl, select; + + select = ioread32(&bp->reg->select); + iowrite32(OCP_SELECT_CLK_REG, &bp->reg->select); + + iowrite32(0, &bp->reg->drift_ns); + + ctrl = ioread32(&bp->reg->ctrl); + ctrl |= OCP_CTRL_ADJUST_DRIFT; + iowrite32(ctrl, &bp->reg->ctrl); + + /* restore clock selection */ + iowrite32(select >> 16, &bp->reg->select); +} + +static void +ptp_ocp_watchdog(struct timer_list *t) +{ + struct ptp_ocp *bp = from_timer(bp, t, watchdog); + unsigned long flags; + u32 status; + + status = ioread32(&bp->pps_to_clk->status); + + if (status & PPS_STATUS_SUPERV_ERR) { + iowrite32(status, &bp->pps_to_clk->status); + if (!bp->gnss_lost) { + spin_lock_irqsave(&bp->lock, flags); + __ptp_ocp_clear_drift_locked(bp); + spin_unlock_irqrestore(&bp->lock, flags); + bp->gnss_lost = ktime_get_real_seconds(); + } + + } else if (bp->gnss_lost) { + bp->gnss_lost = 0; + } + + mod_timer(&bp->watchdog, jiffies + HZ); +} + static int -ptp_ocp_check_clock(struct ptp_ocp *bp) +ptp_ocp_init_clock(struct ptp_ocp *bp) { struct timespec64 ts; bool sync; @@ -214,6 +571,17 @@ ptp_ocp_check_clock(struct ptp_ocp *bp) ctrl |= OCP_CTRL_ENABLE; iowrite32(ctrl, &bp->reg->ctrl); + /* NO DRIFT Correction */ + /* offset_p:i 1/8, offset_i: 1/16, drift_p: 0, drift_i: 0 */ + iowrite32(0x2000, &bp->reg->servo_offset_p); + iowrite32(0x1000, &bp->reg->servo_offset_i); + iowrite32(0, &bp->reg->servo_drift_p); + iowrite32(0, &bp->reg->servo_drift_i); + + /* latch servo values */ + ctrl |= OCP_CTRL_ADJUST_SERVO; + iowrite32(ctrl, &bp->reg->ctrl); + if ((ioread32(&bp->reg->ctrl) & OCP_CTRL_ENABLE) == 0) { dev_err(&bp->pdev->dev, "clock not enabled\n"); return -ENODEV; @@ -229,6 +597,9 @@ ptp_ocp_check_clock(struct ptp_ocp *bp) ts.tv_sec, ts.tv_nsec, sync ? "in-sync" : "UNSYNCED"); + timer_setup(&bp->watchdog, ptp_ocp_watchdog, 0); + mod_timer(&bp->watchdog, jiffies + HZ); + return 0; } @@ -278,82 +649,839 @@ ptp_ocp_tod_info(struct ptp_ocp *bp) reg & TOD_STATUS_LEAP_VALID ? 1 : 0); } +static int +ptp_ocp_firstchild(struct device *dev, void *data) +{ + return 1; +} + +static int +ptp_ocp_read_i2c(struct i2c_adapter *adap, u8 addr, u8 reg, u8 sz, u8 *data) +{ + struct i2c_msg msgs[2] = { + { + .addr = addr, + .len = 1, + .buf = ®, + }, + { + .addr = addr, + .flags = I2C_M_RD, + .len = 2, + .buf = data, + }, + }; + int err; + u8 len; + + /* xiic-i2c for some stupid reason only does 2 byte reads. */ + while (sz) { + len = min_t(u8, sz, 2); + msgs[1].len = len; + err = i2c_transfer(adap, msgs, 2); + if (err != msgs[1].len) + return err; + msgs[1].buf += len; + reg += len; + sz -= len; + } + return 0; +} + +static void +ptp_ocp_get_serial_number(struct ptp_ocp *bp) +{ + struct i2c_adapter *adap; + struct device *dev; + int err; + + dev = device_find_child(&bp->i2c_ctrl->dev, NULL, ptp_ocp_firstchild); + if (!dev) { + dev_err(&bp->pdev->dev, "Can't find I2C adapter\n"); + return; + } + + adap = i2c_verify_adapter(dev); + if (!adap) { + dev_err(&bp->pdev->dev, "device '%s' isn't an I2C adapter\n", + dev_name(dev)); + goto out; + } + + err = ptp_ocp_read_i2c(adap, 0x58, 0x9A, 6, bp->serial); + if (err) { + dev_err(&bp->pdev->dev, "could not read eeprom: %d\n", err); + goto out; + } + + bp->has_serial = true; + +out: + put_device(dev); +} + static void ptp_ocp_info(struct ptp_ocp *bp) { - static const char * const clock_name[] = { - "NO", "TOD", "IRIG", "PPS", "PTP", "RTC", "REGS", "EXT" - }; u32 version, select; version = ioread32(&bp->reg->version); select = ioread32(&bp->reg->select); dev_info(&bp->pdev->dev, "Version %d.%d.%d, clock %s, device ptp%d\n", version >> 24, (version >> 16) & 0xff, version & 0xffff, - clock_name[select & 7], + ptp_ocp_clock_name_from_val(select >> 16), ptp_clock_index(bp->ptp)); ptp_ocp_tod_info(bp); } +static struct device * +ptp_ocp_find_flash(struct ptp_ocp *bp) +{ + struct device *dev, *last; + + last = NULL; + dev = &bp->spi_flash->dev; + + while ((dev = device_find_child(dev, NULL, ptp_ocp_firstchild))) { + if (!strcmp("mtd", dev_bus_name(dev))) + break; + put_device(last); + last = dev; + } + put_device(last); + + return dev; +} + static int -ptp_ocp_probe(struct pci_dev *pdev, const struct pci_device_id *id) +ptp_ocp_devlink_flash(struct devlink *devlink, struct device *dev, + const struct firmware *fw) { - struct ptp_ocp *bp; + struct mtd_info *mtd = dev_get_drvdata(dev); + struct ptp_ocp *bp = devlink_priv(devlink); + size_t off, len, resid, wrote; + struct erase_info erase; + size_t base, blksz; + int err; + + off = 0; + base = bp->flash_start; + blksz = 4096; + resid = fw->size; + + while (resid) { + devlink_flash_update_status_notify(devlink, "Flashing", + NULL, off, fw->size); + + len = min_t(size_t, resid, blksz); + erase.addr = base + off; + erase.len = blksz; + + err = mtd_erase(mtd, &erase); + if (err) + goto out; + + err = mtd_write(mtd, base + off, len, &wrote, &fw->data[off]); + if (err) + goto out; + + off += blksz; + resid -= len; + } +out: + return err; +} + +static int +ptp_ocp_devlink_flash_update(struct devlink *devlink, + struct devlink_flash_update_params *params, + struct netlink_ext_ack *extack) +{ + struct ptp_ocp *bp = devlink_priv(devlink); + struct device *dev; + const char *msg; + int err; + + dev = ptp_ocp_find_flash(bp); + if (!dev) { + dev_err(&bp->pdev->dev, "Can't find Flash SPI adapter\n"); + return -ENODEV; + } + + devlink_flash_update_status_notify(devlink, "Preparing to flash", + NULL, 0, 0); + + err = ptp_ocp_devlink_flash(devlink, dev, params->fw); + + msg = err ? "Flash error" : "Flash complete"; + devlink_flash_update_status_notify(devlink, msg, NULL, 0, 0); + + put_device(dev); + return err; +} + +static int +ptp_ocp_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req, + struct netlink_ext_ack *extack) +{ + struct ptp_ocp *bp = devlink_priv(devlink); + char buf[32]; + int err; + + err = devlink_info_driver_name_put(req, KBUILD_MODNAME); + if (err) + return err; + + if (bp->image) { + u32 ver = ioread32(&bp->image->version); + + if (ver & 0xffff) { + sprintf(buf, "%d", ver); + err = devlink_info_version_running_put(req, + "fw", + buf); + } else { + sprintf(buf, "%d", ver >> 16); + err = devlink_info_version_running_put(req, + "loader", + buf); + } + if (err) + return err; + } + + if (!bp->has_serial) + ptp_ocp_get_serial_number(bp); + + if (bp->has_serial) { + sprintf(buf, "%pM", bp->serial); + err = devlink_info_serial_number_put(req, buf); + if (err) + return err; + } + + return 0; +} + +static const struct devlink_ops ptp_ocp_devlink_ops = { + .flash_update = ptp_ocp_devlink_flash_update, + .info_get = ptp_ocp_devlink_info_get, +}; + +static void __iomem * +__ptp_ocp_get_mem(struct ptp_ocp *bp, unsigned long start, int size) +{ + struct resource res = DEFINE_RES_MEM_NAMED(start, size, "ptp_ocp"); + + return devm_ioremap_resource(&bp->pdev->dev, &res); +} + +static void __iomem * +ptp_ocp_get_mem(struct ptp_ocp *bp, struct ocp_resource *r) +{ + unsigned long start; + + start = pci_resource_start(bp->pdev, 0) + r->offset; + return __ptp_ocp_get_mem(bp, start, r->size); +} + +static void +ptp_ocp_set_irq_resource(struct resource *res, int irq) +{ + struct resource r = DEFINE_RES_IRQ(irq); + *res = r; +} + +static void +ptp_ocp_set_mem_resource(struct resource *res, unsigned long start, int size) +{ + struct resource r = DEFINE_RES_MEM(start, size); + *res = r; +} + +static int +ptp_ocp_register_spi(struct ptp_ocp *bp, struct ocp_resource *r) +{ + struct ptp_ocp_flash_info *info; + struct pci_dev *pdev = bp->pdev; + struct platform_device *p; + struct resource res[2]; + unsigned long start; + int id; + + /* XXX hack to work around old FPGA */ + if (bp->n_irqs < 10) { + dev_err(&bp->pdev->dev, "FPGA does not have SPI devices\n"); + return 0; + } + + if (r->irq_vec > bp->n_irqs) { + dev_err(&bp->pdev->dev, "spi device irq %d out of range\n", + r->irq_vec); + return 0; + } + + start = pci_resource_start(pdev, 0) + r->offset; + ptp_ocp_set_mem_resource(&res[0], start, r->size); + ptp_ocp_set_irq_resource(&res[1], pci_irq_vector(pdev, r->irq_vec)); + + info = r->extra; + id = pci_dev_id(pdev) << 1; + id += info->pci_offset; + + p = platform_device_register_resndata(&pdev->dev, info->name, id, + res, 2, info->data, + info->data_size); + if (IS_ERR(p)) + return PTR_ERR(p); + + bp_assign_entry(bp, r, p); + + return 0; +} + +static struct platform_device * +ptp_ocp_i2c_bus(struct pci_dev *pdev, struct ocp_resource *r, int id) +{ + struct resource res[2]; + unsigned long start; + + start = pci_resource_start(pdev, 0) + r->offset; + ptp_ocp_set_mem_resource(&res[0], start, r->size); + ptp_ocp_set_irq_resource(&res[1], pci_irq_vector(pdev, r->irq_vec)); + + return platform_device_register_resndata(&pdev->dev, "xiic-i2c", + id, res, 2, NULL, 0); +} + +static int +ptp_ocp_register_i2c(struct ptp_ocp *bp, struct ocp_resource *r) +{ + struct pci_dev *pdev = bp->pdev; + struct platform_device *p; + struct clk_hw *clk; + char buf[32]; + int id; + + if (r->irq_vec > bp->n_irqs) { + dev_err(&bp->pdev->dev, "i2c device irq %d out of range\n", + r->irq_vec); + return 0; + } + + id = pci_dev_id(bp->pdev); + + sprintf(buf, "AXI.%d", id); + clk = clk_hw_register_fixed_rate(&pdev->dev, buf, NULL, 0, 50000000); + if (IS_ERR(clk)) + return PTR_ERR(clk); + bp->i2c_clk = clk; + + sprintf(buf, "xiic-i2c.%d", id); + devm_clk_hw_register_clkdev(&pdev->dev, clk, NULL, buf); + p = ptp_ocp_i2c_bus(bp->pdev, r, id); + if (IS_ERR(p)) + return PTR_ERR(p); + + bp_assign_entry(bp, r, p); + + return 0; +} + +static irqreturn_t +ptp_ocp_ts_irq(int irq, void *priv) +{ + struct ptp_ocp_ext_src *ext = priv; + struct ts_reg __iomem *reg = ext->mem; + struct ptp_clock_event ev; + u32 sec, nsec; + + /* XXX should fix API - this converts s/ns -> ts -> s/ns */ + sec = ioread32(®->time_sec); + nsec = ioread32(®->time_ns); + + ev.type = PTP_CLOCK_EXTTS; + ev.index = ext->info->index; + ev.timestamp = sec * 1000000000ULL + nsec; + + ptp_clock_event(ext->bp->ptp, &ev); + + iowrite32(1, ®->intr); /* write 1 to ack */ + + return IRQ_HANDLED; +} + +static int +ptp_ocp_ts_enable(void *priv, bool enable) +{ + struct ptp_ocp_ext_src *ext = priv; + struct ts_reg __iomem *reg = ext->mem; + + if (enable) { + iowrite32(1, ®->enable); + iowrite32(1, ®->intr_mask); + iowrite32(1, ®->intr); + } else { + iowrite32(0, ®->intr_mask); + iowrite32(0, ®->enable); + } + + return 0; +} + +static void +ptp_ocp_unregister_ext(struct ptp_ocp_ext_src *ext) +{ + ext->info->enable(ext, false); + pci_free_irq(ext->bp->pdev, ext->irq_vec, ext); + kfree(ext); +} + +static int +ptp_ocp_register_ext(struct ptp_ocp *bp, struct ocp_resource *r) +{ + struct pci_dev *pdev = bp->pdev; + struct ptp_ocp_ext_src *ext; int err; - bp = kzalloc(sizeof(*bp), GFP_KERNEL); - if (!bp) + ext = kzalloc(sizeof(*ext), GFP_KERNEL); + if (!ext) return -ENOMEM; - bp->pdev = pdev; - pci_set_drvdata(pdev, bp); - err = pci_enable_device(pdev); + err = -EINVAL; + ext->mem = ptp_ocp_get_mem(bp, r); + if (!ext->mem) + goto out; + + ext->bp = bp; + ext->info = r->extra; + ext->irq_vec = r->irq_vec; + + err = pci_request_irq(pdev, r->irq_vec, ext->info->irq_fcn, NULL, + ext, "ocp%d.%s", bp->id, ext->info->name); if (err) { - dev_err(&pdev->dev, "pci_enable_device\n"); - goto out_free; + dev_err(&pdev->dev, "Could not get irq %d\n", r->irq_vec); + goto out; } - err = pci_request_regions(pdev, KBUILD_MODNAME); - if (err) { - dev_err(&pdev->dev, "pci_request_region\n"); - goto out_disable; + bp_assign_entry(bp, r, ext); + + return 0; + +out: + kfree(ext); + return err; +} + +static int +ptp_ocp_serial_line(struct ptp_ocp *bp, struct ocp_resource *r) +{ + struct pci_dev *pdev = bp->pdev; + struct uart_8250_port uart; + + /* Setting UPF_IOREMAP and leaving port.membase unspecified lets + * the serial port device claim and release the pci resource. + */ + memset(&uart, 0, sizeof(uart)); + uart.port.dev = &pdev->dev; + uart.port.iotype = UPIO_MEM; + uart.port.regshift = 2; + uart.port.mapbase = pci_resource_start(pdev, 0) + r->offset; + uart.port.irq = pci_irq_vector(pdev, r->irq_vec); + uart.port.uartclk = 50000000; + uart.port.flags = UPF_FIXED_TYPE | UPF_IOREMAP; + uart.port.type = PORT_16550A; + + return serial8250_register_8250_port(&uart); +} + +static int +ptp_ocp_register_serial(struct ptp_ocp *bp, struct ocp_resource *r) +{ + int port; + + if (r->irq_vec > bp->n_irqs) { + dev_err(&bp->pdev->dev, "serial device irq %d out of range\n", + r->irq_vec); + return 0; } - bp->base = pci_ioremap_bar(pdev, 0); - if (!bp->base) { - dev_err(&pdev->dev, "io_remap bar0\n"); - err = -ENOMEM; - goto out_release_regions; + port = ptp_ocp_serial_line(bp, r); + if (port < 0) + return port; + + bp_assign_entry(bp, r, port); + + return 0; +} + +static int +ptp_ocp_register_mem(struct ptp_ocp *bp, struct ocp_resource *r) +{ + void __iomem *mem; + + mem = ptp_ocp_get_mem(bp, r); + if (!mem) + return -EINVAL; + + bp_assign_entry(bp, r, mem); + + return 0; +} + +/* FB specific board initializers; last "resource" registered. */ +static int +ptp_ocp_fb_board_init(struct ptp_ocp *bp, struct ocp_resource *r) +{ + bp->flash_start = 1024 * 4096; + + return ptp_ocp_init_clock(bp); +} + +static int +ptp_ocp_register_resources(struct ptp_ocp *bp, kernel_ulong_t driver_data) +{ + struct ocp_resource *r, *table; + int err = 0; + + table = (struct ocp_resource *)driver_data; + for (r = table; r->setup; r++) { + err = r->setup(bp, r); + if (err) + break; + } + return err; +} + +static ssize_t +serialnum_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct ptp_ocp *bp = dev_get_drvdata(dev); + + if (!bp->has_serial) + ptp_ocp_get_serial_number(bp); + + return sysfs_emit(buf, "%pM\n", bp->serial); +} +static DEVICE_ATTR_RO(serialnum); + +static ssize_t +gnss_sync_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct ptp_ocp *bp = dev_get_drvdata(dev); + ssize_t ret; + + if (bp->gnss_lost) + ret = sysfs_emit(buf, "LOST @ %ptT\n", &bp->gnss_lost); + else + ret = sysfs_emit(buf, "SYNC\n"); + + return ret; +} +static DEVICE_ATTR_RO(gnss_sync); + +static ssize_t +clock_source_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct ptp_ocp *bp = dev_get_drvdata(dev); + const char *p; + u32 select; + + select = ioread32(&bp->reg->select); + p = ptp_ocp_clock_name_from_val(select >> 16); + + return sysfs_emit(buf, "%s\n", p); +} + +static ssize_t +clock_source_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct ptp_ocp *bp = dev_get_drvdata(dev); + unsigned long flags; + int val; + + val = ptp_ocp_clock_val_from_name(buf); + if (val < 0) + return val; + + spin_lock_irqsave(&bp->lock, flags); + iowrite32(val, &bp->reg->select); + spin_unlock_irqrestore(&bp->lock, flags); + + return count; +} +static DEVICE_ATTR_RW(clock_source); + +static ssize_t +available_clock_sources_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + const char *clk; + ssize_t count; + int i; + + count = 0; + for (i = 0; i < ARRAY_SIZE(ptp_ocp_clock); i++) { + clk = ptp_ocp_clock[i].name; + count += sysfs_emit_at(buf, count, "%s ", clk); + } + if (count) + count--; + count += sysfs_emit_at(buf, count, "\n"); + return count; +} +static DEVICE_ATTR_RO(available_clock_sources); + +static struct attribute *timecard_attrs[] = { + &dev_attr_serialnum.attr, + &dev_attr_gnss_sync.attr, + &dev_attr_clock_source.attr, + &dev_attr_available_clock_sources.attr, + NULL, +}; +ATTRIBUTE_GROUPS(timecard); + +static void +ptp_ocp_dev_release(struct device *dev) +{ + struct ptp_ocp *bp = dev_get_drvdata(dev); + + mutex_lock(&ptp_ocp_lock); + idr_remove(&ptp_ocp_idr, bp->id); + mutex_unlock(&ptp_ocp_lock); +} + +static int +ptp_ocp_device_init(struct ptp_ocp *bp, struct pci_dev *pdev) +{ + int err; + + mutex_lock(&ptp_ocp_lock); + err = idr_alloc(&ptp_ocp_idr, bp, 0, 0, GFP_KERNEL); + mutex_unlock(&ptp_ocp_lock); + if (err < 0) { + dev_err(&pdev->dev, "idr_alloc failed: %d\n", err); + return err; } - bp->reg = bp->base + OCP_REGISTER_OFFSET; - bp->tod = bp->base + TOD_REGISTER_OFFSET; + bp->id = err; + bp->ptp_info = ptp_ocp_clock_info; spin_lock_init(&bp->lock); + bp->gnss_port = -1; + bp->mac_port = -1; + bp->pdev = pdev; + + device_initialize(&bp->dev); + dev_set_name(&bp->dev, "ocp%d", bp->id); + bp->dev.class = &timecard_class; + bp->dev.parent = &pdev->dev; + bp->dev.release = ptp_ocp_dev_release; + dev_set_drvdata(&bp->dev, bp); + + err = device_add(&bp->dev); + if (err) { + dev_err(&bp->dev, "device add failed: %d\n", err); + goto out; + } + + pci_set_drvdata(pdev, bp); + + return 0; + +out: + ptp_ocp_dev_release(&bp->dev); + put_device(&bp->dev); + return err; +} + +static void +ptp_ocp_symlink(struct ptp_ocp *bp, struct device *child, const char *link) +{ + struct device *dev = &bp->dev; + + if (sysfs_create_link(&dev->kobj, &child->kobj, link)) + dev_err(dev, "%s symlink failed\n", link); +} + +static void +ptp_ocp_link_child(struct ptp_ocp *bp, const char *name, const char *link) +{ + struct device *dev, *child; + + dev = &bp->pdev->dev; + + child = device_find_child_by_name(dev, name); + if (!child) { + dev_err(dev, "Could not find device %s\n", name); + return; + } + + ptp_ocp_symlink(bp, child, link); + put_device(child); +} + +static int +ptp_ocp_complete(struct ptp_ocp *bp) +{ + struct pps_device *pps; + char buf[32]; + + if (bp->gnss_port != -1) { + sprintf(buf, "ttyS%d", bp->gnss_port); + ptp_ocp_link_child(bp, buf, "ttyGNSS"); + } + if (bp->mac_port != -1) { + sprintf(buf, "ttyS%d", bp->mac_port); + ptp_ocp_link_child(bp, buf, "ttyMAC"); + } + sprintf(buf, "ptp%d", ptp_clock_index(bp->ptp)); + ptp_ocp_link_child(bp, buf, "ptp"); + + pps = pps_lookup_dev(bp->ptp); + if (pps) + ptp_ocp_symlink(bp, pps->dev, "pps"); + + if (device_add_groups(&bp->dev, timecard_groups)) + pr_err("device add groups failed\n"); - err = ptp_ocp_check_clock(bp); + return 0; +} + +static void +ptp_ocp_resource_summary(struct ptp_ocp *bp) +{ + struct device *dev = &bp->pdev->dev; + + if (bp->image) { + u32 ver = ioread32(&bp->image->version); + + dev_info(dev, "version %x\n", ver); + if (ver & 0xffff) + dev_info(dev, "regular image, version %d\n", + ver & 0xffff); + else + dev_info(dev, "golden image, version %d\n", + ver >> 16); + } + if (bp->gnss_port != -1) + dev_info(dev, "GNSS @ /dev/ttyS%d 115200\n", bp->gnss_port); + if (bp->mac_port != -1) + dev_info(dev, "MAC @ /dev/ttyS%d 57600\n", bp->mac_port); +} + +static void +ptp_ocp_detach_sysfs(struct ptp_ocp *bp) +{ + struct device *dev = &bp->dev; + + sysfs_remove_link(&dev->kobj, "ttyGNSS"); + sysfs_remove_link(&dev->kobj, "ttyMAC"); + sysfs_remove_link(&dev->kobj, "ptp"); + sysfs_remove_link(&dev->kobj, "pps"); + device_remove_groups(dev, timecard_groups); +} + +static void +ptp_ocp_detach(struct ptp_ocp *bp) +{ + ptp_ocp_detach_sysfs(bp); + if (timer_pending(&bp->watchdog)) + del_timer_sync(&bp->watchdog); + if (bp->ts0) + ptp_ocp_unregister_ext(bp->ts0); + if (bp->ts1) + ptp_ocp_unregister_ext(bp->ts1); + if (bp->pps) + ptp_ocp_unregister_ext(bp->pps); + if (bp->gnss_port != -1) + serial8250_unregister_port(bp->gnss_port); + if (bp->mac_port != -1) + serial8250_unregister_port(bp->mac_port); + if (bp->spi_flash) + platform_device_unregister(bp->spi_flash); + if (bp->i2c_ctrl) + platform_device_unregister(bp->i2c_ctrl); + if (bp->i2c_clk) + clk_hw_unregister_fixed_rate(bp->i2c_clk); + if (bp->n_irqs) + pci_free_irq_vectors(bp->pdev); + if (bp->ptp) + ptp_clock_unregister(bp->ptp); + device_unregister(&bp->dev); +} + +static int +ptp_ocp_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + struct devlink *devlink; + struct ptp_ocp *bp; + int err; + + devlink = devlink_alloc(&ptp_ocp_devlink_ops, sizeof(*bp), &pdev->dev); + if (!devlink) { + dev_err(&pdev->dev, "devlink_alloc failed\n"); + return -ENOMEM; + } + + err = devlink_register(devlink); + if (err) + goto out_free; + + err = pci_enable_device(pdev); + if (err) { + dev_err(&pdev->dev, "pci_enable_device\n"); + goto out_unregister; + } + + bp = devlink_priv(devlink); + err = ptp_ocp_device_init(bp, pdev); + if (err) + goto out_unregister; + + /* compat mode. + * Older FPGA firmware only returns 2 irq's. + * allow this - if not all of the IRQ's are returned, skip the + * extra devices and just register the clock. + */ + err = pci_alloc_irq_vectors(pdev, 1, 10, PCI_IRQ_MSI | PCI_IRQ_MSIX); + if (err < 0) { + dev_err(&pdev->dev, "alloc_irq_vectors err: %d\n", err); + goto out; + } + bp->n_irqs = err; + pci_set_master(pdev); + + err = ptp_ocp_register_resources(bp, id->driver_data); if (err) goto out; bp->ptp = ptp_clock_register(&bp->ptp_info, &pdev->dev); if (IS_ERR(bp->ptp)) { - dev_err(&pdev->dev, "ptp_clock_register\n"); err = PTR_ERR(bp->ptp); + dev_err(&pdev->dev, "ptp_clock_register: %d\n", err); + bp->ptp = NULL; goto out; } + err = ptp_ocp_complete(bp); + if (err) + goto out; + ptp_ocp_info(bp); + ptp_ocp_resource_summary(bp); return 0; out: - pci_iounmap(pdev, bp->base); -out_release_regions: - pci_release_regions(pdev); -out_disable: + ptp_ocp_detach(bp); pci_disable_device(pdev); + pci_set_drvdata(pdev, NULL); +out_unregister: + devlink_unregister(devlink); out_free: - kfree(bp); + devlink_free(devlink); return err; } @@ -362,13 +1490,14 @@ static void ptp_ocp_remove(struct pci_dev *pdev) { struct ptp_ocp *bp = pci_get_drvdata(pdev); + struct devlink *devlink = priv_to_devlink(bp); - ptp_clock_unregister(bp->ptp); - pci_iounmap(pdev, bp->base); - pci_release_regions(pdev); + ptp_ocp_detach(bp); pci_disable_device(pdev); pci_set_drvdata(pdev, NULL); - kfree(bp); + + devlink_unregister(devlink); + devlink_free(devlink); } static struct pci_driver ptp_ocp_driver = { @@ -378,19 +1507,84 @@ static struct pci_driver ptp_ocp_driver = { .remove = ptp_ocp_remove, }; +static int +ptp_ocp_i2c_notifier_call(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct device *dev, *child = data; + struct ptp_ocp *bp; + bool add; + + switch (action) { + case BUS_NOTIFY_ADD_DEVICE: + case BUS_NOTIFY_DEL_DEVICE: + add = action == BUS_NOTIFY_ADD_DEVICE; + break; + default: + return 0; + } + + if (!i2c_verify_adapter(child)) + return 0; + + dev = child; + while ((dev = dev->parent)) + if (dev->driver && !strcmp(dev->driver->name, KBUILD_MODNAME)) + goto found; + return 0; + +found: + bp = dev_get_drvdata(dev); + if (add) + ptp_ocp_symlink(bp, child, "i2c"); + else + sysfs_remove_link(&bp->dev.kobj, "i2c"); + + return 0; +} + +static struct notifier_block ptp_ocp_i2c_notifier = { + .notifier_call = ptp_ocp_i2c_notifier_call, +}; + static int __init ptp_ocp_init(void) { + const char *what; int err; + what = "timecard class"; + err = class_register(&timecard_class); + if (err) + goto out; + + what = "i2c notifier"; + err = bus_register_notifier(&i2c_bus_type, &ptp_ocp_i2c_notifier); + if (err) + goto out_notifier; + + what = "ptp_ocp driver"; err = pci_register_driver(&ptp_ocp_driver); + if (err) + goto out_register; + + return 0; + +out_register: + bus_unregister_notifier(&i2c_bus_type, &ptp_ocp_i2c_notifier); +out_notifier: + class_unregister(&timecard_class); +out: + pr_err(KBUILD_MODNAME ": failed to register %s: %d\n", what, err); return err; } static void __exit ptp_ocp_fini(void) { + bus_unregister_notifier(&i2c_bus_type, &ptp_ocp_i2c_notifier); pci_unregister_driver(&ptp_ocp_driver); + class_unregister(&timecard_class); } module_init(ptp_ocp_init); diff --git a/drivers/s390/net/Kconfig b/drivers/s390/net/Kconfig index cff91b4f1a76..9c67b97faba2 100644 --- a/drivers/s390/net/Kconfig +++ b/drivers/s390/net/Kconfig @@ -74,6 +74,7 @@ config QETH_L2 def_tristate y prompt "qeth layer 2 device support" depends on QETH + depends on BRIDGE || BRIDGE=n help Select this option to be able to run qeth devices in layer 2 mode. To compile as a module, choose M. The module name is qeth_l2. diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 69afc0311dd1..4871f712b874 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -717,6 +717,227 @@ static int qeth_l2_dev2br_an_set(struct qeth_card *card, bool enable) return rc; } +struct qeth_l2_br2dev_event_work { + struct work_struct work; + struct net_device *br_dev; + struct net_device *lsync_dev; + struct net_device *dst_dev; + unsigned long event; + unsigned char addr[ETH_ALEN]; +}; + +static const struct net_device_ops qeth_l2_netdev_ops; + +static bool qeth_l2_must_learn(struct net_device *netdev, + struct net_device *dstdev) +{ + struct qeth_priv *priv; + + priv = netdev_priv(netdev); + return (netdev != dstdev && + (priv->brport_features & BR_LEARNING_SYNC) && + !(br_port_flag_is_set(netdev, BR_ISOLATED) && + br_port_flag_is_set(dstdev, BR_ISOLATED)) && + netdev->netdev_ops == &qeth_l2_netdev_ops); +} + +/** + * qeth_l2_br2dev_worker() - update local MACs + * @work: bridge to device FDB update + * + * Update local MACs of a learning_sync bridgeport so it can receive + * messages for a destination port. + * In case of an isolated learning_sync port, also update its isolated + * siblings. + */ +static void qeth_l2_br2dev_worker(struct work_struct *work) +{ + struct qeth_l2_br2dev_event_work *br2dev_event_work = + container_of(work, struct qeth_l2_br2dev_event_work, work); + struct net_device *lsyncdev = br2dev_event_work->lsync_dev; + struct net_device *dstdev = br2dev_event_work->dst_dev; + struct net_device *brdev = br2dev_event_work->br_dev; + unsigned long event = br2dev_event_work->event; + unsigned char *addr = br2dev_event_work->addr; + struct qeth_card *card = lsyncdev->ml_priv; + struct net_device *lowerdev; + struct list_head *iter; + int err = 0; + + kfree(br2dev_event_work); + QETH_CARD_TEXT_(card, 4, "b2dw%04x", event); + QETH_CARD_TEXT_(card, 4, "ma%012lx", ether_addr_to_u64(addr)); + + rcu_read_lock(); + /* Verify preconditions are still valid: */ + if (!netif_is_bridge_port(lsyncdev) || + brdev != netdev_master_upper_dev_get_rcu(lsyncdev)) + goto unlock; + if (!qeth_l2_must_learn(lsyncdev, dstdev)) + goto unlock; + + if (br_port_flag_is_set(lsyncdev, BR_ISOLATED)) { + /* Update lsyncdev and its isolated sibling(s): */ + iter = &brdev->adj_list.lower; + lowerdev = netdev_next_lower_dev_rcu(brdev, &iter); + while (lowerdev) { + if (br_port_flag_is_set(lowerdev, BR_ISOLATED)) { + switch (event) { + case SWITCHDEV_FDB_ADD_TO_DEVICE: + err = dev_uc_add(lowerdev, addr); + break; + case SWITCHDEV_FDB_DEL_TO_DEVICE: + err = dev_uc_del(lowerdev, addr); + break; + default: + break; + } + if (err) { + QETH_CARD_TEXT(card, 2, "b2derris"); + QETH_CARD_TEXT_(card, 2, + "err%02x%03d", event, + lowerdev->ifindex); + } + } + lowerdev = netdev_next_lower_dev_rcu(brdev, &iter); + } + } else { + switch (event) { + case SWITCHDEV_FDB_ADD_TO_DEVICE: + err = dev_uc_add(lsyncdev, addr); + break; + case SWITCHDEV_FDB_DEL_TO_DEVICE: + err = dev_uc_del(lsyncdev, addr); + break; + default: + break; + } + if (err) + QETH_CARD_TEXT_(card, 2, "b2derr%02x", event); + } + +unlock: + rcu_read_unlock(); + dev_put(brdev); + dev_put(lsyncdev); + dev_put(dstdev); +} + +static int qeth_l2_br2dev_queue_work(struct net_device *brdev, + struct net_device *lsyncdev, + struct net_device *dstdev, + unsigned long event, + const unsigned char *addr) +{ + struct qeth_l2_br2dev_event_work *worker_data; + struct qeth_card *card; + + worker_data = kzalloc(sizeof(*worker_data), GFP_ATOMIC); + if (!worker_data) + return -ENOMEM; + INIT_WORK(&worker_data->work, qeth_l2_br2dev_worker); + worker_data->br_dev = brdev; + worker_data->lsync_dev = lsyncdev; + worker_data->dst_dev = dstdev; + worker_data->event = event; + ether_addr_copy(worker_data->addr, addr); + + card = lsyncdev->ml_priv; + /* Take a reference on the sw port devices and the bridge */ + dev_hold(brdev); + dev_hold(lsyncdev); + dev_hold(dstdev); + queue_work(card->event_wq, &worker_data->work); + return 0; +} + +/* Called under rtnl_lock */ +static int qeth_l2_switchdev_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct net_device *dstdev, *brdev, *lowerdev; + struct switchdev_notifier_fdb_info *fdb_info; + struct switchdev_notifier_info *info = ptr; + struct list_head *iter; + struct qeth_card *card; + int rc; + + if (!(event == SWITCHDEV_FDB_ADD_TO_DEVICE || + event == SWITCHDEV_FDB_DEL_TO_DEVICE)) + return NOTIFY_DONE; + + dstdev = switchdev_notifier_info_to_dev(info); + brdev = netdev_master_upper_dev_get_rcu(dstdev); + if (!brdev || !netif_is_bridge_master(brdev)) + return NOTIFY_DONE; + fdb_info = container_of(info, + struct switchdev_notifier_fdb_info, + info); + iter = &brdev->adj_list.lower; + lowerdev = netdev_next_lower_dev_rcu(brdev, &iter); + while (lowerdev) { + if (qeth_l2_must_learn(lowerdev, dstdev)) { + card = lowerdev->ml_priv; + QETH_CARD_TEXT_(card, 4, "b2dqw%03x", event); + rc = qeth_l2_br2dev_queue_work(brdev, lowerdev, + dstdev, event, + fdb_info->addr); + if (rc) { + QETH_CARD_TEXT(card, 2, "b2dqwerr"); + return NOTIFY_BAD; + } + } + lowerdev = netdev_next_lower_dev_rcu(brdev, &iter); + } + return NOTIFY_DONE; +} + +static struct notifier_block qeth_l2_sw_notifier = { + .notifier_call = qeth_l2_switchdev_event, +}; + +static refcount_t qeth_l2_switchdev_notify_refcnt; + +/* Called under rtnl_lock */ +static void qeth_l2_br2dev_get(void) +{ + int rc; + + if (!refcount_inc_not_zero(&qeth_l2_switchdev_notify_refcnt)) { + rc = register_switchdev_notifier(&qeth_l2_sw_notifier); + if (rc) { + QETH_DBF_MESSAGE(2, + "failed to register qeth_l2_sw_notifier: %d\n", + rc); + } else { + refcount_set(&qeth_l2_switchdev_notify_refcnt, 1); + QETH_DBF_MESSAGE(2, "qeth_l2_sw_notifier registered\n"); + } + } + QETH_DBF_TEXT_(SETUP, 2, "b2d+%04d", + qeth_l2_switchdev_notify_refcnt.refs.counter); +} + +/* Called under rtnl_lock */ +static void qeth_l2_br2dev_put(void) +{ + int rc; + + if (refcount_dec_and_test(&qeth_l2_switchdev_notify_refcnt)) { + rc = unregister_switchdev_notifier(&qeth_l2_sw_notifier); + if (rc) { + QETH_DBF_MESSAGE(2, + "failed to unregister qeth_l2_sw_notifier: %d\n", + rc); + } else { + QETH_DBF_MESSAGE(2, + "qeth_l2_sw_notifier unregistered\n"); + } + } + QETH_DBF_TEXT_(SETUP, 2, "b2d-%04d", + qeth_l2_switchdev_notify_refcnt.refs.counter); +} + static int qeth_l2_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev, u32 filter_mask, int nlflags) @@ -810,16 +1031,19 @@ static int qeth_l2_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh, } else if (enable) { qeth_l2_set_pnso_mode(card, QETH_PNSO_ADDR_INFO); rc = qeth_l2_dev2br_an_set(card, true); - if (rc) + if (rc) { qeth_l2_set_pnso_mode(card, QETH_PNSO_NONE); - else + } else { priv->brport_features |= BR_LEARNING_SYNC; + qeth_l2_br2dev_get(); + } } else { rc = qeth_l2_dev2br_an_set(card, false); if (!rc) { qeth_l2_set_pnso_mode(card, QETH_PNSO_NONE); priv->brport_features ^= BR_LEARNING_SYNC; qeth_l2_dev2br_fdb_flush(card); + qeth_l2_br2dev_put(); } } mutex_unlock(&card->sbp_lock); @@ -2072,6 +2296,7 @@ static int qeth_l2_probe_device(struct ccwgroup_device *gdev) static void qeth_l2_remove_device(struct ccwgroup_device *gdev) { struct qeth_card *card = dev_get_drvdata(&gdev->dev); + struct qeth_priv *priv; if (gdev->dev.type != &qeth_l2_devtype) device_remove_groups(&gdev->dev, qeth_l2_attr_groups); @@ -2083,8 +2308,15 @@ static void qeth_l2_remove_device(struct ccwgroup_device *gdev) qeth_set_offline(card, card->discipline, false); cancel_work_sync(&card->close_dev_work); - if (card->dev->reg_state == NETREG_REGISTERED) + if (card->dev->reg_state == NETREG_REGISTERED) { + priv = netdev_priv(card->dev); + if (priv->brport_features & BR_LEARNING_SYNC) { + rtnl_lock(); + qeth_l2_br2dev_put(); + rtnl_unlock(); + } unregister_netdev(card->dev); + } } static int qeth_l2_set_online(struct qeth_card *card, bool carrier_ok) @@ -2207,6 +2439,7 @@ EXPORT_SYMBOL_GPL(qeth_l2_discipline); static int __init qeth_l2_init(void) { pr_info("register layer 2 discipline\n"); + refcount_set(&qeth_l2_switchdev_notify_refcnt, 0); return 0; } diff --git a/drivers/scsi/device_handler/scsi_dh_rdac.c b/drivers/scsi/device_handler/scsi_dh_rdac.c index 25f6e1ac9e7b..66652ab409cc 100644 --- a/drivers/scsi/device_handler/scsi_dh_rdac.c +++ b/drivers/scsi/device_handler/scsi_dh_rdac.c @@ -453,8 +453,8 @@ static int initialize_controller(struct scsi_device *sdev, if (!h->ctlr) err = SCSI_DH_RES_TEMP_UNAVAIL; else { - list_add_rcu(&h->node, &h->ctlr->dh_list); h->sdev = sdev; + list_add_rcu(&h->node, &h->ctlr->dh_list); } spin_unlock(&list_lock); err = SCSI_DH_OK; @@ -778,11 +778,11 @@ static void rdac_bus_detach( struct scsi_device *sdev ) spin_lock(&list_lock); if (h->ctlr) { list_del_rcu(&h->node); - h->sdev = NULL; kref_put(&h->ctlr->kref, release_controller); } spin_unlock(&list_lock); sdev->handler_data = NULL; + synchronize_rcu(); kfree(h); } diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c index bee1bec49c09..935b01ee44b7 100644 --- a/drivers/scsi/ibmvscsi/ibmvfc.c +++ b/drivers/scsi/ibmvscsi/ibmvfc.c @@ -807,6 +807,13 @@ static int ibmvfc_init_event_pool(struct ibmvfc_host *vhost, for (i = 0; i < size; ++i) { struct ibmvfc_event *evt = &pool->events[i]; + /* + * evt->active states + * 1 = in flight + * 0 = being completed + * -1 = free/freed + */ + atomic_set(&evt->active, -1); atomic_set(&evt->free, 1); evt->crq.valid = 0x80; evt->crq.ioba = cpu_to_be64(pool->iu_token + (sizeof(*evt->xfer_iu) * i)); @@ -1017,6 +1024,7 @@ static void ibmvfc_free_event(struct ibmvfc_event *evt) BUG_ON(!ibmvfc_valid_event(pool, evt)); BUG_ON(atomic_inc_return(&evt->free) != 1); + BUG_ON(atomic_dec_and_test(&evt->active)); spin_lock_irqsave(&evt->queue->l_lock, flags); list_add_tail(&evt->queue_list, &evt->queue->free); @@ -1072,6 +1080,12 @@ static void ibmvfc_complete_purge(struct list_head *purge_list) **/ static void ibmvfc_fail_request(struct ibmvfc_event *evt, int error_code) { + /* + * Anything we are failing should still be active. Otherwise, it + * implies we already got a response for the command and are doing + * something bad like double completing it. + */ + BUG_ON(!atomic_dec_and_test(&evt->active)); if (evt->cmnd) { evt->cmnd->result = (error_code << 16); evt->done = ibmvfc_scsi_eh_done; @@ -1723,6 +1737,7 @@ static int ibmvfc_send_event(struct ibmvfc_event *evt, evt->done(evt); } else { + atomic_set(&evt->active, 1); spin_unlock_irqrestore(&evt->queue->l_lock, flags); ibmvfc_trc_start(evt); } @@ -3251,7 +3266,7 @@ static void ibmvfc_handle_crq(struct ibmvfc_crq *crq, struct ibmvfc_host *vhost, return; } - if (unlikely(atomic_read(&evt->free))) { + if (unlikely(atomic_dec_if_positive(&evt->active))) { dev_err(vhost->dev, "Received duplicate correlation_token 0x%08llx!\n", crq->ioba); return; @@ -3778,7 +3793,7 @@ static void ibmvfc_handle_scrq(struct ibmvfc_crq *crq, struct ibmvfc_host *vhost return; } - if (unlikely(atomic_read(&evt->free))) { + if (unlikely(atomic_dec_if_positive(&evt->active))) { dev_err(vhost->dev, "Received duplicate correlation_token 0x%08llx!\n", crq->ioba); return; diff --git a/drivers/scsi/ibmvscsi/ibmvfc.h b/drivers/scsi/ibmvscsi/ibmvfc.h index 4f0f3baefae4..92fb889d7eb0 100644 --- a/drivers/scsi/ibmvscsi/ibmvfc.h +++ b/drivers/scsi/ibmvscsi/ibmvfc.h @@ -745,6 +745,7 @@ struct ibmvfc_event { struct ibmvfc_target *tgt; struct scsi_cmnd *cmnd; atomic_t free; + atomic_t active; union ibmvfc_iu *xfer_iu; void (*done)(struct ibmvfc_event *evt); void (*_done)(struct ibmvfc_event *evt); diff --git a/drivers/scsi/megaraid/megaraid_mm.c b/drivers/scsi/megaraid/megaraid_mm.c index abf7b401f5b9..c509440bd161 100644 --- a/drivers/scsi/megaraid/megaraid_mm.c +++ b/drivers/scsi/megaraid/megaraid_mm.c @@ -238,7 +238,7 @@ mraid_mm_get_adapter(mimd_t __user *umimd, int *rval) mimd_t mimd; uint32_t adapno; int iterator; - + bool is_found; if (copy_from_user(&mimd, umimd, sizeof(mimd_t))) { *rval = -EFAULT; @@ -254,12 +254,16 @@ mraid_mm_get_adapter(mimd_t __user *umimd, int *rval) adapter = NULL; iterator = 0; + is_found = false; list_for_each_entry(adapter, &adapters_list_g, list) { - if (iterator++ == adapno) break; + if (iterator++ == adapno) { + is_found = true; + break; + } } - if (!adapter) { + if (!is_found) { *rval = -ENODEV; return NULL; } @@ -725,6 +729,7 @@ ioctl_done(uioc_t *kioc) uint32_t adapno; int iterator; mraid_mmadp_t* adapter; + bool is_found; /* * When the kioc returns from driver, make sure it still doesn't @@ -747,19 +752,23 @@ ioctl_done(uioc_t *kioc) iterator = 0; adapter = NULL; adapno = kioc->adapno; + is_found = false; con_log(CL_ANN, ( KERN_WARNING "megaraid cmm: completed " "ioctl that was timedout before\n")); list_for_each_entry(adapter, &adapters_list_g, list) { - if (iterator++ == adapno) break; + if (iterator++ == adapno) { + is_found = true; + break; + } } kioc->timedout = 0; - if (adapter) { + if (is_found) mraid_mm_dealloc_kioc( adapter, kioc ); - } + } else { wake_up(&wait_q); diff --git a/drivers/scsi/pm8001/pm8001_sas.c b/drivers/scsi/pm8001/pm8001_sas.c index 48548a95327b..32e60f0c3b14 100644 --- a/drivers/scsi/pm8001/pm8001_sas.c +++ b/drivers/scsi/pm8001/pm8001_sas.c @@ -684,8 +684,7 @@ int pm8001_dev_found(struct domain_device *dev) void pm8001_task_done(struct sas_task *task) { - if (!del_timer(&task->slow_task->timer)) - return; + del_timer(&task->slow_task->timer); complete(&task->slow_task->completion); } @@ -693,9 +692,14 @@ static void pm8001_tmf_timedout(struct timer_list *t) { struct sas_task_slow *slow = from_timer(slow, t, timer); struct sas_task *task = slow->task; + unsigned long flags; - task->task_state_flags |= SAS_TASK_STATE_ABORTED; - complete(&task->slow_task->completion); + spin_lock_irqsave(&task->task_state_lock, flags); + if (!(task->task_state_flags & SAS_TASK_STATE_DONE)) { + task->task_state_flags |= SAS_TASK_STATE_ABORTED; + complete(&task->slow_task->completion); + } + spin_unlock_irqrestore(&task->task_state_lock, flags); } #define PM8001_TASK_TIMEOUT 20 @@ -748,13 +752,10 @@ static int pm8001_exec_internal_tmf_task(struct domain_device *dev, } res = -TMF_RESP_FUNC_FAILED; /* Even TMF timed out, return direct. */ - if ((task->task_state_flags & SAS_TASK_STATE_ABORTED)) { - if (!(task->task_state_flags & SAS_TASK_STATE_DONE)) { - pm8001_dbg(pm8001_ha, FAIL, - "TMF task[%x]timeout.\n", - tmf->tmf); - goto ex_err; - } + if (task->task_state_flags & SAS_TASK_STATE_ABORTED) { + pm8001_dbg(pm8001_ha, FAIL, "TMF task[%x]timeout.\n", + tmf->tmf); + goto ex_err; } if (task->task_status.resp == SAS_TASK_COMPLETE && @@ -834,12 +835,9 @@ pm8001_exec_internal_task_abort(struct pm8001_hba_info *pm8001_ha, wait_for_completion(&task->slow_task->completion); res = TMF_RESP_FUNC_FAILED; /* Even TMF timed out, return direct. */ - if ((task->task_state_flags & SAS_TASK_STATE_ABORTED)) { - if (!(task->task_state_flags & SAS_TASK_STATE_DONE)) { - pm8001_dbg(pm8001_ha, FAIL, - "TMF task timeout.\n"); - goto ex_err; - } + if (task->task_state_flags & SAS_TASK_STATE_ABORTED) { + pm8001_dbg(pm8001_ha, FAIL, "TMF task timeout.\n"); + goto ex_err; } if (task->task_status.resp == SAS_TASK_COMPLETE && diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index b059bf2b61d4..5b6996a2401b 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -475,7 +475,8 @@ static struct scsi_target *scsi_alloc_target(struct device *parent, error = shost->hostt->target_alloc(starget); if(error) { - dev_printk(KERN_ERR, dev, "target allocation failed, error %d\n", error); + if (error != -ENXIO) + dev_err(dev, "target allocation failed, error %d\n", error); /* don't want scsi_target_reap to do the final * put because it will be under the host lock */ scsi_target_destroy(starget); diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index 32489d25158f..ae9bfc658203 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -807,11 +807,14 @@ store_state_field(struct device *dev, struct device_attribute *attr, mutex_lock(&sdev->state_mutex); ret = scsi_device_set_state(sdev, state); /* - * If the device state changes to SDEV_RUNNING, we need to run - * the queue to avoid I/O hang. + * If the device state changes to SDEV_RUNNING, we need to + * rescan the device to revalidate it, and run the queue to + * avoid I/O hang. */ - if (ret == 0 && state == SDEV_RUNNING) + if (ret == 0 && state == SDEV_RUNNING) { + scsi_rescan_device(dev); blk_mq_run_hw_queues(sdev->request_queue, true); + } mutex_unlock(&sdev->state_mutex); return ret == 0 ? count : -EINVAL; diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c index 94c254e9012e..a6d3ac0a6cbc 100644 --- a/drivers/scsi/sr.c +++ b/drivers/scsi/sr.c @@ -221,7 +221,7 @@ static unsigned int sr_get_events(struct scsi_device *sdev) else if (med->media_event_code == 2) return DISK_EVENT_MEDIA_CHANGE; else if (med->media_event_code == 3) - return DISK_EVENT_EJECT_REQUEST; + return DISK_EVENT_MEDIA_CHANGE; return 0; } diff --git a/drivers/staging/qlge/qlge_main.c b/drivers/staging/qlge/qlge_main.c index 19a02e958865..8fcdf89da8aa 100644 --- a/drivers/staging/qlge/qlge_main.c +++ b/drivers/staging/qlge/qlge_main.c @@ -4547,7 +4547,8 @@ static int qlge_probe(struct pci_dev *pdev, static int cards_found; int err; - devlink = devlink_alloc(&qlge_devlink_ops, sizeof(struct qlge_adapter)); + devlink = devlink_alloc(&qlge_devlink_ops, sizeof(struct qlge_adapter), + &pdev->dev); if (!devlink) return -ENOMEM; @@ -4613,7 +4614,7 @@ static int qlge_probe(struct pci_dev *pdev, goto netdev_free; } - err = devlink_register(devlink, &pdev->dev); + err = devlink_register(devlink); if (err) goto netdev_free; diff --git a/fs/cifs/file.c b/fs/cifs/file.c index cd108607a070..0a72840a88f1 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -4619,7 +4619,7 @@ read_complete: static int cifs_readpage(struct file *file, struct page *page) { - loff_t offset = (loff_t)page->index << PAGE_SHIFT; + loff_t offset = page_file_offset(page); int rc = -EACCES; unsigned int xid; diff --git a/fs/cifs/fs_context.c b/fs/cifs/fs_context.c index 9a59d7ff9a11..eed59bc1d913 100644 --- a/fs/cifs/fs_context.c +++ b/fs/cifs/fs_context.c @@ -925,6 +925,13 @@ static int smb3_fs_context_parse_param(struct fs_context *fc, ctx->cred_uid = uid; ctx->cruid_specified = true; break; + case Opt_backupuid: + uid = make_kuid(current_user_ns(), result.uint_32); + if (!uid_valid(uid)) + goto cifs_parse_mount_err; + ctx->backupuid = uid; + ctx->backupuid_specified = true; + break; case Opt_backupgid: gid = make_kgid(current_user_ns(), result.uint_32); if (!gid_valid(gid)) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 23d6f4d71649..2dfd0d8297eb 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -3617,7 +3617,8 @@ static int smb3_simple_fallocate_write_range(unsigned int xid, char *buf) { struct cifs_io_parms io_parms = {0}; - int rc, nbytes; + int nbytes; + int rc = 0; struct kvec iov[2]; io_parms.netfid = cfile->fid.netfid; diff --git a/fs/pipe.c b/fs/pipe.c index 9ef4231cce61..8e6ef62aeb1c 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -32,6 +32,21 @@ #include "internal.h" /* + * New pipe buffers will be restricted to this size while the user is exceeding + * their pipe buffer quota. The general pipe use case needs at least two + * buffers: one for data yet to be read, and one for new data. If this is less + * than two, then a write to a non-empty pipe may block even if the pipe is not + * full. This can occur with GNU make jobserver or similar uses of pipes as + * semaphores: multiple processes may be waiting to write tokens back to the + * pipe before reading tokens: https://lore.kernel.org/lkml/1628086770.5rn8p04n6j.none@localhost/. + * + * Users can reduce their pipe buffers with F_SETPIPE_SZ below this at their + * own risk, namely: pipe writes to non-full pipes may block until the pipe is + * emptied. + */ +#define PIPE_MIN_DEF_BUFFERS 2 + +/* * The max size that a non-root user is allowed to grow the pipe. Can * be set by root in /proc/sys/fs/pipe-max-size */ @@ -781,8 +796,8 @@ struct pipe_inode_info *alloc_pipe_info(void) user_bufs = account_pipe_buffers(user, 0, pipe_bufs); if (too_many_pipe_buffers_soft(user_bufs) && pipe_is_unprivileged_user()) { - user_bufs = account_pipe_buffers(user, pipe_bufs, 1); - pipe_bufs = 1; + user_bufs = account_pipe_buffers(user, pipe_bufs, PIPE_MIN_DEF_BUFFERS); + pipe_bufs = PIPE_MIN_DEF_BUFFERS; } if (too_many_pipe_buffers_hard(user_bufs) && pipe_is_unprivileged_user()) diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h index d548ea4b6aab..2c5bcbc19264 100644 --- a/fs/xfs/libxfs/xfs_log_format.h +++ b/fs/xfs/libxfs/xfs_log_format.h @@ -411,7 +411,16 @@ struct xfs_log_dinode { /* start of the extended dinode, writable fields */ uint32_t di_crc; /* CRC of the inode */ uint64_t di_changecount; /* number of attribute changes */ - xfs_lsn_t di_lsn; /* flush sequence */ + + /* + * The LSN we write to this field during formatting is not a reflection + * of the current on-disk LSN. It should never be used for recovery + * sequencing, nor should it be recovered into the on-disk inode at all. + * See xlog_recover_inode_commit_pass2() and xfs_log_dinode_to_disk() + * for details. + */ + xfs_lsn_t di_lsn; + uint64_t di_flags2; /* more random flags */ uint32_t di_cowextsize; /* basic cow extent size for file */ uint8_t di_pad2[12]; /* more padding for future expansion */ diff --git a/fs/xfs/xfs_buf_item_recover.c b/fs/xfs/xfs_buf_item_recover.c index d44e8b4a3391..4775485b4062 100644 --- a/fs/xfs/xfs_buf_item_recover.c +++ b/fs/xfs/xfs_buf_item_recover.c @@ -698,7 +698,8 @@ xlog_recover_do_inode_buffer( static xfs_lsn_t xlog_recover_get_buf_lsn( struct xfs_mount *mp, - struct xfs_buf *bp) + struct xfs_buf *bp, + struct xfs_buf_log_format *buf_f) { uint32_t magic32; uint16_t magic16; @@ -706,11 +707,20 @@ xlog_recover_get_buf_lsn( void *blk = bp->b_addr; uuid_t *uuid; xfs_lsn_t lsn = -1; + uint16_t blft; /* v4 filesystems always recover immediately */ if (!xfs_sb_version_hascrc(&mp->m_sb)) goto recover_immediately; + /* + * realtime bitmap and summary file blocks do not have magic numbers or + * UUIDs, so we must recover them immediately. + */ + blft = xfs_blft_from_flags(buf_f); + if (blft == XFS_BLFT_RTBITMAP_BUF || blft == XFS_BLFT_RTSUMMARY_BUF) + goto recover_immediately; + magic32 = be32_to_cpu(*(__be32 *)blk); switch (magic32) { case XFS_ABTB_CRC_MAGIC: @@ -796,6 +806,7 @@ xlog_recover_get_buf_lsn( switch (magicda) { case XFS_DIR3_LEAF1_MAGIC: case XFS_DIR3_LEAFN_MAGIC: + case XFS_ATTR3_LEAF_MAGIC: case XFS_DA3_NODE_MAGIC: lsn = be64_to_cpu(((struct xfs_da3_blkinfo *)blk)->lsn); uuid = &((struct xfs_da3_blkinfo *)blk)->uuid; @@ -919,7 +930,7 @@ xlog_recover_buf_commit_pass2( * the verifier will be reset to match whatever recover turns that * buffer into. */ - lsn = xlog_recover_get_buf_lsn(mp, bp); + lsn = xlog_recover_get_buf_lsn(mp, bp, buf_f); if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) { trace_xfs_log_recover_buf_skip(log, buf_f); xlog_recover_validate_buf_type(mp, bp, buf_f, NULLCOMMITLSN); diff --git a/fs/xfs/xfs_inode_item_recover.c b/fs/xfs/xfs_inode_item_recover.c index 7b79518b6c20..e0072a6cd2d3 100644 --- a/fs/xfs/xfs_inode_item_recover.c +++ b/fs/xfs/xfs_inode_item_recover.c @@ -145,7 +145,8 @@ xfs_log_dinode_to_disk_ts( STATIC void xfs_log_dinode_to_disk( struct xfs_log_dinode *from, - struct xfs_dinode *to) + struct xfs_dinode *to, + xfs_lsn_t lsn) { to->di_magic = cpu_to_be16(from->di_magic); to->di_mode = cpu_to_be16(from->di_mode); @@ -182,7 +183,7 @@ xfs_log_dinode_to_disk( to->di_flags2 = cpu_to_be64(from->di_flags2); to->di_cowextsize = cpu_to_be32(from->di_cowextsize); to->di_ino = cpu_to_be64(from->di_ino); - to->di_lsn = cpu_to_be64(from->di_lsn); + to->di_lsn = cpu_to_be64(lsn); memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2)); uuid_copy(&to->di_uuid, &from->di_uuid); to->di_flushiter = 0; @@ -261,16 +262,25 @@ xlog_recover_inode_commit_pass2( } /* - * If the inode has an LSN in it, recover the inode only if it's less - * than the lsn of the transaction we are replaying. Note: we still - * need to replay an owner change even though the inode is more recent - * than the transaction as there is no guarantee that all the btree - * blocks are more recent than this transaction, too. + * If the inode has an LSN in it, recover the inode only if the on-disk + * inode's LSN is older than the lsn of the transaction we are + * replaying. We can have multiple checkpoints with the same start LSN, + * so the current LSN being equal to the on-disk LSN doesn't necessarily + * mean that the on-disk inode is more recent than the change being + * replayed. + * + * We must check the current_lsn against the on-disk inode + * here because the we can't trust the log dinode to contain a valid LSN + * (see comment below before replaying the log dinode for details). + * + * Note: we still need to replay an owner change even though the inode + * is more recent than the transaction as there is no guarantee that all + * the btree blocks are more recent than this transaction, too. */ if (dip->di_version >= 3) { xfs_lsn_t lsn = be64_to_cpu(dip->di_lsn); - if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) { + if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) > 0) { trace_xfs_log_recover_inode_skip(log, in_f); error = 0; goto out_owner_change; @@ -368,8 +378,17 @@ xlog_recover_inode_commit_pass2( goto out_release; } - /* recover the log dinode inode into the on disk inode */ - xfs_log_dinode_to_disk(ldip, dip); + /* + * Recover the log dinode inode into the on disk inode. + * + * The LSN in the log dinode is garbage - it can be zero or reflect + * stale in-memory runtime state that isn't coherent with the changes + * logged in this transaction or the changes written to the on-disk + * inode. Hence we write the current lSN into the inode because that + * matches what xfs_iflush() would write inode the inode when flushing + * the changes in this transaction. + */ + xfs_log_dinode_to_disk(ldip, dip, current_lsn); fields = in_f->ilf_fields; if (fields & XFS_ILOG_DEV) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 36fa2650b081..60ac5fd63f1e 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -78,13 +78,12 @@ xlog_verify_iclog( STATIC void xlog_verify_tail_lsn( struct xlog *log, - struct xlog_in_core *iclog, - xfs_lsn_t tail_lsn); + struct xlog_in_core *iclog); #else #define xlog_verify_dest_ptr(a,b) #define xlog_verify_grant_tail(a) #define xlog_verify_iclog(a,b,c) -#define xlog_verify_tail_lsn(a,b,c) +#define xlog_verify_tail_lsn(a,b) #endif STATIC int @@ -487,51 +486,80 @@ out_error: return error; } -static bool -__xlog_state_release_iclog( - struct xlog *log, - struct xlog_in_core *iclog) -{ - lockdep_assert_held(&log->l_icloglock); - - if (iclog->ic_state == XLOG_STATE_WANT_SYNC) { - /* update tail before writing to iclog */ - xfs_lsn_t tail_lsn = xlog_assign_tail_lsn(log->l_mp); - - iclog->ic_state = XLOG_STATE_SYNCING; - iclog->ic_header.h_tail_lsn = cpu_to_be64(tail_lsn); - xlog_verify_tail_lsn(log, iclog, tail_lsn); - /* cycle incremented when incrementing curr_block */ - trace_xlog_iclog_syncing(iclog, _RET_IP_); - return true; - } - - ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE); - return false; -} - /* * Flush iclog to disk if this is the last reference to the given iclog and the * it is in the WANT_SYNC state. + * + * If the caller passes in a non-zero @old_tail_lsn and the current log tail + * does not match, there may be metadata on disk that must be persisted before + * this iclog is written. To satisfy that requirement, set the + * XLOG_ICL_NEED_FLUSH flag as a condition for writing this iclog with the new + * log tail value. + * + * If XLOG_ICL_NEED_FUA is already set on the iclog, we need to ensure that the + * log tail is updated correctly. NEED_FUA indicates that the iclog will be + * written to stable storage, and implies that a commit record is contained + * within the iclog. We need to ensure that the log tail does not move beyond + * the tail that the first commit record in the iclog ordered against, otherwise + * correct recovery of that checkpoint becomes dependent on future operations + * performed on this iclog. + * + * Hence if NEED_FUA is set and the current iclog tail lsn is empty, write the + * current tail into iclog. Once the iclog tail is set, future operations must + * not modify it, otherwise they potentially violate ordering constraints for + * the checkpoint commit that wrote the initial tail lsn value. The tail lsn in + * the iclog will get zeroed on activation of the iclog after sync, so we + * always capture the tail lsn on the iclog on the first NEED_FUA release + * regardless of the number of active reference counts on this iclog. */ + int xlog_state_release_iclog( struct xlog *log, - struct xlog_in_core *iclog) + struct xlog_in_core *iclog, + xfs_lsn_t old_tail_lsn) { + xfs_lsn_t tail_lsn; lockdep_assert_held(&log->l_icloglock); trace_xlog_iclog_release(iclog, _RET_IP_); if (iclog->ic_state == XLOG_STATE_IOERROR) return -EIO; - if (atomic_dec_and_test(&iclog->ic_refcnt) && - __xlog_state_release_iclog(log, iclog)) { - spin_unlock(&log->l_icloglock); - xlog_sync(log, iclog); - spin_lock(&log->l_icloglock); + /* + * Grabbing the current log tail needs to be atomic w.r.t. the writing + * of the tail LSN into the iclog so we guarantee that the log tail does + * not move between deciding if a cache flush is required and writing + * the LSN into the iclog below. + */ + if (old_tail_lsn || iclog->ic_state == XLOG_STATE_WANT_SYNC) { + tail_lsn = xlog_assign_tail_lsn(log->l_mp); + + if (old_tail_lsn && tail_lsn != old_tail_lsn) + iclog->ic_flags |= XLOG_ICL_NEED_FLUSH; + + if ((iclog->ic_flags & XLOG_ICL_NEED_FUA) && + !iclog->ic_header.h_tail_lsn) + iclog->ic_header.h_tail_lsn = cpu_to_be64(tail_lsn); } + if (!atomic_dec_and_test(&iclog->ic_refcnt)) + return 0; + + if (iclog->ic_state != XLOG_STATE_WANT_SYNC) { + ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE); + return 0; + } + + iclog->ic_state = XLOG_STATE_SYNCING; + if (!iclog->ic_header.h_tail_lsn) + iclog->ic_header.h_tail_lsn = cpu_to_be64(tail_lsn); + xlog_verify_tail_lsn(log, iclog); + trace_xlog_iclog_syncing(iclog, _RET_IP_); + + spin_unlock(&log->l_icloglock); + xlog_sync(log, iclog); + spin_lock(&log->l_icloglock); return 0; } @@ -774,6 +802,21 @@ xfs_log_mount_cancel( } /* + * Flush out the iclog to disk ensuring that device caches are flushed and + * the iclog hits stable storage before any completion waiters are woken. + */ +static inline int +xlog_force_iclog( + struct xlog_in_core *iclog) +{ + atomic_inc(&iclog->ic_refcnt); + iclog->ic_flags |= XLOG_ICL_NEED_FLUSH | XLOG_ICL_NEED_FUA; + if (iclog->ic_state == XLOG_STATE_ACTIVE) + xlog_state_switch_iclogs(iclog->ic_log, iclog, 0); + return xlog_state_release_iclog(iclog->ic_log, iclog, 0); +} + +/* * Wait for the iclog and all prior iclogs to be written disk as required by the * log force state machine. Waiting on ic_force_wait ensures iclog completions * have been ordered and callbacks run before we are woken here, hence @@ -827,13 +870,6 @@ xlog_write_unmount_record( /* account for space used by record data */ ticket->t_curr_res -= sizeof(ulf); - /* - * For external log devices, we need to flush the data device cache - * first to ensure all metadata writeback is on stable storage before we - * stamp the tail LSN into the unmount record. - */ - if (log->l_targ != log->l_mp->m_ddev_targp) - blkdev_issue_flush(log->l_targ->bt_bdev); return xlog_write(log, &vec, ticket, NULL, NULL, XLOG_UNMOUNT_TRANS); } @@ -865,18 +901,7 @@ out_err: spin_lock(&log->l_icloglock); iclog = log->l_iclog; - atomic_inc(&iclog->ic_refcnt); - if (iclog->ic_state == XLOG_STATE_ACTIVE) - xlog_state_switch_iclogs(log, iclog, 0); - else - ASSERT(iclog->ic_state == XLOG_STATE_WANT_SYNC || - iclog->ic_state == XLOG_STATE_IOERROR); - /* - * Ensure the journal is fully flushed and on stable storage once the - * iclog containing the unmount record is written. - */ - iclog->ic_flags |= (XLOG_ICL_NEED_FLUSH | XLOG_ICL_NEED_FUA); - error = xlog_state_release_iclog(log, iclog); + error = xlog_force_iclog(iclog); xlog_wait_on_iclog(iclog); if (tic) { @@ -1796,10 +1821,20 @@ xlog_write_iclog( * metadata writeback and causing priority inversions. */ iclog->ic_bio.bi_opf = REQ_OP_WRITE | REQ_META | REQ_SYNC | REQ_IDLE; - if (iclog->ic_flags & XLOG_ICL_NEED_FLUSH) + if (iclog->ic_flags & XLOG_ICL_NEED_FLUSH) { iclog->ic_bio.bi_opf |= REQ_PREFLUSH; + /* + * For external log devices, we also need to flush the data + * device cache first to ensure all metadata writeback covered + * by the LSN in this iclog is on stable storage. This is slow, + * but it *must* complete before we issue the external log IO. + */ + if (log->l_targ != log->l_mp->m_ddev_targp) + blkdev_issue_flush(log->l_mp->m_ddev_targp->bt_bdev); + } if (iclog->ic_flags & XLOG_ICL_NEED_FUA) iclog->ic_bio.bi_opf |= REQ_FUA; + iclog->ic_flags &= ~(XLOG_ICL_NEED_FLUSH | XLOG_ICL_NEED_FUA); if (xlog_map_iclog_data(&iclog->ic_bio, iclog->ic_data, count)) { @@ -2310,7 +2345,7 @@ xlog_write_copy_finish( return 0; release_iclog: - error = xlog_state_release_iclog(log, iclog); + error = xlog_state_release_iclog(log, iclog, 0); spin_unlock(&log->l_icloglock); return error; } @@ -2529,7 +2564,7 @@ next_lv: ASSERT(optype & XLOG_COMMIT_TRANS); *commit_iclog = iclog; } else { - error = xlog_state_release_iclog(log, iclog); + error = xlog_state_release_iclog(log, iclog, 0); } spin_unlock(&log->l_icloglock); @@ -2567,6 +2602,7 @@ xlog_state_activate_iclog( memset(iclog->ic_header.h_cycle_data, 0, sizeof(iclog->ic_header.h_cycle_data)); iclog->ic_header.h_lsn = 0; + iclog->ic_header.h_tail_lsn = 0; } /* @@ -2967,7 +3003,7 @@ restart: * reference to the iclog. */ if (!atomic_add_unless(&iclog->ic_refcnt, -1, 1)) - error = xlog_state_release_iclog(log, iclog); + error = xlog_state_release_iclog(log, iclog, 0); spin_unlock(&log->l_icloglock); if (error) return error; @@ -3132,6 +3168,35 @@ xlog_state_switch_iclogs( } /* + * Force the iclog to disk and check if the iclog has been completed before + * xlog_force_iclog() returns. This can happen on synchronous (e.g. + * pmem) or fast async storage because we drop the icloglock to issue the IO. + * If completion has already occurred, tell the caller so that it can avoid an + * unnecessary wait on the iclog. + */ +static int +xlog_force_and_check_iclog( + struct xlog_in_core *iclog, + bool *completed) +{ + xfs_lsn_t lsn = be64_to_cpu(iclog->ic_header.h_lsn); + int error; + + *completed = false; + error = xlog_force_iclog(iclog); + if (error) + return error; + + /* + * If the iclog has already been completed and reused the header LSN + * will have been rewritten by completion + */ + if (be64_to_cpu(iclog->ic_header.h_lsn) != lsn) + *completed = true; + return 0; +} + +/* * Write out all data in the in-core log as of this exact moment in time. * * Data may be written to the in-core log during this call. However, @@ -3165,7 +3230,6 @@ xfs_log_force( { struct xlog *log = mp->m_log; struct xlog_in_core *iclog; - xfs_lsn_t lsn; XFS_STATS_INC(mp, xs_log_force); trace_xfs_log_force(mp, 0, _RET_IP_); @@ -3193,39 +3257,33 @@ xfs_log_force( iclog = iclog->ic_prev; } else if (iclog->ic_state == XLOG_STATE_ACTIVE) { if (atomic_read(&iclog->ic_refcnt) == 0) { - /* - * We are the only one with access to this iclog. - * - * Flush it out now. There should be a roundoff of zero - * to show that someone has already taken care of the - * roundoff from the previous sync. - */ - atomic_inc(&iclog->ic_refcnt); - lsn = be64_to_cpu(iclog->ic_header.h_lsn); - xlog_state_switch_iclogs(log, iclog, 0); - if (xlog_state_release_iclog(log, iclog)) + /* We have exclusive access to this iclog. */ + bool completed; + + if (xlog_force_and_check_iclog(iclog, &completed)) goto out_error; - if (be64_to_cpu(iclog->ic_header.h_lsn) != lsn) + if (completed) goto out_unlock; } else { /* - * Someone else is writing to this iclog. - * - * Use its call to flush out the data. However, the - * other thread may not force out this LR, so we mark - * it WANT_SYNC. + * Someone else is still writing to this iclog, so we + * need to ensure that when they release the iclog it + * gets synced immediately as we may be waiting on it. */ xlog_state_switch_iclogs(log, iclog, 0); } - } else { - /* - * If the head iclog is not active nor dirty, we just attach - * ourselves to the head and go to sleep if necessary. - */ - ; } + /* + * The iclog we are about to wait on may contain the checkpoint pushed + * by the above xlog_cil_force() call, but it may not have been pushed + * to disk yet. Like the ACTIVE case above, we need to make sure caches + * are flushed when this iclog is written. + */ + if (iclog->ic_state == XLOG_STATE_WANT_SYNC) + iclog->ic_flags |= XLOG_ICL_NEED_FLUSH | XLOG_ICL_NEED_FUA; + if (flags & XFS_LOG_SYNC) return xlog_wait_on_iclog(iclog); out_unlock: @@ -3245,6 +3303,7 @@ xlog_force_lsn( bool already_slept) { struct xlog_in_core *iclog; + bool completed; spin_lock(&log->l_icloglock); iclog = log->l_iclog; @@ -3258,7 +3317,8 @@ xlog_force_lsn( goto out_unlock; } - if (iclog->ic_state == XLOG_STATE_ACTIVE) { + switch (iclog->ic_state) { + case XLOG_STATE_ACTIVE: /* * We sleep here if we haven't already slept (e.g. this is the * first time we've looked at the correct iclog buf) and the @@ -3281,12 +3341,31 @@ xlog_force_lsn( &log->l_icloglock); return -EAGAIN; } - atomic_inc(&iclog->ic_refcnt); - xlog_state_switch_iclogs(log, iclog, 0); - if (xlog_state_release_iclog(log, iclog)) + if (xlog_force_and_check_iclog(iclog, &completed)) goto out_error; if (log_flushed) *log_flushed = 1; + if (completed) + goto out_unlock; + break; + case XLOG_STATE_WANT_SYNC: + /* + * This iclog may contain the checkpoint pushed by the + * xlog_cil_force_seq() call, but there are other writers still + * accessing it so it hasn't been pushed to disk yet. Like the + * ACTIVE case above, we need to make sure caches are flushed + * when this iclog is written. + */ + iclog->ic_flags |= XLOG_ICL_NEED_FLUSH | XLOG_ICL_NEED_FUA; + break; + default: + /* + * The entire checkpoint was written by the CIL force and is on + * its way to disk already. It will be stable when it + * completes, so we don't need to manipulate caches here at all. + * We just need to wait for completion if necessary. + */ + break; } if (flags & XFS_LOG_SYNC) @@ -3559,10 +3638,10 @@ xlog_verify_grant_tail( STATIC void xlog_verify_tail_lsn( struct xlog *log, - struct xlog_in_core *iclog, - xfs_lsn_t tail_lsn) + struct xlog_in_core *iclog) { - int blocks; + xfs_lsn_t tail_lsn = be64_to_cpu(iclog->ic_header.h_tail_lsn); + int blocks; if (CYCLE_LSN(tail_lsn) == log->l_prev_cycle) { blocks = diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index b128aaa9b870..4c44bc3786c0 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -654,8 +654,9 @@ xlog_cil_push_work( struct xfs_trans_header thdr; struct xfs_log_iovec lhdr; struct xfs_log_vec lvhdr = { NULL }; + xfs_lsn_t preflush_tail_lsn; xfs_lsn_t commit_lsn; - xfs_lsn_t push_seq; + xfs_csn_t push_seq; struct bio bio; DECLARE_COMPLETION_ONSTACK(bdev_flush); @@ -730,7 +731,15 @@ xlog_cil_push_work( * because we hold the flush lock exclusively. Hence we can now issue * a cache flush to ensure all the completed metadata in the journal we * are about to overwrite is on stable storage. + * + * Because we are issuing this cache flush before we've written the + * tail lsn to the iclog, we can have metadata IO completions move the + * tail forwards between the completion of this flush and the iclog + * being written. In this case, we need to re-issue the cache flush + * before the iclog write. To detect whether the log tail moves, sample + * the tail LSN *before* we issue the flush. */ + preflush_tail_lsn = atomic64_read(&log->l_tail_lsn); xfs_flush_bdev_async(&bio, log->l_mp->m_ddev_targp->bt_bdev, &bdev_flush); @@ -941,7 +950,7 @@ restart: * storage. */ commit_iclog->ic_flags |= XLOG_ICL_NEED_FUA; - xlog_state_release_iclog(log, commit_iclog); + xlog_state_release_iclog(log, commit_iclog, preflush_tail_lsn); spin_unlock(&log->l_icloglock); return; diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 4c41bbfa33b0..f3e79a45d60a 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -59,6 +59,16 @@ enum xlog_iclog_state { { XLOG_STATE_DIRTY, "XLOG_STATE_DIRTY" }, \ { XLOG_STATE_IOERROR, "XLOG_STATE_IOERROR" } +/* + * In core log flags + */ +#define XLOG_ICL_NEED_FLUSH (1 << 0) /* iclog needs REQ_PREFLUSH */ +#define XLOG_ICL_NEED_FUA (1 << 1) /* iclog needs REQ_FUA */ + +#define XLOG_ICL_STRINGS \ + { XLOG_ICL_NEED_FLUSH, "XLOG_ICL_NEED_FLUSH" }, \ + { XLOG_ICL_NEED_FUA, "XLOG_ICL_NEED_FUA" } + /* * Log ticket flags @@ -143,9 +153,6 @@ enum xlog_iclog_state { #define XLOG_COVER_OPS 5 -#define XLOG_ICL_NEED_FLUSH (1 << 0) /* iclog needs REQ_PREFLUSH */ -#define XLOG_ICL_NEED_FUA (1 << 1) /* iclog needs REQ_FUA */ - /* Ticket reservation region accounting */ #define XLOG_TIC_LEN_MAX 15 @@ -497,7 +504,8 @@ int xlog_commit_record(struct xlog *log, struct xlog_ticket *ticket, void xfs_log_ticket_ungrant(struct xlog *log, struct xlog_ticket *ticket); void xfs_log_ticket_regrant(struct xlog *log, struct xlog_ticket *ticket); -int xlog_state_release_iclog(struct xlog *log, struct xlog_in_core *iclog); +int xlog_state_release_iclog(struct xlog *log, struct xlog_in_core *iclog, + xfs_lsn_t log_tail_lsn); /* * When we crack an atomic LSN, we sample it first so that the value will not diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index f9d8d605f9b1..19260291ff8b 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -3944,6 +3944,7 @@ DECLARE_EVENT_CLASS(xlog_iclog_class, __field(uint32_t, state) __field(int32_t, refcount) __field(uint32_t, offset) + __field(uint32_t, flags) __field(unsigned long long, lsn) __field(unsigned long, caller_ip) ), @@ -3952,15 +3953,17 @@ DECLARE_EVENT_CLASS(xlog_iclog_class, __entry->state = iclog->ic_state; __entry->refcount = atomic_read(&iclog->ic_refcnt); __entry->offset = iclog->ic_offset; + __entry->flags = iclog->ic_flags; __entry->lsn = be64_to_cpu(iclog->ic_header.h_lsn); __entry->caller_ip = caller_ip; ), - TP_printk("dev %d:%d state %s refcnt %d offset %u lsn 0x%llx caller %pS", + TP_printk("dev %d:%d state %s refcnt %d offset %u lsn 0x%llx flags %s caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __print_symbolic(__entry->state, XLOG_STATE_STRINGS), __entry->refcount, __entry->offset, __entry->lsn, + __print_flags(__entry->flags, "|", XLOG_ICL_STRINGS), (char *)__entry->caller_ip) ); diff --git a/include/linux/fsl/mc.h b/include/linux/fsl/mc.h index 63b56aba925a..30ece3ae6df7 100644 --- a/include/linux/fsl/mc.h +++ b/include/linux/fsl/mc.h @@ -423,7 +423,8 @@ int __must_check fsl_mc_allocate_irqs(struct fsl_mc_device *mc_dev); void fsl_mc_free_irqs(struct fsl_mc_device *mc_dev); -struct fsl_mc_device *fsl_mc_get_endpoint(struct fsl_mc_device *mc_dev); +struct fsl_mc_device *fsl_mc_get_endpoint(struct fsl_mc_device *mc_dev, + u16 if_id); extern struct bus_type fsl_mc_bus_type; diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h index 21daed10322e..509e18c7e740 100644 --- a/include/linux/if_bridge.h +++ b/include/linux/if_bridge.h @@ -190,39 +190,4 @@ static inline clock_t br_get_ageing_time(const struct net_device *br_dev) } #endif -#if IS_ENABLED(CONFIG_BRIDGE) && IS_ENABLED(CONFIG_NET_SWITCHDEV) - -int switchdev_bridge_port_offload(struct net_device *brport_dev, - struct net_device *dev, const void *ctx, - struct notifier_block *atomic_nb, - struct notifier_block *blocking_nb, - bool tx_fwd_offload, - struct netlink_ext_ack *extack); -void switchdev_bridge_port_unoffload(struct net_device *brport_dev, - const void *ctx, - struct notifier_block *atomic_nb, - struct notifier_block *blocking_nb); - -#else - -static inline int -switchdev_bridge_port_offload(struct net_device *brport_dev, - struct net_device *dev, const void *ctx, - struct notifier_block *atomic_nb, - struct notifier_block *blocking_nb, - bool tx_fwd_offload, - struct netlink_ext_ack *extack) -{ - return -EINVAL; -} - -static inline void -switchdev_bridge_port_unoffload(struct net_device *brport_dev, - const void *ctx, - struct notifier_block *atomic_nb, - struct notifier_block *blocking_nb) -{ -} -#endif - #endif diff --git a/include/linux/igmp.h b/include/linux/igmp.h index 64ce8cd1cfaf..93c262ecbdc9 100644 --- a/include/linux/igmp.h +++ b/include/linux/igmp.h @@ -41,9 +41,6 @@ struct ip_sf_socklist { __be32 sl_addr[]; }; -#define IP_SFLSIZE(count) (sizeof(struct ip_sf_socklist) + \ - (count) * sizeof(__be32)) - #define IP_SFBLOCK 10 /* allocate this many at once */ /* ip_mc_socklist is real list now. Speed is not argument; diff --git a/include/linux/mhi.h b/include/linux/mhi.h index beb918328eef..c493a80cb453 100644 --- a/include/linux/mhi.h +++ b/include/linux/mhi.h @@ -721,8 +721,13 @@ void mhi_device_put(struct mhi_device *mhi_dev); * host and device execution environments match and * channels are in a DISABLED state. * @mhi_dev: Device associated with the channels + * @flags: MHI channel flags */ -int mhi_prepare_for_transfer(struct mhi_device *mhi_dev); +int mhi_prepare_for_transfer(struct mhi_device *mhi_dev, + unsigned int flags); + +/* Automatically allocate and queue inbound buffers */ +#define MHI_CH_INBOUND_ALLOC_BUFS BIT(0) /** * mhi_unprepare_from_transfer - Reset UL and DL channels for data transfer. diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 77746f7e35b8..0106c67e8ccb 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -38,6 +38,8 @@ #define MLX5_FS_DEFAULT_FLOW_TAG 0x0 +#define MLX5_SET_CFG(p, f, v) MLX5_SET(create_flow_group_in, p, f, v) + enum { MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO = 1 << 16, MLX5_FLOW_CONTEXT_ACTION_ENCRYPT = 1 << 17, diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 52bbd2b7cb46..7f8ee09c711f 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -103,11 +103,19 @@ struct page { unsigned long pp_magic; struct page_pool *pp; unsigned long _pp_mapping_pad; - /** - * @dma_addr: might require a 64-bit value on - * 32-bit architectures. - */ - unsigned long dma_addr[2]; + unsigned long dma_addr; + union { + /** + * dma_addr_upper: might require a 64-bit + * value on 32-bit architectures. + */ + unsigned long dma_addr_upper; + /** + * For frag page support, not supported in + * 32-bit architectures with 64-bit DMA. + */ + atomic_long_t pp_frag_count; + }; }; struct { /* slab, slob and slub */ union { diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 02c6e8e10c86..bd8d5b8e2de3 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -295,18 +295,6 @@ enum netdev_state_t { }; -/* - * This structure holds boot-time configured netdevice settings. They - * are then used in the device probing. - */ -struct netdev_boot_setup { - char name[IFNAMSIZ]; - struct ifmap map; -}; -#define NETDEV_BOOT_SETUP_MAX 8 - -int __init netdev_boot_setup(char *str); - struct gro_list { struct list_head list; int count; @@ -2944,7 +2932,6 @@ static inline struct net_device *first_net_device_rcu(struct net *net) } int netdev_boot_setup_check(struct net_device *dev); -unsigned long netdev_boot_base(const char *prefix, int unit); struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type, const char *hwaddr); struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type); @@ -3934,6 +3921,8 @@ static inline int netif_set_real_num_rx_queues(struct net_device *dev, return 0; } #endif +int netif_set_real_num_queues(struct net_device *dev, + unsigned int txq, unsigned int rxq); static inline struct netdev_rx_queue * __netif_get_rx_queue(struct net_device *dev, unsigned int rxq) @@ -4160,11 +4149,13 @@ void netdev_run_todo(void); */ static inline void dev_put(struct net_device *dev) { + if (dev) { #ifdef CONFIG_PCPU_DEV_REFCNT - this_cpu_dec(*dev->pcpu_refcnt); + this_cpu_dec(*dev->pcpu_refcnt); #else - refcount_dec(&dev->dev_refcnt); + refcount_dec(&dev->dev_refcnt); #endif + } } /** @@ -4175,11 +4166,13 @@ static inline void dev_put(struct net_device *dev) */ static inline void dev_hold(struct net_device *dev) { + if (dev) { #ifdef CONFIG_PCPU_DEV_REFCNT - this_cpu_inc(*dev->pcpu_refcnt); + this_cpu_inc(*dev->pcpu_refcnt); #else - refcount_inc(&dev->dev_refcnt); + refcount_inc(&dev->dev_refcnt); #endif + } } /* Carrier loss detection, dial on demand. The functions netif_carrier_on diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 2bcdc8cd38be..6bdb0db3e825 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1183,6 +1183,7 @@ static inline struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom, int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, gfp_t gfp_mask); struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom); +struct sk_buff *skb_expand_head(struct sk_buff *skb, unsigned int headroom); struct sk_buff *skb_copy_expand(const struct sk_buff *skb, int newheadroom, int newtailroom, gfp_t priority); int __must_check skb_to_sgvec_nomark(struct sk_buff *skb, struct scatterlist *sg, @@ -4711,11 +4712,9 @@ static inline u64 skb_get_kcov_handle(struct sk_buff *skb) } #ifdef CONFIG_PAGE_POOL -static inline void skb_mark_for_recycle(struct sk_buff *skb, struct page *page, - struct page_pool *pp) +static inline void skb_mark_for_recycle(struct sk_buff *skb) { skb->pp_recycle = 1; - page_pool_store_mem_info(page, pp); } #endif diff --git a/include/net/Space.h b/include/net/Space.h index 9cce0d80d37a..08ca9cef0213 100644 --- a/include/net/Space.h +++ b/include/net/Space.h @@ -8,23 +8,13 @@ struct net_device *ultra_probe(int unit); struct net_device *wd_probe(int unit); struct net_device *ne_probe(int unit); struct net_device *fmv18x_probe(int unit); -struct net_device *i82596_probe(int unit); struct net_device *ni65_probe(int unit); struct net_device *sonic_probe(int unit); struct net_device *smc_init(int unit); -struct net_device *atarilance_probe(int unit); -struct net_device *sun3lance_probe(int unit); -struct net_device *sun3_82586_probe(int unit); -struct net_device *apne_probe(int unit); struct net_device *cs89x0_probe(int unit); -struct net_device *mvme147lance_probe(int unit); struct net_device *tc515_probe(int unit); struct net_device *lance_probe(int unit); struct net_device *cops_probe(int unit); -struct net_device *ltpc_probe(void); /* Fibre Channel adapters */ int iph5526_probe(struct net_device *dev); - -/* SBNI adapters */ -int sbni_probe(int unit); diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 435a2c3d5a6f..4757d7f53f13 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -70,6 +70,9 @@ struct unix_sock { struct socket_wq peer_wq; wait_queue_entry_t peer_wake; struct scm_stat scm_stat; +#if IS_ENABLED(CONFIG_AF_UNIX_OOB) + struct sk_buff *oob_skb; +#endif }; static inline struct unix_sock *unix_sk(const struct sock *sk) diff --git a/include/net/ax88796.h b/include/net/ax88796.h index aa52b2e8ff7b..2ed23a368602 100644 --- a/include/net/ax88796.h +++ b/include/net/ax88796.h @@ -38,4 +38,7 @@ struct ax_plat_data { int (*check_irq)(struct platform_device *pdev); }; +/* exported from ax88796.c for xsurf100.c */ +extern void ax_NS8390_reinit(struct net_device *dev); + #endif /* __NET_AX88796_PLAT_H */ diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index a53e94459ecd..db4312e44d47 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1230,6 +1230,7 @@ struct hci_dev *hci_alloc_dev(void); void hci_free_dev(struct hci_dev *hdev); int hci_register_dev(struct hci_dev *hdev); void hci_unregister_dev(struct hci_dev *hdev); +void hci_cleanup_dev(struct hci_dev *hdev); int hci_suspend_dev(struct hci_dev *hdev); int hci_resume_dev(struct hci_dev *hdev); int hci_reset_dev(struct hci_dev *hdev); diff --git a/include/net/bond_3ad.h b/include/net/bond_3ad.h index c8696a230b7d..38785d48baff 100644 --- a/include/net/bond_3ad.h +++ b/include/net/bond_3ad.h @@ -303,6 +303,7 @@ int __bond_3ad_get_active_agg_info(struct bonding *bond, int bond_3ad_lacpdu_recv(const struct sk_buff *skb, struct bonding *bond, struct slave *slave); int bond_3ad_set_carrier(struct bonding *bond); +void bond_3ad_update_lacp_active(struct bonding *bond); void bond_3ad_update_lacp_rate(struct bonding *bond); void bond_3ad_update_ad_actor_settings(struct bonding *bond); int bond_3ad_stats_fill(struct sk_buff *skb, struct bond_3ad_stats *stats); diff --git a/include/net/bond_options.h b/include/net/bond_options.h index 9d382f2f0bc5..e64833a674eb 100644 --- a/include/net/bond_options.h +++ b/include/net/bond_options.h @@ -64,6 +64,7 @@ enum { BOND_OPT_AD_USER_PORT_KEY, BOND_OPT_NUM_PEER_NOTIF_ALIAS, BOND_OPT_PEER_NOTIF_DELAY, + BOND_OPT_LACP_ACTIVE, BOND_OPT_LAST }; diff --git a/include/net/bonding.h b/include/net/bonding.h index b91c365e4e95..9f3fdc180c6c 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -129,6 +129,7 @@ struct bond_params { int updelay; int downdelay; int peer_notif_delay; + int lacp_active; int lacp_fast; unsigned int min_links; int ad_select; diff --git a/include/net/compat.h b/include/net/compat.h index 84805bdc4435..595fee069b82 100644 --- a/include/net/compat.h +++ b/include/net/compat.h @@ -71,13 +71,26 @@ struct compat_group_source_req { } __packed; struct compat_group_filter { - __u32 gf_interface; - struct __kernel_sockaddr_storage gf_group - __aligned(4); - __u32 gf_fmode; - __u32 gf_numsrc; - struct __kernel_sockaddr_storage gf_slist[1] - __aligned(4); + union { + struct { + __u32 gf_interface_aux; + struct __kernel_sockaddr_storage gf_group_aux + __aligned(4); + __u32 gf_fmode_aux; + __u32 gf_numsrc_aux; + struct __kernel_sockaddr_storage gf_slist[1] + __aligned(4); + } __packed; + struct { + __u32 gf_interface; + struct __kernel_sockaddr_storage gf_group + __aligned(4); + __u32 gf_fmode; + __u32 gf_numsrc; + struct __kernel_sockaddr_storage gf_slist_flex[] + __aligned(4); + } __packed; + }; } __packed; #endif /* NET_COMPAT_H */ diff --git a/include/net/devlink.h b/include/net/devlink.h index 08f4c6191e72..0236c77f2fd0 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -1396,8 +1396,8 @@ struct devlink_ops { * * Note: @extack can be NULL when port notifier queries the port function. */ - int (*port_function_hw_addr_get)(struct devlink *devlink, struct devlink_port *port, - u8 *hw_addr, int *hw_addr_len, + int (*port_function_hw_addr_get)(struct devlink_port *port, u8 *hw_addr, + int *hw_addr_len, struct netlink_ext_ack *extack); /** * @port_function_hw_addr_set: Port function's hardware address set function. @@ -1406,7 +1406,7 @@ struct devlink_ops { * by the devlink port. Driver should return -EOPNOTSUPP if it doesn't support port * function handling for a particular port. */ - int (*port_function_hw_addr_set)(struct devlink *devlink, struct devlink_port *port, + int (*port_function_hw_addr_set)(struct devlink_port *port, const u8 *hw_addr, int hw_addr_len, struct netlink_ext_ack *extack); /** @@ -1462,8 +1462,7 @@ struct devlink_ops { * * Return: 0 on success, negative value otherwise. */ - int (*port_fn_state_get)(struct devlink *devlink, - struct devlink_port *port, + int (*port_fn_state_get)(struct devlink_port *port, enum devlink_port_fn_state *state, enum devlink_port_fn_opstate *opstate, struct netlink_ext_ack *extack); @@ -1478,8 +1477,7 @@ struct devlink_ops { * * Return: 0 on success, negative value otherwise. */ - int (*port_fn_state_set)(struct devlink *devlink, - struct devlink_port *port, + int (*port_fn_state_set)(struct devlink_port *port, enum devlink_port_fn_state state, struct netlink_ext_ack *extack); @@ -1546,13 +1544,15 @@ struct net *devlink_net(const struct devlink *devlink); * Drivers that operate on real HW must use devlink_alloc() instead. */ struct devlink *devlink_alloc_ns(const struct devlink_ops *ops, - size_t priv_size, struct net *net); + size_t priv_size, struct net *net, + struct device *dev); static inline struct devlink *devlink_alloc(const struct devlink_ops *ops, - size_t priv_size) + size_t priv_size, + struct device *dev) { - return devlink_alloc_ns(ops, priv_size, &init_net); + return devlink_alloc_ns(ops, priv_size, &init_net, dev); } -int devlink_register(struct devlink *devlink, struct device *dev); +int devlink_register(struct devlink *devlink); void devlink_unregister(struct devlink *devlink); void devlink_reload_enable(struct devlink *devlink); void devlink_reload_disable(struct devlink *devlink); diff --git a/include/net/dsa.h b/include/net/dsa.h index 7cc9507282d3..0c2cba45fa79 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -254,6 +254,8 @@ struct dsa_port { struct device_node *dn; unsigned int ageing_time; bool vlan_filtering; + /* Managed by DSA on user ports and by drivers on CPU and DSA ports */ + bool learning; u8 stp_state; struct net_device *bridge_dev; int bridge_num; @@ -714,8 +716,6 @@ struct dsa_switch_ops { int (*port_bridge_flags)(struct dsa_switch *ds, int port, struct switchdev_brport_flags flags, struct netlink_ext_ack *extack); - int (*port_set_mrouter)(struct dsa_switch *ds, int port, bool mrouter, - struct netlink_ext_ack *extack); /* * VLAN support diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index 69c9eabf8325..f3c2841566a0 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -293,7 +293,7 @@ static inline bool flow_action_has_entries(const struct flow_action *action) } /** - * flow_action_has_one_action() - check if exactly one action is present + * flow_offload_has_one_action() - check if exactly one action is present * @action: tc filter flow offload action * * Returns true if exactly one action is present. diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index 71bb4cc4d05d..42235c178b06 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h @@ -82,9 +82,6 @@ struct ip6_sf_socklist { struct in6_addr sl_addr[]; }; -#define IP6_SFLSIZE(count) (sizeof(struct ip6_sf_socklist) + \ - (count) * sizeof(struct in6_addr)) - #define IP6_SFBLOCK 10 /* allocate this many at once */ struct ipv6_mc_socklist { diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 820eae3ea95f..5efd0b71dc67 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -265,7 +265,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, static inline unsigned int ip6_skb_dst_mtu(struct sk_buff *skb) { - int mtu; + unsigned int mtu; struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ? inet6_sk(skb->sk) : NULL; diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index e946366e8ba5..1f4e1816fd36 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -75,6 +75,7 @@ struct netns_xfrm { #endif spinlock_t xfrm_state_lock; seqcount_spinlock_t xfrm_state_hash_generation; + seqcount_spinlock_t xfrm_policy_hash_generation; spinlock_t xfrm_policy_lock; struct mutex xfrm_cfg_mutex; diff --git a/include/net/page_pool.h b/include/net/page_pool.h index 3dd62dd73027..a4082406a003 100644 --- a/include/net/page_pool.h +++ b/include/net/page_pool.h @@ -45,7 +45,10 @@ * Please note DMA-sync-for-CPU is still * device driver responsibility */ -#define PP_FLAG_ALL (PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV) +#define PP_FLAG_PAGE_FRAG BIT(2) /* for page frag feature */ +#define PP_FLAG_ALL (PP_FLAG_DMA_MAP |\ + PP_FLAG_DMA_SYNC_DEV |\ + PP_FLAG_PAGE_FRAG) /* * Fast allocation side cache array/stack @@ -88,6 +91,9 @@ struct page_pool { unsigned long defer_warn; u32 pages_state_hold_cnt; + unsigned int frag_offset; + struct page *frag_page; + long frag_users; /* * Data structure for allocation side @@ -137,6 +143,18 @@ static inline struct page *page_pool_dev_alloc_pages(struct page_pool *pool) return page_pool_alloc_pages(pool, gfp); } +struct page *page_pool_alloc_frag(struct page_pool *pool, unsigned int *offset, + unsigned int size, gfp_t gfp); + +static inline struct page *page_pool_dev_alloc_frag(struct page_pool *pool, + unsigned int *offset, + unsigned int size) +{ + gfp_t gfp = (GFP_ATOMIC | __GFP_NOWARN); + + return page_pool_alloc_frag(pool, offset, size, gfp); +} + /* get the stored dma direction. A driver might decide to treat this locally and * avoid the extra cache line from page_pool to determine the direction */ @@ -198,19 +216,48 @@ static inline void page_pool_recycle_direct(struct page_pool *pool, page_pool_put_full_page(pool, page, true); } +#define PAGE_POOL_DMA_USE_PP_FRAG_COUNT \ + (sizeof(dma_addr_t) > sizeof(unsigned long)) + static inline dma_addr_t page_pool_get_dma_addr(struct page *page) { - dma_addr_t ret = page->dma_addr[0]; - if (sizeof(dma_addr_t) > sizeof(unsigned long)) - ret |= (dma_addr_t)page->dma_addr[1] << 16 << 16; + dma_addr_t ret = page->dma_addr; + + if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT) + ret |= (dma_addr_t)page->dma_addr_upper << 16 << 16; + return ret; } static inline void page_pool_set_dma_addr(struct page *page, dma_addr_t addr) { - page->dma_addr[0] = addr; - if (sizeof(dma_addr_t) > sizeof(unsigned long)) - page->dma_addr[1] = upper_32_bits(addr); + page->dma_addr = addr; + if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT) + page->dma_addr_upper = upper_32_bits(addr); +} + +static inline void page_pool_set_frag_count(struct page *page, long nr) +{ + atomic_long_set(&page->pp_frag_count, nr); +} + +static inline long page_pool_atomic_sub_frag_count_return(struct page *page, + long nr) +{ + long ret; + + /* As suggested by Alexander, atomic_long_read() may cover up the + * reference count errors, so avoid calling atomic_long_read() in + * the cases of freeing or draining the page_frags, where we would + * not expect it to match or that are slowpath anyway. + */ + if (__builtin_constant_p(nr) && + atomic_long_read(&page->pp_frag_count) == nr) + return 0; + + ret = atomic_long_sub_return(nr, &page->pp_frag_count); + WARN_ON(ret < 0); + return ret; } static inline bool is_page_pool_compiled_in(void) @@ -253,11 +300,4 @@ static inline void page_pool_ring_unlock(struct page_pool *pool) spin_unlock_bh(&pool->ring.producer_lock); } -/* Store mem_info on struct page and use it while recycling skb frags */ -static inline -void page_pool_store_mem_info(struct page *page, struct page_pool *pp) -{ - page->pp = pp; -} - #endif /* _NET_PAGE_POOL_H */ diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 64de26b7ad39..8fb47fc61097 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -329,6 +329,9 @@ int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts); /** * struct tcf_pkt_info - packet information + * + * @ptr: start of the pkt data + * @nexthdr: offset of the next header */ struct tcf_pkt_info { unsigned char * ptr; @@ -347,6 +350,7 @@ struct tcf_ematch_ops; * @ops: the operations lookup table of the corresponding ematch module * @datalen: length of the ematch specific configuration data * @data: ematch specific data + * @net: the network namespace */ struct tcf_ematch { struct tcf_ematch_ops * ops; diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index 384e800665f2..9f48733bfd21 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -153,7 +153,8 @@ struct rtnl_af_ops { u32 ext_filter_mask); int (*validate_link_af)(const struct net_device *dev, - const struct nlattr *attr); + const struct nlattr *attr, + struct netlink_ext_ack *extack); int (*set_link_af)(struct net_device *dev, const struct nlattr *attr, struct netlink_ext_ack *extack); diff --git a/include/net/sock.h b/include/net/sock.h index ff1be7e7e90b..6e761451c927 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -68,6 +68,7 @@ #include <net/tcp_states.h> #include <linux/net_tstamp.h> #include <net/l3mdev.h> +#include <uapi/linux/socket.h> /* * This structure really needs to be cleaned up. @@ -1438,8 +1439,6 @@ static inline int __sk_prot_rehash(struct sock *sk) #define RCV_SHUTDOWN 1 #define SEND_SHUTDOWN 2 -#define SOCK_SNDBUF_LOCK 1 -#define SOCK_RCVBUF_LOCK 2 #define SOCK_BINDADDR_LOCK 4 #define SOCK_BINDPORT_LOCK 8 diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 66468ff8cc0a..60d806b6a5ae 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -180,6 +180,14 @@ struct switchdev_obj_in_state_mrp { typedef int switchdev_obj_dump_cb_t(struct switchdev_obj *obj); +struct switchdev_brport { + struct net_device *dev; + const void *ctx; + struct notifier_block *atomic_nb; + struct notifier_block *blocking_nb; + bool tx_fwd_offload; +}; + enum switchdev_notifier_type { SWITCHDEV_FDB_ADD_TO_BRIDGE = 1, SWITCHDEV_FDB_DEL_TO_BRIDGE, @@ -197,6 +205,9 @@ enum switchdev_notifier_type { SWITCHDEV_VXLAN_FDB_ADD_TO_DEVICE, SWITCHDEV_VXLAN_FDB_DEL_TO_DEVICE, SWITCHDEV_VXLAN_FDB_OFFLOADED, + + SWITCHDEV_BRPORT_OFFLOADED, + SWITCHDEV_BRPORT_UNOFFLOADED, }; struct switchdev_notifier_info { @@ -226,6 +237,11 @@ struct switchdev_notifier_port_attr_info { bool handled; }; +struct switchdev_notifier_brport_info { + struct switchdev_notifier_info info; /* must be first */ + const struct switchdev_brport brport; +}; + static inline struct net_device * switchdev_notifier_info_to_dev(const struct switchdev_notifier_info *info) { @@ -246,6 +262,17 @@ switchdev_fdb_is_dynamically_learned(const struct switchdev_notifier_fdb_info *f #ifdef CONFIG_NET_SWITCHDEV +int switchdev_bridge_port_offload(struct net_device *brport_dev, + struct net_device *dev, const void *ctx, + struct notifier_block *atomic_nb, + struct notifier_block *blocking_nb, + bool tx_fwd_offload, + struct netlink_ext_ack *extack); +void switchdev_bridge_port_unoffload(struct net_device *brport_dev, + const void *ctx, + struct notifier_block *atomic_nb, + struct notifier_block *blocking_nb); + void switchdev_deferred_process(void); int switchdev_port_attr_set(struct net_device *dev, const struct switchdev_attr *attr, @@ -316,6 +343,25 @@ int switchdev_handle_port_attr_set(struct net_device *dev, struct netlink_ext_ack *extack)); #else +static inline int +switchdev_bridge_port_offload(struct net_device *brport_dev, + struct net_device *dev, const void *ctx, + struct notifier_block *atomic_nb, + struct notifier_block *blocking_nb, + bool tx_fwd_offload, + struct netlink_ext_ack *extack) +{ + return -EOPNOTSUPP; +} + +static inline void +switchdev_bridge_port_unoffload(struct net_device *brport_dev, + const void *ctx, + struct notifier_block *atomic_nb, + struct notifier_block *blocking_nb) +{ +} + static inline void switchdev_deferred_process(void) { } diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h index d588c244ec2f..1f0a2b4864e4 100644 --- a/include/uapi/asm-generic/socket.h +++ b/include/uapi/asm-generic/socket.h @@ -124,6 +124,8 @@ #define SO_NETNS_COOKIE 71 +#define SO_BUF_LOCK 72 + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__)) diff --git a/include/uapi/linux/can/j1939.h b/include/uapi/linux/can/j1939.h index df6e821075c1..38936460f668 100644 --- a/include/uapi/linux/can/j1939.h +++ b/include/uapi/linux/can/j1939.h @@ -78,11 +78,20 @@ enum { enum { J1939_NLA_PAD, J1939_NLA_BYTES_ACKED, + J1939_NLA_TOTAL_SIZE, + J1939_NLA_PGN, + J1939_NLA_SRC_NAME, + J1939_NLA_DEST_NAME, + J1939_NLA_SRC_ADDR, + J1939_NLA_DEST_ADDR, }; enum { J1939_EE_INFO_NONE, J1939_EE_INFO_TX_ABORT, + J1939_EE_INFO_RX_RTS, + J1939_EE_INFO_RX_DPO, + J1939_EE_INFO_RX_ABORT, }; struct j1939_filter { diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 49b22afab78f..5310003523ce 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -855,6 +855,7 @@ enum { IFLA_BOND_AD_ACTOR_SYSTEM, IFLA_BOND_TLB_DYNAMIC_LB, IFLA_BOND_PEER_NOTIF_DELAY, + IFLA_BOND_AD_LACP_ACTIVE, __IFLA_BOND_MAX, }; diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h index 193b7cf1f0ac..14168225cecd 100644 --- a/include/uapi/linux/in.h +++ b/include/uapi/linux/in.h @@ -222,11 +222,22 @@ struct group_source_req { }; struct group_filter { - __u32 gf_interface; /* interface index */ - struct __kernel_sockaddr_storage gf_group; /* multicast address */ - __u32 gf_fmode; /* filter mode */ - __u32 gf_numsrc; /* number of sources */ - struct __kernel_sockaddr_storage gf_slist[1]; /* interface index */ + union { + struct { + __u32 gf_interface_aux; /* interface index */ + struct __kernel_sockaddr_storage gf_group_aux; /* multicast address */ + __u32 gf_fmode_aux; /* filter mode */ + __u32 gf_numsrc_aux; /* number of sources */ + struct __kernel_sockaddr_storage gf_slist[1]; /* interface index */ + }; + struct { + __u32 gf_interface; /* interface index */ + struct __kernel_sockaddr_storage gf_group; /* multicast address */ + __u32 gf_fmode; /* filter mode */ + __u32 gf_numsrc; /* number of sources */ + struct __kernel_sockaddr_storage gf_slist_flex[]; /* interface index */ + }; + }; }; #define GROUP_FILTER_SIZE(numsrc) \ diff --git a/include/uapi/linux/socket.h b/include/uapi/linux/socket.h index c3409c8ec0dd..eb0a9a5b6e71 100644 --- a/include/uapi/linux/socket.h +++ b/include/uapi/linux/socket.h @@ -26,4 +26,9 @@ struct __kernel_sockaddr_storage { }; }; +#define SOCK_SNDBUF_LOCK 1 +#define SOCK_RCVBUF_LOCK 2 + +#define SOCK_BUF_LOCK_MASK (SOCK_SNDBUF_LOCK | SOCK_RCVBUF_LOCK) + #endif /* _UAPI_LINUX_SOCKET_H */ diff --git a/init/main.c b/init/main.c index f5b8246e8aa1..11cbbec309fa 100644 --- a/init/main.c +++ b/init/main.c @@ -1221,7 +1221,7 @@ trace_initcall_start_cb(void *data, initcall_t fn) { ktime_t *calltime = (ktime_t *)data; - printk(KERN_DEBUG "calling %pS @ %i\n", fn, task_pid_nr(current)); + printk(KERN_DEBUG "calling %pS @ %i irqs_disabled() %d\n", fn, task_pid_nr(current), irqs_disabled()); *calltime = ktime_get(); } @@ -1235,8 +1235,8 @@ trace_initcall_finish_cb(void *data, initcall_t fn, int ret) rettime = ktime_get(); delta = ktime_sub(rettime, *calltime); duration = (unsigned long long) ktime_to_ns(delta) >> 10; - printk(KERN_DEBUG "initcall %pS returned %d after %lld usecs\n", - fn, ret, duration); + printk(KERN_DEBUG "initcall %pS returned %d after %lld usecs, irqs_disabled() %d\n", + fn, ret, duration, irqs_disabled()); } static ktime_t initcall_calltime; diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index c59dd35a6da5..33899a71fdc1 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -9135,8 +9135,10 @@ static int trace_array_create_dir(struct trace_array *tr) return -EINVAL; ret = event_trace_add_tracer(tr->dir, tr); - if (ret) + if (ret) { tracefs_remove(tr->dir); + return ret; + } init_tracer_tracefs(tr, tr->dir); __update_tracer_options(tr); diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 34325f41ebc0..949ef09dc537 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -65,7 +65,8 @@ C(INVALID_SORT_MODIFIER,"Invalid sort modifier"), \ C(EMPTY_SORT_FIELD, "Empty sort field"), \ C(TOO_MANY_SORT_FIELDS, "Too many sort fields (Max = 2)"), \ - C(INVALID_SORT_FIELD, "Sort field must be a key or a val"), + C(INVALID_SORT_FIELD, "Sort field must be a key or a val"), \ + C(INVALID_STR_OPERAND, "String type can not be an operand in expression"), #undef C #define C(a, b) HIST_ERR_##a @@ -2156,6 +2157,13 @@ static struct hist_field *parse_unary(struct hist_trigger_data *hist_data, ret = PTR_ERR(operand1); goto free; } + if (operand1->flags & HIST_FIELD_FL_STRING) { + /* String type can not be the operand of unary operator. */ + hist_err(file->tr, HIST_ERR_INVALID_STR_OPERAND, errpos(str)); + destroy_hist_field(operand1, 0); + ret = -EINVAL; + goto free; + } expr->flags |= operand1->flags & (HIST_FIELD_FL_TIMESTAMP | HIST_FIELD_FL_TIMESTAMP_USECS); @@ -2257,6 +2265,11 @@ static struct hist_field *parse_expr(struct hist_trigger_data *hist_data, operand1 = NULL; goto free; } + if (operand1->flags & HIST_FIELD_FL_STRING) { + hist_err(file->tr, HIST_ERR_INVALID_STR_OPERAND, errpos(operand1_str)); + ret = -EINVAL; + goto free; + } /* rest of string could be another expression e.g. b+c in a+b+c */ operand_flags = 0; @@ -2266,6 +2279,11 @@ static struct hist_field *parse_expr(struct hist_trigger_data *hist_data, operand2 = NULL; goto free; } + if (operand2->flags & HIST_FIELD_FL_STRING) { + hist_err(file->tr, HIST_ERR_INVALID_STR_OPERAND, errpos(str)); + ret = -EINVAL; + goto free; + } ret = check_expr_operands(file->tr, operand1, operand2); if (ret) @@ -2287,6 +2305,10 @@ static struct hist_field *parse_expr(struct hist_trigger_data *hist_data, expr->operands[0] = operand1; expr->operands[1] = operand2; + + /* The operand sizes should be the same, so just pick one */ + expr->size = operand1->size; + expr->operator = field_op; expr->name = expr_str(expr, 0); expr->type = kstrdup(operand1->type, GFP_KERNEL); diff --git a/kernel/trace/trace_hwlat.c b/kernel/trace/trace_hwlat.c index a6c0cdaf4b87..14f46aae1981 100644 --- a/kernel/trace/trace_hwlat.c +++ b/kernel/trace/trace_hwlat.c @@ -327,7 +327,7 @@ static void move_to_next_cpu(void) get_online_cpus(); cpumask_and(current_mask, cpu_online_mask, tr->tracing_cpumask); - next_cpu = cpumask_next(smp_processor_id(), current_mask); + next_cpu = cpumask_next(raw_smp_processor_id(), current_mask); put_online_cpus(); if (next_cpu >= nr_cpu_ids) diff --git a/kernel/ucount.c b/kernel/ucount.c index 87799e2379bd..77be3bbe3cc4 100644 --- a/kernel/ucount.c +++ b/kernel/ucount.c @@ -160,6 +160,7 @@ struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid) { struct hlist_head *hashent = ucounts_hashentry(ns, uid); struct ucounts *ucounts, *new; + long overflow; spin_lock_irq(&ucounts_lock); ucounts = find_ucounts(ns, uid, hashent); @@ -184,8 +185,12 @@ struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid) return new; } } + overflow = atomic_add_negative(1, &ucounts->count); spin_unlock_irq(&ucounts_lock); - ucounts = get_ucounts(ucounts); + if (overflow) { + put_ucounts(ucounts); + return NULL; + } return ucounts; } @@ -193,8 +198,7 @@ void put_ucounts(struct ucounts *ucounts) { unsigned long flags; - if (atomic_dec_and_test(&ucounts->count)) { - spin_lock_irqsave(&ucounts_lock, flags); + if (atomic_dec_and_lock_irqsave(&ucounts->count, &ucounts_lock, flags)) { hlist_del_init(&ucounts->node); spin_unlock_irqrestore(&ucounts_lock, flags); kfree(ucounts); diff --git a/net/ax25/ax25_ip.c b/net/ax25/ax25_ip.c index e4f63dd43cb5..36249776c021 100644 --- a/net/ax25/ax25_ip.c +++ b/net/ax25/ax25_ip.c @@ -193,10 +193,8 @@ netdev_tx_t ax25_ip_xmit(struct sk_buff *skb) skb_pull(skb, AX25_KISS_HEADER_LEN); if (digipeat != NULL) { - if ((ourskb = ax25_rt_build_path(skb, src, dst, route->digipeat)) == NULL) { - kfree_skb(skb); + if ((ourskb = ax25_rt_build_path(skb, src, dst, route->digipeat)) == NULL) goto put; - } skb = ourskb; } diff --git a/net/ax25/ax25_out.c b/net/ax25/ax25_out.c index f53751ba81b3..22f2f66c6e0a 100644 --- a/net/ax25/ax25_out.c +++ b/net/ax25/ax25_out.c @@ -325,7 +325,6 @@ void ax25_kick(ax25_cb *ax25) void ax25_transmit_buffer(ax25_cb *ax25, struct sk_buff *skb, int type) { - struct sk_buff *skbn; unsigned char *ptr; int headroom; @@ -336,18 +335,12 @@ void ax25_transmit_buffer(ax25_cb *ax25, struct sk_buff *skb, int type) headroom = ax25_addr_size(ax25->digipeat); - if (skb_headroom(skb) < headroom) { - if ((skbn = skb_realloc_headroom(skb, headroom)) == NULL) { + if (unlikely(skb_headroom(skb) < headroom)) { + skb = skb_expand_head(skb, headroom); + if (!skb) { printk(KERN_CRIT "AX.25: ax25_transmit_buffer - out of memory\n"); - kfree_skb(skb); return; } - - if (skb->sk != NULL) - skb_set_owner_w(skbn, skb->sk); - - consume_skb(skb); - skb = skbn; } ptr = skb_push(skb, headroom); diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c index b40e0bce67ea..d0b2e094bd55 100644 --- a/net/ax25/ax25_route.c +++ b/net/ax25/ax25_route.c @@ -441,24 +441,17 @@ put: struct sk_buff *ax25_rt_build_path(struct sk_buff *skb, ax25_address *src, ax25_address *dest, ax25_digi *digi) { - struct sk_buff *skbn; unsigned char *bp; int len; len = digi->ndigi * AX25_ADDR_LEN; - if (skb_headroom(skb) < len) { - if ((skbn = skb_realloc_headroom(skb, len)) == NULL) { + if (unlikely(skb_headroom(skb) < len)) { + skb = skb_expand_head(skb, len); + if (!skb) { printk(KERN_CRIT "AX.25: ax25_dg_build_path - out of memory\n"); return NULL; } - - if (skb->sk != NULL) - skb_set_owner_w(skbn, skb->sk); - - consume_skb(skb); - - skb = skbn; } bp = skb_push(skb, len); diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index 63d42dcc9324..2b639c8b0ded 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -2274,8 +2274,7 @@ out: if (primary_if) batadv_hardif_put(primary_if); - if (soft_iface) - dev_put(soft_iface); + dev_put(soft_iface); return ret; } @@ -2446,8 +2445,7 @@ out: if (primary_if) batadv_hardif_put(primary_if); - if (soft_iface) - dev_put(soft_iface); + dev_put(soft_iface); return ret; } diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index 8c95a11a830a..7976a0435662 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -984,8 +984,7 @@ out: if (primary_if) batadv_hardif_put(primary_if); - if (soft_iface) - dev_put(soft_iface); + dev_put(soft_iface); return ret; } diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index 007f2827935d..36a98d3cefe0 100644 --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@ -557,8 +557,7 @@ int batadv_gw_dump(struct sk_buff *msg, struct netlink_callback *cb) out: if (primary_if) batadv_hardif_put(primary_if); - if (soft_iface) - dev_put(soft_iface); + dev_put(soft_iface); return ret; } diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c index 923e2197c2db..0158f267c403 100644 --- a/net/batman-adv/multicast.c +++ b/net/batman-adv/multicast.c @@ -91,8 +91,7 @@ static struct net_device *batadv_mcast_get_bridge(struct net_device *soft_iface) upper = netdev_master_upper_dev_get_rcu(upper); } while (upper && !(upper->priv_flags & IFF_EBRIDGE)); - if (upper) - dev_hold(upper); + dev_hold(upper); rcu_read_unlock(); return upper; @@ -509,8 +508,7 @@ batadv_mcast_mla_softif_get(struct net_device *dev, } out: - if (bridge) - dev_put(bridge); + dev_put(bridge); return ret4 + ret6; } @@ -2239,8 +2237,7 @@ batadv_mcast_netlink_get_primary(struct netlink_callback *cb, } out: - if (soft_iface) - dev_put(soft_iface); + dev_put(soft_iface); if (!ret && primary_if) *primary_if = hard_iface; diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index da7249448474..6a4d3f437e00 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -799,12 +799,10 @@ int batadv_hardif_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb) out: if (hardif) batadv_hardif_put(hardif); - if (hard_iface) - dev_put(hard_iface); + dev_put(hard_iface); if (primary_if) batadv_hardif_put(primary_if); - if (soft_iface) - dev_put(soft_iface); + dev_put(soft_iface); return ret; } @@ -1412,12 +1410,10 @@ int batadv_orig_dump(struct sk_buff *msg, struct netlink_callback *cb) out: if (hardif) batadv_hardif_put(hardif); - if (hard_iface) - dev_put(hard_iface); + dev_put(hard_iface); if (primary_if) batadv_hardif_put(primary_if); - if (soft_iface) - dev_put(soft_iface); + dev_put(soft_iface); return ret; } diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 434b4f042909..711fe5a2cec4 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -820,8 +820,7 @@ check_roaming: out: if (in_hardif) batadv_hardif_put(in_hardif); - if (in_dev) - dev_put(in_dev); + dev_put(in_dev); if (tt_local) batadv_tt_local_entry_put(tt_local); if (tt_global) @@ -1217,8 +1216,7 @@ int batadv_tt_local_dump(struct sk_buff *msg, struct netlink_callback *cb) out: if (primary_if) batadv_hardif_put(primary_if); - if (soft_iface) - dev_put(soft_iface); + dev_put(soft_iface); cb->args[0] = bucket; cb->args[1] = idx; @@ -2005,8 +2003,7 @@ int batadv_tt_global_dump(struct sk_buff *msg, struct netlink_callback *cb) out: if (primary_if) batadv_hardif_put(primary_if); - if (soft_iface) - dev_put(soft_iface); + dev_put(soft_iface); cb->args[0] = bucket; cb->args[1] = idx; diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 2560ed2f144d..e1a545c8a69f 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -3996,14 +3996,10 @@ EXPORT_SYMBOL(hci_register_dev); /* Unregister HCI device */ void hci_unregister_dev(struct hci_dev *hdev) { - int id; - BT_DBG("%p name %s bus %d", hdev, hdev->name, hdev->bus); hci_dev_set_flag(hdev, HCI_UNREGISTER); - id = hdev->id; - write_lock(&hci_dev_list_lock); list_del(&hdev->list); write_unlock(&hci_dev_list_lock); @@ -4038,7 +4034,14 @@ void hci_unregister_dev(struct hci_dev *hdev) } device_del(&hdev->dev); + /* Actual cleanup is deferred until hci_cleanup_dev(). */ + hci_dev_put(hdev); +} +EXPORT_SYMBOL(hci_unregister_dev); +/* Cleanup HCI device */ +void hci_cleanup_dev(struct hci_dev *hdev) +{ debugfs_remove_recursive(hdev->debugfs); kfree_const(hdev->hw_info); kfree_const(hdev->fw_info); @@ -4063,11 +4066,8 @@ void hci_unregister_dev(struct hci_dev *hdev) hci_blocked_keys_clear(hdev); hci_dev_unlock(hdev); - hci_dev_put(hdev); - - ida_simple_remove(&hci_index_ida, id); + ida_simple_remove(&hci_index_ida, hdev->id); } -EXPORT_SYMBOL(hci_unregister_dev); /* Suspend HCI device */ int hci_suspend_dev(struct hci_dev *hdev) diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index b04a5a02ecf3..f1128c2134f0 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -59,6 +59,17 @@ struct hci_pinfo { char comm[TASK_COMM_LEN]; }; +static struct hci_dev *hci_hdev_from_sock(struct sock *sk) +{ + struct hci_dev *hdev = hci_pi(sk)->hdev; + + if (!hdev) + return ERR_PTR(-EBADFD); + if (hci_dev_test_flag(hdev, HCI_UNREGISTER)) + return ERR_PTR(-EPIPE); + return hdev; +} + void hci_sock_set_flag(struct sock *sk, int nr) { set_bit(nr, &hci_pi(sk)->flags); @@ -759,19 +770,13 @@ void hci_sock_dev_event(struct hci_dev *hdev, int event) if (event == HCI_DEV_UNREG) { struct sock *sk; - /* Detach sockets from device */ + /* Wake up sockets using this dead device */ read_lock(&hci_sk_list.lock); sk_for_each(sk, &hci_sk_list.head) { - lock_sock(sk); if (hci_pi(sk)->hdev == hdev) { - hci_pi(sk)->hdev = NULL; sk->sk_err = EPIPE; - sk->sk_state = BT_OPEN; sk->sk_state_change(sk); - - hci_dev_put(hdev); } - release_sock(sk); } read_unlock(&hci_sk_list.lock); } @@ -930,10 +935,10 @@ static int hci_sock_reject_list_del(struct hci_dev *hdev, void __user *arg) static int hci_sock_bound_ioctl(struct sock *sk, unsigned int cmd, unsigned long arg) { - struct hci_dev *hdev = hci_pi(sk)->hdev; + struct hci_dev *hdev = hci_hdev_from_sock(sk); - if (!hdev) - return -EBADFD; + if (IS_ERR(hdev)) + return PTR_ERR(hdev); if (hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) return -EBUSY; @@ -1103,6 +1108,18 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, lock_sock(sk); + /* Allow detaching from dead device and attaching to alive device, if + * the caller wants to re-bind (instead of close) this socket in + * response to hci_sock_dev_event(HCI_DEV_UNREG) notification. + */ + hdev = hci_pi(sk)->hdev; + if (hdev && hci_dev_test_flag(hdev, HCI_UNREGISTER)) { + hci_pi(sk)->hdev = NULL; + sk->sk_state = BT_OPEN; + hci_dev_put(hdev); + } + hdev = NULL; + if (sk->sk_state == BT_BOUND) { err = -EALREADY; goto done; @@ -1379,9 +1396,9 @@ static int hci_sock_getname(struct socket *sock, struct sockaddr *addr, lock_sock(sk); - hdev = hci_pi(sk)->hdev; - if (!hdev) { - err = -EBADFD; + hdev = hci_hdev_from_sock(sk); + if (IS_ERR(hdev)) { + err = PTR_ERR(hdev); goto done; } @@ -1743,9 +1760,9 @@ static int hci_sock_sendmsg(struct socket *sock, struct msghdr *msg, goto done; } - hdev = hci_pi(sk)->hdev; - if (!hdev) { - err = -EBADFD; + hdev = hci_hdev_from_sock(sk); + if (IS_ERR(hdev)) { + err = PTR_ERR(hdev); goto done; } diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c index 9874844a95a9..b69d88b88d2e 100644 --- a/net/bluetooth/hci_sysfs.c +++ b/net/bluetooth/hci_sysfs.c @@ -83,6 +83,9 @@ void hci_conn_del_sysfs(struct hci_conn *conn) static void bt_host_release(struct device *dev) { struct hci_dev *hdev = to_hci_dev(dev); + + if (hci_dev_test_flag(hdev, HCI_UNREGISTER)) + hci_cleanup_dev(hdev); kfree(hdev); module_put(THIS_MODULE); } diff --git a/net/bridge/br.c b/net/bridge/br.c index 8fb5dca5f8e0..c8ae823aa8e7 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -166,7 +166,8 @@ static int br_switchdev_event(struct notifier_block *unused, case SWITCHDEV_FDB_ADD_TO_BRIDGE: fdb_info = ptr; err = br_fdb_external_learn_add(br, p, fdb_info->addr, - fdb_info->vid, false); + fdb_info->vid, + fdb_info->is_local, false); if (err) { err = notifier_from_errno(err); break; @@ -201,6 +202,48 @@ static struct notifier_block br_switchdev_notifier = { .notifier_call = br_switchdev_event, }; +/* called under rtnl_mutex */ +static int br_switchdev_blocking_event(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct netlink_ext_ack *extack = netdev_notifier_info_to_extack(ptr); + struct net_device *dev = switchdev_notifier_info_to_dev(ptr); + struct switchdev_notifier_brport_info *brport_info; + const struct switchdev_brport *b; + struct net_bridge_port *p; + int err = NOTIFY_DONE; + + p = br_port_get_rtnl(dev); + if (!p) + goto out; + + switch (event) { + case SWITCHDEV_BRPORT_OFFLOADED: + brport_info = ptr; + b = &brport_info->brport; + + err = br_switchdev_port_offload(p, b->dev, b->ctx, + b->atomic_nb, b->blocking_nb, + b->tx_fwd_offload, extack); + err = notifier_from_errno(err); + break; + case SWITCHDEV_BRPORT_UNOFFLOADED: + brport_info = ptr; + b = &brport_info->brport; + + br_switchdev_port_unoffload(p, b->ctx, b->atomic_nb, + b->blocking_nb); + break; + } + +out: + return err; +} + +static struct notifier_block br_switchdev_blocking_notifier = { + .notifier_call = br_switchdev_blocking_event, +}; + /* br_boolopt_toggle - change user-controlled boolean option * * @br: bridge device @@ -355,10 +398,14 @@ static int __init br_init(void) if (err) goto err_out4; - err = br_netlink_init(); + err = register_switchdev_blocking_notifier(&br_switchdev_blocking_notifier); if (err) goto err_out5; + err = br_netlink_init(); + if (err) + goto err_out6; + brioctl_set(br_ioctl_stub); #if IS_ENABLED(CONFIG_ATM_LANE) @@ -373,6 +420,8 @@ static int __init br_init(void) return 0; +err_out6: + unregister_switchdev_blocking_notifier(&br_switchdev_blocking_notifier); err_out5: unregister_switchdev_notifier(&br_switchdev_notifier); err_out4: @@ -392,6 +441,7 @@ static void __exit br_deinit(void) { stp_proto_unregister(&br_stp_proto); br_netlink_fini(); + unregister_switchdev_blocking_notifier(&br_switchdev_blocking_notifier); unregister_switchdev_notifier(&br_switchdev_notifier); unregister_netdevice_notifier(&br_device_notifier); brioctl_set(NULL); diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 4ff8c67ac88f..ddd09f5994a7 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -745,7 +745,7 @@ static int br_fdb_replay_one(struct net_bridge *br, struct notifier_block *nb, item.added_by_user = test_bit(BR_FDB_ADDED_BY_USER, &fdb->flags); item.offloaded = test_bit(BR_FDB_OFFLOADED, &fdb->flags); item.is_local = test_bit(BR_FDB_LOCAL, &fdb->flags); - item.info.dev = item.is_local ? br->dev : p->dev; + item.info.dev = (!p || item.is_local) ? br->dev : p->dev; item.info.ctx = ctx; err = nb->notifier_call(nb, action, &item); @@ -1011,7 +1011,8 @@ static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source, static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge *br, struct net_bridge_port *p, const unsigned char *addr, - u16 nlh_flags, u16 vid, struct nlattr *nfea_tb[]) + u16 nlh_flags, u16 vid, struct nlattr *nfea_tb[], + struct netlink_ext_ack *extack) { int err = 0; @@ -1030,7 +1031,15 @@ static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge *br, rcu_read_unlock(); local_bh_enable(); } else if (ndm->ndm_flags & NTF_EXT_LEARNED) { - err = br_fdb_external_learn_add(br, p, addr, vid, true); + if (!p && !(ndm->ndm_state & NUD_PERMANENT)) { + NL_SET_ERR_MSG_MOD(extack, + "FDB entry towards bridge must be permanent"); + return -EINVAL; + } + + err = br_fdb_external_learn_add(br, p, addr, vid, + ndm->ndm_state & NUD_PERMANENT, + true); } else { spin_lock_bh(&br->hash_lock); err = fdb_add_entry(br, p, addr, ndm, nlh_flags, vid, nfea_tb); @@ -1102,9 +1111,11 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], } /* VID was specified, so use it. */ - err = __br_fdb_add(ndm, br, p, addr, nlh_flags, vid, nfea_tb); + err = __br_fdb_add(ndm, br, p, addr, nlh_flags, vid, nfea_tb, + extack); } else { - err = __br_fdb_add(ndm, br, p, addr, nlh_flags, 0, nfea_tb); + err = __br_fdb_add(ndm, br, p, addr, nlh_flags, 0, nfea_tb, + extack); if (err || !vg || !vg->num_vlans) goto out; @@ -1116,7 +1127,7 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], if (!br_vlan_should_use(v)) continue; err = __br_fdb_add(ndm, br, p, addr, nlh_flags, v->vid, - nfea_tb); + nfea_tb, extack); if (err) goto out; } @@ -1256,7 +1267,7 @@ void br_fdb_unsync_static(struct net_bridge *br, struct net_bridge_port *p) } int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p, - const unsigned char *addr, u16 vid, + const unsigned char *addr, u16 vid, bool is_local, bool swdev_notify) { struct net_bridge_fdb_entry *fdb; @@ -1273,6 +1284,10 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p, if (swdev_notify) flags |= BIT(BR_FDB_ADDED_BY_USER); + + if (is_local) + flags |= BIT(BR_FDB_LOCAL); + fdb = fdb_create(br, p, addr, vid, flags); if (!fdb) { err = -ENOMEM; @@ -1299,6 +1314,9 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p, if (swdev_notify) set_bit(BR_FDB_ADDED_BY_USER, &fdb->flags); + if (is_local) + set_bit(BR_FDB_LOCAL, &fdb->flags); + if (modified) fdb_notify(br, fdb, RTM_NEWNEIGH, swdev_notify); } diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 86f6d7e93ea8..67c60240b713 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -456,7 +456,7 @@ int br_add_bridge(struct net *net, const char *name) dev_net_set(dev, net); dev->rtnl_link_ops = &br_link_ops; - res = register_netdev(dev); + res = register_netdevice(dev); if (res) free_netdev(dev); return res; @@ -467,7 +467,6 @@ int br_del_bridge(struct net *net, const char *name) struct net_device *dev; int ret = 0; - rtnl_lock(); dev = __dev_get_by_name(net, name); if (dev == NULL) ret = -ENXIO; /* Could not find device */ @@ -485,7 +484,6 @@ int br_del_bridge(struct net *net, const char *name) else br_dev_delete(dev, NULL); - rtnl_unlock(); return ret; } diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c index 46a24c20e405..793b0db9d9a3 100644 --- a/net/bridge/br_ioctl.c +++ b/net/bridge/br_ioctl.c @@ -351,7 +351,7 @@ static int old_deviceless(struct net *net, void __user *uarg) if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; - if (copy_from_user(buf, uarg, IFNAMSIZ)) + if (copy_from_user(buf, (void __user *)args[1], IFNAMSIZ)) return -EFAULT; buf[IFNAMSIZ-1] = 0; @@ -369,33 +369,44 @@ static int old_deviceless(struct net *net, void __user *uarg) int br_ioctl_stub(struct net *net, struct net_bridge *br, unsigned int cmd, struct ifreq *ifr, void __user *uarg) { + int ret = -EOPNOTSUPP; + + rtnl_lock(); + switch (cmd) { case SIOCGIFBR: case SIOCSIFBR: - return old_deviceless(net, uarg); - + ret = old_deviceless(net, uarg); + break; case SIOCBRADDBR: case SIOCBRDELBR: { char buf[IFNAMSIZ]; - if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) - return -EPERM; + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) { + ret = -EPERM; + break; + } - if (copy_from_user(buf, uarg, IFNAMSIZ)) - return -EFAULT; + if (copy_from_user(buf, uarg, IFNAMSIZ)) { + ret = -EFAULT; + break; + } buf[IFNAMSIZ-1] = 0; if (cmd == SIOCBRADDBR) - return br_add_bridge(net, buf); - - return br_del_bridge(net, buf); + ret = br_add_bridge(net, buf); + else + ret = br_del_bridge(net, buf); } - + break; case SIOCBRADDIF: case SIOCBRDELIF: - return add_del_if(br, ifr->ifr_ifindex, cmd == SIOCBRADDIF); - + ret = add_del_if(br, ifr->ifr_ifindex, cmd == SIOCBRADDIF); + break; } - return -EOPNOTSUPP; + + rtnl_unlock(); + + return ret; } diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index c939631428b9..51991f1b3e5a 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -770,7 +770,7 @@ int br_fdb_get(struct sk_buff *skb, struct nlattr *tb[], struct net_device *dev, int br_fdb_sync_static(struct net_bridge *br, struct net_bridge_port *p); void br_fdb_unsync_static(struct net_bridge *br, struct net_bridge_port *p); int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p, - const unsigned char *addr, u16 vid, + const unsigned char *addr, u16 vid, bool is_local, bool swdev_notify); int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p, const unsigned char *addr, u16 vid, @@ -1880,6 +1880,17 @@ static inline void br_sysfs_delbr(struct net_device *dev) { return; } /* br_switchdev.c */ #ifdef CONFIG_NET_SWITCHDEV +int br_switchdev_port_offload(struct net_bridge_port *p, + struct net_device *dev, const void *ctx, + struct notifier_block *atomic_nb, + struct notifier_block *blocking_nb, + bool tx_fwd_offload, + struct netlink_ext_ack *extack); + +void br_switchdev_port_unoffload(struct net_bridge_port *p, const void *ctx, + struct notifier_block *atomic_nb, + struct notifier_block *blocking_nb); + bool br_switchdev_frame_uses_tx_fwd_offload(struct sk_buff *skb); void br_switchdev_frame_set_offload_fwd_mark(struct sk_buff *skb); @@ -1908,6 +1919,24 @@ static inline void br_switchdev_frame_unmark(struct sk_buff *skb) skb->offload_fwd_mark = 0; } #else +static inline int +br_switchdev_port_offload(struct net_bridge_port *p, + struct net_device *dev, const void *ctx, + struct notifier_block *atomic_nb, + struct notifier_block *blocking_nb, + bool tx_fwd_offload, + struct netlink_ext_ack *extack) +{ + return -EOPNOTSUPP; +} + +static inline void +br_switchdev_port_unoffload(struct net_bridge_port *p, const void *ctx, + struct notifier_block *atomic_nb, + struct notifier_block *blocking_nb) +{ +} + static inline bool br_switchdev_frame_uses_tx_fwd_offload(struct sk_buff *skb) { return false; diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c index 023de0e958f1..6bf518d78f02 100644 --- a/net/bridge/br_switchdev.c +++ b/net/bridge/br_switchdev.c @@ -134,7 +134,7 @@ br_switchdev_fdb_notify(struct net_bridge *br, .is_local = test_bit(BR_FDB_LOCAL, &fdb->flags), .offloaded = test_bit(BR_FDB_OFFLOADED, &fdb->flags), }; - struct net_device *dev = info.is_local ? br->dev : dst->dev; + struct net_device *dev = (!dst || info.is_local) ? br->dev : dst->dev; switch (type) { case RTM_DELNEIGH: @@ -312,23 +312,16 @@ static void nbp_switchdev_unsync_objs(struct net_bridge_port *p, /* Let the bridge know that this port is offloaded, so that it can assign a * switchdev hardware domain to it. */ -int switchdev_bridge_port_offload(struct net_device *brport_dev, - struct net_device *dev, const void *ctx, - struct notifier_block *atomic_nb, - struct notifier_block *blocking_nb, - bool tx_fwd_offload, - struct netlink_ext_ack *extack) +int br_switchdev_port_offload(struct net_bridge_port *p, + struct net_device *dev, const void *ctx, + struct notifier_block *atomic_nb, + struct notifier_block *blocking_nb, + bool tx_fwd_offload, + struct netlink_ext_ack *extack) { struct netdev_phys_item_id ppid; - struct net_bridge_port *p; int err; - ASSERT_RTNL(); - - p = br_port_get_rtnl(brport_dev); - if (!p) - return -ENODEV; - err = dev_get_port_parent_id(dev, &ppid, false); if (err) return err; @@ -348,23 +341,12 @@ out_switchdev_del: return err; } -EXPORT_SYMBOL_GPL(switchdev_bridge_port_offload); -void switchdev_bridge_port_unoffload(struct net_device *brport_dev, - const void *ctx, - struct notifier_block *atomic_nb, - struct notifier_block *blocking_nb) +void br_switchdev_port_unoffload(struct net_bridge_port *p, const void *ctx, + struct notifier_block *atomic_nb, + struct notifier_block *blocking_nb) { - struct net_bridge_port *p; - - ASSERT_RTNL(); - - p = br_port_get_rtnl(brport_dev); - if (!p) - return; - nbp_switchdev_unsync_objs(p, ctx, atomic_nb, blocking_nb); nbp_switchdev_del(p); } -EXPORT_SYMBOL_GPL(switchdev_bridge_port_unoffload); diff --git a/net/can/j1939/j1939-priv.h b/net/can/j1939/j1939-priv.h index 12369b604ce9..f6df20808f5e 100644 --- a/net/can/j1939/j1939-priv.h +++ b/net/can/j1939/j1939-priv.h @@ -20,9 +20,12 @@ struct j1939_session; enum j1939_sk_errqueue_type { - J1939_ERRQUEUE_ACK, - J1939_ERRQUEUE_SCHED, - J1939_ERRQUEUE_ABORT, + J1939_ERRQUEUE_TX_ACK, + J1939_ERRQUEUE_TX_SCHED, + J1939_ERRQUEUE_TX_ABORT, + J1939_ERRQUEUE_RX_RTS, + J1939_ERRQUEUE_RX_DPO, + J1939_ERRQUEUE_RX_ABORT, }; /* j1939 devices */ @@ -87,6 +90,7 @@ struct j1939_priv { struct list_head j1939_socks; struct kref rx_kref; + u32 rx_tskey; }; void j1939_ecu_put(struct j1939_ecu *ecu); diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c index b904c06ab0cf..6dff4510687a 100644 --- a/net/can/j1939/socket.c +++ b/net/can/j1939/socket.c @@ -905,20 +905,33 @@ failure: return NULL; } -static size_t j1939_sk_opt_stats_get_size(void) +static size_t j1939_sk_opt_stats_get_size(enum j1939_sk_errqueue_type type) { - return - nla_total_size(sizeof(u32)) + /* J1939_NLA_BYTES_ACKED */ - 0; + switch (type) { + case J1939_ERRQUEUE_RX_RTS: + return + nla_total_size(sizeof(u32)) + /* J1939_NLA_TOTAL_SIZE */ + nla_total_size(sizeof(u32)) + /* J1939_NLA_PGN */ + nla_total_size(sizeof(u64)) + /* J1939_NLA_SRC_NAME */ + nla_total_size(sizeof(u64)) + /* J1939_NLA_DEST_NAME */ + nla_total_size(sizeof(u8)) + /* J1939_NLA_SRC_ADDR */ + nla_total_size(sizeof(u8)) + /* J1939_NLA_DEST_ADDR */ + 0; + default: + return + nla_total_size(sizeof(u32)) + /* J1939_NLA_BYTES_ACKED */ + 0; + } } static struct sk_buff * -j1939_sk_get_timestamping_opt_stats(struct j1939_session *session) +j1939_sk_get_timestamping_opt_stats(struct j1939_session *session, + enum j1939_sk_errqueue_type type) { struct sk_buff *stats; u32 size; - stats = alloc_skb(j1939_sk_opt_stats_get_size(), GFP_ATOMIC); + stats = alloc_skb(j1939_sk_opt_stats_get_size(type), GFP_ATOMIC); if (!stats) return NULL; @@ -928,32 +941,67 @@ j1939_sk_get_timestamping_opt_stats(struct j1939_session *session) size = min(session->pkt.tx_acked * 7, session->total_message_size); - nla_put_u32(stats, J1939_NLA_BYTES_ACKED, size); + switch (type) { + case J1939_ERRQUEUE_RX_RTS: + nla_put_u32(stats, J1939_NLA_TOTAL_SIZE, + session->total_message_size); + nla_put_u32(stats, J1939_NLA_PGN, + session->skcb.addr.pgn); + nla_put_u64_64bit(stats, J1939_NLA_SRC_NAME, + session->skcb.addr.src_name, J1939_NLA_PAD); + nla_put_u64_64bit(stats, J1939_NLA_DEST_NAME, + session->skcb.addr.dst_name, J1939_NLA_PAD); + nla_put_u8(stats, J1939_NLA_SRC_ADDR, + session->skcb.addr.sa); + nla_put_u8(stats, J1939_NLA_DEST_ADDR, + session->skcb.addr.da); + break; + default: + nla_put_u32(stats, J1939_NLA_BYTES_ACKED, size); + } return stats; } -void j1939_sk_errqueue(struct j1939_session *session, - enum j1939_sk_errqueue_type type) +static void __j1939_sk_errqueue(struct j1939_session *session, struct sock *sk, + enum j1939_sk_errqueue_type type) { struct j1939_priv *priv = session->priv; - struct sock *sk = session->sk; struct j1939_sock *jsk; struct sock_exterr_skb *serr; struct sk_buff *skb; char *state = "UNK"; int err; - /* currently we have no sk for the RX session */ - if (!sk) - return; - jsk = j1939_sk(sk); if (!(jsk->state & J1939_SOCK_ERRQUEUE)) return; - skb = j1939_sk_get_timestamping_opt_stats(session); + switch (type) { + case J1939_ERRQUEUE_TX_ACK: + if (!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_ACK)) + return; + break; + case J1939_ERRQUEUE_TX_SCHED: + if (!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_SCHED)) + return; + break; + case J1939_ERRQUEUE_TX_ABORT: + break; + case J1939_ERRQUEUE_RX_RTS: + fallthrough; + case J1939_ERRQUEUE_RX_DPO: + fallthrough; + case J1939_ERRQUEUE_RX_ABORT: + if (!(sk->sk_tsflags & SOF_TIMESTAMPING_RX_SOFTWARE)) + return; + break; + default: + netdev_err(priv->ndev, "Unknown errqueue type %i\n", type); + } + + skb = j1939_sk_get_timestamping_opt_stats(session, type); if (!skb) return; @@ -964,36 +1012,42 @@ void j1939_sk_errqueue(struct j1939_session *session, serr = SKB_EXT_ERR(skb); memset(serr, 0, sizeof(*serr)); switch (type) { - case J1939_ERRQUEUE_ACK: - if (!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_ACK)) { - kfree_skb(skb); - return; - } - + case J1939_ERRQUEUE_TX_ACK: serr->ee.ee_errno = ENOMSG; serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING; serr->ee.ee_info = SCM_TSTAMP_ACK; - state = "ACK"; + state = "TX ACK"; break; - case J1939_ERRQUEUE_SCHED: - if (!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_SCHED)) { - kfree_skb(skb); - return; - } - + case J1939_ERRQUEUE_TX_SCHED: serr->ee.ee_errno = ENOMSG; serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING; serr->ee.ee_info = SCM_TSTAMP_SCHED; - state = "SCH"; + state = "TX SCH"; break; - case J1939_ERRQUEUE_ABORT: + case J1939_ERRQUEUE_TX_ABORT: serr->ee.ee_errno = session->err; serr->ee.ee_origin = SO_EE_ORIGIN_LOCAL; serr->ee.ee_info = J1939_EE_INFO_TX_ABORT; - state = "ABT"; + state = "TX ABT"; + break; + case J1939_ERRQUEUE_RX_RTS: + serr->ee.ee_errno = ENOMSG; + serr->ee.ee_origin = SO_EE_ORIGIN_LOCAL; + serr->ee.ee_info = J1939_EE_INFO_RX_RTS; + state = "RX RTS"; + break; + case J1939_ERRQUEUE_RX_DPO: + serr->ee.ee_errno = ENOMSG; + serr->ee.ee_origin = SO_EE_ORIGIN_LOCAL; + serr->ee.ee_info = J1939_EE_INFO_RX_DPO; + state = "RX DPO"; + break; + case J1939_ERRQUEUE_RX_ABORT: + serr->ee.ee_errno = session->err; + serr->ee.ee_origin = SO_EE_ORIGIN_LOCAL; + serr->ee.ee_info = J1939_EE_INFO_RX_ABORT; + state = "RX ABT"; break; - default: - netdev_err(priv->ndev, "Unknown errqueue type %i\n", type); } serr->opt_stats = true; @@ -1008,6 +1062,27 @@ void j1939_sk_errqueue(struct j1939_session *session, kfree_skb(skb); }; +void j1939_sk_errqueue(struct j1939_session *session, + enum j1939_sk_errqueue_type type) +{ + struct j1939_priv *priv = session->priv; + struct j1939_sock *jsk; + + if (session->sk) { + /* send TX notifications to the socket of origin */ + __j1939_sk_errqueue(session, session->sk, type); + return; + } + + /* spread RX notifications to all sockets subscribed to this session */ + spin_lock_bh(&priv->j1939_socks_lock); + list_for_each_entry(jsk, &priv->j1939_socks, list) { + if (j1939_sk_recv_match_one(jsk, &session->skcb)) + __j1939_sk_errqueue(session, &jsk->sk, type); + } + spin_unlock_bh(&priv->j1939_socks_lock); +}; + void j1939_sk_send_loop_abort(struct sock *sk, int err) { sk->sk_err = err; diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c index dac70cdd3f41..bb5c4b8979be 100644 --- a/net/can/j1939/transport.c +++ b/net/can/j1939/transport.c @@ -260,10 +260,14 @@ static void __j1939_session_drop(struct j1939_session *session) static void j1939_session_destroy(struct j1939_session *session) { - if (session->err) - j1939_sk_errqueue(session, J1939_ERRQUEUE_ABORT); - else - j1939_sk_errqueue(session, J1939_ERRQUEUE_ACK); + if (session->transmission) { + if (session->err) + j1939_sk_errqueue(session, J1939_ERRQUEUE_TX_ABORT); + else + j1939_sk_errqueue(session, J1939_ERRQUEUE_TX_ACK); + } else if (session->err) { + j1939_sk_errqueue(session, J1939_ERRQUEUE_RX_ABORT); + } netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session); @@ -822,7 +826,7 @@ static int j1939_session_tx_dat(struct j1939_session *session) memcpy(&dat[1], &tpdat[offset], len); ret = j1939_tp_tx_dat(session, dat, len + 1); if (ret < 0) { - /* ENOBUS == CAN interface TX queue is full */ + /* ENOBUFS == CAN interface TX queue is full */ if (ret != -ENOBUFS) netdev_alert(priv->ndev, "%s: 0x%p: queue data error: %i\n", @@ -1044,7 +1048,7 @@ static int j1939_simple_txnext(struct j1939_session *session) if (ret) goto out_free; - j1939_sk_errqueue(session, J1939_ERRQUEUE_SCHED); + j1939_sk_errqueue(session, J1939_ERRQUEUE_TX_SCHED); j1939_sk_queue_activate_next(session); out_free: @@ -1116,6 +1120,8 @@ static void __j1939_session_cancel(struct j1939_session *session, if (session->sk) j1939_sk_send_loop_abort(session->sk, session->err); + else + j1939_sk_errqueue(session, J1939_ERRQUEUE_RX_ABORT); } static void j1939_session_cancel(struct j1939_session *session, @@ -1330,6 +1336,8 @@ static void j1939_xtp_rx_abort_one(struct j1939_priv *priv, struct sk_buff *skb, session->err = j1939_xtp_abort_to_errno(priv, abort); if (session->sk) j1939_sk_send_loop_abort(session->sk, session->err); + else + j1939_sk_errqueue(session, J1939_ERRQUEUE_RX_ABORT); j1939_session_deactivate_activate_next(session); abort_put: @@ -1438,7 +1446,7 @@ j1939_xtp_rx_cts_one(struct j1939_session *session, struct sk_buff *skb) if (session->transmission) { if (session->pkt.tx_acked) j1939_sk_errqueue(session, - J1939_ERRQUEUE_SCHED); + J1939_ERRQUEUE_TX_SCHED); j1939_session_txtimer_cancel(session); j1939_tp_schedule_txtimer(session, 0); } @@ -1630,6 +1638,9 @@ j1939_session *j1939_xtp_rx_rts_session_new(struct j1939_priv *priv, session->pkt.rx = 0; session->pkt.tx = 0; + session->tskey = priv->rx_tskey++; + j1939_sk_errqueue(session, J1939_ERRQUEUE_RX_RTS); + WARN_ON_ONCE(j1939_session_activate(session)); return session; @@ -1752,6 +1763,9 @@ static void j1939_xtp_rx_dpo_one(struct j1939_session *session, session->pkt.dpo = j1939_etp_ctl_to_packet(skb->data); session->last_cmd = dat[0]; j1939_tp_set_rxtimeout(session, 750); + + if (!session->transmission) + j1939_sk_errqueue(session, J1939_ERRQUEUE_RX_DPO); } static void j1939_xtp_rx_dpo(struct j1939_priv *priv, struct sk_buff *skb, diff --git a/net/can/raw.c b/net/can/raw.c index cd5a49380116..7105fa4824e4 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -592,9 +592,7 @@ static int raw_setsockopt(struct socket *sock, int level, int optname, ro->count = count; out_fil: - if (dev) - dev_put(dev); - + dev_put(dev); release_sock(sk); rtnl_unlock(); @@ -638,9 +636,7 @@ static int raw_setsockopt(struct socket *sock, int level, int optname, ro->err_mask = err_mask; out_err: - if (dev) - dev_put(dev); - + dev_put(dev); release_sock(sk); rtnl_unlock(); diff --git a/net/core/dev.c b/net/core/dev.c index e5045b628dec..88650791c360 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -676,131 +676,6 @@ void dev_remove_offload(struct packet_offload *po) } EXPORT_SYMBOL(dev_remove_offload); -/****************************************************************************** - * - * Device Boot-time Settings Routines - * - ******************************************************************************/ - -/* Boot time configuration table */ -static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX]; - -/** - * netdev_boot_setup_add - add new setup entry - * @name: name of the device - * @map: configured settings for the device - * - * Adds new setup entry to the dev_boot_setup list. The function - * returns 0 on error and 1 on success. This is a generic routine to - * all netdevices. - */ -static int netdev_boot_setup_add(char *name, struct ifmap *map) -{ - struct netdev_boot_setup *s; - int i; - - s = dev_boot_setup; - for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) { - if (s[i].name[0] == '\0' || s[i].name[0] == ' ') { - memset(s[i].name, 0, sizeof(s[i].name)); - strlcpy(s[i].name, name, IFNAMSIZ); - memcpy(&s[i].map, map, sizeof(s[i].map)); - break; - } - } - - return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1; -} - -/** - * netdev_boot_setup_check - check boot time settings - * @dev: the netdevice - * - * Check boot time settings for the device. - * The found settings are set for the device to be used - * later in the device probing. - * Returns 0 if no settings found, 1 if they are. - */ -int netdev_boot_setup_check(struct net_device *dev) -{ - struct netdev_boot_setup *s = dev_boot_setup; - int i; - - for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) { - if (s[i].name[0] != '\0' && s[i].name[0] != ' ' && - !strcmp(dev->name, s[i].name)) { - dev->irq = s[i].map.irq; - dev->base_addr = s[i].map.base_addr; - dev->mem_start = s[i].map.mem_start; - dev->mem_end = s[i].map.mem_end; - return 1; - } - } - return 0; -} -EXPORT_SYMBOL(netdev_boot_setup_check); - - -/** - * netdev_boot_base - get address from boot time settings - * @prefix: prefix for network device - * @unit: id for network device - * - * Check boot time settings for the base address of device. - * The found settings are set for the device to be used - * later in the device probing. - * Returns 0 if no settings found. - */ -unsigned long netdev_boot_base(const char *prefix, int unit) -{ - const struct netdev_boot_setup *s = dev_boot_setup; - char name[IFNAMSIZ]; - int i; - - sprintf(name, "%s%d", prefix, unit); - - /* - * If device already registered then return base of 1 - * to indicate not to probe for this interface - */ - if (__dev_get_by_name(&init_net, name)) - return 1; - - for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) - if (!strcmp(name, s[i].name)) - return s[i].map.base_addr; - return 0; -} - -/* - * Saves at boot time configured settings for any netdevice. - */ -int __init netdev_boot_setup(char *str) -{ - int ints[5]; - struct ifmap map; - - str = get_options(str, ARRAY_SIZE(ints), ints); - if (!str || !*str) - return 0; - - /* Save settings */ - memset(&map, 0, sizeof(map)); - if (ints[0] > 0) - map.irq = ints[1]; - if (ints[0] > 1) - map.base_addr = ints[2]; - if (ints[0] > 2) - map.mem_start = ints[3]; - if (ints[0] > 3) - map.mem_end = ints[4]; - - /* Add new entry to the list */ - return netdev_boot_setup_add(str, &map); -} - -__setup("netdev=", netdev_boot_setup); - /******************************************************************************* * * Device Interface Subroutines @@ -956,8 +831,7 @@ struct net_device *dev_get_by_name(struct net *net, const char *name) rcu_read_lock(); dev = dev_get_by_name_rcu(net, name); - if (dev) - dev_hold(dev); + dev_hold(dev); rcu_read_unlock(); return dev; } @@ -1030,8 +904,7 @@ struct net_device *dev_get_by_index(struct net *net, int ifindex) rcu_read_lock(); dev = dev_get_by_index_rcu(net, ifindex); - if (dev) - dev_hold(dev); + dev_hold(dev); rcu_read_unlock(); return dev; } @@ -3099,6 +2972,50 @@ EXPORT_SYMBOL(netif_set_real_num_rx_queues); #endif /** + * netif_set_real_num_queues - set actual number of RX and TX queues used + * @dev: Network device + * @txq: Actual number of TX queues + * @rxq: Actual number of RX queues + * + * Set the real number of both TX and RX queues. + * Does nothing if the number of queues is already correct. + */ +int netif_set_real_num_queues(struct net_device *dev, + unsigned int txq, unsigned int rxq) +{ + unsigned int old_rxq = dev->real_num_rx_queues; + int err; + + if (txq < 1 || txq > dev->num_tx_queues || + rxq < 1 || rxq > dev->num_rx_queues) + return -EINVAL; + + /* Start from increases, so the error path only does decreases - + * decreases can't fail. + */ + if (rxq > dev->real_num_rx_queues) { + err = netif_set_real_num_rx_queues(dev, rxq); + if (err) + return err; + } + if (txq > dev->real_num_tx_queues) { + err = netif_set_real_num_tx_queues(dev, txq); + if (err) + goto undo_rx; + } + if (rxq < dev->real_num_rx_queues) + WARN_ON(netif_set_real_num_rx_queues(dev, rxq)); + if (txq < dev->real_num_tx_queues) + WARN_ON(netif_set_real_num_tx_queues(dev, txq)); + + return 0; +undo_rx: + WARN_ON(netif_set_real_num_rx_queues(dev, old_rxq)); + return err; +} +EXPORT_SYMBOL(netif_set_real_num_queues); + +/** * netif_get_num_default_rss_queues - default number of RSS queues * * This routine should set an upper limit on the number of RSS queues @@ -5878,7 +5795,7 @@ static void flush_all_backlogs(void) */ ASSERT_RTNL(); - get_online_cpus(); + cpus_read_lock(); cpumask_clear(&flush_cpus); for_each_online_cpu(cpu) { @@ -5896,7 +5813,7 @@ static void flush_all_backlogs(void) for_each_cpu(cpu, &flush_cpus) flush_work(per_cpu_ptr(&flush_works, cpu)); - put_online_cpus(); + cpus_read_unlock(); } /* Pass the currently batched GRO_NORMAL SKBs up to the stack. */ diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index 4035bce06bf8..0e87237fd871 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -379,7 +379,14 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, void __user *data, case SIOCBRDELIF: if (!netif_device_present(dev)) return -ENODEV; - return br_ioctl_call(net, netdev_priv(dev), cmd, ifr, NULL); + if (!netif_is_bridge_master(dev)) + return -EOPNOTSUPP; + dev_hold(dev); + rtnl_unlock(); + err = br_ioctl_call(net, netdev_priv(dev), cmd, ifr, NULL); + dev_put(dev); + rtnl_lock(); + return err; case SIOCSHWTSTAMP: err = net_hwtstamp_validate(ifr); diff --git a/net/core/devlink.c b/net/core/devlink.c index 8fa015319af6..b02d54ab59ac 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -804,10 +804,11 @@ static int devlink_nl_port_attrs_put(struct sk_buff *msg, return 0; } -static int -devlink_port_fn_hw_addr_fill(struct devlink *devlink, const struct devlink_ops *ops, - struct devlink_port *port, struct sk_buff *msg, - struct netlink_ext_ack *extack, bool *msg_updated) +static int devlink_port_fn_hw_addr_fill(const struct devlink_ops *ops, + struct devlink_port *port, + struct sk_buff *msg, + struct netlink_ext_ack *extack, + bool *msg_updated) { u8 hw_addr[MAX_ADDR_LEN]; int hw_addr_len; @@ -816,7 +817,8 @@ devlink_port_fn_hw_addr_fill(struct devlink *devlink, const struct devlink_ops * if (!ops->port_function_hw_addr_get) return 0; - err = ops->port_function_hw_addr_get(devlink, port, hw_addr, &hw_addr_len, extack); + err = ops->port_function_hw_addr_get(port, hw_addr, &hw_addr_len, + extack); if (err) { if (err == -EOPNOTSUPP) return 0; @@ -893,12 +895,11 @@ devlink_port_fn_opstate_valid(enum devlink_port_fn_opstate opstate) opstate == DEVLINK_PORT_FN_OPSTATE_ATTACHED; } -static int -devlink_port_fn_state_fill(struct devlink *devlink, - const struct devlink_ops *ops, - struct devlink_port *port, struct sk_buff *msg, - struct netlink_ext_ack *extack, - bool *msg_updated) +static int devlink_port_fn_state_fill(const struct devlink_ops *ops, + struct devlink_port *port, + struct sk_buff *msg, + struct netlink_ext_ack *extack, + bool *msg_updated) { enum devlink_port_fn_opstate opstate; enum devlink_port_fn_state state; @@ -907,7 +908,7 @@ devlink_port_fn_state_fill(struct devlink *devlink, if (!ops->port_fn_state_get) return 0; - err = ops->port_fn_state_get(devlink, port, &state, &opstate, extack); + err = ops->port_fn_state_get(port, &state, &opstate, extack); if (err) { if (err == -EOPNOTSUPP) return 0; @@ -935,7 +936,6 @@ static int devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *port, struct netlink_ext_ack *extack) { - struct devlink *devlink = port->devlink; const struct devlink_ops *ops; struct nlattr *function_attr; bool msg_updated = false; @@ -945,13 +945,12 @@ devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *por if (!function_attr) return -EMSGSIZE; - ops = devlink->ops; - err = devlink_port_fn_hw_addr_fill(devlink, ops, port, msg, - extack, &msg_updated); + ops = port->devlink->ops; + err = devlink_port_fn_hw_addr_fill(ops, port, msg, extack, + &msg_updated); if (err) goto out; - err = devlink_port_fn_state_fill(devlink, ops, port, msg, extack, - &msg_updated); + err = devlink_port_fn_state_fill(ops, port, msg, extack, &msg_updated); out: if (err || !msg_updated) nla_nest_cancel(msg, function_attr); @@ -1269,31 +1268,33 @@ out: return msg->len; } -static int devlink_port_type_set(struct devlink *devlink, - struct devlink_port *devlink_port, +static int devlink_port_type_set(struct devlink_port *devlink_port, enum devlink_port_type port_type) { int err; - if (devlink->ops->port_type_set) { - if (port_type == devlink_port->type) - return 0; - err = devlink->ops->port_type_set(devlink_port, port_type); - if (err) - return err; - devlink_port->desired_type = port_type; - devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW); + if (!devlink_port->devlink->ops->port_type_set) + return -EOPNOTSUPP; + + if (port_type == devlink_port->type) return 0; - } - return -EOPNOTSUPP; + + err = devlink_port->devlink->ops->port_type_set(devlink_port, + port_type); + if (err) + return err; + + devlink_port->desired_type = port_type; + devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW); + return 0; } -static int -devlink_port_function_hw_addr_set(struct devlink *devlink, struct devlink_port *port, - const struct nlattr *attr, struct netlink_ext_ack *extack) +static int devlink_port_function_hw_addr_set(struct devlink_port *port, + const struct nlattr *attr, + struct netlink_ext_ack *extack) { - const struct devlink_ops *ops; + const struct devlink_ops *ops = port->devlink->ops; const u8 *hw_addr; int hw_addr_len; @@ -1314,17 +1315,16 @@ devlink_port_function_hw_addr_set(struct devlink *devlink, struct devlink_port * } } - ops = devlink->ops; if (!ops->port_function_hw_addr_set) { NL_SET_ERR_MSG_MOD(extack, "Port doesn't support function attributes"); return -EOPNOTSUPP; } - return ops->port_function_hw_addr_set(devlink, port, hw_addr, hw_addr_len, extack); + return ops->port_function_hw_addr_set(port, hw_addr, hw_addr_len, + extack); } -static int devlink_port_fn_state_set(struct devlink *devlink, - struct devlink_port *port, +static int devlink_port_fn_state_set(struct devlink_port *port, const struct nlattr *attr, struct netlink_ext_ack *extack) { @@ -1332,18 +1332,18 @@ static int devlink_port_fn_state_set(struct devlink *devlink, const struct devlink_ops *ops; state = nla_get_u8(attr); - ops = devlink->ops; + ops = port->devlink->ops; if (!ops->port_fn_state_set) { NL_SET_ERR_MSG_MOD(extack, "Function does not support state setting"); return -EOPNOTSUPP; } - return ops->port_fn_state_set(devlink, port, state, extack); + return ops->port_fn_state_set(port, state, extack); } -static int -devlink_port_function_set(struct devlink *devlink, struct devlink_port *port, - const struct nlattr *attr, struct netlink_ext_ack *extack) +static int devlink_port_function_set(struct devlink_port *port, + const struct nlattr *attr, + struct netlink_ext_ack *extack) { struct nlattr *tb[DEVLINK_PORT_FUNCTION_ATTR_MAX + 1]; int err; @@ -1357,7 +1357,7 @@ devlink_port_function_set(struct devlink *devlink, struct devlink_port *port, attr = tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR]; if (attr) { - err = devlink_port_function_hw_addr_set(devlink, port, attr, extack); + err = devlink_port_function_hw_addr_set(port, attr, extack); if (err) return err; } @@ -1367,7 +1367,7 @@ devlink_port_function_set(struct devlink *devlink, struct devlink_port *port, */ attr = tb[DEVLINK_PORT_FN_ATTR_STATE]; if (attr) - err = devlink_port_fn_state_set(devlink, port, attr, extack); + err = devlink_port_fn_state_set(port, attr, extack); if (!err) devlink_port_notify(port, DEVLINK_CMD_PORT_NEW); @@ -1378,14 +1378,13 @@ static int devlink_nl_cmd_port_set_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink_port *devlink_port = info->user_ptr[1]; - struct devlink *devlink = devlink_port->devlink; int err; if (info->attrs[DEVLINK_ATTR_PORT_TYPE]) { enum devlink_port_type port_type; port_type = nla_get_u16(info->attrs[DEVLINK_ATTR_PORT_TYPE]); - err = devlink_port_type_set(devlink, devlink_port, port_type); + err = devlink_port_type_set(devlink_port, port_type); if (err) return err; } @@ -1394,7 +1393,7 @@ static int devlink_nl_cmd_port_set_doit(struct sk_buff *skb, struct nlattr *attr = info->attrs[DEVLINK_ATTR_PORT_FUNCTION]; struct netlink_ext_ack *extack = info->extack; - err = devlink_port_function_set(devlink, devlink_port, attr, extack); + err = devlink_port_function_set(devlink_port, attr, extack); if (err) return err; } @@ -8769,24 +8768,26 @@ static bool devlink_reload_actions_valid(const struct devlink_ops *ops) * @ops: ops * @priv_size: size of user private data * @net: net namespace + * @dev: parent device * * Allocate new devlink instance resources, including devlink index * and name. */ struct devlink *devlink_alloc_ns(const struct devlink_ops *ops, - size_t priv_size, struct net *net) + size_t priv_size, struct net *net, + struct device *dev) { struct devlink *devlink; - if (WARN_ON(!ops)) - return NULL; - + WARN_ON(!ops || !dev); if (!devlink_reload_actions_valid(ops)) return NULL; devlink = kzalloc(sizeof(*devlink) + priv_size, GFP_KERNEL); if (!devlink) return NULL; + + devlink->dev = dev; devlink->ops = ops; xa_init_flags(&devlink->snapshot_ids, XA_FLAGS_ALLOC); write_pnet(&devlink->_net, net); @@ -8811,12 +8812,9 @@ EXPORT_SYMBOL_GPL(devlink_alloc_ns); * devlink_register - Register devlink instance * * @devlink: devlink - * @dev: parent device */ -int devlink_register(struct devlink *devlink, struct device *dev) +int devlink_register(struct devlink *devlink) { - WARN_ON(devlink->dev); - devlink->dev = dev; mutex_lock(&devlink_mutex); list_add_tail(&devlink->list, &devlink_list); devlink_notify(devlink, DEVLINK_CMD_NEW); diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index ead2a8aa57b4..49442cae6f69 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -850,8 +850,7 @@ net_dm_hw_metadata_copy(const struct devlink_trap_metadata *metadata) } hw_metadata->input_dev = metadata->input_dev; - if (hw_metadata->input_dev) - dev_hold(hw_metadata->input_dev); + dev_hold(hw_metadata->input_dev); return hw_metadata; @@ -867,8 +866,7 @@ free_hw_metadata: static void net_dm_hw_metadata_free(const struct devlink_trap_metadata *hw_metadata) { - if (hw_metadata->input_dev) - dev_put(hw_metadata->input_dev); + dev_put(hw_metadata->input_dev); kfree(hw_metadata->fa_cookie); kfree(hw_metadata->trap_name); kfree(hw_metadata->trap_group_name); diff --git a/net/core/dst.c b/net/core/dst.c index fb3bcba87744..497ef9b3fc6a 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -49,8 +49,7 @@ void dst_init(struct dst_entry *dst, struct dst_ops *ops, unsigned short flags) { dst->dev = dev; - if (dev) - dev_hold(dev); + dev_hold(dev); dst->ops = ops; dst_init_metrics(dst, dst_default_metrics.metrics, true); dst->expires = 0UL; @@ -118,8 +117,7 @@ struct dst_entry *dst_destroy(struct dst_entry * dst) if (dst->ops->destroy) dst->ops->destroy(dst); - if (dst->dev) - dev_put(dst->dev); + dev_put(dst->dev); lwtstate_put(dst->lwtstate); diff --git a/net/core/filter.c b/net/core/filter.c index ff62cd39046d..3aca07c44fad 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2180,17 +2180,9 @@ static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb, skb->tstamp = 0; if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) { - struct sk_buff *skb2; - - skb2 = skb_realloc_headroom(skb, hh_len); - if (unlikely(!skb2)) { - kfree_skb(skb); + skb = skb_expand_head(skb, hh_len); + if (!skb) return -ENOMEM; - } - if (skb->sk) - skb_set_owner_w(skb2, skb->sk); - consume_skb(skb); - skb = skb2; } rcu_read_lock_bh(); @@ -2214,8 +2206,7 @@ static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb, } rcu_read_unlock_bh(); if (dst) - IP6_INC_STATS(dev_net(dst->dev), - ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); + IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); out_drop: kfree_skb(skb); return -ENETDOWN; @@ -2287,17 +2278,9 @@ static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb, skb->tstamp = 0; if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) { - struct sk_buff *skb2; - - skb2 = skb_realloc_headroom(skb, hh_len); - if (unlikely(!skb2)) { - kfree_skb(skb); + skb = skb_expand_head(skb, hh_len); + if (!skb) return -ENOMEM; - } - if (skb->sk) - skb_set_owner_w(skb2, skb->sk); - consume_skb(skb); - skb = skb2; } rcu_read_lock_bh(); diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 53e85c70c6e5..b963d6b02c4f 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -741,12 +741,10 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, write_pnet(&n->net, net); memcpy(n->key, pkey, key_len); n->dev = dev; - if (dev) - dev_hold(dev); + dev_hold(dev); if (tbl->pconstructor && tbl->pconstructor(n)) { - if (dev) - dev_put(dev); + dev_put(dev); kfree(n); n = NULL; goto out; @@ -778,8 +776,7 @@ int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey, write_unlock_bh(&tbl->lock); if (tbl->pdestructor) tbl->pdestructor(n); - if (n->dev) - dev_put(n->dev); + dev_put(n->dev); kfree(n); return 0; } @@ -812,8 +809,7 @@ static int pneigh_ifdown_and_unlock(struct neigh_table *tbl, n->next = NULL; if (tbl->pdestructor) tbl->pdestructor(n); - if (n->dev) - dev_put(n->dev); + dev_put(n->dev); kfree(n); } return -ENOENT; @@ -1662,8 +1658,7 @@ void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms) list_del(&parms->list); parms->dead = 1; write_unlock_bh(&tbl->lock); - if (parms->dev) - dev_put(parms->dev); + dev_put(parms->dev); call_rcu(&parms->rcu_head, neigh_rcu_free_parms); } EXPORT_SYMBOL(neigh_parms_release); @@ -3315,12 +3310,13 @@ static int neigh_stat_seq_show(struct seq_file *seq, void *v) struct neigh_statistics *st = v; if (v == SEQ_START_TOKEN) { - seq_printf(seq, "entries allocs destroys hash_grows lookups hits res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs unresolved_discards table_fulls\n"); + seq_puts(seq, "entries allocs destroys hash_grows lookups hits res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs unresolved_discards table_fulls\n"); return 0; } - seq_printf(seq, "%08x %08lx %08lx %08lx %08lx %08lx %08lx " - "%08lx %08lx %08lx %08lx %08lx %08lx\n", + seq_printf(seq, "%08x %08lx %08lx %08lx %08lx %08lx %08lx " + "%08lx %08lx %08lx " + "%08lx %08lx %08lx\n", atomic_read(&tbl->entries), st->allocs, diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 5e4eb45b139c..ac116041b35f 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -24,6 +24,8 @@ #define DEFER_TIME (msecs_to_jiffies(1000)) #define DEFER_WARN_INTERVAL (60 * HZ) +#define BIAS_MAX LONG_MAX + static int page_pool_init(struct page_pool *pool, const struct page_pool_params *params) { @@ -67,6 +69,10 @@ static int page_pool_init(struct page_pool *pool, */ } + if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT && + pool->p.flags & PP_FLAG_PAGE_FRAG) + return -EINVAL; + if (ptr_ring_init(&pool->ring, ring_qsize, GFP_KERNEL) < 0) return -ENOMEM; @@ -206,6 +212,19 @@ static bool page_pool_dma_map(struct page_pool *pool, struct page *page) return true; } +static void page_pool_set_pp_info(struct page_pool *pool, + struct page *page) +{ + page->pp = pool; + page->pp_magic |= PP_SIGNATURE; +} + +static void page_pool_clear_pp_info(struct page *page) +{ + page->pp_magic = 0; + page->pp = NULL; +} + static struct page *__page_pool_alloc_page_order(struct page_pool *pool, gfp_t gfp) { @@ -222,7 +241,7 @@ static struct page *__page_pool_alloc_page_order(struct page_pool *pool, return NULL; } - page->pp_magic |= PP_SIGNATURE; + page_pool_set_pp_info(pool, page); /* Track how many pages are held 'in-flight' */ pool->pages_state_hold_cnt++; @@ -266,7 +285,8 @@ static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool, put_page(page); continue; } - page->pp_magic |= PP_SIGNATURE; + + page_pool_set_pp_info(pool, page); pool->alloc.cache[pool->alloc.count++] = page; /* Track how many pages are held 'in-flight' */ pool->pages_state_hold_cnt++; @@ -345,7 +365,7 @@ void page_pool_release_page(struct page_pool *pool, struct page *page) DMA_ATTR_SKIP_CPU_SYNC); page_pool_set_dma_addr(page, 0); skip_dma_unmap: - page->pp_magic = 0; + page_pool_clear_pp_info(page); /* This may be the last page returned, releasing the pool, so * it is not safe to reference pool afterwards. @@ -405,6 +425,11 @@ static __always_inline struct page * __page_pool_put_page(struct page_pool *pool, struct page *page, unsigned int dma_sync_size, bool allow_direct) { + /* It is not the last user for the page frag case */ + if (pool->p.flags & PP_FLAG_PAGE_FRAG && + page_pool_atomic_sub_frag_count_return(page, 1)) + return NULL; + /* This allocator is optimized for the XDP mode that uses * one-frame-per-page, but have fallbacks that act like the * regular page allocator APIs. @@ -497,6 +522,84 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data, } EXPORT_SYMBOL(page_pool_put_page_bulk); +static struct page *page_pool_drain_frag(struct page_pool *pool, + struct page *page) +{ + long drain_count = BIAS_MAX - pool->frag_users; + + /* Some user is still using the page frag */ + if (likely(page_pool_atomic_sub_frag_count_return(page, + drain_count))) + return NULL; + + if (page_ref_count(page) == 1 && !page_is_pfmemalloc(page)) { + if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV) + page_pool_dma_sync_for_device(pool, page, -1); + + return page; + } + + page_pool_return_page(pool, page); + return NULL; +} + +static void page_pool_free_frag(struct page_pool *pool) +{ + long drain_count = BIAS_MAX - pool->frag_users; + struct page *page = pool->frag_page; + + pool->frag_page = NULL; + + if (!page || + page_pool_atomic_sub_frag_count_return(page, drain_count)) + return; + + page_pool_return_page(pool, page); +} + +struct page *page_pool_alloc_frag(struct page_pool *pool, + unsigned int *offset, + unsigned int size, gfp_t gfp) +{ + unsigned int max_size = PAGE_SIZE << pool->p.order; + struct page *page = pool->frag_page; + + if (WARN_ON(!(pool->p.flags & PP_FLAG_PAGE_FRAG) || + size > max_size)) + return NULL; + + size = ALIGN(size, dma_get_cache_alignment()); + *offset = pool->frag_offset; + + if (page && *offset + size > max_size) { + page = page_pool_drain_frag(pool, page); + if (page) + goto frag_reset; + } + + if (!page) { + page = page_pool_alloc_pages(pool, gfp); + if (unlikely(!page)) { + pool->frag_page = NULL; + return NULL; + } + + pool->frag_page = page; + +frag_reset: + pool->frag_users = 1; + *offset = 0; + pool->frag_offset = size; + page_pool_set_frag_count(page, BIAS_MAX); + return page; + } + + pool->frag_users++; + pool->frag_offset = *offset + size; + return page; +} +EXPORT_SYMBOL(page_pool_alloc_frag); + static void page_pool_empty_ring(struct page_pool *pool) { struct page *page; @@ -602,6 +705,8 @@ void page_pool_destroy(struct page_pool *pool) if (!page_pool_put(pool)) return; + page_pool_free_frag(pool); + if (!page_pool_release(pool)) return; @@ -644,7 +749,6 @@ bool page_pool_return_skb_page(struct page *page) * The page will be returned to the pool here regardless of the * 'flipped' fragment being in use or not. */ - page->pp = NULL; page_pool_put_full_page(pp, page, false); return true; diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 7e258d255e90..314f97acf39d 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -1190,11 +1190,6 @@ static ssize_t pktgen_if_write(struct file *file, * pktgen_xmit() is called */ pkt_dev->last_ok = 1; - - /* override clone_skb if user passed default value - * at module loading time - */ - pkt_dev->clone_skb = 0; } else if (strcmp(f, "queue_xmit") == 0) { pkt_dev->xmit_mode = M_QUEUE_XMIT; pkt_dev->last_ok = 1; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index e79aaf1f7139..7c9d32cfe607 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2257,7 +2257,8 @@ invalid_attr: return -EINVAL; } -static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[]) +static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[], + struct netlink_ext_ack *extack) { if (dev) { if (tb[IFLA_ADDRESS] && @@ -2284,7 +2285,7 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[]) return -EOPNOTSUPP; if (af_ops->validate_link_af) { - err = af_ops->validate_link_af(dev, af); + err = af_ops->validate_link_af(dev, af, extack); if (err < 0) return err; } @@ -2592,7 +2593,7 @@ static int do_setlink(const struct sk_buff *skb, const struct net_device_ops *ops = dev->netdev_ops; int err; - err = validate_linkmsg(dev, tb); + err = validate_linkmsg(dev, tb, extack); if (err < 0) return err; @@ -3290,7 +3291,7 @@ replay: m_ops = master_dev->rtnl_link_ops; } - err = validate_linkmsg(dev, tb); + err = validate_linkmsg(dev, tb, extack); if (err < 0) return err; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index fcbd977186b0..9240af2ea8c9 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1790,6 +1790,48 @@ struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom) EXPORT_SYMBOL(skb_realloc_headroom); /** + * skb_expand_head - reallocate header of &sk_buff + * @skb: buffer to reallocate + * @headroom: needed headroom + * + * Unlike skb_realloc_headroom, this one does not allocate a new skb + * if possible; copies skb->sk to new skb as needed + * and frees original skb in case of failures. + * + * It expect increased headroom and generates warning otherwise. + */ + +struct sk_buff *skb_expand_head(struct sk_buff *skb, unsigned int headroom) +{ + int delta = headroom - skb_headroom(skb); + + if (WARN_ONCE(delta <= 0, + "%s is expecting an increase in the headroom", __func__)) + return skb; + + /* pskb_expand_head() might crash, if skb is shared */ + if (skb_shared(skb)) { + struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC); + + if (likely(nskb)) { + if (skb->sk) + skb_set_owner_w(nskb, skb->sk); + consume_skb(skb); + } else { + kfree_skb(skb); + } + skb = nskb; + } + if (skb && + pskb_expand_head(skb, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) { + kfree_skb(skb); + skb = NULL; + } + return skb; +} +EXPORT_SYMBOL(skb_expand_head); + +/** * skb_copy_expand - copy and expand sk_buff * @skb: buffer to copy * @newheadroom: new free bytes at head @@ -4327,7 +4369,7 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb) memcpy(frag + 1, skbinfo->frags, sizeof(*frag) * skbinfo->nr_frags); /* We dont need to clear skbinfo->nr_frags here */ - new_truesize = SKB_TRUESIZE(sizeof(struct sk_buff)); + new_truesize = SKB_DATA_ALIGN(sizeof(struct sk_buff)); delta_truesize = skb->truesize - new_truesize; skb->truesize = new_truesize; NAPI_GRO_CB(skb)->free = NAPI_GRO_FREE_STOLEN_HEAD; diff --git a/net/core/sock.c b/net/core/sock.c index 9671c32e6ef5..aada649e07e8 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1358,6 +1358,15 @@ set_sndbuf: ret = sock_bindtoindex_locked(sk, val); break; + case SO_BUF_LOCK: + if (val & ~SOCK_BUF_LOCK_MASK) { + ret = -EINVAL; + break; + } + sk->sk_userlocks = val | (sk->sk_userlocks & + ~SOCK_BUF_LOCK_MASK); + break; + default: ret = -ENOPROTOOPT; break; @@ -1720,6 +1729,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname, v.val64 = sock_net(sk)->net_cookie; break; + case SO_BUF_LOCK: + v.val = sk->sk_userlocks & SOCK_BUF_LOCK_MASK; + break; + default: /* We implement the SO_SNDLOWAT etc to not be settable * (1003.1g 7). diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index d1c50a48614b..0ee7d4c0c955 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -521,8 +521,7 @@ int dn_dev_set_default(struct net_device *dev, int force) } spin_unlock(&dndev_lock); - if (old) - dev_put(old); + dev_put(old); return rv; } @@ -536,8 +535,7 @@ static void dn_dev_check_default(struct net_device *dev) } spin_unlock(&dndev_lock); - if (dev) - dev_put(dev); + dev_put(dev); } /* diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c index 387a7e81dd00..269c029ad74f 100644 --- a/net/decnet/dn_fib.c +++ b/net/decnet/dn_fib.c @@ -92,8 +92,7 @@ void dn_fib_free_info(struct dn_fib_info *fi) } change_nexthops(fi) { - if (nh->nh_dev) - dev_put(nh->nh_dev); + dev_put(nh->nh_dev); nh->nh_dev = NULL; } endfor_nexthops(fi); kfree(fi); @@ -389,7 +388,7 @@ link_it: return ofi; } - refcount_inc(&fi->fib_treeref); + refcount_set(&fi->fib_treeref, 1); refcount_set(&fi->fib_clntref, 1); spin_lock(&dn_fib_info_lock); fi->fib_next = dn_fib_info_list; diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 729d3de6020d..7e85f2a1ae25 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -1026,8 +1026,7 @@ source_ok: if (!fld.daddr) { fld.daddr = fld.saddr; - if (dev_out) - dev_put(dev_out); + dev_put(dev_out); err = -EINVAL; dev_out = init_net.loopback_dev; if (!dev_out->dn_ptr) @@ -1084,8 +1083,7 @@ source_ok: neigh_release(neigh); neigh = NULL; } else { - if (dev_out) - dev_put(dev_out); + dev_put(dev_out); if (dn_dev_islocal(neigh->dev, fld.daddr)) { dev_out = init_net.loopback_dev; res.type = RTN_LOCAL; @@ -1144,8 +1142,7 @@ select_source: if (res.type == RTN_LOCAL) { if (!fld.saddr) fld.saddr = fld.daddr; - if (dev_out) - dev_put(dev_out); + dev_put(dev_out); dev_out = init_net.loopback_dev; dev_hold(dev_out); if (!dev_out->dn_ptr) @@ -1168,8 +1165,7 @@ select_source: if (!fld.saddr) fld.saddr = DN_FIB_RES_PREFSRC(res); - if (dev_out) - dev_put(dev_out); + dev_put(dev_out); dev_out = DN_FIB_RES_DEV(res); dev_hold(dev_out); fld.flowidn_oif = dev_out->ifindex; @@ -1222,8 +1218,7 @@ done: neigh_release(neigh); if (free_res) dn_fib_res_put(&res); - if (dev_out) - dev_put(dev_out); + dev_put(dev_out); out: return err; @@ -1503,8 +1498,7 @@ done: if (free_res) dn_fib_res_put(&res); dev_put(in_dev); - if (out_dev) - dev_put(out_dev); + dev_put(out_dev); out: return err; diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig index bca1b5d66df2..970906eb5b2c 100644 --- a/net/dsa/Kconfig +++ b/net/dsa/Kconfig @@ -138,6 +138,7 @@ config NET_DSA_TAG_LAN9303 config NET_DSA_TAG_SJA1105 tristate "Tag driver for NXP SJA1105 switches" + depends on (NET_DSA_SJA1105 && NET_DSA_SJA1105_PTP) || !NET_DSA_SJA1105 || !NET_DSA_SJA1105_PTP select PACKING help Say Y or M if you want to enable support for tagging frames with the diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index c7fa85fb3086..8150e16aaa55 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -311,6 +311,9 @@ static struct dsa_port *dsa_tree_find_first_cpu(struct dsa_switch_tree *dst) return NULL; } +/* Assign the default CPU port (the first one in the tree) to all ports of the + * fabric which don't already have one as part of their own switch. + */ static int dsa_tree_setup_default_cpu(struct dsa_switch_tree *dst) { struct dsa_port *cpu_dp, *dp; @@ -321,15 +324,48 @@ static int dsa_tree_setup_default_cpu(struct dsa_switch_tree *dst) return -EINVAL; } - /* Assign the default CPU port to all ports of the fabric */ - list_for_each_entry(dp, &dst->ports, list) + list_for_each_entry(dp, &dst->ports, list) { + if (dp->cpu_dp) + continue; + if (dsa_port_is_user(dp) || dsa_port_is_dsa(dp)) dp->cpu_dp = cpu_dp; + } return 0; } -static void dsa_tree_teardown_default_cpu(struct dsa_switch_tree *dst) +/* Perform initial assignment of CPU ports to user ports and DSA links in the + * fabric, giving preference to CPU ports local to each switch. Default to + * using the first CPU port in the switch tree if the port does not have a CPU + * port local to this switch. + */ +static int dsa_tree_setup_cpu_ports(struct dsa_switch_tree *dst) +{ + struct dsa_port *cpu_dp, *dp; + + list_for_each_entry(cpu_dp, &dst->ports, list) { + if (!dsa_port_is_cpu(cpu_dp)) + continue; + + list_for_each_entry(dp, &dst->ports, list) { + /* Prefer a local CPU port */ + if (dp->ds != cpu_dp->ds) + continue; + + /* Prefer the first local CPU port found */ + if (dp->cpu_dp) + continue; + + if (dsa_port_is_user(dp) || dsa_port_is_dsa(dp)) + dp->cpu_dp = cpu_dp; + } + } + + return dsa_tree_setup_default_cpu(dst); +} + +static void dsa_tree_teardown_cpu_ports(struct dsa_switch_tree *dst) { struct dsa_port *dp; @@ -710,13 +746,14 @@ static int dsa_switch_setup(struct dsa_switch *ds) /* Add the switch to devlink before calling setup, so that setup can * add dpipe tables */ - ds->devlink = devlink_alloc(&dsa_devlink_ops, sizeof(*dl_priv)); + ds->devlink = + devlink_alloc(&dsa_devlink_ops, sizeof(*dl_priv), ds->dev); if (!ds->devlink) return -ENOMEM; dl_priv = devlink_priv(ds->devlink); dl_priv->ds = ds; - err = devlink_register(ds->devlink, ds->dev); + err = devlink_register(ds->devlink); if (err) goto free_devlink; @@ -921,13 +958,13 @@ static int dsa_tree_setup(struct dsa_switch_tree *dst) if (!complete) return 0; - err = dsa_tree_setup_default_cpu(dst); + err = dsa_tree_setup_cpu_ports(dst); if (err) return err; err = dsa_tree_setup_switches(dst); if (err) - goto teardown_default_cpu; + goto teardown_cpu_ports; err = dsa_tree_setup_master(dst); if (err) @@ -947,8 +984,8 @@ teardown_master: dsa_tree_teardown_master(dst); teardown_switches: dsa_tree_teardown_switches(dst); -teardown_default_cpu: - dsa_tree_teardown_default_cpu(dst); +teardown_cpu_ports: + dsa_tree_teardown_cpu_ports(dst); return err; } @@ -966,7 +1003,7 @@ static void dsa_tree_teardown(struct dsa_switch_tree *dst) dsa_tree_teardown_switches(dst); - dsa_tree_teardown_default_cpu(dst); + dsa_tree_teardown_cpu_ports(dst); list_for_each_entry_safe(dl, next, &dst->rtable, list) { list_del(&dl->list); diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index e43c5dc04282..9575cabd3ec3 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -199,7 +199,7 @@ static inline struct net_device *dsa_master_find_slave(struct net_device *dev, /* port.c */ void dsa_port_set_tag_protocol(struct dsa_port *cpu_dp, const struct dsa_device_ops *tag_ops); -int dsa_port_set_state(struct dsa_port *dp, u8 state); +int dsa_port_set_state(struct dsa_port *dp, u8 state, bool do_fast_age); int dsa_port_enable_rt(struct dsa_port *dp, struct phy_device *phy); int dsa_port_enable(struct dsa_port *dp, struct phy_device *phy); void dsa_port_disable_rt(struct dsa_port *dp); @@ -241,11 +241,9 @@ int dsa_port_host_mdb_del(const struct dsa_port *dp, int dsa_port_pre_bridge_flags(const struct dsa_port *dp, struct switchdev_brport_flags flags, struct netlink_ext_ack *extack); -int dsa_port_bridge_flags(const struct dsa_port *dp, +int dsa_port_bridge_flags(struct dsa_port *dp, struct switchdev_brport_flags flags, struct netlink_ext_ack *extack); -int dsa_port_mrouter(struct dsa_port *dp, bool mrouter, - struct netlink_ext_ack *extack); int dsa_port_vlan_add(struct dsa_port *dp, const struct switchdev_obj_port_vlan *vlan, struct netlink_ext_ack *extack); diff --git a/net/dsa/port.c b/net/dsa/port.c index b927d94b6934..831d50d28d59 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -30,7 +30,52 @@ static int dsa_port_notify(const struct dsa_port *dp, unsigned long e, void *v) return dsa_tree_notify(dp->ds->dst, e, v); } -int dsa_port_set_state(struct dsa_port *dp, u8 state) +static void dsa_port_notify_bridge_fdb_flush(const struct dsa_port *dp) +{ + struct net_device *brport_dev = dsa_port_to_bridge_port(dp); + struct switchdev_notifier_fdb_info info = { + /* flush all VLANs */ + .vid = 0, + }; + + /* When the port becomes standalone it has already left the bridge. + * Don't notify the bridge in that case. + */ + if (!brport_dev) + return; + + call_switchdev_notifiers(SWITCHDEV_FDB_FLUSH_TO_BRIDGE, + brport_dev, &info.info, NULL); +} + +static void dsa_port_fast_age(const struct dsa_port *dp) +{ + struct dsa_switch *ds = dp->ds; + + if (!ds->ops->port_fast_age) + return; + + ds->ops->port_fast_age(ds, dp->index); + + dsa_port_notify_bridge_fdb_flush(dp); +} + +static bool dsa_port_can_configure_learning(struct dsa_port *dp) +{ + struct switchdev_brport_flags flags = { + .mask = BR_LEARNING, + }; + struct dsa_switch *ds = dp->ds; + int err; + + if (!ds->ops->port_bridge_flags || !ds->ops->port_pre_bridge_flags) + return false; + + err = ds->ops->port_pre_bridge_flags(ds, dp->index, flags, NULL); + return !err; +} + +int dsa_port_set_state(struct dsa_port *dp, u8 state, bool do_fast_age) { struct dsa_switch *ds = dp->ds; int port = dp->index; @@ -40,10 +85,14 @@ int dsa_port_set_state(struct dsa_port *dp, u8 state) ds->ops->port_stp_state_set(ds, port, state); - if (ds->ops->port_fast_age) { + if (!dsa_port_can_configure_learning(dp) || + (do_fast_age && dp->learning)) { /* Fast age FDB entries or flush appropriate forwarding database * for the given port, if we are moving it from Learning or * Forwarding state, to Disabled or Blocking or Listening state. + * Ports that were standalone before the STP state change don't + * need to fast age the FDB, since address learning is off in + * standalone mode. */ if ((dp->stp_state == BR_STATE_LEARNING || @@ -51,7 +100,7 @@ int dsa_port_set_state(struct dsa_port *dp, u8 state) (state == BR_STATE_DISABLED || state == BR_STATE_BLOCKING || state == BR_STATE_LISTENING)) - ds->ops->port_fast_age(ds, port); + dsa_port_fast_age(dp); } dp->stp_state = state; @@ -59,11 +108,12 @@ int dsa_port_set_state(struct dsa_port *dp, u8 state) return 0; } -static void dsa_port_set_state_now(struct dsa_port *dp, u8 state) +static void dsa_port_set_state_now(struct dsa_port *dp, u8 state, + bool do_fast_age) { int err; - err = dsa_port_set_state(dp, state); + err = dsa_port_set_state(dp, state, do_fast_age); if (err) pr_err("DSA: failed to set STP state %u (%d)\n", state, err); } @@ -81,7 +131,7 @@ int dsa_port_enable_rt(struct dsa_port *dp, struct phy_device *phy) } if (!dp->bridge_dev) - dsa_port_set_state_now(dp, BR_STATE_FORWARDING); + dsa_port_set_state_now(dp, BR_STATE_FORWARDING, false); if (dp->pl) phylink_start(dp->pl); @@ -109,7 +159,7 @@ void dsa_port_disable_rt(struct dsa_port *dp) phylink_stop(dp->pl); if (!dp->bridge_dev) - dsa_port_set_state_now(dp, BR_STATE_DISABLED); + dsa_port_set_state_now(dp, BR_STATE_DISABLED, false); if (ds->ops->port_disable) ds->ops->port_disable(ds, port); @@ -178,7 +228,7 @@ static int dsa_port_switchdev_sync_attrs(struct dsa_port *dp, if (err) return err; - err = dsa_port_set_state(dp, br_port_get_stp_state(brport_dev)); + err = dsa_port_set_state(dp, br_port_get_stp_state(brport_dev), false); if (err && err != -EOPNOTSUPP) return err; @@ -186,10 +236,6 @@ static int dsa_port_switchdev_sync_attrs(struct dsa_port *dp, if (err && err != -EOPNOTSUPP) return err; - err = dsa_port_mrouter(dp->cpu_dp, br_multicast_router(br), extack); - if (err && err != -EOPNOTSUPP) - return err; - err = dsa_port_ageing_time(dp, br_get_ageing_time(br)); if (err && err != -EOPNOTSUPP) return err; @@ -215,16 +261,10 @@ static void dsa_port_switchdev_unsync_attrs(struct dsa_port *dp) /* Port left the bridge, put in BR_STATE_DISABLED by the bridge layer, * so allow it to be in BR_STATE_FORWARDING to be kept functional */ - dsa_port_set_state_now(dp, BR_STATE_FORWARDING); + dsa_port_set_state_now(dp, BR_STATE_FORWARDING, true); /* VLAN filtering is handled by dsa_switch_bridge_leave */ - /* Some drivers treat the notification for having a local multicast - * router by allowing multicast to be flooded to the CPU, so we should - * allow this in standalone mode too. - */ - dsa_port_mrouter(dp->cpu_dp, true, NULL); - /* Ageing time may be global to the switch chip, so don't change it * here because we have no good reason (or value) to change it to. */ @@ -639,27 +679,35 @@ int dsa_port_pre_bridge_flags(const struct dsa_port *dp, return ds->ops->port_pre_bridge_flags(ds, dp->index, flags, extack); } -int dsa_port_bridge_flags(const struct dsa_port *dp, +int dsa_port_bridge_flags(struct dsa_port *dp, struct switchdev_brport_flags flags, struct netlink_ext_ack *extack) { struct dsa_switch *ds = dp->ds; + int err; if (!ds->ops->port_bridge_flags) return -EOPNOTSUPP; - return ds->ops->port_bridge_flags(ds, dp->index, flags, extack); -} + err = ds->ops->port_bridge_flags(ds, dp->index, flags, extack); + if (err) + return err; -int dsa_port_mrouter(struct dsa_port *dp, bool mrouter, - struct netlink_ext_ack *extack) -{ - struct dsa_switch *ds = dp->ds; + if (flags.mask & BR_LEARNING) { + bool learning = flags.val & BR_LEARNING; - if (!ds->ops->port_set_mrouter) - return -EOPNOTSUPP; + if (learning == dp->learning) + return 0; + + if ((dp->learning && !learning) && + (dp->stp_state == BR_STATE_LEARNING || + dp->stp_state == BR_STATE_FORWARDING)) + dsa_port_fast_age(dp); - return ds->ops->port_set_mrouter(ds, dp->index, mrouter, extack); + dp->learning = learning; + } + + return 0; } int dsa_port_mtu_change(struct dsa_port *dp, int new_mtu, diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 6e1135d3ee33..acf73db5cafc 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -286,7 +286,7 @@ static int dsa_slave_port_attr_set(struct net_device *dev, const void *ctx, if (!dsa_port_offloads_bridge_port(dp, attr->orig_dev)) return -EOPNOTSUPP; - ret = dsa_port_set_state(dp, attr->u.stp_state); + ret = dsa_port_set_state(dp, attr->u.stp_state, true); break; case SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING: if (!dsa_port_offloads_bridge(dp, attr->orig_dev)) @@ -314,12 +314,6 @@ static int dsa_slave_port_attr_set(struct net_device *dev, const void *ctx, ret = dsa_port_bridge_flags(dp, attr->u.brport_flags, extack); break; - case SWITCHDEV_ATTR_ID_BRIDGE_MROUTER: - if (!dsa_port_offloads_bridge(dp, attr->orig_dev)) - return -EOPNOTSUPP; - - ret = dsa_port_mrouter(dp->cpu_dp, attr->u.mrouter, extack); - break; default: ret = -EOPNOTSUPP; break; diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c index 745c4560b4aa..38b2792f971d 100644 --- a/net/dsa/tag_sja1105.c +++ b/net/dsa/tag_sja1105.c @@ -549,7 +549,7 @@ static struct sk_buff *sja1110_rcv(struct sk_buff *skb, { int source_port = -1, switch_id = -1; bool host_only = false; - u16 vid; + u16 vid = 0; if (sja1110_skb_has_inband_control_extension(skb)) { skb = sja1110_rcv_inband_control_extension(skb, &source_port, diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 171ba75b74c9..73fce9467467 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -62,8 +62,6 @@ #include <linux/uaccess.h> #include <net/pkt_sched.h> -__setup("ether=", netdev_boot_setup); - /** * eth_header - create the Ethernet header * @skb: buffer to alter diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index b0fa2b00ad43..81fa36a4c9c4 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -24,6 +24,7 @@ #include <linux/rtnetlink.h> #include <linux/sched/signal.h> #include <linux/net.h> +#include <linux/pm_runtime.h> #include <net/devlink.h> #include <net/xdp_sock_drv.h> #include <net/flow_offload.h> @@ -2692,7 +2693,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr, void __user *useraddr) int rc; netdev_features_t old_features; - if (!dev || !netif_device_present(dev)) + if (!dev) return -ENODEV; if (copy_from_user(ðcmd, useraddr, sizeof(ethcmd))) @@ -2748,10 +2749,18 @@ int dev_ethtool(struct net *net, struct ifreq *ifr, void __user *useraddr) return -EPERM; } + if (dev->dev.parent) + pm_runtime_get_sync(dev->dev.parent); + + if (!netif_device_present(dev)) { + rc = -ENODEV; + goto out; + } + if (dev->ethtool_ops->begin) { rc = dev->ethtool_ops->begin(dev); - if (rc < 0) - return rc; + if (rc < 0) + goto out; } old_features = dev->features; @@ -2970,6 +2979,9 @@ int dev_ethtool(struct net *net, struct ifreq *ifr, void __user *useraddr) if (old_features != dev->features) netdev_features_change(dev); +out: + if (dev->dev.parent) + pm_runtime_put(dev->dev.parent); return rc; } diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index 73e0f5b626bf..1797a0a90019 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -2,6 +2,7 @@ #include <net/sock.h> #include <linux/ethtool_netlink.h> +#include <linux/pm_runtime.h> #include "netlink.h" static struct genl_family ethtool_genl_family; @@ -29,6 +30,44 @@ const struct nla_policy ethnl_header_policy_stats[] = { ETHTOOL_FLAGS_STATS), }; +int ethnl_ops_begin(struct net_device *dev) +{ + int ret; + + if (!dev) + return -ENODEV; + + if (dev->dev.parent) + pm_runtime_get_sync(dev->dev.parent); + + if (!netif_device_present(dev)) { + ret = -ENODEV; + goto err; + } + + if (dev->ethtool_ops->begin) { + ret = dev->ethtool_ops->begin(dev); + if (ret) + goto err; + } + + return 0; +err: + if (dev->dev.parent) + pm_runtime_put(dev->dev.parent); + + return ret; +} + +void ethnl_ops_complete(struct net_device *dev) +{ + if (dev->ethtool_ops->complete) + dev->ethtool_ops->complete(dev); + + if (dev->dev.parent) + pm_runtime_put(dev->dev.parent); +} + /** * ethnl_parse_header_dev_get() - parse request header * @req_info: structure to put results into @@ -101,12 +140,6 @@ int ethnl_parse_header_dev_get(struct ethnl_req_info *req_info, return -EINVAL; } - if (dev && !netif_device_present(dev)) { - dev_put(dev); - NL_SET_ERR_MSG(extack, "device not present"); - return -ENODEV; - } - req_info->dev = dev; req_info->flags = flags; return 0; @@ -365,8 +398,7 @@ static int ethnl_default_doit(struct sk_buff *skb, struct genl_info *info) ops->cleanup_data(reply_data); genlmsg_end(rskb, reply_payload); - if (req_info->dev) - dev_put(req_info->dev); + dev_put(req_info->dev); kfree(reply_data); kfree(req_info); return genlmsg_reply(rskb, info); @@ -378,8 +410,7 @@ err_cleanup: if (ops->cleanup_data) ops->cleanup_data(reply_data); err_dev: - if (req_info->dev) - dev_put(req_info->dev); + dev_put(req_info->dev); kfree(reply_data); kfree(req_info); return ret; diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index 3fc395c86702..077aac3929a8 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -247,19 +247,8 @@ struct ethnl_reply_data { struct net_device *dev; }; -static inline int ethnl_ops_begin(struct net_device *dev) -{ - if (dev && dev->ethtool_ops->begin) - return dev->ethtool_ops->begin(dev); - else - return 0; -} - -static inline void ethnl_ops_complete(struct net_device *dev) -{ - if (dev && dev->ethtool_ops->complete) - dev->ethtool_ops->complete(dev); -} +int ethnl_ops_begin(struct net_device *dev); +void ethnl_ops_complete(struct net_device *dev); /** * struct ethnl_request_ops - unified handling of GET requests diff --git a/net/ieee802154/nl-phy.c b/net/ieee802154/nl-phy.c index 88215b5c93aa..dd5a45f8a78a 100644 --- a/net/ieee802154/nl-phy.c +++ b/net/ieee802154/nl-phy.c @@ -340,8 +340,7 @@ nla_put_failure: out_dev: wpan_phy_put(phy); out: - if (dev) - dev_put(dev); + dev_put(dev); return rc; } diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c index 0cf2374c143b..277124f206e0 100644 --- a/net/ieee802154/nl802154.c +++ b/net/ieee802154/nl802154.c @@ -2226,8 +2226,7 @@ static void nl802154_post_doit(const struct genl_ops *ops, struct sk_buff *skb, if (ops->internal_flags & NL802154_FLAG_NEED_WPAN_DEV) { struct wpan_dev *wpan_dev = info->user_ptr[1]; - if (wpan_dev->netdev) - dev_put(wpan_dev->netdev); + dev_put(wpan_dev->netdev); } else { dev_put(info->user_ptr[1]); } diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c index f5077de3619e..90233efa1f6b 100644 --- a/net/ieee802154/socket.c +++ b/net/ieee802154/socket.c @@ -41,8 +41,7 @@ ieee802154_get_dev(struct net *net, const struct ieee802154_addr *addr) ieee802154_devaddr_to_raw(hwaddr, addr->extended_addr); rcu_read_lock(); dev = dev_getbyhwaddr_rcu(net, ARPHRD_IEEE802154, hwaddr); - if (dev) - dev_hold(dev); + dev_hold(dev); rcu_read_unlock(); break; case IEEE802154_ADDR_SHORT: diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index c82aded8da7d..f4468980b675 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1950,7 +1950,8 @@ static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = { }; static int inet_validate_link_af(const struct net_device *dev, - const struct nlattr *nla) + const struct nlattr *nla, + struct netlink_ext_ack *extack) { struct nlattr *a, *tb[IFLA_INET_MAX+1]; int err, rem; @@ -1959,7 +1960,7 @@ static int inet_validate_link_af(const struct net_device *dev, return -EAFNOSUPPORT; err = nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla, - inet_af_policy, NULL); + inet_af_policy, extack); if (err < 0) return err; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index f29feb7772da..b42c429cebbe 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -208,9 +208,7 @@ static void rt_fibinfo_free_cpus(struct rtable __rcu * __percpu *rtp) void fib_nh_common_release(struct fib_nh_common *nhc) { - if (nhc->nhc_dev) - dev_put(nhc->nhc_dev); - + dev_put(nhc->nhc_dev); lwtstate_put(nhc->nhc_lwtstate); rt_fibinfo_free_cpus(nhc->nhc_pcpu_rth_output); rt_fibinfo_free(&nhc->nhc_rth_input); diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index c695d294a5df..8b30cadff708 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -1095,8 +1095,7 @@ bool icmp_build_probe(struct sk_buff *skb, struct icmphdr *icmphdr) sizeof(struct in6_addr)) goto send_mal_query; dev = ipv6_stub->ipv6_dev_find(net, &iio->ident.addr.ip_addr.ipv6_addr, dev); - if (dev) - dev_hold(dev); + dev_hold(dev); break; #endif default: diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index a5f4ecb02e97..7e5072722f05 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -2233,7 +2233,7 @@ static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml, iml->sfmode, psf->sl_count, psf->sl_addr, 0); RCU_INIT_POINTER(iml->sflist, NULL); /* decrease mem now to avoid the memleak warning */ - atomic_sub(IP_SFLSIZE(psf->sl_max), &sk->sk_omem_alloc); + atomic_sub(struct_size(psf, sl_addr, psf->sl_max), &sk->sk_omem_alloc); kfree_rcu(psf, rcu); return err; } @@ -2382,7 +2382,8 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct if (psl) count += psl->sl_max; - newpsl = sock_kmalloc(sk, IP_SFLSIZE(count), GFP_KERNEL); + newpsl = sock_kmalloc(sk, struct_size(newpsl, sl_addr, count), + GFP_KERNEL); if (!newpsl) { err = -ENOBUFS; goto done; @@ -2393,7 +2394,8 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct for (i = 0; i < psl->sl_count; i++) newpsl->sl_addr[i] = psl->sl_addr[i]; /* decrease mem now to avoid the memleak warning */ - atomic_sub(IP_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc); + atomic_sub(struct_size(psl, sl_addr, psl->sl_max), + &sk->sk_omem_alloc); kfree_rcu(psl, rcu); } rcu_assign_pointer(pmc->sflist, newpsl); @@ -2468,8 +2470,9 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex) goto done; } if (msf->imsf_numsrc) { - newpsl = sock_kmalloc(sk, IP_SFLSIZE(msf->imsf_numsrc), - GFP_KERNEL); + newpsl = sock_kmalloc(sk, struct_size(newpsl, sl_addr, + msf->imsf_numsrc), + GFP_KERNEL); if (!newpsl) { err = -ENOBUFS; goto done; @@ -2480,7 +2483,9 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex) err = ip_mc_add_src(in_dev, &msf->imsf_multiaddr, msf->imsf_fmode, newpsl->sl_count, newpsl->sl_addr, 0); if (err) { - sock_kfree_s(sk, newpsl, IP_SFLSIZE(newpsl->sl_max)); + sock_kfree_s(sk, newpsl, + struct_size(newpsl, sl_addr, + newpsl->sl_max)); goto done; } } else { @@ -2493,7 +2498,8 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex) (void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode, psl->sl_count, psl->sl_addr, 0); /* decrease mem now to avoid the memleak warning */ - atomic_sub(IP_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc); + atomic_sub(struct_size(psl, sl_addr, psl->sl_max), + &sk->sk_omem_alloc); kfree_rcu(psl, rcu); } else (void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode, @@ -2553,8 +2559,8 @@ int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf, copycount = count < msf->imsf_numsrc ? count : msf->imsf_numsrc; len = flex_array_size(psl, sl_addr, copycount); msf->imsf_numsrc = count; - if (put_user(struct_size(optval, imsf_slist_flex, copycount), optlen) || - copy_to_user(optval, msf, struct_size(optval, imsf_slist_flex, 0))) { + if (put_user(IP_MSFILTER_SIZE(copycount), optlen) || + copy_to_user(optval, msf, IP_MSFILTER_SIZE(0))) { return -EFAULT; } if (len && diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index a202dcec0dc2..6b04a88466b2 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -198,19 +198,10 @@ static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *s } else if (rt->rt_type == RTN_BROADCAST) IP_UPD_PO_STATS(net, IPSTATS_MIB_OUTBCAST, skb->len); - /* Be paranoid, rather than too clever. */ if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) { - struct sk_buff *skb2; - - skb2 = skb_realloc_headroom(skb, LL_RESERVED_SPACE(dev)); - if (!skb2) { - kfree_skb(skb); + skb = skb_expand_head(skb, hh_len); + if (!skb) return -ENOMEM; - } - if (skb->sk) - skb_set_owner_w(skb2, skb->sk); - consume_skb(skb); - skb = skb2; } if (lwtunnel_xmit_redirect(dst->lwtstate)) { diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index bbe660b84a91..b297bb28556e 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -667,7 +667,7 @@ static int set_mcast_msfilter(struct sock *sk, int ifindex, struct sockaddr_in *psin; int err, i; - msf = kmalloc(struct_size(msf, imsf_slist_flex, numsrc), GFP_KERNEL); + msf = kmalloc(IP_MSFILTER_SIZE(numsrc), GFP_KERNEL); if (!msf) return -ENOBUFS; @@ -790,7 +790,8 @@ static int ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval, int optlen) goto out_free_gsf; err = set_mcast_msfilter(sk, gsf->gf_interface, gsf->gf_numsrc, - gsf->gf_fmode, &gsf->gf_group, gsf->gf_slist); + gsf->gf_fmode, &gsf->gf_group, + gsf->gf_slist_flex); out_free_gsf: kfree(gsf); return err; @@ -799,7 +800,7 @@ out_free_gsf: static int compat_ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval, int optlen) { - const int size0 = offsetof(struct compat_group_filter, gf_slist); + const int size0 = offsetof(struct compat_group_filter, gf_slist_flex); struct compat_group_filter *gf32; unsigned int n; void *p; @@ -813,7 +814,7 @@ static int compat_ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval, p = kmalloc(optlen + 4, GFP_KERNEL); if (!p) return -ENOMEM; - gf32 = p + 4; /* we want ->gf_group and ->gf_slist aligned */ + gf32 = p + 4; /* we want ->gf_group and ->gf_slist_flex aligned */ err = -EFAULT; if (copy_from_sockptr(gf32, optval, optlen)) @@ -826,7 +827,7 @@ static int compat_ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval, goto out_free_gsf; err = -EINVAL; - if (offsetof(struct compat_group_filter, gf_slist[n]) > optlen) + if (offsetof(struct compat_group_filter, gf_slist_flex[n]) > optlen) goto out_free_gsf; /* numsrc >= (4G-140)/128 overflow in 32 bits */ @@ -834,7 +835,7 @@ static int compat_ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval, if (n > sock_net(sk)->ipv4.sysctl_igmp_max_msf) goto out_free_gsf; err = set_mcast_msfilter(sk, gf32->gf_interface, n, gf32->gf_fmode, - &gf32->gf_group, gf32->gf_slist); + &gf32->gf_group, gf32->gf_slist_flex); out_free_gsf: kfree(p); return err; @@ -1228,7 +1229,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, int optname, { struct ip_msfilter *msf; - if (optlen < struct_size(msf, imsf_slist_flex, 0)) + if (optlen < IP_MSFILTER_SIZE(0)) goto e_inval; if (optlen > sysctl_optmem_max) { err = -ENOBUFS; @@ -1246,8 +1247,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, int optname, err = -ENOBUFS; break; } - if (struct_size(msf, imsf_slist_flex, msf->imsf_numsrc) > - optlen) { + if (IP_MSFILTER_SIZE(msf->imsf_numsrc) > optlen) { kfree(msf); err = -EINVAL; break; @@ -1456,7 +1456,7 @@ static bool getsockopt_needs_rtnl(int optname) static int ip_get_mcast_msfilter(struct sock *sk, void __user *optval, int __user *optlen, int len) { - const int size0 = offsetof(struct group_filter, gf_slist); + const int size0 = offsetof(struct group_filter, gf_slist_flex); struct group_filter __user *p = optval; struct group_filter gsf; int num; @@ -1468,7 +1468,7 @@ static int ip_get_mcast_msfilter(struct sock *sk, void __user *optval, return -EFAULT; num = gsf.gf_numsrc; - err = ip_mc_gsfget(sk, &gsf, p->gf_slist); + err = ip_mc_gsfget(sk, &gsf, p->gf_slist_flex); if (err) return err; if (gsf.gf_numsrc < num) @@ -1482,7 +1482,7 @@ static int ip_get_mcast_msfilter(struct sock *sk, void __user *optval, static int compat_ip_get_mcast_msfilter(struct sock *sk, void __user *optval, int __user *optlen, int len) { - const int size0 = offsetof(struct compat_group_filter, gf_slist); + const int size0 = offsetof(struct compat_group_filter, gf_slist_flex); struct compat_group_filter __user *p = optval; struct compat_group_filter gf32; struct group_filter gf; @@ -1499,7 +1499,7 @@ static int compat_ip_get_mcast_msfilter(struct sock *sk, void __user *optval, num = gf.gf_numsrc = gf32.gf_numsrc; gf.gf_group = gf32.gf_group; - err = ip_mc_gsfget(sk, &gf, p->gf_slist); + err = ip_mc_gsfget(sk, &gf, p->gf_slist_flex); if (err) return err; if (gf.gf_numsrc < num) @@ -1660,12 +1660,11 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, { struct ip_msfilter msf; - if (len < struct_size(&msf, imsf_slist_flex, 0)) { + if (len < IP_MSFILTER_SIZE(0)) { err = -EINVAL; goto out; } - if (copy_from_user(&msf, optval, - struct_size(&msf, imsf_slist_flex, 0))) { + if (copy_from_user(&msf, optval, IP_MSFILTER_SIZE(0))) { err = -EFAULT; goto out; } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 04754d55b3c1..b181773d7ad3 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -276,12 +276,13 @@ static int rt_cpu_seq_show(struct seq_file *seq, void *v) struct rt_cache_stat *st = v; if (v == SEQ_START_TOKEN) { - seq_printf(seq, "entries in_hit in_slow_tot in_slow_mc in_no_route in_brd in_martian_dst in_martian_src out_hit out_slow_tot out_slow_mc gc_total gc_ignored gc_goal_miss gc_dst_overflow in_hlist_search out_hlist_search\n"); + seq_puts(seq, "entries in_hit in_slow_tot in_slow_mc in_no_route in_brd in_martian_dst in_martian_src out_hit out_slow_tot out_slow_mc gc_total gc_ignored gc_goal_miss gc_dst_overflow in_hlist_search out_hlist_search\n"); return 0; } - seq_printf(seq,"%08x %08x %08x %08x %08x %08x %08x %08x " - " %08x %08x %08x %08x %08x %08x %08x %08x %08x \n", + seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x " + "%08x %08x %08x %08x %08x %08x " + "%08x %08x %08x %08x\n", dst_entries_get_slow(&ipv4_dst_ops), 0, /* st->in_hit */ st->in_slow_tot, @@ -2812,8 +2813,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or new->output = dst_discard_out; new->dev = net->loopback_dev; - if (new->dev) - dev_hold(new->dev); + dev_hold(new->dev); rt->rt_is_input = ort->rt_is_input; rt->rt_iif = ort->rt_iif; diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index e09147ac9a99..fc61cd3fea65 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -298,6 +298,9 @@ int tcp_gro_complete(struct sk_buff *skb) if (th->cwr) skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN; + if (skb->encapsulation) + skb->inner_transport_header = skb->transport_header; + return 0; } EXPORT_SYMBOL(tcp_gro_complete); diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 9dde1e5fb449..1380a6b6f4ff 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -624,6 +624,10 @@ static int udp_gro_complete_segment(struct sk_buff *skb) skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count; skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_L4; + + if (skb->encapsulation) + skb->inner_transport_header = skb->transport_header; + return 0; } diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index db0a89810f28..8381288a0d6e 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -701,8 +701,7 @@ static int inet6_netconf_get_devconf(struct sk_buff *in_skb, errout: if (in6_dev) in6_dev_put(in6_dev); - if (dev) - dev_put(dev); + dev_put(dev); return err; } @@ -5417,8 +5416,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh, errout_ifa: in6_ifa_put(ifa); errout: - if (dev) - dev_put(dev); + dev_put(dev); if (fillargs.netnsid >= 0) put_net(tgt_net); @@ -5792,7 +5790,8 @@ static int check_stable_privacy(struct inet6_dev *idev, struct net *net, } static int inet6_validate_link_af(const struct net_device *dev, - const struct nlattr *nla) + const struct nlattr *nla, + struct netlink_ext_ack *extack) { struct nlattr *tb[IFLA_INET6_MAX + 1]; struct inet6_dev *idev = NULL; @@ -5805,7 +5804,7 @@ static int inet6_validate_link_af(const struct net_device *dev, } err = nla_parse_nested_deprecated(tb, IFLA_INET6_MAX, nla, - inet6_af_policy, NULL); + inet6_af_policy, extack); if (err) return err; diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index d897faa4e9e6..3a871a09f962 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -55,19 +55,6 @@ #include <linux/uaccess.h> -/* - * Parsing tlv encoded headers. - * - * Parsing function "func" returns true, if parsing succeed - * and false, if it failed. - * It MUST NOT touch skb->h. - */ - -struct tlvtype_proc { - int type; - bool (*func)(struct sk_buff *skb, int offset); -}; - /********************* Generic functions *********************/ @@ -112,16 +99,23 @@ drop: return false; } +static bool ipv6_hop_ra(struct sk_buff *skb, int optoff); +static bool ipv6_hop_ioam(struct sk_buff *skb, int optoff); +static bool ipv6_hop_jumbo(struct sk_buff *skb, int optoff); +static bool ipv6_hop_calipso(struct sk_buff *skb, int optoff); +#if IS_ENABLED(CONFIG_IPV6_MIP6) +static bool ipv6_dest_hao(struct sk_buff *skb, int optoff); +#endif + /* Parse tlv encoded option header (hop-by-hop or destination) */ -static bool ip6_parse_tlv(const struct tlvtype_proc *procs, +static bool ip6_parse_tlv(bool hopbyhop, struct sk_buff *skb, int max_count) { int len = (skb_transport_header(skb)[1] + 1) << 3; const unsigned char *nh = skb_network_header(skb); int off = skb_network_header_len(skb); - const struct tlvtype_proc *curr; bool disallow_unknowns = false; int tlv_count = 0; int padlen = 0; @@ -176,20 +170,45 @@ static bool ip6_parse_tlv(const struct tlvtype_proc *procs, if (tlv_count > max_count) goto bad; - for (curr = procs; curr->type >= 0; curr++) { - if (curr->type == nh[off]) { - /* type specific length/alignment - checks will be performed in the - func(). */ - if (curr->func(skb, off) == false) + if (hopbyhop) { + switch (nh[off]) { + case IPV6_TLV_ROUTERALERT: + if (!ipv6_hop_ra(skb, off)) + return false; + break; + case IPV6_TLV_IOAM: + if (!ipv6_hop_ioam(skb, off)) + return false; + break; + case IPV6_TLV_JUMBO: + if (!ipv6_hop_jumbo(skb, off)) + return false; + break; + case IPV6_TLV_CALIPSO: + if (!ipv6_hop_calipso(skb, off)) + return false; + break; + default: + if (!ip6_tlvopt_unknown(skb, off, + disallow_unknowns)) + return false; + break; + } + } else { + switch (nh[off]) { +#if IS_ENABLED(CONFIG_IPV6_MIP6) + case IPV6_TLV_HAO: + if (!ipv6_dest_hao(skb, off)) + return false; + break; +#endif + default: + if (!ip6_tlvopt_unknown(skb, off, + disallow_unknowns)) return false; break; } } - if (curr->type < 0 && - !ip6_tlvopt_unknown(skb, off, disallow_unknowns)) - return false; - padlen = 0; } off += optlen; @@ -267,16 +286,6 @@ static bool ipv6_dest_hao(struct sk_buff *skb, int optoff) } #endif -static const struct tlvtype_proc tlvprocdestopt_lst[] = { -#if IS_ENABLED(CONFIG_IPV6_MIP6) - { - .type = IPV6_TLV_HAO, - .func = ipv6_dest_hao, - }, -#endif - {-1, NULL} -}; - static int ipv6_destopt_rcv(struct sk_buff *skb) { struct inet6_dev *idev = __in6_dev_get(skb->dev); @@ -307,8 +316,7 @@ fail_and_free: dstbuf = opt->dst1; #endif - if (ip6_parse_tlv(tlvprocdestopt_lst, skb, - net->ipv6.sysctl.max_dst_opts_cnt)) { + if (ip6_parse_tlv(false, skb, net->ipv6.sysctl.max_dst_opts_cnt)) { skb->transport_header += extlen; opt = IP6CB(skb); #if IS_ENABLED(CONFIG_IPV6_MIP6) @@ -1051,26 +1059,6 @@ drop: return false; } -static const struct tlvtype_proc tlvprochopopt_lst[] = { - { - .type = IPV6_TLV_ROUTERALERT, - .func = ipv6_hop_ra, - }, - { - .type = IPV6_TLV_IOAM, - .func = ipv6_hop_ioam, - }, - { - .type = IPV6_TLV_JUMBO, - .func = ipv6_hop_jumbo, - }, - { - .type = IPV6_TLV_CALIPSO, - .func = ipv6_hop_calipso, - }, - { -1, } -}; - int ipv6_parse_hopopts(struct sk_buff *skb) { struct inet6_skb_parm *opt = IP6CB(skb); @@ -1096,8 +1084,7 @@ fail_and_free: goto fail_and_free; opt->flags |= IP6SKB_HOPBYHOP; - if (ip6_parse_tlv(tlvprochopopt_lst, skb, - net->ipv6.sysctl.max_hbh_opts_cnt)) { + if (ip6_parse_tlv(true, skb, net->ipv6.sysctl.max_hbh_opts_cnt)) { skb->transport_header += extlen; opt = IP6CB(skb); opt->nhoff = sizeof(struct ipv6hdr); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index d72347c75f8b..12f985f43bcc 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -60,46 +60,29 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * { struct dst_entry *dst = skb_dst(skb); struct net_device *dev = dst->dev; + struct inet6_dev *idev = ip6_dst_idev(dst); unsigned int hh_len = LL_RESERVED_SPACE(dev); - int delta = hh_len - skb_headroom(skb); - const struct in6_addr *nexthop; + const struct in6_addr *daddr, *nexthop; + struct ipv6hdr *hdr; struct neighbour *neigh; int ret; /* Be paranoid, rather than too clever. */ - if (unlikely(delta > 0) && dev->header_ops) { - /* pskb_expand_head() might crash, if skb is shared */ - if (skb_shared(skb)) { - struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC); - - if (likely(nskb)) { - if (skb->sk) - skb_set_owner_w(nskb, skb->sk); - consume_skb(skb); - } else { - kfree_skb(skb); - } - skb = nskb; - } - if (skb && - pskb_expand_head(skb, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) { - kfree_skb(skb); - skb = NULL; - } + if (unlikely(hh_len > skb_headroom(skb)) && dev->header_ops) { + skb = skb_expand_head(skb, hh_len); if (!skb) { - IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS); + IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); return -ENOMEM; } } - if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) { - struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); - + hdr = ipv6_hdr(skb); + daddr = &hdr->daddr; + if (ipv6_addr_is_multicast(daddr)) { if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) && ((mroute6_is_socket(net, skb) && !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) || - ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr, - &ipv6_hdr(skb)->saddr))) { + ipv6_chk_mcast_addr(dev, daddr, &hdr->saddr))) { struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC); /* Do not check for IFF_ALLMULTI; multicast routing @@ -110,7 +93,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * net, sk, newskb, NULL, newskb->dev, dev_loopback_xmit); - if (ipv6_hdr(skb)->hop_limit == 0) { + if (hdr->hop_limit == 0) { IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); kfree_skb(skb); @@ -119,9 +102,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * } IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len); - - if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <= - IPV6_ADDR_SCOPE_NODELOCAL && + if (IPV6_ADDR_MC_SCOPE(daddr) <= IPV6_ADDR_SCOPE_NODELOCAL && !(dev->flags & IFF_LOOPBACK)) { kfree_skb(skb); return 0; @@ -136,10 +117,10 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * } rcu_read_lock_bh(); - nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr); - neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop); + nexthop = rt6_nexthop((struct rt6_info *)dst, daddr); + neigh = __ipv6_neigh_lookup_noref(dev, nexthop); if (unlikely(!neigh)) - neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false); + neigh = __neigh_create(&nd_tbl, nexthop, dev, false); if (!IS_ERR(neigh)) { sock_confirm_neigh(skb, neigh); ret = neigh_output(neigh, skb, false); @@ -148,7 +129,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * } rcu_read_unlock_bh(); - IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); + IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTNOROUTES); kfree_skb(skb); return -EINVAL; } @@ -268,6 +249,8 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, const struct ipv6_pinfo *np = inet6_sk(sk); struct in6_addr *first_hop = &fl6->daddr; struct dst_entry *dst = skb_dst(skb); + struct net_device *dev = dst->dev; + struct inet6_dev *idev = ip6_dst_idev(dst); unsigned int head_room; struct ipv6hdr *hdr; u8 proto = fl6->flowi6_proto; @@ -275,22 +258,16 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, int hlimit = -1; u32 mtu; - head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev); + head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dev); if (opt) head_room += opt->opt_nflen + opt->opt_flen; - if (unlikely(skb_headroom(skb) < head_room)) { - struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room); - if (!skb2) { - IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_OUTDISCARDS); - kfree_skb(skb); + if (unlikely(head_room > skb_headroom(skb))) { + skb = skb_expand_head(skb, head_room); + if (!skb) { + IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); return -ENOBUFS; } - if (skb->sk) - skb_set_owner_w(skb2, skb->sk); - consume_skb(skb); - skb = skb2; } if (opt) { @@ -332,8 +309,7 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, mtu = dst_mtu(dst); if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) { - IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_OUT, skb->len); + IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len); /* if egress device is enslaved to an L3 master device pass the * skb to its handler for processing @@ -346,17 +322,17 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, * we promote our socket to non const */ return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, - net, (struct sock *)sk, skb, NULL, dst->dev, + net, (struct sock *)sk, skb, NULL, dev, dst_output); } - skb->dev = dst->dev; + skb->dev = dev; /* ipv6_local_error() does not require socket lock, * we promote our socket to non const */ ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu); - IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS); + IP6_INC_STATS(net, idev, IPSTATS_MIB_FRAGFAILS); kfree_skb(skb); return -EMSGSIZE; } diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 06b0d2c329b9..36ed9efb8825 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -559,8 +559,7 @@ static int pim6_rcv(struct sk_buff *skb) read_lock(&mrt_lock); if (reg_vif_num >= 0) reg_dev = mrt->vif_table[reg_vif_num].dev; - if (reg_dev) - dev_hold(reg_dev); + dev_hold(reg_dev); read_unlock(&mrt_lock); if (!reg_dev) diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index a6804a7e34c1..e4bdb09c5586 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -225,7 +225,7 @@ static int ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval, if (GROUP_FILTER_SIZE(gsf->gf_numsrc) > optlen) goto out_free_gsf; - ret = ip6_mc_msfilter(sk, gsf, gsf->gf_slist); + ret = ip6_mc_msfilter(sk, gsf, gsf->gf_slist_flex); out_free_gsf: kfree(gsf); return ret; @@ -234,7 +234,7 @@ out_free_gsf: static int compat_ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval, int optlen) { - const int size0 = offsetof(struct compat_group_filter, gf_slist); + const int size0 = offsetof(struct compat_group_filter, gf_slist_flex); struct compat_group_filter *gf32; void *p; int ret; @@ -249,7 +249,7 @@ static int compat_ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval, if (!p) return -ENOMEM; - gf32 = p + 4; /* we want ->gf_group and ->gf_slist aligned */ + gf32 = p + 4; /* we want ->gf_group and ->gf_slist_flex aligned */ ret = -EFAULT; if (copy_from_sockptr(gf32, optval, optlen)) goto out_free_p; @@ -261,14 +261,14 @@ static int compat_ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval, goto out_free_p; ret = -EINVAL; - if (offsetof(struct compat_group_filter, gf_slist[n]) > optlen) + if (offsetof(struct compat_group_filter, gf_slist_flex[n]) > optlen) goto out_free_p; ret = ip6_mc_msfilter(sk, &(struct group_filter){ .gf_interface = gf32->gf_interface, .gf_group = gf32->gf_group, .gf_fmode = gf32->gf_fmode, - .gf_numsrc = gf32->gf_numsrc}, gf32->gf_slist); + .gf_numsrc = gf32->gf_numsrc}, gf32->gf_slist_flex); out_free_p: kfree(p); @@ -1048,7 +1048,7 @@ static int ipv6_getsockopt_sticky(struct sock *sk, struct ipv6_txoptions *opt, static int ipv6_get_msfilter(struct sock *sk, void __user *optval, int __user *optlen, int len) { - const int size0 = offsetof(struct group_filter, gf_slist); + const int size0 = offsetof(struct group_filter, gf_slist_flex); struct group_filter __user *p = optval; struct group_filter gsf; int num; @@ -1062,7 +1062,7 @@ static int ipv6_get_msfilter(struct sock *sk, void __user *optval, return -EADDRNOTAVAIL; num = gsf.gf_numsrc; lock_sock(sk); - err = ip6_mc_msfget(sk, &gsf, p->gf_slist); + err = ip6_mc_msfget(sk, &gsf, p->gf_slist_flex); if (!err) { if (num > gsf.gf_numsrc) num = gsf.gf_numsrc; @@ -1077,7 +1077,7 @@ static int ipv6_get_msfilter(struct sock *sk, void __user *optval, static int compat_ipv6_get_msfilter(struct sock *sk, void __user *optval, int __user *optlen) { - const int size0 = offsetof(struct compat_group_filter, gf_slist); + const int size0 = offsetof(struct compat_group_filter, gf_slist_flex); struct compat_group_filter __user *p = optval; struct compat_group_filter gf32; struct group_filter gf; @@ -1100,7 +1100,7 @@ static int compat_ipv6_get_msfilter(struct sock *sk, void __user *optval, return -EADDRNOTAVAIL; lock_sock(sk); - err = ip6_mc_msfget(sk, &gf, p->gf_slist); + err = ip6_mc_msfget(sk, &gf, p->gf_slist_flex); release_sock(sk); if (err) return err; diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 54ec163fbafa..cd951faa2fac 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -447,7 +447,8 @@ int ip6_mc_source(int add, int omode, struct sock *sk, if (psl) count += psl->sl_max; - newpsl = sock_kmalloc(sk, IP6_SFLSIZE(count), GFP_KERNEL); + newpsl = sock_kmalloc(sk, struct_size(newpsl, sl_addr, count), + GFP_KERNEL); if (!newpsl) { err = -ENOBUFS; goto done; @@ -457,7 +458,8 @@ int ip6_mc_source(int add, int omode, struct sock *sk, if (psl) { for (i = 0; i < psl->sl_count; i++) newpsl->sl_addr[i] = psl->sl_addr[i]; - atomic_sub(IP6_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc); + atomic_sub(struct_size(psl, sl_addr, psl->sl_max), + &sk->sk_omem_alloc); kfree_rcu(psl, rcu); } psl = newpsl; @@ -525,8 +527,9 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf, goto done; } if (gsf->gf_numsrc) { - newpsl = sock_kmalloc(sk, IP6_SFLSIZE(gsf->gf_numsrc), - GFP_KERNEL); + newpsl = sock_kmalloc(sk, struct_size(newpsl, sl_addr, + gsf->gf_numsrc), + GFP_KERNEL); if (!newpsl) { err = -ENOBUFS; goto done; @@ -543,7 +546,8 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf, newpsl->sl_count, newpsl->sl_addr, 0); if (err) { mutex_unlock(&idev->mc_lock); - sock_kfree_s(sk, newpsl, IP6_SFLSIZE(newpsl->sl_max)); + sock_kfree_s(sk, newpsl, struct_size(newpsl, sl_addr, + newpsl->sl_max)); goto done; } mutex_unlock(&idev->mc_lock); @@ -559,7 +563,8 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf, if (psl) { ip6_mc_del_src(idev, group, pmc->sfmode, psl->sl_count, psl->sl_addr, 0); - atomic_sub(IP6_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc); + atomic_sub(struct_size(psl, sl_addr, psl->sl_max), + &sk->sk_omem_alloc); kfree_rcu(psl, rcu); } else { ip6_mc_del_src(idev, group, pmc->sfmode, 0, NULL, 0); @@ -2607,7 +2612,8 @@ static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml, err = ip6_mc_del_src(idev, &iml->addr, iml->sfmode, psl->sl_count, psl->sl_addr, 0); RCU_INIT_POINTER(iml->sflist, NULL); - atomic_sub(IP6_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc); + atomic_sub(struct_size(psl, sl_addr, psl->sl_max), + &sk->sk_omem_alloc); kfree_rcu(psl, rcu); } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 6b8051106aba..6cf4bb89ca69 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -3626,8 +3626,7 @@ out: if (err) { lwtstate_put(fib6_nh->fib_nh_lws); fib6_nh->fib_nh_lws = NULL; - if (dev) - dev_put(dev); + dev_put(dev); } return err; diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 44453b35c7b7..18316ee3c692 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -1044,7 +1044,7 @@ static int iucv_sock_sendmsg(struct socket *sock, struct msghdr *msg, if (err == 0) { atomic_dec(&iucv->skbs_in_xmit); skb_unlink(skb, &iucv->send_skb_q); - kfree_skb(skb); + consume_skb(skb); } /* this error should never happen since the */ @@ -1293,7 +1293,7 @@ static int iucv_sock_recvmsg(struct socket *sock, struct msghdr *msg, } } - kfree_skb(skb); + consume_skb(skb); if (iucv->transport == AF_IUCV_TRANS_HIPER) { atomic_inc(&iucv->msg_recv); if (atomic_read(&iucv->msg_recv) > iucv->msglimit) { @@ -1756,7 +1756,7 @@ static void iucv_callback_txdone(struct iucv_path *path, spin_unlock_irqrestore(&list->lock, flags); if (this) { - kfree_skb(this); + consume_skb(this); /* wake up any process waiting for sending */ iucv_sock_wake_msglim(sk); } @@ -1903,17 +1903,17 @@ static int afiucv_hs_callback_synack(struct sock *sk, struct sk_buff *skb) { struct iucv_sock *iucv = iucv_sk(sk); - if (!iucv) - goto out; - if (sk->sk_state != IUCV_BOUND) - goto out; + if (!iucv || sk->sk_state != IUCV_BOUND) { + kfree_skb(skb); + return NET_RX_SUCCESS; + } + bh_lock_sock(sk); iucv->msglimit_peer = iucv_trans_hdr(skb)->window; sk->sk_state = IUCV_CONNECTED; sk->sk_state_change(sk); bh_unlock_sock(sk); -out: - kfree_skb(skb); + consume_skb(skb); return NET_RX_SUCCESS; } @@ -1924,16 +1924,16 @@ static int afiucv_hs_callback_synfin(struct sock *sk, struct sk_buff *skb) { struct iucv_sock *iucv = iucv_sk(sk); - if (!iucv) - goto out; - if (sk->sk_state != IUCV_BOUND) - goto out; + if (!iucv || sk->sk_state != IUCV_BOUND) { + kfree_skb(skb); + return NET_RX_SUCCESS; + } + bh_lock_sock(sk); sk->sk_state = IUCV_DISCONN; sk->sk_state_change(sk); bh_unlock_sock(sk); -out: - kfree_skb(skb); + consume_skb(skb); return NET_RX_SUCCESS; } @@ -1945,16 +1945,18 @@ static int afiucv_hs_callback_fin(struct sock *sk, struct sk_buff *skb) struct iucv_sock *iucv = iucv_sk(sk); /* other end of connection closed */ - if (!iucv) - goto out; + if (!iucv) { + kfree_skb(skb); + return NET_RX_SUCCESS; + } + bh_lock_sock(sk); if (sk->sk_state == IUCV_CONNECTED) { sk->sk_state = IUCV_DISCONN; sk->sk_state_change(sk); } bh_unlock_sock(sk); -out: - kfree_skb(skb); + consume_skb(skb); return NET_RX_SUCCESS; } @@ -2107,7 +2109,7 @@ static int afiucv_hs_rcv(struct sk_buff *skb, struct net_device *dev, case (AF_IUCV_FLAG_WIN): err = afiucv_hs_callback_win(sk, skb); if (skb->len == sizeof(struct af_iucv_trans_hdr)) { - kfree_skb(skb); + consume_skb(skb); break; } fallthrough; /* and receive non-zero length data */ @@ -2262,21 +2264,11 @@ static struct packet_type iucv_packet_type = { .func = afiucv_hs_rcv, }; -static int afiucv_iucv_init(void) -{ - return pr_iucv->iucv_register(&af_iucv_handler, 0); -} - -static void afiucv_iucv_exit(void) -{ - pr_iucv->iucv_unregister(&af_iucv_handler, 0); -} - static int __init afiucv_init(void) { int err; - if (MACHINE_IS_VM) { + if (MACHINE_IS_VM && IS_ENABLED(CONFIG_IUCV)) { cpcmd("QUERY USERID", iucv_userid, sizeof(iucv_userid), &err); if (unlikely(err)) { WARN_ON(err); @@ -2284,11 +2276,7 @@ static int __init afiucv_init(void) goto out; } - pr_iucv = try_then_request_module(symbol_get(iucv_if), "iucv"); - if (!pr_iucv) { - printk(KERN_WARNING "iucv_if lookup failed\n"); - memset(&iucv_userid, 0, sizeof(iucv_userid)); - } + pr_iucv = &iucv_if; } else { memset(&iucv_userid, 0, sizeof(iucv_userid)); pr_iucv = NULL; @@ -2302,7 +2290,7 @@ static int __init afiucv_init(void) goto out_proto; if (pr_iucv) { - err = afiucv_iucv_init(); + err = pr_iucv->iucv_register(&af_iucv_handler, 0); if (err) goto out_sock; } @@ -2316,23 +2304,19 @@ static int __init afiucv_init(void) out_notifier: if (pr_iucv) - afiucv_iucv_exit(); + pr_iucv->iucv_unregister(&af_iucv_handler, 0); out_sock: sock_unregister(PF_IUCV); out_proto: proto_unregister(&iucv_proto); out: - if (pr_iucv) - symbol_put(iucv_if); return err; } static void __exit afiucv_exit(void) { - if (pr_iucv) { - afiucv_iucv_exit(); - symbol_put(iucv_if); - } + if (pr_iucv) + pr_iucv->iucv_unregister(&af_iucv_handler, 0); unregister_netdevice_notifier(&afiucv_netdev_notifier); dev_remove_pack(&iucv_packet_type); diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index e6795d5a546a..f3343a8541a5 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -286,19 +286,19 @@ static union iucv_param *iucv_param_irq[NR_CPUS]; */ static inline int __iucv_call_b2f0(int command, union iucv_param *parm) { - register unsigned long reg0 asm ("0"); - register unsigned long reg1 asm ("1"); - int ccode; + int cc; - reg0 = command; - reg1 = (unsigned long)parm; asm volatile( - " .long 0xb2f01000\n" - " ipm %0\n" - " srl %0,28\n" - : "=d" (ccode), "=m" (*parm), "+d" (reg0), "+a" (reg1) - : "m" (*parm) : "cc"); - return ccode; + " lgr 0,%[reg0]\n" + " lgr 1,%[reg1]\n" + " .long 0xb2f01000\n" + " ipm %[cc]\n" + " srl %[cc],28\n" + : [cc] "=&d" (cc), "+m" (*parm) + : [reg0] "d" ((unsigned long)command), + [reg1] "d" ((unsigned long)parm) + : "cc", "0", "1"); + return cc; } static inline int iucv_call_b2f0(int command, union iucv_param *parm) @@ -319,19 +319,21 @@ static inline int iucv_call_b2f0(int command, union iucv_param *parm) */ static int __iucv_query_maxconn(void *param, unsigned long *max_pathid) { - register unsigned long reg0 asm ("0"); - register unsigned long reg1 asm ("1"); - int ccode; + unsigned long reg1 = (unsigned long)param; + int cc; - reg0 = IUCV_QUERY; - reg1 = (unsigned long) param; asm volatile ( + " lghi 0,%[cmd]\n" + " lgr 1,%[reg1]\n" " .long 0xb2f01000\n" - " ipm %0\n" - " srl %0,28\n" - : "=d" (ccode), "+d" (reg0), "+d" (reg1) : : "cc"); + " ipm %[cc]\n" + " srl %[cc],28\n" + " lgr %[reg1],1\n" + : [cc] "=&d" (cc), [reg1] "+&d" (reg1) + : [cmd] "K" (IUCV_QUERY) + : "cc", "0", "1"); *max_pathid = reg1; - return ccode; + return cc; } static int iucv_query_maxconn(void) @@ -500,14 +502,14 @@ static void iucv_setmask_mp(void) { int cpu; - get_online_cpus(); + cpus_read_lock(); for_each_online_cpu(cpu) /* Enable all cpus with a declared buffer. */ if (cpumask_test_cpu(cpu, &iucv_buffer_cpumask) && !cpumask_test_cpu(cpu, &iucv_irq_cpumask)) smp_call_function_single(cpu, iucv_allow_cpu, NULL, 1); - put_online_cpus(); + cpus_read_unlock(); } /** @@ -540,7 +542,7 @@ static int iucv_enable(void) size_t alloc_size; int cpu, rc; - get_online_cpus(); + cpus_read_lock(); rc = -ENOMEM; alloc_size = iucv_max_pathid * sizeof(struct iucv_path); iucv_path_table = kzalloc(alloc_size, GFP_KERNEL); @@ -553,12 +555,12 @@ static int iucv_enable(void) if (cpumask_empty(&iucv_buffer_cpumask)) /* No cpu could declare an iucv buffer. */ goto out; - put_online_cpus(); + cpus_read_unlock(); return 0; out: kfree(iucv_path_table); iucv_path_table = NULL; - put_online_cpus(); + cpus_read_unlock(); return rc; } @@ -571,11 +573,11 @@ out: */ static void iucv_disable(void) { - get_online_cpus(); + cpus_read_lock(); on_each_cpu(iucv_retrieve_cpu, NULL, 1); kfree(iucv_path_table); iucv_path_table = NULL; - put_online_cpus(); + cpus_read_unlock(); } static int iucv_cpu_dead(unsigned int cpu) @@ -784,7 +786,7 @@ static int iucv_reboot_event(struct notifier_block *this, if (cpumask_empty(&iucv_irq_cpumask)) return NOTIFY_DONE; - get_online_cpus(); + cpus_read_lock(); on_each_cpu_mask(&iucv_irq_cpumask, iucv_block_cpu, NULL, 1); preempt_disable(); for (i = 0; i < iucv_max_pathid; i++) { @@ -792,7 +794,7 @@ static int iucv_reboot_event(struct notifier_block *this, iucv_sever_pathid(i, NULL); } preempt_enable(); - put_online_cpus(); + cpus_read_unlock(); iucv_disable(); return NOTIFY_DONE; } diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index ac5cadd02cfa..3086f4a6ae68 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -224,8 +224,7 @@ static int llc_ui_release(struct socket *sock) } else { release_sock(sk); } - if (llc->dev) - dev_put(llc->dev); + dev_put(llc->dev); sock_put(sk); llc_sk_free(sk); out: @@ -363,8 +362,7 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen) } else llc->dev = dev_getbyhwaddr_rcu(&init_net, addr->sllc_arphrd, addr->sllc_mac); - if (llc->dev) - dev_hold(llc->dev); + dev_hold(llc->dev); rcu_read_unlock(); if (!llc->dev) goto out; diff --git a/net/mctp/af_mctp.c b/net/mctp/af_mctp.c index 84f722d31fd7..a9526ac29dff 100644 --- a/net/mctp/af_mctp.c +++ b/net/mctp/af_mctp.c @@ -170,7 +170,6 @@ static int mctp_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, /* TODO: expand mctp_skb_cb for header fields? */ struct mctp_hdr *hdr = mctp_hdr(skb); - hdr = mctp_hdr(skb); addr = msg->msg_name; addr->smctp_family = AF_MCTP; addr->smctp_network = cb->net; diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index d2591ebf01d9..56263c2c4014 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -27,7 +27,6 @@ struct mptcp_pm_addr_entry { struct mptcp_addr_info addr; u8 flags; int ifindex; - struct rcu_head rcu; struct socket *lsk; }; diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c index f92006cec94c..2bfd9f1b8f11 100644 --- a/net/netfilter/nf_flow_table_offload.c +++ b/net/netfilter/nf_flow_table_offload.c @@ -251,8 +251,7 @@ static int flow_offload_eth_src(struct net *net, flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 8, &val, &mask); - if (dev) - dev_put(dev); + dev_put(dev); return 0; } diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index bbd1209694b8..7f2f69b609d8 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -51,18 +51,14 @@ static void nf_queue_entry_release_refs(struct nf_queue_entry *entry) struct nf_hook_state *state = &entry->state; /* Release those devices we held, or Alexey will kill me. */ - if (state->in) - dev_put(state->in); - if (state->out) - dev_put(state->out); + dev_put(state->in); + dev_put(state->out); if (state->sk) sock_put(state->sk); #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) - if (entry->physin) - dev_put(entry->physin); - if (entry->physout) - dev_put(entry->physout); + dev_put(entry->physin); + dev_put(entry->physout); #endif } @@ -95,18 +91,14 @@ void nf_queue_entry_get_refs(struct nf_queue_entry *entry) { struct nf_hook_state *state = &entry->state; - if (state->in) - dev_hold(state->in); - if (state->out) - dev_hold(state->out); + dev_hold(state->in); + dev_hold(state->out); if (state->sk) sock_hold(state->sk); #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) - if (entry->physin) - dev_hold(entry->physin); - if (entry->physout) - dev_hold(entry->physout); + dev_hold(entry->physin); + dev_hold(entry->physout); #endif } EXPORT_SYMBOL_GPL(nf_queue_entry_get_refs); diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index 2483df0bbd7c..566ba4397ee4 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -492,8 +492,7 @@ static int netlbl_unlhsh_remove_addr4(struct net *net, netlbl_af4list_audit_addr(audit_buf, 1, (dev != NULL ? dev->name : NULL), addr->s_addr, mask->s_addr); - if (dev != NULL) - dev_put(dev); + dev_put(dev); if (entry != NULL && security_secid_to_secctx(entry->secid, &secctx, &secctx_len) == 0) { @@ -553,8 +552,7 @@ static int netlbl_unlhsh_remove_addr6(struct net *net, netlbl_af6list_audit_addr(audit_buf, 1, (dev != NULL ? dev->name : NULL), addr, mask); - if (dev != NULL) - dev_put(dev); + dev_put(dev); if (entry != NULL && security_secid_to_secctx(entry->secid, &secctx, &secctx_len) == 0) { diff --git a/net/netrom/nr_loopback.c b/net/netrom/nr_loopback.c index a880dd33e901..511819fbfa67 100644 --- a/net/netrom/nr_loopback.c +++ b/net/netrom/nr_loopback.c @@ -59,8 +59,7 @@ static void nr_loopback_timer(struct timer_list *unused) if (dev == NULL || nr_rx_frame(skb, dev) == 0) kfree_skb(skb); - if (dev != NULL) - dev_put(dev); + dev_put(dev); if (!skb_queue_empty(&loopback_queue) && !nr_loopback_running()) mod_timer(&loopback_timer, jiffies + 10); diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index de0456073dc0..ddd5cbd455e3 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -582,8 +582,7 @@ struct net_device *nr_dev_first(void) if (first == NULL || strncmp(dev->name, first->name, 3) < 0) first = dev; } - if (first) - dev_hold(first); + dev_hold(first); rcu_read_unlock(); return first; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 57a1971f29e5..543365f58e97 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -250,8 +250,7 @@ static struct net_device *packet_cached_dev_get(struct packet_sock *po) rcu_read_lock(); dev = rcu_dereference(po->cached_dev); - if (likely(dev)) - dev_hold(dev); + dev_hold(dev); rcu_read_unlock(); return dev; @@ -3024,8 +3023,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) out_free: kfree_skb(skb); out_unlock: - if (dev) - dev_put(dev); + dev_put(dev); out: return err; } @@ -3158,8 +3156,7 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex, } } - if (dev) - dev_hold(dev); + dev_hold(dev); proto_curr = po->prot_hook.type; dev_curr = po->prot_hook.dev; @@ -3196,8 +3193,7 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex, packet_cached_dev_assign(po, dev); } } - if (dev_curr) - dev_put(dev_curr); + dev_put(dev_curr); if (proto == 0 || !need_rehook) goto out_unlock; @@ -4109,8 +4105,7 @@ static int packet_notifier(struct notifier_block *this, if (msg == NETDEV_UNREGISTER) { packet_cached_dev_reset(po); WRITE_ONCE(po->ifindex, -1); - if (po->prot_hook.dev) - dev_put(po->prot_hook.dev); + dev_put(po->prot_hook.dev); po->prot_hook.dev = NULL; } spin_unlock(&po->bind_lock); diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c index ca6ae4c59433..65218b7ce9f9 100644 --- a/net/phonet/af_phonet.c +++ b/net/phonet/af_phonet.c @@ -275,8 +275,7 @@ int pn_skb_send(struct sock *sk, struct sk_buff *skb, drop: kfree_skb(skb); - if (dev) - dev_put(dev); + dev_put(dev); return err; } EXPORT_SYMBOL(pn_skb_send); diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c index 876d0ae5f9fd..cde671d29d5d 100644 --- a/net/phonet/pn_dev.c +++ b/net/phonet/pn_dev.c @@ -122,8 +122,7 @@ struct net_device *phonet_device_get(struct net *net) break; dev = NULL; } - if (dev) - dev_hold(dev); + dev_hold(dev); rcu_read_unlock(); return dev; } @@ -411,8 +410,7 @@ struct net_device *phonet_route_output(struct net *net, u8 daddr) daddr >>= 2; rcu_read_lock(); dev = rcu_dereference(routes->table[daddr]); - if (dev) - dev_hold(dev); + dev_hold(dev); rcu_read_unlock(); if (!dev) diff --git a/net/phonet/socket.c b/net/phonet/socket.c index 2599235d592e..71e2caf6ab85 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -379,8 +379,7 @@ static int pn_socket_ioctl(struct socket *sock, unsigned int cmd, saddr = PN_NO_ADDR; release_sock(sk); - if (dev) - dev_put(dev); + dev_put(dev); if (saddr == PN_NO_ADDR) return -EHOSTUNREACH; diff --git a/net/qrtr/mhi.c b/net/qrtr/mhi.c index fa611678af05..1dc955ca57d3 100644 --- a/net/qrtr/mhi.c +++ b/net/qrtr/mhi.c @@ -15,6 +15,7 @@ struct qrtr_mhi_dev { struct qrtr_endpoint ep; struct mhi_device *mhi_dev; struct device *dev; + struct completion ready; }; /* From MHI to QRTR */ @@ -50,6 +51,10 @@ static int qcom_mhi_qrtr_send(struct qrtr_endpoint *ep, struct sk_buff *skb) struct qrtr_mhi_dev *qdev = container_of(ep, struct qrtr_mhi_dev, ep); int rc; + rc = wait_for_completion_interruptible(&qdev->ready); + if (rc) + goto free_skb; + if (skb->sk) sock_hold(skb->sk); @@ -79,7 +84,7 @@ static int qcom_mhi_qrtr_probe(struct mhi_device *mhi_dev, int rc; /* start channels */ - rc = mhi_prepare_for_transfer(mhi_dev); + rc = mhi_prepare_for_transfer(mhi_dev, 0); if (rc) return rc; @@ -96,6 +101,15 @@ static int qcom_mhi_qrtr_probe(struct mhi_device *mhi_dev, if (rc) return rc; + /* start channels */ + rc = mhi_prepare_for_transfer(mhi_dev, MHI_CH_INBOUND_ALLOC_BUFS); + if (rc) { + qrtr_endpoint_unregister(&qdev->ep); + dev_set_drvdata(&mhi_dev->dev, NULL); + return rc; + } + + complete_all(&qdev->ready); dev_dbg(qdev->dev, "Qualcomm MHI QRTR driver probed\n"); return 0; diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 77ee80e3effc..37f51d778728 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -78,8 +78,7 @@ static void tcf_mirred_release(struct tc_action *a) /* last reference to action, no need to lock */ dev = rcu_dereference_protected(m->tcfm_dev, 1); - if (dev) - dev_put(dev); + dev_put(dev); } static const struct nla_policy mirred_policy[TCA_MIRRED_MAX + 1] = { @@ -180,8 +179,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, mac_header_xmit = dev_is_mac_header_xmit(dev); dev = rcu_replace_pointer(m->tcfm_dev, dev, lockdep_is_held(&m->tcf_lock)); - if (dev) - dev_put(dev); + dev_put(dev); m->tcfm_mac_header_xmit = mac_header_xmit; } goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index d9ac60ffe927..a8dd06c74e31 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -913,7 +913,7 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, /* seqlock has the same scope of busylock, for NOLOCK qdisc */ spin_lock_init(&sch->seqlock); - lockdep_set_class(&sch->busylock, + lockdep_set_class(&sch->seqlock, dev->qdisc_tx_busylock ?: &qdisc_tx_busylock); seqcount_init(&sch->running); diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index 07b30d0601d7..9c79374457a0 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -1739,8 +1739,6 @@ static void taprio_attach(struct Qdisc *sch) if (FULL_OFFLOAD_IS_ENABLED(q->flags)) { qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; old = dev_graft_qdisc(qdisc->dev_queue, qdisc); - if (ntx < dev->real_num_tx_queues) - qdisc_hash_add(qdisc, false); } else { old = dev_graft_qdisc(qdisc->dev_queue, sch); qdisc_refcount_inc(sch); diff --git a/net/sctp/auth.c b/net/sctp/auth.c index fe74c5f95630..db6b7373d16c 100644 --- a/net/sctp/auth.c +++ b/net/sctp/auth.c @@ -857,14 +857,18 @@ int sctp_auth_set_key(struct sctp_endpoint *ep, memcpy(key->data, &auth_key->sca_key[0], auth_key->sca_keylength); cur_key->key = key; - if (replace) { - list_del_init(&shkey->key_list); - sctp_auth_shkey_release(shkey); - if (asoc && asoc->active_key_id == auth_key->sca_keynumber) - sctp_auth_asoc_init_active_key(asoc, GFP_KERNEL); + if (!replace) { + list_add(&cur_key->key_list, sh_keys); + return 0; } + + list_del_init(&shkey->key_list); + sctp_auth_shkey_release(shkey); list_add(&cur_key->key_list, sh_keys); + if (asoc && asoc->active_key_id == auth_key->sca_keynumber) + sctp_auth_asoc_init_active_key(asoc, GFP_KERNEL); + return 0; } diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index cd0d7c908b2a..edc8962364f3 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -1752,21 +1752,30 @@ out: return rc; } -/* convert the RMB size into the compressed notation - minimum 16K. +#define SMCD_DMBE_SIZES 6 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */ +#define SMCR_RMBE_SIZES 5 /* 0 -> 16KB, 1 -> 32KB, .. 5 -> 512KB */ + +/* convert the RMB size into the compressed notation (minimum 16K, see + * SMCD/R_DMBE_SIZES. * In contrast to plain ilog2, this rounds towards the next power of 2, * so the socket application gets at least its desired sndbuf / rcvbuf size. */ -static u8 smc_compress_bufsize(int size) +static u8 smc_compress_bufsize(int size, bool is_smcd, bool is_rmb) { + const unsigned int max_scat = SG_MAX_SINGLE_ALLOC * PAGE_SIZE; u8 compressed; if (size <= SMC_BUF_MIN_SIZE) return 0; - size = (size - 1) >> 14; - compressed = ilog2(size) + 1; - if (compressed >= SMC_RMBE_SIZES) - compressed = SMC_RMBE_SIZES - 1; + size = (size - 1) >> 14; /* convert to 16K multiple */ + compressed = min_t(u8, ilog2(size) + 1, + is_smcd ? SMCD_DMBE_SIZES : SMCR_RMBE_SIZES); + + if (!is_smcd && is_rmb) + /* RMBs are backed by & limited to max size of scatterlists */ + compressed = min_t(u8, compressed, ilog2(max_scat >> 14)); + return compressed; } @@ -1982,17 +1991,12 @@ out: return rc; } -#define SMCD_DMBE_SIZES 6 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */ - static struct smc_buf_desc *smcd_new_buf_create(struct smc_link_group *lgr, bool is_dmb, int bufsize) { struct smc_buf_desc *buf_desc; int rc; - if (smc_compress_bufsize(bufsize) > SMCD_DMBE_SIZES) - return ERR_PTR(-EAGAIN); - /* try to alloc a new DMB */ buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL); if (!buf_desc) @@ -2041,9 +2045,8 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb) /* use socket send buffer size (w/o overhead) as start value */ sk_buf_size = smc->sk.sk_sndbuf / 2; - for (bufsize_short = smc_compress_bufsize(sk_buf_size); + for (bufsize_short = smc_compress_bufsize(sk_buf_size, is_smcd, is_rmb); bufsize_short >= 0; bufsize_short--) { - if (is_rmb) { lock = &lgr->rmbs_lock; buf_list = &lgr->rmbs[bufsize_short]; @@ -2052,8 +2055,6 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb) buf_list = &lgr->sndbufs[bufsize_short]; } bufsize = smc_uncompress_bufsize(bufsize_short); - if ((1 << get_order(bufsize)) > SG_MAX_SINGLE_ALLOC) - continue; /* check for reusable slot in the link group */ buf_desc = smc_buf_get_slot(bufsize_short, lock, buf_list); diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index 7d7ba0320d5a..a8845343d183 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -753,8 +753,7 @@ void smc_ib_ndev_change(struct net_device *ndev, unsigned long event) if (!libdev->ops.get_netdev) continue; lndev = libdev->ops.get_netdev(libdev, i + 1); - if (lndev) - dev_put(lndev); + dev_put(lndev); if (lndev != ndev) continue; if (event == NETDEV_REGISTER) diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index 6f6d33edb135..4a964e9190b0 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -394,8 +394,7 @@ static int smc_pnet_add_eth(struct smc_pnettable *pnettable, struct net *net, return 0; out_put: - if (ndev) - dev_put(ndev); + dev_put(ndev); return rc; } diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 0ae3478561f4..0b2c18efc079 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -809,3 +809,51 @@ int switchdev_handle_port_attr_set(struct net_device *dev, return err; } EXPORT_SYMBOL_GPL(switchdev_handle_port_attr_set); + +int switchdev_bridge_port_offload(struct net_device *brport_dev, + struct net_device *dev, const void *ctx, + struct notifier_block *atomic_nb, + struct notifier_block *blocking_nb, + bool tx_fwd_offload, + struct netlink_ext_ack *extack) +{ + struct switchdev_notifier_brport_info brport_info = { + .brport = { + .dev = dev, + .ctx = ctx, + .atomic_nb = atomic_nb, + .blocking_nb = blocking_nb, + .tx_fwd_offload = tx_fwd_offload, + }, + }; + int err; + + ASSERT_RTNL(); + + err = call_switchdev_blocking_notifiers(SWITCHDEV_BRPORT_OFFLOADED, + brport_dev, &brport_info.info, + extack); + return notifier_to_errno(err); +} +EXPORT_SYMBOL_GPL(switchdev_bridge_port_offload); + +void switchdev_bridge_port_unoffload(struct net_device *brport_dev, + const void *ctx, + struct notifier_block *atomic_nb, + struct notifier_block *blocking_nb) +{ + struct switchdev_notifier_brport_info brport_info = { + .brport = { + .ctx = ctx, + .atomic_nb = atomic_nb, + .blocking_nb = blocking_nb, + }, + }; + + ASSERT_RTNL(); + + call_switchdev_blocking_notifiers(SWITCHDEV_BRPORT_UNOFFLOADED, + brport_dev, &brport_info.info, + NULL); +} +EXPORT_SYMBOL_GPL(switchdev_bridge_port_unoffload); diff --git a/net/unix/Kconfig b/net/unix/Kconfig index b6c4282899ec..b7f811216820 100644 --- a/net/unix/Kconfig +++ b/net/unix/Kconfig @@ -25,6 +25,11 @@ config UNIX_SCM depends on UNIX default y +config AF_UNIX_OOB + bool + depends on UNIX + default y + config UNIX_DIAG tristate "UNIX: socket monitoring interface" depends on UNIX diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 256c4e31132e..ec02e70a549b 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -503,6 +503,12 @@ static void unix_sock_destructor(struct sock *sk) skb_queue_purge(&sk->sk_receive_queue); +#if IS_ENABLED(CONFIG_AF_UNIX_OOB) + if (u->oob_skb) { + kfree_skb(u->oob_skb); + u->oob_skb = NULL; + } +#endif WARN_ON(refcount_read(&sk->sk_wmem_alloc)); WARN_ON(!sk_unhashed(sk)); WARN_ON(sk->sk_socket); @@ -1889,6 +1895,46 @@ out: */ #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768)) +#if (IS_ENABLED(CONFIG_AF_UNIX_OOB)) +static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other) +{ + struct unix_sock *ousk = unix_sk(other); + struct sk_buff *skb; + int err = 0; + + skb = sock_alloc_send_skb(sock->sk, 1, msg->msg_flags & MSG_DONTWAIT, &err); + + if (!skb) + return err; + + skb_put(skb, 1); + skb->len = 1; + err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, 1); + + if (err) { + kfree_skb(skb); + return err; + } + + unix_state_lock(other); + maybe_add_creds(skb, sock, other); + skb_get(skb); + + if (ousk->oob_skb) + kfree_skb(ousk->oob_skb); + + ousk->oob_skb = skb; + + scm_stat_add(other, skb); + skb_queue_tail(&other->sk_receive_queue, skb); + sk_send_sigurg(other); + unix_state_unlock(other); + other->sk_data_ready(other); + + return err; +} +#endif + static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { @@ -1907,8 +1953,14 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, return err; err = -EOPNOTSUPP; - if (msg->msg_flags&MSG_OOB) - goto out_err; + if (msg->msg_flags & MSG_OOB) { +#if (IS_ENABLED(CONFIG_AF_UNIX_OOB)) + if (len) + len--; + else +#endif + goto out_err; + } if (msg->msg_namelen) { err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP; @@ -1973,6 +2025,15 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, sent += size; } +#if (IS_ENABLED(CONFIG_AF_UNIX_OOB)) + if (msg->msg_flags & MSG_OOB) { + err = queue_oob(sock, msg, other); + if (err) + goto out_err; + sent++; + } +#endif + scm_destroy(&scm); return sent; @@ -2358,6 +2419,59 @@ struct unix_stream_read_state { unsigned int splice_flags; }; +#if IS_ENABLED(CONFIG_AF_UNIX_OOB) +static int unix_stream_recv_urg(struct unix_stream_read_state *state) +{ + struct socket *sock = state->socket; + struct sock *sk = sock->sk; + struct unix_sock *u = unix_sk(sk); + int chunk = 1; + + if (sock_flag(sk, SOCK_URGINLINE) || !u->oob_skb) + return -EINVAL; + + chunk = state->recv_actor(u->oob_skb, 0, chunk, state); + if (chunk < 0) + return -EFAULT; + + if (!(state->flags & MSG_PEEK)) { + UNIXCB(u->oob_skb).consumed += 1; + kfree_skb(u->oob_skb); + u->oob_skb = NULL; + } + state->msg->msg_flags |= MSG_OOB; + return 1; +} + +static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk, + int flags, int copied) +{ + struct unix_sock *u = unix_sk(sk); + + if (!unix_skb_len(skb) && !(flags & MSG_PEEK)) { + skb_unlink(skb, &sk->sk_receive_queue); + consume_skb(skb); + skb = NULL; + } else { + if (skb == u->oob_skb) { + if (copied) { + skb = NULL; + } else if (sock_flag(sk, SOCK_URGINLINE)) { + if (!(flags & MSG_PEEK)) { + u->oob_skb = NULL; + consume_skb(skb); + } + } else if (!(flags & MSG_PEEK)) { + skb_unlink(skb, &sk->sk_receive_queue); + consume_skb(skb); + skb = skb_peek(&sk->sk_receive_queue); + } + } + } + return skb; +} +#endif + static int unix_stream_read_generic(struct unix_stream_read_state *state, bool freezable) { @@ -2383,6 +2497,15 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state, if (unlikely(flags & MSG_OOB)) { err = -EOPNOTSUPP; +#if IS_ENABLED(CONFIG_AF_UNIX_OOB) + mutex_lock(&u->iolock); + unix_state_lock(sk); + + err = unix_stream_recv_urg(state); + + unix_state_unlock(sk); + mutex_unlock(&u->iolock); +#endif goto out; } @@ -2411,6 +2534,18 @@ redo: } last = skb = skb_peek(&sk->sk_receive_queue); last_len = last ? last->len : 0; + +#if IS_ENABLED(CONFIG_AF_UNIX_OOB) + if (skb) { + skb = manage_oob(skb, sk, flags, copied); + if (!skb) { + unix_state_unlock(sk); + if (copied) + break; + goto redo; + } + } +#endif again: if (skb == NULL) { if (copied >= target) @@ -2746,6 +2881,20 @@ static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) case SIOCUNIXFILE: err = unix_open_file(sk); break; +#if IS_ENABLED(CONFIG_AF_UNIX_OOB) + case SIOCATMARK: + { + struct sk_buff *skb; + struct unix_sock *u = unix_sk(sk); + int answ = 0; + + skb = skb_peek(&sk->sk_receive_queue); + if (skb && skb == u->oob_skb) + answ = 1; + err = put_user(answ, (int __user *)arg); + } + break; +#endif default: err = -ENOIOCTLCMD; break; diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 169ba8b72a63..081e7ae93cb1 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -1079,6 +1079,9 @@ virtio_transport_recv_connected(struct sock *sk, virtio_transport_recv_enqueue(vsk, pkt); sk->sk_data_ready(sk); return err; + case VIRTIO_VSOCK_OP_CREDIT_REQUEST: + virtio_transport_send_credit_update(vsk); + break; case VIRTIO_VSOCK_OP_CREDIT_UPDATE: sk->sk_write_space(sk); break; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 16c88beea48b..dceed5b5b226 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -6527,8 +6527,7 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) err = rdev_change_station(rdev, dev, mac_addr, ¶ms); out_put_vlan: - if (params.vlan) - dev_put(params.vlan); + dev_put(params.vlan); return err; } @@ -6763,8 +6762,7 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) err = rdev_add_station(rdev, dev, mac_addr, ¶ms); - if (params.vlan) - dev_put(params.vlan); + dev_put(params.vlan); return err; } @@ -8489,8 +8487,7 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) goto out_free; nl80211_send_scan_start(rdev, wdev); - if (wdev->netdev) - dev_hold(wdev->netdev); + dev_hold(wdev->netdev); return 0; @@ -14860,9 +14857,7 @@ static int nl80211_pre_doit(const struct genl_ops *ops, struct sk_buff *skb, return -ENETDOWN; } - if (dev) - dev_hold(dev); - + dev_hold(dev); info->user_ptr[0] = rdev; } @@ -14884,8 +14879,7 @@ static void nl80211_post_doit(const struct genl_ops *ops, struct sk_buff *skb, if (ops->internal_flags & NL80211_FLAG_NEED_WDEV) { struct wireless_dev *wdev = info->user_ptr[1]; - if (wdev->netdev) - dev_put(wdev->netdev); + dev_put(wdev->netdev); } else { dev_put(info->user_ptr[1]); } diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 7897b1478c3c..11c68b159324 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -975,8 +975,7 @@ void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev, } #endif - if (wdev->netdev) - dev_put(wdev->netdev); + dev_put(wdev->netdev); kfree(rdev->int_scan_req); rdev->int_scan_req = NULL; diff --git a/net/xfrm/xfrm_compat.c b/net/xfrm/xfrm_compat.c index a20aec9d7393..2bf269390163 100644 --- a/net/xfrm/xfrm_compat.c +++ b/net/xfrm/xfrm_compat.c @@ -298,8 +298,16 @@ static int xfrm_xlate64(struct sk_buff *dst, const struct nlmsghdr *nlh_src) len = nlmsg_attrlen(nlh_src, xfrm_msg_min[type]); nla_for_each_attr(nla, attrs, len, remaining) { - int err = xfrm_xlate64_attr(dst, nla); + int err; + switch (type) { + case XFRM_MSG_NEWSPDINFO: + err = xfrm_nla_cpy(dst, nla, nla_len(nla)); + break; + default: + err = xfrm_xlate64_attr(dst, nla); + break; + } if (err) return err; } @@ -341,7 +349,8 @@ static int xfrm_alloc_compat(struct sk_buff *skb, const struct nlmsghdr *nlh_src /* Calculates len of translated 64-bit message. */ static size_t xfrm_user_rcv_calculate_len64(const struct nlmsghdr *src, - struct nlattr *attrs[XFRMA_MAX+1]) + struct nlattr *attrs[XFRMA_MAX + 1], + int maxtype) { size_t len = nlmsg_len(src); @@ -358,10 +367,20 @@ static size_t xfrm_user_rcv_calculate_len64(const struct nlmsghdr *src, case XFRM_MSG_POLEXPIRE: len += 8; break; + case XFRM_MSG_NEWSPDINFO: + /* attirbutes are xfrm_spdattr_type_t, not xfrm_attr_type_t */ + return len; default: break; } + /* Unexpected for anything, but XFRM_MSG_NEWSPDINFO, please + * correct both 64=>32-bit and 32=>64-bit translators to copy + * new attributes. + */ + if (WARN_ON_ONCE(maxtype)) + return len; + if (attrs[XFRMA_SA]) len += 4; if (attrs[XFRMA_POLICY]) @@ -440,7 +459,8 @@ static int xfrm_xlate32_attr(void *dst, const struct nlattr *nla, static int xfrm_xlate32(struct nlmsghdr *dst, const struct nlmsghdr *src, struct nlattr *attrs[XFRMA_MAX+1], - size_t size, u8 type, struct netlink_ext_ack *extack) + size_t size, u8 type, int maxtype, + struct netlink_ext_ack *extack) { size_t pos; int i; @@ -520,6 +540,25 @@ static int xfrm_xlate32(struct nlmsghdr *dst, const struct nlmsghdr *src, } pos = dst->nlmsg_len; + if (maxtype) { + /* attirbutes are xfrm_spdattr_type_t, not xfrm_attr_type_t */ + WARN_ON_ONCE(src->nlmsg_type != XFRM_MSG_NEWSPDINFO); + + for (i = 1; i <= maxtype; i++) { + int err; + + if (!attrs[i]) + continue; + + /* just copy - no need for translation */ + err = xfrm_attr_cpy32(dst, &pos, attrs[i], size, + nla_len(attrs[i]), nla_len(attrs[i])); + if (err) + return err; + } + return 0; + } + for (i = 1; i < XFRMA_MAX + 1; i++) { int err; @@ -564,7 +603,7 @@ static struct nlmsghdr *xfrm_user_rcv_msg_compat(const struct nlmsghdr *h32, if (err < 0) return ERR_PTR(err); - len = xfrm_user_rcv_calculate_len64(h32, attrs); + len = xfrm_user_rcv_calculate_len64(h32, attrs, maxtype); /* The message doesn't need translation */ if (len == nlmsg_len(h32)) return NULL; @@ -574,7 +613,7 @@ static struct nlmsghdr *xfrm_user_rcv_msg_compat(const struct nlmsghdr *h32, if (!h64) return ERR_PTR(-ENOMEM); - err = xfrm_xlate32(h64, h32, attrs, len, type, extack); + err = xfrm_xlate32(h64, h32, attrs, len, type, maxtype, extack); if (err < 0) { kvfree(h64); return ERR_PTR(err); diff --git a/net/xfrm/xfrm_ipcomp.c b/net/xfrm/xfrm_ipcomp.c index 2e8afe078d61..cb40ff0ff28d 100644 --- a/net/xfrm/xfrm_ipcomp.c +++ b/net/xfrm/xfrm_ipcomp.c @@ -241,7 +241,7 @@ static void ipcomp_free_tfms(struct crypto_comp * __percpu *tfms) break; } - WARN_ON(!pos); + WARN_ON(list_entry_is_head(pos, &ipcomp_tfms_list, list)); if (--pos->users) return; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 827d84255021..7f881f5a5897 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -155,7 +155,6 @@ static struct xfrm_policy_afinfo const __rcu *xfrm_policy_afinfo[AF_INET6 + 1] __read_mostly; static struct kmem_cache *xfrm_dst_cache __ro_after_init; -static __read_mostly seqcount_mutex_t xfrm_policy_hash_generation; static struct rhashtable xfrm_policy_inexact_table; static const struct rhashtable_params xfrm_pol_inexact_params; @@ -585,7 +584,7 @@ static void xfrm_bydst_resize(struct net *net, int dir) return; spin_lock_bh(&net->xfrm.xfrm_policy_lock); - write_seqcount_begin(&xfrm_policy_hash_generation); + write_seqcount_begin(&net->xfrm.xfrm_policy_hash_generation); odst = rcu_dereference_protected(net->xfrm.policy_bydst[dir].table, lockdep_is_held(&net->xfrm.xfrm_policy_lock)); @@ -596,7 +595,7 @@ static void xfrm_bydst_resize(struct net *net, int dir) rcu_assign_pointer(net->xfrm.policy_bydst[dir].table, ndst); net->xfrm.policy_bydst[dir].hmask = nhashmask; - write_seqcount_end(&xfrm_policy_hash_generation); + write_seqcount_end(&net->xfrm.xfrm_policy_hash_generation); spin_unlock_bh(&net->xfrm.xfrm_policy_lock); synchronize_rcu(); @@ -1245,7 +1244,7 @@ static void xfrm_hash_rebuild(struct work_struct *work) } while (read_seqretry(&net->xfrm.policy_hthresh.lock, seq)); spin_lock_bh(&net->xfrm.xfrm_policy_lock); - write_seqcount_begin(&xfrm_policy_hash_generation); + write_seqcount_begin(&net->xfrm.xfrm_policy_hash_generation); /* make sure that we can insert the indirect policies again before * we start with destructive action. @@ -1354,7 +1353,7 @@ static void xfrm_hash_rebuild(struct work_struct *work) out_unlock: __xfrm_policy_inexact_flush(net); - write_seqcount_end(&xfrm_policy_hash_generation); + write_seqcount_end(&net->xfrm.xfrm_policy_hash_generation); spin_unlock_bh(&net->xfrm.xfrm_policy_lock); mutex_unlock(&hash_resize_mutex); @@ -2091,15 +2090,12 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, if (unlikely(!daddr || !saddr)) return NULL; - retry: - sequence = read_seqcount_begin(&xfrm_policy_hash_generation); rcu_read_lock(); - - chain = policy_hash_direct(net, daddr, saddr, family, dir); - if (read_seqcount_retry(&xfrm_policy_hash_generation, sequence)) { - rcu_read_unlock(); - goto retry; - } + retry: + do { + sequence = read_seqcount_begin(&net->xfrm.xfrm_policy_hash_generation); + chain = policy_hash_direct(net, daddr, saddr, family, dir); + } while (read_seqcount_retry(&net->xfrm.xfrm_policy_hash_generation, sequence)); ret = NULL; hlist_for_each_entry_rcu(pol, chain, bydst) { @@ -2130,15 +2126,11 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, } skip_inexact: - if (read_seqcount_retry(&xfrm_policy_hash_generation, sequence)) { - rcu_read_unlock(); + if (read_seqcount_retry(&net->xfrm.xfrm_policy_hash_generation, sequence)) goto retry; - } - if (ret && !xfrm_pol_hold_rcu(ret)) { - rcu_read_unlock(); + if (ret && !xfrm_pol_hold_rcu(ret)) goto retry; - } fail: rcu_read_unlock(); @@ -4089,6 +4081,7 @@ static int __net_init xfrm_net_init(struct net *net) /* Initialize the per-net locks here */ spin_lock_init(&net->xfrm.xfrm_state_lock); spin_lock_init(&net->xfrm.xfrm_policy_lock); + seqcount_spinlock_init(&net->xfrm.xfrm_policy_hash_generation, &net->xfrm.xfrm_policy_lock); mutex_init(&net->xfrm.xfrm_cfg_mutex); rv = xfrm_statistics_init(net); @@ -4133,7 +4126,6 @@ void __init xfrm_init(void) { register_pernet_subsys(&xfrm_net_ops); xfrm_dev_init(); - seqcount_mutex_init(&xfrm_policy_hash_generation, &hash_resize_mutex); xfrm_input_init(); #ifdef CONFIG_XFRM_ESPINTCP diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index b47d613409b7..7aff641c717d 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -2811,6 +2811,16 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, err = link->doit(skb, nlh, attrs); + /* We need to free skb allocated in xfrm_alloc_compat() before + * returning from this function, because consume_skb() won't take + * care of frag_list since netlink destructor sets + * sbk->head to NULL. (see netlink_skb_destructor()) + */ + if (skb_has_frag_list(skb)) { + kfree_skb(skb_shinfo(skb)->frag_list); + skb_shinfo(skb)->frag_list = NULL; + } + err: kvfree(nlh64); return err; diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl index c17e48020ec3..8f6b13ae46bf 100755 --- a/scripts/recordmcount.pl +++ b/scripts/recordmcount.pl @@ -173,39 +173,6 @@ my $mcount_regex; # Find the call site to mcount (return offset) my $mcount_adjust; # Address adjustment to mcount offset my $alignment; # The .align value to use for $mcount_section my $section_type; # Section header plus possible alignment command -my $can_use_local = 0; # If we can use local function references - -# Shut up recordmcount if user has older objcopy -my $quiet_recordmcount = ".tmp_quiet_recordmcount"; -my $print_warning = 1; -$print_warning = 0 if ( -f $quiet_recordmcount); - -## -# check_objcopy - whether objcopy supports --globalize-symbols -# -# --globalize-symbols came out in 2.17, we must test the version -# of objcopy, and if it is less than 2.17, then we can not -# record local functions. -sub check_objcopy -{ - open (IN, "$objcopy --version |") or die "error running $objcopy"; - while (<IN>) { - if (/objcopy.*\s(\d+)\.(\d+)/) { - $can_use_local = 1 if ($1 > 2 || ($1 == 2 && $2 >= 17)); - last; - } - } - close (IN); - - if (!$can_use_local && $print_warning) { - print STDERR "WARNING: could not find objcopy version or version " . - "is less than 2.17.\n" . - "\tLocal function references are disabled.\n"; - open (QUIET, ">$quiet_recordmcount"); - printf QUIET "Disables the warning from recordmcount.pl\n"; - close QUIET; - } -} if ($arch =~ /(x86(_64)?)|(i386)/) { if ($bits == 64) { @@ -434,8 +401,6 @@ if ($filename =~ m,^(.*)(\.\S),) { my $mcount_s = $dirname . "/.tmp_mc_" . $prefix . ".s"; my $mcount_o = $dirname . "/.tmp_mc_" . $prefix . ".o"; -check_objcopy(); - # # Step 1: find all the local (static functions) and weak symbols. # 't' is local, 'w/W' is weak @@ -473,11 +438,6 @@ sub update_funcs # is this function static? If so, note this fact. if (defined $locals{$ref_func}) { - - # only use locals if objcopy supports globalize-symbols - if (!$can_use_local) { - return; - } $convert{$ref_func} = 1; } diff --git a/scripts/tracing/draw_functrace.py b/scripts/tracing/draw_functrace.py index 74f8aadfd4cb..7011fbe003ff 100755 --- a/scripts/tracing/draw_functrace.py +++ b/scripts/tracing/draw_functrace.py @@ -17,7 +17,7 @@ Usage: $ cat /sys/kernel/debug/tracing/trace_pipe > ~/raw_trace_func Wait some times but not too much, the script is a bit slow. Break the pipe (Ctrl + Z) - $ scripts/draw_functrace.py < raw_trace_func > draw_functrace + $ scripts/tracing/draw_functrace.py < ~/raw_trace_func > draw_functrace Then you have your drawn trace in draw_functrace """ @@ -103,10 +103,10 @@ def parseLine(line): line = line.strip() if line.startswith("#"): raise CommentLineException - m = re.match("[^]]+?\\] +([0-9.]+): (\\w+) <-(\\w+)", line) + m = re.match("[^]]+?\\] +([a-z.]+) +([0-9.]+): (\\w+) <-(\\w+)", line) if m is None: raise BrokenLineException - return (m.group(1), m.group(2), m.group(3)) + return (m.group(2), m.group(3), m.group(4)) def main(): diff --git a/security/selinux/ss/policydb.c b/security/selinux/ss/policydb.c index defc5ef35c66..0ae1b718194a 100644 --- a/security/selinux/ss/policydb.c +++ b/security/selinux/ss/policydb.c @@ -874,7 +874,7 @@ int policydb_load_isids(struct policydb *p, struct sidtab *s) rc = sidtab_init(s); if (rc) { pr_err("SELinux: out of memory on SID table init\n"); - goto out; + return rc; } head = p->ocontexts[OCON_ISID]; @@ -885,7 +885,7 @@ int policydb_load_isids(struct policydb *p, struct sidtab *s) if (sid == SECSID_NULL) { pr_err("SELinux: SID 0 was assigned a context.\n"); sidtab_destroy(s); - goto out; + return -EINVAL; } /* Ignore initial SIDs unused by this kernel. */ @@ -897,12 +897,10 @@ int policydb_load_isids(struct policydb *p, struct sidtab *s) pr_err("SELinux: unable to load initial SID %s.\n", name); sidtab_destroy(s); - goto out; + return rc; } } - rc = 0; -out: - return rc; + return 0; } int policydb_class_isvalid(struct policydb *p, unsigned int class) diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h index d208b2af697f..eb15f319aa57 100644 --- a/tools/include/uapi/linux/if_link.h +++ b/tools/include/uapi/linux/if_link.h @@ -653,6 +653,7 @@ enum { IFLA_BOND_AD_ACTOR_SYSTEM, IFLA_BOND_TLB_DYNAMIC_LB, IFLA_BOND_PEER_NOTIF_DELAY, + IFLA_BOND_AD_LACP_ACTIVE, __IFLA_BOND_MAX, }; diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 22f8326547eb..bc1f64873c8f 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -2434,6 +2434,22 @@ static int cs_etm__process_event(struct perf_session *session, return 0; } +static void dump_queued_data(struct cs_etm_auxtrace *etm, + struct perf_record_auxtrace *event) +{ + struct auxtrace_buffer *buf; + unsigned int i; + /* + * Find all buffers with same reference in the queues and dump them. + * This is because the queues can contain multiple entries of the same + * buffer that were split on aux records. + */ + for (i = 0; i < etm->queues.nr_queues; ++i) + list_for_each_entry(buf, &etm->queues.queue_array[i].head, list) + if (buf->reference == event->reference) + cs_etm__dump_event(etm, buf); +} + static int cs_etm__process_auxtrace_event(struct perf_session *session, union perf_event *event, struct perf_tool *tool __maybe_unused) @@ -2466,7 +2482,8 @@ static int cs_etm__process_auxtrace_event(struct perf_session *session, cs_etm__dump_event(etm, buffer); auxtrace_buffer__put_data(buffer); } - } + } else if (dump_trace) + dump_queued_data(etm, &event->auxtrace); return 0; } @@ -3042,7 +3059,6 @@ int cs_etm__process_auxtrace_info(union perf_event *event, if (dump_trace) { cs_etm__print_auxtrace_info(auxtrace_info->priv, num_cpu); - return 0; } err = cs_etm__synth_events(etm, session); diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 72e7f3616157..8af693d9678c 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -192,8 +192,6 @@ struct map *map__new(struct machine *machine, u64 start, u64 len, if (!(prot & PROT_EXEC)) dso__set_loaded(dso); } - - nsinfo__put(dso->nsinfo); dso->nsinfo = nsi; if (build_id__is_defined(bid)) diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index a1bd7007a8b4..fc683bc41715 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -742,9 +742,13 @@ struct pmu_events_map *__weak pmu_events_map__find(void) return perf_pmu__find_map(NULL); } -static bool perf_pmu__valid_suffix(char *pmu_name, char *tok) +/* + * Suffix must be in form tok_{digits}, or tok{digits}, or same as pmu_name + * to be valid. + */ +static bool perf_pmu__valid_suffix(const char *pmu_name, char *tok) { - char *p; + const char *p; if (strncmp(pmu_name, tok, strlen(tok))) return false; @@ -753,12 +757,16 @@ static bool perf_pmu__valid_suffix(char *pmu_name, char *tok) if (*p == 0) return true; - if (*p != '_') - return false; + if (*p == '_') + ++p; - ++p; - if (*p == 0 || !isdigit(*p)) - return false; + /* Ensure we end in a number */ + while (1) { + if (!isdigit(*p)) + return false; + if (*(++p) == 0) + break; + } return true; } @@ -789,12 +797,19 @@ bool pmu_uncore_alias_match(const char *pmu_name, const char *name) * match "socket" in "socketX_pmunameY" and then "pmuname" in * "pmunameY". */ - for (; tok; name += strlen(tok), tok = strtok_r(NULL, ",", &tmp)) { + while (1) { + char *next_tok = strtok_r(NULL, ",", &tmp); + name = strstr(name, tok); - if (!name || !perf_pmu__valid_suffix((char *)name, tok)) { + if (!name || + (!next_tok && !perf_pmu__valid_suffix(name, tok))) { res = false; goto out; } + if (!next_tok) + break; + tok = next_tok; + name += strlen(tok); } res = true; diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index fb010a35d61a..da9e8b699e42 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -38,6 +38,7 @@ TARGETS += mount_setattr TARGETS += mqueue TARGETS += nci TARGETS += net +TARGETS += net/af_unix TARGETS += net/forwarding TARGETS += net/mptcp TARGETS += netfilter diff --git a/tools/testing/selftests/kvm/include/x86_64/hyperv.h b/tools/testing/selftests/kvm/include/x86_64/hyperv.h index 412eaee7884a..b66910702c0a 100644 --- a/tools/testing/selftests/kvm/include/x86_64/hyperv.h +++ b/tools/testing/selftests/kvm/include/x86_64/hyperv.h @@ -117,7 +117,7 @@ #define HV_X64_GUEST_DEBUGGING_AVAILABLE BIT(1) #define HV_X64_PERF_MONITOR_AVAILABLE BIT(2) #define HV_X64_CPU_DYNAMIC_PARTITIONING_AVAILABLE BIT(3) -#define HV_X64_HYPERCALL_PARAMS_XMM_AVAILABLE BIT(4) +#define HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE BIT(4) #define HV_X64_GUEST_IDLE_STATE_AVAILABLE BIT(5) #define HV_FEATURE_FREQUENCY_MSRS_AVAILABLE BIT(8) #define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE BIT(10) @@ -182,4 +182,7 @@ #define HV_STATUS_INVALID_CONNECTION_ID 18 #define HV_STATUS_INSUFFICIENT_BUFFERS 19 +/* hypercall options */ +#define HV_HYPERCALL_FAST_BIT BIT(16) + #endif /* !SELFTEST_KVM_HYPERV_H */ diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c b/tools/testing/selftests/kvm/x86_64/hyperv_clock.c index bab10ae787b6..e0b2bb1339b1 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_clock.c @@ -215,7 +215,7 @@ int main(void) vcpu_set_hv_cpuid(vm, VCPU_ID); tsc_page_gva = vm_vaddr_alloc_page(vm); - memset(addr_gpa2hva(vm, tsc_page_gva), 0x0, getpagesize()); + memset(addr_gva2hva(vm, tsc_page_gva), 0x0, getpagesize()); TEST_ASSERT((addr_gva2gpa(vm, tsc_page_gva) & (getpagesize() - 1)) == 0, "TSC page has to be page aligned\n"); vcpu_args_set(vm, VCPU_ID, 2, tsc_page_gva, addr_gva2gpa(vm, tsc_page_gva)); diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_features.c b/tools/testing/selftests/kvm/x86_64/hyperv_features.c index af27c7e829c1..91d88aaa9899 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_features.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_features.c @@ -47,6 +47,7 @@ static void do_wrmsr(u32 idx, u64 val) } static int nr_gp; +static int nr_ud; static inline u64 hypercall(u64 control, vm_vaddr_t input_address, vm_vaddr_t output_address) @@ -80,6 +81,12 @@ static void guest_gp_handler(struct ex_regs *regs) regs->rip = (uint64_t)&wrmsr_end; } +static void guest_ud_handler(struct ex_regs *regs) +{ + nr_ud++; + regs->rip += 3; +} + struct msr_data { uint32_t idx; bool available; @@ -90,6 +97,7 @@ struct msr_data { struct hcall_data { uint64_t control; uint64_t expect; + bool ud_expected; }; static void guest_msr(struct msr_data *msr) @@ -117,13 +125,26 @@ static void guest_msr(struct msr_data *msr) static void guest_hcall(vm_vaddr_t pgs_gpa, struct hcall_data *hcall) { int i = 0; + u64 res, input, output; wrmsr(HV_X64_MSR_GUEST_OS_ID, LINUX_OS_ID); wrmsr(HV_X64_MSR_HYPERCALL, pgs_gpa); while (hcall->control) { - GUEST_ASSERT(hypercall(hcall->control, pgs_gpa, - pgs_gpa + 4096) == hcall->expect); + nr_ud = 0; + if (!(hcall->control & HV_HYPERCALL_FAST_BIT)) { + input = pgs_gpa; + output = pgs_gpa + 4096; + } else { + input = output = 0; + } + + res = hypercall(hcall->control, input, output); + if (hcall->ud_expected) + GUEST_ASSERT(nr_ud == 1); + else + GUEST_ASSERT(res == hcall->expect); + GUEST_SYNC(i++); } @@ -552,8 +573,18 @@ static void guest_test_hcalls_access(struct kvm_vm *vm, struct hcall_data *hcall recomm.ebx = 0xfff; hcall->expect = HV_STATUS_SUCCESS; break; - case 17: + /* XMM fast hypercall */ + hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | HV_HYPERCALL_FAST_BIT; + hcall->ud_expected = true; + break; + case 18: + feat.edx |= HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE; + hcall->ud_expected = false; + hcall->expect = HV_STATUS_SUCCESS; + break; + + case 19: /* END */ hcall->control = 0; break; @@ -625,6 +656,10 @@ int main(void) /* Test hypercalls */ vm = vm_create_default(VCPU_ID, 0, guest_hcall); + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(vm, VCPU_ID); + vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler); + /* Hypercall input/output */ hcall_page = vm_vaddr_alloc_pages(vm, 2); memset(addr_gva2hva(vm, hcall_page), 0x0, 2 * getpagesize()); diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 5b169e915679..4f9f73e7a299 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -38,8 +38,10 @@ TEST_GEN_FILES += reuseaddr_ports_exhausted TEST_GEN_FILES += hwtstamp_config rxtimestamp timestamping txtimestamp TEST_GEN_FILES += ipsec TEST_GEN_FILES += ioam6_parser +TEST_GEN_FILES += gro TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls +TEST_GEN_FILES += toeplitz TEST_FILES := settings diff --git a/tools/testing/selftests/net/af_unix/Makefile b/tools/testing/selftests/net/af_unix/Makefile new file mode 100644 index 000000000000..cfc7f4f97fd1 --- /dev/null +++ b/tools/testing/selftests/net/af_unix/Makefile @@ -0,0 +1,5 @@ +##TEST_GEN_FILES := test_unix_oob +TEST_PROGS := test_unix_oob +include ../../lib.mk + +all: $(TEST_PROGS) diff --git a/tools/testing/selftests/net/af_unix/test_unix_oob.c b/tools/testing/selftests/net/af_unix/test_unix_oob.c new file mode 100644 index 000000000000..0f3e3763f4f8 --- /dev/null +++ b/tools/testing/selftests/net/af_unix/test_unix_oob.c @@ -0,0 +1,437 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +#include <stdio.h> +#include <stdlib.h> +#include <sys/socket.h> +#include <arpa/inet.h> +#include <unistd.h> +#include <string.h> +#include <fcntl.h> +#include <sys/ioctl.h> +#include <errno.h> +#include <netinet/tcp.h> +#include <sys/un.h> +#include <sys/signal.h> +#include <sys/poll.h> + +static int pipefd[2]; +static int signal_recvd; +static pid_t producer_id; +static char sock_name[32]; + +static void sig_hand(int sn, siginfo_t *si, void *p) +{ + signal_recvd = sn; +} + +static int set_sig_handler(int signal) +{ + struct sigaction sa; + + sa.sa_sigaction = sig_hand; + sigemptyset(&sa.sa_mask); + sa.sa_flags = SA_SIGINFO | SA_RESTART; + + return sigaction(signal, &sa, NULL); +} + +static void set_filemode(int fd, int set) +{ + int flags = fcntl(fd, F_GETFL, 0); + + if (set) + flags &= ~O_NONBLOCK; + else + flags |= O_NONBLOCK; + fcntl(fd, F_SETFL, flags); +} + +static void signal_producer(int fd) +{ + char cmd; + + cmd = 'S'; + write(fd, &cmd, sizeof(cmd)); +} + +static void wait_for_signal(int fd) +{ + char buf[5]; + + read(fd, buf, 5); +} + +static void die(int status) +{ + fflush(NULL); + unlink(sock_name); + kill(producer_id, SIGTERM); + exit(status); +} + +int is_sioctatmark(int fd) +{ + int ans = -1; + + if (ioctl(fd, SIOCATMARK, &ans, sizeof(ans)) < 0) { +#ifdef DEBUG + perror("SIOCATMARK Failed"); +#endif + } + return ans; +} + +void read_oob(int fd, char *c) +{ + + *c = ' '; + if (recv(fd, c, sizeof(*c), MSG_OOB) < 0) { +#ifdef DEBUG + perror("Reading MSG_OOB Failed"); +#endif + } +} + +int read_data(int pfd, char *buf, int size) +{ + int len = 0; + + memset(buf, size, '0'); + len = read(pfd, buf, size); +#ifdef DEBUG + if (len < 0) + perror("read failed"); +#endif + return len; +} + +static void wait_for_data(int pfd, int event) +{ + struct pollfd pfds[1]; + + pfds[0].fd = pfd; + pfds[0].events = event; + poll(pfds, 1, -1); +} + +void producer(struct sockaddr_un *consumer_addr) +{ + int cfd; + char buf[64]; + int i; + + memset(buf, 'x', sizeof(buf)); + cfd = socket(AF_UNIX, SOCK_STREAM, 0); + + wait_for_signal(pipefd[0]); + if (connect(cfd, (struct sockaddr *)consumer_addr, + sizeof(struct sockaddr)) != 0) { + perror("Connect failed"); + kill(0, SIGTERM); + exit(1); + } + + for (i = 0; i < 2; i++) { + /* Test 1: Test for SIGURG and OOB */ + wait_for_signal(pipefd[0]); + memset(buf, 'x', sizeof(buf)); + buf[63] = '@'; + send(cfd, buf, sizeof(buf), MSG_OOB); + + wait_for_signal(pipefd[0]); + + /* Test 2: Test for OOB being overwitten */ + memset(buf, 'x', sizeof(buf)); + buf[63] = '%'; + send(cfd, buf, sizeof(buf), MSG_OOB); + + memset(buf, 'x', sizeof(buf)); + buf[63] = '#'; + send(cfd, buf, sizeof(buf), MSG_OOB); + + wait_for_signal(pipefd[0]); + + /* Test 3: Test for SIOCATMARK */ + memset(buf, 'x', sizeof(buf)); + buf[63] = '@'; + send(cfd, buf, sizeof(buf), MSG_OOB); + + memset(buf, 'x', sizeof(buf)); + buf[63] = '%'; + send(cfd, buf, sizeof(buf), MSG_OOB); + + memset(buf, 'x', sizeof(buf)); + send(cfd, buf, sizeof(buf), 0); + + wait_for_signal(pipefd[0]); + + /* Test 4: Test for 1byte OOB msg */ + memset(buf, 'x', sizeof(buf)); + buf[0] = '@'; + send(cfd, buf, 1, MSG_OOB); + } +} + +int +main(int argc, char **argv) +{ + int lfd, pfd; + struct sockaddr_un consumer_addr, paddr; + socklen_t len = sizeof(consumer_addr); + char buf[1024]; + int on = 0; + char oob; + int flags; + int atmark; + char *tmp_file; + + lfd = socket(AF_UNIX, SOCK_STREAM, 0); + memset(&consumer_addr, 0, sizeof(consumer_addr)); + consumer_addr.sun_family = AF_UNIX; + sprintf(sock_name, "unix_oob_%d", getpid()); + unlink(sock_name); + strcpy(consumer_addr.sun_path, sock_name); + + if ((bind(lfd, (struct sockaddr *)&consumer_addr, + sizeof(consumer_addr))) != 0) { + perror("socket bind failed"); + exit(1); + } + + pipe(pipefd); + + listen(lfd, 1); + + producer_id = fork(); + if (producer_id == 0) { + producer(&consumer_addr); + exit(0); + } + + set_sig_handler(SIGURG); + signal_producer(pipefd[1]); + + pfd = accept(lfd, (struct sockaddr *) &paddr, &len); + fcntl(pfd, F_SETOWN, getpid()); + + signal_recvd = 0; + signal_producer(pipefd[1]); + + /* Test 1: + * veriyf that SIGURG is + * delivered and 63 bytes are + * read and oob is '@' + */ + wait_for_data(pfd, POLLIN | POLLPRI); + read_oob(pfd, &oob); + len = read_data(pfd, buf, 1024); + if (!signal_recvd || len != 63 || oob != '@') { + fprintf(stderr, "Test 1 failed sigurg %d len %d %c\n", + signal_recvd, len, oob); + die(1); + } + + signal_recvd = 0; + signal_producer(pipefd[1]); + + /* Test 2: + * Verify that the first OOB is over written by + * the 2nd one and the first OOB is returned as + * part of the read, and sigurg is received. + */ + wait_for_data(pfd, POLLIN | POLLPRI); + len = 0; + while (len < 70) + len = recv(pfd, buf, 1024, MSG_PEEK); + len = read_data(pfd, buf, 1024); + read_oob(pfd, &oob); + if (!signal_recvd || len != 127 || oob != '#') { + fprintf(stderr, "Test 2 failed, sigurg %d len %d OOB %c\n", + signal_recvd, len, oob); + die(1); + } + + signal_recvd = 0; + signal_producer(pipefd[1]); + + /* Test 3: + * verify that 2nd oob over writes + * the first one and read breaks at + * oob boundary returning 127 bytes + * and sigurg is received and atmark + * is set. + * oob is '%' and second read returns + * 64 bytes. + */ + len = 0; + wait_for_data(pfd, POLLIN | POLLPRI); + while (len < 150) + len = recv(pfd, buf, 1024, MSG_PEEK); + len = read_data(pfd, buf, 1024); + atmark = is_sioctatmark(pfd); + read_oob(pfd, &oob); + + if (!signal_recvd || len != 127 || oob != '%' || atmark != 1) { + fprintf(stderr, "Test 3 failed, sigurg %d len %d OOB %c ", + "atmark %d\n", signal_recvd, len, oob, atmark); + die(1); + } + + signal_recvd = 0; + + len = read_data(pfd, buf, 1024); + if (len != 64) { + fprintf(stderr, "Test 3.1 failed, sigurg %d len %d OOB %c\n", + signal_recvd, len, oob); + die(1); + } + + signal_recvd = 0; + signal_producer(pipefd[1]); + + /* Test 4: + * verify that a single byte + * oob message is delivered. + * set non blocking mode and + * check proper error is + * returned and sigurg is + * received and correct + * oob is read. + */ + + set_filemode(pfd, 0); + + wait_for_data(pfd, POLLIN | POLLPRI); + len = read_data(pfd, buf, 1024); + if ((len == -1) && (errno == 11)) + len = 0; + + read_oob(pfd, &oob); + + if (!signal_recvd || len != 0 || oob != '@') { + fprintf(stderr, "Test 4 failed, sigurg %d len %d OOB %c\n", + signal_recvd, len, oob); + die(1); + } + + set_filemode(pfd, 1); + + /* Inline Testing */ + + on = 1; + if (setsockopt(pfd, SOL_SOCKET, SO_OOBINLINE, &on, sizeof(on))) { + perror("SO_OOBINLINE"); + die(1); + } + + signal_recvd = 0; + signal_producer(pipefd[1]); + + /* Test 1 -- Inline: + * Check that SIGURG is + * delivered and 63 bytes are + * read and oob is '@' + */ + + wait_for_data(pfd, POLLIN | POLLPRI); + len = read_data(pfd, buf, 1024); + + if (!signal_recvd || len != 63) { + fprintf(stderr, "Test 1 Inline failed, sigurg %d len %d\n", + signal_recvd, len); + die(1); + } + + len = read_data(pfd, buf, 1024); + + if (len != 1) { + fprintf(stderr, + "Test 1.1 Inline failed, sigurg %d len %d oob %c\n", + signal_recvd, len, oob); + die(1); + } + + signal_recvd = 0; + signal_producer(pipefd[1]); + + /* Test 2 -- Inline: + * Verify that the first OOB is over written by + * the 2nd one and read breaks correctly on + * 2nd OOB boundary with the first OOB returned as + * part of the read, and sigurg is delivered and + * siocatmark returns true. + * next read returns one byte, the oob byte + * and siocatmark returns false. + */ + len = 0; + wait_for_data(pfd, POLLIN | POLLPRI); + while (len < 70) + len = recv(pfd, buf, 1024, MSG_PEEK); + len = read_data(pfd, buf, 1024); + atmark = is_sioctatmark(pfd); + if (len != 127 || atmark != 1 || !signal_recvd) { + fprintf(stderr, "Test 2 Inline failed, len %d atmark %d\n", + len, atmark); + die(1); + } + + len = read_data(pfd, buf, 1024); + atmark = is_sioctatmark(pfd); + if (len != 1 || buf[0] != '#' || atmark == 1) { + fprintf(stderr, "Test 2.1 Inline failed, len %d data %c atmark %d\n", + len, buf[0], atmark); + die(1); + } + + signal_recvd = 0; + signal_producer(pipefd[1]); + + /* Test 3 -- Inline: + * verify that 2nd oob over writes + * the first one and read breaks at + * oob boundary returning 127 bytes + * and sigurg is received and siocatmark + * is true after the read. + * subsequent read returns 65 bytes + * because of oob which should be '%'. + */ + len = 0; + wait_for_data(pfd, POLLIN | POLLPRI); + while (len < 126) + len = recv(pfd, buf, 1024, MSG_PEEK); + len = read_data(pfd, buf, 1024); + atmark = is_sioctatmark(pfd); + if (!signal_recvd || len != 127 || !atmark) { + fprintf(stderr, + "Test 3 Inline failed, sigurg %d len %d data %c\n", + signal_recvd, len, buf[0]); + die(1); + } + + len = read_data(pfd, buf, 1024); + atmark = is_sioctatmark(pfd); + if (len != 65 || buf[0] != '%' || atmark != 0) { + fprintf(stderr, + "Test 3.1 Inline failed, len %d oob %c atmark %d\n", + len, buf[0], atmark); + die(1); + } + + signal_recvd = 0; + signal_producer(pipefd[1]); + + /* Test 4 -- Inline: + * verify that a single + * byte oob message is delivered + * and read returns one byte, the oob + * byte and sigurg is received + */ + wait_for_data(pfd, POLLIN | POLLPRI); + len = read_data(pfd, buf, 1024); + if (!signal_recvd || len != 1 || buf[0] != '@') { + fprintf(stderr, + "Test 4 Inline failed, signal %d len %d data %c\n", + signal_recvd, len, buf[0]); + die(1); + } + die(0); +} diff --git a/tools/testing/selftests/net/gro.c b/tools/testing/selftests/net/gro.c new file mode 100644 index 000000000000..cf37ce86b0fd --- /dev/null +++ b/tools/testing/selftests/net/gro.c @@ -0,0 +1,1095 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * This testsuite provides conformance testing for GRO coalescing. + * + * Test cases: + * 1.data + * Data packets of the same size and same header setup with correct + * sequence numbers coalesce. The one exception being the last data + * packet coalesced: it can be smaller than the rest and coalesced + * as long as it is in the same flow. + * 2.ack + * Pure ACK does not coalesce. + * 3.flags + * Specific test cases: no packets with PSH, SYN, URG, RST set will + * be coalesced. + * 4.tcp + * Packets with incorrect checksum, non-consecutive seqno and + * different TCP header options shouldn't coalesce. Nit: given that + * some extension headers have paddings, such as timestamp, headers + * that are padding differently would not be coalesced. + * 5.ip: + * Packets with different (ECN, TTL, TOS) header, ip options or + * ip fragments (ipv6) shouldn't coalesce. + * 6.large: + * Packets larger than GRO_MAX_SIZE packets shouldn't coalesce. + * + * MSS is defined as 4096 - header because if it is too small + * (i.e. 1500 MTU - header), it will result in many packets, + * increasing the "large" test case's flakiness. This is because + * due to time sensitivity in the coalescing window, the receiver + * may not coalesce all of the packets. + * + * Note the timing issue applies to all of the test cases, so some + * flakiness is to be expected. + * + */ + +#define _GNU_SOURCE + +#include <arpa/inet.h> +#include <errno.h> +#include <error.h> +#include <getopt.h> +#include <linux/filter.h> +#include <linux/if_packet.h> +#include <linux/ipv6.h> +#include <net/ethernet.h> +#include <net/if.h> +#include <netinet/in.h> +#include <netinet/ip.h> +#include <netinet/ip6.h> +#include <netinet/tcp.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdio.h> +#include <stdarg.h> +#include <string.h> +#include <unistd.h> + +#define DPORT 8000 +#define SPORT 1500 +#define PAYLOAD_LEN 100 +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) +#define NUM_PACKETS 4 +#define START_SEQ 100 +#define START_ACK 100 +#define SIP6 "fdaa::2" +#define DIP6 "fdaa::1" +#define SIP4 "192.168.1.200" +#define DIP4 "192.168.1.100" +#define ETH_P_NONE 0 +#define TOTAL_HDR_LEN (ETH_HLEN + sizeof(struct ipv6hdr) + sizeof(struct tcphdr)) +#define MSS (4096 - sizeof(struct tcphdr) - sizeof(struct ipv6hdr)) +#define MAX_PAYLOAD (IP_MAXPACKET - sizeof(struct tcphdr) - sizeof(struct ipv6hdr)) +#define NUM_LARGE_PKT (MAX_PAYLOAD / MSS) +#define MAX_HDR_LEN (ETH_HLEN + sizeof(struct ipv6hdr) + sizeof(struct tcphdr)) + +static int proto = -1; +static uint8_t src_mac[ETH_ALEN], dst_mac[ETH_ALEN]; +static char *testname = "data"; +static char *ifname = "eth0"; +static char *smac = "aa:00:00:00:00:02"; +static char *dmac = "aa:00:00:00:00:01"; +static bool verbose; +static bool tx_socket = true; +static int tcp_offset = -1; +static int total_hdr_len = -1; +static int ethhdr_proto = -1; + +static void vlog(const char *fmt, ...) +{ + va_list args; + + if (verbose) { + va_start(args, fmt); + vfprintf(stderr, fmt, args); + va_end(args); + } +} + +static void setup_sock_filter(int fd) +{ + const int dport_off = tcp_offset + offsetof(struct tcphdr, dest); + const int ethproto_off = offsetof(struct ethhdr, h_proto); + int optlen = 0; + int ipproto_off; + int next_off; + + if (proto == PF_INET) + next_off = offsetof(struct iphdr, protocol); + else + next_off = offsetof(struct ipv6hdr, nexthdr); + ipproto_off = ETH_HLEN + next_off; + + if (strcmp(testname, "ip") == 0) { + if (proto == PF_INET) + optlen = sizeof(struct ip_timestamp); + else + optlen = sizeof(struct ip6_frag); + } + + struct sock_filter filter[] = { + BPF_STMT(BPF_LD + BPF_H + BPF_ABS, ethproto_off), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, ntohs(ethhdr_proto), 0, 7), + BPF_STMT(BPF_LD + BPF_B + BPF_ABS, ipproto_off), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, IPPROTO_TCP, 0, 5), + BPF_STMT(BPF_LD + BPF_H + BPF_ABS, dport_off), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, DPORT, 2, 0), + BPF_STMT(BPF_LD + BPF_H + BPF_ABS, dport_off + optlen), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, DPORT, 0, 1), + BPF_STMT(BPF_RET + BPF_K, 0xFFFFFFFF), + BPF_STMT(BPF_RET + BPF_K, 0), + }; + + struct sock_fprog bpf = { + .len = ARRAY_SIZE(filter), + .filter = filter, + }; + + if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &bpf, sizeof(bpf)) < 0) + error(1, errno, "error setting filter"); +} + +static uint32_t checksum_nofold(void *data, size_t len, uint32_t sum) +{ + uint16_t *words = data; + int i; + + for (i = 0; i < len / 2; i++) + sum += words[i]; + if (len & 1) + sum += ((char *)data)[len - 1]; + return sum; +} + +static uint16_t checksum_fold(void *data, size_t len, uint32_t sum) +{ + sum = checksum_nofold(data, len, sum); + while (sum > 0xFFFF) + sum = (sum & 0xFFFF) + (sum >> 16); + return ~sum; +} + +static uint16_t tcp_checksum(void *buf, int payload_len) +{ + struct pseudo_header6 { + struct in6_addr saddr; + struct in6_addr daddr; + uint16_t protocol; + uint16_t payload_len; + } ph6; + struct pseudo_header4 { + struct in_addr saddr; + struct in_addr daddr; + uint16_t protocol; + uint16_t payload_len; + } ph4; + uint32_t sum = 0; + + if (proto == PF_INET6) { + if (inet_pton(AF_INET6, SIP6, &ph6.saddr) != 1) + error(1, errno, "inet_pton6 source ip pseudo"); + if (inet_pton(AF_INET6, DIP6, &ph6.daddr) != 1) + error(1, errno, "inet_pton6 dest ip pseudo"); + ph6.protocol = htons(IPPROTO_TCP); + ph6.payload_len = htons(sizeof(struct tcphdr) + payload_len); + + sum = checksum_nofold(&ph6, sizeof(ph6), 0); + } else if (proto == PF_INET) { + if (inet_pton(AF_INET, SIP4, &ph4.saddr) != 1) + error(1, errno, "inet_pton source ip pseudo"); + if (inet_pton(AF_INET, DIP4, &ph4.daddr) != 1) + error(1, errno, "inet_pton dest ip pseudo"); + ph4.protocol = htons(IPPROTO_TCP); + ph4.payload_len = htons(sizeof(struct tcphdr) + payload_len); + + sum = checksum_nofold(&ph4, sizeof(ph4), 0); + } + + return checksum_fold(buf, sizeof(struct tcphdr) + payload_len, sum); +} + +static void read_MAC(uint8_t *mac_addr, char *mac) +{ + if (sscanf(mac, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx", + &mac_addr[0], &mac_addr[1], &mac_addr[2], + &mac_addr[3], &mac_addr[4], &mac_addr[5]) != 6) + error(1, 0, "sscanf"); +} + +static void fill_datalinklayer(void *buf) +{ + struct ethhdr *eth = buf; + + memcpy(eth->h_dest, dst_mac, ETH_ALEN); + memcpy(eth->h_source, src_mac, ETH_ALEN); + eth->h_proto = ethhdr_proto; +} + +static void fill_networklayer(void *buf, int payload_len) +{ + struct ipv6hdr *ip6h = buf; + struct iphdr *iph = buf; + + if (proto == PF_INET6) { + memset(ip6h, 0, sizeof(*ip6h)); + + ip6h->version = 6; + ip6h->payload_len = htons(sizeof(struct tcphdr) + payload_len); + ip6h->nexthdr = IPPROTO_TCP; + ip6h->hop_limit = 8; + if (inet_pton(AF_INET6, SIP6, &ip6h->saddr) != 1) + error(1, errno, "inet_pton source ip6"); + if (inet_pton(AF_INET6, DIP6, &ip6h->daddr) != 1) + error(1, errno, "inet_pton dest ip6"); + } else if (proto == PF_INET) { + memset(iph, 0, sizeof(*iph)); + + iph->version = 4; + iph->ihl = 5; + iph->ttl = 8; + iph->protocol = IPPROTO_TCP; + iph->tot_len = htons(sizeof(struct tcphdr) + + payload_len + sizeof(struct iphdr)); + iph->frag_off = htons(0x4000); /* DF = 1, MF = 0 */ + if (inet_pton(AF_INET, SIP4, &iph->saddr) != 1) + error(1, errno, "inet_pton source ip"); + if (inet_pton(AF_INET, DIP4, &iph->daddr) != 1) + error(1, errno, "inet_pton dest ip"); + iph->check = checksum_fold(buf, sizeof(struct iphdr), 0); + } +} + +static void fill_transportlayer(void *buf, int seq_offset, int ack_offset, + int payload_len, int fin) +{ + struct tcphdr *tcph = buf; + + memset(tcph, 0, sizeof(*tcph)); + + tcph->source = htons(SPORT); + tcph->dest = htons(DPORT); + tcph->seq = ntohl(START_SEQ + seq_offset); + tcph->ack_seq = ntohl(START_ACK + ack_offset); + tcph->ack = 1; + tcph->fin = fin; + tcph->doff = 5; + tcph->window = htons(TCP_MAXWIN); + tcph->urg_ptr = 0; + tcph->check = tcp_checksum(tcph, payload_len); +} + +static void write_packet(int fd, char *buf, int len, struct sockaddr_ll *daddr) +{ + int ret = -1; + + ret = sendto(fd, buf, len, 0, (struct sockaddr *)daddr, sizeof(*daddr)); + if (ret == -1) + error(1, errno, "sendto failure"); + if (ret != len) + error(1, errno, "sendto wrong length"); +} + +static void create_packet(void *buf, int seq_offset, int ack_offset, + int payload_len, int fin) +{ + memset(buf, 0, total_hdr_len); + memset(buf + total_hdr_len, 'a', payload_len); + fill_transportlayer(buf + tcp_offset, seq_offset, ack_offset, + payload_len, fin); + fill_networklayer(buf + ETH_HLEN, payload_len); + fill_datalinklayer(buf); +} + +/* send one extra flag, not first and not last pkt */ +static void send_flags(int fd, struct sockaddr_ll *daddr, int psh, int syn, + int rst, int urg) +{ + static char flag_buf[MAX_HDR_LEN + PAYLOAD_LEN]; + static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; + int payload_len, pkt_size, flag, i; + struct tcphdr *tcph; + + payload_len = PAYLOAD_LEN * psh; + pkt_size = total_hdr_len + payload_len; + flag = NUM_PACKETS / 2; + + create_packet(flag_buf, flag * payload_len, 0, payload_len, 0); + + tcph = (struct tcphdr *)(flag_buf + tcp_offset); + tcph->psh = psh; + tcph->syn = syn; + tcph->rst = rst; + tcph->urg = urg; + tcph->check = 0; + tcph->check = tcp_checksum(tcph, payload_len); + + for (i = 0; i < NUM_PACKETS + 1; i++) { + if (i == flag) { + write_packet(fd, flag_buf, pkt_size, daddr); + continue; + } + create_packet(buf, i * PAYLOAD_LEN, 0, PAYLOAD_LEN, 0); + write_packet(fd, buf, total_hdr_len + PAYLOAD_LEN, daddr); + } +} + +/* Test for data of same length, smaller than previous + * and of different lengths + */ +static void send_data_pkts(int fd, struct sockaddr_ll *daddr, + int payload_len1, int payload_len2) +{ + static char buf[ETH_HLEN + IP_MAXPACKET]; + + create_packet(buf, 0, 0, payload_len1, 0); + write_packet(fd, buf, total_hdr_len + payload_len1, daddr); + create_packet(buf, payload_len1, 0, payload_len2, 0); + write_packet(fd, buf, total_hdr_len + payload_len2, daddr); +} + +/* If incoming segments make tracked segment length exceed + * legal IP datagram length, do not coalesce + */ +static void send_large(int fd, struct sockaddr_ll *daddr, int remainder) +{ + static char pkts[NUM_LARGE_PKT][TOTAL_HDR_LEN + MSS]; + static char last[TOTAL_HDR_LEN + MSS]; + static char new_seg[TOTAL_HDR_LEN + MSS]; + int i; + + for (i = 0; i < NUM_LARGE_PKT; i++) + create_packet(pkts[i], i * MSS, 0, MSS, 0); + create_packet(last, NUM_LARGE_PKT * MSS, 0, remainder, 0); + create_packet(new_seg, (NUM_LARGE_PKT + 1) * MSS, 0, remainder, 0); + + for (i = 0; i < NUM_LARGE_PKT; i++) + write_packet(fd, pkts[i], total_hdr_len + MSS, daddr); + write_packet(fd, last, total_hdr_len + remainder, daddr); + write_packet(fd, new_seg, total_hdr_len + remainder, daddr); +} + +/* Pure acks and dup acks don't coalesce */ +static void send_ack(int fd, struct sockaddr_ll *daddr) +{ + static char buf[MAX_HDR_LEN]; + + create_packet(buf, 0, 0, 0, 0); + write_packet(fd, buf, total_hdr_len, daddr); + write_packet(fd, buf, total_hdr_len, daddr); + create_packet(buf, 0, 1, 0, 0); + write_packet(fd, buf, total_hdr_len, daddr); +} + +static void recompute_packet(char *buf, char *no_ext, int extlen) +{ + struct tcphdr *tcphdr = (struct tcphdr *)(buf + tcp_offset); + struct ipv6hdr *ip6h = (struct ipv6hdr *)(buf + ETH_HLEN); + struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN); + + memmove(buf, no_ext, total_hdr_len); + memmove(buf + total_hdr_len + extlen, + no_ext + total_hdr_len, PAYLOAD_LEN); + + tcphdr->doff = tcphdr->doff + (extlen / 4); + tcphdr->check = 0; + tcphdr->check = tcp_checksum(tcphdr, PAYLOAD_LEN + extlen); + if (proto == PF_INET) { + iph->tot_len = htons(ntohs(iph->tot_len) + extlen); + iph->check = 0; + iph->check = checksum_fold(iph, sizeof(struct iphdr), 0); + } else { + ip6h->payload_len = htons(ntohs(ip6h->payload_len) + extlen); + } +} + +static void tcp_write_options(char *buf, int kind, int ts) +{ + struct tcp_option_ts { + uint8_t kind; + uint8_t len; + uint32_t tsval; + uint32_t tsecr; + } *opt_ts = (void *)buf; + struct tcp_option_window { + uint8_t kind; + uint8_t len; + uint8_t shift; + } *opt_window = (void *)buf; + + switch (kind) { + case TCPOPT_NOP: + buf[0] = TCPOPT_NOP; + break; + case TCPOPT_WINDOW: + memset(opt_window, 0, sizeof(struct tcp_option_window)); + opt_window->kind = TCPOPT_WINDOW; + opt_window->len = TCPOLEN_WINDOW; + opt_window->shift = 0; + break; + case TCPOPT_TIMESTAMP: + memset(opt_ts, 0, sizeof(struct tcp_option_ts)); + opt_ts->kind = TCPOPT_TIMESTAMP; + opt_ts->len = TCPOLEN_TIMESTAMP; + opt_ts->tsval = ts; + opt_ts->tsecr = 0; + break; + default: + error(1, 0, "unimplemented TCP option"); + break; + } +} + +/* TCP with options is always a permutation of {TS, NOP, NOP}. + * Implement different orders to verify coalescing stops. + */ +static void add_standard_tcp_options(char *buf, char *no_ext, int ts, int order) +{ + switch (order) { + case 0: + tcp_write_options(buf + total_hdr_len, TCPOPT_NOP, 0); + tcp_write_options(buf + total_hdr_len + 1, TCPOPT_NOP, 0); + tcp_write_options(buf + total_hdr_len + 2 /* two NOP opts */, + TCPOPT_TIMESTAMP, ts); + break; + case 1: + tcp_write_options(buf + total_hdr_len, TCPOPT_NOP, 0); + tcp_write_options(buf + total_hdr_len + 1, + TCPOPT_TIMESTAMP, ts); + tcp_write_options(buf + total_hdr_len + 1 + TCPOLEN_TIMESTAMP, + TCPOPT_NOP, 0); + break; + case 2: + tcp_write_options(buf + total_hdr_len, TCPOPT_TIMESTAMP, ts); + tcp_write_options(buf + total_hdr_len + TCPOLEN_TIMESTAMP + 1, + TCPOPT_NOP, 0); + tcp_write_options(buf + total_hdr_len + TCPOLEN_TIMESTAMP + 2, + TCPOPT_NOP, 0); + break; + default: + error(1, 0, "unknown order"); + break; + } + recompute_packet(buf, no_ext, TCPOLEN_TSTAMP_APPA); +} + +/* Packets with invalid checksum don't coalesce. */ +static void send_changed_checksum(int fd, struct sockaddr_ll *daddr) +{ + static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; + struct tcphdr *tcph = (struct tcphdr *)(buf + tcp_offset); + int pkt_size = total_hdr_len + PAYLOAD_LEN; + + create_packet(buf, 0, 0, PAYLOAD_LEN, 0); + write_packet(fd, buf, pkt_size, daddr); + + create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0); + tcph->check = tcph->check - 1; + write_packet(fd, buf, pkt_size, daddr); +} + + /* Packets with non-consecutive sequence number don't coalesce.*/ +static void send_changed_seq(int fd, struct sockaddr_ll *daddr) +{ + static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; + struct tcphdr *tcph = (struct tcphdr *)(buf + tcp_offset); + int pkt_size = total_hdr_len + PAYLOAD_LEN; + + create_packet(buf, 0, 0, PAYLOAD_LEN, 0); + write_packet(fd, buf, pkt_size, daddr); + + create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0); + tcph->seq = ntohl(htonl(tcph->seq) + 1); + tcph->check = 0; + tcph->check = tcp_checksum(tcph, PAYLOAD_LEN); + write_packet(fd, buf, pkt_size, daddr); +} + + /* Packet with different timestamp option or different timestamps + * don't coalesce. + */ +static void send_changed_ts(int fd, struct sockaddr_ll *daddr) +{ + static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; + static char extpkt[sizeof(buf) + TCPOLEN_TSTAMP_APPA]; + int pkt_size = total_hdr_len + PAYLOAD_LEN + TCPOLEN_TSTAMP_APPA; + + create_packet(buf, 0, 0, PAYLOAD_LEN, 0); + add_standard_tcp_options(extpkt, buf, 0, 0); + write_packet(fd, extpkt, pkt_size, daddr); + + create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0); + add_standard_tcp_options(extpkt, buf, 0, 0); + write_packet(fd, extpkt, pkt_size, daddr); + + create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0); + add_standard_tcp_options(extpkt, buf, 100, 0); + write_packet(fd, extpkt, pkt_size, daddr); + + create_packet(buf, PAYLOAD_LEN * 3, 0, PAYLOAD_LEN, 0); + add_standard_tcp_options(extpkt, buf, 100, 1); + write_packet(fd, extpkt, pkt_size, daddr); + + create_packet(buf, PAYLOAD_LEN * 4, 0, PAYLOAD_LEN, 0); + add_standard_tcp_options(extpkt, buf, 100, 2); + write_packet(fd, extpkt, pkt_size, daddr); +} + +/* Packet with different tcp options don't coalesce. */ +static void send_diff_opt(int fd, struct sockaddr_ll *daddr) +{ + static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; + static char extpkt1[sizeof(buf) + TCPOLEN_TSTAMP_APPA]; + static char extpkt2[sizeof(buf) + TCPOLEN_MAXSEG]; + int extpkt1_size = total_hdr_len + PAYLOAD_LEN + TCPOLEN_TSTAMP_APPA; + int extpkt2_size = total_hdr_len + PAYLOAD_LEN + TCPOLEN_MAXSEG; + + create_packet(buf, 0, 0, PAYLOAD_LEN, 0); + add_standard_tcp_options(extpkt1, buf, 0, 0); + write_packet(fd, extpkt1, extpkt1_size, daddr); + + create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0); + add_standard_tcp_options(extpkt1, buf, 0, 0); + write_packet(fd, extpkt1, extpkt1_size, daddr); + + create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0); + tcp_write_options(extpkt2 + MAX_HDR_LEN, TCPOPT_NOP, 0); + tcp_write_options(extpkt2 + MAX_HDR_LEN + 1, TCPOPT_WINDOW, 0); + recompute_packet(extpkt2, buf, TCPOLEN_WINDOW + 1); + write_packet(fd, extpkt2, extpkt2_size, daddr); +} + +static void add_ipv4_ts_option(void *buf, void *optpkt) +{ + struct ip_timestamp *ts = (struct ip_timestamp *)(optpkt + tcp_offset); + int optlen = sizeof(struct ip_timestamp); + struct iphdr *iph; + + if (optlen % 4) + error(1, 0, "ipv4 timestamp length is not a multiple of 4B"); + + ts->ipt_code = IPOPT_TS; + ts->ipt_len = optlen; + ts->ipt_ptr = 5; + ts->ipt_flg = IPOPT_TS_TSONLY; + + memcpy(optpkt, buf, tcp_offset); + memcpy(optpkt + tcp_offset + optlen, buf + tcp_offset, + sizeof(struct tcphdr) + PAYLOAD_LEN); + + iph = (struct iphdr *)(optpkt + ETH_HLEN); + iph->ihl = 5 + (optlen / 4); + iph->tot_len = htons(ntohs(iph->tot_len) + optlen); + iph->check = 0; + iph->check = checksum_fold(iph, sizeof(struct iphdr) + optlen, 0); +} + +/* IPv4 options shouldn't coalesce */ +static void send_ip_options(int fd, struct sockaddr_ll *daddr) +{ + static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; + static char optpkt[sizeof(buf) + sizeof(struct ip_timestamp)]; + int optlen = sizeof(struct ip_timestamp); + int pkt_size = total_hdr_len + PAYLOAD_LEN + optlen; + + create_packet(buf, 0, 0, PAYLOAD_LEN, 0); + write_packet(fd, buf, total_hdr_len + PAYLOAD_LEN, daddr); + + create_packet(buf, PAYLOAD_LEN * 1, 0, PAYLOAD_LEN, 0); + add_ipv4_ts_option(buf, optpkt); + write_packet(fd, optpkt, pkt_size, daddr); + + create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0); + write_packet(fd, buf, total_hdr_len + PAYLOAD_LEN, daddr); +} + +/* IPv4 fragments shouldn't coalesce */ +static void send_fragment4(int fd, struct sockaddr_ll *daddr) +{ + static char buf[IP_MAXPACKET]; + struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN); + int pkt_size = total_hdr_len + PAYLOAD_LEN; + + create_packet(buf, 0, 0, PAYLOAD_LEN, 0); + write_packet(fd, buf, pkt_size, daddr); + + /* Once fragmented, packet would retain the total_len. + * Tcp header is prepared as if rest of data is in follow-up frags, + * but follow up frags aren't actually sent. + */ + memset(buf + total_hdr_len, 'a', PAYLOAD_LEN * 2); + fill_transportlayer(buf + tcp_offset, PAYLOAD_LEN, 0, PAYLOAD_LEN * 2, 0); + fill_networklayer(buf + ETH_HLEN, PAYLOAD_LEN); + fill_datalinklayer(buf); + + iph->frag_off = htons(0x6000); // DF = 1, MF = 1 + iph->check = 0; + iph->check = checksum_fold(iph, sizeof(struct iphdr), 0); + write_packet(fd, buf, pkt_size, daddr); +} + +/* IPv4 packets with different ttl don't coalesce.*/ +static void send_changed_ttl(int fd, struct sockaddr_ll *daddr) +{ + int pkt_size = total_hdr_len + PAYLOAD_LEN; + static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; + struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN); + + create_packet(buf, 0, 0, PAYLOAD_LEN, 0); + write_packet(fd, buf, pkt_size, daddr); + + create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0); + iph->ttl = 7; + iph->check = 0; + iph->check = checksum_fold(iph, sizeof(struct iphdr), 0); + write_packet(fd, buf, pkt_size, daddr); +} + +/* Packets with different tos don't coalesce.*/ +static void send_changed_tos(int fd, struct sockaddr_ll *daddr) +{ + int pkt_size = total_hdr_len + PAYLOAD_LEN; + static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; + struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN); + struct ipv6hdr *ip6h = (struct ipv6hdr *)(buf + ETH_HLEN); + + create_packet(buf, 0, 0, PAYLOAD_LEN, 0); + write_packet(fd, buf, pkt_size, daddr); + + create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0); + if (proto == PF_INET) { + iph->tos = 1; + iph->check = 0; + iph->check = checksum_fold(iph, sizeof(struct iphdr), 0); + } else if (proto == PF_INET6) { + ip6h->priority = 0xf; + } + write_packet(fd, buf, pkt_size, daddr); +} + +/* Packets with different ECN don't coalesce.*/ +static void send_changed_ECN(int fd, struct sockaddr_ll *daddr) +{ + int pkt_size = total_hdr_len + PAYLOAD_LEN; + static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; + struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN); + + create_packet(buf, 0, 0, PAYLOAD_LEN, 0); + write_packet(fd, buf, pkt_size, daddr); + + create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0); + if (proto == PF_INET) { + buf[ETH_HLEN + 1] ^= 0x2; // ECN set to 10 + iph->check = 0; + iph->check = checksum_fold(iph, sizeof(struct iphdr), 0); + } else { + buf[ETH_HLEN + 1] ^= 0x20; // ECN set to 10 + } + write_packet(fd, buf, pkt_size, daddr); +} + +/* IPv6 fragments and packets with extensions don't coalesce.*/ +static void send_fragment6(int fd, struct sockaddr_ll *daddr) +{ + static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; + static char extpkt[MAX_HDR_LEN + PAYLOAD_LEN + + sizeof(struct ip6_frag)]; + struct ipv6hdr *ip6h = (struct ipv6hdr *)(buf + ETH_HLEN); + struct ip6_frag *frag = (void *)(extpkt + tcp_offset); + int extlen = sizeof(struct ip6_frag); + int bufpkt_len = total_hdr_len + PAYLOAD_LEN; + int extpkt_len = bufpkt_len + extlen; + int i; + + for (i = 0; i < 2; i++) { + create_packet(buf, PAYLOAD_LEN * i, 0, PAYLOAD_LEN, 0); + write_packet(fd, buf, bufpkt_len, daddr); + } + + create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0); + memset(extpkt, 0, extpkt_len); + + ip6h->nexthdr = IPPROTO_FRAGMENT; + ip6h->payload_len = htons(ntohs(ip6h->payload_len) + extlen); + frag->ip6f_nxt = IPPROTO_TCP; + + memcpy(extpkt, buf, tcp_offset); + memcpy(extpkt + tcp_offset + extlen, buf + tcp_offset, + sizeof(struct tcphdr) + PAYLOAD_LEN); + write_packet(fd, extpkt, extpkt_len, daddr); + + create_packet(buf, PAYLOAD_LEN * 3, 0, PAYLOAD_LEN, 0); + write_packet(fd, buf, bufpkt_len, daddr); +} + +static void bind_packetsocket(int fd) +{ + struct sockaddr_ll daddr = {}; + + daddr.sll_family = AF_PACKET; + daddr.sll_protocol = ethhdr_proto; + daddr.sll_ifindex = if_nametoindex(ifname); + if (daddr.sll_ifindex == 0) + error(1, errno, "if_nametoindex"); + + if (bind(fd, (void *)&daddr, sizeof(daddr)) < 0) + error(1, errno, "could not bind socket"); +} + +static void set_timeout(int fd) +{ + struct timeval timeout; + + timeout.tv_sec = 120; + timeout.tv_usec = 0; + if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, (char *)&timeout, + sizeof(timeout)) < 0) + error(1, errno, "cannot set timeout, setsockopt failed"); +} + +static void check_recv_pkts(int fd, int *correct_payload, + int correct_num_pkts) +{ + static char buffer[IP_MAXPACKET + ETH_HLEN + 1]; + struct iphdr *iph = (struct iphdr *)(buffer + ETH_HLEN); + struct ipv6hdr *ip6h = (struct ipv6hdr *)(buffer + ETH_HLEN); + struct tcphdr *tcph; + bool bad_packet = false; + int tcp_ext_len = 0; + int ip_ext_len = 0; + int pkt_size = -1; + int data_len = 0; + int num_pkt = 0; + int i; + + vlog("Expected {"); + for (i = 0; i < correct_num_pkts; i++) + vlog("%d ", correct_payload[i]); + vlog("}, Total %d packets\nReceived {", correct_num_pkts); + + while (1) { + pkt_size = recv(fd, buffer, IP_MAXPACKET + ETH_HLEN + 1, 0); + if (pkt_size < 0) + error(1, errno, "could not receive"); + + if (iph->version == 4) + ip_ext_len = (iph->ihl - 5) * 4; + else if (ip6h->version == 6 && ip6h->nexthdr != IPPROTO_TCP) + ip_ext_len = sizeof(struct ip6_frag); + + tcph = (struct tcphdr *)(buffer + tcp_offset + ip_ext_len); + + if (tcph->fin) + break; + + tcp_ext_len = (tcph->doff - 5) * 4; + data_len = pkt_size - total_hdr_len - tcp_ext_len - ip_ext_len; + /* Min ethernet frame payload is 46(ETH_ZLEN - ETH_HLEN) by RFC 802.3. + * Ipv4/tcp packets without at least 6 bytes of data will be padded. + * Packet sockets are protocol agnostic, and will not trim the padding. + */ + if (pkt_size == ETH_ZLEN && iph->version == 4) { + data_len = ntohs(iph->tot_len) + - sizeof(struct tcphdr) - sizeof(struct iphdr); + } + vlog("%d ", data_len); + if (data_len != correct_payload[num_pkt]) { + vlog("[!=%d]", correct_payload[num_pkt]); + bad_packet = true; + } + num_pkt++; + } + vlog("}, Total %d packets.\n", num_pkt); + if (num_pkt != correct_num_pkts) + error(1, 0, "incorrect number of packets"); + if (bad_packet) + error(1, 0, "incorrect packet geometry"); + + printf("Test succeeded\n\n"); +} + +static void gro_sender(void) +{ + static char fin_pkt[MAX_HDR_LEN]; + struct sockaddr_ll daddr = {}; + int txfd = -1; + + txfd = socket(PF_PACKET, SOCK_RAW, IPPROTO_RAW); + if (txfd < 0) + error(1, errno, "socket creation"); + + memset(&daddr, 0, sizeof(daddr)); + daddr.sll_ifindex = if_nametoindex(ifname); + if (daddr.sll_ifindex == 0) + error(1, errno, "if_nametoindex"); + daddr.sll_family = AF_PACKET; + memcpy(daddr.sll_addr, dst_mac, ETH_ALEN); + daddr.sll_halen = ETH_ALEN; + create_packet(fin_pkt, PAYLOAD_LEN * 2, 0, 0, 1); + + if (strcmp(testname, "data") == 0) { + send_data_pkts(txfd, &daddr, PAYLOAD_LEN, PAYLOAD_LEN); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + + send_data_pkts(txfd, &daddr, PAYLOAD_LEN, PAYLOAD_LEN / 2); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + + send_data_pkts(txfd, &daddr, PAYLOAD_LEN / 2, PAYLOAD_LEN); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + } else if (strcmp(testname, "ack") == 0) { + send_ack(txfd, &daddr); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + } else if (strcmp(testname, "flags") == 0) { + send_flags(txfd, &daddr, 1, 0, 0, 0); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + + send_flags(txfd, &daddr, 0, 1, 0, 0); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + + send_flags(txfd, &daddr, 0, 0, 1, 0); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + + send_flags(txfd, &daddr, 0, 0, 0, 1); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + } else if (strcmp(testname, "tcp") == 0) { + send_changed_checksum(txfd, &daddr); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + + send_changed_seq(txfd, &daddr); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + + send_changed_ts(txfd, &daddr); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + + send_diff_opt(txfd, &daddr); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + } else if (strcmp(testname, "ip") == 0) { + send_changed_ECN(txfd, &daddr); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + + send_changed_tos(txfd, &daddr); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + if (proto == PF_INET) { + /* Modified packets may be received out of order. + * Sleep function added to enforce test boundaries + * so that fin pkts are not received prior to other pkts. + */ + sleep(1); + send_changed_ttl(txfd, &daddr); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + + sleep(1); + send_ip_options(txfd, &daddr); + sleep(1); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + + sleep(1); + send_fragment4(txfd, &daddr); + sleep(1); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + } else if (proto == PF_INET6) { + send_fragment6(txfd, &daddr); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + } + } else if (strcmp(testname, "large") == 0) { + /* 20 is the difference between min iphdr size + * and min ipv6hdr size. Like MAX_HDR_SIZE, + * MAX_PAYLOAD is defined with the larger header of the two. + */ + int offset = proto == PF_INET ? 20 : 0; + int remainder = (MAX_PAYLOAD + offset) % MSS; + + send_large(txfd, &daddr, remainder); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + + send_large(txfd, &daddr, remainder + 1); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + } else { + error(1, 0, "Unknown testcase"); + } + + if (close(txfd)) + error(1, errno, "socket close"); +} + +static void gro_receiver(void) +{ + static int correct_payload[NUM_PACKETS]; + int rxfd = -1; + + rxfd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_NONE)); + if (rxfd < 0) + error(1, 0, "socket creation"); + setup_sock_filter(rxfd); + set_timeout(rxfd); + bind_packetsocket(rxfd); + + memset(correct_payload, 0, sizeof(correct_payload)); + + if (strcmp(testname, "data") == 0) { + printf("pure data packet of same size: "); + correct_payload[0] = PAYLOAD_LEN * 2; + check_recv_pkts(rxfd, correct_payload, 1); + + printf("large data packets followed by a smaller one: "); + correct_payload[0] = PAYLOAD_LEN * 1.5; + check_recv_pkts(rxfd, correct_payload, 1); + + printf("small data packets followed by a larger one: "); + correct_payload[0] = PAYLOAD_LEN / 2; + correct_payload[1] = PAYLOAD_LEN; + check_recv_pkts(rxfd, correct_payload, 2); + } else if (strcmp(testname, "ack") == 0) { + printf("duplicate ack and pure ack: "); + check_recv_pkts(rxfd, correct_payload, 3); + } else if (strcmp(testname, "flags") == 0) { + correct_payload[0] = PAYLOAD_LEN * 3; + correct_payload[1] = PAYLOAD_LEN * 2; + + printf("psh flag ends coalescing: "); + check_recv_pkts(rxfd, correct_payload, 2); + + correct_payload[0] = PAYLOAD_LEN * 2; + correct_payload[1] = 0; + correct_payload[2] = PAYLOAD_LEN * 2; + printf("syn flag ends coalescing: "); + check_recv_pkts(rxfd, correct_payload, 3); + + printf("rst flag ends coalescing: "); + check_recv_pkts(rxfd, correct_payload, 3); + + printf("urg flag ends coalescing: "); + check_recv_pkts(rxfd, correct_payload, 3); + } else if (strcmp(testname, "tcp") == 0) { + correct_payload[0] = PAYLOAD_LEN; + correct_payload[1] = PAYLOAD_LEN; + correct_payload[2] = PAYLOAD_LEN; + correct_payload[3] = PAYLOAD_LEN; + + printf("changed checksum does not coalesce: "); + check_recv_pkts(rxfd, correct_payload, 2); + + printf("Wrong Seq number doesn't coalesce: "); + check_recv_pkts(rxfd, correct_payload, 2); + + printf("Different timestamp doesn't coalesce: "); + correct_payload[0] = PAYLOAD_LEN * 2; + check_recv_pkts(rxfd, correct_payload, 4); + + printf("Different options doesn't coalesce: "); + correct_payload[0] = PAYLOAD_LEN * 2; + check_recv_pkts(rxfd, correct_payload, 2); + } else if (strcmp(testname, "ip") == 0) { + correct_payload[0] = PAYLOAD_LEN; + correct_payload[1] = PAYLOAD_LEN; + + printf("different ECN doesn't coalesce: "); + check_recv_pkts(rxfd, correct_payload, 2); + + printf("different tos doesn't coalesce: "); + check_recv_pkts(rxfd, correct_payload, 2); + + if (proto == PF_INET) { + printf("different ttl doesn't coalesce: "); + check_recv_pkts(rxfd, correct_payload, 2); + + printf("ip options doesn't coalesce: "); + correct_payload[2] = PAYLOAD_LEN; + check_recv_pkts(rxfd, correct_payload, 3); + + printf("fragmented ip4 doesn't coalesce: "); + check_recv_pkts(rxfd, correct_payload, 2); + } else if (proto == PF_INET6) { + /* GRO doesn't check for ipv6 hop limit when flushing. + * Hence no corresponding test to the ipv4 case. + */ + printf("fragmented ip6 doesn't coalesce: "); + correct_payload[0] = PAYLOAD_LEN * 2; + check_recv_pkts(rxfd, correct_payload, 2); + } + } else if (strcmp(testname, "large") == 0) { + int offset = proto == PF_INET ? 20 : 0; + int remainder = (MAX_PAYLOAD + offset) % MSS; + + correct_payload[0] = (MAX_PAYLOAD + offset); + correct_payload[1] = remainder; + printf("Shouldn't coalesce if exceed IP max pkt size: "); + check_recv_pkts(rxfd, correct_payload, 2); + + /* last segment sent individually, doesn't start new segment */ + correct_payload[0] = correct_payload[0] - remainder; + correct_payload[1] = remainder + 1; + correct_payload[2] = remainder + 1; + check_recv_pkts(rxfd, correct_payload, 3); + } else { + error(1, 0, "Test case error, should never trigger"); + } + + if (close(rxfd)) + error(1, 0, "socket close"); +} + +static void parse_args(int argc, char **argv) +{ + static const struct option opts[] = { + { "dmac", required_argument, NULL, 'D' }, + { "iface", required_argument, NULL, 'i' }, + { "ipv4", no_argument, NULL, '4' }, + { "ipv6", no_argument, NULL, '6' }, + { "rx", no_argument, NULL, 'r' }, + { "smac", required_argument, NULL, 'S' }, + { "test", required_argument, NULL, 't' }, + { "verbose", no_argument, NULL, 'v' }, + { 0, 0, 0, 0 } + }; + int c; + + while ((c = getopt_long(argc, argv, "46D:i:rS:t:v", opts, NULL)) != -1) { + switch (c) { + case '4': + proto = PF_INET; + ethhdr_proto = htons(ETH_P_IP); + break; + case '6': + proto = PF_INET6; + ethhdr_proto = htons(ETH_P_IPV6); + break; + case 'D': + dmac = optarg; + break; + case 'i': + ifname = optarg; + break; + case 'r': + tx_socket = false; + break; + case 'S': + smac = optarg; + break; + case 't': + testname = optarg; + break; + case 'v': + verbose = true; + break; + default: + error(1, 0, "%s invalid option %c\n", __func__, c); + break; + } + } +} + +int main(int argc, char **argv) +{ + parse_args(argc, argv); + + if (proto == PF_INET) { + tcp_offset = ETH_HLEN + sizeof(struct iphdr); + total_hdr_len = tcp_offset + sizeof(struct tcphdr); + } else if (proto == PF_INET6) { + tcp_offset = ETH_HLEN + sizeof(struct ipv6hdr); + total_hdr_len = MAX_HDR_LEN; + } else { + error(1, 0, "Protocol family is not ipv4 or ipv6"); + } + + read_MAC(src_mac, smac); + read_MAC(dst_mac, dmac); + + if (tx_socket) + gro_sender(); + else + gro_receiver(); + return 0; +} diff --git a/tools/testing/selftests/net/gro.sh b/tools/testing/selftests/net/gro.sh new file mode 100755 index 000000000000..794d2bf36dd7 --- /dev/null +++ b/tools/testing/selftests/net/gro.sh @@ -0,0 +1,128 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source setup_loopback.sh +readonly SERVER_MAC="aa:00:00:00:00:02" +readonly CLIENT_MAC="aa:00:00:00:00:01" +readonly TESTS=("data" "ack" "flags" "tcp" "ip" "large") +readonly PROTOS=("ipv4" "ipv6") +dev="eth0" +test="all" +proto="ipv4" + +setup_interrupt() { + # Use timer on host to trigger the network stack + # Also disable device interrupt to not depend on NIC interrupt + # Reduce test flakiness caused by unexpected interrupts + echo 100000 >"${FLUSH_PATH}" + echo 50 >"${IRQ_PATH}" +} + +setup_ns() { + # Set up server_ns namespace and client_ns namespace + setup_macvlan_ns "${dev}" server_ns server "${SERVER_MAC}" + setup_macvlan_ns "${dev}" client_ns client "${CLIENT_MAC}" +} + +cleanup_ns() { + cleanup_macvlan_ns server_ns server client_ns client +} + +setup() { + setup_loopback_environment "${dev}" + setup_interrupt +} + +cleanup() { + cleanup_loopback "${dev}" + + echo "${FLUSH_TIMEOUT}" >"${FLUSH_PATH}" + echo "${HARD_IRQS}" >"${IRQ_PATH}" +} + +run_test() { + local server_pid=0 + local exit_code=0 + local protocol=$1 + local test=$2 + local ARGS=( "--${protocol}" "--dmac" "${SERVER_MAC}" \ + "--smac" "${CLIENT_MAC}" "--test" "${test}" "--verbose" ) + + setup_ns + # Each test is run 3 times to deflake, because given the receive timing, + # not all packets that should coalesce will be considered in the same flow + # on every try. + for tries in {1..3}; do + # Actual test starts here + ip netns exec server_ns ./gro "${ARGS[@]}" "--rx" "--iface" "server" \ + 1>>log.txt & + server_pid=$! + sleep 0.5 # to allow for socket init + ip netns exec client_ns ./gro "${ARGS[@]}" "--iface" "client" \ + 1>>log.txt + wait "${server_pid}" + exit_code=$? + if [[ "${exit_code}" -eq 0 ]]; then + break; + fi + done + cleanup_ns + echo ${exit_code} +} + +run_all_tests() { + local failed_tests=() + for proto in "${PROTOS[@]}"; do + for test in "${TESTS[@]}"; do + echo "running test ${proto} ${test}" >&2 + exit_code=$(run_test $proto $test) + if [[ "${exit_code}" -ne 0 ]]; then + failed_tests+=("${proto}_${test}") + fi; + done; + done + if [[ ${#failed_tests[@]} -ne 0 ]]; then + echo "failed tests: ${failed_tests[*]}. \ + Please see log.txt for more logs" + exit 1 + else + echo "All Tests Succeeded!" + fi; +} + +usage() { + echo "Usage: $0 \ + [-i <DEV>] \ + [-t data|ack|flags|tcp|ip|large] \ + [-p <ipv4|ipv6>]" 1>&2; + exit 1; +} + +while getopts "i:t:p:" opt; do + case "${opt}" in + i) + dev="${OPTARG}" + ;; + t) + test="${OPTARG}" + ;; + p) + proto="${OPTARG}" + ;; + *) + usage + ;; + esac +done + +readonly FLUSH_PATH="/sys/class/net/${dev}/gro_flush_timeout" +readonly IRQ_PATH="/sys/class/net/${dev}/napi_defer_hard_irqs" +readonly FLUSH_TIMEOUT="$(< ${FLUSH_PATH})" +readonly HARD_IRQS="$(< ${IRQ_PATH})" +setup +trap cleanup EXIT +if [[ "${test}" == "all" ]]; then + run_all_tests +else + run_test "${proto}" "${test}" +fi; diff --git a/tools/testing/selftests/net/ipsec.c b/tools/testing/selftests/net/ipsec.c index f23438d512c5..3d7dde2c321b 100644 --- a/tools/testing/selftests/net/ipsec.c +++ b/tools/testing/selftests/net/ipsec.c @@ -484,13 +484,16 @@ enum desc_type { MONITOR_ACQUIRE, EXPIRE_STATE, EXPIRE_POLICY, + SPDINFO_ATTRS, }; const char *desc_name[] = { "create tunnel", "alloc spi", "monitor acquire", "expire state", - "expire policy" + "expire policy", + "spdinfo attributes", + "" }; struct xfrm_desc { enum desc_type type; @@ -1593,6 +1596,155 @@ out_close: return ret; } +static int xfrm_spdinfo_set_thresh(int xfrm_sock, uint32_t *seq, + unsigned thresh4_l, unsigned thresh4_r, + unsigned thresh6_l, unsigned thresh6_r, + bool add_bad_attr) + +{ + struct { + struct nlmsghdr nh; + union { + uint32_t unused; + int error; + }; + char attrbuf[MAX_PAYLOAD]; + } req; + struct xfrmu_spdhthresh thresh; + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.unused)); + req.nh.nlmsg_type = XFRM_MSG_NEWSPDINFO; + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + req.nh.nlmsg_seq = (*seq)++; + + thresh.lbits = thresh4_l; + thresh.rbits = thresh4_r; + if (rtattr_pack(&req.nh, sizeof(req), XFRMA_SPD_IPV4_HTHRESH, &thresh, sizeof(thresh))) + return -1; + + thresh.lbits = thresh6_l; + thresh.rbits = thresh6_r; + if (rtattr_pack(&req.nh, sizeof(req), XFRMA_SPD_IPV6_HTHRESH, &thresh, sizeof(thresh))) + return -1; + + if (add_bad_attr) { + BUILD_BUG_ON(XFRMA_IF_ID <= XFRMA_SPD_MAX + 1); + if (rtattr_pack(&req.nh, sizeof(req), XFRMA_IF_ID, NULL, 0)) { + pr_err("adding attribute failed: no space"); + return -1; + } + } + + if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) { + pr_err("send()"); + return -1; + } + + if (recv(xfrm_sock, &req, sizeof(req), 0) < 0) { + pr_err("recv()"); + return -1; + } else if (req.nh.nlmsg_type != NLMSG_ERROR) { + printk("expected NLMSG_ERROR, got %d", (int)req.nh.nlmsg_type); + return -1; + } + + if (req.error) { + printk("NLMSG_ERROR: %d: %s", req.error, strerror(-req.error)); + return -1; + } + + return 0; +} + +static int xfrm_spdinfo_attrs(int xfrm_sock, uint32_t *seq) +{ + struct { + struct nlmsghdr nh; + union { + uint32_t unused; + int error; + }; + char attrbuf[MAX_PAYLOAD]; + } req; + + if (xfrm_spdinfo_set_thresh(xfrm_sock, seq, 32, 31, 120, 16, false)) { + pr_err("Can't set SPD HTHRESH"); + return KSFT_FAIL; + } + + memset(&req, 0, sizeof(req)); + + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.unused)); + req.nh.nlmsg_type = XFRM_MSG_GETSPDINFO; + req.nh.nlmsg_flags = NLM_F_REQUEST; + req.nh.nlmsg_seq = (*seq)++; + if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) { + pr_err("send()"); + return KSFT_FAIL; + } + + if (recv(xfrm_sock, &req, sizeof(req), 0) < 0) { + pr_err("recv()"); + return KSFT_FAIL; + } else if (req.nh.nlmsg_type == XFRM_MSG_NEWSPDINFO) { + size_t len = NLMSG_PAYLOAD(&req.nh, sizeof(req.unused)); + struct rtattr *attr = (void *)req.attrbuf; + int got_thresh = 0; + + for (; RTA_OK(attr, len); attr = RTA_NEXT(attr, len)) { + if (attr->rta_type == XFRMA_SPD_IPV4_HTHRESH) { + struct xfrmu_spdhthresh *t = RTA_DATA(attr); + + got_thresh++; + if (t->lbits != 32 || t->rbits != 31) { + pr_err("thresh differ: %u, %u", + t->lbits, t->rbits); + return KSFT_FAIL; + } + } + if (attr->rta_type == XFRMA_SPD_IPV6_HTHRESH) { + struct xfrmu_spdhthresh *t = RTA_DATA(attr); + + got_thresh++; + if (t->lbits != 120 || t->rbits != 16) { + pr_err("thresh differ: %u, %u", + t->lbits, t->rbits); + return KSFT_FAIL; + } + } + } + if (got_thresh != 2) { + pr_err("only %d thresh returned by XFRM_MSG_GETSPDINFO", got_thresh); + return KSFT_FAIL; + } + } else if (req.nh.nlmsg_type != NLMSG_ERROR) { + printk("expected NLMSG_ERROR, got %d", (int)req.nh.nlmsg_type); + return KSFT_FAIL; + } else { + printk("NLMSG_ERROR: %d: %s", req.error, strerror(-req.error)); + return -1; + } + + /* Restore the default */ + if (xfrm_spdinfo_set_thresh(xfrm_sock, seq, 32, 32, 128, 128, false)) { + pr_err("Can't restore SPD HTHRESH"); + return KSFT_FAIL; + } + + /* + * At this moment xfrm uses nlmsg_parse_deprecated(), which + * implies NL_VALIDATE_LIBERAL - ignoring attributes with + * (type > maxtype). nla_parse_depricated_strict() would enforce + * it. Or even stricter nla_parse(). + * Right now it's not expected to fail, but to be ignored. + */ + if (xfrm_spdinfo_set_thresh(xfrm_sock, seq, 32, 32, 128, 128, true)) + return KSFT_PASS; + + return KSFT_PASS; +} + static int child_serv(int xfrm_sock, uint32_t *seq, unsigned int nr, int cmd_fd, void *buf, struct xfrm_desc *desc) { @@ -1717,6 +1869,9 @@ static int child_f(unsigned int nr, int test_desc_fd, int cmd_fd, void *buf) case EXPIRE_POLICY: ret = xfrm_expire_policy(xfrm_sock, &seq, nr, &desc); break; + case SPDINFO_ATTRS: + ret = xfrm_spdinfo_attrs(xfrm_sock, &seq); + break; default: printk("Unknown desc type %d", desc.type); exit(KSFT_FAIL); @@ -1994,8 +2149,10 @@ static int write_proto_plan(int fd, int proto) * sizeof(xfrm_user_polexpire) = 168 | sizeof(xfrm_user_polexpire) = 176 * * Check the affected by the UABI difference structures. + * Also, check translation for xfrm_set_spdinfo: it has it's own attributes + * which needs to be correctly copied, but not translated. */ -const unsigned int compat_plan = 4; +const unsigned int compat_plan = 5; static int write_compat_struct_tests(int test_desc_fd) { struct xfrm_desc desc = {}; @@ -2019,6 +2176,10 @@ static int write_compat_struct_tests(int test_desc_fd) if (__write_desc(test_desc_fd, &desc)) return -1; + desc.type = SPDINFO_ATTRS; + if (__write_desc(test_desc_fd, &desc)) + return -1; + return 0; } diff --git a/tools/testing/selftests/net/setup_loopback.sh b/tools/testing/selftests/net/setup_loopback.sh new file mode 100755 index 000000000000..0a8ad97b07ea --- /dev/null +++ b/tools/testing/selftests/net/setup_loopback.sh @@ -0,0 +1,82 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +netdev_check_for_carrier() { + local -r dev="$1" + + for i in {1..5}; do + carrier="$(cat /sys/class/net/${dev}/carrier)" + if [[ "${carrier}" -ne 1 ]] ; then + echo "carrier not ready yet..." >&2 + sleep 1 + else + echo "carrier ready" >&2 + break + fi + done + echo "${carrier}" +} + +# Assumes that there is no existing ipvlan device on the physical device +setup_loopback_environment() { + local dev="$1" + + # Fail hard if cannot turn on loopback mode for current NIC + ethtool -K "${dev}" loopback on || exit 1 + sleep 1 + + # Check for the carrier + carrier=$(netdev_check_for_carrier ${dev}) + if [[ "${carrier}" -ne 1 ]] ; then + echo "setup_loopback_environment failed" + exit 1 + fi +} + +setup_macvlan_ns(){ + local -r link_dev="$1" + local -r ns_name="$2" + local -r ns_dev="$3" + local -r ns_mac="$4" + local -r addr="$5" + + ip link add link "${link_dev}" dev "${ns_dev}" \ + address "${ns_mac}" type macvlan + exit_code=$? + if [[ "${exit_code}" -ne 0 ]]; then + echo "setup_macvlan_ns failed" + exit $exit_code + fi + + [[ -e /var/run/netns/"${ns_name}" ]] || ip netns add "${ns_name}" + ip link set dev "${ns_dev}" netns "${ns_name}" + ip -netns "${ns_name}" link set dev "${ns_dev}" up + if [[ -n "${addr}" ]]; then + ip -netns "${ns_name}" addr add dev "${ns_dev}" "${addr}" + fi + + sleep 1 +} + +cleanup_macvlan_ns(){ + while (( $# >= 2 )); do + ns_name="$1" + ns_dev="$2" + ip -netns "${ns_name}" link del dev "${ns_dev}" + ip netns del "${ns_name}" + shift 2 + done +} + +cleanup_loopback(){ + local -r dev="$1" + + ethtool -K "${dev}" loopback off + sleep 1 + + # Check for the carrier + carrier=$(netdev_check_for_carrier ${dev}) + if [[ "${carrier}" -ne 1 ]] ; then + echo "setup_loopback_environment failed" + exit 1 + fi +} diff --git a/tools/testing/selftests/net/toeplitz.c b/tools/testing/selftests/net/toeplitz.c new file mode 100644 index 000000000000..710ac956bdb3 --- /dev/null +++ b/tools/testing/selftests/net/toeplitz.c @@ -0,0 +1,585 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Toeplitz test + * + * 1. Read packets and their rx_hash using PF_PACKET/TPACKET_V3 + * 2. Compute the rx_hash in software based on the packet contents + * 3. Compare the two + * + * Optionally, either '-C $rx_irq_cpu_list' or '-r $rps_bitmap' may be given. + * + * If '-C $rx_irq_cpu_list' is given, also + * + * 4. Identify the cpu on which the packet arrived with PACKET_FANOUT_CPU + * 5. Compute the rxqueue that RSS would select based on this rx_hash + * 6. Using the $rx_irq_cpu_list map, identify the arriving cpu based on rxq irq + * 7. Compare the cpus from 4 and 6 + * + * Else if '-r $rps_bitmap' is given, also + * + * 4. Identify the cpu on which the packet arrived with PACKET_FANOUT_CPU + * 5. Compute the cpu that RPS should select based on rx_hash and $rps_bitmap + * 6. Compare the cpus from 4 and 5 + */ + +#define _GNU_SOURCE + +#include <arpa/inet.h> +#include <errno.h> +#include <error.h> +#include <fcntl.h> +#include <getopt.h> +#include <linux/filter.h> +#include <linux/if_ether.h> +#include <linux/if_packet.h> +#include <net/if.h> +#include <netdb.h> +#include <netinet/ip.h> +#include <netinet/ip6.h> +#include <netinet/tcp.h> +#include <netinet/udp.h> +#include <poll.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/mman.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/sysinfo.h> +#include <sys/time.h> +#include <sys/types.h> +#include <unistd.h> + +#define TOEPLITZ_KEY_MIN_LEN 40 +#define TOEPLITZ_KEY_MAX_LEN 60 + +#define TOEPLITZ_STR_LEN(K) (((K) * 3) - 1) /* hex encoded: AA:BB:CC:...:ZZ */ +#define TOEPLITZ_STR_MIN_LEN TOEPLITZ_STR_LEN(TOEPLITZ_KEY_MIN_LEN) +#define TOEPLITZ_STR_MAX_LEN TOEPLITZ_STR_LEN(TOEPLITZ_KEY_MAX_LEN) + +#define FOUR_TUPLE_MAX_LEN ((sizeof(struct in6_addr) * 2) + (sizeof(uint16_t) * 2)) + +#define RSS_MAX_CPUS (1 << 16) /* real constraint is PACKET_FANOUT_MAX */ + +#define RPS_MAX_CPUS 16UL /* must be a power of 2 */ + +/* configuration options (cmdline arguments) */ +static uint16_t cfg_dport = 8000; +static int cfg_family = AF_INET6; +static char *cfg_ifname = "eth0"; +static int cfg_num_queues; +static int cfg_num_rps_cpus; +static bool cfg_sink; +static int cfg_type = SOCK_STREAM; +static int cfg_timeout_msec = 1000; +static bool cfg_verbose; + +/* global vars */ +static int num_cpus; +static int ring_block_nr; +static int ring_block_sz; + +/* stats */ +static int frames_received; +static int frames_nohash; +static int frames_error; + +#define log_verbose(args...) do { if (cfg_verbose) fprintf(stderr, args); } while (0) + +/* tpacket ring */ +struct ring_state { + int fd; + char *mmap; + int idx; + int cpu; +}; + +static unsigned int rx_irq_cpus[RSS_MAX_CPUS]; /* map from rxq to cpu */ +static int rps_silo_to_cpu[RPS_MAX_CPUS]; +static unsigned char toeplitz_key[TOEPLITZ_KEY_MAX_LEN]; +static struct ring_state rings[RSS_MAX_CPUS]; + +static inline uint32_t toeplitz(const unsigned char *four_tuple, + const unsigned char *key) +{ + int i, bit, ret = 0; + uint32_t key32; + + key32 = ntohl(*((uint32_t *)key)); + key += 4; + + for (i = 0; i < FOUR_TUPLE_MAX_LEN; i++) { + for (bit = 7; bit >= 0; bit--) { + if (four_tuple[i] & (1 << bit)) + ret ^= key32; + + key32 <<= 1; + key32 |= !!(key[0] & (1 << bit)); + } + key++; + } + + return ret; +} + +/* Compare computed cpu with arrival cpu from packet_fanout_cpu */ +static void verify_rss(uint32_t rx_hash, int cpu) +{ + int queue = rx_hash % cfg_num_queues; + + log_verbose(" rxq %d (cpu %d)", queue, rx_irq_cpus[queue]); + if (rx_irq_cpus[queue] != cpu) { + log_verbose(". error: rss cpu mismatch (%d)", cpu); + frames_error++; + } +} + +static void verify_rps(uint64_t rx_hash, int cpu) +{ + int silo = (rx_hash * cfg_num_rps_cpus) >> 32; + + log_verbose(" silo %d (cpu %d)", silo, rps_silo_to_cpu[silo]); + if (rps_silo_to_cpu[silo] != cpu) { + log_verbose(". error: rps cpu mismatch (%d)", cpu); + frames_error++; + } +} + +static void log_rxhash(int cpu, uint32_t rx_hash, + const char *addrs, int addr_len) +{ + char saddr[INET6_ADDRSTRLEN], daddr[INET6_ADDRSTRLEN]; + uint16_t *ports; + + if (!inet_ntop(cfg_family, addrs, saddr, sizeof(saddr)) || + !inet_ntop(cfg_family, addrs + addr_len, daddr, sizeof(daddr))) + error(1, 0, "address parse error"); + + ports = (void *)addrs + (addr_len * 2); + log_verbose("cpu %d: rx_hash 0x%08x [saddr %s daddr %s sport %02hu dport %02hu]", + cpu, rx_hash, saddr, daddr, + ntohs(ports[0]), ntohs(ports[1])); +} + +/* Compare computed rxhash with rxhash received from tpacket_v3 */ +static void verify_rxhash(const char *pkt, uint32_t rx_hash, int cpu) +{ + unsigned char four_tuple[FOUR_TUPLE_MAX_LEN] = {0}; + uint32_t rx_hash_sw; + const char *addrs; + int addr_len; + + if (cfg_family == AF_INET) { + addr_len = sizeof(struct in_addr); + addrs = pkt + offsetof(struct iphdr, saddr); + } else { + addr_len = sizeof(struct in6_addr); + addrs = pkt + offsetof(struct ip6_hdr, ip6_src); + } + + memcpy(four_tuple, addrs, (addr_len * 2) + (sizeof(uint16_t) * 2)); + rx_hash_sw = toeplitz(four_tuple, toeplitz_key); + + if (cfg_verbose) + log_rxhash(cpu, rx_hash, addrs, addr_len); + + if (rx_hash != rx_hash_sw) { + log_verbose(" != expected 0x%x\n", rx_hash_sw); + frames_error++; + return; + } + + log_verbose(" OK"); + if (cfg_num_queues) + verify_rss(rx_hash, cpu); + else if (cfg_num_rps_cpus) + verify_rps(rx_hash, cpu); + log_verbose("\n"); +} + +static char *recv_frame(const struct ring_state *ring, char *frame) +{ + struct tpacket3_hdr *hdr = (void *)frame; + + if (hdr->hv1.tp_rxhash) + verify_rxhash(frame + hdr->tp_net, hdr->hv1.tp_rxhash, + ring->cpu); + else + frames_nohash++; + + return frame + hdr->tp_next_offset; +} + +/* A single TPACKET_V3 block can hold multiple frames */ +static void recv_block(struct ring_state *ring) +{ + struct tpacket_block_desc *block; + char *frame; + int i; + + block = (void *)(ring->mmap + ring->idx * ring_block_sz); + if (!(block->hdr.bh1.block_status & TP_STATUS_USER)) + return; + + frame = (char *)block; + frame += block->hdr.bh1.offset_to_first_pkt; + + for (i = 0; i < block->hdr.bh1.num_pkts; i++) { + frame = recv_frame(ring, frame); + frames_received++; + } + + block->hdr.bh1.block_status = TP_STATUS_KERNEL; + ring->idx = (ring->idx + 1) % ring_block_nr; +} + +/* simple test: sleep once unconditionally and then process all rings */ +static void process_rings(void) +{ + int i; + + usleep(1000 * cfg_timeout_msec); + + for (i = 0; i < num_cpus; i++) + recv_block(&rings[i]); + + fprintf(stderr, "count: pass=%u nohash=%u fail=%u\n", + frames_received - frames_nohash - frames_error, + frames_nohash, frames_error); +} + +static char *setup_ring(int fd) +{ + struct tpacket_req3 req3 = {0}; + void *ring; + + req3.tp_retire_blk_tov = cfg_timeout_msec; + req3.tp_feature_req_word = TP_FT_REQ_FILL_RXHASH; + + req3.tp_frame_size = 2048; + req3.tp_frame_nr = 1 << 10; + req3.tp_block_nr = 2; + + req3.tp_block_size = req3.tp_frame_size * req3.tp_frame_nr; + req3.tp_block_size /= req3.tp_block_nr; + + if (setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &req3, sizeof(req3))) + error(1, errno, "setsockopt PACKET_RX_RING"); + + ring_block_sz = req3.tp_block_size; + ring_block_nr = req3.tp_block_nr; + + ring = mmap(0, req3.tp_block_size * req3.tp_block_nr, + PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_LOCKED | MAP_POPULATE, fd, 0); + if (ring == MAP_FAILED) + error(1, 0, "mmap failed"); + + return ring; +} + +static void __set_filter(int fd, int off_proto, uint8_t proto, int off_dport) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_LD + BPF_B + BPF_ABS, SKF_AD_OFF + SKF_AD_PKTTYPE), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, PACKET_HOST, 0, 4), + BPF_STMT(BPF_LD + BPF_B + BPF_ABS, off_proto), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, proto, 0, 2), + BPF_STMT(BPF_LD + BPF_H + BPF_ABS, off_dport), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, cfg_dport, 1, 0), + BPF_STMT(BPF_RET + BPF_K, 0), + BPF_STMT(BPF_RET + BPF_K, 0xFFFF), + }; + struct sock_fprog prog = {}; + + prog.filter = filter; + prog.len = sizeof(filter) / sizeof(struct sock_filter); + if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog))) + error(1, errno, "setsockopt filter"); +} + +/* filter on transport protocol and destination port */ +static void set_filter(int fd) +{ + const int off_dport = offsetof(struct tcphdr, dest); /* same for udp */ + uint8_t proto; + + proto = cfg_type == SOCK_STREAM ? IPPROTO_TCP : IPPROTO_UDP; + if (cfg_family == AF_INET) + __set_filter(fd, offsetof(struct iphdr, protocol), proto, + sizeof(struct iphdr) + off_dport); + else + __set_filter(fd, offsetof(struct ip6_hdr, ip6_nxt), proto, + sizeof(struct ip6_hdr) + off_dport); +} + +/* drop everything: used temporarily during setup */ +static void set_filter_null(int fd) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_RET + BPF_K, 0), + }; + struct sock_fprog prog = {}; + + prog.filter = filter; + prog.len = sizeof(filter) / sizeof(struct sock_filter); + if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog))) + error(1, errno, "setsockopt filter"); +} + +static int create_ring(char **ring) +{ + struct fanout_args args = { + .id = 1, + .type_flags = PACKET_FANOUT_CPU, + .max_num_members = RSS_MAX_CPUS + }; + struct sockaddr_ll ll = { 0 }; + int fd, val; + + fd = socket(PF_PACKET, SOCK_DGRAM, 0); + if (fd == -1) + error(1, errno, "socket creation failed"); + + val = TPACKET_V3; + if (setsockopt(fd, SOL_PACKET, PACKET_VERSION, &val, sizeof(val))) + error(1, errno, "setsockopt PACKET_VERSION"); + *ring = setup_ring(fd); + + /* block packets until all rings are added to the fanout group: + * else packets can arrive during setup and get misclassified + */ + set_filter_null(fd); + + ll.sll_family = AF_PACKET; + ll.sll_ifindex = if_nametoindex(cfg_ifname); + ll.sll_protocol = cfg_family == AF_INET ? htons(ETH_P_IP) : + htons(ETH_P_IPV6); + if (bind(fd, (void *)&ll, sizeof(ll))) + error(1, errno, "bind"); + + /* must come after bind: verifies all programs in group match */ + if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &args, sizeof(args))) { + /* on failure, retry using old API if that is sufficient: + * it has a hard limit of 256 sockets, so only try if + * (a) only testing rxhash, not RSS or (b) <= 256 cpus. + * in this API, the third argument is left implicit. + */ + if (cfg_num_queues || num_cpus > 256 || + setsockopt(fd, SOL_PACKET, PACKET_FANOUT, + &args, sizeof(uint32_t))) + error(1, errno, "setsockopt PACKET_FANOUT cpu"); + } + + return fd; +} + +/* setup inet(6) socket to blackhole the test traffic, if arg '-s' */ +static int setup_sink(void) +{ + int fd, val; + + fd = socket(cfg_family, cfg_type, 0); + if (fd == -1) + error(1, errno, "socket %d.%d", cfg_family, cfg_type); + + val = 1 << 20; + if (setsockopt(fd, SOL_SOCKET, SO_RCVBUFFORCE, &val, sizeof(val))) + error(1, errno, "setsockopt rcvbuf"); + + return fd; +} + +static void setup_rings(void) +{ + int i; + + for (i = 0; i < num_cpus; i++) { + rings[i].cpu = i; + rings[i].fd = create_ring(&rings[i].mmap); + } + + /* accept packets once all rings in the fanout group are up */ + for (i = 0; i < num_cpus; i++) + set_filter(rings[i].fd); +} + +static void cleanup_rings(void) +{ + int i; + + for (i = 0; i < num_cpus; i++) { + if (munmap(rings[i].mmap, ring_block_nr * ring_block_sz)) + error(1, errno, "munmap"); + if (close(rings[i].fd)) + error(1, errno, "close"); + } +} + +static void parse_cpulist(const char *arg) +{ + do { + rx_irq_cpus[cfg_num_queues++] = strtol(arg, NULL, 10); + + arg = strchr(arg, ','); + if (!arg) + break; + arg++; // skip ',' + } while (1); +} + +static void show_cpulist(void) +{ + int i; + + for (i = 0; i < cfg_num_queues; i++) + fprintf(stderr, "rxq %d: cpu %d\n", i, rx_irq_cpus[i]); +} + +static void show_silos(void) +{ + int i; + + for (i = 0; i < cfg_num_rps_cpus; i++) + fprintf(stderr, "silo %d: cpu %d\n", i, rps_silo_to_cpu[i]); +} + +static void parse_toeplitz_key(const char *str, int slen, unsigned char *key) +{ + int i, ret, off; + + if (slen < TOEPLITZ_STR_MIN_LEN || + slen > TOEPLITZ_STR_MAX_LEN + 1) + error(1, 0, "invalid toeplitz key"); + + for (i = 0, off = 0; off < slen; i++, off += 3) { + ret = sscanf(str + off, "%hhx", &key[i]); + if (ret != 1) + error(1, 0, "key parse error at %d off %d len %d", + i, off, slen); + } +} + +static void parse_rps_bitmap(const char *arg) +{ + unsigned long bitmap; + int i; + + bitmap = strtoul(arg, NULL, 0); + + if (bitmap & ~(RPS_MAX_CPUS - 1)) + error(1, 0, "rps bitmap 0x%lx out of bounds 0..%lu", + bitmap, RPS_MAX_CPUS - 1); + + for (i = 0; i < RPS_MAX_CPUS; i++) + if (bitmap & 1UL << i) + rps_silo_to_cpu[cfg_num_rps_cpus++] = i; +} + +static void parse_opts(int argc, char **argv) +{ + static struct option long_options[] = { + {"dport", required_argument, 0, 'd'}, + {"cpus", required_argument, 0, 'C'}, + {"key", required_argument, 0, 'k'}, + {"iface", required_argument, 0, 'i'}, + {"ipv4", no_argument, 0, '4'}, + {"ipv6", no_argument, 0, '6'}, + {"sink", no_argument, 0, 's'}, + {"tcp", no_argument, 0, 't'}, + {"timeout", required_argument, 0, 'T'}, + {"udp", no_argument, 0, 'u'}, + {"verbose", no_argument, 0, 'v'}, + {"rps", required_argument, 0, 'r'}, + {0, 0, 0, 0} + }; + bool have_toeplitz = false; + int index, c; + + while ((c = getopt_long(argc, argv, "46C:d:i:k:r:stT:u:v", long_options, &index)) != -1) { + switch (c) { + case '4': + cfg_family = AF_INET; + break; + case '6': + cfg_family = AF_INET6; + break; + case 'C': + parse_cpulist(optarg); + break; + case 'd': + cfg_dport = strtol(optarg, NULL, 0); + break; + case 'i': + cfg_ifname = optarg; + break; + case 'k': + parse_toeplitz_key(optarg, strlen(optarg), + toeplitz_key); + have_toeplitz = true; + break; + case 'r': + parse_rps_bitmap(optarg); + break; + case 's': + cfg_sink = true; + break; + case 't': + cfg_type = SOCK_STREAM; + break; + case 'T': + cfg_timeout_msec = strtol(optarg, NULL, 0); + break; + case 'u': + cfg_type = SOCK_DGRAM; + break; + case 'v': + cfg_verbose = true; + break; + + default: + error(1, 0, "unknown option %c", optopt); + break; + } + } + + if (!have_toeplitz) + error(1, 0, "Must supply rss key ('-k')"); + + num_cpus = get_nprocs(); + if (num_cpus > RSS_MAX_CPUS) + error(1, 0, "increase RSS_MAX_CPUS"); + + if (cfg_num_queues && cfg_num_rps_cpus) + error(1, 0, + "Can't supply both RSS cpus ('-C') and RPS map ('-r')"); + if (cfg_verbose) { + show_cpulist(); + show_silos(); + } +} + +int main(int argc, char **argv) +{ + const int min_tests = 10; + int fd_sink = -1; + + parse_opts(argc, argv); + + if (cfg_sink) + fd_sink = setup_sink(); + + setup_rings(); + process_rings(); + cleanup_rings(); + + if (cfg_sink && close(fd_sink)) + error(1, errno, "close sink"); + + if (frames_received - frames_nohash < min_tests) + error(1, 0, "too few frames for verification"); + + return frames_error; +} diff --git a/tools/testing/selftests/net/toeplitz.sh b/tools/testing/selftests/net/toeplitz.sh new file mode 100755 index 000000000000..0a49907cd4fe --- /dev/null +++ b/tools/testing/selftests/net/toeplitz.sh @@ -0,0 +1,199 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# extended toeplitz test: test rxhash plus, optionally, either (1) rss mapping +# from rxhash to rx queue ('-rss') or (2) rps mapping from rxhash to cpu +# ('-rps <rps_map>') +# +# irq-pattern-prefix can be derived from /sys/kernel/irq/*/action, +# which is a driver-specific encoding. +# +# invoke as ./toeplitz.sh (-i <iface>) -u|-t -4|-6 \ +# [(-rss -irq_prefix <irq-pattern-prefix>)|(-rps <rps_map>)] + +source setup_loopback.sh +readonly SERVER_IP4="192.168.1.200/24" +readonly SERVER_IP6="fda8::1/64" +readonly SERVER_MAC="aa:00:00:00:00:02" + +readonly CLIENT_IP4="192.168.1.100/24" +readonly CLIENT_IP6="fda8::2/64" +readonly CLIENT_MAC="aa:00:00:00:00:01" + +PORT=8000 +KEY="$(</proc/sys/net/core/netdev_rss_key)" +TEST_RSS=false +RPS_MAP="" +PROTO_FLAG="" +IP_FLAG="" +DEV="eth0" + +# Return the number of rxqs among which RSS is configured to spread packets. +# This is determined by reading the RSS indirection table using ethtool. +get_rss_cfg_num_rxqs() { + echo $(ethtool -x "${DEV}" | + egrep [[:space:]]+[0-9]+:[[:space:]]+ | + cut -d: -f2- | + awk '{$1=$1};1' | + tr ' ' '\n' | + sort -u | + wc -l) +} + +# Return a list of the receive irq handler cpus. +# The list is ordered by the irqs, so first rxq-0 cpu, then rxq-1 cpu, etc. +# Reads /sys/kernel/irq/ in order, so algorithm depends on +# irq_{rxq-0} < irq_{rxq-1}, etc. +get_rx_irq_cpus() { + CPUS="" + # sort so that irq 2 is read before irq 10 + SORTED_IRQS=$(for i in /sys/kernel/irq/*; do echo $i; done | sort -V) + # Consider only as many queues as RSS actually uses. We assume that + # if RSS_CFG_NUM_RXQS=N, then RSS uses rxqs 0-(N-1). + RSS_CFG_NUM_RXQS=$(get_rss_cfg_num_rxqs) + RXQ_COUNT=0 + + for i in ${SORTED_IRQS} + do + [[ "${RXQ_COUNT}" -lt "${RSS_CFG_NUM_RXQS}" ]] || break + # lookup relevant IRQs by action name + [[ -e "$i/actions" ]] || continue + cat "$i/actions" | grep -q "${IRQ_PATTERN}" || continue + irqname=$(<"$i/actions") + + # does the IRQ get called + irqcount=$(cat "$i/per_cpu_count" | tr -d '0,') + [[ -n "${irqcount}" ]] || continue + + # lookup CPU + irq=$(basename "$i") + cpu=$(cat "/proc/irq/$irq/smp_affinity_list") + + if [[ -z "${CPUS}" ]]; then + CPUS="${cpu}" + else + CPUS="${CPUS},${cpu}" + fi + RXQ_COUNT=$((RXQ_COUNT+1)) + done + + echo "${CPUS}" +} + +get_disable_rfs_cmd() { + echo "echo 0 > /proc/sys/net/core/rps_sock_flow_entries;" +} + +get_set_rps_bitmaps_cmd() { + CMD="" + for i in /sys/class/net/${DEV}/queues/rx-*/rps_cpus + do + CMD="${CMD} echo $1 > ${i};" + done + + echo "${CMD}" +} + +get_disable_rps_cmd() { + echo "$(get_set_rps_bitmaps_cmd 0)" +} + +die() { + echo "$1" + exit 1 +} + +check_nic_rxhash_enabled() { + local -r pattern="receive-hashing:\ on" + + ethtool -k "${DEV}" | grep -q "${pattern}" || die "rxhash must be enabled" +} + +parse_opts() { + local prog=$0 + shift 1 + + while [[ "$1" =~ "-" ]]; do + if [[ "$1" = "-irq_prefix" ]]; then + shift + IRQ_PATTERN="^$1-[0-9]*$" + elif [[ "$1" = "-u" || "$1" = "-t" ]]; then + PROTO_FLAG="$1" + elif [[ "$1" = "-4" ]]; then + IP_FLAG="$1" + SERVER_IP="${SERVER_IP4}" + CLIENT_IP="${CLIENT_IP4}" + elif [[ "$1" = "-6" ]]; then + IP_FLAG="$1" + SERVER_IP="${SERVER_IP6}" + CLIENT_IP="${CLIENT_IP6}" + elif [[ "$1" = "-rss" ]]; then + TEST_RSS=true + elif [[ "$1" = "-rps" ]]; then + shift + RPS_MAP="$1" + elif [[ "$1" = "-i" ]]; then + shift + DEV="$1" + else + die "Usage: ${prog} (-i <iface>) -u|-t -4|-6 \ + [(-rss -irq_prefix <irq-pattern-prefix>)|(-rps <rps_map>)]" + fi + shift + done +} + +setup() { + setup_loopback_environment "${DEV}" + + # Set up server_ns namespace and client_ns namespace + setup_macvlan_ns "${DEV}" server_ns server \ + "${SERVER_MAC}" "${SERVER_IP}" + setup_macvlan_ns "${DEV}" client_ns client \ + "${CLIENT_MAC}" "${CLIENT_IP}" +} + +cleanup() { + cleanup_macvlan_ns server_ns server client_ns client + cleanup_loopback "${DEV}" +} + +parse_opts $0 $@ + +setup +trap cleanup EXIT + +check_nic_rxhash_enabled + +# Actual test starts here +if [[ "${TEST_RSS}" = true ]]; then + # RPS/RFS must be disabled because they move packets between cpus, + # which breaks the PACKET_FANOUT_CPU identification of RSS decisions. + eval "$(get_disable_rfs_cmd) $(get_disable_rps_cmd)" \ + ip netns exec server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \ + -d "${PORT}" -i "${DEV}" -k "${KEY}" -T 1000 \ + -C "$(get_rx_irq_cpus)" -s -v & +elif [[ ! -z "${RPS_MAP}" ]]; then + eval "$(get_disable_rfs_cmd) $(get_set_rps_bitmaps_cmd ${RPS_MAP})" \ + ip netns exec server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \ + -d "${PORT}" -i "${DEV}" -k "${KEY}" -T 1000 \ + -r "0x${RPS_MAP}" -s -v & +else + ip netns exec server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \ + -d "${PORT}" -i "${DEV}" -k "${KEY}" -T 1000 -s -v & +fi + +server_pid=$! + +ip netns exec client_ns ./toeplitz_client.sh "${PROTO_FLAG}" \ + "${IP_FLAG}" "${SERVER_IP%%/*}" "${PORT}" & + +client_pid=$! + +wait "${server_pid}" +exit_code=$? +kill -9 "${client_pid}" +if [[ "${exit_code}" -eq 0 ]]; then + echo "Test Succeeded!" +fi +exit "${exit_code}" diff --git a/tools/testing/selftests/net/toeplitz_client.sh b/tools/testing/selftests/net/toeplitz_client.sh new file mode 100755 index 000000000000..2fef34f4aba1 --- /dev/null +++ b/tools/testing/selftests/net/toeplitz_client.sh @@ -0,0 +1,28 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# A simple program for generating traffic for the toeplitz test. +# +# This program sends packets periodically for, conservatively, 20 seconds. The +# intent is for the calling program to kill this program once it is no longer +# needed, rather than waiting for the 20 second expiration. + +send_traffic() { + expiration=$((SECONDS+20)) + while [[ "${SECONDS}" -lt "${expiration}" ]] + do + if [[ "${PROTO}" == "-u" ]]; then + echo "msg $i" | nc "${IPVER}" -u -w 0 "${ADDR}" "${PORT}" + else + echo "msg $i" | nc "${IPVER}" -w 0 "${ADDR}" "${PORT}" + fi + sleep 0.001 + done +} + +PROTO=$1 +IPVER=$2 +ADDR=$3 +PORT=$4 + +send_traffic diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/mq.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/mq.json new file mode 100644 index 000000000000..88a20c781e49 --- /dev/null +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/mq.json @@ -0,0 +1,137 @@ +[ + { + "id": "ce7d", + "name": "Add mq Qdisc to multi-queue device (4 queues)", + "category": [ + "qdisc", + "mq" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "echo \"1 1 4\" > /sys/bus/netdevsim/new_device" + ], + "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: mq", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $ETH", + "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1", + "matchCount": "4", + "teardown": [ + "echo \"1\" > /sys/bus/netdevsim/del_device" + ] + }, + { + "id": "2f82", + "name": "Add mq Qdisc to multi-queue device (256 queues)", + "category": [ + "qdisc", + "mq" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "echo \"1 1 256\" > /sys/bus/netdevsim/new_device" + ], + "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: mq", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $ETH", + "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-9,a-f][0-9,a-f]{0,2} bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1", + "matchCount": "256", + "teardown": [ + "echo \"1\" > /sys/bus/netdevsim/del_device" + ] + }, + { + "id": "c525", + "name": "Add duplicate mq Qdisc", + "category": [ + "qdisc", + "mq" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "echo \"1 1 4\" > /sys/bus/netdevsim/new_device", + "$TC qdisc add dev $ETH root handle 1: mq" + ], + "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: mq", + "expExitCode": "2", + "verifyCmd": "$TC qdisc show dev $ETH", + "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1", + "matchCount": "4", + "teardown": [ + "echo \"1\" > /sys/bus/netdevsim/del_device" + ] + }, + { + "id": "128a", + "name": "Delete nonexistent mq Qdisc", + "category": [ + "qdisc", + "mq" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "echo \"1 1 4\" > /sys/bus/netdevsim/new_device" + ], + "cmdUnderTest": "$TC qdisc del dev $ETH root handle 1: mq", + "expExitCode": "2", + "verifyCmd": "$TC qdisc show dev $ETH", + "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1", + "matchCount": "0", + "teardown": [ + "echo \"1\" > /sys/bus/netdevsim/del_device" + ] + }, + { + "id": "03a9", + "name": "Delete mq Qdisc twice", + "category": [ + "qdisc", + "mq" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "echo \"1 1 4\" > /sys/bus/netdevsim/new_device", + "$TC qdisc add dev $ETH root handle 1: mq", + "$TC qdisc del dev $ETH root handle 1: mq" + ], + "cmdUnderTest": "$TC qdisc del dev $ETH root handle 1: mq", + "expExitCode": "2", + "verifyCmd": "$TC qdisc show dev $ETH", + "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1", + "matchCount": "0", + "teardown": [ + "echo \"1\" > /sys/bus/netdevsim/del_device" + ] + }, + { + "id": "be0f", + "name": "Add mq Qdisc to single-queue device", + "category": [ + "qdisc", + "mq" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "echo \"1 1\" > /sys/bus/netdevsim/new_device" + ], + "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: mq", + "expExitCode": "2", + "verifyCmd": "$TC qdisc show dev $ETH", + "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1", + "matchCount": "0", + "teardown": [ + "echo \"1\" > /sys/bus/netdevsim/del_device" + ] + } +] diff --git a/tools/testing/selftests/tc-testing/tdc_config.py b/tools/testing/selftests/tc-testing/tdc_config.py index cd4a27ee1466..ea04f04c173e 100644 --- a/tools/testing/selftests/tc-testing/tdc_config.py +++ b/tools/testing/selftests/tc-testing/tdc_config.py @@ -17,6 +17,7 @@ NAMES = { 'DEV1': 'v0p1', 'DEV2': '', 'DUMMY': 'dummy1', + 'ETH': 'eth0', 'BATCH_FILE': './batch.txt', 'BATCH_DIR': 'tmp', # Length of time in seconds to wait before terminating a command diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index d20fba0fc290..b50dbe269f4b 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -892,6 +892,8 @@ static void kvm_destroy_vm_debugfs(struct kvm *kvm) static int kvm_create_vm_debugfs(struct kvm *kvm, int fd) { + static DEFINE_MUTEX(kvm_debugfs_lock); + struct dentry *dent; char dir_name[ITOA_MAX_LEN * 2]; struct kvm_stat_data *stat_data; const struct _kvm_stats_desc *pdesc; @@ -903,8 +905,20 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd) return 0; snprintf(dir_name, sizeof(dir_name), "%d-%d", task_pid_nr(current), fd); - kvm->debugfs_dentry = debugfs_create_dir(dir_name, kvm_debugfs_dir); + mutex_lock(&kvm_debugfs_lock); + dent = debugfs_lookup(dir_name, kvm_debugfs_dir); + if (dent) { + pr_warn_ratelimited("KVM: debugfs: duplicate directory %s\n", dir_name); + dput(dent); + mutex_unlock(&kvm_debugfs_lock); + return 0; + } + dent = debugfs_create_dir(dir_name, kvm_debugfs_dir); + mutex_unlock(&kvm_debugfs_lock); + if (IS_ERR(dent)) + return 0; + kvm->debugfs_dentry = dent; kvm->debugfs_stat_data = kcalloc(kvm_debugfs_num_entries, sizeof(*kvm->debugfs_stat_data), GFP_KERNEL_ACCOUNT); @@ -5201,7 +5215,7 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm) } add_uevent_var(env, "PID=%d", kvm->userspace_pid); - if (!IS_ERR_OR_NULL(kvm->debugfs_dentry)) { + if (kvm->debugfs_dentry) { char *tmp, *p = kmalloc(PATH_MAX, GFP_KERNEL_ACCOUNT); if (p) { |