author     Mike Pagano <mpagano@gentoo.org>  2022-08-31 11:44:02 -0400
committer  Mike Pagano <mpagano@gentoo.org>  2022-08-31 11:44:02 -0400
commit     cf0d78b576a8493230af436a1bcf0c6d71c3f370 (patch)
tree       fe6a63070d0862fe013406a93ceea68040284fae
parent     Revert fix from upstream for DRM/i915 thanks to Luigi 'Comio' Mantellini (diff)
Linux patch 5.19.6
Signed-off-by: Mike Pagano <mpagano@gentoo.org>
-rw-r--r--  0000_README               4
-rw-r--r--  1005_linux-5.19.6.patch   6485
2 files changed, 6489 insertions, 0 deletions
diff --git a/0000_README b/0000_README index 4172ad7c..3deab328 100644 --- a/0000_README +++ b/0000_README @@ -63,6 +63,10 @@ Patch: 1004_linux-5.19.5.patch From: http://www.kernel.org Desc: Linux 5.19.5 +Patch: 1005_linux-5.19.6.patch +From: http://www.kernel.org +Desc: Linux 5.19.6 + Patch: 1500_XATTR_USER_PREFIX.patch From: https://bugs.gentoo.org/show_bug.cgi?id=470644 Desc: Support for namespace user.pax.* on tmpfs. diff --git a/1005_linux-5.19.6.patch b/1005_linux-5.19.6.patch new file mode 100644 index 00000000..c5d4f0ee --- /dev/null +++ b/1005_linux-5.19.6.patch @@ -0,0 +1,6485 @@ +diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu +index bcc974d276dc4..3cda940108f63 100644 +--- a/Documentation/ABI/testing/sysfs-devices-system-cpu ++++ b/Documentation/ABI/testing/sysfs-devices-system-cpu +@@ -527,6 +527,7 @@ What: /sys/devices/system/cpu/vulnerabilities + /sys/devices/system/cpu/vulnerabilities/tsx_async_abort + /sys/devices/system/cpu/vulnerabilities/itlb_multihit + /sys/devices/system/cpu/vulnerabilities/mmio_stale_data ++ /sys/devices/system/cpu/vulnerabilities/retbleed + Date: January 2018 + Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org> + Description: Information about CPU vulnerabilities +diff --git a/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst b/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst +index 9393c50b5afc9..c98fd11907cc8 100644 +--- a/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst ++++ b/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst +@@ -230,6 +230,20 @@ The possible values in this file are: + * - 'Mitigation: Clear CPU buffers' + - The processor is vulnerable and the CPU buffer clearing mitigation is + enabled. ++ * - 'Unknown: No mitigations' ++ - The processor vulnerability status is unknown because it is ++ out of Servicing period. Mitigation is not attempted. ++ ++Definitions: ++------------ ++ ++Servicing period: The process of providing functional and security updates to ++Intel processors or platforms, utilizing the Intel Platform Update (IPU) ++process or other similar mechanisms. ++ ++End of Servicing Updates (ESU): ESU is the date at which Intel will no ++longer provide Servicing, such as through IPU or other similar update ++processes. ESU dates will typically be aligned to end of quarter. + + If the processor is vulnerable then the following information is appended to + the above information: +diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt +index e4fe443bea77d..1b38d0f70677e 100644 +--- a/Documentation/admin-guide/kernel-parameters.txt ++++ b/Documentation/admin-guide/kernel-parameters.txt +@@ -5260,6 +5260,8 @@ + rodata= [KNL] + on Mark read-only kernel memory as read-only (default). + off Leave read-only kernel memory writable for debugging. ++ full Mark read-only kernel memory and aliases as read-only ++ [arm64] + + rockchip.usb_uart + Enable the uart passthrough on the designated usb port +diff --git a/Documentation/admin-guide/sysctl/net.rst b/Documentation/admin-guide/sysctl/net.rst +index fcd650bdbc7e2..01d9858197832 100644 +--- a/Documentation/admin-guide/sysctl/net.rst ++++ b/Documentation/admin-guide/sysctl/net.rst +@@ -271,7 +271,7 @@ poll cycle or the number of packets processed reaches netdev_budget. 
+ netdev_max_backlog + ------------------ + +-Maximum number of packets, queued on the INPUT side, when the interface ++Maximum number of packets, queued on the INPUT side, when the interface + receives packets faster than kernel can process them. + + netdev_rss_key +diff --git a/Makefile b/Makefile +index 1c4f1ecb93488..cb68101ea070a 100644 +--- a/Makefile ++++ b/Makefile +@@ -1,7 +1,7 @@ + # SPDX-License-Identifier: GPL-2.0 + VERSION = 5 + PATCHLEVEL = 19 +-SUBLEVEL = 5 ++SUBLEVEL = 6 + EXTRAVERSION = + NAME = Superb Owl + +diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h +index 9bb1873f52951..6f86b7ab6c28f 100644 +--- a/arch/arm64/include/asm/fpsimd.h ++++ b/arch/arm64/include/asm/fpsimd.h +@@ -153,7 +153,7 @@ struct vl_info { + + #ifdef CONFIG_ARM64_SVE + +-extern void sve_alloc(struct task_struct *task); ++extern void sve_alloc(struct task_struct *task, bool flush); + extern void fpsimd_release_task(struct task_struct *task); + extern void fpsimd_sync_to_sve(struct task_struct *task); + extern void fpsimd_force_sync_to_sve(struct task_struct *task); +@@ -256,7 +256,7 @@ size_t sve_state_size(struct task_struct const *task); + + #else /* ! CONFIG_ARM64_SVE */ + +-static inline void sve_alloc(struct task_struct *task) { } ++static inline void sve_alloc(struct task_struct *task, bool flush) { } + static inline void fpsimd_release_task(struct task_struct *task) { } + static inline void sve_sync_to_fpsimd(struct task_struct *task) { } + static inline void sve_sync_from_fpsimd_zeropad(struct task_struct *task) { } +diff --git a/arch/arm64/include/asm/setup.h b/arch/arm64/include/asm/setup.h +index 6437df6617009..f4af547ef54ca 100644 +--- a/arch/arm64/include/asm/setup.h ++++ b/arch/arm64/include/asm/setup.h +@@ -3,6 +3,8 @@ + #ifndef __ARM64_ASM_SETUP_H + #define __ARM64_ASM_SETUP_H + ++#include <linux/string.h> ++ + #include <uapi/asm/setup.h> + + void *get_early_fdt_ptr(void); +@@ -14,4 +16,19 @@ void early_fdt_map(u64 dt_phys); + extern phys_addr_t __fdt_pointer __initdata; + extern u64 __cacheline_aligned boot_args[4]; + ++static inline bool arch_parse_debug_rodata(char *arg) ++{ ++ extern bool rodata_enabled; ++ extern bool rodata_full; ++ ++ if (arg && !strcmp(arg, "full")) { ++ rodata_enabled = true; ++ rodata_full = true; ++ return true; ++ } ++ ++ return false; ++} ++#define arch_parse_debug_rodata arch_parse_debug_rodata ++ + #endif +diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c +index 6b92989f4cc27..b374e258f705f 100644 +--- a/arch/arm64/kernel/cpu_errata.c ++++ b/arch/arm64/kernel/cpu_errata.c +@@ -208,6 +208,8 @@ static const struct arm64_cpu_capabilities arm64_repeat_tlbi_list[] = { + #ifdef CONFIG_ARM64_ERRATUM_1286807 + { + ERRATA_MIDR_RANGE(MIDR_CORTEX_A76, 0, 0, 3, 0), ++ }, ++ { + /* Kryo4xx Gold (rcpe to rfpe) => (r0p0 to r3p0) */ + ERRATA_MIDR_RANGE(MIDR_QCOM_KRYO_4XX_GOLD, 0xc, 0xe, 0xf, 0xe), + }, +diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c +index aecf3071efddd..52f7ffdffbcb9 100644 +--- a/arch/arm64/kernel/fpsimd.c ++++ b/arch/arm64/kernel/fpsimd.c +@@ -716,10 +716,12 @@ size_t sve_state_size(struct task_struct const *task) + * do_sve_acc() case, there is no ABI requirement to hide stale data + * written previously be task. 
+ */ +-void sve_alloc(struct task_struct *task) ++void sve_alloc(struct task_struct *task, bool flush) + { + if (task->thread.sve_state) { +- memset(task->thread.sve_state, 0, sve_state_size(task)); ++ if (flush) ++ memset(task->thread.sve_state, 0, ++ sve_state_size(task)); + return; + } + +@@ -1389,7 +1391,7 @@ void do_sve_acc(unsigned long esr, struct pt_regs *regs) + return; + } + +- sve_alloc(current); ++ sve_alloc(current, true); + if (!current->thread.sve_state) { + force_sig(SIGKILL); + return; +@@ -1440,7 +1442,7 @@ void do_sme_acc(unsigned long esr, struct pt_regs *regs) + return; + } + +- sve_alloc(current); ++ sve_alloc(current, false); + sme_alloc(current); + if (!current->thread.sve_state || !current->thread.za_state) { + force_sig(SIGKILL); +@@ -1461,17 +1463,6 @@ void do_sme_acc(unsigned long esr, struct pt_regs *regs) + fpsimd_bind_task_to_cpu(); + } + +- /* +- * If SVE was not already active initialise the SVE registers, +- * any non-shared state between the streaming and regular SVE +- * registers is architecturally guaranteed to be zeroed when +- * we enter streaming mode. We do not need to initialize ZA +- * since ZA must be disabled at this point and enabling ZA is +- * architecturally defined to zero ZA. +- */ +- if (system_supports_sve() && !test_thread_flag(TIF_SVE)) +- sve_init_regs(); +- + put_cpu_fpsimd_context(); + } + +diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c +index 21da83187a602..eb7c08dfb8348 100644 +--- a/arch/arm64/kernel/ptrace.c ++++ b/arch/arm64/kernel/ptrace.c +@@ -882,7 +882,7 @@ static int sve_set_common(struct task_struct *target, + * state and ensure there's storage. + */ + if (target->thread.svcr != old_svcr) +- sve_alloc(target); ++ sve_alloc(target, true); + } + + /* Registers: FPSIMD-only case */ +@@ -912,7 +912,7 @@ static int sve_set_common(struct task_struct *target, + goto out; + } + +- sve_alloc(target); ++ sve_alloc(target, true); + if (!target->thread.sve_state) { + ret = -ENOMEM; + clear_tsk_thread_flag(target, TIF_SVE); +@@ -1082,7 +1082,7 @@ static int za_set(struct task_struct *target, + + /* Ensure there is some SVE storage for streaming mode */ + if (!target->thread.sve_state) { +- sve_alloc(target); ++ sve_alloc(target, false); + if (!target->thread.sve_state) { + clear_thread_flag(TIF_SME); + ret = -ENOMEM; +diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c +index b0980fbb6bc7f..8bb631bf9464c 100644 +--- a/arch/arm64/kernel/signal.c ++++ b/arch/arm64/kernel/signal.c +@@ -307,7 +307,7 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user) + fpsimd_flush_task_state(current); + /* From now, fpsimd_thread_switch() won't touch thread.sve_state */ + +- sve_alloc(current); ++ sve_alloc(current, true); + if (!current->thread.sve_state) { + clear_thread_flag(TIF_SVE); + return -ENOMEM; +@@ -922,6 +922,16 @@ static void setup_return(struct pt_regs *regs, struct k_sigaction *ka, + + /* Signal handlers are invoked with ZA and streaming mode disabled */ + if (system_supports_sme()) { ++ /* ++ * If we were in streaming mode the saved register ++ * state was SVE but we will exit SM and use the ++ * FPSIMD register state - flush the saved FPSIMD ++ * register state in case it gets loaded. 
++ */ ++ if (current->thread.svcr & SVCR_SM_MASK) ++ memset(¤t->thread.uw.fpsimd_state, 0, ++ sizeof(current->thread.uw.fpsimd_state)); ++ + current->thread.svcr &= ~(SVCR_ZA_MASK | + SVCR_SM_MASK); + sme_smstop(); +diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c +index 626ec32873c6c..1de896e4d3347 100644 +--- a/arch/arm64/mm/mmu.c ++++ b/arch/arm64/mm/mmu.c +@@ -625,24 +625,6 @@ static void __init map_kernel_segment(pgd_t *pgdp, void *va_start, void *va_end, + vm_area_add_early(vma); + } + +-static int __init parse_rodata(char *arg) +-{ +- int ret = strtobool(arg, &rodata_enabled); +- if (!ret) { +- rodata_full = false; +- return 0; +- } +- +- /* permit 'full' in addition to boolean options */ +- if (strcmp(arg, "full")) +- return -EINVAL; +- +- rodata_enabled = true; +- rodata_full = true; +- return 0; +-} +-early_param("rodata", parse_rodata); +- + #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 + static int __init map_entry_trampoline(void) + { +diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig +index fa400055b2d50..cd2b3fe156724 100644 +--- a/arch/parisc/Kconfig ++++ b/arch/parisc/Kconfig +@@ -147,10 +147,10 @@ menu "Processor type and features" + + choice + prompt "Processor type" +- default PA7000 ++ default PA7000 if "$(ARCH)" = "parisc" + + config PA7000 +- bool "PA7000/PA7100" ++ bool "PA7000/PA7100" if "$(ARCH)" = "parisc" + help + This is the processor type of your CPU. This information is + used for optimizing purposes. In order to compile a kernel +@@ -161,21 +161,21 @@ config PA7000 + which is required on some machines. + + config PA7100LC +- bool "PA7100LC" ++ bool "PA7100LC" if "$(ARCH)" = "parisc" + help + Select this option for the PCX-L processor, as used in the + 712, 715/64, 715/80, 715/100, 715/100XC, 725/100, 743, 748, + D200, D210, D300, D310 and E-class + + config PA7200 +- bool "PA7200" ++ bool "PA7200" if "$(ARCH)" = "parisc" + help + Select this option for the PCX-T' processor, as used in the + C100, C110, J100, J110, J210XC, D250, D260, D350, D360, + K100, K200, K210, K220, K400, K410 and K420 + + config PA7300LC +- bool "PA7300LC" ++ bool "PA7300LC" if "$(ARCH)" = "parisc" + help + Select this option for the PCX-L2 processor, as used in the + 744, A180, B132L, B160L, B180L, C132L, C160L, C180L, +@@ -225,17 +225,8 @@ config MLONGCALLS + Enabling this option will probably slow down your kernel. + + config 64BIT +- bool "64-bit kernel" ++ def_bool "$(ARCH)" = "parisc64" + depends on PA8X00 +- help +- Enable this if you want to support 64bit kernel on PA-RISC platform. +- +- At the moment, only people willing to use more than 2GB of RAM, +- or having a 64bit-only capable PA-RISC machine should say Y here. +- +- Since there is no 64bit userland on PA-RISC, there is no point to +- enable this option otherwise. The 64bit kernel is significantly bigger +- and slower than the 32bit one. 
+ + choice + prompt "Kernel page size" +diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c +index bac581b5ecfc5..e8a4d77cff53a 100644 +--- a/arch/parisc/kernel/unaligned.c ++++ b/arch/parisc/kernel/unaligned.c +@@ -93,7 +93,7 @@ + #define R1(i) (((i)>>21)&0x1f) + #define R2(i) (((i)>>16)&0x1f) + #define R3(i) ((i)&0x1f) +-#define FR3(i) ((((i)<<1)&0x1f)|(((i)>>6)&1)) ++#define FR3(i) ((((i)&0x1f)<<1)|(((i)>>6)&1)) + #define IM(i,n) (((i)>>1&((1<<(n-1))-1))|((i)&1?((0-1L)<<(n-1)):0)) + #define IM5_2(i) IM((i)>>16,5) + #define IM5_3(i) IM((i),5) +diff --git a/arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts b/arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts +index 044982a11df50..f3f87ed2007f3 100644 +--- a/arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts ++++ b/arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts +@@ -84,12 +84,10 @@ + + phy1: ethernet-phy@9 { + reg = <9>; +- ti,fifo-depth = <0x1>; + }; + + phy0: ethernet-phy@8 { + reg = <8>; +- ti,fifo-depth = <0x1>; + }; + }; + +@@ -102,7 +100,6 @@ + disable-wp; + cap-sd-highspeed; + cap-mmc-highspeed; +- card-detect-delay = <200>; + mmc-ddr-1_8v; + mmc-hs200-1_8v; + sd-uhs-sdr12; +diff --git a/arch/riscv/boot/dts/microchip/mpfs-polarberry.dts b/arch/riscv/boot/dts/microchip/mpfs-polarberry.dts +index 82c93c8f5c17e..c87cc2d8fe29f 100644 +--- a/arch/riscv/boot/dts/microchip/mpfs-polarberry.dts ++++ b/arch/riscv/boot/dts/microchip/mpfs-polarberry.dts +@@ -54,12 +54,10 @@ + + phy1: ethernet-phy@5 { + reg = <5>; +- ti,fifo-depth = <0x01>; + }; + + phy0: ethernet-phy@4 { + reg = <4>; +- ti,fifo-depth = <0x01>; + }; + }; + +@@ -72,7 +70,6 @@ + disable-wp; + cap-sd-highspeed; + cap-mmc-highspeed; +- card-detect-delay = <200>; + mmc-ddr-1_8v; + mmc-hs200-1_8v; + sd-uhs-sdr12; +diff --git a/arch/riscv/boot/dts/microchip/mpfs.dtsi b/arch/riscv/boot/dts/microchip/mpfs.dtsi +index 496d3b7642bd1..9f5bce1488d93 100644 +--- a/arch/riscv/boot/dts/microchip/mpfs.dtsi ++++ b/arch/riscv/boot/dts/microchip/mpfs.dtsi +@@ -169,7 +169,7 @@ + cache-size = <2097152>; + cache-unified; + interrupt-parent = <&plic>; +- interrupts = <1>, <2>, <3>; ++ interrupts = <1>, <3>, <4>, <2>; + }; + + clint: clint@2000000 { +@@ -446,9 +446,8 @@ + ranges = <0x3000000 0x0 0x8000000 0x20 0x8000000 0x0 0x80000000>; + msi-parent = <&pcie>; + msi-controller; +- microchip,axi-m-atr0 = <0x10 0x0>; + status = "disabled"; +- pcie_intc: legacy-interrupt-controller { ++ pcie_intc: interrupt-controller { + #address-cells = <0>; + #interrupt-cells = <1>; + interrupt-controller; +diff --git a/arch/riscv/include/asm/signal.h b/arch/riscv/include/asm/signal.h +new file mode 100644 +index 0000000000000..532c29ef03769 +--- /dev/null ++++ b/arch/riscv/include/asm/signal.h +@@ -0,0 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0-only */ ++ ++#ifndef __ASM_SIGNAL_H ++#define __ASM_SIGNAL_H ++ ++#include <uapi/asm/signal.h> ++#include <uapi/asm/ptrace.h> ++ ++asmlinkage __visible ++void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags); ++ ++#endif +diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h +index 78933ac04995b..67322f878e0d7 100644 +--- a/arch/riscv/include/asm/thread_info.h ++++ b/arch/riscv/include/asm/thread_info.h +@@ -42,6 +42,8 @@ + + #ifndef __ASSEMBLY__ + ++extern long shadow_stack[SHADOW_OVERFLOW_STACK_SIZE / sizeof(long)]; ++ + #include <asm/processor.h> + #include <asm/csr.h> + +diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c +index 38b05ca6fe669..5a2de6b6f8822 100644 +--- 
a/arch/riscv/kernel/signal.c ++++ b/arch/riscv/kernel/signal.c +@@ -15,6 +15,7 @@ + + #include <asm/ucontext.h> + #include <asm/vdso.h> ++#include <asm/signal.h> + #include <asm/signal32.h> + #include <asm/switch_to.h> + #include <asm/csr.h> +diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c +index 39d0f8bba4b40..635e6ec269380 100644 +--- a/arch/riscv/kernel/traps.c ++++ b/arch/riscv/kernel/traps.c +@@ -20,9 +20,10 @@ + + #include <asm/asm-prototypes.h> + #include <asm/bug.h> ++#include <asm/csr.h> + #include <asm/processor.h> + #include <asm/ptrace.h> +-#include <asm/csr.h> ++#include <asm/thread_info.h> + + int show_unhandled_signals = 1; + +diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c +index 89949b9f3cf88..d5119e039d855 100644 +--- a/arch/s390/kernel/process.c ++++ b/arch/s390/kernel/process.c +@@ -91,6 +91,18 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) + + memcpy(dst, src, arch_task_struct_size); + dst->thread.fpu.regs = dst->thread.fpu.fprs; ++ ++ /* ++ * Don't transfer over the runtime instrumentation or the guarded ++ * storage control block pointers. These fields are cleared here instead ++ * of in copy_thread() to avoid premature freeing of associated memory ++ * on fork() failure. Wait to clear the RI flag because ->stack still ++ * refers to the source thread. ++ */ ++ dst->thread.ri_cb = NULL; ++ dst->thread.gs_cb = NULL; ++ dst->thread.gs_bc_cb = NULL; ++ + return 0; + } + +@@ -150,13 +162,11 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) + frame->childregs.flags = 0; + if (new_stackp) + frame->childregs.gprs[15] = new_stackp; +- +- /* Don't copy runtime instrumentation info */ +- p->thread.ri_cb = NULL; ++ /* ++ * Clear the runtime instrumentation flag after the above childregs ++ * copy. The CB pointer was already cleared in arch_dup_task_struct(). ++ */ + frame->childregs.psw.mask &= ~PSW_MASK_RI; +- /* Don't copy guarded storage control block */ +- p->thread.gs_cb = NULL; +- p->thread.gs_bc_cb = NULL; + + /* Set a new TLS ? */ + if (clone_flags & CLONE_SETTLS) { +diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c +index e173b6187ad56..516d232e66d9d 100644 +--- a/arch/s390/mm/fault.c ++++ b/arch/s390/mm/fault.c +@@ -379,7 +379,9 @@ static inline vm_fault_t do_exception(struct pt_regs *regs, int access) + flags = FAULT_FLAG_DEFAULT; + if (user_mode(regs)) + flags |= FAULT_FLAG_USER; +- if (access == VM_WRITE || is_write) ++ if (is_write) ++ access = VM_WRITE; ++ if (access == VM_WRITE) + flags |= FAULT_FLAG_WRITE; + mmap_read_lock(mm); + +diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h +index 4910bf230d7b4..62208ec04ca4b 100644 +--- a/arch/x86/boot/compressed/misc.h ++++ b/arch/x86/boot/compressed/misc.h +@@ -132,7 +132,17 @@ void snp_set_page_private(unsigned long paddr); + void snp_set_page_shared(unsigned long paddr); + void sev_prep_identity_maps(unsigned long top_level_pgt); + #else +-static inline void sev_enable(struct boot_params *bp) { } ++static inline void sev_enable(struct boot_params *bp) ++{ ++ /* ++ * bp->cc_blob_address should only be set by boot/compressed kernel. ++ * Initialize it to 0 unconditionally (thus here in this stub too) to ++ * ensure that uninitialized values from buggy bootloaders aren't ++ * propagated. 
++ */ ++ if (bp) ++ bp->cc_blob_address = 0; ++} + static inline void sev_es_shutdown_ghcb(void) { } + static inline bool sev_es_check_ghcb_fault(unsigned long address) + { +diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c +index 52f989f6acc28..c93930d5ccbd0 100644 +--- a/arch/x86/boot/compressed/sev.c ++++ b/arch/x86/boot/compressed/sev.c +@@ -276,6 +276,14 @@ void sev_enable(struct boot_params *bp) + struct msr m; + bool snp; + ++ /* ++ * bp->cc_blob_address should only be set by boot/compressed kernel. ++ * Initialize it to 0 to ensure that uninitialized values from ++ * buggy bootloaders aren't propagated. ++ */ ++ if (bp) ++ bp->cc_blob_address = 0; ++ + /* + * Setup/preliminary detection of SNP. This will be sanity-checked + * against CPUID/MSR values later. +diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S +index 682338e7e2a38..4dd19819053a5 100644 +--- a/arch/x86/entry/entry_64_compat.S ++++ b/arch/x86/entry/entry_64_compat.S +@@ -311,7 +311,7 @@ SYM_CODE_START(entry_INT80_compat) + * Interrupts are off on entry. + */ + ASM_CLAC /* Do this early to minimize exposure */ +- SWAPGS ++ ALTERNATIVE "swapgs", "", X86_FEATURE_XENPV + + /* + * User tracing code (ptrace or signal handlers) might assume that +diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c +index ba60427caa6d3..9b48d957d2b3f 100644 +--- a/arch/x86/events/intel/ds.c ++++ b/arch/x86/events/intel/ds.c +@@ -291,6 +291,7 @@ static u64 load_latency_data(struct perf_event *event, u64 status) + static u64 store_latency_data(struct perf_event *event, u64 status) + { + union intel_x86_pebs_dse dse; ++ union perf_mem_data_src src; + u64 val; + + dse.val = status; +@@ -304,7 +305,14 @@ static u64 store_latency_data(struct perf_event *event, u64 status) + + val |= P(BLK, NA); + +- return val; ++ /* ++ * the pebs_data_source table is only for loads ++ * so override the mem_op to say STORE instead ++ */ ++ src.val = val; ++ src.mem_op = P(OP,STORE); ++ ++ return src.val; + } + + struct pebs_record_core { +@@ -822,7 +830,7 @@ struct event_constraint intel_glm_pebs_event_constraints[] = { + + struct event_constraint intel_grt_pebs_event_constraints[] = { + /* Allow all events as PEBS with no flags */ +- INTEL_HYBRID_LAT_CONSTRAINT(0x5d0, 0xf), ++ INTEL_HYBRID_LAT_CONSTRAINT(0x5d0, 0x3), + INTEL_HYBRID_LAT_CONSTRAINT(0x6d0, 0xf), + EVENT_CONSTRAINT_END + }; +diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c +index 4f70fb6c2c1eb..47fca6a7a8bcd 100644 +--- a/arch/x86/events/intel/lbr.c ++++ b/arch/x86/events/intel/lbr.c +@@ -1097,6 +1097,14 @@ static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event) + + if (static_cpu_has(X86_FEATURE_ARCH_LBR)) { + reg->config = mask; ++ ++ /* ++ * The Arch LBR HW can retrieve the common branch types ++ * from the LBR_INFO. It doesn't require the high overhead ++ * SW disassemble. ++ * Enable the branch type by default for the Arch LBR. 
++ */ ++ reg->reg |= X86_BR_TYPE_SAVE; + return 0; + } + +diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c +index ce440011cc4e4..1ef4f7861e2ec 100644 +--- a/arch/x86/events/intel/uncore_snb.c ++++ b/arch/x86/events/intel/uncore_snb.c +@@ -841,6 +841,22 @@ int snb_pci2phy_map_init(int devid) + return 0; + } + ++static u64 snb_uncore_imc_read_counter(struct intel_uncore_box *box, struct perf_event *event) ++{ ++ struct hw_perf_event *hwc = &event->hw; ++ ++ /* ++ * SNB IMC counters are 32-bit and are laid out back to back ++ * in MMIO space. Therefore we must use a 32-bit accessor function ++ * using readq() from uncore_mmio_read_counter() causes problems ++ * because it is reading 64-bit at a time. This is okay for the ++ * uncore_perf_event_update() function because it drops the upper ++ * 32-bits but not okay for plain uncore_read_counter() as invoked ++ * in uncore_pmu_event_start(). ++ */ ++ return (u64)readl(box->io_addr + hwc->event_base); ++} ++ + static struct pmu snb_uncore_imc_pmu = { + .task_ctx_nr = perf_invalid_context, + .event_init = snb_uncore_imc_event_init, +@@ -860,7 +876,7 @@ static struct intel_uncore_ops snb_uncore_imc_ops = { + .disable_event = snb_uncore_imc_disable_event, + .enable_event = snb_uncore_imc_enable_event, + .hw_config = snb_uncore_imc_hw_config, +- .read_counter = uncore_mmio_read_counter, ++ .read_counter = snb_uncore_imc_read_counter, + }; + + static struct intel_uncore_type snb_uncore_imc = { +diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h +index ede8990f3e416..ccbb838f995c8 100644 +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ -456,7 +456,8 @@ + #define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */ + #define X86_BUG_SRBDS X86_BUG(24) /* CPU may leak RNG bits if not mitigated */ + #define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* CPU is affected by Processor MMIO Stale Data vulnerabilities */ +-#define X86_BUG_RETBLEED X86_BUG(26) /* CPU is affected by RETBleed */ +-#define X86_BUG_EIBRS_PBRSB X86_BUG(27) /* EIBRS is vulnerable to Post Barrier RSB Predictions */ ++#define X86_BUG_MMIO_UNKNOWN X86_BUG(26) /* CPU is too old and its MMIO Stale Data status is unknown */ ++#define X86_BUG_RETBLEED X86_BUG(27) /* CPU is affected by RETBleed */ ++#define X86_BUG_EIBRS_PBRSB X86_BUG(28) /* EIBRS is vulnerable to Post Barrier RSB Predictions */ + + #endif /* _ASM_X86_CPUFEATURES_H */ +diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h +index d3a3cc6772ee1..c31083d77e09c 100644 +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -35,33 +35,56 @@ + #define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */ + + /* ++ * Common helper for __FILL_RETURN_BUFFER and __FILL_ONE_RETURN. ++ */ ++#define __FILL_RETURN_SLOT \ ++ ANNOTATE_INTRA_FUNCTION_CALL; \ ++ call 772f; \ ++ int3; \ ++772: ++ ++/* ++ * Stuff the entire RSB. ++ * + * Google experimented with loop-unrolling and this turned out to be + * the optimal version - two calls, each with their own speculation + * trap should their return address end up getting used, in a loop. 
+ */ +-#define __FILL_RETURN_BUFFER(reg, nr, sp) \ +- mov $(nr/2), reg; \ +-771: \ +- ANNOTATE_INTRA_FUNCTION_CALL; \ +- call 772f; \ +-773: /* speculation trap */ \ +- UNWIND_HINT_EMPTY; \ +- pause; \ +- lfence; \ +- jmp 773b; \ +-772: \ +- ANNOTATE_INTRA_FUNCTION_CALL; \ +- call 774f; \ +-775: /* speculation trap */ \ +- UNWIND_HINT_EMPTY; \ +- pause; \ +- lfence; \ +- jmp 775b; \ +-774: \ +- add $(BITS_PER_LONG/8) * 2, sp; \ +- dec reg; \ +- jnz 771b; \ +- /* barrier for jnz misprediction */ \ ++#ifdef CONFIG_X86_64 ++#define __FILL_RETURN_BUFFER(reg, nr) \ ++ mov $(nr/2), reg; \ ++771: \ ++ __FILL_RETURN_SLOT \ ++ __FILL_RETURN_SLOT \ ++ add $(BITS_PER_LONG/8) * 2, %_ASM_SP; \ ++ dec reg; \ ++ jnz 771b; \ ++ /* barrier for jnz misprediction */ \ ++ lfence; ++#else ++/* ++ * i386 doesn't unconditionally have LFENCE, as such it can't ++ * do a loop. ++ */ ++#define __FILL_RETURN_BUFFER(reg, nr) \ ++ .rept nr; \ ++ __FILL_RETURN_SLOT; \ ++ .endr; \ ++ add $(BITS_PER_LONG/8) * nr, %_ASM_SP; ++#endif ++ ++/* ++ * Stuff a single RSB slot. ++ * ++ * To mitigate Post-Barrier RSB speculation, one CALL instruction must be ++ * forced to retire before letting a RET instruction execute. ++ * ++ * On PBRSB-vulnerable CPUs, it is not safe for a RET to be executed ++ * before this point. ++ */ ++#define __FILL_ONE_RETURN \ ++ __FILL_RETURN_SLOT \ ++ add $(BITS_PER_LONG/8), %_ASM_SP; \ + lfence; + + #ifdef __ASSEMBLY__ +@@ -120,28 +143,15 @@ + #endif + .endm + +-.macro ISSUE_UNBALANCED_RET_GUARD +- ANNOTATE_INTRA_FUNCTION_CALL +- call .Lunbalanced_ret_guard_\@ +- int3 +-.Lunbalanced_ret_guard_\@: +- add $(BITS_PER_LONG/8), %_ASM_SP +- lfence +-.endm +- + /* + * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP + * monstrosity above, manually. + */ +-.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req ftr2 +-.ifb \ftr2 +- ALTERNATIVE "jmp .Lskip_rsb_\@", "", \ftr +-.else +- ALTERNATIVE_2 "jmp .Lskip_rsb_\@", "", \ftr, "jmp .Lunbalanced_\@", \ftr2 +-.endif +- __FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP) +-.Lunbalanced_\@: +- ISSUE_UNBALANCED_RET_GUARD ++.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req ftr2=ALT_NOT(X86_FEATURE_ALWAYS) ++ ALTERNATIVE_2 "jmp .Lskip_rsb_\@", \ ++ __stringify(__FILL_RETURN_BUFFER(\reg,\nr)), \ftr, \ ++ __stringify(__FILL_ONE_RETURN), \ftr2 ++ + .Lskip_rsb_\@: + .endm + +diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c +index 510d85261132b..da7c361f47e0d 100644 +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -433,7 +433,8 @@ static void __init mmio_select_mitigation(void) + u64 ia32_cap; + + if (!boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA) || +- cpu_mitigations_off()) { ++ boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN) || ++ cpu_mitigations_off()) { + mmio_mitigation = MMIO_MITIGATION_OFF; + return; + } +@@ -538,6 +539,8 @@ out: + pr_info("TAA: %s\n", taa_strings[taa_mitigation]); + if (boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA)) + pr_info("MMIO Stale Data: %s\n", mmio_strings[mmio_mitigation]); ++ else if (boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN)) ++ pr_info("MMIO Stale Data: Unknown: No mitigations\n"); + } + + static void __init md_clear_select_mitigation(void) +@@ -2275,6 +2278,9 @@ static ssize_t tsx_async_abort_show_state(char *buf) + + static ssize_t mmio_stale_data_show_state(char *buf) + { ++ if (boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN)) ++ return sysfs_emit(buf, "Unknown: No mitigations\n"); ++ + if (mmio_mitigation == MMIO_MITIGATION_OFF) + return sysfs_emit(buf, "%s\n", mmio_strings[mmio_mitigation]); + +@@ -2421,6 +2427,7 @@ 
static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr + return srbds_show_state(buf); + + case X86_BUG_MMIO_STALE_DATA: ++ case X86_BUG_MMIO_UNKNOWN: + return mmio_stale_data_show_state(buf); + + case X86_BUG_RETBLEED: +@@ -2480,7 +2487,10 @@ ssize_t cpu_show_srbds(struct device *dev, struct device_attribute *attr, char * + + ssize_t cpu_show_mmio_stale_data(struct device *dev, struct device_attribute *attr, char *buf) + { +- return cpu_show_common(dev, attr, buf, X86_BUG_MMIO_STALE_DATA); ++ if (boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN)) ++ return cpu_show_common(dev, attr, buf, X86_BUG_MMIO_UNKNOWN); ++ else ++ return cpu_show_common(dev, attr, buf, X86_BUG_MMIO_STALE_DATA); + } + + ssize_t cpu_show_retbleed(struct device *dev, struct device_attribute *attr, char *buf) +diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c +index 64a73f415f036..3e508f2390983 100644 +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@ -1135,7 +1135,8 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) + #define NO_SWAPGS BIT(6) + #define NO_ITLB_MULTIHIT BIT(7) + #define NO_SPECTRE_V2 BIT(8) +-#define NO_EIBRS_PBRSB BIT(9) ++#define NO_MMIO BIT(9) ++#define NO_EIBRS_PBRSB BIT(10) + + #define VULNWL(vendor, family, model, whitelist) \ + X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, whitelist) +@@ -1158,6 +1159,11 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { + VULNWL(VORTEX, 6, X86_MODEL_ANY, NO_SPECULATION), + + /* Intel Family 6 */ ++ VULNWL_INTEL(TIGERLAKE, NO_MMIO), ++ VULNWL_INTEL(TIGERLAKE_L, NO_MMIO), ++ VULNWL_INTEL(ALDERLAKE, NO_MMIO), ++ VULNWL_INTEL(ALDERLAKE_L, NO_MMIO), ++ + VULNWL_INTEL(ATOM_SALTWELL, NO_SPECULATION | NO_ITLB_MULTIHIT), + VULNWL_INTEL(ATOM_SALTWELL_TABLET, NO_SPECULATION | NO_ITLB_MULTIHIT), + VULNWL_INTEL(ATOM_SALTWELL_MID, NO_SPECULATION | NO_ITLB_MULTIHIT), +@@ -1176,9 +1182,9 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { + VULNWL_INTEL(ATOM_AIRMONT_MID, NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), + VULNWL_INTEL(ATOM_AIRMONT_NP, NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT), + +- VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT), +- VULNWL_INTEL(ATOM_GOLDMONT_D, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT), +- VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_EIBRS_PBRSB), ++ VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), ++ VULNWL_INTEL(ATOM_GOLDMONT_D, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), ++ VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB), + + /* + * Technically, swapgs isn't serializing on AMD (despite it previously +@@ -1193,18 +1199,18 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { + VULNWL_INTEL(ATOM_TREMONT_D, NO_ITLB_MULTIHIT | NO_EIBRS_PBRSB), + + /* AMD Family 0xf - 0x12 */ +- VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), +- VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), +- VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), +- VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), ++ VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), ++ VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | 
NO_ITLB_MULTIHIT | NO_MMIO), ++ VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), ++ VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), + + /* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */ +- VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), +- VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), ++ VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), ++ VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), + + /* Zhaoxin Family 7 */ +- VULNWL(CENTAUR, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS), +- VULNWL(ZHAOXIN, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS), ++ VULNWL(CENTAUR, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO), ++ VULNWL(ZHAOXIN, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO), + {} + }; + +@@ -1358,10 +1364,16 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) + * Affected CPU list is generally enough to enumerate the vulnerability, + * but for virtualization case check for ARCH_CAP MSR bits also, VMM may + * not want the guest to enumerate the bug. ++ * ++ * Set X86_BUG_MMIO_UNKNOWN for CPUs that are neither in the blacklist, ++ * nor in the whitelist and also don't enumerate MSR ARCH_CAP MMIO bits. + */ +- if (cpu_matches(cpu_vuln_blacklist, MMIO) && +- !arch_cap_mmio_immune(ia32_cap)) +- setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA); ++ if (!arch_cap_mmio_immune(ia32_cap)) { ++ if (cpu_matches(cpu_vuln_blacklist, MMIO)) ++ setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA); ++ else if (!cpu_matches(cpu_vuln_whitelist, NO_MMIO)) ++ setup_force_cpu_bug(X86_BUG_MMIO_UNKNOWN); ++ } + + if (!cpu_has(c, X86_FEATURE_BTC_NO)) { + if (cpu_matches(cpu_vuln_blacklist, RETBLEED) || (ia32_cap & ARCH_CAP_RSBA)) +diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c +index 63dc626627a03..4f84c3f11af5b 100644 +--- a/arch/x86/kernel/sev.c ++++ b/arch/x86/kernel/sev.c +@@ -701,7 +701,13 @@ e_term: + void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, + unsigned int npages) + { +- if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) ++ /* ++ * This can be invoked in early boot while running identity mapped, so ++ * use an open coded check for SNP instead of using cc_platform_has(). ++ * This eliminates worries about jump tables or checking boot_cpu_data ++ * in the cc_platform_has() function. ++ */ ++ if (!(sev_status & MSR_AMD64_SEV_SNP_ENABLED)) + return; + + /* +@@ -717,7 +723,13 @@ void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long padd + void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr, + unsigned int npages) + { +- if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) ++ /* ++ * This can be invoked in early boot while running identity mapped, so ++ * use an open coded check for SNP instead of using cc_platform_has(). ++ * This eliminates worries about jump tables or checking boot_cpu_data ++ * in the cc_platform_has() function. ++ */ ++ if (!(sev_status & MSR_AMD64_SEV_SNP_ENABLED)) + return; + + /* Invalidate the memory pages before they are marked shared in the RMP table. 
*/ +diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c +index 38185aedf7d16..0ea57da929407 100644 +--- a/arch/x86/kernel/unwind_orc.c ++++ b/arch/x86/kernel/unwind_orc.c +@@ -93,22 +93,27 @@ static struct orc_entry *orc_find(unsigned long ip); + static struct orc_entry *orc_ftrace_find(unsigned long ip) + { + struct ftrace_ops *ops; +- unsigned long caller; ++ unsigned long tramp_addr, offset; + + ops = ftrace_ops_trampoline(ip); + if (!ops) + return NULL; + ++ /* Set tramp_addr to the start of the code copied by the trampoline */ + if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) +- caller = (unsigned long)ftrace_regs_call; ++ tramp_addr = (unsigned long)ftrace_regs_caller; + else +- caller = (unsigned long)ftrace_call; ++ tramp_addr = (unsigned long)ftrace_caller; ++ ++ /* Now place tramp_addr to the location within the trampoline ip is at */ ++ offset = ip - ops->trampoline; ++ tramp_addr += offset; + + /* Prevent unlikely recursion */ +- if (ip == caller) ++ if (ip == tramp_addr) + return NULL; + +- return orc_find(caller); ++ return orc_find(tramp_addr); + } + #else + static struct orc_entry *orc_ftrace_find(unsigned long ip) +diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c +index d5ef64ddd35e9..66a209f7eb86d 100644 +--- a/arch/x86/mm/pat/memtype.c ++++ b/arch/x86/mm/pat/memtype.c +@@ -62,6 +62,7 @@ + + static bool __read_mostly pat_bp_initialized; + static bool __read_mostly pat_disabled = !IS_ENABLED(CONFIG_X86_PAT); ++static bool __initdata pat_force_disabled = !IS_ENABLED(CONFIG_X86_PAT); + static bool __read_mostly pat_bp_enabled; + static bool __read_mostly pat_cm_initialized; + +@@ -86,6 +87,7 @@ void pat_disable(const char *msg_reason) + static int __init nopat(char *str) + { + pat_disable("PAT support disabled via boot option."); ++ pat_force_disabled = true; + return 0; + } + early_param("nopat", nopat); +@@ -272,7 +274,7 @@ static void pat_ap_init(u64 pat) + wrmsrl(MSR_IA32_CR_PAT, pat); + } + +-void init_cache_modes(void) ++void __init init_cache_modes(void) + { + u64 pat = 0; + +@@ -313,6 +315,12 @@ void init_cache_modes(void) + */ + pat = PAT(0, WB) | PAT(1, WT) | PAT(2, UC_MINUS) | PAT(3, UC) | + PAT(4, WB) | PAT(5, WT) | PAT(6, UC_MINUS) | PAT(7, UC); ++ } else if (!pat_force_disabled && cpu_feature_enabled(X86_FEATURE_HYPERVISOR)) { ++ /* ++ * Clearly PAT is enabled underneath. Allow pat_enabled() to ++ * reflect this. ++ */ ++ pat_bp_enabled = true; + } + + __init_cache_modes(pat); +diff --git a/block/blk-mq.c b/block/blk-mq.c +index 1eb13d57a946f..0a299941c622e 100644 +--- a/block/blk-mq.c ++++ b/block/blk-mq.c +@@ -1925,7 +1925,8 @@ out: + /* If we didn't flush the entire list, we could have told the driver + * there was more coming, but that turned out to be a lie. + */ +- if ((!list_empty(list) || errors) && q->mq_ops->commit_rqs && queued) ++ if ((!list_empty(list) || errors || needs_resource || ++ ret == BLK_STS_DEV_RESOURCE) && q->mq_ops->commit_rqs && queued) + q->mq_ops->commit_rqs(hctx); + /* + * Any items that need requeuing? 
Stuff them into hctx->dispatch, +@@ -2678,6 +2679,7 @@ void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx, + list_del_init(&rq->queuelist); + ret = blk_mq_request_issue_directly(rq, list_empty(list)); + if (ret != BLK_STS_OK) { ++ errors++; + if (ret == BLK_STS_RESOURCE || + ret == BLK_STS_DEV_RESOURCE) { + blk_mq_request_bypass_insert(rq, false, +@@ -2685,7 +2687,6 @@ void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx, + break; + } + blk_mq_end_request(rq, ret); +- errors++; + } else + queued++; + } +diff --git a/drivers/acpi/processor_thermal.c b/drivers/acpi/processor_thermal.c +index d8b2dfcd59b5f..83a430ad22556 100644 +--- a/drivers/acpi/processor_thermal.c ++++ b/drivers/acpi/processor_thermal.c +@@ -151,7 +151,7 @@ void acpi_thermal_cpufreq_exit(struct cpufreq_policy *policy) + unsigned int cpu; + + for_each_cpu(cpu, policy->related_cpus) { +- struct acpi_processor *pr = per_cpu(processors, policy->cpu); ++ struct acpi_processor *pr = per_cpu(processors, cpu); + + if (pr) + freq_qos_remove_request(&pr->thermal_req); +diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c +index d044418294f94..7981a25983764 100644 +--- a/drivers/android/binder_alloc.c ++++ b/drivers/android/binder_alloc.c +@@ -395,12 +395,15 @@ static struct binder_buffer *binder_alloc_new_buf_locked( + size_t size, data_offsets_size; + int ret; + ++ mmap_read_lock(alloc->vma_vm_mm); + if (!binder_alloc_get_vma(alloc)) { ++ mmap_read_unlock(alloc->vma_vm_mm); + binder_alloc_debug(BINDER_DEBUG_USER_ERROR, + "%d: binder_alloc_buf, no vma\n", + alloc->pid); + return ERR_PTR(-ESRCH); + } ++ mmap_read_unlock(alloc->vma_vm_mm); + + data_offsets_size = ALIGN(data_size, sizeof(void *)) + + ALIGN(offsets_size, sizeof(void *)); +@@ -922,17 +925,25 @@ void binder_alloc_print_pages(struct seq_file *m, + * Make sure the binder_alloc is fully initialized, otherwise we might + * read inconsistent state. 
+ */ +- if (binder_alloc_get_vma(alloc) != NULL) { +- for (i = 0; i < alloc->buffer_size / PAGE_SIZE; i++) { +- page = &alloc->pages[i]; +- if (!page->page_ptr) +- free++; +- else if (list_empty(&page->lru)) +- active++; +- else +- lru++; +- } ++ ++ mmap_read_lock(alloc->vma_vm_mm); ++ if (binder_alloc_get_vma(alloc) == NULL) { ++ mmap_read_unlock(alloc->vma_vm_mm); ++ goto uninitialized; + } ++ ++ mmap_read_unlock(alloc->vma_vm_mm); ++ for (i = 0; i < alloc->buffer_size / PAGE_SIZE; i++) { ++ page = &alloc->pages[i]; ++ if (!page->page_ptr) ++ free++; ++ else if (list_empty(&page->lru)) ++ active++; ++ else ++ lru++; ++ } ++ ++uninitialized: + mutex_unlock(&alloc->mutex); + seq_printf(m, " pages: %d:%d:%d\n", active, lru, free); + seq_printf(m, " pages high watermark: %zu\n", alloc->pages_high); +diff --git a/drivers/block/loop.c b/drivers/block/loop.c +index 084f9b8a0ba3c..a59910ef948e9 100644 +--- a/drivers/block/loop.c ++++ b/drivers/block/loop.c +@@ -979,6 +979,11 @@ loop_set_status_from_info(struct loop_device *lo, + + lo->lo_offset = info->lo_offset; + lo->lo_sizelimit = info->lo_sizelimit; ++ ++ /* loff_t vars have been assigned __u64 */ ++ if (lo->lo_offset < 0 || lo->lo_sizelimit < 0) ++ return -EOVERFLOW; ++ + memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE); + lo->lo_file_name[LO_NAME_SIZE-1] = 0; + lo->lo_flags = info->lo_flags; +diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c +index b8549c61ff2ce..b144be41290e3 100644 +--- a/drivers/block/zram/zram_drv.c ++++ b/drivers/block/zram/zram_drv.c +@@ -1144,14 +1144,15 @@ static ssize_t bd_stat_show(struct device *dev, + static ssize_t debug_stat_show(struct device *dev, + struct device_attribute *attr, char *buf) + { +- int version = 2; ++ int version = 1; + struct zram *zram = dev_to_zram(dev); + ssize_t ret; + + down_read(&zram->init_lock); + ret = scnprintf(buf, PAGE_SIZE, +- "version: %d\n%8llu\n", ++ "version: %d\n%8llu %8llu\n", + version, ++ (u64)atomic64_read(&zram->stats.writestall), + (u64)atomic64_read(&zram->stats.miss_free)); + up_read(&zram->init_lock); + +@@ -1367,6 +1368,7 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, + } + kunmap_atomic(mem); + ++compress_again: + zstrm = zcomp_stream_get(zram->comp); + src = kmap_atomic(page); + ret = zcomp_compress(zstrm, src, &comp_len); +@@ -1375,20 +1377,39 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, + if (unlikely(ret)) { + zcomp_stream_put(zram->comp); + pr_err("Compression failed! err=%d\n", ret); ++ zs_free(zram->mem_pool, handle); + return ret; + } + + if (comp_len >= huge_class_size) + comp_len = PAGE_SIZE; +- +- handle = zs_malloc(zram->mem_pool, comp_len, +- __GFP_KSWAPD_RECLAIM | +- __GFP_NOWARN | +- __GFP_HIGHMEM | +- __GFP_MOVABLE); +- +- if (unlikely(!handle)) { ++ /* ++ * handle allocation has 2 paths: ++ * a) fast path is executed with preemption disabled (for ++ * per-cpu streams) and has __GFP_DIRECT_RECLAIM bit clear, ++ * since we can't sleep; ++ * b) slow path enables preemption and attempts to allocate ++ * the page with __GFP_DIRECT_RECLAIM bit set. we have to ++ * put per-cpu compression stream and, thus, to re-do ++ * the compression once handle is allocated. ++ * ++ * if we have a 'non-null' handle here then we are coming ++ * from the slow path and handle has already been allocated. 
++ */ ++ if (!handle) ++ handle = zs_malloc(zram->mem_pool, comp_len, ++ __GFP_KSWAPD_RECLAIM | ++ __GFP_NOWARN | ++ __GFP_HIGHMEM | ++ __GFP_MOVABLE); ++ if (!handle) { + zcomp_stream_put(zram->comp); ++ atomic64_inc(&zram->stats.writestall); ++ handle = zs_malloc(zram->mem_pool, comp_len, ++ GFP_NOIO | __GFP_HIGHMEM | ++ __GFP_MOVABLE); ++ if (handle) ++ goto compress_again; + return -ENOMEM; + } + +@@ -1946,6 +1967,7 @@ static int zram_add(void) + if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE) + blk_queue_max_write_zeroes_sectors(zram->disk->queue, UINT_MAX); + ++ blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, zram->disk->queue); + ret = device_add_disk(NULL, zram->disk, zram_disk_groups); + if (ret) + goto out_cleanup_disk; +diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h +index 158c91e548501..80c3b43b4828f 100644 +--- a/drivers/block/zram/zram_drv.h ++++ b/drivers/block/zram/zram_drv.h +@@ -81,6 +81,7 @@ struct zram_stats { + atomic64_t huge_pages_since; /* no. of huge pages since zram set up */ + atomic64_t pages_stored; /* no. of pages currently stored */ + atomic_long_t max_used_pages; /* no. of maximum pages stored */ ++ atomic64_t writestall; /* no. of write slow paths */ + atomic64_t miss_free; /* no. of missed free */ + #ifdef CONFIG_ZRAM_WRITEBACK + atomic64_t bd_count; /* no. of pages in backing device */ +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c +index d9f57a20a8bc5..29ebc9e51305a 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c +@@ -377,12 +377,8 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf) + f2g = &gfx_v10_3_kfd2kgd; + break; + case IP_VERSION(10, 3, 6): +- gfx_target_version = 100306; +- if (!vf) +- f2g = &gfx_v10_3_kfd2kgd; +- break; + case IP_VERSION(10, 3, 7): +- gfx_target_version = 100307; ++ gfx_target_version = 100306; + if (!vf) + f2g = &gfx_v10_3_kfd2kgd; + break; +diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c +index 05076e530e7d4..e29175e4b44ce 100644 +--- a/drivers/gpu/drm/nouveau/nouveau_bo.c ++++ b/drivers/gpu/drm/nouveau/nouveau_bo.c +@@ -820,6 +820,15 @@ nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict, + if (ret == 0) { + ret = nouveau_fence_new(chan, false, &fence); + if (ret == 0) { ++ /* TODO: figure out a better solution here ++ * ++ * wait on the fence here explicitly as going through ++ * ttm_bo_move_accel_cleanup somehow doesn't seem to do it. ++ * ++ * Without this the operation can timeout and we'll fallback to a ++ * software copy, which might take several minutes to finish. ++ */ ++ nouveau_fence_wait(fence, false, false); + ret = ttm_bo_move_accel_cleanup(bo, + &fence->base, + evict, false, +diff --git a/drivers/md/md.c b/drivers/md/md.c +index 522b3d6b8c46b..91e7e80fce489 100644 +--- a/drivers/md/md.c ++++ b/drivers/md/md.c +@@ -6244,11 +6244,11 @@ static void mddev_detach(struct mddev *mddev) + static void __md_stop(struct mddev *mddev) + { + struct md_personality *pers = mddev->pers; ++ md_bitmap_destroy(mddev); + mddev_detach(mddev); + /* Ensure ->event_work is done */ + if (mddev->event_work.func) + flush_workqueue(md_misc_wq); +- md_bitmap_destroy(mddev); + spin_lock(&mddev->lock); + mddev->pers = NULL; + spin_unlock(&mddev->lock); +@@ -6266,6 +6266,7 @@ void md_stop(struct mddev *mddev) + /* stop the array and free an attached data structures. 
+ * This is called from dm-raid + */ ++ __md_stop_writes(mddev); + __md_stop(mddev); + bioset_exit(&mddev->bio_set); + bioset_exit(&mddev->sync_set); +diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c +index d7fb33c078e81..1f0120cbe9e80 100644 +--- a/drivers/net/bonding/bond_3ad.c ++++ b/drivers/net/bonding/bond_3ad.c +@@ -2007,30 +2007,24 @@ void bond_3ad_initiate_agg_selection(struct bonding *bond, int timeout) + */ + void bond_3ad_initialize(struct bonding *bond, u16 tick_resolution) + { +- /* check that the bond is not initialized yet */ +- if (!MAC_ADDRESS_EQUAL(&(BOND_AD_INFO(bond).system.sys_mac_addr), +- bond->dev->dev_addr)) { +- +- BOND_AD_INFO(bond).aggregator_identifier = 0; +- +- BOND_AD_INFO(bond).system.sys_priority = +- bond->params.ad_actor_sys_prio; +- if (is_zero_ether_addr(bond->params.ad_actor_system)) +- BOND_AD_INFO(bond).system.sys_mac_addr = +- *((struct mac_addr *)bond->dev->dev_addr); +- else +- BOND_AD_INFO(bond).system.sys_mac_addr = +- *((struct mac_addr *)bond->params.ad_actor_system); ++ BOND_AD_INFO(bond).aggregator_identifier = 0; ++ BOND_AD_INFO(bond).system.sys_priority = ++ bond->params.ad_actor_sys_prio; ++ if (is_zero_ether_addr(bond->params.ad_actor_system)) ++ BOND_AD_INFO(bond).system.sys_mac_addr = ++ *((struct mac_addr *)bond->dev->dev_addr); ++ else ++ BOND_AD_INFO(bond).system.sys_mac_addr = ++ *((struct mac_addr *)bond->params.ad_actor_system); + +- /* initialize how many times this module is called in one +- * second (should be about every 100ms) +- */ +- ad_ticks_per_sec = tick_resolution; ++ /* initialize how many times this module is called in one ++ * second (should be about every 100ms) ++ */ ++ ad_ticks_per_sec = tick_resolution; + +- bond_3ad_initiate_agg_selection(bond, +- AD_AGGREGATOR_SELECTION_TIMER * +- ad_ticks_per_sec); +- } ++ bond_3ad_initiate_agg_selection(bond, ++ AD_AGGREGATOR_SELECTION_TIMER * ++ ad_ticks_per_sec); + } + + /** +diff --git a/drivers/net/dsa/microchip/ksz8795.c b/drivers/net/dsa/microchip/ksz8795.c +index 12a599d5e61a4..c771797fd902f 100644 +--- a/drivers/net/dsa/microchip/ksz8795.c ++++ b/drivers/net/dsa/microchip/ksz8795.c +@@ -898,17 +898,6 @@ static void ksz8_w_phy(struct ksz_device *dev, u16 phy, u16 reg, u16 val) + } + } + +-static enum dsa_tag_protocol ksz8_get_tag_protocol(struct dsa_switch *ds, +- int port, +- enum dsa_tag_protocol mp) +-{ +- struct ksz_device *dev = ds->priv; +- +- /* ksz88x3 uses the same tag schema as KSZ9893 */ +- return ksz_is_ksz88x3(dev) ? 
+- DSA_TAG_PROTO_KSZ9893 : DSA_TAG_PROTO_KSZ8795; +-} +- + static u32 ksz8_sw_get_phy_flags(struct dsa_switch *ds, int port) + { + /* Silicon Errata Sheet (DS80000830A): +@@ -969,11 +958,9 @@ static void ksz8_flush_dyn_mac_table(struct ksz_device *dev, int port) + } + } + +-static int ksz8_port_vlan_filtering(struct dsa_switch *ds, int port, bool flag, ++static int ksz8_port_vlan_filtering(struct ksz_device *dev, int port, bool flag, + struct netlink_ext_ack *extack) + { +- struct ksz_device *dev = ds->priv; +- + if (ksz_is_ksz88x3(dev)) + return -ENOTSUPP; + +@@ -998,12 +985,11 @@ static void ksz8_port_enable_pvid(struct ksz_device *dev, int port, bool state) + } + } + +-static int ksz8_port_vlan_add(struct dsa_switch *ds, int port, ++static int ksz8_port_vlan_add(struct ksz_device *dev, int port, + const struct switchdev_obj_port_vlan *vlan, + struct netlink_ext_ack *extack) + { + bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED; +- struct ksz_device *dev = ds->priv; + struct ksz_port *p = &dev->ports[port]; + u16 data, new_pvid = 0; + u8 fid, member, valid; +@@ -1071,10 +1057,9 @@ static int ksz8_port_vlan_add(struct dsa_switch *ds, int port, + return 0; + } + +-static int ksz8_port_vlan_del(struct dsa_switch *ds, int port, ++static int ksz8_port_vlan_del(struct ksz_device *dev, int port, + const struct switchdev_obj_port_vlan *vlan) + { +- struct ksz_device *dev = ds->priv; + u16 data, pvid; + u8 fid, member, valid; + +@@ -1104,12 +1089,10 @@ static int ksz8_port_vlan_del(struct dsa_switch *ds, int port, + return 0; + } + +-static int ksz8_port_mirror_add(struct dsa_switch *ds, int port, ++static int ksz8_port_mirror_add(struct ksz_device *dev, int port, + struct dsa_mall_mirror_tc_entry *mirror, + bool ingress, struct netlink_ext_ack *extack) + { +- struct ksz_device *dev = ds->priv; +- + if (ingress) { + ksz_port_cfg(dev, port, P_MIRROR_CTRL, PORT_MIRROR_RX, true); + dev->mirror_rx |= BIT(port); +@@ -1128,10 +1111,9 @@ static int ksz8_port_mirror_add(struct dsa_switch *ds, int port, + return 0; + } + +-static void ksz8_port_mirror_del(struct dsa_switch *ds, int port, ++static void ksz8_port_mirror_del(struct ksz_device *dev, int port, + struct dsa_mall_mirror_tc_entry *mirror) + { +- struct ksz_device *dev = ds->priv; + u8 data; + + if (mirror->ingress) { +@@ -1272,7 +1254,7 @@ static void ksz8_config_cpu_port(struct dsa_switch *ds) + continue; + if (!ksz_is_ksz88x3(dev)) { + ksz_pread8(dev, i, regs[P_REMOTE_STATUS], &remote); +- if (remote & PORT_FIBER_MODE) ++ if (remote & KSZ8_PORT_FIBER_MODE) + p->fiber = 1; + } + if (p->fiber) +@@ -1371,13 +1353,9 @@ static int ksz8_setup(struct dsa_switch *ds) + return ksz8_handle_global_errata(ds); + } + +-static void ksz8_get_caps(struct dsa_switch *ds, int port, ++static void ksz8_get_caps(struct ksz_device *dev, int port, + struct phylink_config *config) + { +- struct ksz_device *dev = ds->priv; +- +- ksz_phylink_get_caps(ds, port, config); +- + config->mac_capabilities = MAC_10 | MAC_100; + + /* Silicon Errata Sheet (DS80000830A): +@@ -1394,12 +1372,12 @@ static void ksz8_get_caps(struct dsa_switch *ds, int port, + } + + static const struct dsa_switch_ops ksz8_switch_ops = { +- .get_tag_protocol = ksz8_get_tag_protocol, ++ .get_tag_protocol = ksz_get_tag_protocol, + .get_phy_flags = ksz8_sw_get_phy_flags, + .setup = ksz8_setup, + .phy_read = ksz_phy_read16, + .phy_write = ksz_phy_write16, +- .phylink_get_caps = ksz8_get_caps, ++ .phylink_get_caps = ksz_phylink_get_caps, + .phylink_mac_link_down = ksz_mac_link_down, + .port_enable = 
ksz_enable_port, + .get_strings = ksz_get_strings, +@@ -1409,14 +1387,14 @@ static const struct dsa_switch_ops ksz8_switch_ops = { + .port_bridge_leave = ksz_port_bridge_leave, + .port_stp_state_set = ksz8_port_stp_state_set, + .port_fast_age = ksz_port_fast_age, +- .port_vlan_filtering = ksz8_port_vlan_filtering, +- .port_vlan_add = ksz8_port_vlan_add, +- .port_vlan_del = ksz8_port_vlan_del, ++ .port_vlan_filtering = ksz_port_vlan_filtering, ++ .port_vlan_add = ksz_port_vlan_add, ++ .port_vlan_del = ksz_port_vlan_del, + .port_fdb_dump = ksz_port_fdb_dump, + .port_mdb_add = ksz_port_mdb_add, + .port_mdb_del = ksz_port_mdb_del, +- .port_mirror_add = ksz8_port_mirror_add, +- .port_mirror_del = ksz8_port_mirror_del, ++ .port_mirror_add = ksz_port_mirror_add, ++ .port_mirror_del = ksz_port_mirror_del, + }; + + static u32 ksz8_get_port_addr(int port, int offset) +@@ -1424,51 +1402,6 @@ static u32 ksz8_get_port_addr(int port, int offset) + return PORT_CTRL_ADDR(port, offset); + } + +-static int ksz8_switch_detect(struct ksz_device *dev) +-{ +- u8 id1, id2; +- u16 id16; +- int ret; +- +- /* read chip id */ +- ret = ksz_read16(dev, REG_CHIP_ID0, &id16); +- if (ret) +- return ret; +- +- id1 = id16 >> 8; +- id2 = id16 & SW_CHIP_ID_M; +- +- switch (id1) { +- case KSZ87_FAMILY_ID: +- if ((id2 != CHIP_ID_94 && id2 != CHIP_ID_95)) +- return -ENODEV; +- +- if (id2 == CHIP_ID_95) { +- u8 val; +- +- id2 = 0x95; +- ksz_read8(dev, REG_PORT_STATUS_0, &val); +- if (val & PORT_FIBER_MODE) +- id2 = 0x65; +- } else if (id2 == CHIP_ID_94) { +- id2 = 0x94; +- } +- break; +- case KSZ88_FAMILY_ID: +- if (id2 != CHIP_ID_63) +- return -ENODEV; +- break; +- default: +- dev_err(dev->dev, "invalid family id: %d\n", id1); +- return -ENODEV; +- } +- id16 &= ~0xff; +- id16 |= id2; +- dev->chip_id = id16; +- +- return 0; +-} +- + static int ksz8_switch_init(struct ksz_device *dev) + { + struct ksz8 *ksz8 = dev->priv; +@@ -1521,8 +1454,13 @@ static const struct ksz_dev_ops ksz8_dev_ops = { + .r_mib_pkt = ksz8_r_mib_pkt, + .freeze_mib = ksz8_freeze_mib, + .port_init_cnt = ksz8_port_init_cnt, ++ .vlan_filtering = ksz8_port_vlan_filtering, ++ .vlan_add = ksz8_port_vlan_add, ++ .vlan_del = ksz8_port_vlan_del, ++ .mirror_add = ksz8_port_mirror_add, ++ .mirror_del = ksz8_port_mirror_del, ++ .get_caps = ksz8_get_caps, + .shutdown = ksz8_reset_switch, +- .detect = ksz8_switch_detect, + .init = ksz8_switch_init, + .exit = ksz8_switch_exit, + }; +diff --git a/drivers/net/dsa/microchip/ksz8795_reg.h b/drivers/net/dsa/microchip/ksz8795_reg.h +index 4109433b6b6c2..b8f6ad7581bcd 100644 +--- a/drivers/net/dsa/microchip/ksz8795_reg.h ++++ b/drivers/net/dsa/microchip/ksz8795_reg.h +@@ -14,23 +14,10 @@ + #define KS_PRIO_M 0x3 + #define KS_PRIO_S 2 + +-#define REG_CHIP_ID0 0x00 +- +-#define KSZ87_FAMILY_ID 0x87 +-#define KSZ88_FAMILY_ID 0x88 +- +-#define REG_CHIP_ID1 0x01 +- +-#define SW_CHIP_ID_M 0xF0 +-#define SW_CHIP_ID_S 4 + #define SW_REVISION_M 0x0E + #define SW_REVISION_S 1 + #define SW_START 0x01 + +-#define CHIP_ID_94 0x60 +-#define CHIP_ID_95 0x90 +-#define CHIP_ID_63 0x30 +- + #define KSZ8863_REG_SW_RESET 0x43 + + #define KSZ8863_GLOBAL_SOFTWARE_RESET BIT(4) +@@ -217,8 +204,6 @@ + #define REG_PORT_4_STATUS_0 0x48 + + /* For KSZ8765. 
*/ +-#define PORT_FIBER_MODE BIT(7) +- + #define PORT_REMOTE_ASYM_PAUSE BIT(5) + #define PORT_REMOTE_SYM_PAUSE BIT(4) + #define PORT_REMOTE_100BTX_FD BIT(3) +@@ -322,7 +307,6 @@ + + #define REG_PORT_CTRL_5 0x05 + +-#define REG_PORT_STATUS_0 0x08 + #define REG_PORT_STATUS_1 0x09 + #define REG_PORT_LINK_MD_CTRL 0x0A + #define REG_PORT_LINK_MD_RESULT 0x0B +diff --git a/drivers/net/dsa/microchip/ksz9477.c b/drivers/net/dsa/microchip/ksz9477.c +index ebad795e4e95f..125124fdefbf4 100644 +--- a/drivers/net/dsa/microchip/ksz9477.c ++++ b/drivers/net/dsa/microchip/ksz9477.c +@@ -276,18 +276,6 @@ static void ksz9477_port_init_cnt(struct ksz_device *dev, int port) + mutex_unlock(&mib->cnt_mutex); + } + +-static enum dsa_tag_protocol ksz9477_get_tag_protocol(struct dsa_switch *ds, +- int port, +- enum dsa_tag_protocol mp) +-{ +- enum dsa_tag_protocol proto = DSA_TAG_PROTO_KSZ9477; +- struct ksz_device *dev = ds->priv; +- +- if (dev->features & IS_9893) +- proto = DSA_TAG_PROTO_KSZ9893; +- return proto; +-} +- + static int ksz9477_phy_read16(struct dsa_switch *ds, int addr, int reg) + { + struct ksz_device *dev = ds->priv; +@@ -389,12 +377,10 @@ static void ksz9477_flush_dyn_mac_table(struct ksz_device *dev, int port) + } + } + +-static int ksz9477_port_vlan_filtering(struct dsa_switch *ds, int port, ++static int ksz9477_port_vlan_filtering(struct ksz_device *dev, int port, + bool flag, + struct netlink_ext_ack *extack) + { +- struct ksz_device *dev = ds->priv; +- + if (flag) { + ksz_port_cfg(dev, port, REG_PORT_LUE_CTRL, + PORT_VLAN_LOOKUP_VID_0, true); +@@ -408,11 +394,10 @@ static int ksz9477_port_vlan_filtering(struct dsa_switch *ds, int port, + return 0; + } + +-static int ksz9477_port_vlan_add(struct dsa_switch *ds, int port, ++static int ksz9477_port_vlan_add(struct ksz_device *dev, int port, + const struct switchdev_obj_port_vlan *vlan, + struct netlink_ext_ack *extack) + { +- struct ksz_device *dev = ds->priv; + u32 vlan_table[3]; + bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED; + int err; +@@ -445,10 +430,9 @@ static int ksz9477_port_vlan_add(struct dsa_switch *ds, int port, + return 0; + } + +-static int ksz9477_port_vlan_del(struct dsa_switch *ds, int port, ++static int ksz9477_port_vlan_del(struct ksz_device *dev, int port, + const struct switchdev_obj_port_vlan *vlan) + { +- struct ksz_device *dev = ds->priv; + bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED; + u32 vlan_table[3]; + u16 pvid; +@@ -835,11 +819,10 @@ exit: + return ret; + } + +-static int ksz9477_port_mirror_add(struct dsa_switch *ds, int port, ++static int ksz9477_port_mirror_add(struct ksz_device *dev, int port, + struct dsa_mall_mirror_tc_entry *mirror, + bool ingress, struct netlink_ext_ack *extack) + { +- struct ksz_device *dev = ds->priv; + u8 data; + int p; + +@@ -875,10 +858,9 @@ static int ksz9477_port_mirror_add(struct dsa_switch *ds, int port, + return 0; + } + +-static void ksz9477_port_mirror_del(struct dsa_switch *ds, int port, ++static void ksz9477_port_mirror_del(struct ksz_device *dev, int port, + struct dsa_mall_mirror_tc_entry *mirror) + { +- struct ksz_device *dev = ds->priv; + bool in_use = false; + u8 data; + int p; +@@ -1100,11 +1082,9 @@ static void ksz9477_phy_errata_setup(struct ksz_device *dev, int port) + ksz9477_port_mmd_write(dev, port, 0x1c, 0x20, 0xeeee); + } + +-static void ksz9477_get_caps(struct dsa_switch *ds, int port, ++static void ksz9477_get_caps(struct ksz_device *dev, int port, + struct phylink_config *config) + { +- ksz_phylink_get_caps(ds, port, config); +- + 
config->mac_capabilities = MAC_10 | MAC_100 | MAC_1000FD | + MAC_ASYM_PAUSE | MAC_SYM_PAUSE; + } +@@ -1329,12 +1309,12 @@ static int ksz9477_setup(struct dsa_switch *ds) + } + + static const struct dsa_switch_ops ksz9477_switch_ops = { +- .get_tag_protocol = ksz9477_get_tag_protocol, ++ .get_tag_protocol = ksz_get_tag_protocol, + .setup = ksz9477_setup, + .phy_read = ksz9477_phy_read16, + .phy_write = ksz9477_phy_write16, + .phylink_mac_link_down = ksz_mac_link_down, +- .phylink_get_caps = ksz9477_get_caps, ++ .phylink_get_caps = ksz_phylink_get_caps, + .port_enable = ksz_enable_port, + .get_strings = ksz_get_strings, + .get_ethtool_stats = ksz_get_ethtool_stats, +@@ -1343,16 +1323,16 @@ static const struct dsa_switch_ops ksz9477_switch_ops = { + .port_bridge_leave = ksz_port_bridge_leave, + .port_stp_state_set = ksz9477_port_stp_state_set, + .port_fast_age = ksz_port_fast_age, +- .port_vlan_filtering = ksz9477_port_vlan_filtering, +- .port_vlan_add = ksz9477_port_vlan_add, +- .port_vlan_del = ksz9477_port_vlan_del, ++ .port_vlan_filtering = ksz_port_vlan_filtering, ++ .port_vlan_add = ksz_port_vlan_add, ++ .port_vlan_del = ksz_port_vlan_del, + .port_fdb_dump = ksz9477_port_fdb_dump, + .port_fdb_add = ksz9477_port_fdb_add, + .port_fdb_del = ksz9477_port_fdb_del, + .port_mdb_add = ksz9477_port_mdb_add, + .port_mdb_del = ksz9477_port_mdb_del, +- .port_mirror_add = ksz9477_port_mirror_add, +- .port_mirror_del = ksz9477_port_mirror_del, ++ .port_mirror_add = ksz_port_mirror_add, ++ .port_mirror_del = ksz_port_mirror_del, + .get_stats64 = ksz_get_stats64, + .port_change_mtu = ksz9477_change_mtu, + .port_max_mtu = ksz9477_max_mtu, +@@ -1363,14 +1343,15 @@ static u32 ksz9477_get_port_addr(int port, int offset) + return PORT_CTRL_ADDR(port, offset); + } + +-static int ksz9477_switch_detect(struct ksz_device *dev) ++static int ksz9477_switch_init(struct ksz_device *dev) + { + u8 data8; +- u8 id_hi; +- u8 id_lo; +- u32 id32; + int ret; + ++ dev->ds->ops = &ksz9477_switch_ops; ++ ++ dev->port_mask = (1 << dev->info->port_cnt) - 1; ++ + /* turn off SPI DO Edge select */ + ret = ksz_read8(dev, REG_SW_GLOBAL_SERIAL_CTRL_0, &data8); + if (ret) +@@ -1381,10 +1362,6 @@ static int ksz9477_switch_detect(struct ksz_device *dev) + if (ret) + return ret; + +- /* read chip id */ +- ret = ksz_read32(dev, REG_CHIP_ID0__1, &id32); +- if (ret) +- return ret; + ret = ksz_read8(dev, REG_GLOBAL_OPTIONS, &data8); + if (ret) + return ret; +@@ -1395,12 +1372,7 @@ static int ksz9477_switch_detect(struct ksz_device *dev) + /* Default capability is gigabit capable. */ + dev->features = GBIT_SUPPORT; + +- dev_dbg(dev->dev, "Switch detect: ID=%08x%02x\n", id32, data8); +- id_hi = (u8)(id32 >> 16); +- id_lo = (u8)(id32 >> 8); +- if ((id_lo & 0xf) == 3) { +- /* Chip is from KSZ9893 design. */ +- dev_info(dev->dev, "Found KSZ9893\n"); ++ if (dev->chip_id == KSZ9893_CHIP_ID) { + dev->features |= IS_9893; + + /* Chip does not support gigabit. */ +@@ -1408,7 +1380,6 @@ static int ksz9477_switch_detect(struct ksz_device *dev) + dev->features &= ~GBIT_SUPPORT; + dev->phy_port_cnt = 2; + } else { +- dev_info(dev->dev, "Found KSZ9477 or compatible\n"); + /* Chip uses new XMII register definitions. */ + dev->features |= NEW_XMII; + +@@ -1416,21 +1387,6 @@ static int ksz9477_switch_detect(struct ksz_device *dev) + if (!(data8 & SW_GIGABIT_ABLE)) + dev->features &= ~GBIT_SUPPORT; + } +- +- /* Change chip id to known ones so it can be matched against them. 
*/ +- id32 = (id_hi << 16) | (id_lo << 8); +- +- dev->chip_id = id32; +- +- return 0; +-} +- +-static int ksz9477_switch_init(struct ksz_device *dev) +-{ +- dev->ds->ops = &ksz9477_switch_ops; +- +- dev->port_mask = (1 << dev->info->port_cnt) - 1; +- + return 0; + } + +@@ -1449,8 +1405,13 @@ static const struct ksz_dev_ops ksz9477_dev_ops = { + .r_mib_stat64 = ksz_r_mib_stats64, + .freeze_mib = ksz9477_freeze_mib, + .port_init_cnt = ksz9477_port_init_cnt, ++ .vlan_filtering = ksz9477_port_vlan_filtering, ++ .vlan_add = ksz9477_port_vlan_add, ++ .vlan_del = ksz9477_port_vlan_del, ++ .mirror_add = ksz9477_port_mirror_add, ++ .mirror_del = ksz9477_port_mirror_del, ++ .get_caps = ksz9477_get_caps, + .shutdown = ksz9477_reset_switch, +- .detect = ksz9477_switch_detect, + .init = ksz9477_switch_init, + .exit = ksz9477_switch_exit, + }; +diff --git a/drivers/net/dsa/microchip/ksz9477_reg.h b/drivers/net/dsa/microchip/ksz9477_reg.h +index 7a2c8d4767aff..077e35ab11b54 100644 +--- a/drivers/net/dsa/microchip/ksz9477_reg.h ++++ b/drivers/net/dsa/microchip/ksz9477_reg.h +@@ -25,7 +25,6 @@ + + #define REG_CHIP_ID2__1 0x0002 + +-#define CHIP_ID_63 0x63 + #define CHIP_ID_66 0x66 + #define CHIP_ID_67 0x67 + #define CHIP_ID_77 0x77 +diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c +index 92a500e1ccd21..c9389880ad1fa 100644 +--- a/drivers/net/dsa/microchip/ksz_common.c ++++ b/drivers/net/dsa/microchip/ksz_common.c +@@ -453,9 +453,18 @@ void ksz_phylink_get_caps(struct dsa_switch *ds, int port, + if (dev->info->supports_rgmii[port]) + phy_interface_set_rgmii(config->supported_interfaces); + +- if (dev->info->internal_phy[port]) ++ if (dev->info->internal_phy[port]) { + __set_bit(PHY_INTERFACE_MODE_INTERNAL, + config->supported_interfaces); ++ /* Compatibility for phylib's default interface type when the ++ * phy-mode property is absent ++ */ ++ __set_bit(PHY_INTERFACE_MODE_GMII, ++ config->supported_interfaces); ++ } ++ ++ if (dev->dev_ops->get_caps) ++ dev->dev_ops->get_caps(dev, port, config); + } + EXPORT_SYMBOL_GPL(ksz_phylink_get_caps); + +@@ -930,6 +939,156 @@ void ksz_port_stp_state_set(struct dsa_switch *ds, int port, + } + EXPORT_SYMBOL_GPL(ksz_port_stp_state_set); + ++enum dsa_tag_protocol ksz_get_tag_protocol(struct dsa_switch *ds, ++ int port, enum dsa_tag_protocol mp) ++{ ++ struct ksz_device *dev = ds->priv; ++ enum dsa_tag_protocol proto = DSA_TAG_PROTO_NONE; ++ ++ if (dev->chip_id == KSZ8795_CHIP_ID || ++ dev->chip_id == KSZ8794_CHIP_ID || ++ dev->chip_id == KSZ8765_CHIP_ID) ++ proto = DSA_TAG_PROTO_KSZ8795; ++ ++ if (dev->chip_id == KSZ8830_CHIP_ID || ++ dev->chip_id == KSZ9893_CHIP_ID) ++ proto = DSA_TAG_PROTO_KSZ9893; ++ ++ if (dev->chip_id == KSZ9477_CHIP_ID || ++ dev->chip_id == KSZ9897_CHIP_ID || ++ dev->chip_id == KSZ9567_CHIP_ID) ++ proto = DSA_TAG_PROTO_KSZ9477; ++ ++ return proto; ++} ++EXPORT_SYMBOL_GPL(ksz_get_tag_protocol); ++ ++int ksz_port_vlan_filtering(struct dsa_switch *ds, int port, ++ bool flag, struct netlink_ext_ack *extack) ++{ ++ struct ksz_device *dev = ds->priv; ++ ++ if (!dev->dev_ops->vlan_filtering) ++ return -EOPNOTSUPP; ++ ++ return dev->dev_ops->vlan_filtering(dev, port, flag, extack); ++} ++EXPORT_SYMBOL_GPL(ksz_port_vlan_filtering); ++ ++int ksz_port_vlan_add(struct dsa_switch *ds, int port, ++ const struct switchdev_obj_port_vlan *vlan, ++ struct netlink_ext_ack *extack) ++{ ++ struct ksz_device *dev = ds->priv; ++ ++ if (!dev->dev_ops->vlan_add) ++ return -EOPNOTSUPP; ++ ++ return dev->dev_ops->vlan_add(dev, port, 
vlan, extack); ++} ++EXPORT_SYMBOL_GPL(ksz_port_vlan_add); ++ ++int ksz_port_vlan_del(struct dsa_switch *ds, int port, ++ const struct switchdev_obj_port_vlan *vlan) ++{ ++ struct ksz_device *dev = ds->priv; ++ ++ if (!dev->dev_ops->vlan_del) ++ return -EOPNOTSUPP; ++ ++ return dev->dev_ops->vlan_del(dev, port, vlan); ++} ++EXPORT_SYMBOL_GPL(ksz_port_vlan_del); ++ ++int ksz_port_mirror_add(struct dsa_switch *ds, int port, ++ struct dsa_mall_mirror_tc_entry *mirror, ++ bool ingress, struct netlink_ext_ack *extack) ++{ ++ struct ksz_device *dev = ds->priv; ++ ++ if (!dev->dev_ops->mirror_add) ++ return -EOPNOTSUPP; ++ ++ return dev->dev_ops->mirror_add(dev, port, mirror, ingress, extack); ++} ++EXPORT_SYMBOL_GPL(ksz_port_mirror_add); ++ ++void ksz_port_mirror_del(struct dsa_switch *ds, int port, ++ struct dsa_mall_mirror_tc_entry *mirror) ++{ ++ struct ksz_device *dev = ds->priv; ++ ++ if (dev->dev_ops->mirror_del) ++ dev->dev_ops->mirror_del(dev, port, mirror); ++} ++EXPORT_SYMBOL_GPL(ksz_port_mirror_del); ++ ++static int ksz_switch_detect(struct ksz_device *dev) ++{ ++ u8 id1, id2; ++ u16 id16; ++ u32 id32; ++ int ret; ++ ++ /* read chip id */ ++ ret = ksz_read16(dev, REG_CHIP_ID0, &id16); ++ if (ret) ++ return ret; ++ ++ id1 = FIELD_GET(SW_FAMILY_ID_M, id16); ++ id2 = FIELD_GET(SW_CHIP_ID_M, id16); ++ ++ switch (id1) { ++ case KSZ87_FAMILY_ID: ++ if (id2 == KSZ87_CHIP_ID_95) { ++ u8 val; ++ ++ dev->chip_id = KSZ8795_CHIP_ID; ++ ++ ksz_read8(dev, KSZ8_PORT_STATUS_0, &val); ++ if (val & KSZ8_PORT_FIBER_MODE) ++ dev->chip_id = KSZ8765_CHIP_ID; ++ } else if (id2 == KSZ87_CHIP_ID_94) { ++ dev->chip_id = KSZ8794_CHIP_ID; ++ } else { ++ return -ENODEV; ++ } ++ break; ++ case KSZ88_FAMILY_ID: ++ if (id2 == KSZ88_CHIP_ID_63) ++ dev->chip_id = KSZ8830_CHIP_ID; ++ else ++ return -ENODEV; ++ break; ++ default: ++ ret = ksz_read32(dev, REG_CHIP_ID0, &id32); ++ if (ret) ++ return ret; ++ ++ dev->chip_rev = FIELD_GET(SW_REV_ID_M, id32); ++ id32 &= ~0xFF; ++ ++ switch (id32) { ++ case KSZ9477_CHIP_ID: ++ case KSZ9897_CHIP_ID: ++ case KSZ9893_CHIP_ID: ++ case KSZ9567_CHIP_ID: ++ case LAN9370_CHIP_ID: ++ case LAN9371_CHIP_ID: ++ case LAN9372_CHIP_ID: ++ case LAN9373_CHIP_ID: ++ case LAN9374_CHIP_ID: ++ dev->chip_id = id32; ++ break; ++ default: ++ dev_err(dev->dev, ++ "unsupported switch detected %x\n", id32); ++ return -ENODEV; ++ } ++ } ++ return 0; ++} ++ + struct ksz_device *ksz_switch_alloc(struct device *base, void *priv) + { + struct dsa_switch *ds; +@@ -986,10 +1145,9 @@ int ksz_switch_register(struct ksz_device *dev, + mutex_init(&dev->alu_mutex); + mutex_init(&dev->vlan_mutex); + +- dev->dev_ops = ops; +- +- if (dev->dev_ops->detect(dev)) +- return -EINVAL; ++ ret = ksz_switch_detect(dev); ++ if (ret) ++ return ret; + + info = ksz_lookup_info(dev->chip_id); + if (!info) +@@ -998,10 +1156,15 @@ int ksz_switch_register(struct ksz_device *dev, + /* Update the compatible info with the probed one */ + dev->info = info; + ++ dev_info(dev->dev, "found switch: %s, rev %i\n", ++ dev->info->dev_name, dev->chip_rev); ++ + ret = ksz_check_device_id(dev); + if (ret) + return ret; + ++ dev->dev_ops = ops; ++ + ret = dev->dev_ops->init(dev); + if (ret) + return ret; +diff --git a/drivers/net/dsa/microchip/ksz_common.h b/drivers/net/dsa/microchip/ksz_common.h +index 8500eaedad67a..10f9ef2dbf1ca 100644 +--- a/drivers/net/dsa/microchip/ksz_common.h ++++ b/drivers/net/dsa/microchip/ksz_common.h +@@ -90,6 +90,7 @@ struct ksz_device { + + /* chip specific data */ + u32 chip_id; ++ u8 chip_rev; + int cpu_port; /* 
port connected to CPU */ + int phy_port_cnt; + phy_interface_t compat_interface; +@@ -179,10 +180,23 @@ struct ksz_dev_ops { + void (*r_mib_pkt)(struct ksz_device *dev, int port, u16 addr, + u64 *dropped, u64 *cnt); + void (*r_mib_stat64)(struct ksz_device *dev, int port); ++ int (*vlan_filtering)(struct ksz_device *dev, int port, ++ bool flag, struct netlink_ext_ack *extack); ++ int (*vlan_add)(struct ksz_device *dev, int port, ++ const struct switchdev_obj_port_vlan *vlan, ++ struct netlink_ext_ack *extack); ++ int (*vlan_del)(struct ksz_device *dev, int port, ++ const struct switchdev_obj_port_vlan *vlan); ++ int (*mirror_add)(struct ksz_device *dev, int port, ++ struct dsa_mall_mirror_tc_entry *mirror, ++ bool ingress, struct netlink_ext_ack *extack); ++ void (*mirror_del)(struct ksz_device *dev, int port, ++ struct dsa_mall_mirror_tc_entry *mirror); ++ void (*get_caps)(struct ksz_device *dev, int port, ++ struct phylink_config *config); + void (*freeze_mib)(struct ksz_device *dev, int port, bool freeze); + void (*port_init_cnt)(struct ksz_device *dev, int port); + int (*shutdown)(struct ksz_device *dev); +- int (*detect)(struct ksz_device *dev); + int (*init)(struct ksz_device *dev); + void (*exit)(struct ksz_device *dev); + }; +@@ -231,6 +245,20 @@ int ksz_port_mdb_del(struct dsa_switch *ds, int port, + int ksz_enable_port(struct dsa_switch *ds, int port, struct phy_device *phy); + void ksz_get_strings(struct dsa_switch *ds, int port, + u32 stringset, uint8_t *buf); ++enum dsa_tag_protocol ksz_get_tag_protocol(struct dsa_switch *ds, ++ int port, enum dsa_tag_protocol mp); ++int ksz_port_vlan_filtering(struct dsa_switch *ds, int port, ++ bool flag, struct netlink_ext_ack *extack); ++int ksz_port_vlan_add(struct dsa_switch *ds, int port, ++ const struct switchdev_obj_port_vlan *vlan, ++ struct netlink_ext_ack *extack); ++int ksz_port_vlan_del(struct dsa_switch *ds, int port, ++ const struct switchdev_obj_port_vlan *vlan); ++int ksz_port_mirror_add(struct dsa_switch *ds, int port, ++ struct dsa_mall_mirror_tc_entry *mirror, ++ bool ingress, struct netlink_ext_ack *extack); ++void ksz_port_mirror_del(struct dsa_switch *ds, int port, ++ struct dsa_mall_mirror_tc_entry *mirror); + + /* Common register access functions */ + +@@ -353,6 +381,23 @@ static inline void ksz_regmap_unlock(void *__mtx) + #define PORT_RX_ENABLE BIT(1) + #define PORT_LEARN_DISABLE BIT(0) + ++/* Switch ID Defines */ ++#define REG_CHIP_ID0 0x00 ++ ++#define SW_FAMILY_ID_M GENMASK(15, 8) ++#define KSZ87_FAMILY_ID 0x87 ++#define KSZ88_FAMILY_ID 0x88 ++ ++#define KSZ8_PORT_STATUS_0 0x08 ++#define KSZ8_PORT_FIBER_MODE BIT(7) ++ ++#define SW_CHIP_ID_M GENMASK(7, 4) ++#define KSZ87_CHIP_ID_94 0x6 ++#define KSZ87_CHIP_ID_95 0x9 ++#define KSZ88_CHIP_ID_63 0x3 ++ ++#define SW_REV_ID_M GENMASK(7, 4) ++ + /* Regmap tables generation */ + #define KSZ_SPI_OP_RD 3 + #define KSZ_SPI_OP_WR 2 +diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c +index cf9b00576ed36..964354536f9ce 100644 +--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c ++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c +@@ -11183,10 +11183,7 @@ static netdev_features_t bnxt_fix_features(struct net_device *dev, + if ((features & NETIF_F_NTUPLE) && !bnxt_rfs_capable(bp)) + features &= ~NETIF_F_NTUPLE; + +- if (bp->flags & BNXT_FLAG_NO_AGG_RINGS) +- features &= ~(NETIF_F_LRO | NETIF_F_GRO_HW); +- +- if (!(bp->flags & BNXT_FLAG_TPA)) ++ if ((bp->flags & BNXT_FLAG_NO_AGG_RINGS) || bp->xdp_prog) + features &= ~(NETIF_F_LRO | 
NETIF_F_GRO_HW); + + if (!(features & NETIF_F_GRO)) +diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h +index 075c6206325ce..b1b17f9113006 100644 +--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h ++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h +@@ -2130,6 +2130,7 @@ struct bnxt { + #define BNXT_DUMP_CRASH 1 + + struct bpf_prog *xdp_prog; ++ u8 xdp_has_frags; + + struct bnxt_ptp_cfg *ptp_cfg; + u8 ptp_all_rx_tstamp; +diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c +index 6b3d4f4c2a75f..d83be40785b89 100644 +--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c ++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c +@@ -1246,6 +1246,7 @@ int bnxt_dl_register(struct bnxt *bp) + if (rc) + goto err_dl_port_unreg; + ++ devlink_set_features(dl, DEVLINK_F_RELOAD); + out: + devlink_register(dl); + return 0; +diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c +index a1a2c7a64fd58..c9cf0569451a2 100644 +--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c ++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c +@@ -623,7 +623,7 @@ static int bnxt_hwrm_func_vf_resc_cfg(struct bnxt *bp, int num_vfs, bool reset) + hw_resc->max_stat_ctxs -= le16_to_cpu(req->min_stat_ctx) * n; + hw_resc->max_vnics -= le16_to_cpu(req->min_vnics) * n; + if (bp->flags & BNXT_FLAG_CHIP_P5) +- hw_resc->max_irqs -= vf_msix * n; ++ hw_resc->max_nqs -= vf_msix; + + rc = pf->active_vfs; + } +diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c +index f53387ed0167b..c3065ec0a4798 100644 +--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c ++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c +@@ -181,6 +181,7 @@ void bnxt_xdp_buff_init(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, + struct xdp_buff *xdp) + { + struct bnxt_sw_rx_bd *rx_buf; ++ u32 buflen = PAGE_SIZE; + struct pci_dev *pdev; + dma_addr_t mapping; + u32 offset; +@@ -192,7 +193,10 @@ void bnxt_xdp_buff_init(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, + mapping = rx_buf->mapping - bp->rx_dma_offset; + dma_sync_single_for_cpu(&pdev->dev, mapping + offset, *len, bp->rx_dir); + +- xdp_init_buff(xdp, BNXT_PAGE_MODE_BUF_SIZE + offset, &rxr->xdp_rxq); ++ if (bp->xdp_has_frags) ++ buflen = BNXT_PAGE_MODE_BUF_SIZE + offset; ++ ++ xdp_init_buff(xdp, buflen, &rxr->xdp_rxq); + xdp_prepare_buff(xdp, *data_ptr - offset, offset, *len, false); + } + +@@ -397,8 +401,10 @@ static int bnxt_xdp_set(struct bnxt *bp, struct bpf_prog *prog) + netdev_warn(dev, "ethtool rx/tx channels must be combined to support XDP.\n"); + return -EOPNOTSUPP; + } +- if (prog) ++ if (prog) { + tx_xdp = bp->rx_nr_rings; ++ bp->xdp_has_frags = prog->aux->xdp_has_frags; ++ } + + tc = netdev_get_num_tc(dev); + if (!tc) +diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +index 19704f5c8291c..22a61802a4027 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c ++++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +@@ -4395,7 +4395,7 @@ static int i40e_check_fdir_input_set(struct i40e_vsi *vsi, + (struct in6_addr *)&ipv6_full_mask)) + new_mask |= I40E_L3_V6_DST_MASK; + else if (ipv6_addr_any((struct in6_addr *) +- &usr_ip6_spec->ip6src)) ++ &usr_ip6_spec->ip6dst)) + new_mask &= ~I40E_L3_V6_DST_MASK; + else + return -EOPNOTSUPP; +diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h 
+index 60453b3b8d233..6911cbb7afa50 100644 +--- a/drivers/net/ethernet/intel/ice/ice.h ++++ b/drivers/net/ethernet/intel/ice/ice.h +@@ -684,8 +684,8 @@ static inline void ice_set_ring_xdp(struct ice_tx_ring *ring) + * ice_xsk_pool - get XSK buffer pool bound to a ring + * @ring: Rx ring to use + * +- * Returns a pointer to xdp_umem structure if there is a buffer pool present, +- * NULL otherwise. ++ * Returns a pointer to xsk_buff_pool structure if there is a buffer pool ++ * present, NULL otherwise. + */ + static inline struct xsk_buff_pool *ice_xsk_pool(struct ice_rx_ring *ring) + { +@@ -699,23 +699,33 @@ static inline struct xsk_buff_pool *ice_xsk_pool(struct ice_rx_ring *ring) + } + + /** +- * ice_tx_xsk_pool - get XSK buffer pool bound to a ring +- * @ring: Tx ring to use ++ * ice_tx_xsk_pool - assign XSK buff pool to XDP ring ++ * @vsi: pointer to VSI ++ * @qid: index of a queue to look at XSK buff pool presence + * +- * Returns a pointer to xdp_umem structure if there is a buffer pool present, +- * NULL otherwise. Tx equivalent of ice_xsk_pool. ++ * Sets XSK buff pool pointer on XDP ring. ++ * ++ * XDP ring is picked from Rx ring, whereas Rx ring is picked based on provided ++ * queue id. Reason for doing so is that queue vectors might have assigned more ++ * than one XDP ring, e.g. when user reduced the queue count on netdev; Rx ring ++ * carries a pointer to one of these XDP rings for its own purposes, such as ++ * handling XDP_TX action, therefore we can piggyback here on the ++ * rx_ring->xdp_ring assignment that was done during XDP rings initialization. + */ +-static inline struct xsk_buff_pool *ice_tx_xsk_pool(struct ice_tx_ring *ring) ++static inline void ice_tx_xsk_pool(struct ice_vsi *vsi, u16 qid) + { +- struct ice_vsi *vsi = ring->vsi; +- u16 qid; ++ struct ice_tx_ring *ring; + +- qid = ring->q_index - vsi->alloc_txq; ++ ring = vsi->rx_rings[qid]->xdp_ring; ++ if (!ring) ++ return; + +- if (!ice_is_xdp_ena_vsi(vsi) || !test_bit(qid, vsi->af_xdp_zc_qps)) +- return NULL; ++ if (!ice_is_xdp_ena_vsi(vsi) || !test_bit(qid, vsi->af_xdp_zc_qps)) { ++ ring->xsk_pool = NULL; ++ return; ++ } + +- return xsk_get_pool_from_qid(vsi->netdev, qid); ++ ring->xsk_pool = xsk_get_pool_from_qid(vsi->netdev, qid); + } + + /** +diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c +index d6aafa272fb0b..6c4e1d45235ef 100644 +--- a/drivers/net/ethernet/intel/ice/ice_lib.c ++++ b/drivers/net/ethernet/intel/ice/ice_lib.c +@@ -1983,8 +1983,8 @@ int ice_vsi_cfg_xdp_txqs(struct ice_vsi *vsi) + if (ret) + return ret; + +- ice_for_each_xdp_txq(vsi, i) +- vsi->xdp_rings[i]->xsk_pool = ice_tx_xsk_pool(vsi->xdp_rings[i]); ++ ice_for_each_rxq(vsi, i) ++ ice_tx_xsk_pool(vsi, i); + + return ret; + } +diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c +index bfd97a9a8f2e0..3d45e075204e3 100644 +--- a/drivers/net/ethernet/intel/ice/ice_main.c ++++ b/drivers/net/ethernet/intel/ice/ice_main.c +@@ -2581,7 +2581,6 @@ static int ice_xdp_alloc_setup_rings(struct ice_vsi *vsi) + if (ice_setup_tx_ring(xdp_ring)) + goto free_xdp_rings; + ice_set_ring_xdp(xdp_ring); +- xdp_ring->xsk_pool = ice_tx_xsk_pool(xdp_ring); + spin_lock_init(&xdp_ring->tx_lock); + for (j = 0; j < xdp_ring->count; j++) { + tx_desc = ICE_TX_DESC(xdp_ring, j); +@@ -2589,13 +2588,6 @@ static int ice_xdp_alloc_setup_rings(struct ice_vsi *vsi) + } + } + +- ice_for_each_rxq(vsi, i) { +- if (static_key_enabled(&ice_xdp_locking_key)) +- vsi->rx_rings[i]->xdp_ring = 
vsi->xdp_rings[i % vsi->num_xdp_txq]; +- else +- vsi->rx_rings[i]->xdp_ring = vsi->xdp_rings[i]; +- } +- + return 0; + + free_xdp_rings: +@@ -2685,6 +2677,23 @@ int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog) + xdp_rings_rem -= xdp_rings_per_v; + } + ++ ice_for_each_rxq(vsi, i) { ++ if (static_key_enabled(&ice_xdp_locking_key)) { ++ vsi->rx_rings[i]->xdp_ring = vsi->xdp_rings[i % vsi->num_xdp_txq]; ++ } else { ++ struct ice_q_vector *q_vector = vsi->rx_rings[i]->q_vector; ++ struct ice_tx_ring *ring; ++ ++ ice_for_each_tx_ring(ring, q_vector->tx) { ++ if (ice_ring_is_xdp(ring)) { ++ vsi->rx_rings[i]->xdp_ring = ring; ++ break; ++ } ++ } ++ } ++ ice_tx_xsk_pool(vsi, i); ++ } ++ + /* omit the scheduler update if in reset path; XDP queues will be + * taken into account at the end of ice_vsi_rebuild, where + * ice_cfg_vsi_lan is being called +diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c +index 49ba8bfdbf047..e48e29258450f 100644 +--- a/drivers/net/ethernet/intel/ice/ice_xsk.c ++++ b/drivers/net/ethernet/intel/ice/ice_xsk.c +@@ -243,7 +243,7 @@ static int ice_qp_ena(struct ice_vsi *vsi, u16 q_idx) + if (err) + goto free_buf; + ice_set_ring_xdp(xdp_ring); +- xdp_ring->xsk_pool = ice_tx_xsk_pool(xdp_ring); ++ ice_tx_xsk_pool(vsi, q_idx); + } + + err = ice_vsi_cfg_rxq(rx_ring); +@@ -329,6 +329,12 @@ int ice_xsk_pool_setup(struct ice_vsi *vsi, struct xsk_buff_pool *pool, u16 qid) + bool if_running, pool_present = !!pool; + int ret = 0, pool_failure = 0; + ++ if (qid >= vsi->num_rxq || qid >= vsi->num_txq) { ++ netdev_err(vsi->netdev, "Please use queue id in scope of combined queues count\n"); ++ pool_failure = -EINVAL; ++ goto failure; ++ } ++ + if (!is_power_of_2(vsi->rx_rings[qid]->count) || + !is_power_of_2(vsi->tx_rings[qid]->count)) { + netdev_err(vsi->netdev, "Please align ring sizes to power of 2\n"); +@@ -353,7 +359,7 @@ xsk_pool_if_up: + if (if_running) { + ret = ice_qp_ena(vsi, qid); + if (!ret && pool_present) +- napi_schedule(&vsi->xdp_rings[qid]->q_vector->napi); ++ napi_schedule(&vsi->rx_rings[qid]->xdp_ring->q_vector->napi); + else if (ret) + netdev_err(vsi->netdev, "ice_qp_ena error = %d\n", ret); + } +@@ -944,13 +950,13 @@ ice_xsk_wakeup(struct net_device *netdev, u32 queue_id, + if (!ice_is_xdp_ena_vsi(vsi)) + return -EINVAL; + +- if (queue_id >= vsi->num_txq) ++ if (queue_id >= vsi->num_txq || queue_id >= vsi->num_rxq) + return -EINVAL; + +- if (!vsi->xdp_rings[queue_id]->xsk_pool) +- return -EINVAL; ++ ring = vsi->rx_rings[queue_id]->xdp_ring; + +- ring = vsi->xdp_rings[queue_id]; ++ if (!ring->xsk_pool) ++ return -EINVAL; + + /* The idea here is that if NAPI is running, mark a miss, so + * it will run again. If not, trigger an interrupt and +diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c +index 336426a67ac1b..38cda659f65f4 100644 +--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c ++++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c +@@ -1208,7 +1208,6 @@ void ixgbe_ptp_start_cyclecounter(struct ixgbe_adapter *adapter) + struct cyclecounter cc; + unsigned long flags; + u32 incval = 0; +- u32 tsauxc = 0; + u32 fuse0 = 0; + + /* For some of the boards below this mask is technically incorrect. 
+@@ -1243,18 +1242,6 @@ void ixgbe_ptp_start_cyclecounter(struct ixgbe_adapter *adapter) + case ixgbe_mac_x550em_a: + case ixgbe_mac_X550: + cc.read = ixgbe_ptp_read_X550; +- +- /* enable SYSTIME counter */ +- IXGBE_WRITE_REG(hw, IXGBE_SYSTIMR, 0); +- IXGBE_WRITE_REG(hw, IXGBE_SYSTIML, 0); +- IXGBE_WRITE_REG(hw, IXGBE_SYSTIMH, 0); +- tsauxc = IXGBE_READ_REG(hw, IXGBE_TSAUXC); +- IXGBE_WRITE_REG(hw, IXGBE_TSAUXC, +- tsauxc & ~IXGBE_TSAUXC_DISABLE_SYSTIME); +- IXGBE_WRITE_REG(hw, IXGBE_TSIM, IXGBE_TSIM_TXTS); +- IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EIMS_TIMESYNC); +- +- IXGBE_WRITE_FLUSH(hw); + break; + case ixgbe_mac_X540: + cc.read = ixgbe_ptp_read_82599; +@@ -1286,6 +1273,50 @@ void ixgbe_ptp_start_cyclecounter(struct ixgbe_adapter *adapter) + spin_unlock_irqrestore(&adapter->tmreg_lock, flags); + } + ++/** ++ * ixgbe_ptp_init_systime - Initialize SYSTIME registers ++ * @adapter: the ixgbe private board structure ++ * ++ * Initialize and start the SYSTIME registers. ++ */ ++static void ixgbe_ptp_init_systime(struct ixgbe_adapter *adapter) ++{ ++ struct ixgbe_hw *hw = &adapter->hw; ++ u32 tsauxc; ++ ++ switch (hw->mac.type) { ++ case ixgbe_mac_X550EM_x: ++ case ixgbe_mac_x550em_a: ++ case ixgbe_mac_X550: ++ tsauxc = IXGBE_READ_REG(hw, IXGBE_TSAUXC); ++ ++ /* Reset SYSTIME registers to 0 */ ++ IXGBE_WRITE_REG(hw, IXGBE_SYSTIMR, 0); ++ IXGBE_WRITE_REG(hw, IXGBE_SYSTIML, 0); ++ IXGBE_WRITE_REG(hw, IXGBE_SYSTIMH, 0); ++ ++ /* Reset interrupt settings */ ++ IXGBE_WRITE_REG(hw, IXGBE_TSIM, IXGBE_TSIM_TXTS); ++ IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EIMS_TIMESYNC); ++ ++ /* Activate the SYSTIME counter */ ++ IXGBE_WRITE_REG(hw, IXGBE_TSAUXC, ++ tsauxc & ~IXGBE_TSAUXC_DISABLE_SYSTIME); ++ break; ++ case ixgbe_mac_X540: ++ case ixgbe_mac_82599EB: ++ /* Reset SYSTIME registers to 0 */ ++ IXGBE_WRITE_REG(hw, IXGBE_SYSTIML, 0); ++ IXGBE_WRITE_REG(hw, IXGBE_SYSTIMH, 0); ++ break; ++ default: ++ /* Other devices aren't supported */ ++ return; ++ } ++ ++ IXGBE_WRITE_FLUSH(hw); ++} ++ + /** + * ixgbe_ptp_reset + * @adapter: the ixgbe private board structure +@@ -1312,6 +1343,8 @@ void ixgbe_ptp_reset(struct ixgbe_adapter *adapter) + + ixgbe_ptp_start_cyclecounter(adapter); + ++ ixgbe_ptp_init_systime(adapter); ++ + spin_lock_irqsave(&adapter->tmreg_lock, flags); + timecounter_init(&adapter->hw_tc, &adapter->hw_cc, + ktime_to_ns(ktime_get_real())); +diff --git a/drivers/net/ethernet/lantiq_xrx200.c b/drivers/net/ethernet/lantiq_xrx200.c +index 5edb68a8aab1e..57f27cc7724e7 100644 +--- a/drivers/net/ethernet/lantiq_xrx200.c ++++ b/drivers/net/ethernet/lantiq_xrx200.c +@@ -193,6 +193,7 @@ static int xrx200_alloc_buf(struct xrx200_chan *ch, void *(*alloc)(unsigned int + + ch->rx_buff[ch->dma.desc] = alloc(priv->rx_skb_size); + if (!ch->rx_buff[ch->dma.desc]) { ++ ch->rx_buff[ch->dma.desc] = buf; + ret = -ENOMEM; + goto skip; + } +@@ -239,6 +240,12 @@ static int xrx200_hw_receive(struct xrx200_chan *ch) + } + + skb = build_skb(buf, priv->rx_skb_size); ++ if (!skb) { ++ skb_free_frag(buf); ++ net_dev->stats.rx_dropped++; ++ return -ENOMEM; ++ } ++ + skb_reserve(skb, NET_SKB_PAD); + skb_put(skb, len); + +@@ -288,7 +295,7 @@ static int xrx200_poll_rx(struct napi_struct *napi, int budget) + if (ret == XRX200_DMA_PACKET_IN_PROGRESS) + continue; + if (ret != XRX200_DMA_PACKET_COMPLETE) +- return ret; ++ break; + rx++; + } else { + break; +diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c +index 59c9a10f83ba5..dcf0aac0aa65d 100644 +--- 
a/drivers/net/ethernet/mediatek/mtk_eth_soc.c ++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c +@@ -1444,8 +1444,8 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget, + int done = 0, bytes = 0; + + while (done < budget) { ++ unsigned int pktlen, *rxdcsum; + struct net_device *netdev; +- unsigned int pktlen; + dma_addr_t dma_addr; + u32 hash, reason; + int mac = 0; +@@ -1512,23 +1512,31 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget, + pktlen = RX_DMA_GET_PLEN0(trxd.rxd2); + skb->dev = netdev; + skb_put(skb, pktlen); +- if (trxd.rxd4 & eth->soc->txrx.rx_dma_l4_valid) ++ ++ if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2)) { ++ hash = trxd.rxd5 & MTK_RXD5_FOE_ENTRY; ++ if (hash != MTK_RXD5_FOE_ENTRY) ++ skb_set_hash(skb, jhash_1word(hash, 0), ++ PKT_HASH_TYPE_L4); ++ rxdcsum = &trxd.rxd3; ++ } else { ++ hash = trxd.rxd4 & MTK_RXD4_FOE_ENTRY; ++ if (hash != MTK_RXD4_FOE_ENTRY) ++ skb_set_hash(skb, jhash_1word(hash, 0), ++ PKT_HASH_TYPE_L4); ++ rxdcsum = &trxd.rxd4; ++ } ++ ++ if (*rxdcsum & eth->soc->txrx.rx_dma_l4_valid) + skb->ip_summed = CHECKSUM_UNNECESSARY; + else + skb_checksum_none_assert(skb); + skb->protocol = eth_type_trans(skb, netdev); + bytes += pktlen; + +- hash = trxd.rxd4 & MTK_RXD4_FOE_ENTRY; +- if (hash != MTK_RXD4_FOE_ENTRY) { +- hash = jhash_1word(hash, 0); +- skb_set_hash(skb, hash, PKT_HASH_TYPE_L4); +- } +- + reason = FIELD_GET(MTK_RXD4_PPE_CPU_REASON, trxd.rxd4); + if (reason == MTK_PPE_CPU_REASON_HIT_UNBIND_RATE_REACHED) +- mtk_ppe_check_skb(eth->ppe, skb, +- trxd.rxd4 & MTK_RXD4_FOE_ENTRY); ++ mtk_ppe_check_skb(eth->ppe, skb, hash); + + if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX) { + if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2)) { +@@ -3761,6 +3769,7 @@ static const struct mtk_soc_data mt7986_data = { + .txd_size = sizeof(struct mtk_tx_dma_v2), + .rxd_size = sizeof(struct mtk_rx_dma_v2), + .rx_irq_done_mask = MTK_RX_DONE_INT_V2, ++ .rx_dma_l4_valid = RX_DMA_L4_VALID_V2, + .dma_max_len = MTK_TX_DMA_BUF_LEN_V2, + .dma_len_offset = 8, + }, +diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h +index 0a632896451a4..98d6a6d047e32 100644 +--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h ++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h +@@ -307,6 +307,11 @@ + #define RX_DMA_L4_VALID_PDMA BIT(30) /* when PDMA is used */ + #define RX_DMA_SPECIAL_TAG BIT(22) + ++/* PDMA descriptor rxd5 */ ++#define MTK_RXD5_FOE_ENTRY GENMASK(14, 0) ++#define MTK_RXD5_PPE_CPU_REASON GENMASK(22, 18) ++#define MTK_RXD5_SRC_PORT GENMASK(29, 26) ++ + #define RX_DMA_GET_SPORT(x) (((x) >> 19) & 0xf) + #define RX_DMA_GET_SPORT_V2(x) (((x) >> 26) & 0x7) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +index 087952b84ccb0..9e6db779b6efa 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +@@ -3678,7 +3678,9 @@ static int set_feature_hw_tc(struct net_device *netdev, bool enable) + struct mlx5e_priv *priv = netdev_priv(netdev); + + #if IS_ENABLED(CONFIG_MLX5_CLS_ACT) +- if (!enable && mlx5e_tc_num_filters(priv, MLX5_TC_FLAG(NIC_OFFLOAD))) { ++ int tc_flag = mlx5e_is_uplink_rep(priv) ? 
MLX5_TC_FLAG(ESW_OFFLOAD) : ++ MLX5_TC_FLAG(NIC_OFFLOAD); ++ if (!enable && mlx5e_tc_num_filters(priv, tc_flag)) { + netdev_err(netdev, + "Active offloaded tc filters, can't turn hw_tc_offload off\n"); + return -EINVAL; +@@ -4733,14 +4735,6 @@ void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16 + /* RQ */ + mlx5e_build_rq_params(mdev, params); + +- /* HW LRO */ +- if (MLX5_CAP_ETH(mdev, lro_cap) && +- params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) { +- /* No XSK params: checking the availability of striding RQ in general. */ +- if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL)) +- params->packet_merge.type = slow_pci_heuristic(mdev) ? +- MLX5E_PACKET_MERGE_NONE : MLX5E_PACKET_MERGE_LRO; +- } + params->packet_merge.timeout = mlx5e_choose_lro_timeout(mdev, MLX5E_DEFAULT_LRO_TIMEOUT); + + /* CQ moderation params */ +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +index f797fd97d305b..7da3dc6261929 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +@@ -662,6 +662,8 @@ static void mlx5e_build_rep_params(struct net_device *netdev) + + params->mqprio.num_tc = 1; + params->tunneled_offload_en = false; ++ if (rep->vport != MLX5_VPORT_UPLINK) ++ params->vlan_strip_disable = true; + + mlx5_query_min_inline(mdev, ¶ms->tx_min_inline_mode); + } +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +index eb79810199d3e..d04739cb793e5 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +@@ -427,7 +427,8 @@ esw_setup_vport_dest(struct mlx5_flow_destination *dest, struct mlx5_flow_act *f + dest[dest_idx].vport.vhca_id = + MLX5_CAP_GEN(esw_attr->dests[attr_idx].mdev, vhca_id); + dest[dest_idx].vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID; +- if (mlx5_lag_mpesw_is_activated(esw->dev)) ++ if (dest[dest_idx].vport.num == MLX5_VPORT_UPLINK && ++ mlx5_lag_mpesw_is_activated(esw->dev)) + dest[dest_idx].type = MLX5_FLOW_DESTINATION_TYPE_UPLINK; + } + if (esw_attr->dests[attr_idx].flags & MLX5_ESW_DEST_ENCAP) { +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c +index 5d41e19378e09..d98acd68af2ec 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c +@@ -1067,30 +1067,32 @@ static void mlx5_ldev_add_netdev(struct mlx5_lag *ldev, + struct net_device *netdev) + { + unsigned int fn = mlx5_get_dev_index(dev); ++ unsigned long flags; + + if (fn >= ldev->ports) + return; + +- spin_lock(&lag_lock); ++ spin_lock_irqsave(&lag_lock, flags); + ldev->pf[fn].netdev = netdev; + ldev->tracker.netdev_state[fn].link_up = 0; + ldev->tracker.netdev_state[fn].tx_enabled = 0; +- spin_unlock(&lag_lock); ++ spin_unlock_irqrestore(&lag_lock, flags); + } + + static void mlx5_ldev_remove_netdev(struct mlx5_lag *ldev, + struct net_device *netdev) + { ++ unsigned long flags; + int i; + +- spin_lock(&lag_lock); ++ spin_lock_irqsave(&lag_lock, flags); + for (i = 0; i < ldev->ports; i++) { + if (ldev->pf[i].netdev == netdev) { + ldev->pf[i].netdev = NULL; + break; + } + } +- spin_unlock(&lag_lock); ++ spin_unlock_irqrestore(&lag_lock, flags); + } + + static void mlx5_ldev_add_mdev(struct mlx5_lag *ldev, +@@ -1234,7 +1236,7 @@ void mlx5_lag_add_netdev(struct mlx5_core_dev *dev, 
+ mlx5_ldev_add_netdev(ldev, dev, netdev); + + for (i = 0; i < ldev->ports; i++) +- if (!ldev->pf[i].dev) ++ if (!ldev->pf[i].netdev) + break; + + if (i >= ldev->ports) +@@ -1246,12 +1248,13 @@ void mlx5_lag_add_netdev(struct mlx5_core_dev *dev, + bool mlx5_lag_is_roce(struct mlx5_core_dev *dev) + { + struct mlx5_lag *ldev; ++ unsigned long flags; + bool res; + +- spin_lock(&lag_lock); ++ spin_lock_irqsave(&lag_lock, flags); + ldev = mlx5_lag_dev(dev); + res = ldev && __mlx5_lag_is_roce(ldev); +- spin_unlock(&lag_lock); ++ spin_unlock_irqrestore(&lag_lock, flags); + + return res; + } +@@ -1260,12 +1263,13 @@ EXPORT_SYMBOL(mlx5_lag_is_roce); + bool mlx5_lag_is_active(struct mlx5_core_dev *dev) + { + struct mlx5_lag *ldev; ++ unsigned long flags; + bool res; + +- spin_lock(&lag_lock); ++ spin_lock_irqsave(&lag_lock, flags); + ldev = mlx5_lag_dev(dev); + res = ldev && __mlx5_lag_is_active(ldev); +- spin_unlock(&lag_lock); ++ spin_unlock_irqrestore(&lag_lock, flags); + + return res; + } +@@ -1274,13 +1278,14 @@ EXPORT_SYMBOL(mlx5_lag_is_active); + bool mlx5_lag_is_master(struct mlx5_core_dev *dev) + { + struct mlx5_lag *ldev; ++ unsigned long flags; + bool res; + +- spin_lock(&lag_lock); ++ spin_lock_irqsave(&lag_lock, flags); + ldev = mlx5_lag_dev(dev); + res = ldev && __mlx5_lag_is_active(ldev) && + dev == ldev->pf[MLX5_LAG_P1].dev; +- spin_unlock(&lag_lock); ++ spin_unlock_irqrestore(&lag_lock, flags); + + return res; + } +@@ -1289,12 +1294,13 @@ EXPORT_SYMBOL(mlx5_lag_is_master); + bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev) + { + struct mlx5_lag *ldev; ++ unsigned long flags; + bool res; + +- spin_lock(&lag_lock); ++ spin_lock_irqsave(&lag_lock, flags); + ldev = mlx5_lag_dev(dev); + res = ldev && __mlx5_lag_is_sriov(ldev); +- spin_unlock(&lag_lock); ++ spin_unlock_irqrestore(&lag_lock, flags); + + return res; + } +@@ -1303,13 +1309,14 @@ EXPORT_SYMBOL(mlx5_lag_is_sriov); + bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev) + { + struct mlx5_lag *ldev; ++ unsigned long flags; + bool res; + +- spin_lock(&lag_lock); ++ spin_lock_irqsave(&lag_lock, flags); + ldev = mlx5_lag_dev(dev); + res = ldev && __mlx5_lag_is_sriov(ldev) && + test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags); +- spin_unlock(&lag_lock); ++ spin_unlock_irqrestore(&lag_lock, flags); + + return res; + } +@@ -1352,9 +1359,10 @@ struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev) + { + struct net_device *ndev = NULL; + struct mlx5_lag *ldev; ++ unsigned long flags; + int i; + +- spin_lock(&lag_lock); ++ spin_lock_irqsave(&lag_lock, flags); + ldev = mlx5_lag_dev(dev); + + if (!(ldev && __mlx5_lag_is_roce(ldev))) +@@ -1373,7 +1381,7 @@ struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev) + dev_hold(ndev); + + unlock: +- spin_unlock(&lag_lock); ++ spin_unlock_irqrestore(&lag_lock, flags); + + return ndev; + } +@@ -1383,10 +1391,11 @@ u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev, + struct net_device *slave) + { + struct mlx5_lag *ldev; ++ unsigned long flags; + u8 port = 0; + int i; + +- spin_lock(&lag_lock); ++ spin_lock_irqsave(&lag_lock, flags); + ldev = mlx5_lag_dev(dev); + if (!(ldev && __mlx5_lag_is_roce(ldev))) + goto unlock; +@@ -1401,7 +1410,7 @@ u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev, + port = ldev->v2p_map[port * ldev->buckets]; + + unlock: +- spin_unlock(&lag_lock); ++ spin_unlock_irqrestore(&lag_lock, flags); + return port; + } + EXPORT_SYMBOL(mlx5_lag_get_slave_port); +@@ -1422,8 +1431,9 @@ struct mlx5_core_dev *mlx5_lag_get_peer_mdev(struct 
mlx5_core_dev *dev) + { + struct mlx5_core_dev *peer_dev = NULL; + struct mlx5_lag *ldev; ++ unsigned long flags; + +- spin_lock(&lag_lock); ++ spin_lock_irqsave(&lag_lock, flags); + ldev = mlx5_lag_dev(dev); + if (!ldev) + goto unlock; +@@ -1433,7 +1443,7 @@ struct mlx5_core_dev *mlx5_lag_get_peer_mdev(struct mlx5_core_dev *dev) + ldev->pf[MLX5_LAG_P1].dev; + + unlock: +- spin_unlock(&lag_lock); ++ spin_unlock_irqrestore(&lag_lock, flags); + return peer_dev; + } + EXPORT_SYMBOL(mlx5_lag_get_peer_mdev); +@@ -1446,6 +1456,7 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev, + int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out); + struct mlx5_core_dev **mdev; + struct mlx5_lag *ldev; ++ unsigned long flags; + int num_ports; + int ret, i, j; + void *out; +@@ -1462,7 +1473,7 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev, + + memset(values, 0, sizeof(*values) * num_counters); + +- spin_lock(&lag_lock); ++ spin_lock_irqsave(&lag_lock, flags); + ldev = mlx5_lag_dev(dev); + if (ldev && __mlx5_lag_is_active(ldev)) { + num_ports = ldev->ports; +@@ -1472,7 +1483,7 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev, + num_ports = 1; + mdev[MLX5_LAG_P1] = dev; + } +- spin_unlock(&lag_lock); ++ spin_unlock_irqrestore(&lag_lock, flags); + + for (i = 0; i < num_ports; ++i) { + u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = {}; +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c +index ba2e5232b90be..616207c3b187a 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c +@@ -1472,7 +1472,9 @@ int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx) + memcpy(&dev->profile, &profile[profile_idx], sizeof(dev->profile)); + INIT_LIST_HEAD(&priv->ctx_list); + spin_lock_init(&priv->ctx_lock); ++ lockdep_register_key(&dev->lock_key); + mutex_init(&dev->intf_state_mutex); ++ lockdep_set_class(&dev->intf_state_mutex, &dev->lock_key); + + mutex_init(&priv->bfregs.reg_head.lock); + mutex_init(&priv->bfregs.wc_head.lock); +@@ -1527,6 +1529,7 @@ err_timeout_init: + mutex_destroy(&priv->bfregs.wc_head.lock); + mutex_destroy(&priv->bfregs.reg_head.lock); + mutex_destroy(&dev->intf_state_mutex); ++ lockdep_unregister_key(&dev->lock_key); + return err; + } + +@@ -1545,6 +1548,7 @@ void mlx5_mdev_uninit(struct mlx5_core_dev *dev) + mutex_destroy(&priv->bfregs.wc_head.lock); + mutex_destroy(&priv->bfregs.reg_head.lock); + mutex_destroy(&dev->intf_state_mutex); ++ lockdep_unregister_key(&dev->lock_key); + } + + static int probe_one(struct pci_dev *pdev, const struct pci_device_id *id) +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c +index ec76a8b1acc1c..60596357bfc7a 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c +@@ -376,8 +376,8 @@ retry: + goto out_dropped; + } + } ++ err = mlx5_cmd_check(dev, err, in, out); + if (err) { +- err = mlx5_cmd_check(dev, err, in, out); + mlx5_core_warn(dev, "func_id 0x%x, npages %d, err %d\n", + func_id, npages, err); + goto out_dropped; +@@ -524,10 +524,13 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u16 func_id, int npages, + dev->priv.reclaim_pages_discard += npages; + } + /* if triggered by FW event and failed by FW then ignore */ +- if (event && err == -EREMOTEIO) ++ if (event && err == -EREMOTEIO) { + err = 0; ++ goto out_free; ++ } ++ ++ err = mlx5_cmd_check(dev, 
err, in, out); + if (err) { +- err = mlx5_cmd_check(dev, err, in, out); + mlx5_core_err(dev, "failed reclaiming pages: err %d\n", err); + goto out_free; + } +diff --git a/drivers/net/ethernet/moxa/moxart_ether.c b/drivers/net/ethernet/moxa/moxart_ether.c +index f11f1cb92025f..3b6beb96ca856 100644 +--- a/drivers/net/ethernet/moxa/moxart_ether.c ++++ b/drivers/net/ethernet/moxa/moxart_ether.c +@@ -74,11 +74,6 @@ static int moxart_set_mac_address(struct net_device *ndev, void *addr) + static void moxart_mac_free_memory(struct net_device *ndev) + { + struct moxart_mac_priv_t *priv = netdev_priv(ndev); +- int i; +- +- for (i = 0; i < RX_DESC_NUM; i++) +- dma_unmap_single(&priv->pdev->dev, priv->rx_mapping[i], +- priv->rx_buf_size, DMA_FROM_DEVICE); + + if (priv->tx_desc_base) + dma_free_coherent(&priv->pdev->dev, +@@ -193,6 +188,7 @@ static int moxart_mac_open(struct net_device *ndev) + static int moxart_mac_stop(struct net_device *ndev) + { + struct moxart_mac_priv_t *priv = netdev_priv(ndev); ++ int i; + + napi_disable(&priv->napi); + +@@ -204,6 +200,11 @@ static int moxart_mac_stop(struct net_device *ndev) + /* disable all functions */ + writel(0, priv->base + REG_MAC_CTRL); + ++ /* unmap areas mapped in moxart_mac_setup_desc_ring() */ ++ for (i = 0; i < RX_DESC_NUM; i++) ++ dma_unmap_single(&priv->pdev->dev, priv->rx_mapping[i], ++ priv->rx_buf_size, DMA_FROM_DEVICE); ++ + return 0; + } + +diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c +index 1443f788ee37c..0be79c5167813 100644 +--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c ++++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c +@@ -1564,8 +1564,67 @@ static int ionic_set_features(struct net_device *netdev, + return err; + } + ++static int ionic_set_attr_mac(struct ionic_lif *lif, u8 *mac) ++{ ++ struct ionic_admin_ctx ctx = { ++ .work = COMPLETION_INITIALIZER_ONSTACK(ctx.work), ++ .cmd.lif_setattr = { ++ .opcode = IONIC_CMD_LIF_SETATTR, ++ .index = cpu_to_le16(lif->index), ++ .attr = IONIC_LIF_ATTR_MAC, ++ }, ++ }; ++ ++ ether_addr_copy(ctx.cmd.lif_setattr.mac, mac); ++ return ionic_adminq_post_wait(lif, &ctx); ++} ++ ++static int ionic_get_attr_mac(struct ionic_lif *lif, u8 *mac_addr) ++{ ++ struct ionic_admin_ctx ctx = { ++ .work = COMPLETION_INITIALIZER_ONSTACK(ctx.work), ++ .cmd.lif_getattr = { ++ .opcode = IONIC_CMD_LIF_GETATTR, ++ .index = cpu_to_le16(lif->index), ++ .attr = IONIC_LIF_ATTR_MAC, ++ }, ++ }; ++ int err; ++ ++ err = ionic_adminq_post_wait(lif, &ctx); ++ if (err) ++ return err; ++ ++ ether_addr_copy(mac_addr, ctx.comp.lif_getattr.mac); ++ return 0; ++} ++ ++static int ionic_program_mac(struct ionic_lif *lif, u8 *mac) ++{ ++ u8 get_mac[ETH_ALEN]; ++ int err; ++ ++ err = ionic_set_attr_mac(lif, mac); ++ if (err) ++ return err; ++ ++ err = ionic_get_attr_mac(lif, get_mac); ++ if (err) ++ return err; ++ ++ /* To deal with older firmware that silently ignores the set attr mac: ++ * doesn't actually change the mac and doesn't return an error, so we ++ * do the get attr to verify whether or not the set actually happened ++ */ ++ if (!ether_addr_equal(get_mac, mac)) ++ return 1; ++ ++ return 0; ++} ++ + static int ionic_set_mac_address(struct net_device *netdev, void *sa) + { ++ struct ionic_lif *lif = netdev_priv(netdev); + struct sockaddr *addr = sa; + u8 *mac; + int err; +@@ -1574,6 +1633,14 @@ static int ionic_set_mac_address(struct net_device *netdev, void *sa) + if (ether_addr_equal(netdev->dev_addr, mac)) + return 0; + ++ err = ionic_program_mac(lif, 
mac); ++ if (err < 0) ++ return err; ++ ++ if (err > 0) ++ netdev_dbg(netdev, "%s: SET and GET ATTR Mac are not equal - due to old FW running\n", ++ __func__); ++ + err = eth_prepare_mac_addr_change(netdev, addr); + if (err) + return err; +@@ -2963,6 +3030,9 @@ static void ionic_lif_handle_fw_up(struct ionic_lif *lif) + + mutex_lock(&lif->queue_lock); + ++ if (test_and_clear_bit(IONIC_LIF_F_BROKEN, lif->state)) ++ dev_info(ionic->dev, "FW Up: clearing broken state\n"); ++ + err = ionic_qcqs_alloc(lif); + if (err) + goto err_unlock; +@@ -3169,6 +3239,7 @@ static int ionic_station_set(struct ionic_lif *lif) + .attr = IONIC_LIF_ATTR_MAC, + }, + }; ++ u8 mac_address[ETH_ALEN]; + struct sockaddr addr; + int err; + +@@ -3177,8 +3248,23 @@ static int ionic_station_set(struct ionic_lif *lif) + return err; + netdev_dbg(lif->netdev, "found initial MAC addr %pM\n", + ctx.comp.lif_getattr.mac); +- if (is_zero_ether_addr(ctx.comp.lif_getattr.mac)) +- return 0; ++ ether_addr_copy(mac_address, ctx.comp.lif_getattr.mac); ++ ++ if (is_zero_ether_addr(mac_address)) { ++ eth_hw_addr_random(netdev); ++ netdev_dbg(netdev, "Random Mac generated: %pM\n", netdev->dev_addr); ++ ether_addr_copy(mac_address, netdev->dev_addr); ++ ++ err = ionic_program_mac(lif, mac_address); ++ if (err < 0) ++ return err; ++ ++ if (err > 0) { ++ netdev_dbg(netdev, "%s: SET/GET ATTR Mac are not the same - due to old FW running\n", ++ __func__); ++ return 0; ++ } ++ } + + if (!is_zero_ether_addr(netdev->dev_addr)) { + /* If the netdev mac is non-zero and doesn't match the default +@@ -3186,12 +3272,11 @@ static int ionic_station_set(struct ionic_lif *lif) + * likely here again after a fw-upgrade reset. We need to be + * sure the netdev mac is in our filter list. + */ +- if (!ether_addr_equal(ctx.comp.lif_getattr.mac, +- netdev->dev_addr)) ++ if (!ether_addr_equal(mac_address, netdev->dev_addr)) + ionic_lif_addr_add(lif, netdev->dev_addr); + } else { + /* Update the netdev mac with the device's mac */ +- memcpy(addr.sa_data, ctx.comp.lif_getattr.mac, netdev->addr_len); ++ ether_addr_copy(addr.sa_data, mac_address); + addr.sa_family = AF_INET; + err = eth_prepare_mac_addr_change(netdev, &addr); + if (err) { +diff --git a/drivers/net/ethernet/pensando/ionic/ionic_main.c b/drivers/net/ethernet/pensando/ionic/ionic_main.c +index 4029b4e021f86..56f93b0305519 100644 +--- a/drivers/net/ethernet/pensando/ionic/ionic_main.c ++++ b/drivers/net/ethernet/pensando/ionic/ionic_main.c +@@ -474,8 +474,8 @@ try_again: + ionic_opcode_to_str(opcode), opcode, + ionic_error_to_str(err), err); + +- msleep(1000); + iowrite32(0, &idev->dev_cmd_regs->done); ++ msleep(1000); + iowrite32(1, &idev->dev_cmd_regs->doorbell); + goto try_again; + } +@@ -488,6 +488,8 @@ try_again: + return ionic_error_to_errno(err); + } + ++ ionic_dev_cmd_clean(ionic); ++ + return 0; + } + +diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c b/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c +index caa4bfc4c1d62..9b6138b117766 100644 +--- a/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c ++++ b/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c +@@ -258,14 +258,18 @@ EXPORT_SYMBOL_GPL(stmmac_set_mac_addr); + /* Enable disable MAC RX/TX */ + void stmmac_set_mac(void __iomem *ioaddr, bool enable) + { +- u32 value = readl(ioaddr + MAC_CTRL_REG); ++ u32 old_val, value; ++ ++ old_val = readl(ioaddr + MAC_CTRL_REG); ++ value = old_val; + + if (enable) + value |= MAC_ENABLE_RX | MAC_ENABLE_TX; + else + value &= ~(MAC_ENABLE_TX | MAC_ENABLE_RX); + +- writel(value, ioaddr + MAC_CTRL_REG); ++ if 
(value != old_val) ++ writel(value, ioaddr + MAC_CTRL_REG); + } + + void stmmac_get_mac_addr(void __iomem *ioaddr, unsigned char *addr, +diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +index c5f33630e7718..78f11dabca056 100644 +--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c ++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +@@ -983,10 +983,10 @@ static void stmmac_mac_link_up(struct phylink_config *config, + bool tx_pause, bool rx_pause) + { + struct stmmac_priv *priv = netdev_priv(to_net_dev(config->dev)); +- u32 ctrl; ++ u32 old_ctrl, ctrl; + +- ctrl = readl(priv->ioaddr + MAC_CTRL_REG); +- ctrl &= ~priv->hw->link.speed_mask; ++ old_ctrl = readl(priv->ioaddr + MAC_CTRL_REG); ++ ctrl = old_ctrl & ~priv->hw->link.speed_mask; + + if (interface == PHY_INTERFACE_MODE_USXGMII) { + switch (speed) { +@@ -1061,7 +1061,8 @@ static void stmmac_mac_link_up(struct phylink_config *config, + if (tx_pause && rx_pause) + stmmac_mac_flow_ctrl(priv, duplex); + +- writel(ctrl, priv->ioaddr + MAC_CTRL_REG); ++ if (ctrl != old_ctrl) ++ writel(ctrl, priv->ioaddr + MAC_CTRL_REG); + + stmmac_mac_set(priv, priv->ioaddr, true); + if (phy && priv->dma_cap.eee) { +diff --git a/drivers/net/ipa/ipa_mem.c b/drivers/net/ipa/ipa_mem.c +index 1e9eae208e44f..53a1dbeaffa6d 100644 +--- a/drivers/net/ipa/ipa_mem.c ++++ b/drivers/net/ipa/ipa_mem.c +@@ -568,7 +568,7 @@ static int ipa_smem_init(struct ipa *ipa, u32 item, size_t size) + } + + /* Align the address down and the size up to a page boundary */ +- addr = qcom_smem_virt_to_phys(virt) & PAGE_MASK; ++ addr = qcom_smem_virt_to_phys(virt); + phys = addr & PAGE_MASK; + size = PAGE_ALIGN(size + addr - phys); + iova = phys; /* We just want a direct mapping */ +diff --git a/drivers/net/ipvlan/ipvtap.c b/drivers/net/ipvlan/ipvtap.c +index ef02f2cf5ce13..cbabca167a078 100644 +--- a/drivers/net/ipvlan/ipvtap.c ++++ b/drivers/net/ipvlan/ipvtap.c +@@ -194,7 +194,7 @@ static struct notifier_block ipvtap_notifier_block __read_mostly = { + .notifier_call = ipvtap_device_event, + }; + +-static int ipvtap_init(void) ++static int __init ipvtap_init(void) + { + int err; + +@@ -228,7 +228,7 @@ out1: + } + module_init(ipvtap_init); + +-static void ipvtap_exit(void) ++static void __exit ipvtap_exit(void) + { + rtnl_link_unregister(&ipvtap_link_ops); + unregister_netdevice_notifier(&ipvtap_notifier_block); +diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c +index f354fad05714a..5b0b23e55fa76 100644 +--- a/drivers/net/macsec.c ++++ b/drivers/net/macsec.c +@@ -449,11 +449,6 @@ static struct macsec_eth_header *macsec_ethhdr(struct sk_buff *skb) + return (struct macsec_eth_header *)skb_mac_header(skb); + } + +-static sci_t dev_to_sci(struct net_device *dev, __be16 port) +-{ +- return make_sci(dev->dev_addr, port); +-} +- + static void __macsec_pn_wrapped(struct macsec_secy *secy, + struct macsec_tx_sa *tx_sa) + { +@@ -3622,7 +3617,6 @@ static int macsec_set_mac_address(struct net_device *dev, void *p) + + out: + eth_hw_addr_set(dev, addr->sa_data); +- macsec->secy.sci = dev_to_sci(dev, MACSEC_PORT_ES); + + /* If h/w offloading is available, propagate to the device */ + if (macsec_is_offloaded(macsec)) { +@@ -3960,6 +3954,11 @@ static bool sci_exists(struct net_device *dev, sci_t sci) + return false; + } + ++static sci_t dev_to_sci(struct net_device *dev, __be16 port) ++{ ++ return make_sci(dev->dev_addr, port); ++} ++ + static int macsec_add_dev(struct net_device *dev, sci_t sci, u8 icv_len) + { + struct 
macsec_dev *macsec = macsec_priv(dev); +diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c +index 608de5a94165f..f90a21781d8d6 100644 +--- a/drivers/net/phy/phy_device.c ++++ b/drivers/net/phy/phy_device.c +@@ -316,11 +316,11 @@ static __maybe_unused int mdio_bus_phy_resume(struct device *dev) + + phydev->suspended_by_mdio_bus = 0; + +- /* If we managed to get here with the PHY state machine in a state other +- * than PHY_HALTED this is an indication that something went wrong and +- * we should most likely be using MAC managed PM and we are not. +- */ +- WARN_ON(phydev->state != PHY_HALTED && !phydev->mac_managed_pm); ++ /* If we managed to get here with the PHY state machine in a state neither ++ * PHY_HALTED nor PHY_READY, this is an indication that something went wrong ++ * and we should most likely be using MAC managed PM and we are not. ++ */ ++ WARN_ON(phydev->state != PHY_HALTED && phydev->state != PHY_READY); + + ret = phy_init_hw(phydev); + if (ret < 0) +diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c +index 0f6efaabaa32b..d142ac8fcf6e2 100644 +--- a/drivers/net/usb/r8152.c ++++ b/drivers/net/usb/r8152.c +@@ -5906,6 +5906,11 @@ static void r8153_enter_oob(struct r8152 *tp) + ocp_data &= ~NOW_IS_OOB; + ocp_write_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL, ocp_data); + ++ /* RX FIFO settings for OOB */ ++ ocp_write_dword(tp, MCU_TYPE_PLA, PLA_RXFIFO_CTRL0, RXFIFO_THR1_OOB); ++ ocp_write_word(tp, MCU_TYPE_PLA, PLA_RXFIFO_CTRL1, RXFIFO_THR2_OOB); ++ ocp_write_word(tp, MCU_TYPE_PLA, PLA_RXFIFO_CTRL2, RXFIFO_THR3_OOB); ++ + rtl_disable(tp); + rtl_reset_bmu(tp); + +@@ -6431,21 +6436,8 @@ static void r8156_fc_parameter(struct r8152 *tp) + u32 pause_on = tp->fc_pause_on ? tp->fc_pause_on : fc_pause_on_auto(tp); + u32 pause_off = tp->fc_pause_off ? 
tp->fc_pause_off : fc_pause_off_auto(tp); + +- switch (tp->version) { +- case RTL_VER_10: +- case RTL_VER_11: +- ocp_write_word(tp, MCU_TYPE_PLA, PLA_RX_FIFO_FULL, pause_on / 8); +- ocp_write_word(tp, MCU_TYPE_PLA, PLA_RX_FIFO_EMPTY, pause_off / 8); +- break; +- case RTL_VER_12: +- case RTL_VER_13: +- case RTL_VER_15: +- ocp_write_word(tp, MCU_TYPE_PLA, PLA_RX_FIFO_FULL, pause_on / 16); +- ocp_write_word(tp, MCU_TYPE_PLA, PLA_RX_FIFO_EMPTY, pause_off / 16); +- break; +- default: +- break; +- } ++ ocp_write_word(tp, MCU_TYPE_PLA, PLA_RX_FIFO_FULL, pause_on / 16); ++ ocp_write_word(tp, MCU_TYPE_PLA, PLA_RX_FIFO_EMPTY, pause_off / 16); + } + + static void rtl8156_change_mtu(struct r8152 *tp) +@@ -6557,6 +6549,11 @@ static void rtl8156_down(struct r8152 *tp) + ocp_data &= ~NOW_IS_OOB; + ocp_write_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL, ocp_data); + ++ /* RX FIFO settings for OOB */ ++ ocp_write_word(tp, MCU_TYPE_PLA, PLA_RXFIFO_FULL, 64 / 16); ++ ocp_write_word(tp, MCU_TYPE_PLA, PLA_RX_FIFO_FULL, 1024 / 16); ++ ocp_write_word(tp, MCU_TYPE_PLA, PLA_RX_FIFO_EMPTY, 4096 / 16); ++ + rtl_disable(tp); + rtl_reset_bmu(tp); + +diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c +index faa279bbbcb2c..7eb23805aa942 100644 +--- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c ++++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c +@@ -1403,6 +1403,8 @@ int mt76_connac_mcu_uni_add_bss(struct mt76_phy *phy, + else + conn_type = CONNECTION_INFRA_AP; + basic_req.basic.conn_type = cpu_to_le32(conn_type); ++ /* Fully active/deactivate BSS network in AP mode only */ ++ basic_req.basic.active = enable; + break; + case NL80211_IFTYPE_STATION: + if (vif->p2p) +diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/main.c b/drivers/net/wireless/mediatek/mt76/mt7921/main.c +index e86fe9ee4623e..d3f310877248b 100644 +--- a/drivers/net/wireless/mediatek/mt76/mt7921/main.c ++++ b/drivers/net/wireless/mediatek/mt76/mt7921/main.c +@@ -653,15 +653,6 @@ static void mt7921_bss_info_changed(struct ieee80211_hw *hw, + } + } + +- if (changed & BSS_CHANGED_BEACON_ENABLED && info->enable_beacon) { +- struct mt7921_vif *mvif = (struct mt7921_vif *)vif->drv_priv; +- +- mt76_connac_mcu_uni_add_bss(phy->mt76, vif, &mvif->sta.wcid, +- true); +- mt7921_mcu_sta_update(dev, NULL, vif, true, +- MT76_STA_INFO_STATE_NONE); +- } +- + if (changed & (BSS_CHANGED_BEACON | + BSS_CHANGED_BEACON_ENABLED)) + mt7921_mcu_uni_add_beacon_offload(dev, hw, vif, +@@ -1500,6 +1491,42 @@ mt7921_channel_switch_beacon(struct ieee80211_hw *hw, + mt7921_mutex_release(dev); + } + ++static int ++mt7921_start_ap(struct ieee80211_hw *hw, struct ieee80211_vif *vif) ++{ ++ struct mt7921_vif *mvif = (struct mt7921_vif *)vif->drv_priv; ++ struct mt7921_phy *phy = mt7921_hw_phy(hw); ++ struct mt7921_dev *dev = mt7921_hw_dev(hw); ++ int err; ++ ++ err = mt76_connac_mcu_uni_add_bss(phy->mt76, vif, &mvif->sta.wcid, ++ true); ++ if (err) ++ return err; ++ ++ err = mt7921_mcu_set_bss_pm(dev, vif, true); ++ if (err) ++ return err; ++ ++ return mt7921_mcu_sta_update(dev, NULL, vif, true, ++ MT76_STA_INFO_STATE_NONE); ++} ++ ++static void ++mt7921_stop_ap(struct ieee80211_hw *hw, struct ieee80211_vif *vif) ++{ ++ struct mt7921_vif *mvif = (struct mt7921_vif *)vif->drv_priv; ++ struct mt7921_phy *phy = mt7921_hw_phy(hw); ++ struct mt7921_dev *dev = mt7921_hw_dev(hw); ++ int err; ++ ++ err = mt7921_mcu_set_bss_pm(dev, vif, false); ++ if (err) ++ return; ++ ++ mt76_connac_mcu_uni_add_bss(phy->mt76, vif, 
&mvif->sta.wcid, false); ++} ++ + const struct ieee80211_ops mt7921_ops = { + .tx = mt7921_tx, + .start = mt7921_start, +@@ -1510,6 +1537,8 @@ const struct ieee80211_ops mt7921_ops = { + .conf_tx = mt7921_conf_tx, + .configure_filter = mt7921_configure_filter, + .bss_info_changed = mt7921_bss_info_changed, ++ .start_ap = mt7921_start_ap, ++ .stop_ap = mt7921_stop_ap, + .sta_state = mt7921_sta_state, + .sta_pre_rcu_remove = mt76_sta_pre_rcu_remove, + .set_key = mt7921_set_key, +diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c +index 613a94be8ea44..6d0aceb5226ab 100644 +--- a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c ++++ b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c +@@ -1020,7 +1020,7 @@ mt7921_mcu_uni_bss_bcnft(struct mt7921_dev *dev, struct ieee80211_vif *vif, + &bcnft_req, sizeof(bcnft_req), true); + } + +-static int ++int + mt7921_mcu_set_bss_pm(struct mt7921_dev *dev, struct ieee80211_vif *vif, + bool enable) + { +@@ -1049,9 +1049,6 @@ mt7921_mcu_set_bss_pm(struct mt7921_dev *dev, struct ieee80211_vif *vif, + }; + int err; + +- if (vif->type != NL80211_IFTYPE_STATION) +- return 0; +- + err = mt76_mcu_send_msg(&dev->mt76, MCU_CE_CMD(SET_BSS_ABORT), + &req_hdr, sizeof(req_hdr), false); + if (err < 0 || !enable) +diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h +index 66054123bcc47..cebc3cfa01b8a 100644 +--- a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h ++++ b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h +@@ -280,6 +280,8 @@ int mt7921_wpdma_reset(struct mt7921_dev *dev, bool force); + int mt7921_wpdma_reinit_cond(struct mt7921_dev *dev); + void mt7921_dma_cleanup(struct mt7921_dev *dev); + int mt7921_run_firmware(struct mt7921_dev *dev); ++int mt7921_mcu_set_bss_pm(struct mt7921_dev *dev, struct ieee80211_vif *vif, ++ bool enable); + int mt7921_mcu_sta_update(struct mt7921_dev *dev, struct ieee80211_sta *sta, + struct ieee80211_vif *vif, bool enable, + enum mt76_sta_info_state state); +diff --git a/drivers/nfc/pn533/uart.c b/drivers/nfc/pn533/uart.c +index 2caf997f9bc94..07596bf5f7d6d 100644 +--- a/drivers/nfc/pn533/uart.c ++++ b/drivers/nfc/pn533/uart.c +@@ -310,6 +310,7 @@ static void pn532_uart_remove(struct serdev_device *serdev) + pn53x_unregister_nfc(pn532->priv); + serdev_device_close(serdev); + pn53x_common_clean(pn532->priv); ++ del_timer_sync(&pn532->cmd_timeout); + kfree_skb(pn532->recv_skb); + kfree(pn532); + } +diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c +index 6ffc9e4258a80..78edb1ea4748d 100644 +--- a/drivers/scsi/scsi_lib.c ++++ b/drivers/scsi/scsi_lib.c +@@ -1549,7 +1549,6 @@ static blk_status_t scsi_prepare_cmd(struct request *req) + scsi_init_command(sdev, cmd); + + cmd->eh_eflags = 0; +- cmd->allowed = 0; + cmd->prot_type = 0; + cmd->prot_flags = 0; + cmd->submitter = 0; +@@ -1600,6 +1599,8 @@ static blk_status_t scsi_prepare_cmd(struct request *req) + return ret; + } + ++ /* Usually overridden by the ULP */ ++ cmd->allowed = 0; + memset(cmd->cmnd, 0, sizeof(cmd->cmnd)); + return scsi_cmd_to_driver(cmd)->init_command(cmd); + } +diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c +index fe000da113327..8ced292c4b962 100644 +--- a/drivers/scsi/storvsc_drv.c ++++ b/drivers/scsi/storvsc_drv.c +@@ -2012,7 +2012,7 @@ static int storvsc_probe(struct hv_device *device, + */ + host_dev->handle_error_wq = + alloc_ordered_workqueue("storvsc_error_wq_%d", +- WQ_MEM_RECLAIM, ++ 0, + 
host->host_no); + if (!host_dev->handle_error_wq) { + ret = -ENOMEM; +diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c +index b89075f3b6ab7..2efaed36a3adc 100644 +--- a/drivers/video/fbdev/core/fbcon.c ++++ b/drivers/video/fbdev/core/fbcon.c +@@ -2402,15 +2402,21 @@ static int fbcon_do_set_font(struct vc_data *vc, int w, int h, int charcount, + struct fb_info *info = fbcon_info_from_console(vc->vc_num); + struct fbcon_ops *ops = info->fbcon_par; + struct fbcon_display *p = &fb_display[vc->vc_num]; +- int resize; ++ int resize, ret, old_userfont, old_width, old_height, old_charcount; + char *old_data = NULL; + + resize = (w != vc->vc_font.width) || (h != vc->vc_font.height); + if (p->userfont) + old_data = vc->vc_font.data; + vc->vc_font.data = (void *)(p->fontdata = data); ++ old_userfont = p->userfont; + if ((p->userfont = userfont)) + REFCOUNT(data)++; ++ ++ old_width = vc->vc_font.width; ++ old_height = vc->vc_font.height; ++ old_charcount = vc->vc_font.charcount; ++ + vc->vc_font.width = w; + vc->vc_font.height = h; + vc->vc_font.charcount = charcount; +@@ -2426,7 +2432,9 @@ static int fbcon_do_set_font(struct vc_data *vc, int w, int h, int charcount, + rows = FBCON_SWAP(ops->rotate, info->var.yres, info->var.xres); + cols /= w; + rows /= h; +- vc_resize(vc, cols, rows); ++ ret = vc_resize(vc, cols, rows); ++ if (ret) ++ goto err_out; + } else if (con_is_visible(vc) + && vc->vc_mode == KD_TEXT) { + fbcon_clear_margins(vc, 0); +@@ -2436,6 +2444,21 @@ static int fbcon_do_set_font(struct vc_data *vc, int w, int h, int charcount, + if (old_data && (--REFCOUNT(old_data) == 0)) + kfree(old_data - FONT_EXTRA_WORDS * sizeof(int)); + return 0; ++ ++err_out: ++ p->fontdata = old_data; ++ vc->vc_font.data = (void *)old_data; ++ ++ if (userfont) { ++ p->userfont = old_userfont; ++ REFCOUNT(data)--; ++ } ++ ++ vc->vc_font.width = old_width; ++ vc->vc_font.height = old_height; ++ vc->vc_font.charcount = old_charcount; ++ ++ return ret; + } + + /* +diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c +index 3369734108af2..e88e8f6f0a334 100644 +--- a/drivers/xen/privcmd.c ++++ b/drivers/xen/privcmd.c +@@ -581,27 +581,30 @@ static int lock_pages( + struct privcmd_dm_op_buf kbufs[], unsigned int num, + struct page *pages[], unsigned int nr_pages, unsigned int *pinned) + { +- unsigned int i; ++ unsigned int i, off = 0; + +- for (i = 0; i < num; i++) { ++ for (i = 0; i < num; ) { + unsigned int requested; + int page_count; + + requested = DIV_ROUND_UP( + offset_in_page(kbufs[i].uptr) + kbufs[i].size, +- PAGE_SIZE); ++ PAGE_SIZE) - off; + if (requested > nr_pages) + return -ENOSPC; + + page_count = pin_user_pages_fast( +- (unsigned long) kbufs[i].uptr, ++ (unsigned long)kbufs[i].uptr + off * PAGE_SIZE, + requested, FOLL_WRITE, pages); +- if (page_count < 0) +- return page_count; ++ if (page_count <= 0) ++ return page_count ? : -EFAULT; + + *pinned += page_count; + nr_pages -= page_count; + pages += page_count; ++ ++ off = (requested == page_count) ? 
0 : off + page_count; ++ i += !off; + } + + return 0; +@@ -677,10 +680,8 @@ static long privcmd_ioctl_dm_op(struct file *file, void __user *udata) + } + + rc = lock_pages(kbufs, kdata.num, pages, nr_pages, &pinned); +- if (rc < 0) { +- nr_pages = pinned; ++ if (rc < 0) + goto out; +- } + + for (i = 0; i < kdata.num; i++) { + set_xen_guest_handle(xbufs[i].h, kbufs[i].uptr); +@@ -692,7 +693,7 @@ static long privcmd_ioctl_dm_op(struct file *file, void __user *udata) + xen_preemptible_hcall_end(); + + out: +- unlock_pages(pages, nr_pages); ++ unlock_pages(pages, pinned); + kfree(xbufs); + kfree(pages); + kfree(kbufs); +diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c +index deaed255f301e..a8ecd83abb11e 100644 +--- a/fs/btrfs/block-group.c ++++ b/fs/btrfs/block-group.c +@@ -440,39 +440,26 @@ void btrfs_wait_block_group_cache_progress(struct btrfs_block_group *cache, + btrfs_put_caching_control(caching_ctl); + } + +-int btrfs_wait_block_group_cache_done(struct btrfs_block_group *cache) ++static int btrfs_caching_ctl_wait_done(struct btrfs_block_group *cache, ++ struct btrfs_caching_control *caching_ctl) ++{ ++ wait_event(caching_ctl->wait, btrfs_block_group_done(cache)); ++ return cache->cached == BTRFS_CACHE_ERROR ? -EIO : 0; ++} ++ ++static int btrfs_wait_block_group_cache_done(struct btrfs_block_group *cache) + { + struct btrfs_caching_control *caching_ctl; +- int ret = 0; ++ int ret; + + caching_ctl = btrfs_get_caching_control(cache); + if (!caching_ctl) + return (cache->cached == BTRFS_CACHE_ERROR) ? -EIO : 0; +- +- wait_event(caching_ctl->wait, btrfs_block_group_done(cache)); +- if (cache->cached == BTRFS_CACHE_ERROR) +- ret = -EIO; ++ ret = btrfs_caching_ctl_wait_done(cache, caching_ctl); + btrfs_put_caching_control(caching_ctl); + return ret; + } + +-static bool space_cache_v1_done(struct btrfs_block_group *cache) +-{ +- bool ret; +- +- spin_lock(&cache->lock); +- ret = cache->cached != BTRFS_CACHE_FAST; +- spin_unlock(&cache->lock); +- +- return ret; +-} +- +-void btrfs_wait_space_cache_v1_finished(struct btrfs_block_group *cache, +- struct btrfs_caching_control *caching_ctl) +-{ +- wait_event(caching_ctl->wait, space_cache_v1_done(cache)); +-} +- + #ifdef CONFIG_BTRFS_DEBUG + static void fragment_free_space(struct btrfs_block_group *block_group) + { +@@ -750,9 +737,8 @@ done: + btrfs_put_block_group(block_group); + } + +-int btrfs_cache_block_group(struct btrfs_block_group *cache, int load_cache_only) ++int btrfs_cache_block_group(struct btrfs_block_group *cache, bool wait) + { +- DEFINE_WAIT(wait); + struct btrfs_fs_info *fs_info = cache->fs_info; + struct btrfs_caching_control *caching_ctl = NULL; + int ret = 0; +@@ -785,10 +771,7 @@ int btrfs_cache_block_group(struct btrfs_block_group *cache, int load_cache_only + } + WARN_ON(cache->caching_ctl); + cache->caching_ctl = caching_ctl; +- if (btrfs_test_opt(fs_info, SPACE_CACHE)) +- cache->cached = BTRFS_CACHE_FAST; +- else +- cache->cached = BTRFS_CACHE_STARTED; ++ cache->cached = BTRFS_CACHE_STARTED; + cache->has_caching_ctl = 1; + spin_unlock(&cache->lock); + +@@ -801,8 +784,8 @@ int btrfs_cache_block_group(struct btrfs_block_group *cache, int load_cache_only + + btrfs_queue_work(fs_info->caching_workers, &caching_ctl->work); + out: +- if (load_cache_only && caching_ctl) +- btrfs_wait_space_cache_v1_finished(cache, caching_ctl); ++ if (wait && caching_ctl) ++ ret = btrfs_caching_ctl_wait_done(cache, caching_ctl); + if (caching_ctl) + btrfs_put_caching_control(caching_ctl); + +@@ -3313,7 +3296,7 @@ int 
btrfs_update_block_group(struct btrfs_trans_handle *trans, + * space back to the block group, otherwise we will leak space. + */ + if (!alloc && !btrfs_block_group_done(cache)) +- btrfs_cache_block_group(cache, 1); ++ btrfs_cache_block_group(cache, true); + + byte_in_group = bytenr - cache->start; + WARN_ON(byte_in_group > cache->length); +diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h +index 35e0e860cc0bf..6b3cdc4cbc41e 100644 +--- a/fs/btrfs/block-group.h ++++ b/fs/btrfs/block-group.h +@@ -263,9 +263,7 @@ void btrfs_dec_nocow_writers(struct btrfs_block_group *bg); + void btrfs_wait_nocow_writers(struct btrfs_block_group *bg); + void btrfs_wait_block_group_cache_progress(struct btrfs_block_group *cache, + u64 num_bytes); +-int btrfs_wait_block_group_cache_done(struct btrfs_block_group *cache); +-int btrfs_cache_block_group(struct btrfs_block_group *cache, +- int load_cache_only); ++int btrfs_cache_block_group(struct btrfs_block_group *cache, bool wait); + void btrfs_put_caching_control(struct btrfs_caching_control *ctl); + struct btrfs_caching_control *btrfs_get_caching_control( + struct btrfs_block_group *cache); +diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h +index 3a51d0c13a957..7d3ca3ea0bcec 100644 +--- a/fs/btrfs/ctree.h ++++ b/fs/btrfs/ctree.h +@@ -494,7 +494,6 @@ struct btrfs_free_cluster { + enum btrfs_caching_type { + BTRFS_CACHE_NO, + BTRFS_CACHE_STARTED, +- BTRFS_CACHE_FAST, + BTRFS_CACHE_FINISHED, + BTRFS_CACHE_ERROR, + }; +diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c +index a7dd6ba25e990..c0f358b958abd 100644 +--- a/fs/btrfs/dev-replace.c ++++ b/fs/btrfs/dev-replace.c +@@ -165,7 +165,7 @@ no_valid_dev_replace_entry_found: + */ + if (btrfs_find_device(fs_info->fs_devices, &args)) { + btrfs_err(fs_info, +- "replace devid present without an active replace item"); ++"replace without active item, run 'device scan --forget' on the target device"); + ret = -EUCLEAN; + } else { + dev_replace->srcdev = NULL; +@@ -1128,8 +1128,7 @@ int btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info) + up_write(&dev_replace->rwsem); + + /* Scrub for replace must not be running in suspended state */ +- ret = btrfs_scrub_cancel(fs_info); +- ASSERT(ret != -ENOTCONN); ++ btrfs_scrub_cancel(fs_info); + + trans = btrfs_start_transaction(root, 0); + if (IS_ERR(trans)) { +diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c +index f2c79838ebe52..ced3fc76063f1 100644 +--- a/fs/btrfs/extent-tree.c ++++ b/fs/btrfs/extent-tree.c +@@ -2567,17 +2567,10 @@ int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans, + return -EINVAL; + + /* +- * pull in the free space cache (if any) so that our pin +- * removes the free space from the cache. We have load_only set +- * to one because the slow code to read in the free extents does check +- * the pinned extents. ++ * Fully cache the free space first so that our pin removes the free space ++ * from the cache. + */ +- btrfs_cache_block_group(cache, 1); +- /* +- * Make sure we wait until the cache is completely built in case it is +- * missing or is invalid and therefore needs to be rebuilt. 
+- */ +- ret = btrfs_wait_block_group_cache_done(cache); ++ ret = btrfs_cache_block_group(cache, true); + if (ret) + goto out; + +@@ -2600,12 +2593,7 @@ static int __exclude_logged_extent(struct btrfs_fs_info *fs_info, + if (!block_group) + return -EINVAL; + +- btrfs_cache_block_group(block_group, 1); +- /* +- * Make sure we wait until the cache is completely built in case it is +- * missing or is invalid and therefore needs to be rebuilt. +- */ +- ret = btrfs_wait_block_group_cache_done(block_group); ++ ret = btrfs_cache_block_group(block_group, true); + if (ret) + goto out; + +@@ -4415,7 +4403,7 @@ have_block_group: + ffe_ctl->cached = btrfs_block_group_done(block_group); + if (unlikely(!ffe_ctl->cached)) { + ffe_ctl->have_caching_bg = true; +- ret = btrfs_cache_block_group(block_group, 0); ++ ret = btrfs_cache_block_group(block_group, false); + + /* + * If we get ENOMEM here or something else we want to +@@ -6169,13 +6157,7 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range) + + if (end - start >= range->minlen) { + if (!btrfs_block_group_done(cache)) { +- ret = btrfs_cache_block_group(cache, 0); +- if (ret) { +- bg_failed++; +- bg_ret = ret; +- continue; +- } +- ret = btrfs_wait_block_group_cache_done(cache); ++ ret = btrfs_cache_block_group(cache, true); + if (ret) { + bg_failed++; + bg_ret = ret; +diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c +index 89c6d7ff19874..78df9b8557ddd 100644 +--- a/fs/btrfs/file.c ++++ b/fs/btrfs/file.c +@@ -2483,6 +2483,7 @@ static int fill_holes(struct btrfs_trans_handle *trans, + btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes); + btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes); + btrfs_set_file_extent_offset(leaf, fi, 0); ++ btrfs_set_file_extent_generation(leaf, fi, trans->transid); + btrfs_mark_buffer_dirty(leaf); + goto out; + } +@@ -2499,6 +2500,7 @@ static int fill_holes(struct btrfs_trans_handle *trans, + btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes); + btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes); + btrfs_set_file_extent_offset(leaf, fi, 0); ++ btrfs_set_file_extent_generation(leaf, fi, trans->transid); + btrfs_mark_buffer_dirty(leaf); + goto out; + } +diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c +index a64b26b169040..d647cb2938c01 100644 +--- a/fs/btrfs/root-tree.c ++++ b/fs/btrfs/root-tree.c +@@ -349,9 +349,10 @@ int btrfs_del_root_ref(struct btrfs_trans_handle *trans, u64 root_id, + key.offset = ref_id; + again: + ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1); +- if (ret < 0) ++ if (ret < 0) { ++ err = ret; + goto out; +- if (ret == 0) { ++ } else if (ret == 0) { + leaf = path->nodes[0]; + ref = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_root_ref); +diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c +index 9cd9d06f54699..3460fd6743807 100644 +--- a/fs/btrfs/volumes.c ++++ b/fs/btrfs/volumes.c +@@ -2344,8 +2344,11 @@ int btrfs_get_dev_args_from_path(struct btrfs_fs_info *fs_info, + + ret = btrfs_get_bdev_and_sb(path, FMODE_READ, fs_info->bdev_holder, 0, + &bdev, &disk_super); +- if (ret) ++ if (ret) { ++ btrfs_put_dev_args_from_path(args); + return ret; ++ } ++ + args->devid = btrfs_stack_device_id(&disk_super->dev_item); + memcpy(args->uuid, disk_super->dev_item.uuid, BTRFS_UUID_SIZE); + if (btrfs_fs_incompat(fs_info, METADATA_UUID)) +diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c +index 7421abcf325a5..5bb8d8c863119 100644 +--- a/fs/btrfs/xattr.c ++++ b/fs/btrfs/xattr.c +@@ -371,6 +371,9 @@ static int btrfs_xattr_handler_set(const struct xattr_handler 
*handler, + const char *name, const void *buffer, + size_t size, int flags) + { ++ if (btrfs_root_readonly(BTRFS_I(inode)->root)) ++ return -EROFS; ++ + name = xattr_full_name(handler, name); + return btrfs_setxattr_trans(inode, name, buffer, size, flags); + } +diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c +index aa4c1d403708f..3898ec2632dc4 100644 +--- a/fs/cifs/smb2ops.c ++++ b/fs/cifs/smb2ops.c +@@ -3671,7 +3671,7 @@ static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon, + static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon, + loff_t offset, loff_t len) + { +- struct inode *inode; ++ struct inode *inode = file_inode(file); + struct cifsFileInfo *cfile = file->private_data; + struct file_zero_data_information fsctl_buf; + long rc; +@@ -3680,14 +3680,12 @@ static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon, + + xid = get_xid(); + +- inode = d_inode(cfile->dentry); +- ++ inode_lock(inode); + /* Need to make file sparse, if not already, before freeing range. */ + /* Consider adding equivalent for compressed since it could also work */ + if (!smb2_set_sparse(xid, tcon, cfile, inode, set_sparse)) { + rc = -EOPNOTSUPP; +- free_xid(xid); +- return rc; ++ goto out; + } + + filemap_invalidate_lock(inode->i_mapping); +@@ -3707,8 +3705,10 @@ static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon, + true /* is_fctl */, (char *)&fsctl_buf, + sizeof(struct file_zero_data_information), + CIFSMaxBufSize, NULL, NULL); +- free_xid(xid); + filemap_invalidate_unlock(inode->i_mapping); ++out: ++ inode_unlock(inode); ++ free_xid(xid); + return rc; + } + +diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c +index c705de32e2257..c7614ade875b5 100644 +--- a/fs/cifs/smb2pdu.c ++++ b/fs/cifs/smb2pdu.c +@@ -2571,19 +2571,15 @@ alloc_path_with_tree_prefix(__le16 **out_path, int *out_size, int *out_len, + + path_len = UniStrnlen((wchar_t *)path, PATH_MAX); + +- /* +- * make room for one path separator between the treename and +- * path +- */ +- *out_len = treename_len + 1 + path_len; ++ /* make room for one path separator only if @path isn't empty */ ++ *out_len = treename_len + (path[0] ? 1 : 0) + path_len; + + /* +- * final path needs to be null-terminated UTF16 with a +- * size aligned to 8 ++ * final path needs to be 8-byte aligned as specified in ++ * MS-SMB2 2.2.13 SMB2 CREATE Request. 
+ */ +- +- *out_size = roundup((*out_len+1)*2, 8); +- *out_path = kzalloc(*out_size, GFP_KERNEL); ++ *out_size = roundup(*out_len * sizeof(__le16), 8); ++ *out_path = kzalloc(*out_size + sizeof(__le16) /* null */, GFP_KERNEL); + if (!*out_path) + return -ENOMEM; + +diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c +index 05221366a16dc..08a1993ab7fd3 100644 +--- a/fs/fs-writeback.c ++++ b/fs/fs-writeback.c +@@ -134,10 +134,10 @@ static bool inode_io_list_move_locked(struct inode *inode, + + static void wb_wakeup(struct bdi_writeback *wb) + { +- spin_lock_bh(&wb->work_lock); ++ spin_lock_irq(&wb->work_lock); + if (test_bit(WB_registered, &wb->state)) + mod_delayed_work(bdi_wq, &wb->dwork, 0); +- spin_unlock_bh(&wb->work_lock); ++ spin_unlock_irq(&wb->work_lock); + } + + static void finish_writeback_work(struct bdi_writeback *wb, +@@ -164,7 +164,7 @@ static void wb_queue_work(struct bdi_writeback *wb, + if (work->done) + atomic_inc(&work->done->cnt); + +- spin_lock_bh(&wb->work_lock); ++ spin_lock_irq(&wb->work_lock); + + if (test_bit(WB_registered, &wb->state)) { + list_add_tail(&work->list, &wb->work_list); +@@ -172,7 +172,7 @@ static void wb_queue_work(struct bdi_writeback *wb, + } else + finish_writeback_work(wb, work); + +- spin_unlock_bh(&wb->work_lock); ++ spin_unlock_irq(&wb->work_lock); + } + + /** +@@ -2082,13 +2082,13 @@ static struct wb_writeback_work *get_next_work_item(struct bdi_writeback *wb) + { + struct wb_writeback_work *work = NULL; + +- spin_lock_bh(&wb->work_lock); ++ spin_lock_irq(&wb->work_lock); + if (!list_empty(&wb->work_list)) { + work = list_entry(wb->work_list.next, + struct wb_writeback_work, list); + list_del_init(&work->list); + } +- spin_unlock_bh(&wb->work_lock); ++ spin_unlock_irq(&wb->work_lock); + return work; + } + +diff --git a/fs/namespace.c b/fs/namespace.c +index e6a7e769d25dd..a59f8d645654a 100644 +--- a/fs/namespace.c ++++ b/fs/namespace.c +@@ -4238,6 +4238,13 @@ static int build_mount_idmapped(const struct mount_attr *attr, size_t usize, + err = -EPERM; + goto out_fput; + } ++ ++ /* We're not controlling the target namespace. 
*/ ++ if (!ns_capable(mnt_userns, CAP_SYS_ADMIN)) { ++ err = -EPERM; ++ goto out_fput; ++ } ++ + kattr->mnt_userns = get_user_ns(mnt_userns); + + out_fput: +diff --git a/fs/nfs/file.c b/fs/nfs/file.c +index 2d72b1b7ed74c..9a0e4a89cdf14 100644 +--- a/fs/nfs/file.c ++++ b/fs/nfs/file.c +@@ -221,8 +221,10 @@ nfs_file_fsync_commit(struct file *file, int datasync) + int + nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) + { +- struct nfs_open_context *ctx = nfs_file_open_context(file); + struct inode *inode = file_inode(file); ++ struct nfs_inode *nfsi = NFS_I(inode); ++ long save_nredirtied = atomic_long_read(&nfsi->redirtied_pages); ++ long nredirtied; + int ret; + + trace_nfs_fsync_enter(inode); +@@ -237,15 +239,10 @@ nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) + ret = pnfs_sync_inode(inode, !!datasync); + if (ret != 0) + break; +- if (!test_and_clear_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags)) ++ nredirtied = atomic_long_read(&nfsi->redirtied_pages); ++ if (nredirtied == save_nredirtied) + break; +- /* +- * If nfs_file_fsync_commit detected a server reboot, then +- * resend all dirty pages that might have been covered by +- * the NFS_CONTEXT_RESEND_WRITES flag +- */ +- start = 0; +- end = LLONG_MAX; ++ save_nredirtied = nredirtied; + } + + trace_nfs_fsync_exit(inode, ret); +diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c +index b4e46b0ffa2dc..bea7c005119c3 100644 +--- a/fs/nfs/inode.c ++++ b/fs/nfs/inode.c +@@ -426,6 +426,7 @@ nfs_ilookup(struct super_block *sb, struct nfs_fattr *fattr, struct nfs_fh *fh) + static void nfs_inode_init_regular(struct nfs_inode *nfsi) + { + atomic_long_set(&nfsi->nrequests, 0); ++ atomic_long_set(&nfsi->redirtied_pages, 0); + INIT_LIST_HEAD(&nfsi->commit_info.list); + atomic_long_set(&nfsi->commit_info.ncommit, 0); + atomic_set(&nfsi->commit_info.rpcs_out, 0); +diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c +index e88f6b18445ec..9eb1812878795 100644 +--- a/fs/nfs/nfs4file.c ++++ b/fs/nfs/nfs4file.c +@@ -340,6 +340,11 @@ static struct file *__nfs42_ssc_open(struct vfsmount *ss_mnt, + goto out; + } + ++ if (!S_ISREG(fattr->mode)) { ++ res = ERR_PTR(-EBADF); ++ goto out; ++ } ++ + res = ERR_PTR(-ENOMEM); + len = strlen(SSC_READ_NAME_BODY) + 16; + read_name = kzalloc(len, GFP_KERNEL); +@@ -357,6 +362,7 @@ static struct file *__nfs42_ssc_open(struct vfsmount *ss_mnt, + r_ino->i_fop); + if (IS_ERR(filep)) { + res = ERR_CAST(filep); ++ iput(r_ino); + goto out_free_name; + } + +diff --git a/fs/nfs/write.c b/fs/nfs/write.c +index 1c706465d090b..5d7e1c2061842 100644 +--- a/fs/nfs/write.c ++++ b/fs/nfs/write.c +@@ -1419,10 +1419,12 @@ static void nfs_initiate_write(struct nfs_pgio_header *hdr, + */ + static void nfs_redirty_request(struct nfs_page *req) + { ++ struct nfs_inode *nfsi = NFS_I(page_file_mapping(req->wb_page)->host); ++ + /* Bump the transmission count */ + req->wb_nio++; + nfs_mark_request_dirty(req); +- set_bit(NFS_CONTEXT_RESEND_WRITES, &nfs_req_openctx(req)->flags); ++ atomic_long_inc(&nfsi->redirtied_pages); + nfs_end_page_writeback(req); + nfs_release_request(req); + } +@@ -1892,7 +1894,7 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data) + /* We have a mismatch. 
Write the page again */ + dprintk_cont(" mismatch\n"); + nfs_mark_request_dirty(req); +- set_bit(NFS_CONTEXT_RESEND_WRITES, &nfs_req_openctx(req)->flags); ++ atomic_long_inc(&NFS_I(data->inode)->redirtied_pages); + next: + nfs_unlock_and_release_request(req); + /* Latency breaker */ +diff --git a/fs/ntfs3/xattr.c b/fs/ntfs3/xattr.c +index 1b8c89dbf6684..3629049decac1 100644 +--- a/fs/ntfs3/xattr.c ++++ b/fs/ntfs3/xattr.c +@@ -478,8 +478,7 @@ out: + } + + #ifdef CONFIG_NTFS3_FS_POSIX_ACL +-static struct posix_acl *ntfs_get_acl_ex(struct user_namespace *mnt_userns, +- struct inode *inode, int type, ++static struct posix_acl *ntfs_get_acl_ex(struct inode *inode, int type, + int locked) + { + struct ntfs_inode *ni = ntfs_i(inode); +@@ -514,7 +513,7 @@ static struct posix_acl *ntfs_get_acl_ex(struct user_namespace *mnt_userns, + + /* Translate extended attribute to acl. */ + if (err >= 0) { +- acl = posix_acl_from_xattr(mnt_userns, buf, err); ++ acl = posix_acl_from_xattr(&init_user_ns, buf, err); + } else if (err == -ENODATA) { + acl = NULL; + } else { +@@ -537,8 +536,7 @@ struct posix_acl *ntfs_get_acl(struct inode *inode, int type, bool rcu) + if (rcu) + return ERR_PTR(-ECHILD); + +- /* TODO: init_user_ns? */ +- return ntfs_get_acl_ex(&init_user_ns, inode, type, 0); ++ return ntfs_get_acl_ex(inode, type, 0); + } + + static noinline int ntfs_set_acl_ex(struct user_namespace *mnt_userns, +@@ -590,7 +588,7 @@ static noinline int ntfs_set_acl_ex(struct user_namespace *mnt_userns, + value = kmalloc(size, GFP_NOFS); + if (!value) + return -ENOMEM; +- err = posix_acl_to_xattr(mnt_userns, acl, value, size); ++ err = posix_acl_to_xattr(&init_user_ns, acl, value, size); + if (err < 0) + goto out; + flags = 0; +@@ -641,7 +639,7 @@ static int ntfs_xattr_get_acl(struct user_namespace *mnt_userns, + if (!acl) + return -ENODATA; + +- err = posix_acl_to_xattr(mnt_userns, acl, buffer, size); ++ err = posix_acl_to_xattr(&init_user_ns, acl, buffer, size); + posix_acl_release(acl); + + return err; +@@ -665,12 +663,12 @@ static int ntfs_xattr_set_acl(struct user_namespace *mnt_userns, + if (!value) { + acl = NULL; + } else { +- acl = posix_acl_from_xattr(mnt_userns, value, size); ++ acl = posix_acl_from_xattr(&init_user_ns, value, size); + if (IS_ERR(acl)) + return PTR_ERR(acl); + + if (acl) { +- err = posix_acl_valid(mnt_userns, acl); ++ err = posix_acl_valid(&init_user_ns, acl); + if (err) + goto release_and_out; + } +diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c +index 801e60bab9555..c28bc983a7b1c 100644 +--- a/fs/ocfs2/dlmglue.c ++++ b/fs/ocfs2/dlmglue.c +@@ -3403,10 +3403,12 @@ void ocfs2_dlm_shutdown(struct ocfs2_super *osb, + ocfs2_lock_res_free(&osb->osb_nfs_sync_lockres); + ocfs2_lock_res_free(&osb->osb_orphan_scan.os_lockres); + +- ocfs2_cluster_disconnect(osb->cconn, hangup_pending); +- osb->cconn = NULL; ++ if (osb->cconn) { ++ ocfs2_cluster_disconnect(osb->cconn, hangup_pending); ++ osb->cconn = NULL; + +- ocfs2_dlm_shutdown_debug(osb); ++ ocfs2_dlm_shutdown_debug(osb); ++ } + } + + static int ocfs2_drop_lock(struct ocfs2_super *osb, +diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c +index 438be028935d2..bc18c27e96830 100644 +--- a/fs/ocfs2/super.c ++++ b/fs/ocfs2/super.c +@@ -1914,8 +1914,7 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err) + !ocfs2_is_hard_readonly(osb)) + hangup_needed = 1; + +- if (osb->cconn) +- ocfs2_dlm_shutdown(osb, hangup_needed); ++ ocfs2_dlm_shutdown(osb, hangup_needed); + + ocfs2_blockcheck_stats_debugfs_remove(&osb->osb_ecc_stats); + 
debugfs_remove_recursive(osb->osb_debug_root); +diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c +index 2d04e3470d4cd..313788bc0c307 100644 +--- a/fs/proc/task_mmu.c ++++ b/fs/proc/task_mmu.c +@@ -525,10 +525,12 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr, + struct vm_area_struct *vma = walk->vma; + bool locked = !!(vma->vm_flags & VM_LOCKED); + struct page *page = NULL; +- bool migration = false; ++ bool migration = false, young = false, dirty = false; + + if (pte_present(*pte)) { + page = vm_normal_page(vma, addr, *pte); ++ young = pte_young(*pte); ++ dirty = pte_dirty(*pte); + } else if (is_swap_pte(*pte)) { + swp_entry_t swpent = pte_to_swp_entry(*pte); + +@@ -558,8 +560,7 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr, + if (!page) + return; + +- smaps_account(mss, page, false, pte_young(*pte), pte_dirty(*pte), +- locked, migration); ++ smaps_account(mss, page, false, young, dirty, locked, migration); + } + + #ifdef CONFIG_TRANSPARENT_HUGEPAGE +diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c +index de86f5b2859f9..ab0576d372d6e 100644 +--- a/fs/userfaultfd.c ++++ b/fs/userfaultfd.c +@@ -1601,6 +1601,10 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx, + wake_userfault(vma->vm_userfaultfd_ctx.ctx, &range); + } + ++ /* Reset ptes for the whole vma range if wr-protected */ ++ if (userfaultfd_wp(vma)) ++ uffd_wp_range(mm, vma, start, vma_end - start, false); ++ + new_flags = vma->vm_flags & ~__VM_UFFD_FLAGS; + prev = vma_merge(mm, prev, start, vma_end, new_flags, + vma->anon_vma, vma->vm_file, vma->vm_pgoff, +diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h +index d0f7bdd2fdf23..db13bb620f527 100644 +--- a/include/asm-generic/sections.h ++++ b/include/asm-generic/sections.h +@@ -97,7 +97,7 @@ static inline bool memory_contains(void *begin, void *end, void *virt, + /** + * memory_intersects - checks if the region occupied by an object intersects + * with another memory region +- * @begin: virtual address of the beginning of the memory regien ++ * @begin: virtual address of the beginning of the memory region + * @end: virtual address of the end of the memory region + * @virt: virtual address of the memory object + * @size: size of the memory object +@@ -110,7 +110,10 @@ static inline bool memory_intersects(void *begin, void *end, void *virt, + { + void *vend = virt + size; + +- return (virt >= begin && virt < end) || (vend >= begin && vend < end); ++ if (virt < end && vend > begin) ++ return true; ++ ++ return false; + } + + /** +diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h +index 9ecead1042b9c..c322b98260f52 100644 +--- a/include/linux/memcontrol.h ++++ b/include/linux/memcontrol.h +@@ -978,19 +978,30 @@ static inline void mod_memcg_page_state(struct page *page, + + static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx) + { +- return READ_ONCE(memcg->vmstats.state[idx]); ++ long x = READ_ONCE(memcg->vmstats.state[idx]); ++#ifdef CONFIG_SMP ++ if (x < 0) ++ x = 0; ++#endif ++ return x; + } + + static inline unsigned long lruvec_page_state(struct lruvec *lruvec, + enum node_stat_item idx) + { + struct mem_cgroup_per_node *pn; ++ long x; + + if (mem_cgroup_disabled()) + return node_page_state(lruvec_pgdat(lruvec), idx); + + pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec); +- return READ_ONCE(pn->lruvec_stats.state[idx]); ++ x = READ_ONCE(pn->lruvec_stats.state[idx]); ++#ifdef CONFIG_SMP ++ if (x < 0) ++ x = 0; ++#endif ++ return x; + } + + static 
inline unsigned long lruvec_page_state_local(struct lruvec *lruvec, +diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h +index 5040cd774c5a3..b0b4ac92354a2 100644 +--- a/include/linux/mlx5/driver.h ++++ b/include/linux/mlx5/driver.h +@@ -773,6 +773,7 @@ struct mlx5_core_dev { + enum mlx5_device_state state; + /* sync interface state */ + struct mutex intf_state_mutex; ++ struct lock_class_key lock_key; + unsigned long intf_state; + struct mlx5_priv priv; + struct mlx5_profile profile; +diff --git a/include/linux/mm.h b/include/linux/mm.h +index 7898e29bcfb54..25b8860f47cc6 100644 +--- a/include/linux/mm.h ++++ b/include/linux/mm.h +@@ -2939,7 +2939,6 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address, + #define FOLL_MIGRATION 0x400 /* wait for page to replace migration entry */ + #define FOLL_TRIED 0x800 /* a retry, previous pass started an IO */ + #define FOLL_REMOTE 0x2000 /* we are working on non-current tsk/mm */ +-#define FOLL_COW 0x4000 /* internal GUP flag */ + #define FOLL_ANON 0x8000 /* don't do file mappings */ + #define FOLL_LONGTERM 0x10000 /* mapping lifetime is indefinite: see below */ + #define FOLL_SPLIT_PMD 0x20000 /* split huge pmd before returning */ +diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h +index 2563d30736e9a..db40bc62213bd 100644 +--- a/include/linux/netdevice.h ++++ b/include/linux/netdevice.h +@@ -640,9 +640,23 @@ extern int sysctl_devconf_inherit_init_net; + */ + static inline bool net_has_fallback_tunnels(const struct net *net) + { +- return !IS_ENABLED(CONFIG_SYSCTL) || +- !sysctl_fb_tunnels_only_for_init_net || +- (net == &init_net && sysctl_fb_tunnels_only_for_init_net == 1); ++#if IS_ENABLED(CONFIG_SYSCTL) ++ int fb_tunnels_only_for_init_net = READ_ONCE(sysctl_fb_tunnels_only_for_init_net); ++ ++ return !fb_tunnels_only_for_init_net || ++ (net_eq(net, &init_net) && fb_tunnels_only_for_init_net == 1); ++#else ++ return true; ++#endif ++} ++ ++static inline int net_inherit_devconf(void) ++{ ++#if IS_ENABLED(CONFIG_SYSCTL) ++ return READ_ONCE(sysctl_devconf_inherit_init_net); ++#else ++ return 0; ++#endif + } + + static inline int netdev_queue_numa_node_read(const struct netdev_queue *q) +diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h +index a13296d6c7ceb..fd533552a062c 100644 +--- a/include/linux/netfilter_bridge/ebtables.h ++++ b/include/linux/netfilter_bridge/ebtables.h +@@ -94,10 +94,6 @@ struct ebt_table { + struct ebt_replace_kernel *table; + unsigned int valid_hooks; + rwlock_t lock; +- /* e.g. could be the table explicitly only allows certain +- * matches, targets, ... 
0 == let it in */ +- int (*check)(const struct ebt_table_info *info, +- unsigned int valid_hooks); + /* the data used by the kernel */ + struct ebt_table_info *private; + struct nf_hook_ops *ops; +diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h +index a17c337dbdf1d..b1f83697699ee 100644 +--- a/include/linux/nfs_fs.h ++++ b/include/linux/nfs_fs.h +@@ -182,6 +182,7 @@ struct nfs_inode { + /* Regular file */ + struct { + atomic_long_t nrequests; ++ atomic_long_t redirtied_pages; + struct nfs_mds_commit_info commit_info; + struct mutex commit_mutex; + }; +diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h +index 732b522bacb7e..e1b8a915e9e9f 100644 +--- a/include/linux/userfaultfd_k.h ++++ b/include/linux/userfaultfd_k.h +@@ -73,6 +73,8 @@ extern ssize_t mcopy_continue(struct mm_struct *dst_mm, unsigned long dst_start, + extern int mwriteprotect_range(struct mm_struct *dst_mm, + unsigned long start, unsigned long len, + bool enable_wp, atomic_t *mmap_changing); ++extern void uffd_wp_range(struct mm_struct *dst_mm, struct vm_area_struct *vma, ++ unsigned long start, unsigned long len, bool enable_wp); + + /* mm helpers */ + static inline bool is_mergeable_vm_userfaultfd_ctx(struct vm_area_struct *vma, +diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h +index c4898fcbf923b..f90f0021f5f2d 100644 +--- a/include/net/busy_poll.h ++++ b/include/net/busy_poll.h +@@ -33,7 +33,7 @@ extern unsigned int sysctl_net_busy_poll __read_mostly; + + static inline bool net_busy_loop_on(void) + { +- return sysctl_net_busy_poll; ++ return READ_ONCE(sysctl_net_busy_poll); + } + + static inline bool sk_can_busy_loop(const struct sock *sk) +diff --git a/include/net/gro.h b/include/net/gro.h +index 867656b0739c0..24003dea8fa4d 100644 +--- a/include/net/gro.h ++++ b/include/net/gro.h +@@ -439,7 +439,7 @@ static inline void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb, + { + list_add_tail(&skb->list, &napi->rx_list); + napi->rx_count += segs; +- if (napi->rx_count >= gro_normal_batch) ++ if (napi->rx_count >= READ_ONCE(gro_normal_batch)) + gro_normal_list(napi); + } + +diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h +index 64daafd1fc41c..9c93e4981b680 100644 +--- a/include/net/netfilter/nf_flow_table.h ++++ b/include/net/netfilter/nf_flow_table.h +@@ -270,6 +270,7 @@ void flow_offload_refresh(struct nf_flowtable *flow_table, + + struct flow_offload_tuple_rhash *flow_offload_lookup(struct nf_flowtable *flow_table, + struct flow_offload_tuple *tuple); ++void nf_flow_table_gc_run(struct nf_flowtable *flow_table); + void nf_flow_table_gc_cleanup(struct nf_flowtable *flowtable, + struct net_device *dev); + void nf_flow_table_cleanup(struct net_device *dev); +@@ -306,6 +307,8 @@ void nf_flow_offload_stats(struct nf_flowtable *flowtable, + struct flow_offload *flow); + + void nf_flow_table_offload_flush(struct nf_flowtable *flowtable); ++void nf_flow_table_offload_flush_cleanup(struct nf_flowtable *flowtable); ++ + int nf_flow_table_offload_setup(struct nf_flowtable *flowtable, + struct net_device *dev, + enum flow_block_command cmd); +diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h +index b8890ace0f879..0daad6e63ccb2 100644 +--- a/include/net/netfilter/nf_tables.h ++++ b/include/net/netfilter/nf_tables.h +@@ -1635,6 +1635,7 @@ struct nftables_pernet { + struct list_head module_list; + struct list_head notify_list; + struct mutex commit_mutex; ++ u64 table_handle; + unsigned int 
base_seq; + u8 validate_state; + }; +diff --git a/include/ufs/ufshci.h b/include/ufs/ufshci.h +index f81aa95ffbc40..f525566a0864d 100644 +--- a/include/ufs/ufshci.h ++++ b/include/ufs/ufshci.h +@@ -135,11 +135,7 @@ static inline u32 ufshci_version(u32 major, u32 minor) + + #define UFSHCD_UIC_MASK (UIC_COMMAND_COMPL | UFSHCD_UIC_PWR_MASK) + +-#define UFSHCD_ERROR_MASK (UIC_ERROR |\ +- DEVICE_FATAL_ERROR |\ +- CONTROLLER_FATAL_ERROR |\ +- SYSTEM_BUS_FATAL_ERROR |\ +- CRYPTO_ENGINE_FATAL_ERROR) ++#define UFSHCD_ERROR_MASK (UIC_ERROR | INT_FATAL_ERRORS) + + #define INT_FATAL_ERRORS (DEVICE_FATAL_ERROR |\ + CONTROLLER_FATAL_ERROR |\ +diff --git a/init/main.c b/init/main.c +index 91642a4e69be6..1fe7942f5d4a8 100644 +--- a/init/main.c ++++ b/init/main.c +@@ -1446,13 +1446,25 @@ static noinline void __init kernel_init_freeable(void); + + #if defined(CONFIG_STRICT_KERNEL_RWX) || defined(CONFIG_STRICT_MODULE_RWX) + bool rodata_enabled __ro_after_init = true; ++ ++#ifndef arch_parse_debug_rodata ++static inline bool arch_parse_debug_rodata(char *str) { return false; } ++#endif ++ + static int __init set_debug_rodata(char *str) + { +- if (strtobool(str, &rodata_enabled)) ++ if (arch_parse_debug_rodata(str)) ++ return 0; ++ ++ if (str && !strcmp(str, "on")) ++ rodata_enabled = true; ++ else if (str && !strcmp(str, "off")) ++ rodata_enabled = false; ++ else + pr_warn("Invalid option string for rodata: '%s'\n", str); +- return 1; ++ return 0; + } +-__setup("rodata=", set_debug_rodata); ++early_param("rodata", set_debug_rodata); + #endif + + #ifdef CONFIG_STRICT_KERNEL_RWX +diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c +index 6a67dbf5195f0..cd155b7e1346d 100644 +--- a/io_uring/io_uring.c ++++ b/io_uring/io_uring.c +@@ -4331,7 +4331,12 @@ done: + copy_iov: + iov_iter_restore(&s->iter, &s->iter_state); + ret = io_setup_async_rw(req, iovec, s, false); +- return ret ?: -EAGAIN; ++ if (!ret) { ++ if (kiocb->ki_flags & IOCB_WRITE) ++ kiocb_end_write(req); ++ return -EAGAIN; ++ } ++ return ret; + } + out_free: + /* it's reportedly faster than delegating the null check to kfree() */ +diff --git a/kernel/audit_fsnotify.c b/kernel/audit_fsnotify.c +index 6432a37ac1c94..c565fbf66ac87 100644 +--- a/kernel/audit_fsnotify.c ++++ b/kernel/audit_fsnotify.c +@@ -102,6 +102,7 @@ struct audit_fsnotify_mark *audit_alloc_mark(struct audit_krule *krule, char *pa + + ret = fsnotify_add_inode_mark(&audit_mark->mark, inode, 0); + if (ret < 0) { ++ audit_mark->path = NULL; + fsnotify_put_mark(&audit_mark->mark); + audit_mark = ERR_PTR(ret); + } +diff --git a/kernel/auditsc.c b/kernel/auditsc.c +index 3a8c9d744800a..0c33e04c293ad 100644 +--- a/kernel/auditsc.c ++++ b/kernel/auditsc.c +@@ -1965,6 +1965,7 @@ void __audit_uring_exit(int success, long code) + goto out; + } + ++ audit_return_fixup(ctx, success, code); + if (ctx->context == AUDIT_CTX_SYSCALL) { + /* + * NOTE: See the note in __audit_uring_entry() about the case +@@ -2006,7 +2007,6 @@ void __audit_uring_exit(int success, long code) + audit_filter_inodes(current, ctx); + if (ctx->current_state != AUDIT_STATE_RECORD) + goto out; +- audit_return_fixup(ctx, success, code); + audit_log_exit(); + + out: +@@ -2090,13 +2090,13 @@ void __audit_syscall_exit(int success, long return_code) + if (!list_empty(&context->killed_trees)) + audit_kill_trees(context); + ++ audit_return_fixup(context, success, return_code); + /* run through both filters to ensure we set the filterkey properly */ + audit_filter_syscall(current, context); + audit_filter_inodes(current, context); + if 
(context->current_state < AUDIT_STATE_RECORD)
+ goto out;
+
+- audit_return_fixup(context, success, return_code);
+ audit_log_exit();
+
+ out:
+diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
+index e91d2faef1605..0e45d405f151c 100644
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -6999,8 +6999,7 @@ record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
+ struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
+ struct bpf_reg_state *regs = cur_regs(env), *reg;
+ struct bpf_map *map = meta->map_ptr;
+- struct tnum range;
+- u64 val;
++ u64 val, max;
+ int err;
+
+ if (func_id != BPF_FUNC_tail_call)
+@@ -7010,10 +7009,11 @@ record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
+ return -EINVAL;
+ }
+
+- range = tnum_range(0, map->max_entries - 1);
+ reg = &regs[BPF_REG_3];
++ val = reg->var_off.value;
++ max = map->max_entries;
+
+- if (!register_is_const(reg) || !tnum_in(range, reg->var_off)) {
++ if (!(register_is_const(reg) && val < max)) {
+ bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
+ return 0;
+ }
+@@ -7021,8 +7021,6 @@ record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
+ err = mark_chain_precision(env, BPF_REG_3);
+ if (err)
+ return err;
+-
+- val = reg->var_off.value;
+ if (bpf_map_key_unseen(aux))
+ bpf_map_key_store(aux, val);
+ else if (!bpf_map_key_poisoned(aux) &&
+diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
+index 13c8e91d78620..ce95aee05e8ae 100644
+--- a/kernel/cgroup/cgroup.c
++++ b/kernel/cgroup/cgroup.c
+@@ -1811,6 +1811,7 @@ int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask)
+
+ if (ss->css_rstat_flush) {
+ list_del_rcu(&css->rstat_css_node);
++ synchronize_rcu();
+ list_add_rcu(&css->rstat_css_node,
+ &dcgrp->rstat_css_list);
+ }
+diff --git a/kernel/kprobes.c b/kernel/kprobes.c
+index 80697e5e03e49..08350e35aba24 100644
+--- a/kernel/kprobes.c
++++ b/kernel/kprobes.c
+@@ -1707,11 +1707,12 @@ static struct kprobe *__disable_kprobe(struct kprobe *p)
+ /* Try to disarm and disable this/parent probe */
+ if (p == orig_p || aggr_kprobe_disabled(orig_p)) {
+ /*
+- * If 'kprobes_all_disarmed' is set, 'orig_p'
+- * should have already been disarmed, so
+- * skip unneed disarming process.
++ * Don't be lazy here. Even if 'kprobes_all_disarmed'
++ * is false, 'orig_p' might not have been armed yet.
++ * Note arm_all_kprobes() __tries__ to arm all kprobes
++ * on a best-effort basis.
+ */
+- if (!kprobes_all_disarmed) {
++ if (!kprobes_all_disarmed && !kprobe_disabled(orig_p)) {
+ ret = disarm_kprobe(orig_p, true);
+ if (ret) {
+ p->flags &= ~KPROBE_FLAG_DISABLED;
+diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
+index a492f159624fa..860b2dcf3ac46 100644
+--- a/kernel/sys_ni.c
++++ b/kernel/sys_ni.c
+@@ -277,6 +277,7 @@ COND_SYSCALL(landlock_restrict_self);
+
+ /* mm/fadvise.c */
+ COND_SYSCALL(fadvise64_64);
++COND_SYSCALL_COMPAT(fadvise64_64);
+
+ /* mm/, CONFIG_MMU only */
+ COND_SYSCALL(swapon);
+diff --git a/lib/ratelimit.c b/lib/ratelimit.c
+index e01a93f46f833..ce945c17980b9 100644
+--- a/lib/ratelimit.c
++++ b/lib/ratelimit.c
+@@ -26,10 +26,16 @@
+ */
+ int ___ratelimit(struct ratelimit_state *rs, const char *func)
+ {
++ /* Paired with WRITE_ONCE() in .proc_handler().
++ * Changing two values separately could be inconsistent
++ * and some messages could be lost. (See: net_ratelimit_state). 
++ */ ++ int interval = READ_ONCE(rs->interval); ++ int burst = READ_ONCE(rs->burst); + unsigned long flags; + int ret; + +- if (!rs->interval) ++ if (!interval) + return 1; + + /* +@@ -44,7 +50,7 @@ int ___ratelimit(struct ratelimit_state *rs, const char *func) + if (!rs->begin) + rs->begin = jiffies; + +- if (time_is_before_jiffies(rs->begin + rs->interval)) { ++ if (time_is_before_jiffies(rs->begin + interval)) { + if (rs->missed) { + if (!(rs->flags & RATELIMIT_MSG_ON_RELEASE)) { + printk_deferred(KERN_WARNING +@@ -56,7 +62,7 @@ int ___ratelimit(struct ratelimit_state *rs, const char *func) + rs->begin = jiffies; + rs->printed = 0; + } +- if (rs->burst && rs->burst > rs->printed) { ++ if (burst && burst > rs->printed) { + rs->printed++; + ret = 1; + } else { +diff --git a/mm/backing-dev.c b/mm/backing-dev.c +index 95550b8fa7fe2..de65cb1e5f761 100644 +--- a/mm/backing-dev.c ++++ b/mm/backing-dev.c +@@ -260,10 +260,10 @@ void wb_wakeup_delayed(struct bdi_writeback *wb) + unsigned long timeout; + + timeout = msecs_to_jiffies(dirty_writeback_interval * 10); +- spin_lock_bh(&wb->work_lock); ++ spin_lock_irq(&wb->work_lock); + if (test_bit(WB_registered, &wb->state)) + queue_delayed_work(bdi_wq, &wb->dwork, timeout); +- spin_unlock_bh(&wb->work_lock); ++ spin_unlock_irq(&wb->work_lock); + } + + static void wb_update_bandwidth_workfn(struct work_struct *work) +@@ -334,12 +334,12 @@ static void cgwb_remove_from_bdi_list(struct bdi_writeback *wb); + static void wb_shutdown(struct bdi_writeback *wb) + { + /* Make sure nobody queues further work */ +- spin_lock_bh(&wb->work_lock); ++ spin_lock_irq(&wb->work_lock); + if (!test_and_clear_bit(WB_registered, &wb->state)) { +- spin_unlock_bh(&wb->work_lock); ++ spin_unlock_irq(&wb->work_lock); + return; + } +- spin_unlock_bh(&wb->work_lock); ++ spin_unlock_irq(&wb->work_lock); + + cgwb_remove_from_bdi_list(wb); + /* +diff --git a/mm/bootmem_info.c b/mm/bootmem_info.c +index f18a631e74797..b1efebfcf94bb 100644 +--- a/mm/bootmem_info.c ++++ b/mm/bootmem_info.c +@@ -12,6 +12,7 @@ + #include <linux/memblock.h> + #include <linux/bootmem_info.h> + #include <linux/memory_hotplug.h> ++#include <linux/kmemleak.h> + + void get_page_bootmem(unsigned long info, struct page *page, unsigned long type) + { +@@ -33,6 +34,7 @@ void put_page_bootmem(struct page *page) + ClearPagePrivate(page); + set_page_private(page, 0); + INIT_LIST_HEAD(&page->lru); ++ kmemleak_free_part(page_to_virt(page), PAGE_SIZE); + free_reserved_page(page); + } + } +diff --git a/mm/damon/dbgfs.c b/mm/damon/dbgfs.c +index a0dab8b5e45f2..53ba8b1e619ca 100644 +--- a/mm/damon/dbgfs.c ++++ b/mm/damon/dbgfs.c +@@ -787,6 +787,9 @@ static int dbgfs_mk_context(char *name) + return -ENOENT; + + new_dir = debugfs_create_dir(name, root); ++ /* Below check is required for a potential duplicated name case */ ++ if (IS_ERR(new_dir)) ++ return PTR_ERR(new_dir); + dbgfs_dirs[dbgfs_nr_ctxs] = new_dir; + + new_ctx = dbgfs_new_ctx(); +diff --git a/mm/gup.c b/mm/gup.c +index fd3262ae92fc2..38effce68b48d 100644 +--- a/mm/gup.c ++++ b/mm/gup.c +@@ -478,14 +478,43 @@ static int follow_pfn_pte(struct vm_area_struct *vma, unsigned long address, + return -EEXIST; + } + +-/* +- * FOLL_FORCE can write to even unwritable pte's, but only +- * after we've gone through a COW cycle and they are dirty. +- */ +-static inline bool can_follow_write_pte(pte_t pte, unsigned int flags) ++/* FOLL_FORCE can write to even unwritable PTEs in COW mappings. 
*/ ++static inline bool can_follow_write_pte(pte_t pte, struct page *page, ++ struct vm_area_struct *vma, ++ unsigned int flags) + { +- return pte_write(pte) || +- ((flags & FOLL_FORCE) && (flags & FOLL_COW) && pte_dirty(pte)); ++ /* If the pte is writable, we can write to the page. */ ++ if (pte_write(pte)) ++ return true; ++ ++ /* Maybe FOLL_FORCE is set to override it? */ ++ if (!(flags & FOLL_FORCE)) ++ return false; ++ ++ /* But FOLL_FORCE has no effect on shared mappings */ ++ if (vma->vm_flags & (VM_MAYSHARE | VM_SHARED)) ++ return false; ++ ++ /* ... or read-only private ones */ ++ if (!(vma->vm_flags & VM_MAYWRITE)) ++ return false; ++ ++ /* ... or already writable ones that just need to take a write fault */ ++ if (vma->vm_flags & VM_WRITE) ++ return false; ++ ++ /* ++ * See can_change_pte_writable(): we broke COW and could map the page ++ * writable if we have an exclusive anonymous page ... ++ */ ++ if (!page || !PageAnon(page) || !PageAnonExclusive(page)) ++ return false; ++ ++ /* ... and a write-fault isn't required for other reasons. */ ++ if (IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) && ++ !(vma->vm_flags & VM_SOFTDIRTY) && !pte_soft_dirty(pte)) ++ return false; ++ return !userfaultfd_pte_wp(vma, pte); + } + + static struct page *follow_page_pte(struct vm_area_struct *vma, +@@ -528,12 +557,19 @@ retry: + } + if ((flags & FOLL_NUMA) && pte_protnone(pte)) + goto no_page; +- if ((flags & FOLL_WRITE) && !can_follow_write_pte(pte, flags)) { +- pte_unmap_unlock(ptep, ptl); +- return NULL; +- } + + page = vm_normal_page(vma, address, pte); ++ ++ /* ++ * We only care about anon pages in can_follow_write_pte() and don't ++ * have to worry about pte_devmap() because they are never anon. ++ */ ++ if ((flags & FOLL_WRITE) && ++ !can_follow_write_pte(pte, page, vma, flags)) { ++ page = NULL; ++ goto out; ++ } ++ + if (!page && pte_devmap(pte) && (flags & (FOLL_GET | FOLL_PIN))) { + /* + * Only return device mapping pages in the FOLL_GET or FOLL_PIN +@@ -967,17 +1003,6 @@ static int faultin_page(struct vm_area_struct *vma, + return -EBUSY; + } + +- /* +- * The VM_FAULT_WRITE bit tells us that do_wp_page has broken COW when +- * necessary, even if maybe_mkwrite decided not to set pte_write. We +- * can thus safely do subsequent page lookups as if they were reads. +- * But only do so when looping for pte_write is futile: in some cases +- * userspace may also be wanting to write to the gotten user page, +- * which a read fault here might prevent (a readonly page might get +- * reCOWed by userspace write). +- */ +- if ((ret & VM_FAULT_WRITE) && !(vma->vm_flags & VM_WRITE)) +- *flags |= FOLL_COW; + return 0; + } + +diff --git a/mm/huge_memory.c b/mm/huge_memory.c +index 15965084816d3..0f465e70349cb 100644 +--- a/mm/huge_memory.c ++++ b/mm/huge_memory.c +@@ -978,12 +978,6 @@ struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr, + + assert_spin_locked(pmd_lockptr(mm, pmd)); + +- /* +- * When we COW a devmap PMD entry, we split it into PTEs, so we should +- * not be in this function with `flags & FOLL_COW` set. +- */ +- WARN_ONCE(flags & FOLL_COW, "mm: In follow_devmap_pmd with FOLL_COW set"); +- + /* FOLL_GET and FOLL_PIN are mutually exclusive. */ + if (WARN_ON_ONCE((flags & (FOLL_PIN | FOLL_GET)) == + (FOLL_PIN | FOLL_GET))) +@@ -1349,14 +1343,43 @@ fallback: + return VM_FAULT_FALLBACK; + } + +-/* +- * FOLL_FORCE can write to even unwritable pmd's, but only +- * after we've gone through a COW cycle and they are dirty. 
+- */ +-static inline bool can_follow_write_pmd(pmd_t pmd, unsigned int flags) ++/* FOLL_FORCE can write to even unwritable PMDs in COW mappings. */ ++static inline bool can_follow_write_pmd(pmd_t pmd, struct page *page, ++ struct vm_area_struct *vma, ++ unsigned int flags) + { +- return pmd_write(pmd) || +- ((flags & FOLL_FORCE) && (flags & FOLL_COW) && pmd_dirty(pmd)); ++ /* If the pmd is writable, we can write to the page. */ ++ if (pmd_write(pmd)) ++ return true; ++ ++ /* Maybe FOLL_FORCE is set to override it? */ ++ if (!(flags & FOLL_FORCE)) ++ return false; ++ ++ /* But FOLL_FORCE has no effect on shared mappings */ ++ if (vma->vm_flags & (VM_MAYSHARE | VM_SHARED)) ++ return false; ++ ++ /* ... or read-only private ones */ ++ if (!(vma->vm_flags & VM_MAYWRITE)) ++ return false; ++ ++ /* ... or already writable ones that just need to take a write fault */ ++ if (vma->vm_flags & VM_WRITE) ++ return false; ++ ++ /* ++ * See can_change_pte_writable(): we broke COW and could map the page ++ * writable if we have an exclusive anonymous page ... ++ */ ++ if (!page || !PageAnon(page) || !PageAnonExclusive(page)) ++ return false; ++ ++ /* ... and a write-fault isn't required for other reasons. */ ++ if (IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) && ++ !(vma->vm_flags & VM_SOFTDIRTY) && !pmd_soft_dirty(pmd)) ++ return false; ++ return !userfaultfd_huge_pmd_wp(vma, pmd); + } + + struct page *follow_trans_huge_pmd(struct vm_area_struct *vma, +@@ -1365,12 +1388,16 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma, + unsigned int flags) + { + struct mm_struct *mm = vma->vm_mm; +- struct page *page = NULL; ++ struct page *page; + + assert_spin_locked(pmd_lockptr(mm, pmd)); + +- if (flags & FOLL_WRITE && !can_follow_write_pmd(*pmd, flags)) +- goto out; ++ page = pmd_page(*pmd); ++ VM_BUG_ON_PAGE(!PageHead(page) && !is_zone_device_page(page), page); ++ ++ if ((flags & FOLL_WRITE) && ++ !can_follow_write_pmd(*pmd, page, vma, flags)) ++ return NULL; + + /* Avoid dumping huge zero page */ + if ((flags & FOLL_DUMP) && is_huge_zero_pmd(*pmd)) +@@ -1378,10 +1405,7 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma, + + /* Full NUMA hinting faults to serialise migration in fault paths */ + if ((flags & FOLL_NUMA) && pmd_protnone(*pmd)) +- goto out; +- +- page = pmd_page(*pmd); +- VM_BUG_ON_PAGE(!PageHead(page) && !is_zone_device_page(page), page); ++ return NULL; + + if (!pmd_write(*pmd) && gup_must_unshare(flags, page)) + return ERR_PTR(-EMLINK); +@@ -1398,7 +1422,6 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma, + page += (addr & ~HPAGE_PMD_MASK) >> PAGE_SHIFT; + VM_BUG_ON_PAGE(!PageCompound(page) && !is_zone_device_page(page), page); + +-out: + return page; + } + +diff --git a/mm/hugetlb.c b/mm/hugetlb.c +index 474bfbe9929e1..299dcfaa35b25 100644 +--- a/mm/hugetlb.c ++++ b/mm/hugetlb.c +@@ -5232,6 +5232,21 @@ static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma, + VM_BUG_ON(unshare && (flags & FOLL_WRITE)); + VM_BUG_ON(!unshare && !(flags & FOLL_WRITE)); + ++ /* ++ * hugetlb does not support FOLL_FORCE-style write faults that keep the ++ * PTE mapped R/O such as maybe_mkwrite() would do. ++ */ ++ if (WARN_ON_ONCE(!unshare && !(vma->vm_flags & VM_WRITE))) ++ return VM_FAULT_SIGSEGV; ++ ++ /* Let's take out MAP_SHARED mappings first. 
*/ ++ if (vma->vm_flags & VM_MAYSHARE) { ++ if (unlikely(unshare)) ++ return 0; ++ set_huge_ptep_writable(vma, haddr, ptep); ++ return 0; ++ } ++ + pte = huge_ptep_get(ptep); + old_page = pte_page(pte); + +@@ -5766,12 +5781,11 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, + * If we are going to COW/unshare the mapping later, we examine the + * pending reservations for this page now. This will ensure that any + * allocations necessary to record that reservation occur outside the +- * spinlock. For private mappings, we also lookup the pagecache +- * page now as it is used to determine if a reservation has been +- * consumed. ++ * spinlock. Also lookup the pagecache page now as it is used to ++ * determine if a reservation has been consumed. + */ + if ((flags & (FAULT_FLAG_WRITE|FAULT_FLAG_UNSHARE)) && +- !huge_pte_write(entry)) { ++ !(vma->vm_flags & VM_MAYSHARE) && !huge_pte_write(entry)) { + if (vma_needs_reservation(h, vma, haddr) < 0) { + ret = VM_FAULT_OOM; + goto out_mutex; +@@ -5779,9 +5793,7 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, + /* Just decrements count, does not deallocate */ + vma_end_reservation(h, vma, haddr); + +- if (!(vma->vm_flags & VM_MAYSHARE)) +- pagecache_page = hugetlbfs_pagecache_page(h, +- vma, haddr); ++ pagecache_page = hugetlbfs_pagecache_page(h, vma, haddr); + } + + ptl = huge_pte_lock(h, mm, ptep); +@@ -6014,7 +6026,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, + if (!huge_pte_none_mostly(huge_ptep_get(dst_pte))) + goto out_release_unlock; + +- if (vm_shared) { ++ if (page_in_pagecache) { + page_dup_file_rmap(page, true); + } else { + ClearHPageRestoreReserve(page); +diff --git a/mm/mmap.c b/mm/mmap.c +index 7c59ec73acc34..3b284b091bb7e 100644 +--- a/mm/mmap.c ++++ b/mm/mmap.c +@@ -1693,8 +1693,12 @@ int vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot) + pgprot_val(vm_pgprot_modify(vm_page_prot, vm_flags))) + return 0; + +- /* Do we need to track softdirty? */ +- if (IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) && !(vm_flags & VM_SOFTDIRTY)) ++ /* ++ * Do we need to track softdirty? hugetlb does not support softdirty ++ * tracking yet. ++ */ ++ if (IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) && !(vm_flags & VM_SOFTDIRTY) && ++ !is_vm_hugetlb_page(vma)) + return 1; + + /* Specialty mapping? */ +diff --git a/mm/mprotect.c b/mm/mprotect.c +index ba5592655ee3b..0d38d5b637621 100644 +--- a/mm/mprotect.c ++++ b/mm/mprotect.c +@@ -158,10 +158,11 @@ static unsigned long change_pte_range(struct mmu_gather *tlb, + pages++; + } else if (is_swap_pte(oldpte)) { + swp_entry_t entry = pte_to_swp_entry(oldpte); +- struct page *page = pfn_swap_entry_to_page(entry); + pte_t newpte; + + if (is_writable_migration_entry(entry)) { ++ struct page *page = pfn_swap_entry_to_page(entry); ++ + /* + * A protection check is difficult so + * just be safe and disable write +diff --git a/mm/page-writeback.c b/mm/page-writeback.c +index 55c2776ae6999..3c34db15cf706 100644 +--- a/mm/page-writeback.c ++++ b/mm/page-writeback.c +@@ -2867,6 +2867,7 @@ static void wb_inode_writeback_start(struct bdi_writeback *wb) + + static void wb_inode_writeback_end(struct bdi_writeback *wb) + { ++ unsigned long flags; + atomic_dec(&wb->writeback_inodes); + /* + * Make sure estimate of writeback throughput gets updated after +@@ -2875,7 +2876,10 @@ static void wb_inode_writeback_end(struct bdi_writeback *wb) + * that if multiple inodes end writeback at a similar time, they get + * batched into one bandwidth update. 
+ */ +- queue_delayed_work(bdi_wq, &wb->bw_dwork, BANDWIDTH_INTERVAL); ++ spin_lock_irqsave(&wb->work_lock, flags); ++ if (test_bit(WB_registered, &wb->state)) ++ queue_delayed_work(bdi_wq, &wb->bw_dwork, BANDWIDTH_INTERVAL); ++ spin_unlock_irqrestore(&wb->work_lock, flags); + } + + bool __folio_end_writeback(struct folio *folio) +diff --git a/mm/shmem.c b/mm/shmem.c +index b7f2d4a568673..f152375e770bf 100644 +--- a/mm/shmem.c ++++ b/mm/shmem.c +@@ -1771,6 +1771,7 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index, + + if (shmem_should_replace_folio(folio, gfp)) { + error = shmem_replace_page(&page, gfp, info, index); ++ folio = page_folio(page); + if (error) + goto failed; + } +diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c +index 07d3befc80e41..7327b2573f7c2 100644 +--- a/mm/userfaultfd.c ++++ b/mm/userfaultfd.c +@@ -703,14 +703,29 @@ ssize_t mcopy_continue(struct mm_struct *dst_mm, unsigned long start, + mmap_changing, 0); + } + ++void uffd_wp_range(struct mm_struct *dst_mm, struct vm_area_struct *dst_vma, ++ unsigned long start, unsigned long len, bool enable_wp) ++{ ++ struct mmu_gather tlb; ++ pgprot_t newprot; ++ ++ if (enable_wp) ++ newprot = vm_get_page_prot(dst_vma->vm_flags & ~(VM_WRITE)); ++ else ++ newprot = vm_get_page_prot(dst_vma->vm_flags); ++ ++ tlb_gather_mmu(&tlb, dst_mm); ++ change_protection(&tlb, dst_vma, start, start + len, newprot, ++ enable_wp ? MM_CP_UFFD_WP : MM_CP_UFFD_WP_RESOLVE); ++ tlb_finish_mmu(&tlb); ++} ++ + int mwriteprotect_range(struct mm_struct *dst_mm, unsigned long start, + unsigned long len, bool enable_wp, + atomic_t *mmap_changing) + { + struct vm_area_struct *dst_vma; + unsigned long page_mask; +- struct mmu_gather tlb; +- pgprot_t newprot; + int err; + + /* +@@ -750,15 +765,7 @@ int mwriteprotect_range(struct mm_struct *dst_mm, unsigned long start, + goto out_unlock; + } + +- if (enable_wp) +- newprot = vm_get_page_prot(dst_vma->vm_flags & ~(VM_WRITE)); +- else +- newprot = vm_get_page_prot(dst_vma->vm_flags); +- +- tlb_gather_mmu(&tlb, dst_mm); +- change_protection(&tlb, dst_vma, start, start + len, newprot, +- enable_wp ? 
MM_CP_UFFD_WP : MM_CP_UFFD_WP_RESOLVE); +- tlb_finish_mmu(&tlb); ++ uffd_wp_range(dst_mm, dst_vma, start, len, enable_wp); + + err = 0; + out_unlock: +diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c +index 1a11064f99907..8f19253024b0a 100644 +--- a/net/bridge/netfilter/ebtable_broute.c ++++ b/net/bridge/netfilter/ebtable_broute.c +@@ -36,18 +36,10 @@ static struct ebt_replace_kernel initial_table = { + .entries = (char *)&initial_chain, + }; + +-static int check(const struct ebt_table_info *info, unsigned int valid_hooks) +-{ +- if (valid_hooks & ~(1 << NF_BR_BROUTING)) +- return -EINVAL; +- return 0; +-} +- + static const struct ebt_table broute_table = { + .name = "broute", + .table = &initial_table, + .valid_hooks = 1 << NF_BR_BROUTING, +- .check = check, + .me = THIS_MODULE, + }; + +diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c +index cb949436bc0e3..278f324e67524 100644 +--- a/net/bridge/netfilter/ebtable_filter.c ++++ b/net/bridge/netfilter/ebtable_filter.c +@@ -43,18 +43,10 @@ static struct ebt_replace_kernel initial_table = { + .entries = (char *)initial_chains, + }; + +-static int check(const struct ebt_table_info *info, unsigned int valid_hooks) +-{ +- if (valid_hooks & ~FILTER_VALID_HOOKS) +- return -EINVAL; +- return 0; +-} +- + static const struct ebt_table frame_filter = { + .name = "filter", + .table = &initial_table, + .valid_hooks = FILTER_VALID_HOOKS, +- .check = check, + .me = THIS_MODULE, + }; + +diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c +index 5ee0531ae5061..9066f7f376d57 100644 +--- a/net/bridge/netfilter/ebtable_nat.c ++++ b/net/bridge/netfilter/ebtable_nat.c +@@ -43,18 +43,10 @@ static struct ebt_replace_kernel initial_table = { + .entries = (char *)initial_chains, + }; + +-static int check(const struct ebt_table_info *info, unsigned int valid_hooks) +-{ +- if (valid_hooks & ~NAT_VALID_HOOKS) +- return -EINVAL; +- return 0; +-} +- + static const struct ebt_table frame_nat = { + .name = "nat", + .table = &initial_table, + .valid_hooks = NAT_VALID_HOOKS, +- .check = check, + .me = THIS_MODULE, + }; + +diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c +index f2dbefb61ce83..9a0ae59cdc500 100644 +--- a/net/bridge/netfilter/ebtables.c ++++ b/net/bridge/netfilter/ebtables.c +@@ -1040,8 +1040,7 @@ static int do_replace_finish(struct net *net, struct ebt_replace *repl, + goto free_iterate; + } + +- /* the table doesn't like it */ +- if (t->check && (ret = t->check(newinfo, repl->valid_hooks))) ++ if (repl->valid_hooks != t->valid_hooks) + goto free_unlock; + + if (repl->num_counters && repl->num_counters != t->private->nentries) { +@@ -1231,11 +1230,6 @@ int ebt_register_table(struct net *net, const struct ebt_table *input_table, + if (ret != 0) + goto free_chainstack; + +- if (table->check && table->check(newinfo, table->valid_hooks)) { +- ret = -EINVAL; +- goto free_chainstack; +- } +- + table->private = newinfo; + rwlock_init(&table->lock); + mutex_lock(&ebt_mutex); +diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c +index 1b7f385643b4c..94374d529ea42 100644 +--- a/net/core/bpf_sk_storage.c ++++ b/net/core/bpf_sk_storage.c +@@ -310,11 +310,12 @@ BPF_CALL_2(bpf_sk_storage_delete, struct bpf_map *, map, struct sock *, sk) + static int bpf_sk_storage_charge(struct bpf_local_storage_map *smap, + void *owner, u32 size) + { ++ int optmem_max = READ_ONCE(sysctl_optmem_max); + struct sock *sk = (struct sock 
*)owner; + + /* same check as in sock_kmalloc() */ +- if (size <= sysctl_optmem_max && +- atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) { ++ if (size <= optmem_max && ++ atomic_read(&sk->sk_omem_alloc) + size < optmem_max) { + atomic_add(size, &sk->sk_omem_alloc); + return 0; + } +diff --git a/net/core/dev.c b/net/core/dev.c +index 30a1603a7225c..a77a979a4bf75 100644 +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -4623,7 +4623,7 @@ static bool skb_flow_limit(struct sk_buff *skb, unsigned int qlen) + struct softnet_data *sd; + unsigned int old_flow, new_flow; + +- if (qlen < (netdev_max_backlog >> 1)) ++ if (qlen < (READ_ONCE(netdev_max_backlog) >> 1)) + return false; + + sd = this_cpu_ptr(&softnet_data); +@@ -4671,7 +4671,7 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu, + if (!netif_running(skb->dev)) + goto drop; + qlen = skb_queue_len(&sd->input_pkt_queue); +- if (qlen <= netdev_max_backlog && !skb_flow_limit(skb, qlen)) { ++ if (qlen <= READ_ONCE(netdev_max_backlog) && !skb_flow_limit(skb, qlen)) { + if (qlen) { + enqueue: + __skb_queue_tail(&sd->input_pkt_queue, skb); +@@ -4927,7 +4927,7 @@ static int netif_rx_internal(struct sk_buff *skb) + { + int ret; + +- net_timestamp_check(netdev_tstamp_prequeue, skb); ++ net_timestamp_check(READ_ONCE(netdev_tstamp_prequeue), skb); + + trace_netif_rx(skb); + +@@ -5280,7 +5280,7 @@ static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc, + int ret = NET_RX_DROP; + __be16 type; + +- net_timestamp_check(!netdev_tstamp_prequeue, skb); ++ net_timestamp_check(!READ_ONCE(netdev_tstamp_prequeue), skb); + + trace_netif_receive_skb(skb); + +@@ -5663,7 +5663,7 @@ static int netif_receive_skb_internal(struct sk_buff *skb) + { + int ret; + +- net_timestamp_check(netdev_tstamp_prequeue, skb); ++ net_timestamp_check(READ_ONCE(netdev_tstamp_prequeue), skb); + + if (skb_defer_rx_timestamp(skb)) + return NET_RX_SUCCESS; +@@ -5693,7 +5693,7 @@ void netif_receive_skb_list_internal(struct list_head *head) + + INIT_LIST_HEAD(&sublist); + list_for_each_entry_safe(skb, next, head, list) { +- net_timestamp_check(netdev_tstamp_prequeue, skb); ++ net_timestamp_check(READ_ONCE(netdev_tstamp_prequeue), skb); + skb_list_del_init(skb); + if (!skb_defer_rx_timestamp(skb)) + list_add_tail(&skb->list, &sublist); +@@ -5917,7 +5917,7 @@ static int process_backlog(struct napi_struct *napi, int quota) + net_rps_action_and_irq_enable(sd); + } + +- napi->weight = dev_rx_weight; ++ napi->weight = READ_ONCE(dev_rx_weight); + while (again) { + struct sk_buff *skb; + +@@ -6646,8 +6646,8 @@ static __latent_entropy void net_rx_action(struct softirq_action *h) + { + struct softnet_data *sd = this_cpu_ptr(&softnet_data); + unsigned long time_limit = jiffies + +- usecs_to_jiffies(netdev_budget_usecs); +- int budget = netdev_budget; ++ usecs_to_jiffies(READ_ONCE(netdev_budget_usecs)); ++ int budget = READ_ONCE(netdev_budget); + LIST_HEAD(list); + LIST_HEAD(repoll); + +@@ -10265,7 +10265,7 @@ static struct net_device *netdev_wait_allrefs_any(struct list_head *list) + return dev; + + if (time_after(jiffies, warning_time + +- netdev_unregister_timeout_secs * HZ)) { ++ READ_ONCE(netdev_unregister_timeout_secs) * HZ)) { + list_for_each_entry(dev, list, todo_list) { + pr_emerg("unregister_netdevice: waiting for %s to become free. 
Usage count = %d\n", + dev->name, netdev_refcnt_read(dev)); +diff --git a/net/core/filter.c b/net/core/filter.c +index 74f05ed6aff29..063176428086b 100644 +--- a/net/core/filter.c ++++ b/net/core/filter.c +@@ -1214,10 +1214,11 @@ void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp) + static bool __sk_filter_charge(struct sock *sk, struct sk_filter *fp) + { + u32 filter_size = bpf_prog_size(fp->prog->len); ++ int optmem_max = READ_ONCE(sysctl_optmem_max); + + /* same check as in sock_kmalloc() */ +- if (filter_size <= sysctl_optmem_max && +- atomic_read(&sk->sk_omem_alloc) + filter_size < sysctl_optmem_max) { ++ if (filter_size <= optmem_max && ++ atomic_read(&sk->sk_omem_alloc) + filter_size < optmem_max) { + atomic_add(filter_size, &sk->sk_omem_alloc); + return true; + } +@@ -1548,7 +1549,7 @@ int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk) + if (IS_ERR(prog)) + return PTR_ERR(prog); + +- if (bpf_prog_size(prog->len) > sysctl_optmem_max) ++ if (bpf_prog_size(prog->len) > READ_ONCE(sysctl_optmem_max)) + err = -ENOMEM; + else + err = reuseport_attach_prog(sk, prog); +@@ -1615,7 +1616,7 @@ int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk) + } + } else { + /* BPF_PROG_TYPE_SOCKET_FILTER */ +- if (bpf_prog_size(prog->len) > sysctl_optmem_max) { ++ if (bpf_prog_size(prog->len) > READ_ONCE(sysctl_optmem_max)) { + err = -ENOMEM; + goto err_prog_put; + } +@@ -5036,14 +5037,14 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname, + /* Only some socketops are supported */ + switch (optname) { + case SO_RCVBUF: +- val = min_t(u32, val, sysctl_rmem_max); ++ val = min_t(u32, val, READ_ONCE(sysctl_rmem_max)); + val = min_t(int, val, INT_MAX / 2); + sk->sk_userlocks |= SOCK_RCVBUF_LOCK; + WRITE_ONCE(sk->sk_rcvbuf, + max_t(int, val * 2, SOCK_MIN_RCVBUF)); + break; + case SO_SNDBUF: +- val = min_t(u32, val, sysctl_wmem_max); ++ val = min_t(u32, val, READ_ONCE(sysctl_wmem_max)); + val = min_t(int, val, INT_MAX / 2); + sk->sk_userlocks |= SOCK_SNDBUF_LOCK; + WRITE_ONCE(sk->sk_sndbuf, +diff --git a/net/core/gro_cells.c b/net/core/gro_cells.c +index 541c7a72a28a4..21619c70a82b7 100644 +--- a/net/core/gro_cells.c ++++ b/net/core/gro_cells.c +@@ -26,7 +26,7 @@ int gro_cells_receive(struct gro_cells *gcells, struct sk_buff *skb) + + cell = this_cpu_ptr(gcells->cells); + +- if (skb_queue_len(&cell->napi_skbs) > netdev_max_backlog) { ++ if (skb_queue_len(&cell->napi_skbs) > READ_ONCE(netdev_max_backlog)) { + drop: + dev_core_stats_rx_dropped_inc(dev); + kfree_skb(skb); +diff --git a/net/core/skbuff.c b/net/core/skbuff.c +index 5b3559cb1d827..bebf58464d667 100644 +--- a/net/core/skbuff.c ++++ b/net/core/skbuff.c +@@ -4772,7 +4772,7 @@ static bool skb_may_tx_timestamp(struct sock *sk, bool tsonly) + { + bool ret; + +- if (likely(sysctl_tstamp_allow_data || tsonly)) ++ if (likely(READ_ONCE(sysctl_tstamp_allow_data) || tsonly)) + return true; + + read_lock_bh(&sk->sk_callback_lock); +diff --git a/net/core/sock.c b/net/core/sock.c +index 2ff40dd0a7a65..16ab5ef749c60 100644 +--- a/net/core/sock.c ++++ b/net/core/sock.c +@@ -1100,7 +1100,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname, + * play 'guess the biggest size' games. RCVBUF/SNDBUF + * are treated in BSD as hints + */ +- val = min_t(u32, val, sysctl_wmem_max); ++ val = min_t(u32, val, READ_ONCE(sysctl_wmem_max)); + set_sndbuf: + /* Ensure val * 2 fits into an int, to prevent max_t() + * from treating it as a negative value. 
+@@ -1132,7 +1132,7 @@ set_sndbuf: + * play 'guess the biggest size' games. RCVBUF/SNDBUF + * are treated in BSD as hints + */ +- __sock_set_rcvbuf(sk, min_t(u32, val, sysctl_rmem_max)); ++ __sock_set_rcvbuf(sk, min_t(u32, val, READ_ONCE(sysctl_rmem_max))); + break; + + case SO_RCVBUFFORCE: +@@ -2535,7 +2535,7 @@ struct sk_buff *sock_omalloc(struct sock *sk, unsigned long size, + + /* small safe race: SKB_TRUESIZE may differ from final skb->truesize */ + if (atomic_read(&sk->sk_omem_alloc) + SKB_TRUESIZE(size) > +- sysctl_optmem_max) ++ READ_ONCE(sysctl_optmem_max)) + return NULL; + + skb = alloc_skb(size, priority); +@@ -2553,8 +2553,10 @@ struct sk_buff *sock_omalloc(struct sock *sk, unsigned long size, + */ + void *sock_kmalloc(struct sock *sk, int size, gfp_t priority) + { +- if ((unsigned int)size <= sysctl_optmem_max && +- atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) { ++ int optmem_max = READ_ONCE(sysctl_optmem_max); ++ ++ if ((unsigned int)size <= optmem_max && ++ atomic_read(&sk->sk_omem_alloc) + size < optmem_max) { + void *mem; + /* First do the add, to avoid the race if kmalloc + * might sleep. +@@ -3307,8 +3309,8 @@ void sock_init_data(struct socket *sock, struct sock *sk) + timer_setup(&sk->sk_timer, NULL, 0); + + sk->sk_allocation = GFP_KERNEL; +- sk->sk_rcvbuf = sysctl_rmem_default; +- sk->sk_sndbuf = sysctl_wmem_default; ++ sk->sk_rcvbuf = READ_ONCE(sysctl_rmem_default); ++ sk->sk_sndbuf = READ_ONCE(sysctl_wmem_default); + sk->sk_state = TCP_CLOSE; + sk_set_socket(sk, sock); + +@@ -3363,7 +3365,7 @@ void sock_init_data(struct socket *sock, struct sock *sk) + + #ifdef CONFIG_NET_RX_BUSY_POLL + sk->sk_napi_id = 0; +- sk->sk_ll_usec = sysctl_net_busy_read; ++ sk->sk_ll_usec = READ_ONCE(sysctl_net_busy_read); + #endif + + sk->sk_max_pacing_rate = ~0UL; +diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c +index 71a13596ea2bf..725891527814c 100644 +--- a/net/core/sysctl_net_core.c ++++ b/net/core/sysctl_net_core.c +@@ -234,14 +234,17 @@ static int set_default_qdisc(struct ctl_table *table, int write, + static int proc_do_dev_weight(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) + { +- int ret; ++ static DEFINE_MUTEX(dev_weight_mutex); ++ int ret, weight; + ++ mutex_lock(&dev_weight_mutex); + ret = proc_dointvec(table, write, buffer, lenp, ppos); +- if (ret != 0) +- return ret; +- +- dev_rx_weight = weight_p * dev_weight_rx_bias; +- dev_tx_weight = weight_p * dev_weight_tx_bias; ++ if (!ret && write) { ++ weight = READ_ONCE(weight_p); ++ WRITE_ONCE(dev_rx_weight, weight * dev_weight_rx_bias); ++ WRITE_ONCE(dev_tx_weight, weight * dev_weight_tx_bias); ++ } ++ mutex_unlock(&dev_weight_mutex); + + return ret; + } +diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c +index b2366ad540e62..787a44e3222db 100644 +--- a/net/ipv4/devinet.c ++++ b/net/ipv4/devinet.c +@@ -2682,23 +2682,27 @@ static __net_init int devinet_init_net(struct net *net) + #endif + + if (!net_eq(net, &init_net)) { +- if (IS_ENABLED(CONFIG_SYSCTL) && +- sysctl_devconf_inherit_init_net == 3) { ++ switch (net_inherit_devconf()) { ++ case 3: + /* copy from the current netns */ + memcpy(all, current->nsproxy->net_ns->ipv4.devconf_all, + sizeof(ipv4_devconf)); + memcpy(dflt, + current->nsproxy->net_ns->ipv4.devconf_dflt, + sizeof(ipv4_devconf_dflt)); +- } else if (!IS_ENABLED(CONFIG_SYSCTL) || +- sysctl_devconf_inherit_init_net != 2) { +- /* inherit == 0 or 1: copy from init_net */ ++ break; ++ case 0: ++ case 1: ++ /* copy from init_net */ + memcpy(all, 
init_net.ipv4.devconf_all, + sizeof(ipv4_devconf)); + memcpy(dflt, init_net.ipv4.devconf_dflt, + sizeof(ipv4_devconf_dflt)); ++ break; ++ case 2: ++ /* use compiled values */ ++ break; + } +- /* else inherit == 2: use compiled values */ + } + + #ifdef CONFIG_SYSCTL +diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c +index 00b4bf26fd932..da8b3cc67234d 100644 +--- a/net/ipv4/ip_output.c ++++ b/net/ipv4/ip_output.c +@@ -1712,7 +1712,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, + + sk->sk_protocol = ip_hdr(skb)->protocol; + sk->sk_bound_dev_if = arg->bound_dev_if; +- sk->sk_sndbuf = sysctl_wmem_default; ++ sk->sk_sndbuf = READ_ONCE(sysctl_wmem_default); + ipc.sockc.mark = fl4.flowi4_mark; + err = ip_append_data(sk, &fl4, ip_reply_glue_bits, arg->iov->iov_base, + len, 0, &ipc, &rt, MSG_DONTWAIT); +diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c +index a8a323ecbb54b..e49a61a053a68 100644 +--- a/net/ipv4/ip_sockglue.c ++++ b/net/ipv4/ip_sockglue.c +@@ -772,7 +772,7 @@ static int ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval, int optlen) + + if (optlen < GROUP_FILTER_SIZE(0)) + return -EINVAL; +- if (optlen > sysctl_optmem_max) ++ if (optlen > READ_ONCE(sysctl_optmem_max)) + return -ENOBUFS; + + gsf = memdup_sockptr(optval, optlen); +@@ -808,7 +808,7 @@ static int compat_ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval, + + if (optlen < size0) + return -EINVAL; +- if (optlen > sysctl_optmem_max - 4) ++ if (optlen > READ_ONCE(sysctl_optmem_max) - 4) + return -ENOBUFS; + + p = kmalloc(optlen + 4, GFP_KERNEL); +@@ -1233,7 +1233,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, int optname, + + if (optlen < IP_MSFILTER_SIZE(0)) + goto e_inval; +- if (optlen > sysctl_optmem_max) { ++ if (optlen > READ_ONCE(sysctl_optmem_max)) { + err = -ENOBUFS; + break; + } +diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c +index 3ae2ea0488838..3d446773ff2a5 100644 +--- a/net/ipv4/tcp.c ++++ b/net/ipv4/tcp.c +@@ -1000,7 +1000,7 @@ new_segment: + + i = skb_shinfo(skb)->nr_frags; + can_coalesce = skb_can_coalesce(skb, i, page, offset); +- if (!can_coalesce && i >= sysctl_max_skb_frags) { ++ if (!can_coalesce && i >= READ_ONCE(sysctl_max_skb_frags)) { + tcp_mark_push(tp, skb); + goto new_segment; + } +@@ -1348,7 +1348,7 @@ new_segment: + + if (!skb_can_coalesce(skb, i, pfrag->page, + pfrag->offset)) { +- if (i >= sysctl_max_skb_frags) { ++ if (i >= READ_ONCE(sysctl_max_skb_frags)) { + tcp_mark_push(tp, skb); + goto new_segment; + } +diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c +index aed0c5f828bef..84314de754f87 100644 +--- a/net/ipv4/tcp_output.c ++++ b/net/ipv4/tcp_output.c +@@ -239,7 +239,7 @@ void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss, + if (wscale_ok) { + /* Set window scaling on max possible window */ + space = max_t(u32, space, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2])); +- space = max_t(u32, space, sysctl_rmem_max); ++ space = max_t(u32, space, READ_ONCE(sysctl_rmem_max)); + space = min_t(u32, space, *window_clamp); + *rcv_wscale = clamp_t(int, ilog2(space) - 15, + 0, TCP_MAX_WSCALE); +diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c +index 49cc6587dd771..b738eb7e1cae8 100644 +--- a/net/ipv6/addrconf.c ++++ b/net/ipv6/addrconf.c +@@ -7158,9 +7158,8 @@ static int __net_init addrconf_init_net(struct net *net) + if (!dflt) + goto err_alloc_dflt; + +- if (IS_ENABLED(CONFIG_SYSCTL) && +- !net_eq(net, &init_net)) { +- switch (sysctl_devconf_inherit_init_net) { ++ if (!net_eq(net, &init_net)) 
{ ++ switch (net_inherit_devconf()) { + case 1: /* copy from init_net */ + memcpy(all, init_net.ipv6.devconf_all, + sizeof(ipv6_devconf)); +diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c +index 222f6bf220ba0..e0dcc7a193df2 100644 +--- a/net/ipv6/ipv6_sockglue.c ++++ b/net/ipv6/ipv6_sockglue.c +@@ -210,7 +210,7 @@ static int ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval, + + if (optlen < GROUP_FILTER_SIZE(0)) + return -EINVAL; +- if (optlen > sysctl_optmem_max) ++ if (optlen > READ_ONCE(sysctl_optmem_max)) + return -ENOBUFS; + + gsf = memdup_sockptr(optval, optlen); +@@ -244,7 +244,7 @@ static int compat_ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval, + + if (optlen < size0) + return -EINVAL; +- if (optlen > sysctl_optmem_max - 4) ++ if (optlen > READ_ONCE(sysctl_optmem_max) - 4) + return -ENOBUFS; + + p = kmalloc(optlen + 4, GFP_KERNEL); +diff --git a/net/key/af_key.c b/net/key/af_key.c +index fb16d7c4e1b8d..20e73643b9c89 100644 +--- a/net/key/af_key.c ++++ b/net/key/af_key.c +@@ -1697,9 +1697,12 @@ static int pfkey_register(struct sock *sk, struct sk_buff *skb, const struct sad + pfk->registered |= (1<<hdr->sadb_msg_satype); + } + ++ mutex_lock(&pfkey_mutex); + xfrm_probe_algs(); + + supp_skb = compose_sadb_supported(hdr, GFP_KERNEL | __GFP_ZERO); ++ mutex_unlock(&pfkey_mutex); ++ + if (!supp_skb) { + if (hdr->sadb_msg_satype != SADB_SATYPE_UNSPEC) + pfk->registered &= ~(1<<hdr->sadb_msg_satype); +diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c +index 3d90fa9653ef3..513f571a082ba 100644 +--- a/net/mptcp/protocol.c ++++ b/net/mptcp/protocol.c +@@ -1299,7 +1299,7 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk, + + i = skb_shinfo(skb)->nr_frags; + can_coalesce = skb_can_coalesce(skb, i, dfrag->page, offset); +- if (!can_coalesce && i >= sysctl_max_skb_frags) { ++ if (!can_coalesce && i >= READ_ONCE(sysctl_max_skb_frags)) { + tcp_mark_push(tcp_sk(ssk), skb); + goto alloc_skb; + } +diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c +index 9d43277b8b4fe..a56fd0b5a430a 100644 +--- a/net/netfilter/ipvs/ip_vs_sync.c ++++ b/net/netfilter/ipvs/ip_vs_sync.c +@@ -1280,12 +1280,12 @@ static void set_sock_size(struct sock *sk, int mode, int val) + lock_sock(sk); + if (mode) { + val = clamp_t(int, val, (SOCK_MIN_SNDBUF + 1) / 2, +- sysctl_wmem_max); ++ READ_ONCE(sysctl_wmem_max)); + sk->sk_sndbuf = val * 2; + sk->sk_userlocks |= SOCK_SNDBUF_LOCK; + } else { + val = clamp_t(int, val, (SOCK_MIN_RCVBUF + 1) / 2, +- sysctl_rmem_max); ++ READ_ONCE(sysctl_rmem_max)); + sk->sk_rcvbuf = val * 2; + sk->sk_userlocks |= SOCK_RCVBUF_LOCK; + } +diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c +index f2def06d10709..483b18d35cade 100644 +--- a/net/netfilter/nf_flow_table_core.c ++++ b/net/netfilter/nf_flow_table_core.c +@@ -442,12 +442,17 @@ static void nf_flow_offload_gc_step(struct nf_flowtable *flow_table, + } + } + ++void nf_flow_table_gc_run(struct nf_flowtable *flow_table) ++{ ++ nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, NULL); ++} ++ + static void nf_flow_offload_work_gc(struct work_struct *work) + { + struct nf_flowtable *flow_table; + + flow_table = container_of(work, struct nf_flowtable, gc_work.work); +- nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, NULL); ++ nf_flow_table_gc_run(flow_table); + queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ); + } + +@@ -605,11 +610,11 @@ void nf_flow_table_free(struct nf_flowtable 
*flow_table) + mutex_unlock(&flowtable_lock); + + cancel_delayed_work_sync(&flow_table->gc_work); +- nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL); +- nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, NULL); + nf_flow_table_offload_flush(flow_table); +- if (nf_flowtable_hw_offload(flow_table)) +- nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, NULL); ++ /* ... no more pending work after this stage ... */ ++ nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL); ++ nf_flow_table_gc_run(flow_table); ++ nf_flow_table_offload_flush_cleanup(flow_table); + rhashtable_destroy(&flow_table->rhashtable); + } + EXPORT_SYMBOL_GPL(nf_flow_table_free); +diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c +index 11b6e19420920..4d1169b634c5f 100644 +--- a/net/netfilter/nf_flow_table_offload.c ++++ b/net/netfilter/nf_flow_table_offload.c +@@ -1063,6 +1063,14 @@ void nf_flow_offload_stats(struct nf_flowtable *flowtable, + flow_offload_queue_work(offload); + } + ++void nf_flow_table_offload_flush_cleanup(struct nf_flowtable *flowtable) ++{ ++ if (nf_flowtable_hw_offload(flowtable)) { ++ flush_workqueue(nf_flow_offload_del_wq); ++ nf_flow_table_gc_run(flowtable); ++ } ++} ++ + void nf_flow_table_offload_flush(struct nf_flowtable *flowtable) + { + if (nf_flowtable_hw_offload(flowtable)) { +diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c +index 4bd6e9427c918..bc690238a3c56 100644 +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -32,7 +32,6 @@ static LIST_HEAD(nf_tables_objects); + static LIST_HEAD(nf_tables_flowtables); + static LIST_HEAD(nf_tables_destroy_list); + static DEFINE_SPINLOCK(nf_tables_destroy_list_lock); +-static u64 table_handle; + + enum { + NFT_VALIDATE_SKIP = 0, +@@ -1235,7 +1234,7 @@ static int nf_tables_newtable(struct sk_buff *skb, const struct nfnl_info *info, + INIT_LIST_HEAD(&table->flowtables); + table->family = family; + table->flags = flags; +- table->handle = ++table_handle; ++ table->handle = ++nft_net->table_handle; + if (table->flags & NFT_TABLE_F_OWNER) + table->nlpid = NETLINK_CB(skb).portid; + +@@ -2196,9 +2195,9 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, + struct netlink_ext_ack *extack) + { + const struct nlattr * const *nla = ctx->nla; ++ struct nft_stats __percpu *stats = NULL; + struct nft_table *table = ctx->table; + struct nft_base_chain *basechain; +- struct nft_stats __percpu *stats; + struct net *net = ctx->net; + char name[NFT_NAME_MAXLEN]; + struct nft_rule_blob *blob; +@@ -2236,7 +2235,6 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, + return PTR_ERR(stats); + } + rcu_assign_pointer(basechain->stats, stats); +- static_branch_inc(&nft_counters_enabled); + } + + err = nft_basechain_init(basechain, family, &hook, flags); +@@ -2319,6 +2317,9 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, + goto err_unregister_hook; + } + ++ if (stats) ++ static_branch_inc(&nft_counters_enabled); ++ + table->use++; + + return 0; +@@ -2574,6 +2575,9 @@ static int nf_tables_newchain(struct sk_buff *skb, const struct nfnl_info *info, + nft_ctx_init(&ctx, net, skb, info->nlh, family, table, chain, nla); + + if (chain != NULL) { ++ if (chain->flags & NFT_CHAIN_BINDING) ++ return -EINVAL; ++ + if (info->nlh->nlmsg_flags & NLM_F_EXCL) { + NL_SET_BAD_ATTR(extack, attr); + return -EEXIST; +@@ -9653,6 +9657,8 @@ static int nft_verdict_init(const struct nft_ctx 
*ctx, struct nft_data *data, + return PTR_ERR(chain); + if (nft_is_base_chain(chain)) + return -EOPNOTSUPP; ++ if (nft_chain_is_bound(chain)) ++ return -EINVAL; + if (desc->flags & NFT_DATA_DESC_SETELEM && + chain->flags & NFT_CHAIN_BINDING) + return -EINVAL; +diff --git a/net/netfilter/nft_osf.c b/net/netfilter/nft_osf.c +index 5eed18f90b020..175d666c8d87e 100644 +--- a/net/netfilter/nft_osf.c ++++ b/net/netfilter/nft_osf.c +@@ -115,9 +115,21 @@ static int nft_osf_validate(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nft_data **data) + { +- return nft_chain_validate_hooks(ctx->chain, (1 << NF_INET_LOCAL_IN) | +- (1 << NF_INET_PRE_ROUTING) | +- (1 << NF_INET_FORWARD)); ++ unsigned int hooks; ++ ++ switch (ctx->family) { ++ case NFPROTO_IPV4: ++ case NFPROTO_IPV6: ++ case NFPROTO_INET: ++ hooks = (1 << NF_INET_LOCAL_IN) | ++ (1 << NF_INET_PRE_ROUTING) | ++ (1 << NF_INET_FORWARD); ++ break; ++ default: ++ return -EOPNOTSUPP; ++ } ++ ++ return nft_chain_validate_hooks(ctx->chain, hooks); + } + + static bool nft_osf_reduce(struct nft_regs_track *track, +diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c +index 2e7ac007cb30f..eb0e40c297121 100644 +--- a/net/netfilter/nft_payload.c ++++ b/net/netfilter/nft_payload.c +@@ -740,17 +740,23 @@ static int nft_payload_set_init(const struct nft_ctx *ctx, + const struct nlattr * const tb[]) + { + struct nft_payload_set *priv = nft_expr_priv(expr); ++ u32 csum_offset, csum_type = NFT_PAYLOAD_CSUM_NONE; ++ int err; + + priv->base = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_BASE])); + priv->offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET])); + priv->len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN])); + + if (tb[NFTA_PAYLOAD_CSUM_TYPE]) +- priv->csum_type = +- ntohl(nla_get_be32(tb[NFTA_PAYLOAD_CSUM_TYPE])); +- if (tb[NFTA_PAYLOAD_CSUM_OFFSET]) +- priv->csum_offset = +- ntohl(nla_get_be32(tb[NFTA_PAYLOAD_CSUM_OFFSET])); ++ csum_type = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_CSUM_TYPE])); ++ if (tb[NFTA_PAYLOAD_CSUM_OFFSET]) { ++ err = nft_parse_u32_check(tb[NFTA_PAYLOAD_CSUM_OFFSET], U8_MAX, ++ &csum_offset); ++ if (err < 0) ++ return err; ++ ++ priv->csum_offset = csum_offset; ++ } + if (tb[NFTA_PAYLOAD_CSUM_FLAGS]) { + u32 flags; + +@@ -761,7 +767,7 @@ static int nft_payload_set_init(const struct nft_ctx *ctx, + priv->csum_flags = flags; + } + +- switch (priv->csum_type) { ++ switch (csum_type) { + case NFT_PAYLOAD_CSUM_NONE: + case NFT_PAYLOAD_CSUM_INET: + break; +@@ -775,6 +781,7 @@ static int nft_payload_set_init(const struct nft_ctx *ctx, + default: + return -EOPNOTSUPP; + } ++ priv->csum_type = csum_type; + + return nft_parse_register_load(tb[NFTA_PAYLOAD_SREG], &priv->sreg, + priv->len); +@@ -833,6 +840,7 @@ nft_payload_select_ops(const struct nft_ctx *ctx, + { + enum nft_payload_bases base; + unsigned int offset, len; ++ int err; + + if (tb[NFTA_PAYLOAD_BASE] == NULL || + tb[NFTA_PAYLOAD_OFFSET] == NULL || +@@ -859,8 +867,13 @@ nft_payload_select_ops(const struct nft_ctx *ctx, + if (tb[NFTA_PAYLOAD_DREG] == NULL) + return ERR_PTR(-EINVAL); + +- offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET])); +- len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN])); ++ err = nft_parse_u32_check(tb[NFTA_PAYLOAD_OFFSET], U8_MAX, &offset); ++ if (err < 0) ++ return ERR_PTR(err); ++ ++ err = nft_parse_u32_check(tb[NFTA_PAYLOAD_LEN], U8_MAX, &len); ++ if (err < 0) ++ return ERR_PTR(err); + + if (len <= 4 && is_power_of_2(len) && IS_ALIGNED(offset, len) && + base != NFT_PAYLOAD_LL_HEADER && base != NFT_PAYLOAD_INNER_HEADER) +diff --git 
a/net/netfilter/nft_tproxy.c b/net/netfilter/nft_tproxy.c +index 801f013971dfa..a701ad64f10af 100644 +--- a/net/netfilter/nft_tproxy.c ++++ b/net/netfilter/nft_tproxy.c +@@ -312,6 +312,13 @@ static int nft_tproxy_dump(struct sk_buff *skb, + return 0; + } + ++static int nft_tproxy_validate(const struct nft_ctx *ctx, ++ const struct nft_expr *expr, ++ const struct nft_data **data) ++{ ++ return nft_chain_validate_hooks(ctx->chain, 1 << NF_INET_PRE_ROUTING); ++} ++ + static struct nft_expr_type nft_tproxy_type; + static const struct nft_expr_ops nft_tproxy_ops = { + .type = &nft_tproxy_type, +@@ -321,6 +328,7 @@ static const struct nft_expr_ops nft_tproxy_ops = { + .destroy = nft_tproxy_destroy, + .dump = nft_tproxy_dump, + .reduce = NFT_REDUCE_READONLY, ++ .validate = nft_tproxy_validate, + }; + + static struct nft_expr_type nft_tproxy_type __read_mostly = { +diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c +index d0f9b1d51b0e9..96b03e0bf74ff 100644 +--- a/net/netfilter/nft_tunnel.c ++++ b/net/netfilter/nft_tunnel.c +@@ -161,6 +161,7 @@ static const struct nft_expr_ops nft_tunnel_get_ops = { + + static struct nft_expr_type nft_tunnel_type __read_mostly = { + .name = "tunnel", ++ .family = NFPROTO_NETDEV, + .ops = &nft_tunnel_get_ops, + .policy = nft_tunnel_policy, + .maxattr = NFTA_TUNNEL_MAX, +diff --git a/net/rose/rose_loopback.c b/net/rose/rose_loopback.c +index 11c45c8c6c164..036d92c0ad794 100644 +--- a/net/rose/rose_loopback.c ++++ b/net/rose/rose_loopback.c +@@ -96,7 +96,8 @@ static void rose_loopback_timer(struct timer_list *unused) + } + + if (frametype == ROSE_CALL_REQUEST) { +- if (!rose_loopback_neigh->dev) { ++ if (!rose_loopback_neigh->dev && ++ !rose_loopback_neigh->loopback) { + kfree_skb(skb); + continue; + } +diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c +index 84d0a41096450..6401cdf7a6246 100644 +--- a/net/rxrpc/call_object.c ++++ b/net/rxrpc/call_object.c +@@ -285,8 +285,10 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, + _enter("%p,%lx", rx, p->user_call_ID); + + limiter = rxrpc_get_call_slot(p, gfp); +- if (!limiter) ++ if (!limiter) { ++ release_sock(&rx->sk); + return ERR_PTR(-ERESTARTSYS); ++ } + + call = rxrpc_alloc_client_call(rx, srx, gfp, debug_id); + if (IS_ERR(call)) { +diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c +index 1d38e279e2efa..3c3a626459deb 100644 +--- a/net/rxrpc/sendmsg.c ++++ b/net/rxrpc/sendmsg.c +@@ -51,10 +51,7 @@ static int rxrpc_wait_for_tx_window_intr(struct rxrpc_sock *rx, + return sock_intr_errno(*timeo); + + trace_rxrpc_transmit(call, rxrpc_transmit_wait); +- mutex_unlock(&call->user_mutex); + *timeo = schedule_timeout(*timeo); +- if (mutex_lock_interruptible(&call->user_mutex) < 0) +- return sock_intr_errno(*timeo); + } + } + +@@ -290,37 +287,48 @@ out: + static int rxrpc_send_data(struct rxrpc_sock *rx, + struct rxrpc_call *call, + struct msghdr *msg, size_t len, +- rxrpc_notify_end_tx_t notify_end_tx) ++ rxrpc_notify_end_tx_t notify_end_tx, ++ bool *_dropped_lock) + { + struct rxrpc_skb_priv *sp; + struct sk_buff *skb; + struct sock *sk = &rx->sk; ++ enum rxrpc_call_state state; + long timeo; +- bool more; +- int ret, copied; ++ bool more = msg->msg_flags & MSG_MORE; ++ int ret, copied = 0; + + timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); + + /* this should be in poll */ + sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); + ++reload: ++ ret = -EPIPE; + if (sk->sk_shutdown & SEND_SHUTDOWN) +- return -EPIPE; +- +- more = msg->msg_flags & MSG_MORE; +- ++ goto maybe_error; 
++ state = READ_ONCE(call->state); ++ ret = -ESHUTDOWN; ++ if (state >= RXRPC_CALL_COMPLETE) ++ goto maybe_error; ++ ret = -EPROTO; ++ if (state != RXRPC_CALL_CLIENT_SEND_REQUEST && ++ state != RXRPC_CALL_SERVER_ACK_REQUEST && ++ state != RXRPC_CALL_SERVER_SEND_REPLY) ++ goto maybe_error; ++ ++ ret = -EMSGSIZE; + if (call->tx_total_len != -1) { +- if (len > call->tx_total_len) +- return -EMSGSIZE; +- if (!more && len != call->tx_total_len) +- return -EMSGSIZE; ++ if (len - copied > call->tx_total_len) ++ goto maybe_error; ++ if (!more && len - copied != call->tx_total_len) ++ goto maybe_error; + } + + skb = call->tx_pending; + call->tx_pending = NULL; + rxrpc_see_skb(skb, rxrpc_skb_seen); + +- copied = 0; + do { + /* Check to see if there's a ping ACK to reply to. */ + if (call->ackr_reason == RXRPC_ACK_PING_RESPONSE) +@@ -331,16 +339,8 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, + + _debug("alloc"); + +- if (!rxrpc_check_tx_space(call, NULL)) { +- ret = -EAGAIN; +- if (msg->msg_flags & MSG_DONTWAIT) +- goto maybe_error; +- ret = rxrpc_wait_for_tx_window(rx, call, +- &timeo, +- msg->msg_flags & MSG_WAITALL); +- if (ret < 0) +- goto maybe_error; +- } ++ if (!rxrpc_check_tx_space(call, NULL)) ++ goto wait_for_space; + + /* Work out the maximum size of a packet. Assume that + * the security header is going to be in the padded +@@ -468,6 +468,27 @@ maybe_error: + efault: + ret = -EFAULT; + goto out; ++ ++wait_for_space: ++ ret = -EAGAIN; ++ if (msg->msg_flags & MSG_DONTWAIT) ++ goto maybe_error; ++ mutex_unlock(&call->user_mutex); ++ *_dropped_lock = true; ++ ret = rxrpc_wait_for_tx_window(rx, call, &timeo, ++ msg->msg_flags & MSG_WAITALL); ++ if (ret < 0) ++ goto maybe_error; ++ if (call->interruptibility == RXRPC_INTERRUPTIBLE) { ++ if (mutex_lock_interruptible(&call->user_mutex) < 0) { ++ ret = sock_intr_errno(timeo); ++ goto maybe_error; ++ } ++ } else { ++ mutex_lock(&call->user_mutex); ++ } ++ *_dropped_lock = false; ++ goto reload; + } + + /* +@@ -629,6 +650,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) + enum rxrpc_call_state state; + struct rxrpc_call *call; + unsigned long now, j; ++ bool dropped_lock = false; + int ret; + + struct rxrpc_send_params p = { +@@ -737,21 +759,13 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) + ret = rxrpc_send_abort_packet(call); + } else if (p.command != RXRPC_CMD_SEND_DATA) { + ret = -EINVAL; +- } else if (rxrpc_is_client_call(call) && +- state != RXRPC_CALL_CLIENT_SEND_REQUEST) { +- /* request phase complete for this client call */ +- ret = -EPROTO; +- } else if (rxrpc_is_service_call(call) && +- state != RXRPC_CALL_SERVER_ACK_REQUEST && +- state != RXRPC_CALL_SERVER_SEND_REPLY) { +- /* Reply phase not begun or not complete for service call. 
*/ +- ret = -EPROTO; + } else { +- ret = rxrpc_send_data(rx, call, msg, len, NULL); ++ ret = rxrpc_send_data(rx, call, msg, len, NULL, &dropped_lock); + } + + out_put_unlock: +- mutex_unlock(&call->user_mutex); ++ if (!dropped_lock) ++ mutex_unlock(&call->user_mutex); + error_put: + rxrpc_put_call(call, rxrpc_call_put); + _leave(" = %d", ret); +@@ -779,6 +793,7 @@ int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call, + struct msghdr *msg, size_t len, + rxrpc_notify_end_tx_t notify_end_tx) + { ++ bool dropped_lock = false; + int ret; + + _enter("{%d,%s},", call->debug_id, rxrpc_call_states[call->state]); +@@ -796,7 +811,7 @@ int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call, + case RXRPC_CALL_SERVER_ACK_REQUEST: + case RXRPC_CALL_SERVER_SEND_REPLY: + ret = rxrpc_send_data(rxrpc_sk(sock->sk), call, msg, len, +- notify_end_tx); ++ notify_end_tx, &dropped_lock); + break; + case RXRPC_CALL_COMPLETE: + read_lock_bh(&call->state_lock); +@@ -810,7 +825,8 @@ int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call, + break; + } + +- mutex_unlock(&call->user_mutex); ++ if (!dropped_lock) ++ mutex_unlock(&call->user_mutex); + _leave(" = %d", ret); + return ret; + } +diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c +index dba0b3e24af5e..a64c3c1541118 100644 +--- a/net/sched/sch_generic.c ++++ b/net/sched/sch_generic.c +@@ -409,7 +409,7 @@ static inline bool qdisc_restart(struct Qdisc *q, int *packets) + + void __qdisc_run(struct Qdisc *q) + { +- int quota = dev_tx_weight; ++ int quota = READ_ONCE(dev_tx_weight); + int packets; + + while (qdisc_restart(q, &packets)) { +diff --git a/net/socket.c b/net/socket.c +index 96300cdc06251..34102aa4ab0a6 100644 +--- a/net/socket.c ++++ b/net/socket.c +@@ -1801,7 +1801,7 @@ int __sys_listen(int fd, int backlog) + + sock = sockfd_lookup_light(fd, &err, &fput_needed); + if (sock) { +- somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn; ++ somaxconn = READ_ONCE(sock_net(sock->sk)->core.sysctl_somaxconn); + if ((unsigned int)backlog > somaxconn) + backlog = somaxconn; + +diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c +index 733f9f2260926..c1a01947530f0 100644 +--- a/net/sunrpc/clnt.c ++++ b/net/sunrpc/clnt.c +@@ -1888,7 +1888,7 @@ call_encode(struct rpc_task *task) + break; + case -EKEYEXPIRED: + if (!task->tk_cred_retry) { +- rpc_exit(task, task->tk_status); ++ rpc_call_rpcerror(task, task->tk_status); + } else { + task->tk_action = call_refresh; + task->tk_cred_retry--; +diff --git a/net/xfrm/espintcp.c b/net/xfrm/espintcp.c +index 82d14eea1b5ad..974eb97b77d22 100644 +--- a/net/xfrm/espintcp.c ++++ b/net/xfrm/espintcp.c +@@ -168,7 +168,7 @@ int espintcp_queue_out(struct sock *sk, struct sk_buff *skb) + { + struct espintcp_ctx *ctx = espintcp_getctx(sk); + +- if (skb_queue_len(&ctx->out_queue) >= netdev_max_backlog) ++ if (skb_queue_len(&ctx->out_queue) >= READ_ONCE(netdev_max_backlog)) + return -ENOBUFS; + + __skb_queue_tail(&ctx->out_queue, skb); +diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c +index 144238a50f3d4..b2f4ec9c537f0 100644 +--- a/net/xfrm/xfrm_input.c ++++ b/net/xfrm/xfrm_input.c +@@ -669,7 +669,6 @@ resume: + + x->curlft.bytes += skb->len; + x->curlft.packets++; +- x->curlft.use_time = ktime_get_real_seconds(); + + spin_unlock(&x->lock); + +@@ -783,7 +782,7 @@ int xfrm_trans_queue_net(struct net *net, struct sk_buff *skb, + + trans = this_cpu_ptr(&xfrm_trans_tasklet); + +- if (skb_queue_len(&trans->queue) >= netdev_max_backlog) ++ if (skb_queue_len(&trans->queue) 
>= READ_ONCE(netdev_max_backlog)) + return -ENOBUFS; + + BUILD_BUG_ON(sizeof(struct xfrm_trans_cb) > sizeof(skb->cb)); +diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c +index 555ab35cd119a..9a5e79a38c679 100644 +--- a/net/xfrm/xfrm_output.c ++++ b/net/xfrm/xfrm_output.c +@@ -534,7 +534,6 @@ static int xfrm_output_one(struct sk_buff *skb, int err) + + x->curlft.bytes += skb->len; + x->curlft.packets++; +- x->curlft.use_time = ktime_get_real_seconds(); + + spin_unlock_bh(&x->lock); + +diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c +index f1a0bab920a55..cc6ab79609e29 100644 +--- a/net/xfrm/xfrm_policy.c ++++ b/net/xfrm/xfrm_policy.c +@@ -3162,7 +3162,7 @@ ok: + return dst; + + nopol: +- if (!(dst_orig->dev->flags & IFF_LOOPBACK) && ++ if ((!dst_orig->dev || !(dst_orig->dev->flags & IFF_LOOPBACK)) && + net->xfrm.policy_default[dir] == XFRM_USERPOLICY_BLOCK) { + err = -EPERM; + goto error; +@@ -3599,6 +3599,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, + if (pols[1]) { + if (IS_ERR(pols[1])) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR); ++ xfrm_pol_put(pols[0]); + return 0; + } + pols[1]->curlft.use_time = ktime_get_real_seconds(); +diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c +index ccfb172eb5b8d..11d89af9cb55a 100644 +--- a/net/xfrm/xfrm_state.c ++++ b/net/xfrm/xfrm_state.c +@@ -1592,6 +1592,7 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, + x->replay = orig->replay; + x->preplay = orig->preplay; + x->mapping_maxage = orig->mapping_maxage; ++ x->lastused = orig->lastused; + x->new_mapping = 0; + x->new_mapping_sport = 0; + +diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config +index 73e0762092feb..02a6000a82bbf 100644 +--- a/tools/perf/Makefile.config ++++ b/tools/perf/Makefile.config +@@ -265,7 +265,7 @@ endif + # defined. get-executable-or-default fails with an error if the first argument is supplied but + # doesn't exist. + override PYTHON_CONFIG := $(call get-executable-or-default,PYTHON_CONFIG,$(PYTHON_AUTO)) +-override PYTHON := $(call get-executable-or-default,PYTHON,$(subst -config,,$(PYTHON_AUTO))) ++override PYTHON := $(call get-executable-or-default,PYTHON,$(subst -config,,$(PYTHON_CONFIG))) + + grep-libs = $(filter -l%,$(1)) + strip-libs = $(filter-out -l%,$(1)) +diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c +index 86f838c5661ee..5f0333a8acd8a 100644 +--- a/tools/perf/builtin-stat.c ++++ b/tools/perf/builtin-stat.c +@@ -826,6 +826,7 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx) + } + + evlist__for_each_entry(evsel_list, counter) { ++ counter->reset_group = false; + if (bpf_counter__load(counter, &target)) + return -1; + if (!evsel__is_bpf(counter)) |