author | Mike Pagano <mpagano@gentoo.org> | 2018-12-05 15:15:52 -0500 |
---|---|---|
committer | Mike Pagano <mpagano@gentoo.org> | 2018-12-05 15:15:52 -0500 |
commit | 5a7c485603ea79c3f41cc5db9f8206335fc9b819 (patch) | |
tree | e6c807e5cca7aea7aad27ec14b902ead01b7e86d | |
parent | proj/linux-patches: Linux patch 4.19.6 (diff) | |
download | linux-patches-4.19-8.tar.gz linux-patches-4.19-8.tar.bz2 linux-patches-4.19-8.zip |
proj/linux-patches: Linux patch 4.19.7 (tag: 4.19-8)
Signed-off-by: Mike Pagano <mpagano@gentoo.org>
-rw-r--r-- | 0000_README | 4 |
-rw-r--r-- | 1006_linux-4.19.7.patch | 5500 |
2 files changed, 5504 insertions, 0 deletions
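The patch below backports the Spectre v2 user-space (STIBP/IBPB) mitigation series to 4.19. As orientation for the new PR_SPEC_INDIRECT_BRANCH prctl that the spec_ctrl.rst hunk documents, here is a minimal userspace sketch. It is not part of the commit; the fallback defines are only needed with pre-4.19.7 UAPI headers and mirror the values from include/uapi/linux/prctl.h:

```c
#include <stdio.h>
#include <sys/prctl.h>

/* Fallbacks for older UAPI headers; values mirror include/uapi/linux/prctl.h. */
#ifndef PR_SET_SPECULATION_CTRL
#define PR_GET_SPECULATION_CTRL 52
#define PR_SET_SPECULATION_CTRL 53
#endif
#ifndef PR_SPEC_INDIRECT_BRANCH
#define PR_SPEC_INDIRECT_BRANCH 1
#endif
#ifndef PR_SPEC_DISABLE
#define PR_SPEC_DISABLE (1UL << 2)
#endif

int main(void)
{
	/* Query the per-task indirect branch speculation state. */
	int state = prctl(PR_GET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, 0, 0, 0);

	if (state < 0) {
		/* EINVAL/ENODEV here usually means a kernel without this series. */
		perror("PR_GET_SPECULATION_CTRL");
		return 1;
	}
	printf("indirect branch speculation state: 0x%x\n", state);

	/* Opt this task into STIBP (effective in the prctl/seccomp modes). */
	if (prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH,
		  PR_SPEC_DISABLE, 0, 0) < 0)
		perror("PR_SET_SPECULATION_CTRL");
	return 0;
}
```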
diff --git a/0000_README b/0000_README
index c4c0a77d..e91fad41 100644
--- a/0000_README
+++ b/0000_README
@@ -67,6 +67,10 @@ Patch: 1005_linux-4.19.6.patch
 From: http://www.kernel.org
 Desc: Linux 4.19.6
 
+Patch: 1006_linux-4.19.7.patch
+From: http://www.kernel.org
+Desc: Linux 4.19.7
+
 Patch: 1500_XATTR_USER_PREFIX.patch
 From: https://bugs.gentoo.org/show_bug.cgi?id=470644
 Desc: Support for namespace user.pax.* on tmpfs.
diff --git a/1006_linux-4.19.7.patch b/1006_linux-4.19.7.patch
new file mode 100644
index 00000000..0840b0c7
--- /dev/null
+++ b/1006_linux-4.19.7.patch
@@ -0,0 +1,5500 @@
+diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
+index fa4eec22816d..0c404cda531a 100644
+--- a/Documentation/admin-guide/kernel-parameters.txt
++++ b/Documentation/admin-guide/kernel-parameters.txt
+@@ -4169,9 +4169,13 @@
+
+ spectre_v2= [X86] Control mitigation of Spectre variant 2
+ (indirect branch speculation) vulnerability.
++ The default operation protects the kernel from
++ user space attacks.
+
+- on - unconditionally enable
+- off - unconditionally disable
++ on - unconditionally enable, implies
++ spectre_v2_user=on
++ off - unconditionally disable, implies
++ spectre_v2_user=off
+ auto - kernel detects whether your CPU model is
+ vulnerable
+
+@@ -4181,6 +4185,12 @@
+ CONFIG_RETPOLINE configuration option, and the
+ compiler with which the kernel was built.
+
++ Selecting 'on' will also enable the mitigation
++ against user space to user space task attacks.
++
++ Selecting 'off' will disable both the kernel and
++ the user space protections.
++
+ Specific mitigations can also be selected manually:
+
+ retpoline - replace indirect branches
+@@ -4190,6 +4200,48 @@
+ Not specifying this option is equivalent to
+ spectre_v2=auto.
+
++ spectre_v2_user=
++ [X86] Control mitigation of Spectre variant 2
++ (indirect branch speculation) vulnerability between
++ user space tasks
++
++ on - Unconditionally enable mitigations. Is
++ enforced by spectre_v2=on
++
++ off - Unconditionally disable mitigations. Is
++ enforced by spectre_v2=off
++
++ prctl - Indirect branch speculation is enabled,
++ but mitigation can be enabled via prctl
++ per thread. The mitigation control state
++ is inherited on fork.
++
++ prctl,ibpb
++ - Like "prctl" above, but only STIBP is
++ controlled per thread. IBPB is issued
++ always when switching between different user
++ space processes.
++
++ seccomp
++ - Same as "prctl" above, but all seccomp
++ threads will enable the mitigation unless
++ they explicitly opt out.
++
++ seccomp,ibpb
++ - Like "seccomp" above, but only STIBP is
++ controlled per thread. IBPB is issued
++ always when switching between different
++ user space processes.
++
++ auto - Kernel selects the mitigation depending on
++ the available CPU features and vulnerability.
++
++ Default mitigation:
++ If CONFIG_SECCOMP=y then "seccomp", otherwise "prctl"
++
++ Not specifying this option is equivalent to
++ spectre_v2_user=auto.
++
+ spec_store_bypass_disable=
+ [HW] Control Speculative Store Bypass (SSB) Disable mitigation
+ (Speculative Store Bypass vulnerability)
+diff --git a/Documentation/userspace-api/spec_ctrl.rst b/Documentation/userspace-api/spec_ctrl.rst
+index 32f3d55c54b7..c4dbe6f7cdae 100644
+--- a/Documentation/userspace-api/spec_ctrl.rst
++++ b/Documentation/userspace-api/spec_ctrl.rst
+@@ -92,3 +92,12 @@ Speculation misfeature controls
+ * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_ENABLE, 0, 0);
+ * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_DISABLE, 0, 0);
+ * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_FORCE_DISABLE, 0, 0);
++
++- PR_SPEC_INDIR_BRANCH: Indirect Branch Speculation in User Processes
++ (Mitigate Spectre V2 style attacks against user processes)
++
++ Invocations:
++ * prctl(PR_GET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, 0, 0, 0);
++ * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_ENABLE, 0, 0);
++ * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_DISABLE, 0, 0);
++ * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_FORCE_DISABLE, 0, 0);
+diff --git a/Makefile b/Makefile
+index 20cbb8e84650..d2b4efcfb388 100644
+--- a/Makefile
++++ b/Makefile
+@@ -1,7 +1,7 @@
+ # SPDX-License-Identifier: GPL-2.0
+ VERSION = 4
+ PATCHLEVEL = 19
+-SUBLEVEL = 6
++SUBLEVEL = 7
+ EXTRAVERSION =
+ NAME = "People's Front"
+
+diff --git a/arch/arm/boot/dts/rk3288-veyron.dtsi b/arch/arm/boot/dts/rk3288-veyron.dtsi
+index 2075120cfc4d..d8bf939a3aff 100644
+--- a/arch/arm/boot/dts/rk3288-veyron.dtsi
++++ b/arch/arm/boot/dts/rk3288-veyron.dtsi
+@@ -10,7 +10,11 @@
+ #include "rk3288.dtsi"
+
+ / {
+- memory@0 {
++ /*
++ * The default coreboot on veyron devices ignores memory@0 nodes
++ * and would instead create another memory node.
++ */
++ memory {
+ device_type = "memory";
+ reg = <0x0 0x0 0x0 0x80000000>;
+ };
+diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c
+index 5617932a83df..ee673c09aa6c 100644
+--- a/arch/arm/kernel/ftrace.c
++++ b/arch/arm/kernel/ftrace.c
+@@ -227,9 +227,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
+ unsigned long frame_pointer)
+ {
+ unsigned long return_hooker = (unsigned long) &return_to_handler;
+- struct ftrace_graph_ent trace;
+ unsigned long old;
+- int err;
+
+ if (unlikely(atomic_read(&current->tracing_graph_pause)))
+ return;
+@@ -237,21 +235,8 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
+ old = *parent;
+ *parent = return_hooker;
+
+- trace.func = self_addr;
+- trace.depth = current->curr_ret_stack + 1;
+-
+- /* Only trace if the calling function expects to */
+- if (!ftrace_graph_entry(&trace)) {
++ if (function_graph_enter(old, self_addr, frame_pointer, NULL))
+ *parent = old;
+- return;
+- }
+-
+- err = ftrace_push_return_trace(old, self_addr, &trace.depth,
+- frame_pointer, NULL);
+- if (err == -EBUSY) {
+- *parent = old;
+- return;
+- }
+ }
+
+ #ifdef CONFIG_DYNAMIC_FTRACE
+diff --git a/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts b/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts
+index e0d64f862322..8ce4a79d9360 100644
+--- a/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts
++++ b/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts
+@@ -153,7 +153,7 @@
+ };
+
+ &pcie0 {
+- ep-gpios = <&gpio4 RK_PC6 GPIO_ACTIVE_LOW>;
++ ep-gpios = <&gpio4 RK_PC6 GPIO_ACTIVE_HIGH>;
+ num-lanes = <4>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pcie_clkreqn_cpm>;
+diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c
+index 50986e388d2b..57e962290df3 100644
+--- a/arch/arm64/kernel/ftrace.c
++++ b/arch/arm64/kernel/ftrace.c
+@@ -216,8 +216,6 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
+ {
+ unsigned long return_hooker = (unsigned long)&return_to_handler;
+ unsigned long old;
+- struct ftrace_graph_ent trace;
+- int err;
+
+ if (unlikely(atomic_read(&current->tracing_graph_pause)))
+ return;
+@@ -229,18 +227,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
+ */
+ old = *parent;
+
+- trace.func = self_addr;
+- trace.depth = current->curr_ret_stack + 1;
+-
+- /* Only trace if the calling function expects to */
+- if (!ftrace_graph_entry(&trace))
+- return;
+-
+- err = ftrace_push_return_trace(old, self_addr, &trace.depth,
+- frame_pointer, NULL);
+- if (err == -EBUSY)
+- return;
+- else
++ if (!function_graph_enter(old, self_addr, frame_pointer, NULL))
+ *parent = return_hooker;
+ }
+
+diff --git a/arch/microblaze/kernel/ftrace.c b/arch/microblaze/kernel/ftrace.c
+index d57563c58a26..224eea40e1ee 100644
+--- a/arch/microblaze/kernel/ftrace.c
++++ b/arch/microblaze/kernel/ftrace.c
+@@ -22,8 +22,7 @@
+ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
+ {
+ unsigned long old;
+- int faulted, err;
++ int faulted;
+ unsigned long return_hooker = (unsigned long)
+ &return_to_handler;
+
+@@ -63,18 +62,8 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
+ return;
+ }
+
+- err = ftrace_push_return_trace(old, self_addr, &trace.depth, 0, NULL);
+- if (err == -EBUSY) {
++ if (function_graph_enter(old, self_addr, 0, NULL))
+ *parent = old;
+- return;
+- }
+-
+- trace.func = self_addr;
+- /* Only trace if the calling function expects to */
+- if (!ftrace_graph_entry(&trace)) {
+- current->curr_ret_stack--;
+- *parent = old;
+- }
+ }
+ #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+
+diff --git a/arch/mips/kernel/ftrace.c b/arch/mips/kernel/ftrace.c
+index 7f3dfdbc3657..b122cbb4aad1 100644
+--- a/arch/mips/kernel/ftrace.c
++++ b/arch/mips/kernel/ftrace.c
+@@ -322,7 +322,6 @@ void prepare_ftrace_return(unsigned long *parent_ra_addr, unsigned long self_ra,
+ unsigned long fp)
+ {
+ unsigned long old_parent_ra;
+- struct ftrace_graph_ent trace;
+ unsigned long return_hooker = (unsigned long)
+ &return_to_handler;
+ int faulted, insns;
+@@ -369,12 +368,6 @@ void prepare_ftrace_return(unsigned long *parent_ra_addr, unsigned long self_ra,
+ if (unlikely(faulted))
+ goto out;
+
+- if (ftrace_push_return_trace(old_parent_ra, self_ra, &trace.depth, fp,
+- NULL) == -EBUSY) {
+- *parent_ra_addr = old_parent_ra;
+- return;
+- }
+-
+ /*
+ * Get the recorded ip of the current mcount calling site in the
+ * __mcount_loc section, which will be used to filter the function
+@@ -382,13 +375,10 @@ void prepare_ftrace_return(unsigned long *parent_ra_addr, unsigned long self_ra,
+ */
+
+ insns = core_kernel_text(self_ra) ? 2 : MCOUNT_OFFSET_INSNS + 1;
+- trace.func = self_ra - (MCOUNT_INSN_SIZE * insns);
++ self_ra -= (MCOUNT_INSN_SIZE * insns);
+
+- /* Only trace if the calling function expects to */
+- if (!ftrace_graph_entry(&trace)) {
+- current->curr_ret_stack--;
++ if (function_graph_enter(old_parent_ra, self_ra, fp, NULL))
+ *parent_ra_addr = old_parent_ra;
+- }
+ return;
+ out:
+ ftrace_graph_stop();
+diff --git a/arch/nds32/kernel/ftrace.c b/arch/nds32/kernel/ftrace.c
+index a0a9679ad5de..8a41372551ff 100644
+--- a/arch/nds32/kernel/ftrace.c
++++ b/arch/nds32/kernel/ftrace.c
+@@ -211,29 +211,15 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
+ unsigned long frame_pointer)
+ {
+ unsigned long return_hooker = (unsigned long)&return_to_handler;
+- struct ftrace_graph_ent trace;
+ unsigned long old;
+- int err;
+
+ if (unlikely(atomic_read(&current->tracing_graph_pause)))
+ return;
+
+ old = *parent;
+
+- trace.func = self_addr;
+- trace.depth = current->curr_ret_stack + 1;
+-
+- /* Only trace if the calling function expects to */
+- if (!ftrace_graph_entry(&trace))
+- return;
+-
+- err = ftrace_push_return_trace(old, self_addr, &trace.depth,
+- frame_pointer, NULL);
+-
+- if (err == -EBUSY)
+- return;
+-
+- *parent = return_hooker;
++ if (!function_graph_enter(old, self_addr, frame_pointer, NULL))
++ *parent = return_hooker;
+ }
+
+ noinline void ftrace_graph_caller(void)
+diff --git a/arch/parisc/kernel/ftrace.c b/arch/parisc/kernel/ftrace.c
+index 6fa8535d3cce..e46a4157a894 100644
+--- a/arch/parisc/kernel/ftrace.c
++++ b/arch/parisc/kernel/ftrace.c
+@@ -30,7 +30,6 @@ static void __hot prepare_ftrace_return(unsigned long *parent,
+ unsigned long self_addr)
+ {
+ unsigned long old;
+- struct ftrace_graph_ent trace;
+ extern int parisc_return_to_handler;
+
+ if (unlikely(ftrace_graph_is_dead()))
+@@ -41,19 +40,9 @@ static void __hot prepare_ftrace_return(unsigned long *parent,
+
+ old = *parent;
+
+- trace.func = self_addr;
+- trace.depth = current->curr_ret_stack + 1;
+-
+- /* Only trace if the calling function expects to */
+- if (!ftrace_graph_entry(&trace))
+- return;
+-
+- if (ftrace_push_return_trace(old, self_addr, &trace.depth,
+- 0, NULL) == -EBUSY)
+- return;
+-
+- /* activate parisc_return_to_handler() as return point */
+- *parent = (unsigned long) &parisc_return_to_handler;
++ if (!function_graph_enter(old, self_addr, 0, NULL))
++ /* activate parisc_return_to_handler() as return point */
++ *parent = (unsigned long) &parisc_return_to_handler;
+ }
+ #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+
+diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c
+index 4bfbb54dee51..19ef4f5866b6 100644
+--- a/arch/powerpc/kernel/trace/ftrace.c
++++ b/arch/powerpc/kernel/trace/ftrace.c
+@@ -697,7 +697,6 @@ int ftrace_disable_ftrace_graph_caller(void)
+ */
+ unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip)
+ {
+- struct ftrace_graph_ent trace;
+ unsigned long return_hooker;
+
+ if (unlikely(ftrace_graph_is_dead()))
+@@ -708,18 +707,8 @@ unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip)
+
+ return_hooker = ppc_function_entry(return_to_handler);
+
+- trace.func = ip;
+- trace.depth = current->curr_ret_stack + 1;
+-
+- /* Only trace if the calling function expects to */
+- if (!ftrace_graph_entry(&trace))
+- goto out;
+-
+- if (ftrace_push_return_trace(parent, ip, &trace.depth, 0,
+- NULL) == -EBUSY)
+- goto out;
+-
+- parent = return_hooker;
++ if (!function_graph_enter(parent, ip, 0, NULL))
++ parent = return_hooker;
+ out:
+ return parent;
+ }
+diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c
+index 1157b6b52d25..c433f6d3dd64 100644
+--- a/arch/riscv/kernel/ftrace.c
++++ b/arch/riscv/kernel/ftrace.c
+@@ -132,7 +132,6 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
+ {
+ unsigned long return_hooker = (unsigned long)&return_to_handler;
+ unsigned long old;
+- struct ftrace_graph_ent trace;
+ int err;
+
+ if (unlikely(atomic_read(&current->tracing_graph_pause)))
+@@ -144,17 +143,8 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
+ */
+ old = *parent;
+
+- trace.func = self_addr;
+- trace.depth = current->curr_ret_stack + 1;
+-
+- if (!ftrace_graph_entry(&trace))
+- return;
+-
+- err = ftrace_push_return_trace(old, self_addr, &trace.depth,
+- frame_pointer, parent);
+- if (err == -EBUSY)
+- return;
+- *parent = return_hooker;
++ if (function_graph_enter(old, self_addr, frame_pointer, parent))
++ *parent = return_hooker;
+ }
+
+ #ifdef CONFIG_DYNAMIC_FTRACE
+diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
+index 84be7f02d0c2..39b13d71a8fe 100644
+--- a/arch/s390/kernel/ftrace.c
++++ b/arch/s390/kernel/ftrace.c
+@@ -203,22 +203,13 @@ device_initcall(ftrace_plt_init);
+ */
+ unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip)
+ {
+- struct ftrace_graph_ent trace;
+-
+ if (unlikely(ftrace_graph_is_dead()))
+ goto out;
+ if (unlikely(atomic_read(&current->tracing_graph_pause)))
+ goto out;
+ ip -= MCOUNT_INSN_SIZE;
+- trace.func = ip;
+- trace.depth = current->curr_ret_stack + 1;
+- /* Only trace if the calling function expects to. */
+- if (!ftrace_graph_entry(&trace))
+- goto out;
+- if (ftrace_push_return_trace(parent, ip, &trace.depth, 0,
+- NULL) == -EBUSY)
+- goto out;
+- parent = (unsigned long) return_to_handler;
++ if (!function_graph_enter(parent, ip, 0, NULL))
++ parent = (unsigned long) return_to_handler;
+ out:
+ return parent;
+ }
+diff --git a/arch/sh/kernel/ftrace.c b/arch/sh/kernel/ftrace.c
+index 96dd9f7da250..1b04270e5460 100644
+--- a/arch/sh/kernel/ftrace.c
++++ b/arch/sh/kernel/ftrace.c
+@@ -321,8 +321,7 @@ int ftrace_disable_ftrace_graph_caller(void)
+ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
+ {
+ unsigned long old;
+- int faulted, err;
+- struct ftrace_graph_ent trace;
++ int faulted;
+ unsigned long return_hooker = (unsigned long)&return_to_handler;
+
+ if (unlikely(ftrace_graph_is_dead()))
+@@ -365,18 +364,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
+ return;
+ }
+
+- err = ftrace_push_return_trace(old, self_addr, &trace.depth, 0, NULL);
+- if (err == -EBUSY) {
++ if (function_graph_enter(old, self_addr, 0, NULL))
+ __raw_writel(old, parent);
+- return;
+- }
+-
+- trace.func = self_addr;
+-
+- /* Only trace if the calling function expects to */
+- if (!ftrace_graph_entry(&trace)) {
+- current->curr_ret_stack--;
+- __raw_writel(old, parent);
+- }
+ }
+ #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+diff --git a/arch/sparc/kernel/ftrace.c b/arch/sparc/kernel/ftrace.c
+index 915dda4ae412..684b84ce397f 100644
+--- a/arch/sparc/kernel/ftrace.c
++++ b/arch/sparc/kernel/ftrace.c
+@@ -126,20 +126,11 @@ unsigned long prepare_ftrace_return(unsigned long parent,
+ unsigned long frame_pointer)
+ {
+ unsigned long return_hooker = (unsigned long) &return_to_handler;
+- struct ftrace_graph_ent trace;
+
+ if (unlikely(atomic_read(&current->tracing_graph_pause)))
+ return parent + 8UL;
+
+- trace.func = self_addr;
+- trace.depth = current->curr_ret_stack + 1;
+-
+- /* Only trace if the calling function expects to */
+- if (!ftrace_graph_entry(&trace))
+- return parent + 8UL;
+-
+- if (ftrace_push_return_trace(parent, self_addr, &trace.depth,
+- frame_pointer, NULL) == -EBUSY)
++ if (function_graph_enter(parent, self_addr, frame_pointer, NULL))
+ return parent + 8UL;
+
+ return return_hooker;
+diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
+index 1a0be022f91d..44c6a82b7ce5 100644
+--- a/arch/x86/Kconfig
++++ b/arch/x86/Kconfig
+@@ -441,10 +441,6 @@ config RETPOLINE
+ branches. Requires a compiler with -mindirect-branch=thunk-extern
+ support for full protection. The kernel may run slower.
+
+- Without compiler support, at least indirect branches in assembler
+- code are eliminated. Since this includes the syscall entry path,
+- it is not entirely pointless.
+-
+ config INTEL_RDT
+ bool "Intel Resource Director Technology support"
+ default n
+@@ -1005,13 +1001,7 @@ config NR_CPUS
+ to the kernel image.
+
+ config SCHED_SMT
+- bool "SMT (Hyperthreading) scheduler support"
+- depends on SMP
+- ---help---
+- SMT scheduler support improves the CPU scheduler's decision making
+- when dealing with Intel Pentium 4 chips with HyperThreading at a
+- cost of slightly increased overhead in some places. If unsure say
+- N here.
++ def_bool y if SMP
+
+ config SCHED_MC
+ def_bool y
+diff --git a/arch/x86/Makefile b/arch/x86/Makefile
+index 8f6e7eb8ae9f..9298f0f3817a 100644
+--- a/arch/x86/Makefile
++++ b/arch/x86/Makefile
+@@ -223,9 +223,10 @@ KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
+
+ # Avoid indirect branches in kernel to deal with Spectre
+ ifdef CONFIG_RETPOLINE
+-ifneq ($(RETPOLINE_CFLAGS),)
+- KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE
++ifeq ($(RETPOLINE_CFLAGS),)
++ $(error You are building kernel with non-retpoline compiler, please update your compiler.)
+ endif
++ KBUILD_CFLAGS += $(RETPOLINE_CFLAGS)
+ endif
+
+ archscripts: scripts_basic
+diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
+index dfb2f7c0d019..c8d08da5b308 100644
+--- a/arch/x86/events/core.c
++++ b/arch/x86/events/core.c
+@@ -438,26 +438,6 @@ int x86_setup_perfctr(struct perf_event *event)
+ if (config == -1LL)
+ return -EINVAL;
+
+- /*
+- * Branch tracing:
+- */
+- if (attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS &&
+- !attr->freq && hwc->sample_period == 1) {
+- /* BTS is not supported by this architecture. */
+- if (!x86_pmu.bts_active)
+- return -EOPNOTSUPP;
+-
+- /* BTS is currently only allowed for user-mode. */
+- if (!attr->exclude_kernel)
+- return -EOPNOTSUPP;
+-
+- /* disallow bts if conflicting events are present */
+- if (x86_add_exclusive(x86_lbr_exclusive_lbr))
+- return -EBUSY;
+-
+- event->destroy = hw_perf_lbr_event_destroy;
+- }
+-
+ hwc->config |= config;
+
+ return 0;
+diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
+index 035c37481f57..155fa4b53c56 100644
+--- a/arch/x86/events/intel/core.c
++++ b/arch/x86/events/intel/core.c
+@@ -2358,16 +2358,7 @@ done:
+ static struct event_constraint *
+ intel_bts_constraints(struct perf_event *event)
+ {
+- struct hw_perf_event *hwc = &event->hw;
+- unsigned int hw_event, bts_event;
+-
+- if (event->attr.freq)
+- return NULL;
+-
+- hw_event = hwc->config & INTEL_ARCH_EVENT_MASK;
+- bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS);
+-
+- if (unlikely(hw_event == bts_event && hwc->sample_period == 1))
++ if (unlikely(intel_pmu_has_bts(event)))
+ return &bts_constraint;
+
+ return NULL;
+@@ -2986,10 +2977,51 @@ static unsigned long intel_pmu_large_pebs_flags(struct perf_event *event)
+ return flags;
+ }
+
++static int intel_pmu_bts_config(struct perf_event *event)
++{
++ struct perf_event_attr *attr = &event->attr;
++
++ if (unlikely(intel_pmu_has_bts(event))) {
++ /* BTS is not supported by this architecture. */
++ if (!x86_pmu.bts_active)
++ return -EOPNOTSUPP;
++
++ /* BTS is currently only allowed for user-mode. */
++ if (!attr->exclude_kernel)
++ return -EOPNOTSUPP;
++
++ /* BTS is not allowed for precise events. */
++ if (attr->precise_ip)
++ return -EOPNOTSUPP;
++
++ /* disallow bts if conflicting events are present */
++ if (x86_add_exclusive(x86_lbr_exclusive_lbr))
++ return -EBUSY;
++
++ event->destroy = hw_perf_lbr_event_destroy;
++ }
++
++ return 0;
++}
++
++static int core_pmu_hw_config(struct perf_event *event)
++{
++ int ret = x86_pmu_hw_config(event);
++
++ if (ret)
++ return ret;
++
++ return intel_pmu_bts_config(event);
++}
++
+ static int intel_pmu_hw_config(struct perf_event *event)
+ {
+ int ret = x86_pmu_hw_config(event);
+
++ if (ret)
++ return ret;
++
++ ret = intel_pmu_bts_config(event);
+ if (ret)
+ return ret;
+
+@@ -3015,7 +3047,7 @@ static int intel_pmu_hw_config(struct perf_event *event)
+ /*
+ * BTS is set up earlier in this path, so don't account twice
+ */
+- if (!intel_pmu_has_bts(event)) {
++ if (!unlikely(intel_pmu_has_bts(event))) {
+ /* disallow lbr if conflicting events are present */
+ if (x86_add_exclusive(x86_lbr_exclusive_lbr))
+ return -EBUSY;
+@@ -3478,7 +3510,7 @@ static __initconst const struct x86_pmu core_pmu = {
+ .enable_all = core_pmu_enable_all,
+ .enable = core_pmu_enable_event,
+ .disable = x86_pmu_disable_event,
+- .hw_config = x86_pmu_hw_config,
++ .hw_config = core_pmu_hw_config,
+ .schedule_events = x86_schedule_events,
+ .eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
+ .perfctr = MSR_ARCH_PERFMON_PERFCTR0,
+diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
+index 156286335351..c5ad9cc61f4b 100644
+--- a/arch/x86/events/perf_event.h
++++ b/arch/x86/events/perf_event.h
+@@ -857,11 +857,16 @@ static inline int amd_pmu_init(void)
+
+ static inline bool intel_pmu_has_bts(struct perf_event *event)
+ {
+- if (event->attr.config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS &&
+- !event->attr.freq && event->hw.sample_period == 1)
+- return true;
++ struct hw_perf_event *hwc = &event->hw;
++ unsigned int hw_event, bts_event;
++
++ if (event->attr.freq)
++ return false;
++
++ hw_event = hwc->config & INTEL_ARCH_EVENT_MASK;
++ bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS);
+
+- return false;
++ return hw_event == bts_event && hwc->sample_period == 1;
+ }
+
+ int intel_pmu_save_and_restart(struct perf_event *event);
+diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
+index 1c09a0d1771f..022845ee0c88 100644
+--- a/arch/x86/include/asm/kvm_host.h
++++ b/arch/x86/include/asm/kvm_host.h
+@@ -1046,7 +1046,8 @@ struct kvm_x86_ops {
+ bool (*has_wbinvd_exit)(void);
+
+ u64 (*read_l1_tsc_offset)(struct kvm_vcpu *vcpu);
+- void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset);
++ /* Returns actual tsc_offset set in active VMCS */
++ u64 (*write_l1_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset);
+
+ void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2);
+
+diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
+index 4731f0cf97c5..b3486c8b570a 100644
+--- a/arch/x86/include/asm/msr-index.h
++++ b/arch/x86/include/asm/msr-index.h
+@@ -41,9 +41,10 @@
+
+ #define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */
+ #define SPEC_CTRL_IBRS (1 << 0) /* Indirect Branch Restricted Speculation */
+-#define SPEC_CTRL_STIBP (1 << 1) /* Single Thread Indirect Branch Predictors */
++#define SPEC_CTRL_STIBP_SHIFT 1 /* Single Thread Indirect Branch Predictor (STIBP) bit */
++#define SPEC_CTRL_STIBP (1 << SPEC_CTRL_STIBP_SHIFT) /* STIBP mask */
+ #define SPEC_CTRL_SSBD_SHIFT 2 /* Speculative Store Bypass Disable bit */
+-#define SPEC_CTRL_SSBD (1 << SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */
++#define SPEC_CTRL_SSBD (1 << SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */
+
+ #define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */
+ #define PRED_CMD_IBPB (1 << 0) /* Indirect Branch Prediction Barrier */
+diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
+index fd2a8c1b88bc..032b6009baab 100644
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -3,6 +3,8 @@
+ #ifndef _ASM_X86_NOSPEC_BRANCH_H_
+ #define _ASM_X86_NOSPEC_BRANCH_H_
+
++#include <linux/static_key.h>
++
+ #include <asm/alternative.h>
+ #include <asm/alternative-asm.h>
+ #include <asm/cpufeatures.h>
+@@ -162,29 +164,35 @@
+ _ASM_PTR " 999b\n\t" \
+ ".popsection\n\t"
+
+-#if defined(CONFIG_X86_64) && defined(RETPOLINE)
++#ifdef CONFIG_RETPOLINE
++#ifdef CONFIG_X86_64
+
+ /*
+- * Since the inline asm uses the %V modifier which is only in newer GCC,
+- * the 64-bit one is dependent on RETPOLINE not CONFIG_RETPOLINE.
++ * Inline asm uses the %V modifier which is only in newer GCC
++ * which is ensured when CONFIG_RETPOLINE is defined.
+ */
+ # define CALL_NOSPEC \
+ ANNOTATE_NOSPEC_ALTERNATIVE \
+- ALTERNATIVE( \
++ ALTERNATIVE_2( \
+ ANNOTATE_RETPOLINE_SAFE \
+ "call *%[thunk_target]\n", \
+ "call __x86_indirect_thunk_%V[thunk_target]\n", \
+- X86_FEATURE_RETPOLINE)
++ X86_FEATURE_RETPOLINE, \
++ "lfence;\n" \
++ ANNOTATE_RETPOLINE_SAFE \
++ "call *%[thunk_target]\n", \
++ X86_FEATURE_RETPOLINE_AMD)
+ # define THUNK_TARGET(addr) [thunk_target] "r" (addr)
+
+-#elif defined(CONFIG_X86_32) && defined(CONFIG_RETPOLINE)
++#else /* CONFIG_X86_32 */
+ /*
+ * For i386 we use the original ret-equivalent retpoline, because
+ * otherwise we'll run out of registers. We don't care about CET
+ * here, anyway.
+ */
+ # define CALL_NOSPEC \
+- ALTERNATIVE( \
++ ANNOTATE_NOSPEC_ALTERNATIVE \
++ ALTERNATIVE_2( \
+ ANNOTATE_RETPOLINE_SAFE \
+ "call *%[thunk_target]\n", \
+ " jmp 904f;\n" \
+@@ -199,9 +207,14 @@
+ " ret;\n" \
+ " .align 16\n" \
+ "904: call 901b;\n", \
+- X86_FEATURE_RETPOLINE)
++ X86_FEATURE_RETPOLINE, \
++ "lfence;\n" \
++ ANNOTATE_RETPOLINE_SAFE \
++ "call *%[thunk_target]\n", \
++ X86_FEATURE_RETPOLINE_AMD)
+
+ # define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
++#endif
+ #else /* No retpoline for C / inline asm */
+ # define CALL_NOSPEC "call *%[thunk_target]\n"
+ # define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
+@@ -210,13 +223,19 @@
+ /* The Spectre V2 mitigation variants */
+ enum spectre_v2_mitigation {
+ SPECTRE_V2_NONE,
+- SPECTRE_V2_RETPOLINE_MINIMAL,
+- SPECTRE_V2_RETPOLINE_MINIMAL_AMD,
+ SPECTRE_V2_RETPOLINE_GENERIC,
+ SPECTRE_V2_RETPOLINE_AMD,
+ SPECTRE_V2_IBRS_ENHANCED,
+ };
+
++/* The indirect branch speculation control variants */
++enum spectre_v2_user_mitigation {
++ SPECTRE_V2_USER_NONE,
++ SPECTRE_V2_USER_STRICT,
++ SPECTRE_V2_USER_PRCTL,
++ SPECTRE_V2_USER_SECCOMP,
++};
++
+ /* The Speculative Store Bypass disable variants */
+ enum ssb_mitigation {
+ SPEC_STORE_BYPASS_NONE,
+@@ -294,6 +313,10 @@ do { \
+ preempt_enable(); \
+ } while (0)
+
++DECLARE_STATIC_KEY_FALSE(switch_to_cond_stibp);
++DECLARE_STATIC_KEY_FALSE(switch_mm_cond_ibpb);
++DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb);
++
+ #endif /* __ASSEMBLY__ */
+
+ /*
+diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h
+index ae7c2c5cd7f0..5393babc0598 100644
+--- a/arch/x86/include/asm/spec-ctrl.h
++++ b/arch/x86/include/asm/spec-ctrl.h
+@@ -53,12 +53,24 @@ static inline u64 ssbd_tif_to_spec_ctrl(u64 tifn)
+ return (tifn & _TIF_SSBD) >> (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT);
+ }
+
++static inline u64 stibp_tif_to_spec_ctrl(u64 tifn)
++{
++ BUILD_BUG_ON(TIF_SPEC_IB < SPEC_CTRL_STIBP_SHIFT);
++ return (tifn & _TIF_SPEC_IB) >> (TIF_SPEC_IB - SPEC_CTRL_STIBP_SHIFT);
++}
++
+ static inline unsigned long ssbd_spec_ctrl_to_tif(u64 spec_ctrl)
+ {
+ BUILD_BUG_ON(TIF_SSBD < SPEC_CTRL_SSBD_SHIFT);
+ return (spec_ctrl & SPEC_CTRL_SSBD) << (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT);
+ }
+
++static inline unsigned long stibp_spec_ctrl_to_tif(u64 spec_ctrl)
++{
++ BUILD_BUG_ON(TIF_SPEC_IB < SPEC_CTRL_STIBP_SHIFT);
++ return (spec_ctrl & SPEC_CTRL_STIBP) << (TIF_SPEC_IB - SPEC_CTRL_STIBP_SHIFT);
++}
++
+ static inline u64 ssbd_tif_to_amd_ls_cfg(u64 tifn)
+ {
+ return (tifn & _TIF_SSBD) ? x86_amd_ls_cfg_ssbd_mask : 0ULL;
+@@ -70,11 +82,7 @@ extern void speculative_store_bypass_ht_init(void);
+ static inline void speculative_store_bypass_ht_init(void) { }
+ #endif
+
+-extern void speculative_store_bypass_update(unsigned long tif);
+-
+-static inline void speculative_store_bypass_update_current(void)
+-{
+- speculative_store_bypass_update(current_thread_info()->flags);
+-}
++extern void speculation_ctrl_update(unsigned long tif);
++extern void speculation_ctrl_update_current(void);
+
+ #endif
+diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
+index 36bd243843d6..7cf1a270d891 100644
+--- a/arch/x86/include/asm/switch_to.h
++++ b/arch/x86/include/asm/switch_to.h
+@@ -11,9 +11,6 @@ struct task_struct *__switch_to_asm(struct task_struct *prev,
+
+ __visible struct task_struct *__switch_to(struct task_struct *prev,
+ struct task_struct *next);
+-struct tss_struct;
+-void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
+- struct tss_struct *tss);
+
+ /* This runs runs on the previous thread's stack. */
+ static inline void prepare_switch_to(struct task_struct *next)
+diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
+index 2ff2a30a264f..82b73b75d67c 100644
+--- a/arch/x86/include/asm/thread_info.h
++++ b/arch/x86/include/asm/thread_info.h
+@@ -79,10 +79,12 @@ struct thread_info {
+ #define TIF_SIGPENDING 2 /* signal pending */
+ #define TIF_NEED_RESCHED 3 /* rescheduling necessary */
+ #define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/
+-#define TIF_SSBD 5 /* Reduced data speculation */
++#define TIF_SSBD 5 /* Speculative store bypass disable */
+ #define TIF_SYSCALL_EMU 6 /* syscall emulation active */
+ #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
+ #define TIF_SECCOMP 8 /* secure computing */
++#define TIF_SPEC_IB 9 /* Indirect branch speculation mitigation */
++#define TIF_SPEC_FORCE_UPDATE 10 /* Force speculation MSR update in context switch */
+ #define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */
+ #define TIF_UPROBE 12 /* breakpointed or singlestepping */
+ #define TIF_PATCH_PENDING 13 /* pending live patching update */
+@@ -110,6 +112,8 @@ struct thread_info {
+ #define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU)
+ #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
+ #define _TIF_SECCOMP (1 << TIF_SECCOMP)
++#define _TIF_SPEC_IB (1 << TIF_SPEC_IB)
++#define _TIF_SPEC_FORCE_UPDATE (1 << TIF_SPEC_FORCE_UPDATE)
+ #define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY)
+ #define _TIF_UPROBE (1 << TIF_UPROBE)
+ #define _TIF_PATCH_PENDING (1 << TIF_PATCH_PENDING)
+@@ -145,8 +149,18 @@ struct thread_info {
+ _TIF_FSCHECK)
+
+ /* flags to check in __switch_to() */
+-#define _TIF_WORK_CTXSW \
+- (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP|_TIF_SSBD)
++#define _TIF_WORK_CTXSW_BASE \
++ (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP| \
++ _TIF_SSBD | _TIF_SPEC_FORCE_UPDATE)
++
++/*
++ * Avoid calls to __switch_to_xtra() on UP as STIBP is not evaluated.
++ */
++#ifdef CONFIG_SMP
++# define _TIF_WORK_CTXSW (_TIF_WORK_CTXSW_BASE | _TIF_SPEC_IB)
++#else
++# define _TIF_WORK_CTXSW (_TIF_WORK_CTXSW_BASE)
++#endif
+
+ #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
+ #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
+diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
+index 0e2130d8d6b1..79ec7add5f98 100644
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -185,10 +185,14 @@ struct tlb_state {
+
+ #define LOADED_MM_SWITCHING ((struct mm_struct *)1)
+
++ /* Last user mm for optimizing IBPB */
++ union {
++ struct mm_struct *last_user_mm;
++ unsigned long last_user_mm_ibpb;
++ };
++
+ u16 loaded_mm_asid;
+ u16 next_asid;
+- /* last user mm's ctx id */
+- u64 last_ctx_id;
+
+ /*
+ * We can be in one of several states:
+diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
+index 40bdaea97fe7..78928f56cf72 100644
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -14,6 +14,7 @@
+ #include <linux/module.h>
+ #include <linux/nospec.h>
+ #include <linux/prctl.h>
++#include <linux/sched/smt.h>
+
+ #include <asm/spec-ctrl.h>
+ #include <asm/cmdline.h>
+@@ -35,12 +36,10 @@ static void __init spectre_v2_select_mitigation(void);
+ static void __init ssb_select_mitigation(void);
+ static void __init l1tf_select_mitigation(void);
+
+-/*
+- * Our boot-time value of the SPEC_CTRL MSR. We read it once so that any
+- * writes to SPEC_CTRL contain whatever reserved bits have been set.
+- */
+-u64 __ro_after_init x86_spec_ctrl_base;
++/* The base value of the SPEC_CTRL MSR that always has to be preserved. */
++u64 x86_spec_ctrl_base;
+ EXPORT_SYMBOL_GPL(x86_spec_ctrl_base);
++static DEFINE_MUTEX(spec_ctrl_mutex);
+
+ /*
+ * The vendor and possibly platform specific bits which can be modified in
+@@ -55,6 +54,13 @@ static u64 __ro_after_init x86_spec_ctrl_mask = SPEC_CTRL_IBRS;
+ u64 __ro_after_init x86_amd_ls_cfg_base;
+ u64 __ro_after_init x86_amd_ls_cfg_ssbd_mask;
+
++/* Control conditional STIPB in switch_to() */
++DEFINE_STATIC_KEY_FALSE(switch_to_cond_stibp);
++/* Control conditional IBPB in switch_mm() */
++DEFINE_STATIC_KEY_FALSE(switch_mm_cond_ibpb);
++/* Control unconditional IBPB in switch_mm() */
++DEFINE_STATIC_KEY_FALSE(switch_mm_always_ibpb);
++
+ void __init check_bugs(void)
+ {
+ identify_boot_cpu();
+@@ -125,31 +131,6 @@ void __init check_bugs(void)
+ #endif
+ }
+
+-/* The kernel command line selection */
+-enum spectre_v2_mitigation_cmd {
+- SPECTRE_V2_CMD_NONE,
+- SPECTRE_V2_CMD_AUTO,
+- SPECTRE_V2_CMD_FORCE,
+- SPECTRE_V2_CMD_RETPOLINE,
+- SPECTRE_V2_CMD_RETPOLINE_GENERIC,
+- SPECTRE_V2_CMD_RETPOLINE_AMD,
+-};
+-
+-static const char *spectre_v2_strings[] = {
+- [SPECTRE_V2_NONE] = "Vulnerable",
+- [SPECTRE_V2_RETPOLINE_MINIMAL] = "Vulnerable: Minimal generic ASM retpoline",
+- [SPECTRE_V2_RETPOLINE_MINIMAL_AMD] = "Vulnerable: Minimal AMD ASM retpoline",
+- [SPECTRE_V2_RETPOLINE_GENERIC] = "Mitigation: Full generic retpoline",
+- [SPECTRE_V2_RETPOLINE_AMD] = "Mitigation: Full AMD retpoline",
+- [SPECTRE_V2_IBRS_ENHANCED] = "Mitigation: Enhanced IBRS",
+-};
+-
+-#undef pr_fmt
+-#define pr_fmt(fmt) "Spectre V2 : " fmt
+-
+-static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init =
+- SPECTRE_V2_NONE;
+-
+ void
+ x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest)
+ {
+@@ -171,6 +152,10 @@ x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest)
+ static_cpu_has(X86_FEATURE_AMD_SSBD))
+ hostval |= ssbd_tif_to_spec_ctrl(ti->flags);
+
++ /* Conditional STIBP enabled? */
++ if (static_branch_unlikely(&switch_to_cond_stibp))
++ hostval |= stibp_tif_to_spec_ctrl(ti->flags);
++
+ if (hostval != guestval) {
+ msrval = setguest ? guestval : hostval;
+ wrmsrl(MSR_IA32_SPEC_CTRL, msrval);
+@@ -204,7 +189,7 @@ x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest)
+ tif = setguest ? ssbd_spec_ctrl_to_tif(guestval) :
+ ssbd_spec_ctrl_to_tif(hostval);
+
+- speculative_store_bypass_update(tif);
++ speculation_ctrl_update(tif);
+ }
+ }
+ EXPORT_SYMBOL_GPL(x86_virt_spec_ctrl);
+@@ -219,6 +204,15 @@ static void x86_amd_ssb_disable(void)
+ wrmsrl(MSR_AMD64_LS_CFG, msrval);
+ }
+
++#undef pr_fmt
++#define pr_fmt(fmt) "Spectre V2 : " fmt
++
++static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init =
++ SPECTRE_V2_NONE;
++
++static enum spectre_v2_user_mitigation spectre_v2_user __ro_after_init =
++ SPECTRE_V2_USER_NONE;
++
+ #ifdef RETPOLINE
+ static bool spectre_v2_bad_module;
+
+@@ -240,67 +234,217 @@ static inline const char *spectre_v2_module_string(void)
+ static inline const char *spectre_v2_module_string(void) { return ""; }
+ #endif
+
+-static void __init spec2_print_if_insecure(const char *reason)
++static inline bool match_option(const char *arg, int arglen, const char *opt)
+ {
+- if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
+- pr_info("%s selected on command line.\n", reason);
++ int len = strlen(opt);
++
++ return len == arglen && !strncmp(arg, opt, len);
+ }
+
+-static void __init spec2_print_if_secure(const char *reason)
++/* The kernel command line selection for spectre v2 */
++enum spectre_v2_mitigation_cmd {
++ SPECTRE_V2_CMD_NONE,
++ SPECTRE_V2_CMD_AUTO,
++ SPECTRE_V2_CMD_FORCE,
++ SPECTRE_V2_CMD_RETPOLINE,
++ SPECTRE_V2_CMD_RETPOLINE_GENERIC,
++ SPECTRE_V2_CMD_RETPOLINE_AMD,
++};
++
++enum spectre_v2_user_cmd {
++ SPECTRE_V2_USER_CMD_NONE,
++ SPECTRE_V2_USER_CMD_AUTO,
++ SPECTRE_V2_USER_CMD_FORCE,
++ SPECTRE_V2_USER_CMD_PRCTL,
++ SPECTRE_V2_USER_CMD_PRCTL_IBPB,
++ SPECTRE_V2_USER_CMD_SECCOMP,
++ SPECTRE_V2_USER_CMD_SECCOMP_IBPB,
++};
++
++static const char * const spectre_v2_user_strings[] = {
++ [SPECTRE_V2_USER_NONE] = "User space: Vulnerable",
++ [SPECTRE_V2_USER_STRICT] = "User space: Mitigation: STIBP protection",
++ [SPECTRE_V2_USER_PRCTL] = "User space: Mitigation: STIBP via prctl",
++ [SPECTRE_V2_USER_SECCOMP] = "User space: Mitigation: STIBP via seccomp and prctl",
++};
++
++static const struct {
++ const char *option;
++ enum spectre_v2_user_cmd cmd;
++ bool secure;
++} v2_user_options[] __initdata = {
++ { "auto", SPECTRE_V2_USER_CMD_AUTO, false },
++ { "off", SPECTRE_V2_USER_CMD_NONE, false },
++ { "on", SPECTRE_V2_USER_CMD_FORCE, true },
++ { "prctl", SPECTRE_V2_USER_CMD_PRCTL, false },
++ { "prctl,ibpb", SPECTRE_V2_USER_CMD_PRCTL_IBPB, false },
++ { "seccomp", SPECTRE_V2_USER_CMD_SECCOMP, false },
++ { "seccomp,ibpb", SPECTRE_V2_USER_CMD_SECCOMP_IBPB, false },
++};
++
++static void __init spec_v2_user_print_cond(const char *reason, bool secure)
+ {
+- if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
+- pr_info("%s selected on command line.\n", reason);
++ if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2) != secure)
++ pr_info("spectre_v2_user=%s forced on command line.\n", reason);
+ }
+
+-static inline bool retp_compiler(void)
++static enum spectre_v2_user_cmd __init
++spectre_v2_parse_user_cmdline(enum spectre_v2_mitigation_cmd v2_cmd)
+ {
+- return __is_defined(RETPOLINE);
++ char arg[20];
++ int ret, i;
++
++ switch (v2_cmd) {
++ case SPECTRE_V2_CMD_NONE:
++ return SPECTRE_V2_USER_CMD_NONE;
++ case SPECTRE_V2_CMD_FORCE:
++ return SPECTRE_V2_USER_CMD_FORCE;
++ default:
++ break;
++ }
++
++ ret = cmdline_find_option(boot_command_line, "spectre_v2_user",
++ arg, sizeof(arg));
++ if (ret < 0)
++ return SPECTRE_V2_USER_CMD_AUTO;
++
++ for (i = 0; i < ARRAY_SIZE(v2_user_options); i++) {
++ if (match_option(arg, ret, v2_user_options[i].option)) {
++ spec_v2_user_print_cond(v2_user_options[i].option,
++ v2_user_options[i].secure);
++ return v2_user_options[i].cmd;
++ }
++ }
++
++ pr_err("Unknown user space protection option (%s). Switching to AUTO select\n", arg);
++ return SPECTRE_V2_USER_CMD_AUTO;
+ }
+
+-static inline bool match_option(const char *arg, int arglen, const char *opt)
++static void __init
++spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd)
+ {
+- int len = strlen(opt);
++ enum spectre_v2_user_mitigation mode = SPECTRE_V2_USER_NONE;
++ bool smt_possible = IS_ENABLED(CONFIG_SMP);
++ enum spectre_v2_user_cmd cmd;
+
+- return len == arglen && !strncmp(arg, opt, len);
++ if (!boot_cpu_has(X86_FEATURE_IBPB) && !boot_cpu_has(X86_FEATURE_STIBP))
++ return;
++
++ if (cpu_smt_control == CPU_SMT_FORCE_DISABLED ||
++ cpu_smt_control == CPU_SMT_NOT_SUPPORTED)
++ smt_possible = false;
++
++ cmd = spectre_v2_parse_user_cmdline(v2_cmd);
++ switch (cmd) {
++ case SPECTRE_V2_USER_CMD_NONE:
++ goto set_mode;
++ case SPECTRE_V2_USER_CMD_FORCE:
++ mode = SPECTRE_V2_USER_STRICT;
++ break;
++ case SPECTRE_V2_USER_CMD_PRCTL:
++ case SPECTRE_V2_USER_CMD_PRCTL_IBPB:
++ mode = SPECTRE_V2_USER_PRCTL;
++ break;
++ case SPECTRE_V2_USER_CMD_AUTO:
++ case SPECTRE_V2_USER_CMD_SECCOMP:
++ case SPECTRE_V2_USER_CMD_SECCOMP_IBPB:
++ if (IS_ENABLED(CONFIG_SECCOMP))
++ mode = SPECTRE_V2_USER_SECCOMP;
++ else
++ mode = SPECTRE_V2_USER_PRCTL;
++ break;
++ }
++
++ /* Initialize Indirect Branch Prediction Barrier */
++ if (boot_cpu_has(X86_FEATURE_IBPB)) {
++ setup_force_cpu_cap(X86_FEATURE_USE_IBPB);
++
++ switch (cmd) {
++ case SPECTRE_V2_USER_CMD_FORCE:
++ case SPECTRE_V2_USER_CMD_PRCTL_IBPB:
++ case SPECTRE_V2_USER_CMD_SECCOMP_IBPB:
++ static_branch_enable(&switch_mm_always_ibpb);
++ break;
++ case SPECTRE_V2_USER_CMD_PRCTL:
++ case SPECTRE_V2_USER_CMD_AUTO:
++ case SPECTRE_V2_USER_CMD_SECCOMP:
++ static_branch_enable(&switch_mm_cond_ibpb);
++ break;
++ default:
++ break;
++ }
++
++ pr_info("mitigation: Enabling %s Indirect Branch Prediction Barrier\n",
++ static_key_enabled(&switch_mm_always_ibpb) ?
++ "always-on" : "conditional");
++ }
++
++ /* If enhanced IBRS is enabled no STIPB required */
++ if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
++ return;
++
++ /*
++ * If SMT is not possible or STIBP is not available clear the STIPB
++ * mode.
++ */ ++ if (!smt_possible || !boot_cpu_has(X86_FEATURE_STIBP)) ++ mode = SPECTRE_V2_USER_NONE; ++set_mode: ++ spectre_v2_user = mode; ++ /* Only print the STIBP mode when SMT possible */ ++ if (smt_possible) ++ pr_info("%s\n", spectre_v2_user_strings[mode]); + } + ++static const char * const spectre_v2_strings[] = { ++ [SPECTRE_V2_NONE] = "Vulnerable", ++ [SPECTRE_V2_RETPOLINE_GENERIC] = "Mitigation: Full generic retpoline", ++ [SPECTRE_V2_RETPOLINE_AMD] = "Mitigation: Full AMD retpoline", ++ [SPECTRE_V2_IBRS_ENHANCED] = "Mitigation: Enhanced IBRS", ++}; ++ + static const struct { + const char *option; + enum spectre_v2_mitigation_cmd cmd; + bool secure; +-} mitigation_options[] = { +- { "off", SPECTRE_V2_CMD_NONE, false }, +- { "on", SPECTRE_V2_CMD_FORCE, true }, +- { "retpoline", SPECTRE_V2_CMD_RETPOLINE, false }, +- { "retpoline,amd", SPECTRE_V2_CMD_RETPOLINE_AMD, false }, +- { "retpoline,generic", SPECTRE_V2_CMD_RETPOLINE_GENERIC, false }, +- { "auto", SPECTRE_V2_CMD_AUTO, false }, ++} mitigation_options[] __initdata = { ++ { "off", SPECTRE_V2_CMD_NONE, false }, ++ { "on", SPECTRE_V2_CMD_FORCE, true }, ++ { "retpoline", SPECTRE_V2_CMD_RETPOLINE, false }, ++ { "retpoline,amd", SPECTRE_V2_CMD_RETPOLINE_AMD, false }, ++ { "retpoline,generic", SPECTRE_V2_CMD_RETPOLINE_GENERIC, false }, ++ { "auto", SPECTRE_V2_CMD_AUTO, false }, + }; + ++static void __init spec_v2_print_cond(const char *reason, bool secure) ++{ ++ if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2) != secure) ++ pr_info("%s selected on command line.\n", reason); ++} ++ + static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) + { ++ enum spectre_v2_mitigation_cmd cmd = SPECTRE_V2_CMD_AUTO; + char arg[20]; + int ret, i; +- enum spectre_v2_mitigation_cmd cmd = SPECTRE_V2_CMD_AUTO; + + if (cmdline_find_option_bool(boot_command_line, "nospectre_v2")) + return SPECTRE_V2_CMD_NONE; +- else { +- ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, sizeof(arg)); +- if (ret < 0) +- return SPECTRE_V2_CMD_AUTO; + +- for (i = 0; i < ARRAY_SIZE(mitigation_options); i++) { +- if (!match_option(arg, ret, mitigation_options[i].option)) +- continue; +- cmd = mitigation_options[i].cmd; +- break; +- } ++ ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, sizeof(arg)); ++ if (ret < 0) ++ return SPECTRE_V2_CMD_AUTO; + +- if (i >= ARRAY_SIZE(mitigation_options)) { +- pr_err("unknown option (%s). Switching to AUTO select\n", arg); +- return SPECTRE_V2_CMD_AUTO; +- } ++ for (i = 0; i < ARRAY_SIZE(mitigation_options); i++) { ++ if (!match_option(arg, ret, mitigation_options[i].option)) ++ continue; ++ cmd = mitigation_options[i].cmd; ++ break; ++ } ++ ++ if (i >= ARRAY_SIZE(mitigation_options)) { ++ pr_err("unknown option (%s). Switching to AUTO select\n", arg); ++ return SPECTRE_V2_CMD_AUTO; + } + + if ((cmd == SPECTRE_V2_CMD_RETPOLINE || +@@ -317,11 +461,8 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) + return SPECTRE_V2_CMD_AUTO; + } + +- if (mitigation_options[i].secure) +- spec2_print_if_secure(mitigation_options[i].option); +- else +- spec2_print_if_insecure(mitigation_options[i].option); +- ++ spec_v2_print_cond(mitigation_options[i].option, ++ mitigation_options[i].secure); + return cmd; + } + +@@ -377,14 +518,12 @@ retpoline_auto: + pr_err("Spectre mitigation: LFENCE not serializing, switching to generic retpoline\n"); + goto retpoline_generic; + } +- mode = retp_compiler() ? 
SPECTRE_V2_RETPOLINE_AMD : +- SPECTRE_V2_RETPOLINE_MINIMAL_AMD; ++ mode = SPECTRE_V2_RETPOLINE_AMD; + setup_force_cpu_cap(X86_FEATURE_RETPOLINE_AMD); + setup_force_cpu_cap(X86_FEATURE_RETPOLINE); + } else { + retpoline_generic: +- mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_GENERIC : +- SPECTRE_V2_RETPOLINE_MINIMAL; ++ mode = SPECTRE_V2_RETPOLINE_GENERIC; + setup_force_cpu_cap(X86_FEATURE_RETPOLINE); + } + +@@ -403,12 +542,6 @@ specv2_set_mode: + setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW); + pr_info("Spectre v2 / SpectreRSB mitigation: Filling RSB on context switch\n"); + +- /* Initialize Indirect Branch Prediction Barrier if supported */ +- if (boot_cpu_has(X86_FEATURE_IBPB)) { +- setup_force_cpu_cap(X86_FEATURE_USE_IBPB); +- pr_info("Spectre v2 mitigation: Enabling Indirect Branch Prediction Barrier\n"); +- } +- + /* + * Retpoline means the kernel is safe because it has no indirect + * branches. Enhanced IBRS protects firmware too, so, enable restricted +@@ -424,6 +557,66 @@ specv2_set_mode: + setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW); + pr_info("Enabling Restricted Speculation for firmware calls\n"); + } ++ ++ /* Set up IBPB and STIBP depending on the general spectre V2 command */ ++ spectre_v2_user_select_mitigation(cmd); ++ ++ /* Enable STIBP if appropriate */ ++ arch_smt_update(); ++} ++ ++static void update_stibp_msr(void * __unused) ++{ ++ wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); ++} ++ ++/* Update x86_spec_ctrl_base in case SMT state changed. */ ++static void update_stibp_strict(void) ++{ ++ u64 mask = x86_spec_ctrl_base & ~SPEC_CTRL_STIBP; ++ ++ if (sched_smt_active()) ++ mask |= SPEC_CTRL_STIBP; ++ ++ if (mask == x86_spec_ctrl_base) ++ return; ++ ++ pr_info("Update user space SMT mitigation: STIBP %s\n", ++ mask & SPEC_CTRL_STIBP ? "always-on" : "off"); ++ x86_spec_ctrl_base = mask; ++ on_each_cpu(update_stibp_msr, NULL, 1); ++} ++ ++/* Update the static key controlling the evaluation of TIF_SPEC_IB */ ++static void update_indir_branch_cond(void) ++{ ++ if (sched_smt_active()) ++ static_branch_enable(&switch_to_cond_stibp); ++ else ++ static_branch_disable(&switch_to_cond_stibp); ++} ++ ++void arch_smt_update(void) ++{ ++ /* Enhanced IBRS implies STIBP. No update required. 
*/ ++ if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) ++ return; ++ ++ mutex_lock(&spec_ctrl_mutex); ++ ++ switch (spectre_v2_user) { ++ case SPECTRE_V2_USER_NONE: ++ break; ++ case SPECTRE_V2_USER_STRICT: ++ update_stibp_strict(); ++ break; ++ case SPECTRE_V2_USER_PRCTL: ++ case SPECTRE_V2_USER_SECCOMP: ++ update_indir_branch_cond(); ++ break; ++ } ++ ++ mutex_unlock(&spec_ctrl_mutex); + } + + #undef pr_fmt +@@ -440,7 +633,7 @@ enum ssb_mitigation_cmd { + SPEC_STORE_BYPASS_CMD_SECCOMP, + }; + +-static const char *ssb_strings[] = { ++static const char * const ssb_strings[] = { + [SPEC_STORE_BYPASS_NONE] = "Vulnerable", + [SPEC_STORE_BYPASS_DISABLE] = "Mitigation: Speculative Store Bypass disabled", + [SPEC_STORE_BYPASS_PRCTL] = "Mitigation: Speculative Store Bypass disabled via prctl", +@@ -450,7 +643,7 @@ static const char *ssb_strings[] = { + static const struct { + const char *option; + enum ssb_mitigation_cmd cmd; +-} ssb_mitigation_options[] = { ++} ssb_mitigation_options[] __initdata = { + { "auto", SPEC_STORE_BYPASS_CMD_AUTO }, /* Platform decides */ + { "on", SPEC_STORE_BYPASS_CMD_ON }, /* Disable Speculative Store Bypass */ + { "off", SPEC_STORE_BYPASS_CMD_NONE }, /* Don't touch Speculative Store Bypass */ +@@ -561,10 +754,25 @@ static void ssb_select_mitigation(void) + #undef pr_fmt + #define pr_fmt(fmt) "Speculation prctl: " fmt + +-static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) ++static void task_update_spec_tif(struct task_struct *tsk) + { +- bool update; ++ /* Force the update of the real TIF bits */ ++ set_tsk_thread_flag(tsk, TIF_SPEC_FORCE_UPDATE); + ++ /* ++ * Immediately update the speculation control MSRs for the current ++ * task, but for a non-current task delay setting the CPU ++ * mitigation until it is scheduled next. ++ * ++ * This can only happen for SECCOMP mitigation. For PRCTL it's ++ * always the current task. ++ */ ++ if (tsk == current) ++ speculation_ctrl_update_current(); ++} ++ ++static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) ++{ + if (ssb_mode != SPEC_STORE_BYPASS_PRCTL && + ssb_mode != SPEC_STORE_BYPASS_SECCOMP) + return -ENXIO; +@@ -575,28 +783,56 @@ static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) + if (task_spec_ssb_force_disable(task)) + return -EPERM; + task_clear_spec_ssb_disable(task); +- update = test_and_clear_tsk_thread_flag(task, TIF_SSBD); ++ task_update_spec_tif(task); + break; + case PR_SPEC_DISABLE: + task_set_spec_ssb_disable(task); +- update = !test_and_set_tsk_thread_flag(task, TIF_SSBD); ++ task_update_spec_tif(task); + break; + case PR_SPEC_FORCE_DISABLE: + task_set_spec_ssb_disable(task); + task_set_spec_ssb_force_disable(task); +- update = !test_and_set_tsk_thread_flag(task, TIF_SSBD); ++ task_update_spec_tif(task); + break; + default: + return -ERANGE; + } ++ return 0; ++} + +- /* +- * If being set on non-current task, delay setting the CPU +- * mitigation until it is next scheduled. +- */ +- if (task == current && update) +- speculative_store_bypass_update_current(); +- ++static int ib_prctl_set(struct task_struct *task, unsigned long ctrl) ++{ ++ switch (ctrl) { ++ case PR_SPEC_ENABLE: ++ if (spectre_v2_user == SPECTRE_V2_USER_NONE) ++ return 0; ++ /* ++ * Indirect branch speculation is always disabled in strict ++ * mode. 
++ */ ++ if (spectre_v2_user == SPECTRE_V2_USER_STRICT) ++ return -EPERM; ++ task_clear_spec_ib_disable(task); ++ task_update_spec_tif(task); ++ break; ++ case PR_SPEC_DISABLE: ++ case PR_SPEC_FORCE_DISABLE: ++ /* ++ * Indirect branch speculation is always allowed when ++ * mitigation is force disabled. ++ */ ++ if (spectre_v2_user == SPECTRE_V2_USER_NONE) ++ return -EPERM; ++ if (spectre_v2_user == SPECTRE_V2_USER_STRICT) ++ return 0; ++ task_set_spec_ib_disable(task); ++ if (ctrl == PR_SPEC_FORCE_DISABLE) ++ task_set_spec_ib_force_disable(task); ++ task_update_spec_tif(task); ++ break; ++ default: ++ return -ERANGE; ++ } + return 0; + } + +@@ -606,6 +842,8 @@ int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which, + switch (which) { + case PR_SPEC_STORE_BYPASS: + return ssb_prctl_set(task, ctrl); ++ case PR_SPEC_INDIRECT_BRANCH: ++ return ib_prctl_set(task, ctrl); + default: + return -ENODEV; + } +@@ -616,6 +854,8 @@ void arch_seccomp_spec_mitigate(struct task_struct *task) + { + if (ssb_mode == SPEC_STORE_BYPASS_SECCOMP) + ssb_prctl_set(task, PR_SPEC_FORCE_DISABLE); ++ if (spectre_v2_user == SPECTRE_V2_USER_SECCOMP) ++ ib_prctl_set(task, PR_SPEC_FORCE_DISABLE); + } + #endif + +@@ -638,11 +878,35 @@ static int ssb_prctl_get(struct task_struct *task) + } + } + ++static int ib_prctl_get(struct task_struct *task) ++{ ++ if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) ++ return PR_SPEC_NOT_AFFECTED; ++ ++ switch (spectre_v2_user) { ++ case SPECTRE_V2_USER_NONE: ++ return PR_SPEC_ENABLE; ++ case SPECTRE_V2_USER_PRCTL: ++ case SPECTRE_V2_USER_SECCOMP: ++ if (task_spec_ib_force_disable(task)) ++ return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE; ++ if (task_spec_ib_disable(task)) ++ return PR_SPEC_PRCTL | PR_SPEC_DISABLE; ++ return PR_SPEC_PRCTL | PR_SPEC_ENABLE; ++ case SPECTRE_V2_USER_STRICT: ++ return PR_SPEC_DISABLE; ++ default: ++ return PR_SPEC_NOT_AFFECTED; ++ } ++} ++ + int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) + { + switch (which) { + case PR_SPEC_STORE_BYPASS: + return ssb_prctl_get(task); ++ case PR_SPEC_INDIRECT_BRANCH: ++ return ib_prctl_get(task); + default: + return -ENODEV; + } +@@ -780,7 +1044,7 @@ early_param("l1tf", l1tf_cmdline); + #define L1TF_DEFAULT_MSG "Mitigation: PTE Inversion" + + #if IS_ENABLED(CONFIG_KVM_INTEL) +-static const char *l1tf_vmx_states[] = { ++static const char * const l1tf_vmx_states[] = { + [VMENTER_L1D_FLUSH_AUTO] = "auto", + [VMENTER_L1D_FLUSH_NEVER] = "vulnerable", + [VMENTER_L1D_FLUSH_COND] = "conditional cache flushes", +@@ -796,13 +1060,14 @@ static ssize_t l1tf_show_state(char *buf) + + if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_EPT_DISABLED || + (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_NEVER && +- cpu_smt_control == CPU_SMT_ENABLED)) ++ sched_smt_active())) { + return sprintf(buf, "%s; VMX: %s\n", L1TF_DEFAULT_MSG, + l1tf_vmx_states[l1tf_vmx_mitigation]); ++ } + + return sprintf(buf, "%s; VMX: %s, SMT %s\n", L1TF_DEFAULT_MSG, + l1tf_vmx_states[l1tf_vmx_mitigation], +- cpu_smt_control == CPU_SMT_ENABLED ? "vulnerable" : "disabled"); ++ sched_smt_active() ? 
"vulnerable" : "disabled"); + } + #else + static ssize_t l1tf_show_state(char *buf) +@@ -811,6 +1076,36 @@ static ssize_t l1tf_show_state(char *buf) + } + #endif + ++static char *stibp_state(void) ++{ ++ if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) ++ return ""; ++ ++ switch (spectre_v2_user) { ++ case SPECTRE_V2_USER_NONE: ++ return ", STIBP: disabled"; ++ case SPECTRE_V2_USER_STRICT: ++ return ", STIBP: forced"; ++ case SPECTRE_V2_USER_PRCTL: ++ case SPECTRE_V2_USER_SECCOMP: ++ if (static_key_enabled(&switch_to_cond_stibp)) ++ return ", STIBP: conditional"; ++ } ++ return ""; ++} ++ ++static char *ibpb_state(void) ++{ ++ if (boot_cpu_has(X86_FEATURE_IBPB)) { ++ if (static_key_enabled(&switch_mm_always_ibpb)) ++ return ", IBPB: always-on"; ++ if (static_key_enabled(&switch_mm_cond_ibpb)) ++ return ", IBPB: conditional"; ++ return ", IBPB: disabled"; ++ } ++ return ""; ++} ++ + static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, + char *buf, unsigned int bug) + { +@@ -831,9 +1126,11 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr + return sprintf(buf, "Mitigation: __user pointer sanitization\n"); + + case X86_BUG_SPECTRE_V2: +- return sprintf(buf, "%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], +- boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "", ++ return sprintf(buf, "%s%s%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], ++ ibpb_state(), + boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "", ++ stibp_state(), ++ boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "", + spectre_v2_module_string()); + + case X86_BUG_SPEC_STORE_BYPASS: +diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c +index dd33c357548f..e12454e21b8a 100644 +--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c ++++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c +@@ -56,7 +56,7 @@ + /* Threshold LVT offset is at MSR0xC0000410[15:12] */ + #define SMCA_THR_LVT_OFF 0xF000 + +-static bool thresholding_en; ++static bool thresholding_irq_en; + + static const char * const th_names[] = { + "load_store", +@@ -534,9 +534,8 @@ prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr, + + set_offset: + offset = setup_APIC_mce_threshold(offset, new); +- +- if ((offset == new) && (mce_threshold_vector != amd_threshold_interrupt)) +- mce_threshold_vector = amd_threshold_interrupt; ++ if (offset == new) ++ thresholding_irq_en = true; + + done: + mce_threshold_block_init(&b, offset); +@@ -1357,9 +1356,6 @@ int mce_threshold_remove_device(unsigned int cpu) + { + unsigned int bank; + +- if (!thresholding_en) +- return 0; +- + for (bank = 0; bank < mca_cfg.banks; ++bank) { + if (!(per_cpu(bank_map, cpu) & (1 << bank))) + continue; +@@ -1377,9 +1373,6 @@ int mce_threshold_create_device(unsigned int cpu) + struct threshold_bank **bp; + int err = 0; + +- if (!thresholding_en) +- return 0; +- + bp = per_cpu(threshold_banks, cpu); + if (bp) + return 0; +@@ -1408,9 +1401,6 @@ static __init int threshold_init_device(void) + { + unsigned lcpu = 0; + +- if (mce_threshold_vector == amd_threshold_interrupt) +- thresholding_en = true; +- + /* to hit CPUs online before the notifier is up */ + for_each_online_cpu(lcpu) { + int err = mce_threshold_create_device(lcpu); +@@ -1419,6 +1409,9 @@ static __init int threshold_init_device(void) + return err; + } + ++ if (thresholding_irq_en) ++ mce_threshold_vector = amd_threshold_interrupt; ++ + return 0; + } + /* +diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c 
+index 61a949d84dfa..d99a8ee9e185 100644 +--- a/arch/x86/kernel/fpu/signal.c ++++ b/arch/x86/kernel/fpu/signal.c +@@ -344,10 +344,10 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) + sanitize_restored_xstate(tsk, &env, xfeatures, fx_only); + } + ++ local_bh_disable(); + fpu->initialized = 1; +- preempt_disable(); + fpu__restore(fpu); +- preempt_enable(); ++ local_bh_enable(); + + return err; + } else { +diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c +index 01ebcb6f263e..7ee8067cbf45 100644 +--- a/arch/x86/kernel/ftrace.c ++++ b/arch/x86/kernel/ftrace.c +@@ -994,7 +994,6 @@ void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent, + { + unsigned long old; + int faulted; +- struct ftrace_graph_ent trace; + unsigned long return_hooker = (unsigned long) + &return_to_handler; + +@@ -1046,19 +1045,7 @@ void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent, + return; + } + +- trace.func = self_addr; +- trace.depth = current->curr_ret_stack + 1; +- +- /* Only trace if the calling function expects to */ +- if (!ftrace_graph_entry(&trace)) { ++ if (function_graph_enter(old, self_addr, frame_pointer, parent)) + *parent = old; +- return; +- } +- +- if (ftrace_push_return_trace(old, self_addr, &trace.depth, +- frame_pointer, parent) == -EBUSY) { +- *parent = old; +- return; +- } + } + #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ +diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c +index c93fcfdf1673..7d31192296a8 100644 +--- a/arch/x86/kernel/process.c ++++ b/arch/x86/kernel/process.c +@@ -40,6 +40,8 @@ + #include <asm/prctl.h> + #include <asm/spec-ctrl.h> + ++#include "process.h" ++ + /* + * per-CPU TSS segments. Threads are completely 'soft' on Linux, + * no more per-task TSS's. The TSS size is kept cacheline-aligned +@@ -252,11 +254,12 @@ void arch_setup_new_exec(void) + enable_cpuid(); + } + +-static inline void switch_to_bitmap(struct tss_struct *tss, +- struct thread_struct *prev, ++static inline void switch_to_bitmap(struct thread_struct *prev, + struct thread_struct *next, + unsigned long tifp, unsigned long tifn) + { ++ struct tss_struct *tss = this_cpu_ptr(&cpu_tss_rw); ++ + if (tifn & _TIF_IO_BITMAP) { + /* + * Copy the relevant range of the IO bitmap. +@@ -395,32 +398,85 @@ static __always_inline void amd_set_ssb_virt_state(unsigned long tifn) + wrmsrl(MSR_AMD64_VIRT_SPEC_CTRL, ssbd_tif_to_spec_ctrl(tifn)); + } + +-static __always_inline void intel_set_ssb_state(unsigned long tifn) ++/* ++ * Update the MSRs managing speculation control, during context switch. ++ * ++ * tifp: Previous task's thread flags ++ * tifn: Next task's thread flags ++ */ ++static __always_inline void __speculation_ctrl_update(unsigned long tifp, ++ unsigned long tifn) + { +- u64 msr = x86_spec_ctrl_base | ssbd_tif_to_spec_ctrl(tifn); ++ unsigned long tif_diff = tifp ^ tifn; ++ u64 msr = x86_spec_ctrl_base; ++ bool updmsr = false; ++ ++ /* ++ * If TIF_SSBD is different, select the proper mitigation ++ * method. Note that if SSBD mitigation is disabled or permanentely ++ * enabled this branch can't be taken because nothing can set ++ * TIF_SSBD. 
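The tif_diff = tifp ^ tifn test here is what keeps the MSR write off the common path: only flags that actually changed between the outgoing and incoming task survive the XOR. A forced refresh, as in speculation_ctrl_update() further down, passes ~tif as the previous flags so every bit appears changed. A stand-alone sketch of that invariant, with illustrative flag values rather than the kernel's:

#include <assert.h>

#define FLAG_SSBD    (1UL << 0)        /* stand-in for _TIF_SSBD */
#define FLAG_SPEC_IB (1UL << 1)        /* stand-in for _TIF_SPEC_IB */

static unsigned long changed(unsigned long tifp, unsigned long tifn)
{
        return tifp ^ tifn;     /* bits equal in prev and next cancel out */
}

int main(void)
{
        unsigned long tifn = FLAG_SPEC_IB;

        /* Normal switch: only flags that differ can force an MSR write. */
        assert(changed(FLAG_SSBD | FLAG_SPEC_IB, tifn) == FLAG_SSBD);

        /* Forced update: ~tifn as "previous" makes every flag differ. */
        assert((changed(~tifn, tifn) & (FLAG_SSBD | FLAG_SPEC_IB)) ==
               (FLAG_SSBD | FLAG_SPEC_IB));
        return 0;
}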
++ */ ++ if (tif_diff & _TIF_SSBD) { ++ if (static_cpu_has(X86_FEATURE_VIRT_SSBD)) { ++ amd_set_ssb_virt_state(tifn); ++ } else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) { ++ amd_set_core_ssb_state(tifn); ++ } else if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) || ++ static_cpu_has(X86_FEATURE_AMD_SSBD)) { ++ msr |= ssbd_tif_to_spec_ctrl(tifn); ++ updmsr = true; ++ } ++ } ++ ++ /* ++ * Only evaluate TIF_SPEC_IB if conditional STIBP is enabled, ++ * otherwise avoid the MSR write. ++ */ ++ if (IS_ENABLED(CONFIG_SMP) && ++ static_branch_unlikely(&switch_to_cond_stibp)) { ++ updmsr |= !!(tif_diff & _TIF_SPEC_IB); ++ msr |= stibp_tif_to_spec_ctrl(tifn); ++ } + +- wrmsrl(MSR_IA32_SPEC_CTRL, msr); ++ if (updmsr) ++ wrmsrl(MSR_IA32_SPEC_CTRL, msr); + } + +-static __always_inline void __speculative_store_bypass_update(unsigned long tifn) ++static unsigned long speculation_ctrl_update_tif(struct task_struct *tsk) + { +- if (static_cpu_has(X86_FEATURE_VIRT_SSBD)) +- amd_set_ssb_virt_state(tifn); +- else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) +- amd_set_core_ssb_state(tifn); +- else +- intel_set_ssb_state(tifn); ++ if (test_and_clear_tsk_thread_flag(tsk, TIF_SPEC_FORCE_UPDATE)) { ++ if (task_spec_ssb_disable(tsk)) ++ set_tsk_thread_flag(tsk, TIF_SSBD); ++ else ++ clear_tsk_thread_flag(tsk, TIF_SSBD); ++ ++ if (task_spec_ib_disable(tsk)) ++ set_tsk_thread_flag(tsk, TIF_SPEC_IB); ++ else ++ clear_tsk_thread_flag(tsk, TIF_SPEC_IB); ++ } ++ /* Return the updated threadinfo flags*/ ++ return task_thread_info(tsk)->flags; + } + +-void speculative_store_bypass_update(unsigned long tif) ++void speculation_ctrl_update(unsigned long tif) + { ++ /* Forced update. Make sure all relevant TIF flags are different */ + preempt_disable(); +- __speculative_store_bypass_update(tif); ++ __speculation_ctrl_update(~tif, tif); + preempt_enable(); + } + +-void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, +- struct tss_struct *tss) ++/* Called from seccomp/prctl update */ ++void speculation_ctrl_update_current(void) ++{ ++ preempt_disable(); ++ speculation_ctrl_update(speculation_ctrl_update_tif(current)); ++ preempt_enable(); ++} ++ ++void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p) + { + struct thread_struct *prev, *next; + unsigned long tifp, tifn; +@@ -430,7 +486,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, + + tifn = READ_ONCE(task_thread_info(next_p)->flags); + tifp = READ_ONCE(task_thread_info(prev_p)->flags); +- switch_to_bitmap(tss, prev, next, tifp, tifn); ++ switch_to_bitmap(prev, next, tifp, tifn); + + propagate_user_return_notify(prev_p, next_p); + +@@ -451,8 +507,15 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, + if ((tifp ^ tifn) & _TIF_NOCPUID) + set_cpuid_faulting(!!(tifn & _TIF_NOCPUID)); + +- if ((tifp ^ tifn) & _TIF_SSBD) +- __speculative_store_bypass_update(tifn); ++ if (likely(!((tifp | tifn) & _TIF_SPEC_FORCE_UPDATE))) { ++ __speculation_ctrl_update(tifp, tifn); ++ } else { ++ speculation_ctrl_update_tif(prev_p); ++ tifn = speculation_ctrl_update_tif(next_p); ++ ++ /* Enforce MSR update to ensure consistent state */ ++ __speculation_ctrl_update(~tifn, tifn); ++ } + } + + /* +diff --git a/arch/x86/kernel/process.h b/arch/x86/kernel/process.h +new file mode 100644 +index 000000000000..898e97cf6629 +--- /dev/null ++++ b/arch/x86/kernel/process.h +@@ -0,0 +1,39 @@ ++// SPDX-License-Identifier: GPL-2.0 ++// ++// Code shared between 32 and 64 bit ++ ++#include 
<asm/spec-ctrl.h> ++ ++void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p); ++ ++/* ++ * This needs to be inline to optimize for the common case where no extra ++ * work needs to be done. ++ */ ++static inline void switch_to_extra(struct task_struct *prev, ++ struct task_struct *next) ++{ ++ unsigned long next_tif = task_thread_info(next)->flags; ++ unsigned long prev_tif = task_thread_info(prev)->flags; ++ ++ if (IS_ENABLED(CONFIG_SMP)) { ++ /* ++ * Avoid __switch_to_xtra() invocation when conditional ++ * STIPB is disabled and the only different bit is ++ * TIF_SPEC_IB. For CONFIG_SMP=n TIF_SPEC_IB is not ++ * in the TIF_WORK_CTXSW masks. ++ */ ++ if (!static_branch_likely(&switch_to_cond_stibp)) { ++ prev_tif &= ~_TIF_SPEC_IB; ++ next_tif &= ~_TIF_SPEC_IB; ++ } ++ } ++ ++ /* ++ * __switch_to_xtra() handles debug registers, i/o bitmaps, ++ * speculation mitigations etc. ++ */ ++ if (unlikely(next_tif & _TIF_WORK_CTXSW_NEXT || ++ prev_tif & _TIF_WORK_CTXSW_PREV)) ++ __switch_to_xtra(prev, next); ++} +diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c +index 5046a3c9dec2..d3e593eb189f 100644 +--- a/arch/x86/kernel/process_32.c ++++ b/arch/x86/kernel/process_32.c +@@ -59,6 +59,8 @@ + #include <asm/intel_rdt_sched.h> + #include <asm/proto.h> + ++#include "process.h" ++ + void __show_regs(struct pt_regs *regs, enum show_regs_mode mode) + { + unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L; +@@ -232,7 +234,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) + struct fpu *prev_fpu = &prev->fpu; + struct fpu *next_fpu = &next->fpu; + int cpu = smp_processor_id(); +- struct tss_struct *tss = &per_cpu(cpu_tss_rw, cpu); + + /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */ + +@@ -264,12 +265,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) + if (get_kernel_rpl() && unlikely(prev->iopl != next->iopl)) + set_iopl_mask(next->iopl); + +- /* +- * Now maybe handle debug registers and/or IO bitmaps +- */ +- if (unlikely(task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV || +- task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT)) +- __switch_to_xtra(prev_p, next_p, tss); ++ switch_to_extra(prev_p, next_p); + + /* + * Leave lazy mode, flushing any hypercalls made here. +diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c +index ea5ea850348d..a0854f283efd 100644 +--- a/arch/x86/kernel/process_64.c ++++ b/arch/x86/kernel/process_64.c +@@ -59,6 +59,8 @@ + #include <asm/unistd_32_ia32.h> + #endif + ++#include "process.h" ++ + __visible DEFINE_PER_CPU(unsigned long, rsp_scratch); + + /* Prints also some state that isn't saved in the pt_regs */ +@@ -422,7 +424,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) + struct fpu *prev_fpu = &prev->fpu; + struct fpu *next_fpu = &next->fpu; + int cpu = smp_processor_id(); +- struct tss_struct *tss = &per_cpu(cpu_tss_rw, cpu); + + WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) && + this_cpu_read(irq_count) != -1); +@@ -489,12 +490,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) + /* Reload sp0. 
*/ + update_task_stack(next_p); + +- /* +- * Now maybe reload the debug registers and handle I/O bitmaps +- */ +- if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT || +- task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV)) +- __switch_to_xtra(prev_p, next_p, tss); ++ switch_to_extra(prev_p, next_p); + + #ifdef CONFIG_XEN_PV + /* +diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c +index fbb0e6df121b..2216d21e955d 100644 +--- a/arch/x86/kvm/lapic.c ++++ b/arch/x86/kvm/lapic.c +@@ -571,6 +571,11 @@ int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low, + rcu_read_lock(); + map = rcu_dereference(kvm->arch.apic_map); + ++ if (unlikely(!map)) { ++ count = -EOPNOTSUPP; ++ goto out; ++ } ++ + if (min > map->max_apic_id) + goto out; + /* Bits above cluster_size are masked in the caller. */ +diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c +index 51b953ad9d4e..1b82bc7c3cca 100644 +--- a/arch/x86/kvm/mmu.c ++++ b/arch/x86/kvm/mmu.c +@@ -5013,9 +5013,9 @@ static bool need_remote_flush(u64 old, u64 new) + } + + static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa, +- const u8 *new, int *bytes) ++ int *bytes) + { +- u64 gentry; ++ u64 gentry = 0; + int r; + + /* +@@ -5027,22 +5027,12 @@ static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa, + /* Handle a 32-bit guest writing two halves of a 64-bit gpte */ + *gpa &= ~(gpa_t)7; + *bytes = 8; +- r = kvm_vcpu_read_guest(vcpu, *gpa, &gentry, 8); +- if (r) +- gentry = 0; +- new = (const u8 *)&gentry; + } + +- switch (*bytes) { +- case 4: +- gentry = *(const u32 *)new; +- break; +- case 8: +- gentry = *(const u64 *)new; +- break; +- default: +- gentry = 0; +- break; ++ if (*bytes == 4 || *bytes == 8) { ++ r = kvm_vcpu_read_guest_atomic(vcpu, *gpa, &gentry, *bytes); ++ if (r) ++ gentry = 0; + } + + return gentry; +@@ -5146,8 +5136,6 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, + + pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes); + +- gentry = mmu_pte_write_fetch_gpte(vcpu, &gpa, new, &bytes); +- + /* + * No need to care whether allocation memory is successful + * or not since pte prefetch is skiped if it does not have +@@ -5156,6 +5144,9 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, + mmu_topup_memory_caches(vcpu); + + spin_lock(&vcpu->kvm->mmu_lock); ++ ++ gentry = mmu_pte_write_fetch_gpte(vcpu, &gpa, &bytes); ++ + ++vcpu->kvm->stat.mmu_pte_write; + kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE); + +diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c +index 61ccfb13899e..4397fa0c448f 100644 +--- a/arch/x86/kvm/svm.c ++++ b/arch/x86/kvm/svm.c +@@ -1444,7 +1444,7 @@ static u64 svm_read_l1_tsc_offset(struct kvm_vcpu *vcpu) + return vcpu->arch.tsc_offset; + } + +-static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) ++static u64 svm_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) + { + struct vcpu_svm *svm = to_svm(vcpu); + u64 g_tsc_offset = 0; +@@ -1462,6 +1462,7 @@ static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) + svm->vmcb->control.tsc_offset = offset + g_tsc_offset; + + mark_dirty(svm->vmcb, VMCB_INTERCEPTS); ++ return svm->vmcb->control.tsc_offset; + } + + static void avic_init_vmcb(struct vcpu_svm *svm) +@@ -2187,21 +2188,31 @@ out: + return ERR_PTR(err); + } + ++static void svm_clear_current_vmcb(struct vmcb *vmcb) ++{ ++ int i; ++ ++ for_each_online_cpu(i) ++ cmpxchg(&per_cpu(svm_data, i)->current_vmcb, vmcb, NULL); ++} ++ + static void svm_free_vcpu(struct kvm_vcpu *vcpu) + { + struct 
vcpu_svm *svm = to_svm(vcpu); + ++ /* ++ * The vmcb page can be recycled, causing a false negative in ++ * svm_vcpu_load(). So, ensure that no logical CPU has this ++ * vmcb page recorded as its current vmcb. ++ */ ++ svm_clear_current_vmcb(svm->vmcb); ++ + __free_page(pfn_to_page(__sme_clr(svm->vmcb_pa) >> PAGE_SHIFT)); + __free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER); + __free_page(virt_to_page(svm->nested.hsave)); + __free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER); + kvm_vcpu_uninit(vcpu); + kmem_cache_free(kvm_vcpu_cache, svm); +- /* +- * The vmcb page can be recycled, causing a false negative in +- * svm_vcpu_load(). So do a full IBPB now. +- */ +- indirect_branch_prediction_barrier(); + } + + static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) +@@ -7145,7 +7156,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { + .has_wbinvd_exit = svm_has_wbinvd_exit, + + .read_l1_tsc_offset = svm_read_l1_tsc_offset, +- .write_tsc_offset = svm_write_tsc_offset, ++ .write_l1_tsc_offset = svm_write_l1_tsc_offset, + + .set_tdp_cr3 = set_tdp_cr3, + +diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c +index 9f3def7baa6d..e55f7a90d4b2 100644 +--- a/arch/x86/kvm/vmx.c ++++ b/arch/x86/kvm/vmx.c +@@ -170,6 +170,7 @@ module_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO); + * refer SDM volume 3b section 21.6.13 & 22.1.3. + */ + static unsigned int ple_gap = KVM_DEFAULT_PLE_GAP; ++module_param(ple_gap, uint, 0444); + + static unsigned int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW; + module_param(ple_window, uint, 0444); +@@ -3433,11 +3434,9 @@ static u64 vmx_read_l1_tsc_offset(struct kvm_vcpu *vcpu) + return vcpu->arch.tsc_offset; + } + +-/* +- * writes 'offset' into guest's timestamp counter offset register +- */ +-static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) ++static u64 vmx_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) + { ++ u64 active_offset = offset; + if (is_guest_mode(vcpu)) { + /* + * We're here if L1 chose not to trap WRMSR to TSC. According +@@ -3445,17 +3444,16 @@ static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) + * set for L2 remains unchanged, and still needs to be added + * to the newly set TSC to get L2's TSC. + */ +- struct vmcs12 *vmcs12; +- /* recalculate vmcs02.TSC_OFFSET: */ +- vmcs12 = get_vmcs12(vcpu); +- vmcs_write64(TSC_OFFSET, offset + +- (nested_cpu_has(vmcs12, CPU_BASED_USE_TSC_OFFSETING) ? 
+- vmcs12->tsc_offset : 0)); ++ struct vmcs12 *vmcs12 = get_vmcs12(vcpu); ++ if (nested_cpu_has(vmcs12, CPU_BASED_USE_TSC_OFFSETING)) ++ active_offset += vmcs12->tsc_offset; + } else { + trace_kvm_write_tsc_offset(vcpu->vcpu_id, + vmcs_read64(TSC_OFFSET), offset); +- vmcs_write64(TSC_OFFSET, offset); + } ++ ++ vmcs_write64(TSC_OFFSET, active_offset); ++ return active_offset; + } + + /* +@@ -14203,7 +14201,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { + .has_wbinvd_exit = cpu_has_vmx_wbinvd_exit, + + .read_l1_tsc_offset = vmx_read_l1_tsc_offset, +- .write_tsc_offset = vmx_write_tsc_offset, ++ .write_l1_tsc_offset = vmx_write_l1_tsc_offset, + + .set_tdp_cr3 = vmx_set_cr3, + +diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c +index ca717737347e..68b53f05a420 100644 +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -1582,8 +1582,7 @@ EXPORT_SYMBOL_GPL(kvm_read_l1_tsc); + + static void kvm_vcpu_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) + { +- kvm_x86_ops->write_tsc_offset(vcpu, offset); +- vcpu->arch.tsc_offset = offset; ++ vcpu->arch.tsc_offset = kvm_x86_ops->write_l1_tsc_offset(vcpu, offset); + } + + static inline bool kvm_check_tsc_unstable(void) +@@ -1711,7 +1710,8 @@ EXPORT_SYMBOL_GPL(kvm_write_tsc); + static inline void adjust_tsc_offset_guest(struct kvm_vcpu *vcpu, + s64 adjustment) + { +- kvm_vcpu_write_tsc_offset(vcpu, vcpu->arch.tsc_offset + adjustment); ++ u64 tsc_offset = kvm_x86_ops->read_l1_tsc_offset(vcpu); ++ kvm_vcpu_write_tsc_offset(vcpu, tsc_offset + adjustment); + } + + static inline void adjust_tsc_offset_host(struct kvm_vcpu *vcpu, s64 adjustment) +@@ -6788,6 +6788,7 @@ static int kvm_pv_clock_pairing(struct kvm_vcpu *vcpu, gpa_t paddr, + clock_pairing.nsec = ts.tv_nsec; + clock_pairing.tsc = kvm_read_l1_tsc(vcpu, cycle); + clock_pairing.flags = 0; ++ memset(&clock_pairing.pad, 0, sizeof(clock_pairing.pad)); + + ret = 0; + if (kvm_write_guest(vcpu->kvm, paddr, &clock_pairing, +@@ -7313,7 +7314,8 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu) + else { + if (vcpu->arch.apicv_active) + kvm_x86_ops->sync_pir_to_irr(vcpu); +- kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors); ++ if (ioapic_in_kernel(vcpu->kvm)) ++ kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors); + } + + if (is_guest_mode(vcpu)) +diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c +index e96b99eb800c..a6d1b0241aea 100644 +--- a/arch/x86/mm/tlb.c ++++ b/arch/x86/mm/tlb.c +@@ -29,6 +29,12 @@ + * Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi + */ + ++/* ++ * Use bit 0 to mangle the TIF_SPEC_IB state into the mm pointer which is ++ * stored in cpu_tlb_state.last_user_mm_ibpb. ++ */ ++#define LAST_USER_MM_IBPB 0x1UL ++ + /* + * We get here when we do something requiring a TLB invalidation + * but could not go invalidate all of the contexts. We do the +@@ -180,6 +186,89 @@ static void sync_current_stack_to_mm(struct mm_struct *mm) + } + } + ++static inline unsigned long mm_mangle_tif_spec_ib(struct task_struct *next) ++{ ++ unsigned long next_tif = task_thread_info(next)->flags; ++ unsigned long ibpb = (next_tif >> TIF_SPEC_IB) & LAST_USER_MM_IBPB; ++ ++ return (unsigned long)next->mm | ibpb; ++} ++ ++static void cond_ibpb(struct task_struct *next) ++{ ++ if (!next || !next->mm) ++ return; ++ ++ /* ++ * Both, the conditional and the always IBPB mode use the mm ++ * pointer to avoid the IBPB when switching between tasks of the ++ * same process. Using the mm pointer instead of mm->context.ctx_id ++ * opens a hypothetical hole vs. 
mm_struct reuse, which is more or ++ * less impossible to control by an attacker. Aside of that it ++ * would only affect the first schedule so the theoretically ++ * exposed data is not really interesting. ++ */ ++ if (static_branch_likely(&switch_mm_cond_ibpb)) { ++ unsigned long prev_mm, next_mm; ++ ++ /* ++ * This is a bit more complex than the always mode because ++ * it has to handle two cases: ++ * ++ * 1) Switch from a user space task (potential attacker) ++ * which has TIF_SPEC_IB set to a user space task ++ * (potential victim) which has TIF_SPEC_IB not set. ++ * ++ * 2) Switch from a user space task (potential attacker) ++ * which has TIF_SPEC_IB not set to a user space task ++ * (potential victim) which has TIF_SPEC_IB set. ++ * ++ * This could be done by unconditionally issuing IBPB when ++ * a task which has TIF_SPEC_IB set is either scheduled in ++ * or out. Though that results in two flushes when: ++ * ++ * - the same user space task is scheduled out and later ++ * scheduled in again and only a kernel thread ran in ++ * between. ++ * ++ * - a user space task belonging to the same process is ++ * scheduled in after a kernel thread ran in between ++ * ++ * - a user space task belonging to the same process is ++ * scheduled in immediately. ++ * ++ * Optimize this with reasonably small overhead for the ++ * above cases. Mangle the TIF_SPEC_IB bit into the mm ++ * pointer of the incoming task which is stored in ++ * cpu_tlbstate.last_user_mm_ibpb for comparison. ++ */ ++ next_mm = mm_mangle_tif_spec_ib(next); ++ prev_mm = this_cpu_read(cpu_tlbstate.last_user_mm_ibpb); ++ ++ /* ++ * Issue IBPB only if the mm's are different and one or ++ * both have the IBPB bit set. ++ */ ++ if (next_mm != prev_mm && ++ (next_mm | prev_mm) & LAST_USER_MM_IBPB) ++ indirect_branch_prediction_barrier(); ++ ++ this_cpu_write(cpu_tlbstate.last_user_mm_ibpb, next_mm); ++ } ++ ++ if (static_branch_unlikely(&switch_mm_always_ibpb)) { ++ /* ++ * Only flush when switching to a user space task with a ++ * different context than the user space task which ran ++ * last on this CPU. ++ */ ++ if (this_cpu_read(cpu_tlbstate.last_user_mm) != next->mm) { ++ indirect_branch_prediction_barrier(); ++ this_cpu_write(cpu_tlbstate.last_user_mm, next->mm); ++ } ++ } ++} ++ + void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, + struct task_struct *tsk) + { +@@ -254,27 +343,13 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, + } else { + u16 new_asid; + bool need_flush; +- u64 last_ctx_id = this_cpu_read(cpu_tlbstate.last_ctx_id); + + /* + * Avoid user/user BTB poisoning by flushing the branch + * predictor when switching between processes. This stops + * one process from doing Spectre-v2 attacks on another. +- * +- * As an optimization, flush indirect branches only when +- * switching into processes that disable dumping. This +- * protects high value processes like gpg, without having +- * too high performance overhead. IBPB is *expensive*! +- * +- * This will not flush branches when switching into kernel +- * threads. It will also not flush if we switch to idle +- * thread and back to the same process. It will flush if we +- * switch to a different non-dumpable process. 
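cond_ibpb() above replaces this heuristic: the incoming task's TIF_SPEC_IB bit is folded into bit 0 of its mm pointer, so a single compare against cpu_tlbstate.last_user_mm_ibpb covers both "different process" and "one side wants IBPB" at once. The predicate in isolation, as a sketch with stand-in types rather than the kernel's structures:

#include <stdbool.h>
#include <stdint.h>

#define LAST_USER_MM_IBPB 0x1UL /* mm pointers are aligned, bit 0 is free */

/* Fold a task's TIF_SPEC_IB state into bit 0 of its mm pointer,
 * mirroring mm_mangle_tif_spec_ib(). */
uintptr_t mangle(uintptr_t mm, bool spec_ib)
{
        return mm | (spec_ib ? LAST_USER_MM_IBPB : 0);
}

/* IBPB is issued only if the mangled values differ and one or both
 * carry the IBPB bit; tasks of the same process with the same
 * TIF_SPEC_IB state compare equal and skip the barrier. */
bool needs_ibpb(uintptr_t prev_mangled, uintptr_t next_mangled)
{
        return next_mangled != prev_mangled &&
               ((next_mangled | prev_mangled) & LAST_USER_MM_IBPB);
}

The always-on mode below skips the mangling and compares raw mm pointers, since there TIF_SPEC_IB plays no role.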
+ */ +- if (tsk && tsk->mm && +- tsk->mm->context.ctx_id != last_ctx_id && +- get_dumpable(tsk->mm) != SUID_DUMP_USER) +- indirect_branch_prediction_barrier(); ++ cond_ibpb(tsk); + + if (IS_ENABLED(CONFIG_VMAP_STACK)) { + /* +@@ -331,14 +406,6 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, + trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0); + } + +- /* +- * Record last user mm's context id, so we can avoid +- * flushing branch buffer with IBPB if we switch back +- * to the same user. +- */ +- if (next != &init_mm) +- this_cpu_write(cpu_tlbstate.last_ctx_id, next->context.ctx_id); +- + /* Make sure we write CR3 before loaded_mm. */ + barrier(); + +@@ -419,7 +486,7 @@ void initialize_tlbstate_and_flush(void) + write_cr3(build_cr3(mm->pgd, 0)); + + /* Reinitialize tlbstate. */ +- this_cpu_write(cpu_tlbstate.last_ctx_id, mm->context.ctx_id); ++ this_cpu_write(cpu_tlbstate.last_user_mm_ibpb, LAST_USER_MM_IBPB); + this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0); + this_cpu_write(cpu_tlbstate.next_asid, 1); + this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, mm->context.ctx_id); +diff --git a/arch/xtensa/kernel/asm-offsets.c b/arch/xtensa/kernel/asm-offsets.c +index 67904f55f188..120dd746a147 100644 +--- a/arch/xtensa/kernel/asm-offsets.c ++++ b/arch/xtensa/kernel/asm-offsets.c +@@ -94,14 +94,14 @@ int main(void) + DEFINE(THREAD_SP, offsetof (struct task_struct, thread.sp)); + DEFINE(THREAD_CPENABLE, offsetof (struct thread_info, cpenable)); + #if XTENSA_HAVE_COPROCESSORS +- DEFINE(THREAD_XTREGS_CP0, offsetof (struct thread_info, xtregs_cp)); +- DEFINE(THREAD_XTREGS_CP1, offsetof (struct thread_info, xtregs_cp)); +- DEFINE(THREAD_XTREGS_CP2, offsetof (struct thread_info, xtregs_cp)); +- DEFINE(THREAD_XTREGS_CP3, offsetof (struct thread_info, xtregs_cp)); +- DEFINE(THREAD_XTREGS_CP4, offsetof (struct thread_info, xtregs_cp)); +- DEFINE(THREAD_XTREGS_CP5, offsetof (struct thread_info, xtregs_cp)); +- DEFINE(THREAD_XTREGS_CP6, offsetof (struct thread_info, xtregs_cp)); +- DEFINE(THREAD_XTREGS_CP7, offsetof (struct thread_info, xtregs_cp)); ++ DEFINE(THREAD_XTREGS_CP0, offsetof(struct thread_info, xtregs_cp.cp0)); ++ DEFINE(THREAD_XTREGS_CP1, offsetof(struct thread_info, xtregs_cp.cp1)); ++ DEFINE(THREAD_XTREGS_CP2, offsetof(struct thread_info, xtregs_cp.cp2)); ++ DEFINE(THREAD_XTREGS_CP3, offsetof(struct thread_info, xtregs_cp.cp3)); ++ DEFINE(THREAD_XTREGS_CP4, offsetof(struct thread_info, xtregs_cp.cp4)); ++ DEFINE(THREAD_XTREGS_CP5, offsetof(struct thread_info, xtregs_cp.cp5)); ++ DEFINE(THREAD_XTREGS_CP6, offsetof(struct thread_info, xtregs_cp.cp6)); ++ DEFINE(THREAD_XTREGS_CP7, offsetof(struct thread_info, xtregs_cp.cp7)); + #endif + DEFINE(THREAD_XTREGS_USER, offsetof (struct thread_info, xtregs_user)); + DEFINE(XTREGS_USER_SIZE, sizeof(xtregs_user_t)); +diff --git a/arch/xtensa/kernel/process.c b/arch/xtensa/kernel/process.c +index 483dcfb6e681..4bb68133a72a 100644 +--- a/arch/xtensa/kernel/process.c ++++ b/arch/xtensa/kernel/process.c +@@ -94,18 +94,21 @@ void coprocessor_release_all(struct thread_info *ti) + + void coprocessor_flush_all(struct thread_info *ti) + { +- unsigned long cpenable; ++ unsigned long cpenable, old_cpenable; + int i; + + preempt_disable(); + ++ RSR_CPENABLE(old_cpenable); + cpenable = ti->cpenable; ++ WSR_CPENABLE(cpenable); + + for (i = 0; i < XCHAL_CP_MAX; i++) { + if ((cpenable & 1) != 0 && coprocessor_owner[i] == ti) + coprocessor_flush(ti, i); + cpenable >>= 1; + } ++ WSR_CPENABLE(old_cpenable); + + preempt_enable(); + } +diff --git 
a/arch/xtensa/kernel/ptrace.c b/arch/xtensa/kernel/ptrace.c +index c0845cb1cbb9..d9541be0605a 100644 +--- a/arch/xtensa/kernel/ptrace.c ++++ b/arch/xtensa/kernel/ptrace.c +@@ -127,12 +127,37 @@ static int ptrace_setregs(struct task_struct *child, void __user *uregs) + } + + ++#if XTENSA_HAVE_COPROCESSORS ++#define CP_OFFSETS(cp) \ ++ { \ ++ .elf_xtregs_offset = offsetof(elf_xtregs_t, cp), \ ++ .ti_offset = offsetof(struct thread_info, xtregs_cp.cp), \ ++ .sz = sizeof(xtregs_ ## cp ## _t), \ ++ } ++ ++static const struct { ++ size_t elf_xtregs_offset; ++ size_t ti_offset; ++ size_t sz; ++} cp_offsets[] = { ++ CP_OFFSETS(cp0), ++ CP_OFFSETS(cp1), ++ CP_OFFSETS(cp2), ++ CP_OFFSETS(cp3), ++ CP_OFFSETS(cp4), ++ CP_OFFSETS(cp5), ++ CP_OFFSETS(cp6), ++ CP_OFFSETS(cp7), ++}; ++#endif ++ + static int ptrace_getxregs(struct task_struct *child, void __user *uregs) + { + struct pt_regs *regs = task_pt_regs(child); + struct thread_info *ti = task_thread_info(child); + elf_xtregs_t __user *xtregs = uregs; + int ret = 0; ++ int i __maybe_unused; + + if (!access_ok(VERIFY_WRITE, uregs, sizeof(elf_xtregs_t))) + return -EIO; +@@ -140,8 +165,13 @@ static int ptrace_getxregs(struct task_struct *child, void __user *uregs) + #if XTENSA_HAVE_COPROCESSORS + /* Flush all coprocessor registers to memory. */ + coprocessor_flush_all(ti); +- ret |= __copy_to_user(&xtregs->cp0, &ti->xtregs_cp, +- sizeof(xtregs_coprocessor_t)); ++ ++ for (i = 0; i < ARRAY_SIZE(cp_offsets); ++i) ++ ret |= __copy_to_user((char __user *)xtregs + ++ cp_offsets[i].elf_xtregs_offset, ++ (const char *)ti + ++ cp_offsets[i].ti_offset, ++ cp_offsets[i].sz); + #endif + ret |= __copy_to_user(&xtregs->opt, ®s->xtregs_opt, + sizeof(xtregs->opt)); +@@ -157,6 +187,7 @@ static int ptrace_setxregs(struct task_struct *child, void __user *uregs) + struct pt_regs *regs = task_pt_regs(child); + elf_xtregs_t *xtregs = uregs; + int ret = 0; ++ int i __maybe_unused; + + if (!access_ok(VERIFY_READ, uregs, sizeof(elf_xtregs_t))) + return -EFAULT; +@@ -166,8 +197,11 @@ static int ptrace_setxregs(struct task_struct *child, void __user *uregs) + coprocessor_flush_all(ti); + coprocessor_release_all(ti); + +- ret |= __copy_from_user(&ti->xtregs_cp, &xtregs->cp0, +- sizeof(xtregs_coprocessor_t)); ++ for (i = 0; i < ARRAY_SIZE(cp_offsets); ++i) ++ ret |= __copy_from_user((char *)ti + cp_offsets[i].ti_offset, ++ (const char __user *)xtregs + ++ cp_offsets[i].elf_xtregs_offset, ++ cp_offsets[i].sz); + #endif + ret |= __copy_from_user(®s->xtregs_opt, &xtregs->opt, + sizeof(xtregs->opt)); +diff --git a/drivers/android/binder.c b/drivers/android/binder.c +index d58763b6b009..ce0e4d317d24 100644 +--- a/drivers/android/binder.c ++++ b/drivers/android/binder.c +@@ -2971,7 +2971,6 @@ static void binder_transaction(struct binder_proc *proc, + t->buffer = NULL; + goto err_binder_alloc_buf_failed; + } +- t->buffer->allow_user_free = 0; + t->buffer->debug_id = t->debug_id; + t->buffer->transaction = t; + t->buffer->target_node = target_node; +@@ -3465,14 +3464,18 @@ static int binder_thread_write(struct binder_proc *proc, + + buffer = binder_alloc_prepare_to_free(&proc->alloc, + data_ptr); +- if (buffer == NULL) { +- binder_user_error("%d:%d BC_FREE_BUFFER u%016llx no match\n", +- proc->pid, thread->pid, (u64)data_ptr); +- break; +- } +- if (!buffer->allow_user_free) { +- binder_user_error("%d:%d BC_FREE_BUFFER u%016llx matched unreturned buffer\n", +- proc->pid, thread->pid, (u64)data_ptr); ++ if (IS_ERR_OR_NULL(buffer)) { ++ if (PTR_ERR(buffer) == -EPERM) { ++ binder_user_error( 
++ "%d:%d BC_FREE_BUFFER u%016llx matched unreturned or currently freeing buffer\n", ++ proc->pid, thread->pid, ++ (u64)data_ptr); ++ } else { ++ binder_user_error( ++ "%d:%d BC_FREE_BUFFER u%016llx no match\n", ++ proc->pid, thread->pid, ++ (u64)data_ptr); ++ } + break; + } + binder_debug(BINDER_DEBUG_FREE_BUFFER, +diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c +index 64fd96eada31..030c98f35cca 100644 +--- a/drivers/android/binder_alloc.c ++++ b/drivers/android/binder_alloc.c +@@ -151,16 +151,12 @@ static struct binder_buffer *binder_alloc_prepare_to_free_locked( + else { + /* + * Guard against user threads attempting to +- * free the buffer twice ++ * free the buffer when in use by kernel or ++ * after it's already been freed. + */ +- if (buffer->free_in_progress) { +- binder_alloc_debug(BINDER_DEBUG_USER_ERROR, +- "%d:%d FREE_BUFFER u%016llx user freed buffer twice\n", +- alloc->pid, current->pid, +- (u64)user_ptr); +- return NULL; +- } +- buffer->free_in_progress = 1; ++ if (!buffer->allow_user_free) ++ return ERR_PTR(-EPERM); ++ buffer->allow_user_free = 0; + return buffer; + } + } +@@ -500,7 +496,7 @@ static struct binder_buffer *binder_alloc_new_buf_locked( + + rb_erase(best_fit, &alloc->free_buffers); + buffer->free = 0; +- buffer->free_in_progress = 0; ++ buffer->allow_user_free = 0; + binder_insert_allocated_buffer_locked(alloc, buffer); + binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC, + "%d: binder_alloc_buf size %zd got %pK\n", +diff --git a/drivers/android/binder_alloc.h b/drivers/android/binder_alloc.h +index 9ef64e563856..fb3238c74c8a 100644 +--- a/drivers/android/binder_alloc.h ++++ b/drivers/android/binder_alloc.h +@@ -50,8 +50,7 @@ struct binder_buffer { + unsigned free:1; + unsigned allow_user_free:1; + unsigned async_transaction:1; +- unsigned free_in_progress:1; +- unsigned debug_id:28; ++ unsigned debug_id:29; + + struct binder_transaction *transaction; + +diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c +index 75f38d19fcbe..dbc51154f122 100644 +--- a/drivers/dma/at_hdmac.c ++++ b/drivers/dma/at_hdmac.c +@@ -1641,6 +1641,12 @@ static void atc_free_chan_resources(struct dma_chan *chan) + atchan->descs_allocated = 0; + atchan->status = 0; + ++ /* ++ * Free atslave allocated in at_dma_xlate() ++ */ ++ kfree(chan->private); ++ chan->private = NULL; ++ + dev_vdbg(chan2dev(chan), "free_chan_resources: done\n"); + } + +@@ -1675,7 +1681,7 @@ static struct dma_chan *at_dma_xlate(struct of_phandle_args *dma_spec, + dma_cap_zero(mask); + dma_cap_set(DMA_SLAVE, mask); + +- atslave = devm_kzalloc(&dmac_pdev->dev, sizeof(*atslave), GFP_KERNEL); ++ atslave = kzalloc(sizeof(*atslave), GFP_KERNEL); + if (!atslave) + return NULL; + +@@ -2000,6 +2006,8 @@ static int at_dma_remove(struct platform_device *pdev) + struct resource *io; + + at_dma_off(atdma); ++ if (pdev->dev.of_node) ++ of_dma_controller_free(pdev->dev.of_node); + dma_async_device_unregister(&atdma->dma_common); + + dma_pool_destroy(atdma->memset_pool); +diff --git a/drivers/hid/hid-sensor-custom.c b/drivers/hid/hid-sensor-custom.c +index e8a114157f87..bb012bc032e0 100644 +--- a/drivers/hid/hid-sensor-custom.c ++++ b/drivers/hid/hid-sensor-custom.c +@@ -358,7 +358,7 @@ static ssize_t show_value(struct device *dev, struct device_attribute *attr, + sensor_inst->hsdev, + sensor_inst->hsdev->usage, + usage, report_id, +- SENSOR_HUB_SYNC); ++ SENSOR_HUB_SYNC, false); + } else if (!strncmp(name, "units", strlen("units"))) + value = sensor_inst->fields[field_index].attribute.units; + else 
if (!strncmp(name, "unit-expo", strlen("unit-expo"))) +diff --git a/drivers/hid/hid-sensor-hub.c b/drivers/hid/hid-sensor-hub.c +index 2b63487057c2..4256fdc5cd6d 100644 +--- a/drivers/hid/hid-sensor-hub.c ++++ b/drivers/hid/hid-sensor-hub.c +@@ -299,7 +299,8 @@ EXPORT_SYMBOL_GPL(sensor_hub_get_feature); + int sensor_hub_input_attr_get_raw_value(struct hid_sensor_hub_device *hsdev, + u32 usage_id, + u32 attr_usage_id, u32 report_id, +- enum sensor_hub_read_flags flag) ++ enum sensor_hub_read_flags flag, ++ bool is_signed) + { + struct sensor_hub_data *data = hid_get_drvdata(hsdev->hdev); + unsigned long flags; +@@ -331,10 +332,16 @@ int sensor_hub_input_attr_get_raw_value(struct hid_sensor_hub_device *hsdev, + &hsdev->pending.ready, HZ*5); + switch (hsdev->pending.raw_size) { + case 1: +- ret_val = *(u8 *)hsdev->pending.raw_data; ++ if (is_signed) ++ ret_val = *(s8 *)hsdev->pending.raw_data; ++ else ++ ret_val = *(u8 *)hsdev->pending.raw_data; + break; + case 2: +- ret_val = *(u16 *)hsdev->pending.raw_data; ++ if (is_signed) ++ ret_val = *(s16 *)hsdev->pending.raw_data; ++ else ++ ret_val = *(u16 *)hsdev->pending.raw_data; + break; + case 4: + ret_val = *(u32 *)hsdev->pending.raw_data; +diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c +index 741857d80da1..2f164bd74687 100644 +--- a/drivers/hv/channel.c ++++ b/drivers/hv/channel.c +@@ -482,6 +482,14 @@ int vmbus_establish_gpadl(struct vmbus_channel *channel, void *kbuffer, + } + wait_for_completion(&msginfo->waitevent); + ++ if (msginfo->response.gpadl_created.creation_status != 0) { ++ pr_err("Failed to establish GPADL: err = 0x%x\n", ++ msginfo->response.gpadl_created.creation_status); ++ ++ ret = -EDQUOT; ++ goto cleanup; ++ } ++ + if (channel->rescind) { + ret = -ENODEV; + goto cleanup; +diff --git a/drivers/iio/accel/hid-sensor-accel-3d.c b/drivers/iio/accel/hid-sensor-accel-3d.c +index 41d97faf5013..38ff374a3ca4 100644 +--- a/drivers/iio/accel/hid-sensor-accel-3d.c ++++ b/drivers/iio/accel/hid-sensor-accel-3d.c +@@ -149,6 +149,7 @@ static int accel_3d_read_raw(struct iio_dev *indio_dev, + int report_id = -1; + u32 address; + int ret_type; ++ s32 min; + struct hid_sensor_hub_device *hsdev = + accel_state->common_attributes.hsdev; + +@@ -158,12 +159,14 @@ static int accel_3d_read_raw(struct iio_dev *indio_dev, + case IIO_CHAN_INFO_RAW: + hid_sensor_power_state(&accel_state->common_attributes, true); + report_id = accel_state->accel[chan->scan_index].report_id; ++ min = accel_state->accel[chan->scan_index].logical_minimum; + address = accel_3d_addresses[chan->scan_index]; + if (report_id >= 0) + *val = sensor_hub_input_attr_get_raw_value( + accel_state->common_attributes.hsdev, + hsdev->usage, address, report_id, +- SENSOR_HUB_SYNC); ++ SENSOR_HUB_SYNC, ++ min < 0); + else { + *val = 0; + hid_sensor_power_state(&accel_state->common_attributes, +diff --git a/drivers/iio/gyro/hid-sensor-gyro-3d.c b/drivers/iio/gyro/hid-sensor-gyro-3d.c +index 36941e69f959..88e857c4baf4 100644 +--- a/drivers/iio/gyro/hid-sensor-gyro-3d.c ++++ b/drivers/iio/gyro/hid-sensor-gyro-3d.c +@@ -111,6 +111,7 @@ static int gyro_3d_read_raw(struct iio_dev *indio_dev, + int report_id = -1; + u32 address; + int ret_type; ++ s32 min; + + *val = 0; + *val2 = 0; +@@ -118,13 +119,15 @@ static int gyro_3d_read_raw(struct iio_dev *indio_dev, + case IIO_CHAN_INFO_RAW: + hid_sensor_power_state(&gyro_state->common_attributes, true); + report_id = gyro_state->gyro[chan->scan_index].report_id; ++ min = gyro_state->gyro[chan->scan_index].logical_minimum; + address = 
gyro_3d_addresses[chan->scan_index]; + if (report_id >= 0) + *val = sensor_hub_input_attr_get_raw_value( + gyro_state->common_attributes.hsdev, + HID_USAGE_SENSOR_GYRO_3D, address, + report_id, +- SENSOR_HUB_SYNC); ++ SENSOR_HUB_SYNC, ++ min < 0); + else { + *val = 0; + hid_sensor_power_state(&gyro_state->common_attributes, +diff --git a/drivers/iio/humidity/hid-sensor-humidity.c b/drivers/iio/humidity/hid-sensor-humidity.c +index beab6d6fd6e1..4bc95f31c730 100644 +--- a/drivers/iio/humidity/hid-sensor-humidity.c ++++ b/drivers/iio/humidity/hid-sensor-humidity.c +@@ -75,7 +75,8 @@ static int humidity_read_raw(struct iio_dev *indio_dev, + HID_USAGE_SENSOR_HUMIDITY, + HID_USAGE_SENSOR_ATMOSPHERIC_HUMIDITY, + humid_st->humidity_attr.report_id, +- SENSOR_HUB_SYNC); ++ SENSOR_HUB_SYNC, ++ humid_st->humidity_attr.logical_minimum < 0); + hid_sensor_power_state(&humid_st->common_attributes, false); + + return IIO_VAL_INT; +diff --git a/drivers/iio/light/hid-sensor-als.c b/drivers/iio/light/hid-sensor-als.c +index 406caaee9a3c..94f33250ba5a 100644 +--- a/drivers/iio/light/hid-sensor-als.c ++++ b/drivers/iio/light/hid-sensor-als.c +@@ -93,6 +93,7 @@ static int als_read_raw(struct iio_dev *indio_dev, + int report_id = -1; + u32 address; + int ret_type; ++ s32 min; + + *val = 0; + *val2 = 0; +@@ -102,8 +103,8 @@ static int als_read_raw(struct iio_dev *indio_dev, + case CHANNEL_SCAN_INDEX_INTENSITY: + case CHANNEL_SCAN_INDEX_ILLUM: + report_id = als_state->als_illum.report_id; +- address = +- HID_USAGE_SENSOR_LIGHT_ILLUM; ++ min = als_state->als_illum.logical_minimum; ++ address = HID_USAGE_SENSOR_LIGHT_ILLUM; + break; + default: + report_id = -1; +@@ -116,7 +117,8 @@ static int als_read_raw(struct iio_dev *indio_dev, + als_state->common_attributes.hsdev, + HID_USAGE_SENSOR_ALS, address, + report_id, +- SENSOR_HUB_SYNC); ++ SENSOR_HUB_SYNC, ++ min < 0); + hid_sensor_power_state(&als_state->common_attributes, + false); + } else { +diff --git a/drivers/iio/light/hid-sensor-prox.c b/drivers/iio/light/hid-sensor-prox.c +index 45107f7537b5..cf5a0c242609 100644 +--- a/drivers/iio/light/hid-sensor-prox.c ++++ b/drivers/iio/light/hid-sensor-prox.c +@@ -73,6 +73,7 @@ static int prox_read_raw(struct iio_dev *indio_dev, + int report_id = -1; + u32 address; + int ret_type; ++ s32 min; + + *val = 0; + *val2 = 0; +@@ -81,8 +82,8 @@ static int prox_read_raw(struct iio_dev *indio_dev, + switch (chan->scan_index) { + case CHANNEL_SCAN_INDEX_PRESENCE: + report_id = prox_state->prox_attr.report_id; +- address = +- HID_USAGE_SENSOR_HUMAN_PRESENCE; ++ min = prox_state->prox_attr.logical_minimum; ++ address = HID_USAGE_SENSOR_HUMAN_PRESENCE; + break; + default: + report_id = -1; +@@ -95,7 +96,8 @@ static int prox_read_raw(struct iio_dev *indio_dev, + prox_state->common_attributes.hsdev, + HID_USAGE_SENSOR_PROX, address, + report_id, +- SENSOR_HUB_SYNC); ++ SENSOR_HUB_SYNC, ++ min < 0); + hid_sensor_power_state(&prox_state->common_attributes, + false); + } else { +diff --git a/drivers/iio/magnetometer/hid-sensor-magn-3d.c b/drivers/iio/magnetometer/hid-sensor-magn-3d.c +index d55c4885211a..f3c0d41e5a8c 100644 +--- a/drivers/iio/magnetometer/hid-sensor-magn-3d.c ++++ b/drivers/iio/magnetometer/hid-sensor-magn-3d.c +@@ -163,21 +163,23 @@ static int magn_3d_read_raw(struct iio_dev *indio_dev, + int report_id = -1; + u32 address; + int ret_type; ++ s32 min; + + *val = 0; + *val2 = 0; + switch (mask) { + case IIO_CHAN_INFO_RAW: + hid_sensor_power_state(&magn_state->magn_flux_attributes, true); +- report_id = +- 
magn_state->magn[chan->address].report_id; ++ report_id = magn_state->magn[chan->address].report_id; ++ min = magn_state->magn[chan->address].logical_minimum; + address = magn_3d_addresses[chan->address]; + if (report_id >= 0) + *val = sensor_hub_input_attr_get_raw_value( + magn_state->magn_flux_attributes.hsdev, + HID_USAGE_SENSOR_COMPASS_3D, address, + report_id, +- SENSOR_HUB_SYNC); ++ SENSOR_HUB_SYNC, ++ min < 0); + else { + *val = 0; + hid_sensor_power_state( +diff --git a/drivers/iio/magnetometer/st_magn_buffer.c b/drivers/iio/magnetometer/st_magn_buffer.c +index 0a9e8fadfa9d..37ab30566464 100644 +--- a/drivers/iio/magnetometer/st_magn_buffer.c ++++ b/drivers/iio/magnetometer/st_magn_buffer.c +@@ -30,11 +30,6 @@ int st_magn_trig_set_state(struct iio_trigger *trig, bool state) + return st_sensors_set_dataready_irq(indio_dev, state); + } + +-static int st_magn_buffer_preenable(struct iio_dev *indio_dev) +-{ +- return st_sensors_set_enable(indio_dev, true); +-} +- + static int st_magn_buffer_postenable(struct iio_dev *indio_dev) + { + int err; +@@ -50,7 +45,7 @@ static int st_magn_buffer_postenable(struct iio_dev *indio_dev) + if (err < 0) + goto st_magn_buffer_postenable_error; + +- return err; ++ return st_sensors_set_enable(indio_dev, true); + + st_magn_buffer_postenable_error: + kfree(mdata->buffer_data); +@@ -63,11 +58,11 @@ static int st_magn_buffer_predisable(struct iio_dev *indio_dev) + int err; + struct st_sensor_data *mdata = iio_priv(indio_dev); + +- err = iio_triggered_buffer_predisable(indio_dev); ++ err = st_sensors_set_enable(indio_dev, false); + if (err < 0) + goto st_magn_buffer_predisable_error; + +- err = st_sensors_set_enable(indio_dev, false); ++ err = iio_triggered_buffer_predisable(indio_dev); + + st_magn_buffer_predisable_error: + kfree(mdata->buffer_data); +@@ -75,7 +70,6 @@ st_magn_buffer_predisable_error: + } + + static const struct iio_buffer_setup_ops st_magn_buffer_setup_ops = { +- .preenable = &st_magn_buffer_preenable, + .postenable = &st_magn_buffer_postenable, + .predisable = &st_magn_buffer_predisable, + }; +diff --git a/drivers/iio/orientation/hid-sensor-incl-3d.c b/drivers/iio/orientation/hid-sensor-incl-3d.c +index 1e5451d1ff88..bdc5e4554ee4 100644 +--- a/drivers/iio/orientation/hid-sensor-incl-3d.c ++++ b/drivers/iio/orientation/hid-sensor-incl-3d.c +@@ -111,21 +111,23 @@ static int incl_3d_read_raw(struct iio_dev *indio_dev, + int report_id = -1; + u32 address; + int ret_type; ++ s32 min; + + *val = 0; + *val2 = 0; + switch (mask) { + case IIO_CHAN_INFO_RAW: + hid_sensor_power_state(&incl_state->common_attributes, true); +- report_id = +- incl_state->incl[chan->scan_index].report_id; ++ report_id = incl_state->incl[chan->scan_index].report_id; ++ min = incl_state->incl[chan->scan_index].logical_minimum; + address = incl_3d_addresses[chan->scan_index]; + if (report_id >= 0) + *val = sensor_hub_input_attr_get_raw_value( + incl_state->common_attributes.hsdev, + HID_USAGE_SENSOR_INCLINOMETER_3D, address, + report_id, +- SENSOR_HUB_SYNC); ++ SENSOR_HUB_SYNC, ++ min < 0); + else { + hid_sensor_power_state(&incl_state->common_attributes, + false); +diff --git a/drivers/iio/pressure/hid-sensor-press.c b/drivers/iio/pressure/hid-sensor-press.c +index 4c437918f1d2..d7b1c00ceb4d 100644 +--- a/drivers/iio/pressure/hid-sensor-press.c ++++ b/drivers/iio/pressure/hid-sensor-press.c +@@ -77,6 +77,7 @@ static int press_read_raw(struct iio_dev *indio_dev, + int report_id = -1; + u32 address; + int ret_type; ++ s32 min; + + *val = 0; + *val2 = 0; +@@ -85,8 +86,8 @@ 
static int press_read_raw(struct iio_dev *indio_dev, + switch (chan->scan_index) { + case CHANNEL_SCAN_INDEX_PRESSURE: + report_id = press_state->press_attr.report_id; +- address = +- HID_USAGE_SENSOR_ATMOSPHERIC_PRESSURE; ++ min = press_state->press_attr.logical_minimum; ++ address = HID_USAGE_SENSOR_ATMOSPHERIC_PRESSURE; + break; + default: + report_id = -1; +@@ -99,7 +100,8 @@ static int press_read_raw(struct iio_dev *indio_dev, + press_state->common_attributes.hsdev, + HID_USAGE_SENSOR_PRESSURE, address, + report_id, +- SENSOR_HUB_SYNC); ++ SENSOR_HUB_SYNC, ++ min < 0); + hid_sensor_power_state(&press_state->common_attributes, + false); + } else { +diff --git a/drivers/iio/temperature/hid-sensor-temperature.c b/drivers/iio/temperature/hid-sensor-temperature.c +index beaf6fd3e337..b592fc4f007e 100644 +--- a/drivers/iio/temperature/hid-sensor-temperature.c ++++ b/drivers/iio/temperature/hid-sensor-temperature.c +@@ -76,7 +76,8 @@ static int temperature_read_raw(struct iio_dev *indio_dev, + HID_USAGE_SENSOR_TEMPERATURE, + HID_USAGE_SENSOR_DATA_ENVIRONMENTAL_TEMPERATURE, + temp_st->temperature_attr.report_id, +- SENSOR_HUB_SYNC); ++ SENSOR_HUB_SYNC, ++ temp_st->temperature_attr.logical_minimum < 0); + hid_sensor_power_state( + &temp_st->common_attributes, + false); +diff --git a/drivers/misc/mic/scif/scif_rma.c b/drivers/misc/mic/scif/scif_rma.c +index c824329f7012..0e4193cb08cf 100644 +--- a/drivers/misc/mic/scif/scif_rma.c ++++ b/drivers/misc/mic/scif/scif_rma.c +@@ -416,7 +416,7 @@ static int scif_create_remote_lookup(struct scif_dev *remote_dev, + if (err) + goto error_window; + err = scif_map_page(&window->num_pages_lookup.lookup[j], +- vmalloc_dma_phys ? ++ vmalloc_num_pages ? + vmalloc_to_page(&window->num_pages[i]) : + virt_to_page(&window->num_pages[i]), + remote_dev); +diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c +index 768f584f8392..88f8a8fa93cd 100644 +--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c ++++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c +@@ -1784,6 +1784,7 @@ static int nicvf_xdp_setup(struct nicvf *nic, struct bpf_prog *prog) + bool if_up = netif_running(nic->netdev); + struct bpf_prog *old_prog; + bool bpf_attached = false; ++ int ret = 0; + + /* For now just support only the usual MTU sized frames */ + if (prog && (dev->mtu > 1500)) { +@@ -1817,8 +1818,12 @@ static int nicvf_xdp_setup(struct nicvf *nic, struct bpf_prog *prog) + if (nic->xdp_prog) { + /* Attach BPF program */ + nic->xdp_prog = bpf_prog_add(nic->xdp_prog, nic->rx_queues - 1); +- if (!IS_ERR(nic->xdp_prog)) ++ if (!IS_ERR(nic->xdp_prog)) { + bpf_attached = true; ++ } else { ++ ret = PTR_ERR(nic->xdp_prog); ++ nic->xdp_prog = NULL; ++ } + } + + /* Calculate Tx queues needed for XDP and network stack */ +@@ -1830,7 +1835,7 @@ static int nicvf_xdp_setup(struct nicvf *nic, struct bpf_prog *prog) + netif_trans_update(nic->netdev); + } + +- return 0; ++ return ret; + } + + static int nicvf_xdp(struct net_device *netdev, struct netdev_bpf *xdp) +diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c +index 187a249ff2d1..fcaf18fa3904 100644 +--- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c ++++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c +@@ -585,10 +585,12 @@ static void nicvf_free_snd_queue(struct nicvf *nic, struct snd_queue *sq) + if (!sq->dmem.base) + return; + +- if (sq->tso_hdrs) ++ if (sq->tso_hdrs) { + dma_free_coherent(&nic->pdev->dev, + 
sq->dmem.q_len * TSO_HEADER_SIZE, + sq->tso_hdrs, sq->tso_hdrs_phys); ++ sq->tso_hdrs = NULL; ++ } + + /* Free pending skbs in the queue */ + smp_rmb(); +diff --git a/drivers/net/ethernet/cortina/gemini.c b/drivers/net/ethernet/cortina/gemini.c +index 1c9ad3630c77..dfd1ad0b1cb9 100644 +--- a/drivers/net/ethernet/cortina/gemini.c ++++ b/drivers/net/ethernet/cortina/gemini.c +@@ -661,7 +661,7 @@ static void gmac_clean_txq(struct net_device *netdev, struct gmac_txq *txq, + + u64_stats_update_begin(&port->tx_stats_syncp); + port->tx_frag_stats[nfrags]++; +- u64_stats_update_end(&port->ir_stats_syncp); ++ u64_stats_update_end(&port->tx_stats_syncp); + } + } + +diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c +index 001b5f714c1b..aaedf1072460 100644 +--- a/drivers/net/ethernet/microchip/lan743x_main.c ++++ b/drivers/net/ethernet/microchip/lan743x_main.c +@@ -1675,7 +1675,7 @@ static int lan743x_tx_napi_poll(struct napi_struct *napi, int weight) + netif_wake_queue(adapter->netdev); + } + +- if (!napi_complete_done(napi, weight)) ++ if (!napi_complete(napi)) + goto done; + + /* enable isr */ +@@ -1684,7 +1684,7 @@ static int lan743x_tx_napi_poll(struct napi_struct *napi, int weight) + lan743x_csr_read(adapter, INT_STS); + + done: +- return weight; ++ return 0; + } + + static void lan743x_tx_ring_cleanup(struct lan743x_tx *tx) +@@ -1873,9 +1873,9 @@ static int lan743x_tx_open(struct lan743x_tx *tx) + tx->vector_flags = lan743x_intr_get_vector_flags(adapter, + INT_BIT_DMA_TX_ + (tx->channel_number)); +- netif_napi_add(adapter->netdev, +- &tx->napi, lan743x_tx_napi_poll, +- tx->ring_size - 1); ++ netif_tx_napi_add(adapter->netdev, ++ &tx->napi, lan743x_tx_napi_poll, ++ tx->ring_size - 1); + napi_enable(&tx->napi); + + data = 0; +@@ -3020,6 +3020,7 @@ static const struct dev_pm_ops lan743x_pm_ops = { + + static const struct pci_device_id lan743x_pcidev_tbl[] = { + { PCI_DEVICE(PCI_VENDOR_ID_SMSC, PCI_DEVICE_ID_SMSC_LAN7430) }, ++ { PCI_DEVICE(PCI_VENDOR_ID_SMSC, PCI_DEVICE_ID_SMSC_LAN7431) }, + { 0, } + }; + +diff --git a/drivers/net/ethernet/microchip/lan743x_main.h b/drivers/net/ethernet/microchip/lan743x_main.h +index 0e82b6368798..2d6eea18973e 100644 +--- a/drivers/net/ethernet/microchip/lan743x_main.h ++++ b/drivers/net/ethernet/microchip/lan743x_main.h +@@ -548,6 +548,7 @@ struct lan743x_adapter; + /* SMSC acquired EFAR late 1990's, MCHP acquired SMSC 2012 */ + #define PCI_VENDOR_ID_SMSC PCI_VENDOR_ID_EFAR + #define PCI_DEVICE_ID_SMSC_LAN7430 (0x7430) ++#define PCI_DEVICE_ID_SMSC_LAN7431 (0x7431) + + #define PCI_CONFIG_LENGTH (0x1000) + +diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c +index 19ab8a7d1e48..733e35b7c4bb 100644 +--- a/drivers/net/phy/phy_device.c ++++ b/drivers/net/phy/phy_device.c +@@ -1930,6 +1930,14 @@ int phy_driver_register(struct phy_driver *new_driver, struct module *owner) + new_driver->mdiodrv.driver.remove = phy_remove; + new_driver->mdiodrv.driver.owner = owner; + ++ /* The following works around an issue where the PHY driver doesn't bind ++ * to the device, resulting in the genphy driver being used instead of ++ * the dedicated driver. The root cause of the issue isn't known yet ++ * and seems to be in the base driver core. Once this is fixed we may ++ * remove this workaround. 
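The probe_type field set here is the generic knob from enum probe_type in <linux/device.h>, not something PHY-specific; any driver can request a synchronous bind the same way before registration. A hypothetical, minimal example (driver name and callback invented for illustration):

#include <linux/device.h>
#include <linux/module.h>
#include <linux/platform_device.h>

static int demo_probe(struct platform_device *pdev)
{
        return 0;
}

static struct platform_driver demo_driver = {
        .probe = demo_probe,
        .driver = {
                .name = "demo",
                /* Bind during registration instead of deferring the
                 * probe to the asynchronous probe machinery. */
                .probe_type = PROBE_FORCE_SYNCHRONOUS,
        },
};
module_platform_driver(demo_driver);

MODULE_LICENSE("GPL");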
++ */ ++ new_driver->mdiodrv.driver.probe_type = PROBE_FORCE_SYNCHRONOUS; ++ + retval = driver_register(&new_driver->mdiodrv.driver); + if (retval) { + pr_err("%s: Error %d in registering driver\n", +diff --git a/drivers/net/rionet.c b/drivers/net/rionet.c +index e9f101c9bae2..bfbb39f93554 100644 +--- a/drivers/net/rionet.c ++++ b/drivers/net/rionet.c +@@ -216,9 +216,9 @@ static int rionet_start_xmit(struct sk_buff *skb, struct net_device *ndev) + * it just report sending a packet to the target + * (without actual packet transfer). + */ +- dev_kfree_skb_any(skb); + ndev->stats.tx_packets++; + ndev->stats.tx_bytes += skb->len; ++ dev_kfree_skb_any(skb); + } + } + +diff --git a/drivers/net/usb/ipheth.c b/drivers/net/usb/ipheth.c +index 7275761a1177..3d8a70d3ea9b 100644 +--- a/drivers/net/usb/ipheth.c ++++ b/drivers/net/usb/ipheth.c +@@ -140,7 +140,6 @@ struct ipheth_device { + struct usb_device *udev; + struct usb_interface *intf; + struct net_device *net; +- struct sk_buff *tx_skb; + struct urb *tx_urb; + struct urb *rx_urb; + unsigned char *tx_buf; +@@ -230,6 +229,7 @@ static void ipheth_rcvbulk_callback(struct urb *urb) + case -ENOENT: + case -ECONNRESET: + case -ESHUTDOWN: ++ case -EPROTO: + return; + case 0: + break; +@@ -281,7 +281,6 @@ static void ipheth_sndbulk_callback(struct urb *urb) + dev_err(&dev->intf->dev, "%s: urb status: %d\n", + __func__, status); + +- dev_kfree_skb_irq(dev->tx_skb); + if (status == 0) + netif_wake_queue(dev->net); + else +@@ -423,7 +422,7 @@ static int ipheth_tx(struct sk_buff *skb, struct net_device *net) + if (skb->len > IPHETH_BUF_SIZE) { + WARN(1, "%s: skb too large: %d bytes\n", __func__, skb->len); + dev->net->stats.tx_dropped++; +- dev_kfree_skb_irq(skb); ++ dev_kfree_skb_any(skb); + return NETDEV_TX_OK; + } + +@@ -443,12 +442,11 @@ static int ipheth_tx(struct sk_buff *skb, struct net_device *net) + dev_err(&dev->intf->dev, "%s: usb_submit_urb: %d\n", + __func__, retval); + dev->net->stats.tx_errors++; +- dev_kfree_skb_irq(skb); ++ dev_kfree_skb_any(skb); + } else { +- dev->tx_skb = skb; +- + dev->net->stats.tx_packets++; + dev->net->stats.tx_bytes += skb->len; ++ dev_consume_skb_any(skb); + netif_stop_queue(net); + } + +diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c +index ddfa3f24204c..c2ca6cd3fbe0 100644 +--- a/drivers/net/virtio_net.c ++++ b/drivers/net/virtio_net.c +@@ -70,7 +70,8 @@ static const unsigned long guest_offloads[] = { + VIRTIO_NET_F_GUEST_TSO4, + VIRTIO_NET_F_GUEST_TSO6, + VIRTIO_NET_F_GUEST_ECN, +- VIRTIO_NET_F_GUEST_UFO ++ VIRTIO_NET_F_GUEST_UFO, ++ VIRTIO_NET_F_GUEST_CSUM + }; + + struct virtnet_stat_desc { +@@ -2285,9 +2286,6 @@ static int virtnet_clear_guest_offloads(struct virtnet_info *vi) + if (!vi->guest_offloads) + return 0; + +- if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM)) +- offloads = 1ULL << VIRTIO_NET_F_GUEST_CSUM; +- + return virtnet_set_guest_offloads(vi, offloads); + } + +@@ -2297,8 +2295,6 @@ static int virtnet_restore_guest_offloads(struct virtnet_info *vi) + + if (!vi->guest_offloads) + return 0; +- if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM)) +- offloads |= 1ULL << VIRTIO_NET_F_GUEST_CSUM; + + return virtnet_set_guest_offloads(vi, offloads); + } +@@ -2316,8 +2312,9 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, + && (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) || + virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) || + virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) || +- virtio_has_feature(vi->vdev, 
VIRTIO_NET_F_GUEST_UFO))) { +- NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing LRO, disable LRO first"); ++ virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) || ++ virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM))) { ++ NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing LRO/CSUM, disable LRO/CSUM first"); + return -EOPNOTSUPP; + } + +diff --git a/drivers/pci/controller/dwc/pci-layerscape.c b/drivers/pci/controller/dwc/pci-layerscape.c +index 3724d3ef7008..7aa9a82b7ebd 100644 +--- a/drivers/pci/controller/dwc/pci-layerscape.c ++++ b/drivers/pci/controller/dwc/pci-layerscape.c +@@ -88,7 +88,7 @@ static void ls_pcie_disable_outbound_atus(struct ls_pcie *pcie) + int i; + + for (i = 0; i < PCIE_IATU_NUM; i++) +- dw_pcie_disable_atu(pcie->pci, DW_PCIE_REGION_OUTBOUND, i); ++ dw_pcie_disable_atu(pcie->pci, i, DW_PCIE_REGION_OUTBOUND); + } + + static int ls1021_pcie_link_up(struct dw_pcie *pci) +diff --git a/drivers/pci/controller/dwc/pcie-designware-ep.c b/drivers/pci/controller/dwc/pcie-designware-ep.c +index 1e7b02221eac..de8635af4cde 100644 +--- a/drivers/pci/controller/dwc/pcie-designware-ep.c ++++ b/drivers/pci/controller/dwc/pcie-designware-ep.c +@@ -440,7 +440,6 @@ int dw_pcie_ep_raise_msix_irq(struct dw_pcie_ep *ep, u8 func_no, + tbl_offset = dw_pcie_readl_dbi(pci, reg); + bir = (tbl_offset & PCI_MSIX_TABLE_BIR); + tbl_offset &= PCI_MSIX_TABLE_OFFSET; +- tbl_offset >>= 3; + + reg = PCI_BASE_ADDRESS_0 + (4 * bir); + bar_addr_upper = 0; +diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c +index 51b6c81671c1..afc4680c584f 100644 +--- a/drivers/pci/pci.c ++++ b/drivers/pci/pci.c +@@ -5473,9 +5473,13 @@ enum pci_bus_speed pcie_get_speed_cap(struct pci_dev *dev) + u32 lnkcap2, lnkcap; + + /* +- * PCIe r4.0 sec 7.5.3.18 recommends using the Supported Link +- * Speeds Vector in Link Capabilities 2 when supported, falling +- * back to Max Link Speed in Link Capabilities otherwise. ++ * Link Capabilities 2 was added in PCIe r3.0, sec 7.8.18. The ++ * implementation note there recommends using the Supported Link ++ * Speeds Vector in Link Capabilities 2 when supported. ++ * ++ * Without Link Capabilities 2, i.e., prior to PCIe r3.0, software ++ * should use the Supported Link Speeds field in Link Capabilities, ++ * where only 2.5 GT/s and 5.0 GT/s speeds were defined. 
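Concretely, LNKCAP2 carries a bit vector (bit 1 = 2.5 GT/s, bit 2 = 5.0 GT/s, and so on) while the pre-r3.0 LNKCAP Max Link Speed field is an enumeration, which is why the code below compares the masked field for equality instead of testing bits. A user-space sketch of the same decode, using the PCI_EXP_LNKCAP* constants from <linux/pci_regs.h>:

#include <linux/pci_regs.h>

const char *pcie_speed(unsigned int lnkcap2, unsigned int lnkcap)
{
        if (lnkcap2) {  /* PCIe r3.0+: Supported Link Speeds Vector */
                if (lnkcap2 & PCI_EXP_LNKCAP2_SLS_16_0GB)
                        return "16.0 GT/s";
                if (lnkcap2 & PCI_EXP_LNKCAP2_SLS_8_0GB)
                        return "8.0 GT/s";
                if (lnkcap2 & PCI_EXP_LNKCAP2_SLS_5_0GB)
                        return "5.0 GT/s";
                if (lnkcap2 & PCI_EXP_LNKCAP2_SLS_2_5GB)
                        return "2.5 GT/s";
                return "unknown";
        }
        /* Pre-r3.0: Max Link Speed is an enumeration, not a bit mask. */
        if ((lnkcap & PCI_EXP_LNKCAP_SLS) == PCI_EXP_LNKCAP_SLS_5_0GB)
                return "5.0 GT/s";
        if ((lnkcap & PCI_EXP_LNKCAP_SLS) == PCI_EXP_LNKCAP_SLS_2_5GB)
                return "2.5 GT/s";
        return "unknown";
}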
+ */ + pcie_capability_read_dword(dev, PCI_EXP_LNKCAP2, &lnkcap2); + if (lnkcap2) { /* PCIe r3.0-compliant */ +@@ -5491,16 +5495,10 @@ enum pci_bus_speed pcie_get_speed_cap(struct pci_dev *dev) + } + + pcie_capability_read_dword(dev, PCI_EXP_LNKCAP, &lnkcap); +- if (lnkcap) { +- if (lnkcap & PCI_EXP_LNKCAP_SLS_16_0GB) +- return PCIE_SPEED_16_0GT; +- else if (lnkcap & PCI_EXP_LNKCAP_SLS_8_0GB) +- return PCIE_SPEED_8_0GT; +- else if (lnkcap & PCI_EXP_LNKCAP_SLS_5_0GB) +- return PCIE_SPEED_5_0GT; +- else if (lnkcap & PCI_EXP_LNKCAP_SLS_2_5GB) +- return PCIE_SPEED_2_5GT; +- } ++ if ((lnkcap & PCI_EXP_LNKCAP_SLS) == PCI_EXP_LNKCAP_SLS_5_0GB) ++ return PCIE_SPEED_5_0GT; ++ else if ((lnkcap & PCI_EXP_LNKCAP_SLS) == PCI_EXP_LNKCAP_SLS_2_5GB) ++ return PCIE_SPEED_2_5GT; + + return PCI_SPEED_UNKNOWN; + } +diff --git a/drivers/rtc/rtc-hid-sensor-time.c b/drivers/rtc/rtc-hid-sensor-time.c +index 2751dba850c6..3e1abb455472 100644 +--- a/drivers/rtc/rtc-hid-sensor-time.c ++++ b/drivers/rtc/rtc-hid-sensor-time.c +@@ -213,7 +213,7 @@ static int hid_rtc_read_time(struct device *dev, struct rtc_time *tm) + /* get a report with all values through requesting one value */ + sensor_hub_input_attr_get_raw_value(time_state->common_attributes.hsdev, + HID_USAGE_SENSOR_TIME, hid_time_addresses[0], +- time_state->info[0].report_id, SENSOR_HUB_SYNC); ++ time_state->info[0].report_id, SENSOR_HUB_SYNC, false); + /* wait for all values (event) */ + ret = wait_for_completion_killable_timeout( + &time_state->comp_last_time, HZ*6); +diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c +index ffce6f39828a..b03515d43745 100644 +--- a/drivers/s390/net/qeth_core_main.c ++++ b/drivers/s390/net/qeth_core_main.c +@@ -4524,8 +4524,8 @@ static int qeth_snmp_command_cb(struct qeth_card *card, + { + struct qeth_ipa_cmd *cmd; + struct qeth_arp_query_info *qinfo; +- struct qeth_snmp_cmd *snmp; + unsigned char *data; ++ void *snmp_data; + __u16 data_len; + + QETH_CARD_TEXT(card, 3, "snpcmdcb"); +@@ -4533,7 +4533,6 @@ static int qeth_snmp_command_cb(struct qeth_card *card, + cmd = (struct qeth_ipa_cmd *) sdata; + data = (unsigned char *)((char *)cmd - reply->offset); + qinfo = (struct qeth_arp_query_info *) reply->param; +- snmp = &cmd->data.setadapterparms.data.snmp; + + if (cmd->hdr.return_code) { + QETH_CARD_TEXT_(card, 4, "scer1%x", cmd->hdr.return_code); +@@ -4546,10 +4545,15 @@ static int qeth_snmp_command_cb(struct qeth_card *card, + return 0; + } + data_len = *((__u16 *)QETH_IPA_PDU_LEN_PDU1(data)); +- if (cmd->data.setadapterparms.hdr.seq_no == 1) +- data_len -= (__u16)((char *)&snmp->data - (char *)cmd); +- else +- data_len -= (__u16)((char *)&snmp->request - (char *)cmd); ++ if (cmd->data.setadapterparms.hdr.seq_no == 1) { ++ snmp_data = &cmd->data.setadapterparms.data.snmp; ++ data_len -= offsetof(struct qeth_ipa_cmd, ++ data.setadapterparms.data.snmp); ++ } else { ++ snmp_data = &cmd->data.setadapterparms.data.snmp.request; ++ data_len -= offsetof(struct qeth_ipa_cmd, ++ data.setadapterparms.data.snmp.request); ++ } + + /* check if there is enough room in userspace */ + if ((qinfo->udata_len - qinfo->udata_offset) < data_len) { +@@ -4562,16 +4566,9 @@ static int qeth_snmp_command_cb(struct qeth_card *card, + QETH_CARD_TEXT_(card, 4, "sseqn%i", + cmd->data.setadapterparms.hdr.seq_no); + /*copy entries to user buffer*/ +- if (cmd->data.setadapterparms.hdr.seq_no == 1) { +- memcpy(qinfo->udata + qinfo->udata_offset, +- (char *)snmp, +- data_len + offsetof(struct qeth_snmp_cmd, data)); +- 
qinfo->udata_offset += offsetof(struct qeth_snmp_cmd, data); +- } else { +- memcpy(qinfo->udata + qinfo->udata_offset, +- (char *)&snmp->request, data_len); +- } ++ memcpy(qinfo->udata + qinfo->udata_offset, snmp_data, data_len); + qinfo->udata_offset += data_len; ++ + /* check if all replies received ... */ + QETH_CARD_TEXT_(card, 4, "srtot%i", + cmd->data.setadapterparms.hdr.used_total); +diff --git a/drivers/staging/most/core.c b/drivers/staging/most/core.c +index f4c464625a67..52ad62722996 100644 +--- a/drivers/staging/most/core.c ++++ b/drivers/staging/most/core.c +@@ -351,7 +351,7 @@ static ssize_t set_datatype_show(struct device *dev, + + for (i = 0; i < ARRAY_SIZE(ch_data_type); i++) { + if (c->cfg.data_type & ch_data_type[i].most_ch_data_type) +- return snprintf(buf, PAGE_SIZE, ch_data_type[i].name); ++ return snprintf(buf, PAGE_SIZE, "%s", ch_data_type[i].name); + } + return snprintf(buf, PAGE_SIZE, "unconfigured\n"); + } +diff --git a/drivers/staging/mt7621-dma/mtk-hsdma.c b/drivers/staging/mt7621-dma/mtk-hsdma.c +index df6ebf41bdea..5831f816c17b 100644 +--- a/drivers/staging/mt7621-dma/mtk-hsdma.c ++++ b/drivers/staging/mt7621-dma/mtk-hsdma.c +@@ -335,6 +335,8 @@ static int mtk_hsdma_start_transfer(struct mtk_hsdam_engine *hsdma, + /* tx desc */ + src = sg->src_addr; + for (i = 0; i < chan->desc->num_sgs; i++) { ++ tx_desc = &chan->tx_ring[chan->tx_idx]; ++ + if (len > HSDMA_MAX_PLEN) + tlen = HSDMA_MAX_PLEN; + else +@@ -344,7 +346,6 @@ static int mtk_hsdma_start_transfer(struct mtk_hsdam_engine *hsdma, + tx_desc->addr1 = src; + tx_desc->flags |= HSDMA_DESC_PLEN1(tlen); + } else { +- tx_desc = &chan->tx_ring[chan->tx_idx]; + tx_desc->addr0 = src; + tx_desc->flags = HSDMA_DESC_PLEN0(tlen); + +diff --git a/drivers/staging/mt7621-pinctrl/pinctrl-rt2880.c b/drivers/staging/mt7621-pinctrl/pinctrl-rt2880.c +index b8566ed898f1..aa98fbb17013 100644 +--- a/drivers/staging/mt7621-pinctrl/pinctrl-rt2880.c ++++ b/drivers/staging/mt7621-pinctrl/pinctrl-rt2880.c +@@ -82,7 +82,7 @@ static int rt2880_pinctrl_dt_node_to_map(struct pinctrl_dev *pctrldev, + struct property *prop; + const char *function_name, *group_name; + int ret; +- int ngroups; ++ int ngroups = 0; + unsigned int reserved_maps = 0; + + for_each_node_with_property(np_config, "group") +diff --git a/drivers/staging/rtl8723bs/hal/rtl8723bs_recv.c b/drivers/staging/rtl8723bs/hal/rtl8723bs_recv.c +index 85077947b9b8..85aba8a503cd 100644 +--- a/drivers/staging/rtl8723bs/hal/rtl8723bs_recv.c ++++ b/drivers/staging/rtl8723bs/hal/rtl8723bs_recv.c +@@ -109,12 +109,12 @@ static void update_recvframe_phyinfo(union recv_frame *precvframe, + rx_bssid = get_hdr_bssid(wlanhdr); + pkt_info.bssid_match = ((!IsFrameTypeCtrl(wlanhdr)) && + !pattrib->icv_err && !pattrib->crc_err && +- !ether_addr_equal(rx_bssid, my_bssid)); ++ ether_addr_equal(rx_bssid, my_bssid)); + + rx_ra = get_ra(wlanhdr); + my_hwaddr = myid(&padapter->eeprompriv); + pkt_info.to_self = pkt_info.bssid_match && +- !ether_addr_equal(rx_ra, my_hwaddr); ++ ether_addr_equal(rx_ra, my_hwaddr); + + + pkt_info.is_beacon = pkt_info.bssid_match && +diff --git a/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c b/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c +index af2234798fa8..db553f2e4c0b 100644 +--- a/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c ++++ b/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c +@@ -1277,7 +1277,7 @@ static int cfg80211_rtw_get_station(struct wiphy *wiphy, + + sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_PACKETS); + sinfo->tx_packets = 
psta->sta_stats.tx_pkts; +- ++ sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_FAILED); + } + + /* for Ad-Hoc/AP mode */ +diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c +index bc05c69383b8..fe431302a030 100644 +--- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c ++++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c +@@ -1787,6 +1787,7 @@ vchiq_compat_ioctl_await_completion(struct file *file, + struct vchiq_await_completion32 args32; + struct vchiq_completion_data32 completion32; + unsigned int *msgbufcount32; ++ unsigned int msgbufcount_native; + compat_uptr_t msgbuf32; + void *msgbuf; + void **msgbufptr; +@@ -1898,7 +1899,11 @@ vchiq_compat_ioctl_await_completion(struct file *file, + sizeof(completion32))) + return -EFAULT; + +- args32.msgbufcount--; ++ if (get_user(msgbufcount_native, &args->msgbufcount)) ++ return -EFAULT; ++ ++ if (!msgbufcount_native) ++ args32.msgbufcount--; + + msgbufcount32 = + &((struct vchiq_await_completion32 __user *)arg)->msgbufcount; +diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c +index f9ff03e6af93..0690fcff0ea2 100644 +--- a/drivers/usb/core/quirks.c ++++ b/drivers/usb/core/quirks.c +@@ -209,6 +209,9 @@ static const struct usb_device_id usb_quirk_list[] = { + /* Microsoft LifeCam-VX700 v2.0 */ + { USB_DEVICE(0x045e, 0x0770), .driver_info = USB_QUIRK_RESET_RESUME }, + ++ /* Cherry Stream G230 2.0 (G85-231) and 3.0 (G85-232) */ ++ { USB_DEVICE(0x046a, 0x0023), .driver_info = USB_QUIRK_RESET_RESUME }, ++ + /* Logitech HD Pro Webcams C920, C920-C, C925e and C930e */ + { USB_DEVICE(0x046d, 0x082d), .driver_info = USB_QUIRK_DELAY_INIT }, + { USB_DEVICE(0x046d, 0x0841), .driver_info = USB_QUIRK_DELAY_INIT }, +diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c +index 2de1a3971a26..558949b826d0 100644 +--- a/drivers/usb/dwc3/gadget.c ++++ b/drivers/usb/dwc3/gadget.c +@@ -1461,9 +1461,6 @@ int __dwc3_gadget_ep_set_halt(struct dwc3_ep *dep, int value, int protocol) + unsigned transfer_in_flight; + unsigned started; + +- if (dep->flags & DWC3_EP_STALL) +- return 0; +- + if (dep->number > 1) + trb = dwc3_ep_prev_trb(dep, dep->trb_enqueue); + else +@@ -1485,8 +1482,6 @@ int __dwc3_gadget_ep_set_halt(struct dwc3_ep *dep, int value, int protocol) + else + dep->flags |= DWC3_EP_STALL; + } else { +- if (!(dep->flags & DWC3_EP_STALL)) +- return 0; + + ret = dwc3_send_clear_stall_ep_cmd(dep); + if (ret) +diff --git a/drivers/usb/storage/unusual_realtek.h b/drivers/usb/storage/unusual_realtek.h +index d17cd95b55bb..6b2140f966ef 100644 +--- a/drivers/usb/storage/unusual_realtek.h ++++ b/drivers/usb/storage/unusual_realtek.h +@@ -27,4 +27,14 @@ UNUSUAL_DEV(0x0bda, 0x0159, 0x0000, 0x9999, + "USB Card Reader", + USB_SC_DEVICE, USB_PR_DEVICE, init_realtek_cr, 0), + ++UNUSUAL_DEV(0x0bda, 0x0177, 0x0000, 0x9999, ++ "Realtek", ++ "USB Card Reader", ++ USB_SC_DEVICE, USB_PR_DEVICE, init_realtek_cr, 0), ++ ++UNUSUAL_DEV(0x0bda, 0x0184, 0x0000, 0x9999, ++ "Realtek", ++ "USB Card Reader", ++ USB_SC_DEVICE, USB_PR_DEVICE, init_realtek_cr, 0), ++ + #endif /* defined(CONFIG_USB_STORAGE_REALTEK) || ... 
*/ +diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c +index dc52ce5e495f..834a3f5ef642 100644 +--- a/fs/btrfs/disk-io.c ++++ b/fs/btrfs/disk-io.c +@@ -477,9 +477,9 @@ static int btree_read_extent_buffer_pages(struct btrfs_fs_info *fs_info, + int mirror_num = 0; + int failed_mirror = 0; + +- clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags); + io_tree = &BTRFS_I(fs_info->btree_inode)->io_tree; + while (1) { ++ clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags); + ret = read_extent_buffer_pages(io_tree, eb, WAIT_COMPLETE, + mirror_num); + if (!ret) { +@@ -493,15 +493,6 @@ static int btree_read_extent_buffer_pages(struct btrfs_fs_info *fs_info, + break; + } + +- /* +- * This buffer's crc is fine, but its contents are corrupted, so +- * there is no reason to read the other copies, they won't be +- * any less wrong. +- */ +- if (test_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags) || +- ret == -EUCLEAN) +- break; +- + num_copies = btrfs_num_copies(fs_info, + eb->start, eb->len); + if (num_copies == 1) +diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c +index 7d81cc415264..ca4902c66dc4 100644 +--- a/fs/btrfs/file.c ++++ b/fs/btrfs/file.c +@@ -2088,6 +2088,30 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) + + atomic_inc(&root->log_batch); + ++ /* ++ * Before we acquired the inode's lock, someone may have dirtied more ++ * pages in the target range. We need to make sure that writeback for ++ * any such pages does not start while we are logging the inode, because ++ * if it does, any of the following might happen when we are not doing a ++ * full inode sync: ++ * ++ * 1) We log an extent after its writeback finishes but before its ++ * checksums are added to the csum tree, leading to -EIO errors ++ * when attempting to read the extent after a log replay. ++ * ++ * 2) We can end up logging an extent before its writeback finishes. ++ * Therefore after the log replay we will have a file extent item ++ * pointing to an unwritten extent (and no data checksums as well). ++ * ++ * So trigger writeback for any eventual new dirty pages and then we ++ * wait for all ordered extents to complete below. ++ */ ++ ret = start_ordered_ops(inode, start, end); ++ if (ret) { ++ inode_unlock(inode); ++ goto out; ++ } ++ + /* + * We have to do this here to avoid the priority inversion of waiting on + * IO of a lower priority task while holding a transaction open.
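The btrfs_sync_file() hunk above is the core of the fsync fix: writeback is now started a second time after the inode lock is taken, so pages dirtied between the first flush and inode_lock() can no longer be logged while their writeback is still racing. A condensed sketch of the resulting control flow (an illustration only, with fsync_flow_sketch() as a hypothetical stand-in; the real function also juggles the log context, the transaction, and several more error paths):

static int fsync_flow_sketch(struct inode *inode, loff_t start, loff_t end)
{
        int ret;

        /* First flush, issued before taking the inode lock. */
        ret = start_ordered_ops(inode, start, end);
        if (ret)
                return ret;

        inode_lock(inode);

        /*
         * New in this patch: flush pages dirtied after the first pass,
         * so the wait on ordered extents below covers them as well.
         */
        ret = start_ordered_ops(inode, start, end);
        if (ret) {
                inode_unlock(inode);
                return ret;
        }

        /* ... log the inode, wait for ordered extents, commit ... */

        inode_unlock(inode);
        return ret;
}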
+diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c +index b070401406be..ff434663d65b 100644 +--- a/fs/btrfs/qgroup.c ++++ b/fs/btrfs/qgroup.c +@@ -2244,7 +2244,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid, + int i; + u64 *i_qgroups; + struct btrfs_fs_info *fs_info = trans->fs_info; +- struct btrfs_root *quota_root = fs_info->quota_root; ++ struct btrfs_root *quota_root; + struct btrfs_qgroup *srcgroup; + struct btrfs_qgroup *dstgroup; + u32 level_size = 0; +@@ -2254,6 +2254,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid, + if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) + goto out; + ++ quota_root = fs_info->quota_root; + if (!quota_root) { + ret = -EINVAL; + goto out; +diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c +index 60bf8dfe7df4..0526b6c473c7 100644 +--- a/fs/btrfs/relocation.c ++++ b/fs/btrfs/relocation.c +@@ -3963,6 +3963,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) + restart: + if (update_backref_cache(trans, &rc->backref_cache)) { + btrfs_end_transaction(trans); ++ trans = NULL; + continue; + } + +diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c +index 6601c9aa5e35..8ad145820ea8 100644 +--- a/fs/btrfs/super.c ++++ b/fs/btrfs/super.c +@@ -2235,6 +2235,7 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd, + vol = memdup_user((void __user *)arg, sizeof(*vol)); + if (IS_ERR(vol)) + return PTR_ERR(vol); ++ vol->name[BTRFS_PATH_NAME_MAX] = '\0'; + + switch (cmd) { + case BTRFS_IOC_SCAN_DEV: +diff --git a/fs/direct-io.c b/fs/direct-io.c +index 093fb54cd316..199146036093 100644 +--- a/fs/direct-io.c ++++ b/fs/direct-io.c +@@ -325,8 +325,8 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, unsigned int flags) + */ + dio->iocb->ki_pos += transferred; + +- if (dio->op == REQ_OP_WRITE) +- ret = generic_write_sync(dio->iocb, transferred); ++ if (ret > 0 && dio->op == REQ_OP_WRITE) ++ ret = generic_write_sync(dio->iocb, ret); + dio->iocb->ki_complete(dio->iocb, ret, 0); + } + +diff --git a/fs/ext2/super.c b/fs/ext2/super.c +index 73bd58fa13de..0c38e31ec938 100644 +--- a/fs/ext2/super.c ++++ b/fs/ext2/super.c +@@ -895,6 +895,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) + if (sb->s_magic != EXT2_SUPER_MAGIC) + goto cantfind_ext2; + ++ opts.s_mount_opt = 0; + /* Set defaults before we parse the mount options */ + def_mount_opts = le32_to_cpu(es->s_default_mount_opts); + if (def_mount_opts & EXT2_DEFM_DEBUG) +diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c +index 62d9a659a8ff..dd8f10db82e9 100644 +--- a/fs/ext2/xattr.c ++++ b/fs/ext2/xattr.c +@@ -612,9 +612,9 @@ skip_replace: + } + + cleanup: +- brelse(bh); + if (!(bh && header == HDR(bh))) + kfree(header); ++ brelse(bh); + up_write(&EXT2_I(inode)->xattr_sem); + + return error; +diff --git a/fs/udf/super.c b/fs/udf/super.c +index b997e3116e37..c495db7165ae 100644 +--- a/fs/udf/super.c ++++ b/fs/udf/super.c +@@ -831,16 +831,20 @@ static int udf_load_pvoldesc(struct super_block *sb, sector_t block) + + + ret = udf_dstrCS0toChar(sb, outstr, 31, pvoldesc->volIdent, 32); +- if (ret < 0) +- goto out_bh; +- +- strncpy(UDF_SB(sb)->s_volume_ident, outstr, ret); ++ if (ret < 0) { ++ strcpy(UDF_SB(sb)->s_volume_ident, "InvalidName"); ++ pr_warn("incorrect volume identification, setting to " ++ "'InvalidName'\n"); ++ } else { ++ strncpy(UDF_SB(sb)->s_volume_ident, outstr, ret); ++ } + udf_debug("volIdent[] = '%s'\n", UDF_SB(sb)->s_volume_ident); + + ret = udf_dstrCS0toChar(sb, 
outstr, 127, pvoldesc->volSetIdent, 128); +- if (ret < 0) ++ if (ret < 0) { ++ ret = 0; + goto out_bh; +- ++ } + outstr[ret] = 0; + udf_debug("volSetIdent[] = '%s'\n", outstr); + +diff --git a/fs/udf/unicode.c b/fs/udf/unicode.c +index 45234791fec2..5fcfa96463eb 100644 +--- a/fs/udf/unicode.c ++++ b/fs/udf/unicode.c +@@ -351,6 +351,11 @@ try_again: + return u_len; + } + ++/* ++ * Convert CS0 dstring to output charset. Warning: This function may truncate ++ * input string if it is too long as it is used for informational strings only ++ * and it is better to truncate the string than to refuse mounting a media. ++ */ + int udf_dstrCS0toChar(struct super_block *sb, uint8_t *utf_o, int o_len, + const uint8_t *ocu_i, int i_len) + { +@@ -359,9 +364,12 @@ int udf_dstrCS0toChar(struct super_block *sb, uint8_t *utf_o, int o_len, + if (i_len > 0) { + s_len = ocu_i[i_len - 1]; + if (s_len >= i_len) { +- pr_err("incorrect dstring lengths (%d/%d)\n", +- s_len, i_len); +- return -EINVAL; ++ pr_warn("incorrect dstring lengths (%d/%d)," ++ " truncating\n", s_len, i_len); ++ s_len = i_len - 1; ++ /* 2-byte encoding? Need to round properly... */ ++ if (ocu_i[0] == 16) ++ s_len -= (s_len - 1) & 2; + } + } + +diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c +index 356d2b8568c1..cd58939dc977 100644 +--- a/fs/userfaultfd.c ++++ b/fs/userfaultfd.c +@@ -1361,6 +1361,19 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx, + ret = -EINVAL; + if (!vma_can_userfault(cur)) + goto out_unlock; ++ ++ /* ++ * UFFDIO_COPY will fill file holes even without ++ * PROT_WRITE. This check enforces that if this is a ++ * MAP_SHARED, the process has write permission to the backing ++ * file. If VM_MAYWRITE is set it also enforces that on a ++ * MAP_SHARED vma: there is no F_WRITE_SEAL and no further ++ * F_WRITE_SEAL can be taken until the vma is destroyed. ++ */ ++ ret = -EPERM; ++ if (unlikely(!(cur->vm_flags & VM_MAYWRITE))) ++ goto out_unlock; ++ + /* + * If this vma contains ending address, and huge pages + * check alignment. 
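The -EPERM check added above closes a hole where UFFDIO_COPY could fill holes in a file the calling process had no write permission to. A minimal user-space demo of the new failure mode (a sketch under stated assumptions: /dev/shm/uffd-demo is a placeholder for any pre-existing tmpfs file, and 4096 stands in for the page size):

#include <errno.h>
#include <fcntl.h>
#include <linux/userfaultfd.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
        int fd = open("/dev/shm/uffd-demo", O_RDONLY);
        long uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
        struct uffdio_api api = { .api = UFFD_API };
        struct uffdio_register reg = { 0 };
        void *p;

        if (fd < 0 || uffd < 0 || ioctl(uffd, UFFDIO_API, &api))
                return 1;

        /* Read-only MAP_SHARED mapping of a tmpfs file: no VM_MAYWRITE. */
        p = mmap(NULL, 4096, PROT_READ, MAP_SHARED, fd, 0);
        if (p == MAP_FAILED)
                return 1;

        reg.range.start = (unsigned long)p;
        reg.range.len = 4096;
        reg.mode = UFFDIO_REGISTER_MODE_MISSING;

        /* With this patch applied the ioctl is expected to fail with
         * EPERM; previously it succeeded, and UFFDIO_COPY could later
         * write into the read-only file. */
        if (ioctl(uffd, UFFDIO_REGISTER, &reg) < 0)
                printf("UFFDIO_REGISTER: %s\n", strerror(errno));
        return 0;
}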
+@@ -1406,6 +1419,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx, + BUG_ON(!vma_can_userfault(vma)); + BUG_ON(vma->vm_userfaultfd_ctx.ctx && + vma->vm_userfaultfd_ctx.ctx != ctx); ++ WARN_ON(!(vma->vm_flags & VM_MAYWRITE)); + + /* + * Nothing to do: this vma is already registered into this +@@ -1552,6 +1566,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx, + cond_resched(); + + BUG_ON(!vma_can_userfault(vma)); ++ WARN_ON(!(vma->vm_flags & VM_MAYWRITE)); + + /* + * Nothing to do: this vma is already registered into this +diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h +index a397907e8d72..dd16e8218db3 100644 +--- a/include/linux/ftrace.h ++++ b/include/linux/ftrace.h +@@ -777,8 +777,8 @@ struct ftrace_ret_stack { + extern void return_to_handler(void); + + extern int +-ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth, +- unsigned long frame_pointer, unsigned long *retp); ++function_graph_enter(unsigned long ret, unsigned long func, ++ unsigned long frame_pointer, unsigned long *retp); + + unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx, + unsigned long ret, unsigned long *retp); +diff --git a/include/linux/hid-sensor-hub.h b/include/linux/hid-sensor-hub.h +index 331dc377c275..dc12f5c4b076 100644 +--- a/include/linux/hid-sensor-hub.h ++++ b/include/linux/hid-sensor-hub.h +@@ -177,6 +177,7 @@ int sensor_hub_input_get_attribute_info(struct hid_sensor_hub_device *hsdev, + * @attr_usage_id: Attribute usage id as per spec + * @report_id: Report id to look for + * @flag: Synchronous or asynchronous read ++* @is_signed: If true then fields < 32 bits will be sign-extended + * + * Issues a synchronous or asynchronous read request for an input attribute. + * Returns data up to 32 bits. 
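For callers, the new is_signed flag decides whether report fields narrower than 32 bits are widened with or without sign extension; the rtc-hid-sensor-time hunk earlier in this patch passes false because its date/time fields are unsigned. A self-contained illustration of the difference (plain C; widen16() is an invented helper, not driver code):

#include <stdint.h>
#include <stdio.h>

/* Widen a 16-bit raw report field to 32 bits, the two ways a caller
 * of sensor_hub_input_attr_get_raw_value() can now request. */
static int32_t widen16(uint16_t raw, int is_signed)
{
        return is_signed ? (int32_t)(int16_t)raw : (int32_t)raw;
}

int main(void)
{
        uint16_t raw = 0xfff6; /* -10 when interpreted as signed 16-bit */

        printf("is_signed=1 -> %d\n", widen16(raw, 1)); /* -10 */
        printf("is_signed=0 -> %d\n", widen16(raw, 0)); /* 65526 */
        return 0;
}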
+@@ -190,7 +191,8 @@ enum sensor_hub_read_flags { + int sensor_hub_input_attr_get_raw_value(struct hid_sensor_hub_device *hsdev, + u32 usage_id, + u32 attr_usage_id, u32 report_id, +- enum sensor_hub_read_flags flag ++ enum sensor_hub_read_flags flag, ++ bool is_signed + ); + + /** +diff --git a/include/linux/net_dim.h b/include/linux/net_dim.h +index c79e859408e6..fd458389f7d1 100644 +--- a/include/linux/net_dim.h ++++ b/include/linux/net_dim.h +@@ -406,6 +406,8 @@ static inline void net_dim(struct net_dim *dim, + } + /* fall through */ + case NET_DIM_START_MEASURE: ++ net_dim_sample(end_sample.event_ctr, end_sample.pkt_ctr, end_sample.byte_ctr, ++ &dim->start_sample); + dim->state = NET_DIM_MEASURE_IN_PROGRESS; + break; + case NET_DIM_APPLY_NEW_PROFILE: +diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h +index 4f36431c380b..561feb560619 100644 +--- a/include/linux/ptrace.h ++++ b/include/linux/ptrace.h +@@ -62,8 +62,8 @@ extern void exit_ptrace(struct task_struct *tracer, struct list_head *dead); + #define PTRACE_MODE_READ 0x01 + #define PTRACE_MODE_ATTACH 0x02 + #define PTRACE_MODE_NOAUDIT 0x04 +-#define PTRACE_MODE_FSCREDS 0x08 +-#define PTRACE_MODE_REALCREDS 0x10 ++#define PTRACE_MODE_FSCREDS 0x08 ++#define PTRACE_MODE_REALCREDS 0x10 + + /* shorthands for READ/ATTACH and FSCREDS/REALCREDS combinations */ + #define PTRACE_MODE_READ_FSCREDS (PTRACE_MODE_READ | PTRACE_MODE_FSCREDS) +diff --git a/include/linux/sched.h b/include/linux/sched.h +index 977cb57d7bc9..4abb5bd74b04 100644 +--- a/include/linux/sched.h ++++ b/include/linux/sched.h +@@ -1108,6 +1108,7 @@ struct task_struct { + #ifdef CONFIG_FUNCTION_GRAPH_TRACER + /* Index of current stored address in ret_stack: */ + int curr_ret_stack; ++ int curr_ret_depth; + + /* Stack of return addresses for return function tracing: */ + struct ftrace_ret_stack *ret_stack; +@@ -1439,6 +1440,8 @@ static inline bool is_percpu_thread(void) + #define PFA_SPREAD_SLAB 2 /* Spread some slab caches over cpuset */ + #define PFA_SPEC_SSB_DISABLE 3 /* Speculative Store Bypass disabled */ + #define PFA_SPEC_SSB_FORCE_DISABLE 4 /* Speculative Store Bypass force disabled*/ ++#define PFA_SPEC_IB_DISABLE 5 /* Indirect branch speculation restricted */ ++#define PFA_SPEC_IB_FORCE_DISABLE 6 /* Indirect branch speculation permanently restricted */ + + #define TASK_PFA_TEST(name, func) \ + static inline bool task_##func(struct task_struct *p) \ +@@ -1470,6 +1473,13 @@ TASK_PFA_CLEAR(SPEC_SSB_DISABLE, spec_ssb_disable) + TASK_PFA_TEST(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable) + TASK_PFA_SET(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable) + ++TASK_PFA_TEST(SPEC_IB_DISABLE, spec_ib_disable) ++TASK_PFA_SET(SPEC_IB_DISABLE, spec_ib_disable) ++TASK_PFA_CLEAR(SPEC_IB_DISABLE, spec_ib_disable) ++ ++TASK_PFA_TEST(SPEC_IB_FORCE_DISABLE, spec_ib_force_disable) ++TASK_PFA_SET(SPEC_IB_FORCE_DISABLE, spec_ib_force_disable) ++ + static inline void + current_restore_flags(unsigned long orig_flags, unsigned long flags) + { +diff --git a/include/linux/sched/smt.h b/include/linux/sched/smt.h +new file mode 100644 +index 000000000000..59d3736c454c +--- /dev/null ++++ b/include/linux/sched/smt.h +@@ -0,0 +1,20 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++#ifndef _LINUX_SCHED_SMT_H ++#define _LINUX_SCHED_SMT_H ++ ++#include <linux/static_key.h> ++ ++#ifdef CONFIG_SCHED_SMT ++extern struct static_key_false sched_smt_present; ++ ++static __always_inline bool sched_smt_active(void) ++{ ++ return static_branch_likely(&sched_smt_present); ++} ++#else ++static inline bool 
sched_smt_active(void) { return false; } ++#endif ++ ++void arch_smt_update(void); ++ ++#endif +diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h +index 17a13e4785fc..e6ef9cc05e60 100644 +--- a/include/linux/skbuff.h ++++ b/include/linux/skbuff.h +@@ -1311,6 +1311,22 @@ static inline void skb_zcopy_set(struct sk_buff *skb, struct ubuf_info *uarg) + } + } + ++static inline void skb_zcopy_set_nouarg(struct sk_buff *skb, void *val) ++{ ++ skb_shinfo(skb)->destructor_arg = (void *)((uintptr_t) val | 0x1UL); ++ skb_shinfo(skb)->tx_flags |= SKBTX_ZEROCOPY_FRAG; ++} ++ ++static inline bool skb_zcopy_is_nouarg(struct sk_buff *skb) ++{ ++ return (uintptr_t) skb_shinfo(skb)->destructor_arg & 0x1UL; ++} ++ ++static inline void *skb_zcopy_get_nouarg(struct sk_buff *skb) ++{ ++ return (void *)((uintptr_t) skb_shinfo(skb)->destructor_arg & ~0x1UL); ++} ++ + /* Release a reference on a zerocopy structure */ + static inline void skb_zcopy_clear(struct sk_buff *skb, bool zerocopy) + { +@@ -1320,7 +1336,7 @@ static inline void skb_zcopy_clear(struct sk_buff *skb, bool zerocopy) + if (uarg->callback == sock_zerocopy_callback) { + uarg->zerocopy = uarg->zerocopy && zerocopy; + sock_zerocopy_put(uarg); +- } else { ++ } else if (!skb_zcopy_is_nouarg(skb)) { + uarg->callback(uarg, zerocopy); + } + +diff --git a/include/linux/tcp.h b/include/linux/tcp.h +index 263e37271afd..d2c8f280e48f 100644 +--- a/include/linux/tcp.h ++++ b/include/linux/tcp.h +@@ -196,6 +196,7 @@ struct tcp_sock { + u32 rcv_tstamp; /* timestamp of last received ACK (for keepalives) */ + u32 lsndtime; /* timestamp of last sent data packet (for restart window) */ + u32 last_oow_ack_time; /* timestamp of last out-of-window ACK */ ++ u32 compressed_ack_rcv_nxt; + + u32 tsoffset; /* timestamp offset */ + +diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h +index c0d7ea0bf5b6..b17201edfa09 100644 +--- a/include/uapi/linux/prctl.h ++++ b/include/uapi/linux/prctl.h +@@ -212,6 +212,7 @@ struct prctl_mm_map { + #define PR_SET_SPECULATION_CTRL 53 + /* Speculation control variants */ + # define PR_SPEC_STORE_BYPASS 0 ++# define PR_SPEC_INDIRECT_BRANCH 1 + /* Return and control values for PR_SET/GET_SPECULATION_CTRL */ + # define PR_SPEC_NOT_AFFECTED 0 + # define PR_SPEC_PRCTL (1UL << 0) +diff --git a/kernel/cpu.c b/kernel/cpu.c +index 0097acec1c71..1699ff68c412 100644 +--- a/kernel/cpu.c ++++ b/kernel/cpu.c +@@ -10,6 +10,7 @@ + #include <linux/sched/signal.h> + #include <linux/sched/hotplug.h> + #include <linux/sched/task.h> ++#include <linux/sched/smt.h> + #include <linux/unistd.h> + #include <linux/cpu.h> + #include <linux/oom.h> +@@ -346,6 +347,12 @@ void cpu_hotplug_enable(void) + EXPORT_SYMBOL_GPL(cpu_hotplug_enable); + #endif /* CONFIG_HOTPLUG_CPU */ + ++/* ++ * Architectures that need SMT-specific errata handling during SMT hotplug ++ * should override this. ++ */ ++void __weak arch_smt_update(void) { } ++ + #ifdef CONFIG_HOTPLUG_SMT + enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED; + EXPORT_SYMBOL_GPL(cpu_smt_control); +@@ -982,6 +989,7 @@ out: + * concurrent CPU hotplug via cpu_add_remove_lock. 
+ */ + lockup_detector_cleanup(); ++ arch_smt_update(); + return ret; + } + +@@ -1110,6 +1118,7 @@ static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target) + ret = cpuhp_up_callbacks(cpu, st, target); + out: + cpus_write_unlock(); ++ arch_smt_update(); + return ret; + } + +@@ -2052,8 +2061,10 @@ static int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval) + */ + cpuhp_offline_cpu_device(cpu); + } +- if (!ret) ++ if (!ret) { + cpu_smt_control = ctrlval; ++ arch_smt_update(); ++ } + cpu_maps_update_done(); + return ret; + } +@@ -2064,6 +2075,7 @@ static int cpuhp_smt_enable(void) + + cpu_maps_update_begin(); + cpu_smt_control = CPU_SMT_ENABLED; ++ arch_smt_update(); + for_each_present_cpu(cpu) { + /* Skip online CPUs and CPUs on offline nodes */ + if (cpu_online(cpu) || !node_online(cpu_to_node(cpu))) +diff --git a/kernel/sched/core.c b/kernel/sched/core.c +index 35551110d277..2beda4b726e2 100644 +--- a/kernel/sched/core.c ++++ b/kernel/sched/core.c +@@ -5741,15 +5741,10 @@ int sched_cpu_activate(unsigned int cpu) + + #ifdef CONFIG_SCHED_SMT + /* +- * The sched_smt_present static key needs to be evaluated on every +- * hotplug event because at boot time SMT might be disabled when +- * the number of booted CPUs is limited. +- * +- * If then later a sibling gets hotplugged, then the key would stay +- * off and SMT scheduling would never be functional. ++ * When going up, increment the number of cores with SMT present. + */ +- if (cpumask_weight(cpu_smt_mask(cpu)) > 1) +- static_branch_enable_cpuslocked(&sched_smt_present); ++ if (cpumask_weight(cpu_smt_mask(cpu)) == 2) ++ static_branch_inc_cpuslocked(&sched_smt_present); + #endif + set_cpu_active(cpu, true); + +@@ -5793,6 +5788,14 @@ int sched_cpu_deactivate(unsigned int cpu) + */ + synchronize_rcu_mult(call_rcu, call_rcu_sched); + ++#ifdef CONFIG_SCHED_SMT ++ /* ++ * When going down, decrement the number of cores with SMT present. 
++ */ ++ if (cpumask_weight(cpu_smt_mask(cpu)) == 2) ++ static_branch_dec_cpuslocked(&sched_smt_present); ++#endif ++ + if (!sched_smp_initialized) + return 0; + +diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h +index 9683f458aec7..6c25bbe87bd3 100644 +--- a/kernel/sched/sched.h ++++ b/kernel/sched/sched.h +@@ -23,6 +23,7 @@ + #include <linux/sched/prio.h> + #include <linux/sched/rt.h> + #include <linux/sched/signal.h> ++#include <linux/sched/smt.h> + #include <linux/sched/stat.h> + #include <linux/sched/sysctl.h> + #include <linux/sched/task.h> +@@ -930,9 +931,6 @@ static inline int cpu_of(struct rq *rq) + + + #ifdef CONFIG_SCHED_SMT +- +-extern struct static_key_false sched_smt_present; +- + extern void __update_idle_core(struct rq *rq); + + static inline void update_idle_core(struct rq *rq) +diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c +index f536f601bd46..77734451cb05 100644 +--- a/kernel/trace/ftrace.c ++++ b/kernel/trace/ftrace.c +@@ -817,7 +817,7 @@ function_profile_call(unsigned long ip, unsigned long parent_ip, + #ifdef CONFIG_FUNCTION_GRAPH_TRACER + static int profile_graph_entry(struct ftrace_graph_ent *trace) + { +- int index = trace->depth; ++ int index = current->curr_ret_stack; + + function_profile_call(trace->func, 0, NULL, NULL); + +@@ -852,7 +852,7 @@ static void profile_graph_return(struct ftrace_graph_ret *trace) + if (!fgraph_graph_time) { + int index; + +- index = trace->depth; ++ index = current->curr_ret_stack; + + /* Append this call time to the parent time to subtract */ + if (index) +@@ -6814,6 +6814,7 @@ static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list) + atomic_set(&t->tracing_graph_pause, 0); + atomic_set(&t->trace_overrun, 0); + t->curr_ret_stack = -1; ++ t->curr_ret_depth = -1; + /* Make sure the tasks see the -1 first: */ + smp_wmb(); + t->ret_stack = ret_stack_list[start++]; +@@ -7038,6 +7039,7 @@ graph_init_task(struct task_struct *t, struct ftrace_ret_stack *ret_stack) + void ftrace_graph_init_idle_task(struct task_struct *t, int cpu) + { + t->curr_ret_stack = -1; ++ t->curr_ret_depth = -1; + /* + * The idle task has no parent, it either has its own + * stack or no stack at all. 
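The ftrace hunks above and below split what used to be a single counter in two: curr_ret_stack stays the index into the shadow return stack, while the new curr_ret_depth tracks call nesting for the tracer, and it keeps counting even when a push fails or the entry handler declines. A toy user-space model of the enter path (graph_enter() and push_return_trace() are invented stand-ins mirroring the shape of the new function_graph_enter(), not kernel code):

#include <stdio.h>

#define STACK_MAX 8

static int curr_ret_stack = -1;  /* index into the shadow stack */
static int curr_ret_depth = -1;  /* nesting depth reported to tracers */

static int push_return_trace(void)
{
        if (curr_ret_stack + 1 >= STACK_MAX)
                return -1;       /* stack full: -EBUSY in the kernel */
        curr_ret_stack++;
        return 0;
}

static int graph_enter(int entry_handler_accepts)
{
        int depth = ++curr_ret_depth;  /* counted before the push */

        if (push_return_trace())
                goto out;
        if (!entry_handler_accepts)
                goto out_ret;          /* unwind the push, keep balance */

        printf("enter at depth %d\n", depth);
        return 0;
out_ret:
        curr_ret_stack--;
out:
        curr_ret_depth--;
        return -1;
}

int main(void)
{
        graph_enter(1);  /* traced at depth 0 */
        graph_enter(0);  /* declined: both counters restored */
        return 0;
}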
+@@ -7068,6 +7070,7 @@ void ftrace_graph_init_task(struct task_struct *t) + /* Make sure we do not use the parent ret_stack */ + t->ret_stack = NULL; + t->curr_ret_stack = -1; ++ t->curr_ret_depth = -1; + + if (ftrace_graph_active) { + struct ftrace_ret_stack *ret_stack; +diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c +index 169b3c44ee97..2561460d7baf 100644 +--- a/kernel/trace/trace_functions_graph.c ++++ b/kernel/trace/trace_functions_graph.c +@@ -118,8 +118,8 @@ print_graph_duration(struct trace_array *tr, unsigned long long duration, + struct trace_seq *s, u32 flags); + + /* Add a function return address to the trace stack on thread info.*/ +-int +-ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth, ++static int ++ftrace_push_return_trace(unsigned long ret, unsigned long func, + unsigned long frame_pointer, unsigned long *retp) + { + unsigned long long calltime; +@@ -177,9 +177,31 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth, + #ifdef HAVE_FUNCTION_GRAPH_RET_ADDR_PTR + current->ret_stack[index].retp = retp; + #endif +- *depth = current->curr_ret_stack; ++ return 0; ++} ++ ++int function_graph_enter(unsigned long ret, unsigned long func, ++ unsigned long frame_pointer, unsigned long *retp) ++{ ++ struct ftrace_graph_ent trace; ++ ++ trace.func = func; ++ trace.depth = ++current->curr_ret_depth; ++ ++ if (ftrace_push_return_trace(ret, func, ++ frame_pointer, retp)) ++ goto out; ++ ++ /* Only trace if the calling function expects to */ ++ if (!ftrace_graph_entry(&trace)) ++ goto out_ret; + + return 0; + out_ret: ++ current->curr_ret_stack--; + out: ++ current->curr_ret_depth--; ++ return -EBUSY; + } + + /* Retrieve a function return address to the trace stack on thread info.*/ +@@ -241,7 +263,13 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret, + trace->func = current->ret_stack[index].func; + trace->calltime = current->ret_stack[index].calltime; + trace->overrun = atomic_read(&current->trace_overrun); +- trace->depth = index; ++ trace->depth = current->curr_ret_depth--; ++ /* ++ * We still want to trace interrupts coming in if ++ * max_depth is set to 1. Make sure the decrement is ++ * seen before ftrace_graph_return. ++ */ ++ barrier(); + } + + /* +@@ -255,6 +283,12 @@ unsigned long ftrace_return_to_handler(unsigned long frame_pointer) + + ftrace_pop_return_trace(&trace, &ret, frame_pointer); + trace.rettime = trace_clock_local(); ++ ftrace_graph_return(&trace); ++ /* ++ * The ftrace_graph_return() may still access the current ++ * ret_stack structure, we need to make sure the update of ++ * curr_ret_stack is after that. ++ */ + barrier(); + current->curr_ret_stack--; + /* +@@ -267,13 +301,6 @@ unsigned long ftrace_return_to_handler(unsigned long frame_pointer) + return ret; + } + +- /* +- * The trace should run after decrementing the ret counter +- * in case an interrupt were to come in. We don't want to +- * lose the interrupt if max_depth is set. 
+- */ +- ftrace_graph_return(&trace); +- + if (unlikely(!ret)) { + ftrace_graph_stop(); + WARN_ON(1); +diff --git a/lib/test_kmod.c b/lib/test_kmod.c +index e3ddd836491f..d82d022111e0 100644 +--- a/lib/test_kmod.c ++++ b/lib/test_kmod.c +@@ -1214,7 +1214,6 @@ void unregister_test_dev_kmod(struct kmod_test_device *test_dev) + + dev_info(test_dev->dev, "removing interface\n"); + misc_deregister(&test_dev->misc_dev); +- kfree(&test_dev->misc_dev.name); + + mutex_unlock(&test_dev->config_mutex); + mutex_unlock(&test_dev->trigger_mutex); +diff --git a/mm/huge_memory.c b/mm/huge_memory.c +index deed97fba979..15310f14c25e 100644 +--- a/mm/huge_memory.c ++++ b/mm/huge_memory.c +@@ -2322,7 +2322,7 @@ void vma_adjust_trans_huge(struct vm_area_struct *vma, + } + } + +-static void freeze_page(struct page *page) ++static void unmap_page(struct page *page) + { + enum ttu_flags ttu_flags = TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS | + TTU_RMAP_LOCKED | TTU_SPLIT_HUGE_PMD; +@@ -2337,7 +2337,7 @@ static void freeze_page(struct page *page) + VM_BUG_ON_PAGE(!unmap_success, page); + } + +-static void unfreeze_page(struct page *page) ++static void remap_page(struct page *page) + { + int i; + if (PageTransHuge(page)) { +@@ -2373,6 +2373,12 @@ static void __split_huge_page_tail(struct page *head, int tail, + (1L << PG_unevictable) | + (1L << PG_dirty))); + ++ /* ->mapping in first tail page is compound_mapcount */ ++ VM_BUG_ON_PAGE(tail > 2 && page_tail->mapping != TAIL_MAPPING, ++ page_tail); ++ page_tail->mapping = head->mapping; ++ page_tail->index = head->index + tail; ++ + /* Page flags must be visible before we make the page non-compound. */ + smp_wmb(); + +@@ -2393,12 +2399,6 @@ static void __split_huge_page_tail(struct page *head, int tail, + if (page_is_idle(head)) + set_page_idle(page_tail); + +- /* ->mapping in first tail page is compound_mapcount */ +- VM_BUG_ON_PAGE(tail > 2 && page_tail->mapping != TAIL_MAPPING, +- page_tail); +- page_tail->mapping = head->mapping; +- +- page_tail->index = head->index + tail; + page_cpupid_xchg_last(page_tail, page_cpupid_last(head)); + + /* +@@ -2410,12 +2410,11 @@ static void __split_huge_page_tail(struct page *head, int tail, + } + + static void __split_huge_page(struct page *page, struct list_head *list, +- unsigned long flags) ++ pgoff_t end, unsigned long flags) + { + struct page *head = compound_head(page); + struct zone *zone = page_zone(head); + struct lruvec *lruvec; +- pgoff_t end = -1; + int i; + + lruvec = mem_cgroup_page_lruvec(head, zone->zone_pgdat); +@@ -2423,9 +2422,6 @@ static void __split_huge_page(struct page *page, struct list_head *list, + /* complete memcg works before add pages to LRU */ + mem_cgroup_split_huge_fixup(head); + +- if (!PageAnon(page)) +- end = DIV_ROUND_UP(i_size_read(head->mapping->host), PAGE_SIZE); +- + for (i = HPAGE_PMD_NR - 1; i >= 1; i--) { + __split_huge_page_tail(head, i, lruvec, list); + /* Some pages can be beyond i_size: drop them from page cache */ +@@ -2454,7 +2450,7 @@ static void __split_huge_page(struct page *page, struct list_head *list, + + spin_unlock_irqrestore(zone_lru_lock(page_zone(head)), flags); + +- unfreeze_page(head); ++ remap_page(head); + + for (i = 0; i < HPAGE_PMD_NR; i++) { + struct page *subpage = head + i; +@@ -2597,6 +2593,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) + int count, mapcount, extra_pins, ret; + bool mlocked; + unsigned long flags; ++ pgoff_t end; + + VM_BUG_ON_PAGE(is_huge_zero_page(page), page); + VM_BUG_ON_PAGE(!PageLocked(page), page); +@@ -2619,6 
+2616,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) + ret = -EBUSY; + goto out; + } ++ end = -1; + mapping = NULL; + anon_vma_lock_write(anon_vma); + } else { +@@ -2632,10 +2630,19 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) + + anon_vma = NULL; + i_mmap_lock_read(mapping); ++ ++ /* ++ *__split_huge_page() may need to trim off pages beyond EOF: ++ * but on 32-bit, i_size_read() takes an irq-unsafe seqlock, ++ * which cannot be nested inside the page tree lock. So note ++ * end now: i_size itself may be changed at any moment, but ++ * head page lock is good enough to serialize the trimming. ++ */ ++ end = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE); + } + + /* +- * Racy check if we can split the page, before freeze_page() will ++ * Racy check if we can split the page, before unmap_page() will + * split PMDs + */ + if (!can_split_huge_page(head, &extra_pins)) { +@@ -2644,7 +2651,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) + } + + mlocked = PageMlocked(page); +- freeze_page(head); ++ unmap_page(head); + VM_BUG_ON_PAGE(compound_mapcount(head), head); + + /* Make sure the page is not on per-CPU pagevec as it takes pin */ +@@ -2681,7 +2688,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) + if (mapping) + __dec_node_page_state(page, NR_SHMEM_THPS); + spin_unlock(&pgdata->split_queue_lock); +- __split_huge_page(page, list, flags); ++ __split_huge_page(page, list, end, flags); + if (PageSwapCache(head)) { + swp_entry_t entry = { .val = page_private(head) }; + +@@ -2701,7 +2708,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) + fail: if (mapping) + xa_unlock(&mapping->i_pages); + spin_unlock_irqrestore(zone_lru_lock(page_zone(head)), flags); +- unfreeze_page(head); ++ remap_page(head); + ret = -EBUSY; + } + +diff --git a/mm/khugepaged.c b/mm/khugepaged.c +index a31d740e6cd1..fde5820be24d 100644 +--- a/mm/khugepaged.c ++++ b/mm/khugepaged.c +@@ -1287,7 +1287,7 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff) + * collapse_shmem - collapse small tmpfs/shmem pages into huge one. + * + * Basic scheme is simple, details are more complex: +- * - allocate and freeze a new huge page; ++ * - allocate and lock a new huge page; + * - scan over radix tree replacing old pages the new one + * + swap in pages if necessary; + * + fill in gaps; +@@ -1295,11 +1295,11 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff) + * - if replacing succeed: + * + copy data over; + * + free old pages; +- * + unfreeze huge page; ++ * + unlock huge page; + * - if replacing failed; + * + put all pages back and unfreeze them; + * + restore gaps in the radix-tree; +- * + free huge page; ++ * + unlock and free huge page; + */ + static void collapse_shmem(struct mm_struct *mm, + struct address_space *mapping, pgoff_t start, +@@ -1330,18 +1330,15 @@ static void collapse_shmem(struct mm_struct *mm, + goto out; + } + ++ __SetPageLocked(new_page); ++ __SetPageSwapBacked(new_page); + new_page->index = start; + new_page->mapping = mapping; +- __SetPageSwapBacked(new_page); +- __SetPageLocked(new_page); +- BUG_ON(!page_ref_freeze(new_page, 1)); +- + + /* +- * At this point the new_page is 'frozen' (page_count() is zero), locked +- * and not up-to-date. It's safe to insert it into radix tree, because +- * nobody would be able to map it or use it in other way until we +- * unfreeze it. 
++ * At this point the new_page is locked and not up-to-date. ++ * It's safe to insert it into the page cache, because nobody would ++ * be able to map it or use it in another way until we unlock it. + */ + + index = start; +@@ -1349,19 +1346,29 @@ static void collapse_shmem(struct mm_struct *mm, + radix_tree_for_each_slot(slot, &mapping->i_pages, &iter, start) { + int n = min(iter.index, end) - index; + ++ /* ++ * Stop if extent has been hole-punched, and is now completely ++ * empty (the more obvious i_size_read() check would take an ++ * irq-unsafe seqlock on 32-bit). ++ */ ++ if (n >= HPAGE_PMD_NR) { ++ result = SCAN_TRUNCATED; ++ goto tree_locked; ++ } ++ + /* + * Handle holes in the radix tree: charge it from shmem and + * insert relevant subpage of new_page into the radix-tree. + */ + if (n && !shmem_charge(mapping->host, n)) { + result = SCAN_FAIL; +- break; ++ goto tree_locked; + } +- nr_none += n; + for (; index < min(iter.index, end); index++) { + radix_tree_insert(&mapping->i_pages, index, + new_page + (index % HPAGE_PMD_NR)); + } ++ nr_none += n; + + /* We are done. */ + if (index >= end) +@@ -1377,12 +1384,12 @@ static void collapse_shmem(struct mm_struct *mm, + result = SCAN_FAIL; + goto tree_unlocked; + } +- xa_lock_irq(&mapping->i_pages); + } else if (trylock_page(page)) { + get_page(page); ++ xa_unlock_irq(&mapping->i_pages); + } else { + result = SCAN_PAGE_LOCK; +- break; ++ goto tree_locked; + } + + /* +@@ -1391,17 +1398,24 @@ static void collapse_shmem(struct mm_struct *mm, + */ + VM_BUG_ON_PAGE(!PageLocked(page), page); + VM_BUG_ON_PAGE(!PageUptodate(page), page); +- VM_BUG_ON_PAGE(PageTransCompound(page), page); ++ ++ /* ++ * If file was truncated then extended, or hole-punched, before ++ * we locked the first page, then a THP might be there already. ++ */ ++ if (PageTransCompound(page)) { ++ result = SCAN_PAGE_COMPOUND; ++ goto out_unlock; ++ } + + if (page_mapping(page) != mapping) { + result = SCAN_TRUNCATED; + goto out_unlock; + } +- xa_unlock_irq(&mapping->i_pages); + + if (isolate_lru_page(page)) { + result = SCAN_DEL_PAGE_LRU; +- goto out_isolate_failed; ++ goto out_unlock; + } + + if (page_mapped(page)) +@@ -1422,7 +1436,9 @@ static void collapse_shmem(struct mm_struct *mm, + */ + if (!page_ref_freeze(page, 3)) { + result = SCAN_PAGE_COUNT; +- goto out_lru; ++ xa_unlock_irq(&mapping->i_pages); ++ putback_lru_page(page); ++ goto out_unlock; + } + + /* +@@ -1438,17 +1454,10 @@ static void collapse_shmem(struct mm_struct *mm, + slot = radix_tree_iter_resume(slot, &iter); + index++; + continue; +-out_lru: +- xa_unlock_irq(&mapping->i_pages); +- putback_lru_page(page); +-out_isolate_failed: +- unlock_page(page); +- put_page(page); +- goto tree_unlocked; + out_unlock: + unlock_page(page); + put_page(page); +- break; ++ goto tree_unlocked; + } + + /* +@@ -1456,14 +1465,18 @@ out_unlock: + * This code only triggers if there's nothing in radix tree + * beyond 'end'. 
+ */ +- if (result == SCAN_SUCCEED && index < end) { ++ if (index < end) { + int n = end - index; + ++ /* Stop if extent has been truncated, and is now empty */ ++ if (n >= HPAGE_PMD_NR) { ++ result = SCAN_TRUNCATED; ++ goto tree_locked; ++ } + if (!shmem_charge(mapping->host, n)) { + result = SCAN_FAIL; + goto tree_locked; + } +- + for (; index < end; index++) { + radix_tree_insert(&mapping->i_pages, index, + new_page + (index % HPAGE_PMD_NR)); +@@ -1471,59 +1484,64 @@ out_unlock: + nr_none += n; + } + ++ __inc_node_page_state(new_page, NR_SHMEM_THPS); ++ if (nr_none) { ++ struct zone *zone = page_zone(new_page); ++ ++ __mod_node_page_state(zone->zone_pgdat, NR_FILE_PAGES, nr_none); ++ __mod_node_page_state(zone->zone_pgdat, NR_SHMEM, nr_none); ++ } ++ + tree_locked: + xa_unlock_irq(&mapping->i_pages); + tree_unlocked: + + if (result == SCAN_SUCCEED) { +- unsigned long flags; +- struct zone *zone = page_zone(new_page); +- + /* + * Replacing old pages with new one has succeed, now we need to + * copy the content and free old pages. + */ ++ index = start; + list_for_each_entry_safe(page, tmp, &pagelist, lru) { ++ while (index < page->index) { ++ clear_highpage(new_page + (index % HPAGE_PMD_NR)); ++ index++; ++ } + copy_highpage(new_page + (page->index % HPAGE_PMD_NR), + page); + list_del(&page->lru); +- unlock_page(page); +- page_ref_unfreeze(page, 1); + page->mapping = NULL; ++ page_ref_unfreeze(page, 1); + ClearPageActive(page); + ClearPageUnevictable(page); ++ unlock_page(page); + put_page(page); ++ index++; + } +- +- local_irq_save(flags); +- __inc_node_page_state(new_page, NR_SHMEM_THPS); +- if (nr_none) { +- __mod_node_page_state(zone->zone_pgdat, NR_FILE_PAGES, nr_none); +- __mod_node_page_state(zone->zone_pgdat, NR_SHMEM, nr_none); ++ while (index < end) { ++ clear_highpage(new_page + (index % HPAGE_PMD_NR)); ++ index++; + } +- local_irq_restore(flags); + +- /* +- * Remove pte page tables, so we can re-faulti +- * the page as huge. +- */ +- retract_page_tables(mapping, start); +- +- /* Everything is ready, let's unfreeze the new_page */ +- set_page_dirty(new_page); + SetPageUptodate(new_page); +- page_ref_unfreeze(new_page, HPAGE_PMD_NR); ++ page_ref_add(new_page, HPAGE_PMD_NR - 1); ++ set_page_dirty(new_page); + mem_cgroup_commit_charge(new_page, memcg, false, true); + lru_cache_add_anon(new_page); +- unlock_page(new_page); + ++ /* ++ * Remove pte page tables, so we can re-fault the page as huge. 
++ */ ++ retract_page_tables(mapping, start); + *hpage = NULL; + + khugepaged_pages_collapsed++; + } else { + /* Something went wrong: rollback changes to the radix-tree */ +- shmem_uncharge(mapping->host, nr_none); + xa_lock_irq(&mapping->i_pages); ++ mapping->nrpages -= nr_none; ++ shmem_uncharge(mapping->host, nr_none); ++ + radix_tree_for_each_slot(slot, &mapping->i_pages, &iter, start) { + if (iter.index >= end) + break; +@@ -1546,19 +1564,18 @@ tree_unlocked: + radix_tree_replace_slot(&mapping->i_pages, slot, page); + slot = radix_tree_iter_resume(slot, &iter); + xa_unlock_irq(&mapping->i_pages); +- putback_lru_page(page); + unlock_page(page); ++ putback_lru_page(page); + xa_lock_irq(&mapping->i_pages); + } + VM_BUG_ON(nr_none); + xa_unlock_irq(&mapping->i_pages); + +- /* Unfreeze new_page, caller would take care about freeing it */ +- page_ref_unfreeze(new_page, 1); + mem_cgroup_cancel_charge(new_page, memcg, true); +- unlock_page(new_page); + new_page->mapping = NULL; + } ++ ++ unlock_page(new_page); + out: + VM_BUG_ON(!list_empty(&pagelist)); + /* TODO: tracepoints */ +diff --git a/mm/rmap.c b/mm/rmap.c +index 1e79fac3186b..85b7f9423352 100644 +--- a/mm/rmap.c ++++ b/mm/rmap.c +@@ -1627,16 +1627,9 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, + address + PAGE_SIZE); + } else { + /* +- * We should not need to notify here as we reach this +- * case only from freeze_page() itself only call from +- * split_huge_page_to_list() so everything below must +- * be true: +- * - page is not anonymous +- * - page is locked +- * +- * So as it is a locked file back page thus it can not +- * be remove from the page cache and replace by a new +- * page before mmu_notifier_invalidate_range_end so no ++ * This is a locked file-backed page, thus it cannot ++ * be removed from the page cache and replaced by a new ++ * page before mmu_notifier_invalidate_range_end, so no + * concurrent thread might update its page table to + * point at new page while a device still is using this + * page. 
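Several of the collapse_shmem() changes above hinge on the page reference freezing contract: an old page may only be replaced if its refcount is exactly the expected value (page cache, LRU isolation, and our own pin make three), and the rewritten success path now uses page_ref_add() on an ordinarily locked new_page instead of keeping it frozen from allocation. A toy model of the freeze/unfreeze semantics (a user-space sketch with a C11 atomic standing in for page->_refcount; ref_freeze() and ref_unfreeze() are invented names):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_int refcount;

/* Succeeds only if exactly `count` references exist; on success the
 * count drops to 0, so no concurrent get_page() can take a new pin
 * while the page is being replaced. */
static bool ref_freeze(int count)
{
        int expected = count;
        return atomic_compare_exchange_strong(&refcount, &expected, 0);
}

static void ref_unfreeze(int count)
{
        atomic_store(&refcount, count);
}

int main(void)
{
        atomic_store(&refcount, 3); /* page cache + isolation + our pin */
        printf("freeze(2): %d\n", ref_freeze(2)); /* 0: unexpected pin, SCAN_PAGE_COUNT */
        printf("freeze(3): %d\n", ref_freeze(3)); /* 1: page is now frozen */
        ref_unfreeze(1); /* hand back a single reference on rollback */
        return 0;
}

The cmpxchg-to-zero is what guarantees the replacement is race-free: any path that bumps the refcount first will make the freeze fail instead of silently using a page that is being swapped out from under it.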
+diff --git a/mm/shmem.c b/mm/shmem.c +index 38d228a30fdc..0b02b539072e 100644 +--- a/mm/shmem.c ++++ b/mm/shmem.c +@@ -297,12 +297,14 @@ bool shmem_charge(struct inode *inode, long pages) + if (!shmem_inode_acct_block(inode, pages)) + return false; + ++ /* nrpages adjustment first, then shmem_recalc_inode() when balanced */ ++ inode->i_mapping->nrpages += pages; ++ + spin_lock_irqsave(&info->lock, flags); + info->alloced += pages; + inode->i_blocks += pages * BLOCKS_PER_PAGE; + shmem_recalc_inode(inode); + spin_unlock_irqrestore(&info->lock, flags); +- inode->i_mapping->nrpages += pages; + + return true; + } +@@ -312,6 +314,8 @@ void shmem_uncharge(struct inode *inode, long pages) + struct shmem_inode_info *info = SHMEM_I(inode); + unsigned long flags; + ++ /* nrpages adjustment done by __delete_from_page_cache() or caller */ ++ + spin_lock_irqsave(&info->lock, flags); + info->alloced -= pages; + inode->i_blocks -= pages * BLOCKS_PER_PAGE; +@@ -1547,11 +1551,13 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp, + { + struct page *oldpage, *newpage; + struct address_space *swap_mapping; ++ swp_entry_t entry; + pgoff_t swap_index; + int error; + + oldpage = *pagep; +- swap_index = page_private(oldpage); ++ entry.val = page_private(oldpage); ++ swap_index = swp_offset(entry); + swap_mapping = page_mapping(oldpage); + + /* +@@ -1570,7 +1576,7 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp, + __SetPageLocked(newpage); + __SetPageSwapBacked(newpage); + SetPageUptodate(newpage); +- set_page_private(newpage, swap_index); ++ set_page_private(newpage, entry.val); + SetPageSwapCache(newpage); + + /* +diff --git a/mm/truncate.c b/mm/truncate.c +index 1d2fb2dca96f..71b65aab8077 100644 +--- a/mm/truncate.c ++++ b/mm/truncate.c +@@ -520,9 +520,13 @@ void truncate_inode_pages_final(struct address_space *mapping) + */ + xa_lock_irq(&mapping->i_pages); + xa_unlock_irq(&mapping->i_pages); +- +- truncate_inode_pages(mapping, 0); + } ++ ++ /* ++ * Cleancache needs notification even if there are no pages or shadow ++ * entries. ++ */ ++ truncate_inode_pages(mapping, 0); + } + EXPORT_SYMBOL(truncate_inode_pages_final); + +diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c +index 5029f241908f..f0af11b1cdf3 100644 +--- a/mm/userfaultfd.c ++++ b/mm/userfaultfd.c +@@ -205,8 +205,9 @@ retry: + if (!dst_vma || !is_vm_hugetlb_page(dst_vma)) + goto out_unlock; + /* +- * Only allow __mcopy_atomic_hugetlb on userfaultfd +- * registered ranges. ++ * Check the vma is registered in uffd, this is ++ * required to enforce the VM_MAYWRITE check done at ++ * uffd registration time. + */ + if (!dst_vma->vm_userfaultfd_ctx.ctx) + goto out_unlock; +@@ -449,13 +450,9 @@ retry: + if (!dst_vma) + goto out_unlock; + /* +- * Be strict and only allow __mcopy_atomic on userfaultfd +- * registered ranges to prevent userland errors going +- * unnoticed. As far as the VM consistency is concerned, it +- * would be perfectly safe to remove this check, but there's +- * no useful usage for __mcopy_atomic ouside of userfaultfd +- * registered ranges. This is after all why these are ioctls +- * belonging to the userfaultfd and not syscalls. ++ * Check the vma is registered in uffd, this is required to ++ * enforce the VM_MAYWRITE check done at uffd registration ++ * time. 
+ */ + if (!dst_vma->vm_userfaultfd_ctx.ctx) + goto out_unlock; +diff --git a/net/core/dev.c b/net/core/dev.c +index 097c02101450..22af88c47756 100644 +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -5945,11 +5945,14 @@ bool napi_complete_done(struct napi_struct *n, int work_done) + if (work_done) + timeout = n->dev->gro_flush_timeout; + ++ /* When the NAPI instance uses a timeout and keeps postponing ++ * it, we need to bound somehow the time packets are kept in ++ * the GRO layer ++ */ ++ napi_gro_flush(n, !!timeout); + if (timeout) + hrtimer_start(&n->timer, ns_to_ktime(timeout), + HRTIMER_MODE_REL_PINNED); +- else +- napi_gro_flush(n, false); + } + if (unlikely(!list_empty(&n->poll_list))) { + /* If n->poll_list is not empty, we need to mask irqs */ +diff --git a/net/core/skbuff.c b/net/core/skbuff.c +index f817f336595d..abbbd7fd17fe 100644 +--- a/net/core/skbuff.c ++++ b/net/core/skbuff.c +@@ -4912,6 +4912,11 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet) + nf_reset(skb); + nf_reset_trace(skb); + ++#ifdef CONFIG_NET_SWITCHDEV ++ skb->offload_fwd_mark = 0; ++ skb->offload_mr_fwd_mark = 0; ++#endif ++ + if (!xnet) + return; + +diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c +index 72898cbef43d..664fa7d8f7d9 100644 +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -4276,7 +4276,7 @@ static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq) + * If the sack array is full, forget about the last one. + */ + if (this_sack >= TCP_NUM_SACKS) { +- if (tp->compressed_ack) ++ if (tp->compressed_ack > TCP_FASTRETRANS_THRESH) + tcp_send_ack(sk); + this_sack--; + tp->rx_opt.num_sacks--; +@@ -5196,7 +5196,17 @@ send_now: + if (!tcp_is_sack(tp) || + tp->compressed_ack >= sock_net(sk)->ipv4.sysctl_tcp_comp_sack_nr) + goto send_now; +- tp->compressed_ack++; ++ ++ if (tp->compressed_ack_rcv_nxt != tp->rcv_nxt) { ++ tp->compressed_ack_rcv_nxt = tp->rcv_nxt; ++ if (tp->compressed_ack > TCP_FASTRETRANS_THRESH) ++ NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPACKCOMPRESSED, ++ tp->compressed_ack - TCP_FASTRETRANS_THRESH); ++ tp->compressed_ack = 0; ++ } ++ ++ if (++tp->compressed_ack <= TCP_FASTRETRANS_THRESH) ++ goto send_now; + + if (hrtimer_is_queued(&tp->compressed_ack_timer)) + return; +diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c +index 597dbd749f05..68f65ddf9e3c 100644 +--- a/net/ipv4/tcp_output.c ++++ b/net/ipv4/tcp_output.c +@@ -165,10 +165,10 @@ static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts, + { + struct tcp_sock *tp = tcp_sk(sk); + +- if (unlikely(tp->compressed_ack)) { ++ if (unlikely(tp->compressed_ack > TCP_FASTRETRANS_THRESH)) { + NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPACKCOMPRESSED, +- tp->compressed_ack); +- tp->compressed_ack = 0; ++ tp->compressed_ack - TCP_FASTRETRANS_THRESH); ++ tp->compressed_ack = TCP_FASTRETRANS_THRESH; + if (hrtimer_try_to_cancel(&tp->compressed_ack_timer) == 1) + __sock_put(sk); + } +diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c +index 7fdf222a0bdf..57eae8d70ba1 100644 +--- a/net/ipv4/tcp_timer.c ++++ b/net/ipv4/tcp_timer.c +@@ -740,7 +740,7 @@ static enum hrtimer_restart tcp_compressed_ack_kick(struct hrtimer *timer) + + bh_lock_sock(sk); + if (!sock_owned_by_user(sk)) { +- if (tp->compressed_ack) ++ if (tp->compressed_ack > TCP_FASTRETRANS_THRESH) + tcp_send_ack(sk); + } else { + if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED, +diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c +index d6e94dc7e290..6477b131e809 100644 +--- a/net/packet/af_packet.c ++++ 
b/net/packet/af_packet.c +@@ -2394,7 +2394,7 @@ static void tpacket_destruct_skb(struct sk_buff *skb) + void *ph; + __u32 ts; + +- ph = skb_shinfo(skb)->destructor_arg; ++ ph = skb_zcopy_get_nouarg(skb); + packet_dec_pending(&po->tx_ring); + + ts = __packet_set_timestamp(po, ph, skb); +@@ -2461,7 +2461,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, + skb->mark = po->sk.sk_mark; + skb->tstamp = sockc->transmit_time; + sock_tx_timestamp(&po->sk, sockc->tsflags, &skb_shinfo(skb)->tx_flags); +- skb_shinfo(skb)->destructor_arg = ph.raw; ++ skb_zcopy_set_nouarg(skb, ph.raw); + + skb_reserve(skb, hlen); + skb_reset_network_header(skb); +diff --git a/net/tipc/node.c b/net/tipc/node.c +index 2afc4f8c37a7..488019766433 100644 +--- a/net/tipc/node.c ++++ b/net/tipc/node.c +@@ -584,12 +584,15 @@ static void tipc_node_clear_links(struct tipc_node *node) + /* tipc_node_cleanup - delete nodes that does not + * have active links for NODE_CLEANUP_AFTER time + */ +-static int tipc_node_cleanup(struct tipc_node *peer) ++static bool tipc_node_cleanup(struct tipc_node *peer) + { + struct tipc_net *tn = tipc_net(peer->net); + bool deleted = false; + +- spin_lock_bh(&tn->node_list_lock); ++ /* If lock held by tipc_node_stop() the node will be deleted anyway */ ++ if (!spin_trylock_bh(&tn->node_list_lock)) ++ return false; ++ + tipc_node_write_lock(peer); + + if (!node_is_up(peer) && time_after(jiffies, peer->delete_at)) { +diff --git a/scripts/Makefile.build b/scripts/Makefile.build +index 54da4b070db3..64fac0ad32d6 100644 +--- a/scripts/Makefile.build ++++ b/scripts/Makefile.build +@@ -248,10 +248,8 @@ ifdef CONFIG_GCOV_KERNEL + objtool_args += --no-unreachable + endif + ifdef CONFIG_RETPOLINE +-ifneq ($(RETPOLINE_CFLAGS),) + objtool_args += --retpoline + endif +-endif + + + ifdef CONFIG_MODVERSIONS +diff --git a/sound/core/control.c b/sound/core/control.c +index 9aa15bfc7936..649d3217590e 100644 +--- a/sound/core/control.c ++++ b/sound/core/control.c +@@ -348,6 +348,40 @@ static int snd_ctl_find_hole(struct snd_card *card, unsigned int count) + return 0; + } + ++/* add a new kcontrol object; call with card->controls_rwsem locked */ ++static int __snd_ctl_add(struct snd_card *card, struct snd_kcontrol *kcontrol) ++{ ++ struct snd_ctl_elem_id id; ++ unsigned int idx; ++ unsigned int count; ++ ++ id = kcontrol->id; ++ if (id.index > UINT_MAX - kcontrol->count) ++ return -EINVAL; ++ ++ if (snd_ctl_find_id(card, &id)) { ++ dev_err(card->dev, ++ "control %i:%i:%i:%s:%i is already present\n", ++ id.iface, id.device, id.subdevice, id.name, id.index); ++ return -EBUSY; ++ } ++ ++ if (snd_ctl_find_hole(card, kcontrol->count) < 0) ++ return -ENOMEM; ++ ++ list_add_tail(&kcontrol->list, &card->controls); ++ card->controls_count += kcontrol->count; ++ kcontrol->id.numid = card->last_numid + 1; ++ card->last_numid += kcontrol->count; ++ ++ id = kcontrol->id; ++ count = kcontrol->count; ++ for (idx = 0; idx < count; idx++, id.index++, id.numid++) ++ snd_ctl_notify(card, SNDRV_CTL_EVENT_MASK_ADD, &id); ++ ++ return 0; ++} ++ + /** + * snd_ctl_add - add the control instance to the card + * @card: the card instance +@@ -364,45 +398,18 @@ static int snd_ctl_find_hole(struct snd_card *card, unsigned int count) + */ + int snd_ctl_add(struct snd_card *card, struct snd_kcontrol *kcontrol) + { +- struct snd_ctl_elem_id id; +- unsigned int idx; +- unsigned int count; + int err = -EINVAL; + + if (! 
kcontrol) + return err; + if (snd_BUG_ON(!card || !kcontrol->info)) + goto error; +- id = kcontrol->id; +- if (id.index > UINT_MAX - kcontrol->count) +- goto error; + + down_write(&card->controls_rwsem); +- if (snd_ctl_find_id(card, &id)) { +- up_write(&card->controls_rwsem); +- dev_err(card->dev, "control %i:%i:%i:%s:%i is already present\n", +- id.iface, +- id.device, +- id.subdevice, +- id.name, +- id.index); +- err = -EBUSY; +- goto error; +- } +- if (snd_ctl_find_hole(card, kcontrol->count) < 0) { +- up_write(&card->controls_rwsem); +- err = -ENOMEM; +- goto error; +- } +- list_add_tail(&kcontrol->list, &card->controls); +- card->controls_count += kcontrol->count; +- kcontrol->id.numid = card->last_numid + 1; +- card->last_numid += kcontrol->count; +- id = kcontrol->id; +- count = kcontrol->count; ++ err = __snd_ctl_add(card, kcontrol); + up_write(&card->controls_rwsem); +- for (idx = 0; idx < count; idx++, id.index++, id.numid++) +- snd_ctl_notify(card, SNDRV_CTL_EVENT_MASK_ADD, &id); ++ if (err < 0) ++ goto error; + return 0; + + error: +@@ -1361,9 +1368,12 @@ static int snd_ctl_elem_add(struct snd_ctl_file *file, + kctl->tlv.c = snd_ctl_elem_user_tlv; + + /* This function manage to free the instance on failure. */ +- err = snd_ctl_add(card, kctl); +- if (err < 0) +- return err; ++ down_write(&card->controls_rwsem); ++ err = __snd_ctl_add(card, kctl); ++ if (err < 0) { ++ snd_ctl_free_one(kctl); ++ goto unlock; ++ } + offset = snd_ctl_get_ioff(kctl, &info->id); + snd_ctl_build_ioff(&info->id, kctl, offset); + /* +@@ -1374,10 +1384,10 @@ static int snd_ctl_elem_add(struct snd_ctl_file *file, + * which locks the element. + */ + +- down_write(&card->controls_rwsem); + card->user_ctl_count++; +- up_write(&card->controls_rwsem); + ++ unlock: ++ up_write(&card->controls_rwsem); + return 0; + } + +diff --git a/sound/isa/wss/wss_lib.c b/sound/isa/wss/wss_lib.c +index 32453f81b95a..3a5008837576 100644 +--- a/sound/isa/wss/wss_lib.c ++++ b/sound/isa/wss/wss_lib.c +@@ -1531,7 +1531,6 @@ static int snd_wss_playback_open(struct snd_pcm_substream *substream) + if (err < 0) { + if (chip->release_dma) + chip->release_dma(chip, chip->dma_private_data, chip->dma1); +- snd_free_pages(runtime->dma_area, runtime->dma_bytes); + return err; + } + chip->playback_substream = substream; +@@ -1572,7 +1571,6 @@ static int snd_wss_capture_open(struct snd_pcm_substream *substream) + if (err < 0) { + if (chip->release_dma) + chip->release_dma(chip, chip->dma_private_data, chip->dma2); +- snd_free_pages(runtime->dma_area, runtime->dma_bytes); + return err; + } + chip->capture_substream = substream; +diff --git a/sound/pci/ac97/ac97_codec.c b/sound/pci/ac97/ac97_codec.c +index f4459d1a9d67..27b468f057dd 100644 +--- a/sound/pci/ac97/ac97_codec.c ++++ b/sound/pci/ac97/ac97_codec.c +@@ -824,7 +824,7 @@ static int snd_ac97_put_spsa(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_ + { + struct snd_ac97 *ac97 = snd_kcontrol_chip(kcontrol); + int reg = kcontrol->private_value & 0xff; +- int shift = (kcontrol->private_value >> 8) & 0xff; ++ int shift = (kcontrol->private_value >> 8) & 0x0f; + int mask = (kcontrol->private_value >> 16) & 0xff; + // int invert = (kcontrol->private_value >> 24) & 0xff; + unsigned short value, old, new; +diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c +index 625cb6c7b7d6..5810be2c6c34 100644 +--- a/sound/pci/hda/hda_intel.c ++++ b/sound/pci/hda/hda_intel.c +@@ -2256,6 +2256,8 @@ static struct snd_pci_quirk power_save_blacklist[] = { + /* 
+diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
+index e58537e13ad3..cf5d26642bcd 100644
+--- a/sound/pci/hda/patch_realtek.c
++++ b/sound/pci/hda/patch_realtek.c
+@@ -388,6 +388,7 @@ static void alc_fill_eapd_coef(struct hda_codec *codec)
+ case 0x10ec0285:
+ case 0x10ec0298:
+ case 0x10ec0289:
++ case 0x10ec0300:
+ alc_update_coef_idx(codec, 0x10, 1<<9, 0);
+ break;
+ case 0x10ec0275:
+@@ -2830,6 +2831,7 @@ enum {
+ ALC269_TYPE_ALC215,
+ ALC269_TYPE_ALC225,
+ ALC269_TYPE_ALC294,
++ ALC269_TYPE_ALC300,
+ ALC269_TYPE_ALC700,
+ };
+
+@@ -2864,6 +2866,7 @@ static int alc269_parse_auto_config(struct hda_codec *codec)
+ case ALC269_TYPE_ALC215:
+ case ALC269_TYPE_ALC225:
+ case ALC269_TYPE_ALC294:
++ case ALC269_TYPE_ALC300:
+ case ALC269_TYPE_ALC700:
+ ssids = alc269_ssids;
+ break;
+@@ -5358,6 +5361,16 @@ static void alc274_fixup_bind_dacs(struct hda_codec *codec,
+ spec->gen.preferred_dacs = preferred_pairs;
+ }
+
++/* The DAC of NID 0x3 will introduce click/pop noise on headphones, so invalidate it */
++static void alc285_fixup_invalidate_dacs(struct hda_codec *codec,
++ const struct hda_fixup *fix, int action)
++{
++ if (action != HDA_FIXUP_ACT_PRE_PROBE)
++ return;
++
++ snd_hda_override_wcaps(codec, 0x03, 0);
++}
++
+ /* for hda_fixup_thinkpad_acpi() */
+ #include "thinkpad_helper.c"
+
+@@ -5495,6 +5508,8 @@ enum {
+ ALC255_FIXUP_DELL_HEADSET_MIC,
+ ALC295_FIXUP_HP_X360,
+ ALC221_FIXUP_HP_HEADSET_MIC,
++ ALC285_FIXUP_LENOVO_HEADPHONE_NOISE,
++ ALC295_FIXUP_HP_AUTO_MUTE,
+ };
+
+ static const struct hda_fixup alc269_fixups[] = {
+@@ -5659,6 +5674,8 @@ static const struct hda_fixup alc269_fixups[] = {
+ [ALC269_FIXUP_HP_MUTE_LED_MIC3] = {
+ .type = HDA_FIXUP_FUNC,
+ .v.func = alc269_fixup_hp_mute_led_mic3,
++ .chained = true,
++ .chain_id = ALC295_FIXUP_HP_AUTO_MUTE
+ },
+ [ALC269_FIXUP_HP_GPIO_LED] = {
+ .type = HDA_FIXUP_FUNC,
+@@ -6362,6 +6379,14 @@ static const struct hda_fixup alc269_fixups[] = {
+ .chained = true,
+ .chain_id = ALC269_FIXUP_HEADSET_MIC
+ },
++ [ALC285_FIXUP_LENOVO_HEADPHONE_NOISE] = {
++ .type = HDA_FIXUP_FUNC,
++ .v.func = alc285_fixup_invalidate_dacs,
++ },
++ [ALC295_FIXUP_HP_AUTO_MUTE] = {
++ .type = HDA_FIXUP_FUNC,
++ .v.func = alc_fixup_auto_mute_via_amp,
++ },
+ };
+
+ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
+@@ -6532,6 +6557,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
+ SND_PCI_QUIRK(0x144d, 0xc740, "Samsung Ativ book 8 (NP870Z5G)", ALC269_FIXUP_ATIV_BOOK_8),
+ SND_PCI_QUIRK(0x1458, 0xfa53, "Gigabyte BXBT-2807", ALC283_FIXUP_HEADSET_MIC),
+ SND_PCI_QUIRK(0x1462, 0xb120, "MSI Cubi MS-B120", ALC283_FIXUP_HEADSET_MIC),
++ SND_PCI_QUIRK(0x1462, 0xb171, "Cubi N 8GL (MS-B171)", ALC283_FIXUP_HEADSET_MIC),
+ SND_PCI_QUIRK(0x17aa, 0x1036, "Lenovo P520", ALC233_FIXUP_LENOVO_MULTI_CODECS),
+ SND_PCI_QUIRK(0x17aa, 0x20f2, "Thinkpad SL410/510", ALC269_FIXUP_SKU_IGNORE),
+ SND_PCI_QUIRK(0x17aa, 0x215e, "Thinkpad L512", ALC269_FIXUP_SKU_IGNORE),
+@@ -7034,6 +7060,11 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = {
+ {0x12, 0x90a60130},
+ {0x19, 0x03a11020},
+ {0x21, 0x0321101f}),
++ SND_HDA_PIN_QUIRK(0x10ec0285, 0x17aa, "Lenovo", ALC285_FIXUP_LENOVO_HEADPHONE_NOISE,
++ {0x12, 0x90a60130},
++ {0x14, 0x90170110},
++ {0x19, 0x04a11040},
++ {0x21, 0x04211020}),
+ SND_HDA_PIN_QUIRK(0x10ec0288, 0x1028, "Dell", ALC288_FIXUP_DELL1_MIC_NO_PRESENCE,
+ {0x12, 0x90a60120},
+ {0x14, 0x90170110},
+@@ -7295,6 +7326,10 @@ static int patch_alc269(struct hda_codec *codec)
+ spec->gen.mixer_nid = 0; /* ALC2x4 does not have any loopback mixer path */
+ alc_update_coef_idx(codec, 0x6b, 0x0018, (1<<4) | (1<<3)); /* UAJ MIC Vref control by verb */
+ break;
++ case 0x10ec0300:
++ spec->codec_variant = ALC269_TYPE_ALC300;
++ spec->gen.mixer_nid = 0; /* no loopback on ALC300 */
++ break;
+ case 0x10ec0700:
+ case 0x10ec0701:
+ case 0x10ec0703:
+@@ -8404,6 +8439,7 @@ static const struct hda_device_id snd_hda_id_realtek[] = {
+ HDA_CODEC_ENTRY(0x10ec0295, "ALC295", patch_alc269),
+ HDA_CODEC_ENTRY(0x10ec0298, "ALC298", patch_alc269),
+ HDA_CODEC_ENTRY(0x10ec0299, "ALC299", patch_alc269),
++ HDA_CODEC_ENTRY(0x10ec0300, "ALC300", patch_alc269),
+ HDA_CODEC_REV_ENTRY(0x10ec0861, 0x100340, "ALC660", patch_alc861),
+ HDA_CODEC_ENTRY(0x10ec0660, "ALC660-VD", patch_alc861vd),
+ HDA_CODEC_ENTRY(0x10ec0861, "ALC861", patch_alc861),
+diff --git a/sound/soc/codecs/pcm186x.h b/sound/soc/codecs/pcm186x.h
+index 2c6ba55bf394..bb3f0c42a1cd 100644
+--- a/sound/soc/codecs/pcm186x.h
++++ b/sound/soc/codecs/pcm186x.h
+@@ -139,7 +139,7 @@ enum pcm186x_type {
+ #define PCM186X_MAX_REGISTER PCM186X_CURR_TRIM_CTRL
+
+ /* PCM186X_PAGE */
+-#define PCM186X_RESET 0xff
++#define PCM186X_RESET 0xfe
+
+ /* PCM186X_ADCX_INPUT_SEL_X */
+ #define PCM186X_ADC_INPUT_SEL_POL BIT(7)
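Most of the patch_realtek.c churn above follows two recurring patterns: a new codec (ALC300) is threaded through the variant enum, the coefficient setup, and the device-ID table; and a new fixup (ALC295_FIXUP_HP_AUTO_MUTE) is attached to an existing one via .chained/.chain_id, so every board that already matched the old quirk also picks up the new fix. The sketch below models only that chain walk; the real HDA fixup machinery (hda_fixup, snd_hda_apply_fixup) stages fixups across probe phases and is more involved than this toy table.

/* Minimal model of chained HDA fixups: applying one fixup follows
 * its chain_id until an entry is not chained.  Table layout here is
 * illustrative, not the kernel's struct hda_fixup. */
#include <stdbool.h>
#include <stdio.h>

struct fixup {
    void (*func)(void);
    bool chained;
    int  chain_id;
};

static void fix_mute_led(void)  { puts("apply mute-LED fixup"); }
static void fix_auto_mute(void) { puts("apply auto-mute fixup"); }

enum { FIX_MUTE_LED, FIX_AUTO_MUTE };

static const struct fixup fixups[] = {
    [FIX_MUTE_LED]  = { fix_mute_led,  true,  FIX_AUTO_MUTE },
    [FIX_AUTO_MUTE] = { fix_auto_mute, false, 0 },
};

static void apply_fixup(int id)
{
    for (;;) {
        fixups[id].func();
        if (!fixups[id].chained)
            break;
        id = fixups[id].chain_id;   /* follow the chain */
    }
}

int main(void)
{
    apply_fixup(FIX_MUTE_LED);      /* runs both fixups in order */
    return 0;
}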
+diff --git a/sound/soc/intel/boards/cht_bsw_max98090_ti.c b/sound/soc/intel/boards/cht_bsw_max98090_ti.c
+index db6976f4ddaa..9d9f6e41d81c 100644
+--- a/sound/soc/intel/boards/cht_bsw_max98090_ti.c
++++ b/sound/soc/intel/boards/cht_bsw_max98090_ti.c
+@@ -19,6 +19,7 @@
+  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  */
+
++#include <linux/dmi.h>
+ #include <linux/module.h>
+ #include <linux/platform_device.h>
+ #include <linux/slab.h>
+@@ -35,6 +36,8 @@
+ #define CHT_PLAT_CLK_3_HZ 19200000
+ #define CHT_CODEC_DAI "HiFi"
+
++#define QUIRK_PMC_PLT_CLK_0 0x01
++
+ struct cht_mc_private {
+ struct clk *mclk;
+ struct snd_soc_jack jack;
+@@ -385,11 +388,29 @@ static struct snd_soc_card snd_soc_card_cht = {
+ .num_controls = ARRAY_SIZE(cht_mc_controls),
+ };
+
++static const struct dmi_system_id cht_max98090_quirk_table[] = {
++ {
++ /* Swanky model Chromebook (Toshiba Chromebook 2) */
++ .matches = {
++ DMI_MATCH(DMI_PRODUCT_NAME, "Swanky"),
++ },
++ .driver_data = (void *)QUIRK_PMC_PLT_CLK_0,
++ },
++ {}
++};
++
+ static int snd_cht_mc_probe(struct platform_device *pdev)
+ {
++ const struct dmi_system_id *dmi_id;
+ struct device *dev = &pdev->dev;
+ int ret_val = 0;
+ struct cht_mc_private *drv;
++ const char *mclk_name;
++ int quirks = 0;
++
++ dmi_id = dmi_first_match(cht_max98090_quirk_table);
++ if (dmi_id)
++ quirks = (unsigned long)dmi_id->driver_data;
+
+ drv = devm_kzalloc(&pdev->dev, sizeof(*drv), GFP_KERNEL);
+ if (!drv)
+@@ -411,11 +432,16 @@ static int snd_cht_mc_probe(struct platform_device *pdev)
+ snd_soc_card_cht.dev = &pdev->dev;
+ snd_soc_card_set_drvdata(&snd_soc_card_cht, drv);
+
+- drv->mclk = devm_clk_get(&pdev->dev, "pmc_plt_clk_3");
++ if (quirks & QUIRK_PMC_PLT_CLK_0)
++ mclk_name = "pmc_plt_clk_0";
++ else
++ mclk_name = "pmc_plt_clk_3";
++
++ drv->mclk = devm_clk_get(&pdev->dev, mclk_name);
+ if (IS_ERR(drv->mclk)) {
+ dev_err(&pdev->dev,
+- "Failed to get MCLK from pmc_plt_clk_3: %ld\n",
+- PTR_ERR(drv->mclk));
++ "Failed to get MCLK from %s: %ld\n",
++ mclk_name, PTR_ERR(drv->mclk));
+ return PTR_ERR(drv->mclk);
+ }
+
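The cht_bsw_max98090_ti.c change above selects the MCLK source per board: a DMI quirk table matches the Swanky Chromebook by product name and carries a flags word in driver_data, and probe picks pmc_plt_clk_0 instead of pmc_plt_clk_3 when the flag is set. dmi_first_match() and struct dmi_system_id are the real kernel APIs; the user-space mock below only illustrates the table-plus-flags control flow.

/* Mock of the DMI-quirk pattern: match a board string, carry a
 * flags word, branch on it.  All names here are local stand-ins
 * for the kernel's DMI interfaces. */
#include <stdio.h>
#include <string.h>

#define QUIRK_PMC_PLT_CLK_0 0x01

struct dmi_id { const char *product; unsigned long driver_data; };

static const struct dmi_id quirk_table[] = {
    { "Swanky", QUIRK_PMC_PLT_CLK_0 },  /* Toshiba Chromebook 2 */
    { NULL, 0 }                          /* table terminator */
};

static const struct dmi_id *first_match(const char *product)
{
    for (const struct dmi_id *q = quirk_table; q->product; q++)
        if (strcmp(q->product, product) == 0)
            return q;
    return NULL;
}

int main(void)
{
    const struct dmi_id *id = first_match("Swanky");
    unsigned long quirks = id ? id->driver_data : 0;
    const char *mclk = (quirks & QUIRK_PMC_PLT_CLK_0)
                       ? "pmc_plt_clk_0" : "pmc_plt_clk_3";
    printf("MCLK source: %s\n", mclk);
    return 0;
}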
+diff --git a/sound/sparc/cs4231.c b/sound/sparc/cs4231.c
+index e73c962590eb..079063d8038d 100644
+--- a/sound/sparc/cs4231.c
++++ b/sound/sparc/cs4231.c
+@@ -1146,10 +1146,8 @@ static int snd_cs4231_playback_open(struct snd_pcm_substream *substream)
+ runtime->hw = snd_cs4231_playback;
+
+ err = snd_cs4231_open(chip, CS4231_MODE_PLAY);
+- if (err < 0) {
+- snd_free_pages(runtime->dma_area, runtime->dma_bytes);
++ if (err < 0)
+ return err;
+- }
+ chip->playback_substream = substream;
+ chip->p_periods_sent = 0;
+ snd_pcm_set_sync(substream);
+@@ -1167,10 +1165,8 @@ static int snd_cs4231_capture_open(struct snd_pcm_substream *substream)
+ runtime->hw = snd_cs4231_capture;
+
+ err = snd_cs4231_open(chip, CS4231_MODE_RECORD);
+- if (err < 0) {
+- snd_free_pages(runtime->dma_area, runtime->dma_bytes);
++ if (err < 0)
+ return err;
+- }
+ chip->capture_substream = substream;
+ chip->c_periods_sent = 0;
+ snd_pcm_set_sync(substream);
+diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h
+index c0d7ea0bf5b6..b17201edfa09 100644
+--- a/tools/include/uapi/linux/prctl.h
++++ b/tools/include/uapi/linux/prctl.h
+@@ -212,6 +212,7 @@ struct prctl_mm_map {
+ #define PR_SET_SPECULATION_CTRL 53
+ /* Speculation control variants */
+ # define PR_SPEC_STORE_BYPASS 0
++# define PR_SPEC_INDIRECT_BRANCH 1
+ /* Return and control values for PR_SET/GET_SPECULATION_CTRL */
+ # define PR_SPEC_NOT_AFFECTED 0
+ # define PR_SPEC_PRCTL (1UL << 0)
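The prctl.h hunk is the tools/ copy of the UAPI constant backing the spectre_v2_user= prctl and seccomp modes documented at the top of this patch: PR_SPEC_INDIRECT_BRANCH selects per-thread indirect branch speculation control through the existing PR_GET/SET_SPECULATION_CTRL calls. A minimal usage sketch follows; the constants are defined locally in case the installed headers predate 4.19.7, and note that PR_SPEC_DISABLE disables speculation, i.e. turns the STIBP mitigation on for the calling thread.

/* Query, then opt in to, per-thread indirect branch speculation
 * control.  Requires a kernel booted with spectre_v2_user=prctl
 * (or the seccomp default); otherwise the set call fails. */
#include <stdio.h>
#include <sys/prctl.h>

#ifndef PR_GET_SPECULATION_CTRL
#define PR_GET_SPECULATION_CTRL 52
#define PR_SET_SPECULATION_CTRL 53
#endif
#ifndef PR_SPEC_INDIRECT_BRANCH
#define PR_SPEC_INDIRECT_BRANCH 1
#endif
#ifndef PR_SPEC_PRCTL
#define PR_SPEC_PRCTL   (1UL << 0)
#define PR_SPEC_ENABLE  (1UL << 1)
#define PR_SPEC_DISABLE (1UL << 2)
#endif

int main(void)
{
    long state = prctl(PR_GET_SPECULATION_CTRL,
                       PR_SPEC_INDIRECT_BRANCH, 0, 0, 0);
    printf("indirect branch speculation state: %ld\n", state);

    /* PR_SPEC_DISABLE = disable speculation = mitigation on. */
    if (prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH,
              PR_SPEC_DISABLE, 0, 0) != 0)
        perror("PR_SET_SPECULATION_CTRL");
    return 0;
}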