diff options
author | Mike Pagano <mpagano@gentoo.org> | 2021-06-30 10:21:41 -0400 |
---|---|---|
committer | Mike Pagano <mpagano@gentoo.org> | 2021-06-30 10:21:41 -0400 |
commit | fcca3bfe3f19c1750be503fe947954344ceecfbd (patch) | |
tree | fe42af070b11cb94a0ac6ceb57baec0955030d0e | |
parent | Linux patch 5.12.13 (diff) | |
download | linux-patches-fcca3bfe3f19c1750be503fe947954344ceecfbd.tar.gz linux-patches-fcca3bfe3f19c1750be503fe947954344ceecfbd.tar.bz2 linux-patches-fcca3bfe3f19c1750be503fe947954344ceecfbd.zip |
Linux patch 5.12.14 (5.12-17)
Signed-off-by: Mike Pagano <mpagano@gentoo.org>
-rw-r--r-- | 0000_README | 4 | ||||
-rw-r--r-- | 1013_linux-5.12.14.patch | 4590 |
2 files changed, 4594 insertions, 0 deletions
diff --git a/0000_README b/0000_README index 34c90d1a..96f1ac7b 100644 --- a/0000_README +++ b/0000_README @@ -95,6 +95,10 @@ Patch: 1012_linux-5.12.13.patch From: http://www.kernel.org Desc: Linux 5.12.13 +Patch: 1013_linux-5.12.14.patch +From: http://www.kernel.org +Desc: Linux 5.12.14 + Patch: 1500_XATTR_USER_PREFIX.patch From: https://bugs.gentoo.org/show_bug.cgi?id=470644 Desc: Support for namespace user.pax.* on tmpfs. diff --git a/1013_linux-5.12.14.patch b/1013_linux-5.12.14.patch new file mode 100644 index 00000000..6e18a21d --- /dev/null +++ b/1013_linux-5.12.14.patch @@ -0,0 +1,4590 @@ +diff --git a/Makefile b/Makefile +index d2fe36db78aed..433f164f9ee0f 100644 +--- a/Makefile ++++ b/Makefile +@@ -1,7 +1,7 @@ + # SPDX-License-Identifier: GPL-2.0 + VERSION = 5 + PATCHLEVEL = 12 +-SUBLEVEL = 13 ++SUBLEVEL = 14 + EXTRAVERSION = + NAME = Frozen Wasteland + +diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c +index 1a5edf562e85e..73ca7797b92f6 100644 +--- a/arch/arm/kernel/setup.c ++++ b/arch/arm/kernel/setup.c +@@ -545,9 +545,11 @@ void notrace cpu_init(void) + * In Thumb-2, msr with an immediate value is not allowed. 
+ */ + #ifdef CONFIG_THUMB2_KERNEL +-#define PLC "r" ++#define PLC_l "l" ++#define PLC_r "r" + #else +-#define PLC "I" ++#define PLC_l "I" ++#define PLC_r "I" + #endif + + /* +@@ -569,15 +571,15 @@ void notrace cpu_init(void) + "msr cpsr_c, %9" + : + : "r" (stk), +- PLC (PSR_F_BIT | PSR_I_BIT | IRQ_MODE), ++ PLC_r (PSR_F_BIT | PSR_I_BIT | IRQ_MODE), + "I" (offsetof(struct stack, irq[0])), +- PLC (PSR_F_BIT | PSR_I_BIT | ABT_MODE), ++ PLC_r (PSR_F_BIT | PSR_I_BIT | ABT_MODE), + "I" (offsetof(struct stack, abt[0])), +- PLC (PSR_F_BIT | PSR_I_BIT | UND_MODE), ++ PLC_r (PSR_F_BIT | PSR_I_BIT | UND_MODE), + "I" (offsetof(struct stack, und[0])), +- PLC (PSR_F_BIT | PSR_I_BIT | FIQ_MODE), ++ PLC_r (PSR_F_BIT | PSR_I_BIT | FIQ_MODE), + "I" (offsetof(struct stack, fiq[0])), +- PLC (PSR_F_BIT | PSR_I_BIT | SVC_MODE) ++ PLC_l (PSR_F_BIT | PSR_I_BIT | SVC_MODE) + : "r14"); + #endif + } +diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile +index 5243bf2327c02..a5ee34117321d 100644 +--- a/arch/riscv/Makefile ++++ b/arch/riscv/Makefile +@@ -16,7 +16,7 @@ ifeq ($(CONFIG_DYNAMIC_FTRACE),y) + CC_FLAGS_FTRACE := -fpatchable-function-entry=8 + endif + +-ifeq ($(CONFIG_64BIT)$(CONFIG_CMODEL_MEDLOW),yy) ++ifeq ($(CONFIG_CMODEL_MEDLOW),y) + KBUILD_CFLAGS_MODULE += -mcmodel=medany + endif + +diff --git a/arch/riscv/boot/dts/sifive/fu740-c000.dtsi b/arch/riscv/boot/dts/sifive/fu740-c000.dtsi +index eeb4f8c3e0e72..d0d206cdb9990 100644 +--- a/arch/riscv/boot/dts/sifive/fu740-c000.dtsi ++++ b/arch/riscv/boot/dts/sifive/fu740-c000.dtsi +@@ -272,7 +272,7 @@ + cache-size = <2097152>; + cache-unified; + interrupt-parent = <&plic0>; +- interrupts = <19 20 21 22>; ++ interrupts = <19 21 22 20>; + reg = <0x0 0x2010000 0x0 0x1000>; + }; + gpio: gpio@10060000 { +diff --git a/arch/s390/include/asm/stacktrace.h b/arch/s390/include/asm/stacktrace.h +index 2b543163d90a0..76c6034428be8 100644 +--- a/arch/s390/include/asm/stacktrace.h ++++ b/arch/s390/include/asm/stacktrace.h +@@ -91,12 +91,16 @@ struct 
stack_frame { + CALL_ARGS_4(arg1, arg2, arg3, arg4); \ + register unsigned long r4 asm("6") = (unsigned long)(arg5) + +-#define CALL_FMT_0 "=&d" (r2) : +-#define CALL_FMT_1 "+&d" (r2) : +-#define CALL_FMT_2 CALL_FMT_1 "d" (r3), +-#define CALL_FMT_3 CALL_FMT_2 "d" (r4), +-#define CALL_FMT_4 CALL_FMT_3 "d" (r5), +-#define CALL_FMT_5 CALL_FMT_4 "d" (r6), ++/* ++ * To keep this simple mark register 2-6 as being changed (volatile) ++ * by the called function, even though register 6 is saved/nonvolatile. ++ */ ++#define CALL_FMT_0 "=&d" (r2) ++#define CALL_FMT_1 "+&d" (r2) ++#define CALL_FMT_2 CALL_FMT_1, "+&d" (r3) ++#define CALL_FMT_3 CALL_FMT_2, "+&d" (r4) ++#define CALL_FMT_4 CALL_FMT_3, "+&d" (r5) ++#define CALL_FMT_5 CALL_FMT_4, "+&d" (r6) + + #define CALL_CLOBBER_5 "0", "1", "14", "cc", "memory" + #define CALL_CLOBBER_4 CALL_CLOBBER_5 +@@ -118,7 +122,7 @@ struct stack_frame { + " brasl 14,%[_fn]\n" \ + " la 15,0(%[_prev])\n" \ + : [_prev] "=&a" (prev), CALL_FMT_##nr \ +- [_stack] "R" (stack), \ ++ : [_stack] "R" (stack), \ + [_bc] "i" (offsetof(struct stack_frame, back_chain)), \ + [_frame] "d" (frame), \ + [_fn] "X" (fn) : CALL_CLOBBER_##nr); \ +diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S +index 9cc71ca9a88f9..e84f495e7eb29 100644 +--- a/arch/s390/kernel/entry.S ++++ b/arch/s390/kernel/entry.S +@@ -418,6 +418,7 @@ ENTRY(\name) + xgr %r6,%r6 + xgr %r7,%r7 + xgr %r10,%r10 ++ xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) + mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC + stmg %r8,%r9,__PT_PSW(%r11) + tm %r8,0x0001 # coming from user space? 
+diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c +index 90163e6184f5c..080e7aed181f4 100644 +--- a/arch/s390/kernel/signal.c ++++ b/arch/s390/kernel/signal.c +@@ -512,7 +512,6 @@ void arch_do_signal_or_restart(struct pt_regs *regs, bool has_signal) + + /* No handlers present - check for system call restart */ + clear_pt_regs_flag(regs, PIF_SYSCALL); +- clear_pt_regs_flag(regs, PIF_SYSCALL_RESTART); + if (current->thread.system_call) { + regs->int_code = current->thread.system_call; + switch (regs->gprs[2]) { +diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c +index bfcc327acc6b2..26aa2614ee352 100644 +--- a/arch/s390/kernel/topology.c ++++ b/arch/s390/kernel/topology.c +@@ -66,7 +66,10 @@ static void cpu_group_map(cpumask_t *dst, struct mask_info *info, unsigned int c + { + static cpumask_t mask; + +- cpumask_copy(&mask, cpumask_of(cpu)); ++ cpumask_clear(&mask); ++ if (!cpu_online(cpu)) ++ goto out; ++ cpumask_set_cpu(cpu, &mask); + switch (topology_mode) { + case TOPOLOGY_MODE_HW: + while (info) { +@@ -83,10 +86,10 @@ static void cpu_group_map(cpumask_t *dst, struct mask_info *info, unsigned int c + default: + fallthrough; + case TOPOLOGY_MODE_SINGLE: +- cpumask_copy(&mask, cpumask_of(cpu)); + break; + } + cpumask_and(&mask, &mask, cpu_online_mask); ++out: + cpumask_copy(dst, &mask); + } + +@@ -95,7 +98,10 @@ static void cpu_thread_map(cpumask_t *dst, unsigned int cpu) + static cpumask_t mask; + int i; + +- cpumask_copy(&mask, cpumask_of(cpu)); ++ cpumask_clear(&mask); ++ if (!cpu_online(cpu)) ++ goto out; ++ cpumask_set_cpu(cpu, &mask); + if (topology_mode != TOPOLOGY_MODE_HW) + goto out; + cpu -= cpu % (smp_cpu_mtid + 1); +diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c +index 4efd39aacb9f2..8767dc53b5699 100644 +--- a/arch/x86/entry/common.c ++++ b/arch/x86/entry/common.c +@@ -127,8 +127,8 @@ static noinstr bool __do_fast_syscall_32(struct pt_regs *regs) + /* User code screwed up. 
*/ + regs->ax = -EFAULT; + +- instrumentation_end(); + local_irq_disable(); ++ instrumentation_end(); + irqentry_exit_to_user_mode(regs); + return false; + } +@@ -266,15 +266,16 @@ __visible noinstr void xen_pv_evtchn_do_upcall(struct pt_regs *regs) + irqentry_state_t state = irqentry_enter(regs); + bool inhcall; + ++ instrumentation_begin(); + run_sysvec_on_irqstack_cond(__xen_pv_evtchn_do_upcall, regs); + + inhcall = get_and_clear_inhcall(); + if (inhcall && !WARN_ON_ONCE(state.exit_rcu)) { +- instrumentation_begin(); + irqentry_exit_cond_resched(); + instrumentation_end(); + restore_inhcall(inhcall); + } else { ++ instrumentation_end(); + irqentry_exit(regs, state); + } + } +diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c +index 18df171296955..7050a9ebd73f1 100644 +--- a/arch/x86/events/core.c ++++ b/arch/x86/events/core.c +@@ -45,9 +45,11 @@ + #include "perf_event.h" + + struct x86_pmu x86_pmu __read_mostly; ++static struct pmu pmu; + + DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { + .enabled = 1, ++ .pmu = &pmu, + }; + + DEFINE_STATIC_KEY_FALSE(rdpmc_never_available_key); +@@ -380,10 +382,12 @@ int x86_reserve_hardware(void) + if (!atomic_inc_not_zero(&pmc_refcount)) { + mutex_lock(&pmc_reserve_mutex); + if (atomic_read(&pmc_refcount) == 0) { +- if (!reserve_pmc_hardware()) ++ if (!reserve_pmc_hardware()) { + err = -EBUSY; +- else ++ } else { + reserve_ds_buffers(); ++ reserve_lbr_buffers(); ++ } + } + if (!err) + atomic_inc(&pmc_refcount); +@@ -724,16 +728,23 @@ void x86_pmu_enable_all(int added) + } + } + +-static struct pmu pmu; +- + static inline int is_x86_event(struct perf_event *event) + { + return event->pmu == &pmu; + } + +-struct pmu *x86_get_pmu(void) ++struct pmu *x86_get_pmu(unsigned int cpu) + { +- return &pmu; ++ struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); ++ ++ /* ++ * All CPUs of the hybrid type have been offline. ++ * The x86_get_pmu() should not be invoked. 
++ */ ++ if (WARN_ON_ONCE(!cpuc->pmu)) ++ return &pmu; ++ ++ return cpuc->pmu; + } + /* + * Event scheduler state: +diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c +index 4c18e7fb58f58..77fe4fece6798 100644 +--- a/arch/x86/events/intel/core.c ++++ b/arch/x86/events/intel/core.c +@@ -4879,7 +4879,7 @@ static void update_tfa_sched(void *ignored) + * and if so force schedule out for all event types all contexts + */ + if (test_bit(3, cpuc->active_mask)) +- perf_pmu_resched(x86_get_pmu()); ++ perf_pmu_resched(x86_get_pmu(smp_processor_id())); + } + + static ssize_t show_sysctl_tfa(struct device *cdev, +diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c +index d32b302719fe5..72df2f392c863 100644 +--- a/arch/x86/events/intel/ds.c ++++ b/arch/x86/events/intel/ds.c +@@ -2192,7 +2192,7 @@ void __init intel_ds_init(void) + PERF_SAMPLE_TIME; + x86_pmu.flags |= PMU_FL_PEBS_ALL; + pebs_qual = "-baseline"; +- x86_get_pmu()->capabilities |= PERF_PMU_CAP_EXTENDED_REGS; ++ x86_get_pmu(smp_processor_id())->capabilities |= PERF_PMU_CAP_EXTENDED_REGS; + } else { + /* Only basic record supported */ + x86_pmu.large_pebs_flags &= +@@ -2207,7 +2207,7 @@ void __init intel_ds_init(void) + + if (x86_pmu.intel_cap.pebs_output_pt_available) { + pr_cont("PEBS-via-PT, "); +- x86_get_pmu()->capabilities |= PERF_PMU_CAP_AUX_OUTPUT; ++ x86_get_pmu(smp_processor_id())->capabilities |= PERF_PMU_CAP_AUX_OUTPUT; + } + + break; +diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c +index 21890dacfcfee..c9cd6ce0fa2ad 100644 +--- a/arch/x86/events/intel/lbr.c ++++ b/arch/x86/events/intel/lbr.c +@@ -658,7 +658,6 @@ static inline bool branch_user_callstack(unsigned br_sel) + + void intel_pmu_lbr_add(struct perf_event *event) + { +- struct kmem_cache *kmem_cache = event->pmu->task_ctx_cache; + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + + if (!x86_pmu.lbr_nr) +@@ -696,16 +695,11 @@ void intel_pmu_lbr_add(struct perf_event *event) + 
perf_sched_cb_inc(event->ctx->pmu); + if (!cpuc->lbr_users++ && !event->total_time_running) + intel_pmu_lbr_reset(); +- +- if (static_cpu_has(X86_FEATURE_ARCH_LBR) && +- kmem_cache && !cpuc->lbr_xsave && +- (cpuc->lbr_users != cpuc->lbr_pebs_users)) +- cpuc->lbr_xsave = kmem_cache_alloc(kmem_cache, GFP_KERNEL); + } + + void release_lbr_buffers(void) + { +- struct kmem_cache *kmem_cache = x86_get_pmu()->task_ctx_cache; ++ struct kmem_cache *kmem_cache; + struct cpu_hw_events *cpuc; + int cpu; + +@@ -714,6 +708,7 @@ void release_lbr_buffers(void) + + for_each_possible_cpu(cpu) { + cpuc = per_cpu_ptr(&cpu_hw_events, cpu); ++ kmem_cache = x86_get_pmu(cpu)->task_ctx_cache; + if (kmem_cache && cpuc->lbr_xsave) { + kmem_cache_free(kmem_cache, cpuc->lbr_xsave); + cpuc->lbr_xsave = NULL; +@@ -721,6 +716,27 @@ void release_lbr_buffers(void) + } + } + ++void reserve_lbr_buffers(void) ++{ ++ struct kmem_cache *kmem_cache; ++ struct cpu_hw_events *cpuc; ++ int cpu; ++ ++ if (!static_cpu_has(X86_FEATURE_ARCH_LBR)) ++ return; ++ ++ for_each_possible_cpu(cpu) { ++ cpuc = per_cpu_ptr(&cpu_hw_events, cpu); ++ kmem_cache = x86_get_pmu(cpu)->task_ctx_cache; ++ if (!kmem_cache || cpuc->lbr_xsave) ++ continue; ++ ++ cpuc->lbr_xsave = kmem_cache_alloc_node(kmem_cache, ++ GFP_KERNEL | __GFP_ZERO, ++ cpu_to_node(cpu)); ++ } ++} ++ + void intel_pmu_lbr_del(struct perf_event *event) + { + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); +@@ -1609,7 +1625,7 @@ void intel_pmu_lbr_init_hsw(void) + x86_pmu.lbr_sel_mask = LBR_SEL_MASK; + x86_pmu.lbr_sel_map = hsw_lbr_sel_map; + +- x86_get_pmu()->task_ctx_cache = create_lbr_kmem_cache(size, 0); ++ x86_get_pmu(smp_processor_id())->task_ctx_cache = create_lbr_kmem_cache(size, 0); + + if (lbr_from_signext_quirk_needed()) + static_branch_enable(&lbr_from_quirk_key); +@@ -1629,7 +1645,7 @@ __init void intel_pmu_lbr_init_skl(void) + x86_pmu.lbr_sel_mask = LBR_SEL_MASK; + x86_pmu.lbr_sel_map = hsw_lbr_sel_map; + +- x86_get_pmu()->task_ctx_cache 
= create_lbr_kmem_cache(size, 0); ++ x86_get_pmu(smp_processor_id())->task_ctx_cache = create_lbr_kmem_cache(size, 0); + + /* + * SW branch filter usage: +@@ -1726,7 +1742,7 @@ static bool is_arch_lbr_xsave_available(void) + + void __init intel_pmu_arch_lbr_init(void) + { +- struct pmu *pmu = x86_get_pmu(); ++ struct pmu *pmu = x86_get_pmu(smp_processor_id()); + union cpuid28_eax eax; + union cpuid28_ebx ebx; + union cpuid28_ecx ecx; +diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h +index 53b2b5fc23bca..35cdece5644fb 100644 +--- a/arch/x86/events/perf_event.h ++++ b/arch/x86/events/perf_event.h +@@ -327,6 +327,8 @@ struct cpu_hw_events { + int n_pair; /* Large increment events */ + + void *kfree_on_online[X86_PERF_KFREE_MAX]; ++ ++ struct pmu *pmu; + }; + + #define __EVENT_CONSTRAINT_RANGE(c, e, n, m, w, o, f) { \ +@@ -905,7 +907,7 @@ static struct perf_pmu_events_ht_attr event_attr_##v = { \ + .event_str_ht = ht, \ + } + +-struct pmu *x86_get_pmu(void); ++struct pmu *x86_get_pmu(unsigned int cpu); + extern struct x86_pmu x86_pmu __read_mostly; + + static __always_inline struct x86_perf_task_context_opt *task_context_opt(void *ctx) +@@ -1135,6 +1137,8 @@ void reserve_ds_buffers(void); + + void release_lbr_buffers(void); + ++void reserve_lbr_buffers(void); ++ + extern struct event_constraint bts_constraint; + extern struct event_constraint vlbr_constraint; + +@@ -1282,6 +1286,10 @@ static inline void release_lbr_buffers(void) + { + } + ++static inline void reserve_lbr_buffers(void) ++{ ++} ++ + static inline int intel_pmu_init(void) + { + return 0; +diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h +index fdee23ea4e173..16bf4d4a8159e 100644 +--- a/arch/x86/include/asm/fpu/internal.h ++++ b/arch/x86/include/asm/fpu/internal.h +@@ -204,6 +204,14 @@ static inline void copy_fxregs_to_kernel(struct fpu *fpu) + asm volatile("fxsaveq %[fx]" : [fx] "=m" (fpu->state.fxsave)); + } + ++static inline void 
fxsave(struct fxregs_state *fx) ++{ ++ if (IS_ENABLED(CONFIG_X86_32)) ++ asm volatile( "fxsave %[fx]" : [fx] "=m" (*fx)); ++ else ++ asm volatile("fxsaveq %[fx]" : [fx] "=m" (*fx)); ++} ++ + /* These macros all use (%edi)/(%rdi) as the single memory argument. */ + #define XSAVE ".byte " REX_PREFIX "0x0f,0xae,0x27" + #define XSAVEOPT ".byte " REX_PREFIX "0x0f,0xae,0x37" +@@ -268,28 +276,6 @@ static inline void copy_fxregs_to_kernel(struct fpu *fpu) + : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \ + : "memory") + +-/* +- * This function is called only during boot time when x86 caps are not set +- * up and alternative can not be used yet. +- */ +-static inline void copy_xregs_to_kernel_booting(struct xregs_state *xstate) +-{ +- u64 mask = xfeatures_mask_all; +- u32 lmask = mask; +- u32 hmask = mask >> 32; +- int err; +- +- WARN_ON(system_state != SYSTEM_BOOTING); +- +- if (boot_cpu_has(X86_FEATURE_XSAVES)) +- XSTATE_OP(XSAVES, xstate, lmask, hmask, err); +- else +- XSTATE_OP(XSAVE, xstate, lmask, hmask, err); +- +- /* We should never fault when copying to a kernel buffer: */ +- WARN_ON_FPU(err); +-} +- + /* + * This function is called only during boot time when x86 caps are not set + * up and alternative can not be used yet. +diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c +index ec3ae30547920..b7b92cdf3add4 100644 +--- a/arch/x86/kernel/fpu/signal.c ++++ b/arch/x86/kernel/fpu/signal.c +@@ -221,28 +221,18 @@ sanitize_restored_user_xstate(union fpregs_state *state, + + if (use_xsave()) { + /* +- * Note: we don't need to zero the reserved bits in the +- * xstate_header here because we either didn't copy them at all, +- * or we checked earlier that they aren't set. ++ * Clear all feature bits which are not set in ++ * user_xfeatures and clear all extended features ++ * for fx_only mode. + */ ++ u64 mask = fx_only ? XFEATURE_MASK_FPSSE : user_xfeatures; + + /* +- * 'user_xfeatures' might have bits clear which are +- * set in header->xfeatures. 
This represents features that +- * were in init state prior to a signal delivery, and need +- * to be reset back to the init state. Clear any user +- * feature bits which are set in the kernel buffer to get +- * them back to the init state. +- * +- * Supervisor state is unchanged by input from userspace. +- * Ensure supervisor state bits stay set and supervisor +- * state is not modified. ++ * Supervisor state has to be preserved. The sigframe ++ * restore can only modify user features, i.e. @mask ++ * cannot contain them. + */ +- if (fx_only) +- header->xfeatures = XFEATURE_MASK_FPSSE; +- else +- header->xfeatures &= user_xfeatures | +- xfeatures_mask_supervisor(); ++ header->xfeatures &= mask | xfeatures_mask_supervisor(); + } + + if (use_fxsr()) { +diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c +index 2ad57cc14b83f..451435d7ff413 100644 +--- a/arch/x86/kernel/fpu/xstate.c ++++ b/arch/x86/kernel/fpu/xstate.c +@@ -440,6 +440,25 @@ static void __init print_xstate_offset_size(void) + } + } + ++/* ++ * All supported features have either init state all zeros or are ++ * handled in setup_init_fpu() individually. This is an explicit ++ * feature list and does not use XFEATURE_MASK*SUPPORTED to catch ++ * newly added supported features at build time and make people ++ * actually look at the init state for the new feature. 
++ */ ++#define XFEATURES_INIT_FPSTATE_HANDLED \ ++ (XFEATURE_MASK_FP | \ ++ XFEATURE_MASK_SSE | \ ++ XFEATURE_MASK_YMM | \ ++ XFEATURE_MASK_OPMASK | \ ++ XFEATURE_MASK_ZMM_Hi256 | \ ++ XFEATURE_MASK_Hi16_ZMM | \ ++ XFEATURE_MASK_PKRU | \ ++ XFEATURE_MASK_BNDREGS | \ ++ XFEATURE_MASK_BNDCSR | \ ++ XFEATURE_MASK_PASID) ++ + /* + * setup the xstate image representing the init state + */ +@@ -447,6 +466,10 @@ static void __init setup_init_fpu_buf(void) + { + static int on_boot_cpu __initdata = 1; + ++ BUILD_BUG_ON((XFEATURE_MASK_USER_SUPPORTED | ++ XFEATURE_MASK_SUPERVISOR_SUPPORTED) != ++ XFEATURES_INIT_FPSTATE_HANDLED); ++ + WARN_ON_FPU(!on_boot_cpu); + on_boot_cpu = 0; + +@@ -466,10 +489,22 @@ static void __init setup_init_fpu_buf(void) + copy_kernel_to_xregs_booting(&init_fpstate.xsave); + + /* +- * Dump the init state again. This is to identify the init state +- * of any feature which is not represented by all zero's. ++ * All components are now in init state. Read the state back so ++ * that init_fpstate contains all non-zero init state. This only ++ * works with XSAVE, but not with XSAVEOPT and XSAVES because ++ * those use the init optimization which skips writing data for ++ * components in init state. ++ * ++ * XSAVE could be used, but that would require to reshuffle the ++ * data when XSAVES is available because XSAVES uses xstate ++ * compaction. But doing so is a pointless exercise because most ++ * components have an all zeros init state except for the legacy ++ * ones (FP and SSE). Those can be saved with FXSAVE into the ++ * legacy area. Adding new features requires to ensure that init ++ * state is all zeroes or if not to add the necessary handling ++ * here. 
+ */ +- copy_xregs_to_kernel_booting(&init_fpstate.xsave); ++ fxsave(&init_fpstate.fxsave); + } + + static int xfeature_uncompacted_offset(int xfeature_nr) +diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c +index dbc6214d69def..8f3b438f6fd3b 100644 +--- a/arch/x86/kvm/svm/sev.c ++++ b/arch/x86/kvm/svm/sev.c +@@ -143,9 +143,25 @@ static void sev_asid_free(int asid) + mutex_unlock(&sev_bitmap_lock); + } + +-static void sev_unbind_asid(struct kvm *kvm, unsigned int handle) ++static void sev_decommission(unsigned int handle) + { + struct sev_data_decommission *decommission; ++ ++ if (!handle) ++ return; ++ ++ decommission = kzalloc(sizeof(*decommission), GFP_KERNEL); ++ if (!decommission) ++ return; ++ ++ decommission->handle = handle; ++ sev_guest_decommission(decommission, NULL); ++ ++ kfree(decommission); ++} ++ ++static void sev_unbind_asid(struct kvm *kvm, unsigned int handle) ++{ + struct sev_data_deactivate *data; + + if (!handle) +@@ -165,15 +181,7 @@ static void sev_unbind_asid(struct kvm *kvm, unsigned int handle) + + kfree(data); + +- decommission = kzalloc(sizeof(*decommission), GFP_KERNEL); +- if (!decommission) +- return; +- +- /* decommission handle */ +- decommission->handle = handle; +- sev_guest_decommission(decommission, NULL); +- +- kfree(decommission); ++ sev_decommission(handle); + } + + static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp) +@@ -303,8 +311,10 @@ static int sev_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp) + + /* Bind ASID to this guest */ + ret = sev_bind_asid(kvm, start->handle, error); +- if (ret) ++ if (ret) { ++ sev_decommission(start->handle); + goto e_free_session; ++ } + + /* return handle to userspace */ + params.handle = start->handle; +diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c +index 0a0e168be1cbe..9b0e771302cee 100644 +--- a/arch/x86/pci/fixup.c ++++ b/arch/x86/pci/fixup.c +@@ -779,4 +779,48 @@ DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, 0x1571, 
pci_amd_enable_64bit_bar); + DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, 0x15b1, pci_amd_enable_64bit_bar); + DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, 0x1601, pci_amd_enable_64bit_bar); + ++#define RS690_LOWER_TOP_OF_DRAM2 0x30 ++#define RS690_LOWER_TOP_OF_DRAM2_VALID 0x1 ++#define RS690_UPPER_TOP_OF_DRAM2 0x31 ++#define RS690_HTIU_NB_INDEX 0xA8 ++#define RS690_HTIU_NB_INDEX_WR_ENABLE 0x100 ++#define RS690_HTIU_NB_DATA 0xAC ++ ++/* ++ * Some BIOS implementations support RAM above 4GB, but do not configure the ++ * PCI host to respond to bus master accesses for these addresses. These ++ * implementations set the TOP_OF_DRAM_SLOT1 register correctly, so PCI DMA ++ * works as expected for addresses below 4GB. ++ * ++ * Reference: "AMD RS690 ASIC Family Register Reference Guide" (pg. 2-57) ++ * https://www.amd.com/system/files/TechDocs/43372_rs690_rrg_3.00o.pdf ++ */ ++static void rs690_fix_64bit_dma(struct pci_dev *pdev) ++{ ++ u32 val = 0; ++ phys_addr_t top_of_dram = __pa(high_memory - 1) + 1; ++ ++ if (top_of_dram <= (1ULL << 32)) ++ return; ++ ++ pci_write_config_dword(pdev, RS690_HTIU_NB_INDEX, ++ RS690_LOWER_TOP_OF_DRAM2); ++ pci_read_config_dword(pdev, RS690_HTIU_NB_DATA, &val); ++ ++ if (val) ++ return; ++ ++ pci_info(pdev, "Adjusting top of DRAM to %pa for 64-bit DMA support\n", &top_of_dram); ++ ++ pci_write_config_dword(pdev, RS690_HTIU_NB_INDEX, ++ RS690_UPPER_TOP_OF_DRAM2 | RS690_HTIU_NB_INDEX_WR_ENABLE); ++ pci_write_config_dword(pdev, RS690_HTIU_NB_DATA, top_of_dram >> 32); ++ ++ pci_write_config_dword(pdev, RS690_HTIU_NB_INDEX, ++ RS690_LOWER_TOP_OF_DRAM2 | RS690_HTIU_NB_INDEX_WR_ENABLE); ++ pci_write_config_dword(pdev, RS690_HTIU_NB_DATA, ++ top_of_dram | RS690_LOWER_TOP_OF_DRAM2_VALID); ++} ++DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x7910, rs690_fix_64bit_dma); ++ + #endif +diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c +index 8183ddb3700c4..64db5852432e7 100644 +--- a/arch/x86/xen/enlighten_pv.c ++++ 
b/arch/x86/xen/enlighten_pv.c +@@ -592,8 +592,10 @@ DEFINE_IDTENTRY_RAW(xenpv_exc_debug) + DEFINE_IDTENTRY_RAW(exc_xen_unknown_trap) + { + /* This should never happen and there is no way to handle it. */ ++ instrumentation_begin(); + pr_err("Unknown trap in Xen PV mode."); + BUG(); ++ instrumentation_end(); + } + + #ifdef CONFIG_X86_MCE +diff --git a/certs/Kconfig b/certs/Kconfig +index c94e93d8bccf0..ab88d2a7f3c7f 100644 +--- a/certs/Kconfig ++++ b/certs/Kconfig +@@ -83,4 +83,21 @@ config SYSTEM_BLACKLIST_HASH_LIST + wrapper to incorporate the list into the kernel. Each <hash> should + be a string of hex digits. + ++config SYSTEM_REVOCATION_LIST ++ bool "Provide system-wide ring of revocation certificates" ++ depends on SYSTEM_BLACKLIST_KEYRING ++ depends on PKCS7_MESSAGE_PARSER=y ++ help ++ If set, this allows revocation certificates to be stored in the ++ blacklist keyring and implements a hook whereby a PKCS#7 message can ++ be checked to see if it matches such a certificate. ++ ++config SYSTEM_REVOCATION_KEYS ++ string "X.509 certificates to be preloaded into the system blacklist keyring" ++ depends on SYSTEM_REVOCATION_LIST ++ help ++ If set, this option should be the filename of a PEM-formatted file ++ containing X.509 certificates to be included in the default blacklist ++ keyring. ++ + endmenu +diff --git a/certs/Makefile b/certs/Makefile +index f4c25b67aad90..b6db52ebf0beb 100644 +--- a/certs/Makefile ++++ b/certs/Makefile +@@ -3,8 +3,9 @@ + # Makefile for the linux kernel signature checking certificates. 
+ # + +-obj-$(CONFIG_SYSTEM_TRUSTED_KEYRING) += system_keyring.o system_certificates.o +-obj-$(CONFIG_SYSTEM_BLACKLIST_KEYRING) += blacklist.o ++obj-$(CONFIG_SYSTEM_TRUSTED_KEYRING) += system_keyring.o system_certificates.o common.o ++obj-$(CONFIG_SYSTEM_BLACKLIST_KEYRING) += blacklist.o common.o ++obj-$(CONFIG_SYSTEM_REVOCATION_LIST) += revocation_certificates.o + ifneq ($(CONFIG_SYSTEM_BLACKLIST_HASH_LIST),"") + obj-$(CONFIG_SYSTEM_BLACKLIST_KEYRING) += blacklist_hashes.o + else +@@ -29,7 +30,7 @@ $(obj)/x509_certificate_list: scripts/extract-cert $(SYSTEM_TRUSTED_KEYS_SRCPREF + $(call if_changed,extract_certs,$(SYSTEM_TRUSTED_KEYS_SRCPREFIX)$(CONFIG_SYSTEM_TRUSTED_KEYS)) + endif # CONFIG_SYSTEM_TRUSTED_KEYRING + +-clean-files := x509_certificate_list .x509.list ++clean-files := x509_certificate_list .x509.list x509_revocation_list + + ifeq ($(CONFIG_MODULE_SIG),y) + ############################################################################### +@@ -104,3 +105,17 @@ targets += signing_key.x509 + $(obj)/signing_key.x509: scripts/extract-cert $(X509_DEP) FORCE + $(call if_changed,extract_certs,$(MODULE_SIG_KEY_SRCPREFIX)$(CONFIG_MODULE_SIG_KEY)) + endif # CONFIG_MODULE_SIG ++ ++ifeq ($(CONFIG_SYSTEM_REVOCATION_LIST),y) ++ ++$(eval $(call config_filename,SYSTEM_REVOCATION_KEYS)) ++ ++$(obj)/revocation_certificates.o: $(obj)/x509_revocation_list ++ ++quiet_cmd_extract_certs = EXTRACT_CERTS $(patsubst "%",%,$(2)) ++ cmd_extract_certs = scripts/extract-cert $(2) $@ ++ ++targets += x509_revocation_list ++$(obj)/x509_revocation_list: scripts/extract-cert $(SYSTEM_REVOCATION_KEYS_SRCPREFIX)$(SYSTEM_REVOCATION_KEYS_FILENAME) FORCE ++ $(call if_changed,extract_certs,$(SYSTEM_REVOCATION_KEYS_SRCPREFIX)$(CONFIG_SYSTEM_REVOCATION_KEYS)) ++endif +diff --git a/certs/blacklist.c b/certs/blacklist.c +index bffe4c6f4a9e2..c9a435b15af40 100644 +--- a/certs/blacklist.c ++++ b/certs/blacklist.c +@@ -17,9 +17,15 @@ + #include <linux/uidgid.h> + #include <keys/system_keyring.h> + 
#include "blacklist.h" ++#include "common.h" + + static struct key *blacklist_keyring; + ++#ifdef CONFIG_SYSTEM_REVOCATION_LIST ++extern __initconst const u8 revocation_certificate_list[]; ++extern __initconst const unsigned long revocation_certificate_list_size; ++#endif ++ + /* + * The description must be a type prefix, a colon and then an even number of + * hex digits. The hash is kept in the description. +@@ -145,6 +151,49 @@ int is_binary_blacklisted(const u8 *hash, size_t hash_len) + } + EXPORT_SYMBOL_GPL(is_binary_blacklisted); + ++#ifdef CONFIG_SYSTEM_REVOCATION_LIST ++/** ++ * add_key_to_revocation_list - Add a revocation certificate to the blacklist ++ * @data: The data blob containing the certificate ++ * @size: The size of data blob ++ */ ++int add_key_to_revocation_list(const char *data, size_t size) ++{ ++ key_ref_t key; ++ ++ key = key_create_or_update(make_key_ref(blacklist_keyring, true), ++ "asymmetric", ++ NULL, ++ data, ++ size, ++ ((KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_VIEW), ++ KEY_ALLOC_NOT_IN_QUOTA | KEY_ALLOC_BUILT_IN); ++ ++ if (IS_ERR(key)) { ++ pr_err("Problem with revocation key (%ld)\n", PTR_ERR(key)); ++ return PTR_ERR(key); ++ } ++ ++ return 0; ++} ++ ++/** ++ * is_key_on_revocation_list - Determine if the key for a PKCS#7 message is revoked ++ * @pkcs7: The PKCS#7 message to check ++ */ ++int is_key_on_revocation_list(struct pkcs7_message *pkcs7) ++{ ++ int ret; ++ ++ ret = pkcs7_validate_trust(pkcs7, blacklist_keyring); ++ ++ if (ret == 0) ++ return -EKEYREJECTED; ++ ++ return -ENOKEY; ++} ++#endif ++ + /* + * Initialise the blacklist + */ +@@ -177,3 +226,18 @@ static int __init blacklist_init(void) + * Must be initialised before we try and load the keys into the keyring. + */ + device_initcall(blacklist_init); ++ ++#ifdef CONFIG_SYSTEM_REVOCATION_LIST ++/* ++ * Load the compiled-in list of revocation X.509 certificates. 
++ */ ++static __init int load_revocation_certificate_list(void) ++{ ++ if (revocation_certificate_list_size) ++ pr_notice("Loading compiled-in revocation X.509 certificates\n"); ++ ++ return load_certificate_list(revocation_certificate_list, revocation_certificate_list_size, ++ blacklist_keyring); ++} ++late_initcall(load_revocation_certificate_list); ++#endif +diff --git a/certs/blacklist.h b/certs/blacklist.h +index 1efd6fa0dc608..51b320cf85749 100644 +--- a/certs/blacklist.h ++++ b/certs/blacklist.h +@@ -1,3 +1,5 @@ + #include <linux/kernel.h> ++#include <linux/errno.h> ++#include <crypto/pkcs7.h> + + extern const char __initconst *const blacklist_hashes[]; +diff --git a/certs/common.c b/certs/common.c +new file mode 100644 +index 0000000000000..16a220887a53e +--- /dev/null ++++ b/certs/common.c +@@ -0,0 +1,57 @@ ++// SPDX-License-Identifier: GPL-2.0-or-later ++ ++#include <linux/kernel.h> ++#include <linux/key.h> ++#include "common.h" ++ ++int load_certificate_list(const u8 cert_list[], ++ const unsigned long list_size, ++ const struct key *keyring) ++{ ++ key_ref_t key; ++ const u8 *p, *end; ++ size_t plen; ++ ++ p = cert_list; ++ end = p + list_size; ++ while (p < end) { ++ /* Each cert begins with an ASN.1 SEQUENCE tag and must be more ++ * than 256 bytes in size. 
++ */ ++ if (end - p < 4) ++ goto dodgy_cert; ++ if (p[0] != 0x30 && ++ p[1] != 0x82) ++ goto dodgy_cert; ++ plen = (p[2] << 8) | p[3]; ++ plen += 4; ++ if (plen > end - p) ++ goto dodgy_cert; ++ ++ key = key_create_or_update(make_key_ref(keyring, 1), ++ "asymmetric", ++ NULL, ++ p, ++ plen, ++ ((KEY_POS_ALL & ~KEY_POS_SETATTR) | ++ KEY_USR_VIEW | KEY_USR_READ), ++ KEY_ALLOC_NOT_IN_QUOTA | ++ KEY_ALLOC_BUILT_IN | ++ KEY_ALLOC_BYPASS_RESTRICTION); ++ if (IS_ERR(key)) { ++ pr_err("Problem loading in-kernel X.509 certificate (%ld)\n", ++ PTR_ERR(key)); ++ } else { ++ pr_notice("Loaded X.509 cert '%s'\n", ++ key_ref_to_ptr(key)->description); ++ key_ref_put(key); ++ } ++ p += plen; ++ } ++ ++ return 0; ++ ++dodgy_cert: ++ pr_err("Problem parsing in-kernel X.509 certificate list\n"); ++ return 0; ++} +diff --git a/certs/common.h b/certs/common.h +new file mode 100644 +index 0000000000000..abdb5795936b7 +--- /dev/null ++++ b/certs/common.h +@@ -0,0 +1,9 @@ ++/* SPDX-License-Identifier: GPL-2.0-or-later */ ++ ++#ifndef _CERT_COMMON_H ++#define _CERT_COMMON_H ++ ++int load_certificate_list(const u8 cert_list[], const unsigned long list_size, ++ const struct key *keyring); ++ ++#endif +diff --git a/certs/revocation_certificates.S b/certs/revocation_certificates.S +new file mode 100644 +index 0000000000000..f21aae8a8f0ef +--- /dev/null ++++ b/certs/revocation_certificates.S +@@ -0,0 +1,21 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++#include <linux/export.h> ++#include <linux/init.h> ++ ++ __INITRODATA ++ ++ .align 8 ++ .globl revocation_certificate_list ++revocation_certificate_list: ++__revocation_list_start: ++ .incbin "certs/x509_revocation_list" ++__revocation_list_end: ++ ++ .align 8 ++ .globl revocation_certificate_list_size ++revocation_certificate_list_size: ++#ifdef CONFIG_64BIT ++ .quad __revocation_list_end - __revocation_list_start ++#else ++ .long __revocation_list_end - __revocation_list_start ++#endif +diff --git a/certs/system_keyring.c 
b/certs/system_keyring.c +index 4b693da488f14..0c9a4795e847b 100644 +--- a/certs/system_keyring.c ++++ b/certs/system_keyring.c +@@ -16,6 +16,7 @@ + #include <keys/asymmetric-type.h> + #include <keys/system_keyring.h> + #include <crypto/pkcs7.h> ++#include "common.h" + + static struct key *builtin_trusted_keys; + #ifdef CONFIG_SECONDARY_TRUSTED_KEYRING +@@ -137,54 +138,10 @@ device_initcall(system_trusted_keyring_init); + */ + static __init int load_system_certificate_list(void) + { +- key_ref_t key; +- const u8 *p, *end; +- size_t plen; +- + pr_notice("Loading compiled-in X.509 certificates\n"); + +- p = system_certificate_list; +- end = p + system_certificate_list_size; +- while (p < end) { +- /* Each cert begins with an ASN.1 SEQUENCE tag and must be more +- * than 256 bytes in size. +- */ +- if (end - p < 4) +- goto dodgy_cert; +- if (p[0] != 0x30 && +- p[1] != 0x82) +- goto dodgy_cert; +- plen = (p[2] << 8) | p[3]; +- plen += 4; +- if (plen > end - p) +- goto dodgy_cert; +- +- key = key_create_or_update(make_key_ref(builtin_trusted_keys, 1), +- "asymmetric", +- NULL, +- p, +- plen, +- ((KEY_POS_ALL & ~KEY_POS_SETATTR) | +- KEY_USR_VIEW | KEY_USR_READ), +- KEY_ALLOC_NOT_IN_QUOTA | +- KEY_ALLOC_BUILT_IN | +- KEY_ALLOC_BYPASS_RESTRICTION); +- if (IS_ERR(key)) { +- pr_err("Problem loading in-kernel X.509 certificate (%ld)\n", +- PTR_ERR(key)); +- } else { +- pr_notice("Loaded X.509 cert '%s'\n", +- key_ref_to_ptr(key)->description); +- key_ref_put(key); +- } +- p += plen; +- } +- +- return 0; +- +-dodgy_cert: +- pr_err("Problem parsing in-kernel X.509 certificate list\n"); +- return 0; ++ return load_certificate_list(system_certificate_list, system_certificate_list_size, ++ builtin_trusted_keys); + } + late_initcall(load_system_certificate_list); + +@@ -242,6 +199,12 @@ int verify_pkcs7_message_sig(const void *data, size_t len, + pr_devel("PKCS#7 platform keyring is not available\n"); + goto error; + } ++ ++ ret = is_key_on_revocation_list(pkcs7); ++ if (ret != 
-ENOKEY) { ++ pr_devel("PKCS#7 platform key is on revocation list\n"); ++ goto error; ++ } + } + ret = pkcs7_validate_trust(pkcs7, trusted_keys); + if (ret < 0) { +diff --git a/drivers/base/swnode.c b/drivers/base/swnode.c +index 88310ac9ce906..62c536f9d9258 100644 +--- a/drivers/base/swnode.c ++++ b/drivers/base/swnode.c +@@ -1032,7 +1032,15 @@ int device_add_software_node(struct device *dev, const struct software_node *nod + } + + set_secondary_fwnode(dev, &swnode->fwnode); +- software_node_notify(dev, KOBJ_ADD); ++ ++ /* ++ * If the device has been fully registered by the time this function is ++ * called, software_node_notify() must be called separately so that the ++ * symlinks get created and the reference count of the node is kept in ++ * balance. ++ */ ++ if (device_is_registered(dev)) ++ software_node_notify(dev, KOBJ_ADD); + + return 0; + } +@@ -1052,7 +1060,8 @@ void device_remove_software_node(struct device *dev) + if (!swnode) + return; + +- software_node_notify(dev, KOBJ_REMOVE); ++ if (device_is_registered(dev)) ++ software_node_notify(dev, KOBJ_REMOVE); + set_secondary_fwnode(dev, NULL); + kobject_put(&swnode->kobj); + } +@@ -1106,8 +1115,7 @@ int software_node_notify(struct device *dev, unsigned long action) + + switch (action) { + case KOBJ_ADD: +- ret = sysfs_create_link_nowarn(&dev->kobj, &swnode->kobj, +- "software_node"); ++ ret = sysfs_create_link(&dev->kobj, &swnode->kobj, "software_node"); + if (ret) + break; + +diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig +index 03b1b03349477..c42b17b76640e 100644 +--- a/drivers/dma/Kconfig ++++ b/drivers/dma/Kconfig +@@ -690,6 +690,7 @@ config XILINX_ZYNQMP_DMA + + config XILINX_ZYNQMP_DPDMA + tristate "Xilinx DPDMA Engine" ++ depends on HAS_IOMEM && OF + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + help +diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c +index 1d8a3876b7452..5ba8e8bc609fc 100644 +--- a/drivers/dma/idxd/cdev.c ++++ b/drivers/dma/idxd/cdev.c +@@ -110,6 +110,7 @@ 
static int idxd_cdev_open(struct inode *inode, struct file *filp) + pasid = iommu_sva_get_pasid(sva); + if (pasid == IOMMU_PASID_INVALID) { + iommu_sva_unbind_device(sva); ++ rc = -EINVAL; + goto failed; + } + +diff --git a/drivers/dma/mediatek/mtk-uart-apdma.c b/drivers/dma/mediatek/mtk-uart-apdma.c +index 27c07350971dd..375e7e647df6b 100644 +--- a/drivers/dma/mediatek/mtk-uart-apdma.c ++++ b/drivers/dma/mediatek/mtk-uart-apdma.c +@@ -131,10 +131,7 @@ static unsigned int mtk_uart_apdma_read(struct mtk_chan *c, unsigned int reg) + + static void mtk_uart_apdma_desc_free(struct virt_dma_desc *vd) + { +- struct dma_chan *chan = vd->tx.chan; +- struct mtk_chan *c = to_mtk_uart_apdma_chan(chan); +- +- kfree(c->desc); ++ kfree(container_of(vd, struct mtk_uart_apdma_desc, vd)); + } + + static void mtk_uart_apdma_start_tx(struct mtk_chan *c) +@@ -207,14 +204,9 @@ static void mtk_uart_apdma_start_rx(struct mtk_chan *c) + + static void mtk_uart_apdma_tx_handler(struct mtk_chan *c) + { +- struct mtk_uart_apdma_desc *d = c->desc; +- + mtk_uart_apdma_write(c, VFF_INT_FLAG, VFF_TX_INT_CLR_B); + mtk_uart_apdma_write(c, VFF_INT_EN, VFF_INT_EN_CLR_B); + mtk_uart_apdma_write(c, VFF_EN, VFF_EN_CLR_B); +- +- list_del(&d->vd.node); +- vchan_cookie_complete(&d->vd); + } + + static void mtk_uart_apdma_rx_handler(struct mtk_chan *c) +@@ -245,9 +237,17 @@ static void mtk_uart_apdma_rx_handler(struct mtk_chan *c) + + c->rx_status = d->avail_len - cnt; + mtk_uart_apdma_write(c, VFF_RPT, wg); ++} + +- list_del(&d->vd.node); +- vchan_cookie_complete(&d->vd); ++static void mtk_uart_apdma_chan_complete_handler(struct mtk_chan *c) ++{ ++ struct mtk_uart_apdma_desc *d = c->desc; ++ ++ if (d) { ++ list_del(&d->vd.node); ++ vchan_cookie_complete(&d->vd); ++ c->desc = NULL; ++ } + } + + static irqreturn_t mtk_uart_apdma_irq_handler(int irq, void *dev_id) +@@ -261,6 +261,7 @@ static irqreturn_t mtk_uart_apdma_irq_handler(int irq, void *dev_id) + mtk_uart_apdma_rx_handler(c); + else if (c->dir == 
DMA_MEM_TO_DEV) + mtk_uart_apdma_tx_handler(c); ++ mtk_uart_apdma_chan_complete_handler(c); + spin_unlock_irqrestore(&c->vc.lock, flags); + + return IRQ_HANDLED; +@@ -348,7 +349,7 @@ static struct dma_async_tx_descriptor *mtk_uart_apdma_prep_slave_sg + return NULL; + + /* Now allocate and setup the descriptor */ +- d = kzalloc(sizeof(*d), GFP_ATOMIC); ++ d = kzalloc(sizeof(*d), GFP_NOWAIT); + if (!d) + return NULL; + +@@ -366,7 +367,7 @@ static void mtk_uart_apdma_issue_pending(struct dma_chan *chan) + unsigned long flags; + + spin_lock_irqsave(&c->vc.lock, flags); +- if (vchan_issue_pending(&c->vc)) { ++ if (vchan_issue_pending(&c->vc) && !c->desc) { + vd = vchan_next_desc(&c->vc); + c->desc = to_mtk_uart_apdma_desc(&vd->tx); + +diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c +index d530c1bf11d97..6885b3dcd7a97 100644 +--- a/drivers/dma/sh/rcar-dmac.c ++++ b/drivers/dma/sh/rcar-dmac.c +@@ -1913,7 +1913,7 @@ static int rcar_dmac_probe(struct platform_device *pdev) + + /* Enable runtime PM and initialize the device. 
*/ + pm_runtime_enable(&pdev->dev); +- ret = pm_runtime_get_sync(&pdev->dev); ++ ret = pm_runtime_resume_and_get(&pdev->dev); + if (ret < 0) { + dev_err(&pdev->dev, "runtime PM get sync failed (%d)\n", ret); + return ret; +diff --git a/drivers/dma/stm32-mdma.c b/drivers/dma/stm32-mdma.c +index 36ba8b43e78de..18cbd1e43c2e8 100644 +--- a/drivers/dma/stm32-mdma.c ++++ b/drivers/dma/stm32-mdma.c +@@ -1452,7 +1452,7 @@ static int stm32_mdma_alloc_chan_resources(struct dma_chan *c) + return -ENOMEM; + } + +- ret = pm_runtime_get_sync(dmadev->ddev.dev); ++ ret = pm_runtime_resume_and_get(dmadev->ddev.dev); + if (ret < 0) + return ret; + +@@ -1718,7 +1718,7 @@ static int stm32_mdma_pm_suspend(struct device *dev) + u32 ccr, id; + int ret; + +- ret = pm_runtime_get_sync(dev); ++ ret = pm_runtime_resume_and_get(dev); + if (ret < 0) + return ret; + +diff --git a/drivers/dma/xilinx/xilinx_dpdma.c b/drivers/dma/xilinx/xilinx_dpdma.c +index ff7dfb3fdeb47..6c709803203ad 100644 +--- a/drivers/dma/xilinx/xilinx_dpdma.c ++++ b/drivers/dma/xilinx/xilinx_dpdma.c +@@ -113,6 +113,7 @@ + #define XILINX_DPDMA_CH_VDO 0x020 + #define XILINX_DPDMA_CH_PYLD_SZ 0x024 + #define XILINX_DPDMA_CH_DESC_ID 0x028 ++#define XILINX_DPDMA_CH_DESC_ID_MASK GENMASK(15, 0) + + /* DPDMA descriptor fields */ + #define XILINX_DPDMA_DESC_CONTROL_PREEMBLE 0xa5 +@@ -866,7 +867,8 @@ static void xilinx_dpdma_chan_queue_transfer(struct xilinx_dpdma_chan *chan) + * will be used, but it should be enough. 
+ */ + list_for_each_entry(sw_desc, &desc->descriptors, node) +- sw_desc->hw.desc_id = desc->vdesc.tx.cookie; ++ sw_desc->hw.desc_id = desc->vdesc.tx.cookie ++ & XILINX_DPDMA_CH_DESC_ID_MASK; + + sw_desc = list_first_entry(&desc->descriptors, + struct xilinx_dpdma_sw_desc, node); +@@ -1086,7 +1088,8 @@ static void xilinx_dpdma_chan_vsync_irq(struct xilinx_dpdma_chan *chan) + if (!chan->running || !pending) + goto out; + +- desc_id = dpdma_read(chan->reg, XILINX_DPDMA_CH_DESC_ID); ++ desc_id = dpdma_read(chan->reg, XILINX_DPDMA_CH_DESC_ID) ++ & XILINX_DPDMA_CH_DESC_ID_MASK; + + /* If the retrigger raced with vsync, retry at the next frame. */ + sw_desc = list_first_entry(&pending->descriptors, +diff --git a/drivers/dma/xilinx/zynqmp_dma.c b/drivers/dma/xilinx/zynqmp_dma.c +index d8419565b92cc..5fecf5aa6e858 100644 +--- a/drivers/dma/xilinx/zynqmp_dma.c ++++ b/drivers/dma/xilinx/zynqmp_dma.c +@@ -468,7 +468,7 @@ static int zynqmp_dma_alloc_chan_resources(struct dma_chan *dchan) + struct zynqmp_dma_desc_sw *desc; + int i, ret; + +- ret = pm_runtime_get_sync(chan->dev); ++ ret = pm_runtime_resume_and_get(chan->dev); + if (ret < 0) + return ret; + +diff --git a/drivers/gpio/gpiolib-cdev.c b/drivers/gpio/gpiolib-cdev.c +index 1631727bf0da1..c7b5446d01fd2 100644 +--- a/drivers/gpio/gpiolib-cdev.c ++++ b/drivers/gpio/gpiolib-cdev.c +@@ -1880,6 +1880,7 @@ static void gpio_v2_line_info_changed_to_v1( + struct gpio_v2_line_info_changed *lic_v2, + struct gpioline_info_changed *lic_v1) + { ++ memset(lic_v1, 0, sizeof(*lic_v1)); + gpio_v2_line_info_to_v1(&lic_v2->info, &lic_v1->info); + lic_v1->timestamp = lic_v2->timestamp_ns; + lic_v1->event_type = lic_v2->event_type; +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c +index 47e0b48dc26fd..1c4623d25a62a 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c +@@ -214,9 +214,21 @@ static int amdgpu_dma_buf_pin(struct 
dma_buf_attachment *attach) + { + struct drm_gem_object *obj = attach->dmabuf->priv; + struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); ++ int r; + + /* pin buffer into GTT */ +- return amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT); ++ r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT); ++ if (r) ++ return r; ++ ++ if (bo->tbo.moving) { ++ r = dma_fence_wait(bo->tbo.moving, true); ++ if (r) { ++ amdgpu_bo_unpin(bo); ++ return r; ++ } ++ } ++ return 0; + } + + /** +diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +index 72d23651501d4..2342c5d216f9b 100644 +--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +@@ -6769,12 +6769,8 @@ static int gfx_v10_0_kiq_init_register(struct amdgpu_ring *ring) + if (ring->use_doorbell) { + WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER, + (adev->doorbell_index.kiq * 2) << 2); +- /* If GC has entered CGPG, ringing doorbell > first page doesn't +- * wakeup GC. Enlarge CP_MEC_DOORBELL_RANGE_UPPER to workaround +- * this issue. +- */ + WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER, +- (adev->doorbell.size - 4)); ++ (adev->doorbell_index.userqueue_end * 2) << 2); + } + + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, +diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +index 1fdfb7783404e..d2c020a91c0be 100644 +--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +@@ -3623,12 +3623,8 @@ static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring) + if (ring->use_doorbell) { + WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER, + (adev->doorbell_index.kiq * 2) << 2); +- /* If GC has entered CGPG, ringing doorbell > first page doesn't +- * wakeup GC. Enlarge CP_MEC_DOORBELL_RANGE_UPPER to workaround +- * this issue. 
+- */ + WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER, +- (adev->doorbell.size - 4)); ++ (adev->doorbell_index.userqueue_end * 2) << 2); + } + + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, +diff --git a/drivers/gpu/drm/kmb/kmb_drv.c b/drivers/gpu/drm/kmb/kmb_drv.c +index f64e06e1067dd..96ea1a2c11dd6 100644 +--- a/drivers/gpu/drm/kmb/kmb_drv.c ++++ b/drivers/gpu/drm/kmb/kmb_drv.c +@@ -137,6 +137,7 @@ static int kmb_hw_init(struct drm_device *drm, unsigned long flags) + /* Allocate LCD interrupt resources */ + irq_lcd = platform_get_irq(pdev, 0); + if (irq_lcd < 0) { ++ ret = irq_lcd; + drm_err(&kmb->drm, "irq_lcd not found"); + goto setup_fail; + } +diff --git a/drivers/gpu/drm/nouveau/nouveau_prime.c b/drivers/gpu/drm/nouveau/nouveau_prime.c +index 347488685f745..60019d0532fcf 100644 +--- a/drivers/gpu/drm/nouveau/nouveau_prime.c ++++ b/drivers/gpu/drm/nouveau/nouveau_prime.c +@@ -93,7 +93,22 @@ int nouveau_gem_prime_pin(struct drm_gem_object *obj) + if (ret) + return -EINVAL; + +- return 0; ++ ret = ttm_bo_reserve(&nvbo->bo, false, false, NULL); ++ if (ret) ++ goto error; ++ ++ if (nvbo->bo.moving) ++ ret = dma_fence_wait(nvbo->bo.moving, true); ++ ++ ttm_bo_unreserve(&nvbo->bo); ++ if (ret) ++ goto error; ++ ++ return ret; ++ ++error: ++ nouveau_bo_unpin(nvbo); ++ return ret; + } + + void nouveau_gem_prime_unpin(struct drm_gem_object *obj) +diff --git a/drivers/gpu/drm/radeon/radeon_prime.c b/drivers/gpu/drm/radeon/radeon_prime.c +index 42a87948e28c5..4a90807351e72 100644 +--- a/drivers/gpu/drm/radeon/radeon_prime.c ++++ b/drivers/gpu/drm/radeon/radeon_prime.c +@@ -77,9 +77,19 @@ int radeon_gem_prime_pin(struct drm_gem_object *obj) + + /* pin buffer into GTT */ + ret = radeon_bo_pin(bo, RADEON_GEM_DOMAIN_GTT, NULL); +- if (likely(ret == 0)) +- bo->prime_shared_count++; +- ++ if (unlikely(ret)) ++ goto error; ++ ++ if (bo->tbo.moving) { ++ ret = dma_fence_wait(bo->tbo.moving, false); ++ if (unlikely(ret)) { ++ radeon_bo_unpin(bo); ++ goto error; ++ } 
++ } ++ ++ bo->prime_shared_count++; ++error: + radeon_bo_unreserve(bo); + return ret; + } +diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c +index 1fda574579afc..8106b5634fe10 100644 +--- a/drivers/gpu/drm/vc4/vc4_hdmi.c ++++ b/drivers/gpu/drm/vc4/vc4_hdmi.c +@@ -159,6 +159,8 @@ vc4_hdmi_connector_detect(struct drm_connector *connector, bool force) + struct vc4_hdmi *vc4_hdmi = connector_to_vc4_hdmi(connector); + bool connected = false; + ++ WARN_ON(pm_runtime_resume_and_get(&vc4_hdmi->pdev->dev)); ++ + if (vc4_hdmi->hpd_gpio) { + if (gpio_get_value_cansleep(vc4_hdmi->hpd_gpio) ^ + vc4_hdmi->hpd_active_low) +@@ -180,10 +182,12 @@ vc4_hdmi_connector_detect(struct drm_connector *connector, bool force) + } + } + ++ pm_runtime_put(&vc4_hdmi->pdev->dev); + return connector_status_connected; + } + + cec_phys_addr_invalidate(vc4_hdmi->cec_adap); ++ pm_runtime_put(&vc4_hdmi->pdev->dev); + return connector_status_disconnected; + } + +@@ -473,7 +477,6 @@ static void vc4_hdmi_encoder_post_crtc_powerdown(struct drm_encoder *encoder, + HDMI_READ(HDMI_VID_CTL) & ~VC4_HD_VID_CTL_ENABLE); + + clk_disable_unprepare(vc4_hdmi->pixel_bvb_clock); +- clk_disable_unprepare(vc4_hdmi->hsm_clock); + clk_disable_unprepare(vc4_hdmi->pixel_clock); + + ret = pm_runtime_put(&vc4_hdmi->pdev->dev); +@@ -784,13 +787,6 @@ static void vc4_hdmi_encoder_pre_crtc_configure(struct drm_encoder *encoder, + return; + } + +- ret = clk_prepare_enable(vc4_hdmi->hsm_clock); +- if (ret) { +- DRM_ERROR("Failed to turn on HSM clock: %d\n", ret); +- clk_disable_unprepare(vc4_hdmi->pixel_clock); +- return; +- } +- + vc4_hdmi_cec_update_clk_div(vc4_hdmi); + + /* +@@ -801,7 +797,6 @@ static void vc4_hdmi_encoder_pre_crtc_configure(struct drm_encoder *encoder, + (hsm_rate > VC4_HSM_MID_CLOCK ? 
150000000 : 75000000)); + if (ret) { + DRM_ERROR("Failed to set pixel bvb clock rate: %d\n", ret); +- clk_disable_unprepare(vc4_hdmi->hsm_clock); + clk_disable_unprepare(vc4_hdmi->pixel_clock); + return; + } +@@ -809,7 +804,6 @@ static void vc4_hdmi_encoder_pre_crtc_configure(struct drm_encoder *encoder, + ret = clk_prepare_enable(vc4_hdmi->pixel_bvb_clock); + if (ret) { + DRM_ERROR("Failed to turn on pixel bvb clock: %d\n", ret); +- clk_disable_unprepare(vc4_hdmi->hsm_clock); + clk_disable_unprepare(vc4_hdmi->pixel_clock); + return; + } +@@ -1929,6 +1923,29 @@ static int vc5_hdmi_init_resources(struct vc4_hdmi *vc4_hdmi) + return 0; + } + ++#ifdef CONFIG_PM ++static int vc4_hdmi_runtime_suspend(struct device *dev) ++{ ++ struct vc4_hdmi *vc4_hdmi = dev_get_drvdata(dev); ++ ++ clk_disable_unprepare(vc4_hdmi->hsm_clock); ++ ++ return 0; ++} ++ ++static int vc4_hdmi_runtime_resume(struct device *dev) ++{ ++ struct vc4_hdmi *vc4_hdmi = dev_get_drvdata(dev); ++ int ret; ++ ++ ret = clk_prepare_enable(vc4_hdmi->hsm_clock); ++ if (ret) ++ return ret; ++ ++ return 0; ++} ++#endif ++ + static int vc4_hdmi_bind(struct device *dev, struct device *master, void *data) + { + const struct vc4_hdmi_variant *variant = of_device_get_match_data(dev); +@@ -2165,11 +2182,18 @@ static const struct of_device_id vc4_hdmi_dt_match[] = { + {} + }; + ++static const struct dev_pm_ops vc4_hdmi_pm_ops = { ++ SET_RUNTIME_PM_OPS(vc4_hdmi_runtime_suspend, ++ vc4_hdmi_runtime_resume, ++ NULL) ++}; ++ + struct platform_driver vc4_hdmi_driver = { + .probe = vc4_hdmi_dev_probe, + .remove = vc4_hdmi_dev_remove, + .driver = { + .name = "vc4_hdmi", + .of_match_table = vc4_hdmi_dt_match, ++ .pm = &vc4_hdmi_pm_ops, + }, + }; +diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c +index f9e1c2ceaac05..04a1e38f2a6f0 100644 +--- a/drivers/i2c/busses/i2c-i801.c ++++ b/drivers/i2c/busses/i2c-i801.c +@@ -978,6 +978,9 @@ static s32 i801_access(struct i2c_adapter *adap, u16 addr, + } + + out: 
++ /* Unlock the SMBus device for use by BIOS/ACPI */ ++ outb_p(SMBHSTSTS_INUSE_STS, SMBHSTSTS(priv)); ++ + pm_runtime_mark_last_busy(&priv->pci_dev->dev); + pm_runtime_put_autosuspend(&priv->pci_dev->dev); + mutex_unlock(&priv->acpi_lock); +diff --git a/drivers/i2c/busses/i2c-robotfuzz-osif.c b/drivers/i2c/busses/i2c-robotfuzz-osif.c +index a39f7d0927973..66dfa211e736b 100644 +--- a/drivers/i2c/busses/i2c-robotfuzz-osif.c ++++ b/drivers/i2c/busses/i2c-robotfuzz-osif.c +@@ -83,7 +83,7 @@ static int osif_xfer(struct i2c_adapter *adapter, struct i2c_msg *msgs, + } + } + +- ret = osif_usb_read(adapter, OSIFI2C_STOP, 0, 0, NULL, 0); ++ ret = osif_usb_write(adapter, OSIFI2C_STOP, 0, 0, NULL, 0); + if (ret) { + dev_err(&adapter->dev, "failure sending STOP\n"); + return -EREMOTEIO; +@@ -153,7 +153,7 @@ static int osif_probe(struct usb_interface *interface, + * Set bus frequency. The frequency is: + * 120,000,000 / ( 16 + 2 * div * 4^prescale). + * Using dev = 52, prescale = 0 give 100KHz */ +- ret = osif_usb_read(&priv->adapter, OSIFI2C_SET_BIT_RATE, 52, 0, ++ ret = osif_usb_write(&priv->adapter, OSIFI2C_SET_BIT_RATE, 52, 0, + NULL, 0); + if (ret) { + dev_err(&interface->dev, "failure sending bit rate"); +diff --git a/drivers/mmc/host/meson-gx-mmc.c b/drivers/mmc/host/meson-gx-mmc.c +index 016a6106151a5..3f28eb4d17fe7 100644 +--- a/drivers/mmc/host/meson-gx-mmc.c ++++ b/drivers/mmc/host/meson-gx-mmc.c +@@ -165,6 +165,7 @@ struct meson_host { + + unsigned int bounce_buf_size; + void *bounce_buf; ++ void __iomem *bounce_iomem_buf; + dma_addr_t bounce_dma_addr; + struct sd_emmc_desc *descs; + dma_addr_t descs_dma_addr; +@@ -745,6 +746,47 @@ static void meson_mmc_desc_chain_transfer(struct mmc_host *mmc, u32 cmd_cfg) + writel(start, host->regs + SD_EMMC_START); + } + ++/* local sg copy to buffer version with _to/fromio usage for dram_access_quirk */ ++static void meson_mmc_copy_buffer(struct meson_host *host, struct mmc_data *data, ++ size_t buflen, bool to_buffer) ++{ ++ 
unsigned int sg_flags = SG_MITER_ATOMIC; ++ struct scatterlist *sgl = data->sg; ++ unsigned int nents = data->sg_len; ++ struct sg_mapping_iter miter; ++ unsigned int offset = 0; ++ ++ if (to_buffer) ++ sg_flags |= SG_MITER_FROM_SG; ++ else ++ sg_flags |= SG_MITER_TO_SG; ++ ++ sg_miter_start(&miter, sgl, nents, sg_flags); ++ ++ while ((offset < buflen) && sg_miter_next(&miter)) { ++ unsigned int len; ++ ++ len = min(miter.length, buflen - offset); ++ ++ /* When dram_access_quirk, the bounce buffer is a iomem mapping */ ++ if (host->dram_access_quirk) { ++ if (to_buffer) ++ memcpy_toio(host->bounce_iomem_buf + offset, miter.addr, len); ++ else ++ memcpy_fromio(miter.addr, host->bounce_iomem_buf + offset, len); ++ } else { ++ if (to_buffer) ++ memcpy(host->bounce_buf + offset, miter.addr, len); ++ else ++ memcpy(miter.addr, host->bounce_buf + offset, len); ++ } ++ ++ offset += len; ++ } ++ ++ sg_miter_stop(&miter); ++} ++ + static void meson_mmc_start_cmd(struct mmc_host *mmc, struct mmc_command *cmd) + { + struct meson_host *host = mmc_priv(mmc); +@@ -788,8 +830,7 @@ static void meson_mmc_start_cmd(struct mmc_host *mmc, struct mmc_command *cmd) + if (data->flags & MMC_DATA_WRITE) { + cmd_cfg |= CMD_CFG_DATA_WR; + WARN_ON(xfer_bytes > host->bounce_buf_size); +- sg_copy_to_buffer(data->sg, data->sg_len, +- host->bounce_buf, xfer_bytes); ++ meson_mmc_copy_buffer(host, data, xfer_bytes, true); + dma_wmb(); + } + +@@ -958,8 +999,7 @@ static irqreturn_t meson_mmc_irq_thread(int irq, void *dev_id) + if (meson_mmc_bounce_buf_read(data)) { + xfer_bytes = data->blksz * data->blocks; + WARN_ON(xfer_bytes > host->bounce_buf_size); +- sg_copy_from_buffer(data->sg, data->sg_len, +- host->bounce_buf, xfer_bytes); ++ meson_mmc_copy_buffer(host, data, xfer_bytes, false); + } + + next_cmd = meson_mmc_get_next_command(cmd); +@@ -1179,7 +1219,7 @@ static int meson_mmc_probe(struct platform_device *pdev) + * instead of the DDR memory + */ + host->bounce_buf_size = 
SD_EMMC_SRAM_DATA_BUF_LEN; +- host->bounce_buf = host->regs + SD_EMMC_SRAM_DATA_BUF_OFF; ++ host->bounce_iomem_buf = host->regs + SD_EMMC_SRAM_DATA_BUF_OFF; + host->bounce_dma_addr = res->start + SD_EMMC_SRAM_DATA_BUF_OFF; + } else { + /* data bounce buffer */ +diff --git a/drivers/net/caif/caif_serial.c b/drivers/net/caif/caif_serial.c +index 9f30748da4ab9..8c38f224becbc 100644 +--- a/drivers/net/caif/caif_serial.c ++++ b/drivers/net/caif/caif_serial.c +@@ -350,6 +350,7 @@ static int ldisc_open(struct tty_struct *tty) + rtnl_lock(); + result = register_netdevice(dev); + if (result) { ++ tty_kref_put(tty); + rtnl_unlock(); + free_netdev(dev); + return -ENODEV; +diff --git a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c +index 17d5b649eb36b..e81dd34a3cac2 100644 +--- a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c ++++ b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c +@@ -1266,9 +1266,11 @@ int qed_dcbx_get_config_params(struct qed_hwfn *p_hwfn, + p_hwfn->p_dcbx_info->set.ver_num |= DCBX_CONFIG_VERSION_STATIC; + + p_hwfn->p_dcbx_info->set.enabled = dcbx_info->operational.enabled; ++ BUILD_BUG_ON(sizeof(dcbx_info->operational.params) != ++ sizeof(p_hwfn->p_dcbx_info->set.config.params)); + memcpy(&p_hwfn->p_dcbx_info->set.config.params, + &dcbx_info->operational.params, +- sizeof(struct qed_dcbx_admin_params)); ++ sizeof(p_hwfn->p_dcbx_info->set.config.params)); + p_hwfn->p_dcbx_info->set.config.valid = true; + + memcpy(params, &p_hwfn->p_dcbx_info->set, sizeof(struct qed_dcbx_set)); +diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c +index 1df2c002c9f64..f7a56e05ec8a4 100644 +--- a/drivers/net/ethernet/realtek/r8169_main.c ++++ b/drivers/net/ethernet/realtek/r8169_main.c +@@ -1673,7 +1673,7 @@ static void rtl8169_get_strings(struct net_device *dev, u32 stringset, u8 *data) + { + switch(stringset) { + case ETH_SS_STATS: +- memcpy(data, *rtl8169_gstrings, sizeof(rtl8169_gstrings)); ++ 
memcpy(data, rtl8169_gstrings, sizeof(rtl8169_gstrings)); + break; + } + } +diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c +index f029c7c03804f..393cf99856ed3 100644 +--- a/drivers/net/ethernet/renesas/sh_eth.c ++++ b/drivers/net/ethernet/renesas/sh_eth.c +@@ -2287,7 +2287,7 @@ static void sh_eth_get_strings(struct net_device *ndev, u32 stringset, u8 *data) + { + switch (stringset) { + case ETH_SS_STATS: +- memcpy(data, *sh_eth_gstrings_stats, ++ memcpy(data, sh_eth_gstrings_stats, + sizeof(sh_eth_gstrings_stats)); + break; + } +diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c +index 01bb36e7cff0a..6bd3a389d389c 100644 +--- a/drivers/net/ethernet/xilinx/ll_temac_main.c ++++ b/drivers/net/ethernet/xilinx/ll_temac_main.c +@@ -774,12 +774,15 @@ static void temac_start_xmit_done(struct net_device *ndev) + stat = be32_to_cpu(cur_p->app0); + + while (stat & STS_CTRL_APP0_CMPLT) { ++ /* Make sure that the other fields are read after bd is ++ * released by dma ++ */ ++ rmb(); + dma_unmap_single(ndev->dev.parent, be32_to_cpu(cur_p->phys), + be32_to_cpu(cur_p->len), DMA_TO_DEVICE); + skb = (struct sk_buff *)ptr_from_txbd(cur_p); + if (skb) + dev_consume_skb_irq(skb); +- cur_p->app0 = 0; + cur_p->app1 = 0; + cur_p->app2 = 0; + cur_p->app3 = 0; +@@ -788,6 +791,12 @@ static void temac_start_xmit_done(struct net_device *ndev) + ndev->stats.tx_packets++; + ndev->stats.tx_bytes += be32_to_cpu(cur_p->len); + ++ /* app0 must be visible last, as it is used to flag ++ * availability of the bd ++ */ ++ smp_mb(); ++ cur_p->app0 = 0; ++ + lp->tx_bd_ci++; + if (lp->tx_bd_ci >= lp->tx_bd_num) + lp->tx_bd_ci = 0; +@@ -814,6 +823,9 @@ static inline int temac_check_tx_bd_space(struct temac_local *lp, int num_frag) + if (cur_p->app0) + return NETDEV_TX_BUSY; + ++ /* Make sure to read next bd app0 after this one */ ++ rmb(); ++ + tail++; + if (tail >= lp->tx_bd_num) + tail = 0; +@@ -930,6 +942,11 
@@ temac_start_xmit(struct sk_buff *skb, struct net_device *ndev) + wmb(); + lp->dma_out(lp, TX_TAILDESC_PTR, tail_p); /* DMA start */ + ++ if (temac_check_tx_bd_space(lp, MAX_SKB_FRAGS + 1)) { ++ netdev_info(ndev, "%s -> netif_stop_queue\n", __func__); ++ netif_stop_queue(ndev); ++ } ++ + return NETDEV_TX_OK; + } + +diff --git a/drivers/net/phy/dp83867.c b/drivers/net/phy/dp83867.c +index 9bd9a5c0b1db3..6bbc81ad295fb 100644 +--- a/drivers/net/phy/dp83867.c ++++ b/drivers/net/phy/dp83867.c +@@ -826,16 +826,12 @@ static int dp83867_phy_reset(struct phy_device *phydev) + { + int err; + +- err = phy_write(phydev, DP83867_CTRL, DP83867_SW_RESET); ++ err = phy_write(phydev, DP83867_CTRL, DP83867_SW_RESTART); + if (err < 0) + return err; + + usleep_range(10, 20); + +- /* After reset FORCE_LINK_GOOD bit is set. Although the +- * default value should be unset. Disable FORCE_LINK_GOOD +- * for the phy to work properly. +- */ + return phy_modify(phydev, MII_DP83867_PHYCTRL, + DP83867_PHYCR_FORCE_LINK_GOOD, 0); + } +diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c +index 6700f1970b240..bc55ec739af90 100644 +--- a/drivers/net/usb/qmi_wwan.c ++++ b/drivers/net/usb/qmi_wwan.c +@@ -575,7 +575,7 @@ static int qmi_wwan_rx_fixup(struct usbnet *dev, struct sk_buff *skb) + + if (info->flags & QMI_WWAN_FLAG_PASS_THROUGH) { + skb->protocol = htons(ETH_P_MAP); +- return (netif_rx(skb) == NET_RX_SUCCESS); ++ return 1; + } + + switch (skb->data[0] & 0xf0) { +diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c +index 20fb5638ac653..23fae943a1192 100644 +--- a/drivers/net/usb/r8152.c ++++ b/drivers/net/usb/r8152.c +@@ -6078,7 +6078,7 @@ static void rtl8152_get_strings(struct net_device *dev, u32 stringset, u8 *data) + { + switch (stringset) { + case ETH_SS_STATS: +- memcpy(data, *rtl8152_gstrings, sizeof(rtl8152_gstrings)); ++ memcpy(data, rtl8152_gstrings, sizeof(rtl8152_gstrings)); + break; + } + } +diff --git a/drivers/net/wireless/mac80211_hwsim.c 
b/drivers/net/wireless/mac80211_hwsim.c +index fa7d4c20dc13a..30b39cb4056a3 100644 +--- a/drivers/net/wireless/mac80211_hwsim.c ++++ b/drivers/net/wireless/mac80211_hwsim.c +@@ -1693,8 +1693,13 @@ static int mac80211_hwsim_start(struct ieee80211_hw *hw) + static void mac80211_hwsim_stop(struct ieee80211_hw *hw) + { + struct mac80211_hwsim_data *data = hw->priv; ++ + data->started = false; + hrtimer_cancel(&data->beacon_timer); ++ ++ while (!skb_queue_empty(&data->pending)) ++ ieee80211_free_txskb(hw, skb_dequeue(&data->pending)); ++ + wiphy_dbg(hw->wiphy, "%s\n", __func__); + } + +diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c +index e4d4e399004b4..16a17215f633d 100644 +--- a/drivers/pci/pci.c ++++ b/drivers/pci/pci.c +@@ -1870,11 +1870,21 @@ static int pci_enable_device_flags(struct pci_dev *dev, unsigned long flags) + int err; + int i, bars = 0; + +- if (atomic_inc_return(&dev->enable_cnt) > 1) { +- pci_update_current_state(dev, dev->current_state); +- return 0; /* already enabled */ ++ /* ++ * Power state could be unknown at this point, either due to a fresh ++ * boot or a device removal call. So get the current power state ++ * so that things like MSI message writing will behave as expected ++ * (e.g. if the device really is in D0 at enable time). 
++ */ ++ if (dev->pm_cap) { ++ u16 pmcsr; ++ pci_read_config_word(dev, dev->pm_cap + PCI_PM_CTRL, &pmcsr); ++ dev->current_state = (pmcsr & PCI_PM_CTRL_STATE_MASK); + } + ++ if (atomic_inc_return(&dev->enable_cnt) > 1) ++ return 0; /* already enabled */ ++ + bridge = pci_upstream_bridge(dev); + if (bridge) + pci_enable_bridge(bridge); +diff --git a/drivers/pinctrl/pinctrl-microchip-sgpio.c b/drivers/pinctrl/pinctrl-microchip-sgpio.c +index c12fa57ebd12c..165cb7a597155 100644 +--- a/drivers/pinctrl/pinctrl-microchip-sgpio.c ++++ b/drivers/pinctrl/pinctrl-microchip-sgpio.c +@@ -845,8 +845,10 @@ static int microchip_sgpio_probe(struct platform_device *pdev) + i = 0; + device_for_each_child_node(dev, fwnode) { + ret = microchip_sgpio_register_bank(dev, priv, fwnode, i++); +- if (ret) ++ if (ret) { ++ fwnode_handle_put(fwnode); + return ret; ++ } + } + + if (priv->in.gpio.ngpio != priv->out.gpio.ngpio) { +diff --git a/drivers/pinctrl/stm32/pinctrl-stm32.c b/drivers/pinctrl/stm32/pinctrl-stm32.c +index 7d9bdedcd71bb..3af4430543dca 100644 +--- a/drivers/pinctrl/stm32/pinctrl-stm32.c ++++ b/drivers/pinctrl/stm32/pinctrl-stm32.c +@@ -1229,7 +1229,7 @@ static int stm32_gpiolib_register_bank(struct stm32_pinctrl *pctl, + struct device *dev = pctl->dev; + struct resource res; + int npins = STM32_GPIO_PINS_PER_BANK; +- int bank_nr, err; ++ int bank_nr, err, i = 0; + + if (!IS_ERR(bank->rstc)) + reset_control_deassert(bank->rstc); +@@ -1251,9 +1251,14 @@ static int stm32_gpiolib_register_bank(struct stm32_pinctrl *pctl, + + of_property_read_string(np, "st,bank-name", &bank->gpio_chip.label); + +- if (!of_parse_phandle_with_fixed_args(np, "gpio-ranges", 3, 0, &args)) { ++ if (!of_parse_phandle_with_fixed_args(np, "gpio-ranges", 3, i, &args)) { + bank_nr = args.args[1] / STM32_GPIO_PINS_PER_BANK; + bank->gpio_chip.base = args.args[1]; ++ ++ npins = args.args[2]; ++ while (!of_parse_phandle_with_fixed_args(np, "gpio-ranges", 3, ++ ++i, &args)) ++ npins += args.args[2]; + } else { + 
bank_nr = pctl->nbanks; + bank->gpio_chip.base = bank_nr * STM32_GPIO_PINS_PER_BANK; +diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c +index ed0b1bb99f083..a0356f3707b86 100644 +--- a/drivers/scsi/sd.c ++++ b/drivers/scsi/sd.c +@@ -1387,6 +1387,22 @@ static void sd_uninit_command(struct scsi_cmnd *SCpnt) + } + } + ++static bool sd_need_revalidate(struct block_device *bdev, ++ struct scsi_disk *sdkp) ++{ ++ if (sdkp->device->removable || sdkp->write_prot) { ++ if (bdev_check_media_change(bdev)) ++ return true; ++ } ++ ++ /* ++ * Force a full rescan after ioctl(BLKRRPART). While the disk state has ++ * nothing to do with partitions, BLKRRPART is used to force a full ++ * revalidate after things like a format for historical reasons. ++ */ ++ return test_bit(GD_NEED_PART_SCAN, &bdev->bd_disk->state); ++} ++ + /** + * sd_open - open a scsi disk device + * @bdev: Block device of the scsi disk to open +@@ -1423,10 +1439,8 @@ static int sd_open(struct block_device *bdev, fmode_t mode) + if (!scsi_block_when_processing_errors(sdev)) + goto error_out; + +- if (sdev->removable || sdkp->write_prot) { +- if (bdev_check_media_change(bdev)) +- sd_revalidate_disk(bdev->bd_disk); +- } ++ if (sd_need_revalidate(bdev, sdkp)) ++ sd_revalidate_disk(bdev->bd_disk); + + /* + * If the drive is empty, just let the open fail. 
+diff --git a/drivers/spi/spi-nxp-fspi.c b/drivers/spi/spi-nxp-fspi.c +index ab9035662717a..bcc0b5a3a459c 100644 +--- a/drivers/spi/spi-nxp-fspi.c ++++ b/drivers/spi/spi-nxp-fspi.c +@@ -1033,12 +1033,6 @@ static int nxp_fspi_probe(struct platform_device *pdev) + goto err_put_ctrl; + } + +- /* Clear potential interrupts */ +- reg = fspi_readl(f, f->iobase + FSPI_INTR); +- if (reg) +- fspi_writel(f, reg, f->iobase + FSPI_INTR); +- +- + /* find the resources - controller memory mapped space */ + if (is_acpi_node(f->dev->fwnode)) + res = platform_get_resource(pdev, IORESOURCE_MEM, 1); +@@ -1076,6 +1070,11 @@ static int nxp_fspi_probe(struct platform_device *pdev) + } + } + ++ /* Clear potential interrupts */ ++ reg = fspi_readl(f, f->iobase + FSPI_INTR); ++ if (reg) ++ fspi_writel(f, reg, f->iobase + FSPI_INTR); ++ + /* find the irq */ + ret = platform_get_irq(pdev, 0); + if (ret < 0) +diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c +index 7bbfd58958bcc..d7e361fb05482 100644 +--- a/drivers/xen/events/events_base.c ++++ b/drivers/xen/events/events_base.c +@@ -642,6 +642,9 @@ static void xen_irq_lateeoi_locked(struct irq_info *info, bool spurious) + } + + info->eoi_time = 0; ++ ++ /* is_active hasn't been reset yet, do it now. */ ++ smp_store_release(&info->is_active, 0); + do_unmask(info, EVT_MASK_REASON_EOI_PENDING); + } + +@@ -811,6 +814,7 @@ static void xen_evtchn_close(evtchn_port_t port) + BUG(); + } + ++/* Not called for lateeoi events. */ + static void event_handler_exit(struct irq_info *info) + { + smp_store_release(&info->is_active, 0); +@@ -1883,7 +1887,12 @@ static void lateeoi_ack_dynirq(struct irq_data *data) + + if (VALID_EVTCHN(evtchn)) { + do_mask(info, EVT_MASK_REASON_EOI_PENDING); +- event_handler_exit(info); ++ /* ++ * Don't call event_handler_exit(). ++ * Need to keep is_active non-zero in order to ignore re-raised ++ * events after cpu affinity changes while a lateeoi is pending. 
++ */ ++ clear_evtchn(evtchn); + } + } + +diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c +index 26e66436f0058..c000fe338f7e0 100644 +--- a/fs/ceph/addr.c ++++ b/fs/ceph/addr.c +@@ -1302,6 +1302,45 @@ ceph_find_incompatible(struct page *page) + return NULL; + } + ++/** ++ * prep_noread_page - prep a page for writing without reading first ++ * @page: page being prepared ++ * @pos: starting position for the write ++ * @len: length of write ++ * ++ * In some cases, write_begin doesn't need to read at all: ++ * - full page write ++ * - file is currently zero-length ++ * - write that lies in a page that is completely beyond EOF ++ * - write that covers the the page from start to EOF or beyond it ++ * ++ * If any of these criteria are met, then zero out the unwritten parts ++ * of the page and return true. Otherwise, return false. ++ */ ++static bool skip_page_read(struct page *page, loff_t pos, size_t len) ++{ ++ struct inode *inode = page->mapping->host; ++ loff_t i_size = i_size_read(inode); ++ size_t offset = offset_in_page(pos); ++ ++ /* Full page write */ ++ if (offset == 0 && len >= PAGE_SIZE) ++ return true; ++ ++ /* pos beyond last page in the file */ ++ if (pos - offset >= i_size) ++ goto zero_out; ++ ++ /* write that covers the whole page from start to EOF or beyond it */ ++ if (offset == 0 && (pos + len) >= i_size) ++ goto zero_out; ++ ++ return false; ++zero_out: ++ zero_user_segments(page, 0, offset, offset + len, PAGE_SIZE); ++ return true; ++} ++ + /* + * We are only allowed to write into/dirty the page if the page is + * clean, or already dirty within the same snap context. 
+@@ -1315,7 +1354,6 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping, + struct ceph_snap_context *snapc; + struct page *page = NULL; + pgoff_t index = pos >> PAGE_SHIFT; +- int pos_in_page = pos & ~PAGE_MASK; + int r = 0; + + dout("write_begin file %p inode %p page %p %d~%d\n", file, inode, page, (int)pos, (int)len); +@@ -1350,19 +1388,9 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping, + break; + } + +- /* +- * In some cases we don't need to read at all: +- * - full page write +- * - write that lies completely beyond EOF +- * - write that covers the the page from start to EOF or beyond it +- */ +- if ((pos_in_page == 0 && len == PAGE_SIZE) || +- (pos >= i_size_read(inode)) || +- (pos_in_page == 0 && (pos + len) >= i_size_read(inode))) { +- zero_user_segments(page, 0, pos_in_page, +- pos_in_page + len, PAGE_SIZE); ++ /* No need to read in some cases */ ++ if (skip_page_read(page, pos, len)) + break; +- } + + /* + * We need to read it. 
If we get back -EINPROGRESS, then the page was +diff --git a/fs/ceph/file.c b/fs/ceph/file.c +index 209535d5b8d38..3d2e3dd4ee01d 100644 +--- a/fs/ceph/file.c ++++ b/fs/ceph/file.c +@@ -578,6 +578,7 @@ static int ceph_finish_async_create(struct inode *dir, struct dentry *dentry, + struct ceph_inode_info *ci = ceph_inode(dir); + struct inode *inode; + struct timespec64 now; ++ struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb); + struct ceph_vino vino = { .ino = req->r_deleg_ino, + .snap = CEPH_NOSNAP }; + +@@ -615,8 +616,10 @@ static int ceph_finish_async_create(struct inode *dir, struct dentry *dentry, + + ceph_file_layout_to_legacy(lo, &in.layout); + ++ down_read(&mdsc->snap_rwsem); + ret = ceph_fill_inode(inode, NULL, &iinfo, NULL, req->r_session, + req->r_fmode, NULL); ++ up_read(&mdsc->snap_rwsem); + if (ret) { + dout("%s failed to fill inode: %d\n", __func__, ret); + ceph_dir_clear_complete(dir); +diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c +index 179d2ef69a24a..7ee6023adb363 100644 +--- a/fs/ceph/inode.c ++++ b/fs/ceph/inode.c +@@ -762,6 +762,8 @@ int ceph_fill_inode(struct inode *inode, struct page *locked_page, + bool new_version = false; + bool fill_inline = false; + ++ lockdep_assert_held(&mdsc->snap_rwsem); ++ + dout("%s %p ino %llx.%llx v %llu had %llu\n", __func__, + inode, ceph_vinop(inode), le64_to_cpu(info->version), + ci->i_version); +diff --git a/fs/nilfs2/sysfs.c b/fs/nilfs2/sysfs.c +index 303d71430bdd1..9c6c0e2e5880a 100644 +--- a/fs/nilfs2/sysfs.c ++++ b/fs/nilfs2/sysfs.c +@@ -1053,6 +1053,7 @@ void nilfs_sysfs_delete_device_group(struct the_nilfs *nilfs) + nilfs_sysfs_delete_superblock_group(nilfs); + nilfs_sysfs_delete_segctor_group(nilfs); + kobject_del(&nilfs->ns_dev_kobj); ++ kobject_put(&nilfs->ns_dev_kobj); + kfree(nilfs->ns_dev_subgroups); + } + +diff --git a/include/keys/system_keyring.h b/include/keys/system_keyring.h +index fb8b07daa9d15..875e002a41804 100644 +--- a/include/keys/system_keyring.h ++++ 
b/include/keys/system_keyring.h +@@ -31,6 +31,7 @@ extern int restrict_link_by_builtin_and_secondary_trusted( + #define restrict_link_by_builtin_and_secondary_trusted restrict_link_by_builtin_trusted + #endif + ++extern struct pkcs7_message *pkcs7; + #ifdef CONFIG_SYSTEM_BLACKLIST_KEYRING + extern int mark_hash_blacklisted(const char *hash); + extern int is_hash_blacklisted(const u8 *hash, size_t hash_len, +@@ -49,6 +50,20 @@ static inline int is_binary_blacklisted(const u8 *hash, size_t hash_len) + } + #endif + ++#ifdef CONFIG_SYSTEM_REVOCATION_LIST ++extern int add_key_to_revocation_list(const char *data, size_t size); ++extern int is_key_on_revocation_list(struct pkcs7_message *pkcs7); ++#else ++static inline int add_key_to_revocation_list(const char *data, size_t size) ++{ ++ return 0; ++} ++static inline int is_key_on_revocation_list(struct pkcs7_message *pkcs7) ++{ ++ return -ENOKEY; ++} ++#endif ++ + #ifdef CONFIG_IMA_BLACKLIST_KEYRING + extern struct key *ima_blacklist_keyring; + +diff --git a/include/linux/debug_locks.h b/include/linux/debug_locks.h +index 2915f56ad4214..edb5c186b0b7a 100644 +--- a/include/linux/debug_locks.h ++++ b/include/linux/debug_locks.h +@@ -27,8 +27,10 @@ extern int debug_locks_off(void); + int __ret = 0; \ + \ + if (!oops_in_progress && unlikely(c)) { \ ++ instrumentation_begin(); \ + if (debug_locks_off() && !debug_locks_silent) \ + WARN(1, "DEBUG_LOCKS_WARN_ON(%s)", #c); \ ++ instrumentation_end(); \ + __ret = 1; \ + } \ + __ret; \ +diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h +index ba973efcd3692..6686a0baa91d3 100644 +--- a/include/linux/huge_mm.h ++++ b/include/linux/huge_mm.h +@@ -289,6 +289,7 @@ struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr, + vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t orig_pmd); + + extern struct page *huge_zero_page; ++extern unsigned long huge_zero_pfn; + + static inline bool is_huge_zero_page(struct page *page) + { +@@ -297,7 +298,7 @@ 
static inline bool is_huge_zero_page(struct page *page) + + static inline bool is_huge_zero_pmd(pmd_t pmd) + { +- return is_huge_zero_page(pmd_page(pmd)); ++ return READ_ONCE(huge_zero_pfn) == pmd_pfn(pmd) && pmd_present(pmd); + } + + static inline bool is_huge_zero_pud(pud_t pud) +@@ -443,6 +444,11 @@ static inline bool is_huge_zero_page(struct page *page) + return false; + } + ++static inline bool is_huge_zero_pmd(pmd_t pmd) ++{ ++ return false; ++} ++ + static inline bool is_huge_zero_pud(pud_t pud) + { + return false; +diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h +index 5dae4187210d9..28fa3f9bbbfdd 100644 +--- a/include/linux/hugetlb.h ++++ b/include/linux/hugetlb.h +@@ -728,17 +728,6 @@ static inline int hstate_index(struct hstate *h) + return h - hstates; + } + +-pgoff_t __basepage_index(struct page *page); +- +-/* Return page->index in PAGE_SIZE units */ +-static inline pgoff_t basepage_index(struct page *page) +-{ +- if (!PageCompound(page)) +- return page->index; +- +- return __basepage_index(page); +-} +- + extern int dissolve_free_huge_page(struct page *page); + extern int dissolve_free_huge_pages(unsigned long start_pfn, + unsigned long end_pfn); +@@ -969,11 +958,6 @@ static inline int hstate_index(struct hstate *h) + return 0; + } + +-static inline pgoff_t basepage_index(struct page *page) +-{ +- return page->index; +-} +- + static inline int dissolve_free_huge_page(struct page *page) + { + return 0; +diff --git a/include/linux/mm.h b/include/linux/mm.h +index 6c1b29bb35636..cfb0842a7fb96 100644 +--- a/include/linux/mm.h ++++ b/include/linux/mm.h +@@ -1680,6 +1680,7 @@ struct zap_details { + struct address_space *check_mapping; /* Check page->mapping if set */ + pgoff_t first_index; /* Lowest page->index to unmap */ + pgoff_t last_index; /* Highest page->index to unmap */ ++ struct page *single_page; /* Locked page to be unmapped */ + }; + + struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, +@@ -1727,6 
+1728,7 @@ extern vm_fault_t handle_mm_fault(struct vm_area_struct *vma, + extern int fixup_user_fault(struct mm_struct *mm, + unsigned long address, unsigned int fault_flags, + bool *unlocked); ++void unmap_mapping_page(struct page *page); + void unmap_mapping_pages(struct address_space *mapping, + pgoff_t start, pgoff_t nr, bool even_cows); + void unmap_mapping_range(struct address_space *mapping, +@@ -1747,6 +1749,7 @@ static inline int fixup_user_fault(struct mm_struct *mm, unsigned long address, + BUG(); + return -EFAULT; + } ++static inline void unmap_mapping_page(struct page *page) { } + static inline void unmap_mapping_pages(struct address_space *mapping, + pgoff_t start, pgoff_t nr, bool even_cows) { } + static inline void unmap_mapping_range(struct address_space *mapping, +diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h +index 8c9947fd62f30..e0023e5f9aa67 100644 +--- a/include/linux/pagemap.h ++++ b/include/linux/pagemap.h +@@ -501,7 +501,7 @@ static inline struct page *read_mapping_page(struct address_space *mapping, + } + + /* +- * Get index of the page with in radix-tree ++ * Get index of the page within radix-tree (but not for hugetlb pages). + * (TODO: remove once hugetlb pages will have ->index in PAGE_SIZE) + */ + static inline pgoff_t page_to_index(struct page *page) +@@ -520,15 +520,16 @@ static inline pgoff_t page_to_index(struct page *page) + return pgoff; + } + ++extern pgoff_t hugetlb_basepage_index(struct page *page); ++ + /* +- * Get the offset in PAGE_SIZE. +- * (TODO: hugepage should have ->index in PAGE_SIZE) ++ * Get the offset in PAGE_SIZE (even for hugetlb pages). 
++ * (TODO: hugetlb pages should have ->index in PAGE_SIZE) + */ + static inline pgoff_t page_to_pgoff(struct page *page) + { +- if (unlikely(PageHeadHuge(page))) +- return page->index << compound_order(page); +- ++ if (unlikely(PageHuge(page))) ++ return hugetlb_basepage_index(page); + return page_to_index(page); + } + +diff --git a/include/linux/rmap.h b/include/linux/rmap.h +index def5c62c93b3b..8d04e7deedc66 100644 +--- a/include/linux/rmap.h ++++ b/include/linux/rmap.h +@@ -91,6 +91,7 @@ enum ttu_flags { + + TTU_SPLIT_HUGE_PMD = 0x4, /* split huge PMD if any */ + TTU_IGNORE_MLOCK = 0x8, /* ignore mlock */ ++ TTU_SYNC = 0x10, /* avoid racy checks with PVMW_SYNC */ + TTU_IGNORE_HWPOISON = 0x20, /* corrupted page is recoverable */ + TTU_BATCH_FLUSH = 0x40, /* Batch TLB flushes where possible + * and caller guarantees they will +diff --git a/include/net/sock.h b/include/net/sock.h +index 62e3811e95a78..b9bdeca1d784f 100644 +--- a/include/net/sock.h ++++ b/include/net/sock.h +@@ -1928,7 +1928,8 @@ static inline u32 net_tx_rndhash(void) + + static inline void sk_set_txhash(struct sock *sk) + { +- sk->sk_txhash = net_tx_rndhash(); ++ /* This pairs with READ_ONCE() in skb_set_hash_from_sk() */ ++ WRITE_ONCE(sk->sk_txhash, net_tx_rndhash()); + } + + static inline bool sk_rethink_txhash(struct sock *sk) +@@ -2200,9 +2201,12 @@ static inline void sock_poll_wait(struct file *filp, struct socket *sock, + + static inline void skb_set_hash_from_sk(struct sk_buff *skb, struct sock *sk) + { +- if (sk->sk_txhash) { ++ /* This pairs with WRITE_ONCE() in sk_set_txhash() */ ++ u32 txhash = READ_ONCE(sk->sk_txhash); ++ ++ if (txhash) { + skb->l4_hash = 1; +- skb->hash = sk->sk_txhash; ++ skb->hash = txhash; + } + } + +@@ -2260,8 +2264,13 @@ struct sk_buff *sock_dequeue_err_skb(struct sock *sk); + static inline int sock_error(struct sock *sk) + { + int err; +- if (likely(!sk->sk_err)) ++ ++ /* Avoid an atomic operation for the common case. 
++ * This is racy since another cpu/thread can change sk_err under us. ++ */ ++ if (likely(data_race(!sk->sk_err))) + return 0; ++ + err = xchg(&sk->sk_err, 0); + return -err; + } +diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c +index fe4c01c14ab2c..e96f3808e4316 100644 +--- a/kernel/dma/swiotlb.c ++++ b/kernel/dma/swiotlb.c +@@ -724,11 +724,17 @@ void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr, + int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT; + size_t orig_size = io_tlb_orig_size[index]; + phys_addr_t orig_addr = io_tlb_orig_addr[index]; ++ unsigned int tlb_offset; + + if (orig_addr == INVALID_PHYS_ADDR) + return; + +- validate_sync_size_and_truncate(hwdev, orig_size, &size); ++ tlb_offset = (tlb_addr & (IO_TLB_SIZE - 1)) - ++ swiotlb_align_offset(hwdev, orig_addr); ++ ++ orig_addr += tlb_offset; ++ ++ validate_sync_size_and_truncate(hwdev, orig_size - tlb_offset, &size); + + switch (target) { + case SYNC_FOR_CPU: +diff --git a/kernel/futex.c b/kernel/futex.c +index a8629b695d38e..5aa6d0a6c7677 100644 +--- a/kernel/futex.c ++++ b/kernel/futex.c +@@ -35,7 +35,6 @@ + #include <linux/jhash.h> + #include <linux/pagemap.h> + #include <linux/syscalls.h> +-#include <linux/hugetlb.h> + #include <linux/freezer.h> + #include <linux/memblock.h> + #include <linux/fault-inject.h> +@@ -650,7 +649,7 @@ again: + + key->both.offset |= FUT_OFF_INODE; /* inode-based key */ + key->shared.i_seq = get_inode_sequence_number(inode); +- key->shared.pgoff = basepage_index(tail); ++ key->shared.pgoff = page_to_pgoff(tail); + rcu_read_unlock(); + } + +diff --git a/kernel/kthread.c b/kernel/kthread.c +index 6d3c488a0f824..4fdf2bd9b5589 100644 +--- a/kernel/kthread.c ++++ b/kernel/kthread.c +@@ -1092,8 +1092,38 @@ void kthread_flush_work(struct kthread_work *work) + EXPORT_SYMBOL_GPL(kthread_flush_work); + + /* +- * This function removes the work from the worker queue. Also it makes sure +- * that it won't get queued later via the delayed work's timer. 
++ * Make sure that the timer is neither set nor running and could ++ * not manipulate the work list_head any longer. ++ * ++ * The function is called under worker->lock. The lock is temporary ++ * released but the timer can't be set again in the meantime. ++ */ ++static void kthread_cancel_delayed_work_timer(struct kthread_work *work, ++ unsigned long *flags) ++{ ++ struct kthread_delayed_work *dwork = ++ container_of(work, struct kthread_delayed_work, work); ++ struct kthread_worker *worker = work->worker; ++ ++ /* ++ * del_timer_sync() must be called to make sure that the timer ++ * callback is not running. The lock must be temporary released ++ * to avoid a deadlock with the callback. In the meantime, ++ * any queuing is blocked by setting the canceling counter. ++ */ ++ work->canceling++; ++ raw_spin_unlock_irqrestore(&worker->lock, *flags); ++ del_timer_sync(&dwork->timer); ++ raw_spin_lock_irqsave(&worker->lock, *flags); ++ work->canceling--; ++} ++ ++/* ++ * This function removes the work from the worker queue. ++ * ++ * It is called under worker->lock. The caller must make sure that ++ * the timer used by delayed work is not running, e.g. by calling ++ * kthread_cancel_delayed_work_timer(). + * + * The work might still be in use when this function finishes. See the + * current_work proceed by the worker. +@@ -1101,28 +1131,8 @@ EXPORT_SYMBOL_GPL(kthread_flush_work); + * Return: %true if @work was pending and successfully canceled, + * %false if @work was not pending + */ +-static bool __kthread_cancel_work(struct kthread_work *work, bool is_dwork, +- unsigned long *flags) ++static bool __kthread_cancel_work(struct kthread_work *work) + { +- /* Try to cancel the timer if exists. */ +- if (is_dwork) { +- struct kthread_delayed_work *dwork = +- container_of(work, struct kthread_delayed_work, work); +- struct kthread_worker *worker = work->worker; +- +- /* +- * del_timer_sync() must be called to make sure that the timer +- * callback is not running. 
The lock must be temporary released +- * to avoid a deadlock with the callback. In the meantime, +- * any queuing is blocked by setting the canceling counter. +- */ +- work->canceling++; +- raw_spin_unlock_irqrestore(&worker->lock, *flags); +- del_timer_sync(&dwork->timer); +- raw_spin_lock_irqsave(&worker->lock, *flags); +- work->canceling--; +- } +- + /* + * Try to remove the work from a worker list. It might either + * be from worker->work_list or from worker->delayed_work_list. +@@ -1175,11 +1185,23 @@ bool kthread_mod_delayed_work(struct kthread_worker *worker, + /* Work must not be used with >1 worker, see kthread_queue_work() */ + WARN_ON_ONCE(work->worker != worker); + +- /* Do not fight with another command that is canceling this work. */ ++ /* ++ * Temporary cancel the work but do not fight with another command ++ * that is canceling the work as well. ++ * ++ * It is a bit tricky because of possible races with another ++ * mod_delayed_work() and cancel_delayed_work() callers. ++ * ++ * The timer must be canceled first because worker->lock is released ++ * when doing so. But the work can be removed from the queue (list) ++ * only when it can be queued again so that the return value can ++ * be used for reference counting. ++ */ ++ kthread_cancel_delayed_work_timer(work, &flags); + if (work->canceling) + goto out; ++ ret = __kthread_cancel_work(work); + +- ret = __kthread_cancel_work(work, true, &flags); + fast_queue: + __kthread_queue_delayed_work(worker, dwork, delay); + out: +@@ -1201,7 +1223,10 @@ static bool __kthread_cancel_work_sync(struct kthread_work *work, bool is_dwork) + /* Work must not be used with >1 worker, see kthread_queue_work(). 
*/ + WARN_ON_ONCE(work->worker != worker); + +- ret = __kthread_cancel_work(work, is_dwork, &flags); ++ if (is_dwork) ++ kthread_cancel_delayed_work_timer(work, &flags); ++ ++ ret = __kthread_cancel_work(work); + + if (worker->current_work != work) + goto out_fast; +diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c +index f39c383c71804..5bf6b1659215d 100644 +--- a/kernel/locking/lockdep.c ++++ b/kernel/locking/lockdep.c +@@ -842,7 +842,7 @@ static int count_matching_names(struct lock_class *new_class) + } + + /* used from NMI context -- must be lockless */ +-static __always_inline struct lock_class * ++static noinstr struct lock_class * + look_up_lock_class(const struct lockdep_map *lock, unsigned int subclass) + { + struct lockdep_subclass_key *key; +@@ -850,12 +850,14 @@ look_up_lock_class(const struct lockdep_map *lock, unsigned int subclass) + struct lock_class *class; + + if (unlikely(subclass >= MAX_LOCKDEP_SUBCLASSES)) { ++ instrumentation_begin(); + debug_locks_off(); + printk(KERN_ERR + "BUG: looking up invalid subclass: %u\n", subclass); + printk(KERN_ERR + "turning off the locking correctness validator.\n"); + dump_stack(); ++ instrumentation_end(); + return NULL; + } + +diff --git a/kernel/module.c b/kernel/module.c +index 30479355ab850..260d6f3f6d68f 100644 +--- a/kernel/module.c ++++ b/kernel/module.c +@@ -266,9 +266,18 @@ static void module_assert_mutex_or_preempt(void) + #endif + } + ++#ifdef CONFIG_MODULE_SIG + static bool sig_enforce = IS_ENABLED(CONFIG_MODULE_SIG_FORCE); + module_param(sig_enforce, bool_enable_only, 0644); + ++void set_module_sig_enforced(void) ++{ ++ sig_enforce = true; ++} ++#else ++#define sig_enforce false ++#endif ++ + /* + * Export sig_enforce kernel cmdline parameter to allow other subsystems rely + * on that instead of directly to CONFIG_MODULE_SIG_FORCE config. 
+@@ -279,11 +288,6 @@ bool is_module_sig_enforced(void) + } + EXPORT_SYMBOL(is_module_sig_enforced); + +-void set_module_sig_enforced(void) +-{ +- sig_enforce = true; +-} +- + /* Block module loading/unloading? */ + int modules_disabled = 0; + core_param(nomodule, modules_disabled, bint, 0); +diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c +index 651218ded9817..ef37acd28e4ac 100644 +--- a/kernel/sched/psi.c ++++ b/kernel/sched/psi.c +@@ -965,7 +965,7 @@ void psi_cgroup_free(struct cgroup *cgroup) + */ + void cgroup_move_task(struct task_struct *task, struct css_set *to) + { +- unsigned int task_flags = 0; ++ unsigned int task_flags; + struct rq_flags rf; + struct rq *rq; + +@@ -980,15 +980,31 @@ void cgroup_move_task(struct task_struct *task, struct css_set *to) + + rq = task_rq_lock(task, &rf); + +- if (task_on_rq_queued(task)) { +- task_flags = TSK_RUNNING; +- if (task_current(rq, task)) +- task_flags |= TSK_ONCPU; +- } else if (task->in_iowait) +- task_flags = TSK_IOWAIT; +- +- if (task->in_memstall) +- task_flags |= TSK_MEMSTALL; ++ /* ++ * We may race with schedule() dropping the rq lock between ++ * deactivating prev and switching to next. Because the psi ++ * updates from the deactivation are deferred to the switch ++ * callback to save cgroup tree updates, the task's scheduling ++ * state here is not coherent with its psi state: ++ * ++ * schedule() cgroup_move_task() ++ * rq_lock() ++ * deactivate_task() ++ * p->on_rq = 0 ++ * psi_dequeue() // defers TSK_RUNNING & TSK_IOWAIT updates ++ * pick_next_task() ++ * rq_unlock() ++ * rq_lock() ++ * psi_task_change() // old cgroup ++ * task->cgroups = to ++ * psi_task_change() // new cgroup ++ * rq_unlock() ++ * rq_lock() ++ * psi_sched_switch() // does deferred updates in new cgroup ++ * ++ * Don't rely on the scheduling state. Use psi_flags instead. 
++ */ ++ task_flags = task->psi_flags; + + if (task_flags) + psi_task_change(task, task_flags, 0); +diff --git a/lib/debug_locks.c b/lib/debug_locks.c +index 06d3135bd184c..a75ee30b77cb8 100644 +--- a/lib/debug_locks.c ++++ b/lib/debug_locks.c +@@ -36,7 +36,7 @@ EXPORT_SYMBOL_GPL(debug_locks_silent); + /* + * Generic 'turn off all lock debugging' function: + */ +-noinstr int debug_locks_off(void) ++int debug_locks_off(void) + { + if (debug_locks && __debug_locks_off()) { + if (!debug_locks_silent) { +diff --git a/mm/huge_memory.c b/mm/huge_memory.c +index ae907a9c20506..44c455dbbd637 100644 +--- a/mm/huge_memory.c ++++ b/mm/huge_memory.c +@@ -61,6 +61,7 @@ static struct shrinker deferred_split_shrinker; + + static atomic_t huge_zero_refcount; + struct page *huge_zero_page __read_mostly; ++unsigned long huge_zero_pfn __read_mostly = ~0UL; + + bool transparent_hugepage_enabled(struct vm_area_struct *vma) + { +@@ -97,6 +98,7 @@ retry: + __free_pages(zero_page, compound_order(zero_page)); + goto retry; + } ++ WRITE_ONCE(huge_zero_pfn, page_to_pfn(zero_page)); + + /* We take additional reference here. It will be put back by shrinker */ + atomic_set(&huge_zero_refcount, 2); +@@ -146,6 +148,7 @@ static unsigned long shrink_huge_zero_page_scan(struct shrinker *shrink, + if (atomic_cmpxchg(&huge_zero_refcount, 1, 0) == 1) { + struct page *zero_page = xchg(&huge_zero_page, NULL); + BUG_ON(zero_page == NULL); ++ WRITE_ONCE(huge_zero_pfn, ~0UL); + __free_pages(zero_page, compound_order(zero_page)); + return HPAGE_PMD_NR; + } +@@ -2046,7 +2049,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, + count_vm_event(THP_SPLIT_PMD); + + if (!vma_is_anonymous(vma)) { +- _pmd = pmdp_huge_clear_flush_notify(vma, haddr, pmd); ++ old_pmd = pmdp_huge_clear_flush_notify(vma, haddr, pmd); + /* + * We are going to unmap this huge page. 
So + * just go ahead and zap it +@@ -2055,16 +2058,25 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, + zap_deposited_table(mm, pmd); + if (vma_is_special_huge(vma)) + return; +- page = pmd_page(_pmd); +- if (!PageDirty(page) && pmd_dirty(_pmd)) +- set_page_dirty(page); +- if (!PageReferenced(page) && pmd_young(_pmd)) +- SetPageReferenced(page); +- page_remove_rmap(page, true); +- put_page(page); ++ if (unlikely(is_pmd_migration_entry(old_pmd))) { ++ swp_entry_t entry; ++ ++ entry = pmd_to_swp_entry(old_pmd); ++ page = migration_entry_to_page(entry); ++ } else { ++ page = pmd_page(old_pmd); ++ if (!PageDirty(page) && pmd_dirty(old_pmd)) ++ set_page_dirty(page); ++ if (!PageReferenced(page) && pmd_young(old_pmd)) ++ SetPageReferenced(page); ++ page_remove_rmap(page, true); ++ put_page(page); ++ } + add_mm_counter(mm, mm_counter_file(page), -HPAGE_PMD_NR); + return; +- } else if (pmd_trans_huge(*pmd) && is_huge_zero_pmd(*pmd)) { ++ } ++ ++ if (is_huge_zero_pmd(*pmd)) { + /* + * FIXME: Do we want to invalidate secondary mmu by calling + * mmu_notifier_invalidate_range() see comments below inside +@@ -2346,17 +2358,17 @@ void vma_adjust_trans_huge(struct vm_area_struct *vma, + + static void unmap_page(struct page *page) + { +- enum ttu_flags ttu_flags = TTU_IGNORE_MLOCK | ++ enum ttu_flags ttu_flags = TTU_IGNORE_MLOCK | TTU_SYNC | + TTU_RMAP_LOCKED | TTU_SPLIT_HUGE_PMD; +- bool unmap_success; + + VM_BUG_ON_PAGE(!PageHead(page), page); + + if (PageAnon(page)) + ttu_flags |= TTU_SPLIT_FREEZE; + +- unmap_success = try_to_unmap(page, ttu_flags); +- VM_BUG_ON_PAGE(!unmap_success, page); ++ try_to_unmap(page, ttu_flags); ++ ++ VM_WARN_ON_ONCE_PAGE(page_mapped(page), page); + } + + static void remap_page(struct page *page, unsigned int nr) +@@ -2667,7 +2679,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) + struct deferred_split *ds_queue = get_deferred_split_queue(head); + struct anon_vma *anon_vma = NULL; + struct 
address_space *mapping = NULL; +- int count, mapcount, extra_pins, ret; ++ int extra_pins, ret; + pgoff_t end; + + VM_BUG_ON_PAGE(is_huge_zero_page(head), head); +@@ -2726,7 +2738,6 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) + } + + unmap_page(head); +- VM_BUG_ON_PAGE(compound_mapcount(head), head); + + /* block interrupt reentry in xa_lock and spinlock */ + local_irq_disable(); +@@ -2744,9 +2755,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) + + /* Prevent deferred_split_scan() touching ->_refcount */ + spin_lock(&ds_queue->split_queue_lock); +- count = page_count(head); +- mapcount = total_mapcount(head); +- if (!mapcount && page_ref_freeze(head, 1 + extra_pins)) { ++ if (page_ref_freeze(head, 1 + extra_pins)) { + if (!list_empty(page_deferred_list(head))) { + ds_queue->split_queue_len--; + list_del(page_deferred_list(head)); +@@ -2766,16 +2775,9 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) + __split_huge_page(page, list, end); + ret = 0; + } else { +- if (IS_ENABLED(CONFIG_DEBUG_VM) && mapcount) { +- pr_alert("total_mapcount: %u, page_count(): %u\n", +- mapcount, count); +- if (PageTail(page)) +- dump_page(head, NULL); +- dump_page(page, "total_mapcount(head) > 0"); +- BUG(); +- } + spin_unlock(&ds_queue->split_queue_lock); +-fail: if (mapping) ++fail: ++ if (mapping) + xa_unlock(&mapping->i_pages); + local_irq_enable(); + remap_page(head, thp_nr_pages(head)); +diff --git a/mm/hugetlb.c b/mm/hugetlb.c +index 3da4817190f3d..7ba7d9b20494a 100644 +--- a/mm/hugetlb.c ++++ b/mm/hugetlb.c +@@ -1584,15 +1584,12 @@ struct address_space *hugetlb_page_mapping_lock_write(struct page *hpage) + return NULL; + } + +-pgoff_t __basepage_index(struct page *page) ++pgoff_t hugetlb_basepage_index(struct page *page) + { + struct page *page_head = compound_head(page); + pgoff_t index = page_index(page_head); + unsigned long compound_idx; + +- if (!PageHuge(page_head)) +- return page_index(page); 
+- + if (compound_order(page_head) >= MAX_ORDER) + compound_idx = page_to_pfn(page) - page_to_pfn(page_head); + else +diff --git a/mm/internal.h b/mm/internal.h +index 1432feec62df0..08323e622bbd1 100644 +--- a/mm/internal.h ++++ b/mm/internal.h +@@ -379,27 +379,52 @@ static inline void mlock_migrate_page(struct page *newpage, struct page *page) + extern pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma); + + /* +- * At what user virtual address is page expected in @vma? ++ * At what user virtual address is page expected in vma? ++ * Returns -EFAULT if all of the page is outside the range of vma. ++ * If page is a compound head, the entire compound page is considered. + */ + static inline unsigned long +-__vma_address(struct page *page, struct vm_area_struct *vma) ++vma_address(struct page *page, struct vm_area_struct *vma) + { +- pgoff_t pgoff = page_to_pgoff(page); +- return vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT); ++ pgoff_t pgoff; ++ unsigned long address; ++ ++ VM_BUG_ON_PAGE(PageKsm(page), page); /* KSM page->index unusable */ ++ pgoff = page_to_pgoff(page); ++ if (pgoff >= vma->vm_pgoff) { ++ address = vma->vm_start + ++ ((pgoff - vma->vm_pgoff) << PAGE_SHIFT); ++ /* Check for address beyond vma (or wrapped through 0?) */ ++ if (address < vma->vm_start || address >= vma->vm_end) ++ address = -EFAULT; ++ } else if (PageHead(page) && ++ pgoff + compound_nr(page) - 1 >= vma->vm_pgoff) { ++ /* Test above avoids possibility of wrap to 0 on 32-bit */ ++ address = vma->vm_start; ++ } else { ++ address = -EFAULT; ++ } ++ return address; + } + ++/* ++ * Then at what user virtual address will none of the page be found in vma? ++ * Assumes that vma_address() already returned a good starting address. ++ * If page is a compound head, the entire compound page is considered. 
++ */ + static inline unsigned long +-vma_address(struct page *page, struct vm_area_struct *vma) ++vma_address_end(struct page *page, struct vm_area_struct *vma) + { +- unsigned long start, end; +- +- start = __vma_address(page, vma); +- end = start + thp_size(page) - PAGE_SIZE; +- +- /* page should be within @vma mapping range */ +- VM_BUG_ON_VMA(end < vma->vm_start || start >= vma->vm_end, vma); +- +- return max(start, vma->vm_start); ++ pgoff_t pgoff; ++ unsigned long address; ++ ++ VM_BUG_ON_PAGE(PageKsm(page), page); /* KSM page->index unusable */ ++ pgoff = page_to_pgoff(page) + compound_nr(page); ++ address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT); ++ /* Check for address beyond vma (or wrapped through 0?) */ ++ if (address < vma->vm_start || address > vma->vm_end) ++ address = vma->vm_end; ++ return address; + } + + static inline struct file *maybe_unlock_mmap_for_io(struct vm_fault *vmf, +diff --git a/mm/memory-failure.c b/mm/memory-failure.c +index 704d05057d8c3..4db6f95e55be0 100644 +--- a/mm/memory-failure.c ++++ b/mm/memory-failure.c +@@ -658,6 +658,7 @@ static int truncate_error_page(struct page *p, unsigned long pfn, + */ + static int me_kernel(struct page *p, unsigned long pfn) + { ++ unlock_page(p); + return MF_IGNORED; + } + +@@ -667,6 +668,7 @@ static int me_kernel(struct page *p, unsigned long pfn) + static int me_unknown(struct page *p, unsigned long pfn) + { + pr_err("Memory failure: %#lx: Unknown page state\n", pfn); ++ unlock_page(p); + return MF_FAILED; + } + +@@ -675,6 +677,7 @@ static int me_unknown(struct page *p, unsigned long pfn) + */ + static int me_pagecache_clean(struct page *p, unsigned long pfn) + { ++ int ret; + struct address_space *mapping; + + delete_from_lru_cache(p); +@@ -683,8 +686,10 @@ static int me_pagecache_clean(struct page *p, unsigned long pfn) + * For anonymous pages we're done the only reference left + * should be the one m_f() holds. 
+ */ +- if (PageAnon(p)) +- return MF_RECOVERED; ++ if (PageAnon(p)) { ++ ret = MF_RECOVERED; ++ goto out; ++ } + + /* + * Now truncate the page in the page cache. This is really +@@ -698,7 +703,8 @@ static int me_pagecache_clean(struct page *p, unsigned long pfn) + /* + * Page has been teared down in the meanwhile + */ +- return MF_FAILED; ++ ret = MF_FAILED; ++ goto out; + } + + /* +@@ -706,7 +712,10 @@ static int me_pagecache_clean(struct page *p, unsigned long pfn) + * + * Open: to take i_mutex or not for this? Right now we don't. + */ +- return truncate_error_page(p, pfn, mapping); ++ ret = truncate_error_page(p, pfn, mapping); ++out: ++ unlock_page(p); ++ return ret; + } + + /* +@@ -782,24 +791,26 @@ static int me_pagecache_dirty(struct page *p, unsigned long pfn) + */ + static int me_swapcache_dirty(struct page *p, unsigned long pfn) + { ++ int ret; ++ + ClearPageDirty(p); + /* Trigger EIO in shmem: */ + ClearPageUptodate(p); + +- if (!delete_from_lru_cache(p)) +- return MF_DELAYED; +- else +- return MF_FAILED; ++ ret = delete_from_lru_cache(p) ? MF_FAILED : MF_DELAYED; ++ unlock_page(p); ++ return ret; + } + + static int me_swapcache_clean(struct page *p, unsigned long pfn) + { ++ int ret; ++ + delete_from_swap_cache(p); + +- if (!delete_from_lru_cache(p)) +- return MF_RECOVERED; +- else +- return MF_FAILED; ++ ret = delete_from_lru_cache(p) ? 
MF_FAILED : MF_RECOVERED; ++ unlock_page(p); ++ return ret; + } + + /* +@@ -820,6 +831,7 @@ static int me_huge_page(struct page *p, unsigned long pfn) + mapping = page_mapping(hpage); + if (mapping) { + res = truncate_error_page(hpage, pfn, mapping); ++ unlock_page(hpage); + } else { + res = MF_FAILED; + unlock_page(hpage); +@@ -834,7 +846,6 @@ static int me_huge_page(struct page *p, unsigned long pfn) + page_ref_inc(p); + res = MF_RECOVERED; + } +- lock_page(hpage); + } + + return res; +@@ -866,6 +877,8 @@ static struct page_state { + unsigned long mask; + unsigned long res; + enum mf_action_page_type type; ++ ++ /* Callback ->action() has to unlock the relevant page inside it. */ + int (*action)(struct page *p, unsigned long pfn); + } error_states[] = { + { reserved, reserved, MF_MSG_KERNEL, me_kernel }, +@@ -929,6 +942,7 @@ static int page_action(struct page_state *ps, struct page *p, + int result; + int count; + ++ /* page p should be unlocked after returning from ps->action(). */ + result = ps->action(p, pfn); + + count = page_count(p) - 1; +@@ -1313,7 +1327,7 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags) + goto out; + } + +- res = identify_page_state(pfn, p, page_flags); ++ return identify_page_state(pfn, p, page_flags); + out: + unlock_page(head); + return res; +@@ -1429,9 +1443,10 @@ int memory_failure(unsigned long pfn, int flags) + struct page *hpage; + struct page *orig_head; + struct dev_pagemap *pgmap; +- int res; ++ int res = 0; + unsigned long page_flags; + bool retry = true; ++ static DEFINE_MUTEX(mf_mutex); + + if (!sysctl_memory_failure_recovery) + panic("Memory failure on page %lx", pfn); +@@ -1449,13 +1464,18 @@ int memory_failure(unsigned long pfn, int flags) + return -ENXIO; + } + ++ mutex_lock(&mf_mutex); ++ + try_again: +- if (PageHuge(p)) +- return memory_failure_hugetlb(pfn, flags); ++ if (PageHuge(p)) { ++ res = memory_failure_hugetlb(pfn, flags); ++ goto unlock_mutex; ++ } ++ + if (TestSetPageHWPoison(p)) { + 
pr_err("Memory failure: %#lx: already hardware poisoned\n", + pfn); +- return 0; ++ goto unlock_mutex; + } + + orig_head = hpage = compound_head(p); +@@ -1488,17 +1508,19 @@ try_again: + res = MF_FAILED; + } + action_result(pfn, MF_MSG_BUDDY, res); +- return res == MF_RECOVERED ? 0 : -EBUSY; ++ res = res == MF_RECOVERED ? 0 : -EBUSY; + } else { + action_result(pfn, MF_MSG_KERNEL_HIGH_ORDER, MF_IGNORED); +- return -EBUSY; ++ res = -EBUSY; + } ++ goto unlock_mutex; + } + + if (PageTransHuge(hpage)) { + if (try_to_split_thp_page(p, "Memory Failure") < 0) { + action_result(pfn, MF_MSG_UNSPLIT_THP, MF_IGNORED); +- return -EBUSY; ++ res = -EBUSY; ++ goto unlock_mutex; + } + VM_BUG_ON_PAGE(!page_count(p), p); + } +@@ -1522,7 +1544,7 @@ try_again: + if (PageCompound(p) && compound_head(p) != orig_head) { + action_result(pfn, MF_MSG_DIFFERENT_COMPOUND, MF_IGNORED); + res = -EBUSY; +- goto out; ++ goto unlock_page; + } + + /* +@@ -1542,14 +1564,14 @@ try_again: + num_poisoned_pages_dec(); + unlock_page(p); + put_page(p); +- return 0; ++ goto unlock_mutex; + } + if (hwpoison_filter(p)) { + if (TestClearPageHWPoison(p)) + num_poisoned_pages_dec(); + unlock_page(p); + put_page(p); +- return 0; ++ goto unlock_mutex; + } + + /* +@@ -1573,7 +1595,7 @@ try_again: + if (!hwpoison_user_mappings(p, pfn, flags, &p)) { + action_result(pfn, MF_MSG_UNMAP_FAILED, MF_IGNORED); + res = -EBUSY; +- goto out; ++ goto unlock_page; + } + + /* +@@ -1582,13 +1604,17 @@ try_again: + if (PageLRU(p) && !PageSwapCache(p) && p->mapping == NULL) { + action_result(pfn, MF_MSG_TRUNCATED_LRU, MF_IGNORED); + res = -EBUSY; +- goto out; ++ goto unlock_page; + } + + identify_page_state: + res = identify_page_state(pfn, p, page_flags); +-out: ++ mutex_unlock(&mf_mutex); ++ return res; ++unlock_page: + unlock_page(p); ++unlock_mutex: ++ mutex_unlock(&mf_mutex); + return res; + } + EXPORT_SYMBOL_GPL(memory_failure); +diff --git a/mm/memory.c b/mm/memory.c +index 14a6c66b37483..36624986130be 100644 +--- 
a/mm/memory.c ++++ b/mm/memory.c +@@ -1361,7 +1361,18 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb, + else if (zap_huge_pmd(tlb, vma, pmd, addr)) + goto next; + /* fall through */ ++ } else if (details && details->single_page && ++ PageTransCompound(details->single_page) && ++ next - addr == HPAGE_PMD_SIZE && pmd_none(*pmd)) { ++ spinlock_t *ptl = pmd_lock(tlb->mm, pmd); ++ /* ++ * Take and drop THP pmd lock so that we cannot return ++ * prematurely, while zap_huge_pmd() has cleared *pmd, ++ * but not yet decremented compound_mapcount(). ++ */ ++ spin_unlock(ptl); + } ++ + /* + * Here there can be other concurrent MADV_DONTNEED or + * trans huge page faults running, and if the pmd is +@@ -3193,6 +3204,36 @@ static inline void unmap_mapping_range_tree(struct rb_root_cached *root, + } + } + ++/** ++ * unmap_mapping_page() - Unmap single page from processes. ++ * @page: The locked page to be unmapped. ++ * ++ * Unmap this page from any userspace process which still has it mmaped. ++ * Typically, for efficiency, the range of nearby pages has already been ++ * unmapped by unmap_mapping_pages() or unmap_mapping_range(). But once ++ * truncation or invalidation holds the lock on a page, it may find that ++ * the page has been remapped again: and then uses unmap_mapping_page() ++ * to unmap it finally. ++ */ ++void unmap_mapping_page(struct page *page) ++{ ++ struct address_space *mapping = page->mapping; ++ struct zap_details details = { }; ++ ++ VM_BUG_ON(!PageLocked(page)); ++ VM_BUG_ON(PageTail(page)); ++ ++ details.check_mapping = mapping; ++ details.first_index = page->index; ++ details.last_index = page->index + thp_nr_pages(page) - 1; ++ details.single_page = page; ++ ++ i_mmap_lock_write(mapping); ++ if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root))) ++ unmap_mapping_range_tree(&mapping->i_mmap, &details); ++ i_mmap_unlock_write(mapping); ++} ++ + /** + * unmap_mapping_pages() - Unmap pages from processes. 
+ * @mapping: The address space containing pages to be unmapped. +diff --git a/mm/migrate.c b/mm/migrate.c +index 773622cffe779..40455e753c5b4 100644 +--- a/mm/migrate.c ++++ b/mm/migrate.c +@@ -322,6 +322,7 @@ void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep, + goto out; + + page = migration_entry_to_page(entry); ++ page = compound_head(page); + + /* + * Once page cache replacement of page migration started, page_count +diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c +index 86e3a3688d592..3350faeb199a6 100644 +--- a/mm/page_vma_mapped.c ++++ b/mm/page_vma_mapped.c +@@ -116,6 +116,13 @@ static bool check_pte(struct page_vma_mapped_walk *pvmw) + return pfn_is_match(pvmw->page, pfn); + } + ++static void step_forward(struct page_vma_mapped_walk *pvmw, unsigned long size) ++{ ++ pvmw->address = (pvmw->address + size) & ~(size - 1); ++ if (!pvmw->address) ++ pvmw->address = ULONG_MAX; ++} ++ + /** + * page_vma_mapped_walk - check if @pvmw->page is mapped in @pvmw->vma at + * @pvmw->address +@@ -144,6 +151,7 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw) + { + struct mm_struct *mm = pvmw->vma->vm_mm; + struct page *page = pvmw->page; ++ unsigned long end; + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; +@@ -153,10 +161,11 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw) + if (pvmw->pmd && !pvmw->pte) + return not_found(pvmw); + +- if (pvmw->pte) +- goto next_pte; ++ if (unlikely(PageHuge(page))) { ++ /* The only possible mapping was handled on last iteration */ ++ if (pvmw->pte) ++ return not_found(pvmw); + +- if (unlikely(PageHuge(pvmw->page))) { + /* when pud is not present, pte will be NULL */ + pvmw->pte = huge_pte_offset(mm, pvmw->address, page_size(page)); + if (!pvmw->pte) +@@ -168,78 +177,108 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw) + return not_found(pvmw); + return true; + } +-restart: +- pgd = pgd_offset(mm, pvmw->address); +- if (!pgd_present(*pgd)) +- return false; +- p4d = 
p4d_offset(pgd, pvmw->address); +- if (!p4d_present(*p4d)) +- return false; +- pud = pud_offset(p4d, pvmw->address); +- if (!pud_present(*pud)) +- return false; +- pvmw->pmd = pmd_offset(pud, pvmw->address); ++ + /* +- * Make sure the pmd value isn't cached in a register by the +- * compiler and used as a stale value after we've observed a +- * subsequent update. ++ * Seek to next pte only makes sense for THP. ++ * But more important than that optimization, is to filter out ++ * any PageKsm page: whose page->index misleads vma_address() ++ * and vma_address_end() to disaster. + */ +- pmde = READ_ONCE(*pvmw->pmd); +- if (pmd_trans_huge(pmde) || is_pmd_migration_entry(pmde)) { +- pvmw->ptl = pmd_lock(mm, pvmw->pmd); +- if (likely(pmd_trans_huge(*pvmw->pmd))) { +- if (pvmw->flags & PVMW_MIGRATION) +- return not_found(pvmw); +- if (pmd_page(*pvmw->pmd) != page) +- return not_found(pvmw); +- return true; +- } else if (!pmd_present(*pvmw->pmd)) { +- if (thp_migration_supported()) { +- if (!(pvmw->flags & PVMW_MIGRATION)) ++ end = PageTransCompound(page) ? ++ vma_address_end(page, pvmw->vma) : ++ pvmw->address + PAGE_SIZE; ++ if (pvmw->pte) ++ goto next_pte; ++restart: ++ do { ++ pgd = pgd_offset(mm, pvmw->address); ++ if (!pgd_present(*pgd)) { ++ step_forward(pvmw, PGDIR_SIZE); ++ continue; ++ } ++ p4d = p4d_offset(pgd, pvmw->address); ++ if (!p4d_present(*p4d)) { ++ step_forward(pvmw, P4D_SIZE); ++ continue; ++ } ++ pud = pud_offset(p4d, pvmw->address); ++ if (!pud_present(*pud)) { ++ step_forward(pvmw, PUD_SIZE); ++ continue; ++ } ++ ++ pvmw->pmd = pmd_offset(pud, pvmw->address); ++ /* ++ * Make sure the pmd value isn't cached in a register by the ++ * compiler and used as a stale value after we've observed a ++ * subsequent update. 
++ */ ++ pmde = READ_ONCE(*pvmw->pmd); ++ ++ if (pmd_trans_huge(pmde) || is_pmd_migration_entry(pmde)) { ++ pvmw->ptl = pmd_lock(mm, pvmw->pmd); ++ pmde = *pvmw->pmd; ++ if (likely(pmd_trans_huge(pmde))) { ++ if (pvmw->flags & PVMW_MIGRATION) + return not_found(pvmw); +- if (is_migration_entry(pmd_to_swp_entry(*pvmw->pmd))) { +- swp_entry_t entry = pmd_to_swp_entry(*pvmw->pmd); ++ if (pmd_page(pmde) != page) ++ return not_found(pvmw); ++ return true; ++ } ++ if (!pmd_present(pmde)) { ++ swp_entry_t entry; + +- if (migration_entry_to_page(entry) != page) +- return not_found(pvmw); +- return true; +- } ++ if (!thp_migration_supported() || ++ !(pvmw->flags & PVMW_MIGRATION)) ++ return not_found(pvmw); ++ entry = pmd_to_swp_entry(pmde); ++ if (!is_migration_entry(entry) || ++ migration_entry_to_page(entry) != page) ++ return not_found(pvmw); ++ return true; + } +- return not_found(pvmw); +- } else { + /* THP pmd was split under us: handle on pte level */ + spin_unlock(pvmw->ptl); + pvmw->ptl = NULL; ++ } else if (!pmd_present(pmde)) { ++ /* ++ * If PVMW_SYNC, take and drop THP pmd lock so that we ++ * cannot return prematurely, while zap_huge_pmd() has ++ * cleared *pmd but not decremented compound_mapcount(). 
++ */ ++ if ((pvmw->flags & PVMW_SYNC) && ++ PageTransCompound(page)) { ++ spinlock_t *ptl = pmd_lock(mm, pvmw->pmd); ++ ++ spin_unlock(ptl); ++ } ++ step_forward(pvmw, PMD_SIZE); ++ continue; + } +- } else if (!pmd_present(pmde)) { +- return false; +- } +- if (!map_pte(pvmw)) +- goto next_pte; +- while (1) { ++ if (!map_pte(pvmw)) ++ goto next_pte; ++this_pte: + if (check_pte(pvmw)) + return true; + next_pte: +- /* Seek to next pte only makes sense for THP */ +- if (!PageTransHuge(pvmw->page) || PageHuge(pvmw->page)) +- return not_found(pvmw); + do { + pvmw->address += PAGE_SIZE; +- if (pvmw->address >= pvmw->vma->vm_end || +- pvmw->address >= +- __vma_address(pvmw->page, pvmw->vma) + +- thp_size(pvmw->page)) ++ if (pvmw->address >= end) + return not_found(pvmw); + /* Did we cross page table boundary? */ +- if (pvmw->address % PMD_SIZE == 0) { +- pte_unmap(pvmw->pte); ++ if ((pvmw->address & (PMD_SIZE - PAGE_SIZE)) == 0) { + if (pvmw->ptl) { + spin_unlock(pvmw->ptl); + pvmw->ptl = NULL; + } ++ pte_unmap(pvmw->pte); ++ pvmw->pte = NULL; + goto restart; +- } else { +- pvmw->pte++; ++ } ++ pvmw->pte++; ++ if ((pvmw->flags & PVMW_SYNC) && !pvmw->ptl) { ++ pvmw->ptl = pte_lockptr(mm, pvmw->pmd); ++ spin_lock(pvmw->ptl); + } + } while (pte_none(*pvmw->pte)); + +@@ -247,7 +286,10 @@ next_pte: + pvmw->ptl = pte_lockptr(mm, pvmw->pmd); + spin_lock(pvmw->ptl); + } +- } ++ goto this_pte; ++ } while (pvmw->address < end); ++ ++ return false; + } + + /** +@@ -266,14 +308,10 @@ int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma) + .vma = vma, + .flags = PVMW_SYNC, + }; +- unsigned long start, end; +- +- start = __vma_address(page, vma); +- end = start + thp_size(page) - PAGE_SIZE; + +- if (unlikely(end < vma->vm_start || start >= vma->vm_end)) ++ pvmw.address = vma_address(page, vma); ++ if (pvmw.address == -EFAULT) + return 0; +- pvmw.address = max(start, vma->vm_start); + if (!page_vma_mapped_walk(&pvmw)) + return 0; + page_vma_mapped_walk_done(&pvmw); +diff 
--git a/mm/pgtable-generic.c b/mm/pgtable-generic.c +index c2210e1cdb515..4e640baf97948 100644 +--- a/mm/pgtable-generic.c ++++ b/mm/pgtable-generic.c +@@ -135,9 +135,8 @@ pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma, unsigned long address, + { + pmd_t pmd; + VM_BUG_ON(address & ~HPAGE_PMD_MASK); +- VM_BUG_ON(!pmd_present(*pmdp)); +- /* Below assumes pmd_present() is true */ +- VM_BUG_ON(!pmd_trans_huge(*pmdp) && !pmd_devmap(*pmdp)); ++ VM_BUG_ON(pmd_present(*pmdp) && !pmd_trans_huge(*pmdp) && ++ !pmd_devmap(*pmdp)); + pmd = pmdp_huge_get_and_clear(vma->vm_mm, address, pmdp); + flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE); + return pmd; +diff --git a/mm/rmap.c b/mm/rmap.c +index b0fc27e77d6d7..3665d062cc9ce 100644 +--- a/mm/rmap.c ++++ b/mm/rmap.c +@@ -707,7 +707,6 @@ static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags) + */ + unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma) + { +- unsigned long address; + if (PageAnon(page)) { + struct anon_vma *page__anon_vma = page_anon_vma(page); + /* +@@ -717,15 +716,13 @@ unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma) + if (!vma->anon_vma || !page__anon_vma || + vma->anon_vma->root != page__anon_vma->root) + return -EFAULT; +- } else if (page->mapping) { +- if (!vma->vm_file || vma->vm_file->f_mapping != page->mapping) +- return -EFAULT; +- } else ++ } else if (!vma->vm_file) { + return -EFAULT; +- address = __vma_address(page, vma); +- if (unlikely(address < vma->vm_start || address >= vma->vm_end)) ++ } else if (vma->vm_file->f_mapping != compound_head(page)->mapping) { + return -EFAULT; +- return address; ++ } ++ ++ return vma_address(page, vma); + } + + pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address) +@@ -919,7 +916,7 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma, + */ + mmu_notifier_range_init(&range, MMU_NOTIFY_PROTECTION_PAGE, + 0, vma, vma->vm_mm, address, +- 
min(vma->vm_end, address + page_size(page))); ++ vma_address_end(page, vma)); + mmu_notifier_invalidate_range_start(&range); + + while (page_vma_mapped_walk(&pvmw)) { +@@ -1405,6 +1402,15 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, + struct mmu_notifier_range range; + enum ttu_flags flags = (enum ttu_flags)(long)arg; + ++ /* ++ * When racing against e.g. zap_pte_range() on another cpu, ++ * in between its ptep_get_and_clear_full() and page_remove_rmap(), ++ * try_to_unmap() may return false when it is about to become true, ++ * if page table locking is skipped: use TTU_SYNC to wait for that. ++ */ ++ if (flags & TTU_SYNC) ++ pvmw.flags = PVMW_SYNC; ++ + /* munlock has nothing to gain from examining un-locked vmas */ + if ((flags & TTU_MUNLOCK) && !(vma->vm_flags & VM_LOCKED)) + return true; +@@ -1426,9 +1432,10 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, + * Note that the page can not be free in this function as call of + * try_to_unmap() must hold a reference on the page. + */ ++ range.end = PageKsm(page) ? ++ address + PAGE_SIZE : vma_address_end(page, vma); + mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm, +- address, +- min(vma->vm_end, address + page_size(page))); ++ address, range.end); + if (PageHuge(page)) { + /* + * If sharing is possible, start and end will be adjusted +@@ -1777,7 +1784,13 @@ bool try_to_unmap(struct page *page, enum ttu_flags flags) + else + rmap_walk(page, &rwc); + +- return !page_mapcount(page) ? true : false; ++ /* ++ * When racing against e.g. zap_pte_range() on another cpu, ++ * in between its ptep_get_and_clear_full() and page_remove_rmap(), ++ * try_to_unmap() may return false when it is about to become true, ++ * if page table locking is skipped: use TTU_SYNC to wait for that. 
++ */ ++ return !page_mapcount(page); + } + + /** +@@ -1874,6 +1887,7 @@ static void rmap_walk_anon(struct page *page, struct rmap_walk_control *rwc, + struct vm_area_struct *vma = avc->vma; + unsigned long address = vma_address(page, vma); + ++ VM_BUG_ON_VMA(address == -EFAULT, vma); + cond_resched(); + + if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg)) +@@ -1928,6 +1942,7 @@ static void rmap_walk_file(struct page *page, struct rmap_walk_control *rwc, + pgoff_start, pgoff_end) { + unsigned long address = vma_address(page, vma); + ++ VM_BUG_ON_VMA(address == -EFAULT, vma); + cond_resched(); + + if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg)) +diff --git a/mm/truncate.c b/mm/truncate.c +index 455944264663e..bf092be0a6f01 100644 +--- a/mm/truncate.c ++++ b/mm/truncate.c +@@ -168,13 +168,10 @@ void do_invalidatepage(struct page *page, unsigned int offset, + * its lock, b) when a concurrent invalidate_mapping_pages got there first and + * c) when tmpfs swizzles a page between a tmpfs inode and swapper_space. 
+ */ +-static void +-truncate_cleanup_page(struct address_space *mapping, struct page *page) ++static void truncate_cleanup_page(struct page *page) + { +- if (page_mapped(page)) { +- unsigned int nr = thp_nr_pages(page); +- unmap_mapping_pages(mapping, page->index, nr, false); +- } ++ if (page_mapped(page)) ++ unmap_mapping_page(page); + + if (page_has_private(page)) + do_invalidatepage(page, 0, thp_size(page)); +@@ -219,7 +216,7 @@ int truncate_inode_page(struct address_space *mapping, struct page *page) + if (page->mapping != mapping) + return -EIO; + +- truncate_cleanup_page(mapping, page); ++ truncate_cleanup_page(page); + delete_from_page_cache(page); + return 0; + } +@@ -326,7 +323,7 @@ void truncate_inode_pages_range(struct address_space *mapping, + index = indices[pagevec_count(&pvec) - 1] + 1; + truncate_exceptional_pvec_entries(mapping, &pvec, indices); + for (i = 0; i < pagevec_count(&pvec); i++) +- truncate_cleanup_page(mapping, pvec.pages[i]); ++ truncate_cleanup_page(pvec.pages[i]); + delete_from_page_cache_batch(mapping, &pvec); + for (i = 0; i < pagevec_count(&pvec); i++) + unlock_page(pvec.pages[i]); +@@ -652,6 +649,16 @@ int invalidate_inode_pages2_range(struct address_space *mapping, + continue; + } + ++ if (!did_range_unmap && page_mapped(page)) { ++ /* ++ * If page is mapped, before taking its lock, ++ * zap the rest of the file in one hit. ++ */ ++ unmap_mapping_pages(mapping, index, ++ (1 + end - index), false); ++ did_range_unmap = 1; ++ } ++ + lock_page(page); + WARN_ON(page_to_index(page) != index); + if (page->mapping != mapping) { +@@ -659,23 +666,11 @@ int invalidate_inode_pages2_range(struct address_space *mapping, + continue; + } + wait_on_page_writeback(page); +- if (page_mapped(page)) { +- if (!did_range_unmap) { +- /* +- * Zap the rest of the file in one hit. 
+- */ +- unmap_mapping_pages(mapping, index, +- (1 + end - index), false); +- did_range_unmap = 1; +- } else { +- /* +- * Just zap this page +- */ +- unmap_mapping_pages(mapping, index, +- 1, false); +- } +- } ++ ++ if (page_mapped(page)) ++ unmap_mapping_page(page); + BUG_ON(page_mapped(page)); ++ + ret2 = do_launder_page(mapping, page); + if (ret2 == 0) { + if (!invalidate_complete_page2(mapping, page)) +diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c +index 2603966da904d..e910890a868c1 100644 +--- a/net/ethtool/ioctl.c ++++ b/net/ethtool/ioctl.c +@@ -1421,7 +1421,7 @@ static int ethtool_get_any_eeprom(struct net_device *dev, void __user *useraddr, + if (eeprom.offset + eeprom.len > total_len) + return -EINVAL; + +- data = kmalloc(PAGE_SIZE, GFP_USER); ++ data = kzalloc(PAGE_SIZE, GFP_USER); + if (!data) + return -ENOMEM; + +@@ -1486,7 +1486,7 @@ static int ethtool_set_eeprom(struct net_device *dev, void __user *useraddr) + if (eeprom.offset + eeprom.len > ops->get_eeprom_len(dev)) + return -EINVAL; + +- data = kmalloc(PAGE_SIZE, GFP_USER); ++ data = kzalloc(PAGE_SIZE, GFP_USER); + if (!data) + return -ENOMEM; + +@@ -1765,7 +1765,7 @@ static int ethtool_self_test(struct net_device *dev, char __user *useraddr) + return -EFAULT; + + test.len = test_len; +- data = kmalloc_array(test_len, sizeof(u64), GFP_USER); ++ data = kcalloc(test_len, sizeof(u64), GFP_USER); + if (!data) + return -ENOMEM; + +@@ -2281,7 +2281,7 @@ static int ethtool_get_tunable(struct net_device *dev, void __user *useraddr) + ret = ethtool_tunable_valid(&tuna); + if (ret) + return ret; +- data = kmalloc(tuna.len, GFP_USER); ++ data = kzalloc(tuna.len, GFP_USER); + if (!data) + return -ENOMEM; + ret = ops->get_tunable(dev, &tuna, data); +@@ -2473,7 +2473,7 @@ static int get_phy_tunable(struct net_device *dev, void __user *useraddr) + ret = ethtool_phy_tunable_valid(&tuna); + if (ret) + return ret; +- data = kmalloc(tuna.len, GFP_USER); ++ data = kzalloc(tuna.len, GFP_USER); + if (!data) + 
return -ENOMEM; + if (phy_drv_tunable) { +diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c +index 1355e6c0d5677..faa7856c7fb07 100644 +--- a/net/ipv4/af_inet.c ++++ b/net/ipv4/af_inet.c +@@ -575,7 +575,7 @@ int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr, + return err; + } + +- if (!inet_sk(sk)->inet_num && inet_autobind(sk)) ++ if (data_race(!inet_sk(sk)->inet_num) && inet_autobind(sk)) + return -EAGAIN; + return sk->sk_prot->connect(sk, uaddr, addr_len); + } +@@ -803,7 +803,7 @@ int inet_send_prepare(struct sock *sk) + sock_rps_record_flow(sk); + + /* We may need to bind the socket. */ +- if (!inet_sk(sk)->inet_num && !sk->sk_prot->no_autobind && ++ if (data_race(!inet_sk(sk)->inet_num) && !sk->sk_prot->no_autobind && + inet_autobind(sk)) + return -EAGAIN; + +diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c +index 2e35f68da40a7..1c6429c353a96 100644 +--- a/net/ipv4/devinet.c ++++ b/net/ipv4/devinet.c +@@ -1989,7 +1989,7 @@ static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla, + return -EAFNOSUPPORT; + + if (nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0) +- BUG(); ++ return -EINVAL; + + if (tb[IFLA_INET_CONF]) { + nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) +diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c +index 8b943f85fff9d..ea22768f76b8a 100644 +--- a/net/ipv4/ping.c ++++ b/net/ipv4/ping.c +@@ -952,6 +952,7 @@ bool ping_rcv(struct sk_buff *skb) + struct sock *sk; + struct net *net = dev_net(skb->dev); + struct icmphdr *icmph = icmp_hdr(skb); ++ bool rc = false; + + /* We assume the packet has already been checked by icmp_rcv */ + +@@ -966,14 +967,15 @@ bool ping_rcv(struct sk_buff *skb) + struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); + + pr_debug("rcv on socket %p\n", sk); +- if (skb2) +- ping_queue_rcv_skb(sk, skb2); ++ if (skb2 && !ping_queue_rcv_skb(sk, skb2)) ++ rc = true; + sock_put(sk); +- return true; + } +- pr_debug("no socket, dropping\n"); + +- return false; ++ if (!rc) 
++ pr_debug("no socket, dropping\n"); ++ ++ return rc; + } + EXPORT_SYMBOL_GPL(ping_rcv); + +diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c +index a9e53f5942fae..eab0a46983c0b 100644 +--- a/net/ipv6/addrconf.c ++++ b/net/ipv6/addrconf.c +@@ -5822,7 +5822,7 @@ static int inet6_set_link_af(struct net_device *dev, const struct nlattr *nla, + return -EAFNOSUPPORT; + + if (nla_parse_nested_deprecated(tb, IFLA_INET6_MAX, nla, NULL, NULL) < 0) +- BUG(); ++ return -EINVAL; + + if (tb[IFLA_INET6_TOKEN]) { + err = inet6_set_iftoken(idev, nla_data(tb[IFLA_INET6_TOKEN]), +diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h +index 02e818d740f60..5ec437e8e7132 100644 +--- a/net/mac80211/ieee80211_i.h ++++ b/net/mac80211/ieee80211_i.h +@@ -1442,7 +1442,7 @@ ieee80211_get_sband(struct ieee80211_sub_if_data *sdata) + rcu_read_lock(); + chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); + +- if (WARN_ON_ONCE(!chanctx_conf)) { ++ if (!chanctx_conf) { + rcu_read_unlock(); + return NULL; + } +diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c +index 0fe91dc9817eb..437d88822d8f8 100644 +--- a/net/mac80211/mlme.c ++++ b/net/mac80211/mlme.c +@@ -4062,10 +4062,14 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, + if (elems.mbssid_config_ie) + bss_conf->profile_periodicity = + elems.mbssid_config_ie->profile_periodicity; ++ else ++ bss_conf->profile_periodicity = 0; + + if (elems.ext_capab_len >= 11 && + (elems.ext_capab[10] & WLAN_EXT_CAPA11_EMA_SUPPORT)) + bss_conf->ema_ap = true; ++ else ++ bss_conf->ema_ap = false; + + /* continue assoc process */ + ifmgd->assoc_data->timeout = jiffies; +@@ -5802,12 +5806,16 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, + beacon_ies->data, beacon_ies->len); + if (elem && elem->datalen >= 3) + sdata->vif.bss_conf.profile_periodicity = elem->data[2]; ++ else ++ sdata->vif.bss_conf.profile_periodicity = 0; + + elem = cfg80211_find_elem(WLAN_EID_EXT_CAPABILITY, + 
beacon_ies->data, beacon_ies->len); + if (elem && elem->datalen >= 11 && + (elem->data[10] & WLAN_EXT_CAPA11_EMA_SUPPORT)) + sdata->vif.bss_conf.ema_ap = true; ++ else ++ sdata->vif.bss_conf.ema_ap = false; + } else { + assoc_data->timeout = jiffies; + assoc_data->timeout_started = true; +diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c +index 59de7a86599dc..cb5cbf02dbac9 100644 +--- a/net/mac80211/rx.c ++++ b/net/mac80211/rx.c +@@ -2239,17 +2239,15 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) + sc = le16_to_cpu(hdr->seq_ctrl); + frag = sc & IEEE80211_SCTL_FRAG; + +- if (is_multicast_ether_addr(hdr->addr1)) { +- I802_DEBUG_INC(rx->local->dot11MulticastReceivedFrameCount); +- goto out_no_led; +- } +- + if (rx->sta) + cache = &rx->sta->frags; + + if (likely(!ieee80211_has_morefrags(fc) && frag == 0)) + goto out; + ++ if (is_multicast_ether_addr(hdr->addr1)) ++ return RX_DROP_MONITOR; ++ + I802_DEBUG_INC(rx->local->rx_handlers_fragments); + + if (skb_linearize(rx->skb)) +@@ -2375,7 +2373,6 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) + + out: + ieee80211_led_rx(rx->local); +- out_no_led: + if (rx->sta) + rx->sta->rx_stats.packets++; + return RX_CONTINUE; +diff --git a/net/mac80211/util.c b/net/mac80211/util.c +index 53755a05f73b5..06342693799eb 100644 +--- a/net/mac80211/util.c ++++ b/net/mac80211/util.c +@@ -955,7 +955,7 @@ static void ieee80211_parse_extension_element(u32 *crc, + + switch (elem->data[0]) { + case WLAN_EID_EXT_HE_MU_EDCA: +- if (len == sizeof(*elems->mu_edca_param_set)) { ++ if (len >= sizeof(*elems->mu_edca_param_set)) { + elems->mu_edca_param_set = data; + if (crc) + *crc = crc32_be(*crc, (void *)elem, +@@ -976,7 +976,7 @@ static void ieee80211_parse_extension_element(u32 *crc, + } + break; + case WLAN_EID_EXT_UORA: +- if (len == 1) ++ if (len >= 1) + elems->uora_element = data; + break; + case WLAN_EID_EXT_MAX_CHANNEL_SWITCH_TIME: +@@ -984,7 +984,7 @@ static void ieee80211_parse_extension_element(u32 *crc, + 
elems->max_channel_switch_time = data; + break; + case WLAN_EID_EXT_MULTIPLE_BSSID_CONFIGURATION: +- if (len == sizeof(*elems->mbssid_config_ie)) ++ if (len >= sizeof(*elems->mbssid_config_ie)) + elems->mbssid_config_ie = data; + break; + case WLAN_EID_EXT_HE_SPR: +@@ -993,7 +993,7 @@ static void ieee80211_parse_extension_element(u32 *crc, + elems->he_spr = data; + break; + case WLAN_EID_EXT_HE_6GHZ_CAPA: +- if (len == sizeof(*elems->he_6ghz_capa)) ++ if (len >= sizeof(*elems->he_6ghz_capa)) + elems->he_6ghz_capa = data; + break; + } +@@ -1082,14 +1082,14 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action, + + switch (id) { + case WLAN_EID_LINK_ID: +- if (elen + 2 != sizeof(struct ieee80211_tdls_lnkie)) { ++ if (elen + 2 < sizeof(struct ieee80211_tdls_lnkie)) { + elem_parse_failed = true; + break; + } + elems->lnk_id = (void *)(pos - 2); + break; + case WLAN_EID_CHAN_SWITCH_TIMING: +- if (elen != sizeof(struct ieee80211_ch_switch_timing)) { ++ if (elen < sizeof(struct ieee80211_ch_switch_timing)) { + elem_parse_failed = true; + break; + } +@@ -1252,7 +1252,7 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action, + elems->sec_chan_offs = (void *)pos; + break; + case WLAN_EID_CHAN_SWITCH_PARAM: +- if (elen != ++ if (elen < + sizeof(*elems->mesh_chansw_params_ie)) { + elem_parse_failed = true; + break; +@@ -1261,7 +1261,7 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action, + break; + case WLAN_EID_WIDE_BW_CHANNEL_SWITCH: + if (!action || +- elen != sizeof(*elems->wide_bw_chansw_ie)) { ++ elen < sizeof(*elems->wide_bw_chansw_ie)) { + elem_parse_failed = true; + break; + } +@@ -1280,7 +1280,7 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action, + ie = cfg80211_find_ie(WLAN_EID_WIDE_BW_CHANNEL_SWITCH, + pos, elen); + if (ie) { +- if (ie[1] == sizeof(*elems->wide_bw_chansw_ie)) ++ if (ie[1] >= sizeof(*elems->wide_bw_chansw_ie)) + elems->wide_bw_chansw_ie = + (void *)(ie + 2); + else +@@ 
-1324,7 +1324,7 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action, + elems->cisco_dtpc_elem = pos; + break; + case WLAN_EID_ADDBA_EXT: +- if (elen != sizeof(struct ieee80211_addba_ext_ie)) { ++ if (elen < sizeof(struct ieee80211_addba_ext_ie)) { + elem_parse_failed = true; + break; + } +@@ -1350,7 +1350,7 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action, + elem, elems); + break; + case WLAN_EID_S1G_CAPABILITIES: +- if (elen == sizeof(*elems->s1g_capab)) ++ if (elen >= sizeof(*elems->s1g_capab)) + elems->s1g_capab = (void *)pos; + else + elem_parse_failed = true; +diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c +index c52557ec7fb33..68a4dd2512427 100644 +--- a/net/packet/af_packet.c ++++ b/net/packet/af_packet.c +@@ -2683,7 +2683,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) + } + if (likely(saddr == NULL)) { + dev = packet_cached_dev_get(po); +- proto = po->num; ++ proto = READ_ONCE(po->num); + } else { + err = -EINVAL; + if (msg->msg_namelen < sizeof(struct sockaddr_ll)) +@@ -2896,7 +2896,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) + + if (likely(saddr == NULL)) { + dev = packet_cached_dev_get(po); +- proto = po->num; ++ proto = READ_ONCE(po->num); + } else { + err = -EINVAL; + if (msg->msg_namelen < sizeof(struct sockaddr_ll)) +@@ -3034,10 +3034,13 @@ static int packet_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) + struct sock *sk = sock->sk; + struct packet_sock *po = pkt_sk(sk); + +- if (po->tx_ring.pg_vec) ++ /* Reading tx_ring.pg_vec without holding pg_vec_lock is racy. ++ * tpacket_snd() will redo the check safely. 
++ */ ++ if (data_race(po->tx_ring.pg_vec)) + return tpacket_snd(po, msg); +- else +- return packet_snd(sock, msg, len); ++ ++ return packet_snd(sock, msg, len); + } + + /* +@@ -3168,7 +3171,7 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex, + /* prevents packet_notifier() from calling + * register_prot_hook() + */ +- po->num = 0; ++ WRITE_ONCE(po->num, 0); + __unregister_prot_hook(sk, true); + rcu_read_lock(); + dev_curr = po->prot_hook.dev; +@@ -3178,17 +3181,17 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex, + } + + BUG_ON(po->running); +- po->num = proto; ++ WRITE_ONCE(po->num, proto); + po->prot_hook.type = proto; + + if (unlikely(unlisted)) { + dev_put(dev); + po->prot_hook.dev = NULL; +- po->ifindex = -1; ++ WRITE_ONCE(po->ifindex, -1); + packet_cached_dev_reset(po); + } else { + po->prot_hook.dev = dev; +- po->ifindex = dev ? dev->ifindex : 0; ++ WRITE_ONCE(po->ifindex, dev ? dev->ifindex : 0); + packet_cached_dev_assign(po, dev); + } + } +@@ -3502,7 +3505,7 @@ static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr, + uaddr->sa_family = AF_PACKET; + memset(uaddr->sa_data, 0, sizeof(uaddr->sa_data)); + rcu_read_lock(); +- dev = dev_get_by_index_rcu(sock_net(sk), pkt_sk(sk)->ifindex); ++ dev = dev_get_by_index_rcu(sock_net(sk), READ_ONCE(pkt_sk(sk)->ifindex)); + if (dev) + strlcpy(uaddr->sa_data, dev->name, sizeof(uaddr->sa_data)); + rcu_read_unlock(); +@@ -3517,16 +3520,18 @@ static int packet_getname(struct socket *sock, struct sockaddr *uaddr, + struct sock *sk = sock->sk; + struct packet_sock *po = pkt_sk(sk); + DECLARE_SOCKADDR(struct sockaddr_ll *, sll, uaddr); ++ int ifindex; + + if (peer) + return -EOPNOTSUPP; + ++ ifindex = READ_ONCE(po->ifindex); + sll->sll_family = AF_PACKET; +- sll->sll_ifindex = po->ifindex; +- sll->sll_protocol = po->num; ++ sll->sll_ifindex = ifindex; ++ sll->sll_protocol = READ_ONCE(po->num); + sll->sll_pkttype = 0; + rcu_read_lock(); +- dev = 
dev_get_by_index_rcu(sock_net(sk), po->ifindex); ++ dev = dev_get_by_index_rcu(sock_net(sk), ifindex); + if (dev) { + sll->sll_hatype = dev->type; + sll->sll_halen = dev->addr_len; +@@ -4105,7 +4110,7 @@ static int packet_notifier(struct notifier_block *this, + } + if (msg == NETDEV_UNREGISTER) { + packet_cached_dev_reset(po); +- po->ifindex = -1; ++ WRITE_ONCE(po->ifindex, -1); + if (po->prot_hook.dev) + dev_put(po->prot_hook.dev); + po->prot_hook.dev = NULL; +@@ -4411,7 +4416,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, + was_running = po->running; + num = po->num; + if (was_running) { +- po->num = 0; ++ WRITE_ONCE(po->num, 0); + __unregister_prot_hook(sk, false); + } + spin_unlock(&po->bind_lock); +@@ -4446,7 +4451,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, + + spin_lock(&po->bind_lock); + if (was_running) { +- po->num = num; ++ WRITE_ONCE(po->num, num); + register_prot_hook(sk); + } + spin_unlock(&po->bind_lock); +@@ -4616,8 +4621,8 @@ static int packet_seq_show(struct seq_file *seq, void *v) + s, + refcount_read(&s->sk_refcnt), + s->sk_type, +- ntohs(po->num), +- po->ifindex, ++ ntohs(READ_ONCE(po->num)), ++ READ_ONCE(po->ifindex), + po->running, + atomic_read(&s->sk_rmem_alloc), + from_kuid_munged(seq_user_ns(seq), sock_i_uid(s)), +diff --git a/net/wireless/util.c b/net/wireless/util.c +index f342b61476754..726e7d2342bd5 100644 +--- a/net/wireless/util.c ++++ b/net/wireless/util.c +@@ -1059,6 +1059,9 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, + case NL80211_IFTYPE_MESH_POINT: + /* mesh should be handled? 
*/ + break; ++ case NL80211_IFTYPE_OCB: ++ cfg80211_leave_ocb(rdev, dev); ++ break; + default: + break; + } +diff --git a/scripts/Makefile b/scripts/Makefile +index c36106bce80ee..9adb6d247818f 100644 +--- a/scripts/Makefile ++++ b/scripts/Makefile +@@ -14,6 +14,7 @@ hostprogs-always-$(CONFIG_ASN1) += asn1_compiler + hostprogs-always-$(CONFIG_MODULE_SIG_FORMAT) += sign-file + hostprogs-always-$(CONFIG_SYSTEM_TRUSTED_KEYRING) += extract-cert + hostprogs-always-$(CONFIG_SYSTEM_EXTRA_CERTIFICATE) += insert-sys-cert ++hostprogs-always-$(CONFIG_SYSTEM_REVOCATION_LIST) += extract-cert + + HOSTCFLAGS_sorttable.o = -I$(srctree)/tools/include + HOSTCFLAGS_asn1_compiler.o = -I$(srctree)/include +diff --git a/scripts/recordmcount.h b/scripts/recordmcount.h +index f9b19524da112..1e9baa5c4fc6e 100644 +--- a/scripts/recordmcount.h ++++ b/scripts/recordmcount.h +@@ -192,15 +192,20 @@ static unsigned int get_symindex(Elf_Sym const *sym, Elf32_Word const *symtab, + Elf32_Word const *symtab_shndx) + { + unsigned long offset; ++ unsigned short shndx = w2(sym->st_shndx); + int index; + +- if (sym->st_shndx != SHN_XINDEX) +- return w2(sym->st_shndx); ++ if (shndx > SHN_UNDEF && shndx < SHN_LORESERVE) ++ return shndx; + +- offset = (unsigned long)sym - (unsigned long)symtab; +- index = offset / sizeof(*sym); ++ if (shndx == SHN_XINDEX) { ++ offset = (unsigned long)sym - (unsigned long)symtab; ++ index = offset / sizeof(*sym); + +- return w(symtab_shndx[index]); ++ return w(symtab_shndx[index]); ++ } ++ ++ return 0; + } + + static unsigned int get_shnum(Elf_Ehdr const *ehdr, Elf_Shdr const *shdr0) +diff --git a/security/integrity/platform_certs/keyring_handler.c b/security/integrity/platform_certs/keyring_handler.c +index c5ba695c10e3a..5604bd57c9907 100644 +--- a/security/integrity/platform_certs/keyring_handler.c ++++ b/security/integrity/platform_certs/keyring_handler.c +@@ -55,6 +55,15 @@ static __init void uefi_blacklist_binary(const char *source, + uefi_blacklist_hash(source, data, 
len, "bin:", 4); + } + ++/* ++ * Add an X509 cert to the revocation list. ++ */ ++static __init void uefi_revocation_list_x509(const char *source, ++ const void *data, size_t len) ++{ ++ add_key_to_revocation_list(data, len); ++} ++ + /* + * Return the appropriate handler for particular signature list types found in + * the UEFI db and MokListRT tables. +@@ -76,5 +85,7 @@ __init efi_element_handler_t get_handler_for_dbx(const efi_guid_t *sig_type) + return uefi_blacklist_x509_tbs; + if (efi_guidcmp(*sig_type, efi_cert_sha256_guid) == 0) + return uefi_blacklist_binary; ++ if (efi_guidcmp(*sig_type, efi_cert_x509_guid) == 0) ++ return uefi_revocation_list_x509; + return 0; + } +diff --git a/security/integrity/platform_certs/load_uefi.c b/security/integrity/platform_certs/load_uefi.c +index ee4b4c666854f..f290f78c3f301 100644 +--- a/security/integrity/platform_certs/load_uefi.c ++++ b/security/integrity/platform_certs/load_uefi.c +@@ -132,8 +132,9 @@ static int __init load_moklist_certs(void) + static int __init load_uefi_certs(void) + { + efi_guid_t secure_var = EFI_IMAGE_SECURITY_DATABASE_GUID; +- void *db = NULL, *dbx = NULL; +- unsigned long dbsize = 0, dbxsize = 0; ++ efi_guid_t mok_var = EFI_SHIM_LOCK_GUID; ++ void *db = NULL, *dbx = NULL, *mokx = NULL; ++ unsigned long dbsize = 0, dbxsize = 0, mokxsize = 0; + efi_status_t status; + int rc = 0; + +@@ -175,6 +176,21 @@ static int __init load_uefi_certs(void) + kfree(dbx); + } + ++ mokx = get_cert_list(L"MokListXRT", &mok_var, &mokxsize, &status); ++ if (!mokx) { ++ if (status == EFI_NOT_FOUND) ++ pr_debug("mokx variable wasn't found\n"); ++ else ++ pr_info("Couldn't get mokx list\n"); ++ } else { ++ rc = parse_efi_signature_list("UEFI:MokListXRT", ++ mokx, mokxsize, ++ get_handler_for_dbx); ++ if (rc) ++ pr_err("Couldn't parse mokx signatures %d\n", rc); ++ kfree(mokx); ++ } ++ + /* Load the MokListRT certs */ + rc = load_moklist_certs(); + +diff --git a/tools/testing/selftests/bpf/test_verifier.c 
b/tools/testing/selftests/bpf/test_verifier.c +index 58b5a349d3baf..ea3158b0d551d 100644 +--- a/tools/testing/selftests/bpf/test_verifier.c ++++ b/tools/testing/selftests/bpf/test_verifier.c +@@ -1147,7 +1147,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv, + } + } + +- if (test->insn_processed) { ++ if (!unpriv && test->insn_processed) { + uint32_t insn_processed; + char *proc; + +diff --git a/tools/testing/selftests/bpf/verifier/and.c b/tools/testing/selftests/bpf/verifier/and.c +index ca8fdb1b3f015..7d7ebee5cc7a8 100644 +--- a/tools/testing/selftests/bpf/verifier/and.c ++++ b/tools/testing/selftests/bpf/verifier/and.c +@@ -61,6 +61,8 @@ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, ++ .errstr_unpriv = "R1 !read_ok", ++ .result_unpriv = REJECT, + .result = ACCEPT, + .retval = 0 + }, +diff --git a/tools/testing/selftests/bpf/verifier/bounds.c b/tools/testing/selftests/bpf/verifier/bounds.c +index 8a1caf46ffbc3..e061e8799ce23 100644 +--- a/tools/testing/selftests/bpf/verifier/bounds.c ++++ b/tools/testing/selftests/bpf/verifier/bounds.c +@@ -508,6 +508,8 @@ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, -1), + BPF_EXIT_INSN(), + }, ++ .errstr_unpriv = "R0 invalid mem access 'inv'", ++ .result_unpriv = REJECT, + .result = ACCEPT + }, + { +@@ -528,6 +530,8 @@ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, -1), + BPF_EXIT_INSN(), + }, ++ .errstr_unpriv = "R0 invalid mem access 'inv'", ++ .result_unpriv = REJECT, + .result = ACCEPT + }, + { +@@ -569,6 +573,8 @@ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, ++ .errstr_unpriv = "R0 min value is outside of the allowed memory range", ++ .result_unpriv = REJECT, + .fixup_map_hash_8b = { 3 }, + .result = ACCEPT, + }, +@@ -589,6 +595,8 @@ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, ++ .errstr_unpriv = "R0 min value is outside of the allowed memory range", ++ .result_unpriv = REJECT, + .fixup_map_hash_8b = { 3 }, + .result = ACCEPT, + }, +@@ -609,6 +617,8 @@ + BPF_MOV64_IMM(BPF_REG_0, 
0), + BPF_EXIT_INSN(), + }, ++ .errstr_unpriv = "R0 min value is outside of the allowed memory range", ++ .result_unpriv = REJECT, + .fixup_map_hash_8b = { 3 }, + .result = ACCEPT, + }, +@@ -674,6 +684,8 @@ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, ++ .errstr_unpriv = "R0 min value is outside of the allowed memory range", ++ .result_unpriv = REJECT, + .fixup_map_hash_8b = { 3 }, + .result = ACCEPT, + }, +@@ -695,6 +707,8 @@ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, ++ .errstr_unpriv = "R0 min value is outside of the allowed memory range", ++ .result_unpriv = REJECT, + .fixup_map_hash_8b = { 3 }, + .result = ACCEPT, + }, +diff --git a/tools/testing/selftests/bpf/verifier/dead_code.c b/tools/testing/selftests/bpf/verifier/dead_code.c +index 5cf361d8eb1cc..721ec9391be5a 100644 +--- a/tools/testing/selftests/bpf/verifier/dead_code.c ++++ b/tools/testing/selftests/bpf/verifier/dead_code.c +@@ -8,6 +8,8 @@ + BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 10, -4), + BPF_EXIT_INSN(), + }, ++ .errstr_unpriv = "R9 !read_ok", ++ .result_unpriv = REJECT, + .result = ACCEPT, + .retval = 7, + }, +diff --git a/tools/testing/selftests/bpf/verifier/jmp32.c b/tools/testing/selftests/bpf/verifier/jmp32.c +index bd5cae4a7f733..1c857b2fbdf0a 100644 +--- a/tools/testing/selftests/bpf/verifier/jmp32.c ++++ b/tools/testing/selftests/bpf/verifier/jmp32.c +@@ -87,6 +87,8 @@ + BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0), + BPF_EXIT_INSN(), + }, ++ .errstr_unpriv = "R9 !read_ok", ++ .result_unpriv = REJECT, + .result = ACCEPT, + }, + { +@@ -150,6 +152,8 @@ + BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0), + BPF_EXIT_INSN(), + }, ++ .errstr_unpriv = "R9 !read_ok", ++ .result_unpriv = REJECT, + .result = ACCEPT, + }, + { +@@ -213,6 +217,8 @@ + BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0), + BPF_EXIT_INSN(), + }, ++ .errstr_unpriv = "R9 !read_ok", ++ .result_unpriv = REJECT, + .result = ACCEPT, + }, + { +@@ -280,6 +286,8 @@ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), + 
BPF_EXIT_INSN(), + }, ++ .errstr_unpriv = "R0 invalid mem access 'inv'", ++ .result_unpriv = REJECT, + .result = ACCEPT, + .retval = 2, + }, +@@ -348,6 +356,8 @@ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, ++ .errstr_unpriv = "R0 invalid mem access 'inv'", ++ .result_unpriv = REJECT, + .result = ACCEPT, + .retval = 2, + }, +@@ -416,6 +426,8 @@ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, ++ .errstr_unpriv = "R0 invalid mem access 'inv'", ++ .result_unpriv = REJECT, + .result = ACCEPT, + .retval = 2, + }, +@@ -484,6 +496,8 @@ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, ++ .errstr_unpriv = "R0 invalid mem access 'inv'", ++ .result_unpriv = REJECT, + .result = ACCEPT, + .retval = 2, + }, +@@ -552,6 +566,8 @@ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, ++ .errstr_unpriv = "R0 invalid mem access 'inv'", ++ .result_unpriv = REJECT, + .result = ACCEPT, + .retval = 2, + }, +@@ -620,6 +636,8 @@ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, ++ .errstr_unpriv = "R0 invalid mem access 'inv'", ++ .result_unpriv = REJECT, + .result = ACCEPT, + .retval = 2, + }, +@@ -688,6 +706,8 @@ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, ++ .errstr_unpriv = "R0 invalid mem access 'inv'", ++ .result_unpriv = REJECT, + .result = ACCEPT, + .retval = 2, + }, +@@ -756,6 +776,8 @@ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, ++ .errstr_unpriv = "R0 invalid mem access 'inv'", ++ .result_unpriv = REJECT, + .result = ACCEPT, + .retval = 2, + }, +diff --git a/tools/testing/selftests/bpf/verifier/jset.c b/tools/testing/selftests/bpf/verifier/jset.c +index 8dcd4e0383d57..11fc68da735ea 100644 +--- a/tools/testing/selftests/bpf/verifier/jset.c ++++ b/tools/testing/selftests/bpf/verifier/jset.c +@@ -82,8 +82,8 @@ + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, +- .retval_unpriv = 1, +- 
.result_unpriv = ACCEPT, ++ .errstr_unpriv = "R9 !read_ok", ++ .result_unpriv = REJECT, + .retval = 1, + .result = ACCEPT, + }, +@@ -141,7 +141,8 @@ + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, +- .result_unpriv = ACCEPT, ++ .errstr_unpriv = "R9 !read_ok", ++ .result_unpriv = REJECT, + .result = ACCEPT, + }, + { +@@ -162,6 +163,7 @@ + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, +- .result_unpriv = ACCEPT, ++ .errstr_unpriv = "R9 !read_ok", ++ .result_unpriv = REJECT, + .result = ACCEPT, + }, +diff --git a/tools/testing/selftests/bpf/verifier/unpriv.c b/tools/testing/selftests/bpf/verifier/unpriv.c +index bd436df5cc326..111801aea5e35 100644 +--- a/tools/testing/selftests/bpf/verifier/unpriv.c ++++ b/tools/testing/selftests/bpf/verifier/unpriv.c +@@ -420,6 +420,8 @@ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0), + BPF_EXIT_INSN(), + }, ++ .errstr_unpriv = "R7 invalid mem access 'inv'", ++ .result_unpriv = REJECT, + .result = ACCEPT, + .retval = 0, + }, +diff --git a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c +index 7ae2859d495c5..a3e593ddfafc9 100644 +--- a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c ++++ b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c +@@ -120,7 +120,7 @@ + .fixup_map_array_48b = { 1 }, + .result = ACCEPT, + .result_unpriv = REJECT, +- .errstr_unpriv = "R2 tried to add from different maps, paths or scalars", ++ .errstr_unpriv = "R2 pointer comparison prohibited", + .retval = 0, + }, + { +@@ -159,7 +159,8 @@ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + // fake-dead code; targeted from branch A to +- // prevent dead code sanitization ++ // prevent dead code sanitization, rejected ++ // via branch B however + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), +@@ -167,7 +168,7 @@ + .fixup_map_array_48b = { 1 }, + .result = ACCEPT, + .result_unpriv = REJECT, +- 
.errstr_unpriv = "R2 tried to add from different maps, paths or scalars", ++ .errstr_unpriv = "R0 invalid mem access 'inv'", + .retval = 0, + }, + { +diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c +index 2f0e4365f61bd..8b90256bca96d 100644 +--- a/tools/testing/selftests/kvm/lib/kvm_util.c ++++ b/tools/testing/selftests/kvm/lib/kvm_util.c +@@ -58,7 +58,7 @@ int kvm_check_cap(long cap) + exit(KSFT_SKIP); + + ret = ioctl(kvm_fd, KVM_CHECK_EXTENSION, cap); +- TEST_ASSERT(ret != -1, "KVM_CHECK_EXTENSION IOCTL failed,\n" ++ TEST_ASSERT(ret >= 0, "KVM_CHECK_EXTENSION IOCTL failed,\n" + " rc: %i errno: %i", ret, errno); + + close(kvm_fd); +diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c +index 5cabc6c748db1..4cce5735271ef 100644 +--- a/virt/kvm/kvm_main.c ++++ b/virt/kvm/kvm_main.c +@@ -1919,6 +1919,13 @@ static bool vma_is_valid(struct vm_area_struct *vma, bool write_fault) + return true; + } + ++static int kvm_try_get_pfn(kvm_pfn_t pfn) ++{ ++ if (kvm_is_reserved_pfn(pfn)) ++ return 1; ++ return get_page_unless_zero(pfn_to_page(pfn)); ++} ++ + static int hva_to_pfn_remapped(struct vm_area_struct *vma, + unsigned long addr, bool *async, + bool write_fault, bool *writable, +@@ -1968,13 +1975,21 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma, + * Whoever called remap_pfn_range is also going to call e.g. + * unmap_mapping_range before the underlying pages are freed, + * causing a call to our MMU notifier. ++ * ++ * Certain IO or PFNMAP mappings can be backed with valid ++ * struct pages, but be allocated without refcounting e.g., ++ * tail pages of non-compound higher order allocations, which ++ * would then underflow the refcount when the caller does the ++ * required put_page. Don't allow those pages here. + */ +- kvm_get_pfn(pfn); ++ if (!kvm_try_get_pfn(pfn)) ++ r = -EFAULT; + + out: + pte_unmap_unlock(ptep, ptl); + *p_pfn = pfn; +- return 0; ++ ++ return r; + } + + /* |