summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMike Pagano <mpagano@gentoo.org>2018-07-28 06:36:54 -0400
committerMike Pagano <mpagano@gentoo.org>2018-07-28 06:36:54 -0400
commitd2c4861cad10ea6344c01b2d2781c3d29a8a2c9b (patch)
treebdae36a44a5ed38fb9fe93347f07bb84ebd60ae8
parentLinux patch 4.4.143 (diff)
downloadlinux-patches-d2c4861c.tar.gz
linux-patches-d2c4861c.tar.bz2
linux-patches-d2c4861c.zip
Linux patch 4.4.144 and 4.4.1454.4-146
-rw-r--r--0000_README8
-rw-r--r--1143_linux-4.4.144.patch4228
-rw-r--r--1144_linux-4.4.145.patch1006
3 files changed, 5242 insertions, 0 deletions
diff --git a/0000_README b/0000_README
index 42e6d1fa..5149ed75 100644
--- a/0000_README
+++ b/0000_README
@@ -615,6 +615,14 @@ Patch: 1142_linux-4.4.143.patch
From: http://www.kernel.org
Desc: Linux 4.4.143
+Patch: 1143_linux-4.4.144.patch
+From: http://www.kernel.org
+Desc: Linux 4.4.144
+
+Patch: 1144_linux-4.4.145.patch
+From: http://www.kernel.org
+Desc: Linux 4.4.145
+
Patch: 1500_XATTR_USER_PREFIX.patch
From: https://bugs.gentoo.org/show_bug.cgi?id=470644
Desc: Support for namespace user.pax.* on tmpfs.
diff --git a/1143_linux-4.4.144.patch b/1143_linux-4.4.144.patch
new file mode 100644
index 00000000..d0155cc7
--- /dev/null
+++ b/1143_linux-4.4.144.patch
@@ -0,0 +1,4228 @@
+diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu
+index ea6a043f5beb..50f95689ab38 100644
+--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
++++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
+@@ -276,6 +276,7 @@ What: /sys/devices/system/cpu/vulnerabilities
+ /sys/devices/system/cpu/vulnerabilities/meltdown
+ /sys/devices/system/cpu/vulnerabilities/spectre_v1
+ /sys/devices/system/cpu/vulnerabilities/spectre_v2
++ /sys/devices/system/cpu/vulnerabilities/spec_store_bypass
+ Date: January 2018
+ Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
+ Description: Information about CPU vulnerabilities
+diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
+index e60d0b5809c1..3fd53e193b7f 100644
+--- a/Documentation/kernel-parameters.txt
++++ b/Documentation/kernel-parameters.txt
+@@ -2460,6 +2460,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
+ allow data leaks with this option, which is equivalent
+ to spectre_v2=off.
+
++ nospec_store_bypass_disable
++ [HW] Disable all mitigations for the Speculative Store Bypass vulnerability
++
+ noxsave [BUGS=X86] Disables x86 extended register state save
+ and restore using xsave. The kernel will fallback to
+ enabling legacy floating-point and sse state.
+@@ -3623,6 +3626,48 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
+ Not specifying this option is equivalent to
+ spectre_v2=auto.
+
++ spec_store_bypass_disable=
++ [HW] Control Speculative Store Bypass (SSB) Disable mitigation
++ (Speculative Store Bypass vulnerability)
++
++ Certain CPUs are vulnerable to an exploit against a
++ a common industry wide performance optimization known
++ as "Speculative Store Bypass" in which recent stores
++ to the same memory location may not be observed by
++ later loads during speculative execution. The idea
++ is that such stores are unlikely and that they can
++ be detected prior to instruction retirement at the
++ end of a particular speculation execution window.
++
++ In vulnerable processors, the speculatively forwarded
++ store can be used in a cache side channel attack, for
++ example to read memory to which the attacker does not
++ directly have access (e.g. inside sandboxed code).
++
++ This parameter controls whether the Speculative Store
++ Bypass optimization is used.
++
++ on - Unconditionally disable Speculative Store Bypass
++ off - Unconditionally enable Speculative Store Bypass
++ auto - Kernel detects whether the CPU model contains an
++ implementation of Speculative Store Bypass and
++ picks the most appropriate mitigation. If the
++ CPU is not vulnerable, "off" is selected. If the
++ CPU is vulnerable the default mitigation is
++ architecture and Kconfig dependent. See below.
++ prctl - Control Speculative Store Bypass per thread
++ via prctl. Speculative Store Bypass is enabled
++ for a process by default. The state of the control
++ is inherited on fork.
++ seccomp - Same as "prctl" above, but all seccomp threads
++ will disable SSB unless they explicitly opt out.
++
++ Not specifying this option is equivalent to
++ spec_store_bypass_disable=auto.
++
++ Default mitigations:
++ X86: If CONFIG_SECCOMP=y "seccomp", otherwise "prctl"
++
+ spia_io_base= [HW,MTD]
+ spia_fio_base=
+ spia_pedr=
+diff --git a/Documentation/spec_ctrl.txt b/Documentation/spec_ctrl.txt
+new file mode 100644
+index 000000000000..32f3d55c54b7
+--- /dev/null
++++ b/Documentation/spec_ctrl.txt
+@@ -0,0 +1,94 @@
++===================
++Speculation Control
++===================
++
++Quite some CPUs have speculation-related misfeatures which are in
++fact vulnerabilities causing data leaks in various forms even across
++privilege domains.
++
++The kernel provides mitigation for such vulnerabilities in various
++forms. Some of these mitigations are compile-time configurable and some
++can be supplied on the kernel command line.
++
++There is also a class of mitigations which are very expensive, but they can
++be restricted to a certain set of processes or tasks in controlled
++environments. The mechanism to control these mitigations is via
++:manpage:`prctl(2)`.
++
++There are two prctl options which are related to this:
++
++ * PR_GET_SPECULATION_CTRL
++
++ * PR_SET_SPECULATION_CTRL
++
++PR_GET_SPECULATION_CTRL
++-----------------------
++
++PR_GET_SPECULATION_CTRL returns the state of the speculation misfeature
++which is selected with arg2 of prctl(2). The return value uses bits 0-3 with
++the following meaning:
++
++==== ===================== ===================================================
++Bit Define Description
++==== ===================== ===================================================
++0 PR_SPEC_PRCTL Mitigation can be controlled per task by
++ PR_SET_SPECULATION_CTRL.
++1 PR_SPEC_ENABLE The speculation feature is enabled, mitigation is
++ disabled.
++2 PR_SPEC_DISABLE The speculation feature is disabled, mitigation is
++ enabled.
++3 PR_SPEC_FORCE_DISABLE Same as PR_SPEC_DISABLE, but cannot be undone. A
++ subsequent prctl(..., PR_SPEC_ENABLE) will fail.
++==== ===================== ===================================================
++
++If all bits are 0 the CPU is not affected by the speculation misfeature.
++
++If PR_SPEC_PRCTL is set, then the per-task control of the mitigation is
++available. If not set, prctl(PR_SET_SPECULATION_CTRL) for the speculation
++misfeature will fail.
++
++PR_SET_SPECULATION_CTRL
++-----------------------
++
++PR_SET_SPECULATION_CTRL allows to control the speculation misfeature, which
++is selected by arg2 of :manpage:`prctl(2)` per task. arg3 is used to hand
++in the control value, i.e. either PR_SPEC_ENABLE or PR_SPEC_DISABLE or
++PR_SPEC_FORCE_DISABLE.
++
++Common error codes
++------------------
++======= =================================================================
++Value Meaning
++======= =================================================================
++EINVAL The prctl is not implemented by the architecture or unused
++ prctl(2) arguments are not 0.
++
++ENODEV arg2 is selecting a not supported speculation misfeature.
++======= =================================================================
++
++PR_SET_SPECULATION_CTRL error codes
++-----------------------------------
++======= =================================================================
++Value Meaning
++======= =================================================================
++0 Success
++
++ERANGE arg3 is incorrect, i.e. it's neither PR_SPEC_ENABLE nor
++ PR_SPEC_DISABLE nor PR_SPEC_FORCE_DISABLE.
++
++ENXIO Control of the selected speculation misfeature is not possible.
++ See PR_GET_SPECULATION_CTRL.
++
++EPERM Speculation was disabled with PR_SPEC_FORCE_DISABLE and caller
++ tried to enable it again.
++======= =================================================================
++
++Speculation misfeature controls
++-------------------------------
++- PR_SPEC_STORE_BYPASS: Speculative Store Bypass
++
++ Invocations:
++ * prctl(PR_GET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, 0, 0, 0);
++ * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_ENABLE, 0, 0);
++ * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_DISABLE, 0, 0);
++ * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_FORCE_DISABLE, 0, 0);
+diff --git a/Makefile b/Makefile
+index 54690fee0485..63f3e2438a26 100644
+--- a/Makefile
++++ b/Makefile
+@@ -1,6 +1,6 @@
+ VERSION = 4
+ PATCHLEVEL = 4
+-SUBLEVEL = 143
++SUBLEVEL = 144
+ EXTRAVERSION =
+ NAME = Blurry Fish Butt
+
+diff --git a/arch/arc/include/asm/page.h b/arch/arc/include/asm/page.h
+index 429957f1c236..8f1145ed0046 100644
+--- a/arch/arc/include/asm/page.h
++++ b/arch/arc/include/asm/page.h
+@@ -102,7 +102,7 @@ typedef pte_t * pgtable_t;
+ #define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
+
+ /* Default Permissions for stack/heaps pages (Non Executable) */
+-#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE)
++#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+
+ #define WANT_PAGE_VIRTUAL 1
+
+diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h
+index e5fec320f158..c07d7b0a4058 100644
+--- a/arch/arc/include/asm/pgtable.h
++++ b/arch/arc/include/asm/pgtable.h
+@@ -372,7 +372,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
+
+ /* Decode a PTE containing swap "identifier "into constituents */
+ #define __swp_type(pte_lookalike) (((pte_lookalike).val) & 0x1f)
+-#define __swp_offset(pte_lookalike) ((pte_lookalike).val << 13)
++#define __swp_offset(pte_lookalike) ((pte_lookalike).val >> 13)
+
+ /* NOPs, to keep generic kernel happy */
+ #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
+diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
+index d03bf0e28b8b..48c27c3fdfdb 100644
+--- a/arch/x86/entry/entry_64_compat.S
++++ b/arch/x86/entry/entry_64_compat.S
+@@ -79,24 +79,33 @@ ENTRY(entry_SYSENTER_compat)
+ ASM_CLAC /* Clear AC after saving FLAGS */
+
+ pushq $__USER32_CS /* pt_regs->cs */
+- xorq %r8,%r8
+- pushq %r8 /* pt_regs->ip = 0 (placeholder) */
++ pushq $0 /* pt_regs->ip = 0 (placeholder) */
+ pushq %rax /* pt_regs->orig_ax */
+ pushq %rdi /* pt_regs->di */
+ pushq %rsi /* pt_regs->si */
+ pushq %rdx /* pt_regs->dx */
+ pushq %rcx /* pt_regs->cx */
+ pushq $-ENOSYS /* pt_regs->ax */
+- pushq %r8 /* pt_regs->r8 = 0 */
+- pushq %r8 /* pt_regs->r9 = 0 */
+- pushq %r8 /* pt_regs->r10 = 0 */
+- pushq %r8 /* pt_regs->r11 = 0 */
++ pushq $0 /* pt_regs->r8 = 0 */
++ xorq %r8, %r8 /* nospec r8 */
++ pushq $0 /* pt_regs->r9 = 0 */
++ xorq %r9, %r9 /* nospec r9 */
++ pushq $0 /* pt_regs->r10 = 0 */
++ xorq %r10, %r10 /* nospec r10 */
++ pushq $0 /* pt_regs->r11 = 0 */
++ xorq %r11, %r11 /* nospec r11 */
+ pushq %rbx /* pt_regs->rbx */
++ xorl %ebx, %ebx /* nospec rbx */
+ pushq %rbp /* pt_regs->rbp (will be overwritten) */
+- pushq %r8 /* pt_regs->r12 = 0 */
+- pushq %r8 /* pt_regs->r13 = 0 */
+- pushq %r8 /* pt_regs->r14 = 0 */
+- pushq %r8 /* pt_regs->r15 = 0 */
++ xorl %ebp, %ebp /* nospec rbp */
++ pushq $0 /* pt_regs->r12 = 0 */
++ xorq %r12, %r12 /* nospec r12 */
++ pushq $0 /* pt_regs->r13 = 0 */
++ xorq %r13, %r13 /* nospec r13 */
++ pushq $0 /* pt_regs->r14 = 0 */
++ xorq %r14, %r14 /* nospec r14 */
++ pushq $0 /* pt_regs->r15 = 0 */
++ xorq %r15, %r15 /* nospec r15 */
+ cld
+
+ /*
+@@ -185,17 +194,26 @@ ENTRY(entry_SYSCALL_compat)
+ pushq %rdx /* pt_regs->dx */
+ pushq %rbp /* pt_regs->cx (stashed in bp) */
+ pushq $-ENOSYS /* pt_regs->ax */
+- xorq %r8,%r8
+- pushq %r8 /* pt_regs->r8 = 0 */
+- pushq %r8 /* pt_regs->r9 = 0 */
+- pushq %r8 /* pt_regs->r10 = 0 */
+- pushq %r8 /* pt_regs->r11 = 0 */
++ pushq $0 /* pt_regs->r8 = 0 */
++ xorq %r8, %r8 /* nospec r8 */
++ pushq $0 /* pt_regs->r9 = 0 */
++ xorq %r9, %r9 /* nospec r9 */
++ pushq $0 /* pt_regs->r10 = 0 */
++ xorq %r10, %r10 /* nospec r10 */
++ pushq $0 /* pt_regs->r11 = 0 */
++ xorq %r11, %r11 /* nospec r11 */
+ pushq %rbx /* pt_regs->rbx */
++ xorl %ebx, %ebx /* nospec rbx */
+ pushq %rbp /* pt_regs->rbp (will be overwritten) */
+- pushq %r8 /* pt_regs->r12 = 0 */
+- pushq %r8 /* pt_regs->r13 = 0 */
+- pushq %r8 /* pt_regs->r14 = 0 */
+- pushq %r8 /* pt_regs->r15 = 0 */
++ xorl %ebp, %ebp /* nospec rbp */
++ pushq $0 /* pt_regs->r12 = 0 */
++ xorq %r12, %r12 /* nospec r12 */
++ pushq $0 /* pt_regs->r13 = 0 */
++ xorq %r13, %r13 /* nospec r13 */
++ pushq $0 /* pt_regs->r14 = 0 */
++ xorq %r14, %r14 /* nospec r14 */
++ pushq $0 /* pt_regs->r15 = 0 */
++ xorq %r15, %r15 /* nospec r15 */
+
+ /*
+ * User mode is traced as though IRQs are on, and SYSENTER
+@@ -292,17 +310,26 @@ ENTRY(entry_INT80_compat)
+ pushq %rdx /* pt_regs->dx */
+ pushq %rcx /* pt_regs->cx */
+ pushq $-ENOSYS /* pt_regs->ax */
+- xorq %r8,%r8
+- pushq %r8 /* pt_regs->r8 = 0 */
+- pushq %r8 /* pt_regs->r9 = 0 */
+- pushq %r8 /* pt_regs->r10 = 0 */
+- pushq %r8 /* pt_regs->r11 = 0 */
++ pushq $0 /* pt_regs->r8 = 0 */
++ xorq %r8, %r8 /* nospec r8 */
++ pushq $0 /* pt_regs->r9 = 0 */
++ xorq %r9, %r9 /* nospec r9 */
++ pushq $0 /* pt_regs->r10 = 0 */
++ xorq %r10, %r10 /* nospec r10 */
++ pushq $0 /* pt_regs->r11 = 0 */
++ xorq %r11, %r11 /* nospec r11 */
+ pushq %rbx /* pt_regs->rbx */
++ xorl %ebx, %ebx /* nospec rbx */
+ pushq %rbp /* pt_regs->rbp */
++ xorl %ebp, %ebp /* nospec rbp */
+ pushq %r12 /* pt_regs->r12 */
++ xorq %r12, %r12 /* nospec r12 */
+ pushq %r13 /* pt_regs->r13 */
++ xorq %r13, %r13 /* nospec r13 */
+ pushq %r14 /* pt_regs->r14 */
++ xorq %r14, %r14 /* nospec r14 */
+ pushq %r15 /* pt_regs->r15 */
++ xorq %r15, %r15 /* nospec r15 */
+ cld
+
+ /*
+diff --git a/arch/x86/include/asm/apm.h b/arch/x86/include/asm/apm.h
+index 20370c6db74b..3d1ec41ae09a 100644
+--- a/arch/x86/include/asm/apm.h
++++ b/arch/x86/include/asm/apm.h
+@@ -6,6 +6,8 @@
+ #ifndef _ASM_X86_MACH_DEFAULT_APM_H
+ #define _ASM_X86_MACH_DEFAULT_APM_H
+
++#include <asm/nospec-branch.h>
++
+ #ifdef APM_ZERO_SEGS
+ # define APM_DO_ZERO_SEGS \
+ "pushl %%ds\n\t" \
+@@ -31,6 +33,7 @@ static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in,
+ * N.B. We do NOT need a cld after the BIOS call
+ * because we always save and restore the flags.
+ */
++ firmware_restrict_branch_speculation_start();
+ __asm__ __volatile__(APM_DO_ZERO_SEGS
+ "pushl %%edi\n\t"
+ "pushl %%ebp\n\t"
+@@ -43,6 +46,7 @@ static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in,
+ "=S" (*esi)
+ : "a" (func), "b" (ebx_in), "c" (ecx_in)
+ : "memory", "cc");
++ firmware_restrict_branch_speculation_end();
+ }
+
+ static inline u8 apm_bios_call_simple_asm(u32 func, u32 ebx_in,
+@@ -55,6 +59,7 @@ static inline u8 apm_bios_call_simple_asm(u32 func, u32 ebx_in,
+ * N.B. We do NOT need a cld after the BIOS call
+ * because we always save and restore the flags.
+ */
++ firmware_restrict_branch_speculation_start();
+ __asm__ __volatile__(APM_DO_ZERO_SEGS
+ "pushl %%edi\n\t"
+ "pushl %%ebp\n\t"
+@@ -67,6 +72,7 @@ static inline u8 apm_bios_call_simple_asm(u32 func, u32 ebx_in,
+ "=S" (si)
+ : "a" (func), "b" (ebx_in), "c" (ecx_in)
+ : "memory", "cc");
++ firmware_restrict_branch_speculation_end();
+ return error;
+ }
+
+diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
+index e3a6f66d288c..7f5dcb64cedb 100644
+--- a/arch/x86/include/asm/barrier.h
++++ b/arch/x86/include/asm/barrier.h
+@@ -40,7 +40,7 @@ static inline unsigned long array_index_mask_nospec(unsigned long index,
+
+ asm volatile ("cmp %1,%2; sbb %0,%0;"
+ :"=r" (mask)
+- :"r"(size),"r" (index)
++ :"g"(size),"r" (index)
+ :"cc");
+ return mask;
+ }
+diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
+index dd0089841a0f..d72c1db64679 100644
+--- a/arch/x86/include/asm/cpufeature.h
++++ b/arch/x86/include/asm/cpufeature.h
+@@ -28,6 +28,7 @@ enum cpuid_leafs
+ CPUID_8000_000A_EDX,
+ CPUID_7_ECX,
+ CPUID_8000_0007_EBX,
++ CPUID_7_EDX,
+ };
+
+ #ifdef CONFIG_X86_FEATURE_NAMES
+@@ -78,8 +79,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
+ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 15, feature_bit) || \
+ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 16, feature_bit) || \
+ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 17, feature_bit) || \
++ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 18, feature_bit) || \
+ REQUIRED_MASK_CHECK || \
+- BUILD_BUG_ON_ZERO(NCAPINTS != 18))
++ BUILD_BUG_ON_ZERO(NCAPINTS != 19))
+
+ #define DISABLED_MASK_BIT_SET(feature_bit) \
+ ( CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 0, feature_bit) || \
+@@ -100,8 +102,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
+ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 15, feature_bit) || \
+ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 16, feature_bit) || \
+ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 17, feature_bit) || \
++ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 18, feature_bit) || \
+ DISABLED_MASK_CHECK || \
+- BUILD_BUG_ON_ZERO(NCAPINTS != 18))
++ BUILD_BUG_ON_ZERO(NCAPINTS != 19))
+
+ #define cpu_has(c, bit) \
+ (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \
+diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
+index 205ce70c1d6c..f4b175db70f4 100644
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -12,7 +12,7 @@
+ /*
+ * Defines x86 CPU feature bits
+ */
+-#define NCAPINTS 18 /* N 32-bit words worth of info */
++#define NCAPINTS 19 /* N 32-bit words worth of info */
+ #define NBUGINTS 1 /* N 32-bit bug flags */
+
+ /*
+@@ -194,13 +194,28 @@
+ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
+
+ #define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */
+-#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* Fill RSB on context switches */
++#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */
++
++#define X86_FEATURE_RETPOLINE ( 7*32+29) /* "" Generic Retpoline mitigation for Spectre variant 2 */
++#define X86_FEATURE_RETPOLINE_AMD ( 7*32+30) /* "" AMD Retpoline mitigation for Spectre variant 2 */
++
++#define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */
++#define X86_FEATURE_SSBD ( 7*32+17) /* Speculative Store Bypass Disable */
+
+-#define X86_FEATURE_RETPOLINE ( 7*32+29) /* Generic Retpoline mitigation for Spectre variant 2 */
+-#define X86_FEATURE_RETPOLINE_AMD ( 7*32+30) /* AMD Retpoline mitigation for Spectre variant 2 */
+ /* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... */
+ #define X86_FEATURE_KAISER ( 7*32+31) /* CONFIG_PAGE_TABLE_ISOLATION w/o nokaiser */
+
++#define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled*/
++#define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */
++#define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* "" Disable Speculative Store Bypass. */
++#define X86_FEATURE_LS_CFG_SSBD ( 7*32+24) /* "" AMD SSBD implementation */
++
++#define X86_FEATURE_IBRS ( 7*32+25) /* Indirect Branch Restricted Speculation */
++#define X86_FEATURE_IBPB ( 7*32+26) /* Indirect Branch Prediction Barrier */
++#define X86_FEATURE_STIBP ( 7*32+27) /* Single Thread Indirect Branch Predictors */
++#define X86_FEATURE_ZEN ( 7*32+28) /* "" CPU is AMD family 0x17 (Zen) */
++
++
+ /* Virtualization flags: Linux defined, word 8 */
+ #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
+ #define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */
+@@ -251,6 +266,10 @@
+
+ /* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */
+ #define X86_FEATURE_CLZERO (13*32+0) /* CLZERO instruction */
++#define X86_FEATURE_AMD_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */
++#define X86_FEATURE_AMD_IBRS (13*32+14) /* Indirect Branch Restricted Speculation */
++#define X86_FEATURE_AMD_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors */
++#define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */
+
+ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */
+ #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */
+@@ -285,6 +304,15 @@
+ #define X86_FEATURE_SUCCOR (17*32+1) /* Uncorrectable error containment and recovery */
+ #define X86_FEATURE_SMCA (17*32+3) /* Scalable MCA */
+
++
++/* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */
++#define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */
++#define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */
++#define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */
++#define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */
++#define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */
++#define X86_FEATURE_SPEC_CTRL_SSBD (18*32+31) /* "" Speculative Store Bypass Disable */
++
+ /*
+ * BUG word(s)
+ */
+@@ -302,5 +330,6 @@
+ #define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */
+ #define X86_BUG_SPECTRE_V1 X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */
+ #define X86_BUG_SPECTRE_V2 X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */
++#define X86_BUG_SPEC_STORE_BYPASS X86_BUG(17) /* CPU is affected by speculative store bypass attack */
+
+ #endif /* _ASM_X86_CPUFEATURES_H */
+diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
+index 21c5ac15657b..1f8cca459c6c 100644
+--- a/arch/x86/include/asm/disabled-features.h
++++ b/arch/x86/include/asm/disabled-features.h
+@@ -59,6 +59,7 @@
+ #define DISABLED_MASK15 0
+ #define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE)
+ #define DISABLED_MASK17 0
+-#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18)
++#define DISABLED_MASK18 0
++#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19)
+
+ #endif /* _ASM_X86_DISABLED_FEATURES_H */
+diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
+index 0010c78c4998..7e5a2ffb6938 100644
+--- a/arch/x86/include/asm/efi.h
++++ b/arch/x86/include/asm/efi.h
+@@ -3,6 +3,7 @@
+
+ #include <asm/fpu/api.h>
+ #include <asm/pgtable.h>
++#include <asm/nospec-branch.h>
+
+ /*
+ * We map the EFI regions needed for runtime services non-contiguously,
+@@ -39,8 +40,10 @@ extern unsigned long asmlinkage efi_call_phys(void *, ...);
+ ({ \
+ efi_status_t __s; \
+ kernel_fpu_begin(); \
++ firmware_restrict_branch_speculation_start(); \
+ __s = ((efi_##f##_t __attribute__((regparm(0)))*) \
+ efi.systab->runtime->f)(args); \
++ firmware_restrict_branch_speculation_end(); \
+ kernel_fpu_end(); \
+ __s; \
+ })
+@@ -49,8 +52,10 @@ extern unsigned long asmlinkage efi_call_phys(void *, ...);
+ #define __efi_call_virt(f, args...) \
+ ({ \
+ kernel_fpu_begin(); \
++ firmware_restrict_branch_speculation_start(); \
+ ((efi_##f##_t __attribute__((regparm(0)))*) \
+ efi.systab->runtime->f)(args); \
++ firmware_restrict_branch_speculation_end(); \
+ kernel_fpu_end(); \
+ })
+
+@@ -71,7 +76,9 @@ extern u64 asmlinkage efi_call(void *fp, ...);
+ efi_sync_low_kernel_mappings(); \
+ preempt_disable(); \
+ __kernel_fpu_begin(); \
++ firmware_restrict_branch_speculation_start(); \
+ __s = efi_call((void *)efi.systab->runtime->f, __VA_ARGS__); \
++ firmware_restrict_branch_speculation_end(); \
+ __kernel_fpu_end(); \
+ preempt_enable(); \
+ __s; \
+diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
+index 6999f7d01a0d..e13ff5a14633 100644
+--- a/arch/x86/include/asm/intel-family.h
++++ b/arch/x86/include/asm/intel-family.h
+@@ -12,6 +12,7 @@
+ */
+
+ #define INTEL_FAM6_CORE_YONAH 0x0E
++
+ #define INTEL_FAM6_CORE2_MEROM 0x0F
+ #define INTEL_FAM6_CORE2_MEROM_L 0x16
+ #define INTEL_FAM6_CORE2_PENRYN 0x17
+@@ -20,6 +21,7 @@
+ #define INTEL_FAM6_NEHALEM 0x1E
+ #define INTEL_FAM6_NEHALEM_EP 0x1A
+ #define INTEL_FAM6_NEHALEM_EX 0x2E
++
+ #define INTEL_FAM6_WESTMERE 0x25
+ #define INTEL_FAM6_WESTMERE2 0x1F
+ #define INTEL_FAM6_WESTMERE_EP 0x2C
+@@ -36,9 +38,9 @@
+ #define INTEL_FAM6_HASWELL_GT3E 0x46
+
+ #define INTEL_FAM6_BROADWELL_CORE 0x3D
+-#define INTEL_FAM6_BROADWELL_XEON_D 0x56
+ #define INTEL_FAM6_BROADWELL_GT3E 0x47
+ #define INTEL_FAM6_BROADWELL_X 0x4F
++#define INTEL_FAM6_BROADWELL_XEON_D 0x56
+
+ #define INTEL_FAM6_SKYLAKE_MOBILE 0x4E
+ #define INTEL_FAM6_SKYLAKE_DESKTOP 0x5E
+@@ -56,13 +58,15 @@
+ #define INTEL_FAM6_ATOM_SILVERMONT1 0x37 /* BayTrail/BYT / Valleyview */
+ #define INTEL_FAM6_ATOM_SILVERMONT2 0x4D /* Avaton/Rangely */
+ #define INTEL_FAM6_ATOM_AIRMONT 0x4C /* CherryTrail / Braswell */
+-#define INTEL_FAM6_ATOM_MERRIFIELD1 0x4A /* Tangier */
+-#define INTEL_FAM6_ATOM_MERRIFIELD2 0x5A /* Annidale */
++#define INTEL_FAM6_ATOM_MERRIFIELD 0x4A /* Tangier */
++#define INTEL_FAM6_ATOM_MOOREFIELD 0x5A /* Annidale */
+ #define INTEL_FAM6_ATOM_GOLDMONT 0x5C
+ #define INTEL_FAM6_ATOM_DENVERTON 0x5F /* Goldmont Microserver */
++#define INTEL_FAM6_ATOM_GEMINI_LAKE 0x7A
+
+ /* Xeon Phi */
+
+ #define INTEL_FAM6_XEON_PHI_KNL 0x57 /* Knights Landing */
++#define INTEL_FAM6_XEON_PHI_KNM 0x85 /* Knights Mill */
+
+ #endif /* _ASM_X86_INTEL_FAMILY_H */
+diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
+index b77f5edb03b0..0056bc945cd1 100644
+--- a/arch/x86/include/asm/irqflags.h
++++ b/arch/x86/include/asm/irqflags.h
+@@ -8,7 +8,7 @@
+ * Interrupt control:
+ */
+
+-static inline unsigned long native_save_fl(void)
++extern inline unsigned long native_save_fl(void)
+ {
+ unsigned long flags;
+
+diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
+index 7680b76adafc..3359dfedc7ee 100644
+--- a/arch/x86/include/asm/mmu.h
++++ b/arch/x86/include/asm/mmu.h
+@@ -3,12 +3,18 @@
+
+ #include <linux/spinlock.h>
+ #include <linux/mutex.h>
++#include <linux/atomic.h>
+
+ /*
+- * The x86 doesn't have a mmu context, but
+- * we put the segment information here.
++ * x86 has arch-specific MMU state beyond what lives in mm_struct.
+ */
+ typedef struct {
++ /*
++ * ctx_id uniquely identifies this mm_struct. A ctx_id will never
++ * be reused, and zero is not a valid ctx_id.
++ */
++ u64 ctx_id;
++
+ #ifdef CONFIG_MODIFY_LDT_SYSCALL
+ struct ldt_struct *ldt;
+ #endif
+@@ -24,6 +30,11 @@ typedef struct {
+ atomic_t perf_rdpmc_allowed; /* nonzero if rdpmc is allowed */
+ } mm_context_t;
+
++#define INIT_MM_CONTEXT(mm) \
++ .context = { \
++ .ctx_id = 1, \
++ }
++
+ void leave_mm(int cpu);
+
+ #endif /* _ASM_X86_MMU_H */
+diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
+index 9bfc5fd77015..effc12767cbf 100644
+--- a/arch/x86/include/asm/mmu_context.h
++++ b/arch/x86/include/asm/mmu_context.h
+@@ -11,6 +11,9 @@
+ #include <asm/tlbflush.h>
+ #include <asm/paravirt.h>
+ #include <asm/mpx.h>
++
++extern atomic64_t last_mm_ctx_id;
++
+ #ifndef CONFIG_PARAVIRT
+ static inline void paravirt_activate_mm(struct mm_struct *prev,
+ struct mm_struct *next)
+@@ -52,15 +55,15 @@ struct ldt_struct {
+ /*
+ * Used for LDT copy/destruction.
+ */
+-int init_new_context(struct task_struct *tsk, struct mm_struct *mm);
+-void destroy_context(struct mm_struct *mm);
++int init_new_context_ldt(struct task_struct *tsk, struct mm_struct *mm);
++void destroy_context_ldt(struct mm_struct *mm);
+ #else /* CONFIG_MODIFY_LDT_SYSCALL */
+-static inline int init_new_context(struct task_struct *tsk,
+- struct mm_struct *mm)
++static inline int init_new_context_ldt(struct task_struct *tsk,
++ struct mm_struct *mm)
+ {
+ return 0;
+ }
+-static inline void destroy_context(struct mm_struct *mm) {}
++static inline void destroy_context_ldt(struct mm_struct *mm) {}
+ #endif
+
+ static inline void load_mm_ldt(struct mm_struct *mm)
+@@ -102,6 +105,18 @@ static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
+ this_cpu_write(cpu_tlbstate.state, TLBSTATE_LAZY);
+ }
+
++static inline int init_new_context(struct task_struct *tsk,
++ struct mm_struct *mm)
++{
++ mm->context.ctx_id = atomic64_inc_return(&last_mm_ctx_id);
++ init_new_context_ldt(tsk, mm);
++ return 0;
++}
++static inline void destroy_context(struct mm_struct *mm)
++{
++ destroy_context_ldt(mm);
++}
++
+ extern void switch_mm(struct mm_struct *prev, struct mm_struct *next,
+ struct task_struct *tsk);
+
+diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
+index b8911aecf035..caa00191e565 100644
+--- a/arch/x86/include/asm/msr-index.h
++++ b/arch/x86/include/asm/msr-index.h
+@@ -32,6 +32,15 @@
+ #define EFER_FFXSR (1<<_EFER_FFXSR)
+
+ /* Intel MSRs. Some also available on other CPUs */
++#define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */
++#define SPEC_CTRL_IBRS (1 << 0) /* Indirect Branch Restricted Speculation */
++#define SPEC_CTRL_STIBP (1 << 1) /* Single Thread Indirect Branch Predictors */
++#define SPEC_CTRL_SSBD_SHIFT 2 /* Speculative Store Bypass Disable bit */
++#define SPEC_CTRL_SSBD (1 << SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */
++
++#define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */
++#define PRED_CMD_IBPB (1 << 0) /* Indirect Branch Prediction Barrier */
++
+ #define MSR_IA32_PERFCTR0 0x000000c1
+ #define MSR_IA32_PERFCTR1 0x000000c2
+ #define MSR_FSB_FREQ 0x000000cd
+@@ -45,6 +54,16 @@
+ #define SNB_C3_AUTO_UNDEMOTE (1UL << 28)
+
+ #define MSR_MTRRcap 0x000000fe
++
++#define MSR_IA32_ARCH_CAPABILITIES 0x0000010a
++#define ARCH_CAP_RDCL_NO (1 << 0) /* Not susceptible to Meltdown */
++#define ARCH_CAP_IBRS_ALL (1 << 1) /* Enhanced IBRS support */
++#define ARCH_CAP_SSB_NO (1 << 4) /*
++ * Not susceptible to Speculative Store Bypass
++ * attack, so no Speculative Store Bypass
++ * control required.
++ */
++
+ #define MSR_IA32_BBL_CR_CTL 0x00000119
+ #define MSR_IA32_BBL_CR_CTL3 0x0000011e
+
+@@ -132,6 +151,7 @@
+
+ /* DEBUGCTLMSR bits (others vary by model): */
+ #define DEBUGCTLMSR_LBR (1UL << 0) /* last branch recording */
++#define DEBUGCTLMSR_BTF_SHIFT 1
+ #define DEBUGCTLMSR_BTF (1UL << 1) /* single-step on branches */
+ #define DEBUGCTLMSR_TR (1UL << 6)
+ #define DEBUGCTLMSR_BTS (1UL << 7)
+@@ -308,6 +328,8 @@
+ #define MSR_AMD64_IBSOPDATA4 0xc001103d
+ #define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */
+
++#define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f
++
+ /* Fam 16h MSRs */
+ #define MSR_F16H_L2I_PERF_CTL 0xc0010230
+ #define MSR_F16H_L2I_PERF_CTR 0xc0010231
+diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
+index 8b910416243c..b4c74c24c890 100644
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -6,6 +6,7 @@
+ #include <asm/alternative.h>
+ #include <asm/alternative-asm.h>
+ #include <asm/cpufeatures.h>
++#include <asm/msr-index.h>
+
+ /*
+ * Fill the CPU return stack buffer.
+@@ -171,6 +172,14 @@ enum spectre_v2_mitigation {
+ SPECTRE_V2_IBRS,
+ };
+
++/* The Speculative Store Bypass disable variants */
++enum ssb_mitigation {
++ SPEC_STORE_BYPASS_NONE,
++ SPEC_STORE_BYPASS_DISABLE,
++ SPEC_STORE_BYPASS_PRCTL,
++ SPEC_STORE_BYPASS_SECCOMP,
++};
++
+ extern char __indirect_thunk_start[];
+ extern char __indirect_thunk_end[];
+
+@@ -194,6 +203,51 @@ static inline void vmexit_fill_RSB(void)
+ #endif
+ }
+
++static __always_inline
++void alternative_msr_write(unsigned int msr, u64 val, unsigned int feature)
++{
++ asm volatile(ALTERNATIVE("", "wrmsr", %c[feature])
++ : : "c" (msr),
++ "a" ((u32)val),
++ "d" ((u32)(val >> 32)),
++ [feature] "i" (feature)
++ : "memory");
++}
++
++static inline void indirect_branch_prediction_barrier(void)
++{
++ u64 val = PRED_CMD_IBPB;
++
++ alternative_msr_write(MSR_IA32_PRED_CMD, val, X86_FEATURE_USE_IBPB);
++}
++
++/* The Intel SPEC CTRL MSR base value cache */
++extern u64 x86_spec_ctrl_base;
++
++/*
++ * With retpoline, we must use IBRS to restrict branch prediction
++ * before calling into firmware.
++ *
++ * (Implemented as CPP macros due to header hell.)
++ */
++#define firmware_restrict_branch_speculation_start() \
++do { \
++ u64 val = x86_spec_ctrl_base | SPEC_CTRL_IBRS; \
++ \
++ preempt_disable(); \
++ alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \
++ X86_FEATURE_USE_IBRS_FW); \
++} while (0)
++
++#define firmware_restrict_branch_speculation_end() \
++do { \
++ u64 val = x86_spec_ctrl_base; \
++ \
++ alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \
++ X86_FEATURE_USE_IBRS_FW); \
++ preempt_enable(); \
++} while (0)
++
+ #endif /* __ASSEMBLY__ */
+
+ /*
+diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h
+index fac9a5c0abe9..6847d85400a8 100644
+--- a/arch/x86/include/asm/required-features.h
++++ b/arch/x86/include/asm/required-features.h
+@@ -100,6 +100,7 @@
+ #define REQUIRED_MASK15 0
+ #define REQUIRED_MASK16 0
+ #define REQUIRED_MASK17 0
+-#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18)
++#define REQUIRED_MASK18 0
++#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19)
+
+ #endif /* _ASM_X86_REQUIRED_FEATURES_H */
+diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h
+new file mode 100644
+index 000000000000..ae7c2c5cd7f0
+--- /dev/null
++++ b/arch/x86/include/asm/spec-ctrl.h
+@@ -0,0 +1,80 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++#ifndef _ASM_X86_SPECCTRL_H_
++#define _ASM_X86_SPECCTRL_H_
++
++#include <linux/thread_info.h>
++#include <asm/nospec-branch.h>
++
++/*
++ * On VMENTER we must preserve whatever view of the SPEC_CTRL MSR
++ * the guest has, while on VMEXIT we restore the host view. This
++ * would be easier if SPEC_CTRL were architecturally maskable or
++ * shadowable for guests but this is not (currently) the case.
++ * Takes the guest view of SPEC_CTRL MSR as a parameter and also
++ * the guest's version of VIRT_SPEC_CTRL, if emulated.
++ */
++extern void x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool guest);
++
++/**
++ * x86_spec_ctrl_set_guest - Set speculation control registers for the guest
++ * @guest_spec_ctrl: The guest content of MSR_SPEC_CTRL
++ * @guest_virt_spec_ctrl: The guest controlled bits of MSR_VIRT_SPEC_CTRL
++ * (may get translated to MSR_AMD64_LS_CFG bits)
++ *
++ * Avoids writing to the MSR if the content/bits are the same
++ */
++static inline
++void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl)
++{
++ x86_virt_spec_ctrl(guest_spec_ctrl, guest_virt_spec_ctrl, true);
++}
++
++/**
++ * x86_spec_ctrl_restore_host - Restore host speculation control registers
++ * @guest_spec_ctrl: The guest content of MSR_SPEC_CTRL
++ * @guest_virt_spec_ctrl: The guest controlled bits of MSR_VIRT_SPEC_CTRL
++ * (may get translated to MSR_AMD64_LS_CFG bits)
++ *
++ * Avoids writing to the MSR if the content/bits are the same
++ */
++static inline
++void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl)
++{
++ x86_virt_spec_ctrl(guest_spec_ctrl, guest_virt_spec_ctrl, false);
++}
++
++/* AMD specific Speculative Store Bypass MSR data */
++extern u64 x86_amd_ls_cfg_base;
++extern u64 x86_amd_ls_cfg_ssbd_mask;
++
++static inline u64 ssbd_tif_to_spec_ctrl(u64 tifn)
++{
++ BUILD_BUG_ON(TIF_SSBD < SPEC_CTRL_SSBD_SHIFT);
++ return (tifn & _TIF_SSBD) >> (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT);
++}
++
++static inline unsigned long ssbd_spec_ctrl_to_tif(u64 spec_ctrl)
++{
++ BUILD_BUG_ON(TIF_SSBD < SPEC_CTRL_SSBD_SHIFT);
++ return (spec_ctrl & SPEC_CTRL_SSBD) << (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT);
++}
++
++static inline u64 ssbd_tif_to_amd_ls_cfg(u64 tifn)
++{
++ return (tifn & _TIF_SSBD) ? x86_amd_ls_cfg_ssbd_mask : 0ULL;
++}
++
++#ifdef CONFIG_SMP
++extern void speculative_store_bypass_ht_init(void);
++#else
++static inline void speculative_store_bypass_ht_init(void) { }
++#endif
++
++extern void speculative_store_bypass_update(unsigned long tif);
++
++static inline void speculative_store_bypass_update_current(void)
++{
++ speculative_store_bypass_update(current_thread_info()->flags);
++}
++
++#endif
+diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
+index 18c9aaa8c043..a96e88b243ef 100644
+--- a/arch/x86/include/asm/thread_info.h
++++ b/arch/x86/include/asm/thread_info.h
+@@ -92,6 +92,7 @@ struct thread_info {
+ #define TIF_SIGPENDING 2 /* signal pending */
+ #define TIF_NEED_RESCHED 3 /* rescheduling necessary */
+ #define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/
++#define TIF_SSBD 5 /* Reduced data speculation */
+ #define TIF_SYSCALL_EMU 6 /* syscall emulation active */
+ #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
+ #define TIF_SECCOMP 8 /* secure computing */
+@@ -114,8 +115,9 @@ struct thread_info {
+ #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
+ #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
+ #define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
+-#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP)
+ #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
++#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP)
++#define _TIF_SSBD (1 << TIF_SSBD)
+ #define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU)
+ #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
+ #define _TIF_SECCOMP (1 << TIF_SECCOMP)
+@@ -147,7 +149,7 @@ struct thread_info {
+
+ /* flags to check in __switch_to() */
+ #define _TIF_WORK_CTXSW \
+- (_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP)
++ (_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP|_TIF_SSBD)
+
+ #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
+ #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
+diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
+index e2a89d2577fb..72cfe3e53af1 100644
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -68,6 +68,8 @@ static inline void invpcid_flush_all_nonglobals(void)
+ struct tlb_state {
+ struct mm_struct *active_mm;
+ int state;
++ /* last user mm's ctx id */
++ u64 last_ctx_id;
+
+ /*
+ * Access to this CR4 shadow and to H/W CR4 is protected by
+@@ -109,6 +111,16 @@ static inline void cr4_clear_bits(unsigned long mask)
+ }
+ }
+
++static inline void cr4_toggle_bits(unsigned long mask)
++{
++ unsigned long cr4;
++
++ cr4 = this_cpu_read(cpu_tlbstate.cr4);
++ cr4 ^= mask;
++ this_cpu_write(cpu_tlbstate.cr4, cr4);
++ __write_cr4(cr4);
++}
++
+ /* Read the CR4 shadow. */
+ static inline unsigned long cr4_read_shadow(void)
+ {
+diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
+index b1b78ffe01d0..7947cee61f61 100644
+--- a/arch/x86/kernel/Makefile
++++ b/arch/x86/kernel/Makefile
+@@ -41,6 +41,7 @@ obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o
+ obj-y += tsc.o tsc_msr.o io_delay.o rtc.o
+ obj-y += pci-iommu_table.o
+ obj-y += resource.o
++obj-y += irqflags.o
+
+ obj-y += process.o
+ obj-y += fpu/
+diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
+index f4fb8f5b0be4..9f6151884249 100644
+--- a/arch/x86/kernel/cpu/amd.c
++++ b/arch/x86/kernel/cpu/amd.c
+@@ -9,6 +9,7 @@
+ #include <asm/processor.h>
+ #include <asm/apic.h>
+ #include <asm/cpu.h>
++#include <asm/spec-ctrl.h>
+ #include <asm/smp.h>
+ #include <asm/pci-direct.h>
+ #include <asm/delay.h>
+@@ -519,6 +520,26 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
+
+ if (cpu_has(c, X86_FEATURE_MWAITX))
+ use_mwaitx_delay();
++
++ if (c->x86 >= 0x15 && c->x86 <= 0x17) {
++ unsigned int bit;
++
++ switch (c->x86) {
++ case 0x15: bit = 54; break;
++ case 0x16: bit = 33; break;
++ case 0x17: bit = 10; break;
++ default: return;
++ }
++ /*
++ * Try to cache the base value so further operations can
++ * avoid RMW. If that faults, do not enable SSBD.
++ */
++ if (!rdmsrl_safe(MSR_AMD64_LS_CFG, &x86_amd_ls_cfg_base)) {
++ setup_force_cpu_cap(X86_FEATURE_LS_CFG_SSBD);
++ setup_force_cpu_cap(X86_FEATURE_SSBD);
++ x86_amd_ls_cfg_ssbd_mask = 1ULL << bit;
++ }
++ }
+ }
+
+ static void early_init_amd(struct cpuinfo_x86 *c)
+@@ -692,6 +713,17 @@ static void init_amd_bd(struct cpuinfo_x86 *c)
+ }
+ }
+
++static void init_amd_zn(struct cpuinfo_x86 *c)
++{
++ set_cpu_cap(c, X86_FEATURE_ZEN);
++ /*
++ * Fix erratum 1076: CPB feature bit not being set in CPUID. It affects
++ * all up to and including B1.
++ */
++ if (c->x86_model <= 1 && c->x86_mask <= 1)
++ set_cpu_cap(c, X86_FEATURE_CPB);
++}
++
+ static void init_amd(struct cpuinfo_x86 *c)
+ {
+ u32 dummy;
+@@ -722,6 +754,7 @@ static void init_amd(struct cpuinfo_x86 *c)
+ case 0x10: init_amd_gh(c); break;
+ case 0x12: init_amd_ln(c); break;
+ case 0x15: init_amd_bd(c); break;
++ case 0x17: init_amd_zn(c); break;
+ }
+
+ /* Enable workaround for FXSAVE leak */
+@@ -791,8 +824,9 @@ static void init_amd(struct cpuinfo_x86 *c)
+ if (cpu_has(c, X86_FEATURE_3DNOW) || cpu_has(c, X86_FEATURE_LM))
+ set_cpu_cap(c, X86_FEATURE_3DNOWPREFETCH);
+
+- /* AMD CPUs don't reset SS attributes on SYSRET */
+- set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS);
++ /* AMD CPUs don't reset SS attributes on SYSRET, Xen does. */
++ if (!cpu_has(c, X86_FEATURE_XENPV))
++ set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS);
+ }
+
+ #ifdef CONFIG_X86_32
+diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
+index 2bbc74f8a4a8..12a8867071f3 100644
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -11,8 +11,10 @@
+ #include <linux/utsname.h>
+ #include <linux/cpu.h>
+ #include <linux/module.h>
++#include <linux/nospec.h>
++#include <linux/prctl.h>
+
+-#include <asm/nospec-branch.h>
++#include <asm/spec-ctrl.h>
+ #include <asm/cmdline.h>
+ #include <asm/bugs.h>
+ #include <asm/processor.h>
+@@ -26,6 +28,27 @@
+ #include <asm/intel-family.h>
+
+ static void __init spectre_v2_select_mitigation(void);
++static void __init ssb_select_mitigation(void);
++
++/*
++ * Our boot-time value of the SPEC_CTRL MSR. We read it once so that any
++ * writes to SPEC_CTRL contain whatever reserved bits have been set.
++ */
++u64 x86_spec_ctrl_base;
++EXPORT_SYMBOL_GPL(x86_spec_ctrl_base);
++
++/*
++ * The vendor and possibly platform specific bits which can be modified in
++ * x86_spec_ctrl_base.
++ */
++static u64 x86_spec_ctrl_mask = SPEC_CTRL_IBRS;
++
++/*
++ * AMD specific MSR info for Speculative Store Bypass control.
++ * x86_amd_ls_cfg_ssbd_mask is initialized in identify_boot_cpu().
++ */
++u64 x86_amd_ls_cfg_base;
++u64 x86_amd_ls_cfg_ssbd_mask;
+
+ void __init check_bugs(void)
+ {
+@@ -36,9 +59,27 @@ void __init check_bugs(void)
+ print_cpu_info(&boot_cpu_data);
+ }
+
++ /*
++ * Read the SPEC_CTRL MSR to account for reserved bits which may
++ * have unknown values. AMD64_LS_CFG MSR is cached in the early AMD
++ * init code as it is not enumerated and depends on the family.
++ */
++ if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL))
++ rdmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
++
++ /* Allow STIBP in MSR_SPEC_CTRL if supported */
++ if (boot_cpu_has(X86_FEATURE_STIBP))
++ x86_spec_ctrl_mask |= SPEC_CTRL_STIBP;
++
+ /* Select the proper spectre mitigation before patching alternatives */
+ spectre_v2_select_mitigation();
+
++ /*
++ * Select proper mitigation for any exposure to the Speculative Store
++ * Bypass vulnerability.
++ */
++ ssb_select_mitigation();
++
+ #ifdef CONFIG_X86_32
+ /*
+ * Check whether we are able to run this kernel safely on SMP.
+@@ -94,6 +135,73 @@ static const char *spectre_v2_strings[] = {
+
+ static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE;
+
++void
++x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest)
++{
++ u64 msrval, guestval, hostval = x86_spec_ctrl_base;
++ struct thread_info *ti = current_thread_info();
++
++ /* Is MSR_SPEC_CTRL implemented ? */
++ if (static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) {
++ /*
++ * Restrict guest_spec_ctrl to supported values. Clear the
++ * modifiable bits in the host base value and or the
++ * modifiable bits from the guest value.
++ */
++ guestval = hostval & ~x86_spec_ctrl_mask;
++ guestval |= guest_spec_ctrl & x86_spec_ctrl_mask;
++
++ /* SSBD controlled in MSR_SPEC_CTRL */
++ if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD))
++ hostval |= ssbd_tif_to_spec_ctrl(ti->flags);
++
++ if (hostval != guestval) {
++ msrval = setguest ? guestval : hostval;
++ wrmsrl(MSR_IA32_SPEC_CTRL, msrval);
++ }
++ }
++
++ /*
++ * If SSBD is not handled in MSR_SPEC_CTRL on AMD, update
++ * MSR_AMD64_L2_CFG or MSR_VIRT_SPEC_CTRL if supported.
++ */
++ if (!static_cpu_has(X86_FEATURE_LS_CFG_SSBD) &&
++ !static_cpu_has(X86_FEATURE_VIRT_SSBD))
++ return;
++
++ /*
++ * If the host has SSBD mitigation enabled, force it in the host's
++ * virtual MSR value. If its not permanently enabled, evaluate
++ * current's TIF_SSBD thread flag.
++ */
++ if (static_cpu_has(X86_FEATURE_SPEC_STORE_BYPASS_DISABLE))
++ hostval = SPEC_CTRL_SSBD;
++ else
++ hostval = ssbd_tif_to_spec_ctrl(ti->flags);
++
++ /* Sanitize the guest value */
++ guestval = guest_virt_spec_ctrl & SPEC_CTRL_SSBD;
++
++ if (hostval != guestval) {
++ unsigned long tif;
++
++ tif = setguest ? ssbd_spec_ctrl_to_tif(guestval) :
++ ssbd_spec_ctrl_to_tif(hostval);
++
++ speculative_store_bypass_update(tif);
++ }
++}
++EXPORT_SYMBOL_GPL(x86_virt_spec_ctrl);
++
++static void x86_amd_ssb_disable(void)
++{
++ u64 msrval = x86_amd_ls_cfg_base | x86_amd_ls_cfg_ssbd_mask;
++
++ if (boot_cpu_has(X86_FEATURE_VIRT_SSBD))
++ wrmsrl(MSR_AMD64_VIRT_SPEC_CTRL, SPEC_CTRL_SSBD);
++ else if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD))
++ wrmsrl(MSR_AMD64_LS_CFG, msrval);
++}
+
+ #ifdef RETPOLINE
+ static bool spectre_v2_bad_module;
+@@ -162,8 +270,7 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
+ if (cmdline_find_option_bool(boot_command_line, "nospectre_v2"))
+ return SPECTRE_V2_CMD_NONE;
+ else {
+- ret = cmdline_find_option(boot_command_line, "spectre_v2", arg,
+- sizeof(arg));
++ ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, sizeof(arg));
+ if (ret < 0)
+ return SPECTRE_V2_CMD_AUTO;
+
+@@ -184,8 +291,7 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
+ cmd == SPECTRE_V2_CMD_RETPOLINE_AMD ||
+ cmd == SPECTRE_V2_CMD_RETPOLINE_GENERIC) &&
+ !IS_ENABLED(CONFIG_RETPOLINE)) {
+- pr_err("%s selected but not compiled in. Switching to AUTO select\n",
+- mitigation_options[i].option);
++ pr_err("%s selected but not compiled in. Switching to AUTO select\n", mitigation_options[i].option);
+ return SPECTRE_V2_CMD_AUTO;
+ }
+
+@@ -255,14 +361,14 @@ static void __init spectre_v2_select_mitigation(void)
+ goto retpoline_auto;
+ break;
+ }
+- pr_err("kernel not compiled with retpoline; no mitigation available!");
++ pr_err("Spectre mitigation: kernel not compiled with retpoline; no mitigation available!");
+ return;
+
+ retpoline_auto:
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
+ retpoline_amd:
+ if (!boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) {
+- pr_err("LFENCE not serializing. Switching to generic retpoline\n");
++ pr_err("Spectre mitigation: LFENCE not serializing, switching to generic retpoline\n");
+ goto retpoline_generic;
+ }
+ mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_AMD :
+@@ -280,7 +386,7 @@ retpoline_auto:
+ pr_info("%s\n", spectre_v2_strings[mode]);
+
+ /*
+- * If neither SMEP or KPTI are available, there is a risk of
++ * If neither SMEP nor PTI are available, there is a risk of
+ * hitting userspace addresses in the RSB after a context switch
+ * from a shallow call stack to a deeper one. To prevent this fill
+ * the entire RSB, even when using IBRS.
+@@ -294,38 +400,309 @@ retpoline_auto:
+ if ((!boot_cpu_has(X86_FEATURE_KAISER) &&
+ !boot_cpu_has(X86_FEATURE_SMEP)) || is_skylake_era()) {
+ setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
+- pr_info("Filling RSB on context switch\n");
++ pr_info("Spectre v2 mitigation: Filling RSB on context switch\n");
++ }
++
++ /* Initialize Indirect Branch Prediction Barrier if supported */
++ if (boot_cpu_has(X86_FEATURE_IBPB)) {
++ setup_force_cpu_cap(X86_FEATURE_USE_IBPB);
++ pr_info("Spectre v2 mitigation: Enabling Indirect Branch Prediction Barrier\n");
++ }
++
++ /*
++ * Retpoline means the kernel is safe because it has no indirect
++ * branches. But firmware isn't, so use IBRS to protect that.
++ */
++ if (boot_cpu_has(X86_FEATURE_IBRS)) {
++ setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW);
++ pr_info("Enabling Restricted Speculation for firmware calls\n");
++ }
++}
++
++#undef pr_fmt
++#define pr_fmt(fmt) "Speculative Store Bypass: " fmt
++
++static enum ssb_mitigation ssb_mode = SPEC_STORE_BYPASS_NONE;
++
++/* The kernel command line selection */
++enum ssb_mitigation_cmd {
++ SPEC_STORE_BYPASS_CMD_NONE,
++ SPEC_STORE_BYPASS_CMD_AUTO,
++ SPEC_STORE_BYPASS_CMD_ON,
++ SPEC_STORE_BYPASS_CMD_PRCTL,
++ SPEC_STORE_BYPASS_CMD_SECCOMP,
++};
++
++static const char *ssb_strings[] = {
++ [SPEC_STORE_BYPASS_NONE] = "Vulnerable",
++ [SPEC_STORE_BYPASS_DISABLE] = "Mitigation: Speculative Store Bypass disabled",
++ [SPEC_STORE_BYPASS_PRCTL] = "Mitigation: Speculative Store Bypass disabled via prctl",
++ [SPEC_STORE_BYPASS_SECCOMP] = "Mitigation: Speculative Store Bypass disabled via prctl and seccomp",
++};
++
++static const struct {
++ const char *option;
++ enum ssb_mitigation_cmd cmd;
++} ssb_mitigation_options[] = {
++ { "auto", SPEC_STORE_BYPASS_CMD_AUTO }, /* Platform decides */
++ { "on", SPEC_STORE_BYPASS_CMD_ON }, /* Disable Speculative Store Bypass */
++ { "off", SPEC_STORE_BYPASS_CMD_NONE }, /* Don't touch Speculative Store Bypass */
++ { "prctl", SPEC_STORE_BYPASS_CMD_PRCTL }, /* Disable Speculative Store Bypass via prctl */
++ { "seccomp", SPEC_STORE_BYPASS_CMD_SECCOMP }, /* Disable Speculative Store Bypass via prctl and seccomp */
++};
++
++static enum ssb_mitigation_cmd __init ssb_parse_cmdline(void)
++{
++ enum ssb_mitigation_cmd cmd = SPEC_STORE_BYPASS_CMD_AUTO;
++ char arg[20];
++ int ret, i;
++
++ if (cmdline_find_option_bool(boot_command_line, "nospec_store_bypass_disable")) {
++ return SPEC_STORE_BYPASS_CMD_NONE;
++ } else {
++ ret = cmdline_find_option(boot_command_line, "spec_store_bypass_disable",
++ arg, sizeof(arg));
++ if (ret < 0)
++ return SPEC_STORE_BYPASS_CMD_AUTO;
++
++ for (i = 0; i < ARRAY_SIZE(ssb_mitigation_options); i++) {
++ if (!match_option(arg, ret, ssb_mitigation_options[i].option))
++ continue;
++
++ cmd = ssb_mitigation_options[i].cmd;
++ break;
++ }
++
++ if (i >= ARRAY_SIZE(ssb_mitigation_options)) {
++ pr_err("unknown option (%s). Switching to AUTO select\n", arg);
++ return SPEC_STORE_BYPASS_CMD_AUTO;
++ }
++ }
++
++ return cmd;
++}
++
++static enum ssb_mitigation __init __ssb_select_mitigation(void)
++{
++ enum ssb_mitigation mode = SPEC_STORE_BYPASS_NONE;
++ enum ssb_mitigation_cmd cmd;
++
++ if (!boot_cpu_has(X86_FEATURE_SSBD))
++ return mode;
++
++ cmd = ssb_parse_cmdline();
++ if (!boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS) &&
++ (cmd == SPEC_STORE_BYPASS_CMD_NONE ||
++ cmd == SPEC_STORE_BYPASS_CMD_AUTO))
++ return mode;
++
++ switch (cmd) {
++ case SPEC_STORE_BYPASS_CMD_AUTO:
++ case SPEC_STORE_BYPASS_CMD_SECCOMP:
++ /*
++ * Choose prctl+seccomp as the default mode if seccomp is
++ * enabled.
++ */
++ if (IS_ENABLED(CONFIG_SECCOMP))
++ mode = SPEC_STORE_BYPASS_SECCOMP;
++ else
++ mode = SPEC_STORE_BYPASS_PRCTL;
++ break;
++ case SPEC_STORE_BYPASS_CMD_ON:
++ mode = SPEC_STORE_BYPASS_DISABLE;
++ break;
++ case SPEC_STORE_BYPASS_CMD_PRCTL:
++ mode = SPEC_STORE_BYPASS_PRCTL;
++ break;
++ case SPEC_STORE_BYPASS_CMD_NONE:
++ break;
++ }
++
++ /*
++ * We have three CPU feature flags that are in play here:
++ * - X86_BUG_SPEC_STORE_BYPASS - CPU is susceptible.
++ * - X86_FEATURE_SSBD - CPU is able to turn off speculative store bypass
++ * - X86_FEATURE_SPEC_STORE_BYPASS_DISABLE - engage the mitigation
++ */
++ if (mode == SPEC_STORE_BYPASS_DISABLE) {
++ setup_force_cpu_cap(X86_FEATURE_SPEC_STORE_BYPASS_DISABLE);
++ /*
++ * Intel uses the SPEC CTRL MSR Bit(2) for this, while AMD uses
++ * a completely different MSR and bit dependent on family.
++ */
++ switch (boot_cpu_data.x86_vendor) {
++ case X86_VENDOR_INTEL:
++ x86_spec_ctrl_base |= SPEC_CTRL_SSBD;
++ x86_spec_ctrl_mask |= SPEC_CTRL_SSBD;
++ wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
++ break;
++ case X86_VENDOR_AMD:
++ x86_amd_ssb_disable();
++ break;
++ }
+ }
++
++ return mode;
++}
++
++static void ssb_select_mitigation(void)
++{
++ ssb_mode = __ssb_select_mitigation();
++
++ if (boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS))
++ pr_info("%s\n", ssb_strings[ssb_mode]);
+ }
+
+ #undef pr_fmt
++#define pr_fmt(fmt) "Speculation prctl: " fmt
++
++static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl)
++{
++ bool update;
++
++ if (ssb_mode != SPEC_STORE_BYPASS_PRCTL &&
++ ssb_mode != SPEC_STORE_BYPASS_SECCOMP)
++ return -ENXIO;
++
++ switch (ctrl) {
++ case PR_SPEC_ENABLE:
++ /* If speculation is force disabled, enable is not allowed */
++ if (task_spec_ssb_force_disable(task))
++ return -EPERM;
++ task_clear_spec_ssb_disable(task);
++ update = test_and_clear_tsk_thread_flag(task, TIF_SSBD);
++ break;
++ case PR_SPEC_DISABLE:
++ task_set_spec_ssb_disable(task);
++ update = !test_and_set_tsk_thread_flag(task, TIF_SSBD);
++ break;
++ case PR_SPEC_FORCE_DISABLE:
++ task_set_spec_ssb_disable(task);
++ task_set_spec_ssb_force_disable(task);
++ update = !test_and_set_tsk_thread_flag(task, TIF_SSBD);
++ break;
++ default:
++ return -ERANGE;
++ }
++
++ /*
++ * If being set on non-current task, delay setting the CPU
++ * mitigation until it is next scheduled.
++ */
++ if (task == current && update)
++ speculative_store_bypass_update_current();
++
++ return 0;
++}
++
++int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which,
++ unsigned long ctrl)
++{
++ switch (which) {
++ case PR_SPEC_STORE_BYPASS:
++ return ssb_prctl_set(task, ctrl);
++ default:
++ return -ENODEV;
++ }
++}
++
++#ifdef CONFIG_SECCOMP
++void arch_seccomp_spec_mitigate(struct task_struct *task)
++{
++ if (ssb_mode == SPEC_STORE_BYPASS_SECCOMP)
++ ssb_prctl_set(task, PR_SPEC_FORCE_DISABLE);
++}
++#endif
++
++static int ssb_prctl_get(struct task_struct *task)
++{
++ switch (ssb_mode) {
++ case SPEC_STORE_BYPASS_DISABLE:
++ return PR_SPEC_DISABLE;
++ case SPEC_STORE_BYPASS_SECCOMP:
++ case SPEC_STORE_BYPASS_PRCTL:
++ if (task_spec_ssb_force_disable(task))
++ return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE;
++ if (task_spec_ssb_disable(task))
++ return PR_SPEC_PRCTL | PR_SPEC_DISABLE;
++ return PR_SPEC_PRCTL | PR_SPEC_ENABLE;
++ default:
++ if (boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS))
++ return PR_SPEC_ENABLE;
++ return PR_SPEC_NOT_AFFECTED;
++ }
++}
++
++int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which)
++{
++ switch (which) {
++ case PR_SPEC_STORE_BYPASS:
++ return ssb_prctl_get(task);
++ default:
++ return -ENODEV;
++ }
++}
++
++void x86_spec_ctrl_setup_ap(void)
++{
++ if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL))
++ wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
++
++ if (ssb_mode == SPEC_STORE_BYPASS_DISABLE)
++ x86_amd_ssb_disable();
++}
+
+ #ifdef CONFIG_SYSFS
+-ssize_t cpu_show_meltdown(struct device *dev,
+- struct device_attribute *attr, char *buf)
++
++static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr,
++ char *buf, unsigned int bug)
+ {
+- if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
++ if (!boot_cpu_has_bug(bug))
+ return sprintf(buf, "Not affected\n");
+- if (boot_cpu_has(X86_FEATURE_KAISER))
+- return sprintf(buf, "Mitigation: PTI\n");
++
++ switch (bug) {
++ case X86_BUG_CPU_MELTDOWN:
++ if (boot_cpu_has(X86_FEATURE_KAISER))
++ return sprintf(buf, "Mitigation: PTI\n");
++
++ break;
++
++ case X86_BUG_SPECTRE_V1:
++ return sprintf(buf, "Mitigation: __user pointer sanitization\n");
++
++ case X86_BUG_SPECTRE_V2:
++ return sprintf(buf, "%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
++ boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "",
++ boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "",
++ spectre_v2_module_string());
++
++ case X86_BUG_SPEC_STORE_BYPASS:
++ return sprintf(buf, "%s\n", ssb_strings[ssb_mode]);
++
++ default:
++ break;
++ }
++
+ return sprintf(buf, "Vulnerable\n");
+ }
+
+-ssize_t cpu_show_spectre_v1(struct device *dev,
+- struct device_attribute *attr, char *buf)
++ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf)
+ {
+- if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1))
+- return sprintf(buf, "Not affected\n");
+- return sprintf(buf, "Mitigation: __user pointer sanitization\n");
++ return cpu_show_common(dev, attr, buf, X86_BUG_CPU_MELTDOWN);
+ }
+
+-ssize_t cpu_show_spectre_v2(struct device *dev,
+- struct device_attribute *attr, char *buf)
++ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, char *buf)
+ {
+- if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
+- return sprintf(buf, "Not affected\n");
++ return cpu_show_common(dev, attr, buf, X86_BUG_SPECTRE_V1);
++}
+
+- return sprintf(buf, "%s%s\n", spectre_v2_strings[spectre_v2_enabled],
+- spectre_v2_module_string());
++ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, char *buf)
++{
++ return cpu_show_common(dev, attr, buf, X86_BUG_SPECTRE_V2);
++}
++
++ssize_t cpu_show_spec_store_bypass(struct device *dev, struct device_attribute *attr, char *buf)
++{
++ return cpu_show_common(dev, attr, buf, X86_BUG_SPEC_STORE_BYPASS);
+ }
+ #endif
+diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
+index 736e2843139b..3d21b28f9826 100644
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -43,6 +43,8 @@
+ #include <asm/pat.h>
+ #include <asm/microcode.h>
+ #include <asm/microcode_intel.h>
++#include <asm/intel-family.h>
++#include <asm/cpu_device_id.h>
+
+ #ifdef CONFIG_X86_LOCAL_APIC
+ #include <asm/uv/uv.h>
+@@ -674,6 +676,40 @@ static void apply_forced_caps(struct cpuinfo_x86 *c)
+ }
+ }
+
++static void init_speculation_control(struct cpuinfo_x86 *c)
++{
++ /*
++ * The Intel SPEC_CTRL CPUID bit implies IBRS and IBPB support,
++ * and they also have a different bit for STIBP support. Also,
++ * a hypervisor might have set the individual AMD bits even on
++ * Intel CPUs, for finer-grained selection of what's available.
++ */
++ if (cpu_has(c, X86_FEATURE_SPEC_CTRL)) {
++ set_cpu_cap(c, X86_FEATURE_IBRS);
++ set_cpu_cap(c, X86_FEATURE_IBPB);
++ set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL);
++ }
++
++ if (cpu_has(c, X86_FEATURE_INTEL_STIBP))
++ set_cpu_cap(c, X86_FEATURE_STIBP);
++
++ if (cpu_has(c, X86_FEATURE_SPEC_CTRL_SSBD))
++ set_cpu_cap(c, X86_FEATURE_SSBD);
++
++ if (cpu_has(c, X86_FEATURE_AMD_IBRS)) {
++ set_cpu_cap(c, X86_FEATURE_IBRS);
++ set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL);
++ }
++
++ if (cpu_has(c, X86_FEATURE_AMD_IBPB))
++ set_cpu_cap(c, X86_FEATURE_IBPB);
++
++ if (cpu_has(c, X86_FEATURE_AMD_STIBP)) {
++ set_cpu_cap(c, X86_FEATURE_STIBP);
++ set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL);
++ }
++}
++
+ void get_cpu_cap(struct cpuinfo_x86 *c)
+ {
+ u32 eax, ebx, ecx, edx;
+@@ -695,6 +731,7 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
+ cpuid_count(0x00000007, 0, &eax, &ebx, &ecx, &edx);
+ c->x86_capability[CPUID_7_0_EBX] = ebx;
+ c->x86_capability[CPUID_7_ECX] = ecx;
++ c->x86_capability[CPUID_7_EDX] = edx;
+ }
+
+ /* Extended state features: level 0x0000000d */
+@@ -765,6 +802,14 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
+ c->x86_capability[CPUID_8000_000A_EDX] = cpuid_edx(0x8000000a);
+
+ init_scattered_cpuid_features(c);
++ init_speculation_control(c);
++
++ /*
++ * Clear/Set all flags overridden by options, after probe.
++ * This needs to happen each time we re-probe, which may happen
++ * several times during CPU initialization.
++ */
++ apply_forced_caps(c);
+ }
+
+ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
+@@ -793,6 +838,75 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
+ #endif
+ }
+
++static const __initconst struct x86_cpu_id cpu_no_speculation[] = {
++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CEDARVIEW, X86_FEATURE_ANY },
++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CLOVERVIEW, X86_FEATURE_ANY },
++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_LINCROFT, X86_FEATURE_ANY },
++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PENWELL, X86_FEATURE_ANY },
++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PINEVIEW, X86_FEATURE_ANY },
++ { X86_VENDOR_CENTAUR, 5 },
++ { X86_VENDOR_INTEL, 5 },
++ { X86_VENDOR_NSC, 5 },
++ { X86_VENDOR_ANY, 4 },
++ {}
++};
++
++static const __initconst struct x86_cpu_id cpu_no_meltdown[] = {
++ { X86_VENDOR_AMD },
++ {}
++};
++
++static const __initconst struct x86_cpu_id cpu_no_spec_store_bypass[] = {
++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PINEVIEW },
++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_LINCROFT },
++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PENWELL },
++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CLOVERVIEW },
++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CEDARVIEW },
++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT1 },
++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_AIRMONT },
++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT2 },
++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_MERRIFIELD },
++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_CORE_YONAH },
++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNL },
++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNM },
++ { X86_VENDOR_CENTAUR, 5, },
++ { X86_VENDOR_INTEL, 5, },
++ { X86_VENDOR_NSC, 5, },
++ { X86_VENDOR_AMD, 0x12, },
++ { X86_VENDOR_AMD, 0x11, },
++ { X86_VENDOR_AMD, 0x10, },
++ { X86_VENDOR_AMD, 0xf, },
++ { X86_VENDOR_ANY, 4, },
++ {}
++};
++
++static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
++{
++ u64 ia32_cap = 0;
++
++ if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES))
++ rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap);
++
++ if (!x86_match_cpu(cpu_no_spec_store_bypass) &&
++ !(ia32_cap & ARCH_CAP_SSB_NO))
++ setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS);
++
++ if (x86_match_cpu(cpu_no_speculation))
++ return;
++
++ setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
++ setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
++
++ if (x86_match_cpu(cpu_no_meltdown))
++ return;
++
++ /* Rogue Data Cache Load? No! */
++ if (ia32_cap & ARCH_CAP_RDCL_NO)
++ return;
++
++ setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
++}
++
+ /*
+ * Do minimum CPU detection early.
+ * Fields really needed: vendor, cpuid_level, family, model, mask,
+@@ -839,11 +953,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
+
+ setup_force_cpu_cap(X86_FEATURE_ALWAYS);
+
+- if (c->x86_vendor != X86_VENDOR_AMD)
+- setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
+-
+- setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
+- setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
++ cpu_set_bug_bits(c);
+
+ fpu__init_system(c);
+
+@@ -1132,6 +1242,7 @@ void identify_secondary_cpu(struct cpuinfo_x86 *c)
+ enable_sep_cpu();
+ #endif
+ mtrr_ap_init();
++ x86_spec_ctrl_setup_ap();
+ }
+
+ struct msr_range {
+diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
+index 2584265d4745..3b19d82f7932 100644
+--- a/arch/x86/kernel/cpu/cpu.h
++++ b/arch/x86/kernel/cpu/cpu.h
+@@ -46,4 +46,7 @@ extern const struct cpu_dev *const __x86_cpu_dev_start[],
+
+ extern void get_cpu_cap(struct cpuinfo_x86 *c);
+ extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c);
++
++extern void x86_spec_ctrl_setup_ap(void);
++
+ #endif /* ARCH_X86_CPU_H */
+diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
+index 9299e3bdfad6..4dce22d3cb06 100644
+--- a/arch/x86/kernel/cpu/intel.c
++++ b/arch/x86/kernel/cpu/intel.c
+@@ -13,6 +13,7 @@
+ #include <asm/msr.h>
+ #include <asm/bugs.h>
+ #include <asm/cpu.h>
++#include <asm/intel-family.h>
+
+ #ifdef CONFIG_X86_64
+ #include <linux/topology.h>
+@@ -25,6 +26,62 @@
+ #include <asm/apic.h>
+ #endif
+
++/*
++ * Early microcode releases for the Spectre v2 mitigation were broken.
++ * Information taken from;
++ * - https://newsroom.intel.com/wp-content/uploads/sites/11/2018/03/microcode-update-guidance.pdf
++ * - https://kb.vmware.com/s/article/52345
++ * - Microcode revisions observed in the wild
++ * - Release note from 20180108 microcode release
++ */
++struct sku_microcode {
++ u8 model;
++ u8 stepping;
++ u32 microcode;
++};
++static const struct sku_microcode spectre_bad_microcodes[] = {
++ { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0B, 0x80 },
++ { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0A, 0x80 },
++ { INTEL_FAM6_KABYLAKE_DESKTOP, 0x09, 0x80 },
++ { INTEL_FAM6_KABYLAKE_MOBILE, 0x0A, 0x80 },
++ { INTEL_FAM6_KABYLAKE_MOBILE, 0x09, 0x80 },
++ { INTEL_FAM6_SKYLAKE_X, 0x03, 0x0100013e },
++ { INTEL_FAM6_SKYLAKE_X, 0x04, 0x0200003c },
++ { INTEL_FAM6_BROADWELL_CORE, 0x04, 0x28 },
++ { INTEL_FAM6_BROADWELL_GT3E, 0x01, 0x1b },
++ { INTEL_FAM6_BROADWELL_XEON_D, 0x02, 0x14 },
++ { INTEL_FAM6_BROADWELL_XEON_D, 0x03, 0x07000011 },
++ { INTEL_FAM6_BROADWELL_X, 0x01, 0x0b000025 },
++ { INTEL_FAM6_HASWELL_ULT, 0x01, 0x21 },
++ { INTEL_FAM6_HASWELL_GT3E, 0x01, 0x18 },
++ { INTEL_FAM6_HASWELL_CORE, 0x03, 0x23 },
++ { INTEL_FAM6_HASWELL_X, 0x02, 0x3b },
++ { INTEL_FAM6_HASWELL_X, 0x04, 0x10 },
++ { INTEL_FAM6_IVYBRIDGE_X, 0x04, 0x42a },
++ /* Observed in the wild */
++ { INTEL_FAM6_SANDYBRIDGE_X, 0x06, 0x61b },
++ { INTEL_FAM6_SANDYBRIDGE_X, 0x07, 0x712 },
++};
++
++static bool bad_spectre_microcode(struct cpuinfo_x86 *c)
++{
++ int i;
++
++ /*
++ * We know that the hypervisor lie to us on the microcode version so
++ * we may as well hope that it is running the correct version.
++ */
++ if (cpu_has(c, X86_FEATURE_HYPERVISOR))
++ return false;
++
++ for (i = 0; i < ARRAY_SIZE(spectre_bad_microcodes); i++) {
++ if (c->x86_model == spectre_bad_microcodes[i].model &&
++ c->x86_mask == spectre_bad_microcodes[i].stepping)
++ return (c->microcode <= spectre_bad_microcodes[i].microcode);
++ }
++ return false;
++}
++
+ static void early_init_intel(struct cpuinfo_x86 *c)
+ {
+ u64 misc_enable;
+@@ -51,6 +108,22 @@ static void early_init_intel(struct cpuinfo_x86 *c)
+ rdmsr(MSR_IA32_UCODE_REV, lower_word, c->microcode);
+ }
+
++ /* Now if any of them are set, check the blacklist and clear the lot */
++ if ((cpu_has(c, X86_FEATURE_SPEC_CTRL) ||
++ cpu_has(c, X86_FEATURE_INTEL_STIBP) ||
++ cpu_has(c, X86_FEATURE_IBRS) || cpu_has(c, X86_FEATURE_IBPB) ||
++ cpu_has(c, X86_FEATURE_STIBP)) && bad_spectre_microcode(c)) {
++ pr_warn("Intel Spectre v2 broken microcode detected; disabling Speculation Control\n");
++ setup_clear_cpu_cap(X86_FEATURE_IBRS);
++ setup_clear_cpu_cap(X86_FEATURE_IBPB);
++ setup_clear_cpu_cap(X86_FEATURE_STIBP);
++ setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL);
++ setup_clear_cpu_cap(X86_FEATURE_MSR_SPEC_CTRL);
++ setup_clear_cpu_cap(X86_FEATURE_INTEL_STIBP);
++ setup_clear_cpu_cap(X86_FEATURE_SSBD);
++ setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL_SSBD);
++ }
++
+ /*
+ * Atom erratum AAE44/AAF40/AAG38/AAH41:
+ *
+diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
+index ddc9b8125918..7b8c8c838191 100644
+--- a/arch/x86/kernel/cpu/mcheck/mce.c
++++ b/arch/x86/kernel/cpu/mcheck/mce.c
+@@ -2294,9 +2294,6 @@ static ssize_t store_int_with_restart(struct device *s,
+ if (check_interval == old_check_interval)
+ return ret;
+
+- if (check_interval < 1)
+- check_interval = 1;
+-
+ mutex_lock(&mce_sysfs_mutex);
+ mce_restart();
+ mutex_unlock(&mce_sysfs_mutex);
+diff --git a/arch/x86/kernel/irqflags.S b/arch/x86/kernel/irqflags.S
+new file mode 100644
+index 000000000000..3817eb748eb4
+--- /dev/null
++++ b/arch/x86/kernel/irqflags.S
+@@ -0,0 +1,26 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++
++#include <asm/asm.h>
++#include <asm-generic/export.h>
++#include <linux/linkage.h>
++
++/*
++ * unsigned long native_save_fl(void)
++ */
++ENTRY(native_save_fl)
++ pushf
++ pop %_ASM_AX
++ ret
++ENDPROC(native_save_fl)
++EXPORT_SYMBOL(native_save_fl)
++
++/*
++ * void native_restore_fl(unsigned long flags)
++ * %eax/%rdi: flags
++ */
++ENTRY(native_restore_fl)
++ push %_ASM_ARG1
++ popf
++ ret
++ENDPROC(native_restore_fl)
++EXPORT_SYMBOL(native_restore_fl)
+diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
+index bc429365b72a..8bc68cfc0d33 100644
+--- a/arch/x86/kernel/ldt.c
++++ b/arch/x86/kernel/ldt.c
+@@ -119,7 +119,7 @@ static void free_ldt_struct(struct ldt_struct *ldt)
+ * we do not have to muck with descriptors here, that is
+ * done in switch_mm() as needed.
+ */
+-int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
++int init_new_context_ldt(struct task_struct *tsk, struct mm_struct *mm)
+ {
+ struct ldt_struct *new_ldt;
+ struct mm_struct *old_mm;
+@@ -160,7 +160,7 @@ out_unlock:
+ *
+ * 64bit: Don't touch the LDT register - we're already in the next thread.
+ */
+-void destroy_context(struct mm_struct *mm)
++void destroy_context_ldt(struct mm_struct *mm)
+ {
+ free_ldt_struct(mm->context.ldt);
+ mm->context.ldt = NULL;
+diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
+index 7c5c5dc90ffa..e18c8798c3a2 100644
+--- a/arch/x86/kernel/process.c
++++ b/arch/x86/kernel/process.c
+@@ -31,6 +31,7 @@
+ #include <asm/tlbflush.h>
+ #include <asm/mce.h>
+ #include <asm/vm86.h>
++#include <asm/spec-ctrl.h>
+
+ /*
+ * per-CPU TSS segments. Threads are completely 'soft' on Linux,
+@@ -130,11 +131,6 @@ void flush_thread(void)
+ fpu__clear(&tsk->thread.fpu);
+ }
+
+-static void hard_disable_TSC(void)
+-{
+- cr4_set_bits(X86_CR4_TSD);
+-}
+-
+ void disable_TSC(void)
+ {
+ preempt_disable();
+@@ -143,15 +139,10 @@ void disable_TSC(void)
+ * Must flip the CPU state synchronously with
+ * TIF_NOTSC in the current running context.
+ */
+- hard_disable_TSC();
++ cr4_set_bits(X86_CR4_TSD);
+ preempt_enable();
+ }
+
+-static void hard_enable_TSC(void)
+-{
+- cr4_clear_bits(X86_CR4_TSD);
+-}
+-
+ static void enable_TSC(void)
+ {
+ preempt_disable();
+@@ -160,7 +151,7 @@ static void enable_TSC(void)
+ * Must flip the CPU state synchronously with
+ * TIF_NOTSC in the current running context.
+ */
+- hard_enable_TSC();
++ cr4_clear_bits(X86_CR4_TSD);
+ preempt_enable();
+ }
+
+@@ -188,48 +179,199 @@ int set_tsc_mode(unsigned int val)
+ return 0;
+ }
+
+-void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
+- struct tss_struct *tss)
++static inline void switch_to_bitmap(struct tss_struct *tss,
++ struct thread_struct *prev,
++ struct thread_struct *next,
++ unsigned long tifp, unsigned long tifn)
+ {
+- struct thread_struct *prev, *next;
+-
+- prev = &prev_p->thread;
+- next = &next_p->thread;
+-
+- if (test_tsk_thread_flag(prev_p, TIF_BLOCKSTEP) ^
+- test_tsk_thread_flag(next_p, TIF_BLOCKSTEP)) {
+- unsigned long debugctl = get_debugctlmsr();
+-
+- debugctl &= ~DEBUGCTLMSR_BTF;
+- if (test_tsk_thread_flag(next_p, TIF_BLOCKSTEP))
+- debugctl |= DEBUGCTLMSR_BTF;
+-
+- update_debugctlmsr(debugctl);
+- }
+-
+- if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
+- test_tsk_thread_flag(next_p, TIF_NOTSC)) {
+- /* prev and next are different */
+- if (test_tsk_thread_flag(next_p, TIF_NOTSC))
+- hard_disable_TSC();
+- else
+- hard_enable_TSC();
+- }
+-
+- if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
++ if (tifn & _TIF_IO_BITMAP) {
+ /*
+ * Copy the relevant range of the IO bitmap.
+ * Normally this is 128 bytes or less:
+ */
+ memcpy(tss->io_bitmap, next->io_bitmap_ptr,
+ max(prev->io_bitmap_max, next->io_bitmap_max));
+- } else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
++ } else if (tifp & _TIF_IO_BITMAP) {
+ /*
+ * Clear any possible leftover bits:
+ */
+ memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
+ }
++}
++
++#ifdef CONFIG_SMP
++
++struct ssb_state {
++ struct ssb_state *shared_state;
++ raw_spinlock_t lock;
++ unsigned int disable_state;
++ unsigned long local_state;
++};
++
++#define LSTATE_SSB 0
++
++static DEFINE_PER_CPU(struct ssb_state, ssb_state);
++
++void speculative_store_bypass_ht_init(void)
++{
++ struct ssb_state *st = this_cpu_ptr(&ssb_state);
++ unsigned int this_cpu = smp_processor_id();
++ unsigned int cpu;
++
++ st->local_state = 0;
++
++ /*
++ * Shared state setup happens once on the first bringup
++ * of the CPU. It's not destroyed on CPU hotunplug.
++ */
++ if (st->shared_state)
++ return;
++
++ raw_spin_lock_init(&st->lock);
++
++ /*
++ * Go over HT siblings and check whether one of them has set up the
++ * shared state pointer already.
++ */
++ for_each_cpu(cpu, topology_sibling_cpumask(this_cpu)) {
++ if (cpu == this_cpu)
++ continue;
++
++ if (!per_cpu(ssb_state, cpu).shared_state)
++ continue;
++
++ /* Link it to the state of the sibling: */
++ st->shared_state = per_cpu(ssb_state, cpu).shared_state;
++ return;
++ }
++
++ /*
++ * First HT sibling to come up on the core. Link shared state of
++ * the first HT sibling to itself. The siblings on the same core
++ * which come up later will see the shared state pointer and link
++ * themself to the state of this CPU.
++ */
++ st->shared_state = st;
++}
++
++/*
++ * Logic is: First HT sibling enables SSBD for both siblings in the core
++ * and last sibling to disable it, disables it for the whole core. This how
++ * MSR_SPEC_CTRL works in "hardware":
++ *
++ * CORE_SPEC_CTRL = THREAD0_SPEC_CTRL | THREAD1_SPEC_CTRL
++ */
++static __always_inline void amd_set_core_ssb_state(unsigned long tifn)
++{
++ struct ssb_state *st = this_cpu_ptr(&ssb_state);
++ u64 msr = x86_amd_ls_cfg_base;
++
++ if (!static_cpu_has(X86_FEATURE_ZEN)) {
++ msr |= ssbd_tif_to_amd_ls_cfg(tifn);
++ wrmsrl(MSR_AMD64_LS_CFG, msr);
++ return;
++ }
++
++ if (tifn & _TIF_SSBD) {
++ /*
++ * Since this can race with prctl(), block reentry on the
++ * same CPU.
++ */
++ if (__test_and_set_bit(LSTATE_SSB, &st->local_state))
++ return;
++
++ msr |= x86_amd_ls_cfg_ssbd_mask;
++
++ raw_spin_lock(&st->shared_state->lock);
++ /* First sibling enables SSBD: */
++ if (!st->shared_state->disable_state)
++ wrmsrl(MSR_AMD64_LS_CFG, msr);
++ st->shared_state->disable_state++;
++ raw_spin_unlock(&st->shared_state->lock);
++ } else {
++ if (!__test_and_clear_bit(LSTATE_SSB, &st->local_state))
++ return;
++
++ raw_spin_lock(&st->shared_state->lock);
++ st->shared_state->disable_state--;
++ if (!st->shared_state->disable_state)
++ wrmsrl(MSR_AMD64_LS_CFG, msr);
++ raw_spin_unlock(&st->shared_state->lock);
++ }
++}
++#else
++static __always_inline void amd_set_core_ssb_state(unsigned long tifn)
++{
++ u64 msr = x86_amd_ls_cfg_base | ssbd_tif_to_amd_ls_cfg(tifn);
++
++ wrmsrl(MSR_AMD64_LS_CFG, msr);
++}
++#endif
++
++static __always_inline void amd_set_ssb_virt_state(unsigned long tifn)
++{
++ /*
++ * SSBD has the same definition in SPEC_CTRL and VIRT_SPEC_CTRL,
++ * so ssbd_tif_to_spec_ctrl() just works.
++ */
++ wrmsrl(MSR_AMD64_VIRT_SPEC_CTRL, ssbd_tif_to_spec_ctrl(tifn));
++}
++
++static __always_inline void intel_set_ssb_state(unsigned long tifn)
++{
++ u64 msr = x86_spec_ctrl_base | ssbd_tif_to_spec_ctrl(tifn);
++
++ wrmsrl(MSR_IA32_SPEC_CTRL, msr);
++}
++
++static __always_inline void __speculative_store_bypass_update(unsigned long tifn)
++{
++ if (static_cpu_has(X86_FEATURE_VIRT_SSBD))
++ amd_set_ssb_virt_state(tifn);
++ else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD))
++ amd_set_core_ssb_state(tifn);
++ else
++ intel_set_ssb_state(tifn);
++}
++
++void speculative_store_bypass_update(unsigned long tif)
++{
++ preempt_disable();
++ __speculative_store_bypass_update(tif);
++ preempt_enable();
++}
++
++void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
++ struct tss_struct *tss)
++{
++ struct thread_struct *prev, *next;
++ unsigned long tifp, tifn;
++
++ prev = &prev_p->thread;
++ next = &next_p->thread;
++
++ tifn = READ_ONCE(task_thread_info(next_p)->flags);
++ tifp = READ_ONCE(task_thread_info(prev_p)->flags);
++ switch_to_bitmap(tss, prev, next, tifp, tifn);
++
+ propagate_user_return_notify(prev_p, next_p);
++
++ if ((tifp & _TIF_BLOCKSTEP || tifn & _TIF_BLOCKSTEP) &&
++ arch_has_block_step()) {
++ unsigned long debugctl, msk;
++
++ rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
++ debugctl &= ~DEBUGCTLMSR_BTF;
++ msk = tifn & _TIF_BLOCKSTEP;
++ debugctl |= (msk >> TIF_BLOCKSTEP) << DEBUGCTLMSR_BTF_SHIFT;
++ wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
++ }
++
++ if ((tifp ^ tifn) & _TIF_NOTSC)
++ cr4_toggle_bits(X86_CR4_TSD);
++
++ if ((tifp ^ tifn) & _TIF_SSBD)
++ __speculative_store_bypass_update(tifn);
+ }
+
+ /*
+diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
+index 1f7aefc7b0b4..c017f1c71560 100644
+--- a/arch/x86/kernel/smpboot.c
++++ b/arch/x86/kernel/smpboot.c
+@@ -75,6 +75,7 @@
+ #include <asm/i8259.h>
+ #include <asm/realmode.h>
+ #include <asm/misc.h>
++#include <asm/spec-ctrl.h>
+
+ /* Number of siblings per CPU package */
+ int smp_num_siblings = 1;
+@@ -217,6 +218,8 @@ static void notrace start_secondary(void *unused)
+ */
+ check_tsc_sync_target();
+
++ speculative_store_bypass_ht_init();
++
+ /*
+ * Lock vector_lock and initialize the vectors on this cpu
+ * before setting the cpu online. We must set it online with
+@@ -1209,6 +1212,8 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
+ set_mtrr_aps_delayed_init();
+
+ smp_quirk_init_udelay();
++
++ speculative_store_bypass_ht_init();
+ }
+
+ void arch_enable_nonboot_cpus_begin(void)
+diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
+index 42654375b73f..df7827a981dd 100644
+--- a/arch/x86/kvm/svm.c
++++ b/arch/x86/kvm/svm.c
+@@ -37,7 +37,7 @@
+ #include <asm/desc.h>
+ #include <asm/debugreg.h>
+ #include <asm/kvm_para.h>
+-#include <asm/nospec-branch.h>
++#include <asm/spec-ctrl.h>
+
+ #include <asm/virtext.h>
+ #include "trace.h"
+diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
+index 63c44a9bf6bb..18143886b186 100644
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -48,7 +48,7 @@
+ #include <asm/kexec.h>
+ #include <asm/apic.h>
+ #include <asm/irq_remapping.h>
+-#include <asm/nospec-branch.h>
++#include <asm/spec-ctrl.h>
+
+ #include "trace.h"
+ #include "pmu.h"
+diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
+index 7cad01af6dcd..6d683bbb3502 100644
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -10,6 +10,7 @@
+
+ #include <asm/tlbflush.h>
+ #include <asm/mmu_context.h>
++#include <asm/nospec-branch.h>
+ #include <asm/cache.h>
+ #include <asm/apic.h>
+ #include <asm/uv/uv.h>
+@@ -29,6 +30,8 @@
+ * Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi
+ */
+
++atomic64_t last_mm_ctx_id = ATOMIC64_INIT(1);
++
+ struct flush_tlb_info {
+ struct mm_struct *flush_mm;
+ unsigned long flush_start;
+@@ -104,6 +107,36 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
+ unsigned cpu = smp_processor_id();
+
+ if (likely(prev != next)) {
++ u64 last_ctx_id = this_cpu_read(cpu_tlbstate.last_ctx_id);
++
++ /*
++ * Avoid user/user BTB poisoning by flushing the branch
++ * predictor when switching between processes. This stops
++ * one process from doing Spectre-v2 attacks on another.
++ *
++ * As an optimization, flush indirect branches only when
++ * switching into processes that disable dumping. This
++ * protects high value processes like gpg, without having
++ * too high performance overhead. IBPB is *expensive*!
++ *
++ * This will not flush branches when switching into kernel
++ * threads. It will also not flush if we switch to idle
++ * thread and back to the same process. It will flush if we
++ * switch to a different non-dumpable process.
++ */
++ if (tsk && tsk->mm &&
++ tsk->mm->context.ctx_id != last_ctx_id &&
++ get_dumpable(tsk->mm) != SUID_DUMP_USER)
++ indirect_branch_prediction_barrier();
++
++ /*
++ * Record last user mm's context id, so we can avoid
++ * flushing branch buffer with IBPB if we switch back
++ * to the same user.
++ */
++ if (next != &init_mm)
++ this_cpu_write(cpu_tlbstate.last_ctx_id, next->context.ctx_id);
++
+ this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
+ this_cpu_write(cpu_tlbstate.active_mm, next);
+ cpumask_set_cpu(cpu, mm_cpumask(next));
+diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
+index a0ac0f9c307f..f5a8cd96bae4 100644
+--- a/arch/x86/platform/efi/efi_64.c
++++ b/arch/x86/platform/efi/efi_64.c
+@@ -40,6 +40,7 @@
+ #include <asm/fixmap.h>
+ #include <asm/realmode.h>
+ #include <asm/time.h>
++#include <asm/nospec-branch.h>
+
+ /*
+ * We allocate runtime services regions bottom-up, starting from -4G, i.e.
+@@ -347,6 +348,7 @@ extern efi_status_t efi64_thunk(u32, ...);
+ \
+ efi_sync_low_kernel_mappings(); \
+ local_irq_save(flags); \
++ firmware_restrict_branch_speculation_start(); \
+ \
+ efi_scratch.prev_cr3 = read_cr3(); \
+ write_cr3((unsigned long)efi_scratch.efi_pgt); \
+@@ -357,6 +359,7 @@ extern efi_status_t efi64_thunk(u32, ...);
+ \
+ write_cr3(efi_scratch.prev_cr3); \
+ __flush_tlb_all(); \
++ firmware_restrict_branch_speculation_end(); \
+ local_irq_restore(flags); \
+ \
+ __s; \
+diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
+index cbef64b508e1..82fd84d5e1aa 100644
+--- a/arch/x86/xen/enlighten.c
++++ b/arch/x86/xen/enlighten.c
+@@ -460,6 +460,12 @@ static void __init xen_init_cpuid_mask(void)
+ cpuid_leaf1_ecx_set_mask = (1 << (X86_FEATURE_MWAIT % 32));
+ }
+
++static void __init xen_init_capabilities(void)
++{
++ if (xen_pv_domain())
++ setup_force_cpu_cap(X86_FEATURE_XENPV);
++}
++
+ static void xen_set_debugreg(int reg, unsigned long val)
+ {
+ HYPERVISOR_set_debugreg(reg, val);
+@@ -1587,6 +1593,7 @@ asmlinkage __visible void __init xen_start_kernel(void)
+
+ xen_init_irq_ops();
+ xen_init_cpuid_mask();
++ xen_init_capabilities();
+
+ #ifdef CONFIG_X86_LOCAL_APIC
+ /*
+@@ -1883,14 +1890,6 @@ bool xen_hvm_need_lapic(void)
+ }
+ EXPORT_SYMBOL_GPL(xen_hvm_need_lapic);
+
+-static void xen_set_cpu_features(struct cpuinfo_x86 *c)
+-{
+- if (xen_pv_domain()) {
+- clear_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS);
+- set_cpu_cap(c, X86_FEATURE_XENPV);
+- }
+-}
+-
+ const struct hypervisor_x86 x86_hyper_xen = {
+ .name = "Xen",
+ .detect = xen_platform,
+@@ -1898,7 +1897,6 @@ const struct hypervisor_x86 x86_hyper_xen = {
+ .init_platform = xen_hvm_guest_init,
+ #endif
+ .x2apic_available = xen_x2apic_para_available,
+- .set_cpu_features = xen_set_cpu_features,
+ };
+ EXPORT_SYMBOL(x86_hyper_xen);
+
+diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
+index 3f4ebf0261f2..29e50d1229bc 100644
+--- a/arch/x86/xen/smp.c
++++ b/arch/x86/xen/smp.c
+@@ -28,6 +28,7 @@
+ #include <xen/interface/vcpu.h>
+ #include <xen/interface/xenpmu.h>
+
++#include <asm/spec-ctrl.h>
+ #include <asm/xen/interface.h>
+ #include <asm/xen/hypercall.h>
+
+@@ -87,6 +88,8 @@ static void cpu_bringup(void)
+ cpu_data(cpu).x86_max_cores = 1;
+ set_cpu_sibling_map(cpu);
+
++ speculative_store_bypass_ht_init();
++
+ xen_setup_cpu_clockevents();
+
+ notify_cpu_starting(cpu);
+@@ -357,6 +360,8 @@ static void __init xen_smp_prepare_cpus(unsigned int max_cpus)
+ }
+ set_cpu_sibling_map(0);
+
++ speculative_store_bypass_ht_init();
++
+ xen_pmu_init(0);
+
+ if (xen_smp_intr_init(0))
+diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c
+index 7f664c416faf..4ecd0de08557 100644
+--- a/arch/x86/xen/suspend.c
++++ b/arch/x86/xen/suspend.c
+@@ -1,11 +1,14 @@
+ #include <linux/types.h>
+ #include <linux/tick.h>
++#include <linux/percpu-defs.h>
+
+ #include <xen/xen.h>
+ #include <xen/interface/xen.h>
+ #include <xen/grant_table.h>
+ #include <xen/events.h>
+
++#include <asm/cpufeatures.h>
++#include <asm/msr-index.h>
+ #include <asm/xen/hypercall.h>
+ #include <asm/xen/page.h>
+ #include <asm/fixmap.h>
+@@ -68,6 +71,8 @@ static void xen_pv_post_suspend(int suspend_cancelled)
+ xen_mm_unpin_all();
+ }
+
++static DEFINE_PER_CPU(u64, spec_ctrl);
++
+ void xen_arch_pre_suspend(void)
+ {
+ if (xen_pv_domain())
+@@ -84,6 +89,9 @@ void xen_arch_post_suspend(int cancelled)
+
+ static void xen_vcpu_notify_restore(void *data)
+ {
++ if (xen_pv_domain() && boot_cpu_has(X86_FEATURE_SPEC_CTRL))
++ wrmsrl(MSR_IA32_SPEC_CTRL, this_cpu_read(spec_ctrl));
++
+ /* Boot processor notified via generic timekeeping_resume() */
+ if (smp_processor_id() == 0)
+ return;
+@@ -93,7 +101,15 @@ static void xen_vcpu_notify_restore(void *data)
+
+ static void xen_vcpu_notify_suspend(void *data)
+ {
++ u64 tmp;
++
+ tick_suspend_local();
++
++ if (xen_pv_domain() && boot_cpu_has(X86_FEATURE_SPEC_CTRL)) {
++ rdmsrl(MSR_IA32_SPEC_CTRL, tmp);
++ this_cpu_write(spec_ctrl, tmp);
++ wrmsrl(MSR_IA32_SPEC_CTRL, 0);
++ }
+ }
+
+ void xen_arch_resume(void)
+diff --git a/block/blk-core.c b/block/blk-core.c
+index f5f1a55703ae..50d77c90070d 100644
+--- a/block/blk-core.c
++++ b/block/blk-core.c
+@@ -651,21 +651,17 @@ EXPORT_SYMBOL(blk_alloc_queue);
+ int blk_queue_enter(struct request_queue *q, gfp_t gfp)
+ {
+ while (true) {
+- int ret;
+-
+ if (percpu_ref_tryget_live(&q->q_usage_counter))
+ return 0;
+
+ if (!gfpflags_allow_blocking(gfp))
+ return -EBUSY;
+
+- ret = wait_event_interruptible(q->mq_freeze_wq,
+- !atomic_read(&q->mq_freeze_depth) ||
+- blk_queue_dying(q));
++ wait_event(q->mq_freeze_wq,
++ !atomic_read(&q->mq_freeze_depth) ||
++ blk_queue_dying(q));
+ if (blk_queue_dying(q))
+ return -ENODEV;
+- if (ret)
+- return ret;
+ }
+ }
+
+diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
+index 3db71afbba93..143edea1076f 100644
+--- a/drivers/base/cpu.c
++++ b/drivers/base/cpu.c
+@@ -518,14 +518,22 @@ ssize_t __weak cpu_show_spectre_v2(struct device *dev,
+ return sprintf(buf, "Not affected\n");
+ }
+
++ssize_t __weak cpu_show_spec_store_bypass(struct device *dev,
++ struct device_attribute *attr, char *buf)
++{
++ return sprintf(buf, "Not affected\n");
++}
++
+ static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL);
+ static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL);
+ static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL);
++static DEVICE_ATTR(spec_store_bypass, 0444, cpu_show_spec_store_bypass, NULL);
+
+ static struct attribute *cpu_root_vulnerabilities_attrs[] = {
+ &dev_attr_meltdown.attr,
+ &dev_attr_spectre_v1.attr,
+ &dev_attr_spectre_v2.attr,
++ &dev_attr_spec_store_bypass.attr,
+ NULL
+ };
+
+diff --git a/drivers/clk/tegra/clk-tegra30.c b/drivers/clk/tegra/clk-tegra30.c
+index 8c41c6fcb9ee..acf83569f86f 100644
+--- a/drivers/clk/tegra/clk-tegra30.c
++++ b/drivers/clk/tegra/clk-tegra30.c
+@@ -333,11 +333,11 @@ static struct pdiv_map pllu_p[] = {
+ };
+
+ static struct tegra_clk_pll_freq_table pll_u_freq_table[] = {
+- { 12000000, 480000000, 960, 12, 0, 12},
+- { 13000000, 480000000, 960, 13, 0, 12},
+- { 16800000, 480000000, 400, 7, 0, 5},
+- { 19200000, 480000000, 200, 4, 0, 3},
+- { 26000000, 480000000, 960, 26, 0, 12},
++ { 12000000, 480000000, 960, 12, 2, 12 },
++ { 13000000, 480000000, 960, 13, 2, 12 },
++ { 16800000, 480000000, 400, 7, 2, 5 },
++ { 19200000, 480000000, 200, 4, 2, 3 },
++ { 26000000, 480000000, 960, 26, 2, 12 },
+ { 0, 0, 0, 0, 0, 0 },
+ };
+
+@@ -1372,6 +1372,7 @@ static struct tegra_clk_init_table init_table[] __initdata = {
+ {TEGRA30_CLK_GR2D, TEGRA30_CLK_PLL_C, 300000000, 0},
+ {TEGRA30_CLK_GR3D, TEGRA30_CLK_PLL_C, 300000000, 0},
+ {TEGRA30_CLK_GR3D2, TEGRA30_CLK_PLL_C, 300000000, 0},
++ { TEGRA30_CLK_PLL_U, TEGRA30_CLK_CLK_MAX, 480000000, 0 },
+ {TEGRA30_CLK_CLK_MAX, TEGRA30_CLK_CLK_MAX, 0, 0}, /* This MUST be the last entry. */
+ };
+
+diff --git a/drivers/mtd/ubi/attach.c b/drivers/mtd/ubi/attach.c
+index c1aaf0336cf2..5cde3ad1665e 100644
+--- a/drivers/mtd/ubi/attach.c
++++ b/drivers/mtd/ubi/attach.c
+@@ -174,6 +174,40 @@ static int add_corrupted(struct ubi_attach_info *ai, int pnum, int ec)
+ return 0;
+ }
+
++/**
++ * add_fastmap - add a Fastmap related physical eraseblock.
++ * @ai: attaching information
++ * @pnum: physical eraseblock number the VID header came from
++ * @vid_hdr: the volume identifier header
++ * @ec: erase counter of the physical eraseblock
++ *
++ * This function allocates a 'struct ubi_ainf_peb' object for a Fastamp
++ * physical eraseblock @pnum and adds it to the 'fastmap' list.
++ * Such blocks can be Fastmap super and data blocks from both the most
++ * recent Fastmap we're attaching from or from old Fastmaps which will
++ * be erased.
++ */
++static int add_fastmap(struct ubi_attach_info *ai, int pnum,
++ struct ubi_vid_hdr *vid_hdr, int ec)
++{
++ struct ubi_ainf_peb *aeb;
++
++ aeb = kmem_cache_alloc(ai->aeb_slab_cache, GFP_KERNEL);
++ if (!aeb)
++ return -ENOMEM;
++
++ aeb->pnum = pnum;
++ aeb->vol_id = be32_to_cpu(vidh->vol_id);
++ aeb->sqnum = be64_to_cpu(vidh->sqnum);
++ aeb->ec = ec;
++ list_add(&aeb->u.list, &ai->fastmap);
++
++ dbg_bld("add to fastmap list: PEB %d, vol_id %d, sqnum: %llu", pnum,
++ aeb->vol_id, aeb->sqnum);
++
++ return 0;
++}
++
+ /**
+ * validate_vid_hdr - check volume identifier header.
+ * @ubi: UBI device description object
+@@ -803,13 +837,26 @@ out_unlock:
+ return err;
+ }
+
++static bool vol_ignored(int vol_id)
++{
++ switch (vol_id) {
++ case UBI_LAYOUT_VOLUME_ID:
++ return true;
++ }
++
++#ifdef CONFIG_MTD_UBI_FASTMAP
++ return ubi_is_fm_vol(vol_id);
++#else
++ return false;
++#endif
++}
++
+ /**
+ * scan_peb - scan and process UBI headers of a PEB.
+ * @ubi: UBI device description object
+ * @ai: attaching information
+ * @pnum: the physical eraseblock number
+- * @vid: The volume ID of the found volume will be stored in this pointer
+- * @sqnum: The sqnum of the found volume will be stored in this pointer
++ * @fast: true if we're scanning for a Fastmap
+ *
+ * This function reads UBI headers of PEB @pnum, checks them, and adds
+ * information about this PEB to the corresponding list or RB-tree in the
+@@ -817,9 +864,9 @@ out_unlock:
+ * successfully handled and a negative error code in case of failure.
+ */
+ static int scan_peb(struct ubi_device *ubi, struct ubi_attach_info *ai,
+- int pnum, int *vid, unsigned long long *sqnum)
++ int pnum, bool fast)
+ {
+- long long uninitialized_var(ec);
++ long long ec;
+ int err, bitflips = 0, vol_id = -1, ec_err = 0;
+
+ dbg_bld("scan PEB %d", pnum);
+@@ -935,6 +982,20 @@ static int scan_peb(struct ubi_device *ubi, struct ubi_attach_info *ai,
+ */
+ ai->maybe_bad_peb_count += 1;
+ case UBI_IO_BAD_HDR:
++ /*
++ * If we're facing a bad VID header we have to drop *all*
++ * Fastmap data structures we find. The most recent Fastmap
++ * could be bad and therefore there is a chance that we attach
++ * from an old one. On a fine MTD stack a PEB must not render
++ * bad all of a sudden, but the reality is different.
++ * So, let's be paranoid and help finding the root cause by
++ * falling back to scanning mode instead of attaching with a
++ * bad EBA table and cause data corruption which is hard to
++ * analyze.
++ */
++ if (fast)
++ ai->force_full_scan = 1;
++
+ if (ec_err)
+ /*
+ * Both headers are corrupted. There is a possibility
+@@ -991,21 +1052,15 @@ static int scan_peb(struct ubi_device *ubi, struct ubi_attach_info *ai,
+ }
+
+ vol_id = be32_to_cpu(vidh->vol_id);
+- if (vid)
+- *vid = vol_id;
+- if (sqnum)
+- *sqnum = be64_to_cpu(vidh->sqnum);
+- if (vol_id > UBI_MAX_VOLUMES && vol_id != UBI_LAYOUT_VOLUME_ID) {
++ if (vol_id > UBI_MAX_VOLUMES && !vol_ignored(vol_id)) {
+ int lnum = be32_to_cpu(vidh->lnum);
+
+ /* Unsupported internal volume */
+ switch (vidh->compat) {
+ case UBI_COMPAT_DELETE:
+- if (vol_id != UBI_FM_SB_VOLUME_ID
+- && vol_id != UBI_FM_DATA_VOLUME_ID) {
+- ubi_msg(ubi, "\"delete\" compatible internal volume %d:%d found, will remove it",
+- vol_id, lnum);
+- }
++ ubi_msg(ubi, "\"delete\" compatible internal volume %d:%d found, will remove it",
++ vol_id, lnum);
++
+ err = add_to_list(ai, pnum, vol_id, lnum,
+ ec, 1, &ai->erase);
+ if (err)
+@@ -1037,7 +1092,12 @@ static int scan_peb(struct ubi_device *ubi, struct ubi_attach_info *ai,
+ if (ec_err)
+ ubi_warn(ubi, "valid VID header but corrupted EC header at PEB %d",
+ pnum);
+- err = ubi_add_to_av(ubi, ai, pnum, ec, vidh, bitflips);
++
++ if (ubi_is_fm_vol(vol_id))
++ err = add_fastmap(ai, pnum, vidh, ec);
++ else
++ err = ubi_add_to_av(ubi, ai, pnum, ec, vidh, bitflips);
++
+ if (err)
+ return err;
+
+@@ -1186,6 +1246,10 @@ static void destroy_ai(struct ubi_attach_info *ai)
+ list_del(&aeb->u.list);
+ kmem_cache_free(ai->aeb_slab_cache, aeb);
+ }
++ list_for_each_entry_safe(aeb, aeb_tmp, &ai->fastmap, u.list) {
++ list_del(&aeb->u.list);
++ kmem_cache_free(ai->aeb_slab_cache, aeb);
++ }
+
+ /* Destroy the volume RB-tree */
+ rb = ai->volumes.rb_node;
+@@ -1245,7 +1309,7 @@ static int scan_all(struct ubi_device *ubi, struct ubi_attach_info *ai,
+ cond_resched();
+
+ dbg_gen("process PEB %d", pnum);
+- err = scan_peb(ubi, ai, pnum, NULL, NULL);
++ err = scan_peb(ubi, ai, pnum, false);
+ if (err < 0)
+ goto out_vidh;
+ }
+@@ -1311,6 +1375,7 @@ static struct ubi_attach_info *alloc_ai(void)
+ INIT_LIST_HEAD(&ai->free);
+ INIT_LIST_HEAD(&ai->erase);
+ INIT_LIST_HEAD(&ai->alien);
++ INIT_LIST_HEAD(&ai->fastmap);
+ ai->volumes = RB_ROOT;
+ ai->aeb_slab_cache = kmem_cache_create("ubi_aeb_slab_cache",
+ sizeof(struct ubi_ainf_peb),
+@@ -1337,52 +1402,58 @@ static struct ubi_attach_info *alloc_ai(void)
+ */
+ static int scan_fast(struct ubi_device *ubi, struct ubi_attach_info **ai)
+ {
+- int err, pnum, fm_anchor = -1;
+- unsigned long long max_sqnum = 0;
++ int err, pnum;
++ struct ubi_attach_info *scan_ai;
+
+ err = -ENOMEM;
+
++ scan_ai = alloc_ai();
++ if (!scan_ai)
++ goto out;
++
+ ech = kzalloc(ubi->ec_hdr_alsize, GFP_KERNEL);
+ if (!ech)
+- goto out;
++ goto out_ai;
+
+ vidh = ubi_zalloc_vid_hdr(ubi, GFP_KERNEL);
+ if (!vidh)
+ goto out_ech;
+
+ for (pnum = 0; pnum < UBI_FM_MAX_START; pnum++) {
+- int vol_id = -1;
+- unsigned long long sqnum = -1;
+ cond_resched();
+
+ dbg_gen("process PEB %d", pnum);
+- err = scan_peb(ubi, *ai, pnum, &vol_id, &sqnum);
++ err = scan_peb(ubi, scan_ai, pnum, true);
+ if (err < 0)
+ goto out_vidh;
+-
+- if (vol_id == UBI_FM_SB_VOLUME_ID && sqnum > max_sqnum) {
+- max_sqnum = sqnum;
+- fm_anchor = pnum;
+- }
+ }
+
+ ubi_free_vid_hdr(ubi, vidh);
+ kfree(ech);
+
+- if (fm_anchor < 0)
+- return UBI_NO_FASTMAP;
++ if (scan_ai->force_full_scan)
++ err = UBI_NO_FASTMAP;
++ else
++ err = ubi_scan_fastmap(ubi, *ai, scan_ai);
+
+- destroy_ai(*ai);
+- *ai = alloc_ai();
+- if (!*ai)
+- return -ENOMEM;
++ if (err) {
++ /*
++ * Didn't attach via fastmap, do a full scan but reuse what
++ * we've aready scanned.
++ */
++ destroy_ai(*ai);
++ *ai = scan_ai;
++ } else
++ destroy_ai(scan_ai);
+
+- return ubi_scan_fastmap(ubi, *ai, fm_anchor);
++ return err;
+
+ out_vidh:
+ ubi_free_vid_hdr(ubi, vidh);
+ out_ech:
+ kfree(ech);
++out_ai:
++ destroy_ai(scan_ai);
+ out:
+ return err;
+ }
+diff --git a/drivers/mtd/ubi/eba.c b/drivers/mtd/ubi/eba.c
+index c4a25c858c07..03cf0553ec1b 100644
+--- a/drivers/mtd/ubi/eba.c
++++ b/drivers/mtd/ubi/eba.c
+@@ -1178,6 +1178,8 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to,
+ struct ubi_volume *vol;
+ uint32_t crc;
+
++ ubi_assert(rwsem_is_locked(&ubi->fm_eba_sem));
++
+ vol_id = be32_to_cpu(vid_hdr->vol_id);
+ lnum = be32_to_cpu(vid_hdr->lnum);
+
+@@ -1346,9 +1348,7 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to,
+ }
+
+ ubi_assert(vol->eba_tbl[lnum] == from);
+- down_read(&ubi->fm_eba_sem);
+ vol->eba_tbl[lnum] = to;
+- up_read(&ubi->fm_eba_sem);
+
+ out_unlock_buf:
+ mutex_unlock(&ubi->buf_mutex);
+diff --git a/drivers/mtd/ubi/fastmap-wl.c b/drivers/mtd/ubi/fastmap-wl.c
+index ed62f1efe6eb..69dd21679a30 100644
+--- a/drivers/mtd/ubi/fastmap-wl.c
++++ b/drivers/mtd/ubi/fastmap-wl.c
+@@ -262,6 +262,8 @@ static struct ubi_wl_entry *get_peb_for_wl(struct ubi_device *ubi)
+ struct ubi_fm_pool *pool = &ubi->fm_wl_pool;
+ int pnum;
+
++ ubi_assert(rwsem_is_locked(&ubi->fm_eba_sem));
++
+ if (pool->used == pool->size) {
+ /* We cannot update the fastmap here because this
+ * function is called in atomic context.
+@@ -303,7 +305,7 @@ int ubi_ensure_anchor_pebs(struct ubi_device *ubi)
+
+ wrk->anchor = 1;
+ wrk->func = &wear_leveling_worker;
+- schedule_ubi_work(ubi, wrk);
++ __schedule_ubi_work(ubi, wrk);
+ return 0;
+ }
+
+@@ -344,7 +346,7 @@ int ubi_wl_put_fm_peb(struct ubi_device *ubi, struct ubi_wl_entry *fm_e,
+ spin_unlock(&ubi->wl_lock);
+
+ vol_id = lnum ? UBI_FM_DATA_VOLUME_ID : UBI_FM_SB_VOLUME_ID;
+- return schedule_erase(ubi, e, vol_id, lnum, torture);
++ return schedule_erase(ubi, e, vol_id, lnum, torture, true);
+ }
+
+ /**
+diff --git a/drivers/mtd/ubi/fastmap.c b/drivers/mtd/ubi/fastmap.c
+index bba7dd1b5ebf..72e89b352034 100644
+--- a/drivers/mtd/ubi/fastmap.c
++++ b/drivers/mtd/ubi/fastmap.c
+@@ -326,6 +326,7 @@ static int update_vol(struct ubi_device *ubi, struct ubi_attach_info *ai,
+ aeb->pnum = new_aeb->pnum;
+ aeb->copy_flag = new_vh->copy_flag;
+ aeb->scrub = new_aeb->scrub;
++ aeb->sqnum = new_aeb->sqnum;
+ kmem_cache_free(ai->aeb_slab_cache, new_aeb);
+
+ /* new_aeb is older */
+@@ -850,28 +851,58 @@ fail:
+ return ret;
+ }
+
++/**
++ * find_fm_anchor - find the most recent Fastmap superblock (anchor)
++ * @ai: UBI attach info to be filled
++ */
++static int find_fm_anchor(struct ubi_attach_info *ai)
++{
++ int ret = -1;
++ struct ubi_ainf_peb *aeb;
++ unsigned long long max_sqnum = 0;
++
++ list_for_each_entry(aeb, &ai->fastmap, u.list) {
++ if (aeb->vol_id == UBI_FM_SB_VOLUME_ID && aeb->sqnum > max_sqnum) {
++ max_sqnum = aeb->sqnum;
++ ret = aeb->pnum;
++ }
++ }
++
++ return ret;
++}
++
+ /**
+ * ubi_scan_fastmap - scan the fastmap.
+ * @ubi: UBI device object
+ * @ai: UBI attach info to be filled
+- * @fm_anchor: The fastmap starts at this PEB
++ * @scan_ai: UBI attach info from the first 64 PEBs,
++ * used to find the most recent Fastmap data structure
+ *
+ * Returns 0 on success, UBI_NO_FASTMAP if no fastmap was found,
+ * UBI_BAD_FASTMAP if one was found but is not usable.
+ * < 0 indicates an internal error.
+ */
+ int ubi_scan_fastmap(struct ubi_device *ubi, struct ubi_attach_info *ai,
+- int fm_anchor)
++ struct ubi_attach_info *scan_ai)
+ {
+ struct ubi_fm_sb *fmsb, *fmsb2;
+ struct ubi_vid_hdr *vh;
+ struct ubi_ec_hdr *ech;
+ struct ubi_fastmap_layout *fm;
+- int i, used_blocks, pnum, ret = 0;
++ struct ubi_ainf_peb *tmp_aeb, *aeb;
++ int i, used_blocks, pnum, fm_anchor, ret = 0;
+ size_t fm_size;
+ __be32 crc, tmp_crc;
+ unsigned long long sqnum = 0;
+
++ fm_anchor = find_fm_anchor(scan_ai);
++ if (fm_anchor < 0)
++ return UBI_NO_FASTMAP;
++
++ /* Move all (possible) fastmap blocks into our new attach structure. */
++ list_for_each_entry_safe(aeb, tmp_aeb, &scan_ai->fastmap, u.list)
++ list_move_tail(&aeb->u.list, &ai->fastmap);
++
+ down_write(&ubi->fm_protect);
+ memset(ubi->fm_buf, 0, ubi->fm_size);
+
+@@ -1484,22 +1515,30 @@ int ubi_update_fastmap(struct ubi_device *ubi)
+ struct ubi_wl_entry *tmp_e;
+
+ down_write(&ubi->fm_protect);
++ down_write(&ubi->work_sem);
++ down_write(&ubi->fm_eba_sem);
+
+ ubi_refill_pools(ubi);
+
+ if (ubi->ro_mode || ubi->fm_disabled) {
++ up_write(&ubi->fm_eba_sem);
++ up_write(&ubi->work_sem);
+ up_write(&ubi->fm_protect);
+ return 0;
+ }
+
+ ret = ubi_ensure_anchor_pebs(ubi);
+ if (ret) {
++ up_write(&ubi->fm_eba_sem);
++ up_write(&ubi->work_sem);
+ up_write(&ubi->fm_protect);
+ return ret;
+ }
+
+ new_fm = kzalloc(sizeof(*new_fm), GFP_KERNEL);
+ if (!new_fm) {
++ up_write(&ubi->fm_eba_sem);
++ up_write(&ubi->work_sem);
+ up_write(&ubi->fm_protect);
+ return -ENOMEM;
+ }
+@@ -1608,16 +1647,14 @@ int ubi_update_fastmap(struct ubi_device *ubi)
+ new_fm->e[0] = tmp_e;
+ }
+
+- down_write(&ubi->work_sem);
+- down_write(&ubi->fm_eba_sem);
+ ret = ubi_write_fastmap(ubi, new_fm);
+- up_write(&ubi->fm_eba_sem);
+- up_write(&ubi->work_sem);
+
+ if (ret)
+ goto err;
+
+ out_unlock:
++ up_write(&ubi->fm_eba_sem);
++ up_write(&ubi->work_sem);
+ up_write(&ubi->fm_protect);
+ kfree(old_fm);
+ return ret;
+diff --git a/drivers/mtd/ubi/ubi.h b/drivers/mtd/ubi/ubi.h
+index de1ea2e4c37d..05d9ec66437c 100644
+--- a/drivers/mtd/ubi/ubi.h
++++ b/drivers/mtd/ubi/ubi.h
+@@ -699,6 +699,8 @@ struct ubi_ainf_volume {
+ * @erase: list of physical eraseblocks which have to be erased
+ * @alien: list of physical eraseblocks which should not be used by UBI (e.g.,
+ * those belonging to "preserve"-compatible internal volumes)
++ * @fastmap: list of physical eraseblocks which relate to fastmap (e.g.,
++ * eraseblocks of the current and not yet erased old fastmap blocks)
+ * @corr_peb_count: count of PEBs in the @corr list
+ * @empty_peb_count: count of PEBs which are presumably empty (contain only
+ * 0xFF bytes)
+@@ -709,6 +711,8 @@ struct ubi_ainf_volume {
+ * @vols_found: number of volumes found
+ * @highest_vol_id: highest volume ID
+ * @is_empty: flag indicating whether the MTD device is empty or not
++ * @force_full_scan: flag indicating whether we need to do a full scan and drop
++ all existing Fastmap data structures
+ * @min_ec: lowest erase counter value
+ * @max_ec: highest erase counter value
+ * @max_sqnum: highest sequence number value
+@@ -727,6 +731,7 @@ struct ubi_attach_info {
+ struct list_head free;
+ struct list_head erase;
+ struct list_head alien;
++ struct list_head fastmap;
+ int corr_peb_count;
+ int empty_peb_count;
+ int alien_peb_count;
+@@ -735,6 +740,7 @@ struct ubi_attach_info {
+ int vols_found;
+ int highest_vol_id;
+ int is_empty;
++ int force_full_scan;
+ int min_ec;
+ int max_ec;
+ unsigned long long max_sqnum;
+@@ -907,7 +913,7 @@ int ubi_compare_lebs(struct ubi_device *ubi, const struct ubi_ainf_peb *aeb,
+ size_t ubi_calc_fm_size(struct ubi_device *ubi);
+ int ubi_update_fastmap(struct ubi_device *ubi);
+ int ubi_scan_fastmap(struct ubi_device *ubi, struct ubi_attach_info *ai,
+- int fm_anchor);
++ struct ubi_attach_info *scan_ai);
+ #else
+ static inline int ubi_update_fastmap(struct ubi_device *ubi) { return 0; }
+ #endif
+@@ -1101,4 +1107,42 @@ static inline int idx2vol_id(const struct ubi_device *ubi, int idx)
+ return idx;
+ }
+
++/**
++ * ubi_is_fm_vol - check whether a volume ID is a Fastmap volume.
++ * @vol_id: volume ID
++ */
++static inline bool ubi_is_fm_vol(int vol_id)
++{
++ switch (vol_id) {
++ case UBI_FM_SB_VOLUME_ID:
++ case UBI_FM_DATA_VOLUME_ID:
++ return true;
++ }
++
++ return false;
++}
++
++/**
++ * ubi_find_fm_block - check whether a PEB is part of the current Fastmap.
++ * @ubi: UBI device description object
++ * @pnum: physical eraseblock to look for
++ *
++ * This function returns a wear leveling object if @pnum relates to the current
++ * fastmap, @NULL otherwise.
++ */
++static inline struct ubi_wl_entry *ubi_find_fm_block(const struct ubi_device *ubi,
++ int pnum)
++{
++ int i;
++
++ if (ubi->fm) {
++ for (i = 0; i < ubi->fm->used_blocks; i++) {
++ if (ubi->fm->e[i]->pnum == pnum)
++ return ubi->fm->e[i];
++ }
++ }
++
++ return NULL;
++}
++
+ #endif /* !__UBI_UBI_H__ */
+diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c
+index ca9746f41ff1..b3c1b8106a68 100644
+--- a/drivers/mtd/ubi/wl.c
++++ b/drivers/mtd/ubi/wl.c
+@@ -580,7 +580,7 @@ static int erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk,
+ * failure.
+ */
+ static int schedule_erase(struct ubi_device *ubi, struct ubi_wl_entry *e,
+- int vol_id, int lnum, int torture)
++ int vol_id, int lnum, int torture, bool nested)
+ {
+ struct ubi_work *wl_wrk;
+
+@@ -599,7 +599,10 @@ static int schedule_erase(struct ubi_device *ubi, struct ubi_wl_entry *e,
+ wl_wrk->lnum = lnum;
+ wl_wrk->torture = torture;
+
+- schedule_ubi_work(ubi, wl_wrk);
++ if (nested)
++ __schedule_ubi_work(ubi, wl_wrk);
++ else
++ schedule_ubi_work(ubi, wl_wrk);
+ return 0;
+ }
+
+@@ -658,6 +661,7 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk,
+ if (!vid_hdr)
+ return -ENOMEM;
+
++ down_read(&ubi->fm_eba_sem);
+ mutex_lock(&ubi->move_mutex);
+ spin_lock(&ubi->wl_lock);
+ ubi_assert(!ubi->move_from && !ubi->move_to);
+@@ -884,6 +888,7 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk,
+
+ dbg_wl("done");
+ mutex_unlock(&ubi->move_mutex);
++ up_read(&ubi->fm_eba_sem);
+ return 0;
+
+ /*
+@@ -925,6 +930,7 @@ out_not_moved:
+ }
+
+ mutex_unlock(&ubi->move_mutex);
++ up_read(&ubi->fm_eba_sem);
+ return 0;
+
+ out_error:
+@@ -946,6 +952,7 @@ out_error:
+ out_ro:
+ ubi_ro_mode(ubi);
+ mutex_unlock(&ubi->move_mutex);
++ up_read(&ubi->fm_eba_sem);
+ ubi_assert(err != 0);
+ return err < 0 ? err : -EIO;
+
+@@ -953,6 +960,7 @@ out_cancel:
+ ubi->wl_scheduled = 0;
+ spin_unlock(&ubi->wl_lock);
+ mutex_unlock(&ubi->move_mutex);
++ up_read(&ubi->fm_eba_sem);
+ ubi_free_vid_hdr(ubi, vid_hdr);
+ return 0;
+ }
+@@ -1075,7 +1083,7 @@ static int __erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk)
+ int err1;
+
+ /* Re-schedule the LEB for erasure */
+- err1 = schedule_erase(ubi, e, vol_id, lnum, 0);
++ err1 = schedule_erase(ubi, e, vol_id, lnum, 0, false);
+ if (err1) {
+ wl_entry_destroy(ubi, e);
+ err = err1;
+@@ -1256,7 +1264,7 @@ retry:
+ }
+ spin_unlock(&ubi->wl_lock);
+
+- err = schedule_erase(ubi, e, vol_id, lnum, torture);
++ err = schedule_erase(ubi, e, vol_id, lnum, torture, false);
+ if (err) {
+ spin_lock(&ubi->wl_lock);
+ wl_tree_add(e, &ubi->used);
+@@ -1500,6 +1508,46 @@ static void shutdown_work(struct ubi_device *ubi)
+ }
+ }
+
++/**
++ * erase_aeb - erase a PEB given in UBI attach info PEB
++ * @ubi: UBI device description object
++ * @aeb: UBI attach info PEB
++ * @sync: If true, erase synchronously. Otherwise schedule for erasure
++ */
++static int erase_aeb(struct ubi_device *ubi, struct ubi_ainf_peb *aeb, bool sync)
++{
++ struct ubi_wl_entry *e;
++ int err;
++
++ e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL);
++ if (!e)
++ return -ENOMEM;
++
++ e->pnum = aeb->pnum;
++ e->ec = aeb->ec;
++ ubi->lookuptbl[e->pnum] = e;
++
++ if (sync) {
++ err = sync_erase(ubi, e, false);
++ if (err)
++ goto out_free;
++
++ wl_tree_add(e, &ubi->free);
++ ubi->free_count++;
++ } else {
++ err = schedule_erase(ubi, e, aeb->vol_id, aeb->lnum, 0, false);
++ if (err)
++ goto out_free;
++ }
++
++ return 0;
++
++out_free:
++ wl_entry_destroy(ubi, e);
++
++ return err;
++}
++
+ /**
+ * ubi_wl_init - initialize the WL sub-system using attaching information.
+ * @ubi: UBI device description object
+@@ -1537,17 +1585,9 @@ int ubi_wl_init(struct ubi_device *ubi, struct ubi_attach_info *ai)
+ list_for_each_entry_safe(aeb, tmp, &ai->erase, u.list) {
+ cond_resched();
+
+- e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL);
+- if (!e)
+- goto out_free;
+-
+- e->pnum = aeb->pnum;
+- e->ec = aeb->ec;
+- ubi->lookuptbl[e->pnum] = e;
+- if (schedule_erase(ubi, e, aeb->vol_id, aeb->lnum, 0)) {
+- wl_entry_destroy(ubi, e);
++ err = erase_aeb(ubi, aeb, false);
++ if (err)
+ goto out_free;
+- }
+
+ found_pebs++;
+ }
+@@ -1598,19 +1638,49 @@ int ubi_wl_init(struct ubi_device *ubi, struct ubi_attach_info *ai)
+ }
+ }
+
+- dbg_wl("found %i PEBs", found_pebs);
++ list_for_each_entry(aeb, &ai->fastmap, u.list) {
++ cond_resched();
+
+- if (ubi->fm) {
+- ubi_assert(ubi->good_peb_count ==
+- found_pebs + ubi->fm->used_blocks);
++ e = ubi_find_fm_block(ubi, aeb->pnum);
+
+- for (i = 0; i < ubi->fm->used_blocks; i++) {
+- e = ubi->fm->e[i];
++ if (e) {
++ ubi_assert(!ubi->lookuptbl[e->pnum]);
+ ubi->lookuptbl[e->pnum] = e;
++ } else {
++ bool sync = false;
++
++ /*
++ * Usually old Fastmap PEBs are scheduled for erasure
++ * and we don't have to care about them but if we face
++ * an power cut before scheduling them we need to
++ * take care of them here.
++ */
++ if (ubi->lookuptbl[aeb->pnum])
++ continue;
++
++ /*
++ * The fastmap update code might not find a free PEB for
++ * writing the fastmap anchor to and then reuses the
++ * current fastmap anchor PEB. When this PEB gets erased
++ * and a power cut happens before it is written again we
++ * must make sure that the fastmap attach code doesn't
++ * find any outdated fastmap anchors, hence we erase the
++ * outdated fastmap anchor PEBs synchronously here.
++ */
++ if (aeb->vol_id == UBI_FM_SB_VOLUME_ID)
++ sync = true;
++
++ err = erase_aeb(ubi, aeb, sync);
++ if (err)
++ goto out_free;
+ }
++
++ found_pebs++;
+ }
+- else
+- ubi_assert(ubi->good_peb_count == found_pebs);
++
++ dbg_wl("found %i PEBs", found_pebs);
++
++ ubi_assert(ubi->good_peb_count == found_pebs);
+
+ reserved_pebs = WL_RESERVED_PEBS;
+ ubi_fastmap_init(ubi, &reserved_pebs);
+diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
+index 1325825d5225..ce3a56bea6e6 100644
+--- a/drivers/net/ethernet/broadcom/tg3.c
++++ b/drivers/net/ethernet/broadcom/tg3.c
+@@ -9278,6 +9278,15 @@ static int tg3_chip_reset(struct tg3 *tp)
+
+ tg3_restore_clk(tp);
+
++ /* Increase the core clock speed to fix tx timeout issue for 5762
++ * with 100Mbps link speed.
++ */
++ if (tg3_asic_rev(tp) == ASIC_REV_5762) {
++ val = tr32(TG3_CPMU_CLCK_ORIDE_ENABLE);
++ tw32(TG3_CPMU_CLCK_ORIDE_ENABLE, val |
++ TG3_CPMU_MAC_ORIDE_ENABLE);
++ }
++
+ /* Reprobe ASF enable state. */
+ tg3_flag_clear(tp, ENABLE_ASF);
+ tp->phy_flags &= ~(TG3_PHYFLG_1G_ON_VAUX_OK |
+diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
+index 8179727d3423..1f2f25a71d18 100644
+--- a/drivers/net/phy/phy_device.c
++++ b/drivers/net/phy/phy_device.c
+@@ -1265,11 +1265,8 @@ static int gen10g_resume(struct phy_device *phydev)
+
+ static int __set_phy_supported(struct phy_device *phydev, u32 max_speed)
+ {
+- /* The default values for phydev->supported are provided by the PHY
+- * driver "features" member, we want to reset to sane defaults first
+- * before supporting higher speeds.
+- */
+- phydev->supported &= PHY_DEFAULT_FEATURES;
++ phydev->supported &= ~(PHY_1000BT_FEATURES | PHY_100BT_FEATURES |
++ PHY_10BT_FEATURES);
+
+ switch (max_speed) {
+ default:
+diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c
+index da7bae991552..d877ff124365 100644
+--- a/drivers/ptp/ptp_chardev.c
++++ b/drivers/ptp/ptp_chardev.c
+@@ -88,6 +88,7 @@ int ptp_set_pinfunc(struct ptp_clock *ptp, unsigned int pin,
+ case PTP_PF_PHYSYNC:
+ if (chan != 0)
+ return -EINVAL;
++ break;
+ default:
+ return -EINVAL;
+ }
+diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
+index f2e9f59c90d6..2d837b6bd495 100644
+--- a/drivers/usb/host/xhci.c
++++ b/drivers/usb/host/xhci.c
+@@ -887,6 +887,41 @@ static void xhci_disable_port_wake_on_bits(struct xhci_hcd *xhci)
+ spin_unlock_irqrestore(&xhci->lock, flags);
+ }
+
++static bool xhci_pending_portevent(struct xhci_hcd *xhci)
++{
++ __le32 __iomem **port_array;
++ int port_index;
++ u32 status;
++ u32 portsc;
++
++ status = readl(&xhci->op_regs->status);
++ if (status & STS_EINT)
++ return true;
++ /*
++ * Checking STS_EINT is not enough as there is a lag between a change
++ * bit being set and the Port Status Change Event that it generated
++ * being written to the Event Ring. See note in xhci 1.1 section 4.19.2.
++ */
++
++ port_index = xhci->num_usb2_ports;
++ port_array = xhci->usb2_ports;
++ while (port_index--) {
++ portsc = readl(port_array[port_index]);
++ if (portsc & PORT_CHANGE_MASK ||
++ (portsc & PORT_PLS_MASK) == XDEV_RESUME)
++ return true;
++ }
++ port_index = xhci->num_usb3_ports;
++ port_array = xhci->usb3_ports;
++ while (port_index--) {
++ portsc = readl(port_array[port_index]);
++ if (portsc & PORT_CHANGE_MASK ||
++ (portsc & PORT_PLS_MASK) == XDEV_RESUME)
++ return true;
++ }
++ return false;
++}
++
+ /*
+ * Stop HC (not bus-specific)
+ *
+@@ -983,7 +1018,7 @@ EXPORT_SYMBOL_GPL(xhci_suspend);
+ */
+ int xhci_resume(struct xhci_hcd *xhci, bool hibernated)
+ {
+- u32 command, temp = 0, status;
++ u32 command, temp = 0;
+ struct usb_hcd *hcd = xhci_to_hcd(xhci);
+ struct usb_hcd *secondary_hcd;
+ int retval = 0;
+@@ -1105,8 +1140,7 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated)
+ done:
+ if (retval == 0) {
+ /* Resume root hubs only when have pending events. */
+- status = readl(&xhci->op_regs->status);
+- if (status & STS_EINT) {
++ if (xhci_pending_portevent(xhci)) {
+ usb_hcd_resume_root_hub(xhci->shared_hcd);
+ usb_hcd_resume_root_hub(hcd);
+ }
+diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
+index 1715705acc59..84d8871755b7 100644
+--- a/drivers/usb/host/xhci.h
++++ b/drivers/usb/host/xhci.h
+@@ -382,6 +382,10 @@ struct xhci_op_regs {
+ #define PORT_PLC (1 << 22)
+ /* port configure error change - port failed to configure its link partner */
+ #define PORT_CEC (1 << 23)
++#define PORT_CHANGE_MASK (PORT_CSC | PORT_PEC | PORT_WRC | PORT_OCC | \
++ PORT_RC | PORT_PLC | PORT_CEC)
++
++
+ /* Cold Attach Status - xHC can set this bit to report device attached during
+ * Sx state. Warm port reset should be perfomed to clear this bit and move port
+ * to connected state.
+diff --git a/fs/fat/inode.c b/fs/fat/inode.c
+index cf644d52c0cf..c81cfb79a339 100644
+--- a/fs/fat/inode.c
++++ b/fs/fat/inode.c
+@@ -613,13 +613,21 @@ static void fat_set_state(struct super_block *sb,
+ brelse(bh);
+ }
+
++static void fat_reset_iocharset(struct fat_mount_options *opts)
++{
++ if (opts->iocharset != fat_default_iocharset) {
++ /* Note: opts->iocharset can be NULL here */
++ kfree(opts->iocharset);
++ opts->iocharset = fat_default_iocharset;
++ }
++}
++
+ static void delayed_free(struct rcu_head *p)
+ {
+ struct msdos_sb_info *sbi = container_of(p, struct msdos_sb_info, rcu);
+ unload_nls(sbi->nls_disk);
+ unload_nls(sbi->nls_io);
+- if (sbi->options.iocharset != fat_default_iocharset)
+- kfree(sbi->options.iocharset);
++ fat_reset_iocharset(&sbi->options);
+ kfree(sbi);
+ }
+
+@@ -1034,7 +1042,7 @@ static int parse_options(struct super_block *sb, char *options, int is_vfat,
+ opts->fs_fmask = opts->fs_dmask = current_umask();
+ opts->allow_utime = -1;
+ opts->codepage = fat_default_codepage;
+- opts->iocharset = fat_default_iocharset;
++ fat_reset_iocharset(opts);
+ if (is_vfat) {
+ opts->shortname = VFAT_SFN_DISPLAY_WINNT|VFAT_SFN_CREATE_WIN95;
+ opts->rodir = 0;
+@@ -1184,8 +1192,7 @@ static int parse_options(struct super_block *sb, char *options, int is_vfat,
+
+ /* vfat specific */
+ case Opt_charset:
+- if (opts->iocharset != fat_default_iocharset)
+- kfree(opts->iocharset);
++ fat_reset_iocharset(opts);
+ iocharset = match_strdup(&args[0]);
+ if (!iocharset)
+ return -ENOMEM;
+@@ -1776,8 +1783,7 @@ out_fail:
+ iput(fat_inode);
+ unload_nls(sbi->nls_io);
+ unload_nls(sbi->nls_disk);
+- if (sbi->options.iocharset != fat_default_iocharset)
+- kfree(sbi->options.iocharset);
++ fat_reset_iocharset(&sbi->options);
+ sb->s_fs_info = NULL;
+ kfree(sbi);
+ return error;
+diff --git a/fs/proc/array.c b/fs/proc/array.c
+index b6c00ce0e29e..cb71cbae606d 100644
+--- a/fs/proc/array.c
++++ b/fs/proc/array.c
+@@ -79,6 +79,7 @@
+ #include <linux/delayacct.h>
+ #include <linux/seq_file.h>
+ #include <linux/pid_namespace.h>
++#include <linux/prctl.h>
+ #include <linux/ptrace.h>
+ #include <linux/tracehook.h>
+ #include <linux/string_helpers.h>
+@@ -332,6 +333,31 @@ static inline void task_seccomp(struct seq_file *m, struct task_struct *p)
+ #ifdef CONFIG_SECCOMP
+ seq_printf(m, "Seccomp:\t%d\n", p->seccomp.mode);
+ #endif
++ seq_printf(m, "\nSpeculation_Store_Bypass:\t");
++ switch (arch_prctl_spec_ctrl_get(p, PR_SPEC_STORE_BYPASS)) {
++ case -EINVAL:
++ seq_printf(m, "unknown");
++ break;
++ case PR_SPEC_NOT_AFFECTED:
++ seq_printf(m, "not vulnerable");
++ break;
++ case PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE:
++ seq_printf(m, "thread force mitigated");
++ break;
++ case PR_SPEC_PRCTL | PR_SPEC_DISABLE:
++ seq_printf(m, "thread mitigated");
++ break;
++ case PR_SPEC_PRCTL | PR_SPEC_ENABLE:
++ seq_printf(m, "thread vulnerable");
++ break;
++ case PR_SPEC_DISABLE:
++ seq_printf(m, "globally mitigated");
++ break;
++ default:
++ seq_printf(m, "vulnerable");
++ break;
++ }
++ seq_putc(m, '\n');
+ }
+
+ static inline void task_context_switch_counts(struct seq_file *m,
+diff --git a/include/linux/cpu.h b/include/linux/cpu.h
+index 7e04bcd9af8e..2f9d12022100 100644
+--- a/include/linux/cpu.h
++++ b/include/linux/cpu.h
+@@ -46,6 +46,8 @@ extern ssize_t cpu_show_spectre_v1(struct device *dev,
+ struct device_attribute *attr, char *buf);
+ extern ssize_t cpu_show_spectre_v2(struct device *dev,
+ struct device_attribute *attr, char *buf);
++extern ssize_t cpu_show_spec_store_bypass(struct device *dev,
++ struct device_attribute *attr, char *buf);
+
+ extern __printf(4, 5)
+ struct device *cpu_device_create(struct device *parent, void *drvdata,
+diff --git a/include/linux/nospec.h b/include/linux/nospec.h
+index e791ebc65c9c..0c5ef54fd416 100644
+--- a/include/linux/nospec.h
++++ b/include/linux/nospec.h
+@@ -7,6 +7,8 @@
+ #define _LINUX_NOSPEC_H
+ #include <asm/barrier.h>
+
++struct task_struct;
++
+ /**
+ * array_index_mask_nospec() - generate a ~0 mask when index < size, 0 otherwise
+ * @index: array element index
+@@ -55,4 +57,12 @@ static inline unsigned long array_index_mask_nospec(unsigned long index,
+ \
+ (typeof(_i)) (_i & _mask); \
+ })
++
++/* Speculation control prctl */
++int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which);
++int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which,
++ unsigned long ctrl);
++/* Speculation control for seccomp enforced mitigation */
++void arch_seccomp_spec_mitigate(struct task_struct *task);
++
+ #endif /* _LINUX_NOSPEC_H */
+diff --git a/include/linux/sched.h b/include/linux/sched.h
+index 90bea398e5e0..725498cc5d30 100644
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -2167,6 +2167,8 @@ static inline void memalloc_noio_restore(unsigned int flags)
+ #define PFA_NO_NEW_PRIVS 0 /* May not gain new privileges. */
+ #define PFA_SPREAD_PAGE 1 /* Spread page cache over cpuset */
+ #define PFA_SPREAD_SLAB 2 /* Spread some slab caches over cpuset */
++#define PFA_SPEC_SSB_DISABLE 4 /* Speculative Store Bypass disabled */
++#define PFA_SPEC_SSB_FORCE_DISABLE 5 /* Speculative Store Bypass force disabled*/
+
+
+ #define TASK_PFA_TEST(name, func) \
+@@ -2190,6 +2192,13 @@ TASK_PFA_TEST(SPREAD_SLAB, spread_slab)
+ TASK_PFA_SET(SPREAD_SLAB, spread_slab)
+ TASK_PFA_CLEAR(SPREAD_SLAB, spread_slab)
+
++TASK_PFA_TEST(SPEC_SSB_DISABLE, spec_ssb_disable)
++TASK_PFA_SET(SPEC_SSB_DISABLE, spec_ssb_disable)
++TASK_PFA_CLEAR(SPEC_SSB_DISABLE, spec_ssb_disable)
++
++TASK_PFA_TEST(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable)
++TASK_PFA_SET(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable)
++
+ /*
+ * task->jobctl flags
+ */
+diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h
+index 2296e6b2f690..5a53d34bba26 100644
+--- a/include/linux/seccomp.h
++++ b/include/linux/seccomp.h
+@@ -3,7 +3,8 @@
+
+ #include <uapi/linux/seccomp.h>
+
+-#define SECCOMP_FILTER_FLAG_MASK (SECCOMP_FILTER_FLAG_TSYNC)
++#define SECCOMP_FILTER_FLAG_MASK (SECCOMP_FILTER_FLAG_TSYNC | \
++ SECCOMP_FILTER_FLAG_SPEC_ALLOW)
+
+ #ifdef CONFIG_SECCOMP
+
+diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
+index a6da214d0584..c28bd8be290a 100644
+--- a/include/linux/skbuff.h
++++ b/include/linux/skbuff.h
+@@ -514,6 +514,7 @@ static inline bool skb_mstamp_after(const struct skb_mstamp *t1,
+ * @hash: the packet hash
+ * @queue_mapping: Queue mapping for multiqueue devices
+ * @xmit_more: More SKBs are pending for this queue
++ * @pfmemalloc: skbuff was allocated from PFMEMALLOC reserves
+ * @ndisc_nodetype: router type (from link layer)
+ * @ooo_okay: allow the mapping of a socket to a queue to be changed
+ * @l4_hash: indicate hash is a canonical 4-tuple hash over transport
+@@ -594,8 +595,8 @@ struct sk_buff {
+ fclone:2,
+ peeked:1,
+ head_frag:1,
+- xmit_more:1;
+- /* one bit hole */
++ xmit_more:1,
++ pfmemalloc:1;
+ kmemcheck_bitfield_end(flags1);
+
+ /* fields enclosed in headers_start/headers_end are copied
+@@ -615,19 +616,18 @@ struct sk_buff {
+
+ __u8 __pkt_type_offset[0];
+ __u8 pkt_type:3;
+- __u8 pfmemalloc:1;
+ __u8 ignore_df:1;
+ __u8 nfctinfo:3;
+-
+ __u8 nf_trace:1;
++
+ __u8 ip_summed:2;
+ __u8 ooo_okay:1;
+ __u8 l4_hash:1;
+ __u8 sw_hash:1;
+ __u8 wifi_acked_valid:1;
+ __u8 wifi_acked:1;
+-
+ __u8 no_fcs:1;
++
+ /* Indicates the inner headers are valid in the skbuff. */
+ __u8 encapsulation:1;
+ __u8 encap_hdr_csum:1;
+@@ -635,11 +635,11 @@ struct sk_buff {
+ __u8 csum_complete_sw:1;
+ __u8 csum_level:2;
+ __u8 csum_bad:1;
+-
+ #ifdef CONFIG_IPV6_NDISC_NODETYPE
+ __u8 ndisc_nodetype:2;
+ #endif
+ __u8 ipvs_property:1;
++
+ __u8 inner_protocol_type:1;
+ __u8 remcsum_offload:1;
+ /* 3 or 5 bit hole */
+diff --git a/include/net/ipv6.h b/include/net/ipv6.h
+index 84f0d0602433..0e01d570fa22 100644
+--- a/include/net/ipv6.h
++++ b/include/net/ipv6.h
+@@ -762,7 +762,7 @@ static inline __be32 ip6_make_flowlabel(struct net *net, struct sk_buff *skb,
+ * to minimize possbility that any useful information to an
+ * attacker is leaked. Only lower 20 bits are relevant.
+ */
+- rol32(hash, 16);
++ hash = rol32(hash, 16);
+
+ flowlabel = (__force __be32)hash & IPV6_FLOWLABEL_MASK;
+
+diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
+index a8d0759a9e40..64776b72e1eb 100644
+--- a/include/uapi/linux/prctl.h
++++ b/include/uapi/linux/prctl.h
+@@ -197,4 +197,16 @@ struct prctl_mm_map {
+ # define PR_CAP_AMBIENT_LOWER 3
+ # define PR_CAP_AMBIENT_CLEAR_ALL 4
+
++/* Per task speculation control */
++#define PR_GET_SPECULATION_CTRL 52
++#define PR_SET_SPECULATION_CTRL 53
++/* Speculation control variants */
++# define PR_SPEC_STORE_BYPASS 0
++/* Return and control values for PR_SET/GET_SPECULATION_CTRL */
++# define PR_SPEC_NOT_AFFECTED 0
++# define PR_SPEC_PRCTL (1UL << 0)
++# define PR_SPEC_ENABLE (1UL << 1)
++# define PR_SPEC_DISABLE (1UL << 2)
++# define PR_SPEC_FORCE_DISABLE (1UL << 3)
++
+ #endif /* _LINUX_PRCTL_H */
+diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h
+index 0f238a43ff1e..e4acb615792b 100644
+--- a/include/uapi/linux/seccomp.h
++++ b/include/uapi/linux/seccomp.h
+@@ -15,7 +15,9 @@
+ #define SECCOMP_SET_MODE_FILTER 1
+
+ /* Valid flags for SECCOMP_SET_MODE_FILTER */
+-#define SECCOMP_FILTER_FLAG_TSYNC 1
++#define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0)
++/* In v4.14+ SECCOMP_FILTER_FLAG_LOG is (1UL << 1) */
++#define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2)
+
+ /*
+ * All BPF programs must return a 32-bit value.
+diff --git a/kernel/seccomp.c b/kernel/seccomp.c
+index efd384f3f852..9a9203b15cde 100644
+--- a/kernel/seccomp.c
++++ b/kernel/seccomp.c
+@@ -16,6 +16,8 @@
+ #include <linux/atomic.h>
+ #include <linux/audit.h>
+ #include <linux/compat.h>
++#include <linux/nospec.h>
++#include <linux/prctl.h>
+ #include <linux/sched.h>
+ #include <linux/seccomp.h>
+ #include <linux/slab.h>
+@@ -214,8 +216,11 @@ static inline bool seccomp_may_assign_mode(unsigned long seccomp_mode)
+ return true;
+ }
+
++void __weak arch_seccomp_spec_mitigate(struct task_struct *task) { }
++
+ static inline void seccomp_assign_mode(struct task_struct *task,
+- unsigned long seccomp_mode)
++ unsigned long seccomp_mode,
++ unsigned long flags)
+ {
+ assert_spin_locked(&task->sighand->siglock);
+
+@@ -225,6 +230,9 @@ static inline void seccomp_assign_mode(struct task_struct *task,
+ * filter) is set.
+ */
+ smp_mb__before_atomic();
++ /* Assume default seccomp processes want spec flaw mitigation. */
++ if ((flags & SECCOMP_FILTER_FLAG_SPEC_ALLOW) == 0)
++ arch_seccomp_spec_mitigate(task);
+ set_tsk_thread_flag(task, TIF_SECCOMP);
+ }
+
+@@ -292,7 +300,7 @@ static inline pid_t seccomp_can_sync_threads(void)
+ * without dropping the locks.
+ *
+ */
+-static inline void seccomp_sync_threads(void)
++static inline void seccomp_sync_threads(unsigned long flags)
+ {
+ struct task_struct *thread, *caller;
+
+@@ -333,7 +341,8 @@ static inline void seccomp_sync_threads(void)
+ * allow one thread to transition the other.
+ */
+ if (thread->seccomp.mode == SECCOMP_MODE_DISABLED)
+- seccomp_assign_mode(thread, SECCOMP_MODE_FILTER);
++ seccomp_assign_mode(thread, SECCOMP_MODE_FILTER,
++ flags);
+ }
+ }
+
+@@ -452,7 +461,7 @@ static long seccomp_attach_filter(unsigned int flags,
+
+ /* Now that the new filter is in place, synchronize to all threads. */
+ if (flags & SECCOMP_FILTER_FLAG_TSYNC)
+- seccomp_sync_threads();
++ seccomp_sync_threads(flags);
+
+ return 0;
+ }
+@@ -747,7 +756,7 @@ static long seccomp_set_mode_strict(void)
+ #ifdef TIF_NOTSC
+ disable_TSC();
+ #endif
+- seccomp_assign_mode(current, seccomp_mode);
++ seccomp_assign_mode(current, seccomp_mode, 0);
+ ret = 0;
+
+ out:
+@@ -805,7 +814,7 @@ static long seccomp_set_mode_filter(unsigned int flags,
+ /* Do not free the successfully attached filter. */
+ prepared = NULL;
+
+- seccomp_assign_mode(current, seccomp_mode);
++ seccomp_assign_mode(current, seccomp_mode, flags);
+ out:
+ spin_unlock_irq(&current->sighand->siglock);
+ if (flags & SECCOMP_FILTER_FLAG_TSYNC)
+diff --git a/kernel/sys.c b/kernel/sys.c
+index 6624919ef0e7..f718742e55e6 100644
+--- a/kernel/sys.c
++++ b/kernel/sys.c
+@@ -2075,6 +2075,17 @@ static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr)
+ }
+ #endif
+
++int __weak arch_prctl_spec_ctrl_get(struct task_struct *t, unsigned long which)
++{
++ return -EINVAL;
++}
++
++int __weak arch_prctl_spec_ctrl_set(struct task_struct *t, unsigned long which,
++ unsigned long ctrl)
++{
++ return -EINVAL;
++}
++
+ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
+ unsigned long, arg4, unsigned long, arg5)
+ {
+@@ -2269,6 +2280,16 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
+ case PR_GET_FP_MODE:
+ error = GET_FP_MODE(me);
+ break;
++ case PR_GET_SPECULATION_CTRL:
++ if (arg3 || arg4 || arg5)
++ return -EINVAL;
++ error = arch_prctl_spec_ctrl_get(me, arg2);
++ break;
++ case PR_SET_SPECULATION_CTRL:
++ if (arg4 || arg5)
++ return -EINVAL;
++ error = arch_prctl_spec_ctrl_set(me, arg2, arg3);
++ break;
+ default:
+ error = -EINVAL;
+ break;
+diff --git a/lib/rhashtable.c b/lib/rhashtable.c
+index 51282f579760..37ea94b636a3 100644
+--- a/lib/rhashtable.c
++++ b/lib/rhashtable.c
+@@ -670,8 +670,16 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_stop);
+
+ static size_t rounded_hashtable_size(const struct rhashtable_params *params)
+ {
+- return max(roundup_pow_of_two(params->nelem_hint * 4 / 3),
+- (unsigned long)params->min_size);
++ size_t retsize;
++
++ if (params->nelem_hint)
++ retsize = max(roundup_pow_of_two(params->nelem_hint * 4 / 3),
++ (unsigned long)params->min_size);
++ else
++ retsize = max(HASH_DEFAULT_SIZE,
++ (unsigned long)params->min_size);
++
++ return retsize;
+ }
+
+ static u32 rhashtable_jhash2(const void *key, u32 length, u32 seed)
+@@ -728,8 +736,6 @@ int rhashtable_init(struct rhashtable *ht,
+ struct bucket_table *tbl;
+ size_t size;
+
+- size = HASH_DEFAULT_SIZE;
+-
+ if ((!params->key_len && !params->obj_hashfn) ||
+ (params->obj_hashfn && !params->obj_cmpfn))
+ return -EINVAL;
+@@ -756,8 +762,7 @@ int rhashtable_init(struct rhashtable *ht,
+
+ ht->p.min_size = max(ht->p.min_size, HASH_MIN_SIZE);
+
+- if (params->nelem_hint)
+- size = rounded_hashtable_size(&ht->p);
++ size = rounded_hashtable_size(&ht->p);
+
+ /* The maximum (not average) chain length grows with the
+ * size of the hash table, at a rate of (log N)/(log log N).
+diff --git a/mm/memcontrol.c b/mm/memcontrol.c
+index 55a9facb8e8d..9a8e688724b1 100644
+--- a/mm/memcontrol.c
++++ b/mm/memcontrol.c
+@@ -996,7 +996,7 @@ static void invalidate_reclaim_iterators(struct mem_cgroup *dead_memcg)
+ int nid, zid;
+ int i;
+
+- while ((memcg = parent_mem_cgroup(memcg))) {
++ for (; memcg; memcg = parent_mem_cgroup(memcg)) {
+ for_each_node(nid) {
+ for (zid = 0; zid < MAX_NR_ZONES; zid++) {
+ mz = &memcg->nodeinfo[nid]->zoneinfo[zid];
+diff --git a/net/core/skbuff.c b/net/core/skbuff.c
+index fa02c680eebc..55be076706e5 100644
+--- a/net/core/skbuff.c
++++ b/net/core/skbuff.c
+@@ -828,6 +828,7 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb)
+ n->cloned = 1;
+ n->nohdr = 0;
+ n->peeked = 0;
++ C(pfmemalloc);
+ n->destructor = NULL;
+ C(tail);
+ C(end);
+diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
+index c9e68ff48a72..8f05816a8be2 100644
+--- a/net/ipv4/fib_frontend.c
++++ b/net/ipv4/fib_frontend.c
+@@ -297,6 +297,7 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb)
+ if (!ipv4_is_zeronet(ip_hdr(skb)->saddr)) {
+ struct flowi4 fl4 = {
+ .flowi4_iif = LOOPBACK_IFINDEX,
++ .flowi4_oif = l3mdev_master_ifindex_rcu(dev),
+ .daddr = ip_hdr(skb)->saddr,
+ .flowi4_tos = RT_TOS(ip_hdr(skb)->tos),
+ .flowi4_scope = scope,
+diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
+index 75abf978ef30..da90c74d12ef 100644
+--- a/net/ipv4/sysctl_net_ipv4.c
++++ b/net/ipv4/sysctl_net_ipv4.c
+@@ -141,8 +141,9 @@ static int ipv4_ping_group_range(struct ctl_table *table, int write,
+ if (write && ret == 0) {
+ low = make_kgid(user_ns, urange[0]);
+ high = make_kgid(user_ns, urange[1]);
+- if (!gid_valid(low) || !gid_valid(high) ||
+- (urange[1] < urange[0]) || gid_lt(high, low)) {
++ if (!gid_valid(low) || !gid_valid(high))
++ return -EINVAL;
++ if (urange[1] < urange[0] || gid_lt(high, low)) {
+ low = make_kgid(&init_user_ns, 1);
+ high = make_kgid(&init_user_ns, 0);
+ }
+diff --git a/sound/core/rawmidi.c b/sound/core/rawmidi.c
+index 16f8124b1150..59111cadaec2 100644
+--- a/sound/core/rawmidi.c
++++ b/sound/core/rawmidi.c
+@@ -635,7 +635,7 @@ static int snd_rawmidi_info_select_user(struct snd_card *card,
+ int snd_rawmidi_output_params(struct snd_rawmidi_substream *substream,
+ struct snd_rawmidi_params * params)
+ {
+- char *newbuf;
++ char *newbuf, *oldbuf;
+ struct snd_rawmidi_runtime *runtime = substream->runtime;
+
+ if (substream->append && substream->use_count > 1)
+@@ -648,13 +648,17 @@ int snd_rawmidi_output_params(struct snd_rawmidi_substream *substream,
+ return -EINVAL;
+ }
+ if (params->buffer_size != runtime->buffer_size) {
+- newbuf = krealloc(runtime->buffer, params->buffer_size,
+- GFP_KERNEL);
++ newbuf = kmalloc(params->buffer_size, GFP_KERNEL);
+ if (!newbuf)
+ return -ENOMEM;
++ spin_lock_irq(&runtime->lock);
++ oldbuf = runtime->buffer;
+ runtime->buffer = newbuf;
+ runtime->buffer_size = params->buffer_size;
+ runtime->avail = runtime->buffer_size;
++ runtime->appl_ptr = runtime->hw_ptr = 0;
++ spin_unlock_irq(&runtime->lock);
++ kfree(oldbuf);
+ }
+ runtime->avail_min = params->avail_min;
+ substream->active_sensing = !params->no_active_sensing;
+@@ -665,7 +669,7 @@ EXPORT_SYMBOL(snd_rawmidi_output_params);
+ int snd_rawmidi_input_params(struct snd_rawmidi_substream *substream,
+ struct snd_rawmidi_params * params)
+ {
+- char *newbuf;
++ char *newbuf, *oldbuf;
+ struct snd_rawmidi_runtime *runtime = substream->runtime;
+
+ snd_rawmidi_drain_input(substream);
+@@ -676,12 +680,16 @@ int snd_rawmidi_input_params(struct snd_rawmidi_substream *substream,
+ return -EINVAL;
+ }
+ if (params->buffer_size != runtime->buffer_size) {
+- newbuf = krealloc(runtime->buffer, params->buffer_size,
+- GFP_KERNEL);
++ newbuf = kmalloc(params->buffer_size, GFP_KERNEL);
+ if (!newbuf)
+ return -ENOMEM;
++ spin_lock_irq(&runtime->lock);
++ oldbuf = runtime->buffer;
+ runtime->buffer = newbuf;
+ runtime->buffer_size = params->buffer_size;
++ runtime->appl_ptr = runtime->hw_ptr = 0;
++ spin_unlock_irq(&runtime->lock);
++ kfree(oldbuf);
+ }
+ runtime->avail_min = params->avail_min;
+ return 0;
+diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
+index 882fe83a3554..b3f345433ec7 100644
+--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
++++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
+@@ -1476,15 +1476,19 @@ TEST_F(TRACE_syscall, syscall_dropped)
+ #define SECCOMP_SET_MODE_FILTER 1
+ #endif
+
+-#ifndef SECCOMP_FLAG_FILTER_TSYNC
+-#define SECCOMP_FLAG_FILTER_TSYNC 1
++#ifndef SECCOMP_FILTER_FLAG_TSYNC
++#define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0)
++#endif
++
++#ifndef SECCOMP_FILTER_FLAG_SPEC_ALLOW
++#define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2)
+ #endif
+
+ #ifndef seccomp
+-int seccomp(unsigned int op, unsigned int flags, struct sock_fprog *filter)
++int seccomp(unsigned int op, unsigned int flags, void *args)
+ {
+ errno = 0;
+- return syscall(__NR_seccomp, op, flags, filter);
++ return syscall(__NR_seccomp, op, flags, args);
+ }
+ #endif
+
+@@ -1576,6 +1580,78 @@ TEST(seccomp_syscall_mode_lock)
+ }
+ }
+
++/*
++ * Test detection of known and unknown filter flags. Userspace needs to be able
++ * to check if a filter flag is supported by the current kernel and a good way
++ * of doing that is by attempting to enter filter mode, with the flag bit in
++ * question set, and a NULL pointer for the _args_ parameter. EFAULT indicates
++ * that the flag is valid and EINVAL indicates that the flag is invalid.
++ */
++TEST(detect_seccomp_filter_flags)
++{
++ unsigned int flags[] = { SECCOMP_FILTER_FLAG_TSYNC,
++ SECCOMP_FILTER_FLAG_SPEC_ALLOW };
++ unsigned int flag, all_flags;
++ int i;
++ long ret;
++
++ /* Test detection of known-good filter flags */
++ for (i = 0, all_flags = 0; i < ARRAY_SIZE(flags); i++) {
++ int bits = 0;
++
++ flag = flags[i];
++ /* Make sure the flag is a single bit! */
++ while (flag) {
++ if (flag & 0x1)
++ bits ++;
++ flag >>= 1;
++ }
++ ASSERT_EQ(1, bits);
++ flag = flags[i];
++
++ ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
++ ASSERT_NE(ENOSYS, errno) {
++ TH_LOG("Kernel does not support seccomp syscall!");
++ }
++ EXPECT_EQ(-1, ret);
++ EXPECT_EQ(EFAULT, errno) {
++ TH_LOG("Failed to detect that a known-good filter flag (0x%X) is supported!",
++ flag);
++ }
++
++ all_flags |= flag;
++ }
++
++ /* Test detection of all known-good filter flags */
++ ret = seccomp(SECCOMP_SET_MODE_FILTER, all_flags, NULL);
++ EXPECT_EQ(-1, ret);
++ EXPECT_EQ(EFAULT, errno) {
++ TH_LOG("Failed to detect that all known-good filter flags (0x%X) are supported!",
++ all_flags);
++ }
++
++ /* Test detection of an unknown filter flag */
++ flag = -1;
++ ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
++ EXPECT_EQ(-1, ret);
++ EXPECT_EQ(EINVAL, errno) {
++ TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported!",
++ flag);
++ }
++
++ /*
++ * Test detection of an unknown filter flag that may simply need to be
++ * added to this test
++ */
++ flag = flags[ARRAY_SIZE(flags) - 1] << 1;
++ ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
++ EXPECT_EQ(-1, ret);
++ EXPECT_EQ(EINVAL, errno) {
++ TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported! Does a new flag need to be added to this test?",
++ flag);
++ }
++}
++
+ TEST(TSYNC_first)
+ {
+ struct sock_filter filter[] = {
+@@ -1592,7 +1668,7 @@ TEST(TSYNC_first)
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+
+- ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
++ ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
+ &prog);
+ ASSERT_NE(ENOSYS, errno) {
+ TH_LOG("Kernel does not support seccomp syscall!");
+@@ -1810,7 +1886,7 @@ TEST_F(TSYNC, two_siblings_with_ancestor)
+ self->sibling_count++;
+ }
+
+- ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
++ ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
+ &self->apply_prog);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Could install filter on all threads!");
+@@ -1871,7 +1947,7 @@ TEST_F(TSYNC, two_siblings_with_no_filter)
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+
+- ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
++ ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
+ &self->apply_prog);
+ ASSERT_NE(ENOSYS, errno) {
+ TH_LOG("Kernel does not support seccomp syscall!");
+@@ -1919,7 +1995,7 @@ TEST_F(TSYNC, two_siblings_with_one_divergence)
+ self->sibling_count++;
+ }
+
+- ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
++ ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
+ &self->apply_prog);
+ ASSERT_EQ(self->sibling[0].system_tid, ret) {
+ TH_LOG("Did not fail on diverged sibling.");
+@@ -1971,7 +2047,7 @@ TEST_F(TSYNC, two_siblings_not_under_filter)
+ TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
+ }
+
+- ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
++ ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
+ &self->apply_prog);
+ ASSERT_EQ(ret, self->sibling[0].system_tid) {
+ TH_LOG("Did not fail on diverged sibling.");
+@@ -2000,7 +2076,7 @@ TEST_F(TSYNC, two_siblings_not_under_filter)
+ /* Switch to the remaining sibling */
+ sib = !sib;
+
+- ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
++ ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
+ &self->apply_prog);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Expected the remaining sibling to sync");
+@@ -2023,7 +2099,7 @@ TEST_F(TSYNC, two_siblings_not_under_filter)
+ while (!kill(self->sibling[sib].system_tid, 0))
+ sleep(0.1);
+
+- ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
++ ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
+ &self->apply_prog);
+ ASSERT_EQ(0, ret); /* just us chickens */
+ }
+diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
+index 49001fa84ead..1203829316b2 100644
+--- a/virt/kvm/eventfd.c
++++ b/virt/kvm/eventfd.c
+@@ -119,8 +119,12 @@ irqfd_shutdown(struct work_struct *work)
+ {
+ struct kvm_kernel_irqfd *irqfd =
+ container_of(work, struct kvm_kernel_irqfd, shutdown);
++ struct kvm *kvm = irqfd->kvm;
+ u64 cnt;
+
++ /* Make sure irqfd has been initalized in assign path. */
++ synchronize_srcu(&kvm->irq_srcu);
++
+ /*
+ * Synchronize with the wait-queue and unhook ourselves to prevent
+ * further events.
+@@ -387,7 +391,6 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
+
+ idx = srcu_read_lock(&kvm->irq_srcu);
+ irqfd_update(kvm, irqfd);
+- srcu_read_unlock(&kvm->irq_srcu, idx);
+
+ list_add_tail(&irqfd->list, &kvm->irqfds.items);
+
+@@ -419,6 +422,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
+ irqfd->consumer.token, ret);
+ #endif
+
++ srcu_read_unlock(&kvm->irq_srcu, idx);
+ return 0;
+
+ fail:
diff --git a/1144_linux-4.4.145.patch b/1144_linux-4.4.145.patch
new file mode 100644
index 00000000..f7b3f94d
--- /dev/null
+++ b/1144_linux-4.4.145.patch
@@ -0,0 +1,1006 @@
+diff --git a/Makefile b/Makefile
+index 63f3e2438a26..be31491a2d67 100644
+--- a/Makefile
++++ b/Makefile
+@@ -1,6 +1,6 @@
+ VERSION = 4
+ PATCHLEVEL = 4
+-SUBLEVEL = 144
++SUBLEVEL = 145
+ EXTRAVERSION =
+ NAME = Blurry Fish Butt
+
+@@ -624,6 +624,7 @@ KBUILD_CFLAGS += $(call cc-disable-warning,frame-address,)
+ KBUILD_CFLAGS += $(call cc-disable-warning, format-truncation)
+ KBUILD_CFLAGS += $(call cc-disable-warning, format-overflow)
+ KBUILD_CFLAGS += $(call cc-disable-warning, int-in-bool-context)
++KBUILD_CFLAGS += $(call cc-disable-warning, attribute-alias)
+
+ ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE
+ KBUILD_CFLAGS += -Os
+diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h
+index 35c9db857ebe..cd8b589111ba 100644
+--- a/arch/arm/include/asm/uaccess.h
++++ b/arch/arm/include/asm/uaccess.h
+@@ -251,7 +251,7 @@ extern int __put_user_8(void *, unsigned long long);
+ ({ \
+ unsigned long __limit = current_thread_info()->addr_limit - 1; \
+ const typeof(*(p)) __user *__tmp_p = (p); \
+- register const typeof(*(p)) __r2 asm("r2") = (x); \
++ register typeof(*(p)) __r2 asm("r2") = (x); \
+ register const typeof(*(p)) __user *__p asm("r0") = __tmp_p; \
+ register unsigned long __l asm("r1") = __limit; \
+ register int __e asm("r0"); \
+diff --git a/arch/mips/ath79/common.c b/arch/mips/ath79/common.c
+index 8ae4067a5eda..40ecb6e700cd 100644
+--- a/arch/mips/ath79/common.c
++++ b/arch/mips/ath79/common.c
+@@ -58,7 +58,7 @@ EXPORT_SYMBOL_GPL(ath79_ddr_ctrl_init);
+
+ void ath79_ddr_wb_flush(u32 reg)
+ {
+- void __iomem *flush_reg = ath79_ddr_wb_flush_base + reg;
++ void __iomem *flush_reg = ath79_ddr_wb_flush_base + (reg * 4);
+
+ /* Flush the DDR write buffer. */
+ __raw_writel(0x1, flush_reg);
+diff --git a/drivers/base/dd.c b/drivers/base/dd.c
+index a641cf3ccad6..1dffb018a7fe 100644
+--- a/drivers/base/dd.c
++++ b/drivers/base/dd.c
+@@ -304,14 +304,6 @@ static int really_probe(struct device *dev, struct device_driver *drv)
+ goto probe_failed;
+ }
+
+- /*
+- * Ensure devices are listed in devices_kset in correct order
+- * It's important to move Dev to the end of devices_kset before
+- * calling .probe, because it could be recursive and parent Dev
+- * should always go first
+- */
+- devices_kset_move_last(dev);
+-
+ if (dev->bus->probe) {
+ ret = dev->bus->probe(dev);
+ if (ret)
+diff --git a/drivers/net/can/xilinx_can.c b/drivers/net/can/xilinx_can.c
+index 51670b322409..700b98d9c250 100644
+--- a/drivers/net/can/xilinx_can.c
++++ b/drivers/net/can/xilinx_can.c
+@@ -2,6 +2,7 @@
+ *
+ * Copyright (C) 2012 - 2014 Xilinx, Inc.
+ * Copyright (C) 2009 PetaLogix. All rights reserved.
++ * Copyright (C) 2017 Sandvik Mining and Construction Oy
+ *
+ * Description:
+ * This driver is developed for Axi CAN IP and for Zynq CANPS Controller.
+@@ -25,8 +26,10 @@
+ #include <linux/module.h>
+ #include <linux/netdevice.h>
+ #include <linux/of.h>
++#include <linux/of_device.h>
+ #include <linux/platform_device.h>
+ #include <linux/skbuff.h>
++#include <linux/spinlock.h>
+ #include <linux/string.h>
+ #include <linux/types.h>
+ #include <linux/can/dev.h>
+@@ -100,7 +103,7 @@ enum xcan_reg {
+ #define XCAN_INTR_ALL (XCAN_IXR_TXOK_MASK | XCAN_IXR_BSOFF_MASK |\
+ XCAN_IXR_WKUP_MASK | XCAN_IXR_SLP_MASK | \
+ XCAN_IXR_RXNEMP_MASK | XCAN_IXR_ERROR_MASK | \
+- XCAN_IXR_ARBLST_MASK | XCAN_IXR_RXOK_MASK)
++ XCAN_IXR_RXOFLW_MASK | XCAN_IXR_ARBLST_MASK)
+
+ /* CAN register bit shift - XCAN_<REG>_<BIT>_SHIFT */
+ #define XCAN_BTR_SJW_SHIFT 7 /* Synchronous jump width */
+@@ -117,6 +120,7 @@ enum xcan_reg {
+ /**
+ * struct xcan_priv - This definition define CAN driver instance
+ * @can: CAN private data structure.
++ * @tx_lock: Lock for synchronizing TX interrupt handling
+ * @tx_head: Tx CAN packets ready to send on the queue
+ * @tx_tail: Tx CAN packets successfully sended on the queue
+ * @tx_max: Maximum number packets the driver can send
+@@ -131,6 +135,7 @@ enum xcan_reg {
+ */
+ struct xcan_priv {
+ struct can_priv can;
++ spinlock_t tx_lock;
+ unsigned int tx_head;
+ unsigned int tx_tail;
+ unsigned int tx_max;
+@@ -158,6 +163,11 @@ static const struct can_bittiming_const xcan_bittiming_const = {
+ .brp_inc = 1,
+ };
+
++#define XCAN_CAP_WATERMARK 0x0001
++struct xcan_devtype_data {
++ unsigned int caps;
++};
++
+ /**
+ * xcan_write_reg_le - Write a value to the device register little endian
+ * @priv: Driver private data structure
+@@ -237,6 +247,10 @@ static int set_reset_mode(struct net_device *ndev)
+ usleep_range(500, 10000);
+ }
+
++ /* reset clears FIFOs */
++ priv->tx_head = 0;
++ priv->tx_tail = 0;
++
+ return 0;
+ }
+
+@@ -391,6 +405,7 @@ static int xcan_start_xmit(struct sk_buff *skb, struct net_device *ndev)
+ struct net_device_stats *stats = &ndev->stats;
+ struct can_frame *cf = (struct can_frame *)skb->data;
+ u32 id, dlc, data[2] = {0, 0};
++ unsigned long flags;
+
+ if (can_dropped_invalid_skb(ndev, skb))
+ return NETDEV_TX_OK;
+@@ -438,6 +453,9 @@ static int xcan_start_xmit(struct sk_buff *skb, struct net_device *ndev)
+ data[1] = be32_to_cpup((__be32 *)(cf->data + 4));
+
+ can_put_echo_skb(skb, ndev, priv->tx_head % priv->tx_max);
++
++ spin_lock_irqsave(&priv->tx_lock, flags);
++
+ priv->tx_head++;
+
+ /* Write the Frame to Xilinx CAN TX FIFO */
+@@ -453,10 +471,16 @@ static int xcan_start_xmit(struct sk_buff *skb, struct net_device *ndev)
+ stats->tx_bytes += cf->can_dlc;
+ }
+
++ /* Clear TX-FIFO-empty interrupt for xcan_tx_interrupt() */
++ if (priv->tx_max > 1)
++ priv->write_reg(priv, XCAN_ICR_OFFSET, XCAN_IXR_TXFEMP_MASK);
++
+ /* Check if the TX buffer is full */
+ if ((priv->tx_head - priv->tx_tail) == priv->tx_max)
+ netif_stop_queue(ndev);
+
++ spin_unlock_irqrestore(&priv->tx_lock, flags);
++
+ return NETDEV_TX_OK;
+ }
+
+@@ -528,6 +552,123 @@ static int xcan_rx(struct net_device *ndev)
+ return 1;
+ }
+
++/**
++ * xcan_current_error_state - Get current error state from HW
++ * @ndev: Pointer to net_device structure
++ *
++ * Checks the current CAN error state from the HW. Note that this
++ * only checks for ERROR_PASSIVE and ERROR_WARNING.
++ *
++ * Return:
++ * ERROR_PASSIVE or ERROR_WARNING if either is active, ERROR_ACTIVE
++ * otherwise.
++ */
++static enum can_state xcan_current_error_state(struct net_device *ndev)
++{
++ struct xcan_priv *priv = netdev_priv(ndev);
++ u32 status = priv->read_reg(priv, XCAN_SR_OFFSET);
++
++ if ((status & XCAN_SR_ESTAT_MASK) == XCAN_SR_ESTAT_MASK)
++ return CAN_STATE_ERROR_PASSIVE;
++ else if (status & XCAN_SR_ERRWRN_MASK)
++ return CAN_STATE_ERROR_WARNING;
++ else
++ return CAN_STATE_ERROR_ACTIVE;
++}
++
++/**
++ * xcan_set_error_state - Set new CAN error state
++ * @ndev: Pointer to net_device structure
++ * @new_state: The new CAN state to be set
++ * @cf: Error frame to be populated or NULL
++ *
++ * Set new CAN error state for the device, updating statistics and
++ * populating the error frame if given.
++ */
++static void xcan_set_error_state(struct net_device *ndev,
++ enum can_state new_state,
++ struct can_frame *cf)
++{
++ struct xcan_priv *priv = netdev_priv(ndev);
++ u32 ecr = priv->read_reg(priv, XCAN_ECR_OFFSET);
++ u32 txerr = ecr & XCAN_ECR_TEC_MASK;
++ u32 rxerr = (ecr & XCAN_ECR_REC_MASK) >> XCAN_ESR_REC_SHIFT;
++
++ priv->can.state = new_state;
++
++ if (cf) {
++ cf->can_id |= CAN_ERR_CRTL;
++ cf->data[6] = txerr;
++ cf->data[7] = rxerr;
++ }
++
++ switch (new_state) {
++ case CAN_STATE_ERROR_PASSIVE:
++ priv->can.can_stats.error_passive++;
++ if (cf)
++ cf->data[1] = (rxerr > 127) ?
++ CAN_ERR_CRTL_RX_PASSIVE :
++ CAN_ERR_CRTL_TX_PASSIVE;
++ break;
++ case CAN_STATE_ERROR_WARNING:
++ priv->can.can_stats.error_warning++;
++ if (cf)
++ cf->data[1] |= (txerr > rxerr) ?
++ CAN_ERR_CRTL_TX_WARNING :
++ CAN_ERR_CRTL_RX_WARNING;
++ break;
++ case CAN_STATE_ERROR_ACTIVE:
++ if (cf)
++ cf->data[1] |= CAN_ERR_CRTL_ACTIVE;
++ break;
++ default:
++ /* non-ERROR states are handled elsewhere */
++ WARN_ON(1);
++ break;
++ }
++}
++
++/**
++ * xcan_update_error_state_after_rxtx - Update CAN error state after RX/TX
++ * @ndev: Pointer to net_device structure
++ *
++ * If the device is in a ERROR-WARNING or ERROR-PASSIVE state, check if
++ * the performed RX/TX has caused it to drop to a lesser state and set
++ * the interface state accordingly.
++ */
++static void xcan_update_error_state_after_rxtx(struct net_device *ndev)
++{
++ struct xcan_priv *priv = netdev_priv(ndev);
++ enum can_state old_state = priv->can.state;
++ enum can_state new_state;
++
++ /* changing error state due to successful frame RX/TX can only
++ * occur from these states
++ */
++ if (old_state != CAN_STATE_ERROR_WARNING &&
++ old_state != CAN_STATE_ERROR_PASSIVE)
++ return;
++
++ new_state = xcan_current_error_state(ndev);
++
++ if (new_state != old_state) {
++ struct sk_buff *skb;
++ struct can_frame *cf;
++
++ skb = alloc_can_err_skb(ndev, &cf);
++
++ xcan_set_error_state(ndev, new_state, skb ? cf : NULL);
++
++ if (skb) {
++ struct net_device_stats *stats = &ndev->stats;
++
++ stats->rx_packets++;
++ stats->rx_bytes += cf->can_dlc;
++ netif_rx(skb);
++ }
++ }
++}
++
+ /**
+ * xcan_err_interrupt - error frame Isr
+ * @ndev: net_device pointer
+@@ -543,16 +684,12 @@ static void xcan_err_interrupt(struct net_device *ndev, u32 isr)
+ struct net_device_stats *stats = &ndev->stats;
+ struct can_frame *cf;
+ struct sk_buff *skb;
+- u32 err_status, status, txerr = 0, rxerr = 0;
++ u32 err_status;
+
+ skb = alloc_can_err_skb(ndev, &cf);
+
+ err_status = priv->read_reg(priv, XCAN_ESR_OFFSET);
+ priv->write_reg(priv, XCAN_ESR_OFFSET, err_status);
+- txerr = priv->read_reg(priv, XCAN_ECR_OFFSET) & XCAN_ECR_TEC_MASK;
+- rxerr = ((priv->read_reg(priv, XCAN_ECR_OFFSET) &
+- XCAN_ECR_REC_MASK) >> XCAN_ESR_REC_SHIFT);
+- status = priv->read_reg(priv, XCAN_SR_OFFSET);
+
+ if (isr & XCAN_IXR_BSOFF_MASK) {
+ priv->can.state = CAN_STATE_BUS_OFF;
+@@ -562,28 +699,10 @@ static void xcan_err_interrupt(struct net_device *ndev, u32 isr)
+ can_bus_off(ndev);
+ if (skb)
+ cf->can_id |= CAN_ERR_BUSOFF;
+- } else if ((status & XCAN_SR_ESTAT_MASK) == XCAN_SR_ESTAT_MASK) {
+- priv->can.state = CAN_STATE_ERROR_PASSIVE;
+- priv->can.can_stats.error_passive++;
+- if (skb) {
+- cf->can_id |= CAN_ERR_CRTL;
+- cf->data[1] = (rxerr > 127) ?
+- CAN_ERR_CRTL_RX_PASSIVE :
+- CAN_ERR_CRTL_TX_PASSIVE;
+- cf->data[6] = txerr;
+- cf->data[7] = rxerr;
+- }
+- } else if (status & XCAN_SR_ERRWRN_MASK) {
+- priv->can.state = CAN_STATE_ERROR_WARNING;
+- priv->can.can_stats.error_warning++;
+- if (skb) {
+- cf->can_id |= CAN_ERR_CRTL;
+- cf->data[1] |= (txerr > rxerr) ?
+- CAN_ERR_CRTL_TX_WARNING :
+- CAN_ERR_CRTL_RX_WARNING;
+- cf->data[6] = txerr;
+- cf->data[7] = rxerr;
+- }
++ } else {
++ enum can_state new_state = xcan_current_error_state(ndev);
++
++ xcan_set_error_state(ndev, new_state, skb ? cf : NULL);
+ }
+
+ /* Check for Arbitration lost interrupt */
+@@ -599,7 +718,6 @@ static void xcan_err_interrupt(struct net_device *ndev, u32 isr)
+ if (isr & XCAN_IXR_RXOFLW_MASK) {
+ stats->rx_over_errors++;
+ stats->rx_errors++;
+- priv->write_reg(priv, XCAN_SRR_OFFSET, XCAN_SRR_RESET_MASK);
+ if (skb) {
+ cf->can_id |= CAN_ERR_CRTL;
+ cf->data[1] |= CAN_ERR_CRTL_RX_OVERFLOW;
+@@ -708,26 +826,20 @@ static int xcan_rx_poll(struct napi_struct *napi, int quota)
+
+ isr = priv->read_reg(priv, XCAN_ISR_OFFSET);
+ while ((isr & XCAN_IXR_RXNEMP_MASK) && (work_done < quota)) {
+- if (isr & XCAN_IXR_RXOK_MASK) {
+- priv->write_reg(priv, XCAN_ICR_OFFSET,
+- XCAN_IXR_RXOK_MASK);
+- work_done += xcan_rx(ndev);
+- } else {
+- priv->write_reg(priv, XCAN_ICR_OFFSET,
+- XCAN_IXR_RXNEMP_MASK);
+- break;
+- }
++ work_done += xcan_rx(ndev);
+ priv->write_reg(priv, XCAN_ICR_OFFSET, XCAN_IXR_RXNEMP_MASK);
+ isr = priv->read_reg(priv, XCAN_ISR_OFFSET);
+ }
+
+- if (work_done)
++ if (work_done) {
+ can_led_event(ndev, CAN_LED_EVENT_RX);
++ xcan_update_error_state_after_rxtx(ndev);
++ }
+
+ if (work_done < quota) {
+ napi_complete(napi);
+ ier = priv->read_reg(priv, XCAN_IER_OFFSET);
+- ier |= (XCAN_IXR_RXOK_MASK | XCAN_IXR_RXNEMP_MASK);
++ ier |= XCAN_IXR_RXNEMP_MASK;
+ priv->write_reg(priv, XCAN_IER_OFFSET, ier);
+ }
+ return work_done;
+@@ -742,18 +854,71 @@ static void xcan_tx_interrupt(struct net_device *ndev, u32 isr)
+ {
+ struct xcan_priv *priv = netdev_priv(ndev);
+ struct net_device_stats *stats = &ndev->stats;
++ unsigned int frames_in_fifo;
++ int frames_sent = 1; /* TXOK => at least 1 frame was sent */
++ unsigned long flags;
++ int retries = 0;
++
++ /* Synchronize with xmit as we need to know the exact number
++ * of frames in the FIFO to stay in sync due to the TXFEMP
++ * handling.
++ * This also prevents a race between netif_wake_queue() and
++ * netif_stop_queue().
++ */
++ spin_lock_irqsave(&priv->tx_lock, flags);
+
+- while ((priv->tx_head - priv->tx_tail > 0) &&
+- (isr & XCAN_IXR_TXOK_MASK)) {
++ frames_in_fifo = priv->tx_head - priv->tx_tail;
++
++ if (WARN_ON_ONCE(frames_in_fifo == 0)) {
++ /* clear TXOK anyway to avoid getting back here */
+ priv->write_reg(priv, XCAN_ICR_OFFSET, XCAN_IXR_TXOK_MASK);
++ spin_unlock_irqrestore(&priv->tx_lock, flags);
++ return;
++ }
++
++ /* Check if 2 frames were sent (TXOK only means that at least 1
++ * frame was sent).
++ */
++ if (frames_in_fifo > 1) {
++ WARN_ON(frames_in_fifo > priv->tx_max);
++
++ /* Synchronize TXOK and isr so that after the loop:
++ * (1) isr variable is up-to-date at least up to TXOK clear
++ * time. This avoids us clearing a TXOK of a second frame
++ * but not noticing that the FIFO is now empty and thus
++ * marking only a single frame as sent.
++ * (2) No TXOK is left. Having one could mean leaving a
++ * stray TXOK as we might process the associated frame
++ * via TXFEMP handling as we read TXFEMP *after* TXOK
++ * clear to satisfy (1).
++ */
++ while ((isr & XCAN_IXR_TXOK_MASK) && !WARN_ON(++retries == 100)) {
++ priv->write_reg(priv, XCAN_ICR_OFFSET, XCAN_IXR_TXOK_MASK);
++ isr = priv->read_reg(priv, XCAN_ISR_OFFSET);
++ }
++
++ if (isr & XCAN_IXR_TXFEMP_MASK) {
++ /* nothing in FIFO anymore */
++ frames_sent = frames_in_fifo;
++ }
++ } else {
++ /* single frame in fifo, just clear TXOK */
++ priv->write_reg(priv, XCAN_ICR_OFFSET, XCAN_IXR_TXOK_MASK);
++ }
++
++ while (frames_sent--) {
+ can_get_echo_skb(ndev, priv->tx_tail %
+ priv->tx_max);
+ priv->tx_tail++;
+ stats->tx_packets++;
+- isr = priv->read_reg(priv, XCAN_ISR_OFFSET);
+ }
+- can_led_event(ndev, CAN_LED_EVENT_TX);
++
+ netif_wake_queue(ndev);
++
++ spin_unlock_irqrestore(&priv->tx_lock, flags);
++
++ can_led_event(ndev, CAN_LED_EVENT_TX);
++ xcan_update_error_state_after_rxtx(ndev);
+ }
+
+ /**
+@@ -772,6 +937,7 @@ static irqreturn_t xcan_interrupt(int irq, void *dev_id)
+ struct net_device *ndev = (struct net_device *)dev_id;
+ struct xcan_priv *priv = netdev_priv(ndev);
+ u32 isr, ier;
++ u32 isr_errors;
+
+ /* Get the interrupt status from Xilinx CAN */
+ isr = priv->read_reg(priv, XCAN_ISR_OFFSET);
+@@ -790,18 +956,17 @@ static irqreturn_t xcan_interrupt(int irq, void *dev_id)
+ xcan_tx_interrupt(ndev, isr);
+
+ /* Check for the type of error interrupt and Processing it */
+- if (isr & (XCAN_IXR_ERROR_MASK | XCAN_IXR_RXOFLW_MASK |
+- XCAN_IXR_BSOFF_MASK | XCAN_IXR_ARBLST_MASK)) {
+- priv->write_reg(priv, XCAN_ICR_OFFSET, (XCAN_IXR_ERROR_MASK |
+- XCAN_IXR_RXOFLW_MASK | XCAN_IXR_BSOFF_MASK |
+- XCAN_IXR_ARBLST_MASK));
++ isr_errors = isr & (XCAN_IXR_ERROR_MASK | XCAN_IXR_RXOFLW_MASK |
++ XCAN_IXR_BSOFF_MASK | XCAN_IXR_ARBLST_MASK);
++ if (isr_errors) {
++ priv->write_reg(priv, XCAN_ICR_OFFSET, isr_errors);
+ xcan_err_interrupt(ndev, isr);
+ }
+
+ /* Check for the type of receive interrupt and Processing it */
+- if (isr & (XCAN_IXR_RXNEMP_MASK | XCAN_IXR_RXOK_MASK)) {
++ if (isr & XCAN_IXR_RXNEMP_MASK) {
+ ier = priv->read_reg(priv, XCAN_IER_OFFSET);
+- ier &= ~(XCAN_IXR_RXNEMP_MASK | XCAN_IXR_RXOK_MASK);
++ ier &= ~XCAN_IXR_RXNEMP_MASK;
+ priv->write_reg(priv, XCAN_IER_OFFSET, ier);
+ napi_schedule(&priv->napi);
+ }
+@@ -1030,6 +1195,18 @@ static int __maybe_unused xcan_resume(struct device *dev)
+
+ static SIMPLE_DEV_PM_OPS(xcan_dev_pm_ops, xcan_suspend, xcan_resume);
+
++static const struct xcan_devtype_data xcan_zynq_data = {
++ .caps = XCAN_CAP_WATERMARK,
++};
++
++/* Match table for OF platform binding */
++static const struct of_device_id xcan_of_match[] = {
++ { .compatible = "xlnx,zynq-can-1.0", .data = &xcan_zynq_data },
++ { .compatible = "xlnx,axi-can-1.00.a", },
++ { /* end of list */ },
++};
++MODULE_DEVICE_TABLE(of, xcan_of_match);
++
+ /**
+ * xcan_probe - Platform registration call
+ * @pdev: Handle to the platform device structure
+@@ -1044,8 +1221,10 @@ static int xcan_probe(struct platform_device *pdev)
+ struct resource *res; /* IO mem resources */
+ struct net_device *ndev;
+ struct xcan_priv *priv;
++ const struct of_device_id *of_id;
++ int caps = 0;
+ void __iomem *addr;
+- int ret, rx_max, tx_max;
++ int ret, rx_max, tx_max, tx_fifo_depth;
+
+ /* Get the virtual base address for the device */
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+@@ -1055,7 +1234,8 @@ static int xcan_probe(struct platform_device *pdev)
+ goto err;
+ }
+
+- ret = of_property_read_u32(pdev->dev.of_node, "tx-fifo-depth", &tx_max);
++ ret = of_property_read_u32(pdev->dev.of_node, "tx-fifo-depth",
++ &tx_fifo_depth);
+ if (ret < 0)
+ goto err;
+
+@@ -1063,6 +1243,30 @@ static int xcan_probe(struct platform_device *pdev)
+ if (ret < 0)
+ goto err;
+
++ of_id = of_match_device(xcan_of_match, &pdev->dev);
++ if (of_id) {
++ const struct xcan_devtype_data *devtype_data = of_id->data;
++
++ if (devtype_data)
++ caps = devtype_data->caps;
++ }
++
++ /* There is no way to directly figure out how many frames have been
++ * sent when the TXOK interrupt is processed. If watermark programming
++ * is supported, we can have 2 frames in the FIFO and use TXFEMP
++ * to determine if 1 or 2 frames have been sent.
++ * Theoretically we should be able to use TXFWMEMP to determine up
++ * to 3 frames, but it seems that after putting a second frame in the
++ * FIFO, with watermark at 2 frames, it can happen that TXFWMEMP (less
++ * than 2 frames in FIFO) is set anyway with no TXOK (a frame was
++ * sent), which is not a sensible state - possibly TXFWMEMP is not
++ * completely synchronized with the rest of the bits?
++ */
++ if (caps & XCAN_CAP_WATERMARK)
++ tx_max = min(tx_fifo_depth, 2);
++ else
++ tx_max = 1;
++
+ /* Create a CAN device instance */
+ ndev = alloc_candev(sizeof(struct xcan_priv), tx_max);
+ if (!ndev)
+@@ -1077,6 +1281,7 @@ static int xcan_probe(struct platform_device *pdev)
+ CAN_CTRLMODE_BERR_REPORTING;
+ priv->reg_base = addr;
+ priv->tx_max = tx_max;
++ spin_lock_init(&priv->tx_lock);
+
+ /* Get IRQ for the device */
+ ndev->irq = platform_get_irq(pdev, 0);
+@@ -1144,9 +1349,9 @@ static int xcan_probe(struct platform_device *pdev)
+ devm_can_led_init(ndev);
+ clk_disable_unprepare(priv->bus_clk);
+ clk_disable_unprepare(priv->can_clk);
+- netdev_dbg(ndev, "reg_base=0x%p irq=%d clock=%d, tx fifo depth:%d\n",
++ netdev_dbg(ndev, "reg_base=0x%p irq=%d clock=%d, tx fifo depth: actual %d, using %d\n",
+ priv->reg_base, ndev->irq, priv->can.clock.freq,
+- priv->tx_max);
++ tx_fifo_depth, priv->tx_max);
+
+ return 0;
+
+@@ -1182,14 +1387,6 @@ static int xcan_remove(struct platform_device *pdev)
+ return 0;
+ }
+
+-/* Match table for OF platform binding */
+-static const struct of_device_id xcan_of_match[] = {
+- { .compatible = "xlnx,zynq-can-1.0", },
+- { .compatible = "xlnx,axi-can-1.00.a", },
+- { /* end of list */ },
+-};
+-MODULE_DEVICE_TABLE(of, xcan_of_match);
+-
+ static struct platform_driver xcan_driver = {
+ .probe = xcan_probe,
+ .remove = xcan_remove,
+diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+index e3080fbd9d00..7911dc3da98e 100644
+--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
++++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+@@ -2891,7 +2891,7 @@ int mlx4_RST2INIT_QP_wrapper(struct mlx4_dev *dev, int slave,
+ u32 srqn = qp_get_srqn(qpc) & 0xffffff;
+ int use_srq = (qp_get_srqn(qpc) >> 24) & 1;
+ struct res_srq *srq;
+- int local_qpn = be32_to_cpu(qpc->local_qpn) & 0xffffff;
++ int local_qpn = vhcr->in_modifier & 0xffffff;
+
+ err = adjust_qp_sched_queue(dev, slave, qpc, inbox);
+ if (err)
+diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c
+index 7ed30d0b5273..a501f3ba6a3f 100644
+--- a/drivers/usb/class/cdc-acm.c
++++ b/drivers/usb/class/cdc-acm.c
+@@ -1771,6 +1771,9 @@ static const struct usb_device_id acm_ids[] = {
+ { USB_DEVICE(0x09d8, 0x0320), /* Elatec GmbH TWN3 */
+ .driver_info = NO_UNION_NORMAL, /* has misplaced union descriptor */
+ },
++ { USB_DEVICE(0x0ca6, 0xa050), /* Castles VEGA3000 */
++ .driver_info = NO_UNION_NORMAL, /* reports zero length descriptor */
++ },
+
+ { USB_DEVICE(0x2912, 0x0001), /* ATOL FPrint */
+ .driver_info = CLEAR_HALT_CONDITIONS,
+diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
+index 4d86da0df131..93756664592a 100644
+--- a/drivers/usb/core/hub.c
++++ b/drivers/usb/core/hub.c
+@@ -1123,10 +1123,14 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type)
+
+ if (!udev || udev->state == USB_STATE_NOTATTACHED) {
+ /* Tell hub_wq to disconnect the device or
+- * check for a new connection
++ * check for a new connection or over current condition.
++ * Based on USB2.0 Spec Section 11.12.5,
++ * C_PORT_OVER_CURRENT could be set while
++ * PORT_OVER_CURRENT is not. So check for any of them.
+ */
+ if (udev || (portstatus & USB_PORT_STAT_CONNECTION) ||
+- (portstatus & USB_PORT_STAT_OVERCURRENT))
++ (portstatus & USB_PORT_STAT_OVERCURRENT) ||
++ (portchange & USB_PORT_STAT_C_OVERCURRENT))
+ set_bit(port1, hub->change_bits);
+
+ } else if (portstatus & USB_PORT_STAT_ENABLE) {
+diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c
+index 4191feb765b1..4800bb22cdd6 100644
+--- a/drivers/usb/gadget/function/f_fs.c
++++ b/drivers/usb/gadget/function/f_fs.c
+@@ -3037,7 +3037,7 @@ static int ffs_func_setup(struct usb_function *f,
+ __ffs_event_add(ffs, FUNCTIONFS_SETUP);
+ spin_unlock_irqrestore(&ffs->ev.waitq.lock, flags);
+
+- return USB_GADGET_DELAYED_STATUS;
++ return creq->wLength == 0 ? USB_GADGET_DELAYED_STATUS : 0;
+ }
+
+ static void ffs_func_suspend(struct usb_function *f)
+diff --git a/include/net/tcp.h b/include/net/tcp.h
+index a3696b778757..65babd8a682d 100644
+--- a/include/net/tcp.h
++++ b/include/net/tcp.h
+@@ -376,6 +376,7 @@ ssize_t tcp_splice_read(struct socket *sk, loff_t *ppos,
+ struct pipe_inode_info *pipe, size_t len,
+ unsigned int flags);
+
++void tcp_enter_quickack_mode(struct sock *sk);
+ static inline void tcp_dec_quickack_mode(struct sock *sk,
+ const unsigned int pkts)
+ {
+@@ -559,6 +560,7 @@ void tcp_send_fin(struct sock *sk);
+ void tcp_send_active_reset(struct sock *sk, gfp_t priority);
+ int tcp_send_synack(struct sock *);
+ void tcp_push_one(struct sock *, unsigned int mss_now);
++void __tcp_send_ack(struct sock *sk, u32 rcv_nxt);
+ void tcp_send_ack(struct sock *sk);
+ void tcp_send_delayed_ack(struct sock *sk);
+ void tcp_send_loss_probe(struct sock *sk);
+diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
+index 2017ffa5197a..96c9c0f0905a 100644
+--- a/net/core/rtnetlink.c
++++ b/net/core/rtnetlink.c
+@@ -2087,9 +2087,12 @@ int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm)
+ return err;
+ }
+
+- dev->rtnl_link_state = RTNL_LINK_INITIALIZED;
+-
+- __dev_notify_flags(dev, old_flags, ~0U);
++ if (dev->rtnl_link_state == RTNL_LINK_INITIALIZED) {
++ __dev_notify_flags(dev, old_flags, 0U);
++ } else {
++ dev->rtnl_link_state = RTNL_LINK_INITIALIZED;
++ __dev_notify_flags(dev, old_flags, ~0U);
++ }
+ return 0;
+ }
+ EXPORT_SYMBOL(rtnl_configure_link);
+diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
+index 10286432f684..c11bb6d2d00a 100644
+--- a/net/ipv4/ip_output.c
++++ b/net/ipv4/ip_output.c
+@@ -480,6 +480,8 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
+ to->dev = from->dev;
+ to->mark = from->mark;
+
++ skb_copy_hash(to, from);
++
+ /* Copy the flags to each fragment. */
+ IPCB(to)->flags = IPCB(from)->flags;
+
+diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
+index ce9a7fbb7c5f..88426a6a7a85 100644
+--- a/net/ipv4/ip_sockglue.c
++++ b/net/ipv4/ip_sockglue.c
+@@ -135,15 +135,18 @@ static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb)
+ {
+ struct sockaddr_in sin;
+ const struct iphdr *iph = ip_hdr(skb);
+- __be16 *ports = (__be16 *)skb_transport_header(skb);
++ __be16 *ports;
++ int end;
+
+- if (skb_transport_offset(skb) + 4 > skb->len)
++ end = skb_transport_offset(skb) + 4;
++ if (end > 0 && !pskb_may_pull(skb, end))
+ return;
+
+ /* All current transport protocols have the port numbers in the
+ * first four bytes of the transport header and this function is
+ * written with this assumption in mind.
+ */
++ ports = (__be16 *)skb_transport_header(skb);
+
+ sin.sin_family = AF_INET;
+ sin.sin_addr.s_addr = iph->daddr;
+diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c
+index 55d7da1d2ce9..e63b764e55ea 100644
+--- a/net/ipv4/tcp_dctcp.c
++++ b/net/ipv4/tcp_dctcp.c
+@@ -131,23 +131,14 @@ static void dctcp_ce_state_0_to_1(struct sock *sk)
+ struct dctcp *ca = inet_csk_ca(sk);
+ struct tcp_sock *tp = tcp_sk(sk);
+
+- /* State has changed from CE=0 to CE=1 and delayed
+- * ACK has not sent yet.
+- */
+- if (!ca->ce_state && ca->delayed_ack_reserved) {
+- u32 tmp_rcv_nxt;
+-
+- /* Save current rcv_nxt. */
+- tmp_rcv_nxt = tp->rcv_nxt;
+-
+- /* Generate previous ack with CE=0. */
+- tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
+- tp->rcv_nxt = ca->prior_rcv_nxt;
+-
+- tcp_send_ack(sk);
+-
+- /* Recover current rcv_nxt. */
+- tp->rcv_nxt = tmp_rcv_nxt;
++ if (!ca->ce_state) {
++ /* State has changed from CE=0 to CE=1, force an immediate
++ * ACK to reflect the new CE state. If an ACK was delayed,
++ * send that first to reflect the prior CE state.
++ */
++ if (inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER)
++ __tcp_send_ack(sk, ca->prior_rcv_nxt);
++ tcp_enter_quickack_mode(sk);
+ }
+
+ ca->prior_rcv_nxt = tp->rcv_nxt;
+@@ -161,23 +152,14 @@ static void dctcp_ce_state_1_to_0(struct sock *sk)
+ struct dctcp *ca = inet_csk_ca(sk);
+ struct tcp_sock *tp = tcp_sk(sk);
+
+- /* State has changed from CE=1 to CE=0 and delayed
+- * ACK has not sent yet.
+- */
+- if (ca->ce_state && ca->delayed_ack_reserved) {
+- u32 tmp_rcv_nxt;
+-
+- /* Save current rcv_nxt. */
+- tmp_rcv_nxt = tp->rcv_nxt;
+-
+- /* Generate previous ack with CE=1. */
+- tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
+- tp->rcv_nxt = ca->prior_rcv_nxt;
+-
+- tcp_send_ack(sk);
+-
+- /* Recover current rcv_nxt. */
+- tp->rcv_nxt = tmp_rcv_nxt;
++ if (ca->ce_state) {
++ /* State has changed from CE=1 to CE=0, force an immediate
++ * ACK to reflect the new CE state. If an ACK was delayed,
++ * send that first to reflect the prior CE state.
++ */
++ if (inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER)
++ __tcp_send_ack(sk, ca->prior_rcv_nxt);
++ tcp_enter_quickack_mode(sk);
+ }
+
+ ca->prior_rcv_nxt = tp->rcv_nxt;
+diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
+index 4350ee058441..5c645069a09a 100644
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -187,13 +187,14 @@ static void tcp_incr_quickack(struct sock *sk)
+ icsk->icsk_ack.quick = min(quickacks, TCP_MAX_QUICKACKS);
+ }
+
+-static void tcp_enter_quickack_mode(struct sock *sk)
++void tcp_enter_quickack_mode(struct sock *sk)
+ {
+ struct inet_connection_sock *icsk = inet_csk(sk);
+ tcp_incr_quickack(sk);
+ icsk->icsk_ack.pingpong = 0;
+ icsk->icsk_ack.ato = TCP_ATO_MIN;
+ }
++EXPORT_SYMBOL(tcp_enter_quickack_mode);
+
+ /* Send ACKs quickly, if "quick" count is not exhausted
+ * and the session is not interactive.
+@@ -4788,6 +4789,7 @@ restart:
+ static void tcp_collapse_ofo_queue(struct sock *sk)
+ {
+ struct tcp_sock *tp = tcp_sk(sk);
++ u32 range_truesize, sum_tiny = 0;
+ struct sk_buff *skb = skb_peek(&tp->out_of_order_queue);
+ struct sk_buff *head;
+ u32 start, end;
+@@ -4797,6 +4799,7 @@ static void tcp_collapse_ofo_queue(struct sock *sk)
+
+ start = TCP_SKB_CB(skb)->seq;
+ end = TCP_SKB_CB(skb)->end_seq;
++ range_truesize = skb->truesize;
+ head = skb;
+
+ for (;;) {
+@@ -4811,14 +4814,24 @@ static void tcp_collapse_ofo_queue(struct sock *sk)
+ if (!skb ||
+ after(TCP_SKB_CB(skb)->seq, end) ||
+ before(TCP_SKB_CB(skb)->end_seq, start)) {
+- tcp_collapse(sk, &tp->out_of_order_queue,
+- head, skb, start, end);
++ /* Do not attempt collapsing tiny skbs */
++ if (range_truesize != head->truesize ||
++ end - start >= SKB_WITH_OVERHEAD(SK_MEM_QUANTUM)) {
++ tcp_collapse(sk, &tp->out_of_order_queue,
++ head, skb, start, end);
++ } else {
++ sum_tiny += range_truesize;
++ if (sum_tiny > sk->sk_rcvbuf >> 3)
++ return;
++ }
++
+ head = skb;
+ if (!skb)
+ break;
+ /* Start new segment */
+ start = TCP_SKB_CB(skb)->seq;
+ end = TCP_SKB_CB(skb)->end_seq;
++ range_truesize = skb->truesize;
+ } else {
+ if (before(TCP_SKB_CB(skb)->seq, start))
+ start = TCP_SKB_CB(skb)->seq;
+@@ -4874,6 +4887,9 @@ static int tcp_prune_queue(struct sock *sk)
+ else if (tcp_under_memory_pressure(sk))
+ tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss);
+
++ if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
++ return 0;
++
+ tcp_collapse_ofo_queue(sk);
+ if (!skb_queue_empty(&sk->sk_receive_queue))
+ tcp_collapse(sk, &sk->sk_receive_queue,
+diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
+index 2854db094864..6fa749ce231f 100644
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -177,8 +177,13 @@ static void tcp_event_data_sent(struct tcp_sock *tp,
+ }
+
+ /* Account for an ACK we sent. */
+-static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts)
++static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts,
++ u32 rcv_nxt)
+ {
++ struct tcp_sock *tp = tcp_sk(sk);
++
++ if (unlikely(rcv_nxt != tp->rcv_nxt))
++ return; /* Special ACK sent by DCTCP to reflect ECN */
+ tcp_dec_quickack_mode(sk, pkts);
+ inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
+ }
+@@ -901,8 +906,8 @@ out:
+ * We are working here with either a clone of the original
+ * SKB, or a fresh unique copy made by the retransmit engine.
+ */
+-static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
+- gfp_t gfp_mask)
++static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
++ int clone_it, gfp_t gfp_mask, u32 rcv_nxt)
+ {
+ const struct inet_connection_sock *icsk = inet_csk(sk);
+ struct inet_sock *inet;
+@@ -962,7 +967,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
+ th->source = inet->inet_sport;
+ th->dest = inet->inet_dport;
+ th->seq = htonl(tcb->seq);
+- th->ack_seq = htonl(tp->rcv_nxt);
++ th->ack_seq = htonl(rcv_nxt);
+ *(((__be16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) |
+ tcb->tcp_flags);
+
+@@ -1005,7 +1010,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
+ icsk->icsk_af_ops->send_check(sk, skb);
+
+ if (likely(tcb->tcp_flags & TCPHDR_ACK))
+- tcp_event_ack_sent(sk, tcp_skb_pcount(skb));
++ tcp_event_ack_sent(sk, tcp_skb_pcount(skb), rcv_nxt);
+
+ if (skb->len != tcp_header_size)
+ tcp_event_data_sent(tp, sk);
+@@ -1036,6 +1041,13 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
+ return net_xmit_eval(err);
+ }
+
++static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
++ gfp_t gfp_mask)
++{
++ return __tcp_transmit_skb(sk, skb, clone_it, gfp_mask,
++ tcp_sk(sk)->rcv_nxt);
++}
++
+ /* This routine just queues the buffer for sending.
+ *
+ * NOTE: probe0 timer is not checked, do not forget tcp_push_pending_frames,
+@@ -3354,7 +3366,7 @@ void tcp_send_delayed_ack(struct sock *sk)
+ }
+
+ /* This routine sends an ack and also updates the window. */
+-void tcp_send_ack(struct sock *sk)
++void __tcp_send_ack(struct sock *sk, u32 rcv_nxt)
+ {
+ struct sk_buff *buff;
+
+@@ -3391,9 +3403,14 @@ void tcp_send_ack(struct sock *sk)
+
+ /* Send it off, this clears delayed acks for us. */
+ skb_mstamp_get(&buff->skb_mstamp);
+- tcp_transmit_skb(sk, buff, 0, sk_gfp_atomic(sk, GFP_ATOMIC));
++ __tcp_transmit_skb(sk, buff, 0, sk_gfp_atomic(sk, GFP_ATOMIC), rcv_nxt);
++}
++EXPORT_SYMBOL_GPL(__tcp_send_ack);
++
++void tcp_send_ack(struct sock *sk)
++{
++ __tcp_send_ack(sk, tcp_sk(sk)->rcv_nxt);
+ }
+-EXPORT_SYMBOL_GPL(tcp_send_ack);
+
+ /* This routine sends a packet with an out of date sequence
+ * number. It assumes the other end will try to ack it.
+diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
+index cae37bfd12ab..9f6e57ded338 100644
+--- a/net/ipv6/datagram.c
++++ b/net/ipv6/datagram.c
+@@ -657,13 +657,16 @@ void ip6_datagram_recv_specific_ctl(struct sock *sk, struct msghdr *msg,
+ }
+ if (np->rxopt.bits.rxorigdstaddr) {
+ struct sockaddr_in6 sin6;
+- __be16 *ports = (__be16 *) skb_transport_header(skb);
++ __be16 *ports;
++ int end;
+
+- if (skb_transport_offset(skb) + 4 <= skb->len) {
++ end = skb_transport_offset(skb) + 4;
++ if (end <= 0 || pskb_may_pull(skb, end)) {
+ /* All current transport protocols have the port numbers in the
+ * first four bytes of the transport header and this function is
+ * written with this assumption in mind.
+ */
++ ports = (__be16 *)skb_transport_header(skb);
+
+ sin6.sin6_family = AF_INET6;
+ sin6.sin6_addr = ipv6_hdr(skb)->daddr;
+diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
+index 74786783834b..0feede45bd28 100644
+--- a/net/ipv6/ip6_output.c
++++ b/net/ipv6/ip6_output.c
+@@ -559,6 +559,8 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
+ to->dev = from->dev;
+ to->mark = from->mark;
+
++ skb_copy_hash(to, from);
++
+ #ifdef CONFIG_NET_SCHED
+ to->tc_index = from->tc_index;
+ #endif