authorMike Pagano <mpagano@gentoo.org>2024-03-06 13:07:47 -0500
committerMike Pagano <mpagano@gentoo.org>2024-03-06 13:07:47 -0500
commit8bc8ef53218c6b5ef66b10198d58003c5a0a0397 (patch)
tree98f175d113a89fd018246d7870fd1073e06a2e98
parentLinux patch 6.1.80 (diff)
downloadlinux-patches-8bc8ef53218c6b5ef66b10198d58003c5a0a0397.tar.gz
linux-patches-8bc8ef53218c6b5ef66b10198d58003c5a0a0397.tar.bz2
linux-patches-8bc8ef53218c6b5ef66b10198d58003c5a0a0397.zip
Linux patch 6.1.81 (6.1-90)
Signed-off-by: Mike Pagano <mpagano@gentoo.org>
-rw-r--r-- 0000_README             |     4
-rw-r--r-- 1080_linux-6.1.81.patch | 14458
2 files changed, 14462 insertions, 0 deletions
diff --git a/0000_README b/0000_README
index 0bde520b..d0f067de 100644
--- a/0000_README
+++ b/0000_README
@@ -363,6 +363,10 @@ Patch: 1079_linux-6.1.80.patch
From: https://www.kernel.org
Desc: Linux 6.1.80
+Patch: 1080_linux-6.1.81.patch
+From: https://www.kernel.org
+Desc: Linux 6.1.81
+
Patch: 1500_XATTR_USER_PREFIX.patch
From: https://bugs.gentoo.org/show_bug.cgi?id=470644
Desc: Support for namespace user.pax.* on tmpfs.
diff --git a/1080_linux-6.1.81.patch b/1080_linux-6.1.81.patch
new file mode 100644
index 00000000..59f3b367
--- /dev/null
+++ b/1080_linux-6.1.81.patch
@@ -0,0 +1,14458 @@
+diff --git a/Documentation/x86/boot.rst b/Documentation/x86/boot.rst
+index 894a198970055..bac3789f3e8fa 100644
+--- a/Documentation/x86/boot.rst
++++ b/Documentation/x86/boot.rst
+@@ -1416,7 +1416,7 @@ execution context provided by the EFI firmware.
+
+ The function prototype for the handover entry point looks like this::
+
+- efi_main(void *handle, efi_system_table_t *table, struct boot_params *bp)
++ efi_stub_entry(void *handle, efi_system_table_t *table, struct boot_params *bp)
+
+ 'handle' is the EFI image handle passed to the boot loader by the EFI
+ firmware, 'table' is the EFI system table - these are the first two
+diff --git a/Documentation/x86/mds.rst b/Documentation/x86/mds.rst
+index 5d4330be200f9..e801df0bb3a81 100644
+--- a/Documentation/x86/mds.rst
++++ b/Documentation/x86/mds.rst
+@@ -95,6 +95,9 @@ The kernel provides a function to invoke the buffer clearing:
+
+ mds_clear_cpu_buffers()
+
++The macro CLEAR_CPU_BUFFERS can also be used in ASM late in the exit-to-user
++path. Other than CFLAGS.ZF, this macro doesn't clobber any registers.
++
+ The mitigation is invoked on kernel/userspace, hypervisor/guest and C-state
+ (idle) transitions.
+
+@@ -138,17 +141,30 @@ Mitigation points
+
+ When transitioning from kernel to user space the CPU buffers are flushed
+ on affected CPUs when the mitigation is not disabled on the kernel
+- command line. The migitation is enabled through the static key
+- mds_user_clear.
+-
+- The mitigation is invoked in prepare_exit_to_usermode() which covers
+- all but one of the kernel to user space transitions. The exception
+- is when we return from a Non Maskable Interrupt (NMI), which is
+- handled directly in do_nmi().
+-
+- (The reason that NMI is special is that prepare_exit_to_usermode() can
+- enable IRQs. In NMI context, NMIs are blocked, and we don't want to
+- enable IRQs with NMIs blocked.)
++ command line. The mitigation is enabled through the feature flag
++ X86_FEATURE_CLEAR_CPU_BUF.
++
++ The mitigation is invoked just before transitioning to userspace after
++ user registers are restored. This is done to minimize the window in
++   which kernel data could be accessed after VERW, e.g. via an NMI.
++
++ **Corner case not handled**
++   Interrupts returning to the kernel don't clear the CPU buffers, since the
++   exit-to-user path is expected to do that anyway. But there could be
++   a case where an NMI is generated in the kernel after the exit-to-user path
++   has cleared the buffers. This case is not handled, and NMIs returning to
++   the kernel don't clear the CPU buffers, because:
++
++ 1. It is rare to get an NMI after VERW, but before returning to userspace.
++ 2. For an unprivileged user, there is no known way to make that NMI
++ less rare or target it.
++ 3. It would take a large number of these precisely-timed NMIs to mount
++ an actual attack. There's presumably not enough bandwidth.
++ 4. The NMI in question occurs after a VERW, i.e. when user state is
++      restored and most interesting data is already scrubbed. What's left
++      is only the data that the NMI touches, and that may or may not be of
++ any interest.
+
+
+ 2. C-State transition
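+
As a hedged illustration of the clearing described in the mds.rst hunk above: only mds_clear_cpu_buffers() is the documented helper; the wrapper function and header choice below are assumptions, a minimal sketch rather than the kernel's actual transition code.

    #include <asm/nospec-branch.h>  /* mds_clear_cpu_buffers() */

    /*
     * Hedged sketch: a transition point invoking the documented clearing
     * helper. mds_clear_cpu_buffers() executes VERW with a kernel data
     * segment descriptor operand; microcode clears the affected CPU
     * buffers as a side effect, clobbering only CFLAGS.ZF.
     */
    static inline void transition_example(void)
    {
            mds_clear_cpu_buffers();
    }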
+diff --git a/MAINTAINERS b/MAINTAINERS
+index 13d1078808bb5..bbfedb0b20938 100644
+--- a/MAINTAINERS
++++ b/MAINTAINERS
+@@ -10051,6 +10051,7 @@ F: drivers/infiniband/
+ F: include/rdma/
+ F: include/trace/events/ib_mad.h
+ F: include/trace/events/ib_umad.h
++F: include/trace/misc/rdma.h
+ F: include/uapi/linux/if_infiniband.h
+ F: include/uapi/rdma/
+ F: samples/bpf/ibumad_kern.c
+@@ -11139,6 +11140,12 @@ F: fs/nfs_common/
+ F: fs/nfsd/
+ F: include/linux/lockd/
+ F: include/linux/sunrpc/
++F: include/trace/events/rpcgss.h
++F: include/trace/events/rpcrdma.h
++F: include/trace/events/sunrpc.h
++F: include/trace/misc/fs.h
++F: include/trace/misc/nfs.h
++F: include/trace/misc/sunrpc.h
+ F: include/uapi/linux/nfsd/
+ F: include/uapi/linux/sunrpc/
+ F: net/sunrpc/
+diff --git a/Makefile b/Makefile
+index bc4adb561a7cf..e13df565a1cb6 100644
+--- a/Makefile
++++ b/Makefile
+@@ -1,7 +1,7 @@
+ # SPDX-License-Identifier: GPL-2.0
+ VERSION = 6
+ PATCHLEVEL = 1
+-SUBLEVEL = 80
++SUBLEVEL = 81
+ EXTRAVERSION =
+ NAME = Curry Ramen
+
+diff --git a/arch/arm/boot/dts/imx23.dtsi b/arch/arm/boot/dts/imx23.dtsi
+index ec476b1596496..b236d23f80715 100644
+--- a/arch/arm/boot/dts/imx23.dtsi
++++ b/arch/arm/boot/dts/imx23.dtsi
+@@ -59,7 +59,7 @@ icoll: interrupt-controller@80000000 {
+ reg = <0x80000000 0x2000>;
+ };
+
+- dma_apbh: dma-apbh@80004000 {
++ dma_apbh: dma-controller@80004000 {
+ compatible = "fsl,imx23-dma-apbh";
+ reg = <0x80004000 0x2000>;
+ interrupts = <0 14 20 0
+diff --git a/arch/arm/boot/dts/imx28.dtsi b/arch/arm/boot/dts/imx28.dtsi
+index b15df16ecb01a..b81592a613112 100644
+--- a/arch/arm/boot/dts/imx28.dtsi
++++ b/arch/arm/boot/dts/imx28.dtsi
+@@ -78,7 +78,7 @@ hsadc: hsadc@80002000 {
+ status = "disabled";
+ };
+
+- dma_apbh: dma-apbh@80004000 {
++ dma_apbh: dma-controller@80004000 {
+ compatible = "fsl,imx28-dma-apbh";
+ reg = <0x80004000 0x2000>;
+ interrupts = <82 83 84 85
+diff --git a/arch/arm/boot/dts/imx6qdl.dtsi b/arch/arm/boot/dts/imx6qdl.dtsi
+index ff1e0173b39be..2c6eada01d792 100644
+--- a/arch/arm/boot/dts/imx6qdl.dtsi
++++ b/arch/arm/boot/dts/imx6qdl.dtsi
+@@ -150,7 +150,7 @@ soc: soc {
+ interrupt-parent = <&gpc>;
+ ranges;
+
+- dma_apbh: dma-apbh@110000 {
++ dma_apbh: dma-controller@110000 {
+ compatible = "fsl,imx6q-dma-apbh", "fsl,imx28-dma-apbh";
+ reg = <0x00110000 0x2000>;
+ interrupts = <0 13 IRQ_TYPE_LEVEL_HIGH>,
+diff --git a/arch/arm/boot/dts/imx6sx.dtsi b/arch/arm/boot/dts/imx6sx.dtsi
+index 1f1053a898fbf..67d344ae76b51 100644
+--- a/arch/arm/boot/dts/imx6sx.dtsi
++++ b/arch/arm/boot/dts/imx6sx.dtsi
+@@ -209,7 +209,7 @@ gpu: gpu@1800000 {
+ power-domains = <&pd_pu>;
+ };
+
+- dma_apbh: dma-apbh@1804000 {
++ dma_apbh: dma-controller@1804000 {
+ compatible = "fsl,imx6sx-dma-apbh", "fsl,imx28-dma-apbh";
+ reg = <0x01804000 0x2000>;
+ interrupts = <GIC_SPI 13 IRQ_TYPE_LEVEL_HIGH>,
+diff --git a/arch/arm/boot/dts/imx6ul.dtsi b/arch/arm/boot/dts/imx6ul.dtsi
+index 2b5996395701a..aac081b6daaac 100644
+--- a/arch/arm/boot/dts/imx6ul.dtsi
++++ b/arch/arm/boot/dts/imx6ul.dtsi
+@@ -164,7 +164,7 @@ intc: interrupt-controller@a01000 {
+ <0x00a06000 0x2000>;
+ };
+
+- dma_apbh: dma-apbh@1804000 {
++ dma_apbh: dma-controller@1804000 {
+ compatible = "fsl,imx6q-dma-apbh", "fsl,imx28-dma-apbh";
+ reg = <0x01804000 0x2000>;
+ interrupts = <0 13 IRQ_TYPE_LEVEL_HIGH>,
+diff --git a/arch/arm/boot/dts/imx7s.dtsi b/arch/arm/boot/dts/imx7s.dtsi
+index 4b23630fc738d..69aebc691526f 100644
+--- a/arch/arm/boot/dts/imx7s.dtsi
++++ b/arch/arm/boot/dts/imx7s.dtsi
+@@ -1267,14 +1267,13 @@ fec1: ethernet@30be0000 {
+ };
+ };
+
+- dma_apbh: dma-apbh@33000000 {
++ dma_apbh: dma-controller@33000000 {
+ compatible = "fsl,imx7d-dma-apbh", "fsl,imx28-dma-apbh";
+ reg = <0x33000000 0x2000>;
+ interrupts = <GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>;
+- interrupt-names = "gpmi0", "gpmi1", "gpmi2", "gpmi3";
+ #dma-cells = <1>;
+ dma-channels = <4>;
+ clocks = <&clks IMX7D_NAND_USDHC_BUS_RAWNAND_CLK>;
+diff --git a/arch/arm64/crypto/aes-neonbs-glue.c b/arch/arm64/crypto/aes-neonbs-glue.c
+index bac4cabef6073..467ac2f768ac2 100644
+--- a/arch/arm64/crypto/aes-neonbs-glue.c
++++ b/arch/arm64/crypto/aes-neonbs-glue.c
+@@ -227,8 +227,19 @@ static int ctr_encrypt(struct skcipher_request *req)
+ src += blocks * AES_BLOCK_SIZE;
+ }
+ if (nbytes && walk.nbytes == walk.total) {
++ u8 buf[AES_BLOCK_SIZE];
++ u8 *d = dst;
++
++ if (unlikely(nbytes < AES_BLOCK_SIZE))
++ src = dst = memcpy(buf + sizeof(buf) - nbytes,
++ src, nbytes);
++
+ neon_aes_ctr_encrypt(dst, src, ctx->enc, ctx->key.rounds,
+ nbytes, walk.iv);
++
++ if (unlikely(nbytes < AES_BLOCK_SIZE))
++ memcpy(d, dst, nbytes);
++
+ nbytes = 0;
+ }
+ kernel_neon_end();
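+
The aes-neonbs-glue.c hunk above handles a final partial block by bouncing it through a stack buffer so the NEON helper can safely read and write a whole block. Below is a generic, hedged C sketch of that pattern, not the kernel implementation: it stages the tail at the start of the buffer for simplicity, whereas the kernel stages it at the end to match the NEON helper's access pattern.

    #include <string.h>

    #define BLOCK_SIZE 16  /* stands in for AES_BLOCK_SIZE */

    /* one_block() may read and write a full BLOCK_SIZE at its pointers. */
    static void encrypt_tail(unsigned char *dst, const unsigned char *src,
                             size_t nbytes,
                             void (*one_block)(unsigned char *dst,
                                               const unsigned char *src))
    {
            unsigned char buf[BLOCK_SIZE];
            unsigned char *d = dst;

            if (nbytes < BLOCK_SIZE)
                    /* stage the partial tail in a private full-size buffer */
                    src = dst = memcpy(buf, src, nbytes);

            one_block(dst, src);            /* safe: touches only buf */

            if (dst != d)
                    memcpy(d, dst, nbytes); /* copy only the real bytes back */
    }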
+diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h
+index 62c846be2d76a..a75c0772ecfca 100644
+--- a/arch/arm64/include/asm/efi.h
++++ b/arch/arm64/include/asm/efi.h
+@@ -103,6 +103,7 @@ static inline void free_screen_info(struct screen_info *si)
+ }
+
+ #define EFI_ALLOC_ALIGN SZ_64K
++#define EFI_ALLOC_LIMIT ((1UL << 48) - 1)
+
+ /*
+ * On ARM systems, virtually remapped UEFI runtime services are set up in two
+diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
+index 97b026130c71b..1e5f083cdb720 100644
+--- a/arch/powerpc/platforms/pseries/iommu.c
++++ b/arch/powerpc/platforms/pseries/iommu.c
+@@ -569,29 +569,6 @@ static void iommu_table_setparms(struct pci_controller *phb,
+
+ struct iommu_table_ops iommu_table_lpar_multi_ops;
+
+-/*
+- * iommu_table_setparms_lpar
+- *
+- * Function: On pSeries LPAR systems, return TCE table info, given a pci bus.
+- */
+-static void iommu_table_setparms_lpar(struct pci_controller *phb,
+- struct device_node *dn,
+- struct iommu_table *tbl,
+- struct iommu_table_group *table_group,
+- const __be32 *dma_window)
+-{
+- unsigned long offset, size, liobn;
+-
+- of_parse_dma_window(dn, dma_window, &liobn, &offset, &size);
+-
+- iommu_table_setparms_common(tbl, phb->bus->number, liobn, offset, size, IOMMU_PAGE_SHIFT_4K, NULL,
+- &iommu_table_lpar_multi_ops);
+-
+-
+- table_group->tce32_start = offset;
+- table_group->tce32_size = size;
+-}
+-
+ struct iommu_table_ops iommu_table_pseries_ops = {
+ .set = tce_build_pSeries,
+ .clear = tce_free_pSeries,
+@@ -719,26 +696,71 @@ struct iommu_table_ops iommu_table_lpar_multi_ops = {
+ * dynamic 64bit DMA window, walking up the device tree.
+ */
+ static struct device_node *pci_dma_find(struct device_node *dn,
+- const __be32 **dma_window)
++ struct dynamic_dma_window_prop *prop)
+ {
+- const __be32 *dw = NULL;
++ const __be32 *default_prop = NULL;
++ const __be32 *ddw_prop = NULL;
++ struct device_node *rdn = NULL;
++ bool default_win = false, ddw_win = false;
+
+ for ( ; dn && PCI_DN(dn); dn = dn->parent) {
+- dw = of_get_property(dn, "ibm,dma-window", NULL);
+- if (dw) {
+- if (dma_window)
+- *dma_window = dw;
+- return dn;
++ default_prop = of_get_property(dn, "ibm,dma-window", NULL);
++ if (default_prop) {
++ rdn = dn;
++ default_win = true;
++ }
++ ddw_prop = of_get_property(dn, DIRECT64_PROPNAME, NULL);
++ if (ddw_prop) {
++ rdn = dn;
++ ddw_win = true;
++ break;
++ }
++ ddw_prop = of_get_property(dn, DMA64_PROPNAME, NULL);
++ if (ddw_prop) {
++ rdn = dn;
++ ddw_win = true;
++ break;
+ }
+- dw = of_get_property(dn, DIRECT64_PROPNAME, NULL);
+- if (dw)
+- return dn;
+- dw = of_get_property(dn, DMA64_PROPNAME, NULL);
+- if (dw)
+- return dn;
++
++		/* At least the default window was found, which is the case for normal boot */
++ if (default_win)
++ break;
+ }
+
+- return NULL;
++ /* For PCI devices there will always be a DMA window, either on the device
++	 * or the parent bus.
++ */
++ WARN_ON(!(default_win | ddw_win));
++
++ /* caller doesn't want to get DMA window property */
++ if (!prop)
++ return rdn;
++
++	/* Parse the DMA window property. During normal system boot, only the
++	 * default DMA window is passed in OF. But for kdump, a dedicated adapter
++	 * might have both the default window and a DDW in the FDT. In this
++	 * scenario, the DDW takes precedence over the default window.
++ */
++ if (ddw_win) {
++ struct dynamic_dma_window_prop *p;
++
++ p = (struct dynamic_dma_window_prop *)ddw_prop;
++ prop->liobn = p->liobn;
++ prop->dma_base = p->dma_base;
++ prop->tce_shift = p->tce_shift;
++ prop->window_shift = p->window_shift;
++ } else if (default_win) {
++ unsigned long offset, size, liobn;
++
++ of_parse_dma_window(rdn, default_prop, &liobn, &offset, &size);
++
++ prop->liobn = cpu_to_be32((u32)liobn);
++ prop->dma_base = cpu_to_be64(offset);
++ prop->tce_shift = cpu_to_be32(IOMMU_PAGE_SHIFT_4K);
++ prop->window_shift = cpu_to_be32(order_base_2(size));
++ }
++
++ return rdn;
+ }
+
+ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus)
+@@ -746,17 +768,20 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus)
+ struct iommu_table *tbl;
+ struct device_node *dn, *pdn;
+ struct pci_dn *ppci;
+- const __be32 *dma_window = NULL;
++ struct dynamic_dma_window_prop prop;
+
+ dn = pci_bus_to_OF_node(bus);
+
+ pr_debug("pci_dma_bus_setup_pSeriesLP: setting up bus %pOF\n",
+ dn);
+
+- pdn = pci_dma_find(dn, &dma_window);
++ pdn = pci_dma_find(dn, &prop);
+
+- if (dma_window == NULL)
+- pr_debug(" no ibm,dma-window property !\n");
++	/* On PPC, there will always be a DMA window on the bus or on one of the
++	 * parent buses. During boot, the ibm,dma-window property defines the
++	 * DMA window. For kdump, there will be at least the default window, a
++	 * DDW, or both.
++ */
+
+ ppci = PCI_DN(pdn);
+
+@@ -766,13 +791,24 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus)
+ if (!ppci->table_group) {
+ ppci->table_group = iommu_pseries_alloc_group(ppci->phb->node);
+ tbl = ppci->table_group->tables[0];
+- if (dma_window) {
+- iommu_table_setparms_lpar(ppci->phb, pdn, tbl,
+- ppci->table_group, dma_window);
+
+- if (!iommu_init_table(tbl, ppci->phb->node, 0, 0))
+- panic("Failed to initialize iommu table");
+- }
++ iommu_table_setparms_common(tbl, ppci->phb->bus->number,
++ be32_to_cpu(prop.liobn),
++ be64_to_cpu(prop.dma_base),
++ 1ULL << be32_to_cpu(prop.window_shift),
++ be32_to_cpu(prop.tce_shift), NULL,
++ &iommu_table_lpar_multi_ops);
++
++ /* Only for normal boot with default window. Doesn't matter even
++ * if we set these with DDW which is 64bit during kdump, since
++ * these will not be used during kdump.
++ */
++ ppci->table_group->tce32_start = be64_to_cpu(prop.dma_base);
++ ppci->table_group->tce32_size = 1 << be32_to_cpu(prop.window_shift);
++
++ if (!iommu_init_table(tbl, ppci->phb->node, 0, 0))
++ panic("Failed to initialize iommu table");
++
+ iommu_register_group(ppci->table_group,
+ pci_domain_nr(bus), 0);
+ pr_debug(" created table: %p\n", ppci->table_group);
+@@ -960,6 +996,12 @@ static void find_existing_ddw_windows_named(const char *name)
+ continue;
+ }
+
++ /* If at the time of system initialization, there are DDWs in OF,
++ * it means this is during kexec. DDW could be direct or dynamic.
++ * We will just mark DDWs as "dynamic" since this is kdump path,
++		 * no need to worry about performance. ddw_list_new_entry() will
++ * set window->direct = false.
++ */
+ window = ddw_list_new_entry(pdn, dma64);
+ if (!window) {
+ of_node_put(pdn);
+@@ -1525,8 +1567,8 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
+ {
+ struct device_node *pdn, *dn;
+ struct iommu_table *tbl;
+- const __be32 *dma_window = NULL;
+ struct pci_dn *pci;
++ struct dynamic_dma_window_prop prop;
+
+ pr_debug("pci_dma_dev_setup_pSeriesLP: %s\n", pci_name(dev));
+
+@@ -1539,7 +1581,7 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
+ dn = pci_device_to_OF_node(dev);
+ pr_debug(" node is %pOF\n", dn);
+
+- pdn = pci_dma_find(dn, &dma_window);
++ pdn = pci_dma_find(dn, &prop);
+ if (!pdn || !PCI_DN(pdn)) {
+ printk(KERN_WARNING "pci_dma_dev_setup_pSeriesLP: "
+ "no DMA window found for pci dev=%s dn=%pOF\n",
+@@ -1552,8 +1594,20 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
+ if (!pci->table_group) {
+ pci->table_group = iommu_pseries_alloc_group(pci->phb->node);
+ tbl = pci->table_group->tables[0];
+- iommu_table_setparms_lpar(pci->phb, pdn, tbl,
+- pci->table_group, dma_window);
++
++ iommu_table_setparms_common(tbl, pci->phb->bus->number,
++ be32_to_cpu(prop.liobn),
++ be64_to_cpu(prop.dma_base),
++ 1ULL << be32_to_cpu(prop.window_shift),
++ be32_to_cpu(prop.tce_shift), NULL,
++ &iommu_table_lpar_multi_ops);
++
++		/* Only used for a normal boot with the default window. It doesn't
++		 * matter if we set these from a DDW (which is 64-bit) during kdump,
++		 * since these will not be used during kdump.
++ */
++ pci->table_group->tce32_start = be64_to_cpu(prop.dma_base);
++ pci->table_group->tce32_size = 1 << be32_to_cpu(prop.window_shift);
+
+ iommu_init_table(tbl, pci->phb->node, 0, 0);
+ iommu_register_group(pci->table_group,
+diff --git a/arch/riscv/include/asm/ftrace.h b/arch/riscv/include/asm/ftrace.h
+index d47d87c2d7e3d..dcf1bc9de5841 100644
+--- a/arch/riscv/include/asm/ftrace.h
++++ b/arch/riscv/include/asm/ftrace.h
+@@ -25,6 +25,11 @@
+
+ #define ARCH_SUPPORTS_FTRACE_OPS 1
+ #ifndef __ASSEMBLY__
++
++extern void *return_address(unsigned int level);
++
++#define ftrace_return_address(n) return_address(n)
++
+ void MCOUNT_NAME(void);
+ static inline unsigned long ftrace_call_adjust(unsigned long addr)
+ {
+diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
+index 59bb53da473dd..63055c6ad2c25 100644
+--- a/arch/riscv/include/asm/pgtable.h
++++ b/arch/riscv/include/asm/pgtable.h
+@@ -79,7 +79,7 @@
+ * Define vmemmap for pfn_to_page & page_to_pfn calls. Needed if kernel
+ * is configured with CONFIG_SPARSEMEM_VMEMMAP enabled.
+ */
+-#define vmemmap ((struct page *)VMEMMAP_START)
++#define vmemmap ((struct page *)VMEMMAP_START - (phys_ram_base >> PAGE_SHIFT))
+
+ #define PCI_IO_SIZE SZ_16M
+ #define PCI_IO_END VMEMMAP_START
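+
To see what the pgtable.h change above buys, here is a hedged worked example with assumed values (PAGE_SHIFT, VMEMMAP_START, and phys_ram_base below are illustrative stand-ins, not taken from the patch): biasing vmemmap by the first RAM pfn makes pfn_to_page() for the first page of RAM land exactly at VMEMMAP_START instead of many entries past it.

    #include <stdint.h>
    #include <assert.h>

    #define PAGE_SHIFT 12

    struct page { char pad[64]; };

    int main(void)
    {
            /* assumed values for illustration only */
            struct page *VMEMMAP_START = (struct page *)0xffffffc600000000UL;
            uint64_t phys_ram_base = 0x80000000UL;   /* RAM starts at 2 GiB */

            /* new definition: bias vmemmap by the first RAM pfn */
            struct page *vmemmap =
                    VMEMMAP_START - (phys_ram_base >> PAGE_SHIFT);

            /* pfn_to_page(pfn) == vmemmap + pfn: the first RAM page now
             * maps exactly to VMEMMAP_START, wasting no virtual map on
             * pfns below the start of RAM. */
            assert(vmemmap + (phys_ram_base >> PAGE_SHIFT) == VMEMMAP_START);
            return 0;
    }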
+diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
+index ab333cb792fd9..4c0805d264ca8 100644
+--- a/arch/riscv/kernel/Makefile
++++ b/arch/riscv/kernel/Makefile
+@@ -7,6 +7,7 @@ ifdef CONFIG_FTRACE
+ CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE)
+ CFLAGS_REMOVE_patch.o = $(CC_FLAGS_FTRACE)
+ CFLAGS_REMOVE_sbi.o = $(CC_FLAGS_FTRACE)
++CFLAGS_REMOVE_return_address.o = $(CC_FLAGS_FTRACE)
+ endif
+ CFLAGS_syscall_table.o += $(call cc-option,-Wno-override-init,)
+ CFLAGS_compat_syscall_table.o += $(call cc-option,-Wno-override-init,)
+@@ -41,6 +42,7 @@ obj-y += irq.o
+ obj-y += process.o
+ obj-y += ptrace.o
+ obj-y += reset.o
++obj-y += return_address.o
+ obj-y += setup.o
+ obj-y += signal.o
+ obj-y += syscall_table.o
+diff --git a/arch/riscv/kernel/return_address.c b/arch/riscv/kernel/return_address.c
+new file mode 100644
+index 0000000000000..c8115ec8fb304
+--- /dev/null
++++ b/arch/riscv/kernel/return_address.c
+@@ -0,0 +1,48 @@
++// SPDX-License-Identifier: GPL-2.0-only
++/*
++ * This code comes from arch/arm64/kernel/return_address.c
++ *
++ * Copyright (C) 2023 SiFive.
++ */
++
++#include <linux/export.h>
++#include <linux/kprobes.h>
++#include <linux/stacktrace.h>
++
++struct return_address_data {
++ unsigned int level;
++ void *addr;
++};
++
++static bool save_return_addr(void *d, unsigned long pc)
++{
++ struct return_address_data *data = d;
++
++ if (!data->level) {
++ data->addr = (void *)pc;
++ return false;
++ }
++
++ --data->level;
++
++ return true;
++}
++NOKPROBE_SYMBOL(save_return_addr);
++
++noinline void *return_address(unsigned int level)
++{
++ struct return_address_data data;
++
++ data.level = level + 3;
++ data.addr = NULL;
++
++ arch_stack_walk(save_return_addr, &data, current, NULL);
++
++ if (!data.level)
++ return data.addr;
++ else
++ return NULL;
++
++}
++EXPORT_SYMBOL_GPL(return_address);
++NOKPROBE_SYMBOL(return_address);
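+
A hedged usage sketch of the new helper: return_address() and the ftrace_return_address(n) mapping come from the hunks above, while the caller below is hypothetical.

    #include <linux/kernel.h>

    extern void *return_address(unsigned int level);

    /* level 0 is the immediate caller of this function; level 1 is its
     * caller, and so on. On riscv, ftrace_return_address(n) from
     * asm/ftrace.h now expands to return_address(n). */
    static noinline void who_called_me(void)
    {
            pr_info("called from %pS\n", return_address(0));
    }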
+diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
+index 4c9bfc4be58d4..2f7af61b49b6c 100644
+--- a/arch/x86/Kconfig
++++ b/arch/x86/Kconfig
+@@ -1982,6 +1982,23 @@ config EFI_STUB
+
+ See Documentation/admin-guide/efi-stub.rst for more information.
+
++config EFI_HANDOVER_PROTOCOL
++ bool "EFI handover protocol (DEPRECATED)"
++ depends on EFI_STUB
++ default y
++ help
++ Select this in order to include support for the deprecated EFI
++ handover protocol, which defines alternative entry points into the
++ EFI stub. This is a practice that has no basis in the UEFI
++ specification, and requires a priori knowledge on the part of the
++ bootloader about Linux/x86 specific ways of passing the command line
++ and initrd, and where in memory those assets may be loaded.
++
++ If in doubt, say Y. Even though the corresponding support is not
++ present in upstream GRUB or other bootloaders, most distros build
++ GRUB with numerous downstream patches applied, and may rely on the
++	  handover protocol as a result.
++
+ config EFI_MIXED
+ bool "EFI mixed-mode support"
+ depends on EFI_STUB && X86_64
+diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
+index 15b7b403a4bd0..3965b2c9efee0 100644
+--- a/arch/x86/boot/compressed/Makefile
++++ b/arch/x86/boot/compressed/Makefile
+@@ -74,6 +74,11 @@ LDFLAGS_vmlinux += -z noexecstack
+ ifeq ($(CONFIG_LD_IS_BFD),y)
+ LDFLAGS_vmlinux += $(call ld-option,--no-warn-rwx-segments)
+ endif
++ifeq ($(CONFIG_EFI_STUB),y)
++# ensure that the static EFI stub library will be pulled in, even if it is
++# never referenced explicitly from the startup code
++LDFLAGS_vmlinux += -u efi_pe_entry
++endif
+ LDFLAGS_vmlinux += -T
+
+ hostprogs := mkpiggy
+@@ -100,7 +105,7 @@ vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/kaslr.o
+ ifdef CONFIG_X86_64
+ vmlinux-objs-y += $(obj)/ident_map_64.o
+ vmlinux-objs-y += $(obj)/idt_64.o $(obj)/idt_handlers_64.o
+- vmlinux-objs-y += $(obj)/mem_encrypt.o
++ vmlinux-objs-$(CONFIG_AMD_MEM_ENCRYPT) += $(obj)/mem_encrypt.o
+ vmlinux-objs-y += $(obj)/pgtable_64.o
+ vmlinux-objs-$(CONFIG_AMD_MEM_ENCRYPT) += $(obj)/sev.o
+ endif
+@@ -108,11 +113,11 @@ endif
+ vmlinux-objs-$(CONFIG_ACPI) += $(obj)/acpi.o
+ vmlinux-objs-$(CONFIG_INTEL_TDX_GUEST) += $(obj)/tdx.o $(obj)/tdcall.o
+
+-vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_thunk_$(BITS).o
+ vmlinux-objs-$(CONFIG_EFI) += $(obj)/efi.o
+-efi-obj-$(CONFIG_EFI_STUB) = $(objtree)/drivers/firmware/efi/libstub/lib.a
++vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_mixed.o
++vmlinux-objs-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a
+
+-$(obj)/vmlinux: $(vmlinux-objs-y) $(efi-obj-y) FORCE
++$(obj)/vmlinux: $(vmlinux-objs-y) FORCE
+ $(call if_changed,ld)
+
+ OBJCOPYFLAGS_vmlinux.bin := -R .comment -S
+diff --git a/arch/x86/boot/compressed/acpi.c b/arch/x86/boot/compressed/acpi.c
+index 9caf89063e775..55c98fdd67d2b 100644
+--- a/arch/x86/boot/compressed/acpi.c
++++ b/arch/x86/boot/compressed/acpi.c
+@@ -30,13 +30,13 @@ __efi_get_rsdp_addr(unsigned long cfg_tbl_pa, unsigned int cfg_tbl_len)
+ * Search EFI system tables for RSDP. Preferred is ACPI_20_TABLE_GUID to
+ * ACPI_TABLE_GUID because it has more features.
+ */
+- rsdp_addr = efi_find_vendor_table(boot_params, cfg_tbl_pa, cfg_tbl_len,
++ rsdp_addr = efi_find_vendor_table(boot_params_ptr, cfg_tbl_pa, cfg_tbl_len,
+ ACPI_20_TABLE_GUID);
+ if (rsdp_addr)
+ return (acpi_physical_address)rsdp_addr;
+
+ /* No ACPI_20_TABLE_GUID found, fallback to ACPI_TABLE_GUID. */
+- rsdp_addr = efi_find_vendor_table(boot_params, cfg_tbl_pa, cfg_tbl_len,
++ rsdp_addr = efi_find_vendor_table(boot_params_ptr, cfg_tbl_pa, cfg_tbl_len,
+ ACPI_TABLE_GUID);
+ if (rsdp_addr)
+ return (acpi_physical_address)rsdp_addr;
+@@ -56,15 +56,15 @@ static acpi_physical_address efi_get_rsdp_addr(void)
+ enum efi_type et;
+ int ret;
+
+- et = efi_get_type(boot_params);
++ et = efi_get_type(boot_params_ptr);
+ if (et == EFI_TYPE_NONE)
+ return 0;
+
+- systab_pa = efi_get_system_table(boot_params);
++ systab_pa = efi_get_system_table(boot_params_ptr);
+ if (!systab_pa)
+ error("EFI support advertised, but unable to locate system table.");
+
+- ret = efi_get_conf_table(boot_params, &cfg_tbl_pa, &cfg_tbl_len);
++ ret = efi_get_conf_table(boot_params_ptr, &cfg_tbl_pa, &cfg_tbl_len);
+ if (ret || !cfg_tbl_pa)
+ error("EFI config table not found.");
+
+@@ -156,7 +156,7 @@ acpi_physical_address get_rsdp_addr(void)
+ {
+ acpi_physical_address pa;
+
+- pa = boot_params->acpi_rsdp_addr;
++ pa = boot_params_ptr->acpi_rsdp_addr;
+
+ if (!pa)
+ pa = efi_get_rsdp_addr();
+@@ -210,7 +210,7 @@ static unsigned long get_acpi_srat_table(void)
+ rsdp = (struct acpi_table_rsdp *)get_cmdline_acpi_rsdp();
+ if (!rsdp)
+ rsdp = (struct acpi_table_rsdp *)(long)
+- boot_params->acpi_rsdp_addr;
++ boot_params_ptr->acpi_rsdp_addr;
+
+ if (!rsdp)
+ return 0;
+diff --git a/arch/x86/boot/compressed/cmdline.c b/arch/x86/boot/compressed/cmdline.c
+index f1add5d85da9d..c1bb180973ea2 100644
+--- a/arch/x86/boot/compressed/cmdline.c
++++ b/arch/x86/boot/compressed/cmdline.c
+@@ -14,9 +14,9 @@ static inline char rdfs8(addr_t addr)
+ #include "../cmdline.c"
+ unsigned long get_cmd_line_ptr(void)
+ {
+- unsigned long cmd_line_ptr = boot_params->hdr.cmd_line_ptr;
++ unsigned long cmd_line_ptr = boot_params_ptr->hdr.cmd_line_ptr;
+
+- cmd_line_ptr |= (u64)boot_params->ext_cmd_line_ptr << 32;
++ cmd_line_ptr |= (u64)boot_params_ptr->ext_cmd_line_ptr << 32;
+
+ return cmd_line_ptr;
+ }
+diff --git a/arch/x86/boot/compressed/efi_mixed.S b/arch/x86/boot/compressed/efi_mixed.S
+new file mode 100644
+index 0000000000000..8232c5b2a9bf5
+--- /dev/null
++++ b/arch/x86/boot/compressed/efi_mixed.S
+@@ -0,0 +1,328 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Copyright (C) 2014, 2015 Intel Corporation; author Matt Fleming
++ *
++ * Early support for invoking 32-bit EFI services from a 64-bit kernel.
++ *
++ * Because this thunking occurs before ExitBootServices() we have to
++ * restore the firmware's 32-bit GDT and IDT before we make EFI service
++ * calls.
++ *
++ * On the plus side, we don't have to worry about mangling 64-bit
++ * addresses into 32-bits because we're executing with an identity
++ * mapped pagetable and haven't transitioned to 64-bit virtual addresses
++ * yet.
++ */
++
++#include <linux/linkage.h>
++#include <asm/msr.h>
++#include <asm/page_types.h>
++#include <asm/processor-flags.h>
++#include <asm/segment.h>
++
++ .code64
++ .text
++/*
++ * When booting in 64-bit mode on 32-bit EFI firmware, startup_64_mixed_mode()
++ * is the first thing that runs after switching to long mode. Depending on
++ * whether the EFI handover protocol or the compat entry point was used to
++ * enter the kernel, it will either branch to the common 64-bit EFI stub
++ * entrypoint efi_stub_entry() directly, or via the 64-bit EFI PE/COFF
++ * entrypoint efi_pe_entry(). In the former case, the bootloader must provide a
++ * struct boot_params pointer as the third argument, so the presence of such a
++ * pointer is used to disambiguate.
++ *
++ * +--------------+
++ * +------------------+ +------------+ +------>| efi_pe_entry |
++ * | efi32_pe_entry |---->| | | +-----------+--+
++ * +------------------+ | | +------+----------------+ |
++ * | startup_32 |---->| startup_64_mixed_mode | |
++ * +------------------+ | | +------+----------------+ |
++ * | efi32_stub_entry |---->| | | |
++ * +------------------+ +------------+ | |
++ * V |
++ * +------------+ +----------------+ |
++ * | startup_64 |<----| efi_stub_entry |<--------+
++ * +------------+ +----------------+
++ */
++SYM_FUNC_START(startup_64_mixed_mode)
++ lea efi32_boot_args(%rip), %rdx
++ mov 0(%rdx), %edi
++ mov 4(%rdx), %esi
++#ifdef CONFIG_EFI_HANDOVER_PROTOCOL
++ mov 8(%rdx), %edx // saved bootparams pointer
++ test %edx, %edx
++ jnz efi_stub_entry
++#endif
++ /*
++ * efi_pe_entry uses MS calling convention, which requires 32 bytes of
++ * shadow space on the stack even if all arguments are passed in
++ * registers. We also need an additional 8 bytes for the space that
++ * would be occupied by the return address, and this also results in
++ * the correct stack alignment for entry.
++ */
++ sub $40, %rsp
++ mov %rdi, %rcx // MS calling convention
++ mov %rsi, %rdx
++ jmp efi_pe_entry
++SYM_FUNC_END(startup_64_mixed_mode)
++
++SYM_FUNC_START(__efi64_thunk)
++ push %rbp
++ push %rbx
++
++ movl %ds, %eax
++ push %rax
++ movl %es, %eax
++ push %rax
++ movl %ss, %eax
++ push %rax
++
++ /* Copy args passed on stack */
++ movq 0x30(%rsp), %rbp
++ movq 0x38(%rsp), %rbx
++ movq 0x40(%rsp), %rax
++
++ /*
++ * Convert x86-64 ABI params to i386 ABI
++ */
++ subq $64, %rsp
++ movl %esi, 0x0(%rsp)
++ movl %edx, 0x4(%rsp)
++ movl %ecx, 0x8(%rsp)
++ movl %r8d, 0xc(%rsp)
++ movl %r9d, 0x10(%rsp)
++ movl %ebp, 0x14(%rsp)
++ movl %ebx, 0x18(%rsp)
++ movl %eax, 0x1c(%rsp)
++
++ leaq 0x20(%rsp), %rbx
++ sgdt (%rbx)
++ sidt 16(%rbx)
++
++ leaq 1f(%rip), %rbp
++
++ /*
++ * Switch to IDT and GDT with 32-bit segments. These are the firmware
++ * GDT and IDT that were installed when the kernel started executing.
++ * The pointers were saved by the efi32_entry() routine below.
++ *
++ * Pass the saved DS selector to the 32-bit code, and use far return to
++ * restore the saved CS selector.
++ */
++ lidt efi32_boot_idt(%rip)
++ lgdt efi32_boot_gdt(%rip)
++
++ movzwl efi32_boot_ds(%rip), %edx
++ movzwq efi32_boot_cs(%rip), %rax
++ pushq %rax
++ leaq efi_enter32(%rip), %rax
++ pushq %rax
++ lretq
++
++1: addq $64, %rsp
++ movq %rdi, %rax
++
++ pop %rbx
++ movl %ebx, %ss
++ pop %rbx
++ movl %ebx, %es
++ pop %rbx
++ movl %ebx, %ds
++ /* Clear out 32-bit selector from FS and GS */
++ xorl %ebx, %ebx
++ movl %ebx, %fs
++ movl %ebx, %gs
++
++ /*
++ * Convert 32-bit status code into 64-bit.
++ */
++ roll $1, %eax
++ rorq $1, %rax
++
++ pop %rbx
++ pop %rbp
++ RET
++SYM_FUNC_END(__efi64_thunk)
++
++ .code32
++#ifdef CONFIG_EFI_HANDOVER_PROTOCOL
++SYM_FUNC_START(efi32_stub_entry)
++ call 1f
++1: popl %ecx
++
++ /* Clear BSS */
++ xorl %eax, %eax
++ leal (_bss - 1b)(%ecx), %edi
++ leal (_ebss - 1b)(%ecx), %ecx
++ subl %edi, %ecx
++ shrl $2, %ecx
++ cld
++ rep stosl
++
++ add $0x4, %esp /* Discard return address */
++ popl %ecx
++ popl %edx
++ popl %esi
++ jmp efi32_entry
++SYM_FUNC_END(efi32_stub_entry)
++#endif
++
++/*
++ * EFI service pointer must be in %edi.
++ *
++ * The stack should represent the 32-bit calling convention.
++ */
++SYM_FUNC_START_LOCAL(efi_enter32)
++ /* Load firmware selector into data and stack segment registers */
++ movl %edx, %ds
++ movl %edx, %es
++ movl %edx, %fs
++ movl %edx, %gs
++ movl %edx, %ss
++
++ /* Reload pgtables */
++ movl %cr3, %eax
++ movl %eax, %cr3
++
++ /* Disable paging */
++ movl %cr0, %eax
++ btrl $X86_CR0_PG_BIT, %eax
++ movl %eax, %cr0
++
++ /* Disable long mode via EFER */
++ movl $MSR_EFER, %ecx
++ rdmsr
++ btrl $_EFER_LME, %eax
++ wrmsr
++
++ call *%edi
++
++ /* We must preserve return value */
++ movl %eax, %edi
++
++ /*
++ * Some firmware will return with interrupts enabled. Be sure to
++ * disable them before we switch GDTs and IDTs.
++ */
++ cli
++
++ lidtl 16(%ebx)
++ lgdtl (%ebx)
++
++ movl %cr4, %eax
++ btsl $(X86_CR4_PAE_BIT), %eax
++ movl %eax, %cr4
++
++ movl %cr3, %eax
++ movl %eax, %cr3
++
++ movl $MSR_EFER, %ecx
++ rdmsr
++ btsl $_EFER_LME, %eax
++ wrmsr
++
++ xorl %eax, %eax
++ lldt %ax
++
++ pushl $__KERNEL_CS
++ pushl %ebp
++
++ /* Enable paging */
++ movl %cr0, %eax
++ btsl $X86_CR0_PG_BIT, %eax
++ movl %eax, %cr0
++ lret
++SYM_FUNC_END(efi_enter32)
++
++/*
++ * This is the common EFI stub entry point for mixed mode.
++ *
++ * Arguments: %ecx image handle
++ * %edx EFI system table pointer
++ *	%esi	struct boot_params pointer (or NULL when not using
++ * the EFI handover protocol)
++ *
++ * Since this is the point of no return for ordinary execution, no registers
++ * are considered live except for the function parameters. [Note that the EFI
++ * stub may still exit and return to the firmware using the Exit() EFI boot
++ * service.]
++ */
++SYM_FUNC_START_LOCAL(efi32_entry)
++ call 1f
++1: pop %ebx
++
++ /* Save firmware GDTR and code/data selectors */
++ sgdtl (efi32_boot_gdt - 1b)(%ebx)
++ movw %cs, (efi32_boot_cs - 1b)(%ebx)
++ movw %ds, (efi32_boot_ds - 1b)(%ebx)
++
++ /* Store firmware IDT descriptor */
++ sidtl (efi32_boot_idt - 1b)(%ebx)
++
++ /* Store boot arguments */
++ leal (efi32_boot_args - 1b)(%ebx), %ebx
++ movl %ecx, 0(%ebx)
++ movl %edx, 4(%ebx)
++ movl %esi, 8(%ebx)
++ movb $0x0, 12(%ebx) // efi_is64
++
++ /* Disable paging */
++ movl %cr0, %eax
++ btrl $X86_CR0_PG_BIT, %eax
++ movl %eax, %cr0
++
++ jmp startup_32
++SYM_FUNC_END(efi32_entry)
++
++/*
++ * efi_status_t efi32_pe_entry(efi_handle_t image_handle,
++ * efi_system_table_32_t *sys_table)
++ */
++SYM_FUNC_START(efi32_pe_entry)
++ pushl %ebp
++ movl %esp, %ebp
++ pushl %ebx // save callee-save registers
++ pushl %edi
++
++ call verify_cpu // check for long mode support
++ testl %eax, %eax
++ movl $0x80000003, %eax // EFI_UNSUPPORTED
++ jnz 2f
++
++ movl 8(%ebp), %ecx // image_handle
++ movl 12(%ebp), %edx // sys_table
++ xorl %esi, %esi
++ jmp efi32_entry // pass %ecx, %edx, %esi
++ // no other registers remain live
++
++2: popl %edi // restore callee-save registers
++ popl %ebx
++ leave
++ RET
++SYM_FUNC_END(efi32_pe_entry)
++
++#ifdef CONFIG_EFI_HANDOVER_PROTOCOL
++ .org efi32_stub_entry + 0x200
++ .code64
++SYM_FUNC_START_NOALIGN(efi64_stub_entry)
++ jmp efi_handover_entry
++SYM_FUNC_END(efi64_stub_entry)
++#endif
++
++ .data
++ .balign 8
++SYM_DATA_START_LOCAL(efi32_boot_gdt)
++ .word 0
++ .quad 0
++SYM_DATA_END(efi32_boot_gdt)
++
++SYM_DATA_START_LOCAL(efi32_boot_idt)
++ .word 0
++ .quad 0
++SYM_DATA_END(efi32_boot_idt)
++
++SYM_DATA_LOCAL(efi32_boot_cs, .word 0)
++SYM_DATA_LOCAL(efi32_boot_ds, .word 0)
++SYM_DATA_LOCAL(efi32_boot_args, .long 0, 0, 0)
++SYM_DATA(efi_is64, .byte 1)
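+
The roll/rorq pair near the end of __efi64_thunk above converts a 32-bit EFI status into a 64-bit one by moving the error flag from bit 31 to bit 63 while keeping the low bits. A hedged C rendering of the same transformation (the helper name is illustrative):

    #include <stdint.h>

    static uint64_t efi32_to_efi64_status(uint32_t s)
    {
            /* roll $1, %eax ; rorq $1, %rax: keep bits 0..30 in place and
             * move bit 31 (the 32-bit EFI error flag) up to bit 63 (the
             * 64-bit EFI error flag). */
            return (uint64_t)(s & 0x7fffffffu) |
                   ((uint64_t)(s & 0x80000000u) << 32);
    }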
+diff --git a/arch/x86/boot/compressed/efi_thunk_64.S b/arch/x86/boot/compressed/efi_thunk_64.S
+deleted file mode 100644
+index 67e7edcdfea8f..0000000000000
+--- a/arch/x86/boot/compressed/efi_thunk_64.S
++++ /dev/null
+@@ -1,195 +0,0 @@
+-/* SPDX-License-Identifier: GPL-2.0 */
+-/*
+- * Copyright (C) 2014, 2015 Intel Corporation; author Matt Fleming
+- *
+- * Early support for invoking 32-bit EFI services from a 64-bit kernel.
+- *
+- * Because this thunking occurs before ExitBootServices() we have to
+- * restore the firmware's 32-bit GDT and IDT before we make EFI service
+- * calls.
+- *
+- * On the plus side, we don't have to worry about mangling 64-bit
+- * addresses into 32-bits because we're executing with an identity
+- * mapped pagetable and haven't transitioned to 64-bit virtual addresses
+- * yet.
+- */
+-
+-#include <linux/linkage.h>
+-#include <asm/msr.h>
+-#include <asm/page_types.h>
+-#include <asm/processor-flags.h>
+-#include <asm/segment.h>
+-
+- .code64
+- .text
+-SYM_FUNC_START(__efi64_thunk)
+- push %rbp
+- push %rbx
+-
+- movl %ds, %eax
+- push %rax
+- movl %es, %eax
+- push %rax
+- movl %ss, %eax
+- push %rax
+-
+- /* Copy args passed on stack */
+- movq 0x30(%rsp), %rbp
+- movq 0x38(%rsp), %rbx
+- movq 0x40(%rsp), %rax
+-
+- /*
+- * Convert x86-64 ABI params to i386 ABI
+- */
+- subq $64, %rsp
+- movl %esi, 0x0(%rsp)
+- movl %edx, 0x4(%rsp)
+- movl %ecx, 0x8(%rsp)
+- movl %r8d, 0xc(%rsp)
+- movl %r9d, 0x10(%rsp)
+- movl %ebp, 0x14(%rsp)
+- movl %ebx, 0x18(%rsp)
+- movl %eax, 0x1c(%rsp)
+-
+- leaq 0x20(%rsp), %rbx
+- sgdt (%rbx)
+-
+- addq $16, %rbx
+- sidt (%rbx)
+-
+- leaq 1f(%rip), %rbp
+-
+- /*
+- * Switch to IDT and GDT with 32-bit segments. This is the firmware GDT
+- * and IDT that was installed when the kernel started executing. The
+- * pointers were saved at the EFI stub entry point in head_64.S.
+- *
+- * Pass the saved DS selector to the 32-bit code, and use far return to
+- * restore the saved CS selector.
+- */
+- leaq efi32_boot_idt(%rip), %rax
+- lidt (%rax)
+- leaq efi32_boot_gdt(%rip), %rax
+- lgdt (%rax)
+-
+- movzwl efi32_boot_ds(%rip), %edx
+- movzwq efi32_boot_cs(%rip), %rax
+- pushq %rax
+- leaq efi_enter32(%rip), %rax
+- pushq %rax
+- lretq
+-
+-1: addq $64, %rsp
+- movq %rdi, %rax
+-
+- pop %rbx
+- movl %ebx, %ss
+- pop %rbx
+- movl %ebx, %es
+- pop %rbx
+- movl %ebx, %ds
+- /* Clear out 32-bit selector from FS and GS */
+- xorl %ebx, %ebx
+- movl %ebx, %fs
+- movl %ebx, %gs
+-
+- /*
+- * Convert 32-bit status code into 64-bit.
+- */
+- roll $1, %eax
+- rorq $1, %rax
+-
+- pop %rbx
+- pop %rbp
+- RET
+-SYM_FUNC_END(__efi64_thunk)
+-
+- .code32
+-/*
+- * EFI service pointer must be in %edi.
+- *
+- * The stack should represent the 32-bit calling convention.
+- */
+-SYM_FUNC_START_LOCAL(efi_enter32)
+- /* Load firmware selector into data and stack segment registers */
+- movl %edx, %ds
+- movl %edx, %es
+- movl %edx, %fs
+- movl %edx, %gs
+- movl %edx, %ss
+-
+- /* Reload pgtables */
+- movl %cr3, %eax
+- movl %eax, %cr3
+-
+- /* Disable paging */
+- movl %cr0, %eax
+- btrl $X86_CR0_PG_BIT, %eax
+- movl %eax, %cr0
+-
+- /* Disable long mode via EFER */
+- movl $MSR_EFER, %ecx
+- rdmsr
+- btrl $_EFER_LME, %eax
+- wrmsr
+-
+- call *%edi
+-
+- /* We must preserve return value */
+- movl %eax, %edi
+-
+- /*
+- * Some firmware will return with interrupts enabled. Be sure to
+- * disable them before we switch GDTs and IDTs.
+- */
+- cli
+-
+- lidtl (%ebx)
+- subl $16, %ebx
+-
+- lgdtl (%ebx)
+-
+- movl %cr4, %eax
+- btsl $(X86_CR4_PAE_BIT), %eax
+- movl %eax, %cr4
+-
+- movl %cr3, %eax
+- movl %eax, %cr3
+-
+- movl $MSR_EFER, %ecx
+- rdmsr
+- btsl $_EFER_LME, %eax
+- wrmsr
+-
+- xorl %eax, %eax
+- lldt %ax
+-
+- pushl $__KERNEL_CS
+- pushl %ebp
+-
+- /* Enable paging */
+- movl %cr0, %eax
+- btsl $X86_CR0_PG_BIT, %eax
+- movl %eax, %cr0
+- lret
+-SYM_FUNC_END(efi_enter32)
+-
+- .data
+- .balign 8
+-SYM_DATA_START(efi32_boot_gdt)
+- .word 0
+- .quad 0
+-SYM_DATA_END(efi32_boot_gdt)
+-
+-SYM_DATA_START(efi32_boot_idt)
+- .word 0
+- .quad 0
+-SYM_DATA_END(efi32_boot_idt)
+-
+-SYM_DATA_START(efi32_boot_cs)
+- .word 0
+-SYM_DATA_END(efi32_boot_cs)
+-
+-SYM_DATA_START(efi32_boot_ds)
+- .word 0
+-SYM_DATA_END(efi32_boot_ds)
+diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
+index 3b354eb9516df..1cfe9802a42fe 100644
+--- a/arch/x86/boot/compressed/head_32.S
++++ b/arch/x86/boot/compressed/head_32.S
+@@ -84,19 +84,6 @@ SYM_FUNC_START(startup_32)
+
+ #ifdef CONFIG_RELOCATABLE
+ leal startup_32@GOTOFF(%edx), %ebx
+-
+-#ifdef CONFIG_EFI_STUB
+-/*
+- * If we were loaded via the EFI LoadImage service, startup_32() will be at an
+- * offset to the start of the space allocated for the image. efi_pe_entry() will
+- * set up image_offset to tell us where the image actually starts, so that we
+- * can use the full available buffer.
+- * image_offset = startup_32 - image_base
+- * Otherwise image_offset will be zero and has no effect on the calculations.
+- */
+- subl image_offset@GOTOFF(%edx), %ebx
+-#endif
+-
+ movl BP_kernel_alignment(%esi), %eax
+ decl %eax
+ addl %eax, %ebx
+@@ -150,17 +137,6 @@ SYM_FUNC_START(startup_32)
+ jmp *%eax
+ SYM_FUNC_END(startup_32)
+
+-#ifdef CONFIG_EFI_STUB
+-SYM_FUNC_START(efi32_stub_entry)
+- add $0x4, %esp
+- movl 8(%esp), %esi /* save boot_params pointer */
+- call efi_main
+- /* efi_main returns the possibly relocated address of startup_32 */
+- jmp *%eax
+-SYM_FUNC_END(efi32_stub_entry)
+-SYM_FUNC_ALIAS(efi_stub_entry, efi32_stub_entry)
+-#endif
+-
+ .text
+ SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated)
+
+@@ -179,15 +155,9 @@ SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated)
+ */
+ /* push arguments for extract_kernel: */
+
+- pushl output_len@GOTOFF(%ebx) /* decompressed length, end of relocs */
+ pushl %ebp /* output address */
+- pushl input_len@GOTOFF(%ebx) /* input_len */
+- leal input_data@GOTOFF(%ebx), %eax
+- pushl %eax /* input_data */
+- leal boot_heap@GOTOFF(%ebx), %eax
+- pushl %eax /* heap area */
+ pushl %esi /* real mode pointer */
+- call extract_kernel /* returns kernel location in %eax */
++ call extract_kernel /* returns kernel entry point in %eax */
+ addl $24, %esp
+
+ /*
+@@ -208,17 +178,11 @@ SYM_DATA_START_LOCAL(gdt)
+ .quad 0x00cf92000000ffff /* __KERNEL_DS */
+ SYM_DATA_END_LABEL(gdt, SYM_L_LOCAL, gdt_end)
+
+-#ifdef CONFIG_EFI_STUB
+-SYM_DATA(image_offset, .long 0)
+-#endif
+-
+ /*
+ * Stack and heap for uncompression
+ */
+ .bss
+ .balign 4
+-boot_heap:
+- .fill BOOT_HEAP_SIZE, 1, 0
+ boot_stack:
+ .fill BOOT_STACK_SIZE, 1, 0
+ boot_stack_end:
+diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
+index b4bd6df29116f..0d7aef10b19ad 100644
+--- a/arch/x86/boot/compressed/head_64.S
++++ b/arch/x86/boot/compressed/head_64.S
+@@ -118,7 +118,9 @@ SYM_FUNC_START(startup_32)
+ 1:
+
+ /* Setup Exception handling for SEV-ES */
++#ifdef CONFIG_AMD_MEM_ENCRYPT
+ call startup32_load_idt
++#endif
+
+ /* Make sure cpu supports long mode. */
+ call verify_cpu
+@@ -136,19 +138,6 @@ SYM_FUNC_START(startup_32)
+
+ #ifdef CONFIG_RELOCATABLE
+ movl %ebp, %ebx
+-
+-#ifdef CONFIG_EFI_STUB
+-/*
+- * If we were loaded via the EFI LoadImage service, startup_32 will be at an
+- * offset to the start of the space allocated for the image. efi_pe_entry will
+- * set up image_offset to tell us where the image actually starts, so that we
+- * can use the full available buffer.
+- * image_offset = startup_32 - image_base
+- * Otherwise image_offset will be zero and has no effect on the calculations.
+- */
+- subl rva(image_offset)(%ebp), %ebx
+-#endif
+-
+ movl BP_kernel_alignment(%esi), %eax
+ decl %eax
+ addl %eax, %ebx
+@@ -178,12 +167,13 @@ SYM_FUNC_START(startup_32)
+ */
+ /*
+ * If SEV is active then set the encryption mask in the page tables.
+- * This will insure that when the kernel is copied and decompressed
++ * This will ensure that when the kernel is copied and decompressed
+ * it will be done so encrypted.
+ */
+- call get_sev_encryption_bit
+ xorl %edx, %edx
+ #ifdef CONFIG_AMD_MEM_ENCRYPT
++ call get_sev_encryption_bit
++ xorl %edx, %edx
+ testl %eax, %eax
+ jz 1f
+ subl $32, %eax /* Encryption bit is always above bit 31 */
+@@ -249,6 +239,11 @@ SYM_FUNC_START(startup_32)
+ movl $__BOOT_TSS, %eax
+ ltr %ax
+
++#ifdef CONFIG_AMD_MEM_ENCRYPT
++ /* Check if the C-bit position is correct when SEV is active */
++ call startup32_check_sev_cbit
++#endif
++
+ /*
+ * Setup for the jump to 64bit mode
+ *
+@@ -261,29 +256,11 @@ SYM_FUNC_START(startup_32)
+ */
+ leal rva(startup_64)(%ebp), %eax
+ #ifdef CONFIG_EFI_MIXED
+- movl rva(efi32_boot_args)(%ebp), %edi
+- testl %edi, %edi
+- jz 1f
+- leal rva(efi64_stub_entry)(%ebp), %eax
+- movl rva(efi32_boot_args+4)(%ebp), %esi
+- movl rva(efi32_boot_args+8)(%ebp), %edx // saved bootparams pointer
+- testl %edx, %edx
+- jnz 1f
+- /*
+- * efi_pe_entry uses MS calling convention, which requires 32 bytes of
+- * shadow space on the stack even if all arguments are passed in
+- * registers. We also need an additional 8 bytes for the space that
+- * would be occupied by the return address, and this also results in
+- * the correct stack alignment for entry.
+- */
+- subl $40, %esp
+- leal rva(efi_pe_entry)(%ebp), %eax
+- movl %edi, %ecx // MS calling convention
+- movl %esi, %edx
++ cmpb $1, rva(efi_is64)(%ebp)
++ je 1f
++ leal rva(startup_64_mixed_mode)(%ebp), %eax
+ 1:
+ #endif
+- /* Check if the C-bit position is correct when SEV is active */
+- call startup32_check_sev_cbit
+
+ pushl $__KERNEL_CS
+ pushl %eax
+@@ -296,41 +273,6 @@ SYM_FUNC_START(startup_32)
+ lret
+ SYM_FUNC_END(startup_32)
+
+-#ifdef CONFIG_EFI_MIXED
+- .org 0x190
+-SYM_FUNC_START(efi32_stub_entry)
+- add $0x4, %esp /* Discard return address */
+- popl %ecx
+- popl %edx
+- popl %esi
+-
+- call 1f
+-1: pop %ebp
+- subl $ rva(1b), %ebp
+-
+- movl %esi, rva(efi32_boot_args+8)(%ebp)
+-SYM_INNER_LABEL(efi32_pe_stub_entry, SYM_L_LOCAL)
+- movl %ecx, rva(efi32_boot_args)(%ebp)
+- movl %edx, rva(efi32_boot_args+4)(%ebp)
+- movb $0, rva(efi_is64)(%ebp)
+-
+- /* Save firmware GDTR and code/data selectors */
+- sgdtl rva(efi32_boot_gdt)(%ebp)
+- movw %cs, rva(efi32_boot_cs)(%ebp)
+- movw %ds, rva(efi32_boot_ds)(%ebp)
+-
+- /* Store firmware IDT descriptor */
+- sidtl rva(efi32_boot_idt)(%ebp)
+-
+- /* Disable paging */
+- movl %cr0, %eax
+- btrl $X86_CR0_PG_BIT, %eax
+- movl %eax, %cr0
+-
+- jmp startup_32
+-SYM_FUNC_END(efi32_stub_entry)
+-#endif
+-
+ .code64
+ .org 0x200
+ SYM_CODE_START(startup_64)
+@@ -372,20 +314,6 @@ SYM_CODE_START(startup_64)
+ /* Start with the delta to where the kernel will run at. */
+ #ifdef CONFIG_RELOCATABLE
+ leaq startup_32(%rip) /* - $startup_32 */, %rbp
+-
+-#ifdef CONFIG_EFI_STUB
+-/*
+- * If we were loaded via the EFI LoadImage service, startup_32 will be at an
+- * offset to the start of the space allocated for the image. efi_pe_entry will
+- * set up image_offset to tell us where the image actually starts, so that we
+- * can use the full available buffer.
+- * image_offset = startup_32 - image_base
+- * Otherwise image_offset will be zero and has no effect on the calculations.
+- */
+- movl image_offset(%rip), %eax
+- subq %rax, %rbp
+-#endif
+-
+ movl BP_kernel_alignment(%rsi), %eax
+ decl %eax
+ addq %rax, %rbp
+@@ -424,10 +352,6 @@ SYM_CODE_START(startup_64)
+ * For the trampoline, we need the top page table to reside in lower
+ * memory as we don't have a way to load 64-bit values into CR3 in
+ * 32-bit mode.
+- *
+- * We go though the trampoline even if we don't have to: if we're
+- * already in a desired paging mode. This way the trampoline code gets
+- * tested on every boot.
+ */
+
+ /* Make sure we have GDT with 32-bit code segment */
+@@ -442,10 +366,14 @@ SYM_CODE_START(startup_64)
+ lretq
+
+ .Lon_kernel_cs:
++ /*
++ * RSI holds a pointer to a boot_params structure provided by the
++ * loader, and this needs to be preserved across C function calls. So
++ * move it into a callee saved register.
++ */
++ movq %rsi, %r15
+
+- pushq %rsi
+ call load_stage1_idt
+- popq %rsi
+
+ #ifdef CONFIG_AMD_MEM_ENCRYPT
+ /*
+@@ -456,82 +384,24 @@ SYM_CODE_START(startup_64)
+ * CPUID instructions being issued, so go ahead and do that now via
+ * sev_enable(), which will also handle the rest of the SEV-related
+ * detection/setup to ensure that has been done in advance of any dependent
+- * code.
++ * code. Pass the boot_params pointer as the first argument.
+ */
+- pushq %rsi
+- movq %rsi, %rdi /* real mode address */
++ movq %r15, %rdi
+ call sev_enable
+- popq %rsi
+ #endif
+
+ /*
+- * paging_prepare() sets up the trampoline and checks if we need to
+- * enable 5-level paging.
+- *
+- * paging_prepare() returns a two-quadword structure which lands
+- * into RDX:RAX:
+- * - Address of the trampoline is returned in RAX.
+- * - Non zero RDX means trampoline needs to enable 5-level
+- * paging.
+- *
+- * RSI holds real mode data and needs to be preserved across
+- * this function call.
+- */
+- pushq %rsi
+- movq %rsi, %rdi /* real mode address */
+- call paging_prepare
+- popq %rsi
+-
+- /* Save the trampoline address in RCX */
+- movq %rax, %rcx
+-
+- /* Set up 32-bit addressable stack */
+- leaq TRAMPOLINE_32BIT_STACK_END(%rcx), %rsp
+-
+- /*
+- * Preserve live 64-bit registers on the stack: this is necessary
+- * because the architecture does not guarantee that GPRs will retain
+- * their full 64-bit values across a 32-bit mode switch.
+- */
+- pushq %rbp
+- pushq %rbx
+- pushq %rsi
+-
+- /*
+- * Push the 64-bit address of trampoline_return() onto the new stack.
+- * It will be used by the trampoline to return to the main code. Due to
+- * the 32-bit mode switch, it cannot be kept it in a register either.
+- */
+- leaq trampoline_return(%rip), %rdi
+- pushq %rdi
+-
+- /* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */
+- pushq $__KERNEL32_CS
+- leaq TRAMPOLINE_32BIT_CODE_OFFSET(%rax), %rax
+- pushq %rax
+- lretq
+-trampoline_return:
+- /* Restore live 64-bit registers */
+- popq %rsi
+- popq %rbx
+- popq %rbp
+-
+- /* Restore the stack, the 32-bit trampoline uses its own stack */
+- leaq rva(boot_stack_end)(%rbx), %rsp
+-
+- /*
+- * cleanup_trampoline() would restore trampoline memory.
+- *
+- * RDI is address of the page table to use instead of page table
+- * in trampoline memory (if required).
++ * configure_5level_paging() updates the number of paging levels using
++ * a trampoline in 32-bit addressable memory if the current number does
++ * not match the desired number.
+ *
+- * RSI holds real mode data and needs to be preserved across
+- * this function call.
++ * Pass the boot_params pointer as the first argument. The second
++ * argument is the relocated address of the page table to use instead
++ * of the page table in trampoline memory (if required).
+ */
+- pushq %rsi
+- leaq rva(top_pgtable)(%rbx), %rdi
+- call cleanup_trampoline
+- popq %rsi
++ movq %r15, %rdi
++ leaq rva(top_pgtable)(%rbx), %rsi
++ call configure_5level_paging
+
+ /* Zero EFLAGS */
+ pushq $0
+@@ -541,7 +411,6 @@ trampoline_return:
+ * Copy the compressed kernel to the end of our buffer
+ * where decompression in place becomes safe.
+ */
+- pushq %rsi
+ leaq (_bss-8)(%rip), %rsi
+ leaq rva(_bss-8)(%rbx), %rdi
+ movl $(_bss - startup_32), %ecx
+@@ -549,7 +418,6 @@ trampoline_return:
+ std
+ rep movsq
+ cld
+- popq %rsi
+
+ /*
+ * The GDT may get overwritten either during the copy we just did or
+@@ -568,19 +436,6 @@ trampoline_return:
+ jmp *%rax
+ SYM_CODE_END(startup_64)
+
+-#ifdef CONFIG_EFI_STUB
+- .org 0x390
+-SYM_FUNC_START(efi64_stub_entry)
+- and $~0xf, %rsp /* realign the stack */
+- movq %rdx, %rbx /* save boot_params pointer */
+- call efi_main
+- movq %rbx,%rsi
+- leaq rva(startup_64)(%rax), %rax
+- jmp *%rax
+-SYM_FUNC_END(efi64_stub_entry)
+-SYM_FUNC_ALIAS(efi_stub_entry, efi64_stub_entry)
+-#endif
+-
+ .text
+ SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated)
+
+@@ -594,125 +449,122 @@ SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated)
+ shrq $3, %rcx
+ rep stosq
+
+- pushq %rsi
+ call load_stage2_idt
+
+ /* Pass boot_params to initialize_identity_maps() */
+- movq (%rsp), %rdi
++ movq %r15, %rdi
+ call initialize_identity_maps
+- popq %rsi
+
+ /*
+ * Do the extraction, and jump to the new kernel..
+ */
+- pushq %rsi /* Save the real mode argument */
+- movq %rsi, %rdi /* real mode address */
+- leaq boot_heap(%rip), %rsi /* malloc area for uncompression */
+- leaq input_data(%rip), %rdx /* input_data */
+- movl input_len(%rip), %ecx /* input_len */
+- movq %rbp, %r8 /* output target address */
+- movl output_len(%rip), %r9d /* decompressed length, end of relocs */
+- call extract_kernel /* returns kernel location in %rax */
+- popq %rsi
++ /* pass struct boot_params pointer and output target address */
++ movq %r15, %rdi
++ movq %rbp, %rsi
++ call extract_kernel /* returns kernel entry point in %rax */
+
+ /*
+ * Jump to the decompressed kernel.
+ */
++ movq %r15, %rsi
+ jmp *%rax
+ SYM_FUNC_END(.Lrelocated)
+
+- .code32
+ /*
+- * This is the 32-bit trampoline that will be copied over to low memory.
++ * This is the 32-bit trampoline that will be copied over to low memory. It
++ * will be called using the ordinary 64-bit calling convention from code
++ * running in 64-bit mode.
+ *
+ * Return address is at the top of the stack (might be above 4G).
+- * ECX contains the base address of the trampoline memory.
+- * Non zero RDX means trampoline needs to enable 5-level paging.
++ * The first argument (EDI) contains the address of the temporary PGD level
++ * page table in 32-bit addressable memory which will be programmed into
++ * register CR3.
+ */
++ .section ".rodata", "a", @progbits
+ SYM_CODE_START(trampoline_32bit_src)
+- /* Set up data and stack segments */
+- movl $__KERNEL_DS, %eax
+- movl %eax, %ds
+- movl %eax, %ss
++ /*
++ * Preserve callee save 64-bit registers on the stack: this is
++ * necessary because the architecture does not guarantee that GPRs will
++ * retain their full 64-bit values across a 32-bit mode switch.
++ */
++ pushq %r15
++ pushq %r14
++ pushq %r13
++ pushq %r12
++ pushq %rbp
++ pushq %rbx
++
++ /* Preserve top half of RSP in a legacy mode GPR to avoid truncation */
++ movq %rsp, %rbx
++ shrq $32, %rbx
+
++ /* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */
++ pushq $__KERNEL32_CS
++ leaq 0f(%rip), %rax
++ pushq %rax
++ lretq
++
++ /*
++ * The 32-bit code below will do a far jump back to long mode and end
++ * up here after reconfiguring the number of paging levels. First, the
++ * stack pointer needs to be restored to its full 64-bit value before
++ * the callee save register contents can be popped from the stack.
++ */
++.Lret:
++ shlq $32, %rbx
++ orq %rbx, %rsp
++
++ /* Restore the preserved 64-bit registers */
++ popq %rbx
++ popq %rbp
++ popq %r12
++ popq %r13
++ popq %r14
++ popq %r15
++ retq
++
++ .code32
++0:
+ /* Disable paging */
+ movl %cr0, %eax
+ btrl $X86_CR0_PG_BIT, %eax
+ movl %eax, %cr0
+
+- /* Check what paging mode we want to be in after the trampoline */
+- testl %edx, %edx
+- jz 1f
+-
+- /* We want 5-level paging: don't touch CR3 if it already points to 5-level page tables */
+- movl %cr4, %eax
+- testl $X86_CR4_LA57, %eax
+- jnz 3f
+- jmp 2f
+-1:
+- /* We want 4-level paging: don't touch CR3 if it already points to 4-level page tables */
+- movl %cr4, %eax
+- testl $X86_CR4_LA57, %eax
+- jz 3f
+-2:
+ /* Point CR3 to the trampoline's new top level page table */
+- leal TRAMPOLINE_32BIT_PGTABLE_OFFSET(%ecx), %eax
+- movl %eax, %cr3
+-3:
++ movl %edi, %cr3
++
++	/* Set EFER.LME=1 as a precaution in case hypervisor pulls the rug */
+- pushl %ecx
+- pushl %edx
+ movl $MSR_EFER, %ecx
+ rdmsr
+ btsl $_EFER_LME, %eax
+ /* Avoid writing EFER if no change was made (for TDX guest) */
+ jc 1f
+ wrmsr
+-1: popl %edx
+- popl %ecx
+-
+-#ifdef CONFIG_X86_MCE
+- /*
+- * Preserve CR4.MCE if the kernel will enable #MC support.
+- * Clearing MCE may fault in some environments (that also force #MC
+- * support). Any machine check that occurs before #MC support is fully
+- * configured will crash the system regardless of the CR4.MCE value set
+- * here.
+- */
+- movl %cr4, %eax
+- andl $X86_CR4_MCE, %eax
+-#else
+- movl $0, %eax
+-#endif
+-
+- /* Enable PAE and LA57 (if required) paging modes */
+- orl $X86_CR4_PAE, %eax
+- testl %edx, %edx
+- jz 1f
+- orl $X86_CR4_LA57, %eax
+ 1:
++ /* Toggle CR4.LA57 */
++ movl %cr4, %eax
++ btcl $X86_CR4_LA57_BIT, %eax
+ movl %eax, %cr4
+
+- /* Calculate address of paging_enabled() once we are executing in the trampoline */
+- leal .Lpaging_enabled - trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_OFFSET(%ecx), %eax
+-
+- /* Prepare the stack for far return to Long Mode */
+- pushl $__KERNEL_CS
+- pushl %eax
+-
+ /* Enable paging again. */
+ movl %cr0, %eax
+ btsl $X86_CR0_PG_BIT, %eax
+ movl %eax, %cr0
+
+- lret
++ /*
++ * Return to the 64-bit calling code using LJMP rather than LRET, to
++ * avoid the need for a 32-bit addressable stack. The destination
++ * address will be adjusted after the template code is copied into a
++ * 32-bit addressable buffer.
++ */
++.Ljmp: ljmpl $__KERNEL_CS, $(.Lret - trampoline_32bit_src)
+ SYM_CODE_END(trampoline_32bit_src)
+
+- .code64
+-SYM_FUNC_START_LOCAL_NOALIGN(.Lpaging_enabled)
+- /* Return from the trampoline */
+- retq
+-SYM_FUNC_END(.Lpaging_enabled)
++/*
++ * This symbol is placed right after trampoline_32bit_src() so its address can
++ * be used to infer the size of the trampoline code.
++ */
++SYM_DATA(trampoline_ljmp_imm_offset, .word .Ljmp + 1 - trampoline_32bit_src)
+
+ /*
+ * The trampoline code has a size limit.
+@@ -721,7 +573,7 @@ SYM_FUNC_END(.Lpaging_enabled)
+ */
+ .org trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_SIZE
+
+- .code32
++ .text
+ SYM_FUNC_START_LOCAL_NOALIGN(.Lno_longmode)
+ /* This isn't an x86-64 CPU, so hang intentionally, we cannot continue */
+ 1:
+@@ -729,6 +581,7 @@ SYM_FUNC_START_LOCAL_NOALIGN(.Lno_longmode)
+ jmp 1b
+ SYM_FUNC_END(.Lno_longmode)
+
++ .globl verify_cpu
+ #include "../../kernel/verify_cpu.S"
+
+ .data
+@@ -760,249 +613,11 @@ SYM_DATA_START(boot_idt)
+ .endr
+ SYM_DATA_END_LABEL(boot_idt, SYM_L_GLOBAL, boot_idt_end)
+
+-#ifdef CONFIG_AMD_MEM_ENCRYPT
+-SYM_DATA_START(boot32_idt_desc)
+- .word boot32_idt_end - boot32_idt - 1
+- .long 0
+-SYM_DATA_END(boot32_idt_desc)
+- .balign 8
+-SYM_DATA_START(boot32_idt)
+- .rept 32
+- .quad 0
+- .endr
+-SYM_DATA_END_LABEL(boot32_idt, SYM_L_GLOBAL, boot32_idt_end)
+-#endif
+-
+-#ifdef CONFIG_EFI_STUB
+-SYM_DATA(image_offset, .long 0)
+-#endif
+-#ifdef CONFIG_EFI_MIXED
+-SYM_DATA_LOCAL(efi32_boot_args, .long 0, 0, 0)
+-SYM_DATA(efi_is64, .byte 1)
+-
+-#define ST32_boottime 60 // offsetof(efi_system_table_32_t, boottime)
+-#define BS32_handle_protocol 88 // offsetof(efi_boot_services_32_t, handle_protocol)
+-#define LI32_image_base 32 // offsetof(efi_loaded_image_32_t, image_base)
+-
+- __HEAD
+- .code32
+-SYM_FUNC_START(efi32_pe_entry)
+-/*
+- * efi_status_t efi32_pe_entry(efi_handle_t image_handle,
+- * efi_system_table_32_t *sys_table)
+- */
+-
+- pushl %ebp
+- movl %esp, %ebp
+- pushl %eax // dummy push to allocate loaded_image
+-
+- pushl %ebx // save callee-save registers
+- pushl %edi
+-
+- call verify_cpu // check for long mode support
+- testl %eax, %eax
+- movl $0x80000003, %eax // EFI_UNSUPPORTED
+- jnz 2f
+-
+- call 1f
+-1: pop %ebx
+- subl $ rva(1b), %ebx
+-
+- /* Get the loaded image protocol pointer from the image handle */
+- leal -4(%ebp), %eax
+- pushl %eax // &loaded_image
+- leal rva(loaded_image_proto)(%ebx), %eax
+- pushl %eax // pass the GUID address
+- pushl 8(%ebp) // pass the image handle
+-
+- /*
+- * Note the alignment of the stack frame.
+- * sys_table
+- * handle <-- 16-byte aligned on entry by ABI
+- * return address
+- * frame pointer
+- * loaded_image <-- local variable
+- * saved %ebx <-- 16-byte aligned here
+- * saved %edi
+- * &loaded_image
+- * &loaded_image_proto
+- * handle <-- 16-byte aligned for call to handle_protocol
+- */
+-
+- movl 12(%ebp), %eax // sys_table
+- movl ST32_boottime(%eax), %eax // sys_table->boottime
+- call *BS32_handle_protocol(%eax) // sys_table->boottime->handle_protocol
+- addl $12, %esp // restore argument space
+- testl %eax, %eax
+- jnz 2f
+-
+- movl 8(%ebp), %ecx // image_handle
+- movl 12(%ebp), %edx // sys_table
+- movl -4(%ebp), %esi // loaded_image
+- movl LI32_image_base(%esi), %esi // loaded_image->image_base
+- movl %ebx, %ebp // startup_32 for efi32_pe_stub_entry
+- /*
+- * We need to set the image_offset variable here since startup_32() will
+- * use it before we get to the 64-bit efi_pe_entry() in C code.
+- */
+- subl %esi, %ebx
+- movl %ebx, rva(image_offset)(%ebp) // save image_offset
+- jmp efi32_pe_stub_entry
+-
+-2: popl %edi // restore callee-save registers
+- popl %ebx
+- leave
+- RET
+-SYM_FUNC_END(efi32_pe_entry)
+-
+- .section ".rodata"
+- /* EFI loaded image protocol GUID */
+- .balign 4
+-SYM_DATA_START_LOCAL(loaded_image_proto)
+- .long 0x5b1b31a1
+- .word 0x9562, 0x11d2
+- .byte 0x8e, 0x3f, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b
+-SYM_DATA_END(loaded_image_proto)
+-#endif
+-
+-#ifdef CONFIG_AMD_MEM_ENCRYPT
+- __HEAD
+- .code32
+-/*
+- * Write an IDT entry into boot32_idt
+- *
+- * Parameters:
+- *
+- * %eax: Handler address
+- * %edx: Vector number
+- *
+- * Physical offset is expected in %ebp
+- */
+-SYM_FUNC_START(startup32_set_idt_entry)
+- push %ebx
+- push %ecx
+-
+- /* IDT entry address to %ebx */
+- leal rva(boot32_idt)(%ebp), %ebx
+- shl $3, %edx
+- addl %edx, %ebx
+-
+- /* Build IDT entry, lower 4 bytes */
+- movl %eax, %edx
+- andl $0x0000ffff, %edx # Target code segment offset [15:0]
+- movl $__KERNEL32_CS, %ecx # Target code segment selector
+- shl $16, %ecx
+- orl %ecx, %edx
+-
+- /* Store lower 4 bytes to IDT */
+- movl %edx, (%ebx)
+-
+- /* Build IDT entry, upper 4 bytes */
+- movl %eax, %edx
+- andl $0xffff0000, %edx # Target code segment offset [31:16]
+- orl $0x00008e00, %edx # Present, Type 32-bit Interrupt Gate
+-
+- /* Store upper 4 bytes to IDT */
+- movl %edx, 4(%ebx)
+-
+- pop %ecx
+- pop %ebx
+- RET
+-SYM_FUNC_END(startup32_set_idt_entry)
+-#endif
+-
+-SYM_FUNC_START(startup32_load_idt)
+-#ifdef CONFIG_AMD_MEM_ENCRYPT
+- /* #VC handler */
+- leal rva(startup32_vc_handler)(%ebp), %eax
+- movl $X86_TRAP_VC, %edx
+- call startup32_set_idt_entry
+-
+- /* Load IDT */
+- leal rva(boot32_idt)(%ebp), %eax
+- movl %eax, rva(boot32_idt_desc+2)(%ebp)
+- lidt rva(boot32_idt_desc)(%ebp)
+-#endif
+- RET
+-SYM_FUNC_END(startup32_load_idt)
+-
+-/*
+- * Check for the correct C-bit position when the startup_32 boot-path is used.
+- *
+- * The check makes use of the fact that all memory is encrypted when paging is
+- * disabled. The function creates 64 bits of random data using the RDRAND
+- * instruction. RDRAND is mandatory for SEV guests, so always available. If the
+- * hypervisor violates that the kernel will crash right here.
+- *
+- * The 64 bits of random data are stored to a memory location and at the same
+- * time kept in the %eax and %ebx registers. Since encryption is always active
+- * when paging is off the random data will be stored encrypted in main memory.
+- *
+- * Then paging is enabled. When the C-bit position is correct all memory is
+- * still mapped encrypted and comparing the register values with memory will
+- * succeed. An incorrect C-bit position will map all memory unencrypted, so that
+- * the compare will use the encrypted random data and fail.
+- */
+-SYM_FUNC_START(startup32_check_sev_cbit)
+-#ifdef CONFIG_AMD_MEM_ENCRYPT
+- pushl %eax
+- pushl %ebx
+- pushl %ecx
+- pushl %edx
+-
+- /* Check for non-zero sev_status */
+- movl rva(sev_status)(%ebp), %eax
+- testl %eax, %eax
+- jz 4f
+-
+- /*
+- * Get two 32-bit random values - Don't bail out if RDRAND fails
+- * because it is better to prevent forward progress if no random value
+- * can be gathered.
+- */
+-1: rdrand %eax
+- jnc 1b
+-2: rdrand %ebx
+- jnc 2b
+-
+- /* Store to memory and keep it in the registers */
+- movl %eax, rva(sev_check_data)(%ebp)
+- movl %ebx, rva(sev_check_data+4)(%ebp)
+-
+- /* Enable paging to see if encryption is active */
+- movl %cr0, %edx /* Backup %cr0 in %edx */
+- movl $(X86_CR0_PG | X86_CR0_PE), %ecx /* Enable Paging and Protected mode */
+- movl %ecx, %cr0
+-
+- cmpl %eax, rva(sev_check_data)(%ebp)
+- jne 3f
+- cmpl %ebx, rva(sev_check_data+4)(%ebp)
+- jne 3f
+-
+- movl %edx, %cr0 /* Restore previous %cr0 */
+-
+- jmp 4f
+-
+-3: /* Check failed - hlt the machine */
+- hlt
+- jmp 3b
+-
+-4:
+- popl %edx
+- popl %ecx
+- popl %ebx
+- popl %eax
+-#endif
+- RET
+-SYM_FUNC_END(startup32_check_sev_cbit)
+-
+ /*
+ * Stack and heap for uncompression
+ */
+ .bss
+ .balign 4
+-SYM_DATA_LOCAL(boot_heap, .fill BOOT_HEAP_SIZE, 1, 0)
+-
+ SYM_DATA_START_LOCAL(boot_stack)
+ .fill BOOT_STACK_SIZE, 1, 0
+ .balign 16
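+
For orientation, the hunks above leave the trampoline as two pages (TRAMPOLINE_32BIT_SIZE), laid out as sketched below; the offsets match the pgtable.h changes further down:

	trampoline_32bit + 0x0000: new top-level page table (one page)
	trampoline_32bit + 0x1000: copy of trampoline_32bit_src
	                           (TRAMPOLINE_32BIT_CODE_OFFSET)

No stack page remains: the LRET through a prepared far-return frame is gone, replaced by an LJMP whose absolute immediate is patched once the code has been copied into 32-bit addressable memory.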
+diff --git a/arch/x86/boot/compressed/ident_map_64.c b/arch/x86/boot/compressed/ident_map_64.c
+index d34222816c9f5..b8c42339bc355 100644
+--- a/arch/x86/boot/compressed/ident_map_64.c
++++ b/arch/x86/boot/compressed/ident_map_64.c
+@@ -167,8 +167,9 @@ void initialize_identity_maps(void *rmode)
+ * or does not touch all the pages covering them.
+ */
+ kernel_add_identity_map((unsigned long)_head, (unsigned long)_end);
+- boot_params = rmode;
+- kernel_add_identity_map((unsigned long)boot_params, (unsigned long)(boot_params + 1));
++ boot_params_ptr = rmode;
++ kernel_add_identity_map((unsigned long)boot_params_ptr,
++ (unsigned long)(boot_params_ptr + 1));
+ cmdline = get_cmd_line_ptr();
+ kernel_add_identity_map(cmdline, cmdline + COMMAND_LINE_SIZE);
+
+@@ -176,7 +177,7 @@ void initialize_identity_maps(void *rmode)
+ * Also map the setup_data entries passed via boot_params in case they
+ * need to be accessed by uncompressed kernel via the identity mapping.
+ */
+- sd = (struct setup_data *)boot_params->hdr.setup_data;
++ sd = (struct setup_data *)boot_params_ptr->hdr.setup_data;
+ while (sd) {
+ unsigned long sd_addr = (unsigned long)sd;
+
+diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
+index e476bcbd9b422..9794d9174795d 100644
+--- a/arch/x86/boot/compressed/kaslr.c
++++ b/arch/x86/boot/compressed/kaslr.c
+@@ -63,7 +63,7 @@ static unsigned long get_boot_seed(void)
+ unsigned long hash = 0;
+
+ hash = rotate_xor(hash, build_str, sizeof(build_str));
+- hash = rotate_xor(hash, boot_params, sizeof(*boot_params));
++ hash = rotate_xor(hash, boot_params_ptr, sizeof(*boot_params_ptr));
+
+ return hash;
+ }
+@@ -383,7 +383,7 @@ static void handle_mem_options(void)
+ static void mem_avoid_init(unsigned long input, unsigned long input_size,
+ unsigned long output)
+ {
+- unsigned long init_size = boot_params->hdr.init_size;
++ unsigned long init_size = boot_params_ptr->hdr.init_size;
+ u64 initrd_start, initrd_size;
+ unsigned long cmd_line, cmd_line_size;
+
+@@ -395,10 +395,10 @@ static void mem_avoid_init(unsigned long input, unsigned long input_size,
+ mem_avoid[MEM_AVOID_ZO_RANGE].size = (output + init_size) - input;
+
+ /* Avoid initrd. */
+- initrd_start = (u64)boot_params->ext_ramdisk_image << 32;
+- initrd_start |= boot_params->hdr.ramdisk_image;
+- initrd_size = (u64)boot_params->ext_ramdisk_size << 32;
+- initrd_size |= boot_params->hdr.ramdisk_size;
++ initrd_start = (u64)boot_params_ptr->ext_ramdisk_image << 32;
++ initrd_start |= boot_params_ptr->hdr.ramdisk_image;
++ initrd_size = (u64)boot_params_ptr->ext_ramdisk_size << 32;
++ initrd_size |= boot_params_ptr->hdr.ramdisk_size;
+ mem_avoid[MEM_AVOID_INITRD].start = initrd_start;
+ mem_avoid[MEM_AVOID_INITRD].size = initrd_size;
+ /* No need to set mapping for initrd, it will be handled in VO. */
+@@ -413,8 +413,8 @@ static void mem_avoid_init(unsigned long input, unsigned long input_size,
+ }
+
+ /* Avoid boot parameters. */
+- mem_avoid[MEM_AVOID_BOOTPARAMS].start = (unsigned long)boot_params;
+- mem_avoid[MEM_AVOID_BOOTPARAMS].size = sizeof(*boot_params);
++ mem_avoid[MEM_AVOID_BOOTPARAMS].start = (unsigned long)boot_params_ptr;
++ mem_avoid[MEM_AVOID_BOOTPARAMS].size = sizeof(*boot_params_ptr);
+
+ /* We don't need to set a mapping for setup_data. */
+
+@@ -447,7 +447,7 @@ static bool mem_avoid_overlap(struct mem_vector *img,
+ }
+
+ /* Avoid all entries in the setup_data linked list. */
+- ptr = (struct setup_data *)(unsigned long)boot_params->hdr.setup_data;
++ ptr = (struct setup_data *)(unsigned long)boot_params_ptr->hdr.setup_data;
+ while (ptr) {
+ struct mem_vector avoid;
+
+@@ -679,7 +679,7 @@ static bool process_mem_region(struct mem_vector *region,
+ static bool
+ process_efi_entries(unsigned long minimum, unsigned long image_size)
+ {
+- struct efi_info *e = &boot_params->efi_info;
++ struct efi_info *e = &boot_params_ptr->efi_info;
+ bool efi_mirror_found = false;
+ struct mem_vector region;
+ efi_memory_desc_t *md;
+@@ -761,8 +761,8 @@ static void process_e820_entries(unsigned long minimum,
+ struct boot_e820_entry *entry;
+
+ /* Verify potential e820 positions, appending to slots list. */
+- for (i = 0; i < boot_params->e820_entries; i++) {
+- entry = &boot_params->e820_table[i];
++ for (i = 0; i < boot_params_ptr->e820_entries; i++) {
++ entry = &boot_params_ptr->e820_table[i];
+ /* Skip non-RAM entries. */
+ if (entry->type != E820_TYPE_RAM)
+ continue;
+@@ -836,7 +836,7 @@ void choose_random_location(unsigned long input,
+ return;
+ }
+
+- boot_params->hdr.loadflags |= KASLR_FLAG;
++ boot_params_ptr->hdr.loadflags |= KASLR_FLAG;
+
+ if (IS_ENABLED(CONFIG_X86_32))
+ mem_limit = KERNEL_IMAGE_SIZE;
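+
get_boot_seed() above folds the build string and the whole boot_params into one hash with a rotate-and-XOR helper. A minimal sketch of that pattern, assuming the input size is a multiple of the word size (the in-tree rotate_xor() may differ in detail):

	static unsigned long rotate_xor(unsigned long hash, const void *area,
					size_t size)
	{
		const unsigned long *ptr = area;
		size_t i;

		for (i = 0; i < size / sizeof(hash); i++) {
			/* Rotate by an odd number of bits (7) and XOR in the next word. */
			hash = (hash << (8 * sizeof(hash) - 7)) | (hash >> 7);
			hash ^= ptr[i];
		}

		return hash;
	}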
+diff --git a/arch/x86/boot/compressed/mem_encrypt.S b/arch/x86/boot/compressed/mem_encrypt.S
+index a73e4d783cae2..32f7cc8a86254 100644
+--- a/arch/x86/boot/compressed/mem_encrypt.S
++++ b/arch/x86/boot/compressed/mem_encrypt.S
+@@ -12,16 +12,13 @@
+ #include <asm/processor-flags.h>
+ #include <asm/msr.h>
+ #include <asm/asm-offsets.h>
++#include <asm/segment.h>
++#include <asm/trapnr.h>
+
+ .text
+ .code32
+ SYM_FUNC_START(get_sev_encryption_bit)
+- xor %eax, %eax
+-
+-#ifdef CONFIG_AMD_MEM_ENCRYPT
+ push %ebx
+- push %ecx
+- push %edx
+
+ movl $0x80000000, %eax /* CPUID to check the highest leaf */
+ cpuid
+@@ -52,12 +49,7 @@ SYM_FUNC_START(get_sev_encryption_bit)
+ xor %eax, %eax
+
+ .Lsev_exit:
+- pop %edx
+- pop %ecx
+ pop %ebx
+-
+-#endif /* CONFIG_AMD_MEM_ENCRYPT */
+-
+ RET
+ SYM_FUNC_END(get_sev_encryption_bit)
+
+@@ -98,7 +90,7 @@ SYM_CODE_START_LOCAL(sev_es_req_cpuid)
+ jmp 1b
+ SYM_CODE_END(sev_es_req_cpuid)
+
+-SYM_CODE_START(startup32_vc_handler)
++SYM_CODE_START_LOCAL(startup32_vc_handler)
+ pushl %eax
+ pushl %ebx
+ pushl %ecx
+@@ -184,15 +176,149 @@ SYM_CODE_START(startup32_vc_handler)
+ jmp .Lfail
+ SYM_CODE_END(startup32_vc_handler)
+
++/*
++ * Write an IDT entry into boot32_idt
++ *
++ * Parameters:
++ *
++ * %eax: Handler address
++ * %edx: Vector number
++ * %ecx: IDT address
++ */
++SYM_FUNC_START_LOCAL(startup32_set_idt_entry)
++ /* IDT entry address to %ecx */
++ leal (%ecx, %edx, 8), %ecx
++
++ /* Build IDT entry, lower 4 bytes */
++ movl %eax, %edx
++ andl $0x0000ffff, %edx # Target code segment offset [15:0]
++ orl $(__KERNEL32_CS << 16), %edx # Target code segment selector
++
++ /* Store lower 4 bytes to IDT */
++ movl %edx, (%ecx)
++
++ /* Build IDT entry, upper 4 bytes */
++ movl %eax, %edx
++ andl $0xffff0000, %edx # Target code segment offset [31:16]
++ orl $0x00008e00, %edx # Present, Type 32-bit Interrupt Gate
++
++ /* Store upper 4 bytes to IDT */
++ movl %edx, 4(%ecx)
++
++ RET
++SYM_FUNC_END(startup32_set_idt_entry)
++
++SYM_FUNC_START(startup32_load_idt)
++ push %ebp
++ push %ebx
++
++ call 1f
++1: pop %ebp
++
++ leal (boot32_idt - 1b)(%ebp), %ebx
++
++ /* #VC handler */
++ leal (startup32_vc_handler - 1b)(%ebp), %eax
++ movl $X86_TRAP_VC, %edx
++ movl %ebx, %ecx
++ call startup32_set_idt_entry
++
++ /* Load IDT */
++ leal (boot32_idt_desc - 1b)(%ebp), %ecx
++ movl %ebx, 2(%ecx)
++ lidt (%ecx)
++
++ pop %ebx
++ pop %ebp
++ RET
++SYM_FUNC_END(startup32_load_idt)
++
++/*
++ * Check for the correct C-bit position when the startup_32 boot-path is used.
++ *
++ * The check makes use of the fact that all memory is encrypted when paging is
++ * disabled. The function creates 64 bits of random data using the RDRAND
++ * instruction. RDRAND is mandatory for SEV guests, so it is always available.
++ * If the hypervisor violates that, the kernel will crash right here.
++ *
++ * The 64 bits of random data are stored to a memory location and at the same
++ * time kept in the %eax and %ebx registers. Since encryption is always active
++ * when paging is off the random data will be stored encrypted in main memory.
++ *
++ * Then paging is enabled. When the C-bit position is correct all memory is
++ * still mapped encrypted and comparing the register values with memory will
++ * succeed. An incorrect C-bit position will map all memory unencrypted, so that
++ * the compare will use the encrypted random data and fail.
++ */
++SYM_FUNC_START(startup32_check_sev_cbit)
++ pushl %ebx
++ pushl %ebp
++
++ call 0f
++0: popl %ebp
++
++ /* Check for non-zero sev_status */
++ movl (sev_status - 0b)(%ebp), %eax
++ testl %eax, %eax
++ jz 4f
++
++ /*
++ * Get two 32-bit random values. Don't bail out if RDRAND fails: it is
++ * better to block forward progress than to continue without a random
++ * value.
++ */
++1: rdrand %eax
++ jnc 1b
++2: rdrand %ebx
++ jnc 2b
++
++ /* Store to memory and keep it in the registers */
++ leal (sev_check_data - 0b)(%ebp), %ebp
++ movl %eax, 0(%ebp)
++ movl %ebx, 4(%ebp)
++
++ /* Enable paging to see if encryption is active */
++ movl %cr0, %edx /* Backup %cr0 in %edx */
++ movl $(X86_CR0_PG | X86_CR0_PE), %ecx /* Enable Paging and Protected mode */
++ movl %ecx, %cr0
++
++ cmpl %eax, 0(%ebp)
++ jne 3f
++ cmpl %ebx, 4(%ebp)
++ jne 3f
++
++ movl %edx, %cr0 /* Restore previous %cr0 */
++
++ jmp 4f
++
++3: /* Check failed - hlt the machine */
++ hlt
++ jmp 3b
++
++4:
++ popl %ebp
++ popl %ebx
++ RET
++SYM_FUNC_END(startup32_check_sev_cbit)
++
+ .code64
+
+ #include "../../kernel/sev_verify_cbit.S"
+
+ .data
+
+-#ifdef CONFIG_AMD_MEM_ENCRYPT
+ .balign 8
+ SYM_DATA(sme_me_mask, .quad 0)
+ SYM_DATA(sev_status, .quad 0)
+ SYM_DATA(sev_check_data, .quad 0)
+-#endif
++
++SYM_DATA_START_LOCAL(boot32_idt)
++ .rept 32
++ .quad 0
++ .endr
++SYM_DATA_END(boot32_idt)
++
++SYM_DATA_START_LOCAL(boot32_idt_desc)
++ .word . - boot32_idt - 1
++ .long 0
++SYM_DATA_END(boot32_idt_desc)
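+
The two 4-byte stores in startup32_set_idt_entry() above encode a standard 32-bit interrupt gate. The same encoding as a C sketch (the struct and helper names are illustrative, not from the patch):

	struct idt_gate {
		u32 lo;		/* offset[15:0] | selector << 16 */
		u32 hi;		/* offset[31:16] | P=1, DPL=0, type=0xE */
	};

	static struct idt_gate make_int_gate(u32 handler, u16 selector)
	{
		struct idt_gate gate = {
			.lo = (handler & 0x0000ffff) | ((u32)selector << 16),
			.hi = (handler & 0xffff0000) | 0x00008e00,
		};

		return gate;
	}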
+diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
+index cf690d8712f4e..8ae7893d712ff 100644
+--- a/arch/x86/boot/compressed/misc.c
++++ b/arch/x86/boot/compressed/misc.c
+@@ -46,7 +46,7 @@ void *memmove(void *dest, const void *src, size_t n);
+ /*
+ * This is set up by the setup-routine at boot-time
+ */
+-struct boot_params *boot_params;
++struct boot_params *boot_params_ptr;
+
+ struct port_io_ops pio_ops;
+
+@@ -132,8 +132,8 @@ void __putstr(const char *s)
+ if (lines == 0 || cols == 0)
+ return;
+
+- x = boot_params->screen_info.orig_x;
+- y = boot_params->screen_info.orig_y;
++ x = boot_params_ptr->screen_info.orig_x;
++ y = boot_params_ptr->screen_info.orig_y;
+
+ while ((c = *s++) != '\0') {
+ if (c == '\n') {
+@@ -154,8 +154,8 @@ void __putstr(const char *s)
+ }
+ }
+
+- boot_params->screen_info.orig_x = x;
+- boot_params->screen_info.orig_y = y;
++ boot_params_ptr->screen_info.orig_x = x;
++ boot_params_ptr->screen_info.orig_y = y;
+
+ pos = (x + cols * y) * 2; /* Update cursor position */
+ outb(14, vidport);
+@@ -277,7 +277,7 @@ static inline void handle_relocations(void *output, unsigned long output_len,
+ { }
+ #endif
+
+-static void parse_elf(void *output)
++static size_t parse_elf(void *output)
+ {
+ #ifdef CONFIG_X86_64
+ Elf64_Ehdr ehdr;
+@@ -293,10 +293,8 @@ static void parse_elf(void *output)
+ if (ehdr.e_ident[EI_MAG0] != ELFMAG0 ||
+ ehdr.e_ident[EI_MAG1] != ELFMAG1 ||
+ ehdr.e_ident[EI_MAG2] != ELFMAG2 ||
+- ehdr.e_ident[EI_MAG3] != ELFMAG3) {
++ ehdr.e_ident[EI_MAG3] != ELFMAG3)
+ error("Kernel is not a valid ELF file");
+- return;
+- }
+
+ debug_putstr("Parsing ELF... ");
+
+@@ -328,6 +326,35 @@ static void parse_elf(void *output)
+ }
+
+ free(phdrs);
++
++ return ehdr.e_entry - LOAD_PHYSICAL_ADDR;
++}
++
++const unsigned long kernel_total_size = VO__end - VO__text;
++
++static u8 boot_heap[BOOT_HEAP_SIZE] __aligned(4);
++
++extern unsigned char input_data[];
++extern unsigned int input_len, output_len;
++
++unsigned long decompress_kernel(unsigned char *outbuf, unsigned long virt_addr,
++ void (*error)(char *x))
++{
++ unsigned long entry;
++
++ if (!free_mem_ptr) {
++ free_mem_ptr = (unsigned long)boot_heap;
++ free_mem_end_ptr = (unsigned long)boot_heap + sizeof(boot_heap);
++ }
++
++ if (__decompress(input_data, input_len, NULL, NULL, outbuf, output_len,
++ NULL, error) < 0)
++ return ULONG_MAX;
++
++ entry = parse_elf(outbuf);
++ handle_relocations(outbuf, output_len, virt_addr);
++
++ return entry;
+ }
+
+ /*
+@@ -347,25 +374,22 @@ static void parse_elf(void *output)
+ * |-------uncompressed kernel image---------|
+ *
+ */
+-asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
+- unsigned char *input_data,
+- unsigned long input_len,
+- unsigned char *output,
+- unsigned long output_len)
++asmlinkage __visible void *extract_kernel(void *rmode, unsigned char *output)
+ {
+- const unsigned long kernel_total_size = VO__end - VO__text;
+ unsigned long virt_addr = LOAD_PHYSICAL_ADDR;
++ memptr heap = (memptr)boot_heap;
+ unsigned long needed_size;
++ size_t entry_offset;
+
+ /* Retain x86 boot parameters pointer passed from startup_32/64. */
+- boot_params = rmode;
++ boot_params_ptr = rmode;
+
+ /* Clear flags intended for solely in-kernel use. */
+- boot_params->hdr.loadflags &= ~KASLR_FLAG;
++ boot_params_ptr->hdr.loadflags &= ~KASLR_FLAG;
+
+- sanitize_boot_params(boot_params);
++ sanitize_boot_params(boot_params_ptr);
+
+- if (boot_params->screen_info.orig_video_mode == 7) {
++ if (boot_params_ptr->screen_info.orig_video_mode == 7) {
+ vidmem = (char *) 0xb0000;
+ vidport = 0x3b4;
+ } else {
+@@ -373,8 +397,8 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
+ vidport = 0x3d4;
+ }
+
+- lines = boot_params->screen_info.orig_video_lines;
+- cols = boot_params->screen_info.orig_video_cols;
++ lines = boot_params_ptr->screen_info.orig_video_lines;
++ cols = boot_params_ptr->screen_info.orig_video_cols;
+
+ init_default_io_ops();
+
+@@ -393,7 +417,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
+ * so that early debugging output from the RSDP parsing code can be
+ * collected.
+ */
+- boot_params->acpi_rsdp_addr = get_rsdp_addr();
++ boot_params_ptr->acpi_rsdp_addr = get_rsdp_addr();
+
+ debug_putstr("early console in extract_kernel\n");
+
+@@ -411,7 +435,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
+ * entries. This ensures the full mapped area is usable RAM
+ * and doesn't include any reserved areas.
+ */
+- needed_size = max(output_len, kernel_total_size);
++ needed_size = max_t(unsigned long, output_len, kernel_total_size);
+ #ifdef CONFIG_X86_64
+ needed_size = ALIGN(needed_size, MIN_KERNEL_ALIGN);
+ #endif
+@@ -442,7 +466,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
+ #ifdef CONFIG_X86_64
+ if (heap > 0x3fffffffffffUL)
+ error("Destination address too large");
+- if (virt_addr + max(output_len, kernel_total_size) > KERNEL_IMAGE_SIZE)
++ if (virt_addr + needed_size > KERNEL_IMAGE_SIZE)
+ error("Destination virtual address is beyond the kernel mapping area");
+ #else
+ if (heap > ((-__PAGE_OFFSET-(128<<20)-1) & 0x7fffffff))
+@@ -454,16 +478,17 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
+ #endif
+
+ debug_putstr("\nDecompressing Linux... ");
+- __decompress(input_data, input_len, NULL, NULL, output, output_len,
+- NULL, error);
+- parse_elf(output);
+- handle_relocations(output, output_len, virt_addr);
+- debug_putstr("done.\nBooting the kernel.\n");
++
++ entry_offset = decompress_kernel(output, virt_addr, error);
++
++ debug_putstr("done.\nBooting the kernel (entry_offset: 0x");
++ debug_puthex(entry_offset);
++ debug_putstr(").\n");
+
+ /* Disable exception handling before booting the kernel */
+ cleanup_exception_handling();
+
+- return output;
++ return output + entry_offset;
+ }
+
+ void fortify_panic(const char *name)
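+
The refactored decompress_kernel() returns the entry point as an offset from the output buffer and ULONG_MAX on failure, so callers other than extract_kernel() can use it directly. A sketch; jump_to_kernel() is a hypothetical stand-in for the actual handover:

	unsigned long entry;

	entry = decompress_kernel(outbuf, virt_addr, error);
	if (entry == ULONG_MAX)
		error("Decompression failed");

	/* entry is ehdr.e_entry - LOAD_PHYSICAL_ADDR, i.e. buffer-relative. */
	jump_to_kernel(outbuf + entry, boot_params_ptr);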
+diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
+index a49d9219c06e5..254acd76efde2 100644
+--- a/arch/x86/boot/compressed/misc.h
++++ b/arch/x86/boot/compressed/misc.h
+@@ -52,7 +52,6 @@ extern memptr free_mem_ptr;
+ extern memptr free_mem_end_ptr;
+ void *malloc(int size);
+ void free(void *where);
+-extern struct boot_params *boot_params;
+ void __putstr(const char *s);
+ void __puthex(unsigned long value);
+ #define error_putstr(__x) __putstr(__x)
+@@ -170,9 +169,7 @@ static inline int count_immovable_mem_regions(void) { return 0; }
+ #endif
+
+ /* ident_map_64.c */
+-#ifdef CONFIG_X86_5LEVEL
+ extern unsigned int __pgtable_l5_enabled, pgdir_shift, ptrs_per_p4d;
+-#endif
+ extern void kernel_add_identity_map(unsigned long start, unsigned long end);
+
+ /* Used by PAGE_KERN* macros: */
+diff --git a/arch/x86/boot/compressed/pgtable.h b/arch/x86/boot/compressed/pgtable.h
+index cc9b2529a0863..6d595abe06b34 100644
+--- a/arch/x86/boot/compressed/pgtable.h
++++ b/arch/x86/boot/compressed/pgtable.h
+@@ -3,18 +3,16 @@
+
+ #define TRAMPOLINE_32BIT_SIZE (2 * PAGE_SIZE)
+
+-#define TRAMPOLINE_32BIT_PGTABLE_OFFSET 0
+-
+ #define TRAMPOLINE_32BIT_CODE_OFFSET PAGE_SIZE
+-#define TRAMPOLINE_32BIT_CODE_SIZE 0x80
+-
+-#define TRAMPOLINE_32BIT_STACK_END TRAMPOLINE_32BIT_SIZE
++#define TRAMPOLINE_32BIT_CODE_SIZE 0xA0
+
+ #ifndef __ASSEMBLER__
+
+ extern unsigned long *trampoline_32bit;
+
+-extern void trampoline_32bit_src(void *return_ptr);
++extern void trampoline_32bit_src(void *trampoline, bool enable_5lvl);
++
++extern const u16 trampoline_ljmp_imm_offset;
+
+ #endif /* __ASSEMBLER__ */
+ #endif /* BOOT_COMPRESSED_PAGETABLE_H */
+diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c
+index 2ac12ff4111bf..51f957b24ba7a 100644
+--- a/arch/x86/boot/compressed/pgtable_64.c
++++ b/arch/x86/boot/compressed/pgtable_64.c
+@@ -16,11 +16,6 @@ unsigned int __section(".data") pgdir_shift = 39;
+ unsigned int __section(".data") ptrs_per_p4d = 1;
+ #endif
+
+-struct paging_config {
+- unsigned long trampoline_start;
+- unsigned long l5_required;
+-};
+-
+ /* Buffer to preserve trampoline memory */
+ static char trampoline_save[TRAMPOLINE_32BIT_SIZE];
+
+@@ -29,11 +24,10 @@ static char trampoline_save[TRAMPOLINE_32BIT_SIZE];
+ * purposes.
+ *
+ * Avoid putting the pointer into .bss as it will be cleared between
+- * paging_prepare() and extract_kernel().
++ * configure_5level_paging() and extract_kernel().
+ */
+ unsigned long *trampoline_32bit __section(".data");
+
+-extern struct boot_params *boot_params;
+ int cmdline_find_option_bool(const char *option);
+
+ static unsigned long find_trampoline_placement(void)
+@@ -54,7 +48,7 @@ static unsigned long find_trampoline_placement(void)
+ *
+ * Only look for values in the legacy ROM for non-EFI system.
+ */
+- signature = (char *)&boot_params->efi_info.efi_loader_signature;
++ signature = (char *)&boot_params_ptr->efi_info.efi_loader_signature;
+ if (strncmp(signature, EFI32_LOADER_SIGNATURE, 4) &&
+ strncmp(signature, EFI64_LOADER_SIGNATURE, 4)) {
+ ebda_start = *(unsigned short *)0x40e << 4;
+@@ -70,10 +64,10 @@ static unsigned long find_trampoline_placement(void)
+ bios_start = round_down(bios_start, PAGE_SIZE);
+
+ /* Find the first usable memory region under bios_start. */
+- for (i = boot_params->e820_entries - 1; i >= 0; i--) {
++ for (i = boot_params_ptr->e820_entries - 1; i >= 0; i--) {
+ unsigned long new = bios_start;
+
+- entry = &boot_params->e820_table[i];
++ entry = &boot_params_ptr->e820_table[i];
+
+ /* Skip all entries above bios_start. */
+ if (bios_start <= entry->addr)
+@@ -106,12 +100,13 @@ static unsigned long find_trampoline_placement(void)
+ return bios_start - TRAMPOLINE_32BIT_SIZE;
+ }
+
+-struct paging_config paging_prepare(void *rmode)
++asmlinkage void configure_5level_paging(struct boot_params *bp, void *pgtable)
+ {
+- struct paging_config paging_config = {};
++ void (*toggle_la57)(void *cr3);
++ bool l5_required = false;
+
+ /* Initialize boot_params. Required for cmdline_find_option_bool(). */
+- boot_params = rmode;
++ boot_params_ptr = bp;
+
+ /*
+ * Check if LA57 is desired and supported.
+@@ -129,12 +124,22 @@ struct paging_config paging_prepare(void *rmode)
+ !cmdline_find_option_bool("no5lvl") &&
+ native_cpuid_eax(0) >= 7 &&
+ (native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31)))) {
+- paging_config.l5_required = 1;
++ l5_required = true;
++
++ /* Initialize variables for 5-level paging */
++ __pgtable_l5_enabled = 1;
++ pgdir_shift = 48;
++ ptrs_per_p4d = 512;
+ }
+
+- paging_config.trampoline_start = find_trampoline_placement();
++ /*
++ * The trampoline will not be used if the paging mode is already set to
++ * the desired one.
++ */
++ if (l5_required == !!(native_read_cr4() & X86_CR4_LA57))
++ return;
+
+- trampoline_32bit = (unsigned long *)paging_config.trampoline_start;
++ trampoline_32bit = (unsigned long *)find_trampoline_placement();
+
+ /* Preserve trampoline memory */
+ memcpy(trampoline_save, trampoline_32bit, TRAMPOLINE_32BIT_SIZE);
+@@ -143,32 +148,32 @@ struct paging_config paging_prepare(void *rmode)
+ memset(trampoline_32bit, 0, TRAMPOLINE_32BIT_SIZE);
+
+ /* Copy trampoline code in place */
+- memcpy(trampoline_32bit + TRAMPOLINE_32BIT_CODE_OFFSET / sizeof(unsigned long),
++ toggle_la57 = memcpy(trampoline_32bit +
++ TRAMPOLINE_32BIT_CODE_OFFSET / sizeof(unsigned long),
+ &trampoline_32bit_src, TRAMPOLINE_32BIT_CODE_SIZE);
+
++ /*
++ * Avoid the need for a stack in the 32-bit trampoline code by using
++ * LJMP rather than LRET to return to long mode. LJMP takes an
++ * immediate absolute address, which needs to be adjusted based on the
++ * placement of the trampoline.
++ */
++ *(u32 *)((u8 *)toggle_la57 + trampoline_ljmp_imm_offset) +=
++ (unsigned long)toggle_la57;
++
+ /*
+ * The code below prepares page table in trampoline memory.
+ *
+ * The new page table will be used by trampoline code for switching
+ * from 4- to 5-level paging or vice versa.
+- *
+- * If switching is not required, the page table is unused: trampoline
+- * code wouldn't touch CR3.
+- */
+-
+- /*
+- * We are not going to use the page table in trampoline memory if we
+- * are already in the desired paging mode.
+ */
+- if (paging_config.l5_required == !!(native_read_cr4() & X86_CR4_LA57))
+- goto out;
+
+- if (paging_config.l5_required) {
++ if (l5_required) {
+ /*
+ * For 4- to 5-level paging transition, set up current CR3 as
+ * the first and the only entry in a new top-level page table.
+ */
+- trampoline_32bit[TRAMPOLINE_32BIT_PGTABLE_OFFSET] = __native_read_cr3() | _PAGE_TABLE_NOENC;
++ *trampoline_32bit = __native_read_cr3() | _PAGE_TABLE_NOENC;
+ } else {
+ unsigned long src;
+
+@@ -181,38 +186,17 @@ struct paging_config paging_prepare(void *rmode)
+ * may be above 4G.
+ */
+ src = *(unsigned long *)__native_read_cr3() & PAGE_MASK;
+- memcpy(trampoline_32bit + TRAMPOLINE_32BIT_PGTABLE_OFFSET / sizeof(unsigned long),
+- (void *)src, PAGE_SIZE);
++ memcpy(trampoline_32bit, (void *)src, PAGE_SIZE);
+ }
+
+-out:
+- return paging_config;
+-}
+-
+-void cleanup_trampoline(void *pgtable)
+-{
+- void *trampoline_pgtable;
+-
+- trampoline_pgtable = trampoline_32bit + TRAMPOLINE_32BIT_PGTABLE_OFFSET / sizeof(unsigned long);
++ toggle_la57(trampoline_32bit);
+
+ /*
+- * Move the top level page table out of trampoline memory,
+- * if it's there.
++ * Move the top level page table out of trampoline memory.
+ */
+- if ((void *)__native_read_cr3() == trampoline_pgtable) {
+- memcpy(pgtable, trampoline_pgtable, PAGE_SIZE);
+- native_write_cr3((unsigned long)pgtable);
+- }
++ memcpy(pgtable, trampoline_32bit, PAGE_SIZE);
++ native_write_cr3((unsigned long)pgtable);
+
+ /* Restore trampoline memory */
+ memcpy(trampoline_32bit, trampoline_save, TRAMPOLINE_32BIT_SIZE);
+-
+- /* Initialize variables for 5-level paging */
+-#ifdef CONFIG_X86_5LEVEL
+- if (__read_cr4() & X86_CR4_LA57) {
+- __pgtable_l5_enabled = 1;
+- pgdir_shift = 48;
+- ptrs_per_p4d = 512;
+- }
+-#endif
+ }
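+
The LJMP immediate fixup above is plain pointer arithmetic; a worked example with a hypothetical trampoline placement:

	/*
	 * Suppose find_trampoline_placement() returns 0x9d000:
	 *
	 *   trampoline_32bit = 0x9d000  (page table page)
	 *   toggle_la57      = 0x9e000  (code copy, +TRAMPOLINE_32BIT_CODE_OFFSET)
	 *
	 * If .Lret sits 0x60 bytes into trampoline_32bit_src, the LJMP
	 * immediate is assembled as 0x60, and the += fixup rewrites it to
	 * 0x9e000 + 0x60 = 0x9e060, the address of .Lret inside the copy.
	 */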
+diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c
+index 9c91cc40f4565..d07e665bb265b 100644
+--- a/arch/x86/boot/compressed/sev.c
++++ b/arch/x86/boot/compressed/sev.c
+@@ -327,20 +327,25 @@ static void enforce_vmpl0(void)
+ */
+ #define SNP_FEATURES_PRESENT (0)
+
++u64 snp_get_unsupported_features(u64 status)
++{
++ if (!(status & MSR_AMD64_SEV_SNP_ENABLED))
++ return 0;
++
++ return status & SNP_FEATURES_IMPL_REQ & ~SNP_FEATURES_PRESENT;
++}
++
+ void snp_check_features(void)
+ {
+ u64 unsupported;
+
+- if (!(sev_status & MSR_AMD64_SEV_SNP_ENABLED))
+- return;
+-
+ /*
+ * Terminate the boot if hypervisor has enabled any feature lacking
+ * guest side implementation. Pass on the unsupported features mask through
+ * EXIT_INFO_2 of the GHCB protocol so that those features can be reported
+ * as part of the guest boot failure.
+ */
+- unsupported = sev_status & SNP_FEATURES_IMPL_REQ & ~SNP_FEATURES_PRESENT;
++ unsupported = snp_get_unsupported_features(sev_status);
+ if (unsupported) {
+ if (ghcb_version < 2 || (!boot_ghcb && !early_setup_ghcb()))
+ sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED);
+@@ -350,35 +355,22 @@ void snp_check_features(void)
+ }
+ }
+
+-void sev_enable(struct boot_params *bp)
++/*
++ * sev_check_cpu_support - Check for SEV support in the CPU capabilities
++ *
++ * Returns < 0 if SEV is not supported, otherwise the position of the
++ * encryption bit in the page table descriptors.
++ */
++static int sev_check_cpu_support(void)
+ {
+ unsigned int eax, ebx, ecx, edx;
+- struct msr m;
+- bool snp;
+-
+- /*
+- * bp->cc_blob_address should only be set by boot/compressed kernel.
+- * Initialize it to 0 to ensure that uninitialized values from
+- * buggy bootloaders aren't propagated.
+- */
+- if (bp)
+- bp->cc_blob_address = 0;
+-
+- /*
+- * Do an initial SEV capability check before snp_init() which
+- * loads the CPUID page and the same checks afterwards are done
+- * without the hypervisor and are trustworthy.
+- *
+- * If the HV fakes SEV support, the guest will crash'n'burn
+- * which is good enough.
+- */
+
+ /* Check for the SME/SEV support leaf */
+ eax = 0x80000000;
+ ecx = 0;
+ native_cpuid(&eax, &ebx, &ecx, &edx);
+ if (eax < 0x8000001f)
+- return;
++ return -ENODEV;
+
+ /*
+ * Check for the SME/SEV feature:
+@@ -393,6 +385,35 @@ void sev_enable(struct boot_params *bp)
+ native_cpuid(&eax, &ebx, &ecx, &edx);
+ /* Check whether SEV is supported */
+ if (!(eax & BIT(1)))
++ return -ENODEV;
++
++ return ebx & 0x3f;
++}
++
++void sev_enable(struct boot_params *bp)
++{
++ struct msr m;
++ int bitpos;
++ bool snp;
++
++ /*
++ * bp->cc_blob_address should only be set by boot/compressed kernel.
++ * Initialize it to 0 to ensure that uninitialized values from
++ * buggy bootloaders aren't propagated.
++ */
++ if (bp)
++ bp->cc_blob_address = 0;
++
++ /*
++ * Do an initial SEV capability check before snp_init(), which loads
++ * the CPUID page; the same checks done afterwards no longer depend on
++ * the hypervisor and are trustworthy.
++ *
++ * If the HV fakes SEV support, the guest will crash'n'burn
++ * which is good enough.
++ */
++
++ if (sev_check_cpu_support() < 0)
+ return;
+
+ /*
+@@ -403,26 +424,8 @@ void sev_enable(struct boot_params *bp)
+
+ /* Now repeat the checks with the SNP CPUID table. */
+
+- /* Recheck the SME/SEV support leaf */
+- eax = 0x80000000;
+- ecx = 0;
+- native_cpuid(&eax, &ebx, &ecx, &edx);
+- if (eax < 0x8000001f)
+- return;
+-
+- /*
+- * Recheck for the SME/SEV feature:
+- * CPUID Fn8000_001F[EAX]
+- * - Bit 0 - Secure Memory Encryption support
+- * - Bit 1 - Secure Encrypted Virtualization support
+- * CPUID Fn8000_001F[EBX]
+- * - Bits 5:0 - Pagetable bit position used to indicate encryption
+- */
+- eax = 0x8000001f;
+- ecx = 0;
+- native_cpuid(&eax, &ebx, &ecx, &edx);
+- /* Check whether SEV is supported */
+- if (!(eax & BIT(1))) {
++ bitpos = sev_check_cpu_support();
++ if (bitpos < 0) {
+ if (snp)
+ error("SEV-SNP support indicated by CC blob, but not CPUID.");
+ return;
+@@ -454,7 +457,24 @@ void sev_enable(struct boot_params *bp)
+ if (snp && !(sev_status & MSR_AMD64_SEV_SNP_ENABLED))
+ error("SEV-SNP supported indicated by CC blob, but not SEV status MSR.");
+
+- sme_me_mask = BIT_ULL(ebx & 0x3f);
++ sme_me_mask = BIT_ULL(bitpos);
++}
++
++/*
++ * sev_get_status - Retrieve the SEV status mask
++ *
++ * Returns 0 if the CPU is not SEV capable, otherwise the value of the
++ * AMD64_SEV MSR.
++ */
++u64 sev_get_status(void)
++{
++ struct msr m;
++
++ if (sev_check_cpu_support() < 0)
++ return 0;
++
++ boot_rdmsr(MSR_AMD64_SEV, &m);
++ return m.q;
+ }
+
+ /* Search for Confidential Computing blob in the EFI config table. */
+@@ -545,7 +565,7 @@ void sev_prep_identity_maps(unsigned long top_level_pgt)
+ * accessed after switchover.
+ */
+ if (sev_snp_enabled()) {
+- unsigned long cc_info_pa = boot_params->cc_blob_address;
++ unsigned long cc_info_pa = boot_params_ptr->cc_blob_address;
+ struct cc_blob_sev_info *cc_info;
+
+ kernel_add_identity_map(cc_info_pa, cc_info_pa + sizeof(*cc_info));
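+
The two new helpers are meant to compose: sev_get_status() returns 0 on CPUs without SEV support, and snp_get_unsupported_features() returns 0 unless SNP is enabled. A caller sketch (the message string is illustrative):

	u64 status = sev_get_status();
	u64 unsupported = snp_get_unsupported_features(status);

	if (unsupported)
		error("SEV-SNP features enabled by the hypervisor but not implemented.");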
+diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
+index f912d77701305..d31982509654d 100644
+--- a/arch/x86/boot/header.S
++++ b/arch/x86/boot/header.S
+@@ -406,7 +406,7 @@ xloadflags:
+ # define XLF1 0
+ #endif
+
+-#ifdef CONFIG_EFI_STUB
++#ifdef CONFIG_EFI_HANDOVER_PROTOCOL
+ # ifdef CONFIG_EFI_MIXED
+ # define XLF23 (XLF_EFI_HANDOVER_32|XLF_EFI_HANDOVER_64)
+ # else
+diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c
+index a3725ad46c5a0..bd247692b7017 100644
+--- a/arch/x86/boot/tools/build.c
++++ b/arch/x86/boot/tools/build.c
+@@ -290,6 +290,7 @@ static void efi_stub_entry_update(void)
+ {
+ unsigned long addr = efi32_stub_entry;
+
++#ifdef CONFIG_EFI_HANDOVER_PROTOCOL
+ #ifdef CONFIG_X86_64
+ /* Yes, this is really how we defined it :( */
+ addr = efi64_stub_entry - 0x200;
+@@ -298,6 +299,7 @@ static void efi_stub_entry_update(void)
+ #ifdef CONFIG_EFI_MIXED
+ if (efi32_stub_entry != addr)
+ die("32-bit and 64-bit EFI entry points do not match\n");
++#endif
+ #endif
+ put_unaligned_le32(addr, &buf[0x264]);
+ }
+diff --git a/arch/x86/entry/entry.S b/arch/x86/entry/entry.S
+index bfb7bcb362bcf..09e99d13fc0b3 100644
+--- a/arch/x86/entry/entry.S
++++ b/arch/x86/entry/entry.S
+@@ -6,6 +6,9 @@
+ #include <linux/linkage.h>
+ #include <asm/export.h>
+ #include <asm/msr-index.h>
++#include <asm/unwind_hints.h>
++#include <asm/segment.h>
++#include <asm/cache.h>
+
+ .pushsection .noinstr.text, "ax"
+
+@@ -20,3 +23,23 @@ SYM_FUNC_END(entry_ibpb)
+ EXPORT_SYMBOL_GPL(entry_ibpb);
+
+ .popsection
++
++/*
++ * Define the VERW operand that is disguised as entry code so that
++ * it can be referenced with KPTI enabled. This ensures VERW can be
++ * used late in the exit-to-user path after page tables are switched.
++ */
++.pushsection .entry.text, "ax"
++
++.align L1_CACHE_BYTES, 0xcc
++SYM_CODE_START_NOALIGN(mds_verw_sel)
++ UNWIND_HINT_EMPTY
++ ANNOTATE_NOENDBR
++ .word __KERNEL_DS
++.align L1_CACHE_BYTES, 0xcc
++SYM_CODE_END(mds_verw_sel);
++/* For KVM */
++EXPORT_SYMBOL_GPL(mds_verw_sel);
++
++.popsection
++
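mds_verw_sel is just a __KERNEL_DS-valued word that stays mapped across the KPTI page-table switch, so VERW can reference it from the very last stretch of the exit path. Its effect mirrors mds_clear_cpu_buffers() in nospec-branch.h; a minimal C-level sketch of the same sequence:

	static __always_inline void clear_cpu_buffers(void)
	{
		static const u16 ds = __KERNEL_DS;

		/* Only the memory-operand form of VERW clears the CPU buffers. */
		asm volatile("verw %[ds]" : : [ds] "m" (ds) : "cc");
	}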
+diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
+index e309e71560389..ee5def1060c86 100644
+--- a/arch/x86/entry/entry_32.S
++++ b/arch/x86/entry/entry_32.S
+@@ -912,6 +912,7 @@ SYM_FUNC_START(entry_SYSENTER_32)
+ BUG_IF_WRONG_CR3 no_user_check=1
+ popfl
+ popl %eax
++ CLEAR_CPU_BUFFERS
+
+ /*
+ * Return back to the vDSO, which will pop ecx and edx.
+@@ -981,6 +982,7 @@ restore_all_switch_stack:
+
+ /* Restore user state */
+ RESTORE_REGS pop=4 # skip orig_eax/error_code
++ CLEAR_CPU_BUFFERS
+ .Lirq_return:
+ /*
+ * ARCH_HAS_MEMBARRIER_SYNC_CORE rely on IRET core serialization
+@@ -1173,6 +1175,7 @@ SYM_CODE_START(asm_exc_nmi)
+
+ /* Not on SYSENTER stack. */
+ call exc_nmi
++ CLEAR_CPU_BUFFERS
+ jmp .Lnmi_return
+
+ .Lnmi_from_sysenter_stack:
+diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
+index 9953d966d1244..c2383c2880ec6 100644
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -223,6 +223,7 @@ syscall_return_via_sysret:
+ SYM_INNER_LABEL(entry_SYSRETQ_unsafe_stack, SYM_L_GLOBAL)
+ ANNOTATE_NOENDBR
+ swapgs
++ CLEAR_CPU_BUFFERS
+ sysretq
+ SYM_INNER_LABEL(entry_SYSRETQ_end, SYM_L_GLOBAL)
+ ANNOTATE_NOENDBR
+@@ -656,6 +657,7 @@ SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL)
+ /* Restore RDI. */
+ popq %rdi
+ swapgs
++ CLEAR_CPU_BUFFERS
+ jmp .Lnative_iret
+
+
+@@ -767,6 +769,8 @@ native_irq_return_ldt:
+ */
+ popq %rax /* Restore user RAX */
+
++ CLEAR_CPU_BUFFERS
++
+ /*
+ * RSP now points to an ordinary IRET frame, except that the page
+ * is read-only and RSP[31:16] are preloaded with the userspace
+@@ -1493,6 +1497,12 @@ nmi_restore:
+ std
+ movq $0, 5*8(%rsp) /* clear "NMI executing" */
+
++ /*
++ * Skip CLEAR_CPU_BUFFERS here, since it only helps in rare cases like
++ * an NMI in the kernel after user state is restored. For an unprivileged
++ * user these conditions are hard to meet.
++ */
++
+ /*
+ * iretq reads the "iret" frame and exits the NMI stack in a
+ * single instruction. We are returning to kernel mode, so this
+@@ -1511,6 +1521,7 @@ SYM_CODE_START(ignore_sysret)
+ UNWIND_HINT_EMPTY
+ ENDBR
+ mov $-ENOSYS, %eax
++ CLEAR_CPU_BUFFERS
+ sysretl
+ SYM_CODE_END(ignore_sysret)
+ #endif
+diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
+index d6c08d8986b17..4bcd009a232bf 100644
+--- a/arch/x86/entry/entry_64_compat.S
++++ b/arch/x86/entry/entry_64_compat.S
+@@ -272,6 +272,7 @@ SYM_INNER_LABEL(entry_SYSRETL_compat_unsafe_stack, SYM_L_GLOBAL)
+ xorl %r9d, %r9d
+ xorl %r10d, %r10d
+ swapgs
++ CLEAR_CPU_BUFFERS
+ sysretl
+ SYM_INNER_LABEL(entry_SYSRETL_compat_end, SYM_L_GLOBAL)
+ ANNOTATE_NOENDBR
+diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h
+index 215d37f7dde8a..a38cc0afc90a0 100644
+--- a/arch/x86/include/asm/boot.h
++++ b/arch/x86/include/asm/boot.h
+@@ -79,4 +79,14 @@
+ # define BOOT_STACK_SIZE 0x1000
+ #endif
+
++#ifndef __ASSEMBLY__
++extern unsigned int output_len;
++extern const unsigned long kernel_total_size;
++
++unsigned long decompress_kernel(unsigned char *outbuf, unsigned long virt_addr,
++ void (*error)(char *x));
++
++extern struct boot_params *boot_params_ptr;
++#endif
++
+ #endif /* _ASM_X86_BOOT_H */
+diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
+index b122708792c4d..b60f24b30cb90 100644
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -304,7 +304,7 @@
+ #define X86_FEATURE_UNRET (11*32+15) /* "" AMD BTB untrain return */
+ #define X86_FEATURE_USE_IBPB_FW (11*32+16) /* "" Use IBPB during runtime firmware calls */
+ #define X86_FEATURE_RSB_VMEXIT_LITE (11*32+17) /* "" Fill RSB on VM exit when EIBRS is enabled */
+-
++#define X86_FEATURE_CLEAR_CPU_BUF (11*32+18) /* "" Clear CPU buffers using VERW */
+
+ #define X86_FEATURE_MSR_TSX_CTRL (11*32+20) /* "" MSR IA32_TSX_CTRL (Intel) implemented */
+
+diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
+index 233ae6986d6f2..e601264b1a243 100644
+--- a/arch/x86/include/asm/efi.h
++++ b/arch/x86/include/asm/efi.h
+@@ -88,6 +88,8 @@ static inline void efi_fpu_end(void)
+ }
+
+ #ifdef CONFIG_X86_32
++#define EFI_X86_KERNEL_ALLOC_LIMIT (SZ_512M - 1)
++
+ #define arch_efi_call_virt_setup() \
+ ({ \
+ efi_fpu_begin(); \
+@@ -101,8 +103,7 @@ static inline void efi_fpu_end(void)
+ })
+
+ #else /* !CONFIG_X86_32 */
+-
+-#define EFI_LOADER_SIGNATURE "EL64"
++#define EFI_X86_KERNEL_ALLOC_LIMIT EFI_ALLOC_LIMIT
+
+ extern asmlinkage u64 __efi_call(void *fp, ...);
+
+@@ -214,6 +215,8 @@ efi_status_t efi_set_virtual_address_map(unsigned long memory_map_size,
+
+ #ifdef CONFIG_EFI_MIXED
+
++#define EFI_ALLOC_LIMIT (efi_is_64bit() ? ULONG_MAX : U32_MAX)
++
+ #define ARCH_HAS_EFISTUB_WRAPPERS
+
+ static inline bool efi_is_64bit(void)
+@@ -325,6 +328,13 @@ static inline u32 efi64_convert_status(efi_status_t status)
+ #define __efi64_argmap_set_memory_space_attributes(phys, size, flags) \
+ (__efi64_split(phys), __efi64_split(size), __efi64_split(flags))
+
++/* Memory Attribute Protocol */
++#define __efi64_argmap_set_memory_attributes(protocol, phys, size, flags) \
++ ((protocol), __efi64_split(phys), __efi64_split(size), __efi64_split(flags))
++
++#define __efi64_argmap_clear_memory_attributes(protocol, phys, size, flags) \
++ ((protocol), __efi64_split(phys), __efi64_split(size), __efi64_split(flags))
++
+ /*
+ * The macros below handle the plumbing for the argument mapping. To add a
+ * mapping for a specific EFI method, simply define a macro
+diff --git a/arch/x86/include/asm/entry-common.h b/arch/x86/include/asm/entry-common.h
+index 11203a9fe0a87..ffe72790ceafd 100644
+--- a/arch/x86/include/asm/entry-common.h
++++ b/arch/x86/include/asm/entry-common.h
+@@ -91,7 +91,6 @@ static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
+
+ static __always_inline void arch_exit_to_user_mode(void)
+ {
+- mds_user_clear_cpu_buffers();
+ amd_clear_divider();
+ }
+ #define arch_exit_to_user_mode arch_exit_to_user_mode
+diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
+index d3706de91a934..8f6f17a8617b6 100644
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -194,6 +194,19 @@
+ #endif
+ .endm
+
++/*
++ * Macro to execute the VERW instruction, which mitigates transient data
++ * sampling attacks such as MDS. On affected systems a microcode update
++ * overloaded the VERW instruction to also clear the CPU buffers. VERW
++ * clobbers EFLAGS.ZF.
++ *
++ * Note: Only the memory operand variant of VERW clears the CPU buffers.
++ */
++.macro CLEAR_CPU_BUFFERS
++ ALTERNATIVE "jmp .Lskip_verw_\@", "", X86_FEATURE_CLEAR_CPU_BUF
++ verw _ASM_RIP(mds_verw_sel)
++.Lskip_verw_\@:
++.endm
++
+ #else /* __ASSEMBLY__ */
+
+ #define ANNOTATE_RETPOLINE_SAFE \
+@@ -368,13 +381,14 @@ DECLARE_STATIC_KEY_FALSE(switch_to_cond_stibp);
+ DECLARE_STATIC_KEY_FALSE(switch_mm_cond_ibpb);
+ DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb);
+
+-DECLARE_STATIC_KEY_FALSE(mds_user_clear);
+ DECLARE_STATIC_KEY_FALSE(mds_idle_clear);
+
+ DECLARE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush);
+
+ DECLARE_STATIC_KEY_FALSE(mmio_stale_data_clear);
+
++extern u16 mds_verw_sel;
++
+ #include <asm/segment.h>
+
+ /**
+@@ -400,17 +414,6 @@ static __always_inline void mds_clear_cpu_buffers(void)
+ asm volatile("verw %[ds]" : : [ds] "m" (ds) : "cc");
+ }
+
+-/**
+- * mds_user_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability
+- *
+- * Clear CPU buffers if the corresponding static key is enabled
+- */
+-static __always_inline void mds_user_clear_cpu_buffers(void)
+-{
+- if (static_branch_likely(&mds_user_clear))
+- mds_clear_cpu_buffers();
+-}
+-
+ /**
+ * mds_idle_clear_cpu_buffers - Mitigation for MDS vulnerability
+ *
+diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h
+index 7ca5c9ec8b52e..cf98fc28601fb 100644
+--- a/arch/x86/include/asm/sev.h
++++ b/arch/x86/include/asm/sev.h
+@@ -157,6 +157,7 @@ static __always_inline void sev_es_nmi_complete(void)
+ __sev_es_nmi_complete();
+ }
+ extern int __init sev_es_efi_map_ghcbs(pgd_t *pgd);
++extern void sev_enable(struct boot_params *bp);
+
+ static inline int rmpadjust(unsigned long vaddr, bool rmp_psize, unsigned long attrs)
+ {
+@@ -202,12 +203,15 @@ void snp_set_wakeup_secondary_cpu(void);
+ bool snp_init(struct boot_params *bp);
+ void __init __noreturn snp_abort(void);
+ int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct snp_guest_request_ioctl *rio);
++u64 snp_get_unsupported_features(u64 status);
++u64 sev_get_status(void);
+ #else
+ static inline void sev_es_ist_enter(struct pt_regs *regs) { }
+ static inline void sev_es_ist_exit(void) { }
+ static inline int sev_es_setup_ap_jump_table(struct real_mode_header *rmh) { return 0; }
+ static inline void sev_es_nmi_complete(void) { }
+ static inline int sev_es_efi_map_ghcbs(pgd_t *pgd) { return 0; }
++static inline void sev_enable(struct boot_params *bp) { }
+ static inline int pvalidate(unsigned long vaddr, bool rmp_psize, bool validate) { return 0; }
+ static inline int rmpadjust(unsigned long vaddr, bool rmp_psize, unsigned long attrs) { return 0; }
+ static inline void setup_ghcb(void) { }
+@@ -225,6 +229,9 @@ static inline int snp_issue_guest_request(u64 exit_code, struct snp_req_data *in
+ {
+ return -ENOTTY;
+ }
++
++static inline u64 snp_get_unsupported_features(u64 status) { return 0; }
++static inline u64 sev_get_status(void) { return 0; }
+ #endif
+
+ #endif
+diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
+index 13dffc43ded02..d1895930e6eb8 100644
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -110,9 +110,6 @@ DEFINE_STATIC_KEY_FALSE(switch_mm_cond_ibpb);
+ /* Control unconditional IBPB in switch_mm() */
+ DEFINE_STATIC_KEY_FALSE(switch_mm_always_ibpb);
+
+-/* Control MDS CPU buffer clear before returning to user space */
+-DEFINE_STATIC_KEY_FALSE(mds_user_clear);
+-EXPORT_SYMBOL_GPL(mds_user_clear);
+ /* Control MDS CPU buffer clear before idling (halt, mwait) */
+ DEFINE_STATIC_KEY_FALSE(mds_idle_clear);
+ EXPORT_SYMBOL_GPL(mds_idle_clear);
+@@ -251,7 +248,7 @@ static void __init mds_select_mitigation(void)
+ if (!boot_cpu_has(X86_FEATURE_MD_CLEAR))
+ mds_mitigation = MDS_MITIGATION_VMWERV;
+
+- static_branch_enable(&mds_user_clear);
++ setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF);
+
+ if (!boot_cpu_has(X86_BUG_MSBDS_ONLY) &&
+ (mds_nosmt || cpu_mitigations_auto_nosmt()))
+@@ -355,7 +352,7 @@ static void __init taa_select_mitigation(void)
+ * For guests that can't determine whether the correct microcode is
+ * present on host, enable the mitigation for UCODE_NEEDED as well.
+ */
+- static_branch_enable(&mds_user_clear);
++ setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF);
+
+ if (taa_nosmt || cpu_mitigations_auto_nosmt())
+ cpu_smt_disable(false);
+@@ -423,7 +420,7 @@ static void __init mmio_select_mitigation(void)
+ */
+ if (boot_cpu_has_bug(X86_BUG_MDS) || (boot_cpu_has_bug(X86_BUG_TAA) &&
+ boot_cpu_has(X86_FEATURE_RTM)))
+- static_branch_enable(&mds_user_clear);
++ setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF);
+ else
+ static_branch_enable(&mmio_stale_data_clear);
+
+@@ -483,12 +480,12 @@ static void __init md_clear_update_mitigation(void)
+ if (cpu_mitigations_off())
+ return;
+
+- if (!static_key_enabled(&mds_user_clear))
++ if (!boot_cpu_has(X86_FEATURE_CLEAR_CPU_BUF))
+ goto out;
+
+ /*
+- * mds_user_clear is now enabled. Update MDS, TAA and MMIO Stale Data
+- * mitigation, if necessary.
++ * X86_FEATURE_CLEAR_CPU_BUF is now enabled. Update MDS, TAA and MMIO
++ * Stale Data mitigation, if necessary.
+ */
+ if (mds_mitigation == MDS_MITIGATION_OFF &&
+ boot_cpu_has_bug(X86_BUG_MDS)) {
+diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
+index 4278996504833..32bd640170475 100644
+--- a/arch/x86/kernel/cpu/intel.c
++++ b/arch/x86/kernel/cpu/intel.c
+@@ -216,6 +216,90 @@ int intel_cpu_collect_info(struct ucode_cpu_info *uci)
+ }
+ EXPORT_SYMBOL_GPL(intel_cpu_collect_info);
+
++#define MSR_IA32_TME_ACTIVATE 0x982
++
++/* Helpers to access TME_ACTIVATE MSR */
++#define TME_ACTIVATE_LOCKED(x) (x & 0x1)
++#define TME_ACTIVATE_ENABLED(x) (x & 0x2)
++
++#define TME_ACTIVATE_POLICY(x) ((x >> 4) & 0xf) /* Bits 7:4 */
++#define TME_ACTIVATE_POLICY_AES_XTS_128 0
++
++#define TME_ACTIVATE_KEYID_BITS(x) ((x >> 32) & 0xf) /* Bits 35:32 */
++
++#define TME_ACTIVATE_CRYPTO_ALGS(x) ((x >> 48) & 0xffff) /* Bits 63:48 */
++#define TME_ACTIVATE_CRYPTO_AES_XTS_128 1
++
++/* Values for mktme_status (SW only construct) */
++#define MKTME_ENABLED 0
++#define MKTME_DISABLED 1
++#define MKTME_UNINITIALIZED 2
++static int mktme_status = MKTME_UNINITIALIZED;
++
++static void detect_tme_early(struct cpuinfo_x86 *c)
++{
++ u64 tme_activate, tme_policy, tme_crypto_algs;
++ int keyid_bits = 0, nr_keyids = 0;
++ static u64 tme_activate_cpu0 = 0;
++
++ rdmsrl(MSR_IA32_TME_ACTIVATE, tme_activate);
++
++ if (mktme_status != MKTME_UNINITIALIZED) {
++ if (tme_activate != tme_activate_cpu0) {
++ /* Broken BIOS? */
++ pr_err_once("x86/tme: configuration is inconsistent between CPUs\n");
++ pr_err_once("x86/tme: MKTME is not usable\n");
++ mktme_status = MKTME_DISABLED;
++
++ /* Proceed. We may need to exclude bits from x86_phys_bits. */
++ }
++ } else {
++ tme_activate_cpu0 = tme_activate;
++ }
++
++ if (!TME_ACTIVATE_LOCKED(tme_activate) || !TME_ACTIVATE_ENABLED(tme_activate)) {
++ pr_info_once("x86/tme: not enabled by BIOS\n");
++ mktme_status = MKTME_DISABLED;
++ return;
++ }
++
++ if (mktme_status != MKTME_UNINITIALIZED)
++ goto detect_keyid_bits;
++
++ pr_info("x86/tme: enabled by BIOS\n");
++
++ tme_policy = TME_ACTIVATE_POLICY(tme_activate);
++ if (tme_policy != TME_ACTIVATE_POLICY_AES_XTS_128)
++ pr_warn("x86/tme: Unknown policy is active: %#llx\n", tme_policy);
++
++ tme_crypto_algs = TME_ACTIVATE_CRYPTO_ALGS(tme_activate);
++ if (!(tme_crypto_algs & TME_ACTIVATE_CRYPTO_AES_XTS_128)) {
++ pr_err("x86/mktme: No known encryption algorithm is supported: %#llx\n",
++ tme_crypto_algs);
++ mktme_status = MKTME_DISABLED;
++ }
++detect_keyid_bits:
++ keyid_bits = TME_ACTIVATE_KEYID_BITS(tme_activate);
++ nr_keyids = (1UL << keyid_bits) - 1;
++ if (nr_keyids) {
++ pr_info_once("x86/mktme: enabled by BIOS\n");
++ pr_info_once("x86/mktme: %d KeyIDs available\n", nr_keyids);
++ } else {
++ pr_info_once("x86/mktme: disabled by BIOS\n");
++ }
++
++ if (mktme_status == MKTME_UNINITIALIZED) {
++ /* MKTME is usable */
++ mktme_status = MKTME_ENABLED;
++ }
++
++ /*
++ * KeyID bits effectively lower the number of physical address
++ * bits. Update cpuinfo_x86::x86_phys_bits accordingly.
++ */
++ c->x86_phys_bits -= keyid_bits;
++}
++
+ static void early_init_intel(struct cpuinfo_x86 *c)
+ {
+ u64 misc_enable;
+@@ -367,6 +451,13 @@ static void early_init_intel(struct cpuinfo_x86 *c)
+ */
+ if (detect_extended_topology_early(c) < 0)
+ detect_ht_early(c);
++
++ /*
++ * Adjust the number of physical bits early because it affects the
++ * valid bits of the MTRR mask registers.
++ */
++ if (cpu_has(c, X86_FEATURE_TME))
++ detect_tme_early(c);
+ }
+
+ static void bsp_init_intel(struct cpuinfo_x86 *c)
+@@ -527,90 +618,6 @@ static void srat_detect_node(struct cpuinfo_x86 *c)
+ #endif
+ }
+
+-#define MSR_IA32_TME_ACTIVATE 0x982
+-
+-/* Helpers to access TME_ACTIVATE MSR */
+-#define TME_ACTIVATE_LOCKED(x) (x & 0x1)
+-#define TME_ACTIVATE_ENABLED(x) (x & 0x2)
+-
+-#define TME_ACTIVATE_POLICY(x) ((x >> 4) & 0xf) /* Bits 7:4 */
+-#define TME_ACTIVATE_POLICY_AES_XTS_128 0
+-
+-#define TME_ACTIVATE_KEYID_BITS(x) ((x >> 32) & 0xf) /* Bits 35:32 */
+-
+-#define TME_ACTIVATE_CRYPTO_ALGS(x) ((x >> 48) & 0xffff) /* Bits 63:48 */
+-#define TME_ACTIVATE_CRYPTO_AES_XTS_128 1
+-
+-/* Values for mktme_status (SW only construct) */
+-#define MKTME_ENABLED 0
+-#define MKTME_DISABLED 1
+-#define MKTME_UNINITIALIZED 2
+-static int mktme_status = MKTME_UNINITIALIZED;
+-
+-static void detect_tme(struct cpuinfo_x86 *c)
+-{
+- u64 tme_activate, tme_policy, tme_crypto_algs;
+- int keyid_bits = 0, nr_keyids = 0;
+- static u64 tme_activate_cpu0 = 0;
+-
+- rdmsrl(MSR_IA32_TME_ACTIVATE, tme_activate);
+-
+- if (mktme_status != MKTME_UNINITIALIZED) {
+- if (tme_activate != tme_activate_cpu0) {
+- /* Broken BIOS? */
+- pr_err_once("x86/tme: configuration is inconsistent between CPUs\n");
+- pr_err_once("x86/tme: MKTME is not usable\n");
+- mktme_status = MKTME_DISABLED;
+-
+- /* Proceed. We may need to exclude bits from x86_phys_bits. */
+- }
+- } else {
+- tme_activate_cpu0 = tme_activate;
+- }
+-
+- if (!TME_ACTIVATE_LOCKED(tme_activate) || !TME_ACTIVATE_ENABLED(tme_activate)) {
+- pr_info_once("x86/tme: not enabled by BIOS\n");
+- mktme_status = MKTME_DISABLED;
+- return;
+- }
+-
+- if (mktme_status != MKTME_UNINITIALIZED)
+- goto detect_keyid_bits;
+-
+- pr_info("x86/tme: enabled by BIOS\n");
+-
+- tme_policy = TME_ACTIVATE_POLICY(tme_activate);
+- if (tme_policy != TME_ACTIVATE_POLICY_AES_XTS_128)
+- pr_warn("x86/tme: Unknown policy is active: %#llx\n", tme_policy);
+-
+- tme_crypto_algs = TME_ACTIVATE_CRYPTO_ALGS(tme_activate);
+- if (!(tme_crypto_algs & TME_ACTIVATE_CRYPTO_AES_XTS_128)) {
+- pr_err("x86/mktme: No known encryption algorithm is supported: %#llx\n",
+- tme_crypto_algs);
+- mktme_status = MKTME_DISABLED;
+- }
+-detect_keyid_bits:
+- keyid_bits = TME_ACTIVATE_KEYID_BITS(tme_activate);
+- nr_keyids = (1UL << keyid_bits) - 1;
+- if (nr_keyids) {
+- pr_info_once("x86/mktme: enabled by BIOS\n");
+- pr_info_once("x86/mktme: %d KeyIDs available\n", nr_keyids);
+- } else {
+- pr_info_once("x86/mktme: disabled by BIOS\n");
+- }
+-
+- if (mktme_status == MKTME_UNINITIALIZED) {
+- /* MKTME is usable */
+- mktme_status = MKTME_ENABLED;
+- }
+-
+- /*
+- * KeyID bits effectively lower the number of physical address
+- * bits. Update cpuinfo_x86::x86_phys_bits accordingly.
+- */
+- c->x86_phys_bits -= keyid_bits;
+-}
+-
+ static void init_cpuid_fault(struct cpuinfo_x86 *c)
+ {
+ u64 msr;
+@@ -747,9 +754,6 @@ static void init_intel(struct cpuinfo_x86 *c)
+
+ init_ia32_feat_ctl(c);
+
+- if (cpu_has(c, X86_FEATURE_TME))
+- detect_tme(c);
+-
+ init_intel_misc_features(c);
+
+ split_lock_init();
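+
The TME_ACTIVATE accessors are plain bit-field extraction; decoding a made-up MSR value shows how detect_tme_early() arrives at its adjustments:

	u64 tme_activate = 0x0001000600000003ULL;	/* hypothetical value */

	TME_ACTIVATE_LOCKED(tme_activate);	/* bit 0 set: locked */
	TME_ACTIVATE_ENABLED(tme_activate);	/* bit 1 set: enabled */
	TME_ACTIVATE_POLICY(tme_activate);	/* 0: AES-XTS-128 policy */
	TME_ACTIVATE_KEYID_BITS(tme_activate);	/* 6, so 2^6 - 1 = 63 KeyIDs */
	TME_ACTIVATE_CRYPTO_ALGS(tme_activate);	/* bit 0 set: AES-XTS-128 */

	/* detect_tme_early() would then do c->x86_phys_bits -= 6. */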
+diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
+index 9dac24680ff8e..993734e96615a 100644
+--- a/arch/x86/kernel/e820.c
++++ b/arch/x86/kernel/e820.c
+@@ -1017,10 +1017,12 @@ void __init e820__reserve_setup_data(void)
+ e820__range_update(pa_data, sizeof(*data)+data->len, E820_TYPE_RAM, E820_TYPE_RESERVED_KERN);
+
+ /*
+- * SETUP_EFI and SETUP_IMA are supplied by kexec and do not need
+- * to be reserved.
++ * SETUP_EFI, SETUP_IMA and SETUP_RNG_SEED are supplied by
++ * kexec and do not need to be reserved.
+ */
+- if (data->type != SETUP_EFI && data->type != SETUP_IMA)
++ if (data->type != SETUP_EFI &&
++ data->type != SETUP_IMA &&
++ data->type != SETUP_RNG_SEED)
+ e820__range_update_kexec(pa_data,
+ sizeof(*data) + data->len,
+ E820_TYPE_RAM, E820_TYPE_RESERVED_KERN);
+diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
+index cec0bfa3bc04f..ed6cce6c39504 100644
+--- a/arch/x86/kernel/nmi.c
++++ b/arch/x86/kernel/nmi.c
+@@ -522,9 +522,6 @@ DEFINE_IDTENTRY_RAW(exc_nmi)
+ write_cr2(this_cpu_read(nmi_cr2));
+ if (this_cpu_dec_return(nmi_state))
+ goto nmi_restart;
+-
+- if (user_mode(regs))
+- mds_user_clear_cpu_buffers();
+ }
+
+ #if defined(CONFIG_X86_64) && IS_ENABLED(CONFIG_KVM_INTEL)
+diff --git a/arch/x86/kvm/vmx/run_flags.h b/arch/x86/kvm/vmx/run_flags.h
+index edc3f16cc1896..6a9bfdfbb6e59 100644
+--- a/arch/x86/kvm/vmx/run_flags.h
++++ b/arch/x86/kvm/vmx/run_flags.h
+@@ -2,7 +2,10 @@
+ #ifndef __KVM_X86_VMX_RUN_FLAGS_H
+ #define __KVM_X86_VMX_RUN_FLAGS_H
+
+-#define VMX_RUN_VMRESUME (1 << 0)
+-#define VMX_RUN_SAVE_SPEC_CTRL (1 << 1)
++#define VMX_RUN_VMRESUME_SHIFT 0
++#define VMX_RUN_SAVE_SPEC_CTRL_SHIFT 1
++
++#define VMX_RUN_VMRESUME BIT(VMX_RUN_VMRESUME_SHIFT)
++#define VMX_RUN_SAVE_SPEC_CTRL BIT(VMX_RUN_SAVE_SPEC_CTRL_SHIFT)
+
+ #endif /* __KVM_X86_VMX_RUN_FLAGS_H */
+diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
+index 0b5db4de4d09e..0b2cad66dee12 100644
+--- a/arch/x86/kvm/vmx/vmenter.S
++++ b/arch/x86/kvm/vmx/vmenter.S
+@@ -106,7 +106,7 @@ SYM_FUNC_START(__vmx_vcpu_run)
+ mov (%_ASM_SP), %_ASM_AX
+
+ /* Check if vmlaunch or vmresume is needed */
+- testb $VMX_RUN_VMRESUME, %bl
++ bt $VMX_RUN_VMRESUME_SHIFT, %bx
+
+ /* Load guest registers. Don't clobber flags. */
+ mov VCPU_RCX(%_ASM_AX), %_ASM_CX
+@@ -128,8 +128,11 @@ SYM_FUNC_START(__vmx_vcpu_run)
+ /* Load guest RAX. This kills the @regs pointer! */
+ mov VCPU_RAX(%_ASM_AX), %_ASM_AX
+
+- /* Check EFLAGS.ZF from 'testb' above */
+- jz .Lvmlaunch
++ /* Clobbers EFLAGS.ZF */
++ CLEAR_CPU_BUFFERS
++
++ /* Check EFLAGS.CF from the VMX_RUN_VMRESUME bit test above. */
++ jnc .Lvmlaunch
+
+ /*
+ * After a successful VMRESUME/VMLAUNCH, control flow "magically"
+diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
+index 57c1374fdfd49..5c1590855ffcd 100644
+--- a/arch/x86/kvm/vmx/vmx.c
++++ b/arch/x86/kvm/vmx/vmx.c
+@@ -407,7 +407,8 @@ static __always_inline void vmx_enable_fb_clear(struct vcpu_vmx *vmx)
+
+ static void vmx_update_fb_clear_dis(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx)
+ {
+- vmx->disable_fb_clear = vmx_fb_clear_ctrl_available;
++ vmx->disable_fb_clear = !cpu_feature_enabled(X86_FEATURE_CLEAR_CPU_BUF) &&
++ vmx_fb_clear_ctrl_available;
+
+ /*
+ * If guest will not execute VERW, there is no need to set FB_CLEAR_DIS
+@@ -7120,11 +7121,14 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
+ {
+ guest_state_enter_irqoff();
+
+- /* L1D Flush includes CPU buffer clear to mitigate MDS */
++ /*
++ * L1D Flush includes CPU buffer clear to mitigate MDS, but VERW
++ * mitigation for MDS is done late in VMentry and is still
++ * executed in spite of L1D Flush. This is because an extra VERW
++ * should not matter much after the big hammer L1D Flush.
++ */
+ if (static_branch_unlikely(&vmx_l1d_should_flush))
+ vmx_l1d_flush(vcpu);
+- else if (static_branch_unlikely(&mds_user_clear))
+- mds_clear_cpu_buffers();
+ else if (static_branch_unlikely(&mmio_stale_data_clear) &&
+ kvm_arch_has_assigned_device(vcpu->kvm))
+ mds_clear_cpu_buffers();
+diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c
+index c9064d34d8308..0211f704a358b 100644
+--- a/drivers/bluetooth/btqca.c
++++ b/drivers/bluetooth/btqca.c
+@@ -152,7 +152,7 @@ static int qca_send_patch_config_cmd(struct hci_dev *hdev)
+ bt_dev_dbg(hdev, "QCA Patch config");
+
+ skb = __hci_cmd_sync_ev(hdev, EDL_PATCH_CMD_OPCODE, sizeof(cmd),
+- cmd, HCI_EV_VENDOR, HCI_INIT_TIMEOUT);
++ cmd, 0, HCI_INIT_TIMEOUT);
+ if (IS_ERR(skb)) {
+ err = PTR_ERR(skb);
+ bt_dev_err(hdev, "Sending QCA Patch config failed (%d)", err);
+@@ -594,27 +594,48 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate,
+ /* Firmware files to download are based on ROM version.
+ * ROM version is derived from last two bytes of soc_ver.
+ */
+- rom_ver = ((soc_ver & 0x00000f00) >> 0x04) | (soc_ver & 0x0000000f);
++ if (soc_type == QCA_WCN3988)
++ rom_ver = ((soc_ver & 0x00000f00) >> 0x05) | (soc_ver & 0x0000000f);
++ else
++ rom_ver = ((soc_ver & 0x00000f00) >> 0x04) | (soc_ver & 0x0000000f);
+
+ if (soc_type == QCA_WCN6750)
+ qca_send_patch_config_cmd(hdev);
+
+ /* Download rampatch file */
+ config.type = TLV_TYPE_PATCH;
+- if (qca_is_wcn399x(soc_type)) {
++ switch (soc_type) {
++ case QCA_WCN3990:
++ case QCA_WCN3991:
++ case QCA_WCN3998:
+ snprintf(config.fwname, sizeof(config.fwname),
+ "qca/crbtfw%02x.tlv", rom_ver);
+- } else if (soc_type == QCA_QCA6390) {
++ break;
++ case QCA_WCN3988:
++ snprintf(config.fwname, sizeof(config.fwname),
++ "qca/apbtfw%02x.tlv", rom_ver);
++ break;
++ case QCA_QCA6390:
+ snprintf(config.fwname, sizeof(config.fwname),
+ "qca/htbtfw%02x.tlv", rom_ver);
+- } else if (soc_type == QCA_WCN6750) {
++ break;
++ case QCA_WCN6750:
+ /* Choose mbn file by default.If mbn file is not found
+ * then choose tlv file
+ */
+ config.type = ELF_TYPE_PATCH;
+ snprintf(config.fwname, sizeof(config.fwname),
+ "qca/msbtfw%02x.mbn", rom_ver);
+- } else {
++ break;
++ case QCA_WCN6855:
++ snprintf(config.fwname, sizeof(config.fwname),
++ "qca/hpbtfw%02x.tlv", rom_ver);
++ break;
++ case QCA_WCN7850:
++ snprintf(config.fwname, sizeof(config.fwname),
++ "qca/hmtbtfw%02x.tlv", rom_ver);
++ break;
++ default:
+ snprintf(config.fwname, sizeof(config.fwname),
+ "qca/rampatch_%08x.bin", soc_ver);
+ }
+@@ -630,27 +651,48 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate,
+
+ /* Download NVM configuration */
+ config.type = TLV_TYPE_NVM;
+- if (firmware_name)
++ if (firmware_name) {
+ snprintf(config.fwname, sizeof(config.fwname),
+ "qca/%s", firmware_name);
+- else if (qca_is_wcn399x(soc_type)) {
+- if (ver.soc_id == QCA_WCN3991_SOC_ID) {
++ } else {
++ switch (soc_type) {
++ case QCA_WCN3990:
++ case QCA_WCN3991:
++ case QCA_WCN3998:
++ if (le32_to_cpu(ver.soc_id) == QCA_WCN3991_SOC_ID) {
++ snprintf(config.fwname, sizeof(config.fwname),
++ "qca/crnv%02xu.bin", rom_ver);
++ } else {
++ snprintf(config.fwname, sizeof(config.fwname),
++ "qca/crnv%02x.bin", rom_ver);
++ }
++ break;
++ case QCA_WCN3988:
+ snprintf(config.fwname, sizeof(config.fwname),
+- "qca/crnv%02xu.bin", rom_ver);
+- } else {
++ "qca/apnv%02x.bin", rom_ver);
++ break;
++ case QCA_QCA6390:
++ snprintf(config.fwname, sizeof(config.fwname),
++ "qca/htnv%02x.bin", rom_ver);
++ break;
++ case QCA_WCN6750:
+ snprintf(config.fwname, sizeof(config.fwname),
+- "qca/crnv%02x.bin", rom_ver);
++ "qca/msnv%02x.bin", rom_ver);
++ break;
++ case QCA_WCN6855:
++ snprintf(config.fwname, sizeof(config.fwname),
++ "qca/hpnv%02x.bin", rom_ver);
++ break;
++ case QCA_WCN7850:
++ snprintf(config.fwname, sizeof(config.fwname),
++ "qca/hmtnv%02x.bin", rom_ver);
++ break;
++
++ default:
++ snprintf(config.fwname, sizeof(config.fwname),
++ "qca/nvm_%08x.bin", soc_ver);
+ }
+ }
+- else if (soc_type == QCA_QCA6390)
+- snprintf(config.fwname, sizeof(config.fwname),
+- "qca/htnv%02x.bin", rom_ver);
+- else if (soc_type == QCA_WCN6750)
+- snprintf(config.fwname, sizeof(config.fwname),
+- "qca/msnv%02x.bin", rom_ver);
+- else
+- snprintf(config.fwname, sizeof(config.fwname),
+- "qca/nvm_%08x.bin", soc_ver);
+
+ err = qca_download_firmware(hdev, &config, soc_type, rom_ver);
+ if (err < 0) {
+@@ -658,16 +700,25 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate,
+ return err;
+ }
+
+- if (soc_type >= QCA_WCN3991) {
++ switch (soc_type) {
++ case QCA_WCN3991:
++ case QCA_QCA6390:
++ case QCA_WCN6750:
++ case QCA_WCN6855:
++ case QCA_WCN7850:
+ err = qca_disable_soc_logging(hdev);
+ if (err < 0)
+ return err;
++ break;
++ default:
++ break;
+ }
+
+ /* WCN399x and WCN6750 supports the Microsoft vendor extension with 0xFD70 as the
+ * VsMsftOpCode.
+ */
+ switch (soc_type) {
++ case QCA_WCN3988:
+ case QCA_WCN3990:
+ case QCA_WCN3991:
+ case QCA_WCN3998:
+@@ -685,11 +736,18 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate,
+ return err;
+ }
+
+- if (soc_type == QCA_WCN3991 || soc_type == QCA_WCN6750) {
++ switch (soc_type) {
++ case QCA_WCN3991:
++ case QCA_WCN6750:
++ case QCA_WCN6855:
++ case QCA_WCN7850:
+ /* get fw build info */
+ err = qca_read_fw_build_info(hdev);
+ if (err < 0)
+ return err;
++ break;
++ default:
++ break;
+ }
+
+ bt_dev_info(hdev, "QCA setup on UART is completed");
+diff --git a/drivers/bluetooth/btqca.h b/drivers/bluetooth/btqca.h
+index 61e9a50e66ae1..03bff5c0059de 100644
+--- a/drivers/bluetooth/btqca.h
++++ b/drivers/bluetooth/btqca.h
+@@ -142,11 +142,14 @@ enum qca_btsoc_type {
+ QCA_INVALID = -1,
+ QCA_AR3002,
+ QCA_ROME,
++ QCA_WCN3988,
+ QCA_WCN3990,
+ QCA_WCN3998,
+ QCA_WCN3991,
+ QCA_QCA6390,
+ QCA_WCN6750,
++ QCA_WCN6855,
++ QCA_WCN7850,
+ };
+
+ #if IS_ENABLED(CONFIG_BT_QCA)
+@@ -159,16 +162,6 @@ int qca_read_soc_version(struct hci_dev *hdev, struct qca_btsoc_version *ver,
+ enum qca_btsoc_type);
+ int qca_set_bdaddr(struct hci_dev *hdev, const bdaddr_t *bdaddr);
+ int qca_send_pre_shutdown_cmd(struct hci_dev *hdev);
+-static inline bool qca_is_wcn399x(enum qca_btsoc_type soc_type)
+-{
+- return soc_type == QCA_WCN3990 || soc_type == QCA_WCN3991 ||
+- soc_type == QCA_WCN3998;
+-}
+-static inline bool qca_is_wcn6750(enum qca_btsoc_type soc_type)
+-{
+- return soc_type == QCA_WCN6750;
+-}
+-
+ #else
+
+ static inline int qca_set_bdaddr_rome(struct hci_dev *hdev, const bdaddr_t *bdaddr)
+@@ -196,16 +189,6 @@ static inline int qca_set_bdaddr(struct hci_dev *hdev, const bdaddr_t *bdaddr)
+ return -EOPNOTSUPP;
+ }
+
+-static inline bool qca_is_wcn399x(enum qca_btsoc_type soc_type)
+-{
+- return false;
+-}
+-
+-static inline bool qca_is_wcn6750(enum qca_btsoc_type soc_type)
+-{
+- return false;
+-}
+-
+ static inline int qca_send_pre_shutdown_cmd(struct hci_dev *hdev)
+ {
+ return -EOPNOTSUPP;
+diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c
+index 76ceb8a0183d1..8bfef7f81b417 100644
+--- a/drivers/bluetooth/hci_qca.c
++++ b/drivers/bluetooth/hci_qca.c
+@@ -7,6 +7,7 @@
+ *
+ * Copyright (C) 2007 Texas Instruments, Inc.
+ * Copyright (c) 2010, 2012, 2018 The Linux Foundation. All rights reserved.
++ * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ *
+ * Acknowledgements:
+ * This file is based on hci_ll.c, which was...
+@@ -606,9 +607,18 @@ static int qca_open(struct hci_uart *hu)
+ if (hu->serdev) {
+ qcadev = serdev_device_get_drvdata(hu->serdev);
+
+- if (qca_is_wcn399x(qcadev->btsoc_type) ||
+- qca_is_wcn6750(qcadev->btsoc_type))
++ switch (qcadev->btsoc_type) {
++ case QCA_WCN3988:
++ case QCA_WCN3990:
++ case QCA_WCN3991:
++ case QCA_WCN3998:
++ case QCA_WCN6750:
+ hu->init_speed = qcadev->init_speed;
++ break;
++
++ default:
++ break;
++ }
+
+ if (qcadev->oper_speed)
+ hu->oper_speed = qcadev->oper_speed;
+@@ -1314,11 +1324,20 @@ static int qca_set_baudrate(struct hci_dev *hdev, uint8_t baudrate)
+ msecs_to_jiffies(CMD_TRANS_TIMEOUT_MS));
+
+ /* Give the controller time to process the request */
+- if (qca_is_wcn399x(qca_soc_type(hu)) ||
+- qca_is_wcn6750(qca_soc_type(hu)))
++ switch (qca_soc_type(hu)) {
++ case QCA_WCN3988:
++ case QCA_WCN3990:
++ case QCA_WCN3991:
++ case QCA_WCN3998:
++ case QCA_WCN6750:
++ case QCA_WCN6855:
++ case QCA_WCN7850:
+ usleep_range(1000, 10000);
+- else
++ break;
++
++ default:
+ msleep(300);
++ }
+
+ return 0;
+ }
+@@ -1391,12 +1410,20 @@ static unsigned int qca_get_speed(struct hci_uart *hu,
+
+ static int qca_check_speeds(struct hci_uart *hu)
+ {
+- if (qca_is_wcn399x(qca_soc_type(hu)) ||
+- qca_is_wcn6750(qca_soc_type(hu))) {
++ switch (qca_soc_type(hu)) {
++ case QCA_WCN3988:
++ case QCA_WCN3990:
++ case QCA_WCN3991:
++ case QCA_WCN3998:
++ case QCA_WCN6750:
++ case QCA_WCN6855:
++ case QCA_WCN7850:
+ if (!qca_get_speed(hu, QCA_INIT_SPEED) &&
+ !qca_get_speed(hu, QCA_OPER_SPEED))
+ return -EINVAL;
+- } else {
++ break;
++
++ default:
+ if (!qca_get_speed(hu, QCA_INIT_SPEED) ||
+ !qca_get_speed(hu, QCA_OPER_SPEED))
+ return -EINVAL;
+@@ -1425,13 +1452,29 @@ static int qca_set_speed(struct hci_uart *hu, enum qca_speed_type speed_type)
+ /* Disable flow control for wcn3990 to deassert RTS while
+ * changing the baudrate of chip and host.
+ */
+- if (qca_is_wcn399x(soc_type) ||
+- qca_is_wcn6750(soc_type))
++ switch (soc_type) {
++ case QCA_WCN3988:
++ case QCA_WCN3990:
++ case QCA_WCN3991:
++ case QCA_WCN3998:
++ case QCA_WCN6750:
++ case QCA_WCN6855:
++ case QCA_WCN7850:
+ hci_uart_set_flow_control(hu, true);
++ break;
+
+- if (soc_type == QCA_WCN3990) {
++ default:
++ break;
++ }
++
++ switch (soc_type) {
++ case QCA_WCN3990:
+ reinit_completion(&qca->drop_ev_comp);
+ set_bit(QCA_DROP_VENDOR_EVENT, &qca->flags);
++ break;
++
++ default:
++ break;
+ }
+
+ qca_baudrate = qca_get_baudrate_value(speed);
+@@ -1443,11 +1486,23 @@ static int qca_set_speed(struct hci_uart *hu, enum qca_speed_type speed_type)
+ host_set_baudrate(hu, speed);
+
+ error:
+- if (qca_is_wcn399x(soc_type) ||
+- qca_is_wcn6750(soc_type))
++ switch (soc_type) {
++ case QCA_WCN3988:
++ case QCA_WCN3990:
++ case QCA_WCN3991:
++ case QCA_WCN3998:
++ case QCA_WCN6750:
++ case QCA_WCN6855:
++ case QCA_WCN7850:
+ hci_uart_set_flow_control(hu, false);
++ break;
+
+- if (soc_type == QCA_WCN3990) {
++ default:
++ break;
++ }
++
++ switch (soc_type) {
++ case QCA_WCN3990:
+ /* Wait for the controller to send the vendor event
+ * for the baudrate change command.
+ */
+@@ -1459,6 +1514,10 @@ static int qca_set_speed(struct hci_uart *hu, enum qca_speed_type speed_type)
+ }
+
+ clear_bit(QCA_DROP_VENDOR_EVENT, &qca->flags);
++ break;
++
++ default:
++ break;
+ }
+ }
+
+@@ -1620,12 +1679,20 @@ static int qca_regulator_init(struct hci_uart *hu)
+ }
+ }
+
+- if (qca_is_wcn399x(soc_type)) {
++ switch (soc_type) {
++ case QCA_WCN3988:
++ case QCA_WCN3990:
++ case QCA_WCN3991:
++ case QCA_WCN3998:
+ /* Forcefully enable wcn399x to enter in to boot mode. */
+ host_set_baudrate(hu, 2400);
+ ret = qca_send_power_pulse(hu, false);
+ if (ret)
+ return ret;
++ break;
++
++ default:
++ break;
+ }
+
+ /* For wcn6750 need to enable gpio bt_en */
+@@ -1642,10 +1709,18 @@ static int qca_regulator_init(struct hci_uart *hu)
+
+ qca_set_speed(hu, QCA_INIT_SPEED);
+
+- if (qca_is_wcn399x(soc_type)) {
++ switch (soc_type) {
++ case QCA_WCN3988:
++ case QCA_WCN3990:
++ case QCA_WCN3991:
++ case QCA_WCN3998:
+ ret = qca_send_power_pulse(hu, true);
+ if (ret)
+ return ret;
++ break;
++
++ default:
++ break;
+ }
+
+ /* Now the device is in ready state to communicate with host.
+@@ -1679,10 +1754,18 @@ static int qca_power_on(struct hci_dev *hdev)
+ if (!hu->serdev)
+ return 0;
+
+- if (qca_is_wcn399x(soc_type) ||
+- qca_is_wcn6750(soc_type)) {
++ switch (soc_type) {
++ case QCA_WCN3988:
++ case QCA_WCN3990:
++ case QCA_WCN3991:
++ case QCA_WCN3998:
++ case QCA_WCN6750:
++ case QCA_WCN6855:
++ case QCA_WCN7850:
+ ret = qca_regulator_init(hu);
+- } else {
++ break;
++
++ default:
+ qcadev = serdev_device_get_drvdata(hu->serdev);
+ if (qcadev->bt_en) {
+ gpiod_set_value_cansleep(qcadev->bt_en, 1);
+@@ -1705,6 +1788,7 @@ static int qca_setup(struct hci_uart *hu)
+ const char *firmware_name = qca_get_firmware_name(hu);
+ int ret;
+ struct qca_btsoc_version ver;
++ const char *soc_name;
+
+ ret = qca_check_speeds(hu);
+ if (ret)
+@@ -1719,9 +1803,30 @@ static int qca_setup(struct hci_uart *hu)
+ */
+ set_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks);
+
+- bt_dev_info(hdev, "setting up %s",
+- qca_is_wcn399x(soc_type) ? "wcn399x" :
+- (soc_type == QCA_WCN6750) ? "wcn6750" : "ROME/QCA6390");
++ switch (soc_type) {
++ case QCA_WCN3988:
++ case QCA_WCN3990:
++ case QCA_WCN3991:
++ case QCA_WCN3998:
++ soc_name = "wcn399x";
++ break;
++
++ case QCA_WCN6750:
++ soc_name = "wcn6750";
++ break;
++
++ case QCA_WCN6855:
++ soc_name = "wcn6855";
++ break;
++
++ case QCA_WCN7850:
++ soc_name = "wcn7850";
++ break;
++
++ default:
++ soc_name = "ROME/QCA6390";
++ }
++ bt_dev_info(hdev, "setting up %s", soc_name);
+
+ qca->memdump_state = QCA_MEMDUMP_IDLE;
+
+@@ -1732,15 +1837,33 @@ static int qca_setup(struct hci_uart *hu)
+
+ clear_bit(QCA_SSR_TRIGGERED, &qca->flags);
+
+- if (qca_is_wcn399x(soc_type) ||
+- qca_is_wcn6750(soc_type)) {
+- set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks);
++ switch (soc_type) {
++ case QCA_WCN3988:
++ case QCA_WCN3990:
++ case QCA_WCN3991:
++ case QCA_WCN3998:
++ case QCA_WCN6750:
++ case QCA_WCN6855:
++ case QCA_WCN7850:
++
++ /* Set BDA quirk bit for reading BDA value from fwnode property
++ * only if that property exist in DT.
++ */
++ if (fwnode_property_present(dev_fwnode(hdev->dev.parent), "local-bd-address")) {
++ set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks);
++ bt_dev_info(hdev, "setting quirk bit to read BDA from fwnode later");
++ } else {
++ bt_dev_dbg(hdev, "local-bd-address` is not present in the devicetree so not setting quirk bit for BDA");
++ }
++
+ hci_set_aosp_capable(hdev);
+
+ ret = qca_read_soc_version(hdev, &ver, soc_type);
+ if (ret)
+ goto out;
+- } else {
++ break;
++
++ default:
+ qca_set_speed(hu, QCA_INIT_SPEED);
+ }
+
+@@ -1754,8 +1877,17 @@ static int qca_setup(struct hci_uart *hu)
+ qca_baudrate = qca_get_baudrate_value(speed);
+ }
+
+- if (!(qca_is_wcn399x(soc_type) ||
+- qca_is_wcn6750(soc_type))) {
++ switch (soc_type) {
++ case QCA_WCN3988:
++ case QCA_WCN3990:
++ case QCA_WCN3991:
++ case QCA_WCN3998:
++ case QCA_WCN6750:
++ case QCA_WCN6855:
++ case QCA_WCN7850:
++ break;
++
++ default:
+ /* Get QCA version information */
+ ret = qca_read_soc_version(hdev, &ver, soc_type);
+ if (ret)
+@@ -1824,7 +1956,18 @@ static const struct hci_uart_proto qca_proto = {
+ .dequeue = qca_dequeue,
+ };
+
+-static const struct qca_device_data qca_soc_data_wcn3990 = {
++static const struct qca_device_data qca_soc_data_wcn3988 __maybe_unused = {
++ .soc_type = QCA_WCN3988,
++ .vregs = (struct qca_vreg []) {
++ { "vddio", 15000 },
++ { "vddxo", 80000 },
++ { "vddrf", 300000 },
++ { "vddch0", 450000 },
++ },
++ .num_vregs = 4,
++};
++
++static const struct qca_device_data qca_soc_data_wcn3990 __maybe_unused = {
+ .soc_type = QCA_WCN3990,
+ .vregs = (struct qca_vreg []) {
+ { "vddio", 15000 },
+@@ -1835,7 +1978,7 @@ static const struct qca_device_data qca_soc_data_wcn3990 = {
+ .num_vregs = 4,
+ };
+
+-static const struct qca_device_data qca_soc_data_wcn3991 = {
++static const struct qca_device_data qca_soc_data_wcn3991 __maybe_unused = {
+ .soc_type = QCA_WCN3991,
+ .vregs = (struct qca_vreg []) {
+ { "vddio", 15000 },
+@@ -1847,7 +1990,7 @@ static const struct qca_device_data qca_soc_data_wcn3991 = {
+ .capabilities = QCA_CAP_WIDEBAND_SPEECH | QCA_CAP_VALID_LE_STATES,
+ };
+
+-static const struct qca_device_data qca_soc_data_wcn3998 = {
++static const struct qca_device_data qca_soc_data_wcn3998 __maybe_unused = {
+ .soc_type = QCA_WCN3998,
+ .vregs = (struct qca_vreg []) {
+ { "vddio", 10000 },
+@@ -1858,13 +2001,13 @@ static const struct qca_device_data qca_soc_data_wcn3998 = {
+ .num_vregs = 4,
+ };
+
+-static const struct qca_device_data qca_soc_data_qca6390 = {
++static const struct qca_device_data qca_soc_data_qca6390 __maybe_unused = {
+ .soc_type = QCA_QCA6390,
+ .num_vregs = 0,
+ .capabilities = QCA_CAP_WIDEBAND_SPEECH | QCA_CAP_VALID_LE_STATES,
+ };
+
+-static const struct qca_device_data qca_soc_data_wcn6750 = {
++static const struct qca_device_data qca_soc_data_wcn6750 __maybe_unused = {
+ .soc_type = QCA_WCN6750,
+ .vregs = (struct qca_vreg []) {
+ { "vddio", 5000 },
+@@ -1881,6 +2024,34 @@ static const struct qca_device_data qca_soc_data_wcn6750 = {
+ .capabilities = QCA_CAP_WIDEBAND_SPEECH | QCA_CAP_VALID_LE_STATES,
+ };
+
++static const struct qca_device_data qca_soc_data_wcn6855 = {
++ .soc_type = QCA_WCN6855,
++ .vregs = (struct qca_vreg []) {
++ { "vddio", 5000 },
++ { "vddbtcxmx", 126000 },
++ { "vddrfacmn", 12500 },
++ { "vddrfa0p8", 102000 },
++ { "vddrfa1p7", 302000 },
++ { "vddrfa1p2", 257000 },
++ },
++ .num_vregs = 6,
++ .capabilities = QCA_CAP_WIDEBAND_SPEECH | QCA_CAP_VALID_LE_STATES,
++};
++
++static const struct qca_device_data qca_soc_data_wcn7850 __maybe_unused = {
++ .soc_type = QCA_WCN7850,
++ .vregs = (struct qca_vreg []) {
++ { "vddio", 5000 },
++ { "vddaon", 26000 },
++ { "vdddig", 126000 },
++ { "vddrfa0p8", 102000 },
++ { "vddrfa1p2", 257000 },
++ { "vddrfa1p9", 302000 },
++ },
++ .num_vregs = 6,
++ .capabilities = QCA_CAP_WIDEBAND_SPEECH | QCA_CAP_VALID_LE_STATES,
++};
++
+ static void qca_power_shutdown(struct hci_uart *hu)
+ {
+ struct qca_serdev *qcadev;
+@@ -1906,11 +2077,18 @@ static void qca_power_shutdown(struct hci_uart *hu)
+
+ qcadev = serdev_device_get_drvdata(hu->serdev);
+
+- if (qca_is_wcn399x(soc_type)) {
++ switch (soc_type) {
++ case QCA_WCN3988:
++ case QCA_WCN3990:
++ case QCA_WCN3991:
++ case QCA_WCN3998:
+ host_set_baudrate(hu, 2400);
+ qca_send_power_pulse(hu, false);
+ qca_regulator_disable(qcadev);
+- } else if (soc_type == QCA_WCN6750) {
++ break;
++
++ case QCA_WCN6750:
++ case QCA_WCN6855:
+ gpiod_set_value_cansleep(qcadev->bt_en, 0);
+ msleep(100);
+ qca_regulator_disable(qcadev);
+@@ -1918,7 +2096,9 @@ static void qca_power_shutdown(struct hci_uart *hu)
+ sw_ctrl_state = gpiod_get_value_cansleep(qcadev->sw_ctrl);
+ bt_dev_dbg(hu->hdev, "SW_CTRL is %d", sw_ctrl_state);
+ }
+- } else if (qcadev->bt_en) {
++ break;
++
++ default:
+ gpiod_set_value_cansleep(qcadev->bt_en, 0);
+ }
+
+@@ -2043,10 +2223,19 @@ static int qca_serdev_probe(struct serdev_device *serdev)
+ if (!qcadev->oper_speed)
+ BT_DBG("UART will pick default operating speed");
+
+- if (data &&
+- (qca_is_wcn399x(data->soc_type) ||
+- qca_is_wcn6750(data->soc_type))) {
++ if (data)
+ qcadev->btsoc_type = data->soc_type;
++ else
++ qcadev->btsoc_type = QCA_ROME;
++
++ switch (qcadev->btsoc_type) {
++ case QCA_WCN3988:
++ case QCA_WCN3990:
++ case QCA_WCN3991:
++ case QCA_WCN3998:
++ case QCA_WCN6750:
++ case QCA_WCN6855:
++ case QCA_WCN7850:
+ qcadev->bt_power = devm_kzalloc(&serdev->dev,
+ sizeof(struct qca_power),
+ GFP_KERNEL);
+@@ -2065,14 +2254,19 @@ static int qca_serdev_probe(struct serdev_device *serdev)
+
+ qcadev->bt_en = devm_gpiod_get_optional(&serdev->dev, "enable",
+ GPIOD_OUT_LOW);
+- if (IS_ERR_OR_NULL(qcadev->bt_en) && data->soc_type == QCA_WCN6750) {
++ if (IS_ERR_OR_NULL(qcadev->bt_en) &&
++ (data->soc_type == QCA_WCN6750 ||
++ data->soc_type == QCA_WCN6855)) {
+ dev_err(&serdev->dev, "failed to acquire BT_EN gpio\n");
+ power_ctrl_enabled = false;
+ }
+
+ qcadev->sw_ctrl = devm_gpiod_get_optional(&serdev->dev, "swctrl",
+ GPIOD_IN);
+- if (IS_ERR_OR_NULL(qcadev->sw_ctrl) && data->soc_type == QCA_WCN6750)
++ if (IS_ERR_OR_NULL(qcadev->sw_ctrl) &&
++ (data->soc_type == QCA_WCN6750 ||
++ data->soc_type == QCA_WCN6855 ||
++ data->soc_type == QCA_WCN7850))
+ dev_warn(&serdev->dev, "failed to acquire SW_CTRL gpio\n");
+
+ qcadev->susclk = devm_clk_get_optional(&serdev->dev, NULL);
+@@ -2086,12 +2280,9 @@ static int qca_serdev_probe(struct serdev_device *serdev)
+ BT_ERR("wcn3990 serdev registration failed");
+ return err;
+ }
+- } else {
+- if (data)
+- qcadev->btsoc_type = data->soc_type;
+- else
+- qcadev->btsoc_type = QCA_ROME;
++ break;
+
++ default:
+ qcadev->bt_en = devm_gpiod_get_optional(&serdev->dev, "enable",
+ GPIOD_OUT_LOW);
+ if (IS_ERR_OR_NULL(qcadev->bt_en)) {
+@@ -2147,12 +2338,24 @@ static void qca_serdev_remove(struct serdev_device *serdev)
+ struct qca_serdev *qcadev = serdev_device_get_drvdata(serdev);
+ struct qca_power *power = qcadev->bt_power;
+
+- if ((qca_is_wcn399x(qcadev->btsoc_type) ||
+- qca_is_wcn6750(qcadev->btsoc_type)) &&
+- power->vregs_on)
+- qca_power_shutdown(&qcadev->serdev_hu);
+- else if (qcadev->susclk)
+- clk_disable_unprepare(qcadev->susclk);
++ switch (qcadev->btsoc_type) {
++ case QCA_WCN3988:
++ case QCA_WCN3990:
++ case QCA_WCN3991:
++ case QCA_WCN3998:
++ case QCA_WCN6750:
++ case QCA_WCN6855:
++ case QCA_WCN7850:
++ if (power->vregs_on) {
++ qca_power_shutdown(&qcadev->serdev_hu);
++ break;
++ }
++ fallthrough;
++
++ default:
++ if (qcadev->susclk)
++ clk_disable_unprepare(qcadev->susclk);
++ }
+
+ hci_uart_unregister_device(&qcadev->serdev_hu);
+ }
+@@ -2329,10 +2532,13 @@ static const struct of_device_id qca_bluetooth_of_match[] = {
+ { .compatible = "qcom,qca6174-bt" },
+ { .compatible = "qcom,qca6390-bt", .data = &qca_soc_data_qca6390},
+ { .compatible = "qcom,qca9377-bt" },
++ { .compatible = "qcom,wcn3988-bt", .data = &qca_soc_data_wcn3988},
+ { .compatible = "qcom,wcn3990-bt", .data = &qca_soc_data_wcn3990},
+ { .compatible = "qcom,wcn3991-bt", .data = &qca_soc_data_wcn3991},
+ { .compatible = "qcom,wcn3998-bt", .data = &qca_soc_data_wcn3998},
+ { .compatible = "qcom,wcn6750-bt", .data = &qca_soc_data_wcn6750},
++ { .compatible = "qcom,wcn6855-bt", .data = &qca_soc_data_wcn6855},
++ { .compatible = "qcom,wcn7850-bt", .data = &qca_soc_data_wcn7850},
+ { /* sentinel */ }
+ };
+ MODULE_DEVICE_TABLE(of, qca_bluetooth_of_match);
+diff --git a/drivers/clk/tegra/clk-tegra20.c b/drivers/clk/tegra/clk-tegra20.c
+index 422d782475532..dcacc5064d339 100644
+--- a/drivers/clk/tegra/clk-tegra20.c
++++ b/drivers/clk/tegra/clk-tegra20.c
+@@ -21,24 +21,24 @@
+ #define MISC_CLK_ENB 0x48
+
+ #define OSC_CTRL 0x50
+-#define OSC_CTRL_OSC_FREQ_MASK (3<<30)
+-#define OSC_CTRL_OSC_FREQ_13MHZ (0<<30)
+-#define OSC_CTRL_OSC_FREQ_19_2MHZ (1<<30)
+-#define OSC_CTRL_OSC_FREQ_12MHZ (2<<30)
+-#define OSC_CTRL_OSC_FREQ_26MHZ (3<<30)
+-#define OSC_CTRL_MASK (0x3f2 | OSC_CTRL_OSC_FREQ_MASK)
+-
+-#define OSC_CTRL_PLL_REF_DIV_MASK (3<<28)
+-#define OSC_CTRL_PLL_REF_DIV_1 (0<<28)
+-#define OSC_CTRL_PLL_REF_DIV_2 (1<<28)
+-#define OSC_CTRL_PLL_REF_DIV_4 (2<<28)
++#define OSC_CTRL_OSC_FREQ_MASK (3u<<30)
++#define OSC_CTRL_OSC_FREQ_13MHZ (0u<<30)
++#define OSC_CTRL_OSC_FREQ_19_2MHZ (1u<<30)
++#define OSC_CTRL_OSC_FREQ_12MHZ (2u<<30)
++#define OSC_CTRL_OSC_FREQ_26MHZ (3u<<30)
++#define OSC_CTRL_MASK (0x3f2u | OSC_CTRL_OSC_FREQ_MASK)
++
++#define OSC_CTRL_PLL_REF_DIV_MASK (3u<<28)
++#define OSC_CTRL_PLL_REF_DIV_1 (0u<<28)
++#define OSC_CTRL_PLL_REF_DIV_2 (1u<<28)
++#define OSC_CTRL_PLL_REF_DIV_4 (2u<<28)
+
+ #define OSC_FREQ_DET 0x58
+-#define OSC_FREQ_DET_TRIG (1<<31)
++#define OSC_FREQ_DET_TRIG (1u<<31)
+
+ #define OSC_FREQ_DET_STATUS 0x5c
+-#define OSC_FREQ_DET_BUSY (1<<31)
+-#define OSC_FREQ_DET_CNT_MASK 0xFFFF
++#define OSC_FREQ_DET_BUSY (1u<<31)
++#define OSC_FREQ_DET_CNT_MASK 0xFFFFu
+
+ #define TEGRA20_CLK_PERIPH_BANKS 3
+
+diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
+index abdd26f7d04c9..5771f3fc6115d 100644
+--- a/drivers/cpufreq/intel_pstate.c
++++ b/drivers/cpufreq/intel_pstate.c
+@@ -2952,6 +2952,9 @@ static void intel_cpufreq_adjust_perf(unsigned int cpunum,
+ if (min_pstate < cpu->min_perf_ratio)
+ min_pstate = cpu->min_perf_ratio;
+
++ if (min_pstate > cpu->max_perf_ratio)
++ min_pstate = cpu->max_perf_ratio;
++
+ max_pstate = min(cap_pstate, cpu->max_perf_ratio);
+ if (max_pstate < min_pstate)
+ max_pstate = min_pstate;
+diff --git a/drivers/dma/fsl-qdma.c b/drivers/dma/fsl-qdma.c
+index f383f219ed008..7082a5a6814a4 100644
+--- a/drivers/dma/fsl-qdma.c
++++ b/drivers/dma/fsl-qdma.c
+@@ -109,6 +109,7 @@
+ #define FSL_QDMA_CMD_WTHROTL_OFFSET 20
+ #define FSL_QDMA_CMD_DSEN_OFFSET 19
+ #define FSL_QDMA_CMD_LWC_OFFSET 16
++#define FSL_QDMA_CMD_PF BIT(17)
+
+ /* Field definition for Descriptor status */
+ #define QDMA_CCDF_STATUS_RTE BIT(5)
+@@ -384,7 +385,8 @@ static void fsl_qdma_comp_fill_memcpy(struct fsl_qdma_comp *fsl_comp,
+ qdma_csgf_set_f(csgf_dest, len);
+ /* Descriptor Buffer */
+ cmd = cpu_to_le32(FSL_QDMA_CMD_RWTTYPE <<
+- FSL_QDMA_CMD_RWTTYPE_OFFSET);
++ FSL_QDMA_CMD_RWTTYPE_OFFSET) |
++ FSL_QDMA_CMD_PF;
+ sdf->data = QDMA_SDDF_CMD(cmd);
+
+ cmd = cpu_to_le32(FSL_QDMA_CMD_RWTTYPE <<
+@@ -1201,10 +1203,6 @@ static int fsl_qdma_probe(struct platform_device *pdev)
+ if (!fsl_qdma->queue)
+ return -ENOMEM;
+
+- ret = fsl_qdma_irq_init(pdev, fsl_qdma);
+- if (ret)
+- return ret;
+-
+ fsl_qdma->irq_base = platform_get_irq_byname(pdev, "qdma-queue0");
+ if (fsl_qdma->irq_base < 0)
+ return fsl_qdma->irq_base;
+@@ -1243,16 +1241,19 @@ static int fsl_qdma_probe(struct platform_device *pdev)
+
+ platform_set_drvdata(pdev, fsl_qdma);
+
+- ret = dma_async_device_register(&fsl_qdma->dma_dev);
++ ret = fsl_qdma_reg_init(fsl_qdma);
+ if (ret) {
+- dev_err(&pdev->dev,
+- "Can't register NXP Layerscape qDMA engine.\n");
++ dev_err(&pdev->dev, "Can't Initialize the qDMA engine.\n");
+ return ret;
+ }
+
+- ret = fsl_qdma_reg_init(fsl_qdma);
++ ret = fsl_qdma_irq_init(pdev, fsl_qdma);
++ if (ret)
++ return ret;
++
++ ret = dma_async_device_register(&fsl_qdma->dma_dev);
+ if (ret) {
+- dev_err(&pdev->dev, "Can't Initialize the qDMA engine.\n");
++ dev_err(&pdev->dev, "Can't register NXP Layerscape qDMA engine.\n");
+ return ret;
+ }
+
+diff --git a/drivers/dma/ptdma/ptdma-dmaengine.c b/drivers/dma/ptdma/ptdma-dmaengine.c
+index 1aa65e5de0f3a..f792407348077 100644
+--- a/drivers/dma/ptdma/ptdma-dmaengine.c
++++ b/drivers/dma/ptdma/ptdma-dmaengine.c
+@@ -385,8 +385,6 @@ int pt_dmaengine_register(struct pt_device *pt)
+ chan->vc.desc_free = pt_do_cleanup;
+ vchan_init(&chan->vc, dma_dev);
+
+- dma_set_mask_and_coherent(pt->dev, DMA_BIT_MASK(64));
+-
+ ret = dma_async_device_register(dma_dev);
+ if (ret)
+ goto err_reg;
+diff --git a/drivers/firmware/efi/capsule-loader.c b/drivers/firmware/efi/capsule-loader.c
+index 3e8d4b51a8140..97bafb5f70389 100644
+--- a/drivers/firmware/efi/capsule-loader.c
++++ b/drivers/firmware/efi/capsule-loader.c
+@@ -292,7 +292,7 @@ static int efi_capsule_open(struct inode *inode, struct file *file)
+ return -ENOMEM;
+ }
+
+- cap_info->phys = kzalloc(sizeof(void *), GFP_KERNEL);
++ cap_info->phys = kzalloc(sizeof(phys_addr_t), GFP_KERNEL);
+ if (!cap_info->phys) {
+ kfree(cap_info->pages);
+ kfree(cap_info);
+diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
+index b7c0e8cc0764f..9077353d1c98d 100644
+--- a/drivers/firmware/efi/efi.c
++++ b/drivers/firmware/efi/efi.c
+@@ -185,8 +185,27 @@ static const struct attribute_group efi_subsys_attr_group = {
+ static struct efivars generic_efivars;
+ static struct efivar_operations generic_ops;
+
++static bool generic_ops_supported(void)
++{
++ unsigned long name_size;
++ efi_status_t status;
++ efi_char16_t name;
++ efi_guid_t guid;
++
++ name_size = sizeof(name);
++
++ status = efi.get_next_variable(&name_size, &name, &guid);
++ if (status == EFI_UNSUPPORTED)
++ return false;
++
++ return true;
++}
++
+ static int generic_ops_register(void)
+ {
++ if (!generic_ops_supported())
++ return 0;
++
+ generic_ops.get_variable = efi.get_variable;
+ generic_ops.get_next_variable = efi.get_next_variable;
+ generic_ops.query_variable_store = efi_query_variable_store;
+@@ -200,6 +219,9 @@ static int generic_ops_register(void)
+
+ static void generic_ops_unregister(void)
+ {
++ if (!generic_ops.get_variable)
++ return;
++
+ efivars_unregister(&generic_efivars);
+ }
+
+diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile
+index b6e1dcb98a64c..473ef18421db0 100644
+--- a/drivers/firmware/efi/libstub/Makefile
++++ b/drivers/firmware/efi/libstub/Makefile
+@@ -84,6 +84,7 @@ lib-$(CONFIG_EFI_GENERIC_STUB) += efi-stub.o string.o intrinsics.o systable.o
+ lib-$(CONFIG_ARM) += arm32-stub.o
+ lib-$(CONFIG_ARM64) += arm64-stub.o smbios.o
+ lib-$(CONFIG_X86) += x86-stub.o
++lib-$(CONFIG_X86_64) += x86-5lvl.o
+ lib-$(CONFIG_RISCV) += riscv-stub.o
+ lib-$(CONFIG_LOONGARCH) += loongarch-stub.o
+
+diff --git a/drivers/firmware/efi/libstub/alignedmem.c b/drivers/firmware/efi/libstub/alignedmem.c
+index 1de9878ddd3a2..6b83c492c3b82 100644
+--- a/drivers/firmware/efi/libstub/alignedmem.c
++++ b/drivers/firmware/efi/libstub/alignedmem.c
+@@ -22,12 +22,15 @@
+ * Return: status code
+ */
+ efi_status_t efi_allocate_pages_aligned(unsigned long size, unsigned long *addr,
+- unsigned long max, unsigned long align)
++ unsigned long max, unsigned long align,
++ int memory_type)
+ {
+ efi_physical_addr_t alloc_addr;
+ efi_status_t status;
+ int slack;
+
++ max = min(max, EFI_ALLOC_LIMIT);
++
+ if (align < EFI_ALLOC_ALIGN)
+ align = EFI_ALLOC_ALIGN;
+
+@@ -36,7 +39,7 @@ efi_status_t efi_allocate_pages_aligned(unsigned long size, unsigned long *addr,
+ slack = align / EFI_PAGE_SIZE - 1;
+
+ status = efi_bs_call(allocate_pages, EFI_ALLOCATE_MAX_ADDRESS,
+- EFI_LOADER_DATA, size / EFI_PAGE_SIZE + slack,
++ memory_type, size / EFI_PAGE_SIZE + slack,
+ &alloc_addr);
+ if (status != EFI_SUCCESS)
+ return status;
+diff --git a/drivers/firmware/efi/libstub/arm64-stub.c b/drivers/firmware/efi/libstub/arm64-stub.c
+index e2f90566b291a..16f15e36f9a7d 100644
+--- a/drivers/firmware/efi/libstub/arm64-stub.c
++++ b/drivers/firmware/efi/libstub/arm64-stub.c
+@@ -180,7 +180,8 @@ efi_status_t handle_kernel_image(unsigned long *image_addr,
+ * locate the kernel at a randomized offset in physical memory.
+ */
+ status = efi_random_alloc(*reserve_size, min_kimg_align,
+- reserve_addr, phys_seed);
++ reserve_addr, phys_seed,
++ EFI_LOADER_CODE, 0, EFI_ALLOC_LIMIT);
+ if (status != EFI_SUCCESS)
+ efi_warn("efi_random_alloc() failed: 0x%lx\n", status);
+ } else {
+@@ -190,10 +191,11 @@ efi_status_t handle_kernel_image(unsigned long *image_addr,
+ if (status != EFI_SUCCESS) {
+ if (!check_image_region((u64)_text, kernel_memsize)) {
+ efi_err("FIRMWARE BUG: Image BSS overlaps adjacent EFI memory region\n");
+- } else if (IS_ALIGNED((u64)_text, min_kimg_align)) {
++ } else if (IS_ALIGNED((u64)_text, min_kimg_align) &&
++ (u64)_end < EFI_ALLOC_LIMIT) {
+ /*
+ * Just execute from wherever we were loaded by the
+- * UEFI PE/COFF loader if the alignment is suitable.
++ * UEFI PE/COFF loader if the placement is suitable.
+ */
+ *image_addr = (u64)_text;
+ *reserve_size = 0;
+@@ -201,7 +203,8 @@ efi_status_t handle_kernel_image(unsigned long *image_addr,
+ }
+
+ status = efi_allocate_pages_aligned(*reserve_size, reserve_addr,
+- ULONG_MAX, min_kimg_align);
++ ULONG_MAX, min_kimg_align,
++ EFI_LOADER_CODE);
+
+ if (status != EFI_SUCCESS) {
+ efi_err("Failed to relocate kernel\n");
+diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c
+index 3d9b2469a0dfd..97744822dd951 100644
+--- a/drivers/firmware/efi/libstub/efi-stub-helper.c
++++ b/drivers/firmware/efi/libstub/efi-stub-helper.c
+@@ -216,6 +216,8 @@ efi_status_t efi_parse_options(char const *cmdline)
+ efi_loglevel = CONSOLE_LOGLEVEL_QUIET;
+ } else if (!strcmp(param, "noinitrd")) {
+ efi_noinitrd = true;
++ } else if (IS_ENABLED(CONFIG_X86_64) && !strcmp(param, "no5lvl")) {
++ efi_no5lvl = true;
+ } else if (!strcmp(param, "efi") && val) {
+ efi_nochunk = parse_option_str(val, "nochunk");
+ efi_novamap |= parse_option_str(val, "novamap");
+diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h
+index 970e86e3aab05..6741f3d900c5a 100644
+--- a/drivers/firmware/efi/libstub/efistub.h
++++ b/drivers/firmware/efi/libstub/efistub.h
+@@ -29,6 +29,11 @@
+ #define EFI_ALLOC_ALIGN EFI_PAGE_SIZE
+ #endif
+
++#ifndef EFI_ALLOC_LIMIT
++#define EFI_ALLOC_LIMIT ULONG_MAX
++#endif
++
++extern bool efi_no5lvl;
+ extern bool efi_nochunk;
+ extern bool efi_nokaslr;
+ extern int efi_loglevel;
+@@ -415,6 +420,26 @@ union efi_dxe_services_table {
+ } mixed_mode;
+ };
+
++typedef union efi_memory_attribute_protocol efi_memory_attribute_protocol_t;
++
++union efi_memory_attribute_protocol {
++ struct {
++ efi_status_t (__efiapi *get_memory_attributes)(
++ efi_memory_attribute_protocol_t *, efi_physical_addr_t, u64, u64 *);
++
++ efi_status_t (__efiapi *set_memory_attributes)(
++ efi_memory_attribute_protocol_t *, efi_physical_addr_t, u64, u64);
++
++ efi_status_t (__efiapi *clear_memory_attributes)(
++ efi_memory_attribute_protocol_t *, efi_physical_addr_t, u64, u64);
++ };
++ struct {
++ u32 get_memory_attributes;
++ u32 set_memory_attributes;
++ u32 clear_memory_attributes;
++ } mixed_mode;
++};
++
+ typedef union efi_uga_draw_protocol efi_uga_draw_protocol_t;
+
+ union efi_uga_draw_protocol {
+@@ -880,7 +905,9 @@ void efi_get_virtmap(efi_memory_desc_t *memory_map, unsigned long map_size,
+ efi_status_t efi_get_random_bytes(unsigned long size, u8 *out);
+
+ efi_status_t efi_random_alloc(unsigned long size, unsigned long align,
+- unsigned long *addr, unsigned long random_seed);
++ unsigned long *addr, unsigned long random_seed,
++ int memory_type, unsigned long alloc_min,
++ unsigned long alloc_max);
+
+ efi_status_t efi_random_get_seed(void);
+
+@@ -907,7 +934,8 @@ efi_status_t efi_allocate_pages(unsigned long size, unsigned long *addr,
+ unsigned long max);
+
+ efi_status_t efi_allocate_pages_aligned(unsigned long size, unsigned long *addr,
+- unsigned long max, unsigned long align);
++ unsigned long max, unsigned long align,
++ int memory_type);
+
+ efi_status_t efi_low_alloc_above(unsigned long size, unsigned long align,
+ unsigned long *addr, unsigned long min);
+diff --git a/drivers/firmware/efi/libstub/mem.c b/drivers/firmware/efi/libstub/mem.c
+index 45841ef55a9f6..4f1fa302234d8 100644
+--- a/drivers/firmware/efi/libstub/mem.c
++++ b/drivers/firmware/efi/libstub/mem.c
+@@ -89,9 +89,12 @@ efi_status_t efi_allocate_pages(unsigned long size, unsigned long *addr,
+ efi_physical_addr_t alloc_addr;
+ efi_status_t status;
+
++ max = min(max, EFI_ALLOC_LIMIT);
++
+ if (EFI_ALLOC_ALIGN > EFI_PAGE_SIZE)
+ return efi_allocate_pages_aligned(size, addr, max,
+- EFI_ALLOC_ALIGN);
++ EFI_ALLOC_ALIGN,
++ EFI_LOADER_DATA);
+
+ alloc_addr = ALIGN_DOWN(max + 1, EFI_ALLOC_ALIGN) - 1;
+ status = efi_bs_call(allocate_pages, EFI_ALLOCATE_MAX_ADDRESS,
+diff --git a/drivers/firmware/efi/libstub/randomalloc.c b/drivers/firmware/efi/libstub/randomalloc.c
+index 9fb5869896be7..7ba05719a53ba 100644
+--- a/drivers/firmware/efi/libstub/randomalloc.c
++++ b/drivers/firmware/efi/libstub/randomalloc.c
+@@ -16,7 +16,8 @@
+ */
+ static unsigned long get_entry_num_slots(efi_memory_desc_t *md,
+ unsigned long size,
+- unsigned long align_shift)
++ unsigned long align_shift,
++ u64 alloc_min, u64 alloc_max)
+ {
+ unsigned long align = 1UL << align_shift;
+ u64 first_slot, last_slot, region_end;
+@@ -29,11 +30,11 @@ static unsigned long get_entry_num_slots(efi_memory_desc_t *md,
+ return 0;
+
+ region_end = min(md->phys_addr + md->num_pages * EFI_PAGE_SIZE - 1,
+- (u64)ULONG_MAX);
++ alloc_max);
+ if (region_end < size)
+ return 0;
+
+- first_slot = round_up(md->phys_addr, align);
++ first_slot = round_up(max(md->phys_addr, alloc_min), align);
+ last_slot = round_down(region_end - size + 1, align);
+
+ if (first_slot > last_slot)
+@@ -53,7 +54,10 @@ static unsigned long get_entry_num_slots(efi_memory_desc_t *md,
+ efi_status_t efi_random_alloc(unsigned long size,
+ unsigned long align,
+ unsigned long *addr,
+- unsigned long random_seed)
++ unsigned long random_seed,
++ int memory_type,
++ unsigned long alloc_min,
++ unsigned long alloc_max)
+ {
+ unsigned long total_slots = 0, target_slot;
+ unsigned long total_mirrored_slots = 0;
+@@ -75,7 +79,8 @@ efi_status_t efi_random_alloc(unsigned long size,
+ efi_memory_desc_t *md = (void *)map->map + map_offset;
+ unsigned long slots;
+
+- slots = get_entry_num_slots(md, size, ilog2(align));
++ slots = get_entry_num_slots(md, size, ilog2(align), alloc_min,
++ alloc_max);
+ MD_NUM_SLOTS(md) = slots;
+ total_slots += slots;
+ if (md->attribute & EFI_MEMORY_MORE_RELIABLE)
+@@ -118,7 +123,7 @@ efi_status_t efi_random_alloc(unsigned long size,
+ pages = size / EFI_PAGE_SIZE;
+
+ status = efi_bs_call(allocate_pages, EFI_ALLOCATE_ADDRESS,
+- EFI_LOADER_DATA, pages, &target);
++ memory_type, pages, &target);
+ if (status == EFI_SUCCESS)
+ *addr = target;
+ break;
+diff --git a/drivers/firmware/efi/libstub/x86-5lvl.c b/drivers/firmware/efi/libstub/x86-5lvl.c
+new file mode 100644
+index 0000000000000..479dd445acdcf
+--- /dev/null
++++ b/drivers/firmware/efi/libstub/x86-5lvl.c
+@@ -0,0 +1,95 @@
++// SPDX-License-Identifier: GPL-2.0-only
++#include <linux/efi.h>
++
++#include <asm/boot.h>
++#include <asm/desc.h>
++#include <asm/efi.h>
++
++#include "efistub.h"
++#include "x86-stub.h"
++
++bool efi_no5lvl;
++
++static void (*la57_toggle)(void *cr3);
++
++static const struct desc_struct gdt[] = {
++ [GDT_ENTRY_KERNEL32_CS] = GDT_ENTRY_INIT(0xc09b, 0, 0xfffff),
++ [GDT_ENTRY_KERNEL_CS] = GDT_ENTRY_INIT(0xa09b, 0, 0xfffff),
++};
++
++/*
++ * Enabling (or disabling) 5 level paging is tricky, because it can only be
++ * done from 32-bit mode with paging disabled. This means not only that the
++ * code itself must be running from 32-bit addressable physical memory, but
++ * also that the root page table must be 32-bit addressable, as programming
++ * a 64-bit value into CR3 when running in 32-bit mode is not supported.
++ */
++efi_status_t efi_setup_5level_paging(void)
++{
++ u8 tmpl_size = (u8 *)&trampoline_ljmp_imm_offset - (u8 *)&trampoline_32bit_src;
++ efi_status_t status;
++ u8 *la57_code;
++
++ if (!efi_is_64bit())
++ return EFI_SUCCESS;
++
++ /* check for 5 level paging support */
++ if (native_cpuid_eax(0) < 7 ||
++ !(native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31))))
++ return EFI_SUCCESS;
++
++ /* allocate some 32-bit addressable memory for code and a page table */
++ status = efi_allocate_pages(2 * PAGE_SIZE, (unsigned long *)&la57_code,
++ U32_MAX);
++ if (status != EFI_SUCCESS)
++ return status;
++
++ la57_toggle = memcpy(la57_code, trampoline_32bit_src, tmpl_size);
++ memset(la57_code + tmpl_size, 0x90, PAGE_SIZE - tmpl_size);
++
++ /*
++ * To avoid the need to allocate a 32-bit addressable stack, the
++ * trampoline uses a LJMP instruction to switch back to long mode.
++ * LJMP takes an absolute destination address, which needs to be
++ * fixed up at runtime.
++ */
++ *(u32 *)&la57_code[trampoline_ljmp_imm_offset] += (unsigned long)la57_code;
++
++ efi_adjust_memory_range_protection((unsigned long)la57_toggle, PAGE_SIZE);
++
++ return EFI_SUCCESS;
++}
++
++void efi_5level_switch(void)
++{
++ bool want_la57 = IS_ENABLED(CONFIG_X86_5LEVEL) && !efi_no5lvl;
++ bool have_la57 = native_read_cr4() & X86_CR4_LA57;
++ bool need_toggle = want_la57 ^ have_la57;
++ u64 *pgt = (void *)la57_toggle + PAGE_SIZE;
++ u64 *cr3 = (u64 *)__native_read_cr3();
++ u64 *new_cr3;
++
++ if (!la57_toggle || !need_toggle)
++ return;
++
++ if (!have_la57) {
++ /*
++ * 5 level paging will be enabled, so a root level page needs
++ * to be allocated from the 32-bit addressable physical region,
++ * with its first entry referring to the existing hierarchy.
++ */
++ new_cr3 = memset(pgt, 0, PAGE_SIZE);
++ new_cr3[0] = (u64)cr3 | _PAGE_TABLE_NOENC;
++ } else {
++ /* take the new root table pointer from the current entry #0 */
++ new_cr3 = (u64 *)(cr3[0] & PAGE_MASK);
++
++ /* copy the new root table if it is not 32-bit addressable */
++ if ((u64)new_cr3 > U32_MAX)
++ new_cr3 = memcpy(pgt, new_cr3, PAGE_SIZE);
++ }
++
++ native_load_gdt(&(struct desc_ptr){ sizeof(gdt) - 1, (u64)gdt });
++
++ la57_toggle(new_cr3);
++}
+diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c
+index 4f0152b11a890..784e1b2ae5ccd 100644
+--- a/drivers/firmware/efi/libstub/x86-stub.c
++++ b/drivers/firmware/efi/libstub/x86-stub.c
+@@ -15,16 +15,16 @@
+ #include <asm/setup.h>
+ #include <asm/desc.h>
+ #include <asm/boot.h>
++#include <asm/kaslr.h>
++#include <asm/sev.h>
+
+ #include "efistub.h"
+-
+-/* Maximum physical address for 64-bit kernel with 4-level paging */
+-#define MAXMEM_X86_64_4LEVEL (1ull << 46)
++#include "x86-stub.h"
+
+ const efi_system_table_t *efi_system_table;
+ const efi_dxe_services_table_t *efi_dxe_table;
+-extern u32 image_offset;
+ static efi_loaded_image_t *image = NULL;
++static efi_memory_attribute_protocol_t *memattr;
+
+ static efi_status_t
+ preserve_pci_rom_image(efi_pci_io_protocol_t *pci, struct pci_setup_rom **__rom)
+@@ -212,8 +212,8 @@ static void retrieve_apple_device_properties(struct boot_params *boot_params)
+ }
+ }
+
+-static void
+-adjust_memory_range_protection(unsigned long start, unsigned long size)
++efi_status_t efi_adjust_memory_range_protection(unsigned long start,
++ unsigned long size)
+ {
+ efi_status_t status;
+ efi_gcd_memory_space_desc_t desc;
+@@ -221,12 +221,22 @@ adjust_memory_range_protection(unsigned long start, unsigned long size)
+ unsigned long rounded_start, rounded_end;
+ unsigned long unprotect_start, unprotect_size;
+
+- if (efi_dxe_table == NULL)
+- return;
+-
+ rounded_start = rounddown(start, EFI_PAGE_SIZE);
+ rounded_end = roundup(start + size, EFI_PAGE_SIZE);
+
++ if (memattr != NULL) {
++ status = efi_call_proto(memattr, clear_memory_attributes,
++ rounded_start,
++ rounded_end - rounded_start,
++ EFI_MEMORY_XP);
++ if (status != EFI_SUCCESS)
++ efi_warn("Failed to clear EFI_MEMORY_XP attribute\n");
++ return status;
++ }
++
++ if (efi_dxe_table == NULL)
++ return EFI_SUCCESS;
++
+ /*
+ * Don't modify memory region attributes, they are
+ * already suitable, to lower the possibility to
+@@ -238,7 +248,7 @@ adjust_memory_range_protection(unsigned long start, unsigned long size)
+ status = efi_dxe_call(get_memory_space_descriptor, start, &desc);
+
+ if (status != EFI_SUCCESS)
+- return;
++ break;
+
+ next = desc.base_address + desc.length;
+
+@@ -263,69 +273,26 @@ adjust_memory_range_protection(unsigned long start, unsigned long size)
+ unprotect_start,
+ unprotect_start + unprotect_size,
+ status);
++ break;
+ }
+ }
++ return EFI_SUCCESS;
+ }
+
+-/*
+- * Trampoline takes 2 pages and can be loaded in first megabyte of memory
+- * with its end placed between 128k and 640k where BIOS might start.
+- * (see arch/x86/boot/compressed/pgtable_64.c)
+- *
+- * We cannot find exact trampoline placement since memory map
+- * can be modified by UEFI, and it can alter the computed address.
+- */
+-
+-#define TRAMPOLINE_PLACEMENT_BASE ((128 - 8)*1024)
+-#define TRAMPOLINE_PLACEMENT_SIZE (640*1024 - (128 - 8)*1024)
+-
+-void startup_32(struct boot_params *boot_params);
+-
+-static void
+-setup_memory_protection(unsigned long image_base, unsigned long image_size)
++static efi_char16_t *efistub_fw_vendor(void)
+ {
+- /*
+- * Allow execution of possible trampoline used
+- * for switching between 4- and 5-level page tables
+- * and relocated kernel image.
+- */
+-
+- adjust_memory_range_protection(TRAMPOLINE_PLACEMENT_BASE,
+- TRAMPOLINE_PLACEMENT_SIZE);
++ unsigned long vendor = efi_table_attr(efi_system_table, fw_vendor);
+
+-#ifdef CONFIG_64BIT
+- if (image_base != (unsigned long)startup_32)
+- adjust_memory_range_protection(image_base, image_size);
+-#else
+- /*
+- * Clear protection flags on a whole range of possible
+- * addresses used for KASLR. We don't need to do that
+- * on x86_64, since KASLR/extraction is performed after
+- * dedicated identity page tables are built and we only
+- * need to remove possible protection on relocated image
+- * itself disregarding further relocations.
+- */
+- adjust_memory_range_protection(LOAD_PHYSICAL_ADDR,
+- KERNEL_IMAGE_SIZE - LOAD_PHYSICAL_ADDR);
+-#endif
++ return (efi_char16_t *)vendor;
+ }
+
+ static const efi_char16_t apple[] = L"Apple";
+
+-static void setup_quirks(struct boot_params *boot_params,
+- unsigned long image_base,
+- unsigned long image_size)
++static void setup_quirks(struct boot_params *boot_params)
+ {
+- efi_char16_t *fw_vendor = (efi_char16_t *)(unsigned long)
+- efi_table_attr(efi_system_table, fw_vendor);
+-
+- if (!memcmp(fw_vendor, apple, sizeof(apple))) {
+- if (IS_ENABLED(CONFIG_APPLE_PROPERTIES))
+- retrieve_apple_device_properties(boot_params);
+- }
+-
+- if (IS_ENABLED(CONFIG_EFI_DXE_MEM_ATTRIBUTES))
+- setup_memory_protection(image_base, image_size);
++ if (IS_ENABLED(CONFIG_APPLE_PROPERTIES) &&
++ !memcmp(efistub_fw_vendor(), apple, sizeof(apple)))
++ retrieve_apple_device_properties(boot_params);
+ }
+
+ /*
+@@ -478,7 +445,6 @@ efi_status_t __efiapi efi_pe_entry(efi_handle_t handle,
+ }
+
+ image_base = efi_table_attr(image, image_base);
+- image_offset = (void *)startup_32 - image_base;
+
+ status = efi_allocate_pages(sizeof(struct boot_params),
+ (unsigned long *)&boot_params, ULONG_MAX);
+@@ -760,85 +726,139 @@ static efi_status_t exit_boot(struct boot_params *boot_params, void *handle)
+ return EFI_SUCCESS;
+ }
+
++static bool have_unsupported_snp_features(void)
++{
++ u64 unsupported;
++
++ unsupported = snp_get_unsupported_features(sev_get_status());
++ if (unsupported) {
++ efi_err("Unsupported SEV-SNP features detected: 0x%llx\n",
++ unsupported);
++ return true;
++ }
++ return false;
++}
++
++static void efi_get_seed(void *seed, int size)
++{
++ efi_get_random_bytes(size, seed);
++
++ /*
++ * This only updates seed[0] when running on 32-bit, but in that case,
++ * seed[1] is not used anyway, as there is no virtual KASLR on 32-bit.
++ */
++ *(unsigned long *)seed ^= kaslr_get_random_long("EFI");
++}
++
++static void error(char *str)
++{
++ efi_warn("Decompression failed: %s\n", str);
++}
++
++static efi_status_t efi_decompress_kernel(unsigned long *kernel_entry)
++{
++ unsigned long virt_addr = LOAD_PHYSICAL_ADDR;
++ unsigned long addr, alloc_size, entry;
++ efi_status_t status;
++ u32 seed[2] = {};
++
++ /* determine the required size of the allocation */
++ alloc_size = ALIGN(max_t(unsigned long, output_len, kernel_total_size),
++ MIN_KERNEL_ALIGN);
++
++ if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && !efi_nokaslr) {
++ u64 range = KERNEL_IMAGE_SIZE - LOAD_PHYSICAL_ADDR - kernel_total_size;
++ static const efi_char16_t ami[] = L"American Megatrends";
++
++ efi_get_seed(seed, sizeof(seed));
++
++ virt_addr += (range * seed[1]) >> 32;
++ virt_addr &= ~(CONFIG_PHYSICAL_ALIGN - 1);
++
++ /*
++ * Older Dell systems with AMI UEFI firmware v2.0 may hang
++ * while decompressing the kernel if physical address
++ * randomization is enabled.
++ *
++ * https://bugzilla.kernel.org/show_bug.cgi?id=218173
++ */
++ if (efi_system_table->hdr.revision <= EFI_2_00_SYSTEM_TABLE_REVISION &&
++ !memcmp(efistub_fw_vendor(), ami, sizeof(ami))) {
++ efi_debug("AMI firmware v2.0 or older detected - disabling physical KASLR\n");
++ seed[0] = 0;
++ }
++
++ boot_params_ptr->hdr.loadflags |= KASLR_FLAG;
++ }
++
++ status = efi_random_alloc(alloc_size, CONFIG_PHYSICAL_ALIGN, &addr,
++ seed[0], EFI_LOADER_CODE,
++ LOAD_PHYSICAL_ADDR,
++ EFI_X86_KERNEL_ALLOC_LIMIT);
++ if (status != EFI_SUCCESS)
++ return status;
++
++ entry = decompress_kernel((void *)addr, virt_addr, error);
++ if (entry == ULONG_MAX) {
++ efi_free(alloc_size, addr);
++ return EFI_LOAD_ERROR;
++ }
++
++ *kernel_entry = addr + entry;
++
++ return efi_adjust_memory_range_protection(addr, kernel_total_size);
++}
++
++static void __noreturn enter_kernel(unsigned long kernel_addr,
++ struct boot_params *boot_params)
++{
++ /* enter decompressed kernel with boot_params pointer in RSI/ESI */
++ asm("jmp *%0"::"r"(kernel_addr), "S"(boot_params));
++
++ unreachable();
++}
++
+ /*
+- * On success, we return the address of startup_32, which has potentially been
+- * relocated by efi_relocate_kernel.
+- * On failure, we exit to the firmware via efi_exit instead of returning.
++ * On success, this routine will jump to the relocated image directly and never
++ * return. On failure, it will exit to the firmware via efi_exit() instead of
++ * returning.
+ */
+-asmlinkage unsigned long efi_main(efi_handle_t handle,
+- efi_system_table_t *sys_table_arg,
+- struct boot_params *boot_params)
++void __noreturn efi_stub_entry(efi_handle_t handle,
++ efi_system_table_t *sys_table_arg,
++ struct boot_params *boot_params)
+ {
+- unsigned long bzimage_addr = (unsigned long)startup_32;
+- unsigned long buffer_start, buffer_end;
++ efi_guid_t guid = EFI_MEMORY_ATTRIBUTE_PROTOCOL_GUID;
+ struct setup_header *hdr = &boot_params->hdr;
+ const struct linux_efi_initrd *initrd = NULL;
++ unsigned long kernel_entry;
+ efi_status_t status;
+
++ boot_params_ptr = boot_params;
++
+ efi_system_table = sys_table_arg;
+ /* Check if we were booted by the EFI firmware */
+ if (efi_system_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE)
+ efi_exit(handle, EFI_INVALID_PARAMETER);
+
+- efi_dxe_table = get_efi_config_table(EFI_DXE_SERVICES_TABLE_GUID);
+- if (efi_dxe_table &&
+- efi_dxe_table->hdr.signature != EFI_DXE_SERVICES_TABLE_SIGNATURE) {
+- efi_warn("Ignoring DXE services table: invalid signature\n");
+- efi_dxe_table = NULL;
++ if (have_unsupported_snp_features())
++ efi_exit(handle, EFI_UNSUPPORTED);
++
++ if (IS_ENABLED(CONFIG_EFI_DXE_MEM_ATTRIBUTES)) {
++ efi_dxe_table = get_efi_config_table(EFI_DXE_SERVICES_TABLE_GUID);
++ if (efi_dxe_table &&
++ efi_dxe_table->hdr.signature != EFI_DXE_SERVICES_TABLE_SIGNATURE) {
++ efi_warn("Ignoring DXE services table: invalid signature\n");
++ efi_dxe_table = NULL;
++ }
+ }
+
+- /*
+- * If the kernel isn't already loaded at a suitable address,
+- * relocate it.
+- *
+- * It must be loaded above LOAD_PHYSICAL_ADDR.
+- *
+- * The maximum address for 64-bit is 1 << 46 for 4-level paging. This
+- * is defined as the macro MAXMEM, but unfortunately that is not a
+- * compile-time constant if 5-level paging is configured, so we instead
+- * define our own macro for use here.
+- *
+- * For 32-bit, the maximum address is complicated to figure out, for
+- * now use KERNEL_IMAGE_SIZE, which will be 512MiB, the same as what
+- * KASLR uses.
+- *
+- * Also relocate it if image_offset is zero, i.e. the kernel wasn't
+- * loaded by LoadImage, but rather by a bootloader that called the
+- * handover entry. The reason we must always relocate in this case is
+- * to handle the case of systemd-boot booting a unified kernel image,
+- * which is a PE executable that contains the bzImage and an initrd as
+- * COFF sections. The initrd section is placed after the bzImage
+- * without ensuring that there are at least init_size bytes available
+- * for the bzImage, and thus the compressed kernel's startup code may
+- * overwrite the initrd unless it is moved out of the way.
+- */
++ /* grab the memory attributes protocol if it exists */
++ efi_bs_call(locate_protocol, &guid, NULL, (void **)&memattr);
+
+- buffer_start = ALIGN(bzimage_addr - image_offset,
+- hdr->kernel_alignment);
+- buffer_end = buffer_start + hdr->init_size;
+-
+- if ((buffer_start < LOAD_PHYSICAL_ADDR) ||
+- (IS_ENABLED(CONFIG_X86_32) && buffer_end > KERNEL_IMAGE_SIZE) ||
+- (IS_ENABLED(CONFIG_X86_64) && buffer_end > MAXMEM_X86_64_4LEVEL) ||
+- (image_offset == 0)) {
+- extern char _bss[];
+-
+- status = efi_relocate_kernel(&bzimage_addr,
+- (unsigned long)_bss - bzimage_addr,
+- hdr->init_size,
+- hdr->pref_address,
+- hdr->kernel_alignment,
+- LOAD_PHYSICAL_ADDR);
+- if (status != EFI_SUCCESS) {
+- efi_err("efi_relocate_kernel() failed!\n");
+- goto fail;
+- }
+- /*
+- * Now that we've copied the kernel elsewhere, we no longer
+- * have a set up block before startup_32(), so reset image_offset
+- * to zero in case it was set earlier.
+- */
+- image_offset = 0;
++ status = efi_setup_5level_paging();
++ if (status != EFI_SUCCESS) {
++ efi_err("efi_setup_5level_paging() failed!\n");
++ goto fail;
+ }
+
+ #ifdef CONFIG_CMDLINE_BOOL
+@@ -858,6 +878,12 @@ asmlinkage unsigned long efi_main(efi_handle_t handle,
+ }
+ }
+
++ status = efi_decompress_kernel(&kernel_entry);
++ if (status != EFI_SUCCESS) {
++ efi_err("Failed to decompress kernel\n");
++ goto fail;
++ }
++
+ /*
+ * At this point, an initrd may already have been loaded by the
+ * bootloader and passed via bootparams. We permit an initrd loaded
+@@ -897,7 +923,7 @@ asmlinkage unsigned long efi_main(efi_handle_t handle,
+
+ setup_efi_pci(boot_params);
+
+- setup_quirks(boot_params, bzimage_addr, buffer_end - buffer_start);
++ setup_quirks(boot_params);
+
+ status = exit_boot(boot_params, handle);
+ if (status != EFI_SUCCESS) {
+@@ -905,9 +931,38 @@ asmlinkage unsigned long efi_main(efi_handle_t handle,
+ goto fail;
+ }
+
+- return bzimage_addr;
++ /*
++ * Call the SEV init code while still running with the firmware's
++ * GDT/IDT, so #VC exceptions will be handled by EFI.
++ */
++ sev_enable(boot_params);
++
++ efi_5level_switch();
++
++ enter_kernel(kernel_entry, boot_params);
+ fail:
+- efi_err("efi_main() failed!\n");
++ efi_err("efi_stub_entry() failed!\n");
+
+ efi_exit(handle, status);
+ }
++
++#ifdef CONFIG_EFI_HANDOVER_PROTOCOL
++void efi_handover_entry(efi_handle_t handle, efi_system_table_t *sys_table_arg,
++ struct boot_params *boot_params)
++{
++ extern char _bss[], _ebss[];
++
++ memset(_bss, 0, _ebss - _bss);
++ efi_stub_entry(handle, sys_table_arg, boot_params);
++}
++
++#ifndef CONFIG_EFI_MIXED
++extern __alias(efi_handover_entry)
++void efi32_stub_entry(efi_handle_t handle, efi_system_table_t *sys_table_arg,
++ struct boot_params *boot_params);
++
++extern __alias(efi_handover_entry)
++void efi64_stub_entry(efi_handle_t handle, efi_system_table_t *sys_table_arg,
++ struct boot_params *boot_params);
++#endif
++#endif
+diff --git a/drivers/firmware/efi/libstub/x86-stub.h b/drivers/firmware/efi/libstub/x86-stub.h
+new file mode 100644
+index 0000000000000..1c20e99a64944
+--- /dev/null
++++ b/drivers/firmware/efi/libstub/x86-stub.h
+@@ -0,0 +1,17 @@
++/* SPDX-License-Identifier: GPL-2.0-only */
++
++#include <linux/efi.h>
++
++extern void trampoline_32bit_src(void *, bool);
++extern const u16 trampoline_ljmp_imm_offset;
++
++efi_status_t efi_adjust_memory_range_protection(unsigned long start,
++ unsigned long size);
++
++#ifdef CONFIG_X86_64
++efi_status_t efi_setup_5level_paging(void);
++void efi_5level_switch(void);
++#else
++static inline efi_status_t efi_setup_5level_paging(void) { return EFI_SUCCESS; }
++static inline void efi_5level_switch(void) {}
++#endif
+diff --git a/drivers/firmware/efi/vars.c b/drivers/firmware/efi/vars.c
+index 0ba9f18312f5b..4ca256bcd6971 100644
+--- a/drivers/firmware/efi/vars.c
++++ b/drivers/firmware/efi/vars.c
+@@ -66,19 +66,28 @@ int efivars_register(struct efivars *efivars,
+ const struct efivar_operations *ops,
+ struct kobject *kobject)
+ {
++ int rv;
++
+ if (down_interruptible(&efivars_lock))
+ return -EINTR;
+
++ if (__efivars) {
++ pr_warn("efivars already registered\n");
++ rv = -EBUSY;
++ goto out;
++ }
++
+ efivars->ops = ops;
+ efivars->kobject = kobject;
+
+ __efivars = efivars;
+
+ pr_info("Registered efivars operations\n");
+-
++ rv = 0;
++out:
+ up(&efivars_lock);
+
+- return 0;
++ return rv;
+ }
+ EXPORT_SYMBOL_GPL(efivars_register);
+
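
The efivars_register() change rejects a second registration with -EBUSY instead of silently overwriting __efivars, and funnels every exit through one unlock site. A user-space sketch of that guarded-singleton shape, assuming a pthread mutex in place of the kernel semaphore (ops_register and registered_ops are illustrative names):

    #include <errno.h>
    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t ops_lock = PTHREAD_MUTEX_INITIALIZER;
    static const void *registered_ops;      /* NULL until someone registers */

    static int ops_register(const void *ops)
    {
        int rv;

        pthread_mutex_lock(&ops_lock);

        if (registered_ops) {       /* already claimed: reject, don't clobber */
            rv = -EBUSY;
            goto out;
        }

        registered_ops = ops;
        rv = 0;
    out:
        pthread_mutex_unlock(&ops_lock);
        return rv;
    }

    int main(void)
    {
        int a, b;

        printf("first:  %d\n", ops_register(&a));   /* 0 */
        printf("second: %d\n", ops_register(&b));   /* -EBUSY */
        return 0;
    }
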
+diff --git a/drivers/gpio/gpio-74x164.c b/drivers/gpio/gpio-74x164.c
+index e00c333105170..753e7be039e4d 100644
+--- a/drivers/gpio/gpio-74x164.c
++++ b/drivers/gpio/gpio-74x164.c
+@@ -127,8 +127,6 @@ static int gen_74x164_probe(struct spi_device *spi)
+ if (IS_ERR(chip->gpiod_oe))
+ return PTR_ERR(chip->gpiod_oe);
+
+- gpiod_set_value_cansleep(chip->gpiod_oe, 1);
+-
+ spi_set_drvdata(spi, chip);
+
+ chip->gpio_chip.label = spi->modalias;
+@@ -153,6 +151,8 @@ static int gen_74x164_probe(struct spi_device *spi)
+ goto exit_destroy;
+ }
+
++ gpiod_set_value_cansleep(chip->gpiod_oe, 1);
++
+ ret = gpiochip_add_data(&chip->gpio_chip, chip);
+ if (!ret)
+ return 0;
+diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
+index 6d3e3454a6ed6..9d8c783124033 100644
+--- a/drivers/gpio/gpiolib.c
++++ b/drivers/gpio/gpiolib.c
+@@ -784,11 +784,11 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data,
+
+ ret = gpiochip_irqchip_init_valid_mask(gc);
+ if (ret)
+- goto err_remove_acpi_chip;
++ goto err_free_hogs;
+
+ ret = gpiochip_irqchip_init_hw(gc);
+ if (ret)
+- goto err_remove_acpi_chip;
++ goto err_remove_irqchip_mask;
+
+ ret = gpiochip_add_irqchip(gc, lock_key, request_key);
+ if (ret)
+@@ -813,13 +813,13 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data,
+ gpiochip_irqchip_remove(gc);
+ err_remove_irqchip_mask:
+ gpiochip_irqchip_free_valid_mask(gc);
+-err_remove_acpi_chip:
++err_free_hogs:
++ gpiochip_free_hogs(gc);
+ acpi_gpiochip_remove(gc);
++ gpiochip_remove_pin_ranges(gc);
+ err_remove_of_chip:
+- gpiochip_free_hogs(gc);
+ of_gpiochip_remove(gc);
+ err_free_gpiochip_mask:
+- gpiochip_remove_pin_ranges(gc);
+ gpiochip_free_valid_mask(gc);
+ if (gdev->dev.release) {
+ /* release() has been registered by gpiochip_setup_dev() */
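
The relabelled gpiolib error path restores the usual goto-unwind discipline: resources are released in the exact reverse order of acquisition, and each label undoes only the steps that succeeded before the jump. A minimal stand-alone sketch, with malloc/free standing in for the real setup and teardown calls:

    #include <stdio.h>
    #include <stdlib.h>

    static int setup(void)
    {
        void *mask, *hogs, *irq;

        mask = malloc(16);          /* step 1 */
        if (!mask)
            return -1;

        hogs = malloc(16);          /* step 2 */
        if (!hogs)
            goto err_free_mask;

        irq = malloc(16);           /* step 3: pretend this can fail */
        if (!irq)
            goto err_free_hogs;

        free(irq);
        free(hogs);
        free(mask);
        return 0;

    err_free_hogs:                  /* undo step 2 */
        free(hogs);
    err_free_mask:                  /* undo step 1 */
        free(mask);
        return -1;
    }

    int main(void)
    {
        return setup() ? 1 : 0;
    }
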
+diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile
+index 6fdf87a6e240f..6c7b286e1123d 100644
+--- a/drivers/gpu/drm/amd/display/dc/dml/Makefile
++++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile
+@@ -51,8 +51,12 @@ endif
+ endif
+
+ ifneq ($(CONFIG_FRAME_WARN),0)
++ifeq ($(filter y,$(CONFIG_KASAN)$(CONFIG_KCSAN)),y)
++frame_warn_flag := -Wframe-larger-than=3072
++else
+ frame_warn_flag := -Wframe-larger-than=2048
+ endif
++endif
+
+ CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_lib.o := $(dml_ccflags)
+
+diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
+index dc0a6fba7050f..ff1032de4f76d 100644
+--- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
++++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
+@@ -6925,6 +6925,23 @@ static int si_dpm_enable(struct amdgpu_device *adev)
+ return 0;
+ }
+
++static int si_set_temperature_range(struct amdgpu_device *adev)
++{
++ int ret;
++
++ ret = si_thermal_enable_alert(adev, false);
++ if (ret)
++ return ret;
++ ret = si_thermal_set_temperature_range(adev, R600_TEMP_RANGE_MIN, R600_TEMP_RANGE_MAX);
++ if (ret)
++ return ret;
++ ret = si_thermal_enable_alert(adev, true);
++ if (ret)
++ return ret;
++
++ return ret;
++}
++
+ static void si_dpm_disable(struct amdgpu_device *adev)
+ {
+ struct rv7xx_power_info *pi = rv770_get_pi(adev);
+@@ -7608,6 +7625,18 @@ static int si_dpm_process_interrupt(struct amdgpu_device *adev,
+
+ static int si_dpm_late_init(void *handle)
+ {
++ int ret;
++ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
++
++ if (!adev->pm.dpm_enabled)
++ return 0;
++
++ ret = si_set_temperature_range(adev);
++ if (ret)
++ return ret;
++#if 0 //TODO ?
++ si_dpm_powergate_uvd(adev, true);
++#endif
+ return 0;
+ }
+
+diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
+index 7098f125b54a9..fd32041f82263 100644
+--- a/drivers/gpu/drm/drm_buddy.c
++++ b/drivers/gpu/drm/drm_buddy.c
+@@ -332,6 +332,7 @@ alloc_range_bias(struct drm_buddy *mm,
+ u64 start, u64 end,
+ unsigned int order)
+ {
++ u64 req_size = mm->chunk_size << order;
+ struct drm_buddy_block *block;
+ struct drm_buddy_block *buddy;
+ LIST_HEAD(dfs);
+@@ -367,6 +368,15 @@ alloc_range_bias(struct drm_buddy *mm,
+ if (drm_buddy_block_is_allocated(block))
+ continue;
+
++ if (block_start < start || block_end > end) {
++ u64 adjusted_start = max(block_start, start);
++ u64 adjusted_end = min(block_end, end);
++
++ if (round_down(adjusted_end + 1, req_size) <=
++ round_up(adjusted_start, req_size))
++ continue;
++ }
++
+ if (contains(start, end, block_start, block_end) &&
+ order == drm_buddy_block_order(block)) {
+ /*
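
The new bias check asks whether the clamped range still contains at least one properly aligned chunk of the requested size: if round_down(end + 1, req) is not greater than round_up(start, req), no such chunk exists and the block is skipped. A small sketch of that arithmetic, assuming power-of-two sizes as drm_buddy does:

    #include <stdint.h>
    #include <stdio.h>

    static uint64_t round_up_pow2(uint64_t x, uint64_t a)   { return (x + a - 1) & ~(a - 1); }
    static uint64_t round_down_pow2(uint64_t x, uint64_t a) { return x & ~(a - 1); }

    /* 1 iff [start, end] holds an aligned, req-sized chunk. */
    static int chunk_fits(uint64_t start, uint64_t end, uint64_t req)
    {
        return round_down_pow2(end + 1, req) > round_up_pow2(start, req);
    }

    int main(void)
    {
        /* 0x1000-byte request: [0x0800, 0x27ff] holds [0x1000, 0x1fff]... */
        printf("%d\n", chunk_fits(0x0800, 0x27ff, 0x1000));    /* 1 */
        /* ...but [0x0800, 0x17ff] holds no aligned 0x1000-byte chunk. */
        printf("%d\n", chunk_fits(0x0800, 0x17ff, 0x1000));    /* 0 */
        return 0;
    }
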
+diff --git a/drivers/gpu/drm/meson/meson_drv.c b/drivers/gpu/drm/meson/meson_drv.c
+index 119544d88b586..fbac39aa38cc4 100644
+--- a/drivers/gpu/drm/meson/meson_drv.c
++++ b/drivers/gpu/drm/meson/meson_drv.c
+@@ -316,32 +316,34 @@ static int meson_drv_bind_master(struct device *dev, bool has_components)
+ goto exit_afbcd;
+
+ if (has_components) {
+- ret = component_bind_all(drm->dev, drm);
++ ret = component_bind_all(dev, drm);
+ if (ret) {
+ dev_err(drm->dev, "Couldn't bind all components\n");
++ /* Do not try to unbind */
++ has_components = false;
+ goto exit_afbcd;
+ }
+ }
+
+ ret = meson_encoder_hdmi_init(priv);
+ if (ret)
+- goto unbind_all;
++ goto exit_afbcd;
+
+ ret = meson_plane_create(priv);
+ if (ret)
+- goto unbind_all;
++ goto exit_afbcd;
+
+ ret = meson_overlay_create(priv);
+ if (ret)
+- goto unbind_all;
++ goto exit_afbcd;
+
+ ret = meson_crtc_create(priv);
+ if (ret)
+- goto unbind_all;
++ goto exit_afbcd;
+
+ ret = request_irq(priv->vsync_irq, meson_irq, 0, drm->driver->name, drm);
+ if (ret)
+- goto unbind_all;
++ goto exit_afbcd;
+
+ drm_mode_config_reset(drm);
+
+@@ -359,15 +361,18 @@ static int meson_drv_bind_master(struct device *dev, bool has_components)
+
+ uninstall_irq:
+ free_irq(priv->vsync_irq, drm);
+-unbind_all:
+- if (has_components)
+- component_unbind_all(drm->dev, drm);
+ exit_afbcd:
+ if (priv->afbcd.ops)
+ priv->afbcd.ops->exit(priv);
+ free_drm:
+ drm_dev_put(drm);
+
++ meson_encoder_hdmi_remove(priv);
++ meson_encoder_cvbs_remove(priv);
++
++ if (has_components)
++ component_unbind_all(dev, drm);
++
+ return ret;
+ }
+
+diff --git a/drivers/gpu/drm/meson/meson_encoder_cvbs.c b/drivers/gpu/drm/meson/meson_encoder_cvbs.c
+index 3f73b211fa8e3..3407450435e20 100644
+--- a/drivers/gpu/drm/meson/meson_encoder_cvbs.c
++++ b/drivers/gpu/drm/meson/meson_encoder_cvbs.c
+@@ -294,6 +294,5 @@ void meson_encoder_cvbs_remove(struct meson_drm *priv)
+ if (priv->encoders[MESON_ENC_CVBS]) {
+ meson_encoder_cvbs = priv->encoders[MESON_ENC_CVBS];
+ drm_bridge_remove(&meson_encoder_cvbs->bridge);
+- drm_bridge_remove(meson_encoder_cvbs->next_bridge);
+ }
+ }
+diff --git a/drivers/gpu/drm/meson/meson_encoder_hdmi.c b/drivers/gpu/drm/meson/meson_encoder_hdmi.c
+index b14e6e507c61b..03062e7a02b64 100644
+--- a/drivers/gpu/drm/meson/meson_encoder_hdmi.c
++++ b/drivers/gpu/drm/meson/meson_encoder_hdmi.c
+@@ -472,6 +472,5 @@ void meson_encoder_hdmi_remove(struct meson_drm *priv)
+ if (priv->encoders[MESON_ENC_HDMI]) {
+ meson_encoder_hdmi = priv->encoders[MESON_ENC_HDMI];
+ drm_bridge_remove(&meson_encoder_hdmi->bridge);
+- drm_bridge_remove(meson_encoder_hdmi->next_bridge);
+ }
+ }
+diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
+index 5fc55b9777cbf..6806779f8ecce 100644
+--- a/drivers/gpu/drm/tegra/drm.c
++++ b/drivers/gpu/drm/tegra/drm.c
+@@ -1252,9 +1252,26 @@ static int host1x_drm_probe(struct host1x_device *dev)
+
+ drm_mode_config_reset(drm);
+
+- err = drm_aperture_remove_framebuffers(&tegra_drm_driver);
+- if (err < 0)
+- goto hub;
++ /*
++ * Only take over from a potential firmware framebuffer if any CRTCs
++ * have been registered. This must not be a fatal error because there
++ * are other accelerators that are exposed via this driver.
++ *
++ * Another case where this happens is on Tegra234 where the display
++ * hardware is no longer part of the host1x complex, so this driver
++ * will not expose any modesetting features.
++ */
++ if (drm->mode_config.num_crtc > 0) {
++ err = drm_aperture_remove_framebuffers(&tegra_drm_driver);
++ if (err < 0)
++ goto hub;
++ } else {
++ /*
++ * Indicate to userspace that this doesn't expose any display
++ * capabilities.
++ */
++ drm->driver_features &= ~(DRIVER_MODESET | DRIVER_ATOMIC);
++ }
+
+ err = tegra_drm_fb_init(drm);
+ if (err < 0)
+diff --git a/drivers/infiniband/core/cm_trace.h b/drivers/infiniband/core/cm_trace.h
+index e9d282679ef15..944d9071245d2 100644
+--- a/drivers/infiniband/core/cm_trace.h
++++ b/drivers/infiniband/core/cm_trace.h
+@@ -16,7 +16,7 @@
+
+ #include <linux/tracepoint.h>
+ #include <rdma/ib_cm.h>
+-#include <trace/events/rdma.h>
++#include <trace/misc/rdma.h>
+
+ /*
+ * enum ib_cm_state, from include/rdma/ib_cm.h
+diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
+index 0773ca7ace247..067d7f42871ff 100644
+--- a/drivers/infiniband/core/cma.c
++++ b/drivers/infiniband/core/cma.c
+@@ -3547,121 +3547,6 @@ static int cma_resolve_ib_addr(struct rdma_id_private *id_priv)
+ return ret;
+ }
+
+-static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
+- const struct sockaddr *dst_addr)
+-{
+- struct sockaddr_storage zero_sock = {};
+-
+- if (src_addr && src_addr->sa_family)
+- return rdma_bind_addr(id, src_addr);
+-
+- /*
+- * When the src_addr is not specified, automatically supply an any addr
+- */
+- zero_sock.ss_family = dst_addr->sa_family;
+- if (IS_ENABLED(CONFIG_IPV6) && dst_addr->sa_family == AF_INET6) {
+- struct sockaddr_in6 *src_addr6 =
+- (struct sockaddr_in6 *)&zero_sock;
+- struct sockaddr_in6 *dst_addr6 =
+- (struct sockaddr_in6 *)dst_addr;
+-
+- src_addr6->sin6_scope_id = dst_addr6->sin6_scope_id;
+- if (ipv6_addr_type(&dst_addr6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
+- id->route.addr.dev_addr.bound_dev_if =
+- dst_addr6->sin6_scope_id;
+- } else if (dst_addr->sa_family == AF_IB) {
+- ((struct sockaddr_ib *)&zero_sock)->sib_pkey =
+- ((struct sockaddr_ib *)dst_addr)->sib_pkey;
+- }
+- return rdma_bind_addr(id, (struct sockaddr *)&zero_sock);
+-}
+-
+-/*
+- * If required, resolve the source address for bind and leave the id_priv in
+- * state RDMA_CM_ADDR_BOUND. This oddly uses the state to determine the prior
+- * calls made by ULP, a previously bound ID will not be re-bound and src_addr is
+- * ignored.
+- */
+-static int resolve_prepare_src(struct rdma_id_private *id_priv,
+- struct sockaddr *src_addr,
+- const struct sockaddr *dst_addr)
+-{
+- int ret;
+-
+- memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr));
+- if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) {
+- /* For a well behaved ULP state will be RDMA_CM_IDLE */
+- ret = cma_bind_addr(&id_priv->id, src_addr, dst_addr);
+- if (ret)
+- goto err_dst;
+- if (WARN_ON(!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND,
+- RDMA_CM_ADDR_QUERY))) {
+- ret = -EINVAL;
+- goto err_dst;
+- }
+- }
+-
+- if (cma_family(id_priv) != dst_addr->sa_family) {
+- ret = -EINVAL;
+- goto err_state;
+- }
+- return 0;
+-
+-err_state:
+- cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND);
+-err_dst:
+- memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr));
+- return ret;
+-}
+-
+-int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
+- const struct sockaddr *dst_addr, unsigned long timeout_ms)
+-{
+- struct rdma_id_private *id_priv =
+- container_of(id, struct rdma_id_private, id);
+- int ret;
+-
+- ret = resolve_prepare_src(id_priv, src_addr, dst_addr);
+- if (ret)
+- return ret;
+-
+- if (cma_any_addr(dst_addr)) {
+- ret = cma_resolve_loopback(id_priv);
+- } else {
+- if (dst_addr->sa_family == AF_IB) {
+- ret = cma_resolve_ib_addr(id_priv);
+- } else {
+- /*
+- * The FSM can return back to RDMA_CM_ADDR_BOUND after
+- * rdma_resolve_ip() is called, eg through the error
+- * path in addr_handler(). If this happens the existing
+- * request must be canceled before issuing a new one.
+- * Since canceling a request is a bit slow and this
+- * oddball path is rare, keep track once a request has
+- * been issued. The track turns out to be a permanent
+- * state since this is the only cancel as it is
+- * immediately before rdma_resolve_ip().
+- */
+- if (id_priv->used_resolve_ip)
+- rdma_addr_cancel(&id->route.addr.dev_addr);
+- else
+- id_priv->used_resolve_ip = 1;
+- ret = rdma_resolve_ip(cma_src_addr(id_priv), dst_addr,
+- &id->route.addr.dev_addr,
+- timeout_ms, addr_handler,
+- false, id_priv);
+- }
+- }
+- if (ret)
+- goto err;
+-
+- return 0;
+-err:
+- cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND);
+- return ret;
+-}
+-EXPORT_SYMBOL(rdma_resolve_addr);
+-
+ int rdma_set_reuseaddr(struct rdma_cm_id *id, int reuse)
+ {
+ struct rdma_id_private *id_priv;
+@@ -4064,27 +3949,26 @@ int rdma_listen(struct rdma_cm_id *id, int backlog)
+ }
+ EXPORT_SYMBOL(rdma_listen);
+
+-int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
++static int rdma_bind_addr_dst(struct rdma_id_private *id_priv,
++ struct sockaddr *addr, const struct sockaddr *daddr)
+ {
+- struct rdma_id_private *id_priv;
++ struct sockaddr *id_daddr;
+ int ret;
+- struct sockaddr *daddr;
+
+ if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6 &&
+ addr->sa_family != AF_IB)
+ return -EAFNOSUPPORT;
+
+- id_priv = container_of(id, struct rdma_id_private, id);
+ if (!cma_comp_exch(id_priv, RDMA_CM_IDLE, RDMA_CM_ADDR_BOUND))
+ return -EINVAL;
+
+- ret = cma_check_linklocal(&id->route.addr.dev_addr, addr);
++ ret = cma_check_linklocal(&id_priv->id.route.addr.dev_addr, addr);
+ if (ret)
+ goto err1;
+
+ memcpy(cma_src_addr(id_priv), addr, rdma_addr_size(addr));
+ if (!cma_any_addr(addr)) {
+- ret = cma_translate_addr(addr, &id->route.addr.dev_addr);
++ ret = cma_translate_addr(addr, &id_priv->id.route.addr.dev_addr);
+ if (ret)
+ goto err1;
+
+@@ -4104,8 +3988,10 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
+ }
+ #endif
+ }
+- daddr = cma_dst_addr(id_priv);
+- daddr->sa_family = addr->sa_family;
++ id_daddr = cma_dst_addr(id_priv);
++ if (daddr != id_daddr)
++ memcpy(id_daddr, daddr, rdma_addr_size(addr));
++ id_daddr->sa_family = addr->sa_family;
+
+ ret = cma_get_port(id_priv);
+ if (ret)
+@@ -4121,6 +4007,129 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
+ cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_IDLE);
+ return ret;
+ }
++
++static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
++ const struct sockaddr *dst_addr)
++{
++ struct rdma_id_private *id_priv =
++ container_of(id, struct rdma_id_private, id);
++ struct sockaddr_storage zero_sock = {};
++
++ if (src_addr && src_addr->sa_family)
++ return rdma_bind_addr_dst(id_priv, src_addr, dst_addr);
++
++ /*
++ * When the src_addr is not specified, automatically supply an any addr
++ */
++ zero_sock.ss_family = dst_addr->sa_family;
++ if (IS_ENABLED(CONFIG_IPV6) && dst_addr->sa_family == AF_INET6) {
++ struct sockaddr_in6 *src_addr6 =
++ (struct sockaddr_in6 *)&zero_sock;
++ struct sockaddr_in6 *dst_addr6 =
++ (struct sockaddr_in6 *)dst_addr;
++
++ src_addr6->sin6_scope_id = dst_addr6->sin6_scope_id;
++ if (ipv6_addr_type(&dst_addr6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
++ id->route.addr.dev_addr.bound_dev_if =
++ dst_addr6->sin6_scope_id;
++ } else if (dst_addr->sa_family == AF_IB) {
++ ((struct sockaddr_ib *)&zero_sock)->sib_pkey =
++ ((struct sockaddr_ib *)dst_addr)->sib_pkey;
++ }
++ return rdma_bind_addr_dst(id_priv, (struct sockaddr *)&zero_sock, dst_addr);
++}
++
++/*
++ * If required, resolve the source address for bind and leave the id_priv in
++ * state RDMA_CM_ADDR_BOUND. This oddly uses the state to determine the prior
++ * calls made by ULP, a previously bound ID will not be re-bound and src_addr is
++ * ignored.
++ */
++static int resolve_prepare_src(struct rdma_id_private *id_priv,
++ struct sockaddr *src_addr,
++ const struct sockaddr *dst_addr)
++{
++ int ret;
++
++ if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) {
++ /* For a well behaved ULP state will be RDMA_CM_IDLE */
++ ret = cma_bind_addr(&id_priv->id, src_addr, dst_addr);
++ if (ret)
++ return ret;
++ if (WARN_ON(!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND,
++ RDMA_CM_ADDR_QUERY)))
++ return -EINVAL;
++
++ } else {
++ memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr));
++ }
++
++ if (cma_family(id_priv) != dst_addr->sa_family) {
++ ret = -EINVAL;
++ goto err_state;
++ }
++ return 0;
++
++err_state:
++ cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND);
++ return ret;
++}
++
++int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
++ const struct sockaddr *dst_addr, unsigned long timeout_ms)
++{
++ struct rdma_id_private *id_priv =
++ container_of(id, struct rdma_id_private, id);
++ int ret;
++
++ ret = resolve_prepare_src(id_priv, src_addr, dst_addr);
++ if (ret)
++ return ret;
++
++ if (cma_any_addr(dst_addr)) {
++ ret = cma_resolve_loopback(id_priv);
++ } else {
++ if (dst_addr->sa_family == AF_IB) {
++ ret = cma_resolve_ib_addr(id_priv);
++ } else {
++ /*
++ * The FSM can return back to RDMA_CM_ADDR_BOUND after
++ * rdma_resolve_ip() is called, eg through the error
++ * path in addr_handler(). If this happens the existing
++ * request must be canceled before issuing a new one.
++ * Since canceling a request is a bit slow and this
++ * oddball path is rare, keep track once a request has
++ * been issued. The track turns out to be a permanent
++ * state since this is the only cancel as it is
++ * immediately before rdma_resolve_ip().
++ */
++ if (id_priv->used_resolve_ip)
++ rdma_addr_cancel(&id->route.addr.dev_addr);
++ else
++ id_priv->used_resolve_ip = 1;
++ ret = rdma_resolve_ip(cma_src_addr(id_priv), dst_addr,
++ &id->route.addr.dev_addr,
++ timeout_ms, addr_handler,
++ false, id_priv);
++ }
++ }
++ if (ret)
++ goto err;
++
++ return 0;
++err:
++ cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND);
++ return ret;
++}
++EXPORT_SYMBOL(rdma_resolve_addr);
++
++int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
++{
++ struct rdma_id_private *id_priv =
++ container_of(id, struct rdma_id_private, id);
++
++ return rdma_bind_addr_dst(id_priv, addr, cma_dst_addr(id_priv));
++}
+ EXPORT_SYMBOL(rdma_bind_addr);
+
+ static int cma_format_hdr(void *hdr, struct rdma_id_private *id_priv)
+diff --git a/drivers/infiniband/core/cma_trace.h b/drivers/infiniband/core/cma_trace.h
+index e45264267bcc9..47f3c6e4be893 100644
+--- a/drivers/infiniband/core/cma_trace.h
++++ b/drivers/infiniband/core/cma_trace.h
+@@ -15,7 +15,7 @@
+ #define _TRACE_RDMA_CMA_H
+
+ #include <linux/tracepoint.h>
+-#include <trace/events/rdma.h>
++#include <trace/misc/rdma.h>
+
+
+ DECLARE_EVENT_CLASS(cma_fsm_class,
+diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
+index d96c78e436f98..5c284dfbe6923 100644
+--- a/drivers/infiniband/core/user_mad.c
++++ b/drivers/infiniband/core/user_mad.c
+@@ -131,6 +131,11 @@ struct ib_umad_packet {
+ struct ib_user_mad mad;
+ };
+
++struct ib_rmpp_mad_hdr {
++ struct ib_mad_hdr mad_hdr;
++ struct ib_rmpp_hdr rmpp_hdr;
++} __packed;
++
+ #define CREATE_TRACE_POINTS
+ #include <trace/events/ib_umad.h>
+
+@@ -494,11 +499,11 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
+ size_t count, loff_t *pos)
+ {
+ struct ib_umad_file *file = filp->private_data;
++ struct ib_rmpp_mad_hdr *rmpp_mad_hdr;
+ struct ib_umad_packet *packet;
+ struct ib_mad_agent *agent;
+ struct rdma_ah_attr ah_attr;
+ struct ib_ah *ah;
+- struct ib_rmpp_mad *rmpp_mad;
+ __be64 *tid;
+ int ret, data_len, hdr_len, copy_offset, rmpp_active;
+ u8 base_version;
+@@ -506,7 +511,7 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
+ if (count < hdr_size(file) + IB_MGMT_RMPP_HDR)
+ return -EINVAL;
+
+- packet = kzalloc(sizeof *packet + IB_MGMT_RMPP_HDR, GFP_KERNEL);
++ packet = kzalloc(sizeof(*packet) + IB_MGMT_RMPP_HDR, GFP_KERNEL);
+ if (!packet)
+ return -ENOMEM;
+
+@@ -560,13 +565,13 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
+ goto err_up;
+ }
+
+- rmpp_mad = (struct ib_rmpp_mad *) packet->mad.data;
+- hdr_len = ib_get_mad_data_offset(rmpp_mad->mad_hdr.mgmt_class);
++ rmpp_mad_hdr = (struct ib_rmpp_mad_hdr *)packet->mad.data;
++ hdr_len = ib_get_mad_data_offset(rmpp_mad_hdr->mad_hdr.mgmt_class);
+
+- if (ib_is_mad_class_rmpp(rmpp_mad->mad_hdr.mgmt_class)
++ if (ib_is_mad_class_rmpp(rmpp_mad_hdr->mad_hdr.mgmt_class)
+ && ib_mad_kernel_rmpp_agent(agent)) {
+ copy_offset = IB_MGMT_RMPP_HDR;
+- rmpp_active = ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) &
++ rmpp_active = ib_get_rmpp_flags(&rmpp_mad_hdr->rmpp_hdr) &
+ IB_MGMT_RMPP_FLAG_ACTIVE;
+ } else {
+ copy_offset = IB_MGMT_MAD_HDR;
+@@ -615,12 +620,12 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
+ tid = &((struct ib_mad_hdr *) packet->msg->mad)->tid;
+ *tid = cpu_to_be64(((u64) agent->hi_tid) << 32 |
+ (be64_to_cpup(tid) & 0xffffffff));
+- rmpp_mad->mad_hdr.tid = *tid;
++ rmpp_mad_hdr->mad_hdr.tid = *tid;
+ }
+
+ if (!ib_mad_kernel_rmpp_agent(agent)
+- && ib_is_mad_class_rmpp(rmpp_mad->mad_hdr.mgmt_class)
+- && (ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & IB_MGMT_RMPP_FLAG_ACTIVE)) {
++ && ib_is_mad_class_rmpp(rmpp_mad_hdr->mad_hdr.mgmt_class)
++ && (ib_get_rmpp_flags(&rmpp_mad_hdr->rmpp_hdr) & IB_MGMT_RMPP_FLAG_ACTIVE)) {
+ spin_lock_irq(&file->send_lock);
+ list_add_tail(&packet->list, &file->send_list);
+ spin_unlock_irq(&file->send_lock);
+diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c
+index 02f3bc4e4895e..13c36f51b9353 100644
+--- a/drivers/input/joystick/xpad.c
++++ b/drivers/input/joystick/xpad.c
+@@ -564,6 +564,9 @@ struct xboxone_init_packet {
+ #define GIP_MOTOR_LT BIT(3)
+ #define GIP_MOTOR_ALL (GIP_MOTOR_R | GIP_MOTOR_L | GIP_MOTOR_RT | GIP_MOTOR_LT)
+
++#define GIP_WIRED_INTF_DATA 0
++#define GIP_WIRED_INTF_AUDIO 1
++
+ /*
+ * This packet is required for all Xbox One pads with 2015
+ * or later firmware installed (or present from the factory).
+@@ -2008,7 +2011,7 @@ static int xpad_probe(struct usb_interface *intf, const struct usb_device_id *id
+ }
+
+ if (xpad->xtype == XTYPE_XBOXONE &&
+- intf->cur_altsetting->desc.bInterfaceNumber != 0) {
++ intf->cur_altsetting->desc.bInterfaceNumber != GIP_WIRED_INTF_DATA) {
+ /*
+ * The Xbox One controller lists three interfaces all with the
+ * same interface class, subclass and protocol. Differentiate by
+diff --git a/drivers/interconnect/core.c b/drivers/interconnect/core.c
+index 1d9494f64a215..4526ff2e1bd5f 100644
+--- a/drivers/interconnect/core.c
++++ b/drivers/interconnect/core.c
+@@ -29,7 +29,6 @@ static LIST_HEAD(icc_providers);
+ static int providers_count;
+ static bool synced_state;
+ static DEFINE_MUTEX(icc_lock);
+-static DEFINE_MUTEX(icc_bw_lock);
+ static struct dentry *icc_debugfs_dir;
+
+ static void icc_summary_show_one(struct seq_file *s, struct icc_node *n)
+@@ -636,7 +635,7 @@ int icc_set_bw(struct icc_path *path, u32 avg_bw, u32 peak_bw)
+ if (WARN_ON(IS_ERR(path) || !path->num_nodes))
+ return -EINVAL;
+
+- mutex_lock(&icc_bw_lock);
++ mutex_lock(&icc_lock);
+
+ old_avg = path->reqs[0].avg_bw;
+ old_peak = path->reqs[0].peak_bw;
+@@ -668,7 +667,7 @@ int icc_set_bw(struct icc_path *path, u32 avg_bw, u32 peak_bw)
+ apply_constraints(path);
+ }
+
+- mutex_unlock(&icc_bw_lock);
++ mutex_unlock(&icc_lock);
+
+ trace_icc_set_bw_end(path, ret);
+
+@@ -971,7 +970,6 @@ void icc_node_add(struct icc_node *node, struct icc_provider *provider)
+ return;
+
+ mutex_lock(&icc_lock);
+- mutex_lock(&icc_bw_lock);
+
+ node->provider = provider;
+ list_add_tail(&node->node_list, &provider->nodes);
+@@ -997,7 +995,6 @@ void icc_node_add(struct icc_node *node, struct icc_provider *provider)
+ node->avg_bw = 0;
+ node->peak_bw = 0;
+
+- mutex_unlock(&icc_bw_lock);
+ mutex_unlock(&icc_lock);
+ }
+ EXPORT_SYMBOL_GPL(icc_node_add);
+@@ -1137,7 +1134,6 @@ void icc_sync_state(struct device *dev)
+ return;
+
+ mutex_lock(&icc_lock);
+- mutex_lock(&icc_bw_lock);
+ synced_state = true;
+ list_for_each_entry(p, &icc_providers, provider_list) {
+ dev_dbg(p->dev, "interconnect provider is in synced state\n");
+@@ -1150,21 +1146,13 @@ void icc_sync_state(struct device *dev)
+ }
+ }
+ }
+- mutex_unlock(&icc_bw_lock);
+ mutex_unlock(&icc_lock);
+ }
+ EXPORT_SYMBOL_GPL(icc_sync_state);
+
+ static int __init icc_init(void)
+ {
+- struct device_node *root;
+-
+- /* Teach lockdep about lock ordering wrt. shrinker: */
+- fs_reclaim_acquire(GFP_KERNEL);
+- might_lock(&icc_bw_lock);
+- fs_reclaim_release(GFP_KERNEL);
+-
+- root = of_find_node_by_path("/");
++ struct device_node *root = of_find_node_by_path("/");
+
+ providers_count = of_count_icc_providers(root);
+ of_node_put(root);
+diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+index 8966f7d5aab61..82f100e591b5a 100644
+--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
++++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+@@ -152,6 +152,18 @@ static void queue_inc_cons(struct arm_smmu_ll_queue *q)
+ q->cons = Q_OVF(q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
+ }
+
++static void queue_sync_cons_ovf(struct arm_smmu_queue *q)
++{
++ struct arm_smmu_ll_queue *llq = &q->llq;
++
++ if (likely(Q_OVF(llq->prod) == Q_OVF(llq->cons)))
++ return;
++
++ llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
++ Q_IDX(llq, llq->cons);
++ queue_sync_cons_out(q);
++}
++
+ static int queue_sync_prod_in(struct arm_smmu_queue *q)
+ {
+ u32 prod;
+@@ -1583,8 +1595,7 @@ static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
+ } while (!queue_empty(llq));
+
+ /* Sync our overflow flag, as we believe we're up to speed */
+- llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
+- Q_IDX(llq, llq->cons);
++ queue_sync_cons_ovf(q);
+ return IRQ_HANDLED;
+ }
+
+@@ -1642,9 +1653,7 @@ static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
+ } while (!queue_empty(llq));
+
+ /* Sync our overflow flag, as we believe we're up to speed */
+- llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
+- Q_IDX(llq, llq->cons);
+- queue_sync_cons_out(q);
++ queue_sync_cons_ovf(q);
+ return IRQ_HANDLED;
+ }
+
+diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
+index d80065c8105af..f15dcb9e4175c 100644
+--- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
++++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
+@@ -267,12 +267,26 @@ static int qcom_smmu_init_context(struct arm_smmu_domain *smmu_domain,
+
+ static int qcom_smmu_cfg_probe(struct arm_smmu_device *smmu)
+ {
+- unsigned int last_s2cr = ARM_SMMU_GR0_S2CR(smmu->num_mapping_groups - 1);
+ struct qcom_smmu *qsmmu = to_qcom_smmu(smmu);
++ unsigned int last_s2cr;
+ u32 reg;
+ u32 smr;
+ int i;
+
++ /*
++ * Some platforms support more than the Arm SMMU architected maximum of
++ * 128 stream matching groups. For unknown reasons, the additional
++ * groups don't exhibit the same behavior as the architected registers,
++ * so limit the groups to 128 until the behavior is fixed for the other
++ * groups.
++ */
++ if (smmu->num_mapping_groups > 128) {
++ dev_notice(smmu->dev, "\tLimiting the stream matching groups to 128\n");
++ smmu->num_mapping_groups = 128;
++ }
++
++ last_s2cr = ARM_SMMU_GR0_S2CR(smmu->num_mapping_groups - 1);
++
+ /*
+ * With some firmware versions writes to S2CR of type FAULT are
+ * ignored, and writing BYPASS will end up written as FAULT in the
+diff --git a/drivers/iommu/sprd-iommu.c b/drivers/iommu/sprd-iommu.c
+index 8261066de07d7..e4358393fe378 100644
+--- a/drivers/iommu/sprd-iommu.c
++++ b/drivers/iommu/sprd-iommu.c
+@@ -152,13 +152,6 @@ static struct iommu_domain *sprd_iommu_domain_alloc(unsigned int domain_type)
+ return &dom->domain;
+ }
+
+-static void sprd_iommu_domain_free(struct iommu_domain *domain)
+-{
+- struct sprd_iommu_domain *dom = to_sprd_domain(domain);
+-
+- kfree(dom);
+-}
+-
+ static void sprd_iommu_first_vpn(struct sprd_iommu_domain *dom)
+ {
+ struct sprd_iommu_device *sdev = dom->sdev;
+@@ -231,6 +224,28 @@ static void sprd_iommu_hw_en(struct sprd_iommu_device *sdev, bool en)
+ sprd_iommu_update_bits(sdev, reg_cfg, mask, 0, val);
+ }
+
++static void sprd_iommu_cleanup(struct sprd_iommu_domain *dom)
++{
++ size_t pgt_size;
++
++ /* Nothing need to do if the domain hasn't been attached */
++ if (!dom->sdev)
++ return;
++
++ pgt_size = sprd_iommu_pgt_size(&dom->domain);
++ dma_free_coherent(dom->sdev->dev, pgt_size, dom->pgt_va, dom->pgt_pa);
++ sprd_iommu_hw_en(dom->sdev, false);
++ dom->sdev = NULL;
++}
++
++static void sprd_iommu_domain_free(struct iommu_domain *domain)
++{
++ struct sprd_iommu_domain *dom = to_sprd_domain(domain);
++
++ sprd_iommu_cleanup(dom);
++ kfree(dom);
++}
++
+ static int sprd_iommu_attach_device(struct iommu_domain *domain,
+ struct device *dev)
+ {
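
The sprd_iommu_domain_free() path above releases the page table only when the domain was actually attached, since attach is where the table gets allocated, and it disables the hardware before forgetting the device pointer. A user-space stand-in for that attach-tracking cleanup (malloc/free replace the DMA API):

    #include <stdio.h>
    #include <stdlib.h>

    struct domain {
        void *sdev;     /* non-NULL once attached */
        void *pgt;      /* page table, allocated during attach */
    };

    static void domain_cleanup(struct domain *dom)
    {
        if (!dom->sdev)         /* never attached: nothing to release */
            return;
        free(dom->pgt);         /* dma_free_coherent() stand-in */
        dom->pgt = NULL;
        dom->sdev = NULL;
    }

    static void domain_free(struct domain *dom)
    {
        domain_cleanup(dom);
        free(dom);
    }

    int main(void)
    {
        struct domain *never = calloc(1, sizeof(*never));
        struct domain *attached = calloc(1, sizeof(*attached));

        attached->sdev = attached;      /* pretend an attach happened */
        attached->pgt = malloc(4096);

        domain_free(never);             /* safe: skips the release */
        domain_free(attached);          /* releases the page table */
        puts("ok");
        return 0;
    }
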
+diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
+index a46ce0868fe1f..3a927452a6501 100644
+--- a/drivers/mmc/core/mmc.c
++++ b/drivers/mmc/core/mmc.c
+@@ -1007,10 +1007,12 @@ static int mmc_select_bus_width(struct mmc_card *card)
+ static unsigned ext_csd_bits[] = {
+ EXT_CSD_BUS_WIDTH_8,
+ EXT_CSD_BUS_WIDTH_4,
++ EXT_CSD_BUS_WIDTH_1,
+ };
+ static unsigned bus_widths[] = {
+ MMC_BUS_WIDTH_8,
+ MMC_BUS_WIDTH_4,
++ MMC_BUS_WIDTH_1,
+ };
+ struct mmc_host *host = card->host;
+ unsigned idx, bus_width = 0;
+diff --git a/drivers/mmc/host/mmci_stm32_sdmmc.c b/drivers/mmc/host/mmci_stm32_sdmmc.c
+index 60bca78a72b19..0511583ffa764 100644
+--- a/drivers/mmc/host/mmci_stm32_sdmmc.c
++++ b/drivers/mmc/host/mmci_stm32_sdmmc.c
+@@ -200,6 +200,8 @@ static int sdmmc_idma_start(struct mmci_host *host, unsigned int *datactrl)
+ struct scatterlist *sg;
+ int i;
+
++ host->dma_in_progress = true;
++
+ if (!host->variant->dma_lli || data->sg_len == 1 ||
+ idma->use_bounce_buffer) {
+ u32 dma_addr;
+@@ -238,9 +240,30 @@ static int sdmmc_idma_start(struct mmci_host *host, unsigned int *datactrl)
+ return 0;
+ }
+
++static void sdmmc_idma_error(struct mmci_host *host)
++{
++ struct mmc_data *data = host->data;
++ struct sdmmc_idma *idma = host->dma_priv;
++
++ if (!dma_inprogress(host))
++ return;
++
++ writel_relaxed(0, host->base + MMCI_STM32_IDMACTRLR);
++ host->dma_in_progress = false;
++ data->host_cookie = 0;
++
++ if (!idma->use_bounce_buffer)
++ dma_unmap_sg(mmc_dev(host->mmc), data->sg, data->sg_len,
++ mmc_get_dma_dir(data));
++}
++
+ static void sdmmc_idma_finalize(struct mmci_host *host, struct mmc_data *data)
+ {
++ if (!dma_inprogress(host))
++ return;
++
+ writel_relaxed(0, host->base + MMCI_STM32_IDMACTRLR);
++ host->dma_in_progress = false;
+
+ if (!data->host_cookie)
+ sdmmc_idma_unprep_data(host, data, 0);
+@@ -567,6 +590,7 @@ static struct mmci_host_ops sdmmc_variant_ops = {
+ .dma_setup = sdmmc_idma_setup,
+ .dma_start = sdmmc_idma_start,
+ .dma_finalize = sdmmc_idma_finalize,
++ .dma_error = sdmmc_idma_error,
+ .set_clkreg = mmci_sdmmc_set_clkreg,
+ .set_pwrreg = mmci_sdmmc_set_pwrreg,
+ .busy_complete = sdmmc_busy_complete,
+diff --git a/drivers/mmc/host/sdhci-xenon-phy.c b/drivers/mmc/host/sdhci-xenon-phy.c
+index 8cf3a375de659..cc9d28b75eb91 100644
+--- a/drivers/mmc/host/sdhci-xenon-phy.c
++++ b/drivers/mmc/host/sdhci-xenon-phy.c
+@@ -11,6 +11,7 @@
+ #include <linux/slab.h>
+ #include <linux/delay.h>
+ #include <linux/ktime.h>
++#include <linux/iopoll.h>
+ #include <linux/of_address.h>
+
+ #include "sdhci-pltfm.h"
+@@ -109,6 +110,8 @@
+ #define XENON_EMMC_PHY_LOGIC_TIMING_ADJUST (XENON_EMMC_PHY_REG_BASE + 0x18)
+ #define XENON_LOGIC_TIMING_VALUE 0x00AA8977
+
++#define XENON_MAX_PHY_TIMEOUT_LOOPS 100
++
+ /*
+ * List offset of PHY registers and some special register values
+ * in eMMC PHY 5.0 or eMMC PHY 5.1
+@@ -216,6 +219,19 @@ static int xenon_alloc_emmc_phy(struct sdhci_host *host)
+ return 0;
+ }
+
++static int xenon_check_stability_internal_clk(struct sdhci_host *host)
++{
++ u32 reg;
++ int err;
++
++ err = read_poll_timeout(sdhci_readw, reg, reg & SDHCI_CLOCK_INT_STABLE,
++ 1100, 20000, false, host, SDHCI_CLOCK_CONTROL);
++ if (err)
++ dev_err(mmc_dev(host->mmc), "phy_init: Internal clock never stabilized.\n");
++
++ return err;
++}
++
+ /*
+ * eMMC 5.0/5.1 PHY init/re-init.
+ * eMMC PHY init should be executed after:
+@@ -232,6 +248,11 @@ static int xenon_emmc_phy_init(struct sdhci_host *host)
+ struct xenon_priv *priv = sdhci_pltfm_priv(pltfm_host);
+ struct xenon_emmc_phy_regs *phy_regs = priv->emmc_phy_regs;
+
++ int ret = xenon_check_stability_internal_clk(host);
++
++ if (ret)
++ return ret;
++
+ reg = sdhci_readl(host, phy_regs->timing_adj);
+ reg |= XENON_PHY_INITIALIZAION;
+ sdhci_writel(host, reg, phy_regs->timing_adj);
+@@ -259,18 +280,27 @@ static int xenon_emmc_phy_init(struct sdhci_host *host)
+ /* get the wait time */
+ wait /= clock;
+ wait++;
+- /* wait for host eMMC PHY init completes */
+- udelay(wait);
+
+- reg = sdhci_readl(host, phy_regs->timing_adj);
+- reg &= XENON_PHY_INITIALIZAION;
+- if (reg) {
++ /*
++ * AC5X spec says bit must be polled until zero.
++ * We see cases in which timeout can take longer
++ * than the standard calculation on AC5X, which is
++ * expected following the spec comment above.
++ * According to the spec, we must wait as long as
++ * it takes for that bit to toggle on AC5X.
++ * Cap that with 100 delay loops so we won't get
++ * stuck here forever:
++ */
++
++ ret = read_poll_timeout(sdhci_readl, reg,
++ !(reg & XENON_PHY_INITIALIZAION),
++ wait, XENON_MAX_PHY_TIMEOUT_LOOPS * wait,
++ false, host, phy_regs->timing_adj);
++ if (ret)
+ dev_err(mmc_dev(host->mmc), "eMMC PHY init cannot complete after %d us\n",
+- wait);
+- return -ETIMEDOUT;
+- }
++ wait * XENON_MAX_PHY_TIMEOUT_LOOPS);
+
+- return 0;
++ return ret;
+ }
+
+ #define ARMADA_3700_SOC_PAD_1_8V 0x1
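
The conversion above replaces a single fixed udelay() plus one readback with read_poll_timeout(), which re-reads the register at an interval and bounds the total wait. A plain C stand-in for that polling shape, with a fake register that clears on the third read (reg_read and poll_clear are illustrative, not kernel API):

    #include <stdio.h>
    #include <time.h>

    static volatile unsigned int fake_reg = 0x1;    /* bit 0 = "init running" */
    static int reads;

    static unsigned int reg_read(void)
    {
        if (++reads == 3)           /* hardware finishes on the 3rd read */
            fake_reg &= ~0x1u;
        return fake_reg;
    }

    /* Returns 0 once (reg & mask) clears, -1 after timeout_us of waiting. */
    static int poll_clear(unsigned int mask, unsigned int sleep_us,
                          unsigned int timeout_us)
    {
        unsigned int waited = 0;
        struct timespec ts = { 0, (long)sleep_us * 1000 };

        for (;;) {
            if (!(reg_read() & mask))
                return 0;
            if (waited >= timeout_us)
                return -1;
            nanosleep(&ts, NULL);
            waited += sleep_us;
        }
    }

    int main(void)
    {
        int ret = poll_clear(0x1, 100, 100 * 100);

        printf("result=%d after %d reads\n", ret, reads);
        return 0;
    }
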
+diff --git a/drivers/mtd/nand/spi/gigadevice.c b/drivers/mtd/nand/spi/gigadevice.c
+index 6b043e24855fb..9116ee7f023ed 100644
+--- a/drivers/mtd/nand/spi/gigadevice.c
++++ b/drivers/mtd/nand/spi/gigadevice.c
+@@ -186,7 +186,7 @@ static int gd5fxgq4uexxg_ecc_get_status(struct spinand_device *spinand,
+ {
+ u8 status2;
+ struct spi_mem_op op = SPINAND_GET_FEATURE_OP(GD5FXGQXXEXXG_REG_STATUS2,
+- &status2);
++ spinand->scratchbuf);
+ int ret;
+
+ switch (status & STATUS_ECC_MASK) {
+@@ -207,6 +207,7 @@ static int gd5fxgq4uexxg_ecc_get_status(struct spinand_device *spinand,
+ * report the maximum of 4 in this case
+ */
+ /* bits sorted this way (3...0): ECCS1,ECCS0,ECCSE1,ECCSE0 */
++ status2 = *(spinand->scratchbuf);
+ return ((status & STATUS_ECC_MASK) >> 2) |
+ ((status2 & STATUS_ECC_MASK) >> 4);
+
+@@ -228,7 +229,7 @@ static int gd5fxgq5xexxg_ecc_get_status(struct spinand_device *spinand,
+ {
+ u8 status2;
+ struct spi_mem_op op = SPINAND_GET_FEATURE_OP(GD5FXGQXXEXXG_REG_STATUS2,
+- &status2);
++ spinand->scratchbuf);
+ int ret;
+
+ switch (status & STATUS_ECC_MASK) {
+@@ -248,6 +249,7 @@ static int gd5fxgq5xexxg_ecc_get_status(struct spinand_device *spinand,
+ * 1 ... 4 bits are flipped (and corrected)
+ */
+ /* bits sorted this way (1...0): ECCSE1, ECCSE0 */
++ status2 = *(spinand->scratchbuf);
+ return ((status2 & STATUS_ECC_MASK) >> 4) + 1;
+
+ case STATUS_ECC_UNCOR_ERROR:
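
Both hunks stop passing the address of a stack variable to SPINAND_GET_FEATURE_OP: buffers handed to spi-mem can be DMA-mapped, and on-stack memory is not guaranteed DMA-safe, so the read goes through the device's heap-allocated scratch buffer and is copied into the local afterwards. A user-space sketch of the pattern (dma_read is a stand-in for the transfer):

    #include <stdio.h>
    #include <stdlib.h>

    struct dev {
        unsigned char *scratchbuf;  /* heap-allocated: safe to hand to DMA */
    };

    static void dma_read(unsigned char *buf)    /* pretend this is DMA */
    {
        buf[0] = 0x30;
    }

    static int read_status2(struct dev *d, unsigned char *out)
    {
        dma_read(d->scratchbuf);    /* never &stack_var here */
        *out = d->scratchbuf[0];    /* copy the result out afterwards */
        return 0;
    }

    int main(void)
    {
        struct dev d = { .scratchbuf = malloc(64) };
        unsigned char status2;

        if (!d.scratchbuf)
            return 1;
        read_status2(&d, &status2);
        printf("status2=%#x\n", status2);
        free(d.scratchbuf);
        return 0;
    }
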
+diff --git a/drivers/net/ethernet/Kconfig b/drivers/net/ethernet/Kconfig
+index 1917da7841919..5a274b99f2992 100644
+--- a/drivers/net/ethernet/Kconfig
++++ b/drivers/net/ethernet/Kconfig
+@@ -84,7 +84,6 @@ source "drivers/net/ethernet/huawei/Kconfig"
+ source "drivers/net/ethernet/i825xx/Kconfig"
+ source "drivers/net/ethernet/ibm/Kconfig"
+ source "drivers/net/ethernet/intel/Kconfig"
+-source "drivers/net/ethernet/wangxun/Kconfig"
+ source "drivers/net/ethernet/xscale/Kconfig"
+
+ config JME
+@@ -189,6 +188,7 @@ source "drivers/net/ethernet/toshiba/Kconfig"
+ source "drivers/net/ethernet/tundra/Kconfig"
+ source "drivers/net/ethernet/vertexcom/Kconfig"
+ source "drivers/net/ethernet/via/Kconfig"
++source "drivers/net/ethernet/wangxun/Kconfig"
+ source "drivers/net/ethernet/wiznet/Kconfig"
+ source "drivers/net/ethernet/xilinx/Kconfig"
+ source "drivers/net/ethernet/xircom/Kconfig"
+diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c
+index 07171e574e7d7..36e62197fba0b 100644
+--- a/drivers/net/ethernet/intel/igb/igb_ptp.c
++++ b/drivers/net/ethernet/intel/igb/igb_ptp.c
+@@ -976,7 +976,7 @@ static void igb_ptp_tx_hwtstamp(struct igb_adapter *adapter)
+
+ igb_ptp_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
+ /* adjust timestamp for the TX latency based on link speed */
+- if (adapter->hw.mac.type == e1000_i210) {
++ if (hw->mac.type == e1000_i210 || hw->mac.type == e1000_i211) {
+ switch (adapter->link_speed) {
+ case SPEED_10:
+ adjust = IGB_I210_TX_LATENCY_10;
+@@ -1022,6 +1022,7 @@ int igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, void *va,
+ ktime_t *timestamp)
+ {
+ struct igb_adapter *adapter = q_vector->adapter;
++ struct e1000_hw *hw = &adapter->hw;
+ struct skb_shared_hwtstamps ts;
+ __le64 *regval = (__le64 *)va;
+ int adjust = 0;
+@@ -1041,7 +1042,7 @@ int igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, void *va,
+ igb_ptp_systim_to_hwtstamp(adapter, &ts, le64_to_cpu(regval[1]));
+
+ /* adjust timestamp for the RX latency based on link speed */
+- if (adapter->hw.mac.type == e1000_i210) {
++ if (hw->mac.type == e1000_i210 || hw->mac.type == e1000_i211) {
+ switch (adapter->link_speed) {
+ case SPEED_10:
+ adjust = IGB_I210_RX_LATENCY_10;
+diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c
+index dc2e204bcd727..41eac7dfb67e7 100644
+--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c
++++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c
+@@ -52,8 +52,10 @@ int mlxsw_sp_acl_tcam_init(struct mlxsw_sp *mlxsw_sp,
+ max_regions = max_tcam_regions;
+
+ tcam->used_regions = bitmap_zalloc(max_regions, GFP_KERNEL);
+- if (!tcam->used_regions)
+- return -ENOMEM;
++ if (!tcam->used_regions) {
++ err = -ENOMEM;
++ goto err_alloc_used_regions;
++ }
+ tcam->max_regions = max_regions;
+
+ max_groups = MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_MAX_GROUPS);
+@@ -78,6 +80,8 @@ int mlxsw_sp_acl_tcam_init(struct mlxsw_sp *mlxsw_sp,
+ bitmap_free(tcam->used_groups);
+ err_alloc_used_groups:
+ bitmap_free(tcam->used_regions);
++err_alloc_used_regions:
++ mutex_destroy(&tcam->lock);
+ return err;
+ }
+
+@@ -86,10 +90,10 @@ void mlxsw_sp_acl_tcam_fini(struct mlxsw_sp *mlxsw_sp,
+ {
+ const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops;
+
+- mutex_destroy(&tcam->lock);
+ ops->fini(mlxsw_sp, tcam->priv);
+ bitmap_free(tcam->used_groups);
+ bitmap_free(tcam->used_regions);
++ mutex_destroy(&tcam->lock);
+ }
+
+ int mlxsw_sp_acl_tcam_priority_get(struct mlxsw_sp *mlxsw_sp,
+diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+index 91b2aa81914ba..e2d51014ab4bc 100644
+--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+@@ -3900,8 +3900,10 @@ static void stmmac_fpe_stop_wq(struct stmmac_priv *priv)
+ {
+ set_bit(__FPE_REMOVING, &priv->fpe_task_state);
+
+- if (priv->fpe_wq)
++ if (priv->fpe_wq) {
+ destroy_workqueue(priv->fpe_wq);
++ priv->fpe_wq = NULL;
++ }
+
+ netdev_info(priv->dev, "FPE workqueue stop");
+ }
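
Clearing priv->fpe_wq after destroy_workqueue() makes a repeated stop call a harmless no-op rather than a double destroy. The generic shape of that idiom, with free() standing in for destroy_workqueue():

    #include <stdio.h>
    #include <stdlib.h>

    struct priv {
        void *wq;
    };

    static void stop_wq(struct priv *p)
    {
        if (p->wq) {
            free(p->wq);        /* destroy_workqueue() stand-in */
            p->wq = NULL;       /* make repeated stop calls safe */
        }
    }

    int main(void)
    {
        struct priv p = { .wq = malloc(32) };

        stop_wq(&p);
        stop_wq(&p);            /* harmless now; double free without the NULL */
        printf("wq=%p\n", p.wq);
        return 0;
    }
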
+diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c
+index 937dd9cf4fbaf..7086acfed5b90 100644
+--- a/drivers/net/gtp.c
++++ b/drivers/net/gtp.c
+@@ -1902,26 +1902,26 @@ static int __init gtp_init(void)
+
+ get_random_bytes(&gtp_h_initval, sizeof(gtp_h_initval));
+
+- err = rtnl_link_register(&gtp_link_ops);
++ err = register_pernet_subsys(&gtp_net_ops);
+ if (err < 0)
+ goto error_out;
+
+- err = register_pernet_subsys(&gtp_net_ops);
++ err = rtnl_link_register(&gtp_link_ops);
+ if (err < 0)
+- goto unreg_rtnl_link;
++ goto unreg_pernet_subsys;
+
+ err = genl_register_family(&gtp_genl_family);
+ if (err < 0)
+- goto unreg_pernet_subsys;
++ goto unreg_rtnl_link;
+
+ pr_info("GTP module loaded (pdp ctx size %zd bytes)\n",
+ sizeof(struct pdp_ctx));
+ return 0;
+
+-unreg_pernet_subsys:
+- unregister_pernet_subsys(&gtp_net_ops);
+ unreg_rtnl_link:
+ rtnl_link_unregister(&gtp_link_ops);
++unreg_pernet_subsys:
++ unregister_pernet_subsys(&gtp_net_ops);
+ error_out:
+ pr_err("error loading GTP module loaded\n");
+ return err;
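
The gtp_init() reorder registers the per-net state before the rtnl link ops that depend on it, and the error unwind mirrors that order in reverse. A stand-alone sketch of the ordered-init, reverse-teardown shape (reg_a/reg_b/reg_c are illustrative stand-ins for the three registrations):

    #include <stdio.h>

    static int reg_a(void) { return 0; }    /* e.g. register_pernet_subsys() */
    static int reg_b(void) { return 0; }    /* e.g. rtnl_link_register() */
    static int reg_c(void) { return -1; }   /* e.g. genl_register_family(), failing */
    static void unreg_a(void) { puts("unreg a"); }
    static void unreg_b(void) { puts("unreg b"); }

    static int module_init_sketch(void)
    {
        int err;

        err = reg_a();              /* the dependency comes first */
        if (err)
            return err;
        err = reg_b();              /* may rely on a being registered */
        if (err)
            goto err_unreg_a;
        err = reg_c();
        if (err)
            goto err_unreg_b;
        return 0;

    err_unreg_b:                    /* reverse order of registration */
        unreg_b();
    err_unreg_a:
        unreg_a();
        return err;
    }

    int main(void)
    {
        return module_init_sketch() ? 1 : 0;
    }
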
+diff --git a/drivers/net/tun.c b/drivers/net/tun.c
+index 367255bb44cdc..922d6f16d99d1 100644
+--- a/drivers/net/tun.c
++++ b/drivers/net/tun.c
+@@ -653,6 +653,7 @@ static void __tun_detach(struct tun_file *tfile, bool clean)
+ tun->tfiles[tun->numqueues - 1]);
+ ntfile = rtnl_dereference(tun->tfiles[index]);
+ ntfile->queue_index = index;
++ ntfile->xdp_rxq.queue_index = index;
+ rcu_assign_pointer(tun->tfiles[tun->numqueues - 1],
+ NULL);
+
+diff --git a/drivers/net/usb/dm9601.c b/drivers/net/usb/dm9601.c
+index 99ec1d4a972db..8b6d6a1b3c2ec 100644
+--- a/drivers/net/usb/dm9601.c
++++ b/drivers/net/usb/dm9601.c
+@@ -232,7 +232,7 @@ static int dm9601_mdio_read(struct net_device *netdev, int phy_id, int loc)
+ err = dm_read_shared_word(dev, 1, loc, &res);
+ if (err < 0) {
+ netdev_err(dev->net, "MDIO read error: %d\n", err);
+- return err;
++ return 0;
+ }
+
+ netdev_dbg(dev->net,
+diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
+index c458c030fadf6..4fd4563811299 100644
+--- a/drivers/net/usb/lan78xx.c
++++ b/drivers/net/usb/lan78xx.c
+@@ -1501,7 +1501,9 @@ static int lan78xx_link_reset(struct lan78xx_net *dev)
+
+ lan78xx_rx_urb_submit_all(dev);
+
++ local_bh_disable();
+ napi_schedule(&dev->napi);
++ local_bh_enable();
+ }
+
+ return 0;
+@@ -3035,7 +3037,8 @@ static int lan78xx_reset(struct lan78xx_net *dev)
+ if (dev->chipid == ID_REV_CHIP_ID_7801_)
+ buf &= ~MAC_CR_GMII_EN_;
+
+- if (dev->chipid == ID_REV_CHIP_ID_7800_) {
++ if (dev->chipid == ID_REV_CHIP_ID_7800_ ||
++ dev->chipid == ID_REV_CHIP_ID_7850_) {
+ ret = lan78xx_read_raw_eeprom(dev, 0, 1, &sig);
+ if (!ret && sig != EEPROM_INDICATOR) {
+ /* Implies there is no external eeprom. Set mac speed */
+diff --git a/drivers/net/veth.c b/drivers/net/veth.c
+index 36c5a41f84e44..dd9f5f1461921 100644
+--- a/drivers/net/veth.c
++++ b/drivers/net/veth.c
+@@ -1135,14 +1135,6 @@ static int veth_enable_xdp(struct net_device *dev)
+ veth_disable_xdp_range(dev, 0, dev->real_num_rx_queues, true);
+ return err;
+ }
+-
+- if (!veth_gro_requested(dev)) {
+- /* user-space did not require GRO, but adding XDP
+- * is supposed to get GRO working
+- */
+- dev->features |= NETIF_F_GRO;
+- netdev_features_change(dev);
+- }
+ }
+ }
+
+@@ -1162,18 +1154,9 @@ static void veth_disable_xdp(struct net_device *dev)
+ for (i = 0; i < dev->real_num_rx_queues; i++)
+ rcu_assign_pointer(priv->rq[i].xdp_prog, NULL);
+
+- if (!netif_running(dev) || !veth_gro_requested(dev)) {
++ if (!netif_running(dev) || !veth_gro_requested(dev))
+ veth_napi_del(dev);
+
+- /* if user-space did not require GRO, since adding XDP
+- * enabled it, clear it now
+- */
+- if (!veth_gro_requested(dev) && netif_running(dev)) {
+- dev->features &= ~NETIF_F_GRO;
+- netdev_features_change(dev);
+- }
+- }
+-
+ veth_disable_xdp_range(dev, 0, dev->real_num_rx_queues, false);
+ }
+
+@@ -1376,7 +1359,8 @@ static int veth_alloc_queues(struct net_device *dev)
+ struct veth_priv *priv = netdev_priv(dev);
+ int i;
+
+- priv->rq = kcalloc(dev->num_rx_queues, sizeof(*priv->rq), GFP_KERNEL_ACCOUNT);
++ priv->rq = kvcalloc(dev->num_rx_queues, sizeof(*priv->rq),
++ GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL);
+ if (!priv->rq)
+ return -ENOMEM;
+
+@@ -1392,7 +1376,7 @@ static void veth_free_queues(struct net_device *dev)
+ {
+ struct veth_priv *priv = netdev_priv(dev);
+
+- kfree(priv->rq);
++ kvfree(priv->rq);
+ }
+
+ static int veth_dev_init(struct net_device *dev)
+@@ -1558,6 +1542,14 @@ static int veth_xdp_set(struct net_device *dev, struct bpf_prog *prog,
+ }
+
+ if (!old_prog) {
++ if (!veth_gro_requested(dev)) {
++ /* user-space did not require GRO, but adding
++ * XDP is supposed to get GRO working
++ */
++ dev->features |= NETIF_F_GRO;
++ netdev_features_change(dev);
++ }
++
+ peer->hw_features &= ~NETIF_F_GSO_SOFTWARE;
+ peer->max_mtu = max_mtu;
+ }
+@@ -1568,6 +1560,14 @@ static int veth_xdp_set(struct net_device *dev, struct bpf_prog *prog,
+ if (dev->flags & IFF_UP)
+ veth_disable_xdp(dev);
+
++ /* if user-space did not require GRO, since adding XDP
++ * enabled it, clear it now
++ */
++ if (!veth_gro_requested(dev)) {
++ dev->features &= ~NETIF_F_GRO;
++ netdev_features_change(dev);
++ }
++
+ if (peer) {
+ peer->hw_features |= NETIF_F_GSO_SOFTWARE;
+ peer->max_mtu = ETH_MAX_MTU;
+diff --git a/drivers/of/overlay.c b/drivers/of/overlay.c
+index 4402871b5c0c0..e663d5585a057 100644
+--- a/drivers/of/overlay.c
++++ b/drivers/of/overlay.c
+@@ -45,8 +45,8 @@ struct target {
+
+ /**
+ * struct fragment - info about fragment nodes in overlay expanded device tree
+- * @target: target of the overlay operation
+ * @overlay: pointer to the __overlay__ node
++ * @target: target of the overlay operation
+ */
+ struct fragment {
+ struct device_node *overlay;
+diff --git a/drivers/of/property.c b/drivers/of/property.c
+index 33d5f16c81204..da5d712197704 100644
+--- a/drivers/of/property.c
++++ b/drivers/of/property.c
+@@ -1332,7 +1332,7 @@ static struct device_node *parse_remote_endpoint(struct device_node *np,
+ int index)
+ {
+ /* Return NULL for index > 0 to signify end of remote-endpoints. */
+- if (!index || strcmp(prop_name, "remote-endpoint"))
++ if (index > 0 || strcmp(prop_name, "remote-endpoint"))
+ return NULL;
+
+ return of_graph_get_remote_port_parent(np);
+diff --git a/drivers/pci/controller/dwc/pci-layerscape-ep.c b/drivers/pci/controller/dwc/pci-layerscape-ep.c
+index ad99707b3b994..dd7d74fecc48e 100644
+--- a/drivers/pci/controller/dwc/pci-layerscape-ep.c
++++ b/drivers/pci/controller/dwc/pci-layerscape-ep.c
+@@ -18,6 +18,20 @@
+
+ #include "pcie-designware.h"
+
++#define PEX_PF0_CONFIG 0xC0014
++#define PEX_PF0_CFG_READY BIT(0)
++
++/* PEX PFa PCIE PME and message interrupt registers*/
++#define PEX_PF0_PME_MES_DR 0xC0020
++#define PEX_PF0_PME_MES_DR_LUD BIT(7)
++#define PEX_PF0_PME_MES_DR_LDD BIT(9)
++#define PEX_PF0_PME_MES_DR_HRD BIT(10)
++
++#define PEX_PF0_PME_MES_IER 0xC0028
++#define PEX_PF0_PME_MES_IER_LUDIE BIT(7)
++#define PEX_PF0_PME_MES_IER_LDDIE BIT(9)
++#define PEX_PF0_PME_MES_IER_HRDIE BIT(10)
++
+ #define to_ls_pcie_ep(x) dev_get_drvdata((x)->dev)
+
+ struct ls_pcie_ep_drvdata {
+@@ -30,8 +44,99 @@ struct ls_pcie_ep {
+ struct dw_pcie *pci;
+ struct pci_epc_features *ls_epc;
+ const struct ls_pcie_ep_drvdata *drvdata;
++ int irq;
++ u32 lnkcap;
++ bool big_endian;
+ };
+
++static u32 ls_lut_readl(struct ls_pcie_ep *pcie, u32 offset)
++{
++ struct dw_pcie *pci = pcie->pci;
++
++ if (pcie->big_endian)
++ return ioread32be(pci->dbi_base + offset);
++ else
++ return ioread32(pci->dbi_base + offset);
++}
++
++static void ls_lut_writel(struct ls_pcie_ep *pcie, u32 offset, u32 value)
++{
++ struct dw_pcie *pci = pcie->pci;
++
++ if (pcie->big_endian)
++ iowrite32be(value, pci->dbi_base + offset);
++ else
++ iowrite32(value, pci->dbi_base + offset);
++}
++
++static irqreturn_t ls_pcie_ep_event_handler(int irq, void *dev_id)
++{
++ struct ls_pcie_ep *pcie = dev_id;
++ struct dw_pcie *pci = pcie->pci;
++ u32 val, cfg;
++ u8 offset;
++
++ val = ls_lut_readl(pcie, PEX_PF0_PME_MES_DR);
++ ls_lut_writel(pcie, PEX_PF0_PME_MES_DR, val);
++
++ if (!val)
++ return IRQ_NONE;
++
++ if (val & PEX_PF0_PME_MES_DR_LUD) {
++
++ offset = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP);
++
++ /*
++ * The values of the Maximum Link Width and Supported Link
++ * Speed from the Link Capabilities Register will be lost
++ * during link down or hot reset. Restore initial value
++ * that configured by the Reset Configuration Word (RCW).
++ */
++ dw_pcie_dbi_ro_wr_en(pci);
++ dw_pcie_writel_dbi(pci, offset + PCI_EXP_LNKCAP, pcie->lnkcap);
++ dw_pcie_dbi_ro_wr_dis(pci);
++
++ cfg = ls_lut_readl(pcie, PEX_PF0_CONFIG);
++ cfg |= PEX_PF0_CFG_READY;
++ ls_lut_writel(pcie, PEX_PF0_CONFIG, cfg);
++ dw_pcie_ep_linkup(&pci->ep);
++
++ dev_dbg(pci->dev, "Link up\n");
++ } else if (val & PEX_PF0_PME_MES_DR_LDD) {
++ dev_dbg(pci->dev, "Link down\n");
++ } else if (val & PEX_PF0_PME_MES_DR_HRD) {
++ dev_dbg(pci->dev, "Hot reset\n");
++ }
++
++ return IRQ_HANDLED;
++}
++
++static int ls_pcie_ep_interrupt_init(struct ls_pcie_ep *pcie,
++ struct platform_device *pdev)
++{
++ u32 val;
++ int ret;
++
++ pcie->irq = platform_get_irq_byname(pdev, "pme");
++ if (pcie->irq < 0)
++ return pcie->irq;
++
++ ret = devm_request_irq(&pdev->dev, pcie->irq, ls_pcie_ep_event_handler,
++ IRQF_SHARED, pdev->name, pcie);
++ if (ret) {
++ dev_err(&pdev->dev, "Can't register PCIe IRQ\n");
++ return ret;
++ }
++
++ /* Enable interrupts */
++ val = ls_lut_readl(pcie, PEX_PF0_PME_MES_IER);
++ val |= PEX_PF0_PME_MES_IER_LDDIE | PEX_PF0_PME_MES_IER_HRDIE |
++ PEX_PF0_PME_MES_IER_LUDIE;
++ ls_lut_writel(pcie, PEX_PF0_PME_MES_IER, val);
++
++ return 0;
++}
++
+ static const struct pci_epc_features*
+ ls_pcie_ep_get_features(struct dw_pcie_ep *ep)
+ {
+@@ -124,6 +229,8 @@ static int __init ls_pcie_ep_probe(struct platform_device *pdev)
+ struct ls_pcie_ep *pcie;
+ struct pci_epc_features *ls_epc;
+ struct resource *dbi_base;
++ u8 offset;
++ int ret;
+
+ pcie = devm_kzalloc(dev, sizeof(*pcie), GFP_KERNEL);
+ if (!pcie)
+@@ -143,6 +250,7 @@ static int __init ls_pcie_ep_probe(struct platform_device *pdev)
+ pci->ops = pcie->drvdata->dw_pcie_ops;
+
+ ls_epc->bar_fixed_64bit = (1 << BAR_2) | (1 << BAR_4);
++ ls_epc->linkup_notifier = true;
+
+ pcie->pci = pci;
+ pcie->ls_epc = ls_epc;
+@@ -154,9 +262,18 @@ static int __init ls_pcie_ep_probe(struct platform_device *pdev)
+
+ pci->ep.ops = &ls_pcie_ep_ops;
+
++ pcie->big_endian = of_property_read_bool(dev->of_node, "big-endian");
++
+ platform_set_drvdata(pdev, pcie);
+
+- return dw_pcie_ep_init(&pci->ep);
++ offset = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP);
++ pcie->lnkcap = dw_pcie_readl_dbi(pci, offset + PCI_EXP_LNKCAP);
++
++ ret = dw_pcie_ep_init(&pci->ep);
++ if (ret)
++ return ret;
++
++ return ls_pcie_ep_interrupt_init(pcie, pdev);
+ }
+
+ static struct platform_driver ls_pcie_ep_driver = {
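
The ls_lut_readl()/ls_lut_writel() pair above selects ioread32be() or ioread32() from a devicetree "big-endian" flag once at probe, so the rest of the driver stays byte-order-agnostic. A portable user-space sketch of such an accessor pair (the in-memory "registers" and the swab32 helper are illustrative):

    #include <stdint.h>
    #include <stdio.h>

    struct pcie {
        uint8_t *base;
        int big_endian;
    };

    static uint32_t swab32(uint32_t v)
    {
        return (v >> 24) | ((v >> 8) & 0xff00) |
               ((v << 8) & 0xff0000) | (v << 24);
    }

    static uint32_t lut_readl(struct pcie *p, uint32_t off)
    {
        uint32_t raw = *(volatile uint32_t *)(p->base + off);

        return p->big_endian ? swab32(raw) : raw;   /* BE regs on an LE host */
    }

    static void lut_writel(struct pcie *p, uint32_t off, uint32_t val)
    {
        *(volatile uint32_t *)(p->base + off) =
            p->big_endian ? swab32(val) : val;
    }

    int main(void)
    {
        uint32_t regs[2] = { 0 };
        struct pcie p = { .base = (uint8_t *)regs, .big_endian = 1 };

        lut_writel(&p, 0, 0x12345678);
        printf("%#x\n", lut_readl(&p, 0));  /* 0x12345678 round-trips */
        return 0;
    }
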
+diff --git a/drivers/phy/freescale/phy-fsl-imx8-mipi-dphy.c b/drivers/phy/freescale/phy-fsl-imx8-mipi-dphy.c
+index e625b32889bfc..0928a526e2ab3 100644
+--- a/drivers/phy/freescale/phy-fsl-imx8-mipi-dphy.c
++++ b/drivers/phy/freescale/phy-fsl-imx8-mipi-dphy.c
+@@ -706,7 +706,7 @@ static int mixel_dphy_probe(struct platform_device *pdev)
+ return ret;
+ }
+
+- priv->id = of_alias_get_id(np, "mipi_dphy");
++ priv->id = of_alias_get_id(np, "mipi-dphy");
+ if (priv->id < 0) {
+ dev_err(dev, "Failed to get phy node alias id: %d\n",
+ priv->id);
+diff --git a/drivers/power/supply/bq27xxx_battery_i2c.c b/drivers/power/supply/bq27xxx_battery_i2c.c
+index 0713a52a25107..17b37354e32c0 100644
+--- a/drivers/power/supply/bq27xxx_battery_i2c.c
++++ b/drivers/power/supply/bq27xxx_battery_i2c.c
+@@ -209,7 +209,9 @@ static void bq27xxx_battery_i2c_remove(struct i2c_client *client)
+ {
+ struct bq27xxx_device_info *di = i2c_get_clientdata(client);
+
+- free_irq(client->irq, di);
++ if (client->irq)
++ free_irq(client->irq, di);
++
+ bq27xxx_battery_teardown(di);
+
+ mutex_lock(&battery_mutex);
+diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
+index 5c5954b78585e..edd296f950a33 100644
+--- a/drivers/scsi/scsi_lib.c
++++ b/drivers/scsi/scsi_lib.c
+@@ -185,39 +185,37 @@ void scsi_queue_insert(struct scsi_cmnd *cmd, int reason)
+ __scsi_queue_insert(cmd, reason, true);
+ }
+
+-
+ /**
+- * __scsi_execute - insert request and wait for the result
+- * @sdev: scsi device
++ * scsi_execute_cmd - insert request and wait for the result
++ * @sdev: scsi_device
+ * @cmd: scsi command
+- * @data_direction: data direction
++ * @opf: block layer request cmd_flags
+ * @buffer: data buffer
+ * @bufflen: len of buffer
+- * @sense: optional sense buffer
+- * @sshdr: optional decoded sense header
+ * @timeout: request timeout in HZ
+ * @retries: number of times to retry request
+- * @flags: flags for ->cmd_flags
+- * @rq_flags: flags for ->rq_flags
+- * @resid: optional residual length
++ * @args: Optional args. See struct definition for field descriptions
+ *
+ * Returns the scsi_cmnd result field if a command was executed, or a negative
+ * Linux error code if we didn't get that far.
+ */
+-int __scsi_execute(struct scsi_device *sdev, const unsigned char *cmd,
+- int data_direction, void *buffer, unsigned bufflen,
+- unsigned char *sense, struct scsi_sense_hdr *sshdr,
+- int timeout, int retries, blk_opf_t flags,
+- req_flags_t rq_flags, int *resid)
++int scsi_execute_cmd(struct scsi_device *sdev, const unsigned char *cmd,
++ blk_opf_t opf, void *buffer, unsigned int bufflen,
++ int timeout, int retries,
++ const struct scsi_exec_args *args)
+ {
++ static const struct scsi_exec_args default_args;
+ struct request *req;
+ struct scsi_cmnd *scmd;
+ int ret;
+
+- req = scsi_alloc_request(sdev->request_queue,
+- data_direction == DMA_TO_DEVICE ?
+- REQ_OP_DRV_OUT : REQ_OP_DRV_IN,
+- rq_flags & RQF_PM ? BLK_MQ_REQ_PM : 0);
++ if (!args)
++ args = &default_args;
++ else if (WARN_ON_ONCE(args->sense &&
++ args->sense_len != SCSI_SENSE_BUFFERSIZE))
++ return -EINVAL;
++
++ req = scsi_alloc_request(sdev->request_queue, opf, args->req_flags);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
+
+@@ -232,8 +230,7 @@ int __scsi_execute(struct scsi_device *sdev, const unsigned char *cmd,
+ memcpy(scmd->cmnd, cmd, scmd->cmd_len);
+ scmd->allowed = retries;
+ req->timeout = timeout;
+- req->cmd_flags |= flags;
+- req->rq_flags |= rq_flags | RQF_QUIET;
++ req->rq_flags |= RQF_QUIET;
+
+ /*
+ * head injection *required* here otherwise quiesce won't work
+@@ -249,20 +246,21 @@ int __scsi_execute(struct scsi_device *sdev, const unsigned char *cmd,
+ if (unlikely(scmd->resid_len > 0 && scmd->resid_len <= bufflen))
+ memset(buffer + bufflen - scmd->resid_len, 0, scmd->resid_len);
+
+- if (resid)
+- *resid = scmd->resid_len;
+- if (sense && scmd->sense_len)
+- memcpy(sense, scmd->sense_buffer, SCSI_SENSE_BUFFERSIZE);
+- if (sshdr)
++ if (args->resid)
++ *args->resid = scmd->resid_len;
++ if (args->sense)
++ memcpy(args->sense, scmd->sense_buffer, SCSI_SENSE_BUFFERSIZE);
++ if (args->sshdr)
+ scsi_normalize_sense(scmd->sense_buffer, scmd->sense_len,
+- sshdr);
++ args->sshdr);
++
+ ret = scmd->result;
+ out:
+ blk_mq_free_request(req);
+
+ return ret;
+ }
+-EXPORT_SYMBOL(__scsi_execute);
++EXPORT_SYMBOL(scsi_execute_cmd);
+
+ /*
+ * Wake up the error handler if necessary. Avoid as follows that the error
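
scsi_execute_cmd() folds the old tail of optional parameters into one args struct whose NULL default is a static all-zero instance, so the body can read args->field unconditionally instead of branching per parameter. The generic optional-args shape:

    #include <stdio.h>

    struct exec_args {
        int *resid;     /* optional out-parameter */
        int quiet;      /* optional flag */
    };

    static int execute_cmd(const char *cmd, const struct exec_args *args)
    {
        static const struct exec_args default_args; /* all-zero defaults */

        if (!args)
            args = &default_args;

        if (!args->quiet)
            printf("executing %s\n", cmd);
        if (args->resid)
            *args->resid = 0;   /* report only if the caller asked */
        return 0;
    }

    int main(void)
    {
        int resid = -1;
        struct exec_args args = { .resid = &resid, .quiet = 1 };

        execute_cmd("TEST UNIT READY", NULL);   /* defaults */
        execute_cmd("READ(10)", &args);         /* opt-in extras */
        printf("resid=%d\n", resid);
        return 0;
    }
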
+diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
+index 31b5273f43a71..4433b02c8935f 100644
+--- a/drivers/scsi/sd.c
++++ b/drivers/scsi/sd.c
+@@ -3284,6 +3284,24 @@ static bool sd_validate_opt_xfer_size(struct scsi_disk *sdkp,
+ return true;
+ }
+
++static void sd_read_block_zero(struct scsi_disk *sdkp)
++{
++ unsigned int buf_len = sdkp->device->sector_size;
++ char *buffer, cmd[10] = { };
++
++ buffer = kmalloc(buf_len, GFP_KERNEL);
++ if (!buffer)
++ return;
++
++ cmd[0] = READ_10;
++ put_unaligned_be32(0, &cmd[2]); /* Logical block address 0 */
++ put_unaligned_be16(1, &cmd[7]); /* Transfer 1 logical block */
++
++ scsi_execute_cmd(sdkp->device, cmd, REQ_OP_DRV_IN, buffer, buf_len,
++ SD_TIMEOUT, sdkp->max_retries, NULL);
++ kfree(buffer);
++}
++
+ /**
+ * sd_revalidate_disk - called the first time a new disk is seen,
+ * performs disk spin up, read_capacity, etc.
+@@ -3323,7 +3341,13 @@ static int sd_revalidate_disk(struct gendisk *disk)
+ */
+ if (sdkp->media_present) {
+ sd_read_capacity(sdkp, buffer);
+-
++ /*
++ * Some USB/UAS devices return generic values for mode pages
++ * until the media has been accessed. Trigger a READ operation
++ * to force the device to populate mode pages.
++ */
++ if (sdp->read_before_ms)
++ sd_read_block_zero(sdkp);
+ /*
+ * set the default to rotational. All non-rotational devices
+ * support the block characteristics VPD page, which will
+diff --git a/drivers/soc/qcom/rpmhpd.c b/drivers/soc/qcom/rpmhpd.c
+index 092f6ab09acf3..9a90f241bb97f 100644
+--- a/drivers/soc/qcom/rpmhpd.c
++++ b/drivers/soc/qcom/rpmhpd.c
+@@ -492,12 +492,15 @@ static int rpmhpd_aggregate_corner(struct rpmhpd *pd, unsigned int corner)
+ unsigned int active_corner, sleep_corner;
+ unsigned int this_active_corner = 0, this_sleep_corner = 0;
+ unsigned int peer_active_corner = 0, peer_sleep_corner = 0;
++ unsigned int peer_enabled_corner;
+
+ to_active_sleep(pd, corner, &this_active_corner, &this_sleep_corner);
+
+- if (peer && peer->enabled)
+- to_active_sleep(peer, peer->corner, &peer_active_corner,
++ if (peer && peer->enabled) {
++ peer_enabled_corner = max(peer->corner, peer->enable_corner);
++ to_active_sleep(peer, peer_enabled_corner, &peer_active_corner,
+ &peer_sleep_corner);
++ }
+
+ active_corner = max(this_active_corner, peer_active_corner);
+
+diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c
+index cb0a4e2cdbb73..247cca46cdfae 100644
+--- a/drivers/usb/gadget/composite.c
++++ b/drivers/usb/gadget/composite.c
+@@ -511,6 +511,19 @@ static u8 encode_bMaxPower(enum usb_device_speed speed,
+ return min(val, 900U) / 8;
+ }
+
++void check_remote_wakeup_config(struct usb_gadget *g,
++ struct usb_configuration *c)
++{
++ if (USB_CONFIG_ATT_WAKEUP & c->bmAttributes) {
++ /* Reset the rw bit if gadget is not capable of it */
++ if (!g->wakeup_capable && g->ops->set_remote_wakeup) {
++ WARN(c->cdev, "Clearing wakeup bit for config c.%d\n",
++ c->bConfigurationValue);
++ c->bmAttributes &= ~USB_CONFIG_ATT_WAKEUP;
++ }
++ }
++}
++
+ static int config_buf(struct usb_configuration *config,
+ enum usb_device_speed speed, void *buf, u8 type)
+ {
+@@ -959,6 +972,11 @@ static int set_config(struct usb_composite_dev *cdev,
+ power = min(power, 500U);
+ else
+ power = min(power, 900U);
++
++ if (USB_CONFIG_ATT_WAKEUP & c->bmAttributes)
++ usb_gadget_set_remote_wakeup(gadget, 1);
++ else
++ usb_gadget_set_remote_wakeup(gadget, 0);
+ done:
+ if (power <= USB_SELF_POWER_VBUS_MAX_DRAW)
+ usb_gadget_set_selfpowered(gadget);
+diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c
+index 4dcf29577f8f1..b94aec6227c51 100644
+--- a/drivers/usb/gadget/configfs.c
++++ b/drivers/usb/gadget/configfs.c
+@@ -1376,6 +1376,9 @@ static int configfs_composite_bind(struct usb_gadget *gadget,
+ if (gadget_is_otg(gadget))
+ c->descriptors = otg_desc;
+
++ /* Properly configure the bmAttributes wakeup bit */
++ check_remote_wakeup_config(gadget, c);
++
+ cfg = container_of(c, struct config_usb_cfg, c);
+ if (!list_empty(&cfg->string_list)) {
+ i = 0;
+diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c
+index c40f2ecbe1b8c..0edd9e53fc5a1 100644
+--- a/drivers/usb/gadget/udc/core.c
++++ b/drivers/usb/gadget/udc/core.c
+@@ -525,6 +525,33 @@ int usb_gadget_wakeup(struct usb_gadget *gadget)
+ }
+ EXPORT_SYMBOL_GPL(usb_gadget_wakeup);
+
++/**
++ * usb_gadget_set_remote_wakeup - configures the device remote wakeup feature.
++ * @gadget:the device being configured for remote wakeup
++ * @set:value to be configured.
++ *
++ * Pass one in @set to enable the remote wakeup feature and zero to disable it.
++ *
++ * returns zero on success, else negative errno.
++ */
++int usb_gadget_set_remote_wakeup(struct usb_gadget *gadget, int set)
++{
++ int ret = 0;
++
++ if (!gadget->ops->set_remote_wakeup) {
++ ret = -EOPNOTSUPP;
++ goto out;
++ }
++
++ ret = gadget->ops->set_remote_wakeup(gadget, set);
++
++out:
++ trace_usb_gadget_set_remote_wakeup(gadget, ret);
++
++ return ret;
++}
++EXPORT_SYMBOL_GPL(usb_gadget_set_remote_wakeup);
++
+ /**
+ * usb_gadget_set_selfpowered - sets the device selfpowered feature.
+ * @gadget:the device being declared as self-powered
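/*
 * Hypothetical UDC-side counterpart (the my_udc names are invented for
 * illustration): usb_gadget_set_remote_wakeup() above only forwards to
 * this controller callback, which composite.c's set_config() invokes
 * when a configuration with USB_CONFIG_ATT_WAKEUP is selected or
 * deselected.
 */
static int my_udc_set_remote_wakeup(struct usb_gadget *gadget, int set)
{
	struct my_udc *udc = container_of(gadget, struct my_udc, gadget);

	/* Latch the host-granted permission; a wakeup() implementation
	 * would consult this before signaling resume. */
	udc->remote_wakeup_armed = !!set;
	return 0;
}

static const struct usb_gadget_ops my_udc_ops = {
	/* ... other callbacks ... */
	.set_remote_wakeup = my_udc_set_remote_wakeup,
};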
+diff --git a/drivers/usb/gadget/udc/trace.h b/drivers/usb/gadget/udc/trace.h
+index abdbcb1bacb0b..a5ed26fbc2dad 100644
+--- a/drivers/usb/gadget/udc/trace.h
++++ b/drivers/usb/gadget/udc/trace.h
+@@ -91,6 +91,11 @@ DEFINE_EVENT(udc_log_gadget, usb_gadget_wakeup,
+ TP_ARGS(g, ret)
+ );
+
++DEFINE_EVENT(udc_log_gadget, usb_gadget_set_remote_wakeup,
++ TP_PROTO(struct usb_gadget *g, int ret),
++ TP_ARGS(g, ret)
++);
++
+ DEFINE_EVENT(udc_log_gadget, usb_gadget_set_selfpowered,
+ TP_PROTO(struct usb_gadget *g, int ret),
+ TP_ARGS(g, ret)
+diff --git a/drivers/usb/storage/scsiglue.c b/drivers/usb/storage/scsiglue.c
+index c54e9805da536..12cf9940e5b67 100644
+--- a/drivers/usb/storage/scsiglue.c
++++ b/drivers/usb/storage/scsiglue.c
+@@ -179,6 +179,13 @@ static int slave_configure(struct scsi_device *sdev)
+ */
+ sdev->use_192_bytes_for_3f = 1;
+
++ /*
++ * Some devices report generic values until the media has been
++ * accessed. Force a READ(10) prior to querying device
++ * characteristics.
++ */
++ sdev->read_before_ms = 1;
++
+ /*
+ * Some devices don't like MODE SENSE with page=0x3f,
+ * which is the command used for checking if a device
+diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c
+index de3836412bf32..ed22053b3252f 100644
+--- a/drivers/usb/storage/uas.c
++++ b/drivers/usb/storage/uas.c
+@@ -878,6 +878,13 @@ static int uas_slave_configure(struct scsi_device *sdev)
+ if (devinfo->flags & US_FL_CAPACITY_HEURISTICS)
+ sdev->guess_capacity = 1;
+
++ /*
++ * Some devices report generic values until the media has been
++ * accessed. Force a READ(10) prior to querying device
++ * characteristics.
++ */
++ sdev->read_before_ms = 1;
++
+ /*
+ * Some devices don't like MODE SENSE with page=0x3f,
+ * which is the command used for checking if a device
+diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c
+index fa205be94a4b8..14498a0d13e0b 100644
+--- a/drivers/video/fbdev/core/fbcon.c
++++ b/drivers/video/fbdev/core/fbcon.c
+@@ -2397,11 +2397,9 @@ static int fbcon_do_set_font(struct vc_data *vc, int w, int h, int charcount,
+ struct fbcon_ops *ops = info->fbcon_par;
+ struct fbcon_display *p = &fb_display[vc->vc_num];
+ int resize, ret, old_userfont, old_width, old_height, old_charcount;
+- char *old_data = NULL;
++ u8 *old_data = vc->vc_font.data;
+
+ resize = (w != vc->vc_font.width) || (h != vc->vc_font.height);
+- if (p->userfont)
+- old_data = vc->vc_font.data;
+ vc->vc_font.data = (void *)(p->fontdata = data);
+ old_userfont = p->userfont;
+ if ((p->userfont = userfont))
+@@ -2435,13 +2433,13 @@ static int fbcon_do_set_font(struct vc_data *vc, int w, int h, int charcount,
+ update_screen(vc);
+ }
+
+- if (old_data && (--REFCOUNT(old_data) == 0))
++ if (old_userfont && (--REFCOUNT(old_data) == 0))
+ kfree(old_data - FONT_EXTRA_WORDS * sizeof(int));
+ return 0;
+
+ err_out:
+ p->fontdata = old_data;
+- vc->vc_font.data = (void *)old_data;
++ vc->vc_font.data = old_data;
+
+ if (userfont) {
+ p->userfont = old_userfont;
+diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
+index 00f8e349921d4..96b96516c9806 100644
+--- a/drivers/xen/events/events_base.c
++++ b/drivers/xen/events/events_base.c
+@@ -937,8 +937,8 @@ static void shutdown_pirq(struct irq_data *data)
+ return;
+
+ do_mask(info, EVT_MASK_REASON_EXPLICIT);
+- xen_evtchn_close(evtchn);
+ xen_irq_info_cleanup(info);
++ xen_evtchn_close(evtchn);
+ }
+
+ static void enable_pirq(struct irq_data *data)
+@@ -982,8 +982,6 @@ static void __unbind_from_irq(unsigned int irq)
+ unsigned int cpu = cpu_from_irq(irq);
+ struct xenbus_device *dev;
+
+- xen_evtchn_close(evtchn);
+-
+ switch (type_from_irq(irq)) {
+ case IRQT_VIRQ:
+ per_cpu(virq_to_irq, cpu)[virq_from_irq(irq)] = -1;
+@@ -1001,6 +999,7 @@ static void __unbind_from_irq(unsigned int irq)
+ }
+
+ xen_irq_info_cleanup(info);
++ xen_evtchn_close(evtchn);
+ }
+
+ xen_free_irq(irq);
+diff --git a/fs/afs/dir.c b/fs/afs/dir.c
+index cf811b77ee671..6e2c967fae6fc 100644
+--- a/fs/afs/dir.c
++++ b/fs/afs/dir.c
+@@ -478,8 +478,10 @@ static int afs_dir_iterate_block(struct afs_vnode *dvnode,
+ dire->u.name[0] == '.' &&
+ ctx->actor != afs_lookup_filldir &&
+ ctx->actor != afs_lookup_one_filldir &&
+- memcmp(dire->u.name, ".__afs", 6) == 0)
++ memcmp(dire->u.name, ".__afs", 6) == 0) {
++ ctx->pos = blkoff + next * sizeof(union afs_xdr_dirent);
+ continue;
++ }
+
+ /* found the next entry */
+ if (!dir_emit(ctx, dire->u.name, nlen,
+diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
+index 61e58066b5fd2..9c856a73d5333 100644
+--- a/fs/btrfs/dev-replace.c
++++ b/fs/btrfs/dev-replace.c
+@@ -740,6 +740,23 @@ static int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info,
+ return ret;
+ }
+
++static int btrfs_check_replace_dev_names(struct btrfs_ioctl_dev_replace_args *args)
++{
++ if (args->start.srcdevid == 0) {
++ if (memchr(args->start.srcdev_name, 0,
++ sizeof(args->start.srcdev_name)) == NULL)
++ return -ENAMETOOLONG;
++ } else {
++ args->start.srcdev_name[0] = 0;
++ }
++
++ if (memchr(args->start.tgtdev_name, 0,
++ sizeof(args->start.tgtdev_name)) == NULL)
++ return -ENAMETOOLONG;
++
++ return 0;
++}
++
+ int btrfs_dev_replace_by_ioctl(struct btrfs_fs_info *fs_info,
+ struct btrfs_ioctl_dev_replace_args *args)
+ {
+@@ -752,10 +769,9 @@ int btrfs_dev_replace_by_ioctl(struct btrfs_fs_info *fs_info,
+ default:
+ return -EINVAL;
+ }
+-
+- if ((args->start.srcdevid == 0 && args->start.srcdev_name[0] == '\0') ||
+- args->start.tgtdev_name[0] == '\0')
+- return -EINVAL;
++ ret = btrfs_check_replace_dev_names(args);
++ if (ret < 0)
++ return ret;
+
+ ret = btrfs_dev_replace_start(fs_info, args->start.tgtdev_name,
+ args->start.srcdevid,
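/*
 * The validation pattern btrfs_check_replace_dev_names() above relies
 * on, shown standalone (helper name is illustrative): a fixed-size
 * name buffer arriving from user space may only be treated as a C
 * string once a terminating NUL is known to lie within its bounds.
 */
#include <linux/string.h>
#include <linux/types.h>

static bool name_is_nul_terminated(const char *buf, size_t size)
{
	return memchr(buf, '\0', size) != NULL;
}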
+diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
+index 0d1b05ded1e35..5756edb37c61e 100644
+--- a/fs/btrfs/disk-io.c
++++ b/fs/btrfs/disk-io.c
+@@ -1643,12 +1643,12 @@ void btrfs_free_fs_info(struct btrfs_fs_info *fs_info)
+ *
+ * @objectid: root id
+ * @anon_dev: preallocated anonymous block device number for new roots,
+- * pass 0 for new allocation.
++ * pass NULL for a new allocation.
+ * @check_ref: whether to check root item references, If true, return -ENOENT
+ * for orphan roots
+ */
+ static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info,
+- u64 objectid, dev_t anon_dev,
++ u64 objectid, dev_t *anon_dev,
+ bool check_ref)
+ {
+ struct btrfs_root *root;
+@@ -1668,9 +1668,9 @@ static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info,
+ * that common but still possible. In that case, we just need
+ * to free the anon_dev.
+ */
+- if (unlikely(anon_dev)) {
+- free_anon_bdev(anon_dev);
+- anon_dev = 0;
++ if (unlikely(anon_dev && *anon_dev)) {
++ free_anon_bdev(*anon_dev);
++ *anon_dev = 0;
+ }
+
+ if (check_ref && btrfs_root_refs(&root->root_item) == 0) {
+@@ -1692,7 +1692,7 @@ static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info,
+ goto fail;
+ }
+
+- ret = btrfs_init_fs_root(root, anon_dev);
++ ret = btrfs_init_fs_root(root, anon_dev ? *anon_dev : 0);
+ if (ret)
+ goto fail;
+
+@@ -1728,7 +1728,7 @@ static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info,
+ * root's anon_dev to 0 to avoid a double free, once by btrfs_put_root()
+ * and once again by our caller.
+ */
+- if (anon_dev)
++ if (anon_dev && *anon_dev)
+ root->anon_dev = 0;
+ btrfs_put_root(root);
+ return ERR_PTR(ret);
+@@ -1744,7 +1744,7 @@ static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info,
+ struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
+ u64 objectid, bool check_ref)
+ {
+- return btrfs_get_root_ref(fs_info, objectid, 0, check_ref);
++ return btrfs_get_root_ref(fs_info, objectid, NULL, check_ref);
+ }
+
+ /*
+@@ -1752,11 +1752,11 @@ struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
+ * the anonymous block device id
+ *
+ * @objectid: tree objectid
+- * @anon_dev: if zero, allocate a new anonymous block device or use the
+- * parameter value
++ * @anon_dev: if NULL, allocate a new anonymous block device or use the
++ * parameter value if not NULL
+ */
+ struct btrfs_root *btrfs_get_new_fs_root(struct btrfs_fs_info *fs_info,
+- u64 objectid, dev_t anon_dev)
++ u64 objectid, dev_t *anon_dev)
+ {
+ return btrfs_get_root_ref(fs_info, objectid, anon_dev, true);
+ }
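/*
 * A hedged sketch of the caller contract after the switch to a dev_t
 * pointer (function name is illustrative; compare the create_subvol()
 * hunk in fs/btrfs/ioctl.c below): because btrfs_get_root_ref() now
 * zeroes *anon_dev whenever it frees the preallocated device number,
 * the caller's cleanup path can test the value and free at most once.
 */
static struct btrfs_root *example_new_root(struct btrfs_fs_info *fs_info,
					   u64 objectid)
{
	struct btrfs_root *root;
	dev_t anon_dev = 0;
	int ret;

	ret = get_anon_bdev(&anon_dev);		/* preallocate */
	if (ret)
		return ERR_PTR(ret);

	root = btrfs_get_new_fs_root(fs_info, objectid, &anon_dev);
	if (IS_ERR(root) && anon_dev)
		free_anon_bdev(anon_dev);	/* not consumed: free once here */
	return root;
}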
+diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
+index 7322af63c0cc7..24bddca86e9c9 100644
+--- a/fs/btrfs/disk-io.h
++++ b/fs/btrfs/disk-io.h
+@@ -65,7 +65,7 @@ void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info);
+ struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
+ u64 objectid, bool check_ref);
+ struct btrfs_root *btrfs_get_new_fs_root(struct btrfs_fs_info *fs_info,
+- u64 objectid, dev_t anon_dev);
++ u64 objectid, dev_t *anon_dev);
+ struct btrfs_root *btrfs_get_fs_root_commit_root(struct btrfs_fs_info *fs_info,
+ struct btrfs_path *path,
+ u64 objectid);
+diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
+index 196e222749ccd..64b37afb7c87f 100644
+--- a/fs/btrfs/ioctl.c
++++ b/fs/btrfs/ioctl.c
+@@ -708,7 +708,7 @@ static noinline int create_subvol(struct user_namespace *mnt_userns,
+ free_extent_buffer(leaf);
+ leaf = NULL;
+
+- new_root = btrfs_get_new_fs_root(fs_info, objectid, anon_dev);
++ new_root = btrfs_get_new_fs_root(fs_info, objectid, &anon_dev);
+ if (IS_ERR(new_root)) {
+ ret = PTR_ERR(new_root);
+ btrfs_abort_transaction(trans, ret);
+diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
+index a75669972dc73..9f7ffd9ef6fd7 100644
+--- a/fs/btrfs/send.c
++++ b/fs/btrfs/send.c
+@@ -6462,11 +6462,20 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
+ if (ret)
+ goto out;
+ }
+- if (sctx->cur_inode_last_extent <
+- sctx->cur_inode_size) {
+- ret = send_hole(sctx, sctx->cur_inode_size);
+- if (ret)
++ if (sctx->cur_inode_last_extent < sctx->cur_inode_size) {
++ ret = range_is_hole_in_parent(sctx,
++ sctx->cur_inode_last_extent,
++ sctx->cur_inode_size);
++ if (ret < 0) {
+ goto out;
++ } else if (ret == 0) {
++ ret = send_hole(sctx, sctx->cur_inode_size);
++ if (ret < 0)
++ goto out;
++ } else {
++ /* Range is already a hole, skip. */
++ ret = 0;
++ }
+ }
+ }
+ if (need_truncate) {
+diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
+index 60db4c3b82fa1..b172091f42612 100644
+--- a/fs/btrfs/transaction.c
++++ b/fs/btrfs/transaction.c
+@@ -1809,7 +1809,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
+ }
+
+ key.offset = (u64)-1;
+- pending->snap = btrfs_get_new_fs_root(fs_info, objectid, pending->anon_dev);
++ pending->snap = btrfs_get_new_fs_root(fs_info, objectid, &pending->anon_dev);
+ if (IS_ERR(pending->snap)) {
+ ret = PTR_ERR(pending->snap);
+ pending->snap = NULL;
+diff --git a/fs/efivarfs/vars.c b/fs/efivarfs/vars.c
+index 9e4f47808bd5a..13bc606989557 100644
+--- a/fs/efivarfs/vars.c
++++ b/fs/efivarfs/vars.c
+@@ -372,7 +372,7 @@ static void dup_variable_bug(efi_char16_t *str16, efi_guid_t *vendor_guid,
+ int efivar_init(int (*func)(efi_char16_t *, efi_guid_t, unsigned long, void *),
+ void *data, bool duplicates, struct list_head *head)
+ {
+- unsigned long variable_name_size = 1024;
++ unsigned long variable_name_size = 512;
+ efi_char16_t *variable_name;
+ efi_status_t status;
+ efi_guid_t vendor_guid;
+@@ -389,12 +389,13 @@ int efivar_init(int (*func)(efi_char16_t *, efi_guid_t, unsigned long, void *),
+ goto free;
+
+ /*
+- * Per EFI spec, the maximum storage allocated for both
+- * the variable name and variable data is 1024 bytes.
++ * A small set of old UEFI implementations reject sizes
++	 * above a certain threshold; the lowest seen in the wild
++ * is 512.
+ */
+
+ do {
+- variable_name_size = 1024;
++ variable_name_size = 512;
+
+ status = efivar_get_next_variable(&variable_name_size,
+ variable_name,
+@@ -431,9 +432,13 @@ int efivar_init(int (*func)(efi_char16_t *, efi_guid_t, unsigned long, void *),
+ break;
+ case EFI_NOT_FOUND:
+ break;
++ case EFI_BUFFER_TOO_SMALL:
++ pr_warn("efivars: Variable name size exceeds maximum (%lu > 512)\n",
++ variable_name_size);
++ status = EFI_NOT_FOUND;
++ break;
+ default:
+- printk(KERN_WARNING "efivars: get_next_variable: status=%lx\n",
+- status);
++ pr_warn("efivars: get_next_variable: status=%lx\n", status);
+ status = EFI_NOT_FOUND;
+ break;
+ }
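/*
 * Context for the EFI_BUFFER_TOO_SMALL case added above: per the UEFI
 * specification, GetNextVariableName() writes the required size back
 * through its size argument when the supplied buffer is too small,
 * which is the value the new warning prints. The loop deliberately
 * keeps the fixed 512-byte buffer instead of growing it, since the
 * quirky firmware being accommodated rejects larger sizes outright.
 * Minimal shape of one probe (function name is illustrative):
 */
static void example_probe_name_size(efi_char16_t *name, efi_guid_t *vendor)
{
	unsigned long size = 512;
	efi_status_t status;

	status = efivar_get_next_variable(&size, name, vendor);
	if (status == EFI_BUFFER_TOO_SMALL)
		pr_warn("efivars: variable name needs %lu bytes\n", size);
}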
+diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
+index c648a493faf23..3204bd33e4e8a 100644
+--- a/fs/exportfs/expfs.c
++++ b/fs/exportfs/expfs.c
+@@ -18,7 +18,7 @@
+ #include <linux/sched.h>
+ #include <linux/cred.h>
+
+-#define dprintk(fmt, args...) do{}while(0)
++#define dprintk(fmt, args...) pr_debug(fmt, ##args)
+
+
+ static int get_name(const struct path *path, char *name, struct dentry *child);
+@@ -132,8 +132,8 @@ static struct dentry *reconnect_one(struct vfsmount *mnt,
+ inode_unlock(dentry->d_inode);
+
+ if (IS_ERR(parent)) {
+- dprintk("%s: get_parent of %ld failed, err %d\n",
+- __func__, dentry->d_inode->i_ino, PTR_ERR(parent));
++ dprintk("get_parent of %lu failed, err %ld\n",
++ dentry->d_inode->i_ino, PTR_ERR(parent));
+ return parent;
+ }
+
+@@ -147,7 +147,7 @@ static struct dentry *reconnect_one(struct vfsmount *mnt,
+ dprintk("%s: found name: %s\n", __func__, nbuf);
+ tmp = lookup_one_unlocked(mnt_user_ns(mnt), nbuf, parent, strlen(nbuf));
+ if (IS_ERR(tmp)) {
+- dprintk("%s: lookup failed: %d\n", __func__, PTR_ERR(tmp));
++ dprintk("lookup failed: %ld\n", PTR_ERR(tmp));
+ err = PTR_ERR(tmp);
+ goto out_err;
+ }
+diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
+index 284b019cb6529..b72023a6b4c16 100644
+--- a/fs/lockd/svc4proc.c
++++ b/fs/lockd/svc4proc.c
+@@ -52,6 +52,7 @@ nlm4svc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp,
+ *filp = file;
+
+ /* Set up the missing parts of the file_lock structure */
++ lock->fl.fl_flags = FL_POSIX;
+ lock->fl.fl_file = file->f_file[mode];
+ lock->fl.fl_pid = current->tgid;
+ lock->fl.fl_start = (loff_t)lock->lock_start;
+diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
+index 9c1aa75441e1c..4e30f3c509701 100644
+--- a/fs/lockd/svclock.c
++++ b/fs/lockd/svclock.c
+@@ -659,11 +659,13 @@ nlmsvc_unlock(struct net *net, struct nlm_file *file, struct nlm_lock *lock)
+ nlmsvc_cancel_blocked(net, file, lock);
+
+ lock->fl.fl_type = F_UNLCK;
+- if (file->f_file[O_RDONLY])
+- error = vfs_lock_file(file->f_file[O_RDONLY], F_SETLK,
++ lock->fl.fl_file = file->f_file[O_RDONLY];
++ if (lock->fl.fl_file)
++ error = vfs_lock_file(lock->fl.fl_file, F_SETLK,
+ &lock->fl, NULL);
+- if (file->f_file[O_WRONLY])
+- error = vfs_lock_file(file->f_file[O_WRONLY], F_SETLK,
++ lock->fl.fl_file = file->f_file[O_WRONLY];
++ if (lock->fl.fl_file)
++ error |= vfs_lock_file(lock->fl.fl_file, F_SETLK,
+ &lock->fl, NULL);
+
+ return (error < 0)? nlm_lck_denied_nolocks : nlm_granted;
+@@ -697,9 +699,10 @@ nlmsvc_cancel_blocked(struct net *net, struct nlm_file *file, struct nlm_lock *l
+ block = nlmsvc_lookup_block(file, lock);
+ mutex_unlock(&file->f_mutex);
+ if (block != NULL) {
+- mode = lock_to_openmode(&lock->fl);
+- vfs_cancel_lock(block->b_file->f_file[mode],
+- &block->b_call->a_args.lock.fl);
++ struct file_lock *fl = &block->b_call->a_args.lock.fl;
++
++ mode = lock_to_openmode(fl);
++ vfs_cancel_lock(block->b_file->f_file[mode], fl);
+ status = nlmsvc_unlink_block(block);
+ nlmsvc_release_block(block);
+ }
+diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
+index e35c05e278061..32784f508c810 100644
+--- a/fs/lockd/svcproc.c
++++ b/fs/lockd/svcproc.c
+@@ -77,6 +77,7 @@ nlmsvc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp,
+
+ /* Set up the missing parts of the file_lock structure */
+ mode = lock_to_openmode(&lock->fl);
++ lock->fl.fl_flags = FL_POSIX;
+ lock->fl.fl_file = file->f_file[mode];
+ lock->fl.fl_pid = current->tgid;
+ lock->fl.fl_lmops = &nlmsvc_lock_operations;
+diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
+index 3515f17eaf3fb..e3b6229e7ae5c 100644
+--- a/fs/lockd/svcsubs.c
++++ b/fs/lockd/svcsubs.c
+@@ -210,7 +210,7 @@ nlm_traverse_locks(struct nlm_host *host, struct nlm_file *file,
+ {
+ struct inode *inode = nlmsvc_file_inode(file);
+ struct file_lock *fl;
+- struct file_lock_context *flctx = inode->i_flctx;
++ struct file_lock_context *flctx = locks_inode_context(inode);
+ struct nlm_host *lockhost;
+
+ if (!flctx || list_empty_careful(&flctx->flc_posix))
+@@ -265,7 +265,7 @@ nlm_file_inuse(struct nlm_file *file)
+ {
+ struct inode *inode = nlmsvc_file_inode(file);
+ struct file_lock *fl;
+- struct file_lock_context *flctx = inode->i_flctx;
++ struct file_lock_context *flctx = locks_inode_context(inode);
+
+ if (file->f_count || !list_empty(&file->f_blocks) || file->f_shares)
+ return 1;
+diff --git a/fs/locks.c b/fs/locks.c
+index 1047ab2b15e96..7d0918b8fe5d6 100644
+--- a/fs/locks.c
++++ b/fs/locks.c
+@@ -175,7 +175,7 @@ locks_get_lock_context(struct inode *inode, int type)
+ struct file_lock_context *ctx;
+
+ /* paired with cmpxchg() below */
+- ctx = smp_load_acquire(&inode->i_flctx);
++ ctx = locks_inode_context(inode);
+ if (likely(ctx) || type == F_UNLCK)
+ goto out;
+
+@@ -194,7 +194,7 @@ locks_get_lock_context(struct inode *inode, int type)
+ */
+ if (cmpxchg(&inode->i_flctx, NULL, ctx)) {
+ kmem_cache_free(flctx_cache, ctx);
+- ctx = smp_load_acquire(&inode->i_flctx);
++ ctx = locks_inode_context(inode);
+ }
+ out:
+ trace_locks_get_lock_context(inode, type, ctx);
+@@ -247,7 +247,7 @@ locks_check_ctx_file_list(struct file *filp, struct list_head *list,
+ void
+ locks_free_lock_context(struct inode *inode)
+ {
+- struct file_lock_context *ctx = inode->i_flctx;
++ struct file_lock_context *ctx = locks_inode_context(inode);
+
+ if (unlikely(ctx)) {
+ locks_check_ctx_lists(inode);
+@@ -891,7 +891,7 @@ posix_test_lock(struct file *filp, struct file_lock *fl)
+ void *owner;
+ void (*func)(void);
+
+- ctx = smp_load_acquire(&inode->i_flctx);
++ ctx = locks_inode_context(inode);
+ if (!ctx || list_empty_careful(&ctx->flc_posix)) {
+ fl->fl_type = F_UNLCK;
+ return;
+@@ -1483,7 +1483,7 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
+ new_fl->fl_flags = type;
+
+ /* typically we will check that ctx is non-NULL before calling */
+- ctx = smp_load_acquire(&inode->i_flctx);
++ ctx = locks_inode_context(inode);
+ if (!ctx) {
+ WARN_ON_ONCE(1);
+ goto free_lock;
+@@ -1588,7 +1588,7 @@ void lease_get_mtime(struct inode *inode, struct timespec64 *time)
+ struct file_lock_context *ctx;
+ struct file_lock *fl;
+
+- ctx = smp_load_acquire(&inode->i_flctx);
++ ctx = locks_inode_context(inode);
+ if (ctx && !list_empty_careful(&ctx->flc_lease)) {
+ spin_lock(&ctx->flc_lock);
+ fl = list_first_entry_or_null(&ctx->flc_lease,
+@@ -1634,7 +1634,7 @@ int fcntl_getlease(struct file *filp)
+ int type = F_UNLCK;
+ LIST_HEAD(dispose);
+
+- ctx = smp_load_acquire(&inode->i_flctx);
++ ctx = locks_inode_context(inode);
+ if (ctx && !list_empty_careful(&ctx->flc_lease)) {
+ percpu_down_read(&file_rwsem);
+ spin_lock(&ctx->flc_lock);
+@@ -1823,7 +1823,7 @@ static int generic_delete_lease(struct file *filp, void *owner)
+ struct file_lock_context *ctx;
+ LIST_HEAD(dispose);
+
+- ctx = smp_load_acquire(&inode->i_flctx);
++ ctx = locks_inode_context(inode);
+ if (!ctx) {
+ trace_generic_delete_lease(inode, NULL);
+ return error;
+@@ -2562,7 +2562,7 @@ void locks_remove_posix(struct file *filp, fl_owner_t owner)
+ * posix_lock_file(). Another process could be setting a lock on this
+ * file at the same time, but we wouldn't remove that lock anyway.
+ */
+- ctx = smp_load_acquire(&inode->i_flctx);
++ ctx = locks_inode_context(inode);
+ if (!ctx || list_empty(&ctx->flc_posix))
+ return;
+
+@@ -2635,7 +2635,7 @@ void locks_remove_file(struct file *filp)
+ {
+ struct file_lock_context *ctx;
+
+- ctx = smp_load_acquire(&locks_inode(filp)->i_flctx);
++ ctx = locks_inode_context(locks_inode(filp));
+ if (!ctx)
+ return;
+
+@@ -2682,7 +2682,7 @@ bool vfs_inode_has_locks(struct inode *inode)
+ struct file_lock_context *ctx;
+ bool ret;
+
+- ctx = smp_load_acquire(&inode->i_flctx);
++ ctx = locks_inode_context(inode);
+ if (!ctx)
+ return false;
+
+@@ -2863,7 +2863,7 @@ void show_fd_locks(struct seq_file *f,
+ struct file_lock_context *ctx;
+ int id = 0;
+
+- ctx = smp_load_acquire(&inode->i_flctx);
++ ctx = locks_inode_context(inode);
+ if (!ctx)
+ return;
+
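/*
 * The accessor these fs/locks.c and lockd hunks substitute for the
 * open-coded loads; as added to include/linux/fs.h by the same series,
 * its smp_load_acquire() pairs with the cmpxchg() that publishes
 * inode->i_flctx in locks_get_lock_context() above.
 */
static inline struct file_lock_context *
locks_inode_context(const struct inode *inode)
{
	return smp_load_acquire(&inode->i_flctx);
}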
+diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h
+index 3fa77ad7258f2..c8a57cfde64b4 100644
+--- a/fs/nfs/nfs4trace.h
++++ b/fs/nfs/nfs4trace.h
+@@ -9,10 +9,10 @@
+ #define _TRACE_NFS4_H
+
+ #include <linux/tracepoint.h>
+-#include <trace/events/sunrpc_base.h>
++#include <trace/misc/sunrpc.h>
+
+-#include <trace/events/fs.h>
+-#include <trace/events/nfs.h>
++#include <trace/misc/fs.h>
++#include <trace/misc/nfs.h>
+
+ #define show_nfs_fattr_flags(valid) \
+ __print_flags((unsigned long)valid, "|", \
+diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h
+index 8c6cc58679ff8..642f6921852fa 100644
+--- a/fs/nfs/nfstrace.h
++++ b/fs/nfs/nfstrace.h
+@@ -11,9 +11,9 @@
+ #include <linux/tracepoint.h>
+ #include <linux/iversion.h>
+
+-#include <trace/events/fs.h>
+-#include <trace/events/nfs.h>
+-#include <trace/events/sunrpc_base.h>
++#include <trace/misc/fs.h>
++#include <trace/misc/nfs.h>
++#include <trace/misc/sunrpc.h>
+
+ #define nfs_show_cache_validity(v) \
+ __print_flags(v, "|", \
+diff --git a/fs/nfs/write.c b/fs/nfs/write.c
+index f41d24b54fd1f..6a06066684172 100644
+--- a/fs/nfs/write.c
++++ b/fs/nfs/write.c
+@@ -667,8 +667,10 @@ static int nfs_writepage_locked(struct page *page,
+ int err;
+
+ if (wbc->sync_mode == WB_SYNC_NONE &&
+- NFS_SERVER(inode)->write_congested)
++ NFS_SERVER(inode)->write_congested) {
++ redirty_page_for_writepage(wbc, page);
+ return AOP_WRITEPAGE_ACTIVATE;
++ }
+
+ nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE);
+ nfs_pageio_init_write(&pgio, inode, 0,
+diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig
+index f6a2fd3015e75..7c441f2bd4440 100644
+--- a/fs/nfsd/Kconfig
++++ b/fs/nfsd/Kconfig
+@@ -8,6 +8,7 @@ config NFSD
+ select SUNRPC
+ select EXPORTFS
+ select NFS_ACL_SUPPORT if NFSD_V2_ACL
++ select NFS_ACL_SUPPORT if NFSD_V3_ACL
+ depends on MULTIUSER
+ help
+ Choose Y here if you want to allow other computers to access
+@@ -26,19 +27,29 @@ config NFSD
+
+ Below you can choose which versions of the NFS protocol are
+ available to clients mounting the NFS server on this system.
+- Support for NFS version 2 (RFC 1094) is always available when
++ Support for NFS version 3 (RFC 1813) is always available when
+ CONFIG_NFSD is selected.
+
+ If unsure, say N.
+
+-config NFSD_V2_ACL
+- bool
++config NFSD_V2
++ bool "NFS server support for NFS version 2 (DEPRECATED)"
+ depends on NFSD
++ default n
++ help
++ NFSv2 (RFC 1094) was the first publicly-released version of NFS.
++ Unless you are hosting ancient (1990's era) NFS clients, you don't
++ need this.
++
++ If unsure, say N.
++
++config NFSD_V2_ACL
++ bool "NFS server support for the NFSv2 ACL protocol extension"
++ depends on NFSD_V2
+
+ config NFSD_V3_ACL
+ bool "NFS server support for the NFSv3 ACL protocol extension"
+ depends on NFSD
+- select NFSD_V2_ACL
+ help
+ Solaris NFS servers support an auxiliary NFSv3 ACL protocol that
+ never became an official part of the NFS version 3 protocol.
+diff --git a/fs/nfsd/Makefile b/fs/nfsd/Makefile
+index 805c06d5f1b4b..6fffc8f03f740 100644
+--- a/fs/nfsd/Makefile
++++ b/fs/nfsd/Makefile
+@@ -10,9 +10,10 @@ obj-$(CONFIG_NFSD) += nfsd.o
+ # this one should be compiled first, as the tracing macros can easily blow up
+ nfsd-y += trace.o
+
+-nfsd-y += nfssvc.o nfsctl.o nfsproc.o nfsfh.o vfs.o \
+- export.o auth.o lockd.o nfscache.o nfsxdr.o \
++nfsd-y += nfssvc.o nfsctl.o nfsfh.o vfs.o \
++ export.o auth.o lockd.o nfscache.o \
+ stats.o filecache.o nfs3proc.o nfs3xdr.o
++nfsd-$(CONFIG_NFSD_V2) += nfsproc.o nfsxdr.o
+ nfsd-$(CONFIG_NFSD_V2_ACL) += nfs2acl.o
+ nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o
+ nfsd-$(CONFIG_NFSD_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \
+diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
+index e7e6e78d965db..01d7fd108cf3d 100644
+--- a/fs/nfsd/blocklayout.c
++++ b/fs/nfsd/blocklayout.c
+@@ -12,6 +12,7 @@
+ #include "blocklayoutxdr.h"
+ #include "pnfs.h"
+ #include "filecache.h"
++#include "vfs.h"
+
+ #define NFSDDBG_FACILITY NFSDDBG_PNFS
+
+diff --git a/fs/nfsd/blocklayoutxdr.c b/fs/nfsd/blocklayoutxdr.c
+index 2455dc8be18a8..1ed2f691ebb90 100644
+--- a/fs/nfsd/blocklayoutxdr.c
++++ b/fs/nfsd/blocklayoutxdr.c
+@@ -9,6 +9,7 @@
+
+ #include "nfsd.h"
+ #include "blocklayoutxdr.h"
++#include "vfs.h"
+
+ #define NFSDDBG_FACILITY NFSDDBG_PNFS
+
+diff --git a/fs/nfsd/export.h b/fs/nfsd/export.h
+index ee0e3aba4a6e5..d03f7f6a8642d 100644
+--- a/fs/nfsd/export.h
++++ b/fs/nfsd/export.h
+@@ -115,7 +115,6 @@ struct svc_export * rqst_find_fsidzero_export(struct svc_rqst *);
+ int exp_rootfh(struct net *, struct auth_domain *,
+ char *path, struct knfsd_fh *, int maxsize);
+ __be32 exp_pseudoroot(struct svc_rqst *, struct svc_fh *);
+-__be32 nfserrno(int errno);
+
+ static inline void exp_put(struct svc_export *exp)
+ {
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index 5cb8cce153a57..697acf5c3c681 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -1,7 +1,32 @@
++// SPDX-License-Identifier: GPL-2.0
+ /*
+- * Open file cache.
++ * The NFSD open file cache.
+ *
+ * (c) 2015 - Jeff Layton <jeff.layton@primarydata.com>
++ *
++ * An nfsd_file object is a per-file collection of open state that binds
++ * together:
++ * - a struct file *
++ * - a user credential
++ * - a network namespace
++ * - a read-ahead context
++ * - monitoring for writeback errors
++ *
++ * nfsd_file objects are reference-counted. Consumers acquire a new
++ * object via the nfsd_file_acquire API. They manage their interest in
++ * the acquired object, and hence the object's reference count, via
++ * nfsd_file_get and nfsd_file_put. There are two varieties of nfsd_file
++ * object:
++ *
++ * * non-garbage-collected: When a consumer wants to precisely control
++ * the lifetime of a file's open state, it acquires a non-garbage-
++ * collected nfsd_file. The final nfsd_file_put releases the open
++ * state immediately.
++ *
++ * * garbage-collected: When a consumer does not control the lifetime
++ * of open state, it acquires a garbage-collected nfsd_file. The
++ * final nfsd_file_put allows the open state to linger for a period
++ * during which it may be re-used.
+ */
+
+ #include <linux/hash.h>
+@@ -186,12 +211,9 @@ static const struct rhashtable_params nfsd_file_rhash_params = {
+ static void
+ nfsd_file_schedule_laundrette(void)
+ {
+- if ((atomic_read(&nfsd_file_rhash_tbl.nelems) == 0) ||
+- test_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 0)
+- return;
+-
+- queue_delayed_work(system_wq, &nfsd_filecache_laundrette,
+- NFSD_LAUNDRETTE_DELAY);
++ if (test_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags))
++ queue_delayed_work(system_wq, &nfsd_filecache_laundrette,
++ NFSD_LAUNDRETTE_DELAY);
+ }
+
+ static void
+@@ -589,7 +611,8 @@ static void
+ nfsd_file_gc_worker(struct work_struct *work)
+ {
+ nfsd_file_gc();
+- nfsd_file_schedule_laundrette();
++ if (list_lru_count(&nfsd_file_lru))
++ nfsd_file_schedule_laundrette();
+ }
+
+ static unsigned long
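/*
 * A hedged sketch of the consumer pattern the new filecache header
 * comment describes, assuming the 6.1-era nfsd_file_acquire()
 * signature (the wrapper name is illustrative). For a garbage-
 * collected nfsd_file the final put merely lets the open state
 * linger for possible reuse.
 */
static __be32 example_with_nfsd_file(struct svc_rqst *rqstp,
				     struct svc_fh *fhp)
{
	struct nfsd_file *nf;
	__be32 status;

	status = nfsd_file_acquire(rqstp, fhp, NFSD_MAY_READ, &nf);
	if (status != nfs_ok)
		return status;

	/* ... issue I/O through nf->nf_file ... */

	nfsd_file_put(nf);	/* drop this consumer's reference */
	return nfs_ok;
}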
+diff --git a/fs/nfsd/flexfilelayout.c b/fs/nfsd/flexfilelayout.c
+index 070f90ed09b61..3ca5304440ff0 100644
+--- a/fs/nfsd/flexfilelayout.c
++++ b/fs/nfsd/flexfilelayout.c
+@@ -15,6 +15,7 @@
+
+ #include "flexfilelayoutxdr.h"
+ #include "pnfs.h"
++#include "vfs.h"
+
+ #define NFSDDBG_FACILITY NFSDDBG_PNFS
+
+diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
+index 8c854ba3285bb..51a4b7885cae2 100644
+--- a/fs/nfsd/netns.h
++++ b/fs/nfsd/netns.h
+@@ -195,7 +195,7 @@ struct nfsd_net {
+
+ atomic_t nfsd_courtesy_clients;
+ struct shrinker nfsd_client_shrinker;
+- struct delayed_work nfsd_shrinker_work;
++ struct work_struct nfsd_shrinker_work;
+ };
+
+ /* Simple check to find out if a given net was properly initialized */
+diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
+index 39989c14c8a1e..4eae2c5af2edf 100644
+--- a/fs/nfsd/nfs4callback.c
++++ b/fs/nfsd/nfs4callback.c
+@@ -76,6 +76,17 @@ static __be32 *xdr_encode_empty_array(__be32 *p)
+ * 1 Protocol"
+ */
+
++static void encode_uint32(struct xdr_stream *xdr, u32 n)
++{
++ WARN_ON_ONCE(xdr_stream_encode_u32(xdr, n) < 0);
++}
++
++static void encode_bitmap4(struct xdr_stream *xdr, const __u32 *bitmap,
++ size_t len)
++{
++ WARN_ON_ONCE(xdr_stream_encode_uint32_array(xdr, bitmap, len) < 0);
++}
++
+ /*
+ * nfs_cb_opnum4
+ *
+@@ -328,6 +339,24 @@ static void encode_cb_recall4args(struct xdr_stream *xdr,
+ hdr->nops++;
+ }
+
++/*
++ * CB_RECALLANY4args
++ *
++ * struct CB_RECALLANY4args {
++ * uint32_t craa_objects_to_keep;
++ * bitmap4 craa_type_mask;
++ * };
++ */
++static void
++encode_cb_recallany4args(struct xdr_stream *xdr,
++ struct nfs4_cb_compound_hdr *hdr, struct nfsd4_cb_recall_any *ra)
++{
++ encode_nfs_cb_opnum4(xdr, OP_CB_RECALL_ANY);
++ encode_uint32(xdr, ra->ra_keep);
++ encode_bitmap4(xdr, ra->ra_bmval, ARRAY_SIZE(ra->ra_bmval));
++ hdr->nops++;
++}
++
+ /*
+ * CB_SEQUENCE4args
+ *
+@@ -482,6 +511,26 @@ static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr,
+ encode_cb_nops(&hdr);
+ }
+
++/*
++ * 20.6. Operation 8: CB_RECALL_ANY - Keep Any N Recallable Objects
++ */
++static void
++nfs4_xdr_enc_cb_recall_any(struct rpc_rqst *req,
++ struct xdr_stream *xdr, const void *data)
++{
++ const struct nfsd4_callback *cb = data;
++ struct nfsd4_cb_recall_any *ra;
++ struct nfs4_cb_compound_hdr hdr = {
++ .ident = cb->cb_clp->cl_cb_ident,
++ .minorversion = cb->cb_clp->cl_minorversion,
++ };
++
++ ra = container_of(cb, struct nfsd4_cb_recall_any, ra_cb);
++ encode_cb_compound4args(xdr, &hdr);
++ encode_cb_sequence4args(xdr, cb, &hdr);
++ encode_cb_recallany4args(xdr, &hdr, ra);
++ encode_cb_nops(&hdr);
++}
+
+ /*
+ * NFSv4.0 and NFSv4.1 XDR decode functions
+@@ -520,6 +569,28 @@ static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp,
+ return decode_cb_op_status(xdr, OP_CB_RECALL, &cb->cb_status);
+ }
+
++/*
++ * 20.6. Operation 8: CB_RECALL_ANY - Keep Any N Recallable Objects
++ */
++static int
++nfs4_xdr_dec_cb_recall_any(struct rpc_rqst *rqstp,
++ struct xdr_stream *xdr,
++ void *data)
++{
++ struct nfsd4_callback *cb = data;
++ struct nfs4_cb_compound_hdr hdr;
++ int status;
++
++ status = decode_cb_compound4res(xdr, &hdr);
++ if (unlikely(status))
++ return status;
++ status = decode_cb_sequence4res(xdr, cb);
++ if (unlikely(status || cb->cb_seq_status))
++ return status;
++ status = decode_cb_op_status(xdr, OP_CB_RECALL_ANY, &cb->cb_status);
++ return status;
++}
++
+ #ifdef CONFIG_NFSD_PNFS
+ /*
+ * CB_LAYOUTRECALL4args
+@@ -783,6 +854,7 @@ static const struct rpc_procinfo nfs4_cb_procedures[] = {
+ #endif
+ PROC(CB_NOTIFY_LOCK, COMPOUND, cb_notify_lock, cb_notify_lock),
+ PROC(CB_OFFLOAD, COMPOUND, cb_offload, cb_offload),
++ PROC(CB_RECALL_ANY, COMPOUND, cb_recall_any, cb_recall_any),
+ };
+
+ static unsigned int nfs4_cb_counts[ARRAY_SIZE(nfs4_cb_procedures)];
+diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c
+index e70a1a2999b7b..5e9809aff37eb 100644
+--- a/fs/nfsd/nfs4idmap.c
++++ b/fs/nfsd/nfs4idmap.c
+@@ -41,6 +41,7 @@
+ #include "idmap.h"
+ #include "nfsd.h"
+ #include "netns.h"
++#include "vfs.h"
+
+ /*
+ * Turn off idmapping when using AUTH_SYS.
+diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
+index a9105e95b59c5..ba53cd89ec62c 100644
+--- a/fs/nfsd/nfs4proc.c
++++ b/fs/nfsd/nfs4proc.c
+@@ -943,12 +943,7 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
+ &read->rd_stateid, RD_STATE,
+ &read->rd_nf, NULL);
+- if (status) {
+- dprintk("NFSD: nfsd4_read: couldn't process stateid!\n");
+- goto out;
+- }
+- status = nfs_ok;
+-out:
++
+ read->rd_rqstp = rqstp;
+ read->rd_fhp = &cstate->current_fh;
+ return status;
+@@ -1117,10 +1112,8 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ status = nfs4_preprocess_stateid_op(rqstp, cstate,
+ &cstate->current_fh, &setattr->sa_stateid,
+ WR_STATE, NULL, NULL);
+- if (status) {
+- dprintk("NFSD: nfsd4_setattr: couldn't process stateid!\n");
++ if (status)
+ return status;
+- }
+ }
+ err = fh_want_write(&cstate->current_fh);
+ if (err)
+@@ -1170,10 +1163,8 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ write->wr_offset, cnt);
+ status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
+ stateid, WR_STATE, &nf, NULL);
+- if (status) {
+- dprintk("NFSD: nfsd4_write: couldn't process stateid!\n");
++ if (status)
+ return status;
+- }
+
+ write->wr_how_written = write->wr_stable_how;
+
+@@ -1204,17 +1195,13 @@ nfsd4_verify_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+
+ status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->save_fh,
+ src_stateid, RD_STATE, src, NULL);
+- if (status) {
+- dprintk("NFSD: %s: couldn't process src stateid!\n", __func__);
++ if (status)
+ goto out;
+- }
+
+ status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
+ dst_stateid, WR_STATE, dst, NULL);
+- if (status) {
+- dprintk("NFSD: %s: couldn't process dst stateid!\n", __func__);
++ if (status)
+ goto out_put_src;
+- }
+
+ /* fix up for NFS-specific error code */
+ if (!S_ISREG(file_inode((*src)->nf_file)->i_mode) ||
+@@ -1935,10 +1922,8 @@ nfsd4_fallocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
+ &fallocate->falloc_stateid,
+ WR_STATE, &nf, NULL);
+- if (status != nfs_ok) {
+- dprintk("NFSD: nfsd4_fallocate: couldn't process stateid!\n");
++ if (status != nfs_ok)
+ return status;
+- }
+
+ status = nfsd4_vfs_fallocate(rqstp, &cstate->current_fh, nf->nf_file,
+ fallocate->falloc_offset,
+@@ -1994,10 +1979,8 @@ nfsd4_seek(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
+ &seek->seek_stateid,
+ RD_STATE, &nf, NULL);
+- if (status) {
+- dprintk("NFSD: nfsd4_seek: couldn't process stateid!\n");
++ if (status)
+ return status;
+- }
+
+ switch (seek->seek_whence) {
+ case NFS4_CONTENT_DATA:
+diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
+index b3f6dda930d8b..b9d694ec25d19 100644
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -44,7 +44,9 @@
+ #include <linux/jhash.h>
+ #include <linux/string_helpers.h>
+ #include <linux/fsnotify.h>
++#include <linux/rhashtable.h>
+ #include <linux/nfs_ssc.h>
++
+ #include "xdr4.h"
+ #include "xdr4cb.h"
+ #include "vfs.h"
+@@ -84,6 +86,7 @@ static bool check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner)
+ static void nfs4_free_ol_stateid(struct nfs4_stid *stid);
+ void nfsd4_end_grace(struct nfsd_net *nn);
+ static void _free_cpntf_state_locked(struct nfsd_net *nn, struct nfs4_cpntf_state *cps);
++static void nfsd4_file_hash_remove(struct nfs4_file *fi);
+
+ /* Locking: */
+
+@@ -588,11 +591,8 @@ static void nfsd4_free_file_rcu(struct rcu_head *rcu)
+ void
+ put_nfs4_file(struct nfs4_file *fi)
+ {
+- might_lock(&state_lock);
+-
+- if (refcount_dec_and_lock(&fi->fi_ref, &state_lock)) {
+- hlist_del_rcu(&fi->fi_hash);
+- spin_unlock(&state_lock);
++ if (refcount_dec_and_test(&fi->fi_ref)) {
++ nfsd4_file_hash_remove(fi);
+ WARN_ON_ONCE(!list_empty(&fi->fi_clnt_odstate));
+ WARN_ON_ONCE(!list_empty(&fi->fi_delegations));
+ call_rcu(&fi->fi_rcu, nfsd4_free_file_rcu);
+@@ -717,19 +717,20 @@ static unsigned int ownerstr_hashval(struct xdr_netobj *ownername)
+ return ret & OWNER_HASH_MASK;
+ }
+
+-/* hash table for nfs4_file */
+-#define FILE_HASH_BITS 8
+-#define FILE_HASH_SIZE (1 << FILE_HASH_BITS)
+-
+-static unsigned int file_hashval(struct svc_fh *fh)
+-{
+- struct inode *inode = d_inode(fh->fh_dentry);
++static struct rhltable nfs4_file_rhltable ____cacheline_aligned_in_smp;
+
+- /* XXX: why not (here & in file cache) use inode? */
+- return (unsigned int)hash_long(inode->i_ino, FILE_HASH_BITS);
+-}
++static const struct rhashtable_params nfs4_file_rhash_params = {
++ .key_len = sizeof_field(struct nfs4_file, fi_inode),
++ .key_offset = offsetof(struct nfs4_file, fi_inode),
++ .head_offset = offsetof(struct nfs4_file, fi_rlist),
+
+-static struct hlist_head file_hashtbl[FILE_HASH_SIZE];
++ /*
++ * Start with a single page hash table to reduce resizing churn
++ * on light workloads.
++ */
++ .min_size = 256,
++ .automatic_shrinking = true,
++};
+
+ /*
+ * Check if courtesy clients have conflicting access and resolve it if possible
+@@ -1367,6 +1368,8 @@ static void revoke_delegation(struct nfs4_delegation *dp)
+
+ WARN_ON(!list_empty(&dp->dl_recall_lru));
+
++ trace_nfsd_stid_revoke(&dp->dl_stid);
++
+ if (clp->cl_minorversion) {
+ spin_lock(&clp->cl_lock);
+ dp->dl_stid.sc_type = NFS4_REVOKED_DELEG_STID;
+@@ -1831,13 +1834,12 @@ static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs,
+ int numslots = fattrs->maxreqs;
+ int slotsize = slot_bytes(fattrs);
+ struct nfsd4_session *new;
+- int mem, i;
++ int i;
+
+- BUILD_BUG_ON(NFSD_MAX_SLOTS_PER_SESSION * sizeof(struct nfsd4_slot *)
+- + sizeof(struct nfsd4_session) > PAGE_SIZE);
+- mem = numslots * sizeof(struct nfsd4_slot *);
++ BUILD_BUG_ON(struct_size(new, se_slots, NFSD_MAX_SLOTS_PER_SESSION)
++ > PAGE_SIZE);
+
+- new = kzalloc(sizeof(*new) + mem, GFP_KERNEL);
++ new = kzalloc(struct_size(new, se_slots, numslots), GFP_KERNEL);
+ if (!new)
+ return NULL;
+ /* allocate each struct nfsd4_slot and data cache in one piece */
+@@ -2143,6 +2145,7 @@ static void __free_client(struct kref *k)
+ kfree(clp->cl_nii_domain.data);
+ kfree(clp->cl_nii_name.data);
+ idr_destroy(&clp->cl_stateids);
++ kfree(clp->cl_ra);
+ kmem_cache_free(client_slab, clp);
+ }
+
+@@ -2870,6 +2873,37 @@ static const struct tree_descr client_files[] = {
+ [3] = {""},
+ };
+
++static int
++nfsd4_cb_recall_any_done(struct nfsd4_callback *cb,
++ struct rpc_task *task)
++{
++ trace_nfsd_cb_recall_any_done(cb, task);
++ switch (task->tk_status) {
++ case -NFS4ERR_DELAY:
++ rpc_delay(task, 2 * HZ);
++ return 0;
++ default:
++ return 1;
++ }
++}
++
++static void
++nfsd4_cb_recall_any_release(struct nfsd4_callback *cb)
++{
++ struct nfs4_client *clp = cb->cb_clp;
++ struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
++
++ spin_lock(&nn->client_lock);
++ clear_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags);
++ put_client_renew_locked(clp);
++ spin_unlock(&nn->client_lock);
++}
++
++static const struct nfsd4_callback_ops nfsd4_cb_recall_any_ops = {
++ .done = nfsd4_cb_recall_any_done,
++ .release = nfsd4_cb_recall_any_release,
++};
++
+ static struct nfs4_client *create_client(struct xdr_netobj name,
+ struct svc_rqst *rqstp, nfs4_verifier *verf)
+ {
+@@ -2907,6 +2941,14 @@ static struct nfs4_client *create_client(struct xdr_netobj name,
+ free_client(clp);
+ return NULL;
+ }
++ clp->cl_ra = kzalloc(sizeof(*clp->cl_ra), GFP_KERNEL);
++ if (!clp->cl_ra) {
++ free_client(clp);
++ return NULL;
++ }
++ clp->cl_ra_time = 0;
++ nfsd4_init_cb(&clp->cl_ra->ra_cb, clp, &nfsd4_cb_recall_any_ops,
++ NFSPROC4_CLNT_CB_RECALL_ANY);
+ return clp;
+ }
+
+@@ -4276,11 +4318,9 @@ static struct nfs4_file *nfsd4_alloc_file(void)
+ }
+
+ /* OPEN Share state helper functions */
+-static void nfsd4_init_file(struct svc_fh *fh, unsigned int hashval,
+- struct nfs4_file *fp)
+-{
+- lockdep_assert_held(&state_lock);
+
++static void nfsd4_file_init(const struct svc_fh *fh, struct nfs4_file *fp)
++{
+ refcount_set(&fp->fi_ref, 1);
+ spin_lock_init(&fp->fi_lock);
+ INIT_LIST_HEAD(&fp->fi_stateids);
+@@ -4298,7 +4338,6 @@ static void nfsd4_init_file(struct svc_fh *fh, unsigned int hashval,
+ INIT_LIST_HEAD(&fp->fi_lo_states);
+ atomic_set(&fp->fi_lo_recalls, 0);
+ #endif
+- hlist_add_head_rcu(&fp->fi_hash, &file_hashtbl[hashval]);
+ }
+
+ void
+@@ -4363,25 +4402,27 @@ nfsd4_init_slabs(void)
+ }
+
+ static unsigned long
+-nfsd_courtesy_client_count(struct shrinker *shrink, struct shrink_control *sc)
++nfsd4_state_shrinker_count(struct shrinker *shrink, struct shrink_control *sc)
+ {
+- int cnt;
++ int count;
+ struct nfsd_net *nn = container_of(shrink,
+ struct nfsd_net, nfsd_client_shrinker);
+
+- cnt = atomic_read(&nn->nfsd_courtesy_clients);
+- if (cnt > 0)
+- mod_delayed_work(laundry_wq, &nn->nfsd_shrinker_work, 0);
+- return (unsigned long)cnt;
++ count = atomic_read(&nn->nfsd_courtesy_clients);
++ if (!count)
++ count = atomic_long_read(&num_delegations);
++ if (count)
++ queue_work(laundry_wq, &nn->nfsd_shrinker_work);
++ return (unsigned long)count;
+ }
+
+ static unsigned long
+-nfsd_courtesy_client_scan(struct shrinker *shrink, struct shrink_control *sc)
++nfsd4_state_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc)
+ {
+ return SHRINK_STOP;
+ }
+
+-int
++void
+ nfsd4_init_leases_net(struct nfsd_net *nn)
+ {
+ struct sysinfo si;
+@@ -4403,16 +4444,6 @@ nfsd4_init_leases_net(struct nfsd_net *nn)
+ nn->nfs4_max_clients = max_t(int, max_clients, NFS4_CLIENTS_PER_GB);
+
+ atomic_set(&nn->nfsd_courtesy_clients, 0);
+- nn->nfsd_client_shrinker.scan_objects = nfsd_courtesy_client_scan;
+- nn->nfsd_client_shrinker.count_objects = nfsd_courtesy_client_count;
+- nn->nfsd_client_shrinker.seeks = DEFAULT_SEEKS;
+- return register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client");
+-}
+-
+-void
+-nfsd4_leases_net_shutdown(struct nfsd_net *nn)
+-{
+- unregister_shrinker(&nn->nfsd_client_shrinker);
+ }
+
+ static void init_nfs4_replay(struct nfs4_replay *rp)
+@@ -4683,71 +4714,80 @@ move_to_close_lru(struct nfs4_ol_stateid *s, struct net *net)
+ nfs4_put_stid(&last->st_stid);
+ }
+
+-/* search file_hashtbl[] for file */
+-static struct nfs4_file *
+-find_file_locked(struct svc_fh *fh, unsigned int hashval)
++static noinline_for_stack struct nfs4_file *
++nfsd4_file_hash_lookup(const struct svc_fh *fhp)
+ {
+- struct nfs4_file *fp;
++ struct inode *inode = d_inode(fhp->fh_dentry);
++ struct rhlist_head *tmp, *list;
++ struct nfs4_file *fi;
+
+- hlist_for_each_entry_rcu(fp, &file_hashtbl[hashval], fi_hash,
+- lockdep_is_held(&state_lock)) {
+- if (fh_match(&fp->fi_fhandle, &fh->fh_handle)) {
+- if (refcount_inc_not_zero(&fp->fi_ref))
+- return fp;
++ rcu_read_lock();
++ list = rhltable_lookup(&nfs4_file_rhltable, &inode,
++ nfs4_file_rhash_params);
++ rhl_for_each_entry_rcu(fi, tmp, list, fi_rlist) {
++ if (fh_match(&fi->fi_fhandle, &fhp->fh_handle)) {
++ if (refcount_inc_not_zero(&fi->fi_ref)) {
++ rcu_read_unlock();
++ return fi;
++ }
+ }
+ }
++ rcu_read_unlock();
+ return NULL;
+ }
+
+-static struct nfs4_file *insert_file(struct nfs4_file *new, struct svc_fh *fh,
+- unsigned int hashval)
++/*
++ * On hash insertion, identify entries with the same inode but
++ * distinct filehandles. They will all be on the list returned
++ * by rhltable_lookup().
++ *
++ * inode->i_lock prevents racing insertions from adding an entry
++ * for the same inode/fhp pair twice.
++ */
++static noinline_for_stack struct nfs4_file *
++nfsd4_file_hash_insert(struct nfs4_file *new, const struct svc_fh *fhp)
+ {
+- struct nfs4_file *fp;
++ struct inode *inode = d_inode(fhp->fh_dentry);
++ struct rhlist_head *tmp, *list;
+ struct nfs4_file *ret = NULL;
+ bool alias_found = false;
++ struct nfs4_file *fi;
++ int err;
+
+- spin_lock(&state_lock);
+- hlist_for_each_entry_rcu(fp, &file_hashtbl[hashval], fi_hash,
+- lockdep_is_held(&state_lock)) {
+- if (fh_match(&fp->fi_fhandle, &fh->fh_handle)) {
+- if (refcount_inc_not_zero(&fp->fi_ref))
+- ret = fp;
+- } else if (d_inode(fh->fh_dentry) == fp->fi_inode)
+- fp->fi_aliased = alias_found = true;
+- }
+- if (likely(ret == NULL)) {
+- nfsd4_init_file(fh, hashval, new);
+- new->fi_aliased = alias_found;
+- ret = new;
++ rcu_read_lock();
++ spin_lock(&inode->i_lock);
++
++ list = rhltable_lookup(&nfs4_file_rhltable, &inode,
++ nfs4_file_rhash_params);
++ rhl_for_each_entry_rcu(fi, tmp, list, fi_rlist) {
++ if (fh_match(&fi->fi_fhandle, &fhp->fh_handle)) {
++ if (refcount_inc_not_zero(&fi->fi_ref))
++ ret = fi;
++ } else
++ fi->fi_aliased = alias_found = true;
+ }
+- spin_unlock(&state_lock);
+- return ret;
+-}
++ if (ret)
++ goto out_unlock;
+
+-static struct nfs4_file * find_file(struct svc_fh *fh)
+-{
+- struct nfs4_file *fp;
+- unsigned int hashval = file_hashval(fh);
++ nfsd4_file_init(fhp, new);
++ err = rhltable_insert(&nfs4_file_rhltable, &new->fi_rlist,
++ nfs4_file_rhash_params);
++ if (err)
++ goto out_unlock;
+
+- rcu_read_lock();
+- fp = find_file_locked(fh, hashval);
++ new->fi_aliased = alias_found;
++ ret = new;
++
++out_unlock:
++ spin_unlock(&inode->i_lock);
+ rcu_read_unlock();
+- return fp;
++ return ret;
+ }
+
+-static struct nfs4_file *
+-find_or_add_file(struct nfs4_file *new, struct svc_fh *fh)
++static noinline_for_stack void nfsd4_file_hash_remove(struct nfs4_file *fi)
+ {
+- struct nfs4_file *fp;
+- unsigned int hashval = file_hashval(fh);
+-
+- rcu_read_lock();
+- fp = find_file_locked(fh, hashval);
+- rcu_read_unlock();
+- if (fp)
+- return fp;
+-
+- return insert_file(new, fh, hashval);
++ rhltable_remove(&nfs4_file_rhltable, &fi->fi_rlist,
++ nfs4_file_rhash_params);
+ }
+
+ /*
+@@ -4760,9 +4800,10 @@ nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type)
+ struct nfs4_file *fp;
+ __be32 ret = nfs_ok;
+
+- fp = find_file(current_fh);
++ fp = nfsd4_file_hash_lookup(current_fh);
+ if (!fp)
+ return ret;
++
+ /* Check for conflicting share reservations */
+ spin_lock(&fp->fi_lock);
+ if (fp->fi_share_deny & deny_type)
+@@ -4774,7 +4815,7 @@ nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type)
+
+ static bool nfsd4_deleg_present(const struct inode *inode)
+ {
+- struct file_lock_context *ctx = smp_load_acquire(&inode->i_flctx);
++ struct file_lock_context *ctx = locks_inode_context(inode);
+
+ return ctx && !list_empty_careful(&ctx->flc_lease);
+ }
+@@ -5655,7 +5696,9 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
+ * and check for delegations in the process of being recalled.
+ * If not found, create the nfs4_file struct
+ */
+- fp = find_or_add_file(open->op_file, current_fh);
++ fp = nfsd4_file_hash_insert(open->op_file, current_fh);
++ if (unlikely(!fp))
++ return nfserr_jukebox;
+ if (fp != open->op_file) {
+ status = nfs4_check_deleg(cl, open, &dp);
+ if (status)
+@@ -5932,7 +5975,7 @@ nfs4_lockowner_has_blockers(struct nfs4_lockowner *lo)
+
+ list_for_each_entry(stp, &lo->lo_owner.so_stateids, st_perstateowner) {
+ nf = stp->st_stid.sc_file;
+- ctx = nf->fi_inode->i_flctx;
++ ctx = locks_inode_context(nf->fi_inode);
+ if (!ctx)
+ continue;
+ if (locks_owner_has_blockers(ctx, lo))
+@@ -6160,17 +6203,63 @@ laundromat_main(struct work_struct *laundry)
+ }
+
+ static void
+-courtesy_client_reaper(struct work_struct *reaper)
++courtesy_client_reaper(struct nfsd_net *nn)
+ {
+ struct list_head reaplist;
+- struct delayed_work *dwork = to_delayed_work(reaper);
+- struct nfsd_net *nn = container_of(dwork, struct nfsd_net,
+- nfsd_shrinker_work);
+
+ nfs4_get_courtesy_client_reaplist(nn, &reaplist);
+ nfs4_process_client_reaplist(&reaplist);
+ }
+
++static void
++deleg_reaper(struct nfsd_net *nn)
++{
++ struct list_head *pos, *next;
++ struct nfs4_client *clp;
++ struct list_head cblist;
++
++ INIT_LIST_HEAD(&cblist);
++ spin_lock(&nn->client_lock);
++ list_for_each_safe(pos, next, &nn->client_lru) {
++ clp = list_entry(pos, struct nfs4_client, cl_lru);
++ if (clp->cl_state != NFSD4_ACTIVE ||
++ list_empty(&clp->cl_delegations) ||
++ atomic_read(&clp->cl_delegs_in_recall) ||
++ test_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags) ||
++ (ktime_get_boottime_seconds() -
++ clp->cl_ra_time < 5)) {
++ continue;
++ }
++ list_add(&clp->cl_ra_cblist, &cblist);
++
++ /* release in nfsd4_cb_recall_any_release */
++ atomic_inc(&clp->cl_rpc_users);
++ set_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags);
++ clp->cl_ra_time = ktime_get_boottime_seconds();
++ }
++ spin_unlock(&nn->client_lock);
++
++ while (!list_empty(&cblist)) {
++ clp = list_first_entry(&cblist, struct nfs4_client,
++ cl_ra_cblist);
++ list_del_init(&clp->cl_ra_cblist);
++ clp->cl_ra->ra_keep = 0;
++ clp->cl_ra->ra_bmval[0] = BIT(RCA4_TYPE_MASK_RDATA_DLG);
++ trace_nfsd_cb_recall_any(clp->cl_ra);
++ nfsd4_run_cb(&clp->cl_ra->ra_cb);
++ }
++}
++
++static void
++nfsd4_state_shrinker_worker(struct work_struct *work)
++{
++ struct nfsd_net *nn = container_of(work, struct nfsd_net,
++ nfsd_shrinker_work);
++
++ courtesy_client_reaper(nn);
++ deleg_reaper(nn);
++}
++
+ static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stid *stp)
+ {
+ if (!fh_match(&fhp->fh_handle, &stp->sc_file->fi_fhandle))
+@@ -6935,6 +7024,7 @@ nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ if (status)
+ goto put_stateid;
+
++ trace_nfsd_deleg_return(stateid);
+ wake_up_var(d_inode(cstate->current_fh.fh_dentry));
+ destroy_delegation(dp);
+ put_stateid:
+@@ -7748,7 +7838,7 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner)
+ }
+
+ inode = locks_inode(nf->nf_file);
+- flctx = inode->i_flctx;
++ flctx = locks_inode_context(inode);
+
+ if (flctx && !list_empty_careful(&flctx->flc_posix)) {
+ spin_lock(&flctx->flc_lock);
+@@ -7995,11 +8085,20 @@ static int nfs4_state_create_net(struct net *net)
+ INIT_LIST_HEAD(&nn->blocked_locks_lru);
+
+ INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main);
+- INIT_DELAYED_WORK(&nn->nfsd_shrinker_work, courtesy_client_reaper);
++ INIT_WORK(&nn->nfsd_shrinker_work, nfsd4_state_shrinker_worker);
+ get_net(net);
+
++ nn->nfsd_client_shrinker.scan_objects = nfsd4_state_shrinker_scan;
++ nn->nfsd_client_shrinker.count_objects = nfsd4_state_shrinker_count;
++ nn->nfsd_client_shrinker.seeks = DEFAULT_SEEKS;
++
++ if (register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client"))
++ goto err_shrinker;
+ return 0;
+
++err_shrinker:
++ put_net(net);
++ kfree(nn->sessionid_hashtbl);
+ err_sessionid:
+ kfree(nn->unconf_id_hashtbl);
+ err_unconf_id:
+@@ -8071,10 +8170,16 @@ nfs4_state_start(void)
+ {
+ int ret;
+
+- ret = nfsd4_create_callback_queue();
++ ret = rhltable_init(&nfs4_file_rhltable, &nfs4_file_rhash_params);
+ if (ret)
+ return ret;
+
++ ret = nfsd4_create_callback_queue();
++ if (ret) {
++ rhltable_destroy(&nfs4_file_rhltable);
++ return ret;
++ }
++
+ set_max_delegations();
+ return 0;
+ }
+@@ -8086,6 +8191,8 @@ nfs4_state_shutdown_net(struct net *net)
+ struct list_head *pos, *next, reaplist;
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+
++ unregister_shrinker(&nn->nfsd_client_shrinker);
++ cancel_work(&nn->nfsd_shrinker_work);
+ cancel_delayed_work_sync(&nn->laundromat_work);
+ locks_end_grace(&nn->nfsd4_manager);
+
+@@ -8114,6 +8221,7 @@ void
+ nfs4_state_shutdown(void)
+ {
+ nfsd4_destroy_callback_queue();
++ rhltable_destroy(&nfs4_file_rhltable);
+ }
+
+ static void
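/*
 * The fs/nfsd/nfs4xdr.c hunks below mechanically retype every decoder
 * to one shared signature taking union nfsd4_op_u, so the per-opcode
 * dispatch table can store them without function-pointer casts; the
 * resulting entry type has this shape (per the upstream conversion):
 */
typedef __be32 (*nfsd4_dec)(struct nfsd4_compoundargs *argp,
			    union nfsd4_op_u *u);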
+diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
+index 89a579be042e5..597f14a80512f 100644
+--- a/fs/nfsd/nfs4xdr.c
++++ b/fs/nfsd/nfs4xdr.c
+@@ -770,16 +770,18 @@ nfsd4_decode_cb_sec(struct nfsd4_compoundargs *argp, struct nfsd4_cb_sec *cbs)
+
+ static __be32
+ nfsd4_decode_access(struct nfsd4_compoundargs *argp,
+- struct nfsd4_access *access)
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_access *access = &u->access;
+ if (xdr_stream_decode_u32(argp->xdr, &access->ac_req_access) < 0)
+ return nfserr_bad_xdr;
+ return nfs_ok;
+ }
+
+ static __be32
+-nfsd4_decode_close(struct nfsd4_compoundargs *argp, struct nfsd4_close *close)
++nfsd4_decode_close(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
+ {
++ struct nfsd4_close *close = &u->close;
+ if (xdr_stream_decode_u32(argp->xdr, &close->cl_seqid) < 0)
+ return nfserr_bad_xdr;
+ return nfsd4_decode_stateid4(argp, &close->cl_stateid);
+@@ -787,8 +789,9 @@ nfsd4_decode_close(struct nfsd4_compoundargs *argp, struct nfsd4_close *close)
+
+
+ static __be32
+-nfsd4_decode_commit(struct nfsd4_compoundargs *argp, struct nfsd4_commit *commit)
++nfsd4_decode_commit(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
+ {
++ struct nfsd4_commit *commit = &u->commit;
+ if (xdr_stream_decode_u64(argp->xdr, &commit->co_offset) < 0)
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_u32(argp->xdr, &commit->co_count) < 0)
+@@ -798,8 +801,9 @@ nfsd4_decode_commit(struct nfsd4_compoundargs *argp, struct nfsd4_commit *commit
+ }
+
+ static __be32
+-nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create)
++nfsd4_decode_create(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
+ {
++ struct nfsd4_create *create = &u->create;
+ __be32 *p, status;
+
+ memset(create, 0, sizeof(*create));
+@@ -844,22 +848,25 @@ nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create
+ }
+
+ static inline __be32
+-nfsd4_decode_delegreturn(struct nfsd4_compoundargs *argp, struct nfsd4_delegreturn *dr)
++nfsd4_decode_delegreturn(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
+ {
++ struct nfsd4_delegreturn *dr = &u->delegreturn;
+ return nfsd4_decode_stateid4(argp, &dr->dr_stateid);
+ }
+
+ static inline __be32
+-nfsd4_decode_getattr(struct nfsd4_compoundargs *argp, struct nfsd4_getattr *getattr)
++nfsd4_decode_getattr(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
+ {
++ struct nfsd4_getattr *getattr = &u->getattr;
+ memset(getattr, 0, sizeof(*getattr));
+ return nfsd4_decode_bitmap4(argp, getattr->ga_bmval,
+ ARRAY_SIZE(getattr->ga_bmval));
+ }
+
+ static __be32
+-nfsd4_decode_link(struct nfsd4_compoundargs *argp, struct nfsd4_link *link)
++nfsd4_decode_link(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
+ {
++ struct nfsd4_link *link = &u->link;
+ memset(link, 0, sizeof(*link));
+ return nfsd4_decode_component4(argp, &link->li_name, &link->li_namelen);
+ }
+@@ -907,8 +914,9 @@ nfsd4_decode_locker4(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock)
+ }
+
+ static __be32
+-nfsd4_decode_lock(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock)
++nfsd4_decode_lock(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
+ {
++ struct nfsd4_lock *lock = &u->lock;
+ memset(lock, 0, sizeof(*lock));
+ if (xdr_stream_decode_u32(argp->xdr, &lock->lk_type) < 0)
+ return nfserr_bad_xdr;
+@@ -924,8 +932,9 @@ nfsd4_decode_lock(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock)
+ }
+
+ static __be32
+-nfsd4_decode_lockt(struct nfsd4_compoundargs *argp, struct nfsd4_lockt *lockt)
++nfsd4_decode_lockt(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
+ {
++ struct nfsd4_lockt *lockt = &u->lockt;
+ memset(lockt, 0, sizeof(*lockt));
+ if (xdr_stream_decode_u32(argp->xdr, &lockt->lt_type) < 0)
+ return nfserr_bad_xdr;
+@@ -940,8 +949,9 @@ nfsd4_decode_lockt(struct nfsd4_compoundargs *argp, struct nfsd4_lockt *lockt)
+ }
+
+ static __be32
+-nfsd4_decode_locku(struct nfsd4_compoundargs *argp, struct nfsd4_locku *locku)
++nfsd4_decode_locku(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
+ {
++ struct nfsd4_locku *locku = &u->locku;
+ __be32 status;
+
+ if (xdr_stream_decode_u32(argp->xdr, &locku->lu_type) < 0)
+@@ -962,8 +972,9 @@ nfsd4_decode_locku(struct nfsd4_compoundargs *argp, struct nfsd4_locku *locku)
+ }
+
+ static __be32
+-nfsd4_decode_lookup(struct nfsd4_compoundargs *argp, struct nfsd4_lookup *lookup)
++nfsd4_decode_lookup(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
+ {
++ struct nfsd4_lookup *lookup = &u->lookup;
+ return nfsd4_decode_component4(argp, &lookup->lo_name, &lookup->lo_len);
+ }
+
+@@ -1143,8 +1154,9 @@ nfsd4_decode_open_claim4(struct nfsd4_compoundargs *argp,
+ }
+
+ static __be32
+-nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open)
++nfsd4_decode_open(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
+ {
++ struct nfsd4_open *open = &u->open;
+ __be32 status;
+ u32 dummy;
+
+@@ -1171,8 +1183,10 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open)
+ }
+
+ static __be32
+-nfsd4_decode_open_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_open_confirm *open_conf)
++nfsd4_decode_open_confirm(struct nfsd4_compoundargs *argp,
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_open_confirm *open_conf = &u->open_confirm;
+ __be32 status;
+
+ if (argp->minorversion >= 1)
+@@ -1190,8 +1204,10 @@ nfsd4_decode_open_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_open_con
+ }
+
+ static __be32
+-nfsd4_decode_open_downgrade(struct nfsd4_compoundargs *argp, struct nfsd4_open_downgrade *open_down)
++nfsd4_decode_open_downgrade(struct nfsd4_compoundargs *argp,
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_open_downgrade *open_down = &u->open_downgrade;
+ __be32 status;
+
+ memset(open_down, 0, sizeof(*open_down));
+@@ -1209,8 +1225,9 @@ nfsd4_decode_open_downgrade(struct nfsd4_compoundargs *argp, struct nfsd4_open_d
+ }
+
+ static __be32
+-nfsd4_decode_putfh(struct nfsd4_compoundargs *argp, struct nfsd4_putfh *putfh)
++nfsd4_decode_putfh(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
+ {
++ struct nfsd4_putfh *putfh = &u->putfh;
+ __be32 *p;
+
+ if (xdr_stream_decode_u32(argp->xdr, &putfh->pf_fhlen) < 0)
+@@ -1229,7 +1246,7 @@ nfsd4_decode_putfh(struct nfsd4_compoundargs *argp, struct nfsd4_putfh *putfh)
+ }
+
+ static __be32
+-nfsd4_decode_putpubfh(struct nfsd4_compoundargs *argp, void *p)
++nfsd4_decode_putpubfh(struct nfsd4_compoundargs *argp, union nfsd4_op_u *p)
+ {
+ if (argp->minorversion == 0)
+ return nfs_ok;
+@@ -1237,8 +1254,9 @@ nfsd4_decode_putpubfh(struct nfsd4_compoundargs *argp, void *p)
+ }
+
+ static __be32
+-nfsd4_decode_read(struct nfsd4_compoundargs *argp, struct nfsd4_read *read)
++nfsd4_decode_read(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
+ {
++ struct nfsd4_read *read = &u->read;
+ __be32 status;
+
+ memset(read, 0, sizeof(*read));
+@@ -1254,8 +1272,9 @@ nfsd4_decode_read(struct nfsd4_compoundargs *argp, struct nfsd4_read *read)
+ }
+
+ static __be32
+-nfsd4_decode_readdir(struct nfsd4_compoundargs *argp, struct nfsd4_readdir *readdir)
++nfsd4_decode_readdir(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
+ {
++ struct nfsd4_readdir *readdir = &u->readdir;
+ __be32 status;
+
+ memset(readdir, 0, sizeof(*readdir));
+@@ -1276,15 +1295,17 @@ nfsd4_decode_readdir(struct nfsd4_compoundargs *argp, struct nfsd4_readdir *read
+ }
+
+ static __be32
+-nfsd4_decode_remove(struct nfsd4_compoundargs *argp, struct nfsd4_remove *remove)
++nfsd4_decode_remove(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
+ {
++ struct nfsd4_remove *remove = &u->remove;
+ memset(&remove->rm_cinfo, 0, sizeof(remove->rm_cinfo));
+ return nfsd4_decode_component4(argp, &remove->rm_name, &remove->rm_namelen);
+ }
+
+ static __be32
+-nfsd4_decode_rename(struct nfsd4_compoundargs *argp, struct nfsd4_rename *rename)
++nfsd4_decode_rename(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
+ {
++ struct nfsd4_rename *rename = &u->rename;
+ __be32 status;
+
+ memset(rename, 0, sizeof(*rename));
+@@ -1295,22 +1316,25 @@ nfsd4_decode_rename(struct nfsd4_compoundargs *argp, struct nfsd4_rename *rename
+ }
+
+ static __be32
+-nfsd4_decode_renew(struct nfsd4_compoundargs *argp, clientid_t *clientid)
++nfsd4_decode_renew(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
+ {
++ clientid_t *clientid = &u->renew;
+ return nfsd4_decode_clientid4(argp, clientid);
+ }
+
+ static __be32
+ nfsd4_decode_secinfo(struct nfsd4_compoundargs *argp,
+- struct nfsd4_secinfo *secinfo)
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_secinfo *secinfo = &u->secinfo;
+ secinfo->si_exp = NULL;
+ return nfsd4_decode_component4(argp, &secinfo->si_name, &secinfo->si_namelen);
+ }
+
+ static __be32
+-nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, struct nfsd4_setattr *setattr)
++nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
+ {
++ struct nfsd4_setattr *setattr = &u->setattr;
+ __be32 status;
+
+ memset(setattr, 0, sizeof(*setattr));
+@@ -1324,8 +1348,9 @@ nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, struct nfsd4_setattr *seta
+ }
+
+ static __be32
+-nfsd4_decode_setclientid(struct nfsd4_compoundargs *argp, struct nfsd4_setclientid *setclientid)
++nfsd4_decode_setclientid(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
+ {
++ struct nfsd4_setclientid *setclientid = &u->setclientid;
+ __be32 *p, status;
+
+ memset(setclientid, 0, sizeof(*setclientid));
+@@ -1367,8 +1392,10 @@ nfsd4_decode_setclientid(struct nfsd4_compoundargs *argp, struct nfsd4_setclient
+ }
+
+ static __be32
+-nfsd4_decode_setclientid_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_setclientid_confirm *scd_c)
++nfsd4_decode_setclientid_confirm(struct nfsd4_compoundargs *argp,
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_setclientid_confirm *scd_c = &u->setclientid_confirm;
+ __be32 status;
+
+ if (argp->minorversion >= 1)
+@@ -1382,8 +1409,9 @@ nfsd4_decode_setclientid_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_s
+
+ /* Also used for NVERIFY */
+ static __be32
+-nfsd4_decode_verify(struct nfsd4_compoundargs *argp, struct nfsd4_verify *verify)
++nfsd4_decode_verify(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
+ {
++ struct nfsd4_verify *verify = &u->verify;
+ __be32 *p, status;
+
+ memset(verify, 0, sizeof(*verify));
+@@ -1409,8 +1437,9 @@ nfsd4_decode_verify(struct nfsd4_compoundargs *argp, struct nfsd4_verify *verify
+ }
+
+ static __be32
+-nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write)
++nfsd4_decode_write(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
+ {
++ struct nfsd4_write *write = &u->write;
+ __be32 status;
+
+ status = nfsd4_decode_stateid4(argp, &write->wr_stateid);
+@@ -1434,8 +1463,10 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write)
+ }
+
+ static __be32
+-nfsd4_decode_release_lockowner(struct nfsd4_compoundargs *argp, struct nfsd4_release_lockowner *rlockowner)
++nfsd4_decode_release_lockowner(struct nfsd4_compoundargs *argp,
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_release_lockowner *rlockowner = &u->release_lockowner;
+ __be32 status;
+
+ if (argp->minorversion >= 1)
+@@ -1452,16 +1483,20 @@ nfsd4_decode_release_lockowner(struct nfsd4_compoundargs *argp, struct nfsd4_rel
+ return nfs_ok;
+ }
+
+-static __be32 nfsd4_decode_backchannel_ctl(struct nfsd4_compoundargs *argp, struct nfsd4_backchannel_ctl *bc)
++static __be32 nfsd4_decode_backchannel_ctl(struct nfsd4_compoundargs *argp,
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_backchannel_ctl *bc = &u->backchannel_ctl;
+ memset(bc, 0, sizeof(*bc));
+ if (xdr_stream_decode_u32(argp->xdr, &bc->bc_cb_program) < 0)
+ return nfserr_bad_xdr;
+ return nfsd4_decode_cb_sec(argp, &bc->bc_cb_sec);
+ }
+
+-static __be32 nfsd4_decode_bind_conn_to_session(struct nfsd4_compoundargs *argp, struct nfsd4_bind_conn_to_session *bcts)
++static __be32 nfsd4_decode_bind_conn_to_session(struct nfsd4_compoundargs *argp,
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_bind_conn_to_session *bcts = &u->bind_conn_to_session;
+ u32 use_conn_in_rdma_mode;
+ __be32 status;
+
+@@ -1603,8 +1638,9 @@ nfsd4_decode_nfs_impl_id4(struct nfsd4_compoundargs *argp,
+
+ static __be32
+ nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp,
+- struct nfsd4_exchange_id *exid)
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_exchange_id *exid = &u->exchange_id;
+ __be32 status;
+
+ memset(exid, 0, sizeof(*exid));
+@@ -1656,8 +1692,9 @@ nfsd4_decode_channel_attrs4(struct nfsd4_compoundargs *argp,
+
+ static __be32
+ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp,
+- struct nfsd4_create_session *sess)
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_create_session *sess = &u->create_session;
+ __be32 status;
+
+ memset(sess, 0, sizeof(*sess));
+@@ -1681,23 +1718,26 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp,
+
+ static __be32
+ nfsd4_decode_destroy_session(struct nfsd4_compoundargs *argp,
+- struct nfsd4_destroy_session *destroy_session)
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_destroy_session *destroy_session = &u->destroy_session;
+ return nfsd4_decode_sessionid4(argp, &destroy_session->sessionid);
+ }
+
+ static __be32
+ nfsd4_decode_free_stateid(struct nfsd4_compoundargs *argp,
+- struct nfsd4_free_stateid *free_stateid)
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_free_stateid *free_stateid = &u->free_stateid;
+ return nfsd4_decode_stateid4(argp, &free_stateid->fr_stateid);
+ }
+
+ #ifdef CONFIG_NFSD_PNFS
+ static __be32
+ nfsd4_decode_getdeviceinfo(struct nfsd4_compoundargs *argp,
+- struct nfsd4_getdeviceinfo *gdev)
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_getdeviceinfo *gdev = &u->getdeviceinfo;
+ __be32 status;
+
+ memset(gdev, 0, sizeof(*gdev));
+@@ -1717,8 +1757,9 @@ nfsd4_decode_getdeviceinfo(struct nfsd4_compoundargs *argp,
+
+ static __be32
+ nfsd4_decode_layoutcommit(struct nfsd4_compoundargs *argp,
+- struct nfsd4_layoutcommit *lcp)
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_layoutcommit *lcp = &u->layoutcommit;
+ __be32 *p, status;
+
+ memset(lcp, 0, sizeof(*lcp));
+@@ -1753,8 +1794,9 @@ nfsd4_decode_layoutcommit(struct nfsd4_compoundargs *argp,
+
+ static __be32
+ nfsd4_decode_layoutget(struct nfsd4_compoundargs *argp,
+- struct nfsd4_layoutget *lgp)
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_layoutget *lgp = &u->layoutget;
+ __be32 status;
+
+ memset(lgp, 0, sizeof(*lgp));
+@@ -1781,8 +1823,9 @@ nfsd4_decode_layoutget(struct nfsd4_compoundargs *argp,
+
+ static __be32
+ nfsd4_decode_layoutreturn(struct nfsd4_compoundargs *argp,
+- struct nfsd4_layoutreturn *lrp)
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_layoutreturn *lrp = &u->layoutreturn;
+ memset(lrp, 0, sizeof(*lrp));
+ if (xdr_stream_decode_bool(argp->xdr, &lrp->lr_reclaim) < 0)
+ return nfserr_bad_xdr;
+@@ -1795,8 +1838,9 @@ nfsd4_decode_layoutreturn(struct nfsd4_compoundargs *argp,
+ #endif /* CONFIG_NFSD_PNFS */
+
+ static __be32 nfsd4_decode_secinfo_no_name(struct nfsd4_compoundargs *argp,
+- struct nfsd4_secinfo_no_name *sin)
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_secinfo_no_name *sin = &u->secinfo_no_name;
+ if (xdr_stream_decode_u32(argp->xdr, &sin->sin_style) < 0)
+ return nfserr_bad_xdr;
+
+@@ -1806,8 +1850,9 @@ static __be32 nfsd4_decode_secinfo_no_name(struct nfsd4_compoundargs *argp,
+
+ static __be32
+ nfsd4_decode_sequence(struct nfsd4_compoundargs *argp,
+- struct nfsd4_sequence *seq)
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_sequence *seq = &u->sequence;
+ __be32 *p, status;
+
+ status = nfsd4_decode_sessionid4(argp, &seq->sessionid);
+@@ -1826,8 +1871,10 @@ nfsd4_decode_sequence(struct nfsd4_compoundargs *argp,
+ }
+
+ static __be32
+-nfsd4_decode_test_stateid(struct nfsd4_compoundargs *argp, struct nfsd4_test_stateid *test_stateid)
++nfsd4_decode_test_stateid(struct nfsd4_compoundargs *argp,
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_test_stateid *test_stateid = &u->test_stateid;
+ struct nfsd4_test_stateid_id *stateid;
+ __be32 status;
+ u32 i;
+@@ -1852,14 +1899,16 @@ nfsd4_decode_test_stateid(struct nfsd4_compoundargs *argp, struct nfsd4_test_sta
+ }
+
+ static __be32 nfsd4_decode_destroy_clientid(struct nfsd4_compoundargs *argp,
+- struct nfsd4_destroy_clientid *dc)
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_destroy_clientid *dc = &u->destroy_clientid;
+ return nfsd4_decode_clientid4(argp, &dc->clientid);
+ }
+
+ static __be32 nfsd4_decode_reclaim_complete(struct nfsd4_compoundargs *argp,
+- struct nfsd4_reclaim_complete *rc)
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_reclaim_complete *rc = &u->reclaim_complete;
+ if (xdr_stream_decode_bool(argp->xdr, &rc->rca_one_fs) < 0)
+ return nfserr_bad_xdr;
+ return nfs_ok;
+@@ -1867,8 +1916,9 @@ static __be32 nfsd4_decode_reclaim_complete(struct nfsd4_compoundargs *argp,
+
+ static __be32
+ nfsd4_decode_fallocate(struct nfsd4_compoundargs *argp,
+- struct nfsd4_fallocate *fallocate)
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_fallocate *fallocate = &u->allocate;
+ __be32 status;
+
+ status = nfsd4_decode_stateid4(argp, &fallocate->falloc_stateid);
+@@ -1924,8 +1974,9 @@ static __be32 nfsd4_decode_nl4_server(struct nfsd4_compoundargs *argp,
+ }
+
+ static __be32
+-nfsd4_decode_copy(struct nfsd4_compoundargs *argp, struct nfsd4_copy *copy)
++nfsd4_decode_copy(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
+ {
++ struct nfsd4_copy *copy = &u->copy;
+ u32 consecutive, i, count, sync;
+ struct nl4_server *ns_dummy;
+ __be32 status;
+@@ -1982,8 +2033,9 @@ nfsd4_decode_copy(struct nfsd4_compoundargs *argp, struct nfsd4_copy *copy)
+
+ static __be32
+ nfsd4_decode_copy_notify(struct nfsd4_compoundargs *argp,
+- struct nfsd4_copy_notify *cn)
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_copy_notify *cn = &u->copy_notify;
+ __be32 status;
+
+ memset(cn, 0, sizeof(*cn));
+@@ -2002,16 +2054,18 @@ nfsd4_decode_copy_notify(struct nfsd4_compoundargs *argp,
+
+ static __be32
+ nfsd4_decode_offload_status(struct nfsd4_compoundargs *argp,
+- struct nfsd4_offload_status *os)
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_offload_status *os = &u->offload_status;
+ os->count = 0;
+ os->status = 0;
+ return nfsd4_decode_stateid4(argp, &os->stateid);
+ }
+
+ static __be32
+-nfsd4_decode_seek(struct nfsd4_compoundargs *argp, struct nfsd4_seek *seek)
++nfsd4_decode_seek(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
+ {
++ struct nfsd4_seek *seek = &u->seek;
+ __be32 status;
+
+ status = nfsd4_decode_stateid4(argp, &seek->seek_stateid);
+@@ -2028,8 +2082,9 @@ nfsd4_decode_seek(struct nfsd4_compoundargs *argp, struct nfsd4_seek *seek)
+ }
+
+ static __be32
+-nfsd4_decode_clone(struct nfsd4_compoundargs *argp, struct nfsd4_clone *clone)
++nfsd4_decode_clone(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
+ {
++ struct nfsd4_clone *clone = &u->clone;
+ __be32 status;
+
+ status = nfsd4_decode_stateid4(argp, &clone->cl_src_stateid);
+@@ -2154,8 +2209,9 @@ nfsd4_decode_xattr_name(struct nfsd4_compoundargs *argp, char **namep)
+ */
+ static __be32
+ nfsd4_decode_getxattr(struct nfsd4_compoundargs *argp,
+- struct nfsd4_getxattr *getxattr)
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_getxattr *getxattr = &u->getxattr;
+ __be32 status;
+ u32 maxcount;
+
+@@ -2173,8 +2229,9 @@ nfsd4_decode_getxattr(struct nfsd4_compoundargs *argp,
+
+ static __be32
+ nfsd4_decode_setxattr(struct nfsd4_compoundargs *argp,
+- struct nfsd4_setxattr *setxattr)
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_setxattr *setxattr = &u->setxattr;
+ u32 flags, maxcount, size;
+ __be32 status;
+
+@@ -2214,8 +2271,9 @@ nfsd4_decode_setxattr(struct nfsd4_compoundargs *argp,
+
+ static __be32
+ nfsd4_decode_listxattrs(struct nfsd4_compoundargs *argp,
+- struct nfsd4_listxattrs *listxattrs)
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_listxattrs *listxattrs = &u->listxattrs;
+ u32 maxcount;
+
+ memset(listxattrs, 0, sizeof(*listxattrs));
+@@ -2245,113 +2303,114 @@ nfsd4_decode_listxattrs(struct nfsd4_compoundargs *argp,
+
+ static __be32
+ nfsd4_decode_removexattr(struct nfsd4_compoundargs *argp,
+- struct nfsd4_removexattr *removexattr)
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_removexattr *removexattr = &u->removexattr;
+ memset(removexattr, 0, sizeof(*removexattr));
+ return nfsd4_decode_xattr_name(argp, &removexattr->rmxa_name);
+ }
+
+ static __be32
+-nfsd4_decode_noop(struct nfsd4_compoundargs *argp, void *p)
++nfsd4_decode_noop(struct nfsd4_compoundargs *argp, union nfsd4_op_u *p)
+ {
+ return nfs_ok;
+ }
+
+ static __be32
+-nfsd4_decode_notsupp(struct nfsd4_compoundargs *argp, void *p)
++nfsd4_decode_notsupp(struct nfsd4_compoundargs *argp, union nfsd4_op_u *p)
+ {
+ return nfserr_notsupp;
+ }
+
+-typedef __be32(*nfsd4_dec)(struct nfsd4_compoundargs *argp, void *);
++typedef __be32(*nfsd4_dec)(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u);
+
+ static const nfsd4_dec nfsd4_dec_ops[] = {
+- [OP_ACCESS] = (nfsd4_dec)nfsd4_decode_access,
+- [OP_CLOSE] = (nfsd4_dec)nfsd4_decode_close,
+- [OP_COMMIT] = (nfsd4_dec)nfsd4_decode_commit,
+- [OP_CREATE] = (nfsd4_dec)nfsd4_decode_create,
+- [OP_DELEGPURGE] = (nfsd4_dec)nfsd4_decode_notsupp,
+- [OP_DELEGRETURN] = (nfsd4_dec)nfsd4_decode_delegreturn,
+- [OP_GETATTR] = (nfsd4_dec)nfsd4_decode_getattr,
+- [OP_GETFH] = (nfsd4_dec)nfsd4_decode_noop,
+- [OP_LINK] = (nfsd4_dec)nfsd4_decode_link,
+- [OP_LOCK] = (nfsd4_dec)nfsd4_decode_lock,
+- [OP_LOCKT] = (nfsd4_dec)nfsd4_decode_lockt,
+- [OP_LOCKU] = (nfsd4_dec)nfsd4_decode_locku,
+- [OP_LOOKUP] = (nfsd4_dec)nfsd4_decode_lookup,
+- [OP_LOOKUPP] = (nfsd4_dec)nfsd4_decode_noop,
+- [OP_NVERIFY] = (nfsd4_dec)nfsd4_decode_verify,
+- [OP_OPEN] = (nfsd4_dec)nfsd4_decode_open,
+- [OP_OPENATTR] = (nfsd4_dec)nfsd4_decode_notsupp,
+- [OP_OPEN_CONFIRM] = (nfsd4_dec)nfsd4_decode_open_confirm,
+- [OP_OPEN_DOWNGRADE] = (nfsd4_dec)nfsd4_decode_open_downgrade,
+- [OP_PUTFH] = (nfsd4_dec)nfsd4_decode_putfh,
+- [OP_PUTPUBFH] = (nfsd4_dec)nfsd4_decode_putpubfh,
+- [OP_PUTROOTFH] = (nfsd4_dec)nfsd4_decode_noop,
+- [OP_READ] = (nfsd4_dec)nfsd4_decode_read,
+- [OP_READDIR] = (nfsd4_dec)nfsd4_decode_readdir,
+- [OP_READLINK] = (nfsd4_dec)nfsd4_decode_noop,
+- [OP_REMOVE] = (nfsd4_dec)nfsd4_decode_remove,
+- [OP_RENAME] = (nfsd4_dec)nfsd4_decode_rename,
+- [OP_RENEW] = (nfsd4_dec)nfsd4_decode_renew,
+- [OP_RESTOREFH] = (nfsd4_dec)nfsd4_decode_noop,
+- [OP_SAVEFH] = (nfsd4_dec)nfsd4_decode_noop,
+- [OP_SECINFO] = (nfsd4_dec)nfsd4_decode_secinfo,
+- [OP_SETATTR] = (nfsd4_dec)nfsd4_decode_setattr,
+- [OP_SETCLIENTID] = (nfsd4_dec)nfsd4_decode_setclientid,
+- [OP_SETCLIENTID_CONFIRM] = (nfsd4_dec)nfsd4_decode_setclientid_confirm,
+- [OP_VERIFY] = (nfsd4_dec)nfsd4_decode_verify,
+- [OP_WRITE] = (nfsd4_dec)nfsd4_decode_write,
+- [OP_RELEASE_LOCKOWNER] = (nfsd4_dec)nfsd4_decode_release_lockowner,
++ [OP_ACCESS] = nfsd4_decode_access,
++ [OP_CLOSE] = nfsd4_decode_close,
++ [OP_COMMIT] = nfsd4_decode_commit,
++ [OP_CREATE] = nfsd4_decode_create,
++ [OP_DELEGPURGE] = nfsd4_decode_notsupp,
++ [OP_DELEGRETURN] = nfsd4_decode_delegreturn,
++ [OP_GETATTR] = nfsd4_decode_getattr,
++ [OP_GETFH] = nfsd4_decode_noop,
++ [OP_LINK] = nfsd4_decode_link,
++ [OP_LOCK] = nfsd4_decode_lock,
++ [OP_LOCKT] = nfsd4_decode_lockt,
++ [OP_LOCKU] = nfsd4_decode_locku,
++ [OP_LOOKUP] = nfsd4_decode_lookup,
++ [OP_LOOKUPP] = nfsd4_decode_noop,
++ [OP_NVERIFY] = nfsd4_decode_verify,
++ [OP_OPEN] = nfsd4_decode_open,
++ [OP_OPENATTR] = nfsd4_decode_notsupp,
++ [OP_OPEN_CONFIRM] = nfsd4_decode_open_confirm,
++ [OP_OPEN_DOWNGRADE] = nfsd4_decode_open_downgrade,
++ [OP_PUTFH] = nfsd4_decode_putfh,
++ [OP_PUTPUBFH] = nfsd4_decode_putpubfh,
++ [OP_PUTROOTFH] = nfsd4_decode_noop,
++ [OP_READ] = nfsd4_decode_read,
++ [OP_READDIR] = nfsd4_decode_readdir,
++ [OP_READLINK] = nfsd4_decode_noop,
++ [OP_REMOVE] = nfsd4_decode_remove,
++ [OP_RENAME] = nfsd4_decode_rename,
++ [OP_RENEW] = nfsd4_decode_renew,
++ [OP_RESTOREFH] = nfsd4_decode_noop,
++ [OP_SAVEFH] = nfsd4_decode_noop,
++ [OP_SECINFO] = nfsd4_decode_secinfo,
++ [OP_SETATTR] = nfsd4_decode_setattr,
++ [OP_SETCLIENTID] = nfsd4_decode_setclientid,
++ [OP_SETCLIENTID_CONFIRM] = nfsd4_decode_setclientid_confirm,
++ [OP_VERIFY] = nfsd4_decode_verify,
++ [OP_WRITE] = nfsd4_decode_write,
++ [OP_RELEASE_LOCKOWNER] = nfsd4_decode_release_lockowner,
+
+ /* new operations for NFSv4.1 */
+- [OP_BACKCHANNEL_CTL] = (nfsd4_dec)nfsd4_decode_backchannel_ctl,
+- [OP_BIND_CONN_TO_SESSION]= (nfsd4_dec)nfsd4_decode_bind_conn_to_session,
+- [OP_EXCHANGE_ID] = (nfsd4_dec)nfsd4_decode_exchange_id,
+- [OP_CREATE_SESSION] = (nfsd4_dec)nfsd4_decode_create_session,
+- [OP_DESTROY_SESSION] = (nfsd4_dec)nfsd4_decode_destroy_session,
+- [OP_FREE_STATEID] = (nfsd4_dec)nfsd4_decode_free_stateid,
+- [OP_GET_DIR_DELEGATION] = (nfsd4_dec)nfsd4_decode_notsupp,
++ [OP_BACKCHANNEL_CTL] = nfsd4_decode_backchannel_ctl,
++ [OP_BIND_CONN_TO_SESSION] = nfsd4_decode_bind_conn_to_session,
++ [OP_EXCHANGE_ID] = nfsd4_decode_exchange_id,
++ [OP_CREATE_SESSION] = nfsd4_decode_create_session,
++ [OP_DESTROY_SESSION] = nfsd4_decode_destroy_session,
++ [OP_FREE_STATEID] = nfsd4_decode_free_stateid,
++ [OP_GET_DIR_DELEGATION] = nfsd4_decode_notsupp,
+ #ifdef CONFIG_NFSD_PNFS
+- [OP_GETDEVICEINFO] = (nfsd4_dec)nfsd4_decode_getdeviceinfo,
+- [OP_GETDEVICELIST] = (nfsd4_dec)nfsd4_decode_notsupp,
+- [OP_LAYOUTCOMMIT] = (nfsd4_dec)nfsd4_decode_layoutcommit,
+- [OP_LAYOUTGET] = (nfsd4_dec)nfsd4_decode_layoutget,
+- [OP_LAYOUTRETURN] = (nfsd4_dec)nfsd4_decode_layoutreturn,
++ [OP_GETDEVICEINFO] = nfsd4_decode_getdeviceinfo,
++ [OP_GETDEVICELIST] = nfsd4_decode_notsupp,
++ [OP_LAYOUTCOMMIT] = nfsd4_decode_layoutcommit,
++ [OP_LAYOUTGET] = nfsd4_decode_layoutget,
++ [OP_LAYOUTRETURN] = nfsd4_decode_layoutreturn,
+ #else
+- [OP_GETDEVICEINFO] = (nfsd4_dec)nfsd4_decode_notsupp,
+- [OP_GETDEVICELIST] = (nfsd4_dec)nfsd4_decode_notsupp,
+- [OP_LAYOUTCOMMIT] = (nfsd4_dec)nfsd4_decode_notsupp,
+- [OP_LAYOUTGET] = (nfsd4_dec)nfsd4_decode_notsupp,
+- [OP_LAYOUTRETURN] = (nfsd4_dec)nfsd4_decode_notsupp,
++ [OP_GETDEVICEINFO] = nfsd4_decode_notsupp,
++ [OP_GETDEVICELIST] = nfsd4_decode_notsupp,
++ [OP_LAYOUTCOMMIT] = nfsd4_decode_notsupp,
++ [OP_LAYOUTGET] = nfsd4_decode_notsupp,
++ [OP_LAYOUTRETURN] = nfsd4_decode_notsupp,
+ #endif
+- [OP_SECINFO_NO_NAME] = (nfsd4_dec)nfsd4_decode_secinfo_no_name,
+- [OP_SEQUENCE] = (nfsd4_dec)nfsd4_decode_sequence,
+- [OP_SET_SSV] = (nfsd4_dec)nfsd4_decode_notsupp,
+- [OP_TEST_STATEID] = (nfsd4_dec)nfsd4_decode_test_stateid,
+- [OP_WANT_DELEGATION] = (nfsd4_dec)nfsd4_decode_notsupp,
+- [OP_DESTROY_CLIENTID] = (nfsd4_dec)nfsd4_decode_destroy_clientid,
+- [OP_RECLAIM_COMPLETE] = (nfsd4_dec)nfsd4_decode_reclaim_complete,
++ [OP_SECINFO_NO_NAME] = nfsd4_decode_secinfo_no_name,
++ [OP_SEQUENCE] = nfsd4_decode_sequence,
++ [OP_SET_SSV] = nfsd4_decode_notsupp,
++ [OP_TEST_STATEID] = nfsd4_decode_test_stateid,
++ [OP_WANT_DELEGATION] = nfsd4_decode_notsupp,
++ [OP_DESTROY_CLIENTID] = nfsd4_decode_destroy_clientid,
++ [OP_RECLAIM_COMPLETE] = nfsd4_decode_reclaim_complete,
+
+ /* new operations for NFSv4.2 */
+- [OP_ALLOCATE] = (nfsd4_dec)nfsd4_decode_fallocate,
+- [OP_COPY] = (nfsd4_dec)nfsd4_decode_copy,
+- [OP_COPY_NOTIFY] = (nfsd4_dec)nfsd4_decode_copy_notify,
+- [OP_DEALLOCATE] = (nfsd4_dec)nfsd4_decode_fallocate,
+- [OP_IO_ADVISE] = (nfsd4_dec)nfsd4_decode_notsupp,
+- [OP_LAYOUTERROR] = (nfsd4_dec)nfsd4_decode_notsupp,
+- [OP_LAYOUTSTATS] = (nfsd4_dec)nfsd4_decode_notsupp,
+- [OP_OFFLOAD_CANCEL] = (nfsd4_dec)nfsd4_decode_offload_status,
+- [OP_OFFLOAD_STATUS] = (nfsd4_dec)nfsd4_decode_offload_status,
+- [OP_READ_PLUS] = (nfsd4_dec)nfsd4_decode_read,
+- [OP_SEEK] = (nfsd4_dec)nfsd4_decode_seek,
+- [OP_WRITE_SAME] = (nfsd4_dec)nfsd4_decode_notsupp,
+- [OP_CLONE] = (nfsd4_dec)nfsd4_decode_clone,
++ [OP_ALLOCATE] = nfsd4_decode_fallocate,
++ [OP_COPY] = nfsd4_decode_copy,
++ [OP_COPY_NOTIFY] = nfsd4_decode_copy_notify,
++ [OP_DEALLOCATE] = nfsd4_decode_fallocate,
++ [OP_IO_ADVISE] = nfsd4_decode_notsupp,
++ [OP_LAYOUTERROR] = nfsd4_decode_notsupp,
++ [OP_LAYOUTSTATS] = nfsd4_decode_notsupp,
++ [OP_OFFLOAD_CANCEL] = nfsd4_decode_offload_status,
++ [OP_OFFLOAD_STATUS] = nfsd4_decode_offload_status,
++ [OP_READ_PLUS] = nfsd4_decode_read,
++ [OP_SEEK] = nfsd4_decode_seek,
++ [OP_WRITE_SAME] = nfsd4_decode_notsupp,
++ [OP_CLONE] = nfsd4_decode_clone,
+ /* RFC 8276 extended attributes operations */
+- [OP_GETXATTR] = (nfsd4_dec)nfsd4_decode_getxattr,
+- [OP_SETXATTR] = (nfsd4_dec)nfsd4_decode_setxattr,
+- [OP_LISTXATTRS] = (nfsd4_dec)nfsd4_decode_listxattrs,
+- [OP_REMOVEXATTR] = (nfsd4_dec)nfsd4_decode_removexattr,
++ [OP_GETXATTR] = nfsd4_decode_getxattr,
++ [OP_SETXATTR] = nfsd4_decode_setxattr,
++ [OP_LISTXATTRS] = nfsd4_decode_listxattrs,
++ [OP_REMOVEXATTR] = nfsd4_decode_removexattr,
+ };
+
+ static inline bool
+@@ -3643,8 +3702,10 @@ nfsd4_encode_stateid(struct xdr_stream *xdr, stateid_t *sid)
+ }
+
+ static __be32
+-nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_access *access)
++nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr,
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_access *access = &u->access;
+ struct xdr_stream *xdr = resp->xdr;
+ __be32 *p;
+
+@@ -3656,8 +3717,10 @@ nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_
+ return 0;
+ }
+
+-static __be32 nfsd4_encode_bind_conn_to_session(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_bind_conn_to_session *bcts)
++static __be32 nfsd4_encode_bind_conn_to_session(struct nfsd4_compoundres *resp, __be32 nfserr,
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_bind_conn_to_session *bcts = &u->bind_conn_to_session;
+ struct xdr_stream *xdr = resp->xdr;
+ __be32 *p;
+
+@@ -3673,8 +3736,10 @@ static __be32 nfsd4_encode_bind_conn_to_session(struct nfsd4_compoundres *resp,
+ }
+
+ static __be32
+-nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_close *close)
++nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr,
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_close *close = &u->close;
+ struct xdr_stream *xdr = resp->xdr;
+
+ return nfsd4_encode_stateid(xdr, &close->cl_stateid);
+@@ -3682,8 +3747,10 @@ nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_c
+
+
+ static __be32
+-nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_commit *commit)
++nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr,
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_commit *commit = &u->commit;
+ struct xdr_stream *xdr = resp->xdr;
+ __be32 *p;
+
+@@ -3696,8 +3763,10 @@ nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_
+ }
+
+ static __be32
+-nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_create *create)
++nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr,
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_create *create = &u->create;
+ struct xdr_stream *xdr = resp->xdr;
+ __be32 *p;
+
+@@ -3710,8 +3779,10 @@ nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_
+ }
+
+ static __be32
+-nfsd4_encode_getattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_getattr *getattr)
++nfsd4_encode_getattr(struct nfsd4_compoundres *resp, __be32 nfserr,
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_getattr *getattr = &u->getattr;
+ struct svc_fh *fhp = getattr->ga_fhp;
+ struct xdr_stream *xdr = resp->xdr;
+
+@@ -3720,8 +3791,10 @@ nfsd4_encode_getattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4
+ }
+
+ static __be32
+-nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, struct svc_fh **fhpp)
++nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr,
++ union nfsd4_op_u *u)
+ {
++ struct svc_fh **fhpp = &u->getfh;
+ struct xdr_stream *xdr = resp->xdr;
+ struct svc_fh *fhp = *fhpp;
+ unsigned int len;
+@@ -3775,8 +3848,10 @@ nfsd4_encode_lock_denied(struct xdr_stream *xdr, struct nfsd4_lock_denied *ld)
+ }
+
+ static __be32
+-nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lock *lock)
++nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 nfserr,
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_lock *lock = &u->lock;
+ struct xdr_stream *xdr = resp->xdr;
+
+ if (!nfserr)
+@@ -3788,8 +3863,10 @@ nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lo
+ }
+
+ static __be32
+-nfsd4_encode_lockt(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lockt *lockt)
++nfsd4_encode_lockt(struct nfsd4_compoundres *resp, __be32 nfserr,
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_lockt *lockt = &u->lockt;
+ struct xdr_stream *xdr = resp->xdr;
+
+ if (nfserr == nfserr_denied)
+@@ -3798,8 +3875,10 @@ nfsd4_encode_lockt(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_l
+ }
+
+ static __be32
+-nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_locku *locku)
++nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr,
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_locku *locku = &u->locku;
+ struct xdr_stream *xdr = resp->xdr;
+
+ return nfsd4_encode_stateid(xdr, &locku->lu_stateid);
+@@ -3807,8 +3886,10 @@ nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_l
+
+
+ static __be32
+-nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_link *link)
++nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr,
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_link *link = &u->link;
+ struct xdr_stream *xdr = resp->xdr;
+ __be32 *p;
+
+@@ -3821,8 +3902,10 @@ nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_li
+
+
+ static __be32
+-nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open *open)
++nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr,
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_open *open = &u->open;
+ struct xdr_stream *xdr = resp->xdr;
+ __be32 *p;
+
+@@ -3915,16 +3998,20 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op
+ }
+
+ static __be32
+-nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_confirm *oc)
++nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, __be32 nfserr,
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_open_confirm *oc = &u->open_confirm;
+ struct xdr_stream *xdr = resp->xdr;
+
+ return nfsd4_encode_stateid(xdr, &oc->oc_resp_stateid);
+ }
+
+ static __be32
+-nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_downgrade *od)
++nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, __be32 nfserr,
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_open_downgrade *od = &u->open_downgrade;
+ struct xdr_stream *xdr = resp->xdr;
+
+ return nfsd4_encode_stateid(xdr, &od->od_stateid);
+@@ -4023,8 +4110,9 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp,
+
+ static __be32
+ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
+- struct nfsd4_read *read)
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_read *read = &u->read;
+ bool splice_ok = test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags);
+ unsigned long maxcount;
+ struct xdr_stream *xdr = resp->xdr;
+@@ -4065,8 +4153,10 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
+ }
+
+ static __be32
+-nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_readlink *readlink)
++nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr,
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_readlink *readlink = &u->readlink;
+ __be32 *p, *maxcount_p, zero = xdr_zero;
+ struct xdr_stream *xdr = resp->xdr;
+ int length_offset = xdr->buf->len;
+@@ -4110,8 +4200,10 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd
+ }
+
+ static __be32
+-nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_readdir *readdir)
++nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr,
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_readdir *readdir = &u->readdir;
+ int maxcount;
+ int bytes_left;
+ loff_t offset;
+@@ -4201,8 +4293,10 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4
+ }
+
+ static __be32
+-nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_remove *remove)
++nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr,
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_remove *remove = &u->remove;
+ struct xdr_stream *xdr = resp->xdr;
+ __be32 *p;
+
+@@ -4214,8 +4308,10 @@ nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_
+ }
+
+ static __be32
+-nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_rename *rename)
++nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr,
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_rename *rename = &u->rename;
+ struct xdr_stream *xdr = resp->xdr;
+ __be32 *p;
+
+@@ -4297,8 +4393,9 @@ nfsd4_do_encode_secinfo(struct xdr_stream *xdr, struct svc_export *exp)
+
+ static __be32
+ nfsd4_encode_secinfo(struct nfsd4_compoundres *resp, __be32 nfserr,
+- struct nfsd4_secinfo *secinfo)
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_secinfo *secinfo = &u->secinfo;
+ struct xdr_stream *xdr = resp->xdr;
+
+ return nfsd4_do_encode_secinfo(xdr, secinfo->si_exp);
+@@ -4306,8 +4403,9 @@ nfsd4_encode_secinfo(struct nfsd4_compoundres *resp, __be32 nfserr,
+
+ static __be32
+ nfsd4_encode_secinfo_no_name(struct nfsd4_compoundres *resp, __be32 nfserr,
+- struct nfsd4_secinfo_no_name *secinfo)
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_secinfo_no_name *secinfo = &u->secinfo_no_name;
+ struct xdr_stream *xdr = resp->xdr;
+
+ return nfsd4_do_encode_secinfo(xdr, secinfo->sin_exp);
+@@ -4318,8 +4416,10 @@ nfsd4_encode_secinfo_no_name(struct nfsd4_compoundres *resp, __be32 nfserr,
+ * regardless of the error status.
+ */
+ static __be32
+-nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_setattr *setattr)
++nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr,
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_setattr *setattr = &u->setattr;
+ struct xdr_stream *xdr = resp->xdr;
+ __be32 *p;
+
+@@ -4342,8 +4442,10 @@ nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4
+ }
+
+ static __be32
+-nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_setclientid *scd)
++nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr,
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_setclientid *scd = &u->setclientid;
+ struct xdr_stream *xdr = resp->xdr;
+ __be32 *p;
+
+@@ -4366,8 +4468,10 @@ nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, struct n
+ }
+
+ static __be32
+-nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_write *write)
++nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr,
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_write *write = &u->write;
+ struct xdr_stream *xdr = resp->xdr;
+ __be32 *p;
+
+@@ -4383,8 +4487,9 @@ nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_w
+
+ static __be32
+ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr,
+- struct nfsd4_exchange_id *exid)
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_exchange_id *exid = &u->exchange_id;
+ struct xdr_stream *xdr = resp->xdr;
+ __be32 *p;
+ char *major_id;
+@@ -4461,8 +4566,9 @@ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr,
+
+ static __be32
+ nfsd4_encode_create_session(struct nfsd4_compoundres *resp, __be32 nfserr,
+- struct nfsd4_create_session *sess)
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_create_session *sess = &u->create_session;
+ struct xdr_stream *xdr = resp->xdr;
+ __be32 *p;
+
+@@ -4514,8 +4620,9 @@ nfsd4_encode_create_session(struct nfsd4_compoundres *resp, __be32 nfserr,
+
+ static __be32
+ nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr,
+- struct nfsd4_sequence *seq)
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_sequence *seq = &u->sequence;
+ struct xdr_stream *xdr = resp->xdr;
+ __be32 *p;
+
+@@ -4537,8 +4644,9 @@ nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr,
+
+ static __be32
+ nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, __be32 nfserr,
+- struct nfsd4_test_stateid *test_stateid)
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_test_stateid *test_stateid = &u->test_stateid;
+ struct xdr_stream *xdr = resp->xdr;
+ struct nfsd4_test_stateid_id *stateid, *next;
+ __be32 *p;
+@@ -4558,8 +4666,9 @@ nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, __be32 nfserr,
+ #ifdef CONFIG_NFSD_PNFS
+ static __be32
+ nfsd4_encode_getdeviceinfo(struct nfsd4_compoundres *resp, __be32 nfserr,
+- struct nfsd4_getdeviceinfo *gdev)
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_getdeviceinfo *gdev = &u->getdeviceinfo;
+ struct xdr_stream *xdr = resp->xdr;
+ const struct nfsd4_layout_ops *ops;
+ u32 starting_len = xdr->buf->len, needed_len;
+@@ -4611,8 +4720,9 @@ nfsd4_encode_getdeviceinfo(struct nfsd4_compoundres *resp, __be32 nfserr,
+
+ static __be32
+ nfsd4_encode_layoutget(struct nfsd4_compoundres *resp, __be32 nfserr,
+- struct nfsd4_layoutget *lgp)
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_layoutget *lgp = &u->layoutget;
+ struct xdr_stream *xdr = resp->xdr;
+ const struct nfsd4_layout_ops *ops;
+ __be32 *p;
+@@ -4638,8 +4748,9 @@ nfsd4_encode_layoutget(struct nfsd4_compoundres *resp, __be32 nfserr,
+
+ static __be32
+ nfsd4_encode_layoutcommit(struct nfsd4_compoundres *resp, __be32 nfserr,
+- struct nfsd4_layoutcommit *lcp)
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_layoutcommit *lcp = &u->layoutcommit;
+ struct xdr_stream *xdr = resp->xdr;
+ __be32 *p;
+
+@@ -4659,8 +4770,9 @@ nfsd4_encode_layoutcommit(struct nfsd4_compoundres *resp, __be32 nfserr,
+
+ static __be32
+ nfsd4_encode_layoutreturn(struct nfsd4_compoundres *resp, __be32 nfserr,
+- struct nfsd4_layoutreturn *lrp)
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_layoutreturn *lrp = &u->layoutreturn;
+ struct xdr_stream *xdr = resp->xdr;
+ __be32 *p;
+
+@@ -4745,8 +4857,9 @@ nfsd42_encode_nl4_server(struct nfsd4_compoundres *resp, struct nl4_server *ns)
+
+ static __be32
+ nfsd4_encode_copy(struct nfsd4_compoundres *resp, __be32 nfserr,
+- struct nfsd4_copy *copy)
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_copy *copy = &u->copy;
+ __be32 *p;
+
+ nfserr = nfsd42_encode_write_res(resp, &copy->cp_res,
+@@ -4762,8 +4875,9 @@ nfsd4_encode_copy(struct nfsd4_compoundres *resp, __be32 nfserr,
+
+ static __be32
+ nfsd4_encode_offload_status(struct nfsd4_compoundres *resp, __be32 nfserr,
+- struct nfsd4_offload_status *os)
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_offload_status *os = &u->offload_status;
+ struct xdr_stream *xdr = resp->xdr;
+ __be32 *p;
+
+@@ -4777,156 +4891,83 @@ nfsd4_encode_offload_status(struct nfsd4_compoundres *resp, __be32 nfserr,
+
+ static __be32
+ nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp,
+- struct nfsd4_read *read,
+- unsigned long *maxcount, u32 *eof,
+- loff_t *pos)
++ struct nfsd4_read *read)
+ {
+- struct xdr_stream *xdr = resp->xdr;
++ bool splice_ok = test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags);
+ struct file *file = read->rd_nf->nf_file;
+- int starting_len = xdr->buf->len;
+- loff_t hole_pos;
+- __be32 nfserr;
+- __be32 *p, tmp;
+- __be64 tmp64;
+-
+- hole_pos = pos ? *pos : vfs_llseek(file, read->rd_offset, SEEK_HOLE);
+- if (hole_pos > read->rd_offset)
+- *maxcount = min_t(unsigned long, *maxcount, hole_pos - read->rd_offset);
+- *maxcount = min_t(unsigned long, *maxcount, (xdr->buf->buflen - xdr->buf->len));
++ struct xdr_stream *xdr = resp->xdr;
++ unsigned long maxcount;
++ __be32 nfserr, *p;
+
+ /* Content type, offset, byte count */
+ p = xdr_reserve_space(xdr, 4 + 8 + 4);
+ if (!p)
+- return nfserr_resource;
++ return nfserr_io;
++ if (resp->xdr->buf->page_len && splice_ok) {
++ WARN_ON_ONCE(splice_ok);
++ return nfserr_serverfault;
++ }
+
+- read->rd_vlen = xdr_reserve_space_vec(xdr, resp->rqstp->rq_vec, *maxcount);
+- if (read->rd_vlen < 0)
+- return nfserr_resource;
++ maxcount = min_t(unsigned long, read->rd_length,
++ (xdr->buf->buflen - xdr->buf->len));
+
+- nfserr = nfsd_readv(resp->rqstp, read->rd_fhp, file, read->rd_offset,
+- resp->rqstp->rq_vec, read->rd_vlen, maxcount, eof);
++ if (file->f_op->splice_read && splice_ok)
++ nfserr = nfsd4_encode_splice_read(resp, read, file, maxcount);
++ else
++ nfserr = nfsd4_encode_readv(resp, read, file, maxcount);
+ if (nfserr)
+ return nfserr;
+- xdr_truncate_encode(xdr, starting_len + 16 + xdr_align_size(*maxcount));
+-
+- tmp = htonl(NFS4_CONTENT_DATA);
+- write_bytes_to_xdr_buf(xdr->buf, starting_len, &tmp, 4);
+- tmp64 = cpu_to_be64(read->rd_offset);
+- write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp64, 8);
+- tmp = htonl(*maxcount);
+- write_bytes_to_xdr_buf(xdr->buf, starting_len + 12, &tmp, 4);
+-
+- tmp = xdr_zero;
+- write_bytes_to_xdr_buf(xdr->buf, starting_len + 16 + *maxcount, &tmp,
+- xdr_pad_size(*maxcount));
+- return nfs_ok;
+-}
+-
+-static __be32
+-nfsd4_encode_read_plus_hole(struct nfsd4_compoundres *resp,
+- struct nfsd4_read *read,
+- unsigned long *maxcount, u32 *eof)
+-{
+- struct file *file = read->rd_nf->nf_file;
+- loff_t data_pos = vfs_llseek(file, read->rd_offset, SEEK_DATA);
+- loff_t f_size = i_size_read(file_inode(file));
+- unsigned long count;
+- __be32 *p;
+-
+- if (data_pos == -ENXIO)
+- data_pos = f_size;
+- else if (data_pos <= read->rd_offset || (data_pos < f_size && data_pos % PAGE_SIZE))
+- return nfsd4_encode_read_plus_data(resp, read, maxcount, eof, &f_size);
+- count = data_pos - read->rd_offset;
+
+- /* Content type, offset, byte count */
+- p = xdr_reserve_space(resp->xdr, 4 + 8 + 8);
+- if (!p)
+- return nfserr_resource;
+-
+- *p++ = htonl(NFS4_CONTENT_HOLE);
++ *p++ = cpu_to_be32(NFS4_CONTENT_DATA);
+ p = xdr_encode_hyper(p, read->rd_offset);
+- p = xdr_encode_hyper(p, count);
++ *p = cpu_to_be32(read->rd_length);
+
+- *eof = (read->rd_offset + count) >= f_size;
+- *maxcount = min_t(unsigned long, count, *maxcount);
+ return nfs_ok;
+ }
+
+ static __be32
+ nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr,
+- struct nfsd4_read *read)
++ union nfsd4_op_u *u)
+ {
+- unsigned long maxcount, count;
++ struct nfsd4_read *read = &u->read;
++ struct file *file = read->rd_nf->nf_file;
+ struct xdr_stream *xdr = resp->xdr;
+- struct file *file;
+ int starting_len = xdr->buf->len;
+- int last_segment = xdr->buf->len;
+- int segments = 0;
+- __be32 *p, tmp;
+- bool is_data;
+- loff_t pos;
+- u32 eof;
++ u32 segments = 0;
++ __be32 *p;
+
+ if (nfserr)
+ return nfserr;
+- file = read->rd_nf->nf_file;
+
+ /* eof flag, segment count */
+ p = xdr_reserve_space(xdr, 4 + 4);
+ if (!p)
+- return nfserr_resource;
++ return nfserr_io;
+ xdr_commit_encode(xdr);
+
+- maxcount = min_t(unsigned long, read->rd_length,
+- (xdr->buf->buflen - xdr->buf->len));
+- count = maxcount;
+-
+- eof = read->rd_offset >= i_size_read(file_inode(file));
+- if (eof)
++ read->rd_eof = read->rd_offset >= i_size_read(file_inode(file));
++ if (read->rd_eof)
+ goto out;
+
+- pos = vfs_llseek(file, read->rd_offset, SEEK_HOLE);
+- is_data = pos > read->rd_offset;
+-
+- while (count > 0 && !eof) {
+- maxcount = count;
+- if (is_data)
+- nfserr = nfsd4_encode_read_plus_data(resp, read, &maxcount, &eof,
+- segments == 0 ? &pos : NULL);
+- else
+- nfserr = nfsd4_encode_read_plus_hole(resp, read, &maxcount, &eof);
+- if (nfserr)
+- goto out;
+- count -= maxcount;
+- read->rd_offset += maxcount;
+- is_data = !is_data;
+- last_segment = xdr->buf->len;
+- segments++;
+- }
+-
+-out:
+- if (nfserr && segments == 0)
++ nfserr = nfsd4_encode_read_plus_data(resp, read);
++ if (nfserr) {
+ xdr_truncate_encode(xdr, starting_len);
+- else {
+- if (nfserr) {
+- xdr_truncate_encode(xdr, last_segment);
+- nfserr = nfs_ok;
+- eof = 0;
+- }
+- tmp = htonl(eof);
+- write_bytes_to_xdr_buf(xdr->buf, starting_len, &tmp, 4);
+- tmp = htonl(segments);
+- write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp, 4);
++ return nfserr;
+ }
+
++ segments++;
++
++out:
++ p = xdr_encode_bool(p, read->rd_eof);
++ *p = cpu_to_be32(segments);
+ return nfserr;
+ }
+
+ static __be32
+ nfsd4_encode_copy_notify(struct nfsd4_compoundres *resp, __be32 nfserr,
+- struct nfsd4_copy_notify *cn)
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_copy_notify *cn = &u->copy_notify;
+ struct xdr_stream *xdr = resp->xdr;
+ __be32 *p;
+
+@@ -4960,8 +5001,9 @@ nfsd4_encode_copy_notify(struct nfsd4_compoundres *resp, __be32 nfserr,
+
+ static __be32
+ nfsd4_encode_seek(struct nfsd4_compoundres *resp, __be32 nfserr,
+- struct nfsd4_seek *seek)
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_seek *seek = &u->seek;
+ __be32 *p;
+
+ p = xdr_reserve_space(resp->xdr, 4 + 8);
+@@ -4972,7 +5014,8 @@ nfsd4_encode_seek(struct nfsd4_compoundres *resp, __be32 nfserr,
+ }
+
+ static __be32
+-nfsd4_encode_noop(struct nfsd4_compoundres *resp, __be32 nfserr, void *p)
++nfsd4_encode_noop(struct nfsd4_compoundres *resp, __be32 nfserr,
++ union nfsd4_op_u *p)
+ {
+ return nfserr;
+ }
+@@ -5023,8 +5066,9 @@ nfsd4_vbuf_to_stream(struct xdr_stream *xdr, char *buf, u32 buflen)
+
+ static __be32
+ nfsd4_encode_getxattr(struct nfsd4_compoundres *resp, __be32 nfserr,
+- struct nfsd4_getxattr *getxattr)
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_getxattr *getxattr = &u->getxattr;
+ struct xdr_stream *xdr = resp->xdr;
+ __be32 *p, err;
+
+@@ -5047,8 +5091,9 @@ nfsd4_encode_getxattr(struct nfsd4_compoundres *resp, __be32 nfserr,
+
+ static __be32
+ nfsd4_encode_setxattr(struct nfsd4_compoundres *resp, __be32 nfserr,
+- struct nfsd4_setxattr *setxattr)
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_setxattr *setxattr = &u->setxattr;
+ struct xdr_stream *xdr = resp->xdr;
+ __be32 *p;
+
+@@ -5088,8 +5133,9 @@ nfsd4_listxattr_validate_cookie(struct nfsd4_listxattrs *listxattrs,
+
+ static __be32
+ nfsd4_encode_listxattrs(struct nfsd4_compoundres *resp, __be32 nfserr,
+- struct nfsd4_listxattrs *listxattrs)
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_listxattrs *listxattrs = &u->listxattrs;
+ struct xdr_stream *xdr = resp->xdr;
+ u32 cookie_offset, count_offset, eof;
+ u32 left, xdrleft, slen, count;
+@@ -5199,8 +5245,9 @@ nfsd4_encode_listxattrs(struct nfsd4_compoundres *resp, __be32 nfserr,
+
+ static __be32
+ nfsd4_encode_removexattr(struct nfsd4_compoundres *resp, __be32 nfserr,
+- struct nfsd4_removexattr *removexattr)
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_removexattr *removexattr = &u->removexattr;
+ struct xdr_stream *xdr = resp->xdr;
+ __be32 *p;
+
+@@ -5212,7 +5259,7 @@ nfsd4_encode_removexattr(struct nfsd4_compoundres *resp, __be32 nfserr,
+ return 0;
+ }
+
+-typedef __be32(* nfsd4_enc)(struct nfsd4_compoundres *, __be32, void *);
++typedef __be32(*nfsd4_enc)(struct nfsd4_compoundres *, __be32, union nfsd4_op_u *u);
+
+ /*
+ * Note: nfsd4_enc_ops vector is shared for v4.0 and v4.1
+@@ -5220,93 +5267,93 @@ typedef __be32(* nfsd4_enc)(struct nfsd4_compoundres *, __be32, void *);
+ * done in the decoding phase.
+ */
+ static const nfsd4_enc nfsd4_enc_ops[] = {
+- [OP_ACCESS] = (nfsd4_enc)nfsd4_encode_access,
+- [OP_CLOSE] = (nfsd4_enc)nfsd4_encode_close,
+- [OP_COMMIT] = (nfsd4_enc)nfsd4_encode_commit,
+- [OP_CREATE] = (nfsd4_enc)nfsd4_encode_create,
+- [OP_DELEGPURGE] = (nfsd4_enc)nfsd4_encode_noop,
+- [OP_DELEGRETURN] = (nfsd4_enc)nfsd4_encode_noop,
+- [OP_GETATTR] = (nfsd4_enc)nfsd4_encode_getattr,
+- [OP_GETFH] = (nfsd4_enc)nfsd4_encode_getfh,
+- [OP_LINK] = (nfsd4_enc)nfsd4_encode_link,
+- [OP_LOCK] = (nfsd4_enc)nfsd4_encode_lock,
+- [OP_LOCKT] = (nfsd4_enc)nfsd4_encode_lockt,
+- [OP_LOCKU] = (nfsd4_enc)nfsd4_encode_locku,
+- [OP_LOOKUP] = (nfsd4_enc)nfsd4_encode_noop,
+- [OP_LOOKUPP] = (nfsd4_enc)nfsd4_encode_noop,
+- [OP_NVERIFY] = (nfsd4_enc)nfsd4_encode_noop,
+- [OP_OPEN] = (nfsd4_enc)nfsd4_encode_open,
+- [OP_OPENATTR] = (nfsd4_enc)nfsd4_encode_noop,
+- [OP_OPEN_CONFIRM] = (nfsd4_enc)nfsd4_encode_open_confirm,
+- [OP_OPEN_DOWNGRADE] = (nfsd4_enc)nfsd4_encode_open_downgrade,
+- [OP_PUTFH] = (nfsd4_enc)nfsd4_encode_noop,
+- [OP_PUTPUBFH] = (nfsd4_enc)nfsd4_encode_noop,
+- [OP_PUTROOTFH] = (nfsd4_enc)nfsd4_encode_noop,
+- [OP_READ] = (nfsd4_enc)nfsd4_encode_read,
+- [OP_READDIR] = (nfsd4_enc)nfsd4_encode_readdir,
+- [OP_READLINK] = (nfsd4_enc)nfsd4_encode_readlink,
+- [OP_REMOVE] = (nfsd4_enc)nfsd4_encode_remove,
+- [OP_RENAME] = (nfsd4_enc)nfsd4_encode_rename,
+- [OP_RENEW] = (nfsd4_enc)nfsd4_encode_noop,
+- [OP_RESTOREFH] = (nfsd4_enc)nfsd4_encode_noop,
+- [OP_SAVEFH] = (nfsd4_enc)nfsd4_encode_noop,
+- [OP_SECINFO] = (nfsd4_enc)nfsd4_encode_secinfo,
+- [OP_SETATTR] = (nfsd4_enc)nfsd4_encode_setattr,
+- [OP_SETCLIENTID] = (nfsd4_enc)nfsd4_encode_setclientid,
+- [OP_SETCLIENTID_CONFIRM] = (nfsd4_enc)nfsd4_encode_noop,
+- [OP_VERIFY] = (nfsd4_enc)nfsd4_encode_noop,
+- [OP_WRITE] = (nfsd4_enc)nfsd4_encode_write,
+- [OP_RELEASE_LOCKOWNER] = (nfsd4_enc)nfsd4_encode_noop,
++ [OP_ACCESS] = nfsd4_encode_access,
++ [OP_CLOSE] = nfsd4_encode_close,
++ [OP_COMMIT] = nfsd4_encode_commit,
++ [OP_CREATE] = nfsd4_encode_create,
++ [OP_DELEGPURGE] = nfsd4_encode_noop,
++ [OP_DELEGRETURN] = nfsd4_encode_noop,
++ [OP_GETATTR] = nfsd4_encode_getattr,
++ [OP_GETFH] = nfsd4_encode_getfh,
++ [OP_LINK] = nfsd4_encode_link,
++ [OP_LOCK] = nfsd4_encode_lock,
++ [OP_LOCKT] = nfsd4_encode_lockt,
++ [OP_LOCKU] = nfsd4_encode_locku,
++ [OP_LOOKUP] = nfsd4_encode_noop,
++ [OP_LOOKUPP] = nfsd4_encode_noop,
++ [OP_NVERIFY] = nfsd4_encode_noop,
++ [OP_OPEN] = nfsd4_encode_open,
++ [OP_OPENATTR] = nfsd4_encode_noop,
++ [OP_OPEN_CONFIRM] = nfsd4_encode_open_confirm,
++ [OP_OPEN_DOWNGRADE] = nfsd4_encode_open_downgrade,
++ [OP_PUTFH] = nfsd4_encode_noop,
++ [OP_PUTPUBFH] = nfsd4_encode_noop,
++ [OP_PUTROOTFH] = nfsd4_encode_noop,
++ [OP_READ] = nfsd4_encode_read,
++ [OP_READDIR] = nfsd4_encode_readdir,
++ [OP_READLINK] = nfsd4_encode_readlink,
++ [OP_REMOVE] = nfsd4_encode_remove,
++ [OP_RENAME] = nfsd4_encode_rename,
++ [OP_RENEW] = nfsd4_encode_noop,
++ [OP_RESTOREFH] = nfsd4_encode_noop,
++ [OP_SAVEFH] = nfsd4_encode_noop,
++ [OP_SECINFO] = nfsd4_encode_secinfo,
++ [OP_SETATTR] = nfsd4_encode_setattr,
++ [OP_SETCLIENTID] = nfsd4_encode_setclientid,
++ [OP_SETCLIENTID_CONFIRM] = nfsd4_encode_noop,
++ [OP_VERIFY] = nfsd4_encode_noop,
++ [OP_WRITE] = nfsd4_encode_write,
++ [OP_RELEASE_LOCKOWNER] = nfsd4_encode_noop,
+
+ /* NFSv4.1 operations */
+- [OP_BACKCHANNEL_CTL] = (nfsd4_enc)nfsd4_encode_noop,
+- [OP_BIND_CONN_TO_SESSION] = (nfsd4_enc)nfsd4_encode_bind_conn_to_session,
+- [OP_EXCHANGE_ID] = (nfsd4_enc)nfsd4_encode_exchange_id,
+- [OP_CREATE_SESSION] = (nfsd4_enc)nfsd4_encode_create_session,
+- [OP_DESTROY_SESSION] = (nfsd4_enc)nfsd4_encode_noop,
+- [OP_FREE_STATEID] = (nfsd4_enc)nfsd4_encode_noop,
+- [OP_GET_DIR_DELEGATION] = (nfsd4_enc)nfsd4_encode_noop,
++ [OP_BACKCHANNEL_CTL] = nfsd4_encode_noop,
++ [OP_BIND_CONN_TO_SESSION] = nfsd4_encode_bind_conn_to_session,
++ [OP_EXCHANGE_ID] = nfsd4_encode_exchange_id,
++ [OP_CREATE_SESSION] = nfsd4_encode_create_session,
++ [OP_DESTROY_SESSION] = nfsd4_encode_noop,
++ [OP_FREE_STATEID] = nfsd4_encode_noop,
++ [OP_GET_DIR_DELEGATION] = nfsd4_encode_noop,
+ #ifdef CONFIG_NFSD_PNFS
+- [OP_GETDEVICEINFO] = (nfsd4_enc)nfsd4_encode_getdeviceinfo,
+- [OP_GETDEVICELIST] = (nfsd4_enc)nfsd4_encode_noop,
+- [OP_LAYOUTCOMMIT] = (nfsd4_enc)nfsd4_encode_layoutcommit,
+- [OP_LAYOUTGET] = (nfsd4_enc)nfsd4_encode_layoutget,
+- [OP_LAYOUTRETURN] = (nfsd4_enc)nfsd4_encode_layoutreturn,
++ [OP_GETDEVICEINFO] = nfsd4_encode_getdeviceinfo,
++ [OP_GETDEVICELIST] = nfsd4_encode_noop,
++ [OP_LAYOUTCOMMIT] = nfsd4_encode_layoutcommit,
++ [OP_LAYOUTGET] = nfsd4_encode_layoutget,
++ [OP_LAYOUTRETURN] = nfsd4_encode_layoutreturn,
+ #else
+- [OP_GETDEVICEINFO] = (nfsd4_enc)nfsd4_encode_noop,
+- [OP_GETDEVICELIST] = (nfsd4_enc)nfsd4_encode_noop,
+- [OP_LAYOUTCOMMIT] = (nfsd4_enc)nfsd4_encode_noop,
+- [OP_LAYOUTGET] = (nfsd4_enc)nfsd4_encode_noop,
+- [OP_LAYOUTRETURN] = (nfsd4_enc)nfsd4_encode_noop,
++ [OP_GETDEVICEINFO] = nfsd4_encode_noop,
++ [OP_GETDEVICELIST] = nfsd4_encode_noop,
++ [OP_LAYOUTCOMMIT] = nfsd4_encode_noop,
++ [OP_LAYOUTGET] = nfsd4_encode_noop,
++ [OP_LAYOUTRETURN] = nfsd4_encode_noop,
+ #endif
+- [OP_SECINFO_NO_NAME] = (nfsd4_enc)nfsd4_encode_secinfo_no_name,
+- [OP_SEQUENCE] = (nfsd4_enc)nfsd4_encode_sequence,
+- [OP_SET_SSV] = (nfsd4_enc)nfsd4_encode_noop,
+- [OP_TEST_STATEID] = (nfsd4_enc)nfsd4_encode_test_stateid,
+- [OP_WANT_DELEGATION] = (nfsd4_enc)nfsd4_encode_noop,
+- [OP_DESTROY_CLIENTID] = (nfsd4_enc)nfsd4_encode_noop,
+- [OP_RECLAIM_COMPLETE] = (nfsd4_enc)nfsd4_encode_noop,
++ [OP_SECINFO_NO_NAME] = nfsd4_encode_secinfo_no_name,
++ [OP_SEQUENCE] = nfsd4_encode_sequence,
++ [OP_SET_SSV] = nfsd4_encode_noop,
++ [OP_TEST_STATEID] = nfsd4_encode_test_stateid,
++ [OP_WANT_DELEGATION] = nfsd4_encode_noop,
++ [OP_DESTROY_CLIENTID] = nfsd4_encode_noop,
++ [OP_RECLAIM_COMPLETE] = nfsd4_encode_noop,
+
+ /* NFSv4.2 operations */
+- [OP_ALLOCATE] = (nfsd4_enc)nfsd4_encode_noop,
+- [OP_COPY] = (nfsd4_enc)nfsd4_encode_copy,
+- [OP_COPY_NOTIFY] = (nfsd4_enc)nfsd4_encode_copy_notify,
+- [OP_DEALLOCATE] = (nfsd4_enc)nfsd4_encode_noop,
+- [OP_IO_ADVISE] = (nfsd4_enc)nfsd4_encode_noop,
+- [OP_LAYOUTERROR] = (nfsd4_enc)nfsd4_encode_noop,
+- [OP_LAYOUTSTATS] = (nfsd4_enc)nfsd4_encode_noop,
+- [OP_OFFLOAD_CANCEL] = (nfsd4_enc)nfsd4_encode_noop,
+- [OP_OFFLOAD_STATUS] = (nfsd4_enc)nfsd4_encode_offload_status,
+- [OP_READ_PLUS] = (nfsd4_enc)nfsd4_encode_read_plus,
+- [OP_SEEK] = (nfsd4_enc)nfsd4_encode_seek,
+- [OP_WRITE_SAME] = (nfsd4_enc)nfsd4_encode_noop,
+- [OP_CLONE] = (nfsd4_enc)nfsd4_encode_noop,
++ [OP_ALLOCATE] = nfsd4_encode_noop,
++ [OP_COPY] = nfsd4_encode_copy,
++ [OP_COPY_NOTIFY] = nfsd4_encode_copy_notify,
++ [OP_DEALLOCATE] = nfsd4_encode_noop,
++ [OP_IO_ADVISE] = nfsd4_encode_noop,
++ [OP_LAYOUTERROR] = nfsd4_encode_noop,
++ [OP_LAYOUTSTATS] = nfsd4_encode_noop,
++ [OP_OFFLOAD_CANCEL] = nfsd4_encode_noop,
++ [OP_OFFLOAD_STATUS] = nfsd4_encode_offload_status,
++ [OP_READ_PLUS] = nfsd4_encode_read_plus,
++ [OP_SEEK] = nfsd4_encode_seek,
++ [OP_WRITE_SAME] = nfsd4_encode_noop,
++ [OP_CLONE] = nfsd4_encode_noop,
+
+ /* RFC 8276 extended attributes operations */
+- [OP_GETXATTR] = (nfsd4_enc)nfsd4_encode_getxattr,
+- [OP_SETXATTR] = (nfsd4_enc)nfsd4_encode_setxattr,
+- [OP_LISTXATTRS] = (nfsd4_enc)nfsd4_encode_listxattrs,
+- [OP_REMOVEXATTR] = (nfsd4_enc)nfsd4_encode_removexattr,
++ [OP_GETXATTR] = nfsd4_encode_getxattr,
++ [OP_SETXATTR] = nfsd4_encode_setxattr,
++ [OP_LISTXATTRS] = nfsd4_encode_listxattrs,
++ [OP_REMOVEXATTR] = nfsd4_encode_removexattr,
+ };
+
+ /*
+diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
+index 573de0d49e172..76a60e7a75097 100644
+--- a/fs/nfsd/nfsctl.c
++++ b/fs/nfsd/nfsctl.c
+@@ -581,7 +581,9 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size)
+
+ cmd = sign == '-' ? NFSD_CLEAR : NFSD_SET;
+ switch(num) {
++#ifdef CONFIG_NFSD_V2
+ case 2:
++#endif
+ case 3:
+ nfsd_vers(nn, num, cmd);
+ break;
+@@ -601,7 +603,9 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size)
+ }
+ break;
+ default:
+- return -EINVAL;
++ /* Ignore requests to disable non-existent versions */
++ if (cmd == NFSD_SET)
++ return -EINVAL;
+ }
+ vers += len + 1;
+ } while ((len = qword_get(&mesg, vers, size)) > 0);
+@@ -1448,9 +1452,7 @@ static __net_init int nfsd_init_net(struct net *net)
+ goto out_idmap_error;
+ nn->nfsd_versions = NULL;
+ nn->nfsd4_minorversions = NULL;
+- retval = nfsd4_init_leases_net(nn);
+- if (retval)
+- goto out_drc_error;
++ nfsd4_init_leases_net(nn);
+ retval = nfsd_reply_cache_init(nn);
+ if (retval)
+ goto out_cache_error;
+@@ -1460,8 +1462,6 @@ static __net_init int nfsd_init_net(struct net *net)
+ return 0;
+
+ out_cache_error:
+- nfsd4_leases_net_shutdown(nn);
+-out_drc_error:
+ nfsd_idmap_shutdown(net);
+ out_idmap_error:
+ nfsd_export_shutdown(net);
+@@ -1477,7 +1477,6 @@ static __net_exit void nfsd_exit_net(struct net *net)
+ nfsd_idmap_shutdown(net);
+ nfsd_export_shutdown(net);
+ nfsd_netns_free_versions(net_generic(net, nfsd_net_id));
+- nfsd4_leases_net_shutdown(nn);
+ }
+
+ static struct pernet_operations nfsd_net_ops = {
+diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
+index 09726c5b9a317..fa0144a742678 100644
+--- a/fs/nfsd/nfsd.h
++++ b/fs/nfsd/nfsd.h
+@@ -64,8 +64,7 @@ struct readdir_cd {
+
+
+ extern struct svc_program nfsd_program;
+-extern const struct svc_version nfsd_version2, nfsd_version3,
+- nfsd_version4;
++extern const struct svc_version nfsd_version2, nfsd_version3, nfsd_version4;
+ extern struct mutex nfsd_mutex;
+ extern spinlock_t nfsd_drc_lock;
+ extern unsigned long nfsd_drc_max_mem;
+@@ -505,8 +504,7 @@ extern void unregister_cld_notifier(void);
+ extern void nfsd4_ssc_init_umount_work(struct nfsd_net *nn);
+ #endif
+
+-extern int nfsd4_init_leases_net(struct nfsd_net *nn);
+-extern void nfsd4_leases_net_shutdown(struct nfsd_net *nn);
++extern void nfsd4_init_leases_net(struct nfsd_net *nn);
+
+ #else /* CONFIG_NFSD_V4 */
+ static inline int nfsd4_is_junction(struct dentry *dentry)
+@@ -514,8 +512,7 @@ static inline int nfsd4_is_junction(struct dentry *dentry)
+ return 0;
+ }
+
+-static inline int nfsd4_init_leases_net(struct nfsd_net *nn) { return 0; };
+-static inline void nfsd4_leases_net_shutdown(struct nfsd_net *nn) {};
++static inline void nfsd4_init_leases_net(struct nfsd_net *nn) { };
+
+ #define register_cld_notifier() 0
+ #define unregister_cld_notifier() do { } while(0)
+diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h
+index c3ae6414fc5cf..513e028b0bbee 100644
+--- a/fs/nfsd/nfsfh.h
++++ b/fs/nfsd/nfsfh.h
+@@ -220,7 +220,7 @@ __be32 fh_update(struct svc_fh *);
+ void fh_put(struct svc_fh *);
+
+ static __inline__ struct svc_fh *
+-fh_copy(struct svc_fh *dst, struct svc_fh *src)
++fh_copy(struct svc_fh *dst, const struct svc_fh *src)
+ {
+ WARN_ON(src->fh_dentry);
+
+@@ -229,7 +229,7 @@ fh_copy(struct svc_fh *dst, struct svc_fh *src)
+ }
+
+ static inline void
+-fh_copy_shallow(struct knfsd_fh *dst, struct knfsd_fh *src)
++fh_copy_shallow(struct knfsd_fh *dst, const struct knfsd_fh *src)
+ {
+ dst->fh_size = src->fh_size;
+ memcpy(&dst->fh_raw, &src->fh_raw, src->fh_size);
+@@ -243,7 +243,8 @@ fh_init(struct svc_fh *fhp, int maxsize)
+ return fhp;
+ }
+
+-static inline bool fh_match(struct knfsd_fh *fh1, struct knfsd_fh *fh2)
++static inline bool fh_match(const struct knfsd_fh *fh1,
++ const struct knfsd_fh *fh2)
+ {
+ if (fh1->fh_size != fh2->fh_size)
+ return false;
+@@ -252,7 +253,8 @@ static inline bool fh_match(struct knfsd_fh *fh1, struct knfsd_fh *fh2)
+ return true;
+ }
+
+-static inline bool fh_fsid_match(struct knfsd_fh *fh1, struct knfsd_fh *fh2)
++static inline bool fh_fsid_match(const struct knfsd_fh *fh1,
++ const struct knfsd_fh *fh2)
+ {
+ if (fh1->fh_fsid_type != fh2->fh_fsid_type)
+ return false;
+diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
+index 82b3ddeacc338..9744443c39652 100644
+--- a/fs/nfsd/nfsproc.c
++++ b/fs/nfsd/nfsproc.c
+@@ -211,7 +211,7 @@ nfsd_proc_read(struct svc_rqst *rqstp)
+ if (resp->status == nfs_ok)
+ resp->status = fh_getattr(&resp->fh, &resp->stat);
+ else if (resp->status == nfserr_jukebox)
+- return rpc_drop_reply;
++ set_bit(RQ_DROPME, &rqstp->rq_flags);
+ return rpc_success;
+ }
+
+@@ -246,7 +246,7 @@ nfsd_proc_write(struct svc_rqst *rqstp)
+ if (resp->status == nfs_ok)
+ resp->status = fh_getattr(&resp->fh, &resp->stat);
+ else if (resp->status == nfserr_jukebox)
+- return rpc_drop_reply;
++ set_bit(RQ_DROPME, &rqstp->rq_flags);
+ return rpc_success;
+ }
+
+@@ -848,65 +848,3 @@ const struct svc_version nfsd_version2 = {
+ .vs_dispatch = nfsd_dispatch,
+ .vs_xdrsize = NFS2_SVC_XDRSIZE,
+ };
+-
+-/*
+- * Map errnos to NFS errnos.
+- */
+-__be32
+-nfserrno (int errno)
+-{
+- static struct {
+- __be32 nfserr;
+- int syserr;
+- } nfs_errtbl[] = {
+- { nfs_ok, 0 },
+- { nfserr_perm, -EPERM },
+- { nfserr_noent, -ENOENT },
+- { nfserr_io, -EIO },
+- { nfserr_nxio, -ENXIO },
+- { nfserr_fbig, -E2BIG },
+- { nfserr_stale, -EBADF },
+- { nfserr_acces, -EACCES },
+- { nfserr_exist, -EEXIST },
+- { nfserr_xdev, -EXDEV },
+- { nfserr_mlink, -EMLINK },
+- { nfserr_nodev, -ENODEV },
+- { nfserr_notdir, -ENOTDIR },
+- { nfserr_isdir, -EISDIR },
+- { nfserr_inval, -EINVAL },
+- { nfserr_fbig, -EFBIG },
+- { nfserr_nospc, -ENOSPC },
+- { nfserr_rofs, -EROFS },
+- { nfserr_mlink, -EMLINK },
+- { nfserr_nametoolong, -ENAMETOOLONG },
+- { nfserr_notempty, -ENOTEMPTY },
+-#ifdef EDQUOT
+- { nfserr_dquot, -EDQUOT },
+-#endif
+- { nfserr_stale, -ESTALE },
+- { nfserr_jukebox, -ETIMEDOUT },
+- { nfserr_jukebox, -ERESTARTSYS },
+- { nfserr_jukebox, -EAGAIN },
+- { nfserr_jukebox, -EWOULDBLOCK },
+- { nfserr_jukebox, -ENOMEM },
+- { nfserr_io, -ETXTBSY },
+- { nfserr_notsupp, -EOPNOTSUPP },
+- { nfserr_toosmall, -ETOOSMALL },
+- { nfserr_serverfault, -ESERVERFAULT },
+- { nfserr_serverfault, -ENFILE },
+- { nfserr_io, -EREMOTEIO },
+- { nfserr_stale, -EOPENSTALE },
+- { nfserr_io, -EUCLEAN },
+- { nfserr_perm, -ENOKEY },
+- { nfserr_no_grace, -ENOGRACE},
+- };
+- int i;
+-
+- for (i = 0; i < ARRAY_SIZE(nfs_errtbl); i++) {
+- if (nfs_errtbl[i].syserr == errno)
+- return nfs_errtbl[i].nfserr;
+- }
+- WARN_ONCE(1, "nfsd: non-standard errno: %d\n", errno);
+- return nfserr_io;
+-}
+-
+diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
+index c7695ebd28dc3..0c75636054a54 100644
+--- a/fs/nfsd/nfssvc.c
++++ b/fs/nfsd/nfssvc.c
+@@ -91,8 +91,12 @@ unsigned long nfsd_drc_mem_used;
+ #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
+ static struct svc_stat nfsd_acl_svcstats;
+ static const struct svc_version *nfsd_acl_version[] = {
++# if defined(CONFIG_NFSD_V2_ACL)
+ [2] = &nfsd_acl_version2,
++# endif
++# if defined(CONFIG_NFSD_V3_ACL)
+ [3] = &nfsd_acl_version3,
++# endif
+ };
+
+ #define NFSD_ACL_MINVERS 2
+@@ -116,7 +120,9 @@ static struct svc_stat nfsd_acl_svcstats = {
+ #endif /* defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) */
+
+ static const struct svc_version *nfsd_version[] = {
++#if defined(CONFIG_NFSD_V2)
+ [2] = &nfsd_version2,
++#endif
+ [3] = &nfsd_version3,
+ #if defined(CONFIG_NFSD_V4)
+ [4] = &nfsd_version4,
+@@ -1065,7 +1071,7 @@ int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
+
+ nfs_reply = xdr_inline_decode(&rqstp->rq_res_stream, 0);
+ *statp = proc->pc_func(rqstp);
+- if (*statp == rpc_drop_reply || test_bit(RQ_DROPME, &rqstp->rq_flags))
++ if (test_bit(RQ_DROPME, &rqstp->rq_flags))
+ goto out_update_drop;
+
+ if (!proc->pc_encode(rqstp, &rqstp->rq_res_stream))
+diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
+index e2daef3cc0034..e94634d305912 100644
+--- a/fs/nfsd/state.h
++++ b/fs/nfsd/state.h
+@@ -368,6 +368,7 @@ struct nfs4_client {
+ #define NFSD4_CLIENT_UPCALL_LOCK (5) /* upcall serialization */
+ #define NFSD4_CLIENT_CB_FLAG_MASK (1 << NFSD4_CLIENT_CB_UPDATE | \
+ 1 << NFSD4_CLIENT_CB_KILL)
++#define NFSD4_CLIENT_CB_RECALL_ANY (6)
+ unsigned long cl_flags;
+ const struct cred *cl_cb_cred;
+ struct rpc_clnt *cl_cb_client;
+@@ -411,6 +412,10 @@ struct nfs4_client {
+
+ unsigned int cl_state;
+ atomic_t cl_delegs_in_recall;
++
++ struct nfsd4_cb_recall_any *cl_ra;
++ time64_t cl_ra_time;
++ struct list_head cl_ra_cblist;
+ };
+
+ /* struct nfs4_client_reset
+@@ -536,16 +541,13 @@ struct nfs4_clnt_odstate {
+ * inode can have multiple filehandles associated with it, so there is
+ * (potentially) a many to one relationship between this struct and struct
+ * inode.
+- *
+- * These are hashed by filehandle in the file_hashtbl, which is protected by
+- * the global state_lock spinlock.
+ */
+ struct nfs4_file {
+ refcount_t fi_ref;
+ struct inode * fi_inode;
+ bool fi_aliased;
+ spinlock_t fi_lock;
+- struct hlist_node fi_hash; /* hash on fi_fhandle */
++ struct rhlist_head fi_rlist;
+ struct list_head fi_stateids;
+ union {
+ struct list_head fi_delegations;
+@@ -639,6 +641,7 @@ enum nfsd4_cb_op {
+ NFSPROC4_CLNT_CB_OFFLOAD,
+ NFSPROC4_CLNT_CB_SEQUENCE,
+ NFSPROC4_CLNT_CB_NOTIFY_LOCK,
++ NFSPROC4_CLNT_CB_RECALL_ANY,
+ };
+
+ /* Returns true iff a is later than b: */
+diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
+index 132335011ccae..4183819ea0829 100644
+--- a/fs/nfsd/trace.h
++++ b/fs/nfsd/trace.h
+@@ -9,9 +9,12 @@
+ #define _NFSD_TRACE_H
+
+ #include <linux/tracepoint.h>
++#include <linux/sunrpc/xprt.h>
++#include <trace/misc/nfs.h>
+
+ #include "export.h"
+ #include "nfsfh.h"
++#include "xdr4.h"
+
+ #define NFSD_TRACE_PROC_RES_FIELDS \
+ __field(unsigned int, netns_ino) \
+@@ -604,6 +607,7 @@ DEFINE_STATEID_EVENT(layout_recall_release);
+
+ DEFINE_STATEID_EVENT(open);
+ DEFINE_STATEID_EVENT(deleg_read);
++DEFINE_STATEID_EVENT(deleg_return);
+ DEFINE_STATEID_EVENT(deleg_recall);
+
+ DECLARE_EVENT_CLASS(nfsd_stateseqid_class,
+@@ -636,6 +640,61 @@ DEFINE_EVENT(nfsd_stateseqid_class, nfsd_##name, \
+ DEFINE_STATESEQID_EVENT(preprocess);
+ DEFINE_STATESEQID_EVENT(open_confirm);
+
++TRACE_DEFINE_ENUM(NFS4_OPEN_STID);
++TRACE_DEFINE_ENUM(NFS4_LOCK_STID);
++TRACE_DEFINE_ENUM(NFS4_DELEG_STID);
++TRACE_DEFINE_ENUM(NFS4_CLOSED_STID);
++TRACE_DEFINE_ENUM(NFS4_REVOKED_DELEG_STID);
++TRACE_DEFINE_ENUM(NFS4_CLOSED_DELEG_STID);
++TRACE_DEFINE_ENUM(NFS4_LAYOUT_STID);
++
++#define show_stid_type(x) \
++ __print_flags(x, "|", \
++ { NFS4_OPEN_STID, "OPEN" }, \
++ { NFS4_LOCK_STID, "LOCK" }, \
++ { NFS4_DELEG_STID, "DELEG" }, \
++ { NFS4_CLOSED_STID, "CLOSED" }, \
++ { NFS4_REVOKED_DELEG_STID, "REVOKED" }, \
++ { NFS4_CLOSED_DELEG_STID, "CLOSED_DELEG" }, \
++ { NFS4_LAYOUT_STID, "LAYOUT" })
++
++DECLARE_EVENT_CLASS(nfsd_stid_class,
++ TP_PROTO(
++ const struct nfs4_stid *stid
++ ),
++ TP_ARGS(stid),
++ TP_STRUCT__entry(
++ __field(unsigned long, sc_type)
++ __field(int, sc_count)
++ __field(u32, cl_boot)
++ __field(u32, cl_id)
++ __field(u32, si_id)
++ __field(u32, si_generation)
++ ),
++ TP_fast_assign(
++ const stateid_t *stp = &stid->sc_stateid;
++
++ __entry->sc_type = stid->sc_type;
++ __entry->sc_count = refcount_read(&stid->sc_count);
++ __entry->cl_boot = stp->si_opaque.so_clid.cl_boot;
++ __entry->cl_id = stp->si_opaque.so_clid.cl_id;
++ __entry->si_id = stp->si_opaque.so_id;
++ __entry->si_generation = stp->si_generation;
++ ),
++ TP_printk("client %08x:%08x stateid %08x:%08x ref=%d type=%s",
++ __entry->cl_boot, __entry->cl_id,
++ __entry->si_id, __entry->si_generation,
++ __entry->sc_count, show_stid_type(__entry->sc_type)
++ )
++);
++
++#define DEFINE_STID_EVENT(name) \
++DEFINE_EVENT(nfsd_stid_class, nfsd_stid_##name, \
++ TP_PROTO(const struct nfs4_stid *stid), \
++ TP_ARGS(stid))
++
++DEFINE_STID_EVENT(revoke);
++
+ DECLARE_EVENT_CLASS(nfsd_clientid_class,
+ TP_PROTO(const clientid_t *clid),
+ TP_ARGS(clid),
+@@ -1436,6 +1495,32 @@ TRACE_EVENT(nfsd_cb_offload,
+ __entry->fh_hash, __entry->count, __entry->status)
+ );
+
++TRACE_EVENT(nfsd_cb_recall_any,
++ TP_PROTO(
++ const struct nfsd4_cb_recall_any *ra
++ ),
++ TP_ARGS(ra),
++ TP_STRUCT__entry(
++ __field(u32, cl_boot)
++ __field(u32, cl_id)
++ __field(u32, keep)
++ __field(unsigned long, bmval0)
++ __sockaddr(addr, ra->ra_cb.cb_clp->cl_cb_conn.cb_addrlen)
++ ),
++ TP_fast_assign(
++ __entry->cl_boot = ra->ra_cb.cb_clp->cl_clientid.cl_boot;
++ __entry->cl_id = ra->ra_cb.cb_clp->cl_clientid.cl_id;
++ __entry->keep = ra->ra_keep;
++ __entry->bmval0 = ra->ra_bmval[0];
++ __assign_sockaddr(addr, &ra->ra_cb.cb_clp->cl_addr,
++ ra->ra_cb.cb_clp->cl_cb_conn.cb_addrlen);
++ ),
++ TP_printk("addr=%pISpc client %08x:%08x keep=%u bmval0=%s",
++ __get_sockaddr(addr), __entry->cl_boot, __entry->cl_id,
++ __entry->keep, show_rca_mask(__entry->bmval0)
++ )
++);
++
+ DECLARE_EVENT_CLASS(nfsd_cb_done_class,
+ TP_PROTO(
+ const stateid_t *stp,
+@@ -1475,6 +1560,27 @@ DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_notify_lock_done);
+ DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_layout_done);
+ DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_offload_done);
+
++TRACE_EVENT(nfsd_cb_recall_any_done,
++ TP_PROTO(
++ const struct nfsd4_callback *cb,
++ const struct rpc_task *task
++ ),
++ TP_ARGS(cb, task),
++ TP_STRUCT__entry(
++ __field(u32, cl_boot)
++ __field(u32, cl_id)
++ __field(int, status)
++ ),
++ TP_fast_assign(
++ __entry->status = task->tk_status;
++ __entry->cl_boot = cb->cb_clp->cl_clientid.cl_boot;
++ __entry->cl_id = cb->cb_clp->cl_clientid.cl_id;
++ ),
++ TP_printk("client %08x:%08x status=%d",
++ __entry->cl_boot, __entry->cl_id, __entry->status
++ )
++);
++
+ #endif /* _NFSD_TRACE_H */
+
+ #undef TRACE_INCLUDE_PATH
+diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
+index eccc6ce55a63a..5d6a61d47a905 100644
+--- a/fs/nfsd/vfs.c
++++ b/fs/nfsd/vfs.c
+@@ -49,6 +49,69 @@
+
+ #define NFSDDBG_FACILITY NFSDDBG_FILEOP
+
++/**
++ * nfserrno - Map Linux errnos to NFS errnos
++ * @errno: POSIX(-ish) error code to be mapped
++ *
++ * Returns the appropriate (net-endian) nfserr_* (or nfs_ok if errno is 0). If
++ * it's an error we don't expect, log it once and return nfserr_io.
++ */
++__be32
++nfserrno (int errno)
++{
++ static struct {
++ __be32 nfserr;
++ int syserr;
++ } nfs_errtbl[] = {
++ { nfs_ok, 0 },
++ { nfserr_perm, -EPERM },
++ { nfserr_noent, -ENOENT },
++ { nfserr_io, -EIO },
++ { nfserr_nxio, -ENXIO },
++ { nfserr_fbig, -E2BIG },
++ { nfserr_stale, -EBADF },
++ { nfserr_acces, -EACCES },
++ { nfserr_exist, -EEXIST },
++ { nfserr_xdev, -EXDEV },
++ { nfserr_mlink, -EMLINK },
++ { nfserr_nodev, -ENODEV },
++ { nfserr_notdir, -ENOTDIR },
++ { nfserr_isdir, -EISDIR },
++ { nfserr_inval, -EINVAL },
++ { nfserr_fbig, -EFBIG },
++ { nfserr_nospc, -ENOSPC },
++ { nfserr_rofs, -EROFS },
++ { nfserr_mlink, -EMLINK },
++ { nfserr_nametoolong, -ENAMETOOLONG },
++ { nfserr_notempty, -ENOTEMPTY },
++ { nfserr_dquot, -EDQUOT },
++ { nfserr_stale, -ESTALE },
++ { nfserr_jukebox, -ETIMEDOUT },
++ { nfserr_jukebox, -ERESTARTSYS },
++ { nfserr_jukebox, -EAGAIN },
++ { nfserr_jukebox, -EWOULDBLOCK },
++ { nfserr_jukebox, -ENOMEM },
++ { nfserr_io, -ETXTBSY },
++ { nfserr_notsupp, -EOPNOTSUPP },
++ { nfserr_toosmall, -ETOOSMALL },
++ { nfserr_serverfault, -ESERVERFAULT },
++ { nfserr_serverfault, -ENFILE },
++ { nfserr_io, -EREMOTEIO },
++ { nfserr_stale, -EOPENSTALE },
++ { nfserr_io, -EUCLEAN },
++ { nfserr_perm, -ENOKEY },
++ { nfserr_no_grace, -ENOGRACE},
++ };
++ int i;
++
++ for (i = 0; i < ARRAY_SIZE(nfs_errtbl); i++) {
++ if (nfs_errtbl[i].syserr == errno)
++ return nfs_errtbl[i].nfserr;
++ }
++ WARN_ONCE(1, "nfsd: non-standard errno: %d\n", errno);
++ return nfserr_io;
++}
++
+ /*
+ * Called from nfsd_lookup and encode_dirent. Check if we have crossed
+ * a mount point.
+@@ -1317,7 +1380,6 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ iap->ia_mode &= ~current_umask();
+
+ err = 0;
+- host_err = 0;
+ switch (type) {
+ case S_IFREG:
+ host_err = vfs_create(&init_user_ns, dirp, dchild, iap->ia_mode, true);
+diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
+index 9744b041105b5..dbdfef7ae85bb 100644
+--- a/fs/nfsd/vfs.h
++++ b/fs/nfsd/vfs.h
+@@ -60,6 +60,7 @@ static inline void nfsd_attrs_free(struct nfsd_attrs *attrs)
+ posix_acl_release(attrs->na_dpacl);
+ }
+
++__be32 nfserrno (int errno);
+ int nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp,
+ struct svc_export **expp);
+ __be32 nfsd_lookup(struct svc_rqst *, struct svc_fh *,
+diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
+index 36c3340c1d54a..510978e602da6 100644
+--- a/fs/nfsd/xdr4.h
++++ b/fs/nfsd/xdr4.h
+@@ -896,5 +896,10 @@ struct nfsd4_operation {
+ union nfsd4_op_u *);
+ };
+
++struct nfsd4_cb_recall_any {
++ struct nfsd4_callback ra_cb;
++ u32 ra_keep;
++ u32 ra_bmval[1];
++};
+
+ #endif
+diff --git a/fs/nfsd/xdr4cb.h b/fs/nfsd/xdr4cb.h
+index 547cf07cf4e08..0d39af1b00a0f 100644
+--- a/fs/nfsd/xdr4cb.h
++++ b/fs/nfsd/xdr4cb.h
+@@ -48,3 +48,9 @@
+ #define NFS4_dec_cb_offload_sz (cb_compound_dec_hdr_sz + \
+ cb_sequence_dec_sz + \
+ op_dec_sz)
++#define NFS4_enc_cb_recall_any_sz (cb_compound_enc_hdr_sz + \
++ cb_sequence_enc_sz + \
++ 1 + 1 + 1)
++#define NFS4_dec_cb_recall_any_sz (cb_compound_dec_hdr_sz + \
++ cb_sequence_dec_sz + \
++ op_dec_sz)
+diff --git a/fs/ntfs3/frecord.c b/fs/ntfs3/frecord.c
+index bb7e33c240737..d260260900241 100644
+--- a/fs/ntfs3/frecord.c
++++ b/fs/ntfs3/frecord.c
+@@ -102,7 +102,7 @@ void ni_clear(struct ntfs_inode *ni)
+ {
+ struct rb_node *node;
+
+- if (!ni->vfs_inode.i_nlink && is_rec_inuse(ni->mi.mrec))
++ if (!ni->vfs_inode.i_nlink && ni->mi.mrec && is_rec_inuse(ni->mi.mrec))
+ ni_delete_all(ni);
+
+ al_destroy(ni);
+@@ -3255,6 +3255,9 @@ int ni_write_inode(struct inode *inode, int sync, const char *hint)
+ return 0;
+ }
+
++ if (!ni->mi.mrec)
++ goto out;
++
+ if (is_rec_inuse(ni->mi.mrec) &&
+ !(sbi->flags & NTFS_FLAGS_LOG_REPLAYING) && inode->i_nlink) {
+ bool modified = false;
+diff --git a/fs/ntfs3/fsntfs.c b/fs/ntfs3/fsntfs.c
+index 1eac80d55b554..4c2d079b3d49b 100644
+--- a/fs/ntfs3/fsntfs.c
++++ b/fs/ntfs3/fsntfs.c
+@@ -1674,6 +1674,7 @@ struct ntfs_inode *ntfs_new_inode(struct ntfs_sb_info *sbi, CLST rno, bool dir)
+
+ out:
+ if (err) {
++ make_bad_inode(inode);
+ iput(inode);
+ ni = ERR_PTR(err);
+ }
+diff --git a/fs/ntfs3/index.c b/fs/ntfs3/index.c
+index 7371f7855e4c4..eee01db6e0cc5 100644
+--- a/fs/ntfs3/index.c
++++ b/fs/ntfs3/index.c
+@@ -998,6 +998,7 @@ struct INDEX_ROOT *indx_get_root(struct ntfs_index *indx, struct ntfs_inode *ni,
+ struct ATTR_LIST_ENTRY *le = NULL;
+ struct ATTRIB *a;
+ const struct INDEX_NAMES *in = &s_index_names[indx->type];
++ struct INDEX_ROOT *root = NULL;
+
+ a = ni_find_attr(ni, NULL, &le, ATTR_ROOT, in->name, in->name_len, NULL,
+ mi);
+@@ -1007,7 +1008,15 @@ struct INDEX_ROOT *indx_get_root(struct ntfs_index *indx, struct ntfs_inode *ni,
+ if (attr)
+ *attr = a;
+
+- return resident_data_ex(a, sizeof(struct INDEX_ROOT));
++ root = resident_data_ex(a, sizeof(struct INDEX_ROOT));
++
++ /* length check */
++ if (root && offsetof(struct INDEX_ROOT, ihdr) + le32_to_cpu(root->ihdr.used) >
++ le32_to_cpu(a->res.data_size)) {
++ return NULL;
++ }
++
++ return root;
+ }
+
+ static int indx_write(struct ntfs_index *indx, struct ntfs_inode *ni,
+diff --git a/include/linux/bvec.h b/include/linux/bvec.h
+index 9e3dac51eb26b..d4dbaae8b5218 100644
+--- a/include/linux/bvec.h
++++ b/include/linux/bvec.h
+@@ -59,7 +59,7 @@ struct bvec_iter {
+
+ unsigned int bi_bvec_done; /* number of bytes completed in
+ current bvec */
+-} __packed;
++} __packed __aligned(4);
+
+ struct bvec_iter_all {
+ struct bio_vec bv;
+diff --git a/include/linux/decompress/mm.h b/include/linux/decompress/mm.h
+index 9192986b1a731..ac862422df158 100644
+--- a/include/linux/decompress/mm.h
++++ b/include/linux/decompress/mm.h
+@@ -48,7 +48,7 @@ MALLOC_VISIBLE void *malloc(int size)
+ if (!malloc_ptr)
+ malloc_ptr = free_mem_ptr;
+
+- malloc_ptr = (malloc_ptr + 3) & ~3; /* Align */
++ malloc_ptr = (malloc_ptr + 7) & ~7; /* Align */
+
+ p = (void *)malloc_ptr;
+ malloc_ptr += size;
+diff --git a/include/linux/efi.h b/include/linux/efi.h
+index 4e1bfee9675d2..de6d6558a4d30 100644
+--- a/include/linux/efi.h
++++ b/include/linux/efi.h
+@@ -390,6 +390,7 @@ void efi_native_runtime_setup(void);
+ #define EFI_RT_PROPERTIES_TABLE_GUID EFI_GUID(0xeb66918a, 0x7eef, 0x402a, 0x84, 0x2e, 0x93, 0x1d, 0x21, 0xc3, 0x8a, 0xe9)
+ #define EFI_DXE_SERVICES_TABLE_GUID EFI_GUID(0x05ad34ba, 0x6f02, 0x4214, 0x95, 0x2e, 0x4d, 0xa0, 0x39, 0x8e, 0x2b, 0xb9)
+ #define EFI_SMBIOS_PROTOCOL_GUID EFI_GUID(0x03583ff6, 0xcb36, 0x4940, 0x94, 0x7e, 0xb9, 0xb3, 0x9f, 0x4a, 0xfa, 0xf7)
++#define EFI_MEMORY_ATTRIBUTE_PROTOCOL_GUID EFI_GUID(0xf4560cf6, 0x40ec, 0x4b4a, 0xa1, 0x92, 0xbf, 0x1d, 0x57, 0xd0, 0xb1, 0x89)
+
+ #define EFI_IMAGE_SECURITY_DATABASE_GUID EFI_GUID(0xd719b2cb, 0x3d3a, 0x4596, 0xa3, 0xbc, 0xda, 0xd0, 0x0e, 0x67, 0x65, 0x6f)
+ #define EFI_SHIM_LOCK_GUID EFI_GUID(0x605dab50, 0xe046, 0x4300, 0xab, 0xb6, 0x3d, 0xd8, 0x10, 0xdd, 0x8b, 0x23)
+diff --git a/include/linux/fs.h b/include/linux/fs.h
+index 67313881f8ac1..092d8fa10153f 100644
+--- a/include/linux/fs.h
++++ b/include/linux/fs.h
+@@ -1189,6 +1189,13 @@ extern void show_fd_locks(struct seq_file *f,
+ struct file *filp, struct files_struct *files);
+ extern bool locks_owner_has_blockers(struct file_lock_context *flctx,
+ fl_owner_t owner);
++
++static inline struct file_lock_context *
++locks_inode_context(const struct inode *inode)
++{
++ return smp_load_acquire(&inode->i_flctx);
++}
++
+ #else /* !CONFIG_FILE_LOCKING */
+ static inline int fcntl_getlk(struct file *file, unsigned int cmd,
+ struct flock __user *user)
+@@ -1334,6 +1341,13 @@ static inline bool locks_owner_has_blockers(struct file_lock_context *flctx,
+ {
+ return false;
+ }
++
++static inline struct file_lock_context *
++locks_inode_context(const struct inode *inode)
++{
++ return NULL;
++}
++
+ #endif /* !CONFIG_FILE_LOCKING */
+
+ static inline struct inode *file_inode(const struct file *f)
+diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
+index bef8db9d6c085..e5f4b6f8d1c09 100644
+--- a/include/linux/netfilter.h
++++ b/include/linux/netfilter.h
+@@ -437,11 +437,13 @@ nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family)
+ #include <linux/netfilter/nf_conntrack_zones_common.h>
+
+ void nf_ct_attach(struct sk_buff *, const struct sk_buff *);
++void nf_ct_set_closing(struct nf_conntrack *nfct);
+ struct nf_conntrack_tuple;
+ bool nf_ct_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple,
+ const struct sk_buff *skb);
+ #else
+ static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {}
++static inline void nf_ct_set_closing(struct nf_conntrack *nfct) {}
+ struct nf_conntrack_tuple;
+ static inline bool nf_ct_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple,
+ const struct sk_buff *skb)
+@@ -459,6 +461,8 @@ struct nf_ct_hook {
+ bool (*get_tuple_skb)(struct nf_conntrack_tuple *,
+ const struct sk_buff *);
+ void (*attach)(struct sk_buff *nskb, const struct sk_buff *skb);
++ void (*set_closing)(struct nf_conntrack *nfct);
++ int (*confirm)(struct sk_buff *skb);
+ };
+ extern const struct nf_ct_hook __rcu *nf_ct_hook;
+
+diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
+index 8d04b6a5964c4..730003c4f4af4 100644
+--- a/include/linux/nfs4.h
++++ b/include/linux/nfs4.h
+@@ -732,4 +732,17 @@ enum nfs4_setxattr_options {
+ SETXATTR4_CREATE = 1,
+ SETXATTR4_REPLACE = 2,
+ };
++
++enum {
++ RCA4_TYPE_MASK_RDATA_DLG = 0,
++ RCA4_TYPE_MASK_WDATA_DLG = 1,
++ RCA4_TYPE_MASK_DIR_DLG = 2,
++ RCA4_TYPE_MASK_FILE_LAYOUT = 3,
++ RCA4_TYPE_MASK_BLK_LAYOUT = 4,
++ RCA4_TYPE_MASK_OBJ_LAYOUT_MIN = 8,
++ RCA4_TYPE_MASK_OBJ_LAYOUT_MAX = 9,
++ RCA4_TYPE_MASK_OTHER_LAYOUT_MIN = 12,
++ RCA4_TYPE_MASK_OTHER_LAYOUT_MAX = 15,
++};
++
+ #endif
+diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h
+index 43ac3fa760dbe..9783b9107d76b 100644
+--- a/include/linux/usb/composite.h
++++ b/include/linux/usb/composite.h
+@@ -412,6 +412,8 @@ extern int composite_dev_prepare(struct usb_composite_driver *composite,
+ extern int composite_os_desc_req_prepare(struct usb_composite_dev *cdev,
+ struct usb_ep *ep0);
+ void composite_dev_cleanup(struct usb_composite_dev *cdev);
++void check_remote_wakeup_config(struct usb_gadget *g,
++ struct usb_configuration *c);
+
+ static inline struct usb_composite_driver *to_cdriver(
+ struct usb_gadget_driver *gdrv)
+diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h
+index dc3092cea99e9..5bec668b41dcd 100644
+--- a/include/linux/usb/gadget.h
++++ b/include/linux/usb/gadget.h
+@@ -309,6 +309,7 @@ struct usb_udc;
+ struct usb_gadget_ops {
+ int (*get_frame)(struct usb_gadget *);
+ int (*wakeup)(struct usb_gadget *);
++ int (*set_remote_wakeup)(struct usb_gadget *, int set);
+ int (*set_selfpowered) (struct usb_gadget *, int is_selfpowered);
+ int (*vbus_session) (struct usb_gadget *, int is_active);
+ int (*vbus_draw) (struct usb_gadget *, unsigned mA);
+@@ -383,6 +384,8 @@ struct usb_gadget_ops {
+ * @connected: True if gadget is connected.
+ * @lpm_capable: If the gadget max_speed is FULL or HIGH, this flag
+ * indicates that it supports LPM as per the LPM ECN & errata.
++ * @wakeup_capable: True if gadget is capable of sending remote wakeup.
++ * @wakeup_armed: True if gadget is armed by the host for remote wakeup.
+ * @irq: the interrupt number for device controller.
+ * @id_number: a unique ID number for ensuring that gadget names are distinct
+ *
+@@ -444,6 +447,8 @@ struct usb_gadget {
+ unsigned deactivated:1;
+ unsigned connected:1;
+ unsigned lpm_capable:1;
++ unsigned wakeup_capable:1;
++ unsigned wakeup_armed:1;
+ int irq;
+ int id_number;
+ };
+@@ -600,6 +605,7 @@ static inline int gadget_is_otg(struct usb_gadget *g)
+ #if IS_ENABLED(CONFIG_USB_GADGET)
+ int usb_gadget_frame_number(struct usb_gadget *gadget);
+ int usb_gadget_wakeup(struct usb_gadget *gadget);
++int usb_gadget_set_remote_wakeup(struct usb_gadget *gadget, int set);
+ int usb_gadget_set_selfpowered(struct usb_gadget *gadget);
+ int usb_gadget_clear_selfpowered(struct usb_gadget *gadget);
+ int usb_gadget_vbus_connect(struct usb_gadget *gadget);
+@@ -615,6 +621,8 @@ static inline int usb_gadget_frame_number(struct usb_gadget *gadget)
+ { return 0; }
+ static inline int usb_gadget_wakeup(struct usb_gadget *gadget)
+ { return 0; }
++static inline int usb_gadget_set_remote_wakeup(struct usb_gadget *gadget, int set)
++{ return 0; }
+ static inline int usb_gadget_set_selfpowered(struct usb_gadget *gadget)
+ { return 0; }
+ static inline int usb_gadget_clear_selfpowered(struct usb_gadget *gadget)
+diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h
+index c48186bf47372..21da31e1dff5d 100644
+--- a/include/net/ipv6_stubs.h
++++ b/include/net/ipv6_stubs.h
+@@ -85,6 +85,11 @@ struct ipv6_bpf_stub {
+ sockptr_t optval, unsigned int optlen);
+ int (*ipv6_getsockopt)(struct sock *sk, int level, int optname,
+ sockptr_t optval, sockptr_t optlen);
++ int (*ipv6_dev_get_saddr)(struct net *net,
++ const struct net_device *dst_dev,
++ const struct in6_addr *daddr,
++ unsigned int prefs,
++ struct in6_addr *saddr);
+ };
+ extern const struct ipv6_bpf_stub *ipv6_bpf_stub __read_mostly;
+
+diff --git a/include/net/mctp.h b/include/net/mctp.h
+index 82800d521c3de..7ed84054f4623 100644
+--- a/include/net/mctp.h
++++ b/include/net/mctp.h
+@@ -249,6 +249,7 @@ struct mctp_route {
+ struct mctp_route *mctp_route_lookup(struct net *net, unsigned int dnet,
+ mctp_eid_t daddr);
+
++/* always takes ownership of skb */
+ int mctp_local_output(struct sock *sk, struct mctp_route *rt,
+ struct sk_buff *skb, mctp_eid_t daddr, u8 req_tag);
+
+diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
+index 6a2019aaa4644..3dbf947285be2 100644
+--- a/include/net/netfilter/nf_conntrack.h
++++ b/include/net/netfilter/nf_conntrack.h
+@@ -125,6 +125,12 @@ struct nf_conn {
+ union nf_conntrack_proto proto;
+ };
+
++static inline struct nf_conn *
++nf_ct_to_nf_conn(const struct nf_conntrack *nfct)
++{
++ return container_of(nfct, struct nf_conn, ct_general);
++}
++
+ static inline struct nf_conn *
+ nf_ct_tuplehash_to_ctrack(const struct nf_conntrack_tuple_hash *hash)
+ {
+@@ -175,6 +181,8 @@ nf_ct_get(const struct sk_buff *skb, enum ip_conntrack_info *ctinfo)
+
+ void nf_ct_destroy(struct nf_conntrack *nfct);
+
++void nf_conntrack_tcp_set_closing(struct nf_conn *ct);
++
+ /* decrement reference count on a conntrack */
+ static inline void nf_ct_put(struct nf_conn *ct)
+ {
+diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
+index d2751ed536df2..a64713fe52640 100644
+--- a/include/scsi/scsi_device.h
++++ b/include/scsi/scsi_device.h
+@@ -204,6 +204,7 @@ struct scsi_device {
+ unsigned use_10_for_rw:1; /* first try 10-byte read / write */
+ unsigned use_10_for_ms:1; /* first try 10-byte mode sense/select */
+ unsigned set_dbd_for_ms:1; /* Set "DBD" field in mode sense */
++ unsigned read_before_ms:1; /* perform a READ before MODE SENSE */
+ unsigned no_report_opcodes:1; /* no REPORT SUPPORTED OPERATION CODES */
+ unsigned no_write_same:1; /* no WRITE SAME command */
+ unsigned use_16_for_rw:1; /* Use read/write(16) over read/write(10) */
+@@ -479,28 +480,51 @@ extern const char *scsi_device_state_name(enum scsi_device_state);
+ extern int scsi_is_sdev_device(const struct device *);
+ extern int scsi_is_target_device(const struct device *);
+ extern void scsi_sanitize_inquiry_string(unsigned char *s, int len);
+-extern int __scsi_execute(struct scsi_device *sdev, const unsigned char *cmd,
+- int data_direction, void *buffer, unsigned bufflen,
+- unsigned char *sense, struct scsi_sense_hdr *sshdr,
+- int timeout, int retries, blk_opf_t flags,
+- req_flags_t rq_flags, int *resid);
++
++/* Optional arguments to scsi_execute_cmd */
++struct scsi_exec_args {
++ unsigned char *sense; /* sense buffer */
++ unsigned int sense_len; /* sense buffer len */
++ struct scsi_sense_hdr *sshdr; /* decoded sense header */
++ blk_mq_req_flags_t req_flags; /* BLK_MQ_REQ flags */
++ int *resid; /* residual length */
++};
++
++int scsi_execute_cmd(struct scsi_device *sdev, const unsigned char *cmd,
++ blk_opf_t opf, void *buffer, unsigned int bufflen,
++ int timeout, int retries,
++ const struct scsi_exec_args *args);
++
+ /* Make sure any sense buffer is the correct size. */
+-#define scsi_execute(sdev, cmd, data_direction, buffer, bufflen, sense, \
+- sshdr, timeout, retries, flags, rq_flags, resid) \
++#define scsi_execute(_sdev, _cmd, _data_dir, _buffer, _bufflen, _sense, \
++ _sshdr, _timeout, _retries, _flags, _rq_flags, \
++ _resid) \
+ ({ \
+- BUILD_BUG_ON((sense) != NULL && \
+- sizeof(sense) != SCSI_SENSE_BUFFERSIZE); \
+- __scsi_execute(sdev, cmd, data_direction, buffer, bufflen, \
+- sense, sshdr, timeout, retries, flags, rq_flags, \
+- resid); \
++ scsi_execute_cmd(_sdev, _cmd, (_data_dir == DMA_TO_DEVICE ? \
++ REQ_OP_DRV_OUT : REQ_OP_DRV_IN) | _flags, \
++ _buffer, _bufflen, _timeout, _retries, \
++ &(struct scsi_exec_args) { \
++ .sense = _sense, \
++ .sshdr = _sshdr, \
++ .req_flags = _rq_flags & RQF_PM ? \
++ BLK_MQ_REQ_PM : 0, \
++ .resid = _resid, \
++ }); \
+ })
++
+ static inline int scsi_execute_req(struct scsi_device *sdev,
+ const unsigned char *cmd, int data_direction, void *buffer,
+ unsigned bufflen, struct scsi_sense_hdr *sshdr, int timeout,
+ int retries, int *resid)
+ {
+- return scsi_execute(sdev, cmd, data_direction, buffer,
+- bufflen, NULL, sshdr, timeout, retries, 0, 0, resid);
++ return scsi_execute_cmd(sdev, cmd,
++ data_direction == DMA_TO_DEVICE ?
++ REQ_OP_DRV_OUT : REQ_OP_DRV_IN, buffer,
++ bufflen, timeout, retries,
++ &(struct scsi_exec_args) {
++ .sshdr = sshdr,
++ .resid = resid,
++ });
+ }
+ extern void sdev_disable_disk_events(struct scsi_device *sdev);
+ extern void sdev_enable_disk_events(struct scsi_device *sdev);
+diff --git a/include/trace/events/fs.h b/include/trace/events/fs.h
+deleted file mode 100644
+index 738b97f22f365..0000000000000
+--- a/include/trace/events/fs.h
++++ /dev/null
+@@ -1,122 +0,0 @@
+-/* SPDX-License-Identifier: GPL-2.0 */
+-/*
+- * Display helpers for generic filesystem items
+- *
+- * Author: Chuck Lever <chuck.lever@oracle.com>
+- *
+- * Copyright (c) 2020, Oracle and/or its affiliates.
+- */
+-
+-#include <linux/fs.h>
+-
+-#define show_fs_dirent_type(x) \
+- __print_symbolic(x, \
+- { DT_UNKNOWN, "UNKNOWN" }, \
+- { DT_FIFO, "FIFO" }, \
+- { DT_CHR, "CHR" }, \
+- { DT_DIR, "DIR" }, \
+- { DT_BLK, "BLK" }, \
+- { DT_REG, "REG" }, \
+- { DT_LNK, "LNK" }, \
+- { DT_SOCK, "SOCK" }, \
+- { DT_WHT, "WHT" })
+-
+-#define show_fs_fcntl_open_flags(x) \
+- __print_flags(x, "|", \
+- { O_WRONLY, "O_WRONLY" }, \
+- { O_RDWR, "O_RDWR" }, \
+- { O_CREAT, "O_CREAT" }, \
+- { O_EXCL, "O_EXCL" }, \
+- { O_NOCTTY, "O_NOCTTY" }, \
+- { O_TRUNC, "O_TRUNC" }, \
+- { O_APPEND, "O_APPEND" }, \
+- { O_NONBLOCK, "O_NONBLOCK" }, \
+- { O_DSYNC, "O_DSYNC" }, \
+- { O_DIRECT, "O_DIRECT" }, \
+- { O_LARGEFILE, "O_LARGEFILE" }, \
+- { O_DIRECTORY, "O_DIRECTORY" }, \
+- { O_NOFOLLOW, "O_NOFOLLOW" }, \
+- { O_NOATIME, "O_NOATIME" }, \
+- { O_CLOEXEC, "O_CLOEXEC" })
+-
+-#define __fmode_flag(x) { (__force unsigned long)FMODE_##x, #x }
+-#define show_fs_fmode_flags(x) \
+- __print_flags(x, "|", \
+- __fmode_flag(READ), \
+- __fmode_flag(WRITE), \
+- __fmode_flag(EXEC))
+-
+-#ifdef CONFIG_64BIT
+-#define show_fs_fcntl_cmd(x) \
+- __print_symbolic(x, \
+- { F_DUPFD, "DUPFD" }, \
+- { F_GETFD, "GETFD" }, \
+- { F_SETFD, "SETFD" }, \
+- { F_GETFL, "GETFL" }, \
+- { F_SETFL, "SETFL" }, \
+- { F_GETLK, "GETLK" }, \
+- { F_SETLK, "SETLK" }, \
+- { F_SETLKW, "SETLKW" }, \
+- { F_SETOWN, "SETOWN" }, \
+- { F_GETOWN, "GETOWN" }, \
+- { F_SETSIG, "SETSIG" }, \
+- { F_GETSIG, "GETSIG" }, \
+- { F_SETOWN_EX, "SETOWN_EX" }, \
+- { F_GETOWN_EX, "GETOWN_EX" }, \
+- { F_GETOWNER_UIDS, "GETOWNER_UIDS" }, \
+- { F_OFD_GETLK, "OFD_GETLK" }, \
+- { F_OFD_SETLK, "OFD_SETLK" }, \
+- { F_OFD_SETLKW, "OFD_SETLKW" })
+-#else /* CONFIG_64BIT */
+-#define show_fs_fcntl_cmd(x) \
+- __print_symbolic(x, \
+- { F_DUPFD, "DUPFD" }, \
+- { F_GETFD, "GETFD" }, \
+- { F_SETFD, "SETFD" }, \
+- { F_GETFL, "GETFL" }, \
+- { F_SETFL, "SETFL" }, \
+- { F_GETLK, "GETLK" }, \
+- { F_SETLK, "SETLK" }, \
+- { F_SETLKW, "SETLKW" }, \
+- { F_SETOWN, "SETOWN" }, \
+- { F_GETOWN, "GETOWN" }, \
+- { F_SETSIG, "SETSIG" }, \
+- { F_GETSIG, "GETSIG" }, \
+- { F_GETLK64, "GETLK64" }, \
+- { F_SETLK64, "SETLK64" }, \
+- { F_SETLKW64, "SETLKW64" }, \
+- { F_SETOWN_EX, "SETOWN_EX" }, \
+- { F_GETOWN_EX, "GETOWN_EX" }, \
+- { F_GETOWNER_UIDS, "GETOWNER_UIDS" }, \
+- { F_OFD_GETLK, "OFD_GETLK" }, \
+- { F_OFD_SETLK, "OFD_SETLK" }, \
+- { F_OFD_SETLKW, "OFD_SETLKW" })
+-#endif /* CONFIG_64BIT */
+-
+-#define show_fs_fcntl_lock_type(x) \
+- __print_symbolic(x, \
+- { F_RDLCK, "RDLCK" }, \
+- { F_WRLCK, "WRLCK" }, \
+- { F_UNLCK, "UNLCK" })
+-
+-#define show_fs_lookup_flags(flags) \
+- __print_flags(flags, "|", \
+- { LOOKUP_FOLLOW, "FOLLOW" }, \
+- { LOOKUP_DIRECTORY, "DIRECTORY" }, \
+- { LOOKUP_AUTOMOUNT, "AUTOMOUNT" }, \
+- { LOOKUP_EMPTY, "EMPTY" }, \
+- { LOOKUP_DOWN, "DOWN" }, \
+- { LOOKUP_MOUNTPOINT, "MOUNTPOINT" }, \
+- { LOOKUP_REVAL, "REVAL" }, \
+- { LOOKUP_RCU, "RCU" }, \
+- { LOOKUP_OPEN, "OPEN" }, \
+- { LOOKUP_CREATE, "CREATE" }, \
+- { LOOKUP_EXCL, "EXCL" }, \
+- { LOOKUP_RENAME_TARGET, "RENAME_TARGET" }, \
+- { LOOKUP_PARENT, "PARENT" }, \
+- { LOOKUP_NO_SYMLINKS, "NO_SYMLINKS" }, \
+- { LOOKUP_NO_MAGICLINKS, "NO_MAGICLINKS" }, \
+- { LOOKUP_NO_XDEV, "NO_XDEV" }, \
+- { LOOKUP_BENEATH, "BENEATH" }, \
+- { LOOKUP_IN_ROOT, "IN_ROOT" }, \
+- { LOOKUP_CACHED, "CACHED" })
+diff --git a/include/trace/events/nfs.h b/include/trace/events/nfs.h
+deleted file mode 100644
+index 09ffdbb04134d..0000000000000
+--- a/include/trace/events/nfs.h
++++ /dev/null
+@@ -1,375 +0,0 @@
+-/* SPDX-License-Identifier: GPL-2.0 */
+-/*
+- * Display helpers for NFS protocol elements
+- *
+- * Author: Chuck Lever <chuck.lever@oracle.com>
+- *
+- * Copyright (c) 2020, Oracle and/or its affiliates.
+- */
+-
+-#include <linux/nfs.h>
+-#include <linux/nfs4.h>
+-#include <uapi/linux/nfs.h>
+-
+-TRACE_DEFINE_ENUM(NFS_OK);
+-TRACE_DEFINE_ENUM(NFSERR_PERM);
+-TRACE_DEFINE_ENUM(NFSERR_NOENT);
+-TRACE_DEFINE_ENUM(NFSERR_IO);
+-TRACE_DEFINE_ENUM(NFSERR_NXIO);
+-TRACE_DEFINE_ENUM(NFSERR_EAGAIN);
+-TRACE_DEFINE_ENUM(NFSERR_ACCES);
+-TRACE_DEFINE_ENUM(NFSERR_EXIST);
+-TRACE_DEFINE_ENUM(NFSERR_XDEV);
+-TRACE_DEFINE_ENUM(NFSERR_NODEV);
+-TRACE_DEFINE_ENUM(NFSERR_NOTDIR);
+-TRACE_DEFINE_ENUM(NFSERR_ISDIR);
+-TRACE_DEFINE_ENUM(NFSERR_INVAL);
+-TRACE_DEFINE_ENUM(NFSERR_FBIG);
+-TRACE_DEFINE_ENUM(NFSERR_NOSPC);
+-TRACE_DEFINE_ENUM(NFSERR_ROFS);
+-TRACE_DEFINE_ENUM(NFSERR_MLINK);
+-TRACE_DEFINE_ENUM(NFSERR_OPNOTSUPP);
+-TRACE_DEFINE_ENUM(NFSERR_NAMETOOLONG);
+-TRACE_DEFINE_ENUM(NFSERR_NOTEMPTY);
+-TRACE_DEFINE_ENUM(NFSERR_DQUOT);
+-TRACE_DEFINE_ENUM(NFSERR_STALE);
+-TRACE_DEFINE_ENUM(NFSERR_REMOTE);
+-TRACE_DEFINE_ENUM(NFSERR_WFLUSH);
+-TRACE_DEFINE_ENUM(NFSERR_BADHANDLE);
+-TRACE_DEFINE_ENUM(NFSERR_NOT_SYNC);
+-TRACE_DEFINE_ENUM(NFSERR_BAD_COOKIE);
+-TRACE_DEFINE_ENUM(NFSERR_NOTSUPP);
+-TRACE_DEFINE_ENUM(NFSERR_TOOSMALL);
+-TRACE_DEFINE_ENUM(NFSERR_SERVERFAULT);
+-TRACE_DEFINE_ENUM(NFSERR_BADTYPE);
+-TRACE_DEFINE_ENUM(NFSERR_JUKEBOX);
+-
+-#define show_nfs_status(x) \
+- __print_symbolic(x, \
+- { NFS_OK, "OK" }, \
+- { NFSERR_PERM, "PERM" }, \
+- { NFSERR_NOENT, "NOENT" }, \
+- { NFSERR_IO, "IO" }, \
+- { NFSERR_NXIO, "NXIO" }, \
+- { ECHILD, "CHILD" }, \
+- { NFSERR_EAGAIN, "AGAIN" }, \
+- { NFSERR_ACCES, "ACCES" }, \
+- { NFSERR_EXIST, "EXIST" }, \
+- { NFSERR_XDEV, "XDEV" }, \
+- { NFSERR_NODEV, "NODEV" }, \
+- { NFSERR_NOTDIR, "NOTDIR" }, \
+- { NFSERR_ISDIR, "ISDIR" }, \
+- { NFSERR_INVAL, "INVAL" }, \
+- { NFSERR_FBIG, "FBIG" }, \
+- { NFSERR_NOSPC, "NOSPC" }, \
+- { NFSERR_ROFS, "ROFS" }, \
+- { NFSERR_MLINK, "MLINK" }, \
+- { NFSERR_OPNOTSUPP, "OPNOTSUPP" }, \
+- { NFSERR_NAMETOOLONG, "NAMETOOLONG" }, \
+- { NFSERR_NOTEMPTY, "NOTEMPTY" }, \
+- { NFSERR_DQUOT, "DQUOT" }, \
+- { NFSERR_STALE, "STALE" }, \
+- { NFSERR_REMOTE, "REMOTE" }, \
+- { NFSERR_WFLUSH, "WFLUSH" }, \
+- { NFSERR_BADHANDLE, "BADHANDLE" }, \
+- { NFSERR_NOT_SYNC, "NOTSYNC" }, \
+- { NFSERR_BAD_COOKIE, "BADCOOKIE" }, \
+- { NFSERR_NOTSUPP, "NOTSUPP" }, \
+- { NFSERR_TOOSMALL, "TOOSMALL" }, \
+- { NFSERR_SERVERFAULT, "REMOTEIO" }, \
+- { NFSERR_BADTYPE, "BADTYPE" }, \
+- { NFSERR_JUKEBOX, "JUKEBOX" })
+-
+-TRACE_DEFINE_ENUM(NFS_UNSTABLE);
+-TRACE_DEFINE_ENUM(NFS_DATA_SYNC);
+-TRACE_DEFINE_ENUM(NFS_FILE_SYNC);
+-
+-#define show_nfs_stable_how(x) \
+- __print_symbolic(x, \
+- { NFS_UNSTABLE, "UNSTABLE" }, \
+- { NFS_DATA_SYNC, "DATA_SYNC" }, \
+- { NFS_FILE_SYNC, "FILE_SYNC" })
+-
+-TRACE_DEFINE_ENUM(NFS4_OK);
+-TRACE_DEFINE_ENUM(NFS4ERR_ACCESS);
+-TRACE_DEFINE_ENUM(NFS4ERR_ATTRNOTSUPP);
+-TRACE_DEFINE_ENUM(NFS4ERR_ADMIN_REVOKED);
+-TRACE_DEFINE_ENUM(NFS4ERR_BACK_CHAN_BUSY);
+-TRACE_DEFINE_ENUM(NFS4ERR_BADCHAR);
+-TRACE_DEFINE_ENUM(NFS4ERR_BADHANDLE);
+-TRACE_DEFINE_ENUM(NFS4ERR_BADIOMODE);
+-TRACE_DEFINE_ENUM(NFS4ERR_BADLAYOUT);
+-TRACE_DEFINE_ENUM(NFS4ERR_BADLABEL);
+-TRACE_DEFINE_ENUM(NFS4ERR_BADNAME);
+-TRACE_DEFINE_ENUM(NFS4ERR_BADOWNER);
+-TRACE_DEFINE_ENUM(NFS4ERR_BADSESSION);
+-TRACE_DEFINE_ENUM(NFS4ERR_BADSLOT);
+-TRACE_DEFINE_ENUM(NFS4ERR_BADTYPE);
+-TRACE_DEFINE_ENUM(NFS4ERR_BADXDR);
+-TRACE_DEFINE_ENUM(NFS4ERR_BAD_COOKIE);
+-TRACE_DEFINE_ENUM(NFS4ERR_BAD_HIGH_SLOT);
+-TRACE_DEFINE_ENUM(NFS4ERR_BAD_RANGE);
+-TRACE_DEFINE_ENUM(NFS4ERR_BAD_SEQID);
+-TRACE_DEFINE_ENUM(NFS4ERR_BAD_SESSION_DIGEST);
+-TRACE_DEFINE_ENUM(NFS4ERR_BAD_STATEID);
+-TRACE_DEFINE_ENUM(NFS4ERR_CB_PATH_DOWN);
+-TRACE_DEFINE_ENUM(NFS4ERR_CLID_INUSE);
+-TRACE_DEFINE_ENUM(NFS4ERR_CLIENTID_BUSY);
+-TRACE_DEFINE_ENUM(NFS4ERR_COMPLETE_ALREADY);
+-TRACE_DEFINE_ENUM(NFS4ERR_CONN_NOT_BOUND_TO_SESSION);
+-TRACE_DEFINE_ENUM(NFS4ERR_DEADLOCK);
+-TRACE_DEFINE_ENUM(NFS4ERR_DEADSESSION);
+-TRACE_DEFINE_ENUM(NFS4ERR_DELAY);
+-TRACE_DEFINE_ENUM(NFS4ERR_DELEG_ALREADY_WANTED);
+-TRACE_DEFINE_ENUM(NFS4ERR_DELEG_REVOKED);
+-TRACE_DEFINE_ENUM(NFS4ERR_DENIED);
+-TRACE_DEFINE_ENUM(NFS4ERR_DIRDELEG_UNAVAIL);
+-TRACE_DEFINE_ENUM(NFS4ERR_DQUOT);
+-TRACE_DEFINE_ENUM(NFS4ERR_ENCR_ALG_UNSUPP);
+-TRACE_DEFINE_ENUM(NFS4ERR_EXIST);
+-TRACE_DEFINE_ENUM(NFS4ERR_EXPIRED);
+-TRACE_DEFINE_ENUM(NFS4ERR_FBIG);
+-TRACE_DEFINE_ENUM(NFS4ERR_FHEXPIRED);
+-TRACE_DEFINE_ENUM(NFS4ERR_FILE_OPEN);
+-TRACE_DEFINE_ENUM(NFS4ERR_GRACE);
+-TRACE_DEFINE_ENUM(NFS4ERR_HASH_ALG_UNSUPP);
+-TRACE_DEFINE_ENUM(NFS4ERR_INVAL);
+-TRACE_DEFINE_ENUM(NFS4ERR_IO);
+-TRACE_DEFINE_ENUM(NFS4ERR_ISDIR);
+-TRACE_DEFINE_ENUM(NFS4ERR_LAYOUTTRYLATER);
+-TRACE_DEFINE_ENUM(NFS4ERR_LAYOUTUNAVAILABLE);
+-TRACE_DEFINE_ENUM(NFS4ERR_LEASE_MOVED);
+-TRACE_DEFINE_ENUM(NFS4ERR_LOCKED);
+-TRACE_DEFINE_ENUM(NFS4ERR_LOCKS_HELD);
+-TRACE_DEFINE_ENUM(NFS4ERR_LOCK_RANGE);
+-TRACE_DEFINE_ENUM(NFS4ERR_MINOR_VERS_MISMATCH);
+-TRACE_DEFINE_ENUM(NFS4ERR_MLINK);
+-TRACE_DEFINE_ENUM(NFS4ERR_MOVED);
+-TRACE_DEFINE_ENUM(NFS4ERR_NAMETOOLONG);
+-TRACE_DEFINE_ENUM(NFS4ERR_NOENT);
+-TRACE_DEFINE_ENUM(NFS4ERR_NOFILEHANDLE);
+-TRACE_DEFINE_ENUM(NFS4ERR_NOMATCHING_LAYOUT);
+-TRACE_DEFINE_ENUM(NFS4ERR_NOSPC);
+-TRACE_DEFINE_ENUM(NFS4ERR_NOTDIR);
+-TRACE_DEFINE_ENUM(NFS4ERR_NOTEMPTY);
+-TRACE_DEFINE_ENUM(NFS4ERR_NOTSUPP);
+-TRACE_DEFINE_ENUM(NFS4ERR_NOT_ONLY_OP);
+-TRACE_DEFINE_ENUM(NFS4ERR_NOT_SAME);
+-TRACE_DEFINE_ENUM(NFS4ERR_NO_GRACE);
+-TRACE_DEFINE_ENUM(NFS4ERR_NXIO);
+-TRACE_DEFINE_ENUM(NFS4ERR_OLD_STATEID);
+-TRACE_DEFINE_ENUM(NFS4ERR_OPENMODE);
+-TRACE_DEFINE_ENUM(NFS4ERR_OP_ILLEGAL);
+-TRACE_DEFINE_ENUM(NFS4ERR_OP_NOT_IN_SESSION);
+-TRACE_DEFINE_ENUM(NFS4ERR_PERM);
+-TRACE_DEFINE_ENUM(NFS4ERR_PNFS_IO_HOLE);
+-TRACE_DEFINE_ENUM(NFS4ERR_PNFS_NO_LAYOUT);
+-TRACE_DEFINE_ENUM(NFS4ERR_RECALLCONFLICT);
+-TRACE_DEFINE_ENUM(NFS4ERR_RECLAIM_BAD);
+-TRACE_DEFINE_ENUM(NFS4ERR_RECLAIM_CONFLICT);
+-TRACE_DEFINE_ENUM(NFS4ERR_REJECT_DELEG);
+-TRACE_DEFINE_ENUM(NFS4ERR_REP_TOO_BIG);
+-TRACE_DEFINE_ENUM(NFS4ERR_REP_TOO_BIG_TO_CACHE);
+-TRACE_DEFINE_ENUM(NFS4ERR_REQ_TOO_BIG);
+-TRACE_DEFINE_ENUM(NFS4ERR_RESOURCE);
+-TRACE_DEFINE_ENUM(NFS4ERR_RESTOREFH);
+-TRACE_DEFINE_ENUM(NFS4ERR_RETRY_UNCACHED_REP);
+-TRACE_DEFINE_ENUM(NFS4ERR_RETURNCONFLICT);
+-TRACE_DEFINE_ENUM(NFS4ERR_ROFS);
+-TRACE_DEFINE_ENUM(NFS4ERR_SAME);
+-TRACE_DEFINE_ENUM(NFS4ERR_SHARE_DENIED);
+-TRACE_DEFINE_ENUM(NFS4ERR_SEQUENCE_POS);
+-TRACE_DEFINE_ENUM(NFS4ERR_SEQ_FALSE_RETRY);
+-TRACE_DEFINE_ENUM(NFS4ERR_SEQ_MISORDERED);
+-TRACE_DEFINE_ENUM(NFS4ERR_SERVERFAULT);
+-TRACE_DEFINE_ENUM(NFS4ERR_STALE);
+-TRACE_DEFINE_ENUM(NFS4ERR_STALE_CLIENTID);
+-TRACE_DEFINE_ENUM(NFS4ERR_STALE_STATEID);
+-TRACE_DEFINE_ENUM(NFS4ERR_SYMLINK);
+-TRACE_DEFINE_ENUM(NFS4ERR_TOOSMALL);
+-TRACE_DEFINE_ENUM(NFS4ERR_TOO_MANY_OPS);
+-TRACE_DEFINE_ENUM(NFS4ERR_UNKNOWN_LAYOUTTYPE);
+-TRACE_DEFINE_ENUM(NFS4ERR_UNSAFE_COMPOUND);
+-TRACE_DEFINE_ENUM(NFS4ERR_WRONGSEC);
+-TRACE_DEFINE_ENUM(NFS4ERR_WRONG_CRED);
+-TRACE_DEFINE_ENUM(NFS4ERR_WRONG_TYPE);
+-TRACE_DEFINE_ENUM(NFS4ERR_XDEV);
+-
+-TRACE_DEFINE_ENUM(NFS4ERR_RESET_TO_MDS);
+-TRACE_DEFINE_ENUM(NFS4ERR_RESET_TO_PNFS);
+-
+-#define show_nfs4_status(x) \
+- __print_symbolic(x, \
+- { NFS4_OK, "OK" }, \
+- { EPERM, "EPERM" }, \
+- { ENOENT, "ENOENT" }, \
+- { EIO, "EIO" }, \
+- { ENXIO, "ENXIO" }, \
+- { EACCES, "EACCES" }, \
+- { EEXIST, "EEXIST" }, \
+- { EXDEV, "EXDEV" }, \
+- { ENOTDIR, "ENOTDIR" }, \
+- { EISDIR, "EISDIR" }, \
+- { EFBIG, "EFBIG" }, \
+- { ENOSPC, "ENOSPC" }, \
+- { EROFS, "EROFS" }, \
+- { EMLINK, "EMLINK" }, \
+- { ENAMETOOLONG, "ENAMETOOLONG" }, \
+- { ENOTEMPTY, "ENOTEMPTY" }, \
+- { EDQUOT, "EDQUOT" }, \
+- { ESTALE, "ESTALE" }, \
+- { EBADHANDLE, "EBADHANDLE" }, \
+- { EBADCOOKIE, "EBADCOOKIE" }, \
+- { ENOTSUPP, "ENOTSUPP" }, \
+- { ETOOSMALL, "ETOOSMALL" }, \
+- { EREMOTEIO, "EREMOTEIO" }, \
+- { EBADTYPE, "EBADTYPE" }, \
+- { EAGAIN, "EAGAIN" }, \
+- { ELOOP, "ELOOP" }, \
+- { EOPNOTSUPP, "EOPNOTSUPP" }, \
+- { EDEADLK, "EDEADLK" }, \
+- { ENOMEM, "ENOMEM" }, \
+- { EKEYEXPIRED, "EKEYEXPIRED" }, \
+- { ETIMEDOUT, "ETIMEDOUT" }, \
+- { ERESTARTSYS, "ERESTARTSYS" }, \
+- { ECONNREFUSED, "ECONNREFUSED" }, \
+- { ECONNRESET, "ECONNRESET" }, \
+- { ENETUNREACH, "ENETUNREACH" }, \
+- { EHOSTUNREACH, "EHOSTUNREACH" }, \
+- { EHOSTDOWN, "EHOSTDOWN" }, \
+- { EPIPE, "EPIPE" }, \
+- { EPFNOSUPPORT, "EPFNOSUPPORT" }, \
+- { EPROTONOSUPPORT, "EPROTONOSUPPORT" }, \
+- { NFS4ERR_ACCESS, "ACCESS" }, \
+- { NFS4ERR_ATTRNOTSUPP, "ATTRNOTSUPP" }, \
+- { NFS4ERR_ADMIN_REVOKED, "ADMIN_REVOKED" }, \
+- { NFS4ERR_BACK_CHAN_BUSY, "BACK_CHAN_BUSY" }, \
+- { NFS4ERR_BADCHAR, "BADCHAR" }, \
+- { NFS4ERR_BADHANDLE, "BADHANDLE" }, \
+- { NFS4ERR_BADIOMODE, "BADIOMODE" }, \
+- { NFS4ERR_BADLAYOUT, "BADLAYOUT" }, \
+- { NFS4ERR_BADLABEL, "BADLABEL" }, \
+- { NFS4ERR_BADNAME, "BADNAME" }, \
+- { NFS4ERR_BADOWNER, "BADOWNER" }, \
+- { NFS4ERR_BADSESSION, "BADSESSION" }, \
+- { NFS4ERR_BADSLOT, "BADSLOT" }, \
+- { NFS4ERR_BADTYPE, "BADTYPE" }, \
+- { NFS4ERR_BADXDR, "BADXDR" }, \
+- { NFS4ERR_BAD_COOKIE, "BAD_COOKIE" }, \
+- { NFS4ERR_BAD_HIGH_SLOT, "BAD_HIGH_SLOT" }, \
+- { NFS4ERR_BAD_RANGE, "BAD_RANGE" }, \
+- { NFS4ERR_BAD_SEQID, "BAD_SEQID" }, \
+- { NFS4ERR_BAD_SESSION_DIGEST, "BAD_SESSION_DIGEST" }, \
+- { NFS4ERR_BAD_STATEID, "BAD_STATEID" }, \
+- { NFS4ERR_CB_PATH_DOWN, "CB_PATH_DOWN" }, \
+- { NFS4ERR_CLID_INUSE, "CLID_INUSE" }, \
+- { NFS4ERR_CLIENTID_BUSY, "CLIENTID_BUSY" }, \
+- { NFS4ERR_COMPLETE_ALREADY, "COMPLETE_ALREADY" }, \
+- { NFS4ERR_CONN_NOT_BOUND_TO_SESSION, "CONN_NOT_BOUND_TO_SESSION" }, \
+- { NFS4ERR_DEADLOCK, "DEADLOCK" }, \
+- { NFS4ERR_DEADSESSION, "DEAD_SESSION" }, \
+- { NFS4ERR_DELAY, "DELAY" }, \
+- { NFS4ERR_DELEG_ALREADY_WANTED, "DELEG_ALREADY_WANTED" }, \
+- { NFS4ERR_DELEG_REVOKED, "DELEG_REVOKED" }, \
+- { NFS4ERR_DENIED, "DENIED" }, \
+- { NFS4ERR_DIRDELEG_UNAVAIL, "DIRDELEG_UNAVAIL" }, \
+- { NFS4ERR_DQUOT, "DQUOT" }, \
+- { NFS4ERR_ENCR_ALG_UNSUPP, "ENCR_ALG_UNSUPP" }, \
+- { NFS4ERR_EXIST, "EXIST" }, \
+- { NFS4ERR_EXPIRED, "EXPIRED" }, \
+- { NFS4ERR_FBIG, "FBIG" }, \
+- { NFS4ERR_FHEXPIRED, "FHEXPIRED" }, \
+- { NFS4ERR_FILE_OPEN, "FILE_OPEN" }, \
+- { NFS4ERR_GRACE, "GRACE" }, \
+- { NFS4ERR_HASH_ALG_UNSUPP, "HASH_ALG_UNSUPP" }, \
+- { NFS4ERR_INVAL, "INVAL" }, \
+- { NFS4ERR_IO, "IO" }, \
+- { NFS4ERR_ISDIR, "ISDIR" }, \
+- { NFS4ERR_LAYOUTTRYLATER, "LAYOUTTRYLATER" }, \
+- { NFS4ERR_LAYOUTUNAVAILABLE, "LAYOUTUNAVAILABLE" }, \
+- { NFS4ERR_LEASE_MOVED, "LEASE_MOVED" }, \
+- { NFS4ERR_LOCKED, "LOCKED" }, \
+- { NFS4ERR_LOCKS_HELD, "LOCKS_HELD" }, \
+- { NFS4ERR_LOCK_RANGE, "LOCK_RANGE" }, \
+- { NFS4ERR_MINOR_VERS_MISMATCH, "MINOR_VERS_MISMATCH" }, \
+- { NFS4ERR_MLINK, "MLINK" }, \
+- { NFS4ERR_MOVED, "MOVED" }, \
+- { NFS4ERR_NAMETOOLONG, "NAMETOOLONG" }, \
+- { NFS4ERR_NOENT, "NOENT" }, \
+- { NFS4ERR_NOFILEHANDLE, "NOFILEHANDLE" }, \
+- { NFS4ERR_NOMATCHING_LAYOUT, "NOMATCHING_LAYOUT" }, \
+- { NFS4ERR_NOSPC, "NOSPC" }, \
+- { NFS4ERR_NOTDIR, "NOTDIR" }, \
+- { NFS4ERR_NOTEMPTY, "NOTEMPTY" }, \
+- { NFS4ERR_NOTSUPP, "NOTSUPP" }, \
+- { NFS4ERR_NOT_ONLY_OP, "NOT_ONLY_OP" }, \
+- { NFS4ERR_NOT_SAME, "NOT_SAME" }, \
+- { NFS4ERR_NO_GRACE, "NO_GRACE" }, \
+- { NFS4ERR_NXIO, "NXIO" }, \
+- { NFS4ERR_OLD_STATEID, "OLD_STATEID" }, \
+- { NFS4ERR_OPENMODE, "OPENMODE" }, \
+- { NFS4ERR_OP_ILLEGAL, "OP_ILLEGAL" }, \
+- { NFS4ERR_OP_NOT_IN_SESSION, "OP_NOT_IN_SESSION" }, \
+- { NFS4ERR_PERM, "PERM" }, \
+- { NFS4ERR_PNFS_IO_HOLE, "PNFS_IO_HOLE" }, \
+- { NFS4ERR_PNFS_NO_LAYOUT, "PNFS_NO_LAYOUT" }, \
+- { NFS4ERR_RECALLCONFLICT, "RECALLCONFLICT" }, \
+- { NFS4ERR_RECLAIM_BAD, "RECLAIM_BAD" }, \
+- { NFS4ERR_RECLAIM_CONFLICT, "RECLAIM_CONFLICT" }, \
+- { NFS4ERR_REJECT_DELEG, "REJECT_DELEG" }, \
+- { NFS4ERR_REP_TOO_BIG, "REP_TOO_BIG" }, \
+- { NFS4ERR_REP_TOO_BIG_TO_CACHE, "REP_TOO_BIG_TO_CACHE" }, \
+- { NFS4ERR_REQ_TOO_BIG, "REQ_TOO_BIG" }, \
+- { NFS4ERR_RESOURCE, "RESOURCE" }, \
+- { NFS4ERR_RESTOREFH, "RESTOREFH" }, \
+- { NFS4ERR_RETRY_UNCACHED_REP, "RETRY_UNCACHED_REP" }, \
+- { NFS4ERR_RETURNCONFLICT, "RETURNCONFLICT" }, \
+- { NFS4ERR_ROFS, "ROFS" }, \
+- { NFS4ERR_SAME, "SAME" }, \
+- { NFS4ERR_SHARE_DENIED, "SHARE_DENIED" }, \
+- { NFS4ERR_SEQUENCE_POS, "SEQUENCE_POS" }, \
+- { NFS4ERR_SEQ_FALSE_RETRY, "SEQ_FALSE_RETRY" }, \
+- { NFS4ERR_SEQ_MISORDERED, "SEQ_MISORDERED" }, \
+- { NFS4ERR_SERVERFAULT, "SERVERFAULT" }, \
+- { NFS4ERR_STALE, "STALE" }, \
+- { NFS4ERR_STALE_CLIENTID, "STALE_CLIENTID" }, \
+- { NFS4ERR_STALE_STATEID, "STALE_STATEID" }, \
+- { NFS4ERR_SYMLINK, "SYMLINK" }, \
+- { NFS4ERR_TOOSMALL, "TOOSMALL" }, \
+- { NFS4ERR_TOO_MANY_OPS, "TOO_MANY_OPS" }, \
+- { NFS4ERR_UNKNOWN_LAYOUTTYPE, "UNKNOWN_LAYOUTTYPE" }, \
+- { NFS4ERR_UNSAFE_COMPOUND, "UNSAFE_COMPOUND" }, \
+- { NFS4ERR_WRONGSEC, "WRONGSEC" }, \
+- { NFS4ERR_WRONG_CRED, "WRONG_CRED" }, \
+- { NFS4ERR_WRONG_TYPE, "WRONG_TYPE" }, \
+- { NFS4ERR_XDEV, "XDEV" }, \
+- /* ***** Internal to Linux NFS client ***** */ \
+- { NFS4ERR_RESET_TO_MDS, "RESET_TO_MDS" }, \
+- { NFS4ERR_RESET_TO_PNFS, "RESET_TO_PNFS" })
+-
+-#define show_nfs4_verifier(x) \
+- __print_hex_str(x, NFS4_VERIFIER_SIZE)
+-
+-TRACE_DEFINE_ENUM(IOMODE_READ);
+-TRACE_DEFINE_ENUM(IOMODE_RW);
+-TRACE_DEFINE_ENUM(IOMODE_ANY);
+-
+-#define show_pnfs_layout_iomode(x) \
+- __print_symbolic(x, \
+- { IOMODE_READ, "READ" }, \
+- { IOMODE_RW, "RW" }, \
+- { IOMODE_ANY, "ANY" })
+-
+-#define show_nfs4_seq4_status(x) \
+- __print_flags(x, "|", \
+- { SEQ4_STATUS_CB_PATH_DOWN, "CB_PATH_DOWN" }, \
+- { SEQ4_STATUS_CB_GSS_CONTEXTS_EXPIRING, "CB_GSS_CONTEXTS_EXPIRING" }, \
+- { SEQ4_STATUS_CB_GSS_CONTEXTS_EXPIRED, "CB_GSS_CONTEXTS_EXPIRED" }, \
+- { SEQ4_STATUS_EXPIRED_ALL_STATE_REVOKED, "EXPIRED_ALL_STATE_REVOKED" }, \
+- { SEQ4_STATUS_EXPIRED_SOME_STATE_REVOKED, "EXPIRED_SOME_STATE_REVOKED" }, \
+- { SEQ4_STATUS_ADMIN_STATE_REVOKED, "ADMIN_STATE_REVOKED" }, \
+- { SEQ4_STATUS_RECALLABLE_STATE_REVOKED, "RECALLABLE_STATE_REVOKED" }, \
+- { SEQ4_STATUS_LEASE_MOVED, "LEASE_MOVED" }, \
+- { SEQ4_STATUS_RESTART_RECLAIM_NEEDED, "RESTART_RECLAIM_NEEDED" }, \
+- { SEQ4_STATUS_CB_PATH_DOWN_SESSION, "CB_PATH_DOWN_SESSION" }, \
+- { SEQ4_STATUS_BACKCHANNEL_FAULT, "BACKCHANNEL_FAULT" })
+diff --git a/include/trace/events/rdma.h b/include/trace/events/rdma.h
+deleted file mode 100644
+index 81bb454fc2888..0000000000000
+--- a/include/trace/events/rdma.h
++++ /dev/null
+@@ -1,168 +0,0 @@
+-/* SPDX-License-Identifier: GPL-2.0 */
+-/*
+- * Copyright (c) 2017 Oracle. All rights reserved.
+- */
+-
+-/*
+- * enum ib_event_type, from include/rdma/ib_verbs.h
+- */
+-#define IB_EVENT_LIST \
+- ib_event(CQ_ERR) \
+- ib_event(QP_FATAL) \
+- ib_event(QP_REQ_ERR) \
+- ib_event(QP_ACCESS_ERR) \
+- ib_event(COMM_EST) \
+- ib_event(SQ_DRAINED) \
+- ib_event(PATH_MIG) \
+- ib_event(PATH_MIG_ERR) \
+- ib_event(DEVICE_FATAL) \
+- ib_event(PORT_ACTIVE) \
+- ib_event(PORT_ERR) \
+- ib_event(LID_CHANGE) \
+- ib_event(PKEY_CHANGE) \
+- ib_event(SM_CHANGE) \
+- ib_event(SRQ_ERR) \
+- ib_event(SRQ_LIMIT_REACHED) \
+- ib_event(QP_LAST_WQE_REACHED) \
+- ib_event(CLIENT_REREGISTER) \
+- ib_event(GID_CHANGE) \
+- ib_event_end(WQ_FATAL)
+-
+-#undef ib_event
+-#undef ib_event_end
+-
+-#define ib_event(x) TRACE_DEFINE_ENUM(IB_EVENT_##x);
+-#define ib_event_end(x) TRACE_DEFINE_ENUM(IB_EVENT_##x);
+-
+-IB_EVENT_LIST
+-
+-#undef ib_event
+-#undef ib_event_end
+-
+-#define ib_event(x) { IB_EVENT_##x, #x },
+-#define ib_event_end(x) { IB_EVENT_##x, #x }
+-
+-#define rdma_show_ib_event(x) \
+- __print_symbolic(x, IB_EVENT_LIST)
+-
+-/*
+- * enum ib_wc_status type, from include/rdma/ib_verbs.h
+- */
+-#define IB_WC_STATUS_LIST \
+- ib_wc_status(SUCCESS) \
+- ib_wc_status(LOC_LEN_ERR) \
+- ib_wc_status(LOC_QP_OP_ERR) \
+- ib_wc_status(LOC_EEC_OP_ERR) \
+- ib_wc_status(LOC_PROT_ERR) \
+- ib_wc_status(WR_FLUSH_ERR) \
+- ib_wc_status(MW_BIND_ERR) \
+- ib_wc_status(BAD_RESP_ERR) \
+- ib_wc_status(LOC_ACCESS_ERR) \
+- ib_wc_status(REM_INV_REQ_ERR) \
+- ib_wc_status(REM_ACCESS_ERR) \
+- ib_wc_status(REM_OP_ERR) \
+- ib_wc_status(RETRY_EXC_ERR) \
+- ib_wc_status(RNR_RETRY_EXC_ERR) \
+- ib_wc_status(LOC_RDD_VIOL_ERR) \
+- ib_wc_status(REM_INV_RD_REQ_ERR) \
+- ib_wc_status(REM_ABORT_ERR) \
+- ib_wc_status(INV_EECN_ERR) \
+- ib_wc_status(INV_EEC_STATE_ERR) \
+- ib_wc_status(FATAL_ERR) \
+- ib_wc_status(RESP_TIMEOUT_ERR) \
+- ib_wc_status_end(GENERAL_ERR)
+-
+-#undef ib_wc_status
+-#undef ib_wc_status_end
+-
+-#define ib_wc_status(x) TRACE_DEFINE_ENUM(IB_WC_##x);
+-#define ib_wc_status_end(x) TRACE_DEFINE_ENUM(IB_WC_##x);
+-
+-IB_WC_STATUS_LIST
+-
+-#undef ib_wc_status
+-#undef ib_wc_status_end
+-
+-#define ib_wc_status(x) { IB_WC_##x, #x },
+-#define ib_wc_status_end(x) { IB_WC_##x, #x }
+-
+-#define rdma_show_wc_status(x) \
+- __print_symbolic(x, IB_WC_STATUS_LIST)
+-
+-/*
+- * enum ib_cm_event_type, from include/rdma/ib_cm.h
+- */
+-#define IB_CM_EVENT_LIST \
+- ib_cm_event(REQ_ERROR) \
+- ib_cm_event(REQ_RECEIVED) \
+- ib_cm_event(REP_ERROR) \
+- ib_cm_event(REP_RECEIVED) \
+- ib_cm_event(RTU_RECEIVED) \
+- ib_cm_event(USER_ESTABLISHED) \
+- ib_cm_event(DREQ_ERROR) \
+- ib_cm_event(DREQ_RECEIVED) \
+- ib_cm_event(DREP_RECEIVED) \
+- ib_cm_event(TIMEWAIT_EXIT) \
+- ib_cm_event(MRA_RECEIVED) \
+- ib_cm_event(REJ_RECEIVED) \
+- ib_cm_event(LAP_ERROR) \
+- ib_cm_event(LAP_RECEIVED) \
+- ib_cm_event(APR_RECEIVED) \
+- ib_cm_event(SIDR_REQ_ERROR) \
+- ib_cm_event(SIDR_REQ_RECEIVED) \
+- ib_cm_event_end(SIDR_REP_RECEIVED)
+-
+-#undef ib_cm_event
+-#undef ib_cm_event_end
+-
+-#define ib_cm_event(x) TRACE_DEFINE_ENUM(IB_CM_##x);
+-#define ib_cm_event_end(x) TRACE_DEFINE_ENUM(IB_CM_##x);
+-
+-IB_CM_EVENT_LIST
+-
+-#undef ib_cm_event
+-#undef ib_cm_event_end
+-
+-#define ib_cm_event(x) { IB_CM_##x, #x },
+-#define ib_cm_event_end(x) { IB_CM_##x, #x }
+-
+-#define rdma_show_ib_cm_event(x) \
+- __print_symbolic(x, IB_CM_EVENT_LIST)
+-
+-/*
+- * enum rdma_cm_event_type, from include/rdma/rdma_cm.h
+- */
+-#define RDMA_CM_EVENT_LIST \
+- rdma_cm_event(ADDR_RESOLVED) \
+- rdma_cm_event(ADDR_ERROR) \
+- rdma_cm_event(ROUTE_RESOLVED) \
+- rdma_cm_event(ROUTE_ERROR) \
+- rdma_cm_event(CONNECT_REQUEST) \
+- rdma_cm_event(CONNECT_RESPONSE) \
+- rdma_cm_event(CONNECT_ERROR) \
+- rdma_cm_event(UNREACHABLE) \
+- rdma_cm_event(REJECTED) \
+- rdma_cm_event(ESTABLISHED) \
+- rdma_cm_event(DISCONNECTED) \
+- rdma_cm_event(DEVICE_REMOVAL) \
+- rdma_cm_event(MULTICAST_JOIN) \
+- rdma_cm_event(MULTICAST_ERROR) \
+- rdma_cm_event(ADDR_CHANGE) \
+- rdma_cm_event_end(TIMEWAIT_EXIT)
+-
+-#undef rdma_cm_event
+-#undef rdma_cm_event_end
+-
+-#define rdma_cm_event(x) TRACE_DEFINE_ENUM(RDMA_CM_EVENT_##x);
+-#define rdma_cm_event_end(x) TRACE_DEFINE_ENUM(RDMA_CM_EVENT_##x);
+-
+-RDMA_CM_EVENT_LIST
+-
+-#undef rdma_cm_event
+-#undef rdma_cm_event_end
+-
+-#define rdma_cm_event(x) { RDMA_CM_EVENT_##x, #x },
+-#define rdma_cm_event_end(x) { RDMA_CM_EVENT_##x, #x }
+-
+-#define rdma_show_cm_event(x) \
+- __print_symbolic(x, RDMA_CM_EVENT_LIST)
+diff --git a/include/trace/events/rpcgss.h b/include/trace/events/rpcgss.h
+index c9048f3e471bb..3f121eed369e8 100644
+--- a/include/trace/events/rpcgss.h
++++ b/include/trace/events/rpcgss.h
+@@ -13,7 +13,7 @@
+
+ #include <linux/tracepoint.h>
+
+-#include <trace/events/sunrpc_base.h>
++#include <trace/misc/sunrpc.h>
+
+ /**
+ ** GSS-API related trace events
+diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h
+index fcd3b3f1020a6..8f461e04e5f09 100644
+--- a/include/trace/events/rpcrdma.h
++++ b/include/trace/events/rpcrdma.h
+@@ -15,8 +15,8 @@
+ #include <linux/tracepoint.h>
+ #include <rdma/ib_cm.h>
+
+-#include <trace/events/rdma.h>
+-#include <trace/events/sunrpc_base.h>
++#include <trace/misc/rdma.h>
++#include <trace/misc/sunrpc.h>
+
+ /**
+ ** Event classes
+diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
+index f48f2ab9d238b..ffe2679a13ced 100644
+--- a/include/trace/events/sunrpc.h
++++ b/include/trace/events/sunrpc.h
+@@ -14,7 +14,7 @@
+ #include <linux/net.h>
+ #include <linux/tracepoint.h>
+
+-#include <trace/events/sunrpc_base.h>
++#include <trace/misc/sunrpc.h>
+
+ TRACE_DEFINE_ENUM(SOCK_STREAM);
+ TRACE_DEFINE_ENUM(SOCK_DGRAM);
+diff --git a/include/trace/events/sunrpc_base.h b/include/trace/events/sunrpc_base.h
+deleted file mode 100644
+index 588557d07ea82..0000000000000
+--- a/include/trace/events/sunrpc_base.h
++++ /dev/null
+@@ -1,18 +0,0 @@
+-/* SPDX-License-Identifier: GPL-2.0 */
+-/*
+- * Copyright (c) 2021 Oracle and/or its affiliates.
+- *
+- * Common types and format specifiers for sunrpc.
+- */
+-
+-#if !defined(_TRACE_SUNRPC_BASE_H)
+-#define _TRACE_SUNRPC_BASE_H
+-
+-#include <linux/tracepoint.h>
+-
+-#define SUNRPC_TRACE_PID_SPECIFIER "%08x"
+-#define SUNRPC_TRACE_CLID_SPECIFIER "%08x"
+-#define SUNRPC_TRACE_TASK_SPECIFIER \
+- "task:" SUNRPC_TRACE_PID_SPECIFIER "@" SUNRPC_TRACE_CLID_SPECIFIER
+-
+-#endif /* _TRACE_SUNRPC_BASE_H */
+diff --git a/include/trace/misc/fs.h b/include/trace/misc/fs.h
+new file mode 100644
+index 0000000000000..738b97f22f365
+--- /dev/null
++++ b/include/trace/misc/fs.h
+@@ -0,0 +1,122 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Display helpers for generic filesystem items
++ *
++ * Author: Chuck Lever <chuck.lever@oracle.com>
++ *
++ * Copyright (c) 2020, Oracle and/or its affiliates.
++ */
++
++#include <linux/fs.h>
++
++#define show_fs_dirent_type(x) \
++ __print_symbolic(x, \
++ { DT_UNKNOWN, "UNKNOWN" }, \
++ { DT_FIFO, "FIFO" }, \
++ { DT_CHR, "CHR" }, \
++ { DT_DIR, "DIR" }, \
++ { DT_BLK, "BLK" }, \
++ { DT_REG, "REG" }, \
++ { DT_LNK, "LNK" }, \
++ { DT_SOCK, "SOCK" }, \
++ { DT_WHT, "WHT" })
++
++#define show_fs_fcntl_open_flags(x) \
++ __print_flags(x, "|", \
++ { O_WRONLY, "O_WRONLY" }, \
++ { O_RDWR, "O_RDWR" }, \
++ { O_CREAT, "O_CREAT" }, \
++ { O_EXCL, "O_EXCL" }, \
++ { O_NOCTTY, "O_NOCTTY" }, \
++ { O_TRUNC, "O_TRUNC" }, \
++ { O_APPEND, "O_APPEND" }, \
++ { O_NONBLOCK, "O_NONBLOCK" }, \
++ { O_DSYNC, "O_DSYNC" }, \
++ { O_DIRECT, "O_DIRECT" }, \
++ { O_LARGEFILE, "O_LARGEFILE" }, \
++ { O_DIRECTORY, "O_DIRECTORY" }, \
++ { O_NOFOLLOW, "O_NOFOLLOW" }, \
++ { O_NOATIME, "O_NOATIME" }, \
++ { O_CLOEXEC, "O_CLOEXEC" })
++
++#define __fmode_flag(x) { (__force unsigned long)FMODE_##x, #x }
++#define show_fs_fmode_flags(x) \
++ __print_flags(x, "|", \
++ __fmode_flag(READ), \
++ __fmode_flag(WRITE), \
++ __fmode_flag(EXEC))
++
++#ifdef CONFIG_64BIT
++#define show_fs_fcntl_cmd(x) \
++ __print_symbolic(x, \
++ { F_DUPFD, "DUPFD" }, \
++ { F_GETFD, "GETFD" }, \
++ { F_SETFD, "SETFD" }, \
++ { F_GETFL, "GETFL" }, \
++ { F_SETFL, "SETFL" }, \
++ { F_GETLK, "GETLK" }, \
++ { F_SETLK, "SETLK" }, \
++ { F_SETLKW, "SETLKW" }, \
++ { F_SETOWN, "SETOWN" }, \
++ { F_GETOWN, "GETOWN" }, \
++ { F_SETSIG, "SETSIG" }, \
++ { F_GETSIG, "GETSIG" }, \
++ { F_SETOWN_EX, "SETOWN_EX" }, \
++ { F_GETOWN_EX, "GETOWN_EX" }, \
++ { F_GETOWNER_UIDS, "GETOWNER_UIDS" }, \
++ { F_OFD_GETLK, "OFD_GETLK" }, \
++ { F_OFD_SETLK, "OFD_SETLK" }, \
++ { F_OFD_SETLKW, "OFD_SETLKW" })
++#else /* CONFIG_64BIT */
++#define show_fs_fcntl_cmd(x) \
++ __print_symbolic(x, \
++ { F_DUPFD, "DUPFD" }, \
++ { F_GETFD, "GETFD" }, \
++ { F_SETFD, "SETFD" }, \
++ { F_GETFL, "GETFL" }, \
++ { F_SETFL, "SETFL" }, \
++ { F_GETLK, "GETLK" }, \
++ { F_SETLK, "SETLK" }, \
++ { F_SETLKW, "SETLKW" }, \
++ { F_SETOWN, "SETOWN" }, \
++ { F_GETOWN, "GETOWN" }, \
++ { F_SETSIG, "SETSIG" }, \
++ { F_GETSIG, "GETSIG" }, \
++ { F_GETLK64, "GETLK64" }, \
++ { F_SETLK64, "SETLK64" }, \
++ { F_SETLKW64, "SETLKW64" }, \
++ { F_SETOWN_EX, "SETOWN_EX" }, \
++ { F_GETOWN_EX, "GETOWN_EX" }, \
++ { F_GETOWNER_UIDS, "GETOWNER_UIDS" }, \
++ { F_OFD_GETLK, "OFD_GETLK" }, \
++ { F_OFD_SETLK, "OFD_SETLK" }, \
++ { F_OFD_SETLKW, "OFD_SETLKW" })
++#endif /* CONFIG_64BIT */
++
++#define show_fs_fcntl_lock_type(x) \
++ __print_symbolic(x, \
++ { F_RDLCK, "RDLCK" }, \
++ { F_WRLCK, "WRLCK" }, \
++ { F_UNLCK, "UNLCK" })
++
++#define show_fs_lookup_flags(flags) \
++ __print_flags(flags, "|", \
++ { LOOKUP_FOLLOW, "FOLLOW" }, \
++ { LOOKUP_DIRECTORY, "DIRECTORY" }, \
++ { LOOKUP_AUTOMOUNT, "AUTOMOUNT" }, \
++ { LOOKUP_EMPTY, "EMPTY" }, \
++ { LOOKUP_DOWN, "DOWN" }, \
++ { LOOKUP_MOUNTPOINT, "MOUNTPOINT" }, \
++ { LOOKUP_REVAL, "REVAL" }, \
++ { LOOKUP_RCU, "RCU" }, \
++ { LOOKUP_OPEN, "OPEN" }, \
++ { LOOKUP_CREATE, "CREATE" }, \
++ { LOOKUP_EXCL, "EXCL" }, \
++ { LOOKUP_RENAME_TARGET, "RENAME_TARGET" }, \
++ { LOOKUP_PARENT, "PARENT" }, \
++ { LOOKUP_NO_SYMLINKS, "NO_SYMLINKS" }, \
++ { LOOKUP_NO_MAGICLINKS, "NO_MAGICLINKS" }, \
++ { LOOKUP_NO_XDEV, "NO_XDEV" }, \
++ { LOOKUP_BENEATH, "BENEATH" }, \
++ { LOOKUP_IN_ROOT, "IN_ROOT" }, \
++ { LOOKUP_CACHED, "CACHED" })
+diff --git a/include/trace/misc/nfs.h b/include/trace/misc/nfs.h
+new file mode 100644
+index 0000000000000..0d9d48dca38a8
+--- /dev/null
++++ b/include/trace/misc/nfs.h
+@@ -0,0 +1,387 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Display helpers for NFS protocol elements
++ *
++ * Author: Chuck Lever <chuck.lever@oracle.com>
++ *
++ * Copyright (c) 2020, Oracle and/or its affiliates.
++ */
++
++#include <linux/nfs.h>
++#include <linux/nfs4.h>
++#include <uapi/linux/nfs.h>
++
++TRACE_DEFINE_ENUM(NFS_OK);
++TRACE_DEFINE_ENUM(NFSERR_PERM);
++TRACE_DEFINE_ENUM(NFSERR_NOENT);
++TRACE_DEFINE_ENUM(NFSERR_IO);
++TRACE_DEFINE_ENUM(NFSERR_NXIO);
++TRACE_DEFINE_ENUM(NFSERR_EAGAIN);
++TRACE_DEFINE_ENUM(NFSERR_ACCES);
++TRACE_DEFINE_ENUM(NFSERR_EXIST);
++TRACE_DEFINE_ENUM(NFSERR_XDEV);
++TRACE_DEFINE_ENUM(NFSERR_NODEV);
++TRACE_DEFINE_ENUM(NFSERR_NOTDIR);
++TRACE_DEFINE_ENUM(NFSERR_ISDIR);
++TRACE_DEFINE_ENUM(NFSERR_INVAL);
++TRACE_DEFINE_ENUM(NFSERR_FBIG);
++TRACE_DEFINE_ENUM(NFSERR_NOSPC);
++TRACE_DEFINE_ENUM(NFSERR_ROFS);
++TRACE_DEFINE_ENUM(NFSERR_MLINK);
++TRACE_DEFINE_ENUM(NFSERR_OPNOTSUPP);
++TRACE_DEFINE_ENUM(NFSERR_NAMETOOLONG);
++TRACE_DEFINE_ENUM(NFSERR_NOTEMPTY);
++TRACE_DEFINE_ENUM(NFSERR_DQUOT);
++TRACE_DEFINE_ENUM(NFSERR_STALE);
++TRACE_DEFINE_ENUM(NFSERR_REMOTE);
++TRACE_DEFINE_ENUM(NFSERR_WFLUSH);
++TRACE_DEFINE_ENUM(NFSERR_BADHANDLE);
++TRACE_DEFINE_ENUM(NFSERR_NOT_SYNC);
++TRACE_DEFINE_ENUM(NFSERR_BAD_COOKIE);
++TRACE_DEFINE_ENUM(NFSERR_NOTSUPP);
++TRACE_DEFINE_ENUM(NFSERR_TOOSMALL);
++TRACE_DEFINE_ENUM(NFSERR_SERVERFAULT);
++TRACE_DEFINE_ENUM(NFSERR_BADTYPE);
++TRACE_DEFINE_ENUM(NFSERR_JUKEBOX);
++
++#define show_nfs_status(x) \
++ __print_symbolic(x, \
++ { NFS_OK, "OK" }, \
++ { NFSERR_PERM, "PERM" }, \
++ { NFSERR_NOENT, "NOENT" }, \
++ { NFSERR_IO, "IO" }, \
++ { NFSERR_NXIO, "NXIO" }, \
++ { ECHILD, "CHILD" }, \
++ { NFSERR_EAGAIN, "AGAIN" }, \
++ { NFSERR_ACCES, "ACCES" }, \
++ { NFSERR_EXIST, "EXIST" }, \
++ { NFSERR_XDEV, "XDEV" }, \
++ { NFSERR_NODEV, "NODEV" }, \
++ { NFSERR_NOTDIR, "NOTDIR" }, \
++ { NFSERR_ISDIR, "ISDIR" }, \
++ { NFSERR_INVAL, "INVAL" }, \
++ { NFSERR_FBIG, "FBIG" }, \
++ { NFSERR_NOSPC, "NOSPC" }, \
++ { NFSERR_ROFS, "ROFS" }, \
++ { NFSERR_MLINK, "MLINK" }, \
++ { NFSERR_OPNOTSUPP, "OPNOTSUPP" }, \
++ { NFSERR_NAMETOOLONG, "NAMETOOLONG" }, \
++ { NFSERR_NOTEMPTY, "NOTEMPTY" }, \
++ { NFSERR_DQUOT, "DQUOT" }, \
++ { NFSERR_STALE, "STALE" }, \
++ { NFSERR_REMOTE, "REMOTE" }, \
++ { NFSERR_WFLUSH, "WFLUSH" }, \
++ { NFSERR_BADHANDLE, "BADHANDLE" }, \
++ { NFSERR_NOT_SYNC, "NOTSYNC" }, \
++ { NFSERR_BAD_COOKIE, "BADCOOKIE" }, \
++ { NFSERR_NOTSUPP, "NOTSUPP" }, \
++ { NFSERR_TOOSMALL, "TOOSMALL" }, \
++ { NFSERR_SERVERFAULT, "REMOTEIO" }, \
++ { NFSERR_BADTYPE, "BADTYPE" }, \
++ { NFSERR_JUKEBOX, "JUKEBOX" })
++
++TRACE_DEFINE_ENUM(NFS_UNSTABLE);
++TRACE_DEFINE_ENUM(NFS_DATA_SYNC);
++TRACE_DEFINE_ENUM(NFS_FILE_SYNC);
++
++#define show_nfs_stable_how(x) \
++ __print_symbolic(x, \
++ { NFS_UNSTABLE, "UNSTABLE" }, \
++ { NFS_DATA_SYNC, "DATA_SYNC" }, \
++ { NFS_FILE_SYNC, "FILE_SYNC" })
++
++TRACE_DEFINE_ENUM(NFS4_OK);
++TRACE_DEFINE_ENUM(NFS4ERR_ACCESS);
++TRACE_DEFINE_ENUM(NFS4ERR_ATTRNOTSUPP);
++TRACE_DEFINE_ENUM(NFS4ERR_ADMIN_REVOKED);
++TRACE_DEFINE_ENUM(NFS4ERR_BACK_CHAN_BUSY);
++TRACE_DEFINE_ENUM(NFS4ERR_BADCHAR);
++TRACE_DEFINE_ENUM(NFS4ERR_BADHANDLE);
++TRACE_DEFINE_ENUM(NFS4ERR_BADIOMODE);
++TRACE_DEFINE_ENUM(NFS4ERR_BADLAYOUT);
++TRACE_DEFINE_ENUM(NFS4ERR_BADLABEL);
++TRACE_DEFINE_ENUM(NFS4ERR_BADNAME);
++TRACE_DEFINE_ENUM(NFS4ERR_BADOWNER);
++TRACE_DEFINE_ENUM(NFS4ERR_BADSESSION);
++TRACE_DEFINE_ENUM(NFS4ERR_BADSLOT);
++TRACE_DEFINE_ENUM(NFS4ERR_BADTYPE);
++TRACE_DEFINE_ENUM(NFS4ERR_BADXDR);
++TRACE_DEFINE_ENUM(NFS4ERR_BAD_COOKIE);
++TRACE_DEFINE_ENUM(NFS4ERR_BAD_HIGH_SLOT);
++TRACE_DEFINE_ENUM(NFS4ERR_BAD_RANGE);
++TRACE_DEFINE_ENUM(NFS4ERR_BAD_SEQID);
++TRACE_DEFINE_ENUM(NFS4ERR_BAD_SESSION_DIGEST);
++TRACE_DEFINE_ENUM(NFS4ERR_BAD_STATEID);
++TRACE_DEFINE_ENUM(NFS4ERR_CB_PATH_DOWN);
++TRACE_DEFINE_ENUM(NFS4ERR_CLID_INUSE);
++TRACE_DEFINE_ENUM(NFS4ERR_CLIENTID_BUSY);
++TRACE_DEFINE_ENUM(NFS4ERR_COMPLETE_ALREADY);
++TRACE_DEFINE_ENUM(NFS4ERR_CONN_NOT_BOUND_TO_SESSION);
++TRACE_DEFINE_ENUM(NFS4ERR_DEADLOCK);
++TRACE_DEFINE_ENUM(NFS4ERR_DEADSESSION);
++TRACE_DEFINE_ENUM(NFS4ERR_DELAY);
++TRACE_DEFINE_ENUM(NFS4ERR_DELEG_ALREADY_WANTED);
++TRACE_DEFINE_ENUM(NFS4ERR_DELEG_REVOKED);
++TRACE_DEFINE_ENUM(NFS4ERR_DENIED);
++TRACE_DEFINE_ENUM(NFS4ERR_DIRDELEG_UNAVAIL);
++TRACE_DEFINE_ENUM(NFS4ERR_DQUOT);
++TRACE_DEFINE_ENUM(NFS4ERR_ENCR_ALG_UNSUPP);
++TRACE_DEFINE_ENUM(NFS4ERR_EXIST);
++TRACE_DEFINE_ENUM(NFS4ERR_EXPIRED);
++TRACE_DEFINE_ENUM(NFS4ERR_FBIG);
++TRACE_DEFINE_ENUM(NFS4ERR_FHEXPIRED);
++TRACE_DEFINE_ENUM(NFS4ERR_FILE_OPEN);
++TRACE_DEFINE_ENUM(NFS4ERR_GRACE);
++TRACE_DEFINE_ENUM(NFS4ERR_HASH_ALG_UNSUPP);
++TRACE_DEFINE_ENUM(NFS4ERR_INVAL);
++TRACE_DEFINE_ENUM(NFS4ERR_IO);
++TRACE_DEFINE_ENUM(NFS4ERR_ISDIR);
++TRACE_DEFINE_ENUM(NFS4ERR_LAYOUTTRYLATER);
++TRACE_DEFINE_ENUM(NFS4ERR_LAYOUTUNAVAILABLE);
++TRACE_DEFINE_ENUM(NFS4ERR_LEASE_MOVED);
++TRACE_DEFINE_ENUM(NFS4ERR_LOCKED);
++TRACE_DEFINE_ENUM(NFS4ERR_LOCKS_HELD);
++TRACE_DEFINE_ENUM(NFS4ERR_LOCK_RANGE);
++TRACE_DEFINE_ENUM(NFS4ERR_MINOR_VERS_MISMATCH);
++TRACE_DEFINE_ENUM(NFS4ERR_MLINK);
++TRACE_DEFINE_ENUM(NFS4ERR_MOVED);
++TRACE_DEFINE_ENUM(NFS4ERR_NAMETOOLONG);
++TRACE_DEFINE_ENUM(NFS4ERR_NOENT);
++TRACE_DEFINE_ENUM(NFS4ERR_NOFILEHANDLE);
++TRACE_DEFINE_ENUM(NFS4ERR_NOMATCHING_LAYOUT);
++TRACE_DEFINE_ENUM(NFS4ERR_NOSPC);
++TRACE_DEFINE_ENUM(NFS4ERR_NOTDIR);
++TRACE_DEFINE_ENUM(NFS4ERR_NOTEMPTY);
++TRACE_DEFINE_ENUM(NFS4ERR_NOTSUPP);
++TRACE_DEFINE_ENUM(NFS4ERR_NOT_ONLY_OP);
++TRACE_DEFINE_ENUM(NFS4ERR_NOT_SAME);
++TRACE_DEFINE_ENUM(NFS4ERR_NO_GRACE);
++TRACE_DEFINE_ENUM(NFS4ERR_NXIO);
++TRACE_DEFINE_ENUM(NFS4ERR_OLD_STATEID);
++TRACE_DEFINE_ENUM(NFS4ERR_OPENMODE);
++TRACE_DEFINE_ENUM(NFS4ERR_OP_ILLEGAL);
++TRACE_DEFINE_ENUM(NFS4ERR_OP_NOT_IN_SESSION);
++TRACE_DEFINE_ENUM(NFS4ERR_PERM);
++TRACE_DEFINE_ENUM(NFS4ERR_PNFS_IO_HOLE);
++TRACE_DEFINE_ENUM(NFS4ERR_PNFS_NO_LAYOUT);
++TRACE_DEFINE_ENUM(NFS4ERR_RECALLCONFLICT);
++TRACE_DEFINE_ENUM(NFS4ERR_RECLAIM_BAD);
++TRACE_DEFINE_ENUM(NFS4ERR_RECLAIM_CONFLICT);
++TRACE_DEFINE_ENUM(NFS4ERR_REJECT_DELEG);
++TRACE_DEFINE_ENUM(NFS4ERR_REP_TOO_BIG);
++TRACE_DEFINE_ENUM(NFS4ERR_REP_TOO_BIG_TO_CACHE);
++TRACE_DEFINE_ENUM(NFS4ERR_REQ_TOO_BIG);
++TRACE_DEFINE_ENUM(NFS4ERR_RESOURCE);
++TRACE_DEFINE_ENUM(NFS4ERR_RESTOREFH);
++TRACE_DEFINE_ENUM(NFS4ERR_RETRY_UNCACHED_REP);
++TRACE_DEFINE_ENUM(NFS4ERR_RETURNCONFLICT);
++TRACE_DEFINE_ENUM(NFS4ERR_ROFS);
++TRACE_DEFINE_ENUM(NFS4ERR_SAME);
++TRACE_DEFINE_ENUM(NFS4ERR_SHARE_DENIED);
++TRACE_DEFINE_ENUM(NFS4ERR_SEQUENCE_POS);
++TRACE_DEFINE_ENUM(NFS4ERR_SEQ_FALSE_RETRY);
++TRACE_DEFINE_ENUM(NFS4ERR_SEQ_MISORDERED);
++TRACE_DEFINE_ENUM(NFS4ERR_SERVERFAULT);
++TRACE_DEFINE_ENUM(NFS4ERR_STALE);
++TRACE_DEFINE_ENUM(NFS4ERR_STALE_CLIENTID);
++TRACE_DEFINE_ENUM(NFS4ERR_STALE_STATEID);
++TRACE_DEFINE_ENUM(NFS4ERR_SYMLINK);
++TRACE_DEFINE_ENUM(NFS4ERR_TOOSMALL);
++TRACE_DEFINE_ENUM(NFS4ERR_TOO_MANY_OPS);
++TRACE_DEFINE_ENUM(NFS4ERR_UNKNOWN_LAYOUTTYPE);
++TRACE_DEFINE_ENUM(NFS4ERR_UNSAFE_COMPOUND);
++TRACE_DEFINE_ENUM(NFS4ERR_WRONGSEC);
++TRACE_DEFINE_ENUM(NFS4ERR_WRONG_CRED);
++TRACE_DEFINE_ENUM(NFS4ERR_WRONG_TYPE);
++TRACE_DEFINE_ENUM(NFS4ERR_XDEV);
++
++TRACE_DEFINE_ENUM(NFS4ERR_RESET_TO_MDS);
++TRACE_DEFINE_ENUM(NFS4ERR_RESET_TO_PNFS);
++
++#define show_nfs4_status(x) \
++ __print_symbolic(x, \
++ { NFS4_OK, "OK" }, \
++ { EPERM, "EPERM" }, \
++ { ENOENT, "ENOENT" }, \
++ { EIO, "EIO" }, \
++ { ENXIO, "ENXIO" }, \
++ { EACCES, "EACCES" }, \
++ { EEXIST, "EEXIST" }, \
++ { EXDEV, "EXDEV" }, \
++ { ENOTDIR, "ENOTDIR" }, \
++ { EISDIR, "EISDIR" }, \
++ { EFBIG, "EFBIG" }, \
++ { ENOSPC, "ENOSPC" }, \
++ { EROFS, "EROFS" }, \
++ { EMLINK, "EMLINK" }, \
++ { ENAMETOOLONG, "ENAMETOOLONG" }, \
++ { ENOTEMPTY, "ENOTEMPTY" }, \
++ { EDQUOT, "EDQUOT" }, \
++ { ESTALE, "ESTALE" }, \
++ { EBADHANDLE, "EBADHANDLE" }, \
++ { EBADCOOKIE, "EBADCOOKIE" }, \
++ { ENOTSUPP, "ENOTSUPP" }, \
++ { ETOOSMALL, "ETOOSMALL" }, \
++ { EREMOTEIO, "EREMOTEIO" }, \
++ { EBADTYPE, "EBADTYPE" }, \
++ { EAGAIN, "EAGAIN" }, \
++ { ELOOP, "ELOOP" }, \
++ { EOPNOTSUPP, "EOPNOTSUPP" }, \
++ { EDEADLK, "EDEADLK" }, \
++ { ENOMEM, "ENOMEM" }, \
++ { EKEYEXPIRED, "EKEYEXPIRED" }, \
++ { ETIMEDOUT, "ETIMEDOUT" }, \
++ { ERESTARTSYS, "ERESTARTSYS" }, \
++ { ECONNREFUSED, "ECONNREFUSED" }, \
++ { ECONNRESET, "ECONNRESET" }, \
++ { ENETUNREACH, "ENETUNREACH" }, \
++ { EHOSTUNREACH, "EHOSTUNREACH" }, \
++ { EHOSTDOWN, "EHOSTDOWN" }, \
++ { EPIPE, "EPIPE" }, \
++ { EPFNOSUPPORT, "EPFNOSUPPORT" }, \
++ { EPROTONOSUPPORT, "EPROTONOSUPPORT" }, \
++ { NFS4ERR_ACCESS, "ACCESS" }, \
++ { NFS4ERR_ATTRNOTSUPP, "ATTRNOTSUPP" }, \
++ { NFS4ERR_ADMIN_REVOKED, "ADMIN_REVOKED" }, \
++ { NFS4ERR_BACK_CHAN_BUSY, "BACK_CHAN_BUSY" }, \
++ { NFS4ERR_BADCHAR, "BADCHAR" }, \
++ { NFS4ERR_BADHANDLE, "BADHANDLE" }, \
++ { NFS4ERR_BADIOMODE, "BADIOMODE" }, \
++ { NFS4ERR_BADLAYOUT, "BADLAYOUT" }, \
++ { NFS4ERR_BADLABEL, "BADLABEL" }, \
++ { NFS4ERR_BADNAME, "BADNAME" }, \
++ { NFS4ERR_BADOWNER, "BADOWNER" }, \
++ { NFS4ERR_BADSESSION, "BADSESSION" }, \
++ { NFS4ERR_BADSLOT, "BADSLOT" }, \
++ { NFS4ERR_BADTYPE, "BADTYPE" }, \
++ { NFS4ERR_BADXDR, "BADXDR" }, \
++ { NFS4ERR_BAD_COOKIE, "BAD_COOKIE" }, \
++ { NFS4ERR_BAD_HIGH_SLOT, "BAD_HIGH_SLOT" }, \
++ { NFS4ERR_BAD_RANGE, "BAD_RANGE" }, \
++ { NFS4ERR_BAD_SEQID, "BAD_SEQID" }, \
++ { NFS4ERR_BAD_SESSION_DIGEST, "BAD_SESSION_DIGEST" }, \
++ { NFS4ERR_BAD_STATEID, "BAD_STATEID" }, \
++ { NFS4ERR_CB_PATH_DOWN, "CB_PATH_DOWN" }, \
++ { NFS4ERR_CLID_INUSE, "CLID_INUSE" }, \
++ { NFS4ERR_CLIENTID_BUSY, "CLIENTID_BUSY" }, \
++ { NFS4ERR_COMPLETE_ALREADY, "COMPLETE_ALREADY" }, \
++ { NFS4ERR_CONN_NOT_BOUND_TO_SESSION, "CONN_NOT_BOUND_TO_SESSION" }, \
++ { NFS4ERR_DEADLOCK, "DEADLOCK" }, \
++ { NFS4ERR_DEADSESSION, "DEAD_SESSION" }, \
++ { NFS4ERR_DELAY, "DELAY" }, \
++ { NFS4ERR_DELEG_ALREADY_WANTED, "DELEG_ALREADY_WANTED" }, \
++ { NFS4ERR_DELEG_REVOKED, "DELEG_REVOKED" }, \
++ { NFS4ERR_DENIED, "DENIED" }, \
++ { NFS4ERR_DIRDELEG_UNAVAIL, "DIRDELEG_UNAVAIL" }, \
++ { NFS4ERR_DQUOT, "DQUOT" }, \
++ { NFS4ERR_ENCR_ALG_UNSUPP, "ENCR_ALG_UNSUPP" }, \
++ { NFS4ERR_EXIST, "EXIST" }, \
++ { NFS4ERR_EXPIRED, "EXPIRED" }, \
++ { NFS4ERR_FBIG, "FBIG" }, \
++ { NFS4ERR_FHEXPIRED, "FHEXPIRED" }, \
++ { NFS4ERR_FILE_OPEN, "FILE_OPEN" }, \
++ { NFS4ERR_GRACE, "GRACE" }, \
++ { NFS4ERR_HASH_ALG_UNSUPP, "HASH_ALG_UNSUPP" }, \
++ { NFS4ERR_INVAL, "INVAL" }, \
++ { NFS4ERR_IO, "IO" }, \
++ { NFS4ERR_ISDIR, "ISDIR" }, \
++ { NFS4ERR_LAYOUTTRYLATER, "LAYOUTTRYLATER" }, \
++ { NFS4ERR_LAYOUTUNAVAILABLE, "LAYOUTUNAVAILABLE" }, \
++ { NFS4ERR_LEASE_MOVED, "LEASE_MOVED" }, \
++ { NFS4ERR_LOCKED, "LOCKED" }, \
++ { NFS4ERR_LOCKS_HELD, "LOCKS_HELD" }, \
++ { NFS4ERR_LOCK_RANGE, "LOCK_RANGE" }, \
++ { NFS4ERR_MINOR_VERS_MISMATCH, "MINOR_VERS_MISMATCH" }, \
++ { NFS4ERR_MLINK, "MLINK" }, \
++ { NFS4ERR_MOVED, "MOVED" }, \
++ { NFS4ERR_NAMETOOLONG, "NAMETOOLONG" }, \
++ { NFS4ERR_NOENT, "NOENT" }, \
++ { NFS4ERR_NOFILEHANDLE, "NOFILEHANDLE" }, \
++ { NFS4ERR_NOMATCHING_LAYOUT, "NOMATCHING_LAYOUT" }, \
++ { NFS4ERR_NOSPC, "NOSPC" }, \
++ { NFS4ERR_NOTDIR, "NOTDIR" }, \
++ { NFS4ERR_NOTEMPTY, "NOTEMPTY" }, \
++ { NFS4ERR_NOTSUPP, "NOTSUPP" }, \
++ { NFS4ERR_NOT_ONLY_OP, "NOT_ONLY_OP" }, \
++ { NFS4ERR_NOT_SAME, "NOT_SAME" }, \
++ { NFS4ERR_NO_GRACE, "NO_GRACE" }, \
++ { NFS4ERR_NXIO, "NXIO" }, \
++ { NFS4ERR_OLD_STATEID, "OLD_STATEID" }, \
++ { NFS4ERR_OPENMODE, "OPENMODE" }, \
++ { NFS4ERR_OP_ILLEGAL, "OP_ILLEGAL" }, \
++ { NFS4ERR_OP_NOT_IN_SESSION, "OP_NOT_IN_SESSION" }, \
++ { NFS4ERR_PERM, "PERM" }, \
++ { NFS4ERR_PNFS_IO_HOLE, "PNFS_IO_HOLE" }, \
++ { NFS4ERR_PNFS_NO_LAYOUT, "PNFS_NO_LAYOUT" }, \
++ { NFS4ERR_RECALLCONFLICT, "RECALLCONFLICT" }, \
++ { NFS4ERR_RECLAIM_BAD, "RECLAIM_BAD" }, \
++ { NFS4ERR_RECLAIM_CONFLICT, "RECLAIM_CONFLICT" }, \
++ { NFS4ERR_REJECT_DELEG, "REJECT_DELEG" }, \
++ { NFS4ERR_REP_TOO_BIG, "REP_TOO_BIG" }, \
++ { NFS4ERR_REP_TOO_BIG_TO_CACHE, "REP_TOO_BIG_TO_CACHE" }, \
++ { NFS4ERR_REQ_TOO_BIG, "REQ_TOO_BIG" }, \
++ { NFS4ERR_RESOURCE, "RESOURCE" }, \
++ { NFS4ERR_RESTOREFH, "RESTOREFH" }, \
++ { NFS4ERR_RETRY_UNCACHED_REP, "RETRY_UNCACHED_REP" }, \
++ { NFS4ERR_RETURNCONFLICT, "RETURNCONFLICT" }, \
++ { NFS4ERR_ROFS, "ROFS" }, \
++ { NFS4ERR_SAME, "SAME" }, \
++ { NFS4ERR_SHARE_DENIED, "SHARE_DENIED" }, \
++ { NFS4ERR_SEQUENCE_POS, "SEQUENCE_POS" }, \
++ { NFS4ERR_SEQ_FALSE_RETRY, "SEQ_FALSE_RETRY" }, \
++ { NFS4ERR_SEQ_MISORDERED, "SEQ_MISORDERED" }, \
++ { NFS4ERR_SERVERFAULT, "SERVERFAULT" }, \
++ { NFS4ERR_STALE, "STALE" }, \
++ { NFS4ERR_STALE_CLIENTID, "STALE_CLIENTID" }, \
++ { NFS4ERR_STALE_STATEID, "STALE_STATEID" }, \
++ { NFS4ERR_SYMLINK, "SYMLINK" }, \
++ { NFS4ERR_TOOSMALL, "TOOSMALL" }, \
++ { NFS4ERR_TOO_MANY_OPS, "TOO_MANY_OPS" }, \
++ { NFS4ERR_UNKNOWN_LAYOUTTYPE, "UNKNOWN_LAYOUTTYPE" }, \
++ { NFS4ERR_UNSAFE_COMPOUND, "UNSAFE_COMPOUND" }, \
++ { NFS4ERR_WRONGSEC, "WRONGSEC" }, \
++ { NFS4ERR_WRONG_CRED, "WRONG_CRED" }, \
++ { NFS4ERR_WRONG_TYPE, "WRONG_TYPE" }, \
++ { NFS4ERR_XDEV, "XDEV" }, \
++ /* ***** Internal to Linux NFS client ***** */ \
++ { NFS4ERR_RESET_TO_MDS, "RESET_TO_MDS" }, \
++ { NFS4ERR_RESET_TO_PNFS, "RESET_TO_PNFS" })
++
++#define show_nfs4_verifier(x) \
++ __print_hex_str(x, NFS4_VERIFIER_SIZE)
++
++TRACE_DEFINE_ENUM(IOMODE_READ);
++TRACE_DEFINE_ENUM(IOMODE_RW);
++TRACE_DEFINE_ENUM(IOMODE_ANY);
++
++#define show_pnfs_layout_iomode(x) \
++ __print_symbolic(x, \
++ { IOMODE_READ, "READ" }, \
++ { IOMODE_RW, "RW" }, \
++ { IOMODE_ANY, "ANY" })
++
++#define show_rca_mask(x) \
++ __print_flags(x, "|", \
++ { BIT(RCA4_TYPE_MASK_RDATA_DLG), "RDATA_DLG" }, \
++ { BIT(RCA4_TYPE_MASK_WDATA_DLG), "WDATA_DLG" }, \
++ { BIT(RCA4_TYPE_MASK_DIR_DLG), "DIR_DLG" }, \
++ { BIT(RCA4_TYPE_MASK_FILE_LAYOUT), "FILE_LAYOUT" }, \
++ { BIT(RCA4_TYPE_MASK_BLK_LAYOUT), "BLK_LAYOUT" }, \
++ { BIT(RCA4_TYPE_MASK_OBJ_LAYOUT_MIN), "OBJ_LAYOUT_MIN" }, \
++ { BIT(RCA4_TYPE_MASK_OBJ_LAYOUT_MAX), "OBJ_LAYOUT_MAX" }, \
++ { BIT(RCA4_TYPE_MASK_OTHER_LAYOUT_MIN), "OTHER_LAYOUT_MIN" }, \
++ { BIT(RCA4_TYPE_MASK_OTHER_LAYOUT_MAX), "OTHER_LAYOUT_MAX" })
++
++#define show_nfs4_seq4_status(x) \
++ __print_flags(x, "|", \
++ { SEQ4_STATUS_CB_PATH_DOWN, "CB_PATH_DOWN" }, \
++ { SEQ4_STATUS_CB_GSS_CONTEXTS_EXPIRING, "CB_GSS_CONTEXTS_EXPIRING" }, \
++ { SEQ4_STATUS_CB_GSS_CONTEXTS_EXPIRED, "CB_GSS_CONTEXTS_EXPIRED" }, \
++ { SEQ4_STATUS_EXPIRED_ALL_STATE_REVOKED, "EXPIRED_ALL_STATE_REVOKED" }, \
++ { SEQ4_STATUS_EXPIRED_SOME_STATE_REVOKED, "EXPIRED_SOME_STATE_REVOKED" }, \
++ { SEQ4_STATUS_ADMIN_STATE_REVOKED, "ADMIN_STATE_REVOKED" }, \
++ { SEQ4_STATUS_RECALLABLE_STATE_REVOKED, "RECALLABLE_STATE_REVOKED" }, \
++ { SEQ4_STATUS_LEASE_MOVED, "LEASE_MOVED" }, \
++ { SEQ4_STATUS_RESTART_RECLAIM_NEEDED, "RESTART_RECLAIM_NEEDED" }, \
++ { SEQ4_STATUS_CB_PATH_DOWN_SESSION, "CB_PATH_DOWN_SESSION" }, \
++ { SEQ4_STATUS_BACKCHANNEL_FAULT, "BACKCHANNEL_FAULT" })
+diff --git a/include/trace/misc/rdma.h b/include/trace/misc/rdma.h
+new file mode 100644
+index 0000000000000..81bb454fc2888
+--- /dev/null
++++ b/include/trace/misc/rdma.h
+@@ -0,0 +1,168 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Copyright (c) 2017 Oracle. All rights reserved.
++ */
++
++/*
++ * enum ib_event_type, from include/rdma/ib_verbs.h
++ */
++#define IB_EVENT_LIST \
++ ib_event(CQ_ERR) \
++ ib_event(QP_FATAL) \
++ ib_event(QP_REQ_ERR) \
++ ib_event(QP_ACCESS_ERR) \
++ ib_event(COMM_EST) \
++ ib_event(SQ_DRAINED) \
++ ib_event(PATH_MIG) \
++ ib_event(PATH_MIG_ERR) \
++ ib_event(DEVICE_FATAL) \
++ ib_event(PORT_ACTIVE) \
++ ib_event(PORT_ERR) \
++ ib_event(LID_CHANGE) \
++ ib_event(PKEY_CHANGE) \
++ ib_event(SM_CHANGE) \
++ ib_event(SRQ_ERR) \
++ ib_event(SRQ_LIMIT_REACHED) \
++ ib_event(QP_LAST_WQE_REACHED) \
++ ib_event(CLIENT_REREGISTER) \
++ ib_event(GID_CHANGE) \
++ ib_event_end(WQ_FATAL)
++
++#undef ib_event
++#undef ib_event_end
++
++#define ib_event(x) TRACE_DEFINE_ENUM(IB_EVENT_##x);
++#define ib_event_end(x) TRACE_DEFINE_ENUM(IB_EVENT_##x);
++
++IB_EVENT_LIST
++
++#undef ib_event
++#undef ib_event_end
++
++#define ib_event(x) { IB_EVENT_##x, #x },
++#define ib_event_end(x) { IB_EVENT_##x, #x }
++
++#define rdma_show_ib_event(x) \
++ __print_symbolic(x, IB_EVENT_LIST)
++
++/*
++ * enum ib_wc_status type, from include/rdma/ib_verbs.h
++ */
++#define IB_WC_STATUS_LIST \
++ ib_wc_status(SUCCESS) \
++ ib_wc_status(LOC_LEN_ERR) \
++ ib_wc_status(LOC_QP_OP_ERR) \
++ ib_wc_status(LOC_EEC_OP_ERR) \
++ ib_wc_status(LOC_PROT_ERR) \
++ ib_wc_status(WR_FLUSH_ERR) \
++ ib_wc_status(MW_BIND_ERR) \
++ ib_wc_status(BAD_RESP_ERR) \
++ ib_wc_status(LOC_ACCESS_ERR) \
++ ib_wc_status(REM_INV_REQ_ERR) \
++ ib_wc_status(REM_ACCESS_ERR) \
++ ib_wc_status(REM_OP_ERR) \
++ ib_wc_status(RETRY_EXC_ERR) \
++ ib_wc_status(RNR_RETRY_EXC_ERR) \
++ ib_wc_status(LOC_RDD_VIOL_ERR) \
++ ib_wc_status(REM_INV_RD_REQ_ERR) \
++ ib_wc_status(REM_ABORT_ERR) \
++ ib_wc_status(INV_EECN_ERR) \
++ ib_wc_status(INV_EEC_STATE_ERR) \
++ ib_wc_status(FATAL_ERR) \
++ ib_wc_status(RESP_TIMEOUT_ERR) \
++ ib_wc_status_end(GENERAL_ERR)
++
++#undef ib_wc_status
++#undef ib_wc_status_end
++
++#define ib_wc_status(x) TRACE_DEFINE_ENUM(IB_WC_##x);
++#define ib_wc_status_end(x) TRACE_DEFINE_ENUM(IB_WC_##x);
++
++IB_WC_STATUS_LIST
++
++#undef ib_wc_status
++#undef ib_wc_status_end
++
++#define ib_wc_status(x) { IB_WC_##x, #x },
++#define ib_wc_status_end(x) { IB_WC_##x, #x }
++
++#define rdma_show_wc_status(x) \
++ __print_symbolic(x, IB_WC_STATUS_LIST)
++
++/*
++ * enum ib_cm_event_type, from include/rdma/ib_cm.h
++ */
++#define IB_CM_EVENT_LIST \
++ ib_cm_event(REQ_ERROR) \
++ ib_cm_event(REQ_RECEIVED) \
++ ib_cm_event(REP_ERROR) \
++ ib_cm_event(REP_RECEIVED) \
++ ib_cm_event(RTU_RECEIVED) \
++ ib_cm_event(USER_ESTABLISHED) \
++ ib_cm_event(DREQ_ERROR) \
++ ib_cm_event(DREQ_RECEIVED) \
++ ib_cm_event(DREP_RECEIVED) \
++ ib_cm_event(TIMEWAIT_EXIT) \
++ ib_cm_event(MRA_RECEIVED) \
++ ib_cm_event(REJ_RECEIVED) \
++ ib_cm_event(LAP_ERROR) \
++ ib_cm_event(LAP_RECEIVED) \
++ ib_cm_event(APR_RECEIVED) \
++ ib_cm_event(SIDR_REQ_ERROR) \
++ ib_cm_event(SIDR_REQ_RECEIVED) \
++ ib_cm_event_end(SIDR_REP_RECEIVED)
++
++#undef ib_cm_event
++#undef ib_cm_event_end
++
++#define ib_cm_event(x) TRACE_DEFINE_ENUM(IB_CM_##x);
++#define ib_cm_event_end(x) TRACE_DEFINE_ENUM(IB_CM_##x);
++
++IB_CM_EVENT_LIST
++
++#undef ib_cm_event
++#undef ib_cm_event_end
++
++#define ib_cm_event(x) { IB_CM_##x, #x },
++#define ib_cm_event_end(x) { IB_CM_##x, #x }
++
++#define rdma_show_ib_cm_event(x) \
++ __print_symbolic(x, IB_CM_EVENT_LIST)
++
++/*
++ * enum rdma_cm_event_type, from include/rdma/rdma_cm.h
++ */
++#define RDMA_CM_EVENT_LIST \
++ rdma_cm_event(ADDR_RESOLVED) \
++ rdma_cm_event(ADDR_ERROR) \
++ rdma_cm_event(ROUTE_RESOLVED) \
++ rdma_cm_event(ROUTE_ERROR) \
++ rdma_cm_event(CONNECT_REQUEST) \
++ rdma_cm_event(CONNECT_RESPONSE) \
++ rdma_cm_event(CONNECT_ERROR) \
++ rdma_cm_event(UNREACHABLE) \
++ rdma_cm_event(REJECTED) \
++ rdma_cm_event(ESTABLISHED) \
++ rdma_cm_event(DISCONNECTED) \
++ rdma_cm_event(DEVICE_REMOVAL) \
++ rdma_cm_event(MULTICAST_JOIN) \
++ rdma_cm_event(MULTICAST_ERROR) \
++ rdma_cm_event(ADDR_CHANGE) \
++ rdma_cm_event_end(TIMEWAIT_EXIT)
++
++#undef rdma_cm_event
++#undef rdma_cm_event_end
++
++#define rdma_cm_event(x) TRACE_DEFINE_ENUM(RDMA_CM_EVENT_##x);
++#define rdma_cm_event_end(x) TRACE_DEFINE_ENUM(RDMA_CM_EVENT_##x);
++
++RDMA_CM_EVENT_LIST
++
++#undef rdma_cm_event
++#undef rdma_cm_event_end
++
++#define rdma_cm_event(x) { RDMA_CM_EVENT_##x, #x },
++#define rdma_cm_event_end(x) { RDMA_CM_EVENT_##x, #x }
++
++#define rdma_show_cm_event(x) \
++ __print_symbolic(x, RDMA_CM_EVENT_LIST)
+diff --git a/include/trace/misc/sunrpc.h b/include/trace/misc/sunrpc.h
+new file mode 100644
+index 0000000000000..588557d07ea82
+--- /dev/null
++++ b/include/trace/misc/sunrpc.h
+@@ -0,0 +1,18 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Copyright (c) 2021 Oracle and/or its affiliates.
++ *
++ * Common types and format specifiers for sunrpc.
++ */
++
++#if !defined(_TRACE_SUNRPC_BASE_H)
++#define _TRACE_SUNRPC_BASE_H
++
++#include <linux/tracepoint.h>
++
++#define SUNRPC_TRACE_PID_SPECIFIER "%08x"
++#define SUNRPC_TRACE_CLID_SPECIFIER "%08x"
++#define SUNRPC_TRACE_TASK_SPECIFIER \
++ "task:" SUNRPC_TRACE_PID_SPECIFIER "@" SUNRPC_TRACE_CLID_SPECIFIER
++
++#endif /* _TRACE_SUNRPC_BASE_H */
+diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
+index 201dc77ebbd77..d5d2183730b9f 100644
+--- a/include/uapi/linux/bpf.h
++++ b/include/uapi/linux/bpf.h
+@@ -3109,6 +3109,10 @@ union bpf_attr {
+ * **BPF_FIB_LOOKUP_DIRECT**
+ * Do a direct table lookup vs full lookup using FIB
+ * rules.
++ * **BPF_FIB_LOOKUP_TBID**
++ * Used with BPF_FIB_LOOKUP_DIRECT.
++ * Use the routing table ID present in *params*->tbid
++ * for the fib lookup.
+ * **BPF_FIB_LOOKUP_OUTPUT**
+ * Perform lookup from an egress perspective (default is
+ * ingress).
+@@ -3117,6 +3121,11 @@ union bpf_attr {
+ * and *params*->smac will not be set as output. A common
+ * use case is to call **bpf_redirect_neigh**\ () after
+ * doing **bpf_fib_lookup**\ ().
++ * **BPF_FIB_LOOKUP_SRC**
++ * Derive and set source IP addr in *params*->ipv{4,6}_src
++ * for the nexthop. If the src addr cannot be derived,
++ * **BPF_FIB_LKUP_RET_NO_SRC_ADDR** is returned. In this
++ * case, *params*->dmac and *params*->smac are not set either.
+ *
+ * *ctx* is either **struct xdp_md** for XDP programs or
+ * **struct sk_buff** tc cls_act programs.
+@@ -6687,6 +6696,8 @@ enum {
+ BPF_FIB_LOOKUP_DIRECT = (1U << 0),
+ BPF_FIB_LOOKUP_OUTPUT = (1U << 1),
+ BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2),
++ BPF_FIB_LOOKUP_TBID = (1U << 3),
++ BPF_FIB_LOOKUP_SRC = (1U << 4),
+ };
+
+ enum {
+@@ -6699,6 +6710,7 @@ enum {
+ BPF_FIB_LKUP_RET_UNSUPP_LWT, /* fwd requires encapsulation */
+ BPF_FIB_LKUP_RET_NO_NEIGH, /* no neighbor entry for nh */
+ BPF_FIB_LKUP_RET_FRAG_NEEDED, /* fragmentation required to fwd */
++ BPF_FIB_LKUP_RET_NO_SRC_ADDR, /* failed to derive IP src addr */
+ };
+
+ struct bpf_fib_lookup {
+@@ -6733,6 +6745,9 @@ struct bpf_fib_lookup {
+ __u32 rt_metric;
+ };
+
++ /* input: source address to consider for lookup
++ * output: source address result from lookup
++ */
+ union {
+ __be32 ipv4_src;
+ __u32 ipv6_src[4]; /* in6_addr; network order */
+@@ -6747,9 +6762,19 @@ struct bpf_fib_lookup {
+ __u32 ipv6_dst[4]; /* in6_addr; network order */
+ };
+
+- /* output */
+- __be16 h_vlan_proto;
+- __be16 h_vlan_TCI;
++ union {
++ struct {
++ /* output */
++ __be16 h_vlan_proto;
++ __be16 h_vlan_TCI;
++ };
++ /* input: when accompanied with the
++	 * 'BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_TBID' flags, a
++ * specific routing table to use for the fib lookup.
++ */
++ __u32 tbid;
++ };
++
+ __u8 smac[6]; /* ETH_ALEN */
+ __u8 dmac[6]; /* ETH_ALEN */
+ };
+diff --git a/include/uapi/linux/in6.h b/include/uapi/linux/in6.h
+index c4c53a9ab9595..ff8d21f9e95b7 100644
+--- a/include/uapi/linux/in6.h
++++ b/include/uapi/linux/in6.h
+@@ -145,7 +145,7 @@ struct in6_flowlabel_req {
+ #define IPV6_TLV_PADN 1
+ #define IPV6_TLV_ROUTERALERT 5
+ #define IPV6_TLV_CALIPSO 7 /* RFC 5570 */
+-#define IPV6_TLV_IOAM 49 /* TEMPORARY IANA allocation for IOAM */
++#define IPV6_TLV_IOAM 49 /* RFC 9486 */
+ #define IPV6_TLV_JUMBO 194
+ #define IPV6_TLV_HAO 201 /* home address option */
+
+diff --git a/lib/nlattr.c b/lib/nlattr.c
+index dffd60e4065fd..86344df0ccf7b 100644
+--- a/lib/nlattr.c
++++ b/lib/nlattr.c
+@@ -30,6 +30,8 @@ static const u8 nla_attr_len[NLA_TYPE_MAX+1] = {
+ [NLA_S16] = sizeof(s16),
+ [NLA_S32] = sizeof(s32),
+ [NLA_S64] = sizeof(s64),
++ [NLA_BE16] = sizeof(__be16),
++ [NLA_BE32] = sizeof(__be32),
+ };
+
+ static const u8 nla_attr_minlen[NLA_TYPE_MAX+1] = {
+@@ -43,6 +45,8 @@ static const u8 nla_attr_minlen[NLA_TYPE_MAX+1] = {
+ [NLA_S16] = sizeof(s16),
+ [NLA_S32] = sizeof(s32),
+ [NLA_S64] = sizeof(s64),
++ [NLA_BE16] = sizeof(__be16),
++ [NLA_BE32] = sizeof(__be32),
+ };
+
+ /*
+diff --git a/mm/huge_memory.c b/mm/huge_memory.c
+index 59577946735b1..9736e762184bd 100644
+--- a/mm/huge_memory.c
++++ b/mm/huge_memory.c
+@@ -37,6 +37,7 @@
+ #include <linux/page_owner.h>
+ #include <linux/sched/sysctl.h>
+ #include <linux/memory-tiers.h>
++#include <linux/compat.h>
+
+ #include <asm/tlb.h>
+ #include <asm/pgalloc.h>
+@@ -607,6 +608,9 @@ static unsigned long __thp_get_unmapped_area(struct file *filp,
+ loff_t off_align = round_up(off, size);
+ unsigned long len_pad, ret;
+
++ if (IS_ENABLED(CONFIG_32BIT) || in_compat_syscall())
++ return 0;
++
+ if (off_end <= off_align || (off_end - off_align) < size)
+ return 0;
+
+diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
+index 6a1db678d032f..a8932d449eb63 100644
+--- a/net/bluetooth/hci_core.c
++++ b/net/bluetooth/hci_core.c
+@@ -1049,6 +1049,7 @@ static void hci_error_reset(struct work_struct *work)
+ {
+ struct hci_dev *hdev = container_of(work, struct hci_dev, error_reset);
+
++ hci_dev_hold(hdev);
+ BT_DBG("%s", hdev->name);
+
+ if (hdev->hw_error)
+@@ -1056,10 +1057,10 @@ static void hci_error_reset(struct work_struct *work)
+ else
+ bt_dev_err(hdev, "hardware error 0x%2.2x", hdev->hw_error_code);
+
+- if (hci_dev_do_close(hdev))
+- return;
++ if (!hci_dev_do_close(hdev))
++ hci_dev_do_open(hdev);
+
+- hci_dev_do_open(hdev);
++ hci_dev_put(hdev);
+ }
+
+ void hci_uuids_clear(struct hci_dev *hdev)
+diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
+index 56ecc5f97b916..452d839c152fc 100644
+--- a/net/bluetooth/hci_event.c
++++ b/net/bluetooth/hci_event.c
+@@ -5282,9 +5282,12 @@ static void hci_io_capa_request_evt(struct hci_dev *hdev, void *data,
+ hci_dev_lock(hdev);
+
+ conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr);
+- if (!conn || !hci_conn_ssp_enabled(conn))
++ if (!conn || !hci_dev_test_flag(hdev, HCI_SSP_ENABLED))
+ goto unlock;
+
++ /* Assume remote supports SSP since it has triggered this event */
++ set_bit(HCI_CONN_SSP_ENABLED, &conn->flags);
++
+ hci_conn_hold(conn);
+
+ if (!hci_dev_test_flag(hdev, HCI_MGMT))
+@@ -6716,6 +6719,10 @@ static void hci_le_remote_conn_param_req_evt(struct hci_dev *hdev, void *data,
+ return send_conn_param_neg_reply(hdev, handle,
+ HCI_ERROR_UNKNOWN_CONN_ID);
+
++ if (max > hcon->le_conn_max_interval)
++ return send_conn_param_neg_reply(hdev, handle,
++ HCI_ERROR_INVALID_LL_PARAMS);
++
+ if (hci_check_conn_params(min, max, latency, timeout))
+ return send_conn_param_neg_reply(hdev, handle,
+ HCI_ERROR_INVALID_LL_PARAMS);
+@@ -7245,10 +7252,10 @@ static void hci_store_wake_reason(struct hci_dev *hdev, u8 event,
+ * keep track of the bdaddr of the connection event that woke us up.
+ */
+ if (event == HCI_EV_CONN_REQUEST) {
+- bacpy(&hdev->wake_addr, &conn_complete->bdaddr);
++ bacpy(&hdev->wake_addr, &conn_request->bdaddr);
+ hdev->wake_addr_type = BDADDR_BREDR;
+ } else if (event == HCI_EV_CONN_COMPLETE) {
+- bacpy(&hdev->wake_addr, &conn_request->bdaddr);
++ bacpy(&hdev->wake_addr, &conn_complete->bdaddr);
+ hdev->wake_addr_type = BDADDR_BREDR;
+ } else if (event == HCI_EV_LE_META) {
+ struct hci_ev_le_meta *le_ev = (void *)skb->data;
+diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
+index 45d19294aa772..a337340464567 100644
+--- a/net/bluetooth/hci_sync.c
++++ b/net/bluetooth/hci_sync.c
+@@ -2251,8 +2251,11 @@ static int hci_le_add_accept_list_sync(struct hci_dev *hdev,
+
+ /* During suspend, only wakeable devices can be in acceptlist */
+ if (hdev->suspended &&
+- !(params->flags & HCI_CONN_FLAG_REMOTE_WAKEUP))
++ !(params->flags & HCI_CONN_FLAG_REMOTE_WAKEUP)) {
++ hci_le_del_accept_list_sync(hdev, &params->addr,
++ params->addr_type);
+ return 0;
++ }
+
+ /* Select filter policy to accept all advertising */
+ if (*num_entries >= hdev->le_accept_list_size)
+@@ -5482,7 +5485,7 @@ static int hci_inquiry_sync(struct hci_dev *hdev, u8 length)
+
+ bt_dev_dbg(hdev, "");
+
+- if (hci_dev_test_flag(hdev, HCI_INQUIRY))
++ if (test_bit(HCI_INQUIRY, &hdev->flags))
+ return 0;
+
+ hci_dev_lock(hdev);
+diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
+index 81f5974e5eb5a..b4cba55be5ad9 100644
+--- a/net/bluetooth/l2cap_core.c
++++ b/net/bluetooth/l2cap_core.c
+@@ -5614,7 +5614,13 @@ static inline int l2cap_conn_param_update_req(struct l2cap_conn *conn,
+
+ memset(&rsp, 0, sizeof(rsp));
+
+- err = hci_check_conn_params(min, max, latency, to_multiplier);
++ if (max > hcon->le_conn_max_interval) {
++ BT_DBG("requested connection interval exceeds current bounds.");
++ err = -EINVAL;
++ } else {
++ err = hci_check_conn_params(min, max, latency, to_multiplier);
++ }
++
+ if (err)
+ rsp.result = cpu_to_le16(L2CAP_CONN_PARAM_REJECTED);
+ else
+diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
+index 202ad43e35d6b..bff48d5763635 100644
+--- a/net/bridge/br_netfilter_hooks.c
++++ b/net/bridge/br_netfilter_hooks.c
+@@ -43,6 +43,10 @@
+ #include <linux/sysctl.h>
+ #endif
+
++#if IS_ENABLED(CONFIG_NF_CONNTRACK)
++#include <net/netfilter/nf_conntrack_core.h>
++#endif
++
+ static unsigned int brnf_net_id __read_mostly;
+
+ struct brnf_net {
+@@ -553,6 +557,90 @@ static unsigned int br_nf_pre_routing(void *priv,
+ return NF_STOLEN;
+ }
+
++#if IS_ENABLED(CONFIG_NF_CONNTRACK)
++/* conntracks' nf_confirm logic cannot handle cloned skbs referencing
++ * the same nf_conn entry, which will happen for multicast (broadcast)
++ * frames on bridges.
++ *
++ * Example:
++ * macvlan0
++ * br0
++ * ethX ethY
++ *
++ * ethX (or Y) receives a multicast or broadcast packet containing
++ * an IP packet, not yet in conntrack table.
++ *
++ * 1. skb passes through bridge and fake-ip (br_netfilter) Prerouting.
++ *    -> skb->_nfct now references an unconfirmed entry
++ * 2. skb is broad/mcast packet. bridge now passes clones out on each bridge
++ * interface.
++ * 3. skb gets passed up the stack.
++ * 4. In macvlan case, macvlan driver retains clone(s) of the mcast skb
++ * and schedules a work queue to send them out on the lower devices.
++ *
++ * The clone skb->_nfct is not a copy; it is the same entry as the
++ * original skb. The macvlan rx handler then returns RX_HANDLER_PASS.
++ * 5. Normal conntrack hooks (in NF_INET_LOCAL_IN) confirm the orig skb.
++ *
++ * The macvlan broadcast worker and normal confirm path will race.
++ *
++ * This race will not happen if step 2 already confirmed a clone. In that
++ * case later steps perform skb_clone() with skb->_nfct already confirmed (in
++ * hash table). This works fine.
++ *
++ * But such confirmation won't happen when eb/ip/nftables rules dropped the
++ * packets before they reached the nf_confirm step in postrouting.
++ *
++ * Work around this problem by explicit confirmation of the entry at
++ * LOCAL_IN time, before upper layer has a chance to clone the unconfirmed
++ * entry.
++ *
++ */
++static unsigned int br_nf_local_in(void *priv,
++ struct sk_buff *skb,
++ const struct nf_hook_state *state)
++{
++ struct nf_conntrack *nfct = skb_nfct(skb);
++ const struct nf_ct_hook *ct_hook;
++ struct nf_conn *ct;
++ int ret;
++
++ if (!nfct || skb->pkt_type == PACKET_HOST)
++ return NF_ACCEPT;
++
++ ct = container_of(nfct, struct nf_conn, ct_general);
++ if (likely(nf_ct_is_confirmed(ct)))
++ return NF_ACCEPT;
++
++ WARN_ON_ONCE(skb_shared(skb));
++ WARN_ON_ONCE(refcount_read(&nfct->use) != 1);
++
++	/* We can't call nf_confirm here; it would create a dependency
++	 * on the nf_conntrack module.
++ */
++ ct_hook = rcu_dereference(nf_ct_hook);
++ if (!ct_hook) {
++ skb->_nfct = 0ul;
++ nf_conntrack_put(nfct);
++ return NF_ACCEPT;
++ }
++
++ nf_bridge_pull_encap_header(skb);
++ ret = ct_hook->confirm(skb);
++ switch (ret & NF_VERDICT_MASK) {
++ case NF_STOLEN:
++ return NF_STOLEN;
++ default:
++ nf_bridge_push_encap_header(skb);
++ break;
++ }
++
++ ct = container_of(nfct, struct nf_conn, ct_general);
++ WARN_ON_ONCE(!nf_ct_is_confirmed(ct));
++
++ return ret;
++}
++#endif
+
+ /* PF_BRIDGE/FORWARD *************************************************/
+ static int br_nf_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
+@@ -962,6 +1050,14 @@ static const struct nf_hook_ops br_nf_ops[] = {
+ .hooknum = NF_BR_PRE_ROUTING,
+ .priority = NF_BR_PRI_BRNF,
+ },
++#if IS_ENABLED(CONFIG_NF_CONNTRACK)
++ {
++ .hook = br_nf_local_in,
++ .pf = NFPROTO_BRIDGE,
++ .hooknum = NF_BR_LOCAL_IN,
++ .priority = NF_BR_PRI_LAST,
++ },
++#endif
+ {
+ .hook = br_nf_forward_ip,
+ .pf = NFPROTO_BRIDGE,
+diff --git a/net/bridge/netfilter/nf_conntrack_bridge.c b/net/bridge/netfilter/nf_conntrack_bridge.c
+index 06d94b2c6b5de..c7c27ada67044 100644
+--- a/net/bridge/netfilter/nf_conntrack_bridge.c
++++ b/net/bridge/netfilter/nf_conntrack_bridge.c
+@@ -291,6 +291,30 @@ static unsigned int nf_ct_bridge_pre(void *priv, struct sk_buff *skb,
+ return nf_conntrack_in(skb, &bridge_state);
+ }
+
++static unsigned int nf_ct_bridge_in(void *priv, struct sk_buff *skb,
++ const struct nf_hook_state *state)
++{
++ enum ip_conntrack_info ctinfo;
++ struct nf_conn *ct;
++
++ if (skb->pkt_type == PACKET_HOST)
++ return NF_ACCEPT;
++
++ /* nf_conntrack_confirm() cannot handle concurrent clones,
++ * this happens for broad/multicast frames with e.g. macvlan on top
++ * of the bridge device.
++ */
++ ct = nf_ct_get(skb, &ctinfo);
++ if (!ct || nf_ct_is_confirmed(ct) || nf_ct_is_template(ct))
++ return NF_ACCEPT;
++
++ /* let inet prerouting call conntrack again */
++ skb->_nfct = 0;
++ nf_ct_put(ct);
++
++ return NF_ACCEPT;
++}
++
+ static void nf_ct_bridge_frag_save(struct sk_buff *skb,
+ struct nf_bridge_frag_data *data)
+ {
+@@ -415,6 +439,12 @@ static struct nf_hook_ops nf_ct_bridge_hook_ops[] __read_mostly = {
+ .hooknum = NF_BR_PRE_ROUTING,
+ .priority = NF_IP_PRI_CONNTRACK,
+ },
++ {
++ .hook = nf_ct_bridge_in,
++ .pf = NFPROTO_BRIDGE,
++ .hooknum = NF_BR_LOCAL_IN,
++ .priority = NF_IP_PRI_CONNTRACK_CONFIRM,
++ },
+ {
+ .hook = nf_ct_bridge_post,
+ .pf = NFPROTO_BRIDGE,
+diff --git a/net/core/filter.c b/net/core/filter.c
+index 3a6110ea4009f..cb7c4651eaec8 100644
+--- a/net/core/filter.c
++++ b/net/core/filter.c
+@@ -5752,6 +5752,12 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
+ u32 tbid = l3mdev_fib_table_rcu(dev) ? : RT_TABLE_MAIN;
+ struct fib_table *tb;
+
++ if (flags & BPF_FIB_LOOKUP_TBID) {
++ tbid = params->tbid;
++ /* zero out for vlan output */
++ params->tbid = 0;
++ }
++
+ tb = fib_get_table(net, tbid);
+ if (unlikely(!tb))
+ return BPF_FIB_LKUP_RET_NOT_FWDED;
+@@ -5803,6 +5809,9 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
+ params->rt_metric = res.fi->fib_priority;
+ params->ifindex = dev->ifindex;
+
++ if (flags & BPF_FIB_LOOKUP_SRC)
++ params->ipv4_src = fib_result_prefsrc(net, &res);
++
+ /* xdp and cls_bpf programs are run in RCU-bh so
+ * rcu_read_lock_bh is not needed here
+ */
+@@ -5885,6 +5894,12 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
+ u32 tbid = l3mdev_fib_table_rcu(dev) ? : RT_TABLE_MAIN;
+ struct fib6_table *tb;
+
++ if (flags & BPF_FIB_LOOKUP_TBID) {
++ tbid = params->tbid;
++ /* zero out for vlan output */
++ params->tbid = 0;
++ }
++
+ tb = ipv6_stub->fib6_get_table(net, tbid);
+ if (unlikely(!tb))
+ return BPF_FIB_LKUP_RET_NOT_FWDED;
+@@ -5939,6 +5954,18 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
+ params->rt_metric = res.f6i->fib6_metric;
+ params->ifindex = dev->ifindex;
+
++ if (flags & BPF_FIB_LOOKUP_SRC) {
++ if (res.f6i->fib6_prefsrc.plen) {
++ *src = res.f6i->fib6_prefsrc.addr;
++ } else {
++ err = ipv6_bpf_stub->ipv6_dev_get_saddr(net, dev,
++ &fl6.daddr, 0,
++ src);
++ if (err)
++ return BPF_FIB_LKUP_RET_NO_SRC_ADDR;
++ }
++ }
++
+ if (flags & BPF_FIB_LOOKUP_SKIP_NEIGH)
+ goto set_fwd_params;
+
+@@ -5957,7 +5984,8 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
+ #endif
+
+ #define BPF_FIB_LOOKUP_MASK (BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT | \
+- BPF_FIB_LOOKUP_SKIP_NEIGH)
++ BPF_FIB_LOOKUP_SKIP_NEIGH | BPF_FIB_LOOKUP_TBID | \
++ BPF_FIB_LOOKUP_SRC)
+
+ BPF_CALL_4(bpf_xdp_fib_lookup, struct xdp_buff *, ctx,
+ struct bpf_fib_lookup *, params, int, plen, u32, flags)
+diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
+index 7cf1e42d7f93b..ac379e4590f8d 100644
+--- a/net/core/rtnetlink.c
++++ b/net/core/rtnetlink.c
+@@ -5026,10 +5026,9 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct net *net = sock_net(skb->sk);
+ struct ifinfomsg *ifm;
+ struct net_device *dev;
+- struct nlattr *br_spec, *attr = NULL;
++ struct nlattr *br_spec, *attr, *br_flags_attr = NULL;
+ int rem, err = -EOPNOTSUPP;
+ u16 flags = 0;
+- bool have_flags = false;
+
+ if (nlmsg_len(nlh) < sizeof(*ifm))
+ return -EINVAL;
+@@ -5047,11 +5046,11 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
+ br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
+ if (br_spec) {
+ nla_for_each_nested(attr, br_spec, rem) {
+- if (nla_type(attr) == IFLA_BRIDGE_FLAGS && !have_flags) {
++ if (nla_type(attr) == IFLA_BRIDGE_FLAGS && !br_flags_attr) {
+ if (nla_len(attr) < sizeof(flags))
+ return -EINVAL;
+
+- have_flags = true;
++ br_flags_attr = attr;
+ flags = nla_get_u16(attr);
+ }
+
+@@ -5095,8 +5094,8 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
+ }
+ }
+
+- if (have_flags)
+- memcpy(nla_data(attr), &flags, sizeof(flags));
++ if (br_flags_attr)
++ memcpy(nla_data(br_flags_attr), &flags, sizeof(flags));
+ out:
+ return err;
+ }
+diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c
+index 80cdc6f6b34c9..0323ab5023c69 100644
+--- a/net/hsr/hsr_forward.c
++++ b/net/hsr/hsr_forward.c
+@@ -83,7 +83,7 @@ static bool is_supervision_frame(struct hsr_priv *hsr, struct sk_buff *skb)
+ return false;
+
+ /* Get next tlv */
+- total_length += sizeof(struct hsr_sup_tlv) + hsr_sup_tag->tlv.HSR_TLV_length;
++ total_length += hsr_sup_tag->tlv.HSR_TLV_length;
+ if (!pskb_may_pull(skb, total_length))
+ return false;
+ skb_pull(skb, total_length);
+diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
+index 24961b304dad0..328f9068c6a43 100644
+--- a/net/ipv4/ip_tunnel.c
++++ b/net/ipv4/ip_tunnel.c
+@@ -540,6 +540,20 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
+ return 0;
+ }
+
++static void ip_tunnel_adj_headroom(struct net_device *dev, unsigned int headroom)
++{
++	/* we must cap headroom to some upper limit, else pskb_expand_head
++ * will overflow header offsets in skb_headers_offset_update().
++ */
++ static const unsigned int max_allowed = 512;
++
++ if (headroom > max_allowed)
++ headroom = max_allowed;
++
++ if (headroom > READ_ONCE(dev->needed_headroom))
++ WRITE_ONCE(dev->needed_headroom, headroom);
++}
++
+ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
+ u8 proto, int tunnel_hlen)
+ {
+@@ -614,13 +628,13 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
+ }
+
+ headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len;
+- if (headroom > READ_ONCE(dev->needed_headroom))
+- WRITE_ONCE(dev->needed_headroom, headroom);
+-
+- if (skb_cow_head(skb, READ_ONCE(dev->needed_headroom))) {
++ if (skb_cow_head(skb, headroom)) {
+ ip_rt_put(rt);
+ goto tx_dropped;
+ }
++
++ ip_tunnel_adj_headroom(dev, headroom);
++
+ iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, tos, ttl,
+ df, !net_eq(tunnel->net, dev_net(dev)));
+ return;
+@@ -800,16 +814,16 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
+
+ max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
+ + rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
+- if (max_headroom > READ_ONCE(dev->needed_headroom))
+- WRITE_ONCE(dev->needed_headroom, max_headroom);
+
+- if (skb_cow_head(skb, READ_ONCE(dev->needed_headroom))) {
++ if (skb_cow_head(skb, max_headroom)) {
+ ip_rt_put(rt);
+ dev->stats.tx_dropped++;
+ kfree_skb(skb);
+ return;
+ }
+
++ ip_tunnel_adj_headroom(dev, max_headroom);
++
+ iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl,
+ df, !net_eq(tunnel->net, dev_net(dev)));
+ return;
+diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c
+index 4073762996e22..fc761915c5f6f 100644
+--- a/net/ipv4/netfilter/nf_reject_ipv4.c
++++ b/net/ipv4/netfilter/nf_reject_ipv4.c
+@@ -279,6 +279,7 @@ void nf_send_reset(struct net *net, struct sock *sk, struct sk_buff *oldskb,
+ goto free_nskb;
+
+ nf_ct_attach(nskb, oldskb);
++ nf_ct_set_closing(skb_nfct(oldskb));
+
+ #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
+ /* If we use ip_local_out for bridged traffic, the MAC source on
+diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
+index 46527b5cc8f0c..1648373692a99 100644
+--- a/net/ipv6/addrconf.c
++++ b/net/ipv6/addrconf.c
+@@ -5473,9 +5473,10 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh,
+ }
+
+ addr = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL], &peer);
+- if (!addr)
+- return -EINVAL;
+-
++ if (!addr) {
++ err = -EINVAL;
++ goto errout;
++ }
+ ifm = nlmsg_data(nlh);
+ if (ifm->ifa_index)
+ dev = dev_get_by_index(tgt_net, ifm->ifa_index);
+diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
+index 0b42eb8c55aaf..62247621cea52 100644
+--- a/net/ipv6/af_inet6.c
++++ b/net/ipv6/af_inet6.c
+@@ -1077,6 +1077,7 @@ static const struct ipv6_bpf_stub ipv6_bpf_stub_impl = {
+ .udp6_lib_lookup = __udp6_lib_lookup,
+ .ipv6_setsockopt = do_ipv6_setsockopt,
+ .ipv6_getsockopt = do_ipv6_getsockopt,
++ .ipv6_dev_get_saddr = ipv6_dev_get_saddr,
+ };
+
+ static int __init inet6_init(void)
+diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c
+index 433d98bbe33f7..71d692728230e 100644
+--- a/net/ipv6/netfilter/nf_reject_ipv6.c
++++ b/net/ipv6/netfilter/nf_reject_ipv6.c
+@@ -344,6 +344,7 @@ void nf_send_reset6(struct net *net, struct sock *sk, struct sk_buff *oldskb,
+ nf_reject_ip6_tcphdr_put(nskb, oldskb, otcph, otcplen);
+
+ nf_ct_attach(nskb, oldskb);
++ nf_ct_set_closing(skb_nfct(oldskb));
+
+ #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
+ /* If we use ip6_local_out for bridged traffic, the MAC source on
+diff --git a/net/mctp/route.c b/net/mctp/route.c
+index 256bf0b89e6ca..0144d8ebdaefb 100644
+--- a/net/mctp/route.c
++++ b/net/mctp/route.c
+@@ -888,7 +888,7 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt,
+ dev = dev_get_by_index_rcu(sock_net(sk), cb->ifindex);
+ if (!dev) {
+ rcu_read_unlock();
+- return rc;
++ goto out_free;
+ }
+ rt->dev = __mctp_dev_get(dev);
+ rcu_read_unlock();
+@@ -903,7 +903,8 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt,
+ rt->mtu = 0;
+
+ } else {
+- return -EINVAL;
++ rc = -EINVAL;
++ goto out_free;
+ }
+
+ spin_lock_irqsave(&rt->dev->addrs_lock, flags);
+@@ -966,12 +967,17 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt,
+ rc = mctp_do_fragment_route(rt, skb, mtu, tag);
+ }
+
++ /* route output functions consume the skb, even on error */
++ skb = NULL;
++
+ out_release:
+ if (!ext_rt)
+ mctp_route_release(rt);
+
+ mctp_dev_put(tmp_rt.dev);
+
++out_free:
++ kfree_skb(skb);
+ return rc;
+ }
+
+diff --git a/net/mptcp/diag.c b/net/mptcp/diag.c
+index e57c5f47f0351..7017dd60659dc 100644
+--- a/net/mptcp/diag.c
++++ b/net/mptcp/diag.c
+@@ -21,6 +21,9 @@ static int subflow_get_info(struct sock *sk, struct sk_buff *skb)
+ bool slow;
+ int err;
+
++ if (inet_sk_state_load(sk) == TCP_LISTEN)
++ return 0;
++
+ start = nla_nest_start_noflag(skb, INET_ULP_INFO_MPTCP);
+ if (!start)
+ return -EMSGSIZE;
+@@ -65,7 +68,7 @@ static int subflow_get_info(struct sock *sk, struct sk_buff *skb)
+ sf->map_data_len) ||
+ nla_put_u32(skb, MPTCP_SUBFLOW_ATTR_FLAGS, flags) ||
+ nla_put_u8(skb, MPTCP_SUBFLOW_ATTR_ID_REM, sf->remote_id) ||
+- nla_put_u8(skb, MPTCP_SUBFLOW_ATTR_ID_LOC, sf->local_id)) {
++ nla_put_u8(skb, MPTCP_SUBFLOW_ATTR_ID_LOC, subflow_get_local_id(sf))) {
+ err = -EMSGSIZE;
+ goto nla_failure;
+ }
+diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
+index 70a1025f093cf..3328870b0c1f8 100644
+--- a/net/mptcp/pm_netlink.c
++++ b/net/mptcp/pm_netlink.c
+@@ -407,23 +407,12 @@ void mptcp_pm_free_anno_list(struct mptcp_sock *msk)
+ }
+ }
+
+-static bool lookup_address_in_vec(const struct mptcp_addr_info *addrs, unsigned int nr,
+- const struct mptcp_addr_info *addr)
+-{
+- int i;
+-
+- for (i = 0; i < nr; i++) {
+- if (addrs[i].id == addr->id)
+- return true;
+- }
+-
+- return false;
+-}
+-
+ /* Fill all the remote addresses into the array addrs[],
+ * and return the array size.
+ */
+-static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, bool fullmesh,
++static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk,
++ struct mptcp_addr_info *local,
++ bool fullmesh,
+ struct mptcp_addr_info *addrs)
+ {
+ bool deny_id0 = READ_ONCE(msk->pm.remote_deny_join_id0);
+@@ -446,15 +435,28 @@ static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, bool fullm
+ msk->pm.subflows++;
+ addrs[i++] = remote;
+ } else {
++ DECLARE_BITMAP(unavail_id, MPTCP_PM_MAX_ADDR_ID + 1);
++
++ /* Forbid creation of new subflows matching existing
++ * ones, possibly already created by incoming ADD_ADDR
++ */
++ bitmap_zero(unavail_id, MPTCP_PM_MAX_ADDR_ID + 1);
++ mptcp_for_each_subflow(msk, subflow)
++ if (READ_ONCE(subflow->local_id) == local->id)
++ __set_bit(subflow->remote_id, unavail_id);
++
+ mptcp_for_each_subflow(msk, subflow) {
+ ssk = mptcp_subflow_tcp_sock(subflow);
+ remote_address((struct sock_common *)ssk, &addrs[i]);
+- addrs[i].id = subflow->remote_id;
++ addrs[i].id = READ_ONCE(subflow->remote_id);
+ if (deny_id0 && !addrs[i].id)
+ continue;
+
+- if (!lookup_address_in_vec(addrs, i, &addrs[i]) &&
+- msk->pm.subflows < subflows_max) {
++ if (msk->pm.subflows < subflows_max) {
++				/* forbid creating multiple addresses towards
++ * this id
++ */
++ __set_bit(addrs[i].id, unavail_id);
+ msk->pm.subflows++;
+ i++;
+ }
+@@ -603,7 +605,7 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
+ fullmesh = !!(local->flags & MPTCP_PM_ADDR_FLAG_FULLMESH);
+
+ msk->pm.local_addr_used++;
+- nr = fill_remote_addresses_vec(msk, fullmesh, addrs);
++ nr = fill_remote_addresses_vec(msk, &local->addr, fullmesh, addrs);
+ if (nr)
+ __clear_bit(local->addr.id, msk->pm.id_avail_bitmap);
+ spin_unlock_bh(&msk->pm.lock);
+@@ -798,18 +800,18 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk,
+
+ mptcp_for_each_subflow_safe(msk, subflow, tmp) {
+ struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
++ u8 remote_id = READ_ONCE(subflow->remote_id);
+ int how = RCV_SHUTDOWN | SEND_SHUTDOWN;
+- u8 id = subflow->local_id;
++ u8 id = subflow_get_local_id(subflow);
+
+- if (rm_type == MPTCP_MIB_RMADDR && subflow->remote_id != rm_id)
++ if (rm_type == MPTCP_MIB_RMADDR && remote_id != rm_id)
+ continue;
+ if (rm_type == MPTCP_MIB_RMSUBFLOW && !mptcp_local_id_match(msk, id, rm_id))
+ continue;
+
+ pr_debug(" -> %s rm_list_ids[%d]=%u local_id=%u remote_id=%u mpc_id=%u",
+ rm_type == MPTCP_MIB_RMADDR ? "address" : "subflow",
+- i, rm_id, subflow->local_id, subflow->remote_id,
+- msk->mpc_endpoint_id);
++ i, rm_id, id, remote_id, msk->mpc_endpoint_id);
+ spin_unlock_bh(&msk->pm.lock);
+ mptcp_subflow_shutdown(sk, ssk, how);
+
+@@ -2028,7 +2030,7 @@ static int mptcp_event_add_subflow(struct sk_buff *skb, const struct sock *ssk)
+ if (WARN_ON_ONCE(!sf))
+ return -EINVAL;
+
+- if (nla_put_u8(skb, MPTCP_ATTR_LOC_ID, sf->local_id))
++ if (nla_put_u8(skb, MPTCP_ATTR_LOC_ID, subflow_get_local_id(sf)))
+ return -EMSGSIZE;
+
+ if (nla_put_u8(skb, MPTCP_ATTR_REM_ID, sf->remote_id))
+diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c
+index 631fa104617c3..414ed70e7ba2e 100644
+--- a/net/mptcp/pm_userspace.c
++++ b/net/mptcp/pm_userspace.c
+@@ -233,7 +233,7 @@ static int mptcp_userspace_pm_remove_id_zero_address(struct mptcp_sock *msk,
+
+ lock_sock(sk);
+ mptcp_for_each_subflow(msk, subflow) {
+- if (subflow->local_id == 0) {
++ if (READ_ONCE(subflow->local_id) == 0) {
+ has_id_0 = true;
+ break;
+ }
+@@ -489,6 +489,16 @@ int mptcp_nl_cmd_sf_destroy(struct sk_buff *skb, struct genl_info *info)
+ goto destroy_err;
+ }
+
++#if IS_ENABLED(CONFIG_MPTCP_IPV6)
++ if (addr_l.family == AF_INET && ipv6_addr_v4mapped(&addr_r.addr6)) {
++ ipv6_addr_set_v4mapped(addr_l.addr.s_addr, &addr_l.addr6);
++ addr_l.family = AF_INET6;
++ }
++ if (addr_r.family == AF_INET && ipv6_addr_v4mapped(&addr_l.addr6)) {
++ ipv6_addr_set_v4mapped(addr_r.addr.s_addr, &addr_r.addr6);
++ addr_r.family = AF_INET6;
++ }
++#endif
+ if (addr_l.family != addr_r.family) {
+ GENL_SET_ERR_MSG(info, "address families do not match");
+ err = -EINVAL;
+diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
+index 859b18cb8e4f6..3bc21581486ae 100644
+--- a/net/mptcp/protocol.c
++++ b/net/mptcp/protocol.c
+@@ -119,7 +119,7 @@ static int __mptcp_socket_create(struct mptcp_sock *msk)
+ subflow->request_mptcp = 1;
+
+ /* This is the first subflow, always with id 0 */
+- subflow->local_id_valid = 1;
++ WRITE_ONCE(subflow->local_id, 0);
+ mptcp_sock_graft(msk->first, sk->sk_socket);
+
+ return 0;
+@@ -1319,6 +1319,7 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
+ mpext = skb_ext_find(skb, SKB_EXT_MPTCP);
+ if (!mptcp_skb_can_collapse_to(data_seq, skb, mpext)) {
+ TCP_SKB_CB(skb)->eor = 1;
++ tcp_mark_push(tcp_sk(ssk), skb);
+ goto alloc_skb;
+ }
+
+@@ -2440,6 +2441,8 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
+ need_push = (flags & MPTCP_CF_PUSH) && __mptcp_retransmit_pending_data(sk);
+ if (!dispose_it) {
+ __mptcp_subflow_disconnect(ssk, subflow, flags);
++ if (msk->subflow && ssk == msk->subflow->sk)
++ msk->subflow->state = SS_UNCONNECTED;
+ release_sock(ssk);
+
+ goto out;
+@@ -3166,8 +3169,50 @@ static struct ipv6_pinfo *mptcp_inet6_sk(const struct sock *sk)
+
+ return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
+ }
++
++static void mptcp_copy_ip6_options(struct sock *newsk, const struct sock *sk)
++{
++ const struct ipv6_pinfo *np = inet6_sk(sk);
++ struct ipv6_txoptions *opt;
++ struct ipv6_pinfo *newnp;
++
++ newnp = inet6_sk(newsk);
++
++ rcu_read_lock();
++ opt = rcu_dereference(np->opt);
++ if (opt) {
++ opt = ipv6_dup_options(newsk, opt);
++ if (!opt)
++ net_warn_ratelimited("%s: Failed to copy ip6 options\n", __func__);
++ }
++ RCU_INIT_POINTER(newnp->opt, opt);
++ rcu_read_unlock();
++}
+ #endif
+
++static void mptcp_copy_ip_options(struct sock *newsk, const struct sock *sk)
++{
++ struct ip_options_rcu *inet_opt, *newopt = NULL;
++ const struct inet_sock *inet = inet_sk(sk);
++ struct inet_sock *newinet;
++
++ newinet = inet_sk(newsk);
++
++ rcu_read_lock();
++ inet_opt = rcu_dereference(inet->inet_opt);
++ if (inet_opt) {
++ newopt = sock_kmalloc(newsk, sizeof(*inet_opt) +
++ inet_opt->opt.optlen, GFP_ATOMIC);
++ if (newopt)
++ memcpy(newopt, inet_opt, sizeof(*inet_opt) +
++ inet_opt->opt.optlen);
++ else
++ net_warn_ratelimited("%s: Failed to copy ip options\n", __func__);
++ }
++ RCU_INIT_POINTER(newinet->inet_opt, newopt);
++ rcu_read_unlock();
++}
++
+ struct sock *mptcp_sk_clone_init(const struct sock *sk,
+ const struct mptcp_options_received *mp_opt,
+ struct sock *ssk,
+@@ -3188,6 +3233,13 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk,
+
+ __mptcp_init_sock(nsk);
+
++#if IS_ENABLED(CONFIG_MPTCP_IPV6)
++ if (nsk->sk_family == AF_INET6)
++ mptcp_copy_ip6_options(nsk, sk);
++ else
++#endif
++ mptcp_copy_ip_options(nsk, sk);
++
+ msk = mptcp_sk(nsk);
+ msk->local_key = subflow_req->local_key;
+ msk->token = subflow_req->token;
+@@ -3200,7 +3252,7 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk,
+ msk->write_seq = subflow_req->idsn + 1;
+ msk->snd_nxt = msk->write_seq;
+ msk->snd_una = msk->write_seq;
+- msk->wnd_end = msk->snd_nxt + req->rsk_rcv_wnd;
++ msk->wnd_end = msk->snd_nxt + tcp_sk(ssk)->snd_wnd;
+ msk->setsockopt_seq = mptcp_sk(sk)->setsockopt_seq;
+
+ if (mp_opt->suboptions & OPTIONS_MPTCP_MPC) {
+diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
+index b092205213234..2bc37773e7803 100644
+--- a/net/mptcp/protocol.h
++++ b/net/mptcp/protocol.h
+@@ -475,7 +475,6 @@ struct mptcp_subflow_context {
+ can_ack : 1, /* only after processing the remote a key */
+ disposable : 1, /* ctx can be free at ulp release time */
+ stale : 1, /* unable to snd/rcv data, do not use for xmit */
+- local_id_valid : 1, /* local_id is correctly initialized */
+ valid_csum_seen : 1; /* at least one csum validated */
+ enum mptcp_data_avail data_avail;
+ u32 remote_nonce;
+@@ -483,7 +482,7 @@ struct mptcp_subflow_context {
+ u32 local_nonce;
+ u32 remote_token;
+ u8 hmac[MPTCPOPT_HMAC_LEN];
+- u8 local_id;
++ s16 local_id; /* if negative not initialized yet */
+ u8 remote_id;
+ u8 reset_seen:1;
+ u8 reset_transient:1;
+@@ -529,6 +528,7 @@ mptcp_subflow_ctx_reset(struct mptcp_subflow_context *subflow)
+ {
+ memset(&subflow->reset, 0, sizeof(subflow->reset));
+ subflow->request_mptcp = 1;
++ WRITE_ONCE(subflow->local_id, -1);
+ }
+
+ static inline u64
+@@ -909,6 +909,15 @@ bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining,
+ int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc);
+ int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc);
+
++static inline u8 subflow_get_local_id(const struct mptcp_subflow_context *subflow)
++{
++ int local_id = READ_ONCE(subflow->local_id);
++
++ if (local_id < 0)
++ return 0;
++ return local_id;
++}
++
+ void __init mptcp_pm_nl_init(void);
+ void mptcp_pm_nl_work(struct mptcp_sock *msk);
+ void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk,
+diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
+index 45d20e20cfc00..891c2f4fed080 100644
+--- a/net/mptcp/subflow.c
++++ b/net/mptcp/subflow.c
+@@ -446,7 +446,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
+ subflow->backup = mp_opt.backup;
+ subflow->thmac = mp_opt.thmac;
+ subflow->remote_nonce = mp_opt.nonce;
+- subflow->remote_id = mp_opt.join_id;
++ WRITE_ONCE(subflow->remote_id, mp_opt.join_id);
+ pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u backup=%d",
+ subflow, subflow->thmac, subflow->remote_nonce,
+ subflow->backup);
+@@ -489,8 +489,8 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
+
+ static void subflow_set_local_id(struct mptcp_subflow_context *subflow, int local_id)
+ {
+- subflow->local_id = local_id;
+- subflow->local_id_valid = 1;
++ WARN_ON_ONCE(local_id < 0 || local_id > 255);
++ WRITE_ONCE(subflow->local_id, local_id);
+ }
+
+ static int subflow_chk_local_id(struct sock *sk)
+@@ -499,7 +499,7 @@ static int subflow_chk_local_id(struct sock *sk)
+ struct mptcp_sock *msk = mptcp_sk(subflow->conn);
+ int err;
+
+- if (likely(subflow->local_id_valid))
++ if (likely(subflow->local_id >= 0))
+ return 0;
+
+ err = mptcp_pm_get_local_id(msk, (struct sock_common *)sk);
+@@ -1477,7 +1477,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
+ pr_debug("msk=%p remote_token=%u local_id=%d remote_id=%d", msk,
+ remote_token, local_id, remote_id);
+ subflow->remote_token = remote_token;
+- subflow->remote_id = remote_id;
++ WRITE_ONCE(subflow->remote_id, remote_id);
+ subflow->request_join = 1;
+ subflow->request_bkup = !!(flags & MPTCP_PM_ADDR_FLAG_BACKUP);
+ mptcp_info2sockaddr(remote, &addr, ssk->sk_family);
+@@ -1630,6 +1630,7 @@ static struct mptcp_subflow_context *subflow_create_ctx(struct sock *sk,
+ pr_debug("subflow=%p", ctx);
+
+ ctx->tcp_sock = sk;
++ WRITE_ONCE(ctx->local_id, -1);
+
+ return ctx;
+ }
+@@ -1867,13 +1868,13 @@ static void subflow_ulp_clone(const struct request_sock *req,
+ new_ctx->idsn = subflow_req->idsn;
+
+ /* this is the first subflow, id is always 0 */
+- new_ctx->local_id_valid = 1;
++ subflow_set_local_id(new_ctx, 0);
+ } else if (subflow_req->mp_join) {
+ new_ctx->ssn_offset = subflow_req->ssn_offset;
+ new_ctx->mp_join = 1;
+ new_ctx->fully_established = 1;
+ new_ctx->backup = subflow_req->backup;
+- new_ctx->remote_id = subflow_req->remote_id;
++ WRITE_ONCE(new_ctx->remote_id, subflow_req->remote_id);
+ new_ctx->token = subflow_req->token;
+ new_ctx->thmac = subflow_req->thmac;
+
+diff --git a/net/netfilter/core.c b/net/netfilter/core.c
+index 55a7f72d547cd..edf92074221e2 100644
+--- a/net/netfilter/core.c
++++ b/net/netfilter/core.c
+@@ -707,6 +707,22 @@ void nf_conntrack_destroy(struct nf_conntrack *nfct)
+ }
+ EXPORT_SYMBOL(nf_conntrack_destroy);
+
++void nf_ct_set_closing(struct nf_conntrack *nfct)
++{
++ const struct nf_ct_hook *ct_hook;
++
++ if (!nfct)
++ return;
++
++ rcu_read_lock();
++ ct_hook = rcu_dereference(nf_ct_hook);
++ if (ct_hook)
++ ct_hook->set_closing(nfct);
++
++ rcu_read_unlock();
++}
++EXPORT_SYMBOL_GPL(nf_ct_set_closing);
++
+ bool nf_ct_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple,
+ const struct sk_buff *skb)
+ {
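
nf_ct_set_closing() follows the usual netfilter pattern for calling into an optionally loaded module: dereference an RCU-protected ops pointer and dispatch only if a backend registered itself. Roughly, minus the real RCU machinery (types and names below are illustrative):

#include <stdio.h>

struct ct_hook {
	void (*set_closing)(void *nfct);
};

static const struct ct_hook *registered_hook; /* set when the backend loads */

static void demo_set_closing(void *nfct)
{
	printf("closing %p\n", nfct);
}

/* Safe to call whether or not a backend is registered. */
static void ct_set_closing(void *nfct)
{
	const struct ct_hook *hook = registered_hook; /* rcu_dereference() in the kernel */

	if (nfct && hook)
		hook->set_closing(nfct);
}

int main(void)
{
	int conn;

	ct_set_closing(&conn);           /* no-op: nothing registered yet */
	static const struct ct_hook h = { demo_set_closing };
	registered_hook = &h;
	ct_set_closing(&conn);           /* dispatches into the "module" */
	return 0;
}
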
+diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
+index 7960262966094..024f93fc8c0bb 100644
+--- a/net/netfilter/nf_conntrack_core.c
++++ b/net/netfilter/nf_conntrack_core.c
+@@ -2772,11 +2772,24 @@ int nf_conntrack_init_start(void)
+ return ret;
+ }
+
++static void nf_conntrack_set_closing(struct nf_conntrack *nfct)
++{
++ struct nf_conn *ct = nf_ct_to_nf_conn(nfct);
++
++ switch (nf_ct_protonum(ct)) {
++ case IPPROTO_TCP:
++ nf_conntrack_tcp_set_closing(ct);
++ break;
++ }
++}
++
+ static const struct nf_ct_hook nf_conntrack_hook = {
+ .update = nf_conntrack_update,
+ .destroy = nf_ct_destroy,
+ .get_tuple_skb = nf_conntrack_get_tuple_skb,
+ .attach = nf_conntrack_attach,
++ .set_closing = nf_conntrack_set_closing,
++ .confirm = __nf_conntrack_confirm,
+ };
+
+ void nf_conntrack_init_end(void)
+diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
+index e0092bf273fd0..9480e638e5d15 100644
+--- a/net/netfilter/nf_conntrack_proto_tcp.c
++++ b/net/netfilter/nf_conntrack_proto_tcp.c
+@@ -913,6 +913,41 @@ static bool tcp_can_early_drop(const struct nf_conn *ct)
+ return false;
+ }
+
++void nf_conntrack_tcp_set_closing(struct nf_conn *ct)
++{
++ enum tcp_conntrack old_state;
++ const unsigned int *timeouts;
++ u32 timeout;
++
++ if (!nf_ct_is_confirmed(ct))
++ return;
++
++ spin_lock_bh(&ct->lock);
++ old_state = ct->proto.tcp.state;
++ ct->proto.tcp.state = TCP_CONNTRACK_CLOSE;
++
++ if (old_state == TCP_CONNTRACK_CLOSE ||
++ test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status)) {
++ spin_unlock_bh(&ct->lock);
++ return;
++ }
++
++ timeouts = nf_ct_timeout_lookup(ct);
++ if (!timeouts) {
++ const struct nf_tcp_net *tn;
++
++ tn = nf_tcp_pernet(nf_ct_net(ct));
++ timeouts = tn->timeouts;
++ }
++
++ timeout = timeouts[TCP_CONNTRACK_CLOSE];
++ WRITE_ONCE(ct->timeout, timeout + nfct_time_stamp);
++
++ spin_unlock_bh(&ct->lock);
++
++ nf_conntrack_event_cache(IPCT_PROTOINFO, ct);
++}
++
+ static void nf_ct_tcp_state_reset(struct ip_ct_tcp_state *state)
+ {
+ state->td_end = 0;
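
The helper above forces the tracked state to CLOSE and re-arms the entry's deadline from the CLOSE timeout, skipping entries that are already closed or pinned by IPS_FIXED_TIMEOUT. The deadline update itself is just "expiry = now + timeout", as in this simplified model (seconds instead of jiffies, illustrative types):

#include <stdbool.h>
#include <stdio.h>
#include <time.h>

enum { ST_ESTABLISHED, ST_CLOSE };

struct conn {
	int state;
	bool fixed_timeout;
	time_t expires;
};

/* Force a connection to CLOSE and shorten its remaining lifetime. */
static void conn_set_closing(struct conn *c, time_t close_timeout)
{
	int old_state = c->state;

	c->state = ST_CLOSE;
	if (old_state == ST_CLOSE || c->fixed_timeout)
		return; /* nothing to re-arm */
	c->expires = time(NULL) + close_timeout;
}

int main(void)
{
	struct conn c = { ST_ESTABLISHED, false, 0 };

	conn_set_closing(&c, 10); /* CLOSE entries linger 10 s in this model */
	printf("state=%d expires_in=%lds\n", c.state, (long)(c.expires - time(NULL)));
	return 0;
}
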
+diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
+index e21ec3ad80939..d3ba947f43761 100644
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -4752,6 +4752,9 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
+ if (!(flags & NFT_SET_TIMEOUT))
+ return -EINVAL;
+
++ if (flags & NFT_SET_ANONYMOUS)
++ return -EOPNOTSUPP;
++
+ err = nf_msecs_to_jiffies64(nla[NFTA_SET_TIMEOUT], &desc.timeout);
+ if (err)
+ return err;
+@@ -4760,6 +4763,10 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
+ if (nla[NFTA_SET_GC_INTERVAL] != NULL) {
+ if (!(flags & NFT_SET_TIMEOUT))
+ return -EINVAL;
++
++ if (flags & NFT_SET_ANONYMOUS)
++ return -EOPNOTSUPP;
++
+ desc.gc_int = ntohl(nla_get_be32(nla[NFTA_SET_GC_INTERVAL]));
+ }
+
+diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
+index e1623fbf36548..e4b8c02c5e6ae 100644
+--- a/net/netfilter/nft_compat.c
++++ b/net/netfilter/nft_compat.c
+@@ -358,10 +358,20 @@ static int nft_target_validate(const struct nft_ctx *ctx,
+
+ if (ctx->family != NFPROTO_IPV4 &&
+ ctx->family != NFPROTO_IPV6 &&
++ ctx->family != NFPROTO_INET &&
+ ctx->family != NFPROTO_BRIDGE &&
+ ctx->family != NFPROTO_ARP)
+ return -EOPNOTSUPP;
+
++ ret = nft_chain_validate_hooks(ctx->chain,
++ (1 << NF_INET_PRE_ROUTING) |
++ (1 << NF_INET_LOCAL_IN) |
++ (1 << NF_INET_FORWARD) |
++ (1 << NF_INET_LOCAL_OUT) |
++ (1 << NF_INET_POST_ROUTING));
++ if (ret)
++ return ret;
++
+ if (nft_is_base_chain(ctx->chain)) {
+ const struct nft_base_chain *basechain =
+ nft_base_chain(ctx->chain);
+@@ -607,10 +617,20 @@ static int nft_match_validate(const struct nft_ctx *ctx,
+
+ if (ctx->family != NFPROTO_IPV4 &&
+ ctx->family != NFPROTO_IPV6 &&
++ ctx->family != NFPROTO_INET &&
+ ctx->family != NFPROTO_BRIDGE &&
+ ctx->family != NFPROTO_ARP)
+ return -EOPNOTSUPP;
+
++ ret = nft_chain_validate_hooks(ctx->chain,
++ (1 << NF_INET_PRE_ROUTING) |
++ (1 << NF_INET_LOCAL_IN) |
++ (1 << NF_INET_FORWARD) |
++ (1 << NF_INET_LOCAL_OUT) |
++ (1 << NF_INET_POST_ROUTING));
++ if (ret)
++ return ret;
++
+ if (nft_is_base_chain(ctx->chain)) {
+ const struct nft_base_chain *basechain =
+ nft_base_chain(ctx->chain);
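
With NFPROTO_INET now accepted, both validators above also restrict the chain to the five classic hooks, since xtables matches and targets were never written for other hook points. The added check boils down to a bitmask test, sketched here (hook numbering and names are illustrative):

#include <errno.h>
#include <stdio.h>

enum { PRE_ROUTING, LOCAL_IN, FORWARD, LOCAL_OUT, POST_ROUTING, INGRESS };

static const unsigned int allowed =
	(1u << PRE_ROUTING) | (1u << LOCAL_IN) | (1u << FORWARD) |
	(1u << LOCAL_OUT) | (1u << POST_ROUTING);

/* 0 if every hook the chain binds to is in the allowed set. */
static int validate_hooks(unsigned int chain_hook_mask)
{
	return (chain_hook_mask & ~allowed) ? -EOPNOTSUPP : 0;
}

int main(void)
{
	printf("%d\n", validate_hooks(1u << LOCAL_IN)); /* 0 */
	printf("%d\n", validate_hooks(1u << INGRESS));  /* -EOPNOTSUPP */
	return 0;
}
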
+diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
+index 6857a4965fe87..e9b81cba1e2b4 100644
+--- a/net/netlink/af_netlink.c
++++ b/net/netlink/af_netlink.c
+@@ -167,7 +167,7 @@ static inline u32 netlink_group_mask(u32 group)
+ static struct sk_buff *netlink_to_full_skb(const struct sk_buff *skb,
+ gfp_t gfp_mask)
+ {
+- unsigned int len = skb_end_offset(skb);
++ unsigned int len = skb->len;
+ struct sk_buff *new;
+
+ new = alloc_skb(len, gfp_mask);
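
The netlink fix swaps skb_end_offset(), the linear buffer's capacity, for skb->len, the bytes actually present; sizing and filling the clone by capacity lets uninitialized tailroom reach the copy. A toy illustration of the capacity/length distinction:

#include <stdio.h>
#include <string.h>

struct buf {
	unsigned char data[64]; /* capacity: "skb_end_offset()" */
	size_t len;             /* bytes actually written: "skb->len" */
};

int main(void)
{
	struct buf b;
	unsigned char copy[64];

	b.len = 5;
	memcpy(b.data, "hello", 5);
	/* Copying b.len bytes duplicates the message; copying
	 * sizeof(b.data) bytes would also expose the 59 bytes of
	 * whatever happened to sit on the stack after it. */
	memcpy(copy, b.data, b.len);
	printf("%.*s\n", (int)b.len, copy);
	return 0;
}
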
+diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
+index 93e1bfa72d791..2bd27b77769cb 100644
+--- a/net/tls/tls_sw.c
++++ b/net/tls/tls_sw.c
+@@ -273,6 +273,8 @@ static int tls_do_decryption(struct sock *sk,
+ return 0;
+
+ ret = crypto_wait_req(ret, &ctx->async_wait);
++ } else if (darg->async) {
++ atomic_dec(&ctx->decrypt_pending);
+ }
+ darg->async = false;
+
+@@ -2021,6 +2023,7 @@ int tls_sw_recvmsg(struct sock *sk,
+ struct strp_msg *rxm;
+ struct tls_msg *tlm;
+ ssize_t copied = 0;
++ ssize_t peeked = 0;
+ bool async = false;
+ int target, err;
+ bool is_kvec = iov_iter_is_kvec(&msg->msg_iter);
+@@ -2168,8 +2171,10 @@ int tls_sw_recvmsg(struct sock *sk,
+ if (err < 0)
+ goto put_on_rx_list_err;
+
+- if (is_peek)
++ if (is_peek) {
++ peeked += chunk;
+ goto put_on_rx_list;
++ }
+
+ if (partially_consumed) {
+ rxm->offset += chunk;
+@@ -2208,8 +2213,8 @@ int tls_sw_recvmsg(struct sock *sk,
+
+ /* Drain records from the rx_list & copy if required */
+ if (is_peek || is_kvec)
+- err = process_rx_list(ctx, msg, &control, copied,
+- decrypted, is_peek, NULL);
++ err = process_rx_list(ctx, msg, &control, copied + peeked,
++ decrypted - peeked, is_peek, NULL);
+ else
+ err = process_rx_list(ctx, msg, &control, 0,
+ async_copy_bytes, is_peek, NULL);
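
The MSG_PEEK accounting above exists because peeked records stay on rx_list: the final process_rx_list() pass must start copied + peeked bytes into the stream and treat only decrypted - peeked bytes as still pending. A toy model of the bookkeeping:

#include <stdio.h>

int main(void)
{
	long copied = 0;    /* bytes consumed from rx_list earlier in the call */
	long decrypted = 0; /* bytes decrypted off the socket in this call */
	long peeked = 0;    /* of those, bytes handed out under MSG_PEEK */
	const long records[] = { 100, 200 };

	for (int i = 0; i < 2; i++) {
		decrypted += records[i];
		peeked += records[i]; /* the fix: remember peek-only bytes */
	}
	/* The drain pass skips everything already handed out and must not
	 * count peeked bytes as still pending, hence both adjustments. */
	printf("skip=%ld pending=%ld\n", copied + peeked, decrypted - peeked);
	return 0;
}
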
+diff --git a/net/unix/garbage.c b/net/unix/garbage.c
+index 767b338a7a2d4..ab2c83d58b62a 100644
+--- a/net/unix/garbage.c
++++ b/net/unix/garbage.c
+@@ -284,9 +284,17 @@ void unix_gc(void)
+ * which are creating the cycle(s).
+ */
+ skb_queue_head_init(&hitlist);
+- list_for_each_entry(u, &gc_candidates, link)
++ list_for_each_entry(u, &gc_candidates, link) {
+ scan_children(&u->sk, inc_inflight, &hitlist);
+
++#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
++ if (u->oob_skb) {
++ kfree_skb(u->oob_skb);
++ u->oob_skb = NULL;
++ }
++#endif
++ }
++
+ /* not_cycle_list contains those sockets which do not make up a
+ * cycle. Restore these to the inflight list.
+ */
+@@ -314,17 +322,6 @@ void unix_gc(void)
+ /* Here we are. Hitlist is filled. Die. */
+ __skb_queue_purge(&hitlist);
+
+-#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
+- list_for_each_entry_safe(u, next, &gc_candidates, link) {
+- struct sk_buff *skb = u->oob_skb;
+-
+- if (skb) {
+- u->oob_skb = NULL;
+- kfree_skb(skb);
+- }
+- }
+-#endif
+-
+ spin_lock(&unix_gc_lock);
+
+ /* There could be io_uring registered files, just push them back to
+diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
+index c259d3227a9e2..1a3bd554e2586 100644
+--- a/net/wireless/nl80211.c
++++ b/net/wireless/nl80211.c
+@@ -4137,6 +4137,8 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info)
+
+ if (ntype != NL80211_IFTYPE_MESH_POINT)
+ return -EINVAL;
++ if (otype != NL80211_IFTYPE_MESH_POINT)
++ return -EINVAL;
+ if (netif_running(dev))
+ return -EBUSY;
+
+diff --git a/security/landlock/fs.c b/security/landlock/fs.c
+index 64ed7665455fe..d328965f32f7f 100644
+--- a/security/landlock/fs.c
++++ b/security/landlock/fs.c
+@@ -824,8 +824,8 @@ static int current_check_refer_path(struct dentry *const old_dentry,
+ bool allow_parent1, allow_parent2;
+ access_mask_t access_request_parent1, access_request_parent2;
+ struct path mnt_dir;
+- layer_mask_t layer_masks_parent1[LANDLOCK_NUM_ACCESS_FS],
+- layer_masks_parent2[LANDLOCK_NUM_ACCESS_FS];
++ layer_mask_t layer_masks_parent1[LANDLOCK_NUM_ACCESS_FS] = {},
++ layer_masks_parent2[LANDLOCK_NUM_ACCESS_FS] = {};
+
+ if (!dom)
+ return 0;
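
The landlock fix zero-fills the two on-stack mask arrays, which are otherwise only partially written on some paths before being read. A minimal illustration of the "= {}" idiom (the array length macro is assumed here for the sketch):

#include <stdio.h>

#define NUM_ACCESS_FS 13 /* assumed length, for illustration only */

int main(void)
{
	/* "= {}" zero-fills the whole array. Without it the slots are
	 * indeterminate, and code that only ORs bits into a few entries
	 * ends up testing stack garbage in the rest. */
	unsigned long layer_masks[NUM_ACCESS_FS] = {};
	unsigned long seen = 0;

	for (int i = 0; i < NUM_ACCESS_FS; i++)
		seen |= layer_masks[i];
	printf("stale bits: %lu\n", seen); /* always 0 */
	return 0;
}
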
+diff --git a/security/tomoyo/common.c b/security/tomoyo/common.c
+index f4cd9b58b2054..a7af085550b2d 100644
+--- a/security/tomoyo/common.c
++++ b/security/tomoyo/common.c
+@@ -2648,13 +2648,14 @@ ssize_t tomoyo_write_control(struct tomoyo_io_buffer *head,
+ {
+ int error = buffer_len;
+ size_t avail_len = buffer_len;
+- char *cp0 = head->write_buf;
++ char *cp0;
+ int idx;
+
+ if (!head->write)
+ return -EINVAL;
+ if (mutex_lock_interruptible(&head->io_sem))
+ return -EINTR;
++ cp0 = head->write_buf;
+ head->read_user_buf_avail = 0;
+ idx = tomoyo_read_lock();
+ /* Read a line and dispatch it to the policy handler. */
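
The tomoyo change is about ordering, not value: head->write_buf may be reallocated by a concurrent writer, so it must be sampled only after io_sem is taken. The same shape in a small pthread sketch (illustrative names; build with -lpthread):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t io_sem = PTHREAD_MUTEX_INITIALIZER;
static char *write_buf; /* may be swapped by other writers */

static int write_control(void)
{
	char *cp0;

	if (pthread_mutex_lock(&io_sem))
		return -1;
	/* Sample the shared pointer only under the lock; reading it in the
	 * declaration above would race with concurrent reallocation. */
	cp0 = write_buf;
	if (cp0)
		printf("first byte: %c\n", cp0[0]);
	pthread_mutex_unlock(&io_sem);
	return 0;
}

int main(void)
{
	static char buf[] = "x";

	write_buf = buf;
	return write_control();
}
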
+diff --git a/sound/core/Makefile b/sound/core/Makefile
+index 2762f03d9b7bc..a7a1590b29526 100644
+--- a/sound/core/Makefile
++++ b/sound/core/Makefile
+@@ -30,7 +30,6 @@ snd-ctl-led-objs := control_led.o
+ snd-rawmidi-objs := rawmidi.o
+ snd-timer-objs := timer.o
+ snd-hrtimer-objs := hrtimer.o
+-snd-rtctimer-objs := rtctimer.o
+ snd-hwdep-objs := hwdep.o
+ snd-seq-device-objs := seq_device.o
+
+diff --git a/sound/firewire/amdtp-stream.c b/sound/firewire/amdtp-stream.c
+index 9be2260e4ca2d..f8b644cb9157a 100644
+--- a/sound/firewire/amdtp-stream.c
++++ b/sound/firewire/amdtp-stream.c
+@@ -934,7 +934,7 @@ static int generate_device_pkt_descs(struct amdtp_stream *s,
+ // to the reason.
+ unsigned int safe_cycle = increment_ohci_cycle_count(next_cycle,
+ IR_JUMBO_PAYLOAD_MAX_SKIP_CYCLES);
+- lost = (compare_ohci_cycle_count(safe_cycle, cycle) > 0);
++ lost = (compare_ohci_cycle_count(safe_cycle, cycle) < 0);
+ }
+ if (lost) {
+ dev_err(&s->unit->device, "Detect discontinuity of cycle: %d %d\n",
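
The one-character amdtp fix flips the sign: a packet is lost when the newest safe cycle still compares earlier than the packet's cycle, not later. On wrapping cycle counters such comparisons are conventionally a signed test on the modular difference, e.g. (modulus assumed from OHCI's 8 s x 8000 cycles):

#include <stdio.h>

#define CYCLE_MODULUS (8 * 8000) /* assumed OHCI cycle modulus */

/* <0 if a is earlier than b, >0 if later, 0 if equal, on a wrapping counter. */
static int cycle_cmp(unsigned int a, unsigned int b)
{
	unsigned int d = (a + CYCLE_MODULUS - b) % CYCLE_MODULUS;

	if (d == 0)
		return 0;
	return d < CYCLE_MODULUS / 2 ? 1 : -1;
}

int main(void)
{
	/* lost = "the safe cycle is still earlier than the packet's cycle",
	 * hence the corrected '< 0' test in the hunk above. */
	printf("%d\n", cycle_cmp(10, 20));               /* -1: 10 before 20 */
	printf("%d\n", cycle_cmp(5, CYCLE_MODULUS - 5)); /* 1: 5 is later, across the wrap */
	return 0;
}
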
+diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
+index 92a656fb53212..75bd7b2fa4ee6 100644
+--- a/sound/pci/hda/patch_realtek.c
++++ b/sound/pci/hda/patch_realtek.c
+@@ -9662,6 +9662,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
+ SND_PCI_QUIRK(0x103c, 0x8973, "HP EliteBook 860 G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8974, "HP EliteBook 840 Aero G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8975, "HP EliteBook x360 840 Aero G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
++ SND_PCI_QUIRK(0x103c, 0x897d, "HP mt440 Mobile Thin Client U74", ALC236_FIXUP_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8981, "HP Elite Dragonfly G3", ALC245_FIXUP_CS35L41_SPI_4),
+ SND_PCI_QUIRK(0x103c, 0x898e, "HP EliteBook 835 G9", ALC287_FIXUP_CS35L41_I2C_2),
+ SND_PCI_QUIRK(0x103c, 0x898f, "HP EliteBook 835 G9", ALC287_FIXUP_CS35L41_I2C_2),
+@@ -9687,11 +9688,13 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
+ SND_PCI_QUIRK(0x103c, 0x8aa3, "HP ProBook 450 G9 (MB 8AA1)", ALC236_FIXUP_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8aa8, "HP EliteBook 640 G9 (MB 8AA6)", ALC236_FIXUP_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8aab, "HP EliteBook 650 G9 (MB 8AA9)", ALC236_FIXUP_HP_GPIO_LED),
++ SND_PCI_QUIRK(0x103c, 0x8ab9, "HP EliteBook 840 G8 (MB 8AB8)", ALC285_FIXUP_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8abb, "HP ZBook Firefly 14 G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8ad1, "HP EliteBook 840 14 inch G9 Notebook PC", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8ad2, "HP EliteBook 860 16 inch G9 Notebook PC", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8b0f, "HP Elite mt645 G7 Mobile Thin Client U81", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
+ SND_PCI_QUIRK(0x103c, 0x8b2f, "HP 255 15.6 inch G10 Notebook PC", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2),
++ SND_PCI_QUIRK(0x103c, 0x8b3f, "HP mt440 Mobile Thin Client U91", ALC236_FIXUP_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8b42, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8b43, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8b44, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
+diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
+index 201dc77ebbd77..d5d2183730b9f 100644
+--- a/tools/include/uapi/linux/bpf.h
++++ b/tools/include/uapi/linux/bpf.h
+@@ -3109,6 +3109,10 @@ union bpf_attr {
+ * **BPF_FIB_LOOKUP_DIRECT**
+ * Do a direct table lookup vs full lookup using FIB
+ * rules.
++ * **BPF_FIB_LOOKUP_TBID**
++ * Used with BPF_FIB_LOOKUP_DIRECT.
++ * Use the routing table ID present in *params*->tbid
++ * for the fib lookup.
+ * **BPF_FIB_LOOKUP_OUTPUT**
+ * Perform lookup from an egress perspective (default is
+ * ingress).
+@@ -3117,6 +3121,11 @@ union bpf_attr {
+ * and *params*->smac will not be set as output. A common
+ * use case is to call **bpf_redirect_neigh**\ () after
+ * doing **bpf_fib_lookup**\ ().
++ * **BPF_FIB_LOOKUP_SRC**
++ * Derive and set source IP addr in *params*->ipv{4,6}_src
++ * for the nexthop. If the src addr cannot be derived,
++ * **BPF_FIB_LKUP_RET_NO_SRC_ADDR** is returned. In this
++ * case, *params*->dmac and *params*->smac are not set either.
+ *
+ * *ctx* is either **struct xdp_md** for XDP programs or
+ * **struct sk_buff** tc cls_act programs.
+@@ -6687,6 +6696,8 @@ enum {
+ BPF_FIB_LOOKUP_DIRECT = (1U << 0),
+ BPF_FIB_LOOKUP_OUTPUT = (1U << 1),
+ BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2),
++ BPF_FIB_LOOKUP_TBID = (1U << 3),
++ BPF_FIB_LOOKUP_SRC = (1U << 4),
+ };
+
+ enum {
+@@ -6699,6 +6710,7 @@ enum {
+ BPF_FIB_LKUP_RET_UNSUPP_LWT, /* fwd requires encapsulation */
+ BPF_FIB_LKUP_RET_NO_NEIGH, /* no neighbor entry for nh */
+ BPF_FIB_LKUP_RET_FRAG_NEEDED, /* fragmentation required to fwd */
++ BPF_FIB_LKUP_RET_NO_SRC_ADDR, /* failed to derive IP src addr */
+ };
+
+ struct bpf_fib_lookup {
+@@ -6733,6 +6745,9 @@ struct bpf_fib_lookup {
+ __u32 rt_metric;
+ };
+
++ /* input: source address to consider for lookup
++ * output: source address result from lookup
++ */
+ union {
+ __be32 ipv4_src;
+ __u32 ipv6_src[4]; /* in6_addr; network order */
+@@ -6747,9 +6762,19 @@ struct bpf_fib_lookup {
+ __u32 ipv6_dst[4]; /* in6_addr; network order */
+ };
+
+- /* output */
+- __be16 h_vlan_proto;
+- __be16 h_vlan_TCI;
++ union {
++ struct {
++ /* output */
++ __be16 h_vlan_proto;
++ __be16 h_vlan_TCI;
++ };
++ /* input: when accompanied by the
++ * 'BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_TBID' flags, a
++ * specific routing table to use for the fib lookup.
++ */
++ __u32 tbid;
++ };
++
+ __u8 smac[6]; /* ETH_ALEN */
+ __u8 dmac[6]; /* ETH_ALEN */
+ };
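
The new flags compose with the existing bpf_fib_lookup() helper. A minimal XDP sketch pinning the lookup to one routing table and requesting a derived source address; the table id, destination address, and drop policy are illustrative, and a real program would parse family and dst from the packet:

/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>

#define AF_INET 2 /* not pulled in by linux/bpf.h */

SEC("xdp")
int fib_tbid_demo(struct xdp_md *ctx)
{
	struct bpf_fib_lookup params = {};
	long rc;

	params.family = AF_INET;
	params.ifindex = ctx->ingress_ifindex;
	params.ipv4_dst = bpf_htonl(0x0a000001); /* 10.0.0.1, illustrative */
	params.tbid = 100; /* only honoured with DIRECT | TBID below */

	rc = bpf_fib_lookup(ctx, &params, sizeof(params),
			    BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_TBID |
			    BPF_FIB_LOOKUP_SRC);
	if (rc == BPF_FIB_LKUP_RET_NO_SRC_ADDR)
		return XDP_DROP; /* table 100 yields no usable source address */
	/* On BPF_FIB_LKUP_RET_SUCCESS, params.ipv4_src and params.dmac/smac
	 * are now filled in for the nexthop. */
	return XDP_PASS;
}

char _license[] SEC("license") = "GPL";
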
+diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
+index 2107579e2939d..a20dca9d26d68 100755
+--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
++++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
+@@ -144,6 +144,11 @@ check_tools()
+ exit $ksft_skip
+ fi
+
++ if ! ss -h | grep -q MPTCP; then
++ echo "SKIP: ss tool does not support MPTCP"
++ exit $ksft_skip
++ fi
++
+ # Use the legacy version if available to support old kernel versions
+ if iptables-legacy -V &> /dev/null; then
+ iptables="iptables-legacy"