| author | Mike Pagano <mpagano@gentoo.org> | 2017-04-08 09:51:03 -0400 |
|---|---|---|
| committer | Mike Pagano <mpagano@gentoo.org> | 2017-04-08 09:51:03 -0400 |
| commit | 8fb2c956e0adbcdcac001eff148fcbf3b7d81ae6 (patch) | |
| tree | 3b5341e07feeed1c0f0905baac155a20f9799a11 | |
| parent | Linux patch 4.10.8 (diff) | |
| download | linux-patches-8fb2c956e0adbcdcac001eff148fcbf3b7d81ae6.tar.gz, linux-patches-8fb2c956e0adbcdcac001eff148fcbf3b7d81ae6.tar.bz2, linux-patches-8fb2c956e0adbcdcac001eff148fcbf3b7d81ae6.zip | |
Linux patch 4.10.9 (tag: 4.10-9)
| -rw-r--r-- | 0000_README | 4 |
| -rw-r--r-- | 1008_linux-4.10.9.patch | 4556 |

2 files changed, 4560 insertions, 0 deletions
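The full 4.10.8 → 4.10.9 incremental patch follows. As a minimal usage sketch (the source-tree and patch paths below are assumptions for illustration; only the patch file name comes from the diffstat above), genpatches like this one are ordinary `-p1` diffs applied on top of the previous sublevel:

```sh
# Hypothetical paths -- only 1008_linux-4.10.9.patch is named by this commit.
cd /usr/src/linux-4.10                 # a tree already patched up to 4.10.8
patch -p1 < ../linux-patches/1008_linux-4.10.9.patch   # brings it to 4.10.9
```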
diff --git a/0000_README b/0000_README index 4c7de504..5f8d5b02 100644 --- a/0000_README +++ b/0000_README @@ -75,6 +75,10 @@ Patch: 1007_linux-4.10.8.patch From: http://www.kernel.org Desc: Linux 4.10.8 +Patch: 1008_linux-4.10.9.patch +From: http://www.kernel.org +Desc: Linux 4.10.9 + Patch: 1500_XATTR_USER_PREFIX.patch From: https://bugs.gentoo.org/show_bug.cgi?id=470644 Desc: Support for namespace user.pax.* on tmpfs. diff --git a/1008_linux-4.10.9.patch b/1008_linux-4.10.9.patch new file mode 100644 index 00000000..1aba6be7 --- /dev/null +++ b/1008_linux-4.10.9.patch @@ -0,0 +1,4556 @@ +diff --git a/Documentation/devicetree/bindings/rng/omap_rng.txt b/Documentation/devicetree/bindings/rng/omap_rng.txt +index 471477299ece..9cf7876ab434 100644 +--- a/Documentation/devicetree/bindings/rng/omap_rng.txt ++++ b/Documentation/devicetree/bindings/rng/omap_rng.txt +@@ -12,7 +12,8 @@ Required properties: + - reg : Offset and length of the register set for the module + - interrupts : the interrupt number for the RNG module. + Used for "ti,omap4-rng" and "inside-secure,safexcel-eip76" +-- clocks: the trng clock source ++- clocks: the trng clock source. Only mandatory for the ++ "inside-secure,safexcel-eip76" compatible. + + Example: + /* AM335x */ +diff --git a/Makefile b/Makefile +index 82e0809fed9b..4ebd511dee58 100644 +--- a/Makefile ++++ b/Makefile +@@ -1,6 +1,6 @@ + VERSION = 4 + PATCHLEVEL = 10 +-SUBLEVEL = 8 ++SUBLEVEL = 9 + EXTRAVERSION = + NAME = Fearless Coyote + +diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c +index d408fa21a07c..928562967f3c 100644 +--- a/arch/arc/mm/cache.c ++++ b/arch/arc/mm/cache.c +@@ -633,6 +633,9 @@ noinline static void slc_entire_op(const int op) + + write_aux_reg(ARC_REG_SLC_INVALIDATE, 1); + ++ /* Make sure "busy" bit reports correct stataus, see STAR 9001165532 */ ++ read_aux_reg(r); ++ + /* Important to wait for flush to complete */ + while (read_aux_reg(r) & SLC_CTRL_BUSY); + } +diff --git a/arch/arm/boot/dts/bcm5301x.dtsi b/arch/arm/boot/dts/bcm5301x.dtsi +index f09a2bb08979..4b6049240ec2 100644 +--- a/arch/arm/boot/dts/bcm5301x.dtsi ++++ b/arch/arm/boot/dts/bcm5301x.dtsi +@@ -66,14 +66,14 @@ + timer@20200 { + compatible = "arm,cortex-a9-global-timer"; + reg = <0x20200 0x100>; +- interrupts = <GIC_PPI 11 IRQ_TYPE_LEVEL_HIGH>; ++ interrupts = <GIC_PPI 11 IRQ_TYPE_EDGE_RISING>; + clocks = <&periph_clk>; + }; + + local-timer@20600 { + compatible = "arm,cortex-a9-twd-timer"; + reg = <0x20600 0x100>; +- interrupts = <GIC_PPI 13 IRQ_TYPE_LEVEL_HIGH>; ++ interrupts = <GIC_PPI 13 IRQ_TYPE_EDGE_RISING>; + clocks = <&periph_clk>; + }; + +diff --git a/arch/mips/lantiq/irq.c b/arch/mips/lantiq/irq.c +index 8ac0e5994ed2..0ddf3698b85d 100644 +--- a/arch/mips/lantiq/irq.c ++++ b/arch/mips/lantiq/irq.c +@@ -269,6 +269,11 @@ static void ltq_hw5_irqdispatch(void) + DEFINE_HWx_IRQDISPATCH(5) + #endif + ++static void ltq_hw_irq_handler(struct irq_desc *desc) ++{ ++ ltq_hw_irqdispatch(irq_desc_get_irq(desc) - 2); ++} ++ + #ifdef CONFIG_MIPS_MT_SMP + void __init arch_init_ipiirq(int irq, struct irqaction *action) + { +@@ -313,23 +318,19 @@ static struct irqaction irq_call = { + asmlinkage void plat_irq_dispatch(void) + { + unsigned int pending = read_c0_status() & read_c0_cause() & ST0_IM; +- unsigned int i; +- +- if ((MIPS_CPU_TIMER_IRQ == 7) && (pending & CAUSEF_IP7)) { +- do_IRQ(MIPS_CPU_TIMER_IRQ); +- goto out; +- } else { +- for (i = 0; i < MAX_IM; i++) { +- if (pending & (CAUSEF_IP2 << i)) { +- ltq_hw_irqdispatch(i); +- goto out; +- } +- } ++ int irq; ++ ++ if 
(!pending) { ++ spurious_interrupt(); ++ return; + } +- pr_alert("Spurious IRQ: CAUSE=0x%08x\n", read_c0_status()); + +-out: +- return; ++ pending >>= CAUSEB_IP; ++ while (pending) { ++ irq = fls(pending) - 1; ++ do_IRQ(MIPS_CPU_IRQ_BASE + irq); ++ pending &= ~BIT(irq); ++ } + } + + static int icu_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hw) +@@ -354,11 +355,6 @@ static const struct irq_domain_ops irq_domain_ops = { + .map = icu_map, + }; + +-static struct irqaction cascade = { +- .handler = no_action, +- .name = "cascade", +-}; +- + int __init icu_of_init(struct device_node *node, struct device_node *parent) + { + struct device_node *eiu_node; +@@ -390,7 +386,7 @@ int __init icu_of_init(struct device_node *node, struct device_node *parent) + mips_cpu_irq_init(); + + for (i = 0; i < MAX_IM; i++) +- setup_irq(i + 2, &cascade); ++ irq_set_chained_handler(i + 2, ltq_hw_irq_handler); + + if (cpu_has_vint) { + pr_info("Setting up vectored interrupts\n"); +diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h +index 9a2aee1b90fc..7fcf5128996a 100644 +--- a/arch/parisc/include/asm/uaccess.h ++++ b/arch/parisc/include/asm/uaccess.h +@@ -68,6 +68,15 @@ struct exception_table_entry { + ".previous\n" + + /* ++ * ASM_EXCEPTIONTABLE_ENTRY_EFAULT() creates a special exception table entry ++ * (with lowest bit set) for which the fault handler in fixup_exception() will ++ * load -EFAULT into %r8 for a read or write fault, and zeroes the target ++ * register in case of a read fault in get_user(). ++ */ ++#define ASM_EXCEPTIONTABLE_ENTRY_EFAULT( fault_addr, except_addr )\ ++ ASM_EXCEPTIONTABLE_ENTRY( fault_addr, except_addr + 1) ++ ++/* + * The page fault handler stores, in a per-cpu area, the following information + * if a fixup routine is available. + */ +@@ -94,7 +103,7 @@ struct exception_data { + #define __get_user(x, ptr) \ + ({ \ + register long __gu_err __asm__ ("r8") = 0; \ +- register long __gu_val __asm__ ("r9") = 0; \ ++ register long __gu_val; \ + \ + load_sr2(); \ + switch (sizeof(*(ptr))) { \ +@@ -110,22 +119,23 @@ struct exception_data { + }) + + #define __get_user_asm(ldx, ptr) \ +- __asm__("\n1:\t" ldx "\t0(%%sr2,%2),%0\n\t" \ +- ASM_EXCEPTIONTABLE_ENTRY(1b, fixup_get_user_skip_1)\ ++ __asm__("1: " ldx " 0(%%sr2,%2),%0\n" \ ++ "9:\n" \ ++ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ + : "=r"(__gu_val), "=r"(__gu_err) \ +- : "r"(ptr), "1"(__gu_err) \ +- : "r1"); ++ : "r"(ptr), "1"(__gu_err)); + + #if !defined(CONFIG_64BIT) + + #define __get_user_asm64(ptr) \ +- __asm__("\n1:\tldw 0(%%sr2,%2),%0" \ +- "\n2:\tldw 4(%%sr2,%2),%R0\n\t" \ +- ASM_EXCEPTIONTABLE_ENTRY(1b, fixup_get_user_skip_2)\ +- ASM_EXCEPTIONTABLE_ENTRY(2b, fixup_get_user_skip_1)\ ++ __asm__(" copy %%r0,%R0\n" \ ++ "1: ldw 0(%%sr2,%2),%0\n" \ ++ "2: ldw 4(%%sr2,%2),%R0\n" \ ++ "9:\n" \ ++ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ ++ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b) \ + : "=r"(__gu_val), "=r"(__gu_err) \ +- : "r"(ptr), "1"(__gu_err) \ +- : "r1"); ++ : "r"(ptr), "1"(__gu_err)); + + #endif /* !defined(CONFIG_64BIT) */ + +@@ -151,32 +161,31 @@ struct exception_data { + * The "__put_user/kernel_asm()" macros tell gcc they read from memory + * instead of writing. This is because they do not write to any memory + * gcc knows about, so there are no aliasing issues. These macros must +- * also be aware that "fixup_put_user_skip_[12]" are executed in the +- * context of the fault, and any registers used there must be listed +- * as clobbers. 
In this case only "r1" is used by the current routines. +- * r8/r9 are already listed as err/val. ++ * also be aware that fixups are executed in the context of the fault, ++ * and any registers used there must be listed as clobbers. ++ * r8 is already listed as err. + */ + + #define __put_user_asm(stx, x, ptr) \ + __asm__ __volatile__ ( \ +- "\n1:\t" stx "\t%2,0(%%sr2,%1)\n\t" \ +- ASM_EXCEPTIONTABLE_ENTRY(1b, fixup_put_user_skip_1)\ ++ "1: " stx " %2,0(%%sr2,%1)\n" \ ++ "9:\n" \ ++ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ + : "=r"(__pu_err) \ +- : "r"(ptr), "r"(x), "0"(__pu_err) \ +- : "r1") ++ : "r"(ptr), "r"(x), "0"(__pu_err)) + + + #if !defined(CONFIG_64BIT) + + #define __put_user_asm64(__val, ptr) do { \ + __asm__ __volatile__ ( \ +- "\n1:\tstw %2,0(%%sr2,%1)" \ +- "\n2:\tstw %R2,4(%%sr2,%1)\n\t" \ +- ASM_EXCEPTIONTABLE_ENTRY(1b, fixup_put_user_skip_2)\ +- ASM_EXCEPTIONTABLE_ENTRY(2b, fixup_put_user_skip_1)\ ++ "1: stw %2,0(%%sr2,%1)\n" \ ++ "2: stw %R2,4(%%sr2,%1)\n" \ ++ "9:\n" \ ++ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ ++ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b) \ + : "=r"(__pu_err) \ +- : "r"(ptr), "r"(__val), "0"(__pu_err) \ +- : "r1"); \ ++ : "r"(ptr), "r"(__val), "0"(__pu_err)); \ + } while (0) + + #endif /* !defined(CONFIG_64BIT) */ +diff --git a/arch/parisc/kernel/parisc_ksyms.c b/arch/parisc/kernel/parisc_ksyms.c +index 7484b3d11e0d..c6d6272a934f 100644 +--- a/arch/parisc/kernel/parisc_ksyms.c ++++ b/arch/parisc/kernel/parisc_ksyms.c +@@ -47,16 +47,6 @@ EXPORT_SYMBOL(__cmpxchg_u64); + EXPORT_SYMBOL(lclear_user); + EXPORT_SYMBOL(lstrnlen_user); + +-/* Global fixups - defined as int to avoid creation of function pointers */ +-extern int fixup_get_user_skip_1; +-extern int fixup_get_user_skip_2; +-extern int fixup_put_user_skip_1; +-extern int fixup_put_user_skip_2; +-EXPORT_SYMBOL(fixup_get_user_skip_1); +-EXPORT_SYMBOL(fixup_get_user_skip_2); +-EXPORT_SYMBOL(fixup_put_user_skip_1); +-EXPORT_SYMBOL(fixup_put_user_skip_2); +- + #ifndef CONFIG_64BIT + /* Needed so insmod can set dp value */ + extern int $global$; +diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c +index 9e2d98ee6f9c..3286cbc7b934 100644 +--- a/arch/parisc/kernel/process.c ++++ b/arch/parisc/kernel/process.c +@@ -140,6 +140,8 @@ void machine_power_off(void) + printk(KERN_EMERG "System shut down completed.\n" + "Please power this system off now."); + ++ /* prevent soft lockup/stalled CPU messages for endless loop. */ ++ rcu_sysrq_start(); + for (;;); + } + +diff --git a/arch/parisc/lib/Makefile b/arch/parisc/lib/Makefile +index 8fa92b8d839a..f2dac4d73b1b 100644 +--- a/arch/parisc/lib/Makefile ++++ b/arch/parisc/lib/Makefile +@@ -2,7 +2,7 @@ + # Makefile for parisc-specific library files + # + +-lib-y := lusercopy.o bitops.o checksum.o io.o memset.o fixup.o memcpy.o \ ++lib-y := lusercopy.o bitops.o checksum.o io.o memset.o memcpy.o \ + ucmpdi2.o delay.o + + obj-y := iomap.o +diff --git a/arch/parisc/lib/fixup.S b/arch/parisc/lib/fixup.S +deleted file mode 100644 +index a5b72f22c7a6..000000000000 +--- a/arch/parisc/lib/fixup.S ++++ /dev/null +@@ -1,98 +0,0 @@ +-/* +- * Linux/PA-RISC Project (http://www.parisc-linux.org/) +- * +- * Copyright (C) 2004 Randolph Chung <tausq@debian.org> +- * +- * This program is free software; you can redistribute it and/or modify +- * it under the terms of the GNU General Public License as published by +- * the Free Software Foundation; either version 2, or (at your option) +- * any later version. 
+- * +- * This program is distributed in the hope that it will be useful, +- * but WITHOUT ANY WARRANTY; without even the implied warranty of +- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +- * GNU General Public License for more details. +- * +- * You should have received a copy of the GNU General Public License +- * along with this program; if not, write to the Free Software +- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +- * +- * Fixup routines for kernel exception handling. +- */ +-#include <asm/asm-offsets.h> +-#include <asm/assembly.h> +-#include <asm/errno.h> +-#include <linux/linkage.h> +- +-#ifdef CONFIG_SMP +- .macro get_fault_ip t1 t2 +- loadgp +- addil LT%__per_cpu_offset,%r27 +- LDREG RT%__per_cpu_offset(%r1),\t1 +- /* t2 = smp_processor_id() */ +- mfctl 30,\t2 +- ldw TI_CPU(\t2),\t2 +-#ifdef CONFIG_64BIT +- extrd,u \t2,63,32,\t2 +-#endif +- /* t2 = &__per_cpu_offset[smp_processor_id()]; */ +- LDREGX \t2(\t1),\t2 +- addil LT%exception_data,%r27 +- LDREG RT%exception_data(%r1),\t1 +- /* t1 = this_cpu_ptr(&exception_data) */ +- add,l \t1,\t2,\t1 +- /* %r27 = t1->fault_gp - restore gp */ +- LDREG EXCDATA_GP(\t1), %r27 +- /* t1 = t1->fault_ip */ +- LDREG EXCDATA_IP(\t1), \t1 +- .endm +-#else +- .macro get_fault_ip t1 t2 +- loadgp +- /* t1 = this_cpu_ptr(&exception_data) */ +- addil LT%exception_data,%r27 +- LDREG RT%exception_data(%r1),\t2 +- /* %r27 = t2->fault_gp - restore gp */ +- LDREG EXCDATA_GP(\t2), %r27 +- /* t1 = t2->fault_ip */ +- LDREG EXCDATA_IP(\t2), \t1 +- .endm +-#endif +- +- .level LEVEL +- +- .text +- .section .fixup, "ax" +- +- /* get_user() fixups, store -EFAULT in r8, and 0 in r9 */ +-ENTRY_CFI(fixup_get_user_skip_1) +- get_fault_ip %r1,%r8 +- ldo 4(%r1), %r1 +- ldi -EFAULT, %r8 +- bv %r0(%r1) +- copy %r0, %r9 +-ENDPROC_CFI(fixup_get_user_skip_1) +- +-ENTRY_CFI(fixup_get_user_skip_2) +- get_fault_ip %r1,%r8 +- ldo 8(%r1), %r1 +- ldi -EFAULT, %r8 +- bv %r0(%r1) +- copy %r0, %r9 +-ENDPROC_CFI(fixup_get_user_skip_2) +- +- /* put_user() fixups, store -EFAULT in r8 */ +-ENTRY_CFI(fixup_put_user_skip_1) +- get_fault_ip %r1,%r8 +- ldo 4(%r1), %r1 +- bv %r0(%r1) +- ldi -EFAULT, %r8 +-ENDPROC_CFI(fixup_put_user_skip_1) +- +-ENTRY_CFI(fixup_put_user_skip_2) +- get_fault_ip %r1,%r8 +- ldo 8(%r1), %r1 +- bv %r0(%r1) +- ldi -EFAULT, %r8 +-ENDPROC_CFI(fixup_put_user_skip_2) +- +diff --git a/arch/parisc/lib/lusercopy.S b/arch/parisc/lib/lusercopy.S +index 56845de6b5df..f01188c044ee 100644 +--- a/arch/parisc/lib/lusercopy.S ++++ b/arch/parisc/lib/lusercopy.S +@@ -5,6 +5,8 @@ + * Copyright (C) 2000 Richard Hirst <rhirst with parisc-linux.org> + * Copyright (C) 2001 Matthieu Delahaye <delahaym at esiee.fr> + * Copyright (C) 2003 Randolph Chung <tausq with parisc-linux.org> ++ * Copyright (C) 2017 Helge Deller <deller@gmx.de> ++ * Copyright (C) 2017 John David Anglin <dave.anglin@bell.net> + * + * + * This program is free software; you can redistribute it and/or modify +@@ -132,4 +134,320 @@ ENDPROC_CFI(lstrnlen_user) + + .procend + ++ ++ ++/* ++ * unsigned long pa_memcpy(void *dstp, const void *srcp, unsigned long len) ++ * ++ * Inputs: ++ * - sr1 already contains space of source region ++ * - sr2 already contains space of destination region ++ * ++ * Returns: ++ * - number of bytes that could not be copied. ++ * On success, this will be zero. ++ * ++ * This code is based on a C-implementation of a copy routine written by ++ * Randolph Chung, which in turn was derived from the glibc. 
++ * ++ * Several strategies are tried to try to get the best performance for various ++ * conditions. In the optimal case, we copy by loops that copy 32- or 16-bytes ++ * at a time using general registers. Unaligned copies are handled either by ++ * aligning the destination and then using shift-and-write method, or in a few ++ * cases by falling back to a byte-at-a-time copy. ++ * ++ * Testing with various alignments and buffer sizes shows that this code is ++ * often >10x faster than a simple byte-at-a-time copy, even for strangely ++ * aligned operands. It is interesting to note that the glibc version of memcpy ++ * (written in C) is actually quite fast already. This routine is able to beat ++ * it by 30-40% for aligned copies because of the loop unrolling, but in some ++ * cases the glibc version is still slightly faster. This lends more ++ * credibility that gcc can generate very good code as long as we are careful. ++ * ++ * Possible optimizations: ++ * - add cache prefetching ++ * - try not to use the post-increment address modifiers; they may create ++ * additional interlocks. Assumption is that those were only efficient on old ++ * machines (pre PA8000 processors) ++ */ ++ ++ dst = arg0 ++ src = arg1 ++ len = arg2 ++ end = arg3 ++ t1 = r19 ++ t2 = r20 ++ t3 = r21 ++ t4 = r22 ++ srcspc = sr1 ++ dstspc = sr2 ++ ++ t0 = r1 ++ a1 = t1 ++ a2 = t2 ++ a3 = t3 ++ a0 = t4 ++ ++ save_src = ret0 ++ save_dst = ret1 ++ save_len = r31 ++ ++ENTRY_CFI(pa_memcpy) ++ .proc ++ .callinfo NO_CALLS ++ .entry ++ ++ /* Last destination address */ ++ add dst,len,end ++ ++ /* short copy with less than 16 bytes? */ ++ cmpib,>>=,n 15,len,.Lbyte_loop ++ ++ /* same alignment? */ ++ xor src,dst,t0 ++ extru t0,31,2,t1 ++ cmpib,<>,n 0,t1,.Lunaligned_copy ++ ++#ifdef CONFIG_64BIT ++ /* only do 64-bit copies if we can get aligned. 
*/ ++ extru t0,31,3,t1 ++ cmpib,<>,n 0,t1,.Lalign_loop32 ++ ++ /* loop until we are 64-bit aligned */ ++.Lalign_loop64: ++ extru dst,31,3,t1 ++ cmpib,=,n 0,t1,.Lcopy_loop_16 ++20: ldb,ma 1(srcspc,src),t1 ++21: stb,ma t1,1(dstspc,dst) ++ b .Lalign_loop64 ++ ldo -1(len),len ++ ++ ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done) ++ ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done) ++ ++ ldi 31,t0 ++.Lcopy_loop_16: ++ cmpb,COND(>>=),n t0,len,.Lword_loop ++ ++10: ldd 0(srcspc,src),t1 ++11: ldd 8(srcspc,src),t2 ++ ldo 16(src),src ++12: std,ma t1,8(dstspc,dst) ++13: std,ma t2,8(dstspc,dst) ++14: ldd 0(srcspc,src),t1 ++15: ldd 8(srcspc,src),t2 ++ ldo 16(src),src ++16: std,ma t1,8(dstspc,dst) ++17: std,ma t2,8(dstspc,dst) ++ ++ ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done) ++ ASM_EXCEPTIONTABLE_ENTRY(11b,.Lcopy16_fault) ++ ASM_EXCEPTIONTABLE_ENTRY(12b,.Lcopy_done) ++ ASM_EXCEPTIONTABLE_ENTRY(13b,.Lcopy_done) ++ ASM_EXCEPTIONTABLE_ENTRY(14b,.Lcopy_done) ++ ASM_EXCEPTIONTABLE_ENTRY(15b,.Lcopy16_fault) ++ ASM_EXCEPTIONTABLE_ENTRY(16b,.Lcopy_done) ++ ASM_EXCEPTIONTABLE_ENTRY(17b,.Lcopy_done) ++ ++ b .Lcopy_loop_16 ++ ldo -32(len),len ++ ++.Lword_loop: ++ cmpib,COND(>>=),n 3,len,.Lbyte_loop ++20: ldw,ma 4(srcspc,src),t1 ++21: stw,ma t1,4(dstspc,dst) ++ b .Lword_loop ++ ldo -4(len),len ++ ++ ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done) ++ ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done) ++ ++#endif /* CONFIG_64BIT */ ++ ++ /* loop until we are 32-bit aligned */ ++.Lalign_loop32: ++ extru dst,31,2,t1 ++ cmpib,=,n 0,t1,.Lcopy_loop_4 ++20: ldb,ma 1(srcspc,src),t1 ++21: stb,ma t1,1(dstspc,dst) ++ b .Lalign_loop32 ++ ldo -1(len),len ++ ++ ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done) ++ ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done) ++ ++ ++.Lcopy_loop_4: ++ cmpib,COND(>>=),n 15,len,.Lbyte_loop ++ ++10: ldw 0(srcspc,src),t1 ++11: ldw 4(srcspc,src),t2 ++12: stw,ma t1,4(dstspc,dst) ++13: stw,ma t2,4(dstspc,dst) ++14: ldw 8(srcspc,src),t1 ++15: ldw 12(srcspc,src),t2 ++ ldo 16(src),src ++16: stw,ma t1,4(dstspc,dst) ++17: stw,ma t2,4(dstspc,dst) ++ ++ ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done) ++ ASM_EXCEPTIONTABLE_ENTRY(11b,.Lcopy8_fault) ++ ASM_EXCEPTIONTABLE_ENTRY(12b,.Lcopy_done) ++ ASM_EXCEPTIONTABLE_ENTRY(13b,.Lcopy_done) ++ ASM_EXCEPTIONTABLE_ENTRY(14b,.Lcopy_done) ++ ASM_EXCEPTIONTABLE_ENTRY(15b,.Lcopy8_fault) ++ ASM_EXCEPTIONTABLE_ENTRY(16b,.Lcopy_done) ++ ASM_EXCEPTIONTABLE_ENTRY(17b,.Lcopy_done) ++ ++ b .Lcopy_loop_4 ++ ldo -16(len),len ++ ++.Lbyte_loop: ++ cmpclr,COND(<>) len,%r0,%r0 ++ b,n .Lcopy_done ++20: ldb 0(srcspc,src),t1 ++ ldo 1(src),src ++21: stb,ma t1,1(dstspc,dst) ++ b .Lbyte_loop ++ ldo -1(len),len ++ ++ ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done) ++ ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done) ++ ++.Lcopy_done: ++ bv %r0(%r2) ++ sub end,dst,ret0 ++ ++ ++ /* src and dst are not aligned the same way. */ ++ /* need to go the hard way */ ++.Lunaligned_copy: ++ /* align until dst is 32bit-word-aligned */ ++ extru dst,31,2,t1 ++ cmpib,COND(=),n 0,t1,.Lcopy_dstaligned ++20: ldb 0(srcspc,src),t1 ++ ldo 1(src),src ++21: stb,ma t1,1(dstspc,dst) ++ b .Lunaligned_copy ++ ldo -1(len),len ++ ++ ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done) ++ ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done) ++ ++.Lcopy_dstaligned: ++ ++ /* store src, dst and len in safe place */ ++ copy src,save_src ++ copy dst,save_dst ++ copy len,save_len ++ ++ /* len now needs give number of words to copy */ ++ SHRREG len,2,len ++ ++ /* ++ * Copy from a not-aligned src to an aligned dst using shifts. ++ * Handles 4 words per loop. 
++ */ ++ ++ depw,z src,28,2,t0 ++ subi 32,t0,t0 ++ mtsar t0 ++ extru len,31,2,t0 ++ cmpib,= 2,t0,.Lcase2 ++ /* Make src aligned by rounding it down. */ ++ depi 0,31,2,src ++ ++ cmpiclr,<> 3,t0,%r0 ++ b,n .Lcase3 ++ cmpiclr,<> 1,t0,%r0 ++ b,n .Lcase1 ++.Lcase0: ++ cmpb,= %r0,len,.Lcda_finish ++ nop ++ ++1: ldw,ma 4(srcspc,src), a3 ++ ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault) ++1: ldw,ma 4(srcspc,src), a0 ++ ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault) ++ b,n .Ldo3 ++.Lcase1: ++1: ldw,ma 4(srcspc,src), a2 ++ ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault) ++1: ldw,ma 4(srcspc,src), a3 ++ ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault) ++ ldo -1(len),len ++ cmpb,=,n %r0,len,.Ldo0 ++.Ldo4: ++1: ldw,ma 4(srcspc,src), a0 ++ ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault) ++ shrpw a2, a3, %sar, t0 ++1: stw,ma t0, 4(dstspc,dst) ++ ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done) ++.Ldo3: ++1: ldw,ma 4(srcspc,src), a1 ++ ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault) ++ shrpw a3, a0, %sar, t0 ++1: stw,ma t0, 4(dstspc,dst) ++ ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done) ++.Ldo2: ++1: ldw,ma 4(srcspc,src), a2 ++ ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault) ++ shrpw a0, a1, %sar, t0 ++1: stw,ma t0, 4(dstspc,dst) ++ ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done) ++.Ldo1: ++1: ldw,ma 4(srcspc,src), a3 ++ ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault) ++ shrpw a1, a2, %sar, t0 ++1: stw,ma t0, 4(dstspc,dst) ++ ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done) ++ ldo -4(len),len ++ cmpb,<> %r0,len,.Ldo4 ++ nop ++.Ldo0: ++ shrpw a2, a3, %sar, t0 ++1: stw,ma t0, 4(dstspc,dst) ++ ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done) ++ ++.Lcda_rdfault: ++.Lcda_finish: ++ /* calculate new src, dst and len and jump to byte-copy loop */ ++ sub dst,save_dst,t0 ++ add save_src,t0,src ++ b .Lbyte_loop ++ sub save_len,t0,len ++ ++.Lcase3: ++1: ldw,ma 4(srcspc,src), a0 ++ ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault) ++1: ldw,ma 4(srcspc,src), a1 ++ ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault) ++ b .Ldo2 ++ ldo 1(len),len ++.Lcase2: ++1: ldw,ma 4(srcspc,src), a1 ++ ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault) ++1: ldw,ma 4(srcspc,src), a2 ++ ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault) ++ b .Ldo1 ++ ldo 2(len),len ++ ++ ++ /* fault exception fixup handlers: */ ++#ifdef CONFIG_64BIT ++.Lcopy16_fault: ++10: b .Lcopy_done ++ std,ma t1,8(dstspc,dst) ++ ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done) ++#endif ++ ++.Lcopy8_fault: ++10: b .Lcopy_done ++ stw,ma t1,4(dstspc,dst) ++ ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done) ++ ++ .exit ++ENDPROC_CFI(pa_memcpy) ++ .procend ++ + .end +diff --git a/arch/parisc/lib/memcpy.c b/arch/parisc/lib/memcpy.c +index f82ff10ed974..b3d47ec1d80a 100644 +--- a/arch/parisc/lib/memcpy.c ++++ b/arch/parisc/lib/memcpy.c +@@ -2,7 +2,7 @@ + * Optimized memory copy routines. + * + * Copyright (C) 2004 Randolph Chung <tausq@debian.org> +- * Copyright (C) 2013 Helge Deller <deller@gmx.de> ++ * Copyright (C) 2013-2017 Helge Deller <deller@gmx.de> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by +@@ -21,474 +21,21 @@ + * Portions derived from the GNU C Library + * Copyright (C) 1991, 1997, 2003 Free Software Foundation, Inc. + * +- * Several strategies are tried to try to get the best performance for various +- * conditions. In the optimal case, we copy 64-bytes in an unrolled loop using +- * fp regs. This is followed by loops that copy 32- or 16-bytes at a time using +- * general registers. 
Unaligned copies are handled either by aligning the +- * destination and then using shift-and-write method, or in a few cases by +- * falling back to a byte-at-a-time copy. +- * +- * I chose to implement this in C because it is easier to maintain and debug, +- * and in my experiments it appears that the C code generated by gcc (3.3/3.4 +- * at the time of writing) is fairly optimal. Unfortunately some of the +- * semantics of the copy routine (exception handling) is difficult to express +- * in C, so we have to play some tricks to get it to work. +- * +- * All the loads and stores are done via explicit asm() code in order to use +- * the right space registers. +- * +- * Testing with various alignments and buffer sizes shows that this code is +- * often >10x faster than a simple byte-at-a-time copy, even for strangely +- * aligned operands. It is interesting to note that the glibc version +- * of memcpy (written in C) is actually quite fast already. This routine is +- * able to beat it by 30-40% for aligned copies because of the loop unrolling, +- * but in some cases the glibc version is still slightly faster. This lends +- * more credibility that gcc can generate very good code as long as we are +- * careful. +- * +- * TODO: +- * - cache prefetching needs more experimentation to get optimal settings +- * - try not to use the post-increment address modifiers; they create additional +- * interlocks +- * - replace byte-copy loops with stybs sequences + */ + +-#ifdef __KERNEL__ + #include <linux/module.h> + #include <linux/compiler.h> + #include <linux/uaccess.h> +-#define s_space "%%sr1" +-#define d_space "%%sr2" +-#else +-#include "memcpy.h" +-#define s_space "%%sr0" +-#define d_space "%%sr0" +-#define pa_memcpy new2_copy +-#endif + + DECLARE_PER_CPU(struct exception_data, exception_data); + +-#define preserve_branch(label) do { \ +- volatile int dummy = 0; \ +- /* The following branch is never taken, it's just here to */ \ +- /* prevent gcc from optimizing away our exception code. */ \ +- if (unlikely(dummy != dummy)) \ +- goto label; \ +-} while (0) +- + #define get_user_space() (segment_eq(get_fs(), KERNEL_DS) ? 0 : mfsp(3)) + #define get_kernel_space() (0) + +-#define MERGE(w0, sh_1, w1, sh_2) ({ \ +- unsigned int _r; \ +- asm volatile ( \ +- "mtsar %3\n" \ +- "shrpw %1, %2, %%sar, %0\n" \ +- : "=r"(_r) \ +- : "r"(w0), "r"(w1), "r"(sh_2) \ +- ); \ +- _r; \ +-}) +-#define THRESHOLD 16 +- +-#ifdef DEBUG_MEMCPY +-#define DPRINTF(fmt, args...) do { printk(KERN_DEBUG "%s:%d:%s ", __FILE__, __LINE__, __func__ ); printk(KERN_DEBUG fmt, ##args ); } while (0) +-#else +-#define DPRINTF(fmt, args...) 
+-#endif +- +-#define def_load_ai_insn(_insn,_sz,_tt,_s,_a,_t,_e) \ +- __asm__ __volatile__ ( \ +- "1:\t" #_insn ",ma " #_sz "(" _s ",%1), %0\n\t" \ +- ASM_EXCEPTIONTABLE_ENTRY(1b,_e) \ +- : _tt(_t), "+r"(_a) \ +- : \ +- : "r8") +- +-#define def_store_ai_insn(_insn,_sz,_tt,_s,_a,_t,_e) \ +- __asm__ __volatile__ ( \ +- "1:\t" #_insn ",ma %1, " #_sz "(" _s ",%0)\n\t" \ +- ASM_EXCEPTIONTABLE_ENTRY(1b,_e) \ +- : "+r"(_a) \ +- : _tt(_t) \ +- : "r8") +- +-#define ldbma(_s, _a, _t, _e) def_load_ai_insn(ldbs,1,"=r",_s,_a,_t,_e) +-#define stbma(_s, _t, _a, _e) def_store_ai_insn(stbs,1,"r",_s,_a,_t,_e) +-#define ldwma(_s, _a, _t, _e) def_load_ai_insn(ldw,4,"=r",_s,_a,_t,_e) +-#define stwma(_s, _t, _a, _e) def_store_ai_insn(stw,4,"r",_s,_a,_t,_e) +-#define flddma(_s, _a, _t, _e) def_load_ai_insn(fldd,8,"=f",_s,_a,_t,_e) +-#define fstdma(_s, _t, _a, _e) def_store_ai_insn(fstd,8,"f",_s,_a,_t,_e) +- +-#define def_load_insn(_insn,_tt,_s,_o,_a,_t,_e) \ +- __asm__ __volatile__ ( \ +- "1:\t" #_insn " " #_o "(" _s ",%1), %0\n\t" \ +- ASM_EXCEPTIONTABLE_ENTRY(1b,_e) \ +- : _tt(_t) \ +- : "r"(_a) \ +- : "r8") +- +-#define def_store_insn(_insn,_tt,_s,_t,_o,_a,_e) \ +- __asm__ __volatile__ ( \ +- "1:\t" #_insn " %0, " #_o "(" _s ",%1)\n\t" \ +- ASM_EXCEPTIONTABLE_ENTRY(1b,_e) \ +- : \ +- : _tt(_t), "r"(_a) \ +- : "r8") +- +-#define ldw(_s,_o,_a,_t,_e) def_load_insn(ldw,"=r",_s,_o,_a,_t,_e) +-#define stw(_s,_t,_o,_a,_e) def_store_insn(stw,"r",_s,_t,_o,_a,_e) +- +-#ifdef CONFIG_PREFETCH +-static inline void prefetch_src(const void *addr) +-{ +- __asm__("ldw 0(" s_space ",%0), %%r0" : : "r" (addr)); +-} +- +-static inline void prefetch_dst(const void *addr) +-{ +- __asm__("ldd 0(" d_space ",%0), %%r0" : : "r" (addr)); +-} +-#else +-#define prefetch_src(addr) do { } while(0) +-#define prefetch_dst(addr) do { } while(0) +-#endif +- +-#define PA_MEMCPY_OK 0 +-#define PA_MEMCPY_LOAD_ERROR 1 +-#define PA_MEMCPY_STORE_ERROR 2 +- +-/* Copy from a not-aligned src to an aligned dst, using shifts. Handles 4 words +- * per loop. This code is derived from glibc. +- */ +-static noinline unsigned long copy_dstaligned(unsigned long dst, +- unsigned long src, unsigned long len) +-{ +- /* gcc complains that a2 and a3 may be uninitialized, but actually +- * they cannot be. Initialize a2/a3 to shut gcc up. +- */ +- register unsigned int a0, a1, a2 = 0, a3 = 0; +- int sh_1, sh_2; +- +- /* prefetch_src((const void *)src); */ +- +- /* Calculate how to shift a word read at the memory operation +- aligned srcp to make it aligned for copy. */ +- sh_1 = 8 * (src % sizeof(unsigned int)); +- sh_2 = 8 * sizeof(unsigned int) - sh_1; +- +- /* Make src aligned by rounding it down. 
*/ +- src &= -sizeof(unsigned int); +- +- switch (len % 4) +- { +- case 2: +- /* a1 = ((unsigned int *) src)[0]; +- a2 = ((unsigned int *) src)[1]; */ +- ldw(s_space, 0, src, a1, cda_ldw_exc); +- ldw(s_space, 4, src, a2, cda_ldw_exc); +- src -= 1 * sizeof(unsigned int); +- dst -= 3 * sizeof(unsigned int); +- len += 2; +- goto do1; +- case 3: +- /* a0 = ((unsigned int *) src)[0]; +- a1 = ((unsigned int *) src)[1]; */ +- ldw(s_space, 0, src, a0, cda_ldw_exc); +- ldw(s_space, 4, src, a1, cda_ldw_exc); +- src -= 0 * sizeof(unsigned int); +- dst -= 2 * sizeof(unsigned int); +- len += 1; +- goto do2; +- case 0: +- if (len == 0) +- return PA_MEMCPY_OK; +- /* a3 = ((unsigned int *) src)[0]; +- a0 = ((unsigned int *) src)[1]; */ +- ldw(s_space, 0, src, a3, cda_ldw_exc); +- ldw(s_space, 4, src, a0, cda_ldw_exc); +- src -=-1 * sizeof(unsigned int); +- dst -= 1 * sizeof(unsigned int); +- len += 0; +- goto do3; +- case 1: +- /* a2 = ((unsigned int *) src)[0]; +- a3 = ((unsigned int *) src)[1]; */ +- ldw(s_space, 0, src, a2, cda_ldw_exc); +- ldw(s_space, 4, src, a3, cda_ldw_exc); +- src -=-2 * sizeof(unsigned int); +- dst -= 0 * sizeof(unsigned int); +- len -= 1; +- if (len == 0) +- goto do0; +- goto do4; /* No-op. */ +- } +- +- do +- { +- /* prefetch_src((const void *)(src + 4 * sizeof(unsigned int))); */ +-do4: +- /* a0 = ((unsigned int *) src)[0]; */ +- ldw(s_space, 0, src, a0, cda_ldw_exc); +- /* ((unsigned int *) dst)[0] = MERGE (a2, sh_1, a3, sh_2); */ +- stw(d_space, MERGE (a2, sh_1, a3, sh_2), 0, dst, cda_stw_exc); +-do3: +- /* a1 = ((unsigned int *) src)[1]; */ +- ldw(s_space, 4, src, a1, cda_ldw_exc); +- /* ((unsigned int *) dst)[1] = MERGE (a3, sh_1, a0, sh_2); */ +- stw(d_space, MERGE (a3, sh_1, a0, sh_2), 4, dst, cda_stw_exc); +-do2: +- /* a2 = ((unsigned int *) src)[2]; */ +- ldw(s_space, 8, src, a2, cda_ldw_exc); +- /* ((unsigned int *) dst)[2] = MERGE (a0, sh_1, a1, sh_2); */ +- stw(d_space, MERGE (a0, sh_1, a1, sh_2), 8, dst, cda_stw_exc); +-do1: +- /* a3 = ((unsigned int *) src)[3]; */ +- ldw(s_space, 12, src, a3, cda_ldw_exc); +- /* ((unsigned int *) dst)[3] = MERGE (a1, sh_1, a2, sh_2); */ +- stw(d_space, MERGE (a1, sh_1, a2, sh_2), 12, dst, cda_stw_exc); +- +- src += 4 * sizeof(unsigned int); +- dst += 4 * sizeof(unsigned int); +- len -= 4; +- } +- while (len != 0); +- +-do0: +- /* ((unsigned int *) dst)[0] = MERGE (a2, sh_1, a3, sh_2); */ +- stw(d_space, MERGE (a2, sh_1, a3, sh_2), 0, dst, cda_stw_exc); +- +- preserve_branch(handle_load_error); +- preserve_branch(handle_store_error); +- +- return PA_MEMCPY_OK; +- +-handle_load_error: +- __asm__ __volatile__ ("cda_ldw_exc:\n"); +- return PA_MEMCPY_LOAD_ERROR; +- +-handle_store_error: +- __asm__ __volatile__ ("cda_stw_exc:\n"); +- return PA_MEMCPY_STORE_ERROR; +-} +- +- +-/* Returns PA_MEMCPY_OK, PA_MEMCPY_LOAD_ERROR or PA_MEMCPY_STORE_ERROR. +- * In case of an access fault the faulty address can be read from the per_cpu +- * exception data struct. 
*/ +-static noinline unsigned long pa_memcpy_internal(void *dstp, const void *srcp, +- unsigned long len) +-{ +- register unsigned long src, dst, t1, t2, t3; +- register unsigned char *pcs, *pcd; +- register unsigned int *pws, *pwd; +- register double *pds, *pdd; +- unsigned long ret; +- +- src = (unsigned long)srcp; +- dst = (unsigned long)dstp; +- pcs = (unsigned char *)srcp; +- pcd = (unsigned char *)dstp; +- +- /* prefetch_src((const void *)srcp); */ +- +- if (len < THRESHOLD) +- goto byte_copy; +- +- /* Check alignment */ +- t1 = (src ^ dst); +- if (unlikely(t1 & (sizeof(double)-1))) +- goto unaligned_copy; +- +- /* src and dst have same alignment. */ +- +- /* Copy bytes till we are double-aligned. */ +- t2 = src & (sizeof(double) - 1); +- if (unlikely(t2 != 0)) { +- t2 = sizeof(double) - t2; +- while (t2 && len) { +- /* *pcd++ = *pcs++; */ +- ldbma(s_space, pcs, t3, pmc_load_exc); +- len--; +- stbma(d_space, t3, pcd, pmc_store_exc); +- t2--; +- } +- } +- +- pds = (double *)pcs; +- pdd = (double *)pcd; +- +-#if 0 +- /* Copy 8 doubles at a time */ +- while (len >= 8*sizeof(double)) { +- register double r1, r2, r3, r4, r5, r6, r7, r8; +- /* prefetch_src((char *)pds + L1_CACHE_BYTES); */ +- flddma(s_space, pds, r1, pmc_load_exc); +- flddma(s_space, pds, r2, pmc_load_exc); +- flddma(s_space, pds, r3, pmc_load_exc); +- flddma(s_space, pds, r4, pmc_load_exc); +- fstdma(d_space, r1, pdd, pmc_store_exc); +- fstdma(d_space, r2, pdd, pmc_store_exc); +- fstdma(d_space, r3, pdd, pmc_store_exc); +- fstdma(d_space, r4, pdd, pmc_store_exc); +- +-#if 0 +- if (L1_CACHE_BYTES <= 32) +- prefetch_src((char *)pds + L1_CACHE_BYTES); +-#endif +- flddma(s_space, pds, r5, pmc_load_exc); +- flddma(s_space, pds, r6, pmc_load_exc); +- flddma(s_space, pds, r7, pmc_load_exc); +- flddma(s_space, pds, r8, pmc_load_exc); +- fstdma(d_space, r5, pdd, pmc_store_exc); +- fstdma(d_space, r6, pdd, pmc_store_exc); +- fstdma(d_space, r7, pdd, pmc_store_exc); +- fstdma(d_space, r8, pdd, pmc_store_exc); +- len -= 8*sizeof(double); +- } +-#endif +- +- pws = (unsigned int *)pds; +- pwd = (unsigned int *)pdd; +- +-word_copy: +- while (len >= 8*sizeof(unsigned int)) { +- register unsigned int r1,r2,r3,r4,r5,r6,r7,r8; +- /* prefetch_src((char *)pws + L1_CACHE_BYTES); */ +- ldwma(s_space, pws, r1, pmc_load_exc); +- ldwma(s_space, pws, r2, pmc_load_exc); +- ldwma(s_space, pws, r3, pmc_load_exc); +- ldwma(s_space, pws, r4, pmc_load_exc); +- stwma(d_space, r1, pwd, pmc_store_exc); +- stwma(d_space, r2, pwd, pmc_store_exc); +- stwma(d_space, r3, pwd, pmc_store_exc); +- stwma(d_space, r4, pwd, pmc_store_exc); +- +- ldwma(s_space, pws, r5, pmc_load_exc); +- ldwma(s_space, pws, r6, pmc_load_exc); +- ldwma(s_space, pws, r7, pmc_load_exc); +- ldwma(s_space, pws, r8, pmc_load_exc); +- stwma(d_space, r5, pwd, pmc_store_exc); +- stwma(d_space, r6, pwd, pmc_store_exc); +- stwma(d_space, r7, pwd, pmc_store_exc); +- stwma(d_space, r8, pwd, pmc_store_exc); +- len -= 8*sizeof(unsigned int); +- } +- +- while (len >= 4*sizeof(unsigned int)) { +- register unsigned int r1,r2,r3,r4; +- ldwma(s_space, pws, r1, pmc_load_exc); +- ldwma(s_space, pws, r2, pmc_load_exc); +- ldwma(s_space, pws, r3, pmc_load_exc); +- ldwma(s_space, pws, r4, pmc_load_exc); +- stwma(d_space, r1, pwd, pmc_store_exc); +- stwma(d_space, r2, pwd, pmc_store_exc); +- stwma(d_space, r3, pwd, pmc_store_exc); +- stwma(d_space, r4, pwd, pmc_store_exc); +- len -= 4*sizeof(unsigned int); +- } +- +- pcs = (unsigned char *)pws; +- pcd = (unsigned char *)pwd; +- +-byte_copy: +- while (len) { +- 
/* *pcd++ = *pcs++; */ +- ldbma(s_space, pcs, t3, pmc_load_exc); +- stbma(d_space, t3, pcd, pmc_store_exc); +- len--; +- } +- +- return PA_MEMCPY_OK; +- +-unaligned_copy: +- /* possibly we are aligned on a word, but not on a double... */ +- if (likely((t1 & (sizeof(unsigned int)-1)) == 0)) { +- t2 = src & (sizeof(unsigned int) - 1); +- +- if (unlikely(t2 != 0)) { +- t2 = sizeof(unsigned int) - t2; +- while (t2) { +- /* *pcd++ = *pcs++; */ +- ldbma(s_space, pcs, t3, pmc_load_exc); +- stbma(d_space, t3, pcd, pmc_store_exc); +- len--; +- t2--; +- } +- } +- +- pws = (unsigned int *)pcs; +- pwd = (unsigned int *)pcd; +- goto word_copy; +- } +- +- /* Align the destination. */ +- if (unlikely((dst & (sizeof(unsigned int) - 1)) != 0)) { +- t2 = sizeof(unsigned int) - (dst & (sizeof(unsigned int) - 1)); +- while (t2) { +- /* *pcd++ = *pcs++; */ +- ldbma(s_space, pcs, t3, pmc_load_exc); +- stbma(d_space, t3, pcd, pmc_store_exc); +- len--; +- t2--; +- } +- dst = (unsigned long)pcd; +- src = (unsigned long)pcs; +- } +- +- ret = copy_dstaligned(dst, src, len / sizeof(unsigned int)); +- if (ret) +- return ret; +- +- pcs += (len & -sizeof(unsigned int)); +- pcd += (len & -sizeof(unsigned int)); +- len %= sizeof(unsigned int); +- +- preserve_branch(handle_load_error); +- preserve_branch(handle_store_error); +- +- goto byte_copy; +- +-handle_load_error: +- __asm__ __volatile__ ("pmc_load_exc:\n"); +- return PA_MEMCPY_LOAD_ERROR; +- +-handle_store_error: +- __asm__ __volatile__ ("pmc_store_exc:\n"); +- return PA_MEMCPY_STORE_ERROR; +-} +- +- + /* Returns 0 for success, otherwise, returns number of bytes not transferred. */ +-static unsigned long pa_memcpy(void *dstp, const void *srcp, unsigned long len) +-{ +- unsigned long ret, fault_addr, reference; +- struct exception_data *d; +- +- ret = pa_memcpy_internal(dstp, srcp, len); +- if (likely(ret == PA_MEMCPY_OK)) +- return 0; +- +- /* if a load or store fault occured we can get the faulty addr */ +- d = this_cpu_ptr(&exception_data); +- fault_addr = d->fault_addr; +- +- /* error in load or store? */ +- if (ret == PA_MEMCPY_LOAD_ERROR) +- reference = (unsigned long) srcp; +- else +- reference = (unsigned long) dstp; ++extern unsigned long pa_memcpy(void *dst, const void *src, ++ unsigned long len); + +- DPRINTF("pa_memcpy: fault type = %lu, len=%lu fault_addr=%lu ref=%lu\n", +- ret, len, fault_addr, reference); +- +- if (fault_addr >= reference) +- return len - (fault_addr - reference); +- else +- return len; +-} +- +-#ifdef __KERNEL__ + unsigned long __copy_to_user(void __user *dst, const void *src, + unsigned long len) + { +@@ -537,5 +84,3 @@ long probe_kernel_read(void *dst, const void *src, size_t size) + + return __probe_kernel_read(dst, src, size); + } +- +-#endif +diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c +index 1a0b4f63f0e9..040c48fc5391 100644 +--- a/arch/parisc/mm/fault.c ++++ b/arch/parisc/mm/fault.c +@@ -149,6 +149,23 @@ int fixup_exception(struct pt_regs *regs) + d->fault_space = regs->isr; + d->fault_addr = regs->ior; + ++ /* ++ * Fix up get_user() and put_user(). ++ * ASM_EXCEPTIONTABLE_ENTRY_EFAULT() sets the least-significant ++ * bit in the relative address of the fixup routine to indicate ++ * that %r8 should be loaded with -EFAULT to report a userspace ++ * access error. 
++ */ ++ if (fix->fixup & 1) { ++ regs->gr[8] = -EFAULT; ++ ++ /* zero target register for get_user() */ ++ if (parisc_acctyp(0, regs->iir) == VM_READ) { ++ int treg = regs->iir & 0x1f; ++ regs->gr[treg] = 0; ++ } ++ } ++ + regs->iaoq[0] = (unsigned long)&fix->fixup + fix->fixup; + regs->iaoq[0] &= ~3; + /* +diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c +index c989e67dcc9d..9764463ce833 100644 +--- a/arch/x86/kvm/vmx.c ++++ b/arch/x86/kvm/vmx.c +@@ -10027,7 +10027,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, + { + struct vcpu_vmx *vmx = to_vmx(vcpu); + u32 exec_control; +- bool nested_ept_enabled = false; + + vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector); + vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector); +@@ -10192,7 +10191,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, + vmcs12->guest_intr_status); + } + +- nested_ept_enabled = (exec_control & SECONDARY_EXEC_ENABLE_EPT) != 0; + vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); + } + +@@ -10344,7 +10342,7 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, + vmx_set_efer(vcpu, vcpu->arch.efer); + + /* Shadow page tables on either EPT or shadow page tables. */ +- if (nested_vmx_load_cr3(vcpu, vmcs12->guest_cr3, nested_ept_enabled, ++ if (nested_vmx_load_cr3(vcpu, vmcs12->guest_cr3, nested_cpu_has_ept(vmcs12), + entry_failure_code)) + return 1; + +diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S +index 779782f58324..9a53a06e5a3e 100644 +--- a/arch/x86/lib/memcpy_64.S ++++ b/arch/x86/lib/memcpy_64.S +@@ -290,7 +290,7 @@ EXPORT_SYMBOL_GPL(memcpy_mcsafe_unrolled) + _ASM_EXTABLE_FAULT(.L_copy_leading_bytes, .L_memcpy_mcsafe_fail) + _ASM_EXTABLE_FAULT(.L_cache_w0, .L_memcpy_mcsafe_fail) + _ASM_EXTABLE_FAULT(.L_cache_w1, .L_memcpy_mcsafe_fail) +- _ASM_EXTABLE_FAULT(.L_cache_w3, .L_memcpy_mcsafe_fail) ++ _ASM_EXTABLE_FAULT(.L_cache_w2, .L_memcpy_mcsafe_fail) + _ASM_EXTABLE_FAULT(.L_cache_w3, .L_memcpy_mcsafe_fail) + _ASM_EXTABLE_FAULT(.L_cache_w4, .L_memcpy_mcsafe_fail) + _ASM_EXTABLE_FAULT(.L_cache_w5, .L_memcpy_mcsafe_fail) +diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c +index 887e57182716..aed206475aa7 100644 +--- a/arch/x86/mm/kaslr.c ++++ b/arch/x86/mm/kaslr.c +@@ -48,7 +48,7 @@ static const unsigned long vaddr_start = __PAGE_OFFSET_BASE; + #if defined(CONFIG_X86_ESPFIX64) + static const unsigned long vaddr_end = ESPFIX_BASE_ADDR; + #elif defined(CONFIG_EFI) +-static const unsigned long vaddr_end = EFI_VA_START; ++static const unsigned long vaddr_end = EFI_VA_END; + #else + static const unsigned long vaddr_end = __START_KERNEL_map; + #endif +@@ -105,7 +105,7 @@ void __init kernel_randomize_memory(void) + */ + BUILD_BUG_ON(vaddr_start >= vaddr_end); + BUILD_BUG_ON(IS_ENABLED(CONFIG_X86_ESPFIX64) && +- vaddr_end >= EFI_VA_START); ++ vaddr_end >= EFI_VA_END); + BUILD_BUG_ON((IS_ENABLED(CONFIG_X86_ESPFIX64) || + IS_ENABLED(CONFIG_EFI)) && + vaddr_end >= __START_KERNEL_map); +diff --git a/block/bio.c b/block/bio.c +index 2b375020fc49..17ece5b40a2f 100644 +--- a/block/bio.c ++++ b/block/bio.c +@@ -376,10 +376,14 @@ static void punt_bios_to_rescuer(struct bio_set *bs) + bio_list_init(&punt); + bio_list_init(&nopunt); + +- while ((bio = bio_list_pop(current->bio_list))) ++ while ((bio = bio_list_pop(¤t->bio_list[0]))) + bio_list_add(bio->bi_pool == bs ? 
&punt : &nopunt, bio); ++ current->bio_list[0] = nopunt; + +- *current->bio_list = nopunt; ++ bio_list_init(&nopunt); ++ while ((bio = bio_list_pop(¤t->bio_list[1]))) ++ bio_list_add(bio->bi_pool == bs ? &punt : &nopunt, bio); ++ current->bio_list[1] = nopunt; + + spin_lock(&bs->rescue_lock); + bio_list_merge(&bs->rescue_list, &punt); +@@ -466,7 +470,9 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) + * we retry with the original gfp_flags. + */ + +- if (current->bio_list && !bio_list_empty(current->bio_list)) ++ if (current->bio_list && ++ (!bio_list_empty(¤t->bio_list[0]) || ++ !bio_list_empty(¤t->bio_list[1]))) + gfp_mask &= ~__GFP_DIRECT_RECLAIM; + + p = mempool_alloc(bs->bio_pool, gfp_mask); +diff --git a/block/blk-core.c b/block/blk-core.c +index 61ba08c58b64..9734b5d0d932 100644 +--- a/block/blk-core.c ++++ b/block/blk-core.c +@@ -1977,7 +1977,14 @@ generic_make_request_checks(struct bio *bio) + */ + blk_qc_t generic_make_request(struct bio *bio) + { +- struct bio_list bio_list_on_stack; ++ /* ++ * bio_list_on_stack[0] contains bios submitted by the current ++ * make_request_fn. ++ * bio_list_on_stack[1] contains bios that were submitted before ++ * the current make_request_fn, but that haven't been processed ++ * yet. ++ */ ++ struct bio_list bio_list_on_stack[2]; + blk_qc_t ret = BLK_QC_T_NONE; + + if (!generic_make_request_checks(bio)) +@@ -1994,7 +2001,7 @@ blk_qc_t generic_make_request(struct bio *bio) + * should be added at the tail + */ + if (current->bio_list) { +- bio_list_add(current->bio_list, bio); ++ bio_list_add(¤t->bio_list[0], bio); + goto out; + } + +@@ -2013,23 +2020,39 @@ blk_qc_t generic_make_request(struct bio *bio) + * bio_list, and call into ->make_request() again. + */ + BUG_ON(bio->bi_next); +- bio_list_init(&bio_list_on_stack); +- current->bio_list = &bio_list_on_stack; ++ bio_list_init(&bio_list_on_stack[0]); ++ current->bio_list = bio_list_on_stack; + do { + struct request_queue *q = bdev_get_queue(bio->bi_bdev); + + if (likely(blk_queue_enter(q, false) == 0)) { ++ struct bio_list lower, same; ++ ++ /* Create a fresh bio_list for all subordinate requests */ ++ bio_list_on_stack[1] = bio_list_on_stack[0]; ++ bio_list_init(&bio_list_on_stack[0]); + ret = q->make_request_fn(q, bio); + + blk_queue_exit(q); + +- bio = bio_list_pop(current->bio_list); ++ /* sort new bios into those for a lower level ++ * and those for the same level ++ */ ++ bio_list_init(&lower); ++ bio_list_init(&same); ++ while ((bio = bio_list_pop(&bio_list_on_stack[0])) != NULL) ++ if (q == bdev_get_queue(bio->bi_bdev)) ++ bio_list_add(&same, bio); ++ else ++ bio_list_add(&lower, bio); ++ /* now assemble so we handle the lowest level first */ ++ bio_list_merge(&bio_list_on_stack[0], &lower); ++ bio_list_merge(&bio_list_on_stack[0], &same); ++ bio_list_merge(&bio_list_on_stack[0], &bio_list_on_stack[1]); + } else { +- struct bio *bio_next = bio_list_pop(current->bio_list); +- + bio_io_error(bio); +- bio = bio_next; + } ++ bio = bio_list_pop(&bio_list_on_stack[0]); + } while (bio); + current->bio_list = NULL; /* deactivate */ + +diff --git a/crypto/lrw.c b/crypto/lrw.c +index ecd8474018e3..3ea095adafd9 100644 +--- a/crypto/lrw.c ++++ b/crypto/lrw.c +@@ -286,8 +286,11 @@ static int init_crypt(struct skcipher_request *req, crypto_completion_t done) + + subreq->cryptlen = LRW_BUFFER_SIZE; + if (req->cryptlen > LRW_BUFFER_SIZE) { +- subreq->cryptlen = min(req->cryptlen, (unsigned)PAGE_SIZE); +- rctx->ext = kmalloc(subreq->cryptlen, gfp); ++ unsigned int n = 
min(req->cryptlen, (unsigned int)PAGE_SIZE); ++ ++ rctx->ext = kmalloc(n, gfp); ++ if (rctx->ext) ++ subreq->cryptlen = n; + } + + rctx->src = req->src; +diff --git a/crypto/xts.c b/crypto/xts.c +index baeb34dd8582..c976bfac29da 100644 +--- a/crypto/xts.c ++++ b/crypto/xts.c +@@ -230,8 +230,11 @@ static int init_crypt(struct skcipher_request *req, crypto_completion_t done) + + subreq->cryptlen = XTS_BUFFER_SIZE; + if (req->cryptlen > XTS_BUFFER_SIZE) { +- subreq->cryptlen = min(req->cryptlen, (unsigned)PAGE_SIZE); +- rctx->ext = kmalloc(subreq->cryptlen, gfp); ++ unsigned int n = min(req->cryptlen, (unsigned int)PAGE_SIZE); ++ ++ rctx->ext = kmalloc(n, gfp); ++ if (rctx->ext) ++ subreq->cryptlen = n; + } + + rctx->src = req->src; +diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile +index 9ed087853dee..4c5678cfa9c4 100644 +--- a/drivers/acpi/Makefile ++++ b/drivers/acpi/Makefile +@@ -2,7 +2,6 @@ + # Makefile for the Linux ACPI interpreter + # + +-ccflags-y := -Os + ccflags-$(CONFIG_ACPI_DEBUG) += -DACPI_DEBUG_OUTPUT + + # +diff --git a/drivers/acpi/acpi_platform.c b/drivers/acpi/acpi_platform.c +index b4c1a6a51da4..03250e1f1103 100644 +--- a/drivers/acpi/acpi_platform.c ++++ b/drivers/acpi/acpi_platform.c +@@ -25,9 +25,11 @@ + ACPI_MODULE_NAME("platform"); + + static const struct acpi_device_id forbidden_id_list[] = { +- {"PNP0000", 0}, /* PIC */ +- {"PNP0100", 0}, /* Timer */ +- {"PNP0200", 0}, /* AT DMA Controller */ ++ {"PNP0000", 0}, /* PIC */ ++ {"PNP0100", 0}, /* Timer */ ++ {"PNP0200", 0}, /* AT DMA Controller */ ++ {"ACPI0009", 0}, /* IOxAPIC */ ++ {"ACPI000A", 0}, /* IOAPIC */ + {"", 0}, + }; + +diff --git a/drivers/crypto/ccp/ccp-dev-v5.c b/drivers/crypto/ccp/ccp-dev-v5.c +index 612898b4aaad..3422f203455d 100644 +--- a/drivers/crypto/ccp/ccp-dev-v5.c ++++ b/drivers/crypto/ccp/ccp-dev-v5.c +@@ -1014,6 +1014,7 @@ const struct ccp_vdata ccpv5a = { + + const struct ccp_vdata ccpv5b = { + .version = CCP_VERSION(5, 0), ++ .dma_chan_attr = DMA_PRIVATE, + .setup = ccp5other_config, + .perform = &ccp5_actions, + .bar = 2, +diff --git a/drivers/crypto/ccp/ccp-dev.h b/drivers/crypto/ccp/ccp-dev.h +index 649e5610a5ce..cd9a7051da3c 100644 +--- a/drivers/crypto/ccp/ccp-dev.h ++++ b/drivers/crypto/ccp/ccp-dev.h +@@ -179,6 +179,10 @@ + + /* ------------------------ General CCP Defines ------------------------ */ + ++#define CCP_DMA_DFLT 0x0 ++#define CCP_DMA_PRIV 0x1 ++#define CCP_DMA_PUB 0x2 ++ + #define CCP_DMAPOOL_MAX_SIZE 64 + #define CCP_DMAPOOL_ALIGN BIT(5) + +@@ -635,6 +639,7 @@ struct ccp_actions { + /* Structure to hold CCP version-specific values */ + struct ccp_vdata { + const unsigned int version; ++ const unsigned int dma_chan_attr; + void (*setup)(struct ccp_device *); + const struct ccp_actions *perform; + const unsigned int bar; +diff --git a/drivers/crypto/ccp/ccp-dmaengine.c b/drivers/crypto/ccp/ccp-dmaengine.c +index 8d0eeb46d4a2..e00be01fbf5a 100644 +--- a/drivers/crypto/ccp/ccp-dmaengine.c ++++ b/drivers/crypto/ccp/ccp-dmaengine.c +@@ -10,6 +10,7 @@ + * published by the Free Software Foundation. + */ + ++#include <linux/module.h> + #include <linux/kernel.h> + #include <linux/dmaengine.h> + #include <linux/spinlock.h> +@@ -25,6 +26,37 @@ + (mask == 0) ? 64 : fls64(mask); \ + }) + ++/* The CCP as a DMA provider can be configured for public or private ++ * channels. Default is specified in the vdata for the device (PCI ID). 
++ * This module parameter will override for all channels on all devices: ++ * dma_chan_attr = 0x2 to force all channels public ++ * = 0x1 to force all channels private ++ * = 0x0 to defer to the vdata setting ++ * = any other value: warning, revert to 0x0 ++ */ ++static unsigned int dma_chan_attr = CCP_DMA_DFLT; ++module_param(dma_chan_attr, uint, 0444); ++MODULE_PARM_DESC(dma_chan_attr, "Set DMA channel visibility: 0 (default) = device defaults, 1 = make private, 2 = make public"); ++ ++unsigned int ccp_get_dma_chan_attr(struct ccp_device *ccp) ++{ ++ switch (dma_chan_attr) { ++ case CCP_DMA_DFLT: ++ return ccp->vdata->dma_chan_attr; ++ ++ case CCP_DMA_PRIV: ++ return DMA_PRIVATE; ++ ++ case CCP_DMA_PUB: ++ return 0; ++ ++ default: ++ dev_info_once(ccp->dev, "Invalid value for dma_chan_attr: %d\n", ++ dma_chan_attr); ++ return ccp->vdata->dma_chan_attr; ++ } ++} ++ + static void ccp_free_cmd_resources(struct ccp_device *ccp, + struct list_head *list) + { +@@ -675,6 +707,15 @@ int ccp_dmaengine_register(struct ccp_device *ccp) + dma_cap_set(DMA_SG, dma_dev->cap_mask); + dma_cap_set(DMA_INTERRUPT, dma_dev->cap_mask); + ++ /* The DMA channels for this device can be set to public or private, ++ * and overridden by the module parameter dma_chan_attr. ++ * Default: according to the value in vdata (dma_chan_attr=0) ++ * dma_chan_attr=0x1: all channels private (override vdata) ++ * dma_chan_attr=0x2: all channels public (override vdata) ++ */ ++ if (ccp_get_dma_chan_attr(ccp) == DMA_PRIVATE) ++ dma_cap_set(DMA_PRIVATE, dma_dev->cap_mask); ++ + INIT_LIST_HEAD(&dma_dev->channels); + for (i = 0; i < ccp->cmd_q_count; i++) { + chan = ccp->ccp_dma_chan + i; +diff --git a/drivers/gpu/drm/armada/Makefile b/drivers/gpu/drm/armada/Makefile +index a18f156c8b66..64c0b4546fb2 100644 +--- a/drivers/gpu/drm/armada/Makefile ++++ b/drivers/gpu/drm/armada/Makefile +@@ -4,3 +4,5 @@ armada-y += armada_510.o + armada-$(CONFIG_DEBUG_FS) += armada_debugfs.o + + obj-$(CONFIG_DRM_ARMADA) := armada.o ++ ++CFLAGS_armada_trace.o := -I$(src) +diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c +index 0a67124bb2a4..db0a43a090d0 100644 +--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c ++++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c +@@ -1303,6 +1303,8 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu, + goto out_pm_put; + } + ++ mutex_lock(&gpu->lock); ++ + fence = etnaviv_gpu_fence_alloc(gpu); + if (!fence) { + event_free(gpu, event); +@@ -1310,8 +1312,6 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu, + goto out_pm_put; + } + +- mutex_lock(&gpu->lock); +- + gpu->event[event].fence = fence; + submit->fence = fence->seqno; + gpu->active_fence = submit->fence; +diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c +index 3f656e3a6e5a..325cb9b55989 100644 +--- a/drivers/gpu/drm/i915/gvt/kvmgt.c ++++ b/drivers/gpu/drm/i915/gvt/kvmgt.c +@@ -1334,6 +1334,7 @@ static int kvmgt_guest_init(struct mdev_device *mdev) + vgpu->handle = (unsigned long)info; + info->vgpu = vgpu; + info->kvm = kvm; ++ kvm_get_kvm(info->kvm); + + kvmgt_protect_table_init(info); + gvt_cache_init(vgpu); +@@ -1353,6 +1354,7 @@ static bool kvmgt_guest_exit(struct kvmgt_guest_info *info) + } + + kvm_page_track_unregister_notifier(info->kvm, &info->track_node); ++ kvm_put_kvm(info->kvm); + kvmgt_protect_table_destroy(info); + gvt_cache_destroy(info->vgpu); + vfree(info); +diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c +index b4bde1452f2a..6924a8e79da9 100644 +--- 
a/drivers/gpu/drm/i915/i915_gem_gtt.c ++++ b/drivers/gpu/drm/i915/i915_gem_gtt.c +@@ -735,10 +735,9 @@ static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm, + GEM_BUG_ON(pte_end > GEN8_PTES); + + bitmap_clear(pt->used_ptes, pte, num_entries); +- +- if (bitmap_empty(pt->used_ptes, GEN8_PTES)) { +- free_pt(to_i915(vm->dev), pt); +- return true; ++ if (USES_FULL_PPGTT(vm->i915)) { ++ if (bitmap_empty(pt->used_ptes, GEN8_PTES)) ++ return true; + } + + pt_vaddr = kmap_px(pt); +@@ -775,13 +774,12 @@ static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm, + pde_vaddr = kmap_px(pd); + pde_vaddr[pde] = scratch_pde; + kunmap_px(ppgtt, pde_vaddr); ++ free_pt(to_i915(vm->dev), pt); + } + } + +- if (bitmap_empty(pd->used_pdes, I915_PDES)) { +- free_pd(to_i915(vm->dev), pd); ++ if (bitmap_empty(pd->used_pdes, I915_PDES)) + return true; +- } + + return false; + } +@@ -795,7 +793,6 @@ static bool gen8_ppgtt_clear_pdp(struct i915_address_space *vm, + uint64_t length) + { + struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); +- struct drm_i915_private *dev_priv = to_i915(vm->dev); + struct i915_page_directory *pd; + uint64_t pdpe; + gen8_ppgtt_pdpe_t *pdpe_vaddr; +@@ -813,16 +810,14 @@ static bool gen8_ppgtt_clear_pdp(struct i915_address_space *vm, + pdpe_vaddr[pdpe] = scratch_pdpe; + kunmap_px(ppgtt, pdpe_vaddr); + } ++ free_pd(to_i915(vm->dev), pd); + } + } + + mark_tlbs_dirty(ppgtt); + +- if (USES_FULL_48BIT_PPGTT(dev_priv) && +- bitmap_empty(pdp->used_pdpes, I915_PDPES_PER_PDP(dev_priv))) { +- free_pdp(dev_priv, pdp); ++ if (bitmap_empty(pdp->used_pdpes, I915_PDPES_PER_PDP(dev_priv))) + return true; +- } + + return false; + } +@@ -836,6 +831,7 @@ static void gen8_ppgtt_clear_pml4(struct i915_address_space *vm, + uint64_t start, + uint64_t length) + { ++ struct drm_i915_private *dev_priv = to_i915(vm->dev); + struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); + struct i915_page_directory_pointer *pdp; + uint64_t pml4e; +@@ -854,6 +850,7 @@ static void gen8_ppgtt_clear_pml4(struct i915_address_space *vm, + pml4e_vaddr = kmap_px(pml4); + pml4e_vaddr[pml4e] = scratch_pml4e; + kunmap_px(ppgtt, pml4e_vaddr); ++ free_pdp(dev_priv, pdp); + } + } + } +diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c +index beabc17e7c8a..2af4522d60e6 100644 +--- a/drivers/gpu/drm/i915/intel_lrc.c ++++ b/drivers/gpu/drm/i915/intel_lrc.c +@@ -362,7 +362,8 @@ execlists_update_context_pdps(struct i915_hw_ppgtt *ppgtt, u32 *reg_state) + static u64 execlists_update_context(struct drm_i915_gem_request *rq) + { + struct intel_context *ce = &rq->ctx->engine[rq->engine->id]; +- struct i915_hw_ppgtt *ppgtt = rq->ctx->ppgtt; ++ struct i915_hw_ppgtt *ppgtt = ++ rq->ctx->ppgtt ?: rq->i915->mm.aliasing_ppgtt; + u32 *reg_state = ce->lrc_reg_state; + + reg_state[CTX_RING_TAIL+1] = rq->tail; +diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c +index 0cf03ccbf0a7..445a907552c1 100644 +--- a/drivers/gpu/drm/radeon/radeon_ttm.c ++++ b/drivers/gpu/drm/radeon/radeon_ttm.c +@@ -213,8 +213,8 @@ static void radeon_evict_flags(struct ttm_buffer_object *bo, + rbo->placement.num_busy_placement = 0; + for (i = 0; i < rbo->placement.num_placement; i++) { + if (rbo->placements[i].flags & TTM_PL_FLAG_VRAM) { +- if (rbo->placements[0].fpfn < fpfn) +- rbo->placements[0].fpfn = fpfn; ++ if (rbo->placements[i].fpfn < fpfn) ++ rbo->placements[i].fpfn = fpfn; + } else { + rbo->placement.busy_placement = + &rbo->placements[i]; +diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c 
b/drivers/gpu/drm/vc4/vc4_crtc.c +index 7aadce1f7e7a..c7e6c9839c9a 100644 +--- a/drivers/gpu/drm/vc4/vc4_crtc.c ++++ b/drivers/gpu/drm/vc4/vc4_crtc.c +@@ -842,6 +842,17 @@ static void vc4_crtc_destroy_state(struct drm_crtc *crtc, + drm_atomic_helper_crtc_destroy_state(crtc, state); + } + ++static void ++vc4_crtc_reset(struct drm_crtc *crtc) ++{ ++ if (crtc->state) ++ __drm_atomic_helper_crtc_destroy_state(crtc->state); ++ ++ crtc->state = kzalloc(sizeof(struct vc4_crtc_state), GFP_KERNEL); ++ if (crtc->state) ++ crtc->state->crtc = crtc; ++} ++ + static const struct drm_crtc_funcs vc4_crtc_funcs = { + .set_config = drm_atomic_helper_set_config, + .destroy = vc4_crtc_destroy, +@@ -849,7 +860,7 @@ static const struct drm_crtc_funcs vc4_crtc_funcs = { + .set_property = NULL, + .cursor_set = NULL, /* handled by drm_mode_cursor_universal */ + .cursor_move = NULL, /* handled by drm_mode_cursor_universal */ +- .reset = drm_atomic_helper_crtc_reset, ++ .reset = vc4_crtc_reset, + .atomic_duplicate_state = vc4_crtc_duplicate_state, + .atomic_destroy_state = vc4_crtc_destroy_state, + .gamma_set = vc4_crtc_gamma_set, +diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c +index 8aeca038cc73..5f282bb0ea10 100644 +--- a/drivers/hid/wacom_sys.c ++++ b/drivers/hid/wacom_sys.c +@@ -2081,6 +2081,14 @@ static int wacom_parse_and_register(struct wacom *wacom, bool wireless) + + wacom_update_name(wacom, wireless ? " (WL)" : ""); + ++ /* pen only Bamboo neither support touch nor pad */ ++ if ((features->type == BAMBOO_PEN) && ++ ((features->device_type & WACOM_DEVICETYPE_TOUCH) || ++ (features->device_type & WACOM_DEVICETYPE_PAD))) { ++ error = -ENODEV; ++ goto fail; ++ } ++ + error = wacom_add_shared_data(hdev); + if (error) + goto fail; +@@ -2128,14 +2136,6 @@ static int wacom_parse_and_register(struct wacom *wacom, bool wireless) + goto fail_quirks; + } + +- /* pen only Bamboo neither support touch nor pad */ +- if ((features->type == BAMBOO_PEN) && +- ((features->device_type & WACOM_DEVICETYPE_TOUCH) || +- (features->device_type & WACOM_DEVICETYPE_PAD))) { +- error = -ENODEV; +- goto fail_quirks; +- } +- + if (features->device_type & WACOM_DEVICETYPE_WL_MONITOR) + error = hid_hw_open(hdev); + +diff --git a/drivers/md/dm.c b/drivers/md/dm.c +index 0ff5469c03d2..b78bc2916664 100644 +--- a/drivers/md/dm.c ++++ b/drivers/md/dm.c +@@ -986,26 +986,29 @@ static void flush_current_bio_list(struct blk_plug_cb *cb, bool from_schedule) + struct dm_offload *o = container_of(cb, struct dm_offload, cb); + struct bio_list list; + struct bio *bio; ++ int i; + + INIT_LIST_HEAD(&o->cb.list); + + if (unlikely(!current->bio_list)) + return; + +- list = *current->bio_list; +- bio_list_init(current->bio_list); +- +- while ((bio = bio_list_pop(&list))) { +- struct bio_set *bs = bio->bi_pool; +- if (unlikely(!bs) || bs == fs_bio_set) { +- bio_list_add(current->bio_list, bio); +- continue; ++ for (i = 0; i < 2; i++) { ++ list = current->bio_list[i]; ++ bio_list_init(¤t->bio_list[i]); ++ ++ while ((bio = bio_list_pop(&list))) { ++ struct bio_set *bs = bio->bi_pool; ++ if (unlikely(!bs) || bs == fs_bio_set) { ++ bio_list_add(¤t->bio_list[i], bio); ++ continue; ++ } ++ ++ spin_lock(&bs->rescue_lock); ++ bio_list_add(&bs->rescue_list, bio); ++ queue_work(bs->rescue_workqueue, &bs->rescue_work); ++ spin_unlock(&bs->rescue_lock); + } +- +- spin_lock(&bs->rescue_lock); +- bio_list_add(&bs->rescue_list, bio); +- queue_work(bs->rescue_workqueue, &bs->rescue_work); +- spin_unlock(&bs->rescue_lock); + } + } + +diff --git 
+diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
+index 87f14080c2cd..41693890e2b8 100644
+--- a/drivers/md/raid10.c
++++ b/drivers/md/raid10.c
+@@ -974,7 +974,8 @@ static void wait_barrier(struct r10conf *conf)
+ 				    !conf->barrier ||
+ 				    (atomic_read(&conf->nr_pending) &&
+ 				     current->bio_list &&
+-				     !bio_list_empty(current->bio_list)),
++				     (!bio_list_empty(&current->bio_list[0]) ||
++				      !bio_list_empty(&current->bio_list[1]))),
+ 				    conf->resync_lock);
+ 		conf->nr_waiting--;
+ 		if (!conf->nr_waiting)
+diff --git a/drivers/mmc/host/sdhci-of-at91.c b/drivers/mmc/host/sdhci-of-at91.c
+index 7fd964256faa..d5430ed02a67 100644
+--- a/drivers/mmc/host/sdhci-of-at91.c
++++ b/drivers/mmc/host/sdhci-of-at91.c
+@@ -29,6 +29,8 @@
+
+ #include "sdhci-pltfm.h"
+
++#define SDMMC_MC1R	0x204
++#define		SDMMC_MC1R_DDR		BIT(3)
+ #define SDMMC_CACR	0x230
+ #define		SDMMC_CACR_CAPWREN	BIT(0)
+ #define		SDMMC_CACR_KEY		(0x46 << 8)
+@@ -103,11 +105,18 @@ static void sdhci_at91_set_power(struct sdhci_host *host, unsigned char mode,
+ 	sdhci_set_power_noreg(host, mode, vdd);
+ }
+
++void sdhci_at91_set_uhs_signaling(struct sdhci_host *host, unsigned int timing)
++{
++	if (timing == MMC_TIMING_MMC_DDR52)
++		sdhci_writeb(host, SDMMC_MC1R_DDR, SDMMC_MC1R);
++	sdhci_set_uhs_signaling(host, timing);
++}
++
+ static const struct sdhci_ops sdhci_at91_sama5d2_ops = {
+ 	.set_clock		= sdhci_at91_set_clock,
+ 	.set_bus_width		= sdhci_set_bus_width,
+ 	.reset			= sdhci_reset,
+-	.set_uhs_signaling	= sdhci_set_uhs_signaling,
++	.set_uhs_signaling	= sdhci_at91_set_uhs_signaling,
+ 	.set_power		= sdhci_at91_set_power,
+ };
+
+diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
+index d0819d18ad08..d2a4adc50a84 100644
+--- a/drivers/mmc/host/sdhci.c
++++ b/drivers/mmc/host/sdhci.c
+@@ -1830,6 +1830,9 @@ static void sdhci_enable_sdio_irq(struct mmc_host *mmc, int enable)
+ 	struct sdhci_host *host = mmc_priv(mmc);
+ 	unsigned long flags;
+
++	if (enable)
++		pm_runtime_get_noresume(host->mmc->parent);
++
+ 	spin_lock_irqsave(&host->lock, flags);
+ 	if (enable)
+ 		host->flags |= SDHCI_SDIO_IRQ_ENABLED;
+@@ -1838,6 +1841,9 @@ static void sdhci_enable_sdio_irq(struct mmc_host *mmc, int enable)
+
+ 	sdhci_enable_sdio_irq_nolock(host, enable);
+ 	spin_unlock_irqrestore(&host->lock, flags);
++
++	if (!enable)
++		pm_runtime_put_noidle(host->mmc->parent);
+ }
+
+ static int sdhci_start_signal_voltage_switch(struct mmc_host *mmc,
+diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
+index 8a3c3e32a704..3818ff609d55 100644
+--- a/drivers/nvme/host/core.c
++++ b/drivers/nvme/host/core.c
+@@ -2034,9 +2034,9 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl)
+		 * Revalidating a dead namespace sets capacity to 0. This will
+		 * end buffered writers dirtying pages that can't be synced.
+ */ +- if (ns->disk && !test_and_set_bit(NVME_NS_DEAD, &ns->flags)) +- revalidate_disk(ns->disk); +- ++ if (!ns->disk || test_and_set_bit(NVME_NS_DEAD, &ns->flags)) ++ continue; ++ revalidate_disk(ns->disk); + blk_set_queue_dying(ns->queue); + blk_mq_abort_requeue_list(ns->queue); + blk_mq_start_stopped_hw_queues(ns->queue, true); +diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c +index 3faefabf339c..410c3d15b0cb 100644 +--- a/drivers/nvme/host/pci.c ++++ b/drivers/nvme/host/pci.c +@@ -1990,8 +1990,10 @@ static void nvme_remove(struct pci_dev *pdev) + + pci_set_drvdata(pdev, NULL); + +- if (!pci_device_is_present(pdev)) ++ if (!pci_device_is_present(pdev)) { + nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DEAD); ++ nvme_dev_disable(dev, false); ++ } + + flush_work(&dev->reset_work); + nvme_uninit_ctrl(&dev->ctrl); +diff --git a/drivers/pci/host/pci-thunder-pem.c b/drivers/pci/host/pci-thunder-pem.c +index af722eb0ca75..e354010fb006 100644 +--- a/drivers/pci/host/pci-thunder-pem.c ++++ b/drivers/pci/host/pci-thunder-pem.c +@@ -331,7 +331,7 @@ static int thunder_pem_acpi_init(struct pci_config_window *cfg) + if (!res_pem) + return -ENOMEM; + +- ret = acpi_get_rc_resources(dev, "THRX0002", root->segment, res_pem); ++ ret = acpi_get_rc_resources(dev, "CAVA02B", root->segment, res_pem); + if (ret) { + dev_err(dev, "can't get rc base address\n"); + return ret; +diff --git a/drivers/pci/host/pcie-iproc-bcma.c b/drivers/pci/host/pcie-iproc-bcma.c +index bd4c9ec25edc..384c27e664fe 100644 +--- a/drivers/pci/host/pcie-iproc-bcma.c ++++ b/drivers/pci/host/pcie-iproc-bcma.c +@@ -44,8 +44,7 @@ static int iproc_pcie_bcma_probe(struct bcma_device *bdev) + { + struct device *dev = &bdev->dev; + struct iproc_pcie *pcie; +- LIST_HEAD(res); +- struct resource res_mem; ++ LIST_HEAD(resources); + int ret; + + pcie = devm_kzalloc(dev, sizeof(*pcie), GFP_KERNEL); +@@ -63,22 +62,23 @@ static int iproc_pcie_bcma_probe(struct bcma_device *bdev) + + pcie->base_addr = bdev->addr; + +- res_mem.start = bdev->addr_s[0]; +- res_mem.end = bdev->addr_s[0] + SZ_128M - 1; +- res_mem.name = "PCIe MEM space"; +- res_mem.flags = IORESOURCE_MEM; +- pci_add_resource(&res, &res_mem); ++ pcie->mem.start = bdev->addr_s[0]; ++ pcie->mem.end = bdev->addr_s[0] + SZ_128M - 1; ++ pcie->mem.name = "PCIe MEM space"; ++ pcie->mem.flags = IORESOURCE_MEM; ++ pci_add_resource(&resources, &pcie->mem); + + pcie->map_irq = iproc_pcie_bcma_map_irq; + +- ret = iproc_pcie_setup(pcie, &res); +- if (ret) ++ ret = iproc_pcie_setup(pcie, &resources); ++ if (ret) { + dev_err(dev, "PCIe controller setup failed\n"); +- +- pci_free_resource_list(&res); ++ pci_free_resource_list(&resources); ++ return ret; ++ } + + bcma_set_drvdata(bdev, pcie); +- return ret; ++ return 0; + } + + static void iproc_pcie_bcma_remove(struct bcma_device *bdev) +diff --git a/drivers/pci/host/pcie-iproc-platform.c b/drivers/pci/host/pcie-iproc-platform.c +index 22d814a78a78..f95564ac37df 100644 +--- a/drivers/pci/host/pcie-iproc-platform.c ++++ b/drivers/pci/host/pcie-iproc-platform.c +@@ -52,7 +52,7 @@ static int iproc_pcie_pltfm_probe(struct platform_device *pdev) + struct device_node *np = dev->of_node; + struct resource reg; + resource_size_t iobase = 0; +- LIST_HEAD(res); ++ LIST_HEAD(resources); + int ret; + + of_id = of_match_device(iproc_pcie_of_match_table, dev); +@@ -101,10 +101,10 @@ static int iproc_pcie_pltfm_probe(struct platform_device *pdev) + pcie->phy = NULL; + } + +- ret = of_pci_get_host_bridge_resources(np, 0, 0xff, &res, &iobase); ++ ret = 
of_pci_get_host_bridge_resources(np, 0, 0xff, &resources, ++ &iobase); + if (ret) { +- dev_err(dev, +- "unable to get PCI host bridge resources\n"); ++ dev_err(dev, "unable to get PCI host bridge resources\n"); + return ret; + } + +@@ -117,14 +117,15 @@ static int iproc_pcie_pltfm_probe(struct platform_device *pdev) + pcie->map_irq = of_irq_parse_and_map_pci; + } + +- ret = iproc_pcie_setup(pcie, &res); +- if (ret) ++ ret = iproc_pcie_setup(pcie, &resources); ++ if (ret) { + dev_err(dev, "PCIe controller setup failed\n"); +- +- pci_free_resource_list(&res); ++ pci_free_resource_list(&resources); ++ return ret; ++ } + + platform_set_drvdata(pdev, pcie); +- return ret; ++ return 0; + } + + static int iproc_pcie_pltfm_remove(struct platform_device *pdev) +diff --git a/drivers/pci/host/pcie-iproc.h b/drivers/pci/host/pcie-iproc.h +index 04fed8e907f1..0bbe2ea44f3e 100644 +--- a/drivers/pci/host/pcie-iproc.h ++++ b/drivers/pci/host/pcie-iproc.h +@@ -90,6 +90,7 @@ struct iproc_pcie { + #ifdef CONFIG_ARM + struct pci_sys_data sysdata; + #endif ++ struct resource mem; + struct pci_bus *root_bus; + struct phy *phy; + int (*map_irq)(const struct pci_dev *, u8, u8); +diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c +index d704752b6332..6021cb9ea910 100644 +--- a/drivers/scsi/device_handler/scsi_dh_alua.c ++++ b/drivers/scsi/device_handler/scsi_dh_alua.c +@@ -113,7 +113,7 @@ struct alua_queue_data { + #define ALUA_POLICY_SWITCH_ALL 1 + + static void alua_rtpg_work(struct work_struct *work); +-static void alua_rtpg_queue(struct alua_port_group *pg, ++static bool alua_rtpg_queue(struct alua_port_group *pg, + struct scsi_device *sdev, + struct alua_queue_data *qdata, bool force); + static void alua_check(struct scsi_device *sdev, bool force); +@@ -866,7 +866,13 @@ static void alua_rtpg_work(struct work_struct *work) + kref_put(&pg->kref, release_port_group); + } + +-static void alua_rtpg_queue(struct alua_port_group *pg, ++/** ++ * alua_rtpg_queue() - cause RTPG to be submitted asynchronously ++ * ++ * Returns true if and only if alua_rtpg_work() will be called asynchronously. ++ * That function is responsible for calling @qdata->fn(). 
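
The contract spelled out above matters to callers: only a true return means the worker now owns qdata and will invoke qdata->fn(). A minimal sketch of a conforming caller, modelled on the alua_activate() change later in this hunk; names and error codes are as in the driver, but the wrapper itself is hypothetical:

static int queue_rtpg_or_complete(struct alua_port_group *pg,
				  struct scsi_device *sdev,
				  struct alua_queue_data *qdata,
				  activate_complete fn, void *data)
{
	int err = SCSI_DH_OK;

	if (alua_rtpg_queue(pg, sdev, qdata, true))
		fn = NULL;			/* worker will call qdata->fn() */
	else
		err = SCSI_DH_DEV_OFFLINED;	/* worker was not queued */

	if (fn)
		fn(data, err);			/* complete synchronously */
	return err;
}
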
++ */ ++static bool alua_rtpg_queue(struct alua_port_group *pg, + struct scsi_device *sdev, + struct alua_queue_data *qdata, bool force) + { +@@ -874,8 +880,8 @@ static void alua_rtpg_queue(struct alua_port_group *pg, + unsigned long flags; + struct workqueue_struct *alua_wq = kaluad_wq; + +- if (!pg) +- return; ++ if (!pg || scsi_device_get(sdev)) ++ return false; + + spin_lock_irqsave(&pg->lock, flags); + if (qdata) { +@@ -888,14 +894,12 @@ static void alua_rtpg_queue(struct alua_port_group *pg, + pg->flags |= ALUA_PG_RUN_RTPG; + kref_get(&pg->kref); + pg->rtpg_sdev = sdev; +- scsi_device_get(sdev); + start_queue = 1; + } else if (!(pg->flags & ALUA_PG_RUN_RTPG) && force) { + pg->flags |= ALUA_PG_RUN_RTPG; + /* Do not queue if the worker is already running */ + if (!(pg->flags & ALUA_PG_RUNNING)) { + kref_get(&pg->kref); +- sdev = NULL; + start_queue = 1; + } + } +@@ -904,13 +908,17 @@ static void alua_rtpg_queue(struct alua_port_group *pg, + alua_wq = kaluad_sync_wq; + spin_unlock_irqrestore(&pg->lock, flags); + +- if (start_queue && +- !queue_delayed_work(alua_wq, &pg->rtpg_work, +- msecs_to_jiffies(ALUA_RTPG_DELAY_MSECS))) { +- if (sdev) +- scsi_device_put(sdev); +- kref_put(&pg->kref, release_port_group); ++ if (start_queue) { ++ if (queue_delayed_work(alua_wq, &pg->rtpg_work, ++ msecs_to_jiffies(ALUA_RTPG_DELAY_MSECS))) ++ sdev = NULL; ++ else ++ kref_put(&pg->kref, release_port_group); + } ++ if (sdev) ++ scsi_device_put(sdev); ++ ++ return true; + } + + /* +@@ -1011,11 +1019,13 @@ static int alua_activate(struct scsi_device *sdev, + mutex_unlock(&h->init_mutex); + goto out; + } +- fn = NULL; + rcu_read_unlock(); + mutex_unlock(&h->init_mutex); + +- alua_rtpg_queue(pg, sdev, qdata, true); ++ if (alua_rtpg_queue(pg, sdev, qdata, true)) ++ fn = NULL; ++ else ++ err = SCSI_DH_DEV_OFFLINED; + kref_put(&pg->kref, release_port_group); + out: + if (fn) +diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c +index 763f012fdeca..87f5e694dbed 100644 +--- a/drivers/scsi/libsas/sas_ata.c ++++ b/drivers/scsi/libsas/sas_ata.c +@@ -221,7 +221,7 @@ static unsigned int sas_ata_qc_issue(struct ata_queued_cmd *qc) + task->num_scatter = qc->n_elem; + } else { + for_each_sg(qc->sg, sg, qc->n_elem, si) +- xfer += sg->length; ++ xfer += sg_dma_len(sg); + + task->total_xfer_len = xfer; + task->num_scatter = si; +diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c +index 121de0aaa6ad..f753df25ba34 100644 +--- a/drivers/scsi/sg.c ++++ b/drivers/scsi/sg.c +@@ -998,6 +998,8 @@ sg_ioctl(struct file *filp, unsigned int cmd_in, unsigned long arg) + result = get_user(val, ip); + if (result) + return result; ++ if (val > SG_MAX_CDB_SIZE) ++ return -ENOMEM; + sfp->next_cmd_len = (val > 0) ? val : 0; + return 0; + case SG_GET_VERSION_NUM: +diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c +index fabbe76203bb..4d079cdaa7a3 100644 +--- a/drivers/tty/serial/atmel_serial.c ++++ b/drivers/tty/serial/atmel_serial.c +@@ -1938,6 +1938,11 @@ static void atmel_flush_buffer(struct uart_port *port) + atmel_uart_writel(port, ATMEL_PDC_TCR, 0); + atmel_port->pdc_tx.ofs = 0; + } ++ /* ++ * in uart_flush_buffer(), the xmit circular buffer has just ++ * been cleared, so we have to reset tx_len accordingly. 
++ */ ++ atmel_port->tx_len = 0; + } + + /* +@@ -2471,6 +2476,9 @@ static void atmel_console_write(struct console *co, const char *s, u_int count) + pdc_tx = atmel_uart_readl(port, ATMEL_PDC_PTSR) & ATMEL_PDC_TXTEN; + atmel_uart_writel(port, ATMEL_PDC_PTCR, ATMEL_PDC_TXTDIS); + ++ /* Make sure that tx path is actually able to send characters */ ++ atmel_uart_writel(port, ATMEL_US_CR, ATMEL_US_TXEN); ++ + uart_console_write(port, s, count, atmel_console_putchar); + + /* +diff --git a/drivers/tty/serial/mxs-auart.c b/drivers/tty/serial/mxs-auart.c +index 8c1c9112b3fd..181972b03845 100644 +--- a/drivers/tty/serial/mxs-auart.c ++++ b/drivers/tty/serial/mxs-auart.c +@@ -1085,7 +1085,7 @@ static void mxs_auart_settermios(struct uart_port *u, + AUART_LINECTRL_BAUD_DIV_MAX); + baud_max = u->uartclk * 32 / AUART_LINECTRL_BAUD_DIV_MIN; + baud = uart_get_baud_rate(u, termios, old, baud_min, baud_max); +- div = u->uartclk * 32 / baud; ++ div = DIV_ROUND_CLOSEST(u->uartclk * 32, baud); + } + + ctrl |= AUART_LINECTRL_BAUD_DIVFRAC(div & 0x3F); +diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c +index 479e223f9cff..f029aad67183 100644 +--- a/drivers/usb/core/hcd.c ++++ b/drivers/usb/core/hcd.c +@@ -520,8 +520,10 @@ static int rh_call_control (struct usb_hcd *hcd, struct urb *urb) + */ + tbuf_size = max_t(u16, sizeof(struct usb_hub_descriptor), wLength); + tbuf = kzalloc(tbuf_size, GFP_KERNEL); +- if (!tbuf) +- return -ENOMEM; ++ if (!tbuf) { ++ status = -ENOMEM; ++ goto err_alloc; ++ } + + bufp = tbuf; + +@@ -734,6 +736,7 @@ static int rh_call_control (struct usb_hcd *hcd, struct urb *urb) + } + + kfree(tbuf); ++ err_alloc: + + /* any errors get returned through the urb completion */ + spin_lock_irq(&hcd_root_hub_lock); +diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c +index e32029a31ca4..4c101f4161f8 100644 +--- a/drivers/usb/host/xhci-ring.c ++++ b/drivers/usb/host/xhci-ring.c +@@ -2000,6 +2000,9 @@ static int process_ctrl_td(struct xhci_hcd *xhci, struct xhci_td *td, + case TRB_NORMAL: + td->urb->actual_length = requested - remaining; + goto finish_td; ++ case TRB_STATUS: ++ td->urb->actual_length = requested; ++ goto finish_td; + default: + xhci_warn(xhci, "WARN: unexpected TRB Type %d\n", + trb_type); +diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c +index 0a436c4a28ad..2c48e2528600 100644 +--- a/fs/nfs/nfs4proc.c ++++ b/fs/nfs/nfs4proc.c +@@ -2550,17 +2550,14 @@ static void nfs41_check_delegation_stateid(struct nfs4_state *state) + } + + nfs4_stateid_copy(&stateid, &delegation->stateid); +- if (test_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) { ++ if (test_bit(NFS_DELEGATION_REVOKED, &delegation->flags) || ++ !test_and_clear_bit(NFS_DELEGATION_TEST_EXPIRED, ++ &delegation->flags)) { + rcu_read_unlock(); + nfs_finish_clear_delegation_stateid(state, &stateid); + return; + } + +- if (!test_and_clear_bit(NFS_DELEGATION_TEST_EXPIRED, &delegation->flags)) { +- rcu_read_unlock(); +- return; +- } +- + cred = get_rpccred(delegation->cred); + rcu_read_unlock(); + status = nfs41_test_and_free_expired_stateid(server, &stateid, cred); +diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c +index 010aff5c5a79..536009e50387 100644 +--- a/fs/nfsd/nfsproc.c ++++ b/fs/nfsd/nfsproc.c +@@ -790,6 +790,7 @@ nfserrno (int errno) + { nfserr_serverfault, -ESERVERFAULT }, + { nfserr_serverfault, -ENFILE }, + { nfserr_io, -EUCLEAN }, ++ { nfserr_perm, -ENOKEY }, + }; + int i; + +diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c +index bfc00de5c6f1..3365ecb9074d 100644 
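
A side note on the mxs-auart divisor change earlier in this chunk: DIV_ROUND_CLOSEST() picks the divisor whose resulting rate is nearest the request, where plain division always rounds the divisor down. A standalone illustration with made-up numbers; the macro is a simplified unsigned-only version of the one in include/linux/kernel.h:

#include <stdio.h>

#define DIV_ROUND_CLOSEST(x, d) (((x) + (d) / 2) / (d))

int main(void)
{
	unsigned long uartclk = 24000000, baud = 115200;
	unsigned long div_trunc = uartclk * 32 / baud;			/* 6666 */
	unsigned long div_round = DIV_ROUND_CLOSEST(uartclk * 32, baud);/* 6667 */

	/* trunc=6666 -> 115211 baud; round=6667 -> 115194 baud (closer) */
	printf("trunc=%lu -> %lu baud\n", div_trunc, uartclk * 32 / div_trunc);
	printf("round=%lu -> %lu baud\n", div_round, uartclk * 32 / div_round);
	return 0;
}
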
+--- a/fs/xfs/libxfs/xfs_bmap.c ++++ b/fs/xfs/libxfs/xfs_bmap.c +@@ -769,8 +769,8 @@ xfs_bmap_extents_to_btree( + args.type = XFS_ALLOCTYPE_START_BNO; + args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino); + } else if (dfops->dop_low) { +-try_another_ag: + args.type = XFS_ALLOCTYPE_START_BNO; ++try_another_ag: + args.fsbno = *firstblock; + } else { + args.type = XFS_ALLOCTYPE_NEAR_BNO; +@@ -796,17 +796,19 @@ xfs_bmap_extents_to_btree( + if (xfs_sb_version_hasreflink(&cur->bc_mp->m_sb) && + args.fsbno == NULLFSBLOCK && + args.type == XFS_ALLOCTYPE_NEAR_BNO) { +- dfops->dop_low = true; ++ args.type = XFS_ALLOCTYPE_FIRST_AG; + goto try_another_ag; + } ++ if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) { ++ xfs_iroot_realloc(ip, -1, whichfork); ++ xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); ++ return -ENOSPC; ++ } + /* + * Allocation can't fail, the space was reserved. + */ +- ASSERT(args.fsbno != NULLFSBLOCK); + ASSERT(*firstblock == NULLFSBLOCK || +- args.agno == XFS_FSB_TO_AGNO(mp, *firstblock) || +- (dfops->dop_low && +- args.agno > XFS_FSB_TO_AGNO(mp, *firstblock))); ++ args.agno >= XFS_FSB_TO_AGNO(mp, *firstblock)); + *firstblock = cur->bc_private.b.firstblock = args.fsbno; + cur->bc_private.b.allocated++; + ip->i_d.di_nblocks++; +@@ -1278,7 +1280,6 @@ xfs_bmap_read_extents( + /* REFERENCED */ + xfs_extnum_t room; /* number of entries there's room for */ + +- bno = NULLFSBLOCK; + mp = ip->i_mount; + ifp = XFS_IFORK_PTR(ip, whichfork); + exntf = (whichfork != XFS_DATA_FORK) ? XFS_EXTFMT_NOSTATE : +@@ -1291,9 +1292,7 @@ xfs_bmap_read_extents( + ASSERT(level > 0); + pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes); + bno = be64_to_cpu(*pp); +- ASSERT(bno != NULLFSBLOCK); +- ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount); +- ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks); ++ + /* + * Go down the tree until leaf level is reached, following the first + * pointer (leftmost) at each level. +@@ -1864,6 +1863,7 @@ xfs_bmap_add_extent_delay_real( + */ + trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); + xfs_bmbt_set_startblock(ep, new->br_startblock); ++ xfs_bmbt_set_state(ep, new->br_state); + trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); + + (*nextents)++; +@@ -2202,6 +2202,7 @@ STATIC int /* error */ + xfs_bmap_add_extent_unwritten_real( + struct xfs_trans *tp, + xfs_inode_t *ip, /* incore inode pointer */ ++ int whichfork, + xfs_extnum_t *idx, /* extent number to update/insert */ + xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ + xfs_bmbt_irec_t *new, /* new data to add to file extents */ +@@ -2221,12 +2222,14 @@ xfs_bmap_add_extent_unwritten_real( + /* left is 0, right is 1, prev is 2 */ + int rval=0; /* return value (logging flags) */ + int state = 0;/* state bits, accessed thru macros */ +- struct xfs_mount *mp = tp->t_mountp; ++ struct xfs_mount *mp = ip->i_mount; + + *logflagsp = 0; + + cur = *curp; +- ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); ++ ifp = XFS_IFORK_PTR(ip, whichfork); ++ if (whichfork == XFS_COW_FORK) ++ state |= BMAP_COWFORK; + + ASSERT(*idx >= 0); + ASSERT(*idx <= xfs_iext_count(ifp)); +@@ -2285,7 +2288,7 @@ xfs_bmap_add_extent_unwritten_real( + * Don't set contiguous if the combined extent would be too large. + * Also check for all-three-contiguous being too large. 
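
The xfs_bmap_extents_to_btree() hunks above introduce a retry shape that recurs later in this patch (see the xfs_bmbt_alloc_block() hunk): aim the allocation for locality first, and on failure retry the same request with XFS_ALLOCTYPE_FIRST_AG instead of setting dfops->dop_low. A compressed sketch of that shape, assuming args has already been fully initialized and simplifying the reflink-only retry condition used by the real code:

	args.type = XFS_ALLOCTYPE_START_BNO;	/* locality-targeted first */
try_another_ag:
	error = xfs_alloc_vextent(&args);
	if (error)
		return error;
	if (args.fsbno == NULLFSBLOCK &&
	    args.type != XFS_ALLOCTYPE_FIRST_AG) {
		/* same request again, but let any AG satisfy it */
		args.type = XFS_ALLOCTYPE_FIRST_AG;
		goto try_another_ag;
	}
	if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK))
		return -ENOSPC;	/* the reservation should have covered this */
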
+ */ +- if (*idx < xfs_iext_count(&ip->i_df) - 1) { ++ if (*idx < xfs_iext_count(ifp) - 1) { + state |= BMAP_RIGHT_VALID; + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx + 1), &RIGHT); + if (isnullstartblock(RIGHT.br_startblock)) +@@ -2325,7 +2328,8 @@ xfs_bmap_add_extent_unwritten_real( + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + + xfs_iext_remove(ip, *idx + 1, 2, state); +- ip->i_d.di_nextents -= 2; ++ XFS_IFORK_NEXT_SET(ip, whichfork, ++ XFS_IFORK_NEXTENTS(ip, whichfork) - 2); + if (cur == NULL) + rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; + else { +@@ -2368,7 +2372,8 @@ xfs_bmap_add_extent_unwritten_real( + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + + xfs_iext_remove(ip, *idx + 1, 1, state); +- ip->i_d.di_nextents--; ++ XFS_IFORK_NEXT_SET(ip, whichfork, ++ XFS_IFORK_NEXTENTS(ip, whichfork) - 1); + if (cur == NULL) + rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; + else { +@@ -2403,7 +2408,8 @@ xfs_bmap_add_extent_unwritten_real( + xfs_bmbt_set_state(ep, newext); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + xfs_iext_remove(ip, *idx + 1, 1, state); +- ip->i_d.di_nextents--; ++ XFS_IFORK_NEXT_SET(ip, whichfork, ++ XFS_IFORK_NEXTENTS(ip, whichfork) - 1); + if (cur == NULL) + rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; + else { +@@ -2515,7 +2521,8 @@ xfs_bmap_add_extent_unwritten_real( + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + + xfs_iext_insert(ip, *idx, 1, new, state); +- ip->i_d.di_nextents++; ++ XFS_IFORK_NEXT_SET(ip, whichfork, ++ XFS_IFORK_NEXTENTS(ip, whichfork) + 1); + if (cur == NULL) + rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; + else { +@@ -2593,7 +2600,8 @@ xfs_bmap_add_extent_unwritten_real( + ++*idx; + xfs_iext_insert(ip, *idx, 1, new, state); + +- ip->i_d.di_nextents++; ++ XFS_IFORK_NEXT_SET(ip, whichfork, ++ XFS_IFORK_NEXTENTS(ip, whichfork) + 1); + if (cur == NULL) + rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; + else { +@@ -2641,7 +2649,8 @@ xfs_bmap_add_extent_unwritten_real( + ++*idx; + xfs_iext_insert(ip, *idx, 2, &r[0], state); + +- ip->i_d.di_nextents += 2; ++ XFS_IFORK_NEXT_SET(ip, whichfork, ++ XFS_IFORK_NEXTENTS(ip, whichfork) + 2); + if (cur == NULL) + rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; + else { +@@ -2695,17 +2704,17 @@ xfs_bmap_add_extent_unwritten_real( + } + + /* update reverse mappings */ +- error = xfs_rmap_convert_extent(mp, dfops, ip, XFS_DATA_FORK, new); ++ error = xfs_rmap_convert_extent(mp, dfops, ip, whichfork, new); + if (error) + goto done; + + /* convert to a btree if necessary */ +- if (xfs_bmap_needs_btree(ip, XFS_DATA_FORK)) { ++ if (xfs_bmap_needs_btree(ip, whichfork)) { + int tmp_logflags; /* partial log flag return val */ + + ASSERT(cur == NULL); + error = xfs_bmap_extents_to_btree(tp, ip, first, dfops, &cur, +- 0, &tmp_logflags, XFS_DATA_FORK); ++ 0, &tmp_logflags, whichfork); + *logflagsp |= tmp_logflags; + if (error) + goto done; +@@ -2717,7 +2726,7 @@ xfs_bmap_add_extent_unwritten_real( + *curp = cur; + } + +- xfs_bmap_check_leaf_extents(*curp, ip, XFS_DATA_FORK); ++ xfs_bmap_check_leaf_extents(*curp, ip, whichfork); + done: + *logflagsp |= rval; + return error; +@@ -2809,7 +2818,8 @@ xfs_bmap_add_extent_hole_delay( + oldlen = startblockval(left.br_startblock) + + startblockval(new->br_startblock) + + startblockval(right.br_startblock); +- newlen = xfs_bmap_worst_indlen(ip, temp); ++ newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), ++ oldlen); + xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, *idx), + nullstartblock((int)newlen)); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); +@@ -2830,7 
+2840,8 @@ xfs_bmap_add_extent_hole_delay( + xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), temp); + oldlen = startblockval(left.br_startblock) + + startblockval(new->br_startblock); +- newlen = xfs_bmap_worst_indlen(ip, temp); ++ newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), ++ oldlen); + xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, *idx), + nullstartblock((int)newlen)); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); +@@ -2846,7 +2857,8 @@ xfs_bmap_add_extent_hole_delay( + temp = new->br_blockcount + right.br_blockcount; + oldlen = startblockval(new->br_startblock) + + startblockval(right.br_startblock); +- newlen = xfs_bmap_worst_indlen(ip, temp); ++ newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), ++ oldlen); + xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, *idx), + new->br_startoff, + nullstartblock((int)newlen), temp, right.br_state); +@@ -3822,17 +3834,13 @@ xfs_bmap_btalloc( + * the first block that was allocated. + */ + ASSERT(*ap->firstblock == NULLFSBLOCK || +- XFS_FSB_TO_AGNO(mp, *ap->firstblock) == +- XFS_FSB_TO_AGNO(mp, args.fsbno) || +- (ap->dfops->dop_low && +- XFS_FSB_TO_AGNO(mp, *ap->firstblock) < +- XFS_FSB_TO_AGNO(mp, args.fsbno))); ++ XFS_FSB_TO_AGNO(mp, *ap->firstblock) <= ++ XFS_FSB_TO_AGNO(mp, args.fsbno)); + + ap->blkno = args.fsbno; + if (*ap->firstblock == NULLFSBLOCK) + *ap->firstblock = args.fsbno; +- ASSERT(nullfb || fb_agno == args.agno || +- (ap->dfops->dop_low && fb_agno < args.agno)); ++ ASSERT(nullfb || fb_agno <= args.agno); + ap->length = args.len; + if (!(ap->flags & XFS_BMAPI_COWFORK)) + ap->ip->i_d.di_nblocks += args.len; +@@ -4156,6 +4164,19 @@ xfs_bmapi_read( + return 0; + } + ++/* ++ * Add a delayed allocation extent to an inode. Blocks are reserved from the ++ * global pool and the extent inserted into the inode in-core extent tree. ++ * ++ * On entry, got refers to the first extent beyond the offset of the extent to ++ * allocate or eof is specified if no such extent exists. On return, got refers ++ * to the extent record that was inserted to the inode fork. ++ * ++ * Note that the allocated extent may have been merged with contiguous extents ++ * during insertion into the inode fork. Thus, got does not reflect the current ++ * state of the inode fork on return. If necessary, the caller can use lastx to ++ * look up the updated record in the inode fork. ++ */ + int + xfs_bmapi_reserve_delalloc( + struct xfs_inode *ip, +@@ -4242,13 +4263,8 @@ xfs_bmapi_reserve_delalloc( + got->br_startblock = nullstartblock(indlen); + got->br_blockcount = alen; + got->br_state = XFS_EXT_NORM; +- xfs_bmap_add_extent_hole_delay(ip, whichfork, lastx, got); + +- /* +- * Update our extent pointer, given that xfs_bmap_add_extent_hole_delay +- * might have merged it into one of the neighbouring ones. +- */ +- xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *lastx), got); ++ xfs_bmap_add_extent_hole_delay(ip, whichfork, lastx, got); + + /* + * Tag the inode if blocks were preallocated. 
Note that COW fork +@@ -4260,10 +4276,6 @@ xfs_bmapi_reserve_delalloc( + if (whichfork == XFS_COW_FORK && (prealloc || aoff < off || alen > len)) + xfs_inode_set_cowblocks_tag(ip); + +- ASSERT(got->br_startoff <= aoff); +- ASSERT(got->br_startoff + got->br_blockcount >= aoff + alen); +- ASSERT(isnullstartblock(got->br_startblock)); +- ASSERT(got->br_state == XFS_EXT_NORM); + return 0; + + out_unreserve_blocks: +@@ -4368,10 +4380,16 @@ xfs_bmapi_allocate( + bma->got.br_state = XFS_EXT_NORM; + + /* +- * A wasdelay extent has been initialized, so shouldn't be flagged +- * as unwritten. ++ * In the data fork, a wasdelay extent has been initialized, so ++ * shouldn't be flagged as unwritten. ++ * ++ * For the cow fork, however, we convert delalloc reservations ++ * (extents allocated for speculative preallocation) to ++ * allocated unwritten extents, and only convert the unwritten ++ * extents to real extents when we're about to write the data. + */ +- if (!bma->wasdel && (bma->flags & XFS_BMAPI_PREALLOC) && ++ if ((!bma->wasdel || (bma->flags & XFS_BMAPI_COWFORK)) && ++ (bma->flags & XFS_BMAPI_PREALLOC) && + xfs_sb_version_hasextflgbit(&mp->m_sb)) + bma->got.br_state = XFS_EXT_UNWRITTEN; + +@@ -4422,8 +4440,6 @@ xfs_bmapi_convert_unwritten( + (XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT)) + return 0; + +- ASSERT(whichfork != XFS_COW_FORK); +- + /* + * Modify (by adding) the state flag, if writing. + */ +@@ -4448,8 +4464,8 @@ xfs_bmapi_convert_unwritten( + return error; + } + +- error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, &bma->idx, +- &bma->cur, mval, bma->firstblock, bma->dfops, ++ error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, whichfork, ++ &bma->idx, &bma->cur, mval, bma->firstblock, bma->dfops, + &tmp_logflags); + /* + * Log the inode core unconditionally in the unwritten extent conversion +@@ -4458,8 +4474,12 @@ xfs_bmapi_convert_unwritten( + * in the transaction for the sake of fsync(), even if nothing has + * changed, because fsync() will not force the log for this transaction + * unless it sees the inode pinned. ++ * ++ * Note: If we're only converting cow fork extents, there aren't ++ * any on-disk updates to make, so we don't need to log anything. 
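
The xfs_bmapi_reserve_delalloc() comment added earlier in this chunk defines a contract worth making concrete: the record in the fork may end up larger than "got" because of merging, so callers re-read it through lastx when they need the exact post-merge mapping. A minimal sketch of that caller-side pattern, assuming the function signature in this tree; error handling is kept to the essentials:

STATIC int
reserve_and_reread(
	struct xfs_inode	*ip,
	int			whichfork,
	xfs_fileoff_t		off,
	xfs_filblks_t		len,
	struct xfs_bmbt_irec	*got,
	xfs_extnum_t		*lastx,
	int			eof)
{
	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
	int			error;

	error = xfs_bmapi_reserve_delalloc(ip, whichfork, off, len, 0,
					   got, lastx, eof);
	if (error)
		return error;

	/* pick up any merge performed during insertion */
	xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *lastx), got);
	return 0;
}
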
+ */ +- bma->logflags |= tmp_logflags | XFS_ILOG_CORE; ++ if (whichfork != XFS_COW_FORK) ++ bma->logflags |= tmp_logflags | XFS_ILOG_CORE; + if (error) + return error; + +@@ -4533,15 +4553,15 @@ xfs_bmapi_write( + ASSERT(*nmap >= 1); + ASSERT(*nmap <= XFS_BMAP_MAX_NMAP); + ASSERT(!(flags & XFS_BMAPI_IGSTATE)); +- ASSERT(tp != NULL); ++ ASSERT(tp != NULL || ++ (flags & (XFS_BMAPI_CONVERT | XFS_BMAPI_COWFORK)) == ++ (XFS_BMAPI_CONVERT | XFS_BMAPI_COWFORK)); + ASSERT(len > 0); + ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL); + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + ASSERT(!(flags & XFS_BMAPI_REMAP) || whichfork == XFS_DATA_FORK); + ASSERT(!(flags & XFS_BMAPI_PREALLOC) || !(flags & XFS_BMAPI_REMAP)); + ASSERT(!(flags & XFS_BMAPI_CONVERT) || !(flags & XFS_BMAPI_REMAP)); +- ASSERT(!(flags & XFS_BMAPI_PREALLOC) || whichfork != XFS_COW_FORK); +- ASSERT(!(flags & XFS_BMAPI_CONVERT) || whichfork != XFS_COW_FORK); + + /* zeroing is for currently only for data extents, not metadata */ + ASSERT((flags & (XFS_BMAPI_METADATA | XFS_BMAPI_ZERO)) != +@@ -4746,13 +4766,9 @@ xfs_bmapi_write( + if (bma.cur) { + if (!error) { + ASSERT(*firstblock == NULLFSBLOCK || +- XFS_FSB_TO_AGNO(mp, *firstblock) == ++ XFS_FSB_TO_AGNO(mp, *firstblock) <= + XFS_FSB_TO_AGNO(mp, +- bma.cur->bc_private.b.firstblock) || +- (dfops->dop_low && +- XFS_FSB_TO_AGNO(mp, *firstblock) < +- XFS_FSB_TO_AGNO(mp, +- bma.cur->bc_private.b.firstblock))); ++ bma.cur->bc_private.b.firstblock)); + *firstblock = bma.cur->bc_private.b.firstblock; + } + xfs_btree_del_cursor(bma.cur, +@@ -4787,34 +4803,59 @@ xfs_bmap_split_indlen( + xfs_filblks_t len2 = *indlen2; + xfs_filblks_t nres = len1 + len2; /* new total res. */ + xfs_filblks_t stolen = 0; ++ xfs_filblks_t resfactor; + + /* + * Steal as many blocks as we can to try and satisfy the worst case + * indlen for both new extents. + */ +- while (nres > ores && avail) { +- nres--; +- avail--; +- stolen++; +- } ++ if (ores < nres && avail) ++ stolen = XFS_FILBLKS_MIN(nres - ores, avail); ++ ores += stolen; ++ ++ /* nothing else to do if we've satisfied the new reservation */ ++ if (ores >= nres) ++ return stolen; ++ ++ /* ++ * We can't meet the total required reservation for the two extents. ++ * Calculate the percent of the overall shortage between both extents ++ * and apply this percentage to each of the requested indlen values. ++ * This distributes the shortage fairly and reduces the chances that one ++ * of the two extents is left with nothing when extents are repeatedly ++ * split. ++ */ ++ resfactor = (ores * 100); ++ do_div(resfactor, nres); ++ len1 *= resfactor; ++ do_div(len1, 100); ++ len2 *= resfactor; ++ do_div(len2, 100); ++ ASSERT(len1 + len2 <= ores); ++ ASSERT(len1 < *indlen1 && len2 < *indlen2); + + /* +- * The only blocks available are those reserved for the original +- * extent and what we can steal from the extent being removed. +- * If this still isn't enough to satisfy the combined +- * requirements for the two new extents, skim blocks off of each +- * of the new reservations until they match what is available. ++ * Hand out the remainder to each extent. If one of the two reservations ++ * is zero, we want to make sure that one gets a block first. The loop ++ * below starts with len1, so hand len2 a block right off the bat if it ++ * is zero. 
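
A standalone walk-through of the proportional distribution described above, with made-up numbers: ores = 10 available blocks against nres = 16 requested (9 + 7). The scale factor comes out at 62%, the scaled values at 5 and 4, and the single leftover block is handed out by the top-up loop (the zero-reservation special case from the real code is omitted for brevity):

#include <stdio.h>

int main(void)
{
	unsigned long long indlen1 = 9, indlen2 = 7;	/* requested */
	unsigned long long ores = 10;			/* available  */
	unsigned long long nres = indlen1 + indlen2;	/* 16 > 10    */
	unsigned long long resfactor, len1, len2;

	resfactor = (ores * 100) / nres;	/* 1000 / 16 = 62 (%) */
	len1 = indlen1 * resfactor / 100;	/* 9 * 62 / 100 = 5 */
	len2 = indlen2 * resfactor / 100;	/* 7 * 62 / 100 = 4 */

	/* 5 + 4 = 9 <= 10: one leftover block to hand out */
	ores -= len1 + len2;
	while (ores) {				/* caps guarantee progress */
		if (len1 < indlen1) { len1++; if (!--ores) break; }
		if (len2 < indlen2) { len2++; ores--; }
	}

	printf("len1=%llu len2=%llu\n", len1, len2);	/* len1=6 len2=4 */
	return 0;
}
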
+ */ +- while (nres > ores) { +- if (len1) { +- len1--; +- nres--; ++ ores -= (len1 + len2); ++ ASSERT((*indlen1 - len1) + (*indlen2 - len2) >= ores); ++ if (ores && !len2 && *indlen2) { ++ len2++; ++ ores--; ++ } ++ while (ores) { ++ if (len1 < *indlen1) { ++ len1++; ++ ores--; + } +- if (nres == ores) ++ if (!ores) + break; +- if (len2) { +- len2--; +- nres--; ++ if (len2 < *indlen2) { ++ len2++; ++ ores--; + } + } + +@@ -5556,8 +5597,8 @@ __xfs_bunmapi( + } + del.br_state = XFS_EXT_UNWRITTEN; + error = xfs_bmap_add_extent_unwritten_real(tp, ip, +- &lastx, &cur, &del, firstblock, dfops, +- &logflags); ++ whichfork, &lastx, &cur, &del, ++ firstblock, dfops, &logflags); + if (error) + goto error0; + goto nodelete; +@@ -5610,8 +5651,9 @@ __xfs_bunmapi( + prev.br_state = XFS_EXT_UNWRITTEN; + lastx--; + error = xfs_bmap_add_extent_unwritten_real(tp, +- ip, &lastx, &cur, &prev, +- firstblock, dfops, &logflags); ++ ip, whichfork, &lastx, &cur, ++ &prev, firstblock, dfops, ++ &logflags); + if (error) + goto error0; + goto nodelete; +@@ -5619,8 +5661,9 @@ __xfs_bunmapi( + ASSERT(del.br_state == XFS_EXT_NORM); + del.br_state = XFS_EXT_UNWRITTEN; + error = xfs_bmap_add_extent_unwritten_real(tp, +- ip, &lastx, &cur, &del, +- firstblock, dfops, &logflags); ++ ip, whichfork, &lastx, &cur, ++ &del, firstblock, dfops, ++ &logflags); + if (error) + goto error0; + goto nodelete; +diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c +index d9be241fc86f..999cc5878890 100644 +--- a/fs/xfs/libxfs/xfs_bmap_btree.c ++++ b/fs/xfs/libxfs/xfs_bmap_btree.c +@@ -453,8 +453,8 @@ xfs_bmbt_alloc_block( + + if (args.fsbno == NULLFSBLOCK) { + args.fsbno = be64_to_cpu(start->l); +-try_another_ag: + args.type = XFS_ALLOCTYPE_START_BNO; ++try_another_ag: + /* + * Make sure there is sufficient room left in the AG to + * complete a full tree split for an extent insert. 
If +@@ -494,8 +494,8 @@ xfs_bmbt_alloc_block( + if (xfs_sb_version_hasreflink(&cur->bc_mp->m_sb) && + args.fsbno == NULLFSBLOCK && + args.type == XFS_ALLOCTYPE_NEAR_BNO) { +- cur->bc_private.b.dfops->dop_low = true; + args.fsbno = cur->bc_private.b.firstblock; ++ args.type = XFS_ALLOCTYPE_FIRST_AG; + goto try_another_ag; + } + +@@ -512,7 +512,7 @@ xfs_bmbt_alloc_block( + goto error0; + cur->bc_private.b.dfops->dop_low = true; + } +- if (args.fsbno == NULLFSBLOCK) { ++ if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) { + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + *stat = 0; + return 0; +diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c +index 21e6a6ab6b9a..2849d3fa3d0b 100644 +--- a/fs/xfs/libxfs/xfs_btree.c ++++ b/fs/xfs/libxfs/xfs_btree.c +@@ -810,7 +810,8 @@ xfs_btree_read_bufl( + xfs_daddr_t d; /* real disk block address */ + int error; + +- ASSERT(fsbno != NULLFSBLOCK); ++ if (!XFS_FSB_SANITY_CHECK(mp, fsbno)) ++ return -EFSCORRUPTED; + d = XFS_FSB_TO_DADDR(mp, fsbno); + error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d, + mp->m_bsize, lock, &bp, ops); +diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h +index b69b947c4c1b..33a8f8694d30 100644 +--- a/fs/xfs/libxfs/xfs_btree.h ++++ b/fs/xfs/libxfs/xfs_btree.h +@@ -456,7 +456,7 @@ static inline int xfs_btree_get_level(struct xfs_btree_block *block) + #define XFS_FILBLKS_MAX(a,b) max_t(xfs_filblks_t, (a), (b)) + + #define XFS_FSB_SANITY_CHECK(mp,fsb) \ +- (XFS_FSB_TO_AGNO(mp, fsb) < mp->m_sb.sb_agcount && \ ++ (fsb && XFS_FSB_TO_AGNO(mp, fsb) < mp->m_sb.sb_agcount && \ + XFS_FSB_TO_AGBNO(mp, fsb) < mp->m_sb.sb_agblocks) + + /* +diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c +index f2dc1a950c85..1bdf2888295b 100644 +--- a/fs/xfs/libxfs/xfs_da_btree.c ++++ b/fs/xfs/libxfs/xfs_da_btree.c +@@ -2633,7 +2633,7 @@ xfs_da_read_buf( + /* + * Readahead the dir/attr block. + */ +-xfs_daddr_t ++int + xfs_da_reada_buf( + struct xfs_inode *dp, + xfs_dablk_t bno, +@@ -2664,7 +2664,5 @@ xfs_da_reada_buf( + if (mapp != &map) + kmem_free(mapp); + +- if (error) +- return -1; +- return mappedbno; ++ return error; + } +diff --git a/fs/xfs/libxfs/xfs_da_btree.h b/fs/xfs/libxfs/xfs_da_btree.h +index 98c75cbe6ac2..4e29cb6a3627 100644 +--- a/fs/xfs/libxfs/xfs_da_btree.h ++++ b/fs/xfs/libxfs/xfs_da_btree.h +@@ -201,7 +201,7 @@ int xfs_da_read_buf(struct xfs_trans *trans, struct xfs_inode *dp, + xfs_dablk_t bno, xfs_daddr_t mappedbno, + struct xfs_buf **bpp, int whichfork, + const struct xfs_buf_ops *ops); +-xfs_daddr_t xfs_da_reada_buf(struct xfs_inode *dp, xfs_dablk_t bno, ++int xfs_da_reada_buf(struct xfs_inode *dp, xfs_dablk_t bno, + xfs_daddr_t mapped_bno, int whichfork, + const struct xfs_buf_ops *ops); + int xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno, +diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c +index 75a557432d0f..bbd1238852b3 100644 +--- a/fs/xfs/libxfs/xfs_dir2_node.c ++++ b/fs/xfs/libxfs/xfs_dir2_node.c +@@ -155,6 +155,42 @@ const struct xfs_buf_ops xfs_dir3_free_buf_ops = { + .verify_write = xfs_dir3_free_write_verify, + }; + ++/* Everything ok in the free block header? 
*/ ++static bool ++xfs_dir3_free_header_check( ++ struct xfs_inode *dp, ++ xfs_dablk_t fbno, ++ struct xfs_buf *bp) ++{ ++ struct xfs_mount *mp = dp->i_mount; ++ unsigned int firstdb; ++ int maxbests; ++ ++ maxbests = dp->d_ops->free_max_bests(mp->m_dir_geo); ++ firstdb = (xfs_dir2_da_to_db(mp->m_dir_geo, fbno) - ++ xfs_dir2_byte_to_db(mp->m_dir_geo, XFS_DIR2_FREE_OFFSET)) * ++ maxbests; ++ if (xfs_sb_version_hascrc(&mp->m_sb)) { ++ struct xfs_dir3_free_hdr *hdr3 = bp->b_addr; ++ ++ if (be32_to_cpu(hdr3->firstdb) != firstdb) ++ return false; ++ if (be32_to_cpu(hdr3->nvalid) > maxbests) ++ return false; ++ if (be32_to_cpu(hdr3->nvalid) < be32_to_cpu(hdr3->nused)) ++ return false; ++ } else { ++ struct xfs_dir2_free_hdr *hdr = bp->b_addr; ++ ++ if (be32_to_cpu(hdr->firstdb) != firstdb) ++ return false; ++ if (be32_to_cpu(hdr->nvalid) > maxbests) ++ return false; ++ if (be32_to_cpu(hdr->nvalid) < be32_to_cpu(hdr->nused)) ++ return false; ++ } ++ return true; ++} + + static int + __xfs_dir3_free_read( +@@ -168,11 +204,22 @@ __xfs_dir3_free_read( + + err = xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp, + XFS_DATA_FORK, &xfs_dir3_free_buf_ops); ++ if (err || !*bpp) ++ return err; ++ ++ /* Check things that we can't do in the verifier. */ ++ if (!xfs_dir3_free_header_check(dp, fbno, *bpp)) { ++ xfs_buf_ioerror(*bpp, -EFSCORRUPTED); ++ xfs_verifier_error(*bpp); ++ xfs_trans_brelse(tp, *bpp); ++ return -EFSCORRUPTED; ++ } + + /* try read returns without an error or *bpp if it lands in a hole */ +- if (!err && tp && *bpp) ++ if (tp) + xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_FREE_BUF); +- return err; ++ ++ return 0; + } + + int +diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c +index f272abff11e1..d41ade5d293e 100644 +--- a/fs/xfs/libxfs/xfs_ialloc.c ++++ b/fs/xfs/libxfs/xfs_ialloc.c +@@ -51,8 +51,7 @@ xfs_ialloc_cluster_alignment( + struct xfs_mount *mp) + { + if (xfs_sb_version_hasalign(&mp->m_sb) && +- mp->m_sb.sb_inoalignmt >= +- XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size)) ++ mp->m_sb.sb_inoalignmt >= xfs_icluster_size_fsb(mp)) + return mp->m_sb.sb_inoalignmt; + return 1; + } +diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c +index 222e103356c6..25c1e078aef6 100644 +--- a/fs/xfs/libxfs/xfs_inode_fork.c ++++ b/fs/xfs/libxfs/xfs_inode_fork.c +@@ -26,6 +26,7 @@ + #include "xfs_inode.h" + #include "xfs_trans.h" + #include "xfs_inode_item.h" ++#include "xfs_btree.h" + #include "xfs_bmap_btree.h" + #include "xfs_bmap.h" + #include "xfs_error.h" +@@ -429,11 +430,13 @@ xfs_iformat_btree( + /* REFERENCED */ + int nrecs; + int size; ++ int level; + + ifp = XFS_IFORK_PTR(ip, whichfork); + dfp = (xfs_bmdr_block_t *)XFS_DFORK_PTR(dip, whichfork); + size = XFS_BMAP_BROOT_SPACE(mp, dfp); + nrecs = be16_to_cpu(dfp->bb_numrecs); ++ level = be16_to_cpu(dfp->bb_level); + + /* + * blow out if -- fork has less extents than can fit in +@@ -446,7 +449,8 @@ xfs_iformat_btree( + XFS_IFORK_MAXEXT(ip, whichfork) || + XFS_BMDR_SPACE_CALC(nrecs) > + XFS_DFORK_SIZE(dip, mp, whichfork) || +- XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks)) { ++ XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks) || ++ level == 0 || level > XFS_BTREE_MAXLEVELS) { + xfs_warn(mp, "corrupt inode %Lu (btree).", + (unsigned long long) ip->i_ino); + XFS_CORRUPTION_ERROR("xfs_iformat_btree", XFS_ERRLEVEL_LOW, +@@ -497,15 +501,14 @@ xfs_iread_extents( + * We know that the size is valid (it's checked in iformat_btree) + */ + ifp->if_bytes = ifp->if_real_bytes = 0; +- ifp->if_flags |= 
XFS_IFEXTENTS; + xfs_iext_add(ifp, 0, nextents); + error = xfs_bmap_read_extents(tp, ip, whichfork); + if (error) { + xfs_iext_destroy(ifp); +- ifp->if_flags &= ~XFS_IFEXTENTS; + return error; + } + xfs_validate_extents(ifp, nextents, XFS_EXTFMT_INODE(ip)); ++ ifp->if_flags |= XFS_IFEXTENTS; + return 0; + } + /* +diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c +index 631e7c0e0a29..937d406d3c11 100644 +--- a/fs/xfs/xfs_aops.c ++++ b/fs/xfs/xfs_aops.c +@@ -274,54 +274,49 @@ xfs_end_io( + struct xfs_ioend *ioend = + container_of(work, struct xfs_ioend, io_work); + struct xfs_inode *ip = XFS_I(ioend->io_inode); ++ xfs_off_t offset = ioend->io_offset; ++ size_t size = ioend->io_size; + int error = ioend->io_bio->bi_error; + + /* +- * Set an error if the mount has shut down and proceed with end I/O +- * processing so it can perform whatever cleanups are necessary. ++ * Just clean up the in-memory strutures if the fs has been shut down. + */ +- if (XFS_FORCED_SHUTDOWN(ip->i_mount)) ++ if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { + error = -EIO; ++ goto done; ++ } + + /* +- * For a CoW extent, we need to move the mapping from the CoW fork +- * to the data fork. If instead an error happened, just dump the +- * new blocks. ++ * Clean up any COW blocks on an I/O error. + */ +- if (ioend->io_type == XFS_IO_COW) { +- if (error) +- goto done; +- if (ioend->io_bio->bi_error) { +- error = xfs_reflink_cancel_cow_range(ip, +- ioend->io_offset, ioend->io_size); +- goto done; ++ if (unlikely(error)) { ++ switch (ioend->io_type) { ++ case XFS_IO_COW: ++ xfs_reflink_cancel_cow_range(ip, offset, size, true); ++ break; + } +- error = xfs_reflink_end_cow(ip, ioend->io_offset, +- ioend->io_size); +- if (error) +- goto done; ++ ++ goto done; + } + + /* +- * For unwritten extents we need to issue transactions to convert a +- * range to normal written extens after the data I/O has finished. +- * Detecting and handling completion IO errors is done individually +- * for each case as different cleanup operations need to be performed +- * on error. ++ * Success: commit the COW or unwritten blocks if needed. + */ +- if (ioend->io_type == XFS_IO_UNWRITTEN) { +- if (error) +- goto done; +- error = xfs_iomap_write_unwritten(ip, ioend->io_offset, +- ioend->io_size); +- } else if (ioend->io_append_trans) { +- error = xfs_setfilesize_ioend(ioend, error); +- } else { +- ASSERT(!xfs_ioend_is_append(ioend) || +- ioend->io_type == XFS_IO_COW); ++ switch (ioend->io_type) { ++ case XFS_IO_COW: ++ error = xfs_reflink_end_cow(ip, offset, size); ++ break; ++ case XFS_IO_UNWRITTEN: ++ error = xfs_iomap_write_unwritten(ip, offset, size); ++ break; ++ default: ++ ASSERT(!xfs_ioend_is_append(ioend) || ioend->io_append_trans); ++ break; + } + + done: ++ if (ioend->io_append_trans) ++ error = xfs_setfilesize_ioend(ioend, error); + xfs_destroy_ioend(ioend, error); + } + +@@ -481,6 +476,12 @@ xfs_submit_ioend( + struct xfs_ioend *ioend, + int status) + { ++ /* Convert CoW extents to regular */ ++ if (!status && ioend->io_type == XFS_IO_COW) { ++ status = xfs_reflink_convert_cow(XFS_I(ioend->io_inode), ++ ioend->io_offset, ioend->io_size); ++ } ++ + /* Reserve log space if we might write beyond the on-disk inode size. 
*/ + if (!status && + ioend->io_type != XFS_IO_UNWRITTEN && +diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c +index c1417919ab0a..c516d7158a21 100644 +--- a/fs/xfs/xfs_bmap_util.c ++++ b/fs/xfs/xfs_bmap_util.c +@@ -917,17 +917,18 @@ xfs_can_free_eofblocks(struct xfs_inode *ip, bool force) + */ + int + xfs_free_eofblocks( +- xfs_mount_t *mp, +- xfs_inode_t *ip, +- bool need_iolock) ++ struct xfs_inode *ip) + { +- xfs_trans_t *tp; +- int error; +- xfs_fileoff_t end_fsb; +- xfs_fileoff_t last_fsb; +- xfs_filblks_t map_len; +- int nimaps; +- xfs_bmbt_irec_t imap; ++ struct xfs_trans *tp; ++ int error; ++ xfs_fileoff_t end_fsb; ++ xfs_fileoff_t last_fsb; ++ xfs_filblks_t map_len; ++ int nimaps; ++ struct xfs_bmbt_irec imap; ++ struct xfs_mount *mp = ip->i_mount; ++ ++ ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); + + /* + * Figure out if there are any blocks beyond the end +@@ -944,6 +945,10 @@ xfs_free_eofblocks( + error = xfs_bmapi_read(ip, end_fsb, map_len, &imap, &nimaps, 0); + xfs_iunlock(ip, XFS_ILOCK_SHARED); + ++ /* ++ * If there are blocks after the end of file, truncate the file to its ++ * current size to free them up. ++ */ + if (!error && (nimaps != 0) && + (imap.br_startblock != HOLESTARTBLOCK || + ip->i_delayed_blks)) { +@@ -954,22 +959,13 @@ xfs_free_eofblocks( + if (error) + return error; + +- /* +- * There are blocks after the end of file. +- * Free them up now by truncating the file to +- * its current size. +- */ +- if (need_iolock) { +- if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) +- return -EAGAIN; +- } ++ /* wait on dio to ensure i_size has settled */ ++ inode_dio_wait(VFS_I(ip)); + + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, + &tp); + if (error) { + ASSERT(XFS_FORCED_SHUTDOWN(mp)); +- if (need_iolock) +- xfs_iunlock(ip, XFS_IOLOCK_EXCL); + return error; + } + +@@ -997,8 +993,6 @@ xfs_free_eofblocks( + } + + xfs_iunlock(ip, XFS_ILOCK_EXCL); +- if (need_iolock) +- xfs_iunlock(ip, XFS_IOLOCK_EXCL); + } + return error; + } +@@ -1393,10 +1387,16 @@ xfs_shift_file_space( + xfs_fileoff_t stop_fsb; + xfs_fileoff_t next_fsb; + xfs_fileoff_t shift_fsb; ++ uint resblks; + + ASSERT(direction == SHIFT_LEFT || direction == SHIFT_RIGHT); + + if (direction == SHIFT_LEFT) { ++ /* ++ * Reserve blocks to cover potential extent merges after left ++ * shift operations. ++ */ ++ resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0); + next_fsb = XFS_B_TO_FSB(mp, offset + len); + stop_fsb = XFS_B_TO_FSB(mp, VFS_I(ip)->i_size); + } else { +@@ -1404,6 +1404,7 @@ xfs_shift_file_space( + * If right shift, delegate the work of initialization of + * next_fsb to xfs_bmap_shift_extent as it has ilock held. + */ ++ resblks = 0; + next_fsb = NULLFSBLOCK; + stop_fsb = XFS_B_TO_FSB(mp, offset); + } +@@ -1415,7 +1416,7 @@ xfs_shift_file_space( + * into the accessible region of the file. + */ + if (xfs_can_free_eofblocks(ip, true)) { +- error = xfs_free_eofblocks(mp, ip, false); ++ error = xfs_free_eofblocks(ip); + if (error) + return error; + } +@@ -1445,21 +1446,14 @@ xfs_shift_file_space( + } + + while (!error && !done) { +- /* +- * We would need to reserve permanent block for transaction. +- * This will come into picture when after shifting extent into +- * hole we found that adjacent extents can be merged which +- * may lead to freeing of a block during record update. 
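
With the need_iolock parameter gone, responsibility for the IOLOCK moves to the callers of xfs_free_eofblocks(). A minimal sketch of the new convention, mirroring the xfs_inactive() call site later in this patch; the trylock variant in xfs_release() works the same way:

STATIC int
trim_eofblocks(
	struct xfs_inode	*ip)
{
	int			error = 0;

	if (xfs_can_free_eofblocks(ip, true)) {
		/* the function now asserts this lock is held */
		xfs_ilock(ip, XFS_IOLOCK_EXCL);
		error = xfs_free_eofblocks(ip);
		xfs_iunlock(ip, XFS_IOLOCK_EXCL);
	}
	return error;
}
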
+- */ +- error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, +- XFS_DIOSTRAT_SPACE_RES(mp, 0), 0, 0, &tp); ++ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, ++ &tp); + if (error) + break; + + xfs_ilock(ip, XFS_ILOCK_EXCL); + error = xfs_trans_reserve_quota(tp, mp, ip->i_udquot, +- ip->i_gdquot, ip->i_pdquot, +- XFS_DIOSTRAT_SPACE_RES(mp, 0), 0, ++ ip->i_gdquot, ip->i_pdquot, resblks, 0, + XFS_QMOPT_RES_REGBLKS); + if (error) + goto out_trans_cancel; +diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h +index 68a621a8e0c0..f1005393785c 100644 +--- a/fs/xfs/xfs_bmap_util.h ++++ b/fs/xfs/xfs_bmap_util.h +@@ -63,8 +63,7 @@ int xfs_insert_file_space(struct xfs_inode *, xfs_off_t offset, + + /* EOF block manipulation functions */ + bool xfs_can_free_eofblocks(struct xfs_inode *ip, bool force); +-int xfs_free_eofblocks(struct xfs_mount *mp, struct xfs_inode *ip, +- bool need_iolock); ++int xfs_free_eofblocks(struct xfs_inode *ip); + + int xfs_swap_extents(struct xfs_inode *ip, struct xfs_inode *tip, + struct xfs_swapext *sx); +diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c +index 2975cb2319f4..0306168af332 100644 +--- a/fs/xfs/xfs_buf_item.c ++++ b/fs/xfs/xfs_buf_item.c +@@ -1162,6 +1162,7 @@ xfs_buf_iodone_callbacks( + */ + bp->b_last_error = 0; + bp->b_retries = 0; ++ bp->b_first_retry_time = 0; + + xfs_buf_do_callbacks(bp); + bp->b_fspriv = NULL; +diff --git a/fs/xfs/xfs_extent_busy.c b/fs/xfs/xfs_extent_busy.c +index 162dc186cf04..29c2f997aedf 100644 +--- a/fs/xfs/xfs_extent_busy.c ++++ b/fs/xfs/xfs_extent_busy.c +@@ -45,18 +45,7 @@ xfs_extent_busy_insert( + struct rb_node **rbp; + struct rb_node *parent = NULL; + +- new = kmem_zalloc(sizeof(struct xfs_extent_busy), KM_MAYFAIL); +- if (!new) { +- /* +- * No Memory! Since it is now not possible to track the free +- * block, make this a synchronous transaction to insure that +- * the block is not reused before this transaction commits. +- */ +- trace_xfs_extent_busy_enomem(tp->t_mountp, agno, bno, len); +- xfs_trans_set_sync(tp); +- return; +- } +- ++ new = kmem_zalloc(sizeof(struct xfs_extent_busy), KM_SLEEP); + new->agno = agno; + new->bno = bno; + new->length = len; +diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c +index bbb9eb6811b2..2a695a8f4fe7 100644 +--- a/fs/xfs/xfs_file.c ++++ b/fs/xfs/xfs_file.c +@@ -527,6 +527,15 @@ xfs_file_dio_aio_write( + if ((iocb->ki_pos & mp->m_blockmask) || + ((iocb->ki_pos + count) & mp->m_blockmask)) { + unaligned_io = 1; ++ ++ /* ++ * We can't properly handle unaligned direct I/O to reflink ++ * files yet, as we can't unshare a partial block. ++ */ ++ if (xfs_is_reflink_inode(ip)) { ++ trace_xfs_reflink_bounce_dio_write(ip, iocb->ki_pos, count); ++ return -EREMCHG; ++ } + iolock = XFS_IOLOCK_EXCL; + } else { + iolock = XFS_IOLOCK_SHARED; +@@ -614,8 +623,10 @@ xfs_file_buffered_aio_write( + struct xfs_inode *ip = XFS_I(inode); + ssize_t ret; + int enospc = 0; +- int iolock = XFS_IOLOCK_EXCL; ++ int iolock; + ++write_retry: ++ iolock = XFS_IOLOCK_EXCL; + xfs_ilock(ip, iolock); + + ret = xfs_file_aio_write_checks(iocb, from, &iolock); +@@ -625,7 +636,6 @@ xfs_file_buffered_aio_write( + /* We can write back this queue in page reclaim */ + current->backing_dev_info = inode_to_bdi(inode); + +-write_retry: + trace_xfs_file_buffered_write(ip, iov_iter_count(from), iocb->ki_pos); + ret = iomap_file_buffered_write(iocb, from, &xfs_iomap_ops); + if (likely(ret >= 0)) +@@ -641,18 +651,21 @@ xfs_file_buffered_aio_write( + * running at the same time. 
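
The -EREMCHG returned by the unaligned-reflink check earlier in this chunk is consumed one level up. A simplified sketch of how xfs_file_write_iter() in this kernel reacts, with surrounding checks elided: the direct path signals that it cannot handle the request, and the whole write is retried through the buffered path:

	if (iocb->ki_flags & IOCB_DIRECT) {
		ret = xfs_file_dio_aio_write(iocb, from);
		if (ret == -EREMCHG)
			ret = xfs_file_buffered_aio_write(iocb, from);
	} else {
		ret = xfs_file_buffered_aio_write(iocb, from);
	}
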
+ */ + if (ret == -EDQUOT && !enospc) { ++ xfs_iunlock(ip, iolock); + enospc = xfs_inode_free_quota_eofblocks(ip); + if (enospc) + goto write_retry; + enospc = xfs_inode_free_quota_cowblocks(ip); + if (enospc) + goto write_retry; ++ iolock = 0; + } else if (ret == -ENOSPC && !enospc) { + struct xfs_eofblocks eofb = {0}; + + enospc = 1; + xfs_flush_inodes(ip->i_mount); +- eofb.eof_scan_owner = ip->i_ino; /* for locking */ ++ ++ xfs_iunlock(ip, iolock); + eofb.eof_flags = XFS_EOF_FLAGS_SYNC; + xfs_icache_free_eofblocks(ip->i_mount, &eofb); + goto write_retry; +@@ -660,7 +673,8 @@ xfs_file_buffered_aio_write( + + current->backing_dev_info = NULL; + out: +- xfs_iunlock(ip, iolock); ++ if (iolock) ++ xfs_iunlock(ip, iolock); + return ret; + } + +@@ -908,9 +922,9 @@ xfs_dir_open( + */ + mode = xfs_ilock_data_map_shared(ip); + if (ip->i_d.di_nextents > 0) +- xfs_dir3_data_readahead(ip, 0, -1); ++ error = xfs_dir3_data_readahead(ip, 0, -1); + xfs_iunlock(ip, mode); +- return 0; ++ return error; + } + + STATIC int +diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c +index 70ca4f608321..3531f8f72fa5 100644 +--- a/fs/xfs/xfs_icache.c ++++ b/fs/xfs/xfs_icache.c +@@ -1322,13 +1322,10 @@ xfs_inode_free_eofblocks( + int flags, + void *args) + { +- int ret; ++ int ret = 0; + struct xfs_eofblocks *eofb = args; +- bool need_iolock = true; + int match; + +- ASSERT(!eofb || (eofb && eofb->eof_scan_owner != 0)); +- + if (!xfs_can_free_eofblocks(ip, false)) { + /* inode could be preallocated or append-only */ + trace_xfs_inode_free_eofblocks_invalid(ip); +@@ -1356,21 +1353,19 @@ xfs_inode_free_eofblocks( + if (eofb->eof_flags & XFS_EOF_FLAGS_MINFILESIZE && + XFS_ISIZE(ip) < eofb->eof_min_file_size) + return 0; +- +- /* +- * A scan owner implies we already hold the iolock. Skip it in +- * xfs_free_eofblocks() to avoid deadlock. This also eliminates +- * the possibility of EAGAIN being returned. +- */ +- if (eofb->eof_scan_owner == ip->i_ino) +- need_iolock = false; + } + +- ret = xfs_free_eofblocks(ip->i_mount, ip, need_iolock); +- +- /* don't revisit the inode if we're not waiting */ +- if (ret == -EAGAIN && !(flags & SYNC_WAIT)) +- ret = 0; ++ /* ++ * If the caller is waiting, return -EAGAIN to keep the background ++ * scanner moving and revisit the inode in a subsequent pass. ++ */ ++ if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) { ++ if (flags & SYNC_WAIT) ++ ret = -EAGAIN; ++ return ret; ++ } ++ ret = xfs_free_eofblocks(ip); ++ xfs_iunlock(ip, XFS_IOLOCK_EXCL); + + return ret; + } +@@ -1417,15 +1412,10 @@ __xfs_inode_free_quota_eofblocks( + struct xfs_eofblocks eofb = {0}; + struct xfs_dquot *dq; + +- ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); +- + /* +- * Set the scan owner to avoid a potential livelock. Otherwise, the scan +- * can repeatedly trylock on the inode we're currently processing. We +- * run a sync scan to increase effectiveness and use the union filter to ++ * Run a sync scan to increase effectiveness and use the union filter to + * cover all applicable quotas in a single scan. + */ +- eofb.eof_scan_owner = ip->i_ino; + eofb.eof_flags = XFS_EOF_FLAGS_UNION|XFS_EOF_FLAGS_SYNC; + + if (XFS_IS_UQUOTA_ENFORCED(ip->i_mount)) { +@@ -1577,12 +1567,9 @@ xfs_inode_free_cowblocks( + { + int ret; + struct xfs_eofblocks *eofb = args; +- bool need_iolock = true; + int match; + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); + +- ASSERT(!eofb || (eofb && eofb->eof_scan_owner != 0)); +- + /* + * Just clear the tag if we have an empty cow fork or none at all. 
It's + * possible the inode was fully unshared since it was originally tagged. +@@ -1615,28 +1602,16 @@ xfs_inode_free_cowblocks( + if (eofb->eof_flags & XFS_EOF_FLAGS_MINFILESIZE && + XFS_ISIZE(ip) < eofb->eof_min_file_size) + return 0; +- +- /* +- * A scan owner implies we already hold the iolock. Skip it in +- * xfs_free_eofblocks() to avoid deadlock. This also eliminates +- * the possibility of EAGAIN being returned. +- */ +- if (eofb->eof_scan_owner == ip->i_ino) +- need_iolock = false; + } + + /* Free the CoW blocks */ +- if (need_iolock) { +- xfs_ilock(ip, XFS_IOLOCK_EXCL); +- xfs_ilock(ip, XFS_MMAPLOCK_EXCL); +- } ++ xfs_ilock(ip, XFS_IOLOCK_EXCL); ++ xfs_ilock(ip, XFS_MMAPLOCK_EXCL); + +- ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF); ++ ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, false); + +- if (need_iolock) { +- xfs_iunlock(ip, XFS_MMAPLOCK_EXCL); +- xfs_iunlock(ip, XFS_IOLOCK_EXCL); +- } ++ xfs_iunlock(ip, XFS_MMAPLOCK_EXCL); ++ xfs_iunlock(ip, XFS_IOLOCK_EXCL); + + return ret; + } +diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h +index a1e02f4708ab..8a7c849b4dea 100644 +--- a/fs/xfs/xfs_icache.h ++++ b/fs/xfs/xfs_icache.h +@@ -27,7 +27,6 @@ struct xfs_eofblocks { + kgid_t eof_gid; + prid_t eof_prid; + __u64 eof_min_file_size; +- xfs_ino_t eof_scan_owner; + }; + + #define SYNC_WAIT 0x0001 /* wait for i/o to complete */ +@@ -102,7 +101,6 @@ xfs_fs_eofblocks_from_user( + dst->eof_flags = src->eof_flags; + dst->eof_prid = src->eof_prid; + dst->eof_min_file_size = src->eof_min_file_size; +- dst->eof_scan_owner = NULLFSINO; + + dst->eof_uid = INVALID_UID; + if (src->eof_flags & XFS_EOF_FLAGS_UID) { +diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c +index de32f0fe47c8..7eaf1ef74e3c 100644 +--- a/fs/xfs/xfs_inode.c ++++ b/fs/xfs/xfs_inode.c +@@ -1615,7 +1615,7 @@ xfs_itruncate_extents( + + /* Remove all pending CoW reservations. */ + error = xfs_reflink_cancel_cow_blocks(ip, &tp, first_unmap_block, +- last_block); ++ last_block, true); + if (error) + goto out; + +@@ -1692,32 +1692,34 @@ xfs_release( + if (xfs_can_free_eofblocks(ip, false)) { + + /* ++ * Check if the inode is being opened, written and closed ++ * frequently and we have delayed allocation blocks outstanding ++ * (e.g. streaming writes from the NFS server), truncating the ++ * blocks past EOF will cause fragmentation to occur. ++ * ++ * In this case don't do the truncation, but we have to be ++ * careful how we detect this case. Blocks beyond EOF show up as ++ * i_delayed_blks even when the inode is clean, so we need to ++ * truncate them away first before checking for a dirty release. ++ * Hence on the first dirty close we will still remove the ++ * speculative allocation, but after that we will leave it in ++ * place. ++ */ ++ if (xfs_iflags_test(ip, XFS_IDIRTY_RELEASE)) ++ return 0; ++ /* + * If we can't get the iolock just skip truncating the blocks + * past EOF because we could deadlock with the mmap_sem +- * otherwise. We'll get another chance to drop them once the ++ * otherwise. We'll get another chance to drop them once the + * last reference to the inode is dropped, so we'll never leak + * blocks permanently. +- * +- * Further, check if the inode is being opened, written and +- * closed frequently and we have delayed allocation blocks +- * outstanding (e.g. streaming writes from the NFS server), +- * truncating the blocks past EOF will cause fragmentation to +- * occur. +- * +- * In this case don't do the truncation, either, but we have to +- * be careful how we detect this case. 
Blocks beyond EOF show +- * up as i_delayed_blks even when the inode is clean, so we +- * need to truncate them away first before checking for a dirty +- * release. Hence on the first dirty close we will still remove +- * the speculative allocation, but after that we will leave it +- * in place. + */ +- if (xfs_iflags_test(ip, XFS_IDIRTY_RELEASE)) +- return 0; +- +- error = xfs_free_eofblocks(mp, ip, true); +- if (error && error != -EAGAIN) +- return error; ++ if (xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) { ++ error = xfs_free_eofblocks(ip); ++ xfs_iunlock(ip, XFS_IOLOCK_EXCL); ++ if (error) ++ return error; ++ } + + /* delalloc blocks after truncation means it really is dirty */ + if (ip->i_delayed_blks) +@@ -1904,8 +1906,11 @@ xfs_inactive( + * cache. Post-eof blocks must be freed, lest we end up with + * broken free space accounting. + */ +- if (xfs_can_free_eofblocks(ip, true)) +- xfs_free_eofblocks(mp, ip, false); ++ if (xfs_can_free_eofblocks(ip, true)) { ++ xfs_ilock(ip, XFS_IOLOCK_EXCL); ++ xfs_free_eofblocks(ip); ++ xfs_iunlock(ip, XFS_IOLOCK_EXCL); ++ } + + return; + } +diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c +index fdecf79d2fa4..2326a6913fde 100644 +--- a/fs/xfs/xfs_iomap.c ++++ b/fs/xfs/xfs_iomap.c +@@ -637,6 +637,11 @@ xfs_file_iomap_begin_delay( + goto out_unlock; + } + ++ /* ++ * Flag newly allocated delalloc blocks with IOMAP_F_NEW so we punch ++ * them out if the write happens to fail. ++ */ ++ iomap->flags = IOMAP_F_NEW; + trace_xfs_iomap_alloc(ip, offset, count, 0, &got); + done: + if (isnullstartblock(got.br_startblock)) +@@ -685,7 +690,7 @@ xfs_iomap_write_allocate( + int nres; + + if (whichfork == XFS_COW_FORK) +- flags |= XFS_BMAPI_COWFORK; ++ flags |= XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC; + + /* + * Make sure that the dquots are there. +@@ -1026,17 +1031,7 @@ xfs_file_iomap_begin( + if (error) + goto out_unlock; + +- /* +- * We're here because we're trying to do a directio write to a +- * region that isn't aligned to a filesystem block. If the +- * extent is shared, fall back to buffered mode to handle the +- * RMW. +- */ +- if (!(flags & IOMAP_REPORT) && shared) { +- trace_xfs_reflink_bounce_dio_write(ip, &imap); +- error = -EREMCHG; +- goto out_unlock; +- } ++ ASSERT((flags & IOMAP_REPORT) || !shared); + } + + if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && xfs_is_reflink_inode(ip)) { +@@ -1095,7 +1090,8 @@ xfs_file_iomap_end_delalloc( + struct xfs_inode *ip, + loff_t offset, + loff_t length, +- ssize_t written) ++ ssize_t written, ++ struct iomap *iomap) + { + struct xfs_mount *mp = ip->i_mount; + xfs_fileoff_t start_fsb; +@@ -1114,14 +1110,14 @@ xfs_file_iomap_end_delalloc( + end_fsb = XFS_B_TO_FSB(mp, offset + length); + + /* +- * Trim back delalloc blocks if we didn't manage to write the whole +- * range reserved. ++ * Trim delalloc blocks if they were allocated by this write and we ++ * didn't manage to write the whole range. + * + * We don't need to care about racing delalloc as we hold i_mutex + * across the reserve/allocate/unreserve calls. If there are delalloc + * blocks in the range, they are ours. 
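
The IOMAP_F_NEW flag set in the iomap hunk above and tested just below forms a small contract between the two hooks: the begin side marks only mappings it allocated itself, and the end side punches unused delalloc blocks only for such mappings, leaving a racing writer's preexisting reservations alone. A sketch of both sides with hypothetical helper names; the flag and the guard are as used by this patch:

static void mark_if_new(struct iomap *iomap, bool allocated)
{
	if (allocated)
		iomap->flags = IOMAP_F_NEW;	/* set in ->iomap_begin */
}

static bool may_punch_delalloc(const struct iomap *iomap,
			       xfs_fileoff_t start_fsb,
			       xfs_fileoff_t end_fsb)
{
	/* checked in ->iomap_end before trimming the range */
	return (iomap->flags & IOMAP_F_NEW) && start_fsb < end_fsb;
}
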
+ */ +- if (start_fsb < end_fsb) { ++ if ((iomap->flags & IOMAP_F_NEW) && start_fsb < end_fsb) { + truncate_pagecache_range(VFS_I(ip), XFS_FSB_TO_B(mp, start_fsb), + XFS_FSB_TO_B(mp, end_fsb) - 1); + +@@ -1151,7 +1147,7 @@ xfs_file_iomap_end( + { + if ((flags & IOMAP_WRITE) && iomap->type == IOMAP_DELALLOC) + return xfs_file_iomap_end_delalloc(XFS_I(inode), offset, +- length, written); ++ length, written, iomap); + return 0; + } + +diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c +index 9b9540db17a6..52d27cc4370a 100644 +--- a/fs/xfs/xfs_mount.c ++++ b/fs/xfs/xfs_mount.c +@@ -187,7 +187,7 @@ xfs_initialize_perag( + xfs_agnumber_t *maxagi) + { + xfs_agnumber_t index; +- xfs_agnumber_t first_initialised = 0; ++ xfs_agnumber_t first_initialised = NULLAGNUMBER; + xfs_perag_t *pag; + int error = -ENOMEM; + +@@ -202,22 +202,20 @@ xfs_initialize_perag( + xfs_perag_put(pag); + continue; + } +- if (!first_initialised) +- first_initialised = index; + + pag = kmem_zalloc(sizeof(*pag), KM_MAYFAIL); + if (!pag) +- goto out_unwind; ++ goto out_unwind_new_pags; + pag->pag_agno = index; + pag->pag_mount = mp; + spin_lock_init(&pag->pag_ici_lock); + mutex_init(&pag->pag_ici_reclaim_lock); + INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC); + if (xfs_buf_hash_init(pag)) +- goto out_unwind; ++ goto out_free_pag; + + if (radix_tree_preload(GFP_NOFS)) +- goto out_unwind; ++ goto out_hash_destroy; + + spin_lock(&mp->m_perag_lock); + if (radix_tree_insert(&mp->m_perag_tree, index, pag)) { +@@ -225,10 +223,13 @@ xfs_initialize_perag( + spin_unlock(&mp->m_perag_lock); + radix_tree_preload_end(); + error = -EEXIST; +- goto out_unwind; ++ goto out_hash_destroy; + } + spin_unlock(&mp->m_perag_lock); + radix_tree_preload_end(); ++ /* first new pag is fully initialized */ ++ if (first_initialised == NULLAGNUMBER) ++ first_initialised = index; + } + + index = xfs_set_inode_alloc(mp, agcount); +@@ -239,11 +240,16 @@ xfs_initialize_perag( + mp->m_ag_prealloc_blocks = xfs_prealloc_blocks(mp); + return 0; + +-out_unwind: ++out_hash_destroy: + xfs_buf_hash_destroy(pag); ++out_free_pag: + kmem_free(pag); +- for (; index > first_initialised; index--) { ++out_unwind_new_pags: ++ /* unwind any prior newly initialized pags */ ++ for (index = first_initialised; index < agcount; index++) { + pag = radix_tree_delete(&mp->m_perag_tree, index); ++ if (!pag) ++ break; + xfs_buf_hash_destroy(pag); + kmem_free(pag); + } +@@ -505,8 +511,7 @@ STATIC void + xfs_set_inoalignment(xfs_mount_t *mp) + { + if (xfs_sb_version_hasalign(&mp->m_sb) && +- mp->m_sb.sb_inoalignmt >= +- XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size)) ++ mp->m_sb.sb_inoalignmt >= xfs_icluster_size_fsb(mp)) + mp->m_inoalign_mask = mp->m_sb.sb_inoalignmt - 1; + else + mp->m_inoalign_mask = 0; +diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c +index 07593a362cd0..a72cd2e3c048 100644 +--- a/fs/xfs/xfs_reflink.c ++++ b/fs/xfs/xfs_reflink.c +@@ -82,11 +82,22 @@ + * mappings are a reservation against the free space in the filesystem; + * adjacent mappings can also be combined into fewer larger mappings. + * ++ * As an optimization, the CoW extent size hint (cowextsz) creates ++ * outsized aligned delalloc reservations in the hope of landing out of ++ * order nearby CoW writes in a single extent on disk, thereby reducing ++ * fragmentation and improving future performance. 
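The xfs_initialize_perag() rework above replaces a single catch-all unwind with one goto label per failure point, and only records first_initialised once an entry is fully set up, so cleanup never touches half-built or pre-existing entries. A self-contained sketch of that pattern over a plain array (hypothetical registry, not the kernel's radix tree):

#include <stdlib.h>

#define NITEMS 8
static void *registry[NITEMS];	/* stands in for the perag radix tree */

static int init_range(int count)
{
	int first_new = -1;	/* like first_initialised == NULLAGNUMBER */
	int i;

	for (i = 0; i < count && i < NITEMS; i++) {
		if (registry[i])	/* already initialized earlier */
			continue;
		registry[i] = calloc(1, 64);
		if (!registry[i])
			goto out_unwind;
		/* Record an entry only once it is fully set up. */
		if (first_new < 0)
			first_new = i;
	}
	return 0;

out_unwind:
	/* Undo only what this call created, stopping at the first gap. */
	if (first_new < 0)
		return -1;
	for (i = first_new; i < NITEMS && registry[i]; i++) {
		free(registry[i]);
		registry[i] = NULL;
	}
	return -1;
}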
++ * ++ * D: --RRRRRRSSSRRRRRRRR--- (data fork) ++ * C: ------DDDDDDD--------- (CoW fork) ++ * + * When dirty pages are being written out (typically in writepage), the +- * delalloc reservations are converted into real mappings by allocating +- * blocks and replacing the delalloc mapping with real ones. A delalloc +- * mapping can be replaced by several real ones if the free space is +- * fragmented. ++ * delalloc reservations are converted into unwritten mappings by ++ * allocating blocks and replacing the delalloc mapping with real ones. ++ * A delalloc mapping can be replaced by several unwritten ones if the ++ * free space is fragmented. ++ * ++ * D: --RRRRRRSSSRRRRRRRR--- ++ * C: ------UUUUUUU--------- + * + * We want to adapt the delalloc mechanism for copy-on-write, since the + * write paths are similar. The first two steps (creating the reservation +@@ -101,13 +112,29 @@ + * Block-aligned directio writes will use the same mechanism as buffered + * writes. + * ++ * Just prior to submitting the actual disk write requests, we convert ++ * the extents representing the range of the file actually being written ++ * (as opposed to extra pieces created for the cowextsize hint) to real ++ * extents. This will become important in the next step: ++ * ++ * D: --RRRRRRSSSRRRRRRRR--- ++ * C: ------UUrrUUU--------- ++ * + * CoW remapping must be done after the data block write completes, + * because we don't want to destroy the old data fork map until we're sure + * the new block has been written. Since the new mappings are kept in a + * separate fork, we can simply iterate these mappings to find the ones + * that cover the file blocks that we just CoW'd. For each extent, simply + * unmap the corresponding range in the data fork, map the new range into +- * the data fork, and remove the extent from the CoW fork. ++ * the data fork, and remove the extent from the CoW fork. Because of ++ * the presence of the cowextsize hint, however, we must be careful ++ * only to remap the blocks that we've actually written out -- we must ++ * never remap delalloc reservations nor CoW staging blocks that have ++ * yet to be written. This corresponds exactly to the real extents in ++ * the CoW fork: ++ * ++ * D: --RRRRRRrrSRRRRRRRR--- ++ * C: ------UU--UUU--------- + * + * Since the remapping operation can be applied to an arbitrary file + * range, we record the need for the remap step as a flag in the ioend +@@ -296,6 +323,65 @@ xfs_reflink_reserve_cow( + return 0; + } + ++/* Convert part of an unwritten CoW extent to a real one. */ ++STATIC int ++xfs_reflink_convert_cow_extent( ++ struct xfs_inode *ip, ++ struct xfs_bmbt_irec *imap, ++ xfs_fileoff_t offset_fsb, ++ xfs_filblks_t count_fsb, ++ struct xfs_defer_ops *dfops) ++{ ++ struct xfs_bmbt_irec irec = *imap; ++ xfs_fsblock_t first_block; ++ int nimaps = 1; ++ ++ if (imap->br_state == XFS_EXT_NORM) ++ return 0; ++ ++ xfs_trim_extent(&irec, offset_fsb, count_fsb); ++ trace_xfs_reflink_convert_cow(ip, &irec); ++ if (irec.br_blockcount == 0) ++ return 0; ++ return xfs_bmapi_write(NULL, ip, irec.br_startoff, irec.br_blockcount, ++ XFS_BMAPI_COWFORK | XFS_BMAPI_CONVERT, &first_block, ++ 0, &irec, &nimaps, dfops); ++} ++ ++/* Convert all of the unwritten CoW extents in a file's range to real ones. 
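The documentation block above describes what is effectively a per-extent state machine: a delalloc reservation becomes an unwritten allocation at writeout, is converted to a real extent just before the disk write is submitted, and only real extents are ever remapped into the data fork. A toy encoding of those rules, with illustrative names only, not kernel definitions:

#include <stdbool.h>

enum cow_state {
	COW_DELALLOC,	/* reserved, no blocks allocated yet */
	COW_UNWRITTEN,	/* blocks allocated at writeout time */
	COW_REAL,	/* converted just before the disk write */
};

/* Only the forward transitions described above are legal. */
static bool cow_transition_ok(enum cow_state from, enum cow_state to)
{
	return (from == COW_DELALLOC && to == COW_UNWRITTEN) ||
	       (from == COW_UNWRITTEN && to == COW_REAL);
}

/* Remapping into the data fork must only ever see real extents. */
static bool cow_can_remap(enum cow_state s)
{
	return s == COW_REAL;
}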
*/ ++int ++xfs_reflink_convert_cow( ++ struct xfs_inode *ip, ++ xfs_off_t offset, ++ xfs_off_t count) ++{ ++ struct xfs_bmbt_irec got; ++ struct xfs_defer_ops dfops; ++ struct xfs_mount *mp = ip->i_mount; ++ struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); ++ xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset); ++ xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, offset + count); ++ xfs_extnum_t idx; ++ bool found; ++ int error = 0; ++ ++ xfs_ilock(ip, XFS_ILOCK_EXCL); ++ ++ /* Convert all the extents to real from unwritten. */ ++ for (found = xfs_iext_lookup_extent(ip, ifp, offset_fsb, &idx, &got); ++ found && got.br_startoff < end_fsb; ++ found = xfs_iext_get_extent(ifp, ++idx, &got)) { ++ error = xfs_reflink_convert_cow_extent(ip, &got, offset_fsb, ++ end_fsb - offset_fsb, &dfops); ++ if (error) ++ break; ++ } ++ ++ /* Finish up. */ ++ xfs_iunlock(ip, XFS_ILOCK_EXCL); ++ return error; ++} ++ + /* Allocate all CoW reservations covering a range of blocks in a file. */ + static int + __xfs_reflink_allocate_cow( +@@ -328,6 +414,7 @@ __xfs_reflink_allocate_cow( + goto out_unlock; + ASSERT(nimaps == 1); + ++ /* Make sure there's a CoW reservation for it. */ + error = xfs_reflink_reserve_cow(ip, &imap, &shared); + if (error) + goto out_trans_cancel; +@@ -337,14 +424,16 @@ __xfs_reflink_allocate_cow( + goto out_trans_cancel; + } + ++ /* Allocate the entire reservation as unwritten blocks. */ + xfs_trans_ijoin(tp, ip, 0); + error = xfs_bmapi_write(tp, ip, imap.br_startoff, imap.br_blockcount, +- XFS_BMAPI_COWFORK, &first_block, ++ XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC, &first_block, + XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK), + &imap, &nimaps, &dfops); + if (error) + goto out_trans_cancel; + ++ /* Finish up. */ + error = xfs_defer_finish(&tp, &dfops, NULL); + if (error) + goto out_trans_cancel; +@@ -389,11 +478,12 @@ xfs_reflink_allocate_cow_range( + if (error) { + trace_xfs_reflink_allocate_cow_range_error(ip, error, + _RET_IP_); +- break; ++ return error; + } + } + +- return error; ++ /* Convert the CoW extents to regular. */ ++ return xfs_reflink_convert_cow(ip, offset, count); + } + + /* +@@ -459,14 +549,18 @@ xfs_reflink_trim_irec_to_next_cow( + } + + /* +- * Cancel all pending CoW reservations for some block range of an inode. ++ * Cancel CoW reservations for some block range of an inode. ++ * ++ * If cancel_real is true this function cancels all COW fork extents for the ++ * inode; if cancel_real is false, real extents are not cleared. + */ + int + xfs_reflink_cancel_cow_blocks( + struct xfs_inode *ip, + struct xfs_trans **tpp, + xfs_fileoff_t offset_fsb, +- xfs_fileoff_t end_fsb) ++ xfs_fileoff_t end_fsb, ++ bool cancel_real) + { + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); + struct xfs_bmbt_irec got, del; +@@ -490,7 +584,7 @@ xfs_reflink_cancel_cow_blocks( + &idx, &got, &del); + if (error) + break; +- } else { ++ } else if (del.br_state == XFS_EXT_UNWRITTEN || cancel_real) { + xfs_trans_ijoin(*tpp, ip, 0); + xfs_defer_init(&dfops, &firstfsb); + +@@ -532,13 +626,17 @@ xfs_reflink_cancel_cow_blocks( + } + + /* +- * Cancel all pending CoW reservations for some byte range of an inode. ++ * Cancel CoW reservations for some byte range of an inode. ++ * ++ * If cancel_real is true this function cancels all COW fork extents for the ++ * inode; if cancel_real is false, real extents are not cleared. 
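xfs_reflink_convert_cow() above walks the CoW fork from the first extent at or after the start offset and stops once an extent begins past the end of the range. The same walk over a plain sorted array, with made-up types in place of the iext API; a real implementation would, like xfs_trim_extent(), split and convert only the overlapping portion rather than the whole extent:

#include <stddef.h>
#include <stdint.h>

enum ext_state { EXT_UNWRITTEN, EXT_NORM };

struct extent {
	uint64_t start;		/* first block of the mapping */
	uint64_t count;		/* blocks in the mapping */
	enum ext_state state;
};

static void convert_range(struct extent *ext, size_t n,
			  uint64_t start, uint64_t end)
{
	size_t i;

	for (i = 0; i < n && ext[i].start < end; i++) {
		if (ext[i].start + ext[i].count <= start)
			continue;	/* wholly before the range */
		if (ext[i].state == EXT_NORM)
			continue;	/* already real, nothing to do */
		ext[i].state = EXT_NORM;	/* "convert" the overlap */
	}
}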
+ */ + int + xfs_reflink_cancel_cow_range( + struct xfs_inode *ip, + xfs_off_t offset, +- xfs_off_t count) ++ xfs_off_t count, ++ bool cancel_real) + { + struct xfs_trans *tp; + xfs_fileoff_t offset_fsb; +@@ -564,7 +662,8 @@ xfs_reflink_cancel_cow_range( + xfs_trans_ijoin(tp, ip, 0); + + /* Scrape out the old CoW reservations */ +- error = xfs_reflink_cancel_cow_blocks(ip, &tp, offset_fsb, end_fsb); ++ error = xfs_reflink_cancel_cow_blocks(ip, &tp, offset_fsb, end_fsb, ++ cancel_real); + if (error) + goto out_cancel; + +@@ -641,6 +740,16 @@ xfs_reflink_end_cow( + + ASSERT(!isnullstartblock(got.br_startblock)); + ++ /* ++ * Don't remap unwritten extents; these are ++ * speculatively preallocated CoW extents that have been ++ * allocated but have not yet been involved in a write. ++ */ ++ if (got.br_state == XFS_EXT_UNWRITTEN) { ++ idx--; ++ goto next_extent; ++ } ++ + /* Unmap the old blocks in the data fork. */ + xfs_defer_init(&dfops, &firstfsb); + rlen = del.br_blockcount; +@@ -855,13 +964,14 @@ STATIC int + xfs_reflink_update_dest( + struct xfs_inode *dest, + xfs_off_t newlen, +- xfs_extlen_t cowextsize) ++ xfs_extlen_t cowextsize, ++ bool is_dedupe) + { + struct xfs_mount *mp = dest->i_mount; + struct xfs_trans *tp; + int error; + +- if (newlen <= i_size_read(VFS_I(dest)) && cowextsize == 0) ++ if (is_dedupe && newlen <= i_size_read(VFS_I(dest)) && cowextsize == 0) + return 0; + + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp); +@@ -882,6 +992,10 @@ xfs_reflink_update_dest( + dest->i_d.di_flags2 |= XFS_DIFLAG2_COWEXTSIZE; + } + ++ if (!is_dedupe) { ++ xfs_trans_ichgtime(tp, dest, ++ XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); ++ } + xfs_trans_log_inode(tp, dest, XFS_ILOG_CORE); + + error = xfs_trans_commit(tp); +@@ -1195,7 +1309,8 @@ xfs_reflink_remap_range( + !(dest->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE)) + cowextsize = src->i_d.di_cowextsize; + +- ret = xfs_reflink_update_dest(dest, pos_out + len, cowextsize); ++ ret = xfs_reflink_update_dest(dest, pos_out + len, cowextsize, ++ is_dedupe); + + out_unlock: + xfs_iunlock(src, XFS_MMAPLOCK_EXCL); +@@ -1345,7 +1460,7 @@ xfs_reflink_clear_inode_flag( + * We didn't find any shared blocks so turn off the reflink flag. + * First, get rid of any leftover CoW mappings. 
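Two small predicates capture the policy introduced across these hunks: cancellation drops unwritten staging extents unconditionally but touches real extents only when the caller passes cancel_real, while end-of-write remapping skips anything still unwritten. Roughly, as a hedged sketch:

#include <stdbool.h>

enum ext_state { EXT_UNWRITTEN, EXT_NORM };

/*
 * Cancellation may always drop unwritten (never-written) staging
 * extents; real extents are dropped only when cancel_real is set,
 * e.g. at final inode teardown.
 */
static bool should_cancel(enum ext_state state, bool cancel_real)
{
	return state == EXT_UNWRITTEN || cancel_real;
}

/*
 * End-of-write remapping must never move speculative preallocations:
 * only extents that were actually written (converted to real) qualify.
 */
static bool should_remap(enum ext_state state)
{
	return state == EXT_NORM;
}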
+ */ +- error = xfs_reflink_cancel_cow_blocks(ip, tpp, 0, NULLFILEOFF); ++ error = xfs_reflink_cancel_cow_blocks(ip, tpp, 0, NULLFILEOFF, true); + if (error) + return error; + +diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h +index aa6a4d64bd35..b715bacb2ea2 100644 +--- a/fs/xfs/xfs_reflink.h ++++ b/fs/xfs/xfs_reflink.h +@@ -30,6 +30,8 @@ extern int xfs_reflink_reserve_cow(struct xfs_inode *ip, + struct xfs_bmbt_irec *imap, bool *shared); + extern int xfs_reflink_allocate_cow_range(struct xfs_inode *ip, + xfs_off_t offset, xfs_off_t count); ++extern int xfs_reflink_convert_cow(struct xfs_inode *ip, xfs_off_t offset, ++ xfs_off_t count); + extern bool xfs_reflink_find_cow_mapping(struct xfs_inode *ip, xfs_off_t offset, + struct xfs_bmbt_irec *imap); + extern void xfs_reflink_trim_irec_to_next_cow(struct xfs_inode *ip, +@@ -37,9 +39,9 @@ extern void xfs_reflink_trim_irec_to_next_cow(struct xfs_inode *ip, + + extern int xfs_reflink_cancel_cow_blocks(struct xfs_inode *ip, + struct xfs_trans **tpp, xfs_fileoff_t offset_fsb, +- xfs_fileoff_t end_fsb); ++ xfs_fileoff_t end_fsb, bool cancel_real); + extern int xfs_reflink_cancel_cow_range(struct xfs_inode *ip, xfs_off_t offset, +- xfs_off_t count); ++ xfs_off_t count, bool cancel_real); + extern int xfs_reflink_end_cow(struct xfs_inode *ip, xfs_off_t offset, + xfs_off_t count); + extern int xfs_reflink_recover_cow(struct xfs_mount *mp); +diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c +index eecbaac08eba..d80187b0e726 100644 +--- a/fs/xfs/xfs_super.c ++++ b/fs/xfs/xfs_super.c +@@ -953,7 +953,7 @@ xfs_fs_destroy_inode( + XFS_STATS_INC(ip->i_mount, vn_remove); + + if (xfs_is_reflink_inode(ip)) { +- error = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF); ++ error = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, true); + if (error && !XFS_FORCED_SHUTDOWN(ip->i_mount)) + xfs_warn(ip->i_mount, + "Error %d while evicting CoW blocks for inode %llu.", +diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h +index 69c5bcd9a51b..375c5e030e5b 100644 +--- a/fs/xfs/xfs_trace.h ++++ b/fs/xfs/xfs_trace.h +@@ -3089,6 +3089,7 @@ DECLARE_EVENT_CLASS(xfs_inode_irec_class, + __field(xfs_fileoff_t, lblk) + __field(xfs_extlen_t, len) + __field(xfs_fsblock_t, pblk) ++ __field(int, state) + ), + TP_fast_assign( + __entry->dev = VFS_I(ip)->i_sb->s_dev; +@@ -3096,13 +3097,15 @@ DECLARE_EVENT_CLASS(xfs_inode_irec_class, + __entry->lblk = irec->br_startoff; + __entry->len = irec->br_blockcount; + __entry->pblk = irec->br_startblock; ++ __entry->state = irec->br_state; + ), +- TP_printk("dev %d:%d ino 0x%llx lblk 0x%llx len 0x%x pblk %llu", ++ TP_printk("dev %d:%d ino 0x%llx lblk 0x%llx len 0x%x pblk %llu st %d", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->lblk, + __entry->len, +- __entry->pblk) ++ __entry->pblk, ++ __entry->state) + ); + #define DEFINE_INODE_IREC_EVENT(name) \ + DEFINE_EVENT(xfs_inode_irec_class, name, \ +@@ -3242,11 +3245,12 @@ DEFINE_INODE_IREC_EVENT(xfs_reflink_trim_around_shared); + DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_alloc); + DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_found); + DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_enospc); ++DEFINE_INODE_IREC_EVENT(xfs_reflink_convert_cow); + + DEFINE_RW_EVENT(xfs_reflink_reserve_cow); + DEFINE_RW_EVENT(xfs_reflink_allocate_cow_range); + +-DEFINE_INODE_IREC_EVENT(xfs_reflink_bounce_dio_write); ++DEFINE_SIMPLE_IO_EVENT(xfs_reflink_bounce_dio_write); + DEFINE_IOMAP_EVENT(xfs_reflink_find_cow_mapping); + DEFINE_INODE_IREC_EVENT(xfs_reflink_trim_irec); + +diff --git 
a/include/linux/kvm_host.h b/include/linux/kvm_host.h +index 1c5190dab2c1..e3d146dadceb 100644 +--- a/include/linux/kvm_host.h ++++ b/include/linux/kvm_host.h +@@ -162,8 +162,8 @@ int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, + int len, void *val); + int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, + int len, struct kvm_io_device *dev); +-int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, +- struct kvm_io_device *dev); ++void kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, ++ struct kvm_io_device *dev); + struct kvm_io_device *kvm_io_bus_get_dev(struct kvm *kvm, enum kvm_bus bus_idx, + gpa_t addr); + +diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h +index 254698856b8f..8b35bdbdc214 100644 +--- a/include/linux/memcontrol.h ++++ b/include/linux/memcontrol.h +@@ -739,6 +739,12 @@ static inline bool mem_cgroup_oom_synchronize(bool wait) + return false; + } + ++static inline void mem_cgroup_update_page_stat(struct page *page, ++ enum mem_cgroup_stat_index idx, ++ int nr) ++{ ++} ++ + static inline void mem_cgroup_inc_page_stat(struct page *page, + enum mem_cgroup_stat_index idx) + { +diff --git a/kernel/padata.c b/kernel/padata.c +index 05316c9f32da..3202aa17492c 100644 +--- a/kernel/padata.c ++++ b/kernel/padata.c +@@ -186,19 +186,20 @@ static struct padata_priv *padata_get_next(struct parallel_data *pd) + + reorder = &next_queue->reorder; + ++ spin_lock(&reorder->lock); + if (!list_empty(&reorder->list)) { + padata = list_entry(reorder->list.next, + struct padata_priv, list); + +- spin_lock(&reorder->lock); + list_del_init(&padata->list); + atomic_dec(&pd->reorder_objects); +- spin_unlock(&reorder->lock); + + pd->processed++; + ++ spin_unlock(&reorder->lock); + goto out; + } ++ spin_unlock(&reorder->lock); + + if (__this_cpu_read(pd->pqueue->cpu_index) == next_queue->cpu_index) { + padata = ERR_PTR(-ENODATA); +diff --git a/lib/syscall.c b/lib/syscall.c +index 63239e097b13..a72cd0996230 100644 +--- a/lib/syscall.c ++++ b/lib/syscall.c +@@ -11,6 +11,7 @@ static int collect_syscall(struct task_struct *target, long *callno, + + if (!try_get_task_stack(target)) { + /* Task has no stack, so the task isn't in a syscall. 
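The padata hunk above widens the reorder lock so that the list_empty() test and the list_del_init() happen in one critical section; checking first and locking afterwards leaves a window where another CPU can empty the list in between. The same fix in a minimal userspace queue, with a pthread mutex standing in for the reorder spinlock:

#include <pthread.h>
#include <stddef.h>

struct node { struct node *next; };

static struct node *head;
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

static struct node *pop(void)
{
	struct node *n;

	/*
	 * Take the lock before looking: an unlocked emptiness check
	 * can race with a concurrent pop and act on stale state.
	 */
	pthread_mutex_lock(&lock);
	n = head;
	if (n)
		head = n->next;
	pthread_mutex_unlock(&lock);
	return n;
}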
*/ ++ *sp = *pc = 0; + *callno = -1; + return 0; + } +diff --git a/mm/hugetlb.c b/mm/hugetlb.c +index c7025c132670..968b547f3b90 100644 +--- a/mm/hugetlb.c ++++ b/mm/hugetlb.c +@@ -4474,6 +4474,7 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address, + { + struct page *page = NULL; + spinlock_t *ptl; ++ pte_t pte; + retry: + ptl = pmd_lockptr(mm, pmd); + spin_lock(ptl); +@@ -4483,12 +4484,13 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address, + */ + if (!pmd_huge(*pmd)) + goto out; +- if (pmd_present(*pmd)) { ++ pte = huge_ptep_get((pte_t *)pmd); ++ if (pte_present(pte)) { + page = pmd_page(*pmd) + ((address & ~PMD_MASK) >> PAGE_SHIFT); + if (flags & FOLL_GET) + get_page(page); + } else { +- if (is_hugetlb_entry_migration(huge_ptep_get((pte_t *)pmd))) { ++ if (is_hugetlb_entry_migration(pte)) { + spin_unlock(ptl); + __migration_entry_wait(mm, (pte_t *)pmd, ptl); + goto retry; +diff --git a/mm/rmap.c b/mm/rmap.c +index 91619fd70939..a40d990eede0 100644 +--- a/mm/rmap.c ++++ b/mm/rmap.c +@@ -1294,7 +1294,7 @@ void page_add_file_rmap(struct page *page, bool compound) + goto out; + } + __mod_node_page_state(page_pgdat(page), NR_FILE_MAPPED, nr); +- mem_cgroup_inc_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED); ++ mem_cgroup_update_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED, nr); + out: + unlock_page_memcg(page); + } +@@ -1334,7 +1334,7 @@ static void page_remove_file_rmap(struct page *page, bool compound) + * pte lock(a spinlock) is held, which implies preemption disabled. + */ + __mod_node_page_state(page_pgdat(page), NR_FILE_MAPPED, -nr); +- mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED); ++ mem_cgroup_update_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED, -nr); + + if (unlikely(PageMlocked(page))) + clear_page_mlock(page); +diff --git a/mm/workingset.c b/mm/workingset.c +index a67f5796b995..dda16cf9599f 100644 +--- a/mm/workingset.c ++++ b/mm/workingset.c +@@ -533,7 +533,7 @@ static int __init workingset_init(void) + pr_info("workingset: timestamp_bits=%d max_order=%d bucket_order=%u\n", + timestamp_bits, max_order, bucket_order); + +- ret = list_lru_init_key(&shadow_nodes, &shadow_nodes_key); ++ ret = __list_lru_init(&shadow_nodes, true, &shadow_nodes_key); + if (ret) + goto err; + ret = register_shrinker(&workingset_shadow_shrinker); +diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c +index 770c52701efa..140b067d5d57 100644 +--- a/net/ceph/messenger.c ++++ b/net/ceph/messenger.c +@@ -7,6 +7,7 @@ + #include <linux/kthread.h> + #include <linux/net.h> + #include <linux/nsproxy.h> ++#include <linux/sched.h> + #include <linux/slab.h> + #include <linux/socket.h> + #include <linux/string.h> +@@ -469,11 +470,16 @@ static int ceph_tcp_connect(struct ceph_connection *con) + { + struct sockaddr_storage *paddr = &con->peer_addr.in_addr; + struct socket *sock; ++ unsigned int noio_flag; + int ret; + + BUG_ON(con->sock); ++ ++ /* sock_create_kern() allocates with GFP_KERNEL */ ++ noio_flag = memalloc_noio_save(); + ret = sock_create_kern(read_pnet(&con->msgr->net), paddr->ss_family, + SOCK_STREAM, IPPROTO_TCP, &sock); ++ memalloc_noio_restore(noio_flag); + if (ret) + return ret; + sock->sk->sk_allocation = GFP_NOFS; +diff --git a/sound/core/seq/seq_fifo.c b/sound/core/seq/seq_fifo.c +index 3f4efcb85df5..3490d21ab9e7 100644 +--- a/sound/core/seq/seq_fifo.c ++++ b/sound/core/seq/seq_fifo.c +@@ -265,6 +265,10 @@ int snd_seq_fifo_resize(struct snd_seq_fifo *f, int poolsize) + /* NOTE: overflow flag is not cleared */ + spin_unlock_irqrestore(&f->lock, flags); + ++ /* 
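Several of the fixes above share the read-once pattern that follow_huge_pmd() adopts: load the entry into a local variable and base every branch on that snapshot, rather than re-reading a value that can change concurrently. In miniature, with a C11 atomic standing in for the page-table slot:

#include <stdatomic.h>
#include <stdbool.h>

static _Atomic unsigned long entry;	/* stands in for the pmd/pte slot */

#define PRESENT_BIT 0x1UL

static bool handle_entry(void)
{
	/* Read once; both branches below judge the same snapshot. */
	unsigned long e = atomic_load(&entry);

	if (e & PRESENT_BIT)
		return true;	/* use the mapped page */

	/* Not present in *this* snapshot: treat as migration/none. */
	return false;
}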
close the old pool and wait until all users are gone */ ++ snd_seq_pool_mark_closing(oldpool); ++ snd_use_lock_sync(&f->use_lock); ++ + /* release cells in old pool */ + for (cell = oldhead; cell; cell = next) { + next = cell->next; +diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c +index c813ad857650..152c7ed65254 100644 +--- a/sound/pci/hda/patch_realtek.c ++++ b/sound/pci/hda/patch_realtek.c +@@ -4846,6 +4846,7 @@ enum { + ALC292_FIXUP_DISABLE_AAMIX, + ALC293_FIXUP_DISABLE_AAMIX_MULTIJACK, + ALC298_FIXUP_DELL1_MIC_NO_PRESENCE, ++ ALC298_FIXUP_DELL_AIO_MIC_NO_PRESENCE, + ALC275_FIXUP_DELL_XPS, + ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE, + ALC293_FIXUP_LENOVO_SPK_NOISE, +@@ -5446,6 +5447,15 @@ static const struct hda_fixup alc269_fixups[] = { + .chained = true, + .chain_id = ALC269_FIXUP_HEADSET_MODE + }, ++ [ALC298_FIXUP_DELL_AIO_MIC_NO_PRESENCE] = { ++ .type = HDA_FIXUP_PINS, ++ .v.pins = (const struct hda_pintbl[]) { ++ { 0x18, 0x01a1913c }, /* use as headset mic, without its own jack detect */ ++ { } ++ }, ++ .chained = true, ++ .chain_id = ALC269_FIXUP_HEADSET_MODE ++ }, + [ALC275_FIXUP_DELL_XPS] = { + .type = HDA_FIXUP_VERBS, + .v.verbs = (const struct hda_verb[]) { +@@ -5518,7 +5528,7 @@ static const struct hda_fixup alc269_fixups[] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc298_fixup_speaker_volume, + .chained = true, +- .chain_id = ALC298_FIXUP_DELL1_MIC_NO_PRESENCE, ++ .chain_id = ALC298_FIXUP_DELL_AIO_MIC_NO_PRESENCE, + }, + [ALC256_FIXUP_DELL_INSPIRON_7559_SUBWOOFER] = { + .type = HDA_FIXUP_PINS, +diff --git a/sound/soc/atmel/atmel-classd.c b/sound/soc/atmel/atmel-classd.c +index 89ac5f5a93eb..7ae46c2647d4 100644 +--- a/sound/soc/atmel/atmel-classd.c ++++ b/sound/soc/atmel/atmel-classd.c +@@ -349,7 +349,7 @@ static int atmel_classd_codec_dai_digital_mute(struct snd_soc_dai *codec_dai, + } + + #define CLASSD_ACLK_RATE_11M2896_MPY_8 (112896 * 100 * 8) +-#define CLASSD_ACLK_RATE_12M288_MPY_8 (12228 * 1000 * 8) ++#define CLASSD_ACLK_RATE_12M288_MPY_8 (12288 * 1000 * 8) + + static struct { + int rate; +diff --git a/sound/soc/codecs/rt5665.c b/sound/soc/codecs/rt5665.c +index 324461e985b3..fe2cf1ed8237 100644 +--- a/sound/soc/codecs/rt5665.c ++++ b/sound/soc/codecs/rt5665.c +@@ -1241,7 +1241,7 @@ static irqreturn_t rt5665_irq(int irq, void *data) + static void rt5665_jd_check_handler(struct work_struct *work) + { + struct rt5665_priv *rt5665 = container_of(work, struct rt5665_priv, +- calibrate_work.work); ++ jd_check_work.work); + + if (snd_soc_read(rt5665->codec, RT5665_AJD1_CTRL) & 0x0010) { + /* jack out */ +diff --git a/sound/soc/intel/skylake/skl-topology.c b/sound/soc/intel/skylake/skl-topology.c +index bd313c907b20..172d7db1653c 100644 +--- a/sound/soc/intel/skylake/skl-topology.c ++++ b/sound/soc/intel/skylake/skl-topology.c +@@ -486,7 +486,7 @@ static int skl_tplg_set_module_init_data(struct snd_soc_dapm_widget *w) + if (bc->set_params != SKL_PARAM_INIT) + continue; + +- mconfig->formats_config.caps = (u32 *)&bc->params; ++ mconfig->formats_config.caps = (u32 *)bc->params; + mconfig->formats_config.caps_size = bc->size; + + break; +diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c +index a29786dd9522..4d28a9ddbee0 100644 +--- a/virt/kvm/eventfd.c ++++ b/virt/kvm/eventfd.c +@@ -870,7 +870,8 @@ kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus bus_idx, + continue; + + kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev); +- kvm->buses[bus_idx]->ioeventfd_count--; ++ if (kvm->buses[bus_idx]) ++ kvm->buses[bus_idx]->ioeventfd_count--; + 
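The skl-topology one-character change above, (u32 *)&bc->params becoming (u32 *)bc->params, fixes a classic pointer bug: when the member is itself a pointer, taking its address yields the location of the pointer field inside the struct, not the payload it points to. A compilable illustration with a hypothetical struct of the same shape:

#include <stdint.h>
#include <stdio.h>

struct blob {
	uint32_t size;
	char *params;		/* points at the real payload */
};

int main(void)
{
	char payload[8] = "payload";
	struct blob bc = { sizeof(payload), payload };

	uint32_t *wrong = (uint32_t *)&bc.params;	/* address of the pointer field */
	uint32_t *right = (uint32_t *)bc.params;	/* address of the payload */

	printf("wrong=%p right=%p payload=%p\n",
	       (void *)wrong, (void *)right, (void *)payload);
	return 0;
}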
ioeventfd_release(p); + ret = 0; + break; +diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c +index 482612b4e496..da5db473afb0 100644 +--- a/virt/kvm/kvm_main.c ++++ b/virt/kvm/kvm_main.c +@@ -723,8 +723,11 @@ static void kvm_destroy_vm(struct kvm *kvm) + list_del(&kvm->vm_list); + spin_unlock(&kvm_lock); + kvm_free_irq_routing(kvm); +- for (i = 0; i < KVM_NR_BUSES; i++) +- kvm_io_bus_destroy(kvm->buses[i]); ++ for (i = 0; i < KVM_NR_BUSES; i++) { ++ if (kvm->buses[i]) ++ kvm_io_bus_destroy(kvm->buses[i]); ++ kvm->buses[i] = NULL; ++ } + kvm_coalesced_mmio_free(kvm); + #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) + mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm); +@@ -3473,6 +3476,8 @@ int kvm_io_bus_write(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, + }; + + bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu); ++ if (!bus) ++ return -ENOMEM; + r = __kvm_io_bus_write(vcpu, bus, &range, val); + return r < 0 ? r : 0; + } +@@ -3490,6 +3495,8 @@ int kvm_io_bus_write_cookie(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, + }; + + bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu); ++ if (!bus) ++ return -ENOMEM; + + /* First try the device referenced by cookie. */ + if ((cookie >= 0) && (cookie < bus->dev_count) && +@@ -3540,6 +3547,8 @@ int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, + }; + + bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu); ++ if (!bus) ++ return -ENOMEM; + r = __kvm_io_bus_read(vcpu, bus, &range, val); + return r < 0 ? r : 0; + } +@@ -3552,6 +3561,9 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, + struct kvm_io_bus *new_bus, *bus; + + bus = kvm->buses[bus_idx]; ++ if (!bus) ++ return -ENOMEM; ++ + /* exclude ioeventfd which is limited by maximum fd */ + if (bus->dev_count - bus->ioeventfd_count > NR_IOBUS_DEVS - 1) + return -ENOSPC; +@@ -3571,37 +3583,41 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, + } + + /* Caller must hold slots_lock. 
*/ +-int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, +- struct kvm_io_device *dev) ++void kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, ++ struct kvm_io_device *dev) + { +- int i, r; ++ int i; + struct kvm_io_bus *new_bus, *bus; + + bus = kvm->buses[bus_idx]; +- r = -ENOENT; ++ if (!bus) ++ return; ++ + for (i = 0; i < bus->dev_count; i++) + if (bus->range[i].dev == dev) { +- r = 0; + break; + } + +- if (r) +- return r; ++ if (i == bus->dev_count) ++ return; + + new_bus = kmalloc(sizeof(*bus) + ((bus->dev_count - 1) * + sizeof(struct kvm_io_range)), GFP_KERNEL); +- if (!new_bus) +- return -ENOMEM; ++ if (!new_bus) { ++ pr_err("kvm: failed to shrink bus, removing it completely\n"); ++ goto broken; ++ } + + memcpy(new_bus, bus, sizeof(*bus) + i * sizeof(struct kvm_io_range)); + new_bus->dev_count--; + memcpy(new_bus->range + i, bus->range + i + 1, + (new_bus->dev_count - i) * sizeof(struct kvm_io_range)); + ++broken: + rcu_assign_pointer(kvm->buses[bus_idx], new_bus); + synchronize_srcu_expedited(&kvm->srcu); + kfree(bus); +- return r; ++ return; + } + + struct kvm_io_device *kvm_io_bus_get_dev(struct kvm *kvm, enum kvm_bus bus_idx, +@@ -3614,6 +3630,8 @@ struct kvm_io_device *kvm_io_bus_get_dev(struct kvm *kvm, enum kvm_bus bus_idx, + srcu_idx = srcu_read_lock(&kvm->srcu); + + bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu); ++ if (!bus) ++ goto out_unlock; + + dev_idx = kvm_io_bus_get_first_dev(bus, addr, 1); + if (dev_idx < 0) |
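The reworked kvm_io_bus_unregister_dev() above removes a device by building a one-element-smaller copy of the bus around the matched index; if that allocation fails it logs and publishes a NULL bus rather than returning an error the callers never checked. The copy-around-an-index core as a standalone sketch, using stand-in int elements; the n == 1 case falls through to malloc(0), which a real implementation would special-case:

#include <stdlib.h>
#include <string.h>

/*
 * Return a newly allocated copy of src with element idx removed, or
 * NULL on failure -- mirroring how the KVM patch degrades to dropping
 * the whole bus when it cannot build the shrunken copy.
 */
static int *remove_index(const int *src, size_t n, size_t idx)
{
	int *dst;

	if (idx >= n)
		return NULL;
	dst = malloc((n - 1) * sizeof(*dst));
	if (!dst)
		return NULL;
	memcpy(dst, src, idx * sizeof(*dst));
	memcpy(dst + idx, src + idx + 1, (n - 1 - idx) * sizeof(*dst));
	return dst;
}

Building a fresh copy instead of editing in place is what lets the kernel publish the new bus with rcu_assign_pointer() while readers may still be walking the old one.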