1 files changed, 4658 insertions, 0 deletions
diff --git a/4.9.21/1020_linux-4.9.21.patch b/4.9.21/1020_linux-4.9.21.patch
new file mode 100644
index 0000000..42defb2
--- /dev/null
+++ b/4.9.21/1020_linux-4.9.21.patch
@@ -0,0 +1,4658 @@
+diff --git a/Makefile b/Makefile
+index 4496018..1523557 100644
+--- a/Makefile
++++ b/Makefile
+@@ -1,6 +1,6 @@
+ VERSION = 4
+ PATCHLEVEL = 9
+-SUBLEVEL = 20
++SUBLEVEL = 21
+ EXTRAVERSION =
+ NAME = Roaring Lionus
+ 
+diff --git a/arch/arm/boot/dts/bcm5301x.dtsi b/arch/arm/boot/dts/bcm5301x.dtsi
+index ae4b388..4616452 100644
+--- a/arch/arm/boot/dts/bcm5301x.dtsi
++++ b/arch/arm/boot/dts/bcm5301x.dtsi
+@@ -66,14 +66,14 @@
+ 		timer@20200 {
+ 			compatible = "arm,cortex-a9-global-timer";
+ 			reg = <0x20200 0x100>;
+-			interrupts = <GIC_PPI 11 IRQ_TYPE_LEVEL_HIGH>;
++			interrupts = <GIC_PPI 11 IRQ_TYPE_EDGE_RISING>;
+ 			clocks = <&periph_clk>;
+ 		};
+ 
+ 		local-timer@20600 {
+ 			compatible = "arm,cortex-a9-twd-timer";
+ 			reg = <0x20600 0x100>;
+-			interrupts = <GIC_PPI 13 IRQ_TYPE_LEVEL_HIGH>;
++			interrupts = <GIC_PPI 13 IRQ_TYPE_EDGE_RISING>;
+ 			clocks = <&periph_clk>;
+ 		};
+ 
+diff --git a/arch/arm/mach-bcm/bcm_5301x.c b/arch/arm/mach-bcm/bcm_5301x.c
+index c8830a2..fe067f6 100644
+--- a/arch/arm/mach-bcm/bcm_5301x.c
++++ b/arch/arm/mach-bcm/bcm_5301x.c
+@@ -9,14 +9,42 @@
+ #include <asm/hardware/cache-l2x0.h>
+ 
+ #include <asm/mach/arch.h>
++#include <asm/siginfo.h>
++#include <asm/signal.h>
++
++#define FSR_EXTERNAL		(1 << 12)
++#define FSR_READ		(0 << 10)
++#define FSR_IMPRECISE		0x0406
+ 
+ static const char *const bcm5301x_dt_compat[] __initconst = {
+ 	"brcm,bcm4708",
+ 	NULL,
+ };
+ 
++static int bcm5301x_abort_handler(unsigned long addr, unsigned int fsr,
++				  struct pt_regs *regs)
++{
++	/*
++	 * We want to ignore aborts forwarded from the PCIe bus that are
++	 * expected and shouldn't really be passed by the PCIe controller.
++	 * The biggest disadvantage is the same FSR code may be reported when
++	 * reading non-existing APB register and we shouldn't ignore that.
++	 */
++	if (fsr == (FSR_EXTERNAL | FSR_READ | FSR_IMPRECISE))
++		return 0;
++
++	return 1;
++}
++
++static void __init bcm5301x_init_early(void)
++{
++	hook_fault_code(16 + 6, bcm5301x_abort_handler, SIGBUS, BUS_OBJERR,
++			"imprecise external abort");
++}
++
+ DT_MACHINE_START(BCM5301X, "BCM5301X")
+ 	.l2c_aux_val	= 0,
+ 	.l2c_aux_mask	= ~0,
+ 	.dt_compat	= bcm5301x_dt_compat,
++	.init_early	= bcm5301x_init_early,
+ MACHINE_END
+diff --git a/arch/mips/lantiq/irq.c b/arch/mips/lantiq/irq.c
+index 8ac0e59..0ddf369 100644
+--- a/arch/mips/lantiq/irq.c
++++ b/arch/mips/lantiq/irq.c
+@@ -269,6 +269,11 @@ static void ltq_hw5_irqdispatch(void)
+ DEFINE_HWx_IRQDISPATCH(5)
+ #endif
+ 
++static void ltq_hw_irq_handler(struct irq_desc *desc)
++{
++	ltq_hw_irqdispatch(irq_desc_get_irq(desc) - 2);
++}
++
+ #ifdef CONFIG_MIPS_MT_SMP
+ void __init arch_init_ipiirq(int irq, struct irqaction *action)
+ {
+@@ -313,23 +318,19 @@ static struct irqaction irq_call = {
+ asmlinkage void plat_irq_dispatch(void)
+ {
+ 	unsigned int pending = read_c0_status() & read_c0_cause() & ST0_IM;
+-	unsigned int i;
+-
+-	if ((MIPS_CPU_TIMER_IRQ == 7) && (pending & CAUSEF_IP7)) {
+-		do_IRQ(MIPS_CPU_TIMER_IRQ);
+-		goto out;
+-	} else {
+-		for (i = 0; i < MAX_IM; i++) {
+-			if (pending & (CAUSEF_IP2 << i)) {
+-				ltq_hw_irqdispatch(i);
+-				goto out;
+-			}
+-		}
++	int irq;
++
++	if (!pending) {
++		spurious_interrupt();
++		return;
+ 	}
+-	pr_alert("Spurious IRQ: CAUSE=0x%08x\n", read_c0_status());
+ 
+-out:
+-	return;
++	pending >>= CAUSEB_IP;
++	while (pending) {
++		irq = fls(pending) - 1;
++		do_IRQ(MIPS_CPU_IRQ_BASE + irq);
++		pending &= ~BIT(irq);
++	}
+ }
+ 
+ static int icu_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hw)
+@@ -354,11 +355,6 @@ static const struct irq_domain_ops irq_domain_ops = {
+ 	.map = icu_map,
+ };
+ 
+-static struct irqaction cascade = {
+-	.handler = no_action,
+-	.name = "cascade",
+-};
+-
+ int __init icu_of_init(struct device_node *node, struct device_node *parent)
+ {
+ 	struct device_node *eiu_node;
+@@ -390,7 +386,7 @@ int __init icu_of_init(struct device_node *node, struct device_node *parent)
+ 	mips_cpu_irq_init();
+ 
+ 	for (i = 0; i < MAX_IM; i++)
+-		setup_irq(i + 2, &cascade);
++		irq_set_chained_handler(i + 2, ltq_hw_irq_handler);
+ 
+ 	if (cpu_has_vint) {
+ 		pr_info("Setting up vectored interrupts\n");
+diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h
+index 9a2aee1..7fcf512 100644
+--- a/arch/parisc/include/asm/uaccess.h
++++ b/arch/parisc/include/asm/uaccess.h
+@@ -68,6 +68,15 @@ struct exception_table_entry {
+ 	".previous\n"
+ 
+ /*
++ * ASM_EXCEPTIONTABLE_ENTRY_EFAULT() creates a special exception table entry
++ * (with lowest bit set) for which the fault handler in fixup_exception() will
++ * load -EFAULT into %r8 for a read or write fault, and zeroes the target
++ * register in case of a read fault in get_user().
++ */
++#define ASM_EXCEPTIONTABLE_ENTRY_EFAULT( fault_addr, except_addr )\
++	ASM_EXCEPTIONTABLE_ENTRY( fault_addr, except_addr + 1)
++
++/*
+  * The page fault handler stores, in a per-cpu area, the following information
+  * if a fixup routine is available.
+  */
+@@ -94,7 +103,7 @@ struct exception_data {
+ #define __get_user(x, ptr)                               \
+ ({                                                       \
+ 	register long __gu_err __asm__ ("r8") = 0;       \
+-	register long __gu_val __asm__ ("r9") = 0;       \
++	register long __gu_val;				 \
+ 							 \
+ 	load_sr2();					 \
+ 	switch (sizeof(*(ptr))) {			 \
+@@ -110,22 +119,23 @@ struct exception_data {
+ })
+ 
+ #define __get_user_asm(ldx, ptr)                        \
+-	__asm__("\n1:\t" ldx "\t0(%%sr2,%2),%0\n\t"	\
+-		ASM_EXCEPTIONTABLE_ENTRY(1b, fixup_get_user_skip_1)\
++	__asm__("1: " ldx " 0(%%sr2,%2),%0\n"		\
++		"9:\n"					\
++		ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b)	\
+ 		: "=r"(__gu_val), "=r"(__gu_err)        \
+-		: "r"(ptr), "1"(__gu_err)		\
+-		: "r1");
++		: "r"(ptr), "1"(__gu_err));
+ 
+ #if !defined(CONFIG_64BIT)
+ 
+ #define __get_user_asm64(ptr) 				\
+-	__asm__("\n1:\tldw 0(%%sr2,%2),%0"		\
+-		"\n2:\tldw 4(%%sr2,%2),%R0\n\t"		\
+-		ASM_EXCEPTIONTABLE_ENTRY(1b, fixup_get_user_skip_2)\
+-		ASM_EXCEPTIONTABLE_ENTRY(2b, fixup_get_user_skip_1)\
++	__asm__("   copy %%r0,%R0\n"			\
++		"1: ldw 0(%%sr2,%2),%0\n"		\
++		"2: ldw 4(%%sr2,%2),%R0\n"		\
++		"9:\n"					\
++		ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b)	\
++		ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b)	\
+ 		: "=r"(__gu_val), "=r"(__gu_err)	\
+-		: "r"(ptr), "1"(__gu_err)		\
+-		: "r1");
++		: "r"(ptr), "1"(__gu_err));
+ 
+ #endif /* !defined(CONFIG_64BIT) */
+ 
+@@ -151,32 +161,31 @@ struct exception_data {
+  * The "__put_user/kernel_asm()" macros tell gcc they read from memory
+  * instead of writing. This is because they do not write to any memory
+  * gcc knows about, so there are no aliasing issues. These macros must
+- * also be aware that "fixup_put_user_skip_[12]" are executed in the
+- * context of the fault, and any registers used there must be listed
+- * as clobbers. In this case only "r1" is used by the current routines.
+- * r8/r9 are already listed as err/val.
++ * also be aware that fixups are executed in the context of the fault,
++ * and any registers used there must be listed as clobbers.
++ * r8 is already listed as err.
+  */
+ 
+ #define __put_user_asm(stx, x, ptr)                         \
+ 	__asm__ __volatile__ (                              \
+-		"\n1:\t" stx "\t%2,0(%%sr2,%1)\n\t"	    \
+-		ASM_EXCEPTIONTABLE_ENTRY(1b, fixup_put_user_skip_1)\
++		"1: " stx " %2,0(%%sr2,%1)\n"		    \
++		"9:\n"					    \
++		ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b)	    \
+ 		: "=r"(__pu_err)                            \
+-		: "r"(ptr), "r"(x), "0"(__pu_err)	    \
+-		: "r1")
++		: "r"(ptr), "r"(x), "0"(__pu_err))
+ 
+ 
+ #if !defined(CONFIG_64BIT)
+ 
+ #define __put_user_asm64(__val, ptr) do {	    	    \
+ 	__asm__ __volatile__ (				    \
+-		"\n1:\tstw %2,0(%%sr2,%1)"		    \
+-		"\n2:\tstw %R2,4(%%sr2,%1)\n\t"		    \
+-		ASM_EXCEPTIONTABLE_ENTRY(1b, fixup_put_user_skip_2)\
+-		ASM_EXCEPTIONTABLE_ENTRY(2b, fixup_put_user_skip_1)\
++		"1: stw %2,0(%%sr2,%1)\n"		    \
++		"2: stw %R2,4(%%sr2,%1)\n"		    \
++		"9:\n"					    \
++		ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b)	    \
++		ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b)	    \
+ 		: "=r"(__pu_err)                            \
+-		: "r"(ptr), "r"(__val), "0"(__pu_err) \
+-		: "r1");				    \
++		: "r"(ptr), "r"(__val), "0"(__pu_err));	    \
+ } while (0)
+ 
+ #endif /* !defined(CONFIG_64BIT) */
+diff --git a/arch/parisc/kernel/parisc_ksyms.c b/arch/parisc/kernel/parisc_ksyms.c
+index 3cad8aa..4e6f0d9 100644
+--- a/arch/parisc/kernel/parisc_ksyms.c
++++ b/arch/parisc/kernel/parisc_ksyms.c
+@@ -47,16 +47,6 @@ EXPORT_SYMBOL(__cmpxchg_u64);
+ EXPORT_SYMBOL(lclear_user);
+ EXPORT_SYMBOL(lstrnlen_user);
+ 
+-/* Global fixups - defined as int to avoid creation of function pointers */
+-extern int fixup_get_user_skip_1;
+-extern int fixup_get_user_skip_2;
+-extern int fixup_put_user_skip_1;
+-extern int fixup_put_user_skip_2;
+-EXPORT_SYMBOL(fixup_get_user_skip_1);
+-EXPORT_SYMBOL(fixup_get_user_skip_2);
+-EXPORT_SYMBOL(fixup_put_user_skip_1);
+-EXPORT_SYMBOL(fixup_put_user_skip_2);
+-
+ #ifndef CONFIG_64BIT
+ /* Needed so insmod can set dp value */
+ extern int $global$;
+diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c
+index e81afc37..e7ffde2 100644
+--- a/arch/parisc/kernel/process.c
++++ b/arch/parisc/kernel/process.c
+@@ -140,6 +140,8 @@ void machine_power_off(void)
+ 	printk(KERN_EMERG "System shut down completed.\n"
+ 	       "Please power this system off now.");
+ 
++	/* prevent soft lockup/stalled CPU messages for endless loop. */
++	rcu_sysrq_start();
+ 	for (;;);
+ }
+ 
+diff --git a/arch/parisc/lib/Makefile b/arch/parisc/lib/Makefile
+index 8fa92b8..f2dac4d 100644
+--- a/arch/parisc/lib/Makefile
++++ b/arch/parisc/lib/Makefile
+@@ -2,7 +2,7 @@
+ # Makefile for parisc-specific library files
+ #
+ 
+-lib-y	:= lusercopy.o bitops.o checksum.o io.o memset.o fixup.o memcpy.o \
++lib-y	:= lusercopy.o bitops.o checksum.o io.o memset.o memcpy.o \
+ 	   ucmpdi2.o delay.o
+ 
+ obj-y	:= iomap.o
+diff --git a/arch/parisc/lib/fixup.S b/arch/parisc/lib/fixup.S
+deleted file mode 100644
+index a5b72f2..0000000
+--- a/arch/parisc/lib/fixup.S
++++ /dev/null
+@@ -1,98 +0,0 @@
+-/*
+- * Linux/PA-RISC Project (http://www.parisc-linux.org/)
+- *
+- *  Copyright (C) 2004  Randolph Chung <tausq@debian.org>
+- *
+- *    This program is free software; you can redistribute it and/or modify
+- *    it under the terms of the GNU General Public License as published by
+- *    the Free Software Foundation; either version 2, or (at your option)
+- *    any later version.
+- *
+- *    This program is distributed in the hope that it will be useful,
+- *    but WITHOUT ANY WARRANTY; without even the implied warranty of
+- *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+- *    GNU General Public License for more details.
+- *
+- *    You should have received a copy of the GNU General Public License
+- *    along with this program; if not, write to the Free Software
+- *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+- * 
+- * Fixup routines for kernel exception handling.
+- */
+-#include <asm/asm-offsets.h>
+-#include <asm/assembly.h>
+-#include <asm/errno.h>
+-#include <linux/linkage.h>
+-
+-#ifdef CONFIG_SMP
+-	.macro  get_fault_ip t1 t2
+-	loadgp
+-	addil LT%__per_cpu_offset,%r27
+-	LDREG RT%__per_cpu_offset(%r1),\t1
+-	/* t2 = smp_processor_id() */
+-	mfctl 30,\t2
+-	ldw TI_CPU(\t2),\t2
+-#ifdef CONFIG_64BIT
+-	extrd,u \t2,63,32,\t2
+-#endif
+-	/* t2 = &__per_cpu_offset[smp_processor_id()]; */
+-	LDREGX \t2(\t1),\t2 
+-	addil LT%exception_data,%r27
+-	LDREG RT%exception_data(%r1),\t1
+-	/* t1 = this_cpu_ptr(&exception_data) */
+-	add,l \t1,\t2,\t1
+-	/* %r27 = t1->fault_gp - restore gp */
+-	LDREG EXCDATA_GP(\t1), %r27
+-	/* t1 = t1->fault_ip */
+-	LDREG EXCDATA_IP(\t1), \t1
+-	.endm
+-#else
+-	.macro  get_fault_ip t1 t2
+-	loadgp
+-	/* t1 = this_cpu_ptr(&exception_data) */
+-	addil LT%exception_data,%r27
+-	LDREG RT%exception_data(%r1),\t2
+-	/* %r27 = t2->fault_gp - restore gp */
+-	LDREG EXCDATA_GP(\t2), %r27
+-	/* t1 = t2->fault_ip */
+-	LDREG EXCDATA_IP(\t2), \t1
+-	.endm
+-#endif
+-
+-	.level LEVEL
+-
+-	.text
+-	.section .fixup, "ax"
+-
+-	/* get_user() fixups, store -EFAULT in r8, and 0 in r9 */
+-ENTRY_CFI(fixup_get_user_skip_1)
+-	get_fault_ip %r1,%r8
+-	ldo 4(%r1), %r1
+-	ldi -EFAULT, %r8
+-	bv %r0(%r1)
+-	copy %r0, %r9
+-ENDPROC_CFI(fixup_get_user_skip_1)
+-
+-ENTRY_CFI(fixup_get_user_skip_2)
+-	get_fault_ip %r1,%r8
+-	ldo 8(%r1), %r1
+-	ldi -EFAULT, %r8
+-	bv %r0(%r1)
+-	copy %r0, %r9
+-ENDPROC_CFI(fixup_get_user_skip_2)
+-
+-	/* put_user() fixups, store -EFAULT in r8 */
+-ENTRY_CFI(fixup_put_user_skip_1)
+-	get_fault_ip %r1,%r8
+-	ldo 4(%r1), %r1
+-	bv %r0(%r1)
+-	ldi -EFAULT, %r8
+-ENDPROC_CFI(fixup_put_user_skip_1)
+-
+-ENTRY_CFI(fixup_put_user_skip_2)
+-	get_fault_ip %r1,%r8
+-	ldo 8(%r1), %r1
+-	bv %r0(%r1)
+-	ldi -EFAULT, %r8
+-ENDPROC_CFI(fixup_put_user_skip_2)
+-
+diff --git a/arch/parisc/lib/lusercopy.S b/arch/parisc/lib/lusercopy.S
+index 56845de..f01188c 100644
+--- a/arch/parisc/lib/lusercopy.S
++++ b/arch/parisc/lib/lusercopy.S
+@@ -5,6 +5,8 @@
+  *    Copyright (C) 2000 Richard Hirst <rhirst with parisc-linux.org>
+  *    Copyright (C) 2001 Matthieu Delahaye <delahaym at esiee.fr>
+  *    Copyright (C) 2003 Randolph Chung <tausq with parisc-linux.org>
++ *    Copyright (C) 2017 Helge Deller <deller@gmx.de>
++ *    Copyright (C) 2017 John David Anglin <dave.anglin@bell.net>
+  *
+  *
+  *    This program is free software; you can redistribute it and/or modify
+@@ -132,4 +134,320 @@ ENDPROC_CFI(lstrnlen_user)
+ 
+ 	.procend
+ 
++
++
++/*
++ * unsigned long pa_memcpy(void *dstp, const void *srcp, unsigned long len)
++ *
++ * Inputs:
++ * - sr1 already contains space of source region
++ * - sr2 already contains space of destination region
++ *
++ * Returns:
++ * - number of bytes that could not be copied.
++ *   On success, this will be zero.
++ *
++ * This code is based on a C-implementation of a copy routine written by
++ * Randolph Chung, which in turn was derived from the glibc.
++ *
++ * Several strategies are tried to try to get the best performance for various
++ * conditions. In the optimal case, we copy by loops that copy 32- or 16-bytes
++ * at a time using general registers.  Unaligned copies are handled either by
++ * aligning the destination and then using shift-and-write method, or in a few
++ * cases by falling back to a byte-at-a-time copy.
++ *
++ * Testing with various alignments and buffer sizes shows that this code is
++ * often >10x faster than a simple byte-at-a-time copy, even for strangely
++ * aligned operands. It is interesting to note that the glibc version of memcpy
++ * (written in C) is actually quite fast already. This routine is able to beat
++ * it by 30-40% for aligned copies because of the loop unrolling, but in some
++ * cases the glibc version is still slightly faster. This lends more
++ * credibility that gcc can generate very good code as long as we are careful.
++ *
++ * Possible optimizations:
++ * - add cache prefetching
++ * - try not to use the post-increment address modifiers; they may create
++ *   additional interlocks. Assumption is that those were only efficient on old
++ *   machines (pre PA8000 processors)
++ */
++
++	dst = arg0
++	src = arg1
++	len = arg2
++	end = arg3
++	t1  = r19
++	t2  = r20
++	t3  = r21
++	t4  = r22
++	srcspc = sr1
++	dstspc = sr2
++
++	t0 = r1
++	a1 = t1
++	a2 = t2
++	a3 = t3
++	a0 = t4
++
++	save_src = ret0
++	save_dst = ret1
++	save_len = r31
++
++ENTRY_CFI(pa_memcpy)
++	.proc
++	.callinfo NO_CALLS
++	.entry
++
++	/* Last destination address */
++	add	dst,len,end
++
++	/* short copy with less than 16 bytes? */
++	cmpib,>>=,n 15,len,.Lbyte_loop
++
++	/* same alignment? */
++	xor	src,dst,t0
++	extru	t0,31,2,t1
++	cmpib,<>,n  0,t1,.Lunaligned_copy
++
++#ifdef CONFIG_64BIT
++	/* only do 64-bit copies if we can get aligned. */
++	extru	t0,31,3,t1
++	cmpib,<>,n  0,t1,.Lalign_loop32
++
++	/* loop until we are 64-bit aligned */
++.Lalign_loop64:
++	extru	dst,31,3,t1
++	cmpib,=,n	0,t1,.Lcopy_loop_16
++20:	ldb,ma	1(srcspc,src),t1
++21:	stb,ma	t1,1(dstspc,dst)
++	b	.Lalign_loop64
++	ldo	-1(len),len
++
++	ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
++	ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)
++
++	ldi	31,t0
++.Lcopy_loop_16:
++	cmpb,COND(>>=),n t0,len,.Lword_loop
++
++10:	ldd	0(srcspc,src),t1
++11:	ldd	8(srcspc,src),t2
++	ldo	16(src),src
++12:	std,ma	t1,8(dstspc,dst)
++13:	std,ma	t2,8(dstspc,dst)
++14:	ldd	0(srcspc,src),t1
++15:	ldd	8(srcspc,src),t2
++	ldo	16(src),src
++16:	std,ma	t1,8(dstspc,dst)
++17:	std,ma	t2,8(dstspc,dst)
++
++	ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
++	ASM_EXCEPTIONTABLE_ENTRY(11b,.Lcopy16_fault)
++	ASM_EXCEPTIONTABLE_ENTRY(12b,.Lcopy_done)
++	ASM_EXCEPTIONTABLE_ENTRY(13b,.Lcopy_done)
++	ASM_EXCEPTIONTABLE_ENTRY(14b,.Lcopy_done)
++	ASM_EXCEPTIONTABLE_ENTRY(15b,.Lcopy16_fault)
++	ASM_EXCEPTIONTABLE_ENTRY(16b,.Lcopy_done)
++	ASM_EXCEPTIONTABLE_ENTRY(17b,.Lcopy_done)
++
++	b	.Lcopy_loop_16
++	ldo	-32(len),len
++
++.Lword_loop:
++	cmpib,COND(>>=),n 3,len,.Lbyte_loop
++20:	ldw,ma	4(srcspc,src),t1
++21:	stw,ma	t1,4(dstspc,dst)
++	b	.Lword_loop
++	ldo	-4(len),len
++
++	ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
++	ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)
++
++#endif /* CONFIG_64BIT */
++
++	/* loop until we are 32-bit aligned */
++.Lalign_loop32:
++	extru	dst,31,2,t1
++	cmpib,=,n	0,t1,.Lcopy_loop_4
++20:	ldb,ma	1(srcspc,src),t1
++21:	stb,ma	t1,1(dstspc,dst)
++	b	.Lalign_loop32
++	ldo	-1(len),len
++
++	ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
++	ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)
++
++
++.Lcopy_loop_4:
++	cmpib,COND(>>=),n 15,len,.Lbyte_loop
++
++10:	ldw	0(srcspc,src),t1
++11:	ldw	4(srcspc,src),t2
++12:	stw,ma	t1,4(dstspc,dst)
++13:	stw,ma	t2,4(dstspc,dst)
++14:	ldw	8(srcspc,src),t1
++15:	ldw	12(srcspc,src),t2
++	ldo	16(src),src
++16:	stw,ma	t1,4(dstspc,dst)
++17:	stw,ma	t2,4(dstspc,dst)
++
++	ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
++	ASM_EXCEPTIONTABLE_ENTRY(11b,.Lcopy8_fault)
++	ASM_EXCEPTIONTABLE_ENTRY(12b,.Lcopy_done)
++	ASM_EXCEPTIONTABLE_ENTRY(13b,.Lcopy_done)
++	ASM_EXCEPTIONTABLE_ENTRY(14b,.Lcopy_done)
++	ASM_EXCEPTIONTABLE_ENTRY(15b,.Lcopy8_fault)
++	ASM_EXCEPTIONTABLE_ENTRY(16b,.Lcopy_done)
++	ASM_EXCEPTIONTABLE_ENTRY(17b,.Lcopy_done)
++
++	b	.Lcopy_loop_4
++	ldo	-16(len),len
++
++.Lbyte_loop:
++	cmpclr,COND(<>) len,%r0,%r0
++	b,n	.Lcopy_done
++20:	ldb	0(srcspc,src),t1
++	ldo	1(src),src
++21:	stb,ma	t1,1(dstspc,dst)
++	b	.Lbyte_loop
++	ldo	-1(len),len
++
++	ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
++	ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)
++
++.Lcopy_done:
++	bv	%r0(%r2)
++	sub	end,dst,ret0
++
++
++	/* src and dst are not aligned the same way. */
++	/* need to go the hard way */
++.Lunaligned_copy:
++	/* align until dst is 32bit-word-aligned */
++	extru	dst,31,2,t1
++	cmpib,COND(=),n	0,t1,.Lcopy_dstaligned
++20:	ldb	0(srcspc,src),t1
++	ldo	1(src),src
++21:	stb,ma	t1,1(dstspc,dst)
++	b	.Lunaligned_copy
++	ldo	-1(len),len
++
++	ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
++	ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)
++
++.Lcopy_dstaligned:
++
++	/* store src, dst and len in safe place */
++	copy	src,save_src
++	copy	dst,save_dst
++	copy	len,save_len
++
++	/* len now needs give number of words to copy */
++	SHRREG	len,2,len
++
++	/*
++	 * Copy from a not-aligned src to an aligned dst using shifts.
++	 * Handles 4 words per loop.
++	 */
++
++	depw,z src,28,2,t0
++	subi 32,t0,t0
++	mtsar t0
++	extru len,31,2,t0
++	cmpib,= 2,t0,.Lcase2
++	/* Make src aligned by rounding it down.  */
++	depi 0,31,2,src
++
++	cmpiclr,<> 3,t0,%r0
++	b,n .Lcase3
++	cmpiclr,<> 1,t0,%r0
++	b,n .Lcase1
++.Lcase0:
++	cmpb,= %r0,len,.Lcda_finish
++	nop
++
++1:	ldw,ma 4(srcspc,src), a3
++	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
++1:	ldw,ma 4(srcspc,src), a0
++	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
++	b,n .Ldo3
++.Lcase1:
++1:	ldw,ma 4(srcspc,src), a2
++	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
++1:	ldw,ma 4(srcspc,src), a3
++	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
++	ldo -1(len),len
++	cmpb,=,n %r0,len,.Ldo0
++.Ldo4:
++1:	ldw,ma 4(srcspc,src), a0
++	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
++	shrpw a2, a3, %sar, t0
++1:	stw,ma t0, 4(dstspc,dst)
++	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
++.Ldo3:
++1:	ldw,ma 4(srcspc,src), a1
++	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
++	shrpw a3, a0, %sar, t0
++1:	stw,ma t0, 4(dstspc,dst)
++	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
++.Ldo2:
++1:	ldw,ma 4(srcspc,src), a2
++	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
++	shrpw a0, a1, %sar, t0
++1:	stw,ma t0, 4(dstspc,dst)
++	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
++.Ldo1:
++1:	ldw,ma 4(srcspc,src), a3
++	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
++	shrpw a1, a2, %sar, t0
++1:	stw,ma t0, 4(dstspc,dst)
++	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
++	ldo -4(len),len
++	cmpb,<> %r0,len,.Ldo4
++	nop
++.Ldo0:
++	shrpw a2, a3, %sar, t0
++1:	stw,ma t0, 4(dstspc,dst)
++	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
++
++.Lcda_rdfault:
++.Lcda_finish:
++	/* calculate new src, dst and len and jump to byte-copy loop */
++	sub	dst,save_dst,t0
++	add	save_src,t0,src
++	b	.Lbyte_loop
++	sub	save_len,t0,len
++
++.Lcase3:
++1:	ldw,ma 4(srcspc,src), a0
++	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
++1:	ldw,ma 4(srcspc,src), a1
++	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
++	b .Ldo2
++	ldo 1(len),len
++.Lcase2:
++1:	ldw,ma 4(srcspc,src), a1
++	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
++1:	ldw,ma 4(srcspc,src), a2
++	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
++	b .Ldo1
++	ldo 2(len),len
++
++
++	/* fault exception fixup handlers: */
++#ifdef CONFIG_64BIT
++.Lcopy16_fault:
++10:	b	.Lcopy_done
++	std,ma	t1,8(dstspc,dst)
++	ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
++#endif
++
++.Lcopy8_fault:
++10:	b	.Lcopy_done
++	stw,ma	t1,4(dstspc,dst)
++	ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
++
++	.exit
++ENDPROC_CFI(pa_memcpy)
++	.procend
++
+ 	.end
+diff --git a/arch/parisc/lib/memcpy.c b/arch/parisc/lib/memcpy.c
+index f82ff10..b3d47ec 100644
+--- a/arch/parisc/lib/memcpy.c
++++ b/arch/parisc/lib/memcpy.c
+@@ -2,7 +2,7 @@
+  *    Optimized memory copy routines.
+  *
+  *    Copyright (C) 2004 Randolph Chung <tausq@debian.org>
+- *    Copyright (C) 2013 Helge Deller <deller@gmx.de>
++ *    Copyright (C) 2013-2017 Helge Deller <deller@gmx.de>
+  *
+  *    This program is free software; you can redistribute it and/or modify
+  *    it under the terms of the GNU General Public License as published by
+@@ -21,474 +21,21 @@
+  *    Portions derived from the GNU C Library
+  *    Copyright (C) 1991, 1997, 2003 Free Software Foundation, Inc.
+  *
+- * Several strategies are tried to try to get the best performance for various
+- * conditions. In the optimal case, we copy 64-bytes in an unrolled loop using 
+- * fp regs. This is followed by loops that copy 32- or 16-bytes at a time using
+- * general registers.  Unaligned copies are handled either by aligning the 
+- * destination and then using shift-and-write method, or in a few cases by 
+- * falling back to a byte-at-a-time copy.
+- *
+- * I chose to implement this in C because it is easier to maintain and debug,
+- * and in my experiments it appears that the C code generated by gcc (3.3/3.4
+- * at the time of writing) is fairly optimal. Unfortunately some of the 
+- * semantics of the copy routine (exception handling) is difficult to express
+- * in C, so we have to play some tricks to get it to work.
+- *
+- * All the loads and stores are done via explicit asm() code in order to use
+- * the right space registers. 
+- * 
+- * Testing with various alignments and buffer sizes shows that this code is 
+- * often >10x faster than a simple byte-at-a-time copy, even for strangely
+- * aligned operands. It is interesting to note that the glibc version
+- * of memcpy (written in C) is actually quite fast already. This routine is 
+- * able to beat it by 30-40% for aligned copies because of the loop unrolling, 
+- * but in some cases the glibc version is still slightly faster. This lends 
+- * more credibility that gcc can generate very good code as long as we are 
+- * careful.
+- *
+- * TODO:
+- * - cache prefetching needs more experimentation to get optimal settings
+- * - try not to use the post-increment address modifiers; they create additional
+- *   interlocks
+- * - replace byte-copy loops with stybs sequences
+  */
+ 
+-#ifdef __KERNEL__
+ #include <linux/module.h>
+ #include <linux/compiler.h>
+ #include <linux/uaccess.h>
+-#define s_space "%%sr1"
+-#define d_space "%%sr2"
+-#else
+-#include "memcpy.h"
+-#define s_space "%%sr0"
+-#define d_space "%%sr0"
+-#define pa_memcpy new2_copy
+-#endif
+ 
+ DECLARE_PER_CPU(struct exception_data, exception_data);
+ 
+-#define preserve_branch(label)	do {					\
+-	volatile int dummy = 0;						\
+-	/* The following branch is never taken, it's just here to  */	\
+-	/* prevent gcc from optimizing away our exception code. */ 	\
+-	if (unlikely(dummy != dummy))					\
+-		goto label;						\
+-} while (0)
+-
+ #define get_user_space() (segment_eq(get_fs(), KERNEL_DS) ? 0 : mfsp(3))
+ #define get_kernel_space() (0)
+ 
+-#define MERGE(w0, sh_1, w1, sh_2)  ({					\
+-	unsigned int _r;						\
+-	asm volatile (							\
+-	"mtsar %3\n"							\
+-	"shrpw %1, %2, %%sar, %0\n"					\
+-	: "=r"(_r)							\
+-	: "r"(w0), "r"(w1), "r"(sh_2)					\
+-	);								\
+-	_r;								\
+-})
+-#define THRESHOLD	16
+-
+-#ifdef DEBUG_MEMCPY
+-#define DPRINTF(fmt, args...) do { printk(KERN_DEBUG "%s:%d:%s ", __FILE__, __LINE__, __func__ ); printk(KERN_DEBUG fmt, ##args ); } while (0)
+-#else
+-#define DPRINTF(fmt, args...)
+-#endif
+-
+-#define def_load_ai_insn(_insn,_sz,_tt,_s,_a,_t,_e)	\
+-	__asm__ __volatile__ (				\
+-	"1:\t" #_insn ",ma " #_sz "(" _s ",%1), %0\n\t"	\
+-	ASM_EXCEPTIONTABLE_ENTRY(1b,_e)			\
+-	: _tt(_t), "+r"(_a)				\
+-	: 						\
+-	: "r8")
+-
+-#define def_store_ai_insn(_insn,_sz,_tt,_s,_a,_t,_e) 	\
+-	__asm__ __volatile__ (				\
+-	"1:\t" #_insn ",ma %1, " #_sz "(" _s ",%0)\n\t"	\
+-	ASM_EXCEPTIONTABLE_ENTRY(1b,_e)			\
+-	: "+r"(_a) 					\
+-	: _tt(_t)					\
+-	: "r8")
+-
+-#define ldbma(_s, _a, _t, _e) def_load_ai_insn(ldbs,1,"=r",_s,_a,_t,_e)
+-#define stbma(_s, _t, _a, _e) def_store_ai_insn(stbs,1,"r",_s,_a,_t,_e)
+-#define ldwma(_s, _a, _t, _e) def_load_ai_insn(ldw,4,"=r",_s,_a,_t,_e)
+-#define stwma(_s, _t, _a, _e) def_store_ai_insn(stw,4,"r",_s,_a,_t,_e)
+-#define flddma(_s, _a, _t, _e) def_load_ai_insn(fldd,8,"=f",_s,_a,_t,_e)
+-#define fstdma(_s, _t, _a, _e) def_store_ai_insn(fstd,8,"f",_s,_a,_t,_e)
+-
+-#define def_load_insn(_insn,_tt,_s,_o,_a,_t,_e) 	\
+-	__asm__ __volatile__ (				\
+-	"1:\t" #_insn " " #_o "(" _s ",%1), %0\n\t"	\
+-	ASM_EXCEPTIONTABLE_ENTRY(1b,_e)			\
+-	: _tt(_t) 					\
+-	: "r"(_a)					\
+-	: "r8")
+-
+-#define def_store_insn(_insn,_tt,_s,_t,_o,_a,_e) 	\
+-	__asm__ __volatile__ (				\
+-	"1:\t" #_insn " %0, " #_o "(" _s ",%1)\n\t" 	\
+-	ASM_EXCEPTIONTABLE_ENTRY(1b,_e)			\
+-	: 						\
+-	: _tt(_t), "r"(_a)				\
+-	: "r8")
+-
+-#define ldw(_s,_o,_a,_t,_e)	def_load_insn(ldw,"=r",_s,_o,_a,_t,_e)
+-#define stw(_s,_t,_o,_a,_e) 	def_store_insn(stw,"r",_s,_t,_o,_a,_e)
+-
+-#ifdef  CONFIG_PREFETCH
+-static inline void prefetch_src(const void *addr)
+-{
+-	__asm__("ldw 0(" s_space ",%0), %%r0" : : "r" (addr));
+-}
+-
+-static inline void prefetch_dst(const void *addr)
+-{
+-	__asm__("ldd 0(" d_space ",%0), %%r0" : : "r" (addr));
+-}
+-#else
+-#define prefetch_src(addr) do { } while(0)
+-#define prefetch_dst(addr) do { } while(0)
+-#endif
+-
+-#define PA_MEMCPY_OK		0
+-#define PA_MEMCPY_LOAD_ERROR	1
+-#define PA_MEMCPY_STORE_ERROR	2
+-
+-/* Copy from a not-aligned src to an aligned dst, using shifts. Handles 4 words
+- * per loop.  This code is derived from glibc. 
+- */
+-static noinline unsigned long copy_dstaligned(unsigned long dst,
+-					unsigned long src, unsigned long len)
+-{
+-	/* gcc complains that a2 and a3 may be uninitialized, but actually
+-	 * they cannot be.  Initialize a2/a3 to shut gcc up.
+-	 */
+-	register unsigned int a0, a1, a2 = 0, a3 = 0;
+-	int sh_1, sh_2;
+-
+-	/* prefetch_src((const void *)src); */
+-
+-	/* Calculate how to shift a word read at the memory operation
+-	   aligned srcp to make it aligned for copy.  */
+-	sh_1 = 8 * (src % sizeof(unsigned int));
+-	sh_2 = 8 * sizeof(unsigned int) - sh_1;
+-
+-	/* Make src aligned by rounding it down.  */
+-	src &= -sizeof(unsigned int);
+-
+-	switch (len % 4)
+-	{
+-		case 2:
+-			/* a1 = ((unsigned int *) src)[0];
+-			   a2 = ((unsigned int *) src)[1]; */
+-			ldw(s_space, 0, src, a1, cda_ldw_exc);
+-			ldw(s_space, 4, src, a2, cda_ldw_exc);
+-			src -= 1 * sizeof(unsigned int);
+-			dst -= 3 * sizeof(unsigned int);
+-			len += 2;
+-			goto do1;
+-		case 3:
+-			/* a0 = ((unsigned int *) src)[0];
+-			   a1 = ((unsigned int *) src)[1]; */
+-			ldw(s_space, 0, src, a0, cda_ldw_exc);
+-			ldw(s_space, 4, src, a1, cda_ldw_exc);
+-			src -= 0 * sizeof(unsigned int);
+-			dst -= 2 * sizeof(unsigned int);
+-			len += 1;
+-			goto do2;
+-		case 0:
+-			if (len == 0)
+-				return PA_MEMCPY_OK;
+-			/* a3 = ((unsigned int *) src)[0];
+-			   a0 = ((unsigned int *) src)[1]; */
+-			ldw(s_space, 0, src, a3, cda_ldw_exc);
+-			ldw(s_space, 4, src, a0, cda_ldw_exc);
+-			src -=-1 * sizeof(unsigned int);
+-			dst -= 1 * sizeof(unsigned int);
+-			len += 0;
+-			goto do3;
+-		case 1:
+-			/* a2 = ((unsigned int *) src)[0];
+-			   a3 = ((unsigned int *) src)[1]; */
+-			ldw(s_space, 0, src, a2, cda_ldw_exc);
+-			ldw(s_space, 4, src, a3, cda_ldw_exc);
+-			src -=-2 * sizeof(unsigned int);
+-			dst -= 0 * sizeof(unsigned int);
+-			len -= 1;
+-			if (len == 0)
+-				goto do0;
+-			goto do4;			/* No-op.  */
+-	}
+-
+-	do
+-	{
+-		/* prefetch_src((const void *)(src + 4 * sizeof(unsigned int))); */
+-do4:
+-		/* a0 = ((unsigned int *) src)[0]; */
+-		ldw(s_space, 0, src, a0, cda_ldw_exc);
+-		/* ((unsigned int *) dst)[0] = MERGE (a2, sh_1, a3, sh_2); */
+-		stw(d_space, MERGE (a2, sh_1, a3, sh_2), 0, dst, cda_stw_exc);
+-do3:
+-		/* a1 = ((unsigned int *) src)[1]; */
+-		ldw(s_space, 4, src, a1, cda_ldw_exc);
+-		/* ((unsigned int *) dst)[1] = MERGE (a3, sh_1, a0, sh_2); */
+-		stw(d_space, MERGE (a3, sh_1, a0, sh_2), 4, dst, cda_stw_exc);
+-do2:
+-		/* a2 = ((unsigned int *) src)[2]; */
+-		ldw(s_space, 8, src, a2, cda_ldw_exc);
+-		/* ((unsigned int *) dst)[2] = MERGE (a0, sh_1, a1, sh_2); */
+-		stw(d_space, MERGE (a0, sh_1, a1, sh_2), 8, dst, cda_stw_exc);
+-do1:
+-		/* a3 = ((unsigned int *) src)[3]; */
+-		ldw(s_space, 12, src, a3, cda_ldw_exc);
+-		/* ((unsigned int *) dst)[3] = MERGE (a1, sh_1, a2, sh_2); */
+-		stw(d_space, MERGE (a1, sh_1, a2, sh_2), 12, dst, cda_stw_exc);
+-
+-		src += 4 * sizeof(unsigned int);
+-		dst += 4 * sizeof(unsigned int);
+-		len -= 4;
+-	}
+-	while (len != 0);
+-
+-do0:
+-	/* ((unsigned int *) dst)[0] = MERGE (a2, sh_1, a3, sh_2); */
+-	stw(d_space, MERGE (a2, sh_1, a3, sh_2), 0, dst, cda_stw_exc);
+-
+-	preserve_branch(handle_load_error);
+-	preserve_branch(handle_store_error);
+-
+-	return PA_MEMCPY_OK;
+-
+-handle_load_error:
+-	__asm__ __volatile__ ("cda_ldw_exc:\n");
+-	return PA_MEMCPY_LOAD_ERROR;
+-
+-handle_store_error:
+-	__asm__ __volatile__ ("cda_stw_exc:\n");
+-	return PA_MEMCPY_STORE_ERROR;
+-}
+-
+-
+-/* Returns PA_MEMCPY_OK, PA_MEMCPY_LOAD_ERROR or PA_MEMCPY_STORE_ERROR.
+- * In case of an access fault the faulty address can be read from the per_cpu
+- * exception data struct. */
+-static noinline unsigned long pa_memcpy_internal(void *dstp, const void *srcp,
+-					unsigned long len)
+-{
+-	register unsigned long src, dst, t1, t2, t3;
+-	register unsigned char *pcs, *pcd;
+-	register unsigned int *pws, *pwd;
+-	register double *pds, *pdd;
+-	unsigned long ret;
+-
+-	src = (unsigned long)srcp;
+-	dst = (unsigned long)dstp;
+-	pcs = (unsigned char *)srcp;
+-	pcd = (unsigned char *)dstp;
+-
+-	/* prefetch_src((const void *)srcp); */
+-
+-	if (len < THRESHOLD)
+-		goto byte_copy;
+-
+-	/* Check alignment */
+-	t1 = (src ^ dst);
+-	if (unlikely(t1 & (sizeof(double)-1)))
+-		goto unaligned_copy;
+-
+-	/* src and dst have same alignment. */
+-
+-	/* Copy bytes till we are double-aligned. */
+-	t2 = src & (sizeof(double) - 1);
+-	if (unlikely(t2 != 0)) {
+-		t2 = sizeof(double) - t2;
+-		while (t2 && len) {
+-			/* *pcd++ = *pcs++; */
+-			ldbma(s_space, pcs, t3, pmc_load_exc);
+-			len--;
+-			stbma(d_space, t3, pcd, pmc_store_exc);
+-			t2--;
+-		}
+-	}
+-
+-	pds = (double *)pcs;
+-	pdd = (double *)pcd;
+-
+-#if 0
+-	/* Copy 8 doubles at a time */
+-	while (len >= 8*sizeof(double)) {
+-		register double r1, r2, r3, r4, r5, r6, r7, r8;
+-		/* prefetch_src((char *)pds + L1_CACHE_BYTES); */
+-		flddma(s_space, pds, r1, pmc_load_exc);
+-		flddma(s_space, pds, r2, pmc_load_exc);
+-		flddma(s_space, pds, r3, pmc_load_exc);
+-		flddma(s_space, pds, r4, pmc_load_exc);
+-		fstdma(d_space, r1, pdd, pmc_store_exc);
+-		fstdma(d_space, r2, pdd, pmc_store_exc);
+-		fstdma(d_space, r3, pdd, pmc_store_exc);
+-		fstdma(d_space, r4, pdd, pmc_store_exc);
+-
+-#if 0
+-		if (L1_CACHE_BYTES <= 32)
+-			prefetch_src((char *)pds + L1_CACHE_BYTES);
+-#endif
+-		flddma(s_space, pds, r5, pmc_load_exc);
+-		flddma(s_space, pds, r6, pmc_load_exc);
+-		flddma(s_space, pds, r7, pmc_load_exc);
+-		flddma(s_space, pds, r8, pmc_load_exc);
+-		fstdma(d_space, r5, pdd, pmc_store_exc);
+-		fstdma(d_space, r6, pdd, pmc_store_exc);
+-		fstdma(d_space, r7, pdd, pmc_store_exc);
+-		fstdma(d_space, r8, pdd, pmc_store_exc);
+-		len -= 8*sizeof(double);
+-	}
+-#endif
+-
+-	pws = (unsigned int *)pds;
+-	pwd = (unsigned int *)pdd;
+-
+-word_copy:
+-	while (len >= 8*sizeof(unsigned int)) {
+-		register unsigned int r1,r2,r3,r4,r5,r6,r7,r8;
+-		/* prefetch_src((char *)pws + L1_CACHE_BYTES); */
+-		ldwma(s_space, pws, r1, pmc_load_exc);
+-		ldwma(s_space, pws, r2, pmc_load_exc);
+-		ldwma(s_space, pws, r3, pmc_load_exc);
+-		ldwma(s_space, pws, r4, pmc_load_exc);
+-		stwma(d_space, r1, pwd, pmc_store_exc);
+-		stwma(d_space, r2, pwd, pmc_store_exc);
+-		stwma(d_space, r3, pwd, pmc_store_exc);
+-		stwma(d_space, r4, pwd, pmc_store_exc);
+-
+-		ldwma(s_space, pws, r5, pmc_load_exc);
+-		ldwma(s_space, pws, r6, pmc_load_exc);
+-		ldwma(s_space, pws, r7, pmc_load_exc);
+-		ldwma(s_space, pws, r8, pmc_load_exc);
+-		stwma(d_space, r5, pwd, pmc_store_exc);
+-		stwma(d_space, r6, pwd, pmc_store_exc);
+-		stwma(d_space, r7, pwd, pmc_store_exc);
+-		stwma(d_space, r8, pwd, pmc_store_exc);
+-		len -= 8*sizeof(unsigned int);
+-	}
+-
+-	while (len >= 4*sizeof(unsigned int)) {
+-		register unsigned int r1,r2,r3,r4;
+-		ldwma(s_space, pws, r1, pmc_load_exc);
+-		ldwma(s_space, pws, r2, pmc_load_exc);
+-		ldwma(s_space, pws, r3, pmc_load_exc);
+-		ldwma(s_space, pws, r4, pmc_load_exc);
+-		stwma(d_space, r1, pwd, pmc_store_exc);
+-		stwma(d_space, r2, pwd, pmc_store_exc);
+-		stwma(d_space, r3, pwd, pmc_store_exc);
+-		stwma(d_space, r4, pwd, pmc_store_exc);
+-		len -= 4*sizeof(unsigned int);
+-	}
+-
+-	pcs = (unsigned char *)pws;
+-	pcd = (unsigned char *)pwd;
+-
+-byte_copy:
+-	while (len) {
+-		/* *pcd++ = *pcs++; */
+-		ldbma(s_space, pcs, t3, pmc_load_exc);
+-		stbma(d_space, t3, pcd, pmc_store_exc);
+-		len--;
+-	}
+-
+-	return PA_MEMCPY_OK;
+-
+-unaligned_copy:
+-	/* possibly we are aligned on a word, but not on a double... */
+-	if (likely((t1 & (sizeof(unsigned int)-1)) == 0)) {
+-		t2 = src & (sizeof(unsigned int) - 1);
+-
+-		if (unlikely(t2 != 0)) {
+-			t2 = sizeof(unsigned int) - t2;
+-			while (t2) {
+-				/* *pcd++ = *pcs++; */
+-				ldbma(s_space, pcs, t3, pmc_load_exc);
+-				stbma(d_space, t3, pcd, pmc_store_exc);
+-				len--;
+-				t2--;
+-			}
+-		}
+-
+-		pws = (unsigned int *)pcs;
+-		pwd = (unsigned int *)pcd;
+-		goto word_copy;
+-	}
+-
+-	/* Align the destination.  */
+-	if (unlikely((dst & (sizeof(unsigned int) - 1)) != 0)) {
+-		t2 = sizeof(unsigned int) - (dst & (sizeof(unsigned int) - 1));
+-		while (t2) {
+-			/* *pcd++ = *pcs++; */
+-			ldbma(s_space, pcs, t3, pmc_load_exc);
+-			stbma(d_space, t3, pcd, pmc_store_exc);
+-			len--;
+-			t2--;
+-		}
+-		dst = (unsigned long)pcd;
+-		src = (unsigned long)pcs;
+-	}
+-
+-	ret = copy_dstaligned(dst, src, len / sizeof(unsigned int));
+-	if (ret)
+-		return ret;
+-
+-	pcs += (len & -sizeof(unsigned int));
+-	pcd += (len & -sizeof(unsigned int));
+-	len %= sizeof(unsigned int);
+-
+-	preserve_branch(handle_load_error);
+-	preserve_branch(handle_store_error);
+-
+-	goto byte_copy;
+-
+-handle_load_error:
+-	__asm__ __volatile__ ("pmc_load_exc:\n");
+-	return PA_MEMCPY_LOAD_ERROR;
+-
+-handle_store_error:
+-	__asm__ __volatile__ ("pmc_store_exc:\n");
+-	return PA_MEMCPY_STORE_ERROR;
+-}
+-
+-
+ /* Returns 0 for success, otherwise, returns number of bytes not transferred. */
+-static unsigned long pa_memcpy(void *dstp, const void *srcp, unsigned long len)
+-{
+-	unsigned long ret, fault_addr, reference;
+-	struct exception_data *d;
+-
+-	ret = pa_memcpy_internal(dstp, srcp, len);
+-	if (likely(ret == PA_MEMCPY_OK))
+-		return 0;
+-
+-	/* if a load or store fault occured we can get the faulty addr */
+-	d = this_cpu_ptr(&exception_data);
+-	fault_addr = d->fault_addr;
+-
+-	/* error in load or store? */
+-	if (ret == PA_MEMCPY_LOAD_ERROR)
+-		reference = (unsigned long) srcp;
+-	else
+-		reference = (unsigned long) dstp;
++extern unsigned long pa_memcpy(void *dst, const void *src,
++				unsigned long len);
+ 
+-	DPRINTF("pa_memcpy: fault type = %lu, len=%lu fault_addr=%lu ref=%lu\n",
+-		ret, len, fault_addr, reference);
+-
+-	if (fault_addr >= reference)
+-		return len - (fault_addr - reference);
+-	else
+-		return len;
+-}
+-
+-#ifdef __KERNEL__
+ unsigned long __copy_to_user(void __user *dst, const void *src,
+ 			     unsigned long len)
+ {
+@@ -537,5 +84,3 @@ long probe_kernel_read(void *dst, const void *src, size_t size)
+ 
+ 	return __probe_kernel_read(dst, src, size);
+ }
+-
+-#endif
+diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c
+index 1a0b4f6..040c48f 100644
+--- a/arch/parisc/mm/fault.c
++++ b/arch/parisc/mm/fault.c
+@@ -149,6 +149,23 @@ int fixup_exception(struct pt_regs *regs)
+ 		d->fault_space = regs->isr;
+ 		d->fault_addr = regs->ior;
+ 
++		/*
++		 * Fix up get_user() and put_user().
++		 * ASM_EXCEPTIONTABLE_ENTRY_EFAULT() sets the least-significant
++		 * bit in the relative address of the fixup routine to indicate
++		 * that %r8 should be loaded with -EFAULT to report a userspace
++		 * access error.
++		 */
++		if (fix->fixup & 1) {
++			regs->gr[8] = -EFAULT;
++
++			/* zero target register for get_user() */
++			if (parisc_acctyp(0, regs->iir) == VM_READ) {
++				int treg = regs->iir & 0x1f;
++				regs->gr[treg] = 0;
++			}
++		}
++
+ 		regs->iaoq[0] = (unsigned long)&fix->fixup + fix->fixup;
+ 		regs->iaoq[0] &= ~3;
+ 		/*
+diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
+index 779782f..9a53a06 100644
+--- a/arch/x86/lib/memcpy_64.S
++++ b/arch/x86/lib/memcpy_64.S
+@@ -290,7 +290,7 @@ EXPORT_SYMBOL_GPL(memcpy_mcsafe_unrolled)
+ 	_ASM_EXTABLE_FAULT(.L_copy_leading_bytes, .L_memcpy_mcsafe_fail)
+ 	_ASM_EXTABLE_FAULT(.L_cache_w0, .L_memcpy_mcsafe_fail)
+ 	_ASM_EXTABLE_FAULT(.L_cache_w1, .L_memcpy_mcsafe_fail)
+-	_ASM_EXTABLE_FAULT(.L_cache_w3, .L_memcpy_mcsafe_fail)
++	_ASM_EXTABLE_FAULT(.L_cache_w2, .L_memcpy_mcsafe_fail)
+ 	_ASM_EXTABLE_FAULT(.L_cache_w3, .L_memcpy_mcsafe_fail)
+ 	_ASM_EXTABLE_FAULT(.L_cache_w4, .L_memcpy_mcsafe_fail)
+ 	_ASM_EXTABLE_FAULT(.L_cache_w5, .L_memcpy_mcsafe_fail)
+diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c
+index 887e571..aed2064 100644
+--- a/arch/x86/mm/kaslr.c
++++ b/arch/x86/mm/kaslr.c
+@@ -48,7 +48,7 @@ static const unsigned long vaddr_start = __PAGE_OFFSET_BASE;
+ #if defined(CONFIG_X86_ESPFIX64)
+ static const unsigned long vaddr_end = ESPFIX_BASE_ADDR;
+ #elif defined(CONFIG_EFI)
+-static const unsigned long vaddr_end = EFI_VA_START;
++static const unsigned long vaddr_end = EFI_VA_END;
+ #else
+ static const unsigned long vaddr_end = __START_KERNEL_map;
+ #endif
+@@ -105,7 +105,7 @@ void __init kernel_randomize_memory(void)
+ 	 */
+ 	BUILD_BUG_ON(vaddr_start >= vaddr_end);
+ 	BUILD_BUG_ON(IS_ENABLED(CONFIG_X86_ESPFIX64) &&
+-		     vaddr_end >= EFI_VA_START);
++		     vaddr_end >= EFI_VA_END);
+ 	BUILD_BUG_ON((IS_ENABLED(CONFIG_X86_ESPFIX64) ||
+ 		      IS_ENABLED(CONFIG_EFI)) &&
+ 		     vaddr_end >= __START_KERNEL_map);
+diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
+index f8960fc..9f21b0c 100644
+--- a/arch/x86/xen/setup.c
++++ b/arch/x86/xen/setup.c
+@@ -713,10 +713,9 @@ static void __init xen_reserve_xen_mfnlist(void)
+ 		size = PFN_PHYS(xen_start_info->nr_p2m_frames);
+ 	}
+ 
+-	if (!xen_is_e820_reserved(start, size)) {
+-		memblock_reserve(start, size);
++	memblock_reserve(start, size);
++	if (!xen_is_e820_reserved(start, size))
+ 		return;
+-	}
+ 
+ #ifdef CONFIG_X86_32
+ 	/*
+@@ -727,6 +726,7 @@ static void __init xen_reserve_xen_mfnlist(void)
+ 	BUG();
+ #else
+ 	xen_relocate_p2m();
++	memblock_free(start, size);
+ #endif
+ }
+ 
+diff --git a/block/bio.c b/block/bio.c
+index db85c57..655c901 100644
+--- a/block/bio.c
++++ b/block/bio.c
+@@ -372,10 +372,14 @@ static void punt_bios_to_rescuer(struct bio_set *bs)
+ 	bio_list_init(&punt);
+ 	bio_list_init(&nopunt);
+ 
+-	while ((bio = bio_list_pop(current->bio_list)))
++	while ((bio = bio_list_pop(&current->bio_list[0])))
+ 		bio_list_add(bio->bi_pool == bs ? &punt : &nopunt, bio);
++	current->bio_list[0] = nopunt;
+ 
+-	*current->bio_list = nopunt;
++	bio_list_init(&nopunt);
++	while ((bio = bio_list_pop(&current->bio_list[1])))
++		bio_list_add(bio->bi_pool == bs ? &punt : &nopunt, bio);
++	current->bio_list[1] = nopunt;
+ 
+ 	spin_lock(&bs->rescue_lock);
+ 	bio_list_merge(&bs->rescue_list, &punt);
+@@ -462,7 +466,9 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
+ 		 * we retry with the original gfp_flags.
+ 		 */
+ 
+-		if (current->bio_list && !bio_list_empty(current->bio_list))
++		if (current->bio_list &&
++		    (!bio_list_empty(&current->bio_list[0]) ||
++		     !bio_list_empty(&current->bio_list[1])))
+ 			gfp_mask &= ~__GFP_DIRECT_RECLAIM;
+ 
+ 		p = mempool_alloc(bs->bio_pool, gfp_mask);
+diff --git a/block/blk-core.c b/block/blk-core.c
+index 14d7c07..d1f2801 100644
+--- a/block/blk-core.c
++++ b/block/blk-core.c
+@@ -1994,7 +1994,14 @@ generic_make_request_checks(struct bio *bio)
+  */
+ blk_qc_t generic_make_request(struct bio *bio)
+ {
+-	struct bio_list bio_list_on_stack;
++	/*
++	 * bio_list_on_stack[0] contains bios submitted by the current
++	 * make_request_fn.
++	 * bio_list_on_stack[1] contains bios that were submitted before
++	 * the current make_request_fn, but that haven't been processed
++	 * yet.
++	 */
++	struct bio_list bio_list_on_stack[2];
+ 	blk_qc_t ret = BLK_QC_T_NONE;
+ 
+ 	if (!generic_make_request_checks(bio))
+@@ -2011,7 +2018,7 @@ blk_qc_t generic_make_request(struct bio *bio)
+ 	 * should be added at the tail
+ 	 */
+ 	if (current->bio_list) {
+-		bio_list_add(current->bio_list, bio);
++		bio_list_add(&current->bio_list[0], bio);
+ 		goto out;
+ 	}
+ 
+@@ -2030,23 +2037,39 @@ blk_qc_t generic_make_request(struct bio *bio)
+ 	 * bio_list, and call into ->make_request() again.
+ 	 */
+ 	BUG_ON(bio->bi_next);
+-	bio_list_init(&bio_list_on_stack);
+-	current->bio_list = &bio_list_on_stack;
++	bio_list_init(&bio_list_on_stack[0]);
++	current->bio_list = bio_list_on_stack;
+ 	do {
+ 		struct request_queue *q = bdev_get_queue(bio->bi_bdev);
+ 
+ 		if (likely(blk_queue_enter(q, false) == 0)) {
++			struct bio_list lower, same;
++
++			/* Create a fresh bio_list for all subordinate requests */
++			bio_list_on_stack[1] = bio_list_on_stack[0];
++			bio_list_init(&bio_list_on_stack[0]);
+ 			ret = q->make_request_fn(q, bio);
+ 
+ 			blk_queue_exit(q);
+ 
+-			bio = bio_list_pop(current->bio_list);
++			/* sort new bios into those for a lower level
++			 * and those for the same level
++			 */
++			bio_list_init(&lower);
++			bio_list_init(&same);
++			while ((bio = bio_list_pop(&bio_list_on_stack[0])) != NULL)
++				if (q == bdev_get_queue(bio->bi_bdev))
++					bio_list_add(&same, bio);
++				else
++					bio_list_add(&lower, bio);
++			/* now assemble so we handle the lowest level first */
++			bio_list_merge(&bio_list_on_stack[0], &lower);
++			bio_list_merge(&bio_list_on_stack[0], &same);
++			bio_list_merge(&bio_list_on_stack[0], &bio_list_on_stack[1]);
+ 		} else {
+-			struct bio *bio_next = bio_list_pop(current->bio_list);
+-
+ 			bio_io_error(bio);
+-			bio = bio_next;
+ 		}
++		bio = bio_list_pop(&bio_list_on_stack[0]);
+ 	} while (bio);
+ 	current->bio_list = NULL; /* deactivate */
+ 
+diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile
+index 9ed0878..4c5678c 100644
+--- a/drivers/acpi/Makefile
++++ b/drivers/acpi/Makefile
+@@ -2,7 +2,6 @@
+ # Makefile for the Linux ACPI interpreter
+ #
+ 
+-ccflags-y			:= -Os
+ ccflags-$(CONFIG_ACPI_DEBUG)	+= -DACPI_DEBUG_OUTPUT
+ 
+ #
+diff --git a/drivers/acpi/acpi_platform.c b/drivers/acpi/acpi_platform.c
+index b4c1a6a..03250e1 100644
+--- a/drivers/acpi/acpi_platform.c
++++ b/drivers/acpi/acpi_platform.c
+@@ -25,9 +25,11 @@
+ ACPI_MODULE_NAME("platform");
+ 
+ static const struct acpi_device_id forbidden_id_list[] = {
+-	{"PNP0000", 0},	/* PIC */
+-	{"PNP0100", 0},	/* Timer */
+-	{"PNP0200", 0},	/* AT DMA Controller */
++	{"PNP0000",  0},	/* PIC */
++	{"PNP0100",  0},	/* Timer */
++	{"PNP0200",  0},	/* AT DMA Controller */
++	{"ACPI0009", 0},	/* IOxAPIC */
++	{"ACPI000A", 0},	/* IOAPIC */
+ 	{"", 0},
+ };
+ 
+diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
+index b1254f8..b87d278 100644
+--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
++++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
+@@ -1299,6 +1299,8 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu,
+ 		goto out_pm_put;
+ 	}
+ 
++	mutex_lock(&gpu->lock);
++
+ 	fence = etnaviv_gpu_fence_alloc(gpu);
+ 	if (!fence) {
+ 		event_free(gpu, event);
+@@ -1306,8 +1308,6 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu,
+ 		goto out_pm_put;
+ 	}
+ 
+-	mutex_lock(&gpu->lock);
+-
+ 	gpu->event[event].fence = fence;
+ 	submit->fence = fence->seqno;
+ 	gpu->active_fence = submit->fence;
+diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
+index 3de5e6e..4ce04e0 100644
+--- a/drivers/gpu/drm/radeon/radeon_ttm.c
++++ b/drivers/gpu/drm/radeon/radeon_ttm.c
+@@ -213,8 +213,8 @@ static void radeon_evict_flags(struct ttm_buffer_object *bo,
+ 			rbo->placement.num_busy_placement = 0;
+ 			for (i = 0; i < rbo->placement.num_placement; i++) {
+ 				if (rbo->placements[i].flags & TTM_PL_FLAG_VRAM) {
+-					if (rbo->placements[0].fpfn < fpfn)
+-						rbo->placements[0].fpfn = fpfn;
++					if (rbo->placements[i].fpfn < fpfn)
++						rbo->placements[i].fpfn = fpfn;
+ 				} else {
+ 					rbo->placement.busy_placement =
+ 						&rbo->placements[i];
+diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c
+index 7aadce1..c7e6c98 100644
+--- a/drivers/gpu/drm/vc4/vc4_crtc.c
++++ b/drivers/gpu/drm/vc4/vc4_crtc.c
+@@ -842,6 +842,17 @@ static void vc4_crtc_destroy_state(struct drm_crtc *crtc,
+ 	drm_atomic_helper_crtc_destroy_state(crtc, state);
+ }
+ 
++static void
++vc4_crtc_reset(struct drm_crtc *crtc)
++{
++	if (crtc->state)
++		__drm_atomic_helper_crtc_destroy_state(crtc->state);
++
++	crtc->state = kzalloc(sizeof(struct vc4_crtc_state), GFP_KERNEL);
++	if (crtc->state)
++		crtc->state->crtc = crtc;
++}
++
+ static const struct drm_crtc_funcs vc4_crtc_funcs = {
+ 	.set_config = drm_atomic_helper_set_config,
+ 	.destroy = vc4_crtc_destroy,
+@@ -849,7 +860,7 @@ static const struct drm_crtc_funcs vc4_crtc_funcs = {
+ 	.set_property = NULL,
+ 	.cursor_set = NULL, /* handled by drm_mode_cursor_universal */
+ 	.cursor_move = NULL, /* handled by drm_mode_cursor_universal */
+-	.reset = drm_atomic_helper_crtc_reset,
++	.reset = vc4_crtc_reset,
+ 	.atomic_duplicate_state = vc4_crtc_duplicate_state,
+ 	.atomic_destroy_state = vc4_crtc_destroy_state,
+ 	.gamma_set = vc4_crtc_gamma_set,
+diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c
+index 5e7a564..0c535d0 100644
+--- a/drivers/hid/wacom_sys.c
++++ b/drivers/hid/wacom_sys.c
+@@ -2017,6 +2017,14 @@ static int wacom_parse_and_register(struct wacom *wacom, bool wireless)
+ 
+ 	wacom_update_name(wacom, wireless ? " (WL)" : "");
+ 
++	/* pen only Bamboo neither support touch nor pad */
++	if ((features->type == BAMBOO_PEN) &&
++	    ((features->device_type & WACOM_DEVICETYPE_TOUCH) ||
++	    (features->device_type & WACOM_DEVICETYPE_PAD))) {
++		error = -ENODEV;
++		goto fail;
++	}
++
+ 	error = wacom_add_shared_data(hdev);
+ 	if (error)
+ 		goto fail;
+@@ -2064,14 +2072,6 @@ static int wacom_parse_and_register(struct wacom *wacom, bool wireless)
+ 		goto fail_quirks;
+ 	}
+ 
+-	/* pen only Bamboo neither support touch nor pad */
+-	if ((features->type == BAMBOO_PEN) &&
+-	    ((features->device_type & WACOM_DEVICETYPE_TOUCH) ||
+-	    (features->device_type & WACOM_DEVICETYPE_PAD))) {
+-		error = -ENODEV;
+-		goto fail_quirks;
+-	}
+-
+ 	if (features->device_type & WACOM_DEVICETYPE_WL_MONITOR)
+ 		error = hid_hw_open(hdev);
+ 
+diff --git a/drivers/md/dm.c b/drivers/md/dm.c
+index 628ba00..e66f404 100644
+--- a/drivers/md/dm.c
++++ b/drivers/md/dm.c
+@@ -986,26 +986,29 @@ static void flush_current_bio_list(struct blk_plug_cb *cb, bool from_schedule)
+ 	struct dm_offload *o = container_of(cb, struct dm_offload, cb);
+ 	struct bio_list list;
+ 	struct bio *bio;
++	int i;
+ 
+ 	INIT_LIST_HEAD(&o->cb.list);
+ 
+ 	if (unlikely(!current->bio_list))
+ 		return;
+ 
+-	list = *current->bio_list;
+-	bio_list_init(current->bio_list);
+-
+-	while ((bio = bio_list_pop(&list))) {
+-		struct bio_set *bs = bio->bi_pool;
+-		if (unlikely(!bs) || bs == fs_bio_set) {
+-			bio_list_add(current->bio_list, bio);
+-			continue;
++	for (i = 0; i < 2; i++) {
++		list = current->bio_list[i];
++		bio_list_init(&current->bio_list[i]);
++
++		while ((bio = bio_list_pop(&list))) {
++			struct bio_set *bs = bio->bi_pool;
++			if (unlikely(!bs) || bs == fs_bio_set) {
++				bio_list_add(&current->bio_list[i], bio);
++				continue;
++			}
++
++			spin_lock(&bs->rescue_lock);
++			bio_list_add(&bs->rescue_list, bio);
++			queue_work(bs->rescue_workqueue, &bs->rescue_work);
++			spin_unlock(&bs->rescue_lock);
+ 		}
+-
+-		spin_lock(&bs->rescue_lock);
+-		bio_list_add(&bs->rescue_list, bio);
+-		queue_work(bs->rescue_workqueue, &bs->rescue_work);
+-		spin_unlock(&bs->rescue_lock);
+ 	}
+ }
+ 
+diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
+index 55b5e0e..4c4aab0 100644
+--- a/drivers/md/raid10.c
++++ b/drivers/md/raid10.c
+@@ -941,7 +941,8 @@ static void wait_barrier(struct r10conf *conf)
+ 				    !conf->barrier ||
+ 				    (atomic_read(&conf->nr_pending) &&
+ 				     current->bio_list &&
+-				     !bio_list_empty(current->bio_list)),
++				     (!bio_list_empty(&current->bio_list[0]) ||
++				      !bio_list_empty(&current->bio_list[1]))),
+ 				    conf->resync_lock);
+ 		conf->nr_waiting--;
+ 		if (!conf->nr_waiting)
+diff --git a/drivers/mmc/host/sdhci-of-at91.c b/drivers/mmc/host/sdhci-of-at91.c
+index 387ae1c..a8b430f 100644
+--- a/drivers/mmc/host/sdhci-of-at91.c
++++ b/drivers/mmc/host/sdhci-of-at91.c
+@@ -29,6 +29,8 @@
+ 
+ #include "sdhci-pltfm.h"
+ 
++#define SDMMC_MC1R	0x204
++#define		SDMMC_MC1R_DDR		BIT(3)
+ #define SDMMC_CACR	0x230
+ #define		SDMMC_CACR_CAPWREN	BIT(0)
+ #define		SDMMC_CACR_KEY		(0x46 << 8)
+@@ -103,11 +105,18 @@ static void sdhci_at91_set_power(struct sdhci_host *host, unsigned char mode,
+ 	sdhci_set_power_noreg(host, mode, vdd);
+ }
+ 
++void sdhci_at91_set_uhs_signaling(struct sdhci_host *host, unsigned int timing)
++{
++	if (timing == MMC_TIMING_MMC_DDR52)
++		sdhci_writeb(host, SDMMC_MC1R_DDR, SDMMC_MC1R);
++	sdhci_set_uhs_signaling(host, timing);
++}
++
+ static const struct sdhci_ops sdhci_at91_sama5d2_ops = {
+ 	.set_clock		= sdhci_at91_set_clock,
+ 	.set_bus_width		= sdhci_set_bus_width,
+ 	.reset			= sdhci_reset,
+-	.set_uhs_signaling	= sdhci_set_uhs_signaling,
++	.set_uhs_signaling	= sdhci_at91_set_uhs_signaling,
+ 	.set_power		= sdhci_at91_set_power,
+ };
+ 
+diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
+index a983ba0..7d275e7 100644
+--- a/drivers/mmc/host/sdhci.c
++++ b/drivers/mmc/host/sdhci.c
+@@ -1823,6 +1823,9 @@ static void sdhci_enable_sdio_irq(struct mmc_host *mmc, int enable)
+ 	struct sdhci_host *host = mmc_priv(mmc);
+ 	unsigned long flags;
+ 
++	if (enable)
++		pm_runtime_get_noresume(host->mmc->parent);
++
+ 	spin_lock_irqsave(&host->lock, flags);
+ 	if (enable)
+ 		host->flags |= SDHCI_SDIO_IRQ_ENABLED;
+@@ -1831,6 +1834,9 @@ static void sdhci_enable_sdio_irq(struct mmc_host *mmc, int enable)
+ 
+ 	sdhci_enable_sdio_irq_nolock(host, enable);
+ 	spin_unlock_irqrestore(&host->lock, flags);
++
++	if (!enable)
++		pm_runtime_put_noidle(host->mmc->parent);
+ }
+ 
+ static int sdhci_start_signal_voltage_switch(struct mmc_host *mmc,
+diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
+index da10b48..bde769b 100644
+--- a/drivers/nvme/host/core.c
++++ b/drivers/nvme/host/core.c
+@@ -2057,9 +2057,9 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl)
+ 		 * Revalidating a dead namespace sets capacity to 0. This will
+ 		 * end buffered writers dirtying pages that can't be synced.
+ 		 */
+-		if (ns->disk && !test_and_set_bit(NVME_NS_DEAD, &ns->flags))
+-			revalidate_disk(ns->disk);
+-
++		if (!ns->disk || test_and_set_bit(NVME_NS_DEAD, &ns->flags))
++			continue;
++		revalidate_disk(ns->disk);
+ 		blk_set_queue_dying(ns->queue);
+ 		blk_mq_abort_requeue_list(ns->queue);
+ 		blk_mq_start_stopped_hw_queues(ns->queue, true);
+diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
+index 5e52034..8a9c186 100644
+--- a/drivers/nvme/host/pci.c
++++ b/drivers/nvme/host/pci.c
+@@ -1983,8 +1983,10 @@ static void nvme_remove(struct pci_dev *pdev)
+ 
+ 	pci_set_drvdata(pdev, NULL);
+ 
+-	if (!pci_device_is_present(pdev))
++	if (!pci_device_is_present(pdev)) {
+ 		nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DEAD);
++		nvme_dev_disable(dev, false);
++	}
+ 
+ 	flush_work(&dev->reset_work);
+ 	nvme_uninit_ctrl(&dev->ctrl);
+diff --git a/drivers/pci/host/pcie-iproc-bcma.c b/drivers/pci/host/pcie-iproc-bcma.c
+index 8ce0890..46ca8ed 100644
+--- a/drivers/pci/host/pcie-iproc-bcma.c
++++ b/drivers/pci/host/pcie-iproc-bcma.c
+@@ -44,8 +44,7 @@ static int iproc_pcie_bcma_probe(struct bcma_device *bdev)
+ {
+ 	struct device *dev = &bdev->dev;
+ 	struct iproc_pcie *pcie;
+-	LIST_HEAD(res);
+-	struct resource res_mem;
++	LIST_HEAD(resources);
+ 	int ret;
+ 
+ 	pcie = devm_kzalloc(dev, sizeof(*pcie), GFP_KERNEL);
+@@ -62,22 +61,23 @@ static int iproc_pcie_bcma_probe(struct bcma_device *bdev)
+ 
+ 	pcie->base_addr = bdev->addr;
+ 
+-	res_mem.start = bdev->addr_s[0];
+-	res_mem.end = bdev->addr_s[0] + SZ_128M - 1;
+-	res_mem.name = "PCIe MEM space";
+-	res_mem.flags = IORESOURCE_MEM;
+-	pci_add_resource(&res, &res_mem);
++	pcie->mem.start = bdev->addr_s[0];
++	pcie->mem.end = bdev->addr_s[0] + SZ_128M - 1;
++	pcie->mem.name = "PCIe MEM space";
++	pcie->mem.flags = IORESOURCE_MEM;
++	pci_add_resource(&resources, &pcie->mem);
+ 
+ 	pcie->map_irq = iproc_pcie_bcma_map_irq;
+ 
+-	ret = iproc_pcie_setup(pcie, &res);
+-	if (ret)
++	ret = iproc_pcie_setup(pcie, &resources);
++	if (ret) {
+ 		dev_err(dev, "PCIe controller setup failed\n");
+-
+-	pci_free_resource_list(&res);
++		pci_free_resource_list(&resources);
++		return ret;
++	}
+ 
+ 	bcma_set_drvdata(bdev, pcie);
+-	return ret;
++	return 0;
+ }
+ 
+ static void iproc_pcie_bcma_remove(struct bcma_device *bdev)
+diff --git a/drivers/pci/host/pcie-iproc-platform.c b/drivers/pci/host/pcie-iproc-platform.c
+index a3de087..7dcaddc 100644
+--- a/drivers/pci/host/pcie-iproc-platform.c
++++ b/drivers/pci/host/pcie-iproc-platform.c
+@@ -46,7 +46,7 @@ static int iproc_pcie_pltfm_probe(struct platform_device *pdev)
+ 	struct device_node *np = dev->of_node;
+ 	struct resource reg;
+ 	resource_size_t iobase = 0;
+-	LIST_HEAD(res);
++	LIST_HEAD(resources);
+ 	int ret;
+ 
+ 	of_id = of_match_device(iproc_pcie_of_match_table, dev);
+@@ -108,23 +108,24 @@ static int iproc_pcie_pltfm_probe(struct platform_device *pdev)
+ 		pcie->phy = NULL;
+ 	}
+ 
+-	ret = of_pci_get_host_bridge_resources(np, 0, 0xff, &res, &iobase);
++	ret = of_pci_get_host_bridge_resources(np, 0, 0xff, &resources,
++					       &iobase);
+ 	if (ret) {
+-		dev_err(dev,
+-			"unable to get PCI host bridge resources\n");
++		dev_err(dev, "unable to get PCI host bridge resources\n");
+ 		return ret;
+ 	}
+ 
+ 	pcie->map_irq = of_irq_parse_and_map_pci;
+ 
+-	ret = iproc_pcie_setup(pcie, &res);
+-	if (ret)
++	ret = iproc_pcie_setup(pcie, &resources);
++	if (ret) {
+ 		dev_err(dev, "PCIe controller setup failed\n");
+-
+-	pci_free_resource_list(&res);
++		pci_free_resource_list(&resources);
++		return ret;
++	}
+ 
+ 	platform_set_drvdata(pdev, pcie);
+-	return ret;
++	return 0;
+ }
+ 
+ static int iproc_pcie_pltfm_remove(struct platform_device *pdev)
+diff --git a/drivers/pci/host/pcie-iproc.h b/drivers/pci/host/pcie-iproc.h
+index e84d93c..fa42267 100644
+--- a/drivers/pci/host/pcie-iproc.h
++++ b/drivers/pci/host/pcie-iproc.h
+@@ -68,6 +68,7 @@ struct iproc_pcie {
+ #ifdef CONFIG_ARM
+ 	struct pci_sys_data sysdata;
+ #endif
++	struct resource mem;
+ 	struct pci_bus *root_bus;
+ 	struct phy *phy;
+ 	int (*map_irq)(const struct pci_dev *, u8, u8);
+diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c
+index 7bb2068..d314579 100644
+--- a/drivers/scsi/device_handler/scsi_dh_alua.c
++++ b/drivers/scsi/device_handler/scsi_dh_alua.c
+@@ -113,7 +113,7 @@ struct alua_queue_data {
+ #define ALUA_POLICY_SWITCH_ALL		1
+ 
+ static void alua_rtpg_work(struct work_struct *work);
+-static void alua_rtpg_queue(struct alua_port_group *pg,
++static bool alua_rtpg_queue(struct alua_port_group *pg,
+ 			    struct scsi_device *sdev,
+ 			    struct alua_queue_data *qdata, bool force);
+ static void alua_check(struct scsi_device *sdev, bool force);
+@@ -862,7 +862,13 @@ static void alua_rtpg_work(struct work_struct *work)
+ 	kref_put(&pg->kref, release_port_group);
+ }
+ 
+-static void alua_rtpg_queue(struct alua_port_group *pg,
++/**
++ * alua_rtpg_queue() - cause RTPG to be submitted asynchronously
++ *
++ * Returns true if and only if alua_rtpg_work() will be called asynchronously.
++ * That function is responsible for calling @qdata->fn().
++ */
++static bool alua_rtpg_queue(struct alua_port_group *pg,
+ 			    struct scsi_device *sdev,
+ 			    struct alua_queue_data *qdata, bool force)
+ {
+@@ -870,8 +876,8 @@ static void alua_rtpg_queue(struct alua_port_group *pg,
+ 	unsigned long flags;
+ 	struct workqueue_struct *alua_wq = kaluad_wq;
+ 
+-	if (!pg)
+-		return;
++	if (!pg || scsi_device_get(sdev))
++		return false;
+ 
+ 	spin_lock_irqsave(&pg->lock, flags);
+ 	if (qdata) {
+@@ -884,14 +890,12 @@ static void alua_rtpg_queue(struct alua_port_group *pg,
+ 		pg->flags |= ALUA_PG_RUN_RTPG;
+ 		kref_get(&pg->kref);
+ 		pg->rtpg_sdev = sdev;
+-		scsi_device_get(sdev);
+ 		start_queue = 1;
+ 	} else if (!(pg->flags & ALUA_PG_RUN_RTPG) && force) {
+ 		pg->flags |= ALUA_PG_RUN_RTPG;
+ 		/* Do not queue if the worker is already running */
+ 		if (!(pg->flags & ALUA_PG_RUNNING)) {
+ 			kref_get(&pg->kref);
+-			sdev = NULL;
+ 			start_queue = 1;
+ 		}
+ 	}
+@@ -900,13 +904,17 @@ static void alua_rtpg_queue(struct alua_port_group *pg,
+ 		alua_wq = kaluad_sync_wq;
+ 	spin_unlock_irqrestore(&pg->lock, flags);
+ 
+-	if (start_queue &&
+-	    !queue_delayed_work(alua_wq, &pg->rtpg_work,
+-				msecs_to_jiffies(ALUA_RTPG_DELAY_MSECS))) {
+-		if (sdev)
+-			scsi_device_put(sdev);
+-		kref_put(&pg->kref, release_port_group);
++	if (start_queue) {
++		if (queue_delayed_work(alua_wq, &pg->rtpg_work,
++				msecs_to_jiffies(ALUA_RTPG_DELAY_MSECS)))
++			sdev = NULL;
++		else
++			kref_put(&pg->kref, release_port_group);
+ 	}
++	if (sdev)
++		scsi_device_put(sdev);
++
++	return true;
+ }
+ 
+ /*
+@@ -1007,11 +1015,13 @@ static int alua_activate(struct scsi_device *sdev,
+ 		mutex_unlock(&h->init_mutex);
+ 		goto out;
+ 	}
+-	fn = NULL;
+ 	rcu_read_unlock();
+ 	mutex_unlock(&h->init_mutex);
+ 
+-	alua_rtpg_queue(pg, sdev, qdata, true);
++	if (alua_rtpg_queue(pg, sdev, qdata, true))
++		fn = NULL;
++	else
++		err = SCSI_DH_DEV_OFFLINED;
+ 	kref_put(&pg->kref, release_port_group);
+ out:
+ 	if (fn)
+diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c
+index 763f012..87f5e694 100644
+--- a/drivers/scsi/libsas/sas_ata.c
++++ b/drivers/scsi/libsas/sas_ata.c
+@@ -221,7 +221,7 @@ static unsigned int sas_ata_qc_issue(struct ata_queued_cmd *qc)
+ 		task->num_scatter = qc->n_elem;
+ 	} else {
+ 		for_each_sg(qc->sg, sg, qc->n_elem, si)
+-			xfer += sg->length;
++			xfer += sg_dma_len(sg);
+ 
+ 		task->total_xfer_len = xfer;
+ 		task->num_scatter = si;
+diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c
+index fe7469c..ad33238 100644
+--- a/drivers/scsi/qla2xxx/qla_attr.c
++++ b/drivers/scsi/qla2xxx/qla_attr.c
+@@ -2153,8 +2153,6 @@ qla24xx_vport_delete(struct fc_vport *fc_vport)
+ 		    "Timer for the VP[%d] has stopped\n", vha->vp_idx);
+ 	}
+ 
+-	BUG_ON(atomic_read(&vha->vref_count));
+-
+ 	qla2x00_free_fcports(vha);
+ 
+ 	mutex_lock(&ha->vport_lock);
+diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h
+index 73b12e4..8e63a7b 100644
+--- a/drivers/scsi/qla2xxx/qla_def.h
++++ b/drivers/scsi/qla2xxx/qla_def.h
+@@ -3742,6 +3742,7 @@ typedef struct scsi_qla_host {
+ 	struct qla8044_reset_template reset_tmplt;
+ 	struct qla_tgt_counters tgt_counters;
+ 	uint16_t	bbcr;
++	wait_queue_head_t vref_waitq;
+ } scsi_qla_host_t;
+ 
+ struct qla27xx_image_status {
+@@ -3780,6 +3781,7 @@ struct qla_tgt_vp_map {
+ 	mb();						     \
+ 	if (__vha->flags.delete_progress) {		     \
+ 		atomic_dec(&__vha->vref_count);		     \
++		wake_up(&__vha->vref_waitq);		\
+ 		__bail = 1;				     \
+ 	} else {					     \
+ 		__bail = 0;				     \
+@@ -3788,6 +3790,7 @@ struct qla_tgt_vp_map {
+ 
+ #define QLA_VHA_MARK_NOT_BUSY(__vha) do {		     \
+ 	atomic_dec(&__vha->vref_count);			     \
++	wake_up(&__vha->vref_waitq);			\
+ } while (0)
+ 
+ /*
+diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
+index 5b09296..8f12f6b 100644
+--- a/drivers/scsi/qla2xxx/qla_init.c
++++ b/drivers/scsi/qla2xxx/qla_init.c
+@@ -4356,6 +4356,7 @@ qla2x00_update_fcports(scsi_qla_host_t *base_vha)
+ 			}
+ 		}
+ 		atomic_dec(&vha->vref_count);
++		wake_up(&vha->vref_waitq);
+ 	}
+ 	spin_unlock_irqrestore(&ha->vport_slock, flags);
+ }
+diff --git a/drivers/scsi/qla2xxx/qla_mid.c b/drivers/scsi/qla2xxx/qla_mid.c
+index cf7ba52..3dfb54a 100644
+--- a/drivers/scsi/qla2xxx/qla_mid.c
++++ b/drivers/scsi/qla2xxx/qla_mid.c
+@@ -74,13 +74,14 @@ qla24xx_deallocate_vp_id(scsi_qla_host_t *vha)
+ 	 * ensures no active vp_list traversal while the vport is removed
+ 	 * from the queue)
+ 	 */
+-	spin_lock_irqsave(&ha->vport_slock, flags);
+-	while (atomic_read(&vha->vref_count)) {
+-		spin_unlock_irqrestore(&ha->vport_slock, flags);
+-
+-		msleep(500);
++	wait_event_timeout(vha->vref_waitq, !atomic_read(&vha->vref_count),
++	    10*HZ);
+ 
+-		spin_lock_irqsave(&ha->vport_slock, flags);
++	spin_lock_irqsave(&ha->vport_slock, flags);
++	if (atomic_read(&vha->vref_count)) {
++		ql_dbg(ql_dbg_vport, vha, 0xfffa,
++		    "vha->vref_count=%u timeout\n", vha->vref_count.counter);
++		vha->vref_count = (atomic_t)ATOMIC_INIT(0);
+ 	}
+ 	list_del(&vha->list);
+ 	qlt_update_vp_map(vha, RESET_VP_IDX);
+@@ -269,6 +270,7 @@ qla2x00_alert_all_vps(struct rsp_que *rsp, uint16_t *mb)
+ 
+ 			spin_lock_irqsave(&ha->vport_slock, flags);
+ 			atomic_dec(&vha->vref_count);
++			wake_up(&vha->vref_waitq);
+ 		}
+ 		i++;
+ 	}
+diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
+index bea819e..4f361d8 100644
+--- a/drivers/scsi/qla2xxx/qla_os.c
++++ b/drivers/scsi/qla2xxx/qla_os.c
+@@ -4045,6 +4045,7 @@ struct scsi_qla_host *qla2x00_create_host(struct scsi_host_template *sht,
+ 
+ 	spin_lock_init(&vha->work_lock);
+ 	spin_lock_init(&vha->cmd_list_lock);
++	init_waitqueue_head(&vha->vref_waitq);
+ 
+ 	sprintf(vha->host_str, "%s_%ld", QLA2XXX_DRIVER_NAME, vha->host_no);
+ 	ql_dbg(ql_dbg_init, vha, 0x0041,
+diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
+index 121de0a..f753df2 100644
+--- a/drivers/scsi/sg.c
++++ b/drivers/scsi/sg.c
+@@ -998,6 +998,8 @@ sg_ioctl(struct file *filp, unsigned int cmd_in, unsigned long arg)
+ 		result = get_user(val, ip);
+ 		if (result)
+ 			return result;
++		if (val > SG_MAX_CDB_SIZE)
++			return -ENOMEM;
+ 		sfp->next_cmd_len = (val > 0) ? val : 0;
+ 		return 0;
+ 	case SG_GET_VERSION_NUM:
+diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c
+index fabbe76..4d079cd 100644
+--- a/drivers/tty/serial/atmel_serial.c
++++ b/drivers/tty/serial/atmel_serial.c
+@@ -1938,6 +1938,11 @@ static void atmel_flush_buffer(struct uart_port *port)
+ 		atmel_uart_writel(port, ATMEL_PDC_TCR, 0);
+ 		atmel_port->pdc_tx.ofs = 0;
+ 	}
++	/*
++	 * in uart_flush_buffer(), the xmit circular buffer has just
++	 * been cleared, so we have to reset tx_len accordingly.
++	 */
++	atmel_port->tx_len = 0;
+ }
+ 
+ /*
+@@ -2471,6 +2476,9 @@ static void atmel_console_write(struct console *co, const char *s, u_int count)
+ 	pdc_tx = atmel_uart_readl(port, ATMEL_PDC_PTSR) & ATMEL_PDC_TXTEN;
+ 	atmel_uart_writel(port, ATMEL_PDC_PTCR, ATMEL_PDC_TXTDIS);
+ 
++	/* Make sure that tx path is actually able to send characters */
++	atmel_uart_writel(port, ATMEL_US_CR, ATMEL_US_TXEN);
++
+ 	uart_console_write(port, s, count, atmel_console_putchar);
+ 
+ 	/*
+diff --git a/drivers/tty/serial/mxs-auart.c b/drivers/tty/serial/mxs-auart.c
+index 770454e..07390f8 100644
+--- a/drivers/tty/serial/mxs-auart.c
++++ b/drivers/tty/serial/mxs-auart.c
+@@ -1085,7 +1085,7 @@ static void mxs_auart_settermios(struct uart_port *u,
+ 					AUART_LINECTRL_BAUD_DIV_MAX);
+ 		baud_max = u->uartclk * 32 / AUART_LINECTRL_BAUD_DIV_MIN;
+ 		baud = uart_get_baud_rate(u, termios, old, baud_min, baud_max);
+-		div = u->uartclk * 32 / baud;
++		div = DIV_ROUND_CLOSEST(u->uartclk * 32, baud);
+ 	}
+ 
+ 	ctrl |= AUART_LINECTRL_BAUD_DIVFRAC(div & 0x3F);
+diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
+index 479e223..f029aad 100644
+--- a/drivers/usb/core/hcd.c
++++ b/drivers/usb/core/hcd.c
+@@ -520,8 +520,10 @@ static int rh_call_control (struct usb_hcd *hcd, struct urb *urb)
+ 	 */
+ 	tbuf_size =  max_t(u16, sizeof(struct usb_hub_descriptor), wLength);
+ 	tbuf = kzalloc(tbuf_size, GFP_KERNEL);
+-	if (!tbuf)
+-		return -ENOMEM;
++	if (!tbuf) {
++		status = -ENOMEM;
++		goto err_alloc;
++	}
+ 
+ 	bufp = tbuf;
+ 
+@@ -734,6 +736,7 @@ static int rh_call_control (struct usb_hcd *hcd, struct urb *urb)
+ 	}
+ 
+ 	kfree(tbuf);
++ err_alloc:
+ 
+ 	/* any errors get returned through the urb completion */
+ 	spin_lock_irq(&hcd_root_hub_lock);
+diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
+index 1536aeb..4e894d3 100644
+--- a/fs/nfs/nfs4proc.c
++++ b/fs/nfs/nfs4proc.c
+@@ -2532,17 +2532,14 @@ static void nfs41_check_delegation_stateid(struct nfs4_state *state)
+ 	}
+ 
+ 	nfs4_stateid_copy(&stateid, &delegation->stateid);
+-	if (test_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) {
++	if (test_bit(NFS_DELEGATION_REVOKED, &delegation->flags) ||
++		!test_and_clear_bit(NFS_DELEGATION_TEST_EXPIRED,
++			&delegation->flags)) {
+ 		rcu_read_unlock();
+ 		nfs_finish_clear_delegation_stateid(state, &stateid);
+ 		return;
+ 	}
+ 
+-	if (!test_and_clear_bit(NFS_DELEGATION_TEST_EXPIRED, &delegation->flags)) {
+-		rcu_read_unlock();
+-		return;
+-	}
+-
+ 	cred = get_rpccred(delegation->cred);
+ 	rcu_read_unlock();
+ 	status = nfs41_test_and_free_expired_stateid(server, &stateid, cred);
+diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
+index 010aff5..536009e 100644
+--- a/fs/nfsd/nfsproc.c
++++ b/fs/nfsd/nfsproc.c
+@@ -790,6 +790,7 @@ nfserrno (int errno)
+ 		{ nfserr_serverfault, -ESERVERFAULT },
+ 		{ nfserr_serverfault, -ENFILE },
+ 		{ nfserr_io, -EUCLEAN },
++		{ nfserr_perm, -ENOKEY },
+ 	};
+ 	int	i;
+ 
+diff --git a/fs/xfs/libxfs/xfs_ag_resv.c b/fs/xfs/libxfs/xfs_ag_resv.c
+index d346d42..33db69b 100644
+--- a/fs/xfs/libxfs/xfs_ag_resv.c
++++ b/fs/xfs/libxfs/xfs_ag_resv.c
+@@ -39,6 +39,7 @@
+ #include "xfs_rmap_btree.h"
+ #include "xfs_btree.h"
+ #include "xfs_refcount_btree.h"
++#include "xfs_ialloc_btree.h"
+ 
+ /*
+  * Per-AG Block Reservations
+@@ -200,22 +201,30 @@ __xfs_ag_resv_init(
+ 	struct xfs_mount		*mp = pag->pag_mount;
+ 	struct xfs_ag_resv		*resv;
+ 	int				error;
++	xfs_extlen_t			reserved;
+ 
+-	resv = xfs_perag_resv(pag, type);
+ 	if (used > ask)
+ 		ask = used;
+-	resv->ar_asked = ask;
+-	resv->ar_reserved = resv->ar_orig_reserved = ask - used;
+-	mp->m_ag_max_usable -= ask;
++	reserved = ask - used;
+ 
+-	trace_xfs_ag_resv_init(pag, type, ask);
+-
+-	error = xfs_mod_fdblocks(mp, -(int64_t)resv->ar_reserved, true);
+-	if (error)
++	error = xfs_mod_fdblocks(mp, -(int64_t)reserved, true);
++	if (error) {
+ 		trace_xfs_ag_resv_init_error(pag->pag_mount, pag->pag_agno,
+ 				error, _RET_IP_);
++		xfs_warn(mp,
++"Per-AG reservation for AG %u failed.  Filesystem may run out of space.",
++				pag->pag_agno);
++		return error;
++	}
+ 
+-	return error;
++	mp->m_ag_max_usable -= ask;
++
++	resv = xfs_perag_resv(pag, type);
++	resv->ar_asked = ask;
++	resv->ar_reserved = resv->ar_orig_reserved = reserved;
++
++	trace_xfs_ag_resv_init(pag, type, ask);
++	return 0;
+ }
+ 
+ /* Create a per-AG block reservation. */
+@@ -223,6 +232,8 @@ int
+ xfs_ag_resv_init(
+ 	struct xfs_perag		*pag)
+ {
++	struct xfs_mount		*mp = pag->pag_mount;
++	xfs_agnumber_t			agno = pag->pag_agno;
+ 	xfs_extlen_t			ask;
+ 	xfs_extlen_t			used;
+ 	int				error = 0;
+@@ -231,23 +242,45 @@ xfs_ag_resv_init(
+ 	if (pag->pag_meta_resv.ar_asked == 0) {
+ 		ask = used = 0;
+ 
+-		error = xfs_refcountbt_calc_reserves(pag->pag_mount,
+-				pag->pag_agno, &ask, &used);
++		error = xfs_refcountbt_calc_reserves(mp, agno, &ask, &used);
+ 		if (error)
+ 			goto out;
+ 
+-		error = __xfs_ag_resv_init(pag, XFS_AG_RESV_METADATA,
+-				ask, used);
++		error = xfs_finobt_calc_reserves(mp, agno, &ask, &used);
+ 		if (error)
+ 			goto out;
++
++		error = __xfs_ag_resv_init(pag, XFS_AG_RESV_METADATA,
++				ask, used);
++		if (error) {
++			/*
++			 * Because we didn't have per-AG reservations when the
++			 * finobt feature was added we might not be able to
++			 * reserve all needed blocks.  Warn and fall back to the
++			 * old and potentially buggy code in that case, but
++			 * ensure we do have the reservation for the refcountbt.
++			 */
++			ask = used = 0;
++
++			mp->m_inotbt_nores = true;
++
++			error = xfs_refcountbt_calc_reserves(mp, agno, &ask,
++					&used);
++			if (error)
++				goto out;
++
++			error = __xfs_ag_resv_init(pag, XFS_AG_RESV_METADATA,
++					ask, used);
++			if (error)
++				goto out;
++		}
+ 	}
+ 
+ 	/* Create the AGFL metadata reservation */
+ 	if (pag->pag_agfl_resv.ar_asked == 0) {
+ 		ask = used = 0;
+ 
+-		error = xfs_rmapbt_calc_reserves(pag->pag_mount, pag->pag_agno,
+-				&ask, &used);
++		error = xfs_rmapbt_calc_reserves(mp, agno, &ask, &used);
+ 		if (error)
+ 			goto out;
+ 
+@@ -256,9 +289,16 @@ xfs_ag_resv_init(
+ 			goto out;
+ 	}
+ 
++#ifdef DEBUG
++	/* need to read in the AGF for the ASSERT below to work */
++	error = xfs_alloc_pagf_init(pag->pag_mount, NULL, pag->pag_agno, 0);
++	if (error)
++		return error;
++
+ 	ASSERT(xfs_perag_resv(pag, XFS_AG_RESV_METADATA)->ar_reserved +
+ 	       xfs_perag_resv(pag, XFS_AG_RESV_AGFL)->ar_reserved <=
+ 	       pag->pagf_freeblks + pag->pagf_flcount);
++#endif
+ out:
+ 	return error;
+ }
+diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
+index f52fd63..5a508b0 100644
+--- a/fs/xfs/libxfs/xfs_bmap.c
++++ b/fs/xfs/libxfs/xfs_bmap.c
+@@ -769,8 +769,8 @@ xfs_bmap_extents_to_btree(
+ 		args.type = XFS_ALLOCTYPE_START_BNO;
+ 		args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino);
+ 	} else if (dfops->dop_low) {
+-try_another_ag:
+ 		args.type = XFS_ALLOCTYPE_START_BNO;
++try_another_ag:
+ 		args.fsbno = *firstblock;
+ 	} else {
+ 		args.type = XFS_ALLOCTYPE_NEAR_BNO;
+@@ -796,17 +796,19 @@ xfs_bmap_extents_to_btree(
+ 	if (xfs_sb_version_hasreflink(&cur->bc_mp->m_sb) &&
+ 	    args.fsbno == NULLFSBLOCK &&
+ 	    args.type == XFS_ALLOCTYPE_NEAR_BNO) {
+-		dfops->dop_low = true;
++		args.type = XFS_ALLOCTYPE_FIRST_AG;
+ 		goto try_another_ag;
+ 	}
++	if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) {
++		xfs_iroot_realloc(ip, -1, whichfork);
++		xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
++		return -ENOSPC;
++	}
+ 	/*
+ 	 * Allocation can't fail, the space was reserved.
+ 	 */
+-	ASSERT(args.fsbno != NULLFSBLOCK);
+ 	ASSERT(*firstblock == NULLFSBLOCK ||
+-	       args.agno == XFS_FSB_TO_AGNO(mp, *firstblock) ||
+-	       (dfops->dop_low &&
+-		args.agno > XFS_FSB_TO_AGNO(mp, *firstblock)));
++	       args.agno >= XFS_FSB_TO_AGNO(mp, *firstblock));
+ 	*firstblock = cur->bc_private.b.firstblock = args.fsbno;
+ 	cur->bc_private.b.allocated++;
+ 	ip->i_d.di_nblocks++;
+@@ -1278,7 +1280,6 @@ xfs_bmap_read_extents(
+ 	/* REFERENCED */
+ 	xfs_extnum_t		room;	/* number of entries there's room for */
+ 
+-	bno = NULLFSBLOCK;
+ 	mp = ip->i_mount;
+ 	ifp = XFS_IFORK_PTR(ip, whichfork);
+ 	exntf = (whichfork != XFS_DATA_FORK) ? XFS_EXTFMT_NOSTATE :
+@@ -1291,9 +1292,7 @@ xfs_bmap_read_extents(
+ 	ASSERT(level > 0);
+ 	pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
+ 	bno = be64_to_cpu(*pp);
+-	ASSERT(bno != NULLFSBLOCK);
+-	ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
+-	ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);
++
+ 	/*
+ 	 * Go down the tree until leaf level is reached, following the first
+ 	 * pointer (leftmost) at each level.
+@@ -1955,6 +1954,7 @@ xfs_bmap_add_extent_delay_real(
+ 		 */
+ 		trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
+ 		xfs_bmbt_set_startblock(ep, new->br_startblock);
++		xfs_bmbt_set_state(ep, new->br_state);
+ 		trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
+ 
+ 		(*nextents)++;
+@@ -2293,6 +2293,7 @@ STATIC int				/* error */
+ xfs_bmap_add_extent_unwritten_real(
+ 	struct xfs_trans	*tp,
+ 	xfs_inode_t		*ip,	/* incore inode pointer */
++	int			whichfork,
+ 	xfs_extnum_t		*idx,	/* extent number to update/insert */
+ 	xfs_btree_cur_t		**curp,	/* if *curp is null, not a btree */
+ 	xfs_bmbt_irec_t		*new,	/* new data to add to file extents */
+@@ -2312,12 +2313,14 @@ xfs_bmap_add_extent_unwritten_real(
+ 					/* left is 0, right is 1, prev is 2 */
+ 	int			rval=0;	/* return value (logging flags) */
+ 	int			state = 0;/* state bits, accessed thru macros */
+-	struct xfs_mount	*mp = tp->t_mountp;
++	struct xfs_mount	*mp = ip->i_mount;
+ 
+ 	*logflagsp = 0;
+ 
+ 	cur = *curp;
+-	ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
++	ifp = XFS_IFORK_PTR(ip, whichfork);
++	if (whichfork == XFS_COW_FORK)
++		state |= BMAP_COWFORK;
+ 
+ 	ASSERT(*idx >= 0);
+ 	ASSERT(*idx <= xfs_iext_count(ifp));
+@@ -2376,7 +2379,7 @@ xfs_bmap_add_extent_unwritten_real(
+ 	 * Don't set contiguous if the combined extent would be too large.
+ 	 * Also check for all-three-contiguous being too large.
+ 	 */
+-	if (*idx < xfs_iext_count(&ip->i_df) - 1) {
++	if (*idx < xfs_iext_count(ifp) - 1) {
+ 		state |= BMAP_RIGHT_VALID;
+ 		xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx + 1), &RIGHT);
+ 		if (isnullstartblock(RIGHT.br_startblock))
+@@ -2416,7 +2419,8 @@ xfs_bmap_add_extent_unwritten_real(
+ 		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+ 
+ 		xfs_iext_remove(ip, *idx + 1, 2, state);
+-		ip->i_d.di_nextents -= 2;
++		XFS_IFORK_NEXT_SET(ip, whichfork,
++				XFS_IFORK_NEXTENTS(ip, whichfork) - 2);
+ 		if (cur == NULL)
+ 			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
+ 		else {
+@@ -2459,7 +2463,8 @@ xfs_bmap_add_extent_unwritten_real(
+ 		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+ 
+ 		xfs_iext_remove(ip, *idx + 1, 1, state);
+-		ip->i_d.di_nextents--;
++		XFS_IFORK_NEXT_SET(ip, whichfork,
++				XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
+ 		if (cur == NULL)
+ 			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
+ 		else {
+@@ -2494,7 +2499,8 @@ xfs_bmap_add_extent_unwritten_real(
+ 		xfs_bmbt_set_state(ep, newext);
+ 		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+ 		xfs_iext_remove(ip, *idx + 1, 1, state);
+-		ip->i_d.di_nextents--;
++		XFS_IFORK_NEXT_SET(ip, whichfork,
++				XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
+ 		if (cur == NULL)
+ 			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
+ 		else {
+@@ -2606,7 +2612,8 @@ xfs_bmap_add_extent_unwritten_real(
+ 		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+ 
+ 		xfs_iext_insert(ip, *idx, 1, new, state);
+-		ip->i_d.di_nextents++;
++		XFS_IFORK_NEXT_SET(ip, whichfork,
++				XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
+ 		if (cur == NULL)
+ 			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
+ 		else {
+@@ -2684,7 +2691,8 @@ xfs_bmap_add_extent_unwritten_real(
+ 		++*idx;
+ 		xfs_iext_insert(ip, *idx, 1, new, state);
+ 
+-		ip->i_d.di_nextents++;
++		XFS_IFORK_NEXT_SET(ip, whichfork,
++				XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
+ 		if (cur == NULL)
+ 			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
+ 		else {
+@@ -2732,7 +2740,8 @@ xfs_bmap_add_extent_unwritten_real(
+ 		++*idx;
+ 		xfs_iext_insert(ip, *idx, 2, &r[0], state);
+ 
+-		ip->i_d.di_nextents += 2;
++		XFS_IFORK_NEXT_SET(ip, whichfork,
++				XFS_IFORK_NEXTENTS(ip, whichfork) + 2);
+ 		if (cur == NULL)
+ 			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
+ 		else {
+@@ -2786,17 +2795,17 @@ xfs_bmap_add_extent_unwritten_real(
+ 	}
+ 
+ 	/* update reverse mappings */
+-	error = xfs_rmap_convert_extent(mp, dfops, ip, XFS_DATA_FORK, new);
++	error = xfs_rmap_convert_extent(mp, dfops, ip, whichfork, new);
+ 	if (error)
+ 		goto done;
+ 
+ 	/* convert to a btree if necessary */
+-	if (xfs_bmap_needs_btree(ip, XFS_DATA_FORK)) {
++	if (xfs_bmap_needs_btree(ip, whichfork)) {
+ 		int	tmp_logflags;	/* partial log flag return val */
+ 
+ 		ASSERT(cur == NULL);
+ 		error = xfs_bmap_extents_to_btree(tp, ip, first, dfops, &cur,
+-				0, &tmp_logflags, XFS_DATA_FORK);
++				0, &tmp_logflags, whichfork);
+ 		*logflagsp |= tmp_logflags;
+ 		if (error)
+ 			goto done;
+@@ -2808,7 +2817,7 @@ xfs_bmap_add_extent_unwritten_real(
+ 		*curp = cur;
+ 	}
+ 
+-	xfs_bmap_check_leaf_extents(*curp, ip, XFS_DATA_FORK);
++	xfs_bmap_check_leaf_extents(*curp, ip, whichfork);
+ done:
+ 	*logflagsp |= rval;
+ 	return error;
+@@ -2900,7 +2909,8 @@ xfs_bmap_add_extent_hole_delay(
+ 		oldlen = startblockval(left.br_startblock) +
+ 			startblockval(new->br_startblock) +
+ 			startblockval(right.br_startblock);
+-		newlen = xfs_bmap_worst_indlen(ip, temp);
++		newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
++					 oldlen);
+ 		xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, *idx),
+ 			nullstartblock((int)newlen));
+ 		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+@@ -2921,7 +2931,8 @@ xfs_bmap_add_extent_hole_delay(
+ 		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), temp);
+ 		oldlen = startblockval(left.br_startblock) +
+ 			startblockval(new->br_startblock);
+-		newlen = xfs_bmap_worst_indlen(ip, temp);
++		newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
++					 oldlen);
+ 		xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, *idx),
+ 			nullstartblock((int)newlen));
+ 		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+@@ -2937,7 +2948,8 @@ xfs_bmap_add_extent_hole_delay(
+ 		temp = new->br_blockcount + right.br_blockcount;
+ 		oldlen = startblockval(new->br_startblock) +
+ 			startblockval(right.br_startblock);
+-		newlen = xfs_bmap_worst_indlen(ip, temp);
++		newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
++					 oldlen);
+ 		xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, *idx),
+ 			new->br_startoff,
+ 			nullstartblock((int)newlen), temp, right.br_state);
+@@ -3913,17 +3925,13 @@ xfs_bmap_btalloc(
+ 		 * the first block that was allocated.
+ 		 */
+ 		ASSERT(*ap->firstblock == NULLFSBLOCK ||
+-		       XFS_FSB_TO_AGNO(mp, *ap->firstblock) ==
+-		       XFS_FSB_TO_AGNO(mp, args.fsbno) ||
+-		       (ap->dfops->dop_low &&
+-			XFS_FSB_TO_AGNO(mp, *ap->firstblock) <
+-			XFS_FSB_TO_AGNO(mp, args.fsbno)));
++		       XFS_FSB_TO_AGNO(mp, *ap->firstblock) <=
++		       XFS_FSB_TO_AGNO(mp, args.fsbno));
+ 
+ 		ap->blkno = args.fsbno;
+ 		if (*ap->firstblock == NULLFSBLOCK)
+ 			*ap->firstblock = args.fsbno;
+-		ASSERT(nullfb || fb_agno == args.agno ||
+-		       (ap->dfops->dop_low && fb_agno < args.agno));
++		ASSERT(nullfb || fb_agno <= args.agno);
+ 		ap->length = args.len;
+ 		if (!(ap->flags & XFS_BMAPI_COWFORK))
+ 			ap->ip->i_d.di_nblocks += args.len;
+@@ -4249,6 +4257,19 @@ xfs_bmapi_read(
+ 	return 0;
+ }
+ 
++/*
++ * Add a delayed allocation extent to an inode. Blocks are reserved from the
++ * global pool and the extent inserted into the inode in-core extent tree.
++ *
++ * On entry, got refers to the first extent beyond the offset of the extent to
++ * allocate or eof is specified if no such extent exists. On return, got refers
++ * to the extent record that was inserted to the inode fork.
++ *
++ * Note that the allocated extent may have been merged with contiguous extents
++ * during insertion into the inode fork. Thus, got does not reflect the current
++ * state of the inode fork on return. If necessary, the caller can use lastx to
++ * look up the updated record in the inode fork.
++ */
+ int
+ xfs_bmapi_reserve_delalloc(
+ 	struct xfs_inode	*ip,
+@@ -4335,13 +4356,8 @@ xfs_bmapi_reserve_delalloc(
+ 	got->br_startblock = nullstartblock(indlen);
+ 	got->br_blockcount = alen;
+ 	got->br_state = XFS_EXT_NORM;
+-	xfs_bmap_add_extent_hole_delay(ip, whichfork, lastx, got);
+ 
+-	/*
+-	 * Update our extent pointer, given that xfs_bmap_add_extent_hole_delay
+-	 * might have merged it into one of the neighbouring ones.
+-	 */
+-	xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *lastx), got);
++	xfs_bmap_add_extent_hole_delay(ip, whichfork, lastx, got);
+ 
+ 	/*
+ 	 * Tag the inode if blocks were preallocated. Note that COW fork
+@@ -4353,10 +4369,6 @@ xfs_bmapi_reserve_delalloc(
+ 	if (whichfork == XFS_COW_FORK && (prealloc || aoff < off || alen > len))
+ 		xfs_inode_set_cowblocks_tag(ip);
+ 
+-	ASSERT(got->br_startoff <= aoff);
+-	ASSERT(got->br_startoff + got->br_blockcount >= aoff + alen);
+-	ASSERT(isnullstartblock(got->br_startblock));
+-	ASSERT(got->br_state == XFS_EXT_NORM);
+ 	return 0;
+ 
+ out_unreserve_blocks:
+@@ -4461,10 +4473,16 @@ xfs_bmapi_allocate(
+ 	bma->got.br_state = XFS_EXT_NORM;
+ 
+ 	/*
+-	 * A wasdelay extent has been initialized, so shouldn't be flagged
+-	 * as unwritten.
++	 * In the data fork, a wasdelay extent has been initialized, so
++	 * shouldn't be flagged as unwritten.
++	 *
++	 * For the cow fork, however, we convert delalloc reservations
++	 * (extents allocated for speculative preallocation) to
++	 * allocated unwritten extents, and only convert the unwritten
++	 * extents to real extents when we're about to write the data.
+ 	 */
+-	if (!bma->wasdel && (bma->flags & XFS_BMAPI_PREALLOC) &&
++	if ((!bma->wasdel || (bma->flags & XFS_BMAPI_COWFORK)) &&
++	    (bma->flags & XFS_BMAPI_PREALLOC) &&
+ 	    xfs_sb_version_hasextflgbit(&mp->m_sb))
+ 		bma->got.br_state = XFS_EXT_UNWRITTEN;
+ 
+@@ -4515,8 +4533,6 @@ xfs_bmapi_convert_unwritten(
+ 			(XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT))
+ 		return 0;
+ 
+-	ASSERT(whichfork != XFS_COW_FORK);
+-
+ 	/*
+ 	 * Modify (by adding) the state flag, if writing.
+ 	 */
+@@ -4541,8 +4557,8 @@ xfs_bmapi_convert_unwritten(
+ 			return error;
+ 	}
+ 
+-	error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, &bma->idx,
+-			&bma->cur, mval, bma->firstblock, bma->dfops,
++	error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, whichfork,
++			&bma->idx, &bma->cur, mval, bma->firstblock, bma->dfops,
+ 			&tmp_logflags);
+ 	/*
+ 	 * Log the inode core unconditionally in the unwritten extent conversion
+@@ -4551,8 +4567,12 @@ xfs_bmapi_convert_unwritten(
+ 	 * in the transaction for the sake of fsync(), even if nothing has
+ 	 * changed, because fsync() will not force the log for this transaction
+ 	 * unless it sees the inode pinned.
++	 *
++	 * Note: If we're only converting cow fork extents, there aren't
++	 * any on-disk updates to make, so we don't need to log anything.
+ 	 */
+-	bma->logflags |= tmp_logflags | XFS_ILOG_CORE;
++	if (whichfork != XFS_COW_FORK)
++		bma->logflags |= tmp_logflags | XFS_ILOG_CORE;
+ 	if (error)
+ 		return error;
+ 
+@@ -4626,15 +4646,15 @@ xfs_bmapi_write(
+ 	ASSERT(*nmap >= 1);
+ 	ASSERT(*nmap <= XFS_BMAP_MAX_NMAP);
+ 	ASSERT(!(flags & XFS_BMAPI_IGSTATE));
+-	ASSERT(tp != NULL);
++	ASSERT(tp != NULL ||
++	       (flags & (XFS_BMAPI_CONVERT | XFS_BMAPI_COWFORK)) ==
++			(XFS_BMAPI_CONVERT | XFS_BMAPI_COWFORK));
+ 	ASSERT(len > 0);
+ 	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL);
+ 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+ 	ASSERT(!(flags & XFS_BMAPI_REMAP) || whichfork == XFS_DATA_FORK);
+ 	ASSERT(!(flags & XFS_BMAPI_PREALLOC) || !(flags & XFS_BMAPI_REMAP));
+ 	ASSERT(!(flags & XFS_BMAPI_CONVERT) || !(flags & XFS_BMAPI_REMAP));
+-	ASSERT(!(flags & XFS_BMAPI_PREALLOC) || whichfork != XFS_COW_FORK);
+-	ASSERT(!(flags & XFS_BMAPI_CONVERT) || whichfork != XFS_COW_FORK);
+ 
+ 	/* zeroing is for currently only for data extents, not metadata */
+ 	ASSERT((flags & (XFS_BMAPI_METADATA | XFS_BMAPI_ZERO)) !=
+@@ -4840,13 +4860,9 @@ xfs_bmapi_write(
+ 	if (bma.cur) {
+ 		if (!error) {
+ 			ASSERT(*firstblock == NULLFSBLOCK ||
+-			       XFS_FSB_TO_AGNO(mp, *firstblock) ==
++			       XFS_FSB_TO_AGNO(mp, *firstblock) <=
+ 			       XFS_FSB_TO_AGNO(mp,
+-				       bma.cur->bc_private.b.firstblock) ||
+-			       (dfops->dop_low &&
+-				XFS_FSB_TO_AGNO(mp, *firstblock) <
+-				XFS_FSB_TO_AGNO(mp,
+-					bma.cur->bc_private.b.firstblock)));
++				       bma.cur->bc_private.b.firstblock));
+ 			*firstblock = bma.cur->bc_private.b.firstblock;
+ 		}
+ 		xfs_btree_del_cursor(bma.cur,
+@@ -4881,34 +4897,59 @@ xfs_bmap_split_indlen(
+ 	xfs_filblks_t			len2 = *indlen2;
+ 	xfs_filblks_t			nres = len1 + len2; /* new total res. */
+ 	xfs_filblks_t			stolen = 0;
++	xfs_filblks_t			resfactor;
+ 
+ 	/*
+ 	 * Steal as many blocks as we can to try and satisfy the worst case
+ 	 * indlen for both new extents.
+ 	 */
+-	while (nres > ores && avail) {
+-		nres--;
+-		avail--;
+-		stolen++;
+-	}
++	if (ores < nres && avail)
++		stolen = XFS_FILBLKS_MIN(nres - ores, avail);
++	ores += stolen;
++
++	 /* nothing else to do if we've satisfied the new reservation */
++	if (ores >= nres)
++		return stolen;
++
++	/*
++	 * We can't meet the total required reservation for the two extents.
++	 * Calculate the percent of the overall shortage between both extents
++	 * and apply this percentage to each of the requested indlen values.
++	 * This distributes the shortage fairly and reduces the chances that one
++	 * of the two extents is left with nothing when extents are repeatedly
++	 * split.
++	 */
++	resfactor = (ores * 100);
++	do_div(resfactor, nres);
++	len1 *= resfactor;
++	do_div(len1, 100);
++	len2 *= resfactor;
++	do_div(len2, 100);
++	ASSERT(len1 + len2 <= ores);
++	ASSERT(len1 < *indlen1 && len2 < *indlen2);
+ 
+ 	/*
+-	 * The only blocks available are those reserved for the original
+-	 * extent and what we can steal from the extent being removed.
+-	 * If this still isn't enough to satisfy the combined
+-	 * requirements for the two new extents, skim blocks off of each
+-	 * of the new reservations until they match what is available.
++	 * Hand out the remainder to each extent. If one of the two reservations
++	 * is zero, we want to make sure that one gets a block first. The loop
++	 * below starts with len1, so hand len2 a block right off the bat if it
++	 * is zero.
+ 	 */
+-	while (nres > ores) {
+-		if (len1) {
+-			len1--;
+-			nres--;
++	ores -= (len1 + len2);
++	ASSERT((*indlen1 - len1) + (*indlen2 - len2) >= ores);
++	if (ores && !len2 && *indlen2) {
++		len2++;
++		ores--;
++	}
++	while (ores) {
++		if (len1 < *indlen1) {
++			len1++;
++			ores--;
+ 		}
+-		if (nres == ores)
++		if (!ores)
+ 			break;
+-		if (len2) {
+-			len2--;
+-			nres--;
++		if (len2 < *indlen2) {
++			len2++;
++			ores--;
+ 		}
+ 	}
+ 
+@@ -5656,8 +5697,8 @@ __xfs_bunmapi(
+ 			}
+ 			del.br_state = XFS_EXT_UNWRITTEN;
+ 			error = xfs_bmap_add_extent_unwritten_real(tp, ip,
+-					&lastx, &cur, &del, firstblock, dfops,
+-					&logflags);
++					whichfork, &lastx, &cur, &del,
++					firstblock, dfops, &logflags);
+ 			if (error)
+ 				goto error0;
+ 			goto nodelete;
+@@ -5714,8 +5755,9 @@ __xfs_bunmapi(
+ 				prev.br_state = XFS_EXT_UNWRITTEN;
+ 				lastx--;
+ 				error = xfs_bmap_add_extent_unwritten_real(tp,
+-						ip, &lastx, &cur, &prev,
+-						firstblock, dfops, &logflags);
++						ip, whichfork, &lastx, &cur,
++						&prev, firstblock, dfops,
++						&logflags);
+ 				if (error)
+ 					goto error0;
+ 				goto nodelete;
+@@ -5723,8 +5765,9 @@ __xfs_bunmapi(
+ 				ASSERT(del.br_state == XFS_EXT_NORM);
+ 				del.br_state = XFS_EXT_UNWRITTEN;
+ 				error = xfs_bmap_add_extent_unwritten_real(tp,
+-						ip, &lastx, &cur, &del,
+-						firstblock, dfops, &logflags);
++						ip, whichfork, &lastx, &cur,
++						&del, firstblock, dfops,
++						&logflags);
+ 				if (error)
+ 					goto error0;
+ 				goto nodelete;
+diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c
+index f76c169..5c39186 100644
+--- a/fs/xfs/libxfs/xfs_bmap_btree.c
++++ b/fs/xfs/libxfs/xfs_bmap_btree.c
+@@ -453,8 +453,8 @@ xfs_bmbt_alloc_block(
+ 
+ 	if (args.fsbno == NULLFSBLOCK) {
+ 		args.fsbno = be64_to_cpu(start->l);
+-try_another_ag:
+ 		args.type = XFS_ALLOCTYPE_START_BNO;
++try_another_ag:
+ 		/*
+ 		 * Make sure there is sufficient room left in the AG to
+ 		 * complete a full tree split for an extent insert.  If
+@@ -494,8 +494,8 @@ xfs_bmbt_alloc_block(
+ 	if (xfs_sb_version_hasreflink(&cur->bc_mp->m_sb) &&
+ 	    args.fsbno == NULLFSBLOCK &&
+ 	    args.type == XFS_ALLOCTYPE_NEAR_BNO) {
+-		cur->bc_private.b.dfops->dop_low = true;
+ 		args.fsbno = cur->bc_private.b.firstblock;
++		args.type = XFS_ALLOCTYPE_FIRST_AG;
+ 		goto try_another_ag;
+ 	}
+ 
+@@ -512,7 +512,7 @@ xfs_bmbt_alloc_block(
+ 			goto error0;
+ 		cur->bc_private.b.dfops->dop_low = true;
+ 	}
+-	if (args.fsbno == NULLFSBLOCK) {
++	if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) {
+ 		XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+ 		*stat = 0;
+ 		return 0;
+diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
+index 21e6a6a..2849d3f 100644
+--- a/fs/xfs/libxfs/xfs_btree.c
++++ b/fs/xfs/libxfs/xfs_btree.c
+@@ -810,7 +810,8 @@ xfs_btree_read_bufl(
+ 	xfs_daddr_t		d;		/* real disk block address */
+ 	int			error;
+ 
+-	ASSERT(fsbno != NULLFSBLOCK);
++	if (!XFS_FSB_SANITY_CHECK(mp, fsbno))
++		return -EFSCORRUPTED;
+ 	d = XFS_FSB_TO_DADDR(mp, fsbno);
+ 	error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
+ 				   mp->m_bsize, lock, &bp, ops);
+diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h
+index c2b01d1..3b0fc1a 100644
+--- a/fs/xfs/libxfs/xfs_btree.h
++++ b/fs/xfs/libxfs/xfs_btree.h
+@@ -491,7 +491,7 @@ static inline int xfs_btree_get_level(struct xfs_btree_block *block)
+ #define	XFS_FILBLKS_MAX(a,b)	max_t(xfs_filblks_t, (a), (b))
+ 
+ #define	XFS_FSB_SANITY_CHECK(mp,fsb)	\
+-	(XFS_FSB_TO_AGNO(mp, fsb) < mp->m_sb.sb_agcount && \
++	(fsb && XFS_FSB_TO_AGNO(mp, fsb) < mp->m_sb.sb_agcount && \
+ 		XFS_FSB_TO_AGBNO(mp, fsb) < mp->m_sb.sb_agblocks)
+ 
+ /*
+diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c
+index f2dc1a9..1bdf288 100644
+--- a/fs/xfs/libxfs/xfs_da_btree.c
++++ b/fs/xfs/libxfs/xfs_da_btree.c
+@@ -2633,7 +2633,7 @@ xfs_da_read_buf(
+ /*
+  * Readahead the dir/attr block.
+  */
+-xfs_daddr_t
++int
+ xfs_da_reada_buf(
+ 	struct xfs_inode	*dp,
+ 	xfs_dablk_t		bno,
+@@ -2664,7 +2664,5 @@ xfs_da_reada_buf(
+ 	if (mapp != &map)
+ 		kmem_free(mapp);
+ 
+-	if (error)
+-		return -1;
+-	return mappedbno;
++	return error;
+ }
+diff --git a/fs/xfs/libxfs/xfs_da_btree.h b/fs/xfs/libxfs/xfs_da_btree.h
+index 98c75cb..4e29cb6 100644
+--- a/fs/xfs/libxfs/xfs_da_btree.h
++++ b/fs/xfs/libxfs/xfs_da_btree.h
+@@ -201,7 +201,7 @@ int	xfs_da_read_buf(struct xfs_trans *trans, struct xfs_inode *dp,
+ 			       xfs_dablk_t bno, xfs_daddr_t mappedbno,
+ 			       struct xfs_buf **bpp, int whichfork,
+ 			       const struct xfs_buf_ops *ops);
+-xfs_daddr_t	xfs_da_reada_buf(struct xfs_inode *dp, xfs_dablk_t bno,
++int	xfs_da_reada_buf(struct xfs_inode *dp, xfs_dablk_t bno,
+ 				xfs_daddr_t mapped_bno, int whichfork,
+ 				const struct xfs_buf_ops *ops);
+ int	xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno,
+diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c
+index 75a5574..bbd1238 100644
+--- a/fs/xfs/libxfs/xfs_dir2_node.c
++++ b/fs/xfs/libxfs/xfs_dir2_node.c
+@@ -155,6 +155,42 @@ const struct xfs_buf_ops xfs_dir3_free_buf_ops = {
+ 	.verify_write = xfs_dir3_free_write_verify,
+ };
+ 
++/* Everything ok in the free block header? */
++static bool
++xfs_dir3_free_header_check(
++	struct xfs_inode	*dp,
++	xfs_dablk_t		fbno,
++	struct xfs_buf		*bp)
++{
++	struct xfs_mount	*mp = dp->i_mount;
++	unsigned int		firstdb;
++	int			maxbests;
++
++	maxbests = dp->d_ops->free_max_bests(mp->m_dir_geo);
++	firstdb = (xfs_dir2_da_to_db(mp->m_dir_geo, fbno) -
++		   xfs_dir2_byte_to_db(mp->m_dir_geo, XFS_DIR2_FREE_OFFSET)) *
++			maxbests;
++	if (xfs_sb_version_hascrc(&mp->m_sb)) {
++		struct xfs_dir3_free_hdr *hdr3 = bp->b_addr;
++
++		if (be32_to_cpu(hdr3->firstdb) != firstdb)
++			return false;
++		if (be32_to_cpu(hdr3->nvalid) > maxbests)
++			return false;
++		if (be32_to_cpu(hdr3->nvalid) < be32_to_cpu(hdr3->nused))
++			return false;
++	} else {
++		struct xfs_dir2_free_hdr *hdr = bp->b_addr;
++
++		if (be32_to_cpu(hdr->firstdb) != firstdb)
++			return false;
++		if (be32_to_cpu(hdr->nvalid) > maxbests)
++			return false;
++		if (be32_to_cpu(hdr->nvalid) < be32_to_cpu(hdr->nused))
++			return false;
++	}
++	return true;
++}
+ 
+ static int
+ __xfs_dir3_free_read(
+@@ -168,11 +204,22 @@ __xfs_dir3_free_read(
+ 
+ 	err = xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp,
+ 				XFS_DATA_FORK, &xfs_dir3_free_buf_ops);
++	if (err || !*bpp)
++		return err;
++
++	/* Check things that we can't do in the verifier. */
++	if (!xfs_dir3_free_header_check(dp, fbno, *bpp)) {
++		xfs_buf_ioerror(*bpp, -EFSCORRUPTED);
++		xfs_verifier_error(*bpp);
++		xfs_trans_brelse(tp, *bpp);
++		return -EFSCORRUPTED;
++	}
+ 
+ 	/* try read returns without an error or *bpp if it lands in a hole */
+-	if (!err && tp && *bpp)
++	if (tp)
+ 		xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_FREE_BUF);
+-	return err;
++
++	return 0;
+ }
+ 
+ int
+diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
+index d45c037..a2818f6 100644
+--- a/fs/xfs/libxfs/xfs_ialloc.c
++++ b/fs/xfs/libxfs/xfs_ialloc.c
+@@ -51,8 +51,7 @@ xfs_ialloc_cluster_alignment(
+ 	struct xfs_mount	*mp)
+ {
+ 	if (xfs_sb_version_hasalign(&mp->m_sb) &&
+-	    mp->m_sb.sb_inoalignmt >=
+-			XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size))
++	    mp->m_sb.sb_inoalignmt >= xfs_icluster_size_fsb(mp))
+ 		return mp->m_sb.sb_inoalignmt;
+ 	return 1;
+ }
+diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c
+index 6c6b959..b9c351f 100644
+--- a/fs/xfs/libxfs/xfs_ialloc_btree.c
++++ b/fs/xfs/libxfs/xfs_ialloc_btree.c
+@@ -82,11 +82,12 @@ xfs_finobt_set_root(
+ }
+ 
+ STATIC int
+-xfs_inobt_alloc_block(
++__xfs_inobt_alloc_block(
+ 	struct xfs_btree_cur	*cur,
+ 	union xfs_btree_ptr	*start,
+ 	union xfs_btree_ptr	*new,
+-	int			*stat)
++	int			*stat,
++	enum xfs_ag_resv_type	resv)
+ {
+ 	xfs_alloc_arg_t		args;		/* block allocation args */
+ 	int			error;		/* error return value */
+@@ -103,6 +104,7 @@ xfs_inobt_alloc_block(
+ 	args.maxlen = 1;
+ 	args.prod = 1;
+ 	args.type = XFS_ALLOCTYPE_NEAR_BNO;
++	args.resv = resv;
+ 
+ 	error = xfs_alloc_vextent(&args);
+ 	if (error) {
+@@ -123,6 +125,27 @@ xfs_inobt_alloc_block(
+ }
+ 
+ STATIC int
++xfs_inobt_alloc_block(
++	struct xfs_btree_cur	*cur,
++	union xfs_btree_ptr	*start,
++	union xfs_btree_ptr	*new,
++	int			*stat)
++{
++	return __xfs_inobt_alloc_block(cur, start, new, stat, XFS_AG_RESV_NONE);
++}
++
++STATIC int
++xfs_finobt_alloc_block(
++	struct xfs_btree_cur	*cur,
++	union xfs_btree_ptr	*start,
++	union xfs_btree_ptr	*new,
++	int			*stat)
++{
++	return __xfs_inobt_alloc_block(cur, start, new, stat,
++			XFS_AG_RESV_METADATA);
++}
++
++STATIC int
+ xfs_inobt_free_block(
+ 	struct xfs_btree_cur	*cur,
+ 	struct xfs_buf		*bp)
+@@ -328,7 +351,7 @@ static const struct xfs_btree_ops xfs_finobt_ops = {
+ 
+ 	.dup_cursor		= xfs_inobt_dup_cursor,
+ 	.set_root		= xfs_finobt_set_root,
+-	.alloc_block		= xfs_inobt_alloc_block,
++	.alloc_block		= xfs_finobt_alloc_block,
+ 	.free_block		= xfs_inobt_free_block,
+ 	.get_minrecs		= xfs_inobt_get_minrecs,
+ 	.get_maxrecs		= xfs_inobt_get_maxrecs,
+@@ -478,3 +501,64 @@ xfs_inobt_rec_check_count(
+ 	return 0;
+ }
+ #endif	/* DEBUG */
++
++static xfs_extlen_t
++xfs_inobt_max_size(
++	struct xfs_mount	*mp)
++{
++	/* Bail out if we're uninitialized, which can happen in mkfs. */
++	if (mp->m_inobt_mxr[0] == 0)
++		return 0;
++
++	return xfs_btree_calc_size(mp, mp->m_inobt_mnr,
++		(uint64_t)mp->m_sb.sb_agblocks * mp->m_sb.sb_inopblock /
++				XFS_INODES_PER_CHUNK);
++}
++
++static int
++xfs_inobt_count_blocks(
++	struct xfs_mount	*mp,
++	xfs_agnumber_t		agno,
++	xfs_btnum_t		btnum,
++	xfs_extlen_t		*tree_blocks)
++{
++	struct xfs_buf		*agbp;
++	struct xfs_btree_cur	*cur;
++	int			error;
++
++	error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp);
++	if (error)
++		return error;
++
++	cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno, btnum);
++	error = xfs_btree_count_blocks(cur, tree_blocks);
++	xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
++	xfs_buf_relse(agbp);
++
++	return error;
++}
++
++/*
++ * Figure out how many blocks to reserve and how many are used by this btree.
++ */
++int
++xfs_finobt_calc_reserves(
++	struct xfs_mount	*mp,
++	xfs_agnumber_t		agno,
++	xfs_extlen_t		*ask,
++	xfs_extlen_t		*used)
++{
++	xfs_extlen_t		tree_len = 0;
++	int			error;
++
++	if (!xfs_sb_version_hasfinobt(&mp->m_sb))
++		return 0;
++
++	error = xfs_inobt_count_blocks(mp, agno, XFS_BTNUM_FINO, &tree_len);
++	if (error)
++		return error;
++
++	*ask += xfs_inobt_max_size(mp);
++	*used += tree_len;
++	return 0;
++}
+diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.h b/fs/xfs/libxfs/xfs_ialloc_btree.h
+index bd88453..aa81e2e 100644
+--- a/fs/xfs/libxfs/xfs_ialloc_btree.h
++++ b/fs/xfs/libxfs/xfs_ialloc_btree.h
+@@ -72,4 +72,7 @@ int xfs_inobt_rec_check_count(struct xfs_mount *,
+ #define xfs_inobt_rec_check_count(mp, rec)	0
+ #endif	/* DEBUG */
+ 
++int xfs_finobt_calc_reserves(struct xfs_mount *mp, xfs_agnumber_t agno,
++		xfs_extlen_t *ask, xfs_extlen_t *used);
++
+ #endif	/* __XFS_IALLOC_BTREE_H__ */
+diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c
+index 222e103..25c1e07 100644
+--- a/fs/xfs/libxfs/xfs_inode_fork.c
++++ b/fs/xfs/libxfs/xfs_inode_fork.c
+@@ -26,6 +26,7 @@
+ #include "xfs_inode.h"
+ #include "xfs_trans.h"
+ #include "xfs_inode_item.h"
++#include "xfs_btree.h"
+ #include "xfs_bmap_btree.h"
+ #include "xfs_bmap.h"
+ #include "xfs_error.h"
+@@ -429,11 +430,13 @@ xfs_iformat_btree(
+ 	/* REFERENCED */
+ 	int			nrecs;
+ 	int			size;
++	int			level;
+ 
+ 	ifp = XFS_IFORK_PTR(ip, whichfork);
+ 	dfp = (xfs_bmdr_block_t *)XFS_DFORK_PTR(dip, whichfork);
+ 	size = XFS_BMAP_BROOT_SPACE(mp, dfp);
+ 	nrecs = be16_to_cpu(dfp->bb_numrecs);
++	level = be16_to_cpu(dfp->bb_level);
+ 
+ 	/*
+ 	 * blow out if -- fork has less extents than can fit in
+@@ -446,7 +449,8 @@ xfs_iformat_btree(
+ 					XFS_IFORK_MAXEXT(ip, whichfork) ||
+ 		     XFS_BMDR_SPACE_CALC(nrecs) >
+ 					XFS_DFORK_SIZE(dip, mp, whichfork) ||
+-		     XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks)) {
++		     XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks) ||
++		     level == 0 || level > XFS_BTREE_MAXLEVELS) {
+ 		xfs_warn(mp, "corrupt inode %Lu (btree).",
+ 					(unsigned long long) ip->i_ino);
+ 		XFS_CORRUPTION_ERROR("xfs_iformat_btree", XFS_ERRLEVEL_LOW,
+@@ -497,15 +501,14 @@ xfs_iread_extents(
+ 	 * We know that the size is valid (it's checked in iformat_btree)
+ 	 */
+ 	ifp->if_bytes = ifp->if_real_bytes = 0;
+-	ifp->if_flags |= XFS_IFEXTENTS;
+ 	xfs_iext_add(ifp, 0, nextents);
+ 	error = xfs_bmap_read_extents(tp, ip, whichfork);
+ 	if (error) {
+ 		xfs_iext_destroy(ifp);
+-		ifp->if_flags &= ~XFS_IFEXTENTS;
+ 		return error;
+ 	}
+ 	xfs_validate_extents(ifp, nextents, XFS_EXTFMT_INODE(ip));
++	ifp->if_flags |= XFS_IFEXTENTS;
+ 	return 0;
+ }
+ /*
+diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
+index 06763f5..0457abe 100644
+--- a/fs/xfs/xfs_aops.c
++++ b/fs/xfs/xfs_aops.c
+@@ -279,54 +279,49 @@ xfs_end_io(
+ 	struct xfs_ioend	*ioend =
+ 		container_of(work, struct xfs_ioend, io_work);
+ 	struct xfs_inode	*ip = XFS_I(ioend->io_inode);
++	xfs_off_t		offset = ioend->io_offset;
++	size_t			size = ioend->io_size;
+ 	int			error = ioend->io_bio->bi_error;
+ 
+ 	/*
+-	 * Set an error if the mount has shut down and proceed with end I/O
+-	 * processing so it can perform whatever cleanups are necessary.
++	 * Just clean up the in-memory structures if the fs has been shut down.
+ 	 */
+-	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
++	if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
+ 		error = -EIO;
++		goto done;
++	}
+ 
+ 	/*
+-	 * For a CoW extent, we need to move the mapping from the CoW fork
+-	 * to the data fork.  If instead an error happened, just dump the
+-	 * new blocks.
++	 * Clean up any COW blocks on an I/O error.
+ 	 */
+-	if (ioend->io_type == XFS_IO_COW) {
+-		if (error)
+-			goto done;
+-		if (ioend->io_bio->bi_error) {
+-			error = xfs_reflink_cancel_cow_range(ip,
+-					ioend->io_offset, ioend->io_size);
+-			goto done;
++	if (unlikely(error)) {
++		switch (ioend->io_type) {
++		case XFS_IO_COW:
++			xfs_reflink_cancel_cow_range(ip, offset, size, true);
++			break;
+ 		}
+-		error = xfs_reflink_end_cow(ip, ioend->io_offset,
+-				ioend->io_size);
+-		if (error)
+-			goto done;
++
++		goto done;
+ 	}
+ 
+ 	/*
+-	 * For unwritten extents we need to issue transactions to convert a
+-	 * range to normal written extens after the data I/O has finished.
+-	 * Detecting and handling completion IO errors is done individually
+-	 * for each case as different cleanup operations need to be performed
+-	 * on error.
++	 * Success:  commit the COW or unwritten blocks if needed.
+ 	 */
+-	if (ioend->io_type == XFS_IO_UNWRITTEN) {
+-		if (error)
+-			goto done;
+-		error = xfs_iomap_write_unwritten(ip, ioend->io_offset,
+-						  ioend->io_size);
+-	} else if (ioend->io_append_trans) {
+-		error = xfs_setfilesize_ioend(ioend, error);
+-	} else {
+-		ASSERT(!xfs_ioend_is_append(ioend) ||
+-		       ioend->io_type == XFS_IO_COW);
++	switch (ioend->io_type) {
++	case XFS_IO_COW:
++		error = xfs_reflink_end_cow(ip, offset, size);
++		break;
++	case XFS_IO_UNWRITTEN:
++		error = xfs_iomap_write_unwritten(ip, offset, size);
++		break;
++	default:
++		ASSERT(!xfs_ioend_is_append(ioend) || ioend->io_append_trans);
++		break;
+ 	}
+ 
+ done:
++	if (ioend->io_append_trans)
++		error = xfs_setfilesize_ioend(ioend, error);
+ 	xfs_destroy_ioend(ioend, error);
+ }
+ 
+@@ -486,6 +481,12 @@ xfs_submit_ioend(
+ 	struct xfs_ioend	*ioend,
+ 	int			status)
+ {
++	/* Convert CoW extents to regular */
++	if (!status && ioend->io_type == XFS_IO_COW) {
++		status = xfs_reflink_convert_cow(XFS_I(ioend->io_inode),
++				ioend->io_offset, ioend->io_size);
++	}
++
+ 	/* Reserve log space if we might write beyond the on-disk inode size. */
+ 	if (!status &&
+ 	    ioend->io_type != XFS_IO_UNWRITTEN &&
+@@ -1257,44 +1258,6 @@ xfs_map_trim_size(
+ 	bh_result->b_size = mapping_size;
+ }
+ 
+-/* Bounce unaligned directio writes to the page cache. */
+-static int
+-xfs_bounce_unaligned_dio_write(
+-	struct xfs_inode	*ip,
+-	xfs_fileoff_t		offset_fsb,
+-	struct xfs_bmbt_irec	*imap)
+-{
+-	struct xfs_bmbt_irec	irec;
+-	xfs_fileoff_t		delta;
+-	bool			shared;
+-	bool			x;
+-	int			error;
+-
+-	irec = *imap;
+-	if (offset_fsb > irec.br_startoff) {
+-		delta = offset_fsb - irec.br_startoff;
+-		irec.br_blockcount -= delta;
+-		irec.br_startblock += delta;
+-		irec.br_startoff = offset_fsb;
+-	}
+-	error = xfs_reflink_trim_around_shared(ip, &irec, &shared, &x);
+-	if (error)
+-		return error;
+-
+-	/*
+-	 * We're here because we're trying to do a directio write to a
+-	 * region that isn't aligned to a filesystem block.  If any part
+-	 * of the extent is shared, fall back to buffered mode to handle
+-	 * the RMW.  This is done by returning -EREMCHG ("remote addr
+-	 * changed"), which is caught further up the call stack.
+-	 */
+-	if (shared) {
+-		trace_xfs_reflink_bounce_dio_write(ip, imap);
+-		return -EREMCHG;
+-	}
+-	return 0;
+-}
+-
+ STATIC int
+ __xfs_get_blocks(
+ 	struct inode		*inode,
+@@ -1432,13 +1395,6 @@ __xfs_get_blocks(
+ 	if (imap.br_startblock != HOLESTARTBLOCK &&
+ 	    imap.br_startblock != DELAYSTARTBLOCK &&
+ 	    (create || !ISUNWRITTEN(&imap))) {
+-		if (create && direct && !is_cow) {
+-			error = xfs_bounce_unaligned_dio_write(ip, offset_fsb,
+-					&imap);
+-			if (error)
+-				return error;
+-		}
+-
+ 		xfs_map_buffer(inode, bh_result, &imap, offset);
+ 		if (ISUNWRITTEN(&imap))
+ 			set_buffer_unwritten(bh_result);
+diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
+index efb8ccd..5c395e4 100644
+--- a/fs/xfs/xfs_bmap_util.c
++++ b/fs/xfs/xfs_bmap_util.c
+@@ -917,17 +917,18 @@ xfs_can_free_eofblocks(struct xfs_inode *ip, bool force)
+  */
+ int
+ xfs_free_eofblocks(
+-	xfs_mount_t	*mp,
+-	xfs_inode_t	*ip,
+-	bool		need_iolock)
++	struct xfs_inode	*ip)
+ {
+-	xfs_trans_t	*tp;
+-	int		error;
+-	xfs_fileoff_t	end_fsb;
+-	xfs_fileoff_t	last_fsb;
+-	xfs_filblks_t	map_len;
+-	int		nimaps;
+-	xfs_bmbt_irec_t	imap;
++	struct xfs_trans	*tp;
++	int			error;
++	xfs_fileoff_t		end_fsb;
++	xfs_fileoff_t		last_fsb;
++	xfs_filblks_t		map_len;
++	int			nimaps;
++	struct xfs_bmbt_irec	imap;
++	struct xfs_mount	*mp = ip->i_mount;
++
++	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
+ 
+ 	/*
+ 	 * Figure out if there are any blocks beyond the end
+@@ -944,6 +945,10 @@ xfs_free_eofblocks(
+ 	error = xfs_bmapi_read(ip, end_fsb, map_len, &imap, &nimaps, 0);
+ 	xfs_iunlock(ip, XFS_ILOCK_SHARED);
+ 
++	/*
++	 * If there are blocks after the end of file, truncate the file to its
++	 * current size to free them up.
++	 */
+ 	if (!error && (nimaps != 0) &&
+ 	    (imap.br_startblock != HOLESTARTBLOCK ||
+ 	     ip->i_delayed_blks)) {
+@@ -954,22 +959,13 @@ xfs_free_eofblocks(
+ 		if (error)
+ 			return error;
+ 
+-		/*
+-		 * There are blocks after the end of file.
+-		 * Free them up now by truncating the file to
+-		 * its current size.
+-		 */
+-		if (need_iolock) {
+-			if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL))
+-				return -EAGAIN;
+-		}
++		/* wait on dio to ensure i_size has settled */
++		inode_dio_wait(VFS_I(ip));
+ 
+ 		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0,
+ 				&tp);
+ 		if (error) {
+ 			ASSERT(XFS_FORCED_SHUTDOWN(mp));
+-			if (need_iolock)
+-				xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+ 			return error;
+ 		}
+ 
+@@ -997,8 +993,6 @@ xfs_free_eofblocks(
+ 		}
+ 
+ 		xfs_iunlock(ip, XFS_ILOCK_EXCL);
+-		if (need_iolock)
+-			xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+ 	}
+ 	return error;
+ }
+@@ -1393,10 +1387,16 @@ xfs_shift_file_space(
+ 	xfs_fileoff_t		stop_fsb;
+ 	xfs_fileoff_t		next_fsb;
+ 	xfs_fileoff_t		shift_fsb;
++	uint			resblks;
+ 
+ 	ASSERT(direction == SHIFT_LEFT || direction == SHIFT_RIGHT);
+ 
+ 	if (direction == SHIFT_LEFT) {
++		/*
++		 * Reserve blocks to cover potential extent merges after left
++		 * shift operations.
++		 */
++		resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
+ 		next_fsb = XFS_B_TO_FSB(mp, offset + len);
+ 		stop_fsb = XFS_B_TO_FSB(mp, VFS_I(ip)->i_size);
+ 	} else {
+@@ -1404,6 +1404,7 @@ xfs_shift_file_space(
+ 		 * If right shift, delegate the work of initialization of
+ 		 * next_fsb to xfs_bmap_shift_extent as it has ilock held.
+ 		 */
++		resblks = 0;
+ 		next_fsb = NULLFSBLOCK;
+ 		stop_fsb = XFS_B_TO_FSB(mp, offset);
+ 	}
+@@ -1415,7 +1416,7 @@ xfs_shift_file_space(
+ 	 * into the accessible region of the file.
+ 	 */
+ 	if (xfs_can_free_eofblocks(ip, true)) {
+-		error = xfs_free_eofblocks(mp, ip, false);
++		error = xfs_free_eofblocks(ip);
+ 		if (error)
+ 			return error;
+ 	}
+@@ -1445,21 +1446,14 @@ xfs_shift_file_space(
+ 	}
+ 
+ 	while (!error && !done) {
+-		/*
+-		 * We would need to reserve permanent block for transaction.
+-		 * This will come into picture when after shifting extent into
+-		 * hole we found that adjacent extents can be merged which
+-		 * may lead to freeing of a block during record update.
+-		 */
+-		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write,
+-				XFS_DIOSTRAT_SPACE_RES(mp, 0), 0, 0, &tp);
++		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0,
++					&tp);
+ 		if (error)
+ 			break;
+ 
+ 		xfs_ilock(ip, XFS_ILOCK_EXCL);
+ 		error = xfs_trans_reserve_quota(tp, mp, ip->i_udquot,
+-				ip->i_gdquot, ip->i_pdquot,
+-				XFS_DIOSTRAT_SPACE_RES(mp, 0), 0,
++				ip->i_gdquot, ip->i_pdquot, resblks, 0,
+ 				XFS_QMOPT_RES_REGBLKS);
+ 		if (error)
+ 			goto out_trans_cancel;
+diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h
+index 68a621a..f100539 100644
+--- a/fs/xfs/xfs_bmap_util.h
++++ b/fs/xfs/xfs_bmap_util.h
+@@ -63,8 +63,7 @@ int	xfs_insert_file_space(struct xfs_inode *, xfs_off_t offset,
+ 
+ /* EOF block manipulation functions */
+ bool	xfs_can_free_eofblocks(struct xfs_inode *ip, bool force);
+-int	xfs_free_eofblocks(struct xfs_mount *mp, struct xfs_inode *ip,
+-			   bool need_iolock);
++int	xfs_free_eofblocks(struct xfs_inode *ip);
+ 
+ int	xfs_swap_extents(struct xfs_inode *ip, struct xfs_inode *tip,
+ 			 struct xfs_swapext *sx);
+diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
+index 2975cb2..0306168 100644
+--- a/fs/xfs/xfs_buf_item.c
++++ b/fs/xfs/xfs_buf_item.c
+@@ -1162,6 +1162,7 @@ xfs_buf_iodone_callbacks(
+ 	 */
+ 	bp->b_last_error = 0;
+ 	bp->b_retries = 0;
++	bp->b_first_retry_time = 0;
+ 
+ 	xfs_buf_do_callbacks(bp);
+ 	bp->b_fspriv = NULL;
+diff --git a/fs/xfs/xfs_extent_busy.c b/fs/xfs/xfs_extent_busy.c
+index 162dc18..29c2f99 100644
+--- a/fs/xfs/xfs_extent_busy.c
++++ b/fs/xfs/xfs_extent_busy.c
+@@ -45,18 +45,7 @@ xfs_extent_busy_insert(
+ 	struct rb_node		**rbp;
+ 	struct rb_node		*parent = NULL;
+ 
+-	new = kmem_zalloc(sizeof(struct xfs_extent_busy), KM_MAYFAIL);
+-	if (!new) {
+-		/*
+-		 * No Memory!  Since it is now not possible to track the free
+-		 * block, make this a synchronous transaction to insure that
+-		 * the block is not reused before this transaction commits.
+-		 */
+-		trace_xfs_extent_busy_enomem(tp->t_mountp, agno, bno, len);
+-		xfs_trans_set_sync(tp);
+-		return;
+-	}
+-
++	new = kmem_zalloc(sizeof(struct xfs_extent_busy), KM_SLEEP);
+ 	new->agno = agno;
+ 	new->bno = bno;
+ 	new->length = len;
+diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
+index 9a5d64b..1209ad2 100644
+--- a/fs/xfs/xfs_file.c
++++ b/fs/xfs/xfs_file.c
+@@ -554,6 +554,15 @@ xfs_file_dio_aio_write(
+ 	if ((iocb->ki_pos & mp->m_blockmask) ||
+ 	    ((iocb->ki_pos + count) & mp->m_blockmask)) {
+ 		unaligned_io = 1;
++
++		/*
++		 * We can't properly handle unaligned direct I/O to reflink
++		 * files yet, as we can't unshare a partial block.
++		 */
++		if (xfs_is_reflink_inode(ip)) {
++			trace_xfs_reflink_bounce_dio_write(ip, iocb->ki_pos, count);
++			return -EREMCHG;
++		}
+ 		iolock = XFS_IOLOCK_EXCL;
+ 	} else {
+ 		iolock = XFS_IOLOCK_SHARED;
+@@ -675,8 +684,10 @@ xfs_file_buffered_aio_write(
+ 	struct xfs_inode	*ip = XFS_I(inode);
+ 	ssize_t			ret;
+ 	int			enospc = 0;
+-	int			iolock = XFS_IOLOCK_EXCL;
++	int			iolock;
+ 
++write_retry:
++	iolock = XFS_IOLOCK_EXCL;
+ 	xfs_rw_ilock(ip, iolock);
+ 
+ 	ret = xfs_file_aio_write_checks(iocb, from, &iolock);
+@@ -686,7 +697,6 @@ xfs_file_buffered_aio_write(
+ 	/* We can write back this queue in page reclaim */
+ 	current->backing_dev_info = inode_to_bdi(inode);
+ 
+-write_retry:
+ 	trace_xfs_file_buffered_write(ip, iov_iter_count(from), iocb->ki_pos);
+ 	ret = iomap_file_buffered_write(iocb, from, &xfs_iomap_ops);
+ 	if (likely(ret >= 0))
+@@ -702,18 +712,21 @@ xfs_file_buffered_aio_write(
+ 	 * running at the same time.
+ 	 */
+ 	if (ret == -EDQUOT && !enospc) {
++		xfs_rw_iunlock(ip, iolock);
+ 		enospc = xfs_inode_free_quota_eofblocks(ip);
+ 		if (enospc)
+ 			goto write_retry;
+ 		enospc = xfs_inode_free_quota_cowblocks(ip);
+ 		if (enospc)
+ 			goto write_retry;
++		iolock = 0;
+ 	} else if (ret == -ENOSPC && !enospc) {
+ 		struct xfs_eofblocks eofb = {0};
+ 
+ 		enospc = 1;
+ 		xfs_flush_inodes(ip->i_mount);
+-		eofb.eof_scan_owner = ip->i_ino; /* for locking */
++
++		xfs_rw_iunlock(ip, iolock);
+ 		eofb.eof_flags = XFS_EOF_FLAGS_SYNC;
+ 		xfs_icache_free_eofblocks(ip->i_mount, &eofb);
+ 		goto write_retry;
+@@ -721,7 +734,8 @@ xfs_file_buffered_aio_write(
+ 
+ 	current->backing_dev_info = NULL;
+ out:
+-	xfs_rw_iunlock(ip, iolock);
++	if (iolock)
++		xfs_rw_iunlock(ip, iolock);
+ 	return ret;
+ }
+ 
+@@ -987,9 +1001,9 @@ xfs_dir_open(
+ 	 */
+ 	mode = xfs_ilock_data_map_shared(ip);
+ 	if (ip->i_d.di_nextents > 0)
+-		xfs_dir3_data_readahead(ip, 0, -1);
++		error = xfs_dir3_data_readahead(ip, 0, -1);
+ 	xfs_iunlock(ip, mode);
+-	return 0;
++	return error;
+ }
+ 
+ STATIC int
+diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
+index 29cc988..3fb1f3f 100644
+--- a/fs/xfs/xfs_icache.c
++++ b/fs/xfs/xfs_icache.c
+@@ -1324,13 +1324,10 @@ xfs_inode_free_eofblocks(
+ 	int			flags,
+ 	void			*args)
+ {
+-	int ret;
++	int ret = 0;
+ 	struct xfs_eofblocks *eofb = args;
+-	bool need_iolock = true;
+ 	int match;
+ 
+-	ASSERT(!eofb || (eofb && eofb->eof_scan_owner != 0));
+-
+ 	if (!xfs_can_free_eofblocks(ip, false)) {
+ 		/* inode could be preallocated or append-only */
+ 		trace_xfs_inode_free_eofblocks_invalid(ip);
+@@ -1358,21 +1355,19 @@ xfs_inode_free_eofblocks(
+ 		if (eofb->eof_flags & XFS_EOF_FLAGS_MINFILESIZE &&
+ 		    XFS_ISIZE(ip) < eofb->eof_min_file_size)
+ 			return 0;
+-
+-		/*
+-		 * A scan owner implies we already hold the iolock. Skip it in
+-		 * xfs_free_eofblocks() to avoid deadlock. This also eliminates
+-		 * the possibility of EAGAIN being returned.
+-		 */
+-		if (eofb->eof_scan_owner == ip->i_ino)
+-			need_iolock = false;
+ 	}
+ 
+-	ret = xfs_free_eofblocks(ip->i_mount, ip, need_iolock);
+-
+-	/* don't revisit the inode if we're not waiting */
+-	if (ret == -EAGAIN && !(flags & SYNC_WAIT))
+-		ret = 0;
++	/*
++	 * If the caller is waiting, return -EAGAIN to keep the background
++	 * scanner moving and revisit the inode in a subsequent pass.
++	 */
++	if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) {
++		if (flags & SYNC_WAIT)
++			ret = -EAGAIN;
++		return ret;
++	}
++	ret = xfs_free_eofblocks(ip);
++	xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+ 
+ 	return ret;
+ }
+@@ -1419,15 +1414,10 @@ __xfs_inode_free_quota_eofblocks(
+ 	struct xfs_eofblocks eofb = {0};
+ 	struct xfs_dquot *dq;
+ 
+-	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
+-
+ 	/*
+-	 * Set the scan owner to avoid a potential livelock. Otherwise, the scan
+-	 * can repeatedly trylock on the inode we're currently processing. We
+-	 * run a sync scan to increase effectiveness and use the union filter to
++	 * Run a sync scan to increase effectiveness and use the union filter to
+ 	 * cover all applicable quotas in a single scan.
+ 	 */
+-	eofb.eof_scan_owner = ip->i_ino;
+ 	eofb.eof_flags = XFS_EOF_FLAGS_UNION|XFS_EOF_FLAGS_SYNC;
+ 
+ 	if (XFS_IS_UQUOTA_ENFORCED(ip->i_mount)) {
+@@ -1579,12 +1569,9 @@ xfs_inode_free_cowblocks(
+ {
+ 	int ret;
+ 	struct xfs_eofblocks *eofb = args;
+-	bool need_iolock = true;
+ 	int match;
+ 	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
+ 
+-	ASSERT(!eofb || (eofb && eofb->eof_scan_owner != 0));
+-
+ 	/*
+ 	 * Just clear the tag if we have an empty cow fork or none at all. It's
+ 	 * possible the inode was fully unshared since it was originally tagged.
+@@ -1617,28 +1604,16 @@ xfs_inode_free_cowblocks(
+ 		if (eofb->eof_flags & XFS_EOF_FLAGS_MINFILESIZE &&
+ 		    XFS_ISIZE(ip) < eofb->eof_min_file_size)
+ 			return 0;
+-
+-		/*
+-		 * A scan owner implies we already hold the iolock. Skip it in
+-		 * xfs_free_eofblocks() to avoid deadlock. This also eliminates
+-		 * the possibility of EAGAIN being returned.
+-		 */
+-		if (eofb->eof_scan_owner == ip->i_ino)
+-			need_iolock = false;
+ 	}
+ 
+ 	/* Free the CoW blocks */
+-	if (need_iolock) {
+-		xfs_ilock(ip, XFS_IOLOCK_EXCL);
+-		xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
+-	}
++	xfs_ilock(ip, XFS_IOLOCK_EXCL);
++	xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
+ 
+-	ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF);
++	ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, false);
+ 
+-	if (need_iolock) {
+-		xfs_iunlock(ip, XFS_MMAPLOCK_EXCL);
+-		xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+-	}
++	xfs_iunlock(ip, XFS_MMAPLOCK_EXCL);
++	xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+ 
+ 	return ret;
+ }
+diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h
+index a1e02f4..8a7c849 100644
+--- a/fs/xfs/xfs_icache.h
++++ b/fs/xfs/xfs_icache.h
+@@ -27,7 +27,6 @@ struct xfs_eofblocks {
+ 	kgid_t		eof_gid;
+ 	prid_t		eof_prid;
+ 	__u64		eof_min_file_size;
+-	xfs_ino_t	eof_scan_owner;
+ };
+ 
+ #define SYNC_WAIT		0x0001	/* wait for i/o to complete */
+@@ -102,7 +101,6 @@ xfs_fs_eofblocks_from_user(
+ 	dst->eof_flags = src->eof_flags;
+ 	dst->eof_prid = src->eof_prid;
+ 	dst->eof_min_file_size = src->eof_min_file_size;
+-	dst->eof_scan_owner = NULLFSINO;
+ 
+ 	dst->eof_uid = INVALID_UID;
+ 	if (src->eof_flags & XFS_EOF_FLAGS_UID) {
+diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
+index 512ff13..e50636c 100644
+--- a/fs/xfs/xfs_inode.c
++++ b/fs/xfs/xfs_inode.c
+@@ -1624,7 +1624,7 @@ xfs_itruncate_extents(
+ 
+ 	/* Remove all pending CoW reservations. */
+ 	error = xfs_reflink_cancel_cow_blocks(ip, &tp, first_unmap_block,
+-			last_block);
++			last_block, true);
+ 	if (error)
+ 		goto out;
+ 
+@@ -1701,32 +1701,34 @@ xfs_release(
+ 	if (xfs_can_free_eofblocks(ip, false)) {
+ 
+ 		/*
++		 * Check if the inode is being opened, written and closed
++		 * frequently and we have delayed allocation blocks outstanding
++		 * (e.g. streaming writes from the NFS server), truncating the
++		 * blocks past EOF will cause fragmentation to occur.
++		 *
++		 * In this case don't do the truncation, but we have to be
++		 * careful how we detect this case. Blocks beyond EOF show up as
++		 * i_delayed_blks even when the inode is clean, so we need to
++		 * truncate them away first before checking for a dirty release.
++		 * Hence on the first dirty close we will still remove the
++		 * speculative allocation, but after that we will leave it in
++		 * place.
++		 */
++		if (xfs_iflags_test(ip, XFS_IDIRTY_RELEASE))
++			return 0;
++		/*
+ 		 * If we can't get the iolock just skip truncating the blocks
+ 		 * past EOF because we could deadlock with the mmap_sem
+-		 * otherwise.  We'll get another chance to drop them once the
++		 * otherwise. We'll get another chance to drop them once the
+ 		 * last reference to the inode is dropped, so we'll never leak
+ 		 * blocks permanently.
+-		 *
+-		 * Further, check if the inode is being opened, written and
+-		 * closed frequently and we have delayed allocation blocks
+-		 * outstanding (e.g. streaming writes from the NFS server),
+-		 * truncating the blocks past EOF will cause fragmentation to
+-		 * occur.
+-		 *
+-		 * In this case don't do the truncation, either, but we have to
+-		 * be careful how we detect this case. Blocks beyond EOF show
+-		 * up as i_delayed_blks even when the inode is clean, so we
+-		 * need to truncate them away first before checking for a dirty
+-		 * release. Hence on the first dirty close we will still remove
+-		 * the speculative allocation, but after that we will leave it
+-		 * in place.
+ 		 */
+-		if (xfs_iflags_test(ip, XFS_IDIRTY_RELEASE))
+-			return 0;
+-
+-		error = xfs_free_eofblocks(mp, ip, true);
+-		if (error && error != -EAGAIN)
+-			return error;
++		if (xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) {
++			error = xfs_free_eofblocks(ip);
++			xfs_iunlock(ip, XFS_IOLOCK_EXCL);
++			if (error)
++				return error;
++		}
+ 
+ 		/* delalloc blocks after truncation means it really is dirty */
+ 		if (ip->i_delayed_blks)
+@@ -1801,22 +1803,23 @@ xfs_inactive_ifree(
+ 	int			error;
+ 
+ 	/*
+-	 * The ifree transaction might need to allocate blocks for record
+-	 * insertion to the finobt. We don't want to fail here at ENOSPC, so
+-	 * allow ifree to dip into the reserved block pool if necessary.
+-	 *
+-	 * Freeing large sets of inodes generally means freeing inode chunks,
+-	 * directory and file data blocks, so this should be relatively safe.
+-	 * Only under severe circumstances should it be possible to free enough
+-	 * inodes to exhaust the reserve block pool via finobt expansion while
+-	 * at the same time not creating free space in the filesystem.
++	 * We try to use a per-AG reservation for any block needed by the finobt
++	 * tree, but as the finobt feature predates the per-AG reservation
++	 * support a degraded file system might not have enough space for the
++	 * reservation at mount time.  In that case try to dip into the reserved
++	 * pool and pray.
+ 	 *
+ 	 * Send a warning if the reservation does happen to fail, as the inode
+ 	 * now remains allocated and sits on the unlinked list until the fs is
+ 	 * repaired.
+ 	 */
+-	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ifree,
+-			XFS_IFREE_SPACE_RES(mp), 0, XFS_TRANS_RESERVE, &tp);
++	if (unlikely(mp->m_inotbt_nores)) {
++		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ifree,
++				XFS_IFREE_SPACE_RES(mp), 0, XFS_TRANS_RESERVE,
++				&tp);
++	} else {
++		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ifree, 0, 0, 0, &tp);
++	}
+ 	if (error) {
+ 		if (error == -ENOSPC) {
+ 			xfs_warn_ratelimited(mp,
+@@ -1912,8 +1915,11 @@ xfs_inactive(
+ 		 * cache. Post-eof blocks must be freed, lest we end up with
+ 		 * broken free space accounting.
+ 		 */
+-		if (xfs_can_free_eofblocks(ip, true))
+-			xfs_free_eofblocks(mp, ip, false);
++		if (xfs_can_free_eofblocks(ip, true)) {
++			xfs_ilock(ip, XFS_IOLOCK_EXCL);
++			xfs_free_eofblocks(ip);
++			xfs_iunlock(ip, XFS_IOLOCK_EXCL);
++		}
+ 
+ 		return;
+ 	}
+diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
+index e888961..3605624 100644
+--- a/fs/xfs/xfs_iomap.c
++++ b/fs/xfs/xfs_iomap.c
+@@ -637,6 +637,11 @@ xfs_file_iomap_begin_delay(
+ 		goto out_unlock;
+ 	}
+ 
++	/*
++	 * Flag newly allocated delalloc blocks with IOMAP_F_NEW so we punch
++	 * them out if the write happens to fail.
++	 */
++	iomap->flags = IOMAP_F_NEW;
+ 	trace_xfs_iomap_alloc(ip, offset, count, 0, &got);
+ done:
+ 	if (isnullstartblock(got.br_startblock))
+@@ -685,7 +690,7 @@ xfs_iomap_write_allocate(
+ 	int		nres;
+ 
+ 	if (whichfork == XFS_COW_FORK)
+-		flags |= XFS_BMAPI_COWFORK;
++		flags |= XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC;
+ 
+ 	/*
+ 	 * Make sure that the dquots are there.
+@@ -1061,7 +1066,8 @@ xfs_file_iomap_end_delalloc(
+ 	struct xfs_inode	*ip,
+ 	loff_t			offset,
+ 	loff_t			length,
+-	ssize_t			written)
++	ssize_t			written,
++	struct iomap		*iomap)
+ {
+ 	struct xfs_mount	*mp = ip->i_mount;
+ 	xfs_fileoff_t		start_fsb;
+@@ -1080,14 +1086,14 @@ xfs_file_iomap_end_delalloc(
+ 	end_fsb = XFS_B_TO_FSB(mp, offset + length);
+ 
+ 	/*
+-	 * Trim back delalloc blocks if we didn't manage to write the whole
+-	 * range reserved.
++	 * Trim delalloc blocks if they were allocated by this write and we
++	 * didn't manage to write the whole range.
+ 	 *
+ 	 * We don't need to care about racing delalloc as we hold i_mutex
+ 	 * across the reserve/allocate/unreserve calls. If there are delalloc
+ 	 * blocks in the range, they are ours.
+ 	 */
+-	if (start_fsb < end_fsb) {
++	if ((iomap->flags & IOMAP_F_NEW) && start_fsb < end_fsb) {
+ 		truncate_pagecache_range(VFS_I(ip), XFS_FSB_TO_B(mp, start_fsb),
+ 					 XFS_FSB_TO_B(mp, end_fsb) - 1);
+ 
+@@ -1117,7 +1123,7 @@ xfs_file_iomap_end(
+ {
+ 	if ((flags & IOMAP_WRITE) && iomap->type == IOMAP_DELALLOC)
+ 		return xfs_file_iomap_end_delalloc(XFS_I(inode), offset,
+-				length, written);
++				length, written, iomap);
+ 	return 0;
+ }
+ 
+diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
+index b341f10..13796f2 100644
+--- a/fs/xfs/xfs_mount.c
++++ b/fs/xfs/xfs_mount.c
+@@ -502,8 +502,7 @@ STATIC void
+ xfs_set_inoalignment(xfs_mount_t *mp)
+ {
+ 	if (xfs_sb_version_hasalign(&mp->m_sb) &&
+-	    mp->m_sb.sb_inoalignmt >=
+-	    XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size))
++		mp->m_sb.sb_inoalignmt >= xfs_icluster_size_fsb(mp))
+ 		mp->m_inoalign_mask = mp->m_sb.sb_inoalignmt - 1;
+ 	else
+ 		mp->m_inoalign_mask = 0;
+diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
+index 819b80b..1bf878b 100644
+--- a/fs/xfs/xfs_mount.h
++++ b/fs/xfs/xfs_mount.h
+@@ -140,6 +140,7 @@ typedef struct xfs_mount {
+ 	int			m_fixedfsid[2];	/* unchanged for life of FS */
+ 	uint			m_dmevmask;	/* DMI events for this FS */
+ 	__uint64_t		m_flags;	/* global mount flags */
++	bool			m_inotbt_nores; /* no per-AG finobt resv. */
+ 	int			m_ialloc_inos;	/* inodes in inode allocation */
+ 	int			m_ialloc_blks;	/* blocks in inode allocation */
+ 	int			m_ialloc_min_blks;/* min blocks in sparse inode
+diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
+index 4d3f74e..2252f16 100644
+--- a/fs/xfs/xfs_reflink.c
++++ b/fs/xfs/xfs_reflink.c
+@@ -82,11 +82,22 @@
+  * mappings are a reservation against the free space in the filesystem;
+  * adjacent mappings can also be combined into fewer larger mappings.
+  *
++ * As an optimization, the CoW extent size hint (cowextsz) creates
++ * outsized aligned delalloc reservations in the hope of landing out of
++ * order nearby CoW writes in a single extent on disk, thereby reducing
++ * fragmentation and improving future performance.
++ *
++ * D: --RRRRRRSSSRRRRRRRR--- (data fork)
++ * C: ------DDDDDDD--------- (CoW fork)
++ *
+  * When dirty pages are being written out (typically in writepage), the
+- * delalloc reservations are converted into real mappings by allocating
+- * blocks and replacing the delalloc mapping with real ones.  A delalloc
+- * mapping can be replaced by several real ones if the free space is
+- * fragmented.
++ * delalloc reservations are converted into unwritten mappings by
++ * allocating blocks and replacing the delalloc mapping with real ones.
++ * A delalloc mapping can be replaced by several unwritten ones if the
++ * free space is fragmented.
++ *
++ * D: --RRRRRRSSSRRRRRRRR---
++ * C: ------UUUUUUU---------
+  *
+  * We want to adapt the delalloc mechanism for copy-on-write, since the
+  * write paths are similar.  The first two steps (creating the reservation
+@@ -101,13 +112,29 @@
+  * Block-aligned directio writes will use the same mechanism as buffered
+  * writes.
+  *
++ * Just prior to submitting the actual disk write requests, we convert
++ * the extents representing the range of the file actually being written
++ * (as opposed to extra pieces created for the cowextsize hint) to real
++ * extents.  This will become important in the next step:
++ *
++ * D: --RRRRRRSSSRRRRRRRR---
++ * C: ------UUrrUUU---------
++ *
+  * CoW remapping must be done after the data block write completes,
+  * because we don't want to destroy the old data fork map until we're sure
+  * the new block has been written.  Since the new mappings are kept in a
+  * separate fork, we can simply iterate these mappings to find the ones
+  * that cover the file blocks that we just CoW'd.  For each extent, simply
+  * unmap the corresponding range in the data fork, map the new range into
+- * the data fork, and remove the extent from the CoW fork.
++ * the data fork, and remove the extent from the CoW fork.  Because of
++ * the presence of the cowextsize hint, however, we must be careful
++ * only to remap the blocks that we've actually written out --  we must
++ * never remap delalloc reservations nor CoW staging blocks that have
++ * yet to be written.  This corresponds exactly to the real extents in
++ * the CoW fork:
++ *
++ * D: --RRRRRRrrSRRRRRRRR---
++ * C: ------UU--UUU---------
+  *
+  * Since the remapping operation can be applied to an arbitrary file
+  * range, we record the need for the remap step as a flag in the ioend
+@@ -296,6 +323,65 @@ xfs_reflink_reserve_cow(
+ 	return 0;
+ }
+ 
++/* Convert part of an unwritten CoW extent to a real one. */
++STATIC int
++xfs_reflink_convert_cow_extent(
++	struct xfs_inode		*ip,
++	struct xfs_bmbt_irec		*imap,
++	xfs_fileoff_t			offset_fsb,
++	xfs_filblks_t			count_fsb,
++	struct xfs_defer_ops		*dfops)
++{
++	struct xfs_bmbt_irec		irec = *imap;
++	xfs_fsblock_t			first_block;
++	int				nimaps = 1;
++
++	if (imap->br_state == XFS_EXT_NORM)
++		return 0;
++
++	xfs_trim_extent(&irec, offset_fsb, count_fsb);
++	trace_xfs_reflink_convert_cow(ip, &irec);
++	if (irec.br_blockcount == 0)
++		return 0;
++	return xfs_bmapi_write(NULL, ip, irec.br_startoff, irec.br_blockcount,
++			XFS_BMAPI_COWFORK | XFS_BMAPI_CONVERT, &first_block,
++			0, &irec, &nimaps, dfops);
++}
++
++/* Convert all of the unwritten CoW extents in a file's range to real ones. */
++int
++xfs_reflink_convert_cow(
++	struct xfs_inode	*ip,
++	xfs_off_t		offset,
++	xfs_off_t		count)
++{
++	struct xfs_bmbt_irec	got;
++	struct xfs_defer_ops	dfops;
++	struct xfs_mount	*mp = ip->i_mount;
++	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
++	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset);
++	xfs_fileoff_t		end_fsb = XFS_B_TO_FSB(mp, offset + count);
++	xfs_extnum_t		idx;
++	bool			found;
++	int			error = 0;
++
++	xfs_ilock(ip, XFS_ILOCK_EXCL);
++
++	/* Convert all the extents to real from unwritten. */
++	for (found = xfs_iext_lookup_extent(ip, ifp, offset_fsb, &idx, &got);
++	     found && got.br_startoff < end_fsb;
++	     found = xfs_iext_get_extent(ifp, ++idx, &got)) {
++		error = xfs_reflink_convert_cow_extent(ip, &got, offset_fsb,
++				end_fsb - offset_fsb, &dfops);
++		if (error)
++			break;
++	}
++
++	/* Finish up. */
++	xfs_iunlock(ip, XFS_ILOCK_EXCL);
++	return error;
++}
++
+ /* Allocate all CoW reservations covering a range of blocks in a file. */
+ static int
+ __xfs_reflink_allocate_cow(
+@@ -328,6 +414,7 @@ __xfs_reflink_allocate_cow(
+ 		goto out_unlock;
+ 	ASSERT(nimaps == 1);
+ 
++	/* Make sure there's a CoW reservation for it. */
+ 	error = xfs_reflink_reserve_cow(ip, &imap, &shared);
+ 	if (error)
+ 		goto out_trans_cancel;
+@@ -337,14 +424,16 @@ __xfs_reflink_allocate_cow(
+ 		goto out_trans_cancel;
+ 	}
+ 
++	/* Allocate the entire reservation as unwritten blocks. */
+ 	xfs_trans_ijoin(tp, ip, 0);
+ 	error = xfs_bmapi_write(tp, ip, imap.br_startoff, imap.br_blockcount,
+-			XFS_BMAPI_COWFORK, &first_block,
++			XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC, &first_block,
+ 			XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK),
+ 			&imap, &nimaps, &dfops);
+ 	if (error)
+ 		goto out_trans_cancel;
+ 
++	/* Finish up. */
+ 	error = xfs_defer_finish(&tp, &dfops, NULL);
+ 	if (error)
+ 		goto out_trans_cancel;
+@@ -389,11 +478,12 @@ xfs_reflink_allocate_cow_range(
+ 		if (error) {
+ 			trace_xfs_reflink_allocate_cow_range_error(ip, error,
+ 					_RET_IP_);
+-			break;
++			return error;
+ 		}
+ 	}
+ 
+-	return error;
++	/* Convert the CoW extents to regular. */
++	return xfs_reflink_convert_cow(ip, offset, count);
+ }
+ 
+ /*
+@@ -481,14 +571,18 @@ xfs_reflink_trim_irec_to_next_cow(
+ }
+ 
+ /*
+- * Cancel all pending CoW reservations for some block range of an inode.
++ * Cancel CoW reservations for some block range of an inode.
++ *
++ * If cancel_real is true this function cancels all COW fork extents for the
++ * inode; if cancel_real is false, real extents are not cleared.
+  */
+ int
+ xfs_reflink_cancel_cow_blocks(
+ 	struct xfs_inode		*ip,
+ 	struct xfs_trans		**tpp,
+ 	xfs_fileoff_t			offset_fsb,
+-	xfs_fileoff_t			end_fsb)
++	xfs_fileoff_t			end_fsb,
++	bool				cancel_real)
+ {
+ 	struct xfs_ifork		*ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
+ 	struct xfs_bmbt_irec		got, prev, del;
+@@ -515,7 +609,7 @@ xfs_reflink_cancel_cow_blocks(
+ 					&idx, &got, &del);
+ 			if (error)
+ 				break;
+-		} else {
++		} else if (del.br_state == XFS_EXT_UNWRITTEN || cancel_real) {
+ 			xfs_trans_ijoin(*tpp, ip, 0);
+ 			xfs_defer_init(&dfops, &firstfsb);
+ 
+@@ -558,13 +652,17 @@ xfs_reflink_cancel_cow_blocks(
+ }
+ 
+ /*
+- * Cancel all pending CoW reservations for some byte range of an inode.
++ * Cancel CoW reservations for some byte range of an inode.
++ *
++ * If cancel_real is true this function cancels all COW fork extents for the
++ * inode; if cancel_real is false, real extents are not cleared.
+  */
+ int
+ xfs_reflink_cancel_cow_range(
+ 	struct xfs_inode	*ip,
+ 	xfs_off_t		offset,
+-	xfs_off_t		count)
++	xfs_off_t		count,
++	bool			cancel_real)
+ {
+ 	struct xfs_trans	*tp;
+ 	xfs_fileoff_t		offset_fsb;
+@@ -590,7 +688,8 @@ xfs_reflink_cancel_cow_range(
+ 	xfs_trans_ijoin(tp, ip, 0);
+ 
+ 	/* Scrape out the old CoW reservations */
+-	error = xfs_reflink_cancel_cow_blocks(ip, &tp, offset_fsb, end_fsb);
++	error = xfs_reflink_cancel_cow_blocks(ip, &tp, offset_fsb, end_fsb,
++			cancel_real);
+ 	if (error)
+ 		goto out_cancel;
+ 
+@@ -669,6 +768,16 @@ xfs_reflink_end_cow(
+ 
+ 		ASSERT(!isnullstartblock(got.br_startblock));
+ 
++		/*
++		 * Don't remap unwritten extents; these are
++		 * speculatively preallocated CoW extents that have been
++		 * allocated but have not yet been involved in a write.
++		 */
++		if (got.br_state == XFS_EXT_UNWRITTEN) {
++			idx--;
++			goto next_extent;
++		}
++
+ 		/* Unmap the old blocks in the data fork. */
+ 		xfs_defer_init(&dfops, &firstfsb);
+ 		rlen = del.br_blockcount;
+@@ -885,13 +994,14 @@ STATIC int
+ xfs_reflink_update_dest(
+ 	struct xfs_inode	*dest,
+ 	xfs_off_t		newlen,
+-	xfs_extlen_t		cowextsize)
++	xfs_extlen_t		cowextsize,
++	bool			is_dedupe)
+ {
+ 	struct xfs_mount	*mp = dest->i_mount;
+ 	struct xfs_trans	*tp;
+ 	int			error;
+ 
+-	if (newlen <= i_size_read(VFS_I(dest)) && cowextsize == 0)
++	if (is_dedupe && newlen <= i_size_read(VFS_I(dest)) && cowextsize == 0)
+ 		return 0;
+ 
+ 	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp);
+@@ -912,6 +1022,10 @@ xfs_reflink_update_dest(
+ 		dest->i_d.di_flags2 |= XFS_DIFLAG2_COWEXTSIZE;
+ 	}
+ 
++	if (!is_dedupe) {
++		xfs_trans_ichgtime(tp, dest,
++				   XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
++	}
+ 	xfs_trans_log_inode(tp, dest, XFS_ILOG_CORE);
+ 
+ 	error = xfs_trans_commit(tp);
+@@ -1428,7 +1542,8 @@ xfs_reflink_remap_range(
+ 	    !(dest->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE))
+ 		cowextsize = src->i_d.di_cowextsize;
+ 
+-	ret = xfs_reflink_update_dest(dest, pos_out + len, cowextsize);
++	ret = xfs_reflink_update_dest(dest, pos_out + len, cowextsize,
++			is_dedupe);
+ 
+ out_unlock:
+ 	xfs_iunlock(src, XFS_MMAPLOCK_EXCL);
+@@ -1580,7 +1695,7 @@ xfs_reflink_clear_inode_flag(
+ 	 * We didn't find any shared blocks so turn off the reflink flag.
+ 	 * First, get rid of any leftover CoW mappings.
+ 	 */
+-	error = xfs_reflink_cancel_cow_blocks(ip, tpp, 0, NULLFILEOFF);
++	error = xfs_reflink_cancel_cow_blocks(ip, tpp, 0, NULLFILEOFF, true);
+ 	if (error)
+ 		return error;
+ 
+diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h
+index 97ea9b4..a57966f 100644
+--- a/fs/xfs/xfs_reflink.h
++++ b/fs/xfs/xfs_reflink.h
+@@ -30,6 +30,8 @@ extern int xfs_reflink_reserve_cow(struct xfs_inode *ip,
+ 		struct xfs_bmbt_irec *imap, bool *shared);
+ extern int xfs_reflink_allocate_cow_range(struct xfs_inode *ip,
+ 		xfs_off_t offset, xfs_off_t count);
++extern int xfs_reflink_convert_cow(struct xfs_inode *ip, xfs_off_t offset,
++		xfs_off_t count);
+ extern bool xfs_reflink_find_cow_mapping(struct xfs_inode *ip, xfs_off_t offset,
+ 		struct xfs_bmbt_irec *imap, bool *need_alloc);
+ extern int xfs_reflink_trim_irec_to_next_cow(struct xfs_inode *ip,
+@@ -37,9 +39,9 @@ extern int xfs_reflink_trim_irec_to_next_cow(struct xfs_inode *ip,
+ 
+ extern int xfs_reflink_cancel_cow_blocks(struct xfs_inode *ip,
+ 		struct xfs_trans **tpp, xfs_fileoff_t offset_fsb,
+-		xfs_fileoff_t end_fsb);
++		xfs_fileoff_t end_fsb, bool cancel_real);
+ extern int xfs_reflink_cancel_cow_range(struct xfs_inode *ip, xfs_off_t offset,
+-		xfs_off_t count);
++		xfs_off_t count, bool cancel_real);
+ extern int xfs_reflink_end_cow(struct xfs_inode *ip, xfs_off_t offset,
+ 		xfs_off_t count);
+ extern int xfs_reflink_recover_cow(struct xfs_mount *mp);
+diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
+index ade4691..dbbd3f1 100644
+--- a/fs/xfs/xfs_super.c
++++ b/fs/xfs/xfs_super.c
+@@ -948,7 +948,7 @@ xfs_fs_destroy_inode(
+ 	XFS_STATS_INC(ip->i_mount, vn_remove);
+ 
+ 	if (xfs_is_reflink_inode(ip)) {
+-		error = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF);
++		error = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, true);
+ 		if (error && !XFS_FORCED_SHUTDOWN(ip->i_mount))
+ 			xfs_warn(ip->i_mount,
+ "Error %d while evicting CoW blocks for inode %llu.",
+diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
+index 0907752..828f383 100644
+--- a/fs/xfs/xfs_trace.h
++++ b/fs/xfs/xfs_trace.h
+@@ -3183,6 +3183,7 @@ DECLARE_EVENT_CLASS(xfs_inode_irec_class,
+ 		__field(xfs_fileoff_t, lblk)
+ 		__field(xfs_extlen_t, len)
+ 		__field(xfs_fsblock_t, pblk)
++		__field(int, state)
+ 	),
+ 	TP_fast_assign(
+ 		__entry->dev = VFS_I(ip)->i_sb->s_dev;
+@@ -3190,13 +3191,15 @@ DECLARE_EVENT_CLASS(xfs_inode_irec_class,
+ 		__entry->lblk = irec->br_startoff;
+ 		__entry->len = irec->br_blockcount;
+ 		__entry->pblk = irec->br_startblock;
++		__entry->state = irec->br_state;
+ 	),
+-	TP_printk("dev %d:%d ino 0x%llx lblk 0x%llx len 0x%x pblk %llu",
++	TP_printk("dev %d:%d ino 0x%llx lblk 0x%llx len 0x%x pblk %llu st %d",
+ 		  MAJOR(__entry->dev), MINOR(__entry->dev),
+ 		  __entry->ino,
+ 		  __entry->lblk,
+ 		  __entry->len,
+-		  __entry->pblk)
++		  __entry->pblk,
++		  __entry->state)
+ );
+ #define DEFINE_INODE_IREC_EVENT(name) \
+ DEFINE_EVENT(xfs_inode_irec_class, name, \
+@@ -3345,11 +3348,12 @@ DEFINE_INODE_IREC_EVENT(xfs_reflink_trim_around_shared);
+ DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_alloc);
+ DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_found);
+ DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_enospc);
++DEFINE_INODE_IREC_EVENT(xfs_reflink_convert_cow);
+ 
+ DEFINE_RW_EVENT(xfs_reflink_reserve_cow);
+ DEFINE_RW_EVENT(xfs_reflink_allocate_cow_range);
+ 
+-DEFINE_INODE_IREC_EVENT(xfs_reflink_bounce_dio_write);
++DEFINE_SIMPLE_IO_EVENT(xfs_reflink_bounce_dio_write);
+ DEFINE_IOMAP_EVENT(xfs_reflink_find_cow_mapping);
+ DEFINE_INODE_IREC_EVENT(xfs_reflink_trim_irec);
+ 
+diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
+index 01c0b9c..8c58db2 100644
+--- a/include/linux/kvm_host.h
++++ b/include/linux/kvm_host.h
+@@ -162,8 +162,8 @@ int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
+ 		    int len, void *val);
+ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
+ 			    int len, struct kvm_io_device *dev);
+-int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
+-			      struct kvm_io_device *dev);
++void kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
++			       struct kvm_io_device *dev);
+ struct kvm_io_device *kvm_io_bus_get_dev(struct kvm *kvm, enum kvm_bus bus_idx,
+ 					 gpa_t addr);
+ 
+diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
+index 2546988..8b35bdb 100644
+--- a/include/linux/memcontrol.h
++++ b/include/linux/memcontrol.h
+@@ -739,6 +739,12 @@ static inline bool mem_cgroup_oom_synchronize(bool wait)
+ 	return false;
+ }
+ 
++static inline void mem_cgroup_update_page_stat(struct page *page,
++					       enum mem_cgroup_stat_index idx,
++					       int nr)
++{
++}
++
+ static inline void mem_cgroup_inc_page_stat(struct page *page,
+ 					    enum mem_cgroup_stat_index idx)
+ {
+diff --git a/kernel/padata.c b/kernel/padata.c
+index 7848f05..b4a3c0a 100644
+--- a/kernel/padata.c
++++ b/kernel/padata.c
+@@ -190,19 +190,20 @@ static struct padata_priv *padata_get_next(struct parallel_data *pd)
+ 
+ 	reorder = &next_queue->reorder;
+ 
++	spin_lock(&reorder->lock);
+ 	if (!list_empty(&reorder->list)) {
+ 		padata = list_entry(reorder->list.next,
+ 				    struct padata_priv, list);
+ 
+-		spin_lock(&reorder->lock);
+ 		list_del_init(&padata->list);
+ 		atomic_dec(&pd->reorder_objects);
+-		spin_unlock(&reorder->lock);
+ 
+ 		pd->processed++;
+ 
++		spin_unlock(&reorder->lock);
+ 		goto out;
+ 	}
++	spin_unlock(&reorder->lock);
+ 
+ 	if (__this_cpu_read(pd->pqueue->cpu_index) == next_queue->cpu_index) {
+ 		padata = ERR_PTR(-ENODATA);
+diff --git a/lib/syscall.c b/lib/syscall.c
+index 63239e0..a72cd09 100644
+--- a/lib/syscall.c
++++ b/lib/syscall.c
+@@ -11,6 +11,7 @@ static int collect_syscall(struct task_struct *target, long *callno,
+ 
+ 	if (!try_get_task_stack(target)) {
+ 		/* Task has no stack, so the task isn't in a syscall. */
++		*sp = *pc = 0;
+ 		*callno = -1;
+ 		return 0;
+ 	}
+diff --git a/mm/hugetlb.c b/mm/hugetlb.c
+index b6adedb..65c36ac 100644
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -4471,6 +4471,7 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address,
+ {
+ 	struct page *page = NULL;
+ 	spinlock_t *ptl;
++	pte_t pte;
+ retry:
+ 	ptl = pmd_lockptr(mm, pmd);
+ 	spin_lock(ptl);
+@@ -4480,12 +4481,13 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address,
+ 	 */
+ 	if (!pmd_huge(*pmd))
+ 		goto out;
+-	if (pmd_present(*pmd)) {
++	pte = huge_ptep_get((pte_t *)pmd);
++	if (pte_present(pte)) {
+ 		page = pmd_page(*pmd) + ((address & ~PMD_MASK) >> PAGE_SHIFT);
+ 		if (flags & FOLL_GET)
+ 			get_page(page);
+ 	} else {
+-		if (is_hugetlb_entry_migration(huge_ptep_get((pte_t *)pmd))) {
++		if (is_hugetlb_entry_migration(pte)) {
+ 			spin_unlock(ptl);
+ 			__migration_entry_wait(mm, (pte_t *)pmd, ptl);
+ 			goto retry;
+diff --git a/mm/rmap.c b/mm/rmap.c
+index 1ef3640..cd37c1c 100644
+--- a/mm/rmap.c
++++ b/mm/rmap.c
+@@ -1295,7 +1295,7 @@ void page_add_file_rmap(struct page *page, bool compound)
+ 			goto out;
+ 	}
+ 	__mod_node_page_state(page_pgdat(page), NR_FILE_MAPPED, nr);
+-	mem_cgroup_inc_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED);
++	mem_cgroup_update_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED, nr);
+ out:
+ 	unlock_page_memcg(page);
+ }
+@@ -1335,7 +1335,7 @@ static void page_remove_file_rmap(struct page *page, bool compound)
+ 	 * pte lock(a spinlock) is held, which implies preemption disabled.
+ 	 */
+ 	__mod_node_page_state(page_pgdat(page), NR_FILE_MAPPED, -nr);
+-	mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED);
++	mem_cgroup_update_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED, -nr);
+ 
+ 	if (unlikely(PageMlocked(page)))
+ 		clear_page_mlock(page);
+diff --git a/mm/workingset.c b/mm/workingset.c
+index 33f6f4d..4c4f056 100644
+--- a/mm/workingset.c
++++ b/mm/workingset.c
+@@ -492,7 +492,7 @@ static int __init workingset_init(void)
+ 	pr_info("workingset: timestamp_bits=%d max_order=%d bucket_order=%u\n",
+ 	       timestamp_bits, max_order, bucket_order);
+ 
+-	ret = list_lru_init_key(&workingset_shadow_nodes, &shadow_nodes_key);
++	ret = __list_lru_init(&workingset_shadow_nodes, true, &shadow_nodes_key);
+ 	if (ret)
+ 		goto err;
+ 	ret = register_shrinker(&workingset_shadow_shrinker);
+diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
+index 2efb335..25a30be 100644
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -7,6 +7,7 @@
+ #include <linux/kthread.h>
+ #include <linux/net.h>
+ #include <linux/nsproxy.h>
++#include <linux/sched.h>
+ #include <linux/slab.h>
+ #include <linux/socket.h>
+ #include <linux/string.h>
+@@ -469,11 +470,16 @@ static int ceph_tcp_connect(struct ceph_connection *con)
+ {
+ 	struct sockaddr_storage *paddr = &con->peer_addr.in_addr;
+ 	struct socket *sock;
++	unsigned int noio_flag;
+ 	int ret;
+ 
+ 	BUG_ON(con->sock);
++
++	/* sock_create_kern() allocates with GFP_KERNEL */
++	noio_flag = memalloc_noio_save();
+ 	ret = sock_create_kern(read_pnet(&con->msgr->net), paddr->ss_family,
+ 			       SOCK_STREAM, IPPROTO_TCP, &sock);
++	memalloc_noio_restore(noio_flag);
+ 	if (ret)
+ 		return ret;
+ 	sock->sk->sk_allocation = GFP_NOFS;
+diff --git a/sound/core/seq/seq_fifo.c b/sound/core/seq/seq_fifo.c
+index 3f4efcb..3490d21 100644
+--- a/sound/core/seq/seq_fifo.c
++++ b/sound/core/seq/seq_fifo.c
+@@ -265,6 +265,10 @@ int snd_seq_fifo_resize(struct snd_seq_fifo *f, int poolsize)
+ 	/* NOTE: overflow flag is not cleared */
+ 	spin_unlock_irqrestore(&f->lock, flags);
+ 
++	/* close the old pool and wait until all users are gone */
++	snd_seq_pool_mark_closing(oldpool);
++	snd_use_lock_sync(&f->use_lock);
++
+ 	/* release cells in old pool */
+ 	for (cell = oldhead; cell; cell = next) {
+ 		next = cell->next;
+diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
+index 112caa2..bb1aad3 100644
+--- a/sound/pci/hda/patch_realtek.c
++++ b/sound/pci/hda/patch_realtek.c
+@@ -4846,6 +4846,7 @@ enum {
+ 	ALC292_FIXUP_DISABLE_AAMIX,
+ 	ALC293_FIXUP_DISABLE_AAMIX_MULTIJACK,
+ 	ALC298_FIXUP_DELL1_MIC_NO_PRESENCE,
++	ALC298_FIXUP_DELL_AIO_MIC_NO_PRESENCE,
+ 	ALC275_FIXUP_DELL_XPS,
+ 	ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE,
+ 	ALC293_FIXUP_LENOVO_SPK_NOISE,
+@@ -5446,6 +5447,15 @@ static const struct hda_fixup alc269_fixups[] = {
+ 		.chained = true,
+ 		.chain_id = ALC269_FIXUP_HEADSET_MODE
+ 	},
++	[ALC298_FIXUP_DELL_AIO_MIC_NO_PRESENCE] = {
++		.type = HDA_FIXUP_PINS,
++		.v.pins = (const struct hda_pintbl[]) {
++			{ 0x18, 0x01a1913c }, /* use as headset mic, without its own jack detect */
++			{ }
++		},
++		.chained = true,
++		.chain_id = ALC269_FIXUP_HEADSET_MODE
++	},
+ 	[ALC275_FIXUP_DELL_XPS] = {
+ 		.type = HDA_FIXUP_VERBS,
+ 		.v.verbs = (const struct hda_verb[]) {
+@@ -5518,7 +5528,7 @@ static const struct hda_fixup alc269_fixups[] = {
+ 		.type = HDA_FIXUP_FUNC,
+ 		.v.func = alc298_fixup_speaker_volume,
+ 		.chained = true,
+-		.chain_id = ALC298_FIXUP_DELL1_MIC_NO_PRESENCE,
++		.chain_id = ALC298_FIXUP_DELL_AIO_MIC_NO_PRESENCE,
+ 	},
+ 	[ALC256_FIXUP_DELL_INSPIRON_7559_SUBWOOFER] = {
+ 		.type = HDA_FIXUP_PINS,
+diff --git a/sound/soc/atmel/atmel-classd.c b/sound/soc/atmel/atmel-classd.c
+index 89ac5f5..7ae46c2 100644
+--- a/sound/soc/atmel/atmel-classd.c
++++ b/sound/soc/atmel/atmel-classd.c
+@@ -349,7 +349,7 @@ static int atmel_classd_codec_dai_digital_mute(struct snd_soc_dai *codec_dai,
+ }
+ 
+ #define CLASSD_ACLK_RATE_11M2896_MPY_8 (112896 * 100 * 8)
+-#define CLASSD_ACLK_RATE_12M288_MPY_8  (12228 * 1000 * 8)
++#define CLASSD_ACLK_RATE_12M288_MPY_8  (12288 * 1000 * 8)
+ 
+ static struct {
+ 	int rate;
+diff --git a/sound/soc/intel/skylake/skl-topology.c b/sound/soc/intel/skylake/skl-topology.c
+index b5b1934..bef8a45 100644
+--- a/sound/soc/intel/skylake/skl-topology.c
++++ b/sound/soc/intel/skylake/skl-topology.c
+@@ -448,7 +448,7 @@ static int skl_tplg_set_module_init_data(struct snd_soc_dapm_widget *w)
+ 			if (bc->set_params != SKL_PARAM_INIT)
+ 				continue;
+ 
+-			mconfig->formats_config.caps = (u32 *)&bc->params;
++			mconfig->formats_config.caps = (u32 *)bc->params;
+ 			mconfig->formats_config.caps_size = bc->size;
+ 
+ 			break;
+diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
+index a29786d..4d28a9d 100644
+--- a/virt/kvm/eventfd.c
++++ b/virt/kvm/eventfd.c
+@@ -870,7 +870,8 @@ kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus bus_idx,
+ 			continue;
+ 
+ 		kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev);
+-		kvm->buses[bus_idx]->ioeventfd_count--;
++		if (kvm->buses[bus_idx])
++			kvm->buses[bus_idx]->ioeventfd_count--;
+ 		ioeventfd_release(p);
+ 		ret = 0;
+ 		break;
+diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
+index 7f9ee29..f4c6d4f 100644
+--- a/virt/kvm/kvm_main.c
++++ b/virt/kvm/kvm_main.c
+@@ -720,8 +720,11 @@ static void kvm_destroy_vm(struct kvm *kvm)
+ 	list_del(&kvm->vm_list);
+ 	spin_unlock(&kvm_lock);
+ 	kvm_free_irq_routing(kvm);
+-	for (i = 0; i < KVM_NR_BUSES; i++)
+-		kvm_io_bus_destroy(kvm->buses[i]);
++	for (i = 0; i < KVM_NR_BUSES; i++) {
++		if (kvm->buses[i])
++			kvm_io_bus_destroy(kvm->buses[i]);
++		kvm->buses[i] = NULL;
++	}
+ 	kvm_coalesced_mmio_free(kvm);
+ #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
+ 	mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm);
+@@ -3463,6 +3466,8 @@ int kvm_io_bus_write(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
+ 	};
+ 
+ 	bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu);
++	if (!bus)
++		return -ENOMEM;
+ 	r = __kvm_io_bus_write(vcpu, bus, &range, val);
+ 	return r < 0 ? r : 0;
+ }
+@@ -3480,6 +3485,8 @@ int kvm_io_bus_write_cookie(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx,
+ 	};
+ 
+ 	bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu);
++	if (!bus)
++		return -ENOMEM;
+ 
+ 	/* First try the device referenced by cookie. */
+ 	if ((cookie >= 0) && (cookie < bus->dev_count) &&
+@@ -3530,6 +3537,8 @@ int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
+ 	};
+ 
+ 	bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu);
++	if (!bus)
++		return -ENOMEM;
+ 	r = __kvm_io_bus_read(vcpu, bus, &range, val);
+ 	return r < 0 ? r : 0;
+ }
+@@ -3542,6 +3551,9 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
+ 	struct kvm_io_bus *new_bus, *bus;
+ 
+ 	bus = kvm->buses[bus_idx];
++	if (!bus)
++		return -ENOMEM;
++
+ 	/* exclude ioeventfd which is limited by maximum fd */
+ 	if (bus->dev_count - bus->ioeventfd_count > NR_IOBUS_DEVS - 1)
+ 		return -ENOSPC;
+@@ -3561,37 +3573,41 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
+ }
+ 
+ /* Caller must hold slots_lock. */
+-int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
+-			      struct kvm_io_device *dev)
++void kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
++			       struct kvm_io_device *dev)
+ {
+-	int i, r;
++	int i;
+ 	struct kvm_io_bus *new_bus, *bus;
+ 
+ 	bus = kvm->buses[bus_idx];
+-	r = -ENOENT;
++	if (!bus)
++		return;
++
+ 	for (i = 0; i < bus->dev_count; i++)
+ 		if (bus->range[i].dev == dev) {
+-			r = 0;
+ 			break;
+ 		}
+ 
+-	if (r)
+-		return r;
++	if (i == bus->dev_count)
++		return;
+ 
+ 	new_bus = kmalloc(sizeof(*bus) + ((bus->dev_count - 1) *
+ 			  sizeof(struct kvm_io_range)), GFP_KERNEL);
+-	if (!new_bus)
+-		return -ENOMEM;
++	if (!new_bus)  {
++		pr_err("kvm: failed to shrink bus, removing it completely\n");
++		goto broken;
++	}
+ 
+ 	memcpy(new_bus, bus, sizeof(*bus) + i * sizeof(struct kvm_io_range));
+ 	new_bus->dev_count--;
+ 	memcpy(new_bus->range + i, bus->range + i + 1,
+ 	       (new_bus->dev_count - i) * sizeof(struct kvm_io_range));
+ 
++broken:
+ 	rcu_assign_pointer(kvm->buses[bus_idx], new_bus);
+ 	synchronize_srcu_expedited(&kvm->srcu);
+ 	kfree(bus);
+-	return r;
++	return;
+ }
+ 
+ struct kvm_io_device *kvm_io_bus_get_dev(struct kvm *kvm, enum kvm_bus bus_idx,
+@@ -3604,6 +3620,8 @@ struct kvm_io_device *kvm_io_bus_get_dev(struct kvm *kvm, enum kvm_bus bus_idx,
+ 	srcu_idx = srcu_read_lock(&kvm->srcu);
+ 
+ 	bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu);
++	if (!bus)
++		goto out_unlock;
+ 
+ 	dev_idx = kvm_io_bus_get_first_dev(bus, addr, 1);
+ 	if (dev_idx < 0)