commit 3022d734a54cbd2b65eea9a024564821101b4a9a
Author: Andi Kleen <ak@suse.de>
Date:   Tue Sep 26 10:52:39 2006 +0200

    [PATCH] Fix zeroing on exception in copy_*_user
    
    - Don't zero for __copy_from_user_inatomic, following i386.
    This prevents spurious zeros for parallel file system writers when
    one of them takes an exception.
    - The string-instruction version didn't zero the output on
    exception. Oops.
    
    I also cleaned up the code a bit while I was at it and added a minor
    optimization to the string-instruction path.
    
    Signed-off-by: Andi Kleen <ak@suse.de>
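
For readers outside the x86-64 tree, the caller-visible contract this
patch settles on can be restated in C.  The sketch below is an
illustration only: example_read() is hypothetical, and the
pagefault_disable()/pagefault_enable() pair stands in for however the
caller has entered atomic context (in this era that was typically
implicit in kmap_atomic()).

	#include <asm/uaccess.h>	/* copy_from_user() and friends */

	/*
	 * Hypothetical caller, not from the kernel tree.  After this
	 * patch:
	 *   copy_from_user()            - zeroes the uncopied tail of
	 *                                 the destination on a fault.
	 *   __copy_from_user_inatomic() - leaves the destination alone
	 *                                 on a fault, so data written by
	 *                                 a parallel writer is not
	 *                                 clobbered with zeros.
	 * Both return the number of bytes NOT copied (0 on success).
	 */
	static long example_read(void *dst, const void __user *src,
				 unsigned len)
	{
		long left;

		pagefault_disable();	/* assumed atomic section */
		left = __copy_from_user_inatomic(dst, src, len);
		pagefault_enable();

		if (left)
			/* Slow path: may fault and sleep; zeroes the
			 * tail of dst if the copy still fails. */
			left = copy_from_user(dst, src, len);

		return len - left;	/* bytes actually copied */
	}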

diff --git a/arch/x86_64/kernel/x8664_ksyms.c b/arch/x86_64/kernel/x8664_ksyms.c
index 370952c..c3454af 100644
--- a/arch/x86_64/kernel/x8664_ksyms.c
+++ b/arch/x86_64/kernel/x8664_ksyms.c
@@ -29,6 +29,7 @@ EXPORT_SYMBOL(__put_user_8);
 EXPORT_SYMBOL(copy_user_generic);
 EXPORT_SYMBOL(copy_from_user);
 EXPORT_SYMBOL(copy_to_user);
+EXPORT_SYMBOL(__copy_from_user_inatomic);
 
 EXPORT_SYMBOL(copy_page);
 EXPORT_SYMBOL(clear_page);
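
The export added above is needed because __copy_from_user_inatomic
stops being a #define (see the uaccess.h hunk below) and becomes a real
entry point that modular code links against.  A hypothetical module-side
user:

	#include <linux/module.h>
	#include <asm/uaccess.h>

	/* Hypothetical module code; without the EXPORT_SYMBOL above,
	 * loading this module would fail with an unresolved symbol. */
	long grab_bytes(void *dst, const void __user *src, unsigned n)
	{
		return __copy_from_user_inatomic(dst, src, n);
	}
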
diff --git a/arch/x86_64/lib/copy_user.S b/arch/x86_64/lib/copy_user.S
index 962f3a6..70bebd3 100644
--- a/arch/x86_64/lib/copy_user.S
+++ b/arch/x86_64/lib/copy_user.S
@@ -9,10 +9,29 @@
 
 #define FIX_ALIGNMENT 1
 
-	#include <asm/current.h>
-	#include <asm/asm-offsets.h>
-	#include <asm/thread_info.h>
-	#include <asm/cpufeature.h>
+#include <asm/current.h>
+#include <asm/asm-offsets.h>
+#include <asm/thread_info.h>
+#include <asm/cpufeature.h>
+
+	.macro ALTERNATIVE_JUMP feature,orig,alt
+0:
+	.byte 0xe9	/* 32bit jump */
+	.long \orig-1f	/* by default jump to orig */
+1:
+	.section .altinstr_replacement,"ax"
+2:	.byte 0xe9	             /* near jump with 32bit immediate */
+	.long \alt-1b /* offset */   /* or alternatively to alt */
+	.previous
+	.section .altinstructions,"a"
+	.align 8
+	.quad  0b
+	.quad  2b
+	.byte  \feature		     /* when feature is set */
+	.byte  5
+	.byte  5
+	.previous
+	.endm
 
 /* Standard copy_to_user with segment limit checking */		
 ENTRY(copy_to_user)
@@ -23,25 +42,21 @@ ENTRY(copy_to_user)
 	jc  bad_to_user
 	cmpq threadinfo_addr_limit(%rax),%rcx
 	jae bad_to_user
-2:
-	.byte 0xe9	/* 32bit jump */
-	.long .Lcug-1f
-1:
+	xorl %eax,%eax	/* clear zero flag */
+	ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
 	CFI_ENDPROC
-ENDPROC(copy_to_user)
 
-	.section .altinstr_replacement,"ax"
-3:	.byte 0xe9			/* replacement jmp with 32 bit immediate */
-	.long copy_user_generic_c-1b	/* offset */
-	.previous
-	.section .altinstructions,"a"
-	.align 8
-	.quad  2b
-	.quad  3b
-	.byte  X86_FEATURE_REP_GOOD
-	.byte  5
-	.byte  5
-	.previous
+ENTRY(copy_user_generic)
+	CFI_STARTPROC
+	movl $1,%ecx	/* set zero flag */
+	ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
+	CFI_ENDPROC
+
+ENTRY(__copy_from_user_inatomic)
+	CFI_STARTPROC
+	xorl %ecx,%ecx	/* clear zero flag */
+	ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
+	CFI_ENDPROC
 
 /* Standard copy_from_user with segment limit checking */	
 ENTRY(copy_from_user)
@@ -52,7 +67,8 @@ ENTRY(copy_from_user)
 	jc  bad_from_user
 	cmpq threadinfo_addr_limit(%rax),%rcx
 	jae  bad_from_user
-	/* FALL THROUGH to copy_user_generic */
+	movl $1,%ecx	/* set zero flag */
+	ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
 	CFI_ENDPROC
 ENDPROC(copy_from_user)
 	
@@ -73,37 +89,26 @@ END(bad_from_user)
 	
 		
 /*
- * copy_user_generic - memory copy with exception handling.
+ * copy_user_generic_unrolled - memory copy with exception handling.
+ * This version is for CPUs like P4 that don't have efficient microcode for rep movsq
  * 	
  * Input:	
  * rdi destination
  * rsi source
  * rdx count
+ * ecx zero flag -- if true zero destination on error
  *
  * Output:		
  * eax uncopied bytes or 0 if successful.
  */
-ENTRY(copy_user_generic)
+ENTRY(copy_user_generic_unrolled)
 	CFI_STARTPROC
-	.byte 0x66,0x66,0x90	/* 5 byte nop for replacement jump */
-	.byte 0x66,0x90
-1:
-	.section .altinstr_replacement,"ax"
-2:	.byte 0xe9	             /* near jump with 32bit immediate */
-	.long copy_user_generic_c-1b /* offset */
-	.previous
-	.section .altinstructions,"a"
-	.align 8
-	.quad  copy_user_generic
-	.quad  2b
-	.byte  X86_FEATURE_REP_GOOD
-	.byte  5
-	.byte  5
-	.previous
-.Lcug:
 	pushq %rbx
 	CFI_ADJUST_CFA_OFFSET 8
 	CFI_REL_OFFSET rbx, 0
+	pushq %rcx
+	CFI_ADJUST_CFA_OFFSET 8
+	CFI_REL_OFFSET rcx, 0
 	xorl %eax,%eax		/*zero for the exception handler */
 
 #ifdef FIX_ALIGNMENT
@@ -179,6 +184,9 @@ ENTRY(copy_user_generic)
 
 	CFI_REMEMBER_STATE
 .Lende:
+	popq %rcx
+	CFI_ADJUST_CFA_OFFSET -8
+	CFI_RESTORE rcx
 	popq %rbx
 	CFI_ADJUST_CFA_OFFSET -8
 	CFI_RESTORE rbx
@@ -265,6 +273,8 @@ ENTRY(copy_user_generic)
 	addl %ecx,%edx
 	/* edx: bytes to zero, rdi: dest, eax:zero */
 .Lzero_rest:
+	cmpl $0,(%rsp)
+	jz   .Le_zero
 	movq %rdx,%rcx
 .Le_byte:
 	xorl %eax,%eax
@@ -286,6 +296,7 @@ ENDPROC(copy_user_generic)
  /* rdi	destination
   * rsi source
   * rdx count
+  * ecx zero flag
   *
   * Output:
   * eax uncopied bytes or 0 if successfull.
@@ -296,25 +307,48 @@ ENDPROC(copy_user_generic)
   * And more would be dangerous because both Intel and AMD have
   * errata with rep movsq > 4GB. If someone feels the need to fix
   * this please consider this.
-   */
-copy_user_generic_c:
+  */
+ENTRY(copy_user_generic_string)
 	CFI_STARTPROC
+	movl %ecx,%r8d		/* save zero flag */
 	movl %edx,%ecx
 	shrl $3,%ecx
 	andl $7,%edx	
+	jz   10f
 1:	rep 
 	movsq 
 	movl %edx,%ecx
 2:	rep
 	movsb
-4:	movl %ecx,%eax
+9:	movl %ecx,%eax
 	ret
-3:	lea (%rdx,%rcx,8),%rax
+
+	/* multiple of 8 bytes */
+10:	rep
+	movsq
+	xor %eax,%eax
 	ret
+
+	/* exception handling */
+3:      lea (%rdx,%rcx,8),%rax	/* exception on quad loop */
+	jmp 6f
+5:	movl %ecx,%eax		/* exception on byte loop */
+	/* eax: left over bytes */
+6:	testl %r8d,%r8d		/* zero flag set? */
+	jz 7f
+	movl %eax,%ecx		/* initialize x86 loop counter */
+	push %rax
+	xorl %eax,%eax
+8:	rep
+	stosb 			/* zero the rest */
+11:	pop %rax
+7:	ret
 	CFI_ENDPROC
 END(copy_user_generic_c)
 
 	.section __ex_table,"a"
 	.quad 1b,3b
-	.quad 2b,4b
+	.quad 2b,5b
+	.quad 8b,11b
+	.quad 10b,3b
 	.previous
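
An aside on the mechanism the new ALTERNATIVE_JUMP macro relies on:
each record it emits into .altinstructions has the shape of the
kernel's struct alt_instr, and at boot apply_alternatives() copies the
replacement jump over the default one when the CPU has the feature bit.
The sketch below is approximate and modeled from memory on the
2.6.18-era header; treat the layout and names as assumptions.

	#include <linux/string.h>	/* memcpy() */
	#include <linux/types.h>	/* u8 */
	#include <asm/cpufeature.h>	/* boot_cpu_has() */

	struct alt_instr {
		u8 *instr;		/* .quad 0b: default 5-byte jmp */
		u8 *replacement;	/* .quad 2b: alternative jmp    */
		u8  cpuid;		/* .byte \feature               */
		u8  instrlen;		/* .byte 5                      */
		u8  replacementlen;	/* .byte 5                      */
	};

	static void apply_alternatives_sketch(struct alt_instr *start,
					      struct alt_instr *end)
	{
		struct alt_instr *a;

		for (a = start; a < end; a++) {
			if (!boot_cpu_has(a->cpuid))
				continue;	/* keep jmp to *_unrolled */
			/*
			 * The replacement's rel32 was assembled as
			 * \alt-1b, i.e. relative to the ORIGINAL jump
			 * site, so a plain byte copy relocates it
			 * correctly into a jmp to *_string.
			 */
			memcpy(a->instr, a->replacement,
			       a->replacementlen);
		}
	}

Note that the real routine also nop-pads when the replacement is
shorter than the original; here both jumps are exactly five bytes, so
nothing is left over.
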
diff --git a/include/asm-x86_64/uaccess.h b/include/asm-x86_64/uaccess.h
index 1e1fa00..bc68120 100644
--- a/include/asm-x86_64/uaccess.h
+++ b/include/asm-x86_64/uaccess.h
@@ -238,6 +238,7 @@ do {									\
 
 /* Handles exceptions in both to and from, but doesn't do access_ok */
 extern unsigned long copy_user_generic(void *to, const void *from, unsigned len); 
+extern unsigned long copy_user_generic_dontzero(void *to, const void *from, unsigned len);
 
 extern unsigned long copy_to_user(void __user *to, const void *from, unsigned len); 
 extern unsigned long copy_from_user(void *to, const void __user *from, unsigned len); 
@@ -303,7 +304,6 @@ static __always_inline int __copy_to_user(void __user *dst, const void *src, uns
 	}
 }	
 
-
 static __always_inline int __copy_in_user(void __user *dst, const void __user *src, unsigned size)
 { 
        int ret = 0;
@@ -352,7 +352,7 @@ long strlen_user(const char __user *str);
 unsigned long clear_user(void __user *mem, unsigned long len);
 unsigned long __clear_user(void __user *mem, unsigned long len);
 
-#define __copy_to_user_inatomic __copy_to_user
-#define __copy_from_user_inatomic __copy_from_user
+extern long __copy_from_user_inatomic(void *dst, const void __user *src, unsigned size);
+#define __copy_to_user_inatomic copy_user_generic
 
 #endif /* __X86_64_UACCESS_H */
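
Finally, the fault paths in copy_user_generic_string are easy to
misread in the listing above, so here is the arithmetic restated as
plain C (function names invented for illustration):

	/*
	 * On entry the routine splits len into quadwords and a byte
	 * tail:  rcx = len / 8,  rdx = len % 8.
	 */
	static unsigned long fault_in_rep_movsq(unsigned long quads_left,
						unsigned long tail_bytes)
	{
		/* 3: lea (%rdx,%rcx,8),%rax -- each pending quadword is
		 * 8 bytes and none of the byte tail has been copied yet.
		 * The whole-quads fast path (label 10) reuses this with
		 * tail_bytes == 0. */
		return tail_bytes + quads_left * 8;
	}

	static unsigned long fault_in_rep_movsb(unsigned long bytes_left)
	{
		/* 5: movl %ecx,%eax -- rep movsb decrements rcx as it
		 * copies, so rcx is exactly the uncopied byte count. */
		return bytes_left;
	}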