i386: add non-temporal ("nocache") copy-from-user helpers and use them in filemap

What this patch does:

  arch/i386/lib/usercopy.c
    - Adds __copy_user_zeroing_intel_nocache(), a variant of
      __copy_user_zeroing_intel() whose 64-byte main loop stores with movnti
      and is followed by sfence.  The tail is copied with rep movsl/movsb,
      and after a faulting user read the fixup code zero-fills the
      destination for the remaining byte count.  It returns the number of
      bytes that were not copied.
    - Adds __copy_from_user_ll_nocache(): with CONFIG_X86_INTEL_USERCOPY it
      calls the routine above when n > 64 and cpu_has_xmm2 is set, and
      __copy_user_zeroing() otherwise.
    - Re-wraps the prototypes of __copy_to_user_ll(), __copy_from_user_ll()
      and the "declared but undefined" stubs, and adds a stub declaration
      for __copy_user_zeroing_intel_nocache().

  include/asm-i386/uaccess.h
    - Declares __copy_from_user_ll_nocache(), defines
      ARCH_HAS_NOCACHE_UACCESS, and adds the inline wrappers
      __copy_from_user_inatomic_nocache() (constant sizes 1, 2 and 4 go
      through __get_user_size(), everything else through
      __copy_from_user_ll_nocache()) and __copy_from_user_nocache()
      (might_sleep() followed by the _inatomic_nocache wrapper).

  include/linux/uaccess.h (new file)
    - Includes <asm/uaccess.h> and, when ARCH_HAS_NOCACHE_UACCESS is not
      defined, maps the two _nocache wrappers onto
      __copy_from_user_inatomic() and __copy_from_user().

  mm/filemap.c, mm/filemap.h
    - Include <linux/uaccess.h> instead of <asm/uaccess.h> and call the
      _nocache variants in __filemap_copy_from_user_iovec() and
      filemap_copy_from_user().

NOTE(review): this copy was re-flowed from an extract in which line breaks,
indentation and the operands of the #include lines had been lost.  Hunk
contents and line counts are unchanged, but the whitespace and the restored
#include names (the context lines in particular) should be confirmed against
the upstream tree before applying; "git apply --ignore-whitespace" may be
needed.

diff --git a/arch/i386/lib/usercopy.c b/arch/i386/lib/usercopy.c
index 4cf981d70f45b621acbce1df4bf7097d27b6d85d..6979297ce278642c5b4c59844d626cddd7cfdcbd 100644
--- a/arch/i386/lib/usercopy.c
+++ b/arch/i386/lib/usercopy.c
@@ -425,15 +425,121 @@ __copy_user_zeroing_intel(void *to, const void __user *from, unsigned long size)
 		: "eax", "edx", "memory");
 	return size;
 }
+
+/*
+ * Non Temporal Hint version of __copy_user_zeroing_intel. It is cache aware.
+ * hyoshiok@miraclelinux.com
+ */
+
+static unsigned long __copy_user_zeroing_intel_nocache(void *to,
+				const void __user *from, unsigned long size)
+{
+	int d0, d1;
+
+	__asm__ __volatile__(
+		" .align 2,0x90\n"
+		"0: movl 32(%4), %%eax\n"
+		" cmpl $67, %0\n"
+		" jbe 2f\n"
+		"1: movl 64(%4), %%eax\n"
+		" .align 2,0x90\n"
+		"2: movl 0(%4), %%eax\n"
+		"21: movl 4(%4), %%edx\n"
+		" movnti %%eax, 0(%3)\n"
+		" movnti %%edx, 4(%3)\n"
+		"3: movl 8(%4), %%eax\n"
+		"31: movl 12(%4),%%edx\n"
+		" movnti %%eax, 8(%3)\n"
+		" movnti %%edx, 12(%3)\n"
+		"4: movl 16(%4), %%eax\n"
+		"41: movl 20(%4), %%edx\n"
+		" movnti %%eax, 16(%3)\n"
+		" movnti %%edx, 20(%3)\n"
+		"10: movl 24(%4), %%eax\n"
+		"51: movl 28(%4), %%edx\n"
+		" movnti %%eax, 24(%3)\n"
+		" movnti %%edx, 28(%3)\n"
+		"11: movl 32(%4), %%eax\n"
+		"61: movl 36(%4), %%edx\n"
+		" movnti %%eax, 32(%3)\n"
+		" movnti %%edx, 36(%3)\n"
+		"12: movl 40(%4), %%eax\n"
+		"71: movl 44(%4), %%edx\n"
+		" movnti %%eax, 40(%3)\n"
+		" movnti %%edx, 44(%3)\n"
+		"13: movl 48(%4), %%eax\n"
+		"81: movl 52(%4), %%edx\n"
+		" movnti %%eax, 48(%3)\n"
+		" movnti %%edx, 52(%3)\n"
+		"14: movl 56(%4), %%eax\n"
+		"91: movl 60(%4), %%edx\n"
+		" movnti %%eax, 56(%3)\n"
+		" movnti %%edx, 60(%3)\n"
+		" addl $-64, %0\n"
+		" addl $64, %4\n"
+		" addl $64, %3\n"
+		" cmpl $63, %0\n"
+		" ja 0b\n"
+		" sfence \n"
+		"5: movl %0, %%eax\n"
+		" shrl $2, %0\n"
+		" andl $3, %%eax\n"
+		" cld\n"
+		"6: rep; movsl\n"
+		" movl %%eax,%0\n"
+		"7: rep; movsb\n"
+		"8:\n"
+		".section .fixup,\"ax\"\n"
+		"9: lea 0(%%eax,%0,4),%0\n"
+		"16: pushl %0\n"
+		" pushl %%eax\n"
+		" xorl %%eax,%%eax\n"
+		" rep; stosb\n"
+		" popl %%eax\n"
+		" popl %0\n"
+		" jmp 8b\n"
+		".previous\n"
+		".section __ex_table,\"a\"\n"
+		" .align 4\n"
+		" .long 0b,16b\n"
+		" .long 1b,16b\n"
+		" .long 2b,16b\n"
+		" .long 21b,16b\n"
+		" .long 3b,16b\n"
+		" .long 31b,16b\n"
+		" .long 4b,16b\n"
+		" .long 41b,16b\n"
+		" .long 10b,16b\n"
+		" .long 51b,16b\n"
+		" .long 11b,16b\n"
+		" .long 61b,16b\n"
+		" .long 12b,16b\n"
+		" .long 71b,16b\n"
+		" .long 13b,16b\n"
+		" .long 81b,16b\n"
+		" .long 14b,16b\n"
+		" .long 91b,16b\n"
+		" .long 6b,9b\n"
+		" .long 7b,16b\n"
+		".previous"
+		: "=&c"(size), "=&D" (d0), "=&S" (d1)
+		: "1"(to), "2"(from), "0"(size)
+		: "eax", "edx", "memory");
+	return size;
+}
+
 #else
+
 /*
  * Leave these declared but undefined. They should not be any references to
  * them
  */
-unsigned long
-__copy_user_zeroing_intel(void *to, const void __user *from, unsigned long size);
-unsigned long
-__copy_user_intel(void __user *to, const void *from, unsigned long size);
+unsigned long __copy_user_zeroing_intel(void *to, const void __user *from,
+					unsigned long size);
+unsigned long __copy_user_intel(void __user *to, const void *from,
+					unsigned long size);
+unsigned long __copy_user_zeroing_intel_nocache(void *to,
+				const void __user *from, unsigned long size);
 #endif /* CONFIG_X86_INTEL_USERCOPY */
 
 /* Generic arbitrary sized copy. */
@@ -515,8 +621,8 @@ do { \
 		: "memory"); \
 } while (0)
 
-
-unsigned long __copy_to_user_ll(void __user *to, const void *from, unsigned long n)
+unsigned long __copy_to_user_ll(void __user *to, const void *from,
+				unsigned long n)
 {
 	BUG_ON((long) n < 0);
 #ifndef CONFIG_X86_WP_WORKS_OK
@@ -576,8 +682,8 @@ unsigned long __copy_to_user_ll(void __user *to, const void *from, unsigned long
 }
 EXPORT_SYMBOL(__copy_to_user_ll);
 
-unsigned long
-__copy_from_user_ll(void *to, const void __user *from, unsigned long n)
+unsigned long __copy_from_user_ll(void *to, const void __user *from,
+					unsigned long n)
 {
 	BUG_ON((long)n < 0);
 	if (movsl_is_ok(to, from, n))
@@ -588,6 +694,21 @@ __copy_from_user_ll(void *to, const void __user *from, unsigned long n)
 }
 EXPORT_SYMBOL(__copy_from_user_ll);
 
+unsigned long __copy_from_user_ll_nocache(void *to, const void __user *from,
+					unsigned long n)
+{
+	BUG_ON((long)n < 0);
+#ifdef CONFIG_X86_INTEL_USERCOPY
+	if ( n > 64 && cpu_has_xmm2)
+		n = __copy_user_zeroing_intel_nocache(to, from, n);
+	else
+		__copy_user_zeroing(to, from, n);
+#else
+	__copy_user_zeroing(to, from, n);
+#endif
+	return n;
+}
+
 /**
  * copy_to_user: - Copy a block of data into user space.
  * @to: Destination address, in user space.
diff --git a/include/asm-i386/uaccess.h b/include/asm-i386/uaccess.h
index 1ec65523ea5e750f0105095476112056061f9693..82af28a943ab1e376723c74ad1b43226639c7ccd 100644
--- a/include/asm-i386/uaccess.h
+++ b/include/asm-i386/uaccess.h
@@ -390,6 +390,8 @@ unsigned long __must_check __copy_to_user_ll(void __user *to,
 				const void *from, unsigned long n);
 unsigned long __must_check __copy_from_user_ll(void *to,
 				const void __user *from, unsigned long n);
+unsigned long __must_check __copy_from_user_ll_nocache(void *to,
+				const void __user *from, unsigned long n);
 
 /*
  * Here we special-case 1, 2 and 4-byte copy_*_user invocations. On a fault
@@ -478,12 +480,43 @@ __copy_from_user_inatomic(void *to, const void __user *from, unsigned long n)
 	return __copy_from_user_ll(to, from, n);
 }
 
+#define ARCH_HAS_NOCACHE_UACCESS
+
+static __always_inline unsigned long __copy_from_user_inatomic_nocache(void *to,
+				const void __user *from, unsigned long n)
+{
+	if (__builtin_constant_p(n)) {
+		unsigned long ret;
+
+		switch (n) {
+		case 1:
+			__get_user_size(*(u8 *)to, from, 1, ret, 1);
+			return ret;
+		case 2:
+			__get_user_size(*(u16 *)to, from, 2, ret, 2);
+			return ret;
+		case 4:
+			__get_user_size(*(u32 *)to, from, 4, ret, 4);
+			return ret;
+		}
+	}
+	return __copy_from_user_ll_nocache(to, from, n);
+}
+
 static __always_inline unsigned long
 __copy_from_user(void *to, const void __user *from, unsigned long n)
 {
 	might_sleep();
 	return __copy_from_user_inatomic(to, from, n);
 }
+
+static __always_inline unsigned long
+__copy_from_user_nocache(void *to, const void __user *from, unsigned long n)
+{
+	might_sleep();
+	return __copy_from_user_inatomic_nocache(to, from, n);
+}
+
 unsigned long __must_check copy_to_user(void __user *to,
 				const void *from, unsigned long n);
 unsigned long __must_check copy_from_user(void *to,
diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
new file mode 100644
index 0000000000000000000000000000000000000000..391e7ed1eb3fc05ea2eeb8548e42381c8aefa7e5
--- /dev/null
+++ b/include/linux/uaccess.h
@@ -0,0 +1,22 @@
+#ifndef __LINUX_UACCESS_H__
+#define __LINUX_UACCESS_H__
+
+#include <asm/uaccess.h>
+
+#ifndef ARCH_HAS_NOCACHE_UACCESS
+
+static inline unsigned long __copy_from_user_inatomic_nocache(void *to,
+				const void __user *from, unsigned long n)
+{
+	return __copy_from_user_inatomic(to, from, n);
+}
+
+static inline unsigned long __copy_from_user_nocache(void *to,
+				const void __user *from, unsigned long n)
+{
+	return __copy_from_user(to, from, n);
+}
+
+#endif /* ARCH_HAS_NOCACHE_UACCESS */
+
+#endif /* __LINUX_UACCESS_H__ */
diff --git a/mm/filemap.c b/mm/filemap.c
index 368678c2d53107d3cabf3c98a15bccaa37dfd3f5..807a463fd5ed69b1d39d5268dddf92464c81ed0f 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -14,6 +14,7 @@
 #include <linux/slab.h>
 #include <linux/compiler.h>
 #include <linux/fs.h>
+#include <linux/uaccess.h>
 #include <linux/aio.h>
 #include <linux/capability.h>
 #include <linux/kernel_stat.h>
@@ -38,7 +39,6 @@
  */
 #include <linux/buffer_head.h> /* for generic_osync_inode */
 
-#include <asm/uaccess.h>
 #include <asm/mman.h>
 
 static ssize_t
@@ -1902,7 +1902,7 @@ __filemap_copy_from_user_iovec(char *vaddr,
 		int copy = min(bytes, iov->iov_len - base);
 
 		base = 0;
-		left = __copy_from_user_inatomic(vaddr, buf, copy);
+		left = __copy_from_user_inatomic_nocache(vaddr, buf, copy);
 		copied += copy;
 		bytes -= copy;
 		vaddr += copy;
diff --git a/mm/filemap.h b/mm/filemap.h
index 13793ba0ce172714355abee0ddd3291622370461..5683cde22055f39e86c5bb9f670e615007c24b7c 100644
--- a/mm/filemap.h
+++ b/mm/filemap.h
@@ -13,7 +13,7 @@
 #include <linux/highmem.h>
 #include <linux/uio.h>
 #include <linux/config.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 size_t
 __filemap_copy_from_user_iovec(char *vaddr,
@@ -34,13 +34,13 @@ filemap_copy_from_user(struct page *page, unsigned long offset,
 	int left;
 
 	kaddr = kmap_atomic(page, KM_USER0);
-	left = __copy_from_user_inatomic(kaddr + offset, buf, bytes);
+	left = __copy_from_user_inatomic_nocache(kaddr + offset, buf, bytes);
 	kunmap_atomic(kaddr, KM_USER0);
 
 	if (left != 0) {
 		/* Do it the slow way */
 		kaddr = kmap(page);
-		left = __copy_from_user(kaddr + offset, buf, bytes);
+		left = __copy_from_user_nocache(kaddr + offset, buf, bytes);
 		kunmap(page);
 	}
 	return bytes - left;