diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index c07f492b871a8bf0f47cfb7be03a004dd740c2c1..6ca22706cd64e962c4918d148ea3c528bb8ed33f 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -60,6 +60,7 @@ config X86 select ARCH_HAS_PMEM_API if X86_64 select ARCH_HAS_REFCOUNT select ARCH_HAS_UACCESS_FLUSHCACHE if X86_64 + select ARCH_HAS_UACCESS_MCSAFE if X86_64 select ARCH_HAS_SET_MEMORY select ARCH_HAS_SG_CHAIN select ARCH_HAS_STRICT_KERNEL_RWX diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 192e4d2f9efcff90a0d44cc7db3bb7a2a4f71a37..c6dd1d9800818e1e847e4219e9a854c9f88c9542 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -72,6 +72,9 @@ config EARLY_PRINTK_USB_XDBC You should normally say N here, unless you want to debug early crashes or need a very simple printk logging facility. +config MCSAFE_TEST + def_bool n + config X86_PTDUMP_CORE def_bool n diff --git a/arch/x86/include/asm/mcsafe_test.h b/arch/x86/include/asm/mcsafe_test.h new file mode 100644 index 0000000000000000000000000000000000000000..eb59804b6201c35c85f66230522c663904ffada1 --- /dev/null +++ b/arch/x86/include/asm/mcsafe_test.h @@ -0,0 +1,75 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _MCSAFE_TEST_H_ +#define _MCSAFE_TEST_H_ + +#ifndef __ASSEMBLY__ +#ifdef CONFIG_MCSAFE_TEST +extern unsigned long mcsafe_test_src; +extern unsigned long mcsafe_test_dst; + +static inline void mcsafe_inject_src(void *addr) +{ + if (addr) + mcsafe_test_src = (unsigned long) addr; + else + mcsafe_test_src = ~0UL; +} + +static inline void mcsafe_inject_dst(void *addr) +{ + if (addr) + mcsafe_test_dst = (unsigned long) addr; + else + mcsafe_test_dst = ~0UL; +} +#else /* CONFIG_MCSAFE_TEST */ +static inline void mcsafe_inject_src(void *addr) +{ +} + +static inline void mcsafe_inject_dst(void *addr) +{ +} +#endif /* CONFIG_MCSAFE_TEST */ + +#else /* __ASSEMBLY__ */ +#include + +#ifdef CONFIG_MCSAFE_TEST +.macro MCSAFE_TEST_CTL + .pushsection .data + .align 8 + .globl 
mcsafe_test_src + mcsafe_test_src: + .quad 0 + EXPORT_SYMBOL_GPL(mcsafe_test_src) + .globl mcsafe_test_dst + mcsafe_test_dst: + .quad 0 + EXPORT_SYMBOL_GPL(mcsafe_test_dst) + .popsection +.endm + +.macro MCSAFE_TEST_SRC reg count target + leaq \count(\reg), %r9 + cmp mcsafe_test_src, %r9 + ja \target +.endm + +.macro MCSAFE_TEST_DST reg count target + leaq \count(\reg), %r9 + cmp mcsafe_test_dst, %r9 + ja \target +.endm +#else +.macro MCSAFE_TEST_CTL +.endm + +.macro MCSAFE_TEST_SRC reg count target +.endm + +.macro MCSAFE_TEST_DST reg count target +.endm +#endif /* CONFIG_MCSAFE_TEST */ +#endif /* __ASSEMBLY__ */ +#endif /* _MCSAFE_TEST_H_ */ diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h index 533f74c300c254c7008b508e8262c341a13c90e5..d33f92b9fa228d91a5c5356b4afa5e6fbdf98832 100644 --- a/arch/x86/include/asm/string_64.h +++ b/arch/x86/include/asm/string_64.h @@ -116,7 +116,8 @@ int strcmp(const char *cs, const char *ct); #endif #define __HAVE_ARCH_MEMCPY_MCSAFE 1 -__must_check int memcpy_mcsafe_unrolled(void *dst, const void *src, size_t cnt); +__must_check unsigned long __memcpy_mcsafe(void *dst, const void *src, + size_t cnt); DECLARE_STATIC_KEY_FALSE(mcsafe_key); /** @@ -131,14 +132,15 @@ DECLARE_STATIC_KEY_FALSE(mcsafe_key); * actually do machine check recovery. Everyone else can just * use memcpy(). * - * Return 0 for success, -EFAULT for fail + * Return 0 for success, or number of bytes not copied if there was an + * exception. 
*/ -static __always_inline __must_check int +static __always_inline __must_check unsigned long memcpy_mcsafe(void *dst, const void *src, size_t cnt) { #ifdef CONFIG_X86_MCE if (static_branch_unlikely(&mcsafe_key)) - return memcpy_mcsafe_unrolled(dst, src, cnt); + return __memcpy_mcsafe(dst, src, cnt); else #endif memcpy(dst, src, cnt); diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index 62546b3a398eb9aa3c06a983a6520ed2e3ef731d..62acb613114b2322088083f7a9ccc85495a5afa4 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -46,6 +46,17 @@ copy_user_generic(void *to, const void *from, unsigned len) return ret; } +static __always_inline __must_check unsigned long +copy_to_user_mcsafe(void *to, const void *from, unsigned len) +{ + unsigned long ret; + + __uaccess_begin(); + ret = memcpy_mcsafe(to, from, len); + __uaccess_end(); + return ret; +} + static __always_inline __must_check unsigned long raw_copy_from_user(void *dst, const void __user *src, unsigned long size) { @@ -194,4 +205,7 @@ __copy_from_user_flushcache(void *dst, const void __user *src, unsigned size) unsigned long copy_user_handle_tail(char *to, char *from, unsigned len); +unsigned long +mcsafe_handle_tail(char *to, char *from, unsigned len); + #endif /* _ASM_X86_UACCESS_64_H */ diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S index 9a53a06e5a3efcb62f9563a6161fd98bbc22d617..298ef1479240b6b899fb4185a5204d5ad56b8785 100644 --- a/arch/x86/lib/memcpy_64.S +++ b/arch/x86/lib/memcpy_64.S @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -183,12 +184,15 @@ ENTRY(memcpy_orig) ENDPROC(memcpy_orig) #ifndef CONFIG_UML + +MCSAFE_TEST_CTL + /* - * memcpy_mcsafe_unrolled - memory copy with machine check exception handling + * __memcpy_mcsafe - memory copy with machine check exception handling * Note that we only catch machine checks when reading the source addresses. 
* Writes to target are posted and don't generate machine checks. */ -ENTRY(memcpy_mcsafe_unrolled) +ENTRY(__memcpy_mcsafe) cmpl $8, %edx /* Less than 8 bytes? Go to byte copy loop */ jb .L_no_whole_words @@ -204,58 +208,33 @@ ENTRY(memcpy_mcsafe_unrolled) subl $8, %ecx negl %ecx subl %ecx, %edx -.L_copy_leading_bytes: +.L_read_leading_bytes: movb (%rsi), %al + MCSAFE_TEST_SRC %rsi 1 .E_leading_bytes + MCSAFE_TEST_DST %rdi 1 .E_leading_bytes +.L_write_leading_bytes: movb %al, (%rdi) incq %rsi incq %rdi decl %ecx - jnz .L_copy_leading_bytes + jnz .L_read_leading_bytes .L_8byte_aligned: - /* Figure out how many whole cache lines (64-bytes) to copy */ - movl %edx, %ecx - andl $63, %edx - shrl $6, %ecx - jz .L_no_whole_cache_lines - - /* Loop copying whole cache lines */ -.L_cache_w0: movq (%rsi), %r8 -.L_cache_w1: movq 1*8(%rsi), %r9 -.L_cache_w2: movq 2*8(%rsi), %r10 -.L_cache_w3: movq 3*8(%rsi), %r11 - movq %r8, (%rdi) - movq %r9, 1*8(%rdi) - movq %r10, 2*8(%rdi) - movq %r11, 3*8(%rdi) -.L_cache_w4: movq 4*8(%rsi), %r8 -.L_cache_w5: movq 5*8(%rsi), %r9 -.L_cache_w6: movq 6*8(%rsi), %r10 -.L_cache_w7: movq 7*8(%rsi), %r11 - movq %r8, 4*8(%rdi) - movq %r9, 5*8(%rdi) - movq %r10, 6*8(%rdi) - movq %r11, 7*8(%rdi) - leaq 64(%rsi), %rsi - leaq 64(%rdi), %rdi - decl %ecx - jnz .L_cache_w0 - - /* Are there any trailing 8-byte words? */ -.L_no_whole_cache_lines: movl %edx, %ecx andl $7, %edx shrl $3, %ecx jz .L_no_whole_words - /* Copy trailing words */ -.L_copy_trailing_words: +.L_read_words: movq (%rsi), %r8 - mov %r8, (%rdi) - leaq 8(%rsi), %rsi - leaq 8(%rdi), %rdi + MCSAFE_TEST_SRC %rsi 8 .E_read_words + MCSAFE_TEST_DST %rdi 8 .E_write_words +.L_write_words: + movq %r8, (%rdi) + addq $8, %rsi + addq $8, %rdi decl %ecx - jnz .L_copy_trailing_words + jnz .L_read_words /* Any trailing bytes? 
*/ .L_no_whole_words: @@ -264,38 +243,55 @@ ENTRY(memcpy_mcsafe_unrolled) /* Copy trailing bytes */ movl %edx, %ecx -.L_copy_trailing_bytes: +.L_read_trailing_bytes: movb (%rsi), %al + MCSAFE_TEST_SRC %rsi 1 .E_trailing_bytes + MCSAFE_TEST_DST %rdi 1 .E_trailing_bytes +.L_write_trailing_bytes: movb %al, (%rdi) incq %rsi incq %rdi decl %ecx - jnz .L_copy_trailing_bytes + jnz .L_read_trailing_bytes /* Copy successful. Return zero */ .L_done_memcpy_trap: xorq %rax, %rax ret -ENDPROC(memcpy_mcsafe_unrolled) -EXPORT_SYMBOL_GPL(memcpy_mcsafe_unrolled) +ENDPROC(__memcpy_mcsafe) +EXPORT_SYMBOL_GPL(__memcpy_mcsafe) .section .fixup, "ax" - /* Return -EFAULT for any failure */ -.L_memcpy_mcsafe_fail: - mov $-EFAULT, %rax + /* + * Return number of bytes not copied for any failure. Note that + * there is no "tail" handling since the source buffer is 8-byte + * aligned and poison is cacheline aligned. + */ +.E_read_words: + shll $3, %ecx +.E_leading_bytes: + addl %edx, %ecx +.E_trailing_bytes: + mov %ecx, %eax ret + /* + * For write fault handling, given the destination is unaligned, + * we handle faults on multi-byte writes with a byte-by-byte + * copy up to the write-protected page. 
+ */ +.E_write_words: + shll $3, %ecx + addl %edx, %ecx + movl %ecx, %edx + jmp mcsafe_handle_tail + .previous - _ASM_EXTABLE_FAULT(.L_copy_leading_bytes, .L_memcpy_mcsafe_fail) - _ASM_EXTABLE_FAULT(.L_cache_w0, .L_memcpy_mcsafe_fail) - _ASM_EXTABLE_FAULT(.L_cache_w1, .L_memcpy_mcsafe_fail) - _ASM_EXTABLE_FAULT(.L_cache_w2, .L_memcpy_mcsafe_fail) - _ASM_EXTABLE_FAULT(.L_cache_w3, .L_memcpy_mcsafe_fail) - _ASM_EXTABLE_FAULT(.L_cache_w4, .L_memcpy_mcsafe_fail) - _ASM_EXTABLE_FAULT(.L_cache_w5, .L_memcpy_mcsafe_fail) - _ASM_EXTABLE_FAULT(.L_cache_w6, .L_memcpy_mcsafe_fail) - _ASM_EXTABLE_FAULT(.L_cache_w7, .L_memcpy_mcsafe_fail) - _ASM_EXTABLE_FAULT(.L_copy_trailing_words, .L_memcpy_mcsafe_fail) - _ASM_EXTABLE_FAULT(.L_copy_trailing_bytes, .L_memcpy_mcsafe_fail) + _ASM_EXTABLE_FAULT(.L_read_leading_bytes, .E_leading_bytes) + _ASM_EXTABLE_FAULT(.L_read_words, .E_read_words) + _ASM_EXTABLE_FAULT(.L_read_trailing_bytes, .E_trailing_bytes) + _ASM_EXTABLE(.L_write_leading_bytes, .E_leading_bytes) + _ASM_EXTABLE(.L_write_words, .E_write_words) + _ASM_EXTABLE(.L_write_trailing_bytes, .E_trailing_bytes) #endif diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c index 75d3776123cc0f8ee4d79dc2c07e17c431a950dd..7ebc9901dd05986d72c06564b72c9ca5c3788f58 100644 --- a/arch/x86/lib/usercopy_64.c +++ b/arch/x86/lib/usercopy_64.c @@ -75,6 +75,27 @@ copy_user_handle_tail(char *to, char *from, unsigned len) return len; } +/* + * Similar to copy_user_handle_tail, probe for the write fault point, + * but reuse __memcpy_mcsafe in case a new read error is encountered. + * clac() is handled in _copy_to_iter_mcsafe(). + */ +__visible unsigned long +mcsafe_handle_tail(char *to, char *from, unsigned len) +{ + for (; len; --len, to++, from++) { + /* + * Call the assembly routine back directly since + * memcpy_mcsafe() may silently fallback to memcpy. 
+ */ + unsigned long rem = __memcpy_mcsafe(to, from, 1); + + if (rem) + break; + } + return len; +} + #ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE /** * clean_cache_range - write back a cache range with CLWB diff --git a/drivers/dax/super.c b/drivers/dax/super.c index 60d01b5d2a6710e7b43bb340db1bdc24d64d2775..88e77b7f0c4bbcc9703791d9b04144bf4c67ce4c 100644 --- a/drivers/dax/super.c +++ b/drivers/dax/super.c @@ -287,6 +287,16 @@ size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, } EXPORT_SYMBOL_GPL(dax_copy_from_iter); +size_t dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, + size_t bytes, struct iov_iter *i) +{ + if (!dax_alive(dax_dev)) + return 0; + + return dax_dev->ops->copy_to_iter(dax_dev, pgoff, addr, bytes, i); +} +EXPORT_SYMBOL_GPL(dax_copy_to_iter); + #ifdef CONFIG_ARCH_HAS_PMEM_API void arch_wb_cache_pmem(void *addr, size_t size); void dax_flush(struct dax_device *dax_dev, void *addr, size_t size) diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index 775c06d953b7a0abadef32ad05c37d168651eacf..d10964d41fd7799cb53c11d0fad14d7620b39140 100644 --- a/drivers/md/dm-linear.c +++ b/drivers/md/dm-linear.c @@ -185,9 +185,24 @@ static size_t linear_dax_copy_from_iter(struct dm_target *ti, pgoff_t pgoff, return dax_copy_from_iter(dax_dev, pgoff, addr, bytes, i); } +static size_t linear_dax_copy_to_iter(struct dm_target *ti, pgoff_t pgoff, + void *addr, size_t bytes, struct iov_iter *i) +{ + struct linear_c *lc = ti->private; + struct block_device *bdev = lc->dev->bdev; + struct dax_device *dax_dev = lc->dev->dax_dev; + sector_t dev_sector, sector = pgoff * PAGE_SECTORS; + + dev_sector = linear_map_sector(ti, sector); + if (bdev_dax_pgoff(bdev, dev_sector, ALIGN(bytes, PAGE_SIZE), &pgoff)) + return 0; + return dax_copy_to_iter(dax_dev, pgoff, addr, bytes, i); +} + #else #define linear_dax_direct_access NULL #define linear_dax_copy_from_iter NULL +#define linear_dax_copy_to_iter NULL #endif static struct 
target_type linear_target = { @@ -204,6 +219,7 @@ static struct target_type linear_target = { .iterate_devices = linear_iterate_devices, .direct_access = linear_dax_direct_access, .dax_copy_from_iter = linear_dax_copy_from_iter, + .dax_copy_to_iter = linear_dax_copy_to_iter, }; int __init dm_linear_init(void) diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c index c90c7c08a77fab607181a2c0ccf77e0a51e439ed..9ea2b0291f20d781b355cca994192a595c6a1b39 100644 --- a/drivers/md/dm-log-writes.c +++ b/drivers/md/dm-log-writes.c @@ -962,9 +962,23 @@ static size_t log_writes_dax_copy_from_iter(struct dm_target *ti, dax_copy: return dax_copy_from_iter(lc->dev->dax_dev, pgoff, addr, bytes, i); } + +static size_t log_writes_dax_copy_to_iter(struct dm_target *ti, + pgoff_t pgoff, void *addr, size_t bytes, + struct iov_iter *i) +{ + struct log_writes_c *lc = ti->private; + sector_t sector = pgoff * PAGE_SECTORS; + + if (bdev_dax_pgoff(lc->dev->bdev, sector, ALIGN(bytes, PAGE_SIZE), &pgoff)) + return 0; + return dax_copy_to_iter(lc->dev->dax_dev, pgoff, addr, bytes, i); +} + #else #define log_writes_dax_direct_access NULL #define log_writes_dax_copy_from_iter NULL +#define log_writes_dax_copy_to_iter NULL #endif static struct target_type log_writes_target = { @@ -982,6 +996,7 @@ static struct target_type log_writes_target = { .io_hints = log_writes_io_hints, .direct_access = log_writes_dax_direct_access, .dax_copy_from_iter = log_writes_dax_copy_from_iter, + .dax_copy_to_iter = log_writes_dax_copy_to_iter, }; static int __init dm_log_writes_init(void) diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index fe7fb9b1aec34602ac677c07a9c0ae4134191813..8547d75943389134af94dfb5aff8cab5834415da 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -354,9 +354,29 @@ static size_t stripe_dax_copy_from_iter(struct dm_target *ti, pgoff_t pgoff, return dax_copy_from_iter(dax_dev, pgoff, addr, bytes, i); } +static size_t stripe_dax_copy_to_iter(struct 
dm_target *ti, pgoff_t pgoff, + void *addr, size_t bytes, struct iov_iter *i) +{ + sector_t dev_sector, sector = pgoff * PAGE_SECTORS; + struct stripe_c *sc = ti->private; + struct dax_device *dax_dev; + struct block_device *bdev; + uint32_t stripe; + + stripe_map_sector(sc, sector, &stripe, &dev_sector); + dev_sector += sc->stripe[stripe].physical_start; + dax_dev = sc->stripe[stripe].dev->dax_dev; + bdev = sc->stripe[stripe].dev->bdev; + + if (bdev_dax_pgoff(bdev, dev_sector, ALIGN(bytes, PAGE_SIZE), &pgoff)) + return 0; + return dax_copy_to_iter(dax_dev, pgoff, addr, bytes, i); +} + #else #define stripe_dax_direct_access NULL #define stripe_dax_copy_from_iter NULL +#define stripe_dax_copy_to_iter NULL #endif /* @@ -478,6 +498,7 @@ static struct target_type stripe_target = { .io_hints = stripe_io_hints, .direct_access = stripe_dax_direct_access, .dax_copy_from_iter = stripe_dax_copy_from_iter, + .dax_copy_to_iter = stripe_dax_copy_to_iter, }; int __init dm_stripe_init(void) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 0a7b0107ca78d8ed967e546f9111b0a57e987421..6752f1c25258a89c19503e5312e210788d429d03 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1089,6 +1089,30 @@ static size_t dm_dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, return ret; } +static size_t dm_dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, + void *addr, size_t bytes, struct iov_iter *i) +{ + struct mapped_device *md = dax_get_private(dax_dev); + sector_t sector = pgoff * PAGE_SECTORS; + struct dm_target *ti; + long ret = 0; + int srcu_idx; + + ti = dm_dax_get_live_target(md, sector, &srcu_idx); + + if (!ti) + goto out; + if (!ti->type->dax_copy_to_iter) { + ret = copy_to_iter(addr, bytes, i); + goto out; + } + ret = ti->type->dax_copy_to_iter(ti, pgoff, addr, bytes, i); + out: + dm_put_live_table(md, srcu_idx); + + return ret; +} + /* * A target may call dm_accept_partial_bio only from the map routine. 
It is * allowed for all bio types except REQ_PREFLUSH and REQ_OP_ZONE_RESET. @@ -3134,6 +3158,7 @@ static const struct block_device_operations dm_blk_dops = { static const struct dax_operations dm_dax_ops = { .direct_access = dm_dax_direct_access, .copy_from_iter = dm_dax_copy_from_iter, + .copy_to_iter = dm_dax_copy_to_iter, }; /* diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c index 30852270484fb3a2138ecf58df8b0d1ad16da4ba..2e96b34bc936bf89f6a9a65d983e4bbf3a673fbd 100644 --- a/drivers/nvdimm/claim.c +++ b/drivers/nvdimm/claim.c @@ -276,7 +276,10 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns, if (rw == READ) { if (unlikely(is_bad_pmem(&nsio->bb, sector, sz_align))) return -EIO; - return memcpy_mcsafe(buf, nsio->addr + offset, size); + if (memcpy_mcsafe(buf, nsio->addr + offset, size) != 0) + return -EIO; + /* Read succeeded: return here instead of falling into the non-READ path. */ + return 0; } if (unlikely(is_bad_pmem(&nsio->bb, sector, sz_align))) { diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index bf2dd2a4a5e60050f092ee3c2f14b01b76781b53..68940356cad3f100f4cfbdd325d42235ea3c5da4 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -101,15 +101,15 @@ static blk_status_t read_pmem(struct page *page, unsigned int off, void *pmem_addr, unsigned int len) { unsigned int chunk; - int rc; + unsigned long rem; void *mem; while (len) { mem = kmap_atomic(page); chunk = min_t(unsigned int, len, PAGE_SIZE); - rc = memcpy_mcsafe(mem + off, pmem_addr, chunk); + rem = memcpy_mcsafe(mem + off, pmem_addr, chunk); kunmap_atomic(mem); - if (rc) + if (rem) return BLK_STS_IOERR; len -= chunk; off = 0; @@ -259,9 +259,16 @@ static size_t pmem_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, return copy_from_iter_flushcache(addr, bytes, i); } +static size_t pmem_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, + void *addr, size_t bytes, struct iov_iter *i) +{ + return copy_to_iter_mcsafe(addr, bytes, i); +} + static const struct dax_operations pmem_dax_ops = { .direct_access = pmem_dax_direct_access,
.copy_from_iter = pmem_copy_from_iter, + .copy_to_iter = pmem_copy_to_iter, }; static const struct attribute_group *pmem_attribute_groups[] = { diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index 0a312e4502075b44fd7664db38dcdf7a048909e6..29024492b8ede9fed0eb1a94b28bd319ca001136 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -51,9 +51,16 @@ static size_t dcssblk_dax_copy_from_iter(struct dax_device *dax_dev, return copy_from_iter(addr, bytes, i); } +static size_t dcssblk_dax_copy_to_iter(struct dax_device *dax_dev, + pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i) +{ + return copy_to_iter(addr, bytes, i); +} + static const struct dax_operations dcssblk_dax_ops = { .direct_access = dcssblk_dax_direct_access, .copy_from_iter = dcssblk_dax_copy_from_iter, + .copy_to_iter = dcssblk_dax_copy_to_iter, }; struct dcssblk_dev_info { diff --git a/fs/dax.c b/fs/dax.c index 31e9f51ac917e22f5ee4bfdecdb83bc9527f686d..1f5f14a2ce4c5bcac373b6c36998d043b88fee3b 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -1082,6 +1082,7 @@ dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data, struct iov_iter *iter = data; loff_t end = pos + length, done = 0; ssize_t ret = 0; + size_t xfer; int id; if (iov_iter_rw(iter) == READ) { @@ -1145,18 +1146,20 @@ dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data, * vfs_write(), depending on which operation we are doing. */ if (iov_iter_rw(iter) == WRITE) - map_len = dax_copy_from_iter(dax_dev, pgoff, kaddr, + xfer = dax_copy_from_iter(dax_dev, pgoff, kaddr, map_len, iter); else - map_len = copy_to_iter(kaddr, map_len, iter); - if (map_len <= 0) { - ret = map_len ? 
map_len : -EFAULT; - break; - } + xfer = dax_copy_to_iter(dax_dev, pgoff, kaddr, + map_len, iter); - pos += map_len; - length -= map_len; - done += map_len; + pos += xfer; + length -= xfer; + done += xfer; + + if (xfer == 0) + ret = -EFAULT; + if (xfer < map_len) + break; } dax_read_unlock(id); diff --git a/include/linux/dax.h b/include/linux/dax.h index 25bab6abb6952397041c74fd46f5ad81c7e2e6d7..b51db4264c83bd4aa7a0e886df6efd9e2d1676c9 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -20,6 +20,9 @@ struct dax_operations { /* copy_from_iter: required operation for fs-dax direct-i/o */ size_t (*copy_from_iter)(struct dax_device *, pgoff_t, void *, size_t, struct iov_iter *); + /* copy_to_iter: required operation for fs-dax direct-i/o */ + size_t (*copy_to_iter)(struct dax_device *, pgoff_t, void *, size_t, + struct iov_iter *); }; extern struct attribute_group dax_attribute_group; @@ -125,6 +128,8 @@ long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages, void **kaddr, pfn_t *pfn); size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i); +size_t dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, + size_t bytes, struct iov_iter *i); void dax_flush(struct dax_device *dax_dev, void *addr, size_t size); ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter, diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 31fef7c341859be03f9170823d84430e1bf7072e..6fb0808e87c81af9c4ad840cde86b892f96ee8a3 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -133,7 +133,7 @@ typedef int (*dm_busy_fn) (struct dm_target *ti); */ typedef long (*dm_dax_direct_access_fn) (struct dm_target *ti, pgoff_t pgoff, long nr_pages, void **kaddr, pfn_t *pfn); -typedef size_t (*dm_dax_copy_from_iter_fn)(struct dm_target *ti, pgoff_t pgoff, +typedef size_t (*dm_dax_copy_iter_fn)(struct dm_target *ti, pgoff_t pgoff, void 
*addr, size_t bytes, struct iov_iter *i); #define PAGE_SECTORS (PAGE_SIZE / 512) @@ -184,7 +184,8 @@ struct target_type { dm_iterate_devices_fn iterate_devices; dm_io_hints_fn io_hints; dm_dax_direct_access_fn direct_access; - dm_dax_copy_from_iter_fn dax_copy_from_iter; + dm_dax_copy_iter_fn dax_copy_from_iter; + dm_dax_copy_iter_fn dax_copy_to_iter; /* For internal device-mapper use. */ struct list_head list; diff --git a/include/linux/string.h b/include/linux/string.h index dd39a690c841e4abc05516b6f83151e31809da9c..4a5a0eb7df511a5b1826346a3150d7050b48d0dd 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -147,8 +147,8 @@ extern int memcmp(const void *,const void *,__kernel_size_t); extern void * memchr(const void *,int,__kernel_size_t); #endif #ifndef __HAVE_ARCH_MEMCPY_MCSAFE -static inline __must_check int memcpy_mcsafe(void *dst, const void *src, - size_t cnt) +static inline __must_check unsigned long memcpy_mcsafe(void *dst, + const void *src, size_t cnt) { memcpy(dst, src, cnt); return 0; diff --git a/include/linux/uio.h b/include/linux/uio.h index e67e12adb1362da1e8ef729b914f6c5e05979efa..409c845d4cd3dce5762c27a2e7e7c1c5c288f134 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -154,6 +154,12 @@ size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i); #define _copy_from_iter_flushcache _copy_from_iter_nocache #endif +#ifdef CONFIG_ARCH_HAS_UACCESS_MCSAFE +size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i); +#else +#define _copy_to_iter_mcsafe _copy_to_iter +#endif + static __always_inline __must_check size_t copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i) { @@ -163,6 +169,15 @@ size_t copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i) return _copy_from_iter_flushcache(addr, bytes, i); } +static __always_inline __must_check +size_t copy_to_iter_mcsafe(void *addr, size_t bytes, struct iov_iter *i) +{ + if 
(unlikely(!check_copy_size(addr, bytes, false))) + return 0; + else + return _copy_to_iter_mcsafe(addr, bytes, i); +} + size_t iov_iter_zero(size_t bytes, struct iov_iter *); unsigned long iov_iter_alignment(const struct iov_iter *i); unsigned long iov_iter_gap_alignment(const struct iov_iter *i); diff --git a/lib/Kconfig b/lib/Kconfig index 5fe577673b985d91c68d8a907cebc4641290e9af..907f6e4f1cf27d8fc67424f7046a4b3c4a07cfe6 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -586,6 +586,9 @@ config ARCH_HAS_PMEM_API config ARCH_HAS_UACCESS_FLUSHCACHE bool +config ARCH_HAS_UACCESS_MCSAFE + bool + config STACKDEPOT bool select STACKTRACE diff --git a/lib/iov_iter.c b/lib/iov_iter.c index fdae394172fa78efaf3637266492c5a5823ce41a..7e43cd54c84ca3da2d77b02e7112c69386428a2b 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -573,6 +573,67 @@ size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i) } EXPORT_SYMBOL(_copy_to_iter); +#ifdef CONFIG_ARCH_HAS_UACCESS_MCSAFE +static int copyout_mcsafe(void __user *to, const void *from, size_t n) +{ + if (access_ok(VERIFY_WRITE, to, n)) { + kasan_check_read(from, n); + n = copy_to_user_mcsafe((__force void *) to, from, n); + } + return n; +} + +static unsigned long memcpy_mcsafe_to_page(struct page *page, size_t offset, + const char *from, size_t len) +{ + unsigned long ret; + char *to; + + to = kmap_atomic(page); + ret = memcpy_mcsafe(to + offset, from, len); + kunmap_atomic(to); + + return ret; +} + +size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i) +{ + const char *from = addr; + unsigned long rem, curr_addr, s_addr = (unsigned long) addr; + + if (unlikely(i->type & ITER_PIPE)) { + WARN_ON(1); + return 0; + } + if (iter_is_iovec(i)) + might_fault(); + iterate_and_advance(i, bytes, v, + copyout_mcsafe(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len), + ({ + rem = memcpy_mcsafe_to_page(v.bv_page, v.bv_offset, + (from += v.bv_len) - v.bv_len, v.bv_len); + if (rem) { + curr_addr = 
(unsigned long) from; + bytes = curr_addr - s_addr - rem; + return bytes; + } + }), + ({ + rem = memcpy_mcsafe(v.iov_base, (from += v.iov_len) - v.iov_len, + v.iov_len); + if (rem) { + curr_addr = (unsigned long) from; + bytes = curr_addr - s_addr - rem; + return bytes; + } + }) + ) + + return bytes; +} +EXPORT_SYMBOL_GPL(_copy_to_iter_mcsafe); +#endif /* CONFIG_ARCH_HAS_UACCESS_MCSAFE */ + size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i) { char *to = addr; diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index 4ea385be528fc80080bb4b24ed9639896ba7d0c4..a8fb63edcf8948df54b6aaa2f225def65d5a705f 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -29,6 +29,8 @@ #include "nfit_test.h" #include "../watermark.h" +#include <asm/mcsafe_test.h> + /* * Generate an NFIT table to describe the following topology: * @@ -2681,6 +2683,107 @@ static struct platform_driver nfit_test_driver = { .id_table = nfit_test_id, }; +static char mcsafe_buf[PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE))); + +enum INJECT { + INJECT_NONE, + INJECT_SRC, + INJECT_DST, +}; + +static void mcsafe_test_init(char *dst, char *src, size_t size) +{ + size_t i; + + memset(dst, 0xff, size); + for (i = 0; i < size; i++) + src[i] = (char) i; +} + +static bool mcsafe_test_validate(unsigned char *dst, unsigned char *src, + size_t size, unsigned long rem) +{ + size_t i; + + for (i = 0; i < size - rem; i++) + if (dst[i] != (unsigned char) i) { + pr_info_once("%s:%d: offset: %zd got: %#x expect: %#x\n", + __func__, __LINE__, i, dst[i], + (unsigned char) i); + return false; + } + for (i = size - rem; i < size; i++) + if (dst[i] != 0xffU) { + pr_info_once("%s:%d: offset: %zd got: %#x expect: 0xff\n", + __func__, __LINE__, i, dst[i]); + return false; + } + return true; +} + +void mcsafe_test(void) +{ + char *inject_desc[] = { "none", "source", "destination" }; + enum INJECT inj; + + if (IS_ENABLED(CONFIG_MCSAFE_TEST)) { + pr_info("%s: run...\n",
__func__); + } else { + pr_info("%s: disabled, skip.\n", __func__); + return; + } + + for (inj = INJECT_NONE; inj <= INJECT_DST; inj++) { + int i; + + pr_info("%s: inject: %s\n", __func__, inject_desc[inj]); + for (i = 0; i < 512; i++) { + unsigned long expect, rem; + void *src, *dst; + bool valid; + + switch (inj) { + case INJECT_NONE: + mcsafe_inject_src(NULL); + mcsafe_inject_dst(NULL); + dst = &mcsafe_buf[2048]; + src = &mcsafe_buf[1024 - i]; + expect = 0; + break; + case INJECT_SRC: + mcsafe_inject_src(&mcsafe_buf[1024]); + mcsafe_inject_dst(NULL); + dst = &mcsafe_buf[2048]; + src = &mcsafe_buf[1024 - i]; + expect = 512 - i; + break; + case INJECT_DST: + mcsafe_inject_src(NULL); + mcsafe_inject_dst(&mcsafe_buf[2048]); + dst = &mcsafe_buf[2048 - i]; + src = &mcsafe_buf[1024]; + expect = 512 - i; + break; + } + + mcsafe_test_init(dst, src, 512); + rem = __memcpy_mcsafe(dst, src, 512); + valid = mcsafe_test_validate(dst, src, 512, expect); + if (rem == expect && valid) + continue; + pr_info("%s: copy(%#lx, %#lx, %d) off: %d rem: %ld %s expect: %ld\n", + __func__, + ((unsigned long) dst) & ~PAGE_MASK, + ((unsigned long ) src) & ~PAGE_MASK, + 512, i, rem, valid ? "valid" : "bad", + expect); + } + } + + mcsafe_inject_src(NULL); + mcsafe_inject_dst(NULL); +} + static __init int nfit_test_init(void) { int rc, i; @@ -2689,6 +2792,7 @@ static __init int nfit_test_init(void) libnvdimm_test(); acpi_nfit_test(); device_dax_test(); + mcsafe_test(); nfit_test_setup(nfit_test_lookup, nfit_test_evaluate_dsm);