Commit 2f718ffc authored by Nick Piggin, committed by Linus Torvalds

mm: buffered write iterator

Add an iterator data structure to operate over an iovec.  Add usercopy
operators needed by generic_file_buffered_write, and convert that function
over.
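
For orientation, the consumption pattern this patch establishes looks roughly like the following skeleton (a condensed sketch of the generic_file_buffered_write() loop in the diff below; page-cache lookup and error handling are elided, so it is illustrative rather than compilable):

	struct iov_iter i;
	struct page *page;

	iov_iter_init(&i, iov, nr_segs, count, written);
	do {
		unsigned long offset = pos & (PAGE_CACHE_SIZE - 1);
		unsigned long bytes = min_t(unsigned long,
				PAGE_CACHE_SIZE - offset, iov_iter_count(&i));
		size_t copied;

		/* Pre-fault the user buffer: the atomic copy below runs
		 * with pagefaults disabled and must not fault. */
		if (unlikely(iov_iter_fault_in_readable(&i)))
			break;			/* -EFAULT */

		/* ... look up and prepare the pagecache page here ... */

		pagefault_disable();
		copied = iov_iter_copy_from_user_atomic(page, &i, offset, bytes);
		pagefault_enable();

		/* A short atomic copy is not an error: advance by what
		 * actually arrived and go around again. */
		iov_iter_advance(&i, copied);
		written += copied;
		pos += copied;
	} while (iov_iter_count(&i));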
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Parent 08291429
include/linux/fs.h
@@ -401,6 +401,39 @@ struct page;
 struct address_space;
 struct writeback_control;
 
+struct iov_iter {
+	const struct iovec *iov;
+	unsigned long nr_segs;
+	size_t iov_offset;
+	size_t count;
+};
+
+size_t iov_iter_copy_from_user_atomic(struct page *page,
+		struct iov_iter *i, unsigned long offset, size_t bytes);
+size_t iov_iter_copy_from_user(struct page *page,
+		struct iov_iter *i, unsigned long offset, size_t bytes);
+void iov_iter_advance(struct iov_iter *i, size_t bytes);
+int iov_iter_fault_in_readable(struct iov_iter *i);
+size_t iov_iter_single_seg_count(struct iov_iter *i);
+
+static inline void iov_iter_init(struct iov_iter *i,
+			const struct iovec *iov, unsigned long nr_segs,
+			size_t count, size_t written)
+{
+	i->iov = iov;
+	i->nr_segs = nr_segs;
+	i->iov_offset = 0;
+	i->count = count + written;
+
+	iov_iter_advance(i, written);
+}
+
+static inline size_t iov_iter_count(struct iov_iter *i)
+{
+	return i->count;
+}
+
 struct address_space_operations {
 	int (*writepage)(struct page *page, struct writeback_control *wbc);
 	int (*readpage)(struct file *, struct page *);
......
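
One detail worth calling out: iov_iter_init() takes the number of bytes already written (for example by a partial direct-IO write that falls back to buffered IO) and positions the iterator past them, replacing the old filemap_set_next_iovec() fixup. It does this by setting count to count + written and then consuming written bytes:

	struct iov_iter i;

	/* count bytes remain to be copied; written bytes are already done */
	iov_iter_init(&i, iov, nr_segs, count, written);
	/* now iov_iter_count(&i) == count, and i points at the first
	 * unconsumed byte inside the iovec array */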
mm/filemap.c
@@ -30,7 +30,7 @@
 #include <linux/security.h>
 #include <linux/syscalls.h>
 #include <linux/cpuset.h>
-#include "filemap.h"
+#include <linux/hardirq.h> /* for BUG_ON(!in_atomic()) only */
 #include "internal.h"
 
 /*
@@ -1635,8 +1635,7 @@ int remove_suid(struct dentry *dentry)
 }
 EXPORT_SYMBOL(remove_suid);
 
-size_t
-__filemap_copy_from_user_iovec_inatomic(char *vaddr,
+static size_t __iovec_copy_from_user_inatomic(char *vaddr,
 			const struct iovec *iov, size_t base, size_t bytes)
 {
 	size_t copied = 0, left = 0;
@@ -1658,6 +1657,110 @@ __filemap_copy_from_user_iovec_inatomic(char *vaddr,
 	return copied - left;
 }
 
+/*
+ * Copy as much as we can into the page and return the number of bytes which
+ * were successfully copied.  If a fault is encountered then return the number
+ * of bytes which were copied.
+ */
+size_t iov_iter_copy_from_user_atomic(struct page *page,
+		struct iov_iter *i, unsigned long offset, size_t bytes)
+{
+	char *kaddr;
+	size_t copied;
+
+	BUG_ON(!in_atomic());
+	kaddr = kmap_atomic(page, KM_USER0);
+	if (likely(i->nr_segs == 1)) {
+		int left;
+		char __user *buf = i->iov->iov_base + i->iov_offset;
+		left = __copy_from_user_inatomic_nocache(kaddr + offset,
+							buf, bytes);
+		copied = bytes - left;
+	} else {
+		copied = __iovec_copy_from_user_inatomic(kaddr + offset,
+					i->iov, i->iov_offset, bytes);
+	}
+	kunmap_atomic(kaddr, KM_USER0);
+
+	return copied;
+}
+
+/*
+ * This has the same side effects and return value as
+ * iov_iter_copy_from_user_atomic().
+ * The difference is that it attempts to resolve faults.
+ * Page must not be locked.
+ */
+size_t iov_iter_copy_from_user(struct page *page,
+		struct iov_iter *i, unsigned long offset, size_t bytes)
+{
+	char *kaddr;
+	size_t copied;
+
+	kaddr = kmap(page);
+	if (likely(i->nr_segs == 1)) {
+		int left;
+		char __user *buf = i->iov->iov_base + i->iov_offset;
+		left = __copy_from_user_nocache(kaddr + offset, buf, bytes);
+		copied = bytes - left;
+	} else {
+		copied = __iovec_copy_from_user_inatomic(kaddr + offset,
+					i->iov, i->iov_offset, bytes);
+	}
+	kunmap(page);
+	return copied;
+}
+
+static void __iov_iter_advance_iov(struct iov_iter *i, size_t bytes)
+{
+	if (likely(i->nr_segs == 1)) {
+		i->iov_offset += bytes;
+	} else {
+		const struct iovec *iov = i->iov;
+		size_t base = i->iov_offset;
+
+		while (bytes) {
+			int copy = min(bytes, iov->iov_len - base);
+
+			bytes -= copy;
+			base += copy;
+			if (iov->iov_len == base) {
+				iov++;
+				base = 0;
+			}
+		}
+		i->iov = iov;
+		i->iov_offset = base;
+	}
+}
+
+void iov_iter_advance(struct iov_iter *i, size_t bytes)
+{
+	BUG_ON(i->count < bytes);
+
+	__iov_iter_advance_iov(i, bytes);
+	i->count -= bytes;
+}
+
+int iov_iter_fault_in_readable(struct iov_iter *i)
+{
+	size_t seglen = min(i->iov->iov_len - i->iov_offset, i->count);
+	char __user *buf = i->iov->iov_base + i->iov_offset;
+	return fault_in_pages_readable(buf, seglen);
+}
+
+/*
+ * Return the count of just the current iov_iter segment.
+ */
+size_t iov_iter_single_seg_count(struct iov_iter *i)
+{
+	const struct iovec *iov = i->iov;
+	if (i->nr_segs == 1)
+		return i->count;
+	else
+		return min(i->count, iov->iov_len - i->iov_offset);
+}
+
 /*
  * Performs necessary checks before doing a write
  *
@@ -1816,30 +1919,22 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
 	const struct address_space_operations *a_ops = mapping->a_ops;
 	struct inode *inode = mapping->host;
 	long status = 0;
-	const struct iovec *cur_iov = iov; /* current iovec */
-	size_t iov_offset = 0;	   /* offset in the current iovec */
-	char __user *buf;
-
-	/*
-	 * handle partial DIO write.  Adjust cur_iov if needed.
-	 */
-	filemap_set_next_iovec(&cur_iov, nr_segs, &iov_offset, written);
+	struct iov_iter i;
+
+	iov_iter_init(&i, iov, nr_segs, count, written);
 
 	do {
 		struct page *src_page;
 		struct page *page;
 		pgoff_t index;		/* Pagecache index for current page */
 		unsigned long offset;	/* Offset into pagecache page */
-		unsigned long seglen;	/* Bytes remaining in current iovec */
 		unsigned long bytes;	/* Bytes to write to page */
 		size_t copied;		/* Bytes copied from user */
 
-		buf = cur_iov->iov_base + iov_offset;
 		offset = (pos & (PAGE_CACHE_SIZE - 1));
 		index = pos >> PAGE_CACHE_SHIFT;
-		bytes = PAGE_CACHE_SIZE - offset;
-		if (bytes > count)
-			bytes = count;
+		bytes = min_t(unsigned long, PAGE_CACHE_SIZE - offset,
+						iov_iter_count(&i));
 
 		/*
 		 * a non-NULL src_page indicates that we're doing the
@@ -1847,10 +1942,6 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
 		 */
 		src_page = NULL;
 
-		seglen = cur_iov->iov_len - iov_offset;
-		if (seglen > bytes)
-			seglen = bytes;
-
 		/*
 		 * Bring in the user page that we will copy from _first_.
 		 * Otherwise there's a nasty deadlock on copying from the
@@ -1861,7 +1952,7 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
 		 * to check that the address is actually valid, when atomic
 		 * usercopies are used, below.
 		 */
-		if (unlikely(fault_in_pages_readable(buf, seglen))) {
+		if (unlikely(iov_iter_fault_in_readable(&i))) {
 			status = -EFAULT;
 			break;
 		}
@@ -1892,8 +1983,8 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
 			 * same reason as we can't take a page fault with a
 			 * page locked (as explained below).
 			 */
-			copied = filemap_copy_from_user(src_page, offset,
-					cur_iov, nr_segs, iov_offset, bytes);
+			copied = iov_iter_copy_from_user(src_page, &i,
+								offset, bytes);
 			if (unlikely(copied == 0)) {
 				status = -EFAULT;
 				page_cache_release(page);
@@ -1939,8 +2030,8 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
 			 * really matter.
 			 */
 			pagefault_disable();
-			copied = filemap_copy_from_user_atomic(page, offset,
-					cur_iov, nr_segs, iov_offset, bytes);
+			copied = iov_iter_copy_from_user_atomic(page, &i,
+								offset, bytes);
 			pagefault_enable();
 		} else {
 			void *src, *dst;
@@ -1965,10 +2056,9 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
 		if (src_page)
 			page_cache_release(src_page);
 
+		iov_iter_advance(&i, copied);
 		written += copied;
-		count -= copied;
 		pos += copied;
-		filemap_set_next_iovec(&cur_iov, nr_segs, &iov_offset, copied);
 
 		balance_dirty_pages_ratelimited(mapping);
 		cond_resched();
@@ -1992,7 +2082,7 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
 			continue;
 		else
 			break;
-	} while (count);
+	} while (iov_iter_count(&i));
 	*ppos = pos;
 
 	/*
......
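
The net effect at each call site is that the (cur_iov, nr_segs, iov_offset) triple threaded through the old helpers collapses into the iterator. Taken from the hunks above:

	/* before */
	copied = filemap_copy_from_user_atomic(page, offset,
			cur_iov, nr_segs, iov_offset, bytes);
	/* after */
	copied = iov_iter_copy_from_user_atomic(page, &i, offset, bytes);

With every caller converted, the old helpers in mm/filemap.h have no remaining users, and the header is deleted wholesale: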
/*
 * linux/mm/filemap.h
 *
 * Copyright (C) 1994-1999  Linus Torvalds
 */

#ifndef __FILEMAP_H
#define __FILEMAP_H

#include <linux/types.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/uio.h>
#include <linux/uaccess.h>

size_t
__filemap_copy_from_user_iovec_inatomic(char *vaddr,
					const struct iovec *iov,
					size_t base,
					size_t bytes);

/*
 * Copy as much as we can into the page and return the number of bytes which
 * were successfully copied.  If a fault is encountered then return the number
 * of bytes which were copied.
 */
static inline size_t
filemap_copy_from_user_atomic(struct page *page, unsigned long offset,
			const struct iovec *iov, unsigned long nr_segs,
			size_t base, size_t bytes)
{
	char *kaddr;
	size_t copied;

	kaddr = kmap_atomic(page, KM_USER0);
	if (likely(nr_segs == 1)) {
		int left;
		char __user *buf = iov->iov_base + base;
		left = __copy_from_user_inatomic_nocache(kaddr + offset,
							buf, bytes);
		copied = bytes - left;
	} else {
		copied = __filemap_copy_from_user_iovec_inatomic(kaddr + offset,
							iov, base, bytes);
	}
	kunmap_atomic(kaddr, KM_USER0);

	return copied;
}

/*
 * This has the same side effects and return value as
 * filemap_copy_from_user_atomic().
 * The difference is that it attempts to resolve faults.
 */
static inline size_t
filemap_copy_from_user(struct page *page, unsigned long offset,
			const struct iovec *iov, unsigned long nr_segs,
			size_t base, size_t bytes)
{
	char *kaddr;
	size_t copied;

	kaddr = kmap(page);
	if (likely(nr_segs == 1)) {
		int left;
		char __user *buf = iov->iov_base + base;
		left = __copy_from_user_nocache(kaddr + offset, buf, bytes);
		copied = bytes - left;
	} else {
		copied = __filemap_copy_from_user_iovec_inatomic(kaddr + offset,
							iov, base, bytes);
	}
	kunmap(page);
	return copied;
}

static inline void
filemap_set_next_iovec(const struct iovec **iovp, unsigned long nr_segs,
			size_t *basep, size_t bytes)
{
	if (likely(nr_segs == 1)) {
		*basep += bytes;
	} else {
		const struct iovec *iov = *iovp;
		size_t base = *basep;

		while (bytes) {
			int copy = min(bytes, iov->iov_len - base);

			bytes -= copy;
			base += copy;
			if (iov->iov_len == base) {
				iov++;
				base = 0;
			}
		}
		*iovp = iov;
		*basep = base;
	}
}
#endif
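
To make the segment-walking arithmetic shared by __iov_iter_advance_iov() and the old filemap_set_next_iovec() concrete, here is a small standalone user-space rendition. The names iter_demo and demo_advance are invented for this sketch; the stepping logic mirrors the kernel code above (note the kernel guards against overrunning the iovec with BUG_ON(i->count < bytes)):

	#include <stdio.h>
	#include <stddef.h>
	#include <sys/uio.h>

	struct iter_demo {		/* hypothetical stand-in for iov_iter */
		const struct iovec *iov;
		size_t iov_offset;
	};

	/* Mirrors __iov_iter_advance_iov(): consume 'bytes' across segments. */
	static void demo_advance(struct iter_demo *i, size_t bytes)
	{
		const struct iovec *iov = i->iov;
		size_t base = i->iov_offset;

		while (bytes) {
			size_t copy = bytes < iov->iov_len - base ?
					bytes : iov->iov_len - base;

			bytes -= copy;
			base += copy;
			if (iov->iov_len == base) {	/* segment exhausted */
				iov++;
				base = 0;
			}
		}
		i->iov = iov;
		i->iov_offset = base;
	}

	int main(void)
	{
		char a[100], b[50];
		struct iovec v[2] = { { a, sizeof(a) }, { b, sizeof(b) } };
		struct iter_demo i = { v, 0 };

		demo_advance(&i, 120);	/* eats all of v[0], 20 bytes of v[1] */
		printf("segment %td, offset %zu\n", i.iov - v, i.iov_offset);
		/* prints: segment 1, offset 20 */
		return 0;
	}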