提交 9ea7df53 编写于 作者: J Jan Kara 提交者: Al Viro

ext4: Rewrite ext4_page_mkwrite() to use generic helpers

Rewrite ext4_page_mkwrite() to use the __block_page_mkwrite() helper. This
removes the need to use i_alloc_sem to avoid races with truncate, which
seems to be the wrong locking order according to the lock ordering documented
in mm/rmap.c. Also, calling ext4_da_write_begin(), as the old code did, seems
to be problematic because we can decide to flush delay-allocated blocks, which
will acquire the s_umount semaphore - again creating an unpleasant lock
dependency, if not directly a deadlock.

Also add a check for a frozen filesystem so that we don't busy-loop in the
page fault path when the filesystem is frozen.
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
上级 58268691
...@@ -5843,80 +5843,84 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) ...@@ -5843,80 +5843,84 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
struct page *page = vmf->page; struct page *page = vmf->page;
loff_t size; loff_t size;
unsigned long len; unsigned long len;
int ret = -EINVAL; int ret;
void *fsdata;
struct file *file = vma->vm_file; struct file *file = vma->vm_file;
struct inode *inode = file->f_path.dentry->d_inode; struct inode *inode = file->f_path.dentry->d_inode;
struct address_space *mapping = inode->i_mapping; struct address_space *mapping = inode->i_mapping;
handle_t *handle;
get_block_t *get_block;
int retries = 0;
/* /*
* Get i_alloc_sem to stop truncates messing with the inode. We cannot * This check is racy but catches the common case. We rely on
* get i_mutex because we are already holding mmap_sem. * __block_page_mkwrite() to do a reliable check.
*/ */
down_read(&inode->i_alloc_sem); vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
size = i_size_read(inode); /* Delalloc case is easy... */
if (page->mapping != mapping || size <= page_offset(page) if (test_opt(inode->i_sb, DELALLOC) &&
|| !PageUptodate(page)) { !ext4_should_journal_data(inode) &&
/* page got truncated from under us? */ !ext4_nonda_switch(inode->i_sb)) {
goto out_unlock; do {
ret = __block_page_mkwrite(vma, vmf,
ext4_da_get_block_prep);
} while (ret == -ENOSPC &&
ext4_should_retry_alloc(inode->i_sb, &retries));
goto out_ret;
} }
ret = 0;
lock_page(page); lock_page(page);
wait_on_page_writeback(page); size = i_size_read(inode);
if (PageMappedToDisk(page)) { /* Page got truncated from under us? */
up_read(&inode->i_alloc_sem); if (page->mapping != mapping || page_offset(page) > size) {
return VM_FAULT_LOCKED; unlock_page(page);
ret = VM_FAULT_NOPAGE;
goto out;
} }
if (page->index == size >> PAGE_CACHE_SHIFT) if (page->index == size >> PAGE_CACHE_SHIFT)
len = size & ~PAGE_CACHE_MASK; len = size & ~PAGE_CACHE_MASK;
else else
len = PAGE_CACHE_SIZE; len = PAGE_CACHE_SIZE;
/* /*
* return if we have all the buffers mapped. This avoid * Return if we have all the buffers mapped. This avoids the need to do
* the need to call write_begin/write_end which does a * journal_start/journal_stop which can block and take a long time
* journal_start/journal_stop which can block and take
* long time
*/ */
if (page_has_buffers(page)) { if (page_has_buffers(page)) {
if (!walk_page_buffers(NULL, page_buffers(page), 0, len, NULL, if (!walk_page_buffers(NULL, page_buffers(page), 0, len, NULL,
ext4_bh_unmapped)) { ext4_bh_unmapped)) {
up_read(&inode->i_alloc_sem); /* Wait so that we don't change page under IO */
return VM_FAULT_LOCKED; wait_on_page_writeback(page);
ret = VM_FAULT_LOCKED;
goto out;
} }
} }
unlock_page(page); unlock_page(page);
/* /* OK, we need to fill the hole... */
* OK, we need to fill the hole... Do write_begin write_end if (ext4_should_dioread_nolock(inode))
* to do block allocation/reservation.We are not holding get_block = ext4_get_block_write;
* inode.i__mutex here. That allow * parallel write_begin, else
* write_end call. lock_page prevent this from happening get_block = ext4_get_block;
* on the same page though retry_alloc:
*/ handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode));
ret = mapping->a_ops->write_begin(file, mapping, page_offset(page), if (IS_ERR(handle)) {
len, AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata); ret = VM_FAULT_SIGBUS;
if (ret < 0) goto out;
goto out_unlock; }
ret = mapping->a_ops->write_end(file, mapping, page_offset(page), ret = __block_page_mkwrite(vma, vmf, get_block);
len, len, page, fsdata); if (!ret && ext4_should_journal_data(inode)) {
if (ret < 0) if (walk_page_buffers(handle, page_buffers(page), 0,
goto out_unlock; PAGE_CACHE_SIZE, NULL, do_journal_get_write_access)) {
ret = 0; unlock_page(page);
/*
* write_begin/end might have created a dirty page and someone
* could wander in and start the IO. Make sure that hasn't
* happened.
*/
lock_page(page);
wait_on_page_writeback(page);
up_read(&inode->i_alloc_sem);
return VM_FAULT_LOCKED;
out_unlock:
if (ret)
ret = VM_FAULT_SIGBUS; ret = VM_FAULT_SIGBUS;
up_read(&inode->i_alloc_sem); goto out;
}
ext4_set_inode_state(inode, EXT4_STATE_JDATA);
}
ext4_journal_stop(handle);
if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
goto retry_alloc;
out_ret:
ret = block_page_mkwrite_return(ret);
out:
return ret; return ret;
} }
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册