提交 0e5b88cd 编写于 作者: L Linus Torvalds

Merge git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable

* git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable:
  Btrfs: break out of shrink_delalloc earlier
  btrfs: fix not enough reserved space
  btrfs: fix dip leak
  Btrfs: make sure not to return overlapping extents to fiemap
  Btrfs: deal with short returns from copy_from_user
  Btrfs: fix regressions in copy_from_user handling
...@@ -729,6 +729,15 @@ struct btrfs_space_info { ...@@ -729,6 +729,15 @@ struct btrfs_space_info {
u64 disk_total; /* total bytes on disk, takes mirrors into u64 disk_total; /* total bytes on disk, takes mirrors into
account */ account */
/*
* we bump reservation progress every time we decrement
* bytes_reserved. This way people waiting for reservations
* know something good has happened and they can check
* for progress. The number here isn't to be trusted, it
* just shows reclaim activity
*/
unsigned long reservation_progress;
int full; /* indicates that we cannot allocate any more int full; /* indicates that we cannot allocate any more
chunks for this space */ chunks for this space */
int force_alloc; /* set if we need to force a chunk alloc for int force_alloc; /* set if we need to force a chunk alloc for
......
...@@ -3342,15 +3342,16 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans, ...@@ -3342,15 +3342,16 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
u64 max_reclaim; u64 max_reclaim;
u64 reclaimed = 0; u64 reclaimed = 0;
long time_left; long time_left;
int pause = 1;
int nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT; int nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT;
int loops = 0; int loops = 0;
unsigned long progress;
block_rsv = &root->fs_info->delalloc_block_rsv; block_rsv = &root->fs_info->delalloc_block_rsv;
space_info = block_rsv->space_info; space_info = block_rsv->space_info;
smp_mb(); smp_mb();
reserved = space_info->bytes_reserved; reserved = space_info->bytes_reserved;
progress = space_info->reservation_progress;
if (reserved == 0) if (reserved == 0)
return 0; return 0;
...@@ -3365,31 +3366,36 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans, ...@@ -3365,31 +3366,36 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages); writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages);
spin_lock(&space_info->lock); spin_lock(&space_info->lock);
if (reserved > space_info->bytes_reserved) { if (reserved > space_info->bytes_reserved)
loops = 0;
reclaimed += reserved - space_info->bytes_reserved; reclaimed += reserved - space_info->bytes_reserved;
} else {
loops++;
}
reserved = space_info->bytes_reserved; reserved = space_info->bytes_reserved;
spin_unlock(&space_info->lock); spin_unlock(&space_info->lock);
loops++;
if (reserved == 0 || reclaimed >= max_reclaim) if (reserved == 0 || reclaimed >= max_reclaim)
break; break;
if (trans && trans->transaction->blocked) if (trans && trans->transaction->blocked)
return -EAGAIN; return -EAGAIN;
__set_current_state(TASK_INTERRUPTIBLE); time_left = schedule_timeout_interruptible(1);
time_left = schedule_timeout(pause);
/* We were interrupted, exit */ /* We were interrupted, exit */
if (time_left) if (time_left)
break; break;
pause <<= 1; /* we've kicked the IO a few times, if anything has been freed,
if (pause > HZ / 10) * exit. There is no sense in looping here for a long time
pause = HZ / 10; * when we really need to commit the transaction, or there are
* just too many writers without enough free space
*/
if (loops > 3) {
smp_mb();
if (progress != space_info->reservation_progress)
break;
}
} }
return reclaimed >= to_reclaim; return reclaimed >= to_reclaim;
...@@ -3612,6 +3618,7 @@ void block_rsv_release_bytes(struct btrfs_block_rsv *block_rsv, ...@@ -3612,6 +3618,7 @@ void block_rsv_release_bytes(struct btrfs_block_rsv *block_rsv,
if (num_bytes) { if (num_bytes) {
spin_lock(&space_info->lock); spin_lock(&space_info->lock);
space_info->bytes_reserved -= num_bytes; space_info->bytes_reserved -= num_bytes;
space_info->reservation_progress++;
spin_unlock(&space_info->lock); spin_unlock(&space_info->lock);
} }
} }
...@@ -3844,6 +3851,7 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info) ...@@ -3844,6 +3851,7 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
if (block_rsv->reserved >= block_rsv->size) { if (block_rsv->reserved >= block_rsv->size) {
num_bytes = block_rsv->reserved - block_rsv->size; num_bytes = block_rsv->reserved - block_rsv->size;
sinfo->bytes_reserved -= num_bytes; sinfo->bytes_reserved -= num_bytes;
sinfo->reservation_progress++;
block_rsv->reserved = block_rsv->size; block_rsv->reserved = block_rsv->size;
block_rsv->full = 1; block_rsv->full = 1;
} }
...@@ -4005,7 +4013,6 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) ...@@ -4005,7 +4013,6 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
to_reserve = 0; to_reserve = 0;
} }
spin_unlock(&BTRFS_I(inode)->accounting_lock); spin_unlock(&BTRFS_I(inode)->accounting_lock);
to_reserve += calc_csum_metadata_size(inode, num_bytes); to_reserve += calc_csum_metadata_size(inode, num_bytes);
ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1); ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1);
if (ret) if (ret)
...@@ -4133,6 +4140,7 @@ static int update_block_group(struct btrfs_trans_handle *trans, ...@@ -4133,6 +4140,7 @@ static int update_block_group(struct btrfs_trans_handle *trans,
btrfs_set_block_group_used(&cache->item, old_val); btrfs_set_block_group_used(&cache->item, old_val);
cache->reserved -= num_bytes; cache->reserved -= num_bytes;
cache->space_info->bytes_reserved -= num_bytes; cache->space_info->bytes_reserved -= num_bytes;
cache->space_info->reservation_progress++;
cache->space_info->bytes_used += num_bytes; cache->space_info->bytes_used += num_bytes;
cache->space_info->disk_used += num_bytes * factor; cache->space_info->disk_used += num_bytes * factor;
spin_unlock(&cache->lock); spin_unlock(&cache->lock);
...@@ -4184,6 +4192,7 @@ static int pin_down_extent(struct btrfs_root *root, ...@@ -4184,6 +4192,7 @@ static int pin_down_extent(struct btrfs_root *root,
if (reserved) { if (reserved) {
cache->reserved -= num_bytes; cache->reserved -= num_bytes;
cache->space_info->bytes_reserved -= num_bytes; cache->space_info->bytes_reserved -= num_bytes;
cache->space_info->reservation_progress++;
} }
spin_unlock(&cache->lock); spin_unlock(&cache->lock);
spin_unlock(&cache->space_info->lock); spin_unlock(&cache->space_info->lock);
...@@ -4234,6 +4243,7 @@ static int update_reserved_bytes(struct btrfs_block_group_cache *cache, ...@@ -4234,6 +4243,7 @@ static int update_reserved_bytes(struct btrfs_block_group_cache *cache,
space_info->bytes_readonly += num_bytes; space_info->bytes_readonly += num_bytes;
cache->reserved -= num_bytes; cache->reserved -= num_bytes;
space_info->bytes_reserved -= num_bytes; space_info->bytes_reserved -= num_bytes;
space_info->reservation_progress++;
} }
spin_unlock(&cache->lock); spin_unlock(&cache->lock);
spin_unlock(&space_info->lock); spin_unlock(&space_info->lock);
...@@ -4712,6 +4722,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, ...@@ -4712,6 +4722,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
if (ret) { if (ret) {
spin_lock(&cache->space_info->lock); spin_lock(&cache->space_info->lock);
cache->space_info->bytes_reserved -= buf->len; cache->space_info->bytes_reserved -= buf->len;
cache->space_info->reservation_progress++;
spin_unlock(&cache->space_info->lock); spin_unlock(&cache->space_info->lock);
} }
goto out; goto out;
......
...@@ -3046,17 +3046,38 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, ...@@ -3046,17 +3046,38 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
} }
while (!end) { while (!end) {
off = extent_map_end(em); u64 offset_in_extent;
if (off >= max)
end = 1; /* break if the extent we found is outside the range */
if (em->start >= max || extent_map_end(em) < off)
break;
/*
* get_extent may return an extent that starts before our
* requested range. We have to make sure the ranges
* we return to fiemap always move forward and don't
* overlap, so adjust the offsets here
*/
em_start = max(em->start, off);
em_start = em->start; /*
em_len = em->len; * record the offset from the start of the extent
* for adjusting the disk offset below
*/
offset_in_extent = em_start - em->start;
em_end = extent_map_end(em); em_end = extent_map_end(em);
em_len = em_end - em_start;
emflags = em->flags; emflags = em->flags;
disko = 0; disko = 0;
flags = 0; flags = 0;
/*
* bump off for our next call to get_extent
*/
off = extent_map_end(em);
if (off >= max)
end = 1;
if (em->block_start == EXTENT_MAP_LAST_BYTE) { if (em->block_start == EXTENT_MAP_LAST_BYTE) {
end = 1; end = 1;
flags |= FIEMAP_EXTENT_LAST; flags |= FIEMAP_EXTENT_LAST;
...@@ -3067,7 +3088,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, ...@@ -3067,7 +3088,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
flags |= (FIEMAP_EXTENT_DELALLOC | flags |= (FIEMAP_EXTENT_DELALLOC |
FIEMAP_EXTENT_UNKNOWN); FIEMAP_EXTENT_UNKNOWN);
} else { } else {
disko = em->block_start; disko = em->block_start + offset_in_extent;
} }
if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
flags |= FIEMAP_EXTENT_ENCODED; flags |= FIEMAP_EXTENT_ENCODED;
......
...@@ -70,6 +70,19 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages, ...@@ -70,6 +70,19 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
/* Flush processor's dcache for this page */ /* Flush processor's dcache for this page */
flush_dcache_page(page); flush_dcache_page(page);
/*
* if we get a partial write, we can end up with
* partially up to date pages. These add
* a lot of complexity, so make sure they don't
* happen by forcing this copy to be retried.
*
* The rest of the btrfs_file_write code will fall
* back to page at a time copies after we return 0.
*/
if (!PageUptodate(page) && copied < count)
copied = 0;
iov_iter_advance(i, copied); iov_iter_advance(i, copied);
write_bytes -= copied; write_bytes -= copied;
total_copied += copied; total_copied += copied;
...@@ -762,6 +775,27 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, ...@@ -762,6 +775,27 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
return 0; return 0;
} }
/*
* on error we return an unlocked page and the error value
* on success we return a locked page and 0
*/
static int prepare_uptodate_page(struct page *page, u64 pos)
{
int ret = 0;
if ((pos & (PAGE_CACHE_SIZE - 1)) && !PageUptodate(page)) {
ret = btrfs_readpage(NULL, page);
if (ret)
return ret;
lock_page(page);
if (!PageUptodate(page)) {
unlock_page(page);
return -EIO;
}
}
return 0;
}
/* /*
* this gets pages into the page cache and locks them down, it also properly * this gets pages into the page cache and locks them down, it also properly
* waits for data=ordered extents to finish before allowing the pages to be * waits for data=ordered extents to finish before allowing the pages to be
...@@ -777,6 +811,7 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file, ...@@ -777,6 +811,7 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file,
unsigned long index = pos >> PAGE_CACHE_SHIFT; unsigned long index = pos >> PAGE_CACHE_SHIFT;
struct inode *inode = fdentry(file)->d_inode; struct inode *inode = fdentry(file)->d_inode;
int err = 0; int err = 0;
int faili = 0;
u64 start_pos; u64 start_pos;
u64 last_pos; u64 last_pos;
...@@ -794,15 +829,24 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file, ...@@ -794,15 +829,24 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file,
for (i = 0; i < num_pages; i++) { for (i = 0; i < num_pages; i++) {
pages[i] = grab_cache_page(inode->i_mapping, index + i); pages[i] = grab_cache_page(inode->i_mapping, index + i);
if (!pages[i]) { if (!pages[i]) {
int c; faili = i - 1;
for (c = i - 1; c >= 0; c--) { err = -ENOMEM;
unlock_page(pages[c]); goto fail;
page_cache_release(pages[c]); }
}
return -ENOMEM; if (i == 0)
err = prepare_uptodate_page(pages[i], pos);
if (i == num_pages - 1)
err = prepare_uptodate_page(pages[i],
pos + write_bytes);
if (err) {
page_cache_release(pages[i]);
faili = i - 1;
goto fail;
} }
wait_on_page_writeback(pages[i]); wait_on_page_writeback(pages[i]);
} }
err = 0;
if (start_pos < inode->i_size) { if (start_pos < inode->i_size) {
struct btrfs_ordered_extent *ordered; struct btrfs_ordered_extent *ordered;
lock_extent_bits(&BTRFS_I(inode)->io_tree, lock_extent_bits(&BTRFS_I(inode)->io_tree,
...@@ -842,6 +886,14 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file, ...@@ -842,6 +886,14 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file,
WARN_ON(!PageLocked(pages[i])); WARN_ON(!PageLocked(pages[i]));
} }
return 0; return 0;
fail:
while (faili >= 0) {
unlock_page(pages[faili]);
page_cache_release(pages[faili]);
faili--;
}
return err;
} }
static ssize_t btrfs_file_aio_write(struct kiocb *iocb, static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
...@@ -851,7 +903,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, ...@@ -851,7 +903,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
struct file *file = iocb->ki_filp; struct file *file = iocb->ki_filp;
struct inode *inode = fdentry(file)->d_inode; struct inode *inode = fdentry(file)->d_inode;
struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_root *root = BTRFS_I(inode)->root;
struct page *pinned[2];
struct page **pages = NULL; struct page **pages = NULL;
struct iov_iter i; struct iov_iter i;
loff_t *ppos = &iocb->ki_pos; loff_t *ppos = &iocb->ki_pos;
...@@ -872,9 +923,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, ...@@ -872,9 +923,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) || will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) ||
(file->f_flags & O_DIRECT)); (file->f_flags & O_DIRECT));
pinned[0] = NULL;
pinned[1] = NULL;
start_pos = pos; start_pos = pos;
vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
...@@ -962,32 +1010,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, ...@@ -962,32 +1010,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
first_index = pos >> PAGE_CACHE_SHIFT; first_index = pos >> PAGE_CACHE_SHIFT;
last_index = (pos + iov_iter_count(&i)) >> PAGE_CACHE_SHIFT; last_index = (pos + iov_iter_count(&i)) >> PAGE_CACHE_SHIFT;
/*
* there are lots of better ways to do this, but this code
* makes sure the first and last page in the file range are
* up to date and ready for cow
*/
if ((pos & (PAGE_CACHE_SIZE - 1))) {
pinned[0] = grab_cache_page(inode->i_mapping, first_index);
if (!PageUptodate(pinned[0])) {
ret = btrfs_readpage(NULL, pinned[0]);
BUG_ON(ret);
wait_on_page_locked(pinned[0]);
} else {
unlock_page(pinned[0]);
}
}
if ((pos + iov_iter_count(&i)) & (PAGE_CACHE_SIZE - 1)) {
pinned[1] = grab_cache_page(inode->i_mapping, last_index);
if (!PageUptodate(pinned[1])) {
ret = btrfs_readpage(NULL, pinned[1]);
BUG_ON(ret);
wait_on_page_locked(pinned[1]);
} else {
unlock_page(pinned[1]);
}
}
while (iov_iter_count(&i) > 0) { while (iov_iter_count(&i) > 0) {
size_t offset = pos & (PAGE_CACHE_SIZE - 1); size_t offset = pos & (PAGE_CACHE_SIZE - 1);
size_t write_bytes = min(iov_iter_count(&i), size_t write_bytes = min(iov_iter_count(&i),
...@@ -1024,8 +1046,20 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, ...@@ -1024,8 +1046,20 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
copied = btrfs_copy_from_user(pos, num_pages, copied = btrfs_copy_from_user(pos, num_pages,
write_bytes, pages, &i); write_bytes, pages, &i);
dirty_pages = (copied + offset + PAGE_CACHE_SIZE - 1) >>
PAGE_CACHE_SHIFT; /*
* if we have trouble faulting in the pages, fall
* back to one page at a time
*/
if (copied < write_bytes)
nrptrs = 1;
if (copied == 0)
dirty_pages = 0;
else
dirty_pages = (copied + offset +
PAGE_CACHE_SIZE - 1) >>
PAGE_CACHE_SHIFT;
if (num_pages > dirty_pages) { if (num_pages > dirty_pages) {
if (copied > 0) if (copied > 0)
...@@ -1069,10 +1103,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, ...@@ -1069,10 +1103,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
err = ret; err = ret;
kfree(pages); kfree(pages);
if (pinned[0])
page_cache_release(pinned[0]);
if (pinned[1])
page_cache_release(pinned[1]);
*ppos = pos; *ppos = pos;
/* /*
......
...@@ -4821,10 +4821,11 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, ...@@ -4821,10 +4821,11 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
goto fail; goto fail;
/* /*
* 1 item for inode ref * 2 items for inode and inode ref
* 2 items for dir items * 2 items for dir items
* 1 item for parent inode
*/ */
trans = btrfs_start_transaction(root, 3); trans = btrfs_start_transaction(root, 5);
if (IS_ERR(trans)) { if (IS_ERR(trans)) {
err = PTR_ERR(trans); err = PTR_ERR(trans);
goto fail; goto fail;
...@@ -6056,6 +6057,7 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode, ...@@ -6056,6 +6057,7 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
if (!skip_sum) { if (!skip_sum) {
dip->csums = kmalloc(sizeof(u32) * bio->bi_vcnt, GFP_NOFS); dip->csums = kmalloc(sizeof(u32) * bio->bi_vcnt, GFP_NOFS);
if (!dip->csums) { if (!dip->csums) {
kfree(dip);
ret = -ENOMEM; ret = -ENOMEM;
goto free_ordered; goto free_ordered;
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册