提交 af1b3490 编写于 作者: E Eric Whitney 提交者: Joseph Qi

ext4: fix reserved cluster accounting at page invalidation time

commit f456767d3391e9f7d9d25a2e7241d75676dc19da upstream.

Add new code to count canceled pending cluster reservations on bigalloc
file systems and to reduce the cluster reservation count on all file
systems using delayed allocation.  This replaces old code in
ext4_da_page_release_reservation that was incorrect.
Signed-off-by: Eric Whitney <enwlinux@gmail.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Joseph Qi <joseph.qi@linux.alibaba.com>
Reviewed-by: Jiufei Xue <jiufei.xue@linux.alibaba.com>
上级 eb792dc6
...@@ -2519,6 +2519,7 @@ extern int ext4_page_mkwrite(struct vm_fault *vmf); ...@@ -2519,6 +2519,7 @@ extern int ext4_page_mkwrite(struct vm_fault *vmf);
extern int ext4_filemap_fault(struct vm_fault *vmf); extern int ext4_filemap_fault(struct vm_fault *vmf);
extern qsize_t *ext4_get_reserved_space(struct inode *inode); extern qsize_t *ext4_get_reserved_space(struct inode *inode);
extern int ext4_get_projid(struct inode *inode, kprojid_t *projid); extern int ext4_get_projid(struct inode *inode, kprojid_t *projid);
extern void ext4_da_release_space(struct inode *inode, int to_free);
extern void ext4_da_update_reserve_space(struct inode *inode, extern void ext4_da_update_reserve_space(struct inode *inode,
int used, int quota_claim); int used, int quota_claim);
extern int ext4_issue_zeroout(struct inode *inode, ext4_lblk_t lblk, extern int ext4_issue_zeroout(struct inode *inode, ext4_lblk_t lblk,
......
...@@ -1780,3 +1780,93 @@ static void __revise_pending(struct inode *inode, ext4_lblk_t lblk, ...@@ -1780,3 +1780,93 @@ static void __revise_pending(struct inode *inode, ext4_lblk_t lblk,
__remove_pending(inode, last); __remove_pending(inode, last);
} }
} }
/*
 * ext4_es_remove_blks - drop a range of blocks from the extents status
 *                       tree, then release a reserved cluster or cancel
 *                       a pending reservation for each cluster that
 *                       qualifies
 *
 * @inode - file that owns the range
 * @lblk - first logical block of the range
 * @len - number of blocks in the range
 *
 * The range is walked in pieces that never cross a cluster boundary
 * (except that everything from the start of the last cluster onward is
 * handled as one piece).  The whole walk runs with i_es_lock held for
 * writing; the accumulated count is handed to ext4_da_release_space()
 * after the lock has been dropped.
 */
void ext4_es_remove_blks(struct inode *inode, ext4_lblk_t lblk,
			 ext4_lblk_t len)
{
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	/*
	 * Work cluster by cluster for bigalloc - a 4k page with a 1k block
	 * size and two blocks per cluster can span two clusters.  The same
	 * is needed for larger page sizes and potentially larger block
	 * sizes.
	 */
	const unsigned int ratio = sbi->s_cluster_ratio;
	const ext4_lblk_t end_clu = EXT4_B2C(sbi, lblk + len - 1);
	ext4_lblk_t cur = lblk;
	ext4_lblk_t left = len;
	unsigned int to_release = 0;

	write_lock(&EXT4_I(inode)->i_es_lock);

	while (left > 0) {
		struct pending_reservation *pend;
		struct ext4_pending_tree *ptree;
		ext4_lblk_t chunk;
		bool was_delonly;
		int rc;

		/* take the rest of the range once the last cluster is reached */
		chunk = (EXT4_B2C(sbi, cur) == end_clu) ?
			left : ratio - EXT4_LBLK_COFF(sbi, cur);

		/*
		 * BH_Delay, which is what leads callers here, may disagree
		 * with the extents status tree because of writepages
		 * activity.  Record, before removing anything, whether the
		 * cluster really holds a delayed-only extent.
		 */
		was_delonly = __es_scan_clu(inode, &ext4_es_is_delonly, cur);

		/*
		 * For the same reason, written and unwritten blocks may be
		 * among those removed here.  A failure is only warned about;
		 * processing continues.
		 */
		rc = __es_remove_extent(inode, cur, cur + chunk - 1);
		if (rc)
			ext4_warning(inode->i_sb,
				     "%s: couldn't remove page (err = %d)",
				     __func__, rc);

		if (was_delonly) {
			if (ratio == 1) {
				/* non-bigalloc: just count the cluster */
				to_release++;
			} else if (!__es_scan_clu(inode, &ext4_es_is_delonly,
						  cur)) {
				/*
				 * bigalloc: the cluster no longer has any
				 * delayed-only blocks.  Cancel its pending
				 * reservation when one exists, otherwise
				 * count the cluster for release.
				 */
				pend = __get_pending(inode, EXT4_B2C(sbi, cur));
				if (pend == NULL) {
					to_release++;
				} else {
					ptree = &EXT4_I(inode)->i_pending_tree;
					rb_erase(&pend->rb_node, &ptree->root);
					kmem_cache_free(ext4_pending_cachep,
							pend);
				}
			}
		}

		cur += chunk;
		left -= chunk;
	}

	write_unlock(&EXT4_I(inode)->i_es_lock);

	ext4_da_release_space(inode, to_release);
}
...@@ -1605,7 +1605,7 @@ static int ext4_da_reserve_space(struct inode *inode) ...@@ -1605,7 +1605,7 @@ static int ext4_da_reserve_space(struct inode *inode)
return 0; /* success */ return 0; /* success */
} }
static void ext4_da_release_space(struct inode *inode, int to_free) void ext4_da_release_space(struct inode *inode, int to_free)
{ {
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
struct ext4_inode_info *ei = EXT4_I(inode); struct ext4_inode_info *ei = EXT4_I(inode);
...@@ -1644,13 +1644,11 @@ static void ext4_da_page_release_reservation(struct page *page, ...@@ -1644,13 +1644,11 @@ static void ext4_da_page_release_reservation(struct page *page,
unsigned int offset, unsigned int offset,
unsigned int length) unsigned int length)
{ {
int to_release = 0, contiguous_blks = 0; int contiguous_blks = 0;
struct buffer_head *head, *bh; struct buffer_head *head, *bh;
unsigned int curr_off = 0; unsigned int curr_off = 0;
struct inode *inode = page->mapping->host; struct inode *inode = page->mapping->host;
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
unsigned int stop = offset + length; unsigned int stop = offset + length;
int num_clusters;
ext4_fsblk_t lblk; ext4_fsblk_t lblk;
BUG_ON(stop > PAGE_SIZE || stop < length); BUG_ON(stop > PAGE_SIZE || stop < length);
...@@ -1664,7 +1662,6 @@ static void ext4_da_page_release_reservation(struct page *page, ...@@ -1664,7 +1662,6 @@ static void ext4_da_page_release_reservation(struct page *page,
break; break;
if ((offset <= curr_off) && (buffer_delay(bh))) { if ((offset <= curr_off) && (buffer_delay(bh))) {
to_release++;
contiguous_blks++; contiguous_blks++;
clear_buffer_delay(bh); clear_buffer_delay(bh);
} else if (contiguous_blks) { } else if (contiguous_blks) {
...@@ -1672,7 +1669,7 @@ static void ext4_da_page_release_reservation(struct page *page, ...@@ -1672,7 +1669,7 @@ static void ext4_da_page_release_reservation(struct page *page,
(PAGE_SHIFT - inode->i_blkbits); (PAGE_SHIFT - inode->i_blkbits);
lblk += (curr_off >> inode->i_blkbits) - lblk += (curr_off >> inode->i_blkbits) -
contiguous_blks; contiguous_blks;
ext4_es_remove_extent(inode, lblk, contiguous_blks); ext4_es_remove_blks(inode, lblk, contiguous_blks);
contiguous_blks = 0; contiguous_blks = 0;
} }
curr_off = next_off; curr_off = next_off;
...@@ -1681,21 +1678,9 @@ static void ext4_da_page_release_reservation(struct page *page, ...@@ -1681,21 +1678,9 @@ static void ext4_da_page_release_reservation(struct page *page,
if (contiguous_blks) { if (contiguous_blks) {
lblk = page->index << (PAGE_SHIFT - inode->i_blkbits); lblk = page->index << (PAGE_SHIFT - inode->i_blkbits);
lblk += (curr_off >> inode->i_blkbits) - contiguous_blks; lblk += (curr_off >> inode->i_blkbits) - contiguous_blks;
ext4_es_remove_extent(inode, lblk, contiguous_blks); ext4_es_remove_blks(inode, lblk, contiguous_blks);
} }
/* If we have released all the blocks belonging to a cluster, then we
* need to release the reserved space for that cluster. */
num_clusters = EXT4_NUM_B2C(sbi, to_release);
while (num_clusters > 0) {
lblk = (page->index << (PAGE_SHIFT - inode->i_blkbits)) +
((num_clusters - 1) << sbi->s_cluster_bits);
if (sbi->s_cluster_ratio == 1 ||
!ext4_es_scan_clu(inode, &ext4_es_is_delayed, lblk))
ext4_da_release_space(inode, 1);
num_clusters--;
}
} }
/* /*
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册