提交 45bce8f3 编写于 作者: Linus Torvalds

fs/buffer.c: make block-size be per-page and protected by the page lock

This makes the buffer size handling be a per-page thing, which allows us
to not have to worry about locking too much when changing the buffer
size.  If a page doesn't have buffers, we still need to read the block
size from the inode, but we can do that with ACCESS_ONCE(), so that even
if the size is changing, we get a consistent value.

This doesn't convert all functions - many of the buffer functions are
used purely by filesystems, which in turn results in the buffer size
being fixed at mount-time.  So they don't have the same consistency
issues that the raw device access can have.
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
上级 9489e9dc
...@@ -1552,6 +1552,28 @@ void unmap_underlying_metadata(struct block_device *bdev, sector_t block) ...@@ -1552,6 +1552,28 @@ void unmap_underlying_metadata(struct block_device *bdev, sector_t block)
} }
EXPORT_SYMBOL(unmap_underlying_metadata); EXPORT_SYMBOL(unmap_underlying_metadata);
/*
 * Convert a buffer block size into its bit count via ilog2().
 *
 * The size is a power of two somewhere in 512..PAGE_SIZE, with
 * PAGE_SIZE being the case that matters most.  Those constraints
 * mean this *could* be open-coded without a bit-scan, which would
 * mainly help an architecture whose bit-scan instruction is slow.
 *
 * Callers in this file use the result as a shift count.
 */
static inline int block_size_bits(unsigned int blocksize)
{
	const int bits = ilog2(blocksize);

	return bits;
}
/*
 * Return the buffer list attached to a locked page, attaching a fresh
 * set of empty buffers first if the page has none.
 *
 * @page:    page to operate on; must already be locked (BUG otherwise)
 * @inode:   inode whose i_blkbits supplies the block size for new buffers
 * @b_state: passed through unchanged to create_empty_buffers()
 *
 * When buffers already exist they are returned as-is and the inode's
 * block size is not consulted.
 */
static struct buffer_head *create_page_buffers(struct page *page,
					       struct inode *inode,
					       unsigned int b_state)
{
	BUG_ON(!PageLocked(page));

	if (!page_has_buffers(page)) {
		/*
		 * Read i_blkbits exactly once so that a concurrent
		 * block-size change still yields one consistent value.
		 */
		unsigned int blkbits = ACCESS_ONCE(inode->i_blkbits);

		create_empty_buffers(page, 1 << blkbits, b_state);
	}

	return page_buffers(page);
}
/* /*
* NOTE! All mapped/uptodate combinations are valid: * NOTE! All mapped/uptodate combinations are valid:
* *
...@@ -1589,19 +1611,13 @@ static int __block_write_full_page(struct inode *inode, struct page *page, ...@@ -1589,19 +1611,13 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
sector_t block; sector_t block;
sector_t last_block; sector_t last_block;
struct buffer_head *bh, *head; struct buffer_head *bh, *head;
const unsigned blocksize = 1 << inode->i_blkbits; unsigned int blocksize, bbits;
int nr_underway = 0; int nr_underway = 0;
int write_op = (wbc->sync_mode == WB_SYNC_ALL ? int write_op = (wbc->sync_mode == WB_SYNC_ALL ?
WRITE_SYNC : WRITE); WRITE_SYNC : WRITE);
BUG_ON(!PageLocked(page)); head = create_page_buffers(page, inode,
last_block = (i_size_read(inode) - 1) >> inode->i_blkbits;
if (!page_has_buffers(page)) {
create_empty_buffers(page, blocksize,
(1 << BH_Dirty)|(1 << BH_Uptodate)); (1 << BH_Dirty)|(1 << BH_Uptodate));
}
/* /*
* Be very careful. We have no exclusion from __set_page_dirty_buffers * Be very careful. We have no exclusion from __set_page_dirty_buffers
...@@ -1613,9 +1629,12 @@ static int __block_write_full_page(struct inode *inode, struct page *page, ...@@ -1613,9 +1629,12 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
* handle that here by just cleaning them. * handle that here by just cleaning them.
*/ */
block = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
head = page_buffers(page);
bh = head; bh = head;
blocksize = bh->b_size;
bbits = block_size_bits(blocksize);
block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
last_block = (i_size_read(inode) - 1) >> bbits;
/* /*
* Get all the dirty buffers mapped to disk addresses and * Get all the dirty buffers mapped to disk addresses and
...@@ -1806,12 +1825,10 @@ int __block_write_begin(struct page *page, loff_t pos, unsigned len, ...@@ -1806,12 +1825,10 @@ int __block_write_begin(struct page *page, loff_t pos, unsigned len,
BUG_ON(to > PAGE_CACHE_SIZE); BUG_ON(to > PAGE_CACHE_SIZE);
BUG_ON(from > to); BUG_ON(from > to);
blocksize = 1 << inode->i_blkbits; head = create_page_buffers(page, inode, 0);
if (!page_has_buffers(page)) blocksize = head->b_size;
create_empty_buffers(page, blocksize, 0); bbits = block_size_bits(blocksize);
head = page_buffers(page);
bbits = inode->i_blkbits;
block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits); block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
for(bh = head, block_start = 0; bh != head || !block_start; for(bh = head, block_start = 0; bh != head || !block_start;
...@@ -1881,11 +1898,11 @@ static int __block_commit_write(struct inode *inode, struct page *page, ...@@ -1881,11 +1898,11 @@ static int __block_commit_write(struct inode *inode, struct page *page,
unsigned blocksize; unsigned blocksize;
struct buffer_head *bh, *head; struct buffer_head *bh, *head;
blocksize = 1 << inode->i_blkbits; bh = head = page_buffers(page);
blocksize = bh->b_size;
for(bh = head = page_buffers(page), block_start = 0; block_start = 0;
bh != head || !block_start; do {
block_start=block_end, bh = bh->b_this_page) {
block_end = block_start + blocksize; block_end = block_start + blocksize;
if (block_end <= from || block_start >= to) { if (block_end <= from || block_start >= to) {
if (!buffer_uptodate(bh)) if (!buffer_uptodate(bh))
...@@ -1895,7 +1912,10 @@ static int __block_commit_write(struct inode *inode, struct page *page, ...@@ -1895,7 +1912,10 @@ static int __block_commit_write(struct inode *inode, struct page *page,
mark_buffer_dirty(bh); mark_buffer_dirty(bh);
} }
clear_buffer_new(bh); clear_buffer_new(bh);
}
block_start = block_end;
bh = bh->b_this_page;
} while (bh != head);
/* /*
* If this is a partial write which happened to make all buffers * If this is a partial write which happened to make all buffers
...@@ -2020,7 +2040,6 @@ EXPORT_SYMBOL(generic_write_end); ...@@ -2020,7 +2040,6 @@ EXPORT_SYMBOL(generic_write_end);
int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc, int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc,
unsigned long from) unsigned long from)
{ {
struct inode *inode = page->mapping->host;
unsigned block_start, block_end, blocksize; unsigned block_start, block_end, blocksize;
unsigned to; unsigned to;
struct buffer_head *bh, *head; struct buffer_head *bh, *head;
...@@ -2029,13 +2048,13 @@ int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc, ...@@ -2029,13 +2048,13 @@ int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc,
if (!page_has_buffers(page)) if (!page_has_buffers(page))
return 0; return 0;
blocksize = 1 << inode->i_blkbits; head = page_buffers(page);
blocksize = head->b_size;
to = min_t(unsigned, PAGE_CACHE_SIZE - from, desc->count); to = min_t(unsigned, PAGE_CACHE_SIZE - from, desc->count);
to = from + to; to = from + to;
if (from < blocksize && to > PAGE_CACHE_SIZE - blocksize) if (from < blocksize && to > PAGE_CACHE_SIZE - blocksize)
return 0; return 0;
head = page_buffers(page);
bh = head; bh = head;
block_start = 0; block_start = 0;
do { do {
...@@ -2068,18 +2087,16 @@ int block_read_full_page(struct page *page, get_block_t *get_block) ...@@ -2068,18 +2087,16 @@ int block_read_full_page(struct page *page, get_block_t *get_block)
struct inode *inode = page->mapping->host; struct inode *inode = page->mapping->host;
sector_t iblock, lblock; sector_t iblock, lblock;
struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE]; struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
unsigned int blocksize; unsigned int blocksize, bbits;
int nr, i; int nr, i;
int fully_mapped = 1; int fully_mapped = 1;
BUG_ON(!PageLocked(page)); head = create_page_buffers(page, inode, 0);
blocksize = 1 << inode->i_blkbits; blocksize = head->b_size;
if (!page_has_buffers(page)) bbits = block_size_bits(blocksize);
create_empty_buffers(page, blocksize, 0);
head = page_buffers(page);
iblock = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits); iblock = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
lblock = (i_size_read(inode)+blocksize-1) >> inode->i_blkbits; lblock = (i_size_read(inode)+blocksize-1) >> bbits;
bh = head; bh = head;
nr = 0; nr = 0;
i = 0; i = 0;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册