提交 a52d9a80 编写于 作者: C Chris Mason 提交者: David Woodhouse

Btrfs: Extent based page cache code. This uses an rbtree of extents and tests
instead of buffer heads.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
上级 83df7c1d
...@@ -4,7 +4,8 @@ ifneq ($(KERNELRELEASE),) ...@@ -4,7 +4,8 @@ ifneq ($(KERNELRELEASE),)
obj-m := btrfs.o obj-m := btrfs.o
btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
hash.o file-item.o inode-item.o inode-map.o disk-io.o \ hash.o file-item.o inode-item.o inode-map.o disk-io.o \
transaction.o bit-radix.o inode.o file.o tree-defrag.o transaction.o bit-radix.o inode.o file.o tree-defrag.o \
extent_map.o
#btrfs-y := ctree.o disk-io.o radix-tree.o extent-tree.o print-tree.o \ #btrfs-y := ctree.o disk-io.o radix-tree.o extent-tree.o print-tree.o \
# root-tree.o dir-item.o hash.o file-item.o inode-item.o \ # root-tree.o dir-item.o hash.o file-item.o inode-item.o \
......
...@@ -19,11 +19,14 @@ ...@@ -19,11 +19,14 @@
#ifndef __BTRFS_I__ #ifndef __BTRFS_I__
#define __BTRFS_I__ #define __BTRFS_I__
#include "extent_map.h"
/* in memory btrfs inode */ /* in memory btrfs inode */
struct btrfs_inode { struct btrfs_inode {
struct btrfs_root *root; struct btrfs_root *root;
struct btrfs_block_group_cache *block_group; struct btrfs_block_group_cache *block_group;
struct btrfs_key location; struct btrfs_key location;
struct extent_map_tree extent_tree;
struct inode vfs_inode; struct inode vfs_inode;
/* /*
......
...@@ -1176,7 +1176,14 @@ int btrfs_commit_write(struct file *file, struct page *page, ...@@ -1176,7 +1176,14 @@ int btrfs_commit_write(struct file *file, struct page *page,
unsigned from, unsigned to); unsigned from, unsigned to);
int btrfs_get_block(struct inode *inode, sector_t iblock, int btrfs_get_block(struct inode *inode, sector_t iblock,
struct buffer_head *result, int create); struct buffer_head *result, int create);
struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
size_t page_offset, u64 start, u64 end,
int create);
int btrfs_update_inode(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct inode *inode);
/* file.c */ /* file.c */
int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end);
extern struct file_operations btrfs_file_operations; extern struct file_operations btrfs_file_operations;
int btrfs_drop_extents(struct btrfs_trans_handle *trans, int btrfs_drop_extents(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct inode *inode, struct btrfs_root *root, struct inode *inode,
......
...@@ -77,7 +77,6 @@ int btrfs_insert_dev_radix(struct btrfs_root *root, ...@@ -77,7 +77,6 @@ int btrfs_insert_dev_radix(struct btrfs_root *root,
u64 num_blocks); u64 num_blocks);
int btrfs_map_bh_to_logical(struct btrfs_root *root, struct buffer_head *bh, int btrfs_map_bh_to_logical(struct btrfs_root *root, struct buffer_head *bh,
u64 logical); u64 logical);
int btrfs_releasepage(struct page *page, gfp_t flags);
void btrfs_btree_balance_dirty(struct btrfs_root *root); void btrfs_btree_balance_dirty(struct btrfs_root *root);
int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root); int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root);
void btrfs_mark_buffer_dirty(struct buffer_head *bh); void btrfs_mark_buffer_dirty(struct buffer_head *bh);
......
此差异已折叠。
#ifndef __EXTENTMAP__
#define __EXTENTMAP__
#include <linux/rbtree.h>
/*
 * Sentinel u64 values. EXTENT_MAP_INLINE is stored in
 * extent_map.block_start/block_end to mark an extent whose data lives
 * inline rather than at a disk byte offset; EXTENT_MAP_DELALLOC is
 * presumably the analogous marker for delayed allocation (no user is
 * visible here -- TODO confirm).
 *
 * The replacement lists are fully parenthesized so the macros behave as
 * a single primary expression wherever they are expanded (for example
 * "sizeof EXTENT_MAP_INLINE" would otherwise parse as "sizeof(u64) - 2").
 */
#define EXTENT_MAP_INLINE ((u64)-2)
#define EXTENT_MAP_DELALLOC ((u64)-1)
/*
 * Extent cache tree. struct btrfs_inode embeds one of these as its
 * extent_tree member; extent_map_tree_init() takes a pointer to it
 * together with an address_space.
 */
struct extent_map_tree {
/* rb-tree root; presumably indexes struct extent_map entries -- TODO confirm */
struct rb_root map;
/* rb-tree root; presumably indexes struct extent_state entries -- TODO confirm */
struct rb_root state;
/* address space associated with this tree (presumably the one given to extent_map_tree_init()) */
struct address_space *mapping;
/* reader/writer lock; NOTE(review): what exactly it protects is not visible in this header */
rwlock_t lock;
};
/*
 * One cached mapping from a range of an inode to a range on disk.
 *
 * note, this must start with the same fields as fs/extent_map.c:tree_entry
 * (start, end, in_tree, rb_node, in that order), so do not reorder them.
 */
struct extent_map {
/* first byte covered by this mapping */
u64 start;
u64 end; /* inclusive */
/* presumably non-zero while the entry is linked into a tree -- TODO confirm */
int in_tree;
/* linkage into an rb-tree (see struct extent_map_tree) */
struct rb_node rb_node;
/*
 * block_start and block_end are in bytes. Both may instead hold the
 * EXTENT_MAP_INLINE sentinel when the data is stored inline.
 */
u64 block_start;
u64 block_end; /* inclusive */
/* block device backing the mapping */
struct block_device *bdev;
/*
 * reference count, dropped with free_extent_map(); a lookup result and
 * the tree itself each hold one reference.
 */
atomic_t refs;
};
/*
 * State record for a byte range [start, end].
 *
 * note, this must start with the same fields as fs/extent_map.c:tree_entry
 * (start, end, in_tree, rb_node, in that order), so do not reorder them.
 */
struct extent_state {
/* first byte of the range */
u64 start;
u64 end; /* inclusive */
/* presumably non-zero while the entry is linked into a tree -- TODO confirm */
int in_tree;
/* linkage into an rb-tree (see struct extent_map_tree) */
struct rb_node rb_node;
/* wait queue; presumably for tasks waiting on a state change of this range -- TODO confirm */
wait_queue_head_t wq;
/* reference count */
atomic_t refs;
/* NOTE(review): looks like a bit mask of range flags (uptodate/dirty/new/locked?) -- confirm in extent_map.c */
unsigned long state;
/* list linkage; the list it belongs to is not visible in this header */
struct list_head list;
};
/*
 * A byte range [start, end] together with the pages that back it.
 * NOTE(review): no user of this struct is visible in this header.
 */
struct extent_buffer {
/* first byte of the range */
u64 start;
u64 end; /* inclusive */
/* presumably a kernel virtual address for the buffer contents -- TODO confirm */
char *addr;
/* flexible array member: the page pointers are allocated together with the struct */
struct page *pages[];
};
/*
 * Signature of the filesystem callback that returns the extent_map for a
 * range of an inode. This is a function type, not a pointer type, so
 * users declare parameters as "get_extent_t *get_extent".
 *
 * The callback receives the inode, a page with an offset into it, the
 * start and end of the requested range, and a create flag.
 * NOTE(review): the exact meaning of page/page_offset/create is defined
 * by the implementation and is not visible in this header.
 */
typedef struct extent_map *(get_extent_t)(struct inode *inode,
struct page *page,
size_t page_offset,
u64 start, u64 end,
int create);
void extent_map_tree_init(struct extent_map_tree *tree,
struct address_space *mapping, gfp_t mask);
struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
u64 start, u64 end);
int add_extent_mapping(struct extent_map_tree *tree,
struct extent_map *em);
int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em);
int try_release_extent_mapping(struct extent_map_tree *tree, struct page *page);
int lock_extent(struct extent_map_tree *tree, u64 start, u64 end, gfp_t mask);
int unlock_extent(struct extent_map_tree *tree, u64 start, u64 end, gfp_t mask);
struct extent_map *alloc_extent_map(gfp_t mask);
void free_extent_map(struct extent_map *em);
int extent_read_full_page(struct extent_map_tree *tree, struct page *page,
get_extent_t *get_extent);
void __init extent_map_init(void);
void __exit extent_map_exit(void);
int extent_clean_all_trees(struct extent_map_tree *tree);
int set_extent_uptodate(struct extent_map_tree *tree, u64 start, u64 end,
gfp_t mask);
int set_extent_new(struct extent_map_tree *tree, u64 start, u64 end,
gfp_t mask);
int set_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end,
gfp_t mask);
int extent_invalidatepage(struct extent_map_tree *tree,
struct page *page, unsigned long offset);
int extent_write_full_page(struct extent_map_tree *tree, struct page *page,
get_extent_t *get_extent,
struct writeback_control *wbc);
int extent_prepare_write(struct extent_map_tree *tree,
struct inode *inode, struct page *page,
unsigned from, unsigned to, get_extent_t *get_extent);
int extent_commit_write(struct extent_map_tree *tree,
struct inode *inode, struct page *page,
unsigned from, unsigned to);
int set_range_dirty(struct extent_map_tree *tree, u64 start, u64 end);
#endif
...@@ -81,14 +81,14 @@ static void btrfs_drop_pages(struct page **pages, size_t num_pages) ...@@ -81,14 +81,14 @@ static void btrfs_drop_pages(struct page **pages, size_t num_pages)
} }
} }
static int insert_inline_extent(struct btrfs_root *root, struct inode *inode, static int insert_inline_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct inode *inode,
u64 offset, ssize_t size, u64 offset, ssize_t size,
struct buffer_head *bh) struct page *page, size_t page_offset)
{ {
struct btrfs_key key; struct btrfs_key key;
struct btrfs_path *path; struct btrfs_path *path;
char *ptr, *kaddr; char *ptr, *kaddr;
struct btrfs_trans_handle *trans;
struct btrfs_file_extent_item *ei; struct btrfs_file_extent_item *ei;
u32 datasize; u32 datasize;
int err = 0; int err = 0;
...@@ -98,8 +98,6 @@ static int insert_inline_extent(struct btrfs_root *root, struct inode *inode, ...@@ -98,8 +98,6 @@ static int insert_inline_extent(struct btrfs_root *root, struct inode *inode,
if (!path) if (!path)
return -ENOMEM; return -ENOMEM;
mutex_lock(&root->fs_info->fs_mutex);
trans = btrfs_start_transaction(root, 1);
btrfs_set_trans_block_group(trans, inode); btrfs_set_trans_block_group(trans, inode);
key.objectid = inode->i_ino; key.objectid = inode->i_ino;
...@@ -122,18 +120,13 @@ static int insert_inline_extent(struct btrfs_root *root, struct inode *inode, ...@@ -122,18 +120,13 @@ static int insert_inline_extent(struct btrfs_root *root, struct inode *inode,
BTRFS_FILE_EXTENT_INLINE); BTRFS_FILE_EXTENT_INLINE);
ptr = btrfs_file_extent_inline_start(ei); ptr = btrfs_file_extent_inline_start(ei);
kaddr = kmap_atomic(bh->b_page, KM_USER0); kaddr = kmap_atomic(page, KM_USER0);
btrfs_memcpy(root, path->nodes[0]->b_data, btrfs_memcpy(root, path->nodes[0]->b_data,
ptr, kaddr + bh_offset(bh), ptr, kaddr + page_offset, size);
size);
kunmap_atomic(kaddr, KM_USER0); kunmap_atomic(kaddr, KM_USER0);
btrfs_mark_buffer_dirty(path->nodes[0]); btrfs_mark_buffer_dirty(path->nodes[0]);
fail: fail:
btrfs_free_path(path); btrfs_free_path(path);
ret = btrfs_end_transaction(trans, root);
if (ret && !err)
err = ret;
mutex_unlock(&root->fs_info->fs_mutex);
return err; return err;
} }
...@@ -145,45 +138,143 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, ...@@ -145,45 +138,143 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans,
loff_t pos, loff_t pos,
size_t write_bytes) size_t write_bytes)
{ {
int i;
int offset;
int err = 0; int err = 0;
int ret; int i;
int this_write;
struct inode *inode = file->f_path.dentry->d_inode; struct inode *inode = file->f_path.dentry->d_inode;
struct buffer_head *bh; struct extent_map *em;
struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
struct btrfs_key ins;
u64 hint_block;
u64 num_blocks;
u64 start_pos;
u64 end_of_last_block;
u64 end_pos = pos + write_bytes;
loff_t isize = i_size_read(inode);
for (i = 0; i < num_pages; i++) { em = alloc_extent_map(GFP_NOFS);
offset = pos & (PAGE_CACHE_SIZE -1); if (!em)
this_write = min((size_t)PAGE_CACHE_SIZE - offset, write_bytes); return -ENOMEM;
/* FIXME, one block at a time */ em->bdev = inode->i_sb->s_bdev;
bh = page_buffers(pages[i]);
if (buffer_mapped(bh) && bh->b_blocknr == 0) { start_pos = pos & ~((u64)root->blocksize - 1);
ret = insert_inline_extent(root, inode, num_blocks = (write_bytes + pos - start_pos + root->blocksize - 1) >>
pages[i]->index << PAGE_CACHE_SHIFT, inode->i_blkbits;
offset + this_write, bh);
if (ret) {
err = ret;
goto failed;
}
}
ret = btrfs_commit_write(file, pages[i], offset, end_of_last_block = start_pos + (num_blocks << inode->i_blkbits) - 1;
offset + this_write); mutex_lock(&root->fs_info->fs_mutex);
pos += this_write; trans = btrfs_start_transaction(root, 1);
if (ret) { if (!trans) {
err = ret; err = -ENOMEM;
goto out_unlock;
}
btrfs_set_trans_block_group(trans, inode);
inode->i_blocks += num_blocks << 3;
hint_block = 0;
if ((end_of_last_block & 4095) == 0) {
printk("strange end of last %Lu %lu %Lu\n", start_pos, write_bytes, end_of_last_block);
}
set_extent_uptodate(em_tree, start_pos, end_of_last_block, GFP_NOFS);
/* FIXME...EIEIO, ENOSPC and more */
/* step one, delete the existing extents in this range */
/* FIXME blocksize != pagesize */
if (start_pos < inode->i_size) {
err = btrfs_drop_extents(trans, root, inode,
start_pos, (pos + write_bytes + root->blocksize -1) &
~((u64)root->blocksize - 1), &hint_block);
if (err)
goto failed;
}
/* insert any holes we need to create */
if (inode->i_size < start_pos) {
u64 last_pos_in_file;
u64 hole_size;
u64 mask = root->blocksize - 1;
last_pos_in_file = (isize + mask) & ~mask;
hole_size = (start_pos - last_pos_in_file + mask) & ~mask;
hole_size >>= inode->i_blkbits;
if (last_pos_in_file < start_pos) {
err = btrfs_insert_file_extent(trans, root,
inode->i_ino,
last_pos_in_file,
0, 0, hole_size);
}
if (err)
goto failed; goto failed;
}
/*
* either allocate an extent for the new bytes or setup the key
* to show we are doing inline data in the extent
*/
if (isize >= PAGE_CACHE_SIZE || pos + write_bytes < inode->i_size ||
pos + write_bytes - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) {
err = btrfs_alloc_extent(trans, root, inode->i_ino,
num_blocks, 0, hint_block, (u64)-1,
&ins, 1);
BUG_ON(err);
err = btrfs_insert_file_extent(trans, root, inode->i_ino,
start_pos, ins.objectid, ins.offset,
ins.offset);
BUG_ON(err);
em->start = start_pos;
em->end = end_of_last_block;
em->block_start = ins.objectid << inode->i_blkbits;
em->block_end = em->block_start +
(ins.offset << inode->i_blkbits) - 1;
set_extent_dirty(em_tree, start_pos, end_of_last_block,
GFP_NOFS);
err = add_extent_mapping(em_tree, em);
for (i = 0; i < num_pages; i++) {
struct page *p = pages[i];
SetPageUptodate(p);
__set_page_dirty_nobuffers(p);
} }
WARN_ON(this_write > write_bytes); } else {
write_bytes -= this_write; struct page *p = pages[0];
err = insert_inline_extent(trans, root, inode, start_pos,
end_pos - start_pos, p, 0);
BUG_ON(err);
em->start = start_pos;
em->end = end_pos;
em->block_start = EXTENT_MAP_INLINE;
em->block_end = EXTENT_MAP_INLINE;
add_extent_mapping(em_tree, em);
}
if (end_pos > isize) {
i_size_write(inode, end_pos);
btrfs_update_inode(trans, root, inode);
} }
failed: failed:
err = btrfs_end_transaction(trans, root);
out_unlock:
mutex_unlock(&root->fs_info->fs_mutex);
free_extent_map(em);
return err; return err;
} }
/*
 * Remove from the inode's extent tree every cached mapping that
 * lookup_extent_mapping() reports for the range start..end, repeating
 * the lookup until nothing is found.
 *
 * Always returns 0.
 */
int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end)
{
	struct extent_map_tree *tree = &BTRFS_I(inode)->extent_tree;
	struct extent_map *found;

	while ((found = lookup_extent_mapping(tree, start, end)) != NULL) {
		remove_extent_mapping(tree, found);
		/* first put: the reference taken on our behalf by the lookup */
		free_extent_map(found);
		/* second put: the reference that belonged to the tree */
		free_extent_map(found);
	}

	return 0;
}
/* /*
* this is very complex, but the basic idea is to drop all extents * this is very complex, but the basic idea is to drop all extents
* in the range start - end. hint_block is filled in with a block number * in the range start - end. hint_block is filled in with a block number
...@@ -213,6 +304,8 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, ...@@ -213,6 +304,8 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans,
int found_inline; int found_inline;
int recow; int recow;
btrfs_drop_extent_cache(inode, start, end - 1);
path = btrfs_alloc_path(); path = btrfs_alloc_path();
if (!path) if (!path)
return -ENOMEM; return -ENOMEM;
...@@ -434,18 +527,9 @@ static int prepare_pages(struct btrfs_root *root, ...@@ -434,18 +527,9 @@ static int prepare_pages(struct btrfs_root *root,
int i; int i;
unsigned long index = pos >> PAGE_CACHE_SHIFT; unsigned long index = pos >> PAGE_CACHE_SHIFT;
struct inode *inode = file->f_path.dentry->d_inode; struct inode *inode = file->f_path.dentry->d_inode;
int offset;
int err = 0; int err = 0;
int this_write;
struct buffer_head *bh;
struct buffer_head *head;
loff_t isize = i_size_read(inode);
struct btrfs_trans_handle *trans;
u64 hint_block;
u64 num_blocks; u64 num_blocks;
u64 alloc_extent_start;
u64 start_pos; u64 start_pos;
struct btrfs_key ins;
start_pos = pos & ~((u64)PAGE_CACHE_SIZE - 1); start_pos = pos & ~((u64)PAGE_CACHE_SIZE - 1);
num_blocks = (write_bytes + pos - start_pos + root->blocksize - 1) >> num_blocks = (write_bytes + pos - start_pos + root->blocksize - 1) >>
...@@ -457,119 +541,17 @@ static int prepare_pages(struct btrfs_root *root, ...@@ -457,119 +541,17 @@ static int prepare_pages(struct btrfs_root *root,
pages[i] = grab_cache_page(inode->i_mapping, index + i); pages[i] = grab_cache_page(inode->i_mapping, index + i);
if (!pages[i]) { if (!pages[i]) {
err = -ENOMEM; err = -ENOMEM;
goto failed_release; BUG_ON(1);
} }
cancel_dirty_page(pages[i], PAGE_CACHE_SIZE); cancel_dirty_page(pages[i], PAGE_CACHE_SIZE);
wait_on_page_writeback(pages[i]); wait_on_page_writeback(pages[i]);
} if (!PagePrivate(pages[i])) {
SetPagePrivate(pages[i]);
mutex_lock(&root->fs_info->fs_mutex); set_page_private(pages[i], 1);
trans = btrfs_start_transaction(root, 1); page_cache_get(pages[i]);
if (!trans) {
err = -ENOMEM;
mutex_unlock(&root->fs_info->fs_mutex);
goto out_unlock;
}
btrfs_set_trans_block_group(trans, inode);
/* FIXME blocksize != 4096 */
inode->i_blocks += num_blocks << 3;
hint_block = 0;
/* FIXME...EIEIO, ENOSPC and more */
/* step one, delete the existing extents in this range */
/* FIXME blocksize != pagesize */
if (start_pos < inode->i_size) {
err = btrfs_drop_extents(trans, root, inode,
start_pos, (pos + write_bytes + root->blocksize -1) &
~((u64)root->blocksize - 1), &hint_block);
if (err)
goto failed_release;
}
/* insert any holes we need to create */
if (inode->i_size < start_pos) {
u64 last_pos_in_file;
u64 hole_size;
u64 mask = root->blocksize - 1;
last_pos_in_file = (isize + mask) & ~mask;
hole_size = (start_pos - last_pos_in_file + mask) & ~mask;
hole_size >>= inode->i_blkbits;
if (last_pos_in_file < start_pos) {
err = btrfs_insert_file_extent(trans, root,
inode->i_ino,
last_pos_in_file,
0, 0, hole_size);
}
if (err)
goto failed_release;
}
/*
* either allocate an extent for the new bytes or setup the key
* to show we are doing inline data in the extent
*/
if (isize >= PAGE_CACHE_SIZE || pos + write_bytes < inode->i_size ||
pos + write_bytes - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) {
err = btrfs_alloc_extent(trans, root, inode->i_ino,
num_blocks, 0, hint_block, (u64)-1,
&ins, 1);
if (err)
goto failed_truncate;
err = btrfs_insert_file_extent(trans, root, inode->i_ino,
start_pos, ins.objectid, ins.offset,
ins.offset);
if (err)
goto failed_truncate;
} else {
ins.offset = 0;
ins.objectid = 0;
}
BUG_ON(err);
alloc_extent_start = ins.objectid;
err = btrfs_end_transaction(trans, root);
mutex_unlock(&root->fs_info->fs_mutex);
for (i = 0; i < num_pages; i++) {
offset = pos & (PAGE_CACHE_SIZE -1);
this_write = min((size_t)PAGE_CACHE_SIZE - offset, write_bytes);
if (!page_has_buffers(pages[i])) {
create_empty_buffers(pages[i],
root->fs_info->sb->s_blocksize,
(1 << BH_Uptodate));
} }
head = page_buffers(pages[i]);
bh = head;
do {
err = btrfs_map_bh_to_logical(root, bh,
alloc_extent_start);
BUG_ON(err);
if (err)
goto failed_truncate;
bh = bh->b_this_page;
if (alloc_extent_start)
alloc_extent_start++;
} while (bh != head);
pos += this_write;
WARN_ON(this_write > write_bytes);
write_bytes -= this_write;
} }
return 0; return 0;
failed_release:
btrfs_drop_pages(pages, num_pages);
return err;
failed_truncate:
btrfs_drop_pages(pages, num_pages);
if (pos > isize)
vmtruncate(inode, isize);
return err;
out_unlock:
mutex_unlock(&root->fs_info->fs_mutex);
goto failed_release;
} }
static ssize_t btrfs_file_write(struct file *file, const char __user *buf, static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
...@@ -685,7 +667,6 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, ...@@ -685,7 +667,6 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
page_cache_release(pinned[1]); page_cache_release(pinned[1]);
*ppos = pos; *ppos = pos;
current->backing_dev_info = NULL; current->backing_dev_info = NULL;
mark_inode_dirty(inode);
return num_written ? num_written : err; return num_written ? num_written : err;
} }
...@@ -714,8 +695,8 @@ static int btrfs_sync_file(struct file *file, ...@@ -714,8 +695,8 @@ static int btrfs_sync_file(struct file *file,
mutex_unlock(&root->fs_info->trans_mutex); mutex_unlock(&root->fs_info->trans_mutex);
/* /*
* ok we haven't committed the transaction yet, lets do a commit * ok we haven't committed the transaction yet, lets do a commit
*/ */
trans = btrfs_start_transaction(root, 1); trans = btrfs_start_transaction(root, 1);
if (!trans) { if (!trans) {
ret = -ENOMEM; ret = -ENOMEM;
......
此差异已折叠。
...@@ -185,6 +185,7 @@ static int __init init_btrfs_fs(void) ...@@ -185,6 +185,7 @@ static int __init init_btrfs_fs(void)
err = btrfs_init_cachep(); err = btrfs_init_cachep();
if (err) if (err)
return err; return err;
extent_map_init();
return register_filesystem(&btrfs_fs_type); return register_filesystem(&btrfs_fs_type);
} }
...@@ -192,6 +193,7 @@ static void __exit exit_btrfs_fs(void) ...@@ -192,6 +193,7 @@ static void __exit exit_btrfs_fs(void)
{ {
btrfs_exit_transaction_sys(); btrfs_exit_transaction_sys();
btrfs_destroy_cachep(); btrfs_destroy_cachep();
extent_map_exit();
unregister_filesystem(&btrfs_fs_type); unregister_filesystem(&btrfs_fs_type);
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册