提交 c8b97818 编写于 作者: C Chris Mason

Btrfs: Add zlib compression support

This is a large change for adding compression on reading and writing,
both for inline and regular extents.  It does some fairly large
surgery to the writeback paths.

Compression is off by default and enabled by mount -o compress.  Even
when the -o compress mount option is not used, it is possible to read
compressed extents off the disk.

If compression for a given set of pages fails to make them smaller, the
file is flagged to avoid future compression attempts later.

* While finding delalloc extents, the pages are locked before being sent down
to the delalloc handler.  This allows the delalloc handler to do complex things
such as cleaning the pages, marking them writeback and starting IO on their
behalf.

* Inline extents are inserted at delalloc time now.  This allows us to compress
the data before inserting the inline extent, and it allows us to insert
an inline extent that spans multiple pages.

* All of the in-memory extent representations (extent_map.c, ordered-data.c etc)
are changed to record both an in-memory size and an on disk size, as well
as a flag for compression.

From a disk format point of view, the extent pointers in the file are changed
to record the on disk size of a given extent and some encoding flags.
Space in the disk format is allocated for compression encoding, as well
as encryption and a generic 'other' field.  Neither the encryption or the
'other' field are currently used.

In order to limit the amount of data read for a single random read in the
file, the size of a compressed extent is limited to 128k.  This is a
software only limit, the disk format supports u64 sized compressed extents.

In order to limit the ram consumed while processing extents, the uncompressed
size of a compressed extent is limited to 256k.  This is a software only limit
and will be subject to tuning later.

Checksumming is still done on compressed extents, and it is done on the
uncompressed version of the data.  This way additional encodings can be
layered on without having to figure out which encoding to checksum.

Compression happens at delalloc time, which is basically singled threaded because
it is usually done by a single pdflush thread.  This makes it tricky to
spread the compression load across all the cpus on the box.  We'll have to
look at parallel pdflush walks of dirty inodes at a later time.

Decompression is hooked into readpages and it does spread across CPUs nicely.
Signed-off-by: NChris Mason <chris.mason@oracle.com>
上级 26ce34a9
......@@ -501,6 +501,8 @@ config BTRFS_FS
tristate "Btrfs filesystem (EXPERIMENTAL) Unstable disk format"
depends on EXPERIMENTAL
select LIBCRC32C
select ZLIB_INFLATE
select ZLIB_DEFLATE
help
Btrfs is a new filesystem with extents, writable snapshotting,
support for multiple devices and many more features.
......
......@@ -7,7 +7,8 @@ btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
transaction.o inode.o file.o tree-defrag.o \
extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \
extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
ref-cache.o export.o tree-log.o acl.o free-space-cache.o
ref-cache.o export.o tree-log.o acl.o free-space-cache.o zlib.o \
compression.o
else
# Normal Makefile
......
/*
* Copyright (C) 2008 Oracle. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*/
#include <linux/kernel.h>
#include <linux/bio.h>
#include <linux/buffer_head.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <linux/time.h>
#include <linux/init.h>
#include <linux/string.h>
#include <linux/smp_lock.h>
#include <linux/backing-dev.h>
#include <linux/mpage.h>
#include <linux/swap.h>
#include <linux/writeback.h>
#include <linux/bit_spinlock.h>
#include <linux/version.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
#include "btrfs_inode.h"
#include "volumes.h"
#include "ordered-data.h"
#include "compat.h"
#include "compression.h"
#include "extent_io.h"
#include "extent_map.h"
struct compressed_bio {
/* number of bios pending for this compressed extent */
atomic_t pending_bios;
/* the pages with the compressed data on them */
struct page **compressed_pages;
/* inode that owns this data */
struct inode *inode;
/* starting offset in the inode for our pages */
u64 start;
/* number of bytes in the inode we're working on */
unsigned long len;
/* number of bytes on disk */
unsigned long compressed_len;
/* number of compressed pages in the array */
unsigned long nr_pages;
/* IO errors */
int errors;
/* for reads, this is the bio we are copying the data into */
struct bio *orig_bio;
};
static struct bio *compressed_bio_alloc(struct block_device *bdev,
u64 first_byte, gfp_t gfp_flags)
{
struct bio *bio;
int nr_vecs;
nr_vecs = bio_get_nr_vecs(bdev);
bio = bio_alloc(gfp_flags, nr_vecs);
if (bio == NULL && (current->flags & PF_MEMALLOC)) {
while (!bio && (nr_vecs /= 2))
bio = bio_alloc(gfp_flags, nr_vecs);
}
if (bio) {
bio->bi_size = 0;
bio->bi_bdev = bdev;
bio->bi_sector = first_byte >> 9;
}
return bio;
}
/* when we finish reading compressed pages from the disk, we
* decompress them and then run the bio end_io routines on the
* decompressed pages (in the inode address space).
*
* This allows the checksumming and other IO error handling routines
* to work normally
*
* The compressed pages are freed here, and it must be run
* in process context
*/
static void end_compressed_bio_read(struct bio *bio, int err)
{
struct extent_io_tree *tree;
struct compressed_bio *cb = bio->bi_private;
struct inode *inode;
struct page *page;
unsigned long index;
int ret;
if (err)
cb->errors = 1;
/* if there are more bios still pending for this compressed
* extent, just exit
*/
if (!atomic_dec_and_test(&cb->pending_bios))
goto out;
/* ok, we're the last bio for this extent, lets start
* the decompression.
*/
inode = cb->inode;
tree = &BTRFS_I(inode)->io_tree;
ret = btrfs_zlib_decompress_biovec(cb->compressed_pages,
cb->start,
cb->orig_bio->bi_io_vec,
cb->orig_bio->bi_vcnt,
cb->compressed_len);
if (ret)
cb->errors = 1;
/* release the compressed pages */
index = 0;
for (index = 0; index < cb->nr_pages; index++) {
page = cb->compressed_pages[index];
page->mapping = NULL;
page_cache_release(page);
}
/* do io completion on the original bio */
if (cb->errors)
bio_io_error(cb->orig_bio);
else
bio_endio(cb->orig_bio, 0);
/* finally free the cb struct */
kfree(cb->compressed_pages);
kfree(cb);
out:
bio_put(bio);
}
/*
* Clear the writeback bits on all of the file
* pages for a compressed write
*/
static noinline int end_compressed_writeback(struct inode *inode, u64 start,
unsigned long ram_size)
{
unsigned long index = start >> PAGE_CACHE_SHIFT;
unsigned long end_index = (start + ram_size - 1) >> PAGE_CACHE_SHIFT;
struct page *pages[16];
unsigned long nr_pages = end_index - index + 1;
int i;
int ret;
while(nr_pages > 0) {
ret = find_get_pages_contig(inode->i_mapping, index,
min(nr_pages, ARRAY_SIZE(pages)), pages);
if (ret == 0) {
nr_pages -= 1;
index += 1;
continue;
}
for (i = 0; i < ret; i++) {
end_page_writeback(pages[i]);
page_cache_release(pages[i]);
}
nr_pages -= ret;
index += ret;
}
/* the inode may be gone now */
return 0;
}
/*
* do the cleanup once all the compressed pages hit the disk.
* This will clear writeback on the file pages and free the compressed
* pages.
*
* This also calls the writeback end hooks for the file pages so that
* metadata and checksums can be updated in the file.
*/
static void end_compressed_bio_write(struct bio *bio, int err)
{
struct extent_io_tree *tree;
struct compressed_bio *cb = bio->bi_private;
struct inode *inode;
struct page *page;
unsigned long index;
if (err)
cb->errors = 1;
/* if there are more bios still pending for this compressed
* extent, just exit
*/
if (!atomic_dec_and_test(&cb->pending_bios))
goto out;
/* ok, we're the last bio for this extent, step one is to
* call back into the FS and do all the end_io operations
*/
inode = cb->inode;
tree = &BTRFS_I(inode)->io_tree;
tree->ops->writepage_end_io_hook(cb->compressed_pages[0],
cb->start,
cb->start + cb->len - 1,
NULL, 1);
end_compressed_writeback(inode, cb->start, cb->len);
/* note, our inode could be gone now */
/*
* release the compressed pages, these came from alloc_page and
* are not attached to the inode at all
*/
index = 0;
for (index = 0; index < cb->nr_pages; index++) {
page = cb->compressed_pages[index];
page->mapping = NULL;
page_cache_release(page);
}
/* finally free the cb struct */
kfree(cb->compressed_pages);
kfree(cb);
out:
bio_put(bio);
}
/*
* worker function to build and submit bios for previously compressed pages.
* The corresponding pages in the inode should be marked for writeback
* and the compressed pages should have a reference on them for dropping
* when the IO is complete.
*
* This also checksums the file bytes and gets things ready for
* the end io hooks.
*/
int btrfs_submit_compressed_write(struct inode *inode, u64 start,
unsigned long len, u64 disk_start,
unsigned long compressed_len,
struct page **compressed_pages,
unsigned long nr_pages)
{
struct bio *bio = NULL;
struct btrfs_root *root = BTRFS_I(inode)->root;
struct compressed_bio *cb;
unsigned long bytes_left;
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
int page_index = 0;
struct page *page;
u64 first_byte = disk_start;
struct block_device *bdev;
int ret;
WARN_ON(start & ((u64)PAGE_CACHE_SIZE - 1));
cb = kmalloc(sizeof(*cb), GFP_NOFS);
atomic_set(&cb->pending_bios, 0);
cb->errors = 0;
cb->inode = inode;
cb->start = start;
cb->len = len;
cb->compressed_pages = compressed_pages;
cb->compressed_len = compressed_len;
cb->orig_bio = NULL;
cb->nr_pages = nr_pages;
bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
ret = btrfs_csum_file_bytes(root, inode, start, len);
BUG_ON(ret);
bio = compressed_bio_alloc(bdev, first_byte, GFP_NOFS);
bio->bi_private = cb;
bio->bi_end_io = end_compressed_bio_write;
atomic_inc(&cb->pending_bios);
/* create and submit bios for the compressed pages */
bytes_left = compressed_len;
while(bytes_left > 0) {
page = compressed_pages[page_index];
page->mapping = inode->i_mapping;
if (bio->bi_size)
ret = io_tree->ops->merge_bio_hook(page, 0,
PAGE_CACHE_SIZE,
bio, 0);
else
ret = 0;
if (ret || bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) <
PAGE_CACHE_SIZE) {
bio_get(bio);
ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
BUG_ON(ret);
ret = btrfs_map_bio(root, WRITE, bio, 0, 1);
BUG_ON(ret);
bio_put(bio);
bio = compressed_bio_alloc(bdev, first_byte, GFP_NOFS);
atomic_inc(&cb->pending_bios);
bio->bi_private = cb;
bio->bi_end_io = end_compressed_bio_write;
bio_add_page(bio, page, PAGE_CACHE_SIZE, 0);
}
page_index++;
bytes_left -= PAGE_CACHE_SIZE;
first_byte += PAGE_CACHE_SIZE;
}
bio_get(bio);
ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
BUG_ON(ret);
ret = btrfs_map_bio(root, WRITE, bio, 0, 1);
BUG_ON(ret);
bio_put(bio);
return 0;
}
/*
* for a compressed read, the bio we get passed has all the inode pages
* in it. We don't actually do IO on those pages but allocate new ones
* to hold the compressed pages on disk.
*
* bio->bi_sector points to the compressed extent on disk
* bio->bi_io_vec points to all of the inode pages
* bio->bi_vcnt is a count of pages
*
* After the compressed pages are read, we copy the bytes into the
* bio we were passed and then call the bio end_io calls
*/
int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
int mirror_num, unsigned long bio_flags)
{
struct extent_io_tree *tree;
struct extent_map_tree *em_tree;
struct compressed_bio *cb;
struct btrfs_root *root = BTRFS_I(inode)->root;
unsigned long uncompressed_len = bio->bi_vcnt * PAGE_CACHE_SIZE;
unsigned long compressed_len;
unsigned long nr_pages;
unsigned long page_index;
struct page *page;
struct block_device *bdev;
struct bio *comp_bio;
u64 cur_disk_byte = (u64)bio->bi_sector << 9;
struct extent_map *em;
int ret;
tree = &BTRFS_I(inode)->io_tree;
em_tree = &BTRFS_I(inode)->extent_tree;
/* we need the actual starting offset of this extent in the file */
spin_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree,
page_offset(bio->bi_io_vec->bv_page),
PAGE_CACHE_SIZE);
spin_unlock(&em_tree->lock);
cb = kmalloc(sizeof(*cb), GFP_NOFS);
atomic_set(&cb->pending_bios, 0);
cb->errors = 0;
cb->inode = inode;
cb->start = em->start;
compressed_len = em->block_len;
free_extent_map(em);
cb->len = uncompressed_len;
cb->compressed_len = compressed_len;
cb->orig_bio = bio;
nr_pages = (compressed_len + PAGE_CACHE_SIZE - 1) /
PAGE_CACHE_SIZE;
cb->compressed_pages = kmalloc(sizeof(struct page *) * nr_pages,
GFP_NOFS);
bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
for (page_index = 0; page_index < nr_pages; page_index++) {
cb->compressed_pages[page_index] = alloc_page(GFP_NOFS |
__GFP_HIGHMEM);
}
cb->nr_pages = nr_pages;
comp_bio = compressed_bio_alloc(bdev, cur_disk_byte, GFP_NOFS);
comp_bio->bi_private = cb;
comp_bio->bi_end_io = end_compressed_bio_read;
atomic_inc(&cb->pending_bios);
for (page_index = 0; page_index < nr_pages; page_index++) {
page = cb->compressed_pages[page_index];
page->mapping = inode->i_mapping;
if (comp_bio->bi_size)
ret = tree->ops->merge_bio_hook(page, 0,
PAGE_CACHE_SIZE,
comp_bio, 0);
else
ret = 0;
if (ret || bio_add_page(comp_bio, page, PAGE_CACHE_SIZE, 0) <
PAGE_CACHE_SIZE) {
bio_get(comp_bio);
ret = btrfs_bio_wq_end_io(root->fs_info, comp_bio, 0);
BUG_ON(ret);
ret = btrfs_map_bio(root, READ, comp_bio, 0, 0);
BUG_ON(ret);
bio_put(comp_bio);
comp_bio = compressed_bio_alloc(bdev, cur_disk_byte,
GFP_NOFS);
atomic_inc(&cb->pending_bios);
bio->bi_private = cb;
bio->bi_end_io = end_compressed_bio_write;
bio_add_page(bio, page, PAGE_CACHE_SIZE, 0);
}
cur_disk_byte += PAGE_CACHE_SIZE;
}
bio_get(comp_bio);
ret = btrfs_bio_wq_end_io(root->fs_info, comp_bio, 0);
BUG_ON(ret);
ret = btrfs_map_bio(root, READ, comp_bio, 0, 0);
BUG_ON(ret);
bio_put(comp_bio);
return 0;
}
/*
* Copyright (C) 2008 Oracle. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*/
#ifndef __BTRFS_COMPRESSION_
#define __BTRFS_COMPRESSION_
int btrfs_zlib_decompress(unsigned char *data_in,
struct page *dest_page,
unsigned long start_byte,
size_t srclen, size_t destlen);
int btrfs_zlib_compress_pages(struct address_space *mapping,
u64 start, unsigned long len,
struct page **pages,
unsigned long nr_dest_pages,
unsigned long *out_pages,
unsigned long *total_in,
unsigned long *total_out,
unsigned long max_out);
int btrfs_zlib_decompress_biovec(struct page **pages_in,
u64 disk_start,
struct bio_vec *bvec,
int vcnt,
size_t srclen);
void btrfs_zlib_exit(void);
int btrfs_submit_compressed_write(struct inode *inode, u64 start,
unsigned long len, u64 disk_start,
unsigned long compressed_len,
struct page **compressed_pages,
unsigned long nr_pages);
int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
int mirror_num, unsigned long bio_flags);
#endif
......@@ -400,10 +400,18 @@ struct btrfs_timespec {
__le32 nsec;
} __attribute__ ((__packed__));
/*
* there is no padding here on purpose. If you want to extent the inode,
* make a new item type
*/
typedef enum {
BTRFS_COMPRESS_NONE = 0,
BTRFS_COMPRESS_ZLIB = 1,
BTRFS_COMPRESS_LAST = 2,
} btrfs_compression_type;
/* we don't understand any encryption methods right now */
typedef enum {
BTRFS_ENCRYPTION_NONE = 0,
BTRFS_ENCRYPTION_LAST = 1,
} btrfs_encryption_type;
struct btrfs_inode_item {
/* nfs style generation number */
__le64 generation;
......@@ -419,6 +427,7 @@ struct btrfs_inode_item {
__le64 rdev;
__le16 flags;
__le16 compat_flags;
struct btrfs_timespec atime;
struct btrfs_timespec ctime;
struct btrfs_timespec mtime;
......@@ -454,8 +463,33 @@ struct btrfs_root_item {
#define BTRFS_FILE_EXTENT_INLINE 1
struct btrfs_file_extent_item {
/*
* transaction id that created this extent
*/
__le64 generation;
/*
* max number of bytes to hold this extent in ram
* when we split a compressed extent we can't know how big
* each of the resulting pieces will be. So, this is
* an upper limit on the size of the extent in ram instead of
* an exact limit.
*/
__le64 ram_bytes;
/*
* 32 bits for the various ways we might encode the data,
* including compression and encryption. If any of these
* are set to something a given disk format doesn't understand
* it is treated like an incompat flag for reading and writing,
* but not for stat.
*/
u8 compression;
u8 encryption;
__le16 other_encoding; /* spare for later use */
/* are we inline data or a real extent? */
u8 type;
/*
* disk space consumed by the extent, checksum blocks are included
* in these numbers
......@@ -471,9 +505,11 @@ struct btrfs_file_extent_item {
*/
__le64 offset;
/*
* the logical number of file blocks (no csums included)
* the logical number of file blocks (no csums included). This
* always reflects the size uncompressed and without encoding.
*/
__le64 num_bytes;
} __attribute__ ((__packed__));
struct btrfs_csum_item {
......@@ -814,6 +850,7 @@ struct btrfs_root {
#define BTRFS_MOUNT_NOBARRIER (1 << 2)
#define BTRFS_MOUNT_SSD (1 << 3)
#define BTRFS_MOUNT_DEGRADED (1 << 4)
#define BTRFS_MOUNT_COMPRESS (1 << 5)
#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt)
#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt)
......@@ -825,6 +862,7 @@ struct btrfs_root {
#define BTRFS_INODE_NODATASUM (1 << 0)
#define BTRFS_INODE_NODATACOW (1 << 1)
#define BTRFS_INODE_READONLY (1 << 2)
#define BTRFS_INODE_NOCOMPRESS (1 << 3)
#define btrfs_clear_flag(inode, flag) (BTRFS_I(inode)->flags &= \
~BTRFS_INODE_##flag)
#define btrfs_set_flag(inode, flag) (BTRFS_I(inode)->flags |= \
......@@ -1424,14 +1462,6 @@ static inline u32 btrfs_file_extent_calc_inline_size(u32 datasize)
return offsetof(struct btrfs_file_extent_item, disk_bytenr) + datasize;
}
static inline u32 btrfs_file_extent_inline_len(struct extent_buffer *eb,
struct btrfs_item *e)
{
unsigned long offset;
offset = offsetof(struct btrfs_file_extent_item, disk_bytenr);
return btrfs_item_size(eb, e) - offset;
}
BTRFS_SETGET_FUNCS(file_extent_disk_bytenr, struct btrfs_file_extent_item,
disk_bytenr, 64);
BTRFS_SETGET_FUNCS(file_extent_generation, struct btrfs_file_extent_item,
......@@ -1442,6 +1472,36 @@ BTRFS_SETGET_FUNCS(file_extent_offset, struct btrfs_file_extent_item,
offset, 64);
BTRFS_SETGET_FUNCS(file_extent_num_bytes, struct btrfs_file_extent_item,
num_bytes, 64);
BTRFS_SETGET_FUNCS(file_extent_ram_bytes, struct btrfs_file_extent_item,
ram_bytes, 64);
BTRFS_SETGET_FUNCS(file_extent_compression, struct btrfs_file_extent_item,
compression, 8);
BTRFS_SETGET_FUNCS(file_extent_encryption, struct btrfs_file_extent_item,
encryption, 8);
BTRFS_SETGET_FUNCS(file_extent_other_encoding, struct btrfs_file_extent_item,
other_encoding, 16);
/* this returns the number of file bytes represented by the inline item.
* If an item is compressed, this is the uncompressed size
*/
static inline u32 btrfs_file_extent_inline_len(struct extent_buffer *eb,
struct btrfs_file_extent_item *e)
{
return btrfs_file_extent_ram_bytes(eb, e);
}
/*
* this returns the number of bytes used by the item on disk, minus the
* size of any extent headers. If a file is compressed on disk, this is
* the compressed size
*/
static inline u32 btrfs_file_extent_inline_item_len(struct extent_buffer *eb,
struct btrfs_item *e)
{
unsigned long offset;
offset = offsetof(struct btrfs_file_extent_item, disk_bytenr);
return btrfs_item_size(eb, e) - offset;
}
static inline struct btrfs_root *btrfs_sb(struct super_block *sb)
{
......@@ -1745,10 +1805,11 @@ int btrfs_lookup_inode(struct btrfs_trans_handle *trans, struct btrfs_root
int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
struct bio *bio);
int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 objectid, u64 pos, u64 disk_offset,
u64 disk_num_bytes,
u64 num_bytes, u64 offset);
struct btrfs_root *root,
u64 objectid, u64 pos,
u64 disk_offset, u64 disk_num_bytes,
u64 num_bytes, u64 offset, u64 ram_bytes,
u8 compression, u8 encryption, u16 other_encoding);
int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path, u64 objectid,
......@@ -1758,6 +1819,8 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
struct btrfs_ordered_sum *sums);
int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
struct bio *bio);
int btrfs_csum_file_bytes(struct btrfs_root *root, struct inode *inode,
u64 start, unsigned long len);
struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
......@@ -1799,7 +1862,7 @@ void btrfs_invalidate_dcache_root(struct btrfs_root *root, char *name,
int namelen);
int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
size_t size, struct bio *bio);
size_t size, struct bio *bio, unsigned long bio_flags);
unsigned long btrfs_force_ra(struct address_space *mapping,
struct file_ra_state *ra, struct file *file,
......
......@@ -83,6 +83,7 @@ struct async_submit_bio {
extent_submit_bio_hook_t *submit_bio_hook;
int rw;
int mirror_num;
unsigned long bio_flags;
struct btrfs_work work;
};
......@@ -115,6 +116,7 @@ struct extent_map *btree_get_extent(struct inode *inode, struct page *page,
}
em->start = 0;
em->len = (u64)-1;
em->block_len = (u64)-1;
em->block_start = 0;
em->bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
......@@ -469,12 +471,13 @@ static void run_one_async_submit(struct btrfs_work *work)
wake_up(&fs_info->async_submit_wait);
async->submit_bio_hook(async->inode, async->rw, async->bio,
async->mirror_num);
async->mirror_num, async->bio_flags);
kfree(async);
}
int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
int rw, struct bio *bio, int mirror_num,
unsigned long bio_flags,
extent_submit_bio_hook_t *submit_bio_hook)
{
struct async_submit_bio *async;
......@@ -491,6 +494,7 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
async->submit_bio_hook = submit_bio_hook;
async->work.func = run_one_async_submit;
async->work.flags = 0;
async->bio_flags = bio_flags;
while(atomic_read(&fs_info->async_submit_draining) &&
atomic_read(&fs_info->nr_async_submits)) {
......@@ -530,7 +534,7 @@ static int btree_csum_one_bio(struct bio *bio)
}
static int __btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
int mirror_num)
int mirror_num, unsigned long bio_flags)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
int ret;
......@@ -556,17 +560,17 @@ static int __btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
}
static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
int mirror_num)
int mirror_num, unsigned long bio_flags)
{
/*
* kthread helpers are used to submit writes so that checksumming
* can happen in parallel across all CPUs
*/
if (!(rw & (1 << BIO_RW))) {
return __btree_submit_bio_hook(inode, rw, bio, mirror_num);
return __btree_submit_bio_hook(inode, rw, bio, mirror_num, 0);
}
return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
inode, rw, bio, mirror_num,
inode, rw, bio, mirror_num, 0,
__btree_submit_bio_hook);
}
......@@ -1407,6 +1411,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
fs_info->btree_inode = new_inode(sb);
fs_info->btree_inode->i_ino = 1;
fs_info->btree_inode->i_nlink = 1;
fs_info->thread_pool_size = min(num_online_cpus() + 2, 8);
INIT_LIST_HEAD(&fs_info->ordered_extents);
......@@ -1508,6 +1513,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
*/
btrfs_init_workers(&fs_info->workers, "worker",
fs_info->thread_pool_size);
btrfs_init_workers(&fs_info->submit_workers, "submit",
min_t(u64, fs_devices->num_devices,
fs_info->thread_pool_size));
......@@ -1559,6 +1565,8 @@ struct btrfs_root *open_ctree(struct super_block *sb,
}
fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super);
fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages,
4 * 1024 * 1024 / PAGE_CACHE_SIZE);
nodesize = btrfs_super_nodesize(disk_super);
leafsize = btrfs_super_leafsize(disk_super);
......
......@@ -71,6 +71,7 @@ int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
int metadata);
int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
int rw, struct bio *bio, int mirror_num,
unsigned long bio_flags,
extent_submit_bio_hook_t *submit_bio_hook);
int btrfs_congested_async(struct btrfs_fs_info *info, int iodone);
unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info);
......
......@@ -3278,6 +3278,7 @@ static int noinline relocate_data_extent(struct inode *reloc_inode,
em->start = extent_key->objectid - offset;
em->len = extent_key->offset;
em->block_len = extent_key->offset;
em->block_start = extent_key->objectid;
em->bdev = root->fs_info->fs_devices->latest_bdev;
set_bit(EXTENT_FLAG_PINNED, &em->flags);
......@@ -3314,10 +3315,14 @@ struct btrfs_ref_path {
};
struct disk_extent {
u64 ram_bytes;
u64 disk_bytenr;
u64 disk_num_bytes;
u64 offset;
u64 num_bytes;
u8 compression;
u8 encryption;
u16 other_encoding;
};
static int is_cowonly_root(u64 root_objectid)
......@@ -3631,6 +3636,11 @@ static int noinline get_new_locations(struct inode *reloc_inode,
btrfs_file_extent_disk_num_bytes(leaf, fi);
exts[nr].offset = btrfs_file_extent_offset(leaf, fi);
exts[nr].num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
exts[nr].ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
exts[nr].compression = btrfs_file_extent_compression(leaf, fi);
exts[nr].encryption = btrfs_file_extent_encryption(leaf, fi);
exts[nr].other_encoding = btrfs_file_extent_other_encoding(leaf,
fi);
WARN_ON(exts[nr].offset > 0);
WARN_ON(exts[nr].num_bytes != exts[nr].disk_num_bytes);
......@@ -3846,6 +3856,8 @@ static int noinline replace_one_extent(struct btrfs_trans_handle *trans,
new_extents[0].disk_bytenr);
btrfs_set_file_extent_disk_num_bytes(leaf, fi,
new_extents[0].disk_num_bytes);
btrfs_set_file_extent_ram_bytes(leaf, fi,
new_extents[0].ram_bytes);
ext_offset += new_extents[0].offset;
btrfs_set_file_extent_offset(leaf, fi, ext_offset);
btrfs_mark_buffer_dirty(leaf);
......@@ -3911,6 +3923,16 @@ static int noinline replace_one_extent(struct btrfs_trans_handle *trans,
new_extents[i].disk_bytenr);
btrfs_set_file_extent_disk_num_bytes(leaf, fi,
new_extents[i].disk_num_bytes);
btrfs_set_file_extent_ram_bytes(leaf, fi,
new_extents[i].ram_bytes);
btrfs_set_file_extent_compression(leaf, fi,
new_extents[i].compression);
btrfs_set_file_extent_encryption(leaf, fi,
new_extents[i].encryption);
btrfs_set_file_extent_other_encoding(leaf, fi,
new_extents[i].other_encoding);
btrfs_set_file_extent_num_bytes(leaf, fi,
extent_len);
ext_offset += new_extents[i].offset;
......@@ -4169,6 +4191,8 @@ static int noinline replace_extents_in_leaf(struct btrfs_trans_handle *trans,
ref->extents[ext_index].num_bytes = new_extent->disk_num_bytes;
btrfs_set_file_extent_generation(leaf, fi, trans->transid);
btrfs_set_file_extent_ram_bytes(leaf, fi,
new_extent->ram_bytes);
btrfs_set_file_extent_disk_bytenr(leaf, fi,
new_extent->disk_bytenr);
btrfs_set_file_extent_disk_num_bytes(leaf, fi,
......@@ -4847,7 +4871,8 @@ static struct inode noinline *create_reloc_inode(struct btrfs_fs_info *fs_info,
BUG_ON(err);
err = btrfs_insert_file_extent(trans, root, objectid, 0, 0, 0,
group->key.offset, 0);
group->key.offset, 0, group->key.offset,
0, 0, 0);
BUG_ON(err);
inode = btrfs_iget_locked(root->fs_info->sb, objectid, root);
......
此差异已折叠。
......@@ -18,6 +18,9 @@
#define EXTENT_BOUNDARY (1 << 11)
#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK)
/* flags for bio submission */
#define EXTENT_BIO_COMPRESSED 1
/*
* page->private values. Every page that is controlled by the extent
* map has page->private set to one.
......@@ -28,14 +31,17 @@
struct extent_state;
typedef int (extent_submit_bio_hook_t)(struct inode *inode, int rw,
struct bio *bio, int mirror_num);
struct bio *bio, int mirror_num,
unsigned long bio_flags);
struct extent_io_ops {
int (*fill_delalloc)(struct inode *inode, u64 start, u64 end);
int (*fill_delalloc)(struct inode *inode, struct page *locked_page,
u64 start, u64 end, int *page_started);
int (*writepage_start_hook)(struct page *page, u64 start, u64 end);
int (*writepage_io_hook)(struct page *page, u64 start, u64 end);
extent_submit_bio_hook_t *submit_bio_hook;
int (*merge_bio_hook)(struct page *page, unsigned long offset,
size_t size, struct bio *bio);
size_t size, struct bio *bio,
unsigned long bio_flags);
int (*readpage_io_hook)(struct page *page, u64 start, u64 end);
int (*readpage_io_failed_hook)(struct bio *bio, struct page *page,
u64 start, u64 end,
......@@ -245,4 +251,9 @@ void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km);
int release_extent_buffer_tail_pages(struct extent_buffer *eb);
int extent_range_uptodate(struct extent_io_tree *tree,
u64 start, u64 end);
int extent_clear_unlock_delalloc(struct inode *inode,
struct extent_io_tree *tree,
u64 start, u64 end, struct page *locked_page,
int clear_dirty, int set_writeback,
int clear_writeback);
#endif
......@@ -184,6 +184,13 @@ static int mergable_maps(struct extent_map *prev, struct extent_map *next)
if (test_bit(EXTENT_FLAG_PINNED, &prev->flags))
return 0;
/*
* don't merge compressed extents, we need to know their
* actual size
*/
if (test_bit(EXTENT_FLAG_COMPRESSED, &prev->flags))
return 0;
if (extent_map_end(prev) == next->start &&
prev->flags == next->flags &&
prev->bdev == next->bdev &&
......@@ -239,6 +246,7 @@ int add_extent_mapping(struct extent_map_tree *tree,
if (rb && mergable_maps(merge, em)) {
em->start = merge->start;
em->len += merge->len;
em->block_len += merge->block_len;
em->block_start = merge->block_start;
merge->in_tree = 0;
rb_erase(&merge->rb_node, &tree->map);
......@@ -250,6 +258,7 @@ int add_extent_mapping(struct extent_map_tree *tree,
merge = rb_entry(rb, struct extent_map, rb_node);
if (rb && mergable_maps(em, merge)) {
em->len += merge->len;
em->block_len += merge->len;
rb_erase(&merge->rb_node, &tree->map);
merge->in_tree = 0;
free_extent_map(merge);
......
......@@ -10,6 +10,7 @@
/* bits for the flags field */
#define EXTENT_FLAG_PINNED 0 /* this entry not yet on disk, don't free it */
#define EXTENT_FLAG_COMPRESSED 1
struct extent_map {
struct rb_node rb_node;
......@@ -18,6 +19,7 @@ struct extent_map {
u64 start;
u64 len;
u64 block_start;
u64 block_len;
unsigned long flags;
struct block_device *bdev;
atomic_t refs;
......@@ -38,9 +40,9 @@ static inline u64 extent_map_end(struct extent_map *em)
static inline u64 extent_map_block_end(struct extent_map *em)
{
if (em->block_start + em->len < em->block_start)
if (em->block_start + em->block_len < em->block_start)
return (u64)-1;
return em->block_start + em->len;
return em->block_start + em->block_len;
}
void extent_map_tree_init(struct extent_map_tree *tree, gfp_t mask);
......
......@@ -31,7 +31,8 @@ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 objectid, u64 pos,
u64 disk_offset, u64 disk_num_bytes,
u64 num_bytes, u64 offset)
u64 num_bytes, u64 offset, u64 ram_bytes,
u8 compression, u8 encryption, u16 other_encoding)
{
int ret = 0;
struct btrfs_file_extent_item *item;
......@@ -57,8 +58,13 @@ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
btrfs_set_file_extent_disk_num_bytes(leaf, item, disk_num_bytes);
btrfs_set_file_extent_offset(leaf, item, offset);
btrfs_set_file_extent_num_bytes(leaf, item, num_bytes);
btrfs_set_file_extent_ram_bytes(leaf, item, ram_bytes);
btrfs_set_file_extent_generation(leaf, item, trans->transid);
btrfs_set_file_extent_type(leaf, item, BTRFS_FILE_EXTENT_REG);
btrfs_set_file_extent_compression(leaf, item, compression);
btrfs_set_file_extent_encryption(leaf, item, encryption);
btrfs_set_file_extent_other_encoding(leaf, item, other_encoding);
btrfs_mark_buffer_dirty(leaf);
out:
btrfs_free_path(path);
......@@ -213,6 +219,73 @@ int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
return 0;
}
int btrfs_csum_file_bytes(struct btrfs_root *root, struct inode *inode,
u64 start, unsigned long len)
{
struct btrfs_ordered_sum *sums;
struct btrfs_sector_sum *sector_sum;
struct btrfs_ordered_extent *ordered;
char *data;
struct page *page;
unsigned long total_bytes = 0;
unsigned long this_sum_bytes = 0;
sums = kzalloc(btrfs_ordered_sum_size(root, len), GFP_NOFS);
if (!sums)
return -ENOMEM;
sector_sum = sums->sums;
sums->file_offset = start;
sums->len = len;
INIT_LIST_HEAD(&sums->list);
ordered = btrfs_lookup_ordered_extent(inode, sums->file_offset);
BUG_ON(!ordered);
while(len > 0) {
if (start >= ordered->file_offset + ordered->len ||
start < ordered->file_offset) {
sums->len = this_sum_bytes;
this_sum_bytes = 0;
btrfs_add_ordered_sum(inode, ordered, sums);
btrfs_put_ordered_extent(ordered);
sums = kzalloc(btrfs_ordered_sum_size(root, len),
GFP_NOFS);
BUG_ON(!sums);
sector_sum = sums->sums;
sums->len = len;
sums->file_offset = start;
ordered = btrfs_lookup_ordered_extent(inode,
sums->file_offset);
BUG_ON(!ordered);
}
page = find_get_page(inode->i_mapping,
start >> PAGE_CACHE_SHIFT);
data = kmap_atomic(page, KM_USER0);
sector_sum->sum = ~(u32)0;
sector_sum->sum = btrfs_csum_data(root, data, sector_sum->sum,
PAGE_CACHE_SIZE);
kunmap_atomic(data, KM_USER0);
btrfs_csum_final(sector_sum->sum,
(char *)&sector_sum->sum);
sector_sum->offset = page_offset(page);
page_cache_release(page);
sector_sum++;
total_bytes += PAGE_CACHE_SIZE;
this_sum_bytes += PAGE_CACHE_SIZE;
start += PAGE_CACHE_SIZE;
WARN_ON(len < PAGE_CACHE_SIZE);
len -= PAGE_CACHE_SIZE;
}
btrfs_add_ordered_sum(inode, ordered, sums);
btrfs_put_ordered_extent(ordered);
return 0;
}
int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
struct bio *bio)
{
......
......@@ -95,153 +95,6 @@ static void noinline btrfs_drop_pages(struct page **pages, size_t num_pages)
}
}
/* this does all the hard work for inserting an inline extent into
* the btree. Any existing inline extent is extended as required to make room,
* otherwise things are inserted as required into the btree
*/
static int noinline insert_inline_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct inode *inode,
u64 offset, size_t size,
struct page **pages, size_t page_offset,
int num_pages)
{
struct btrfs_key key;
struct btrfs_path *path;
struct extent_buffer *leaf;
char *kaddr;
unsigned long ptr;
struct btrfs_file_extent_item *ei;
struct page *page;
u32 datasize;
int err = 0;
int ret;
int i;
ssize_t cur_size;
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
btrfs_set_trans_block_group(trans, inode);
key.objectid = inode->i_ino;
key.offset = offset;
btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
if (ret < 0) {
err = ret;
goto fail;
}
if (ret == 1) {
struct btrfs_key found_key;
if (path->slots[0] == 0)
goto insert;
path->slots[0]--;
leaf = path->nodes[0];
btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
if (found_key.objectid != inode->i_ino)
goto insert;
if (found_key.type != BTRFS_EXTENT_DATA_KEY)
goto insert;
ei = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_file_extent_item);
if (btrfs_file_extent_type(leaf, ei) !=
BTRFS_FILE_EXTENT_INLINE) {
goto insert;
}
btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
ret = 0;
}
if (ret == 0) {
u32 found_size;
u64 found_end;
leaf = path->nodes[0];
ei = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_file_extent_item);
if (btrfs_file_extent_type(leaf, ei) !=
BTRFS_FILE_EXTENT_INLINE) {
err = ret;
btrfs_print_leaf(root, leaf);
printk("found wasn't inline offset %Lu inode %lu\n",
offset, inode->i_ino);
goto fail;
}
found_size = btrfs_file_extent_inline_len(leaf,
btrfs_item_nr(leaf, path->slots[0]));
found_end = key.offset + found_size;
if (found_end < offset + size) {
btrfs_release_path(root, path);
ret = btrfs_search_slot(trans, root, &key, path,
offset + size - found_end, 1);
BUG_ON(ret != 0);
ret = btrfs_extend_item(trans, root, path,
offset + size - found_end);
if (ret) {
err = ret;
goto fail;
}
leaf = path->nodes[0];
ei = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_file_extent_item);
inode_add_bytes(inode, offset + size - found_end);
}
if (found_end < offset) {
ptr = btrfs_file_extent_inline_start(ei) + found_size;
memset_extent_buffer(leaf, 0, ptr, offset - found_end);
}
} else {
insert:
btrfs_release_path(root, path);
datasize = offset + size - key.offset;
inode_add_bytes(inode, datasize);
datasize = btrfs_file_extent_calc_inline_size(datasize);
ret = btrfs_insert_empty_item(trans, root, path, &key,
datasize);
if (ret) {
err = ret;
printk("got bad ret %d\n", ret);
goto fail;
}
leaf = path->nodes[0];
ei = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_file_extent_item);
btrfs_set_file_extent_generation(leaf, ei, trans->transid);
btrfs_set_file_extent_type(leaf, ei, BTRFS_FILE_EXTENT_INLINE);
}
ptr = btrfs_file_extent_inline_start(ei) + offset - key.offset;
cur_size = size;
i = 0;
while (size > 0) {
page = pages[i];
kaddr = kmap_atomic(page, KM_USER0);
cur_size = min_t(size_t, PAGE_CACHE_SIZE - page_offset, size);
write_extent_buffer(leaf, kaddr + page_offset, ptr, cur_size);
kunmap_atomic(kaddr, KM_USER0);
page_offset = 0;
ptr += cur_size;
size -= cur_size;
if (i >= num_pages) {
printk("i %d num_pages %d\n", i, num_pages);
}
i++;
}
btrfs_mark_buffer_dirty(leaf);
fail:
btrfs_free_path(path);
return err;
}
/*
* after copy_from_user, pages need to be dirtied and we need to make
* sure holes are created between the current EOF and the start of
......@@ -267,8 +120,6 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans,
u64 start_pos;
u64 end_of_last_block;
u64 end_pos = pos + write_bytes;
u64 inline_size;
int did_inline = 0;
loff_t isize = i_size_read(inode);
start_pos = pos & ~((u64)root->sectorsize - 1);
......@@ -314,7 +165,8 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans,
err = btrfs_insert_file_extent(trans, root,
inode->i_ino,
last_pos_in_file,
0, 0, hole_size, 0);
0, 0, hole_size, 0,
hole_size, 0, 0, 0);
btrfs_drop_extent_cache(inode, last_pos_in_file,
last_pos_in_file + hole_size - 1, 0);
mutex_unlock(&BTRFS_I(inode)->extent_mutex);
......@@ -324,57 +176,19 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans,
goto failed;
}
/*
* either allocate an extent for the new bytes or setup the key
* to show we are doing inline data in the extent
/* check for reserved extents on each page, we don't want
* to reset the delalloc bit on things that already have
* extents reserved.
*/
inline_size = end_pos;
if (isize >= BTRFS_MAX_INLINE_DATA_SIZE(root) ||
inline_size > root->fs_info->max_inline ||
(inline_size & (root->sectorsize -1)) == 0 ||
inline_size >= BTRFS_MAX_INLINE_DATA_SIZE(root)) {
/* check for reserved extents on each page, we don't want
* to reset the delalloc bit on things that already have
* extents reserved.
*/
btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block);
for (i = 0; i < num_pages; i++) {
struct page *p = pages[i];
SetPageUptodate(p);
ClearPageChecked(p);
set_page_dirty(p);
}
} else {
u64 aligned_end;
/* step one, delete the existing extents in this range */
aligned_end = (pos + write_bytes + root->sectorsize - 1) &
~((u64)root->sectorsize - 1);
mutex_lock(&BTRFS_I(inode)->extent_mutex);
err = btrfs_drop_extents(trans, root, inode, start_pos,
aligned_end, aligned_end, &hint_byte);
if (err)
goto failed;
if (isize > inline_size)
inline_size = min_t(u64, isize, aligned_end);
inline_size -= start_pos;
err = insert_inline_extent(trans, root, inode, start_pos,
inline_size, pages, 0, num_pages);
btrfs_drop_extent_cache(inode, start_pos, aligned_end - 1, 0);
BUG_ON(err);
mutex_unlock(&BTRFS_I(inode)->extent_mutex);
/*
* an ugly way to do all the prop accounting around
* the page bits and mapping tags
*/
set_page_writeback(pages[0]);
end_page_writeback(pages[0]);
did_inline = 1;
btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block);
for (i = 0; i < num_pages; i++) {
struct page *p = pages[i];
SetPageUptodate(p);
ClearPageChecked(p);
set_page_dirty(p);
}
if (end_pos > isize) {
i_size_write(inode, end_pos);
if (did_inline)
BTRFS_I(inode)->disk_i_size = end_pos;
btrfs_update_inode(trans, root, inode);
}
failed:
......@@ -399,6 +213,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
int ret;
int testend = 1;
unsigned long flags;
int compressed = 0;
WARN_ON(end < start);
if (end == (u64)-1) {
......@@ -434,6 +249,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
free_extent_map(em);
continue;
}
compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
clear_bit(EXTENT_FLAG_PINNED, &em->flags);
remove_extent_mapping(em_tree, em);
......@@ -442,6 +258,12 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
split->start = em->start;
split->len = start - em->start;
split->block_start = em->block_start;
if (compressed)
split->block_len = em->block_len;
else
split->block_len = split->len;
split->bdev = em->bdev;
split->flags = flags;
ret = add_extent_mapping(em_tree, split);
......@@ -459,7 +281,13 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
split->bdev = em->bdev;
split->flags = flags;
split->block_start = em->block_start + diff;
if (compressed) {
split->block_len = em->block_len;
split->block_start = em->block_start;
} else {
split->block_len = split->len;
split->block_start = em->block_start + diff;
}
ret = add_extent_mapping(em_tree, split);
BUG_ON(ret);
......@@ -533,7 +361,7 @@ int btrfs_check_file(struct btrfs_root *root, struct inode *inode)
struct btrfs_item *item;
item = btrfs_item_nr(leaf, slot);
extent_end = found_key.offset +
btrfs_file_extent_inline_len(leaf, item);
btrfs_file_extent_inline_len(leaf, extent);
extent_end = (extent_end + root->sectorsize - 1) &
~((u64)root->sectorsize -1 );
}
......@@ -573,6 +401,10 @@ int noinline btrfs_drop_extents(struct btrfs_trans_handle *trans,
u64 extent_end = 0;
u64 search_start = start;
u64 leaf_start;
u64 ram_bytes = 0;
u8 compression = 0;
u8 encryption = 0;
u16 other_encoding = 0;
u64 root_gen;
u64 root_owner;
struct extent_buffer *leaf;
......@@ -589,6 +421,7 @@ int noinline btrfs_drop_extents(struct btrfs_trans_handle *trans,
int recow;
int ret;
inline_limit = 0;
btrfs_drop_extent_cache(inode, start, end - 1, 0);
path = btrfs_alloc_path();
......@@ -637,6 +470,12 @@ int noinline btrfs_drop_extents(struct btrfs_trans_handle *trans,
extent = btrfs_item_ptr(leaf, slot,
struct btrfs_file_extent_item);
found_type = btrfs_file_extent_type(leaf, extent);
compression = btrfs_file_extent_compression(leaf,
extent);
encryption = btrfs_file_extent_encryption(leaf,
extent);
other_encoding = btrfs_file_extent_other_encoding(leaf,
extent);
if (found_type == BTRFS_FILE_EXTENT_REG) {
extent_end =
btrfs_file_extent_disk_bytenr(leaf,
......@@ -646,13 +485,13 @@ int noinline btrfs_drop_extents(struct btrfs_trans_handle *trans,
extent_end = key.offset +
btrfs_file_extent_num_bytes(leaf, extent);
ram_bytes = btrfs_file_extent_ram_bytes(leaf,
extent);
found_extent = 1;
} else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
struct btrfs_item *item;
item = btrfs_item_nr(leaf, slot);
found_inline = 1;
extent_end = key.offset +
btrfs_file_extent_inline_len(leaf, item);
btrfs_file_extent_inline_len(leaf, extent);
}
} else {
extent_end = search_start;
......@@ -680,10 +519,9 @@ int noinline btrfs_drop_extents(struct btrfs_trans_handle *trans,
search_start = (extent_end + mask) & ~mask;
} else
search_start = extent_end;
if (end <= extent_end && start >= key.offset && found_inline) {
if (end <= extent_end && start >= key.offset && found_inline)
*hint_byte = EXTENT_MAP_INLINE;
goto out;
}
if (found_extent) {
read_extent_buffer(leaf, &old, (unsigned long)extent,
......@@ -770,12 +608,27 @@ int noinline btrfs_drop_extents(struct btrfs_trans_handle *trans,
write_extent_buffer(leaf, &old,
(unsigned long)extent, sizeof(old));
btrfs_set_file_extent_compression(leaf, extent,
compression);
btrfs_set_file_extent_encryption(leaf, extent,
encryption);
btrfs_set_file_extent_other_encoding(leaf, extent,
other_encoding);
btrfs_set_file_extent_offset(leaf, extent,
le64_to_cpu(old.offset) + end - key.offset);
WARN_ON(le64_to_cpu(old.num_bytes) <
(extent_end - end));
btrfs_set_file_extent_num_bytes(leaf, extent,
extent_end - end);
/*
* set the ram bytes to the size of the full extent
* before splitting. This is a worst case flag,
* but its the best we can do because we don't know
* how splitting affects compression
*/
btrfs_set_file_extent_ram_bytes(leaf, extent,
ram_bytes);
btrfs_set_file_extent_type(leaf, extent,
BTRFS_FILE_EXTENT_REG);
......
此差异已折叠。
......@@ -165,7 +165,8 @@ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree,
* inserted.
*/
int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
u64 start, u64 len, int nocow)
u64 start, u64 len, u64 disk_len, int nocow,
int compressed)
{
struct btrfs_ordered_inode_tree *tree;
struct rb_node *node;
......@@ -180,9 +181,12 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
entry->file_offset = file_offset;
entry->start = start;
entry->len = len;
entry->disk_len = disk_len;
entry->inode = inode;
if (nocow)
set_bit(BTRFS_ORDERED_NOCOW, &entry->flags);
if (compressed)
set_bit(BTRFS_ORDERED_COMPRESSED, &entry->flags);
/* one ref for the tree */
atomic_set(&entry->refs, 1);
......@@ -389,9 +393,10 @@ void btrfs_start_ordered_extent(struct inode *inode,
* for pdflush to find them
*/
btrfs_fdatawrite_range(inode->i_mapping, start, end, WB_SYNC_NONE);
if (wait)
if (wait) {
wait_event(entry->wait, test_bit(BTRFS_ORDERED_COMPLETE,
&entry->flags));
}
}
/*
......
......@@ -66,6 +66,8 @@ struct btrfs_ordered_sum {
#define BTRFS_ORDERED_NOCOW 2 /* set when we want to write in place */
#define BTRFS_ORDERED_COMPRESSED 3 /* writing a compressed extent */
struct btrfs_ordered_extent {
/* logical offset in the file */
u64 file_offset;
......@@ -73,9 +75,12 @@ struct btrfs_ordered_extent {
/* disk byte number */
u64 start;
/* length of the extent in bytes */
/* ram length of the extent in bytes */
u64 len;
/* extent length on disk */
u64 disk_len;
/* flags (described above) */
unsigned long flags;
......@@ -127,7 +132,8 @@ int btrfs_remove_ordered_extent(struct inode *inode,
int btrfs_dec_test_ordered_pending(struct inode *inode,
u64 file_offset, u64 io_size);
int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
u64 start, u64 len, int nocow);
u64 start, u64 len, u64 disk_len, int nocow,
int compressed);
int btrfs_add_ordered_sum(struct inode *inode,
struct btrfs_ordered_extent *entry,
struct btrfs_ordered_sum *sum);
......
......@@ -115,15 +115,16 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
if (btrfs_file_extent_type(l, fi) ==
BTRFS_FILE_EXTENT_INLINE) {
printk("\t\tinline extent data size %u\n",
btrfs_file_extent_inline_len(l, item));
btrfs_file_extent_inline_len(l, fi));
break;
}
printk("\t\textent data disk bytenr %llu nr %llu\n",
(unsigned long long)btrfs_file_extent_disk_bytenr(l, fi),
(unsigned long long)btrfs_file_extent_disk_num_bytes(l, fi));
printk("\t\textent data offset %llu nr %llu\n",
printk("\t\textent data offset %llu nr %llu ram %llu\n",
(unsigned long long)btrfs_file_extent_offset(l, fi),
(unsigned long long)btrfs_file_extent_num_bytes(l, fi));
(unsigned long long)btrfs_file_extent_num_bytes(l, fi),
(unsigned long long)btrfs_file_extent_ram_bytes(l, fi));
break;
case BTRFS_BLOCK_GROUP_ITEM_KEY:
bi = btrfs_item_ptr(l, i,
......
......@@ -47,6 +47,7 @@
#include "volumes.h"
#include "version.h"
#include "export.h"
#include "compression.h"
#define BTRFS_SUPER_MAGIC 0x9123683E
......@@ -69,7 +70,7 @@ static void btrfs_put_super (struct super_block * sb)
enum {
Opt_degraded, Opt_subvol, Opt_device, Opt_nodatasum, Opt_nodatacow,
Opt_max_extent, Opt_max_inline, Opt_alloc_start, Opt_nobarrier,
Opt_ssd, Opt_thread_pool, Opt_noacl, Opt_err,
Opt_ssd, Opt_thread_pool, Opt_noacl, Opt_compress, Opt_err,
};
static match_table_t tokens = {
......@@ -83,6 +84,7 @@ static match_table_t tokens = {
{Opt_max_inline, "max_inline=%s"},
{Opt_alloc_start, "alloc_start=%s"},
{Opt_thread_pool, "thread_pool=%d"},
{Opt_compress, "compress"},
{Opt_ssd, "ssd"},
{Opt_noacl, "noacl"},
{Opt_err, NULL},
......@@ -163,6 +165,10 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
btrfs_set_opt(info->mount_opt, NODATACOW);
btrfs_set_opt(info->mount_opt, NODATASUM);
break;
case Opt_compress:
printk(KERN_INFO "btrfs: use compression\n");
btrfs_set_opt(info->mount_opt, COMPRESS);
break;
case Opt_ssd:
printk(KERN_INFO "btrfs: use ssd allocation scheme\n");
btrfs_set_opt(info->mount_opt, SSD);
......@@ -622,6 +628,7 @@ static int __init init_btrfs_fs(void)
err = btrfs_interface_init();
if (err)
goto free_extent_map;
err = register_filesystem(&btrfs_fs_type);
if (err)
goto unregister_ioctl;
......@@ -651,6 +658,7 @@ static void __exit exit_btrfs_fs(void)
unregister_filesystem(&btrfs_fs_type);
btrfs_exit_sysfs();
btrfs_cleanup_fs_uuids();
btrfs_zlib_exit();
}
module_init(init_btrfs_fs)
......
......@@ -540,8 +540,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
if (found_type == BTRFS_FILE_EXTENT_REG)
extent_end = start + btrfs_file_extent_num_bytes(eb, item);
else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
size = btrfs_file_extent_inline_len(eb,
btrfs_item_nr(eb, slot));
size = btrfs_file_extent_inline_len(eb, item);
extent_end = (start + size + mask) & ~mask;
} else {
ret = 0;
......
......@@ -1816,6 +1816,7 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
em->start = key.offset;
em->len = *num_bytes;
em->block_start = 0;
em->block_len = em->len;
if (type & BTRFS_BLOCK_GROUP_SYSTEM) {
ret = btrfs_add_system_chunk(trans, chunk_root, &key,
......@@ -2323,6 +2324,7 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
em->start = logical;
em->len = length;
em->block_start = 0;
em->block_len = em->len;
map->num_stripes = num_stripes;
map->io_width = btrfs_chunk_io_width(leaf, chunk);
......
/*
* Copyright (C) 2008 Oracle. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*
* Based on jffs2 zlib code:
* Copyright © 2001-2007 Red Hat, Inc.
* Created by David Woodhouse <dwmw2@infradead.org>
*/
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/zlib.h>
#include <linux/zutil.h>
#include <linux/vmalloc.h>
#include <linux/init.h>
#include <linux/err.h>
#include <linux/sched.h>
#include <linux/pagemap.h>
#include <linux/bio.h>
/* Plan: call deflate() with avail_in == *sourcelen,
avail_out = *dstlen - 12 and flush == Z_FINISH.
If it doesn't manage to finish, call it again with
avail_in == 0 and avail_out set to the remaining 12
bytes for it to clean up.
Q: Is 12 bytes sufficient?
*/
#define STREAM_END_SPACE 12
struct workspace {
z_stream inf_strm;
z_stream def_strm;
char *buf;
struct list_head list;
};
static LIST_HEAD(idle_workspace);
static DEFINE_SPINLOCK(workspace_lock);
static unsigned long num_workspace;
static atomic_t alloc_workspace = ATOMIC_INIT(0);
static DECLARE_WAIT_QUEUE_HEAD(workspace_wait);
/*
* this finds an available zlib workspace or allocates a new one
* NULL or an ERR_PTR is returned if things go bad.
*/
static struct workspace *find_zlib_workspace(void)
{
struct workspace *workspace;
int ret;
int cpus = num_online_cpus();
again:
spin_lock(&workspace_lock);
if (!list_empty(&idle_workspace)) {
workspace = list_entry(idle_workspace.next, struct workspace,
list);
list_del(&workspace->list);
num_workspace--;
spin_unlock(&workspace_lock);
return workspace;
}
spin_unlock(&workspace_lock);
if (atomic_read(&alloc_workspace) > cpus) {
DEFINE_WAIT(wait);
prepare_to_wait(&workspace_wait, &wait, TASK_UNINTERRUPTIBLE);
if (atomic_read(&alloc_workspace) > cpus)
schedule();
finish_wait(&workspace_wait, &wait);
goto again;
}
atomic_inc(&alloc_workspace);
workspace = kzalloc(sizeof(*workspace), GFP_NOFS);
if (!workspace) {
ret = -ENOMEM;
goto fail;
}
workspace->def_strm.workspace = vmalloc(zlib_deflate_workspacesize());
if (!workspace->def_strm.workspace) {
ret = -ENOMEM;
goto fail;
}
workspace->inf_strm.workspace = vmalloc(zlib_inflate_workspacesize());
if (!workspace->inf_strm.workspace) {
ret = -ENOMEM;
goto fail_inflate;
}
workspace->buf = kmalloc(PAGE_CACHE_SIZE, GFP_NOFS);
if (!workspace->buf) {
ret = -ENOMEM;
goto fail_kmalloc;
}
return workspace;
fail_kmalloc:
vfree(workspace->inf_strm.workspace);
fail_inflate:
vfree(workspace->def_strm.workspace);
fail:
kfree(workspace);
atomic_dec(&alloc_workspace);
wake_up(&workspace_wait);
return ERR_PTR(ret);
}
/*
* put a workspace struct back on the list or free it if we have enough
* idle ones sitting around
*/
static int free_workspace(struct workspace *workspace)
{
spin_lock(&workspace_lock);
if (num_workspace < num_online_cpus()) {
list_add_tail(&workspace->list, &idle_workspace);
num_workspace++;
spin_unlock(&workspace_lock);
if (waitqueue_active(&workspace_wait))
wake_up(&workspace_wait);
return 0;
}
spin_unlock(&workspace_lock);
vfree(workspace->def_strm.workspace);
vfree(workspace->inf_strm.workspace);
kfree(workspace->buf);
kfree(workspace);
atomic_dec(&alloc_workspace);
if (waitqueue_active(&workspace_wait))
wake_up(&workspace_wait);
return 0;
}
/*
* cleanup function for module exit
*/
static void free_workspaces(void)
{
struct workspace *workspace;
while(!list_empty(&idle_workspace)) {
workspace = list_entry(idle_workspace.next, struct workspace,
list);
list_del(&workspace->list);
vfree(workspace->def_strm.workspace);
vfree(workspace->inf_strm.workspace);
kfree(workspace->buf);
kfree(workspace);
atomic_dec(&alloc_workspace);
}
}
/*
* given an address space and start/len, compress the bytes.
*
* pages are allocated to hold the compressed result and stored
* in 'pages'
*
* out_pages is used to return the number of pages allocated. There
* may be pages allocated even if we return an error
*
* total_in is used to return the number of bytes actually read. It
* may be smaller then len if we had to exit early because we
* ran out of room in the pages array or because we cross the
* max_out threshold.
*
* total_out is used to return the total number of compressed bytes
*
* max_out tells us the max number of bytes that we're allowed to
* stuff into pages
*/
int btrfs_zlib_compress_pages(struct address_space *mapping,
u64 start, unsigned long len,
struct page **pages,
unsigned long nr_dest_pages,
unsigned long *out_pages,
unsigned long *total_in,
unsigned long *total_out,
unsigned long max_out)
{
int ret;
struct workspace *workspace;
char *data_in;
char *cpage_out;
int nr_pages = 0;
struct page *in_page = NULL;
struct page *out_page = NULL;
int out_written = 0;
int in_read = 0;
unsigned long bytes_left;
*out_pages = 0;
*total_out = 0;
*total_in = 0;
workspace = find_zlib_workspace();
if (!workspace)
return -1;
if (Z_OK != zlib_deflateInit(&workspace->def_strm, 3)) {
printk(KERN_WARNING "deflateInit failed\n");
ret = -1;
goto out;
}
workspace->def_strm.total_in = 0;
workspace->def_strm.total_out = 0;
in_page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT);
data_in = kmap(in_page);
out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
cpage_out = kmap(out_page);
pages[0] = out_page;
nr_pages = 1;
workspace->def_strm.next_in = data_in;
workspace->def_strm.next_out = cpage_out;
workspace->def_strm.avail_out = PAGE_CACHE_SIZE;
workspace->def_strm.avail_in = min(len, PAGE_CACHE_SIZE);
out_written = 0;
in_read = 0;
while (workspace->def_strm.total_in < len) {
ret = zlib_deflate(&workspace->def_strm, Z_SYNC_FLUSH);
if (ret != Z_OK) {
printk(KERN_DEBUG "btrfs deflate in loop returned %d\n",
ret);
zlib_deflateEnd(&workspace->def_strm);
ret = -1;
goto out;
}
/* we're making it bigger, give up */
if (workspace->def_strm.total_in > 8192 &&
workspace->def_strm.total_in <
workspace->def_strm.total_out) {
ret = -1;
goto out;
}
/* we need another page for writing out. Test this
* before the total_in so we will pull in a new page for
* the stream end if required
*/
if (workspace->def_strm.avail_out == 0) {
kunmap(out_page);
if (nr_pages == nr_dest_pages) {
out_page = NULL;
ret = -1;
goto out;
}
out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
cpage_out = kmap(out_page);
pages[nr_pages] = out_page;
nr_pages++;
workspace->def_strm.avail_out = PAGE_CACHE_SIZE;
workspace->def_strm.next_out = cpage_out;
}
/* we're all done */
if (workspace->def_strm.total_in >= len)
break;
/* we've read in a full page, get a new one */
if (workspace->def_strm.avail_in == 0) {
if (workspace->def_strm.total_out > max_out)
break;
bytes_left = len - workspace->def_strm.total_in;
kunmap(in_page);
page_cache_release(in_page);
start += PAGE_CACHE_SIZE;
in_page = find_get_page(mapping,
start >> PAGE_CACHE_SHIFT);
data_in = kmap(in_page);
workspace->def_strm.avail_in = min(bytes_left,
PAGE_CACHE_SIZE);
workspace->def_strm.next_in = data_in;
}
}
workspace->def_strm.avail_in = 0;
ret = zlib_deflate(&workspace->def_strm, Z_FINISH);
zlib_deflateEnd(&workspace->def_strm);
if (ret != Z_STREAM_END) {
ret = -1;
goto out;
}
if (workspace->def_strm.total_out >= workspace->def_strm.total_in) {
ret = -1;
goto out;
}
ret = 0;
*total_out = workspace->def_strm.total_out;
*total_in = workspace->def_strm.total_in;
out:
*out_pages = nr_pages;
if (out_page)
kunmap(out_page);
if (in_page) {
kunmap(in_page);
page_cache_release(in_page);
}
free_workspace(workspace);
return ret;
}
/*
* pages_in is an array of pages with compressed data.
*
* disk_start is the starting logical offset of this array in the file
*
* bvec is a bio_vec of pages from the file that we want to decompress into
*
* vcnt is the count of pages in the biovec
*
* srclen is the number of bytes in pages_in
*
* The basic idea is that we have a bio that was created by readpages.
* The pages in the bio are for the uncompressed data, and they may not
* be contiguous. They all correspond to the range of bytes covered by
* the compressed extent.
*/
int btrfs_zlib_decompress_biovec(struct page **pages_in,
u64 disk_start,
struct bio_vec *bvec,
int vcnt,
size_t srclen)
{
int ret = 0;
int wbits = MAX_WBITS;
struct workspace *workspace;
char *data_in;
size_t total_out = 0;
unsigned long page_bytes_left;
unsigned long page_in_index = 0;
unsigned long page_out_index = 0;
struct page *page_out;
unsigned long total_pages_in = (srclen + PAGE_CACHE_SIZE - 1) /
PAGE_CACHE_SIZE;
unsigned long buf_start;
unsigned long buf_offset;
unsigned long bytes;
unsigned long working_bytes;
unsigned long pg_offset;
unsigned long start_byte;
unsigned long current_buf_start;
char *kaddr;
workspace = find_zlib_workspace();
if (!workspace)
return -ENOMEM;
data_in = kmap(pages_in[page_in_index]);
workspace->inf_strm.next_in = data_in;
workspace->inf_strm.avail_in = min(srclen, PAGE_CACHE_SIZE);
workspace->inf_strm.total_in = 0;
workspace->inf_strm.total_out = 0;
workspace->inf_strm.next_out = workspace->buf;
workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;
page_out = bvec[page_out_index].bv_page;
page_bytes_left = PAGE_CACHE_SIZE;
pg_offset = 0;
/* If it's deflate, and it's got no preset dictionary, then
we can tell zlib to skip the adler32 check. */
if (srclen > 2 && !(data_in[1] & PRESET_DICT) &&
((data_in[0] & 0x0f) == Z_DEFLATED) &&
!(((data_in[0]<<8) + data_in[1]) % 31)) {
wbits = -((data_in[0] >> 4) + 8);
workspace->inf_strm.next_in += 2;
workspace->inf_strm.avail_in -= 2;
}
if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) {
printk(KERN_WARNING "inflateInit failed\n");
ret = -1;
goto out;
}
while(workspace->inf_strm.total_in < srclen) {
ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH);
if (ret != Z_OK && ret != Z_STREAM_END) {
break;
}
/*
* buf start is the byte offset we're of the start of
* our workspace buffer
*/
buf_start = total_out;
/* total_out is the last byte of the workspace buffer */
total_out = workspace->inf_strm.total_out;
working_bytes = total_out - buf_start;
/*
* start byte is the first byte of the page we're currently
* copying into relative to the start of the compressed data.
*/
start_byte = page_offset(page_out) - disk_start;
if (working_bytes == 0) {
/* we didn't make progress in this inflate
* call, we're done
*/
if (ret != Z_STREAM_END)
ret = -1;
break;
}
/* we haven't yet hit data corresponding to this page */
if (total_out <= start_byte) {
goto next;
}
/*
* the start of the data we care about is offset into
* the middle of our working buffer
*/
if (total_out > start_byte && buf_start < start_byte) {
buf_offset = start_byte - buf_start;
working_bytes -= buf_offset;
} else {
buf_offset = 0;
}
current_buf_start = buf_start;
/* copy bytes from the working buffer into the pages */
while(working_bytes > 0) {
bytes = min(PAGE_CACHE_SIZE - pg_offset,
PAGE_CACHE_SIZE - buf_offset);
bytes = min(bytes, working_bytes);
kaddr = kmap_atomic(page_out, KM_USER0);
memcpy(kaddr + pg_offset, workspace->buf + buf_offset,
bytes);
kunmap_atomic(kaddr, KM_USER0);
flush_dcache_page(page_out);
pg_offset += bytes;
page_bytes_left -= bytes;
buf_offset += bytes;
working_bytes -= bytes;
current_buf_start += bytes;
/* check if we need to pick another page */
if (page_bytes_left == 0) {
page_out_index++;
if (page_out_index >= vcnt) {
ret = 0;
goto done;
}
page_out = bvec[page_out_index].bv_page;
pg_offset = 0;
page_bytes_left = PAGE_CACHE_SIZE;
start_byte = page_offset(page_out) - disk_start;
/*
* make sure our new page is covered by this
* working buffer
*/
if (total_out <= start_byte) {
goto next;
}
/* the next page in the biovec might not
* be adjacent to the last page, but it
* might still be found inside this working
* buffer. bump our offset pointer
*/
if (total_out > start_byte &&
current_buf_start < start_byte) {
buf_offset = start_byte - buf_start;
working_bytes = total_out - start_byte;
current_buf_start = buf_start +
buf_offset;
}
}
}
next:
workspace->inf_strm.next_out = workspace->buf;
workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;
if (workspace->inf_strm.avail_in == 0) {
unsigned long tmp;
kunmap(pages_in[page_in_index]);
page_in_index++;
if (page_in_index >= total_pages_in) {
data_in = NULL;
break;
}
data_in = kmap(pages_in[page_in_index]);
workspace->inf_strm.next_in = data_in;
tmp = srclen - workspace->inf_strm.total_in;
workspace->inf_strm.avail_in = min(tmp,
PAGE_CACHE_SIZE);
}
}
if (ret != Z_STREAM_END) {
ret = -1;
} else {
ret = 0;
}
done:
zlib_inflateEnd(&workspace->inf_strm);
if (data_in)
kunmap(pages_in[page_in_index]);
out:
free_workspace(workspace);
return ret;
}
/*
* a less complex decompression routine. Our compressed data fits in a
* single page, and we want to read a single page out of it.
* start_byte tells us the offset into the compressed data we're interested in
*/
int btrfs_zlib_decompress(unsigned char *data_in,
struct page *dest_page,
unsigned long start_byte,
size_t srclen, size_t destlen)
{
int ret = 0;
int wbits = MAX_WBITS;
struct workspace *workspace;
unsigned long bytes_left = destlen;
unsigned long total_out = 0;
char *kaddr;
if (destlen > PAGE_CACHE_SIZE)
return -ENOMEM;
workspace = find_zlib_workspace();
if (!workspace)
return -ENOMEM;
workspace->inf_strm.next_in = data_in;
workspace->inf_strm.avail_in = srclen;
workspace->inf_strm.total_in = 0;
workspace->inf_strm.next_out = workspace->buf;
workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;
workspace->inf_strm.total_out = 0;
/* If it's deflate, and it's got no preset dictionary, then
we can tell zlib to skip the adler32 check. */
if (srclen > 2 && !(data_in[1] & PRESET_DICT) &&
((data_in[0] & 0x0f) == Z_DEFLATED) &&
!(((data_in[0]<<8) + data_in[1]) % 31)) {
wbits = -((data_in[0] >> 4) + 8);
workspace->inf_strm.next_in += 2;
workspace->inf_strm.avail_in -= 2;
}
if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) {
printk(KERN_WARNING "inflateInit failed\n");
ret = -1;
goto out;
}
while(bytes_left > 0) {
unsigned long buf_start;
unsigned long buf_offset;
unsigned long bytes;
unsigned long pg_offset = 0;
ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH);
if (ret != Z_OK && ret != Z_STREAM_END) {
break;
}
buf_start = total_out;
total_out = workspace->inf_strm.total_out;
if (total_out == buf_start) {
ret = -1;
break;
}
if (total_out <= start_byte) {
goto next;
}
if (total_out > start_byte && buf_start < start_byte) {
buf_offset = start_byte - buf_start;
} else {
buf_offset = 0;
}
bytes = min(PAGE_CACHE_SIZE - pg_offset,
PAGE_CACHE_SIZE - buf_offset);
bytes = min(bytes, bytes_left);
kaddr = kmap_atomic(dest_page, KM_USER0);
memcpy(kaddr + pg_offset, workspace->buf + buf_offset, bytes);
kunmap_atomic(kaddr, KM_USER0);
pg_offset += bytes;
bytes_left -= bytes;
next:
workspace->inf_strm.next_out = workspace->buf;
workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;
}
if (ret != Z_STREAM_END && bytes_left != 0) {
ret = -1;
} else {
ret = 0;
}
zlib_inflateEnd(&workspace->inf_strm);
out:
free_workspace(workspace);
return ret;
}
void btrfs_zlib_exit(void)
{
free_workspaces();
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册