提交 ea814ab9 作者: Linus Torvalds

Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

Pull ext4 updates from Ted Ts'o:
 "Pretty much all bug fixes and clean ups for 4.3, after a lot of
  features and other churn going into 4.2"

* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
  Revert "ext4: remove block_device_ejected"
  ext4: ratelimit the file system mounted message
  ext4: silence a format string false positive
  ext4: simplify some code in read_mmp_block()
  ext4: don't manipulate recovery flag when freezing no-journal fs
  jbd2: limit number of reserved credits
  ext4 crypto: remove duplicate header file
  ext4: update c/mtime on truncate up
  jbd2: avoid infinite loop when destroying aborted journal
  ext4, jbd2: add REQ_FUA flag when recording an error in the superblock
  ext4 crypto: fix spelling typo in comment
  ext4 crypto: exit cleanly if ext4_derive_key_aes() fails
  ext4: reject journal options for ext2 mounts
  ext4: implement cgroup writeback support
  ext4: replace ext4_io_submit->io_op with ->io_wbc
  ext4 crypto: check for too-short encrypted file names
  ext4 crypto: use a jbd2 transaction when adding a crypto policy
  jbd2: speedup jbd2_journal_dirty_metadata()
...@@ -19,7 +19,6 @@ ...@@ -19,7 +19,6 @@
#include <linux/gfp.h> #include <linux/gfp.h>
#include <linux/kernel.h> #include <linux/kernel.h>
#include <linux/key.h> #include <linux/key.h>
#include <linux/key.h>
#include <linux/list.h> #include <linux/list.h>
#include <linux/mempool.h> #include <linux/mempool.h>
#include <linux/random.h> #include <linux/random.h>
...@@ -329,6 +328,10 @@ int _ext4_fname_disk_to_usr(struct inode *inode, ...@@ -329,6 +328,10 @@ int _ext4_fname_disk_to_usr(struct inode *inode,
return oname->len; return oname->len;
} }
} }
if (iname->len < EXT4_CRYPTO_BLOCK_SIZE) {
EXT4_ERROR_INODE(inode, "encrypted inode too small");
return -EUCLEAN;
}
if (EXT4_I(inode)->i_crypt_info) if (EXT4_I(inode)->i_crypt_info)
return ext4_fname_decrypt(inode, iname, oname); return ext4_fname_decrypt(inode, iname, oname);
......
...@@ -30,7 +30,7 @@ static void derive_crypt_complete(struct crypto_async_request *req, int rc) ...@@ -30,7 +30,7 @@ static void derive_crypt_complete(struct crypto_async_request *req, int rc)
/** /**
* ext4_derive_key_aes() - Derive a key using AES-128-ECB * ext4_derive_key_aes() - Derive a key using AES-128-ECB
* @deriving_key: Encryption key used for derivatio. * @deriving_key: Encryption key used for derivation.
* @source_key: Source key to which to apply derivation. * @source_key: Source key to which to apply derivation.
* @derived_key: Derived key. * @derived_key: Derived key.
* *
...@@ -220,6 +220,8 @@ int _ext4_get_encryption_info(struct inode *inode) ...@@ -220,6 +220,8 @@ int _ext4_get_encryption_info(struct inode *inode)
BUG_ON(master_key->size != EXT4_AES_256_XTS_KEY_SIZE); BUG_ON(master_key->size != EXT4_AES_256_XTS_KEY_SIZE);
res = ext4_derive_key_aes(ctx.nonce, master_key->raw, res = ext4_derive_key_aes(ctx.nonce, master_key->raw,
raw_key); raw_key);
if (res)
goto out;
got_key: got_key:
ctfm = crypto_alloc_ablkcipher(cipher_str, 0, 0); ctfm = crypto_alloc_ablkcipher(cipher_str, 0, 0);
if (!ctfm || IS_ERR(ctfm)) { if (!ctfm || IS_ERR(ctfm)) {
......
...@@ -12,6 +12,7 @@ ...@@ -12,6 +12,7 @@
#include <linux/string.h> #include <linux/string.h>
#include <linux/types.h> #include <linux/types.h>
#include "ext4_jbd2.h"
#include "ext4.h" #include "ext4.h"
#include "xattr.h" #include "xattr.h"
...@@ -49,7 +50,8 @@ static int ext4_create_encryption_context_from_policy( ...@@ -49,7 +50,8 @@ static int ext4_create_encryption_context_from_policy(
struct inode *inode, const struct ext4_encryption_policy *policy) struct inode *inode, const struct ext4_encryption_policy *policy)
{ {
struct ext4_encryption_context ctx; struct ext4_encryption_context ctx;
int res = 0; handle_t *handle;
int res, res2;
res = ext4_convert_inline_data(inode); res = ext4_convert_inline_data(inode);
if (res) if (res)
...@@ -78,11 +80,22 @@ static int ext4_create_encryption_context_from_policy( ...@@ -78,11 +80,22 @@ static int ext4_create_encryption_context_from_policy(
BUILD_BUG_ON(sizeof(ctx.nonce) != EXT4_KEY_DERIVATION_NONCE_SIZE); BUILD_BUG_ON(sizeof(ctx.nonce) != EXT4_KEY_DERIVATION_NONCE_SIZE);
get_random_bytes(ctx.nonce, EXT4_KEY_DERIVATION_NONCE_SIZE); get_random_bytes(ctx.nonce, EXT4_KEY_DERIVATION_NONCE_SIZE);
handle = ext4_journal_start(inode, EXT4_HT_MISC,
ext4_jbd2_credits_xattr(inode));
if (IS_ERR(handle))
return PTR_ERR(handle);
res = ext4_xattr_set(inode, EXT4_XATTR_INDEX_ENCRYPTION, res = ext4_xattr_set(inode, EXT4_XATTR_INDEX_ENCRYPTION,
EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, &ctx, EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, &ctx,
sizeof(ctx), 0); sizeof(ctx), 0);
if (!res) if (!res) {
ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT); ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT);
res = ext4_mark_inode_dirty(handle, inode);
if (res)
EXT4_ERROR_INODE(inode, "Failed to mark inode dirty");
}
res2 = ext4_journal_stop(handle);
if (!res)
res = res2;
return res; return res;
} }
......
...@@ -187,7 +187,7 @@ typedef struct ext4_io_end { ...@@ -187,7 +187,7 @@ typedef struct ext4_io_end {
} ext4_io_end_t; } ext4_io_end_t;
struct ext4_io_submit { struct ext4_io_submit {
int io_op; struct writeback_control *io_wbc;
struct bio *io_bio; struct bio *io_bio;
ext4_io_end_t *io_end; ext4_io_end_t *io_end;
sector_t io_next_block; sector_t io_next_block;
......
...@@ -4728,6 +4728,14 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) ...@@ -4728,6 +4728,14 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
error = ext4_orphan_add(handle, inode); error = ext4_orphan_add(handle, inode);
orphan = 1; orphan = 1;
} }
/*
* Update c/mtime on truncate up, ext4_truncate() will
* update c/mtime in shrink case below
*/
if (!shrink) {
inode->i_mtime = ext4_current_time(inode);
inode->i_ctime = inode->i_mtime;
}
down_write(&EXT4_I(inode)->i_data_sem); down_write(&EXT4_I(inode)->i_data_sem);
EXT4_I(inode)->i_disksize = attr->ia_size; EXT4_I(inode)->i_disksize = attr->ia_size;
rc = ext4_mark_inode_dirty(handle, inode); rc = ext4_mark_inode_dirty(handle, inode);
......
...@@ -69,6 +69,7 @@ static int read_mmp_block(struct super_block *sb, struct buffer_head **bh, ...@@ -69,6 +69,7 @@ static int read_mmp_block(struct super_block *sb, struct buffer_head **bh,
ext4_fsblk_t mmp_block) ext4_fsblk_t mmp_block)
{ {
struct mmp_struct *mmp; struct mmp_struct *mmp;
int ret;
if (*bh) if (*bh)
clear_buffer_uptodate(*bh); clear_buffer_uptodate(*bh);
...@@ -76,33 +77,36 @@ static int read_mmp_block(struct super_block *sb, struct buffer_head **bh, ...@@ -76,33 +77,36 @@ static int read_mmp_block(struct super_block *sb, struct buffer_head **bh,
/* This would be sb_bread(sb, mmp_block), except we need to be sure /* This would be sb_bread(sb, mmp_block), except we need to be sure
* that the MD RAID device cache has been bypassed, and that the read * that the MD RAID device cache has been bypassed, and that the read
* is not blocked in the elevator. */ * is not blocked in the elevator. */
if (!*bh) if (!*bh) {
*bh = sb_getblk(sb, mmp_block); *bh = sb_getblk(sb, mmp_block);
if (!*bh) if (!*bh) {
return -ENOMEM; ret = -ENOMEM;
if (*bh) { goto warn_exit;
get_bh(*bh);
lock_buffer(*bh);
(*bh)->b_end_io = end_buffer_read_sync;
submit_bh(READ_SYNC | REQ_META | REQ_PRIO, *bh);
wait_on_buffer(*bh);
if (!buffer_uptodate(*bh)) {
brelse(*bh);
*bh = NULL;
} }
} }
if (unlikely(!*bh)) {
ext4_warning(sb, "Error while reading MMP block %llu", get_bh(*bh);
mmp_block); lock_buffer(*bh);
return -EIO; (*bh)->b_end_io = end_buffer_read_sync;
submit_bh(READ_SYNC | REQ_META | REQ_PRIO, *bh);
wait_on_buffer(*bh);
if (!buffer_uptodate(*bh)) {
brelse(*bh);
*bh = NULL;
ret = -EIO;
goto warn_exit;
} }
mmp = (struct mmp_struct *)((*bh)->b_data); mmp = (struct mmp_struct *)((*bh)->b_data);
if (le32_to_cpu(mmp->mmp_magic) != EXT4_MMP_MAGIC || if (le32_to_cpu(mmp->mmp_magic) == EXT4_MMP_MAGIC &&
!ext4_mmp_csum_verify(sb, mmp)) ext4_mmp_csum_verify(sb, mmp))
return -EINVAL; return 0;
ret = -EINVAL;
return 0;
warn_exit:
ext4_warning(sb, "Error %d while reading MMP block %llu",
ret, mmp_block);
return ret;
} }
/* /*
...@@ -111,7 +115,7 @@ static int read_mmp_block(struct super_block *sb, struct buffer_head **bh, ...@@ -111,7 +115,7 @@ static int read_mmp_block(struct super_block *sb, struct buffer_head **bh,
void __dump_mmp_msg(struct super_block *sb, struct mmp_struct *mmp, void __dump_mmp_msg(struct super_block *sb, struct mmp_struct *mmp,
const char *function, unsigned int line, const char *msg) const char *function, unsigned int line, const char *msg)
{ {
__ext4_warning(sb, function, line, msg); __ext4_warning(sb, function, line, "%s", msg);
__ext4_warning(sb, function, line, __ext4_warning(sb, function, line,
"MMP failure info: last update time: %llu, last update " "MMP failure info: last update time: %llu, last update "
"node: %s, last update device: %s\n", "node: %s, last update device: %s\n",
......
...@@ -354,8 +354,10 @@ void ext4_io_submit(struct ext4_io_submit *io) ...@@ -354,8 +354,10 @@ void ext4_io_submit(struct ext4_io_submit *io)
struct bio *bio = io->io_bio; struct bio *bio = io->io_bio;
if (bio) { if (bio) {
int io_op = io->io_wbc->sync_mode == WB_SYNC_ALL ?
WRITE_SYNC : WRITE;
bio_get(io->io_bio); bio_get(io->io_bio);
submit_bio(io->io_op, io->io_bio); submit_bio(io_op, io->io_bio);
bio_put(io->io_bio); bio_put(io->io_bio);
} }
io->io_bio = NULL; io->io_bio = NULL;
...@@ -364,7 +366,7 @@ void ext4_io_submit(struct ext4_io_submit *io) ...@@ -364,7 +366,7 @@ void ext4_io_submit(struct ext4_io_submit *io)
void ext4_io_submit_init(struct ext4_io_submit *io, void ext4_io_submit_init(struct ext4_io_submit *io,
struct writeback_control *wbc) struct writeback_control *wbc)
{ {
io->io_op = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE); io->io_wbc = wbc;
io->io_bio = NULL; io->io_bio = NULL;
io->io_end = NULL; io->io_end = NULL;
} }
...@@ -377,6 +379,7 @@ static int io_submit_init_bio(struct ext4_io_submit *io, ...@@ -377,6 +379,7 @@ static int io_submit_init_bio(struct ext4_io_submit *io,
bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES); bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES);
if (!bio) if (!bio)
return -ENOMEM; return -ENOMEM;
wbc_init_bio(io->io_wbc, bio);
bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9); bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
bio->bi_bdev = bh->b_bdev; bio->bi_bdev = bh->b_bdev;
bio->bi_end_io = ext4_end_bio; bio->bi_end_io = ext4_end_bio;
...@@ -405,6 +408,7 @@ static int io_submit_add_bh(struct ext4_io_submit *io, ...@@ -405,6 +408,7 @@ static int io_submit_add_bh(struct ext4_io_submit *io,
ret = bio_add_page(io->io_bio, page, bh->b_size, bh_offset(bh)); ret = bio_add_page(io->io_bio, page, bh->b_size, bh_offset(bh));
if (ret != bh->b_size) if (ret != bh->b_size)
goto submit_and_retry; goto submit_and_retry;
wbc_account_io(io->io_wbc, page, bh->b_size);
io->io_next_block++; io->io_next_block++;
return 0; return 0;
} }
......
...@@ -60,6 +60,7 @@ static struct ext4_lazy_init *ext4_li_info; ...@@ -60,6 +60,7 @@ static struct ext4_lazy_init *ext4_li_info;
static struct mutex ext4_li_mtx; static struct mutex ext4_li_mtx;
static struct ext4_features *ext4_feat; static struct ext4_features *ext4_feat;
static int ext4_mballoc_ready; static int ext4_mballoc_ready;
static struct ratelimit_state ext4_mount_msg_ratelimit;
static int ext4_load_journal(struct super_block *, struct ext4_super_block *, static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
unsigned long journal_devnum); unsigned long journal_devnum);
...@@ -321,6 +322,22 @@ static void save_error_info(struct super_block *sb, const char *func, ...@@ -321,6 +322,22 @@ static void save_error_info(struct super_block *sb, const char *func,
ext4_commit_super(sb, 1); ext4_commit_super(sb, 1);
} }
/*
 * Report whether the block device backing @sb has been torn down.
 *
 * del_gendisk() uninitializes the disk-specific data structures,
 * including the bdi structure, without notifying anyone else.  After
 * that point any call to mark_buffer_dirty() (for example from
 * ext4_commit_super) will cause a kernel OOPS.  This check is a kludge
 * to avoid those oopses until del_gendisk() gains a proper hook that
 * informs the VFS and file system layers.
 *
 * Returns non-zero when the bdi of the underlying block device no
 * longer has a device attached (bdi->dev is NULL), zero otherwise.
 */
static int block_device_ejected(struct super_block *sb)
{
	struct backing_dev_info *bdi = inode_to_bdi(sb->s_bdev->bd_inode);

	return !bdi->dev;
}
static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn) static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn)
{ {
struct super_block *sb = journal->j_private; struct super_block *sb = journal->j_private;
...@@ -1390,9 +1407,9 @@ static const struct mount_opts { ...@@ -1390,9 +1407,9 @@ static const struct mount_opts {
{Opt_stripe, 0, MOPT_GTE0}, {Opt_stripe, 0, MOPT_GTE0},
{Opt_resuid, 0, MOPT_GTE0}, {Opt_resuid, 0, MOPT_GTE0},
{Opt_resgid, 0, MOPT_GTE0}, {Opt_resgid, 0, MOPT_GTE0},
{Opt_journal_dev, 0, MOPT_GTE0}, {Opt_journal_dev, 0, MOPT_NO_EXT2 | MOPT_GTE0},
{Opt_journal_path, 0, MOPT_STRING}, {Opt_journal_path, 0, MOPT_NO_EXT2 | MOPT_STRING},
{Opt_journal_ioprio, 0, MOPT_GTE0}, {Opt_journal_ioprio, 0, MOPT_NO_EXT2 | MOPT_GTE0},
{Opt_data_journal, EXT4_MOUNT_JOURNAL_DATA, MOPT_NO_EXT2 | MOPT_DATAJ}, {Opt_data_journal, EXT4_MOUNT_JOURNAL_DATA, MOPT_NO_EXT2 | MOPT_DATAJ},
{Opt_data_ordered, EXT4_MOUNT_ORDERED_DATA, MOPT_NO_EXT2 | MOPT_DATAJ}, {Opt_data_ordered, EXT4_MOUNT_ORDERED_DATA, MOPT_NO_EXT2 | MOPT_DATAJ},
{Opt_data_writeback, EXT4_MOUNT_WRITEBACK_DATA, {Opt_data_writeback, EXT4_MOUNT_WRITEBACK_DATA,
...@@ -3639,6 +3656,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) ...@@ -3639,6 +3656,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
} }
if (test_opt(sb, DELALLOC)) if (test_opt(sb, DELALLOC))
clear_opt(sb, DELALLOC); clear_opt(sb, DELALLOC);
} else {
sb->s_iflags |= SB_I_CGROUPWB;
} }
sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
...@@ -4271,9 +4290,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) ...@@ -4271,9 +4290,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
"the device does not support discard"); "the device does not support discard");
} }
ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. " if (___ratelimit(&ext4_mount_msg_ratelimit, "EXT4-fs mount"))
"Opts: %s%s%s", descr, sbi->s_es->s_mount_opts, ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. "
*sbi->s_es->s_mount_opts ? "; " : "", orig_data); "Opts: %s%s%s", descr, sbi->s_es->s_mount_opts,
*sbi->s_es->s_mount_opts ? "; " : "", orig_data);
if (es->s_error_count) if (es->s_error_count)
mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */ mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */
...@@ -4613,7 +4633,7 @@ static int ext4_commit_super(struct super_block *sb, int sync) ...@@ -4613,7 +4633,7 @@ static int ext4_commit_super(struct super_block *sb, int sync)
struct buffer_head *sbh = EXT4_SB(sb)->s_sbh; struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
int error = 0; int error = 0;
if (!sbh) if (!sbh || block_device_ejected(sb))
return error; return error;
if (buffer_write_io_error(sbh)) { if (buffer_write_io_error(sbh)) {
/* /*
...@@ -4661,7 +4681,8 @@ static int ext4_commit_super(struct super_block *sb, int sync) ...@@ -4661,7 +4681,8 @@ static int ext4_commit_super(struct super_block *sb, int sync)
ext4_superblock_csum_set(sb); ext4_superblock_csum_set(sb);
mark_buffer_dirty(sbh); mark_buffer_dirty(sbh);
if (sync) { if (sync) {
error = sync_dirty_buffer(sbh); error = __sync_dirty_buffer(sbh,
test_opt(sb, BARRIER) ? WRITE_FUA : WRITE_SYNC);
if (error) if (error)
return error; return error;
...@@ -4829,10 +4850,11 @@ static int ext4_freeze(struct super_block *sb) ...@@ -4829,10 +4850,11 @@ static int ext4_freeze(struct super_block *sb)
error = jbd2_journal_flush(journal); error = jbd2_journal_flush(journal);
if (error < 0) if (error < 0)
goto out; goto out;
/* Journal blocked and flushed, clear needs_recovery flag. */
EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
} }
/* Journal blocked and flushed, clear needs_recovery flag. */
EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
error = ext4_commit_super(sb, 1); error = ext4_commit_super(sb, 1);
out: out:
if (journal) if (journal)
...@@ -4850,8 +4872,11 @@ static int ext4_unfreeze(struct super_block *sb) ...@@ -4850,8 +4872,11 @@ static int ext4_unfreeze(struct super_block *sb)
if (sb->s_flags & MS_RDONLY) if (sb->s_flags & MS_RDONLY)
return 0; return 0;
/* Reset the needs_recovery flag before the fs is unlocked. */ if (EXT4_SB(sb)->s_journal) {
EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); /* Reset the needs_recovery flag before the fs is unlocked. */
EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
}
ext4_commit_super(sb, 1); ext4_commit_super(sb, 1);
return 0; return 0;
} }
...@@ -5600,6 +5625,7 @@ static int __init ext4_init_fs(void) ...@@ -5600,6 +5625,7 @@ static int __init ext4_init_fs(void)
{ {
int i, err; int i, err;
ratelimit_state_init(&ext4_mount_msg_ratelimit, 30 * HZ, 64);
ext4_li_info = NULL; ext4_li_info = NULL;
mutex_init(&ext4_li_mtx); mutex_init(&ext4_li_mtx);
......
...@@ -417,12 +417,12 @@ int jbd2_cleanup_journal_tail(journal_t *journal) ...@@ -417,12 +417,12 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
* journal_clean_one_cp_list * journal_clean_one_cp_list
* *
* Find all the written-back checkpoint buffers in the given list and * Find all the written-back checkpoint buffers in the given list and
* release them. * release them. If 'destroy' is set, clean all buffers unconditionally.
* *
* Called with j_list_lock held. * Called with j_list_lock held.
* Returns 1 if we freed the transaction, 0 otherwise. * Returns 1 if we freed the transaction, 0 otherwise.
*/ */
static int journal_clean_one_cp_list(struct journal_head *jh) static int journal_clean_one_cp_list(struct journal_head *jh, bool destroy)
{ {
struct journal_head *last_jh; struct journal_head *last_jh;
struct journal_head *next_jh = jh; struct journal_head *next_jh = jh;
...@@ -436,7 +436,10 @@ static int journal_clean_one_cp_list(struct journal_head *jh) ...@@ -436,7 +436,10 @@ static int journal_clean_one_cp_list(struct journal_head *jh)
do { do {
jh = next_jh; jh = next_jh;
next_jh = jh->b_cpnext; next_jh = jh->b_cpnext;
ret = __try_to_free_cp_buf(jh); if (!destroy)
ret = __try_to_free_cp_buf(jh);
else
ret = __jbd2_journal_remove_checkpoint(jh) + 1;
if (!ret) if (!ret)
return freed; return freed;
if (ret == 2) if (ret == 2)
...@@ -459,10 +462,11 @@ static int journal_clean_one_cp_list(struct journal_head *jh) ...@@ -459,10 +462,11 @@ static int journal_clean_one_cp_list(struct journal_head *jh)
* journal_clean_checkpoint_list * journal_clean_checkpoint_list
* *
* Find all the written-back checkpoint buffers in the journal and release them. * Find all the written-back checkpoint buffers in the journal and release them.
* If 'destroy' is set, release all buffers unconditionally.
* *
* Called with j_list_lock held. * Called with j_list_lock held.
*/ */
void __jbd2_journal_clean_checkpoint_list(journal_t *journal) void __jbd2_journal_clean_checkpoint_list(journal_t *journal, bool destroy)
{ {
transaction_t *transaction, *last_transaction, *next_transaction; transaction_t *transaction, *last_transaction, *next_transaction;
int ret; int ret;
...@@ -476,7 +480,8 @@ void __jbd2_journal_clean_checkpoint_list(journal_t *journal) ...@@ -476,7 +480,8 @@ void __jbd2_journal_clean_checkpoint_list(journal_t *journal)
do { do {
transaction = next_transaction; transaction = next_transaction;
next_transaction = transaction->t_cpnext; next_transaction = transaction->t_cpnext;
ret = journal_clean_one_cp_list(transaction->t_checkpoint_list); ret = journal_clean_one_cp_list(transaction->t_checkpoint_list,
destroy);
/* /*
* This function only frees up some memory if possible so we * This function only frees up some memory if possible so we
* dont have an obligation to finish processing. Bail out if * dont have an obligation to finish processing. Bail out if
...@@ -492,7 +497,7 @@ void __jbd2_journal_clean_checkpoint_list(journal_t *journal) ...@@ -492,7 +497,7 @@ void __jbd2_journal_clean_checkpoint_list(journal_t *journal)
* we can possibly see not yet submitted buffers on io_list * we can possibly see not yet submitted buffers on io_list
*/ */
ret = journal_clean_one_cp_list(transaction-> ret = journal_clean_one_cp_list(transaction->
t_checkpoint_io_list); t_checkpoint_io_list, destroy);
if (need_resched()) if (need_resched())
return; return;
/* /*
...@@ -505,6 +510,28 @@ void __jbd2_journal_clean_checkpoint_list(journal_t *journal) ...@@ -505,6 +510,28 @@ void __jbd2_journal_clean_checkpoint_list(journal_t *journal)
} while (transaction != last_transaction); } while (transaction != last_transaction);
} }
/*
 * The journal is aborted and all we need is to free memory, so strip
 * the buffers off every checkpoint list.
 *
 * Each pass takes j_list_lock, runs the checkpoint-list cleaner in
 * 'destroy' mode, drops the lock and offers to reschedule.  Repeating
 * is required because __jbd2_journal_clean_checkpoint_list() may bail
 * out early when a reschedule is needed; we stop once no checkpoint
 * transactions remain on the journal.
 */
void jbd2_journal_destroy_checkpoint(journal_t *journal)
{
	for (;;) {
		spin_lock(&journal->j_list_lock);
		if (journal->j_checkpoint_transactions == NULL) {
			/* Nothing left to clean: release the lock and finish. */
			spin_unlock(&journal->j_list_lock);
			return;
		}
		__jbd2_journal_clean_checkpoint_list(journal, true);
		spin_unlock(&journal->j_list_lock);
		cond_resched();
	}
}
/* /*
* journal_remove_checkpoint: called after a buffer has been committed * journal_remove_checkpoint: called after a buffer has been committed
* to disk (either by being write-back flushed to disk, or being * to disk (either by being write-back flushed to disk, or being
......
...@@ -510,7 +510,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) ...@@ -510,7 +510,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
* frees some memory * frees some memory
*/ */
spin_lock(&journal->j_list_lock); spin_lock(&journal->j_list_lock);
__jbd2_journal_clean_checkpoint_list(journal); __jbd2_journal_clean_checkpoint_list(journal, false);
spin_unlock(&journal->j_list_lock); spin_unlock(&journal->j_list_lock);
jbd_debug(3, "JBD2: commit phase 1\n"); jbd_debug(3, "JBD2: commit phase 1\n");
......
...@@ -1456,7 +1456,7 @@ void jbd2_journal_update_sb_errno(journal_t *journal) ...@@ -1456,7 +1456,7 @@ void jbd2_journal_update_sb_errno(journal_t *journal)
sb->s_errno = cpu_to_be32(journal->j_errno); sb->s_errno = cpu_to_be32(journal->j_errno);
read_unlock(&journal->j_state_lock); read_unlock(&journal->j_state_lock);
jbd2_write_superblock(journal, WRITE_SYNC); jbd2_write_superblock(journal, WRITE_FUA);
} }
EXPORT_SYMBOL(jbd2_journal_update_sb_errno); EXPORT_SYMBOL(jbd2_journal_update_sb_errno);
...@@ -1693,8 +1693,17 @@ int jbd2_journal_destroy(journal_t *journal) ...@@ -1693,8 +1693,17 @@ int jbd2_journal_destroy(journal_t *journal)
while (journal->j_checkpoint_transactions != NULL) { while (journal->j_checkpoint_transactions != NULL) {
spin_unlock(&journal->j_list_lock); spin_unlock(&journal->j_list_lock);
mutex_lock(&journal->j_checkpoint_mutex); mutex_lock(&journal->j_checkpoint_mutex);
jbd2_log_do_checkpoint(journal); err = jbd2_log_do_checkpoint(journal);
mutex_unlock(&journal->j_checkpoint_mutex); mutex_unlock(&journal->j_checkpoint_mutex);
/*
* If checkpointing failed, just free the buffers to avoid
* looping forever
*/
if (err) {
jbd2_journal_destroy_checkpoint(journal);
spin_lock(&journal->j_list_lock);
break;
}
spin_lock(&journal->j_list_lock); spin_lock(&journal->j_list_lock);
} }
......
...@@ -204,6 +204,20 @@ static int add_transaction_credits(journal_t *journal, int blocks, ...@@ -204,6 +204,20 @@ static int add_transaction_credits(journal_t *journal, int blocks,
* attach this handle to a new transaction. * attach this handle to a new transaction.
*/ */
atomic_sub(total, &t->t_outstanding_credits); atomic_sub(total, &t->t_outstanding_credits);
/*
* Is the number of reserved credits in the current transaction too
* big to fit this handle? Wait until reserved credits are freed.
*/
if (atomic_read(&journal->j_reserved_credits) + total >
journal->j_max_transaction_buffers) {
read_unlock(&journal->j_state_lock);
wait_event(journal->j_wait_reserved,
atomic_read(&journal->j_reserved_credits) + total <=
journal->j_max_transaction_buffers);
return 1;
}
wait_transaction_locked(journal); wait_transaction_locked(journal);
return 1; return 1;
} }
...@@ -262,20 +276,24 @@ static int start_this_handle(journal_t *journal, handle_t *handle, ...@@ -262,20 +276,24 @@ static int start_this_handle(journal_t *journal, handle_t *handle,
int rsv_blocks = 0; int rsv_blocks = 0;
unsigned long ts = jiffies; unsigned long ts = jiffies;
if (handle->h_rsv_handle)
rsv_blocks = handle->h_rsv_handle->h_buffer_credits;
/* /*
* 1/2 of transaction can be reserved so we can practically handle * Limit the number of reserved credits to 1/2 of maximum transaction
* only 1/2 of maximum transaction size per operation * size and limit the number of total credits to not exceed maximum
* transaction size per operation.
*/ */
if (WARN_ON(blocks > journal->j_max_transaction_buffers / 2)) { if ((rsv_blocks > journal->j_max_transaction_buffers / 2) ||
printk(KERN_ERR "JBD2: %s wants too many credits (%d > %d)\n", (rsv_blocks + blocks > journal->j_max_transaction_buffers)) {
current->comm, blocks, printk(KERN_ERR "JBD2: %s wants too many credits "
journal->j_max_transaction_buffers / 2); "credits:%d rsv_credits:%d max:%d\n",
current->comm, blocks, rsv_blocks,
journal->j_max_transaction_buffers);
WARN_ON(1);
return -ENOSPC; return -ENOSPC;
} }
if (handle->h_rsv_handle)
rsv_blocks = handle->h_rsv_handle->h_buffer_credits;
alloc_transaction: alloc_transaction:
if (!journal->j_running_transaction) { if (!journal->j_running_transaction) {
/* /*
...@@ -1280,8 +1298,6 @@ void jbd2_buffer_abort_trigger(struct journal_head *jh, ...@@ -1280,8 +1298,6 @@ void jbd2_buffer_abort_trigger(struct journal_head *jh,
triggers->t_abort(triggers, jh2bh(jh)); triggers->t_abort(triggers, jh2bh(jh));
} }
/** /**
* int jbd2_journal_dirty_metadata() - mark a buffer as containing dirty metadata * int jbd2_journal_dirty_metadata() - mark a buffer as containing dirty metadata
* @handle: transaction to add buffer to. * @handle: transaction to add buffer to.
...@@ -1314,12 +1330,41 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) ...@@ -1314,12 +1330,41 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
if (is_handle_aborted(handle)) if (is_handle_aborted(handle))
return -EROFS; return -EROFS;
journal = transaction->t_journal; if (!buffer_jbd(bh)) {
jh = jbd2_journal_grab_journal_head(bh);
if (!jh) {
ret = -EUCLEAN; ret = -EUCLEAN;
goto out; goto out;
} }
/*
* We don't grab jh reference here since the buffer must be part
* of the running transaction.
*/
jh = bh2jh(bh);
/*
* This and the following assertions are unreliable since we may see jh
* in inconsistent state unless we grab bh_state lock. But this is
* crucial to catch bugs so let's do a reliable check until the
* lockless handling is fully proven.
*/
if (jh->b_transaction != transaction &&
jh->b_next_transaction != transaction) {
jbd_lock_bh_state(bh);
J_ASSERT_JH(jh, jh->b_transaction == transaction ||
jh->b_next_transaction == transaction);
jbd_unlock_bh_state(bh);
}
if (jh->b_modified == 1) {
/* If it's in our transaction it must be in BJ_Metadata list. */
if (jh->b_transaction == transaction &&
jh->b_jlist != BJ_Metadata) {
jbd_lock_bh_state(bh);
J_ASSERT_JH(jh, jh->b_transaction != transaction ||
jh->b_jlist == BJ_Metadata);
jbd_unlock_bh_state(bh);
}
goto out;
}
journal = transaction->t_journal;
jbd_debug(5, "journal_head %p\n", jh); jbd_debug(5, "journal_head %p\n", jh);
JBUFFER_TRACE(jh, "entry"); JBUFFER_TRACE(jh, "entry");
...@@ -1410,7 +1455,6 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) ...@@ -1410,7 +1455,6 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
spin_unlock(&journal->j_list_lock); spin_unlock(&journal->j_list_lock);
out_unlock_bh: out_unlock_bh:
jbd_unlock_bh_state(bh); jbd_unlock_bh_state(bh);
jbd2_journal_put_journal_head(jh);
out: out:
JBUFFER_TRACE(jh, "exit"); JBUFFER_TRACE(jh, "exit");
return ret; return ret;
......
...@@ -1081,8 +1081,9 @@ void jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block); ...@@ -1081,8 +1081,9 @@ void jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block);
extern void jbd2_journal_commit_transaction(journal_t *); extern void jbd2_journal_commit_transaction(journal_t *);
/* Checkpoint list management */ /* Checkpoint list management */
void __jbd2_journal_clean_checkpoint_list(journal_t *journal); void __jbd2_journal_clean_checkpoint_list(journal_t *journal, bool destroy);
int __jbd2_journal_remove_checkpoint(struct journal_head *); int __jbd2_journal_remove_checkpoint(struct journal_head *);
void jbd2_journal_destroy_checkpoint(journal_t *journal);
void __jbd2_journal_insert_checkpoint(struct journal_head *, transaction_t *); void __jbd2_journal_insert_checkpoint(struct journal_head *, transaction_t *);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册