提交 4edebed8 编写于 作者: L Linus Torvalds

Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

Pull Ext4 updates from Theodore Ts'o:
 "The major new feature added in this update is Darrick J Wong's
  metadata checksum feature, which adds crc32 checksums to ext4's
  metadata fields.

  There is also the usual set of cleanups and bug fixes."

* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (44 commits)
  ext4: hole-punch use truncate_pagecache_range
  jbd2: use kmem_cache_zalloc wrapper instead of flag
  ext4: remove mb_groups before tearing down the buddy_cache
  ext4: add ext4_mb_unload_buddy in the error path
  ext4: don't trash state flags in EXT4_IOC_SETFLAGS
  ext4: let getattr report the right blocks in delalloc+bigalloc
  ext4: add missing save_error_info() to ext4_error()
  ext4: add debugging trigger for ext4_error()
  ext4: protect group inode free counting with group lock
  ext4: use consistent ssize_t type in ext4_file_write()
  ext4: fix format flag in ext4_ext_binsearch_idx()
  ext4: cleanup in ext4_discard_allocated_blocks()
  ext4: return ENOMEM when mounts fail due to lack of memory
  ext4: remove redundundant "(char *) bh->b_data" casts
  ext4: disallow hard-linked directory in ext4_lookup
  ext4: fix potential integer overflow in alloc_flex_gd()
  ext4: remove needs_recovery in ext4_mb_init()
  ext4: force ro mount if ext4_setup_super() fails
  ext4: fix potential NULL dereference in ext4_free_inodes_counts()
  ext4/jbd2: add metadata checksumming to the list of supported features
  ...
......@@ -2,6 +2,8 @@ config EXT4_FS
tristate "The Extended 4 (ext4) filesystem"
select JBD2
select CRC16
select CRYPTO
select CRYPTO_CRC32C
help
This is the next generation of the ext3 filesystem.
......
......@@ -168,12 +168,14 @@ void ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
/* If checksum is bad mark all blocks used to prevent allocation
* essentially implementing a per-group read-only flag. */
if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) {
ext4_error(sb, "Checksum bad for group %u", block_group);
ext4_free_group_clusters_set(sb, gdp, 0);
ext4_free_inodes_set(sb, gdp, 0);
ext4_itable_unused_set(sb, gdp, 0);
memset(bh->b_data, 0xff, sb->s_blocksize);
ext4_block_bitmap_csum_set(sb, block_group, gdp, bh,
EXT4_BLOCKS_PER_GROUP(sb) / 8);
return;
}
memset(bh->b_data, 0, sb->s_blocksize);
......@@ -210,6 +212,9 @@ void ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
*/
ext4_mark_bitmap_end(num_clusters_in_group(sb, block_group),
sb->s_blocksize * 8, bh->b_data);
ext4_block_bitmap_csum_set(sb, block_group, gdp, bh,
EXT4_BLOCKS_PER_GROUP(sb) / 8);
ext4_group_desc_csum_set(sb, block_group, gdp);
}
/* Return the number of free blocks in a block group. It is used when
......@@ -276,9 +281,9 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb,
}
static int ext4_valid_block_bitmap(struct super_block *sb,
struct ext4_group_desc *desc,
unsigned int block_group,
struct buffer_head *bh)
struct ext4_group_desc *desc,
unsigned int block_group,
struct buffer_head *bh)
{
ext4_grpblk_t offset;
ext4_grpblk_t next_zero_bit;
......@@ -325,6 +330,23 @@ static int ext4_valid_block_bitmap(struct super_block *sb,
block_group, bitmap_blk);
return 0;
}
void ext4_validate_block_bitmap(struct super_block *sb,
struct ext4_group_desc *desc,
unsigned int block_group,
struct buffer_head *bh)
{
if (buffer_verified(bh))
return;
ext4_lock_group(sb, block_group);
if (ext4_valid_block_bitmap(sb, desc, block_group, bh) &&
ext4_block_bitmap_csum_verify(sb, block_group, desc, bh,
EXT4_BLOCKS_PER_GROUP(sb) / 8))
set_buffer_verified(bh);
ext4_unlock_group(sb, block_group);
}
/**
* ext4_read_block_bitmap()
* @sb: super block
......@@ -355,12 +377,12 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group)
}
if (bitmap_uptodate(bh))
return bh;
goto verify;
lock_buffer(bh);
if (bitmap_uptodate(bh)) {
unlock_buffer(bh);
return bh;
goto verify;
}
ext4_lock_group(sb, block_group);
if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
......@@ -379,7 +401,7 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group)
*/
set_bitmap_uptodate(bh);
unlock_buffer(bh);
return bh;
goto verify;
}
/*
* submit the buffer_head for reading
......@@ -390,6 +412,9 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group)
get_bh(bh);
submit_bh(READ, bh);
return bh;
verify:
ext4_validate_block_bitmap(sb, desc, block_group, bh);
return bh;
}
/* Returns 0 on success, 1 on error */
......@@ -412,7 +437,7 @@ int ext4_wait_block_bitmap(struct super_block *sb, ext4_group_t block_group,
}
clear_buffer_new(bh);
/* Panic or remount fs read-only if block bitmap is invalid */
ext4_valid_block_bitmap(sb, desc, block_group, bh);
ext4_validate_block_bitmap(sb, desc, block_group, bh);
return 0;
}
......
......@@ -29,3 +29,86 @@ unsigned int ext4_count_free(struct buffer_head *map, unsigned int numchars)
#endif /* EXT4FS_DEBUG */
int ext4_inode_bitmap_csum_verify(struct super_block *sb, ext4_group_t group,
struct ext4_group_desc *gdp,
struct buffer_head *bh, int sz)
{
__u32 hi;
__u32 provided, calculated;
struct ext4_sb_info *sbi = EXT4_SB(sb);
if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
return 1;
provided = le16_to_cpu(gdp->bg_inode_bitmap_csum_lo);
calculated = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)bh->b_data, sz);
if (sbi->s_desc_size >= EXT4_BG_INODE_BITMAP_CSUM_HI_END) {
hi = le16_to_cpu(gdp->bg_inode_bitmap_csum_hi);
provided |= (hi << 16);
} else
calculated &= 0xFFFF;
return provided == calculated;
}
void ext4_inode_bitmap_csum_set(struct super_block *sb, ext4_group_t group,
struct ext4_group_desc *gdp,
struct buffer_head *bh, int sz)
{
__u32 csum;
struct ext4_sb_info *sbi = EXT4_SB(sb);
if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
return;
csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)bh->b_data, sz);
gdp->bg_inode_bitmap_csum_lo = cpu_to_le16(csum & 0xFFFF);
if (sbi->s_desc_size >= EXT4_BG_INODE_BITMAP_CSUM_HI_END)
gdp->bg_inode_bitmap_csum_hi = cpu_to_le16(csum >> 16);
}
int ext4_block_bitmap_csum_verify(struct super_block *sb, ext4_group_t group,
struct ext4_group_desc *gdp,
struct buffer_head *bh, int sz)
{
__u32 hi;
__u32 provided, calculated;
struct ext4_sb_info *sbi = EXT4_SB(sb);
if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
return 1;
provided = le16_to_cpu(gdp->bg_block_bitmap_csum_lo);
calculated = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)bh->b_data, sz);
if (sbi->s_desc_size >= EXT4_BG_BLOCK_BITMAP_CSUM_HI_END) {
hi = le16_to_cpu(gdp->bg_block_bitmap_csum_hi);
provided |= (hi << 16);
} else
calculated &= 0xFFFF;
if (provided == calculated)
return 1;
ext4_error(sb, "Bad block bitmap checksum: block_group = %u", group);
return 0;
}
void ext4_block_bitmap_csum_set(struct super_block *sb, ext4_group_t group,
struct ext4_group_desc *gdp,
struct buffer_head *bh, int sz)
{
__u32 csum;
struct ext4_sb_info *sbi = EXT4_SB(sb);
if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
return;
csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)bh->b_data, sz);
gdp->bg_block_bitmap_csum_lo = cpu_to_le16(csum & 0xFFFF);
if (sbi->s_desc_size >= EXT4_BG_BLOCK_BITMAP_CSUM_HI_END)
gdp->bg_block_bitmap_csum_hi = cpu_to_le16(csum >> 16);
}
......@@ -179,6 +179,18 @@ static int ext4_readdir(struct file *filp,
continue;
}
/* Check the checksum */
if (!buffer_verified(bh) &&
!ext4_dirent_csum_verify(inode,
(struct ext4_dir_entry *)bh->b_data)) {
EXT4_ERROR_FILE(filp, 0, "directory fails checksum "
"at offset %llu",
(unsigned long long)filp->f_pos);
filp->f_pos += sb->s_blocksize - offset;
continue;
}
set_buffer_verified(bh);
revalidate:
/* If the dir block has changed since the last call to
* readdir(2), then we might be pointing to an invalid
......
......@@ -29,6 +29,7 @@
#include <linux/wait.h>
#include <linux/blockgroup_lock.h>
#include <linux/percpu_counter.h>
#include <crypto/hash.h>
#ifdef __KERNEL__
#include <linux/compat.h>
#endif
......@@ -298,7 +299,9 @@ struct ext4_group_desc
__le16 bg_free_inodes_count_lo;/* Free inodes count */
__le16 bg_used_dirs_count_lo; /* Directories count */
__le16 bg_flags; /* EXT4_BG_flags (INODE_UNINIT, etc) */
__u32 bg_reserved[2]; /* Likely block/inode bitmap checksum */
__le32 bg_exclude_bitmap_lo; /* Exclude bitmap for snapshots */
__le16 bg_block_bitmap_csum_lo;/* crc32c(s_uuid+grp_num+bbitmap) LE */
__le16 bg_inode_bitmap_csum_lo;/* crc32c(s_uuid+grp_num+ibitmap) LE */
__le16 bg_itable_unused_lo; /* Unused inodes count */
__le16 bg_checksum; /* crc16(sb_uuid+group+desc) */
__le32 bg_block_bitmap_hi; /* Blocks bitmap block MSB */
......@@ -308,9 +311,19 @@ struct ext4_group_desc
__le16 bg_free_inodes_count_hi;/* Free inodes count MSB */
__le16 bg_used_dirs_count_hi; /* Directories count MSB */
__le16 bg_itable_unused_hi; /* Unused inodes count MSB */
__u32 bg_reserved2[3];
__le32 bg_exclude_bitmap_hi; /* Exclude bitmap block MSB */
__le16 bg_block_bitmap_csum_hi;/* crc32c(s_uuid+grp_num+bbitmap) BE */
__le16 bg_inode_bitmap_csum_hi;/* crc32c(s_uuid+grp_num+ibitmap) BE */
__u32 bg_reserved;
};
#define EXT4_BG_INODE_BITMAP_CSUM_HI_END \
(offsetof(struct ext4_group_desc, bg_inode_bitmap_csum_hi) + \
sizeof(__le16))
#define EXT4_BG_BLOCK_BITMAP_CSUM_HI_END \
(offsetof(struct ext4_group_desc, bg_block_bitmap_csum_hi) + \
sizeof(__le16))
/*
* Structure of a flex block group info
*/
......@@ -650,7 +663,8 @@ struct ext4_inode {
__le16 l_i_file_acl_high;
__le16 l_i_uid_high; /* these 2 fields */
__le16 l_i_gid_high; /* were reserved2[0] */
__u32 l_i_reserved2;
__le16 l_i_checksum_lo;/* crc32c(uuid+inum+inode) LE */
__le16 l_i_reserved;
} linux2;
struct {
__le16 h_i_reserved1; /* Obsoleted fragment number/size which are removed in ext4 */
......@@ -666,7 +680,7 @@ struct ext4_inode {
} masix2;
} osd2; /* OS dependent 2 */
__le16 i_extra_isize;
__le16 i_pad1;
__le16 i_checksum_hi; /* crc32c(uuid+inum+inode) BE */
__le32 i_ctime_extra; /* extra Change time (nsec << 2 | epoch) */
__le32 i_mtime_extra; /* extra Modification time(nsec << 2 | epoch) */
__le32 i_atime_extra; /* extra Access time (nsec << 2 | epoch) */
......@@ -768,7 +782,7 @@ do { \
#define i_gid_low i_gid
#define i_uid_high osd2.linux2.l_i_uid_high
#define i_gid_high osd2.linux2.l_i_gid_high
#define i_reserved2 osd2.linux2.l_i_reserved2
#define i_checksum_lo osd2.linux2.l_i_checksum_lo
#elif defined(__GNU__)
......@@ -908,6 +922,9 @@ struct ext4_inode_info {
*/
tid_t i_sync_tid;
tid_t i_datasync_tid;
/* Precomputed uuid+inum+igen checksum for seeding inode checksums */
__u32 i_csum_seed;
};
/*
......@@ -1001,6 +1018,9 @@ extern void ext4_set_bits(void *bm, int cur, int len);
#define EXT4_ERRORS_PANIC 3 /* Panic */
#define EXT4_ERRORS_DEFAULT EXT4_ERRORS_CONTINUE
/* Metadata checksum algorithm codes */
#define EXT4_CRC32C_CHKSUM 1
/*
* Structure of the super block
*/
......@@ -1087,7 +1107,7 @@ struct ext4_super_block {
__le64 s_mmp_block; /* Block for multi-mount protection */
__le32 s_raid_stripe_width; /* blocks on all data disks (N*stride)*/
__u8 s_log_groups_per_flex; /* FLEX_BG group size */
__u8 s_reserved_char_pad;
__u8 s_checksum_type; /* metadata checksum algorithm used */
__le16 s_reserved_pad;
__le64 s_kbytes_written; /* nr of lifetime kilobytes written */
__le32 s_snapshot_inum; /* Inode number of active snapshot */
......@@ -1113,7 +1133,8 @@ struct ext4_super_block {
__le32 s_usr_quota_inum; /* inode for tracking user quota */
__le32 s_grp_quota_inum; /* inode for tracking group quota */
__le32 s_overhead_clusters; /* overhead blocks/clusters in fs */
__le32 s_reserved[109]; /* Padding to the end of the block */
__le32 s_reserved[108]; /* Padding to the end of the block */
__le32 s_checksum; /* crc32c(superblock) */
};
#define EXT4_S_ERR_LEN (EXT4_S_ERR_END - EXT4_S_ERR_START)
......@@ -1176,6 +1197,7 @@ struct ext4_sb_info {
struct proc_dir_entry *s_proc;
struct kobject s_kobj;
struct completion s_kobj_unregister;
struct super_block *s_sb;
/* Journaling */
struct journal_s *s_journal;
......@@ -1266,6 +1288,12 @@ struct ext4_sb_info {
/* record the last minlen when FITRIM is called. */
atomic_t s_last_trim_minblks;
/* Reference to checksum algorithm driver via cryptoapi */
struct crypto_shash *s_chksum_driver;
/* Precomputed FS UUID checksum for seeding other checksums */
__u32 s_csum_seed;
};
static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
......@@ -1414,6 +1442,12 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
#define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040
#define EXT4_FEATURE_RO_COMPAT_QUOTA 0x0100
#define EXT4_FEATURE_RO_COMPAT_BIGALLOC 0x0200
/*
* METADATA_CSUM also enables group descriptor checksums (GDT_CSUM). When
* METADATA_CSUM is set, group descriptor checksums use the same algorithm as
* all other data structures' checksums. However, the METADATA_CSUM and
* GDT_CSUM bits are mutually exclusive.
*/
#define EXT4_FEATURE_RO_COMPAT_METADATA_CSUM 0x0400
#define EXT4_FEATURE_INCOMPAT_COMPRESSION 0x0001
......@@ -1461,7 +1495,8 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE | \
EXT4_FEATURE_RO_COMPAT_BTREE_DIR |\
EXT4_FEATURE_RO_COMPAT_HUGE_FILE |\
EXT4_FEATURE_RO_COMPAT_BIGALLOC)
EXT4_FEATURE_RO_COMPAT_BIGALLOC |\
EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)
/*
* Default values for user and/or group using reserved blocks
......@@ -1526,6 +1561,18 @@ struct ext4_dir_entry_2 {
char name[EXT4_NAME_LEN]; /* File name */
};
/*
* This is a bogus directory entry at the end of each leaf block that
* records checksums.
*/
struct ext4_dir_entry_tail {
__le32 det_reserved_zero1; /* Pretend to be unused */
__le16 det_rec_len; /* 12 */
__u8 det_reserved_zero2; /* Zero name length */
__u8 det_reserved_ft; /* 0xDE, fake file type */
__le32 det_checksum; /* crc32c(uuid+inum+dirblock) */
};
/*
* Ext4 directory file types. Only the low 3 bits are used. The
* other bits are reserved for now.
......@@ -1541,6 +1588,8 @@ struct ext4_dir_entry_2 {
#define EXT4_FT_MAX 8
#define EXT4_FT_DIR_CSUM 0xDE
/*
* EXT4_DIR_PAD defines the directory entries boundaries
*
......@@ -1609,6 +1658,25 @@ static inline __le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize)
#define DX_HASH_HALF_MD4_UNSIGNED 4
#define DX_HASH_TEA_UNSIGNED 5
static inline u32 ext4_chksum(struct ext4_sb_info *sbi, u32 crc,
const void *address, unsigned int length)
{
struct {
struct shash_desc shash;
char ctx[crypto_shash_descsize(sbi->s_chksum_driver)];
} desc;
int err;
desc.shash.tfm = sbi->s_chksum_driver;
desc.shash.flags = 0;
*(u32 *)desc.ctx = crc;
err = crypto_shash_update(&desc.shash, address, length);
BUG_ON(err);
return *(u32 *)desc.ctx;
}
#ifdef __KERNEL__
/* hash info structure used by the directory hash */
......@@ -1741,7 +1809,8 @@ struct mmp_struct {
__le16 mmp_check_interval;
__le16 mmp_pad1;
__le32 mmp_pad2[227];
__le32 mmp_pad2[226];
__le32 mmp_checksum; /* crc32c(uuid+mmp_block) */
};
/* arguments passed to the mmp thread */
......@@ -1784,8 +1853,24 @@ struct mmpd_data {
/* bitmap.c */
extern unsigned int ext4_count_free(struct buffer_head *, unsigned);
void ext4_inode_bitmap_csum_set(struct super_block *sb, ext4_group_t group,
struct ext4_group_desc *gdp,
struct buffer_head *bh, int sz);
int ext4_inode_bitmap_csum_verify(struct super_block *sb, ext4_group_t group,
struct ext4_group_desc *gdp,
struct buffer_head *bh, int sz);
void ext4_block_bitmap_csum_set(struct super_block *sb, ext4_group_t group,
struct ext4_group_desc *gdp,
struct buffer_head *bh, int sz);
int ext4_block_bitmap_csum_verify(struct super_block *sb, ext4_group_t group,
struct ext4_group_desc *gdp,
struct buffer_head *bh, int sz);
/* balloc.c */
extern void ext4_validate_block_bitmap(struct super_block *sb,
struct ext4_group_desc *desc,
unsigned int block_group,
struct buffer_head *bh);
extern unsigned int ext4_block_group(struct super_block *sb,
ext4_fsblk_t blocknr);
extern ext4_grpblk_t ext4_block_group_offset(struct super_block *sb,
......@@ -1864,7 +1949,7 @@ extern void ext4_end_bitmap_read(struct buffer_head *bh, int uptodate);
/* mballoc.c */
extern long ext4_mb_stats;
extern long ext4_mb_max_to_scan;
extern int ext4_mb_init(struct super_block *, int);
extern int ext4_mb_init(struct super_block *);
extern int ext4_mb_release(struct super_block *);
extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *,
struct ext4_allocation_request *, int *);
......@@ -1936,6 +2021,8 @@ extern long ext4_compat_ioctl(struct file *, unsigned int, unsigned long);
extern int ext4_ext_migrate(struct inode *);
/* namei.c */
extern int ext4_dirent_csum_verify(struct inode *inode,
struct ext4_dir_entry *dirent);
extern int ext4_orphan_add(handle_t *, struct inode *);
extern int ext4_orphan_del(handle_t *, struct inode *);
extern int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
......@@ -1950,6 +2037,10 @@ extern int ext4_group_extend(struct super_block *sb,
extern int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count);
/* super.c */
extern int ext4_superblock_csum_verify(struct super_block *sb,
struct ext4_super_block *es);
extern void ext4_superblock_csum_set(struct super_block *sb,
struct ext4_super_block *es);
extern void *ext4_kvmalloc(size_t size, gfp_t flags);
extern void *ext4_kvzalloc(size_t size, gfp_t flags);
extern void ext4_kvfree(void *ptr);
......@@ -2025,10 +2116,17 @@ extern void ext4_used_dirs_set(struct super_block *sb,
struct ext4_group_desc *bg, __u32 count);
extern void ext4_itable_unused_set(struct super_block *sb,
struct ext4_group_desc *bg, __u32 count);
extern __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 group,
struct ext4_group_desc *gdp);
extern int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 group,
extern int ext4_group_desc_csum_verify(struct super_block *sb, __u32 group,
struct ext4_group_desc *gdp);
extern void ext4_group_desc_csum_set(struct super_block *sb, __u32 group,
struct ext4_group_desc *gdp);
static inline int ext4_has_group_desc_csum(struct super_block *sb)
{
return EXT4_HAS_RO_COMPAT_FEATURE(sb,
EXT4_FEATURE_RO_COMPAT_GDT_CSUM |
EXT4_FEATURE_RO_COMPAT_METADATA_CSUM);
}
static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es)
{
......@@ -2225,6 +2323,9 @@ static inline void ext4_unlock_group(struct super_block *sb,
static inline void ext4_mark_super_dirty(struct super_block *sb)
{
struct ext4_super_block *es = EXT4_SB(sb)->s_es;
ext4_superblock_csum_set(sb, es);
if (EXT4_SB(sb)->s_journal == NULL)
sb->s_dirt =1;
}
......@@ -2314,6 +2415,9 @@ extern int ext4_bio_write_page(struct ext4_io_submit *io,
/* mmp.c */
extern int ext4_multi_mount_protect(struct super_block *, ext4_fsblk_t);
extern void ext4_mmp_csum_set(struct super_block *sb, struct mmp_struct *mmp);
extern int ext4_mmp_csum_verify(struct super_block *sb,
struct mmp_struct *mmp);
/* BH_Uninit flag: blocks are allocated but uninitialized on disk */
enum ext4_state_bits {
......
......@@ -63,8 +63,21 @@
* ext4_inode has i_block array (60 bytes total).
* The first 12 bytes store ext4_extent_header;
* the remainder stores an array of ext4_extent.
* For non-inode extent blocks, ext4_extent_tail
* follows the array.
*/
/*
* This is the extent tail on-disk structure.
* All other extent structures are 12 bytes long. It turns out that
* block_size % 12 >= 4 for at least all powers of 2 greater than 512, which
* covers all valid ext4 block sizes. Therefore, this tail structure can be
* crammed into the end of the block without having to rebalance the tree.
*/
struct ext4_extent_tail {
__le32 et_checksum; /* crc32c(uuid+inum+extent_block) */
};
/*
* This is the extent on-disk structure.
* It's used at the bottom of the tree.
......@@ -101,6 +114,17 @@ struct ext4_extent_header {
#define EXT4_EXT_MAGIC cpu_to_le16(0xf30a)
#define EXT4_EXTENT_TAIL_OFFSET(hdr) \
(sizeof(struct ext4_extent_header) + \
(sizeof(struct ext4_extent) * le16_to_cpu((hdr)->eh_max)))
static inline struct ext4_extent_tail *
find_ext4_extent_tail(struct ext4_extent_header *eh)
{
return (struct ext4_extent_tail *)(((void *)eh) +
EXT4_EXTENT_TAIL_OFFSET(eh));
}
/*
* Array of ext4_ext_path contains path to some extent.
* Creation/lookup routines use it for traversal/splitting/etc.
......
......@@ -138,16 +138,23 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line,
}
int __ext4_handle_dirty_super(const char *where, unsigned int line,
handle_t *handle, struct super_block *sb)
handle_t *handle, struct super_block *sb,
int now)
{
struct buffer_head *bh = EXT4_SB(sb)->s_sbh;
int err = 0;
if (ext4_handle_valid(handle)) {
ext4_superblock_csum_set(sb,
(struct ext4_super_block *)bh->b_data);
err = jbd2_journal_dirty_metadata(handle, bh);
if (err)
ext4_journal_abort_handle(where, line, __func__,
bh, handle, err);
} else if (now) {
ext4_superblock_csum_set(sb,
(struct ext4_super_block *)bh->b_data);
mark_buffer_dirty(bh);
} else
sb->s_dirt = 1;
return err;
......
......@@ -213,7 +213,8 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line,
struct buffer_head *bh);
int __ext4_handle_dirty_super(const char *where, unsigned int line,
handle_t *handle, struct super_block *sb);
handle_t *handle, struct super_block *sb,
int now);
#define ext4_journal_get_write_access(handle, bh) \
__ext4_journal_get_write_access(__func__, __LINE__, (handle), (bh))
......@@ -225,8 +226,10 @@ int __ext4_handle_dirty_super(const char *where, unsigned int line,
#define ext4_handle_dirty_metadata(handle, inode, bh) \
__ext4_handle_dirty_metadata(__func__, __LINE__, (handle), (inode), \
(bh))
#define ext4_handle_dirty_super_now(handle, sb) \
__ext4_handle_dirty_super(__func__, __LINE__, (handle), (sb), 1)
#define ext4_handle_dirty_super(handle, sb) \
__ext4_handle_dirty_super(__func__, __LINE__, (handle), (sb))
__ext4_handle_dirty_super(__func__, __LINE__, (handle), (sb), 0)
handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks);
int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle);
......
......@@ -52,6 +52,46 @@
#define EXT4_EXT_MARK_UNINIT1 0x2 /* mark first half uninitialized */
#define EXT4_EXT_MARK_UNINIT2 0x4 /* mark second half uninitialized */
static __le32 ext4_extent_block_csum(struct inode *inode,
struct ext4_extent_header *eh)
{
struct ext4_inode_info *ei = EXT4_I(inode);
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
__u32 csum;
csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)eh,
EXT4_EXTENT_TAIL_OFFSET(eh));
return cpu_to_le32(csum);
}
static int ext4_extent_block_csum_verify(struct inode *inode,
struct ext4_extent_header *eh)
{
struct ext4_extent_tail *et;
if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
return 1;
et = find_ext4_extent_tail(eh);
if (et->et_checksum != ext4_extent_block_csum(inode, eh))
return 0;
return 1;
}
static void ext4_extent_block_csum_set(struct inode *inode,
struct ext4_extent_header *eh)
{
struct ext4_extent_tail *et;
if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
return;
et = find_ext4_extent_tail(eh);
et->et_checksum = ext4_extent_block_csum(inode, eh);
}
static int ext4_split_extent(handle_t *handle,
struct inode *inode,
struct ext4_ext_path *path,
......@@ -117,6 +157,7 @@ static int __ext4_ext_dirty(const char *where, unsigned int line,
{
int err;
if (path->p_bh) {
ext4_extent_block_csum_set(inode, ext_block_hdr(path->p_bh));
/* path points to block */
err = __ext4_handle_dirty_metadata(where, line, handle,
inode, path->p_bh);
......@@ -391,6 +432,12 @@ static int __ext4_ext_check(const char *function, unsigned int line,
error_msg = "invalid extent entries";
goto corrupted;
}
/* Verify checksum on non-root extent tree nodes */
if (ext_depth(inode) != depth &&
!ext4_extent_block_csum_verify(inode, eh)) {
error_msg = "extent tree corrupted";
goto corrupted;
}
return 0;
corrupted:
......@@ -412,6 +459,26 @@ int ext4_ext_check_inode(struct inode *inode)
return ext4_ext_check(inode, ext_inode_hdr(inode), ext_depth(inode));
}
static int __ext4_ext_check_block(const char *function, unsigned int line,
struct inode *inode,
struct ext4_extent_header *eh,
int depth,
struct buffer_head *bh)
{
int ret;
if (buffer_verified(bh))
return 0;
ret = ext4_ext_check(inode, eh, depth);
if (ret)
return ret;
set_buffer_verified(bh);
return ret;
}
#define ext4_ext_check_block(inode, eh, depth, bh) \
__ext4_ext_check_block(__func__, __LINE__, inode, eh, depth, bh)
#ifdef EXT_DEBUG
static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path)
{
......@@ -536,7 +603,7 @@ ext4_ext_binsearch_idx(struct inode *inode,
}
path->p_idx = l - 1;
ext_debug(" -> %d->%lld ", le32_to_cpu(path->p_idx->ei_block),
ext_debug(" -> %u->%lld ", le32_to_cpu(path->p_idx->ei_block),
ext4_idx_pblock(path->p_idx));
#ifdef CHECK_BINSEARCH
......@@ -668,8 +735,6 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
i = depth;
/* walk through the tree */
while (i) {
int need_to_validate = 0;
ext_debug("depth %d: num %d, max %d\n",
ppos, le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max));
......@@ -688,8 +753,6 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
put_bh(bh);
goto err;
}
/* validate the extent entries */
need_to_validate = 1;
}
eh = ext_block_hdr(bh);
ppos++;
......@@ -703,7 +766,7 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
path[ppos].p_hdr = eh;
i--;
if (need_to_validate && ext4_ext_check(inode, eh, i))
if (ext4_ext_check_block(inode, eh, i, bh))
goto err;
}
......@@ -914,6 +977,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
le16_add_cpu(&neh->eh_entries, m);
}
ext4_extent_block_csum_set(inode, neh);
set_buffer_uptodate(bh);
unlock_buffer(bh);
......@@ -992,6 +1056,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
sizeof(struct ext4_extent_idx) * m);
le16_add_cpu(&neh->eh_entries, m);
}
ext4_extent_block_csum_set(inode, neh);
set_buffer_uptodate(bh);
unlock_buffer(bh);
......@@ -1089,6 +1154,7 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
else
neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode, 0));
neh->eh_magic = EXT4_EXT_MAGIC;
ext4_extent_block_csum_set(inode, neh);
set_buffer_uptodate(bh);
unlock_buffer(bh);
......@@ -1344,7 +1410,8 @@ static int ext4_ext_search_right(struct inode *inode,
return -EIO;
eh = ext_block_hdr(bh);
/* subtract from p_depth to get proper eh_depth */
if (ext4_ext_check(inode, eh, path->p_depth - depth)) {
if (ext4_ext_check_block(inode, eh,
path->p_depth - depth, bh)) {
put_bh(bh);
return -EIO;
}
......@@ -1357,7 +1424,7 @@ static int ext4_ext_search_right(struct inode *inode,
if (bh == NULL)
return -EIO;
eh = ext_block_hdr(bh);
if (ext4_ext_check(inode, eh, path->p_depth - depth)) {
if (ext4_ext_check_block(inode, eh, path->p_depth - depth, bh)) {
put_bh(bh);
return -EIO;
}
......@@ -2644,8 +2711,8 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
err = -EIO;
break;
}
if (ext4_ext_check(inode, ext_block_hdr(bh),
depth - i - 1)) {
if (ext4_ext_check_block(inode, ext_block_hdr(bh),
depth - i - 1, bh)) {
err = -EIO;
break;
}
......@@ -4722,8 +4789,8 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
/* Now release the pages */
if (last_page_offset > first_page_offset) {
truncate_inode_pages_range(mapping, first_page_offset,
last_page_offset-1);
truncate_pagecache_range(inode, first_page_offset,
last_page_offset - 1);
}
/* finish any pending end_io work */
......
......@@ -95,7 +95,7 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
{
struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
int unaligned_aio = 0;
int ret;
ssize_t ret;
/*
* If we have encountered a bitmap-format file, the size limit
......
......@@ -70,24 +70,27 @@ static unsigned ext4_init_inode_bitmap(struct super_block *sb,
ext4_group_t block_group,
struct ext4_group_desc *gdp)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
J_ASSERT_BH(bh, buffer_locked(bh));
/* If checksum is bad mark all blocks and inodes use to prevent
* allocation, essentially implementing a per-group read-only flag. */
if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) {
ext4_error(sb, "Checksum bad for group %u", block_group);
ext4_free_group_clusters_set(sb, gdp, 0);
ext4_free_inodes_set(sb, gdp, 0);
ext4_itable_unused_set(sb, gdp, 0);
memset(bh->b_data, 0xff, sb->s_blocksize);
ext4_inode_bitmap_csum_set(sb, block_group, gdp, bh,
EXT4_INODES_PER_GROUP(sb) / 8);
return 0;
}
memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8);
ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8,
bh->b_data);
ext4_inode_bitmap_csum_set(sb, block_group, gdp, bh,
EXT4_INODES_PER_GROUP(sb) / 8);
ext4_group_desc_csum_set(sb, block_group, gdp);
return EXT4_INODES_PER_GROUP(sb);
}
......@@ -128,12 +131,12 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
return NULL;
}
if (bitmap_uptodate(bh))
return bh;
goto verify;
lock_buffer(bh);
if (bitmap_uptodate(bh)) {
unlock_buffer(bh);
return bh;
goto verify;
}
ext4_lock_group(sb, block_group);
......@@ -141,6 +144,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
ext4_init_inode_bitmap(sb, bh, block_group, desc);
set_bitmap_uptodate(bh);
set_buffer_uptodate(bh);
set_buffer_verified(bh);
ext4_unlock_group(sb, block_group);
unlock_buffer(bh);
return bh;
......@@ -154,7 +158,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
*/
set_bitmap_uptodate(bh);
unlock_buffer(bh);
return bh;
goto verify;
}
/*
* submit the buffer_head for reading
......@@ -171,6 +175,20 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
block_group, bitmap_blk);
return NULL;
}
verify:
ext4_lock_group(sb, block_group);
if (!buffer_verified(bh) &&
!ext4_inode_bitmap_csum_verify(sb, block_group, desc, bh,
EXT4_INODES_PER_GROUP(sb) / 8)) {
ext4_unlock_group(sb, block_group);
put_bh(bh);
ext4_error(sb, "Corrupt inode bitmap - block_group = %u, "
"inode_bitmap = %llu", block_group, bitmap_blk);
return NULL;
}
ext4_unlock_group(sb, block_group);
set_buffer_verified(bh);
return bh;
}
......@@ -276,7 +294,9 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
ext4_used_dirs_set(sb, gdp, count);
percpu_counter_dec(&sbi->s_dirs_counter);
}
gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp);
ext4_inode_bitmap_csum_set(sb, block_group, gdp, bitmap_bh,
EXT4_INODES_PER_GROUP(sb) / 8);
ext4_group_desc_csum_set(sb, block_group, gdp);
ext4_unlock_group(sb, block_group);
percpu_counter_inc(&sbi->s_freeinodes_counter);
......@@ -488,10 +508,12 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
for (i = 0; i < ngroups; i++) {
grp = (parent_group + i) % ngroups;
desc = ext4_get_group_desc(sb, grp, NULL);
grp_free = ext4_free_inodes_count(sb, desc);
if (desc && grp_free && grp_free >= avefreei) {
*group = grp;
return 0;
if (desc) {
grp_free = ext4_free_inodes_count(sb, desc);
if (grp_free && grp_free >= avefreei) {
*group = grp;
return 0;
}
}
}
......@@ -709,7 +731,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, umode_t mode,
got:
/* We may have to initialize the block bitmap if it isn't already */
if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM) &&
if (ext4_has_group_desc_csum(sb) &&
gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
struct buffer_head *block_bitmap_bh;
......@@ -731,8 +753,11 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, umode_t mode,
gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
ext4_free_group_clusters_set(sb, gdp,
ext4_free_clusters_after_init(sb, group, gdp));
gdp->bg_checksum = ext4_group_desc_csum(sbi, group,
gdp);
ext4_block_bitmap_csum_set(sb, group, gdp,
block_bitmap_bh,
EXT4_BLOCKS_PER_GROUP(sb) /
8);
ext4_group_desc_csum_set(sb, group, gdp);
}
ext4_unlock_group(sb, group);
......@@ -751,7 +776,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, umode_t mode,
goto fail;
/* Update the relevant bg descriptor fields */
if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
if (ext4_has_group_desc_csum(sb)) {
int free;
struct ext4_group_info *grp = ext4_get_group_info(sb, group);
......@@ -772,7 +797,10 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, umode_t mode,
ext4_itable_unused_set(sb, gdp,
(EXT4_INODES_PER_GROUP(sb) - ino));
up_read(&grp->alloc_sem);
} else {
ext4_lock_group(sb, group);
}
ext4_free_inodes_set(sb, gdp, ext4_free_inodes_count(sb, gdp) - 1);
if (S_ISDIR(mode)) {
ext4_used_dirs_set(sb, gdp, ext4_used_dirs_count(sb, gdp) + 1);
......@@ -782,10 +810,12 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, umode_t mode,
atomic_inc(&sbi->s_flex_groups[f].used_dirs);
}
}
if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
ext4_unlock_group(sb, group);
if (ext4_has_group_desc_csum(sb)) {
ext4_inode_bitmap_csum_set(sb, group, gdp, inode_bitmap_bh,
EXT4_INODES_PER_GROUP(sb) / 8);
ext4_group_desc_csum_set(sb, group, gdp);
}
ext4_unlock_group(sb, group);
BUFFER_TRACE(inode_bitmap_bh, "call ext4_handle_dirty_metadata");
err = ext4_handle_dirty_metadata(handle, NULL, inode_bitmap_bh);
......@@ -850,6 +880,19 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, umode_t mode,
inode->i_generation = sbi->s_next_generation++;
spin_unlock(&sbi->s_next_gen_lock);
/* Precompute checksum seed for inode metadata */
if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) {
__u32 csum;
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
__le32 inum = cpu_to_le32(inode->i_ino);
__le32 gen = cpu_to_le32(inode->i_generation);
csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&inum,
sizeof(inum));
ei->i_csum_seed = ext4_chksum(sbi, csum, (__u8 *)&gen,
sizeof(gen));
}
ext4_clear_state_flags(ei); /* Only relevant on 32-bit archs */
ext4_set_inode_state(inode, EXT4_STATE_NEW);
......@@ -1140,7 +1183,7 @@ int ext4_init_inode_table(struct super_block *sb, ext4_group_t group,
skip_zeroout:
ext4_lock_group(sb, group);
gdp->bg_flags |= cpu_to_le16(EXT4_BG_INODE_ZEROED);
gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
ext4_group_desc_csum_set(sb, group, gdp);
ext4_unlock_group(sb, group);
BUFFER_TRACE(group_desc_bh,
......
......@@ -47,6 +47,73 @@
#define MPAGE_DA_EXTENT_TAIL 0x01
static __u32 ext4_inode_csum(struct inode *inode, struct ext4_inode *raw,
struct ext4_inode_info *ei)
{
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
__u16 csum_lo;
__u16 csum_hi = 0;
__u32 csum;
csum_lo = raw->i_checksum_lo;
raw->i_checksum_lo = 0;
if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE &&
EXT4_FITS_IN_INODE(raw, ei, i_checksum_hi)) {
csum_hi = raw->i_checksum_hi;
raw->i_checksum_hi = 0;
}
csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)raw,
EXT4_INODE_SIZE(inode->i_sb));
raw->i_checksum_lo = csum_lo;
if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE &&
EXT4_FITS_IN_INODE(raw, ei, i_checksum_hi))
raw->i_checksum_hi = csum_hi;
return csum;
}
static int ext4_inode_csum_verify(struct inode *inode, struct ext4_inode *raw,
struct ext4_inode_info *ei)
{
__u32 provided, calculated;
if (EXT4_SB(inode->i_sb)->s_es->s_creator_os !=
cpu_to_le32(EXT4_OS_LINUX) ||
!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
return 1;
provided = le16_to_cpu(raw->i_checksum_lo);
calculated = ext4_inode_csum(inode, raw, ei);
if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE &&
EXT4_FITS_IN_INODE(raw, ei, i_checksum_hi))
provided |= ((__u32)le16_to_cpu(raw->i_checksum_hi)) << 16;
else
calculated &= 0xFFFF;
return provided == calculated;
}
static void ext4_inode_csum_set(struct inode *inode, struct ext4_inode *raw,
struct ext4_inode_info *ei)
{
__u32 csum;
if (EXT4_SB(inode->i_sb)->s_es->s_creator_os !=
cpu_to_le32(EXT4_OS_LINUX) ||
!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
return;
csum = ext4_inode_csum(inode, raw, ei);
raw->i_checksum_lo = cpu_to_le16(csum & 0xFFFF);
if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE &&
EXT4_FITS_IN_INODE(raw, ei, i_checksum_hi))
raw->i_checksum_hi = cpu_to_le16(csum >> 16);
}
static inline int ext4_begin_ordered_truncate(struct inode *inode,
loff_t new_size)
{
......@@ -3517,8 +3584,7 @@ static int __ext4_get_inode_loc(struct inode *inode,
b = table;
end = b + EXT4_SB(sb)->s_inode_readahead_blks;
num = EXT4_INODES_PER_GROUP(sb);
if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
EXT4_FEATURE_RO_COMPAT_GDT_CSUM))
if (ext4_has_group_desc_csum(sb))
num -= ext4_itable_unused_count(sb, gdp);
table += num / inodes_per_block;
if (end > table)
......@@ -3646,6 +3712,39 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
if (ret < 0)
goto bad_inode;
raw_inode = ext4_raw_inode(&iloc);
if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) {
ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize);
if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize >
EXT4_INODE_SIZE(inode->i_sb)) {
EXT4_ERROR_INODE(inode, "bad extra_isize (%u != %u)",
EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize,
EXT4_INODE_SIZE(inode->i_sb));
ret = -EIO;
goto bad_inode;
}
} else
ei->i_extra_isize = 0;
/* Precompute checksum seed for inode metadata */
if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) {
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
__u32 csum;
__le32 inum = cpu_to_le32(inode->i_ino);
__le32 gen = raw_inode->i_generation;
csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&inum,
sizeof(inum));
ei->i_csum_seed = ext4_chksum(sbi, csum, (__u8 *)&gen,
sizeof(gen));
}
if (!ext4_inode_csum_verify(inode, raw_inode, ei)) {
EXT4_ERROR_INODE(inode, "checksum invalid");
ret = -EIO;
goto bad_inode;
}
inode->i_mode = le16_to_cpu(raw_inode->i_mode);
i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low);
......@@ -3725,12 +3824,6 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
}
if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) {
ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize);
if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize >
EXT4_INODE_SIZE(inode->i_sb)) {
ret = -EIO;
goto bad_inode;
}
if (ei->i_extra_isize == 0) {
/* The extra space is currently unused. Use it. */
ei->i_extra_isize = sizeof(struct ext4_inode) -
......@@ -3742,8 +3835,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
if (*magic == cpu_to_le32(EXT4_XATTR_MAGIC))
ext4_set_inode_state(inode, EXT4_STATE_XATTR);
}
} else
ei->i_extra_isize = 0;
}
EXT4_INODE_GET_XTIME(i_ctime, inode, raw_inode);
EXT4_INODE_GET_XTIME(i_mtime, inode, raw_inode);
......@@ -3942,7 +4034,7 @@ static int ext4_do_update_inode(handle_t *handle,
EXT4_SET_RO_COMPAT_FEATURE(sb,
EXT4_FEATURE_RO_COMPAT_LARGE_FILE);
ext4_handle_sync(handle);
err = ext4_handle_dirty_super(handle, sb);
err = ext4_handle_dirty_super_now(handle, sb);
}
}
raw_inode->i_generation = cpu_to_le32(inode->i_generation);
......@@ -3969,6 +4061,8 @@ static int ext4_do_update_inode(handle_t *handle,
raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize);
}
ext4_inode_csum_set(inode, raw_inode, ei);
BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
rc = ext4_handle_dirty_metadata(handle, NULL, bh);
if (!err)
......@@ -4213,7 +4307,8 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry,
* will return the blocks that include the delayed allocation
* blocks for this file.
*/
delalloc_blocks = EXT4_I(inode)->i_reserved_data_blocks;
delalloc_blocks = EXT4_C2B(EXT4_SB(inode->i_sb),
EXT4_I(inode)->i_reserved_data_blocks);
stat->blocks += (delalloc_blocks << inode->i_sb->s_blocksize_bits)>>9;
return 0;
......
......@@ -38,7 +38,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
handle_t *handle = NULL;
int err, migrate = 0;
struct ext4_iloc iloc;
unsigned int oldflags;
unsigned int oldflags, mask, i;
unsigned int jflag;
if (!inode_owner_or_capable(inode))
......@@ -115,8 +115,14 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
if (err)
goto flags_err;
flags = flags & EXT4_FL_USER_MODIFIABLE;
flags |= oldflags & ~EXT4_FL_USER_MODIFIABLE;
for (i = 0, mask = 1; i < 32; i++, mask <<= 1) {
if (!(mask & EXT4_FL_USER_MODIFIABLE))
continue;
if (mask & flags)
ext4_set_inode_flag(inode, i);
else
ext4_clear_inode_flag(inode, i);
}
ei->i_flags = flags;
ext4_set_inode_flags(inode);
......@@ -152,6 +158,13 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
if (!inode_owner_or_capable(inode))
return -EPERM;
if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) {
ext4_warning(sb, "Setting inode version is not "
"supported with metadata_csum enabled.");
return -ENOTTY;
}
err = mnt_want_write_file(filp);
if (err)
return err;
......
......@@ -788,7 +788,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
int first_block;
struct super_block *sb;
struct buffer_head *bhs;
struct buffer_head **bh;
struct buffer_head **bh = NULL;
struct inode *inode;
char *data;
char *bitmap;
......@@ -2375,7 +2375,7 @@ static int ext4_groupinfo_create_slab(size_t size)
return 0;
}
int ext4_mb_init(struct super_block *sb, int needs_recovery)
int ext4_mb_init(struct super_block *sb)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
unsigned i, j;
......@@ -2517,6 +2517,9 @@ int ext4_mb_release(struct super_block *sb)
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits);
if (sbi->s_proc)
remove_proc_entry("mb_groups", sbi->s_proc);
if (sbi->s_group_info) {
for (i = 0; i < ngroups; i++) {
grinfo = ext4_get_group_info(sb, i);
......@@ -2564,8 +2567,6 @@ int ext4_mb_release(struct super_block *sb)
}
free_percpu(sbi->s_locality_groups);
if (sbi->s_proc)
remove_proc_entry("mb_groups", sbi->s_proc);
return 0;
}
......@@ -2797,7 +2798,9 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
}
len = ext4_free_group_clusters(sb, gdp) - ac->ac_b_ex.fe_len;
ext4_free_group_clusters_set(sb, gdp, len);
gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp);
ext4_block_bitmap_csum_set(sb, ac->ac_b_ex.fe_group, gdp, bitmap_bh,
EXT4_BLOCKS_PER_GROUP(sb) / 8);
ext4_group_desc_csum_set(sb, ac->ac_b_ex.fe_group, gdp);
ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
percpu_counter_sub(&sbi->s_freeclusters_counter, ac->ac_b_ex.fe_len);
......@@ -3071,13 +3074,9 @@ static void ext4_mb_collect_stats(struct ext4_allocation_context *ac)
static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac)
{
struct ext4_prealloc_space *pa = ac->ac_pa;
int len;
if (pa && pa->pa_type == MB_INODE_PA) {
len = ac->ac_b_ex.fe_len;
pa->pa_free += len;
}
if (pa && pa->pa_type == MB_INODE_PA)
pa->pa_free += ac->ac_b_ex.fe_len;
}
/*
......@@ -4636,6 +4635,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
*/
new_entry = kmem_cache_alloc(ext4_free_data_cachep, GFP_NOFS);
if (!new_entry) {
ext4_mb_unload_buddy(&e4b);
err = -ENOMEM;
goto error_return;
}
......@@ -4659,7 +4659,9 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
ret = ext4_free_group_clusters(sb, gdp) + count_clusters;
ext4_free_group_clusters_set(sb, gdp, ret);
gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp);
ext4_block_bitmap_csum_set(sb, block_group, gdp, bitmap_bh,
EXT4_BLOCKS_PER_GROUP(sb) / 8);
ext4_group_desc_csum_set(sb, block_group, gdp);
ext4_unlock_group(sb, block_group);
percpu_counter_add(&sbi->s_freeclusters_counter, count_clusters);
......@@ -4803,7 +4805,9 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
mb_free_blocks(NULL, &e4b, bit, count);
blk_free_count = blocks_freed + ext4_free_group_clusters(sb, desc);
ext4_free_group_clusters_set(sb, desc, blk_free_count);
desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc);
ext4_block_bitmap_csum_set(sb, block_group, desc, bitmap_bh,
EXT4_BLOCKS_PER_GROUP(sb) / 8);
ext4_group_desc_csum_set(sb, block_group, desc);
ext4_unlock_group(sb, block_group);
percpu_counter_add(&sbi->s_freeclusters_counter,
EXT4_B2C(sbi, blocks_freed));
......
......@@ -6,12 +6,45 @@
#include "ext4.h"
/* Checksumming functions */
static __u32 ext4_mmp_csum(struct super_block *sb, struct mmp_struct *mmp)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
int offset = offsetof(struct mmp_struct, mmp_checksum);
__u32 csum;
csum = ext4_chksum(sbi, sbi->s_csum_seed, (char *)mmp, offset);
return cpu_to_le32(csum);
}
int ext4_mmp_csum_verify(struct super_block *sb, struct mmp_struct *mmp)
{
if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
return 1;
return mmp->mmp_checksum == ext4_mmp_csum(sb, mmp);
}
void ext4_mmp_csum_set(struct super_block *sb, struct mmp_struct *mmp)
{
if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
return;
mmp->mmp_checksum = ext4_mmp_csum(sb, mmp);
}
/*
* Write the MMP block using WRITE_SYNC to try to get the block on-disk
* faster.
*/
static int write_mmp_block(struct buffer_head *bh)
static int write_mmp_block(struct super_block *sb, struct buffer_head *bh)
{
struct mmp_struct *mmp = (struct mmp_struct *)(bh->b_data);
ext4_mmp_csum_set(sb, mmp);
mark_buffer_dirty(bh);
lock_buffer(bh);
bh->b_end_io = end_buffer_write_sync;
......@@ -59,7 +92,8 @@ static int read_mmp_block(struct super_block *sb, struct buffer_head **bh,
}
mmp = (struct mmp_struct *)((*bh)->b_data);
if (le32_to_cpu(mmp->mmp_magic) != EXT4_MMP_MAGIC)
if (le32_to_cpu(mmp->mmp_magic) != EXT4_MMP_MAGIC ||
!ext4_mmp_csum_verify(sb, mmp))
return -EINVAL;
return 0;
......@@ -120,7 +154,7 @@ static int kmmpd(void *data)
mmp->mmp_time = cpu_to_le64(get_seconds());
last_update_time = jiffies;
retval = write_mmp_block(bh);
retval = write_mmp_block(sb, bh);
/*
* Don't spew too many error messages. Print one every
* (s_mmp_update_interval * 60) seconds.
......@@ -200,7 +234,7 @@ static int kmmpd(void *data)
mmp->mmp_seq = cpu_to_le32(EXT4_MMP_SEQ_CLEAN);
mmp->mmp_time = cpu_to_le64(get_seconds());
retval = write_mmp_block(bh);
retval = write_mmp_block(sb, bh);
failed:
kfree(data);
......@@ -299,7 +333,7 @@ int ext4_multi_mount_protect(struct super_block *sb,
seq = mmp_new_seq();
mmp->mmp_seq = cpu_to_le32(seq);
retval = write_mmp_block(bh);
retval = write_mmp_block(sb, bh);
if (retval)
goto failed;
......
此差异已折叠。
......@@ -161,6 +161,8 @@ static struct ext4_new_flex_group_data *alloc_flex_gd(unsigned long flexbg_size)
if (flex_gd == NULL)
goto out3;
if (flexbg_size >= UINT_MAX / sizeof(struct ext4_new_flex_group_data))
goto out2;
flex_gd->count = flexbg_size;
flex_gd->groups = kmalloc(sizeof(struct ext4_new_group_data) *
......@@ -796,7 +798,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
ext4_kvfree(o_group_desc);
le16_add_cpu(&es->s_reserved_gdt_blocks, -1);
err = ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh);
err = ext4_handle_dirty_super_now(handle, sb);
if (err)
ext4_std_error(sb, err);
......@@ -968,6 +970,8 @@ static void update_backups(struct super_block *sb,
goto exit_err;
}
ext4_superblock_csum_set(sb, (struct ext4_super_block *)data);
while ((group = ext4_list_backups(sb, &three, &five, &seven)) < last) {
struct buffer_head *bh;
......@@ -1067,6 +1071,54 @@ static int ext4_add_new_descs(handle_t *handle, struct super_block *sb,
return err;
}
static struct buffer_head *ext4_get_bitmap(struct super_block *sb, __u64 block)
{
struct buffer_head *bh = sb_getblk(sb, block);
if (!bh)
return NULL;
if (bitmap_uptodate(bh))
return bh;
lock_buffer(bh);
if (bh_submit_read(bh) < 0) {
unlock_buffer(bh);
brelse(bh);
return NULL;
}
unlock_buffer(bh);
return bh;
}
static int ext4_set_bitmap_checksums(struct super_block *sb,
ext4_group_t group,
struct ext4_group_desc *gdp,
struct ext4_new_group_data *group_data)
{
struct buffer_head *bh;
if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
return 0;
bh = ext4_get_bitmap(sb, group_data->inode_bitmap);
if (!bh)
return -EIO;
ext4_inode_bitmap_csum_set(sb, group, gdp, bh,
EXT4_INODES_PER_GROUP(sb) / 8);
brelse(bh);
bh = ext4_get_bitmap(sb, group_data->block_bitmap);
if (!bh)
return -EIO;
ext4_block_bitmap_csum_set(sb, group, gdp, bh,
EXT4_BLOCKS_PER_GROUP(sb) / 8);
brelse(bh);
return 0;
}
/*
* ext4_setup_new_descs() will set up the group descriptor descriptors of a flex bg
*/
......@@ -1093,18 +1145,24 @@ static int ext4_setup_new_descs(handle_t *handle, struct super_block *sb,
*/
gdb_bh = sbi->s_group_desc[gdb_num];
/* Update group descriptor block for new group */
gdp = (struct ext4_group_desc *)((char *)gdb_bh->b_data +
gdp = (struct ext4_group_desc *)(gdb_bh->b_data +
gdb_off * EXT4_DESC_SIZE(sb));
memset(gdp, 0, EXT4_DESC_SIZE(sb));
ext4_block_bitmap_set(sb, gdp, group_data->block_bitmap);
ext4_inode_bitmap_set(sb, gdp, group_data->inode_bitmap);
err = ext4_set_bitmap_checksums(sb, group, gdp, group_data);
if (err) {
ext4_std_error(sb, err);
break;
}
ext4_inode_table_set(sb, gdp, group_data->inode_table);
ext4_free_group_clusters_set(sb, gdp,
EXT4_B2C(sbi, group_data->free_blocks_count));
ext4_free_inodes_set(sb, gdp, EXT4_INODES_PER_GROUP(sb));
gdp->bg_flags = cpu_to_le16(*bg_flags);
gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
ext4_group_desc_csum_set(sb, group, gdp);
err = ext4_handle_dirty_metadata(handle, NULL, gdb_bh);
if (unlikely(err)) {
......@@ -1343,17 +1401,14 @@ static int ext4_setup_next_flex_gd(struct super_block *sb,
(1 + ext4_bg_num_gdb(sb, group + i) +
le16_to_cpu(es->s_reserved_gdt_blocks)) : 0;
group_data[i].free_blocks_count = blocks_per_group - overhead;
if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
EXT4_FEATURE_RO_COMPAT_GDT_CSUM))
if (ext4_has_group_desc_csum(sb))
flex_gd->bg_flags[i] = EXT4_BG_BLOCK_UNINIT |
EXT4_BG_INODE_UNINIT;
else
flex_gd->bg_flags[i] = EXT4_BG_INODE_ZEROED;
}
if (last_group == n_group &&
EXT4_HAS_RO_COMPAT_FEATURE(sb,
EXT4_FEATURE_RO_COMPAT_GDT_CSUM))
if (last_group == n_group && ext4_has_group_desc_csum(sb))
/* We need to initialize block bitmap of last group. */
flex_gd->bg_flags[i - 1] &= ~EXT4_BG_BLOCK_UNINIT;
......
......@@ -112,6 +112,48 @@ static struct file_system_type ext3_fs_type = {
#define IS_EXT3_SB(sb) (0)
#endif
static int ext4_verify_csum_type(struct super_block *sb,
struct ext4_super_block *es)
{
if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
return 1;
return es->s_checksum_type == EXT4_CRC32C_CHKSUM;
}
static __le32 ext4_superblock_csum(struct super_block *sb,
struct ext4_super_block *es)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
int offset = offsetof(struct ext4_super_block, s_checksum);
__u32 csum;
csum = ext4_chksum(sbi, ~0, (char *)es, offset);
return cpu_to_le32(csum);
}
int ext4_superblock_csum_verify(struct super_block *sb,
struct ext4_super_block *es)
{
if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
return 1;
return es->s_checksum == ext4_superblock_csum(sb, es);
}
void ext4_superblock_csum_set(struct super_block *sb,
struct ext4_super_block *es)
{
if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
return;
es->s_checksum = ext4_superblock_csum(sb, es);
}
void *ext4_kvmalloc(size_t size, gfp_t flags)
{
void *ret;
......@@ -497,6 +539,7 @@ void __ext4_error(struct super_block *sb, const char *function,
printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: comm %s: %pV\n",
sb->s_id, function, line, current->comm, &vaf);
va_end(args);
save_error_info(sb, function, line);
ext4_handle_error(sb);
}
......@@ -905,6 +948,8 @@ static void ext4_put_super(struct super_block *sb)
unlock_super(sb);
kobject_put(&sbi->s_kobj);
wait_for_completion(&sbi->s_kobj_unregister);
if (sbi->s_chksum_driver)
crypto_free_shash(sbi->s_chksum_driver);
kfree(sbi->s_blockgroup_lock);
kfree(sbi);
}
......@@ -1922,43 +1967,69 @@ static int ext4_fill_flex_info(struct super_block *sb)
return 0;
}
__le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group,
struct ext4_group_desc *gdp)
static __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group,
struct ext4_group_desc *gdp)
{
int offset;
__u16 crc = 0;
__le32 le_group = cpu_to_le32(block_group);
if (sbi->s_es->s_feature_ro_compat &
cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
int offset = offsetof(struct ext4_group_desc, bg_checksum);
__le32 le_group = cpu_to_le32(block_group);
crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid));
crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group));
crc = crc16(crc, (__u8 *)gdp, offset);
offset += sizeof(gdp->bg_checksum); /* skip checksum */
/* for checksum of struct ext4_group_desc do the rest...*/
if ((sbi->s_es->s_feature_incompat &
cpu_to_le32(EXT4_FEATURE_INCOMPAT_64BIT)) &&
offset < le16_to_cpu(sbi->s_es->s_desc_size))
crc = crc16(crc, (__u8 *)gdp + offset,
le16_to_cpu(sbi->s_es->s_desc_size) -
offset);
if ((sbi->s_es->s_feature_ro_compat &
cpu_to_le32(EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))) {
/* Use new metadata_csum algorithm */
__u16 old_csum;
__u32 csum32;
old_csum = gdp->bg_checksum;
gdp->bg_checksum = 0;
csum32 = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&le_group,
sizeof(le_group));
csum32 = ext4_chksum(sbi, csum32, (__u8 *)gdp,
sbi->s_desc_size);
gdp->bg_checksum = old_csum;
crc = csum32 & 0xFFFF;
goto out;
}
/* old crc16 code */
offset = offsetof(struct ext4_group_desc, bg_checksum);
crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid));
crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group));
crc = crc16(crc, (__u8 *)gdp, offset);
offset += sizeof(gdp->bg_checksum); /* skip checksum */
/* for checksum of struct ext4_group_desc do the rest...*/
if ((sbi->s_es->s_feature_incompat &
cpu_to_le32(EXT4_FEATURE_INCOMPAT_64BIT)) &&
offset < le16_to_cpu(sbi->s_es->s_desc_size))
crc = crc16(crc, (__u8 *)gdp + offset,
le16_to_cpu(sbi->s_es->s_desc_size) -
offset);
out:
return cpu_to_le16(crc);
}
int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 block_group,
int ext4_group_desc_csum_verify(struct super_block *sb, __u32 block_group,
struct ext4_group_desc *gdp)
{
if ((sbi->s_es->s_feature_ro_compat &
cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) &&
(gdp->bg_checksum != ext4_group_desc_csum(sbi, block_group, gdp)))
if (ext4_has_group_desc_csum(sb) &&
(gdp->bg_checksum != ext4_group_desc_csum(EXT4_SB(sb),
block_group, gdp)))
return 0;
return 1;
}
void ext4_group_desc_csum_set(struct super_block *sb, __u32 block_group,
struct ext4_group_desc *gdp)
{
if (!ext4_has_group_desc_csum(sb))
return;
gdp->bg_checksum = ext4_group_desc_csum(EXT4_SB(sb), block_group, gdp);
}
/* Called at mount-time, super-block is locked */
static int ext4_check_descriptors(struct super_block *sb,
ext4_group_t *first_not_zeroed)
......@@ -2013,7 +2084,7 @@ static int ext4_check_descriptors(struct super_block *sb,
return 0;
}
ext4_lock_group(sb, i);
if (!ext4_group_desc_csum_verify(sbi, i, gdp)) {
if (!ext4_group_desc_csum_verify(sb, i, gdp)) {
ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
"Checksum for group %u failed (%u!=%u)",
i, le16_to_cpu(ext4_group_desc_csum(sbi, i,
......@@ -2417,6 +2488,23 @@ static ssize_t sbi_ui_store(struct ext4_attr *a,
return count;
}
static ssize_t trigger_test_error(struct ext4_attr *a,
struct ext4_sb_info *sbi,
const char *buf, size_t count)
{
int len = count;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
if (len && buf[len-1] == '\n')
len--;
if (len)
ext4_error(sbi->s_sb, "%.*s", len, buf);
return count;
}
#define EXT4_ATTR_OFFSET(_name,_mode,_show,_store,_elname) \
static struct ext4_attr ext4_attr_##_name = { \
.attr = {.name = __stringify(_name), .mode = _mode }, \
......@@ -2447,6 +2535,7 @@ EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs);
EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request);
EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc);
EXT4_RW_ATTR_SBI_UI(max_writeback_mb_bump, s_max_writeback_mb_bump);
EXT4_ATTR(trigger_fs_error, 0200, NULL, trigger_test_error);
static struct attribute *ext4_attrs[] = {
ATTR_LIST(delayed_allocation_blocks),
......@@ -2461,6 +2550,7 @@ static struct attribute *ext4_attrs[] = {
ATTR_LIST(mb_stream_req),
ATTR_LIST(mb_group_prealloc),
ATTR_LIST(max_writeback_mb_bump),
ATTR_LIST(trigger_fs_error),
NULL,
};
......@@ -2957,6 +3047,44 @@ static void ext4_destroy_lazyinit_thread(void)
kthread_stop(ext4_lazyinit_task);
}
static int set_journal_csum_feature_set(struct super_block *sb)
{
int ret = 1;
int compat, incompat;
struct ext4_sb_info *sbi = EXT4_SB(sb);
if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) {
/* journal checksum v2 */
compat = 0;
incompat = JBD2_FEATURE_INCOMPAT_CSUM_V2;
} else {
/* journal checksum v1 */
compat = JBD2_FEATURE_COMPAT_CHECKSUM;
incompat = 0;
}
if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
ret = jbd2_journal_set_features(sbi->s_journal,
compat, 0,
JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT |
incompat);
} else if (test_opt(sb, JOURNAL_CHECKSUM)) {
ret = jbd2_journal_set_features(sbi->s_journal,
compat, 0,
incompat);
jbd2_journal_clear_features(sbi->s_journal, 0, 0,
JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
} else {
jbd2_journal_clear_features(sbi->s_journal,
JBD2_FEATURE_COMPAT_CHECKSUM, 0,
JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT |
JBD2_FEATURE_INCOMPAT_CSUM_V2);
}
return ret;
}
static int ext4_fill_super(struct super_block *sb, void *data, int silent)
{
char *orig_data = kstrdup(data, GFP_KERNEL);
......@@ -2993,6 +3121,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
goto out_free_orig;
}
sb->s_fs_info = sbi;
sbi->s_sb = sb;
sbi->s_mount_opt = 0;
sbi->s_resuid = make_kuid(&init_user_ns, EXT4_DEF_RESUID);
sbi->s_resgid = make_kgid(&init_user_ns, EXT4_DEF_RESGID);
......@@ -3032,13 +3161,54 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
* Note: s_es must be initialized as soon as possible because
* some ext4 macro-instructions depend on its value
*/
es = (struct ext4_super_block *) (((char *)bh->b_data) + offset);
es = (struct ext4_super_block *) (bh->b_data + offset);
sbi->s_es = es;
sb->s_magic = le16_to_cpu(es->s_magic);
if (sb->s_magic != EXT4_SUPER_MAGIC)
goto cantfind_ext4;
sbi->s_kbytes_written = le64_to_cpu(es->s_kbytes_written);
/* Warn if metadata_csum and gdt_csum are both set. */
if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) &&
EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM))
ext4_warning(sb, KERN_INFO "metadata_csum and uninit_bg are "
"redundant flags; please run fsck.");
/* Check for a known checksum algorithm */
if (!ext4_verify_csum_type(sb, es)) {
ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with "
"unknown checksum algorithm.");
silent = 1;
goto cantfind_ext4;
}
/* Load the checksum driver */
if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) {
sbi->s_chksum_driver = crypto_alloc_shash("crc32c", 0, 0);
if (IS_ERR(sbi->s_chksum_driver)) {
ext4_msg(sb, KERN_ERR, "Cannot load crc32c driver.");
ret = PTR_ERR(sbi->s_chksum_driver);
sbi->s_chksum_driver = NULL;
goto failed_mount;
}
}
/* Check superblock checksum */
if (!ext4_superblock_csum_verify(sb, es)) {
ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with "
"invalid superblock checksum. Run e2fsck?");
silent = 1;
goto cantfind_ext4;
}
/* Precompute checksum seed for all metadata */
if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
sbi->s_csum_seed = ext4_chksum(sbi, ~0, es->s_uuid,
sizeof(es->s_uuid));
/* Set defaults before we parse the mount options */
def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
set_opt(sb, INIT_INODE_TABLE);
......@@ -3200,7 +3370,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
"Can't read superblock on 2nd try");
goto failed_mount;
}
es = (struct ext4_super_block *)(((char *)bh->b_data) + offset);
es = (struct ext4_super_block *)(bh->b_data + offset);
sbi->s_es = es;
if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) {
ext4_msg(sb, KERN_ERR,
......@@ -3392,6 +3562,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
GFP_KERNEL);
if (sbi->s_group_desc == NULL) {
ext4_msg(sb, KERN_ERR, "not enough memory");
ret = -ENOMEM;
goto failed_mount;
}
......@@ -3449,6 +3620,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
}
if (err) {
ext4_msg(sb, KERN_ERR, "insufficient memory");
ret = err;
goto failed_mount3;
}
......@@ -3506,26 +3678,17 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
goto no_journal;
}
if (ext4_blocks_count(es) > 0xffffffffULL &&
if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT) &&
!jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0,
JBD2_FEATURE_INCOMPAT_64BIT)) {
ext4_msg(sb, KERN_ERR, "Failed to set 64-bit journal feature");
goto failed_mount_wq;
}
if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
jbd2_journal_set_features(sbi->s_journal,
JBD2_FEATURE_COMPAT_CHECKSUM, 0,
JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
} else if (test_opt(sb, JOURNAL_CHECKSUM)) {
jbd2_journal_set_features(sbi->s_journal,
JBD2_FEATURE_COMPAT_CHECKSUM, 0, 0);
jbd2_journal_clear_features(sbi->s_journal, 0, 0,
JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
} else {
jbd2_journal_clear_features(sbi->s_journal,
JBD2_FEATURE_COMPAT_CHECKSUM, 0,
JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
if (!set_journal_csum_feature_set(sb)) {
ext4_msg(sb, KERN_ERR, "Failed to set journal checksum "
"feature set");
goto failed_mount_wq;
}
/* We have now updated the journal if required, so we can
......@@ -3606,7 +3769,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
goto failed_mount4;
}
ext4_setup_super(sb, es, sb->s_flags & MS_RDONLY);
if (ext4_setup_super(sb, es, sb->s_flags & MS_RDONLY))
sb->s_flags |= MS_RDONLY;
/* determine the minimum size of new large inodes, if present */
if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) {
......@@ -3641,7 +3805,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
}
ext4_ext_init(sb);
err = ext4_mb_init(sb, needs_recovery);
err = ext4_mb_init(sb);
if (err) {
ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)",
err);
......@@ -3724,6 +3888,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
brelse(sbi->s_group_desc[i]);
ext4_kvfree(sbi->s_group_desc);
failed_mount:
if (sbi->s_chksum_driver)
crypto_free_shash(sbi->s_chksum_driver);
if (sbi->s_proc) {
remove_proc_entry("options", sbi->s_proc);
remove_proc_entry(sb->s_id, ext4_proc_root);
......@@ -3847,7 +4013,7 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb,
goto out_bdev;
}
es = (struct ext4_super_block *) (((char *)bh->b_data) + offset);
es = (struct ext4_super_block *) (bh->b_data + offset);
if ((le16_to_cpu(es->s_magic) != EXT4_SUPER_MAGIC) ||
!(le32_to_cpu(es->s_feature_incompat) &
EXT4_FEATURE_INCOMPAT_JOURNAL_DEV)) {
......@@ -4039,6 +4205,7 @@ static int ext4_commit_super(struct super_block *sb, int sync)
&EXT4_SB(sb)->s_freeinodes_counter));
sb->s_dirt = 0;
BUFFER_TRACE(sbh, "marking dirty");
ext4_superblock_csum_set(sb, es);
mark_buffer_dirty(sbh);
if (sync) {
error = sync_dirty_buffer(sbh);
......@@ -4333,7 +4500,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
struct ext4_group_desc *gdp =
ext4_get_group_desc(sb, g, NULL);
if (!ext4_group_desc_csum_verify(sbi, g, gdp)) {
if (!ext4_group_desc_csum_verify(sb, g, gdp)) {
ext4_msg(sb, KERN_ERR,
"ext4_remount: Checksum for group %u failed (%u!=%u)",
g, le16_to_cpu(ext4_group_desc_csum(sbi, g, gdp)),
......
......@@ -122,6 +122,58 @@ const struct xattr_handler *ext4_xattr_handlers[] = {
NULL
};
static __le32 ext4_xattr_block_csum(struct inode *inode,
sector_t block_nr,
struct ext4_xattr_header *hdr)
{
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
struct ext4_inode_info *ei = EXT4_I(inode);
__u32 csum, old;
old = hdr->h_checksum;
hdr->h_checksum = 0;
if (le32_to_cpu(hdr->h_refcount) != 1) {
block_nr = cpu_to_le64(block_nr);
csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&block_nr,
sizeof(block_nr));
} else
csum = ei->i_csum_seed;
csum = ext4_chksum(sbi, csum, (__u8 *)hdr,
EXT4_BLOCK_SIZE(inode->i_sb));
hdr->h_checksum = old;
return cpu_to_le32(csum);
}
static int ext4_xattr_block_csum_verify(struct inode *inode,
sector_t block_nr,
struct ext4_xattr_header *hdr)
{
if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) &&
(hdr->h_checksum != ext4_xattr_block_csum(inode, block_nr, hdr)))
return 0;
return 1;
}
static void ext4_xattr_block_csum_set(struct inode *inode,
sector_t block_nr,
struct ext4_xattr_header *hdr)
{
if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
return;
hdr->h_checksum = ext4_xattr_block_csum(inode, block_nr, hdr);
}
static inline int ext4_handle_dirty_xattr_block(handle_t *handle,
struct inode *inode,
struct buffer_head *bh)
{
ext4_xattr_block_csum_set(inode, bh->b_blocknr, BHDR(bh));
return ext4_handle_dirty_metadata(handle, inode, bh);
}
static inline const struct xattr_handler *
ext4_xattr_handler(int name_index)
{
......@@ -156,12 +208,22 @@ ext4_xattr_check_names(struct ext4_xattr_entry *entry, void *end)
}
static inline int
ext4_xattr_check_block(struct buffer_head *bh)
ext4_xattr_check_block(struct inode *inode, struct buffer_head *bh)
{
int error;
if (buffer_verified(bh))
return 0;
if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) ||
BHDR(bh)->h_blocks != cpu_to_le32(1))
return -EIO;
return ext4_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size);
if (!ext4_xattr_block_csum_verify(inode, bh->b_blocknr, BHDR(bh)))
return -EIO;
error = ext4_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size);
if (!error)
set_buffer_verified(bh);
return error;
}
static inline int
......@@ -224,7 +286,7 @@ ext4_xattr_block_get(struct inode *inode, int name_index, const char *name,
goto cleanup;
ea_bdebug(bh, "b_count=%d, refcount=%d",
atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
if (ext4_xattr_check_block(bh)) {
if (ext4_xattr_check_block(inode, bh)) {
bad_block:
EXT4_ERROR_INODE(inode, "bad block %llu",
EXT4_I(inode)->i_file_acl);
......@@ -369,7 +431,7 @@ ext4_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size)
goto cleanup;
ea_bdebug(bh, "b_count=%d, refcount=%d",
atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
if (ext4_xattr_check_block(bh)) {
if (ext4_xattr_check_block(inode, bh)) {
EXT4_ERROR_INODE(inode, "bad block %llu",
EXT4_I(inode)->i_file_acl);
error = -EIO;
......@@ -492,7 +554,7 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
if (ce)
mb_cache_entry_release(ce);
unlock_buffer(bh);
error = ext4_handle_dirty_metadata(handle, inode, bh);
error = ext4_handle_dirty_xattr_block(handle, inode, bh);
if (IS_SYNC(inode))
ext4_handle_sync(handle);
dquot_free_block(inode, 1);
......@@ -662,7 +724,7 @@ ext4_xattr_block_find(struct inode *inode, struct ext4_xattr_info *i,
ea_bdebug(bs->bh, "b_count=%d, refcount=%d",
atomic_read(&(bs->bh->b_count)),
le32_to_cpu(BHDR(bs->bh)->h_refcount));
if (ext4_xattr_check_block(bs->bh)) {
if (ext4_xattr_check_block(inode, bs->bh)) {
EXT4_ERROR_INODE(inode, "bad block %llu",
EXT4_I(inode)->i_file_acl);
error = -EIO;
......@@ -725,9 +787,9 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
if (error == -EIO)
goto bad_block;
if (!error)
error = ext4_handle_dirty_metadata(handle,
inode,
bs->bh);
error = ext4_handle_dirty_xattr_block(handle,
inode,
bs->bh);
if (error)
goto cleanup;
goto inserted;
......@@ -796,9 +858,9 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
ea_bdebug(new_bh, "reusing; refcount now=%d",
le32_to_cpu(BHDR(new_bh)->h_refcount));
unlock_buffer(new_bh);
error = ext4_handle_dirty_metadata(handle,
inode,
new_bh);
error = ext4_handle_dirty_xattr_block(handle,
inode,
new_bh);
if (error)
goto cleanup_dquot;
}
......@@ -855,8 +917,8 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
set_buffer_uptodate(new_bh);
unlock_buffer(new_bh);
ext4_xattr_cache_insert(new_bh);
error = ext4_handle_dirty_metadata(handle,
inode, new_bh);
error = ext4_handle_dirty_xattr_block(handle,
inode, new_bh);
if (error)
goto cleanup;
}
......@@ -1193,7 +1255,7 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
error = -EIO;
if (!bh)
goto cleanup;
if (ext4_xattr_check_block(bh)) {
if (ext4_xattr_check_block(inode, bh)) {
EXT4_ERROR_INODE(inode, "bad block %llu",
EXT4_I(inode)->i_file_acl);
error = -EIO;
......
......@@ -27,7 +27,9 @@ struct ext4_xattr_header {
__le32 h_refcount; /* reference count */
__le32 h_blocks; /* number of disk blocks used */
__le32 h_hash; /* hash value of all attributes */
__u32 h_reserved[4]; /* zero right now */
__le32 h_checksum; /* crc32c(uuid+id+xattrblock) */
/* id = inum if refcount=1, blknum otherwise */
__u32 h_reserved[3]; /* zero right now */
};
struct ext4_xattr_ibody_header {
......
config JBD2
tristate
select CRC32
select CRYPTO
select CRYPTO_CRC32C
help
This is a generic journaling layer for block devices that support
both 32-bit and 64-bit block numbers. It is currently used by
......
......@@ -85,6 +85,24 @@ static void release_buffer_page(struct buffer_head *bh)
__brelse(bh);
}
static void jbd2_commit_block_csum_set(journal_t *j,
struct journal_head *descriptor)
{
struct commit_header *h;
__u32 csum;
if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
return;
h = (struct commit_header *)(jh2bh(descriptor)->b_data);
h->h_chksum_type = 0;
h->h_chksum_size = 0;
h->h_chksum[0] = 0;
csum = jbd2_chksum(j, j->j_csum_seed, jh2bh(descriptor)->b_data,
j->j_blocksize);
h->h_chksum[0] = cpu_to_be32(csum);
}
/*
* Done it all: now submit the commit record. We should have
* cleaned up our previous buffers by now, so if we are in abort
......@@ -128,6 +146,7 @@ static int journal_submit_commit_record(journal_t *journal,
tmp->h_chksum_size = JBD2_CRC32_CHKSUM_SIZE;
tmp->h_chksum[0] = cpu_to_be32(crc32_sum);
}
jbd2_commit_block_csum_set(journal, descriptor);
JBUFFER_TRACE(descriptor, "submit commit block");
lock_buffer(bh);
......@@ -301,6 +320,44 @@ static void write_tag_block(int tag_bytes, journal_block_tag_t *tag,
tag->t_blocknr_high = cpu_to_be32((block >> 31) >> 1);
}
static void jbd2_descr_block_csum_set(journal_t *j,
struct journal_head *descriptor)
{
struct jbd2_journal_block_tail *tail;
__u32 csum;
if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
return;
tail = (struct jbd2_journal_block_tail *)
(jh2bh(descriptor)->b_data + j->j_blocksize -
sizeof(struct jbd2_journal_block_tail));
tail->t_checksum = 0;
csum = jbd2_chksum(j, j->j_csum_seed, jh2bh(descriptor)->b_data,
j->j_blocksize);
tail->t_checksum = cpu_to_be32(csum);
}
static void jbd2_block_tag_csum_set(journal_t *j, journal_block_tag_t *tag,
struct buffer_head *bh, __u32 sequence)
{
struct page *page = bh->b_page;
__u8 *addr;
__u32 csum;
if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
return;
sequence = cpu_to_be32(sequence);
addr = kmap_atomic(page, KM_USER0);
csum = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&sequence,
sizeof(sequence));
csum = jbd2_chksum(j, csum, addr + offset_in_page(bh->b_data),
bh->b_size);
kunmap_atomic(addr, KM_USER0);
tag->t_checksum = cpu_to_be32(csum);
}
/*
* jbd2_journal_commit_transaction
*
......@@ -334,6 +391,10 @@ void jbd2_journal_commit_transaction(journal_t *journal)
unsigned long first_block;
tid_t first_tid;
int update_tail;
int csum_size = 0;
if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2))
csum_size = sizeof(struct jbd2_journal_block_tail);
/*
* First job: lock down the current transaction and wait for
......@@ -627,7 +688,9 @@ void jbd2_journal_commit_transaction(journal_t *journal)
tag = (journal_block_tag_t *) tagp;
write_tag_block(tag_bytes, tag, jh2bh(jh)->b_blocknr);
tag->t_flags = cpu_to_be32(tag_flag);
tag->t_flags = cpu_to_be16(tag_flag);
jbd2_block_tag_csum_set(journal, tag, jh2bh(new_jh),
commit_transaction->t_tid);
tagp += tag_bytes;
space_left -= tag_bytes;
......@@ -643,7 +706,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
if (bufs == journal->j_wbufsize ||
commit_transaction->t_buffers == NULL ||
space_left < tag_bytes + 16) {
space_left < tag_bytes + 16 + csum_size) {
jbd_debug(4, "JBD2: Submit %d IOs\n", bufs);
......@@ -651,8 +714,9 @@ void jbd2_journal_commit_transaction(journal_t *journal)
submitting the IOs. "tag" still points to
the last tag we set up. */
tag->t_flags |= cpu_to_be32(JBD2_FLAG_LAST_TAG);
tag->t_flags |= cpu_to_be16(JBD2_FLAG_LAST_TAG);
jbd2_descr_block_csum_set(journal, descriptor);
start_journal_io:
for (i = 0; i < bufs; i++) {
struct buffer_head *bh = wbuf[i];
......
......@@ -97,6 +97,43 @@ EXPORT_SYMBOL(jbd2_inode_cache);
static void __journal_abort_soft (journal_t *journal, int errno);
static int jbd2_journal_create_slab(size_t slab_size);
/* Checksumming functions */
int jbd2_verify_csum_type(journal_t *j, journal_superblock_t *sb)
{
if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
return 1;
return sb->s_checksum_type == JBD2_CRC32C_CHKSUM;
}
static __u32 jbd2_superblock_csum(journal_t *j, journal_superblock_t *sb)
{
__u32 csum, old_csum;
old_csum = sb->s_checksum;
sb->s_checksum = 0;
csum = jbd2_chksum(j, ~0, (char *)sb, sizeof(journal_superblock_t));
sb->s_checksum = old_csum;
return cpu_to_be32(csum);
}
int jbd2_superblock_csum_verify(journal_t *j, journal_superblock_t *sb)
{
if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
return 1;
return sb->s_checksum == jbd2_superblock_csum(j, sb);
}
void jbd2_superblock_csum_set(journal_t *j, journal_superblock_t *sb)
{
if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
return;
sb->s_checksum = jbd2_superblock_csum(j, sb);
}
/*
* Helper function used to manage commit timeouts
*/
......@@ -1348,6 +1385,7 @@ static void jbd2_journal_update_sb_errno(journal_t *journal)
jbd_debug(1, "JBD2: updating superblock error (errno %d)\n",
journal->j_errno);
sb->s_errno = cpu_to_be32(journal->j_errno);
jbd2_superblock_csum_set(journal, sb);
read_unlock(&journal->j_state_lock);
jbd2_write_superblock(journal, WRITE_SYNC);
......@@ -1376,6 +1414,9 @@ static int journal_get_superblock(journal_t *journal)
}
}
if (buffer_verified(bh))
return 0;
sb = journal->j_superblock;
err = -EINVAL;
......@@ -1413,6 +1454,43 @@ static int journal_get_superblock(journal_t *journal)
goto out;
}
if (JBD2_HAS_COMPAT_FEATURE(journal, JBD2_FEATURE_COMPAT_CHECKSUM) &&
JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) {
/* Can't have checksum v1 and v2 on at the same time! */
printk(KERN_ERR "JBD: Can't enable checksumming v1 and v2 "
"at the same time!\n");
goto out;
}
if (!jbd2_verify_csum_type(journal, sb)) {
printk(KERN_ERR "JBD: Unknown checksum type\n");
goto out;
}
/* Load the checksum driver */
if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) {
journal->j_chksum_driver = crypto_alloc_shash("crc32c", 0, 0);
if (IS_ERR(journal->j_chksum_driver)) {
printk(KERN_ERR "JBD: Cannot load crc32c driver.\n");
err = PTR_ERR(journal->j_chksum_driver);
journal->j_chksum_driver = NULL;
goto out;
}
}
/* Check superblock checksum */
if (!jbd2_superblock_csum_verify(journal, sb)) {
printk(KERN_ERR "JBD: journal checksum error\n");
goto out;
}
/* Precompute checksum seed for all metadata */
if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2))
journal->j_csum_seed = jbd2_chksum(journal, ~0, sb->s_uuid,
sizeof(sb->s_uuid));
set_buffer_verified(bh);
return 0;
out:
......@@ -1564,6 +1642,8 @@ int jbd2_journal_destroy(journal_t *journal)
iput(journal->j_inode);
if (journal->j_revoke)
jbd2_journal_destroy_revoke(journal);
if (journal->j_chksum_driver)
crypto_free_shash(journal->j_chksum_driver);
kfree(journal->j_wbuf);
kfree(journal);
......@@ -1653,6 +1733,10 @@ int jbd2_journal_check_available_features (journal_t *journal, unsigned long com
int jbd2_journal_set_features (journal_t *journal, unsigned long compat,
unsigned long ro, unsigned long incompat)
{
#define INCOMPAT_FEATURE_ON(f) \
((incompat & (f)) && !(sb->s_feature_incompat & cpu_to_be32(f)))
#define COMPAT_FEATURE_ON(f) \
((compat & (f)) && !(sb->s_feature_compat & cpu_to_be32(f)))
journal_superblock_t *sb;
if (jbd2_journal_check_used_features(journal, compat, ro, incompat))
......@@ -1661,16 +1745,54 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat,
if (!jbd2_journal_check_available_features(journal, compat, ro, incompat))
return 0;
/* Asking for checksumming v2 and v1? Only give them v2. */
if (incompat & JBD2_FEATURE_INCOMPAT_CSUM_V2 &&
compat & JBD2_FEATURE_COMPAT_CHECKSUM)
compat &= ~JBD2_FEATURE_COMPAT_CHECKSUM;
jbd_debug(1, "Setting new features 0x%lx/0x%lx/0x%lx\n",
compat, ro, incompat);
sb = journal->j_superblock;
/* If enabling v2 checksums, update superblock */
if (INCOMPAT_FEATURE_ON(JBD2_FEATURE_INCOMPAT_CSUM_V2)) {
sb->s_checksum_type = JBD2_CRC32C_CHKSUM;
sb->s_feature_compat &=
~cpu_to_be32(JBD2_FEATURE_COMPAT_CHECKSUM);
/* Load the checksum driver */
if (journal->j_chksum_driver == NULL) {
journal->j_chksum_driver = crypto_alloc_shash("crc32c",
0, 0);
if (IS_ERR(journal->j_chksum_driver)) {
printk(KERN_ERR "JBD: Cannot load crc32c "
"driver.\n");
journal->j_chksum_driver = NULL;
return 0;
}
}
/* Precompute checksum seed for all metadata */
if (JBD2_HAS_INCOMPAT_FEATURE(journal,
JBD2_FEATURE_INCOMPAT_CSUM_V2))
journal->j_csum_seed = jbd2_chksum(journal, ~0,
sb->s_uuid,
sizeof(sb->s_uuid));
}
/* If enabling v1 checksums, downgrade superblock */
if (COMPAT_FEATURE_ON(JBD2_FEATURE_COMPAT_CHECKSUM))
sb->s_feature_incompat &=
~cpu_to_be32(JBD2_FEATURE_INCOMPAT_CSUM_V2);
sb->s_feature_compat |= cpu_to_be32(compat);
sb->s_feature_ro_compat |= cpu_to_be32(ro);
sb->s_feature_incompat |= cpu_to_be32(incompat);
return 1;
#undef COMPAT_FEATURE_ON
#undef INCOMPAT_FEATURE_ON
}
/*
......@@ -1975,10 +2097,16 @@ int jbd2_journal_blocks_per_page(struct inode *inode)
*/
size_t journal_tag_bytes(journal_t *journal)
{
journal_block_tag_t tag;
size_t x = 0;
if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2))
x += sizeof(tag.t_checksum);
if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT))
return JBD2_TAG_SIZE64;
return x + JBD2_TAG_SIZE64;
else
return JBD2_TAG_SIZE32;
return x + JBD2_TAG_SIZE32;
}
/*
......
......@@ -174,6 +174,25 @@ static int jread(struct buffer_head **bhp, journal_t *journal,
return 0;
}
static int jbd2_descr_block_csum_verify(journal_t *j,
void *buf)
{
struct jbd2_journal_block_tail *tail;
__u32 provided, calculated;
if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
return 1;
tail = (struct jbd2_journal_block_tail *)(buf + j->j_blocksize -
sizeof(struct jbd2_journal_block_tail));
provided = tail->t_checksum;
tail->t_checksum = 0;
calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize);
tail->t_checksum = provided;
provided = be32_to_cpu(provided);
return provided == calculated;
}
/*
* Count the number of in-use tags in a journal descriptor block.
......@@ -186,6 +205,9 @@ static int count_tags(journal_t *journal, struct buffer_head *bh)
int nr = 0, size = journal->j_blocksize;
int tag_bytes = journal_tag_bytes(journal);
if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2))
size -= sizeof(struct jbd2_journal_block_tail);
tagp = &bh->b_data[sizeof(journal_header_t)];
while ((tagp - bh->b_data + tag_bytes) <= size) {
......@@ -193,10 +215,10 @@ static int count_tags(journal_t *journal, struct buffer_head *bh)
nr++;
tagp += tag_bytes;
if (!(tag->t_flags & cpu_to_be32(JBD2_FLAG_SAME_UUID)))
if (!(tag->t_flags & cpu_to_be16(JBD2_FLAG_SAME_UUID)))
tagp += 16;
if (tag->t_flags & cpu_to_be32(JBD2_FLAG_LAST_TAG))
if (tag->t_flags & cpu_to_be16(JBD2_FLAG_LAST_TAG))
break;
}
......@@ -353,6 +375,41 @@ static int calc_chksums(journal_t *journal, struct buffer_head *bh,
return 0;
}
static int jbd2_commit_block_csum_verify(journal_t *j, void *buf)
{
struct commit_header *h;
__u32 provided, calculated;
if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
return 1;
h = buf;
provided = h->h_chksum[0];
h->h_chksum[0] = 0;
calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize);
h->h_chksum[0] = provided;
provided = be32_to_cpu(provided);
return provided == calculated;
}
static int jbd2_block_tag_csum_verify(journal_t *j, journal_block_tag_t *tag,
void *buf, __u32 sequence)
{
__u32 provided, calculated;
if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
return 1;
sequence = cpu_to_be32(sequence);
calculated = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&sequence,
sizeof(sequence));
calculated = jbd2_chksum(j, calculated, buf, j->j_blocksize);
provided = be32_to_cpu(tag->t_checksum);
return provided == cpu_to_be32(calculated);
}
static int do_one_pass(journal_t *journal,
struct recovery_info *info, enum passtype pass)
{
......@@ -366,6 +423,7 @@ static int do_one_pass(journal_t *journal,
int blocktype;
int tag_bytes = journal_tag_bytes(journal);
__u32 crc32_sum = ~0; /* Transactional Checksums */
int descr_csum_size = 0;
/*
* First thing is to establish what we expect to find in the log
......@@ -451,6 +509,18 @@ static int do_one_pass(journal_t *journal,
switch(blocktype) {
case JBD2_DESCRIPTOR_BLOCK:
/* Verify checksum first */
if (JBD2_HAS_INCOMPAT_FEATURE(journal,
JBD2_FEATURE_INCOMPAT_CSUM_V2))
descr_csum_size =
sizeof(struct jbd2_journal_block_tail);
if (descr_csum_size > 0 &&
!jbd2_descr_block_csum_verify(journal,
bh->b_data)) {
err = -EIO;
goto failed;
}
/* If it is a valid descriptor block, replay it
* in pass REPLAY; if journal_checksums enabled, then
* calculate checksums in PASS_SCAN, otherwise,
......@@ -481,11 +551,11 @@ static int do_one_pass(journal_t *journal,
tagp = &bh->b_data[sizeof(journal_header_t)];
while ((tagp - bh->b_data + tag_bytes)
<= journal->j_blocksize) {
<= journal->j_blocksize - descr_csum_size) {
unsigned long io_block;
tag = (journal_block_tag_t *) tagp;
flags = be32_to_cpu(tag->t_flags);
flags = be16_to_cpu(tag->t_flags);
io_block = next_log_block++;
wrap(journal, next_log_block);
......@@ -516,6 +586,19 @@ static int do_one_pass(journal_t *journal,
goto skip_write;
}
/* Look for block corruption */
if (!jbd2_block_tag_csum_verify(
journal, tag, obh->b_data,
be32_to_cpu(tmp->h_sequence))) {
brelse(obh);
success = -EIO;
printk(KERN_ERR "JBD: Invalid "
"checksum recovering "
"block %llu in log\n",
blocknr);
continue;
}
/* Find a buffer for the new
* data being restored */
nbh = __getblk(journal->j_fs_dev,
......@@ -650,6 +733,19 @@ static int do_one_pass(journal_t *journal,
}
crc32_sum = ~0;
}
if (pass == PASS_SCAN &&
!jbd2_commit_block_csum_verify(journal,
bh->b_data)) {
info->end_transaction = next_commit_ID;
if (!JBD2_HAS_INCOMPAT_FEATURE(journal,
JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) {
journal->j_failed_commit =
next_commit_ID;
brelse(bh);
break;
}
}
brelse(bh);
next_commit_ID++;
continue;
......@@ -706,6 +802,25 @@ static int do_one_pass(journal_t *journal,
return err;
}
static int jbd2_revoke_block_csum_verify(journal_t *j,
void *buf)
{
struct jbd2_journal_revoke_tail *tail;
__u32 provided, calculated;
if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
return 1;
tail = (struct jbd2_journal_revoke_tail *)(buf + j->j_blocksize -
sizeof(struct jbd2_journal_revoke_tail));
provided = tail->r_checksum;
tail->r_checksum = 0;
calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize);
tail->r_checksum = provided;
provided = be32_to_cpu(provided);
return provided == calculated;
}
/* Scan a revoke record, marking all blocks mentioned as revoked. */
......@@ -720,6 +835,9 @@ static int scan_revoke_records(journal_t *journal, struct buffer_head *bh,
offset = sizeof(jbd2_journal_revoke_header_t);
max = be32_to_cpu(header->r_count);
if (!jbd2_revoke_block_csum_verify(journal, header))
return -EINVAL;
if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT))
record_len = 8;
......
......@@ -578,6 +578,7 @@ static void write_one_revoke_record(journal_t *journal,
struct jbd2_revoke_record_s *record,
int write_op)
{
int csum_size = 0;
struct journal_head *descriptor;
int offset;
journal_header_t *header;
......@@ -592,9 +593,13 @@ static void write_one_revoke_record(journal_t *journal,
descriptor = *descriptorp;
offset = *offsetp;
/* Do we need to leave space at the end for a checksum? */
if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2))
csum_size = sizeof(struct jbd2_journal_revoke_tail);
/* Make sure we have a descriptor with space left for the record */
if (descriptor) {
if (offset == journal->j_blocksize) {
if (offset >= journal->j_blocksize - csum_size) {
flush_descriptor(journal, descriptor, offset, write_op);
descriptor = NULL;
}
......@@ -631,6 +636,24 @@ static void write_one_revoke_record(journal_t *journal,
*offsetp = offset;
}
static void jbd2_revoke_csum_set(journal_t *j,
struct journal_head *descriptor)
{
struct jbd2_journal_revoke_tail *tail;
__u32 csum;
if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
return;
tail = (struct jbd2_journal_revoke_tail *)
(jh2bh(descriptor)->b_data + j->j_blocksize -
sizeof(struct jbd2_journal_revoke_tail));
tail->r_checksum = 0;
csum = jbd2_chksum(j, j->j_csum_seed, jh2bh(descriptor)->b_data,
j->j_blocksize);
tail->r_checksum = cpu_to_be32(csum);
}
/*
* Flush a revoke descriptor out to the journal. If we are aborting,
* this is a noop; otherwise we are generating a buffer which needs to
......@@ -652,6 +675,8 @@ static void flush_descriptor(journal_t *journal,
header = (jbd2_journal_revoke_header_t *) jh2bh(descriptor)->b_data;
header->r_count = cpu_to_be32(offset);
jbd2_revoke_csum_set(journal, descriptor);
set_buffer_jwrite(bh);
BUFFER_TRACE(bh, "write");
set_buffer_dirty(bh);
......
......@@ -162,8 +162,8 @@ static int start_this_handle(journal_t *journal, handle_t *handle,
alloc_transaction:
if (!journal->j_running_transaction) {
new_transaction = kmem_cache_alloc(transaction_cache,
gfp_mask | __GFP_ZERO);
new_transaction = kmem_cache_zalloc(transaction_cache,
gfp_mask);
if (!new_transaction) {
/*
* If __GFP_FS is not present, then we may be
......
......@@ -31,6 +31,7 @@
#include <linux/mutex.h>
#include <linux/timer.h>
#include <linux/slab.h>
#include <crypto/hash.h>
#endif
#define journal_oom_retry 1
......@@ -147,12 +148,24 @@ typedef struct journal_header_s
#define JBD2_CRC32_CHKSUM 1
#define JBD2_MD5_CHKSUM 2
#define JBD2_SHA1_CHKSUM 3
#define JBD2_CRC32C_CHKSUM 4
#define JBD2_CRC32_CHKSUM_SIZE 4
#define JBD2_CHECKSUM_BYTES (32 / sizeof(u32))
/*
* Commit block header for storing transactional checksums:
*
* NOTE: If FEATURE_COMPAT_CHECKSUM (checksum v1) is set, the h_chksum*
* fields are used to store a checksum of the descriptor and data blocks.
*
* If FEATURE_INCOMPAT_CSUM_V2 (checksum v2) is set, then the h_chksum
* field is used to store crc32c(uuid+commit_block). Each journal metadata
* block gets its own checksum, and data block checksums are stored in
* journal_block_tag (in the descriptor). The other h_chksum* fields are
* not used.
*
* Checksum v1 and v2 are mutually exclusive features.
*/
struct commit_header {
__be32 h_magic;
......@@ -175,13 +188,19 @@ struct commit_header {
typedef struct journal_block_tag_s
{
__be32 t_blocknr; /* The on-disk block number */
__be32 t_flags; /* See below */
__be16 t_checksum; /* truncated crc32c(uuid+seq+block) */
__be16 t_flags; /* See below */
__be32 t_blocknr_high; /* most-significant high 32bits. */
} journal_block_tag_t;
#define JBD2_TAG_SIZE32 (offsetof(journal_block_tag_t, t_blocknr_high))
#define JBD2_TAG_SIZE64 (sizeof(journal_block_tag_t))
/* Tail of descriptor block, for checksumming */
struct jbd2_journal_block_tail {
__be32 t_checksum; /* crc32c(uuid+descr_block) */
};
/*
* The revoke descriptor: used on disk to describe a series of blocks to
* be revoked from the log
......@@ -192,6 +211,10 @@ typedef struct jbd2_journal_revoke_header_s
__be32 r_count; /* Count of bytes used in the block */
} jbd2_journal_revoke_header_t;
/* Tail of revoke block, for checksumming */
struct jbd2_journal_revoke_tail {
__be32 r_checksum; /* crc32c(uuid+revoke_block) */
};
/* Definitions for the journal tag flags word: */
#define JBD2_FLAG_ESCAPE 1 /* on-disk block is escaped */
......@@ -241,7 +264,10 @@ typedef struct journal_superblock_s
__be32 s_max_trans_data; /* Limit of data blocks per trans. */
/* 0x0050 */
__u32 s_padding[44];
__u8 s_checksum_type; /* checksum type */
__u8 s_padding2[3];
__u32 s_padding[42];
__be32 s_checksum; /* crc32c(superblock) */
/* 0x0100 */
__u8 s_users[16*48]; /* ids of all fs'es sharing the log */
......@@ -263,13 +289,15 @@ typedef struct journal_superblock_s
#define JBD2_FEATURE_INCOMPAT_REVOKE 0x00000001
#define JBD2_FEATURE_INCOMPAT_64BIT 0x00000002
#define JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT 0x00000004
#define JBD2_FEATURE_INCOMPAT_CSUM_V2 0x00000008
/* Features known to this kernel version: */
#define JBD2_KNOWN_COMPAT_FEATURES JBD2_FEATURE_COMPAT_CHECKSUM
#define JBD2_KNOWN_ROCOMPAT_FEATURES 0
#define JBD2_KNOWN_INCOMPAT_FEATURES (JBD2_FEATURE_INCOMPAT_REVOKE | \
JBD2_FEATURE_INCOMPAT_64BIT | \
JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)
JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT | \
JBD2_FEATURE_INCOMPAT_CSUM_V2)
#ifdef __KERNEL__
......@@ -939,6 +967,12 @@ struct journal_s
* superblock pointer here
*/
void *j_private;
/* Reference to checksum algorithm driver via cryptoapi */
struct crypto_shash *j_chksum_driver;
/* Precomputed journal UUID checksum for seeding other checksums */
__u32 j_csum_seed;
};
/*
......@@ -1268,6 +1302,25 @@ static inline int jbd_space_needed(journal_t *journal)
extern int jbd_blocks_per_page(struct inode *inode);
static inline u32 jbd2_chksum(journal_t *journal, u32 crc,
const void *address, unsigned int length)
{
struct {
struct shash_desc shash;
char ctx[crypto_shash_descsize(journal->j_chksum_driver)];
} desc;
int err;
desc.shash.tfm = journal->j_chksum_driver;
desc.shash.flags = 0;
*(u32 *)desc.ctx = crc;
err = crypto_shash_update(&desc.shash, address, length);
BUG_ON(err);
return *(u32 *)desc.ctx;
}
#ifdef __KERNEL__
#define buffer_trace_init(bh) do {} while (0)
......
......@@ -12,6 +12,7 @@ enum jbd_state_bits {
BH_State, /* Pins most journal_head state */
BH_JournalHead, /* Pins bh->b_private and jh->b_bh */
BH_Unshadow, /* Dummy bit, for BJ_Shadow wakeup filtering */
BH_Verified, /* Metadata block has been verified ok */
BH_JBDPrivateStart, /* First bit available for private use by FS */
};
......@@ -24,6 +25,7 @@ TAS_BUFFER_FNS(Revoked, revoked)
BUFFER_FNS(RevokeValid, revokevalid)
TAS_BUFFER_FNS(RevokeValid, revokevalid)
BUFFER_FNS(Freed, freed)
BUFFER_FNS(Verified, verified)
static inline struct buffer_head *jh2bh(struct journal_head *jh)
{
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册