Commit 9f67672a authored by Linus Torvalds

Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

Pull ext4 updates from Ted Ts'o:
 "New features for ext4 this cycle include support for encrypted
  casefold, ensure that deleted file names are cleared in directory
  blocks by zeroing directory entries when they are unlinked or moved as
  part of a hash tree node split. We also improve the block allocator's
  performance on a freshly mounted file system by prefetching block
  bitmaps.

  There are also the usual cleanups and bug fixes, including fixing a
  page cache invalidation race when there is mixed buffered and direct
  I/O and the block size is less than page size, and allow the dax flag
  to be set and cleared on inline directories"

* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (32 commits)
  ext4: wipe ext4_dir_entry2 upon file deletion
  ext4: Fix occasional generic/418 failure
  fs: fix reporting supported extra file attributes for statx()
  ext4: allow the dax flag to be set and cleared on inline directories
  ext4: fix debug format string warning
  ext4: fix trailing whitespace
  ext4: fix various seppling typos
  ext4: fix error return code in ext4_fc_perform_commit()
  ext4: annotate data race in jbd2_journal_dirty_metadata()
  ext4: annotate data race in start_this_handle()
  ext4: fix ext4_error_err save negative errno into superblock
  ext4: fix error code in ext4_commit_super
  ext4: always panic when errors=panic is specified
  ext4: delete redundant uptodate check for buffer
  ext4: do not set SB_ACTIVE in ext4_orphan_cleanup()
  ext4: make prefetch_block_bitmaps default
  ext4: add proc files to monitor new structures
  ext4: improve cr 0 / cr 1 group scanning
  ext4: add MB_NUM_ORDERS macro
  ext4: add mballoc stats proc file
  ...
...@@ -121,6 +121,31 @@ The directory file type is one of the following values: ...@@ -121,6 +121,31 @@ The directory file type is one of the following values:
* - 0x7 * - 0x7
- Symbolic link. - Symbolic link.
To support directories that are both encrypted and casefolded, we
must also include hash information in the directory entry. We append
``ext4_extended_dir_entry_2`` to ``ext4_dir_entry_2`` except for the entries
for dot and dotdot, which are kept the same. The structure follows immediately
after ``name`` and is included in the size listed by ``rec_len``. If a directory
entry uses this extension, it may be up to 271 bytes.
.. list-table::
:widths: 8 8 24 40
:header-rows: 1
* - Offset
- Size
- Name
- Description
* - 0x0
- \_\_le32
- hash
- The hash of the directory name
* - 0x4
- \_\_le32
- minor\_hash
- The minor hash of the directory name
In order to add checksums to these classic directory blocks, a phony In order to add checksums to these classic directory blocks, a phony
``struct ext4_dir_entry`` is placed at the end of each leaf block to ``struct ext4_dir_entry`` is placed at the end of each leaf block to
hold the checksum. The directory entry is 12 bytes long. The inode hold the checksum. The directory entry is 12 bytes long. The inode
...@@ -322,6 +347,8 @@ The directory hash is one of the following values: ...@@ -322,6 +347,8 @@ The directory hash is one of the following values:
- Half MD4, unsigned. - Half MD4, unsigned.
* - 0x5 * - 0x5
- Tea, unsigned. - Tea, unsigned.
* - 0x6
- Siphash.
Interior nodes of an htree are recorded as ``struct dx_node``, which is Interior nodes of an htree are recorded as ``struct dx_node``, which is
also the full length of a data block: also the full length of a data block:
......
...@@ -239,7 +239,7 @@ unsigned ext4_free_clusters_after_init(struct super_block *sb, ...@@ -239,7 +239,7 @@ unsigned ext4_free_clusters_after_init(struct super_block *sb,
ext4_group_t block_group, ext4_group_t block_group,
struct ext4_group_desc *gdp) struct ext4_group_desc *gdp)
{ {
return num_clusters_in_group(sb, block_group) - return num_clusters_in_group(sb, block_group) -
ext4_num_overhead_clusters(sb, block_group, gdp); ext4_num_overhead_clusters(sb, block_group, gdp);
} }
......
...@@ -55,6 +55,18 @@ static int is_dx_dir(struct inode *inode) ...@@ -55,6 +55,18 @@ static int is_dx_dir(struct inode *inode)
return 0; return 0;
} }
/*
 * A "fake" directory entry is one that does not name a regular file:
 * the "." and ".." entries, or the checksum pseudo-entry stored at the
 * end of a directory leaf block.  An entry with name_len == 0 is never
 * treated as fake.
 */
static bool is_fake_dir_entry(struct ext4_dir_entry_2 *de)
{
	bool dot_or_dotdot;

	/* "." or ".." (zero-length names never match) */
	dot_or_dotdot = de->name_len > 0 && de->name_len <= 2 &&
			de->name[0] == '.' &&
			(de->name[1] == '.' || de->name[1] == '\0');

	/* a checksum tail entry is also not a real name */
	return dot_or_dotdot || de->file_type == EXT4_FT_DIR_CSUM;
}
/* /*
* Return 0 if the directory entry is OK, and 1 if there is a problem * Return 0 if the directory entry is OK, and 1 if there is a problem
* *
...@@ -73,16 +85,20 @@ int __ext4_check_dir_entry(const char *function, unsigned int line, ...@@ -73,16 +85,20 @@ int __ext4_check_dir_entry(const char *function, unsigned int line,
const int rlen = ext4_rec_len_from_disk(de->rec_len, const int rlen = ext4_rec_len_from_disk(de->rec_len,
dir->i_sb->s_blocksize); dir->i_sb->s_blocksize);
const int next_offset = ((char *) de - buf) + rlen; const int next_offset = ((char *) de - buf) + rlen;
bool fake = is_fake_dir_entry(de);
bool has_csum = ext4_has_metadata_csum(dir->i_sb);
if (unlikely(rlen < EXT4_DIR_REC_LEN(1))) if (unlikely(rlen < ext4_dir_rec_len(1, fake ? NULL : dir)))
error_msg = "rec_len is smaller than minimal"; error_msg = "rec_len is smaller than minimal";
else if (unlikely(rlen % 4 != 0)) else if (unlikely(rlen % 4 != 0))
error_msg = "rec_len % 4 != 0"; error_msg = "rec_len % 4 != 0";
else if (unlikely(rlen < EXT4_DIR_REC_LEN(de->name_len))) else if (unlikely(rlen < ext4_dir_rec_len(de->name_len,
fake ? NULL : dir)))
error_msg = "rec_len is too small for name_len"; error_msg = "rec_len is too small for name_len";
else if (unlikely(next_offset > size)) else if (unlikely(next_offset > size))
error_msg = "directory entry overrun"; error_msg = "directory entry overrun";
else if (unlikely(next_offset > size - EXT4_DIR_REC_LEN(1) && else if (unlikely(next_offset > size - ext4_dir_rec_len(1,
has_csum ? NULL : dir) &&
next_offset != size)) next_offset != size))
error_msg = "directory entry too close to block end"; error_msg = "directory entry too close to block end";
else if (unlikely(le32_to_cpu(de->inode) > else if (unlikely(le32_to_cpu(de->inode) >
...@@ -94,15 +110,15 @@ int __ext4_check_dir_entry(const char *function, unsigned int line, ...@@ -94,15 +110,15 @@ int __ext4_check_dir_entry(const char *function, unsigned int line,
if (filp) if (filp)
ext4_error_file(filp, function, line, bh->b_blocknr, ext4_error_file(filp, function, line, bh->b_blocknr,
"bad entry in directory: %s - offset=%u, " "bad entry in directory: %s - offset=%u, "
"inode=%u, rec_len=%d, name_len=%d, size=%d", "inode=%u, rec_len=%d, size=%d fake=%d",
error_msg, offset, le32_to_cpu(de->inode), error_msg, offset, le32_to_cpu(de->inode),
rlen, de->name_len, size); rlen, size, fake);
else else
ext4_error_inode(dir, function, line, bh->b_blocknr, ext4_error_inode(dir, function, line, bh->b_blocknr,
"bad entry in directory: %s - offset=%u, " "bad entry in directory: %s - offset=%u, "
"inode=%u, rec_len=%d, name_len=%d, size=%d", "inode=%u, rec_len=%d, size=%d fake=%d",
error_msg, offset, le32_to_cpu(de->inode), error_msg, offset, le32_to_cpu(de->inode),
rlen, de->name_len, size); rlen, size, fake);
return 1; return 1;
} }
...@@ -124,9 +140,9 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) ...@@ -124,9 +140,9 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
if (is_dx_dir(inode)) { if (is_dx_dir(inode)) {
err = ext4_dx_readdir(file, ctx); err = ext4_dx_readdir(file, ctx);
if (err != ERR_BAD_DX_DIR) { if (err != ERR_BAD_DX_DIR)
return err; return err;
}
/* Can we just clear INDEX flag to ignore htree information? */ /* Can we just clear INDEX flag to ignore htree information? */
if (!ext4_has_metadata_csum(sb)) { if (!ext4_has_metadata_csum(sb)) {
/* /*
...@@ -224,7 +240,8 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) ...@@ -224,7 +240,8 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
* failure will be detected in the * failure will be detected in the
* dirent test below. */ * dirent test below. */
if (ext4_rec_len_from_disk(de->rec_len, if (ext4_rec_len_from_disk(de->rec_len,
sb->s_blocksize) < EXT4_DIR_REC_LEN(1)) sb->s_blocksize) < ext4_dir_rec_len(1,
inode))
break; break;
i += ext4_rec_len_from_disk(de->rec_len, i += ext4_rec_len_from_disk(de->rec_len,
sb->s_blocksize); sb->s_blocksize);
...@@ -265,7 +282,9 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) ...@@ -265,7 +282,9 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
/* Directory is encrypted */ /* Directory is encrypted */
err = fscrypt_fname_disk_to_usr(inode, err = fscrypt_fname_disk_to_usr(inode,
0, 0, &de_name, &fstr); EXT4_DIRENT_HASH(de),
EXT4_DIRENT_MINOR_HASH(de),
&de_name, &fstr);
de_name = fstr; de_name = fstr;
fstr.len = save_len; fstr.len = save_len;
if (err) if (err)
......
...@@ -162,7 +162,12 @@ enum SHIFT_DIRECTION { ...@@ -162,7 +162,12 @@ enum SHIFT_DIRECTION {
#define EXT4_MB_USE_RESERVED 0x2000 #define EXT4_MB_USE_RESERVED 0x2000
/* Do strict check for free blocks while retrying block allocation */ /* Do strict check for free blocks while retrying block allocation */
#define EXT4_MB_STRICT_CHECK 0x4000 #define EXT4_MB_STRICT_CHECK 0x4000
/* Large fragment size list lookup succeeded at least once for cr = 0 */
#define EXT4_MB_CR0_OPTIMIZED 0x8000
/* Avg fragment size rb tree lookup succeeded at least once for cr = 1 */
#define EXT4_MB_CR1_OPTIMIZED 0x00010000
/* Perform linear traversal for one group */
#define EXT4_MB_SEARCH_NEXT_LINEAR 0x00020000
struct ext4_allocation_request { struct ext4_allocation_request {
/* target inode for block we're allocating */ /* target inode for block we're allocating */
struct inode *inode; struct inode *inode;
...@@ -1213,7 +1218,7 @@ struct ext4_inode_info { ...@@ -1213,7 +1218,7 @@ struct ext4_inode_info {
#define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */ #define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */
#define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */ #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */
#define EXT4_MOUNT_WARN_ON_ERROR 0x2000000 /* Trigger WARN_ON on error */ #define EXT4_MOUNT_WARN_ON_ERROR 0x2000000 /* Trigger WARN_ON on error */
#define EXT4_MOUNT_PREFETCH_BLOCK_BITMAPS 0x4000000 #define EXT4_MOUNT_NO_PREFETCH_BLOCK_BITMAPS 0x4000000
#define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */ #define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */
#define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */ #define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */
#define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */ #define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */
...@@ -1238,7 +1243,9 @@ struct ext4_inode_info { ...@@ -1238,7 +1243,9 @@ struct ext4_inode_info {
#define EXT4_MOUNT2_JOURNAL_FAST_COMMIT 0x00000010 /* Journal fast commit */ #define EXT4_MOUNT2_JOURNAL_FAST_COMMIT 0x00000010 /* Journal fast commit */
#define EXT4_MOUNT2_DAX_NEVER 0x00000020 /* Do not allow Direct Access */ #define EXT4_MOUNT2_DAX_NEVER 0x00000020 /* Do not allow Direct Access */
#define EXT4_MOUNT2_DAX_INODE 0x00000040 /* For printing options only */ #define EXT4_MOUNT2_DAX_INODE 0x00000040 /* For printing options only */
#define EXT4_MOUNT2_MB_OPTIMIZE_SCAN 0x00000080 /* Optimize group
* scanning in mballoc
*/
#define clear_opt(sb, opt) EXT4_SB(sb)->s_mount_opt &= \ #define clear_opt(sb, opt) EXT4_SB(sb)->s_mount_opt &= \
~EXT4_MOUNT_##opt ~EXT4_MOUNT_##opt
...@@ -1519,9 +1526,14 @@ struct ext4_sb_info { ...@@ -1519,9 +1526,14 @@ struct ext4_sb_info {
unsigned int s_mb_free_pending; unsigned int s_mb_free_pending;
struct list_head s_freed_data_list; /* List of blocks to be freed struct list_head s_freed_data_list; /* List of blocks to be freed
after commit completed */ after commit completed */
struct rb_root s_mb_avg_fragment_size_root;
rwlock_t s_mb_rb_lock;
struct list_head *s_mb_largest_free_orders;
rwlock_t *s_mb_largest_free_orders_locks;
/* tunables */ /* tunables */
unsigned long s_stripe; unsigned long s_stripe;
unsigned int s_mb_max_linear_groups;
unsigned int s_mb_stream_request; unsigned int s_mb_stream_request;
unsigned int s_mb_max_to_scan; unsigned int s_mb_max_to_scan;
unsigned int s_mb_min_to_scan; unsigned int s_mb_min_to_scan;
...@@ -1541,12 +1553,17 @@ struct ext4_sb_info { ...@@ -1541,12 +1553,17 @@ struct ext4_sb_info {
atomic_t s_bal_success; /* we found long enough chunks */ atomic_t s_bal_success; /* we found long enough chunks */
atomic_t s_bal_allocated; /* in blocks */ atomic_t s_bal_allocated; /* in blocks */
atomic_t s_bal_ex_scanned; /* total extents scanned */ atomic_t s_bal_ex_scanned; /* total extents scanned */
atomic_t s_bal_groups_scanned; /* number of groups scanned */
atomic_t s_bal_goals; /* goal hits */ atomic_t s_bal_goals; /* goal hits */
atomic_t s_bal_breaks; /* too long searches */ atomic_t s_bal_breaks; /* too long searches */
atomic_t s_bal_2orders; /* 2^order hits */ atomic_t s_bal_2orders; /* 2^order hits */
spinlock_t s_bal_lock; atomic_t s_bal_cr0_bad_suggestions;
unsigned long s_mb_buddies_generated; atomic_t s_bal_cr1_bad_suggestions;
unsigned long long s_mb_generation_time; atomic64_t s_bal_cX_groups_considered[4];
atomic64_t s_bal_cX_hits[4];
atomic64_t s_bal_cX_failed[4]; /* cX loop didn't find blocks */
atomic_t s_mb_buddies_generated; /* number of buddies generated */
atomic64_t s_mb_generation_time;
atomic_t s_mb_lost_chunks; atomic_t s_mb_lost_chunks;
atomic_t s_mb_preallocated; atomic_t s_mb_preallocated;
atomic_t s_mb_discarded; atomic_t s_mb_discarded;
...@@ -2187,6 +2204,17 @@ struct ext4_dir_entry { ...@@ -2187,6 +2204,17 @@ struct ext4_dir_entry {
char name[EXT4_NAME_LEN]; /* File name */ char name[EXT4_NAME_LEN]; /* File name */
}; };
/*
 * Encrypted casefolded entries require saving the hash on disk.  This
 * structure follows ext4_dir_entry_2's name[name_len] at the next 4 byte
 * aligned boundary and is included in the entry's rec_len.
 */
struct ext4_dir_entry_hash {
	__le32 hash;		/* hash of the directory entry name */
	__le32 minor_hash;	/* minor hash of the directory entry name */
};
/* /*
* The new version of the directory entry. Since EXT4 structures are * The new version of the directory entry. Since EXT4 structures are
* stored in intel byte order, and the name_len field could never be * stored in intel byte order, and the name_len field could never be
...@@ -2201,6 +2229,22 @@ struct ext4_dir_entry_2 { ...@@ -2201,6 +2229,22 @@ struct ext4_dir_entry_2 {
char name[EXT4_NAME_LEN]; /* File name */ char name[EXT4_NAME_LEN]; /* File name */
}; };
/*
 * Access the hashes stored at the end of an ext4_dir_entry_2.
 *
 * Only meaningful for entries in a directory for which
 * ext4_hash_in_dirent() is true; the hashes live at the first 4-byte
 * aligned offset after name[name_len] and are counted in rec_len.
 */
#define EXT4_DIRENT_HASHES(entry) \
	((struct ext4_dir_entry_hash *) \
		(((void *)(entry)) + \
		((8 + (entry)->name_len + EXT4_DIR_ROUND) & ~EXT4_DIR_ROUND)))
/*
 * Fix: expand the macro parameter rather than a hard-coded "de" --
 * the previous expansion only worked (or silently captured the wrong
 * variable) when the caller's local happened to be named "de".
 */
#define EXT4_DIRENT_HASH(entry) le32_to_cpu(EXT4_DIRENT_HASHES(entry)->hash)
#define EXT4_DIRENT_MINOR_HASH(entry) \
		le32_to_cpu(EXT4_DIRENT_HASHES(entry)->minor_hash)
static inline bool ext4_hash_in_dirent(const struct inode *inode)
{
return IS_CASEFOLDED(inode) && IS_ENCRYPTED(inode);
}
/* /*
* This is a bogus directory entry at the end of each leaf block that * This is a bogus directory entry at the end of each leaf block that
* records checksums. * records checksums.
...@@ -2242,10 +2286,24 @@ struct ext4_dir_entry_tail { ...@@ -2242,10 +2286,24 @@ struct ext4_dir_entry_tail {
*/ */
#define EXT4_DIR_PAD 4 #define EXT4_DIR_PAD 4
#define EXT4_DIR_ROUND (EXT4_DIR_PAD - 1) #define EXT4_DIR_ROUND (EXT4_DIR_PAD - 1)
#define EXT4_DIR_REC_LEN(name_len) (((name_len) + 8 + EXT4_DIR_ROUND) & \
~EXT4_DIR_ROUND)
#define EXT4_MAX_REC_LEN ((1<<16)-1) #define EXT4_MAX_REC_LEN ((1<<16)-1)
/*
 * On-disk size of a directory entry with a name of the given length.
 *
 * The rec_len depends on the type of directory: directories that are
 * both casefolded and encrypted also store the name hash
 * (struct ext4_dir_entry_hash), so room for it must be added.  Pass
 * dir == NULL for entries related to '.' or '..', as those entries do
 * not use the extra hash fields.
 */
static inline unsigned int ext4_dir_rec_len(__u8 name_len,
						const struct inode *dir)
{
	unsigned int rec_len = name_len + 8 + EXT4_DIR_ROUND;

	if (dir && ext4_hash_in_dirent(dir))
		rec_len += sizeof(struct ext4_dir_entry_hash);
	return rec_len & ~EXT4_DIR_ROUND;
}
/* /*
* If we ever get support for fs block sizes > page_size, we'll need * If we ever get support for fs block sizes > page_size, we'll need
* to remove the #if statements in the next two functions... * to remove the #if statements in the next two functions...
...@@ -2302,6 +2360,7 @@ static inline __le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize) ...@@ -2302,6 +2360,7 @@ static inline __le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize)
#define DX_HASH_LEGACY_UNSIGNED 3 #define DX_HASH_LEGACY_UNSIGNED 3
#define DX_HASH_HALF_MD4_UNSIGNED 4 #define DX_HASH_HALF_MD4_UNSIGNED 4
#define DX_HASH_TEA_UNSIGNED 5 #define DX_HASH_TEA_UNSIGNED 5
#define DX_HASH_SIPHASH 6
static inline u32 ext4_chksum(struct ext4_sb_info *sbi, u32 crc, static inline u32 ext4_chksum(struct ext4_sb_info *sbi, u32 crc,
const void *address, unsigned int length) const void *address, unsigned int length)
...@@ -2356,6 +2415,7 @@ struct ext4_filename { ...@@ -2356,6 +2415,7 @@ struct ext4_filename {
}; };
#define fname_name(p) ((p)->disk_name.name) #define fname_name(p) ((p)->disk_name.name)
#define fname_usr_name(p) ((p)->usr_fname->name)
#define fname_len(p) ((p)->disk_name.len) #define fname_len(p) ((p)->disk_name.len)
/* /*
...@@ -2586,9 +2646,9 @@ extern unsigned ext4_free_clusters_after_init(struct super_block *sb, ...@@ -2586,9 +2646,9 @@ extern unsigned ext4_free_clusters_after_init(struct super_block *sb,
ext4_fsblk_t ext4_inode_to_goal_block(struct inode *); ext4_fsblk_t ext4_inode_to_goal_block(struct inode *);
#ifdef CONFIG_UNICODE #ifdef CONFIG_UNICODE
extern void ext4_fname_setup_ci_filename(struct inode *dir, extern int ext4_fname_setup_ci_filename(struct inode *dir,
const struct qstr *iname, const struct qstr *iname,
struct fscrypt_str *fname); struct ext4_filename *fname);
#endif #endif
#ifdef CONFIG_FS_ENCRYPTION #ifdef CONFIG_FS_ENCRYPTION
...@@ -2619,9 +2679,9 @@ static inline int ext4_fname_setup_filename(struct inode *dir, ...@@ -2619,9 +2679,9 @@ static inline int ext4_fname_setup_filename(struct inode *dir,
ext4_fname_from_fscrypt_name(fname, &name); ext4_fname_from_fscrypt_name(fname, &name);
#ifdef CONFIG_UNICODE #ifdef CONFIG_UNICODE
ext4_fname_setup_ci_filename(dir, iname, &fname->cf_name); err = ext4_fname_setup_ci_filename(dir, iname, fname);
#endif #endif
return 0; return err;
} }
static inline int ext4_fname_prepare_lookup(struct inode *dir, static inline int ext4_fname_prepare_lookup(struct inode *dir,
...@@ -2638,9 +2698,9 @@ static inline int ext4_fname_prepare_lookup(struct inode *dir, ...@@ -2638,9 +2698,9 @@ static inline int ext4_fname_prepare_lookup(struct inode *dir,
ext4_fname_from_fscrypt_name(fname, &name); ext4_fname_from_fscrypt_name(fname, &name);
#ifdef CONFIG_UNICODE #ifdef CONFIG_UNICODE
ext4_fname_setup_ci_filename(dir, &dentry->d_name, &fname->cf_name); err = ext4_fname_setup_ci_filename(dir, &dentry->d_name, fname);
#endif #endif
return 0; return err;
} }
static inline void ext4_fname_free_filename(struct ext4_filename *fname) static inline void ext4_fname_free_filename(struct ext4_filename *fname)
...@@ -2665,15 +2725,16 @@ static inline int ext4_fname_setup_filename(struct inode *dir, ...@@ -2665,15 +2725,16 @@ static inline int ext4_fname_setup_filename(struct inode *dir,
int lookup, int lookup,
struct ext4_filename *fname) struct ext4_filename *fname)
{ {
int err = 0;
fname->usr_fname = iname; fname->usr_fname = iname;
fname->disk_name.name = (unsigned char *) iname->name; fname->disk_name.name = (unsigned char *) iname->name;
fname->disk_name.len = iname->len; fname->disk_name.len = iname->len;
#ifdef CONFIG_UNICODE #ifdef CONFIG_UNICODE
ext4_fname_setup_ci_filename(dir, iname, &fname->cf_name); err = ext4_fname_setup_ci_filename(dir, iname, fname);
#endif #endif
return 0; return err;
} }
static inline int ext4_fname_prepare_lookup(struct inode *dir, static inline int ext4_fname_prepare_lookup(struct inode *dir,
...@@ -2698,9 +2759,9 @@ extern int __ext4_check_dir_entry(const char *, unsigned int, struct inode *, ...@@ -2698,9 +2759,9 @@ extern int __ext4_check_dir_entry(const char *, unsigned int, struct inode *,
struct ext4_dir_entry_2 *, struct ext4_dir_entry_2 *,
struct buffer_head *, char *, int, struct buffer_head *, char *, int,
unsigned int); unsigned int);
#define ext4_check_dir_entry(dir, filp, de, bh, buf, size, offset) \ #define ext4_check_dir_entry(dir, filp, de, bh, buf, size, offset) \
unlikely(__ext4_check_dir_entry(__func__, __LINE__, (dir), (filp), \ unlikely(__ext4_check_dir_entry(__func__, __LINE__, (dir), (filp), \
(de), (bh), (buf), (size), (offset))) (de), (bh), (buf), (size), (offset)))
extern int ext4_htree_store_dirent(struct file *dir_file, __u32 hash, extern int ext4_htree_store_dirent(struct file *dir_file, __u32 hash,
__u32 minor_hash, __u32 minor_hash,
struct ext4_dir_entry_2 *dirent, struct ext4_dir_entry_2 *dirent,
...@@ -2711,7 +2772,7 @@ extern int ext4_find_dest_de(struct inode *dir, struct inode *inode, ...@@ -2711,7 +2772,7 @@ extern int ext4_find_dest_de(struct inode *dir, struct inode *inode,
void *buf, int buf_size, void *buf, int buf_size,
struct ext4_filename *fname, struct ext4_filename *fname,
struct ext4_dir_entry_2 **dest_de); struct ext4_dir_entry_2 **dest_de);
void ext4_insert_dentry(struct inode *inode, void ext4_insert_dentry(struct inode *dir, struct inode *inode,
struct ext4_dir_entry_2 *de, struct ext4_dir_entry_2 *de,
int buf_size, int buf_size,
struct ext4_filename *fname); struct ext4_filename *fname);
...@@ -2802,8 +2863,10 @@ int __init ext4_fc_init_dentry_cache(void); ...@@ -2802,8 +2863,10 @@ int __init ext4_fc_init_dentry_cache(void);
/* mballoc.c */ /* mballoc.c */
extern const struct seq_operations ext4_mb_seq_groups_ops; extern const struct seq_operations ext4_mb_seq_groups_ops;
extern const struct seq_operations ext4_mb_seq_structs_summary_ops;
extern long ext4_mb_stats; extern long ext4_mb_stats;
extern long ext4_mb_max_to_scan; extern long ext4_mb_max_to_scan;
extern int ext4_seq_mb_stats_show(struct seq_file *seq, void *offset);
extern int ext4_mb_init(struct super_block *); extern int ext4_mb_init(struct super_block *);
extern int ext4_mb_release(struct super_block *); extern int ext4_mb_release(struct super_block *);
extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *, extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *,
...@@ -3306,11 +3369,14 @@ struct ext4_group_info { ...@@ -3306,11 +3369,14 @@ struct ext4_group_info {
ext4_grpblk_t bb_free; /* total free blocks */ ext4_grpblk_t bb_free; /* total free blocks */
ext4_grpblk_t bb_fragments; /* nr of freespace fragments */ ext4_grpblk_t bb_fragments; /* nr of freespace fragments */
ext4_grpblk_t bb_largest_free_order;/* order of largest frag in BG */ ext4_grpblk_t bb_largest_free_order;/* order of largest frag in BG */
ext4_group_t bb_group; /* Group number */
struct list_head bb_prealloc_list; struct list_head bb_prealloc_list;
#ifdef DOUBLE_CHECK #ifdef DOUBLE_CHECK
void *bb_bitmap; void *bb_bitmap;
#endif #endif
struct rw_semaphore alloc_sem; struct rw_semaphore alloc_sem;
struct rb_node bb_avg_fragment_size_rb;
struct list_head bb_largest_free_order_node;
ext4_grpblk_t bb_counters[]; /* Nr of free power-of-two-block ext4_grpblk_t bb_counters[]; /* Nr of free power-of-two-block
* regions, index is order. * regions, index is order.
* bb_counters[3] = 5 means * bb_counters[3] = 5 means
...@@ -3513,9 +3579,6 @@ extern void ext4_initialize_dirent_tail(struct buffer_head *bh, ...@@ -3513,9 +3579,6 @@ extern void ext4_initialize_dirent_tail(struct buffer_head *bh,
unsigned int blocksize); unsigned int blocksize);
extern int ext4_handle_dirty_dirblock(handle_t *handle, struct inode *inode, extern int ext4_handle_dirty_dirblock(handle_t *handle, struct inode *inode,
struct buffer_head *bh); struct buffer_head *bh);
extern int ext4_ci_compare(const struct inode *parent,
const struct qstr *fname,
const struct qstr *entry, bool quick);
extern int __ext4_unlink(handle_t *handle, struct inode *dir, const struct qstr *d_name, extern int __ext4_unlink(handle_t *handle, struct inode *dir, const struct qstr *d_name,
struct inode *inode); struct inode *inode);
extern int __ext4_link(struct inode *dir, struct inode *inode, extern int __ext4_link(struct inode *dir, struct inode *inode,
......
...@@ -66,7 +66,7 @@ ...@@ -66,7 +66,7 @@
* Fast Commit Ineligibility * Fast Commit Ineligibility
* ------------------------- * -------------------------
* Not all operations are supported by fast commits today (e.g extended * Not all operations are supported by fast commits today (e.g extended
* attributes). Fast commit ineligiblity is marked by calling one of the * attributes). Fast commit ineligibility is marked by calling one of the
* two following functions: * two following functions:
* *
* - ext4_fc_mark_ineligible(): This makes next fast commit operation to fall * - ext4_fc_mark_ineligible(): This makes next fast commit operation to fall
...@@ -1088,8 +1088,10 @@ static int ext4_fc_perform_commit(journal_t *journal) ...@@ -1088,8 +1088,10 @@ static int ext4_fc_perform_commit(journal_t *journal)
head.fc_tid = cpu_to_le32( head.fc_tid = cpu_to_le32(
sbi->s_journal->j_running_transaction->t_tid); sbi->s_journal->j_running_transaction->t_tid);
if (!ext4_fc_add_tlv(sb, EXT4_FC_TAG_HEAD, sizeof(head), if (!ext4_fc_add_tlv(sb, EXT4_FC_TAG_HEAD, sizeof(head),
(u8 *)&head, &crc)) (u8 *)&head, &crc)) {
ret = -ENOSPC;
goto out; goto out;
}
} }
spin_lock(&sbi->s_fc_lock); spin_lock(&sbi->s_fc_lock);
...@@ -1734,7 +1736,7 @@ static int ext4_fc_replay_add_range(struct super_block *sb, ...@@ -1734,7 +1736,7 @@ static int ext4_fc_replay_add_range(struct super_block *sb,
} }
/* Range is mapped and needs a state change */ /* Range is mapped and needs a state change */
jbd_debug(1, "Converting from %d to %d %lld", jbd_debug(1, "Converting from %ld to %d %lld",
map.m_flags & EXT4_MAP_UNWRITTEN, map.m_flags & EXT4_MAP_UNWRITTEN,
ext4_ext_is_unwritten(ex), map.m_pblk); ext4_ext_is_unwritten(ex), map.m_pblk);
ret = ext4_ext_replay_update_ex(inode, cur, map.m_len, ret = ext4_ext_replay_update_ex(inode, cur, map.m_len,
......
...@@ -371,15 +371,32 @@ static ssize_t ext4_handle_inode_extension(struct inode *inode, loff_t offset, ...@@ -371,15 +371,32 @@ static ssize_t ext4_handle_inode_extension(struct inode *inode, loff_t offset,
static int ext4_dio_write_end_io(struct kiocb *iocb, ssize_t size, static int ext4_dio_write_end_io(struct kiocb *iocb, ssize_t size,
int error, unsigned int flags) int error, unsigned int flags)
{ {
loff_t offset = iocb->ki_pos; loff_t pos = iocb->ki_pos;
struct inode *inode = file_inode(iocb->ki_filp); struct inode *inode = file_inode(iocb->ki_filp);
if (error) if (error)
return error; return error;
if (size && flags & IOMAP_DIO_UNWRITTEN) if (size && flags & IOMAP_DIO_UNWRITTEN) {
return ext4_convert_unwritten_extents(NULL, inode, error = ext4_convert_unwritten_extents(NULL, inode, pos, size);
offset, size); if (error < 0)
return error;
}
/*
* If we are extending the file, we have to update i_size here before
* page cache gets invalidated in iomap_dio_rw(). Otherwise racing
* buffered reads could zero out too much from page cache pages. Update
* of on-disk size will happen later in ext4_dio_write_iter() where
* we have enough information to also perform orphan list handling etc.
* Note that we perform all extending writes synchronously under
* i_rwsem held exclusively so i_size update is safe here in that case.
* If the write was not extending, we cannot see pos > i_size here
* because operations reducing i_size like truncate wait for all
* outstanding DIO before updating i_size.
*/
pos += size;
if (pos > i_size_read(inode))
i_size_write(inode, pos);
return 0; return 0;
} }
......
...@@ -197,7 +197,7 @@ static void str2hashbuf_unsigned(const char *msg, int len, __u32 *buf, int num) ...@@ -197,7 +197,7 @@ static void str2hashbuf_unsigned(const char *msg, int len, __u32 *buf, int num)
* represented, and whether or not the returned hash is 32 bits or 64 * represented, and whether or not the returned hash is 32 bits or 64
* bits. 32 bit hashes will return 0 for the minor hash. * bits. 32 bit hashes will return 0 for the minor hash.
*/ */
static int __ext4fs_dirhash(const char *name, int len, static int __ext4fs_dirhash(const struct inode *dir, const char *name, int len,
struct dx_hash_info *hinfo) struct dx_hash_info *hinfo)
{ {
__u32 hash; __u32 hash;
...@@ -259,6 +259,22 @@ static int __ext4fs_dirhash(const char *name, int len, ...@@ -259,6 +259,22 @@ static int __ext4fs_dirhash(const char *name, int len,
hash = buf[0]; hash = buf[0];
minor_hash = buf[1]; minor_hash = buf[1];
break; break;
case DX_HASH_SIPHASH:
{
struct qstr qname = QSTR_INIT(name, len);
__u64 combined_hash;
if (fscrypt_has_encryption_key(dir)) {
combined_hash = fscrypt_fname_siphash(dir, &qname);
} else {
ext4_warning_inode(dir, "Siphash requires key");
return -1;
}
hash = (__u32)(combined_hash >> 32);
minor_hash = (__u32)combined_hash;
break;
}
default: default:
hinfo->hash = 0; hinfo->hash = 0;
return -1; return -1;
...@@ -280,7 +296,8 @@ int ext4fs_dirhash(const struct inode *dir, const char *name, int len, ...@@ -280,7 +296,8 @@ int ext4fs_dirhash(const struct inode *dir, const char *name, int len,
unsigned char *buff; unsigned char *buff;
struct qstr qstr = {.name = name, .len = len }; struct qstr qstr = {.name = name, .len = len };
if (len && IS_CASEFOLDED(dir) && um) { if (len && IS_CASEFOLDED(dir) && um &&
(!IS_ENCRYPTED(dir) || fscrypt_has_encryption_key(dir))) {
buff = kzalloc(sizeof(char) * PATH_MAX, GFP_KERNEL); buff = kzalloc(sizeof(char) * PATH_MAX, GFP_KERNEL);
if (!buff) if (!buff)
return -ENOMEM; return -ENOMEM;
...@@ -291,12 +308,12 @@ int ext4fs_dirhash(const struct inode *dir, const char *name, int len, ...@@ -291,12 +308,12 @@ int ext4fs_dirhash(const struct inode *dir, const char *name, int len,
goto opaque_seq; goto opaque_seq;
} }
r = __ext4fs_dirhash(buff, dlen, hinfo); r = __ext4fs_dirhash(dir, buff, dlen, hinfo);
kfree(buff); kfree(buff);
return r; return r;
} }
opaque_seq: opaque_seq:
#endif #endif
return __ext4fs_dirhash(name, len, hinfo); return __ext4fs_dirhash(dir, name, len, hinfo);
} }
...@@ -1292,7 +1292,8 @@ struct inode *__ext4_new_inode(struct user_namespace *mnt_userns, ...@@ -1292,7 +1292,8 @@ struct inode *__ext4_new_inode(struct user_namespace *mnt_userns,
ei->i_extra_isize = sbi->s_want_extra_isize; ei->i_extra_isize = sbi->s_want_extra_isize;
ei->i_inline_off = 0; ei->i_inline_off = 0;
if (ext4_has_feature_inline_data(sb)) if (ext4_has_feature_inline_data(sb) &&
(!(ei->i_flags & EXT4_DAX_FL) || S_ISDIR(mode)))
ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
ret = inode; ret = inode;
err = dquot_alloc_inode(inode); err = dquot_alloc_inode(inode);
...@@ -1513,6 +1514,7 @@ int ext4_init_inode_table(struct super_block *sb, ext4_group_t group, ...@@ -1513,6 +1514,7 @@ int ext4_init_inode_table(struct super_block *sb, ext4_group_t group,
handle_t *handle; handle_t *handle;
ext4_fsblk_t blk; ext4_fsblk_t blk;
int num, ret = 0, used_blks = 0; int num, ret = 0, used_blks = 0;
unsigned long used_inos = 0;
/* This should not happen, but just to be sure check this */ /* This should not happen, but just to be sure check this */
if (sb_rdonly(sb)) { if (sb_rdonly(sb)) {
...@@ -1543,22 +1545,37 @@ int ext4_init_inode_table(struct super_block *sb, ext4_group_t group, ...@@ -1543,22 +1545,37 @@ int ext4_init_inode_table(struct super_block *sb, ext4_group_t group,
* used inodes so we need to skip blocks with used inodes in * used inodes so we need to skip blocks with used inodes in
* inode table. * inode table.
*/ */
if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT))) if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT))) {
used_blks = DIV_ROUND_UP((EXT4_INODES_PER_GROUP(sb) - used_inos = EXT4_INODES_PER_GROUP(sb) -
ext4_itable_unused_count(sb, gdp)), ext4_itable_unused_count(sb, gdp);
sbi->s_inodes_per_block); used_blks = DIV_ROUND_UP(used_inos, sbi->s_inodes_per_block);
if ((used_blks < 0) || (used_blks > sbi->s_itb_per_group) || /* Bogus inode unused count? */
((group == 0) && ((EXT4_INODES_PER_GROUP(sb) - if (used_blks < 0 || used_blks > sbi->s_itb_per_group) {
ext4_itable_unused_count(sb, gdp)) < ext4_error(sb, "Something is wrong with group %u: "
EXT4_FIRST_INO(sb)))) { "used itable blocks: %d; "
ext4_error(sb, "Something is wrong with group %u: " "itable unused count: %u",
"used itable blocks: %d; " group, used_blks,
"itable unused count: %u", ext4_itable_unused_count(sb, gdp));
group, used_blks, ret = 1;
ext4_itable_unused_count(sb, gdp)); goto err_out;
ret = 1; }
goto err_out;
used_inos += group * EXT4_INODES_PER_GROUP(sb);
/*
* Are there some uninitialized inodes in the inode table
* before the first normal inode?
*/
if ((used_blks != sbi->s_itb_per_group) &&
(used_inos < EXT4_FIRST_INO(sb))) {
ext4_error(sb, "Something is wrong with group %u: "
"itable unused count: %u; "
"itables initialized count: %ld",
group, ext4_itable_unused_count(sb, gdp),
used_inos);
ret = 1;
goto err_out;
}
} }
blk = ext4_inode_table(sb, gdp) + used_blks; blk = ext4_inode_table(sb, gdp) + used_blks;
......
...@@ -705,7 +705,7 @@ static int ext4_ind_trunc_restart_fn(handle_t *handle, struct inode *inode, ...@@ -705,7 +705,7 @@ static int ext4_ind_trunc_restart_fn(handle_t *handle, struct inode *inode,
/* /*
* Truncate transactions can be complex and absolutely huge. So we need to * Truncate transactions can be complex and absolutely huge. So we need to
* be able to restart the transaction at a conventient checkpoint to make * be able to restart the transaction at a convenient checkpoint to make
* sure we don't overflow the journal. * sure we don't overflow the journal.
* *
* Try to extend this transaction for the purposes of truncation. If * Try to extend this transaction for the purposes of truncation. If
......
...@@ -795,7 +795,7 @@ ext4_journalled_write_inline_data(struct inode *inode, ...@@ -795,7 +795,7 @@ ext4_journalled_write_inline_data(struct inode *inode,
* clear the inode state safely. * clear the inode state safely.
* 2. The inode has inline data, then we need to read the data, make it * 2. The inode has inline data, then we need to read the data, make it
* update and dirty so that ext4_da_writepages can handle it. We don't * update and dirty so that ext4_da_writepages can handle it. We don't
* need to start the journal since the file's metatdata isn't changed now. * need to start the journal since the file's metadata isn't changed now.
*/ */
static int ext4_da_convert_inline_data_to_extent(struct address_space *mapping, static int ext4_da_convert_inline_data_to_extent(struct address_space *mapping,
struct inode *inode, struct inode *inode,
...@@ -1031,7 +1031,7 @@ static int ext4_add_dirent_to_inline(handle_t *handle, ...@@ -1031,7 +1031,7 @@ static int ext4_add_dirent_to_inline(handle_t *handle,
err = ext4_journal_get_write_access(handle, iloc->bh); err = ext4_journal_get_write_access(handle, iloc->bh);
if (err) if (err)
return err; return err;
ext4_insert_dentry(inode, de, inline_size, fname); ext4_insert_dentry(dir, inode, de, inline_size, fname);
ext4_show_inline_dir(dir, iloc->bh, inline_start, inline_size); ext4_show_inline_dir(dir, iloc->bh, inline_start, inline_size);
...@@ -1100,7 +1100,7 @@ static int ext4_update_inline_dir(handle_t *handle, struct inode *dir, ...@@ -1100,7 +1100,7 @@ static int ext4_update_inline_dir(handle_t *handle, struct inode *dir,
int old_size = EXT4_I(dir)->i_inline_size - EXT4_MIN_INLINE_DATA_SIZE; int old_size = EXT4_I(dir)->i_inline_size - EXT4_MIN_INLINE_DATA_SIZE;
int new_size = get_max_inline_xattr_value_size(dir, iloc); int new_size = get_max_inline_xattr_value_size(dir, iloc);
if (new_size - old_size <= EXT4_DIR_REC_LEN(1)) if (new_size - old_size <= ext4_dir_rec_len(1, NULL))
return -ENOSPC; return -ENOSPC;
ret = ext4_update_inline_data(handle, dir, ret = ext4_update_inline_data(handle, dir,
...@@ -1380,8 +1380,8 @@ int ext4_inlinedir_to_tree(struct file *dir_file, ...@@ -1380,8 +1380,8 @@ int ext4_inlinedir_to_tree(struct file *dir_file,
fake.name_len = 1; fake.name_len = 1;
strcpy(fake.name, "."); strcpy(fake.name, ".");
fake.rec_len = ext4_rec_len_to_disk( fake.rec_len = ext4_rec_len_to_disk(
EXT4_DIR_REC_LEN(fake.name_len), ext4_dir_rec_len(fake.name_len, NULL),
inline_size); inline_size);
ext4_set_de_type(inode->i_sb, &fake, S_IFDIR); ext4_set_de_type(inode->i_sb, &fake, S_IFDIR);
de = &fake; de = &fake;
pos = EXT4_INLINE_DOTDOT_OFFSET; pos = EXT4_INLINE_DOTDOT_OFFSET;
...@@ -1390,8 +1390,8 @@ int ext4_inlinedir_to_tree(struct file *dir_file, ...@@ -1390,8 +1390,8 @@ int ext4_inlinedir_to_tree(struct file *dir_file,
fake.name_len = 2; fake.name_len = 2;
strcpy(fake.name, ".."); strcpy(fake.name, "..");
fake.rec_len = ext4_rec_len_to_disk( fake.rec_len = ext4_rec_len_to_disk(
EXT4_DIR_REC_LEN(fake.name_len), ext4_dir_rec_len(fake.name_len, NULL),
inline_size); inline_size);
ext4_set_de_type(inode->i_sb, &fake, S_IFDIR); ext4_set_de_type(inode->i_sb, &fake, S_IFDIR);
de = &fake; de = &fake;
pos = EXT4_INLINE_DOTDOT_SIZE; pos = EXT4_INLINE_DOTDOT_SIZE;
...@@ -1406,7 +1406,12 @@ int ext4_inlinedir_to_tree(struct file *dir_file, ...@@ -1406,7 +1406,12 @@ int ext4_inlinedir_to_tree(struct file *dir_file,
} }
} }
ext4fs_dirhash(dir, de->name, de->name_len, hinfo); if (ext4_hash_in_dirent(dir)) {
hinfo->hash = EXT4_DIRENT_HASH(de);
hinfo->minor_hash = EXT4_DIRENT_MINOR_HASH(de);
} else {
ext4fs_dirhash(dir, de->name, de->name_len, hinfo);
}
if ((hinfo->hash < start_hash) || if ((hinfo->hash < start_hash) ||
((hinfo->hash == start_hash) && ((hinfo->hash == start_hash) &&
(hinfo->minor_hash < start_minor_hash))) (hinfo->minor_hash < start_minor_hash)))
...@@ -1488,8 +1493,8 @@ int ext4_read_inline_dir(struct file *file, ...@@ -1488,8 +1493,8 @@ int ext4_read_inline_dir(struct file *file,
* So we will use extra_offset and extra_size to indicate them * So we will use extra_offset and extra_size to indicate them
* during the inline dir iteration. * during the inline dir iteration.
*/ */
dotdot_offset = EXT4_DIR_REC_LEN(1); dotdot_offset = ext4_dir_rec_len(1, NULL);
dotdot_size = dotdot_offset + EXT4_DIR_REC_LEN(2); dotdot_size = dotdot_offset + ext4_dir_rec_len(2, NULL);
extra_offset = dotdot_size - EXT4_INLINE_DOTDOT_SIZE; extra_offset = dotdot_size - EXT4_INLINE_DOTDOT_SIZE;
extra_size = extra_offset + inline_size; extra_size = extra_offset + inline_size;
...@@ -1524,7 +1529,7 @@ int ext4_read_inline_dir(struct file *file, ...@@ -1524,7 +1529,7 @@ int ext4_read_inline_dir(struct file *file,
* failure will be detected in the * failure will be detected in the
* dirent test below. */ * dirent test below. */
if (ext4_rec_len_from_disk(de->rec_len, extra_size) if (ext4_rec_len_from_disk(de->rec_len, extra_size)
< EXT4_DIR_REC_LEN(1)) < ext4_dir_rec_len(1, NULL))
break; break;
i += ext4_rec_len_from_disk(de->rec_len, i += ext4_rec_len_from_disk(de->rec_len,
extra_size); extra_size);
......
...@@ -1066,8 +1066,7 @@ static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len, ...@@ -1066,8 +1066,7 @@ static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len,
block_end = block_start + blocksize; block_end = block_start + blocksize;
if (block_end <= from || block_start >= to) { if (block_end <= from || block_start >= to) {
if (PageUptodate(page)) { if (PageUptodate(page)) {
if (!buffer_uptodate(bh)) set_buffer_uptodate(bh);
set_buffer_uptodate(bh);
} }
continue; continue;
} }
...@@ -1092,8 +1091,7 @@ static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len, ...@@ -1092,8 +1091,7 @@ static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len,
} }
} }
if (PageUptodate(page)) { if (PageUptodate(page)) {
if (!buffer_uptodate(bh)) set_buffer_uptodate(bh);
set_buffer_uptodate(bh);
continue; continue;
} }
if (!buffer_uptodate(bh) && !buffer_delay(bh) && if (!buffer_uptodate(bh) && !buffer_delay(bh) &&
...@@ -3824,7 +3822,7 @@ static int __ext4_block_zero_page_range(handle_t *handle, ...@@ -3824,7 +3822,7 @@ static int __ext4_block_zero_page_range(handle_t *handle,
* starting from file offset 'from'. The range to be zero'd must * starting from file offset 'from'. The range to be zero'd must
* be contained with in one block. If the specified range exceeds * be contained with in one block. If the specified range exceeds
* the end of the block it will be shortened to end of the block * the end of the block it will be shortened to end of the block
* that cooresponds to 'from' * that corresponds to 'from'
*/ */
static int ext4_block_zero_page_range(handle_t *handle, static int ext4_block_zero_page_range(handle_t *handle,
struct address_space *mapping, loff_t from, loff_t length) struct address_space *mapping, loff_t from, loff_t length)
......
...@@ -316,6 +316,12 @@ static void ext4_dax_dontcache(struct inode *inode, unsigned int flags) ...@@ -316,6 +316,12 @@ static void ext4_dax_dontcache(struct inode *inode, unsigned int flags)
static bool dax_compatible(struct inode *inode, unsigned int oldflags, static bool dax_compatible(struct inode *inode, unsigned int oldflags,
unsigned int flags) unsigned int flags)
{ {
/* Allow the DAX flag to be changed on inline directories */
if (S_ISDIR(inode->i_mode)) {
flags &= ~EXT4_INLINE_DATA_FL;
oldflags &= ~EXT4_INLINE_DATA_FL;
}
if (flags & EXT4_DAX_FL) { if (flags & EXT4_DAX_FL) {
if ((oldflags & EXT4_DAX_MUT_EXCL) || if ((oldflags & EXT4_DAX_MUT_EXCL) ||
ext4_test_inode_state(inode, ext4_test_inode_state(inode,
......
此差异已折叠。
...@@ -59,7 +59,7 @@ ...@@ -59,7 +59,7 @@
* by the stream allocator, which purpose is to pack requests * by the stream allocator, which purpose is to pack requests
* as close each to other as possible to produce smooth I/O traffic * as close each to other as possible to produce smooth I/O traffic
* We use locality group prealloc space for stream request. * We use locality group prealloc space for stream request.
* We can tune the same via /proc/fs/ext4/<parition>/stream_req * We can tune the same via /proc/fs/ext4/<partition>/stream_req
*/ */
#define MB_DEFAULT_STREAM_THRESHOLD 16 /* 64K */ #define MB_DEFAULT_STREAM_THRESHOLD 16 /* 64K */
...@@ -78,6 +78,23 @@ ...@@ -78,6 +78,23 @@
*/ */
#define MB_DEFAULT_MAX_INODE_PREALLOC 512 #define MB_DEFAULT_MAX_INODE_PREALLOC 512
/*
* Number of groups to search linearly before performing group scanning
* optimization.
*/
#define MB_DEFAULT_LINEAR_LIMIT 4
/*
* Minimum number of groups that should be present in the file system to perform
* group scanning optimizations.
*/
#define MB_DEFAULT_LINEAR_SCAN_THRESHOLD 16
/*
* Number of valid buddy orders
*/
#define MB_NUM_ORDERS(sb) ((sb)->s_blocksize_bits + 2)
struct ext4_free_data { struct ext4_free_data {
/* this links the free block information from sb_info */ /* this links the free block information from sb_info */
struct list_head efd_list; struct list_head efd_list;
...@@ -161,11 +178,14 @@ struct ext4_allocation_context { ...@@ -161,11 +178,14 @@ struct ext4_allocation_context {
/* copy of the best found extent taken before preallocation efforts */ /* copy of the best found extent taken before preallocation efforts */
struct ext4_free_extent ac_f_ex; struct ext4_free_extent ac_f_ex;
ext4_group_t ac_last_optimal_group;
__u32 ac_groups_considered;
__u32 ac_flags; /* allocation hints */
__u16 ac_groups_scanned; __u16 ac_groups_scanned;
__u16 ac_groups_linear_remaining;
__u16 ac_found; __u16 ac_found;
__u16 ac_tail; __u16 ac_tail;
__u16 ac_buddy; __u16 ac_buddy;
__u16 ac_flags; /* allocation hints */
__u8 ac_status; __u8 ac_status;
__u8 ac_criteria; __u8 ac_criteria;
__u8 ac_2order; /* if request is to allocate 2^N blocks and __u8 ac_2order; /* if request is to allocate 2^N blocks and
......
...@@ -32,7 +32,7 @@ static int finish_range(handle_t *handle, struct inode *inode, ...@@ -32,7 +32,7 @@ static int finish_range(handle_t *handle, struct inode *inode,
newext.ee_block = cpu_to_le32(lb->first_block); newext.ee_block = cpu_to_le32(lb->first_block);
newext.ee_len = cpu_to_le16(lb->last_block - lb->first_block + 1); newext.ee_len = cpu_to_le16(lb->last_block - lb->first_block + 1);
ext4_ext_store_pblock(&newext, lb->first_pblock); ext4_ext_store_pblock(&newext, lb->first_pblock);
/* Locking only for convinience since we are operating on temp inode */ /* Locking only for convenience since we are operating on temp inode */
down_write(&EXT4_I(inode)->i_data_sem); down_write(&EXT4_I(inode)->i_data_sem);
path = ext4_find_extent(inode, lb->first_block, NULL, 0); path = ext4_find_extent(inode, lb->first_block, NULL, 0);
if (IS_ERR(path)) { if (IS_ERR(path)) {
...@@ -43,8 +43,8 @@ static int finish_range(handle_t *handle, struct inode *inode, ...@@ -43,8 +43,8 @@ static int finish_range(handle_t *handle, struct inode *inode,
/* /*
* Calculate the credit needed to inserting this extent * Calculate the credit needed to inserting this extent
* Since we are doing this in loop we may accumalate extra * Since we are doing this in loop we may accumulate extra
* credit. But below we try to not accumalate too much * credit. But below we try to not accumulate too much
* of them by restarting the journal. * of them by restarting the journal.
*/ */
needed = ext4_ext_calc_credits_for_single_extent(inode, needed = ext4_ext_calc_credits_for_single_extent(inode,
......
...@@ -56,7 +56,7 @@ static int write_mmp_block(struct super_block *sb, struct buffer_head *bh) ...@@ -56,7 +56,7 @@ static int write_mmp_block(struct super_block *sb, struct buffer_head *bh)
wait_on_buffer(bh); wait_on_buffer(bh);
sb_end_write(sb); sb_end_write(sb);
if (unlikely(!buffer_uptodate(bh))) if (unlikely(!buffer_uptodate(bh)))
return 1; return -EIO;
return 0; return 0;
} }
......
...@@ -280,9 +280,11 @@ static int dx_make_map(struct inode *dir, struct ext4_dir_entry_2 *de, ...@@ -280,9 +280,11 @@ static int dx_make_map(struct inode *dir, struct ext4_dir_entry_2 *de,
unsigned blocksize, struct dx_hash_info *hinfo, unsigned blocksize, struct dx_hash_info *hinfo,
struct dx_map_entry map[]); struct dx_map_entry map[]);
static void dx_sort_map(struct dx_map_entry *map, unsigned count); static void dx_sort_map(struct dx_map_entry *map, unsigned count);
static struct ext4_dir_entry_2 *dx_move_dirents(char *from, char *to, static struct ext4_dir_entry_2 *dx_move_dirents(struct inode *dir, char *from,
struct dx_map_entry *offsets, int count, unsigned blocksize); char *to, struct dx_map_entry *offsets,
static struct ext4_dir_entry_2* dx_pack_dirents(char *base, unsigned blocksize); int count, unsigned int blocksize);
static struct ext4_dir_entry_2 *dx_pack_dirents(struct inode *dir, char *base,
unsigned int blocksize);
static void dx_insert_block(struct dx_frame *frame, static void dx_insert_block(struct dx_frame *frame,
u32 hash, ext4_lblk_t block); u32 hash, ext4_lblk_t block);
static int ext4_htree_next_block(struct inode *dir, __u32 hash, static int ext4_htree_next_block(struct inode *dir, __u32 hash,
...@@ -574,8 +576,9 @@ static inline void dx_set_limit(struct dx_entry *entries, unsigned value) ...@@ -574,8 +576,9 @@ static inline void dx_set_limit(struct dx_entry *entries, unsigned value)
static inline unsigned dx_root_limit(struct inode *dir, unsigned infosize) static inline unsigned dx_root_limit(struct inode *dir, unsigned infosize)
{ {
unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(1) - unsigned int entry_space = dir->i_sb->s_blocksize -
EXT4_DIR_REC_LEN(2) - infosize; ext4_dir_rec_len(1, NULL) -
ext4_dir_rec_len(2, NULL) - infosize;
if (ext4_has_metadata_csum(dir->i_sb)) if (ext4_has_metadata_csum(dir->i_sb))
entry_space -= sizeof(struct dx_tail); entry_space -= sizeof(struct dx_tail);
...@@ -584,7 +587,8 @@ static inline unsigned dx_root_limit(struct inode *dir, unsigned infosize) ...@@ -584,7 +587,8 @@ static inline unsigned dx_root_limit(struct inode *dir, unsigned infosize)
static inline unsigned dx_node_limit(struct inode *dir) static inline unsigned dx_node_limit(struct inode *dir)
{ {
unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(0); unsigned int entry_space = dir->i_sb->s_blocksize -
ext4_dir_rec_len(0, dir);
if (ext4_has_metadata_csum(dir->i_sb)) if (ext4_has_metadata_csum(dir->i_sb))
entry_space -= sizeof(struct dx_tail); entry_space -= sizeof(struct dx_tail);
...@@ -673,7 +677,10 @@ static struct stats dx_show_leaf(struct inode *dir, ...@@ -673,7 +677,10 @@ static struct stats dx_show_leaf(struct inode *dir,
name = fname_crypto_str.name; name = fname_crypto_str.name;
len = fname_crypto_str.len; len = fname_crypto_str.len;
} }
ext4fs_dirhash(dir, de->name, if (IS_CASEFOLDED(dir))
h.hash = EXT4_DIRENT_HASH(de);
else
ext4fs_dirhash(dir, de->name,
de->name_len, &h); de->name_len, &h);
printk("%*.s:(E)%x.%u ", len, name, printk("%*.s:(E)%x.%u ", len, name,
h.hash, (unsigned) ((char *) de h.hash, (unsigned) ((char *) de
...@@ -689,7 +696,7 @@ static struct stats dx_show_leaf(struct inode *dir, ...@@ -689,7 +696,7 @@ static struct stats dx_show_leaf(struct inode *dir,
(unsigned) ((char *) de - base)); (unsigned) ((char *) de - base));
#endif #endif
} }
space += EXT4_DIR_REC_LEN(de->name_len); space += ext4_dir_rec_len(de->name_len, dir);
names++; names++;
} }
de = ext4_next_entry(de, size); de = ext4_next_entry(de, size);
...@@ -784,18 +791,34 @@ dx_probe(struct ext4_filename *fname, struct inode *dir, ...@@ -784,18 +791,34 @@ dx_probe(struct ext4_filename *fname, struct inode *dir,
root = (struct dx_root *) frame->bh->b_data; root = (struct dx_root *) frame->bh->b_data;
if (root->info.hash_version != DX_HASH_TEA && if (root->info.hash_version != DX_HASH_TEA &&
root->info.hash_version != DX_HASH_HALF_MD4 && root->info.hash_version != DX_HASH_HALF_MD4 &&
root->info.hash_version != DX_HASH_LEGACY) { root->info.hash_version != DX_HASH_LEGACY &&
root->info.hash_version != DX_HASH_SIPHASH) {
ext4_warning_inode(dir, "Unrecognised inode hash code %u", ext4_warning_inode(dir, "Unrecognised inode hash code %u",
root->info.hash_version); root->info.hash_version);
goto fail; goto fail;
} }
if (ext4_hash_in_dirent(dir)) {
if (root->info.hash_version != DX_HASH_SIPHASH) {
ext4_warning_inode(dir,
"Hash in dirent, but hash is not SIPHASH");
goto fail;
}
} else {
if (root->info.hash_version == DX_HASH_SIPHASH) {
ext4_warning_inode(dir,
"Hash code is SIPHASH, but hash not in dirent");
goto fail;
}
}
if (fname) if (fname)
hinfo = &fname->hinfo; hinfo = &fname->hinfo;
hinfo->hash_version = root->info.hash_version; hinfo->hash_version = root->info.hash_version;
if (hinfo->hash_version <= DX_HASH_TEA) if (hinfo->hash_version <= DX_HASH_TEA)
hinfo->hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned; hinfo->hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned;
hinfo->seed = EXT4_SB(dir->i_sb)->s_hash_seed; hinfo->seed = EXT4_SB(dir->i_sb)->s_hash_seed;
if (fname && fname_name(fname)) /* hash is already computed for encrypted casefolded directory */
if (fname && fname_name(fname) &&
!(IS_ENCRYPTED(dir) && IS_CASEFOLDED(dir)))
ext4fs_dirhash(dir, fname_name(fname), fname_len(fname), hinfo); ext4fs_dirhash(dir, fname_name(fname), fname_len(fname), hinfo);
hash = hinfo->hash; hash = hinfo->hash;
...@@ -956,7 +979,7 @@ static int ext4_htree_next_block(struct inode *dir, __u32 hash, ...@@ -956,7 +979,7 @@ static int ext4_htree_next_block(struct inode *dir, __u32 hash,
* If the hash is 1, then continue only if the next page has a * If the hash is 1, then continue only if the next page has a
* continuation hash of any value. This is used for readdir * continuation hash of any value. This is used for readdir
* handling. Otherwise, check to see if the hash matches the * handling. Otherwise, check to see if the hash matches the
* desired contiuation hash. If it doesn't, return since * desired continuation hash. If it doesn't, return since
* there's no point to read in the successive index pages. * there's no point to read in the successive index pages.
*/ */
bhash = dx_get_hash(p->at); bhash = dx_get_hash(p->at);
...@@ -997,6 +1020,7 @@ static int htree_dirblock_to_tree(struct file *dir_file, ...@@ -997,6 +1020,7 @@ static int htree_dirblock_to_tree(struct file *dir_file,
struct ext4_dir_entry_2 *de, *top; struct ext4_dir_entry_2 *de, *top;
int err = 0, count = 0; int err = 0, count = 0;
struct fscrypt_str fname_crypto_str = FSTR_INIT(NULL, 0), tmp_str; struct fscrypt_str fname_crypto_str = FSTR_INIT(NULL, 0), tmp_str;
int csum = ext4_has_metadata_csum(dir->i_sb);
dxtrace(printk(KERN_INFO "In htree dirblock_to_tree: block %lu\n", dxtrace(printk(KERN_INFO "In htree dirblock_to_tree: block %lu\n",
(unsigned long)block)); (unsigned long)block));
...@@ -1005,9 +1029,11 @@ static int htree_dirblock_to_tree(struct file *dir_file, ...@@ -1005,9 +1029,11 @@ static int htree_dirblock_to_tree(struct file *dir_file,
return PTR_ERR(bh); return PTR_ERR(bh);
de = (struct ext4_dir_entry_2 *) bh->b_data; de = (struct ext4_dir_entry_2 *) bh->b_data;
/* csum entries are not larger in the casefolded encrypted case */
top = (struct ext4_dir_entry_2 *) ((char *) de + top = (struct ext4_dir_entry_2 *) ((char *) de +
dir->i_sb->s_blocksize - dir->i_sb->s_blocksize -
EXT4_DIR_REC_LEN(0)); ext4_dir_rec_len(0,
csum ? NULL : dir));
/* Check if the directory is encrypted */ /* Check if the directory is encrypted */
if (IS_ENCRYPTED(dir)) { if (IS_ENCRYPTED(dir)) {
err = fscrypt_prepare_readdir(dir); err = fscrypt_prepare_readdir(dir);
...@@ -1031,7 +1057,17 @@ static int htree_dirblock_to_tree(struct file *dir_file, ...@@ -1031,7 +1057,17 @@ static int htree_dirblock_to_tree(struct file *dir_file,
/* silently ignore the rest of the block */ /* silently ignore the rest of the block */
break; break;
} }
ext4fs_dirhash(dir, de->name, de->name_len, hinfo); if (ext4_hash_in_dirent(dir)) {
if (de->name_len && de->inode) {
hinfo->hash = EXT4_DIRENT_HASH(de);
hinfo->minor_hash = EXT4_DIRENT_MINOR_HASH(de);
} else {
hinfo->hash = 0;
hinfo->minor_hash = 0;
}
} else {
ext4fs_dirhash(dir, de->name, de->name_len, hinfo);
}
if ((hinfo->hash < start_hash) || if ((hinfo->hash < start_hash) ||
((hinfo->hash == start_hash) && ((hinfo->hash == start_hash) &&
(hinfo->minor_hash < start_minor_hash))) (hinfo->minor_hash < start_minor_hash)))
...@@ -1100,7 +1136,11 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash, ...@@ -1100,7 +1136,11 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
start_hash, start_minor_hash)); start_hash, start_minor_hash));
dir = file_inode(dir_file); dir = file_inode(dir_file);
if (!(ext4_test_inode_flag(dir, EXT4_INODE_INDEX))) { if (!(ext4_test_inode_flag(dir, EXT4_INODE_INDEX))) {
hinfo.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version; if (ext4_hash_in_dirent(dir))
hinfo.hash_version = DX_HASH_SIPHASH;
else
hinfo.hash_version =
EXT4_SB(dir->i_sb)->s_def_hash_version;
if (hinfo.hash_version <= DX_HASH_TEA) if (hinfo.hash_version <= DX_HASH_TEA)
hinfo.hash_version += hinfo.hash_version +=
EXT4_SB(dir->i_sb)->s_hash_unsigned; EXT4_SB(dir->i_sb)->s_hash_unsigned;
...@@ -1218,7 +1258,10 @@ static int dx_make_map(struct inode *dir, struct ext4_dir_entry_2 *de, ...@@ -1218,7 +1258,10 @@ static int dx_make_map(struct inode *dir, struct ext4_dir_entry_2 *de,
while ((char *) de < base + blocksize) { while ((char *) de < base + blocksize) {
if (de->name_len && de->inode) { if (de->name_len && de->inode) {
ext4fs_dirhash(dir, de->name, de->name_len, &h); if (ext4_hash_in_dirent(dir))
h.hash = EXT4_DIRENT_HASH(de);
else
ext4fs_dirhash(dir, de->name, de->name_len, &h);
map_tail--; map_tail--;
map_tail->hash = h.hash; map_tail->hash = h.hash;
map_tail->offs = ((char *) de - base)>>2; map_tail->offs = ((char *) de - base)>>2;
...@@ -1282,47 +1325,65 @@ static void dx_insert_block(struct dx_frame *frame, u32 hash, ext4_lblk_t block) ...@@ -1282,47 +1325,65 @@ static void dx_insert_block(struct dx_frame *frame, u32 hash, ext4_lblk_t block)
* Returns: 0 if the directory entry matches, more than 0 if it * Returns: 0 if the directory entry matches, more than 0 if it
* doesn't match or less than zero on error. * doesn't match or less than zero on error.
*/ */
int ext4_ci_compare(const struct inode *parent, const struct qstr *name, static int ext4_ci_compare(const struct inode *parent, const struct qstr *name,
const struct qstr *entry, bool quick) u8 *de_name, size_t de_name_len, bool quick)
{ {
const struct super_block *sb = parent->i_sb; const struct super_block *sb = parent->i_sb;
const struct unicode_map *um = sb->s_encoding; const struct unicode_map *um = sb->s_encoding;
struct fscrypt_str decrypted_name = FSTR_INIT(NULL, de_name_len);
struct qstr entry = QSTR_INIT(de_name, de_name_len);
int ret; int ret;
if (IS_ENCRYPTED(parent)) {
const struct fscrypt_str encrypted_name =
FSTR_INIT(de_name, de_name_len);
decrypted_name.name = kmalloc(de_name_len, GFP_KERNEL);
if (!decrypted_name.name)
return -ENOMEM;
ret = fscrypt_fname_disk_to_usr(parent, 0, 0, &encrypted_name,
&decrypted_name);
if (ret < 0)
goto out;
entry.name = decrypted_name.name;
entry.len = decrypted_name.len;
}
if (quick) if (quick)
ret = utf8_strncasecmp_folded(um, name, entry); ret = utf8_strncasecmp_folded(um, name, &entry);
else else
ret = utf8_strncasecmp(um, name, entry); ret = utf8_strncasecmp(um, name, &entry);
if (ret < 0) { if (ret < 0) {
/* Handle invalid character sequence as either an error /* Handle invalid character sequence as either an error
* or as an opaque byte sequence. * or as an opaque byte sequence.
*/ */
if (sb_has_strict_encoding(sb)) if (sb_has_strict_encoding(sb))
return -EINVAL; ret = -EINVAL;
else if (name->len != entry.len)
if (name->len != entry->len) ret = 1;
return 1; else
ret = !!memcmp(name->name, entry.name, entry.len);
return !!memcmp(name->name, entry->name, name->len);
} }
out:
kfree(decrypted_name.name);
return ret; return ret;
} }
void ext4_fname_setup_ci_filename(struct inode *dir, const struct qstr *iname, int ext4_fname_setup_ci_filename(struct inode *dir, const struct qstr *iname,
struct fscrypt_str *cf_name) struct ext4_filename *name)
{ {
struct fscrypt_str *cf_name = &name->cf_name;
struct dx_hash_info *hinfo = &name->hinfo;
int len; int len;
if (!IS_CASEFOLDED(dir) || !dir->i_sb->s_encoding) { if (!IS_CASEFOLDED(dir) || !dir->i_sb->s_encoding) {
cf_name->name = NULL; cf_name->name = NULL;
return; return 0;
} }
cf_name->name = kmalloc(EXT4_NAME_LEN, GFP_NOFS); cf_name->name = kmalloc(EXT4_NAME_LEN, GFP_NOFS);
if (!cf_name->name) if (!cf_name->name)
return; return -ENOMEM;
len = utf8_casefold(dir->i_sb->s_encoding, len = utf8_casefold(dir->i_sb->s_encoding,
iname, cf_name->name, iname, cf_name->name,
...@@ -1330,10 +1391,18 @@ void ext4_fname_setup_ci_filename(struct inode *dir, const struct qstr *iname, ...@@ -1330,10 +1391,18 @@ void ext4_fname_setup_ci_filename(struct inode *dir, const struct qstr *iname,
if (len <= 0) { if (len <= 0) {
kfree(cf_name->name); kfree(cf_name->name);
cf_name->name = NULL; cf_name->name = NULL;
return;
} }
cf_name->len = (unsigned) len; cf_name->len = (unsigned) len;
if (!IS_ENCRYPTED(dir))
return 0;
hinfo->hash_version = DX_HASH_SIPHASH;
hinfo->seed = NULL;
if (cf_name->name)
ext4fs_dirhash(dir, cf_name->name, cf_name->len, hinfo);
else
ext4fs_dirhash(dir, iname->name, iname->len, hinfo);
return 0;
} }
#endif #endif
...@@ -1342,14 +1411,11 @@ void ext4_fname_setup_ci_filename(struct inode *dir, const struct qstr *iname, ...@@ -1342,14 +1411,11 @@ void ext4_fname_setup_ci_filename(struct inode *dir, const struct qstr *iname,
* *
* Return: %true if the directory entry matches, otherwise %false. * Return: %true if the directory entry matches, otherwise %false.
*/ */
static inline bool ext4_match(const struct inode *parent, static bool ext4_match(struct inode *parent,
const struct ext4_filename *fname, const struct ext4_filename *fname,
const struct ext4_dir_entry_2 *de) struct ext4_dir_entry_2 *de)
{ {
struct fscrypt_name f; struct fscrypt_name f;
#ifdef CONFIG_UNICODE
const struct qstr entry = {.name = de->name, .len = de->name_len};
#endif
if (!de->inode) if (!de->inode)
return false; return false;
...@@ -1365,10 +1431,19 @@ static inline bool ext4_match(const struct inode *parent, ...@@ -1365,10 +1431,19 @@ static inline bool ext4_match(const struct inode *parent,
if (fname->cf_name.name) { if (fname->cf_name.name) {
struct qstr cf = {.name = fname->cf_name.name, struct qstr cf = {.name = fname->cf_name.name,
.len = fname->cf_name.len}; .len = fname->cf_name.len};
return !ext4_ci_compare(parent, &cf, &entry, true); if (IS_ENCRYPTED(parent)) {
if (fname->hinfo.hash != EXT4_DIRENT_HASH(de) ||
fname->hinfo.minor_hash !=
EXT4_DIRENT_MINOR_HASH(de)) {
return 0;
}
}
return !ext4_ci_compare(parent, &cf, de->name,
de->name_len, true);
} }
return !ext4_ci_compare(parent, fname->usr_fname, &entry, return !ext4_ci_compare(parent, fname->usr_fname, de->name,
false); de->name_len, false);
} }
#endif #endif
...@@ -1765,7 +1840,8 @@ struct dentry *ext4_get_parent(struct dentry *child) ...@@ -1765,7 +1840,8 @@ struct dentry *ext4_get_parent(struct dentry *child)
* Returns pointer to last entry moved. * Returns pointer to last entry moved.
*/ */
static struct ext4_dir_entry_2 * static struct ext4_dir_entry_2 *
dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count, dx_move_dirents(struct inode *dir, char *from, char *to,
struct dx_map_entry *map, int count,
unsigned blocksize) unsigned blocksize)
{ {
unsigned rec_len = 0; unsigned rec_len = 0;
...@@ -1773,11 +1849,19 @@ dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count, ...@@ -1773,11 +1849,19 @@ dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count,
while (count--) { while (count--) {
struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *) struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *)
(from + (map->offs<<2)); (from + (map->offs<<2));
rec_len = EXT4_DIR_REC_LEN(de->name_len); rec_len = ext4_dir_rec_len(de->name_len, dir);
memcpy (to, de, rec_len); memcpy (to, de, rec_len);
((struct ext4_dir_entry_2 *) to)->rec_len = ((struct ext4_dir_entry_2 *) to)->rec_len =
ext4_rec_len_to_disk(rec_len, blocksize); ext4_rec_len_to_disk(rec_len, blocksize);
/* wipe dir_entry excluding the rec_len field */
de->inode = 0; de->inode = 0;
memset(&de->name_len, 0, ext4_rec_len_from_disk(de->rec_len,
blocksize) -
offsetof(struct ext4_dir_entry_2,
name_len));
map++; map++;
to += rec_len; to += rec_len;
} }
...@@ -1788,7 +1872,8 @@ dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count, ...@@ -1788,7 +1872,8 @@ dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count,
* Compact each dir entry in the range to the minimal rec_len. * Compact each dir entry in the range to the minimal rec_len.
* Returns pointer to last entry in range. * Returns pointer to last entry in range.
*/ */
static struct ext4_dir_entry_2* dx_pack_dirents(char *base, unsigned blocksize) static struct ext4_dir_entry_2 *dx_pack_dirents(struct inode *dir, char *base,
unsigned int blocksize)
{ {
struct ext4_dir_entry_2 *next, *to, *prev, *de = (struct ext4_dir_entry_2 *) base; struct ext4_dir_entry_2 *next, *to, *prev, *de = (struct ext4_dir_entry_2 *) base;
unsigned rec_len = 0; unsigned rec_len = 0;
...@@ -1797,7 +1882,7 @@ static struct ext4_dir_entry_2* dx_pack_dirents(char *base, unsigned blocksize) ...@@ -1797,7 +1882,7 @@ static struct ext4_dir_entry_2* dx_pack_dirents(char *base, unsigned blocksize)
while ((char*)de < base + blocksize) { while ((char*)de < base + blocksize) {
next = ext4_next_entry(de, blocksize); next = ext4_next_entry(de, blocksize);
if (de->inode && de->name_len) { if (de->inode && de->name_len) {
rec_len = EXT4_DIR_REC_LEN(de->name_len); rec_len = ext4_dir_rec_len(de->name_len, dir);
if (de > to) if (de > to)
memmove(to, de, rec_len); memmove(to, de, rec_len);
to->rec_len = ext4_rec_len_to_disk(rec_len, blocksize); to->rec_len = ext4_rec_len_to_disk(rec_len, blocksize);
...@@ -1887,9 +1972,9 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, ...@@ -1887,9 +1972,9 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
hash2, split, count-split)); hash2, split, count-split));
/* Fancy dance to stay within two buffers */ /* Fancy dance to stay within two buffers */
de2 = dx_move_dirents(data1, data2, map + split, count - split, de2 = dx_move_dirents(dir, data1, data2, map + split, count - split,
blocksize); blocksize);
de = dx_pack_dirents(data1, blocksize); de = dx_pack_dirents(dir, data1, blocksize);
de->rec_len = ext4_rec_len_to_disk(data1 + (blocksize - csum_size) - de->rec_len = ext4_rec_len_to_disk(data1 + (blocksize - csum_size) -
(char *) de, (char *) de,
blocksize); blocksize);
...@@ -1937,7 +2022,7 @@ int ext4_find_dest_de(struct inode *dir, struct inode *inode, ...@@ -1937,7 +2022,7 @@ int ext4_find_dest_de(struct inode *dir, struct inode *inode,
struct ext4_dir_entry_2 **dest_de) struct ext4_dir_entry_2 **dest_de)
{ {
struct ext4_dir_entry_2 *de; struct ext4_dir_entry_2 *de;
unsigned short reclen = EXT4_DIR_REC_LEN(fname_len(fname)); unsigned short reclen = ext4_dir_rec_len(fname_len(fname), dir);
int nlen, rlen; int nlen, rlen;
unsigned int offset = 0; unsigned int offset = 0;
char *top; char *top;
...@@ -1950,7 +2035,7 @@ int ext4_find_dest_de(struct inode *dir, struct inode *inode, ...@@ -1950,7 +2035,7 @@ int ext4_find_dest_de(struct inode *dir, struct inode *inode,
return -EFSCORRUPTED; return -EFSCORRUPTED;
if (ext4_match(dir, fname, de)) if (ext4_match(dir, fname, de))
return -EEXIST; return -EEXIST;
nlen = EXT4_DIR_REC_LEN(de->name_len); nlen = ext4_dir_rec_len(de->name_len, dir);
rlen = ext4_rec_len_from_disk(de->rec_len, buf_size); rlen = ext4_rec_len_from_disk(de->rec_len, buf_size);
if ((de->inode ? rlen - nlen : rlen) >= reclen) if ((de->inode ? rlen - nlen : rlen) >= reclen)
break; break;
...@@ -1964,7 +2049,8 @@ int ext4_find_dest_de(struct inode *dir, struct inode *inode, ...@@ -1964,7 +2049,8 @@ int ext4_find_dest_de(struct inode *dir, struct inode *inode,
return 0; return 0;
} }
void ext4_insert_dentry(struct inode *inode, void ext4_insert_dentry(struct inode *dir,
struct inode *inode,
struct ext4_dir_entry_2 *de, struct ext4_dir_entry_2 *de,
int buf_size, int buf_size,
struct ext4_filename *fname) struct ext4_filename *fname)
...@@ -1972,7 +2058,7 @@ void ext4_insert_dentry(struct inode *inode, ...@@ -1972,7 +2058,7 @@ void ext4_insert_dentry(struct inode *inode,
int nlen, rlen; int nlen, rlen;
nlen = EXT4_DIR_REC_LEN(de->name_len); nlen = ext4_dir_rec_len(de->name_len, dir);
rlen = ext4_rec_len_from_disk(de->rec_len, buf_size); rlen = ext4_rec_len_from_disk(de->rec_len, buf_size);
if (de->inode) { if (de->inode) {
struct ext4_dir_entry_2 *de1 = struct ext4_dir_entry_2 *de1 =
...@@ -1986,6 +2072,13 @@ void ext4_insert_dentry(struct inode *inode, ...@@ -1986,6 +2072,13 @@ void ext4_insert_dentry(struct inode *inode,
ext4_set_de_type(inode->i_sb, de, inode->i_mode); ext4_set_de_type(inode->i_sb, de, inode->i_mode);
de->name_len = fname_len(fname); de->name_len = fname_len(fname);
memcpy(de->name, fname_name(fname), fname_len(fname)); memcpy(de->name, fname_name(fname), fname_len(fname));
if (ext4_hash_in_dirent(dir)) {
struct dx_hash_info *hinfo = &fname->hinfo;
EXT4_DIRENT_HASHES(de)->hash = cpu_to_le32(hinfo->hash);
EXT4_DIRENT_HASHES(de)->minor_hash =
cpu_to_le32(hinfo->minor_hash);
}
} }
/* /*
...@@ -2022,7 +2115,7 @@ static int add_dirent_to_buf(handle_t *handle, struct ext4_filename *fname, ...@@ -2022,7 +2115,7 @@ static int add_dirent_to_buf(handle_t *handle, struct ext4_filename *fname,
} }
/* By now the buffer is marked for journaling */ /* By now the buffer is marked for journaling */
ext4_insert_dentry(inode, de, blocksize, fname); ext4_insert_dentry(dir, inode, de, blocksize, fname);
/* /*
* XXX shouldn't update any times until successful * XXX shouldn't update any times until successful
...@@ -2102,6 +2195,7 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname, ...@@ -2102,6 +2195,7 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname,
data2 = bh2->b_data; data2 = bh2->b_data;
memcpy(data2, de, len); memcpy(data2, de, len);
memset(de, 0, len); /* wipe old data */
de = (struct ext4_dir_entry_2 *) data2; de = (struct ext4_dir_entry_2 *) data2;
top = data2 + len; top = data2 + len;
while ((char *)(de2 = ext4_next_entry(de, blocksize)) < top) while ((char *)(de2 = ext4_next_entry(de, blocksize)) < top)
...@@ -2114,11 +2208,16 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname, ...@@ -2114,11 +2208,16 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname,
/* Initialize the root; the dot dirents already exist */ /* Initialize the root; the dot dirents already exist */
de = (struct ext4_dir_entry_2 *) (&root->dotdot); de = (struct ext4_dir_entry_2 *) (&root->dotdot);
de->rec_len = ext4_rec_len_to_disk(blocksize - EXT4_DIR_REC_LEN(2), de->rec_len = ext4_rec_len_to_disk(
blocksize); blocksize - ext4_dir_rec_len(2, NULL), blocksize);
memset (&root->info, 0, sizeof(root->info)); memset (&root->info, 0, sizeof(root->info));
root->info.info_length = sizeof(root->info); root->info.info_length = sizeof(root->info);
root->info.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version; if (ext4_hash_in_dirent(dir))
root->info.hash_version = DX_HASH_SIPHASH;
else
root->info.hash_version =
EXT4_SB(dir->i_sb)->s_def_hash_version;
entries = root->entries; entries = root->entries;
dx_set_block(entries, 1); dx_set_block(entries, 1);
dx_set_count(entries, 1); dx_set_count(entries, 1);
...@@ -2129,7 +2228,11 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname, ...@@ -2129,7 +2228,11 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname,
if (fname->hinfo.hash_version <= DX_HASH_TEA) if (fname->hinfo.hash_version <= DX_HASH_TEA)
fname->hinfo.hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned; fname->hinfo.hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned;
fname->hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed; fname->hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed;
ext4fs_dirhash(dir, fname_name(fname), fname_len(fname), &fname->hinfo);
/* casefolded encrypted hashes are computed on fname setup */
if (!ext4_hash_in_dirent(dir))
ext4fs_dirhash(dir, fname_name(fname),
fname_len(fname), &fname->hinfo);
memset(frames, 0, sizeof(frames)); memset(frames, 0, sizeof(frames));
frame = frames; frame = frames;
...@@ -2139,10 +2242,10 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname, ...@@ -2139,10 +2242,10 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname,
retval = ext4_handle_dirty_dx_node(handle, dir, frame->bh); retval = ext4_handle_dirty_dx_node(handle, dir, frame->bh);
if (retval) if (retval)
goto out_frames; goto out_frames;
retval = ext4_handle_dirty_dirblock(handle, dir, bh2); retval = ext4_handle_dirty_dirblock(handle, dir, bh2);
if (retval) if (retval)
goto out_frames; goto out_frames;
de = do_split(handle,dir, &bh2, frame, &fname->hinfo); de = do_split(handle,dir, &bh2, frame, &fname->hinfo);
if (IS_ERR(de)) { if (IS_ERR(de)) {
...@@ -2482,15 +2585,27 @@ int ext4_generic_delete_entry(struct inode *dir, ...@@ -2482,15 +2585,27 @@ int ext4_generic_delete_entry(struct inode *dir,
entry_buf, buf_size, i)) entry_buf, buf_size, i))
return -EFSCORRUPTED; return -EFSCORRUPTED;
if (de == de_del) { if (de == de_del) {
if (pde) if (pde) {
pde->rec_len = ext4_rec_len_to_disk( pde->rec_len = ext4_rec_len_to_disk(
ext4_rec_len_from_disk(pde->rec_len, ext4_rec_len_from_disk(pde->rec_len,
blocksize) + blocksize) +
ext4_rec_len_from_disk(de->rec_len, ext4_rec_len_from_disk(de->rec_len,
blocksize), blocksize),
blocksize); blocksize);
else
/* wipe entire dir_entry */
memset(de, 0, ext4_rec_len_from_disk(de->rec_len,
blocksize));
} else {
/* wipe dir_entry excluding the rec_len field */
de->inode = 0; de->inode = 0;
memset(&de->name_len, 0,
ext4_rec_len_from_disk(de->rec_len,
blocksize) -
offsetof(struct ext4_dir_entry_2,
name_len));
}
inode_inc_iversion(dir); inode_inc_iversion(dir);
return 0; return 0;
} }
...@@ -2722,7 +2837,7 @@ struct ext4_dir_entry_2 *ext4_init_dot_dotdot(struct inode *inode, ...@@ -2722,7 +2837,7 @@ struct ext4_dir_entry_2 *ext4_init_dot_dotdot(struct inode *inode,
{ {
de->inode = cpu_to_le32(inode->i_ino); de->inode = cpu_to_le32(inode->i_ino);
de->name_len = 1; de->name_len = 1;
de->rec_len = ext4_rec_len_to_disk(EXT4_DIR_REC_LEN(de->name_len), de->rec_len = ext4_rec_len_to_disk(ext4_dir_rec_len(de->name_len, NULL),
blocksize); blocksize);
strcpy(de->name, "."); strcpy(de->name, ".");
ext4_set_de_type(inode->i_sb, de, S_IFDIR); ext4_set_de_type(inode->i_sb, de, S_IFDIR);
...@@ -2732,11 +2847,12 @@ struct ext4_dir_entry_2 *ext4_init_dot_dotdot(struct inode *inode, ...@@ -2732,11 +2847,12 @@ struct ext4_dir_entry_2 *ext4_init_dot_dotdot(struct inode *inode,
de->name_len = 2; de->name_len = 2;
if (!dotdot_real_len) if (!dotdot_real_len)
de->rec_len = ext4_rec_len_to_disk(blocksize - de->rec_len = ext4_rec_len_to_disk(blocksize -
(csum_size + EXT4_DIR_REC_LEN(1)), (csum_size + ext4_dir_rec_len(1, NULL)),
blocksize); blocksize);
else else
de->rec_len = ext4_rec_len_to_disk( de->rec_len = ext4_rec_len_to_disk(
EXT4_DIR_REC_LEN(de->name_len), blocksize); ext4_dir_rec_len(de->name_len, NULL),
blocksize);
strcpy(de->name, ".."); strcpy(de->name, "..");
ext4_set_de_type(inode->i_sb, de, S_IFDIR); ext4_set_de_type(inode->i_sb, de, S_IFDIR);
...@@ -2869,7 +2985,8 @@ bool ext4_empty_dir(struct inode *inode) ...@@ -2869,7 +2985,8 @@ bool ext4_empty_dir(struct inode *inode)
} }
sb = inode->i_sb; sb = inode->i_sb;
if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2)) { if (inode->i_size < ext4_dir_rec_len(1, NULL) +
ext4_dir_rec_len(2, NULL)) {
EXT4_ERROR_INODE(inode, "invalid size"); EXT4_ERROR_INODE(inode, "invalid size");
return true; return true;
} }
...@@ -3372,7 +3489,7 @@ static int ext4_symlink(struct user_namespace *mnt_userns, struct inode *dir, ...@@ -3372,7 +3489,7 @@ static int ext4_symlink(struct user_namespace *mnt_userns, struct inode *dir,
* for transaction commit if we are running out of space * for transaction commit if we are running out of space
* and thus we deadlock. So we have to stop transaction now * and thus we deadlock. So we have to stop transaction now
* and restart it when symlink contents is written. * and restart it when symlink contents is written.
* *
* To keep fs consistent in case of crash, we have to put inode * To keep fs consistent in case of crash, we have to put inode
* to orphan list in the mean time. * to orphan list in the mean time.
*/ */
......
...@@ -667,9 +667,6 @@ static void ext4_handle_error(struct super_block *sb, bool force_ro, int error, ...@@ -667,9 +667,6 @@ static void ext4_handle_error(struct super_block *sb, bool force_ro, int error,
ext4_commit_super(sb); ext4_commit_super(sb);
} }
if (sb_rdonly(sb) || continue_fs)
return;
/* /*
* We force ERRORS_RO behavior when system is rebooting. Otherwise we * We force ERRORS_RO behavior when system is rebooting. Otherwise we
* could panic during 'reboot -f' as the underlying device got already * could panic during 'reboot -f' as the underlying device got already
...@@ -679,6 +676,10 @@ static void ext4_handle_error(struct super_block *sb, bool force_ro, int error, ...@@ -679,6 +676,10 @@ static void ext4_handle_error(struct super_block *sb, bool force_ro, int error,
panic("EXT4-fs (device %s): panic forced after error\n", panic("EXT4-fs (device %s): panic forced after error\n",
sb->s_id); sb->s_id);
} }
if (sb_rdonly(sb) || continue_fs)
return;
ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
/* /*
* Make sure updated value of ->s_mount_flags will be visible before * Make sure updated value of ->s_mount_flags will be visible before
...@@ -1688,7 +1689,7 @@ enum { ...@@ -1688,7 +1689,7 @@ enum {
Opt_dioread_nolock, Opt_dioread_lock, Opt_dioread_nolock, Opt_dioread_lock,
Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable, Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable,
Opt_max_dir_size_kb, Opt_nojournal_checksum, Opt_nombcache, Opt_max_dir_size_kb, Opt_nojournal_checksum, Opt_nombcache,
Opt_prefetch_block_bitmaps, Opt_no_prefetch_block_bitmaps, Opt_mb_optimize_scan,
#ifdef CONFIG_EXT4_DEBUG #ifdef CONFIG_EXT4_DEBUG
Opt_fc_debug_max_replay, Opt_fc_debug_force Opt_fc_debug_max_replay, Opt_fc_debug_force
#endif #endif
...@@ -1788,7 +1789,9 @@ static const match_table_t tokens = { ...@@ -1788,7 +1789,9 @@ static const match_table_t tokens = {
{Opt_inlinecrypt, "inlinecrypt"}, {Opt_inlinecrypt, "inlinecrypt"},
{Opt_nombcache, "nombcache"}, {Opt_nombcache, "nombcache"},
{Opt_nombcache, "no_mbcache"}, /* for backward compatibility */ {Opt_nombcache, "no_mbcache"}, /* for backward compatibility */
{Opt_prefetch_block_bitmaps, "prefetch_block_bitmaps"}, {Opt_removed, "prefetch_block_bitmaps"},
{Opt_no_prefetch_block_bitmaps, "no_prefetch_block_bitmaps"},
{Opt_mb_optimize_scan, "mb_optimize_scan=%d"},
{Opt_removed, "check=none"}, /* mount option from ext2/3 */ {Opt_removed, "check=none"}, /* mount option from ext2/3 */
{Opt_removed, "nocheck"}, /* mount option from ext2/3 */ {Opt_removed, "nocheck"}, /* mount option from ext2/3 */
{Opt_removed, "reservation"}, /* mount option from ext2/3 */ {Opt_removed, "reservation"}, /* mount option from ext2/3 */
...@@ -1821,6 +1824,8 @@ static ext4_fsblk_t get_sb_block(void **data) ...@@ -1821,6 +1824,8 @@ static ext4_fsblk_t get_sb_block(void **data)
} }
#define DEFAULT_JOURNAL_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3)) #define DEFAULT_JOURNAL_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3))
#define DEFAULT_MB_OPTIMIZE_SCAN (-1)
static const char deprecated_msg[] = static const char deprecated_msg[] =
"Mount option \"%s\" will be removed by %s\n" "Mount option \"%s\" will be removed by %s\n"
"Contact linux-ext4@vger.kernel.org if you think we should keep it.\n"; "Contact linux-ext4@vger.kernel.org if you think we should keep it.\n";
...@@ -2007,8 +2012,9 @@ static const struct mount_opts { ...@@ -2007,8 +2012,9 @@ static const struct mount_opts {
{Opt_max_dir_size_kb, 0, MOPT_GTE0}, {Opt_max_dir_size_kb, 0, MOPT_GTE0},
{Opt_test_dummy_encryption, 0, MOPT_STRING}, {Opt_test_dummy_encryption, 0, MOPT_STRING},
{Opt_nombcache, EXT4_MOUNT_NO_MBCACHE, MOPT_SET}, {Opt_nombcache, EXT4_MOUNT_NO_MBCACHE, MOPT_SET},
{Opt_prefetch_block_bitmaps, EXT4_MOUNT_PREFETCH_BLOCK_BITMAPS, {Opt_no_prefetch_block_bitmaps, EXT4_MOUNT_NO_PREFETCH_BLOCK_BITMAPS,
MOPT_SET}, MOPT_SET},
{Opt_mb_optimize_scan, EXT4_MOUNT2_MB_OPTIMIZE_SCAN, MOPT_GTE0},
#ifdef CONFIG_EXT4_DEBUG #ifdef CONFIG_EXT4_DEBUG
{Opt_fc_debug_force, EXT4_MOUNT2_JOURNAL_FAST_COMMIT, {Opt_fc_debug_force, EXT4_MOUNT2_JOURNAL_FAST_COMMIT,
MOPT_SET | MOPT_2 | MOPT_EXT4_ONLY}, MOPT_SET | MOPT_2 | MOPT_EXT4_ONLY},
...@@ -2090,9 +2096,15 @@ static int ext4_set_test_dummy_encryption(struct super_block *sb, ...@@ -2090,9 +2096,15 @@ static int ext4_set_test_dummy_encryption(struct super_block *sb,
return 1; return 1;
} }
struct ext4_parsed_options {
unsigned long journal_devnum;
unsigned int journal_ioprio;
int mb_optimize_scan;
};
static int handle_mount_opt(struct super_block *sb, char *opt, int token, static int handle_mount_opt(struct super_block *sb, char *opt, int token,
substring_t *args, unsigned long *journal_devnum, substring_t *args, struct ext4_parsed_options *parsed_opts,
unsigned int *journal_ioprio, int is_remount) int is_remount)
{ {
struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_sb_info *sbi = EXT4_SB(sb);
const struct mount_opts *m; const struct mount_opts *m;
...@@ -2249,7 +2261,7 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token, ...@@ -2249,7 +2261,7 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
"Cannot specify journal on remount"); "Cannot specify journal on remount");
return -1; return -1;
} }
*journal_devnum = arg; parsed_opts->journal_devnum = arg;
} else if (token == Opt_journal_path) { } else if (token == Opt_journal_path) {
char *journal_path; char *journal_path;
struct inode *journal_inode; struct inode *journal_inode;
...@@ -2285,7 +2297,7 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token, ...@@ -2285,7 +2297,7 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
return -1; return -1;
} }
*journal_devnum = new_encode_dev(journal_inode->i_rdev); parsed_opts->journal_devnum = new_encode_dev(journal_inode->i_rdev);
path_put(&path); path_put(&path);
kfree(journal_path); kfree(journal_path);
} else if (token == Opt_journal_ioprio) { } else if (token == Opt_journal_ioprio) {
...@@ -2294,7 +2306,7 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token, ...@@ -2294,7 +2306,7 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
" (must be 0-7)"); " (must be 0-7)");
return -1; return -1;
} }
*journal_ioprio = parsed_opts->journal_ioprio =
IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, arg); IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, arg);
} else if (token == Opt_test_dummy_encryption) { } else if (token == Opt_test_dummy_encryption) {
return ext4_set_test_dummy_encryption(sb, opt, &args[0], return ext4_set_test_dummy_encryption(sb, opt, &args[0],
...@@ -2384,6 +2396,13 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token, ...@@ -2384,6 +2396,13 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
sbi->s_mount_opt |= m->mount_opt; sbi->s_mount_opt |= m->mount_opt;
} else if (token == Opt_data_err_ignore) { } else if (token == Opt_data_err_ignore) {
sbi->s_mount_opt &= ~m->mount_opt; sbi->s_mount_opt &= ~m->mount_opt;
} else if (token == Opt_mb_optimize_scan) {
if (arg != 0 && arg != 1) {
ext4_msg(sb, KERN_WARNING,
"mb_optimize_scan should be set to 0 or 1.");
return -1;
}
parsed_opts->mb_optimize_scan = arg;
} else { } else {
if (!args->from) if (!args->from)
arg = 1; arg = 1;
...@@ -2411,8 +2430,7 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token, ...@@ -2411,8 +2430,7 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
} }
static int parse_options(char *options, struct super_block *sb, static int parse_options(char *options, struct super_block *sb,
unsigned long *journal_devnum, struct ext4_parsed_options *ret_opts,
unsigned int *journal_ioprio,
int is_remount) int is_remount)
{ {
struct ext4_sb_info __maybe_unused *sbi = EXT4_SB(sb); struct ext4_sb_info __maybe_unused *sbi = EXT4_SB(sb);
...@@ -2432,8 +2450,8 @@ static int parse_options(char *options, struct super_block *sb, ...@@ -2432,8 +2450,8 @@ static int parse_options(char *options, struct super_block *sb,
*/ */
args[0].to = args[0].from = NULL; args[0].to = args[0].from = NULL;
token = match_token(p, tokens, args); token = match_token(p, tokens, args);
if (handle_mount_opt(sb, p, token, args, journal_devnum, if (handle_mount_opt(sb, p, token, args, ret_opts,
journal_ioprio, is_remount) < 0) is_remount) < 0)
return 0; return 0;
} }
#ifdef CONFIG_QUOTA #ifdef CONFIG_QUOTA
...@@ -3023,9 +3041,6 @@ static void ext4_orphan_cleanup(struct super_block *sb, ...@@ -3023,9 +3041,6 @@ static void ext4_orphan_cleanup(struct super_block *sb,
sb->s_flags &= ~SB_RDONLY; sb->s_flags &= ~SB_RDONLY;
} }
#ifdef CONFIG_QUOTA #ifdef CONFIG_QUOTA
/* Needed for iput() to work correctly and not trash data */
sb->s_flags |= SB_ACTIVE;
/* /*
* Turn on quotas which were not enabled for read-only mounts if * Turn on quotas which were not enabled for read-only mounts if
* filesystem has quota feature, so that they are updated correctly. * filesystem has quota feature, so that they are updated correctly.
...@@ -3691,11 +3706,11 @@ static struct ext4_li_request *ext4_li_request_new(struct super_block *sb, ...@@ -3691,11 +3706,11 @@ static struct ext4_li_request *ext4_li_request_new(struct super_block *sb,
elr->lr_super = sb; elr->lr_super = sb;
elr->lr_first_not_zeroed = start; elr->lr_first_not_zeroed = start;
if (test_opt(sb, PREFETCH_BLOCK_BITMAPS)) if (test_opt(sb, NO_PREFETCH_BLOCK_BITMAPS)) {
elr->lr_mode = EXT4_LI_MODE_PREFETCH_BBITMAP;
else {
elr->lr_mode = EXT4_LI_MODE_ITABLE; elr->lr_mode = EXT4_LI_MODE_ITABLE;
elr->lr_next_group = start; elr->lr_next_group = start;
} else {
elr->lr_mode = EXT4_LI_MODE_PREFETCH_BBITMAP;
} }
/* /*
...@@ -3726,7 +3741,7 @@ int ext4_register_li_request(struct super_block *sb, ...@@ -3726,7 +3741,7 @@ int ext4_register_li_request(struct super_block *sb,
goto out; goto out;
} }
if (!test_opt(sb, PREFETCH_BLOCK_BITMAPS) && if (test_opt(sb, NO_PREFETCH_BLOCK_BITMAPS) &&
(first_not_zeroed == ngroups || sb_rdonly(sb) || (first_not_zeroed == ngroups || sb_rdonly(sb) ||
!test_opt(sb, INIT_INODE_TABLE))) !test_opt(sb, INIT_INODE_TABLE)))
goto out; goto out;
...@@ -4015,7 +4030,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) ...@@ -4015,7 +4030,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
ext4_fsblk_t sb_block = get_sb_block(&data); ext4_fsblk_t sb_block = get_sb_block(&data);
ext4_fsblk_t logical_sb_block; ext4_fsblk_t logical_sb_block;
unsigned long offset = 0; unsigned long offset = 0;
unsigned long journal_devnum = 0;
unsigned long def_mount_opts; unsigned long def_mount_opts;
struct inode *root; struct inode *root;
const char *descr; const char *descr;
...@@ -4026,8 +4040,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) ...@@ -4026,8 +4040,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
int needs_recovery, has_huge_files; int needs_recovery, has_huge_files;
__u64 blocks_count; __u64 blocks_count;
int err = 0; int err = 0;
unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
ext4_group_t first_not_zeroed; ext4_group_t first_not_zeroed;
struct ext4_parsed_options parsed_opts;
/* Set defaults for the variables that will be set during parsing */
parsed_opts.journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
parsed_opts.journal_devnum = 0;
parsed_opts.mb_optimize_scan = DEFAULT_MB_OPTIMIZE_SCAN;
if ((data && !orig_data) || !sbi) if ((data && !orig_data) || !sbi)
goto out_free_base; goto out_free_base;
...@@ -4273,8 +4292,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) ...@@ -4273,8 +4292,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
GFP_KERNEL); GFP_KERNEL);
if (!s_mount_opts) if (!s_mount_opts)
goto failed_mount; goto failed_mount;
if (!parse_options(s_mount_opts, sb, &journal_devnum, if (!parse_options(s_mount_opts, sb, &parsed_opts, 0)) {
&journal_ioprio, 0)) {
ext4_msg(sb, KERN_WARNING, ext4_msg(sb, KERN_WARNING,
"failed to parse options in superblock: %s", "failed to parse options in superblock: %s",
s_mount_opts); s_mount_opts);
...@@ -4282,8 +4300,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) ...@@ -4282,8 +4300,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
kfree(s_mount_opts); kfree(s_mount_opts);
} }
sbi->s_def_mount_opt = sbi->s_mount_opt; sbi->s_def_mount_opt = sbi->s_mount_opt;
if (!parse_options((char *) data, sb, &journal_devnum, if (!parse_options((char *) data, sb, &parsed_opts, 0))
&journal_ioprio, 0))
goto failed_mount; goto failed_mount;
#ifdef CONFIG_UNICODE #ifdef CONFIG_UNICODE
...@@ -4292,12 +4309,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) ...@@ -4292,12 +4309,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
struct unicode_map *encoding; struct unicode_map *encoding;
__u16 encoding_flags; __u16 encoding_flags;
if (ext4_has_feature_encrypt(sb)) {
ext4_msg(sb, KERN_ERR,
"Can't mount with encoding and encryption");
goto failed_mount;
}
if (ext4_sb_read_encoding(es, &encoding_info, if (ext4_sb_read_encoding(es, &encoding_info,
&encoding_flags)) { &encoding_flags)) {
ext4_msg(sb, KERN_ERR, ext4_msg(sb, KERN_ERR,
...@@ -4774,7 +4785,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) ...@@ -4774,7 +4785,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
* root first: it may be modified in the journal! * root first: it may be modified in the journal!
*/ */
if (!test_opt(sb, NOLOAD) && ext4_has_feature_journal(sb)) { if (!test_opt(sb, NOLOAD) && ext4_has_feature_journal(sb)) {
err = ext4_load_journal(sb, es, journal_devnum); err = ext4_load_journal(sb, es, parsed_opts.journal_devnum);
if (err) if (err)
goto failed_mount3a; goto failed_mount3a;
} else if (test_opt(sb, NOLOAD) && !sb_rdonly(sb) && } else if (test_opt(sb, NOLOAD) && !sb_rdonly(sb) &&
...@@ -4874,7 +4885,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) ...@@ -4874,7 +4885,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
goto failed_mount_wq; goto failed_mount_wq;
} }
set_task_ioprio(sbi->s_journal->j_task, journal_ioprio); set_task_ioprio(sbi->s_journal->j_task, parsed_opts.journal_ioprio);
sbi->s_journal->j_submit_inode_data_buffers = sbi->s_journal->j_submit_inode_data_buffers =
ext4_journal_submit_inode_data_buffers; ext4_journal_submit_inode_data_buffers;
...@@ -4980,6 +4991,19 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) ...@@ -4980,6 +4991,19 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
ext4_fc_replay_cleanup(sb); ext4_fc_replay_cleanup(sb);
ext4_ext_init(sb); ext4_ext_init(sb);
/*
* Enable optimize_scan if number of groups is > threshold. This can be
* turned off by passing "mb_optimize_scan=0". This can also be
* turned on forcefully by passing "mb_optimize_scan=1".
*/
if (parsed_opts.mb_optimize_scan == 1)
set_opt2(sb, MB_OPTIMIZE_SCAN);
else if (parsed_opts.mb_optimize_scan == 0)
clear_opt2(sb, MB_OPTIMIZE_SCAN);
else if (sbi->s_groups_count >= MB_DEFAULT_LINEAR_SCAN_THRESHOLD)
set_opt2(sb, MB_OPTIMIZE_SCAN);
err = ext4_mb_init(sb); err = ext4_mb_init(sb);
if (err) { if (err) {
ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)", ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)",
...@@ -4996,7 +5020,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) ...@@ -4996,7 +5020,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
ext4_journal_commit_callback; ext4_journal_commit_callback;
block = ext4_count_free_clusters(sb); block = ext4_count_free_clusters(sb);
ext4_free_blocks_count_set(sbi->s_es, ext4_free_blocks_count_set(sbi->s_es,
EXT4_C2B(sbi, block)); EXT4_C2B(sbi, block));
err = percpu_counter_init(&sbi->s_freeclusters_counter, block, err = percpu_counter_init(&sbi->s_freeclusters_counter, block,
GFP_KERNEL); GFP_KERNEL);
...@@ -5561,8 +5585,10 @@ static int ext4_commit_super(struct super_block *sb) ...@@ -5561,8 +5585,10 @@ static int ext4_commit_super(struct super_block *sb)
struct buffer_head *sbh = EXT4_SB(sb)->s_sbh; struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
int error = 0; int error = 0;
if (!sbh || block_device_ejected(sb)) if (!sbh)
return error; return -EINVAL;
if (block_device_ejected(sb))
return -ENODEV;
ext4_update_super(sb); ext4_update_super(sb);
...@@ -5813,13 +5839,16 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) ...@@ -5813,13 +5839,16 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
struct ext4_mount_options old_opts; struct ext4_mount_options old_opts;
int enable_quota = 0; int enable_quota = 0;
ext4_group_t g; ext4_group_t g;
unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
int err = 0; int err = 0;
#ifdef CONFIG_QUOTA #ifdef CONFIG_QUOTA
int i, j; int i, j;
char *to_free[EXT4_MAXQUOTAS]; char *to_free[EXT4_MAXQUOTAS];
#endif #endif
char *orig_data = kstrdup(data, GFP_KERNEL); char *orig_data = kstrdup(data, GFP_KERNEL);
struct ext4_parsed_options parsed_opts;
parsed_opts.journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
parsed_opts.journal_devnum = 0;
if (data && !orig_data) if (data && !orig_data)
return -ENOMEM; return -ENOMEM;
...@@ -5850,7 +5879,8 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) ...@@ -5850,7 +5879,8 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
old_opts.s_qf_names[i] = NULL; old_opts.s_qf_names[i] = NULL;
#endif #endif
if (sbi->s_journal && sbi->s_journal->j_task->io_context) if (sbi->s_journal && sbi->s_journal->j_task->io_context)
journal_ioprio = sbi->s_journal->j_task->io_context->ioprio; parsed_opts.journal_ioprio =
sbi->s_journal->j_task->io_context->ioprio;
/* /*
* Some options can be enabled by ext4 and/or by VFS mount flag * Some options can be enabled by ext4 and/or by VFS mount flag
...@@ -5860,7 +5890,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) ...@@ -5860,7 +5890,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
vfs_flags = SB_LAZYTIME | SB_I_VERSION; vfs_flags = SB_LAZYTIME | SB_I_VERSION;
sb->s_flags = (sb->s_flags & ~vfs_flags) | (*flags & vfs_flags); sb->s_flags = (sb->s_flags & ~vfs_flags) | (*flags & vfs_flags);
if (!parse_options(data, sb, NULL, &journal_ioprio, 1)) { if (!parse_options(data, sb, &parsed_opts, 1)) {
err = -EINVAL; err = -EINVAL;
goto restore_opts; goto restore_opts;
} }
...@@ -5910,7 +5940,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) ...@@ -5910,7 +5940,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
if (sbi->s_journal) { if (sbi->s_journal) {
ext4_init_journal_params(sb, sbi->s_journal); ext4_init_journal_params(sb, sbi->s_journal);
set_task_ioprio(sbi->s_journal->j_task, journal_ioprio); set_task_ioprio(sbi->s_journal->j_task, parsed_opts.journal_ioprio);
} }
/* Flush outstanding errors before changing fs state */ /* Flush outstanding errors before changing fs state */
......
...@@ -215,6 +215,7 @@ EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs); ...@@ -215,6 +215,7 @@ EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs);
EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request); EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request);
EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc); EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc);
EXT4_RW_ATTR_SBI_UI(mb_max_inode_prealloc, s_mb_max_inode_prealloc); EXT4_RW_ATTR_SBI_UI(mb_max_inode_prealloc, s_mb_max_inode_prealloc);
EXT4_RW_ATTR_SBI_UI(mb_max_linear_groups, s_mb_max_linear_groups);
EXT4_RW_ATTR_SBI_UI(extent_max_zeroout_kb, s_extent_max_zeroout_kb); EXT4_RW_ATTR_SBI_UI(extent_max_zeroout_kb, s_extent_max_zeroout_kb);
EXT4_ATTR(trigger_fs_error, 0200, trigger_test_error); EXT4_ATTR(trigger_fs_error, 0200, trigger_test_error);
EXT4_RW_ATTR_SBI_UI(err_ratelimit_interval_ms, s_err_ratelimit_state.interval); EXT4_RW_ATTR_SBI_UI(err_ratelimit_interval_ms, s_err_ratelimit_state.interval);
...@@ -263,6 +264,7 @@ static struct attribute *ext4_attrs[] = { ...@@ -263,6 +264,7 @@ static struct attribute *ext4_attrs[] = {
ATTR_LIST(mb_stream_req), ATTR_LIST(mb_stream_req),
ATTR_LIST(mb_group_prealloc), ATTR_LIST(mb_group_prealloc),
ATTR_LIST(mb_max_inode_prealloc), ATTR_LIST(mb_max_inode_prealloc),
ATTR_LIST(mb_max_linear_groups),
ATTR_LIST(max_writeback_mb_bump), ATTR_LIST(max_writeback_mb_bump),
ATTR_LIST(extent_max_zeroout_kb), ATTR_LIST(extent_max_zeroout_kb),
ATTR_LIST(trigger_fs_error), ATTR_LIST(trigger_fs_error),
...@@ -313,6 +315,7 @@ EXT4_ATTR_FEATURE(verity); ...@@ -313,6 +315,7 @@ EXT4_ATTR_FEATURE(verity);
#endif #endif
EXT4_ATTR_FEATURE(metadata_csum_seed); EXT4_ATTR_FEATURE(metadata_csum_seed);
EXT4_ATTR_FEATURE(fast_commit); EXT4_ATTR_FEATURE(fast_commit);
EXT4_ATTR_FEATURE(encrypted_casefold);
static struct attribute *ext4_feat_attrs[] = { static struct attribute *ext4_feat_attrs[] = {
ATTR_LIST(lazy_itable_init), ATTR_LIST(lazy_itable_init),
...@@ -330,6 +333,7 @@ static struct attribute *ext4_feat_attrs[] = { ...@@ -330,6 +333,7 @@ static struct attribute *ext4_feat_attrs[] = {
#endif #endif
ATTR_LIST(metadata_csum_seed), ATTR_LIST(metadata_csum_seed),
ATTR_LIST(fast_commit), ATTR_LIST(fast_commit),
ATTR_LIST(encrypted_casefold),
NULL, NULL,
}; };
ATTRIBUTE_GROUPS(ext4_feat); ATTRIBUTE_GROUPS(ext4_feat);
...@@ -528,6 +532,10 @@ int ext4_register_sysfs(struct super_block *sb) ...@@ -528,6 +532,10 @@ int ext4_register_sysfs(struct super_block *sb)
ext4_fc_info_show, sb); ext4_fc_info_show, sb);
proc_create_seq_data("mb_groups", S_IRUGO, sbi->s_proc, proc_create_seq_data("mb_groups", S_IRUGO, sbi->s_proc,
&ext4_mb_seq_groups_ops, sb); &ext4_mb_seq_groups_ops, sb);
proc_create_single_data("mb_stats", 0444, sbi->s_proc,
ext4_seq_mb_stats_show, sb);
proc_create_seq_data("mb_structs_summary", 0444, sbi->s_proc,
&ext4_mb_seq_structs_summary_ops, sb);
} }
return 0; return 0;
} }
......
...@@ -45,16 +45,13 @@ static int pagecache_read(struct inode *inode, void *buf, size_t count, ...@@ -45,16 +45,13 @@ static int pagecache_read(struct inode *inode, void *buf, size_t count,
size_t n = min_t(size_t, count, size_t n = min_t(size_t, count,
PAGE_SIZE - offset_in_page(pos)); PAGE_SIZE - offset_in_page(pos));
struct page *page; struct page *page;
void *addr;
page = read_mapping_page(inode->i_mapping, pos >> PAGE_SHIFT, page = read_mapping_page(inode->i_mapping, pos >> PAGE_SHIFT,
NULL); NULL);
if (IS_ERR(page)) if (IS_ERR(page))
return PTR_ERR(page); return PTR_ERR(page);
addr = kmap_atomic(page); memcpy_from_page(buf, page, offset_in_page(pos), n);
memcpy(buf, addr + offset_in_page(pos), n);
kunmap_atomic(addr);
put_page(page); put_page(page);
...@@ -80,7 +77,6 @@ static int pagecache_write(struct inode *inode, const void *buf, size_t count, ...@@ -80,7 +77,6 @@ static int pagecache_write(struct inode *inode, const void *buf, size_t count,
PAGE_SIZE - offset_in_page(pos)); PAGE_SIZE - offset_in_page(pos));
struct page *page; struct page *page;
void *fsdata; void *fsdata;
void *addr;
int res; int res;
res = pagecache_write_begin(NULL, inode->i_mapping, pos, n, 0, res = pagecache_write_begin(NULL, inode->i_mapping, pos, n, 0,
...@@ -88,9 +84,7 @@ static int pagecache_write(struct inode *inode, const void *buf, size_t count, ...@@ -88,9 +84,7 @@ static int pagecache_write(struct inode *inode, const void *buf, size_t count,
if (res) if (res)
return res; return res;
addr = kmap_atomic(page); memcpy_to_page(page, offset_in_page(pos), buf, n);
memcpy(addr + offset_in_page(pos), buf, n);
kunmap_atomic(addr);
res = pagecache_write_end(NULL, inode->i_mapping, pos, n, n, res = pagecache_write_end(NULL, inode->i_mapping, pos, n, n,
page, fsdata); page, fsdata);
......
...@@ -1617,7 +1617,7 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i, ...@@ -1617,7 +1617,7 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
* If storing the value in an external inode is an option, * If storing the value in an external inode is an option,
* reserve space for xattr entries/names in the external * reserve space for xattr entries/names in the external
* attribute block so that a long value does not occupy the * attribute block so that a long value does not occupy the
* whole space and prevent futher entries being added. * whole space and prevent further entries being added.
*/ */
if (ext4_has_feature_ea_inode(inode->i_sb) && if (ext4_has_feature_ea_inode(inode->i_sb) &&
new_size && is_block && new_size && is_block &&
......
...@@ -245,15 +245,14 @@ static int fc_do_one_pass(journal_t *journal, ...@@ -245,15 +245,14 @@ static int fc_do_one_pass(journal_t *journal,
return 0; return 0;
while (next_fc_block <= journal->j_fc_last) { while (next_fc_block <= journal->j_fc_last) {
jbd_debug(3, "Fast commit replay: next block %ld", jbd_debug(3, "Fast commit replay: next block %ld\n",
next_fc_block); next_fc_block);
err = jread(&bh, journal, next_fc_block); err = jread(&bh, journal, next_fc_block);
if (err) { if (err) {
jbd_debug(3, "Fast commit replay: read error"); jbd_debug(3, "Fast commit replay: read error\n");
break; break;
} }
jbd_debug(3, "Processing fast commit blk with seq %d");
err = journal->j_fc_replay_callback(journal, bh, pass, err = journal->j_fc_replay_callback(journal, bh, pass,
next_fc_block - journal->j_fc_first, next_fc_block - journal->j_fc_first,
expected_commit_id); expected_commit_id);
......
...@@ -349,7 +349,12 @@ static int start_this_handle(journal_t *journal, handle_t *handle, ...@@ -349,7 +349,12 @@ static int start_this_handle(journal_t *journal, handle_t *handle,
} }
alloc_transaction: alloc_transaction:
if (!journal->j_running_transaction) { /*
* This check is racy but it is just an optimization of allocating new
* transaction early if there are high chances we'll need it. If we
* guess wrong, we'll retry or free unused transaction.
*/
if (!data_race(journal->j_running_transaction)) {
/* /*
* If __GFP_FS is not present, then we may be being called from * If __GFP_FS is not present, then we may be being called from
* inside the fs writeback layer, so we MUST NOT fail. * inside the fs writeback layer, so we MUST NOT fail.
...@@ -1474,8 +1479,8 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) ...@@ -1474,8 +1479,8 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
* crucial to catch bugs so let's do a reliable check until the * crucial to catch bugs so let's do a reliable check until the
* lockless handling is fully proven. * lockless handling is fully proven.
*/ */
if (jh->b_transaction != transaction && if (data_race(jh->b_transaction != transaction &&
jh->b_next_transaction != transaction) { jh->b_next_transaction != transaction)) {
spin_lock(&jh->b_state_lock); spin_lock(&jh->b_state_lock);
J_ASSERT_JH(jh, jh->b_transaction == transaction || J_ASSERT_JH(jh, jh->b_transaction == transaction ||
jh->b_next_transaction == transaction); jh->b_next_transaction == transaction);
...@@ -1483,8 +1488,8 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) ...@@ -1483,8 +1488,8 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
} }
if (jh->b_modified == 1) { if (jh->b_modified == 1) {
/* If it's in our transaction it must be in BJ_Metadata list. */ /* If it's in our transaction it must be in BJ_Metadata list. */
if (jh->b_transaction == transaction && if (data_race(jh->b_transaction == transaction &&
jh->b_jlist != BJ_Metadata) { jh->b_jlist != BJ_Metadata)) {
spin_lock(&jh->b_state_lock); spin_lock(&jh->b_state_lock);
if (jh->b_transaction == transaction && if (jh->b_transaction == transaction &&
jh->b_jlist != BJ_Metadata) jh->b_jlist != BJ_Metadata)
......
...@@ -86,12 +86,20 @@ int vfs_getattr_nosec(const struct path *path, struct kstat *stat, ...@@ -86,12 +86,20 @@ int vfs_getattr_nosec(const struct path *path, struct kstat *stat,
/* SB_NOATIME means filesystem supplies dummy atime value */ /* SB_NOATIME means filesystem supplies dummy atime value */
if (inode->i_sb->s_flags & SB_NOATIME) if (inode->i_sb->s_flags & SB_NOATIME)
stat->result_mask &= ~STATX_ATIME; stat->result_mask &= ~STATX_ATIME;
/*
* Note: If you add another clause to set an attribute flag, please
* update attributes_mask below.
*/
if (IS_AUTOMOUNT(inode)) if (IS_AUTOMOUNT(inode))
stat->attributes |= STATX_ATTR_AUTOMOUNT; stat->attributes |= STATX_ATTR_AUTOMOUNT;
if (IS_DAX(inode)) if (IS_DAX(inode))
stat->attributes |= STATX_ATTR_DAX; stat->attributes |= STATX_ATTR_DAX;
stat->attributes_mask |= (STATX_ATTR_AUTOMOUNT |
STATX_ATTR_DAX);
mnt_userns = mnt_user_ns(path->mnt); mnt_userns = mnt_user_ns(path->mnt);
if (inode->i_op->getattr) if (inode->i_op->getattr)
return inode->i_op->getattr(mnt_userns, path, stat, return inode->i_op->getattr(mnt_userns, path, stat,
......
...@@ -61,7 +61,7 @@ void __jbd2_debug(int level, const char *file, const char *func, ...@@ -61,7 +61,7 @@ void __jbd2_debug(int level, const char *file, const char *func,
#define jbd_debug(n, fmt, a...) \ #define jbd_debug(n, fmt, a...) \
__jbd2_debug((n), __FILE__, __func__, __LINE__, (fmt), ##a) __jbd2_debug((n), __FILE__, __func__, __LINE__, (fmt), ##a)
#else #else
#define jbd_debug(n, fmt, a...) /**/ #define jbd_debug(n, fmt, a...) no_printk(fmt, ##a)
#endif #endif
extern void *jbd2_alloc(size_t size, gfp_t flags); extern void *jbd2_alloc(size_t size, gfp_t flags);
...@@ -594,18 +594,22 @@ struct transaction_s ...@@ -594,18 +594,22 @@ struct transaction_s
*/ */
unsigned long t_log_start; unsigned long t_log_start;
/* Number of buffers on the t_buffers list [j_list_lock] */ /*
* Number of buffers on the t_buffers list [j_list_lock, no locks
* needed for jbd2 thread]
*/
int t_nr_buffers; int t_nr_buffers;
/* /*
* Doubly-linked circular list of all buffers reserved but not yet * Doubly-linked circular list of all buffers reserved but not yet
* modified by this transaction [j_list_lock] * modified by this transaction [j_list_lock, no locks needed for
* jbd2 thread]
*/ */
struct journal_head *t_reserved_list; struct journal_head *t_reserved_list;
/* /*
* Doubly-linked circular list of all metadata buffers owned by this * Doubly-linked circular list of all metadata buffers owned by this
* transaction [j_list_lock] * transaction [j_list_lock, no locks needed for jbd2 thread]
*/ */
struct journal_head *t_buffers; struct journal_head *t_buffers;
...@@ -629,9 +633,11 @@ struct transaction_s ...@@ -629,9 +633,11 @@ struct transaction_s
struct journal_head *t_checkpoint_io_list; struct journal_head *t_checkpoint_io_list;
/* /*
* Doubly-linked circular list of metadata buffers being shadowed by log * Doubly-linked circular list of metadata buffers being
* IO. The IO buffers on the iobuf list and the shadow buffers on this * shadowed by log IO. The IO buffers on the iobuf list and
* list match each other one for one at all times. [j_list_lock] * the shadow buffers on this list match each other one for
* one at all times. [j_list_lock, no locks needed for jbd2
* thread]
*/ */
struct journal_head *t_shadow_list; struct journal_head *t_shadow_list;
...@@ -768,7 +774,8 @@ enum passtype {PASS_SCAN, PASS_REVOKE, PASS_REPLAY}; ...@@ -768,7 +774,8 @@ enum passtype {PASS_SCAN, PASS_REVOKE, PASS_REPLAY};
struct journal_s struct journal_s
{ {
/** /**
* @j_flags: General journaling state flags [j_state_lock] * @j_flags: General journaling state flags [j_state_lock,
* no lock for quick racy checks]
*/ */
unsigned long j_flags; unsigned long j_flags;
...@@ -808,7 +815,8 @@ struct journal_s ...@@ -808,7 +815,8 @@ struct journal_s
/** /**
* @j_barrier_count: * @j_barrier_count:
* *
* Number of processes waiting to create a barrier lock [j_state_lock] * Number of processes waiting to create a barrier lock [j_state_lock,
* no lock for quick racy checks]
*/ */
int j_barrier_count; int j_barrier_count;
...@@ -821,7 +829,8 @@ struct journal_s ...@@ -821,7 +829,8 @@ struct journal_s
* @j_running_transaction: * @j_running_transaction:
* *
* Transactions: The current running transaction... * Transactions: The current running transaction...
* [j_state_lock] [caller holding open handle] * [j_state_lock, no lock for quick racy checks] [caller holding
* open handle]
*/ */
transaction_t *j_running_transaction; transaction_t *j_running_transaction;
...@@ -1033,7 +1042,7 @@ struct journal_s ...@@ -1033,7 +1042,7 @@ struct journal_s
* @j_commit_sequence: * @j_commit_sequence:
* *
* Sequence number of the most recently committed transaction * Sequence number of the most recently committed transaction
* [j_state_lock]. * [j_state_lock, no lock for quick racy checks]
*/ */
tid_t j_commit_sequence; tid_t j_commit_sequence;
...@@ -1041,7 +1050,7 @@ struct journal_s ...@@ -1041,7 +1050,7 @@ struct journal_s
* @j_commit_request: * @j_commit_request:
* *
* Sequence number of the most recent transaction wanting commit * Sequence number of the most recent transaction wanting commit
* [j_state_lock] * [j_state_lock, no lock for quick racy checks]
*/ */
tid_t j_commit_request; tid_t j_commit_request;
......
...@@ -1358,64 +1358,6 @@ TRACE_EVENT(ext4_read_block_bitmap_load, ...@@ -1358,64 +1358,6 @@ TRACE_EVENT(ext4_read_block_bitmap_load,
__entry->group, __entry->prefetch) __entry->group, __entry->prefetch)
); );
TRACE_EVENT(ext4_direct_IO_enter,
TP_PROTO(struct inode *inode, loff_t offset, unsigned long len, int rw),
TP_ARGS(inode, offset, len, rw),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( loff_t, pos )
__field( unsigned long, len )
__field( int, rw )
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->pos = offset;
__entry->len = len;
__entry->rw = rw;
),
TP_printk("dev %d,%d ino %lu pos %lld len %lu rw %d",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino,
__entry->pos, __entry->len, __entry->rw)
);
TRACE_EVENT(ext4_direct_IO_exit,
TP_PROTO(struct inode *inode, loff_t offset, unsigned long len,
int rw, int ret),
TP_ARGS(inode, offset, len, rw, ret),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( loff_t, pos )
__field( unsigned long, len )
__field( int, rw )
__field( int, ret )
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->pos = offset;
__entry->len = len;
__entry->rw = rw;
__entry->ret = ret;
),
TP_printk("dev %d,%d ino %lu pos %lld len %lu rw %d ret %d",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino,
__entry->pos, __entry->len,
__entry->rw, __entry->ret)
);
DECLARE_EVENT_CLASS(ext4__fallocate_mode, DECLARE_EVENT_CLASS(ext4__fallocate_mode,
TP_PROTO(struct inode *inode, loff_t offset, loff_t len, int mode), TP_PROTO(struct inode *inode, loff_t offset, loff_t len, int mode),
...@@ -1962,124 +1904,6 @@ TRACE_EVENT(ext4_get_implied_cluster_alloc_exit, ...@@ -1962,124 +1904,6 @@ TRACE_EVENT(ext4_get_implied_cluster_alloc_exit,
__entry->len, show_mflags(__entry->flags), __entry->ret) __entry->len, show_mflags(__entry->flags), __entry->ret)
); );
TRACE_EVENT(ext4_ext_put_in_cache,
TP_PROTO(struct inode *inode, ext4_lblk_t lblk, unsigned int len,
ext4_fsblk_t start),
TP_ARGS(inode, lblk, len, start),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( ext4_lblk_t, lblk )
__field( unsigned int, len )
__field( ext4_fsblk_t, start )
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->lblk = lblk;
__entry->len = len;
__entry->start = start;
),
TP_printk("dev %d,%d ino %lu lblk %u len %u start %llu",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino,
(unsigned) __entry->lblk,
__entry->len,
(unsigned long long) __entry->start)
);
TRACE_EVENT(ext4_ext_in_cache,
TP_PROTO(struct inode *inode, ext4_lblk_t lblk, int ret),
TP_ARGS(inode, lblk, ret),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( ext4_lblk_t, lblk )
__field( int, ret )
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->lblk = lblk;
__entry->ret = ret;
),
TP_printk("dev %d,%d ino %lu lblk %u ret %d",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino,
(unsigned) __entry->lblk,
__entry->ret)
);
TRACE_EVENT(ext4_find_delalloc_range,
TP_PROTO(struct inode *inode, ext4_lblk_t from, ext4_lblk_t to,
int reverse, int found, ext4_lblk_t found_blk),
TP_ARGS(inode, from, to, reverse, found, found_blk),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( ext4_lblk_t, from )
__field( ext4_lblk_t, to )
__field( int, reverse )
__field( int, found )
__field( ext4_lblk_t, found_blk )
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->from = from;
__entry->to = to;
__entry->reverse = reverse;
__entry->found = found;
__entry->found_blk = found_blk;
),
TP_printk("dev %d,%d ino %lu from %u to %u reverse %d found %d "
"(blk = %u)",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino,
(unsigned) __entry->from, (unsigned) __entry->to,
__entry->reverse, __entry->found,
(unsigned) __entry->found_blk)
);
TRACE_EVENT(ext4_get_reserved_cluster_alloc,
TP_PROTO(struct inode *inode, ext4_lblk_t lblk, unsigned int len),
TP_ARGS(inode, lblk, len),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( ext4_lblk_t, lblk )
__field( unsigned int, len )
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->lblk = lblk;
__entry->len = len;
),
TP_printk("dev %d,%d ino %lu lblk %u len %u",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino,
(unsigned) __entry->lblk,
__entry->len)
);
TRACE_EVENT(ext4_ext_show_extent, TRACE_EVENT(ext4_ext_show_extent,
TP_PROTO(struct inode *inode, ext4_lblk_t lblk, ext4_fsblk_t pblk, TP_PROTO(struct inode *inode, ext4_lblk_t lblk, ext4_fsblk_t pblk,
unsigned short len), unsigned short len),
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册