1. 13 6月, 2018 1 次提交
    • K
      treewide: kvmalloc() -> kvmalloc_array() · 344476e1
      Kees Cook 提交于
      The kvmalloc() function has a 2-factor argument form, kvmalloc_array(). This
      patch replaces cases of:
      
              kvmalloc(a * b, gfp)
      
      with:
              kvmalloc_array(a, b, gfp)
      
      as well as handling cases of:
      
              kvmalloc(a * b * c, gfp)
      
      with:
      
              kvmalloc(array3_size(a, b, c), gfp)
      
      as it's slightly less ugly than:
      
              kvmalloc_array(array_size(a, b), c, gfp)
      
      This does, however, attempt to ignore constant size factors like:
      
              kvmalloc(4 * 1024, gfp)
      
      though any constants defined via macros get caught up in the conversion.
      
      Any factors with a sizeof() of "unsigned char", "char", and "u8" were
      dropped, since they're redundant.
      
      The Coccinelle script used for this was:
      
      // Fix redundant parens around sizeof().
      @@
      type TYPE;
      expression THING, E;
      @@
      
      (
        kvmalloc(
      -	(sizeof(TYPE)) * E
      +	sizeof(TYPE) * E
        , ...)
      |
        kvmalloc(
      -	(sizeof(THING)) * E
      +	sizeof(THING) * E
        , ...)
      )
      
      // Drop single-byte sizes and redundant parens.
      @@
      expression COUNT;
      typedef u8;
      typedef __u8;
      @@
      
      (
        kvmalloc(
      -	sizeof(u8) * (COUNT)
      +	COUNT
        , ...)
      |
        kvmalloc(
      -	sizeof(__u8) * (COUNT)
      +	COUNT
        , ...)
      |
        kvmalloc(
      -	sizeof(char) * (COUNT)
      +	COUNT
        , ...)
      |
        kvmalloc(
      -	sizeof(unsigned char) * (COUNT)
      +	COUNT
        , ...)
      |
        kvmalloc(
      -	sizeof(u8) * COUNT
      +	COUNT
        , ...)
      |
        kvmalloc(
      -	sizeof(__u8) * COUNT
      +	COUNT
        , ...)
      |
        kvmalloc(
      -	sizeof(char) * COUNT
      +	COUNT
        , ...)
      |
        kvmalloc(
      -	sizeof(unsigned char) * COUNT
      +	COUNT
        , ...)
      )
      
      // 2-factor product with sizeof(type/expression) and identifier or constant.
      @@
      type TYPE;
      expression THING;
      identifier COUNT_ID;
      constant COUNT_CONST;
      @@
      
      (
      - kvmalloc
      + kvmalloc_array
        (
      -	sizeof(TYPE) * (COUNT_ID)
      +	COUNT_ID, sizeof(TYPE)
        , ...)
      |
      - kvmalloc
      + kvmalloc_array
        (
      -	sizeof(TYPE) * COUNT_ID
      +	COUNT_ID, sizeof(TYPE)
        , ...)
      |
      - kvmalloc
      + kvmalloc_array
        (
      -	sizeof(TYPE) * (COUNT_CONST)
      +	COUNT_CONST, sizeof(TYPE)
        , ...)
      |
      - kvmalloc
      + kvmalloc_array
        (
      -	sizeof(TYPE) * COUNT_CONST
      +	COUNT_CONST, sizeof(TYPE)
        , ...)
      |
      - kvmalloc
      + kvmalloc_array
        (
      -	sizeof(THING) * (COUNT_ID)
      +	COUNT_ID, sizeof(THING)
        , ...)
      |
      - kvmalloc
      + kvmalloc_array
        (
      -	sizeof(THING) * COUNT_ID
      +	COUNT_ID, sizeof(THING)
        , ...)
      |
      - kvmalloc
      + kvmalloc_array
        (
      -	sizeof(THING) * (COUNT_CONST)
      +	COUNT_CONST, sizeof(THING)
        , ...)
      |
      - kvmalloc
      + kvmalloc_array
        (
      -	sizeof(THING) * COUNT_CONST
      +	COUNT_CONST, sizeof(THING)
        , ...)
      )
      
      // 2-factor product, only identifiers.
      @@
      identifier SIZE, COUNT;
      @@
      
      - kvmalloc
      + kvmalloc_array
        (
      -	SIZE * COUNT
      +	COUNT, SIZE
        , ...)
      
      // 3-factor product with 1 sizeof(type) or sizeof(expression), with
      // redundant parens removed.
      @@
      expression THING;
      identifier STRIDE, COUNT;
      type TYPE;
      @@
      
      (
        kvmalloc(
      -	sizeof(TYPE) * (COUNT) * (STRIDE)
      +	array3_size(COUNT, STRIDE, sizeof(TYPE))
        , ...)
      |
        kvmalloc(
      -	sizeof(TYPE) * (COUNT) * STRIDE
      +	array3_size(COUNT, STRIDE, sizeof(TYPE))
        , ...)
      |
        kvmalloc(
      -	sizeof(TYPE) * COUNT * (STRIDE)
      +	array3_size(COUNT, STRIDE, sizeof(TYPE))
        , ...)
      |
        kvmalloc(
      -	sizeof(TYPE) * COUNT * STRIDE
      +	array3_size(COUNT, STRIDE, sizeof(TYPE))
        , ...)
      |
        kvmalloc(
      -	sizeof(THING) * (COUNT) * (STRIDE)
      +	array3_size(COUNT, STRIDE, sizeof(THING))
        , ...)
      |
        kvmalloc(
      -	sizeof(THING) * (COUNT) * STRIDE
      +	array3_size(COUNT, STRIDE, sizeof(THING))
        , ...)
      |
        kvmalloc(
      -	sizeof(THING) * COUNT * (STRIDE)
      +	array3_size(COUNT, STRIDE, sizeof(THING))
        , ...)
      |
        kvmalloc(
      -	sizeof(THING) * COUNT * STRIDE
      +	array3_size(COUNT, STRIDE, sizeof(THING))
        , ...)
      )
      
      // 3-factor product with 2 sizeof(variable), with redundant parens removed.
      @@
      expression THING1, THING2;
      identifier COUNT;
      type TYPE1, TYPE2;
      @@
      
      (
        kvmalloc(
      -	sizeof(TYPE1) * sizeof(TYPE2) * COUNT
      +	array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2))
        , ...)
      |
        kvmalloc(
      -	sizeof(TYPE1) * sizeof(TYPE2) * (COUNT)
      +	array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2))
        , ...)
      |
        kvmalloc(
      -	sizeof(THING1) * sizeof(THING2) * COUNT
      +	array3_size(COUNT, sizeof(THING1), sizeof(THING2))
        , ...)
      |
        kvmalloc(
      -	sizeof(THING1) * sizeof(THING2) * (COUNT)
      +	array3_size(COUNT, sizeof(THING1), sizeof(THING2))
        , ...)
      |
        kvmalloc(
      -	sizeof(TYPE1) * sizeof(THING2) * COUNT
      +	array3_size(COUNT, sizeof(TYPE1), sizeof(THING2))
        , ...)
      |
        kvmalloc(
      -	sizeof(TYPE1) * sizeof(THING2) * (COUNT)
      +	array3_size(COUNT, sizeof(TYPE1), sizeof(THING2))
        , ...)
      )
      
      // 3-factor product, only identifiers, with redundant parens removed.
      @@
      identifier STRIDE, SIZE, COUNT;
      @@
      
      (
        kvmalloc(
      -	(COUNT) * STRIDE * SIZE
      +	array3_size(COUNT, STRIDE, SIZE)
        , ...)
      |
        kvmalloc(
      -	COUNT * (STRIDE) * SIZE
      +	array3_size(COUNT, STRIDE, SIZE)
        , ...)
      |
        kvmalloc(
      -	COUNT * STRIDE * (SIZE)
      +	array3_size(COUNT, STRIDE, SIZE)
        , ...)
      |
        kvmalloc(
      -	(COUNT) * (STRIDE) * SIZE
      +	array3_size(COUNT, STRIDE, SIZE)
        , ...)
      |
        kvmalloc(
      -	COUNT * (STRIDE) * (SIZE)
      +	array3_size(COUNT, STRIDE, SIZE)
        , ...)
      |
        kvmalloc(
      -	(COUNT) * STRIDE * (SIZE)
      +	array3_size(COUNT, STRIDE, SIZE)
        , ...)
      |
        kvmalloc(
      -	(COUNT) * (STRIDE) * (SIZE)
      +	array3_size(COUNT, STRIDE, SIZE)
        , ...)
      |
        kvmalloc(
      -	COUNT * STRIDE * SIZE
      +	array3_size(COUNT, STRIDE, SIZE)
        , ...)
      )
      
      // Any remaining multi-factor products, first at least 3-factor products,
      // when they're not all constants...
      @@
      expression E1, E2, E3;
      constant C1, C2, C3;
      @@
      
      (
        kvmalloc(C1 * C2 * C3, ...)
      |
        kvmalloc(
      -	(E1) * E2 * E3
      +	array3_size(E1, E2, E3)
        , ...)
      |
        kvmalloc(
      -	(E1) * (E2) * E3
      +	array3_size(E1, E2, E3)
        , ...)
      |
        kvmalloc(
      -	(E1) * (E2) * (E3)
      +	array3_size(E1, E2, E3)
        , ...)
      |
        kvmalloc(
      -	E1 * E2 * E3
      +	array3_size(E1, E2, E3)
        , ...)
      )
      
      // And then all remaining 2 factors products when they're not all constants,
      // keeping sizeof() as the second factor argument.
      @@
      expression THING, E1, E2;
      type TYPE;
      constant C1, C2, C3;
      @@
      
      (
        kvmalloc(sizeof(THING) * C2, ...)
      |
        kvmalloc(sizeof(TYPE) * C2, ...)
      |
        kvmalloc(C1 * C2 * C3, ...)
      |
        kvmalloc(C1 * C2, ...)
      |
      - kvmalloc
      + kvmalloc_array
        (
      -	sizeof(TYPE) * (E2)
      +	E2, sizeof(TYPE)
        , ...)
      |
      - kvmalloc
      + kvmalloc_array
        (
      -	sizeof(TYPE) * E2
      +	E2, sizeof(TYPE)
        , ...)
      |
      - kvmalloc
      + kvmalloc_array
        (
      -	sizeof(THING) * (E2)
      +	E2, sizeof(THING)
        , ...)
      |
      - kvmalloc
      + kvmalloc_array
        (
      -	sizeof(THING) * E2
      +	E2, sizeof(THING)
        , ...)
      |
      - kvmalloc
      + kvmalloc_array
        (
      -	(E1) * E2
      +	E1, E2
        , ...)
      |
      - kvmalloc
      + kvmalloc_array
        (
      -	(E1) * (E2)
      +	E1, E2
        , ...)
      |
      - kvmalloc
      + kvmalloc_array
        (
      -	E1 * E2
      +	E1, E2
        , ...)
      )
      Signed-off-by: Kees Cook <keescook@chromium.org>
      344476e1
  2. 31 5月, 2018 2 次提交
  3. 21 5月, 2018 2 次提交
  4. 14 5月, 2018 1 次提交
    • J
      ext4: handle errors on ext4_commit_super · c89128a0
      Jaegeuk Kim 提交于
      When remounting ext4 from ro to rw, the transition is currently allowed
      even if ext4_commit_super() returns EIO. Even worse, after that,
      fs/buffer complains about buffer dirty bits, like:
      
       Call trace:
       [<ffffff9750c259dc>] mark_buffer_dirty+0x184/0x1a4
       [<ffffff9750cb398c>] __ext4_handle_dirty_super+0x4c/0xfc
       [<ffffff9750c7a9fc>] ext4_file_open+0x154/0x1c0
       [<ffffff9750bea51c>] do_dentry_open+0x114/0x2d0
       [<ffffff9750bea75c>] vfs_open+0x5c/0x94
       [<ffffff9750bf879c>] path_openat+0x668/0xfe8
       [<ffffff9750bf8088>] do_filp_open+0x74/0x120
       [<ffffff9750beac98>] do_sys_open+0x148/0x254
       [<ffffff9750beade0>] SyS_openat+0x10/0x18
       [<ffffff9750a83ab0>] el0_svc_naked+0x24/0x28
       EXT4-fs (dm-1): previous I/O error to superblock detected
       Buffer I/O error on dev dm-1, logical block 0, lost sync page write
       EXT4-fs (dm-1): re-mounted. Opts: (null)
       Buffer I/O error on dev dm-1, logical block 80, lost async page write
      Signed-off-by: Jaegeuk Kim <jaegeuk@google.com>
      Signed-off-by: Theodore Ts'o <tytso@mit.edu>
      c89128a0
  5. 12 5月, 2018 1 次提交
  6. 26 4月, 2018 1 次提交
  7. 30 3月, 2018 5 次提交
  8. 22 3月, 2018 2 次提交
  9. 19 2月, 2018 1 次提交
  10. 29 1月, 2018 1 次提交
  11. 20 1月, 2018 1 次提交
    • D
      ext4: auto disable dax instead of failing mount · 24f3478d
      Dan Williams 提交于
      Bring the ext4 filesystem in line with xfs that only warns and continues
      when the "-o dax" option is specified to mount and the backing device
      does not support dax. This is in preparation for removing dax support
      from devices that do not enable get_user_pages() operations on dax
      mappings. In other words 'gup' support is required and configurations
      that were using so called 'page-less' dax will be converted back to
      using the page cache.
      
      Removing the broken 'page-less' dax support is a pre-requisite for
      removing the "EXPERIMENTAL" warning when mounting a filesystem in dax
      mode.
      Reviewed-by: Jan Kara <jack@suse.cz>
      Signed-off-by: Dan Williams <dan.j.williams@intel.com>
      24f3478d
  12. 16 1月, 2018 1 次提交
    • D
      ext4: Define usercopy region in ext4_inode_cache slab cache · f8dd7c70
      David Windsor 提交于
      The ext4 symlink pathnames, stored in struct ext4_inode_info.i_data
      and therefore contained in the ext4_inode_cache slab cache, need
      to be copied to/from userspace.
      
      cache object allocation:
          fs/ext4/super.c:
              ext4_alloc_inode(...):
                  struct ext4_inode_info *ei;
                  ...
                  ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS);
                  ...
                  return &ei->vfs_inode;
      
          include/trace/events/ext4.h:
                  #define EXT4_I(inode) \
                      (container_of(inode, struct ext4_inode_info, vfs_inode))
      
          fs/ext4/namei.c:
              ext4_symlink(...):
                  ...
                  inode->i_link = (char *)&EXT4_I(inode)->i_data;
      
      example usage trace:
          readlink_copy+0x43/0x70
          vfs_readlink+0x62/0x110
          SyS_readlinkat+0x100/0x130
      
          fs/namei.c:
              readlink_copy(..., link):
                  ...
                  copy_to_user(..., link, len)
      
              (inlined into vfs_readlink)
              generic_readlink(dentry, ...):
                  struct inode *inode = d_inode(dentry);
                  const char *link = inode->i_link;
                  ...
                  readlink_copy(..., link);
      
      In support of usercopy hardening, this patch defines a region in the
      ext4_inode_cache slab cache in which userspace copy operations are
      allowed.
      
      This region is known as the slab cache's usercopy region. Slab caches
      can now check that each dynamically sized copy operation involving
      cache-managed memory falls entirely within the slab's usercopy region.
      
      This patch is modified from Brad Spengler/PaX Team's PAX_USERCOPY
      whitelisting code in the last public patch of grsecurity/PaX based on my
      understanding of the code. Changes or omissions from the original code are
      mine and don't reflect the original grsecurity/PaX code.
      Signed-off-by: David Windsor <dave@nullcore.net>
      [kees: adjust commit log, provide usage trace]
      Cc: "Theodore Ts'o" <tytso@mit.edu>
      Cc: Andreas Dilger <adilger.kernel@dilger.ca>
      Cc: linux-ext4@vger.kernel.org
      Signed-off-by: Kees Cook <keescook@chromium.org>
      f8dd7c70
  13. 12 1月, 2018 2 次提交
  14. 10 1月, 2018 2 次提交
  15. 18 12月, 2017 1 次提交
    • T
      ext4: fix up remaining files with SPDX cleanups · f5166768
      Theodore Ts'o 提交于
      A number of ext4 source files were skipped because their copyright
      permission statements didn't match the expected text used by the
      automated conversion utilities.  I've added SPDX tags for the rest.
      
      While looking at some of these files, I've noticed that we have quite
      a bit of variation on the licenses that were used --- in particular
      some of the Red Hat licenses on the jbd2 files use a GPL2+ license,
      and we have some files that have a LGPL-2.1 license (which was quite
      surprising).
      
      I've not attempted to do any license changes.  Even if it is perfectly
      legal to relicense to GPL 2.0-only for consistency's sake, that should
      be done with ext4 developer community discussion.
      Signed-off-by: Theodore Ts'o <tytso@mit.edu>
      
      f5166768
  16. 28 11月, 2017 1 次提交
    • L
      Rename superblock flags (MS_xyz -> SB_xyz) · 1751e8a6
      Linus Torvalds 提交于
      This is a pure automated search-and-replace of the internal kernel
      superblock flags.
      
      The s_flags are now called SB_*, with the names and the values for the
      moment mirroring the MS_* flags that they're equivalent to.
      
      Note how the MS_xyz flags are the ones passed to the mount system call,
      while the SB_xyz flags are what we then use in sb->s_flags.
      
      The script to do this was:
      
          # places to look in; re security/*: it generally should *not* be
          # touched (that stuff parses mount(2) arguments directly), but
          # there are two places where we really deal with superblock flags.
          FILES="drivers/mtd drivers/staging/lustre fs ipc mm \
                  include/linux/fs.h include/uapi/linux/bfs_fs.h \
                  security/apparmor/apparmorfs.c security/apparmor/include/lib.h"
          # the list of MS_... constants
          SYMS="RDONLY NOSUID NODEV NOEXEC SYNCHRONOUS REMOUNT MANDLOCK \
                DIRSYNC NOATIME NODIRATIME BIND MOVE REC VERBOSE SILENT \
                POSIXACL UNBINDABLE PRIVATE SLAVE SHARED RELATIME KERNMOUNT \
                I_VERSION STRICTATIME LAZYTIME SUBMOUNT NOREMOTELOCK NOSEC BORN \
                ACTIVE NOUSER"
      
          SED_PROG=
          for i in $SYMS; do SED_PROG="$SED_PROG -e s/MS_$i/SB_$i/g"; done
      
          # we want files that contain at least one of MS_...,
          # with fs/namespace.c and fs/pnode.c excluded.
          L=$(for i in $SYMS; do git grep -w -l MS_$i $FILES; done| sort|uniq|grep -v '^fs/namespace.c'|grep -v '^fs/pnode.c')
      
          for f in $L; do sed -i $f $SED_PROG; done
      Requested-by: Al Viro <viro@zeniv.linux.org.uk>
      Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
      1751e8a6
  17. 09 11月, 2017 1 次提交
  18. 19 10月, 2017 6 次提交
  19. 12 10月, 2017 2 次提交
    • R
      ext4: add sanity check for encryption + DAX · 7d3e06a8
      Ross Zwisler 提交于
      We prevent DAX from being used on inodes which are using ext4's built in
      encryption via a check in ext4_set_inode_flags().  We do have what appears
      to be an unsafe transition of S_DAX in ext4_set_context(), though, where
      S_DAX can get disabled without us doing a proper writeback + invalidate.
      
      There are also issues with mm-level races when changing the value of S_DAX,
      as well as issues with the VM_MIXEDMAP flag:
      
      https://www.spinics.net/lists/linux-xfs/msg09859.html
      
      I actually think we are safe in this case because of the following:
      
      1) You can't encrypt an existing file.  Encryption can only be set on an
      empty directory, with new inodes in that directory being created with
      encryption turned on, so I don't think it's possible to turn encryption on
      for a file that has open DAX mmaps or outstanding I/Os.
      
      2) There is no way to turn encryption off on a given file.  Once an inode
      is encrypted, it stays encrypted for the life of that inode, so we don't
      have to worry about the case where we turn encryption off and S_DAX
      suddenly turns on.
      
      3) The only way we end up in ext4_set_context() to turn on encryption is
      when we are creating a new file in the encrypted directory.  This happens
      as part of ext4_create() before the inode has been allowed to do any I/O.
      Here's the call tree:
      
       ext4_create()
         __ext4_new_inode()
      	 ext4_set_inode_flags() // sets S_DAX
      	 fscrypt_inherit_context()
      		fscrypt_get_encryption_info();
      		ext4_set_context() // sets EXT4_INODE_ENCRYPT, clears S_DAX
      
      So, I actually think it's safe to transition S_DAX in ext4_set_context()
      without any locking, writebacks or invalidations.  I've added a
      WARN_ON_ONCE() sanity check to make sure that we are notified if we ever
      encounter a case where we are encrypting an inode that already has data,
      in which case we need to add code to safely transition S_DAX.
      Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
      Signed-off-by: Theodore Ts'o <tytso@mit.edu>
      Reviewed-by: Jan Kara <jack@suse.cz>
      7d3e06a8
    • R
      ext4: prevent data corruption with inline data + DAX · 559db4c6
      Ross Zwisler 提交于
      If an inode has inline data it is currently prevented from using DAX by a
      check in ext4_set_inode_flags().  When the inode grows inline data via
      ext4_create_inline_data() or removes its inline data via
      ext4_destroy_inline_data_nolock(), the value of S_DAX can change.
      
      Currently these changes are unsafe because we don't hold off page faults
      and I/O, write back dirty radix tree entries and invalidate all mappings.
      There are also issues with mm-level races when changing the value of S_DAX,
      as well as issues with the VM_MIXEDMAP flag:
      
      https://www.spinics.net/lists/linux-xfs/msg09859.html
      
      The unsafe transition of S_DAX can reliably cause data corruption, as shown
      by the following fstest:
      
      https://patchwork.kernel.org/patch/9948381/
      
      Fix this issue by preventing the DAX mount option from being used on
      filesystems that were created to support inline data.  Inline data is an
      option given to mkfs.ext4.
      Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
      Signed-off-by: Theodore Ts'o <tytso@mit.edu>
      Reviewed-by: Jan Kara <jack@suse.cz>
      CC: stable@vger.kernel.org
      559db4c6
  20. 06 9月, 2017 1 次提交
  21. 01 9月, 2017 1 次提交
  22. 25 8月, 2017 2 次提交
  23. 18 8月, 2017 2 次提交
    • J
      quota: Reduce contention on dq_data_lock · 7b9ca4c6
      Jan Kara 提交于
      dq_data_lock is currently used to protect all modifications of quota
      accounting information, consistency of quota accounting on the inode,
      and dquot pointers from inode. As a result contention on the lock can be
      pretty heavy.
      
      Reduce the contention on the lock by protecting quota accounting
      information by a new dquot->dq_dqb_lock and consistency of quota
      accounting with inode usage by inode->i_lock.
      
      This change reduces time to create 500000 files on ext4 on ramdisk by 50
      different processes in separate directories by 6% when user quota is
      turned on. When those 50 processes belong to 50 different users, the
      improvement is about 9%.
      Signed-off-by: Jan Kara <jack@suse.cz>
      7b9ca4c6
    • J
      ext4: Disable dirty list tracking of dquots when journalling quotas · 91389240
      Jan Kara 提交于
      When journalling quotas, we writeback all dquots immediately after
      changing them as part of current transaction. Thus there's no need to
      write anything in dquot_writeback_dquots() and so we can avoid updating
      list of dirty dquots to reduce dq_list_lock contention.
      
      This change reduces time to create 500000 files on ext4 on ramdisk by 50
      different processes in separate directories by 15% when user quota is
      turned on.
      Signed-off-by: Jan Kara <jack@suse.cz>
      91389240