1. 18 8月, 2018 1 次提交
  2. 30 7月, 2018 3 次提交
  3. 17 6月, 2018 2 次提交
  4. 13 6月, 2018 1 次提交
  5. 06 6月, 2018 1 次提交
    • D
      vfs: change inode times to use struct timespec64 · 95582b00
      Deepa Dinamani 提交于
      struct timespec is not y2038 safe. Transition vfs to use
      y2038 safe struct timespec64 instead.
      
      The change was made with the help of the following Coccinelle
      script. This catches about 80% of the changes.
      All the header file and logic changes are included in the
      first 5 rules. The rest are trivial substitutions.
      I avoid changing any of the function signatures or any other
      filesystem specific data structures to keep the patch simple
      for review.
      
      The script can be a little shorter by combining different cases.
      But this version was sufficient for my use case.
      
      virtual patch
      
      @ depends on patch @
      identifier now;
      @@
      - struct timespec
      + struct timespec64
        current_time ( ... )
        {
      - struct timespec now = current_kernel_time();
      + struct timespec64 now = current_kernel_time64();
        ...
      - return timespec_trunc(
      + return timespec64_trunc(
        ... );
        }
      
      @ depends on patch @
      identifier xtime;
      @@
       struct \( iattr \| inode \| kstat \) {
       ...
      -       struct timespec xtime;
      +       struct timespec64 xtime;
       ...
       }
      
      @ depends on patch @
      identifier t;
      @@
       struct inode_operations {
       ...
      int (*update_time) (...,
      -       struct timespec t,
      +       struct timespec64 t,
      ...);
       ...
       }
      
      @ depends on patch @
      identifier t;
      identifier fn_update_time =~ "update_time$";
      @@
       fn_update_time (...,
      - struct timespec *t,
      + struct timespec64 *t,
       ...) { ... }
      
      @ depends on patch @
      identifier t;
      @@
      lease_get_mtime( ... ,
      - struct timespec *t
      + struct timespec64 *t
        ) { ... }
      
      @te depends on patch forall@
      identifier ts;
      local idexpression struct inode *inode_node;
      identifier i_xtime =~ "^i_[acm]time$";
      identifier ia_xtime =~ "^ia_[acm]time$";
      identifier fn_update_time =~ "update_time$";
      identifier fn;
      expression e, E3;
      local idexpression struct inode *node1;
      local idexpression struct inode *node2;
      local idexpression struct iattr *attr1;
      local idexpression struct iattr *attr2;
      local idexpression struct iattr attr;
      identifier i_xtime1 =~ "^i_[acm]time$";
      identifier i_xtime2 =~ "^i_[acm]time$";
      identifier ia_xtime1 =~ "^ia_[acm]time$";
      identifier ia_xtime2 =~ "^ia_[acm]time$";
      @@
      (
      (
      - struct timespec ts;
      + struct timespec64 ts;
      |
      - struct timespec ts = current_time(inode_node);
      + struct timespec64 ts = current_time(inode_node);
      )
      
      <+... when != ts
      (
      - timespec_equal(&inode_node->i_xtime, &ts)
      + timespec64_equal(&inode_node->i_xtime, &ts)
      |
      - timespec_equal(&ts, &inode_node->i_xtime)
      + timespec64_equal(&ts, &inode_node->i_xtime)
      |
      - timespec_compare(&inode_node->i_xtime, &ts)
      + timespec64_compare(&inode_node->i_xtime, &ts)
      |
      - timespec_compare(&ts, &inode_node->i_xtime)
      + timespec64_compare(&ts, &inode_node->i_xtime)
      |
      ts = current_time(e)
      |
      fn_update_time(..., &ts,...)
      |
      inode_node->i_xtime = ts
      |
      node1->i_xtime = ts
      |
      ts = inode_node->i_xtime
      |
      <+... attr1->ia_xtime ...+> = ts
      |
      ts = attr1->ia_xtime
      |
      ts.tv_sec
      |
      ts.tv_nsec
      |
      btrfs_set_stack_timespec_sec(..., ts.tv_sec)
      |
      btrfs_set_stack_timespec_nsec(..., ts.tv_nsec)
      |
      - ts = timespec64_to_timespec(
      + ts =
      ...
      -)
      |
      - ts = ktime_to_timespec(
      + ts = ktime_to_timespec64(
      ...)
      |
      - ts = E3
      + ts = timespec_to_timespec64(E3)
      |
      - ktime_get_real_ts(&ts)
      + ktime_get_real_ts64(&ts)
      |
      fn(...,
      - ts
      + timespec64_to_timespec(ts)
      ,...)
      )
      ...+>
      (
      <... when != ts
      - return ts;
      + return timespec64_to_timespec(ts);
      ...>
      )
      |
      - timespec_equal(&node1->i_xtime1, &node2->i_xtime2)
      + timespec64_equal(&node1->i_xtime2, &node2->i_xtime2)
      |
      - timespec_equal(&node1->i_xtime1, &attr2->ia_xtime2)
      + timespec64_equal(&node1->i_xtime2, &attr2->ia_xtime2)
      |
      - timespec_compare(&node1->i_xtime1, &node2->i_xtime2)
      + timespec64_compare(&node1->i_xtime1, &node2->i_xtime2)
      |
      node1->i_xtime1 =
      - timespec_trunc(attr1->ia_xtime1,
      + timespec64_trunc(attr1->ia_xtime1,
      ...)
      |
      - attr1->ia_xtime1 = timespec_trunc(attr2->ia_xtime2,
      + attr1->ia_xtime1 =  timespec64_trunc(attr2->ia_xtime2,
      ...)
      |
      - ktime_get_real_ts(&attr1->ia_xtime1)
      + ktime_get_real_ts64(&attr1->ia_xtime1)
      |
      - ktime_get_real_ts(&attr.ia_xtime1)
      + ktime_get_real_ts64(&attr.ia_xtime1)
      )
      
      @ depends on patch @
      struct inode *node;
      struct iattr *attr;
      identifier fn;
      identifier i_xtime =~ "^i_[acm]time$";
      identifier ia_xtime =~ "^ia_[acm]time$";
      expression e;
      @@
      (
      - fn(node->i_xtime);
      + fn(timespec64_to_timespec(node->i_xtime));
      |
       fn(...,
      - node->i_xtime);
      + timespec64_to_timespec(node->i_xtime));
      |
      - e = fn(attr->ia_xtime);
      + e = fn(timespec64_to_timespec(attr->ia_xtime));
      )
      
      @ depends on patch forall @
      struct inode *node;
      struct iattr *attr;
      identifier i_xtime =~ "^i_[acm]time$";
      identifier ia_xtime =~ "^ia_[acm]time$";
      identifier fn;
      @@
      {
      + struct timespec ts;
      <+...
      (
      + ts = timespec64_to_timespec(node->i_xtime);
      fn (...,
      - &node->i_xtime,
      + &ts,
      ...);
      |
      + ts = timespec64_to_timespec(attr->ia_xtime);
      fn (...,
      - &attr->ia_xtime,
      + &ts,
      ...);
      )
      ...+>
      }
      
      @ depends on patch forall @
      struct inode *node;
      struct iattr *attr;
      struct kstat *stat;
      identifier ia_xtime =~ "^ia_[acm]time$";
      identifier i_xtime =~ "^i_[acm]time$";
      identifier xtime =~ "^[acm]time$";
      identifier fn, ret;
      @@
      {
      + struct timespec ts;
      <+...
      (
      + ts = timespec64_to_timespec(node->i_xtime);
      ret = fn (...,
      - &node->i_xtime,
      + &ts,
      ...);
      |
      + ts = timespec64_to_timespec(node->i_xtime);
      ret = fn (...,
      - &node->i_xtime);
      + &ts);
      |
      + ts = timespec64_to_timespec(attr->ia_xtime);
      ret = fn (...,
      - &attr->ia_xtime,
      + &ts,
      ...);
      |
      + ts = timespec64_to_timespec(attr->ia_xtime);
      ret = fn (...,
      - &attr->ia_xtime);
      + &ts);
      |
      + ts = timespec64_to_timespec(stat->xtime);
      ret = fn (...,
      - &stat->xtime);
      + &ts);
      )
      ...+>
      }
      
      @ depends on patch @
      struct inode *node;
      struct inode *node2;
      identifier i_xtime1 =~ "^i_[acm]time$";
      identifier i_xtime2 =~ "^i_[acm]time$";
      identifier i_xtime3 =~ "^i_[acm]time$";
      struct iattr *attrp;
      struct iattr *attrp2;
      struct iattr attr ;
      identifier ia_xtime1 =~ "^ia_[acm]time$";
      identifier ia_xtime2 =~ "^ia_[acm]time$";
      struct kstat *stat;
      struct kstat stat1;
      struct timespec64 ts;
      identifier xtime =~ "^[acmb]time$";
      expression e;
      @@
      (
      ( node->i_xtime2 \| attrp->ia_xtime2 \| attr.ia_xtime2 \) = node->i_xtime1  ;
      |
       node->i_xtime2 = \( node2->i_xtime1 \| timespec64_trunc(...) \);
      |
       node->i_xtime2 = node->i_xtime1 = node->i_xtime3 = \(ts \| current_time(...) \);
      |
       node->i_xtime1 = node->i_xtime3 = \(ts \| current_time(...) \);
      |
       stat->xtime = node2->i_xtime1;
      |
       stat1.xtime = node2->i_xtime1;
      |
      ( node->i_xtime2 \| attrp->ia_xtime2 \) = attrp->ia_xtime1  ;
      |
      ( attrp->ia_xtime1 \| attr.ia_xtime1 \) = attrp2->ia_xtime2;
      |
      - e = node->i_xtime1;
      + e = timespec64_to_timespec( node->i_xtime1 );
      |
      - e = attrp->ia_xtime1;
      + e = timespec64_to_timespec( attrp->ia_xtime1 );
      |
      node->i_xtime1 = current_time(...);
      |
       node->i_xtime2 = node->i_xtime1 = node->i_xtime3 =
      - e;
      + timespec_to_timespec64(e);
      |
       node->i_xtime1 = node->i_xtime3 =
      - e;
      + timespec_to_timespec64(e);
      |
      - node->i_xtime1 = e;
      + node->i_xtime1 = timespec_to_timespec64(e);
      )
      Signed-off-by: Deepa Dinamani <deepa.kernel@gmail.com>
      Cc: <anton@tuxera.com>
      Cc: <balbi@kernel.org>
      Cc: <bfields@fieldses.org>
      Cc: <darrick.wong@oracle.com>
      Cc: <dhowells@redhat.com>
      Cc: <dsterba@suse.com>
      Cc: <dwmw2@infradead.org>
      Cc: <hch@lst.de>
      Cc: <hirofumi@mail.parknet.co.jp>
      Cc: <hubcap@omnibond.com>
      Cc: <jack@suse.com>
      Cc: <jaegeuk@kernel.org>
      Cc: <jaharkes@cs.cmu.edu>
      Cc: <jslaby@suse.com>
      Cc: <keescook@chromium.org>
      Cc: <mark@fasheh.com>
      Cc: <miklos@szeredi.hu>
      Cc: <nico@linaro.org>
      Cc: <reiserfs-devel@vger.kernel.org>
      Cc: <richard@nod.at>
      Cc: <sage@redhat.com>
      Cc: <sfrench@samba.org>
      Cc: <swhiteho@redhat.com>
      Cc: <tj@kernel.org>
      Cc: <trond.myklebust@primarydata.com>
      Cc: <tytso@mit.edu>
      Cc: <viro@zeniv.linux.org.uk>
      95582b00
  6. 16 5月, 2018 1 次提交
  7. 12 5月, 2018 1 次提交
  8. 22 3月, 2018 1 次提交
  9. 08 1月, 2018 1 次提交
  10. 18 12月, 2017 1 次提交
    • T
      ext4: fix up remaining files with SPDX cleanups · f5166768
      Theodore Ts'o 提交于
      A number of ext4 source files were skipped because their copyright
      permission statements didn't match the expected text used by the
      automated conversion utilities.  I've added SPDX tags for the rest.
      
      While looking at some of these files, I've noticed that we have quite
      a bit of variation on the licenses that were used --- in particular
      some of the Red Hat licenses on the jbd2 files use a GPL2+ license,
      and we have some files that have a LGPL-2.1 license (which was quite
      surprising).
      
      I've not attempted to do any license changes.  Even if it is perfectly
      legal to relicense to GPL 2.0-only for consistency's sake, that should
      be done with ext4 developer community discussion.
      Signed-off-by: Theodore Ts'o <tytso@mit.edu>
      
      f5166768
  11. 09 11月, 2017 1 次提交
  12. 02 11月, 2017 1 次提交
    • G
      License cleanup: add SPDX GPL-2.0 license identifier to files with no license · b2441318
      Greg Kroah-Hartman 提交于
      Many source files in the tree are missing licensing information, which
      makes it harder for compliance tools to determine the correct license.
      
      By default all files without license information are under the default
      license of the kernel, which is GPL version 2.
      
      Update the files which contain no license information with the 'GPL-2.0'
      SPDX license identifier.  The SPDX identifier is a legally binding
      shorthand, which can be used instead of the full boiler plate text.
      
      This patch is based on work done by Thomas Gleixner and Kate Stewart and
      Philippe Ombredanne.
      
      How this work was done:
      
      Patches were generated and checked against linux-4.14-rc6 for a subset of
      the use cases:
       - file had no licensing information in it,
       - file was a */uapi/* one with no licensing information in it,
       - file was a */uapi/* one with existing licensing information,
      
      Further patches will be generated in subsequent months to fix up cases
      where non-standard license headers were used, and references to license
      had to be inferred by heuristics based on keywords.
      
      The analysis to determine which SPDX License Identifier to be applied to
      a file was done in a spreadsheet of side by side results from the
      output of two independent scanners (ScanCode & Windriver) producing SPDX
      tag:value files created by Philippe Ombredanne.  Philippe prepared the
      base worksheet, and did an initial spot review of a few 1000 files.
      
      The 4.13 kernel was the starting point of the analysis with 60,537 files
      assessed.  Kate Stewart did a file by file comparison of the scanner
      results in the spreadsheet to determine which SPDX license identifier(s)
      to be applied to the file. She confirmed any determination that was not
      immediately clear with lawyers working with the Linux Foundation.
      
      Criteria used to select files for SPDX license identifier tagging was:
       - Files considered eligible had to be source code files.
       - Make and config files were included as candidates if they contained >5
         lines of source
       - File already had some variant of a license header in it (even if <5
         lines).
      
      All documentation files were explicitly excluded.
      
      The following heuristics were used to determine which SPDX license
      identifiers to apply.
      
       - when both scanners couldn't find any license traces, file was
         considered to have no license information in it, and the top level
         COPYING file license applied.
      
         For non */uapi/* files that summary was:
      
         SPDX license identifier                            # files
         ---------------------------------------------------|-------
         GPL-2.0                                              11139
      
         and resulted in the first patch in this series.
      
         If that file was a */uapi/* path one, it was "GPL-2.0 WITH
         Linux-syscall-note" otherwise it was "GPL-2.0".  Results of that was:
      
         SPDX license identifier                            # files
         ---------------------------------------------------|-------
         GPL-2.0 WITH Linux-syscall-note                        930
      
         and resulted in the second patch in this series.
      
       - if a file had some form of licensing information in it, and was one
         of the */uapi/* ones, it was denoted with the Linux-syscall-note if
         any GPL family license was found in the file or had no licensing in
         it (per prior point).  Results summary:
      
         SPDX license identifier                            # files
         ---------------------------------------------------|------
         GPL-2.0 WITH Linux-syscall-note                       270
         GPL-2.0+ WITH Linux-syscall-note                      169
         ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause)    21
         ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)    17
         LGPL-2.1+ WITH Linux-syscall-note                      15
         GPL-1.0+ WITH Linux-syscall-note                       14
         ((GPL-2.0+ WITH Linux-syscall-note) OR BSD-3-Clause)    5
         LGPL-2.0+ WITH Linux-syscall-note                       4
         LGPL-2.1 WITH Linux-syscall-note                        3
         ((GPL-2.0 WITH Linux-syscall-note) OR MIT)              3
         ((GPL-2.0 WITH Linux-syscall-note) AND MIT)             1
      
         and that resulted in the third patch in this series.
      
       - when the two scanners agreed on the detected license(s), that became
         the concluded license(s).
      
       - when there was disagreement between the two scanners (one detected a
         license but the other didn't, or they both detected different
         licenses) a manual inspection of the file occurred.
      
       - In most cases a manual inspection of the information in the file
         resulted in a clear resolution of the license that should apply (and
         which scanner probably needed to revisit its heuristics).
      
       - When it was not immediately clear, the license identifier was
         confirmed with lawyers working with the Linux Foundation.
      
       - If there was any question as to the appropriate license identifier,
         the file was flagged for further research and to be revisited later
         in time.
      
      In total, over 70 hours of logged manual review was done on the
      spreadsheet to determine the SPDX license identifiers to apply to the
      source files by Kate, Philippe, Thomas and, in some cases, confirmation
      by lawyers working with the Linux Foundation.
      
      Kate also obtained a third independent scan of the 4.13 code base from
      FOSSology, and compared selected files where the other two scanners
      disagreed against that SPDX file, to see if there were new insights.  The
      Windriver scanner is based on an older version of FOSSology in part, so
      they are related.
      
      Thomas did random spot checks in about 500 files from the spreadsheets
      for the uapi headers and agreed with SPDX license identifier in the
      files he inspected. For the non-uapi files Thomas did random spot checks
      in about 15000 files.
      
      In the initial set of patches against 4.14-rc6, 3 files were found to have
      copy/paste license identifier errors, and have been fixed to reflect the
      correct identifier.
      
      Additionally Philippe spent 10 hours this week doing a detailed manual
      inspection and review of the 12,461 patched files from the initial patch
      version early this week with:
       - a full scancode scan run, collecting the matched texts, detected
         license ids and scores
       - reviewing anything where there was a license detected (about 500+
         files) to ensure that the applied SPDX license was correct
       - reviewing anything where there was no detection but the patch license
         was not GPL-2.0 WITH Linux-syscall-note to ensure that the applied
         SPDX license was correct
      
      This produced a worksheet with 20 files needing minor correction.  This
      worksheet was then exported into 3 different .csv files for the
      different types of files to be modified.
      
      These .csv files were then reviewed by Greg.  Thomas wrote a script to
      parse the csv files and add the proper SPDX tag to the file, in the
      format that the file expected.  This script was further refined by Greg
      based on the output to detect more types of files automatically and to
      distinguish between header and source .c files (which need different
      comment types.)  Finally Greg ran the script using the .csv files to
      generate the patches.
      Reviewed-by: Kate Stewart <kstewart@linuxfoundation.org>
      Reviewed-by: Philippe Ombredanne <pombredanne@nexb.com>
      Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
      Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
      b2441318
  13. 29 10月, 2017 1 次提交
  14. 19 10月, 2017 1 次提交
    • D
      fscrypt: clean up include file mess · 734f0d24
      Dave Chinner 提交于
      Filesystems have to include different header files based on whether they
      are compiled with encryption support or not. That's nasty and messy.
      
      Instead, rationalise the headers so we have a single include fscrypt.h
      and let it decide what internal implementation to include based on the
      __FS_HAS_ENCRYPTION define.  Filesystems set __FS_HAS_ENCRYPTION to 1
      before including linux/fscrypt.h if they are built with encryption
      support.  Otherwise, they must set __FS_HAS_ENCRYPTION to 0.
      
      Add guards to prevent fscrypt_supp.h and fscrypt_notsupp.h from being
      directly included by filesystems.
      Signed-off-by: Dave Chinner <dchinner@redhat.com>
      [EB: use 1 and 0 rather than defined/undefined]
      Signed-off-by: Eric Biggers <ebiggers@google.com>
      Signed-off-by: Theodore Ts'o <tytso@mit.edu>
      734f0d24
  15. 13 10月, 2017 1 次提交
  16. 02 10月, 2017 2 次提交
  17. 01 9月, 2017 1 次提交
  18. 25 8月, 2017 2 次提交
    • T
      ext4: backward compatibility support for Lustre ea_inode implementation · a6d05676
      Tahsin Erdogan 提交于
      Original Lustre ea_inode feature did not have ref counts on xattr inodes
      because there was always one parent that referenced it. New
      implementation expects ref count to be initialized which is not true for
      Lustre case. Handle this by detecting Lustre created xattr inode and set
      its ref count to 1.
      
      The quota handling of xattr inodes has also changed with deduplication
      support. The new implementation manually manages quotas to support sharing
      across multiple users. A consequence is that a referencing inode
      incorporates the blocks of xattr inode into its own i_block field.
      
      We need to know how a xattr inode was created so that we can reverse the
      block charges during reference removal. This is handled by introducing a
      EXT4_STATE_LUSTRE_EA_INODE flag. The flag is set on a xattr inode if
      inode appears to have been created by Lustre. During xattr inode reference
      removal, the manual quota uncharge is skipped if the flag is set.
      Signed-off-by: Tahsin Erdogan <tahsin@google.com>
      Signed-off-by: Theodore Ts'o <tytso@mit.edu>
      a6d05676
    • C
      ext4: remove timebomb in ext4_decode_extra_time() · eaa093d2
      Christoph Hellwig 提交于
      Changing behavior based on the version code is a timebomb waiting to
      happen, and not easily bisectable.  Drop it and leave any removal
      to explicit developer action. (And I don't think file system
      should _ever_ remove backwards compatibility that has no explicit
      flag, but I'll leave that to the ext4 folks).
      Signed-off-by: Christoph Hellwig <hch@lst.de>
      Signed-off-by: Theodore Ts'o <tytso@mit.edu>
      Reviewed-by: Eric Biggers <ebiggers@google.com>
      eaa093d2
  19. 06 8月, 2017 3 次提交
    • T
      ext4: make xattr inode reads faster · 9699d4f9
      Tahsin Erdogan 提交于
      ext4_xattr_inode_read() currently reads each block sequentially while
      waiting for io operation to complete before moving on to the next
      block. This prevents request merging in block layer.
      
      Add an ext4_bread_batch() function that starts reads for all blocks
      then optionally waits for them to complete. A similar logic is used
      in ext4_find_entry(), so update that code to use the new function.
      Signed-off-by: Tahsin Erdogan <tahsin@google.com>
      Signed-off-by: Theodore Ts'o <tytso@mit.edu>
      9699d4f9
    • A
      ext4: fix dir_nlink behaviour · c7414892
      Andreas Dilger 提交于
      The dir_nlink feature has been enabled by default for new ext4
      filesystems since e2fsprogs-1.41 in 2008, and was automatically
      enabled by the kernel for older ext4 filesystems since the
      dir_nlink feature was added with ext4 in kernel 2.6.28+ when
      the subdirectory count exceeded EXT4_LINK_MAX-1.
      
      Automatically adding the file system features such as dir_nlink is
      generally frowned upon, since it could cause the file system to not be
      mountable on older kernel, thus preventing the administrator from
      rolling back to an older kernel if necessary.
      
      In this case, the administrator might also want to disable the feature
      because glibc's fts_read() function does not correctly optimize
      directory traversal for directories that use st_nlinks field of 1 to
      indicate that the number of links in the directory are not tracked by
      the file system, and could fail to traverse the full directory
      hierarchy.  Fortunately, in the past ten years very few users have
      complained about incomplete file system traversal by glibc's
      fts_read().
      
      This commit also changes ext4_inc_count() to allow i_nlinks to reach
      the full EXT4_LINK_MAX links on the parent directory (including "."
      and "..") before changing i_links_count to be 1.
      
      Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=196405
      Signed-off-by: Andreas Dilger <adilger@dilger.ca>
      Signed-off-by: Theodore Ts'o <tytso@mit.edu>
      c7414892
    • D
      ext4: silence array overflow warning · 381cebfe
      Dan Carpenter 提交于
      I get a static checker warning:
      
          fs/ext4/ext4.h:3091 ext4_set_de_type()
          error: buffer overflow 'ext4_type_by_mode' 15 <= 15
      
      It seems unlikely that we would hit this read overflow in real life, but
      it's also simple enough to make the array 16 bytes instead of 15.
      Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
      Signed-off-by: Theodore Ts'o <tytso@mit.edu>
      381cebfe
  20. 31 7月, 2017 1 次提交
  21. 23 6月, 2017 1 次提交
    • D
      ext4: send parallel discards on commit completions · a0154344
      Daeho Jeong 提交于
      Now, when we mount ext4 filesystem with '-o discard' option, we have to
      issue all the discard commands for the blocks to be deallocated and
      wait for the completion of the commands on the commit complete phase.
      Because this procedure might involve a lot of sequential combinations of
      issuing discard commands and waiting for that, the delay of this
      procedure might be much too long, even up to 17.0s in our test,
      and it results in long commit delay and fsync() performance degradation.
      
      To reduce this kind of delay, instead of adding callback for each
      extent and handling all of them in a sequential manner on commit phase,
      we instead add a separate list of extents to free to the superblock and
      then process this list at once after transaction commits so that
      we can issue all the discard commands in a parallel manner like XFS
      filesystem.
      
      Finally, we could enhance the discard command handling performance.
      The result was such that 17.0s delay of a single commit in the worst
      case has been enhanced to 4.8s.
      Signed-off-by: Daeho Jeong <daeho.jeong@samsung.com>
      Signed-off-by: Theodore Ts'o <tytso@mit.edu>
      Tested-by: Hobin Woo <hobin.woo@samsung.com>
      Tested-by: Kitae Lee <kitae87.lee@samsung.com>
      Reviewed-by: Jan Kara <jack@suse.cz>
      a0154344
  22. 22 6月, 2017 10 次提交
    • T
      ext4: add nombcache mount option · cdb7ee4c
      Tahsin Erdogan 提交于
      The main purpose of mb cache is to achieve deduplication in
      extended attributes. In use cases where opportunity for deduplication
      is unlikely, it only adds overhead.
      
      Add a mount option to explicitly turn off mb cache.
      Suggested-by: Andreas Dilger <adilger@dilger.ca>
      Signed-off-by: Tahsin Erdogan <tahsin@google.com>
      Signed-off-by: Theodore Ts'o <tytso@mit.edu>
      cdb7ee4c
    • T
      ext4: xattr inode deduplication · dec214d0
      Tahsin Erdogan 提交于
      Ext4 now supports xattr values that are up to 64k in size (vfs limit).
      Large xattr values are stored in external inodes each one holding a
      single value. Once written the data blocks of these inodes are immutable.
      
      The real world use cases are expected to have a lot of value duplication
      such as inherited acls etc. To reduce data duplication on disk, this patch
      implements a deduplicator that allows sharing of xattr inodes.
      
      The deduplication is based on an in-memory hash lookup that is a best
      effort sharing scheme. When a xattr inode is read from disk (i.e.
      getxattr() call), its crc32c hash is added to a hash table. Before
      creating a new xattr inode for a value being set, the hash table is
      checked to see if an existing inode holds an identical value. If such an
      inode is found, the ref count on that inode is incremented. On value
      removal the ref count is decremented and if it reaches zero the inode is
      deleted.
      
      The quota charging for such inodes is manually managed. Every reference
      holder is charged the full size as if there was no sharing happening.
      This is consistent with how xattr blocks are also charged.
      
      [ Fixed up journal credits calculation to handle inline data and the
        rare case where a shared xattr block can get freed when two threads
        race on breaking the xattr block sharing. --tytso ]
      Signed-off-by: Tahsin Erdogan <tahsin@google.com>
      Signed-off-by: Theodore Ts'o <tytso@mit.edu>
      dec214d0
    • T
      ext4: add ext4_is_quota_file() · 02749a4c
      Tahsin Erdogan 提交于
      IS_NOQUOTA() indicates whether quota is disabled for an inode. Ext4
      also uses it to check whether an inode is for a quota file. The
      distinction currently doesn't matter because quota is disabled only
      for the quota files. When we start disabling quota for other inodes
      in the future, we will want to make the distinction clear.
      
      Replace IS_NOQUOTA() call with ext4_is_quota_file() at places where
      we are checking for quota files.
      Signed-off-by: Tahsin Erdogan <tahsin@google.com>
      Signed-off-by: Theodore Ts'o <tytso@mit.edu>
      02749a4c
    • T
      ext2, ext4: make mb block cache names more explicit · 47387409
      Tahsin Erdogan 提交于
      There will be a second mb_cache instance that tracks ea_inodes. Make
      existing names more explicit so that it is clear that they refer to
      xattr block cache.
      Signed-off-by: Tahsin Erdogan <tahsin@google.com>
      Signed-off-by: Theodore Ts'o <tytso@mit.edu>
      47387409
    • T
      ext4: move struct ext4_xattr_inode_array to xattr.h · b6d9029d
      Tahsin Erdogan 提交于
      Since this is a xattr specific data structure it is cleaner to keep it in
      xattr header file.
      Signed-off-by: Tahsin Erdogan <tahsin@google.com>
      Signed-off-by: Theodore Ts'o <tytso@mit.edu>
      b6d9029d
    • T
      ext4: modify ext4_xattr_ino_array to hold struct inode * · 0421a189
      Tahsin Erdogan 提交于
      Tracking struct inode * rather than the inode number eliminates the
      repeated ext4_xattr_inode_iget() call later. The second call cannot
      fail in practice but still requires explanation when it wants to ignore
      the return value. Avoid the trouble and make things simple.
      Signed-off-by: Tahsin Erdogan <tahsin@google.com>
      Signed-off-by: Theodore Ts'o <tytso@mit.edu>
      0421a189
    • T
      ext4: extended attribute value size limit is enforced by vfs · 0eefb107
      Tahsin Erdogan 提交于
      EXT4_XATTR_MAX_LARGE_EA_SIZE definition in ext4 is currently unused.
      Besides, vfs enforces its own 64k limit which makes the 1MB limit in
      ext4 redundant. Remove it.
      Signed-off-by: Tahsin Erdogan <tahsin@google.com>
      Signed-off-by: Theodore Ts'o <tytso@mit.edu>
      0eefb107
    • T
      ext4: do not set posix acls on xattr inodes · 1b917ed8
      Tahsin Erdogan 提交于
      We don't need acls on xattr inodes because they are not directly
      accessible from user mode.
      
      Besides lockdep complains about recursive locking of xattr_sem as seen
      below.
      
        =============================================
        [ INFO: possible recursive locking detected ]
        4.11.0-rc8+ #402 Not tainted
        ---------------------------------------------
        python/1894 is trying to acquire lock:
         (&ei->xattr_sem){++++..}, at: [<ffffffff804878a6>] ext4_xattr_get+0x66/0x270
      
        but task is already holding lock:
         (&ei->xattr_sem){++++..}, at: [<ffffffff80489500>] ext4_xattr_set_handle+0xa0/0x5d0
      
        other info that might help us debug this:
         Possible unsafe locking scenario:
      
               CPU0
               ----
          lock(&ei->xattr_sem);
          lock(&ei->xattr_sem);
      
         *** DEADLOCK ***
      
         May be due to missing lock nesting notation
      
        3 locks held by python/1894:
         #0:  (sb_writers#10){.+.+.+}, at: [<ffffffff803d829f>] mnt_want_write+0x1f/0x50
         #1:  (&sb->s_type->i_mutex_key#15){+.+...}, at: [<ffffffff803dda27>] vfs_setxattr+0x57/0xb0
         #2:  (&ei->xattr_sem){++++..}, at: [<ffffffff80489500>] ext4_xattr_set_handle+0xa0/0x5d0
      
        stack backtrace:
        CPU: 0 PID: 1894 Comm: python Not tainted 4.11.0-rc8+ #402
        Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
        Call Trace:
         dump_stack+0x67/0x99
         __lock_acquire+0x5f3/0x1830
         lock_acquire+0xb5/0x1d0
         down_read+0x2f/0x60
         ext4_xattr_get+0x66/0x270
         ext4_get_acl+0x43/0x1e0
         get_acl+0x72/0xf0
         posix_acl_create+0x5e/0x170
         ext4_init_acl+0x21/0xc0
         __ext4_new_inode+0xffd/0x16b0
         ext4_xattr_set_entry+0x5ea/0xb70
         ext4_xattr_block_set+0x1b5/0x970
         ext4_xattr_set_handle+0x351/0x5d0
         ext4_xattr_set+0x124/0x180
         ext4_xattr_user_set+0x34/0x40
         __vfs_setxattr+0x66/0x80
         __vfs_setxattr_noperm+0x69/0x1c0
         vfs_setxattr+0xa2/0xb0
         setxattr+0x129/0x160
         path_setxattr+0x87/0xb0
         SyS_setxattr+0xf/0x20
         entry_SYSCALL_64_fastpath+0x18/0xad
      Signed-off-by: Tahsin Erdogan <tahsin@google.com>
      Signed-off-by: Theodore Ts'o <tytso@mit.edu>
      1b917ed8
    • A
      ext4: xattr-in-inode support · e50e5129
      Andreas Dilger 提交于
      Large xattr support is implemented for EXT4_FEATURE_INCOMPAT_EA_INODE.
      
      If the size of an xattr value is larger than will fit in a single
      external block, then the xattr value will be saved into the body
      of an external xattr inode.
      
      This also helps support a larger number of xattrs, since only the headers
      will be stored in the in-inode space or the single external block.
      
      The inode is referenced from the xattr header via "e_value_inum",
      which was formerly "e_value_block", but that field was never used.
      The e_value_size still contains the xattr size so that listing
      xattrs does not need to look up the inode if the data is not accessed.
      
      struct ext4_xattr_entry {
              __u8    e_name_len;     /* length of name */
              __u8    e_name_index;   /* attribute name index */
              __le16  e_value_offs;   /* offset in disk block of value */
              __le32  e_value_inum;   /* inode in which value is stored */
              __le32  e_value_size;   /* size of attribute value */
              __le32  e_hash;         /* hash value of name and value */
              char    e_name[0];      /* attribute name */
      };
      
      The xattr inode is marked with the EXT4_EA_INODE_FL flag and also
      holds a back-reference to the owning inode in its i_mtime field,
      allowing the ext4/e2fsck to verify the correct inode is accessed.
      
      [ Applied fix by Dan Carpenter to avoid freeing an ERR_PTR. ]
      
      Lustre-Jira: https://jira.hpdd.intel.com/browse/LU-80
      Lustre-bugzilla: https://bugzilla.lustre.org/show_bug.cgi?id=4424
      Signed-off-by: Kalpak Shah <kalpak.shah@sun.com>
      Signed-off-by: James Simmons <uja.ornl@gmail.com>
      Signed-off-by: Andreas Dilger <andreas.dilger@intel.com>
      Signed-off-by: Tahsin Erdogan <tahsin@google.com>
      Signed-off-by: Theodore Ts'o <tytso@mit.edu>
      Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
      e50e5129
    • A
      ext4: add largedir feature · e08ac99f
      Artem Blagodarenko 提交于
      This INCOMPAT_LARGEDIR feature allows larger directories to be created
      in ldiskfs, both with directory sizes over 2GB and a maximum htree
      depth of 3 instead of the current limit of 2. These features are needed
      in order to exceed the current limit of approximately 10M entries in a
      single directory.
      
      This patch was originally written by Yang Sheng to support the Lustre server.
      
      [ Bumped the credits needed to update an indexed directory -- tytso ]
      Signed-off-by: Liang Zhen <liang.zhen@intel.com>
      Signed-off-by: Yang Sheng <yang.sheng@intel.com>
      Signed-off-by: Artem Blagodarenko <artem.blagodarenko@seagate.com>
      Signed-off-by: Theodore Ts'o <tytso@mit.edu>
      Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
      e08ac99f
  23. 25 5月, 2017 1 次提交
  24. 30 4月, 2017 1 次提交