1. 06 6月, 2018 1 次提交
    • D
      vfs: change inode times to use struct timespec64 · 95582b00
      Deepa Dinamani 提交于
      struct timespec is not y2038 safe. Transition vfs to use
      y2038 safe struct timespec64 instead.
      
      The change was made with the help of the following coccinelle
      script. This catches about 80% of the changes.
      All the header file and logic changes are included in the
      first 5 rules. The rest are trivial substitutions.
      I avoid changing any of the function signatures or any other
      filesystem specific data structures to keep the patch simple
      for review.
      
      The script can be a little shorter by combining different cases.
      But, this version was sufficient for my usecase.
      
      virtual patch
      
      @ depends on patch @
      identifier now;
      @@
      - struct timespec
      + struct timespec64
        current_time ( ... )
        {
      - struct timespec now = current_kernel_time();
      + struct timespec64 now = current_kernel_time64();
        ...
      - return timespec_trunc(
      + return timespec64_trunc(
        ... );
        }
      
      @ depends on patch @
      identifier xtime;
      @@
       struct \( iattr \| inode \| kstat \) {
       ...
      -       struct timespec xtime;
      +       struct timespec64 xtime;
       ...
       }
      
      @ depends on patch @
      identifier t;
      @@
       struct inode_operations {
       ...
      int (*update_time) (...,
      -       struct timespec t,
      +       struct timespec64 t,
      ...);
       ...
       }
      
      @ depends on patch @
      identifier t;
      identifier fn_update_time =~ "update_time$";
      @@
       fn_update_time (...,
      - struct timespec *t,
      + struct timespec64 *t,
       ...) { ... }
      
      @ depends on patch @
      identifier t;
      @@
      lease_get_mtime( ... ,
      - struct timespec *t
      + struct timespec64 *t
        ) { ... }
      
      @te depends on patch forall@
      identifier ts;
      local idexpression struct inode *inode_node;
      identifier i_xtime =~ "^i_[acm]time$";
      identifier ia_xtime =~ "^ia_[acm]time$";
      identifier fn_update_time =~ "update_time$";
      identifier fn;
      expression e, E3;
      local idexpression struct inode *node1;
      local idexpression struct inode *node2;
      local idexpression struct iattr *attr1;
      local idexpression struct iattr *attr2;
      local idexpression struct iattr attr;
      identifier i_xtime1 =~ "^i_[acm]time$";
      identifier i_xtime2 =~ "^i_[acm]time$";
      identifier ia_xtime1 =~ "^ia_[acm]time$";
      identifier ia_xtime2 =~ "^ia_[acm]time$";
      @@
      (
      (
      - struct timespec ts;
      + struct timespec64 ts;
      |
      - struct timespec ts = current_time(inode_node);
      + struct timespec64 ts = current_time(inode_node);
      )
      
      <+... when != ts
      (
      - timespec_equal(&inode_node->i_xtime, &ts)
      + timespec64_equal(&inode_node->i_xtime, &ts)
      |
      - timespec_equal(&ts, &inode_node->i_xtime)
      + timespec64_equal(&ts, &inode_node->i_xtime)
      |
      - timespec_compare(&inode_node->i_xtime, &ts)
      + timespec64_compare(&inode_node->i_xtime, &ts)
      |
      - timespec_compare(&ts, &inode_node->i_xtime)
      + timespec64_compare(&ts, &inode_node->i_xtime)
      |
      ts = current_time(e)
      |
      fn_update_time(..., &ts,...)
      |
      inode_node->i_xtime = ts
      |
      node1->i_xtime = ts
      |
      ts = inode_node->i_xtime
      |
      <+... attr1->ia_xtime ...+> = ts
      |
      ts = attr1->ia_xtime
      |
      ts.tv_sec
      |
      ts.tv_nsec
      |
      btrfs_set_stack_timespec_sec(..., ts.tv_sec)
      |
      btrfs_set_stack_timespec_nsec(..., ts.tv_nsec)
      |
      - ts = timespec64_to_timespec(
      + ts =
      ...
      -)
      |
      - ts = ktime_to_timespec(
      + ts = ktime_to_timespec64(
      ...)
      |
      - ts = E3
      + ts = timespec_to_timespec64(E3)
      |
      - ktime_get_real_ts(&ts)
      + ktime_get_real_ts64(&ts)
      |
      fn(...,
      - ts
      + timespec64_to_timespec(ts)
      ,...)
      )
      ...+>
      (
      <... when != ts
      - return ts;
      + return timespec64_to_timespec(ts);
      ...>
      )
      |
      - timespec_equal(&node1->i_xtime1, &node2->i_xtime2)
      + timespec64_equal(&node1->i_xtime2, &node2->i_xtime2)
      |
      - timespec_equal(&node1->i_xtime1, &attr2->ia_xtime2)
      + timespec64_equal(&node1->i_xtime2, &attr2->ia_xtime2)
      |
      - timespec_compare(&node1->i_xtime1, &node2->i_xtime2)
      + timespec64_compare(&node1->i_xtime1, &node2->i_xtime2)
      |
      node1->i_xtime1 =
      - timespec_trunc(attr1->ia_xtime1,
      + timespec64_trunc(attr1->ia_xtime1,
      ...)
      |
      - attr1->ia_xtime1 = timespec_trunc(attr2->ia_xtime2,
      + attr1->ia_xtime1 =  timespec64_trunc(attr2->ia_xtime2,
      ...)
      |
      - ktime_get_real_ts(&attr1->ia_xtime1)
      + ktime_get_real_ts64(&attr1->ia_xtime1)
      |
      - ktime_get_real_ts(&attr.ia_xtime1)
      + ktime_get_real_ts64(&attr.ia_xtime1)
      )
      
      @ depends on patch @
      struct inode *node;
      struct iattr *attr;
      identifier fn;
      identifier i_xtime =~ "^i_[acm]time$";
      identifier ia_xtime =~ "^ia_[acm]time$";
      expression e;
      @@
      (
      - fn(node->i_xtime);
      + fn(timespec64_to_timespec(node->i_xtime));
      |
       fn(...,
      - node->i_xtime);
      + timespec64_to_timespec(node->i_xtime));
      |
      - e = fn(attr->ia_xtime);
      + e = fn(timespec64_to_timespec(attr->ia_xtime));
      )
      
      @ depends on patch forall @
      struct inode *node;
      struct iattr *attr;
      identifier i_xtime =~ "^i_[acm]time$";
      identifier ia_xtime =~ "^ia_[acm]time$";
      identifier fn;
      @@
      {
      + struct timespec ts;
      <+...
      (
      + ts = timespec64_to_timespec(node->i_xtime);
      fn (...,
      - &node->i_xtime,
      + &ts,
      ...);
      |
      + ts = timespec64_to_timespec(attr->ia_xtime);
      fn (...,
      - &attr->ia_xtime,
      + &ts,
      ...);
      )
      ...+>
      }
      
      @ depends on patch forall @
      struct inode *node;
      struct iattr *attr;
      struct kstat *stat;
      identifier ia_xtime =~ "^ia_[acm]time$";
      identifier i_xtime =~ "^i_[acm]time$";
      identifier xtime =~ "^[acm]time$";
      identifier fn, ret;
      @@
      {
      + struct timespec ts;
      <+...
      (
      + ts = timespec64_to_timespec(node->i_xtime);
      ret = fn (...,
      - &node->i_xtime,
      + &ts,
      ...);
      |
      + ts = timespec64_to_timespec(node->i_xtime);
      ret = fn (...,
      - &node->i_xtime);
      + &ts);
      |
      + ts = timespec64_to_timespec(attr->ia_xtime);
      ret = fn (...,
      - &attr->ia_xtime,
      + &ts,
      ...);
      |
      + ts = timespec64_to_timespec(attr->ia_xtime);
      ret = fn (...,
      - &attr->ia_xtime);
      + &ts);
      |
      + ts = timespec64_to_timespec(stat->xtime);
      ret = fn (...,
      - &stat->xtime);
      + &ts);
      )
      ...+>
      }
      
      @ depends on patch @
      struct inode *node;
      struct inode *node2;
      identifier i_xtime1 =~ "^i_[acm]time$";
      identifier i_xtime2 =~ "^i_[acm]time$";
      identifier i_xtime3 =~ "^i_[acm]time$";
      struct iattr *attrp;
      struct iattr *attrp2;
      struct iattr attr ;
      identifier ia_xtime1 =~ "^ia_[acm]time$";
      identifier ia_xtime2 =~ "^ia_[acm]time$";
      struct kstat *stat;
      struct kstat stat1;
      struct timespec64 ts;
      identifier xtime =~ "^[acmb]time$";
      expression e;
      @@
      (
      ( node->i_xtime2 \| attrp->ia_xtime2 \| attr.ia_xtime2 \) = node->i_xtime1  ;
      |
       node->i_xtime2 = \( node2->i_xtime1 \| timespec64_trunc(...) \);
      |
       node->i_xtime2 = node->i_xtime1 = node->i_xtime3 = \(ts \| current_time(...) \);
      |
       node->i_xtime1 = node->i_xtime3 = \(ts \| current_time(...) \);
      |
       stat->xtime = node2->i_xtime1;
      |
       stat1.xtime = node2->i_xtime1;
      |
      ( node->i_xtime2 \| attrp->ia_xtime2 \) = attrp->ia_xtime1  ;
      |
      ( attrp->ia_xtime1 \| attr.ia_xtime1 \) = attrp2->ia_xtime2;
      |
      - e = node->i_xtime1;
      + e = timespec64_to_timespec( node->i_xtime1 );
      |
      - e = attrp->ia_xtime1;
      + e = timespec64_to_timespec( attrp->ia_xtime1 );
      |
      node->i_xtime1 = current_time(...);
      |
       node->i_xtime2 = node->i_xtime1 = node->i_xtime3 =
      - e;
      + timespec_to_timespec64(e);
      |
       node->i_xtime1 = node->i_xtime3 =
      - e;
      + timespec_to_timespec64(e);
      |
      - node->i_xtime1 = e;
      + node->i_xtime1 = timespec_to_timespec64(e);
      )
      Signed-off-by: Deepa Dinamani <deepa.kernel@gmail.com>
      Cc: <anton@tuxera.com>
      Cc: <balbi@kernel.org>
      Cc: <bfields@fieldses.org>
      Cc: <darrick.wong@oracle.com>
      Cc: <dhowells@redhat.com>
      Cc: <dsterba@suse.com>
      Cc: <dwmw2@infradead.org>
      Cc: <hch@lst.de>
      Cc: <hirofumi@mail.parknet.co.jp>
      Cc: <hubcap@omnibond.com>
      Cc: <jack@suse.com>
      Cc: <jaegeuk@kernel.org>
      Cc: <jaharkes@cs.cmu.edu>
      Cc: <jslaby@suse.com>
      Cc: <keescook@chromium.org>
      Cc: <mark@fasheh.com>
      Cc: <miklos@szeredi.hu>
      Cc: <nico@linaro.org>
      Cc: <reiserfs-devel@vger.kernel.org>
      Cc: <richard@nod.at>
      Cc: <sage@redhat.com>
      Cc: <sfrench@samba.org>
      Cc: <swhiteho@redhat.com>
      Cc: <tj@kernel.org>
      Cc: <trond.myklebust@primarydata.com>
      Cc: <tytso@mit.edu>
      Cc: <viro@zeniv.linux.org.uk>
      95582b00
  2. 01 6月, 2018 9 次提交
    • C
      f2fs: clean up symbol namespace · 4d57b86d
      Chao Yu 提交于
      As Ted reported:
      
      "Hi, I was looking at f2fs's sources recently, and I noticed that there
      is a very large number of non-static symbols which don't have a f2fs
      prefix.  There's well over a hundred (see attached below).
      
      As one example, in fs/f2fs/dir.c there is:
      
      unsigned char get_de_type(struct f2fs_dir_entry *de)
      
      This function is clearly only useful for f2fs, but it has a generic
      name.  This means that if any other file system tries to have the same
      symbol name, there will be a symbol conflict and the kernel would not
      successfully build.  It also means that when someone is looking at f2fs
      sources, it's not at all obvious whether a function such as
      read_data_page(), invalidate_blocks(), is a generic kernel function
      found in the fs, mm, or block layers, or a f2fs specific function.
      
      You might want to fix this at some point.  Hopefully Kent's bcachefs
      isn't similarly using generically named functions, since that might
      cause conflicts with f2fs's functions --- but just as this would be a
      problem that we would rightly insist that Kent fix, this is something
      that we should have rightly insisted that f2fs should have fixed
      before it was integrated into the mainline kernel.
      
      acquire_orphan_inode
      add_ino_entry
      add_orphan_inode
      allocate_data_block
      allocate_new_segments
      alloc_nid
      alloc_nid_done
      alloc_nid_failed
      available_free_memory
      ...."
      
      This patch adds "f2fs_" prefix for all non-static symbols in order to:
      a) avoid conflict with other kernel generic symbols;
      b) to indicate the function is f2fs specific one instead of generic
      one;
      Reported-by: Theodore Ts'o <tytso@mit.edu>
      Signed-off-by: Chao Yu <yuchao0@huawei.com>
      Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
      4d57b86d
    • C
      f2fs: avoid stucking GC due to atomic write · 2ef79ecb
      Chao Yu 提交于
      f2fs doesn't allow abuse of the atomic write class interface, so besides
      limiting the total memory usage of in-mem pages, we need to limit
      atomic-write usage as well when the filesystem is seriously fragmented;
      otherwise we may run into an infinite loop during foreground GC because
      target blocks in the victim segment belong to a file that has been
      atomic-opened for a long time.
      
      Now, we detect failures due to atomic write in foreground GC; if
      the count exceeds a threshold, we drop all atomic written data in the
      cache. By this, I expect it can keep our system running safely and
      prevent DoS attacks.
      
      In addition, this patch adds GC skip information to debugfs;
      for now it just shows the count of skips caused by atomic write.
      Signed-off-by: Chao Yu <yuchao0@huawei.com>
      Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
      2ef79ecb
    • C
      f2fs: clean up with is_valid_blkaddr() · 7b525dd0
      Chao Yu 提交于
      - rename is_valid_blkaddr() to is_valid_meta_blkaddr() for readability.
      - introduce is_valid_blkaddr() for cleanup.
      
      No logic change in this patch.
      Signed-off-by: Chao Yu <yuchao0@huawei.com>
      Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
      7b525dd0
    • C
      f2fs: fix to initialize i_current_depth according to inode type · 1c41e680
      Chao Yu 提交于
      i_current_depth is used only for directory inode, but its space is
      shared with i_gc_failures field used for regular inode, in order to
      avoid affecting i_gc_failures' value, this patch fixes to initialize
      the union's fields according to inode type.
      Signed-off-by: Chao Yu <yuchao0@huawei.com>
      Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
      1c41e680
    • J
      f2fs: avoid bug_on on corrupted inode · 5d64600d
      Jaegeuk Kim 提交于
      syzbot has tested the proposed patch but the reproducer still triggered crash:
      kernel BUG at fs/f2fs/inode.c:LINE!
      
      F2FS-fs (loop1): invalid crc value
      F2FS-fs (loop5): Magic Mismatch, valid(0xf2f52010) - read(0x0)
      F2FS-fs (loop5): Can't find valid F2FS filesystem in 1th superblock
      F2FS-fs (loop5): invalid crc value
      ------------[ cut here ]------------
      kernel BUG at fs/f2fs/inode.c:238!
      invalid opcode: 0000 [#1] SMP KASAN
      Dumping ftrace buffer:
         (ftrace buffer empty)
      Modules linked in:
      CPU: 1 PID: 4886 Comm: syz-executor1 Not tainted 4.17.0-rc1+ #1
      Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
      RIP: 0010:do_read_inode fs/f2fs/inode.c:238 [inline]
      RIP: 0010:f2fs_iget+0x3307/0x3ca0 fs/f2fs/inode.c:313
      RSP: 0018:ffff8801c44a70e8 EFLAGS: 00010293
      RAX: ffff8801ce208040 RBX: ffff8801b3621080 RCX: ffffffff82eace18
      F2FS-fs (loop2): Magic Mismatch, valid(0xf2f52010) - read(0x0)
      RDX: 0000000000000000 RSI: ffffffff82eaf047 RDI: 0000000000000007
      RBP: ffff8801c44a7410 R08: ffff8801ce208040 R09: ffffed0039ee4176
      R10: ffffed0039ee4176 R11: ffff8801cf720bb7 R12: ffff8801c0efa000
      R13: 0000000000000003 R14: 0000000000000000 R15: 0000000000000000
      FS:  00007f753aa9d700(0000) GS:ffff8801daf00000(0000) knlGS:0000000000000000
      ------------[ cut here ]------------
      CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
      kernel BUG at fs/f2fs/inode.c:238!
      CR2: 0000000001b03018 CR3: 00000001c8b74000 CR4: 00000000001406e0
      DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
      DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
      Call Trace:
       f2fs_fill_super+0x4377/0x7bf0 fs/f2fs/super.c:2842
       mount_bdev+0x30c/0x3e0 fs/super.c:1165
       f2fs_mount+0x34/0x40 fs/f2fs/super.c:3020
       mount_fs+0xae/0x328 fs/super.c:1268
       vfs_kern_mount.part.34+0xd4/0x4d0 fs/namespace.c:1037
       vfs_kern_mount fs/namespace.c:1027 [inline]
       do_new_mount fs/namespace.c:2517 [inline]
       do_mount+0x564/0x3070 fs/namespace.c:2847
       ksys_mount+0x12d/0x140 fs/namespace.c:3063
       __do_sys_mount fs/namespace.c:3077 [inline]
       __se_sys_mount fs/namespace.c:3074 [inline]
       __x64_sys_mount+0xbe/0x150 fs/namespace.c:3074
       do_syscall_64+0x1b1/0x800 arch/x86/entry/common.c:287
       entry_SYSCALL_64_after_hwframe+0x49/0xbe
      RIP: 0033:0x457daa
      RSP: 002b:00007f753aa9cba8 EFLAGS: 00000246 ORIG_RAX: 00000000000000a5
      RAX: ffffffffffffffda RBX: 0000000020000000 RCX: 0000000000457daa
      RDX: 0000000020000000 RSI: 0000000020000100 RDI: 00007f753aa9cbf0
      RBP: 0000000000000064 R08: 0000000020016a00 R09: 0000000020000000
      R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000003
      R13: 0000000000000064 R14: 00000000006fcb80 R15: 0000000000000000
      RIP: do_read_inode fs/f2fs/inode.c:238 [inline] RSP: ffff8801c44a70e8
      RIP: f2fs_iget+0x3307/0x3ca0 fs/f2fs/inode.c:313 RSP: ffff8801c44a70e8
      invalid opcode: 0000 [#2] SMP KASAN
      ---[ end trace 1cbcbec2156680bc ]---
      
      Reported-and-tested-by: syzbot+41a1b341571f0952badb@syzkaller.appspotmail.com
      Reviewed-by: Chao Yu <yuchao0@huawei.com>
      Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
      5d64600d
    • J
      f2fs: give message and set need_fsck given broken node id · a4f843bd
      Jaegeuk Kim 提交于
      syzbot hit the following crash on upstream commit
      83beed7b (Fri Apr 20 17:56:32 2018 +0000)
      Merge branch 'fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/evalenti/linux-soc-thermal
      syzbot dashboard link: https://syzkaller.appspot.com/bug?extid=d154ec99402c6f628887
      
      C reproducer: https://syzkaller.appspot.com/x/repro.c?id=5414336294027264
      syzkaller reproducer: https://syzkaller.appspot.com/x/repro.syz?id=5471683234234368
      Raw console output: https://syzkaller.appspot.com/x/log.txt?id=5436660795834368
      Kernel config: https://syzkaller.appspot.com/x/.config?id=1808800213120130118
      compiler: gcc (GCC) 8.0.1 20180413 (experimental)
      
      IMPORTANT: if you fix the bug, please add the following tag to the commit:
      Reported-by: syzbot+d154ec99402c6f628887@syzkaller.appspotmail.com
      It will help syzbot understand when the bug is fixed. See footer for details.
      If you forward the report, please keep this part and the footer.
      
      F2FS-fs (loop0): Magic Mismatch, valid(0xf2f52010) - read(0x0)
      F2FS-fs (loop0): Can't find valid F2FS filesystem in 1th superblock
      F2FS-fs (loop0): invalid crc value
      ------------[ cut here ]------------
      kernel BUG at fs/f2fs/node.c:1185!
      invalid opcode: 0000 [#1] SMP KASAN
      Dumping ftrace buffer:
         (ftrace buffer empty)
      Modules linked in:
      CPU: 1 PID: 4549 Comm: syzkaller704305 Not tainted 4.17.0-rc1+ #10
      Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
      RIP: 0010:__get_node_page+0xb68/0x16e0 fs/f2fs/node.c:1185
      RSP: 0018:ffff8801d960e820 EFLAGS: 00010293
      RAX: ffff8801d88205c0 RBX: 0000000000000003 RCX: ffffffff82f6cc06
      RDX: 0000000000000000 RSI: ffffffff82f6d5e8 RDI: 0000000000000004
      RBP: ffff8801d960ec30 R08: ffff8801d88205c0 R09: ffffed003b5e46c2
      R10: 0000000000000003 R11: 0000000000000003 R12: ffff8801a86e00c0
      R13: 0000000000000001 R14: ffff8801a86e0530 R15: ffff8801d9745240
      FS:  000000000072c880(0000) GS:ffff8801daf00000(0000) knlGS:0000000000000000
      CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
      CR2: 00007f3d403209b8 CR3: 00000001d8f3f000 CR4: 00000000001406e0
      DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
      DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
      Call Trace:
       get_node_page fs/f2fs/node.c:1237 [inline]
       truncate_xattr_node+0x152/0x2e0 fs/f2fs/node.c:1014
       remove_inode_page+0x200/0xaf0 fs/f2fs/node.c:1039
       f2fs_evict_inode+0xe86/0x1710 fs/f2fs/inode.c:547
       evict+0x4a6/0x960 fs/inode.c:557
       iput_final fs/inode.c:1519 [inline]
       iput+0x62d/0xa80 fs/inode.c:1545
       f2fs_fill_super+0x5f4e/0x7bf0 fs/f2fs/super.c:2849
       mount_bdev+0x30c/0x3e0 fs/super.c:1164
       f2fs_mount+0x34/0x40 fs/f2fs/super.c:3020
       mount_fs+0xae/0x328 fs/super.c:1267
       vfs_kern_mount.part.34+0xd4/0x4d0 fs/namespace.c:1037
       vfs_kern_mount fs/namespace.c:1027 [inline]
       do_new_mount fs/namespace.c:2518 [inline]
       do_mount+0x564/0x3070 fs/namespace.c:2848
       ksys_mount+0x12d/0x140 fs/namespace.c:3064
       __do_sys_mount fs/namespace.c:3078 [inline]
       __se_sys_mount fs/namespace.c:3075 [inline]
       __x64_sys_mount+0xbe/0x150 fs/namespace.c:3075
       do_syscall_64+0x1b1/0x800 arch/x86/entry/common.c:287
       entry_SYSCALL_64_after_hwframe+0x49/0xbe
      RIP: 0033:0x443dea
      RSP: 002b:00007ffcc7882368 EFLAGS: 00000297 ORIG_RAX: 00000000000000a5
      RAX: ffffffffffffffda RBX: 0000000020000c00 RCX: 0000000000443dea
      RDX: 0000000020000000 RSI: 0000000020000100 RDI: 00007ffcc7882370
      RBP: 0000000000000003 R08: 0000000020016a00 R09: 000000000000000a
      R10: 0000000000000000 R11: 0000000000000297 R12: 0000000000000004
      R13: 0000000000402ce0 R14: 0000000000000000 R15: 0000000000000000
      RIP: __get_node_page+0xb68/0x16e0 fs/f2fs/node.c:1185 RSP: ffff8801d960e820
      ---[ end trace 4edbeb71f002bb76 ]---
      
      Reported-and-tested-by: syzbot+d154ec99402c6f628887@syzkaller.appspotmail.com
      Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
      a4f843bd
    • Z
      f2fs: change le32 to le16 of f2fs_inode->i_extra_size · d6964949
      Zhikang Zhang 提交于
      In the structure of f2fs_inode, i_extra_size's type is __le16,
      so we should keep type consistent when using it.
      
      Fixes: 704956ec ("f2fs: support inode checksum")
      Signed-off-by: Zhikang Zhang <zhangzhikang1@huawei.com>
      Signed-off-by: Yunlei He <heyunlei@huawei.com>
      Reviewed-by: Chao Yu <yuchao0@huawei.com>
      Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
      d6964949
    • C
      f2fs: don't use GFP_ZERO for page caches · 81114baa
      Chao Yu 提交于
      Related to https://lkml.org/lkml/2018/4/8/661
      
      Sometimes, we need to write meta data to a newly allocated block address;
      then we allocate a zeroed page in the inner inode's address space,
      fill partial data in it, and leave the rest zeroed, which means
      some fields are in their initial state.
      
      There are two inner inodes (meta inode and node inode) setting __GFP_ZERO,
      I have just checked them, for both of them, we can avoid using __GFP_ZERO,
      and do initialization by ourselves to avoid unneeded/redundant zeroing
      from mm.
      
      Cc: <stable@vger.kernel.org>
      Signed-off-by: Chao Yu <yuchao0@huawei.com>
      Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
      81114baa
    • C
      f2fs: introduce private inode status mapping · 59c84408
      Chao Yu 提交于
      Previously, we used the generic FS_*_FL flags defined by vfs to indicate
      inode status for each bit of i_flags, so f2fs's flag status definition was
      tied to the vfs one; it would be hard for f2fs to reuse bits f2fs never
      used to indicate new status.
      
      In order to solve this issue, we introduce a private inode status mapping.
      Note that for bits that have already been persisted to disk, we should
      never change their definition; the other ones we can remap for
      statuses introduced later.
      Signed-off-by: Chao Yu <yuchao0@huawei.com>
      Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
      59c84408
  3. 04 4月, 2018 1 次提交
  4. 17 3月, 2018 1 次提交
  5. 13 3月, 2018 1 次提交
  6. 26 1月, 2018 1 次提交
  7. 19 1月, 2018 1 次提交
    • D
      f2fs: prevent newly created inode from being dirtied incorrectly · 9ac1e2d8
      Daeho Jeong 提交于
      Now, we invoke f2fs_mark_inode_dirty_sync() to make an inode dirty in
      advance of creating a new node page for the inode. By this, some inodes
      whose node page is not created yet can be linked into the global dirty
      list.
      
      If the checkpoint is executed at this moment, the inode will be written
      back by writeback_single_inode() and finally update_inode_page() will
      fail to detach the inode from the global dirty list because the inode
      doesn't have a node page.
      
      The problem is that the inode's state in VFS layer will become clean
      after execution of writeback_single_inode() and it's still linked in
      the global dirty list of f2fs and this will cause a kernel panic.
      
      So, we will prevent the newly created inode from being dirtied while
      the FI_NEW_INODE flag of the inode is set. We will make it dirty
      right after the flag is cleared.
      Signed-off-by: Daeho Jeong <daeho.jeong@samsung.com>
      Signed-off-by: Youngjin Gil <youngjin.gil@samsung.com>
      Tested-by: Hobin Woo <hobin.woo@samsung.com>
      Reviewed-by: Chao Yu <yuchao0@huawei.com>
      Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
      9ac1e2d8
  8. 12 1月, 2018 1 次提交
  9. 03 1月, 2018 1 次提交
    • Y
      f2fs: fix an error case of missing update inode page · 211a6fa0
      Yunlei He 提交于
      -Thread A                             Thread B
      
      -write_checkpoint
       -block_operations
        -f2fs_unlock_all                    -f2fs_sync_file
                                             -f2fs_write_inode
                                              -f2fs_inode_synced
          -f2fs_sync_inode_meta
           -sync_node_pages
                                              -set_page_dirty
      
      In this case, if a sudden power-off occurs before the next checkpoint,
      the last inode page update will be lost. wb_writeback is the same as
      fsync.
      
      Yunlei also reproduced the bug by:
      
      @@ -366,7 +366,7 @@ int update_inode(struct inode *inode, struct page *node_page)
              struct extent_tree *et = F2FS_I(inode)->extent_tree;
      
              f2fs_inode_synced(inode);
      -
      +       msleep(10000);
              f2fs_wait_on_page_writeback(node_page, NODE, true);
      
      shell 1:                                       shell2:
      
      dd if=/dev/zero of=./test bs=1M count=10
      sync
      echo "hello" >> ./test
      fsync test  // sleep 10s
                                                     sync //return quickly
      echo c > /proc/sysrq-trigger
      Signed-off-by: Yunlei He <heyunlei@huawei.com>
      Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
      211a6fa0
  10. 06 11月, 2017 1 次提交
    • C
      f2fs: support flexible inline xattr size · 6afc662e
      Chao Yu 提交于
      Now, in products, more and more features based on file encryption are being
      introduced, and their demand for xattr space is increasing. However, inline
      xattr has a fixed size of 200 bytes; once the inline xattr space is full,
      newly added xattr data occupies an additional xattr block, which may bring
      more space usage and a performance regression during persisting.
      
      In order to resolve above issue, it's better to expand inline xattr size
      flexibly according to user's requirement.
      
      So this patch introduces new filesystem feature 'flexible inline xattr',
      and new mount option 'inline_xattr_size=%u', once mkfs enables the
      feature, we can use the option to make f2fs supporting flexible inline
      xattr size.
      
      To support this feature, we add an extra attribute i_inline_xattr_size to
      the inode layout, indicating how much space inline xattr borrows from the
      block address mapping space in the inode layout. By this, we can easily
      locate and store flexible-sized inline xattr data in the inode.
      
      Inode disk layout:
        +----------------------+
        | .i_mode              |
        | ...                  |
        | .i_ext               |
        +----------------------+
        | .i_extra_isize       |
        | .i_inline_xattr_size |-----------+
        | ...                  |           |
        +----------------------+           |
        | .i_addr              |           |
        |  - block address or  |           |
        |  - inline data       |           |
        +----------------------+<---+      v
        |    inline xattr      |    +---inline xattr range
        +----------------------+<---+
        | .i_nid               |
        +----------------------+
        |   node_footer        |
        | (nid, ino, offset)   |
        +----------------------+
      
      Note that we have to consider backward compatibility, which reserved
      inline_data space, 200 bytes, all the time, as reported by Sheng Yong.
      
      Previous inline data or directory always reserved 200 bytes in inode layout,
      even if inline_xattr is disabled. In order to keep inline_dentry's structure
      for backward compatibility, we get the space back only from inline_data.
      Signed-off-by: Chao Yu <yuchao0@huawei.com>
      Reported-by: Sheng Yong <shengyong1@huawei.com>
      Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
      6afc662e
  11. 26 10月, 2017 1 次提交
  12. 19 10月, 2017 1 次提交
    • E
      fs, fscrypt: add an S_ENCRYPTED inode flag · 2ee6a576
      Eric Biggers 提交于
      Introduce a flag S_ENCRYPTED which can be set in ->i_flags to indicate
      that the inode is encrypted using the fscrypt (fs/crypto/) mechanism.
      
      Checking this flag will give the same information that
      inode->i_sb->s_cop->is_encrypted(inode) currently does, but will be more
      efficient.  This will be useful for adding higher-level helper functions
      for filesystems to use.  For example we'll be able to replace this:
      
      	if (ext4_encrypted_inode(inode)) {
      		ret = fscrypt_get_encryption_info(inode);
      		if (ret)
      			return ret;
      		if (!fscrypt_has_encryption_key(inode))
      			return -ENOKEY;
      	}
      
      with this:
      
      	ret = fscrypt_require_key(inode);
      	if (ret)
      		return ret;
      
      ... since we'll be able to retain the fast path for unencrypted files as
      a single flag check, using an inline function.  This wasn't possible
      before because we'd have had to frequently call through the
      ->i_sb->s_cop->is_encrypted function pointer, even when the encryption
      support was disabled or not being used.
      
      Note: we don't define S_ENCRYPTED to 0 if CONFIG_FS_ENCRYPTION is
      disabled because we want to continue to return an error if an encrypted
      file is accessed without encryption support, rather than pretending that
      it is unencrypted.
      Reviewed-by: Chao Yu <yuchao0@huawei.com>
      Acked-by: Dave Chinner <dchinner@redhat.com>
      Signed-off-by: Eric Biggers <ebiggers@google.com>
      Signed-off-by: Theodore Ts'o <tytso@mit.edu>
      2ee6a576
  13. 11 10月, 2017 1 次提交
    • C
      f2fs: enhance multiple device flush · 39d787be
      Chao Yu 提交于
      When the multiple device feature is enabled, during ->fsync we issue a
      flush to all devices to make sure the node/data of the file is persisted
      to storage. But some device flushes may be unneeded, as the file's
      data may not have been written back to those devices. So this patch adds
      and manages a per-inode bitmap in the global cache to indicate which devices
      are dirty and need a flush during ->fsync; hence, we can improve the
      performance of fsync in multiple-device scenarios.
      Signed-off-by: Chao Yu <yuchao0@huawei.com>
      Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
      39d787be
  14. 13 9月, 2017 1 次提交
  15. 06 9月, 2017 1 次提交
  16. 04 8月, 2017 1 次提交
  17. 01 8月, 2017 3 次提交
    • C
      f2fs: support project quota · 5c57132e
      Chao Yu 提交于
      This patch adds to support plain project quota.
      Signed-off-by: Chao Yu <yuchao0@huawei.com>
      Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
      5c57132e
    • C
      f2fs: enhance on-disk inode structure scalability · 7a2af766
      Chao Yu 提交于
      This patch adds a new flag, F2FS_EXTRA_ATTR, stored in inode.i_inline
      to indicate that the on-disk structure of the current inode is extended.
      
      In order to extend, we changed the inode structure a bit:
      
      Original one:
      
      struct f2fs_inode {
      	...
      	struct f2fs_extent i_ext;
      	__le32 i_addr[DEF_ADDRS_PER_INODE];
      	__le32 i_nid[DEF_NIDS_PER_INODE];
      }
      
      Extended one:
      
      struct f2fs_inode {
              ...
              struct f2fs_extent i_ext;
      	union {
      		struct {
      			__le16 i_extra_isize;
      			__le16 i_padding;
      			__le32 i_extra_end[0];
      		};
      		__le32 i_addr[DEF_ADDRS_PER_INODE];
      	};
              __le32 i_nid[DEF_NIDS_PER_INODE];
      }
      
      Once F2FS_EXTRA_ATTR is set, we will steal four bytes at the head of the
      i_addr field for storing i_extra_isize and i_padding. With i_extra_isize,
      we can calculate the actual size of the reserved space in i_addr. The
      available attribute fields, included in the total extra attribute fields
      for the current inode, can be described as below:
      
        +--------------------+
        | .i_mode            |
        | ...                |
        | .i_ext             |
        +--------------------+
        | .i_extra_isize     |-----+
        | .i_padding         |     |
        | .i_prjid           |     |
        | .i_atime_extra     |     |
        | .i_ctime_extra     |     |
        | .i_mtime_extra     |<----+
        | .i_inode_cs        |<----- store blkaddr/inline from here
        | .i_xattr_cs        |
        | ...                |
        +--------------------+
        |                    |
        |    block address   |
        |                    |
        +--------------------+
        | .i_nid             |
        +--------------------+
        |   node_footer      |
        | (nid, ino, offset) |
        +--------------------+
      
      Hence, with this patch, we enhance the scalability of the f2fs inode for
      storing more newly added attributes.
      Signed-off-by: Chao Yu <yuchao0@huawei.com>
      Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
      7a2af766
    • C
      f2fs: make max inline size changeable · f2470371
      Chao Yu 提交于
      This patch makes the macros below, which calculate the max inline size
      and the inline dentry field sizes, take size-changeable reserved space
      into account:
      - MAX_INLINE_DATA
      - NR_INLINE_DENTRY
      - INLINE_DENTRY_BITMAP_SIZE
      - INLINE_RESERVED_SIZE
      
      Then, when the inline_{data,dentry} options are enabled, it allows us to
      flexibly reserve inline space of different sizes for adding newly
      introduced inode attributes.
      Signed-off-by: Chao Yu <yuchao0@huawei.com>
      Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
      f2470371
  18. 09 7月, 2017 1 次提交
    • C
      f2fs: support plain user/group quota · 0abd675e
      Chao Yu 提交于
      This patch adds support for plain user/group quota.
      
      Change Note by Jaegeuk Kim.
      
      - Use f2fs page cache for quota files in order to consider garbage collection.
        As a result, quota files do not tolerate sudden power-cuts, so the user
        needs to run quotacheck.
      
      - setattr() calls dquot_transfer which will transfer inode->i_blocks.
        We can't reclaim that during f2fs_evict_inode(). So, we need to count
        node blocks as well in order to match i_blocks with dquot's space.
      
        Note that, Chao wrote a patch to count inode->i_blocks without inode block.
        (f2fs: don't count inode block in in-memory inode.i_blocks)
      
      - in f2fs_remount, we need to make RW in prior to dquot_resume.
      
      - handle fault_injection case during f2fs_quota_off_umount
      
      - TODO: Project quota
      Signed-off-by: Chao Yu <yuchao0@huawei.com>
      Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
      0abd675e
  19. 08 7月, 2017 1 次提交
    • C
      f2fs: don't count inode block in in-memory inode.i_blocks · 000519f2
      Chao Yu 提交于
      Previously, we counted all blocks consumed by an inode, including the
      inode block, xattr block, index block and data block, into i_blocks.
      Other generic filesystems don't count the inode block into i_blocks, so
      userspace applications or the quota system may detect an incorrect block
      count from the i_blocks value in the inode.
      
      This patch changes to count all blocks into inode.i_blocks excluding
      inode block, for on-disk i_blocks, we keep counting inode block for
      backward compatibility.
      Signed-off-by: Chao Yu <yuchao0@huawei.com>
      Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
      000519f2
  20. 04 7月, 2017 2 次提交
    • C
      f2fs: measure inode.i_blocks as generic filesystem · 0eb0adad
      Chao Yu 提交于
      Both in memory and on disk, generic filesystems record i_blocks as a
      count of 512-byte sectors, and VFS submodules such as disk quota follow
      this rule as well. But f2fs records it as a count of 4096-byte blocks.
      Because of this difference, once we use the disk quota functions which
      inc/dec i_blocks, the i_blocks of f2fs becomes inconsistent between
      in memory and on disk.
      
      In order to resolve this issue, this patch changes to make in-memory
      i_blocks of f2fs recording sector count instead of block count,
      meanwhile leaving on-disk i_blocks recording block count.
      Signed-off-by: Chao Yu <yuchao0@huawei.com>
      Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
      0eb0adad
    • J
      f2fs: remove false-positive bug_on · d8c4256c
      Jaegeuk Kim 提交于
      For example,
      
      f2fs_create
       - new_node_page is failed
       - handle_failed_inode
        - skip to add it into orphan list, since ni.blk_addr == NULL_ADDR
         : set_inode_flag(inode, FI_FREE_NID)
      
      f2fs_evict_inode
       - EIO due to fault injection
       - f2fs_bug_on() is triggered
      
      So, we don't need to call f2fs_bug_on in this case.
      Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
      d8c4256c
  21. 24 5月, 2017 1 次提交
    • J
      f2fs: load inode's flag from disk · 93607124
      Jaegeuk Kim 提交于
      This patch fixes missing inode flag loaded from disk, reported by Tom.
      
      [tom@localhost ~]$ sudo mount /dev/loop0 /mnt/
      [tom@localhost ~]$ sudo chown tom:tom /mnt/
      [tom@localhost ~]$ touch /mnt/testfile
      [tom@localhost ~]$ sudo chattr +i /mnt/testfile
      [tom@localhost ~]$ echo test > /mnt/testfile
      bash: /mnt/testfile: Operation not permitted
      [tom@localhost ~]$ rm /mnt/testfile
      rm: cannot remove '/mnt/testfile': Operation not permitted
      [tom@localhost ~]$ sudo umount /mnt/
      [tom@localhost ~]$ sudo mount /dev/loop0 /mnt/
      [tom@localhost ~]$ lsattr /mnt/testfile
      ----i-------------- /mnt/testfile
      [tom@localhost ~]$ echo test > /mnt/testfile
      [tom@localhost ~]$ rm /mnt/testfile
      [tom@localhost ~]$ sudo umount /mnt/
      
      Cc: stable@vger.kernel.org
      Reported-by: Tom Yan <tom.ty89@outlook.com>
      Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
      93607124
  22. 25 4月, 2017 1 次提交
    • J
      f2fs: fix out-of free segments · a7881893
      Jaegeuk Kim 提交于
      This patch also reverts d0db7703 ("f2fs: do SSR in higher priority").
      
      This patch fixes out of free segments caused by many small file creation by
      1) mkfs -s 1 2G
      2) mount
      3) untar
       - produce 60000 small files in a burst
      4) sync
       - flush node pages
       - flush imeta
      
      Here, when we do f2fs_balance_fs, we missed # of imeta blocks, resulting in
      skipping to check has_not_enough_free_secs.
      
      Another test is done by
      1) mkfs -s 12 2G
      2) mount
      3) untar
       - produce 60000 small files in a burst
      4) sync
       - flush node pages
       - flush imeta
      
      In this case, this patch also fixes wrong block allocation under large section
      size.
      Reported-by: William Brana <wbrana@gmail.com>
      Cc: <stable@vger.kernel.org>
      Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
      a7881893
  23. 13 4月, 2017 1 次提交
    • J
      f2fs: fix fs corruption due to zero inode page · 9bb02c36
      Jaegeuk Kim 提交于
      This patch fixes the following scenario.
      
      - f2fs_create/f2fs_mkdir             - write_checkpoint
       - f2fs_mark_inode_dirty_sync         - block_operations
                                             - f2fs_lock_all
                                             - f2fs_sync_inode_meta
                                              - f2fs_unlock_all
                                              - sync_inode_metadata
       - f2fs_lock_op
                                               - f2fs_write_inode
                                                - update_inode_page
                                                 - get_node_page
                                                   return -ENOENT
       - new_inode_page
        - fill_node_footer
       - f2fs_mark_inode_dirty_sync
       - ...
       - f2fs_unlock_op
                                                - f2fs_inode_synced
                                             - f2fs_lock_all
                                             - do_checkpoint
      
      In this checkpoint, we can get an inode page which contains zeros having valid
      node footer only.
      
      Cc: <stable@vger.kernel.org>
      Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
      9bb02c36
  24. 22 3月, 2017 2 次提交
  25. 28 2月, 2017 1 次提交
  26. 24 11月, 2016 3 次提交