1. 06 8月, 2018 4 次提交
  2. 06 6月, 2018 1 次提交
    • D
      vfs: change inode times to use struct timespec64 · 95582b00
      Deepa Dinamani 提交于
      struct timespec is not y2038 safe. Transition vfs to use
      y2038 safe struct timespec64 instead.
      
      The change was made with the help of the following cocinelle
      script. This catches about 80% of the changes.
      All the header file and logic changes are included in the
      first 5 rules. The rest are trivial substitutions.
      I avoid changing any of the function signatures or any other
      filesystem specific data structures to keep the patch simple
      for review.
      
      The script can be a little shorter by combining different cases.
      But, this version was sufficient for my usecase.
      
      virtual patch
      
      @ depends on patch @
      identifier now;
      @@
      - struct timespec
      + struct timespec64
        current_time ( ... )
        {
      - struct timespec now = current_kernel_time();
      + struct timespec64 now = current_kernel_time64();
        ...
      - return timespec_trunc(
      + return timespec64_trunc(
        ... );
        }
      
      @ depends on patch @
      identifier xtime;
      @@
       struct \( iattr \| inode \| kstat \) {
       ...
      -       struct timespec xtime;
      +       struct timespec64 xtime;
       ...
       }
      
      @ depends on patch @
      identifier t;
      @@
       struct inode_operations {
       ...
      int (*update_time) (...,
      -       struct timespec t,
      +       struct timespec64 t,
      ...);
       ...
       }
      
      @ depends on patch @
      identifier t;
      identifier fn_update_time =~ "update_time$";
      @@
       fn_update_time (...,
      - struct timespec *t,
      + struct timespec64 *t,
       ...) { ... }
      
      @ depends on patch @
      identifier t;
      @@
      lease_get_mtime( ... ,
      - struct timespec *t
      + struct timespec64 *t
        ) { ... }
      
      @te depends on patch forall@
      identifier ts;
      local idexpression struct inode *inode_node;
      identifier i_xtime =~ "^i_[acm]time$";
      identifier ia_xtime =~ "^ia_[acm]time$";
      identifier fn_update_time =~ "update_time$";
      identifier fn;
      expression e, E3;
      local idexpression struct inode *node1;
      local idexpression struct inode *node2;
      local idexpression struct iattr *attr1;
      local idexpression struct iattr *attr2;
      local idexpression struct iattr attr;
      identifier i_xtime1 =~ "^i_[acm]time$";
      identifier i_xtime2 =~ "^i_[acm]time$";
      identifier ia_xtime1 =~ "^ia_[acm]time$";
      identifier ia_xtime2 =~ "^ia_[acm]time$";
      @@
      (
      (
      - struct timespec ts;
      + struct timespec64 ts;
      |
      - struct timespec ts = current_time(inode_node);
      + struct timespec64 ts = current_time(inode_node);
      )
      
      <+... when != ts
      (
      - timespec_equal(&inode_node->i_xtime, &ts)
      + timespec64_equal(&inode_node->i_xtime, &ts)
      |
      - timespec_equal(&ts, &inode_node->i_xtime)
      + timespec64_equal(&ts, &inode_node->i_xtime)
      |
      - timespec_compare(&inode_node->i_xtime, &ts)
      + timespec64_compare(&inode_node->i_xtime, &ts)
      |
      - timespec_compare(&ts, &inode_node->i_xtime)
      + timespec64_compare(&ts, &inode_node->i_xtime)
      |
      ts = current_time(e)
      |
      fn_update_time(..., &ts,...)
      |
      inode_node->i_xtime = ts
      |
      node1->i_xtime = ts
      |
      ts = inode_node->i_xtime
      |
      <+... attr1->ia_xtime ...+> = ts
      |
      ts = attr1->ia_xtime
      |
      ts.tv_sec
      |
      ts.tv_nsec
      |
      btrfs_set_stack_timespec_sec(..., ts.tv_sec)
      |
      btrfs_set_stack_timespec_nsec(..., ts.tv_nsec)
      |
      - ts = timespec64_to_timespec(
      + ts =
      ...
      -)
      |
      - ts = ktime_to_timespec(
      + ts = ktime_to_timespec64(
      ...)
      |
      - ts = E3
      + ts = timespec_to_timespec64(E3)
      |
      - ktime_get_real_ts(&ts)
      + ktime_get_real_ts64(&ts)
      |
      fn(...,
      - ts
      + timespec64_to_timespec(ts)
      ,...)
      )
      ...+>
      (
      <... when != ts
      - return ts;
      + return timespec64_to_timespec(ts);
      ...>
      )
      |
      - timespec_equal(&node1->i_xtime1, &node2->i_xtime2)
      + timespec64_equal(&node1->i_xtime2, &node2->i_xtime2)
      |
      - timespec_equal(&node1->i_xtime1, &attr2->ia_xtime2)
      + timespec64_equal(&node1->i_xtime2, &attr2->ia_xtime2)
      |
      - timespec_compare(&node1->i_xtime1, &node2->i_xtime2)
      + timespec64_compare(&node1->i_xtime1, &node2->i_xtime2)
      |
      node1->i_xtime1 =
      - timespec_trunc(attr1->ia_xtime1,
      + timespec64_trunc(attr1->ia_xtime1,
      ...)
      |
      - attr1->ia_xtime1 = timespec_trunc(attr2->ia_xtime2,
      + attr1->ia_xtime1 =  timespec64_trunc(attr2->ia_xtime2,
      ...)
      |
      - ktime_get_real_ts(&attr1->ia_xtime1)
      + ktime_get_real_ts64(&attr1->ia_xtime1)
      |
      - ktime_get_real_ts(&attr.ia_xtime1)
      + ktime_get_real_ts64(&attr.ia_xtime1)
      )
      
      @ depends on patch @
      struct inode *node;
      struct iattr *attr;
      identifier fn;
      identifier i_xtime =~ "^i_[acm]time$";
      identifier ia_xtime =~ "^ia_[acm]time$";
      expression e;
      @@
      (
      - fn(node->i_xtime);
      + fn(timespec64_to_timespec(node->i_xtime));
      |
       fn(...,
      - node->i_xtime);
      + timespec64_to_timespec(node->i_xtime));
      |
      - e = fn(attr->ia_xtime);
      + e = fn(timespec64_to_timespec(attr->ia_xtime));
      )
      
      @ depends on patch forall @
      struct inode *node;
      struct iattr *attr;
      identifier i_xtime =~ "^i_[acm]time$";
      identifier ia_xtime =~ "^ia_[acm]time$";
      identifier fn;
      @@
      {
      + struct timespec ts;
      <+...
      (
      + ts = timespec64_to_timespec(node->i_xtime);
      fn (...,
      - &node->i_xtime,
      + &ts,
      ...);
      |
      + ts = timespec64_to_timespec(attr->ia_xtime);
      fn (...,
      - &attr->ia_xtime,
      + &ts,
      ...);
      )
      ...+>
      }
      
      @ depends on patch forall @
      struct inode *node;
      struct iattr *attr;
      struct kstat *stat;
      identifier ia_xtime =~ "^ia_[acm]time$";
      identifier i_xtime =~ "^i_[acm]time$";
      identifier xtime =~ "^[acm]time$";
      identifier fn, ret;
      @@
      {
      + struct timespec ts;
      <+...
      (
      + ts = timespec64_to_timespec(node->i_xtime);
      ret = fn (...,
      - &node->i_xtime,
      + &ts,
      ...);
      |
      + ts = timespec64_to_timespec(node->i_xtime);
      ret = fn (...,
      - &node->i_xtime);
      + &ts);
      |
      + ts = timespec64_to_timespec(attr->ia_xtime);
      ret = fn (...,
      - &attr->ia_xtime,
      + &ts,
      ...);
      |
      + ts = timespec64_to_timespec(attr->ia_xtime);
      ret = fn (...,
      - &attr->ia_xtime);
      + &ts);
      |
      + ts = timespec64_to_timespec(stat->xtime);
      ret = fn (...,
      - &stat->xtime);
      + &ts);
      )
      ...+>
      }
      
      @ depends on patch @
      struct inode *node;
      struct inode *node2;
      identifier i_xtime1 =~ "^i_[acm]time$";
      identifier i_xtime2 =~ "^i_[acm]time$";
      identifier i_xtime3 =~ "^i_[acm]time$";
      struct iattr *attrp;
      struct iattr *attrp2;
      struct iattr attr ;
      identifier ia_xtime1 =~ "^ia_[acm]time$";
      identifier ia_xtime2 =~ "^ia_[acm]time$";
      struct kstat *stat;
      struct kstat stat1;
      struct timespec64 ts;
      identifier xtime =~ "^[acmb]time$";
      expression e;
      @@
      (
      ( node->i_xtime2 \| attrp->ia_xtime2 \| attr.ia_xtime2 \) = node->i_xtime1  ;
      |
       node->i_xtime2 = \( node2->i_xtime1 \| timespec64_trunc(...) \);
      |
       node->i_xtime2 = node->i_xtime1 = node->i_xtime3 = \(ts \| current_time(...) \);
      |
       node->i_xtime1 = node->i_xtime3 = \(ts \| current_time(...) \);
      |
       stat->xtime = node2->i_xtime1;
      |
       stat1.xtime = node2->i_xtime1;
      |
      ( node->i_xtime2 \| attrp->ia_xtime2 \) = attrp->ia_xtime1  ;
      |
      ( attrp->ia_xtime1 \| attr.ia_xtime1 \) = attrp2->ia_xtime2;
      |
      - e = node->i_xtime1;
      + e = timespec64_to_timespec( node->i_xtime1 );
      |
      - e = attrp->ia_xtime1;
      + e = timespec64_to_timespec( attrp->ia_xtime1 );
      |
      node->i_xtime1 = current_time(...);
      |
       node->i_xtime2 = node->i_xtime1 = node->i_xtime3 =
      - e;
      + timespec_to_timespec64(e);
      |
       node->i_xtime1 = node->i_xtime3 =
      - e;
      + timespec_to_timespec64(e);
      |
      - node->i_xtime1 = e;
      + node->i_xtime1 = timespec_to_timespec64(e);
      )
      Signed-off-by: NDeepa Dinamani <deepa.kernel@gmail.com>
      Cc: <anton@tuxera.com>
      Cc: <balbi@kernel.org>
      Cc: <bfields@fieldses.org>
      Cc: <darrick.wong@oracle.com>
      Cc: <dhowells@redhat.com>
      Cc: <dsterba@suse.com>
      Cc: <dwmw2@infradead.org>
      Cc: <hch@lst.de>
      Cc: <hirofumi@mail.parknet.co.jp>
      Cc: <hubcap@omnibond.com>
      Cc: <jack@suse.com>
      Cc: <jaegeuk@kernel.org>
      Cc: <jaharkes@cs.cmu.edu>
      Cc: <jslaby@suse.com>
      Cc: <keescook@chromium.org>
      Cc: <mark@fasheh.com>
      Cc: <miklos@szeredi.hu>
      Cc: <nico@linaro.org>
      Cc: <reiserfs-devel@vger.kernel.org>
      Cc: <richard@nod.at>
      Cc: <sage@redhat.com>
      Cc: <sfrench@samba.org>
      Cc: <swhiteho@redhat.com>
      Cc: <tj@kernel.org>
      Cc: <trond.myklebust@primarydata.com>
      Cc: <tytso@mit.edu>
      Cc: <viro@zeniv.linux.org.uk>
      95582b00
  3. 18 4月, 2018 1 次提交
  4. 12 4月, 2018 1 次提交
  5. 31 3月, 2018 3 次提交
    • Q
      btrfs: qgroup: Use separate meta reservation type for delalloc · 43b18595
      Qu Wenruo 提交于
      Before this patch, btrfs qgroup is mixing per-transcation meta rsv with
      preallocated meta rsv, making it quite easy to underflow qgroup meta
      reservation.
      
      Since we have the new qgroup meta rsv types, apply it to delalloc
      reservation.
      
      Now for delalloc, most of its reserved space will use META_PREALLOC qgroup
      rsv type.
      
      And for callers reducing outstanding extent like btrfs_finish_ordered_io(),
      they will convert corresponding META_PREALLOC reservation to
      META_PERTRANS.
      
      This is mainly due to the fact that current qgroup numbers will only be
      updated in btrfs_commit_transaction(), that's to say if we don't keep
      such placeholder reservation, we can exceed qgroup limitation.
      
      And for callers freeing outstanding extent in error handler, we will
      just free META_PREALLOC bytes.
      
      This behavior makes callers of btrfs_qgroup_release_meta() or
      btrfs_qgroup_convert_meta() to be aware of which type they are.
      So in this patch, btrfs_delalloc_release_metadata() and its callers get
      an extra parameter to info qgroup to do correct meta convert/release.
      
      The good news is, even we use the wrong type (convert or free), it won't
      cause obvious bug, as prealloc type is always in good shape, and the
      type only affects how per-trans meta is increased or not.
      
      So the worst case will be at most metadata limitation can be sometimes
      exceeded (no convert at all) or metadata limitation is reached too soon
      (no free at all).
      Signed-off-by: NQu Wenruo <wqu@suse.com>
      Signed-off-by: NDavid Sterba <dsterba@suse.com>
      43b18595
    • N
      btrfs: Remove userspace transaction ioctls · 7a5a07a8
      Nikolay Borisov 提交于
      Commit 3558d4f8 ("btrfs: Deprecate userspace transaction ioctls")
      marked the beginning of the end of userspace transaction. This commit
      finishes the job! There are no known users and ceph does not use the
      ioctl anymore.
      Signed-off-by: NNikolay Borisov <nborisov@suse.com>
      Acked-by: NSage Weil <sage@redhat.com>
      Reviewed-by: NDavid Sterba <dsterba@suse.com>
      Signed-off-by: NDavid Sterba <dsterba@suse.com>
      7a5a07a8
    • D
      btrfs: open code trivial helper btrfs_page_exists_in_range · 051c98eb
      David Sterba 提交于
      The called function name is self explanatory.
      Signed-off-by: NDavid Sterba <dsterba@suse.com>
      051c98eb
  6. 26 3月, 2018 2 次提交
  7. 29 1月, 2018 1 次提交
    • J
      fs: new API for handling inode->i_version · ae5e165d
      Jeff Layton 提交于
      Add a documentation blob that explains what the i_version field is, how
      it is expected to work, and how it is currently implemented by various
      filesystems.
      
      We already have inode_inc_iversion. Add several other functions for
      manipulating and accessing the i_version counter. For now, the
      implementation is trivial and basically works the way that all of the
      open-coded i_version accesses work today.
      
      Future patches will convert existing users of i_version to use the new
      API, and then convert the backend implementation to do things more
      efficiently.
      Signed-off-by: NJeff Layton <jlayton@redhat.com>
      Reviewed-by: NJan Kara <jack@suse.cz>
      ae5e165d
  8. 22 1月, 2018 10 次提交
    • F
      Btrfs: fix space leak after fallocate and zero range operations · 81fdf638
      Filipe Manana 提交于
      If we do a buffered write after a zero range operation that has an
      unaligned (with the filesystem's sector size) end which also falls within
      an unwritten (prealloc) extent that is currently beyond the inode's
      i_size, and the zero range operation has the flag FALLOC_FL_KEEP_SIZE,
      we end up leaking data and metadata space. This happens because when
      zeroing a range we call btrfs_truncate_block(), which does delalloc
      (loads the page and partially zeroes its content), and in the buffered
      write path we only clear existing delalloc space reservation for the
      range we are writing into if that range starts at an offset smaller then
      the inode's i_size, which makes sense since we can not have delalloc
      extents beyond the i_size, only unwritten extents are allowed.
      
      Example reproducer:
      
       $ mkfs.btrfs -f /dev/sdb
       $ mount /dev/sdb /mnt
       $ xfs_io -f -c "falloc -k 428K 4K" /mnt/foobar
       $ xfs_io -c "fzero -k 0 430K" /mnt/foobar
       $ xfs_io -c "pwrite -S 0xaa 428K 4K" /mnt/foobar
       $ umount /mnt
      
      After the unmount we get the metadata and data space leaks reported in
      dmesg/syslog:
      
       [95794.602253] ------------[ cut here ]------------
       [95794.603322] WARNING: CPU: 0 PID: 31496 at fs/btrfs/inode.c:9561 btrfs_destroy_inode+0x4e/0x206 [btrfs]
       [95794.605167] Modules linked in: btrfs xfs ppdev ghash_clmulni_intel pcbc aesni_intel aes_x86_64 crypto_simd cryptd glue_helper parport_pc psmouse sg i2c_piix4 parport i2c_core evdev pcspkr button serio_raw sunrpc loop autofs4 ext4 crc16 mbcache jbd2 zstd_decompress zstd_compress xxhash raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c crc32c_generic raid1 raid0 multipath linear md_mod sd_mod virtio_scsi ata_generic crc32c_intel ata_piix floppy virtio_pci virtio_ring virtio libata scsi_mod e1000 [last unloaded: btrfs]
       [95794.613000] CPU: 0 PID: 31496 Comm: umount Tainted: G        W       4.14.0-rc6-btrfs-next-54+ #1
       [95794.614448] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.10.2-0-g5f4c7b1-prebuilt.qemu-project.org 04/01/2014
       [95794.615972] task: ffff880075aa0240 task.stack: ffffc90001734000
       [95794.617114] RIP: 0010:btrfs_destroy_inode+0x4e/0x206 [btrfs]
       [95794.618001] RSP: 0018:ffffc90001737d00 EFLAGS: 00010202
       [95794.618721] RAX: 0000000000000000 RBX: ffff880070fa1418 RCX: ffffc90001737c7c
       [95794.619645] RDX: 0000000175aa0240 RSI: 0000000000000001 RDI: ffff880070fa1418
       [95794.620711] RBP: ffffc90001737d38 R08: 0000000000000000 R09: 0000000000000000
       [95794.621932] R10: ffffc90001737c48 R11: ffff88007123e158 R12: ffff880075b6a000
       [95794.623124] R13: ffff88006145c000 R14: ffff880070fa1418 R15: ffff880070c3b4a0
       [95794.624188] FS:  00007fa6793c92c0(0000) GS:ffff88023fc00000(0000) knlGS:0000000000000000
       [95794.625578] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
       [95794.626522] CR2: 000056338670d048 CR3: 00000000610dc005 CR4: 00000000001606f0
       [95794.627647] Call Trace:
       [95794.628128]  destroy_inode+0x3d/0x55
       [95794.628573]  evict+0x177/0x17e
       [95794.629010]  dispose_list+0x50/0x71
       [95794.629478]  evict_inodes+0x132/0x141
       [95794.630289]  generic_shutdown_super+0x3f/0x10b
       [95794.630864]  kill_anon_super+0x12/0x1c
       [95794.631383]  btrfs_kill_super+0x16/0x21 [btrfs]
       [95794.631930]  deactivate_locked_super+0x30/0x68
       [95794.632539]  deactivate_super+0x36/0x39
       [95794.633200]  cleanup_mnt+0x49/0x67
       [95794.633818]  __cleanup_mnt+0x12/0x14
       [95794.634416]  task_work_run+0x82/0xa6
       [95794.634902]  prepare_exit_to_usermode+0xe1/0x10c
       [95794.635525]  syscall_return_slowpath+0x18c/0x1af
       [95794.636122]  entry_SYSCALL_64_fastpath+0xab/0xad
       [95794.636834] RIP: 0033:0x7fa678cb99a7
       [95794.637370] RSP: 002b:00007ffccf0aaed8 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6
       [95794.638672] RAX: 0000000000000000 RBX: 0000563386706030 RCX: 00007fa678cb99a7
       [95794.639596] RDX: 0000000000000001 RSI: 0000000000000000 RDI: 000056338670ca90
       [95794.640703] RBP: 000056338670ca90 R08: 000056338670c740 R09: 0000000000000015
       [95794.641773] R10: 00000000000006b4 R11: 0000000000000246 R12: 00007fa6791bae64
       [95794.643150] R13: 0000000000000000 R14: 0000563386706210 R15: 00007ffccf0ab160
       [95794.644249] Code: ff 4c 8b a8 80 06 00 00 48 8b 87 c0 01 00 00 48 85 c0 74 02 0f ff 48 83 bb e0 02 00 00 00 74 02 0f ff 83 bb 3c ff ff ff 00 74 02 <0f> ff 83 bb 40 ff ff ff 00 74 02 0f ff 48 83 bb f8 fe ff ff 00
       [95794.646929] ---[ end trace e95877675c6ec007 ]---
       [95794.647751] ------------[ cut here ]------------
       [95794.648509] WARNING: CPU: 0 PID: 31496 at fs/btrfs/inode.c:9562 btrfs_destroy_inode+0x59/0x206 [btrfs]
       [95794.649842] Modules linked in: btrfs xfs ppdev ghash_clmulni_intel pcbc aesni_intel aes_x86_64 crypto_simd cryptd glue_helper parport_pc psmouse sg i2c_piix4 parport i2c_core evdev pcspkr button serio_raw sunrpc loop autofs4 ext4 crc16 mbcache jbd2 zstd_decompress zstd_compress xxhash raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c crc32c_generic raid1 raid0 multipath linear md_mod sd_mod virtio_scsi ata_generic crc32c_intel ata_piix floppy virtio_pci virtio_ring virtio libata scsi_mod e1000 [last unloaded: btrfs]
       [95794.654659] CPU: 0 PID: 31496 Comm: umount Tainted: G        W       4.14.0-rc6-btrfs-next-54+ #1
       [95794.655894] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.10.2-0-g5f4c7b1-prebuilt.qemu-project.org 04/01/2014
       [95794.657546] task: ffff880075aa0240 task.stack: ffffc90001734000
       [95794.658433] RIP: 0010:btrfs_destroy_inode+0x59/0x206 [btrfs]
       [95794.659279] RSP: 0018:ffffc90001737d00 EFLAGS: 00010202
       [95794.660054] RAX: 0000000000000000 RBX: ffff880070fa1418 RCX: ffffc90001737c7c
       [95794.660753] RDX: 0000000175aa0240 RSI: 0000000000000001 RDI: ffff880070fa1418
       [95794.661513] RBP: ffffc90001737d38 R08: 0000000000000000 R09: 0000000000000000
       [95794.662289] R10: ffffc90001737c48 R11: ffff88007123e158 R12: ffff880075b6a000
       [95794.663393] R13: ffff88006145c000 R14: ffff880070fa1418 R15: ffff880070c3b4a0
       [95794.664342] FS:  00007fa6793c92c0(0000) GS:ffff88023fc00000(0000) knlGS:0000000000000000
       [95794.665673] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
       [95794.666593] CR2: 000056338670d048 CR3: 00000000610dc005 CR4: 00000000001606f0
       [95794.667629] Call Trace:
       [95794.668065]  destroy_inode+0x3d/0x55
       [95794.668637]  evict+0x177/0x17e
       [95794.669179]  dispose_list+0x50/0x71
       [95794.669830]  evict_inodes+0x132/0x141
       [95794.670416]  generic_shutdown_super+0x3f/0x10b
       [95794.671103]  kill_anon_super+0x12/0x1c
       [95794.671786]  btrfs_kill_super+0x16/0x21 [btrfs]
       [95794.672552]  deactivate_locked_super+0x30/0x68
       [95794.673393]  deactivate_super+0x36/0x39
       [95794.674107]  cleanup_mnt+0x49/0x67
       [95794.674706]  __cleanup_mnt+0x12/0x14
       [95794.675279]  task_work_run+0x82/0xa6
       [95794.675795]  prepare_exit_to_usermode+0xe1/0x10c
       [95794.676507]  syscall_return_slowpath+0x18c/0x1af
       [95794.677275]  entry_SYSCALL_64_fastpath+0xab/0xad
       [95794.678006] RIP: 0033:0x7fa678cb99a7
       [95794.678600] RSP: 002b:00007ffccf0aaed8 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6
       [95794.679739] RAX: 0000000000000000 RBX: 0000563386706030 RCX: 00007fa678cb99a7
       [95794.680779] RDX: 0000000000000001 RSI: 0000000000000000 RDI: 000056338670ca90
       [95794.681837] RBP: 000056338670ca90 R08: 000056338670c740 R09: 0000000000000015
       [95794.682867] R10: 00000000000006b4 R11: 0000000000000246 R12: 00007fa6791bae64
       [95794.683891] R13: 0000000000000000 R14: 0000563386706210 R15: 00007ffccf0ab160
       [95794.684843] Code: c0 01 00 00 48 85 c0 74 02 0f ff 48 83 bb e0 02 00 00 00 74 02 0f ff 83 bb 3c ff ff ff 00 74 02 0f ff 83 bb 40 ff ff ff 00 74 02 <0f> ff 48 83 bb f8 fe ff ff 00 74 02 0f ff 48 83 bb 00 ff ff ff
       [95794.687156] ---[ end trace e95877675c6ec008 ]---
       [95794.687876] ------------[ cut here ]------------
       [95794.688579] WARNING: CPU: 0 PID: 31496 at fs/btrfs/inode.c:9565 btrfs_destroy_inode+0x7d/0x206 [btrfs]
       [95794.689735] Modules linked in: btrfs xfs ppdev ghash_clmulni_intel pcbc aesni_intel aes_x86_64 crypto_simd cryptd glue_helper parport_pc psmouse sg i2c_piix4 parport i2c_core evdev pcspkr button serio_raw sunrpc loop autofs4 ext4 crc16 mbcache jbd2 zstd_decompress zstd_compress xxhash raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c crc32c_generic raid1 raid0 multipath linear md_mod sd_mod virtio_scsi ata_generic crc32c_intel ata_piix floppy virtio_pci virtio_ring virtio libata scsi_mod e1000 [last unloaded: btrfs]
       [95794.695015] CPU: 0 PID: 31496 Comm: umount Tainted: G        W       4.14.0-rc6-btrfs-next-54+ #1
       [95794.696396] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.10.2-0-g5f4c7b1-prebuilt.qemu-project.org 04/01/2014
       [95794.697956] task: ffff880075aa0240 task.stack: ffffc90001734000
       [95794.698925] RIP: 0010:btrfs_destroy_inode+0x7d/0x206 [btrfs]
       [95794.699763] RSP: 0018:ffffc90001737d00 EFLAGS: 00010206
       [95794.700434] RAX: 0000000000000000 RBX: ffff880070fa1418 RCX: ffffc90001737c7c
       [95794.701445] RDX: 0000000175aa0240 RSI: 0000000000000001 RDI: ffff880070fa1418
       [95794.702448] RBP: ffffc90001737d38 R08: 0000000000000000 R09: 0000000000000000
       [95794.703557] R10: ffffc90001737c48 R11: ffff88007123e158 R12: ffff880075b6a000
       [95794.704441] R13: ffff88006145c000 R14: ffff880070fa1418 R15: ffff880070c3b4a0
       [95794.705270] FS:  00007fa6793c92c0(0000) GS:ffff88023fc00000(0000) knlGS:0000000000000000
       [95794.706341] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
       [95794.707001] CR2: 000056338670d048 CR3: 00000000610dc005 CR4: 00000000001606f0
       [95794.708030] Call Trace:
       [95794.708466]  destroy_inode+0x3d/0x55
       [95794.709071]  evict+0x177/0x17e
       [95794.709497]  dispose_list+0x50/0x71
       [95794.709973]  evict_inodes+0x132/0x141
       [95794.710564]  generic_shutdown_super+0x3f/0x10b
       [95794.711200]  kill_anon_super+0x12/0x1c
       [95794.711633]  btrfs_kill_super+0x16/0x21 [btrfs]
       [95794.712139]  deactivate_locked_super+0x30/0x68
       [95794.712608]  deactivate_super+0x36/0x39
       [95794.713093]  cleanup_mnt+0x49/0x67
       [95794.713514]  __cleanup_mnt+0x12/0x14
       [95794.713933]  task_work_run+0x82/0xa6
       [95794.714543]  prepare_exit_to_usermode+0xe1/0x10c
       [95794.715247]  syscall_return_slowpath+0x18c/0x1af
       [95794.715952]  entry_SYSCALL_64_fastpath+0xab/0xad
       [95794.716653] RIP: 0033:0x7fa678cb99a7
       [95794.721100] RSP: 002b:00007ffccf0aaed8 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6
       [95794.722052] RAX: 0000000000000000 RBX: 0000563386706030 RCX: 00007fa678cb99a7
       [95794.722856] RDX: 0000000000000001 RSI: 0000000000000000 RDI: 000056338670ca90
       [95794.723698] RBP: 000056338670ca90 R08: 000056338670c740 R09: 0000000000000015
       [95794.724736] R10: 00000000000006b4 R11: 0000000000000246 R12: 00007fa6791bae64
       [95794.725928] R13: 0000000000000000 R14: 0000563386706210 R15: 00007ffccf0ab160
       [95794.726728] Code: 40 ff ff ff 00 74 02 0f ff 48 83 bb f8 fe ff ff 00 74 02 0f ff 48 83 bb 00 ff ff ff 00 74 02 0f ff 48 83 bb 30 ff ff ff 00 74 02 <0f> ff 48 83 bb 08 ff ff ff 00 74 02 0f ff 4d 85 e4 0f 84 52 01
       [95794.729203] ---[ end trace e95877675c6ec009 ]---
       [95794.841054] ------------[ cut here ]------------
       [95794.841829] WARNING: CPU: 0 PID: 31496 at fs/btrfs/extent-tree.c:5831 btrfs_free_block_groups+0x235/0x36a [btrfs]
       [95794.843425] Modules linked in: btrfs xfs ppdev ghash_clmulni_intel pcbc aesni_intel aes_x86_64 crypto_simd cryptd glue_helper parport_pc psmouse sg i2c_piix4 parport i2c_core evdev pcspkr button serio_raw sunrpc loop autofs4 ext4 crc16 mbcache jbd2 zstd_decompress zstd_compress xxhash raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c crc32c_generic raid1 raid0 multipath linear md_mod sd_mod virtio_scsi ata_generic crc32c_intel ata_piix floppy virtio_pci virtio_ring virtio libata scsi_mod e1000 [last unloaded: btrfs]
       [95794.850658] CPU: 0 PID: 31496 Comm: umount Tainted: G        W       4.14.0-rc6-btrfs-next-54+ #1
       [95794.852590] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.10.2-0-g5f4c7b1-prebuilt.qemu-project.org 04/01/2014
       [95794.854752] task: ffff880075aa0240 task.stack: ffffc90001734000
       [95794.855812] RIP: 0010:btrfs_free_block_groups+0x235/0x36a [btrfs]
       [95794.856811] RSP: 0018:ffffc90001737d70 EFLAGS: 00010206
       [95794.857805] RAX: 0000000080000000 RBX: ffff88006145c000 RCX: 0000000000000001
       [95794.859014] RDX: 00000001810af668 RSI: 0000000000000002 RDI: 00000000ffffffff
       [95794.860270] RBP: ffffc90001737d98 R08: 0000000000000000 R09: ffffffff817e22b9
       [95794.861525] R10: ffffc90001737c80 R11: 00000000000337fd R12: 0000000000000000
       [95794.862700] R13: ffff88006145c0c0 R14: ffff88021b61a800 R15: ffff88006145c100
       [95794.863810] FS:  00007fa6793c92c0(0000) GS:ffff88023fc00000(0000) knlGS:0000000000000000
       [95794.865149] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
       [95794.866099] CR2: 000056338670d048 CR3: 00000000610dc005 CR4: 00000000001606f0
       [95794.867198] Call Trace:
       [95794.867626]  close_ctree+0x1db/0x2b8 [btrfs]
       [95794.868188]  ? evict_inodes+0x132/0x141
       [95794.869037]  btrfs_put_super+0x15/0x17 [btrfs]
       [95794.870400]  generic_shutdown_super+0x6a/0x10b
       [95794.871262]  kill_anon_super+0x12/0x1c
       [95794.872046]  btrfs_kill_super+0x16/0x21 [btrfs]
       [95794.872746]  deactivate_locked_super+0x30/0x68
       [95794.873687]  deactivate_super+0x36/0x39
       [95794.874639]  cleanup_mnt+0x49/0x67
       [95794.875504]  __cleanup_mnt+0x12/0x14
       [95794.876126]  task_work_run+0x82/0xa6
       [95794.876788]  prepare_exit_to_usermode+0xe1/0x10c
       [95794.877777]  syscall_return_slowpath+0x18c/0x1af
       [95794.878381]  entry_SYSCALL_64_fastpath+0xab/0xad
       [95794.878888] RIP: 0033:0x7fa678cb99a7
       [95794.879307] RSP: 002b:00007ffccf0aaed8 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6
       [95794.880204] RAX: 0000000000000000 RBX: 0000563386706030 RCX: 00007fa678cb99a7
       [95794.881640] RDX: 0000000000000001 RSI: 0000000000000000 RDI: 000056338670ca90
       [95794.882690] RBP: 000056338670ca90 R08: 000056338670c740 R09: 0000000000000015
       [95794.883538] R10: 00000000000006b4 R11: 0000000000000246 R12: 00007fa6791bae64
       [95794.884562] R13: 0000000000000000 R14: 0000563386706210 R15: 00007ffccf0ab160
       [95794.885664] Code: 89 ef e8 07 ec 32 e1 e8 9d c0 ea e0 48 8d b3 28 02 00 00 48 83 c9 ff 31 d2 48 89 df e8 29 c5 ff ff 48 83 bb 80 02 00 00 00 74 02 <0f> ff 48 83 bb 88 02 00 00 00 74 02 0f ff 48 83 bb d8 02 00 00
       [95794.887980] ---[ end trace e95877675c6ec00a ]---
       [95794.888739] ------------[ cut here ]------------
       [95794.889405] WARNING: CPU: 0 PID: 31496 at fs/btrfs/extent-tree.c:5832 btrfs_free_block_groups+0x241/0x36a [btrfs]
       [95794.891020] Modules linked in: btrfs xfs ppdev ghash_clmulni_intel pcbc aesni_intel aes_x86_64 crypto_simd cryptd glue_helper parport_pc psmouse sg i2c_piix4 parport i2c_core evdev pcspkr button serio_raw sunrpc loop autofs4 ext4 crc16 mbcache jbd2 zstd_decompress zstd_compress xxhash raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c crc32c_generic raid1 raid0 multipath linear md_mod sd_mod virtio_scsi ata_generic crc32c_intel ata_piix floppy virtio_pci virtio_ring virtio libata scsi_mod e1000 [last unloaded: btrfs]
       [95794.897551] CPU: 0 PID: 31496 Comm: umount Tainted: G        W       4.14.0-rc6-btrfs-next-54+ #1
       [95794.898509] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.10.2-0-g5f4c7b1-prebuilt.qemu-project.org 04/01/2014
       [95794.899685] task: ffff880075aa0240 task.stack: ffffc90001734000
       [95794.900592] RIP: 0010:btrfs_free_block_groups+0x241/0x36a [btrfs]
       [95794.901387] RSP: 0018:ffffc90001737d70 EFLAGS: 00010206
       [95794.902300] RAX: 0000000080000000 RBX: ffff88006145c000 RCX: 0000000000000001
       [95794.903260] RDX: 00000001810af668 RSI: 0000000000000002 RDI: 00000000ffffffff
       [95794.904332] RBP: ffffc90001737d98 R08: 0000000000000000 R09: ffffffff817e22b9
       [95794.905300] R10: ffffc90001737c80 R11: 00000000000337fd R12: 0000000000000000
       [95794.906439] R13: ffff88006145c0c0 R14: ffff88021b61a800 R15: ffff88006145c100
       [95794.907459] FS:  00007fa6793c92c0(0000) GS:ffff88023fc00000(0000) knlGS:0000000000000000
       [95794.908625] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
       [95794.909511] CR2: 000056338670d048 CR3: 00000000610dc005 CR4: 00000000001606f0
       [95794.910630] Call Trace:
       [95794.911153]  close_ctree+0x1db/0x2b8 [btrfs]
       [95794.911837]  ? evict_inodes+0x132/0x141
       [95794.912344]  btrfs_put_super+0x15/0x17 [btrfs]
       [95794.912975]  generic_shutdown_super+0x6a/0x10b
       [95794.913788]  kill_anon_super+0x12/0x1c
       [95794.914424]  btrfs_kill_super+0x16/0x21 [btrfs]
       [95794.915142]  deactivate_locked_super+0x30/0x68
       [95794.915831]  deactivate_super+0x36/0x39
       [95794.916433]  cleanup_mnt+0x49/0x67
       [95794.917045]  __cleanup_mnt+0x12/0x14
       [95794.917665]  task_work_run+0x82/0xa6
       [95794.918309]  prepare_exit_to_usermode+0xe1/0x10c
       [95794.919021]  syscall_return_slowpath+0x18c/0x1af
       [95794.919722]  entry_SYSCALL_64_fastpath+0xab/0xad
       [95794.920426] RIP: 0033:0x7fa678cb99a7
       [95794.921039] RSP: 002b:00007ffccf0aaed8 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6
       [95794.922303] RAX: 0000000000000000 RBX: 0000563386706030 RCX: 00007fa678cb99a7
       [95794.923335] RDX: 0000000000000001 RSI: 0000000000000000 RDI: 000056338670ca90
       [95794.924364] RBP: 000056338670ca90 R08: 000056338670c740 R09: 0000000000000015
       [95794.925435] R10: 00000000000006b4 R11: 0000000000000246 R12: 00007fa6791bae64
       [95794.926533] R13: 0000000000000000 R14: 0000563386706210 R15: 00007ffccf0ab160
       [95794.927557] Code: 48 8d b3 28 02 00 00 48 83 c9 ff 31 d2 48 89 df e8 29 c5 ff ff 48 83 bb 80 02 00 00 00 74 02 0f ff 48 83 bb 88 02 00 00 00 74 02 <0f> ff 48 83 bb d8 02 00 00 00 74 02 0f ff 48 83 bb e0 02 00 00
       [95794.930166] ---[ end trace e95877675c6ec00b ]---
       [95794.930961] ------------[ cut here ]------------
       [95794.931727] WARNING: CPU: 0 PID: 31496 at fs/btrfs/extent-tree.c:9953 btrfs_free_block_groups+0x2bc/0x36a [btrfs]
       [95794.932729] Modules linked in: btrfs xfs ppdev ghash_clmulni_intel pcbc aesni_intel aes_x86_64 crypto_simd cryptd glue_helper parport_pc psmouse sg i2c_piix4 parport i2c_core evdev pcspkr button serio_raw sunrpc loop autofs4 ext4 crc16 mbcache jbd2 zstd_decompress zstd_compress xxhash raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c crc32c_generic raid1 raid0 multipath linear md_mod sd_mod virtio_scsi ata_generic crc32c_intel ata_piix floppy virtio_pci virtio_ring virtio libata scsi_mod e1000 [last unloaded: btrfs]
       [95794.938394] CPU: 0 PID: 31496 Comm: umount Tainted: G        W       4.14.0-rc6-btrfs-next-54+ #1
       [95794.939842] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.10.2-0-g5f4c7b1-prebuilt.qemu-project.org 04/01/2014
       [95794.941455] task: ffff880075aa0240 task.stack: ffffc90001734000
       [95794.942336] RIP: 0010:btrfs_free_block_groups+0x2bc/0x36a [btrfs]
       [95794.943268] RSP: 0018:ffffc90001737d70 EFLAGS: 00010206
       [95794.944127] RAX: ffff8802004fd0e8 RBX: ffff88006145c000 RCX: 0000000000000001
       [95794.945211] RDX: 00000001810af668 RSI: 0000000000000002 RDI: 00000000ffffffff
       [95794.946316] RBP: ffffc90001737d98 R08: 0000000000000000 R09: ffffffff817e22b9
       [95794.947271] R10: ffffc90001737c80 R11: 00000000000337fd R12: ffff8802004fd0e8
       [95794.948219] R13: ffff88006145c0c0 R14: ffff88006145e598 R15: ffff88006145c100
       [95794.949193] FS:  00007fa6793c92c0(0000) GS:ffff88023fc00000(0000) knlGS:0000000000000000
       [95794.950495] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
       [95794.951338] CR2: 000056338670d048 CR3: 00000000610dc005 CR4: 00000000001606f0
       [95794.952361] Call Trace:
       [95794.952811]  close_ctree+0x1db/0x2b8 [btrfs]
       [95794.953522]  ? evict_inodes+0x132/0x141
       [95794.954543]  btrfs_put_super+0x15/0x17 [btrfs]
       [95794.955231]  generic_shutdown_super+0x6a/0x10b
       [95794.955916]  kill_anon_super+0x12/0x1c
       [95794.956414]  btrfs_kill_super+0x16/0x21 [btrfs]
       [95794.956953]  deactivate_locked_super+0x30/0x68
       [95794.957635]  deactivate_super+0x36/0x39
       [95794.958256]  cleanup_mnt+0x49/0x67
       [95794.958701]  __cleanup_mnt+0x12/0x14
       [95794.959181]  task_work_run+0x82/0xa6
       [95794.959635]  prepare_exit_to_usermode+0xe1/0x10c
       [95794.960182]  syscall_return_slowpath+0x18c/0x1af
       [95794.960731]  entry_SYSCALL_64_fastpath+0xab/0xad
       [95794.961438] RIP: 0033:0x7fa678cb99a7
       [95794.961990] RSP: 002b:00007ffccf0aaed8 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6
       [95794.963111] RAX: 0000000000000000 RBX: 0000563386706030 RCX: 00007fa678cb99a7
       [95794.963975] RDX: 0000000000000001 RSI: 0000000000000000 RDI: 000056338670ca90
       [95794.964680] RBP: 000056338670ca90 R08: 000056338670c740 R09: 0000000000000015
       [95794.965763] R10: 00000000000006b4 R11: 0000000000000246 R12: 00007fa6791bae64
       [95794.966868] R13: 0000000000000000 R14: 0000563386706210 R15: 00007ffccf0ab160
       [95794.967800] Code: 00 00 00 4c 8b a3 98 25 00 00 49 83 bc 24 60 ff ff ff 00 75 16 49 83 bc 24 68 ff ff ff 00 75 0b 49 83 bc 24 70 ff ff ff 00 74 16 <0f> ff 49 8d b4 24 18 ff ff ff 31 c9 31 d2 48 89 df e8 93 7a ff
       [95794.970629] ---[ end trace e95877675c6ec00c ]---
       [95794.971451] BTRFS info (device sdi): space_info 1 has 7680000 free, is not full
       [95794.972351] BTRFS info (device sdi): space_info total=8388608, used=704512, pinned=0, reserved=0, may_use=4096, readonly=0
       [95794.973595] ------------[ cut here ]------------
       [95794.974353] WARNING: CPU: 0 PID: 31496 at fs/btrfs/extent-tree.c:9953 btrfs_free_block_groups+0x2bc/0x36a [btrfs]
       [95794.980163] Modules linked in: btrfs xfs ppdev ghash_clmulni_intel pcbc aesni_intel aes_x86_64 crypto_simd cryptd glue_helper parport_pc psmouse sg i2c_piix4 parport i2c_core evdev pcspkr button serio_raw sunrpc loop autofs4 ext4 crc16 mbcache jbd2 zstd_decompress zstd_compress xxhash raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c crc32c_generic raid1 raid0 multipath linear md_mod sd_mod virtio_scsi ata_generic crc32c_intel ata_piix floppy virtio_pci virtio_ring virtio libata scsi_mod e1000 [last unloaded: btrfs]
       [95794.986461] CPU: 0 PID: 31496 Comm: umount Tainted: G        W       4.14.0-rc6-btrfs-next-54+ #1
       [95794.987591] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.10.2-0-g5f4c7b1-prebuilt.qemu-project.org 04/01/2014
       [95794.988929] task: ffff880075aa0240 task.stack: ffffc90001734000
       [95794.989922] RIP: 0010:btrfs_free_block_groups+0x2bc/0x36a [btrfs]
       [95794.990715] RSP: 0018:ffffc90001737d70 EFLAGS: 00010206
       [95794.991431] RAX: ffff88020f6e70e8 RBX: ffff88006145c000 RCX: ffffffff8115a906
       [95794.992455] RDX: ffffffff8115a902 RSI: ffff880075aa0b40 RDI: ffff880075aa0b40
       [95794.993535] RBP: ffffc90001737d98 R08: 0000000000000020 R09: fffffffffffffff7
       [95794.994573] R10: 00000000ffffffc4 R11: ffff8800633b1bc0 R12: ffff88020f6e70e8
       [95794.996250] R13: 0000000000000038 R14: ffff88006145e598 R15: 0000000000000000
       [95794.997233] FS:  00007fa6793c92c0(0000) GS:ffff88023fc00000(0000) knlGS:0000000000000000
       [95794.998592] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
       [95794.999484] CR2: 000056338670d048 CR3: 00000000610dc005 CR4: 00000000001606f0
       [95795.000542] Call Trace:
       [95795.001138]  close_ctree+0x1db/0x2b8 [btrfs]
       [95795.001885]  ? evict_inodes+0x132/0x141
       [95795.002407]  btrfs_put_super+0x15/0x17 [btrfs]
       [95795.003093]  generic_shutdown_super+0x6a/0x10b
       [95795.003720]  kill_anon_super+0x12/0x1c
       [95795.004353]  btrfs_kill_super+0x16/0x21 [btrfs]
       [95795.005095]  deactivate_locked_super+0x30/0x68
       [95795.005716]  deactivate_super+0x36/0x39
       [95795.006388]  cleanup_mnt+0x49/0x67
       [95795.006939]  __cleanup_mnt+0x12/0x14
       [95795.007512]  task_work_run+0x82/0xa6
       [95795.008124]  prepare_exit_to_usermode+0xe1/0x10c
       [95795.008994]  syscall_return_slowpath+0x18c/0x1af
       [95795.009831]  entry_SYSCALL_64_fastpath+0xab/0xad
       [95795.010610] RIP: 0033:0x7fa678cb99a7
       [95795.011193] RSP: 002b:00007ffccf0aaed8 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6
       [95795.012327] RAX: 0000000000000000 RBX: 0000563386706030 RCX: 00007fa678cb99a7
       [95795.013432] RDX: 0000000000000001 RSI: 0000000000000000 RDI: 000056338670ca90
       [95795.014558] RBP: 000056338670ca90 R08: 000056338670c740 R09: 0000000000000015
       [95795.015577] R10: 00000000000006b4 R11: 0000000000000246 R12: 00007fa6791bae64
       [95795.016569] R13: 0000000000000000 R14: 0000563386706210 R15: 00007ffccf0ab160
       [95795.017662] Code: 00 00 00 4c 8b a3 98 25 00 00 49 83 bc 24 60 ff ff ff 00 75 16 49 83 bc 24 68 ff ff ff 00 75 0b 49 83 bc 24 70 ff ff ff 00 74 16 <0f> ff 49 8d b4 24 18 ff ff ff 31 c9 31 d2 48 89 df e8 93 7a ff
       [95795.020538] ---[ end trace e95877675c6ec00d ]---
       [95795.021259] BTRFS info (device sdi): space_info 4 has 1072775168 free, is not full
       [95795.022390] BTRFS info (device sdi): space_info total=1073741824, used=114688, pinned=0, reserved=0, may_use=786432, readonly=65536
      
      Fix this by ensuring the zero range operation does not call
      btrfs_truncate_block() if the corresponding extent is an unwritten one
      (it's pointless anyway, since reading from an unwritten extent yields
      zeroes).
      Signed-off-by: NFilipe Manana <fdmanana@suse.com>
      Tested-by: NNikolay Borisov <nborisov@suse.com>
      Signed-off-by: NDavid Sterba <dsterba@suse.com>
      81fdf638
    • F
      Btrfs: fix missing inode i_size update after zero range operation · 9f13ce74
      Filipe Manana 提交于
      For a fallocate's zero range operation that targets a range with an end
      that is not aligned to the sector size, we can end up not updating the
      inode's i_size. This happens when the last page of the range maps to an
      unwritten (prealloc) extent and before that last page we have either a
      hole or a written extent. This is because in this scenario we relied
      on a call to btrfs_prealloc_file_range() to update the inode's i_size,
      however it can only update the i_size to the "down aligned" end of the
      range.
      
      Example:
      
       $ mkfs.btrfs -f /dev/sdc
       $ mount /dev/sdc /mnt
       $ xfs_io -f -c "pwrite -S 0xff 0 428K" /mnt/foobar
       $ xfs_io -c "falloc -k 428K 4K" /mnt/foobar
       $ xfs_io -c "fzero 0 430K" /mnt/foobar
       $ du --bytes /mnt/foobar
       438272	/mnt/foobar
      
      The inode's i_size was left as 428Kb (438272 bytes) when it should have
      been updated to 430Kb (440320 bytes).
      Fix this by always updating the inode's i_size explicitly after zeroing
      the range.
      
      Fixes: ba6d5887946ff86d93dc ("Btrfs: add support for fallocate's zero range operation")
      Signed-off-by: NFilipe Manana <fdmanana@suse.com>
      Signed-off-by: NDavid Sterba <dsterba@suse.com>
      9f13ce74
    • F
      Btrfs: use cached state when dirtying pages during buffered write · 94f45071
      Filipe Manana 提交于
      During a buffered IO write, we can have an extent state that we got when
      we locked the range (if the range starts at an offset lower than eof), so
      always pass it to btrfs_dirty_pages() so that setting the delalloc bit
      in the range does not need to do a full search in the inode's io tree,
      saving time and reducing the amount of time we hold the io tree's lock.
      Signed-off-by: NFilipe Manana <fdmanana@suse.com>
      Reviewed-by: NDavid Sterba <dsterba@suse.com>
      Signed-off-by: NDavid Sterba <dsterba@suse.com>
      94f45071
    • F
      Btrfs: add support for fallocate's zero range operation · f27451f2
      Filipe Manana 提交于
      This implements support the zero range operation of fallocate. For now
      at least it's as simple as possible while reusing most of the existing
      fallocate and hole punching infrastructure.
      Signed-off-by: NFilipe Manana <fdmanana@suse.com>
      Signed-off-by: NDavid Sterba <dsterba@suse.com>
      f27451f2
    • D
      btrfs: sink unlock_extent parameter gfp_flags · e43bbe5e
      David Sterba 提交于
      All callers pass either GFP_NOFS or GFP_KERNEL now, so we can sink the
      parameter to the function, though we lose some of the slightly better
      semantics of GFP_KERNEL in some places, it's worth cleaning up the
      callchains.
      Signed-off-by: NDavid Sterba <dsterba@suse.com>
      e43bbe5e
    • L
      Btrfs: set plug for fsync · 343e4fc1
      Liu Bo 提交于
      Setting plug can merge adjacent IOs before dispatching IOs to the disk
      driver.
      
      Without plug, it'd not be a problem for single disk usecases, but for
      multiple disks using raid profile, a large IO can be split to several
      IOs of stripe length, and plug can be helpful to bring them together
      for each disk so that we can save several disk access.
      
      Moreover, fsync issues synchronous writes, so plug can really take
      effect.
      Signed-off-by: NLiu Bo <bo.li.liu@oracle.com>
      Reviewed-by: NDavid Sterba <dsterba@suse.com>
      Signed-off-by: NDavid Sterba <dsterba@suse.com>
      343e4fc1
    • D
      btrfs: sink gfp parameter to clear_extent_bit · ae0f1625
      David Sterba 提交于
      All callers use GFP_NOFS, we don't have to pass it as an argument. The
      built-in tests pass GFP_KERNEL, but they run only at module load time
      and NOFS works there as well.
      Signed-off-by: NDavid Sterba <dsterba@suse.com>
      ae0f1625
    • L
      Btrfs: add __init macro to btrfs init functions · f5c29bd9
      Liu Bo 提交于
      Adding __init macro gives kernel a hint that this function is only used
      during the initialization phase and its memory resources can be freed up
      after.
      Signed-off-by: NLiu Bo <bo.li.liu@oracle.com>
      Reviewed-by: NDavid Sterba <dsterba@suse.com>
      Signed-off-by: NDavid Sterba <dsterba@suse.com>
      f5c29bd9
    • N
      btrfs: Use locked_end rather than open coding it · 96b09dde
      Nikolay Borisov 提交于
      Right before we go into this loop locked_end is set to alloc_end - 1 and
      is being used in nearby functions, no need to have exceptions. This just
      makes the code consistent, no functional changes.
      Signed-off-by: NNikolay Borisov <nborisov@suse.com>
      Reviewed-by: NDavid Sterba <dsterba@suse.com>
      Signed-off-by: NDavid Sterba <dsterba@suse.com>
      96b09dde
    • N
      btrfs: Move loop termination condition in while() · 6b7d6e93
      Nikolay Borisov 提交于
      Fallocating a file in btrfs goes through several stages. The one before
      actually inserting the fallocated extents is to create a qgroup
      reservation, covering the desired range. To this end there is a loop in
      btrfs_fallocate which checks to see if there are holes in the fallocated
      range or !PREALLOC extents past EOF and if so create qgroup reservations
      for them. Unfortunately, the main condition of the loop is burried right
      at the end of its body rather than in the actual while statement which
      makes it non-obvious. Fix this by moving the condition in the while
      statement where it belongs. No functional changes.
      Signed-off-by: NNikolay Borisov <nborisov@suse.com>
      Reviewed-by: NDavid Sterba <dsterba@suse.com>
      Signed-off-by: NDavid Sterba <dsterba@suse.com>
      6b7d6e93
  9. 28 11月, 2017 1 次提交
    • L
      Btrfs: fix list_add corruption and soft lockups in fsync · ebb70442
      Liu Bo 提交于
      Xfstests btrfs/146 revealed this corruption,
      
      [   58.138831] Buffer I/O error on dev dm-0, logical block 2621424, async page read
      [   58.151233] BTRFS error (device sdf): bdev /dev/mapper/error-test errs: wr 1, rd 0, flush 0, corrupt 0, gen 0
      [   58.152403] list_add corruption. prev->next should be next (ffff88005e6775d8), but was ffffc9000189be88. (prev=ffffc9000189be88).
      [   58.153518] ------------[ cut here ]------------
      [   58.153892] WARNING: CPU: 1 PID: 1287 at lib/list_debug.c:31 __list_add_valid+0x169/0x1f0
      ...
      [   58.157379] RIP: 0010:__list_add_valid+0x169/0x1f0
      ...
      [   58.161956] Call Trace:
      [   58.162264]  btrfs_log_inode_parent+0x5bd/0xfb0 [btrfs]
      [   58.163583]  btrfs_log_dentry_safe+0x60/0x80 [btrfs]
      [   58.164003]  btrfs_sync_file+0x4c2/0x6f0 [btrfs]
      [   58.164393]  vfs_fsync_range+0x5f/0xd0
      [   58.164898]  do_fsync+0x5a/0x90
      [   58.165170]  SyS_fsync+0x10/0x20
      [   58.165395]  entry_SYSCALL_64_fastpath+0x1f/0xbe
      ...
      
      It turns out that we could record btrfs_log_ctx:io_err in
      log_one_extents when IO fails, but make log_one_extents() return '0'
      instead of -EIO, so the IO error is not acknowledged by the callers,
      i.e.  btrfs_log_inode_parent(), which would remove btrfs_log_ctx:list
      from list head 'root->log_ctxs'.  Since btrfs_log_ctx is allocated
      from stack memory, it'd get freed with a object alive on the
      list. then a future list_add will throw the above warning.
      
      This returns the correct error in the above case.
      
      Jeff also reported this while testing against his fsync error
      patch set[1].
      
      [1]: https://www.spinics.net/lists/linux-btrfs/msg65308.html
      "btrfs list corruption and soft lockups while testing writeback error handling"
      
      Fixes: 8407f553 ("Btrfs: fix data corruption after fast fsync and writeback error")
      Signed-off-by: NLiu Bo <bo.li.liu@oracle.com>
      Reviewed-by: NDavid Sterba <dsterba@suse.com>
      Signed-off-by: NDavid Sterba <dsterba@suse.com>
      ebb70442
  10. 16 11月, 2017 2 次提交
    • F
      Btrfs: fix reported number of inode blocks after buffered append writes · e3b8a485
      Filipe Manana 提交于
      The patch from commit a7e3b975 ("Btrfs: fix reported number of inode
      blocks") introduced a regression where if we do a buffered write starting
      at position equal to or greater than the file's size and then stat(2) the
      file before writeback is triggered, the number of used blocks does not
      change (unless there's a prealloc/unwritten extent). Example:
      
        $ xfs_io -f -c "pwrite -S 0xab 0 64K" foobar
        $ du -h foobar
        0	foobar
        $ sync
        $ du -h foobar
        64K	foobar
      
      The first version of that patch didn't had this regression and the second
      version, which was the one committed, was made only to address some
      performance regression detected by the intel test robots using fs_mark.
      
      This fixes the regression by setting the new delaloc bit in the range, and
      doing it at btrfs_dirty_pages() while setting the regular dealloc bit as
      well, so that this way we set both bits at once avoiding navigation of the
      inode's io tree twice. Doing it at btrfs_dirty_pages() is also the most
      meaninful place, as we should set the new dellaloc bit when if we set the
      delalloc bit, which happens only if we copied bytes into the pages at
      __btrfs_buffered_write().
      
      This was making some of LTP's du tests fail, which can be quickly run
      using a command line like the following:
      
        $ ./runltp -q -p -l /ltp.log -f commands -s du -d /mnt
      
      Fixes: a7e3b975 ("Btrfs: fix reported number of inode blocks")
      Signed-off-by: NFilipe Manana <fdmanana@suse.com>
      Signed-off-by: NDavid Sterba <dsterba@suse.com>
      e3b8a485
    • F
      Btrfs: move definition of the function btrfs_find_new_delalloc_bytes · f48bf66b
      Filipe Manana 提交于
      Move the definition of the function btrfs_find_new_delalloc_bytes() closer
      to the function btrfs_dirty_pages(), because in a future commit it will be
      used exclusively by btrfs_dirty_pages(). This just moves the function's
      definition, with no functional changes at all.
      Signed-off-by: NFilipe Manana <fdmanana@suse.com>
      Signed-off-by: NDavid Sterba <dsterba@suse.com>
      f48bf66b
  11. 02 11月, 2017 1 次提交
    • J
      Btrfs: rework outstanding_extents · 8b62f87b
      Josef Bacik 提交于
      Right now we do a lot of weird hoops around outstanding_extents in order
      to keep the extent count consistent.  This is because we logically
      transfer the outstanding_extent count from the initial reservation
      through the set_delalloc_bits.  This makes it pretty difficult to get a
      handle on how and when we need to mess with outstanding_extents.
      
      Fix this by revamping the rules of how we deal with outstanding_extents.
      Now instead everybody that is holding on to a delalloc extent is
      required to increase the outstanding extents count for itself.  This
      means we'll have something like this
      
      btrfs_delalloc_reserve_metadata	- outstanding_extents = 1
       btrfs_set_extent_delalloc	- outstanding_extents = 2
      btrfs_release_delalloc_extents	- outstanding_extents = 1
      
      for an initial file write.  Now take the append write where we extend an
      existing delalloc range but still under the maximum extent size
      
      btrfs_delalloc_reserve_metadata - outstanding_extents = 2
        btrfs_set_extent_delalloc
          btrfs_set_bit_hook		- outstanding_extents = 3
          btrfs_merge_extent_hook	- outstanding_extents = 2
      btrfs_delalloc_release_extents	- outstanding_extnets = 1
      
      In order to make the ordered extent transition we of course must now
      make ordered extents carry their own outstanding_extent reservation, so
      for cow_file_range we end up with
      
      btrfs_add_ordered_extent	- outstanding_extents = 2
      clear_extent_bit		- outstanding_extents = 1
      btrfs_remove_ordered_extent	- outstanding_extents = 0
      
      This makes all manipulations of outstanding_extents much more explicit.
      Every successful call to btrfs_delalloc_reserve_metadata _must_ now be
      combined with btrfs_release_delalloc_extents, even in the error case, as
      that is the only function that actually modifies the
      outstanding_extents counter.
      
      The drawback to this is now we are much more likely to have transient
      cases where outstanding_extents is much larger than it actually should
      be.  This could happen before as we manipulated the delalloc bits, but
      now it happens basically at every write.  This may put more pressure on
      the ENOSPC flushing code, but I think making this code simpler is worth
      the cost.  I have another change coming to mitigate this side-effect
      somewhat.
      
      I also added trace points for the counter manipulation.  These were used
      by a bpf script I wrote to help track down leak issues.
      Signed-off-by: NJosef Bacik <jbacik@fb.com>
      Signed-off-by: NDavid Sterba <dsterba@suse.com>
      8b62f87b
  12. 30 10月, 2017 3 次提交
  13. 05 9月, 2017 1 次提交
  14. 16 8月, 2017 2 次提交
  15. 10 7月, 2017 1 次提交
    • G
      btrfs: nowait aio: Correct assignment of pos · ff0fa732
      Goldwyn Rodrigues 提交于
      Assigning pos for usage early messes up in append mode, where the pos is
      re-assigned in generic_write_checks(). Assign pos later to get the
      correct position to write from iocb->ki_pos.
      
      Since check_can_nocow also uses the value of pos, we shift
      generic_write_checks() before check_can_nocow(). Checks with IOCB_DIRECT
      are present in generic_write_checks(), so checking for IOCB_NOWAIT is
      enough.
      
      Also, put locking sequence in the fast path.
      
      This fixes a user visible bug, as reported:
      
      "apparently breaks several shell related features on my system.
      In zsh history stopped working, because no new entries are added
      anymore.
      I fist noticed the issue when I tried to build mplayer. It uses a shell
      script to generate a help_mp.h file:
      [...]
      
      Here is a simple testcase:
      
       % echo "foo" >> test
       % echo "foo" >> test
       % cat test
       foo
       %
      "
      
      Fixes: edf064e7 ("btrfs: nowait aio support")
      CC: Jens Axboe <axboe@kernel.dk>
      Reported-by: NMarkus Trippelsdorf <markus@trippelsdorf.de>
      Link: https://lkml.kernel.org/r/20170704042306.GA274@x4Signed-off-by: NGoldwyn Rodrigues <rgoldwyn@suse.com>
      Reviewed-by: NDavid Sterba <dsterba@suse.com>
      Signed-off-by: NDavid Sterba <dsterba@suse.com>
      ff0fa732
  16. 06 7月, 2017 1 次提交
  17. 30 6月, 2017 2 次提交
    • Q
      btrfs: qgroup: Fix qgroup reserved space underflow by only freeing reserved ranges · bc42bda2
      Qu Wenruo 提交于
      [BUG]
      For the following case, btrfs can underflow qgroup reserved space
      at an error path:
      (Page size 4K, function name without "btrfs_" prefix)
      
               Task A                  |             Task B
      ----------------------------------------------------------------------
      Buffered_write [0, 2K)           |
      |- check_data_free_space()       |
      |  |- qgroup_reserve_data()      |
      |     Range aligned to page      |
      |     range [0, 4K)          <<< |
      |     4K bytes reserved      <<< |
      |- copy pages to page cache      |
                                       | Buffered_write [2K, 4K)
                                       | |- check_data_free_space()
                                       | |  |- qgroup_reserved_data()
                                       | |     Range alinged to page
                                       | |     range [0, 4K)
                                       | |     Already reserved by A <<<
                                       | |     0 bytes reserved      <<<
                                       | |- delalloc_reserve_metadata()
                                       | |  And it *FAILED* (Maybe EQUOTA)
                                       | |- free_reserved_data_space()
                                            |- qgroup_free_data()
                                               Range aligned to page range
                                               [0, 4K)
                                               Freeing 4K
      (Special thanks to Chandan for the detailed report and analyse)
      
      [CAUSE]
      Above Task B is freeing reserved data range [0, 4K) which is actually
      reserved by Task A.
      
      And at writeback time, page dirty by Task A will go through writeback
      routine, which will free 4K reserved data space at file extent insert
      time, causing the qgroup underflow.
      
      [FIX]
      For btrfs_qgroup_free_data(), add @reserved parameter to only free
      data ranges reserved by previous btrfs_qgroup_reserve_data().
      So in above case, Task B will try to free 0 byte, so no underflow.
      Reported-by: NChandan Rajendra <chandan@linux.vnet.ibm.com>
      Signed-off-by: NQu Wenruo <quwenruo@cn.fujitsu.com>
      Reviewed-by: NChandan Rajendra <chandan@linux.vnet.ibm.com>
      Tested-by: NChandan Rajendra <chandan@linux.vnet.ibm.com>
      Signed-off-by: NDavid Sterba <dsterba@suse.com>
      bc42bda2
    • Q
      btrfs: qgroup: Introduce extent changeset for qgroup reserve functions · 364ecf36
      Qu Wenruo 提交于
      Introduce a new parameter, struct extent_changeset for
      btrfs_qgroup_reserved_data() and its callers.
      
      Such extent_changeset was used in btrfs_qgroup_reserve_data() to record
      which range it reserved in current reserve, so it can free it in error
      paths.
      
      The reason we need to export it to callers is, at buffered write error
      path, without knowing what exactly which range we reserved in current
      allocation, we can free space which is not reserved by us.
      
      This will lead to qgroup reserved space underflow.
      Reviewed-by: NChandan Rajendra <chandan@linux.vnet.ibm.com>
      Signed-off-by: NQu Wenruo <quwenruo@cn.fujitsu.com>
      Signed-off-by: NDavid Sterba <dsterba@suse.com>
      364ecf36
  18. 21 6月, 2017 1 次提交
    • F
      Btrfs: fix invalid extent maps due to hole punching · 609805d8
      Filipe Manana 提交于
      While punching a hole in a range that is not aligned with the sector size
      (currently the same as the page size) we can end up leaving an extent map
      in memory with a length that is smaller then the sector size or with a
      start offset that is not aligned to the sector size. Both cases are not
      expected and can lead to problems. This issue is easily detected
      after the patch from commit a7e3b975 ("Btrfs: fix reported number of
      inode blocks"), introduced in kernel 4.12-rc1, in a scenario like the
      following for example:
      
        $ mkfs.btrfs -f /dev/sdb
        $ mount /dev/sdb /mnt
        $ xfs_io -c "pwrite -S 0xaa -b 100K 0 100K" /mnt/foo
        $ xfs_io -c "fpunch 60K 90K" /mnt/foo
        $ xfs_io -c "pwrite -S 0xbb -b 100K 50K 100K" /mnt/foo
        $ xfs_io -c "pwrite -S 0xcc -b 50K 100K 50K" /mnt/foo
        $ umount /mnt
      
      After the unmount operation we can see several warnings emmitted due to
      underflows related to space reservation counters:
      
      [ 2837.443299] ------------[ cut here ]------------
      [ 2837.447395] WARNING: CPU: 8 PID: 2474 at fs/btrfs/inode.c:9444 btrfs_destroy_inode+0xe8/0x27e [btrfs]
      [ 2837.452108] Modules linked in: dm_flakey dm_mod ppdev parport_pc psmouse parport sg pcspkr acpi_cpufreq tpm_tis tpm_tis_core i2c_piix4 i2c_core evdev tpm button se
      rio_raw sunrpc loop autofs4 ext4 crc16 jbd2 mbcache btrfs raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c crc32c_gene
      ric raid1 raid0 multipath linear md_mod sr_mod cdrom sd_mod ata_generic virtio_scsi ata_piix libata virtio_pci virtio_ring virtio e1000 scsi_mod floppy
      [ 2837.458389] CPU: 8 PID: 2474 Comm: umount Tainted: G        W       4.10.0-rc8-btrfs-next-43+ #1
      [ 2837.459754] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.9.1-0-gb3ef39f-prebuilt.qemu-project.org 04/01/2014
      [ 2837.462379] Call Trace:
      [ 2837.462379]  dump_stack+0x68/0x92
      [ 2837.462379]  __warn+0xc2/0xdd
      [ 2837.462379]  warn_slowpath_null+0x1d/0x1f
      [ 2837.462379]  btrfs_destroy_inode+0xe8/0x27e [btrfs]
      [ 2837.462379]  destroy_inode+0x3d/0x55
      [ 2837.462379]  evict+0x177/0x17e
      [ 2837.462379]  dispose_list+0x50/0x71
      [ 2837.462379]  evict_inodes+0x132/0x141
      [ 2837.462379]  generic_shutdown_super+0x3f/0xeb
      [ 2837.462379]  kill_anon_super+0x12/0x1c
      [ 2837.462379]  btrfs_kill_super+0x16/0x21 [btrfs]
      [ 2837.462379]  deactivate_locked_super+0x30/0x68
      [ 2837.462379]  deactivate_super+0x36/0x39
      [ 2837.462379]  cleanup_mnt+0x58/0x76
      [ 2837.462379]  __cleanup_mnt+0x12/0x14
      [ 2837.462379]  task_work_run+0x77/0x9b
      [ 2837.462379]  prepare_exit_to_usermode+0x9d/0xc5
      [ 2837.462379]  syscall_return_slowpath+0x196/0x1b9
      [ 2837.462379]  entry_SYSCALL_64_fastpath+0xab/0xad
      [ 2837.462379] RIP: 0033:0x7f3ef3e6b9a7
      [ 2837.462379] RSP: 002b:00007ffdd0d8de58 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6
      [ 2837.462379] RAX: 0000000000000000 RBX: 0000556f76a39060 RCX: 00007f3ef3e6b9a7
      [ 2837.462379] RDX: 0000000000000001 RSI: 0000000000000000 RDI: 0000556f76a3f910
      [ 2837.462379] RBP: 0000556f76a3f910 R08: 0000556f76a3e670 R09: 0000000000000015
      [ 2837.462379] R10: 00000000000006b4 R11: 0000000000000246 R12: 00007f3ef436ce64
      [ 2837.462379] R13: 0000000000000000 R14: 0000556f76a39240 R15: 00007ffdd0d8e0e0
      [ 2837.519355] ---[ end trace e79345fe24b30b8d ]---
      [ 2837.596256] ------------[ cut here ]------------
      [ 2837.597625] WARNING: CPU: 8 PID: 2474 at fs/btrfs/extent-tree.c:5699 btrfs_free_block_groups+0x246/0x3eb [btrfs]
      [ 2837.603547] Modules linked in: dm_flakey dm_mod ppdev parport_pc psmouse parport sg pcspkr acpi_cpufreq tpm_tis tpm_tis_core i2c_piix4 i2c_core evdev tpm button serio_raw sunrpc loop autofs4 ext4 crc16 jbd2 mbcache btrfs raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c crc32c_generic raid1 raid0 multipath linear md_mod sr_mod cdrom sd_mod ata_generic virtio_scsi ata_piix libata virtio_pci virtio_ring virtio e1000 scsi_mod floppy
      [ 2837.659372] CPU: 8 PID: 2474 Comm: umount Tainted: G        W       4.10.0-rc8-btrfs-next-43+ #1
      [ 2837.663359] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.9.1-0-gb3ef39f-prebuilt.qemu-project.org 04/01/2014
      [ 2837.663359] Call Trace:
      [ 2837.663359]  dump_stack+0x68/0x92
      [ 2837.663359]  __warn+0xc2/0xdd
      [ 2837.663359]  warn_slowpath_null+0x1d/0x1f
      [ 2837.663359]  btrfs_free_block_groups+0x246/0x3eb [btrfs]
      [ 2837.663359]  close_ctree+0x1dd/0x2e1 [btrfs]
      [ 2837.663359]  ? evict_inodes+0x132/0x141
      [ 2837.663359]  btrfs_put_super+0x15/0x17 [btrfs]
      [ 2837.663359]  generic_shutdown_super+0x6a/0xeb
      [ 2837.663359]  kill_anon_super+0x12/0x1c
      [ 2837.663359]  btrfs_kill_super+0x16/0x21 [btrfs]
      [ 2837.663359]  deactivate_locked_super+0x30/0x68
      [ 2837.663359]  deactivate_super+0x36/0x39
      [ 2837.663359]  cleanup_mnt+0x58/0x76
      [ 2837.663359]  __cleanup_mnt+0x12/0x14
      [ 2837.663359]  task_work_run+0x77/0x9b
      [ 2837.663359]  prepare_exit_to_usermode+0x9d/0xc5
      [ 2837.663359]  syscall_return_slowpath+0x196/0x1b9
      [ 2837.663359]  entry_SYSCALL_64_fastpath+0xab/0xad
      [ 2837.663359] RIP: 0033:0x7f3ef3e6b9a7
      [ 2837.663359] RSP: 002b:00007ffdd0d8de58 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6
      [ 2837.663359] RAX: 0000000000000000 RBX: 0000556f76a39060 RCX: 00007f3ef3e6b9a7
      [ 2837.663359] RDX: 0000000000000001 RSI: 0000000000000000 RDI: 0000556f76a3f910
      [ 2837.663359] RBP: 0000556f76a3f910 R08: 0000556f76a3e670 R09: 0000000000000015
      [ 2837.663359] R10: 00000000000006b4 R11: 0000000000000246 R12: 00007f3ef436ce64
      [ 2837.663359] R13: 0000000000000000 R14: 0000556f76a39240 R15: 00007ffdd0d8e0e0
      [ 2837.739445] ---[ end trace e79345fe24b30b8e ]---
      [ 2837.745595] ------------[ cut here ]------------
      [ 2837.746412] WARNING: CPU: 8 PID: 2474 at fs/btrfs/extent-tree.c:5700 btrfs_free_block_groups+0x261/0x3eb [btrfs]
      [ 2837.747955] Modules linked in: dm_flakey dm_mod ppdev parport_pc psmouse parport sg pcspkr acpi_cpufreq tpm_tis tpm_tis_core i2c_piix4 i2c_core evdev tpm button serio_raw sunrpc loop autofs4 ext4 crc16 jbd2 mbcache btrfs raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c crc32c_generic raid1 raid0 multipath linear md_mod sr_mod cdrom sd_mod ata_generic virtio_scsi ata_piix libata virtio_pci virtio_ring virtio e1000 scsi_mod floppy
      [ 2837.755395] CPU: 8 PID: 2474 Comm: umount Tainted: G        W       4.10.0-rc8-btrfs-next-43+ #1
      [ 2837.756769] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.9.1-0-gb3ef39f-prebuilt.qemu-project.org 04/01/2014
      [ 2837.758526] Call Trace:
      [ 2837.758925]  dump_stack+0x68/0x92
      [ 2837.759383]  __warn+0xc2/0xdd
      [ 2837.759383]  warn_slowpath_null+0x1d/0x1f
      [ 2837.759383]  btrfs_free_block_groups+0x261/0x3eb [btrfs]
      [ 2837.759383]  close_ctree+0x1dd/0x2e1 [btrfs]
      [ 2837.759383]  ? evict_inodes+0x132/0x141
      [ 2837.759383]  btrfs_put_super+0x15/0x17 [btrfs]
      [ 2837.759383]  generic_shutdown_super+0x6a/0xeb
      [ 2837.759383]  kill_anon_super+0x12/0x1c
      [ 2837.759383]  btrfs_kill_super+0x16/0x21 [btrfs]
      [ 2837.759383]  deactivate_locked_super+0x30/0x68
      [ 2837.759383]  deactivate_super+0x36/0x39
      [ 2837.759383]  cleanup_mnt+0x58/0x76
      [ 2837.759383]  __cleanup_mnt+0x12/0x14
      [ 2837.759383]  task_work_run+0x77/0x9b
      [ 2837.759383]  prepare_exit_to_usermode+0x9d/0xc5
      [ 2837.759383]  syscall_return_slowpath+0x196/0x1b9
      [ 2837.759383]  entry_SYSCALL_64_fastpath+0xab/0xad
      [ 2837.759383] RIP: 0033:0x7f3ef3e6b9a7
      [ 2837.759383] RSP: 002b:00007ffdd0d8de58 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6
      [ 2837.759383] RAX: 0000000000000000 RBX: 0000556f76a39060 RCX: 00007f3ef3e6b9a7
      [ 2837.759383] RDX: 0000000000000001 RSI: 0000000000000000 RDI: 0000556f76a3f910
      [ 2837.759383] RBP: 0000556f76a3f910 R08: 0000556f76a3e670 R09: 0000000000000015
      [ 2837.759383] R10: 00000000000006b4 R11: 0000000000000246 R12: 00007f3ef436ce64
      [ 2837.759383] R13: 0000000000000000 R14: 0000556f76a39240 R15: 00007ffdd0d8e0e0
      [ 2837.777063] ---[ end trace e79345fe24b30b8f ]---
      [ 2837.778235] ------------[ cut here ]------------
      [ 2837.778856] WARNING: CPU: 8 PID: 2474 at fs/btrfs/extent-tree.c:9825 btrfs_free_block_groups+0x348/0x3eb [btrfs]
      [ 2837.791385] Modules linked in: dm_flakey dm_mod ppdev parport_pc psmouse parport sg pcspkr acpi_cpufreq tpm_tis tpm_tis_core i2c_piix4 i2c_core evdev tpm button serio_raw sunrpc loop autofs4 ext4 crc16 jbd2 mbcache btrfs raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c crc32c_generic raid1 raid0 multipath linear md_mod sr_mod cdrom sd_mod ata_generic virtio_scsi ata_piix libata virtio_pci virtio_ring virtio e1000 scsi_mod floppy
      [ 2837.797711] CPU: 8 PID: 2474 Comm: umount Tainted: G        W       4.10.0-rc8-btrfs-next-43+ #1
      [ 2837.798594] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.9.1-0-gb3ef39f-prebuilt.qemu-project.org 04/01/2014
      [ 2837.800118] Call Trace:
      [ 2837.800515]  dump_stack+0x68/0x92
      [ 2837.801015]  __warn+0xc2/0xdd
      [ 2837.801471]  warn_slowpath_null+0x1d/0x1f
      [ 2837.801698]  btrfs_free_block_groups+0x348/0x3eb [btrfs]
      [ 2837.801698]  close_ctree+0x1dd/0x2e1 [btrfs]
      [ 2837.801698]  ? evict_inodes+0x132/0x141
      [ 2837.801698]  btrfs_put_super+0x15/0x17 [btrfs]
      [ 2837.801698]  generic_shutdown_super+0x6a/0xeb
      [ 2837.801698]  kill_anon_super+0x12/0x1c
      [ 2837.801698]  btrfs_kill_super+0x16/0x21 [btrfs]
      [ 2837.801698]  deactivate_locked_super+0x30/0x68
      [ 2837.801698]  deactivate_super+0x36/0x39
      [ 2837.801698]  cleanup_mnt+0x58/0x76
      [ 2837.801698]  __cleanup_mnt+0x12/0x14
      [ 2837.801698]  task_work_run+0x77/0x9b
      [ 2837.801698]  prepare_exit_to_usermode+0x9d/0xc5
      [ 2837.801698]  syscall_return_slowpath+0x196/0x1b9
      [ 2837.801698]  entry_SYSCALL_64_fastpath+0xab/0xad
      [ 2837.801698] RIP: 0033:0x7f3ef3e6b9a7
      [ 2837.801698] RSP: 002b:00007ffdd0d8de58 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6
      [ 2837.801698] RAX: 0000000000000000 RBX: 0000556f76a39060 RCX: 00007f3ef3e6b9a7
      [ 2837.801698] RDX: 0000000000000001 RSI: 0000000000000000 RDI: 0000556f76a3f910
      [ 2837.801698] RBP: 0000556f76a3f910 R08: 0000556f76a3e670 R09: 0000000000000015
      [ 2837.801698] R10: 00000000000006b4 R11: 0000000000000246 R12: 00007f3ef436ce64
      [ 2837.801698] R13: 0000000000000000 R14: 0000556f76a39240 R15: 00007ffdd0d8e0e0
      [ 2837.818441] ---[ end trace e79345fe24b30b90 ]---
      [ 2837.818991] BTRFS info (device sdc): space_info 1 has 7974912 free, is not full
      [ 2837.819830] BTRFS info (device sdc): space_info total=8388608, used=417792, pinned=0, reserved=0, may_use=18446744073709547520, readonly=0
      
      What happens in the above example is the following:
      
      1) When punching the hole, at btrfs_punch_hole(), the variable tail_len
         is set to 2048 (as tail_start is 148Kb + 1 and offset + len is 150Kb).
         This results in the creation of an extent map with a length of 2Kb
         starting at file offset 148Kb, through find_first_non_hole() ->
         btrfs_get_extent().
      
      2) The second write (first write after the hole punch operation), sets
         the range [50Kb, 152Kb[ to delalloc.
      
      3) The third write, at btrfs_find_new_delalloc_bytes(), sees the extent
         map covering the range [148Kb, 150Kb[ and ends up calling
         set_extent_bit() for the same range, which results in splitting an
         existing extent state record, covering the range [148Kb, 152Kb[ into
         two 2Kb extent state records, covering the ranges [148Kb, 150Kb[ and
         [150Kb, 152Kb[.
      
      4) Finally at lock_and_cleanup_extent_if_need(), immediately after calling
         btrfs_find_new_delalloc_bytes() we clear the delalloc bit from the
         range [100Kb, 152Kb[ which results in the btrfs_clear_bit_hook()
         callback being invoked against the two 2Kb extent state records that
         cover the ranges [148Kb, 150Kb[ and [150Kb, 152Kb[. When called against
         the first 2Kb extent state, it calls btrfs_delalloc_release_metadata()
         with a length argument of 2048 bytes. That function rounds up the length
         to a sector size aligned length, so it ends up considering a length of
         4096 bytes, and then calls calc_csum_metadata_size() which results in
         decrementing the inode's csum_bytes counter by 4096 bytes, so after
         it stays a value of 0 bytes. Then the same happens when
         btrfs_clear_bit_hook() is called against the second extent state that
         has a length of 2Kb, covering the range [150Kb, 152Kb[, the length is
         rounded up to 4096 and calc_csum_metadata_size() ends up being called
         to decrement 4096 bytes from the inode's csum_bytes counter, which
         at that time has a value of 0, leading to an underflow, which is
         exactly what triggers the first warning, at btrfs_destroy_inode().
         All the other warnings relate to several space accounting counters
         that underflow as well due to similar reasons.
      
      A similar case but where the hole punching operation creates an extent map
      with a start offset not aligned to the sector size is the following:
      
        $ mkfs.btrfs -f /dev/sdb
        $ mount /dev/sdb /mnt
        $ xfs_io -f -c "fpunch 695K 820K" $SCRATCH_MNT/bar
        $ xfs_io -c "pwrite -S 0xaa 1008K 307K" $SCRATCH_MNT/bar
        $ xfs_io -c "pwrite -S 0xbb -b 630K 1073K 630K" $SCRATCH_MNT/bar
        $ xfs_io -c "pwrite -S 0xcc -b 459K 1068K 459K" $SCRATCH_MNT/bar
        $ umount /mnt
      
      During the unmount operation we get similar traces for the same reasons as
      in the first example.
      
      So fix the hole punching operation to make sure it never creates extent
      maps with a length that is not aligned to the sector size nor with a start
      offset that is not aligned to the sector size, as this breaks all
      assumptions and it's a land mine.
      
      Fixes: d7781546 ("btrfs: Avoid trucating page or punching hole in a already existed hole.")
      Cc: <stable@vger.kernel.org>
      Signed-off-by: NFilipe Manana <fdmanana@suse.com>
      Reviewed-by: NLiu Bo <bo.li.liu@oracle.com>
      Signed-off-by: NDavid Sterba <dsterba@suse.com>
      609805d8
  19. 20 6月, 2017 1 次提交
  20. 26 4月, 2017 1 次提交
    • F
      Btrfs: fix reported number of inode blocks · a7e3b975
      Filipe Manana 提交于
      Currently when there are buffered writes that were not yet flushed and
      they fall within allocated ranges of the file (that is, not in holes or
      beyond eof assuming there are no prealloc extents beyond eof), btrfs
      simply reports an incorrect number of used blocks through the stat(2)
      system call (or any of its variants), regardless of mount options or
      inode flags (compress, compress-force, nodatacow). This is because the
      number of blocks used that is reported is based on the current number
      of bytes in the vfs inode plus the number of dealloc bytes in the btrfs
      inode. The later covers bytes that both fall within allocated regions
      of the file and holes.
      
      Example scenarios where the number of reported blocks is wrong while the
      buffered writes are not flushed:
      
        $ mkfs.btrfs -f /dev/sdc
        $ mount /dev/sdc /mnt/sdc
      
        $ xfs_io -f -c "pwrite -S 0xaa 0 64K" /mnt/sdc/foo1
        wrote 65536/65536 bytes at offset 0
        64 KiB, 16 ops; 0.0000 sec (259.336 MiB/sec and 66390.0415 ops/sec)
      
        $ sync
      
        $ xfs_io -c "pwrite -S 0xbb 0 64K" /mnt/sdc/foo1
        wrote 65536/65536 bytes at offset 0
        64 KiB, 16 ops; 0.0000 sec (192.308 MiB/sec and 49230.7692 ops/sec)
      
        # The following should have reported 64K...
        $ du -h /mnt/sdc/foo1
        128K	/mnt/sdc/foo1
      
        $ sync
      
        # After flushing the buffered write, it now reports the correct value.
        $ du -h /mnt/sdc/foo1
        64K	/mnt/sdc/foo1
      
        $ xfs_io -f -c "falloc -k 0 128K" -c "pwrite -S 0xaa 0 64K" /mnt/sdc/foo2
        wrote 65536/65536 bytes at offset 0
        64 KiB, 16 ops; 0.0000 sec (520.833 MiB/sec and 133333.3333 ops/sec)
      
        $ sync
      
        $ xfs_io -c "pwrite -S 0xbb 64K 64K" /mnt/sdc/foo2
        wrote 65536/65536 bytes at offset 65536
        64 KiB, 16 ops; 0.0000 sec (260.417 MiB/sec and 66666.6667 ops/sec)
      
        # The following should have reported 128K...
        $ du -h /mnt/sdc/foo2
        192K	/mnt/sdc/foo2
      
        $ sync
      
        # After flushing the buffered write, it now reports the correct value.
        $ du -h /mnt/sdc/foo2
        128K	/mnt/sdc/foo2
      
      So the number of used file blocks is simply incorrect, unlike in other
      filesystems such as ext4 and xfs for example, but only while the buffered
      writes are not flushed.
      
      Fix this by tracking the number of delalloc bytes that fall within holes
      and beyond eof of a file, and use instead this new counter when reporting
      the number of used blocks for an inode.
      
      Another different problem that exists is that the delalloc bytes counter
      is reset when writeback starts (by clearing the EXTENT_DEALLOC flag from
      the respective range in the inode's iotree) and the vfs inode's bytes
      counter is only incremented when writeback finishes (through
      insert_reserved_file_extent()). Therefore while writeback is ongoing we
      simply report a wrong number of blocks used by an inode if the write
      operation covers a range previously unallocated. While this change does
      not fix this problem, it does minimizes it a lot by shortening that time
      window, as the new dealloc bytes counter (new_delalloc_bytes) is only
      decremented when writeback finishes right before updating the vfs inode's
      bytes counter. Fully fixing this second problem is not trivial and will
      be addressed later by a different patch.
      Signed-off-by: NFilipe Manana <fdmanana@suse.com>
      a7e3b975