1. 27 2月, 2021 5 次提交
  2. 26 2月, 2021 19 次提交
    • I
      btrfs: use copy_highpage() instead of 2 kmaps() · 80cc8384
      Ira Weiny 提交于
      There are many places where kmap/memmove/kunmap patterns occur.
      
      This pattern exists in the core common function copy_highpage().
      
      Use copy_highpage() to avoid open coding the use of kmap and to leverage
      the core function's use of kmap_local_page().
      
      Development of this patch was aided by the following coccinelle script:
      
      // <smpl>
      // SPDX-License-Identifier: GPL-2.0-only
      // Find kmap/copypage/kunmap pattern and replace with copy_highpage calls
      //
      // NOTE: The expressions in the copy page version of this kmap pattern are
      // overly complex and so these all need individual attention.
      //
      // Confidence: Low
      // Copyright: (C) 2021 Intel Corporation
      // URL: http://coccinelle.lip6.fr/
      // Comments:
      // Options:
      
      //
      // Then a copy_page where we have 2 pages involved.
      //
      @ copy_page_rule @
      expression page, page2, To, From, Size;
      identifier ptr, ptr2;
      type VP, VP2;
      @@
      
      /* kmap */
      (
      -VP ptr = kmap(page);
      ...
      -VP2 ptr2 = kmap(page2);
      |
      -VP ptr = kmap_atomic(page);
      ...
      -VP2 ptr2 = kmap_atomic(page2);
      |
      -ptr = kmap(page);
      ...
      -ptr2 = kmap(page2);
      |
      -ptr = kmap_atomic(page);
      ...
      -ptr2 = kmap_atomic(page2);
      )
      
      // 1 or more copy versions of the entire page
      <+...
      (
      -copy_page(To, From);
      +copy_highpage(To, From);
      |
      -memmove(To, From, Size);
      +memmoveExtra(To, From, Size);
      )
      ...+>
      
      /* kunmap */
      (
      -kunmap(page2);
      ...
      -kunmap(page);
      |
      -kunmap(page);
      ...
      -kunmap(page2);
      |
      -kunmap_atomic(ptr2);
      ...
      -kunmap_atomic(ptr);
      )
      
      // Remove any pointers left unused
      @
      depends on copy_page_rule
      @
      identifier copy_page_rule.ptr;
      identifier copy_page_rule.ptr2;
      type VP, VP1;
      type VP2, VP21;
      @@
      
      -VP ptr;
      	... when != ptr;
      ? VP1 ptr;
      -VP2 ptr2;
      	... when != ptr2;
      ? VP21 ptr2;
      
      // </smpl>
      Reviewed-by: Christoph Hellwig <hch@lst.de>
      Signed-off-by: Ira Weiny <ira.weiny@intel.com>
      Reviewed-by: David Sterba <dsterba@suse.com>
      Signed-off-by: David Sterba <dsterba@suse.com>
      80cc8384
    • I
      btrfs: use memcpy_[to|from]_page() and kmap_local_page() · 3590ec58
      Ira Weiny 提交于
      There are many places where the pattern kmap/memcpy/kunmap occurs.
      
      This pattern was lifted to the core common functions
      memcpy_[to|from]_page().
      
      Use these new functions to reduce the code, eliminate direct uses of
      kmap, and leverage the new core functions' use of kmap_local_page().
      
      Also, there is 1 place where a kmap/memcpy is followed by an
      optional memset.  Here we leave the kmap open coded to avoid remapping
      the page but use kmap_local_page() directly.
      
      Development of this patch was aided by the coccinelle script:
      
      // <smpl>
      // SPDX-License-Identifier: GPL-2.0-only
      // Find kmap/memcpy/kunmap pattern and replace with memcpy*page calls
      //
      // NOTE: Offsets and other expressions may be more complex than what the script
      // will automatically generate.  Therefore a catchall rule is provided to find
      // the pattern which then must be evaluated by hand.
      //
      // Confidence: Low
      // Copyright: (C) 2021 Intel Corporation
      // URL: http://coccinelle.lip6.fr/
      // Comments:
      // Options:
      
      //
      // simple memcpy version
      //
      @ memcpy_rule1 @
      expression page, T, F, B, Off;
      identifier ptr;
      type VP;
      @@
      
      (
      -VP ptr = kmap(page);
      |
      -ptr = kmap(page);
      |
      -VP ptr = kmap_atomic(page);
      |
      -ptr = kmap_atomic(page);
      )
      <+...
      (
      -memcpy(ptr + Off, F, B);
      +memcpy_to_page(page, Off, F, B);
      |
      -memcpy(ptr, F, B);
      +memcpy_to_page(page, 0, F, B);
      |
      -memcpy(T, ptr + Off, B);
      +memcpy_from_page(T, page, Off, B);
      |
      -memcpy(T, ptr, B);
      +memcpy_from_page(T, page, 0, B);
      )
      ...+>
      (
      -kunmap(page);
      |
      -kunmap_atomic(ptr);
      )
      
      // Remove any pointers left unused
      @
      depends on memcpy_rule1
      @
      identifier memcpy_rule1.ptr;
      type VP, VP1;
      @@
      
      -VP ptr;
      	... when != ptr;
      ? VP1 ptr;
      
      //
      // Some callers kmap without a temp pointer
      //
      @ memcpy_rule2 @
      expression page, T, Off, F, B;
      @@
      
      <+...
      (
      -memcpy(kmap(page) + Off, F, B);
      +memcpy_to_page(page, Off, F, B);
      |
      -memcpy(kmap(page), F, B);
      +memcpy_to_page(page, 0, F, B);
      |
      -memcpy(T, kmap(page) + Off, B);
      +memcpy_from_page(T, page, Off, B);
      |
      -memcpy(T, kmap(page), B);
      +memcpy_from_page(T, page, 0, B);
      )
      ...+>
      -kunmap(page);
      // No need for the ptr variable removal
      
      //
      // Catch all
      //
      @ memcpy_rule3 @
      expression page;
      expression GenTo, GenFrom, GenSize;
      identifier ptr;
      type VP;
      @@
      
      (
      -VP ptr = kmap(page);
      |
      -ptr = kmap(page);
      |
      -VP ptr = kmap_atomic(page);
      |
      -ptr = kmap_atomic(page);
      )
      <+...
      (
      //
      // Some call sites have complex expressions within the memcpy
      // match a catch all to be evaluated by hand.
      //
      -memcpy(GenTo, GenFrom, GenSize);
      +memcpy_to_pageExtra(page, GenTo, GenFrom, GenSize);
      +memcpy_from_pageExtra(GenTo, page, GenFrom, GenSize);
      )
      ...+>
      (
      -kunmap(page);
      |
      -kunmap_atomic(ptr);
      )
      
      // Remove any pointers left unused
      @
      depends on memcpy_rule3
      @
      identifier memcpy_rule3.ptr;
      type VP, VP1;
      @@
      
      -VP ptr;
      	... when != ptr;
      ? VP1 ptr;
      
      // </smpl>
      Reviewed-by: Christoph Hellwig <hch@lst.de>
      Signed-off-by: Ira Weiny <ira.weiny@intel.com>
      Reviewed-by: David Sterba <dsterba@suse.com>
      Signed-off-by: David Sterba <dsterba@suse.com>
      3590ec58
    • S
      cifs: update internal version number · 8369dfd7
      Steve French 提交于
      To 2.31
      Signed-off-by: Steve French <stfrench@microsoft.com>
      8369dfd7
    • D
      cifs: use discard iterator to discard unneeded network data more efficiently · cf0604a6
      David Howells 提交于
      The iterator, ITER_DISCARD, that can only be used in READ mode and
      just discards any data copied to it, was added to allow a network
      filesystem to discard any unwanted data sent by a server.
      Convert cifs_discard_from_socket() to use this.
      Signed-off-by: David Howells <dhowells@redhat.com>
      Signed-off-by: Steve French <stfrench@microsoft.com>
      cf0604a6
    • P
      cifs: introduce helper for finding referral server to improve DFS target resolution · 5ff2836e
      Paulo Alcantara 提交于
      Some servers seem to mistakenly report different values for
      capabilities and share flags, so we can't always rely on those values
      to decide whether the resolved target can handle any new DFS
      referrals.
      
      Add a new helper is_referral_server() to check if all resolved targets
      can handle new DFS referrals by directly looking at the
      GET_DFS_REFERRAL.ReferralHeaderFlags value as specified in MS-DFSC
      2.2.4 RESP_GET_DFS_REFERRAL in addition to is_tcon_dfs().
      Signed-off-by: Paulo Alcantara (SUSE) <pc@cjr.nz>
      Cc: stable@vger.kernel.org # 5.11
      Signed-off-by: Steve French <stfrench@microsoft.com>
      5ff2836e
    • P
      cifs: check all path components in resolved dfs target · ff2c54a0
      Paulo Alcantara 提交于
      Handle the case where a resolved target share is like
      //server/users/dir, and the user "foo" has no read permission to
      access the parent folder "users" but has access to the final path
      component "dir".
      
      is_path_remote() already implements that, so call it directly.
      Signed-off-by: Paulo Alcantara (SUSE) <pc@cjr.nz>
      Cc: stable@vger.kernel.org # 5.11
      Signed-off-by: Steve French <stfrench@microsoft.com>
      ff2c54a0
    • P
      cifs: fix DFS failover · 8513222b
      Paulo Alcantara 提交于
      In do_dfs_failover(), the mount_get_conns() function requires the full
      fs context in order to get new connection to server, so clone the
      original context and change it accordingly when retrying the DFS
      targets in the referral.
      
      If failover was successful, then update original context with the new
      UNC, prefix path and ip address.
      Signed-off-by: Paulo Alcantara (SUSE) <pc@cjr.nz>
      Cc: stable@vger.kernel.org # 5.11
      Signed-off-by: Steve French <stfrench@microsoft.com>
      8513222b
    • P
      cifs: fix nodfs mount option · d01132ae
      Paulo Alcantara 提交于
      Skip DFS resolving when mounting with 'nodfs' even if
      CONFIG_CIFS_DFS_UPCALL is enabled.
      Signed-off-by: Paulo Alcantara (SUSE) <pc@cjr.nz>
      Cc: stable@vger.kernel.org # 5.11
      Reviewed-by: Shyam Prasad N <sprasad@microsoft.com>
      Signed-off-by: Steve French <stfrench@microsoft.com>
      d01132ae
    • R
      cifs: fix handling of escaped ',' in the password mount argument · d08395a3
      Ronnie Sahlberg 提交于
      Passwords can contain ',' which are also used as the separator between
      mount options. Mount.cifs will escape all ',' characters as the string ",,".
      Update parsing of the mount options to detect ",," and treat it as a single
      ',' character.
      
      Fixes: 24e0a1ef ("cifs: switch to new mount api")
      Cc: stable@vger.kernel.org # 5.11
      Reported-by: Simon Taylor <simon@simon-taylor.me.uk>
      Tested-by: Simon Taylor <simon@simon-taylor.me.uk>
      Signed-off-by: Ronnie Sahlberg <lsahlber@redhat.com>
      Signed-off-by: Steve French <stfrench@microsoft.com>
      d08395a3
    • S
      cifs: Add new parameter "acregmax" for distinct file and directory metadata timeout · 57804646
      Steve French 提交于
      The new optional mount parameter "acregmax" allows a different
      timeout for file metadata ("acdirmax" now allows controlling timeout
      for directory metadata).  Setting "actimeo" still works as before,
      and changes timeout for both files and directories, but
      specifying "acregmax" or "acdirmax" allows overriding the
      default more granularly which can be a big performance benefit
      on some workloads. "acregmax" is already used by NFS as a mount
      parameter (albeit with a larger default and thus looser caching).
      Suggested-by: Tom Talpey <tom@talpey.com>
      Reviewed-by: Tom Talpey <tom@talpey.com>
      Reviewed-by: Ronnie Sahlberg <lsahlber@redhat.com>
      Signed-off-by: Steve French <stfrench@microsoft.com>
      57804646
    • S
      cifs: convert revalidate of directories to using directory metadata cache timeout · ddaf6d4a
      Steve French 提交于
      The new optional mount parm, "acdirmax" allows caching the metadata
      for a directory longer than file metadata, which can be very helpful
      for performance.  Convert cifs_inode_needs_reval to check acdirmax
      for revalidating directory metadata.
      Signed-off-by: Steve French <stfrench@microsoft.com>
      Reviewed-by: Ronnie Sahlberg <lsahlber@redhat.com>
      Reviewed-by: Tom Talpey <tom@talpey.com>
      Signed-off-by: Steve French <stfrench@microsoft.com>
      ddaf6d4a
    • S
      cifs: Add new mount parameter "acdirmax" to allow caching directory metadata · 4c9f9481
      Steve French 提交于
      nfs and cifs on Linux currently have a mount parameter "actimeo" to control
      metadata (attribute) caching but cifs does not have additional mount
      parameters to allow distinguishing between caching directory metadata
      (e.g. needed to revalidate paths) and that for files.
      
      Add new mount parameter "acdirmax" to allow caching metadata for
      directories more loosely than file metadata.  NFS adjusts metadata
      caching from acdirmin to acdirmax (and another two mount parms
      for files) but to reduce complexity, it is safer to just introduce
      the one mount parm to allow caching directories longer. The
      defaults for acdirmax and actimeo (for cifs.ko) are conservative,
      1 second (NFS defaults acdirmax to 60 seconds). For many workloads,
      setting acdirmax to a higher value is safe and will improve
      performance.  This patch leaves unchanged the default values
      for caching metadata for files and directories but gives the
      user more flexibility in adjusting them safely for their workload
      via the new mount parm.
      Signed-off-by: Steve French <stfrench@microsoft.com>
      Reviewed-by: Ronnie Sahlberg <lsahlber@redhat.com>
      Reviewed-by: Tom Talpey <tom@talpey.com>
      4c9f9481
    • J
      io-wq: remove now unused IO_WQ_BIT_ERROR · d6ce7f67
      Jens Axboe 提交于
      This flag is now dead, remove it.
      
      Fixes: 1cbd9c2b ("io-wq: don't create any IO workers upfront")
      Signed-off-by: Jens Axboe <axboe@kernel.dk>
      d6ce7f67
    • J
      io_uring: fix SQPOLL thread handling over exec · 5f3f26f9
      Jens Axboe 提交于
      Just like the changes for io-wq, ensure that we re-fork the SQPOLL
      thread if the owner execs. Mark the ctx sq thread as sqo_exec if
      it dies, and the ring as needing a wakeup which will force the task
      to enter the kernel. When it does, set up the new thread and proceed
      as usual.
      Signed-off-by: Jens Axboe <axboe@kernel.dk>
      5f3f26f9
    • J
      io-wq: improve manager/worker handling over exec · 4fb6ac32
      Jens Axboe 提交于
      exec will cancel any threads, including the ones that io-wq is using. This
      isn't a problem, in fact we'd prefer it to be that way since it means we
      know that any async work cancels naturally without having to handle it
      proactively.
      
      But it does mean that we need to set up a new manager, as the manager and
      workers are gone. Handle this at queue time, and cancel work if we fail.
      Since the manager can go away without us noticing, ensure that the manager
      itself holds a reference to the 'wq' as well. Rename io_wq_destroy() to
      io_wq_put() to reflect that.
      
      In the future we can now simplify exec cancelation handling, for now just
      leave it the same.
      Signed-off-by: Jens Axboe <axboe@kernel.dk>
      4fb6ac32
    • J
      io_uring: ensure SQPOLL startup is triggered before error shutdown · eb85890b
      Jens Axboe 提交于
      syzbot reports the following hang:
      
      INFO: task syz-executor.0:12538 can't die for more than 143 seconds.
      task:syz-executor.0  state:D stack:28352 pid:12538 ppid:  8423 flags:0x00004004
      Call Trace:
       context_switch kernel/sched/core.c:4324 [inline]
       __schedule+0x90c/0x21a0 kernel/sched/core.c:5075
       schedule+0xcf/0x270 kernel/sched/core.c:5154
       schedule_timeout+0x1db/0x250 kernel/time/timer.c:1868
       do_wait_for_common kernel/sched/completion.c:85 [inline]
       __wait_for_common kernel/sched/completion.c:106 [inline]
       wait_for_common kernel/sched/completion.c:117 [inline]
       wait_for_completion+0x168/0x270 kernel/sched/completion.c:138
       io_sq_thread_finish+0x96/0x580 fs/io_uring.c:7152
       io_sq_offload_create fs/io_uring.c:7929 [inline]
       io_uring_create fs/io_uring.c:9465 [inline]
       io_uring_setup+0x1fb2/0x2c20 fs/io_uring.c:9550
       do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46
       entry_SYSCALL_64_after_hwframe+0x44/0xae
      
      which is due to exiting after the SQPOLL thread has been created, but
      hasn't been started yet. Ensure that we always complete the startup
      side when waiting for it to exit.
      
      Reported-by: syzbot+c927c937cba8ef66dd4a@syzkaller.appspotmail.com
      Signed-off-by: Jens Axboe <axboe@kernel.dk>
      eb85890b
    • J
      io-wq: make buffered file write hashed work map per-ctx · e941894e
      Jens Axboe 提交于
      Before the io-wq thread change, we maintained a hash work map and lock
      per-node per-ring. That wasn't ideal, as we really wanted it to be per
      ring. But now that we have per-task workers, the hash map ends up being
      just per-task. That'll work just fine for the normal case of having
      one task use a ring, but if you share the ring between tasks, then it's
      considerably worse than it was before.
      
      Make the hash map per ctx instead, which provides full per-ctx buffered
      write serialization on hashed writes.
      Signed-off-by: Jens Axboe <axboe@kernel.dk>
      e941894e
    • D
      xfs: use current->journal_info for detecting transaction recursion · 756b1c34
      Dave Chinner 提交于
      The iomap code's use of PF_MEMALLOC_NOFS to detect transaction
      recursion in XFS is just wrong. Remove it from the iomap code and
      replace it with XFS specific internal checks using
      current->journal_info instead.
      
      [djwong: This change also realigns the lifetime of NOFS flag changes to
      match the incore transaction, instead of the inconsistent scheme we have
      now.]
      
      Fixes: 9070733b ("xfs: abstract PF_FSTRANS to PF_MEMALLOC_NOFS")
      Signed-off-by: Dave Chinner <dchinner@redhat.com>
      Reviewed-by: Darrick J. Wong <djwong@kernel.org>
      Signed-off-by: Darrick J. Wong <djwong@kernel.org>
      Reviewed-by: Christoph Hellwig <hch@lst.de>
      756b1c34
    • D
      xfs: don't nest transactions when scanning for eofblocks · 9febcda6
      Darrick J. Wong 提交于
      Brian Foster reported a lockdep warning on xfs/167:
      
      ============================================
      WARNING: possible recursive locking detected
      5.11.0-rc4 #35 Tainted: G        W I
      --------------------------------------------
      fsstress/17733 is trying to acquire lock:
      ffff8e0fd1d90650 (sb_internal){++++}-{0:0}, at: xfs_free_eofblocks+0x104/0x1d0 [xfs]
      
      but task is already holding lock:
      ffff8e0fd1d90650 (sb_internal){++++}-{0:0}, at: xfs_trans_alloc_inode+0x5f/0x160 [xfs]
      
      stack backtrace:
      CPU: 38 PID: 17733 Comm: fsstress Tainted: G        W I       5.11.0-rc4 #35
      Hardware name: Dell Inc. PowerEdge R740/01KPX8, BIOS 1.6.11 11/20/2018
      Call Trace:
       dump_stack+0x8b/0xb0
       __lock_acquire.cold+0x159/0x2ab
       lock_acquire+0x116/0x370
       xfs_trans_alloc+0x1ad/0x310 [xfs]
       xfs_free_eofblocks+0x104/0x1d0 [xfs]
       xfs_blockgc_scan_inode+0x24/0x60 [xfs]
       xfs_inode_walk_ag+0x202/0x4b0 [xfs]
       xfs_inode_walk+0x66/0xc0 [xfs]
       xfs_trans_alloc+0x160/0x310 [xfs]
       xfs_trans_alloc_inode+0x5f/0x160 [xfs]
       xfs_alloc_file_space+0x105/0x300 [xfs]
       xfs_file_fallocate+0x270/0x460 [xfs]
       vfs_fallocate+0x14d/0x3d0
       __x64_sys_fallocate+0x3e/0x70
       do_syscall_64+0x33/0x40
       entry_SYSCALL_64_after_hwframe+0x44/0xa9
      
      The cause of this is the new code that spurs a scan to garbage collect
      speculative preallocations if we fail to reserve enough blocks while
      allocating a transaction.  While the warning itself is a fairly benign
      lockdep complaint, it does expose a potential livelock if the rwsem
      behavior ever changes with regards to nesting read locks when someone's
      waiting for a write lock.
      
      Fix this by freeing the transaction and jumping back to xfs_trans_alloc
      like this patch in the V4 submission[1].
      
      [1] https://lore.kernel.org/linux-xfs/161142798066.2171939.9311024588681972086.stgit@magnolia/
      
      Fixes: a1a7d05a ("xfs: flush speculative space allocations when we run out of space")
      Reported-by: Brian Foster <bfoster@redhat.com>
      Signed-off-by: Darrick J. Wong <djwong@kernel.org>
      Reviewed-by: Brian Foster <bfoster@redhat.com>
      Reviewed-by: Allison Henderson <allison.henderson@oracle.com>
      Reviewed-by: Christoph Hellwig <hch@lst.de>
      9febcda6
  3. 25 2月, 2021 16 次提交