1. 31 7月, 2018 1 次提交
  2. 22 6月, 2018 1 次提交
  3. 20 6月, 2018 1 次提交
  4. 13 6月, 2018 2 次提交
    • K
      treewide: kzalloc() -> kcalloc() · 6396bb22
      Kees Cook 提交于
      The kzalloc() function has a 2-factor argument form, kcalloc(). This
      patch replaces cases of:
      
              kzalloc(a * b, gfp)
      
      with:
              kcalloc(a * b, gfp)
      
      as well as handling cases of:
      
              kzalloc(a * b * c, gfp)
      
      with:
      
              kzalloc(array3_size(a, b, c), gfp)
      
      as it's slightly less ugly than:
      
              kzalloc_array(array_size(a, b), c, gfp)
      
      This does, however, attempt to ignore constant size factors like:
      
              kzalloc(4 * 1024, gfp)
      
      though any constants defined via macros get caught up in the conversion.
      
      Any factors with a sizeof() of "unsigned char", "char", and "u8" were
      dropped, since they're redundant.
      
      The Coccinelle script used for this was:
      
      // Fix redundant parens around sizeof().
      @@
      type TYPE;
      expression THING, E;
      @@
      
      (
        kzalloc(
      -	(sizeof(TYPE)) * E
      +	sizeof(TYPE) * E
        , ...)
      |
        kzalloc(
      -	(sizeof(THING)) * E
      +	sizeof(THING) * E
        , ...)
      )
      
      // Drop single-byte sizes and redundant parens.
      @@
      expression COUNT;
      typedef u8;
      typedef __u8;
      @@
      
      (
        kzalloc(
      -	sizeof(u8) * (COUNT)
      +	COUNT
        , ...)
      |
        kzalloc(
      -	sizeof(__u8) * (COUNT)
      +	COUNT
        , ...)
      |
        kzalloc(
      -	sizeof(char) * (COUNT)
      +	COUNT
        , ...)
      |
        kzalloc(
      -	sizeof(unsigned char) * (COUNT)
      +	COUNT
        , ...)
      |
        kzalloc(
      -	sizeof(u8) * COUNT
      +	COUNT
        , ...)
      |
        kzalloc(
      -	sizeof(__u8) * COUNT
      +	COUNT
        , ...)
      |
        kzalloc(
      -	sizeof(char) * COUNT
      +	COUNT
        , ...)
      |
        kzalloc(
      -	sizeof(unsigned char) * COUNT
      +	COUNT
        , ...)
      )
      
      // 2-factor product with sizeof(type/expression) and identifier or constant.
      @@
      type TYPE;
      expression THING;
      identifier COUNT_ID;
      constant COUNT_CONST;
      @@
      
      (
      - kzalloc
      + kcalloc
        (
      -	sizeof(TYPE) * (COUNT_ID)
      +	COUNT_ID, sizeof(TYPE)
        , ...)
      |
      - kzalloc
      + kcalloc
        (
      -	sizeof(TYPE) * COUNT_ID
      +	COUNT_ID, sizeof(TYPE)
        , ...)
      |
      - kzalloc
      + kcalloc
        (
      -	sizeof(TYPE) * (COUNT_CONST)
      +	COUNT_CONST, sizeof(TYPE)
        , ...)
      |
      - kzalloc
      + kcalloc
        (
      -	sizeof(TYPE) * COUNT_CONST
      +	COUNT_CONST, sizeof(TYPE)
        , ...)
      |
      - kzalloc
      + kcalloc
        (
      -	sizeof(THING) * (COUNT_ID)
      +	COUNT_ID, sizeof(THING)
        , ...)
      |
      - kzalloc
      + kcalloc
        (
      -	sizeof(THING) * COUNT_ID
      +	COUNT_ID, sizeof(THING)
        , ...)
      |
      - kzalloc
      + kcalloc
        (
      -	sizeof(THING) * (COUNT_CONST)
      +	COUNT_CONST, sizeof(THING)
        , ...)
      |
      - kzalloc
      + kcalloc
        (
      -	sizeof(THING) * COUNT_CONST
      +	COUNT_CONST, sizeof(THING)
        , ...)
      )
      
      // 2-factor product, only identifiers.
      @@
      identifier SIZE, COUNT;
      @@
      
      - kzalloc
      + kcalloc
        (
      -	SIZE * COUNT
      +	COUNT, SIZE
        , ...)
      
      // 3-factor product with 1 sizeof(type) or sizeof(expression), with
      // redundant parens removed.
      @@
      expression THING;
      identifier STRIDE, COUNT;
      type TYPE;
      @@
      
      (
        kzalloc(
      -	sizeof(TYPE) * (COUNT) * (STRIDE)
      +	array3_size(COUNT, STRIDE, sizeof(TYPE))
        , ...)
      |
        kzalloc(
      -	sizeof(TYPE) * (COUNT) * STRIDE
      +	array3_size(COUNT, STRIDE, sizeof(TYPE))
        , ...)
      |
        kzalloc(
      -	sizeof(TYPE) * COUNT * (STRIDE)
      +	array3_size(COUNT, STRIDE, sizeof(TYPE))
        , ...)
      |
        kzalloc(
      -	sizeof(TYPE) * COUNT * STRIDE
      +	array3_size(COUNT, STRIDE, sizeof(TYPE))
        , ...)
      |
        kzalloc(
      -	sizeof(THING) * (COUNT) * (STRIDE)
      +	array3_size(COUNT, STRIDE, sizeof(THING))
        , ...)
      |
        kzalloc(
      -	sizeof(THING) * (COUNT) * STRIDE
      +	array3_size(COUNT, STRIDE, sizeof(THING))
        , ...)
      |
        kzalloc(
      -	sizeof(THING) * COUNT * (STRIDE)
      +	array3_size(COUNT, STRIDE, sizeof(THING))
        , ...)
      |
        kzalloc(
      -	sizeof(THING) * COUNT * STRIDE
      +	array3_size(COUNT, STRIDE, sizeof(THING))
        , ...)
      )
      
      // 3-factor product with 2 sizeof(variable), with redundant parens removed.
      @@
      expression THING1, THING2;
      identifier COUNT;
      type TYPE1, TYPE2;
      @@
      
      (
        kzalloc(
      -	sizeof(TYPE1) * sizeof(TYPE2) * COUNT
      +	array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2))
        , ...)
      |
        kzalloc(
      -	sizeof(TYPE1) * sizeof(THING2) * (COUNT)
      +	array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2))
        , ...)
      |
        kzalloc(
      -	sizeof(THING1) * sizeof(THING2) * COUNT
      +	array3_size(COUNT, sizeof(THING1), sizeof(THING2))
        , ...)
      |
        kzalloc(
      -	sizeof(THING1) * sizeof(THING2) * (COUNT)
      +	array3_size(COUNT, sizeof(THING1), sizeof(THING2))
        , ...)
      |
        kzalloc(
      -	sizeof(TYPE1) * sizeof(THING2) * COUNT
      +	array3_size(COUNT, sizeof(TYPE1), sizeof(THING2))
        , ...)
      |
        kzalloc(
      -	sizeof(TYPE1) * sizeof(THING2) * (COUNT)
      +	array3_size(COUNT, sizeof(TYPE1), sizeof(THING2))
        , ...)
      )
      
      // 3-factor product, only identifiers, with redundant parens removed.
      @@
      identifier STRIDE, SIZE, COUNT;
      @@
      
      (
        kzalloc(
      -	(COUNT) * STRIDE * SIZE
      +	array3_size(COUNT, STRIDE, SIZE)
        , ...)
      |
        kzalloc(
      -	COUNT * (STRIDE) * SIZE
      +	array3_size(COUNT, STRIDE, SIZE)
        , ...)
      |
        kzalloc(
      -	COUNT * STRIDE * (SIZE)
      +	array3_size(COUNT, STRIDE, SIZE)
        , ...)
      |
        kzalloc(
      -	(COUNT) * (STRIDE) * SIZE
      +	array3_size(COUNT, STRIDE, SIZE)
        , ...)
      |
        kzalloc(
      -	COUNT * (STRIDE) * (SIZE)
      +	array3_size(COUNT, STRIDE, SIZE)
        , ...)
      |
        kzalloc(
      -	(COUNT) * STRIDE * (SIZE)
      +	array3_size(COUNT, STRIDE, SIZE)
        , ...)
      |
        kzalloc(
      -	(COUNT) * (STRIDE) * (SIZE)
      +	array3_size(COUNT, STRIDE, SIZE)
        , ...)
      |
        kzalloc(
      -	COUNT * STRIDE * SIZE
      +	array3_size(COUNT, STRIDE, SIZE)
        , ...)
      )
      
      // Any remaining multi-factor products, first at least 3-factor products,
      // when they're not all constants...
      @@
      expression E1, E2, E3;
      constant C1, C2, C3;
      @@
      
      (
        kzalloc(C1 * C2 * C3, ...)
      |
        kzalloc(
      -	(E1) * E2 * E3
      +	array3_size(E1, E2, E3)
        , ...)
      |
        kzalloc(
      -	(E1) * (E2) * E3
      +	array3_size(E1, E2, E3)
        , ...)
      |
        kzalloc(
      -	(E1) * (E2) * (E3)
      +	array3_size(E1, E2, E3)
        , ...)
      |
        kzalloc(
      -	E1 * E2 * E3
      +	array3_size(E1, E2, E3)
        , ...)
      )
      
      // And then all remaining 2 factors products when they're not all constants,
      // keeping sizeof() as the second factor argument.
      @@
      expression THING, E1, E2;
      type TYPE;
      constant C1, C2, C3;
      @@
      
      (
        kzalloc(sizeof(THING) * C2, ...)
      |
        kzalloc(sizeof(TYPE) * C2, ...)
      |
        kzalloc(C1 * C2 * C3, ...)
      |
        kzalloc(C1 * C2, ...)
      |
      - kzalloc
      + kcalloc
        (
      -	sizeof(TYPE) * (E2)
      +	E2, sizeof(TYPE)
        , ...)
      |
      - kzalloc
      + kcalloc
        (
      -	sizeof(TYPE) * E2
      +	E2, sizeof(TYPE)
        , ...)
      |
      - kzalloc
      + kcalloc
        (
      -	sizeof(THING) * (E2)
      +	E2, sizeof(THING)
        , ...)
      |
      - kzalloc
      + kcalloc
        (
      -	sizeof(THING) * E2
      +	E2, sizeof(THING)
        , ...)
      |
      - kzalloc
      + kcalloc
        (
      -	(E1) * E2
      +	E1, E2
        , ...)
      |
      - kzalloc
      + kcalloc
        (
      -	(E1) * (E2)
      +	E1, E2
        , ...)
      |
      - kzalloc
      + kcalloc
        (
      -	E1 * E2
      +	E1, E2
        , ...)
      )
      Signed-off-by: NKees Cook <keescook@chromium.org>
      6396bb22
    • K
      treewide: kmalloc() -> kmalloc_array() · 6da2ec56
      Kees Cook 提交于
      The kmalloc() function has a 2-factor argument form, kmalloc_array(). This
      patch replaces cases of:
      
              kmalloc(a * b, gfp)
      
      with:
              kmalloc_array(a * b, gfp)
      
      as well as handling cases of:
      
              kmalloc(a * b * c, gfp)
      
      with:
      
              kmalloc(array3_size(a, b, c), gfp)
      
      as it's slightly less ugly than:
      
              kmalloc_array(array_size(a, b), c, gfp)
      
      This does, however, attempt to ignore constant size factors like:
      
              kmalloc(4 * 1024, gfp)
      
      though any constants defined via macros get caught up in the conversion.
      
      Any factors with a sizeof() of "unsigned char", "char", and "u8" were
      dropped, since they're redundant.
      
      The tools/ directory was manually excluded, since it has its own
      implementation of kmalloc().
      
      The Coccinelle script used for this was:
      
      // Fix redundant parens around sizeof().
      @@
      type TYPE;
      expression THING, E;
      @@
      
      (
        kmalloc(
      -	(sizeof(TYPE)) * E
      +	sizeof(TYPE) * E
        , ...)
      |
        kmalloc(
      -	(sizeof(THING)) * E
      +	sizeof(THING) * E
        , ...)
      )
      
      // Drop single-byte sizes and redundant parens.
      @@
      expression COUNT;
      typedef u8;
      typedef __u8;
      @@
      
      (
        kmalloc(
      -	sizeof(u8) * (COUNT)
      +	COUNT
        , ...)
      |
        kmalloc(
      -	sizeof(__u8) * (COUNT)
      +	COUNT
        , ...)
      |
        kmalloc(
      -	sizeof(char) * (COUNT)
      +	COUNT
        , ...)
      |
        kmalloc(
      -	sizeof(unsigned char) * (COUNT)
      +	COUNT
        , ...)
      |
        kmalloc(
      -	sizeof(u8) * COUNT
      +	COUNT
        , ...)
      |
        kmalloc(
      -	sizeof(__u8) * COUNT
      +	COUNT
        , ...)
      |
        kmalloc(
      -	sizeof(char) * COUNT
      +	COUNT
        , ...)
      |
        kmalloc(
      -	sizeof(unsigned char) * COUNT
      +	COUNT
        , ...)
      )
      
      // 2-factor product with sizeof(type/expression) and identifier or constant.
      @@
      type TYPE;
      expression THING;
      identifier COUNT_ID;
      constant COUNT_CONST;
      @@
      
      (
      - kmalloc
      + kmalloc_array
        (
      -	sizeof(TYPE) * (COUNT_ID)
      +	COUNT_ID, sizeof(TYPE)
        , ...)
      |
      - kmalloc
      + kmalloc_array
        (
      -	sizeof(TYPE) * COUNT_ID
      +	COUNT_ID, sizeof(TYPE)
        , ...)
      |
      - kmalloc
      + kmalloc_array
        (
      -	sizeof(TYPE) * (COUNT_CONST)
      +	COUNT_CONST, sizeof(TYPE)
        , ...)
      |
      - kmalloc
      + kmalloc_array
        (
      -	sizeof(TYPE) * COUNT_CONST
      +	COUNT_CONST, sizeof(TYPE)
        , ...)
      |
      - kmalloc
      + kmalloc_array
        (
      -	sizeof(THING) * (COUNT_ID)
      +	COUNT_ID, sizeof(THING)
        , ...)
      |
      - kmalloc
      + kmalloc_array
        (
      -	sizeof(THING) * COUNT_ID
      +	COUNT_ID, sizeof(THING)
        , ...)
      |
      - kmalloc
      + kmalloc_array
        (
      -	sizeof(THING) * (COUNT_CONST)
      +	COUNT_CONST, sizeof(THING)
        , ...)
      |
      - kmalloc
      + kmalloc_array
        (
      -	sizeof(THING) * COUNT_CONST
      +	COUNT_CONST, sizeof(THING)
        , ...)
      )
      
      // 2-factor product, only identifiers.
      @@
      identifier SIZE, COUNT;
      @@
      
      - kmalloc
      + kmalloc_array
        (
      -	SIZE * COUNT
      +	COUNT, SIZE
        , ...)
      
      // 3-factor product with 1 sizeof(type) or sizeof(expression), with
      // redundant parens removed.
      @@
      expression THING;
      identifier STRIDE, COUNT;
      type TYPE;
      @@
      
      (
        kmalloc(
      -	sizeof(TYPE) * (COUNT) * (STRIDE)
      +	array3_size(COUNT, STRIDE, sizeof(TYPE))
        , ...)
      |
        kmalloc(
      -	sizeof(TYPE) * (COUNT) * STRIDE
      +	array3_size(COUNT, STRIDE, sizeof(TYPE))
        , ...)
      |
        kmalloc(
      -	sizeof(TYPE) * COUNT * (STRIDE)
      +	array3_size(COUNT, STRIDE, sizeof(TYPE))
        , ...)
      |
        kmalloc(
      -	sizeof(TYPE) * COUNT * STRIDE
      +	array3_size(COUNT, STRIDE, sizeof(TYPE))
        , ...)
      |
        kmalloc(
      -	sizeof(THING) * (COUNT) * (STRIDE)
      +	array3_size(COUNT, STRIDE, sizeof(THING))
        , ...)
      |
        kmalloc(
      -	sizeof(THING) * (COUNT) * STRIDE
      +	array3_size(COUNT, STRIDE, sizeof(THING))
        , ...)
      |
        kmalloc(
      -	sizeof(THING) * COUNT * (STRIDE)
      +	array3_size(COUNT, STRIDE, sizeof(THING))
        , ...)
      |
        kmalloc(
      -	sizeof(THING) * COUNT * STRIDE
      +	array3_size(COUNT, STRIDE, sizeof(THING))
        , ...)
      )
      
      // 3-factor product with 2 sizeof(variable), with redundant parens removed.
      @@
      expression THING1, THING2;
      identifier COUNT;
      type TYPE1, TYPE2;
      @@
      
      (
        kmalloc(
      -	sizeof(TYPE1) * sizeof(TYPE2) * COUNT
      +	array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2))
        , ...)
      |
        kmalloc(
      -	sizeof(TYPE1) * sizeof(THING2) * (COUNT)
      +	array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2))
        , ...)
      |
        kmalloc(
      -	sizeof(THING1) * sizeof(THING2) * COUNT
      +	array3_size(COUNT, sizeof(THING1), sizeof(THING2))
        , ...)
      |
        kmalloc(
      -	sizeof(THING1) * sizeof(THING2) * (COUNT)
      +	array3_size(COUNT, sizeof(THING1), sizeof(THING2))
        , ...)
      |
        kmalloc(
      -	sizeof(TYPE1) * sizeof(THING2) * COUNT
      +	array3_size(COUNT, sizeof(TYPE1), sizeof(THING2))
        , ...)
      |
        kmalloc(
      -	sizeof(TYPE1) * sizeof(THING2) * (COUNT)
      +	array3_size(COUNT, sizeof(TYPE1), sizeof(THING2))
        , ...)
      )
      
      // 3-factor product, only identifiers, with redundant parens removed.
      @@
      identifier STRIDE, SIZE, COUNT;
      @@
      
      (
        kmalloc(
      -	(COUNT) * STRIDE * SIZE
      +	array3_size(COUNT, STRIDE, SIZE)
        , ...)
      |
        kmalloc(
      -	COUNT * (STRIDE) * SIZE
      +	array3_size(COUNT, STRIDE, SIZE)
        , ...)
      |
        kmalloc(
      -	COUNT * STRIDE * (SIZE)
      +	array3_size(COUNT, STRIDE, SIZE)
        , ...)
      |
        kmalloc(
      -	(COUNT) * (STRIDE) * SIZE
      +	array3_size(COUNT, STRIDE, SIZE)
        , ...)
      |
        kmalloc(
      -	COUNT * (STRIDE) * (SIZE)
      +	array3_size(COUNT, STRIDE, SIZE)
        , ...)
      |
        kmalloc(
      -	(COUNT) * STRIDE * (SIZE)
      +	array3_size(COUNT, STRIDE, SIZE)
        , ...)
      |
        kmalloc(
      -	(COUNT) * (STRIDE) * (SIZE)
      +	array3_size(COUNT, STRIDE, SIZE)
        , ...)
      |
        kmalloc(
      -	COUNT * STRIDE * SIZE
      +	array3_size(COUNT, STRIDE, SIZE)
        , ...)
      )
      
      // Any remaining multi-factor products, first at least 3-factor products,
      // when they're not all constants...
      @@
      expression E1, E2, E3;
      constant C1, C2, C3;
      @@
      
      (
        kmalloc(C1 * C2 * C3, ...)
      |
        kmalloc(
      -	(E1) * E2 * E3
      +	array3_size(E1, E2, E3)
        , ...)
      |
        kmalloc(
      -	(E1) * (E2) * E3
      +	array3_size(E1, E2, E3)
        , ...)
      |
        kmalloc(
      -	(E1) * (E2) * (E3)
      +	array3_size(E1, E2, E3)
        , ...)
      |
        kmalloc(
      -	E1 * E2 * E3
      +	array3_size(E1, E2, E3)
        , ...)
      )
      
      // And then all remaining 2 factors products when they're not all constants,
      // keeping sizeof() as the second factor argument.
      @@
      expression THING, E1, E2;
      type TYPE;
      constant C1, C2, C3;
      @@
      
      (
        kmalloc(sizeof(THING) * C2, ...)
      |
        kmalloc(sizeof(TYPE) * C2, ...)
      |
        kmalloc(C1 * C2 * C3, ...)
      |
        kmalloc(C1 * C2, ...)
      |
      - kmalloc
      + kmalloc_array
        (
      -	sizeof(TYPE) * (E2)
      +	E2, sizeof(TYPE)
        , ...)
      |
      - kmalloc
      + kmalloc_array
        (
      -	sizeof(TYPE) * E2
      +	E2, sizeof(TYPE)
        , ...)
      |
      - kmalloc
      + kmalloc_array
        (
      -	sizeof(THING) * (E2)
      +	E2, sizeof(THING)
        , ...)
      |
      - kmalloc
      + kmalloc_array
        (
      -	sizeof(THING) * E2
      +	E2, sizeof(THING)
        , ...)
      |
      - kmalloc
      + kmalloc_array
        (
      -	(E1) * E2
      +	E1, E2
        , ...)
      |
      - kmalloc
      + kmalloc_array
        (
      -	(E1) * (E2)
      +	E1, E2
        , ...)
      |
      - kmalloc
      + kmalloc_array
        (
      -	E1 * E2
      +	E1, E2
        , ...)
      )
      Signed-off-by: NKees Cook <keescook@chromium.org>
      6da2ec56
  5. 25 5月, 2018 1 次提交
  6. 16 5月, 2018 4 次提交
    • W
      locking/percpu-rwsem: Annotate rwsem ownership transfer by setting RWSEM_OWNER_UNKNOWN · 5a817641
      Waiman Long 提交于
      The filesystem freezing code needs to transfer ownership of a rwsem
      embedded in a percpu-rwsem from the task that does the freezing to
      another one that does the thawing by calling percpu_rwsem_release()
      after freezing and percpu_rwsem_acquire() before thawing.
      
      However, the new rwsem debug code runs afoul with this scheme by warning
      that the task that releases the rwsem isn't the one that acquires it,
      as reported by Amir Goldstein:
      
        DEBUG_LOCKS_WARN_ON(sem->owner != get_current())
        WARNING: CPU: 1 PID: 1401 at /home/amir/build/src/linux/kernel/locking/rwsem.c:133 up_write+0x59/0x79
      
        Call Trace:
         percpu_up_write+0x1f/0x28
         thaw_super_locked+0xdf/0x120
         do_vfs_ioctl+0x270/0x5f1
         ksys_ioctl+0x52/0x71
         __x64_sys_ioctl+0x16/0x19
         do_syscall_64+0x5d/0x167
         entry_SYSCALL_64_after_hwframe+0x49/0xbe
      
      To work properly with the rwsem debug code, we need to annotate that the
      rwsem ownership is unknown during the tranfer period until a brave soul
      comes forward to acquire the ownership. During that period, optimistic
      spinning will be disabled.
      Reported-by: NAmir Goldstein <amir73il@gmail.com>
      Tested-by: NAmir Goldstein <amir73il@gmail.com>
      Signed-off-by: NWaiman Long <longman@redhat.com>
      Acked-by: NPeter Zijlstra <peterz@infradead.org>
      Cc: Andrew Morton <akpm@linux-foundation.org>
      Cc: Davidlohr Bueso <dave@stgolabs.net>
      Cc: Jan Kara <jack@suse.cz>
      Cc: Linus Torvalds <torvalds@linux-foundation.org>
      Cc: Matthew Wilcox <willy@infradead.org>
      Cc: Oleg Nesterov <oleg@redhat.com>
      Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
      Cc: Theodore Y. Ts'o <tytso@mit.edu>
      Cc: Thomas Gleixner <tglx@linutronix.de>
      Cc: Will Deacon <will.deacon@arm.com>
      Cc: linux-fsdevel@vger.kernel.org
      Link: http://lkml.kernel.org/r/1526420991-21213-3-git-send-email-longman@redhat.comSigned-off-by: NIngo Molnar <mingo@kernel.org>
      5a817641
    • W
      locking/rwsem: Add a new RWSEM_ANONYMOUSLY_OWNED flag · d7d760ef
      Waiman Long 提交于
      There are use cases where a rwsem can be acquired by one task, but
      released by another task. In thess cases, optimistic spinning may need
      to be disabled.  One example will be the filesystem freeze/thaw code
      where the task that freezes the filesystem will acquire a write lock
      on a rwsem and then un-owns it before returning to userspace. Later on,
      another task will come along, acquire the ownership, thaw the filesystem
      and release the rwsem.
      
      Bit 0 of the owner field was used to designate that it is a reader
      owned rwsem. It is now repurposed to mean that the owner of the rwsem
      is not known. If only bit 0 is set, the rwsem is reader owned. If bit
      0 and other bits are set, it is writer owned with an unknown owner.
      One such value for the latter case is (-1L). So we can set owner to 1 for
      reader-owned, -1 for writer-owned. The owner is unknown in both cases.
      
      To handle transfer of rwsem ownership, the higher level code should
      set the owner field to -1 to indicate a write-locked rwsem with unknown
      owner.  Optimistic spinning will be disabled in this case.
      
      Once the higher level code figures who the new owner is, it can then
      set the owner field accordingly.
      Tested-by: NAmir Goldstein <amir73il@gmail.com>
      Signed-off-by: NWaiman Long <longman@redhat.com>
      Acked-by: NPeter Zijlstra <peterz@infradead.org>
      Cc: Andrew Morton <akpm@linux-foundation.org>
      Cc: Davidlohr Bueso <dave@stgolabs.net>
      Cc: Jan Kara <jack@suse.cz>
      Cc: Linus Torvalds <torvalds@linux-foundation.org>
      Cc: Matthew Wilcox <willy@infradead.org>
      Cc: Oleg Nesterov <oleg@redhat.com>
      Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
      Cc: Theodore Y. Ts'o <tytso@mit.edu>
      Cc: Thomas Gleixner <tglx@linutronix.de>
      Cc: Will Deacon <will.deacon@arm.com>
      Cc: linux-fsdevel@vger.kernel.org
      Link: http://lkml.kernel.org/r/1526420991-21213-2-git-send-email-longman@redhat.comSigned-off-by: NIngo Molnar <mingo@kernel.org>
      d7d760ef
    • C
      proc: introduce proc_create_single{,_data} · 3f3942ac
      Christoph Hellwig 提交于
      Variants of proc_create{,_data} that directly take a seq_file show
      callback and drastically reduces the boilerplate code in the callers.
      
      All trivial callers converted over.
      Signed-off-by: NChristoph Hellwig <hch@lst.de>
      3f3942ac
    • C
      proc: introduce proc_create_seq{,_data} · fddda2b7
      Christoph Hellwig 提交于
      Variants of proc_create{,_data} that directly take a struct seq_operations
      argument and drastically reduces the boilerplate code in the callers.
      
      All trivial callers converted over.
      Signed-off-by: NChristoph Hellwig <hch@lst.de>
      fddda2b7
  7. 14 5月, 2018 2 次提交
  8. 04 5月, 2018 1 次提交
    • P
      locking/mutex: Optimize __mutex_trylock_fast() · c427f695
      Peter Zijlstra 提交于
      Use try_cmpxchg to avoid the pointless TEST instruction..
      And add the (missing) atomic_long_try_cmpxchg*() wrappery.
      
      On x86_64 this gives:
      
      0000000000000710 <mutex_lock>:						0000000000000710 <mutex_lock>:
       710:   65 48 8b 14 25 00 00    mov    %gs:0x0,%rdx                      710:   65 48 8b 14 25 00 00    mov    %gs:0x0,%rdx
       717:   00 00                                                            717:   00 00
                              715: R_X86_64_32S       current_task                                    715: R_X86_64_32S       current_task
       719:   31 c0                   xor    %eax,%eax                         719:   31 c0                   xor    %eax,%eax
       71b:   f0 48 0f b1 17          lock cmpxchg %rdx,(%rdi)                 71b:   f0 48 0f b1 17          lock cmpxchg %rdx,(%rdi)
       720:   48 85 c0                test   %rax,%rax                         720:   75 02                   jne    724 <mutex_lock+0x14>
       723:   75 02                   jne    727 <mutex_lock+0x17>             722:   f3 c3                   repz retq
       725:   f3 c3                   repz retq                                724:   eb da                   jmp    700 <__mutex_lock_slowpath>
       727:   eb d7                   jmp    700 <__mutex_lock_slowpath>       726:   66 2e 0f 1f 84 00 00    nopw   %cs:0x0(%rax,%rax,1)
       729:   0f 1f 80 00 00 00 00    nopl   0x0(%rax)                         72d:   00 00 00
      
      On ARM64 this gives:
      
      000000000000638 <mutex_lock>:						0000000000000638 <mutex_lock>:
           638:       d5384101        mrs     x1, sp_el0                           638:       d5384101        mrs     x1, sp_el0
           63c:       d2800002        mov     x2, #0x0                             63c:       d2800002        mov     x2, #0x0
           640:       f9800011        prfm    pstl1strm, [x0]                      640:       f9800011        prfm    pstl1strm, [x0]
           644:       c85ffc03        ldaxr   x3, [x0]                             644:       c85ffc03        ldaxr   x3, [x0]
           648:       ca020064        eor     x4, x3, x2                           648:       ca020064        eor     x4, x3, x2
           64c:       b5000064        cbnz    x4, 658 <mutex_lock+0x20>            64c:       b5000064        cbnz    x4, 658 <mutex_lock+0x20>
           650:       c8047c01        stxr    w4, x1, [x0]                         650:       c8047c01        stxr    w4, x1, [x0]
           654:       35ffff84        cbnz    w4, 644 <mutex_lock+0xc>             654:       35ffff84        cbnz    w4, 644 <mutex_lock+0xc>
           658:       b40000c3        cbz     x3, 670 <mutex_lock+0x38>            658:       b5000043        cbnz    x3, 660 <mutex_lock+0x28>
           65c:       a9bf7bfd        stp     x29, x30, [sp,#-16]!                 65c:       d65f03c0        ret
           660:       910003fd        mov     x29, sp                              660:       a9bf7bfd        stp     x29, x30, [sp,#-16]!
           664:       97ffffef        bl      620 <__mutex_lock_slowpath>          664:       910003fd        mov     x29, sp
           668:       a8c17bfd        ldp     x29, x30, [sp],#16                   668:       97ffffee        bl      620 <__mutex_lock_slowpath>
           66c:       d65f03c0        ret                                          66c:       a8c17bfd        ldp     x29, x30, [sp],#16
           670:       d65f03c0        ret                                          670:       d65f03c0        ret
      Reported-by: NMatthew Wilcox <mawilcox@microsoft.com>
      Acked-by: NWill Deacon <will.deacon@arm.com>
      Signed-off-by: NPeter Zijlstra (Intel) <peterz@infradead.org>
      Cc: Linus Torvalds <torvalds@linux-foundation.org>
      Cc: Peter Zijlstra <peterz@infradead.org>
      Cc: Thomas Gleixner <tglx@linutronix.de>
      Signed-off-by: NIngo Molnar <mingo@kernel.org>
      c427f695
  9. 27 4月, 2018 11 次提交
  10. 31 3月, 2018 1 次提交
  11. 29 3月, 2018 2 次提交
  12. 20 3月, 2018 1 次提交
  13. 09 3月, 2018 1 次提交
    • B
      rtmutex: Make rt_mutex_futex_unlock() safe for irq-off callsites · 6b0ef92f
      Boqun Feng 提交于
      When running rcutorture with TREE03 config, CONFIG_PROVE_LOCKING=y, and
      kernel cmdline argument "rcutorture.gp_exp=1", lockdep reports a
      HARDIRQ-safe->HARDIRQ-unsafe deadlock:
      
       ================================
       WARNING: inconsistent lock state
       4.16.0-rc4+ #1 Not tainted
       --------------------------------
       inconsistent {IN-HARDIRQ-W} -> {HARDIRQ-ON-W} usage.
       takes:
       __schedule+0xbe/0xaf0
       {IN-HARDIRQ-W} state was registered at:
         _raw_spin_lock+0x2a/0x40
         scheduler_tick+0x47/0xf0
      ...
       other info that might help us debug this:
        Possible unsafe locking scenario:
              CPU0
              ----
         lock(&rq->lock);
         <Interrupt>
           lock(&rq->lock);
        *** DEADLOCK ***
       1 lock held by rcu_torture_rea/724:
       rcu_torture_read_lock+0x0/0x70
       stack backtrace:
       CPU: 2 PID: 724 Comm: rcu_torture_rea Not tainted 4.16.0-rc4+ #1
       Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-20171110_100015-anatol 04/01/2014
       Call Trace:
        lock_acquire+0x90/0x200
        ? __schedule+0xbe/0xaf0
        _raw_spin_lock+0x2a/0x40
        ? __schedule+0xbe/0xaf0
        __schedule+0xbe/0xaf0
        preempt_schedule_irq+0x2f/0x60
        retint_kernel+0x1b/0x2d
       RIP: 0010:rcu_read_unlock_special+0x0/0x680
        ? rcu_torture_read_unlock+0x60/0x60
        __rcu_read_unlock+0x64/0x70
        rcu_torture_read_unlock+0x17/0x60
        rcu_torture_reader+0x275/0x450
        ? rcutorture_booster_init+0x110/0x110
        ? rcu_torture_stall+0x230/0x230
        ? kthread+0x10e/0x130
        kthread+0x10e/0x130
        ? kthread_create_worker_on_cpu+0x70/0x70
        ? call_usermodehelper_exec_async+0x11a/0x150
        ret_from_fork+0x3a/0x50
      
      This happens with the following even sequence:
      
      	preempt_schedule_irq();
      	  local_irq_enable();
      	  __schedule():
      	    local_irq_disable(); // irq off
      	    ...
      	    rcu_note_context_switch():
      	      rcu_note_preempt_context_switch():
      	        rcu_read_unlock_special():
      	          local_irq_save(flags);
      	          ...
      		  raw_spin_unlock_irqrestore(...,flags); // irq remains off
      	          rt_mutex_futex_unlock():
      	            raw_spin_lock_irq();
      	            ...
      	            raw_spin_unlock_irq(); // accidentally set irq on
      
      	    <return to __schedule()>
      	    rq_lock():
      	      raw_spin_lock(); // acquiring rq->lock with irq on
      
      which means rq->lock becomes a HARDIRQ-unsafe lock, which can cause
      deadlocks in scheduler code.
      
      This problem was introduced by commit 02a7c234 ("rcu: Suppress
      lockdep false-positive ->boost_mtx complaints"). That brought the user
      of rt_mutex_futex_unlock() with irq off.
      
      To fix this, replace the *lock_irq() in rt_mutex_futex_unlock() with
      *lock_irq{save,restore}() to make it safe to call rt_mutex_futex_unlock()
      with irq off.
      
      Fixes: 02a7c234 ("rcu: Suppress lockdep false-positive ->boost_mtx complaints")
      Signed-off-by: NBoqun Feng <boqun.feng@gmail.com>
      Signed-off-by: NThomas Gleixner <tglx@linutronix.de>
      Cc: Peter Zijlstra <peterz@infradead.org>
      Cc: Lai Jiangshan <jiangshanlai@gmail.com>
      Cc: Steven Rostedt <rostedt@goodmis.org>
      Cc: Josh Triplett <josh@joshtriplett.org>
      Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
      Cc: "Paul E . McKenney" <paulmck@linux.vnet.ibm.com>
      Link: https://lkml.kernel.org/r/20180309065630.8283-1-boqun.feng@gmail.com
      6b0ef92f
  14. 28 2月, 2018 1 次提交
  15. 13 2月, 2018 2 次提交
    • W
      locking/qspinlock: Ensure node->count is updated before initialising node · 11dc1322
      Will Deacon 提交于
      When queuing on the qspinlock, the count field for the current CPU's head
      node is incremented. This needn't be atomic because locking in e.g. IRQ
      context is balanced and so an IRQ will return with node->count as it
      found it.
      
      However, the compiler could in theory reorder the initialisation of
      node[idx] before the increment of the head node->count, causing an
      IRQ to overwrite the initialised node and potentially corrupt the lock
      state.
      
      Avoid the potential for this harmful compiler reordering by placing a
      barrier() between the increment of the head node->count and the subsequent
      node initialisation.
      Signed-off-by: NWill Deacon <will.deacon@arm.com>
      Acked-by: NPeter Zijlstra (Intel) <peterz@infradead.org>
      Cc: Linus Torvalds <torvalds@linux-foundation.org>
      Cc: Thomas Gleixner <tglx@linutronix.de>
      Link: http://lkml.kernel.org/r/1518528177-19169-3-git-send-email-will.deacon@arm.comSigned-off-by: NIngo Molnar <mingo@kernel.org>
      11dc1322
    • W
      locking/qspinlock: Ensure node is initialised before updating prev->next · 95bcade3
      Will Deacon 提交于
      When a locker ends up queuing on the qspinlock locking slowpath, we
      initialise the relevant mcs node and publish it indirectly by updating
      the tail portion of the lock word using xchg_tail. If we find that there
      was a pre-existing locker in the queue, we subsequently update their
      ->next field to point at our node so that we are notified when it's our
      turn to take the lock.
      
      This can be roughly illustrated as follows:
      
        /* Initialise the fields in node and encode a pointer to node in tail */
        tail = initialise_node(node);
      
        /*
         * Exchange tail into the lockword using an atomic read-modify-write
         * operation with release semantics
         */
        old = xchg_tail(lock, tail);
      
        /* If there was a pre-existing waiter ... */
        if (old & _Q_TAIL_MASK) {
      	prev = decode_tail(old);
      	smp_read_barrier_depends();
      
      	/* ... then update their ->next field to point to node.
      	WRITE_ONCE(prev->next, node);
        }
      
      The conditional update of prev->next therefore relies on the address
      dependency from the result of xchg_tail ensuring order against the
      prior initialisation of node. However, since the release semantics of
      the xchg_tail operation apply only to the write portion of the RmW,
      then this ordering is not guaranteed and it is possible for the CPU
      to return old before the writes to node have been published, consequently
      allowing us to point prev->next to an uninitialised node.
      
      This patch fixes the problem by making the update of prev->next a RELEASE
      operation, which also removes the reliance on dependency ordering.
      Signed-off-by: NWill Deacon <will.deacon@arm.com>
      Acked-by: NPeter Zijlstra (Intel) <peterz@infradead.org>
      Cc: Linus Torvalds <torvalds@linux-foundation.org>
      Cc: Thomas Gleixner <tglx@linutronix.de>
      Link: http://lkml.kernel.org/r/1518528177-19169-2-git-send-email-will.deacon@arm.comSigned-off-by: NIngo Molnar <mingo@kernel.org>
      95bcade3
  16. 24 1月, 2018 1 次提交
  17. 18 1月, 2018 2 次提交
  18. 15 1月, 2018 1 次提交
    • P
      futex: Avoid violating the 10th rule of futex · c1e2f0ea
      Peter Zijlstra 提交于
      Julia reported futex state corruption in the following scenario:
      
         waiter                                  waker                                            stealer (prio > waiter)
      
         futex(WAIT_REQUEUE_PI, uaddr, uaddr2,
               timeout=[N ms])
            futex_wait_requeue_pi()
               futex_wait_queue_me()
                  freezable_schedule()
                  <scheduled out>
                                                 futex(LOCK_PI, uaddr2)
                                                 futex(CMP_REQUEUE_PI, uaddr,
                                                       uaddr2, 1, 0)
                                                    /* requeues waiter to uaddr2 */
                                                 futex(UNLOCK_PI, uaddr2)
                                                       wake_futex_pi()
                                                          cmp_futex_value_locked(uaddr2, waiter)
                                                          wake_up_q()
                 <woken by waker>
                 <hrtimer_wakeup() fires,
                  clears sleeper->task>
                                                                                                 futex(LOCK_PI, uaddr2)
                                                                                                    __rt_mutex_start_proxy_lock()
                                                                                                       try_to_take_rt_mutex() /* steals lock */
                                                                                                          rt_mutex_set_owner(lock, stealer)
                                                                                                    <preempted>
               <scheduled in>
               rt_mutex_wait_proxy_lock()
                  __rt_mutex_slowlock()
                     try_to_take_rt_mutex() /* fails, lock held by stealer */
                     if (timeout && !timeout->task)
                        return -ETIMEDOUT;
                  fixup_owner()
                     /* lock wasn't acquired, so,
                        fixup_pi_state_owner skipped */
      
         return -ETIMEDOUT;
      
         /* At this point, we've returned -ETIMEDOUT to userspace, but the
          * futex word shows waiter to be the owner, and the pi_mutex has
          * stealer as the owner */
      
         futex_lock(LOCK_PI, uaddr2)
           -> bails with EDEADLK, futex word says we're owner.
      
      And suggested that what commit:
      
        73d786bd ("futex: Rework inconsistent rt_mutex/futex_q state")
      
      removes from fixup_owner() looks to be just what is needed. And indeed
      it is -- I completely missed that requeue_pi could also result in this
      case. So we need to restore that, except that subsequent patches, like
      commit:
      
        16ffa12d ("futex: Pull rt_mutex_futex_unlock() out from under hb->lock")
      
      changed all the locking rules. Even without that, the sequence:
      
      -               if (rt_mutex_futex_trylock(&q->pi_state->pi_mutex)) {
      -                       locked = 1;
      -                       goto out;
      -               }
      
      -               raw_spin_lock_irq(&q->pi_state->pi_mutex.wait_lock);
      -               owner = rt_mutex_owner(&q->pi_state->pi_mutex);
      -               if (!owner)
      -                       owner = rt_mutex_next_owner(&q->pi_state->pi_mutex);
      -               raw_spin_unlock_irq(&q->pi_state->pi_mutex.wait_lock);
      -               ret = fixup_pi_state_owner(uaddr, q, owner);
      
      already suggests there were races; otherwise we'd never have to look
      at next_owner.
      
      So instead of doing 3 consecutive wait_lock sections with who knows
      what races, we do it all in a single section. Additionally, the usage
      of pi_state->owner in fixup_owner() was only safe because only the
      rt_mutex owner would modify it, which this additional case wrecks.
      
      Luckily the values can only change away and not to the value we're
      testing, this means we can do a speculative test and double check once
      we have the wait_lock.
      
      Fixes: 73d786bd ("futex: Rework inconsistent rt_mutex/futex_q state")
      Reported-by: NJulia Cartwright <julia@ni.com>
      Reported-by: NGratian Crisan <gratian.crisan@ni.com>
      Signed-off-by: NPeter Zijlstra (Intel) <peterz@infradead.org>
      Signed-off-by: NThomas Gleixner <tglx@linutronix.de>
      Tested-by: NJulia Cartwright <julia@ni.com>
      Tested-by: NGratian Crisan <gratian.crisan@ni.com>
      Cc: Darren Hart <dvhart@infradead.org>
      Cc: stable@vger.kernel.org
      Link: https://lkml.kernel.org/r/20171208124939.7livp7no2ov65rrc@hirez.programming.kicks-ass.net
      c1e2f0ea
  19. 12 12月, 2017 4 次提交
    • I
      locking/lockdep: Remove the cross-release locking checks · e966eaee
      Ingo Molnar 提交于
      This code (CONFIG_LOCKDEP_CROSSRELEASE=y and CONFIG_LOCKDEP_COMPLETIONS=y),
      while it found a number of old bugs initially, was also causing too many
      false positives that caused people to disable lockdep - which is arguably
      a worse overall outcome.
      
      If we disable cross-release by default but keep the code upstream then
      in practice the most likely outcome is that we'll allow the situation
      to degrade gradually, by allowing entropy to introduce more and more
      false positives, until it overwhelms maintenance capacity.
      
      Another bad side effect was that people were trying to work around
      the false positives by uglifying/complicating unrelated code. There's
      a marked difference between annotating locking operations and
      uglifying good code just due to bad lock debugging code ...
      
      This gradual decrease in quality happened to a number of debugging
      facilities in the kernel, and lockdep is pretty complex already,
      so we cannot risk this outcome.
      
      Either cross-release checking can be done right with no false positives,
      or it should not be included in the upstream kernel.
      
      ( Note that it might make sense to maintain it out of tree and go through
        the false positives every now and then and see whether new bugs were
        introduced. )
      
      Cc: Byungchul Park <byungchul.park@lge.com>
      Cc: Linus Torvalds <torvalds@linux-foundation.org>
      Cc: Peter Zijlstra <peterz@infradead.org>
      Cc: Thomas Gleixner <tglx@linutronix.de>
      Cc: linux-kernel@vger.kernel.org
      Signed-off-by: NIngo Molnar <mingo@kernel.org>
      e966eaee
    • W
      locking/core: Remove break_lock field when CONFIG_GENERIC_LOCKBREAK=y · d89c7035
      Will Deacon 提交于
      When CONFIG_GENERIC_LOCKBEAK=y, locking structures grow an extra int ->break_lock
      field which is used to implement raw_spin_is_contended() by setting the field
      to 1 when waiting on a lock and clearing it to zero when holding a lock.
      However, there are a few problems with this approach:
      
        - There is a write-write race between a CPU successfully taking the lock
          (and subsequently writing break_lock = 0) and a waiter waiting on
          the lock (and subsequently writing break_lock = 1). This could result
          in a contended lock being reported as uncontended and vice-versa.
      
        - On machines with store buffers, nothing guarantees that the writes
          to break_lock are visible to other CPUs at any particular time.
      
        - READ_ONCE/WRITE_ONCE are not used, so the field is potentially
          susceptible to harmful compiler optimisations,
      
      Consequently, the usefulness of this field is unclear and we'd be better off
      removing it and allowing architectures to implement raw_spin_is_contended() by
      providing a definition of arch_spin_is_contended(), as they can when
      CONFIG_GENERIC_LOCKBREAK=n.
      Signed-off-by: NWill Deacon <will.deacon@arm.com>
      Acked-by: NPeter Zijlstra <peterz@infradead.org>
      Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
      Cc: Linus Torvalds <torvalds@linux-foundation.org>
      Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
      Cc: Sebastian Ott <sebott@linux.vnet.ibm.com>
      Cc: Thomas Gleixner <tglx@linutronix.de>
      Link: http://lkml.kernel.org/r/1511894539-7988-3-git-send-email-will.deacon@arm.comSigned-off-by: NIngo Molnar <mingo@kernel.org>
      d89c7035
    • W
      locking/core: Fix deadlock during boot on systems with GENERIC_LOCKBREAK · f87f3a32
      Will Deacon 提交于
      Commit:
      
        a8a217c2 ("locking/core: Remove {read,spin,write}_can_lock()")
      
      removed the definition of raw_spin_can_lock(), causing the GENERIC_LOCKBREAK
      spin_lock() routines to poll the ->break_lock field when waiting on a lock.
      
      This has been reported to cause a deadlock during boot on s390, because
      the ->break_lock field is also set by the waiters, and can potentially
      remain set indefinitely if no other CPUs come in to take the lock after
      it has been released.
      
      This patch removes the explicit spinning on ->break_lock from the waiters,
      instead relying on the outer trylock() operation to determine when the
      lock is available.
      Reported-by: NSebastian Ott <sebott@linux.vnet.ibm.com>
      Tested-by: NSebastian Ott <sebott@linux.vnet.ibm.com>
      Signed-off-by: NWill Deacon <will.deacon@arm.com>
      Acked-by: NPeter Zijlstra <peterz@infradead.org>
      Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
      Cc: Linus Torvalds <torvalds@linux-foundation.org>
      Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
      Cc: Thomas Gleixner <tglx@linutronix.de>
      Fixes: a8a217c2 ("locking/core: Remove {read,spin,write}_can_lock()")
      Link: http://lkml.kernel.org/r/1511894539-7988-2-git-send-email-will.deacon@arm.comSigned-off-by: NIngo Molnar <mingo@kernel.org>
      f87f3a32
    • P
      torture: Eliminate torture_runnable and perf_runnable · a2f2577d
      Paul E. McKenney 提交于
      The purpose of torture_runnable is to allow rcutorture and locktorture
      to be started and stopped via sysfs when they are built into the kernel
      (as in not compiled as loadable modules).  However, the 0444 permissions
      for both instances of torture_runnable prevent this use case from ever
      being put into practice.  Given that there have been no complaints
      about this deficiency, it is reasonable to conclude that no one actually
      makes use of this sysfs capability.  The perf_runnable module parameter
      for rcuperf is in the same situation.
      
      This commit therefore removes both torture_runnable instances as well
      as perf_runnable.
      Reported-by: NThomas Gleixner <tglx@linutronix.de>
      Signed-off-by: NPaul E. McKenney <paulmck@linux.vnet.ibm.com>
      a2f2577d