1. 26 7月, 2018 11 次提交
  2. 13 6月, 2018 1 次提交
    • K
      treewide: kmalloc() -> kmalloc_array() · 6da2ec56
      Kees Cook 提交于
      The kmalloc() function has a 2-factor argument form, kmalloc_array(). This
      patch replaces cases of:
      
              kmalloc(a * b, gfp)
      
      with:
              kmalloc_array(a * b, gfp)
      
      as well as handling cases of:
      
              kmalloc(a * b * c, gfp)
      
      with:
      
              kmalloc(array3_size(a, b, c), gfp)
      
      as it's slightly less ugly than:
      
              kmalloc_array(array_size(a, b), c, gfp)
      
      This does, however, attempt to ignore constant size factors like:
      
              kmalloc(4 * 1024, gfp)
      
      though any constants defined via macros get caught up in the conversion.
      
      Any factors with a sizeof() of "unsigned char", "char", and "u8" were
      dropped, since they're redundant.
      
      The tools/ directory was manually excluded, since it has its own
      implementation of kmalloc().
      
      The Coccinelle script used for this was:
      
      // Fix redundant parens around sizeof().
      @@
      type TYPE;
      expression THING, E;
      @@
      
      (
        kmalloc(
      -	(sizeof(TYPE)) * E
      +	sizeof(TYPE) * E
        , ...)
      |
        kmalloc(
      -	(sizeof(THING)) * E
      +	sizeof(THING) * E
        , ...)
      )
      
      // Drop single-byte sizes and redundant parens.
      @@
      expression COUNT;
      typedef u8;
      typedef __u8;
      @@
      
      (
        kmalloc(
      -	sizeof(u8) * (COUNT)
      +	COUNT
        , ...)
      |
        kmalloc(
      -	sizeof(__u8) * (COUNT)
      +	COUNT
        , ...)
      |
        kmalloc(
      -	sizeof(char) * (COUNT)
      +	COUNT
        , ...)
      |
        kmalloc(
      -	sizeof(unsigned char) * (COUNT)
      +	COUNT
        , ...)
      |
        kmalloc(
      -	sizeof(u8) * COUNT
      +	COUNT
        , ...)
      |
        kmalloc(
      -	sizeof(__u8) * COUNT
      +	COUNT
        , ...)
      |
        kmalloc(
      -	sizeof(char) * COUNT
      +	COUNT
        , ...)
      |
        kmalloc(
      -	sizeof(unsigned char) * COUNT
      +	COUNT
        , ...)
      )
      
      // 2-factor product with sizeof(type/expression) and identifier or constant.
      @@
      type TYPE;
      expression THING;
      identifier COUNT_ID;
      constant COUNT_CONST;
      @@
      
      (
      - kmalloc
      + kmalloc_array
        (
      -	sizeof(TYPE) * (COUNT_ID)
      +	COUNT_ID, sizeof(TYPE)
        , ...)
      |
      - kmalloc
      + kmalloc_array
        (
      -	sizeof(TYPE) * COUNT_ID
      +	COUNT_ID, sizeof(TYPE)
        , ...)
      |
      - kmalloc
      + kmalloc_array
        (
      -	sizeof(TYPE) * (COUNT_CONST)
      +	COUNT_CONST, sizeof(TYPE)
        , ...)
      |
      - kmalloc
      + kmalloc_array
        (
      -	sizeof(TYPE) * COUNT_CONST
      +	COUNT_CONST, sizeof(TYPE)
        , ...)
      |
      - kmalloc
      + kmalloc_array
        (
      -	sizeof(THING) * (COUNT_ID)
      +	COUNT_ID, sizeof(THING)
        , ...)
      |
      - kmalloc
      + kmalloc_array
        (
      -	sizeof(THING) * COUNT_ID
      +	COUNT_ID, sizeof(THING)
        , ...)
      |
      - kmalloc
      + kmalloc_array
        (
      -	sizeof(THING) * (COUNT_CONST)
      +	COUNT_CONST, sizeof(THING)
        , ...)
      |
      - kmalloc
      + kmalloc_array
        (
      -	sizeof(THING) * COUNT_CONST
      +	COUNT_CONST, sizeof(THING)
        , ...)
      )
      
      // 2-factor product, only identifiers.
      @@
      identifier SIZE, COUNT;
      @@
      
      - kmalloc
      + kmalloc_array
        (
      -	SIZE * COUNT
      +	COUNT, SIZE
        , ...)
      
      // 3-factor product with 1 sizeof(type) or sizeof(expression), with
      // redundant parens removed.
      @@
      expression THING;
      identifier STRIDE, COUNT;
      type TYPE;
      @@
      
      (
        kmalloc(
      -	sizeof(TYPE) * (COUNT) * (STRIDE)
      +	array3_size(COUNT, STRIDE, sizeof(TYPE))
        , ...)
      |
        kmalloc(
      -	sizeof(TYPE) * (COUNT) * STRIDE
      +	array3_size(COUNT, STRIDE, sizeof(TYPE))
        , ...)
      |
        kmalloc(
      -	sizeof(TYPE) * COUNT * (STRIDE)
      +	array3_size(COUNT, STRIDE, sizeof(TYPE))
        , ...)
      |
        kmalloc(
      -	sizeof(TYPE) * COUNT * STRIDE
      +	array3_size(COUNT, STRIDE, sizeof(TYPE))
        , ...)
      |
        kmalloc(
      -	sizeof(THING) * (COUNT) * (STRIDE)
      +	array3_size(COUNT, STRIDE, sizeof(THING))
        , ...)
      |
        kmalloc(
      -	sizeof(THING) * (COUNT) * STRIDE
      +	array3_size(COUNT, STRIDE, sizeof(THING))
        , ...)
      |
        kmalloc(
      -	sizeof(THING) * COUNT * (STRIDE)
      +	array3_size(COUNT, STRIDE, sizeof(THING))
        , ...)
      |
        kmalloc(
      -	sizeof(THING) * COUNT * STRIDE
      +	array3_size(COUNT, STRIDE, sizeof(THING))
        , ...)
      )
      
      // 3-factor product with 2 sizeof(variable), with redundant parens removed.
      @@
      expression THING1, THING2;
      identifier COUNT;
      type TYPE1, TYPE2;
      @@
      
      (
        kmalloc(
      -	sizeof(TYPE1) * sizeof(TYPE2) * COUNT
      +	array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2))
        , ...)
      |
        kmalloc(
      -	sizeof(TYPE1) * sizeof(THING2) * (COUNT)
      +	array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2))
        , ...)
      |
        kmalloc(
      -	sizeof(THING1) * sizeof(THING2) * COUNT
      +	array3_size(COUNT, sizeof(THING1), sizeof(THING2))
        , ...)
      |
        kmalloc(
      -	sizeof(THING1) * sizeof(THING2) * (COUNT)
      +	array3_size(COUNT, sizeof(THING1), sizeof(THING2))
        , ...)
      |
        kmalloc(
      -	sizeof(TYPE1) * sizeof(THING2) * COUNT
      +	array3_size(COUNT, sizeof(TYPE1), sizeof(THING2))
        , ...)
      |
        kmalloc(
      -	sizeof(TYPE1) * sizeof(THING2) * (COUNT)
      +	array3_size(COUNT, sizeof(TYPE1), sizeof(THING2))
        , ...)
      )
      
      // 3-factor product, only identifiers, with redundant parens removed.
      @@
      identifier STRIDE, SIZE, COUNT;
      @@
      
      (
        kmalloc(
      -	(COUNT) * STRIDE * SIZE
      +	array3_size(COUNT, STRIDE, SIZE)
        , ...)
      |
        kmalloc(
      -	COUNT * (STRIDE) * SIZE
      +	array3_size(COUNT, STRIDE, SIZE)
        , ...)
      |
        kmalloc(
      -	COUNT * STRIDE * (SIZE)
      +	array3_size(COUNT, STRIDE, SIZE)
        , ...)
      |
        kmalloc(
      -	(COUNT) * (STRIDE) * SIZE
      +	array3_size(COUNT, STRIDE, SIZE)
        , ...)
      |
        kmalloc(
      -	COUNT * (STRIDE) * (SIZE)
      +	array3_size(COUNT, STRIDE, SIZE)
        , ...)
      |
        kmalloc(
      -	(COUNT) * STRIDE * (SIZE)
      +	array3_size(COUNT, STRIDE, SIZE)
        , ...)
      |
        kmalloc(
      -	(COUNT) * (STRIDE) * (SIZE)
      +	array3_size(COUNT, STRIDE, SIZE)
        , ...)
      |
        kmalloc(
      -	COUNT * STRIDE * SIZE
      +	array3_size(COUNT, STRIDE, SIZE)
        , ...)
      )
      
      // Any remaining multi-factor products, first at least 3-factor products,
      // when they're not all constants...
      @@
      expression E1, E2, E3;
      constant C1, C2, C3;
      @@
      
      (
        kmalloc(C1 * C2 * C3, ...)
      |
        kmalloc(
      -	(E1) * E2 * E3
      +	array3_size(E1, E2, E3)
        , ...)
      |
        kmalloc(
      -	(E1) * (E2) * E3
      +	array3_size(E1, E2, E3)
        , ...)
      |
        kmalloc(
      -	(E1) * (E2) * (E3)
      +	array3_size(E1, E2, E3)
        , ...)
      |
        kmalloc(
      -	E1 * E2 * E3
      +	array3_size(E1, E2, E3)
        , ...)
      )
      
      // And then all remaining 2 factors products when they're not all constants,
      // keeping sizeof() as the second factor argument.
      @@
      expression THING, E1, E2;
      type TYPE;
      constant C1, C2, C3;
      @@
      
      (
        kmalloc(sizeof(THING) * C2, ...)
      |
        kmalloc(sizeof(TYPE) * C2, ...)
      |
        kmalloc(C1 * C2 * C3, ...)
      |
        kmalloc(C1 * C2, ...)
      |
      - kmalloc
      + kmalloc_array
        (
      -	sizeof(TYPE) * (E2)
      +	E2, sizeof(TYPE)
        , ...)
      |
      - kmalloc
      + kmalloc_array
        (
      -	sizeof(TYPE) * E2
      +	E2, sizeof(TYPE)
        , ...)
      |
      - kmalloc
      + kmalloc_array
        (
      -	sizeof(THING) * (E2)
      +	E2, sizeof(THING)
        , ...)
      |
      - kmalloc
      + kmalloc_array
        (
      -	sizeof(THING) * E2
      +	E2, sizeof(THING)
        , ...)
      |
      - kmalloc
      + kmalloc_array
        (
      -	(E1) * E2
      +	E1, E2
        , ...)
      |
      - kmalloc
      + kmalloc_array
        (
      -	(E1) * (E2)
      +	E1, E2
        , ...)
      |
      - kmalloc
      + kmalloc_array
        (
      -	E1 * E2
      +	E1, E2
        , ...)
      )
      Signed-off-by: NKees Cook <keescook@chromium.org>
      6da2ec56
  3. 06 6月, 2018 1 次提交
    • D
      vfs: change inode times to use struct timespec64 · 95582b00
      Deepa Dinamani 提交于
      struct timespec is not y2038 safe. Transition vfs to use
      y2038 safe struct timespec64 instead.
      
      The change was made with the help of the following cocinelle
      script. This catches about 80% of the changes.
      All the header file and logic changes are included in the
      first 5 rules. The rest are trivial substitutions.
      I avoid changing any of the function signatures or any other
      filesystem specific data structures to keep the patch simple
      for review.
      
      The script can be a little shorter by combining different cases.
      But, this version was sufficient for my usecase.
      
      virtual patch
      
      @ depends on patch @
      identifier now;
      @@
      - struct timespec
      + struct timespec64
        current_time ( ... )
        {
      - struct timespec now = current_kernel_time();
      + struct timespec64 now = current_kernel_time64();
        ...
      - return timespec_trunc(
      + return timespec64_trunc(
        ... );
        }
      
      @ depends on patch @
      identifier xtime;
      @@
       struct \( iattr \| inode \| kstat \) {
       ...
      -       struct timespec xtime;
      +       struct timespec64 xtime;
       ...
       }
      
      @ depends on patch @
      identifier t;
      @@
       struct inode_operations {
       ...
      int (*update_time) (...,
      -       struct timespec t,
      +       struct timespec64 t,
      ...);
       ...
       }
      
      @ depends on patch @
      identifier t;
      identifier fn_update_time =~ "update_time$";
      @@
       fn_update_time (...,
      - struct timespec *t,
      + struct timespec64 *t,
       ...) { ... }
      
      @ depends on patch @
      identifier t;
      @@
      lease_get_mtime( ... ,
      - struct timespec *t
      + struct timespec64 *t
        ) { ... }
      
      @te depends on patch forall@
      identifier ts;
      local idexpression struct inode *inode_node;
      identifier i_xtime =~ "^i_[acm]time$";
      identifier ia_xtime =~ "^ia_[acm]time$";
      identifier fn_update_time =~ "update_time$";
      identifier fn;
      expression e, E3;
      local idexpression struct inode *node1;
      local idexpression struct inode *node2;
      local idexpression struct iattr *attr1;
      local idexpression struct iattr *attr2;
      local idexpression struct iattr attr;
      identifier i_xtime1 =~ "^i_[acm]time$";
      identifier i_xtime2 =~ "^i_[acm]time$";
      identifier ia_xtime1 =~ "^ia_[acm]time$";
      identifier ia_xtime2 =~ "^ia_[acm]time$";
      @@
      (
      (
      - struct timespec ts;
      + struct timespec64 ts;
      |
      - struct timespec ts = current_time(inode_node);
      + struct timespec64 ts = current_time(inode_node);
      )
      
      <+... when != ts
      (
      - timespec_equal(&inode_node->i_xtime, &ts)
      + timespec64_equal(&inode_node->i_xtime, &ts)
      |
      - timespec_equal(&ts, &inode_node->i_xtime)
      + timespec64_equal(&ts, &inode_node->i_xtime)
      |
      - timespec_compare(&inode_node->i_xtime, &ts)
      + timespec64_compare(&inode_node->i_xtime, &ts)
      |
      - timespec_compare(&ts, &inode_node->i_xtime)
      + timespec64_compare(&ts, &inode_node->i_xtime)
      |
      ts = current_time(e)
      |
      fn_update_time(..., &ts,...)
      |
      inode_node->i_xtime = ts
      |
      node1->i_xtime = ts
      |
      ts = inode_node->i_xtime
      |
      <+... attr1->ia_xtime ...+> = ts
      |
      ts = attr1->ia_xtime
      |
      ts.tv_sec
      |
      ts.tv_nsec
      |
      btrfs_set_stack_timespec_sec(..., ts.tv_sec)
      |
      btrfs_set_stack_timespec_nsec(..., ts.tv_nsec)
      |
      - ts = timespec64_to_timespec(
      + ts =
      ...
      -)
      |
      - ts = ktime_to_timespec(
      + ts = ktime_to_timespec64(
      ...)
      |
      - ts = E3
      + ts = timespec_to_timespec64(E3)
      |
      - ktime_get_real_ts(&ts)
      + ktime_get_real_ts64(&ts)
      |
      fn(...,
      - ts
      + timespec64_to_timespec(ts)
      ,...)
      )
      ...+>
      (
      <... when != ts
      - return ts;
      + return timespec64_to_timespec(ts);
      ...>
      )
      |
      - timespec_equal(&node1->i_xtime1, &node2->i_xtime2)
      + timespec64_equal(&node1->i_xtime2, &node2->i_xtime2)
      |
      - timespec_equal(&node1->i_xtime1, &attr2->ia_xtime2)
      + timespec64_equal(&node1->i_xtime2, &attr2->ia_xtime2)
      |
      - timespec_compare(&node1->i_xtime1, &node2->i_xtime2)
      + timespec64_compare(&node1->i_xtime1, &node2->i_xtime2)
      |
      node1->i_xtime1 =
      - timespec_trunc(attr1->ia_xtime1,
      + timespec64_trunc(attr1->ia_xtime1,
      ...)
      |
      - attr1->ia_xtime1 = timespec_trunc(attr2->ia_xtime2,
      + attr1->ia_xtime1 =  timespec64_trunc(attr2->ia_xtime2,
      ...)
      |
      - ktime_get_real_ts(&attr1->ia_xtime1)
      + ktime_get_real_ts64(&attr1->ia_xtime1)
      |
      - ktime_get_real_ts(&attr.ia_xtime1)
      + ktime_get_real_ts64(&attr.ia_xtime1)
      )
      
      @ depends on patch @
      struct inode *node;
      struct iattr *attr;
      identifier fn;
      identifier i_xtime =~ "^i_[acm]time$";
      identifier ia_xtime =~ "^ia_[acm]time$";
      expression e;
      @@
      (
      - fn(node->i_xtime);
      + fn(timespec64_to_timespec(node->i_xtime));
      |
       fn(...,
      - node->i_xtime);
      + timespec64_to_timespec(node->i_xtime));
      |
      - e = fn(attr->ia_xtime);
      + e = fn(timespec64_to_timespec(attr->ia_xtime));
      )
      
      @ depends on patch forall @
      struct inode *node;
      struct iattr *attr;
      identifier i_xtime =~ "^i_[acm]time$";
      identifier ia_xtime =~ "^ia_[acm]time$";
      identifier fn;
      @@
      {
      + struct timespec ts;
      <+...
      (
      + ts = timespec64_to_timespec(node->i_xtime);
      fn (...,
      - &node->i_xtime,
      + &ts,
      ...);
      |
      + ts = timespec64_to_timespec(attr->ia_xtime);
      fn (...,
      - &attr->ia_xtime,
      + &ts,
      ...);
      )
      ...+>
      }
      
      @ depends on patch forall @
      struct inode *node;
      struct iattr *attr;
      struct kstat *stat;
      identifier ia_xtime =~ "^ia_[acm]time$";
      identifier i_xtime =~ "^i_[acm]time$";
      identifier xtime =~ "^[acm]time$";
      identifier fn, ret;
      @@
      {
      + struct timespec ts;
      <+...
      (
      + ts = timespec64_to_timespec(node->i_xtime);
      ret = fn (...,
      - &node->i_xtime,
      + &ts,
      ...);
      |
      + ts = timespec64_to_timespec(node->i_xtime);
      ret = fn (...,
      - &node->i_xtime);
      + &ts);
      |
      + ts = timespec64_to_timespec(attr->ia_xtime);
      ret = fn (...,
      - &attr->ia_xtime,
      + &ts,
      ...);
      |
      + ts = timespec64_to_timespec(attr->ia_xtime);
      ret = fn (...,
      - &attr->ia_xtime);
      + &ts);
      |
      + ts = timespec64_to_timespec(stat->xtime);
      ret = fn (...,
      - &stat->xtime);
      + &ts);
      )
      ...+>
      }
      
      @ depends on patch @
      struct inode *node;
      struct inode *node2;
      identifier i_xtime1 =~ "^i_[acm]time$";
      identifier i_xtime2 =~ "^i_[acm]time$";
      identifier i_xtime3 =~ "^i_[acm]time$";
      struct iattr *attrp;
      struct iattr *attrp2;
      struct iattr attr ;
      identifier ia_xtime1 =~ "^ia_[acm]time$";
      identifier ia_xtime2 =~ "^ia_[acm]time$";
      struct kstat *stat;
      struct kstat stat1;
      struct timespec64 ts;
      identifier xtime =~ "^[acmb]time$";
      expression e;
      @@
      (
      ( node->i_xtime2 \| attrp->ia_xtime2 \| attr.ia_xtime2 \) = node->i_xtime1  ;
      |
       node->i_xtime2 = \( node2->i_xtime1 \| timespec64_trunc(...) \);
      |
       node->i_xtime2 = node->i_xtime1 = node->i_xtime3 = \(ts \| current_time(...) \);
      |
       node->i_xtime1 = node->i_xtime3 = \(ts \| current_time(...) \);
      |
       stat->xtime = node2->i_xtime1;
      |
       stat1.xtime = node2->i_xtime1;
      |
      ( node->i_xtime2 \| attrp->ia_xtime2 \) = attrp->ia_xtime1  ;
      |
      ( attrp->ia_xtime1 \| attr.ia_xtime1 \) = attrp2->ia_xtime2;
      |
      - e = node->i_xtime1;
      + e = timespec64_to_timespec( node->i_xtime1 );
      |
      - e = attrp->ia_xtime1;
      + e = timespec64_to_timespec( attrp->ia_xtime1 );
      |
      node->i_xtime1 = current_time(...);
      |
       node->i_xtime2 = node->i_xtime1 = node->i_xtime3 =
      - e;
      + timespec_to_timespec64(e);
      |
       node->i_xtime1 = node->i_xtime3 =
      - e;
      + timespec_to_timespec64(e);
      |
      - node->i_xtime1 = e;
      + node->i_xtime1 = timespec_to_timespec64(e);
      )
      Signed-off-by: NDeepa Dinamani <deepa.kernel@gmail.com>
      Cc: <anton@tuxera.com>
      Cc: <balbi@kernel.org>
      Cc: <bfields@fieldses.org>
      Cc: <darrick.wong@oracle.com>
      Cc: <dhowells@redhat.com>
      Cc: <dsterba@suse.com>
      Cc: <dwmw2@infradead.org>
      Cc: <hch@lst.de>
      Cc: <hirofumi@mail.parknet.co.jp>
      Cc: <hubcap@omnibond.com>
      Cc: <jack@suse.com>
      Cc: <jaegeuk@kernel.org>
      Cc: <jaharkes@cs.cmu.edu>
      Cc: <jslaby@suse.com>
      Cc: <keescook@chromium.org>
      Cc: <mark@fasheh.com>
      Cc: <miklos@szeredi.hu>
      Cc: <nico@linaro.org>
      Cc: <reiserfs-devel@vger.kernel.org>
      Cc: <richard@nod.at>
      Cc: <sage@redhat.com>
      Cc: <sfrench@samba.org>
      Cc: <swhiteho@redhat.com>
      Cc: <tj@kernel.org>
      Cc: <trond.myklebust@primarydata.com>
      Cc: <tytso@mit.edu>
      Cc: <viro@zeniv.linux.org.uk>
      95582b00
  4. 31 5月, 2018 5 次提交
  5. 23 3月, 2018 1 次提交
    • M
      fuse: define the filesystem as untrusted · 0834136a
      Mimi Zohar 提交于
      Files on FUSE can change at any point in time without IMA being able
      to detect it.  The file data read for the file signature verification
      could be totally different from what is subsequently read, making the
      signature verification useless.
      
      FUSE can be mounted by unprivileged users either today with fusermount
      installed with setuid, or soon with the upcoming patches to allow FUSE
      mounts in a non-init user namespace.
      
      This patch sets the SB_I_IMA_UNVERIFIABLE_SIGNATURE flag and when
      appropriate sets the SB_I_UNTRUSTED_MOUNTER flag.
      Signed-off-by: NMimi Zohar <zohar@linux.vnet.ibm.com>
      Cc: Miklos Szeredi <miklos@szeredi.hu>
      Cc: Seth Forshee <seth.forshee@canonical.com>
      Cc: Dongsu Park <dongsu@kinvolk.io>
      Cc: Alban Crequy <alban@kinvolk.io>
      Acked-by: NSerge Hallyn <serge@hallyn.com>
      Acked-by: N"Eric W. Biederman" <ebiederm@xmission.com>
      0834136a
  6. 21 3月, 2018 8 次提交
    • M
      fuse: honor AT_STATX_FORCE_SYNC · bf5c1898
      Miklos Szeredi 提交于
      Force a refresh of attributes from the fuse server in this case.
      Signed-off-by: NMiklos Szeredi <mszeredi@redhat.com>
      bf5c1898
    • M
      fuse: honor AT_STATX_DONT_SYNC · ff1b89f3
      Miklos Szeredi 提交于
      The description of this flag says "Don't sync attributes with the server".
      In other words: always use the attributes cached in the kernel and don't
      send network or local messages to refresh the attributes.
      Signed-off-by: NMiklos Szeredi <mszeredi@redhat.com>
      ff1b89f3
    • S
      fuse: Restrict allow_other to the superblock's namespace or a descendant · 73f03c2b
      Seth Forshee 提交于
      Unprivileged users are normally restricted from mounting with the
      allow_other option by system policy, but this could be bypassed for a mount
      done with user namespace root permissions. In such cases allow_other should
      not allow users outside the userns to access the mount as doing so would
      give the unprivileged user the ability to manipulate processes it would
      otherwise be unable to manipulate. Restrict allow_other to apply to users
      in the same userns used at mount or a descendant of that namespace. Also
      export current_in_userns() for use by fuse when built as a module.
      Reviewed-by: NSerge Hallyn <serge@hallyn.com>
      Signed-off-by: NSeth Forshee <seth.forshee@canonical.com>
      Signed-off-by: NDongsu Park <dongsu@kinvolk.io>
      Signed-off-by: NEric W. Biederman <ebiederm@xmission.com>
      Signed-off-by: NMiklos Szeredi <mszeredi@redhat.com>
      73f03c2b
    • E
      fuse: Support fuse filesystems outside of init_user_ns · 8cb08329
      Eric W. Biederman 提交于
      In order to support mounts from namespaces other than init_user_ns, fuse
      must translate uids and gids to/from the userns of the process servicing
      requests on /dev/fuse. This patch does that, with a couple of restrictions
      on the namespace:
      
       - The userns for the fuse connection is fixed to the namespace
         from which /dev/fuse is opened.
      
       - The namespace must be the same as s_user_ns.
      
      These restrictions simplify the implementation by avoiding the need to pass
      around userns references and by allowing fuse to rely on the checks in
      setattr_prepare for ownership changes.  Either restriction could be relaxed
      in the future if needed.
      
      For cuse the userns used is the opener of /dev/cuse.  Semantically the cuse
      support does not appear safe for unprivileged users.  Practically the
      permissions on /dev/cuse only make it accessible to the global root user.
      If something slips through the cracks in a user namespace the only users
      who will be able to use the cuse device are those users mapped into the
      user namespace.
      
      Translation in the posix acl is updated to use the uuser namespace of the
      filesystem.  Avoiding cases which might bypass this translation is handled
      in a following change.
      
      This change is stronlgy based on a similar change from Seth Forshee and
      Dongsu Park.
      
      Cc: Seth Forshee <seth.forshee@canonical.com>
      Cc: Dongsu Park <dongsu@kinvolk.io>
      Signed-off-by: NEric W. Biederman <ebiederm@xmission.com>
      Signed-off-by: NMiklos Szeredi <mszeredi@redhat.com>
      8cb08329
    • E
      fuse: Fail all requests with invalid uids or gids · c9582eb0
      Eric W. Biederman 提交于
      Upon a cursory examinination the uid and gid of a fuse request are
      necessary for correct operation.  Failing a fuse request where those
      values are not reliable seems a straight forward and reliable means of
      ensuring that fuse requests with bad data are not sent or processed.
      
      In most cases the vfs will avoid actions it suspects will cause
      an inode write back of an inode with an invalid uid or gid.  But that does
      not map precisely to what fuse is doing, so test for this and solve
      this at the fuse level as well.
      
      Performing this work in fuse_req_init_context is cheap as the code is
      already performing the translation here and only needs to check the
      result of the translation to see if things are not representable in
      a form the fuse server can handle.
      
      [SzM] Don't zero the context for the nofail case, just keep using the
      munging version (makes sense for debugging and doesn't hurt).
      Signed-off-by: NEric W. Biederman <ebiederm@xmission.com>
      Signed-off-by: NMiklos Szeredi <mszeredi@redhat.com>
      c9582eb0
    • E
      fuse: Remove the buggy retranslation of pids in fuse_dev_do_read · dbf107b2
      Eric W. Biederman 提交于
      At the point of fuse_dev_do_read the user space process that initiated the
      action on the fuse filesystem may no longer exist.  The process have been
      killed or may have fired an asynchronous request and exited.
      
      If the initial process has exited, the code "pid_vnr(find_pid_ns(in->h.pid,
      fc->pid_ns)" will either return a pid of 0, or in the unlikely event that
      the pid has been reallocated it can return practically any pid.  Any pid is
      possible as the pid allocator allocates pid numbers in different pid
      namespaces independently.
      
      The only way to make translation in fuse_dev_do_read reliable is to call
      get_pid in fuse_req_init_context, and pid_vnr followed by put_pid in
      fuse_dev_do_read.  That reference counting in other contexts has been shown
      to bounce cache lines between processors and in general be slow.  So that
      is not desirable.
      
      The only known user of running the fuse server in a different pid namespace
      from the filesystem does not care what the pids are in the fuse messages so
      removing this code should not matter.
      
      Getting the translation to a server running outside of the pid namespace of
      a container can still be achieved by playing setns games at mount time.  It
      is also possible to add an option to pass a pid namespace into the fuse
      filesystem at mount time.
      
      Fixes: 5d6d3a30 ("fuse: allow server to run in different pid_ns")
      Signed-off-by: N"Eric W. Biederman" <ebiederm@xmission.com>
      Signed-off-by: NMiklos Szeredi <mszeredi@redhat.com>
      dbf107b2
    • S
      fuse: return -ECONNABORTED on /dev/fuse read after abort · 3b7008b2
      Szymon Lukasz 提交于
      Currently the userspace has no way of knowing whether the fuse
      connection ended because of umount or abort via sysfs. It makes it hard
      for filesystems to free the mountpoint after abort without worrying
      about removing some new mount.
      
      The patch fixes it by returning different errors when userspace reads
      from /dev/fuse (-ENODEV for umount and -ECONNABORTED for abort).
      
      Add a new capability flag FUSE_ABORT_ERROR. If set and the connection is
      gone because of sysfs abort, reading from the device will return
      -ECONNABORTED.
      Signed-off-by: NSzymon Lukasz <noh4hss@gmail.com>
      Signed-off-by: NMiklos Szeredi <mszeredi@redhat.com>
      3b7008b2
    • M
      fuse: atomic_o_trunc should truncate pagecache · df0e91d4
      Miklos Szeredi 提交于
      Fuse has an "atomic_o_trunc" mode, where userspace filesystem uses the
      O_TRUNC flag in the OPEN request to truncate the file atomically with the
      open.
      
      In this mode there's no need to send a SETATTR request to userspace after
      the open, so fuse_do_setattr() checks this mode and returns.  But this
      misses the important step of truncating the pagecache.
      
      Add the missing parts of truncation to the ATTR_OPEN branch.
      Reported-by: NChad Austin <chadaustin@fb.com>
      Fixes: 6ff958ed ("fuse: add atomic open+truncate support")
      Signed-off-by: NMiklos Szeredi <mszeredi@redhat.com>
      Cc: <stable@vger.kernel.org>
      df0e91d4
  7. 12 2月, 2018 1 次提交
    • L
      vfs: do bulk POLL* -> EPOLL* replacement · a9a08845
      Linus Torvalds 提交于
      This is the mindless scripted replacement of kernel use of POLL*
      variables as described by Al, done by this script:
      
          for V in IN OUT PRI ERR RDNORM RDBAND WRNORM WRBAND HUP RDHUP NVAL MSG; do
              L=`git grep -l -w POLL$V | grep -v '^t' | grep -v /um/ | grep -v '^sa' | grep -v '/poll.h$'|grep -v '^D'`
              for f in $L; do sed -i "-es/^\([^\"]*\)\(\<POLL$V\>\)/\\1E\\2/" $f; done
          done
      
      with de-mangling cleanups yet to come.
      
      NOTE! On almost all architectures, the EPOLL* constants have the same
      values as the POLL* constants do.  But they keyword here is "almost".
      For various bad reasons they aren't the same, and epoll() doesn't
      actually work quite correctly in some cases due to this on Sparc et al.
      
      The next patch from Al will sort out the final differences, and we
      should be all done.
      Scripted-by: NAl Viro <viro@zeniv.linux.org.uk>
      Signed-off-by: NLinus Torvalds <torvalds@linux-foundation.org>
      a9a08845
  8. 30 11月, 2017 1 次提交
  9. 28 11月, 2017 2 次提交
    • A
      fs: annotate ->poll() instances · 076ccb76
      Al Viro 提交于
      Signed-off-by: NAl Viro <viro@zeniv.linux.org.uk>
      076ccb76
    • L
      Rename superblock flags (MS_xyz -> SB_xyz) · 1751e8a6
      Linus Torvalds 提交于
      This is a pure automated search-and-replace of the internal kernel
      superblock flags.
      
      The s_flags are now called SB_*, with the names and the values for the
      moment mirroring the MS_* flags that they're equivalent to.
      
      Note how the MS_xyz flags are the ones passed to the mount system call,
      while the SB_xyz flags are what we then use in sb->s_flags.
      
      The script to do this was:
      
          # places to look in; re security/*: it generally should *not* be
          # touched (that stuff parses mount(2) arguments directly), but
          # there are two places where we really deal with superblock flags.
          FILES="drivers/mtd drivers/staging/lustre fs ipc mm \
                  include/linux/fs.h include/uapi/linux/bfs_fs.h \
                  security/apparmor/apparmorfs.c security/apparmor/include/lib.h"
          # the list of MS_... constants
          SYMS="RDONLY NOSUID NODEV NOEXEC SYNCHRONOUS REMOUNT MANDLOCK \
                DIRSYNC NOATIME NODIRATIME BIND MOVE REC VERBOSE SILENT \
                POSIXACL UNBINDABLE PRIVATE SLAVE SHARED RELATIME KERNMOUNT \
                I_VERSION STRICTATIME LAZYTIME SUBMOUNT NOREMOTELOCK NOSEC BORN \
                ACTIVE NOUSER"
      
          SED_PROG=
          for i in $SYMS; do SED_PROG="$SED_PROG -e s/MS_$i/SB_$i/g"; done
      
          # we want files that contain at least one of MS_...,
          # with fs/namespace.c and fs/pnode.c excluded.
          L=$(for i in $SYMS; do git grep -w -l MS_$i $FILES; done| sort|uniq|grep -v '^fs/namespace.c'|grep -v '^fs/pnode.c')
      
          for f in $L; do sed -i $f $SED_PROG; done
      Requested-by: NAl Viro <viro@zeniv.linux.org.uk>
      Signed-off-by: NLinus Torvalds <torvalds@linux-foundation.org>
      1751e8a6
  10. 16 11月, 2017 2 次提交
  11. 31 10月, 2017 1 次提交
    • K
      treewide: Fix function prototypes for module_param_call() · e4dca7b7
      Kees Cook 提交于
      Several function prototypes for the set/get functions defined by
      module_param_call() have a slightly wrong argument types. This fixes
      those in an effort to clean up the calls when running under type-enforced
      compiler instrumentation for CFI. This is the result of running the
      following semantic patch:
      
      @match_module_param_call_function@
      declarer name module_param_call;
      identifier _name, _set_func, _get_func;
      expression _arg, _mode;
      @@
      
       module_param_call(_name, _set_func, _get_func, _arg, _mode);
      
      @fix_set_prototype
       depends on match_module_param_call_function@
      identifier match_module_param_call_function._set_func;
      identifier _val, _param;
      type _val_type, _param_type;
      @@
      
       int _set_func(
      -_val_type _val
      +const char * _val
       ,
      -_param_type _param
      +const struct kernel_param * _param
       ) { ... }
      
      @fix_get_prototype
       depends on match_module_param_call_function@
      identifier match_module_param_call_function._get_func;
      identifier _val, _param;
      type _val_type, _param_type;
      @@
      
       int _get_func(
      -_val_type _val
      +char * _val
       ,
      -_param_type _param
      +const struct kernel_param * _param
       ) { ... }
      
      Two additional by-hand changes are included for places where the above
      Coccinelle script didn't notice them:
      
      	drivers/platform/x86/thinkpad_acpi.c
      	fs/lockd/svc.c
      Signed-off-by: NKees Cook <keescook@chromium.org>
      Signed-off-by: NJessica Yu <jeyu@kernel.org>
      e4dca7b7
  12. 25 10月, 2017 2 次提交
    • M
      fuse: fix READDIRPLUS skipping an entry · c6cdd514
      Miklos Szeredi 提交于
      Marios Titas running a Haskell program noticed a problem with fuse's
      readdirplus: when it is interrupted by a signal, it skips one directory
      entry.
      
      The reason is that fuse erronously updates ctx->pos after a failed
      dir_emit().
      
      The issue originates from the patch adding readdirplus support.
      Reported-by: NJakob Unterwurzacher <jakobunt@gmail.com>
      Tested-by: Marios Titas <redneb@gmx.com> 
      Signed-off-by: NMiklos Szeredi <mszeredi@redhat.com>
      Fixes: 0b05b183 ("fuse: implement NFS-like readdirplus support")
      Cc: <stable@vger.kernel.org> # v3.9
      c6cdd514
    • M
      locking/atomics: COCCINELLE/treewide: Convert trivial ACCESS_ONCE() patterns... · 6aa7de05
      Mark Rutland 提交于
      locking/atomics: COCCINELLE/treewide: Convert trivial ACCESS_ONCE() patterns to READ_ONCE()/WRITE_ONCE()
      
      Please do not apply this to mainline directly, instead please re-run the
      coccinelle script shown below and apply its output.
      
      For several reasons, it is desirable to use {READ,WRITE}_ONCE() in
      preference to ACCESS_ONCE(), and new code is expected to use one of the
      former. So far, there's been no reason to change most existing uses of
      ACCESS_ONCE(), as these aren't harmful, and changing them results in
      churn.
      
      However, for some features, the read/write distinction is critical to
      correct operation. To distinguish these cases, separate read/write
      accessors must be used. This patch migrates (most) remaining
      ACCESS_ONCE() instances to {READ,WRITE}_ONCE(), using the following
      coccinelle script:
      
      ----
      // Convert trivial ACCESS_ONCE() uses to equivalent READ_ONCE() and
      // WRITE_ONCE()
      
      // $ make coccicheck COCCI=/home/mark/once.cocci SPFLAGS="--include-headers" MODE=patch
      
      virtual patch
      
      @ depends on patch @
      expression E1, E2;
      @@
      
      - ACCESS_ONCE(E1) = E2
      + WRITE_ONCE(E1, E2)
      
      @ depends on patch @
      expression E;
      @@
      
      - ACCESS_ONCE(E)
      + READ_ONCE(E)
      ----
      Signed-off-by: NMark Rutland <mark.rutland@arm.com>
      Signed-off-by: NPaul E. McKenney <paulmck@linux.vnet.ibm.com>
      Cc: Linus Torvalds <torvalds@linux-foundation.org>
      Cc: Peter Zijlstra <peterz@infradead.org>
      Cc: Thomas Gleixner <tglx@linutronix.de>
      Cc: davem@davemloft.net
      Cc: linux-arch@vger.kernel.org
      Cc: mpe@ellerman.id.au
      Cc: shuah@kernel.org
      Cc: snitzer@redhat.com
      Cc: thor.thayer@linux.intel.com
      Cc: tj@kernel.org
      Cc: viro@zeniv.linux.org.uk
      Cc: will.deacon@arm.com
      Link: http://lkml.kernel.org/r/1508792849-3115-19-git-send-email-paulmck@linux.vnet.ibm.comSigned-off-by: NIngo Molnar <mingo@kernel.org>
      6aa7de05
  13. 19 10月, 2017 1 次提交
  14. 12 9月, 2017 3 次提交
    • M
      fuse: getattr cleanup · 5b97eeac
      Miklos Szeredi 提交于
      The refreshed argument isn't used by any caller, get rid of it.
      
      Use a helper for just updating the inode (no need to fill in a kstat).
      Signed-off-by: NMiklos Szeredi <mszeredi@redhat.com>
      5b97eeac
    • M
      fuse: honor iocb sync flags on write · e1c0eecb
      Miklos Szeredi 提交于
      If the IOCB_DSYNC flag is set a sync is not being performed by
      fuse_file_write_iter.
      
      Honor IOCB_DSYNC/IOCB_SYNC by setting O_DYSNC/O_SYNC respectively in the
      flags filed of the write request.
      
      We don't need to sync data or metadata, since fuse_perform_write() does
      write-through and the filesystem is responsible for updating file times.
      
      Original patch by Vitaly Zolotusky.
      Reported-by: NNate Clark <nate@neworld.us>
      Cc: Vitaly Zolotusky <vitaly@unitc.com>.
      Signed-off-by: NMiklos Szeredi <mszeredi@redhat.com>
      e1c0eecb
    • M
      fuse: allow server to run in different pid_ns · 5d6d3a30
      Miklos Szeredi 提交于
      Commit 0b6e9ea0 ("fuse: Add support for pid namespaces") broke
      Sandstorm.io development tools, which have been sending FUSE file
      descriptors across PID namespace boundaries since early 2014.
      
      The above patch added a check that prevented I/O on the fuse device file
      descriptor if the pid namespace of the reader/writer was different from the
      pid namespace of the mounter.  With this change passing the device file
      descriptor to a different pid namespace simply doesn't work.  The check was
      added because pids are transferred to/from the fuse userspace server in the
      namespace registered at mount time.
      
      To fix this regression, remove the checks and do the following:
      
      1) the pid in the request header (the pid of the task that initiated the
      filesystem operation) is translated to the reader's pid namespace.  If a
      mapping doesn't exist for this pid, then a zero pid is used.  Note: even if
      a mapping would exist between the initiator task's pid namespace and the
      reader's pid namespace the pid will be zero if either mapping from
      initator's to mounter's namespace or mapping from mounter's to reader's
      namespace doesn't exist.
      
      2) The lk.pid value in setlk/setlkw requests and getlk reply is left alone.
      Userspace should not interpret this value anyway.  Also allow the
      setlk/setlkw operations if the pid of the task cannot be represented in the
      mounter's namespace (pid being zero in that case).
      Reported-by: NKenton Varda <kenton@sandstorm.io>
      Signed-off-by: NMiklos Szeredi <mszeredi@redhat.com>
      Fixes: 0b6e9ea0 ("fuse: Add support for pid namespaces")
      Cc: <stable@vger.kernel.org> # v4.12+
      Cc: Eric W. Biederman <ebiederm@xmission.com>
      Cc: Seth Forshee <seth.forshee@canonical.com>
      5d6d3a30