1. 15 10月, 2010 1 次提交
    • A
      llseek: automatically add .llseek fop · 6038f373
      Arnd Bergmann 提交于
      All file_operations should get a .llseek operation so we can make
      nonseekable_open the default for future file operations without a
      .llseek pointer.
      
      The three cases that we can automatically detect are no_llseek, seq_lseek
      and default_llseek. For cases where we can we can automatically prove that
      the file offset is always ignored, we use noop_llseek, which maintains
      the current behavior of not returning an error from a seek.
      
      New drivers should normally not use noop_llseek but instead use no_llseek
      and call nonseekable_open at open time.  Existing drivers can be converted
      to do the same when the maintainer knows for certain that no user code
      relies on calling seek on the device file.
      
      The generated code is often incorrectly indented and right now contains
      comments that clarify for each added line why a specific variant was
      chosen. In the version that gets submitted upstream, the comments will
      be gone and I will manually fix the indentation, because there does not
      seem to be a way to do that using coccinelle.
      
      Some amount of new code is currently sitting in linux-next that should get
      the same modifications, which I will do at the end of the merge window.
      
      Many thanks to Julia Lawall for helping me learn to write a semantic
      patch that does all this.
      
      ===== begin semantic patch =====
      // This adds an llseek= method to all file operations,
      // as a preparation for making no_llseek the default.
      //
      // The rules are
      // - use no_llseek explicitly if we do nonseekable_open
      // - use seq_lseek for sequential files
      // - use default_llseek if we know we access f_pos
      // - use noop_llseek if we know we don't access f_pos,
      //   but we still want to allow users to call lseek
      //
      @ open1 exists @
      identifier nested_open;
      @@
      nested_open(...)
      {
      <+...
      nonseekable_open(...)
      ...+>
      }
      
      @ open exists@
      identifier open_f;
      identifier i, f;
      identifier open1.nested_open;
      @@
      int open_f(struct inode *i, struct file *f)
      {
      <+...
      (
      nonseekable_open(...)
      |
      nested_open(...)
      )
      ...+>
      }
      
      @ read disable optional_qualifier exists @
      identifier read_f;
      identifier f, p, s, off;
      type ssize_t, size_t, loff_t;
      expression E;
      identifier func;
      @@
      ssize_t read_f(struct file *f, char *p, size_t s, loff_t *off)
      {
      <+...
      (
         *off = E
      |
         *off += E
      |
         func(..., off, ...)
      |
         E = *off
      )
      ...+>
      }
      
      @ read_no_fpos disable optional_qualifier exists @
      identifier read_f;
      identifier f, p, s, off;
      type ssize_t, size_t, loff_t;
      @@
      ssize_t read_f(struct file *f, char *p, size_t s, loff_t *off)
      {
      ... when != off
      }
      
      @ write @
      identifier write_f;
      identifier f, p, s, off;
      type ssize_t, size_t, loff_t;
      expression E;
      identifier func;
      @@
      ssize_t write_f(struct file *f, const char *p, size_t s, loff_t *off)
      {
      <+...
      (
        *off = E
      |
        *off += E
      |
        func(..., off, ...)
      |
        E = *off
      )
      ...+>
      }
      
      @ write_no_fpos @
      identifier write_f;
      identifier f, p, s, off;
      type ssize_t, size_t, loff_t;
      @@
      ssize_t write_f(struct file *f, const char *p, size_t s, loff_t *off)
      {
      ... when != off
      }
      
      @ fops0 @
      identifier fops;
      @@
      struct file_operations fops = {
       ...
      };
      
      @ has_llseek depends on fops0 @
      identifier fops0.fops;
      identifier llseek_f;
      @@
      struct file_operations fops = {
      ...
       .llseek = llseek_f,
      ...
      };
      
      @ has_read depends on fops0 @
      identifier fops0.fops;
      identifier read_f;
      @@
      struct file_operations fops = {
      ...
       .read = read_f,
      ...
      };
      
      @ has_write depends on fops0 @
      identifier fops0.fops;
      identifier write_f;
      @@
      struct file_operations fops = {
      ...
       .write = write_f,
      ...
      };
      
      @ has_open depends on fops0 @
      identifier fops0.fops;
      identifier open_f;
      @@
      struct file_operations fops = {
      ...
       .open = open_f,
      ...
      };
      
      // use no_llseek if we call nonseekable_open
      ////////////////////////////////////////////
      @ nonseekable1 depends on !has_llseek && has_open @
      identifier fops0.fops;
      identifier nso ~= "nonseekable_open";
      @@
      struct file_operations fops = {
      ...  .open = nso, ...
      +.llseek = no_llseek, /* nonseekable */
      };
      
      @ nonseekable2 depends on !has_llseek @
      identifier fops0.fops;
      identifier open.open_f;
      @@
      struct file_operations fops = {
      ...  .open = open_f, ...
      +.llseek = no_llseek, /* open uses nonseekable */
      };
      
      // use seq_lseek for sequential files
      /////////////////////////////////////
      @ seq depends on !has_llseek @
      identifier fops0.fops;
      identifier sr ~= "seq_read";
      @@
      struct file_operations fops = {
      ...  .read = sr, ...
      +.llseek = seq_lseek, /* we have seq_read */
      };
      
      // use default_llseek if there is a readdir
      ///////////////////////////////////////////
      @ fops1 depends on !has_llseek && !nonseekable1 && !nonseekable2 && !seq @
      identifier fops0.fops;
      identifier readdir_e;
      @@
      // any other fop is used that changes pos
      struct file_operations fops = {
      ... .readdir = readdir_e, ...
      +.llseek = default_llseek, /* readdir is present */
      };
      
      // use default_llseek if at least one of read/write touches f_pos
      /////////////////////////////////////////////////////////////////
      @ fops2 depends on !fops1 && !has_llseek && !nonseekable1 && !nonseekable2 && !seq @
      identifier fops0.fops;
      identifier read.read_f;
      @@
      // read fops use offset
      struct file_operations fops = {
      ... .read = read_f, ...
      +.llseek = default_llseek, /* read accesses f_pos */
      };
      
      @ fops3 depends on !fops1 && !fops2 && !has_llseek && !nonseekable1 && !nonseekable2 && !seq @
      identifier fops0.fops;
      identifier write.write_f;
      @@
      // write fops use offset
      struct file_operations fops = {
      ... .write = write_f, ...
      +	.llseek = default_llseek, /* write accesses f_pos */
      };
      
      // Use noop_llseek if neither read nor write accesses f_pos
      ///////////////////////////////////////////////////////////
      
      @ fops4 depends on !fops1 && !fops2 && !fops3 && !has_llseek && !nonseekable1 && !nonseekable2 && !seq @
      identifier fops0.fops;
      identifier read_no_fpos.read_f;
      identifier write_no_fpos.write_f;
      @@
      // write fops use offset
      struct file_operations fops = {
      ...
       .write = write_f,
       .read = read_f,
      ...
      +.llseek = noop_llseek, /* read and write both use no f_pos */
      };
      
      @ depends on has_write && !has_read && !fops1 && !fops2 && !has_llseek && !nonseekable1 && !nonseekable2 && !seq @
      identifier fops0.fops;
      identifier write_no_fpos.write_f;
      @@
      struct file_operations fops = {
      ... .write = write_f, ...
      +.llseek = noop_llseek, /* write uses no f_pos */
      };
      
      @ depends on has_read && !has_write && !fops1 && !fops2 && !has_llseek && !nonseekable1 && !nonseekable2 && !seq @
      identifier fops0.fops;
      identifier read_no_fpos.read_f;
      @@
      struct file_operations fops = {
      ... .read = read_f, ...
      +.llseek = noop_llseek, /* read uses no f_pos */
      };
      
      @ depends on !has_read && !has_write && !fops1 && !fops2 && !has_llseek && !nonseekable1 && !nonseekable2 && !seq @
      identifier fops0.fops;
      @@
      struct file_operations fops = {
      ...
      +.llseek = noop_llseek, /* no read or write fn */
      };
      ===== End semantic patch =====
      Signed-off-by: NArnd Bergmann <arnd@arndb.de>
      Cc: Julia Lawall <julia@diku.dk>
      Cc: Christoph Hellwig <hch@infradead.org>
      6038f373
  2. 16 9月, 2010 11 次提交
  3. 13 9月, 2010 1 次提交
  4. 12 9月, 2010 6 次提交
  5. 11 9月, 2010 8 次提交
  6. 10 9月, 2010 13 次提交
    • D
      xfs: log IO completion workqueue is a high priority queue · 51749e47
      Dave Chinner 提交于
      The workqueue implementation in 2.6.36-rcX has changed, resulting
      in the workqueues no longer having dedicated threads for work
      processing. This has caused severe livelocks under heavy parallel
      create workloads because the log IO completions have been getting
      held up behind metadata IO completions.  Hence log commits would
      stall, memory allocation would stall because pages could not be
      cleaned, and lock contention on the AIL during inode IO completion
      processing was being seen to slow everything down even further.
      
      By making the log Io completion workqueue a high priority workqueue,
      they are queued ahead of all data/metadata IO completions and
      processed before the data/metadata completions. Hence the log never
      gets stalled, and operations needed to clean memory can continue as
      quickly as possible. This avoids the livelock conditions and allos
      the system to keep running under heavy load as per normal.
      Signed-off-by: NDave Chinner <dchinner@redhat.com>
      Reviewed-by: NChristoph Hellwig <hch@lst.de>
      Signed-off-by: NAlex Elder <aelder@sgi.com>
      51749e47
    • R
      execve: make responsive to SIGKILL with large arguments · 9aea5a65
      Roland McGrath 提交于
      An execve with a very large total of argument/environment strings
      can take a really long time in the execve system call.  It runs
      uninterruptibly to count and copy all the strings.  This change
      makes it abort the exec quickly if sent a SIGKILL.
      
      Note that this is the conservative change, to interrupt only for
      SIGKILL, by using fatal_signal_pending().  It would be perfectly
      correct semantics to let any signal interrupt the string-copying in
      execve, i.e. use signal_pending() instead of fatal_signal_pending().
      We'll save that change for later, since it could have user-visible
      consequences, such as having a timer set too quickly make it so that
      an execve can never complete, though it always happened to work before.
      Signed-off-by: NRoland McGrath <roland@redhat.com>
      Reviewed-by: NKOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
      Signed-off-by: NLinus Torvalds <torvalds@linux-foundation.org>
      9aea5a65
    • R
      execve: improve interactivity with large arguments · 7993bc1f
      Roland McGrath 提交于
      This adds a preemption point during the copying of the argument and
      environment strings for execve, in copy_strings().  There is already
      a preemption point in the count() loop, so this doesn't add any new
      points in the abstract sense.
      
      When the total argument+environment strings are very large, the time
      spent copying them can be much more than a normal user time slice.
      So this change improves the interactivity of the rest of the system
      when one process is doing an execve with very large arguments.
      Signed-off-by: NRoland McGrath <roland@redhat.com>
      Reviewed-by: NKOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
      Signed-off-by: NLinus Torvalds <torvalds@linux-foundation.org>
      7993bc1f
    • R
      setup_arg_pages: diagnose excessive argument size · 1b528181
      Roland McGrath 提交于
      The CONFIG_STACK_GROWSDOWN variant of setup_arg_pages() does not
      check the size of the argument/environment area on the stack.
      When it is unworkably large, shift_arg_pages() hits its BUG_ON.
      This is exploitable with a very large RLIMIT_STACK limit, to
      create a crash pretty easily.
      
      Check that the initial stack is not too large to make it possible
      to map in any executable.  We're not checking that the actual
      executable (or intepreter, for binfmt_elf) will fit.  So those
      mappings might clobber part of the initial stack mapping.  But
      that is just userland lossage that userland made happen, not a
      kernel problem.
      Signed-off-by: NRoland McGrath <roland@redhat.com>
      Reviewed-by: NKOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
      Signed-off-by: NLinus Torvalds <torvalds@linux-foundation.org>
      1b528181
    • L
      Merge branch 'kvm-updates/2.6.36' of git://git.kernel.org/pub/scm/virt/kvm/kvm · be6200aa
      Linus Torvalds 提交于
      * 'kvm-updates/2.6.36' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
        KVM: x86: Perform hardware_enable in CPU_STARTING callback
        KVM: i8259: fix migration
        KVM: fix i8259 oops when no vcpus are online
        KVM: x86 emulator: fix regression with cmpxchg8b on i386 hosts
      be6200aa
    • L
      Merge branch 'perf-fixes-for-linus' of... · f2955b49
      Linus Torvalds 提交于
      Merge branch 'perf-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
      
      * 'perf-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
        tracing: t_start: reset FTRACE_ITER_HASH in case of seek/pread
        perf symbols: Fix multiple initialization of symbol system
        perf: Fix CPU hotplug
        perf, trace: Fix module leak
        tracing/kprobe: Fix handling of C-unlike argument names
        tracing/kprobes: Fix handling of argument names
        perf probe: Fix handling of arguments names
        perf probe: Fix return probe support
        tracing/kprobe: Fix a memory leak in error case
        tracing: Do not allow llseek to set_ftrace_filter
      f2955b49
    • D
      KEYS: Fix bug in keyctl_session_to_parent() if parent has no session keyring · 3d96406c
      David Howells 提交于
      Fix a bug in keyctl_session_to_parent() whereby it tries to check the ownership
      of the parent process's session keyring whether or not the parent has a session
      keyring [CVE-2010-2960].
      
      This results in the following oops:
      
        BUG: unable to handle kernel NULL pointer dereference at 00000000000000a0
        IP: [<ffffffff811ae4dd>] keyctl_session_to_parent+0x251/0x443
        ...
        Call Trace:
         [<ffffffff811ae2f3>] ? keyctl_session_to_parent+0x67/0x443
         [<ffffffff8109d286>] ? __do_fault+0x24b/0x3d0
         [<ffffffff811af98c>] sys_keyctl+0xb4/0xb8
         [<ffffffff81001eab>] system_call_fastpath+0x16/0x1b
      
      if the parent process has no session keyring.
      
      If the system is using pam_keyinit then it mostly protected against this as all
      processes derived from a login will have inherited the session keyring created
      by pam_keyinit during the log in procedure.
      
      To test this, pam_keyinit calls need to be commented out in /etc/pam.d/.
      Reported-by: NTavis Ormandy <taviso@cmpxchg8b.com>
      Signed-off-by: NDavid Howells <dhowells@redhat.com>
      Acked-by: NTavis Ormandy <taviso@cmpxchg8b.com>
      Signed-off-by: NLinus Torvalds <torvalds@linux-foundation.org>
      3d96406c
    • D
      KEYS: Fix RCU no-lock warning in keyctl_session_to_parent() · 9d1ac65a
      David Howells 提交于
      There's an protected access to the parent process's credentials in the middle
      of keyctl_session_to_parent().  This results in the following RCU warning:
      
        ===================================================
        [ INFO: suspicious rcu_dereference_check() usage. ]
        ---------------------------------------------------
        security/keys/keyctl.c:1291 invoked rcu_dereference_check() without protection!
      
        other info that might help us debug this:
      
        rcu_scheduler_active = 1, debug_locks = 0
        1 lock held by keyctl-session-/2137:
         #0:  (tasklist_lock){.+.+..}, at: [<ffffffff811ae2ec>] keyctl_session_to_parent+0x60/0x236
      
        stack backtrace:
        Pid: 2137, comm: keyctl-session- Not tainted 2.6.36-rc2-cachefs+ #1
        Call Trace:
         [<ffffffff8105606a>] lockdep_rcu_dereference+0xaa/0xb3
         [<ffffffff811ae379>] keyctl_session_to_parent+0xed/0x236
         [<ffffffff811af77e>] sys_keyctl+0xb4/0xb6
         [<ffffffff81001eab>] system_call_fastpath+0x16/0x1b
      
      The code should take the RCU read lock to make sure the parents credentials
      don't go away, even though it's holding a spinlock and has IRQ disabled.
      Signed-off-by: NDavid Howells <dhowells@redhat.com>
      Signed-off-by: NLinus Torvalds <torvalds@linux-foundation.org>
      9d1ac65a
    • L
      Merge branch 'for-linus' of git://git.kernel.dk/linux-2.6-block · ff3cb3fe
      Linus Torvalds 提交于
      * 'for-linus' of git://git.kernel.dk/linux-2.6-block:
        block: Range check cpu in blk_cpu_to_group
        scatterlist: prevent invalid free when alloc fails
        writeback: Fix lost wake-up shutting down writeback thread
        writeback: do not lose wakeup events when forking bdi threads
        cciss: fix reporting of max queue depth since init
        block: switch s390 tape_block and mg_disk to elevator_change()
        block: add function call to switch the IO scheduler from a driver
        fs/bio-integrity.c: return -ENOMEM on kmalloc failure
        bio-integrity.c: remove dependency on __GFP_NOFAIL
        BLOCK: fix bio.bi_rw handling
        block: put dev->kobj in blk_register_queue fail path
        cciss: handle allocation failure
        cfq-iosched: Documentation help for new tunables
        cfq-iosched: blktrace print per slice sector stats
        cfq-iosched: Implement tunable group_idle
        cfq-iosched: Do group share accounting in IOPS when slice_idle=0
        cfq-iosched: Do not idle if slice_idle=0
        cciss: disable doorbell reset on reset_devices
        blkio: Fix return code for mkdir calls
      ff3cb3fe
    • L
      Merge branch 'at91-fixes-for-linus' of git://github.com/at91linux/linux-2.6-at91 · 6ccaa317
      Linus Torvalds 提交于
      * 'at91-fixes-for-linus' of git://github.com/at91linux/linux-2.6-at91:
        AT91: at91sam9261ek: remove C99 comments but keep information
        AT91: at91sam9261ek board: remove warnings related to use of SPI or SD/MMC
        AT91: dm9000 initialization update
        AT91: SAM9G45 - add a separate clock entry for every single TC block
        AT91: clock: peripheral clocks can have other parent than mck
        AT91: change dma resource index
      6ccaa317
    • L
      Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound-2.6 · 3657423c
      Linus Torvalds 提交于
      * 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound-2.6:
        ALSA: rawmidi: fix the get next midi device ioctl
        ALSA: hda - Fix wrong HP pin detection in snd_hda_parse_pin_def_config()
        ALSA: seq/oss - Fix double-free at error path of snd_seq_oss_open()
        ALSA: msnd-classic: Fix invalid cfg parameter
        ALSA: hda - Enable PC-beep for EeePC with ALC269 codec
        ALSA: hda - Add errata initverb sequence for CS42xx codecs
        ALSA: usb - Release capture substream URBs properly
        ALSA: virtuoso: fix setting of Xonar DS line-in/mic-in controls
        ALSA: virtuoso: work around missing reset in the Xonar DS Windows driver
        ALSA: hda - Add quirk for Lenovo T400s
        ALSA: usb-audio: fix detection of vendor-specific device protocol settings
        ALSA: usb-audio: Assume first control interface is for audio
        ALSA: hda - Add a new hp-laptop model for Conexant 5066, tested on HP G60
      3657423c
    • J
      drm/i915: don't enable self-refresh on Ironlake · dd8849c8
      Jesse Barnes 提交于
      We don't know how to enable it safely, especially as outputs turn on and
      off.  When disabling LP1 we also need to make sure LP2 and 3 are already
      disabled.
      
      Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=29173
      Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=29082Reported-by: NChris Lord <chris@linux.intel.com>
      Signed-off-by: NJesse Barnes <jbarnes@virtuousgeek.org>
      Tested-by: NDaniel Vetter <daniel.vetter@ffwll.ch>
      Cc: stable@kernel.org
      Signed-off-by: NChris Wilson <chris@chris-wilson.co.uk>
      dd8849c8
    • D
      xfs: prevent reading uninitialized stack memory · a122eb2f
      Dan Rosenberg 提交于
      The XFS_IOC_FSGETXATTR ioctl allows unprivileged users to read 12
      bytes of uninitialized stack memory, because the fsxattr struct
      declared on the stack in xfs_ioc_fsgetxattr() does not alter (or zero)
      the 12-byte fsx_pad member before copying it back to the user.  This
      patch takes care of it.
      Signed-off-by: NDan Rosenberg <dan.j.rosenberg@gmail.com>
      Reviewed-by: NEric Sandeen <sandeen@redhat.com>
      Signed-off-by: NAlex Elder <aelder@sgi.com>
      a122eb2f