1. 25 7月, 2008 14 次提交
    • U
      flag parameters: eventfd · b087498e
      Ulrich Drepper 提交于
      This patch adds the new eventfd2 syscall.  It extends the old eventfd
      syscall by one parameter which is meant to hold a flag value.  In this
      patch the only flag support is EFD_CLOEXEC which causes the close-on-exec
      flag for the returned file descriptor to be set.
      
      A new name EFD_CLOEXEC is introduced which in this implementation must
      have the same value as O_CLOEXEC.
      
      The following test must be adjusted for architectures other than x86 and
      x86-64 and in case the syscall numbers changed.
      
      ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
      #include <fcntl.h>
      #include <stdio.h>
      #include <unistd.h>
      #include <sys/syscall.h>
      
      #ifndef __NR_eventfd2
      # ifdef __x86_64__
      #  define __NR_eventfd2 290
      # elif defined __i386__
      #  define __NR_eventfd2 328
      # else
      #  error "need __NR_eventfd2"
      # endif
      #endif
      
      #define EFD_CLOEXEC O_CLOEXEC
      
      int
      main (void)
      {
        int fd = syscall (__NR_eventfd2, 1, 0);
        if (fd == -1)
          {
            puts ("eventfd2(0) failed");
            return 1;
          }
        int coe = fcntl (fd, F_GETFD);
        if (coe == -1)
          {
            puts ("fcntl failed");
            return 1;
          }
        if (coe & FD_CLOEXEC)
          {
            puts ("eventfd2(0) sets close-on-exec flag");
            return 1;
          }
        close (fd);
      
        fd = syscall (__NR_eventfd2, 1, EFD_CLOEXEC);
        if (fd == -1)
          {
            puts ("eventfd2(EFD_CLOEXEC) failed");
            return 1;
          }
        coe = fcntl (fd, F_GETFD);
        if (coe == -1)
          {
            puts ("fcntl failed");
            return 1;
          }
        if ((coe & FD_CLOEXEC) == 0)
          {
            puts ("eventfd2(EFD_CLOEXEC) does not set close-on-exec flag");
            return 1;
          }
        close (fd);
      
        puts ("OK");
      
        return 0;
      }
      ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
      
      [akpm@linux-foundation.org: add sys_ni stub]
      Signed-off-by: NUlrich Drepper <drepper@redhat.com>
      Acked-by: NDavide Libenzi <davidel@xmailserver.org>
      Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
      Cc: <linux-arch@vger.kernel.org>
      Signed-off-by: NAndrew Morton <akpm@linux-foundation.org>
      Signed-off-by: NLinus Torvalds <torvalds@linux-foundation.org>
      b087498e
    • U
      flag parameters: signalfd · 9deb27ba
      Ulrich Drepper 提交于
      This patch adds the new signalfd4 syscall.  It extends the old signalfd
      syscall by one parameter which is meant to hold a flag value.  In this
      patch the only flag support is SFD_CLOEXEC which causes the close-on-exec
      flag for the returned file descriptor to be set.
      
      A new name SFD_CLOEXEC is introduced which in this implementation must
      have the same value as O_CLOEXEC.
      
      The following test must be adjusted for architectures other than x86 and
      x86-64 and in case the syscall numbers changed.
      
      ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
      #include <fcntl.h>
      #include <signal.h>
      #include <stdio.h>
      #include <unistd.h>
      #include <sys/syscall.h>
      
      #ifndef __NR_signalfd4
      # ifdef __x86_64__
      #  define __NR_signalfd4 289
      # elif defined __i386__
      #  define __NR_signalfd4 327
      # else
      #  error "need __NR_signalfd4"
      # endif
      #endif
      
      #define SFD_CLOEXEC O_CLOEXEC
      
      int
      main (void)
      {
        sigset_t ss;
        sigemptyset (&ss);
        sigaddset (&ss, SIGUSR1);
        int fd = syscall (__NR_signalfd4, -1, &ss, 8, 0);
        if (fd == -1)
          {
            puts ("signalfd4(0) failed");
            return 1;
          }
        int coe = fcntl (fd, F_GETFD);
        if (coe == -1)
          {
            puts ("fcntl failed");
            return 1;
          }
        if (coe & FD_CLOEXEC)
          {
            puts ("signalfd4(0) set close-on-exec flag");
            return 1;
          }
        close (fd);
      
        fd = syscall (__NR_signalfd4, -1, &ss, 8, SFD_CLOEXEC);
        if (fd == -1)
          {
            puts ("signalfd4(SFD_CLOEXEC) failed");
            return 1;
          }
        coe = fcntl (fd, F_GETFD);
        if (coe == -1)
          {
            puts ("fcntl failed");
            return 1;
          }
        if ((coe & FD_CLOEXEC) == 0)
          {
            puts ("signalfd4(SFD_CLOEXEC) does not set close-on-exec flag");
            return 1;
          }
        close (fd);
      
        puts ("OK");
      
        return 0;
      }
      ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
      
      [akpm@linux-foundation.org: add sys_ni stub]
      Signed-off-by: NUlrich Drepper <drepper@redhat.com>
      Acked-by: NDavide Libenzi <davidel@xmailserver.org>
      Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
      Cc: <linux-arch@vger.kernel.org>
      Signed-off-by: NAndrew Morton <akpm@linux-foundation.org>
      Signed-off-by: NLinus Torvalds <torvalds@linux-foundation.org>
      9deb27ba
    • U
      flag parameters: paccept · aaca0bdc
      Ulrich Drepper 提交于
      This patch is by far the most complex in the series.  It adds a new syscall
      paccept.  This syscall differs from accept in that it adds (at the userlevel)
      two additional parameters:
      
      - a signal mask
      - a flags value
      
      The flags parameter can be used to set flag like SOCK_CLOEXEC.  This is
      imlpemented here as well.  Some people argued that this is a property which
      should be inherited from the file desriptor for the server but this is against
      POSIX.  Additionally, we really want the signal mask parameter as well
      (similar to pselect, ppoll, etc).  So an interface change in inevitable.
      
      The flag value is the same as for socket and socketpair.  I think diverging
      here will only create confusion.  Similar to the filesystem interfaces where
      the use of the O_* constants differs, it is acceptable here.
      
      The signal mask is handled as for pselect etc.  The mask is temporarily
      installed for the thread and removed before the call returns.  I modeled the
      code after pselect.  If there is a problem it's likely also in pselect.
      
      For architectures which use socketcall I maintained this interface instead of
      adding a system call.  The symmetry shouldn't be broken.
      
      The following test must be adjusted for architectures other than x86 and
      x86-64 and in case the syscall numbers changed.
      
      ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
      #include <errno.h>
      #include <fcntl.h>
      #include <pthread.h>
      #include <signal.h>
      #include <stdio.h>
      #include <unistd.h>
      #include <netinet/in.h>
      #include <sys/socket.h>
      #include <sys/syscall.h>
      
      #ifndef __NR_paccept
      # ifdef __x86_64__
      #  define __NR_paccept 288
      # elif defined __i386__
      #  define SYS_PACCEPT 18
      #  define USE_SOCKETCALL 1
      # else
      #  error "need __NR_paccept"
      # endif
      #endif
      
      #ifdef USE_SOCKETCALL
      # define paccept(fd, addr, addrlen, mask, flags) \
        ({ long args[6] = { \
             (long) fd, (long) addr, (long) addrlen, (long) mask, 8, (long) flags }; \
           syscall (__NR_socketcall, SYS_PACCEPT, args); })
      #else
      # define paccept(fd, addr, addrlen, mask, flags) \
        syscall (__NR_paccept, fd, addr, addrlen, mask, 8, flags)
      #endif
      
      #define PORT 57392
      
      #define SOCK_CLOEXEC O_CLOEXEC
      
      static pthread_barrier_t b;
      
      static void *
      tf (void *arg)
      {
        pthread_barrier_wait (&b);
        int s = socket (AF_INET, SOCK_STREAM, 0);
        struct sockaddr_in sin;
        sin.sin_family = AF_INET;
        sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
        sin.sin_port = htons (PORT);
        connect (s, (const struct sockaddr *) &sin, sizeof (sin));
        close (s);
      
        pthread_barrier_wait (&b);
        s = socket (AF_INET, SOCK_STREAM, 0);
        sin.sin_port = htons (PORT);
        connect (s, (const struct sockaddr *) &sin, sizeof (sin));
        close (s);
        pthread_barrier_wait (&b);
      
        pthread_barrier_wait (&b);
        sleep (2);
        pthread_kill ((pthread_t) arg, SIGUSR1);
      
        return NULL;
      }
      
      static void
      handler (int s)
      {
      }
      
      int
      main (void)
      {
        pthread_barrier_init (&b, NULL, 2);
      
        struct sockaddr_in sin;
        pthread_t th;
        if (pthread_create (&th, NULL, tf, (void *) pthread_self ()) != 0)
          {
            puts ("pthread_create failed");
            return 1;
          }
      
        int s = socket (AF_INET, SOCK_STREAM, 0);
        int reuse = 1;
        setsockopt (s, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof (reuse));
        sin.sin_family = AF_INET;
        sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
        sin.sin_port = htons (PORT);
        bind (s, (struct sockaddr *) &sin, sizeof (sin));
        listen (s, SOMAXCONN);
      
        pthread_barrier_wait (&b);
      
        int s2 = paccept (s, NULL, 0, NULL, 0);
        if (s2 < 0)
          {
            puts ("paccept(0) failed");
            return 1;
          }
      
        int coe = fcntl (s2, F_GETFD);
        if (coe & FD_CLOEXEC)
          {
            puts ("paccept(0) set close-on-exec-flag");
            return 1;
          }
        close (s2);
      
        pthread_barrier_wait (&b);
      
        s2 = paccept (s, NULL, 0, NULL, SOCK_CLOEXEC);
        if (s2 < 0)
          {
            puts ("paccept(SOCK_CLOEXEC) failed");
            return 1;
          }
      
        coe = fcntl (s2, F_GETFD);
        if ((coe & FD_CLOEXEC) == 0)
          {
            puts ("paccept(SOCK_CLOEXEC) does not set close-on-exec flag");
            return 1;
          }
        close (s2);
      
        pthread_barrier_wait (&b);
      
        struct sigaction sa;
        sa.sa_handler = handler;
        sa.sa_flags = 0;
        sigemptyset (&sa.sa_mask);
        sigaction (SIGUSR1, &sa, NULL);
      
        sigset_t ss;
        pthread_sigmask (SIG_SETMASK, NULL, &ss);
        sigaddset (&ss, SIGUSR1);
        pthread_sigmask (SIG_SETMASK, &ss, NULL);
      
        sigdelset (&ss, SIGUSR1);
        alarm (4);
        pthread_barrier_wait (&b);
      
        errno = 0 ;
        s2 = paccept (s, NULL, 0, &ss, 0);
        if (s2 != -1 || errno != EINTR)
          {
            puts ("paccept did not fail with EINTR");
            return 1;
          }
      
        close (s);
      
        puts ("OK");
      
        return 0;
      }
      ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
      
      [akpm@linux-foundation.org: make it compile]
      [akpm@linux-foundation.org: add sys_ni stub]
      Signed-off-by: NUlrich Drepper <drepper@redhat.com>
      Acked-by: NDavide Libenzi <davidel@xmailserver.org>
      Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
      Cc: <linux-arch@vger.kernel.org>
      Cc: "David S. Miller" <davem@davemloft.net>
      Cc: Roland McGrath <roland@redhat.com>
      Cc: Kyle McMartin <kyle@mcmartin.ca>
      Signed-off-by: NAndrew Morton <akpm@linux-foundation.org>
      Signed-off-by: NLinus Torvalds <torvalds@linux-foundation.org>
      aaca0bdc
    • U
      generic irqs: handle failure of irqchip->set_type in setup_irq · 82736f4d
      Uwe Kleine-König 提交于
      set_type returns an int indicating success or failure, but up to now
      setup_irq ignores that.
      
      In my case this resulted in a machine hang:
      
      gpio-keys requested IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING, but
      arm/ns9xxx can only trigger on one direction so set_type didn't touch
      the configuration which happens do default to a level sensitiveness and
      returned -EINVAL.  setup_irq ignored that and unmasked the irq.  This
      resulted in an endless triggering of the gpio-key interrupt service
      routine which effectively killed the machine.
      
      With this patch applied setup_irq propagates the error to the caller.
      
      Note that before in the case
      
      	chip && !chip->set_type && !chip->name
      
      a NULL pointer was feed to printk.  This is fixed, too.
      Signed-off-by: NUwe Kleine-König <Uwe.Kleine-Koenig@digi.com>
      Cc: Ingo Molnar <mingo@elte.hu>
      Signed-off-by: NAndrew Morton <akpm@linux-foundation.org>
      Signed-off-by: NLinus Torvalds <torvalds@linux-foundation.org>
      82736f4d
    • D
      pm: fix try_to_freeze_tasks()'s use of do_div() · f0af566d
      David Howells 提交于
      Fix try_to_freeze_tasks()'s use of do_div() on an s64 by making
      elapsed_csecs64 a u64 instead and dividing that.
      
      Possibly this should be guarded lest the interval calculation turn up
      negative, but the possible negativity of the result of the division is
      cast away anyway.
      
      This was introduced by patch 438e2ce6.
      Signed-off-by: NDavid Howells <dhowells@redhat.com>
      Acked-by: N"Rafael J. Wysocki" <rjw@sisk.pl>
      Acked-by: NPavel Machek <pavel@ucw.cz>
      Signed-off-by: NAndrew Morton <akpm@linux-foundation.org>
      Signed-off-by: NLinus Torvalds <torvalds@linux-foundation.org>
      f0af566d
    • Z
      pm: schedule sysrq poweroff on boot cpu · 2f15fc4b
      Zhang Rui 提交于
      schedule sysrq poweroff on boot cpu.
      
      sysrq poweroff needs to disable nonboot cpus, and we need to run this on boot
      cpu to avoid any recursion.  http://bugzilla.kernel.org/show_bug.cgi?id=10897
      
      [kosaki.motohiro@jp.fujitsu.com: build fix]
      Signed-off-by: NZhang Rui <rui.zhang@intel.com>
      Tested-by: NRus <harbour@sfinx.od.ua>
      Signed-off-by: NRafael J. Wysocki <rjw@sisk.pl>
      Acked-by: NPavel Machek <pavel@ucw.cz>
      Signed-off-by: NKOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
      Signed-off-by: NAndrew Morton <akpm@linux-foundation.org>
      Signed-off-by: NLinus Torvalds <torvalds@linux-foundation.org>
      2f15fc4b
    • Z
      pm: introduce new interfaces schedule_work_on() and queue_work_on() · c1a220e7
      Zhang Rui 提交于
      This interface allows adding a job on a specific cpu.
      
      Although a work struct on a cpu will be scheduled to other cpu if the cpu
      dies, there is a recursion if a work task tries to offline the cpu it's
      running on.  we need to schedule the task to a specific cpu in this case.
      http://bugzilla.kernel.org/show_bug.cgi?id=10897
      
      [oleg@tv-sign.ru: cleanups]
      Signed-off-by: NZhang Rui <rui.zhang@intel.com>
      Tested-by: NRus <harbour@sfinx.od.ua>
      Signed-off-by: NRafael J. Wysocki <rjw@sisk.pl>
      Acked-by: NPavel Machek <pavel@ucw.cz>
      Signed-off-by: NOleg Nesterov <oleg@tv-sign.ru>
      Signed-off-by: NAndrew Morton <akpm@linux-foundation.org>
      Signed-off-by: NLinus Torvalds <torvalds@linux-foundation.org>
      c1a220e7
    • A
      pm: hibernation: simplify memory bitmap · 0d83304c
      Akinobu Mita 提交于
      This patch simplifies the memory bitmap manipulations.
      
      - remove the member size in struct bm_block
      
      It is not necessary for struct bm_block to have the number of bit chunks that
      can be calculated by using end_pfn and start_pfn.
      
      - use find_next_bit() for memory_bm_next_pfn
      
      No need to invent the bitmap library only for the memory bitmap.
      Signed-off-by: NAkinobu Mita <akinobu.mita@gmail.com>
      Signed-off-by: NRafael J. Wysocki <rjw@sisk.pl>
      Acked-by: NPavel Machek <pavel@ucw.cz>
      Signed-off-by: NAndrew Morton <akpm@linux-foundation.org>
      Signed-off-by: NLinus Torvalds <torvalds@linux-foundation.org>
      0d83304c
    • D
      pm: boot time suspend selftest · 77437fd4
      David Brownell 提交于
      Boot-time test for system suspend states (STR or standby).  The generic
      RTC framework triggers wakeup alarms, which are used to exit those states.
      
        - Measures some aspects of suspend time ... this uses "jiffies" until
          someone converts it to use a timebase that works properly even while
          timer IRQs are disabled.
      
        - Triggered by a command line parameter.  By default nothing even
          vaguely troublesome will happen, but "test_suspend=mem" will give
          you a brief STR test during system boot.  (Or you may need to use
          "test_suspend=standby" instead, if your hardware needs that.)
      
      This isn't without problems.  It fires early enough during boot that for
      example both PCMCIA and MMC stacks have misbehaved.  The workaround in
      those cases was to boot without such media cards inserted.
      
      [matthltc@us.ibm.com: fix compile failure in boot time suspend selftest]
      Signed-off-by: NDavid Brownell <dbrownell@users.sourceforge.net>
      Cc: Ingo Molnar <mingo@elte.hu>
      Cc: Pavel Machek <pavel@suse.cz>
      Cc: "Rafael J. Wysocki" <rjw@sisk.pl>
      Signed-off-by: NMatt Helsley <matthltc@us.ibm.com>
      Signed-off-by: NAndrew Morton <akpm@linux-foundation.org>
      Signed-off-by: NLinus Torvalds <torvalds@linux-foundation.org>
      77437fd4
    • P
      swsusp: provide users with a hint about the no_console_suspend option · 0d63081d
      Pavel Machek 提交于
      Tell the user about the no_console_suspend option, so that we don't have to
      tell each bug reporter personally.
      
      [akpm@linux-foundation.org: clarify the text a little]
      Signed-off-by: NPavel Machek <pavel@suse.cz>
      Cc: "Rafael J. Wysocki" <rjw@sisk.pl>
      Signed-off-by: NAndrew Morton <akpm@linux-foundation.org>
      Signed-off-by: NLinus Torvalds <torvalds@linux-foundation.org>
      0d63081d
    • A
      security: filesystem capabilities refactor kernel code · ab763c71
      Andrew G. Morgan 提交于
      To date, we've tried hard to confine filesystem support for capabilities
      to the security modules.  This has left a lot of the code in
      kernel/capability.c in a state where it looks like it supports something
      that filesystem support for capabilities actually suppresses when the LSM
      security/commmoncap.c code runs.  What is left is a lot of code that uses
      sub-optimal locking in the main kernel
      
      With this change we refactor the main kernel code and make it explicit
      which locks are needed and that the only remaining kernel races in this
      area are associated with non-filesystem capability code.
      Signed-off-by: NAndrew G. Morgan <morgan@kernel.org>
      Acked-by: NSerge Hallyn <serue@us.ibm.com>
      Signed-off-by: NAndrew Morton <akpm@linux-foundation.org>
      Signed-off-by: NLinus Torvalds <torvalds@linux-foundation.org>
      ab763c71
    • A
      hugetlb: multiple hstates for multiple page sizes · e5ff2159
      Andi Kleen 提交于
      Add basic support for more than one hstate in hugetlbfs.  This is the key
      to supporting multiple hugetlbfs page sizes at once.
      
      - Rather than a single hstate, we now have an array, with an iterator
      - default_hstate continues to be the struct hstate which we use by default
      - Add functions for architectures to register new hstates
      
      [akpm@linux-foundation.org: coding-style fixes]
      Acked-by: NAdam Litke <agl@us.ibm.com>
      Acked-by: NNishanth Aravamudan <nacc@us.ibm.com>
      Signed-off-by: NAndi Kleen <ak@suse.de>
      Signed-off-by: NNick Piggin <npiggin@suse.de>
      Signed-off-by: NAndrew Morton <akpm@linux-foundation.org>
      Signed-off-by: NLinus Torvalds <torvalds@linux-foundation.org>
      e5ff2159
    • M
      hugetlb: reserve huge pages for reliable MAP_PRIVATE hugetlbfs mappings until fork() · a1e78772
      Mel Gorman 提交于
      This patch reserves huge pages at mmap() time for MAP_PRIVATE mappings in
      a similar manner to the reservations taken for MAP_SHARED mappings.  The
      reserve count is accounted both globally and on a per-VMA basis for
      private mappings.  This guarantees that a process that successfully calls
      mmap() will successfully fault all pages in the future unless fork() is
      called.
      
      The characteristics of private mappings of hugetlbfs files behaviour after
      this patch are;
      
      1. The process calling mmap() is guaranteed to succeed all future faults until
         it forks().
      2. On fork(), the parent may die due to SIGKILL on writes to the private
         mapping if enough pages are not available for the COW. For reasonably
         reliable behaviour in the face of a small huge page pool, children of
         hugepage-aware processes should not reference the mappings; such as
         might occur when fork()ing to exec().
      3. On fork(), the child VMAs inherit no reserves. Reads on pages already
         faulted by the parent will succeed. Successful writes will depend on enough
         huge pages being free in the pool.
      4. Quotas of the hugetlbfs mount are checked at reserve time for the mapper
         and at fault time otherwise.
      
      Before this patch, all reads or writes in the child potentially needs page
      allocations that can later lead to the death of the parent.  This applies
      to reads and writes of uninstantiated pages as well as COW.  After the
      patch it is only a write to an instantiated page that causes problems.
      Signed-off-by: NMel Gorman <mel@csn.ul.ie>
      Acked-by: NAdam Litke <agl@us.ibm.com>
      Cc: Andy Whitcroft <apw@shadowen.org>
      Cc: William Lee Irwin III <wli@holomorphy.com>
      Cc: Hugh Dickins <hugh@veritas.com>
      Signed-off-by: NAndrew Morton <akpm@linux-foundation.org>
      Signed-off-by: NLinus Torvalds <torvalds@linux-foundation.org>
      a1e78772
    • A
      mm/vmstat.c: proper externs · c748e134
      Adrian Bunk 提交于
      This patch adds proper extern declarations for five variables in
      include/linux/vmstat.h
      Signed-off-by: NAdrian Bunk <bunk@kernel.org>
      Signed-off-by: NAndrew Morton <akpm@linux-foundation.org>
      Signed-off-by: NLinus Torvalds <torvalds@linux-foundation.org>
      c748e134
  2. 24 7月, 2008 2 次提交
  3. 23 7月, 2008 2 次提交
  4. 22 7月, 2008 7 次提交
    • J
      remove CONFIG_KMOD from core kernel code · a1ef5adb
      Johannes Berg 提交于
      Always compile request_module when the kernel allows modules.
      Signed-off-by: NJohannes Berg <johannes@sipsolutions.net>
      Signed-off-by: NRusty Russell <rusty@rustcorp.com.au>
      a1ef5adb
    • R
      modules: Take a shortcut for checking if an address is in a module · 3a642e99
      Rusty Russell 提交于
      This patch keeps track of the boundaries of module allocation, in
      order to speed up module_text_address().
      
      Inspired by Arjan's version, which required arch-specific defines:
      
      	Various pieces of the kernel (lockdep, latencytop, etc) tend
      	to store backtraces, sometimes at a relatively high
      	frequency. In itself this isn't a big performance deal (after
      	all you're using diagnostics features), but there have been
      	some complaints from people who have over 100 modules loaded
      	that this is a tad too slow.
      
      	This is due to the new backtracer code which looks at every
      	slot on the stack to see if it's a kernel/module text address,
      	so that's 1024 slots.  1024 times 100 modules... that's a lot
      	of list walking.
      Signed-off-by: NRusty Russell <rusty@rustcorp.com.au>
      3a642e99
    • D
      module: turn longs into ints for module sizes · 2f0f2a33
      Denys Vlasenko 提交于
      This shrinks module.o and each *.ko file.
      
      And finally, structure members which hold length of module
      code (four such members there) and count of symbols
      are converted from longs to ints.
      
      We cannot possibly have a module where 32 bits won't
      be enough to hold such counts.
      
      For one, module loading checks module size for sanity
      before loading, so such insanely big module will fail
      that test first.
      Signed-off-by: NDenys Vlasenko <vda.linux@googlemail.com>
      Signed-off-by: NRusty Russell <rusty@rustcorp.com.au>
      2f0f2a33
    • D
      Shrink struct module: CONFIG_UNUSED_SYMBOLS ifdefs · f7f5b675
      Denys Vlasenko 提交于
      module.c and module.h conatains code for finding
      exported symbols which are declared with EXPORT_UNUSED_SYMBOL,
      and this code is compiled in even if CONFIG_UNUSED_SYMBOLS is not set
      and thus there can be no EXPORT_UNUSED_SYMBOLs in modules anyway
      (because EXPORT_UNUSED_SYMBOL(x) are compiled out to nothing then).
      
      This patch adds required #ifdefs.
      Signed-off-by: NDenys Vlasenko <vda.linux@googlemail.com>
      Signed-off-by: NRusty Russell <rusty@rustcorp.com.au>
      f7f5b675
    • R
      module: generic each_symbol iterator function · dafd0940
      Rusty Russell 提交于
      Introduce an each_symbol() iterator to avoid duplicating the knowledge
      about the 5 different sections containing symbols.  Currently only
      used by find_symbol(), but will be used by symbol_put_addr() too.
      
      (Includes NULL ptr deref fix by Jiri Kosina <jkosina@suse.cz>)
      Signed-off-by: NRusty Russell <rusty@rustcorp.com.au>
      Cc: Jiri Kosina <jkosina@suse.cz>
      dafd0940
    • R
      module: don't use stop_machine for waiting rmmod · da39ba5e
      Rusty Russell 提交于
      rmmod has a little-used "-w" option, meaning that instead of failing if the
      module is in use, it should block until the module becomes unused.
      
      In this case, we don't need to use stop_machine: Max Krasnyansky
      indicated that would be useful for SystemTap which loads/unloads new
      modules frequently.
      
      Cc: Max Krasnyansky <maxk@qualcomm.com>
      Signed-off-by: NRusty Russell <rusty@rustcorp.com.au>
      da39ba5e
    • A
      sysdev: Pass the attribute to the low level sysdev show/store function · 4a0b2b4d
      Andi Kleen 提交于
      This allow to dynamically generate attributes and share show/store
      functions between attributes. Right now most attributes are generated
      by special macros and lots of duplicated code. With the attribute
      passed it's instead possible to attach some data to the attribute
      and then use that in shared low level functions to do different things.
      
      I need this for the dynamically generated bank attributes in the x86
      machine check code, but it'll allow some further cleanups.
      
      I converted all users in tree to the new show/store prototype. It's a single
      huge patch to avoid unbisectable sections.
      
      Runtime tested: x86-32, x86-64
      Compiled only: ia64, powerpc
      Not compile tested/only grep converted: sh, arm, avr32
      Signed-off-by: NAndi Kleen <ak@linux.intel.com>
      Signed-off-by: NGreg Kroah-Hartman <gregkh@suse.de>
      4a0b2b4d
  5. 20 7月, 2008 2 次提交
    • I
      sched: hrtick_enabled() should use cpu_active() · ba42059f
      Ingo Molnar 提交于
      Peter pointed out that hrtick_enabled() should use cpu_active().
      Signed-off-by: NIngo Molnar <mingo@elte.hu>
      ba42059f
    • P
      sched, x86: clean up hrtick implementation · 31656519
      Peter Zijlstra 提交于
      random uvesafb failures were reported against Gentoo:
      
        http://bugs.gentoo.org/show_bug.cgi?id=222799
      
      and Mihai Moldovan bisected it back to:
      
      > 8f4d37ec is first bad commit
      > commit 8f4d37ec
      > Author: Peter Zijlstra <a.p.zijlstra@chello.nl>
      > Date:   Fri Jan 25 21:08:29 2008 +0100
      >
      >    sched: high-res preemption tick
      
      Linus suspected it to be hrtick + vm86 interaction and observed:
      
      > Btw, Peter, Ingo: I think that commit is doing bad things. They aren't
      > _incorrect_ per se, but they are definitely bad.
      >
      > Why?
      >
      > Using random _TIF_WORK_MASK flags is really impolite for doing
      > "scheduling" work. There's a reason that arch/x86/kernel/entry_32.S
      > special-cases the _TIF_NEED_RESCHED flag: we don't want to exit out of
      > vm86 mode unnecessarily.
      >
      > See the "work_notifysig_v86" label, and how it does that
      > "save_v86_state()" thing etc etc.
      
      Right, I never liked having to fiddle with those TIF flags. Initially I
      needed it because the hrtimer base lock could not nest in the rq lock.
      That however is fixed these days.
      
      Currently the only reason left to fiddle with the TIF flags is remote
      wakeups. We cannot program a remote cpu's hrtimer. I've been thinking
      about using the new and improved IPI function call stuff to implement
      hrtimer_start_on().
      
      However that does require that smp_call_function_single(.wait=0) works
      from interrupt context - /me looks at the latest series from Jens - Yes
      that does seem to be supported, good.
      
      Here's a stab at cleaning this stuff up ...
      
      Mihai reported test success as well.
      Signed-off-by: NPeter Zijlstra <a.p.zijlstra@chello.nl>
      Tested-by: NMihai Moldovan <ionic@ionic.de>
      Cc: Michal Januszewski <spock@gentoo.org>
      Cc: Antonino Daplas <adaplas@gmail.com>
      Signed-off-by: NIngo Molnar <mingo@elte.hu>
      31656519
  6. 19 7月, 2008 3 次提交
    • M
      cpumask: Optimize cpumask_of_cpu in kernel/time/tick-common.c · c18a41fb
      Mike Travis 提交于
        * Optimize various places where a pointer to the cpumask_of_cpu value
          will result in reducing stack pressure.
      Signed-off-by: NMike Travis <travis@sgi.com>
      Signed-off-by: NIngo Molnar <mingo@elte.hu>
      c18a41fb
    • M
      cpumask: Replace cpumask_of_cpu with cpumask_of_cpu_ptr · 65c01184
      Mike Travis 提交于
        * This patch replaces the dangerous lvalue version of cpumask_of_cpu
          with new cpumask_of_cpu_ptr macros.  These are patterned after the
          node_to_cpumask_ptr macros.
      
          In general terms, if there is a cpumask_of_cpu_map[] then a pointer to
          the cpumask_of_cpu_map[cpu] entry is used.  The cpumask_of_cpu_map
          is provided when there is a large NR_CPUS count, reducing
          greatly the amount of code generated and stack space used for
          cpumask_of_cpu().  The pointer to the cpumask_t value is needed for
          calling set_cpus_allowed_ptr() to reduce the amount of stack space
          needed to pass the cpumask_t value.
      
          If there isn't a cpumask_of_cpu_map[], then a temporary variable is
          declared and filled in with value from cpumask_of_cpu(cpu) as well as
          a pointer variable pointing to this temporary variable.  Afterwards,
          the pointer is used to reference the cpumask value.  The compiler
          will optimize out the extra dereference through the pointer as well
          as the stack space used for the pointer, resulting in identical code.
      
          A good example of the orthogonal usages is in net/sunrpc/svc.c:
      
      	case SVC_POOL_PERCPU:
      	{
      		unsigned int cpu = m->pool_to[pidx];
      		cpumask_of_cpu_ptr(cpumask, cpu);
      
      		*oldmask = current->cpus_allowed;
      		set_cpus_allowed_ptr(current, cpumask);
      		return 1;
      	}
      	case SVC_POOL_PERNODE:
      	{
      		unsigned int node = m->pool_to[pidx];
      		node_to_cpumask_ptr(nodecpumask, node);
      
      		*oldmask = current->cpus_allowed;
      		set_cpus_allowed_ptr(current, nodecpumask);
      		return 1;
      	}
      Signed-off-by: NMike Travis <travis@sgi.com>
      Signed-off-by: NIngo Molnar <mingo@elte.hu>
      65c01184
    • H
      softlockup: fix invalid proc_handler for softlockup_panic · 4dca10a9
      Hiroshi Shimamoto 提交于
      The type of softlockup_panic is int, but the proc_handler is
      proc_doulongvec_minmax(). This handler is for unsigned long.
      
      This handler should be proc_dointvec_minmax().
      Signed-off-by: NHiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
      Signed-off-by: NIngo Molnar <mingo@elte.hu>
      4dca10a9
  7. 18 7月, 2008 7 次提交
  8. 17 7月, 2008 3 次提交