1. 08 Dec 2006, 3 commits
    • [PATCH] Add include/linux/freezer.h and move definitions from sched.h · 7dfb7103
      Authored by Nigel Cunningham
      Move process freezing functions from include/linux/sched.h to freezer.h, so
      that modifications to the freezer or the kernel configuration don't require
      recompiling just about everything.
      
      [akpm@osdl.org: fix ueagle driver]
      Signed-off-by: Nigel Cunningham <nigel@suspend2.net>
      Cc: "Rafael J. Wysocki" <rjw@sisk.pl>
      Cc: Pavel Machek <pavel@ucw.cz>
      Signed-off-by: Andrew Morton <akpm@osdl.org>
      Signed-off-by: Linus Torvalds <torvalds@osdl.org>
    • [PATCH] Save some bytes in struct mm_struct · 36de6437
      Authored by Arnaldo Carvalho de Melo
      Before:
      [acme@newtoy net-2.6.20]$ pahole --cacheline 32 kernel/sched.o mm_struct
      
      /* include2/asm/processor.h:542 */
      struct mm_struct {
              struct vm_area_struct *    mmap;                 /*     0     4 */
              struct rb_root             mm_rb;                /*     4     4 */
              struct vm_area_struct *    mmap_cache;           /*     8     4 */
              long unsigned int          (*get_unmapped_area)(); /*    12     4 */
              void                       (*unmap_area)();      /*    16     4 */
              long unsigned int          mmap_base;            /*    20     4 */
              long unsigned int          task_size;            /*    24     4 */
              long unsigned int          cached_hole_size;     /*    28     4 */
              /* ---------- cacheline 1 boundary ---------- */
              long unsigned int          free_area_cache;      /*    32     4 */
              pgd_t *                    pgd;                  /*    36     4 */
              atomic_t                   mm_users;             /*    40     4 */
              atomic_t                   mm_count;             /*    44     4 */
              int                        map_count;            /*    48     4 */
              struct rw_semaphore        mmap_sem;             /*    52    64 */
              spinlock_t                 page_table_lock;      /*   116    40 */
              struct list_head           mmlist;               /*   156     8 */
              mm_counter_t               _file_rss;            /*   164     4 */
              mm_counter_t               _anon_rss;            /*   168     4 */
              long unsigned int          hiwater_rss;          /*   172     4 */
              long unsigned int          hiwater_vm;           /*   176     4 */
              long unsigned int          total_vm;             /*   180     4 */
              long unsigned int          locked_vm;            /*   184     4 */
              long unsigned int          shared_vm;            /*   188     4 */
              /* ---------- cacheline 6 boundary ---------- */
              long unsigned int          exec_vm;              /*   192     4 */
              long unsigned int          stack_vm;             /*   196     4 */
              long unsigned int          reserved_vm;          /*   200     4 */
              long unsigned int          def_flags;            /*   204     4 */
              long unsigned int          nr_ptes;              /*   208     4 */
              long unsigned int          start_code;           /*   212     4 */
              long unsigned int          end_code;             /*   216     4 */
              long unsigned int          start_data;           /*   220     4 */
              /* ---------- cacheline 7 boundary ---------- */
              long unsigned int          end_data;             /*   224     4 */
              long unsigned int          start_brk;            /*   228     4 */
              long unsigned int          brk;                  /*   232     4 */
              long unsigned int          start_stack;          /*   236     4 */
              long unsigned int          arg_start;            /*   240     4 */
              long unsigned int          arg_end;              /*   244     4 */
              long unsigned int          env_start;            /*   248     4 */
              long unsigned int          env_end;              /*   252     4 */
              /* ---------- cacheline 8 boundary ---------- */
              long unsigned int          saved_auxv[44];       /*   256   176 */
              unsigned int               dumpable:2;           /*   432     4 */
              cpumask_t                  cpu_vm_mask;          /*   436     4 */
              mm_context_t               context;              /*   440    68 */
              long unsigned int          swap_token_time;      /*   508     4 */
              /* ---------- cacheline 16 boundary ---------- */
              char                       recent_pagein;        /*   512     1 */
      
              /* XXX 3 bytes hole, try to pack */
      
              int                        core_waiters;         /*   516     4 */
              struct completion *        core_startup_done;    /*   520     4 */
              struct completion          core_done;            /*   524    52 */
              rwlock_t                   ioctx_list_lock;      /*   576    36 */
              struct kioctx *            ioctx_list;           /*   612     4 */
      }; /* size: 616, sum members: 613, holes: 1, sum holes: 3, cachelines: 20,
            last cacheline: 8 bytes */
      
      After:
      
      [acme@newtoy net-2.6.20]$ pahole --cacheline 32 kernel/sched.o mm_struct
      /* include2/asm/processor.h:542 */
      struct mm_struct {
              struct vm_area_struct *    mmap;                 /*     0     4 */
              struct rb_root             mm_rb;                /*     4     4 */
              struct vm_area_struct *    mmap_cache;           /*     8     4 */
              long unsigned int          (*get_unmapped_area)(); /*    12     4 */
              void                       (*unmap_area)();      /*    16     4 */
              long unsigned int          mmap_base;            /*    20     4 */
              long unsigned int          task_size;            /*    24     4 */
              long unsigned int          cached_hole_size;     /*    28     4 */
              /* ---------- cacheline 1 boundary ---------- */
              long unsigned int          free_area_cache;      /*    32     4 */
              pgd_t *                    pgd;                  /*    36     4 */
              atomic_t                   mm_users;             /*    40     4 */
              atomic_t                   mm_count;             /*    44     4 */
              int                        map_count;            /*    48     4 */
              struct rw_semaphore        mmap_sem;             /*    52    64 */
              spinlock_t                 page_table_lock;      /*   116    40 */
              struct list_head           mmlist;               /*   156     8 */
              mm_counter_t               _file_rss;            /*   164     4 */
              mm_counter_t               _anon_rss;            /*   168     4 */
              long unsigned int          hiwater_rss;          /*   172     4 */
              long unsigned int          hiwater_vm;           /*   176     4 */
              long unsigned int          total_vm;             /*   180     4 */
              long unsigned int          locked_vm;            /*   184     4 */
              long unsigned int          shared_vm;            /*   188     4 */
              /* ---------- cacheline 6 boundary ---------- */
              long unsigned int          exec_vm;              /*   192     4 */
              long unsigned int          stack_vm;             /*   196     4 */
              long unsigned int          reserved_vm;          /*   200     4 */
              long unsigned int          def_flags;            /*   204     4 */
              long unsigned int          nr_ptes;              /*   208     4 */
              long unsigned int          start_code;           /*   212     4 */
              long unsigned int          end_code;             /*   216     4 */
              long unsigned int          start_data;           /*   220     4 */
              /* ---------- cacheline 7 boundary ---------- */
              long unsigned int          end_data;             /*   224     4 */
              long unsigned int          start_brk;            /*   228     4 */
              long unsigned int          brk;                  /*   232     4 */
              long unsigned int          start_stack;          /*   236     4 */
              long unsigned int          arg_start;            /*   240     4 */
              long unsigned int          arg_end;              /*   244     4 */
              long unsigned int          env_start;            /*   248     4 */
              long unsigned int          env_end;              /*   252     4 */
              /* ---------- cacheline 8 boundary ---------- */
              long unsigned int          saved_auxv[44];       /*   256   176 */
              cpumask_t                  cpu_vm_mask;          /*   432     4 */
              mm_context_t               context;              /*   436    68 */
              long unsigned int          swap_token_time;      /*   504     4 */
              char                       recent_pagein;        /*   508     1 */
              unsigned char              dumpable:2;           /*   509     1 */
      
              /* XXX 2 bytes hole, try to pack */
      
              int                        core_waiters;         /*   512     4 */
              struct completion *        core_startup_done;    /*   516     4 */
              struct completion          core_done;            /*   520    52 */
              rwlock_t                   ioctx_list_lock;      /*   572    36 */
              struct kioctx *            ioctx_list;           /*   608     4 */
      }; /* size: 612, sum members: 610, holes: 1, sum holes: 2, cachelines: 20,
            last cacheline: 4 bytes */
      
      [acme@newtoy net-2.6.20]$ codiff -V /tmp/sched.o.before kernel/sched.o
      /pub/scm/linux/kernel/git/acme/net-2.6.20/kernel/sched.c:
        struct mm_struct |   -4
          dumpable:2;
           from: unsigned int          /*   432(30)    4(2) */
           to:   unsigned char         /*   509(6)     1(2) */
      < SNIP other offset changes >
       1 struct changed
      [acme@newtoy net-2.6.20]$
      
      I'm not aware of any problem with using a 2-byte-wide bitfield where
      previously a 4-byte-wide one was; holler if there is one.  I wouldn't be
      surprised if there were, bitfields are things from hell.
      
      For the curious, 432(30) means: at offset 432 from the struct start, at bit
      offset 30 within the bitfield (yeah, it comes backwards, hellish, huh?);
      ditto for 509(6).  Meanwhile 4(2) and 1(2) mean "struct field size(bitfield
      size)".
      
      Now we have a 2-byte hole and are using only 4 bytes of the last 32-byte
      cacheline, any takers? :-)
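      As an aside for readers who want to see the packing effect in isolation,
      here is a minimal user-space mock-up (not mm_struct itself; exact numbers
      depend on the compiler and ABI) showing how narrowing the 2-bit field from
      unsigned int to unsigned char and moving it next to the existing char lets
      it share the trailing bytes instead of burning a 4-byte unit of its own:

      #include <stdio.h>
      #include <stddef.h>

      /* Trimmed-down stand-ins for the two mm_struct layouts shown above. */
      struct before {                     /* 2-bit flag declared as unsigned int  */
              unsigned int  saved_auxv[2];
              unsigned int  dumpable:2;   /* occupies its own int-sized unit...   */
              unsigned int  cpu_vm_mask;  /* ...because this needs int alignment  */
              unsigned int  swap_token_time;
              char          recent_pagein;
              int           core_waiters;
      };

      struct after {                      /* same flag as unsigned char, moved    */
              unsigned int  saved_auxv[2];
              unsigned int  cpu_vm_mask;
              unsigned int  swap_token_time;
              char          recent_pagein;
              unsigned char dumpable:2;   /* packs into the byte after the char   */
              int           core_waiters;
      };

      int main(void)
      {
              printf("before: size %zu, core_waiters at %zu\n",
                     sizeof(struct before), offsetof(struct before, core_waiters));
              printf("after:  size %zu, core_waiters at %zu\n",
                     sizeof(struct after), offsetof(struct after, core_waiters));
              return 0;  /* typically reports the "after" layout 4 bytes smaller */
      }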
      Signed-off-by: Arnaldo Carvalho de Melo <acme@mandriva.com>
      Signed-off-by: Andrew Morton <akpm@osdl.org>
      Signed-off-by: Linus Torvalds <torvalds@osdl.org>
    • [PATCH] new scheme to preempt swap token · 7602bdf2
      Authored by Ashwin Chaugule
      The new swap token patches replace the current token traversal algo.  The old
      algo had a crude timeout parameter that was used to hand the token over from
      one task to another.  This algo transfers the token to the tasks that are in
      need of the token.  The urgency for the token is based on the number of times
      a task is required to swap in pages.  Accordingly, the priority of a task is
      incremented if it has been badly affected by swap-outs.  To ensure that the
      token doesn't bounce around rapidly, the token holders are given a priority
      boost.  The priority of tasks is also decremented if their rate of swap-ins
      keeps decreasing.  This way, the condition to check whether to preempt the
      swap token is a matter of comparing two tasks' priority fields.
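      To make the rule concrete, here is a minimal user-space sketch of the
      preemption decision described above; the field names, increments and decay
      here are illustrative, not the kernel's actual fields or tuning:

      #include <stdio.h>

      struct task_swap_state {
              const char *name;
              int swap_token_prio;   /* grows with swap-in faults, decays otherwise */
              int holds_token;
      };

      static void note_swapin_fault(struct task_swap_state *t)
      {
              t->swap_token_prio += t->holds_token ? 2 : 1;  /* holder gets a boost */
      }

      static void note_quiet_tick(struct task_swap_state *t)
      {
              if (t->swap_token_prio > 0)
                      t->swap_token_prio--;                  /* urgency decays */
      }

      /* The preemption check is just a comparison of two priority fields. */
      static void maybe_preempt_token(struct task_swap_state *holder,
                                      struct task_swap_state *contender)
      {
              if (contender->swap_token_prio > holder->swap_token_prio) {
                      holder->holds_token = 0;
                      contender->holds_token = 1;
                      printf("%s takes the swap token from %s\n",
                             contender->name, holder->name);
              }
      }

      int main(void)
      {
              struct task_swap_state a = { "A", 3, 1 }, b = { "B", 0, 0 };

              note_quiet_tick(&a); note_quiet_tick(&a); note_quiet_tick(&a);
              note_swapin_fault(&b); note_swapin_fault(&b); note_swapin_fault(&b);
              maybe_preempt_token(&a, &b);   /* B (prio 3) preempts A (prio 0) */
              return 0;
      }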
      
      [akpm@osdl.org: cleanups]
      Signed-off-by: Ashwin Chaugule <ashwin.chaugule@celunite.com>
      Cc: Rik van Riel <riel@redhat.com>
      Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
      Signed-off-by: Andrew Morton <akpm@osdl.org>
      Signed-off-by: Linus Torvalds <torvalds@osdl.org>
  2. 29 Oct 2006, 1 commit
  3. 06 Oct 2006, 1 commit
  4. 03 Oct 2006, 2 commits
  5. 02 Oct 2006, 11 commits
    • [PATCH] replace cad_pid by a struct pid · 9ec52099
      Authored by Cedric Le Goater
      There are a few places in the kernel where the init task is signaled.  The
      ctrl+alt+del sequence is one of them.  It kills a task, usually init, using a
      cached pid (cad_pid).
      
      This patch replaces the pid_t by a struct pid to avoid the pid wraparound
      problem.  The struct pid is initialized at boot time in init() and can be
      modified through sysctl with
      
      	/proc/sys/kernel/cad_pid
      
      [ I haven't found any distro using it ? ]
      
      It also introduces a small helper routine kill_cad_pid() which is used
      where it seemed ok to use cad_pid instead of pid 1.
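      A kernel-style sketch of the helper described above (not standalone
      runnable; it leans on the kill_pid() variant introduced later in this
      series, and the exact prototypes should be checked against the tree):

      /* cad_pid is now a cached struct pid * (set at boot, updatable through
       * /proc/sys/kernel/cad_pid) instead of a raw pid_t that could be reused
       * after pid wraparound. */
      struct pid *cad_pid;

      /* Signal whatever cad_pid currently refers to, usually init. */
      void kill_cad_pid(int sig, int priv)
      {
              kill_pid(cad_pid, sig, priv);
      }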
      
      [akpm@osdl.org: cleanups, build fix]
      Signed-off-by: Cedric Le Goater <clg@fr.ibm.com>
      Cc: Eric W. Biederman <ebiederm@xmission.com>
      Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
      Cc: Paul Mackerras <paulus@samba.org>
      Signed-off-by: Andrew Morton <akpm@osdl.org>
      Signed-off-by: Linus Torvalds <torvalds@osdl.org>
    • [PATCH] IPC namespace core · 25b21cb2
      Authored by Kirill Korotaev
      This patch set allows unsharing IPCs and having a private set of IPC objects
      (sem, shm, msg) inside a namespace.  Basically, it is another building block
      of containers functionality.
      
      This patch implements core IPC namespace changes:
      - ipc_namespace structure
      - new config option CONFIG_IPC_NS
      - adds CLONE_NEWIPC flag
      - unshare support
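      A minimal user-space sketch of what the new flag enables (assumes a kernel
      built with CONFIG_IPC_NS and a caller with CAP_SYS_ADMIN; illustrative
      only):

      #define _GNU_SOURCE
      #include <sched.h>
      #include <stdio.h>
      #include <sys/ipc.h>
      #include <sys/shm.h>

      int main(void)
      {
              if (unshare(CLONE_NEWIPC) < 0) {        /* needs CAP_SYS_ADMIN */
                      perror("unshare(CLONE_NEWIPC)");
                      return 1;
              }
              /* This SysV shm segment exists only in the new, private IPC
               * namespace; the parent namespace never sees it. */
              int id = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0600);
              if (id < 0) {
                      perror("shmget");
                      return 1;
              }
              printf("created shm id %d inside the private IPC namespace\n", id);
              return 0;
      }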
      
      [clg@fr.ibm.com: small fix for unshare of ipc namespace]
      [akpm@osdl.org: build fix]
      Signed-off-by: Pavel Emelianov <xemul@openvz.org>
      Signed-off-by: Kirill Korotaev <dev@openvz.org>
      Signed-off-by: Cedric Le Goater <clg@fr.ibm.com>
      Cc: "Eric W. Biederman" <ebiederm@xmission.com>
      Signed-off-by: Andrew Morton <akpm@osdl.org>
      Signed-off-by: Linus Torvalds <torvalds@osdl.org>
    • [PATCH] namespaces: utsname: implement CLONE_NEWUTS flag · 071df104
      Authored by Serge E. Hallyn
      Implement a CLONE_NEWUTS flag, and use it in clone() and sys_unshare().
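      A minimal user-space sketch of the flag in action (requires CAP_SYS_ADMIN
      and a kernel with this support; illustrative only): the hostname change is
      visible only inside the unshared UTS namespace.

      #define _GNU_SOURCE
      #include <sched.h>
      #include <stdio.h>
      #include <string.h>
      #include <unistd.h>

      int main(void)
      {
              char name[64];

              if (unshare(CLONE_NEWUTS) < 0) {        /* needs CAP_SYS_ADMIN */
                      perror("unshare(CLONE_NEWUTS)");
                      return 1;
              }
              if (sethostname("private-uts", strlen("private-uts")) < 0) {
                      perror("sethostname");
                      return 1;
              }
              gethostname(name, sizeof(name));
              printf("hostname inside the new UTS namespace: %s\n", name);
              return 0;
      }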
      
      [clg@fr.ibm.com: IPC unshare fix]
      [bunk@stusta.de: cleanup]
      Signed-off-by: Serge Hallyn <serue@us.ibm.com>
      Cc: Kirill Korotaev <dev@openvz.org>
      Cc: "Eric W. Biederman" <ebiederm@xmission.com>
      Cc: Herbert Poetzl <herbert@13thfloor.at>
      Cc: Andrey Savochkin <saw@sw.ru>
      Signed-off-by: Adrian Bunk <bunk@stusta.de>
      Signed-off-by: Cedric Le Goater <clg@fr.ibm.com>
      Signed-off-by: Andrew Morton <akpm@osdl.org>
      Signed-off-by: Linus Torvalds <torvalds@osdl.org>
    • [PATCH] namespaces: utsname: implement utsname namespaces · 4865ecf1
      Authored by Serge E. Hallyn
      This patch defines the uts namespace and some manipulators.  It adds the
      uts namespace to task_struct, and initializes a system-wide init namespace.
      
      It leaves a #define for system_utsname so sysctl will compile.
      This define will be removed in a separate patch.
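      A kernel-style sketch of the shape this describes (field names are an
      assumption based on the description, not the verbatim patch): a
      reference-counted wrapper around the old system_utsname contents, with one
      system-wide init instance.

      struct uts_namespace {
              struct kref        kref;    /* how many users share this namespace */
              struct new_utsname name;    /* sysname, nodename, release, ...     */
      };

      extern struct uts_namespace init_uts_ns;  /* the system-wide init namespace */

      /* Per-task access then goes through the task's namespace pointer instead
       * of the old global system_utsname. */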
      
      [akpm@osdl.org: build fix, cleanup]
      Signed-off-by: Serge Hallyn <serue@us.ibm.com>
      Cc: Kirill Korotaev <dev@openvz.org>
      Cc: "Eric W. Biederman" <ebiederm@xmission.com>
      Cc: Herbert Poetzl <herbert@13thfloor.at>
      Cc: Andrey Savochkin <saw@sw.ru>
      Signed-off-by: Andrew Morton <akpm@osdl.org>
      Signed-off-by: Linus Torvalds <torvalds@osdl.org>
    • [PATCH] namespaces: incorporate fs namespace into nsproxy · 1651e14e
      Authored by Serge E. Hallyn
      This moves the mount namespace into the nsproxy.  The mount namespace count
      now refers to the number of nsproxies that point to it, rather than the
      number of tasks.  As a result, the unshare_namespace() function in
      kernel/fork.c no longer checks whether it is being shared.
      Signed-off-by: Serge Hallyn <serue@us.ibm.com>
      Cc: Kirill Korotaev <dev@openvz.org>
      Cc: "Eric W. Biederman" <ebiederm@xmission.com>
      Cc: Herbert Poetzl <herbert@13thfloor.at>
      Cc: Andrey Savochkin <saw@sw.ru>
      Signed-off-by: Andrew Morton <akpm@osdl.org>
      Signed-off-by: Linus Torvalds <torvalds@osdl.org>
    • [PATCH] namespaces: add nsproxy · ab516013
      Authored by Serge E. Hallyn
      This patch adds a nsproxy structure to the task struct.  Later patches will
      move the fs namespace pointer into this structure, and introduce a new utsname
      namespace into the nsproxy.
      
      The vserver and openvz functionality, then, would be implemented in large part
      by virtualizing/isolating more and more resources into namespaces, each
      contained in the nsproxy.
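      A kernel-style sketch of the container this describes (the exact members
      and their names are assumptions drawn from this series, not the literal
      patch):

      struct nsproxy {
              atomic_t              count;       /* tasks sharing this proxy       */
              struct uts_namespace *uts_ns;      /* added later in this series     */
              struct ipc_namespace *ipc_ns;      /* added later in this series     */
              struct namespace     *namespace;   /* fs/mount namespace, moved here */
      };

      /* task_struct then carries a single pointer, struct nsproxy *nsproxy;
       * clone()/unshare() either bump 'count' or build a fresh nsproxy with the
       * requested namespaces replaced. */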
      
      [akpm@osdl.org: build fix]
      Signed-off-by: Serge Hallyn <serue@us.ibm.com>
      Cc: Kirill Korotaev <dev@openvz.org>
      Cc: "Eric W. Biederman" <ebiederm@xmission.com>
      Cc: Herbert Poetzl <herbert@13thfloor.at>
      Cc: Andrey Savochkin <saw@sw.ru>
      Signed-off-by: Andrew Morton <akpm@osdl.org>
      Signed-off-by: Linus Torvalds <torvalds@osdl.org>
    • [PATCH] usb: fixup usb so it uses struct pid · 2425c08b
      Authored by Eric W. Biederman
      The problem with remembering a user space process by its pid is that it is
      possible that the process will exit and pid wraparound will occur.
      Converting to a struct pid avoids that problem, and paves the way for
      implementing a pid namespace.
      
      Also, since usb is the only user of kill_proc_info_as_uid, rename
      kill_proc_info_as_uid to kill_pid_info_as_uid and have the new version take
      a struct pid.
      Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
      Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
      Signed-off-by: Andrew Morton <akpm@osdl.org>
      Signed-off-by: Linus Torvalds <torvalds@osdl.org>
    • [PATCH] Define struct pspace · 3fbc9648
      Authored by Sukadev Bhattiprolu
      Define a per-container pid space object.  And create one instance of this
      object, init_pspace, to define the entire pid space.  Subsequent patches
      will provide/use interfaces to create/destroy pid spaces.
      
      It's a subset/rework of Eric Biederman's patch
      http://lkml.org/lkml/2006/2/6/285 .
      Signed-off-by: Eric Biederman <ebiederm@xmission.com>
      Signed-off-by: Sukadev Bhattiprolu <sukadev@us.ibm.com>
      Cc: Dave Hansen <haveblue@us.ibm.com>
      Cc: Serge Hallyn <serue@us.ibm.com>
      Cc: Cedric Le Goater <clg@fr.ibm.com>
      Cc: Kirill Korotaev <dev@sw.ru>
      Cc: Andrey Savochkin <saw@sw.ru>
      Signed-off-by: Andrew Morton <akpm@osdl.org>
      Signed-off-by: Linus Torvalds <torvalds@osdl.org>
    • [PATCH] pid: implement signal functions that take a struct pid * · c4b92fc1
      Authored by Eric W. Biederman
      Currently the signal functions all either take a task or a pid_t argument.
      This patch implements variants that take a struct pid *.  After all of the
      users have been updated, it is my intention to remove the variants that take
      a pid_t, as using pid_t can be more work (an extra hash table lookup) and
      difficult to get right in the presence of multiple pid namespaces.
      
      There are two kinds of functions introduced in this patch.  There are the
      general-use functions kill_pgrp and kill_pid, which take a priv argument
      that is ultimately used to create the appropriate siginfo information.  Then
      there are _kill_pgrp_info, kill_pgrp_info and kill_pid_info, the internal
      implementation helpers that take an explicit siginfo.
      
      The distinction is made because filling out an explicit siginfo is tricky,
      and will be even more tricky when pid namespaces are introduced.
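      A sketch of the split described above; treat the exact prototypes as an
      assumption to verify against the tree rather than a quotation of the patch:

      /* General-use entry points: 'priv' selects whether the generated siginfo
       * marks the signal as coming from the kernel or from a user. */
      int kill_pid(struct pid *pid, int sig, int priv);
      int kill_pgrp(struct pid *pid, int sig, int priv);

      /* Internal helpers for callers that must supply the siginfo themselves
       * (tricky, and trickier still once pid namespaces exist). */
      int kill_pid_info(int sig, struct siginfo *info, struct pid *pid);
      int kill_pgrp_info(int sig, struct siginfo *info, struct pid *pgrp);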
      Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
      Signed-off-by: Andrew Morton <akpm@osdl.org>
      Signed-off-by: Linus Torvalds <torvalds@osdl.org>
    • [PATCH] pid: implement access helpers for a task's various process groups · 22c935f4
      Authored by Eric W. Biederman
      In the last round of cleaning up the pid hash table a more general struct
      pid was introduced that can be reference counted.
      
      With the more general struct pid, most if not all places where we store a
      pid_t can now store a struct pid * instead, removing the need for a hash
      table lookup and avoiding any possible problems with pid rollover.
      
      Looking forward to pid namespaces, struct pid * gives us an absolute form of
      a pid, so we can compare and use them without caring which pid namespace we
      are in.
      
      This patchset introduces the infrastructure needed to use struct pid instead
      of pid_t, and then it goes on to convert two different kernel users that
      currently store a pid_t value.
      
      There are a lot more places to go but this is enough to get the basic idea.
      
      Before we can merge a pid namespace patch all of the kernel pid_t users need
      to be examined.  Those that deal with user space processes need to be
      converted to using a struct pid *.  Those that deal with kernel processes
      need to be converted to using the kthread api.  A rare few that only use
      their current process's pid values get to be left alone.
      
      This patch:
      
      task_session returns the struct pid of a task's session.
      task_pgrp    returns the struct pid of a task's process group.
      task_tgid    returns the struct pid of a task's thread group.
      task_pid     returns the struct pid of a task's process id.
      
      These can be used to avoid unnecessary hash table lookups, and to implement
      safe pid comparisons in the face of a pid namespace.
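      For example, a kernel-style sketch of the kind of comparison these
      accessors make safe: since each struct pid is a unique object inside the
      kernel, pointer equality works regardless of which pid namespace the
      caller is in.

      /* Are two tasks in the same process group?  No pid_t, no hash lookup. */
      static inline int same_pgrp(struct task_struct *a, struct task_struct *b)
      {
              return task_pgrp(a) == task_pgrp(b);
      }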
      Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
      Signed-off-by: Andrew Morton <akpm@osdl.org>
      Signed-off-by: Linus Torvalds <torvalds@osdl.org>
    • [PATCH] proc: readdir race fix (take 3) · 0804ef4b
      Authored by Eric W. Biederman
      The problem: An opendir, readdir, closedir sequence can fail to report
      process ids that are continually in use throughout the sequence of system
      calls.  For this race to trigger the process that proc_pid_readdir stops at
      must exit before readdir is called again.
      
      This can cause ps to fail to report processes, and it is in violation of
      posix guarantees and normal application expectations with respect to
      readdir.
      
      Currently there is no way to work around this problem in user space short
      of providing a gargantuan buffer to user space so that the directory read
      all happens in one system call.
      
      This patch implements the normal directory semantics for proc, which
      guarantee that a directory entry that is neither created nor destroyed
      while reading the directory will be returned.  For directory entries that
      are either created or destroyed during the readdir, you may or may not see
      them.  Furthermore, you may seek to a directory offset you have previously
      seen.
      
      These are the guarantees that ext[23] provides and that posix requires, and
      more importantly that user space expects.  Plus it is a simple semantic on
      which to implement a reliable service: it is just a matter of calling
      readdir a second time if you are wondering whether something new has shown
      up.
      
      These better semantics are implemented by scanning through the pids in
      numerical order and by making the file offset a pid plus a fixed offset.
      
      The pid scan happens on the pid bitmap, which when you look at it is
      remarkably efficient for a brute force algorithm.  Given that a typical
      cache line is 64 bytes and thus covers space for 64*8 == 512 pids, there
      are only 64 cache lines for the entire 32K pid space.  A typical system
      will have 100 pids or more, so this is actually fewer cache lines than we
      would have to touch to walk an equivalent linked list, and the worst case
      of having to scan the entire pid bitmap is pretty reasonable.
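      A user-space sketch of the brute-force-but-cache-friendly scan described
      above (illustrative only; the kernel walks its own pidmap pages with its
      own helpers):

      #include <stdio.h>
      #include <limits.h>

      #define PID_MAX        32768
      #define BITS_PER_LONG  (sizeof(unsigned long) * CHAR_BIT)

      static unsigned long pid_bitmap[PID_MAX / BITS_PER_LONG];

      /* Find the next in-use pid at or after 'from'; readdir resumes from the
       * pid encoded in the file offset, so a pid that stays alive across calls
       * is always found again. */
      static int next_used_pid(int from)
      {
              for (int pid = from; pid < PID_MAX; pid++)
                      if (pid_bitmap[pid / BITS_PER_LONG] &
                          (1UL << (pid % BITS_PER_LONG)))
                              return pid;
              return -1;
      }

      int main(void)
      {
              pid_bitmap[1 / BITS_PER_LONG] |= 1UL << (1 % BITS_PER_LONG);
              pid_bitmap[4242 / BITS_PER_LONG] |= 1UL << (4242 % BITS_PER_LONG);

              printf("next pid >= 2: %d\n", next_used_pid(2));  /* prints 4242 */
              return 0;
      }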
      
      If we need something more efficient we can go to a more efficient data
      structure for indexing the pids, but for now what we have should be
      sufficient.
      
      In addition this takes no additional locks and is actually less code than
      what we are doing now.
      
      Also another very subtle bug in this area has been fixed.  It is possible
      to catch a task in the middle of de_thread, where a thread is assuming the
      identity of its thread group leader.  This patch carefully handles that
      case so if we hit it we don't fail to return the pid that is undergoing the
      de_thread dance.
      
      Thanks to KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> for
      providing the first fix, pointing this out and working on it.
      
      [oleg@tv-sign.ru: fix it]
      Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
      Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
      Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
      Cc: Jean Delvare <jdelvare@suse.de>
      Signed-off-by: Andrew Morton <akpm@osdl.org>
      Signed-off-by: Linus Torvalds <torvalds@osdl.org>
  6. 01 Oct 2006, 3 commits
    • [PATCH] csa accounting taskstats update · db5fed26
      Authored by Jay Lan
      ChangeLog:
         Feedback from Andrew Morton:
         - define TS_COMM_LEN to 32
         - change the acct_stimexpd field of task_struct to be of type
           cputime_t, which is used to save the tsk->stime of the last
           timer interrupt update.
         - a new Documentation/accounting/taskstats-struct.txt
           to describe fields of taskstats struct.
      
         Feedback from Balbir Singh:
         - keep the stime of a task at zero when both stime and utime
           are zero, as recorded in task_struct.
      
         Misc:
         - convert accumulated RSS/VM from platform dependent
           pages-ticks to MBytes-usecs in the kernel
      
      Cc: Shailabh Nagar <nagar@watson.ibm.com>
      Cc: Balbir Singh <balbir@in.ibm.com>
      Cc: Jes Sorensen <jes@sgi.com>
      Cc: Chris Sturtivant <csturtiv@sgi.com>
      Cc: Tony Ernst <tee@sgi.com>
      Cc: Guillaume Thouvenin <guillaume.thouvenin@bull.net>
      Signed-off-by: Andrew Morton <akpm@osdl.org>
      Signed-off-by: Linus Torvalds <torvalds@osdl.org>
    • [PATCH] csa: convert CONFIG tag for extended accounting routines · 8f0ab514
      Authored by Jay Lan
      There are a few accounting data items and macros that are used in CSA but
      are #ifdef'ed inside CONFIG_BSD_PROCESS_ACCT.  This patch changes those
      ifdefs from CONFIG_BSD_PROCESS_ACCT to CONFIG_TASK_XACCT.  A few defines
      are moved from kernel/acct.c and include/linux/acct.h to kernel/tsacct.c
      and include/linux/tsacct_kern.h.
      Signed-off-by: Jay Lan <jlan@sgi.com>
      Cc: Shailabh Nagar <nagar@watson.ibm.com>
      Cc: Balbir Singh <balbir@in.ibm.com>
      Cc: Jes Sorensen <jes@sgi.com>
      Cc: Chris Sturtivant <csturtiv@sgi.com>
      Cc: Tony Ernst <tee@sgi.com>
      Cc: Guillaume Thouvenin <guillaume.thouvenin@bull.net>
      Signed-off-by: Andrew Morton <akpm@osdl.org>
      Signed-off-by: Linus Torvalds <torvalds@osdl.org>
    • [PATCH] BLOCK: Remove duplicate declaration of exit_io_context() [try #6] · 0d67a46d
      Authored by David Howells
      Remove the duplicate declaration of exit_io_context() from linux/sched.h.
      Signed-Off-By: David Howells <dhowells@redhat.com>
      Signed-off-by: Jens Axboe <axboe@kernel.dk>
  7. 30 Sep 2006, 7 commits
  8. 26 Sep 2006, 2 commits
    • [PATCH] Add the canary field to the PDA area and the task struct · 0a425405
      Authored by Arjan van de Ven
      This patch adds the per thread cookie field to the task struct and the PDA.
      Also it makes sure that the PDA value gets the new cookie value at context
      switch, and that a new task gets a new cookie at task creation time.
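      A kernel-style sketch of that flow (helper names here other than
      get_random_bytes() are assumptions, and the real patch wires this into the
      task-creation and x86-64 context-switch paths rather than standalone
      functions):

      static void init_task_canary(struct task_struct *tsk)
      {
              /* Every new task gets its own random cookie. */
              get_random_bytes(&tsk->stack_canary, sizeof(tsk->stack_canary));
      }

      static void switch_canary(struct task_struct *next)
      {
              /* Keep the per-CPU PDA copy in sync with the task about to run,
               * so the compiler-emitted canary check reads the right value. */
              write_pda(stack_canary, next->stack_canary);
      }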
      Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
      Signed-off-by: Ingo Molnar <mingo@elte.hu>
      Signed-off-by: Andi Kleen <ak@suse.de>
      CC: Andi Kleen <ak@suse.de>
    • [PATCH] non lazy "sleazy" fpu implementation · e07e23e1
      Authored by Arjan van de Ven
      Right now the kernel on x86-64 has a 100% lazy fpu behavior: after *every*
      context switch a trap is taken for the first FPU use to restore the FPU
      context lazily.  This is of course great for applications that have very
      sporadic or no FPU use (since then you avoid doing the expensive
      save/restore all the time).  However for very frequent FPU users...  you
      take an extra trap every context switch.
      
      The patch below adds a simple heuristic to this code: after 5 consecutive
      context switches of FPU use, the lazy behavior is disabled and the context
      gets restored every context switch.  If the app indeed uses the FPU, the
      trap is avoided.  (The chance of the 6th time slice using the FPU after the
      previous 5 have done so is obviously quite high.)
      
      After 256 switches, this is reset and lazy behavior is resumed (until there
      are 5 consecutive FPU-using switches again).  The reason for this is to
      give apps that do longer bursts of FPU use the lazy behavior back after
      some time.
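      A user-space sketch of the heuristic described above (the thresholds
      mirror the description; the counter name and reset mechanics are
      illustrative, not the literal patch):

      #include <stdio.h>

      struct fpu_policy {
              unsigned int fpu_counter;  /* consecutive switches that used the FPU */
      };

      static void context_switch(struct fpu_policy *p, int used_fpu_last_slice,
                                 unsigned long switch_count)
      {
              if (switch_count % 256 == 0)
                      p->fpu_counter = 0;     /* periodically retry lazy mode */

              if (used_fpu_last_slice)
                      p->fpu_counter++;
              else
                      p->fpu_counter = 0;     /* streak broken, stay lazy */

              if (p->fpu_counter >= 5)
                      printf("switch %lu: eager FPU restore\n", switch_count);
              else
                      printf("switch %lu: lazy (trap on first FPU use)\n",
                             switch_count);
      }

      int main(void)
      {
              struct fpu_policy p = { 0 };

              for (unsigned long i = 1; i <= 8; i++)
                      context_switch(&p, 1, i);  /* a task that always uses the FPU */
              return 0;
      }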
      
      [akpm@osdl.org: place new task_struct field next to jit_keyring to save space]
      Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
      Signed-off-by: Andi Kleen <ak@suse.de>
      Cc: Andi Kleen <ak@muc.de>
      Signed-off-by: Andrew Morton <akpm@osdl.org>
  9. 02 Sep 2006, 1 commit
  10. 06 Aug 2006, 1 commit
  11. 15 Jul 2006, 5 commits
  12. 04 Jul 2006, 3 commits
    • [PATCH] sched: cleanup, convert sched.c-internal typedefs to struct · 70b97a7f
      Authored by Ingo Molnar
      convert:
      
       - runqueue_t to 'struct rq'
       - prio_array_t to 'struct prio_array'
       - migration_req_t to 'struct migration_req'
      
      I was the one who added these but they are both against the kernel coding
      style and also were used inconsistently in places.  So just get rid of them
      at once, now that we are flushing the scheduler patch-queue anyway.
      
      Conversion was mostly scripted, the result was reviewed and all secondary
      whitespace and style impact (if any) was fixed up by hand.
      Signed-off-by: Ingo Molnar <mingo@elte.hu>
      Signed-off-by: Andrew Morton <akpm@osdl.org>
      Signed-off-by: Linus Torvalds <torvalds@osdl.org>
    • [PATCH] sched: cleanup, remove task_t, convert to struct task_struct · 36c8b586
      Authored by Ingo Molnar
      cleanup: remove task_t and convert all the uses to struct task_struct.  I
      introduced it for the scheduler ages ago and it was a mistake.
      
      Conversion was mostly scripted, the result was reviewed and all
      secondary whitespace and style impact (if any) was fixed up by hand.
      Signed-off-by: Ingo Molnar <mingo@elte.hu>
      Signed-off-by: Andrew Morton <akpm@osdl.org>
      Signed-off-by: Linus Torvalds <torvalds@osdl.org>
    • [PATCH] lockdep: core · fbb9ce95
      Authored by Ingo Molnar
      Do 'make oldconfig' and accept all the defaults for new config options -
      reboot into the kernel and if everything goes well it should boot up fine and
      you should have /proc/lockdep and /proc/lockdep_stats files.
      
      Typically if the lock validator finds some problem it will print out
      voluminous debug output that begins with "BUG: ..."; this syslog output can
      be used by kernel developers to figure out the precise locking scenario.
      
      What does the lock validator do?  It "observes" and maps all locking rules as
      they occur dynamically (as triggered by the kernel's natural use of spinlocks,
      rwlocks, mutexes and rwsems).  Whenever the lock validator subsystem detects a
      new locking scenario, it validates this new rule against the existing set of
      rules.  If this new rule is consistent with the existing set of rules then the
      new rule is added transparently and the kernel continues as normal.  If the
      new rule could create a deadlock scenario then this condition is printed out.
      
      When determining validity of locking, all possible "deadlock scenarios" are
      considered: assuming arbitrary number of CPUs, arbitrary irq context and task
      context constellations, running arbitrary combinations of all the existing
      locking scenarios.  In a typical system this means millions of separate
      scenarios.  This is why we call it a "locking correctness" validator - for all
      rules that are observed the lock validator proves it with mathematical
      certainty that a deadlock could not occur (assuming that the lock validator
      implementation itself is correct and its internal data structures are not
      corrupted by some other kernel subsystem).  [see more details and conditionals
      of this statement in include/linux/lockdep.h and
      Documentation/lockdep-design.txt]
      
      Furthermore, this "all possible scenarios" property of the validator also
      enables the finding of complex, highly unlikely multi-CPU multi-context
      races via individual single-context rules, increasing the likelihood of
      finding bugs drastically.  In practical terms: the lock validator already
      found a bug in the upstream kernel that could only occur on systems with 3
      or more CPUs, and which needed 3 very unlikely code sequences to occur at
      once on the 3 CPUs.  That bug was found and reported on a single-CPU system
      (!).  So in essence a race will be found "piecemeal-wise", triggering all
      the necessary components for the race, without having to reproduce the race
      scenario itself!  In its short existence the lock validator found and
      reported many bugs before they actually caused a real deadlock.
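      As a concrete illustration of the kind of rule the validator extracts, here
      is a minimal user-space analogue (pthread mutexes standing in for kernel
      locks): this single-threaded run never deadlocks, yet the two paths
      establish contradictory lock-ordering rules, which is exactly what a
      lockdep-style check reports after seeing each path just once.

      #include <pthread.h>
      #include <stdio.h>

      static pthread_mutex_t lock_a = PTHREAD_MUTEX_INITIALIZER;
      static pthread_mutex_t lock_b = PTHREAD_MUTEX_INITIALIZER;

      static void path_one(void)          /* establishes the rule  A -> B */
      {
              pthread_mutex_lock(&lock_a);
              pthread_mutex_lock(&lock_b);
              pthread_mutex_unlock(&lock_b);
              pthread_mutex_unlock(&lock_a);
      }

      static void path_two(void)          /* establishes the rule  B -> A */
      {
              pthread_mutex_lock(&lock_b);
              pthread_mutex_lock(&lock_a);
              pthread_mutex_unlock(&lock_a);
              pthread_mutex_unlock(&lock_b);
      }

      int main(void)
      {
              path_one();
              path_two();
              /* Run concurrently on two CPUs these paths can deadlock; an
               * ordering validator flags the A->B vs B->A conflict without
               * needing the race to actually happen. */
              printf("no deadlock this run, but the lock-order rules conflict\n");
              return 0;
      }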
      
      To further increase the efficiency of the validator, the mapping is not per
      "lock instance", but per "lock-class".  For example, all struct inode objects
      in the kernel have inode->inotify_mutex.  If there are 10,000 inodes cached,
      then there are 10,000 lock objects.  But ->inotify_mutex is a single "lock
      type", and all locking activities that occur against ->inotify_mutex are
      "unified" into this single lock-class.  The advantage of the lock-class
      approach is that all historical ->inotify_mutex uses are mapped into a single
      (and as narrow as possible) set of locking rules - regardless of how many
      different tasks or inode structures it took to build this set of rules.  The
      set of rules persists during the lifetime of the kernel.
      
      To see the rough magnitude of checking that the lock validator does, here's a
      portion of /proc/lockdep_stats, fresh after bootup:
      
       lock-classes:                            694 [max: 2048]
       direct dependencies:                  1598 [max: 8192]
       indirect dependencies:               17896
       all direct dependencies:             16206
       dependency chains:                    1910 [max: 8192]
       in-hardirq chains:                      17
       in-softirq chains:                     105
       in-process chains:                    1065
       stack-trace entries:                 38761 [max: 131072]
       combined max dependencies:         2033928
       hardirq-safe locks:                     24
       hardirq-unsafe locks:                  176
       softirq-safe locks:                     53
       softirq-unsafe locks:                  137
       irq-safe locks:                         59
       irq-unsafe locks:                      176
      
      The lock validator has observed 1598 actual single-thread locking patterns,
      and has validated all possible 2033928 distinct locking scenarios.
      
      More details about the design of the lock validator can be found in
      Documentation/lockdep-design.txt, which can also found at:
      
         http://redhat.com/~mingo/lockdep-patches/lockdep-design.txt
      
      [bunk@stusta.de: cleanups]
      Signed-off-by: Ingo Molnar <mingo@elte.hu>
      Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
      Signed-off-by: Adrian Bunk <bunk@stusta.de>
      Signed-off-by: Andrew Morton <akpm@osdl.org>
      Signed-off-by: Linus Torvalds <torvalds@osdl.org>