1. 15 5月, 2012 1 次提交
    • T
      x86/mce: Only restart instruction after machine check recovery if it is safe · dad1743e
      Tony Luck 提交于
      Section 15.3.1.2 of the software developer manual has this to say about the
      RIPV bit in the IA32_MCG_STATUS register:
      
        RIPV (restart IP valid) flag, bit 0 — Indicates (when set) that program
        execution can be restarted reliably at the instruction pointed to by the
        instruction pointer pushed on the stack when the machine-check exception
        is generated.  When clear, the program cannot be reliably restarted at
        the pushed instruction pointer.
      
      We need to save the state of this bit in do_machine_check() and use it
      in mce_notify_process() to force a signal; even if memory_failure() says
      it made a complete recovery ... e.g. replaced a clean LRU page.
      Acked-by: NBorislav Petkov <bp@amd64.org>
      Signed-off-by: NTony Luck <tony.luck@intel.com>
      dad1743e
  2. 27 4月, 2012 1 次提交
  3. 20 4月, 2012 1 次提交
  4. 17 4月, 2012 1 次提交
  5. 16 4月, 2012 1 次提交
  6. 03 4月, 2012 1 次提交
  7. 29 3月, 2012 1 次提交
  8. 24 3月, 2012 1 次提交
  9. 23 3月, 2012 2 次提交
  10. 17 3月, 2012 1 次提交
  11. 13 3月, 2012 2 次提交
    • P
      perf/x86: Prettify pmu config literals · f9b4eeb8
      Peter Zijlstra 提交于
      I got somewhat tired of having to decode hex numbers..
      Signed-off-by: NPeter Zijlstra <a.p.zijlstra@chello.nl>
      Acked-by: NThomas Gleixner <tglx@linutronix.de>
      Cc: Stephane Eranian <eranian@google.com>
      Cc: Robert Richter <robert.richter@amd.com>
      Link: http://lkml.kernel.org/n/tip-0vsy1sgywc4uar3mu1szm0rg@git.kernel.orgSigned-off-by: NIngo Molnar <mingo@elte.hu>
      f9b4eeb8
    • P
      perf/x86: Fix local vs remote memory events for NHM/WSM · 87e24f4b
      Peter Zijlstra 提交于
      Verified using the below proglet.. before:
      
      [root@westmere ~]# perf stat -e node-stores -e node-store-misses ./numa 0
      remote write
      
       Performance counter stats for './numa 0':
      
               2,101,554 node-stores
               2,096,931 node-store-misses
      
             5.021546079 seconds time elapsed
      
      [root@westmere ~]# perf stat -e node-stores -e node-store-misses ./numa 1
      local write
      
       Performance counter stats for './numa 1':
      
                 501,137 node-stores
                     199 node-store-misses
      
             5.124451068 seconds time elapsed
      
      After:
      
      [root@westmere ~]# perf stat -e node-stores -e node-store-misses ./numa 0
      remote write
      
       Performance counter stats for './numa 0':
      
               2,107,516 node-stores
               2,097,187 node-store-misses
      
             5.012755149 seconds time elapsed
      
      [root@westmere ~]# perf stat -e node-stores -e node-store-misses ./numa 1
      local write
      
       Performance counter stats for './numa 1':
      
               2,063,355 node-stores
                     165 node-store-misses
      
             5.082091494 seconds time elapsed
      
      #define _GNU_SOURCE
      
      #include <sched.h>
      #include <stdio.h>
      #include <errno.h>
      #include <sys/mman.h>
      #include <sys/types.h>
      #include <dirent.h>
      #include <signal.h>
      #include <unistd.h>
      #include <numaif.h>
      #include <stdlib.h>
      
      #define SIZE (32*1024*1024)
      
      volatile int done;
      
      void sig_done(int sig)
      {
      	done = 1;
      }
      
      int main(int argc, char **argv)
      {
      	cpu_set_t *mask, *mask2;
      	size_t size;
      	int i, err, t;
      	int nrcpus = 1024;
      	char *mem;
      	unsigned long nodemask = 0x01; /* node 0 */
      	DIR *node;
      	struct dirent *de;
      	int read = 0;
      	int local = 0;
      
      	if (argc < 2) {
      		printf("usage: %s [0-3]\n", argv[0]);
      		printf("  bit0 - local/remote\n");
      		printf("  bit1 - read/write\n");
      		exit(0);
      	}
      
      	switch (atoi(argv[1])) {
      	case 0:
      		printf("remote write\n");
      		break;
      	case 1:
      		printf("local write\n");
      		local = 1;
      		break;
      	case 2:
      		printf("remote read\n");
      		read = 1;
      		break;
      	case 3:
      		printf("local read\n");
      		local = 1;
      		read = 1;
      		break;
      	}
      
      	mask = CPU_ALLOC(nrcpus);
      	size = CPU_ALLOC_SIZE(nrcpus);
      	CPU_ZERO_S(size, mask);
      
      	node = opendir("/sys/devices/system/node/node0/");
      	if (!node)
      		perror("opendir");
      	while ((de = readdir(node))) {
      		int cpu;
      
      		if (sscanf(de->d_name, "cpu%d", &cpu) == 1)
      			CPU_SET_S(cpu, size, mask);
      	}
      	closedir(node);
      
      	mask2 = CPU_ALLOC(nrcpus);
      	CPU_ZERO_S(size, mask2);
      	for (i = 0; i < size; i++)
      		CPU_SET_S(i, size, mask2);
      	CPU_XOR_S(size, mask2, mask2, mask); // invert
      
      	if (!local)
      		mask = mask2;
      
      	err = sched_setaffinity(0, size, mask);
      	if (err)
      		perror("sched_setaffinity");
      
      	mem = mmap(0, SIZE, PROT_READ|PROT_WRITE,
      			MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
      	err = mbind(mem, SIZE, MPOL_BIND, &nodemask, 8*sizeof(nodemask), MPOL_MF_MOVE);
      	if (err)
      		perror("mbind");
      
      	signal(SIGALRM, sig_done);
      	alarm(5);
      
      	if (!read) {
      		while (!done) {
      			for (i = 0; i < SIZE; i++)
      				mem[i] = 0x01;
      		}
      	} else {
      		while (!done) {
      			for (i = 0; i < SIZE; i++)
      				t += *(volatile char *)(mem + i);
      		}
      	}
      
      	return 0;
      }
      Signed-off-by: NPeter Zijlstra <a.p.zijlstra@chello.nl>
      Cc: Stephane Eranian <eranian@google.com>
      Cc: <stable@kernel.org>
      Link: http://lkml.kernel.org/n/tip-tq73sxus35xmqpojf7ootxgs@git.kernel.orgSigned-off-by: NIngo Molnar <mingo@elte.hu>
      87e24f4b
  12. 07 3月, 2012 1 次提交
    • S
      x86, mce: Fix rcu splat in drain_mce_log_buffer() · b11e3d78
      Srivatsa S. Bhat 提交于
      While booting, the following message is seen:
      
      [   21.665087] ===============================
      [   21.669439] [ INFO: suspicious RCU usage. ]
      [   21.673798] 3.2.0-0.0.0.28.36b5ec9-default #2 Not tainted
      [   21.681353] -------------------------------
      [   21.685864] arch/x86/kernel/cpu/mcheck/mce.c:194 suspicious rcu_dereference_index_check() usage!
      [   21.695013]
      [   21.695014] other info that might help us debug this:
      [   21.695016]
      [   21.703488]
      [   21.703489] rcu_scheduler_active = 1, debug_locks = 1
      [   21.710426] 3 locks held by modprobe/2139:
      [   21.714754]  #0:  (&__lockdep_no_validate__){......}, at: [<ffffffff8133afd3>] __driver_attach+0x53/0xa0
      [   21.725020]  #1:
      [   21.725323] ioatdma: Intel(R) QuickData Technology Driver 4.00
      [   21.733206]  (&__lockdep_no_validate__){......}, at: [<ffffffff8133afe1>] __driver_attach+0x61/0xa0
      [   21.743015]  #2:  (i7core_edac_lock){+.+.+.}, at: [<ffffffffa01cfa5f>] i7core_probe+0x1f/0x5c0 [i7core_edac]
      [   21.753708]
      [   21.753709] stack backtrace:
      [   21.758429] Pid: 2139, comm: modprobe Not tainted 3.2.0-0.0.0.28.36b5ec9-default #2
      [   21.768253] Call Trace:
      [   21.770838]  [<ffffffff810977cd>] lockdep_rcu_suspicious+0xcd/0x100
      [   21.777366]  [<ffffffff8101aa41>] drain_mcelog_buffer+0x191/0x1b0
      [   21.783715]  [<ffffffff8101aa78>] mce_register_decode_chain+0x18/0x20
      [   21.790430]  [<ffffffffa01cf8db>] i7core_register_mci+0x2fb/0x3e4 [i7core_edac]
      [   21.798003]  [<ffffffffa01cfb14>] i7core_probe+0xd4/0x5c0 [i7core_edac]
      [   21.804809]  [<ffffffff8129566b>] local_pci_probe+0x5b/0xe0
      [   21.810631]  [<ffffffff812957c9>] __pci_device_probe+0xd9/0xe0
      [   21.816650]  [<ffffffff813362e4>] ? get_device+0x14/0x20
      [   21.822178]  [<ffffffff81296916>] pci_device_probe+0x36/0x60
      [   21.828061]  [<ffffffff8133ac8a>] really_probe+0x7a/0x2b0
      [   21.833676]  [<ffffffff8133af23>] driver_probe_device+0x63/0xc0
      [   21.839868]  [<ffffffff8133b01b>] __driver_attach+0x9b/0xa0
      [   21.845718]  [<ffffffff8133af80>] ? driver_probe_device+0xc0/0xc0
      [   21.852027]  [<ffffffff81339168>] bus_for_each_dev+0x68/0x90
      [   21.857876]  [<ffffffff8133aa3c>] driver_attach+0x1c/0x20
      [   21.863462]  [<ffffffff8133a64d>] bus_add_driver+0x16d/0x2b0
      [   21.869377]  [<ffffffff8133b6dc>] driver_register+0x7c/0x160
      [   21.875220]  [<ffffffff81296bda>] __pci_register_driver+0x6a/0xf0
      [   21.881494]  [<ffffffffa01fe000>] ? 0xffffffffa01fdfff
      [   21.886846]  [<ffffffffa01fe047>] i7core_init+0x47/0x1000 [i7core_edac]
      [   21.893737]  [<ffffffff810001ce>] do_one_initcall+0x3e/0x180
      [   21.899670]  [<ffffffff810a9b95>] sys_init_module+0xc5/0x220
      [   21.905542]  [<ffffffff8149bc39>] system_call_fastpath+0x16/0x1b
      
      Fix this by using ACCESS_ONCE() instead of rcu_dereference_check_mce()
      over mcelog.next. Since the access to each entry is controlled by the
      ->finished field, ACCESS_ONCE() should work just fine. An rcu_dereference
      is unnecessary here.
      Signed-off-by: NSrivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
      Suggested-by: NPaul E. McKenney <paulmck@linux.vnet.ibm.com>
      Cc: Tony Luck <tony.luck@intel.com>
      Signed-off-by: NBorislav Petkov <borislav.petkov@amd.com>
      b11e3d78
  13. 05 3月, 2012 10 次提交
  14. 02 3月, 2012 2 次提交
  15. 29 2月, 2012 1 次提交
    • P
      x86: relocate get/set debugreg fcns to include/asm/debugreg. · f649e938
      Paul Gortmaker 提交于
      Since we already have a debugreg.h header file, move the
      assoc. get/set functions to it.  In addition to it being the
      logical home for them, it has a secondary advantage.  The
      functions that are moved use BUG().  So we really need to
      have linux/bug.h in scope.  But asm/processor.h is used about
      600 times, vs. only about 15 for debugreg.h -- so adding bug.h
      to the latter reduces the amount of time we'll be processing
      it during a compile.
      Signed-off-by: NPaul Gortmaker <paul.gortmaker@windriver.com>
      Acked-by: NIngo Molnar <mingo@elte.hu>
      CC: Thomas Gleixner <tglx@linutronix.de>
      CC: "H. Peter Anvin" <hpa@zytor.com>
      f649e938
  16. 23 2月, 2012 2 次提交
  17. 22 2月, 2012 3 次提交
  18. 21 2月, 2012 3 次提交
    • L
      i387: export 'fpu_owner_task' per-cpu variable · 27e74da9
      Linus Torvalds 提交于
      (And define it properly for x86-32, which had its 'current_task'
      declaration in separate from x86-64)
      
      Bitten by my dislike for modules on the machines I use, and the fact
      that apparently nobody else actually wanted to test the patches I sent
      out.
      
      Snif. Nobody else cares.
      
      Anyway, we probably should uninline the 'kernel_fpu_begin()' function
      that is what modules actually use and that references this, but this is
      the minimal fix for now.
      Reported-by: NJosh Boyer <jwboyer@gmail.com>
      Reported-and-tested-by: NJongman Heo <jongman.heo@samsung.com>
      Signed-off-by: NLinus Torvalds <torvalds@linux-foundation.org>
      27e74da9
    • H
      x32: Handle process creation · d1a797f3
      H. Peter Anvin 提交于
      Allow an x32 process to be started.
      Originally-by: NH. J. Lu <hjl.tools@gmail.com>
      Signed-off-by: NH. Peter Anvin <hpa@zytor.com>
      Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
      d1a797f3
    • L
      i387: support lazy restore of FPU state · 7e16838d
      Linus Torvalds 提交于
      This makes us recognize when we try to restore FPU state that matches
      what we already have in the FPU on this CPU, and avoids the restore
      entirely if so.
      
      To do this, we add two new data fields:
      
       - a percpu 'fpu_owner_task' variable that gets written any time we
         update the "has_fpu" field, and thus acts as a kind of back-pointer
         to the task that owns the CPU.  The exception is when we save the FPU
         state as part of a context switch - if the save can keep the FPU
         state around, we leave the 'fpu_owner_task' variable pointing at the
         task whose FP state still remains on the CPU.
      
       - a per-thread 'last_cpu' field, that indicates which CPU that thread
         used its FPU on last.  We update this on every context switch
         (writing an invalid CPU number if the last context switch didn't
         leave the FPU in a lazily usable state), so we know that *that*
         thread has done nothing else with the FPU since.
      
      These two fields together can be used when next switching back to the
      task to see if the CPU still matches: if 'fpu_owner_task' matches the
      task we are switching to, we know that no other task (or kernel FPU
      usage) touched the FPU on this CPU in the meantime, and if the current
      CPU number matches the 'last_cpu' field, we know that this thread did no
      other FP work on any other CPU, so the FPU state on the CPU must match
      what was saved on last context switch.
      
      In that case, we can avoid the 'f[x]rstor' entirely, and just clear the
      CR0.TS bit.
      Signed-off-by: NLinus Torvalds <torvalds@linux-foundation.org>
      7e16838d
  19. 14 2月, 2012 2 次提交
  20. 13 2月, 2012 1 次提交
  21. 09 2月, 2012 1 次提交
  22. 07 2月, 2012 1 次提交
    • S
      perf: Fix double start/stop in x86_pmu_start() · f39d47ff
      Stephane Eranian 提交于
      The following patch fixes a bug introduced by the following
      commit:
      
              e050e3f0 ("perf: Fix broken interrupt rate throttling")
      
      The patch caused the following warning to pop up depending on
      the sampling frequency adjustments:
      
        ------------[ cut here ]------------
        WARNING: at arch/x86/kernel/cpu/perf_event.c:995 x86_pmu_start+0x79/0xd4()
      
      It was caused by the following call sequence:
      
      perf_adjust_freq_unthr_context.part() {
           stop()
           if (delta > 0) {
                perf_adjust_period() {
                    if (period > 8*...) {
                        stop()
                        ...
                        start()
                    }
                }
            }
            start()
      }
      
      Which caused a double start and a double stop, thus triggering
      the assert in x86_pmu_start().
      
      The patch fixes the problem by avoiding the double calls. We
      pass a new argument to perf_adjust_period() to indicate whether
      or not the event is already stopped. We can't just remove the
      start/stop from that function because it's called from
      __perf_event_overflow where the event needs to be reloaded via a
      stop/start back-toback call.
      
      The patch reintroduces the assertion in x86_pmu_start() which
      was removed by commit:
      
      	84f2b9b2 ("perf: Remove deprecated WARN_ON_ONCE()")
      
      In this second version, we've added calls to disable/enable PMU
      during unthrottling or frequency adjustment based on bug report
      of spurious NMI interrupts from Eric Dumazet.
      Reported-and-tested-by: NEric Dumazet <eric.dumazet@gmail.com>
      Signed-off-by: NStephane Eranian <eranian@google.com>
      Acked-by: NPeter Zijlstra <a.p.zijlstra@chello.nl>
      Cc: markus@trippelsdorf.de
      Cc: paulus@samba.org
      Link: http://lkml.kernel.org/r/20120207133956.GA4932@quad
      [ Minor edits to the changelog and to the code ]
      Signed-off-by: NIngo Molnar <mingo@elte.hu>
      f39d47ff