Commit 72a73693 authored by Linus Torvalds

Merge branch 'x86/for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'x86/for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (160 commits)
  x86: remove extra calling to get ext cpuid level
  x86: use setup_clear_cpu_cap() when disabling the lapic
  KVM: fix exception entry / build bug, on 64-bit
  x86: add unknown_nmi_panic kernel parameter
  x86, VisWS: turn into generic arch, eliminate leftover files
  x86: add ->pre_time_init to x86_quirks
  x86: extend and use x86_quirks to clean up NUMAQ code
  x86: introduce x86_quirks
  x86: improve debug printout: add target bootmem range in early_res_to_bootmem()
  Subject: devmem, x86: fix rename of CONFIG_NONPROMISC_DEVMEM
  x86: remove arch_get_ram_range
  x86: Add a debugfs interface to dump PAT memtype
  x86: Add a arch directory for x86 under debugfs
  x86: i386: reduce boot fixmap space
  i386/xen: add proper unwind annotations to xen_sysenter_target
  x86: reduce force_mwait visibility
  x86: reduce forbid_dac's visibility
  x86: fix two modpost warnings
  x86: check function status in EDD boot code
  x86_64: ia32_signal.c: remove signal number conversion
  ...
@@ -1206,7 +1206,7 @@ and is between 256 and 4096 characters. It is defined in the file
 			or
 			memmap=0x10000$0x18690000
 
-	memtest=	[KNL,X86_64] Enable memtest
+	memtest=	[KNL,X86] Enable memtest
 			Format: <integer>
 			range: 0,4 : pattern number
 			default : 0 <disable>
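With the X86_64 dependency dropped (see the MEMTEST Kconfig hunk below), memtest= is now usable on 32-bit kernels as well. A minimal boot-line sketch, assuming a GRUB-style loader and an illustrative image path:

	kernel /boot/vmlinuz root=/dev/sda1 memtest=4

Per the format above, the integer is a pattern number in the range 0-4, and 0 (the default) leaves the early memory scan disabled.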
@@ -2158,6 +2158,10 @@ and is between 256 and 4096 characters. It is defined in the file
 			Note that genuine overcurrent events won't be
 			reported either.
 
+	unknown_nmi_panic
+			[X86-32,X86-64]
+			Set unknown_nmi_panic=1 early on boot.
+
 	usbcore.autosuspend=
 			[USB] The autosuspend time delay (in seconds) used
 			for newly-detected USB devices (default 2). This
...
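The unknown_nmi_panic switch has long been reachable as a sysctl; the new parameter simply sets it before userspace is up. A hedged sketch of both spellings, assuming the standard proc interface:

	kernel /boot/vmlinuz root=/dev/sda1 unknown_nmi_panic
	sysctl -w kernel.unknown_nmi_panic=1

Either way, an NMI with no identified source then panics the machine instead of being logged and ignored.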
@@ -447,7 +447,6 @@ config PARAVIRT_DEBUG
 config MEMTEST
 	bool "Memtest"
-	depends on X86_64
 	help
 	  This option adds a kernel parameter 'memtest', which allows memtest
 	  to be set.
...
@@ -362,10 +362,6 @@ config X86_ALIGNMENT_16
 	def_bool y
 	depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || X86_ELAN || MK6 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2 || MGEODEGX1
 
-config X86_GOOD_APIC
-	def_bool y
-	depends on MK7 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || MK8 || MEFFICEON || MCORE2 || MVIAC7 || X86_64
-
 config X86_INTEL_USERCOPY
 	def_bool y
 	depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON || MCORE2
...
@@ -5,13 +5,15 @@ config TRACE_IRQFLAGS_SUPPORT
 source "lib/Kconfig.debug"
 
-config NONPROMISC_DEVMEM
+config STRICT_DEVMEM
 	bool "Filter access to /dev/mem"
 	help
-	  If this option is left off, you allow userspace access to all
+	  If this option is left on, you allow userspace (root) access to all
 	  of memory, including kernel and userspace memory. Accidental
 	  access to this is obviously disastrous, but specific access can
-	  be used by people debugging the kernel.
+	  be used by people debugging the kernel. Note that with PAT support
+	  enabled, even in this case there are restrictions on /dev/mem
+	  use due to the cache aliasing requirements.
 
 	  If this option is switched on, the /dev/mem file only allows
 	  userspace access to PCI space and the BIOS code and data regions.
@@ -287,7 +289,6 @@ config CPA_DEBUG
 config OPTIMIZE_INLINING
 	bool "Allow gcc to uninline functions marked 'inline'"
-	depends on BROKEN
 	help
 	  This option determines if the kernel forces gcc to inline the functions
 	  developers have marked 'inline'. Doing so takes away freedom from gcc to
@@ -298,5 +299,7 @@ config OPTIMIZE_INLINING
 	  become the default in the future, until then this option is there to
 	  test gcc for this.
 
+	  If unsure, say N.
+
 endmenu
@@ -167,9 +167,8 @@ void query_edd(void)
 		 * Scan the BIOS-supported hard disks and query EDD
 		 * information...
 		 */
-		get_edd_info(devno, &ei);
-
-		if (boot_params.eddbuf_entries < EDDMAXNR) {
+		if (!get_edd_info(devno, &ei)
+		    && boot_params.eddbuf_entries < EDDMAXNR) {
 			memcpy(edp, &ei, sizeof ei);
 			edp++;
 			boot_params.eddbuf_entries++;
...
@@ -98,12 +98,6 @@ static void reset_coprocessor(void)
 /*
  * Set up the GDT
  */
-#define GDT_ENTRY(flags, base, limit)		\
-	(((u64)(base & 0xff000000) << 32) |	\
-	 ((u64)flags << 40) |			\
-	 ((u64)(limit & 0x00ff0000) << 32) |	\
-	 ((u64)(base & 0x00ffffff) << 16) |	\
-	 ((u64)(limit & 0x0000ffff)))
 
 struct gdt_ptr {
 	u16 len;
...
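This hunk and the matching one in the ACPI sleep code further down remove two identical copies of GDT_ENTRY(); the XXX comment deleted there, plus the new #include <asm/segment.h>, point at the macro's shared home. As a sanity check on the bit layout being packed, here is a standalone sketch; the 0xc09b attribute value and the flat-segment numbers are our example, not part of the patch:

	#include <stdint.h>
	#include <stdio.h>

	/* Same packing as the removed macro (argument parentheses added
	 * for standalone safety). */
	#define GDT_ENTRY(flags, base, limit)			\
		((((uint64_t)(base)  & 0xff000000) << 32) |	\
		 ((uint64_t)(flags) << 40) |			\
		 (((uint64_t)(limit) & 0x00ff0000) << 32) |	\
		 (((uint64_t)(base)  & 0x00ffffff) << 16) |	\
		 ((uint64_t)(limit) & 0x0000ffff))

	int main(void)
	{
		/* Flat 4 GiB 32-bit code segment: base 0, 20-bit limit
		 * 0xfffff, attributes 0xc09b. Prints 0x00cf9b000000ffff,
		 * the classic descriptor value. */
		printf("%#018llx\n",
		       (unsigned long long)GDT_ENTRY(0xc09b, 0, 0xfffff));
		return 0;
	}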
@@ -2047,7 +2047,7 @@ CONFIG_PROVIDE_OHCI1394_DMA_INIT=y
 # CONFIG_SAMPLES is not set
 # CONFIG_KGDB is not set
 CONFIG_HAVE_ARCH_KGDB=y
-# CONFIG_NONPROMISC_DEVMEM is not set
+# CONFIG_STRICT_DEVMEM is not set
 CONFIG_EARLY_PRINTK=y
 CONFIG_DEBUG_STACKOVERFLOW=y
 CONFIG_DEBUG_STACK_USAGE=y
...
@@ -2012,7 +2012,7 @@ CONFIG_PROVIDE_OHCI1394_DMA_INIT=y
 # CONFIG_SAMPLES is not set
 # CONFIG_KGDB is not set
 CONFIG_HAVE_ARCH_KGDB=y
-# CONFIG_NONPROMISC_DEVMEM is not set
+# CONFIG_STRICT_DEVMEM is not set
 CONFIG_EARLY_PRINTK=y
 CONFIG_DEBUG_STACKOVERFLOW=y
 CONFIG_DEBUG_STACK_USAGE=y
...
@@ -36,6 +36,11 @@
 #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
 
+#define FIX_EFLAGS	(X86_EFLAGS_AC | X86_EFLAGS_OF | \
+			 X86_EFLAGS_DF | X86_EFLAGS_TF | X86_EFLAGS_SF | \
+			 X86_EFLAGS_ZF | X86_EFLAGS_AF | X86_EFLAGS_PF | \
+			 X86_EFLAGS_CF)
+
 asmlinkage int do_signal(struct pt_regs *regs, sigset_t *oldset);
 void signal_fault(struct pt_regs *regs, void __user *frame, char *where);
@@ -248,7 +253,7 @@ static int ia32_restore_sigcontext(struct pt_regs *regs,
 	regs->ss |= 3;
 
 	err |= __get_user(tmpflags, &sc->flags);
-	regs->flags = (regs->flags & ~0x40DD5) | (tmpflags & 0x40DD5);
+	regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
 	/* disable syscall checks */
 	regs->orig_ax = -1;
@@ -515,7 +520,6 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 			compat_sigset_t *set, struct pt_regs *regs)
 {
 	struct rt_sigframe __user *frame;
-	struct exec_domain *ed = current_thread_info()->exec_domain;
 	void __user *restorer;
 	int err = 0;
@@ -538,8 +542,7 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
 		goto give_sigsegv;
 
-	err |= __put_user((ed && ed->signal_invmap && sig < 32
-			   ? ed->signal_invmap[sig] : sig), &frame->sig);
+	err |= __put_user(sig, &frame->sig);
 	err |= __put_user(ptr_to_compat(&frame->info), &frame->pinfo);
 	err |= __put_user(ptr_to_compat(&frame->uc), &frame->puc);
 	err |= copy_siginfo_to_user32(&frame->info, info);
...
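A quick consistency check on the FIX_EFLAGS hunk above: the named mask is bit-for-bit the 0x40DD5 literal it replaces. A minimal standalone sketch, with the architectural X86_EFLAGS_* values restated so it compiles outside the kernel:

	#include <assert.h>

	#define X86_EFLAGS_CF 0x00001
	#define X86_EFLAGS_PF 0x00004
	#define X86_EFLAGS_AF 0x00010
	#define X86_EFLAGS_ZF 0x00040
	#define X86_EFLAGS_SF 0x00080
	#define X86_EFLAGS_TF 0x00100
	#define X86_EFLAGS_DF 0x00400
	#define X86_EFLAGS_OF 0x00800
	#define X86_EFLAGS_AC 0x40000

	#define FIX_EFLAGS (X86_EFLAGS_AC | X86_EFLAGS_OF | X86_EFLAGS_DF | \
			    X86_EFLAGS_TF | X86_EFLAGS_SF | X86_EFLAGS_ZF | \
			    X86_EFLAGS_AF | X86_EFLAGS_PF | X86_EFLAGS_CF)

	int main(void)
	{
		assert(FIX_EFLAGS == 0x40DD5); /* the literal the hunk replaces */
		return 0;
	}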
@@ -37,6 +37,11 @@
 	movq %rax,R8(%rsp)
 	.endm
 
+	/*
+	 * Reload arg registers from stack in case ptrace changed them.
+	 * We don't reload %eax because syscall_trace_enter() returned
+	 * the value it wants us to use in the table lookup.
+	 */
 	.macro LOAD_ARGS32 offset
 	movl \offset(%rsp),%r11d
 	movl \offset+8(%rsp),%r10d
@@ -46,7 +51,6 @@
 	movl \offset+48(%rsp),%edx
 	movl \offset+56(%rsp),%esi
 	movl \offset+64(%rsp),%edi
-	movl \offset+72(%rsp),%eax
 	.endm
 
 	.macro CFI_STARTPROC32 simple
@@ -137,13 +141,12 @@ ENTRY(ia32_sysenter_target)
 	.previous
 	GET_THREAD_INFO(%r10)
 	orl $TS_COMPAT,TI_status(%r10)
-	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP), \
-		TI_flags(%r10)
+	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
 	CFI_REMEMBER_STATE
 	jnz sysenter_tracesys
-sysenter_do_call:
 	cmpl $(IA32_NR_syscalls-1),%eax
 	ja ia32_badsys
+sysenter_do_call:
 	IA32_ARG_FIXUP 1
 	call *ia32_sys_call_table(,%rax,8)
 	movq %rax,RAX-ARGOFFSET(%rsp)
@@ -242,8 +245,7 @@ ENTRY(ia32_cstar_target)
 	.previous
 	GET_THREAD_INFO(%r10)
 	orl $TS_COMPAT,TI_status(%r10)
-	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP), \
-		TI_flags(%r10)
+	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
 	CFI_REMEMBER_STATE
 	jnz cstar_tracesys
 cstar_do_call:
@@ -321,6 +323,7 @@ ENTRY(ia32_syscall)
 	/*CFI_REL_OFFSET rflags,EFLAGS-RIP*/
 	/*CFI_REL_OFFSET cs,CS-RIP*/
 	CFI_REL_OFFSET rip,RIP-RIP
+	PARAVIRT_ADJUST_EXCEPTION_FRAME
 	SWAPGS
 	/*
 	 * No need to follow this irqs on/off section: the syscall
@@ -336,8 +339,7 @@ ENTRY(ia32_syscall)
 	SAVE_ARGS 0,0,1
 	GET_THREAD_INFO(%r10)
 	orl $TS_COMPAT,TI_status(%r10)
-	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP), \
-		TI_flags(%r10)
+	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
 	jnz ia32_tracesys
 ia32_do_syscall:
 	cmpl $(IA32_NR_syscalls-1),%eax
...
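The three entry-path hunks above replace the same open-coded three-flag test with one named mask, so every 32-bit entry point (sysenter, syscall, int 0x80) checks an identical set. A hedged sketch of the idea only; the real definition lives in asm/thread_info.h and may well contain more flags than the three visible in the old code:

	/* Illustrative, not the tree's definition. */
	#define _TIF_WORK_SYSCALL_ENTRY \
		(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP /* | ... */)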
@@ -7,9 +7,10 @@ extra-y := head_$(BITS).o head$(BITS).o head.o init_task.o vmlinu
 CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE)
 
 ifdef CONFIG_FTRACE
-# Do not profile debug utilities
+# Do not profile debug and lowlevel utilities
 CFLAGS_REMOVE_tsc.o = -pg
 CFLAGS_REMOVE_rtc.o = -pg
+CFLAGS_REMOVE_paravirt.o = -pg
 endif
 
 #
@@ -102,6 +103,7 @@ obj-$(CONFIG_OLPC) += olpc.o
 # 64 bit specific files
 ifeq ($(CONFIG_X86_64),y)
 	obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o
+	obj-y += bios_uv.o
 	obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o
 	obj-$(CONFIG_AUDIT) += audit_64.o
...
@@ -9,6 +9,7 @@
 #include <linux/bootmem.h>
 #include <linux/dmi.h>
 #include <linux/cpumask.h>
+#include <asm/segment.h>
 
 #include "realmode/wakeup.h"
 #include "sleep.h"
@@ -23,15 +24,6 @@ static unsigned long acpi_realmode;
 static char temp_stack[10240];
 #endif
 
-/* XXX: this macro should move to asm-x86/segment.h and be shared with the
-   boot code... */
-#define GDT_ENTRY(flags, base, limit)		\
-	(((u64)(base & 0xff000000) << 32) |	\
-	 ((u64)flags << 40) |			\
-	 ((u64)(limit & 0x00ff0000) << 32) |	\
-	 ((u64)(base & 0x00ffffff) << 16) |	\
-	 ((u64)(limit & 0x0000ffff)))
-
 /**
  * acpi_save_state_mem - save kernel state
  *
...
@@ -23,7 +23,7 @@
 #include <linux/scatterlist.h>
 #include <linux/iommu-helper.h>
 #include <asm/proto.h>
-#include <asm/gart.h>
+#include <asm/iommu.h>
 #include <asm/amd_iommu_types.h>
 #include <asm/amd_iommu.h>
@@ -32,21 +32,37 @@
 #define to_pages(addr, size) \
 	 (round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT)
 
+#define EXIT_LOOP_COUNT 10000000
+
 static DEFINE_RWLOCK(amd_iommu_devtable_lock);
 
-struct command {
+/*
+ * general struct to manage commands sent to an IOMMU
+ */
+struct iommu_cmd {
 	u32 data[4];
 };
 
 static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
 			     struct unity_map_entry *e);
 
+/* returns !0 if the IOMMU is caching non-present entries in its TLB */
 static int iommu_has_npcache(struct amd_iommu *iommu)
 {
 	return iommu->cap & IOMMU_CAP_NPCACHE;
 }
 
-static int __iommu_queue_command(struct amd_iommu *iommu, struct command *cmd)
+/****************************************************************************
+ *
+ * IOMMU command queuing functions
+ *
+ ****************************************************************************/
+
+/*
+ * Writes the command to the IOMMU's command buffer and informs the
+ * hardware about the new command. Must be called with iommu->lock held.
+ */
+static int __iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
 {
 	u32 tail, head;
 	u8 *target;
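The to_pages() macro carried over at the top of this diff converts an arbitrary (addr, size) pair into a count of 4 KiB pages, rounding the unaligned head and tail in. A standalone check of the arithmetic, with PAGE_* and a power-of-two round_up() restated as assumptions:

	#include <assert.h>

	#define PAGE_SHIFT 12
	#define PAGE_SIZE  (1UL << PAGE_SHIFT)
	#define PAGE_MASK  (~(PAGE_SIZE - 1))
	#define round_up(x, y) (((x) + (y) - 1) & ~((y) - 1))

	#define to_pages(addr, size) \
		(round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT)

	int main(void)
	{
		assert(to_pages(0x1000UL, 0x1000UL) == 1); /* aligned, one page */
		assert(to_pages(0x1fffUL, 0x2UL) == 2);    /* two bytes straddling a boundary */
		return 0;
	}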
@@ -63,7 +79,11 @@ static int __iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
 	return 0;
 }
 
-static int iommu_queue_command(struct amd_iommu *iommu, struct command *cmd)
+/*
+ * General queuing function for commands. Takes iommu->lock and calls
+ * __iommu_queue_command().
+ */
+static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
 {
 	unsigned long flags;
 	int ret;
@@ -75,16 +95,24 @@ static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
 	return ret;
 }
 
+/*
+ * This function is called whenever we need to ensure that the IOMMU has
+ * completed execution of all commands we sent. It sends a
+ * COMPLETION_WAIT command and waits for it to finish. The IOMMU informs
+ * us about that by writing a value to a physical address we pass with
+ * the command.
+ */
 static int iommu_completion_wait(struct amd_iommu *iommu)
 {
 	int ret;
-	struct command cmd;
+	struct iommu_cmd cmd;
 	volatile u64 ready = 0;
 	unsigned long ready_phys = virt_to_phys(&ready);
+	unsigned long i = 0;
 
 	memset(&cmd, 0, sizeof(cmd));
 	cmd.data[0] = LOW_U32(ready_phys) | CMD_COMPL_WAIT_STORE_MASK;
-	cmd.data[1] = HIGH_U32(ready_phys);
+	cmd.data[1] = upper_32_bits(ready_phys);
 	cmd.data[2] = 1; /* value written to 'ready' */
 	CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT);
@@ -95,15 +123,23 @@ static int iommu_completion_wait(struct amd_iommu *iommu)
 	if (ret)
 		return ret;
 
-	while (!ready)
+	while (!ready && (i < EXIT_LOOP_COUNT)) {
+		++i;
 		cpu_relax();
+	}
+
+	if (unlikely((i == EXIT_LOOP_COUNT) && printk_ratelimit()))
+		printk(KERN_WARNING "AMD IOMMU: Completion wait loop failed\n");
 
 	return 0;
 }
 
+/*
+ * Command send function for invalidating a device table entry
+ */
 static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid)
 {
-	struct command cmd;
+	struct iommu_cmd cmd;
 
 	BUG_ON(iommu == NULL);
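Two robustness tweaks ride along in the completion-wait hunk: the high half of the wait address now goes through the generic upper_32_bits() helper, and the formerly unbounded busy-wait is capped at EXIT_LOOP_COUNT iterations with a rate-limited warning. A userspace-flavoured sketch of the bounded-poll pattern (wait_for_flag() and max_spins are illustrative names, not kernel API):

	#include <stdbool.h>

	/* Poll a completion flag at most max_spins times instead of forever,
	 * and report whether the device ever signalled completion. */
	static bool wait_for_flag(volatile const int *flag, unsigned long max_spins)
	{
		unsigned long i = 0;

		while (!*flag && i < max_spins)
			++i;	/* the kernel loop calls cpu_relax() here */
		return *flag != 0;
	}

	int main(void)
	{
		int done = 1;	/* pretend the hardware already stored the value */
		return wait_for_flag(&done, 10000000) ? 0 : 1;
	}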
@@ -116,20 +152,23 @@ static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid)
 	return iommu_queue_command(iommu, &cmd);
 }
 
+/*
+ * Generic command send function for invalidating TLB entries
+ */
 static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu,
 		u64 address, u16 domid, int pde, int s)
 {
-	struct command cmd;
+	struct iommu_cmd cmd;
 
 	memset(&cmd, 0, sizeof(cmd));
 	address &= PAGE_MASK;
 	CMD_SET_TYPE(&cmd, CMD_INV_IOMMU_PAGES);
 	cmd.data[1] |= domid;
 	cmd.data[2] = LOW_U32(address);
-	cmd.data[3] = HIGH_U32(address);
-	if (s)
+	cmd.data[3] = upper_32_bits(address);
+	if (s) /* size bit - we flush more than one 4kb page */
 		cmd.data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
-	if (pde)
+	if (pde) /* PDE bit - we want to flush everything, not only the PTEs */
 		cmd.data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK;
 
 	iommu->need_sync = 1;
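The same HIGH_U32 → upper_32_bits() substitution appears again here. For reference, a standalone check of what the generic helper computes; the split 16+16 shift shown is how linux/kernel.h is believed to define it (restated as an assumption), which sidesteps an undefined full 32-bit shift when n is only 32 bits wide:

	#include <assert.h>
	#include <stdint.h>

	typedef uint32_t u32;
	#define upper_32_bits(n) ((u32)(((n) >> 16) >> 16))

	int main(void)
	{
		uint64_t phys = 0x123456789abcdef0ULL;
		assert(upper_32_bits(phys) == 0x12345678u);
		assert(upper_32_bits((u32)0xffffffffu) == 0); /* 32-bit input: high half is 0 */
		return 0;
	}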
@@ -137,6 +176,11 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu,
 	return iommu_queue_command(iommu, &cmd);
 }
 
+/*
+ * TLB invalidation function which is called from the mapping functions.
+ * It invalidates a single PTE if the range to flush is within a single
+ * page. Otherwise it flushes the whole TLB of the IOMMU.
+ */
 static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid,
 		u64 address, size_t size)
 {
@@ -159,6 +203,20 @@ static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid,
 	return 0;
 }
 
+/****************************************************************************
+ *
+ * The functions below are used to create the page table mappings for
+ * unity mapped regions.
+ *
+ ****************************************************************************/
+
+/*
+ * Generic mapping function. It maps a physical address into a DMA
+ * address space. It allocates the page table pages if necessary.
+ * In the future it can be extended to a generic mapping function
+ * supporting all features of AMD IOMMU page tables like level skipping
+ * and full 64 bit address spaces.
+ */
 static int iommu_map(struct protection_domain *dom,
 		     unsigned long bus_addr,
 		     unsigned long phys_addr,
@@ -209,6 +267,10 @@ static int iommu_map(struct protection_domain *dom,
 	return 0;
 }
 
+/*
+ * This function checks if a specific unity mapping entry is needed for
+ * this specific IOMMU.
+ */
 static int iommu_for_unity_map(struct amd_iommu *iommu,
 			       struct unity_map_entry *entry)
 {
@@ -223,6 +285,12 @@ static int iommu_for_unity_map(struct amd_iommu *iommu,
 	return 0;
 }
 
+/*
+ * Init the unity mappings for a specific IOMMU in the system
+ *
+ * Basically iterates over all unity mapping entries and applies them to
+ * the default DMA domain of that IOMMU if necessary.
+ */
 static int iommu_init_unity_mappings(struct amd_iommu *iommu)
 {
 	struct unity_map_entry *entry;
@@ -239,6 +307,10 @@ static int iommu_init_unity_mappings(struct amd_iommu *iommu)
 	return 0;
 }
 
+/*
+ * This function actually applies the mapping to the page table of the
+ * dma_ops domain.
+ */
 static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
 			     struct unity_map_entry *e)
 {
@@ -261,6 +333,9 @@ static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
 	return 0;
 }
 
+/*
+ * Inits the unity mappings required for a specific device
+ */
 static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom,
 					  u16 devid)
 {
@@ -278,12 +353,26 @@ static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom,
 	return 0;
 }
 
+/****************************************************************************
+ *
+ * The next functions belong to the address allocator for the dma_ops
+ * interface functions. They work like the allocators in the other IOMMU
+ * drivers. It's basically a bitmap which marks the allocated pages in
+ * the aperture. Maybe it could be enhanced in the future to a more
+ * efficient allocator.
+ *
+ ****************************************************************************/
+
 static unsigned long dma_mask_to_pages(unsigned long mask)
 {
 	return (mask >> PAGE_SHIFT) +
 		(PAGE_ALIGN(mask & ~PAGE_MASK) >> PAGE_SHIFT);
 }
 
+/*
+ * The address allocator core function.
+ *
+ * called with domain->lock held
+ */
 static unsigned long dma_ops_alloc_addresses(struct device *dev,
 					     struct dma_ops_domain *dom,
 					     unsigned int pages)
@@ -317,6 +406,11 @@ static unsigned long dma_ops_alloc_addresses(struct device *dev,
 	return address;
 }
 
+/*
+ * The address free function.
+ *
+ * called with domain->lock held
+ */
 static void dma_ops_free_addresses(struct dma_ops_domain *dom,
 				   unsigned long address,
 				   unsigned int pages)
@@ -325,6 +419,16 @@ static void dma_ops_free_addresses(struct dma_ops_domain *dom,
 	iommu_area_free(dom->bitmap, address, pages);
 }
 
+/****************************************************************************
+ *
+ * The next functions belong to the domain allocation. A domain is
+ * allocated for every IOMMU as the default domain. If device isolation
+ * is enabled, every device gets its own domain. The most important thing
+ * about domains is the page table mapping the DMA address space they
+ * contain.
+ *
+ ****************************************************************************/
+
 static u16 domain_id_alloc(void)
 {
 	unsigned long flags;
@@ -342,6 +446,10 @@ static u16 domain_id_alloc(void)
 	return id;
 }
 
+/*
+ * Used to reserve address ranges in the aperture (e.g. for exclusion
+ * ranges).
+ */
 static void dma_ops_reserve_addresses(struct dma_ops_domain *dom,
 				      unsigned long start_page,
 				      unsigned int pages)
@@ -382,6 +490,10 @@ static void dma_ops_free_pagetable(struct dma_ops_domain *dma_dom)
 	free_page((unsigned long)p1);
 }
 
+/*
+ * Free a domain, only used if something went wrong in the
+ * allocation path and we need to free an already allocated page table
+ */
 static void dma_ops_domain_free(struct dma_ops_domain *dom)
 {
 	if (!dom)
@@ -396,6 +508,11 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom)
 	kfree(dom);
 }
 
+/*
+ * Allocates a new protection domain usable for the dma_ops functions.
+ * It also initializes the page table and the address allocator data
+ * structures required for the dma_ops interface
+ */
 static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu,
 						   unsigned order)
 {
@@ -436,6 +553,7 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu,
 	dma_dom->bitmap[0] = 1;
 	dma_dom->next_bit = 0;
 
+	/* Initialize the exclusion range if necessary */
 	if (iommu->exclusion_start &&
 	    iommu->exclusion_start < dma_dom->aperture_size) {
 		unsigned long startpage = iommu->exclusion_start >> PAGE_SHIFT;
@@ -444,6 +562,11 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu,
 		dma_ops_reserve_addresses(dma_dom, startpage, pages);
 	}
 
+	/*
+	 * At the last step, build the page tables so we don't need to
+	 * allocate page table pages in the dma_ops mapping/unmapping
+	 * path.
+	 */
 	num_pte_pages = dma_dom->aperture_size / (PAGE_SIZE * 512);
 	dma_dom->pte_pages = kzalloc(num_pte_pages * sizeof(void *),
 			GFP_KERNEL);
@@ -472,6 +595,10 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu,
 	return NULL;
 }
 
+/*
+ * Find out the protection domain structure for a given PCI device. This
+ * will give us the pointer to the page table root for example.
+ */
 static struct protection_domain *domain_for_device(u16 devid)
 {
 	struct protection_domain *dom;
@@ -484,6 +611,10 @@ static struct protection_domain *domain_for_device(u16 devid)
 	return dom;
 }
 
+/*
+ * If a device is not yet associated with a domain, this function
+ * assigns it to a domain and makes that visible to the hardware
+ */
 static void set_device_domain(struct amd_iommu *iommu,
 			      struct protection_domain *domain,
 			      u16 devid)
@@ -508,6 +639,19 @@ static void set_device_domain(struct amd_iommu *iommu,
 	iommu->need_sync = 1;
 }
 
+/*****************************************************************************
+ *
+ * The next functions belong to the dma_ops mapping/unmapping code.
+ *
+ *****************************************************************************/
+
+/*
+ * In the dma_ops path we only have the struct device. This function
+ * finds the corresponding IOMMU, the protection domain and the
+ * requestor id for a given device.
+ * If the device is not yet associated with a domain this is also done
+ * in this function.
+ */
 static int get_device_resources(struct device *dev,
 				struct amd_iommu **iommu,
 				struct protection_domain **domain,
@@ -520,8 +664,9 @@ static int get_device_resources(struct device *dev,
 	BUG_ON(!dev || dev->bus != &pci_bus_type || !dev->dma_mask);
 
 	pcidev = to_pci_dev(dev);
-	_bdf = (pcidev->bus->number << 8) | pcidev->devfn;
+	_bdf = calc_devid(pcidev->bus->number, pcidev->devfn);
 
+	/* device not translated by any IOMMU in the system? */
 	if (_bdf >= amd_iommu_last_bdf) {
 		*iommu = NULL;
 		*domain = NULL;
@@ -547,6 +692,10 @@ static int get_device_resources(struct device *dev,
 	return 1;
 }
 
+/*
+ * This is the generic map function. It maps one 4kb page at paddr to
+ * the given address in the DMA address space for the domain.
+ */
 static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu,
 				     struct dma_ops_domain *dom,
 				     unsigned long address,
@@ -578,6 +727,9 @@ static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu,
 	return (dma_addr_t)address;
 }
 
+/*
+ * The generic unmapping function for one page in the DMA address space.
+ */
 static void dma_ops_domain_unmap(struct amd_iommu *iommu,
 				 struct dma_ops_domain *dom,
 				 unsigned long address)
@@ -597,6 +749,12 @@ static void dma_ops_domain_unmap(struct amd_iommu *iommu,
 	*pte = 0ULL;
 }
 
+/*
+ * This function contains common code for mapping of a physically
+ * contiguous memory region into DMA address space. It is used by all
+ * mapping functions provided by this IOMMU driver.
+ * Must be called with the domain lock held.
+ */
 static dma_addr_t __map_single(struct device *dev,
 			       struct amd_iommu *iommu,
 			       struct dma_ops_domain *dma_dom,
@@ -628,6 +786,10 @@ static dma_addr_t __map_single(struct device *dev,
 	return address;
 }
 
+/*
+ * Does the reverse of the __map_single function. Must be called with
+ * the domain lock held too
+ */
 static void __unmap_single(struct amd_iommu *iommu,
 			   struct dma_ops_domain *dma_dom,
 			   dma_addr_t dma_addr,
@@ -652,6 +814,9 @@ static void __unmap_single(struct amd_iommu *iommu,
 	dma_ops_free_addresses(dma_dom, dma_addr, pages);
 }
 
+/*
+ * The exported map_single function for dma_ops.
+ */
 static dma_addr_t map_single(struct device *dev, phys_addr_t paddr,
 			     size_t size, int dir)
 {
@@ -664,6 +829,7 @@ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr,
 	get_device_resources(dev, &iommu, &domain, &devid);
 
 	if (iommu == NULL || domain == NULL)
+		/* device not handled by any AMD IOMMU */
 		return (dma_addr_t)paddr;
 
 	spin_lock_irqsave(&domain->lock, flags);
@@ -683,6 +849,9 @@ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr,
 	return addr;
 }
 
+/*
+ * The exported unmap_single function for dma_ops.
+ */
 static void unmap_single(struct device *dev, dma_addr_t dma_addr,
 			 size_t size, int dir)
 {
@@ -692,6 +861,7 @@ static void unmap_single(struct device *dev, dma_addr_t dma_addr,
 	u16 devid;
 
 	if (!get_device_resources(dev, &iommu, &domain, &devid))
+		/* device not handled by any AMD IOMMU */
 		return;
 
 	spin_lock_irqsave(&domain->lock, flags);
@@ -706,6 +876,10 @@ static void unmap_single(struct device *dev, dma_addr_t dma_addr,
 	spin_unlock_irqrestore(&domain->lock, flags);
 }
 
+/*
+ * This is a special map_sg function which is used if we should map a
+ * device which is not handled by an AMD IOMMU in the system.
+ */
 static int map_sg_no_iommu(struct device *dev, struct scatterlist *sglist,
 			   int nelems, int dir)
 {
@@ -720,6 +894,10 @@ static int map_sg_no_iommu(struct device *dev, struct scatterlist *sglist,
 	return nelems;
 }
 
+/*
+ * The exported map_sg function for dma_ops (handles scatter-gather
+ * lists).
+ */
 static int map_sg(struct device *dev, struct scatterlist *sglist,
 		  int nelems, int dir)
 {
@@ -775,6 +953,10 @@ static int map_sg(struct device *dev, struct scatterlist *sglist,
 	goto out;
 }
 
+/*
+ * The exported unmap_sg function for dma_ops (handles scatter-gather
+ * lists).
+ */
 static void unmap_sg(struct device *dev, struct scatterlist *sglist,
 		     int nelems, int dir)
 {
@@ -804,6 +986,9 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist,
 	spin_unlock_irqrestore(&domain->lock, flags);
 }
 
+/*
+ * The exported alloc_coherent function for dma_ops.
+ */
 static void *alloc_coherent(struct device *dev, size_t size,
 			    dma_addr_t *dma_addr, gfp_t flag)
 {
@@ -851,6 +1036,11 @@ static void *alloc_coherent(struct device *dev, size_t size,
 	return virt_addr;
 }
 
+/*
+ * The exported free_coherent function for dma_ops.
+ * FIXME: fix the generic x86 DMA layer so that it actually calls that
+ * function.
+ */
 static void free_coherent(struct device *dev, size_t size,
 			  void *virt_addr, dma_addr_t dma_addr)
 {
@@ -879,6 +1069,8 @@ static void free_coherent(struct device *dev, size_t size,
 }
 
 /*
+ * The function for pre-allocating protection domains.
+ *
  * If the driver core informs the DMA layer if a driver grabs a device
  * we don't need to preallocate the protection domains anymore.
  * For now we have to.
@@ -921,12 +1113,20 @@ static struct dma_mapping_ops amd_iommu_dma_ops = {
 	.unmap_sg = unmap_sg,
 };
 
+/*
+ * The function which clues the AMD IOMMU driver into dma_ops.
+ */
 int __init amd_iommu_init_dma_ops(void)
 {
 	struct amd_iommu *iommu;
 	int order = amd_iommu_aperture_order;
 	int ret;
 
+	/*
+	 * first allocate a default protection domain for every IOMMU we
+	 * found in the system. Devices not assigned to any other
+	 * protection domain will be assigned to the default one.
+	 */
 	list_for_each_entry(iommu, &amd_iommu_list, list) {
 		iommu->default_dom = dma_ops_domain_alloc(iommu, order);
 		if (iommu->default_dom == NULL)
@@ -936,6 +1136,10 @@ int __init amd_iommu_init_dma_ops(void)
 		goto free_domains;
 	}
 
+	/*
+	 * If device isolation is enabled, pre-allocate the protection
+	 * domains for each device.
+	 */
 	if (amd_iommu_isolate)
 		prealloc_protection_domains();
@@ -947,6 +1151,7 @@ int __init amd_iommu_init_dma_ops(void)
 	gart_iommu_aperture = 0;
 #endif
 
+	/* Make the driver finally visible to the drivers */
 	dma_ops = &amd_iommu_dma_ops;
 
 	return 0;
...
This diff has been collapsed.
@@ -21,6 +21,7 @@
 #include <linux/suspend.h>
 #include <asm/e820.h>
 #include <asm/io.h>
+#include <asm/iommu.h>
 #include <asm/gart.h>
 #include <asm/pci-direct.h>
 #include <asm/dma.h>
...
...@@ -75,7 +75,7 @@ char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE}; ...@@ -75,7 +75,7 @@ char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE};
/* /*
* Debug level, exported for io_apic.c * Debug level, exported for io_apic.c
*/ */
int apic_verbosity; unsigned int apic_verbosity;
int pic_mode; int pic_mode;
...@@ -177,7 +177,7 @@ void __cpuinit enable_NMI_through_LVT0(void) ...@@ -177,7 +177,7 @@ void __cpuinit enable_NMI_through_LVT0(void)
/* Level triggered for 82489DX */ /* Level triggered for 82489DX */
if (!lapic_is_integrated()) if (!lapic_is_integrated())
v |= APIC_LVT_LEVEL_TRIGGER; v |= APIC_LVT_LEVEL_TRIGGER;
apic_write_around(APIC_LVT0, v); apic_write(APIC_LVT0, v);
} }
/** /**
...@@ -212,9 +212,6 @@ int lapic_get_maxlvt(void) ...@@ -212,9 +212,6 @@ int lapic_get_maxlvt(void)
* this function twice on the boot CPU, once with a bogus timeout * this function twice on the boot CPU, once with a bogus timeout
* value, second time for real. The other (noncalibrating) CPUs * value, second time for real. The other (noncalibrating) CPUs
* call this function only once, with the real, calibrated value. * call this function only once, with the real, calibrated value.
*
* We do reads before writes even if unnecessary, to get around the
* P5 APIC double write bug.
*/ */
static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
{ {
...@@ -229,18 +226,18 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) ...@@ -229,18 +226,18 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
if (!irqen) if (!irqen)
lvtt_value |= APIC_LVT_MASKED; lvtt_value |= APIC_LVT_MASKED;
apic_write_around(APIC_LVTT, lvtt_value); apic_write(APIC_LVTT, lvtt_value);
/* /*
* Divide PICLK by 16 * Divide PICLK by 16
*/ */
tmp_value = apic_read(APIC_TDCR); tmp_value = apic_read(APIC_TDCR);
apic_write_around(APIC_TDCR, (tmp_value apic_write(APIC_TDCR,
& ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) (tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) |
| APIC_TDR_DIV_16); APIC_TDR_DIV_16);
if (!oneshot) if (!oneshot)
apic_write_around(APIC_TMICT, clocks/APIC_DIVISOR); apic_write(APIC_TMICT, clocks / APIC_DIVISOR);
} }
/* /*
...@@ -249,7 +246,7 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) ...@@ -249,7 +246,7 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
static int lapic_next_event(unsigned long delta, static int lapic_next_event(unsigned long delta,
struct clock_event_device *evt) struct clock_event_device *evt)
{ {
apic_write_around(APIC_TMICT, delta); apic_write(APIC_TMICT, delta);
return 0; return 0;
} }
...@@ -278,7 +275,7 @@ static void lapic_timer_setup(enum clock_event_mode mode, ...@@ -278,7 +275,7 @@ static void lapic_timer_setup(enum clock_event_mode mode,
case CLOCK_EVT_MODE_SHUTDOWN: case CLOCK_EVT_MODE_SHUTDOWN:
v = apic_read(APIC_LVTT); v = apic_read(APIC_LVTT);
v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
apic_write_around(APIC_LVTT, v); apic_write(APIC_LVTT, v);
break; break;
case CLOCK_EVT_MODE_RESUME: case CLOCK_EVT_MODE_RESUME:
/* Nothing to do here */ /* Nothing to do here */
...@@ -372,12 +369,7 @@ static void __init lapic_cal_handler(struct clock_event_device *dev) ...@@ -372,12 +369,7 @@ static void __init lapic_cal_handler(struct clock_event_device *dev)
} }
} }
/* static int __init calibrate_APIC_clock(void)
* Setup the boot APIC
*
* Calibrate and verify the result.
*/
void __init setup_boot_APIC_clock(void)
{ {
struct clock_event_device *levt = &__get_cpu_var(lapic_events); struct clock_event_device *levt = &__get_cpu_var(lapic_events);
const long pm_100ms = PMTMR_TICKS_PER_SEC/10; const long pm_100ms = PMTMR_TICKS_PER_SEC/10;
...@@ -387,24 +379,6 @@ void __init setup_boot_APIC_clock(void) ...@@ -387,24 +379,6 @@ void __init setup_boot_APIC_clock(void)
long delta, deltapm; long delta, deltapm;
int pm_referenced = 0; int pm_referenced = 0;
/*
* The local apic timer can be disabled via the kernel
* commandline or from the CPU detection code. Register the lapic
* timer as a dummy clock event source on SMP systems, so the
* broadcast mechanism is used. On UP systems simply ignore it.
*/
if (local_apic_timer_disabled) {
/* No broadcast on UP ! */
if (num_possible_cpus() > 1) {
lapic_clockevent.mult = 1;
setup_APIC_timer();
}
return;
}
apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n"
"calibrating APIC timer ...\n");
local_irq_disable(); local_irq_disable();
/* Replace the global interrupt handler */ /* Replace the global interrupt handler */
...@@ -489,8 +463,6 @@ void __init setup_boot_APIC_clock(void) ...@@ -489,8 +463,6 @@ void __init setup_boot_APIC_clock(void)
calibration_result / (1000000 / HZ), calibration_result / (1000000 / HZ),
calibration_result % (1000000 / HZ)); calibration_result % (1000000 / HZ));
local_apic_timer_verify_ok = 1;
/* /*
* Do a sanity check on the APIC calibration result * Do a sanity check on the APIC calibration result
*/ */
...@@ -498,12 +470,11 @@ void __init setup_boot_APIC_clock(void) ...@@ -498,12 +470,11 @@ void __init setup_boot_APIC_clock(void)
local_irq_enable(); local_irq_enable();
printk(KERN_WARNING printk(KERN_WARNING
"APIC frequency too slow, disabling apic timer\n"); "APIC frequency too slow, disabling apic timer\n");
/* No broadcast on UP ! */ return -1;
if (num_possible_cpus() > 1)
setup_APIC_timer();
return;
} }
local_apic_timer_verify_ok = 1;
/* We trust the pm timer based calibration */ /* We trust the pm timer based calibration */
if (!pm_referenced) { if (!pm_referenced) {
apic_printk(APIC_VERBOSE, "... verify APIC timer\n"); apic_printk(APIC_VERBOSE, "... verify APIC timer\n");
...@@ -543,22 +514,55 @@ void __init setup_boot_APIC_clock(void) ...@@ -543,22 +514,55 @@ void __init setup_boot_APIC_clock(void)
if (!local_apic_timer_verify_ok) { if (!local_apic_timer_verify_ok) {
printk(KERN_WARNING printk(KERN_WARNING
"APIC timer disabled due to verification failure.\n"); "APIC timer disabled due to verification failure.\n");
return -1;
}
return 0;
}
/*
* Setup the boot APIC
*
* Calibrate and verify the result.
*/
void __init setup_boot_APIC_clock(void)
{
/*
* The local apic timer can be disabled via the kernel
* commandline or from the CPU detection code. Register the lapic
* timer as a dummy clock event source on SMP systems, so the
* broadcast mechanism is used. On UP systems simply ignore it.
*/
if (local_apic_timer_disabled) {
/* No broadcast on UP ! */ /* No broadcast on UP ! */
if (num_possible_cpus() == 1) if (num_possible_cpus() > 1) {
return; lapic_clockevent.mult = 1;
} else { setup_APIC_timer();
/* }
* If nmi_watchdog is set to IO_APIC, we need the return;
* PIT/HPET going. Otherwise register lapic as a dummy
* device.
*/
if (nmi_watchdog != NMI_IO_APIC)
lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
else
printk(KERN_WARNING "APIC timer registered as dummy,"
" due to nmi_watchdog=%d!\n", nmi_watchdog);
} }
apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n"
"calibrating APIC timer ...\n");
if (calibrate_APIC_clock()) {
/* No broadcast on UP ! */
if (num_possible_cpus() > 1)
setup_APIC_timer();
return;
}
/*
* If nmi_watchdog is set to IO_APIC, we need the
* PIT/HPET going. Otherwise register lapic as a dummy
* device.
*/
if (nmi_watchdog != NMI_IO_APIC)
lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
else
printk(KERN_WARNING "APIC timer registered as dummy,"
" due to nmi_watchdog=%d!\n", nmi_watchdog);
/* Setup the lapic or request the broadcast */ /* Setup the lapic or request the broadcast */
setup_APIC_timer(); setup_APIC_timer();
} }
...@@ -693,44 +697,44 @@ void clear_local_APIC(void) ...@@ -693,44 +697,44 @@ void clear_local_APIC(void)
*/ */
if (maxlvt >= 3) { if (maxlvt >= 3) {
v = ERROR_APIC_VECTOR; /* any non-zero vector will do */ v = ERROR_APIC_VECTOR; /* any non-zero vector will do */
apic_write_around(APIC_LVTERR, v | APIC_LVT_MASKED); apic_write(APIC_LVTERR, v | APIC_LVT_MASKED);
} }
/* /*
* Careful: we have to set masks only first to deassert * Careful: we have to set masks only first to deassert
* any level-triggered sources. * any level-triggered sources.
*/ */
v = apic_read(APIC_LVTT); v = apic_read(APIC_LVTT);
apic_write_around(APIC_LVTT, v | APIC_LVT_MASKED); apic_write(APIC_LVTT, v | APIC_LVT_MASKED);
v = apic_read(APIC_LVT0); v = apic_read(APIC_LVT0);
apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED); apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
v = apic_read(APIC_LVT1); v = apic_read(APIC_LVT1);
apic_write_around(APIC_LVT1, v | APIC_LVT_MASKED); apic_write(APIC_LVT1, v | APIC_LVT_MASKED);
if (maxlvt >= 4) { if (maxlvt >= 4) {
v = apic_read(APIC_LVTPC); v = apic_read(APIC_LVTPC);
apic_write_around(APIC_LVTPC, v | APIC_LVT_MASKED); apic_write(APIC_LVTPC, v | APIC_LVT_MASKED);
} }
/* lets not touch this if we didn't frob it */ /* lets not touch this if we didn't frob it */
#ifdef CONFIG_X86_MCE_P4THERMAL #ifdef CONFIG_X86_MCE_P4THERMAL
if (maxlvt >= 5) { if (maxlvt >= 5) {
v = apic_read(APIC_LVTTHMR); v = apic_read(APIC_LVTTHMR);
apic_write_around(APIC_LVTTHMR, v | APIC_LVT_MASKED); apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED);
} }
#endif #endif
/* /*
* Clean APIC state for other OSs: * Clean APIC state for other OSs:
*/ */
apic_write_around(APIC_LVTT, APIC_LVT_MASKED); apic_write(APIC_LVTT, APIC_LVT_MASKED);
apic_write_around(APIC_LVT0, APIC_LVT_MASKED); apic_write(APIC_LVT0, APIC_LVT_MASKED);
apic_write_around(APIC_LVT1, APIC_LVT_MASKED); apic_write(APIC_LVT1, APIC_LVT_MASKED);
if (maxlvt >= 3) if (maxlvt >= 3)
apic_write_around(APIC_LVTERR, APIC_LVT_MASKED); apic_write(APIC_LVTERR, APIC_LVT_MASKED);
if (maxlvt >= 4) if (maxlvt >= 4)
apic_write_around(APIC_LVTPC, APIC_LVT_MASKED); apic_write(APIC_LVTPC, APIC_LVT_MASKED);
#ifdef CONFIG_X86_MCE_P4THERMAL #ifdef CONFIG_X86_MCE_P4THERMAL
if (maxlvt >= 5) if (maxlvt >= 5)
apic_write_around(APIC_LVTTHMR, APIC_LVT_MASKED); apic_write(APIC_LVTTHMR, APIC_LVT_MASKED);
#endif #endif
/* Integrated APIC (!82489DX) ? */ /* Integrated APIC (!82489DX) ? */
if (lapic_is_integrated()) { if (lapic_is_integrated()) {
...@@ -756,7 +760,7 @@ void disable_local_APIC(void) ...@@ -756,7 +760,7 @@ void disable_local_APIC(void)
*/ */
value = apic_read(APIC_SPIV); value = apic_read(APIC_SPIV);
value &= ~APIC_SPIV_APIC_ENABLED; value &= ~APIC_SPIV_APIC_ENABLED;
apic_write_around(APIC_SPIV, value); apic_write(APIC_SPIV, value);
/* /*
* When LAPIC was disabled by the BIOS and enabled by the kernel, * When LAPIC was disabled by the BIOS and enabled by the kernel,
...@@ -865,8 +869,8 @@ void __init sync_Arb_IDs(void) ...@@ -865,8 +869,8 @@ void __init sync_Arb_IDs(void)
apic_wait_icr_idle(); apic_wait_icr_idle();
apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n"); apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n");
apic_write_around(APIC_ICR, APIC_DEST_ALLINC | APIC_INT_LEVELTRIG apic_write(APIC_ICR,
| APIC_DM_INIT); APIC_DEST_ALLINC | APIC_INT_LEVELTRIG | APIC_DM_INIT);
} }
/* /*
...@@ -902,16 +906,16 @@ void __init init_bsp_APIC(void) ...@@ -902,16 +906,16 @@ void __init init_bsp_APIC(void)
else else
value |= APIC_SPIV_FOCUS_DISABLED; value |= APIC_SPIV_FOCUS_DISABLED;
value |= SPURIOUS_APIC_VECTOR; value |= SPURIOUS_APIC_VECTOR;
apic_write_around(APIC_SPIV, value); apic_write(APIC_SPIV, value);
/* /*
* Set up the virtual wire mode. * Set up the virtual wire mode.
*/ */
apic_write_around(APIC_LVT0, APIC_DM_EXTINT); apic_write(APIC_LVT0, APIC_DM_EXTINT);
value = APIC_DM_NMI; value = APIC_DM_NMI;
if (!lapic_is_integrated()) /* 82489DX */ if (!lapic_is_integrated()) /* 82489DX */
value |= APIC_LVT_LEVEL_TRIGGER; value |= APIC_LVT_LEVEL_TRIGGER;
apic_write_around(APIC_LVT1, value); apic_write(APIC_LVT1, value);
} }
static void __cpuinit lapic_setup_esr(void) static void __cpuinit lapic_setup_esr(void)
...@@ -926,7 +930,7 @@ static void __cpuinit lapic_setup_esr(void) ...@@ -926,7 +930,7 @@ static void __cpuinit lapic_setup_esr(void)
/* enables sending errors */ /* enables sending errors */
value = ERROR_APIC_VECTOR; value = ERROR_APIC_VECTOR;
apic_write_around(APIC_LVTERR, value); apic_write(APIC_LVTERR, value);
/* /*
* spec says clear errors after enabling vector. * spec says clear errors after enabling vector.
*/ */
...@@ -989,7 +993,7 @@ void __cpuinit setup_local_APIC(void) ...@@ -989,7 +993,7 @@ void __cpuinit setup_local_APIC(void)
*/ */
value = apic_read(APIC_TASKPRI); value = apic_read(APIC_TASKPRI);
value &= ~APIC_TPRI_MASK; value &= ~APIC_TPRI_MASK;
apic_write_around(APIC_TASKPRI, value); apic_write(APIC_TASKPRI, value);
/* /*
* After a crash, we no longer service the interrupts and a pending * After a crash, we no longer service the interrupts and a pending
...@@ -1047,7 +1051,7 @@ void __cpuinit setup_local_APIC(void) ...@@ -1047,7 +1051,7 @@ void __cpuinit setup_local_APIC(void)
* Set spurious IRQ vector * Set spurious IRQ vector
*/ */
value |= SPURIOUS_APIC_VECTOR; value |= SPURIOUS_APIC_VECTOR;
apic_write_around(APIC_SPIV, value); apic_write(APIC_SPIV, value);
/* /*
* Set up LVT0, LVT1: * Set up LVT0, LVT1:
...@@ -1069,7 +1073,7 @@ void __cpuinit setup_local_APIC(void) ...@@ -1069,7 +1073,7 @@ void __cpuinit setup_local_APIC(void)
apic_printk(APIC_VERBOSE, "masked ExtINT on CPU#%d\n", apic_printk(APIC_VERBOSE, "masked ExtINT on CPU#%d\n",
smp_processor_id()); smp_processor_id());
} }
apic_write_around(APIC_LVT0, value); apic_write(APIC_LVT0, value);
/* /*
* only the BP should see the LINT1 NMI signal, obviously. * only the BP should see the LINT1 NMI signal, obviously.
...@@ -1080,7 +1084,7 @@ void __cpuinit setup_local_APIC(void) ...@@ -1080,7 +1084,7 @@ void __cpuinit setup_local_APIC(void)
value = APIC_DM_NMI | APIC_LVT_MASKED; value = APIC_DM_NMI | APIC_LVT_MASKED;
if (!integrated) /* 82489DX */ if (!integrated) /* 82489DX */
value |= APIC_LVT_LEVEL_TRIGGER; value |= APIC_LVT_LEVEL_TRIGGER;
apic_write_around(APIC_LVT1, value); apic_write(APIC_LVT1, value);
} }
void __cpuinit end_local_APIC_setup(void) void __cpuinit end_local_APIC_setup(void)
...@@ -1091,7 +1095,7 @@ void __cpuinit end_local_APIC_setup(void) ...@@ -1091,7 +1095,7 @@ void __cpuinit end_local_APIC_setup(void)
/* Disable the local apic timer */ /* Disable the local apic timer */
value = apic_read(APIC_LVTT); value = apic_read(APIC_LVTT);
value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
apic_write_around(APIC_LVTT, value); apic_write(APIC_LVTT, value);
setup_apic_nmi_watchdog(NULL); setup_apic_nmi_watchdog(NULL);
apic_pm_activate(); apic_pm_activate();
...@@ -1214,9 +1218,6 @@ int apic_version[MAX_APICS]; ...@@ -1214,9 +1218,6 @@ int apic_version[MAX_APICS];
int __init APIC_init_uniprocessor(void) int __init APIC_init_uniprocessor(void)
{ {
if (disable_apic)
clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
if (!smp_found_config && !cpu_has_apic) if (!smp_found_config && !cpu_has_apic)
return -1; return -1;
...@@ -1419,7 +1420,7 @@ void disconnect_bsp_APIC(int virt_wire_setup) ...@@ -1419,7 +1420,7 @@ void disconnect_bsp_APIC(int virt_wire_setup)
value &= ~APIC_VECTOR_MASK; value &= ~APIC_VECTOR_MASK;
value |= APIC_SPIV_APIC_ENABLED; value |= APIC_SPIV_APIC_ENABLED;
value |= 0xf; value |= 0xf;
apic_write_around(APIC_SPIV, value); apic_write(APIC_SPIV, value);
if (!virt_wire_setup) { if (!virt_wire_setup) {
/* /*
...@@ -1432,10 +1433,10 @@ void disconnect_bsp_APIC(int virt_wire_setup) ...@@ -1432,10 +1433,10 @@ void disconnect_bsp_APIC(int virt_wire_setup)
APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT); value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT);
apic_write_around(APIC_LVT0, value); apic_write(APIC_LVT0, value);
} else { } else {
/* Disable LVT0 */ /* Disable LVT0 */
apic_write_around(APIC_LVT0, APIC_LVT_MASKED); apic_write(APIC_LVT0, APIC_LVT_MASKED);
} }
/* /*
...@@ -1449,7 +1450,7 @@ void disconnect_bsp_APIC(int virt_wire_setup) ...@@ -1449,7 +1450,7 @@ void disconnect_bsp_APIC(int virt_wire_setup)
APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI); value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI);
apic_write_around(APIC_LVT1, value); apic_write(APIC_LVT1, value);
} }
} }
...@@ -1700,7 +1701,7 @@ early_param("lapic", parse_lapic); ...@@ -1700,7 +1701,7 @@ early_param("lapic", parse_lapic);
static int __init parse_nolapic(char *arg) static int __init parse_nolapic(char *arg)
{ {
disable_apic = 1; disable_apic = 1;
clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); setup_clear_cpu_cap(X86_FEATURE_APIC);
return 0; return 0;
} }
early_param("nolapic", parse_nolapic); early_param("nolapic", parse_nolapic);
......
...@@ -54,7 +54,7 @@ EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok); ...@@ -54,7 +54,7 @@ EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
/* /*
* Debug level, exported for io_apic.c * Debug level, exported for io_apic.c
*/ */
int apic_verbosity; unsigned int apic_verbosity;
/* Have we found an MP table */ /* Have we found an MP table */
int smp_found_config; int smp_found_config;
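apic_verbosity goes from int to unsigned int: it only ever holds the non-negative APIC_QUIET < APIC_VERBOSE < APIC_DEBUG levels, and io_apic.c compares against it through apic_printk(). The macro, sketched as it reads in this era (generic plumbing, not part of this patch):

	#define apic_printk(v, s, a...) do {		\
		if ((v) <= apic_verbosity)		\
			printk(s, ##a);			\
	} while (0)

Messages issued at APIC_QUIET (level 0) always print, which is what the check_timer() conversions further down rely on.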
...@@ -314,7 +314,7 @@ static void setup_APIC_timer(void) ...@@ -314,7 +314,7 @@ static void setup_APIC_timer(void)
#define TICK_COUNT 100000000 #define TICK_COUNT 100000000
static void __init calibrate_APIC_clock(void) static int __init calibrate_APIC_clock(void)
{ {
unsigned apic, apic_start; unsigned apic, apic_start;
unsigned long tsc, tsc_start; unsigned long tsc, tsc_start;
...@@ -368,6 +368,17 @@ static void __init calibrate_APIC_clock(void) ...@@ -368,6 +368,17 @@ static void __init calibrate_APIC_clock(void)
clockevent_delta2ns(0xF, &lapic_clockevent); clockevent_delta2ns(0xF, &lapic_clockevent);
calibration_result = result / HZ; calibration_result = result / HZ;
/*
* Do a sanity check on the APIC calibration result
*/
if (calibration_result < (1000000 / HZ)) {
printk(KERN_WARNING
"APIC frequency too slow, disabling apic timer\n");
return -1;
}
return 0;
} }
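The sanity check moves inside calibrate_APIC_clock(), which now reports failure through its return value instead of leaving a global for the caller to inspect. The threshold is worth a worked example: calibration_result counts APIC-timer ticks per jiffy, so requiring at least 1000000/HZ ticks means the timer must run at 1 MHz or better; with HZ=250 that is 4000 ticks per jiffy. A hypothetical helper spelling out the same test:

	static inline int apic_calibration_ok(unsigned long ticks_per_jiffy)
	{
		/* >= 1 MHz APIC timer, i.e. 1000000/HZ ticks per jiffy */
		return ticks_per_jiffy >= 1000000 / HZ;
	}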
/* /*
...@@ -394,14 +405,7 @@ void __init setup_boot_APIC_clock(void) ...@@ -394,14 +405,7 @@ void __init setup_boot_APIC_clock(void)
} }
printk(KERN_INFO "Using local APIC timer interrupts.\n"); printk(KERN_INFO "Using local APIC timer interrupts.\n");
-	calibrate_APIC_clock();
-
-	/*
-	 * Do a sanity check on the APIC calibration result
-	 */
-	if (calibration_result < (1000000 / HZ)) {
-		printk(KERN_WARNING
-		       "APIC frequency too slow, disabling apic timer\n");
+	if (calibrate_APIC_clock()) {
 		/* No broadcast on UP ! */
 		if (num_possible_cpus() > 1)
 			setup_APIC_timer();
...@@ -1337,7 +1341,7 @@ early_param("apic", apic_set_verbosity); ...@@ -1337,7 +1341,7 @@ early_param("apic", apic_set_verbosity);
static __init int setup_disableapic(char *str) static __init int setup_disableapic(char *str)
{ {
disable_apic = 1; disable_apic = 1;
clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); setup_clear_cpu_cap(X86_FEATURE_APIC);
return 0; return 0;
} }
early_param("disableapic", setup_disableapic); early_param("disableapic", setup_disableapic);
......
...@@ -18,6 +18,8 @@ ...@@ -18,6 +18,8 @@
#include <asm/ia32.h> #include <asm/ia32.h>
#include <asm/bootparam.h> #include <asm/bootparam.h>
#include <xen/interface/xen.h>
#define __NO_STUBS 1 #define __NO_STUBS 1
#undef __SYSCALL #undef __SYSCALL
#undef _ASM_X86_64_UNISTD_H_ #undef _ASM_X86_64_UNISTD_H_
...@@ -131,5 +133,14 @@ int main(void) ...@@ -131,5 +133,14 @@ int main(void)
OFFSET(BP_loadflags, boot_params, hdr.loadflags); OFFSET(BP_loadflags, boot_params, hdr.loadflags);
OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch); OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch);
OFFSET(BP_version, boot_params, hdr.version); OFFSET(BP_version, boot_params, hdr.version);
BLANK();
DEFINE(PAGE_SIZE_asm, PAGE_SIZE);
#ifdef CONFIG_XEN
BLANK();
OFFSET(XEN_vcpu_info_mask, vcpu_info, evtchn_upcall_mask);
OFFSET(XEN_vcpu_info_pending, vcpu_info, evtchn_upcall_pending);
#undef ENTRY
#endif
return 0; return 0;
} }
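The new BLANK()/OFFSET()/DEFINE() entries end up as assembler-visible constants in the generated asm-offsets.h, which is how the Xen entry code can name vcpu_info fields (XEN_vcpu_info_mask and friends) without duplicating struct layouts. The standard kbuild plumbing behind those macros, sketched for reference (generic mechanism, not new code in this patch):

	#define DEFINE(sym, val) \
		asm volatile("\n->" #sym " %0 " #val : : "i" (val))

	#define BLANK() asm volatile("\n->" : :)

	#define OFFSET(sym, str, mem) \
		DEFINE(sym, offsetof(struct str, mem))

The build parses the "->" markers out of the compiler's assembly output and turns each one into a #define.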
/*
* BIOS run time interface routines.
*
* Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <asm/uv/bios.h>
const char *
x86_bios_strerror(long status)
{
const char *str;
switch (status) {
case 0: str = "Call completed without error"; break;
case -1: str = "Not implemented"; break;
case -2: str = "Invalid argument"; break;
case -3: str = "Call completed with error"; break;
default: str = "Unknown BIOS status code"; break;
}
return str;
}
long
x86_bios_freq_base(unsigned long which, unsigned long *ticks_per_second,
unsigned long *drift_info)
{
struct uv_bios_retval isrv;
BIOS_CALL(isrv, BIOS_FREQ_BASE, which, 0, 0, 0, 0, 0, 0);
*ticks_per_second = isrv.v0;
*drift_info = isrv.v1;
return isrv.status;
}
EXPORT_SYMBOL_GPL(x86_bios_freq_base);
...@@ -24,8 +24,6 @@ ...@@ -24,8 +24,6 @@
extern void vide(void); extern void vide(void);
__asm__(".align 4\nvide: ret"); __asm__(".align 4\nvide: ret");
int force_mwait __cpuinitdata;
static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
{ {
if (cpuid_eax(0x80000000) >= 0x80000007) { if (cpuid_eax(0x80000000) >= 0x80000007) {
......
...@@ -115,6 +115,8 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) ...@@ -115,6 +115,8 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
/* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */ /* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */
if (c->x86_power & (1<<8)) if (c->x86_power & (1<<8))
set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
set_cpu_cap(c, X86_FEATURE_SYSCALL32);
} }
static void __cpuinit init_amd(struct cpuinfo_x86 *c) static void __cpuinit init_amd(struct cpuinfo_x86 *c)
......
...@@ -131,13 +131,7 @@ static void __init check_popad(void) ...@@ -131,13 +131,7 @@ static void __init check_popad(void)
* (due to lack of "invlpg" and working WP on a i386)
* - In order to run on anything without a TSC, we need to be * - In order to run on anything without a TSC, we need to be
* compiled for a i486. * compiled for a i486.
* - In order to support the local APIC on a buggy Pentium machine,
* we need to be compiled with CONFIG_X86_GOOD_APIC disabled,
* which happens implicitly if compiled for a Pentium or lower
* (unless an advanced selection of CPU features is used) as an
* otherwise config implies a properly working local APIC without
* the need to do extra reads from the APIC.
*/
static void __init check_config(void) static void __init check_config(void)
{ {
...@@ -151,21 +145,6 @@ static void __init check_config(void) ...@@ -151,21 +145,6 @@ static void __init check_config(void)
if (boot_cpu_data.x86 == 3) if (boot_cpu_data.x86 == 3)
panic("Kernel requires i486+ for 'invlpg' and other features"); panic("Kernel requires i486+ for 'invlpg' and other features");
#endif #endif
/*
* If we were told we had a good local APIC, check for buggy Pentia,
* i.e. all B steppings and the C2 stepping of P54C when using their
* integrated APIC (see 11AP erratum in "Pentium Processor
* Specification Update").
*/
#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_GOOD_APIC)
if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL
&& cpu_has_apic
&& boot_cpu_data.x86 == 5
&& boot_cpu_data.x86_model == 2
&& (boot_cpu_data.x86_mask < 6 || boot_cpu_data.x86_mask == 11))
panic("Kernel compiled for PMMX+, assumes a local APIC without the read-before-write bug!");
#endif
} }
......
...@@ -7,15 +7,13 @@ ...@@ -7,15 +7,13 @@
#include <linux/module.h> #include <linux/module.h>
#include <linux/kgdb.h> #include <linux/kgdb.h>
#include <linux/topology.h> #include <linux/topology.h>
#include <linux/string.h>
#include <linux/delay.h> #include <linux/delay.h>
#include <linux/smp.h> #include <linux/smp.h>
#include <linux/module.h>
#include <linux/percpu.h> #include <linux/percpu.h>
#include <asm/processor.h>
#include <asm/i387.h> #include <asm/i387.h>
#include <asm/msr.h> #include <asm/msr.h>
#include <asm/io.h> #include <asm/io.h>
#include <asm/linkage.h>
#include <asm/mmu_context.h> #include <asm/mmu_context.h>
#include <asm/mtrr.h> #include <asm/mtrr.h>
#include <asm/mce.h> #include <asm/mce.h>
...@@ -305,7 +303,6 @@ static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c) ...@@ -305,7 +303,6 @@ static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
c->x86_capability[2] = cpuid_edx(0x80860001); c->x86_capability[2] = cpuid_edx(0x80860001);
} }
c->extended_cpuid_level = cpuid_eax(0x80000000);
if (c->extended_cpuid_level >= 0x80000007) if (c->extended_cpuid_level >= 0x80000007)
c->x86_power = cpuid_edx(0x80000007); c->x86_power = cpuid_edx(0x80000007);
...@@ -316,18 +313,11 @@ static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c) ...@@ -316,18 +313,11 @@ static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
c->x86_phys_bits = eax & 0xff; c->x86_phys_bits = eax & 0xff;
} }
/* Assume all 64-bit CPUs support 32-bit syscall */
set_cpu_cap(c, X86_FEATURE_SYSCALL32);
if (c->x86_vendor != X86_VENDOR_UNKNOWN && if (c->x86_vendor != X86_VENDOR_UNKNOWN &&
cpu_devs[c->x86_vendor]->c_early_init) cpu_devs[c->x86_vendor]->c_early_init)
cpu_devs[c->x86_vendor]->c_early_init(c); cpu_devs[c->x86_vendor]->c_early_init(c);
validate_pat_support(c); validate_pat_support(c);
/* early_param could clear that, but recall get it set again */
if (disable_apic)
clear_cpu_cap(c, X86_FEATURE_APIC);
} }
/* /*
...@@ -517,8 +507,7 @@ void pda_init(int cpu) ...@@ -517,8 +507,7 @@ void pda_init(int cpu)
} }
 char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ +
-			   DEBUG_STKSZ]
-__attribute__((section(".bss.page_aligned")));
+			   DEBUG_STKSZ] __page_aligned_bss;
extern asmlinkage void ignore_sysret(void); extern asmlinkage void ignore_sysret(void);
......
...@@ -227,6 +227,16 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) ...@@ -227,6 +227,16 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
if (cpu_has_bts) if (cpu_has_bts)
ds_init_intel(c); ds_init_intel(c);
/*
* See if we have a good local APIC by checking for buggy Pentia,
* i.e. all B steppings and the C2 stepping of P54C when using their
* integrated APIC (see 11AP erratum in "Pentium Processor
* Specification Update").
*/
if (cpu_has_apic && (c->x86<<8 | c->x86_model<<4) == 0x520 &&
(c->x86_mask < 0x6 || c->x86_mask == 0xb))
set_cpu_cap(c, X86_FEATURE_11AP);
#ifdef CONFIG_X86_NUMAQ #ifdef CONFIG_X86_NUMAQ
numaq_tsc_disable(); numaq_tsc_disable();
#endif #endif
......
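The packed comparison in the new init_intel() hunk deserves a worked example: the P54C is family 5, model 2, so (c->x86 << 8) | (c->x86_model << 4) evaluates to (5 << 8) | (2 << 4) = 0x520, while x86_mask (the stepping) below 6 covers the B steppings and 0xb is the C2 stepping named by the erratum. A hypothetical helper that unpacks the same test:

	static inline int p54c_has_11ap_bug(struct cpuinfo_x86 *c)
	{
		if (((c->x86 << 8) | (c->x86_model << 4)) != 0x520)
			return 0;	/* not a family-5, model-2 Pentium */
		/* B steppings (< 6) or the C2 stepping (0xb) */
		return c->x86_mask < 0x6 || c->x86_mask == 0xb;
	}

(The real hunk additionally requires cpu_has_apic before setting X86_FEATURE_11AP.)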
...@@ -780,15 +780,14 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) ...@@ -780,15 +780,14 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
 			}
 			kobject_put(per_cpu(cache_kobject, cpu));
 			cpuid4_cache_sysfs_exit(cpu);
-			break;
+			return retval;
 		}
 		kobject_uevent(&(this_object->kobj), KOBJ_ADD);
 	}
-	if (!retval)
-		cpu_set(cpu, cache_dev_map);
+	cpu_set(cpu, cache_dev_map);

 	kobject_uevent(per_cpu(cache_kobject, cpu), KOBJ_ADD);
-	return retval;
+	return 0;
 }
static void __cpuinit cache_remove_dev(struct sys_device * sys_dev) static void __cpuinit cache_remove_dev(struct sys_device * sys_dev)
......
...@@ -102,7 +102,7 @@ static void intel_init_thermal(struct cpuinfo_x86 *c) ...@@ -102,7 +102,7 @@ static void intel_init_thermal(struct cpuinfo_x86 *c)
/* The temperature transition interrupt handler setup */ /* The temperature transition interrupt handler setup */
h = THERMAL_APIC_VECTOR; /* our delivery vector */ h = THERMAL_APIC_VECTOR; /* our delivery vector */
h |= (APIC_DM_FIXED | APIC_LVT_MASKED); /* we'll mask till we're ready */ h |= (APIC_DM_FIXED | APIC_LVT_MASKED); /* we'll mask till we're ready */
apic_write_around(APIC_LVTTHMR, h); apic_write(APIC_LVTTHMR, h);
rdmsr(MSR_IA32_THERM_INTERRUPT, l, h); rdmsr(MSR_IA32_THERM_INTERRUPT, l, h);
wrmsr(MSR_IA32_THERM_INTERRUPT, l | 0x03 , h); wrmsr(MSR_IA32_THERM_INTERRUPT, l | 0x03 , h);
...@@ -114,7 +114,7 @@ static void intel_init_thermal(struct cpuinfo_x86 *c) ...@@ -114,7 +114,7 @@ static void intel_init_thermal(struct cpuinfo_x86 *c)
wrmsr(MSR_IA32_MISC_ENABLE, l | (1<<3), h); wrmsr(MSR_IA32_MISC_ENABLE, l | (1<<3), h);
l = apic_read(APIC_LVTTHMR); l = apic_read(APIC_LVTTHMR);
apic_write_around(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
printk(KERN_INFO "CPU%d: Thermal monitoring enabled\n", cpu); printk(KERN_INFO "CPU%d: Thermal monitoring enabled\n", cpu);
/* enable thermal throttle processing */ /* enable thermal throttle processing */
......
...@@ -877,7 +877,8 @@ void __init early_res_to_bootmem(u64 start, u64 end) ...@@ -877,7 +877,8 @@ void __init early_res_to_bootmem(u64 start, u64 end)
for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++)
count++; count++;
printk(KERN_INFO "(%d early reservations) ==> bootmem\n", count); printk(KERN_INFO "(%d early reservations) ==> bootmem [%010llx - %010llx]\n",
count, start, end);
for (i = 0; i < count; i++) { for (i = 0; i < count; i++) {
struct early_res *r = &early_res[i]; struct early_res *r = &early_res[i];
printk(KERN_INFO " #%d [%010llx - %010llx] %16s", i, printk(KERN_INFO " #%d [%010llx - %010llx] %16s", i,
...@@ -1298,11 +1299,6 @@ void __init e820_reserve_resources(void) ...@@ -1298,11 +1299,6 @@ void __init e820_reserve_resources(void)
} }
} }
/*
* Non-standard memory setup can be specified via this quirk:
*/
char * (*arch_memory_setup_quirk)(void);
char *__init default_machine_specific_memory_setup(void) char *__init default_machine_specific_memory_setup(void)
{ {
char *who = "BIOS-e820"; char *who = "BIOS-e820";
...@@ -1343,8 +1339,8 @@ char *__init default_machine_specific_memory_setup(void) ...@@ -1343,8 +1339,8 @@ char *__init default_machine_specific_memory_setup(void)
char *__init __attribute__((weak)) machine_specific_memory_setup(void) char *__init __attribute__((weak)) machine_specific_memory_setup(void)
{ {
if (arch_memory_setup_quirk) { if (x86_quirks->arch_memory_setup) {
char *who = arch_memory_setup_quirk(); char *who = x86_quirks->arch_memory_setup();
if (who) if (who)
return who; return who;
...@@ -1367,24 +1363,3 @@ void __init setup_memory_map(void) ...@@ -1367,24 +1363,3 @@ void __init setup_memory_map(void)
printk(KERN_INFO "BIOS-provided physical RAM map:\n"); printk(KERN_INFO "BIOS-provided physical RAM map:\n");
e820_print_map(who); e820_print_map(who);
} }
#ifdef CONFIG_X86_64
int __init arch_get_ram_range(int slot, u64 *addr, u64 *size)
{
int i;
if (slot < 0 || slot >= e820.nr_map)
return -1;
for (i = slot; i < e820.nr_map; i++) {
if (e820.map[i].type != E820_RAM)
continue;
break;
}
if (i == e820.nr_map || e820.map[i].addr > (max_pfn << PAGE_SHIFT))
return -1;
*addr = e820.map[i].addr;
*size = min_t(u64, e820.map[i].size + e820.map[i].addr,
max_pfn << PAGE_SHIFT) - *addr;
return i + 1;
}
#endif
...@@ -16,10 +16,7 @@ ...@@ -16,10 +16,7 @@
#include <asm/dma.h> #include <asm/dma.h>
#include <asm/io_apic.h> #include <asm/io_apic.h>
#include <asm/apic.h> #include <asm/apic.h>
#include <asm/iommu.h>
#ifdef CONFIG_GART_IOMMU
#include <asm/gart.h>
#endif
static void __init fix_hypertransport_config(int num, int slot, int func) static void __init fix_hypertransport_config(int num, int slot, int func)
{ {
......
...@@ -332,7 +332,7 @@ sysenter_past_esp: ...@@ -332,7 +332,7 @@ sysenter_past_esp:
GET_THREAD_INFO(%ebp) GET_THREAD_INFO(%ebp)
/* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp) testw $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
jnz syscall_trace_entry jnz syscall_trace_entry
cmpl $(nr_syscalls), %eax cmpl $(nr_syscalls), %eax
jae syscall_badsys jae syscall_badsys
...@@ -370,7 +370,7 @@ ENTRY(system_call) ...@@ -370,7 +370,7 @@ ENTRY(system_call)
GET_THREAD_INFO(%ebp) GET_THREAD_INFO(%ebp)
# system call tracing in operation / emulation # system call tracing in operation / emulation
/* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp) testw $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
jnz syscall_trace_entry jnz syscall_trace_entry
cmpl $(nr_syscalls), %eax cmpl $(nr_syscalls), %eax
jae syscall_badsys jae syscall_badsys
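Both 32-bit entry paths now test one grouped mask instead of spelling the flags out, and the 64-bit path below gets the same treatment. A hedged sketch of the new definitions, reconstructed from the flag lists the old testw/testl lines carried (the authoritative masks live in thread_info.h):

	/* flags that force the slow path at syscall entry / exit */
	#define _TIF_WORK_SYSCALL_ENTRY	\
		(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU |	\
		 _TIF_SYSCALL_AUDIT | _TIF_SECCOMP)

	#define _TIF_WORK_SYSCALL_EXIT	\
		(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SINGLESTEP)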
...@@ -383,10 +383,6 @@ syscall_exit: ...@@ -383,10 +383,6 @@ syscall_exit:
# setting need_resched or sigpending # setting need_resched or sigpending
# between sampling and the iret # between sampling and the iret
TRACE_IRQS_OFF TRACE_IRQS_OFF
testl $X86_EFLAGS_TF,PT_EFLAGS(%esp) # If tracing set singlestep flag on exit
jz no_singlestep
orl $_TIF_SINGLESTEP,TI_flags(%ebp)
no_singlestep:
movl TI_flags(%ebp), %ecx movl TI_flags(%ebp), %ecx
testw $_TIF_ALLWORK_MASK, %cx # current->work testw $_TIF_ALLWORK_MASK, %cx # current->work
jne syscall_exit_work jne syscall_exit_work
...@@ -514,12 +510,8 @@ END(work_pending) ...@@ -514,12 +510,8 @@ END(work_pending)
 syscall_trace_entry:
 	movl $-ENOSYS,PT_EAX(%esp)
 	movl %esp, %eax
-	xorl %edx,%edx
-	call do_syscall_trace
-	cmpl $0, %eax
-	jne resume_userspace		# ret != 0 -> running under PTRACE_SYSEMU,
-					# so must skip actual syscall
-	movl PT_ORIG_EAX(%esp), %eax
+	call syscall_trace_enter
+	/* What it returned is what we'll actually use. */
 	cmpl $(nr_syscalls), %eax
 	jnae syscall_call
 	jmp syscall_exit
...@@ -528,14 +520,13 @@ END(syscall_trace_entry) ...@@ -528,14 +520,13 @@ END(syscall_trace_entry)
# perform syscall exit tracing # perform syscall exit tracing
ALIGN ALIGN
 syscall_exit_work:
-	testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl
+	testb $_TIF_WORK_SYSCALL_EXIT, %cl
 	jz work_pending
 	TRACE_IRQS_ON
-	ENABLE_INTERRUPTS(CLBR_ANY)	# could let do_syscall_trace() call
+	ENABLE_INTERRUPTS(CLBR_ANY)	# could let syscall_trace_leave() call
 					# schedule() instead
 	movl %esp, %eax
-	movl $1, %edx
-	call do_syscall_trace
+	call syscall_trace_leave
 	jmp resume_userspace
 END(syscall_exit_work)
CFI_ENDPROC CFI_ENDPROC
...@@ -1024,6 +1015,7 @@ ENDPROC(kernel_thread_helper) ...@@ -1024,6 +1015,7 @@ ENDPROC(kernel_thread_helper)
ENTRY(xen_sysenter_target) ENTRY(xen_sysenter_target)
RING0_INT_FRAME RING0_INT_FRAME
addl $5*4, %esp /* remove xen-provided frame */ addl $5*4, %esp /* remove xen-provided frame */
CFI_ADJUST_CFA_OFFSET -5*4
jmp sysenter_past_esp jmp sysenter_past_esp
CFI_ENDPROC CFI_ENDPROC
......
...@@ -349,8 +349,7 @@ ENTRY(system_call_after_swapgs) ...@@ -349,8 +349,7 @@ ENTRY(system_call_after_swapgs)
movq %rcx,RIP-ARGOFFSET(%rsp) movq %rcx,RIP-ARGOFFSET(%rsp)
CFI_REL_OFFSET rip,RIP-ARGOFFSET CFI_REL_OFFSET rip,RIP-ARGOFFSET
GET_THREAD_INFO(%rcx) GET_THREAD_INFO(%rcx)
-	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP), \
-		TI_flags(%rcx)
+	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%rcx)
jnz tracesys jnz tracesys
cmpq $__NR_syscall_max,%rax cmpq $__NR_syscall_max,%rax
ja badsys ja badsys
...@@ -430,7 +429,12 @@ tracesys: ...@@ -430,7 +429,12 @@ tracesys:
FIXUP_TOP_OF_STACK %rdi FIXUP_TOP_OF_STACK %rdi
movq %rsp,%rdi movq %rsp,%rdi
call syscall_trace_enter call syscall_trace_enter
-	LOAD_ARGS ARGOFFSET	/* reload args from stack in case ptrace changed it */
+	/*
+	 * Reload arg registers from stack in case ptrace changed them.
+	 * We don't reload %rax because syscall_trace_enter() returned
+	 * the value it wants us to use in the table lookup.
+	 */
+	LOAD_ARGS ARGOFFSET, 1
RESTORE_REST RESTORE_REST
cmpq $__NR_syscall_max,%rax cmpq $__NR_syscall_max,%rax
ja int_ret_from_sys_call /* RAX(%rsp) set to -ENOSYS above */ ja int_ret_from_sys_call /* RAX(%rsp) set to -ENOSYS above */
...@@ -483,7 +487,7 @@ int_very_careful: ...@@ -483,7 +487,7 @@ int_very_careful:
ENABLE_INTERRUPTS(CLBR_NONE) ENABLE_INTERRUPTS(CLBR_NONE)
SAVE_REST SAVE_REST
/* Check for syscall exit trace */ /* Check for syscall exit trace */
testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx testl $_TIF_WORK_SYSCALL_EXIT,%edx
jz int_signal jz int_signal
pushq %rdi pushq %rdi
CFI_ADJUST_CFA_OFFSET 8 CFI_ADJUST_CFA_OFFSET 8
...@@ -491,7 +495,7 @@ int_very_careful: ...@@ -491,7 +495,7 @@ int_very_careful:
call syscall_trace_leave call syscall_trace_leave
popq %rdi popq %rdi
CFI_ADJUST_CFA_OFFSET -8 CFI_ADJUST_CFA_OFFSET -8
andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi
jmp int_restore_rest jmp int_restore_rest
int_signal: int_signal:
...@@ -1189,6 +1193,7 @@ END(device_not_available) ...@@ -1189,6 +1193,7 @@ END(device_not_available)
/* runs on exception stack */ /* runs on exception stack */
KPROBE_ENTRY(debug) KPROBE_ENTRY(debug)
INTR_FRAME INTR_FRAME
PARAVIRT_ADJUST_EXCEPTION_FRAME
pushq $0 pushq $0
CFI_ADJUST_CFA_OFFSET 8 CFI_ADJUST_CFA_OFFSET 8
paranoidentry do_debug, DEBUG_STACK paranoidentry do_debug, DEBUG_STACK
...@@ -1198,6 +1203,7 @@ KPROBE_END(debug) ...@@ -1198,6 +1203,7 @@ KPROBE_END(debug)
/* runs on exception stack */ /* runs on exception stack */
KPROBE_ENTRY(nmi) KPROBE_ENTRY(nmi)
INTR_FRAME INTR_FRAME
PARAVIRT_ADJUST_EXCEPTION_FRAME
pushq $-1 pushq $-1
CFI_ADJUST_CFA_OFFSET 8 CFI_ADJUST_CFA_OFFSET 8
paranoidentry do_nmi, 0, 0 paranoidentry do_nmi, 0, 0
...@@ -1211,6 +1217,7 @@ KPROBE_END(nmi) ...@@ -1211,6 +1217,7 @@ KPROBE_END(nmi)
KPROBE_ENTRY(int3) KPROBE_ENTRY(int3)
INTR_FRAME INTR_FRAME
PARAVIRT_ADJUST_EXCEPTION_FRAME
pushq $0 pushq $0
CFI_ADJUST_CFA_OFFSET 8 CFI_ADJUST_CFA_OFFSET 8
paranoidentry do_int3, DEBUG_STACK paranoidentry do_int3, DEBUG_STACK
...@@ -1237,6 +1244,7 @@ END(coprocessor_segment_overrun) ...@@ -1237,6 +1244,7 @@ END(coprocessor_segment_overrun)
/* runs on exception stack */ /* runs on exception stack */
ENTRY(double_fault) ENTRY(double_fault)
XCPT_FRAME XCPT_FRAME
PARAVIRT_ADJUST_EXCEPTION_FRAME
paranoidentry do_double_fault paranoidentry do_double_fault
jmp paranoid_exit1 jmp paranoid_exit1
CFI_ENDPROC CFI_ENDPROC
...@@ -1253,6 +1261,7 @@ END(segment_not_present) ...@@ -1253,6 +1261,7 @@ END(segment_not_present)
/* runs on exception stack */ /* runs on exception stack */
ENTRY(stack_segment) ENTRY(stack_segment)
XCPT_FRAME XCPT_FRAME
PARAVIRT_ADJUST_EXCEPTION_FRAME
paranoidentry do_stack_segment paranoidentry do_stack_segment
jmp paranoid_exit1 jmp paranoid_exit1
CFI_ENDPROC CFI_ENDPROC
...@@ -1278,6 +1287,7 @@ END(spurious_interrupt_bug) ...@@ -1278,6 +1287,7 @@ END(spurious_interrupt_bug)
/* runs on exception stack */ /* runs on exception stack */
ENTRY(machine_check) ENTRY(machine_check)
INTR_FRAME INTR_FRAME
PARAVIRT_ADJUST_EXCEPTION_FRAME
pushq $0 pushq $0
CFI_ADJUST_CFA_OFFSET 8 CFI_ADJUST_CFA_OFFSET 8
paranoidentry do_machine_check paranoidentry do_machine_check
...@@ -1312,3 +1322,103 @@ KPROBE_ENTRY(ignore_sysret) ...@@ -1312,3 +1322,103 @@ KPROBE_ENTRY(ignore_sysret)
sysret sysret
CFI_ENDPROC CFI_ENDPROC
ENDPROC(ignore_sysret) ENDPROC(ignore_sysret)
#ifdef CONFIG_XEN
ENTRY(xen_hypervisor_callback)
zeroentry xen_do_hypervisor_callback
END(xen_hypervisor_callback)
/*
# A note on the "critical region" in our callback handler.
# We want to avoid stacking callback handlers due to events occurring
# during handling of the last event. To do this, we keep events disabled
# until we've done all processing. HOWEVER, we must enable events before
# popping the stack frame (can't be done atomically) and so it would still
# be possible to get enough handler activations to overflow the stack.
# Although unlikely, bugs of that kind are hard to track down, so we'd
# like to avoid the possibility.
# So, on entry to the handler we detect whether we interrupted an
# existing activation in its critical region -- if so, we pop the current
# activation and restart the handler using the previous one.
*/
ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs)
CFI_STARTPROC
/* Since we don't modify %rdi, evtchn_do_upcall(struct pt_regs *) will
see the correct pointer to the pt_regs */
movq %rdi, %rsp # we don't return, adjust the stack frame
CFI_ENDPROC
CFI_DEFAULT_STACK
11: incl %gs:pda_irqcount
movq %rsp,%rbp
CFI_DEF_CFA_REGISTER rbp
cmovzq %gs:pda_irqstackptr,%rsp
pushq %rbp # backlink for old unwinder
call xen_evtchn_do_upcall
popq %rsp
CFI_DEF_CFA_REGISTER rsp
decl %gs:pda_irqcount
jmp error_exit
CFI_ENDPROC
END(do_hypervisor_callback)
/*
# Hypervisor uses this for application faults while it executes.
# We get here for two reasons:
# 1. Fault while reloading DS, ES, FS or GS
# 2. Fault while executing IRET
# Category 1 we do not need to fix up as Xen has already reloaded all segment
# registers that could be reloaded and zeroed the others.
# Category 2 we fix up by killing the current process. We cannot use the
# normal Linux return path in this case because if we use the IRET hypercall
# to pop the stack frame we end up in an infinite loop of failsafe callbacks.
# We distinguish between categories by comparing each saved segment register
# with its current contents: any discrepancy means we are in category 1.
*/
ENTRY(xen_failsafe_callback)
framesz = (RIP-0x30) /* workaround buggy gas */
_frame framesz
CFI_REL_OFFSET rcx, 0
CFI_REL_OFFSET r11, 8
movw %ds,%cx
cmpw %cx,0x10(%rsp)
CFI_REMEMBER_STATE
jne 1f
movw %es,%cx
cmpw %cx,0x18(%rsp)
jne 1f
movw %fs,%cx
cmpw %cx,0x20(%rsp)
jne 1f
movw %gs,%cx
cmpw %cx,0x28(%rsp)
jne 1f
/* All segments match their saved values => Category 2 (Bad IRET). */
movq (%rsp),%rcx
CFI_RESTORE rcx
movq 8(%rsp),%r11
CFI_RESTORE r11
addq $0x30,%rsp
CFI_ADJUST_CFA_OFFSET -0x30
pushq $0
CFI_ADJUST_CFA_OFFSET 8
pushq %r11
CFI_ADJUST_CFA_OFFSET 8
pushq %rcx
CFI_ADJUST_CFA_OFFSET 8
jmp general_protection
CFI_RESTORE_STATE
1: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */
movq (%rsp),%rcx
CFI_RESTORE rcx
movq 8(%rsp),%r11
CFI_RESTORE r11
addq $0x30,%rsp
CFI_ADJUST_CFA_OFFSET -0x30
pushq $0
CFI_ADJUST_CFA_OFFSET 8
SAVE_ALL
jmp error_exit
CFI_ENDPROC
END(xen_failsafe_callback)
#endif /* CONFIG_XEN */
...@@ -24,6 +24,7 @@ ...@@ -24,6 +24,7 @@
#include <asm/pgtable.h> #include <asm/pgtable.h>
#include <asm/uv/uv_mmrs.h> #include <asm/uv/uv_mmrs.h>
#include <asm/uv/uv_hub.h> #include <asm/uv/uv_hub.h>
#include <asm/uv/bios.h>
DEFINE_PER_CPU(struct uv_hub_info_s, __uv_hub_info); DEFINE_PER_CPU(struct uv_hub_info_s, __uv_hub_info);
EXPORT_PER_CPU_SYMBOL_GPL(__uv_hub_info); EXPORT_PER_CPU_SYMBOL_GPL(__uv_hub_info);
...@@ -40,6 +41,9 @@ EXPORT_SYMBOL_GPL(uv_cpu_to_blade); ...@@ -40,6 +41,9 @@ EXPORT_SYMBOL_GPL(uv_cpu_to_blade);
short uv_possible_blades; short uv_possible_blades;
EXPORT_SYMBOL_GPL(uv_possible_blades); EXPORT_SYMBOL_GPL(uv_possible_blades);
unsigned long sn_rtc_cycles_per_second;
EXPORT_SYMBOL(sn_rtc_cycles_per_second);
/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ /* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */
static cpumask_t uv_target_cpus(void) static cpumask_t uv_target_cpus(void)
...@@ -272,6 +276,23 @@ static __init void map_mmioh_high(int max_pnode) ...@@ -272,6 +276,23 @@ static __init void map_mmioh_high(int max_pnode)
map_high("MMIOH", mmioh.s.base, shift, map_uc); map_high("MMIOH", mmioh.s.base, shift, map_uc);
} }
static __init void uv_rtc_init(void)
{
long status, ticks_per_sec, drift;
status =
x86_bios_freq_base(BIOS_FREQ_BASE_REALTIME_CLOCK, &ticks_per_sec,
&drift);
if (status != 0 || ticks_per_sec < 100000) {
printk(KERN_WARNING
"unable to determine platform RTC clock frequency, "
"guessing.\n");
/* BIOS gives wrong value for clock freq. so guess */
sn_rtc_cycles_per_second = 1000000000000UL / 30000UL;
} else
sn_rtc_cycles_per_second = ticks_per_sec;
}
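The fallback constant is easier to audit as arithmetic: 1000000000000UL / 30000UL = 33333333, i.e. when the BIOS call fails or reports an implausible rate (under 100000 ticks/s) the RTC is assumed to run at roughly 33.3 MHz. A hypothetical compile-time check of that reading:

	static void __init check_rtc_guess(void)
	{
		/* 1e12 / 3e4 == 33333333: the guess is ~33.3 MHz */
		BUILD_BUG_ON(1000000000000UL / 30000UL != 33333333UL);
	}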
static __init void uv_system_init(void) static __init void uv_system_init(void)
{ {
union uvh_si_addr_map_config_u m_n_config; union uvh_si_addr_map_config_u m_n_config;
...@@ -326,6 +347,8 @@ static __init void uv_system_init(void) ...@@ -326,6 +347,8 @@ static __init void uv_system_init(void)
gnode_upper = (((unsigned long)node_id.s.node_id) & gnode_upper = (((unsigned long)node_id.s.node_id) &
~((1 << n_val) - 1)) << m_val; ~((1 << n_val) - 1)) << m_val;
uv_rtc_init();
for_each_present_cpu(cpu) { for_each_present_cpu(cpu) {
nid = cpu_to_node(cpu); nid = cpu_to_node(cpu);
pnode = uv_apicid_to_pnode(per_cpu(x86_cpu_to_apicid, cpu)); pnode = uv_apicid_to_pnode(per_cpu(x86_cpu_to_apicid, cpu));
......
...@@ -39,6 +39,13 @@ static struct x8664_pda *__cpu_pda[NR_CPUS] __initdata; ...@@ -39,6 +39,13 @@ static struct x8664_pda *__cpu_pda[NR_CPUS] __initdata;
static struct x8664_pda *__cpu_pda[NR_CPUS] __read_mostly; static struct x8664_pda *__cpu_pda[NR_CPUS] __read_mostly;
#endif #endif
void __init x86_64_init_pda(void)
{
_cpu_pda = __cpu_pda;
cpu_pda(0) = &_boot_cpu_pda;
pda_init(0);
}
static void __init zap_identity_mappings(void) static void __init zap_identity_mappings(void)
{ {
pgd_t *pgd = pgd_offset_k(0UL); pgd_t *pgd = pgd_offset_k(0UL);
...@@ -102,9 +109,7 @@ void __init x86_64_start_kernel(char * real_mode_data) ...@@ -102,9 +109,7 @@ void __init x86_64_start_kernel(char * real_mode_data)
early_printk("Kernel alive\n"); early_printk("Kernel alive\n");
-	_cpu_pda = __cpu_pda;
-	cpu_pda(0) = &_boot_cpu_pda;
-	pda_init(0);
+	x86_64_init_pda();
early_printk("Kernel really alive\n"); early_printk("Kernel really alive\n");
......
...@@ -407,6 +407,7 @@ ENTRY(phys_base) ...@@ -407,6 +407,7 @@ ENTRY(phys_base)
/* This must match the first entry in level2_kernel_pgt */ /* This must match the first entry in level2_kernel_pgt */
.quad 0x0000000000000000 .quad 0x0000000000000000
#include "../../x86/xen/xen-head.S"
.section .bss, "aw", @nobits .section .bss, "aw", @nobits
.align L1_CACHE_BYTES .align L1_CACHE_BYTES
......
...@@ -756,7 +756,7 @@ void send_IPI_self(int vector) ...@@ -756,7 +756,7 @@ void send_IPI_self(int vector)
/* /*
* Send the IPI. The write to APIC_ICR fires this off. * Send the IPI. The write to APIC_ICR fires this off.
*/ */
apic_write_around(APIC_ICR, cfg); apic_write(APIC_ICR, cfg);
} }
#endif /* !CONFIG_SMP */ #endif /* !CONFIG_SMP */
...@@ -2030,7 +2030,7 @@ static void mask_lapic_irq(unsigned int irq) ...@@ -2030,7 +2030,7 @@ static void mask_lapic_irq(unsigned int irq)
unsigned long v; unsigned long v;
v = apic_read(APIC_LVT0); v = apic_read(APIC_LVT0);
apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED); apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
} }
static void unmask_lapic_irq(unsigned int irq) static void unmask_lapic_irq(unsigned int irq)
...@@ -2038,7 +2038,7 @@ static void unmask_lapic_irq(unsigned int irq) ...@@ -2038,7 +2038,7 @@ static void unmask_lapic_irq(unsigned int irq)
unsigned long v; unsigned long v;
v = apic_read(APIC_LVT0); v = apic_read(APIC_LVT0);
apic_write_around(APIC_LVT0, v & ~APIC_LVT_MASKED); apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
} }
static struct irq_chip lapic_chip __read_mostly = { static struct irq_chip lapic_chip __read_mostly = {
...@@ -2168,7 +2168,7 @@ static inline void __init check_timer(void) ...@@ -2168,7 +2168,7 @@ static inline void __init check_timer(void)
* The AEOI mode will finish them in the 8259A * The AEOI mode will finish them in the 8259A
* automatically. * automatically.
*/ */
apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
init_8259A(1); init_8259A(1);
timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver)); timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver));
...@@ -2177,8 +2177,9 @@ static inline void __init check_timer(void) ...@@ -2177,8 +2177,9 @@ static inline void __init check_timer(void)
pin2 = ioapic_i8259.pin; pin2 = ioapic_i8259.pin;
apic2 = ioapic_i8259.apic; apic2 = ioapic_i8259.apic;
printk(KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n", apic_printk(APIC_QUIET, KERN_INFO "..TIMER: vector=0x%02X "
vector, apic1, pin1, apic2, pin2); "apic1=%d pin1=%d apic2=%d pin2=%d\n",
vector, apic1, pin1, apic2, pin2);
/* /*
* Some BIOS writers are clueless and report the ExtINTA * Some BIOS writers are clueless and report the ExtINTA
...@@ -2216,12 +2217,13 @@ static inline void __init check_timer(void) ...@@ -2216,12 +2217,13 @@ static inline void __init check_timer(void)
} }
clear_IO_APIC_pin(apic1, pin1); clear_IO_APIC_pin(apic1, pin1);
if (!no_pin1) if (!no_pin1)
printk(KERN_ERR "..MP-BIOS bug: " apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: "
"8254 timer not connected to IO-APIC\n"); "8254 timer not connected to IO-APIC\n");
printk(KERN_INFO "...trying to set up timer (IRQ0) " apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer "
"through the 8259A ... "); "(IRQ0) through the 8259A ...\n");
printk("\n..... (found pin %d) ...", pin2); apic_printk(APIC_QUIET, KERN_INFO
"..... (found apic %d pin %d) ...\n", apic2, pin2);
/* /*
* legacy devices should be connected to IO APIC #0 * legacy devices should be connected to IO APIC #0
*/ */
...@@ -2230,7 +2232,7 @@ static inline void __init check_timer(void) ...@@ -2230,7 +2232,7 @@ static inline void __init check_timer(void)
unmask_IO_APIC_irq(0); unmask_IO_APIC_irq(0);
enable_8259A_irq(0); enable_8259A_irq(0);
if (timer_irq_works()) { if (timer_irq_works()) {
printk("works.\n"); apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
timer_through_8259 = 1; timer_through_8259 = 1;
if (nmi_watchdog == NMI_IO_APIC) { if (nmi_watchdog == NMI_IO_APIC) {
disable_8259A_irq(0); disable_8259A_irq(0);
...@@ -2244,44 +2246,47 @@ static inline void __init check_timer(void) ...@@ -2244,44 +2246,47 @@ static inline void __init check_timer(void)
*/ */
disable_8259A_irq(0); disable_8259A_irq(0);
clear_IO_APIC_pin(apic2, pin2); clear_IO_APIC_pin(apic2, pin2);
printk(" failed.\n"); apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n");
} }
if (nmi_watchdog == NMI_IO_APIC) { if (nmi_watchdog == NMI_IO_APIC) {
printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n"); apic_printk(APIC_QUIET, KERN_WARNING "timer doesn't work "
"through the IO-APIC - disabling NMI Watchdog!\n");
nmi_watchdog = NMI_NONE; nmi_watchdog = NMI_NONE;
} }
timer_ack = 0; timer_ack = 0;
printk(KERN_INFO "...trying to set up timer as Virtual Wire IRQ..."); apic_printk(APIC_QUIET, KERN_INFO
"...trying to set up timer as Virtual Wire IRQ...\n");
lapic_register_intr(0, vector); lapic_register_intr(0, vector);
apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */ apic_write(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */
enable_8259A_irq(0); enable_8259A_irq(0);
if (timer_irq_works()) { if (timer_irq_works()) {
printk(" works.\n"); apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
goto out; goto out;
} }
disable_8259A_irq(0); disable_8259A_irq(0);
apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector); apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector);
printk(" failed.\n"); apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n");
printk(KERN_INFO "...trying to set up timer as ExtINT IRQ..."); apic_printk(APIC_QUIET, KERN_INFO
"...trying to set up timer as ExtINT IRQ...\n");
init_8259A(0); init_8259A(0);
make_8259A_irq(0); make_8259A_irq(0);
apic_write_around(APIC_LVT0, APIC_DM_EXTINT); apic_write(APIC_LVT0, APIC_DM_EXTINT);
unlock_ExtINT_logic(); unlock_ExtINT_logic();
if (timer_irq_works()) { if (timer_irq_works()) {
printk(" works.\n"); apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
goto out; goto out;
} }
printk(" failed :(.\n"); apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n");
panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a " panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a "
"report. Then try booting with the 'noapic' option"); "report. Then try booting with the 'noapic' option.\n");
out: out:
local_irq_restore(flags); local_irq_restore(flags);
} }
......
...@@ -45,6 +45,7 @@ ...@@ -45,6 +45,7 @@
#include <asm/proto.h> #include <asm/proto.h>
#include <asm/acpi.h> #include <asm/acpi.h>
#include <asm/dma.h> #include <asm/dma.h>
#include <asm/i8259.h>
#include <asm/nmi.h> #include <asm/nmi.h>
#include <asm/msidef.h> #include <asm/msidef.h>
#include <asm/hypertransport.h> #include <asm/hypertransport.h>
...@@ -1696,8 +1697,9 @@ static inline void __init check_timer(void) ...@@ -1696,8 +1697,9 @@ static inline void __init check_timer(void)
pin2 = ioapic_i8259.pin; pin2 = ioapic_i8259.pin;
apic2 = ioapic_i8259.apic; apic2 = ioapic_i8259.apic;
apic_printk(APIC_VERBOSE,KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n", apic_printk(APIC_QUIET, KERN_INFO "..TIMER: vector=0x%02X "
cfg->vector, apic1, pin1, apic2, pin2); "apic1=%d pin1=%d apic2=%d pin2=%d\n",
cfg->vector, apic1, pin1, apic2, pin2);
/* /*
* Some BIOS writers are clueless and report the ExtINTA * Some BIOS writers are clueless and report the ExtINTA
...@@ -1735,14 +1737,13 @@ static inline void __init check_timer(void) ...@@ -1735,14 +1737,13 @@ static inline void __init check_timer(void)
} }
clear_IO_APIC_pin(apic1, pin1); clear_IO_APIC_pin(apic1, pin1);
if (!no_pin1) if (!no_pin1)
-			apic_printk(APIC_QUIET,KERN_ERR "..MP-BIOS bug: "
-				"8254 timer not connected to IO-APIC\n");
+			apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: "
+				    "8254 timer not connected to IO-APIC\n");

-		apic_printk(APIC_VERBOSE,KERN_INFO
-			"...trying to set up timer (IRQ0) "
-			"through the 8259A ... ");
-		apic_printk(APIC_VERBOSE,"\n..... (found apic %d pin %d) ...",
-			apic2, pin2);
+		apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer "
+			    "(IRQ0) through the 8259A ...\n");
+		apic_printk(APIC_QUIET, KERN_INFO
+			    "..... (found apic %d pin %d) ...\n", apic2, pin2);
/* /*
* legacy devices should be connected to IO APIC #0 * legacy devices should be connected to IO APIC #0
*/ */
...@@ -1751,7 +1752,7 @@ static inline void __init check_timer(void) ...@@ -1751,7 +1752,7 @@ static inline void __init check_timer(void)
unmask_IO_APIC_irq(0); unmask_IO_APIC_irq(0);
enable_8259A_irq(0); enable_8259A_irq(0);
if (timer_irq_works()) { if (timer_irq_works()) {
apic_printk(APIC_VERBOSE," works.\n"); apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
timer_through_8259 = 1; timer_through_8259 = 1;
if (nmi_watchdog == NMI_IO_APIC) { if (nmi_watchdog == NMI_IO_APIC) {
disable_8259A_irq(0); disable_8259A_irq(0);
...@@ -1765,29 +1766,32 @@ static inline void __init check_timer(void) ...@@ -1765,29 +1766,32 @@ static inline void __init check_timer(void)
*/ */
disable_8259A_irq(0); disable_8259A_irq(0);
clear_IO_APIC_pin(apic2, pin2); clear_IO_APIC_pin(apic2, pin2);
apic_printk(APIC_VERBOSE," failed.\n"); apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n");
} }
if (nmi_watchdog == NMI_IO_APIC) { if (nmi_watchdog == NMI_IO_APIC) {
printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n"); apic_printk(APIC_QUIET, KERN_WARNING "timer doesn't work "
"through the IO-APIC - disabling NMI Watchdog!\n");
nmi_watchdog = NMI_NONE; nmi_watchdog = NMI_NONE;
} }
apic_printk(APIC_VERBOSE, KERN_INFO "...trying to set up timer as Virtual Wire IRQ..."); apic_printk(APIC_QUIET, KERN_INFO
"...trying to set up timer as Virtual Wire IRQ...\n");
lapic_register_intr(0); lapic_register_intr(0);
apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */ apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */
enable_8259A_irq(0); enable_8259A_irq(0);
if (timer_irq_works()) { if (timer_irq_works()) {
apic_printk(APIC_VERBOSE," works.\n"); apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
goto out; goto out;
} }
disable_8259A_irq(0); disable_8259A_irq(0);
apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector); apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector);
apic_printk(APIC_VERBOSE," failed.\n"); apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n");
apic_printk(APIC_VERBOSE, KERN_INFO "...trying to set up timer as ExtINT IRQ..."); apic_printk(APIC_QUIET, KERN_INFO
"...trying to set up timer as ExtINT IRQ...\n");
init_8259A(0); init_8259A(0);
make_8259A_irq(0); make_8259A_irq(0);
...@@ -1796,11 +1800,12 @@ static inline void __init check_timer(void) ...@@ -1796,11 +1800,12 @@ static inline void __init check_timer(void)
unlock_ExtINT_logic(); unlock_ExtINT_logic();
if (timer_irq_works()) { if (timer_irq_works()) {
apic_printk(APIC_VERBOSE," works.\n"); apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
goto out; goto out;
} }
apic_printk(APIC_VERBOSE," failed :(.\n"); apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n");
panic("IO-APIC + timer doesn't work! Try using the 'noapic' kernel parameter\n"); panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a "
"report. Then try booting with the 'noapic' option.\n");
out: out:
local_irq_restore(flags); local_irq_restore(flags);
} }
......
...@@ -103,6 +103,9 @@ void __init io_delay_init(void) ...@@ -103,6 +103,9 @@ void __init io_delay_init(void)
static int __init io_delay_param(char *s) static int __init io_delay_param(char *s)
{ {
if (!s)
return -EINVAL;
if (!strcmp(s, "0x80")) if (!strcmp(s, "0x80"))
io_delay_type = CONFIG_IO_DELAY_TYPE_0X80; io_delay_type = CONFIG_IO_DELAY_TYPE_0X80;
else if (!strcmp(s, "0xed")) else if (!strcmp(s, "0xed"))
......
...@@ -70,7 +70,7 @@ void __send_IPI_shortcut(unsigned int shortcut, int vector) ...@@ -70,7 +70,7 @@ void __send_IPI_shortcut(unsigned int shortcut, int vector)
/* /*
* Send the IPI. The write to APIC_ICR fires this off. * Send the IPI. The write to APIC_ICR fires this off.
*/ */
apic_write_around(APIC_ICR, cfg); apic_write(APIC_ICR, cfg);
} }
void send_IPI_self(int vector) void send_IPI_self(int vector)
...@@ -98,7 +98,7 @@ static inline void __send_IPI_dest_field(unsigned long mask, int vector) ...@@ -98,7 +98,7 @@ static inline void __send_IPI_dest_field(unsigned long mask, int vector)
* prepare target chip field * prepare target chip field
*/ */
cfg = __prepare_ICR2(mask); cfg = __prepare_ICR2(mask);
apic_write_around(APIC_ICR2, cfg); apic_write(APIC_ICR2, cfg);
/* /*
* program the ICR * program the ICR
...@@ -108,7 +108,7 @@ static inline void __send_IPI_dest_field(unsigned long mask, int vector) ...@@ -108,7 +108,7 @@ static inline void __send_IPI_dest_field(unsigned long mask, int vector)
/* /*
* Send the IPI. The write to APIC_ICR fires this off. * Send the IPI. The write to APIC_ICR fires this off.
*/ */
apic_write_around(APIC_ICR, cfg); apic_write(APIC_ICR, cfg);
} }
/* /*
......
...@@ -83,11 +83,8 @@ union irq_ctx { ...@@ -83,11 +83,8 @@ union irq_ctx {
static union irq_ctx *hardirq_ctx[NR_CPUS] __read_mostly; static union irq_ctx *hardirq_ctx[NR_CPUS] __read_mostly;
static union irq_ctx *softirq_ctx[NR_CPUS] __read_mostly; static union irq_ctx *softirq_ctx[NR_CPUS] __read_mostly;
-static char softirq_stack[NR_CPUS * THREAD_SIZE]
-		__attribute__((__section__(".bss.page_aligned")));
-static char hardirq_stack[NR_CPUS * THREAD_SIZE]
-		__attribute__((__section__(".bss.page_aligned")));
+static char softirq_stack[NR_CPUS * THREAD_SIZE] __page_aligned_bss;
+static char hardirq_stack[NR_CPUS * THREAD_SIZE] __page_aligned_bss;
static void call_on_stack(void *func, void *stack) static void call_on_stack(void *func, void *stack)
{ {
......
...@@ -12,9 +12,13 @@ ...@@ -12,9 +12,13 @@
#include <linux/init.h> #include <linux/init.h>
#include <linux/io.h> #include <linux/io.h>
#include <linux/mm.h> #include <linux/mm.h>
#include <linux/module.h>
#include <asm/setup.h> #include <asm/setup.h>
struct dentry *arch_debugfs_dir;
EXPORT_SYMBOL(arch_debugfs_dir);
#ifdef CONFIG_DEBUG_BOOT_PARAMS #ifdef CONFIG_DEBUG_BOOT_PARAMS
struct setup_data_node { struct setup_data_node {
u64 paddr; u64 paddr;
...@@ -209,6 +213,10 @@ static int __init arch_kdebugfs_init(void) ...@@ -209,6 +213,10 @@ static int __init arch_kdebugfs_init(void)
{ {
int error = 0; int error = 0;
arch_debugfs_dir = debugfs_create_dir("x86", NULL);
if (!arch_debugfs_dir)
return -ENOMEM;
#ifdef CONFIG_DEBUG_BOOT_PARAMS #ifdef CONFIG_DEBUG_BOOT_PARAMS
error = boot_params_kdebugfs_init(); error = boot_params_kdebugfs_init();
#endif #endif
......
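Exporting arch_debugfs_dir gives other x86 code a common /sys/kernel/debug/x86 parent to hang entries under. A hypothetical user, with illustrative names (memtype_fops is assumed here, not defined by this hunk):

	static int __init pat_memtype_debug_init(void)
	{
		debugfs_create_file("pat_memtype_list", S_IRUSR,
				    arch_debugfs_dir, NULL, &memtype_fops);
		return 0;
	}
	late_initcall(pat_memtype_debug_init);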
...@@ -860,7 +860,6 @@ static int __kprobes post_kprobe_handler(struct pt_regs *regs) ...@@ -860,7 +860,6 @@ static int __kprobes post_kprobe_handler(struct pt_regs *regs)
resume_execution(cur, regs, kcb); resume_execution(cur, regs, kcb);
regs->flags |= kcb->kprobe_saved_flags; regs->flags |= kcb->kprobe_saved_flags;
trace_hardirqs_fixup_flags(regs->flags);
if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) { if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) {
kcb->kprobe_status = KPROBE_HIT_SSDONE; kcb->kprobe_status = KPROBE_HIT_SSDONE;
......
...@@ -150,7 +150,8 @@ int module_finalize(const Elf_Ehdr *hdr, ...@@ -150,7 +150,8 @@ int module_finalize(const Elf_Ehdr *hdr,
const Elf_Shdr *sechdrs, const Elf_Shdr *sechdrs,
struct module *me) struct module *me)
{ {
-	const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL;
+	const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL,
+		*para = NULL;
char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
...@@ -160,6 +161,8 @@ int module_finalize(const Elf_Ehdr *hdr, ...@@ -160,6 +161,8 @@ int module_finalize(const Elf_Ehdr *hdr,
alt = s; alt = s;
if (!strcmp(".smp_locks", secstrings + s->sh_name)) if (!strcmp(".smp_locks", secstrings + s->sh_name))
locks= s; locks= s;
if (!strcmp(".parainstructions", secstrings + s->sh_name))
para = s;
} }
if (alt) { if (alt) {
...@@ -175,6 +178,11 @@ int module_finalize(const Elf_Ehdr *hdr, ...@@ -175,6 +178,11 @@ int module_finalize(const Elf_Ehdr *hdr,
tseg, tseg + text->sh_size); tseg, tseg + text->sh_size);
} }
if (para) {
void *pseg = (void *)para->sh_addr;
apply_paravirt(pseg, pseg + para->sh_size);
}
return module_bug_finalize(hdr, sechdrs, me); return module_bug_finalize(hdr, sechdrs, me);
} }
......
...@@ -27,6 +27,7 @@ ...@@ -27,6 +27,7 @@
#include <asm/bios_ebda.h> #include <asm/bios_ebda.h>
#include <asm/e820.h> #include <asm/e820.h>
#include <asm/trampoline.h> #include <asm/trampoline.h>
#include <asm/setup.h>
#include <mach_apic.h> #include <mach_apic.h>
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32
...@@ -48,76 +49,6 @@ static int __init mpf_checksum(unsigned char *mp, int len) ...@@ -48,76 +49,6 @@ static int __init mpf_checksum(unsigned char *mp, int len)
return sum & 0xFF; return sum & 0xFF;
} }
#ifdef CONFIG_X86_NUMAQ
int found_numaq;
/*
* Have to match translation table entries to main table entries by counter
* hence the mpc_record variable .... can't see a less disgusting way of
* doing this ....
*/
struct mpc_config_translation {
unsigned char mpc_type;
unsigned char trans_len;
unsigned char trans_type;
unsigned char trans_quad;
unsigned char trans_global;
unsigned char trans_local;
unsigned short trans_reserved;
};
static int mpc_record;
static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY]
__cpuinitdata;
static inline int generate_logical_apicid(int quad, int phys_apicid)
{
return (quad << 4) + (phys_apicid ? phys_apicid << 1 : 1);
}
static inline int mpc_apic_id(struct mpc_config_processor *m,
struct mpc_config_translation *translation_record)
{
int quad = translation_record->trans_quad;
int logical_apicid = generate_logical_apicid(quad, m->mpc_apicid);
printk(KERN_DEBUG "Processor #%d %u:%u APIC version %d (quad %d, apic %d)\n",
m->mpc_apicid,
(m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8,
(m->mpc_cpufeature & CPU_MODEL_MASK) >> 4,
m->mpc_apicver, quad, logical_apicid);
return logical_apicid;
}
int mp_bus_id_to_node[MAX_MP_BUSSES];
int mp_bus_id_to_local[MAX_MP_BUSSES];
static void mpc_oem_bus_info(struct mpc_config_bus *m, char *name,
struct mpc_config_translation *translation)
{
int quad = translation->trans_quad;
int local = translation->trans_local;
mp_bus_id_to_node[m->mpc_busid] = quad;
mp_bus_id_to_local[m->mpc_busid] = local;
printk(KERN_INFO "Bus #%d is %s (node %d)\n",
m->mpc_busid, name, quad);
}
int quad_local_to_mp_bus_id [NR_CPUS/4][4];
static void mpc_oem_pci_bus(struct mpc_config_bus *m,
struct mpc_config_translation *translation)
{
int quad = translation->trans_quad;
int local = translation->trans_local;
quad_local_to_mp_bus_id[quad][local] = m->mpc_busid;
}
#endif
static void __cpuinit MP_processor_info(struct mpc_config_processor *m) static void __cpuinit MP_processor_info(struct mpc_config_processor *m)
{ {
int apicid; int apicid;
...@@ -127,14 +58,12 @@ static void __cpuinit MP_processor_info(struct mpc_config_processor *m) ...@@ -127,14 +58,12 @@ static void __cpuinit MP_processor_info(struct mpc_config_processor *m)
disabled_cpus++; disabled_cpus++;
return; return;
} }
-#ifdef CONFIG_X86_NUMAQ
-	if (found_numaq)
-		apicid = mpc_apic_id(m, translation_table[mpc_record]);
-	else
-		apicid = m->mpc_apicid;
-#else
-	apicid = m->mpc_apicid;
-#endif
+	if (x86_quirks->mpc_apic_id)
+		apicid = x86_quirks->mpc_apic_id(m);
+	else
+		apicid = m->mpc_apicid;
if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) { if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
bootup_cpu = " (Bootup-CPU)"; bootup_cpu = " (Bootup-CPU)";
boot_cpu_physical_apicid = m->mpc_apicid; boot_cpu_physical_apicid = m->mpc_apicid;
...@@ -151,12 +80,10 @@ static void __init MP_bus_info(struct mpc_config_bus *m) ...@@ -151,12 +80,10 @@ static void __init MP_bus_info(struct mpc_config_bus *m)
memcpy(str, m->mpc_bustype, 6); memcpy(str, m->mpc_bustype, 6);
str[6] = 0; str[6] = 0;
-#ifdef CONFIG_X86_NUMAQ
-	if (found_numaq)
-		mpc_oem_bus_info(m, str, translation_table[mpc_record]);
-#else
-	printk(KERN_INFO "Bus #%d is %s\n", m->mpc_busid, str);
-#endif
+	if (x86_quirks->mpc_oem_bus_info)
+		x86_quirks->mpc_oem_bus_info(m, str);
+	else
+		printk(KERN_INFO "Bus #%d is %s\n", m->mpc_busid, str);
#if MAX_MP_BUSSES < 256 #if MAX_MP_BUSSES < 256
if (m->mpc_busid >= MAX_MP_BUSSES) { if (m->mpc_busid >= MAX_MP_BUSSES) {
...@@ -173,10 +100,9 @@ static void __init MP_bus_info(struct mpc_config_bus *m) ...@@ -173,10 +100,9 @@ static void __init MP_bus_info(struct mpc_config_bus *m)
mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA; mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA;
#endif #endif
} else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) { } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) {
-#ifdef CONFIG_X86_NUMAQ
-		if (found_numaq)
-			mpc_oem_pci_bus(m, translation_table[mpc_record]);
-#endif
+		if (x86_quirks->mpc_oem_pci_bus)
+			x86_quirks->mpc_oem_pci_bus(m);
clear_bit(m->mpc_busid, mp_bus_not_pci); clear_bit(m->mpc_busid, mp_bus_not_pci);
#if defined(CONFIG_EISA) || defined (CONFIG_MCA) #if defined(CONFIG_EISA) || defined (CONFIG_MCA)
mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI; mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI;
...@@ -316,83 +242,6 @@ static void __init MP_lintsrc_info(struct mpc_config_lintsrc *m) ...@@ -316,83 +242,6 @@ static void __init MP_lintsrc_info(struct mpc_config_lintsrc *m)
m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint); m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint);
} }
#ifdef CONFIG_X86_NUMAQ
static void __init MP_translation_info(struct mpc_config_translation *m)
{
printk(KERN_INFO
"Translation: record %d, type %d, quad %d, global %d, local %d\n",
mpc_record, m->trans_type, m->trans_quad, m->trans_global,
m->trans_local);
if (mpc_record >= MAX_MPC_ENTRY)
printk(KERN_ERR "MAX_MPC_ENTRY exceeded!\n");
else
translation_table[mpc_record] = m; /* stash this for later */
if (m->trans_quad < MAX_NUMNODES && !node_online(m->trans_quad))
node_set_online(m->trans_quad);
}
/*
* Read/parse the MPC oem tables
*/
static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable,
unsigned short oemsize)
{
int count = sizeof(*oemtable); /* the header size */
unsigned char *oemptr = ((unsigned char *)oemtable) + count;
mpc_record = 0;
printk(KERN_INFO "Found an OEM MPC table at %8p - parsing it ... \n",
oemtable);
if (memcmp(oemtable->oem_signature, MPC_OEM_SIGNATURE, 4)) {
printk(KERN_WARNING
"SMP mpc oemtable: bad signature [%c%c%c%c]!\n",
oemtable->oem_signature[0], oemtable->oem_signature[1],
oemtable->oem_signature[2], oemtable->oem_signature[3]);
return;
}
if (mpf_checksum((unsigned char *)oemtable, oemtable->oem_length)) {
printk(KERN_WARNING "SMP oem mptable: checksum error!\n");
return;
}
while (count < oemtable->oem_length) {
switch (*oemptr) {
case MP_TRANSLATION:
{
struct mpc_config_translation *m =
(struct mpc_config_translation *)oemptr;
MP_translation_info(m);
oemptr += sizeof(*m);
count += sizeof(*m);
++mpc_record;
break;
}
default:
{
printk(KERN_WARNING
"Unrecognised OEM table entry type! - %d\n",
(int)*oemptr);
return;
}
}
}
}
void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem,
char *productid)
{
if (strncmp(oem, "IBM NUMA", 8))
printk("Warning! Not a NUMA-Q system!\n");
else
found_numaq = 1;
if (mpc->mpc_oemptr)
smp_read_mpc_oem((struct mp_config_oemtable *)mpc->mpc_oemptr,
mpc->mpc_oemsize);
}
#endif /* CONFIG_X86_NUMAQ */
/* /*
* Read/parse the MPC * Read/parse the MPC
*/ */
...@@ -457,7 +306,6 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early) ...@@ -457,7 +306,6 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early)
} else } else
mps_oem_check(mpc, oem, str); mps_oem_check(mpc, oem, str);
#endif #endif
/* save the local APIC address, it might be non-default */ /* save the local APIC address, it might be non-default */
if (!acpi_lapic) if (!acpi_lapic)
mp_lapic_addr = mpc->mpc_lapic; mp_lapic_addr = mpc->mpc_lapic;
...@@ -465,12 +313,17 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early) ...@@ -465,12 +313,17 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early)
if (early) if (early)
return 1; return 1;
if (mpc->mpc_oemptr && x86_quirks->smp_read_mpc_oem) {
struct mp_config_oemtable *oem_table = (struct mp_config_oemtable *)(unsigned long)mpc->mpc_oemptr;
x86_quirks->smp_read_mpc_oem(oem_table, mpc->mpc_oemsize);
}
/* /*
* Now process the configuration blocks. * Now process the configuration blocks.
*/ */
-#ifdef CONFIG_X86_NUMAQ
-	mpc_record = 0;
-#endif
+	if (x86_quirks->mpc_record)
+		*x86_quirks->mpc_record = 0;
while (count < mpc->mpc_length) { while (count < mpc->mpc_length) {
switch (*mpt) { switch (*mpt) {
case MP_PROCESSOR: case MP_PROCESSOR:
...@@ -536,9 +389,8 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early) ...@@ -536,9 +389,8 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early)
count = mpc->mpc_length; count = mpc->mpc_length;
break; break;
} }
-#ifdef CONFIG_X86_NUMAQ
-		++mpc_record;
-#endif
+		if (x86_quirks->mpc_record)
+			(*x86_quirks->mpc_record)++;
} }
#ifdef CONFIG_X86_GENERICARCH #ifdef CONFIG_X86_GENERICARCH
@@ -725,12 +577,6 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type)

static struct intel_mp_floating *mpf_found;

-/*
- * Machine specific quirk for finding the SMP config before other setup
- * activities destroy the table:
- */
-int (*mach_get_smp_config_quirk)(unsigned int early);
-
/*
 * Scan the memory blocks for an SMP configuration block.
 */
@@ -738,8 +584,8 @@ static void __init __get_smp_config(unsigned int early)
{
	struct intel_mp_floating *mpf = mpf_found;

-	if (mach_get_smp_config_quirk) {
-		if (mach_get_smp_config_quirk(early))
+	if (x86_quirks->mach_get_smp_config) {
+		if (x86_quirks->mach_get_smp_config(early))
			return;
	}
	if (acpi_lapic && early)
@@ -899,14 +745,12 @@ static int __init smp_scan_config(unsigned long base, unsigned long length,
	return 0;
}

-int (*mach_find_smp_config_quirk)(unsigned int reserve);
-
static void __init __find_smp_config(unsigned int reserve)
{
	unsigned int address;

-	if (mach_find_smp_config_quirk) {
-		if (mach_find_smp_config_quirk(reserve))
+	if (x86_quirks->mach_find_smp_config) {
+		if (x86_quirks->mach_find_smp_config(reserve))
			return;
	}
	/*
......
@@ -263,7 +263,7 @@ late_initcall(init_lapic_nmi_sysfs);

static void __acpi_nmi_enable(void *__unused)
{
-	apic_write_around(APIC_LVT0, APIC_DM_NMI);
+	apic_write(APIC_LVT0, APIC_DM_NMI);
}

/*
@@ -277,7 +277,7 @@ void acpi_nmi_enable(void)
static void __acpi_nmi_disable(void *__unused)
{
-	apic_write_around(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED);
+	apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED);
}

/*
@@ -448,6 +448,13 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)

#ifdef CONFIG_SYSCTL

+static int __init setup_unknown_nmi_panic(char *str)
+{
+	unknown_nmi_panic = 1;
+	return 1;
+}
+__setup("unknown_nmi_panic", setup_unknown_nmi_panic);
+
static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
{
	unsigned char reason = get_nmi_reason();
......
@@ -33,6 +33,7 @@
#include <asm/processor.h>
#include <asm/mpspec.h>
#include <asm/e820.h>
+#include <asm/setup.h>

#define MB_TO_PAGES(addr) ((addr) << (20 - PAGE_SHIFT))
@@ -71,6 +72,188 @@ static void __init smp_dump_qct(void)
	}
}
void __init numaq_tsc_disable(void)
{
if (!found_numaq)
return;
if (num_online_nodes() > 1) {
printk(KERN_DEBUG "NUMAQ: disabling TSC\n");
setup_clear_cpu_cap(X86_FEATURE_TSC);
}
}
static int __init numaq_pre_time_init(void)
{
numaq_tsc_disable();
return 0;
}
int found_numaq;
/*
* Have to match translation table entries to main table entries by counter
* hence the mpc_record variable .... can't see a less disgusting way of
* doing this ....
*/
struct mpc_config_translation {
unsigned char mpc_type;
unsigned char trans_len;
unsigned char trans_type;
unsigned char trans_quad;
unsigned char trans_global;
unsigned char trans_local;
unsigned short trans_reserved;
};
/* x86_quirks member */
static int mpc_record;
static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY]
__cpuinitdata;
static inline int generate_logical_apicid(int quad, int phys_apicid)
{
return (quad << 4) + (phys_apicid ? phys_apicid << 1 : 1);
}
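
The helper above encodes the NUMA-Q topology into the logical APIC ID: the quad number lands in the high nibble, and CPU 0 of a quad gets ID 1 rather than 0. A standalone illustration of the arithmetic (the values are worked examples, not taken from real hardware):

	/* illustration only: same arithmetic as generate_logical_apicid() above */
	#include <stdio.h>

	static int generate_logical_apicid(int quad, int phys_apicid)
	{
		return (quad << 4) + (phys_apicid ? phys_apicid << 1 : 1);
	}

	int main(void)
	{
		printf("%#x\n", generate_logical_apicid(2, 0));	/* prints 0x21 */
		printf("%#x\n", generate_logical_apicid(2, 3));	/* prints 0x26 */
		return 0;
	}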
/* x86_quirks member */
static int mpc_apic_id(struct mpc_config_processor *m)
{
int quad = translation_table[mpc_record]->trans_quad;
int logical_apicid = generate_logical_apicid(quad, m->mpc_apicid);
printk(KERN_DEBUG "Processor #%d %u:%u APIC version %d (quad %d, apic %d)\n",
m->mpc_apicid,
(m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8,
(m->mpc_cpufeature & CPU_MODEL_MASK) >> 4,
m->mpc_apicver, quad, logical_apicid);
return logical_apicid;
}
int mp_bus_id_to_node[MAX_MP_BUSSES];
int mp_bus_id_to_local[MAX_MP_BUSSES];
/* x86_quirks member */
static void mpc_oem_bus_info(struct mpc_config_bus *m, char *name)
{
int quad = translation_table[mpc_record]->trans_quad;
int local = translation_table[mpc_record]->trans_local;
mp_bus_id_to_node[m->mpc_busid] = quad;
mp_bus_id_to_local[m->mpc_busid] = local;
printk(KERN_INFO "Bus #%d is %s (node %d)\n",
m->mpc_busid, name, quad);
}
int quad_local_to_mp_bus_id [NR_CPUS/4][4];
/* x86_quirks member */
static void mpc_oem_pci_bus(struct mpc_config_bus *m)
{
int quad = translation_table[mpc_record]->trans_quad;
int local = translation_table[mpc_record]->trans_local;
quad_local_to_mp_bus_id[quad][local] = m->mpc_busid;
}
static void __init MP_translation_info(struct mpc_config_translation *m)
{
printk(KERN_INFO
"Translation: record %d, type %d, quad %d, global %d, local %d\n",
mpc_record, m->trans_type, m->trans_quad, m->trans_global,
m->trans_local);
if (mpc_record >= MAX_MPC_ENTRY)
printk(KERN_ERR "MAX_MPC_ENTRY exceeded!\n");
else
translation_table[mpc_record] = m; /* stash this for later */
if (m->trans_quad < MAX_NUMNODES && !node_online(m->trans_quad))
node_set_online(m->trans_quad);
}
static int __init mpf_checksum(unsigned char *mp, int len)
{
int sum = 0;
while (len--)
sum += *mp++;
return sum & 0xFF;
}
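
mpf_checksum() returns the low byte of the sum over the whole table, so an intact table, whose checksum byte was chosen by the firmware to make the total 0 modulo 256, yields 0; anything non-zero means corruption. A self-contained illustration of that property (made-up bytes, not a real MP table):

	/* illustration only: the byte-sum rule that mpf_checksum() relies on */
	#include <stdio.h>

	static int mpf_checksum(unsigned char *mp, int len)
	{
		int sum = 0;

		while (len--)
			sum += *mp++;

		return sum & 0xFF;
	}

	int main(void)
	{
		unsigned char table[4] = { 0x12, 0x34, 0x56, 0x00 };

		table[3] = 0x100 - ((0x12 + 0x34 + 0x56) & 0xFF);   /* fix-up byte */
		printf("%d\n", mpf_checksum(table, sizeof(table))); /* 0 = valid */
		return 0;
	}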
/*
* Read/parse the MPC oem tables
*/
static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable,
unsigned short oemsize)
{
int count = sizeof(*oemtable); /* the header size */
unsigned char *oemptr = ((unsigned char *)oemtable) + count;
mpc_record = 0;
printk(KERN_INFO "Found an OEM MPC table at %8p - parsing it ... \n",
oemtable);
if (memcmp(oemtable->oem_signature, MPC_OEM_SIGNATURE, 4)) {
printk(KERN_WARNING
"SMP mpc oemtable: bad signature [%c%c%c%c]!\n",
oemtable->oem_signature[0], oemtable->oem_signature[1],
oemtable->oem_signature[2], oemtable->oem_signature[3]);
return;
}
if (mpf_checksum((unsigned char *)oemtable, oemtable->oem_length)) {
printk(KERN_WARNING "SMP oem mptable: checksum error!\n");
return;
}
while (count < oemtable->oem_length) {
switch (*oemptr) {
case MP_TRANSLATION:
{
struct mpc_config_translation *m =
(struct mpc_config_translation *)oemptr;
MP_translation_info(m);
oemptr += sizeof(*m);
count += sizeof(*m);
++mpc_record;
break;
}
default:
{
printk(KERN_WARNING
"Unrecognised OEM table entry type! - %d\n",
(int)*oemptr);
return;
}
}
}
}
static struct x86_quirks numaq_x86_quirks __initdata = {
.arch_pre_time_init = numaq_pre_time_init,
.arch_time_init = NULL,
.arch_pre_intr_init = NULL,
.arch_memory_setup = NULL,
.arch_intr_init = NULL,
.arch_trap_init = NULL,
.mach_get_smp_config = NULL,
.mach_find_smp_config = NULL,
.mpc_record = &mpc_record,
.mpc_apic_id = mpc_apic_id,
.mpc_oem_bus_info = mpc_oem_bus_info,
.mpc_oem_pci_bus = mpc_oem_pci_bus,
.smp_read_mpc_oem = smp_read_mpc_oem,
};
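
This table is the crux of the x86_quirks conversion: generic code no longer carries #ifdef CONFIG_X86_NUMAQ blocks or scattered one-off function pointers; it tests a hook and calls it if present, and NULL entries simply fall through to the default behaviour. A minimal sketch of the pattern, reduced to one hook and using hypothetical names rather than the kernel's:

	/* sketch of the optional-hook-table pattern; not kernel code */
	#include <stdio.h>

	struct quirks {
		int (*pre_time_init)(void);	/* NULL = no quirk installed */
	};

	static int numaq_hook(void)
	{
		printf("platform quirk ran\n");
		return 0;
	}

	static struct quirks default_quirks;	/* every hook NULL */
	static struct quirks numaq_quirks = { .pre_time_init = numaq_hook };
	static struct quirks *quirks = &default_quirks;

	int main(void)
	{
		if (quirks->pre_time_init)	/* default table: nothing runs */
			quirks->pre_time_init();

		quirks = &numaq_quirks;		/* early platform detection */
		if (quirks->pre_time_init)	/* now the quirk fires */
			quirks->pre_time_init();
		return 0;
	}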
void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem,
char *productid)
{
if (strncmp(oem, "IBM NUMA", 8))
printk("Warning! Not a NUMA-Q system!\n");
else
found_numaq = 1;
}

static __init void early_check_numaq(void)
{
	/*
@@ -82,6 +265,9 @@ static __init void early_check_numaq(void)
	 */
	if (smp_found_config)
		early_get_smp_config();
+
+	if (found_numaq)
+		x86_quirks = &numaq_x86_quirks;
}

int __init get_memcfg_numaq(void)
@@ -92,14 +278,3 @@ int __init get_memcfg_numaq(void)
	smp_dump_qct();
	return 1;
}
-
-void __init numaq_tsc_disable(void)
-{
-	if (!found_numaq)
-		return;
-
-	if (num_online_nodes() > 1) {
-		printk(KERN_DEBUG "NUMAQ: disabling TSC\n");
-		setup_clear_cpu_cap(X86_FEATURE_TSC);
-	}
-}
@@ -29,6 +29,7 @@
#include <asm/desc.h>
#include <asm/setup.h>
#include <asm/arch_hooks.h>
+#include <asm/pgtable.h>
#include <asm/time.h>
#include <asm/pgalloc.h>
#include <asm/irq.h>
@@ -123,6 +124,7 @@ static void *get_call_destination(u8 type)
		.pv_irq_ops = pv_irq_ops,
		.pv_apic_ops = pv_apic_ops,
		.pv_mmu_ops = pv_mmu_ops,
+		.pv_lock_ops = pv_lock_ops,
	};

	return *((void **)&tmpl + type);
}
@@ -266,6 +268,17 @@ enum paravirt_lazy_mode paravirt_get_lazy_mode(void)
	return __get_cpu_var(paravirt_lazy_mode);
}
void __init paravirt_use_bytelocks(void)
{
#ifdef CONFIG_SMP
pv_lock_ops.spin_is_locked = __byte_spin_is_locked;
pv_lock_ops.spin_is_contended = __byte_spin_is_contended;
pv_lock_ops.spin_lock = __byte_spin_lock;
pv_lock_ops.spin_trylock = __byte_spin_trylock;
pv_lock_ops.spin_unlock = __byte_spin_unlock;
#endif
}
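
paravirt_use_bytelocks() lets a hypervisor client swap the default ticket-lock operations (installed in pv_lock_ops below) for unfair byte locks; strict FIFO ticket ordering becomes a liability when the vCPU holding the next ticket can be scheduled out. A rough userspace analogue of a byte lock, using GCC atomic builtins rather than the kernel's __byte_spin_* helpers, which this sketch does not reproduce exactly:

	/* sketch of a test-and-set byte lock; not the kernel implementation */
	typedef struct {
		volatile unsigned char slock;	/* 0 = free, 1 = held */
	} byte_spinlock_t;

	static inline void byte_spin_lock(byte_spinlock_t *lock)
	{
		while (__sync_lock_test_and_set(&lock->slock, 1))
			while (lock->slock)
				;		/* spin on reads until free */
	}

	static inline int byte_spin_trylock(byte_spinlock_t *lock)
	{
		return !__sync_lock_test_and_set(&lock->slock, 1);
	}

	static inline void byte_spin_unlock(byte_spinlock_t *lock)
	{
		__sync_lock_release(&lock->slock);	/* release-store of 0 */
	}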
struct pv_info pv_info = {
	.name = "bare hardware",
	.paravirt_enabled = 0,
@@ -361,7 +374,6 @@ struct pv_cpu_ops pv_cpu_ops = {
struct pv_apic_ops pv_apic_ops = {
#ifdef CONFIG_X86_LOCAL_APIC
	.apic_write = native_apic_write,
-	.apic_write_atomic = native_apic_write_atomic,
	.apic_read = native_apic_read,
	.setup_boot_clock = setup_boot_APIC_clock,
	.setup_secondary_clock = setup_secondary_APIC_clock,
@@ -373,6 +385,9 @@ struct pv_mmu_ops pv_mmu_ops = {
#ifndef CONFIG_X86_64
	.pagetable_setup_start = native_pagetable_setup_start,
	.pagetable_setup_done = native_pagetable_setup_done,
+#else
+	.pagetable_setup_start = paravirt_nop,
+	.pagetable_setup_done = paravirt_nop,
#endif

	.read_cr2 = native_read_cr2,
@@ -446,6 +461,18 @@ struct pv_mmu_ops pv_mmu_ops = {

	.set_fixmap = native_set_fixmap,
};
struct pv_lock_ops pv_lock_ops = {
#ifdef CONFIG_SMP
.spin_is_locked = __ticket_spin_is_locked,
.spin_is_contended = __ticket_spin_is_contended,
.spin_lock = __ticket_spin_lock,
.spin_trylock = __ticket_spin_trylock,
.spin_unlock = __ticket_spin_unlock,
#endif
};
EXPORT_SYMBOL_GPL(pv_lock_ops);
EXPORT_SYMBOL_GPL(pv_time_ops);
EXPORT_SYMBOL (pv_cpu_ops);
EXPORT_SYMBOL (pv_mmu_ops);
......
@@ -36,7 +36,7 @@
#include <linux/delay.h>
#include <linux/scatterlist.h>
#include <linux/iommu-helper.h>
-#include <asm/gart.h>
+#include <asm/iommu.h>
#include <asm/calgary.h>
#include <asm/tce.h>
#include <asm/pci-direct.h>
......
@@ -5,12 +5,11 @@
#include <asm/proto.h>
#include <asm/dma.h>
-#include <asm/gart.h>
+#include <asm/iommu.h>
#include <asm/calgary.h>
#include <asm/amd_iommu.h>

-int forbid_dac __read_mostly;
-EXPORT_SYMBOL(forbid_dac);
+static int forbid_dac __read_mostly;

const struct dma_mapping_ops *dma_ops;
EXPORT_SYMBOL(dma_ops);
@@ -114,21 +113,15 @@ void __init pci_iommu_alloc(void)
	 * The order of these functions is important for
	 * fall-back/fail-over reasons
	 */
-#ifdef CONFIG_GART_IOMMU
	gart_iommu_hole_init();
-#endif

-#ifdef CONFIG_CALGARY_IOMMU
	detect_calgary();
-#endif

	detect_intel_iommu();

	amd_iommu_detect();

-#ifdef CONFIG_SWIOTLB
	pci_swiotlb_init();
-#endif
}
#endif
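
Note that the #ifdef deletions above do not drop the config dependency; the series moves it into the headers, where the disabled case provides an empty inline stub so that call sites compile unconditionally. Schematically (a sketch of the idiom, not the literal contents of asm/iommu.h or asm/calgary.h):

	/* sketch: header-side stubs replace #ifdef clutter at every call site */
	#ifdef CONFIG_CALGARY_IOMMU
	extern void detect_calgary(void);
	#else
	static inline void detect_calgary(void)
	{
		/* config disabled: the call compiles away to nothing */
	}
	#endif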
@@ -184,9 +177,7 @@ static __init int iommu_setup(char *p)
		swiotlb = 1;
#endif

-#ifdef CONFIG_GART_IOMMU
	gart_parse_options(p);
-#endif

#ifdef CONFIG_CALGARY_IOMMU
	if (!strncmp(p, "calgary", 7))
@@ -500,17 +491,13 @@ EXPORT_SYMBOL(dma_free_coherent);
static int __init pci_iommu_init(void)
{
-#ifdef CONFIG_CALGARY_IOMMU
	calgary_iommu_init();
-#endif

	intel_iommu_init();

	amd_iommu_init();

-#ifdef CONFIG_GART_IOMMU
	gart_iommu_init();
-#endif

	no_iommu_init();
	return 0;
......
@@ -32,6 +32,7 @@
#include <asm/mtrr.h>
#include <asm/pgtable.h>
#include <asm/proto.h>
+#include <asm/iommu.h>
#include <asm/gart.h>
#include <asm/cacheflush.h>
#include <asm/swiotlb.h>
......
@@ -7,7 +7,7 @@
#include <linux/dma-mapping.h>
#include <linux/scatterlist.h>

-#include <asm/gart.h>
+#include <asm/iommu.h>
#include <asm/processor.h>
#include <asm/dma.h>
......
@@ -5,7 +5,7 @@
#include <linux/module.h>
#include <linux/dma-mapping.h>

-#include <asm/gart.h>
+#include <asm/iommu.h>
#include <asm/swiotlb.h>
#include <asm/dma.h>
......
@@ -15,6 +15,7 @@ unsigned long idle_nomwait;
EXPORT_SYMBOL(idle_nomwait);

struct kmem_cache *task_xstate_cachep;
static int force_mwait __cpuinitdata;

int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
{
@@ -199,6 +200,7 @@ static void poll_idle(void)
 *
 * idle=mwait overrides this decision and forces the usage of mwait.
 */
static int __cpuinitdata force_mwait;

#define MWAIT_INFO 0x05
#define MWAIT_ECX_EXTENDED_INFO 0x01
@@ -326,6 +328,9 @@ void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)

static int __init idle_setup(char *str)
{
+	if (!str)
+		return -EINVAL;
+
	if (!strcmp(str, "poll")) {
		printk("using polling idle threads.\n");
		pm_idle = poll_idle;
......
@@ -537,8 +537,8 @@ static inline void __switch_to_xtra(struct task_struct *prev_p,
struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
-	struct thread_struct *prev = &prev_p->thread,
-				 *next = &next_p->thread;
+	struct thread_struct *prev = &prev_p->thread;
+	struct thread_struct *next = &next_p->thread;
	int cpu = smp_processor_id();
	struct tss_struct *tss = &per_cpu(init_tss, cpu);
	unsigned fsindex, gsindex;
@@ -586,35 +586,34 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
	/*
	 * Switch FS and GS.
+	 *
+	 * Segment register != 0 always requires a reload. Also
+	 * reload when it has changed. When prev process used 64bit
+	 * base always reload to avoid an information leak.
	 */
-	{
-		/* segment register != 0 always requires a reload.
-		   also reload when it has changed.
-		   when prev process used 64bit base always reload
-		   to avoid an information leak. */
-		if (unlikely(fsindex | next->fsindex | prev->fs)) {
-			loadsegment(fs, next->fsindex);
-			/* check if the user used a selector != 0
-			 * if yes clear 64bit base, since overloaded base
-			 * is always mapped to the Null selector
-			 */
-			if (fsindex)
-				prev->fs = 0;
-		}
-		/* when next process has a 64bit base use it */
-		if (next->fs)
-			wrmsrl(MSR_FS_BASE, next->fs);
-		prev->fsindex = fsindex;
-
-		if (unlikely(gsindex | next->gsindex | prev->gs)) {
-			load_gs_index(next->gsindex);
-			if (gsindex)
-				prev->gs = 0;
-		}
-		if (next->gs)
-			wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
-		prev->gsindex = gsindex;
-	}
+	if (unlikely(fsindex | next->fsindex | prev->fs)) {
+		loadsegment(fs, next->fsindex);
+		/*
+		 * Check if the user used a selector != 0; if yes
+		 * clear 64bit base, since overloaded base is always
+		 * mapped to the Null selector
+		 */
+		if (fsindex)
+			prev->fs = 0;
+	}
+	/* when next process has a 64bit base use it */
+	if (next->fs)
+		wrmsrl(MSR_FS_BASE, next->fs);
+	prev->fsindex = fsindex;
+
+	if (unlikely(gsindex | next->gsindex | prev->gs)) {
+		load_gs_index(next->gsindex);
+		if (gsindex)
+			prev->gs = 0;
+	}
+	if (next->gs)
+		wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
+	prev->gsindex = gsindex;
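
One idiom in the rewritten block deserves a note: fsindex | next->fsindex | prev->fs ORs all three operands so the unlikely reload path costs a single test-and-branch, since the OR is non-zero exactly when at least one operand is. In isolation (illustration only, not kernel code):

	/* illustration: one branch instead of three short-circuit tests */
	static int needs_reload(unsigned fsindex, unsigned next_fsindex,
				unsigned long prev_fs)
	{
		return (fsindex | next_fsindex | prev_fs) != 0;
	}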
	/* Must be after DS reload */
	unlazy_fpu(prev_p);
@@ -627,7 +626,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
	write_pda(pcurrent, next_p);
	write_pda(kernelstack,
-		  (unsigned long)task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET);
+		  (unsigned long)task_stack_page(next_p) +
+		  THREAD_SIZE - PDA_STACKOFFSET);
#ifdef CONFIG_CC_STACKPROTECTOR
	write_pda(stack_canary, next_p->stack_canary);
	/*
......
@@ -1357,8 +1357,6 @@ const struct user_regset_view *task_user_regset_view(struct task_struct *task)
#endif
}

-#ifdef CONFIG_X86_32
-
void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code)
{
	struct siginfo info;
@@ -1377,89 +1375,10 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code)
	force_sig_info(SIGTRAP, &info, tsk);
}
/* notification of system call entry/exit
* - triggered by current->work.syscall_trace
*/
int do_syscall_trace(struct pt_regs *regs, int entryexit)
{
int is_sysemu = test_thread_flag(TIF_SYSCALL_EMU);
/*
* With TIF_SYSCALL_EMU set we want to ignore TIF_SINGLESTEP for syscall
* interception
*/
int is_singlestep = !is_sysemu && test_thread_flag(TIF_SINGLESTEP);
int ret = 0;
/* do the secure computing check first */
if (!entryexit)
secure_computing(regs->orig_ax);
if (unlikely(current->audit_context)) {
if (entryexit)
audit_syscall_exit(AUDITSC_RESULT(regs->ax),
regs->ax);
/* Debug traps, when using PTRACE_SINGLESTEP, must be sent only
* on the syscall exit path. Normally, when TIF_SYSCALL_AUDIT is
* not used, entry.S will call us only on syscall exit, not
* entry; so when TIF_SYSCALL_AUDIT is used we must avoid
* calling send_sigtrap() on syscall entry.
*
* Note that when PTRACE_SYSEMU_SINGLESTEP is used,
* is_singlestep is false, despite his name, so we will still do
* the correct thing.
*/
else if (is_singlestep)
goto out;
}
if (!(current->ptrace & PT_PTRACED))
goto out;
/* If a process stops on the 1st tracepoint with SYSCALL_TRACE
* and then is resumed with SYSEMU_SINGLESTEP, it will come in
* here. We have to check this and return */
if (is_sysemu && entryexit)
return 0;
/* Fake a debug trap */
if (is_singlestep)
send_sigtrap(current, regs, 0);
if (!test_thread_flag(TIF_SYSCALL_TRACE) && !is_sysemu)
goto out;
/* the 0x80 provides a way for the tracing parent to distinguish
between a syscall stop and SIGTRAP delivery */
/* Note that the debugger could change the result of test_thread_flag!*/
ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) ? 0x80:0));
/*
* this isn't the same as continuing with a signal, but it will do
* for normal use. strace only continues with a signal if the
* stopping signal is not SIGTRAP. -brl
*/
if (current->exit_code) {
send_sig(current->exit_code, current, 1);
current->exit_code = 0;
}
ret = is_sysemu;
out:
if (unlikely(current->audit_context) && !entryexit)
audit_syscall_entry(AUDIT_ARCH_I386, regs->orig_ax,
regs->bx, regs->cx, regs->dx, regs->si);
if (ret == 0)
return 0;
regs->orig_ax = -1; /* force skip of syscall restarting */
if (unlikely(current->audit_context))
audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax);
return 1;
}
-#else  /* CONFIG_X86_64 */
-
static void syscall_trace(struct pt_regs *regs)
{
-	if (!(current->ptrace & PT_PTRACED))
-		return;
-
#if 0
	printk("trace %s ip %lx sp %lx ax %d origrax %d caller %lx tiflags %x ptrace %x\n",
@@ -1481,39 +1400,81 @@ static void syscall_trace(struct pt_regs *regs)
	}
}
+#ifdef CONFIG_X86_32
+# define IS_IA32 1
+#elif defined CONFIG_IA32_EMULATION
+# define IS_IA32 test_thread_flag(TIF_IA32)
+#else
+# define IS_IA32 0
+#endif
+
+/*
+ * We must return the syscall number to actually look up in the table.
+ * This can be -1L to skip running any syscall at all.
+ */
-asmlinkage void syscall_trace_enter(struct pt_regs *regs)
+asmregparm long syscall_trace_enter(struct pt_regs *regs)
{
+	long ret = 0;
+
+	/*
+	 * If we stepped into a sysenter/syscall insn, it trapped in
+	 * kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP.
+	 * If user-mode had set TF itself, then it's still clear from
+	 * do_debug() and we need to set it again to restore the user
+	 * state.  If we entered on the slow path, TF was already set.
+	 */
+	if (test_thread_flag(TIF_SINGLESTEP))
+		regs->flags |= X86_EFLAGS_TF;
+
	/* do the secure computing check first */
	secure_computing(regs->orig_ax);

-	if (test_thread_flag(TIF_SYSCALL_TRACE)
-	    && (current->ptrace & PT_PTRACED))
+	if (unlikely(test_thread_flag(TIF_SYSCALL_EMU)))
+		ret = -1L;
+
+	if (ret || test_thread_flag(TIF_SYSCALL_TRACE))
		syscall_trace(regs);

	if (unlikely(current->audit_context)) {
-		if (test_thread_flag(TIF_IA32)) {
+		if (IS_IA32)
			audit_syscall_entry(AUDIT_ARCH_I386,
					    regs->orig_ax,
					    regs->bx, regs->cx,
					    regs->dx, regs->si);
-		} else {
+#ifdef CONFIG_X86_64
+		else
			audit_syscall_entry(AUDIT_ARCH_X86_64,
					    regs->orig_ax,
					    regs->di, regs->si,
					    regs->dx, regs->r10);
-		}
+#endif
	}

+	return ret ?: regs->orig_ax;
}

-asmlinkage void syscall_trace_leave(struct pt_regs *regs)
+asmregparm void syscall_trace_leave(struct pt_regs *regs)
{
	if (unlikely(current->audit_context))
		audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax);

-	if ((test_thread_flag(TIF_SYSCALL_TRACE)
-	     || test_thread_flag(TIF_SINGLESTEP))
-	    && (current->ptrace & PT_PTRACED))
+	if (test_thread_flag(TIF_SYSCALL_TRACE))
		syscall_trace(regs);
-}

-#endif	/* CONFIG_X86_32 */
+	/*
+	 * If TIF_SYSCALL_EMU is set, we only get here because of
+	 * TIF_SINGLESTEP (i.e. this is PTRACE_SYSEMU_SINGLESTEP).
+	 * We already reported this syscall instruction in
+	 * syscall_trace_enter(), so don't do any more now.
+	 */
+	if (unlikely(test_thread_flag(TIF_SYSCALL_EMU)))
+		return;
+
+	/*
+	 * If we are single-stepping, synthesize a trap to follow the
+	 * system call instruction.
+	 */
+	if (test_thread_flag(TIF_SINGLESTEP) &&
+	    (current->ptrace & PT_PTRACED))
+		send_sigtrap(current, regs, 0);
+}
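
The new contract is that the entry stub branches on syscall_trace_enter()'s return value instead of re-reading orig_ax, which is what lets PTRACE_SYSEMU suppress the syscall by returning -1L. The real dispatch lives in the entry_*.S assembly, so this is only C-flavoured pseudocode of the caller's side:

	/* pseudocode sketch of the entry-path contract; not the actual stub */
	long nr = syscall_trace_enter(regs);	/* may return -1L (skip) */

	if (nr >= 0 && nr < NR_syscalls)
		regs->ax = call_syscall(nr, regs);	/* hypothetical helper */
	else
		regs->ax = -ENOSYS;		/* the SYSEMU case lands here */

	syscall_trace_leave(regs);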
@@ -177,6 +177,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
			DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 2400"),
		},
	},
{ /* Handle problems with rebooting on Dell T5400's */
.callback = set_bios_reboot,
.ident = "Dell Precision T5400",
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
DMI_MATCH(DMI_PRODUCT_NAME, "Precision WorkStation T5400"),
},
},
	{	/* Handle problems with rebooting on HP laptops */
		.callback = set_bios_reboot,
		.ident = "HP Compaq Laptop",
......
@@ -57,12 +57,8 @@
#include <linux/slab.h>
#include <linux/user.h>
#include <linux/delay.h>
-#include <linux/highmem.h>
#include <linux/kallsyms.h>
-#include <linux/edd.h>
-#include <linux/iscsi_ibft.h>
-#include <linux/kexec.h>
#include <linux/cpufreq.h>
#include <linux/dma-mapping.h>
#include <linux/ctype.h>
@@ -96,7 +92,7 @@
#include <asm/smp.h>
#include <asm/desc.h>
#include <asm/dma.h>
-#include <asm/gart.h>
+#include <asm/iommu.h>
#include <asm/mmu_context.h>
#include <asm/proto.h>
@@ -104,7 +100,6 @@
#include <asm/paravirt.h>
#include <asm/percpu.h>
-#include <asm/sections.h>
#include <asm/topology.h>
#include <asm/apicdef.h>
#ifdef CONFIG_X86_64
@@ -579,6 +574,10 @@ static int __init setup_elfcorehdr(char *arg)
early_param("elfcorehdr", setup_elfcorehdr);
#endif

+static struct x86_quirks default_x86_quirks __initdata;
+
+struct x86_quirks *x86_quirks __initdata = &default_x86_quirks;
+
/*
 * Determine if we were loaded by an EFI loader. If so, then we have also been
 * passed the efi memmap, systab, etc., so we should use these data structures
@@ -824,7 +823,10 @@ void __init setup_arch(char **cmdline_p)
	vmi_init();
#endif

+	paravirt_pagetable_setup_start(swapper_pg_dir);
	paging_init();
+	paravirt_pagetable_setup_done(swapper_pg_dir);
+	paravirt_post_allocator_init();

#ifdef CONFIG_X86_64
	map_vsyscall();
@@ -854,14 +856,6 @@
	init_cpu_to_node();
#endif

-#ifdef CONFIG_X86_NUMAQ
-	/*
-	 * need to check online nodes num, call it
-	 * here before time_init/tsc_init
-	 */
-	numaq_tsc_disable();
-#endif
-
	init_apic_mappings();
	ioapic_init_mappings();
......
@@ -212,7 +212,7 @@ asmlinkage unsigned long sys_sigreturn(unsigned long __unused)

badframe:
	if (show_unhandled_signals && printk_ratelimit()) {
-		printk(KERN_INFO "%s%s[%d] bad frame in sigreturn frame:"
+		printk("%s%s[%d] bad frame in sigreturn frame:"
			"%p ip:%lx sp:%lx oeax:%lx",
		       task_pid_nr(current) > 1 ? KERN_INFO : KERN_EMERG,
		       current->comm, task_pid_nr(current), frame, regs->ip,
@@ -657,12 +657,6 @@ static void do_signal(struct pt_regs *regs)
void
do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
{
-	/* Pending single-step? */
-	if (thread_info_flags & _TIF_SINGLESTEP) {
-		regs->flags |= X86_EFLAGS_TF;
-		clear_thread_flag(TIF_SINGLESTEP);
-	}
-
	/* deal with pending signal delivery */
	if (thread_info_flags & _TIF_SIGPENDING)
		do_signal(regs);
......
@@ -487,12 +487,6 @@ static void do_signal(struct pt_regs *regs)
void do_notify_resume(struct pt_regs *regs, void *unused,
		      __u32 thread_info_flags)
{
-	/* Pending single-step? */
-	if (thread_info_flags & _TIF_SINGLESTEP) {
-		regs->flags |= X86_EFLAGS_TF;
-		clear_thread_flag(TIF_SINGLESTEP);
-	}
-
#ifdef CONFIG_X86_MCE
	/* notify userspace of pending MCEs */
	if (thread_info_flags & _TIF_MCE_NOTIFY)
......
@@ -546,8 +546,8 @@ static inline void __inquire_remote_apic(int apicid)
			printk(KERN_CONT
			       "a previous APIC delivery may have failed\n");

-		apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
-		apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]);
+		apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
+		apic_write(APIC_ICR, APIC_DM_REMRD | regs[i]);

		timeout = 0;
		do {
@@ -579,11 +579,11 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
	int maxlvt;

	/* Target chip */
-	apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid));
+	apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid));

	/* Boot on the stack */
	/* Kick the second */
-	apic_write_around(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL);
+	apic_write(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL);

	Dprintk("Waiting for send to finish...\n");
	send_status = safe_apic_wait_icr_idle();
@@ -592,14 +592,9 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
	 * Give the other CPU some time to accept the IPI.
	 */
	udelay(200);
-	/*
-	 * Due to the Pentium erratum 3AP.
-	 */
	maxlvt = lapic_get_maxlvt();
-	if (maxlvt > 3) {
-		apic_read_around(APIC_SPIV);
+	if (maxlvt > 3)			/* Due to the Pentium erratum 3AP. */
		apic_write(APIC_ESR, 0);
-	}
	accept_status = (apic_read(APIC_ESR) & 0xEF);
	Dprintk("NMI sent.\n");
@@ -625,12 +620,14 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
		return send_status;
	}

+	maxlvt = lapic_get_maxlvt();
+
	/*
	 * Be paranoid about clearing APIC errors.
	 */
	if (APIC_INTEGRATED(apic_version[phys_apicid])) {
-		apic_read_around(APIC_SPIV);
-		apic_write(APIC_ESR, 0);
+		if (maxlvt > 3)		/* Due to the Pentium erratum 3AP. */
+			apic_write(APIC_ESR, 0);
		apic_read(APIC_ESR);
	}
@@ -639,13 +636,13 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
	/*
	 * Turn INIT on target chip
	 */
-	apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
+	apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));

	/*
	 * Send IPI
	 */
-	apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT
-				| APIC_DM_INIT);
+	apic_write(APIC_ICR,
+		   APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT);

	Dprintk("Waiting for send to finish...\n");
	send_status = safe_apic_wait_icr_idle();
@@ -655,10 +652,10 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
	Dprintk("Deasserting INIT.\n");

	/* Target chip */
-	apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
+	apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));

	/* Send IPI */
-	apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);
+	apic_write(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);

	Dprintk("Waiting for send to finish...\n");
	send_status = safe_apic_wait_icr_idle();
@@ -689,12 +686,10 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
	 */
	Dprintk("#startup loops: %d.\n", num_starts);

-	maxlvt = lapic_get_maxlvt();
-
	for (j = 1; j <= num_starts; j++) {
		Dprintk("Sending STARTUP #%d.\n", j);
-		apic_read_around(APIC_SPIV);
-		apic_write(APIC_ESR, 0);
+		if (maxlvt > 3)		/* Due to the Pentium erratum 3AP. */
+			apic_write(APIC_ESR, 0);
		apic_read(APIC_ESR);
		Dprintk("After apic_write.\n");
@@ -703,12 +698,11 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
		 */

		/* Target chip */
-		apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
+		apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));

		/* Boot on the stack */
		/* Kick the second */
-		apic_write_around(APIC_ICR, APIC_DM_STARTUP
-					| (start_eip >> 12));
+		apic_write(APIC_ICR, APIC_DM_STARTUP | (start_eip >> 12));

		/*
		 * Give the other CPU some time to accept the IPI.
@@ -724,13 +718,8 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
		 * Give the other CPU some time to accept the IPI.
		 */
		udelay(200);
-		/*
-		 * Due to the Pentium erratum 3AP.
-		 */
-		if (maxlvt > 3) {
-			apic_read_around(APIC_SPIV);
+		if (maxlvt > 3)		/* Due to the Pentium erratum 3AP. */
			apic_write(APIC_ESR, 0);
-		}
		accept_status = (apic_read(APIC_ESR) & 0xEF);
		if (send_status || accept_status)
			break;
@@ -768,7 +757,7 @@ static void __cpuinit do_fork_idle(struct work_struct *work)
 *
 * Must be called after the _cpu_pda pointer table is initialized.
 */
-static int __cpuinit get_local_pda(int cpu)
+int __cpuinit get_local_pda(int cpu)
{
	struct x8664_pda *oldpda, *newpda;
	unsigned long size = sizeof(struct x8664_pda);
@@ -1311,7 +1300,7 @@ static void __ref remove_cpu_from_maps(int cpu)
	cpu_clear(cpu, cpu_callout_map);
	cpu_clear(cpu, cpu_callin_map);
	/* was set by cpu_init() */
-	clear_bit(cpu, (unsigned long *)&cpu_initialized);
+	cpu_clear(cpu, cpu_initialized);
	numa_remove_cpu(cpu);
}
@@ -1390,7 +1379,8 @@ static int __init parse_maxcpus(char *arg)
{
	extern unsigned int maxcpus;

-	maxcpus = simple_strtoul(arg, NULL, 0);
+	if (arg)
+		maxcpus = simple_strtoul(arg, NULL, 0);
	return 0;
}
early_param("maxcpus", parse_maxcpus);
@@ -105,6 +105,20 @@ static int is_setting_trap_flag(struct task_struct *child, struct pt_regs *regs)
static int enable_single_step(struct task_struct *child)
{
	struct pt_regs *regs = task_pt_regs(child);
+	unsigned long oflags;
+
+	/*
+	 * If we stepped into a sysenter/syscall insn, it trapped in
+	 * kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP.
+	 * If user-mode had set TF itself, then it's still clear from
+	 * do_debug() and we need to set it again to restore the user
+	 * state so we don't wrongly set TIF_FORCED_TF below.
+	 * If enable_single_step() was used last and that is what
+	 * set TIF_SINGLESTEP, then both TF and TIF_FORCED_TF are
+	 * already set and our bookkeeping is fine.
+	 */
+	if (unlikely(test_tsk_thread_flag(child, TIF_SINGLESTEP)))
+		regs->flags |= X86_EFLAGS_TF;

	/*
	 * Always set TIF_SINGLESTEP - this guarantees that
@@ -113,11 +127,7 @@ static int enable_single_step(struct task_struct *child)
	 */
	set_tsk_thread_flag(child, TIF_SINGLESTEP);

-	/*
-	 * If TF was already set, don't do anything else
-	 */
-	if (regs->flags & X86_EFLAGS_TF)
-		return 0;
+	oflags = regs->flags;

	/* Set TF on the kernel stack.. */
	regs->flags |= X86_EFLAGS_TF;
@@ -126,9 +136,22 @@ static int enable_single_step(struct task_struct *child)
	 * ..but if TF is changed by the instruction we will trace,
	 * don't mark it as being "us" that set it, so that we
	 * won't clear it by hand later.
+	 *
+	 * Note that if we don't actually execute the popf because
+	 * of a signal arriving right now or suchlike, we will lose
+	 * track of the fact that it really was "us" that set it.
	 */
-	if (is_setting_trap_flag(child, regs))
+	if (is_setting_trap_flag(child, regs)) {
+		clear_tsk_thread_flag(child, TIF_FORCED_TF);
		return 0;
+	}
+
+	/*
+	 * If TF was already set, check whether it was us who set it.
+	 * If not, we should never attempt a block step.
+	 */
+	if (oflags & X86_EFLAGS_TF)
+		return test_tsk_thread_flag(child, TIF_FORCED_TF);

	set_tsk_thread_flag(child, TIF_FORCED_TF);
......
@@ -129,6 +129,7 @@ void __init hpet_time_init(void)
 */
void __init time_init(void)
{
+	pre_time_init_hook();
	tsc_init();
	late_time_init = choose_time_init();
}
@@ -58,6 +58,7 @@
#include <asm/nmi.h>
#include <asm/smp.h>
#include <asm/io.h>
+#include <asm/traps.h>

#include "mach_traps.h"
@@ -77,26 +78,6 @@ char ignore_fpu_irq;
gate_desc idt_table[256]
	__attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, };
asmlinkage void divide_error(void);
asmlinkage void debug(void);
asmlinkage void nmi(void);
asmlinkage void int3(void);
asmlinkage void overflow(void);
asmlinkage void bounds(void);
asmlinkage void invalid_op(void);
asmlinkage void device_not_available(void);
asmlinkage void coprocessor_segment_overrun(void);
asmlinkage void invalid_TSS(void);
asmlinkage void segment_not_present(void);
asmlinkage void stack_segment(void);
asmlinkage void general_protection(void);
asmlinkage void page_fault(void);
asmlinkage void coprocessor_error(void);
asmlinkage void simd_coprocessor_error(void);
asmlinkage void alignment_check(void);
asmlinkage void spurious_interrupt_bug(void);
asmlinkage void machine_check(void);
int panic_on_unrecovered_nmi;
int kstack_depth_to_print = 24;
static unsigned int code_bytes = 64;
@@ -256,7 +237,7 @@ static const struct stacktrace_ops print_trace_ops = {
static void
show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
		   unsigned long *stack, unsigned long bp, char *log_lvl)
{
	dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl);
	printk("%s =======================\n", log_lvl);
@@ -383,6 +364,54 @@ int is_valid_bugaddr(unsigned long ip)
	return ud2 == 0x0b0f;
}
static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED;
static int die_owner = -1;
static unsigned int die_nest_count;
unsigned __kprobes long oops_begin(void)
{
unsigned long flags;
oops_enter();
if (die_owner != raw_smp_processor_id()) {
console_verbose();
raw_local_irq_save(flags);
__raw_spin_lock(&die_lock);
die_owner = smp_processor_id();
die_nest_count = 0;
bust_spinlocks(1);
} else {
raw_local_irq_save(flags);
}
die_nest_count++;
return flags;
}
void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
{
bust_spinlocks(0);
die_owner = -1;
add_taint(TAINT_DIE);
__raw_spin_unlock(&die_lock);
raw_local_irq_restore(flags);
if (!regs)
return;
if (kexec_should_crash(current))
crash_kexec(regs);
if (in_interrupt())
panic("Fatal exception in interrupt");
if (panic_on_oops)
panic("Fatal exception");
oops_exit();
do_exit(signr);
}
int __kprobes __die(const char *str, struct pt_regs *regs, long err)
{
	unsigned short ss;
@@ -423,31 +452,9 @@ int __kprobes __die(const char *str, struct pt_regs *regs, long err)
 */
void die(const char *str, struct pt_regs *regs, long err)
{
-	static struct {
-		raw_spinlock_t lock;
-		u32 lock_owner;
-		int lock_owner_depth;
-	} die = {
-		.lock = __RAW_SPIN_LOCK_UNLOCKED,
-		.lock_owner = -1,
-		.lock_owner_depth = 0
-	};
-	unsigned long flags;
-
-	oops_enter();
-
-	if (die.lock_owner != raw_smp_processor_id()) {
-		console_verbose();
-		raw_local_irq_save(flags);
-		__raw_spin_lock(&die.lock);
-		die.lock_owner = smp_processor_id();
-		die.lock_owner_depth = 0;
-		bust_spinlocks(1);
-	} else {
-		raw_local_irq_save(flags);
-	}
+	unsigned long flags = oops_begin();

-	if (++die.lock_owner_depth < 3) {
+	if (die_nest_count < 3) {
		report_bug(regs->ip, regs);

		if (__die(str, regs, err))
@@ -456,26 +463,7 @@ void die(const char *str, struct pt_regs *regs, long err)
		printk(KERN_EMERG "Recursive die() failure, output suppressed\n");
	}

-	bust_spinlocks(0);
-	die.lock_owner = -1;
-	add_taint(TAINT_DIE);
-	__raw_spin_unlock(&die.lock);
-	raw_local_irq_restore(flags);
-
-	if (!regs)
-		return;
-
-	if (kexec_should_crash(current))
-		crash_kexec(regs);
-
-	if (in_interrupt())
-		panic("Fatal exception in interrupt");
-
-	if (panic_on_oops)
-		panic("Fatal exception");
-
-	oops_exit();
-	do_exit(SIGSEGV);
+	oops_end(flags, regs, SIGSEGV);
}
static inline void
......
@@ -51,30 +51,10 @@
#include <asm/pgalloc.h>
#include <asm/proto.h>
#include <asm/pda.h>
+#include <asm/traps.h>

#include <mach_traps.h>
asmlinkage void divide_error(void);
asmlinkage void debug(void);
asmlinkage void nmi(void);
asmlinkage void int3(void);
asmlinkage void overflow(void);
asmlinkage void bounds(void);
asmlinkage void invalid_op(void);
asmlinkage void device_not_available(void);
asmlinkage void double_fault(void);
asmlinkage void coprocessor_segment_overrun(void);
asmlinkage void invalid_TSS(void);
asmlinkage void segment_not_present(void);
asmlinkage void stack_segment(void);
asmlinkage void general_protection(void);
asmlinkage void page_fault(void);
asmlinkage void coprocessor_error(void);
asmlinkage void simd_coprocessor_error(void);
asmlinkage void alignment_check(void);
asmlinkage void spurious_interrupt_bug(void);
asmlinkage void machine_check(void);
int panic_on_unrecovered_nmi;
int kstack_depth_to_print = 12;
static unsigned int code_bytes = 64;
@@ -355,17 +335,24 @@ static const struct stacktrace_ops print_trace_ops = {
	.address = print_trace_address,
};

-void show_trace(struct task_struct *task, struct pt_regs *regs,
-		unsigned long *stack, unsigned long bp)
+static void
+show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
+		   unsigned long *stack, unsigned long bp, char *log_lvl)
{
	printk("\nCall Trace:\n");
-	dump_trace(task, regs, stack, bp, &print_trace_ops, NULL);
+	dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl);
	printk("\n");
}

+void show_trace(struct task_struct *task, struct pt_regs *regs,
+		unsigned long *stack, unsigned long bp)
+{
+	show_trace_log_lvl(task, regs, stack, bp, "");
+}
+
static void
-_show_stack(struct task_struct *task, struct pt_regs *regs,
-	    unsigned long *sp, unsigned long bp)
+show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
+		   unsigned long *sp, unsigned long bp, char *log_lvl)
{
	unsigned long *stack;
	int i;
@@ -399,12 +386,12 @@ _show_stack(struct task_struct *task, struct pt_regs *regs,
		printk(" %016lx", *stack++);
		touch_nmi_watchdog();
	}
-	show_trace(task, regs, sp, bp);
+	show_trace_log_lvl(task, regs, sp, bp, log_lvl);
}

void show_stack(struct task_struct *task, unsigned long *sp)
{
-	_show_stack(task, NULL, sp, 0);
+	show_stack_log_lvl(task, NULL, sp, 0, "");
}

/*
@@ -454,7 +441,8 @@ void show_registers(struct pt_regs *regs)
	u8 *ip;

	printk("Stack: ");
-	_show_stack(NULL, regs, (unsigned long *)sp, regs->bp);
+	show_stack_log_lvl(NULL, regs, (unsigned long *)sp,
+			   regs->bp, "");
	printk("\n");

	printk(KERN_EMERG "Code: ");
@@ -518,7 +506,7 @@ unsigned __kprobes long oops_begin(void)
}

void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
{
	die_owner = -1;
	bust_spinlocks(0);
	die_nest_count--;
......
@@ -73,7 +73,7 @@ int is_visws_box(void)
	return visws_board_type >= 0;
}

-static int __init visws_time_init_quirk(void)
+static int __init visws_time_init(void)
{
	printk(KERN_INFO "Starting Cobalt Timer system clock\n");
@@ -93,7 +93,7 @@ static int __init visws_time_init_quirk(void)
	return 0;
}

-static int __init visws_pre_intr_init_quirk(void)
+static int __init visws_pre_intr_init(void)
{
	init_VISWS_APIC_irqs();
@@ -114,7 +114,7 @@ EXPORT_SYMBOL(sgivwfb_mem_size);

long long mem_size __initdata = 0;

-static char * __init visws_memory_setup_quirk(void)
+static char * __init visws_memory_setup(void)
{
	long long gfx_mem_size = 8 * MB;
@@ -176,7 +176,7 @@ static void visws_machine_power_off(void)
	outl(PIIX_SPECIAL_STOP, 0xCFC);
}

-static int __init visws_get_smp_config_quirk(unsigned int early)
+static int __init visws_get_smp_config(unsigned int early)
{
	/*
	 * Prevent MP-table parsing by the generic code:
@@ -192,7 +192,7 @@ extern unsigned int __cpuinitdata maxcpus;
 * No problem for Linux.
 */

-static void __init MP_processor_info (struct mpc_config_processor *m)
+static void __init MP_processor_info(struct mpc_config_processor *m)
{
	int ver, logical_apicid;
	physid_mask_t apic_cpus;
@@ -232,7 +232,7 @@ static void __init MP_processor_info (struct mpc_config_processor *m)
	apic_version[m->mpc_apicid] = ver;
}

-int __init visws_find_smp_config_quirk(unsigned int reserve)
+static int __init visws_find_smp_config(unsigned int reserve)
{
	struct mpc_config_processor *mp = phys_to_virt(CO_CPU_TAB_PHYS);
	unsigned short ncpus = readw(phys_to_virt(CO_CPU_NUM_PHYS));
@@ -258,7 +258,17 @@ int __init visws_find_smp_config_quirk(unsigned int reserve)
	return 1;
}

-extern int visws_trap_init_quirk(void);
+static int visws_trap_init(void);
+
+static struct x86_quirks visws_x86_quirks __initdata = {
+	.arch_time_init = visws_time_init,
+	.arch_pre_intr_init = visws_pre_intr_init,
+	.arch_memory_setup = visws_memory_setup,
+	.arch_intr_init = NULL,
+	.arch_trap_init = visws_trap_init,
+	.mach_get_smp_config = visws_get_smp_config,
+	.mach_find_smp_config = visws_find_smp_config,
+};
void __init visws_early_detect(void)
{
@@ -272,16 +282,10 @@ void __init visws_early_detect(void)
	/*
	 * Install special quirks for timer, interrupt and memory setup:
-	 */
-	arch_time_init_quirk = visws_time_init_quirk;
-	arch_pre_intr_init_quirk = visws_pre_intr_init_quirk;
-	arch_memory_setup_quirk = visws_memory_setup_quirk;
-
-	/*
	 * Fall back to generic behavior for traps:
+	 * Override generic MP-table parsing:
	 */
-	arch_intr_init_quirk = NULL;
-	arch_trap_init_quirk = visws_trap_init_quirk;
+	x86_quirks = &visws_x86_quirks;

	/*
	 * Install reboot quirks:
@@ -294,12 +298,6 @@ void __init visws_early_detect(void)
	 */
	no_broadcast = 0;

-	/*
-	 * Override generic MP-table parsing:
-	 */
-	mach_get_smp_config_quirk = visws_get_smp_config_quirk;
-	mach_find_smp_config_quirk = visws_find_smp_config_quirk;
-
#ifdef CONFIG_X86_IO_APIC
	/*
	 * Turn off IO-APIC detection and initialization:
@@ -426,7 +424,7 @@ static __init void cobalt_init(void)
		co_apic_read(CO_APIC_ID));
}

-int __init visws_trap_init_quirk(void)
+static int __init visws_trap_init(void)
{
	lithium_init();
	cobalt_init();
......
@@ -906,7 +906,6 @@ static inline int __init activate_vmi(void)
#ifdef CONFIG_X86_LOCAL_APIC
	para_fill(pv_apic_ops.apic_read, APICRead);
	para_fill(pv_apic_ops.apic_write, APICWrite);
-	para_fill(pv_apic_ops.apic_write_atomic, APICWrite);
#endif

/*
......
@@ -991,7 +991,6 @@ __init void lguest_init(void)
#ifdef CONFIG_X86_LOCAL_APIC
	/* apic read/write intercepts */
	pv_apic_ops.apic_write = lguest_apic_write;
-	pv_apic_ops.apic_write_atomic = lguest_apic_write;
	pv_apic_ops.apic_read = lguest_apic_read;
#endif
......
@@ -10,14 +10,6 @@
#include <asm/e820.h>
#include <asm/setup.h>

-/*
- * Any quirks to be performed to initialize timers/irqs/etc?
- */
-int (*arch_time_init_quirk)(void);
-int (*arch_pre_intr_init_quirk)(void);
-int (*arch_intr_init_quirk)(void);
-int (*arch_trap_init_quirk)(void);
-
#ifdef CONFIG_HOTPLUG_CPU
#define DEFAULT_SEND_IPI (1)
#else
@@ -37,8 +29,8 @@ int no_broadcast=DEFAULT_SEND_IPI;
 **/
void __init pre_intr_init_hook(void)
{
-	if (arch_pre_intr_init_quirk) {
-		if (arch_pre_intr_init_quirk())
+	if (x86_quirks->arch_pre_intr_init) {
+		if (x86_quirks->arch_pre_intr_init())
			return;
	}
	init_ISA_irqs();
@@ -64,8 +56,8 @@ static struct irqaction irq2 = {
 **/
void __init intr_init_hook(void)
{
-	if (arch_intr_init_quirk) {
-		if (arch_intr_init_quirk())
+	if (x86_quirks->arch_intr_init) {
+		if (x86_quirks->arch_intr_init())
			return;
	}
#ifdef CONFIG_X86_LOCAL_APIC
@@ -97,8 +89,8 @@ void __init pre_setup_arch_hook(void)
 **/
void __init trap_init_hook(void)
{
-	if (arch_trap_init_quirk) {
-		if (arch_trap_init_quirk())
+	if (x86_quirks->arch_trap_init) {
+		if (x86_quirks->arch_trap_init())
			return;
	}
}
@@ -110,6 +102,16 @@ static struct irqaction irq0 = {
	.name = "timer"
};
/**
* pre_time_init_hook - do any specific initialisations before.
*
**/
void __init pre_time_init_hook(void)
{
if (x86_quirks->arch_pre_time_init)
x86_quirks->arch_pre_time_init();
}
/** /**
* time_init_hook - do any specific initialisations for the system timer. * time_init_hook - do any specific initialisations for the system timer.
* *
...@@ -119,13 +121,13 @@ static struct irqaction irq0 = { ...@@ -119,13 +121,13 @@ static struct irqaction irq0 = {
**/ **/
void __init time_init_hook(void) void __init time_init_hook(void)
{ {
if (arch_time_init_quirk) { if (x86_quirks->arch_time_init) {
/* /*
* A nonzero return code does not mean failure, it means * A nonzero return code does not mean failure, it means
* that the architecture quirk does not want any * that the architecture quirk does not want any
* generic (timer) setup to be performed after this: * generic (timer) setup to be performed after this:
*/ */
if (arch_time_init_quirk()) if (x86_quirks->arch_time_init())
return; return;
} }
......
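The nonzero-return convention documented in time_init_hook() lets a quirk either replace the generic path outright or merely run ahead of it. A hypothetical quirk illustrating the choice (all "myplat" names are made up for illustration, not part of the patch):

	/* Illustrative quirk; "myplat" is a hypothetical platform. */
	static int __init myplat_time_init(void)
	{
		myplat_program_timer();	/* hypothetical platform timer setup */

		/*
		 * Return 1: generic (timer) setup is skipped after this.
		 * Return 0: generic setup still runs afterwards.
		 */
		return 1;
	}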
@@ -21,3 +21,4 @@ obj-$(CONFIG_K8_NUMA) += k8topology_64.o
 endif
 obj-$(CONFIG_ACPI_NUMA)	+= srat_$(BITS).o
+obj-$(CONFIG_MEMTEST)		+= memtest.o
@@ -844,6 +844,9 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
 	reserve_early(table_start << PAGE_SHIFT,
 			table_end << PAGE_SHIFT, "PGTABLE");

+	if (!after_init_bootmem)
+		early_memtest(start, end);
+
 	return end >> PAGE_SHIFT;
 }

@@ -868,8 +871,6 @@ void __init paging_init(void)
 	 */
 	sparse_init();
 	zone_sizes_init();
-
-	paravirt_post_allocator_init();
 }

 /*
...
@@ -517,118 +517,6 @@ static void __init init_gbpages(void)
 		direct_gbpages = 0;
 }

-#ifdef CONFIG_MEMTEST
-
-static void __init memtest(unsigned long start_phys, unsigned long size,
-			   unsigned pattern)
-{
-	unsigned long i;
-	unsigned long *start;
-	unsigned long start_bad;
-	unsigned long last_bad;
-	unsigned long val;
-	unsigned long start_phys_aligned;
-	unsigned long count;
-	unsigned long incr;
-
-	switch (pattern) {
-	case 0:
-		val = 0UL;
-		break;
-	case 1:
-		val = -1UL;
-		break;
-	case 2:
-		val = 0x5555555555555555UL;
-		break;
-	case 3:
-		val = 0xaaaaaaaaaaaaaaaaUL;
-		break;
-	default:
-		return;
-	}
-
-	incr = sizeof(unsigned long);
-	start_phys_aligned = ALIGN(start_phys, incr);
-	count = (size - (start_phys_aligned - start_phys))/incr;
-	start = __va(start_phys_aligned);
-	start_bad = 0;
-	last_bad = 0;
-
-	for (i = 0; i < count; i++)
-		start[i] = val;
-
-	for (i = 0; i < count; i++, start++, start_phys_aligned += incr) {
-		if (*start != val) {
-			if (start_phys_aligned == last_bad + incr) {
-				last_bad += incr;
-			} else {
-				if (start_bad) {
-					printk(KERN_CONT "\n  %016lx bad mem addr %016lx - %016lx reserved",
-						val, start_bad, last_bad + incr);
-					reserve_early(start_bad, last_bad - start_bad, "BAD RAM");
-				}
-				start_bad = last_bad = start_phys_aligned;
-			}
-		}
-	}
-	if (start_bad) {
-		printk(KERN_CONT "\n  %016lx bad mem addr %016lx - %016lx reserved",
-			val, start_bad, last_bad + incr);
-		reserve_early(start_bad, last_bad - start_bad, "BAD RAM");
-	}
-}
-
-/* default is disabled */
-static int memtest_pattern __initdata;
-
-static int __init parse_memtest(char *arg)
-{
-	if (arg)
-		memtest_pattern = simple_strtoul(arg, NULL, 0);
-	return 0;
-}
-
-early_param("memtest", parse_memtest);
-
-static void __init early_memtest(unsigned long start, unsigned long end)
-{
-	u64 t_start, t_size;
-	unsigned pattern;
-
-	if (!memtest_pattern)
-		return;
-
-	printk(KERN_INFO "early_memtest: pattern num %d", memtest_pattern);
-	for (pattern = 0; pattern < memtest_pattern; pattern++) {
-		t_start = start;
-		t_size = 0;
-		while (t_start < end) {
-			t_start = find_e820_area_size(t_start, &t_size, 1);
-
-			/* done ? */
-			if (t_start >= end)
-				break;
-			if (t_start + t_size > end)
-				t_size = end - t_start;
-
-			printk(KERN_CONT "\n  %016llx - %016llx pattern %d",
-				(unsigned long long)t_start,
-				(unsigned long long)t_start + t_size, pattern);
-
-			memtest(t_start, t_size, pattern);
-
-			t_start += t_size;
-		}
-	}
-	printk(KERN_CONT "\n");
-}
-#else
-static void __init early_memtest(unsigned long start, unsigned long end)
-{
-}
-#endif
-
 static unsigned long __init kernel_physical_mapping_init(unsigned long start,
 						unsigned long end,
 						unsigned long page_size_mask)
...
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/pfn.h>

#include <asm/e820.h>

static void __init memtest(unsigned long start_phys, unsigned long size,
				 unsigned pattern)
{
	unsigned long i;
	unsigned long *start;
	unsigned long start_bad;
	unsigned long last_bad;
	unsigned long val;
	unsigned long start_phys_aligned;
	unsigned long count;
	unsigned long incr;

	switch (pattern) {
	case 0:
		val = 0UL;
		break;
	case 1:
		val = -1UL;
		break;
	case 2:
#ifdef CONFIG_X86_64
		val = 0x5555555555555555UL;
#else
		val = 0x55555555UL;
#endif
		break;
	case 3:
#ifdef CONFIG_X86_64
		val = 0xaaaaaaaaaaaaaaaaUL;
#else
		val = 0xaaaaaaaaUL;
#endif
		break;
	default:
		return;
	}

	incr = sizeof(unsigned long);
	start_phys_aligned = ALIGN(start_phys, incr);
	count = (size - (start_phys_aligned - start_phys))/incr;
	start = __va(start_phys_aligned);
	start_bad = 0;
	last_bad = 0;

	for (i = 0; i < count; i++)
		start[i] = val;

	for (i = 0; i < count; i++, start++, start_phys_aligned += incr) {
		if (*start != val) {
			if (start_phys_aligned == last_bad + incr) {
				last_bad += incr;
			} else {
				if (start_bad) {
					printk(KERN_CONT "\n  %016lx bad mem addr %010lx - %010lx reserved",
						val, start_bad, last_bad + incr);
					reserve_early(start_bad, last_bad - start_bad, "BAD RAM");
				}
				start_bad = last_bad = start_phys_aligned;
			}
		}
	}
	if (start_bad) {
		printk(KERN_CONT "\n  %016lx bad mem addr %010lx - %010lx reserved",
			val, start_bad, last_bad + incr);
		reserve_early(start_bad, last_bad - start_bad, "BAD RAM");
	}
}

/* default is disabled */
static int memtest_pattern __initdata;

static int __init parse_memtest(char *arg)
{
	if (arg)
		memtest_pattern = simple_strtoul(arg, NULL, 0);
	return 0;
}

early_param("memtest", parse_memtest);

void __init early_memtest(unsigned long start, unsigned long end)
{
	u64 t_start, t_size;
	unsigned pattern;

	if (!memtest_pattern)
		return;

	printk(KERN_INFO "early_memtest: pattern num %d", memtest_pattern);
	for (pattern = 0; pattern < memtest_pattern; pattern++) {
		t_start = start;
		t_size = 0;
		while (t_start < end) {
			t_start = find_e820_area_size(t_start, &t_size, 1);

			/* done ? */
			if (t_start >= end)
				break;
			if (t_start + t_size > end)
				t_size = end - t_start;

			printk(KERN_CONT "\n  %010llx - %010llx pattern %d",
				(unsigned long long)t_start,
				(unsigned long long)t_start + t_size, pattern);

			memtest(t_start, t_size, pattern);

			t_start += t_size;
		}
	}
	printk(KERN_CONT "\n");
}
@@ -12,6 +12,8 @@
 #include <linux/gfp.h>
 #include <linux/fs.h>
 #include <linux/bootmem.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>

 #include <asm/msr.h>
 #include <asm/tlbflush.h>
@@ -373,8 +375,8 @@ pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
 	return vma_prot;
 }

-#ifdef CONFIG_NONPROMISC_DEVMEM
-/* This check is done in drivers/char/mem.c in case of NONPROMISC_DEVMEM*/
+#ifdef CONFIG_STRICT_DEVMEM
+/* This check is done in drivers/char/mem.c in case of STRICT_DEVMEM*/
 static inline int range_is_allowed(unsigned long pfn, unsigned long size)
 {
 	return 1;
@@ -398,7 +400,7 @@ static inline int range_is_allowed(unsigned long pfn, unsigned long size)
 	}
 	return 1;
 }
-#endif /* CONFIG_NONPROMISC_DEVMEM */
+#endif /* CONFIG_STRICT_DEVMEM */

 int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
 				unsigned long size, pgprot_t *vma_prot)
@@ -489,3 +491,89 @@ void unmap_devmem(unsigned long pfn, unsigned long size, pgprot_t vma_prot)
 	free_memtype(addr, addr + size);
 }
#if defined(CONFIG_DEBUG_FS)

/* get Nth element of the linked list */
static struct memtype *memtype_get_idx(loff_t pos)
{
	struct memtype *list_node, *print_entry;
	int i = 1;

	print_entry = kmalloc(sizeof(struct memtype), GFP_KERNEL);
	if (!print_entry)
		return NULL;

	spin_lock(&memtype_lock);
	list_for_each_entry(list_node, &memtype_list, nd) {
		if (pos == i) {
			*print_entry = *list_node;
			spin_unlock(&memtype_lock);
			return print_entry;
		}
		++i;
	}
	spin_unlock(&memtype_lock);
	kfree(print_entry);
	return NULL;
}

static void *memtype_seq_start(struct seq_file *seq, loff_t *pos)
{
	if (*pos == 0) {
		++*pos;
		seq_printf(seq, "PAT memtype list:\n");
	}

	return memtype_get_idx(*pos);
}

static void *memtype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	++*pos;
	return memtype_get_idx(*pos);
}

static void memtype_seq_stop(struct seq_file *seq, void *v)
{
}

static int memtype_seq_show(struct seq_file *seq, void *v)
{
	struct memtype *print_entry = (struct memtype *)v;

	seq_printf(seq, "%s @ 0x%Lx-0x%Lx\n", cattr_name(print_entry->type),
			print_entry->start, print_entry->end);
	kfree(print_entry);
	return 0;
}

static struct seq_operations memtype_seq_ops = {
	.start = memtype_seq_start,
	.next  = memtype_seq_next,
	.stop  = memtype_seq_stop,
	.show  = memtype_seq_show,
};

static int memtype_seq_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &memtype_seq_ops);
}

static const struct file_operations memtype_fops = {
	.open    = memtype_seq_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release,
};

static int __init pat_memtype_list_init(void)
{
	debugfs_create_file("pat_memtype_list", S_IRUSR, arch_debugfs_dir,
				NULL, &memtype_fops);
	return 0;
}

late_initcall(pat_memtype_list_init);

#endif /* CONFIG_DEBUG_FS */
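On a CONFIG_DEBUG_FS kernel this exposes the PAT memtype list read-only to root, under the x86 debugfs directory this same series introduces. The output shape follows the two seq_printf() calls above; the entries below are illustrative values, not real output:

	# mount -t debugfs none /sys/kernel/debug
	# cat /sys/kernel/debug/x86/pat_memtype_list
	PAT memtype list:
	uncached-minus @ 0xd0000000-0xd0001000
	write-back @ 0xd8000000-0xd8004000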
@@ -5,13 +5,13 @@ obj-$(CONFIG_PCI_MMCONFIG) += mmconfig_$(BITS).o direct.o mmconfig-shared.o
 obj-$(CONFIG_PCI_DIRECT)	+= direct.o
 obj-$(CONFIG_PCI_OLPC)		+= olpc.o

-pci-y				:= fixup.o
-pci-$(CONFIG_ACPI)		+= acpi.o
-pci-y				+= legacy.o irq.o
-pci-$(CONFIG_X86_VISWS)		+= visws.o
-pci-$(CONFIG_X86_NUMAQ)		+= numa.o
-obj-y				+= $(pci-y) common.o early.o
+obj-y				+= fixup.o
+obj-$(CONFIG_ACPI)		+= acpi.o
+obj-y				+= legacy.o irq.o
+obj-$(CONFIG_X86_VISWS)		+= visws.o
+obj-$(CONFIG_X86_NUMAQ)		+= numaq_32.o
+obj-y				+= common.o early.o
 obj-y				+= amd_bus.o
@@ -57,14 +57,17 @@ static int __init pci_legacy_init(void)

 int __init pci_subsys_init(void)
 {
+#ifdef CONFIG_X86_NUMAQ
+	pci_numaq_init();
+#endif
 #ifdef CONFIG_ACPI
 	pci_acpi_init();
 #endif
+#ifdef CONFIG_X86_VISWS
+	pci_visws_init();
+#endif
 	pci_legacy_init();
 	pcibios_irq_init();
-#ifdef CONFIG_X86_NUMAQ
-	pci_numa_init();
-#endif
 	pcibios_init();

 	return 0;
...
@@ -108,7 +108,8 @@ extern void __init dmi_check_skip_isa_align(void);

 /* some common used subsys_initcalls */
 extern int __init pci_acpi_init(void);
 extern int __init pcibios_irq_init(void);
-extern int __init pci_numa_init(void);
+extern int __init pci_visws_init(void);
+extern int __init pci_numaq_init(void);
 extern int __init pcibios_init(void);

 /* pci-mmconfig.c */
...
(Remaining file diffs collapsed.)