Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
OpenHarmony
kernel_linux
提交
1d991001
K
kernel_linux
项目概览
OpenHarmony
/
kernel_linux
上一次同步 3 年多
通知
13
Star
8
Fork
2
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
K
kernel_linux
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
1d991001
编写于
6月 20, 2009
作者:
I
Ingo Molnar
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'x86/mce3' into x86/urgent
上级
bc3f5d3d
b1f49f95
变更
16
隐藏空白更改
内联
并排
Showing
16 changed file
with
528 addition
and
508 deletion
+528
-508
arch/x86/include/asm/mce.h
arch/x86/include/asm/mce.h
+51
-12
arch/x86/include/asm/therm_throt.h
arch/x86/include/asm/therm_throt.h
+0
-9
arch/x86/kernel/cpu/mcheck/Makefile
arch/x86/kernel/cpu/mcheck/Makefile
+5
-4
arch/x86/kernel/cpu/mcheck/k7.c
arch/x86/kernel/cpu/mcheck/k7.c
+1
-2
arch/x86/kernel/cpu/mcheck/mce.c
arch/x86/kernel/cpu/mcheck/mce.c
+158
-73
arch/x86/kernel/cpu/mcheck/mce.h
arch/x86/kernel/cpu/mcheck/mce.h
+0
-38
arch/x86/kernel/cpu/mcheck/mce_amd.c
arch/x86/kernel/cpu/mcheck/mce_amd.c
+0
-0
arch/x86/kernel/cpu/mcheck/mce_intel.c
arch/x86/kernel/cpu/mcheck/mce_intel.c
+200
-54
arch/x86/kernel/cpu/mcheck/mce_intel_64.c
arch/x86/kernel/cpu/mcheck/mce_intel_64.c
+0
-248
arch/x86/kernel/cpu/mcheck/non-fatal.c
arch/x86/kernel/cpu/mcheck/non-fatal.c
+1
-2
arch/x86/kernel/cpu/mcheck/p4.c
arch/x86/kernel/cpu/mcheck/p4.c
+1
-47
arch/x86/kernel/cpu/mcheck/p5.c
arch/x86/kernel/cpu/mcheck/p5.c
+6
-9
arch/x86/kernel/cpu/mcheck/p6.c
arch/x86/kernel/cpu/mcheck/p6.c
+1
-2
arch/x86/kernel/cpu/mcheck/therm_throt.c
arch/x86/kernel/cpu/mcheck/therm_throt.c
+102
-4
arch/x86/kernel/cpu/mcheck/winchip.c
arch/x86/kernel/cpu/mcheck/winchip.c
+1
-2
arch/x86/kernel/traps.c
arch/x86/kernel/traps.c
+1
-2
未找到文件。
arch/x86/include/asm/mce.h
浏览文件 @
1d991001
...
@@ -102,15 +102,39 @@ struct mce_log {
...
@@ -102,15 +102,39 @@ struct mce_log {
#ifdef __KERNEL__
#ifdef __KERNEL__
#include <linux/percpu.h>
#include <linux/init.h>
#include <asm/atomic.h>
extern
int
mce_disabled
;
extern
int
mce_disabled
;
extern
int
mce_p5_enabled
;
#include <asm/atomic.h>
#ifdef CONFIG_X86_MCE
#include <linux/percpu.h>
void
mcheck_init
(
struct
cpuinfo_x86
*
c
);
#else
static
inline
void
mcheck_init
(
struct
cpuinfo_x86
*
c
)
{}
#endif
#ifdef CONFIG_X86_OLD_MCE
extern
int
nr_mce_banks
;
void
amd_mcheck_init
(
struct
cpuinfo_x86
*
c
);
void
intel_p4_mcheck_init
(
struct
cpuinfo_x86
*
c
);
void
intel_p6_mcheck_init
(
struct
cpuinfo_x86
*
c
);
#endif
#ifdef CONFIG_X86_ANCIENT_MCE
void
intel_p5_mcheck_init
(
struct
cpuinfo_x86
*
c
);
void
winchip_mcheck_init
(
struct
cpuinfo_x86
*
c
);
static
inline
void
enable_p5_mce
(
void
)
{
mce_p5_enabled
=
1
;
}
#else
static
inline
void
intel_p5_mcheck_init
(
struct
cpuinfo_x86
*
c
)
{}
static
inline
void
winchip_mcheck_init
(
struct
cpuinfo_x86
*
c
)
{}
static
inline
void
enable_p5_mce
(
void
)
{}
#endif
void
mce_setup
(
struct
mce
*
m
);
void
mce_setup
(
struct
mce
*
m
);
void
mce_log
(
struct
mce
*
m
);
void
mce_log
(
struct
mce
*
m
);
DECLARE_PER_CPU
(
struct
sys_device
,
mce_dev
);
DECLARE_PER_CPU
(
struct
sys_device
,
mce_dev
);
extern
void
(
*
threshold_cpu_callback
)(
unsigned
long
action
,
unsigned
int
cpu
);
/*
/*
* To support more than 128 would need to escape the predefined
* To support more than 128 would need to escape the predefined
...
@@ -145,12 +169,8 @@ int mce_available(struct cpuinfo_x86 *c);
...
@@ -145,12 +169,8 @@ int mce_available(struct cpuinfo_x86 *c);
DECLARE_PER_CPU
(
unsigned
,
mce_exception_count
);
DECLARE_PER_CPU
(
unsigned
,
mce_exception_count
);
DECLARE_PER_CPU
(
unsigned
,
mce_poll_count
);
DECLARE_PER_CPU
(
unsigned
,
mce_poll_count
);
void
mce_log_therm_throt_event
(
__u64
status
);
extern
atomic_t
mce_entry
;
extern
atomic_t
mce_entry
;
void
do_machine_check
(
struct
pt_regs
*
,
long
);
typedef
DECLARE_BITMAP
(
mce_banks_t
,
MAX_NR_BANKS
);
typedef
DECLARE_BITMAP
(
mce_banks_t
,
MAX_NR_BANKS
);
DECLARE_PER_CPU
(
mce_banks_t
,
mce_poll_banks
);
DECLARE_PER_CPU
(
mce_banks_t
,
mce_poll_banks
);
...
@@ -167,13 +187,32 @@ void mce_notify_process(void);
...
@@ -167,13 +187,32 @@ void mce_notify_process(void);
DECLARE_PER_CPU
(
struct
mce
,
injectm
);
DECLARE_PER_CPU
(
struct
mce
,
injectm
);
extern
struct
file_operations
mce_chrdev_ops
;
extern
struct
file_operations
mce_chrdev_ops
;
#ifdef CONFIG_X86_MCE
/*
void
mcheck_init
(
struct
cpuinfo_x86
*
c
);
* Exception handler
#else
*/
#define mcheck_init(c) do { } while (0)
#endif
/* Call the installed machine check handler for this CPU setup. */
extern
void
(
*
machine_check_vector
)(
struct
pt_regs
*
,
long
error_code
);
void
do_machine_check
(
struct
pt_regs
*
,
long
);
/*
* Threshold handler
*/
extern
void
(
*
mce_threshold_vector
)(
void
);
extern
void
(
*
mce_threshold_vector
)(
void
);
extern
void
(
*
threshold_cpu_callback
)(
unsigned
long
action
,
unsigned
int
cpu
);
/*
* Thermal handler
*/
void
intel_init_thermal
(
struct
cpuinfo_x86
*
c
);
#ifdef CONFIG_X86_NEW_MCE
void
mce_log_therm_throt_event
(
__u64
status
);
#else
static
inline
void
mce_log_therm_throt_event
(
__u64
status
)
{}
#endif
#endif
/* __KERNEL__ */
#endif
/* __KERNEL__ */
#endif
/* _ASM_X86_MCE_H */
#endif
/* _ASM_X86_MCE_H */
arch/x86/include/asm/therm_throt.h
已删除
100644 → 0
浏览文件 @
bc3f5d3d
#ifndef _ASM_X86_THERM_THROT_H
#define _ASM_X86_THERM_THROT_H
#include <asm/atomic.h>
extern
atomic_t
therm_throt_en
;
int
therm_throt_process
(
int
curr
);
#endif
/* _ASM_X86_THERM_THROT_H */
arch/x86/kernel/cpu/mcheck/Makefile
浏览文件 @
1d991001
obj-y
=
mce.o
therm_throt.o
obj-y
=
mce.o
obj-$(CONFIG_X86_NEW_MCE)
+=
mce-severity.o
obj-$(CONFIG_X86_NEW_MCE)
+=
mce-severity.o
obj-$(CONFIG_X86_OLD_MCE)
+=
k7.o p4.o p6.o
obj-$(CONFIG_X86_OLD_MCE)
+=
k7.o p4.o p6.o
obj-$(CONFIG_X86_ANCIENT_MCE)
+=
winchip.o p5.o
obj-$(CONFIG_X86_ANCIENT_MCE)
+=
winchip.o p5.o
obj-$(CONFIG_X86_MCE_P4THERMAL)
+=
mce_intel.o
obj-$(CONFIG_X86_MCE_INTEL)
+=
mce_intel.o
obj-$(CONFIG_X86_MCE_INTEL)
+=
mce_intel_64.o mce_intel.o
obj-$(CONFIG_X86_MCE_AMD)
+=
mce_amd.o
obj-$(CONFIG_X86_MCE_AMD)
+=
mce_amd_64.o
obj-$(CONFIG_X86_MCE_NONFATAL)
+=
non-fatal.o
obj-$(CONFIG_X86_MCE_NONFATAL)
+=
non-fatal.o
obj-$(CONFIG_X86_MCE_THRESHOLD)
+=
threshold.o
obj-$(CONFIG_X86_MCE_THRESHOLD)
+=
threshold.o
obj-$(CONFIG_X86_MCE_INJECT)
+=
mce-inject.o
obj-$(CONFIG_X86_MCE_INJECT)
+=
mce-inject.o
obj-$(CONFIG_X86_THERMAL_VECTOR)
+=
therm_throt.o
arch/x86/kernel/cpu/mcheck/k7.c
浏览文件 @
1d991001
...
@@ -10,10 +10,9 @@
...
@@ -10,10 +10,9 @@
#include <asm/processor.h>
#include <asm/processor.h>
#include <asm/system.h>
#include <asm/system.h>
#include <asm/mce.h>
#include <asm/msr.h>
#include <asm/msr.h>
#include "mce.h"
/* Machine Check Handler For AMD Athlon/Duron: */
/* Machine Check Handler For AMD Athlon/Duron: */
static
void
k7_machine_check
(
struct
pt_regs
*
regs
,
long
error_code
)
static
void
k7_machine_check
(
struct
pt_regs
*
regs
,
long
error_code
)
{
{
...
...
arch/x86/kernel/cpu/mcheck/mce.c
浏览文件 @
1d991001
...
@@ -44,7 +44,6 @@
...
@@ -44,7 +44,6 @@
#include <asm/msr.h>
#include <asm/msr.h>
#include "mce-internal.h"
#include "mce-internal.h"
#include "mce.h"
/* Handle unconfigured int18 (should never happen) */
/* Handle unconfigured int18 (should never happen) */
static
void
unexpected_machine_check
(
struct
pt_regs
*
regs
,
long
error_code
)
static
void
unexpected_machine_check
(
struct
pt_regs
*
regs
,
long
error_code
)
...
@@ -57,7 +56,7 @@ static void unexpected_machine_check(struct pt_regs *regs, long error_code)
...
@@ -57,7 +56,7 @@ static void unexpected_machine_check(struct pt_regs *regs, long error_code)
void
(
*
machine_check_vector
)(
struct
pt_regs
*
,
long
error_code
)
=
void
(
*
machine_check_vector
)(
struct
pt_regs
*
,
long
error_code
)
=
unexpected_machine_check
;
unexpected_machine_check
;
int
mce_disabled
;
int
mce_disabled
__read_mostly
;
#ifdef CONFIG_X86_NEW_MCE
#ifdef CONFIG_X86_NEW_MCE
...
@@ -76,21 +75,22 @@ DEFINE_PER_CPU(unsigned, mce_exception_count);
...
@@ -76,21 +75,22 @@ DEFINE_PER_CPU(unsigned, mce_exception_count);
* 2: SIGBUS or log uncorrected errors (if possible), log corrected errors
* 2: SIGBUS or log uncorrected errors (if possible), log corrected errors
* 3: never panic or SIGBUS, log all errors (for testing only)
* 3: never panic or SIGBUS, log all errors (for testing only)
*/
*/
static
int
tolerant
=
1
;
static
int
tolerant
__read_mostly
=
1
;
static
int
banks
;
static
int
banks
__read_mostly
;
static
u64
*
bank
;
static
u64
*
bank
__read_mostly
;
static
unsigned
long
notify_user
;
static
int
rip_msr
__read_mostly
;
static
int
rip_msr
;
static
int
mce_bootlog
__read_mostly
=
-
1
;
static
int
mce_bootlog
=
-
1
;
static
int
monarch_timeout
__read_mostly
=
-
1
;
static
int
monarch_timeout
=
-
1
;
static
int
mce_panic_timeout
__read_mostly
;
static
int
mce_panic_timeout
;
static
int
mce_dont_log_ce
__read_mostly
;
static
int
mce_dont_log_ce
;
int
mce_cmci_disabled
__read_mostly
;
int
mce_cmci_disabled
;
int
mce_ignore_ce
__read_mostly
;
int
mce_ignore_ce
;
int
mce_ser
__read_mostly
;
int
mce_ser
;
/* User mode helper program triggered by machine check event */
static
char
trigger
[
128
];
static
unsigned
long
mce_need_notify
;
static
char
*
trigger_argv
[
2
]
=
{
trigger
,
NULL
};
static
char
mce_helper
[
128
];
static
char
*
mce_helper_argv
[
2
]
=
{
mce_helper
,
NULL
};
static
unsigned
long
dont_init_banks
;
static
unsigned
long
dont_init_banks
;
...
@@ -180,7 +180,7 @@ void mce_log(struct mce *mce)
...
@@ -180,7 +180,7 @@ void mce_log(struct mce *mce)
wmb
();
wmb
();
mce
->
finished
=
1
;
mce
->
finished
=
1
;
set_bit
(
0
,
&
notify_user
);
set_bit
(
0
,
&
mce_need_notify
);
}
}
static
void
print_mce
(
struct
mce
*
m
)
static
void
print_mce
(
struct
mce
*
m
)
...
@@ -691,18 +691,21 @@ static atomic_t global_nwo;
...
@@ -691,18 +691,21 @@ static atomic_t global_nwo;
* in the entry order.
* in the entry order.
* TBD double check parallel CPU hotunplug
* TBD double check parallel CPU hotunplug
*/
*/
static
int
mce_start
(
int
no_way_out
,
int
*
order
)
static
int
mce_start
(
int
*
no_way_out
)
{
{
int
nwo
;
int
order
;
int
cpus
=
num_online_cpus
();
int
cpus
=
num_online_cpus
();
u64
timeout
=
(
u64
)
monarch_timeout
*
NSEC_PER_USEC
;
u64
timeout
=
(
u64
)
monarch_timeout
*
NSEC_PER_USEC
;
if
(
!
timeout
)
{
if
(
!
timeout
)
*
order
=
-
1
;
return
-
1
;
return
no_way_out
;
}
atomic_add
(
no_way_out
,
&
global_nwo
);
atomic_add
(
*
no_way_out
,
&
global_nwo
);
/*
* global_nwo should be updated before mce_callin
*/
smp_wmb
();
order
=
atomic_add_return
(
1
,
&
mce_callin
);
/*
/*
* Wait for everyone.
* Wait for everyone.
...
@@ -710,40 +713,43 @@ static int mce_start(int no_way_out, int *order)
...
@@ -710,40 +713,43 @@ static int mce_start(int no_way_out, int *order)
while
(
atomic_read
(
&
mce_callin
)
!=
cpus
)
{
while
(
atomic_read
(
&
mce_callin
)
!=
cpus
)
{
if
(
mce_timed_out
(
&
timeout
))
{
if
(
mce_timed_out
(
&
timeout
))
{
atomic_set
(
&
global_nwo
,
0
);
atomic_set
(
&
global_nwo
,
0
);
*
order
=
-
1
;
return
-
1
;
return
no_way_out
;
}
}
ndelay
(
SPINUNIT
);
ndelay
(
SPINUNIT
);
}
}
/*
/*
*
Cache the global no_way_out state.
*
mce_callin should be read before global_nwo
*/
*/
nwo
=
atomic_read
(
&
global_nwo
);
smp_rmb
(
);
/*
if
(
order
==
1
)
{
* Monarch starts executing now, the others wait.
/*
*/
* Monarch: Starts executing now, the others wait.
if
(
*
order
==
1
)
{
*/
atomic_set
(
&
mce_executing
,
1
);
atomic_set
(
&
mce_executing
,
1
);
return
nwo
;
}
else
{
/*
* Subject: Now start the scanning loop one by one in
* the original callin order.
* This way when there are any shared banks it will be
* only seen by one CPU before cleared, avoiding duplicates.
*/
while
(
atomic_read
(
&
mce_executing
)
<
order
)
{
if
(
mce_timed_out
(
&
timeout
))
{
atomic_set
(
&
global_nwo
,
0
);
return
-
1
;
}
ndelay
(
SPINUNIT
);
}
}
}
/*
/*
* Now start the scanning loop one by one
* Cache the global no_way_out state.
* in the original callin order.
* This way when there are any shared banks it will
* be only seen by one CPU before cleared, avoiding duplicates.
*/
*/
while
(
atomic_read
(
&
mce_executing
)
<
*
order
)
{
*
no_way_out
=
atomic_read
(
&
global_nwo
);
if
(
mce_timed_out
(
&
timeout
))
{
atomic_set
(
&
global_nwo
,
0
);
return
order
;
*
order
=
-
1
;
return
no_way_out
;
}
ndelay
(
SPINUNIT
);
}
return
nwo
;
}
}
/*
/*
...
@@ -863,7 +869,6 @@ void do_machine_check(struct pt_regs *regs, long error_code)
...
@@ -863,7 +869,6 @@ void do_machine_check(struct pt_regs *regs, long error_code)
* check handler.
* check handler.
*/
*/
int
order
;
int
order
;
/*
/*
* If no_way_out gets set, there is no safe way to recover from this
* If no_way_out gets set, there is no safe way to recover from this
* MCE. If tolerant is cranked up, we'll try anyway.
* MCE. If tolerant is cranked up, we'll try anyway.
...
@@ -887,7 +892,6 @@ void do_machine_check(struct pt_regs *regs, long error_code)
...
@@ -887,7 +892,6 @@ void do_machine_check(struct pt_regs *regs, long error_code)
if
(
!
banks
)
if
(
!
banks
)
goto
out
;
goto
out
;
order
=
atomic_add_return
(
1
,
&
mce_callin
);
mce_setup
(
&
m
);
mce_setup
(
&
m
);
m
.
mcgstatus
=
mce_rdmsrl
(
MSR_IA32_MCG_STATUS
);
m
.
mcgstatus
=
mce_rdmsrl
(
MSR_IA32_MCG_STATUS
);
...
@@ -909,7 +913,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
...
@@ -909,7 +913,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
* This way we don't report duplicated events on shared banks
* This way we don't report duplicated events on shared banks
* because the first one to see it will clear it.
* because the first one to see it will clear it.
*/
*/
no_way_out
=
mce_start
(
no_way_out
,
&
order
);
order
=
mce_start
(
&
no_way_out
);
for
(
i
=
0
;
i
<
banks
;
i
++
)
{
for
(
i
=
0
;
i
<
banks
;
i
++
)
{
__clear_bit
(
i
,
toclear
);
__clear_bit
(
i
,
toclear
);
if
(
!
bank
[
i
])
if
(
!
bank
[
i
])
...
@@ -1118,7 +1122,7 @@ static void mcheck_timer(unsigned long data)
...
@@ -1118,7 +1122,7 @@ static void mcheck_timer(unsigned long data)
static
void
mce_do_trigger
(
struct
work_struct
*
work
)
static
void
mce_do_trigger
(
struct
work_struct
*
work
)
{
{
call_usermodehelper
(
trigger
,
trigg
er_argv
,
NULL
,
UMH_NO_WAIT
);
call_usermodehelper
(
mce_helper
,
mce_help
er_argv
,
NULL
,
UMH_NO_WAIT
);
}
}
static
DECLARE_WORK
(
mce_trigger_work
,
mce_do_trigger
);
static
DECLARE_WORK
(
mce_trigger_work
,
mce_do_trigger
);
...
@@ -1135,7 +1139,7 @@ int mce_notify_irq(void)
...
@@ -1135,7 +1139,7 @@ int mce_notify_irq(void)
clear_thread_flag
(
TIF_MCE_NOTIFY
);
clear_thread_flag
(
TIF_MCE_NOTIFY
);
if
(
test_and_clear_bit
(
0
,
&
notify_user
))
{
if
(
test_and_clear_bit
(
0
,
&
mce_need_notify
))
{
wake_up_interruptible
(
&
mce_wait
);
wake_up_interruptible
(
&
mce_wait
);
/*
/*
...
@@ -1143,7 +1147,7 @@ int mce_notify_irq(void)
...
@@ -1143,7 +1147,7 @@ int mce_notify_irq(void)
* work_pending is always cleared before the function is
* work_pending is always cleared before the function is
* executed.
* executed.
*/
*/
if
(
trigg
er
[
0
]
&&
!
work_pending
(
&
mce_trigger_work
))
if
(
mce_help
er
[
0
]
&&
!
work_pending
(
&
mce_trigger_work
))
schedule_work
(
&
mce_trigger_work
);
schedule_work
(
&
mce_trigger_work
);
if
(
__ratelimit
(
&
ratelimit
))
if
(
__ratelimit
(
&
ratelimit
))
...
@@ -1282,8 +1286,7 @@ static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c)
...
@@ -1282,8 +1286,7 @@ static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c)
return
;
return
;
switch
(
c
->
x86_vendor
)
{
switch
(
c
->
x86_vendor
)
{
case
X86_VENDOR_INTEL
:
case
X86_VENDOR_INTEL
:
if
(
mce_p5_enabled
())
intel_p5_mcheck_init
(
c
);
intel_p5_mcheck_init
(
c
);
break
;
break
;
case
X86_VENDOR_CENTAUR
:
case
X86_VENDOR_CENTAUR
:
winchip_mcheck_init
(
c
);
winchip_mcheck_init
(
c
);
...
@@ -1609,8 +1612,9 @@ static int mce_resume(struct sys_device *dev)
...
@@ -1609,8 +1612,9 @@ static int mce_resume(struct sys_device *dev)
static
void
mce_cpu_restart
(
void
*
data
)
static
void
mce_cpu_restart
(
void
*
data
)
{
{
del_timer_sync
(
&
__get_cpu_var
(
mce_timer
));
del_timer_sync
(
&
__get_cpu_var
(
mce_timer
));
if
(
mce_available
(
&
current_cpu_data
))
if
(
!
mce_available
(
&
current_cpu_data
))
mce_init
();
return
;
mce_init
();
mce_init_timer
();
mce_init_timer
();
}
}
...
@@ -1620,6 +1624,26 @@ static void mce_restart(void)
...
@@ -1620,6 +1624,26 @@ static void mce_restart(void)
on_each_cpu
(
mce_cpu_restart
,
NULL
,
1
);
on_each_cpu
(
mce_cpu_restart
,
NULL
,
1
);
}
}
/* Toggle features for corrected errors */
static
void
mce_disable_ce
(
void
*
all
)
{
if
(
!
mce_available
(
&
current_cpu_data
))
return
;
if
(
all
)
del_timer_sync
(
&
__get_cpu_var
(
mce_timer
));
cmci_clear
();
}
static
void
mce_enable_ce
(
void
*
all
)
{
if
(
!
mce_available
(
&
current_cpu_data
))
return
;
cmci_reenable
();
cmci_recheck
();
if
(
all
)
mce_init_timer
();
}
static
struct
sysdev_class
mce_sysclass
=
{
static
struct
sysdev_class
mce_sysclass
=
{
.
suspend
=
mce_suspend
,
.
suspend
=
mce_suspend
,
.
shutdown
=
mce_shutdown
,
.
shutdown
=
mce_shutdown
,
...
@@ -1659,9 +1683,9 @@ static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr,
...
@@ -1659,9 +1683,9 @@ static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr,
static
ssize_t
static
ssize_t
show_trigger
(
struct
sys_device
*
s
,
struct
sysdev_attribute
*
attr
,
char
*
buf
)
show_trigger
(
struct
sys_device
*
s
,
struct
sysdev_attribute
*
attr
,
char
*
buf
)
{
{
strcpy
(
buf
,
trigg
er
);
strcpy
(
buf
,
mce_help
er
);
strcat
(
buf
,
"
\n
"
);
strcat
(
buf
,
"
\n
"
);
return
strlen
(
trigg
er
)
+
1
;
return
strlen
(
mce_help
er
)
+
1
;
}
}
static
ssize_t
set_trigger
(
struct
sys_device
*
s
,
struct
sysdev_attribute
*
attr
,
static
ssize_t
set_trigger
(
struct
sys_device
*
s
,
struct
sysdev_attribute
*
attr
,
...
@@ -1670,10 +1694,10 @@ static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr,
...
@@ -1670,10 +1694,10 @@ static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr,
char
*
p
;
char
*
p
;
int
len
;
int
len
;
strncpy
(
trigger
,
buf
,
sizeof
(
trigg
er
));
strncpy
(
mce_helper
,
buf
,
sizeof
(
mce_help
er
));
trigger
[
sizeof
(
trigg
er
)
-
1
]
=
0
;
mce_helper
[
sizeof
(
mce_help
er
)
-
1
]
=
0
;
len
=
strlen
(
trigg
er
);
len
=
strlen
(
mce_help
er
);
p
=
strchr
(
trigg
er
,
'\n'
);
p
=
strchr
(
mce_help
er
,
'\n'
);
if
(
*
p
)
if
(
*
p
)
*
p
=
0
;
*
p
=
0
;
...
@@ -1681,6 +1705,52 @@ static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr,
...
@@ -1681,6 +1705,52 @@ static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr,
return
len
;
return
len
;
}
}
static
ssize_t
set_ignore_ce
(
struct
sys_device
*
s
,
struct
sysdev_attribute
*
attr
,
const
char
*
buf
,
size_t
size
)
{
u64
new
;
if
(
strict_strtoull
(
buf
,
0
,
&
new
)
<
0
)
return
-
EINVAL
;
if
(
mce_ignore_ce
^
!!
new
)
{
if
(
new
)
{
/* disable ce features */
on_each_cpu
(
mce_disable_ce
,
(
void
*
)
1
,
1
);
mce_ignore_ce
=
1
;
}
else
{
/* enable ce features */
mce_ignore_ce
=
0
;
on_each_cpu
(
mce_enable_ce
,
(
void
*
)
1
,
1
);
}
}
return
size
;
}
static
ssize_t
set_cmci_disabled
(
struct
sys_device
*
s
,
struct
sysdev_attribute
*
attr
,
const
char
*
buf
,
size_t
size
)
{
u64
new
;
if
(
strict_strtoull
(
buf
,
0
,
&
new
)
<
0
)
return
-
EINVAL
;
if
(
mce_cmci_disabled
^
!!
new
)
{
if
(
new
)
{
/* disable cmci */
on_each_cpu
(
mce_disable_ce
,
NULL
,
1
);
mce_cmci_disabled
=
1
;
}
else
{
/* enable cmci */
mce_cmci_disabled
=
0
;
on_each_cpu
(
mce_enable_ce
,
NULL
,
1
);
}
}
return
size
;
}
static
ssize_t
store_int_with_restart
(
struct
sys_device
*
s
,
static
ssize_t
store_int_with_restart
(
struct
sys_device
*
s
,
struct
sysdev_attribute
*
attr
,
struct
sysdev_attribute
*
attr
,
const
char
*
buf
,
size_t
size
)
const
char
*
buf
,
size_t
size
)
...
@@ -1693,6 +1763,7 @@ static ssize_t store_int_with_restart(struct sys_device *s,
...
@@ -1693,6 +1763,7 @@ static ssize_t store_int_with_restart(struct sys_device *s,
static
SYSDEV_ATTR
(
trigger
,
0644
,
show_trigger
,
set_trigger
);
static
SYSDEV_ATTR
(
trigger
,
0644
,
show_trigger
,
set_trigger
);
static
SYSDEV_INT_ATTR
(
tolerant
,
0644
,
tolerant
);
static
SYSDEV_INT_ATTR
(
tolerant
,
0644
,
tolerant
);
static
SYSDEV_INT_ATTR
(
monarch_timeout
,
0644
,
monarch_timeout
);
static
SYSDEV_INT_ATTR
(
monarch_timeout
,
0644
,
monarch_timeout
);
static
SYSDEV_INT_ATTR
(
dont_log_ce
,
0644
,
mce_dont_log_ce
);
static
struct
sysdev_ext_attribute
attr_check_interval
=
{
static
struct
sysdev_ext_attribute
attr_check_interval
=
{
_SYSDEV_ATTR
(
check_interval
,
0644
,
sysdev_show_int
,
_SYSDEV_ATTR
(
check_interval
,
0644
,
sysdev_show_int
,
...
@@ -1700,9 +1771,24 @@ static struct sysdev_ext_attribute attr_check_interval = {
...
@@ -1700,9 +1771,24 @@ static struct sysdev_ext_attribute attr_check_interval = {
&
check_interval
&
check_interval
};
};
static
struct
sysdev_ext_attribute
attr_ignore_ce
=
{
_SYSDEV_ATTR
(
ignore_ce
,
0644
,
sysdev_show_int
,
set_ignore_ce
),
&
mce_ignore_ce
};
static
struct
sysdev_ext_attribute
attr_cmci_disabled
=
{
_SYSDEV_ATTR
(
cmci_disabled
,
0644
,
sysdev_show_int
,
set_cmci_disabled
),
&
mce_cmci_disabled
};
static
struct
sysdev_attribute
*
mce_attrs
[]
=
{
static
struct
sysdev_attribute
*
mce_attrs
[]
=
{
&
attr_tolerant
.
attr
,
&
attr_check_interval
.
attr
,
&
attr_trigger
,
&
attr_tolerant
.
attr
,
&
attr_check_interval
.
attr
,
&
attr_trigger
,
&
attr_monarch_timeout
.
attr
,
&
attr_monarch_timeout
.
attr
,
&
attr_dont_log_ce
.
attr
,
&
attr_ignore_ce
.
attr
,
&
attr_cmci_disabled
.
attr
,
NULL
NULL
};
};
...
@@ -1712,7 +1798,7 @@ static cpumask_var_t mce_dev_initialized;
...
@@ -1712,7 +1798,7 @@ static cpumask_var_t mce_dev_initialized;
static
__cpuinit
int
mce_create_device
(
unsigned
int
cpu
)
static
__cpuinit
int
mce_create_device
(
unsigned
int
cpu
)
{
{
int
err
;
int
err
;
int
i
;
int
i
,
j
;
if
(
!
mce_available
(
&
boot_cpu_data
))
if
(
!
mce_available
(
&
boot_cpu_data
))
return
-
EIO
;
return
-
EIO
;
...
@@ -1730,9 +1816,9 @@ static __cpuinit int mce_create_device(unsigned int cpu)
...
@@ -1730,9 +1816,9 @@ static __cpuinit int mce_create_device(unsigned int cpu)
if
(
err
)
if
(
err
)
goto
error
;
goto
error
;
}
}
for
(
i
=
0
;
i
<
banks
;
i
++
)
{
for
(
j
=
0
;
j
<
banks
;
j
++
)
{
err
=
sysdev_create_file
(
&
per_cpu
(
mce_dev
,
cpu
),
err
=
sysdev_create_file
(
&
per_cpu
(
mce_dev
,
cpu
),
&
bank_attrs
[
i
]);
&
bank_attrs
[
j
]);
if
(
err
)
if
(
err
)
goto
error2
;
goto
error2
;
}
}
...
@@ -1740,8 +1826,8 @@ static __cpuinit int mce_create_device(unsigned int cpu)
...
@@ -1740,8 +1826,8 @@ static __cpuinit int mce_create_device(unsigned int cpu)
return
0
;
return
0
;
error2:
error2:
while
(
--
i
>=
0
)
while
(
--
j
>=
0
)
sysdev_remove_file
(
&
per_cpu
(
mce_dev
,
cpu
),
&
bank_attrs
[
i
]);
sysdev_remove_file
(
&
per_cpu
(
mce_dev
,
cpu
),
&
bank_attrs
[
j
]);
error:
error:
while
(
--
i
>=
0
)
while
(
--
i
>=
0
)
sysdev_remove_file
(
&
per_cpu
(
mce_dev
,
cpu
),
mce_attrs
[
i
]);
sysdev_remove_file
(
&
per_cpu
(
mce_dev
,
cpu
),
mce_attrs
[
i
]);
...
@@ -1883,7 +1969,7 @@ static __init int mce_init_device(void)
...
@@ -1883,7 +1969,7 @@ static __init int mce_init_device(void)
if
(
!
mce_available
(
&
boot_cpu_data
))
if
(
!
mce_available
(
&
boot_cpu_data
))
return
-
EIO
;
return
-
EIO
;
alloc_cpumask_var
(
&
mce_dev_initialized
,
GFP_KERNEL
);
z
alloc_cpumask_var
(
&
mce_dev_initialized
,
GFP_KERNEL
);
err
=
mce_init_banks
();
err
=
mce_init_banks
();
if
(
err
)
if
(
err
)
...
@@ -1915,7 +2001,7 @@ EXPORT_SYMBOL_GPL(nr_mce_banks); /* non-fatal.o */
...
@@ -1915,7 +2001,7 @@ EXPORT_SYMBOL_GPL(nr_mce_banks); /* non-fatal.o */
/* This has to be run for each processor */
/* This has to be run for each processor */
void
mcheck_init
(
struct
cpuinfo_x86
*
c
)
void
mcheck_init
(
struct
cpuinfo_x86
*
c
)
{
{
if
(
mce_disabled
==
1
)
if
(
mce_disabled
)
return
;
return
;
switch
(
c
->
x86_vendor
)
{
switch
(
c
->
x86_vendor
)
{
...
@@ -1945,10 +2031,9 @@ void mcheck_init(struct cpuinfo_x86 *c)
...
@@ -1945,10 +2031,9 @@ void mcheck_init(struct cpuinfo_x86 *c)
static
int
__init
mcheck_enable
(
char
*
str
)
static
int
__init
mcheck_enable
(
char
*
str
)
{
{
mce_
disabled
=
-
1
;
mce_
p5_enabled
=
1
;
return
1
;
return
1
;
}
}
__setup
(
"mce"
,
mcheck_enable
);
__setup
(
"mce"
,
mcheck_enable
);
#endif
/* CONFIG_X86_OLD_MCE */
#endif
/* CONFIG_X86_OLD_MCE */
...
...
arch/x86/kernel/cpu/mcheck/mce.h
已删除
100644 → 0
浏览文件 @
bc3f5d3d
#include <linux/init.h>
#include <asm/mce.h>
#ifdef CONFIG_X86_OLD_MCE
void
amd_mcheck_init
(
struct
cpuinfo_x86
*
c
);
void
intel_p4_mcheck_init
(
struct
cpuinfo_x86
*
c
);
void
intel_p6_mcheck_init
(
struct
cpuinfo_x86
*
c
);
#endif
#ifdef CONFIG_X86_ANCIENT_MCE
void
intel_p5_mcheck_init
(
struct
cpuinfo_x86
*
c
);
void
winchip_mcheck_init
(
struct
cpuinfo_x86
*
c
);
extern
int
mce_p5_enable
;
static
inline
int
mce_p5_enabled
(
void
)
{
return
mce_p5_enable
;
}
static
inline
void
enable_p5_mce
(
void
)
{
mce_p5_enable
=
1
;
}
#else
static
inline
void
intel_p5_mcheck_init
(
struct
cpuinfo_x86
*
c
)
{}
static
inline
void
winchip_mcheck_init
(
struct
cpuinfo_x86
*
c
)
{}
static
inline
int
mce_p5_enabled
(
void
)
{
return
0
;
}
static
inline
void
enable_p5_mce
(
void
)
{
}
#endif
/* Call the installed machine check handler for this CPU setup. */
extern
void
(
*
machine_check_vector
)(
struct
pt_regs
*
,
long
error_code
);
#ifdef CONFIG_X86_OLD_MCE
extern
int
nr_mce_banks
;
void
intel_set_thermal_handler
(
void
);
#else
static
inline
void
intel_set_thermal_handler
(
void
)
{
}
#endif
void
intel_init_thermal
(
struct
cpuinfo_x86
*
c
);
arch/x86/kernel/cpu/mcheck/mce_amd
_64
.c
→
arch/x86/kernel/cpu/mcheck/mce_amd.c
浏览文件 @
1d991001
文件已移动
arch/x86/kernel/cpu/mcheck/mce_intel.c
浏览文件 @
1d991001
/*
/*
* Common code for Intel machine checks
* Intel specific MCE features.
* Copyright 2004 Zwane Mwaikambo <zwane@linuxpower.ca>
* Copyright (C) 2008, 2009 Intel Corporation
* Author: Andi Kleen
*/
*/
#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/smp.h>
#include <
asm/therm_thro
t.h>
#include <
linux/ini
t.h>
#include <
asm/processor
.h>
#include <
linux/interrupt
.h>
#include <
asm/system
.h>
#include <
linux/percpu
.h>
#include <asm/apic.h>
#include <asm/apic.h>
#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/msr.h>
#include <asm/mce.h>
/*
* Support for Intel Correct Machine Check Interrupts. This allows
* the CPU to raise an interrupt when a corrected machine check happened.
* Normally we pick those up using a regular polling timer.
* Also supports reliable discovery of shared banks.
*/
#include "mce.h"
static
DEFINE_PER_CPU
(
mce_banks_t
,
mce_banks_owned
);
void
intel_init_thermal
(
struct
cpuinfo_x86
*
c
)
/*
* cmci_discover_lock protects against parallel discovery attempts
* which could race against each other.
*/
static
DEFINE_SPINLOCK
(
cmci_discover_lock
);
#define CMCI_THRESHOLD 1
static
int
cmci_supported
(
int
*
banks
)
{
{
unsigned
int
cpu
=
smp_processor_id
();
u64
cap
;
int
tm2
=
0
;
u32
l
,
h
;
if
(
mce_cmci_disabled
||
mce_ignore_ce
)
return
0
;
/*
/*
* Thermal monitoring depends on ACPI, clock modulation
* Vendor check is not strictly needed, but the initial
* and APIC as well
* initialization is vendor keyed and this
* makes sure none of the backdoors are entered otherwise.
*/
*/
if
(
!
cpu_has
(
c
,
X86_FEATURE_ACPI
)
||
!
cpu_has
(
c
,
X86_FEATURE_ACC
)
||
if
(
boot_cpu_data
.
x86_vendor
!=
X86_VENDOR_INTEL
)
!
cpu_has
(
c
,
X86_FEATURE_APIC
))
{
return
0
;
pr_debug
(
"Thermal monitoring disabled
\n
"
);
if
(
!
cpu_has_apic
||
lapic_get_maxlvt
()
<
6
)
return
;
return
0
;
rdmsrl
(
MSR_IA32_MCG_CAP
,
cap
);
*
banks
=
min_t
(
unsigned
,
MAX_NR_BANKS
,
cap
&
0xff
);
return
!!
(
cap
&
MCG_CMCI_P
);
}
/*
* The interrupt handler. This is called on every event.
* Just call the poller directly to log any events.
* This could in theory increase the threshold under high load,
* but doesn't for now.
*/
static
void
intel_threshold_interrupt
(
void
)
{
machine_check_poll
(
MCP_TIMESTAMP
,
&
__get_cpu_var
(
mce_banks_owned
));
mce_notify_irq
();
}
static
void
print_update
(
char
*
type
,
int
*
hdr
,
int
num
)
{
if
(
*
hdr
==
0
)
printk
(
KERN_INFO
"CPU %d MCA banks"
,
smp_processor_id
());
*
hdr
=
1
;
printk
(
KERN_CONT
" %s:%d"
,
type
,
num
);
}
/*
* Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks
* on this CPU. Use the algorithm recommended in the SDM to discover shared
* banks.
*/
static
void
cmci_discover
(
int
banks
,
int
boot
)
{
unsigned
long
*
owned
=
(
void
*
)
&
__get_cpu_var
(
mce_banks_owned
);
unsigned
long
flags
;
int
hdr
=
0
;
int
i
;
spin_lock_irqsave
(
&
cmci_discover_lock
,
flags
);
for
(
i
=
0
;
i
<
banks
;
i
++
)
{
u64
val
;
if
(
test_bit
(
i
,
owned
))
continue
;
rdmsrl
(
MSR_IA32_MC0_CTL2
+
i
,
val
);
/* Already owned by someone else? */
if
(
val
&
CMCI_EN
)
{
if
(
test_and_clear_bit
(
i
,
owned
)
||
boot
)
print_update
(
"SHD"
,
&
hdr
,
i
);
__clear_bit
(
i
,
__get_cpu_var
(
mce_poll_banks
));
continue
;
}
val
|=
CMCI_EN
|
CMCI_THRESHOLD
;
wrmsrl
(
MSR_IA32_MC0_CTL2
+
i
,
val
);
rdmsrl
(
MSR_IA32_MC0_CTL2
+
i
,
val
);
/* Did the enable bit stick? -- the bank supports CMCI */
if
(
val
&
CMCI_EN
)
{
if
(
!
test_and_set_bit
(
i
,
owned
)
||
boot
)
print_update
(
"CMCI"
,
&
hdr
,
i
);
__clear_bit
(
i
,
__get_cpu_var
(
mce_poll_banks
));
}
else
{
WARN_ON
(
!
test_bit
(
i
,
__get_cpu_var
(
mce_poll_banks
)));
}
}
}
spin_unlock_irqrestore
(
&
cmci_discover_lock
,
flags
);
if
(
hdr
)
printk
(
KERN_CONT
"
\n
"
);
}
/*
/*
* First check if its enabled already, in which case there might
* Just in case we missed an event during initialization check
* be some SMM goo which handles it, so we can't even put a handler
* all the CMCI owned banks.
* since it might be delivered via SMI already:
*/
*/
void
cmci_recheck
(
void
)
rdmsr
(
MSR_IA32_MISC_ENABLE
,
l
,
h
);
{
h
=
apic_read
(
APIC_LVTTHMR
)
;
unsigned
long
flags
;
i
f
((
l
&
MSR_IA32_MISC_ENABLE_TM1
)
&&
(
h
&
APIC_DM_SMI
))
{
i
nt
banks
;
printk
(
KERN_DEBUG
"CPU%d: Thermal monitoring handled by SMI
\n
"
,
cpu
);
if
(
!
mce_available
(
&
current_cpu_data
)
||
!
cmci_supported
(
&
banks
))
return
;
return
;
}
local_irq_save
(
flags
);
machine_check_poll
(
MCP_TIMESTAMP
,
&
__get_cpu_var
(
mce_banks_owned
));
local_irq_restore
(
flags
);
}
if
(
cpu_has
(
c
,
X86_FEATURE_TM2
)
&&
(
l
&
MSR_IA32_MISC_ENABLE_TM2
))
/*
tm2
=
1
;
* Disable CMCI on this CPU for all banks it owns when it goes down.
* This allows other CPUs to claim the banks on rediscovery.
*/
void
cmci_clear
(
void
)
{
unsigned
long
flags
;
int
i
;
int
banks
;
u64
val
;
/* Check whether a vector already exists */
if
(
!
cmci_supported
(
&
banks
))
if
(
h
&
APIC_VECTOR_MASK
)
{
printk
(
KERN_DEBUG
"CPU%d: Thermal LVT vector (%#x) already installed
\n
"
,
cpu
,
(
h
&
APIC_VECTOR_MASK
));
return
;
return
;
spin_lock_irqsave
(
&
cmci_discover_lock
,
flags
);
for
(
i
=
0
;
i
<
banks
;
i
++
)
{
if
(
!
test_bit
(
i
,
__get_cpu_var
(
mce_banks_owned
)))
continue
;
/* Disable CMCI */
rdmsrl
(
MSR_IA32_MC0_CTL2
+
i
,
val
);
val
&=
~
(
CMCI_EN
|
CMCI_THRESHOLD_MASK
);
wrmsrl
(
MSR_IA32_MC0_CTL2
+
i
,
val
);
__clear_bit
(
i
,
__get_cpu_var
(
mce_banks_owned
));
}
}
spin_unlock_irqrestore
(
&
cmci_discover_lock
,
flags
);
}
/* We'll mask the thermal vector in the lapic till we're ready: */
/*
h
=
THERMAL_APIC_VECTOR
|
APIC_DM_FIXED
|
APIC_LVT_MASKED
;
* After a CPU went down cycle through all the others and rediscover
apic_write
(
APIC_LVTTHMR
,
h
);
* Must run in process context.
*/
void
cmci_rediscover
(
int
dying
)
{
int
banks
;
int
cpu
;
cpumask_var_t
old
;
rdmsr
(
MSR_IA32_THERM_INTERRUPT
,
l
,
h
);
if
(
!
cmci_supported
(
&
banks
))
wrmsr
(
MSR_IA32_THERM_INTERRUPT
,
return
;
l
|
(
THERM_INT_LOW_ENABLE
|
THERM_INT_HIGH_ENABLE
),
h
);
if
(
!
alloc_cpumask_var
(
&
old
,
GFP_KERNEL
))
return
;
cpumask_copy
(
old
,
&
current
->
cpus_allowed
);
for_each_online_cpu
(
cpu
)
{
if
(
cpu
==
dying
)
continue
;
if
(
set_cpus_allowed_ptr
(
current
,
cpumask_of
(
cpu
)))
continue
;
/* Recheck banks in case CPUs don't all have the same */
if
(
cmci_supported
(
&
banks
))
cmci_discover
(
banks
,
0
);
}
intel_set_thermal_handler
();
set_cpus_allowed_ptr
(
current
,
old
);
free_cpumask_var
(
old
);
}
/*
* Reenable CMCI on this CPU in case a CPU down failed.
*/
void
cmci_reenable
(
void
)
{
int
banks
;
if
(
cmci_supported
(
&
banks
))
cmci_discover
(
banks
,
0
);
}
rdmsr
(
MSR_IA32_MISC_ENABLE
,
l
,
h
);
static
void
intel_init_cmci
(
void
)
wrmsr
(
MSR_IA32_MISC_ENABLE
,
l
|
MSR_IA32_MISC_ENABLE_TM1
,
h
);
{
int
banks
;
/* Unmask the thermal vector: */
if
(
!
cmci_supported
(
&
banks
))
l
=
apic_read
(
APIC_LVTTHMR
);
return
;
apic_write
(
APIC_LVTTHMR
,
l
&
~
APIC_LVT_MASKED
);
printk
(
KERN_INFO
"CPU%d: Thermal monitoring enabled (%s)
\n
"
,
mce_threshold_vector
=
intel_threshold_interrupt
;
cpu
,
tm2
?
"TM2"
:
"TM1"
);
cmci_discover
(
banks
,
1
);
/*
* For CPU #0 this runs with still disabled APIC, but that's
* ok because only the vector is set up. We still do another
* check for the banks later for CPU #0 just to make sure
* to not miss any events.
*/
apic_write
(
APIC_LVTCMCI
,
THRESHOLD_APIC_VECTOR
|
APIC_DM_FIXED
);
cmci_recheck
();
}
/* enable thermal throttle processing */
void
mce_intel_feature_init
(
struct
cpuinfo_x86
*
c
)
atomic_set
(
&
therm_throt_en
,
1
);
{
intel_init_thermal
(
c
);
intel_init_cmci
();
}
}
arch/x86/kernel/cpu/mcheck/mce_intel_64.c
已删除
100644 → 0
浏览文件 @
bc3f5d3d
/*
* Intel specific MCE features.
* Copyright 2004 Zwane Mwaikambo <zwane@linuxpower.ca>
* Copyright (C) 2008, 2009 Intel Corporation
* Author: Andi Kleen
*/
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <asm/processor.h>
#include <asm/apic.h>
#include <asm/msr.h>
#include <asm/mce.h>
#include <asm/hw_irq.h>
#include <asm/idle.h>
#include <asm/therm_throt.h>
#include "mce.h"
/*
 * Thermal interrupt entry point.
 *
 * Acknowledges the APIC interrupt, reads MSR_IA32_THERM_STATUS and hands
 * the PROCHOT bit to therm_throt_process(). When that returns non-zero the
 * raw status value is passed on to mce_log_therm_throt_event(). The
 * per-CPU thermal interrupt counter is bumped on every invocation.
 */
asmlinkage void smp_thermal_interrupt(void)
{
	__u64 therm_status;

	ack_APIC_irq();
	exit_idle();
	irq_enter();

	rdmsrl(MSR_IA32_THERM_STATUS, therm_status);
	if (therm_throt_process(therm_status & THERM_STATUS_PROCHOT)) {
		mce_log_therm_throt_event(therm_status);
	}

	inc_irq_stat(irq_thermal_count);
	irq_exit();
}
/*
* Support for Intel Corrected Machine Check Interrupts. This allows
* the CPU to raise an interrupt when a corrected machine check happened.
* Normally we pick those up using a regular polling timer.
* Also supports reliable discovery of shared banks.
*/
/*
 * Per-CPU bitmap of the MCE banks this CPU has claimed for CMCI.
 * Bits are set in cmci_discover() and cleared again in cmci_clear().
 */
static
DEFINE_PER_CPU
(
mce_banks_t
,
mce_banks_owned
);
/*
* cmci_discover_lock protects against parallel discovery attempts
* which could race against each other.
*/
static
DEFINE_SPINLOCK
(
cmci_discover_lock
);
/* Value ORed into a bank's MC_CTL2 MSR together with CMCI_EN when claiming it. */
#define CMCI_THRESHOLD 1
/*
 * Check whether CMCI can be used on the current CPU.
 *
 * @banks: receives the bank count from MCG_CAP (low 8 bits, capped at
 *         MAX_NR_BANKS). It is only written once all the early checks
 *         have passed; on the early "return 0" paths it is left untouched.
 *
 * Returns 1 if MCG_CAP advertises MCG_CMCI_P, 0 otherwise.
 */
static int cmci_supported(int *banks)
{
	u64 mcg_cap;

	/* Disabled through mce_cmci_disabled or mce_ignore_ce. */
	if (mce_cmci_disabled || mce_ignore_ce)
		return 0;

	/*
	 * Vendor check is not strictly needed, but the initial
	 * initialization is vendor keyed and this
	 * makes sure none of the backdoors are entered otherwise.
	 */
	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
		return 0;

	/* Needs a local APIC whose maximum LVT index is at least 6. */
	if (!cpu_has_apic || lapic_get_maxlvt() < 6)
		return 0;

	rdmsrl(MSR_IA32_MCG_CAP, mcg_cap);
	*banks = min_t(unsigned, MAX_NR_BANKS, mcg_cap & 0xff);

	return (mcg_cap & MCG_CMCI_P) ? 1 : 0;
}
/*
* The interrupt handler. This is called on every event.
* Just call the poller directly to log any events.
* This could in theory increase the threshold under high load,
* but doesn't for now.
*/
static
void
intel_threshold_interrupt
(
void
)
{
machine_check_poll
(
MCP_TIMESTAMP
,
&
__get_cpu_var
(
mce_banks_owned
));
mce_notify_irq
();
}
/*
 * Append one " <type>:<num>" entry to the bank summary line.
 *
 * @type: short tag printed before the bank number
 * @hdr:  header state; when it is 0 the "CPU %d MCA banks" prefix is
 *        printed first. It is always set to 1 on return.
 * @num:  bank number to print
 */
static void print_update(char *type, int *hdr, int num)
{
	if (!*hdr) {
		printk(KERN_INFO "CPU %d MCA banks", smp_processor_id());
	}
	*hdr = 1;

	printk(KERN_CONT " %s:%d", type, num);
}
/*
* Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks
* on this CPU. Use the algorithm recommended in the SDM to discover shared
* banks.
*/
static
void
cmci_discover
(
int
banks
,
int
boot
)
{
unsigned
long
*
owned
=
(
void
*
)
&
__get_cpu_var
(
mce_banks_owned
);
unsigned
long
flags
;
int
hdr
=
0
;
int
i
;
spin_lock_irqsave
(
&
cmci_discover_lock
,
flags
);
for
(
i
=
0
;
i
<
banks
;
i
++
)
{
u64
val
;
if
(
test_bit
(
i
,
owned
))
continue
;
rdmsrl
(
MSR_IA32_MC0_CTL2
+
i
,
val
);
/* Already owned by someone else? */
if
(
val
&
CMCI_EN
)
{
if
(
test_and_clear_bit
(
i
,
owned
)
||
boot
)
print_update
(
"SHD"
,
&
hdr
,
i
);
__clear_bit
(
i
,
__get_cpu_var
(
mce_poll_banks
));
continue
;
}
val
|=
CMCI_EN
|
CMCI_THRESHOLD
;
wrmsrl
(
MSR_IA32_MC0_CTL2
+
i
,
val
);
rdmsrl
(
MSR_IA32_MC0_CTL2
+
i
,
val
);
/* Did the enable bit stick? -- the bank supports CMCI */
if
(
val
&
CMCI_EN
)
{
if
(
!
test_and_set_bit
(
i
,
owned
)
||
boot
)
print_update
(
"CMCI"
,
&
hdr
,
i
);
__clear_bit
(
i
,
__get_cpu_var
(
mce_poll_banks
));
}
else
{
WARN_ON
(
!
test_bit
(
i
,
__get_cpu_var
(
mce_poll_banks
)));
}
}
spin_unlock_irqrestore
(
&
cmci_discover_lock
,
flags
);
if
(
hdr
)
printk
(
KERN_CONT
"
\n
"
);
}
/*
 * Poll all CMCI-owned banks once, in case an event was missed during
 * initialization. The poll runs with local interrupts disabled and is
 * skipped when mce_available() or cmci_supported() reports failure.
 */
void cmci_recheck(void)
{
	unsigned long irq_flags;
	int nr_banks;

	if (mce_available(&current_cpu_data) && cmci_supported(&nr_banks)) {
		local_irq_save(irq_flags);
		machine_check_poll(MCP_TIMESTAMP,
				   &__get_cpu_var(mce_banks_owned));
		local_irq_restore(irq_flags);
	}
}
/*
 * Disable CMCI on this CPU for every bank it owns, for use when the CPU
 * goes down. This allows other CPUs to claim the banks on rediscovery.
 *
 * For each owned bank the CMCI_EN and CMCI_THRESHOLD_MASK bits are cleared
 * in its MC_CTL2 MSR and the bank is dropped from mce_banks_owned. Runs
 * under cmci_discover_lock with interrupts disabled.
 */
void cmci_clear(void)
{
	unsigned long irq_flags;
	int nr_banks;
	int bank;

	if (!cmci_supported(&nr_banks))
		return;

	spin_lock_irqsave(&cmci_discover_lock, irq_flags);

	for (bank = 0; bank < nr_banks; bank++) {
		u64 ctl2;

		if (test_bit(bank, __get_cpu_var(mce_banks_owned))) {
			/* Disable CMCI */
			rdmsrl(MSR_IA32_MC0_CTL2 + bank, ctl2);
			ctl2 &= ~(CMCI_EN | CMCI_THRESHOLD_MASK);
			wrmsrl(MSR_IA32_MC0_CTL2 + bank, ctl2);

			__clear_bit(bank, __get_cpu_var(mce_banks_owned));
		}
	}

	spin_unlock_irqrestore(&cmci_discover_lock, irq_flags);
}
/*
 * After a CPU went down, visit every other online CPU and rerun CMCI
 * discovery there. Must run in process context.
 *
 * @dying: CPU number to skip
 *
 * The current task's allowed-CPU mask is saved, the task is pinned to each
 * target CPU in turn, and the saved mask is restored at the end. CPUs the
 * task cannot be moved to are silently skipped.
 */
void cmci_rediscover(int dying)
{
	cpumask_var_t saved_mask;
	int nr_banks;
	int cpu;

	if (!cmci_supported(&nr_banks) ||
	    !alloc_cpumask_var(&saved_mask, GFP_KERNEL))
		return;

	cpumask_copy(saved_mask, &current->cpus_allowed);

	for_each_online_cpu(cpu) {
		if (cpu == dying ||
		    set_cpus_allowed_ptr(current, cpumask_of(cpu)))
			continue;

		/* Recheck banks in case CPUs don't all have the same */
		if (!cmci_supported(&nr_banks))
			continue;

		cmci_discover(nr_banks, 0);
	}

	set_cpus_allowed_ptr(current, saved_mask);
	free_cpumask_var(saved_mask);
}
/*
 * Re-run CMCI bank discovery on the current CPU, e.g. after a CPU-down
 * attempt failed. Does nothing when cmci_supported() reports that CMCI
 * cannot be used.
 */
void cmci_reenable(void)
{
	int nr_banks;

	if (!cmci_supported(&nr_banks))
		return;

	/* Not a boot-time call, so pass boot == 0. */
	cmci_discover(nr_banks, 0);
}
/*
 * Set up CMCI on the current CPU: install the threshold handler, claim
 * banks (boot-time discovery), program the CMCI LVT entry and finally
 * poll the owned banks once. Returns early when CMCI is not supported.
 */
static void intel_init_cmci(void)
{
	int nr_banks;

	if (!cmci_supported(&nr_banks))
		return;

	mce_threshold_vector = intel_threshold_interrupt;
	cmci_discover(nr_banks, 1);

	/*
	 * For CPU #0 this runs with still disabled APIC, but that's
	 * ok because only the vector is set up. We still do another
	 * check for the banks later for CPU #0 just to make sure
	 * to not miss any events.
	 */
	apic_write(APIC_LVTCMCI, THRESHOLD_APIC_VECTOR | APIC_DM_FIXED);

	cmci_recheck();
}
/*
 * Intel-specific MCE feature setup for one CPU.
 * @c: CPU description, forwarded to intel_init_thermal().
 * Runs thermal setup first, then CMCI setup.
 */
void
mce_intel_feature_init
(
struct
cpuinfo_x86
*
c
)
{
intel_init_thermal
(
c
);
intel_init_cmci
();
}
arch/x86/kernel/cpu/mcheck/non-fatal.c
浏览文件 @
1d991001
...
@@ -17,10 +17,9 @@
...
@@ -17,10 +17,9 @@
#include <asm/processor.h>
#include <asm/processor.h>
#include <asm/system.h>
#include <asm/system.h>
#include <asm/mce.h>
#include <asm/msr.h>
#include <asm/msr.h>
#include "mce.h"
static
int
firstbank
;
static
int
firstbank
;
#define MCE_RATE (15*HZ)
/* timer rate is 15s */
#define MCE_RATE (15*HZ)
/* timer rate is 15s */
...
...
arch/x86/kernel/cpu/mcheck/p4.c
浏览文件 @
1d991001
/*
/*
* P4 specific Machine Check Exception Reporting
* P4 specific Machine Check Exception Reporting
*/
*/
#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/init.h>
#include <linux/smp.h>
#include <linux/smp.h>
#include <asm/therm_throt.h>
#include <asm/processor.h>
#include <asm/processor.h>
#include <asm/system.h>
#include <asm/mce.h>
#include <asm/apic.h>
#include <asm/msr.h>
#include <asm/msr.h>
#include "mce.h"
/* as supported by the P4/Xeon family */
/* as supported by the P4/Xeon family */
struct
intel_mce_extended_msrs
{
struct
intel_mce_extended_msrs
{
u32
eax
;
u32
eax
;
...
@@ -33,46 +27,6 @@ struct intel_mce_extended_msrs {
...
@@ -33,46 +27,6 @@ struct intel_mce_extended_msrs {
static
int
mce_num_extended_msrs
;
static
int
mce_num_extended_msrs
;
#ifdef CONFIG_X86_MCE_P4THERMAL
static
void
unexpected_thermal_interrupt
(
struct
pt_regs
*
regs
)
{
printk
(
KERN_ERR
"CPU%d: Unexpected LVT TMR interrupt!
\n
"
,
smp_processor_id
());
add_taint
(
TAINT_MACHINE_CHECK
);
}
/*
 * P4/Xeon thermal transition interrupt handler.
 *
 * Acknowledges the APIC interrupt, then reads MSR_IA32_THERM_STATUS and
 * passes its PROCHOT bit to therm_throt_process(). @regs is not used.
 */
static void intel_thermal_interrupt(struct pt_regs *regs)
{
	__u64 therm_status;

	ack_APIC_irq();

	rdmsrl(MSR_IA32_THERM_STATUS, therm_status);
	therm_throt_process(therm_status & THERM_STATUS_PROCHOT);
}
/*
 * Thermal interrupt handler for this CPU setup. Called from
 * smp_thermal_interrupt(); starts out as unexpected_thermal_interrupt and
 * is switched to intel_thermal_interrupt by intel_set_thermal_handler().
 */
static
void
(
*
vendor_thermal_interrupt
)(
struct
pt_regs
*
regs
)
=
unexpected_thermal_interrupt
;
void
smp_thermal_interrupt
(
struct
pt_regs
*
regs
)
{
irq_enter
();
vendor_thermal_interrupt
(
regs
);
__get_cpu_var
(
irq_stat
).
irq_thermal_count
++
;
irq_exit
();
}
/* Install intel_thermal_interrupt as the handler smp_thermal_interrupt() dispatches to. */
void
intel_set_thermal_handler
(
void
)
{
vendor_thermal_interrupt
=
intel_thermal_interrupt
;
}
#endif
/* CONFIG_X86_MCE_P4THERMAL */
/* P4/Xeon Extended MCE MSR retrieval, return 0 if unsupported */
/* P4/Xeon Extended MCE MSR retrieval, return 0 if unsupported */
static
void
intel_get_extended_msrs
(
struct
intel_mce_extended_msrs
*
r
)
static
void
intel_get_extended_msrs
(
struct
intel_mce_extended_msrs
*
r
)
{
{
...
...
arch/x86/kernel/cpu/mcheck/p5.c
浏览文件 @
1d991001
...
@@ -10,12 +10,11 @@
...
@@ -10,12 +10,11 @@
#include <asm/processor.h>
#include <asm/processor.h>
#include <asm/system.h>
#include <asm/system.h>
#include <asm/mce.h>
#include <asm/msr.h>
#include <asm/msr.h>
#include "mce.h"
/* By default disabled */
/* By default disabled */
int
mce_p5_enable
;
int
mce_p5_enabled
__read_mostly
;
/* Machine check handler for Pentium class Intel CPUs: */
/* Machine check handler for Pentium class Intel CPUs: */
static
void
pentium_machine_check
(
struct
pt_regs
*
regs
,
long
error_code
)
static
void
pentium_machine_check
(
struct
pt_regs
*
regs
,
long
error_code
)
...
@@ -43,15 +42,13 @@ void intel_p5_mcheck_init(struct cpuinfo_x86 *c)
...
@@ -43,15 +42,13 @@ void intel_p5_mcheck_init(struct cpuinfo_x86 *c)
{
{
u32
l
,
h
;
u32
l
,
h
;
/*
Check for MCE support
: */
/*
Default P5 to off as its often misconnected
: */
if
(
!
cpu_has
(
c
,
X86_FEATURE_MCE
)
)
if
(
!
mce_p5_enabled
)
return
;
return
;
#ifdef CONFIG_X86_OLD_MCE
/* Check for MCE support: */
/* Default P5 to off as its often misconnected: */
if
(
!
cpu_has
(
c
,
X86_FEATURE_MCE
))
if
(
mce_disabled
!=
-
1
)
return
;
return
;
#endif
machine_check_vector
=
pentium_machine_check
;
machine_check_vector
=
pentium_machine_check
;
/* Make sure the vector pointer is visible before we enable MCEs: */
/* Make sure the vector pointer is visible before we enable MCEs: */
...
...
arch/x86/kernel/cpu/mcheck/p6.c
浏览文件 @
1d991001
...
@@ -10,10 +10,9 @@
...
@@ -10,10 +10,9 @@
#include <asm/processor.h>
#include <asm/processor.h>
#include <asm/system.h>
#include <asm/system.h>
#include <asm/mce.h>
#include <asm/msr.h>
#include <asm/msr.h>
#include "mce.h"
/* Machine Check Handler For PII/PIII */
/* Machine Check Handler For PII/PIII */
static
void
intel_machine_check
(
struct
pt_regs
*
regs
,
long
error_code
)
static
void
intel_machine_check
(
struct
pt_regs
*
regs
,
long
error_code
)
{
{
...
...
arch/x86/kernel/cpu/mcheck/therm_throt.c
浏览文件 @
1d991001
...
@@ -13,13 +13,23 @@
...
@@ -13,13 +13,23 @@
* Credits: Adapted from Zwane Mwaikambo's original code in mce_intel.c.
* Credits: Adapted from Zwane Mwaikambo's original code in mce_intel.c.
* Inspired by Ross Biro's and Al Borchers' counter code.
* Inspired by Ross Biro's and Al Borchers' counter code.
*/
*/
#include <linux/interrupt.h>
#include <linux/notifier.h>
#include <linux/notifier.h>
#include <linux/jiffies.h>
#include <linux/jiffies.h>
#include <linux/kernel.h>
#include <linux/percpu.h>
#include <linux/percpu.h>
#include <linux/sysdev.h>
#include <linux/sysdev.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/smp.h>
#include <linux/cpu.h>
#include <linux/cpu.h>
#include <asm/therm_throt.h>
#include <asm/processor.h>
#include <asm/system.h>
#include <asm/apic.h>
#include <asm/idle.h>
#include <asm/mce.h>
#include <asm/msr.h>
/* How long to wait between reporting thermal events */
/* How long to wait between reporting thermal events */
#define CHECK_INTERVAL (300 * HZ)
#define CHECK_INTERVAL (300 * HZ)
...
@@ -27,7 +37,7 @@
...
@@ -27,7 +37,7 @@
static
DEFINE_PER_CPU
(
__u64
,
next_check
)
=
INITIAL_JIFFIES
;
static
DEFINE_PER_CPU
(
__u64
,
next_check
)
=
INITIAL_JIFFIES
;
static
DEFINE_PER_CPU
(
unsigned
long
,
thermal_throttle_count
);
static
DEFINE_PER_CPU
(
unsigned
long
,
thermal_throttle_count
);
atomic_t
therm_throt_en
=
ATOMIC_INIT
(
0
);
static
atomic_t
therm_throt_en
=
ATOMIC_INIT
(
0
);
#ifdef CONFIG_SYSFS
#ifdef CONFIG_SYSFS
#define define_therm_throt_sysdev_one_ro(_name) \
#define define_therm_throt_sysdev_one_ro(_name) \
...
@@ -82,7 +92,7 @@ static struct attribute_group thermal_throttle_attr_group = {
...
@@ -82,7 +92,7 @@ static struct attribute_group thermal_throttle_attr_group = {
* 1 : Event should be logged further, and a message has been
* 1 : Event should be logged further, and a message has been
* printed to the syslog.
* printed to the syslog.
*/
*/
int
therm_throt_process
(
int
curr
)
static
int
therm_throt_process
(
int
curr
)
{
{
unsigned
int
cpu
=
smp_processor_id
();
unsigned
int
cpu
=
smp_processor_id
();
__u64
tmp_jiffs
=
get_jiffies_64
();
__u64
tmp_jiffs
=
get_jiffies_64
();
...
@@ -186,6 +196,94 @@ static __init int thermal_throttle_init_device(void)
...
@@ -186,6 +196,94 @@ static __init int thermal_throttle_init_device(void)
return
0
;
return
0
;
}
}
device_initcall
(
thermal_throttle_init_device
);
device_initcall
(
thermal_throttle_init_device
);
#endif
/* CONFIG_SYSFS */
#endif
/* CONFIG_SYSFS */
/*
 * Thermal transition interrupt handler.
 *
 * Reads MSR_IA32_THERM_STATUS and feeds its PROCHOT bit to
 * therm_throt_process(); when that returns non-zero, the raw status value
 * is passed to mce_log_therm_throt_event().
 */
static void intel_thermal_interrupt(void)
{
	__u64 therm_status;

	rdmsrl(MSR_IA32_THERM_STATUS, therm_status);

	if (!therm_throt_process(therm_status & THERM_STATUS_PROCHOT))
		return;

	mce_log_therm_throt_event(therm_status);
}
static
void
unexpected_thermal_interrupt
(
void
)
{
printk
(
KERN_ERR
"CPU%d: Unexpected LVT TMR interrupt!
\n
"
,
smp_processor_id
());
add_taint
(
TAINT_MACHINE_CHECK
);
}
/*
 * Handler invoked by smp_thermal_interrupt(). Starts out as
 * unexpected_thermal_interrupt; intel_init_thermal() replaces it with
 * intel_thermal_interrupt once thermal monitoring has been set up.
 */
static
void
(
*
smp_thermal_vector
)(
void
)
=
unexpected_thermal_interrupt
;
asmlinkage
void
smp_thermal_interrupt
(
struct
pt_regs
*
regs
)
{
exit_idle
();
irq_enter
();
inc_irq_stat
(
irq_thermal_count
);
smp_thermal_vector
();
irq_exit
();
/* Ack only at the end to avoid potential reentry */
ack_APIC_irq
();
}
void
intel_init_thermal
(
struct
cpuinfo_x86
*
c
)
{
unsigned
int
cpu
=
smp_processor_id
();
int
tm2
=
0
;
u32
l
,
h
;
/* Thermal monitoring depends on ACPI and clock modulation*/
if
(
!
cpu_has
(
c
,
X86_FEATURE_ACPI
)
||
!
cpu_has
(
c
,
X86_FEATURE_ACC
))
return
;
/*
* First check if its enabled already, in which case there might
* be some SMM goo which handles it, so we can't even put a handler
* since it might be delivered via SMI already:
*/
rdmsr
(
MSR_IA32_MISC_ENABLE
,
l
,
h
);
h
=
apic_read
(
APIC_LVTTHMR
);
if
((
l
&
MSR_IA32_MISC_ENABLE_TM1
)
&&
(
h
&
APIC_DM_SMI
))
{
printk
(
KERN_DEBUG
"CPU%d: Thermal monitoring handled by SMI
\n
"
,
cpu
);
return
;
}
if
(
cpu_has
(
c
,
X86_FEATURE_TM2
)
&&
(
l
&
MSR_IA32_MISC_ENABLE_TM2
))
tm2
=
1
;
/* Check whether a vector already exists */
if
(
h
&
APIC_VECTOR_MASK
)
{
printk
(
KERN_DEBUG
"CPU%d: Thermal LVT vector (%#x) already installed
\n
"
,
cpu
,
(
h
&
APIC_VECTOR_MASK
));
return
;
}
/* We'll mask the thermal vector in the lapic till we're ready: */
h
=
THERMAL_APIC_VECTOR
|
APIC_DM_FIXED
|
APIC_LVT_MASKED
;
apic_write
(
APIC_LVTTHMR
,
h
);
rdmsr
(
MSR_IA32_THERM_INTERRUPT
,
l
,
h
);
wrmsr
(
MSR_IA32_THERM_INTERRUPT
,
l
|
(
THERM_INT_LOW_ENABLE
|
THERM_INT_HIGH_ENABLE
),
h
);
smp_thermal_vector
=
intel_thermal_interrupt
;
rdmsr
(
MSR_IA32_MISC_ENABLE
,
l
,
h
);
wrmsr
(
MSR_IA32_MISC_ENABLE
,
l
|
MSR_IA32_MISC_ENABLE_TM1
,
h
);
/* Unmask the thermal vector: */
l
=
apic_read
(
APIC_LVTTHMR
);
apic_write
(
APIC_LVTTHMR
,
l
&
~
APIC_LVT_MASKED
);
printk
(
KERN_INFO
"CPU%d: Thermal monitoring enabled (%s)
\n
"
,
cpu
,
tm2
?
"TM2"
:
"TM1"
);
/* enable thermal throttle processing */
atomic_set
(
&
therm_throt_en
,
1
);
}
arch/x86/kernel/cpu/mcheck/winchip.c
浏览文件 @
1d991001
...
@@ -9,10 +9,9 @@
...
@@ -9,10 +9,9 @@
#include <asm/processor.h>
#include <asm/processor.h>
#include <asm/system.h>
#include <asm/system.h>
#include <asm/mce.h>
#include <asm/msr.h>
#include <asm/msr.h>
#include "mce.h"
/* Machine check handler for WinChip C6: */
/* Machine check handler for WinChip C6: */
static
void
winchip_machine_check
(
struct
pt_regs
*
regs
,
long
error_code
)
static
void
winchip_machine_check
(
struct
pt_regs
*
regs
,
long
error_code
)
{
{
...
...
arch/x86/kernel/traps.c
浏览文件 @
1d991001
...
@@ -53,6 +53,7 @@
...
@@ -53,6 +53,7 @@
#include <asm/traps.h>
#include <asm/traps.h>
#include <asm/desc.h>
#include <asm/desc.h>
#include <asm/i387.h>
#include <asm/i387.h>
#include <asm/mce.h>
#include <asm/mach_traps.h>
#include <asm/mach_traps.h>
...
@@ -64,8 +65,6 @@
...
@@ -64,8 +65,6 @@
#include <asm/setup.h>
#include <asm/setup.h>
#include <asm/traps.h>
#include <asm/traps.h>
#include "cpu/mcheck/mce.h"
asmlinkage
int
system_call
(
void
);
asmlinkage
int
system_call
(
void
);
/* Do we ignore FPU interrupts ? */
/* Do we ignore FPU interrupts ? */
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录