openeuler / raspberrypi-kernel
Commit f1690d17

Authored on Nov 26, 2010 by Russell King

    Merge branch 'perf-split' of git://linux-arm.org/linux-2.6-wd into devel-stable

Parents: 612275ad, 43eab878
Showing 4 changed files with 2419 additions and 2414 deletions (+2419 -2414):

    arch/arm/kernel/perf_event.c            +34    -2414
    arch/arm/kernel/perf_event_v6.c         +672   -0
    arch/arm/kernel/perf_event_v7.c         +906   -0
    arch/arm/kernel/perf_event_xscale.c     +807   -0
arch/arm/kernel/perf_event.c

@@ -4,9 +4,7 @@
  * ARM performance counter support.
  *
  * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles
- *
- * ARMv7 support: Jean Pihet <jpihet@mvista.com>
- * 2010 (c) MontaVista Software, LLC.
+ * Copyright (C) 2010 ARM Ltd., Will Deacon <will.deacon@arm.com>
  *
  * This code is based on the sparc64 perf event code, which is in turn based
  * on the x86 code. Callchain code is based on the ARM OProfile backtrace
@@ -69,29 +67,23 @@ struct cpu_hw_events {
 };
 DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
 
-/* PMU names. */
-static const char *arm_pmu_names[] = {
-	[ARM_PERF_PMU_ID_XSCALE1] = "xscale1",
-	[ARM_PERF_PMU_ID_XSCALE2] = "xscale2",
-	[ARM_PERF_PMU_ID_V6]      = "v6",
-	[ARM_PERF_PMU_ID_V6MP]    = "v6mpcore",
-	[ARM_PERF_PMU_ID_CA8]     = "ARMv7 Cortex-A8",
-	[ARM_PERF_PMU_ID_CA9]     = "ARMv7 Cortex-A9",
-};
-
 struct arm_pmu {
 	enum arm_perf_pmu_ids id;
+	const char	*name;
 	irqreturn_t	(*handle_irq)(int irq_num, void *dev);
 	void		(*enable)(struct hw_perf_event *evt, int idx);
 	void		(*disable)(struct hw_perf_event *evt, int idx);
-	int		(*event_map)(int evt);
-	u64		(*raw_event)(u64);
 	int		(*get_event_idx)(struct cpu_hw_events *cpuc,
 					 struct hw_perf_event *hwc);
 	u32		(*read_counter)(int idx);
 	void		(*write_counter)(int idx, u32 val);
 	void		(*start)(void);
 	void		(*stop)(void);
+	const unsigned	(*cache_map)[PERF_COUNT_HW_CACHE_MAX]
+				    [PERF_COUNT_HW_CACHE_OP_MAX]
+				    [PERF_COUNT_HW_CACHE_RESULT_MAX];
+	const unsigned	(*event_map)[PERF_COUNT_HW_MAX];
+	u32		raw_event_mask;
 	int		num_events;
 	u64		max_period;
 };
@@ -136,10 +128,6 @@ EXPORT_SYMBOL_GPL(perf_num_counters);
 
 #define CACHE_OP_UNSUPPORTED		0xFFFF
 
-static unsigned armpmu_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
-				     [PERF_COUNT_HW_CACHE_OP_MAX]
-				     [PERF_COUNT_HW_CACHE_RESULT_MAX];
-
 static int
 armpmu_map_cache_event(u64 config)
 {
@@ -157,7 +145,7 @@ armpmu_map_cache_event(u64 config)
 	if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
 		return -EINVAL;
 
-	ret = (int)armpmu_perf_cache_map[cache_type][cache_op][cache_result];
+	ret = (int)(*armpmu->cache_map)[cache_type][cache_op][cache_result];
 
 	if (ret == CACHE_OP_UNSUPPORTED)
 		return -ENOENT;
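The new cache_map member is a pointer to a whole three-dimensional array, which is why the lookup above dereferences it as (*armpmu->cache_map)[type][op][result]. A minimal stand-alone sketch of that idiom, with hypothetical demo_* names that are not from the kernel:

#include <stdio.h>

#define TYPE_MAX	2
#define OP_MAX		2
#define RESULT_MAX	2

/* A back-end table, analogous to the per-CPU cache maps in this commit. */
static const unsigned demo_cache_map[TYPE_MAX][OP_MAX][RESULT_MAX] = {
	[0][0][1] = 0xB,	/* e.g. "L1D read miss" maps to raw event 0xB */
};

/* The core keeps only a pointer to whichever table the back-end registered. */
struct demo_pmu {
	const unsigned (*cache_map)[TYPE_MAX][OP_MAX][RESULT_MAX];
};

int main(void)
{
	struct demo_pmu pmu = { .cache_map = &demo_cache_map };

	/* Same shape as the kernel lookup: (*armpmu->cache_map)[type][op][result] */
	printf("event = 0x%x\n", (*pmu.cache_map)[0][0][1]);
	return 0;
}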
@@ -165,6 +153,19 @@ armpmu_map_cache_event(u64 config)
 	return ret;
 }
 
+static int
+armpmu_map_event(u64 config)
+{
+	int mapping = (*armpmu->event_map)[config];
+	return mapping == HW_OP_UNSUPPORTED ? -EOPNOTSUPP : mapping;
+}
+
+static int
+armpmu_map_raw_event(u64 config)
+{
+	return (int)(config & armpmu->raw_event_mask);
+}
+
 static int
 armpmu_event_set_period(struct perf_event *event,
 			struct hw_perf_event *hwc,
@@ -458,11 +459,11 @@ __hw_perf_event_init(struct perf_event *event)
 
 	/* Decode the generic type into an ARM event identifier. */
 	if (PERF_TYPE_HARDWARE == event->attr.type) {
-		mapping = armpmu->event_map(event->attr.config);
+		mapping = armpmu_map_event(event->attr.config);
 	} else if (PERF_TYPE_HW_CACHE == event->attr.type) {
 		mapping = armpmu_map_cache_event(event->attr.config);
 	} else if (PERF_TYPE_RAW == event->attr.type) {
-		mapping = armpmu->raw_event(event->attr.config);
+		mapping = armpmu_map_raw_event(event->attr.config);
 	} else {
 		pr_debug("event type %x not supported\n", event->attr.type);
 		return -EOPNOTSUPP;
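For orientation, the three branches above correspond to the three event types user space can request through the ordinary perf_event_open(2) interface. This is standard Linux API usage, not part of the commit; a minimal sketch that exercises the PERF_TYPE_HARDWARE path:

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

int main(void)
{
	struct perf_event_attr attr;
	long long count;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;		/* decoded by armpmu_map_event() */
	attr.config = PERF_COUNT_HW_CPU_CYCLES;	/* index into armpmu->event_map */
	attr.disabled = 1;

	fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}

	ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
	/* ... workload under test ... */
	ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);

	read(fd, &count, sizeof(count));
	printf("cycles: %lld\n", count);
	close(fd);
	return 0;
}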
@@ -603,2366 +604,10 @@ static struct pmu pmu = {
 	.read		= armpmu_read,
 };
 
+/* Include the PMU-specific implementations. */
+#include "perf_event_xscale.c"
+#include "perf_event_v6.c"
+#include "perf_event_v7.c"

The removed side of this hunk, the code that now lives in the three new files, begins with the ARMv6 back-end:

-/*
- * ARMv6 Performance counter handling code.
- *
- * ARMv6 has 2 configurable performance counters and a single cycle counter.
- * They all share a single reset bit but can be written to zero so we can use
- * that for a reset.
- *
- * The counters can't be individually enabled or disabled so when we remove
- * one event and replace it with another we could get spurious counts from the
- * wrong event. However, we can take advantage of the fact that the
- * performance counters can export events to the event bus, and the event bus
- * itself can be monitored. This requires that we *don't* export the events to
- * the event bus. The procedure for disabling a configurable counter is:
- *	- change the counter to count the ETMEXTOUT[0] signal (0x20). This
- *	  effectively stops the counter from counting.
- *	- disable the counter's interrupt generation (each counter has it's
- *	  own interrupt enable bit).
- * Once stopped, the counter value can be written as 0 to reset.
- *
- * To enable a counter:
- *	- enable the counter's interrupt generation.
- *	- set the new event type.
- *
- * Note: the dedicated cycle counter only counts cycles and can't be
- * enabled/disabled independently of the others. When we want to disable the
- * cycle counter, we have to just disable the interrupt reporting and start
- * ignoring that counter. When re-enabling, we have to reset the value and
- * enable the interrupt.
- */
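The three #include lines added by this hunk keep everything in one translation unit: perf_event.c pulls the per-architecture .c back-ends into itself, so their static functions can use the static helpers defined above without new headers. A small sketch of the same pattern outside the kernel, with hypothetical file names backend_v1.c and core.c:

/* backend_v1.c (hypothetical back-end, never compiled on its own) */
static int backend_read(void)
{
	return shared_helper(21);	/* may use statics defined by the including file */
}

/* core.c (hypothetical core file, the only one passed to the compiler) */
#include <stdio.h>

static int shared_helper(int x)		/* visible to the included back-end */
{
	return x * 2;
}

#include "backend_v1.c"			/* one translation unit, like perf_event.c */

int main(void)
{
	printf("%d\n", backend_read());	/* prints 42 */
	return 0;
}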
[Removed listing, continued: the ARMv6/ARM11MPCore back-end, reproduced here in condensed form. This is the code reported above as moving into arch/arm/kernel/perf_event_v6.c.]

enum armv6_perf_types {
	ARMV6_PERFCTR_ICACHE_MISS    = 0x0,	ARMV6_PERFCTR_IBUF_STALL     = 0x1,
	ARMV6_PERFCTR_DDEP_STALL     = 0x2,	ARMV6_PERFCTR_ITLB_MISS      = 0x3,
	ARMV6_PERFCTR_DTLB_MISS      = 0x4,	ARMV6_PERFCTR_BR_EXEC        = 0x5,
	ARMV6_PERFCTR_BR_MISPREDICT  = 0x6,	ARMV6_PERFCTR_INSTR_EXEC     = 0x7,
	ARMV6_PERFCTR_DCACHE_HIT     = 0x9,	ARMV6_PERFCTR_DCACHE_ACCESS  = 0xA,
	ARMV6_PERFCTR_DCACHE_MISS    = 0xB,	ARMV6_PERFCTR_DCACHE_WBACK   = 0xC,
	ARMV6_PERFCTR_SW_PC_CHANGE   = 0xD,	ARMV6_PERFCTR_MAIN_TLB_MISS  = 0xF,
	ARMV6_PERFCTR_EXPL_D_ACCESS  = 0x10,	ARMV6_PERFCTR_LSU_FULL_STALL = 0x11,
	ARMV6_PERFCTR_WBUF_DRAINED   = 0x12,	ARMV6_PERFCTR_CPU_CYCLES     = 0xFF,
	ARMV6_PERFCTR_NOP            = 0x20,
};

enum armv6_counters {
	ARMV6_CYCLE_COUNTER = 1, ARMV6_COUNTER0, ARMV6_COUNTER1,
};

/*
 * The hardware events that we support. We do support cache operations but
 * we have harvard caches and no way to combine instruction and data
 * accesses/misses in hardware.
 */
static const unsigned armv6_perf_map[PERF_COUNT_HW_MAX] = {
	[PERF_COUNT_HW_CPU_CYCLES]	    = ARMV6_PERFCTR_CPU_CYCLES,
	[PERF_COUNT_HW_INSTRUCTIONS]	    = ARMV6_PERFCTR_INSTR_EXEC,
	[PERF_COUNT_HW_CACHE_REFERENCES]    = HW_OP_UNSUPPORTED,
	[PERF_COUNT_HW_CACHE_MISSES]	    = HW_OP_UNSUPPORTED,
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6_PERFCTR_BR_EXEC,
	[PERF_COUNT_HW_BRANCH_MISSES]	    = ARMV6_PERFCTR_BR_MISPREDICT,
	[PERF_COUNT_HW_BUS_CYCLES]	    = HW_OP_UNSUPPORTED,
};

armv6_perf_cache_map[][][] (condensed): the counters cannot distinguish reads from
writes, so L1D read and write both map to ARMV6_PERFCTR_DCACHE_ACCESS /
ARMV6_PERFCTR_DCACHE_MISS, L1I misses map to ARMV6_PERFCTR_ICACHE_MISS, and
DTLB/ITLB misses map to the micro-TLB miss events (main TLB misses need a raw
counter). LL, BPU and all prefetch entries are CACHE_OP_UNSUPPORTED.

enum armv6mpcore_perf_types {
	ARMV6MPCORE_PERFCTR_ICACHE_MISS     = 0x0,  ARMV6MPCORE_PERFCTR_IBUF_STALL      = 0x1,
	ARMV6MPCORE_PERFCTR_DDEP_STALL      = 0x2,  ARMV6MPCORE_PERFCTR_ITLB_MISS       = 0x3,
	ARMV6MPCORE_PERFCTR_DTLB_MISS       = 0x4,  ARMV6MPCORE_PERFCTR_BR_EXEC         = 0x5,
	ARMV6MPCORE_PERFCTR_BR_NOTPREDICT   = 0x6,  ARMV6MPCORE_PERFCTR_BR_MISPREDICT   = 0x7,
	ARMV6MPCORE_PERFCTR_INSTR_EXEC      = 0x8,  ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS = 0xA,
	ARMV6MPCORE_PERFCTR_DCACHE_RDMISS   = 0xB,  ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS = 0xC,
	ARMV6MPCORE_PERFCTR_DCACHE_WRMISS   = 0xD,  ARMV6MPCORE_PERFCTR_DCACHE_EVICTION = 0xE,
	ARMV6MPCORE_PERFCTR_SW_PC_CHANGE    = 0xF,  ARMV6MPCORE_PERFCTR_MAIN_TLB_MISS   = 0x10,
	ARMV6MPCORE_PERFCTR_EXPL_MEM_ACCESS = 0x11, ARMV6MPCORE_PERFCTR_LSU_FULL_STALL  = 0x12,
	ARMV6MPCORE_PERFCTR_WBUF_DRAINED    = 0x13, ARMV6MPCORE_PERFCTR_CPU_CYCLES      = 0xFF,
};

armv6mpcore_perf_map[] mirrors armv6_perf_map[] with the MPCore enumeration
(CPU_CYCLES, INSTR_EXEC, BR_EXEC, BR_MISPREDICT; cache references/misses and bus
cycles are HW_OP_UNSUPPORTED), and armv6mpcore_perf_cache_map[][][] differs from
the UP map only in that L1D reads and writes use the separate RDACCESS/RDMISS and
WRACCESS/WRMISS events.

static inline unsigned long armv6_pmcr_read(void)
{
	u32 val;
	asm volatile("mrc   p15, 0, %0, c15, c12, 0" : "=r"(val));
	return val;
}

static inline void armv6_pmcr_write(unsigned long val)
{
	asm volatile("mcr   p15, 0, %0, c15, c12, 0" : : "r"(val));
}

#define ARMV6_PMCR_ENABLE		(1 << 0)
#define ARMV6_PMCR_CTR01_RESET		(1 << 1)
#define ARMV6_PMCR_CCOUNT_RESET		(1 << 2)
#define ARMV6_PMCR_CCOUNT_DIV		(1 << 3)
#define ARMV6_PMCR_COUNT0_IEN		(1 << 4)
#define ARMV6_PMCR_COUNT1_IEN		(1 << 5)
#define ARMV6_PMCR_CCOUNT_IEN		(1 << 6)
#define ARMV6_PMCR_COUNT0_OVERFLOW	(1 << 8)
#define ARMV6_PMCR_COUNT1_OVERFLOW	(1 << 9)
#define ARMV6_PMCR_CCOUNT_OVERFLOW	(1 << 10)
#define ARMV6_PMCR_EVT_COUNT0_SHIFT	20
#define ARMV6_PMCR_EVT_COUNT0_MASK	(0xFF << ARMV6_PMCR_EVT_COUNT0_SHIFT)
#define ARMV6_PMCR_EVT_COUNT1_SHIFT	12
#define ARMV6_PMCR_EVT_COUNT1_MASK	(0xFF << ARMV6_PMCR_EVT_COUNT1_SHIFT)
#define ARMV6_PMCR_OVERFLOWED_MASK \
	(ARMV6_PMCR_COUNT0_OVERFLOW | ARMV6_PMCR_COUNT1_OVERFLOW | \
	 ARMV6_PMCR_CCOUNT_OVERFLOW)

The remaining removed ARMv6 code is the driver proper, operating on the PMCR bits
above under pmu_lock:

  * armv6_pmcr_has_overflowed() and armv6_pmcr_counter_has_overflowed() test the
    CCOUNT/COUNT0/COUNT1 overflow flags;
  * armv6pmu_read_counter() and armv6pmu_write_counter() access CCNT, PMN0 and
    PMN1 through mrc/mcr p15, 0, %0, c15, c12, {1, 2, 3};
  * armv6pmu_enable_event() programs the EVT_COUNTx field and interrupt-enable
    bit for the chosen counter; armv6pmu_disable_event() re-points the counter at
    ARMV6_PERFCTR_NOP (0x20) and clears its interrupt enable, while
    armv6mpcore_pmu_disable_event() can only mask the interrupt (the MPCore PMU
    has no way to stop an individual counter);
  * armv6pmu_handle_irq() writes the overflow flags back to clear the interrupt,
    then for each active, overflowed counter runs armpmu_event_update(),
    armpmu_event_set_period() and perf_event_overflow(), and finally calls
    irq_work_run() (which must run with interrupts disabled);
  * armv6pmu_start() and armv6pmu_stop() set and clear ARMV6_PMCR_ENABLE;
  * armv6pmu_event_map() and armv6mpcore_pmu_event_map() index the maps above,
    armv6pmu_raw_event() masks the config with 0xff, and
    armv6pmu_get_event_idx() places cycle events on the cycle counter and
    everything else on COUNTER1, then COUNTER0.

static const struct arm_pmu armv6pmu = {
	.id		= ARM_PERF_PMU_ID_V6,
	.handle_irq	= armv6pmu_handle_irq,
	.enable		= armv6pmu_enable_event,
	.disable	= armv6pmu_disable_event,
	.event_map	= armv6pmu_event_map,
	.raw_event	= armv6pmu_raw_event,
	.read_counter	= armv6pmu_read_counter,
	.write_counter	= armv6pmu_write_counter,
	.get_event_idx	= armv6pmu_get_event_idx,
	.start		= armv6pmu_start,
	.stop		= armv6pmu_stop,
	.num_events	= 3,
	.max_period	= (1LLU << 32) - 1,
};

/*
 * ARMv6mpcore is almost identical to single core ARMv6 with the exception
 * that some of the events have different enumerations and that there is no
 * *hack* to stop the programmable counters. To stop the counters we simply
 * disable the interrupt reporting and update the event. When unthrottling we
 * reset the period and enable the interrupt reporting.
 */
static const struct arm_pmu armv6mpcore_pmu = {	/* as armv6pmu, except: */
	.id		= ARM_PERF_PMU_ID_V6MP,
	.disable	= armv6mpcore_pmu_disable_event,
	.event_map	= armv6mpcore_pmu_event_map,
	/* remaining callbacks, num_events and max_period identical to armv6pmu */
};
[Removed listing, continued: the ARMv7 (Cortex-A8/Cortex-A9) back-end, condensed;
this is the code now in arch/arm/kernel/perf_event_v7.c.]

/*
 * ARMv7 Cortex-A8 and Cortex-A9 Performance Events handling code.
 *
 * Copied from ARMv6 code, with the low level code inspired
 * by the ARMv7 Oprofile code.
 *
 * Cortex-A8 has up to 4 configurable performance counters and
 * a single cycle counter.
 * Cortex-A9 has up to 31 configurable performance counters and
 * a single cycle counter.
 *
 * All counters can be enabled/disabled and IRQ masked separately. The cycle
 * counter and all 4 performance counters together can be reset separately.
 */

/* Common ARMv7 event types */
enum armv7_perf_types {
	ARMV7_PERFCTR_PMNC_SW_INCR       = 0x00,  ARMV7_PERFCTR_IFETCH_MISS        = 0x01,
	ARMV7_PERFCTR_ITLB_MISS          = 0x02,  ARMV7_PERFCTR_DCACHE_REFILL      = 0x03,
	ARMV7_PERFCTR_DCACHE_ACCESS      = 0x04,  ARMV7_PERFCTR_DTLB_REFILL        = 0x05,
	ARMV7_PERFCTR_DREAD              = 0x06,  ARMV7_PERFCTR_DWRITE             = 0x07,
	ARMV7_PERFCTR_EXC_TAKEN          = 0x09,  ARMV7_PERFCTR_EXC_EXECUTED       = 0x0A,
	ARMV7_PERFCTR_CID_WRITE          = 0x0B,
	/* ARMV7_PERFCTR_PC_WRITE is equivalent to HW_BRANCH_INSTRUCTIONS. It counts
	 * all branch instructions, instructions that explicitly write the PC, and
	 * exception generating instructions. */
	ARMV7_PERFCTR_PC_WRITE           = 0x0C,  ARMV7_PERFCTR_PC_IMM_BRANCH      = 0x0D,
	ARMV7_PERFCTR_UNALIGNED_ACCESS   = 0x0F,  ARMV7_PERFCTR_PC_BRANCH_MIS_PRED = 0x10,
	ARMV7_PERFCTR_CLOCK_CYCLES       = 0x11,  ARMV7_PERFCTR_PC_BRANCH_MIS_USED = 0x12,
	ARMV7_PERFCTR_CPU_CYCLES         = 0xFF
};

/* ARMv7 Cortex-A8 specific event types */
enum armv7_a8_perf_types {
	ARMV7_PERFCTR_INSTR_EXECUTED     = 0x08,  ARMV7_PERFCTR_PC_PROC_RETURN     = 0x0E,
	ARMV7_PERFCTR_WRITE_BUFFER_FULL  = 0x40,  ARMV7_PERFCTR_L2_STORE_MERGED    = 0x41,
	ARMV7_PERFCTR_L2_STORE_BUFF      = 0x42,  ARMV7_PERFCTR_L2_ACCESS          = 0x43,
	ARMV7_PERFCTR_L2_CACH_MISS       = 0x44,  ARMV7_PERFCTR_AXI_READ_CYCLES    = 0x45,
	ARMV7_PERFCTR_AXI_WRITE_CYCLES   = 0x46,  ARMV7_PERFCTR_MEMORY_REPLAY      = 0x47,
	ARMV7_PERFCTR_UNALIGNED_ACCESS_REPLAY = 0x48,  ARMV7_PERFCTR_L1_DATA_MISS  = 0x49,
	ARMV7_PERFCTR_L1_INST_MISS       = 0x4A,  ARMV7_PERFCTR_L1_DATA_COLORING   = 0x4B,
	ARMV7_PERFCTR_L1_NEON_DATA       = 0x4C,  ARMV7_PERFCTR_L1_NEON_CACH_DATA  = 0x4D,
	ARMV7_PERFCTR_L2_NEON            = 0x4E,  ARMV7_PERFCTR_L2_NEON_HIT        = 0x4F,
	ARMV7_PERFCTR_L1_INST            = 0x50,  ARMV7_PERFCTR_PC_RETURN_MIS_PRED = 0x51,
	ARMV7_PERFCTR_PC_BRANCH_FAILED   = 0x52,  ARMV7_PERFCTR_PC_BRANCH_TAKEN    = 0x53,
	ARMV7_PERFCTR_PC_BRANCH_EXECUTED = 0x54,  ARMV7_PERFCTR_OP_EXECUTED        = 0x55,
	ARMV7_PERFCTR_CYCLES_INST_STALL  = 0x56,  ARMV7_PERFCTR_CYCLES_INST        = 0x57,
	ARMV7_PERFCTR_CYCLES_NEON_DATA_STALL = 0x58,  ARMV7_PERFCTR_CYCLES_NEON_INST_STALL = 0x59,
	ARMV7_PERFCTR_NEON_CYCLES        = 0x5A,  ARMV7_PERFCTR_PMU0_EVENTS        = 0x70,
	ARMV7_PERFCTR_PMU1_EVENTS        = 0x71,  ARMV7_PERFCTR_PMU_EVENTS         = 0x72,
};

/* ARMv7 Cortex-A9 specific event types */
enum armv7_a9_perf_types {
	ARMV7_PERFCTR_JAVA_HW_BYTECODE_EXEC = 0x40,  ARMV7_PERFCTR_JAVA_SW_BYTECODE_EXEC = 0x41,
	ARMV7_PERFCTR_JAZELLE_BRANCH_EXEC   = 0x42,  ARMV7_PERFCTR_COHERENT_LINE_MISS    = 0x50,
	ARMV7_PERFCTR_COHERENT_LINE_HIT     = 0x51,  ARMV7_PERFCTR_ICACHE_DEP_STALL_CYCLES = 0x60,
	ARMV7_PERFCTR_DCACHE_DEP_STALL_CYCLES = 0x61, ARMV7_PERFCTR_TLB_MISS_DEP_STALL_CYCLES = 0x62,
	ARMV7_PERFCTR_STREX_EXECUTED_PASSED = 0x63,  ARMV7_PERFCTR_STREX_EXECUTED_FAILED = 0x64,
	ARMV7_PERFCTR_DATA_EVICTION         = 0x65,  ARMV7_PERFCTR_ISSUE_STAGE_NO_INST   = 0x66,
	ARMV7_PERFCTR_ISSUE_STAGE_EMPTY     = 0x67,  ARMV7_PERFCTR_INST_OUT_OF_RENAME_STAGE = 0x68,
	ARMV7_PERFCTR_PREDICTABLE_FUNCT_RETURNS = 0x6E, ARMV7_PERFCTR_MAIN_UNIT_EXECUTED_INST = 0x70,
	ARMV7_PERFCTR_SECOND_UNIT_EXECUTED_INST = 0x71, ARMV7_PERFCTR_LD_ST_UNIT_EXECUTED_INST = 0x72,
	ARMV7_PERFCTR_FP_EXECUTED_INST      = 0x73,  ARMV7_PERFCTR_NEON_EXECUTED_INST    = 0x74,
	ARMV7_PERFCTR_PLD_FULL_DEP_STALL_CYCLES = 0x80, ARMV7_PERFCTR_DATA_WR_DEP_STALL_CYCLES = 0x81,
	ARMV7_PERFCTR_ITLB_MISS_DEP_STALL_CYCLES = 0x82, ARMV7_PERFCTR_DTLB_MISS_DEP_STALL_CYCLES = 0x83,
	ARMV7_PERFCTR_MICRO_ITLB_MISS_DEP_STALL_CYCLES = 0x84, ARMV7_PERFCTR_MICRO_DTLB_MISS_DEP_STALL_CYCLES = 0x85,
	ARMV7_PERFCTR_DMB_DEP_STALL_CYCLES  = 0x86,  ARMV7_PERFCTR_INTGR_CLK_ENABLED_CYCLES = 0x8A,
	ARMV7_PERFCTR_DATA_ENGINE_CLK_EN_CYCLES = 0x8B, ARMV7_PERFCTR_ISB_INST          = 0x90,
	ARMV7_PERFCTR_DSB_INST              = 0x91,  ARMV7_PERFCTR_DMB_INST              = 0x92,
	ARMV7_PERFCTR_EXT_INTERRUPTS        = 0x93,  ARMV7_PERFCTR_PLE_CACHE_LINE_RQST_COMPLETED = 0xA0,
	ARMV7_PERFCTR_PLE_CACHE_LINE_RQST_SKIPPED = 0xA1, ARMV7_PERFCTR_PLE_FIFO_FLUSH   = 0xA2,
	ARMV7_PERFCTR_PLE_RQST_COMPLETED    = 0xA3,  ARMV7_PERFCTR_PLE_FIFO_OVERFLOW     = 0xA4,
	ARMV7_PERFCTR_PLE_RQST_PROG         = 0xA5
};

/* Cortex-A8 HW events mapping (harvard caches, no combined I+D events). */
static const unsigned armv7_a8_perf_map[PERF_COUNT_HW_MAX] = {
	[PERF_COUNT_HW_CPU_CYCLES]	    = ARMV7_PERFCTR_CPU_CYCLES,
	[PERF_COUNT_HW_INSTRUCTIONS]	    = ARMV7_PERFCTR_INSTR_EXECUTED,
	[PERF_COUNT_HW_CACHE_REFERENCES]    = HW_OP_UNSUPPORTED,
	[PERF_COUNT_HW_CACHE_MISSES]	    = HW_OP_UNSUPPORTED,
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
	[PERF_COUNT_HW_BRANCH_MISSES]	    = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
	[PERF_COUNT_HW_BUS_CYCLES]	    = ARMV7_PERFCTR_CLOCK_CYCLES,
};

armv7_a8_perf_cache_map[][][] (condensed): L1D read/write map to DCACHE_ACCESS /
DCACHE_REFILL, L1I to L1_INST / L1_INST_MISS, LL to L2_ACCESS / L2_CACH_MISS,
DTLB misses to DTLB_REFILL, ITLB misses to ITLB_MISS, BPU to PC_WRITE /
PC_BRANCH_MIS_PRED; prefetch entries are CACHE_OP_UNSUPPORTED.

/* Cortex-A9 HW events mapping */
static const unsigned armv7_a9_perf_map[PERF_COUNT_HW_MAX] = {
	[PERF_COUNT_HW_CPU_CYCLES]	    = ARMV7_PERFCTR_CPU_CYCLES,
	[PERF_COUNT_HW_INSTRUCTIONS]	    = ARMV7_PERFCTR_INST_OUT_OF_RENAME_STAGE,
	[PERF_COUNT_HW_CACHE_REFERENCES]    = ARMV7_PERFCTR_COHERENT_LINE_HIT,
	[PERF_COUNT_HW_CACHE_MISSES]	    = ARMV7_PERFCTR_COHERENT_LINE_MISS,
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
	[PERF_COUNT_HW_BRANCH_MISSES]	    = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
	[PERF_COUNT_HW_BUS_CYCLES]	    = ARMV7_PERFCTR_CLOCK_CYCLES,
};

armv7_a9_perf_cache_map[][][] (condensed): as the A8 map, except L1I has only a
miss event (IFETCH_MISS) and LL is entirely CACHE_OP_UNSUPPORTED.

/* Perf Events counters */
enum armv7_counters {
	ARMV7_CYCLE_COUNTER = 1,	/* Cycle counter */
	ARMV7_COUNTER0	    = 2,	/* First event counter */
};
/* The last event counter: */
#define ARMV7_COUNTER_LAST	(ARMV7_COUNTER0 + armpmu->num_events - 1)

/* ARMv7 low level PMNC access: per-CPU PMNC config register */
#define ARMV7_PMNC_E		(1 << 0)	/* Enable all counters */
#define ARMV7_PMNC_P		(1 << 1)	/* Reset all counters */
#define ARMV7_PMNC_C		(1 << 2)	/* Cycle counter reset */
#define ARMV7_PMNC_D		(1 << 3)	/* CCNT counts every 64th cpu cycle */
#define ARMV7_PMNC_X		(1 << 4)	/* Export to ETM */
#define ARMV7_PMNC_DP		(1 << 5)	/* Disable CCNT if non-invasive debug */
#define ARMV7_PMNC_N_SHIFT	11		/* Number of counters supported */
#define ARMV7_PMNC_N_MASK	0x1f
#define ARMV7_PMNC_MASK		0x3f		/* Mask for writable bits */

/* Available counters */
#define ARMV7_CNT0		0		/* First event counter */
#define ARMV7_CCNT		31		/* Cycle counter */
#define ARMV7_EVENT_CNT_TO_CNTx	(ARMV7_COUNTER0 - ARMV7_CNT0)

/* CNTENS/CNTENC (counter enable/disable), INTENS/INTENC (overflow interrupt
 * enable/disable) and FLAG (overflow status) all use the same bit layout: */
#define ARMV7_CNTENS_P(idx)	(1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
#define ARMV7_CNTENS_C		(1 << ARMV7_CCNT)
#define ARMV7_CNTENC_P(idx)	(1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
#define ARMV7_CNTENC_C		(1 << ARMV7_CCNT)
#define ARMV7_INTENS_P(idx)	(1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
#define ARMV7_INTENS_C		(1 << ARMV7_CCNT)
#define ARMV7_INTENC_P(idx)	(1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
#define ARMV7_INTENC_C		(1 << ARMV7_CCNT)
#define ARMV7_EVTSEL_MASK	0xff		/* EVTSEL writable bits */
#define ARMV7_SELECT_MASK	0x1f		/* SELECT writable bits */
#define ARMV7_FLAG_P(idx)	(1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
#define ARMV7_FLAG_C		(1 << ARMV7_CCNT)
#define ARMV7_FLAG_MASK		0xffffffff	/* Mask for writable bits */
#define ARMV7_OVERFLOWED_MASK	ARMV7_FLAG_MASK

static inline unsigned long armv7_pmnc_read(void)
{
	u32 val;
	asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r"(val));
	return val;
}

static inline void armv7_pmnc_write(unsigned long val)
{
	val &= ARMV7_PMNC_MASK;
	asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(val));
}

The rest of the removed ARMv7 code follows the same shape as the ARMv6 driver,
built on the registers above:

  * armv7_pmnc_has_overflowed() and armv7_pmnc_counter_has_overflowed() test the
    FLAG bits; armv7_pmnc_getreset_flags() reads and write-clears them
    (c9, c12, 3);
  * armv7_pmnc_select_counter() writes SELECT (c9, c12, 5), after which
    armv7pmu_read_counter()/armv7pmu_write_counter() access CCNT (c9, c13, 0) or
    the selected PMNx (c9, c13, 2), and armv7_pmnc_write_evtsel() sets the event
    (c9, c13, 1);
  * armv7_pmnc_enable_counter()/disable_counter() write CNTENS/CNTENC
    (c9, c12, 1 and 2) and armv7_pmnc_enable_intens()/disable_intens() write
    INTENS/INTENC (c9, c14, 1 and 2); a DEBUG-only armv7_pmnc_dump_regs() prints
    all of these registers;
  * armv7pmu_enable_event() disables the counter, programs EVTSEL (skipped for
    the cycle counter), enables its interrupt and re-enables it, all under
    pmu_lock; armv7pmu_disable_event() disables the counter and its interrupt;
  * armv7pmu_handle_irq() uses the read-and-reset flags, updates each active,
    overflowed event and calls irq_work_run(), as on ARMv6;
  * armv7pmu_start()/armv7pmu_stop() set and clear ARMV7_PMNC_E;
  * armv7_a8_pmu_event_map()/armv7_a9_pmu_event_map() index the maps above,
    armv7pmu_raw_event() masks the config with 0xff, and
    armv7pmu_get_event_idx() uses the cycle counter for cycle events and the
    first free PMNx otherwise;
  * the shared struct arm_pmu armv7pmu carries these callbacks with
    max_period = (1LLU << 32) - 1 (its id, event_map and num_events fields are
    not set here), and armv7_reset_read_pmnc() resets the PMU via
    ARMV7_PMNC_P | ARMV7_PMNC_C and returns the number of event counters from the
    PMNC N field plus one for the cycle counter.
/*
* ARMv5 [xscale] Performance counter handling code.
*
* Based on xscale OProfile code.
*
* There are two variants of the xscale PMU that we support:
* - xscale1pmu: 2 event counters and a cycle counter
* - xscale2pmu: 4 event counters and a cycle counter
* The two variants share event definitions, but have different
* PMU structures.
*/
enum
xscale_perf_types
{
XSCALE_PERFCTR_ICACHE_MISS
=
0x00
,
XSCALE_PERFCTR_ICACHE_NO_DELIVER
=
0x01
,
XSCALE_PERFCTR_DATA_STALL
=
0x02
,
XSCALE_PERFCTR_ITLB_MISS
=
0x03
,
XSCALE_PERFCTR_DTLB_MISS
=
0x04
,
XSCALE_PERFCTR_BRANCH
=
0x05
,
XSCALE_PERFCTR_BRANCH_MISS
=
0x06
,
XSCALE_PERFCTR_INSTRUCTION
=
0x07
,
XSCALE_PERFCTR_DCACHE_FULL_STALL
=
0x08
,
XSCALE_PERFCTR_DCACHE_FULL_STALL_CONTIG
=
0x09
,
XSCALE_PERFCTR_DCACHE_ACCESS
=
0x0A
,
XSCALE_PERFCTR_DCACHE_MISS
=
0x0B
,
XSCALE_PERFCTR_DCACHE_WRITE_BACK
=
0x0C
,
XSCALE_PERFCTR_PC_CHANGED
=
0x0D
,
XSCALE_PERFCTR_BCU_REQUEST
=
0x10
,
XSCALE_PERFCTR_BCU_FULL
=
0x11
,
XSCALE_PERFCTR_BCU_DRAIN
=
0x12
,
XSCALE_PERFCTR_BCU_ECC_NO_ELOG
=
0x14
,
XSCALE_PERFCTR_BCU_1_BIT_ERR
=
0x15
,
XSCALE_PERFCTR_RMW
=
0x16
,
/* XSCALE_PERFCTR_CCNT is not hardware defined */
XSCALE_PERFCTR_CCNT
=
0xFE
,
XSCALE_PERFCTR_UNUSED
=
0xFF
,
};
enum
xscale_counters
{
XSCALE_CYCLE_COUNTER
=
1
,
XSCALE_COUNTER0
,
XSCALE_COUNTER1
,
XSCALE_COUNTER2
,
XSCALE_COUNTER3
,
};
static
const
unsigned
xscale_perf_map
[
PERF_COUNT_HW_MAX
]
=
{
[
PERF_COUNT_HW_CPU_CYCLES
]
=
XSCALE_PERFCTR_CCNT
,
[
PERF_COUNT_HW_INSTRUCTIONS
]
=
XSCALE_PERFCTR_INSTRUCTION
,
[
PERF_COUNT_HW_CACHE_REFERENCES
]
=
HW_OP_UNSUPPORTED
,
[
PERF_COUNT_HW_CACHE_MISSES
]
=
HW_OP_UNSUPPORTED
,
[
PERF_COUNT_HW_BRANCH_INSTRUCTIONS
]
=
XSCALE_PERFCTR_BRANCH
,
[
PERF_COUNT_HW_BRANCH_MISSES
]
=
XSCALE_PERFCTR_BRANCH_MISS
,
[
PERF_COUNT_HW_BUS_CYCLES
]
=
HW_OP_UNSUPPORTED
,
};
static
const
unsigned
xscale_perf_cache_map
[
PERF_COUNT_HW_CACHE_MAX
]
[
PERF_COUNT_HW_CACHE_OP_MAX
]
[
PERF_COUNT_HW_CACHE_RESULT_MAX
]
=
{
[
C
(
L1D
)]
=
{
[
C
(
OP_READ
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
XSCALE_PERFCTR_DCACHE_ACCESS
,
[
C
(
RESULT_MISS
)]
=
XSCALE_PERFCTR_DCACHE_MISS
,
},
[
C
(
OP_WRITE
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
XSCALE_PERFCTR_DCACHE_ACCESS
,
[
C
(
RESULT_MISS
)]
=
XSCALE_PERFCTR_DCACHE_MISS
,
},
[
C
(
OP_PREFETCH
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
CACHE_OP_UNSUPPORTED
,
[
C
(
RESULT_MISS
)]
=
CACHE_OP_UNSUPPORTED
,
},
},
[
C
(
L1I
)]
=
{
[
C
(
OP_READ
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
CACHE_OP_UNSUPPORTED
,
[
C
(
RESULT_MISS
)]
=
XSCALE_PERFCTR_ICACHE_MISS
,
},
[
C
(
OP_WRITE
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
CACHE_OP_UNSUPPORTED
,
[
C
(
RESULT_MISS
)]
=
XSCALE_PERFCTR_ICACHE_MISS
,
},
[
C
(
OP_PREFETCH
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
CACHE_OP_UNSUPPORTED
,
[
C
(
RESULT_MISS
)]
=
CACHE_OP_UNSUPPORTED
,
},
},
[
C
(
LL
)]
=
{
[
C
(
OP_READ
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
CACHE_OP_UNSUPPORTED
,
[
C
(
RESULT_MISS
)]
=
CACHE_OP_UNSUPPORTED
,
},
[
C
(
OP_WRITE
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
CACHE_OP_UNSUPPORTED
,
[
C
(
RESULT_MISS
)]
=
CACHE_OP_UNSUPPORTED
,
},
[
C
(
OP_PREFETCH
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
CACHE_OP_UNSUPPORTED
,
[
C
(
RESULT_MISS
)]
=
CACHE_OP_UNSUPPORTED
,
},
},
[
C
(
DTLB
)]
=
{
[
C
(
OP_READ
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
CACHE_OP_UNSUPPORTED
,
[
C
(
RESULT_MISS
)]
=
XSCALE_PERFCTR_DTLB_MISS
,
},
[
C
(
OP_WRITE
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
CACHE_OP_UNSUPPORTED
,
[
C
(
RESULT_MISS
)]
=
XSCALE_PERFCTR_DTLB_MISS
,
},
[
C
(
OP_PREFETCH
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
CACHE_OP_UNSUPPORTED
,
[
C
(
RESULT_MISS
)]
=
CACHE_OP_UNSUPPORTED
,
},
},
[
C
(
ITLB
)]
=
{
[
C
(
OP_READ
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
CACHE_OP_UNSUPPORTED
,
[
C
(
RESULT_MISS
)]
=
XSCALE_PERFCTR_ITLB_MISS
,
},
[
C
(
OP_WRITE
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
CACHE_OP_UNSUPPORTED
,
[
C
(
RESULT_MISS
)]
=
XSCALE_PERFCTR_ITLB_MISS
,
},
[
C
(
OP_PREFETCH
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
CACHE_OP_UNSUPPORTED
,
[
C
(
RESULT_MISS
)]
=
CACHE_OP_UNSUPPORTED
,
},
},
[
C
(
BPU
)]
=
{
[
C
(
OP_READ
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
CACHE_OP_UNSUPPORTED
,
[
C
(
RESULT_MISS
)]
=
CACHE_OP_UNSUPPORTED
,
},
[
C
(
OP_WRITE
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
CACHE_OP_UNSUPPORTED
,
[
C
(
RESULT_MISS
)]
=
CACHE_OP_UNSUPPORTED
,
},
[
C
(
OP_PREFETCH
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
CACHE_OP_UNSUPPORTED
,
[
C
(
RESULT_MISS
)]
=
CACHE_OP_UNSUPPORTED
,
},
},
};
#define XSCALE_PMU_ENABLE 0x001
#define XSCALE_PMN_RESET 0x002
#define XSCALE_CCNT_RESET 0x004
#define XSCALE_PMU_RESET (CCNT_RESET | PMN_RESET)
#define XSCALE_PMU_CNT64 0x008
static
inline
int
xscalepmu_event_map
(
int
config
)
{
int
mapping
=
xscale_perf_map
[
config
];
if
(
HW_OP_UNSUPPORTED
==
mapping
)
mapping
=
-
EOPNOTSUPP
;
return
mapping
;
}
static
u64
xscalepmu_raw_event
(
u64
config
)
{
return
config
&
0xff
;
}
#define XSCALE1_OVERFLOWED_MASK 0x700
#define XSCALE1_CCOUNT_OVERFLOW 0x400
#define XSCALE1_COUNT0_OVERFLOW 0x100
#define XSCALE1_COUNT1_OVERFLOW 0x200
#define XSCALE1_CCOUNT_INT_EN 0x040
#define XSCALE1_COUNT0_INT_EN 0x010
#define XSCALE1_COUNT1_INT_EN 0x020
#define XSCALE1_COUNT0_EVT_SHFT 12
#define XSCALE1_COUNT0_EVT_MASK (0xff << XSCALE1_COUNT0_EVT_SHFT)
#define XSCALE1_COUNT1_EVT_SHFT 20
#define XSCALE1_COUNT1_EVT_MASK (0xff << XSCALE1_COUNT1_EVT_SHFT)
static
inline
u32
xscale1pmu_read_pmnc
(
void
)
{
u32
val
;
asm
volatile
(
"mrc p14, 0, %0, c0, c0, 0"
:
"=r"
(
val
));
return
val
;
}
static
inline
void
xscale1pmu_write_pmnc
(
u32
val
)
{
/* upper 4bits and 7, 11 are write-as-0 */
val
&=
0xffff77f
;
asm
volatile
(
"mcr p14, 0, %0, c0, c0, 0"
:
:
"r"
(
val
));
}
static
inline
int
xscale1_pmnc_counter_has_overflowed
(
unsigned
long
pmnc
,
enum
xscale_counters
counter
)
{
int
ret
=
0
;
switch
(
counter
)
{
case
XSCALE_CYCLE_COUNTER
:
ret
=
pmnc
&
XSCALE1_CCOUNT_OVERFLOW
;
break
;
case
XSCALE_COUNTER0
:
ret
=
pmnc
&
XSCALE1_COUNT0_OVERFLOW
;
break
;
case
XSCALE_COUNTER1
:
ret
=
pmnc
&
XSCALE1_COUNT1_OVERFLOW
;
break
;
default:
WARN_ONCE
(
1
,
"invalid counter number (%d)
\n
"
,
counter
);
}
return
ret
;
}
static irqreturn_t
xscale1pmu_handle_irq(int irq_num, void *dev)
{
	unsigned long pmnc;
	struct perf_sample_data data;
	struct cpu_hw_events *cpuc;
	struct pt_regs *regs;
	int idx;

	/*
	 * NOTE: there's an A stepping erratum that states if an overflow
	 *       bit already exists and another occurs, the previous
	 *       Overflow bit gets cleared. There's no workaround.
	 *       Fixed in B stepping or later.
	 */
	pmnc = xscale1pmu_read_pmnc();

	/*
	 * Write the value back to clear the overflow flags. Overflow
	 * flags remain in pmnc for use below. We also disable the PMU
	 * while we process the interrupt.
	 */
	xscale1pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE);

	if (!(pmnc & XSCALE1_OVERFLOWED_MASK))
		return IRQ_NONE;

	regs = get_irq_regs();

	perf_sample_data_init(&data, 0);

	cpuc = &__get_cpu_var(cpu_hw_events);
	for (idx = 0; idx <= armpmu->num_events; ++idx) {
		struct perf_event *event = cpuc->events[idx];
		struct hw_perf_event *hwc;

		if (!test_bit(idx, cpuc->active_mask))
			continue;

		if (!xscale1_pmnc_counter_has_overflowed(pmnc, idx))
			continue;

		hwc = &event->hw;
		armpmu_event_update(event, hwc, idx);
		data.period = event->hw.last_period;
		if (!armpmu_event_set_period(event, hwc, idx))
			continue;

		if (perf_event_overflow(event, 0, &data, regs))
			armpmu->disable(hwc, idx);
	}

	irq_work_run();

	/*
	 * Re-enable the PMU.
	 */
	pmnc = xscale1pmu_read_pmnc() | XSCALE_PMU_ENABLE;
	xscale1pmu_write_pmnc(pmnc);

	return IRQ_HANDLED;
}
static void
xscale1pmu_enable_event(struct hw_perf_event *hwc, int idx)
{
	unsigned long val, mask, evt, flags;

	switch (idx) {
	case XSCALE_CYCLE_COUNTER:
		mask = 0;
		evt = XSCALE1_CCOUNT_INT_EN;
		break;
	case XSCALE_COUNTER0:
		mask = XSCALE1_COUNT0_EVT_MASK;
		evt = (hwc->config_base << XSCALE1_COUNT0_EVT_SHFT) |
			XSCALE1_COUNT0_INT_EN;
		break;
	case XSCALE_COUNTER1:
		mask = XSCALE1_COUNT1_EVT_MASK;
		evt = (hwc->config_base << XSCALE1_COUNT1_EVT_SHFT) |
			XSCALE1_COUNT1_INT_EN;
		break;
	default:
		WARN_ONCE(1, "invalid counter number (%d)\n", idx);
		return;
	}

	spin_lock_irqsave(&pmu_lock, flags);
	val = xscale1pmu_read_pmnc();
	val &= ~mask;
	val |= evt;
	xscale1pmu_write_pmnc(val);
	spin_unlock_irqrestore(&pmu_lock, flags);
}

static void
xscale1pmu_disable_event(struct hw_perf_event *hwc, int idx)
{
	unsigned long val, mask, evt, flags;

	switch (idx) {
	case XSCALE_CYCLE_COUNTER:
		mask = XSCALE1_CCOUNT_INT_EN;
		evt = 0;
		break;
	case XSCALE_COUNTER0:
		mask = XSCALE1_COUNT0_INT_EN | XSCALE1_COUNT0_EVT_MASK;
		evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT0_EVT_SHFT;
		break;
	case XSCALE_COUNTER1:
		mask = XSCALE1_COUNT1_INT_EN | XSCALE1_COUNT1_EVT_MASK;
		evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT1_EVT_SHFT;
		break;
	default:
		WARN_ONCE(1, "invalid counter number (%d)\n", idx);
		return;
	}

	spin_lock_irqsave(&pmu_lock, flags);
	val = xscale1pmu_read_pmnc();
	val &= ~mask;
	val |= evt;
	xscale1pmu_write_pmnc(val);
	spin_unlock_irqrestore(&pmu_lock, flags);
}
static int
xscale1pmu_get_event_idx(struct cpu_hw_events *cpuc,
			struct hw_perf_event *event)
{
	if (XSCALE_PERFCTR_CCNT == event->config_base) {
		if (test_and_set_bit(XSCALE_CYCLE_COUNTER, cpuc->used_mask))
			return -EAGAIN;

		return XSCALE_CYCLE_COUNTER;
	} else {
		if (!test_and_set_bit(XSCALE_COUNTER1, cpuc->used_mask)) {
			return XSCALE_COUNTER1;
		}

		if (!test_and_set_bit(XSCALE_COUNTER0, cpuc->used_mask)) {
			return XSCALE_COUNTER0;
		}

		return -EAGAIN;
	}
}

static void
xscale1pmu_start(void)
{
	unsigned long flags, val;

	spin_lock_irqsave(&pmu_lock, flags);
	val = xscale1pmu_read_pmnc();
	val |= XSCALE_PMU_ENABLE;
	xscale1pmu_write_pmnc(val);
	spin_unlock_irqrestore(&pmu_lock, flags);
}

static void
xscale1pmu_stop(void)
{
	unsigned long flags, val;

	spin_lock_irqsave(&pmu_lock, flags);
	val = xscale1pmu_read_pmnc();
	val &= ~XSCALE_PMU_ENABLE;
	xscale1pmu_write_pmnc(val);
	spin_unlock_irqrestore(&pmu_lock, flags);
}
static inline u32
xscale1pmu_read_counter(int counter)
{
	u32 val = 0;

	switch (counter) {
	case XSCALE_CYCLE_COUNTER:
		asm volatile("mrc p14, 0, %0, c1, c0, 0" : "=r" (val));
		break;
	case XSCALE_COUNTER0:
		asm volatile("mrc p14, 0, %0, c2, c0, 0" : "=r" (val));
		break;
	case XSCALE_COUNTER1:
		asm volatile("mrc p14, 0, %0, c3, c0, 0" : "=r" (val));
		break;
	}

	return val;
}

static inline void
xscale1pmu_write_counter(int counter, u32 val)
{
	switch (counter) {
	case XSCALE_CYCLE_COUNTER:
		asm volatile("mcr p14, 0, %0, c1, c0, 0" : : "r" (val));
		break;
	case XSCALE_COUNTER0:
		asm volatile("mcr p14, 0, %0, c2, c0, 0" : : "r" (val));
		break;
	case XSCALE_COUNTER1:
		asm volatile("mcr p14, 0, %0, c3, c0, 0" : : "r" (val));
		break;
	}
}

static const struct arm_pmu xscale1pmu = {
	.id		= ARM_PERF_PMU_ID_XSCALE1,
	.handle_irq	= xscale1pmu_handle_irq,
	.enable		= xscale1pmu_enable_event,
	.disable	= xscale1pmu_disable_event,
	.event_map	= xscalepmu_event_map,
	.raw_event	= xscalepmu_raw_event,
	.read_counter	= xscale1pmu_read_counter,
	.write_counter	= xscale1pmu_write_counter,
	.get_event_idx	= xscale1pmu_get_event_idx,
	.start		= xscale1pmu_start,
	.stop		= xscale1pmu_stop,
	.num_events	= 3,
	.max_period	= (1LLU << 32) - 1,
};
#define XSCALE2_OVERFLOWED_MASK 0x01f
#define XSCALE2_CCOUNT_OVERFLOW 0x001
#define XSCALE2_COUNT0_OVERFLOW 0x002
#define XSCALE2_COUNT1_OVERFLOW 0x004
#define XSCALE2_COUNT2_OVERFLOW 0x008
#define XSCALE2_COUNT3_OVERFLOW 0x010
#define XSCALE2_CCOUNT_INT_EN 0x001
#define XSCALE2_COUNT0_INT_EN 0x002
#define XSCALE2_COUNT1_INT_EN 0x004
#define XSCALE2_COUNT2_INT_EN 0x008
#define XSCALE2_COUNT3_INT_EN 0x010
#define XSCALE2_COUNT0_EVT_SHFT 0
#define XSCALE2_COUNT0_EVT_MASK (0xff << XSCALE2_COUNT0_EVT_SHFT)
#define XSCALE2_COUNT1_EVT_SHFT 8
#define XSCALE2_COUNT1_EVT_MASK (0xff << XSCALE2_COUNT1_EVT_SHFT)
#define XSCALE2_COUNT2_EVT_SHFT 16
#define XSCALE2_COUNT2_EVT_MASK (0xff << XSCALE2_COUNT2_EVT_SHFT)
#define XSCALE2_COUNT3_EVT_SHFT 24
#define XSCALE2_COUNT3_EVT_MASK (0xff << XSCALE2_COUNT3_EVT_SHFT)
static inline u32
xscale2pmu_read_pmnc(void)
{
	u32 val;
	asm volatile("mrc p14, 0, %0, c0, c1, 0" : "=r" (val));
	/* bits 1-2 and 4-23 are read-unpredictable */
	return val & 0xff000009;
}

static inline void
xscale2pmu_write_pmnc(u32 val)
{
	/* bits 4-23 are write-as-0, 24-31 are write ignored */
	val &= 0xf;
	asm volatile("mcr p14, 0, %0, c0, c1, 0" : : "r" (val));
}

static inline u32
xscale2pmu_read_overflow_flags(void)
{
	u32 val;
	asm volatile("mrc p14, 0, %0, c5, c1, 0" : "=r" (val));
	return val;
}

static inline void
xscale2pmu_write_overflow_flags(u32 val)
{
	asm volatile("mcr p14, 0, %0, c5, c1, 0" : : "r" (val));
}

static inline u32
xscale2pmu_read_event_select(void)
{
	u32 val;
	asm volatile("mrc p14, 0, %0, c8, c1, 0" : "=r" (val));
	return val;
}

static inline void
xscale2pmu_write_event_select(u32 val)
{
	asm volatile("mcr p14, 0, %0, c8, c1, 0" : : "r" (val));
}

static inline u32
xscale2pmu_read_int_enable(void)
{
	u32 val;
	asm volatile("mrc p14, 0, %0, c4, c1, 0" : "=r" (val));
	return val;
}

static void
xscale2pmu_write_int_enable(u32 val)
{
	asm volatile("mcr p14, 0, %0, c4, c1, 0" : : "r" (val));
}

static inline int
xscale2_pmnc_counter_has_overflowed(unsigned long of_flags,
				    enum xscale_counters counter)
{
	int ret = 0;

	switch (counter) {
	case XSCALE_CYCLE_COUNTER:
		ret = of_flags & XSCALE2_CCOUNT_OVERFLOW;
		break;
	case XSCALE_COUNTER0:
		ret = of_flags & XSCALE2_COUNT0_OVERFLOW;
		break;
	case XSCALE_COUNTER1:
		ret = of_flags & XSCALE2_COUNT1_OVERFLOW;
		break;
	case XSCALE_COUNTER2:
		ret = of_flags & XSCALE2_COUNT2_OVERFLOW;
		break;
	case XSCALE_COUNTER3:
		ret = of_flags & XSCALE2_COUNT3_OVERFLOW;
		break;
	default:
		WARN_ONCE(1, "invalid counter number (%d)\n", counter);
	}

	return ret;
}
static irqreturn_t
xscale2pmu_handle_irq(int irq_num, void *dev)
{
	unsigned long pmnc, of_flags;
	struct perf_sample_data data;
	struct cpu_hw_events *cpuc;
	struct pt_regs *regs;
	int idx;

	/* Disable the PMU. */
	pmnc = xscale2pmu_read_pmnc();
	xscale2pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE);

	/* Check the overflow flag register. */
	of_flags = xscale2pmu_read_overflow_flags();
	if (!(of_flags & XSCALE2_OVERFLOWED_MASK))
		return IRQ_NONE;

	/* Clear the overflow bits. */
	xscale2pmu_write_overflow_flags(of_flags);

	regs = get_irq_regs();

	perf_sample_data_init(&data, 0);

	cpuc = &__get_cpu_var(cpu_hw_events);
	for (idx = 0; idx <= armpmu->num_events; ++idx) {
		struct perf_event *event = cpuc->events[idx];
		struct hw_perf_event *hwc;

		if (!test_bit(idx, cpuc->active_mask))
			continue;

		if (!xscale2_pmnc_counter_has_overflowed(pmnc, idx))
			continue;

		hwc = &event->hw;
		armpmu_event_update(event, hwc, idx);
		data.period = event->hw.last_period;
		if (!armpmu_event_set_period(event, hwc, idx))
			continue;

		if (perf_event_overflow(event, 0, &data, regs))
			armpmu->disable(hwc, idx);
	}

	irq_work_run();

	/*
	 * Re-enable the PMU.
	 */
	pmnc = xscale2pmu_read_pmnc() | XSCALE_PMU_ENABLE;
	xscale2pmu_write_pmnc(pmnc);

	return IRQ_HANDLED;
}
static void
xscale2pmu_enable_event(struct hw_perf_event *hwc, int idx)
{
	unsigned long flags, ien, evtsel;

	ien = xscale2pmu_read_int_enable();
	evtsel = xscale2pmu_read_event_select();

	switch (idx) {
	case XSCALE_CYCLE_COUNTER:
		ien |= XSCALE2_CCOUNT_INT_EN;
		break;
	case XSCALE_COUNTER0:
		ien |= XSCALE2_COUNT0_INT_EN;
		evtsel &= ~XSCALE2_COUNT0_EVT_MASK;
		evtsel |= hwc->config_base << XSCALE2_COUNT0_EVT_SHFT;
		break;
	case XSCALE_COUNTER1:
		ien |= XSCALE2_COUNT1_INT_EN;
		evtsel &= ~XSCALE2_COUNT1_EVT_MASK;
		evtsel |= hwc->config_base << XSCALE2_COUNT1_EVT_SHFT;
		break;
	case XSCALE_COUNTER2:
		ien |= XSCALE2_COUNT2_INT_EN;
		evtsel &= ~XSCALE2_COUNT2_EVT_MASK;
		evtsel |= hwc->config_base << XSCALE2_COUNT2_EVT_SHFT;
		break;
	case XSCALE_COUNTER3:
		ien |= XSCALE2_COUNT3_INT_EN;
		evtsel &= ~XSCALE2_COUNT3_EVT_MASK;
		evtsel |= hwc->config_base << XSCALE2_COUNT3_EVT_SHFT;
		break;
	default:
		WARN_ONCE(1, "invalid counter number (%d)\n", idx);
		return;
	}

	spin_lock_irqsave(&pmu_lock, flags);
	xscale2pmu_write_event_select(evtsel);
	xscale2pmu_write_int_enable(ien);
	spin_unlock_irqrestore(&pmu_lock, flags);
}

static void
xscale2pmu_disable_event(struct hw_perf_event *hwc, int idx)
{
	unsigned long flags, ien, evtsel;

	ien = xscale2pmu_read_int_enable();
	evtsel = xscale2pmu_read_event_select();

	switch (idx) {
	case XSCALE_CYCLE_COUNTER:
		ien &= ~XSCALE2_CCOUNT_INT_EN;
		break;
	case XSCALE_COUNTER0:
		ien &= ~XSCALE2_COUNT0_INT_EN;
		evtsel &= ~XSCALE2_COUNT0_EVT_MASK;
		evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT0_EVT_SHFT;
		break;
	case XSCALE_COUNTER1:
		ien &= ~XSCALE2_COUNT1_INT_EN;
		evtsel &= ~XSCALE2_COUNT1_EVT_MASK;
		evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT1_EVT_SHFT;
		break;
	case XSCALE_COUNTER2:
		ien &= ~XSCALE2_COUNT2_INT_EN;
		evtsel &= ~XSCALE2_COUNT2_EVT_MASK;
		evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT2_EVT_SHFT;
		break;
	case XSCALE_COUNTER3:
		ien &= ~XSCALE2_COUNT3_INT_EN;
		evtsel &= ~XSCALE2_COUNT3_EVT_MASK;
		evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT3_EVT_SHFT;
		break;
	default:
		WARN_ONCE(1, "invalid counter number (%d)\n", idx);
		return;
	}

	spin_lock_irqsave(&pmu_lock, flags);
	xscale2pmu_write_event_select(evtsel);
	xscale2pmu_write_int_enable(ien);
	spin_unlock_irqrestore(&pmu_lock, flags);
}
static int
xscale2pmu_get_event_idx(struct cpu_hw_events *cpuc,
			struct hw_perf_event *event)
{
	int idx = xscale1pmu_get_event_idx(cpuc, event);
	if (idx >= 0)
		goto out;

	if (!test_and_set_bit(XSCALE_COUNTER3, cpuc->used_mask))
		idx = XSCALE_COUNTER3;
	else if (!test_and_set_bit(XSCALE_COUNTER2, cpuc->used_mask))
		idx = XSCALE_COUNTER2;
out:
	return idx;
}

static void
xscale2pmu_start(void)
{
	unsigned long flags, val;

	spin_lock_irqsave(&pmu_lock, flags);
	val = xscale2pmu_read_pmnc() & ~XSCALE_PMU_CNT64;
	val |= XSCALE_PMU_ENABLE;
	xscale2pmu_write_pmnc(val);
	spin_unlock_irqrestore(&pmu_lock, flags);
}

static void
xscale2pmu_stop(void)
{
	unsigned long flags, val;

	spin_lock_irqsave(&pmu_lock, flags);
	val = xscale2pmu_read_pmnc();
	val &= ~XSCALE_PMU_ENABLE;
	xscale2pmu_write_pmnc(val);
	spin_unlock_irqrestore(&pmu_lock, flags);
}
static inline u32
xscale2pmu_read_counter(int counter)
{
	u32 val = 0;

	switch (counter) {
	case XSCALE_CYCLE_COUNTER:
		asm volatile("mrc p14, 0, %0, c1, c1, 0" : "=r" (val));
		break;
	case XSCALE_COUNTER0:
		asm volatile("mrc p14, 0, %0, c0, c2, 0" : "=r" (val));
		break;
	case XSCALE_COUNTER1:
		asm volatile("mrc p14, 0, %0, c1, c2, 0" : "=r" (val));
		break;
	case XSCALE_COUNTER2:
		asm volatile("mrc p14, 0, %0, c2, c2, 0" : "=r" (val));
		break;
	case XSCALE_COUNTER3:
		asm volatile("mrc p14, 0, %0, c3, c2, 0" : "=r" (val));
		break;
	}

	return val;
}

static inline void
xscale2pmu_write_counter(int counter, u32 val)
{
	switch (counter) {
	case XSCALE_CYCLE_COUNTER:
		asm volatile("mcr p14, 0, %0, c1, c1, 0" : : "r" (val));
		break;
	case XSCALE_COUNTER0:
		asm volatile("mcr p14, 0, %0, c0, c2, 0" : : "r" (val));
		break;
	case XSCALE_COUNTER1:
		asm volatile("mcr p14, 0, %0, c1, c2, 0" : : "r" (val));
		break;
	case XSCALE_COUNTER2:
		asm volatile("mcr p14, 0, %0, c2, c2, 0" : : "r" (val));
		break;
	case XSCALE_COUNTER3:
		asm volatile("mcr p14, 0, %0, c3, c2, 0" : : "r" (val));
		break;
	}
}

static const struct arm_pmu xscale2pmu = {
	.id		= ARM_PERF_PMU_ID_XSCALE2,
	.handle_irq	= xscale2pmu_handle_irq,
	.enable		= xscale2pmu_enable_event,
	.disable	= xscale2pmu_disable_event,
	.event_map	= xscalepmu_event_map,
	.raw_event	= xscalepmu_raw_event,
	.read_counter	= xscale2pmu_read_counter,
	.write_counter	= xscale2pmu_write_counter,
	.get_event_idx	= xscale2pmu_get_event_idx,
	.start		= xscale2pmu_start,
	.stop		= xscale2pmu_stop,
	.num_events	= 5,
	.max_period	= (1LLU << 32) - 1,
};
static int __init
init_hw_perf_events(void)
...
@@ -2977,37 +622,16 @@ init_hw_perf_events(void)
 		case 0xB360:	/* ARM1136 */
 		case 0xB560:	/* ARM1156 */
 		case 0xB760:	/* ARM1176 */
-			armpmu = &armv6pmu;
-			memcpy(armpmu_perf_cache_map, armv6_perf_cache_map,
-					sizeof(armv6_perf_cache_map));
+			armpmu = armv6pmu_init();
 			break;
 		case 0xB020:	/* ARM11mpcore */
-			armpmu = &armv6mpcore_pmu;
-			memcpy(armpmu_perf_cache_map, armv6mpcore_perf_cache_map,
-					sizeof(armv6mpcore_perf_cache_map));
+			armpmu = armv6mpcore_pmu_init();
 			break;
 		case 0xC080:	/* Cortex-A8 */
-			armv7pmu.id = ARM_PERF_PMU_ID_CA8;
-			memcpy(armpmu_perf_cache_map, armv7_a8_perf_cache_map,
-					sizeof(armv7_a8_perf_cache_map));
-			armv7pmu.event_map = armv7_a8_pmu_event_map;
-			armpmu = &armv7pmu;
-
-			/* Reset PMNC and read the nb of CNTx counters
-			    supported */
-			armv7pmu.num_events = armv7_reset_read_pmnc();
+			armpmu = armv7_a8_pmu_init();
 			break;
 		case 0xC090:	/* Cortex-A9 */
-			armv7pmu.id = ARM_PERF_PMU_ID_CA9;
-			memcpy(armpmu_perf_cache_map, armv7_a9_perf_cache_map,
-					sizeof(armv7_a9_perf_cache_map));
-			armv7pmu.event_map = armv7_a9_pmu_event_map;
-			armpmu = &armv7pmu;
-
-			/* Reset PMNC and read the nb of CNTx counters
-			    supported */
-			armv7pmu.num_events = armv7_reset_read_pmnc();
+			armpmu = armv7_a9_pmu_init();
 			break;
 		}
 	/* Intel CPUs [xscale]. */
...
@@ -3015,21 +639,17 @@ init_hw_perf_events(void)
 		part_number = (cpuid >> 13) & 0x7;
 		switch (part_number) {
 		case 1:
-			armpmu = &xscale1pmu;
-			memcpy(armpmu_perf_cache_map, xscale_perf_cache_map,
-					sizeof(xscale_perf_cache_map));
+			armpmu = xscale1pmu_init();
 			break;
 		case 2:
-			armpmu = &xscale2pmu;
-			memcpy(armpmu_perf_cache_map, xscale_perf_cache_map,
-					sizeof(xscale_perf_cache_map));
+			armpmu = xscale2pmu_init();
 			break;
 		}
 	}

 	if (armpmu) {
 		pr_info("enabled with %s PMU driver, %d counters available\n",
-			arm_pmu_names[armpmu->id], armpmu->num_events);
+			armpmu->name, armpmu->num_events);
 	} else {
 		pr_info("no hardware support available\n");
 	}
...
arch/arm/kernel/perf_event_v6.c
0 → 100644
/*
* ARMv6 Performance counter handling code.
*
* Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles
*
* ARMv6 has 2 configurable performance counters and a single cycle counter.
* They all share a single reset bit but can be written to zero so we can use
* that for a reset.
*
* The counters can't be individually enabled or disabled so when we remove
* one event and replace it with another we could get spurious counts from the
* wrong event. However, we can take advantage of the fact that the
* performance counters can export events to the event bus, and the event bus
* itself can be monitored. This requires that we *don't* export the events to
* the event bus. The procedure for disabling a configurable counter is:
* - change the counter to count the ETMEXTOUT[0] signal (0x20). This
* effectively stops the counter from counting.
* - disable the counter's interrupt generation (each counter has its
*   own interrupt enable bit).
* Once stopped, the counter value can be written as 0 to reset.
*
* To enable a counter:
* - enable the counter's interrupt generation.
* - set the new event type.
*
* Note: the dedicated cycle counter only counts cycles and can't be
* enabled/disabled independently of the others. When we want to disable the
* cycle counter, we have to just disable the interrupt reporting and start
* ignoring that counter. When re-enabling, we have to reset the value and
* enable the interrupt.
*/
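/*
 * Illustrative sketch only, not part of the original file: the disable
 * procedure described above, spelled out for counter 0 using the
 * ARMV6_PMCR_* macros, armv6_pmcr_read()/armv6_pmcr_write() and
 * armv6pmu_write_counter() defined further down. A real caller must also
 * hold pmu_lock, as armv6pmu_disable_event() below does.
 */
#if 0	/* example only, never compiled */
static void armv6_example_stop_counter0(void)
{
	unsigned long val = armv6_pmcr_read();

	/* Retarget counter 0 at ETMEXTOUT[0] (0x20) so it stops counting. */
	val &= ~ARMV6_PMCR_EVT_COUNT0_MASK;
	val |= ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT0_SHIFT;

	/* Mask its overflow interrupt. */
	val &= ~ARMV6_PMCR_COUNT0_IEN;
	armv6_pmcr_write(val);

	/* Once stopped, writing zero doubles as the reset. */
	armv6pmu_write_counter(ARMV6_COUNTER0, 0);
}
#endif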
#ifdef CONFIG_CPU_V6
enum armv6_perf_types {
	ARMV6_PERFCTR_ICACHE_MISS	= 0x0,
	ARMV6_PERFCTR_IBUF_STALL	= 0x1,
	ARMV6_PERFCTR_DDEP_STALL	= 0x2,
	ARMV6_PERFCTR_ITLB_MISS		= 0x3,
	ARMV6_PERFCTR_DTLB_MISS		= 0x4,
	ARMV6_PERFCTR_BR_EXEC		= 0x5,
	ARMV6_PERFCTR_BR_MISPREDICT	= 0x6,
	ARMV6_PERFCTR_INSTR_EXEC	= 0x7,
	ARMV6_PERFCTR_DCACHE_HIT	= 0x9,
	ARMV6_PERFCTR_DCACHE_ACCESS	= 0xA,
	ARMV6_PERFCTR_DCACHE_MISS	= 0xB,
	ARMV6_PERFCTR_DCACHE_WBACK	= 0xC,
	ARMV6_PERFCTR_SW_PC_CHANGE	= 0xD,
	ARMV6_PERFCTR_MAIN_TLB_MISS	= 0xF,
	ARMV6_PERFCTR_EXPL_D_ACCESS	= 0x10,
	ARMV6_PERFCTR_LSU_FULL_STALL	= 0x11,
	ARMV6_PERFCTR_WBUF_DRAINED	= 0x12,
	ARMV6_PERFCTR_CPU_CYCLES	= 0xFF,
	ARMV6_PERFCTR_NOP		= 0x20,
};

enum armv6_counters {
	ARMV6_CYCLE_COUNTER = 1,
	ARMV6_COUNTER0,
	ARMV6_COUNTER1,
};
/*
* The hardware events that we support. We do support cache operations but
* we have harvard caches and no way to combine instruction and data
* accesses/misses in hardware.
*/
static const unsigned armv6_perf_map[PERF_COUNT_HW_MAX] = {
	[PERF_COUNT_HW_CPU_CYCLES]		= ARMV6_PERFCTR_CPU_CYCLES,
	[PERF_COUNT_HW_INSTRUCTIONS]		= ARMV6_PERFCTR_INSTR_EXEC,
	[PERF_COUNT_HW_CACHE_REFERENCES]	= HW_OP_UNSUPPORTED,
	[PERF_COUNT_HW_CACHE_MISSES]		= HW_OP_UNSUPPORTED,
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= ARMV6_PERFCTR_BR_EXEC,
	[PERF_COUNT_HW_BRANCH_MISSES]		= ARMV6_PERFCTR_BR_MISPREDICT,
	[PERF_COUNT_HW_BUS_CYCLES]		= HW_OP_UNSUPPORTED,
};
static
const
unsigned
armv6_perf_cache_map
[
PERF_COUNT_HW_CACHE_MAX
]
[
PERF_COUNT_HW_CACHE_OP_MAX
]
[
PERF_COUNT_HW_CACHE_RESULT_MAX
]
=
{
[
C
(
L1D
)]
=
{
/*
* The performance counters don't differentiate between read
* and write accesses/misses so this isn't strictly correct,
* but it's the best we can do. Writes and reads get
* combined.
*/
[
C
(
OP_READ
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
ARMV6_PERFCTR_DCACHE_ACCESS
,
[
C
(
RESULT_MISS
)]
=
ARMV6_PERFCTR_DCACHE_MISS
,
},
[
C
(
OP_WRITE
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
ARMV6_PERFCTR_DCACHE_ACCESS
,
[
C
(
RESULT_MISS
)]
=
ARMV6_PERFCTR_DCACHE_MISS
,
},
[
C
(
OP_PREFETCH
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
CACHE_OP_UNSUPPORTED
,
[
C
(
RESULT_MISS
)]
=
CACHE_OP_UNSUPPORTED
,
},
},
[
C
(
L1I
)]
=
{
[
C
(
OP_READ
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
CACHE_OP_UNSUPPORTED
,
[
C
(
RESULT_MISS
)]
=
ARMV6_PERFCTR_ICACHE_MISS
,
},
[
C
(
OP_WRITE
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
CACHE_OP_UNSUPPORTED
,
[
C
(
RESULT_MISS
)]
=
ARMV6_PERFCTR_ICACHE_MISS
,
},
[
C
(
OP_PREFETCH
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
CACHE_OP_UNSUPPORTED
,
[
C
(
RESULT_MISS
)]
=
CACHE_OP_UNSUPPORTED
,
},
},
[
C
(
LL
)]
=
{
[
C
(
OP_READ
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
CACHE_OP_UNSUPPORTED
,
[
C
(
RESULT_MISS
)]
=
CACHE_OP_UNSUPPORTED
,
},
[
C
(
OP_WRITE
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
CACHE_OP_UNSUPPORTED
,
[
C
(
RESULT_MISS
)]
=
CACHE_OP_UNSUPPORTED
,
},
[
C
(
OP_PREFETCH
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
CACHE_OP_UNSUPPORTED
,
[
C
(
RESULT_MISS
)]
=
CACHE_OP_UNSUPPORTED
,
},
},
[
C
(
DTLB
)]
=
{
/*
* The ARM performance counters can count micro DTLB misses,
* micro ITLB misses and main TLB misses. There isn't an event
* for TLB misses, so use the micro misses here and if users
* want the main TLB misses they can use a raw counter.
*/
[
C
(
OP_READ
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
CACHE_OP_UNSUPPORTED
,
[
C
(
RESULT_MISS
)]
=
ARMV6_PERFCTR_DTLB_MISS
,
},
[
C
(
OP_WRITE
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
CACHE_OP_UNSUPPORTED
,
[
C
(
RESULT_MISS
)]
=
ARMV6_PERFCTR_DTLB_MISS
,
},
[
C
(
OP_PREFETCH
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
CACHE_OP_UNSUPPORTED
,
[
C
(
RESULT_MISS
)]
=
CACHE_OP_UNSUPPORTED
,
},
},
[
C
(
ITLB
)]
=
{
[
C
(
OP_READ
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
CACHE_OP_UNSUPPORTED
,
[
C
(
RESULT_MISS
)]
=
ARMV6_PERFCTR_ITLB_MISS
,
},
[
C
(
OP_WRITE
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
CACHE_OP_UNSUPPORTED
,
[
C
(
RESULT_MISS
)]
=
ARMV6_PERFCTR_ITLB_MISS
,
},
[
C
(
OP_PREFETCH
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
CACHE_OP_UNSUPPORTED
,
[
C
(
RESULT_MISS
)]
=
CACHE_OP_UNSUPPORTED
,
},
},
[
C
(
BPU
)]
=
{
[
C
(
OP_READ
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
CACHE_OP_UNSUPPORTED
,
[
C
(
RESULT_MISS
)]
=
CACHE_OP_UNSUPPORTED
,
},
[
C
(
OP_WRITE
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
CACHE_OP_UNSUPPORTED
,
[
C
(
RESULT_MISS
)]
=
CACHE_OP_UNSUPPORTED
,
},
[
C
(
OP_PREFETCH
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
CACHE_OP_UNSUPPORTED
,
[
C
(
RESULT_MISS
)]
=
CACHE_OP_UNSUPPORTED
,
},
},
};
enum
armv6mpcore_perf_types
{
ARMV6MPCORE_PERFCTR_ICACHE_MISS
=
0x0
,
ARMV6MPCORE_PERFCTR_IBUF_STALL
=
0x1
,
ARMV6MPCORE_PERFCTR_DDEP_STALL
=
0x2
,
ARMV6MPCORE_PERFCTR_ITLB_MISS
=
0x3
,
ARMV6MPCORE_PERFCTR_DTLB_MISS
=
0x4
,
ARMV6MPCORE_PERFCTR_BR_EXEC
=
0x5
,
ARMV6MPCORE_PERFCTR_BR_NOTPREDICT
=
0x6
,
ARMV6MPCORE_PERFCTR_BR_MISPREDICT
=
0x7
,
ARMV6MPCORE_PERFCTR_INSTR_EXEC
=
0x8
,
ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS
=
0xA
,
ARMV6MPCORE_PERFCTR_DCACHE_RDMISS
=
0xB
,
ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS
=
0xC
,
ARMV6MPCORE_PERFCTR_DCACHE_WRMISS
=
0xD
,
ARMV6MPCORE_PERFCTR_DCACHE_EVICTION
=
0xE
,
ARMV6MPCORE_PERFCTR_SW_PC_CHANGE
=
0xF
,
ARMV6MPCORE_PERFCTR_MAIN_TLB_MISS
=
0x10
,
ARMV6MPCORE_PERFCTR_EXPL_MEM_ACCESS
=
0x11
,
ARMV6MPCORE_PERFCTR_LSU_FULL_STALL
=
0x12
,
ARMV6MPCORE_PERFCTR_WBUF_DRAINED
=
0x13
,
ARMV6MPCORE_PERFCTR_CPU_CYCLES
=
0xFF
,
};
/*
* The hardware events that we support. We do support cache operations but
* we have harvard caches and no way to combine instruction and data
* accesses/misses in hardware.
*/
static
const
unsigned
armv6mpcore_perf_map
[
PERF_COUNT_HW_MAX
]
=
{
[
PERF_COUNT_HW_CPU_CYCLES
]
=
ARMV6MPCORE_PERFCTR_CPU_CYCLES
,
[
PERF_COUNT_HW_INSTRUCTIONS
]
=
ARMV6MPCORE_PERFCTR_INSTR_EXEC
,
[
PERF_COUNT_HW_CACHE_REFERENCES
]
=
HW_OP_UNSUPPORTED
,
[
PERF_COUNT_HW_CACHE_MISSES
]
=
HW_OP_UNSUPPORTED
,
[
PERF_COUNT_HW_BRANCH_INSTRUCTIONS
]
=
ARMV6MPCORE_PERFCTR_BR_EXEC
,
[
PERF_COUNT_HW_BRANCH_MISSES
]
=
ARMV6MPCORE_PERFCTR_BR_MISPREDICT
,
[
PERF_COUNT_HW_BUS_CYCLES
]
=
HW_OP_UNSUPPORTED
,
};
static
const
unsigned
armv6mpcore_perf_cache_map
[
PERF_COUNT_HW_CACHE_MAX
]
[
PERF_COUNT_HW_CACHE_OP_MAX
]
[
PERF_COUNT_HW_CACHE_RESULT_MAX
]
=
{
[
C
(
L1D
)]
=
{
[
C
(
OP_READ
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS
,
[
C
(
RESULT_MISS
)]
=
ARMV6MPCORE_PERFCTR_DCACHE_RDMISS
,
},
[
C
(
OP_WRITE
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS
,
[
C
(
RESULT_MISS
)]
=
ARMV6MPCORE_PERFCTR_DCACHE_WRMISS
,
},
[
C
(
OP_PREFETCH
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
CACHE_OP_UNSUPPORTED
,
[
C
(
RESULT_MISS
)]
=
CACHE_OP_UNSUPPORTED
,
},
},
[
C
(
L1I
)]
=
{
[
C
(
OP_READ
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
CACHE_OP_UNSUPPORTED
,
[
C
(
RESULT_MISS
)]
=
ARMV6MPCORE_PERFCTR_ICACHE_MISS
,
},
[
C
(
OP_WRITE
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
CACHE_OP_UNSUPPORTED
,
[
C
(
RESULT_MISS
)]
=
ARMV6MPCORE_PERFCTR_ICACHE_MISS
,
},
[
C
(
OP_PREFETCH
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
CACHE_OP_UNSUPPORTED
,
[
C
(
RESULT_MISS
)]
=
CACHE_OP_UNSUPPORTED
,
},
},
[
C
(
LL
)]
=
{
[
C
(
OP_READ
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
CACHE_OP_UNSUPPORTED
,
[
C
(
RESULT_MISS
)]
=
CACHE_OP_UNSUPPORTED
,
},
[
C
(
OP_WRITE
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
CACHE_OP_UNSUPPORTED
,
[
C
(
RESULT_MISS
)]
=
CACHE_OP_UNSUPPORTED
,
},
[
C
(
OP_PREFETCH
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
CACHE_OP_UNSUPPORTED
,
[
C
(
RESULT_MISS
)]
=
CACHE_OP_UNSUPPORTED
,
},
},
[
C
(
DTLB
)]
=
{
/*
* The ARM performance counters can count micro DTLB misses,
* micro ITLB misses and main TLB misses. There isn't an event
* for TLB misses, so use the micro misses here and if users
* want the main TLB misses they can use a raw counter.
*/
[
C
(
OP_READ
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
CACHE_OP_UNSUPPORTED
,
[
C
(
RESULT_MISS
)]
=
ARMV6MPCORE_PERFCTR_DTLB_MISS
,
},
[
C
(
OP_WRITE
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
CACHE_OP_UNSUPPORTED
,
[
C
(
RESULT_MISS
)]
=
ARMV6MPCORE_PERFCTR_DTLB_MISS
,
},
[
C
(
OP_PREFETCH
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
CACHE_OP_UNSUPPORTED
,
[
C
(
RESULT_MISS
)]
=
CACHE_OP_UNSUPPORTED
,
},
},
[
C
(
ITLB
)]
=
{
[
C
(
OP_READ
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
CACHE_OP_UNSUPPORTED
,
[
C
(
RESULT_MISS
)]
=
ARMV6MPCORE_PERFCTR_ITLB_MISS
,
},
[
C
(
OP_WRITE
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
CACHE_OP_UNSUPPORTED
,
[
C
(
RESULT_MISS
)]
=
ARMV6MPCORE_PERFCTR_ITLB_MISS
,
},
[
C
(
OP_PREFETCH
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
CACHE_OP_UNSUPPORTED
,
[
C
(
RESULT_MISS
)]
=
CACHE_OP_UNSUPPORTED
,
},
},
[
C
(
BPU
)]
=
{
[
C
(
OP_READ
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
CACHE_OP_UNSUPPORTED
,
[
C
(
RESULT_MISS
)]
=
CACHE_OP_UNSUPPORTED
,
},
[
C
(
OP_WRITE
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
CACHE_OP_UNSUPPORTED
,
[
C
(
RESULT_MISS
)]
=
CACHE_OP_UNSUPPORTED
,
},
[
C
(
OP_PREFETCH
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
CACHE_OP_UNSUPPORTED
,
[
C
(
RESULT_MISS
)]
=
CACHE_OP_UNSUPPORTED
,
},
},
};
static
inline
unsigned
long
armv6_pmcr_read
(
void
)
{
u32
val
;
asm
volatile
(
"mrc p15, 0, %0, c15, c12, 0"
:
"=r"
(
val
));
return
val
;
}
static
inline
void
armv6_pmcr_write
(
unsigned
long
val
)
{
asm
volatile
(
"mcr p15, 0, %0, c15, c12, 0"
:
:
"r"
(
val
));
}
#define ARMV6_PMCR_ENABLE (1 << 0)
#define ARMV6_PMCR_CTR01_RESET (1 << 1)
#define ARMV6_PMCR_CCOUNT_RESET (1 << 2)
#define ARMV6_PMCR_CCOUNT_DIV (1 << 3)
#define ARMV6_PMCR_COUNT0_IEN (1 << 4)
#define ARMV6_PMCR_COUNT1_IEN (1 << 5)
#define ARMV6_PMCR_CCOUNT_IEN (1 << 6)
#define ARMV6_PMCR_COUNT0_OVERFLOW (1 << 8)
#define ARMV6_PMCR_COUNT1_OVERFLOW (1 << 9)
#define ARMV6_PMCR_CCOUNT_OVERFLOW (1 << 10)
#define ARMV6_PMCR_EVT_COUNT0_SHIFT 20
#define ARMV6_PMCR_EVT_COUNT0_MASK (0xFF << ARMV6_PMCR_EVT_COUNT0_SHIFT)
#define ARMV6_PMCR_EVT_COUNT1_SHIFT 12
#define ARMV6_PMCR_EVT_COUNT1_MASK (0xFF << ARMV6_PMCR_EVT_COUNT1_SHIFT)
#define ARMV6_PMCR_OVERFLOWED_MASK \
(ARMV6_PMCR_COUNT0_OVERFLOW | ARMV6_PMCR_COUNT1_OVERFLOW | \
ARMV6_PMCR_CCOUNT_OVERFLOW)
static
inline
int
armv6_pmcr_has_overflowed
(
unsigned
long
pmcr
)
{
return
pmcr
&
ARMV6_PMCR_OVERFLOWED_MASK
;
}
static
inline
int
armv6_pmcr_counter_has_overflowed
(
unsigned
long
pmcr
,
enum
armv6_counters
counter
)
{
int
ret
=
0
;
if
(
ARMV6_CYCLE_COUNTER
==
counter
)
ret
=
pmcr
&
ARMV6_PMCR_CCOUNT_OVERFLOW
;
else
if
(
ARMV6_COUNTER0
==
counter
)
ret
=
pmcr
&
ARMV6_PMCR_COUNT0_OVERFLOW
;
else
if
(
ARMV6_COUNTER1
==
counter
)
ret
=
pmcr
&
ARMV6_PMCR_COUNT1_OVERFLOW
;
else
WARN_ONCE
(
1
,
"invalid counter number (%d)
\n
"
,
counter
);
return
ret
;
}
static
inline
u32
armv6pmu_read_counter
(
int
counter
)
{
unsigned
long
value
=
0
;
if
(
ARMV6_CYCLE_COUNTER
==
counter
)
asm
volatile
(
"mrc p15, 0, %0, c15, c12, 1"
:
"=r"
(
value
));
else
if
(
ARMV6_COUNTER0
==
counter
)
asm
volatile
(
"mrc p15, 0, %0, c15, c12, 2"
:
"=r"
(
value
));
else
if
(
ARMV6_COUNTER1
==
counter
)
asm
volatile
(
"mrc p15, 0, %0, c15, c12, 3"
:
"=r"
(
value
));
else
WARN_ONCE
(
1
,
"invalid counter number (%d)
\n
"
,
counter
);
return
value
;
}
static
inline
void
armv6pmu_write_counter
(
int
counter
,
u32
value
)
{
if
(
ARMV6_CYCLE_COUNTER
==
counter
)
asm
volatile
(
"mcr p15, 0, %0, c15, c12, 1"
:
:
"r"
(
value
));
else
if
(
ARMV6_COUNTER0
==
counter
)
asm
volatile
(
"mcr p15, 0, %0, c15, c12, 2"
:
:
"r"
(
value
));
else
if
(
ARMV6_COUNTER1
==
counter
)
asm
volatile
(
"mcr p15, 0, %0, c15, c12, 3"
:
:
"r"
(
value
));
else
WARN_ONCE
(
1
,
"invalid counter number (%d)
\n
"
,
counter
);
}
void
armv6pmu_enable_event
(
struct
hw_perf_event
*
hwc
,
int
idx
)
{
unsigned
long
val
,
mask
,
evt
,
flags
;
if
(
ARMV6_CYCLE_COUNTER
==
idx
)
{
mask
=
0
;
evt
=
ARMV6_PMCR_CCOUNT_IEN
;
}
else
if
(
ARMV6_COUNTER0
==
idx
)
{
mask
=
ARMV6_PMCR_EVT_COUNT0_MASK
;
evt
=
(
hwc
->
config_base
<<
ARMV6_PMCR_EVT_COUNT0_SHIFT
)
|
ARMV6_PMCR_COUNT0_IEN
;
}
else
if
(
ARMV6_COUNTER1
==
idx
)
{
mask
=
ARMV6_PMCR_EVT_COUNT1_MASK
;
evt
=
(
hwc
->
config_base
<<
ARMV6_PMCR_EVT_COUNT1_SHIFT
)
|
ARMV6_PMCR_COUNT1_IEN
;
}
else
{
WARN_ONCE
(
1
,
"invalid counter number (%d)
\n
"
,
idx
);
return
;
}
/*
* Mask out the current event and set the counter to count the event
* that we're interested in.
*/
spin_lock_irqsave
(
&
pmu_lock
,
flags
);
val
=
armv6_pmcr_read
();
val
&=
~
mask
;
val
|=
evt
;
armv6_pmcr_write
(
val
);
spin_unlock_irqrestore
(
&
pmu_lock
,
flags
);
}
static
irqreturn_t
armv6pmu_handle_irq
(
int
irq_num
,
void
*
dev
)
{
unsigned
long
pmcr
=
armv6_pmcr_read
();
struct
perf_sample_data
data
;
struct
cpu_hw_events
*
cpuc
;
struct
pt_regs
*
regs
;
int
idx
;
if
(
!
armv6_pmcr_has_overflowed
(
pmcr
))
return
IRQ_NONE
;
regs
=
get_irq_regs
();
/*
* The interrupts are cleared by writing the overflow flags back to
* the control register. All of the other bits don't have any effect
* if they are rewritten, so write the whole value back.
*/
armv6_pmcr_write
(
pmcr
);
perf_sample_data_init
(
&
data
,
0
);
cpuc
=
&
__get_cpu_var
(
cpu_hw_events
);
for
(
idx
=
0
;
idx
<=
armpmu
->
num_events
;
++
idx
)
{
struct
perf_event
*
event
=
cpuc
->
events
[
idx
];
struct
hw_perf_event
*
hwc
;
if
(
!
test_bit
(
idx
,
cpuc
->
active_mask
))
continue
;
/*
* We have a single interrupt for all counters. Check that
* each counter has overflowed before we process it.
*/
if
(
!
armv6_pmcr_counter_has_overflowed
(
pmcr
,
idx
))
continue
;
hwc
=
&
event
->
hw
;
armpmu_event_update
(
event
,
hwc
,
idx
);
data
.
period
=
event
->
hw
.
last_period
;
if
(
!
armpmu_event_set_period
(
event
,
hwc
,
idx
))
continue
;
if
(
perf_event_overflow
(
event
,
0
,
&
data
,
regs
))
armpmu
->
disable
(
hwc
,
idx
);
}
/*
* Handle the pending perf events.
*
* Note: this call *must* be run with interrupts disabled. For
* platforms that can have the PMU interrupts raised as an NMI, this
* will not work.
*/
irq_work_run
();
return
IRQ_HANDLED
;
}
static
void
armv6pmu_start
(
void
)
{
unsigned
long
flags
,
val
;
spin_lock_irqsave
(
&
pmu_lock
,
flags
);
val
=
armv6_pmcr_read
();
val
|=
ARMV6_PMCR_ENABLE
;
armv6_pmcr_write
(
val
);
spin_unlock_irqrestore
(
&
pmu_lock
,
flags
);
}
static
void
armv6pmu_stop
(
void
)
{
unsigned
long
flags
,
val
;
spin_lock_irqsave
(
&
pmu_lock
,
flags
);
val
=
armv6_pmcr_read
();
val
&=
~
ARMV6_PMCR_ENABLE
;
armv6_pmcr_write
(
val
);
spin_unlock_irqrestore
(
&
pmu_lock
,
flags
);
}
static
int
armv6pmu_get_event_idx
(
struct
cpu_hw_events
*
cpuc
,
struct
hw_perf_event
*
event
)
{
/* Always place a cycle counter into the cycle counter. */
if
(
ARMV6_PERFCTR_CPU_CYCLES
==
event
->
config_base
)
{
if
(
test_and_set_bit
(
ARMV6_CYCLE_COUNTER
,
cpuc
->
used_mask
))
return
-
EAGAIN
;
return
ARMV6_CYCLE_COUNTER
;
}
else
{
/*
* For anything other than a cycle counter, try and use
* counter0 and counter1.
*/
if
(
!
test_and_set_bit
(
ARMV6_COUNTER1
,
cpuc
->
used_mask
))
return
ARMV6_COUNTER1
;
if
(
!
test_and_set_bit
(
ARMV6_COUNTER0
,
cpuc
->
used_mask
))
return
ARMV6_COUNTER0
;
/* The counters are all in use. */
return
-
EAGAIN
;
}
}
static
void
armv6pmu_disable_event
(
struct
hw_perf_event
*
hwc
,
int
idx
)
{
unsigned
long
val
,
mask
,
evt
,
flags
;
if
(
ARMV6_CYCLE_COUNTER
==
idx
)
{
mask
=
ARMV6_PMCR_CCOUNT_IEN
;
evt
=
0
;
}
else
if
(
ARMV6_COUNTER0
==
idx
)
{
mask
=
ARMV6_PMCR_COUNT0_IEN
|
ARMV6_PMCR_EVT_COUNT0_MASK
;
evt
=
ARMV6_PERFCTR_NOP
<<
ARMV6_PMCR_EVT_COUNT0_SHIFT
;
}
else
if
(
ARMV6_COUNTER1
==
idx
)
{
mask
=
ARMV6_PMCR_COUNT1_IEN
|
ARMV6_PMCR_EVT_COUNT1_MASK
;
evt
=
ARMV6_PERFCTR_NOP
<<
ARMV6_PMCR_EVT_COUNT1_SHIFT
;
}
else
{
WARN_ONCE
(
1
,
"invalid counter number (%d)
\n
"
,
idx
);
return
;
}
/*
* Mask out the current event and set the counter to count the number
* of ETM bus signal assertion cycles. The external reporting should
* be disabled and so this should never increment.
*/
spin_lock_irqsave
(
&
pmu_lock
,
flags
);
val
=
armv6_pmcr_read
();
val
&=
~
mask
;
val
|=
evt
;
armv6_pmcr_write
(
val
);
spin_unlock_irqrestore
(
&
pmu_lock
,
flags
);
}
static
void
armv6mpcore_pmu_disable_event
(
struct
hw_perf_event
*
hwc
,
int
idx
)
{
unsigned
long
val
,
mask
,
flags
,
evt
=
0
;
if
(
ARMV6_CYCLE_COUNTER
==
idx
)
{
mask
=
ARMV6_PMCR_CCOUNT_IEN
;
}
else
if
(
ARMV6_COUNTER0
==
idx
)
{
mask
=
ARMV6_PMCR_COUNT0_IEN
;
}
else
if
(
ARMV6_COUNTER1
==
idx
)
{
mask
=
ARMV6_PMCR_COUNT1_IEN
;
}
else
{
WARN_ONCE
(
1
,
"invalid counter number (%d)
\n
"
,
idx
);
return
;
}
/*
* Unlike UP ARMv6, we don't have a way of stopping the counters. We
* simply disable the interrupt reporting.
*/
spin_lock_irqsave
(
&
pmu_lock
,
flags
);
val
=
armv6_pmcr_read
();
val
&=
~
mask
;
val
|=
evt
;
armv6_pmcr_write
(
val
);
spin_unlock_irqrestore
(
&
pmu_lock
,
flags
);
}
static
const
struct
arm_pmu
armv6pmu
=
{
.
id
=
ARM_PERF_PMU_ID_V6
,
.
name
=
"v6"
,
.
handle_irq
=
armv6pmu_handle_irq
,
.
enable
=
armv6pmu_enable_event
,
.
disable
=
armv6pmu_disable_event
,
.
read_counter
=
armv6pmu_read_counter
,
.
write_counter
=
armv6pmu_write_counter
,
.
get_event_idx
=
armv6pmu_get_event_idx
,
.
start
=
armv6pmu_start
,
.
stop
=
armv6pmu_stop
,
.
cache_map
=
&
armv6_perf_cache_map
,
.
event_map
=
&
armv6_perf_map
,
.
raw_event_mask
=
0xFF
,
.
num_events
=
3
,
.
max_period
=
(
1LLU
<<
32
)
-
1
,
};
const
struct
arm_pmu
*
__init
armv6pmu_init
(
void
)
{
return
&
armv6pmu
;
}
/*
* ARMv6mpcore is almost identical to single core ARMv6 with the exception
* that some of the events have different enumerations and that there is no
* *hack* to stop the programmable counters. To stop the counters we simply
* disable the interrupt reporting and update the event. When unthrottling we
* reset the period and enable the interrupt reporting.
*/
static
const
struct
arm_pmu
armv6mpcore_pmu
=
{
.
id
=
ARM_PERF_PMU_ID_V6MP
,
.
name
=
"v6mpcore"
,
.
handle_irq
=
armv6pmu_handle_irq
,
.
enable
=
armv6pmu_enable_event
,
.
disable
=
armv6mpcore_pmu_disable_event
,
.
read_counter
=
armv6pmu_read_counter
,
.
write_counter
=
armv6pmu_write_counter
,
.
get_event_idx
=
armv6pmu_get_event_idx
,
.
start
=
armv6pmu_start
,
.
stop
=
armv6pmu_stop
,
.
cache_map
=
&
armv6mpcore_perf_cache_map
,
.
event_map
=
&
armv6mpcore_perf_map
,
.
raw_event_mask
=
0xFF
,
.
num_events
=
3
,
.
max_period
=
(
1LLU
<<
32
)
-
1
,
};
const
struct
arm_pmu
*
__init
armv6mpcore_pmu_init
(
void
)
{
return
&
armv6mpcore_pmu
;
}
#else
const
struct
arm_pmu
*
__init
armv6pmu_init
(
void
)
{
return
NULL
;
}
const
struct
arm_pmu
*
__init
armv6mpcore_pmu_init
(
void
)
{
return
NULL
;
}
#endif
/* CONFIG_CPU_V6 */
arch/arm/kernel/perf_event_v7.c
0 → 100644
/*
* ARMv7 Cortex-A8 and Cortex-A9 Performance Events handling code.
*
* ARMv7 support: Jean Pihet <jpihet@mvista.com>
* 2010 (c) MontaVista Software, LLC.
*
* Copied from ARMv6 code, with the low level code inspired
* by the ARMv7 Oprofile code.
*
* Cortex-A8 has up to 4 configurable performance counters and
* a single cycle counter.
* Cortex-A9 has up to 31 configurable performance counters and
* a single cycle counter.
*
* All counters can be enabled/disabled and IRQ masked separately. The cycle
* counter and all 4 performance counters together can be reset separately.
*/
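/*
 * Illustrative sketch only, not part of the original file: how the
 * counter population described above is discovered at run time. The
 * PMNC N field (bits [15:11]) gives the number of event counters; the
 * macros and armv7_pmnc_read() are defined further down, and
 * armv7_reset_read_pmnc() below does exactly this after a reset.
 */
#if 0	/* example only, never compiled */
static u32 armv7_example_num_counters(void)
{
	/* Event counters from PMNC, plus one for the dedicated cycle counter. */
	return ((armv7_pmnc_read() >> ARMV7_PMNC_N_SHIFT) &
		ARMV7_PMNC_N_MASK) + 1;
}
#endif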
#ifdef CONFIG_CPU_V7
/* Common ARMv7 event types */
enum
armv7_perf_types
{
ARMV7_PERFCTR_PMNC_SW_INCR
=
0x00
,
ARMV7_PERFCTR_IFETCH_MISS
=
0x01
,
ARMV7_PERFCTR_ITLB_MISS
=
0x02
,
ARMV7_PERFCTR_DCACHE_REFILL
=
0x03
,
ARMV7_PERFCTR_DCACHE_ACCESS
=
0x04
,
ARMV7_PERFCTR_DTLB_REFILL
=
0x05
,
ARMV7_PERFCTR_DREAD
=
0x06
,
ARMV7_PERFCTR_DWRITE
=
0x07
,
ARMV7_PERFCTR_EXC_TAKEN
=
0x09
,
ARMV7_PERFCTR_EXC_EXECUTED
=
0x0A
,
ARMV7_PERFCTR_CID_WRITE
=
0x0B
,
/* ARMV7_PERFCTR_PC_WRITE is equivalent to HW_BRANCH_INSTRUCTIONS.
* It counts:
* - all branch instructions,
* - instructions that explicitly write the PC,
* - exception generating instructions.
*/
ARMV7_PERFCTR_PC_WRITE
=
0x0C
,
ARMV7_PERFCTR_PC_IMM_BRANCH
=
0x0D
,
ARMV7_PERFCTR_UNALIGNED_ACCESS
=
0x0F
,
ARMV7_PERFCTR_PC_BRANCH_MIS_PRED
=
0x10
,
ARMV7_PERFCTR_CLOCK_CYCLES
=
0x11
,
ARMV7_PERFCTR_PC_BRANCH_MIS_USED
=
0x12
,
ARMV7_PERFCTR_CPU_CYCLES
=
0xFF
};
/* ARMv7 Cortex-A8 specific event types */
enum
armv7_a8_perf_types
{
ARMV7_PERFCTR_INSTR_EXECUTED
=
0x08
,
ARMV7_PERFCTR_PC_PROC_RETURN
=
0x0E
,
ARMV7_PERFCTR_WRITE_BUFFER_FULL
=
0x40
,
ARMV7_PERFCTR_L2_STORE_MERGED
=
0x41
,
ARMV7_PERFCTR_L2_STORE_BUFF
=
0x42
,
ARMV7_PERFCTR_L2_ACCESS
=
0x43
,
ARMV7_PERFCTR_L2_CACH_MISS
=
0x44
,
ARMV7_PERFCTR_AXI_READ_CYCLES
=
0x45
,
ARMV7_PERFCTR_AXI_WRITE_CYCLES
=
0x46
,
ARMV7_PERFCTR_MEMORY_REPLAY
=
0x47
,
ARMV7_PERFCTR_UNALIGNED_ACCESS_REPLAY
=
0x48
,
ARMV7_PERFCTR_L1_DATA_MISS
=
0x49
,
ARMV7_PERFCTR_L1_INST_MISS
=
0x4A
,
ARMV7_PERFCTR_L1_DATA_COLORING
=
0x4B
,
ARMV7_PERFCTR_L1_NEON_DATA
=
0x4C
,
ARMV7_PERFCTR_L1_NEON_CACH_DATA
=
0x4D
,
ARMV7_PERFCTR_L2_NEON
=
0x4E
,
ARMV7_PERFCTR_L2_NEON_HIT
=
0x4F
,
ARMV7_PERFCTR_L1_INST
=
0x50
,
ARMV7_PERFCTR_PC_RETURN_MIS_PRED
=
0x51
,
ARMV7_PERFCTR_PC_BRANCH_FAILED
=
0x52
,
ARMV7_PERFCTR_PC_BRANCH_TAKEN
=
0x53
,
ARMV7_PERFCTR_PC_BRANCH_EXECUTED
=
0x54
,
ARMV7_PERFCTR_OP_EXECUTED
=
0x55
,
ARMV7_PERFCTR_CYCLES_INST_STALL
=
0x56
,
ARMV7_PERFCTR_CYCLES_INST
=
0x57
,
ARMV7_PERFCTR_CYCLES_NEON_DATA_STALL
=
0x58
,
ARMV7_PERFCTR_CYCLES_NEON_INST_STALL
=
0x59
,
ARMV7_PERFCTR_NEON_CYCLES
=
0x5A
,
ARMV7_PERFCTR_PMU0_EVENTS
=
0x70
,
ARMV7_PERFCTR_PMU1_EVENTS
=
0x71
,
ARMV7_PERFCTR_PMU_EVENTS
=
0x72
,
};
/* ARMv7 Cortex-A9 specific event types */
enum
armv7_a9_perf_types
{
ARMV7_PERFCTR_JAVA_HW_BYTECODE_EXEC
=
0x40
,
ARMV7_PERFCTR_JAVA_SW_BYTECODE_EXEC
=
0x41
,
ARMV7_PERFCTR_JAZELLE_BRANCH_EXEC
=
0x42
,
ARMV7_PERFCTR_COHERENT_LINE_MISS
=
0x50
,
ARMV7_PERFCTR_COHERENT_LINE_HIT
=
0x51
,
ARMV7_PERFCTR_ICACHE_DEP_STALL_CYCLES
=
0x60
,
ARMV7_PERFCTR_DCACHE_DEP_STALL_CYCLES
=
0x61
,
ARMV7_PERFCTR_TLB_MISS_DEP_STALL_CYCLES
=
0x62
,
ARMV7_PERFCTR_STREX_EXECUTED_PASSED
=
0x63
,
ARMV7_PERFCTR_STREX_EXECUTED_FAILED
=
0x64
,
ARMV7_PERFCTR_DATA_EVICTION
=
0x65
,
ARMV7_PERFCTR_ISSUE_STAGE_NO_INST
=
0x66
,
ARMV7_PERFCTR_ISSUE_STAGE_EMPTY
=
0x67
,
ARMV7_PERFCTR_INST_OUT_OF_RENAME_STAGE
=
0x68
,
ARMV7_PERFCTR_PREDICTABLE_FUNCT_RETURNS
=
0x6E
,
ARMV7_PERFCTR_MAIN_UNIT_EXECUTED_INST
=
0x70
,
ARMV7_PERFCTR_SECOND_UNIT_EXECUTED_INST
=
0x71
,
ARMV7_PERFCTR_LD_ST_UNIT_EXECUTED_INST
=
0x72
,
ARMV7_PERFCTR_FP_EXECUTED_INST
=
0x73
,
ARMV7_PERFCTR_NEON_EXECUTED_INST
=
0x74
,
ARMV7_PERFCTR_PLD_FULL_DEP_STALL_CYCLES
=
0x80
,
ARMV7_PERFCTR_DATA_WR_DEP_STALL_CYCLES
=
0x81
,
ARMV7_PERFCTR_ITLB_MISS_DEP_STALL_CYCLES
=
0x82
,
ARMV7_PERFCTR_DTLB_MISS_DEP_STALL_CYCLES
=
0x83
,
ARMV7_PERFCTR_MICRO_ITLB_MISS_DEP_STALL_CYCLES
=
0x84
,
ARMV7_PERFCTR_MICRO_DTLB_MISS_DEP_STALL_CYCLES
=
0x85
,
ARMV7_PERFCTR_DMB_DEP_STALL_CYCLES
=
0x86
,
ARMV7_PERFCTR_INTGR_CLK_ENABLED_CYCLES
=
0x8A
,
ARMV7_PERFCTR_DATA_ENGINE_CLK_EN_CYCLES
=
0x8B
,
ARMV7_PERFCTR_ISB_INST
=
0x90
,
ARMV7_PERFCTR_DSB_INST
=
0x91
,
ARMV7_PERFCTR_DMB_INST
=
0x92
,
ARMV7_PERFCTR_EXT_INTERRUPTS
=
0x93
,
ARMV7_PERFCTR_PLE_CACHE_LINE_RQST_COMPLETED
=
0xA0
,
ARMV7_PERFCTR_PLE_CACHE_LINE_RQST_SKIPPED
=
0xA1
,
ARMV7_PERFCTR_PLE_FIFO_FLUSH
=
0xA2
,
ARMV7_PERFCTR_PLE_RQST_COMPLETED
=
0xA3
,
ARMV7_PERFCTR_PLE_FIFO_OVERFLOW
=
0xA4
,
ARMV7_PERFCTR_PLE_RQST_PROG
=
0xA5
};
/*
* Cortex-A8 HW events mapping
*
* The hardware events that we support. We do support cache operations but
* we have harvard caches and no way to combine instruction and data
* accesses/misses in hardware.
*/
static
const
unsigned
armv7_a8_perf_map
[
PERF_COUNT_HW_MAX
]
=
{
[
PERF_COUNT_HW_CPU_CYCLES
]
=
ARMV7_PERFCTR_CPU_CYCLES
,
[
PERF_COUNT_HW_INSTRUCTIONS
]
=
ARMV7_PERFCTR_INSTR_EXECUTED
,
[
PERF_COUNT_HW_CACHE_REFERENCES
]
=
HW_OP_UNSUPPORTED
,
[
PERF_COUNT_HW_CACHE_MISSES
]
=
HW_OP_UNSUPPORTED
,
[
PERF_COUNT_HW_BRANCH_INSTRUCTIONS
]
=
ARMV7_PERFCTR_PC_WRITE
,
[
PERF_COUNT_HW_BRANCH_MISSES
]
=
ARMV7_PERFCTR_PC_BRANCH_MIS_PRED
,
[
PERF_COUNT_HW_BUS_CYCLES
]
=
ARMV7_PERFCTR_CLOCK_CYCLES
,
};
static
const
unsigned
armv7_a8_perf_cache_map
[
PERF_COUNT_HW_CACHE_MAX
]
[
PERF_COUNT_HW_CACHE_OP_MAX
]
[
PERF_COUNT_HW_CACHE_RESULT_MAX
]
=
{
[
C
(
L1D
)]
=
{
/*
* The performance counters don't differentiate between read
* and write accesses/misses so this isn't strictly correct,
* but it's the best we can do. Writes and reads get
* combined.
*/
[
C
(
OP_READ
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
ARMV7_PERFCTR_DCACHE_ACCESS
,
[
C
(
RESULT_MISS
)]
=
ARMV7_PERFCTR_DCACHE_REFILL
,
},
[
C
(
OP_WRITE
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
ARMV7_PERFCTR_DCACHE_ACCESS
,
[
C
(
RESULT_MISS
)]
=
ARMV7_PERFCTR_DCACHE_REFILL
,
},
[
C
(
OP_PREFETCH
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
CACHE_OP_UNSUPPORTED
,
[
C
(
RESULT_MISS
)]
=
CACHE_OP_UNSUPPORTED
,
},
},
[
C
(
L1I
)]
=
{
[
C
(
OP_READ
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
ARMV7_PERFCTR_L1_INST
,
[
C
(
RESULT_MISS
)]
=
ARMV7_PERFCTR_L1_INST_MISS
,
},
[
C
(
OP_WRITE
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
ARMV7_PERFCTR_L1_INST
,
[
C
(
RESULT_MISS
)]
=
ARMV7_PERFCTR_L1_INST_MISS
,
},
[
C
(
OP_PREFETCH
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
CACHE_OP_UNSUPPORTED
,
[
C
(
RESULT_MISS
)]
=
CACHE_OP_UNSUPPORTED
,
},
},
[
C
(
LL
)]
=
{
[
C
(
OP_READ
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
ARMV7_PERFCTR_L2_ACCESS
,
[
C
(
RESULT_MISS
)]
=
ARMV7_PERFCTR_L2_CACH_MISS
,
},
[
C
(
OP_WRITE
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
ARMV7_PERFCTR_L2_ACCESS
,
[
C
(
RESULT_MISS
)]
=
ARMV7_PERFCTR_L2_CACH_MISS
,
},
[
C
(
OP_PREFETCH
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
CACHE_OP_UNSUPPORTED
,
[
C
(
RESULT_MISS
)]
=
CACHE_OP_UNSUPPORTED
,
},
},
[
C
(
DTLB
)]
=
{
/*
* Only ITLB misses and DTLB refills are supported.
* If users want the DTLB refills misses a raw counter
* must be used.
*/
[
C
(
OP_READ
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
CACHE_OP_UNSUPPORTED
,
[
C
(
RESULT_MISS
)]
=
ARMV7_PERFCTR_DTLB_REFILL
,
},
[
C
(
OP_WRITE
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
CACHE_OP_UNSUPPORTED
,
[
C
(
RESULT_MISS
)]
=
ARMV7_PERFCTR_DTLB_REFILL
,
},
[
C
(
OP_PREFETCH
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
CACHE_OP_UNSUPPORTED
,
[
C
(
RESULT_MISS
)]
=
CACHE_OP_UNSUPPORTED
,
},
},
[
C
(
ITLB
)]
=
{
[
C
(
OP_READ
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
CACHE_OP_UNSUPPORTED
,
[
C
(
RESULT_MISS
)]
=
ARMV7_PERFCTR_ITLB_MISS
,
},
[
C
(
OP_WRITE
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
CACHE_OP_UNSUPPORTED
,
[
C
(
RESULT_MISS
)]
=
ARMV7_PERFCTR_ITLB_MISS
,
},
[
C
(
OP_PREFETCH
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
CACHE_OP_UNSUPPORTED
,
[
C
(
RESULT_MISS
)]
=
CACHE_OP_UNSUPPORTED
,
},
},
[
C
(
BPU
)]
=
{
[
C
(
OP_READ
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
ARMV7_PERFCTR_PC_WRITE
,
[
C
(
RESULT_MISS
)]
=
ARMV7_PERFCTR_PC_BRANCH_MIS_PRED
,
},
[
C
(
OP_WRITE
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
ARMV7_PERFCTR_PC_WRITE
,
[
C
(
RESULT_MISS
)]
=
ARMV7_PERFCTR_PC_BRANCH_MIS_PRED
,
},
[
C
(
OP_PREFETCH
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
CACHE_OP_UNSUPPORTED
,
[
C
(
RESULT_MISS
)]
=
CACHE_OP_UNSUPPORTED
,
},
},
};
/*
* Cortex-A9 HW events mapping
*/
static
const
unsigned
armv7_a9_perf_map
[
PERF_COUNT_HW_MAX
]
=
{
[
PERF_COUNT_HW_CPU_CYCLES
]
=
ARMV7_PERFCTR_CPU_CYCLES
,
[
PERF_COUNT_HW_INSTRUCTIONS
]
=
ARMV7_PERFCTR_INST_OUT_OF_RENAME_STAGE
,
[
PERF_COUNT_HW_CACHE_REFERENCES
]
=
ARMV7_PERFCTR_COHERENT_LINE_HIT
,
[
PERF_COUNT_HW_CACHE_MISSES
]
=
ARMV7_PERFCTR_COHERENT_LINE_MISS
,
[
PERF_COUNT_HW_BRANCH_INSTRUCTIONS
]
=
ARMV7_PERFCTR_PC_WRITE
,
[
PERF_COUNT_HW_BRANCH_MISSES
]
=
ARMV7_PERFCTR_PC_BRANCH_MIS_PRED
,
[
PERF_COUNT_HW_BUS_CYCLES
]
=
ARMV7_PERFCTR_CLOCK_CYCLES
,
};
static
const
unsigned
armv7_a9_perf_cache_map
[
PERF_COUNT_HW_CACHE_MAX
]
[
PERF_COUNT_HW_CACHE_OP_MAX
]
[
PERF_COUNT_HW_CACHE_RESULT_MAX
]
=
{
[
C
(
L1D
)]
=
{
/*
* The performance counters don't differentiate between read
* and write accesses/misses so this isn't strictly correct,
* but it's the best we can do. Writes and reads get
* combined.
*/
[
C
(
OP_READ
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
ARMV7_PERFCTR_DCACHE_ACCESS
,
[
C
(
RESULT_MISS
)]
=
ARMV7_PERFCTR_DCACHE_REFILL
,
},
[
C
(
OP_WRITE
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
ARMV7_PERFCTR_DCACHE_ACCESS
,
[
C
(
RESULT_MISS
)]
=
ARMV7_PERFCTR_DCACHE_REFILL
,
},
[
C
(
OP_PREFETCH
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
CACHE_OP_UNSUPPORTED
,
[
C
(
RESULT_MISS
)]
=
CACHE_OP_UNSUPPORTED
,
},
},
[
C
(
L1I
)]
=
{
[
C
(
OP_READ
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
CACHE_OP_UNSUPPORTED
,
[
C
(
RESULT_MISS
)]
=
ARMV7_PERFCTR_IFETCH_MISS
,
},
[
C
(
OP_WRITE
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
CACHE_OP_UNSUPPORTED
,
[
C
(
RESULT_MISS
)]
=
ARMV7_PERFCTR_IFETCH_MISS
,
},
[
C
(
OP_PREFETCH
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
CACHE_OP_UNSUPPORTED
,
[
C
(
RESULT_MISS
)]
=
CACHE_OP_UNSUPPORTED
,
},
},
[
C
(
LL
)]
=
{
[
C
(
OP_READ
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
CACHE_OP_UNSUPPORTED
,
[
C
(
RESULT_MISS
)]
=
CACHE_OP_UNSUPPORTED
,
},
[
C
(
OP_WRITE
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
CACHE_OP_UNSUPPORTED
,
[
C
(
RESULT_MISS
)]
=
CACHE_OP_UNSUPPORTED
,
},
[
C
(
OP_PREFETCH
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
CACHE_OP_UNSUPPORTED
,
[
C
(
RESULT_MISS
)]
=
CACHE_OP_UNSUPPORTED
,
},
},
[
C
(
DTLB
)]
=
{
/*
* Only ITLB misses and DTLB refills are supported.
* If users want the DTLB refills misses a raw counter
* must be used.
*/
[
C
(
OP_READ
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
CACHE_OP_UNSUPPORTED
,
[
C
(
RESULT_MISS
)]
=
ARMV7_PERFCTR_DTLB_REFILL
,
},
[
C
(
OP_WRITE
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
CACHE_OP_UNSUPPORTED
,
[
C
(
RESULT_MISS
)]
=
ARMV7_PERFCTR_DTLB_REFILL
,
},
[
C
(
OP_PREFETCH
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
CACHE_OP_UNSUPPORTED
,
[
C
(
RESULT_MISS
)]
=
CACHE_OP_UNSUPPORTED
,
},
},
[
C
(
ITLB
)]
=
{
[
C
(
OP_READ
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
CACHE_OP_UNSUPPORTED
,
[
C
(
RESULT_MISS
)]
=
ARMV7_PERFCTR_ITLB_MISS
,
},
[
C
(
OP_WRITE
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
CACHE_OP_UNSUPPORTED
,
[
C
(
RESULT_MISS
)]
=
ARMV7_PERFCTR_ITLB_MISS
,
},
[
C
(
OP_PREFETCH
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
CACHE_OP_UNSUPPORTED
,
[
C
(
RESULT_MISS
)]
=
CACHE_OP_UNSUPPORTED
,
},
},
[
C
(
BPU
)]
=
{
[
C
(
OP_READ
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
ARMV7_PERFCTR_PC_WRITE
,
[
C
(
RESULT_MISS
)]
=
ARMV7_PERFCTR_PC_BRANCH_MIS_PRED
,
},
[
C
(
OP_WRITE
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
ARMV7_PERFCTR_PC_WRITE
,
[
C
(
RESULT_MISS
)]
=
ARMV7_PERFCTR_PC_BRANCH_MIS_PRED
,
},
[
C
(
OP_PREFETCH
)]
=
{
[
C
(
RESULT_ACCESS
)]
=
CACHE_OP_UNSUPPORTED
,
[
C
(
RESULT_MISS
)]
=
CACHE_OP_UNSUPPORTED
,
},
},
};
/*
* Perf Events counters
*/
enum armv7_counters {
	ARMV7_CYCLE_COUNTER	= 1,	/* Cycle counter */
	ARMV7_COUNTER0		= 2,	/* First event counter */
};
/*
* The cycle counter is ARMV7_CYCLE_COUNTER.
* The first event counter is ARMV7_COUNTER0.
* The last event counter is (ARMV7_COUNTER0 + armpmu->num_events - 1).
*/
#define ARMV7_COUNTER_LAST (ARMV7_COUNTER0 + armpmu->num_events - 1)
/*
* ARMv7 low level PMNC access
*/
/*
* Per-CPU PMNC: config reg
*/
#define ARMV7_PMNC_E (1 << 0)
/* Enable all counters */
#define ARMV7_PMNC_P (1 << 1)
/* Reset all counters */
#define ARMV7_PMNC_C (1 << 2)
/* Cycle counter reset */
#define ARMV7_PMNC_D (1 << 3)
/* CCNT counts every 64th cpu cycle */
#define ARMV7_PMNC_X (1 << 4)
/* Export to ETM */
#define ARMV7_PMNC_DP (1 << 5)
/* Disable CCNT if non-invasive debug*/
#define ARMV7_PMNC_N_SHIFT 11
/* Number of counters supported */
#define ARMV7_PMNC_N_MASK 0x1f
#define ARMV7_PMNC_MASK 0x3f
/* Mask for writable bits */
/*
* Available counters
*/
#define ARMV7_CNT0 0
/* First event counter */
#define ARMV7_CCNT 31
/* Cycle counter */
/* Perf Event to low level counters mapping */
#define ARMV7_EVENT_CNT_TO_CNTx (ARMV7_COUNTER0 - ARMV7_CNT0)
/*
* CNTENS: counters enable reg
*/
#define ARMV7_CNTENS_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
#define ARMV7_CNTENS_C (1 << ARMV7_CCNT)
/*
* CNTENC: counters disable reg
*/
#define ARMV7_CNTENC_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
#define ARMV7_CNTENC_C (1 << ARMV7_CCNT)
/*
* INTENS: counters overflow interrupt enable reg
*/
#define ARMV7_INTENS_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
#define ARMV7_INTENS_C (1 << ARMV7_CCNT)
/*
* INTENC: counters overflow interrupt disable reg
*/
#define ARMV7_INTENC_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
#define ARMV7_INTENC_C (1 << ARMV7_CCNT)
/*
* EVTSEL: Event selection reg
*/
#define ARMV7_EVTSEL_MASK 0xff
/* Mask for writable bits */
/*
* SELECT: Counter selection reg
*/
#define ARMV7_SELECT_MASK 0x1f
/* Mask for writable bits */
/*
* FLAG: counters overflow flag status reg
*/
#define ARMV7_FLAG_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
#define ARMV7_FLAG_C (1 << ARMV7_CCNT)
#define ARMV7_FLAG_MASK 0xffffffff
/* Mask for writable bits */
#define ARMV7_OVERFLOWED_MASK ARMV7_FLAG_MASK
static
inline
unsigned
long
armv7_pmnc_read
(
void
)
{
u32
val
;
asm
volatile
(
"mrc p15, 0, %0, c9, c12, 0"
:
"=r"
(
val
));
return
val
;
}
static
inline
void
armv7_pmnc_write
(
unsigned
long
val
)
{
val
&=
ARMV7_PMNC_MASK
;
asm
volatile
(
"mcr p15, 0, %0, c9, c12, 0"
:
:
"r"
(
val
));
}
static
inline
int
armv7_pmnc_has_overflowed
(
unsigned
long
pmnc
)
{
return
pmnc
&
ARMV7_OVERFLOWED_MASK
;
}
static
inline
int
armv7_pmnc_counter_has_overflowed
(
unsigned
long
pmnc
,
enum
armv7_counters
counter
)
{
int
ret
=
0
;
if
(
counter
==
ARMV7_CYCLE_COUNTER
)
ret
=
pmnc
&
ARMV7_FLAG_C
;
else
if
((
counter
>=
ARMV7_COUNTER0
)
&&
(
counter
<=
ARMV7_COUNTER_LAST
))
ret
=
pmnc
&
ARMV7_FLAG_P
(
counter
);
else
pr_err
(
"CPU%u checking wrong counter %d overflow status
\n
"
,
smp_processor_id
(),
counter
);
return
ret
;
}
static
inline
int
armv7_pmnc_select_counter
(
unsigned
int
idx
)
{
u32
val
;
if
((
idx
<
ARMV7_COUNTER0
)
||
(
idx
>
ARMV7_COUNTER_LAST
))
{
pr_err
(
"CPU%u selecting wrong PMNC counter"
" %d
\n
"
,
smp_processor_id
(),
idx
);
return
-
1
;
}
val
=
(
idx
-
ARMV7_EVENT_CNT_TO_CNTx
)
&
ARMV7_SELECT_MASK
;
asm
volatile
(
"mcr p15, 0, %0, c9, c12, 5"
:
:
"r"
(
val
));
return
idx
;
}
static
inline
u32
armv7pmu_read_counter
(
int
idx
)
{
unsigned
long
value
=
0
;
if
(
idx
==
ARMV7_CYCLE_COUNTER
)
asm
volatile
(
"mrc p15, 0, %0, c9, c13, 0"
:
"=r"
(
value
));
else
if
((
idx
>=
ARMV7_COUNTER0
)
&&
(
idx
<=
ARMV7_COUNTER_LAST
))
{
if
(
armv7_pmnc_select_counter
(
idx
)
==
idx
)
asm
volatile
(
"mrc p15, 0, %0, c9, c13, 2"
:
"=r"
(
value
));
}
else
pr_err
(
"CPU%u reading wrong counter %d
\n
"
,
smp_processor_id
(),
idx
);
return
value
;
}
static
inline
void
armv7pmu_write_counter
(
int
idx
,
u32
value
)
{
if
(
idx
==
ARMV7_CYCLE_COUNTER
)
asm
volatile
(
"mcr p15, 0, %0, c9, c13, 0"
:
:
"r"
(
value
));
else
if
((
idx
>=
ARMV7_COUNTER0
)
&&
(
idx
<=
ARMV7_COUNTER_LAST
))
{
if
(
armv7_pmnc_select_counter
(
idx
)
==
idx
)
asm
volatile
(
"mcr p15, 0, %0, c9, c13, 2"
:
:
"r"
(
value
));
}
else
pr_err
(
"CPU%u writing wrong counter %d
\n
"
,
smp_processor_id
(),
idx
);
}
static
inline
void
armv7_pmnc_write_evtsel
(
unsigned
int
idx
,
u32
val
)
{
if
(
armv7_pmnc_select_counter
(
idx
)
==
idx
)
{
val
&=
ARMV7_EVTSEL_MASK
;
asm
volatile
(
"mcr p15, 0, %0, c9, c13, 1"
:
:
"r"
(
val
));
}
}
static
inline
u32
armv7_pmnc_enable_counter
(
unsigned
int
idx
)
{
u32
val
;
if
((
idx
!=
ARMV7_CYCLE_COUNTER
)
&&
((
idx
<
ARMV7_COUNTER0
)
||
(
idx
>
ARMV7_COUNTER_LAST
)))
{
pr_err
(
"CPU%u enabling wrong PMNC counter"
" %d
\n
"
,
smp_processor_id
(),
idx
);
return
-
1
;
}
if
(
idx
==
ARMV7_CYCLE_COUNTER
)
val
=
ARMV7_CNTENS_C
;
else
val
=
ARMV7_CNTENS_P
(
idx
);
asm
volatile
(
"mcr p15, 0, %0, c9, c12, 1"
:
:
"r"
(
val
));
return
idx
;
}
static
inline
u32
armv7_pmnc_disable_counter
(
unsigned
int
idx
)
{
u32
val
;
if
((
idx
!=
ARMV7_CYCLE_COUNTER
)
&&
((
idx
<
ARMV7_COUNTER0
)
||
(
idx
>
ARMV7_COUNTER_LAST
)))
{
pr_err
(
"CPU%u disabling wrong PMNC counter"
" %d
\n
"
,
smp_processor_id
(),
idx
);
return
-
1
;
}
if
(
idx
==
ARMV7_CYCLE_COUNTER
)
val
=
ARMV7_CNTENC_C
;
else
val
=
ARMV7_CNTENC_P
(
idx
);
asm
volatile
(
"mcr p15, 0, %0, c9, c12, 2"
:
:
"r"
(
val
));
return
idx
;
}
static
inline
u32
armv7_pmnc_enable_intens
(
unsigned
int
idx
)
{
u32
val
;
if
((
idx
!=
ARMV7_CYCLE_COUNTER
)
&&
((
idx
<
ARMV7_COUNTER0
)
||
(
idx
>
ARMV7_COUNTER_LAST
)))
{
pr_err
(
"CPU%u enabling wrong PMNC counter"
" interrupt enable %d
\n
"
,
smp_processor_id
(),
idx
);
return
-
1
;
}
if
(
idx
==
ARMV7_CYCLE_COUNTER
)
val
=
ARMV7_INTENS_C
;
else
val
=
ARMV7_INTENS_P
(
idx
);
asm
volatile
(
"mcr p15, 0, %0, c9, c14, 1"
:
:
"r"
(
val
));
return
idx
;
}
static
inline
u32
armv7_pmnc_disable_intens
(
unsigned
int
idx
)
{
u32
val
;
if
((
idx
!=
ARMV7_CYCLE_COUNTER
)
&&
((
idx
<
ARMV7_COUNTER0
)
||
(
idx
>
ARMV7_COUNTER_LAST
)))
{
pr_err
(
"CPU%u disabling wrong PMNC counter"
" interrupt enable %d
\n
"
,
smp_processor_id
(),
idx
);
return
-
1
;
}
if
(
idx
==
ARMV7_CYCLE_COUNTER
)
val
=
ARMV7_INTENC_C
;
else
val
=
ARMV7_INTENC_P
(
idx
);
asm
volatile
(
"mcr p15, 0, %0, c9, c14, 2"
:
:
"r"
(
val
));
return
idx
;
}
static
inline
u32
armv7_pmnc_getreset_flags
(
void
)
{
u32
val
;
/* Read */
asm
volatile
(
"mrc p15, 0, %0, c9, c12, 3"
:
"=r"
(
val
));
/* Write to clear flags */
val
&=
ARMV7_FLAG_MASK
;
asm
volatile
(
"mcr p15, 0, %0, c9, c12, 3"
:
:
"r"
(
val
));
return
val
;
}
#ifdef DEBUG
static
void
armv7_pmnc_dump_regs
(
void
)
{
u32
val
;
unsigned
int
cnt
;
printk
(
KERN_INFO
"PMNC registers dump:
\n
"
);
asm
volatile
(
"mrc p15, 0, %0, c9, c12, 0"
:
"=r"
(
val
));
printk
(
KERN_INFO
"PMNC =0x%08x
\n
"
,
val
);
asm
volatile
(
"mrc p15, 0, %0, c9, c12, 1"
:
"=r"
(
val
));
printk
(
KERN_INFO
"CNTENS=0x%08x
\n
"
,
val
);
asm
volatile
(
"mrc p15, 0, %0, c9, c14, 1"
:
"=r"
(
val
));
printk
(
KERN_INFO
"INTENS=0x%08x
\n
"
,
val
);
asm
volatile
(
"mrc p15, 0, %0, c9, c12, 3"
:
"=r"
(
val
));
printk
(
KERN_INFO
"FLAGS =0x%08x
\n
"
,
val
);
asm
volatile
(
"mrc p15, 0, %0, c9, c12, 5"
:
"=r"
(
val
));
printk
(
KERN_INFO
"SELECT=0x%08x
\n
"
,
val
);
asm
volatile
(
"mrc p15, 0, %0, c9, c13, 0"
:
"=r"
(
val
));
printk
(
KERN_INFO
"CCNT =0x%08x
\n
"
,
val
);
for
(
cnt
=
ARMV7_COUNTER0
;
cnt
<
ARMV7_COUNTER_LAST
;
cnt
++
)
{
armv7_pmnc_select_counter
(
cnt
);
asm
volatile
(
"mrc p15, 0, %0, c9, c13, 2"
:
"=r"
(
val
));
printk
(
KERN_INFO
"CNT[%d] count =0x%08x
\n
"
,
cnt
-
ARMV7_EVENT_CNT_TO_CNTx
,
val
);
asm
volatile
(
"mrc p15, 0, %0, c9, c13, 1"
:
"=r"
(
val
));
printk
(
KERN_INFO
"CNT[%d] evtsel=0x%08x
\n
"
,
cnt
-
ARMV7_EVENT_CNT_TO_CNTx
,
val
);
}
}
#endif
void
armv7pmu_enable_event
(
struct
hw_perf_event
*
hwc
,
int
idx
)
{
unsigned
long
flags
;
/*
* Enable counter and interrupt, and set the counter to count
* the event that we're interested in.
*/
spin_lock_irqsave
(
&
pmu_lock
,
flags
);
/*
* Disable counter
*/
armv7_pmnc_disable_counter
(
idx
);
/*
* Set event (if destined for PMNx counters)
* We don't need to set the event if it's a cycle count
*/
if
(
idx
!=
ARMV7_CYCLE_COUNTER
)
armv7_pmnc_write_evtsel
(
idx
,
hwc
->
config_base
);
/*
* Enable interrupt for this counter
*/
armv7_pmnc_enable_intens
(
idx
);
/*
* Enable counter
*/
armv7_pmnc_enable_counter
(
idx
);
spin_unlock_irqrestore
(
&
pmu_lock
,
flags
);
}
static
void
armv7pmu_disable_event
(
struct
hw_perf_event
*
hwc
,
int
idx
)
{
unsigned
long
flags
;
/*
* Disable counter and interrupt
*/
spin_lock_irqsave
(
&
pmu_lock
,
flags
);
/*
* Disable counter
*/
armv7_pmnc_disable_counter
(
idx
);
/*
* Disable interrupt for this counter
*/
armv7_pmnc_disable_intens
(
idx
);
spin_unlock_irqrestore
(
&
pmu_lock
,
flags
);
}
static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev)
{
        unsigned long pmnc;
        struct perf_sample_data data;
        struct cpu_hw_events *cpuc;
        struct pt_regs *regs;
        int idx;

        /*
         * Get and reset the IRQ flags
         */
        pmnc = armv7_pmnc_getreset_flags();

        /*
         * Did an overflow occur?
         */
        if (!armv7_pmnc_has_overflowed(pmnc))
                return IRQ_NONE;

        /*
         * Handle the counter(s) overflow(s)
         */
        regs = get_irq_regs();

        perf_sample_data_init(&data, 0);

        cpuc = &__get_cpu_var(cpu_hw_events);
        for (idx = 0; idx <= armpmu->num_events; ++idx) {
                struct perf_event *event = cpuc->events[idx];
                struct hw_perf_event *hwc;

                if (!test_bit(idx, cpuc->active_mask))
                        continue;

                /*
                 * We have a single interrupt for all counters. Check that
                 * each counter has overflowed before we process it.
                 */
                if (!armv7_pmnc_counter_has_overflowed(pmnc, idx))
                        continue;

                hwc = &event->hw;
                armpmu_event_update(event, hwc, idx);
                data.period = event->hw.last_period;
                if (!armpmu_event_set_period(event, hwc, idx))
                        continue;

                if (perf_event_overflow(event, 0, &data, regs))
                        armpmu->disable(hwc, idx);
        }

        /*
         * Handle the pending perf events.
         *
         * Note: this call *must* be run with interrupts disabled. For
         * platforms that can have the PMU interrupts raised as an NMI, this
         * will not work.
         */
        irq_work_run();

        return IRQ_HANDLED;
}
static void armv7pmu_start(void)
{
        unsigned long flags;

        spin_lock_irqsave(&pmu_lock, flags);
        /* Enable all counters */
        armv7_pmnc_write(armv7_pmnc_read() | ARMV7_PMNC_E);
        spin_unlock_irqrestore(&pmu_lock, flags);
}

static void armv7pmu_stop(void)
{
        unsigned long flags;

        spin_lock_irqsave(&pmu_lock, flags);
        /* Disable all counters */
        armv7_pmnc_write(armv7_pmnc_read() & ~ARMV7_PMNC_E);
        spin_unlock_irqrestore(&pmu_lock, flags);
}
static int armv7pmu_get_event_idx(struct cpu_hw_events *cpuc,
                                  struct hw_perf_event *event)
{
        int idx;

        /* Always place a cycle counter into the cycle counter. */
        if (event->config_base == ARMV7_PERFCTR_CPU_CYCLES) {
                if (test_and_set_bit(ARMV7_CYCLE_COUNTER, cpuc->used_mask))
                        return -EAGAIN;

                return ARMV7_CYCLE_COUNTER;
        } else {
                /*
                 * For anything other than a cycle counter, try and use
                 * the events counters
                 */
                for (idx = ARMV7_COUNTER0; idx <= armpmu->num_events; ++idx) {
                        if (!test_and_set_bit(idx, cpuc->used_mask))
                                return idx;
                }

                /* The counters are all in use. */
                return -EAGAIN;
        }
}
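/*
 * Common ARMv7 PMU operations. The CPU-specific id, name, event maps and
 * counter count are filled in by armv7_a8_pmu_init()/armv7_a9_pmu_init()
 * below.
 */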
static struct arm_pmu armv7pmu = {
        .handle_irq     = armv7pmu_handle_irq,
        .enable         = armv7pmu_enable_event,
        .disable        = armv7pmu_disable_event,
        .read_counter   = armv7pmu_read_counter,
        .write_counter  = armv7pmu_write_counter,
        .get_event_idx  = armv7pmu_get_event_idx,
        .start          = armv7pmu_start,
        .stop           = armv7pmu_stop,
        .raw_event_mask = 0xFF,
        .max_period     = (1LLU << 32) - 1,
};
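/*
 * Reset the PMU (P and C bits) and report how many counters are usable:
 * the CNTx event counters advertised in PMNC plus the dedicated cycle
 * counter.
 */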
static u32 __init armv7_reset_read_pmnc(void)
{
        u32 nb_cnt;

        /* Initialize & Reset PMNC: C and P bits */
        armv7_pmnc_write(ARMV7_PMNC_P | ARMV7_PMNC_C);

        /* Read the nb of CNTx counters supported from PMNC */
        nb_cnt = (armv7_pmnc_read() >> ARMV7_PMNC_N_SHIFT) & ARMV7_PMNC_N_MASK;

        /* Add the CPU cycles counter and return */
        return nb_cnt + 1;
}
const struct arm_pmu *__init armv7_a8_pmu_init(void)
{
        armv7pmu.id         = ARM_PERF_PMU_ID_CA8;
        armv7pmu.name       = "ARMv7 Cortex-A8";
        armv7pmu.cache_map  = &armv7_a8_perf_cache_map;
        armv7pmu.event_map  = &armv7_a8_perf_map;
        armv7pmu.num_events = armv7_reset_read_pmnc();
        return &armv7pmu;
}

const struct arm_pmu *__init armv7_a9_pmu_init(void)
{
        armv7pmu.id         = ARM_PERF_PMU_ID_CA9;
        armv7pmu.name       = "ARMv7 Cortex-A9";
        armv7pmu.cache_map  = &armv7_a9_perf_cache_map;
        armv7pmu.event_map  = &armv7_a9_perf_map;
        armv7pmu.num_events = armv7_reset_read_pmnc();
        return &armv7pmu;
}
#else
const struct arm_pmu *__init armv7_a8_pmu_init(void)
{
        return NULL;
}

const struct arm_pmu *__init armv7_a9_pmu_init(void)
{
        return NULL;
}
#endif  /* CONFIG_CPU_V7 */
arch/arm/kernel/perf_event_xscale.c
0 → 100644
View file @ f1690d17
/*
* ARMv5 [xscale] Performance counter handling code.
*
* Copyright (C) 2010, ARM Ltd., Will Deacon <will.deacon@arm.com>
*
* Based on the previous xscale OProfile code.
*
* There are two variants of the xscale PMU that we support:
* - xscale1pmu: 2 event counters and a cycle counter
* - xscale2pmu: 4 event counters and a cycle counter
* The two variants share event definitions, but have different
* PMU structures.
*/
#ifdef CONFIG_CPU_XSCALE
enum xscale_perf_types {
        XSCALE_PERFCTR_ICACHE_MISS              = 0x00,
        XSCALE_PERFCTR_ICACHE_NO_DELIVER        = 0x01,
        XSCALE_PERFCTR_DATA_STALL               = 0x02,
        XSCALE_PERFCTR_ITLB_MISS                = 0x03,
        XSCALE_PERFCTR_DTLB_MISS                = 0x04,
        XSCALE_PERFCTR_BRANCH                   = 0x05,
        XSCALE_PERFCTR_BRANCH_MISS              = 0x06,
        XSCALE_PERFCTR_INSTRUCTION              = 0x07,
        XSCALE_PERFCTR_DCACHE_FULL_STALL        = 0x08,
        XSCALE_PERFCTR_DCACHE_FULL_STALL_CONTIG = 0x09,
        XSCALE_PERFCTR_DCACHE_ACCESS            = 0x0A,
        XSCALE_PERFCTR_DCACHE_MISS              = 0x0B,
        XSCALE_PERFCTR_DCACHE_WRITE_BACK        = 0x0C,
        XSCALE_PERFCTR_PC_CHANGED               = 0x0D,
        XSCALE_PERFCTR_BCU_REQUEST              = 0x10,
        XSCALE_PERFCTR_BCU_FULL                 = 0x11,
        XSCALE_PERFCTR_BCU_DRAIN                = 0x12,
        XSCALE_PERFCTR_BCU_ECC_NO_ELOG          = 0x14,
        XSCALE_PERFCTR_BCU_1_BIT_ERR            = 0x15,
        XSCALE_PERFCTR_RMW                      = 0x16,
        /* XSCALE_PERFCTR_CCNT is not hardware defined */
        XSCALE_PERFCTR_CCNT                     = 0xFE,
        XSCALE_PERFCTR_UNUSED                   = 0xFF,
};

enum xscale_counters {
        XSCALE_CYCLE_COUNTER = 1,
        XSCALE_COUNTER0,
        XSCALE_COUNTER1,
        XSCALE_COUNTER2,
        XSCALE_COUNTER3,
};
static const unsigned xscale_perf_map[PERF_COUNT_HW_MAX] = {
        [PERF_COUNT_HW_CPU_CYCLES]          = XSCALE_PERFCTR_CCNT,
        [PERF_COUNT_HW_INSTRUCTIONS]        = XSCALE_PERFCTR_INSTRUCTION,
        [PERF_COUNT_HW_CACHE_REFERENCES]    = HW_OP_UNSUPPORTED,
        [PERF_COUNT_HW_CACHE_MISSES]        = HW_OP_UNSUPPORTED,
        [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = XSCALE_PERFCTR_BRANCH,
        [PERF_COUNT_HW_BRANCH_MISSES]       = XSCALE_PERFCTR_BRANCH_MISS,
        [PERF_COUNT_HW_BUS_CYCLES]          = HW_OP_UNSUPPORTED,
};
static const unsigned xscale_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
                                           [PERF_COUNT_HW_CACHE_OP_MAX]
                                           [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
        [C(L1D)] = {
                [C(OP_READ)] = {
                        [C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS,
                        [C(RESULT_MISS)]   = XSCALE_PERFCTR_DCACHE_MISS,
                },
                [C(OP_WRITE)] = {
                        [C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS,
                        [C(RESULT_MISS)]   = XSCALE_PERFCTR_DCACHE_MISS,
                },
                [C(OP_PREFETCH)] = {
                        [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
                        [C(RESULT_MISS)]   = CACHE_OP_UNSUPPORTED,
                },
        },
        [C(L1I)] = {
                [C(OP_READ)] = {
                        [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
                        [C(RESULT_MISS)]   = XSCALE_PERFCTR_ICACHE_MISS,
                },
                [C(OP_WRITE)] = {
                        [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
                        [C(RESULT_MISS)]   = XSCALE_PERFCTR_ICACHE_MISS,
                },
                [C(OP_PREFETCH)] = {
                        [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
                        [C(RESULT_MISS)]   = CACHE_OP_UNSUPPORTED,
                },
        },
        [C(LL)] = {
                [C(OP_READ)] = {
                        [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
                        [C(RESULT_MISS)]   = CACHE_OP_UNSUPPORTED,
                },
                [C(OP_WRITE)] = {
                        [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
                        [C(RESULT_MISS)]   = CACHE_OP_UNSUPPORTED,
                },
                [C(OP_PREFETCH)] = {
                        [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
                        [C(RESULT_MISS)]   = CACHE_OP_UNSUPPORTED,
                },
        },
        [C(DTLB)] = {
                [C(OP_READ)] = {
                        [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
                        [C(RESULT_MISS)]   = XSCALE_PERFCTR_DTLB_MISS,
                },
                [C(OP_WRITE)] = {
                        [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
                        [C(RESULT_MISS)]   = XSCALE_PERFCTR_DTLB_MISS,
                },
                [C(OP_PREFETCH)] = {
                        [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
                        [C(RESULT_MISS)]   = CACHE_OP_UNSUPPORTED,
                },
        },
        [C(ITLB)] = {
                [C(OP_READ)] = {
                        [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
                        [C(RESULT_MISS)]   = XSCALE_PERFCTR_ITLB_MISS,
                },
                [C(OP_WRITE)] = {
                        [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
                        [C(RESULT_MISS)]   = XSCALE_PERFCTR_ITLB_MISS,
                },
                [C(OP_PREFETCH)] = {
                        [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
                        [C(RESULT_MISS)]   = CACHE_OP_UNSUPPORTED,
                },
        },
        [C(BPU)] = {
                [C(OP_READ)] = {
                        [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
                        [C(RESULT_MISS)]   = CACHE_OP_UNSUPPORTED,
                },
                [C(OP_WRITE)] = {
                        [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
                        [C(RESULT_MISS)]   = CACHE_OP_UNSUPPORTED,
                },
                [C(OP_PREFETCH)] = {
                        [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
                        [C(RESULT_MISS)]   = CACHE_OP_UNSUPPORTED,
                },
        },
};
#define XSCALE_PMU_ENABLE 0x001
#define XSCALE_PMN_RESET 0x002
#define XSCALE_CCNT_RESET 0x004
#define XSCALE_PMU_RESET (CCNT_RESET | PMN_RESET)
#define XSCALE_PMU_CNT64 0x008
#define XSCALE1_OVERFLOWED_MASK 0x700
#define XSCALE1_CCOUNT_OVERFLOW 0x400
#define XSCALE1_COUNT0_OVERFLOW 0x100
#define XSCALE1_COUNT1_OVERFLOW 0x200
#define XSCALE1_CCOUNT_INT_EN 0x040
#define XSCALE1_COUNT0_INT_EN 0x010
#define XSCALE1_COUNT1_INT_EN 0x020
#define XSCALE1_COUNT0_EVT_SHFT 12
#define XSCALE1_COUNT0_EVT_MASK (0xff << XSCALE1_COUNT0_EVT_SHFT)
#define XSCALE1_COUNT1_EVT_SHFT 20
#define XSCALE1_COUNT1_EVT_MASK (0xff << XSCALE1_COUNT1_EVT_SHFT)
static inline u32 xscale1pmu_read_pmnc(void)
{
        u32 val;
        asm volatile("mrc p14, 0, %0, c0, c0, 0" : "=r" (val));
        return val;
}

static inline void xscale1pmu_write_pmnc(u32 val)
{
        /* upper 4bits and 7, 11 are write-as-0 */
        val &= 0xffff77f;
        asm volatile("mcr p14, 0, %0, c0, c0, 0" : : "r" (val));
}
static inline int
xscale1_pmnc_counter_has_overflowed(unsigned long pmnc,
                                    enum xscale_counters counter)
{
        int ret = 0;

        switch (counter) {
        case XSCALE_CYCLE_COUNTER:
                ret = pmnc & XSCALE1_CCOUNT_OVERFLOW;
                break;
        case XSCALE_COUNTER0:
                ret = pmnc & XSCALE1_COUNT0_OVERFLOW;
                break;
        case XSCALE_COUNTER1:
                ret = pmnc & XSCALE1_COUNT1_OVERFLOW;
                break;
        default:
                WARN_ONCE(1, "invalid counter number (%d)\n", counter);
        }

        return ret;
}
static irqreturn_t xscale1pmu_handle_irq(int irq_num, void *dev)
{
        unsigned long pmnc;
        struct perf_sample_data data;
        struct cpu_hw_events *cpuc;
        struct pt_regs *regs;
        int idx;

        /*
         * NOTE: there's an A stepping erratum that states if an overflow
         *       bit already exists and another occurs, the previous
         *       Overflow bit gets cleared. There's no workaround.
         *       Fixed in B stepping or later.
         */
        pmnc = xscale1pmu_read_pmnc();

        /*
         * Write the value back to clear the overflow flags. Overflow
         * flags remain in pmnc for use below. We also disable the PMU
         * while we process the interrupt.
         */
        xscale1pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE);

        if (!(pmnc & XSCALE1_OVERFLOWED_MASK))
                return IRQ_NONE;

        regs = get_irq_regs();

        perf_sample_data_init(&data, 0);

        cpuc = &__get_cpu_var(cpu_hw_events);
        for (idx = 0; idx <= armpmu->num_events; ++idx) {
                struct perf_event *event = cpuc->events[idx];
                struct hw_perf_event *hwc;

                if (!test_bit(idx, cpuc->active_mask))
                        continue;

                if (!xscale1_pmnc_counter_has_overflowed(pmnc, idx))
                        continue;

                hwc = &event->hw;
                armpmu_event_update(event, hwc, idx);
                data.period = event->hw.last_period;
                if (!armpmu_event_set_period(event, hwc, idx))
                        continue;

                if (perf_event_overflow(event, 0, &data, regs))
                        armpmu->disable(hwc, idx);
        }

        irq_work_run();

        /*
         * Re-enable the PMU.
         */
        pmnc = xscale1pmu_read_pmnc() | XSCALE_PMU_ENABLE;
        xscale1pmu_write_pmnc(pmnc);

        return IRQ_HANDLED;
}
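/*
 * Program the interrupt enable and event-select field for one counter.
 * On xscale1 all of this configuration lives in the single PMNC register,
 * so the read-modify-write is done under pmu_lock.
 */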
static void xscale1pmu_enable_event(struct hw_perf_event *hwc, int idx)
{
        unsigned long val, mask, evt, flags;

        switch (idx) {
        case XSCALE_CYCLE_COUNTER:
                mask = 0;
                evt = XSCALE1_CCOUNT_INT_EN;
                break;
        case XSCALE_COUNTER0:
                mask = XSCALE1_COUNT0_EVT_MASK;
                evt = (hwc->config_base << XSCALE1_COUNT0_EVT_SHFT) |
                        XSCALE1_COUNT0_INT_EN;
                break;
        case XSCALE_COUNTER1:
                mask = XSCALE1_COUNT1_EVT_MASK;
                evt = (hwc->config_base << XSCALE1_COUNT1_EVT_SHFT) |
                        XSCALE1_COUNT1_INT_EN;
                break;
        default:
                WARN_ONCE(1, "invalid counter number (%d)\n", idx);
                return;
        }

        spin_lock_irqsave(&pmu_lock, flags);
        val = xscale1pmu_read_pmnc();
        val &= ~mask;
        val |= evt;
        xscale1pmu_write_pmnc(val);
        spin_unlock_irqrestore(&pmu_lock, flags);
}
static void xscale1pmu_disable_event(struct hw_perf_event *hwc, int idx)
{
        unsigned long val, mask, evt, flags;

        switch (idx) {
        case XSCALE_CYCLE_COUNTER:
                mask = XSCALE1_CCOUNT_INT_EN;
                evt = 0;
                break;
        case XSCALE_COUNTER0:
                mask = XSCALE1_COUNT0_INT_EN | XSCALE1_COUNT0_EVT_MASK;
                evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT0_EVT_SHFT;
                break;
        case XSCALE_COUNTER1:
                mask = XSCALE1_COUNT1_INT_EN | XSCALE1_COUNT1_EVT_MASK;
                evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT1_EVT_SHFT;
                break;
        default:
                WARN_ONCE(1, "invalid counter number (%d)\n", idx);
                return;
        }

        spin_lock_irqsave(&pmu_lock, flags);
        val = xscale1pmu_read_pmnc();
        val &= ~mask;
        val |= evt;
        xscale1pmu_write_pmnc(val);
        spin_unlock_irqrestore(&pmu_lock, flags);
}
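/*
 * Cycle-count events must use the dedicated cycle counter; any other event
 * takes whichever of the two xscale1 event counters is still free.
 */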
static int
xscale1pmu_get_event_idx(struct cpu_hw_events *cpuc,
                         struct hw_perf_event *event)
{
        if (XSCALE_PERFCTR_CCNT == event->config_base) {
                if (test_and_set_bit(XSCALE_CYCLE_COUNTER, cpuc->used_mask))
                        return -EAGAIN;

                return XSCALE_CYCLE_COUNTER;
        } else {
                if (!test_and_set_bit(XSCALE_COUNTER1, cpuc->used_mask))
                        return XSCALE_COUNTER1;

                if (!test_and_set_bit(XSCALE_COUNTER0, cpuc->used_mask))
                        return XSCALE_COUNTER0;

                return -EAGAIN;
        }
}
static void xscale1pmu_start(void)
{
        unsigned long flags, val;

        spin_lock_irqsave(&pmu_lock, flags);
        val = xscale1pmu_read_pmnc();
        val |= XSCALE_PMU_ENABLE;
        xscale1pmu_write_pmnc(val);
        spin_unlock_irqrestore(&pmu_lock, flags);
}

static void xscale1pmu_stop(void)
{
        unsigned long flags, val;

        spin_lock_irqsave(&pmu_lock, flags);
        val = xscale1pmu_read_pmnc();
        val &= ~XSCALE_PMU_ENABLE;
        xscale1pmu_write_pmnc(val);
        spin_unlock_irqrestore(&pmu_lock, flags);
}
static inline u32 xscale1pmu_read_counter(int counter)
{
        u32 val = 0;

        switch (counter) {
        case XSCALE_CYCLE_COUNTER:
                asm volatile("mrc p14, 0, %0, c1, c0, 0" : "=r" (val));
                break;
        case XSCALE_COUNTER0:
                asm volatile("mrc p14, 0, %0, c2, c0, 0" : "=r" (val));
                break;
        case XSCALE_COUNTER1:
                asm volatile("mrc p14, 0, %0, c3, c0, 0" : "=r" (val));
                break;
        }

        return val;
}

static inline void xscale1pmu_write_counter(int counter, u32 val)
{
        switch (counter) {
        case XSCALE_CYCLE_COUNTER:
                asm volatile("mcr p14, 0, %0, c1, c0, 0" : : "r" (val));
                break;
        case XSCALE_COUNTER0:
                asm volatile("mcr p14, 0, %0, c2, c0, 0" : : "r" (val));
                break;
        case XSCALE_COUNTER1:
                asm volatile("mcr p14, 0, %0, c3, c0, 0" : : "r" (val));
                break;
        }
}
static const struct arm_pmu xscale1pmu = {
        .id             = ARM_PERF_PMU_ID_XSCALE1,
        .name           = "xscale1",
        .handle_irq     = xscale1pmu_handle_irq,
        .enable         = xscale1pmu_enable_event,
        .disable        = xscale1pmu_disable_event,
        .read_counter   = xscale1pmu_read_counter,
        .write_counter  = xscale1pmu_write_counter,
        .get_event_idx  = xscale1pmu_get_event_idx,
        .start          = xscale1pmu_start,
        .stop           = xscale1pmu_stop,
        .cache_map      = &xscale_perf_cache_map,
        .event_map      = &xscale_perf_map,
        .raw_event_mask = 0xFF,
        .num_events     = 3,
        .max_period     = (1LLU << 32) - 1,
};

const struct arm_pmu *__init xscale1pmu_init(void)
{
        return &xscale1pmu;
}
#define XSCALE2_OVERFLOWED_MASK 0x01f
#define XSCALE2_CCOUNT_OVERFLOW 0x001
#define XSCALE2_COUNT0_OVERFLOW 0x002
#define XSCALE2_COUNT1_OVERFLOW 0x004
#define XSCALE2_COUNT2_OVERFLOW 0x008
#define XSCALE2_COUNT3_OVERFLOW 0x010
#define XSCALE2_CCOUNT_INT_EN 0x001
#define XSCALE2_COUNT0_INT_EN 0x002
#define XSCALE2_COUNT1_INT_EN 0x004
#define XSCALE2_COUNT2_INT_EN 0x008
#define XSCALE2_COUNT3_INT_EN 0x010
#define XSCALE2_COUNT0_EVT_SHFT 0
#define XSCALE2_COUNT0_EVT_MASK (0xff << XSCALE2_COUNT0_EVT_SHFT)
#define XSCALE2_COUNT1_EVT_SHFT 8
#define XSCALE2_COUNT1_EVT_MASK (0xff << XSCALE2_COUNT1_EVT_SHFT)
#define XSCALE2_COUNT2_EVT_SHFT 16
#define XSCALE2_COUNT2_EVT_MASK (0xff << XSCALE2_COUNT2_EVT_SHFT)
#define XSCALE2_COUNT3_EVT_SHFT 24
#define XSCALE2_COUNT3_EVT_MASK (0xff << XSCALE2_COUNT3_EVT_SHFT)
static inline u32 xscale2pmu_read_pmnc(void)
{
        u32 val;
        asm volatile("mrc p14, 0, %0, c0, c1, 0" : "=r" (val));
        /* bits 1-2 and 4-23 are read-unpredictable */
        return val & 0xff000009;
}

static inline void xscale2pmu_write_pmnc(u32 val)
{
        /* bits 4-23 are write-as-0, 24-31 are write ignored */
        val &= 0xf;
        asm volatile("mcr p14, 0, %0, c0, c1, 0" : : "r" (val));
}
static inline u32 xscale2pmu_read_overflow_flags(void)
{
        u32 val;
        asm volatile("mrc p14, 0, %0, c5, c1, 0" : "=r" (val));
        return val;
}

static inline void xscale2pmu_write_overflow_flags(u32 val)
{
        asm volatile("mcr p14, 0, %0, c5, c1, 0" : : "r" (val));
}

static inline u32 xscale2pmu_read_event_select(void)
{
        u32 val;
        asm volatile("mrc p14, 0, %0, c8, c1, 0" : "=r" (val));
        return val;
}

static inline void xscale2pmu_write_event_select(u32 val)
{
        asm volatile("mcr p14, 0, %0, c8, c1, 0" : : "r" (val));
}

static inline u32 xscale2pmu_read_int_enable(void)
{
        u32 val;
        asm volatile("mrc p14, 0, %0, c4, c1, 0" : "=r" (val));
        return val;
}

static void xscale2pmu_write_int_enable(u32 val)
{
        asm volatile("mcr p14, 0, %0, c4, c1, 0" : : "r" (val));
}
static inline int
xscale2_pmnc_counter_has_overflowed(unsigned long of_flags,
                                    enum xscale_counters counter)
{
        int ret = 0;

        switch (counter) {
        case XSCALE_CYCLE_COUNTER:
                ret = of_flags & XSCALE2_CCOUNT_OVERFLOW;
                break;
        case XSCALE_COUNTER0:
                ret = of_flags & XSCALE2_COUNT0_OVERFLOW;
                break;
        case XSCALE_COUNTER1:
                ret = of_flags & XSCALE2_COUNT1_OVERFLOW;
                break;
        case XSCALE_COUNTER2:
                ret = of_flags & XSCALE2_COUNT2_OVERFLOW;
                break;
        case XSCALE_COUNTER3:
                ret = of_flags & XSCALE2_COUNT3_OVERFLOW;
                break;
        default:
                WARN_ONCE(1, "invalid counter number (%d)\n", counter);
        }

        return ret;
}
static irqreturn_t xscale2pmu_handle_irq(int irq_num, void *dev)
{
        unsigned long pmnc, of_flags;
        struct perf_sample_data data;
        struct cpu_hw_events *cpuc;
        struct pt_regs *regs;
        int idx;

        /* Disable the PMU. */
        pmnc = xscale2pmu_read_pmnc();
        xscale2pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE);

        /* Check the overflow flag register. */
        of_flags = xscale2pmu_read_overflow_flags();
        if (!(of_flags & XSCALE2_OVERFLOWED_MASK))
                return IRQ_NONE;

        /* Clear the overflow bits. */
        xscale2pmu_write_overflow_flags(of_flags);

        regs = get_irq_regs();

        perf_sample_data_init(&data, 0);

        cpuc = &__get_cpu_var(cpu_hw_events);
        for (idx = 0; idx <= armpmu->num_events; ++idx) {
                struct perf_event *event = cpuc->events[idx];
                struct hw_perf_event *hwc;

                if (!test_bit(idx, cpuc->active_mask))
                        continue;

                if (!xscale2_pmnc_counter_has_overflowed(pmnc, idx))
                        continue;

                hwc = &event->hw;
                armpmu_event_update(event, hwc, idx);
                data.period = event->hw.last_period;
                if (!armpmu_event_set_period(event, hwc, idx))
                        continue;

                if (perf_event_overflow(event, 0, &data, regs))
                        armpmu->disable(hwc, idx);
        }

        irq_work_run();

        /*
         * Re-enable the PMU.
         */
        pmnc = xscale2pmu_read_pmnc() | XSCALE_PMU_ENABLE;
        xscale2pmu_write_pmnc(pmnc);

        return IRQ_HANDLED;
}
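/*
 * Unlike xscale1, the xscale2 PMU keeps interrupt enables and event
 * selection in separate registers; both are read-modify-written here.
 */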
static void xscale2pmu_enable_event(struct hw_perf_event *hwc, int idx)
{
        unsigned long flags, ien, evtsel;

        ien = xscale2pmu_read_int_enable();
        evtsel = xscale2pmu_read_event_select();

        switch (idx) {
        case XSCALE_CYCLE_COUNTER:
                ien |= XSCALE2_CCOUNT_INT_EN;
                break;
        case XSCALE_COUNTER0:
                ien |= XSCALE2_COUNT0_INT_EN;
                evtsel &= ~XSCALE2_COUNT0_EVT_MASK;
                evtsel |= hwc->config_base << XSCALE2_COUNT0_EVT_SHFT;
                break;
        case XSCALE_COUNTER1:
                ien |= XSCALE2_COUNT1_INT_EN;
                evtsel &= ~XSCALE2_COUNT1_EVT_MASK;
                evtsel |= hwc->config_base << XSCALE2_COUNT1_EVT_SHFT;
                break;
        case XSCALE_COUNTER2:
                ien |= XSCALE2_COUNT2_INT_EN;
                evtsel &= ~XSCALE2_COUNT2_EVT_MASK;
                evtsel |= hwc->config_base << XSCALE2_COUNT2_EVT_SHFT;
                break;
        case XSCALE_COUNTER3:
                ien |= XSCALE2_COUNT3_INT_EN;
                evtsel &= ~XSCALE2_COUNT3_EVT_MASK;
                evtsel |= hwc->config_base << XSCALE2_COUNT3_EVT_SHFT;
                break;
        default:
                WARN_ONCE(1, "invalid counter number (%d)\n", idx);
                return;
        }

        spin_lock_irqsave(&pmu_lock, flags);
        xscale2pmu_write_event_select(evtsel);
        xscale2pmu_write_int_enable(ien);
        spin_unlock_irqrestore(&pmu_lock, flags);
}
static void xscale2pmu_disable_event(struct hw_perf_event *hwc, int idx)
{
        unsigned long flags, ien, evtsel;

        ien = xscale2pmu_read_int_enable();
        evtsel = xscale2pmu_read_event_select();

        switch (idx) {
        case XSCALE_CYCLE_COUNTER:
                ien &= ~XSCALE2_CCOUNT_INT_EN;
                break;
        case XSCALE_COUNTER0:
                ien &= ~XSCALE2_COUNT0_INT_EN;
                evtsel &= ~XSCALE2_COUNT0_EVT_MASK;
                evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT0_EVT_SHFT;
                break;
        case XSCALE_COUNTER1:
                ien &= ~XSCALE2_COUNT1_INT_EN;
                evtsel &= ~XSCALE2_COUNT1_EVT_MASK;
                evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT1_EVT_SHFT;
                break;
        case XSCALE_COUNTER2:
                ien &= ~XSCALE2_COUNT2_INT_EN;
                evtsel &= ~XSCALE2_COUNT2_EVT_MASK;
                evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT2_EVT_SHFT;
                break;
        case XSCALE_COUNTER3:
                ien &= ~XSCALE2_COUNT3_INT_EN;
                evtsel &= ~XSCALE2_COUNT3_EVT_MASK;
                evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT3_EVT_SHFT;
                break;
        default:
                WARN_ONCE(1, "invalid counter number (%d)\n", idx);
                return;
        }

        spin_lock_irqsave(&pmu_lock, flags);
        xscale2pmu_write_event_select(evtsel);
        xscale2pmu_write_int_enable(ien);
        spin_unlock_irqrestore(&pmu_lock, flags);
}
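/*
 * Reuse the xscale1 allocation for the cycle counter and counters 0/1,
 * then fall back to the two extra counters (3, then 2) that only the
 * xscale2 PMU provides.
 */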
static int
xscale2pmu_get_event_idx(struct cpu_hw_events *cpuc,
                         struct hw_perf_event *event)
{
        int idx = xscale1pmu_get_event_idx(cpuc, event);
        if (idx >= 0)
                goto out;

        if (!test_and_set_bit(XSCALE_COUNTER3, cpuc->used_mask))
                idx = XSCALE_COUNTER3;
        else if (!test_and_set_bit(XSCALE_COUNTER2, cpuc->used_mask))
                idx = XSCALE_COUNTER2;
out:
        return idx;
}
static void xscale2pmu_start(void)
{
        unsigned long flags, val;

        spin_lock_irqsave(&pmu_lock, flags);
        val = xscale2pmu_read_pmnc() & ~XSCALE_PMU_CNT64;
        val |= XSCALE_PMU_ENABLE;
        xscale2pmu_write_pmnc(val);
        spin_unlock_irqrestore(&pmu_lock, flags);
}

static void xscale2pmu_stop(void)
{
        unsigned long flags, val;

        spin_lock_irqsave(&pmu_lock, flags);
        val = xscale2pmu_read_pmnc();
        val &= ~XSCALE_PMU_ENABLE;
        xscale2pmu_write_pmnc(val);
        spin_unlock_irqrestore(&pmu_lock, flags);
}
static inline u32 xscale2pmu_read_counter(int counter)
{
        u32 val = 0;

        switch (counter) {
        case XSCALE_CYCLE_COUNTER:
                asm volatile("mrc p14, 0, %0, c1, c1, 0" : "=r" (val));
                break;
        case XSCALE_COUNTER0:
                asm volatile("mrc p14, 0, %0, c0, c2, 0" : "=r" (val));
                break;
        case XSCALE_COUNTER1:
                asm volatile("mrc p14, 0, %0, c1, c2, 0" : "=r" (val));
                break;
        case XSCALE_COUNTER2:
                asm volatile("mrc p14, 0, %0, c2, c2, 0" : "=r" (val));
                break;
        case XSCALE_COUNTER3:
                asm volatile("mrc p14, 0, %0, c3, c2, 0" : "=r" (val));
                break;
        }

        return val;
}

static inline void xscale2pmu_write_counter(int counter, u32 val)
{
        switch (counter) {
        case XSCALE_CYCLE_COUNTER:
                asm volatile("mcr p14, 0, %0, c1, c1, 0" : : "r" (val));
                break;
        case XSCALE_COUNTER0:
                asm volatile("mcr p14, 0, %0, c0, c2, 0" : : "r" (val));
                break;
        case XSCALE_COUNTER1:
                asm volatile("mcr p14, 0, %0, c1, c2, 0" : : "r" (val));
                break;
        case XSCALE_COUNTER2:
                asm volatile("mcr p14, 0, %0, c2, c2, 0" : : "r" (val));
                break;
        case XSCALE_COUNTER3:
                asm volatile("mcr p14, 0, %0, c3, c2, 0" : : "r" (val));
                break;
        }
}
static const struct arm_pmu xscale2pmu = {
        .id             = ARM_PERF_PMU_ID_XSCALE2,
        .name           = "xscale2",
        .handle_irq     = xscale2pmu_handle_irq,
        .enable         = xscale2pmu_enable_event,
        .disable        = xscale2pmu_disable_event,
        .read_counter   = xscale2pmu_read_counter,
        .write_counter  = xscale2pmu_write_counter,
        .get_event_idx  = xscale2pmu_get_event_idx,
        .start          = xscale2pmu_start,
        .stop           = xscale2pmu_stop,
        .cache_map      = &xscale_perf_cache_map,
        .event_map      = &xscale_perf_map,
        .raw_event_mask = 0xFF,
        .num_events     = 5,
        .max_period     = (1LLU << 32) - 1,
};

const struct arm_pmu *__init xscale2pmu_init(void)
{
        return &xscale2pmu;
}
#else
const struct arm_pmu *__init xscale1pmu_init(void)
{
        return NULL;
}

const struct arm_pmu *__init xscale2pmu_init(void)
{
        return NULL;
}
#endif  /* CONFIG_CPU_XSCALE */
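For context, a minimal user-space sketch (not part of this commit; the workload marker is a placeholder) of how a counter served by these PMU backends is typically opened and read through the perf_event_open(2) syscall. It requests PERF_COUNT_HW_CPU_CYCLES, which the event maps above translate to the hardware cycle counter.

#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <string.h>
#include <stdint.h>
#include <stdio.h>

/* Thin wrapper: glibc does not provide perf_event_open() directly. */
static long perf_event_open(struct perf_event_attr *attr, pid_t pid,
                            int cpu, int group_fd, unsigned long flags)
{
        return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int main(void)
{
        struct perf_event_attr attr;
        uint64_t count;
        int fd;

        memset(&attr, 0, sizeof(attr));
        attr.size = sizeof(attr);
        attr.type = PERF_TYPE_HARDWARE;
        attr.config = PERF_COUNT_HW_CPU_CYCLES; /* backed by the CPU cycle counter */
        attr.disabled = 1;
        attr.exclude_kernel = 1;

        /* Count for the current task on any CPU. */
        fd = perf_event_open(&attr, 0, -1, -1, 0);
        if (fd < 0) {
                perror("perf_event_open");
                return 1;
        }

        ioctl(fd, PERF_EVENT_IOC_RESET, 0);
        ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);

        /* ... workload under measurement ... */

        ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
        if (read(fd, &count, sizeof(count)) == sizeof(count))
                printf("cycles: %llu\n", (unsigned long long)count);

        close(fd);
        return 0;
}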