提交 8a1ca8ce 编写于 作者: L Linus Torvalds

Merge branch 'perfcounters-for-linus' of...

Merge branch 'perfcounters-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'perfcounters-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (574 commits)
  perf_counter: Turn off by default
  perf_counter: Add counter->id to the throttle event
  perf_counter: Better align code
  perf_counter: Rename L2 to LL cache
  perf_counter: Standardize event names
  perf_counter: Rename enums
  perf_counter tools: Clean up u64 usage
  perf_counter: Rename perf_counter_limit sysctl
  perf_counter: More paranoia settings
  perf_counter: powerpc: Implement generalized cache events for POWER processors
  perf_counters: powerpc: Add support for POWER7 processors
  perf_counter: Accurate period data
  perf_counter: Introduce struct for sample data
  perf_counter tools: Normalize data using per sample period data
  perf_counter: Annotate exit ctx recursion
  perf_counter tools: Propagate signals properly
  perf_counter tools: Small frequency related fixes
  perf_counter: More aggressive frequency adjustment
  perf_counter/x86: Fix the model number of Intel Core2 processors
  perf_counter, x86: Correct some event and umask values for Intel processors
  ...
...@@ -4403,6 +4403,16 @@ S: Maintained ...@@ -4403,6 +4403,16 @@ S: Maintained
F: include/linux/delayacct.h F: include/linux/delayacct.h
F: kernel/delayacct.c F: kernel/delayacct.c
PERFORMANCE COUNTER SUBSYSTEM
P: Peter Zijlstra
M: a.p.zijlstra@chello.nl
P: Paul Mackerras
M: paulus@samba.org
P: Ingo Molnar
M: mingo@elte.hu
L: linux-kernel@vger.kernel.org
S: Supported
PERSONALITY HANDLING PERSONALITY HANDLING
P: Christoph Hellwig P: Christoph Hellwig
M: hch@infradead.org M: hch@infradead.org
......
...@@ -131,5 +131,44 @@ static inline int irqs_disabled_flags(unsigned long flags) ...@@ -131,5 +131,44 @@ static inline int irqs_disabled_flags(unsigned long flags)
*/ */
struct irq_chip; struct irq_chip;
#ifdef CONFIG_PERF_COUNTERS
/*
 * Accessors for the one-byte perf_counter_pending flag in struct
 * paca_struct.  Each accessor is a single byte load or store at the
 * field's offset from register 13.
 * NOTE(review): this relies on r13 holding the current CPU's paca
 * pointer -- confirm against the platform's register conventions.
 */

/* Load the perf_counter_pending byte and return it (non-zero if set). */
static inline unsigned long test_perf_counter_pending(void)
{
unsigned long x;
/* lbz: load the byte at offsetof(perf_counter_pending)(r13) into x */
asm volatile("lbz %0,%1(13)"
: "=r" (x)
: "i" (offsetof(struct paca_struct, perf_counter_pending)));
return x;
}
/* Store 1 into the perf_counter_pending byte. */
static inline void set_perf_counter_pending(void)
{
asm volatile("stb %0,%1(13)" : :
"r" (1),
"i" (offsetof(struct paca_struct, perf_counter_pending)));
}
/* Store 0 into the perf_counter_pending byte. */
static inline void clear_perf_counter_pending(void)
{
asm volatile("stb %0,%1(13)" : :
"r" (0),
"i" (offsetof(struct paca_struct, perf_counter_pending)));
}
/* Defined elsewhere; called by the interrupt-restore code when the flag is set. */
extern void perf_counter_do_pending(void);
#else
/*
 * Stubs used when CONFIG_PERF_COUNTERS is not set: nothing is ever
 * pending, and setting, clearing or processing the flag does nothing.
 */
static inline unsigned long test_perf_counter_pending(void)
{
return 0;
}
static inline void set_perf_counter_pending(void) {}
static inline void clear_perf_counter_pending(void) {}
static inline void perf_counter_do_pending(void) {}
#endif /* CONFIG_PERF_COUNTERS */
#endif /* __KERNEL__ */ #endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_HW_IRQ_H */ #endif /* _ASM_POWERPC_HW_IRQ_H */
...@@ -99,6 +99,7 @@ struct paca_struct { ...@@ -99,6 +99,7 @@ struct paca_struct {
u8 soft_enabled; /* irq soft-enable flag */ u8 soft_enabled; /* irq soft-enable flag */
u8 hard_enabled; /* set if irqs are enabled in MSR */ u8 hard_enabled; /* set if irqs are enabled in MSR */
u8 io_sync; /* writel() needs spin_unlock sync */ u8 io_sync; /* writel() needs spin_unlock sync */
u8 perf_counter_pending; /* PM interrupt while soft-disabled */
/* Stuff for accurate time accounting */ /* Stuff for accurate time accounting */
u64 user_time; /* accumulated usermode TB ticks */ u64 user_time; /* accumulated usermode TB ticks */
......
/*
* Performance counter support - PowerPC-specific definitions.
*
* Copyright 2008-2009 Paul Mackerras, IBM Corporation.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/types.h>
#define MAX_HWCOUNTERS 8
#define MAX_EVENT_ALTERNATIVES 8
#define MAX_LIMITED_HWCOUNTERS 2
/*
* This struct provides the constants and functions needed to
* describe the PMU on a particular POWER-family CPU.
*/
struct power_pmu {
/* NOTE(review): presumably the number of hardware PMCs on this CPU -- confirm */
int n_counter;
/* NOTE(review): presumably the most entries get_alternatives() may write to alt[] -- confirm */
int max_alternatives;
/*
 * Constants for the add/NAND constraint fields: add_fields has a 1 in
 * the least significant bit of each add field, test_adder holds the
 * per-field bias used when testing whether a constraint is exceeded.
 */
u64 add_fields;
u64 test_adder;
/*
 * Work out the MMCR settings for a set of n_ev event codes.  On
 * success the counter assigned to each event is stored in hwc[] and
 * the register values in mmcr[]; implementations return 0 on success
 * and -1 if the events cannot be scheduled together.
 */
int (*compute_mmcr)(u64 events[], int n_ev,
unsigned int hwc[], u64 mmcr[]);
/*
 * Store the constraint mask and value for an event through *mskp and
 * *valp; implementations return 0 on success, -1 for an invalid event.
 */
int (*get_constraint)(u64 event, u64 *mskp, u64 *valp);
/*
 * Fill alt[] with the event codes equivalent to 'event' (alt[0] is the
 * event itself) and return how many were written.  'flags' takes the
 * PPMU_LIMITED_PMC_* / PPMU_ONLY_COUNT_RUN values.
 */
int (*get_alternatives)(u64 event, unsigned int flags,
u64 alt[]);
/* Clear the event selection for counter 'pmc' in the mmcr[] image. */
void (*disable_pmc)(unsigned int pmc, u64 mmcr[]);
/* NOTE(review): presumably non-zero if the event needs a limited-function PMC -- confirm */
int (*limited_pmc_event)(u64 event);
/* PPMU_LIMITED_PMC5_6 / PPMU_ALT_SIPR bits */
u32 flags;
/* Number of entries in generic_events[] */
int n_generic;
/* Event codes for the generic hardware events, indexed by PERF_COUNT_HW_* */
int *generic_events;
/*
 * Pointer to the table of generalized cache events, indexed by
 * [cache][operation][result].
 */
int (*cache_events)[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX];
};
extern struct power_pmu *ppmu;
/*
* Values for power_pmu.flags
*/
#define PPMU_LIMITED_PMC5_6 1 /* PMC5/6 have limited function */
#define PPMU_ALT_SIPR 2 /* uses alternate posn for SIPR/HV */
/*
* Values for flags to get_alternatives()
*/
#define PPMU_LIMITED_PMC_OK 1 /* can put this on a limited PMC */
#define PPMU_LIMITED_PMC_REQD 2 /* have to put this on a limited PMC */
#define PPMU_ONLY_COUNT_RUN 4 /* only counting in run state */
struct pt_regs;
extern unsigned long perf_misc_flags(struct pt_regs *regs);
#define perf_misc_flags(regs) perf_misc_flags(regs)
extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
/*
* The power_pmu.get_constraint function returns a 64-bit value and
* a 64-bit mask that express the constraints between this event and
* other events.
*
* The value and mask are divided up into (non-overlapping) bitfields
* of three different types:
*
* Select field: this expresses the constraint that some set of bits
* in MMCR* needs to be set to a specific value for this event. For a
* select field, the mask contains 1s in every bit of the field, and
* the value contains a unique value for each possible setting of the
* MMCR* bits. The constraint checking code will ensure that two events
* that set the same field in their masks have the same value in their
* value dwords.
*
* Add field: this expresses the constraint that there can be at most
* N events in a particular class. A field of k bits can be used for
* N <= 2^(k-1) - 1. The mask has the most significant bit of the field
* set (and the other bits 0), and the value has only the least significant
* bit of the field set. In addition, the 'add_fields' and 'test_adder'
* in the struct power_pmu for this processor come into play. The
* add_fields value contains 1 in the LSB of the field, and the
* test_adder contains 2^(k-1) - 1 - N in the field.
*
* NAND field: this expresses the constraint that you may not have events
* in all of a set of classes. (For example, on PPC970, you can't select
* events from the FPU, ISU and IDU simultaneously, although any two are
* possible.) For N classes, the field is N+1 bits wide, and each class
* is assigned one bit from the least-significant N bits. The mask has
* only the most-significant bit set, and the value has only the bit
* for the event's class set. The test_adder has the least significant
* bit set in the field.
*
* If an event is not subject to the constraint expressed by a particular
* field, then it will have 0 in both the mask and value for that field.
*/
...@@ -492,11 +492,13 @@ ...@@ -492,11 +492,13 @@
#define MMCR0_FCHV 0x00000001UL /* freeze conditions in hypervisor mode */ #define MMCR0_FCHV 0x00000001UL /* freeze conditions in hypervisor mode */
#define SPRN_MMCR1 798 #define SPRN_MMCR1 798
#define SPRN_MMCRA 0x312 #define SPRN_MMCRA 0x312
#define MMCRA_SDSYNC 0x80000000UL /* SDAR synced with SIAR */
#define MMCRA_SIHV 0x10000000UL /* state of MSR HV when SIAR set */ #define MMCRA_SIHV 0x10000000UL /* state of MSR HV when SIAR set */
#define MMCRA_SIPR 0x08000000UL /* state of MSR PR when SIAR set */ #define MMCRA_SIPR 0x08000000UL /* state of MSR PR when SIAR set */
#define MMCRA_SLOT 0x07000000UL /* SLOT bits (37-39) */ #define MMCRA_SLOT 0x07000000UL /* SLOT bits (37-39) */
#define MMCRA_SLOT_SHIFT 24 #define MMCRA_SLOT_SHIFT 24
#define MMCRA_SAMPLE_ENABLE 0x00000001UL /* enable sampling */ #define MMCRA_SAMPLE_ENABLE 0x00000001UL /* enable sampling */
#define POWER6_MMCRA_SDSYNC 0x0000080000000000ULL /* SDAR/SIAR synced */
#define POWER6_MMCRA_SIHV 0x0000040000000000ULL #define POWER6_MMCRA_SIHV 0x0000040000000000ULL
#define POWER6_MMCRA_SIPR 0x0000020000000000ULL #define POWER6_MMCRA_SIPR 0x0000020000000000ULL
#define POWER6_MMCRA_THRM 0x00000020UL #define POWER6_MMCRA_THRM 0x00000020UL
......
...@@ -322,6 +322,6 @@ SYSCALL_SPU(epoll_create1) ...@@ -322,6 +322,6 @@ SYSCALL_SPU(epoll_create1)
SYSCALL_SPU(dup3) SYSCALL_SPU(dup3)
SYSCALL_SPU(pipe2) SYSCALL_SPU(pipe2)
SYSCALL(inotify_init1) SYSCALL(inotify_init1)
SYSCALL(ni_syscall) SYSCALL_SPU(perf_counter_open)
COMPAT_SYS_SPU(preadv) COMPAT_SYS_SPU(preadv)
COMPAT_SYS_SPU(pwritev) COMPAT_SYS_SPU(pwritev)
...@@ -341,6 +341,7 @@ ...@@ -341,6 +341,7 @@
#define __NR_dup3 316 #define __NR_dup3 316
#define __NR_pipe2 317 #define __NR_pipe2 317
#define __NR_inotify_init1 318 #define __NR_inotify_init1 318
#define __NR_perf_counter_open 319
#define __NR_preadv 320 #define __NR_preadv 320
#define __NR_pwritev 321 #define __NR_pwritev 321
......
...@@ -94,6 +94,9 @@ obj64-$(CONFIG_AUDIT) += compat_audit.o ...@@ -94,6 +94,9 @@ obj64-$(CONFIG_AUDIT) += compat_audit.o
obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o power4-pmu.o ppc970-pmu.o \
power5-pmu.o power5+-pmu.o power6-pmu.o \
power7-pmu.o
obj-$(CONFIG_8XX_MINIMAL_FPEMU) += softemu8xx.o obj-$(CONFIG_8XX_MINIMAL_FPEMU) += softemu8xx.o
......
...@@ -131,6 +131,7 @@ int main(void) ...@@ -131,6 +131,7 @@ int main(void)
DEFINE(PACAKMSR, offsetof(struct paca_struct, kernel_msr)); DEFINE(PACAKMSR, offsetof(struct paca_struct, kernel_msr));
DEFINE(PACASOFTIRQEN, offsetof(struct paca_struct, soft_enabled)); DEFINE(PACASOFTIRQEN, offsetof(struct paca_struct, soft_enabled));
DEFINE(PACAHARDIRQEN, offsetof(struct paca_struct, hard_enabled)); DEFINE(PACAHARDIRQEN, offsetof(struct paca_struct, hard_enabled));
DEFINE(PACAPERFPEND, offsetof(struct paca_struct, perf_counter_pending));
DEFINE(PACASLBCACHE, offsetof(struct paca_struct, slb_cache)); DEFINE(PACASLBCACHE, offsetof(struct paca_struct, slb_cache));
DEFINE(PACASLBCACHEPTR, offsetof(struct paca_struct, slb_cache_ptr)); DEFINE(PACASLBCACHEPTR, offsetof(struct paca_struct, slb_cache_ptr));
DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id)); DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id));
......
...@@ -526,6 +526,15 @@ ALT_FW_FTR_SECTION_END_IFCLR(FW_FEATURE_ISERIES) ...@@ -526,6 +526,15 @@ ALT_FW_FTR_SECTION_END_IFCLR(FW_FEATURE_ISERIES)
2: 2:
TRACE_AND_RESTORE_IRQ(r5); TRACE_AND_RESTORE_IRQ(r5);
#ifdef CONFIG_PERF_COUNTERS
/* check paca->perf_counter_pending if we're enabling ints */
lbz r3,PACAPERFPEND(r13)
and. r3,r3,r5
beq 27f
bl .perf_counter_do_pending
27:
#endif /* CONFIG_PERF_COUNTERS */
/* extract EE bit and use it to restore paca->hard_enabled */ /* extract EE bit and use it to restore paca->hard_enabled */
ld r3,_MSR(r1) ld r3,_MSR(r1)
rldicl r4,r3,49,63 /* r0 = (r3 >> 15) & 1 */ rldicl r4,r3,49,63 /* r0 = (r3 >> 15) & 1 */
......
...@@ -135,6 +135,11 @@ notrace void raw_local_irq_restore(unsigned long en) ...@@ -135,6 +135,11 @@ notrace void raw_local_irq_restore(unsigned long en)
iseries_handle_interrupts(); iseries_handle_interrupts();
} }
if (test_perf_counter_pending()) {
clear_perf_counter_pending();
perf_counter_do_pending();
}
/* /*
* if (get_paca()->hard_enabled) return; * if (get_paca()->hard_enabled) return;
* But again we need to take care that gcc gets hard_enabled directly * But again we need to take care that gcc gets hard_enabled directly
......
此差异已折叠。
/*
* Performance counter support for POWER4 (GP) and POWER4+ (GQ) processors.
*
* Copyright 2009 Paul Mackerras, IBM Corporation.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/kernel.h>
#include <linux/perf_counter.h>
#include <asm/reg.h>
/*
* Bits in event code for POWER4
*/
#define PM_PMC_SH 12 /* PMC number (1-based) for direct events */
#define PM_PMC_MSK 0xf
#define PM_UNIT_SH 8 /* TTMMUX number and setting - unit select */
#define PM_UNIT_MSK 0xf
#define PM_LOWER_SH 6
#define PM_LOWER_MSK 1
#define PM_LOWER_MSKS 0x40
#define PM_BYTE_SH 4 /* Byte number of event bus to use */
#define PM_BYTE_MSK 3
#define PM_PMCSEL_MSK 7
/*
* Unit code values
*/
#define PM_FPU 1
#define PM_ISU1 2
#define PM_IFU 3
#define PM_IDU0 4
#define PM_ISU1_ALT 6
#define PM_ISU2 7
#define PM_IFU_ALT 8
#define PM_LSU0 9
#define PM_LSU1 0xc
#define PM_GPS 0xf
/*
* Bits in MMCR0 for POWER4
*/
#define MMCR0_PMC1SEL_SH 8
#define MMCR0_PMC2SEL_SH 1
#define MMCR_PMCSEL_MSK 0x1f
/*
* Bits in MMCR1 for POWER4
*/
#define MMCR1_TTM0SEL_SH 62
#define MMCR1_TTC0SEL_SH 61
#define MMCR1_TTM1SEL_SH 59
#define MMCR1_TTC1SEL_SH 58
#define MMCR1_TTM2SEL_SH 56
#define MMCR1_TTC2SEL_SH 55
#define MMCR1_TTM3SEL_SH 53
#define MMCR1_TTC3SEL_SH 52
#define MMCR1_TTMSEL_MSK 3
#define MMCR1_TD_CP_DBG0SEL_SH 50
#define MMCR1_TD_CP_DBG1SEL_SH 48
#define MMCR1_TD_CP_DBG2SEL_SH 46
#define MMCR1_TD_CP_DBG3SEL_SH 44
#define MMCR1_DEBUG0SEL_SH 43
#define MMCR1_DEBUG1SEL_SH 42
#define MMCR1_DEBUG2SEL_SH 41
#define MMCR1_DEBUG3SEL_SH 40
#define MMCR1_PMC1_ADDER_SEL_SH 39
#define MMCR1_PMC2_ADDER_SEL_SH 38
#define MMCR1_PMC6_ADDER_SEL_SH 37
#define MMCR1_PMC5_ADDER_SEL_SH 36
#define MMCR1_PMC8_ADDER_SEL_SH 35
#define MMCR1_PMC7_ADDER_SEL_SH 34
#define MMCR1_PMC3_ADDER_SEL_SH 33
#define MMCR1_PMC4_ADDER_SEL_SH 32
#define MMCR1_PMC3SEL_SH 27
#define MMCR1_PMC4SEL_SH 22
#define MMCR1_PMC5SEL_SH 17
#define MMCR1_PMC6SEL_SH 12
#define MMCR1_PMC7SEL_SH 7
#define MMCR1_PMC8SEL_SH 2 /* note bit 0 is in MMCRA for GP */
/*
 * Bit position within MMCR1 of the PMCx_ADDER_SEL bit for each counter,
 * indexed by 0-based PMC number (entry 0 is PMC1, entry 7 is PMC8).
 */
static short mmcr1_adder_bits[8] = {
	[0] = MMCR1_PMC1_ADDER_SEL_SH,
	[1] = MMCR1_PMC2_ADDER_SEL_SH,
	[2] = MMCR1_PMC3_ADDER_SEL_SH,
	[3] = MMCR1_PMC4_ADDER_SEL_SH,
	[4] = MMCR1_PMC5_ADDER_SEL_SH,
	[5] = MMCR1_PMC6_ADDER_SEL_SH,
	[6] = MMCR1_PMC7_ADDER_SEL_SH,
	[7] = MMCR1_PMC8_ADDER_SEL_SH,
};
/*
* Bits in MMCRA
*/
#define MMCRA_PMC8SEL0_SH 17 /* PMC8SEL bit 0 for GP */
/*
* Layout of constraint bits:
* 6666555555555544444444443333333333222222222211111111110000000000
* 3210987654321098765432109876543210987654321098765432109876543210
* |[ >[ >[ >|||[ >[ >< >< >< >< ><><><><><><><><>
* | UC1 UC2 UC3 ||| PS1 PS2 B0 B1 B2 B3 P1P2P3P4P5P6P7P8
* \SMPL ||\TTC3SEL
* |\TTC_IFU_SEL
* \TTM2SEL0
*
* SMPL - SAMPLE_ENABLE constraint
* 56: SAMPLE_ENABLE value 0x0100_0000_0000_0000
*
* UC1 - unit constraint 1: can't have all three of FPU/ISU1/IDU0|ISU2
* 55: UC1 error 0x0080_0000_0000_0000
* 54: FPU events needed 0x0040_0000_0000_0000
* 53: ISU1 events needed 0x0020_0000_0000_0000
* 52: IDU0|ISU2 events needed 0x0010_0000_0000_0000
*
* UC2 - unit constraint 2: can't have all three of FPU/IFU/LSU0
* 51: UC2 error 0x0008_0000_0000_0000
* 50: FPU events needed 0x0004_0000_0000_0000
* 49: IFU events needed 0x0002_0000_0000_0000
* 48: LSU0 events needed 0x0001_0000_0000_0000
*
* UC3 - unit constraint 3: can't have all four of LSU0/IFU/IDU0|ISU2/ISU1
* 47: UC3 error 0x8000_0000_0000
* 46: LSU0 events needed 0x4000_0000_0000
* 45: IFU events needed 0x2000_0000_0000
* 44: IDU0|ISU2 events needed 0x1000_0000_0000
* 43: ISU1 events needed 0x0800_0000_0000
*
* TTM2SEL0
* 42: 0 = IDU0 events needed
* 1 = ISU2 events needed 0x0400_0000_0000
*
* TTC_IFU_SEL
* 41: 0 = IFU.U events needed
* 1 = IFU.L events needed 0x0200_0000_0000
*
* TTC3SEL
* 40: 0 = LSU1.U events needed
* 1 = LSU1.L events needed 0x0100_0000_0000
*
* PS1
* 39: PS1 error 0x0080_0000_0000
* 36-38: count of events needing PMC1/2/5/6 0x0070_0000_0000
*
* PS2
* 35: PS2 error 0x0008_0000_0000
* 32-34: count of events needing PMC3/4/7/8 0x0007_0000_0000
*
* B0
* 28-31: Byte 0 event source 0xf000_0000
* 1 = FPU
* 2 = ISU1
* 3 = IFU
* 4 = IDU0
* 7 = ISU2
* 9 = LSU0
* c = LSU1
* f = GPS
*
* B1, B2, B3
* 24-27, 20-23, 16-19: Byte 1, 2, 3 event sources
*
* P8
* 15: P8 error 0x8000
* 14-15: Count of events needing PMC8
*
* P1..P7
* 0-13: Count of events needing PMC1..PMC7
*
* Note: this doesn't allow events using IFU.U to be combined with events
* using IFU.L, though that is feasible (using TTM0 and TTM2). However
* there are no listed events for IFU.L (they are debug events not
* verified for performance monitoring) so this shouldn't cause a
* problem.
*/
/*
 * Per-unit constraint information, indexed by the unit code taken from
 * the event.  Entries that are not listed stay zero; a zero 'unit'
 * marks the unit code as invalid.
 */
static struct unitinfo {
	u64 value;	/* constraint value bits contributed by this unit */
	u64 mask;	/* constraint mask bits contributed by this unit */
	int unit;	/* canonical unit code (alternate codes map here) */
	int lowerbit;	/* > 1: constraint bit that records the 'lower'
			 * select; 0 or 1: the required 'lower' setting */
} p4_unitinfo[16] = {
	[PM_FPU] = {
		.value = 0x44000000000000ull,
		.mask = 0x88000000000000ull,
		.unit = PM_FPU,
		.lowerbit = 0,
	},
	[PM_ISU1] = {
		.value = 0x20080000000000ull,
		.mask = 0x88000000000000ull,
		.unit = PM_ISU1,
		.lowerbit = 0,
	},
	[PM_ISU1_ALT] = {
		.value = 0x20080000000000ull,
		.mask = 0x88000000000000ull,
		.unit = PM_ISU1,
		.lowerbit = 0,
	},
	[PM_IFU] = {
		.value = 0x02200000000000ull,
		.mask = 0x08820000000000ull,
		.unit = PM_IFU,
		.lowerbit = 41,
	},
	[PM_IFU_ALT] = {
		.value = 0x02200000000000ull,
		.mask = 0x08820000000000ull,
		.unit = PM_IFU,
		.lowerbit = 41,
	},
	[PM_IDU0] = {
		.value = 0x10100000000000ull,
		.mask = 0x80840000000000ull,
		.unit = PM_IDU0,
		.lowerbit = 1,
	},
	[PM_ISU2] = {
		.value = 0x10140000000000ull,
		.mask = 0x80840000000000ull,
		.unit = PM_ISU2,
		.lowerbit = 0,
	},
	[PM_LSU0] = {
		.value = 0x01400000000000ull,
		.mask = 0x08800000000000ull,
		.unit = PM_LSU0,
		.lowerbit = 0,
	},
	[PM_LSU1] = {
		.value = 0x00000000000000ull,
		.mask = 0x00010000000000ull,
		.unit = PM_LSU1,
		.lowerbit = 40,
	},
	[PM_GPS] = {
		.value = 0x00000000000000ull,
		.mask = 0x00000000000000ull,
		.unit = PM_GPS,
		.lowerbit = 0,
	},
};
/*
 * Direct events that relate to marked instructions.  The array is
 * indexed by 0-based PMC number; in each entry, bit n is set when
 * PMCSEL value n on that PMC is a marked-instruction event.
 */
static unsigned char direct_marked_event[8] = {
(1<<2) | (1<<3), /* PMC1: PM_MRK_GRP_DISP, PM_MRK_ST_CMPL */
(1<<3) | (1<<5), /* PMC2: PM_THRESH_TIMEO, PM_MRK_BRU_FIN */
(1<<3), /* PMC3: PM_MRK_ST_CMPL_INT */
(1<<4) | (1<<5), /* PMC4: PM_MRK_GRP_CMPL, PM_MRK_CRU_FIN */
(1<<4) | (1<<5), /* PMC5: PM_MRK_GRP_TIMEO */
(1<<3) | (1<<4) | (1<<5),
/* PMC6: PM_MRK_ST_GPS, PM_MRK_FXU_FIN, PM_MRK_GRP_ISSUED */
(1<<4) | (1<<5), /* PMC7: PM_MRK_FPU_FIN, PM_MRK_INST_FIN */
(1<<4), /* PMC8: PM_MRK_LSU_FIN */
};
/*
 * Returns 1 if event counts things relating to marked instructions
 * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not.
 *
 * The PMC field of an event code is 4 bits wide but only PMC1..PMC8
 * exist.  An event naming a higher PMC is reported as "not marked"
 * rather than being used to index past the end of
 * direct_marked_event[].
 */
static int p4_marked_instr_event(u64 event)
{
	int pmc, psel, unit, byte, bit;
	unsigned int mask;

	pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
	psel = event & PM_PMCSEL_MSK;
	if (pmc) {
		/* Out-of-range PMC number: no table entry to consult */
		if (pmc > 8)
			return 0;
		if (direct_marked_event[pmc - 1] & (1 << psel))
			return 1;
		if (psel == 0)		/* add events */
			bit = (pmc <= 4)? pmc - 1: 8 - pmc;
		else if (psel == 6)	/* decode events */
			bit = 4;
		else
			return 0;
	} else
		bit = psel;

	byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
	unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;

	/*
	 * mask has one bit per (byte, bit) position on the event bus;
	 * only the LSU0 and LSU1 units carry marked-instruction events.
	 */
	mask = 0;
	switch (unit) {
	case PM_LSU1:
		if (event & PM_LOWER_MSKS)
			mask = 1 << 28;		/* byte 7 bit 4 */
		else
			mask = 6 << 24;		/* byte 3 bits 1 and 2 */
		break;
	case PM_LSU0:
		/* byte 3, bit 3; byte 2 bits 0,2,3,4,5; byte 1 */
		mask = 0x083dff00;
	}
	return (mask >> (byte * 8 + bit)) & 1;
}
/*
 * Compute the constraint mask and value for one POWER4 event code.
 *
 * On success the results are stored through maskp and valp and 0 is
 * returned.  Returns -1 (leaving *maskp and *valp untouched) if the
 * event names a PMC above 8, uses an unknown unit, or has a 'lower'
 * select that the unit does not allow.
 */
static int p4_get_constraint(u64 event, u64 *maskp, u64 *valp)
{
	u64 cmask = 0, cval = 0;
	int group = -1;
	int pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
	int unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
	int byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;

	if (pmc > 8)
		return -1;

	if (pmc) {
		/* Direct event: bump the 2-bit count field for this PMC */
		int pshift = (pmc - 1) * 2;

		cmask |= 2 << pshift;
		cval |= 1 << pshift;
		group = ((pmc - 1) >> 1) & 1;
	}

	if (unit) {
		const struct unitinfo *ui = &p4_unitinfo[unit];
		int lower = (event >> PM_LOWER_SH) & PM_LOWER_MSK;
		int lane_sh = 28 - 4 * byte;
		int lowbit;

		/*
		 * Bus events on bytes 0 and 2 can be counted
		 * on PMC1/2/5/6; bytes 1 and 3 on PMC3/4/7/8.
		 */
		if (!pmc)
			group = byte & 1;

		if (!ui->unit)
			return -1;
		cmask |= ui->mask;
		cval |= ui->value;

		/*
		 * lowerbit above 1 is the constraint bit recording the
		 * lower/upper select; otherwise it is the only value of
		 * 'lower' that this unit accepts.
		 */
		lowbit = ui->lowerbit;
		if (lowbit > 1)
			cval |= (u64)lower << lowbit;
		else if (lower != lowbit)
			return -1;

		/* Set byte lane select field, using the canonical unit code */
		cmask |= 0xfULL << lane_sh;
		cval |= (u64)ui->unit << lane_sh;
	}

	if (group == 0) {
		/* increment PMC1/2/5/6 field */
		cmask |= 0x8000000000ull;
		cval |= 0x1000000000ull;
	} else {
		/* increment PMC3/4/7/8 field */
		cmask |= 0x800000000ull;
		cval |= 0x100000000ull;
	}

	/* Marked instruction events need sample_enable set */
	if (p4_marked_instr_event(event)) {
		cmask |= 1ull << 56;
		cval |= 1ull << 56;
	}

	/* PMCSEL=6 decode events on byte 2 need sample_enable clear */
	if (pmc && (event & PM_PMCSEL_MSK) == 6 && byte == 2)
		cmask |= 1ull << 56;

	*maskp = cmask;
	*valp = cval;
	return 0;
}
/*
 * Event codes that p4_get_alternatives() treats as interchangeable
 * encodings of PM_INST_CMPL.
 */
static unsigned int ppc_inst_cmpl[] = {
0x1001, 0x4001, 0x6001, 0x7001, 0x8001
};
/*
 * List the event codes that are equivalent to 'event'.
 *
 * alt[0] is always the event itself; any alternative encodings follow.
 * Returns the number of entries written to alt[].  'flags' is not
 * examined by this implementation.
 */
static int p4_get_alternatives(u64 event, unsigned int flags, u64 alt[])
{
	int count = 1;
	int hit, k;

	alt[0] = event;

	switch (event) {
	case 0x8003:
	case 0x0224:
		/* 2 possibilities for PM_GRP_DISP_REJECT */
		alt[1] = event ^ (0x8003 ^ 0x0224);
		return 2;
	case 0x0c13:
	case 0x0c23:
		/* 2 possibilities for PM_ST_MISS_L1 */
		alt[1] = event ^ (0x0c13 ^ 0x0c23);
		return 2;
	}

	/* several possibilities for PM_INST_CMPL */
	for (hit = 0; hit < ARRAY_SIZE(ppc_inst_cmpl); ++hit)
		if (event == ppc_inst_cmpl[hit])
			break;
	if (hit == ARRAY_SIZE(ppc_inst_cmpl))
		return count;

	/* Append every other PM_INST_CMPL encoding, in table order */
	for (k = 0; k < ARRAY_SIZE(ppc_inst_cmpl); ++k) {
		if (k == hit)
			continue;
		alt[count++] = ppc_inst_cmpl[k];
	}
	return count;
}
/*
 * Work out MMCR0/MMCR1/MMCRA settings for a set of POWER4 events.
 *
 * event[] holds n_ev event codes.  On success, hwc[i] is set to the
 * 0-based PMC chosen for event[i], mmcr[0], mmcr[1] and mmcr[2] receive
 * the MMCR0, MMCR1 and MMCRA values, and 0 is returned.  Returns -1 if
 * there are more than 8 events, an event names a PMC above 8, two
 * events want the same PMC, or the events conflict over an event-bus
 * byte lane, a unit's upper/lower select or a TTM multiplexer.
 */
static int p4_compute_mmcr(u64 event[], int n_ev,
			   unsigned int hwc[], u64 mmcr[])
{
	u64 mmcr0 = 0, mmcr1 = 0, mmcra = 0;
	unsigned int pmc, unit, byte, psel, lower;
	unsigned int ttm, grp;
	unsigned int pmc_inuse = 0;
	unsigned int pmc_grp_use[2];
	unsigned char busbyte[4];
	unsigned char unituse[16];
	unsigned int unitlower = 0;
	int i;

	if (n_ev > 8)
		return -1;

	/* First pass to count resource use */
	pmc_grp_use[0] = pmc_grp_use[1] = 0;
	memset(busbyte, 0, sizeof(busbyte));
	memset(unituse, 0, sizeof(unituse));
	for (i = 0; i < n_ev; ++i) {
		pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
		if (pmc) {
			/*
			 * The PMC field is 4 bits wide but only PMC1..8
			 * exist; reject anything else here, as
			 * p4_get_constraint() does, so the second pass
			 * never indexes or shifts with a bad PMC number.
			 */
			if (pmc > 8)
				return -1;
			if (pmc_inuse & (1 << (pmc - 1)))
				return -1;
			pmc_inuse |= 1 << (pmc - 1);
			/* count 1/2/5/6 vs 3/4/7/8 use */
			++pmc_grp_use[((pmc - 1) >> 1) & 1];
		}
		unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
		byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
		lower = (event[i] >> PM_LOWER_SH) & PM_LOWER_MSK;
		if (unit) {
			if (!pmc)
				++pmc_grp_use[byte & 1];
			if (unit == 6 || unit == 8)
				/* map alt ISU1/IFU codes: 6->2, 8->3 */
				unit = (unit >> 1) - 1;
			if (busbyte[byte] && busbyte[byte] != unit)
				return -1;
			busbyte[byte] = unit;
			/* unitlower bit n records that unit n uses 'lower' */
			lower <<= unit;
			if (unituse[unit] && lower != (unitlower & lower))
				return -1;
			unituse[unit] = 1;
			unitlower |= lower;
		}
	}
	if (pmc_grp_use[0] > 4 || pmc_grp_use[1] > 4)
		return -1;

	/*
	 * Assign resources and set multiplexer selects.
	 *
	 * Units 1,2,3 are on TTM0, 4,6,7 on TTM1, 8,9 on TTM2.
	 * Each TTMx can only select one unit, but since
	 * units 2 and 6 are both ISU1, and 3 and 8 are both IFU,
	 * we have some choices.
	 */
	if (unituse[2] & (unituse[1] | (unituse[3] & unituse[9]))) {
		unituse[6] = 1;		/* Move 2 to 6 */
		unituse[2] = 0;
	}
	if (unituse[3] & (unituse[1] | unituse[2])) {
		unituse[8] = 1;		/* Move 3 to 8 */
		unituse[3] = 0;
		/* carry unit 3's 'lower' flag (bit 3) over to unit 8 (bit 8) */
		unitlower = (unitlower & ~8) | ((unitlower & 8) << 5);
	}
	/* Check only one unit per TTMx */
	if (unituse[1] + unituse[2] + unituse[3] > 1 ||
	    unituse[4] + unituse[6] + unituse[7] > 1 ||
	    unituse[8] + unituse[9] > 1 ||
	    (unituse[5] | unituse[10] | unituse[11] |
	     unituse[13] | unituse[14]))
		return -1;

	/* Set TTMxSEL fields.  Note, units 1-3 => TTM0SEL codes 0-2 */
	mmcr1 |= (u64)(unituse[3] * 2 + unituse[2]) << MMCR1_TTM0SEL_SH;
	mmcr1 |= (u64)(unituse[7] * 3 + unituse[6] * 2) << MMCR1_TTM1SEL_SH;
	mmcr1 |= (u64)unituse[9] << MMCR1_TTM2SEL_SH;

	/* Set TTCxSEL fields. */
	if (unitlower & 0xe)
		mmcr1 |= 1ull << MMCR1_TTC0SEL_SH;
	if (unitlower & 0xf0)
		mmcr1 |= 1ull << MMCR1_TTC1SEL_SH;
	if (unitlower & 0xf00)
		mmcr1 |= 1ull << MMCR1_TTC2SEL_SH;
	if (unitlower & 0x7000)
		mmcr1 |= 1ull << MMCR1_TTC3SEL_SH;

	/* Set byte lane select fields. */
	for (byte = 0; byte < 4; ++byte) {
		unit = busbyte[byte];
		if (!unit)
			continue;
		if (unit == 0xf) {
			/* special case for GPS */
			mmcr1 |= 1ull << (MMCR1_DEBUG0SEL_SH - byte);
		} else {
			if (!unituse[unit])
				ttm = unit - 1;		/* 2->1, 3->2 */
			else
				ttm = unit >> 2;
			mmcr1 |= (u64)ttm << (MMCR1_TD_CP_DBG0SEL_SH - 2*byte);
		}
	}

	/* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */
	for (i = 0; i < n_ev; ++i) {
		pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
		unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
		byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
		psel = event[i] & PM_PMCSEL_MSK;
		if (!pmc) {
			/* Bus event or 00xxx direct event (off or cycles) */
			if (unit)
				psel |= 0x10 | ((byte & 2) << 2);
			/*
			 * Take the first free PMC: a bus event needs one
			 * in the group matching its byte lane, any other
			 * event needs a group that still has room.
			 */
			for (pmc = 0; pmc < 8; ++pmc) {
				if (pmc_inuse & (1 << pmc))
					continue;
				grp = (pmc >> 1) & 1;
				if (unit) {
					if (grp == (byte & 1))
						break;
				} else if (pmc_grp_use[grp] < 4) {
					++pmc_grp_use[grp];
					break;
				}
			}
			pmc_inuse |= 1 << pmc;
		} else {
			/* Direct event */
			--pmc;
			if (psel == 0 && (byte & 2))
				/* add events on higher-numbered bus */
				mmcr1 |= 1ull << mmcr1_adder_bits[pmc];
			else if (psel == 6 && byte == 3)
				/* seem to need to set sample_enable here */
				mmcra |= MMCRA_SAMPLE_ENABLE;
			psel |= 8;
		}
		/* PMC1/2 selects live in MMCR0, PMC3..8 in MMCR1 */
		if (pmc <= 1)
			mmcr0 |= psel << (MMCR0_PMC1SEL_SH - 7 * pmc);
		else
			mmcr1 |= psel << (MMCR1_PMC3SEL_SH - 5 * (pmc - 2));
		if (pmc == 7)	/* PMC8 */
			mmcra |= (psel & 1) << MMCRA_PMC8SEL0_SH;
		hwc[i] = pmc;
		if (p4_marked_instr_event(event[i]))
			mmcra |= MMCRA_SAMPLE_ENABLE;
	}

	if (pmc_inuse & 1)
		mmcr0 |= MMCR0_PMC1CE;
	if (pmc_inuse & 0xfe)
		mmcr0 |= MMCR0_PMCjCE;

	mmcra |= 0x2000;	/* mark only one IOP per PPC instruction */

	/* Return MMCRx values */
	mmcr[0] = mmcr0;
	mmcr[1] = mmcr1;
	mmcr[2] = mmcra;
	return 0;
}
/*
 * Disable one counter in a saved set of MMCR values by zeroing its
 * PMCxSEL field.  pmc is 0-based here, not 1-based: 0 and 1 are held
 * in mmcr[0] (MMCR0), 2..7 in mmcr[1] (MMCR1).  For PMC8 the extra
 * select bit kept in mmcr[2] (MMCRA) is cleared as well.
 */
static void p4_disable_pmc(unsigned int pmc, u64 mmcr[])
{
	if (pmc <= 1) {
		mmcr[0] &= ~(0x1fUL << (MMCR0_PMC1SEL_SH - 7 * pmc));
		return;
	}

	mmcr[1] &= ~(0x1fUL << (MMCR1_PMC3SEL_SH - 5 * (pmc - 2)));
	if (pmc != 7)
		return;
	mmcr[2] &= ~(1UL << MMCRA_PMC8SEL0_SH);
}
/*
 * POWER4 event codes for the generic hardware events, indexed by the
 * PERF_COUNT_HW_* event number.
 */
static int p4_generic_events[] = {
[PERF_COUNT_HW_CPU_CYCLES] = 7,
[PERF_COUNT_HW_INSTRUCTIONS] = 0x1001, /* one of the PM_INST_CMPL codes */
[PERF_COUNT_HW_CACHE_REFERENCES] = 0x8c10, /* PM_LD_REF_L1 */
[PERF_COUNT_HW_CACHE_MISSES] = 0x3c10, /* PM_LD_MISS_L1 */
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x330, /* PM_BR_ISSUED */
[PERF_COUNT_HW_BRANCH_MISSES] = 0x331, /* PM_BR_MPRED_CR */
};
/* Shorthand for the PERF_COUNT_HW_CACHE_* constants used as indices below */
#define C(x) PERF_COUNT_HW_CACHE_##x
/*
 * Table of generalized cache-related events.
 * 0 means not supported, -1 means nonsensical, other values
 * are event codes.
 *
 * Indexed by [cache][operation][result]; each innermost pair is
 * { RESULT_ACCESS, RESULT_MISS }.
 */
static int power4_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
[C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */
[C(OP_READ)] = { 0x8c10, 0x3c10 }, /* PM_LD_REF_L1, PM_LD_MISS_L1 */
[C(OP_WRITE)] = { 0x7c10, 0xc13 }, /* miss: PM_ST_MISS_L1 */
[C(OP_PREFETCH)] = { 0xc35, 0 },
},
[C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */
[C(OP_READ)] = { 0, 0 },
[C(OP_WRITE)] = { -1, -1 },
[C(OP_PREFETCH)] = { 0, 0 },
},
[C(LL)] = { /* RESULT_ACCESS RESULT_MISS */
[C(OP_READ)] = { 0, 0 },
[C(OP_WRITE)] = { 0, 0 },
[C(OP_PREFETCH)] = { 0xc34, 0 },
},
[C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */
[C(OP_READ)] = { 0, 0x904 },
[C(OP_WRITE)] = { -1, -1 },
[C(OP_PREFETCH)] = { -1, -1 },
},
[C(ITLB)] = { /* RESULT_ACCESS RESULT_MISS */
[C(OP_READ)] = { 0, 0x900 },
[C(OP_WRITE)] = { -1, -1 },
[C(OP_PREFETCH)] = { -1, -1 },
},
[C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */
[C(OP_READ)] = { 0x330, 0x331 }, /* PM_BR_ISSUED, PM_BR_MPRED_CR */
[C(OP_WRITE)] = { -1, -1 },
[C(OP_PREFETCH)] = { -1, -1 },
},
};
/*
 * PMU description for POWER4: 8 counters, the constraint constants that
 * go with the layout of constraint bits documented above, and the
 * POWER4-specific callbacks and event tables defined in this file.
 */
struct power_pmu power4_pmu = {
.n_counter = 8,
/* PM_INST_CMPL has 5 encodings, the most p4_get_alternatives() returns */
.max_alternatives = 5,
/* LSB of each add field: P1..P8 (bits 0,2,..,14), PS2 (bit 32), PS1 (bit 36) */
.add_fields = 0x0000001100005555ull,
/* 3 in each of PS1/PS2 (limit of 4 events), plus the LSB of UC1, UC2 and UC3 */
.test_adder = 0x0011083300000000ull,
.compute_mmcr = p4_compute_mmcr,
.get_constraint = p4_get_constraint,
.get_alternatives = p4_get_alternatives,
.disable_pmc = p4_disable_pmc,
.n_generic = ARRAY_SIZE(p4_generic_events),
.generic_events = p4_generic_events,
.cache_events = &power4_cache_events,
};
/*
* Performance counter support for POWER5+/++ (not POWER5) processors.
*
* Copyright 2009 Paul Mackerras, IBM Corporation.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/kernel.h>
#include <linux/perf_counter.h>
#include <asm/reg.h>
/*
* Bits in event code for POWER5+ (POWER5 GS) and POWER5++ (POWER5 GS DD3)
*/
#define PM_PMC_SH 20 /* PMC number (1-based) for direct events */
#define PM_PMC_MSK 0xf
#define PM_PMC_MSKS (PM_PMC_MSK << PM_PMC_SH)
#define PM_UNIT_SH 16 /* TTMMUX number and setting - unit select */
#define PM_UNIT_MSK 0xf
#define PM_BYTE_SH 12 /* Byte number of event bus to use */
#define PM_BYTE_MSK 7
#define PM_GRS_SH 8 /* Storage subsystem mux select */
#define PM_GRS_MSK 7
#define PM_BUSEVENT_MSK 0x80 /* Set if event uses event bus */
#define PM_PMCSEL_MSK 0x7f
/* Values in PM_UNIT field */
#define PM_FPU 0
#define PM_ISU0 1
#define PM_IFU 2
#define PM_ISU1 3
#define PM_IDU 4
#define PM_ISU0_ALT 6
#define PM_GRS 7
#define PM_LSU0 8
#define PM_LSU1 0xc
#define PM_LASTUNIT 0xc
/*
* Bits in MMCR1 for POWER5+
*/
#define MMCR1_TTM0SEL_SH 62
#define MMCR1_TTM1SEL_SH 60
#define MMCR1_TTM2SEL_SH 58
#define MMCR1_TTM3SEL_SH 56
#define MMCR1_TTMSEL_MSK 3
#define MMCR1_TD_CP_DBG0SEL_SH 54
#define MMCR1_TD_CP_DBG1SEL_SH 52
#define MMCR1_TD_CP_DBG2SEL_SH 50
#define MMCR1_TD_CP_DBG3SEL_SH 48
#define MMCR1_GRS_L2SEL_SH 46
#define MMCR1_GRS_L2SEL_MSK 3
#define MMCR1_GRS_L3SEL_SH 44
#define MMCR1_GRS_L3SEL_MSK 3
#define MMCR1_GRS_MCSEL_SH 41
#define MMCR1_GRS_MCSEL_MSK 7
#define MMCR1_GRS_FABSEL_SH 39
#define MMCR1_GRS_FABSEL_MSK 3
#define MMCR1_PMC1_ADDER_SEL_SH 35
#define MMCR1_PMC2_ADDER_SEL_SH 34
#define MMCR1_PMC3_ADDER_SEL_SH 33
#define MMCR1_PMC4_ADDER_SEL_SH 32
#define MMCR1_PMC1SEL_SH 25
#define MMCR1_PMC2SEL_SH 17
#define MMCR1_PMC3SEL_SH 9
#define MMCR1_PMC4SEL_SH 1
#define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8)
#define MMCR1_PMCSEL_MSK 0x7f
/*
* Bits in MMCRA
*/
/*
* Layout of constraint bits:
* 6666555555555544444444443333333333222222222211111111110000000000
* 3210987654321098765432109876543210987654321098765432109876543210
* [ ><><>< ><> <><>[ > < >< >< >< ><><><><><><>
* NC G0G1G2 G3 T0T1 UC B0 B1 B2 B3 P6P5P4P3P2P1
*
* NC - number of counters
* 51: NC error 0x0008_0000_0000_0000
* 48-50: number of events needing PMC1-4 0x0007_0000_0000_0000
*
* G0..G3 - GRS mux constraints
* 46-47: GRS_L2SEL value
* 44-45: GRS_L3SEL value
* 41-43: GRS_MCSEL value
* 39-40: GRS_FABSEL value
* Note that these match up with their bit positions in MMCR1
*
* T0 - TTM0 constraint
* 36-37: TTM0SEL value (0=FPU, 2=IFU, 3=ISU1) 0x30_0000_0000
*
* T1 - TTM1 constraint
* 34-35: TTM1SEL value (0=IDU, 3=GRS) 0x0c_0000_0000
*
* UC - unit constraint: can't have all three of FPU|IFU|ISU1, ISU0, IDU|GRS
* 33: UC3 error 0x02_0000_0000
* 32: FPU|IFU|ISU1 events needed 0x01_0000_0000
* 31: ISU0 events needed 0x00_8000_0000
* 30: IDU|GRS events needed 0x00_4000_0000
*
* B0
* 24-27: Byte 0 event source 0x0f00_0000
* Encoding as for the event code
*
* B1, B2, B3
* 20-23, 16-19, 12-15: Byte 1, 2, 3 event sources
*
* P6
* 11: P6 error 0x800
* 10-11: Count of events needing PMC6
*
* P1..P5
* 0-9: Count of events needing PMC1..PMC5
*/
/*
 * MMCR1 shift of the GRS mux select field to use for a GRS bus event,
 * indexed by the low 3 bits of the event code: 0-2 use GRS_L2SEL,
 * 3-5 use GRS_L3SEL, 6 uses GRS_MCSEL and 7 uses GRS_FABSEL.
 */
static const int grsel_shift[8] = {
MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH,
MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH,
MMCR1_GRS_MCSEL_SH, MMCR1_GRS_FABSEL_SH
};
/*
 * Masks and values for using events from the various units.
 * Indexed by unit code; element [0] is the constraint mask and
 * element [1] the constraint value.  Units not listed contribute
 * nothing (both entries are zero).
 */
static u64 unit_cons[PM_LASTUNIT+1][2] = {
[PM_FPU] = { 0x3200000000ull, 0x0100000000ull },
[PM_ISU0] = { 0x0200000000ull, 0x0080000000ull },
[PM_ISU1] = { 0x3200000000ull, 0x3100000000ull },
[PM_IFU] = { 0x3200000000ull, 0x2100000000ull },
[PM_IDU] = { 0x0e00000000ull, 0x0040000000ull },
[PM_GRS] = { 0x0e00000000ull, 0x0c40000000ull },
};
/*
 * Compute the constraint mask and value for one POWER5+ event code.
 *
 * On success the results are stored through maskp and valp and 0 is
 * returned.  Returns -1 (leaving *maskp and *valp untouched) if the
 * event names a PMC above 6, asks for PMC5/6 with anything other than
 * the two event codes accepted there, names a unit beyond PM_LASTUNIT,
 * or selects bytes 4-7 for a unit other than LSU1.
 */
static int power5p_get_constraint(u64 event, u64 *maskp, u64 *valp)
{
	u64 cmask = 0, cval = 0;
	int pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;

	if (pmc > 6)
		return -1;

	if (pmc) {
		/* Direct event: bump the 2-bit count field for this PMC */
		int cshift = (pmc - 1) * 2;

		cmask |= 2 << cshift;
		cval |= 1 << cshift;
		if (pmc >= 5 && event != 0x500009 && event != 0x600005)
			return -1;
	}

	if (event & PM_BUSEVENT_MSK) {
		int unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
		int byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
		int lane_sh;

		if (unit > PM_LASTUNIT)
			return -1;
		if (unit == PM_ISU0_ALT)
			unit = PM_ISU0;
		cmask |= unit_cons[unit][0];
		cval |= unit_cons[unit][1];

		if (byte >= 4) {
			if (unit != PM_LSU1)
				return -1;
			/* Map LSU1 low word (bytes 4-7) to unit LSU1+1 */
			++unit;
			byte &= 3;
		}

		if (unit == PM_GRS) {
			/* Low 3 bits pick which GRS mux field is constrained */
			int sel = event & 7;
			int width = (sel == 6)? 7: 3;
			int gshift = grsel_shift[sel];

			cmask |= (u64)width << gshift;
			cval |= (u64)((event >> PM_GRS_SH) & width) << gshift;
		}

		/* Set byte lane select field */
		lane_sh = 24 - 4 * byte;
		cmask |= 0xfULL << lane_sh;
		cval |= (u64)unit << lane_sh;
	}

	if (pmc < 5) {
		/* need a counter from PMC1-4 set */
		cmask |= 0x8000000000000ull;
		cval |= 0x1000000000000ull;
	}

	*maskp = cmask;
	*valp = cval;
	return 0;
}
/*
 * Returns 1 if the event code names PMC5 or PMC6 in its PMC field,
 * 0 otherwise.
 */
static int power5p_limited_pmc_event(u64 event)
{
	switch ((event >> PM_PMC_SH) & PM_PMC_MSK) {
	case 5:
	case 6:
		return 1;
	default:
		return 0;
	}
}
#define MAX_ALT 3 /* at most 3 alternatives for any event */
/*
 * Each row lists event codes that count the same thing; unused trailing
 * slots are 0.  find_alternative() stops scanning as soon as the event is
 * smaller than a row's first entry, so rows must stay sorted by their
 * first entry and the first entry must be the smallest code in its row.
 */
static const unsigned int event_alternatives[][MAX_ALT] = {
{ 0x100c0, 0x40001f }, /* PM_GCT_FULL_CYC */
{ 0x120e4, 0x400002 }, /* PM_GRP_DISP_REJECT */
{ 0x230e2, 0x323087 }, /* PM_BR_PRED_CR */
{ 0x230e3, 0x223087, 0x3230a0 }, /* PM_BR_PRED_TA */
{ 0x410c7, 0x441084 }, /* PM_THRD_L2MISS_BOTH_CYC */
{ 0x800c4, 0xc20e0 }, /* PM_DTLB_MISS */
{ 0xc50c6, 0xc60e0 }, /* PM_MRK_DTLB_MISS */
{ 0x100005, 0x600005 }, /* PM_RUN_CYC */
{ 0x100009, 0x200009 }, /* PM_INST_CMPL */
{ 0x200015, 0x300015 }, /* PM_LSU_LMQ_SRQ_EMPTY_CYC */
{ 0x300009, 0x400009 }, /* PM_INST_DISP */
};
/*
* Scan the alternatives table for a match and return the
* index into the alternatives table if found, else -1.
*/
static int find_alternative(unsigned int event)
{
int i, j;
for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) {
if (event < event_alternatives[i][0])
break;
for (j = 0; j < MAX_ALT && event_alternatives[i][j]; ++j)
if (event == event_alternatives[i][j])
return i;
}
return -1;
}
/*
 * PMCSEL values of byte-decode events, one row per PMC (row index is
 * PMC number - 1).  find_alternative_bdecode() treats column j of the
 * row for PMC n and column j of the row for PMC 5-n as alternatives of
 * each other.
 */
static const unsigned char bytedecode_alternatives[4][4] = {
/* PMC 1 */ { 0x21, 0x23, 0x25, 0x27 },
/* PMC 2 */ { 0x07, 0x17, 0x0e, 0x1e },
/* PMC 3 */ { 0x20, 0x22, 0x24, 0x26 },
/* PMC 4 */ { 0x07, 0x17, 0x0e, 0x1e }
};
/*
 * Some direct events for decodes of event bus byte 3 have alternative
 * PMCSEL values on other counters.  This returns the alternative
 * event code for those that do, or -1 otherwise.  This also handles
 * alternative PMCSEL values for add events.
 *
 * Only events that name PMC1-4 are considered; the partner counter is
 * always 5 - pmc (1 <-> 4, 2 <-> 3) except for the POWER5+ specific
 * decode pairs, which move between PMC1 and PMC3.
 */
static s64 find_alternative_bdecode(u64 event)
{
	int pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
	int psel = event & PM_PMCSEL_MSK;
	int other, col;

	if (pmc < 1 || pmc > 4)
		return -1;
	other = 5 - pmc;

	/* byte decode events listed in the table */
	for (col = 0; col < 4; ++col) {
		if (bytedecode_alternatives[pmc - 1][col] != psel)
			continue;
		return (event & ~(PM_PMC_MSKS | PM_PMCSEL_MSK)) |
			(other << PM_PMC_SH) |
			bytedecode_alternatives[other - 1][col];
	}

	/* new decode alternatives for power5+: PMC1 0x0d/0x0e <-> PMC3 0x2e/0x2f */
	if (pmc == 1 && (psel == 0x0d || psel == 0x0e))
		return event + (2 << PM_PMC_SH) + (0x2e - 0x0d);
	if (pmc == 3 && (psel == 0x2e || psel == 0x2f))
		return event - (2 << PM_PMC_SH) - (0x2e - 0x0d);

	/* alternative add event encodings: swap 0x10 <-> 0x28 and the PMC */
	if (psel == 0x10 || psel == 0x28)
		return ((event ^ (0x10 ^ 0x28)) & ~PM_PMC_MSKS) |
			(other << PM_PMC_SH);

	return -1;
}
/*
 * Fill alt[] with the event codes that are equivalent to @event and
 * return how many were stored.
 *
 * alt[0] is initially @event itself, followed by alternatives from the
 * table or from find_alternative_bdecode().  With PPMU_ONLY_COUNT_RUN
 * the cycle / instruction events and their run-state variants are
 * treated as equivalent and added as well.  The list is then filtered:
 *   - without PPMU_LIMITED_PMC_OK, events on limited PMCs (PMC5/6) are
 *     removed;
 *   - otherwise, with PPMU_LIMITED_PMC_REQD, only events on limited PMCs
 *     are kept.
 * After filtering the result may be empty (return value 0).
 *
 * nlim counts how many entries of alt[] are limited-PMC events.  Each
 * entry must be counted exactly once: counting the table column that
 * equals @event again would make nlim reach nalt for 0x600005 and so
 * skip the PPMU_LIMITED_PMC_REQD filtering.
 */
static int power5p_get_alternatives(u64 event, unsigned int flags, u64 alt[])
{
	int i, j, nalt = 1;
	int nlim;
	s64 ae;

	alt[0] = event;
	nlim = power5p_limited_pmc_event(event);
	i = find_alternative(event);
	if (i >= 0) {
		for (j = 0; j < MAX_ALT; ++j) {
			ae = event_alternatives[i][j];
			if (ae && ae != event) {
				alt[nalt++] = ae;
				/* count only entries actually added */
				nlim += power5p_limited_pmc_event(ae);
			}
		}
	} else {
		ae = find_alternative_bdecode(event);
		if (ae > 0)
			alt[nalt++] = ae;
	}

	if (flags & PPMU_ONLY_COUNT_RUN) {
		/*
		 * We're only counting in RUN state,
		 * so PM_CYC is equivalent to PM_RUN_CYC
		 * and PM_INST_CMPL === PM_RUN_INST_CMPL.
		 * This doesn't include alternatives that don't provide
		 * any extra flexibility in assigning PMCs (e.g.
		 * 0x100005 for PM_RUN_CYC vs. 0xf for PM_CYC).
		 * Note that even with these additional alternatives
		 * we never end up with more than 3 alternatives for any event.
		 */
		j = nalt;
		for (i = 0; i < nalt; ++i) {
			switch (alt[i]) {
			case 0xf:	/* PM_CYC */
				alt[j++] = 0x600005;	/* PM_RUN_CYC */
				++nlim;
				break;
			case 0x600005:	/* PM_RUN_CYC */
				alt[j++] = 0xf;
				break;
			case 0x100009:	/* PM_INST_CMPL */
				alt[j++] = 0x500009;	/* PM_RUN_INST_CMPL */
				++nlim;
				break;
			case 0x500009:	/* PM_RUN_INST_CMPL */
				alt[j++] = 0x100009;	/* PM_INST_CMPL */
				alt[j++] = 0x200009;
				break;
			}
		}
		nalt = j;
	}

	if (!(flags & PPMU_LIMITED_PMC_OK) && nlim) {
		/* remove the limited PMC events */
		j = 0;
		for (i = 0; i < nalt; ++i) {
			if (!power5p_limited_pmc_event(alt[i])) {
				alt[j] = alt[i];
				++j;
			}
		}
		nalt = j;
	} else if ((flags & PPMU_LIMITED_PMC_REQD) && nlim < nalt) {
		/* remove all but the limited PMC events */
		j = 0;
		for (i = 0; i < nalt; ++i) {
			if (power5p_limited_pmc_event(alt[i])) {
				alt[j] = alt[i];
				++j;
			}
		}
		nalt = j;
	}

	return nalt;
}
/*
* Map of which direct events on which PMCs are marked instruction events.
* Indexed by PMCSEL value, bit i (LE) set if PMC i is a marked event.
* Bit 0 is set if it is marked for all PMCs.
* The 0x80 bit indicates a byte decode PMCSEL value.
*/
/* One entry per PMCSEL value 0x00..0x27; larger values are handled in code. */
static unsigned char direct_event_is_marked[0x28] = {
0, /* 00 */
0x1f, /* 01 PM_IOPS_CMPL */
0x2, /* 02 PM_MRK_GRP_DISP */
0xe, /* 03 PM_MRK_ST_CMPL, PM_MRK_ST_GPS, PM_MRK_ST_CMPL_INT */
0, /* 04 */
0x1c, /* 05 PM_MRK_BRU_FIN, PM_MRK_INST_FIN, PM_MRK_CRU_FIN */
0x80, /* 06 */
0x80, /* 07 */
0, 0, 0,/* 08 - 0a */
0x18, /* 0b PM_THRESH_TIMEO, PM_MRK_GRP_TIMEO */
0, /* 0c */
0x80, /* 0d */
0x80, /* 0e */
0, /* 0f */
0, /* 10 */
0x14, /* 11 PM_MRK_GRP_BR_REDIR, PM_MRK_GRP_IC_MISS */
0, /* 12 */
0x10, /* 13 PM_MRK_GRP_CMPL */
0x1f, /* 14 PM_GRP_MRK, PM_MRK_{FXU,FPU,LSU}_FIN */
0x2, /* 15 PM_MRK_GRP_ISSUED */
0x80, /* 16 */
0x80, /* 17 */
0, 0, 0, 0, 0, /* 18 - 1c */
0x80, /* 1d */
0x80, /* 1e */
0, /* 1f */
0x80, /* 20 */
0x80, /* 21 */
0x80, /* 22 */
0x80, /* 23 */
0x80, /* 24 */
0x80, /* 25 */
0x80, /* 26 */
0x80, /* 27 */
};
/*
 * Returns 1 if event counts things relating to marked instructions
 * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not.
 *
 * Direct events are looked up in direct_event_is_marked[].  For events
 * that select a bit of an event-bus byte, the bit number is worked out
 * from the PMCSEL value and then checked against a per-unit mask of
 * marked bus bits (only LSU0 and the low word of LSU1 have any).
 */
static int power5p_marked_instr_event(u64 event)
{
	int pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
	int psel = event & PM_PMCSEL_MSK;
	int bit = -1;		/* -1: no bus bit identified */
	int byte, unit;
	u32 mask;

	/* events on PMC5/6 are never marked events */
	if (pmc >= 5)
		return 0;

	if (psel < sizeof(direct_event_is_marked)) {
		unsigned char info = direct_event_is_marked[psel];

		if (info & (1 << pmc))
			return 1;
		if (info & 0x80) {
			/* byte decode PMCSEL value */
			bit = 4;
		} else {
			switch (psel) {
			case 0x08:
				bit = pmc - 1;
				break;
			case 0x10:
				bit = 4 - pmc;
				break;
			case 0x1b:
				if (pmc == 1 || pmc == 3)
					bit = 4;
				break;
			default:
				break;
			}
		}
	} else if ((psel & 0x48) == 0x40) {
		bit = psel & 7;
	} else if (psel == 0x28) {
		bit = pmc - 1;
	} else if (pmc == 3 && (psel == 0x2e || psel == 0x2f)) {
		bit = 4;
	}

	if (bit == -1 || !(event & PM_BUSEVENT_MSK))
		return 0;

	byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
	unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
	if (unit == PM_LSU0) {
		/* byte 1 bits 0-7, byte 2 bits 0,2-4,6 */
		mask = 0x5dff00;
	} else if (unit == PM_LSU1 && byte >= 4) {
		byte -= 4;
		/* byte 5 bits 6-7, byte 6 bits 0,4, byte 7 bits 0-4,6 */
		mask = 0x5f11c000;
	} else {
		return 0;
	}

	return (mask >> (byte * 8 + bit)) & 1;
}
/*
 * Assign a PMC to each of the n_ev events in event[] and compute the
 * MMCR values needed to count them together.
 *
 * On success returns 0, stores the 0-based PMC index chosen for event i
 * in hwc[i], and stores the MMCR0, MMCR1 and MMCRA values in mmcr[0],
 * mmcr[1] and mmcr[2] respectively.  Returns -1 if the set cannot be
 * scheduled: more than 6 events, an invalid or already-claimed PMC
 * number, an invalid unit/byte combination, two units wanting the same
 * byte lane or the same TTM, or no free PMC1-4 for an any-PMC event.
 */
static int power5p_compute_mmcr(u64 event[], int n_ev,
unsigned int hwc[], u64 mmcr[])
{
u64 mmcr1 = 0;
u64 mmcra = 0;
unsigned int pmc, unit, byte, psel;
unsigned int ttm;
int i, isbus, bit, grsel;
unsigned int pmc_inuse = 0;
unsigned char busbyte[4];
unsigned char unituse[16];
int ttmuse;
if (n_ev > 6)
return -1;
/* First pass to count resource use */
memset(busbyte, 0, sizeof(busbyte));
memset(unituse, 0, sizeof(unituse));
for (i = 0; i < n_ev; ++i) {
pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
if (pmc) {
/* event names a specific PMC: it must be valid and not yet claimed */
if (pmc > 6)
return -1;
if (pmc_inuse & (1 << (pmc - 1)))
return -1;
pmc_inuse |= 1 << (pmc - 1);
}
if (event[i] & PM_BUSEVENT_MSK) {
unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
if (unit > PM_LASTUNIT)
return -1;
if (unit == PM_ISU0_ALT)
unit = PM_ISU0;
if (byte >= 4) {
/* bytes 4-7 exist only for LSU1; track them as unit LSU1+1 */
if (unit != PM_LSU1)
return -1;
++unit;
byte &= 3;
}
/* each byte lane can be driven by only one unit */
if (busbyte[byte] && busbyte[byte] != unit)
return -1;
busbyte[byte] = unit;
unituse[unit] = 1;
}
}
/*
* Assign resources and set multiplexer selects.
*
* PM_ISU0 can go either on TTM0 or TTM1, but that's the only
* choice we have to deal with.
*/
if (unituse[PM_ISU0] &
(unituse[PM_FPU] | unituse[PM_IFU] | unituse[PM_ISU1])) {
unituse[PM_ISU0_ALT] = 1; /* move ISU to TTM1 */
unituse[PM_ISU0] = 0;
}
/* Set TTM[01]SEL fields. */
ttmuse = 0;
/* units PM_FPU..PM_ISU1 share TTM0: at most one of them may be in use */
for (i = PM_FPU; i <= PM_ISU1; ++i) {
if (!unituse[i])
continue;
if (ttmuse++)
return -1;
mmcr1 |= (u64)i << MMCR1_TTM0SEL_SH;
}
ttmuse = 0;
/* continues from PM_ISU1 + 1: units up to PM_GRS share TTM1 */
for (; i <= PM_GRS; ++i) {
if (!unituse[i])
continue;
if (ttmuse++)
return -1;
mmcr1 |= (u64)(i & 3) << MMCR1_TTM1SEL_SH;
}
/* NOTE(review): cannot be true here, the loop above already returns on a second user */
if (ttmuse > 1)
return -1;
/* Set byte lane select fields, TTM[23]SEL and GRS_*SEL. */
for (byte = 0; byte < 4; ++byte) {
unit = busbyte[byte];
if (!unit)
continue;
if (unit == PM_ISU0 && unituse[PM_ISU0_ALT]) {
/* get ISU0 through TTM1 rather than TTM0 */
unit = PM_ISU0_ALT;
} else if (unit == PM_LSU1 + 1) {
/* select lower word of LSU1 for this byte */
mmcr1 |= 1ull << (MMCR1_TTM3SEL_SH + 3 - byte);
}
/* the top two bits of the unit number go in this lane's TD_CP_DBGxSEL field */
ttm = unit >> 2;
mmcr1 |= (u64)ttm << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte);
}
/* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */
for (i = 0; i < n_ev; ++i) {
pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
psel = event[i] & PM_PMCSEL_MSK;
isbus = event[i] & PM_BUSEVENT_MSK;
/* from here on pmc is converted to a 0-based index */
if (!pmc) {
/* Bus event or any-PMC direct event */
for (pmc = 0; pmc < 4; ++pmc) {
if (!(pmc_inuse & (1 << pmc)))
break;
}
if (pmc >= 4)
return -1;
pmc_inuse |= 1 << pmc;
} else if (pmc <= 4) {
/* Direct event */
--pmc;
if (isbus && (byte & 2) &&
(psel == 8 || psel == 0x10 || psel == 0x28))
/* add events on higher-numbered bus */
mmcr1 |= 1ull << (MMCR1_PMC1_ADDER_SEL_SH - pmc);
} else {
/* Instructions or run cycles on PMC5/6 */
--pmc;
}
if (isbus && unit == PM_GRS) {
bit = psel & 7;
grsel = (event[i] >> PM_GRS_SH) & PM_GRS_MSK;
mmcr1 |= (u64)grsel << grsel_shift[bit];
}
if (power5p_marked_instr_event(event[i]))
mmcra |= MMCRA_SAMPLE_ENABLE;
if ((psel & 0x58) == 0x40 && (byte & 1) != ((pmc >> 1) & 1))
/* select alternate byte lane */
psel |= 0x10;
/* only PMC1-4 (indices 0-3) get a PMCSEL field written */
if (pmc <= 3)
mmcr1 |= psel << MMCR1_PMCSEL_SH(pmc);
hwc[i] = pmc;
}
/* Return MMCRx values */
mmcr[0] = 0;
/* PMC1CE is set when PMC1 is in use, PMCjCE when any of PMC2-6 is */
if (pmc_inuse & 1)
mmcr[0] = MMCR0_PMC1CE;
if (pmc_inuse & 0x3e)
mmcr[0] |= MMCR0_PMCjCE;
mmcr[1] = mmcr1;
mmcr[2] = mmcra;
return 0;
}
/*
 * Clear the 7-bit PMCSEL field in MMCR1 (mmcr[1]) of the PMC with
 * 0-based index @pmc.  Nothing is changed for indices above 3.
 */
static void power5p_disable_pmc(unsigned int pmc, u64 mmcr[])
{
	if (pmc > 3)
		return;
	mmcr[1] &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc));
}
/* POWER5+ event codes for the generic hardware events, indexed by PERF_COUNT_HW_* id. */
static int power5p_generic_events[] = {
[PERF_COUNT_HW_CPU_CYCLES] = 0xf,
[PERF_COUNT_HW_INSTRUCTIONS] = 0x100009,
[PERF_COUNT_HW_CACHE_REFERENCES] = 0x1c10a8, /* LD_REF_L1 */
[PERF_COUNT_HW_CACHE_MISSES] = 0x3c1088, /* LD_MISS_L1 */
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x230e4, /* BR_ISSUED */
[PERF_COUNT_HW_BRANCH_MISSES] = 0x230e5, /* BR_MPRED_CR */
};
/* Shorthand: C(L1D) expands to PERF_COUNT_HW_CACHE_L1D, and so on. */
#define C(x) PERF_COUNT_HW_CACHE_##x
/*
* Table of generalized cache-related events.
* 0 means not supported, -1 means nonsensical, other values
* are event codes.
*
* Indexed as [cache][operation][result]; in each innermost pair the
* first value is the access event and the second the miss event.
*/
static int power5p_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
[C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */
[C(OP_READ)] = { 0x1c10a8, 0x3c1088 },
[C(OP_WRITE)] = { 0x2c10a8, 0xc10c3 },
[C(OP_PREFETCH)] = { 0xc70e7, -1 },
},
[C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */
[C(OP_READ)] = { 0, 0 },
[C(OP_WRITE)] = { -1, -1 },
[C(OP_PREFETCH)] = { 0, 0 },
},
[C(LL)] = { /* RESULT_ACCESS RESULT_MISS */
[C(OP_READ)] = { 0, 0 },
[C(OP_WRITE)] = { 0, 0 },
[C(OP_PREFETCH)] = { 0xc50c3, 0 },
},
[C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */
[C(OP_READ)] = { 0xc20e4, 0x800c4 },
[C(OP_WRITE)] = { -1, -1 },
[C(OP_PREFETCH)] = { -1, -1 },
},
[C(ITLB)] = { /* RESULT_ACCESS RESULT_MISS */
[C(OP_READ)] = { 0, 0x800c0 },
[C(OP_WRITE)] = { -1, -1 },
[C(OP_PREFETCH)] = { -1, -1 },
},
[C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */
[C(OP_READ)] = { 0x230e4, 0x230e5 },
[C(OP_WRITE)] = { -1, -1 },
[C(OP_PREFETCH)] = { -1, -1 },
},
};
/*
 * PMU description for POWER5+: counter count, constraint adder constants,
 * the per-CPU callbacks defined above, and the generic/cache event tables.
 */
struct power_pmu power5p_pmu = {
.n_counter = 6,
.max_alternatives = MAX_ALT,
/* NOTE(review): presumably the low bit of each count field (NC, P1-P4) in the constraint layout - confirm */
.add_fields = 0x7000000000055ull,
.test_adder = 0x3000040000000ull,
.compute_mmcr = power5p_compute_mmcr,
.get_constraint = power5p_get_constraint,
.get_alternatives = power5p_get_alternatives,
.disable_pmc = power5p_disable_pmc,
.limited_pmc_event = power5p_limited_pmc_event,
.flags = PPMU_LIMITED_PMC5_6,
.n_generic = ARRAY_SIZE(power5p_generic_events),
.generic_events = power5p_generic_events,
.cache_events = &power5p_cache_events,
};
/*
* Performance counter support for POWER5 (not POWER5++) processors.
*
* Copyright 2009 Paul Mackerras, IBM Corporation.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/kernel.h>
#include <linux/perf_counter.h>
#include <asm/reg.h>
/*
* Bits in event code for POWER5 (not POWER5++)
*/
#define PM_PMC_SH 20 /* PMC number (1-based) for direct events */
#define PM_PMC_MSK 0xf
#define PM_PMC_MSKS (PM_PMC_MSK << PM_PMC_SH)
#define PM_UNIT_SH 16 /* TTMMUX number and setting - unit select */
#define PM_UNIT_MSK 0xf
#define PM_BYTE_SH 12 /* Byte number of event bus to use */
#define PM_BYTE_MSK 7 /* bytes 4-7 are accepted only for PM_LSU1 */
#define PM_GRS_SH 8 /* Storage subsystem mux select */
#define PM_GRS_MSK 7
#define PM_BUSEVENT_MSK 0x80 /* Set if event uses event bus */
#define PM_PMCSEL_MSK 0x7f
/* Values in PM_UNIT field */
#define PM_FPU 0
#define PM_ISU0 1
#define PM_IFU 2
#define PM_ISU1 3
#define PM_IDU 4
#define PM_ISU0_ALT 6
#define PM_GRS 7
#define PM_LSU0 8
#define PM_LSU1 0xc /* PM_LSU1 + 1 is used internally for LSU1 bytes 4-7 */
#define PM_LASTUNIT 0xc
/*
* Bits in MMCR1 for POWER5
*/
#define MMCR1_TTM0SEL_SH 62
#define MMCR1_TTM1SEL_SH 60
#define MMCR1_TTM2SEL_SH 58
#define MMCR1_TTM3SEL_SH 56
#define MMCR1_TTMSEL_MSK 3
#define MMCR1_TD_CP_DBG0SEL_SH 54
#define MMCR1_TD_CP_DBG1SEL_SH 52
#define MMCR1_TD_CP_DBG2SEL_SH 50
#define MMCR1_TD_CP_DBG3SEL_SH 48
#define MMCR1_GRS_L2SEL_SH 46
#define MMCR1_GRS_L2SEL_MSK 3
#define MMCR1_GRS_L3SEL_SH 44
#define MMCR1_GRS_L3SEL_MSK 3
#define MMCR1_GRS_MCSEL_SH 41
#define MMCR1_GRS_MCSEL_MSK 7
#define MMCR1_GRS_FABSEL_SH 39
#define MMCR1_GRS_FABSEL_MSK 3
#define MMCR1_PMC1_ADDER_SEL_SH 35
#define MMCR1_PMC2_ADDER_SEL_SH 34
#define MMCR1_PMC3_ADDER_SEL_SH 33
#define MMCR1_PMC4_ADDER_SEL_SH 32
#define MMCR1_PMC1SEL_SH 25
#define MMCR1_PMC2SEL_SH 17
#define MMCR1_PMC3SEL_SH 9
#define MMCR1_PMC4SEL_SH 1
/* n is the 0-based PMC index: n = 0 gives MMCR1_PMC1SEL_SH, n = 3 gives MMCR1_PMC4SEL_SH */
#define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8)
#define MMCR1_PMCSEL_MSK 0x7f
/*
* Bits in MMCRA
*/
/*
* Layout of constraint bits:
* 6666555555555544444444443333333333222222222211111111110000000000
* 3210987654321098765432109876543210987654321098765432109876543210
* <><>[ ><><>< ><> [ >[ >[ >< >< >< >< ><><><><><><>
* T0T1 NC G0G1G2 G3 UC PS1PS2 B0 B1 B2 B3 P6P5P4P3P2P1
*
* T0 - TTM0 constraint
* 54-55: TTM0SEL value (0=FPU, 2=IFU, 3=ISU1) 0xc0_0000_0000_0000
*
* T1 - TTM1 constraint
* 52-53: TTM1SEL value (0=IDU, 3=GRS) 0x30_0000_0000_0000
*
* NC - number of counters
* 51: NC error 0x0008_0000_0000_0000
* 48-50: number of events needing PMC1-4 0x0007_0000_0000_0000
*
* G0..G3 - GRS mux constraints
* 46-47: GRS_L2SEL value
* 44-45: GRS_L3SEL value
* 41-43: GRS_MCSEL value
* 39-40: GRS_FABSEL value
* Note that these match up with their bit positions in MMCR1
*
* UC - unit constraint: can't have all three of FPU|IFU|ISU1, ISU0, IDU|GRS
* 37: UC3 error 0x20_0000_0000
* 36: FPU|IFU|ISU1 events needed 0x10_0000_0000
* 35: ISU0 events needed 0x08_0000_0000
* 34: IDU|GRS events needed 0x04_0000_0000
*
* PS1
* 33: PS1 error 0x2_0000_0000
* 31-32: count of events needing PMC1/2 0x1_8000_0000
*
* PS2
* 30: PS2 error 0x4000_0000
* 28-29: count of events needing PMC3/4 0x3000_0000
*
* B0
* 24-27: Byte 0 event source 0x0f00_0000
* Encoding as for the event code
*
* B1, B2, B3
* 20-23, 16-19, 12-15: Byte 1, 2, 3 event sources
*
* P1..P6
* 0-11: Count of events needing PMC1..PMC6
*/
/*
 * Bit position in MMCR1 of the GRS_*SEL field used by a GRS event,
 * indexed by the low three bits of the event's PMCSEL value:
 * 0-2 use GRS_L2SEL, 3-5 use GRS_L3SEL, 6 uses GRS_MCSEL and
 * 7 uses GRS_FABSEL.
 */
static const int grsel_shift[8] = {
MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH,
MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH,
MMCR1_GRS_MCSEL_SH, MMCR1_GRS_FABSEL_SH
};
/*
 * Masks and values for using events from the various units.
 * Indexed by unit number; element [0] is OR-ed into the constraint mask
 * and element [1] into the constraint value by power5_get_constraint().
 * Units without an explicit entry contribute nothing (both elements 0).
 */
static u64 unit_cons[PM_LASTUNIT+1][2] = {
[PM_FPU] = { 0xc0002000000000ull, 0x00001000000000ull },
[PM_ISU0] = { 0x00002000000000ull, 0x00000800000000ull },
[PM_ISU1] = { 0xc0002000000000ull, 0xc0001000000000ull },
[PM_IFU] = { 0xc0002000000000ull, 0x80001000000000ull },
[PM_IDU] = { 0x30002000000000ull, 0x00000400000000ull },
[PM_GRS] = { 0x30002000000000ull, 0x30000400000000ull },
};
/*
 * Compute the constraint mask/value pair for a single event.
 *
 * On success stores the mask in *maskp and the value in *valp and
 * returns 0.  Returns -1 (leaving *maskp and *valp untouched) when the
 * event code is not valid: PMC number above 6, a PMC5/6 event other than
 * 0x500009 / 0x600005, a unit above PM_LASTUNIT, or a byte number of 4-7
 * on a unit other than PM_LSU1.
 */
static int power5_get_constraint(u64 event, u64 *maskp, u64 *valp)
{
	u64 cmask = 0, cval = 0;
	int pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
	int pair = -1;	/* 0: needs PMC1/2, 1: needs PMC3/4, -1: neither */

	if (pmc > 6)
		return -1;

	if (pmc) {
		/* two-bit count field for this particular PMC */
		int field = (pmc - 1) * 2;

		cmask |= 2 << field;
		cval |= 1 << field;
		if (pmc <= 4)
			pair = (pmc - 1) >> 1;
		else if (event != 0x500009 && event != 0x600005)
			return -1;
	}

	if (event & PM_BUSEVENT_MSK) {
		int unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
		int lane = (event >> PM_BYTE_SH) & PM_BYTE_MSK;

		if (unit > PM_LASTUNIT)
			return -1;
		if (unit == PM_ISU0_ALT)
			unit = PM_ISU0;
		cmask |= unit_cons[unit][0];
		cval |= unit_cons[unit][1];

		if (lane >= 4) {
			if (unit != PM_LSU1)
				return -1;
			/* LSU1 low word (bytes 4-7) is treated as unit LSU1+1 */
			unit++;
			lane &= 3;
		}

		if (unit == PM_GRS) {
			int sel = event & 7;
			int width = (sel == 6) ? 7 : 3;
			int pos = grsel_shift[sel];

			cmask |= (u64)width << pos;
			cval |= (u64)((event >> PM_GRS_SH) & width) << pos;
		}

		/*
		 * Bus events on bytes 0 and 2 can be counted
		 * on PMC1/2; bytes 1 and 3 on PMC3/4.
		 */
		if (!pmc)
			pair = lane & 1;

		/* byte lane select field: which unit drives this lane */
		cmask |= 0xfULL << (24 - 4 * lane);
		cval |= (u64)unit << (24 - 4 * lane);
	}

	switch (pair) {
	case 0:
		/* increment PMC1/2 field */
		cmask |= 0x200000000ull;
		cval |= 0x080000000ull;
		break;
	case 1:
		/* increment PMC3/4 field */
		cmask |= 0x40000000ull;
		cval |= 0x10000000ull;
		break;
	default:
		break;
	}

	if (pmc < 5) {
		/* this event needs one of PMC1-4 */
		cmask |= 0x8000000000000ull;
		cval |= 0x1000000000000ull;
	}

	*maskp = cmask;
	*valp = cval;
	return 0;
}
#define MAX_ALT 3 /* at most 3 alternatives for any event */
/*
 * Each row lists event codes that count the same thing; unused trailing
 * slots are 0.  find_alternative() stops scanning as soon as the event is
 * smaller than a row's first entry, so rows must stay sorted by their
 * first entry and the first entry must be the smallest code in its row.
 */
static const unsigned int event_alternatives[][MAX_ALT] = {
{ 0x120e4, 0x400002 }, /* PM_GRP_DISP_REJECT */
{ 0x410c7, 0x441084 }, /* PM_THRD_L2MISS_BOTH_CYC */
{ 0x100005, 0x600005 }, /* PM_RUN_CYC */
{ 0x100009, 0x200009, 0x500009 }, /* PM_INST_CMPL */
{ 0x300009, 0x400009 }, /* PM_INST_DISP */
};
/*
 * Look @event up in the alternatives table.  Returns the index of the
 * row that contains it, or -1 when no row does.  The search gives up as
 * soon as @event is below the first entry of the current row.
 */
static int find_alternative(u64 event)
{
	int row, col;

	for (row = 0; row < ARRAY_SIZE(event_alternatives); ++row) {
		const unsigned int *cand = event_alternatives[row];

		if (event < cand[0])
			return -1;
		for (col = 0; col < MAX_ALT && cand[col]; ++col) {
			if (cand[col] == event)
				return row;
		}
	}
	return -1;
}
/*
 * PMCSEL values of byte-decode events, one row per PMC (row index is
 * PMC number - 1).  find_alternative_bdecode() treats column j of the
 * row for PMC n and column j of the row for PMC 5-n as alternatives of
 * each other.
 */
static const unsigned char bytedecode_alternatives[4][4] = {
/* PMC 1 */ { 0x21, 0x23, 0x25, 0x27 },
/* PMC 2 */ { 0x07, 0x17, 0x0e, 0x1e },
/* PMC 3 */ { 0x20, 0x22, 0x24, 0x26 },
/* PMC 4 */ { 0x07, 0x17, 0x0e, 0x1e }
};
/*
 * Some direct events for decodes of event bus byte 3 have alternative
 * PMCSEL values on other counters.  This returns the alternative
 * event code for those that do, or -1 otherwise.
 *
 * Only events that name PMC1-4 are considered; the alternative lives on
 * PMC 5 - pmc (1 <-> 4, 2 <-> 3).
 */
static s64 find_alternative_bdecode(u64 event)
{
	int pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
	int psel = event & PM_PMCSEL_MSK;
	int other, col;

	if (pmc < 1 || pmc > 4)
		return -1;
	other = 5 - pmc;

	for (col = 0; col < 4; ++col) {
		if (bytedecode_alternatives[pmc - 1][col] != psel)
			continue;
		/* same event, with the PMC and PMCSEL fields swapped over */
		return (event & ~(PM_PMC_MSKS | PM_PMCSEL_MSK)) |
			(other << PM_PMC_SH) |
			bytedecode_alternatives[other - 1][col];
	}
	return -1;
}
/*
 * Fill alt[] with the event codes equivalent to @event and return how
 * many were stored.  alt[0] is always @event itself; it is followed by
 * the other entries of its row in event_alternatives[], or, when the
 * event is not in that table, by its byte-decode alternative if it has
 * one.  @flags is not used.
 */
static int power5_get_alternatives(u64 event, unsigned int flags, u64 alt[])
{
	int count = 0;
	int row, col;
	s64 other;

	alt[count++] = event;

	row = find_alternative(event);
	if (row < 0) {
		other = find_alternative_bdecode(event);
		if (other > 0)
			alt[count++] = other;
		return count;
	}

	for (col = 0; col < MAX_ALT; ++col) {
		other = event_alternatives[row][col];
		if (other == 0 || other == event)
			continue;
		alt[count++] = other;
	}
	return count;
}
/*
* Map of which direct events on which PMCs are marked instruction events.
* Indexed by PMCSEL value, bit i (LE) set if PMC i is a marked event.
* Bit 0 is set if it is marked for all PMCs.
* The 0x80 bit indicates a byte decode PMCSEL value.
*/
/* One entry per PMCSEL value 0x00..0x27; larger values are handled in code. */
static unsigned char direct_event_is_marked[0x28] = {
0, /* 00 */
0x1f, /* 01 PM_IOPS_CMPL */
0x2, /* 02 PM_MRK_GRP_DISP */
0xe, /* 03 PM_MRK_ST_CMPL, PM_MRK_ST_GPS, PM_MRK_ST_CMPL_INT */
0, /* 04 */
0x1c, /* 05 PM_MRK_BRU_FIN, PM_MRK_INST_FIN, PM_MRK_CRU_FIN */
0x80, /* 06 */
0x80, /* 07 */
0, 0, 0,/* 08 - 0a */
0x18, /* 0b PM_THRESH_TIMEO, PM_MRK_GRP_TIMEO */
0, /* 0c */
0x80, /* 0d */
0x80, /* 0e */
0, /* 0f */
0, /* 10 */
0x14, /* 11 PM_MRK_GRP_BR_REDIR, PM_MRK_GRP_IC_MISS */
0, /* 12 */
0x10, /* 13 PM_MRK_GRP_CMPL */
0x1f, /* 14 PM_GRP_MRK, PM_MRK_{FXU,FPU,LSU}_FIN */
0x2, /* 15 PM_MRK_GRP_ISSUED */
0x80, /* 16 */
0x80, /* 17 */
0, 0, 0, 0, 0, /* 18 - 1c */
0x80, /* 1d */
0x80, /* 1e */
0, /* 1f */
0x80, /* 20 */
0x80, /* 21 */
0x80, /* 22 */
0x80, /* 23 */
0x80, /* 24 */
0x80, /* 25 */
0x80, /* 26 */
0x80, /* 27 */
};
/*
 * Returns 1 if event counts things relating to marked instructions
 * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not.
 *
 * Direct events are looked up in direct_event_is_marked[].  For events
 * that select a bit of an event-bus byte, the bit number is worked out
 * from the PMCSEL value and then checked against a per-unit mask of
 * marked bus bits (only LSU0 and the low word of LSU1 have any).
 *
 * If no bus bit could be identified (bit stays -1) the event is not a
 * marked event.  This must be checked before the mask lookup: shifting
 * by byte * 8 - 1 would test the wrong bit, and for byte 0 it would be
 * a shift by a negative amount.
 */
static int power5_marked_instr_event(u64 event)
{
	int pmc, psel;
	int bit, byte, unit;
	u32 mask;

	pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
	psel = event & PM_PMCSEL_MSK;
	/* events on PMC5/6 are never marked events */
	if (pmc >= 5)
		return 0;

	bit = -1;
	if (psel < sizeof(direct_event_is_marked)) {
		if (direct_event_is_marked[psel] & (1 << pmc))
			return 1;
		if (direct_event_is_marked[psel] & 0x80)
			bit = 4;	/* byte decode PMCSEL value */
		else if (psel == 0x08)
			bit = pmc - 1;
		else if (psel == 0x10)
			bit = 4 - pmc;
		else if (psel == 0x1b && (pmc == 1 || pmc == 3))
			bit = 4;
	} else if ((psel & 0x58) == 0x40)
		bit = psel & 7;

	if (!(event & PM_BUSEVENT_MSK) || bit == -1)
		return 0;

	byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
	unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
	if (unit == PM_LSU0) {
		/* byte 1 bits 0-7, byte 2 bits 0,2-4,6 */
		mask = 0x5dff00;
	} else if (unit == PM_LSU1 && byte >= 4) {
		byte -= 4;
		/* byte 4 bits 1,3,5,7, byte 5 bits 6-7, byte 7 bits 0-4,6 */
		mask = 0x5f00c0aa;
	} else
		return 0;

	return (mask >> (byte * 8 + bit)) & 1;
}
/*
 * Assign a PMC to each of the n_ev events in event[] and compute the
 * MMCR values needed to count them together.
 *
 * On success returns 0, stores the 0-based PMC index chosen for event i
 * in hwc[i], and stores the MMCR0, MMCR1 and MMCRA values in mmcr[0],
 * mmcr[1] and mmcr[2] respectively.  Returns -1 if the set cannot be
 * scheduled: more than 6 events, an invalid or already-claimed PMC
 * number, an invalid unit/byte combination, more than two events needing
 * the same PMC pair (PMC1/2 or PMC3/4), two units wanting the same byte
 * lane or the same TTM, or no suitable free PMC1-4 for an any-PMC event.
 */
static int power5_compute_mmcr(u64 event[], int n_ev,
			       unsigned int hwc[], u64 mmcr[])
{
	u64 mmcr1 = 0;
	u64 mmcra = 0;
	unsigned int pmc, unit, byte, psel;
	unsigned int ttm, grp;
	int i, isbus, bit, grsel;
	unsigned int pmc_inuse = 0;
	unsigned int pmc_grp_use[2];
	unsigned char busbyte[4];
	unsigned char unituse[16];
	int ttmuse;

	if (n_ev > 6)
		return -1;

	/* First pass to count resource use */
	pmc_grp_use[0] = pmc_grp_use[1] = 0;
	memset(busbyte, 0, sizeof(busbyte));
	memset(unituse, 0, sizeof(unituse));
	for (i = 0; i < n_ev; ++i) {
		pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
		if (pmc) {
			/* event names a specific PMC: must be valid and unclaimed */
			if (pmc > 6)
				return -1;
			if (pmc_inuse & (1 << (pmc - 1)))
				return -1;
			pmc_inuse |= 1 << (pmc - 1);
			/* count 1/2 vs 3/4 use */
			if (pmc <= 4)
				++pmc_grp_use[(pmc - 1) >> 1];
		}
		if (event[i] & PM_BUSEVENT_MSK) {
			unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
			byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
			if (unit > PM_LASTUNIT)
				return -1;
			if (unit == PM_ISU0_ALT)
				unit = PM_ISU0;
			if (byte >= 4) {
				/* bytes 4-7 exist only for LSU1; track as LSU1+1 */
				if (unit != PM_LSU1)
					return -1;
				++unit;
				byte &= 3;
			}
			/* an any-PMC bus event is tied to a pair by its byte */
			if (!pmc)
				++pmc_grp_use[byte & 1];
			/* each byte lane can be driven by only one unit */
			if (busbyte[byte] && busbyte[byte] != unit)
				return -1;
			busbyte[byte] = unit;
			unituse[unit] = 1;
		}
	}
	if (pmc_grp_use[0] > 2 || pmc_grp_use[1] > 2)
		return -1;

	/*
	 * Assign resources and set multiplexer selects.
	 *
	 * PM_ISU0 can go either on TTM0 or TTM1, but that's the only
	 * choice we have to deal with.
	 */
	if (unituse[PM_ISU0] &
	    (unituse[PM_FPU] | unituse[PM_IFU] | unituse[PM_ISU1])) {
		unituse[PM_ISU0_ALT] = 1;	/* move ISU to TTM1 */
		unituse[PM_ISU0] = 0;
	}
	/* Set TTM[01]SEL fields. */
	ttmuse = 0;
	/* units PM_FPU..PM_ISU1 share TTM0: at most one may be in use */
	for (i = PM_FPU; i <= PM_ISU1; ++i) {
		if (!unituse[i])
			continue;
		if (ttmuse++)
			return -1;
		mmcr1 |= (u64)i << MMCR1_TTM0SEL_SH;
	}
	ttmuse = 0;
	/* continues from PM_ISU1 + 1: units up to PM_GRS share TTM1 */
	for (; i <= PM_GRS; ++i) {
		if (!unituse[i])
			continue;
		if (ttmuse++)
			return -1;
		mmcr1 |= (u64)(i & 3) << MMCR1_TTM1SEL_SH;
	}

	/* Set byte lane select fields, TTM[23]SEL and GRS_*SEL. */
	for (byte = 0; byte < 4; ++byte) {
		unit = busbyte[byte];
		if (!unit)
			continue;
		if (unit == PM_ISU0 && unituse[PM_ISU0_ALT]) {
			/* get ISU0 through TTM1 rather than TTM0 */
			unit = PM_ISU0_ALT;
		} else if (unit == PM_LSU1 + 1) {
			/* select lower word of LSU1 for this byte */
			mmcr1 |= 1ull << (MMCR1_TTM3SEL_SH + 3 - byte);
		}
		/* top two bits of the unit number go in TD_CP_DBGxSEL */
		ttm = unit >> 2;
		mmcr1 |= (u64)ttm << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte);
	}

	/* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */
	for (i = 0; i < n_ev; ++i) {
		pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
		unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
		byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
		psel = event[i] & PM_PMCSEL_MSK;
		isbus = event[i] & PM_BUSEVENT_MSK;
		/* from here on pmc is converted to a 0-based index */
		if (!pmc) {
			/* Bus event or any-PMC direct event */
			for (pmc = 0; pmc < 4; ++pmc) {
				if (pmc_inuse & (1 << pmc))
					continue;
				grp = (pmc >> 1) & 1;
				if (isbus) {
					if (grp == (byte & 1))
						break;
				} else if (pmc_grp_use[grp] < 2) {
					++pmc_grp_use[grp];
					break;
				}
			}
			/*
			 * No suitable free counter among PMC1-4: fail rather
			 * than record the event on index 4 (PMC5).
			 */
			if (pmc >= 4)
				return -1;
			pmc_inuse |= 1 << pmc;
		} else if (pmc <= 4) {
			/* Direct event */
			--pmc;
			if ((psel == 8 || psel == 0x10) && isbus && (byte & 2))
				/* add events on higher-numbered bus */
				mmcr1 |= 1ull << (MMCR1_PMC1_ADDER_SEL_SH - pmc);
		} else {
			/* Instructions or run cycles on PMC5/6 */
			--pmc;
		}

		if (isbus && unit == PM_GRS) {
			bit = psel & 7;
			grsel = (event[i] >> PM_GRS_SH) & PM_GRS_MSK;
			mmcr1 |= (u64)grsel << grsel_shift[bit];
		}
		if (power5_marked_instr_event(event[i]))
			mmcra |= MMCRA_SAMPLE_ENABLE;
		/* only PMC1-4 (indices 0-3) get a PMCSEL field written */
		if (pmc <= 3)
			mmcr1 |= psel << MMCR1_PMCSEL_SH(pmc);
		hwc[i] = pmc;
	}

	/* Return MMCRx values */
	mmcr[0] = 0;
	/* PMC1CE is set when PMC1 is in use, PMCjCE when any of PMC2-6 is */
	if (pmc_inuse & 1)
		mmcr[0] = MMCR0_PMC1CE;
	if (pmc_inuse & 0x3e)
		mmcr[0] |= MMCR0_PMCjCE;
	mmcr[1] = mmcr1;
	mmcr[2] = mmcra;
	return 0;
}
/*
 * Clear the 7-bit PMCSEL field in MMCR1 (mmcr[1]) of the PMC with
 * 0-based index @pmc.  Nothing is changed for indices above 3.
 */
static void power5_disable_pmc(unsigned int pmc, u64 mmcr[])
{
	if (pmc > 3)
		return;
	mmcr[1] &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc));
}
/* POWER5 event codes for the generic hardware events, indexed by PERF_COUNT_HW_* id. */
static int power5_generic_events[] = {
[PERF_COUNT_HW_CPU_CYCLES] = 0xf,
[PERF_COUNT_HW_INSTRUCTIONS] = 0x100009,
[PERF_COUNT_HW_CACHE_REFERENCES] = 0x4c1090, /* LD_REF_L1 */
[PERF_COUNT_HW_CACHE_MISSES] = 0x3c1088, /* LD_MISS_L1 */
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x230e4, /* BR_ISSUED */
[PERF_COUNT_HW_BRANCH_MISSES] = 0x230e5, /* BR_MPRED_CR */
};
/* Shorthand: C(L1D) expands to PERF_COUNT_HW_CACHE_L1D, and so on. */
#define C(x) PERF_COUNT_HW_CACHE_##x
/*
* Table of generalized cache-related events.
* 0 means not supported, -1 means nonsensical, other values
* are event codes.
*
* Indexed as [cache][operation][result]; in each innermost pair the
* first value is the access event and the second the miss event.
*/
static int power5_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
[C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */
[C(OP_READ)] = { 0x4c1090, 0x3c1088 },
[C(OP_WRITE)] = { 0x3c1090, 0xc10c3 },
[C(OP_PREFETCH)] = { 0xc70e7, 0 },
},
[C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */
[C(OP_READ)] = { 0, 0 },
[C(OP_WRITE)] = { -1, -1 },
[C(OP_PREFETCH)] = { 0, 0 },
},
[C(LL)] = { /* RESULT_ACCESS RESULT_MISS */
[C(OP_READ)] = { 0, 0x3c309b },
[C(OP_WRITE)] = { 0, 0 },
[C(OP_PREFETCH)] = { 0xc50c3, 0 },
},
[C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */
[C(OP_READ)] = { 0x2c4090, 0x800c4 },
[C(OP_WRITE)] = { -1, -1 },
[C(OP_PREFETCH)] = { -1, -1 },
},
[C(ITLB)] = { /* RESULT_ACCESS RESULT_MISS */
[C(OP_READ)] = { 0, 0x800c0 },
[C(OP_WRITE)] = { -1, -1 },
[C(OP_PREFETCH)] = { -1, -1 },
},
[C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */
[C(OP_READ)] = { 0x230e4, 0x230e5 },
[C(OP_WRITE)] = { -1, -1 },
[C(OP_PREFETCH)] = { -1, -1 },
},
};
/*
 * PMU description for POWER5: counter count, constraint adder constants,
 * the per-CPU callbacks defined above, and the generic/cache event tables.
 */
struct power_pmu power5_pmu = {
.n_counter = 6,
.max_alternatives = MAX_ALT,
/* NOTE(review): presumably the low bit of each count field (NC, PS1, PS2, P1-P6) in the constraint layout - confirm */
.add_fields = 0x7000090000555ull,
.test_adder = 0x3000490000000ull,
.compute_mmcr = power5_compute_mmcr,
.get_constraint = power5_get_constraint,
.get_alternatives = power5_get_alternatives,
.disable_pmc = power5_disable_pmc,
.n_generic = ARRAY_SIZE(power5_generic_events),
.generic_events = power5_generic_events,
.cache_events = &power5_cache_events,
};
/*
* Performance counter support for POWER6 processors.
*
* Copyright 2008-2009 Paul Mackerras, IBM Corporation.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/kernel.h>
#include <linux/perf_counter.h>
#include <asm/reg.h>
/*
* Bits in event code for POWER6
*/
#define PM_PMC_SH 20 /* PMC number (1-based) for direct events */
#define PM_PMC_MSK 0x7
#define PM_PMC_MSKS (PM_PMC_MSK << PM_PMC_SH)
#define PM_UNIT_SH 16 /* Unit event comes (TTMxSEL encoding) */
#define PM_UNIT_MSK 0xf
#define PM_UNIT_MSKS (PM_UNIT_MSK << PM_UNIT_SH)
#define PM_LLAV 0x8000 /* Load lookahead match value */
#define PM_LLA 0x4000 /* Load lookahead match enable */
#define PM_BYTE_SH 12 /* Byte of event bus to use */
#define PM_BYTE_MSK 3
#define PM_SUBUNIT_SH 8 /* Subunit event comes from (NEST_SEL enc.) */
#define PM_SUBUNIT_MSK 7
#define PM_SUBUNIT_MSKS (PM_SUBUNIT_MSK << PM_SUBUNIT_SH)
#define PM_PMCSEL_MSK 0xff /* PMCxSEL value */
/* Covers the unit, byte and subunit fields; an event with any of these bits set is treated as a bus event */
#define PM_BUSEVENT_MSK 0xf3700
/*
* Bits in MMCR1 for POWER6
*/
#define MMCR1_TTM0SEL_SH 60
/* n is 0-based: each successive TTMSEL field sits 4 bits lower */
#define MMCR1_TTMSEL_SH(n) (MMCR1_TTM0SEL_SH - (n) * 4)
#define MMCR1_TTMSEL_MSK 0xf
/* extract TTMSEL field n from MMCR1 value m */
#define MMCR1_TTMSEL(m, n) (((m) >> MMCR1_TTMSEL_SH(n)) & MMCR1_TTMSEL_MSK)
#define MMCR1_NESTSEL_SH 45
#define MMCR1_NESTSEL_MSK 0x7
/* extract the NESTSEL field from MMCR1 value m */
#define MMCR1_NESTSEL(m) (((m) >> MMCR1_NESTSEL_SH) & MMCR1_NESTSEL_MSK)
#define MMCR1_PMC1_LLA ((u64)1 << 44)
#define MMCR1_PMC1_LLA_VALUE ((u64)1 << 39)
#define MMCR1_PMC1_ADDR_SEL ((u64)1 << 35)
#define MMCR1_PMC1SEL_SH 24
/* n is the 0-based PMC index: n = 0 gives MMCR1_PMC1SEL_SH */
#define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8)
#define MMCR1_PMCSEL_MSK 0xff
/*
* Map of which direct events on which PMCs are marked instruction events.
* Indexed by PMCSEL value >> 1.
* Bottom 4 bits are a map of which PMCs are interesting,
* top 4 bits say what sort of event:
* 0 = direct marked event,
* 1 = byte decode event,
* 4 = add/and event (PMC1 -> bits 0 & 4),
* 5 = add/and event (PMC1 -> bits 1 & 5),
* 6 = add/and event (PMC1 -> bits 2 & 6),
* 7 = add/and event (PMC1 -> bits 3 & 7).
*/
/*
 * In the PMC map (low 4 bits) bit n-1 stands for PMCn, as tested by
 * power6_marked_instr_event().  PMCSEL values of 0x60 and above are not
 * in the table.
 */
static unsigned char direct_event_is_marked[0x60 >> 1] = {
0, /* 00 */
0, /* 02 */
0, /* 04 */
0x07, /* 06 PM_MRK_ST_CMPL, PM_MRK_ST_GPS, PM_MRK_ST_CMPL_INT */
0x04, /* 08 PM_MRK_DFU_FIN */
0x06, /* 0a PM_MRK_IFU_FIN, PM_MRK_INST_FIN */
0, /* 0c */
0, /* 0e */
0x02, /* 10 PM_MRK_INST_DISP */
0x08, /* 12 PM_MRK_LSU_DERAT_MISS */
0, /* 14 */
0, /* 16 */
0x0c, /* 18 PM_THRESH_TIMEO, PM_MRK_INST_FIN */
0x0f, /* 1a PM_MRK_INST_DISP, PM_MRK_{FXU,FPU,LSU}_FIN */
0x01, /* 1c PM_MRK_INST_ISSUED */
0, /* 1e */
0, /* 20 */
0, /* 22 */
0, /* 24 */
0, /* 26 */
/* type 1 (byte decode) on PMC1 and PMC3 */
0x15, /* 28 PM_MRK_DATA_FROM_L2MISS, PM_MRK_DATA_FROM_L3MISS */
0, /* 2a */
0, /* 2c */
0, /* 2e */
/* 30 - 3a: add/and events (types 4-7) on all of PMC1-4 */
0x4f, /* 30 */
0x7f, /* 32 */
0x4f, /* 34 */
0x5f, /* 36 */
0x6f, /* 38 */
0x4f, /* 3a */
0, /* 3c */
0x08, /* 3e PM_MRK_INST_TIMEO */
/* 40 - 4e: type 1 (byte decode) on all of PMC1-4 */
0x1f, /* 40 */
0x1f, /* 42 */
0x1f, /* 44 */
0x1f, /* 46 */
0x1f, /* 48 */
0x1f, /* 4a */
0x1f, /* 4c */
0x1f, /* 4e */
0, /* 50 */
0x05, /* 52 PM_MRK_BR_TAKEN, PM_MRK_BR_MPRED */
0x1c, /* 54 PM_MRK_PTEG_FROM_L3MISS, PM_MRK_PTEG_FROM_L2MISS */
0x02, /* 56 PM_MRK_LD_MISS_L1 */
0, /* 58 */
0, /* 5a */
0, /* 5c */
0, /* 5e */
};
/*
 * Per-unit masks of the event-bus bits that are marked events, indexed by
 * the unit field of the event code.
 * Masks are in LE order, i.e. 0x00000001 is byte 0, bit 0.
 * Units that are not listed have no marked bus events (mask 0).
 */
static u32 marked_bus_events[16] = {
	[0]  = 0x01000000,	/* direct events set 1: byte 3 bit 0 */
	[1]  = 0x00010000,	/* direct events set 2: byte 2 bit 0 */
	/* [2]..[5]: IDU, IFU, nest: nothing */
	[6]  = 0x00000088,	/* VMX set 1: byte 0 bits 3, 7 */
	/*
	 * VMX set 2: 0xc0 selects byte 0 bits 6, 7.
	 * NOTE(review): this entry was previously described as "bits 4-7"
	 * (which would be 0xf0) -- confirm which one is intended.
	 */
	[7]  = 0x000000c0,
	[8]  = 0x04010000,	/* LSU set 1: byte 2 bit 0, byte 3 bit 2 */
	[9]  = 0xff010000u,	/* LSU set 2: byte 2 bit 0, all of byte 3 */
	/* [10]: LSU set 3: nothing */
	[11] = 0x00000010,	/* VMX set 3: byte 0 bit 4 */
	/* [12]: BFP set 1: nothing */
	[13] = 0x00000022,	/* BFP set 2: byte 0 bits 1, 5 */
	/* [14], [15]: nothing */
};
/*
 * Decide whether an event counts something about marked instructions.
 *
 * Returns 1 if it does (the caller then sets MMCRA_SAMPLE_ENABLE),
 * 0 otherwise.
 */
static int power6_marked_instr_event(u64 event)
{
	int pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
	int sel = (event & PM_PMCSEL_MSK) >> 1;	/* PMCSEL minus edge/level bit */
	int kind;
	int busbit = -1;	/* event-bus bit to test; -1 = none */
	int lane, src;

	/* Events on PMC5 and above are never marked events */
	if (pmc >= 5)
		return 0;

	if (sel < sizeof(direct_event_is_marked)) {
		kind = direct_event_is_marked[sel];

		/* needs an explicit PMC that the table lists for this PMCSEL */
		if (!pmc || !(kind & (1 << (pmc - 1))))
			return 0;

		kind >>= 4;
		switch (kind) {
		case 0:		/* direct marked event */
			return 1;
		case 1:		/* byte decode event */
			busbit = 0;
			break;
		default:	/* add/and event: bit depends on the PMC */
			busbit = kind ^ (pmc - 1);
			break;
		}
	} else if ((sel & 0x48) == 0x40) {
		busbit = sel & 7;
	}

	if (busbit == -1 || !(event & PM_BUSEVENT_MSK))
		return 0;

	/* Look the bit up in the mask for the unit driving this bus byte */
	lane = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
	src = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
	return (marked_bus_events[src] >> (lane * 8 + busbit)) & 1;
}
/*
 * Assign PMC numbers and compute the MMCR values for a set of events.
 *
 * On success returns 0 with:
 *   hwc[i]  = 0-based index of the PMC chosen for event[i],
 *   mmcr[0] = MMCR0_PMC1CE and/or MMCR0_PMCjCE according to the PMCs in use,
 *   mmcr[1] = the computed MMCR1 value,
 *   mmcr[2] = the computed MMCRA value.
 * Returns -1 (mmcr[] untouched, hwc[] possibly partly written) if there are
 * more than 6 events, two events name the same PMC, an any-PMC event finds
 * none of the first four PMCs free, or two events want a different unit
 * (or nest selector) on the same byte of the event bus.
 */
static int p6_compute_mmcr(u64 event[], int n_ev,
unsigned int hwc[], u64 mmcr[])
{
u64 mmcr1 = 0;
u64 mmcra = 0;
int i;
unsigned int pmc, ev, b, u, s, psel;
/* bits 0-3: event-bus bytes with a unit already selected; 0x10: nest selector set */
unsigned int ttmset = 0;
/* bitmap of PMCs already taken, bit n = 0-based PMC n */
unsigned int pmc_inuse = 0;
if (n_ev > 6)
return -1;
/* First pass: reserve the PMCs that events ask for explicitly */
for (i = 0; i < n_ev; ++i) {
pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
if (pmc) {
if (pmc_inuse & (1 << (pmc - 1)))
return -1; /* collision! */
pmc_inuse |= 1 << (pmc - 1);
}
}
/* Second pass: place the remaining events and accumulate MMCR1/MMCRA */
for (i = 0; i < n_ev; ++i) {
/* note: ev is an unsigned int, so only the low 32 bits of the event code are used below */
ev = event[i];
pmc = (ev >> PM_PMC_SH) & PM_PMC_MSK;
if (pmc) {
--pmc;
} else {
/* can go on any PMC; find a free one */
for (pmc = 0; pmc < 4; ++pmc)
if (!(pmc_inuse & (1 << pmc)))
break;
if (pmc >= 4)
return -1;
pmc_inuse |= 1 << pmc;
}
hwc[i] = pmc;
psel = ev & PM_PMCSEL_MSK;
if (ev & PM_BUSEVENT_MSK) {
/* this event uses the event bus */
b = (ev >> PM_BYTE_SH) & PM_BYTE_MSK;
u = (ev >> PM_UNIT_SH) & PM_UNIT_MSK;
/* check for conflict on this byte of event bus */
if ((ttmset & (1 << b)) && MMCR1_TTMSEL(mmcr1, b) != u)
return -1;
mmcr1 |= (u64)u << MMCR1_TTMSEL_SH(b);
ttmset |= 1 << b;
if (u == 5) {
/* Nest events have a further mux */
s = (ev >> PM_SUBUNIT_SH) & PM_SUBUNIT_MSK;
if ((ttmset & 0x10) &&
MMCR1_NESTSEL(mmcr1) != s)
return -1;
ttmset |= 0x10;
mmcr1 |= (u64)s << MMCR1_NESTSEL_SH;
}
if (0x30 <= psel && psel <= 0x3d) {
/* these need the PMCx_ADDR_SEL bits */
if (b >= 2)
mmcr1 |= MMCR1_PMC1_ADDR_SEL >> pmc;
}
/* bus select values are different for PMC3/4 */
if (pmc >= 2 && (psel & 0x90) == 0x80)
psel ^= 0x20;
}
if (ev & PM_LLA) {
mmcr1 |= MMCR1_PMC1_LLA >> pmc;
if (ev & PM_LLAV)
mmcr1 |= MMCR1_PMC1_LLA_VALUE >> pmc;
}
if (power6_marked_instr_event(event[i]))
mmcra |= MMCRA_SAMPLE_ENABLE;
/* only the first four PMCs have a PMCSEL field in MMCR1 */
if (pmc < 4)
mmcr1 |= (u64)psel << MMCR1_PMCSEL_SH(pmc);
}
mmcr[0] = 0;
if (pmc_inuse & 1)
mmcr[0] = MMCR0_PMC1CE;
/* 0xe covers 0-based PMCs 1-3, i.e. PMC2..PMC4 */
if (pmc_inuse & 0xe)
mmcr[0] |= MMCR0_PMCjCE;
mmcr[1] = mmcr1;
mmcr[2] = mmcra;
return 0;
}
/*
* Layout of constraint bits:
*
* 0-1 add field: number of uses of PMC1 (max 1)
* 2-3, 4-5, 6-7, 8-9, 10-11: ditto for PMC2, 3, 4, 5, 6
* 12-15 add field: number of uses of PMC1-4 (max 4)
* 16-19 select field: unit on byte 0 of event bus
* 20-23, 24-27, 28-31 ditto for bytes 1, 2, 3
* 32-34 select field: nest (subunit) event selector
*/
/*
 * Compute the constraint mask/value pair for one event.
 *
 * Returns 0 and fills *maskp / *valp on success.  Returns -1, leaving both
 * untouched, for an event that names a PMC above 4 and is not one of the
 * two codes accepted there (0x500009, 0x600005).
 */
static int p6_get_constraint(u64 event, u64 *maskp, u64 *valp)
{
	u64 mask = 0, value = 0;
	int pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
	int shift;

	if (pmc > 4 && event != 0x500009 && event != 0x600005)
		return -1;

	if (pmc) {
		/* one use of this specific PMC: 2-bit add field per PMC */
		shift = (pmc - 1) * 2;
		mask |= 2 << shift;
		value |= 1 << shift;
	}

	if (event & PM_BUSEVENT_MSK) {
		int lane = (event >> PM_BYTE_SH) & PM_BYTE_MSK;

		/* select field: unit driving this byte of the event bus */
		shift = lane * 4 + (16 - PM_UNIT_SH);
		mask |= PM_UNIT_MSKS << shift;
		value |= (u64)(event & PM_UNIT_MSKS) << shift;

		if ((event & PM_UNIT_MSKS) == (5 << PM_UNIT_SH)) {
			/* unit 5 (nest) also constrains the subunit selector */
			int nest = (event >> PM_SUBUNIT_SH) & PM_SUBUNIT_MSK;

			mask |= (u64)PM_SUBUNIT_MSK << 32;
			value |= (u64)nest << 32;
		}
	}

	if (pmc <= 4) {
		/* add field for count of PMC1-4 uses (any-PMC events included) */
		mask |= 0x8000;
		value |= 0x1000;
	}

	*maskp = mask;
	*valp = value;
	return 0;
}
/* Returns 1 if the event code names PMC5 or PMC6, 0 otherwise. */
static int p6_limited_pmc_event(u64 event)
{
	int pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;

	switch (pmc) {
	case 5:
	case 6:
		return 1;
	default:
		return 0;
	}
}
#define MAX_ALT 4 /* at most 4 alternatives for any event */
/*
 * Each row lists the event codes that are alternatives for the event named
 * in the row's comment; unused trailing slots are 0.
 *
 * Ordering matters: find_alternatives_list() gives up as soon as the event
 * it is looking for is smaller than a row's first entry, and stops scanning
 * a row at the first entry larger than the event.  Rows must therefore stay
 * sorted by their first entry, and the entries within a row must stay in
 * ascending order.
 */
static const unsigned int event_alternatives[][MAX_ALT] = {
{ 0x0130e8, 0x2000f6, 0x3000fc }, /* PM_PTEG_RELOAD_VALID */
{ 0x080080, 0x10000d, 0x30000c, 0x4000f0 }, /* PM_LD_MISS_L1 */
{ 0x080088, 0x200054, 0x3000f0 }, /* PM_ST_MISS_L1 */
{ 0x10000a, 0x2000f4, 0x600005 }, /* PM_RUN_CYC */
{ 0x10000b, 0x2000f5 }, /* PM_RUN_COUNT */
{ 0x10000e, 0x400010 }, /* PM_PURR */
{ 0x100010, 0x4000f8 }, /* PM_FLUSH */
{ 0x10001a, 0x200010 }, /* PM_MRK_INST_DISP */
{ 0x100026, 0x3000f8 }, /* PM_TB_BIT_TRANS */
{ 0x100054, 0x2000f0 }, /* PM_ST_FIN */
{ 0x100056, 0x2000fc }, /* PM_L1_ICACHE_MISS */
{ 0x1000f0, 0x40000a }, /* PM_INST_IMC_MATCH_CMPL */
{ 0x1000f8, 0x200008 }, /* PM_GCT_EMPTY_CYC */
{ 0x1000fc, 0x400006 }, /* PM_LSU_DERAT_MISS_CYC */
{ 0x20000e, 0x400007 }, /* PM_LSU_DERAT_MISS */
{ 0x200012, 0x300012 }, /* PM_INST_DISP */
{ 0x2000f2, 0x3000f2 }, /* PM_INST_DISP */
{ 0x2000f8, 0x300010 }, /* PM_EXT_INT */
{ 0x2000fe, 0x300056 }, /* PM_DATA_FROM_L2MISS */
{ 0x2d0030, 0x30001a }, /* PM_MRK_FPU_FIN */
{ 0x30000a, 0x400018 }, /* PM_MRK_INST_FIN */
{ 0x3000f6, 0x40000e }, /* PM_L1_DCACHE_RELOAD_VALID */
{ 0x3000fe, 0x400056 }, /* PM_DATA_FROM_L3MISS */
};
/*
 * Find the row of event_alternatives[] that contains the given event.
 * Returns the row index, or -1 if the event is not in the table.
 *
 * This is a linear scan that relies on the table being sorted; it could
 * be turned into a binary search on a presorted list if that ever matters.
 */
static int find_alternatives_list(u64 event)
{
	int row, col;

	for (row = 0; row < ARRAY_SIZE(event_alternatives); ++row) {
		const unsigned int *codes = event_alternatives[row];

		/* every later row starts with an even larger code */
		if (event < codes[0])
			break;

		/* walk the row until its end, a 0 slot, or a code past the event */
		for (col = 0;
		     col < MAX_ALT && codes[col] && event >= codes[col];
		     ++col) {
			if (codes[col] == event)
				return row;
		}
	}
	return -1;
}
/*
 * Fill alt[] with the event codes that can be used in place of event and
 * return how many were stored.  alt[0] is the event itself unless the
 * final filtering step removes it.
 *
 * flags:
 *   PPMU_ONLY_COUNT_RUN    - also offer the run-state equivalents
 *                            (PM_CYC <-> PM_RUN_CYC and so on);
 *   PPMU_LIMITED_PMC_OK    - if clear, codes on the limited PMCs (5/6) are
 *                            dropped from the result;
 *   PPMU_LIMITED_PMC_REQD  - if set (and LIMITED_PMC_OK did not trigger the
 *                            removal above), only limited-PMC codes are kept.
 */
static int p6_get_alternatives(u64 event, unsigned int flags, u64 alt[])
{
int i, j, nlim;
unsigned int psel, pmc;
unsigned int nalt = 1;
u64 aevent;
alt[0] = event;
/* nlim counts limited-PMC codes seen; it is used below as "any?" and in the nlim < nalt test */
nlim = p6_limited_pmc_event(event);
/* check the alternatives table */
i = find_alternatives_list(event);
if (i >= 0) {
/* copy out alternatives from list */
for (j = 0; j < MAX_ALT; ++j) {
aevent = event_alternatives[i][j];
if (!aevent)
break;
if (aevent != event)
alt[nalt++] = aevent;
/* NOTE(review): this also runs for the entry equal to event, which was already counted above */
nlim += p6_limited_pmc_event(aevent);
}
} else {
/* Check for alternative ways of computing sum events */
/* PMCSEL 0x32 counter N == PMCSEL 0x34 counter 5-N */
psel = event & (PM_PMCSEL_MSK & ~1); /* ignore edge bit */
pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
if (pmc && (psel == 0x32 || psel == 0x34))
alt[nalt++] = ((event ^ 0x6) & ~PM_PMC_MSKS) |
((5 - pmc) << PM_PMC_SH);
/* PMCSEL 0x38 counter N == PMCSEL 0x3a counter N+/-2 */
if (pmc && (psel == 0x38 || psel == 0x3a))
alt[nalt++] = ((event ^ 0x2) & ~PM_PMC_MSKS) |
((pmc > 2? pmc - 2: pmc + 2) << PM_PMC_SH);
}
if (flags & PPMU_ONLY_COUNT_RUN) {
/*
* We're only counting in RUN state,
* so PM_CYC is equivalent to PM_RUN_CYC,
* PM_INST_CMPL === PM_RUN_INST_CMPL, PM_PURR === PM_RUN_PURR.
* This doesn't include alternatives that don't provide
* any extra flexibility in assigning PMCs (e.g.
* 0x10000a for PM_RUN_CYC vs. 0x1e for PM_CYC).
* Note that even with these additional alternatives
* we never end up with more than 4 alternatives for any event.
*/
/* new entries are appended at j; the scan only covers the nalt entries present before this step */
j = nalt;
for (i = 0; i < nalt; ++i) {
switch (alt[i]) {
case 0x1e: /* PM_CYC */
alt[j++] = 0x600005; /* PM_RUN_CYC */
++nlim;
break;
case 0x10000a: /* PM_RUN_CYC */
alt[j++] = 0x1e; /* PM_CYC */
break;
case 2: /* PM_INST_CMPL */
alt[j++] = 0x500009; /* PM_RUN_INST_CMPL */
++nlim;
break;
case 0x500009: /* PM_RUN_INST_CMPL */
alt[j++] = 2; /* PM_INST_CMPL */
break;
case 0x10000e: /* PM_PURR */
alt[j++] = 0x4000f4; /* PM_RUN_PURR */
break;
case 0x4000f4: /* PM_RUN_PURR */
alt[j++] = 0x10000e; /* PM_PURR */
break;
}
}
nalt = j;
}
/* Both filters below compact alt[] in place, preserving the order of the entries kept */
if (!(flags & PPMU_LIMITED_PMC_OK) && nlim) {
/* remove the limited PMC events */
j = 0;
for (i = 0; i < nalt; ++i) {
if (!p6_limited_pmc_event(alt[i])) {
alt[j] = alt[i];
++j;
}
}
nalt = j;
} else if ((flags & PPMU_LIMITED_PMC_REQD) && nlim < nalt) {
/* remove all but the limited PMC events */
j = 0;
for (i = 0; i < nalt; ++i) {
if (p6_limited_pmc_event(alt[i])) {
alt[j] = alt[i];
++j;
}
}
nalt = j;
}
return nalt;
}
/*
 * Disable a PMC by clearing its PMCSEL field in mmcr[1].
 * pmc is 0-based; only PMCs 0..3 have a PMCSEL field, so anything above
 * that is left alone.
 */
static void p6_disable_pmc(unsigned int pmc, u64 mmcr[])
{
	if (pmc > 3)
		return;

	/* Set PMCxSEL to 0 to disable PMCx */
	mmcr[1] &= ~(0xffUL << MMCR1_PMCSEL_SH(pmc));
}
/*
 * POWER6 event codes for the generic hardware events, indexed by the
 * PERF_COUNT_HW_* event id.
 */
static int power6_generic_events[] = {
[PERF_COUNT_HW_CPU_CYCLES] = 0x1e,
[PERF_COUNT_HW_INSTRUCTIONS] = 2,
[PERF_COUNT_HW_CACHE_REFERENCES] = 0x280030, /* LD_REF_L1 */
[PERF_COUNT_HW_CACHE_MISSES] = 0x30000c, /* LD_MISS_L1 */
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x410a0, /* BR_PRED */
[PERF_COUNT_HW_BRANCH_MISSES] = 0x400052, /* BR_MPRED */
};
#define C(x) PERF_COUNT_HW_CACHE_##x
/*
 * Table of generalized cache-related events, indexed by
 * [cache][operation][result]; within each row the first value is the
 * RESULT_ACCESS code and the second the RESULT_MISS code.
 * 0 means not supported, -1 means nonsensical, other values
 * are event codes.
 * The "DTLB" and "ITLB" events relate to the DERAT and IERAT.
 */
static int power6_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
[C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */
[C(OP_READ)] = { 0x80082, 0x80080 },
[C(OP_WRITE)] = { 0x80086, 0x80088 },
[C(OP_PREFETCH)] = { 0x810a4, 0 },
},
[C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */
[C(OP_READ)] = { 0, 0x100056 },
[C(OP_WRITE)] = { -1, -1 },
[C(OP_PREFETCH)] = { 0x4008c, 0 },
},
[C(LL)] = { /* RESULT_ACCESS RESULT_MISS */
[C(OP_READ)] = { 0x150730, 0x250532 },
[C(OP_WRITE)] = { 0x250432, 0x150432 },
[C(OP_PREFETCH)] = { 0x810a6, 0 },
},
[C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */
[C(OP_READ)] = { 0, 0x20000e },
[C(OP_WRITE)] = { -1, -1 },
[C(OP_PREFETCH)] = { -1, -1 },
},
[C(ITLB)] = { /* RESULT_ACCESS RESULT_MISS */
[C(OP_READ)] = { 0, 0x420ce },
[C(OP_WRITE)] = { -1, -1 },
[C(OP_PREFETCH)] = { -1, -1 },
},
[C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */
[C(OP_READ)] = { 0x430e6, 0x400052 },
[C(OP_WRITE)] = { -1, -1 },
[C(OP_PREFETCH)] = { -1, -1 },
},
};
/* PMU description for POWER6: six counters, of which PMC5/6 are limited. */
struct power_pmu power6_pmu = {
.n_counter = 6,
.max_alternatives = MAX_ALT,
/* low bit of every add field in the constraint layout: PMC1..6 (bits 0,2,..,10) and the PMC1-4 count (bit 12) */
.add_fields = 0x1555,
/* NOTE(review): presumably biases the PMC1-4 count so that a fifth use sets bit 15 (the 0x8000 mask bit) -- confirm */
.test_adder = 0x3000,
.compute_mmcr = p6_compute_mmcr,
.get_constraint = p6_get_constraint,
.get_alternatives = p6_get_alternatives,
.disable_pmc = p6_disable_pmc,
.limited_pmc_event = p6_limited_pmc_event,
.flags = PPMU_LIMITED_PMC5_6 | PPMU_ALT_SIPR,
.n_generic = ARRAY_SIZE(power6_generic_events),
.generic_events = power6_generic_events,
.cache_events = &power6_cache_events,
};
/*
* Performance counter support for POWER7 processors.
*
* Copyright 2009 Paul Mackerras, IBM Corporation.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/kernel.h>
#include <linux/perf_counter.h>
#include <asm/reg.h>
/*
* Bits in event code for POWER7
*/
#define PM_PMC_SH 16 /* PMC number (1-based) for direct events */
#define PM_PMC_MSK 0xf
#define PM_PMC_MSKS (PM_PMC_MSK << PM_PMC_SH)
#define PM_UNIT_SH 12 /* TTMMUX number and setting - unit select */
#define PM_UNIT_MSK 0xf
#define PM_COMBINE_SH 11 /* Combined event bit */
#define PM_COMBINE_MSK 1
#define PM_COMBINE_MSKS 0x800
#define PM_L2SEL_SH 8 /* L2 event select */
#define PM_L2SEL_MSK 7
#define PM_PMCSEL_MSK 0xff
/*
* Bits in MMCR1 for POWER7
*/
/* TTMxSEL: one 4-bit unit-select field per PMC, 4 bits apart (TTM0 at bit 60) */
#define MMCR1_TTM0SEL_SH 60
#define MMCR1_TTM1SEL_SH 56
#define MMCR1_TTM2SEL_SH 52
#define MMCR1_TTM3SEL_SH 48
#define MMCR1_TTMSEL_MSK 0xf
#define MMCR1_L2SEL_SH 45
#define MMCR1_L2SEL_MSK 7
/* One combine bit per PMC, PMC1 at bit 35 down to PMC4 at bit 32 */
#define MMCR1_PMC1_COMBINE_SH 35
#define MMCR1_PMC2_COMBINE_SH 34
#define MMCR1_PMC3_COMBINE_SH 33
#define MMCR1_PMC4_COMBINE_SH 32
/* 8-bit PMCSEL field per PMC, PMC1 at bit 24 down to PMC4 at bit 0 */
#define MMCR1_PMC1SEL_SH 24
#define MMCR1_PMC2SEL_SH 16
#define MMCR1_PMC3SEL_SH 8
#define MMCR1_PMC4SEL_SH 0
/* Shift of the PMCSEL field for 0-based PMC n (n = 0..3) */
#define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8)
#define MMCR1_PMCSEL_MSK 0xff
/*
* Bits in MMCRA
*/
/*
* Layout of constraint bits:
* 6666555555555544444444443333333333222222222211111111110000000000
* 3210987654321098765432109876543210987654321098765432109876543210
* [ ><><><><><><>
* NC P6P5P4P3P2P1
*
* NC - number of counters
* 15: NC error 0x8000
* 12-14: number of events needing PMC1-4 0x7000
*
* P6
* 11: P6 error 0x800
* 10-11: Count of events needing PMC6
*
* P1..P5
* 0-9: Count of events needing PMC1..PMC5
*/
/*
 * Compute the constraint mask/value pair for one event.
 *
 * Returns 0 and fills *maskp / *valp, or -1 (outputs untouched) when the
 * event names a PMC above 6, or names PMC5/6 without being one of the two
 * codes accepted there (0x500fa, 0x600f4).
 */
static int power7_get_constraint(u64 event, u64 *maskp, u64 *valp)
{
	u64 mask = 0, value = 0;
	int pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
	int shift;

	if (pmc > 6)
		return -1;
	if (pmc >= 5 && event != 0x500fa && event != 0x600f4)
		return -1;

	if (pmc) {
		/* one use of this specific PMC */
		shift = (pmc - 1) * 2;
		mask |= 2 << shift;
		value |= 1 << shift;
	}

	if (pmc < 5) {
		/* need a counter from PMC1-4 set */
		mask |= 0x8000;
		value |= 0x1000;
	}

	*maskp = mask;
	*valp = value;
	return 0;
}
#define MAX_ALT 2 /* at most 2 alternatives for any event */
/*
 * Each row lists two event codes for the event named in the row's comment.
 * find_alternative() stops scanning once the event is smaller than a
 * row's first entry, so rows must stay sorted by their first entry.
 */
static const unsigned int event_alternatives[][MAX_ALT] = {
{ 0x200f2, 0x300f2 }, /* PM_INST_DISP */
{ 0x200f4, 0x600f4 }, /* PM_RUN_CYC */
{ 0x400fa, 0x500fa }, /* PM_RUN_INST_CMPL */
};
/*
 * Look the event up in event_alternatives[].
 * Returns the index of the row containing it, or -1 if there is none.
 */
static int find_alternative(u64 event)
{
	int row = 0;

	while (row < ARRAY_SIZE(event_alternatives)) {
		const unsigned int *codes = event_alternatives[row];
		int col = 0;

		/* rows are sorted: nothing further down can match */
		if (event < codes[0])
			break;

		while (col < MAX_ALT && codes[col]) {
			if (codes[col] == event)
				return row;
			++col;
		}
		++row;
	}
	return -1;
}
/*
 * Compute the alternative code for a 4x decode event.
 *
 * PMCSEL 0x40-0x47 on PMC2/PMC4 pairs with PMCSEL 0x48-0x4f on the
 * next-lower PMC, and vice versa.  Returns the paired event code, or -1
 * if the event is not one of these.
 */
static s64 find_alternative_decode(u64 event)
{
	int pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
	int psel = event & PM_PMCSEL_MSK;

	switch (psel & ~7) {
	case 0x40:
		if (pmc == 2 || pmc == 4)
			return event - (1 << PM_PMC_SH) + 8;
		break;
	case 0x48:
		if (pmc == 1 || pmc == 3)
			return event + (1 << PM_PMC_SH) - 8;
		break;
	}
	return -1;
}
/*
 * Fill alt[] with the event codes usable in place of event (alt[0] is the
 * event itself) and return how many entries were stored.
 *
 * With PPMU_ONLY_COUNT_RUN set in flags, the run-state equivalents
 * (PM_CYC <-> PM_RUN_CYC, PM_PPC_CMPL <-> PM_RUN_INST_CMPL) are appended
 * for every code already collected.
 */
static int power7_get_alternatives(u64 event, unsigned int flags, u64 alt[])
{
	int row, col, idx;
	int count, total;
	s64 candidate;

	alt[0] = event;
	count = 1;

	row = find_alternative(event);
	if (row < 0) {
		/* not in the table: maybe a decode event with a twin */
		candidate = find_alternative_decode(event);
		if (candidate > 0)
			alt[count++] = candidate;
	} else {
		for (col = 0; col < MAX_ALT; ++col) {
			candidate = event_alternatives[row][col];
			if (candidate && candidate != event)
				alt[count++] = candidate;
		}
	}

	if (!(flags & PPMU_ONLY_COUNT_RUN))
		return count;

	/*
	 * We're only counting in RUN state, so PM_CYC is equivalent to
	 * PM_RUN_CYC and PM_INST_CMPL === PM_RUN_INST_CMPL.
	 * This doesn't include alternatives that don't provide
	 * any extra flexibility in assigning PMCs.
	 * Only the entries collected so far are scanned; the new ones are
	 * appended after them.
	 */
	total = count;
	for (idx = 0; idx < count; ++idx) {
		u64 code = alt[idx];

		if (code == 0x1e)		/* PM_CYC */
			alt[total++] = 0x600f4;	/* PM_RUN_CYC */
		else if (code == 0x600f4)	/* PM_RUN_CYC */
			alt[total++] = 0x1e;
		else if (code == 0x2)		/* PM_PPC_CMPL */
			alt[total++] = 0x500fa;	/* PM_RUN_INST_CMPL */
		else if (code == 0x500fa)	/* PM_RUN_INST_CMPL */
			alt[total++] = 0x2;	/* PM_PPC_CMPL */
	}
	return total;
}
/*
 * Decide whether an event counts something about marked instructions.
 *
 * Returns 1 if it does (the caller then sets MMCRA_SAMPLE_ENABLE),
 * 0 otherwise.
 */
static int power7_marked_instr_event(u64 event)
{
	int pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
	int unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
	int psel = event & PM_PMCSEL_MSK & ~1;	/* trim off edge/level bit */
	int hi = psel >> 4;

	if (pmc >= 5)
		return 0;

	if (hi == 2)
		return pmc == 2 || pmc == 4;

	if (hi == 3) {
		if (psel == 0x3c)
			return pmc == 1;
		if (psel == 0x3e)
			return pmc != 2;
		return 1;
	}

	/* 0x64 is the one 0x6x selector decided by the PMC number */
	if (psel == 0x64)
		return pmc >= 3;

	/* 0x4x, 0x5x, the remaining 0x6x and 0x8x depend on the unit alone */
	if (hi == 4 || hi == 5 || hi == 6 || hi == 8)
		return unit == 0xd;

	return 0;
}
/*
 * Assign a PMC to each event and build the MMCR values.
 *
 * On success returns 0 with hwc[i] holding the 0-based PMC index for
 * event[i], mmcr[0] the MMCR0 counter-enable bits, mmcr[1] the MMCR1 value
 * and mmcr[2] the MMCRA value.  Returns -1 if an event names a PMC above 6,
 * two events name the same PMC, or an any-PMC event finds PMC1-4 all taken.
 */
static int power7_compute_mmcr(u64 event[], int n_ev,
			       unsigned int hwc[], u64 mmcr[])
{
	u64 mmcr1 = 0, mmcra = 0;
	unsigned int used = 0;		/* bit n set = 0-based PMC n taken */
	unsigned int pmc, unit, combine, l2sel, psel;
	int n;

	/* First pass: reserve the explicitly requested PMCs */
	for (n = 0; n < n_ev; ++n) {
		pmc = (event[n] >> PM_PMC_SH) & PM_PMC_MSK;
		if (!pmc)
			continue;
		if (pmc > 6 || (used & (1 << (pmc - 1))))
			return -1;
		used |= 1 << (pmc - 1);
	}

	/* Second pass: assign PMCs, set all MMCR1 fields */
	for (n = 0; n < n_ev; ++n) {
		u64 ev = event[n];

		pmc = (ev >> PM_PMC_SH) & PM_PMC_MSK;
		unit = (ev >> PM_UNIT_SH) & PM_UNIT_MSK;
		combine = (ev >> PM_COMBINE_SH) & PM_COMBINE_MSK;
		l2sel = (ev >> PM_L2SEL_SH) & PM_L2SEL_MSK;
		psel = ev & PM_PMCSEL_MSK;

		if (pmc) {
			/* Direct or decoded event: convert to 0-based */
			--pmc;
		} else {
			/* Bus event or any-PMC direct event: first free of PMC1-4 */
			while (pmc < 4 && (used & (1 << pmc)))
				++pmc;
			if (pmc >= 4)
				return -1;
			used |= 1 << pmc;
		}

		/* only PMC1-4 have selector fields in MMCR1 */
		if (pmc <= 3) {
			mmcr1 |= (u64) unit << (MMCR1_TTM0SEL_SH - 4 * pmc);
			mmcr1 |= (u64) combine << (MMCR1_PMC1_COMBINE_SH - pmc);
			mmcr1 |= psel << MMCR1_PMCSEL_SH(pmc);
			if (unit == 6)	/* L2 events */
				mmcr1 |= (u64) l2sel << MMCR1_L2SEL_SH;
		}

		if (power7_marked_instr_event(ev))
			mmcra |= MMCRA_SAMPLE_ENABLE;

		hwc[n] = pmc;
	}

	/* Return MMCRx values */
	mmcr[0] = (used & 1) ? MMCR0_PMC1CE : 0;
	if (used & 0x3e)
		mmcr[0] |= MMCR0_PMCjCE;
	mmcr[1] = mmcr1;
	mmcr[2] = mmcra;
	return 0;
}
/*
 * Disable a PMC by zeroing its PMCSEL field in mmcr[1].
 * pmc is 0-based; PMCs above 3 have no PMCSEL field and are left alone.
 */
static void power7_disable_pmc(unsigned int pmc, u64 mmcr[])
{
	if (pmc > 3)
		return;

	mmcr[1] &= ~(0xffULL << MMCR1_PMCSEL_SH(pmc));
}
static int power7_generic_events[] = {
[PERF_COUNT_CPU_CYCLES] = 0x1e,
[PERF_COUNT_INSTRUCTIONS] = 2,
[PERF_COUNT_CACHE_REFERENCES] = 0xc880, /* LD_REF_L1_LSU */
[PERF_COUNT_CACHE_MISSES] = 0x400f0, /* LD_MISS_L1 */
[PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x10068, /* BRU_FIN */
[PERF_COUNT_BRANCH_MISSES] = 0x400f6, /* BR_MPRED */
};
#define C(x) PERF_COUNT_HW_CACHE_##x
/*
 * Table of generalized cache-related events, indexed by
 * [cache][operation][result]; within each row the first value is the
 * RESULT_ACCESS code and the second the RESULT_MISS code.
 * 0 means not supported, -1 means nonsensical, other values
 * are event codes.
 */
static int power7_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
	[C(L1D)] = {		/*	RESULT_ACCESS	RESULT_MISS */
		/*
		 * 0xc880 is LD_REF_L1_LSU (loads referencing the L1) and
		 * 0x400f0 is LD_MISS_L1, so the reference count is the
		 * ACCESS column and the miss count the MISS column.
		 */
		[C(OP_READ)] = {	0xc880,		0x400f0	},
		[C(OP_WRITE)] = {	0,		0x300f0	},
		[C(OP_PREFETCH)] = {	0xd8b8,		0	},
	},
	[C(L1I)] = {		/*	RESULT_ACCESS	RESULT_MISS */
		[C(OP_READ)] = {	0,		0x200fc	},
		[C(OP_WRITE)] = {	-1,		-1	},
		[C(OP_PREFETCH)] = {	0x408a,		0	},
	},
	[C(LL)] = {		/*	RESULT_ACCESS	RESULT_MISS */
		[C(OP_READ)] = {	0x6080,		0x6084	},
		[C(OP_WRITE)] = {	0x6082,		0x6086	},
		[C(OP_PREFETCH)] = {	0,		0	},
	},
	[C(DTLB)] = {		/*	RESULT_ACCESS	RESULT_MISS */
		[C(OP_READ)] = {	0,		0x300fc	},
		[C(OP_WRITE)] = {	-1,		-1	},
		[C(OP_PREFETCH)] = {	-1,		-1	},
	},
	[C(ITLB)] = {		/*	RESULT_ACCESS	RESULT_MISS */
		[C(OP_READ)] = {	0,		0x400fc	},
		[C(OP_WRITE)] = {	-1,		-1	},
		[C(OP_PREFETCH)] = {	-1,		-1	},
	},
	[C(BPU)] = {		/*	RESULT_ACCESS	RESULT_MISS */
		[C(OP_READ)] = {	0x10068,	0x400f6	},
		[C(OP_WRITE)] = {	-1,		-1	},
		[C(OP_PREFETCH)] = {	-1,		-1	},
	},
};
/* PMU description for POWER7: six counters. */
struct power_pmu power7_pmu = {
.n_counter = 6,
/* one more than the table width: with PPMU_ONLY_COUNT_RUN, power7_get_alternatives() can append a run-state equivalent on top of a table alternative */
.max_alternatives = MAX_ALT + 1,
/* low bit of every add field in the constraint layout: PMC1..6 (bits 0,2,..,10) and the PMC1-4 count (bit 12) */
.add_fields = 0x1555ull,
/* NOTE(review): presumably biases the PMC1-4 count so that a fifth use sets the NC error bit (0x8000) -- confirm */
.test_adder = 0x3000ull,
.compute_mmcr = power7_compute_mmcr,
.get_constraint = power7_get_constraint,
.get_alternatives = power7_get_alternatives,
.disable_pmc = power7_disable_pmc,
.n_generic = ARRAY_SIZE(power7_generic_events),
.generic_events = power7_generic_events,
.cache_events = &power7_cache_events,
};
/*
* Performance counter support for PPC970-family processors.
*
* Copyright 2008-2009 Paul Mackerras, IBM Corporation.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/string.h>
#include <linux/perf_counter.h>
#include <asm/reg.h>
/*
* Bits in event code for PPC970
*/
#define PM_PMC_SH 12 /* PMC number (1-based) for direct events */
#define PM_PMC_MSK 0xf
#define PM_UNIT_SH 8 /* TTMMUX number and setting - unit select */
#define PM_UNIT_MSK 0xf
#define PM_SPCSEL_SH 6
#define PM_SPCSEL_MSK 3
#define PM_BYTE_SH 4 /* Byte number of event bus to use */
#define PM_BYTE_MSK 3
#define PM_PMCSEL_MSK 0xf
/* Values in PM_UNIT field */
#define PM_NONE 0
#define PM_FPU 1
#define PM_VPU 2
#define PM_ISU 3
#define PM_IFU 4
#define PM_IDU 5
#define PM_STS 6
#define PM_LSU0 7
#define PM_LSU1U 8
#define PM_LSU1L 9
#define PM_LASTUNIT 9
/*
* Bits in MMCR0 for PPC970
*/
/* PMC1 and PMC2 selectors live in MMCR0, 7 bits apart */
#define MMCR0_PMC1SEL_SH 8
#define MMCR0_PMC2SEL_SH 1
/* Width mask of a PMCxSEL field (5 bits) */
#define MMCR_PMCSEL_MSK 0x1f
/*
* Bits in MMCR1 for PPC970
*/
#define MMCR1_TTM0SEL_SH 62
#define MMCR1_TTM1SEL_SH 59
#define MMCR1_TTM3SEL_SH 53
#define MMCR1_TTMSEL_MSK 3
/* Byte lane select fields, 2 bits each, byte 0 at bit 50 down to byte 3 at bit 44 */
#define MMCR1_TD_CP_DBG0SEL_SH 50
#define MMCR1_TD_CP_DBG1SEL_SH 48
#define MMCR1_TD_CP_DBG2SEL_SH 46
#define MMCR1_TD_CP_DBG3SEL_SH 44
/* One adder-select bit per PMC; note the bit order is not the PMC order */
#define MMCR1_PMC1_ADDER_SEL_SH 39
#define MMCR1_PMC2_ADDER_SEL_SH 38
#define MMCR1_PMC6_ADDER_SEL_SH 37
#define MMCR1_PMC5_ADDER_SEL_SH 36
#define MMCR1_PMC8_ADDER_SEL_SH 35
#define MMCR1_PMC7_ADDER_SEL_SH 34
#define MMCR1_PMC3_ADDER_SEL_SH 33
#define MMCR1_PMC4_ADDER_SEL_SH 32
/* PMC3..PMC8 selectors live in MMCR1, 5 bits apart */
#define MMCR1_PMC3SEL_SH 27
#define MMCR1_PMC4SEL_SH 22
#define MMCR1_PMC5SEL_SH 17
#define MMCR1_PMC6SEL_SH 12
#define MMCR1_PMC7SEL_SH 7
#define MMCR1_PMC8SEL_SH 2
/* MMCR1 bit number of the adder-select bit for each PMC, by 0-based PMC index */
static short mmcr1_adder_bits[8] = {
	[0] = MMCR1_PMC1_ADDER_SEL_SH,
	[1] = MMCR1_PMC2_ADDER_SEL_SH,
	[2] = MMCR1_PMC3_ADDER_SEL_SH,
	[3] = MMCR1_PMC4_ADDER_SEL_SH,
	[4] = MMCR1_PMC5_ADDER_SEL_SH,
	[5] = MMCR1_PMC6_ADDER_SEL_SH,
	[6] = MMCR1_PMC7_ADDER_SEL_SH,
	[7] = MMCR1_PMC8_ADDER_SEL_SH,
};
/*
* Bits in MMCRA
*/
/*
* Layout of constraint bits:
* 6666555555555544444444443333333333222222222211111111110000000000
* 3210987654321098765432109876543210987654321098765432109876543210
* <><><>[ >[ >[ >< >< >< >< ><><><><><><><><>
* SPT0T1 UC PS1 PS2 B0 B1 B2 B3 P1P2P3P4P5P6P7P8
*
* SP - SPCSEL constraint
* 48-49: SPCSEL value 0x3_0000_0000_0000
*
* T0 - TTM0 constraint
* 46-47: TTM0SEL value (0=FPU, 2=IFU, 3=VPU) 0xC000_0000_0000
*
* T1 - TTM1 constraint
* 44-45: TTM1SEL value (0=IDU, 3=STS) 0x3000_0000_0000
*
* UC - unit constraint: can't have all three of FPU|IFU|VPU, ISU, IDU|STS
* 43: UC3 error 0x0800_0000_0000
* 42: FPU|IFU|VPU events needed 0x0400_0000_0000
* 41: ISU events needed 0x0200_0000_0000
* 40: IDU|STS events needed 0x0100_0000_0000
*
* PS1
* 39: PS1 error 0x0080_0000_0000
* 36-38: count of events needing PMC1/2/5/6 0x0070_0000_0000
*
* PS2
* 35: PS2 error 0x0008_0000_0000
* 32-34: count of events needing PMC3/4/7/8 0x0007_0000_0000
*
* B0
* 28-31: Byte 0 event source 0xf000_0000
* Encoding as for the event code
*
* B1, B2, B3
* 24-27, 20-23, 16-19: Byte 1, 2, 3 event sources
*
* P1
* 15: P1 error 0x8000
* 14-15: Count of events needing PMC1
*
* P2..P8
* 0-13: Count of events needing PMC2..PMC8
*/
/*
 * For each PMC (0-based index), a bitmap of the PMCSEL values that are
 * direct marked-instruction events on that PMC: bit n set means PMCSEL n.
 */
static unsigned char direct_marked_event[8] = {
	/* PMC1, PMCSEL 2, 3: PM_MRK_GRP_DISP, PM_MRK_ST_CMPL */
	[0] = 0x0c,
	/* PMC2, PMCSEL 3, 5: PM_THRESH_TIMEO, PM_MRK_BRU_FIN */
	[1] = 0x28,
	/* PMC3, PMCSEL 3, 5: PM_MRK_ST_CMPL_INT, PM_MRK_VMX_FIN */
	[2] = 0x28,
	/* PMC4, PMCSEL 4, 5: PM_MRK_GRP_CMPL, PM_MRK_CRU_FIN */
	[3] = 0x30,
	/* PMC5, PMCSEL 4, 5: PM_GRP_MRK, PM_MRK_GRP_TIMEO */
	[4] = 0x30,
	/* PMC6, PMCSEL 3, 4, 5: PM_MRK_ST_STS, PM_MRK_FXU_FIN, PM_MRK_GRP_ISSUED */
	[5] = 0x38,
	/* PMC7, PMCSEL 4, 5: PM_MRK_FPU_FIN, PM_MRK_INST_FIN */
	[6] = 0x30,
	/* PMC8, PMCSEL 4: PM_MRK_LSU_FIN */
	[7] = 0x10,
};
/*
 * Returns 1 if event counts things relating to marked instructions
 * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not.
 *
 * An event is marked either because its (PMC, PMCSEL) pair is listed in
 * direct_marked_event[], or because the event-bus bit it selects is a
 * marked bit of the unit driving that bus byte.
 */
static int p970_marked_instr_event(u64 event)
{
	int pmc, psel, unit, byte, bit;
	unsigned int mask;
	int shift;

	pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
	psel = event & PM_PMCSEL_MSK;
	if (pmc) {
		/* the PMC field is 4 bits wide but only PMC1..8 exist */
		if (pmc > 8)
			return 0;
		if (direct_marked_event[pmc - 1] & (1 << psel))
			return 1;
		if (psel == 0)		/* add events */
			bit = (pmc <= 4)? pmc - 1: 8 - pmc;
		else if (psel == 7 || psel == 13)	/* decode events */
			bit = 4;
		else
			return 0;
	} else
		bit = psel;

	byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
	unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
	mask = 0;
	/*
	 * Each unit has its own mask of marked bus bits; every case must
	 * end in a break so one unit's mask is not overwritten by the next.
	 */
	switch (unit) {
	case PM_VPU:
		mask = 0x4c;		/* byte 0 bits 2,3,6 */
		break;
	case PM_LSU0:
		/* byte 2 bits 0,2,3,4,6; all of byte 1 */
		mask = 0x085dff00;
		break;
	case PM_LSU1L:
		mask = 0x50 << 24;	/* byte 3 bits 4,6 */
		break;
	}

	/* with pmc == 0 the bit number is the raw PMCSEL; never shift past the mask */
	shift = byte * 8 + bit;
	if (shift >= 32)
		return 0;
	return (mask >> shift) & 1;
}
/*
 * Masks and values for using events from the various units.
 * Indexed by the PM_UNIT value of the event; [unit][0] is OR-ed into the
 * constraint mask and [unit][1] into the constraint value by
 * p970_get_constraint().  Units without an entry (PM_NONE, the LSU units)
 * contribute nothing.
 */
static u64 unit_cons[PM_LASTUNIT+1][2] = {
[PM_FPU] = { 0xc80000000000ull, 0x040000000000ull },
[PM_VPU] = { 0xc80000000000ull, 0xc40000000000ull },
[PM_ISU] = { 0x080000000000ull, 0x020000000000ull },
[PM_IFU] = { 0xc80000000000ull, 0x840000000000ull },
[PM_IDU] = { 0x380000000000ull, 0x010000000000ull },
[PM_STS] = { 0x380000000000ull, 0x310000000000ull },
};
/*
 * Compute the constraint mask/value pair for one event.
 *
 * Returns 0 and fills *maskp / *valp, or -1 (outputs untouched) when the
 * event names a PMC above 8 or a unit above PM_LASTUNIT.
 */
static int p970_get_constraint(u64 event, u64 *maskp, u64 *valp)
{
	u64 mask = 0, value = 0;
	int pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
	int unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
	int spcsel = (event >> PM_SPCSEL_SH) & PM_SPCSEL_MSK;
	int group = -1;		/* 0: PMC1/2/5/6, 1: PMC3/4/7/8, -1: neither */
	int lane, shift;

	if (pmc > 8 || unit > PM_LASTUNIT)
		return -1;

	if (pmc) {
		/* one use of this specific PMC */
		shift = (pmc - 1) * 2;
		mask |= 2 << shift;
		value |= 1 << shift;
		group = ((pmc - 1) >> 1) & 1;
	}

	if (unit) {
		mask |= unit_cons[unit][0];
		value |= unit_cons[unit][1];
		lane = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
		/*
		 * Bus events on bytes 0 and 2 can be counted
		 * on PMC1/2/5/6; bytes 1 and 3 on PMC3/4/7/8.
		 */
		if (!pmc)
			group = lane & 1;
		/* Set byte lane select field */
		shift = 28 - 4 * lane;
		mask |= 0xfULL << shift;
		value |= (u64)unit << shift;
	}

	switch (group) {
	case 0:
		/* increment PMC1/2/5/6 field */
		mask |= 0x8000000000ull;
		value |= 0x1000000000ull;
		break;
	case 1:
		/* increment PMC3/4/7/8 field */
		mask |= 0x800000000ull;
		value |= 0x100000000ull;
		break;
	}

	if (spcsel) {
		mask |= 3ull << 48;
		value |= (u64)spcsel << 48;
	}

	*maskp = mask;
	*valp = value;
	return 0;
}
/*
 * Fill alt[] with the codes usable in place of event and return the count.
 * Only the "LSU empty" event has an alternative: 0x2002 and 0x3002 are
 * interchangeable.  flags is not used.
 */
static int p970_get_alternatives(u64 event, unsigned int flags, u64 alt[])
{
	alt[0] = event;

	switch (event) {
	case 0x2002:
		alt[1] = 0x3002;
		return 2;
	case 0x3002:
		alt[1] = 0x2002;
		return 2;
	}
	return 1;
}
/*
 * Assign a PMC to each event and build the MMCR values.
 *
 * On success returns 0 with hwc[i] holding the 0-based PMC index for
 * event[i] and mmcr[0..2] holding the MMCR0, MMCR1 and MMCRA values.
 * Returns -1 (mmcr[] untouched) if the set cannot be scheduled: more than
 * 8 events, an invalid or doubly-used PMC number, an invalid unit, two
 * units on the same bus byte, more than 4 events in one PMC group, more
 * than one unit on a TTM, or no free PMC left for an any-PMC event.
 */
static int p970_compute_mmcr(u64 event[], int n_ev,
			     unsigned int hwc[], u64 mmcr[])
{
	u64 mmcr0 = 0, mmcr1 = 0, mmcra = 0;
	unsigned int pmc, unit, byte, psel;
	unsigned int ttm, grp;
	unsigned int pmc_inuse = 0;
	unsigned int pmc_grp_use[2];
	unsigned char busbyte[4];
	unsigned char unituse[16];
	/* per unit: TTMxSEL value in bits 0-1 or 3-4, bit 2 set = unit is on TTM1 */
	unsigned char unitmap[] = { 0, 0<<3, 3<<3, 1<<3, 2<<3, 0|4, 3|4 };
	unsigned char ttmuse[2];
	unsigned char pmcsel[8];
	int i;
	int spcsel;

	if (n_ev > 8)
		return -1;

	/* First pass to count resource use */
	pmc_grp_use[0] = pmc_grp_use[1] = 0;
	memset(busbyte, 0, sizeof(busbyte));
	memset(unituse, 0, sizeof(unituse));
	for (i = 0; i < n_ev; ++i) {
		pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
		if (pmc) {
			/*
			 * The PMC field is 4 bits wide but only PMC1..8
			 * exist; anything larger would index past pmcsel[]
			 * and mmcr1_adder_bits[] below.
			 */
			if (pmc > 8)
				return -1;
			if (pmc_inuse & (1 << (pmc - 1)))
				return -1;
			pmc_inuse |= 1 << (pmc - 1);
			/* count 1/2/5/6 vs 3/4/7/8 use */
			++pmc_grp_use[((pmc - 1) >> 1) & 1];
		}
		unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
		byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
		if (unit) {
			if (unit > PM_LASTUNIT)
				return -1;
			if (!pmc)
				++pmc_grp_use[byte & 1];
			if (busbyte[byte] && busbyte[byte] != unit)
				return -1;
			busbyte[byte] = unit;
			unituse[unit] = 1;
		}
	}
	if (pmc_grp_use[0] > 4 || pmc_grp_use[1] > 4)
		return -1;

	/*
	 * Assign resources and set multiplexer selects.
	 *
	 * PM_ISU can go either on TTM0 or TTM1, but that's the only
	 * choice we have to deal with.
	 */
	if (unituse[PM_ISU] &
	    (unituse[PM_FPU] | unituse[PM_IFU] | unituse[PM_VPU]))
		unitmap[PM_ISU] = 2 | 4;	/* move ISU to TTM1 */

	/* Set TTM[01]SEL fields. */
	ttmuse[0] = ttmuse[1] = 0;
	for (i = PM_FPU; i <= PM_STS; ++i) {
		if (!unituse[i])
			continue;
		ttm = unitmap[i];
		++ttmuse[(ttm >> 2) & 1];
		mmcr1 |= (u64)(ttm & ~4) << MMCR1_TTM1SEL_SH;
	}
	/* Check only one unit per TTMx */
	if (ttmuse[0] > 1 || ttmuse[1] > 1)
		return -1;

	/* Set byte lane select fields and TTM3SEL. */
	for (byte = 0; byte < 4; ++byte) {
		unit = busbyte[byte];
		if (!unit)
			continue;
		if (unit <= PM_STS)
			ttm = (unitmap[unit] >> 2) & 1;
		else if (unit == PM_LSU0)
			ttm = 2;
		else {
			ttm = 3;
			if (unit == PM_LSU1L && byte >= 2)
				mmcr1 |= 1ull << (MMCR1_TTM3SEL_SH + 3 - byte);
		}
		mmcr1 |= (u64)ttm << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte);
	}

	/* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */
	memset(pmcsel, 0x8, sizeof(pmcsel));	/* 8 means don't count */
	for (i = 0; i < n_ev; ++i) {
		pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
		unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
		byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
		psel = event[i] & PM_PMCSEL_MSK;
		if (!pmc) {
			/* Bus event or any-PMC direct event */
			if (unit)
				psel |= 0x10 | ((byte & 2) << 2);
			else
				psel |= 8;
			for (pmc = 0; pmc < 8; ++pmc) {
				if (pmc_inuse & (1 << pmc))
					continue;
				grp = (pmc >> 1) & 1;
				if (unit) {
					if (grp == (byte & 1))
						break;
				} else if (pmc_grp_use[grp] < 4) {
					++pmc_grp_use[grp];
					break;
				}
			}
			/* no suitable free PMC: fail rather than write pmcsel[8] */
			if (pmc >= 8)
				return -1;
			pmc_inuse |= 1 << pmc;
		} else {
			/* Direct event */
			--pmc;
			if (psel == 0 && (byte & 2))
				/* add events on higher-numbered bus */
				mmcr1 |= 1ull << mmcr1_adder_bits[pmc];
		}
		pmcsel[pmc] = psel;
		hwc[i] = pmc;
		spcsel = (event[i] >> PM_SPCSEL_SH) & PM_SPCSEL_MSK;
		mmcr1 |= spcsel;
		if (p970_marked_instr_event(event[i]))
			mmcra |= MMCRA_SAMPLE_ENABLE;
	}

	/* PMC1/2 selectors go in MMCR0, PMC3..8 selectors in MMCR1 */
	for (pmc = 0; pmc < 2; ++pmc)
		mmcr0 |= pmcsel[pmc] << (MMCR0_PMC1SEL_SH - 7 * pmc);
	for (; pmc < 8; ++pmc)
		mmcr1 |= (u64)pmcsel[pmc] << (MMCR1_PMC3SEL_SH - 5 * (pmc - 2));
	if (pmc_inuse & 1)
		mmcr0 |= MMCR0_PMC1CE;
	if (pmc_inuse & 0xfe)
		mmcr0 |= MMCR0_PMCjCE;

	mmcra |= 0x2000;	/* mark only one IOP per PPC instruction */

	/* Return MMCRx values */
	mmcr[0] = mmcr0;
	mmcr[1] = mmcr1;
	mmcr[2] = mmcra;
	return 0;
}
/*
 * Disable a PMC (0-based index) by writing 0x08 into its PMCxSEL field.
 * The fields for PMCs 0 and 1 are in mmcr[0]; the rest are in mmcr[1].
 */
static void p970_disable_pmc(unsigned int pmc, u64 mmcr[])
{
	int reg = (pmc <= 1) ? 0 : 1;
	int shift = (pmc <= 1) ? MMCR0_PMC1SEL_SH - 7 * pmc
			       : MMCR1_PMC3SEL_SH - 5 * (pmc - 2);

	/* clear the 5-bit selector, then store the "don't count" value */
	mmcr[reg] &= ~(0x1fUL << shift);
	mmcr[reg] |= 0x08UL << shift;
}
/*
 * PPC970 event codes for the generic hardware events, indexed by the
 * PERF_COUNT_HW_* event id.
 */
static int ppc970_generic_events[] = {
[PERF_COUNT_HW_CPU_CYCLES] = 7,
[PERF_COUNT_HW_INSTRUCTIONS] = 1,
[PERF_COUNT_HW_CACHE_REFERENCES] = 0x8810, /* PM_LD_REF_L1 */
[PERF_COUNT_HW_CACHE_MISSES] = 0x3810, /* PM_LD_MISS_L1 */
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x431, /* PM_BR_ISSUED */
[PERF_COUNT_HW_BRANCH_MISSES] = 0x327, /* PM_GRP_BR_MPRED */
};
#define C(x) PERF_COUNT_HW_CACHE_##x
/*
 * Table of generalized cache-related events, indexed by
 * [cache][operation][result]; within each row the first value is the
 * RESULT_ACCESS code and the second the RESULT_MISS code.
 * 0 means not supported, -1 means nonsensical, other values
 * are event codes.
 */
static int ppc970_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
[C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */
[C(OP_READ)] = { 0x8810, 0x3810 },
[C(OP_WRITE)] = { 0x7810, 0x813 },
[C(OP_PREFETCH)] = { 0x731, 0 },
},
[C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */
[C(OP_READ)] = { 0, 0 },
[C(OP_WRITE)] = { -1, -1 },
[C(OP_PREFETCH)] = { 0, 0 },
},
[C(LL)] = { /* RESULT_ACCESS RESULT_MISS */
[C(OP_READ)] = { 0, 0 },
[C(OP_WRITE)] = { 0, 0 },
[C(OP_PREFETCH)] = { 0x733, 0 },
},
[C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */
[C(OP_READ)] = { 0, 0x704 },
[C(OP_WRITE)] = { -1, -1 },
[C(OP_PREFETCH)] = { -1, -1 },
},
[C(ITLB)] = { /* RESULT_ACCESS RESULT_MISS */
[C(OP_READ)] = { 0, 0x700 },
[C(OP_WRITE)] = { -1, -1 },
[C(OP_PREFETCH)] = { -1, -1 },
},
[C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */
[C(OP_READ)] = { 0x431, 0x327 },
[C(OP_WRITE)] = { -1, -1 },
[C(OP_PREFETCH)] = { -1, -1 },
},
};
/* PMU description for the PPC970 family: eight counters. */
struct power_pmu ppc970_pmu = {
.n_counter = 8,
/* p970_get_alternatives() returns at most 2 entries */
.max_alternatives = 2,
/* low bit of every add field in the constraint layout: P1..P8 (bits 0,2,..,14), PS2 (bit 32) and PS1 (bit 36) */
.add_fields = 0x001100005555ull,
/* NOTE(review): 3 in each of the PS1/PS2 count fields plus bit 40; presumably biases those fields so that exceeding their limits sets the error bits -- confirm */
.test_adder = 0x013300000000ull,
.compute_mmcr = p970_compute_mmcr,
.get_constraint = p970_get_constraint,
.get_alternatives = p970_get_alternatives,
.disable_pmc = p970_disable_pmc,
.n_generic = ARRAY_SIZE(ppc970_generic_events),
.generic_events = ppc970_generic_events,
.cache_events = &ppc970_cache_events,
};
...@@ -29,6 +29,7 @@ ...@@ -29,6 +29,7 @@
#include <linux/module.h> #include <linux/module.h>
#include <linux/kprobes.h> #include <linux/kprobes.h>
#include <linux/kdebug.h> #include <linux/kdebug.h>
#include <linux/perf_counter.h>
#include <asm/firmware.h> #include <asm/firmware.h>
#include <asm/page.h> #include <asm/page.h>
...@@ -170,6 +171,8 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, ...@@ -170,6 +171,8 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
die("Weird page fault", regs, SIGSEGV); die("Weird page fault", regs, SIGSEGV);
} }
perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
/* When running in the kernel we expect faults to occur only to /* When running in the kernel we expect faults to occur only to
* addresses in user space. All other faults represent errors in the * addresses in user space. All other faults represent errors in the
* kernel and should generate an OOPS. Unfortunately, in the case of an * kernel and should generate an OOPS. Unfortunately, in the case of an
...@@ -309,6 +312,8 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, ...@@ -309,6 +312,8 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
} }
if (ret & VM_FAULT_MAJOR) { if (ret & VM_FAULT_MAJOR) {
current->maj_flt++; current->maj_flt++;
perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
regs, address);
#ifdef CONFIG_PPC_SMLPAR #ifdef CONFIG_PPC_SMLPAR
if (firmware_has_feature(FW_FEATURE_CMO)) { if (firmware_has_feature(FW_FEATURE_CMO)) {
preempt_disable(); preempt_disable();
...@@ -316,8 +321,11 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, ...@@ -316,8 +321,11 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
preempt_enable(); preempt_enable();
} }
#endif #endif
} else } else {
current->min_flt++; current->min_flt++;
perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
regs, address);
}
up_read(&mm->mmap_sem); up_read(&mm->mmap_sem);
return 0; return 0;
......
config PPC64 config PPC64
bool "64-bit kernel" bool "64-bit kernel"
default n default n
select HAVE_PERF_COUNTERS
help help
This option selects whether a 32-bit or a 64-bit kernel This option selects whether a 32-bit or a 64-bit kernel
will be built. will be built.
......
...@@ -739,6 +739,7 @@ config X86_UP_IOAPIC ...@@ -739,6 +739,7 @@ config X86_UP_IOAPIC
config X86_LOCAL_APIC config X86_LOCAL_APIC
def_bool y def_bool y
depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC
select HAVE_PERF_COUNTERS if (!M386 && !M486)
config X86_IO_APIC config X86_IO_APIC
def_bool y def_bool y
......
...@@ -825,10 +825,11 @@ ia32_sys_call_table: ...@@ -825,10 +825,11 @@ ia32_sys_call_table:
.quad compat_sys_signalfd4 .quad compat_sys_signalfd4
.quad sys_eventfd2 .quad sys_eventfd2
.quad sys_epoll_create1 .quad sys_epoll_create1
.quad sys_dup3 /* 330 */ .quad sys_dup3 /* 330 */
.quad sys_pipe2 .quad sys_pipe2
.quad sys_inotify_init1 .quad sys_inotify_init1
.quad compat_sys_preadv .quad compat_sys_preadv
.quad compat_sys_pwritev .quad compat_sys_pwritev
.quad compat_sys_rt_tgsigqueueinfo /* 335 */ .quad compat_sys_rt_tgsigqueueinfo /* 335 */
.quad sys_perf_counter_open
ia32_syscall_end: ia32_syscall_end:
此差异已折叠。
...@@ -49,7 +49,7 @@ BUILD_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR) ...@@ -49,7 +49,7 @@ BUILD_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR)
BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR) BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR)
#ifdef CONFIG_PERF_COUNTERS #ifdef CONFIG_PERF_COUNTERS
BUILD_INTERRUPT(perf_counter_interrupt, LOCAL_PERF_VECTOR) BUILD_INTERRUPT(perf_pending_interrupt, LOCAL_PENDING_VECTOR)
#endif #endif
#ifdef CONFIG_X86_MCE_P4THERMAL #ifdef CONFIG_X86_MCE_P4THERMAL
......
...@@ -13,6 +13,8 @@ typedef struct { ...@@ -13,6 +13,8 @@ typedef struct {
unsigned int irq_spurious_count; unsigned int irq_spurious_count;
#endif #endif
unsigned int generic_irqs; /* arch dependent */ unsigned int generic_irqs; /* arch dependent */
unsigned int apic_perf_irqs;
unsigned int apic_pending_irqs;
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
unsigned int irq_resched_count; unsigned int irq_resched_count;
unsigned int irq_call_count; unsigned int irq_call_count;
......
...@@ -29,6 +29,8 @@ ...@@ -29,6 +29,8 @@
extern void apic_timer_interrupt(void); extern void apic_timer_interrupt(void);
extern void generic_interrupt(void); extern void generic_interrupt(void);
extern void error_interrupt(void); extern void error_interrupt(void);
extern void perf_pending_interrupt(void);
extern void spurious_interrupt(void); extern void spurious_interrupt(void);
extern void thermal_interrupt(void); extern void thermal_interrupt(void);
extern void reschedule_interrupt(void); extern void reschedule_interrupt(void);
......
#ifndef _ASM_X86_INTEL_ARCH_PERFMON_H
#define _ASM_X86_INTEL_ARCH_PERFMON_H
/* MSR numbers of the first two counter registers and their event selects. */
#define MSR_ARCH_PERFMON_PERFCTR0 0xc1
#define MSR_ARCH_PERFMON_PERFCTR1 0xc2
#define MSR_ARCH_PERFMON_EVENTSEL0 0x186
#define MSR_ARCH_PERFMON_EVENTSEL1 0x187
/*
 * Single-bit flags for an event-select value (bits 22, 20, 17 and 16).
 * NOTE(review): meanings follow the names (enable, interrupt, count in
 * OS mode, count in user mode) -- confirm against the Intel SDM.
 */
#define ARCH_PERFMON_EVENTSEL0_ENABLE (1 << 22)
#define ARCH_PERFMON_EVENTSEL_INT (1 << 20)
#define ARCH_PERFMON_EVENTSEL_OS (1 << 17)
#define ARCH_PERFMON_EVENTSEL_USR (1 << 16)
/*
 * "Unhalted core cycles" event: event select 0x3c with a zero unit mask
 * (the unit mask is pre-shifted by 8).  _INDEX is the bit number used to
 * build the _PRESENT mask below.
 */
#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL (0x3c)
#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8)
#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX (0)
#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT \
(1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX))
/*
 * Two views of one 32-bit value: 'full' is the raw word, 'split' breaks
 * it into four 8-bit fields.
 * NOTE(review): by its name this presumably mirrors EAX of CPUID leaf 10
 * (0xA); which bits each field lands on depends on the compiler's
 * bit-field allocation order -- confirm for the target ABI.
 */
union cpuid10_eax {
struct {
unsigned int version_id:8;
unsigned int num_counters:8;
unsigned int bit_width:8;
unsigned int mask_length:8;
} split;
unsigned int full;
};
#endif /* _ASM_X86_INTEL_ARCH_PERFMON_H */
...@@ -108,14 +108,14 @@ ...@@ -108,14 +108,14 @@
#define LOCAL_TIMER_VECTOR 0xef #define LOCAL_TIMER_VECTOR 0xef
/* /*
* Performance monitoring interrupt vector: * Generic system vector for platform specific use
*/ */
#define LOCAL_PERF_VECTOR 0xee #define GENERIC_INTERRUPT_VECTOR 0xed
/* /*
* Generic system vector for platform specific use * Performance monitoring pending work vector:
*/ */
#define GENERIC_INTERRUPT_VECTOR 0xed #define LOCAL_PENDING_VECTOR 0xec
/* /*
* First APIC vector available to drivers: (vectors 0x30-0xee) we * First APIC vector available to drivers: (vectors 0x30-0xee) we
......
#ifndef _ASM_X86_PERF_COUNTER_H
#define _ASM_X86_PERF_COUNTER_H
/*
* Performance counter hw details:
*/
/* Maximum number of generic and of fixed counters. */
#define X86_PMC_MAX_GENERIC 8
#define X86_PMC_MAX_FIXED 3
/*
 * Counter index space: generic counters start at index 0, fixed counters
 * at index 32 (the X86_PMC_IDX_FIXED_* indices are offsets from it), and
 * 64 is the size of the whole index space.
 */
#define X86_PMC_IDX_GENERIC 0
#define X86_PMC_IDX_FIXED 32
#define X86_PMC_IDX_MAX 64
/* MSR numbers of the first two counter registers and their event selects. */
#define MSR_ARCH_PERFMON_PERFCTR0 0xc1
#define MSR_ARCH_PERFMON_PERFCTR1 0xc2
#define MSR_ARCH_PERFMON_EVENTSEL0 0x186
#define MSR_ARCH_PERFMON_EVENTSEL1 0x187
/*
 * Single-bit flags for an event-select value (bits 22, 20, 17 and 16).
 * NOTE(review): meanings follow the names (enable, interrupt, count in
 * OS mode, count in user mode) -- confirm against the Intel SDM.
 */
#define ARCH_PERFMON_EVENTSEL0_ENABLE (1 << 22)
#define ARCH_PERFMON_EVENTSEL_INT (1 << 20)
#define ARCH_PERFMON_EVENTSEL_OS (1 << 17)
#define ARCH_PERFMON_EVENTSEL_USR (1 << 16)
/*
 * Includes eventsel and unit mask as well:
 */
#define ARCH_PERFMON_EVENT_MASK 0xffff
/*
 * "Unhalted core cycles" event: event select 0x3c with a zero unit mask
 * (the unit mask is pre-shifted by 8).  _INDEX is the bit number used to
 * build the _PRESENT mask below.
 */
#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c
#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8)
#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX 0
#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT \
(1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX))
/* NOTE(review): presumably an index like the one above, for branch misses -- confirm. */
#define ARCH_PERFMON_BRANCH_MISSES_RETIRED 6
/*
 * Intel "Architectural Performance Monitoring" CPUID
 * detection/enumeration details:
 *
 * cpuid10_eax offers two views of one 32-bit value: 'full' is the raw
 * word, 'split' breaks it into four 8-bit fields.
 * NOTE(review): by its name this presumably mirrors EAX of CPUID leaf 10
 * (0xA); which bits each field lands on depends on the compiler's
 * bit-field allocation order -- confirm for the target ABI.
 */
union cpuid10_eax {
struct {
unsigned int version_id:8;
unsigned int num_counters:8;
unsigned int bit_width:8;
unsigned int mask_length:8;
} split;
unsigned int full;
};
/*
 * Two views of one 32-bit value: 'full' is the raw word, 'split' exposes
 * a 4-bit fixed-counter count followed by 28 reserved bits.
 * NOTE(review): by its name this presumably mirrors EDX of CPUID leaf 10
 * (0xA); bit placement depends on the compiler's bit-field allocation
 * order -- confirm for the target ABI.
 */
union cpuid10_edx {
struct {
unsigned int num_counters_fixed:4;
unsigned int reserved:28;
} split;
unsigned int full;
};
/*
 * Fixed-purpose performance counters.
 *
 * All 3 fixed-mode PMCs are configured via this single MSR:
 */
#define MSR_ARCH_PERFMON_FIXED_CTR_CTRL 0x38d
/*
 * The counts are available in three separate MSRs.  Each counter also
 * gets an index in the X86_PMC_IDX_FIXED range, in the same order:
 */
/* Instr_Retired.Any: */
#define MSR_ARCH_PERFMON_FIXED_CTR0 0x309
#define X86_PMC_IDX_FIXED_INSTRUCTIONS (X86_PMC_IDX_FIXED + 0)
/* CPU_CLK_Unhalted.Core: */
#define MSR_ARCH_PERFMON_FIXED_CTR1 0x30a
#define X86_PMC_IDX_FIXED_CPU_CYCLES (X86_PMC_IDX_FIXED + 1)
/* CPU_CLK_Unhalted.Ref: */
#define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b
#define X86_PMC_IDX_FIXED_BUS_CYCLES (X86_PMC_IDX_FIXED + 2)
/*
 * Pending-work hooks.  set_perf_counter_pending() is implemented
 * elsewhere; clearing is a no-op and the test always evaluates to 0.
 * These are declared regardless of CONFIG_PERF_COUNTERS.
 */
extern void set_perf_counter_pending(void);
#define clear_perf_counter_pending() do { } while (0)
#define test_perf_counter_pending() (0)
/*
 * With CONFIG_PERF_COUNTERS the init routines are real functions defined
 * elsewhere; without it they collapse to empty inline stubs so callers
 * can invoke them unconditionally.
 */
#ifdef CONFIG_PERF_COUNTERS
extern void init_hw_perf_counters(void);
extern void perf_counters_lapic_init(void);
#else
static inline void init_hw_perf_counters(void) { }
static inline void perf_counters_lapic_init(void) { }
#endif
#endif /* _ASM_X86_PERF_COUNTER_H */
...@@ -341,6 +341,7 @@ ...@@ -341,6 +341,7 @@
#define __NR_preadv 333 #define __NR_preadv 333
#define __NR_pwritev 334 #define __NR_pwritev 334
#define __NR_rt_tgsigqueueinfo 335 #define __NR_rt_tgsigqueueinfo 335
#define __NR_perf_counter_open 336
#ifdef __KERNEL__ #ifdef __KERNEL__
......
...@@ -659,7 +659,8 @@ __SYSCALL(__NR_preadv, sys_preadv) ...@@ -659,7 +659,8 @@ __SYSCALL(__NR_preadv, sys_preadv)
__SYSCALL(__NR_pwritev, sys_pwritev) __SYSCALL(__NR_pwritev, sys_pwritev)
#define __NR_rt_tgsigqueueinfo 297 #define __NR_rt_tgsigqueueinfo 297
__SYSCALL(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo) __SYSCALL(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo)
#define __NR_perf_counter_open 298
__SYSCALL(__NR_perf_counter_open, sys_perf_counter_open)
#ifndef __NO_STUBS #ifndef __NO_STUBS
#define __ARCH_WANT_OLD_READDIR #define __ARCH_WANT_OLD_READDIR
......
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册