提交 1474855d 编写于 作者: B Bob Nelson 提交者: Arnd Bergmann

[CELL] oprofile: add support to OProfile for profiling CELL BE SPUs

From: Maynard Johnson <mpjohn@us.ibm.com>

This patch updates the existing arch/powerpc/oprofile/op_model_cell.c
to add in the SPU profiling capabilities.  In addition, a 'cell' subdirectory
was added to arch/powerpc/oprofile to hold Cell-specific SPU profiling code.
Exports spu_set_profile_private_kref and spu_get_profile_private_kref which
are used by OProfile to store private profile information in spufs data
structures.

Also incorporated several fixes from other patches (rrn).  Check pointer
returned from kzalloc.  Eliminated unnecessary cast.  Better error
handling and cleanup in the related area.  64-bit unsigned long parameter
was being demoted to 32-bit unsigned int and eventually promoted back to
unsigned long.
Signed-off-by: Carl Love <carll@us.ibm.com>
Signed-off-by: Maynard Johnson <mpjohn@us.ibm.com>
Signed-off-by: Bob Nelson <rrnelson@us.ibm.com>
Signed-off-by: Arnd Bergmann <arnd.bergmann@de.ibm.com>
Acked-by: Paul Mackerras <paulus@samba.org>
上级 36aaccc1
......@@ -1455,7 +1455,8 @@ CONFIG_HAS_DMA=y
# Instrumentation Support
#
CONFIG_PROFILING=y
CONFIG_OPROFILE=y
CONFIG_OPROFILE=m
CONFIG_OPROFILE_CELL=y
# CONFIG_KPROBES is not set
#
......
......@@ -122,6 +122,7 @@ extern struct timezone sys_tz;
static long timezone_offset;
unsigned long ppc_proc_freq;
EXPORT_SYMBOL(ppc_proc_freq);
unsigned long ppc_tb_freq;
static u64 tb_last_jiffy __cacheline_aligned_in_smp;
......
......@@ -15,3 +15,10 @@ config OPROFILE
If unsure, say N.
config OPROFILE_CELL
bool "OProfile for Cell Broadband Engine"
depends on (SPU_FS = y && OPROFILE = m) || (SPU_FS = y && OPROFILE = y) || (SPU_FS = m && OPROFILE = m)
default y
help
Profiling of Cell BE SPUs requires special support enabled
by this option.
......@@ -11,7 +11,9 @@ DRIVER_OBJS := $(addprefix ../../../drivers/oprofile/, \
timer_int.o )
oprofile-y := $(DRIVER_OBJS) common.o backtrace.o
oprofile-$(CONFIG_PPC_CELL_NATIVE) += op_model_cell.o
oprofile-$(CONFIG_OPROFILE_CELL) += op_model_cell.o \
cell/spu_profiler.o cell/vma_map.o \
cell/spu_task_sync.o
oprofile-$(CONFIG_PPC64) += op_model_rs64.o op_model_power4.o op_model_pa6t.o
oprofile-$(CONFIG_FSL_BOOKE) += op_model_fsl_booke.o
oprofile-$(CONFIG_6xx) += op_model_7450.o
/*
* Cell Broadband Engine OProfile Support
*
* (C) Copyright IBM Corporation 2006
*
* Author: Maynard Johnson <maynardj@us.ibm.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#ifndef PR_UTIL_H
#define PR_UTIL_H
#include <linux/cpumask.h>
#include <linux/oprofile.h>
#include <asm/cell-pmu.h>
#include <asm/spu.h>
#include "../../platforms/cell/cbe_regs.h"
/* Defines used for sync_start */
#define SKIP_GENERIC_SYNC 0
#define SYNC_START_ERROR -1
#define DO_GENERIC_SYNC 1
/*
 * Layout of one _ovly_table entry: describes a single overlay section
 * of an SPU program.
 */
struct spu_overlay_info {
	unsigned int vma;	/* SPU virtual memory address, from the ELF */
	unsigned int size;	/* section size, from the ELF */
	unsigned int offset;	/* section offset within the ELF file */
	unsigned int buf;	/* index (starting at 1) into _ovly_buf_table */
};
/*
 * One node of the singly linked vma-to-file-offset map describing the
 * sections of an SPU program.
 *
 * guard_ptr / guard_val are only meaningful for overlay sections:
 *
 * The guard pointer is an entry in the _ovly_buf_table, computed using
 * ovly.buf as the index into the table.  Since ovly.buf values begin
 * at '1' to reference the first (or 0th) entry in the _ovly_buf_table,
 * the computation subtracts 1 from ovly.buf.
 *
 * The guard value is stored in the _ovly_buf_table entry and is an
 * index (starting at 1) back to the _ovly_table entry that is pointing
 * at this _ovly_buf_table entry.  So, for example, for an overlay
 * scenario with one overlay segment and two overlay sections:
 *   - Section 1 points to the first entry of the _ovly_buf_table,
 *     which contains a guard value of '1', referencing the first
 *     (index=0) entry of _ovly_table.
 *   - Section 2 points to the second entry of the _ovly_buf_table,
 *     which contains a guard value of '2', referencing the second
 *     (index=1) entry of _ovly_table.
 */
struct vma_to_fileoffset_map {
	struct vma_to_fileoffset_map *next;	/* next node, NULL at end */
	unsigned int vma;	/* SPU virtual memory address, from the ELF */
	unsigned int size;	/* section size, from the ELF */
	unsigned int offset;	/* section offset within the ELF file */
	unsigned int guard_ptr;	/* 0 for non-overlay sections */
	unsigned int guard_val;	/* expected value found at guard_ptr */
};
/*
 * The three functions below maintain and query the vma-to-fileoffset
 * map (a linked list of struct vma_to_fileoffset_map).
 */
/* Build the map from the SPU ELF image; returns NULL on failure. */
struct vma_to_fileoffset_map *create_vma_map(const struct spu *spu,
u64 objectid);
/*
 * Translate an SPU address into a file offset.  When no map entry
 * matches, the returned value is 0x10000000 + vma.  *grd_val is written
 * only when a guarded (overlay) entry matches.
 */
unsigned int vma_map_lookup(struct vma_to_fileoffset_map *map,
unsigned int vma, const struct spu *aSpu,
int *grd_val);
/* Free every node of the map; a NULL map is accepted. */
void vma_map_free(struct vma_to_fileoffset_map *map);
/*
 * Entry point for SPU profiling.
 * cycles_reset is the SPU_CYCLES count value specified by the user.
 * Returns 0 on success or -ENOMEM when the sample array cannot be
 * allocated.
 */
int start_spu_profiling(unsigned int cycles_reset);
/* Stop the sampling timer and free the sample array. */
void stop_spu_profiling(void);
/*
 * Add the necessary profiling hooks.  Returns SKIP_GENERIC_SYNC on
 * success or SYNC_START_ERROR when registering for SPU events fails.
 */
int spu_sync_start(void);
/* Remove the hooks and release all cached SPU information. */
int spu_sync_stop(void);
/* Record SPU program counter samples to the oprofile event buffer. */
void spu_sync_buffer(int spu_num, unsigned int *samples,
int num_samples);
/*
 * Compute the sampling timer interval from the frequency (in kHz) and
 * the user-specified cycle count.  A freq_khz of 0 selects
 * ppc_proc_freq / 1000.
 */
void set_spu_profiling_frequency(unsigned int freq_khz, unsigned int cycles_reset);
#endif /* PR_UTIL_H */
/*
* Cell Broadband Engine OProfile Support
*
* (C) Copyright IBM Corporation 2006
*
* Authors: Maynard Johnson <maynardj@us.ibm.com>
* Carl Love <carll@us.ibm.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/hrtimer.h>
#include <linux/smp.h>
#include <linux/slab.h>
#include <asm/cell-pmu.h>
#include "pr_util.h"
/* Maximum number of trace-buffer entries stored per SPU in one pass. */
#define TRACE_ARRAY_SIZE 1024
/* Fixed-point shift used when converting cycles to nanoseconds. */
#define SCALE_SHIFT 14
/*
 * SPU PC samples: SPUS_PER_NODE consecutive rows of TRACE_ARRAY_SIZE
 * entries each, allocated in start_spu_profiling().
 */
static u32 *samples;
/* Non-zero while profiling is active; checked by the sampling timer. */
static int spu_prof_running;
/* Sampling timer period in ns; set by set_spu_profiling_frequency(). */
static unsigned int profiling_interval;
/* Number of bits each SPU PC occupies in a trace buffer word. */
#define NUM_SPU_BITS_TRBUF 16
/* Number of SPU PCs packed into each 64-bit half of a trace entry. */
#define SPUS_PER_TB_ENTRY 4
/* Number of SPUs handled per node. */
#define SPUS_PER_NODE 8
/* Mask selecting one 16-bit SPU PC from a trace buffer word. */
#define SPU_PC_MASK 0xFFFF
/* Serializes filling and draining of the shared samples array. */
static DEFINE_SPINLOCK(sample_array_lock);
/* Saved interrupt flags for sample_array_lock (a single shared global). */
unsigned long sample_array_lock_flags;
/*
 * Compute profiling_interval (the sampling timer period, in nanoseconds)
 * from the frequency in kHz and the user-supplied cycle count.  When
 * freq_khz is 0, ppc_proc_freq / 1000 is used instead.
 */
void set_spu_profiling_frequency(unsigned int freq_khz, unsigned int cycles_reset)
{
	unsigned long scaled_ns_per_cycle;

	if (freq_khz == 0)
		freq_khz = ppc_proc_freq / 1000;

	/*
	 * The basic formula for the timeout in nanoseconds is
	 *	ns = cycles_reset * (NSEC_PER_SEC / cpu frequency).
	 * Floating point is avoided by using the scaled-math technique
	 * described in linux/jiffies.h with a scale factor of SCALE_SHIFT,
	 * which provides 4 decimal places of precision -- close enough
	 * for the purpose at hand.
	 *
	 * The timeout should be small enough that the hw trace buffer
	 * never gets more than about 1/3 full at the maximum user
	 * specified (the LFSR value) hw sampling frequency, so that the
	 * trace buffer never fills even if kernel thread scheduling
	 * varies under a heavy system load.
	 */
	scaled_ns_per_cycle = (USEC_PER_SEC << SCALE_SHIFT) / freq_khz;
	profiling_interval = (scaled_ns_per_cycle * cycles_reset) >> SCALE_SHIFT;
}
/*
* Extract SPU PC from trace buffer entry
*/
static void spu_pc_extract(int cpu, int entry)
{
/* the trace buffer is 128 bits */
u64 trace_buffer[2];
u64 spu_mask;
int spu;
spu_mask = SPU_PC_MASK;
/* Each SPU PC is 16 bits; hence, four spus in each of
* the two 64-bit buffer entries that make up the
* 128-bit trace_buffer entry. Process two 64-bit values
* simultaneously.
* trace[0] SPU PC contents are: 0 1 2 3
* trace[1] SPU PC contents are: 4 5 6 7
*/
cbe_read_trace_buffer(cpu, trace_buffer);
for (spu = SPUS_PER_TB_ENTRY-1; spu >= 0; spu--) {
/* spu PC trace entry is upper 16 bits of the
* 18 bit SPU program counter
*/
samples[spu * TRACE_ARRAY_SIZE + entry]
= (spu_mask & trace_buffer[0]) << 2;
samples[(spu + SPUS_PER_TB_ENTRY) * TRACE_ARRAY_SIZE + entry]
= (spu_mask & trace_buffer[1]) << 2;
trace_buffer[0] = trace_buffer[0] >> NUM_SPU_BITS_TRBUF;
trace_buffer[1] = trace_buffer[1] >> NUM_SPU_BITS_TRBUF;
}
}
/*
 * Drain the hardware trace buffer of the given cpu into the samples
 * array.  Stops when the buffer reports empty or when TRACE_ARRAY_SIZE
 * entries have been stored.  Returns the number of entries collected.
 */
static int cell_spu_pc_collection(int cpu)
{
	u32 trace_addr;
	int collected;

	for (collected = 0; collected < TRACE_ARRAY_SIZE; collected++) {
		trace_addr = cbe_read_pm(cpu, trace_address);
		if (trace_addr & CBE_PM_TRACE_BUF_EMPTY)
			break;	/* nothing left to process */

		spu_pc_extract(cpu, collected);
	}

	return collected;
}
/*
 * hrtimer callback: collect the SPU PC samples and hand them to
 * spu_sync_buffer(), then re-arm the timer with profiling_interval.
 * Returns HRTIMER_NORESTART once spu_prof_running has been cleared.
 */
static enum hrtimer_restart profile_spus(struct hrtimer *timer)
{
	ktime_t interval;
	int cpu, node, idx, collected;

	if (!spu_prof_running)
		goto stop;

	for_each_online_cpu(cpu) {
		/* Skip CPUs whose hardware thread id is non-zero. */
		if (cbe_get_hw_thread_id(cpu))
			continue;

		node = cbe_cpu_to_node(cpu);

		/*
		 * Only one thread at a time should be processing the
		 * samples.  In the very unlikely case that processing
		 * takes so long that several are started, the lock makes
		 * sure only one of them works on the samples array: the
		 * array is shared (not per cpu) and must be loaded and
		 * then processed for a given cpu.
		 */
		spin_lock_irqsave(&sample_array_lock,
				  sample_array_lock_flags);

		collected = cell_spu_pc_collection(cpu);
		if (collected != 0) {
			for (idx = 0; idx < SPUS_PER_NODE; idx++)
				spu_sync_buffer(idx + (node * SPUS_PER_NODE),
						samples + (idx * TRACE_ARRAY_SIZE),
						collected);
		}

		spin_unlock_irqrestore(&sample_array_lock,
				       sample_array_lock_flags);
	}

	/*
	 * Ensure the spu event buffer updates are written;
	 * we don't want events intermingled.
	 */
	smp_wmb();

	interval = ktime_set(0, profiling_interval);
	if (!spu_prof_running)
		goto stop;

	hrtimer_forward(timer, timer->base->get_time(), interval);
	return HRTIMER_RESTART;

stop:
	printk(KERN_INFO "SPU_PROF: spu-prof timer ending\n");
	return HRTIMER_NORESTART;
}
static struct hrtimer timer;
/*
* Entry point for SPU profiling.
* NOTE: SPU profiling is done system-wide, not per-CPU.
*
* cycles_reset is the count value specified by the user when
* setting up OProfile to count SPU_CYCLES.
*/
int start_spu_profiling(unsigned int cycles_reset)
{
ktime_t kt;
pr_debug("timer resolution: %lu\n", TICK_NSEC);
kt = ktime_set(0, profiling_interval);
hrtimer_init(&timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
timer.expires = kt;
timer.function = profile_spus;
/* Allocate arrays for collecting SPU PC samples */
samples = kzalloc(SPUS_PER_NODE *
TRACE_ARRAY_SIZE * sizeof(u32), GFP_KERNEL);
if (!samples)
return -ENOMEM;
spu_prof_running = 1;
hrtimer_start(&timer, kt, HRTIMER_MODE_REL);
return 0;
}
void stop_spu_profiling(void)
{
spu_prof_running = 0;
hrtimer_cancel(&timer);
kfree(samples);
pr_debug("SPU_PROF: stop_spu_profiling issued\n");
}
/*
* Cell Broadband Engine OProfile Support
*
* (C) Copyright IBM Corporation 2006
*
* Author: Maynard Johnson <maynardj@us.ibm.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
/* The purpose of this file is to handle SPU event task switching
* and to record SPU context information into the OProfile
* event buffer.
*
* Additionally, the spu_sync_buffer function is provided as a helper
* for recording actual SPU program counter samples to the event buffer.
*/
#include <linux/dcookies.h>
#include <linux/kref.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/notifier.h>
#include <linux/numa.h>
#include <linux/oprofile.h>
#include <linux/spinlock.h>
#include "pr_util.h"
/* Sentinel index for release_cached_info(): release every cached entry. */
#define RELEASE_ALL 9999
/* Held while a group of entries is added to the OProfile event buffer. */
static DEFINE_SPINLOCK(buffer_lock);
/* Held while entries of the spu_info cache are installed or released. */
static DEFINE_SPINLOCK(cache_lock);
/*
 * Upper bound for valid spu_info indexes; set in spu_sync_start() to
 * spu_prof_num_nodes * 8.
 */
static int num_spu_nodes;
/* Value returned by number_of_online_nodes(); set in spu_sync_start(). */
int spu_prof_num_nodes;
/* Last overlay guard value seen per SPU; used by spu_sync_buffer(). */
int last_guard_val[MAX_NUMNODES * 8];
/* Container for caching information about an active SPU task. */
struct cached_info {
struct vma_to_fileoffset_map *map; /* freed in destroy_cached_info() */
struct spu *the_spu; /* needed to access pointer to local_store */
struct kref cache_ref; /* last put invokes destroy_cached_info() */
};
/* Cache of per-SPU information, indexed by SPU number. */
static struct cached_info *spu_info[MAX_NUMNODES * 8];
/*
 * kref release callback for a cached_info: frees its vma map and the
 * container itself, then drops the module reference taken in
 * prepare_cached_spu_info().
 */
static void destroy_cached_info(struct kref *kref)
{
	struct cached_info *victim =
		container_of(kref, struct cached_info, cache_ref);

	vma_map_free(victim->map);
	kfree(victim);
	module_put(THIS_MODULE);
}
/*
 * Return the cached_info for the passed SPU number, or NULL if the
 * index is out of range or nothing is cached.  When the slot is empty
 * and the_spu is given, the slot is first refilled from the kref stored
 * in the SPU context (taking an extra reference).
 *
 * ATTENTION: Callers are responsible for obtaining the cache_lock,
 * if needed, prior to invoking this function.
 */
static struct cached_info *get_cached_info(struct spu *the_spu, int spu_num)
{
	struct kref *ctx_ref;

	if (spu_num >= num_spu_nodes) {
		printk(KERN_ERR "SPU_PROF: "
		       "%s, line %d: Invalid index %d into spu info cache\n",
		       __FUNCTION__, __LINE__, spu_num);
		return NULL;
	}

	if (the_spu && !spu_info[spu_num]) {
		ctx_ref = spu_get_profile_private_kref(the_spu->ctx);
		if (ctx_ref) {
			spu_info[spu_num] = container_of(ctx_ref,
							 struct cached_info,
							 cache_ref);
			kref_get(&spu_info[spu_num]->cache_ref);
		}
	}

	return spu_info[spu_num];
}
/*
 * Looks for cached info for the passed spu.  If not found, the
 * cached info is created for the passed spu and installed in
 * spu_info[spu->number].
 *
 * Returns 0 for success; otherwise -ENOMEM when either the cached_info
 * or its vma map cannot be created.
 */
static int
prepare_cached_spu_info(struct spu *spu, unsigned long objectId)
{
	unsigned long flags;
	struct vma_to_fileoffset_map *new_map;
	int retval = 0;
	struct cached_info *info;

	/*
	 * We won't bother getting cache_lock here since we don't do
	 * anything with the cached_info that's returned.
	 */
	info = get_cached_info(spu, spu->number);
	if (info) {
		pr_debug("Found cached SPU info.\n");
		goto out;
	}

	/*
	 * Create cached_info and set spu_info[spu->number] to point to it.
	 * spu->number is a system-wide value, not a per-node value.
	 */
	info = kzalloc(sizeof(struct cached_info), GFP_KERNEL);
	if (!info) {
		/* Report the allocation that actually failed. */
		printk(KERN_ERR "SPU_PROF: "
		       "%s, line %d: cached_info allocation failed\n",
		       __FUNCTION__, __LINE__);
		retval = -ENOMEM;
		goto err_alloc;
	}

	new_map = create_vma_map(spu, objectId);
	if (!new_map) {
		printk(KERN_ERR "SPU_PROF: "
		       "%s, line %d: create vma_map failed\n",
		       __FUNCTION__, __LINE__);
		retval = -ENOMEM;
		goto err_alloc;
	}

	pr_debug("Created vma_map\n");
	info->map = new_map;
	info->the_spu = spu;
	kref_init(&info->cache_ref);

	spin_lock_irqsave(&cache_lock, flags);
	spu_info[spu->number] = info;
	/* Increment count before passing off ref to SPUFS. */
	kref_get(&info->cache_ref);

	/*
	 * We increment the module refcount here since SPUFS is
	 * responsible for the final destruction of the cached_info,
	 * and it must be able to access the destroy_cached_info()
	 * function defined in the OProfile module.  We decrement
	 * the module refcount in destroy_cached_info.
	 * NOTE(review): the result of try_module_get() is not checked.
	 */
	try_module_get(THIS_MODULE);
	spu_set_profile_private_kref(spu->ctx, &info->cache_ref,
				     destroy_cached_info);
	spin_unlock_irqrestore(&cache_lock, flags);
	goto out;

err_alloc:
	/* info is NULL when its own allocation failed; kfree accepts that. */
	kfree(info);
out:
	return retval;
}
/*
 * Drop the cache's reference on the cached_info of one SPU, or of every
 * SPU when spu_index is RELEASE_ALL, and clear the matching spu_info
 * slots.  An out-of-range index is logged and ignored.  Always
 * returns 0.
 *
 * NOTE: The caller is responsible for locking the cache_lock prior to
 * calling this function.
 */
static int release_cached_info(int spu_index)
{
	int first, limit, slot;

	if (spu_index == RELEASE_ALL) {
		first = 0;
		limit = num_spu_nodes;
	} else if (spu_index >= num_spu_nodes) {
		printk(KERN_ERR "SPU_PROF: "
		       "%s, line %d: "
		       "Invalid index %d into spu info cache\n",
		       __FUNCTION__, __LINE__, spu_index);
		return 0;
	} else {
		first = spu_index;
		limit = spu_index + 1;
	}

	for (slot = first; slot < limit; slot++) {
		if (!spu_info[slot])
			continue;
		kref_put(&spu_info[slot]->cache_ref, destroy_cached_info);
		spu_info[slot] = NULL;
	}

	return 0;
}
/*
 * The source code for fast_get_dcookie was "borrowed"
 * from drivers/oprofile/buffer_sync.c.
 *
 * Optimisation.  We can manage without taking the dcookie sem
 * because we cannot reach this code without at least one
 * dcookie user still being registered (namely, the reader
 * of the event buffer).
 *
 * Returns the dentry address itself when the dentry already has its
 * d_cookie flag set, otherwise the cookie obtained from get_dcookie().
 */
static inline unsigned long fast_get_dcookie(struct dentry *dentry,
					     struct vfsmount *vfsmnt)
{
	unsigned long cookie;

	if (!dentry->d_cookie) {
		get_dcookie(dentry, vfsmnt, &cookie);
		return cookie;
	}

	return (unsigned long)dentry;
}
/*
 * Look up the dcookie for the task's first VM_EXECUTABLE mapping,
 * which corresponds loosely to "application name".  Also, determine
 * the offset for the SPU ELF object.  If computed offset is
 * non-zero, it implies an embedded SPU object; otherwise, it's a
 * separate SPU binary, in which case we retrieve its dcookie.
 * For the embedded case, we must determine if SPU ELF is embedded
 * in the executable application or another file (i.e., shared lib).
 * If embedded in a shared lib, we must get the dcookie and return
 * that to the caller.
 *
 * Returns the application dcookie (0 if the SPU has no mm or no
 * executable mapping was found).  *offsetp and *spu_bin_dcookie are
 * written only when a file-backed mapping containing spu_ref is found;
 * on the failure path they are left untouched, so callers should
 * pre-initialize them.
 */
static unsigned long
get_exec_dcookie_and_offset(struct spu *spu, unsigned int *offsetp,
			    unsigned long *spu_bin_dcookie,
			    unsigned long spu_ref)
{
	unsigned long app_cookie = 0;
	unsigned int my_offset = 0;
	struct vm_area_struct *vma;
	struct mm_struct *mm = spu->mm;

	if (!mm)
		goto out;

	down_read(&mm->mmap_sem);

	/* First pass: dcookie of the first executable, file-backed vma. */
	for (vma = mm->mmap; vma; vma = vma->vm_next) {
		if (!vma->vm_file)
			continue;
		if (!(vma->vm_flags & VM_EXECUTABLE))
			continue;
		app_cookie = fast_get_dcookie(vma->vm_file->f_dentry,
					      vma->vm_file->f_vfsmnt);
		pr_debug("got dcookie for %s\n",
			 vma->vm_file->f_dentry->d_name.name);
		break;
	}

	/* Second pass: the vma that contains the SPU ELF object. */
	for (vma = mm->mmap; vma; vma = vma->vm_next) {
		if (vma->vm_start > spu_ref || vma->vm_end <= spu_ref)
			continue;
		my_offset = spu_ref - vma->vm_start;
		if (!vma->vm_file)
			goto fail_no_image_cookie;
		pr_debug("Found spu ELF at %X(object-id:%lx) for file %s\n",
			 my_offset, spu_ref,
			 vma->vm_file->f_dentry->d_name.name);
		*offsetp = my_offset;
		break;
	}

	/*
	 * The loop ends with vma == NULL when no mapping contains spu_ref;
	 * bail out instead of dereferencing a NULL vma below.
	 */
	if (!vma)
		goto fail_no_image_cookie;

	*spu_bin_dcookie = fast_get_dcookie(vma->vm_file->f_dentry,
					    vma->vm_file->f_vfsmnt);
	pr_debug("got dcookie for %s\n", vma->vm_file->f_dentry->d_name.name);

	up_read(&mm->mmap_sem);

out:
	return app_cookie;

fail_no_image_cookie:
	up_read(&mm->mmap_sem);

	printk(KERN_ERR "SPU_PROF: "
	       "%s, line %d: Cannot find dcookie for SPU binary\n",
	       __FUNCTION__, __LINE__);
	goto out;
}
/*
 * Find or create the cached context information for the passed SPU and
 * record the SPU context switch in the OProfile event buffer.
 *
 * Returns 0 on success, the error from prepare_cached_spu_info(), or
 * -ENOENT when either dcookie could not be obtained.
 */
static int process_context_switch(struct spu *spu, unsigned long objectId)
{
	unsigned long irq_state;
	unsigned long spu_cookie = 0, app_dcookie;
	unsigned int elf_offset = 0;
	int rc;

	rc = prepare_cached_spu_info(spu, objectId);
	if (rc)
		return rc;

	/*
	 * Get dcookie first because a mutex_lock is taken in that
	 * code path, so interrupts must not be disabled.
	 */
	app_dcookie = get_exec_dcookie_and_offset(spu, &elf_offset,
						  &spu_cookie, objectId);
	if (!app_dcookie || !spu_cookie)
		return -ENOENT;

	/* Record context info in event buffer */
	spin_lock_irqsave(&buffer_lock, irq_state);
	add_event_entry(ESCAPE_CODE);
	add_event_entry(SPU_CTX_SWITCH_CODE);
	add_event_entry(spu->number);
	add_event_entry(spu->pid);
	add_event_entry(spu->tgid);
	add_event_entry(app_dcookie);
	add_event_entry(spu_cookie);
	add_event_entry(elf_offset);
	spin_unlock_irqrestore(&buffer_lock, irq_state);

	/*
	 * Ensure the spu event buffer updates are written;
	 * we don't want entries intermingled.
	 */
	smp_wmb();

	return rc;
}
/*
 * Notifier callback invoked on either a bind_context or unbind_context.
 * For an unbind_context the val arg is 0 and the SPU's cached info is
 * released; otherwise val is the object-id value for the spu context
 * and the context switch is recorded.
 * The data arg is of type 'struct spu *'.
 */
static int spu_active_notify(struct notifier_block *self, unsigned long val,
			     void *data)
{
	struct spu *the_spu = data;
	unsigned long flags;
	int retval;

	pr_debug("SPU event notification arrived\n");

	if (val)
		return process_context_switch(the_spu, val);

	spin_lock_irqsave(&cache_lock, flags);
	retval = release_cached_info(the_spu->number);
	spin_unlock_irqrestore(&cache_lock, flags);

	return retval;
}

/* Notifier block registered with SPUFS by spu_sync_start(). */
static struct notifier_block spu_active = {
	.notifier_call = spu_active_notify,
};
/*
 * Derive the number of nodes from the online cpus: the count is bumped
 * by one each time a cpu's node id + 1 exceeds the count so far.
 * NOTE(review): this equals "highest node id + 1" only if cpus are
 * visited in ascending node order without gaps -- confirm.
 */
static int number_of_online_nodes(void)
{
	u32 cpu;
	int nodes = 0;

	for_each_online_cpu(cpu) {
		u32 node_ordinal = cbe_cpu_to_node(cpu) + 1;

		if (node_ordinal > nodes)
			nodes += 1;
	}

	return nodes;
}
/*
 * The main purpose of this function is to synchronize OProfile with
 * SPUFS by registering to be notified of SPU task switches.  It also
 * writes the SPU profiling header into the event buffer and resets the
 * per-SPU guard values.
 *
 * NOTE: When profiling SPUs, we must ensure that only spu_sync_start
 * is invoked and not the generic sync_start in
 * drivers/oprofile/oprof.c.  A return value of SKIP_GENERIC_SYNC or
 * SYNC_START_ERROR will accomplish this.
 */
int spu_sync_start(void)
{
	unsigned long flags = 0;
	int slot;

	spu_prof_num_nodes = number_of_online_nodes();
	num_spu_nodes = spu_prof_num_nodes * 8;

	spin_lock_irqsave(&buffer_lock, flags);
	add_event_entry(ESCAPE_CODE);
	add_event_entry(SPU_PROFILING_CODE);
	add_event_entry(num_spu_nodes);
	spin_unlock_irqrestore(&buffer_lock, flags);

	/* Register for SPU events */
	if (spu_switch_event_register(&spu_active))
		return SYNC_START_ERROR;

	for (slot = 0; slot < (MAX_NUMNODES * 8); slot++)
		last_guard_val[slot] = 0;

	pr_debug("spu_sync_start -- running.\n");
	return SKIP_GENERIC_SYNC;
}
/*
 * Record SPU program counter samples to the oprofile event buffer.
 * Each non-zero sample is translated to a file offset through the
 * SPU's cached vma map and written with the SPU number in the upper
 * 32 bits.  Samples are dropped when no cached info exists for the SPU.
 */
void spu_sync_buffer(int spu_num, unsigned int *samples,
		     int num_samples)
{
	unsigned long long spu_tag = (unsigned long long)spu_num << 32;
	unsigned long long file_offset;
	unsigned long flags;
	struct vma_to_fileoffset_map *map;
	struct cached_info *c_info;
	struct spu *the_spu;
	int idx;

	/*
	 * We need to obtain the cache_lock here because it's
	 * possible that after getting the cached_info, the SPU job
	 * corresponding to this cached_info may end, thus resulting
	 * in the destruction of the cached_info.
	 */
	spin_lock_irqsave(&cache_lock, flags);

	c_info = get_cached_info(NULL, spu_num);
	if (!c_info) {
		/*
		 * This legitimately happens when the SPU task ends before
		 * all samples are recorded.
		 * No big deal -- so we just drop a few samples.
		 */
		pr_debug("SPU_PROF: No cached SPU contex "
			 "for SPU #%d. Dropping samples.\n", spu_num);
		goto out;
	}

	map = c_info->map;
	the_spu = c_info->the_spu;

	spin_lock(&buffer_lock);
	for (idx = 0; idx < num_samples; idx++) {
		unsigned int pc = samples[idx];
		int grd_val = 0;

		if (pc == 0)
			continue;

		file_offset = vma_map_lookup(map, pc, the_spu, &grd_val);

		/*
		 * If overlays are used by this SPU application, the guard
		 * value is non-zero, indicating which overlay section is
		 * in use.  We need to discard samples taken during the
		 * time period in which an overlay occurs (i.e., the guard
		 * value changes).
		 */
		if (grd_val && grd_val != last_guard_val[spu_num]) {
			last_guard_val[spu_num] = grd_val;
			/* Drop the rest of the samples. */
			break;
		}

		add_event_entry(file_offset | spu_tag);
	}
	spin_unlock(&buffer_lock);

out:
	spin_unlock_irqrestore(&cache_lock, flags);
}
/*
 * Unregister from SPU switch events and, if that succeeded, release
 * every cached_info.  Returns the unregister error, or the result of
 * release_cached_info().
 */
int spu_sync_stop(void)
{
	unsigned long flags = 0;
	int ret;

	ret = spu_switch_event_unregister(&spu_active);
	if (ret) {
		printk(KERN_ERR "SPU_PROF: "
		       "%s, line %d: spu_switch_event_unregister returned %d\n",
		       __FUNCTION__, __LINE__, ret);
	} else {
		spin_lock_irqsave(&cache_lock, flags);
		ret = release_cached_info(RELEASE_ALL);
		spin_unlock_irqrestore(&cache_lock, flags);
	}

	pr_debug("spu_sync_stop -- done.\n");
	return ret;
}
/*
* Cell Broadband Engine OProfile Support
*
* (C) Copyright IBM Corporation 2006
*
* Author: Maynard Johnson <maynardj@us.ibm.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
/* The code in this source file is responsible for generating
* vma-to-fileOffset maps for both overlay and non-overlay SPU
* applications.
*/
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/uaccess.h>
#include <linux/elf.h>
#include "pr_util.h"
/* Free every node of a vma-to-fileoffset map.  A NULL map is a no-op. */
void vma_map_free(struct vma_to_fileoffset_map *map)
{
	struct vma_to_fileoffset_map *node, *following;

	for (node = map; node; node = following) {
		/* Fetch the successor before the node is released. */
		following = node->next;
		kfree(node);
	}
}
/*
 * Translate an SPU address into an offset within the SPU ELF file.
 *
 * The first map entry that covers vma is used; an entry with a guard
 * pointer only matches when the word at that location in the SPU local
 * store equals the entry's guard value, in which case *grd_val is set
 * to it.
 *
 * When nothing matches, the result defaults to the address plus a flag
 * value (0x10000000).  Addresses of dynamically generated code can't be
 * found in the vma map; the flagged value is sent on to the user space
 * tools so they can be reported rather than just thrown away.
 */
unsigned int
vma_map_lookup(struct vma_to_fileoffset_map *map, unsigned int vma,
	       const struct spu *aSpu, int *grd_val)
{
	u32 result = 0x10000000 + vma;
	u32 ovly_grd;

	while (map) {
		if (vma >= map->vma && vma < map->vma + map->size) {
			if (!map->guard_ptr) {
				result = vma - map->vma + map->offset;
				break;
			}

			ovly_grd = *(u32 *)(aSpu->local_store + map->guard_ptr);
			if (ovly_grd == map->guard_val) {
				*grd_val = ovly_grd;
				result = vma - map->vma + map->offset;
				break;
			}
		}
		map = map->next;
	}

	return result;
}
/*
 * Allocate a new map entry and push it onto the front of the list.
 * Returns the new list head.  On allocation failure the whole existing
 * list is freed and NULL is returned.
 */
static struct vma_to_fileoffset_map *
vma_map_add(struct vma_to_fileoffset_map *map, unsigned int vma,
	    unsigned int size, unsigned int offset, unsigned int guard_ptr,
	    unsigned int guard_val)
{
	struct vma_to_fileoffset_map *entry;

	entry = kzalloc(sizeof(struct vma_to_fileoffset_map), GFP_KERNEL);
	if (entry == NULL) {
		printk(KERN_ERR "SPU_PROF: %s, line %d: malloc failed\n",
		       __FUNCTION__, __LINE__);
		vma_map_free(map);
		return NULL;
	}

	entry->vma = vma;
	entry->size = size;
	entry->offset = offset;
	entry->guard_ptr = guard_ptr;
	entry->guard_val = guard_val;
	entry->next = map;

	return entry;
}
/*
 * Parse the SPE ELF image located at user address spu_elf_start and
 * generate a list of vma_maps: one entry per PT_LOAD program header
 * (except those with bit 27 set in p_flags) and, when the overlay
 * symbols are present, one guarded entry per overlay section.
 *
 * A pointer to the first vma_map in the generated list is returned.
 * NULL is returned on any failure; entries built up to that point are
 * freed.
 */
struct vma_to_fileoffset_map *create_vma_map(const struct spu *aSpu,
					     unsigned long spu_elf_start)
{
	static const unsigned char expected[EI_PAD] = {
		[EI_MAG0] = ELFMAG0,
		[EI_MAG1] = ELFMAG1,
		[EI_MAG2] = ELFMAG2,
		[EI_MAG3] = ELFMAG3,
		[EI_CLASS] = ELFCLASS32,
		[EI_DATA] = ELFDATA2MSB,
		[EI_VERSION] = EV_CURRENT,
		[EI_OSABI] = ELFOSABI_NONE
	};

	int grd_val = 0;
	struct vma_to_fileoffset_map *map = NULL;
	struct spu_overlay_info ovly;
	unsigned int overlay_tbl_offset;
	unsigned long phdr_start, shdr_start;
	Elf32_Ehdr ehdr;
	Elf32_Phdr phdr;
	Elf32_Shdr shdr, shdr_str;
	Elf32_Sym sym;
	int i, j;
	char name[32];

	unsigned int ovly_table_sym = 0;
	unsigned int ovly_buf_table_sym = 0;
	unsigned int ovly_table_end_sym = 0;
	unsigned int ovly_buf_table_end_sym = 0;
	unsigned long ovly_table;
	unsigned int n_ovlys;

	/* Get and validate ELF header. */

	if (copy_from_user(&ehdr, (void *) spu_elf_start, sizeof (ehdr)))
		goto fail;

	if (memcmp(ehdr.e_ident, expected, EI_PAD) != 0) {
		printk(KERN_ERR "SPU_PROF: "
		       "%s, line %d: Unexpected e_ident parsing SPU ELF\n",
		       __FUNCTION__, __LINE__);
		goto fail;
	}
	if (ehdr.e_machine != EM_SPU) {
		printk(KERN_ERR "SPU_PROF: "
		       "%s, line %d: Unexpected e_machine parsing SPU ELF\n",
		       __FUNCTION__,  __LINE__);
		goto fail;
	}
	if (ehdr.e_type != ET_EXEC) {
		printk(KERN_ERR "SPU_PROF: "
		       "%s, line %d: Unexpected e_type parsing SPU ELF\n",
		       __FUNCTION__, __LINE__);
		goto fail;
	}
	phdr_start = spu_elf_start + ehdr.e_phoff;
	shdr_start = spu_elf_start + ehdr.e_shoff;

	/* Traverse program headers. */
	for (i = 0; i < ehdr.e_phnum; i++) {
		if (copy_from_user(&phdr,
				   (void *) (phdr_start + i * sizeof(phdr)),
				   sizeof(phdr)))
			goto fail;

		if (phdr.p_type != PT_LOAD)
			continue;
		if (phdr.p_flags & (1 << 27))
			continue;

		/* On failure vma_map_add() has already freed the list. */
		map = vma_map_add(map, phdr.p_vaddr, phdr.p_memsz,
				  phdr.p_offset, 0, 0);
		if (!map)
			goto fail;
	}

	pr_debug("SPU_PROF: Created non-overlay maps\n");
	/* Traverse section table and search for overlay-related symbols. */
	for (i = 0; i < ehdr.e_shnum; i++) {
		if (copy_from_user(&shdr,
				   (void *) (shdr_start + i * sizeof(shdr)),
				   sizeof(shdr)))
			goto fail;

		if (shdr.sh_type != SHT_SYMTAB)
			continue;
		if (shdr.sh_entsize != sizeof (sym))
			continue;

		/* sh_link of a symbol table names its string table. */
		if (copy_from_user(&shdr_str,
				   (void *) (shdr_start + shdr.sh_link *
					     sizeof(shdr)),
				   sizeof(shdr)))
			goto fail;

		if (shdr_str.sh_type != SHT_STRTAB)
			goto fail;

		for (j = 0; j < shdr.sh_size / sizeof (sym); j++) {
			if (copy_from_user(&sym, (void *) (spu_elf_start +
						       shdr.sh_offset + j *
							   sizeof (sym)),
					   sizeof (sym)))
				goto fail;

			/*
			 * 20 bytes covers the longest name of interest,
			 * "_ovly_buf_table_end", including its NUL.
			 */
			if (copy_from_user(name, (void *)
					   (spu_elf_start + shdr_str.sh_offset +
					    sym.st_name),
					   20))
				goto fail;

			/* The compare lengths include the terminating NUL. */
			if (memcmp(name, "_ovly_table", 12) == 0)
				ovly_table_sym = sym.st_value;
			if (memcmp(name, "_ovly_buf_table", 16) == 0)
				ovly_buf_table_sym = sym.st_value;
			if (memcmp(name, "_ovly_table_end", 16) == 0)
				ovly_table_end_sym = sym.st_value;
			if (memcmp(name, "_ovly_buf_table_end", 20) == 0)
				ovly_buf_table_end_sym = sym.st_value;
		}
	}

	/* If we don't have overlays, we're done.  */
	if (ovly_table_sym == 0 || ovly_buf_table_sym == 0
	    || ovly_table_end_sym == 0 || ovly_buf_table_end_sym == 0) {
		pr_debug("SPU_PROF: No overlay table found\n");
		goto out;
	} else {
		pr_debug("SPU_PROF: Overlay table found\n");
	}

	/*
	 * The _ovly_table symbol represents a table with one entry
	 * per overlay section.  The _ovly_buf_table symbol represents
	 * a table with one entry per overlay region.
	 * The struct spu_overlay_info gives the structure of the _ovly_table
	 * entries.  The structure of _ovly_table_buf is simply one
	 * u32 word per entry.
	 */
	overlay_tbl_offset = vma_map_lookup(map, ovly_table_sym,
					    aSpu, &grd_val);
	/*
	 * vma_map_lookup() returns an unsigned value and signals "not
	 * found" by returning 0x10000000 + vma, so a "< 0" test can never
	 * fire.  ovly_table_sym is non-zero here, hence a miss is always
	 * strictly greater than the flag value.
	 */
	if (overlay_tbl_offset > 0x10000000) {
		printk(KERN_ERR "SPU_PROF: "
		       "%s, line %d: Error finding SPU overlay table\n",
		       __FUNCTION__, __LINE__);
		goto fail;
	}
	ovly_table = spu_elf_start + overlay_tbl_offset;

	n_ovlys = (ovly_table_end_sym -
		   ovly_table_sym) / sizeof (ovly);

	/* Traverse overlay table.  */
	for (i = 0; i < n_ovlys; i++) {
		if (copy_from_user(&ovly, (void *)
				   (ovly_table + i * sizeof (ovly)),
				   sizeof (ovly)))
			goto fail;

		/*
		 * The ovly.vma/size/offset arguments are analogous to the same
		 * arguments used above for non-overlay maps.  The final two
		 * args are referred to as the guard pointer and the guard
		 * value.
		 * The guard pointer is an entry in the _ovly_buf_table,
		 * computed using ovly.buf as the index into the table.  Since
		 * ovly.buf values begin at '1' to reference the first (or 0th)
		 * entry in the _ovly_buf_table, the computation subtracts 1
		 * from ovly.buf.
		 * The guard value is stored in the _ovly_buf_table entry and
		 * is an index (starting at 1) back to the _ovly_table entry
		 * that is pointing at this _ovly_buf_table entry.  So, for
		 * example, for an overlay scenario with one overlay segment
		 * and two overlay sections:
		 * - Section 1 points to the first entry of the
		 *   _ovly_buf_table, which contains a guard value
		 *   of '1', referencing the first (index=0) entry of
		 *   _ovly_table.
		 * - Section 2 points to the second entry of the
		 *   _ovly_buf_table, which contains a guard value
		 *   of '2', referencing the second (index=1) entry of
		 *   _ovly_table.
		 */
		map = vma_map_add(map, ovly.vma, ovly.size, ovly.offset,
				  ovly_buf_table_sym + (ovly.buf-1) * 4, i+1);
		if (!map)
			goto fail;
	}
	goto out;

fail:
	/*
	 * Release whatever was built so far instead of leaking it.  map is
	 * already NULL when vma_map_add() failed, which vma_map_free()
	 * accepts.
	 */
	vma_map_free(map);
	map = NULL;
out:
	return map;
}
......@@ -29,6 +29,8 @@ static struct op_powerpc_model *model;
static struct op_counter_config ctr[OP_MAX_COUNTER];
static struct op_system_config sys;
static int op_per_cpu_rc;
static void op_handle_interrupt(struct pt_regs *regs)
{
model->handle_interrupt(regs, ctr);
......@@ -36,25 +38,41 @@ static void op_handle_interrupt(struct pt_regs *regs)
static void op_powerpc_cpu_setup(void *dummy)
{
model->cpu_setup(ctr);
int ret;
ret = model->cpu_setup(ctr);
if (ret != 0)
op_per_cpu_rc = ret;
}
static int op_powerpc_setup(void)
{
int err;
op_per_cpu_rc = 0;
/* Grab the hardware */
err = reserve_pmc_hardware(op_handle_interrupt);
if (err)
return err;
/* Pre-compute the values to stuff in the hardware registers. */
model->reg_setup(ctr, &sys, model->num_counters);
op_per_cpu_rc = model->reg_setup(ctr, &sys, model->num_counters);
/* Configure the registers on all cpus. */
if (op_per_cpu_rc)
goto out;
/* Configure the registers on all cpus. If an error occurs on one
* of the cpus, op_per_cpu_rc will be set to the error */
on_each_cpu(op_powerpc_cpu_setup, NULL, 0, 1);
return 0;
out: if (op_per_cpu_rc) {
/* error on setup release the performance counter hardware */
release_pmc_hardware();
}
return op_per_cpu_rc;
}
static void op_powerpc_shutdown(void)
......@@ -64,16 +82,29 @@ static void op_powerpc_shutdown(void)
static void op_powerpc_cpu_start(void *dummy)
{
model->start(ctr);
/* If any of the cpus has returned an error, set the
* global flag to the error so it can be returned
* to the generic OProfile caller.
*/
int ret;
ret = model->start(ctr);
if (ret != 0)
op_per_cpu_rc = ret;
}
static int op_powerpc_start(void)
{
op_per_cpu_rc = 0;
if (model->global_start)
model->global_start(ctr);
if (model->start)
return model->global_start(ctr);
if (model->start) {
on_each_cpu(op_powerpc_cpu_start, NULL, 0, 1);
return 0;
return op_per_cpu_rc;
}
return -EIO; /* No start function is defined for this
power architecture */
}
static inline void op_powerpc_cpu_stop(void *dummy)
......@@ -147,11 +178,13 @@ int __init oprofile_arch_init(struct oprofile_operations *ops)
switch (cur_cpu_spec->oprofile_type) {
#ifdef CONFIG_PPC64
#ifdef CONFIG_PPC_CELL_NATIVE
#ifdef CONFIG_OPROFILE_CELL
case PPC_OPROFILE_CELL:
if (firmware_has_feature(FW_FEATURE_LPAR))
return -ENODEV;
model = &op_model_cell;
ops->sync_start = model->sync_start;
ops->sync_stop = model->sync_stop;
break;
#endif
case PPC_OPROFILE_RS64:
......
......@@ -81,7 +81,7 @@ static void pmc_stop_ctrs(void)
/* Configures the counters on this CPU based on the global
* settings */
static void fsl7450_cpu_setup(struct op_counter_config *ctr)
static int fsl7450_cpu_setup(struct op_counter_config *ctr)
{
/* freeze all counters */
pmc_stop_ctrs();
......@@ -89,12 +89,14 @@ static void fsl7450_cpu_setup(struct op_counter_config *ctr)
mtspr(SPRN_MMCR0, mmcr0_val);
mtspr(SPRN_MMCR1, mmcr1_val);
mtspr(SPRN_MMCR2, mmcr2_val);
return 0;
}
#define NUM_CTRS 6
/* Configures the global settings for the counters on all CPUs. */
static void fsl7450_reg_setup(struct op_counter_config *ctr,
static int fsl7450_reg_setup(struct op_counter_config *ctr,
struct op_system_config *sys,
int num_ctrs)
{
......@@ -126,10 +128,12 @@ static void fsl7450_reg_setup(struct op_counter_config *ctr,
| mmcr1_event6(ctr[5].event);
mmcr2_val = 0;
return 0;
}
/* Sets the counters on this CPU to the chosen values, and starts them */
static void fsl7450_start(struct op_counter_config *ctr)
static int fsl7450_start(struct op_counter_config *ctr)
{
int i;
......@@ -148,6 +152,8 @@ static void fsl7450_start(struct op_counter_config *ctr)
pmc_start_ctrs();
oprofile_running = 1;
return 0;
}
/* Stop the counters on this CPU */
......@@ -193,7 +199,7 @@ static void fsl7450_handle_interrupt(struct pt_regs *regs,
/* The freeze bit was set by the interrupt. */
/* Clear the freeze bit, and reenable the interrupt.
* The counters won't actually start until the rfi clears
* the PMM bit */
* the PM/M bit */
pmc_start_ctrs();
}
......
......@@ -244,7 +244,7 @@ static void dump_pmcs(void)
mfpmr(PMRN_PMLCA3), mfpmr(PMRN_PMLCB3));
}
static void fsl_booke_cpu_setup(struct op_counter_config *ctr)
static int fsl_booke_cpu_setup(struct op_counter_config *ctr)
{
int i;
......@@ -258,9 +258,11 @@ static void fsl_booke_cpu_setup(struct op_counter_config *ctr)
set_pmc_user_kernel(i, ctr[i].user, ctr[i].kernel);
}
return 0;
}
static void fsl_booke_reg_setup(struct op_counter_config *ctr,
static int fsl_booke_reg_setup(struct op_counter_config *ctr,
struct op_system_config *sys,
int num_ctrs)
{
......@@ -276,9 +278,10 @@ static void fsl_booke_reg_setup(struct op_counter_config *ctr,
for (i = 0; i < num_counters; ++i)
reset_value[i] = 0x80000000UL - ctr[i].count;
return 0;
}
static void fsl_booke_start(struct op_counter_config *ctr)
static int fsl_booke_start(struct op_counter_config *ctr)
{
int i;
......@@ -308,6 +311,8 @@ static void fsl_booke_start(struct op_counter_config *ctr)
pr_debug("start on cpu %d, pmgc0 %x\n", smp_processor_id(),
mfpmr(PMRN_PMGC0));
return 0;
}
static void fsl_booke_stop(void)
......
......@@ -89,7 +89,7 @@ static inline void ctr_write(unsigned int i, u64 val)
/* precompute the values to stuff in the hardware registers */
static void pa6t_reg_setup(struct op_counter_config *ctr,
static int pa6t_reg_setup(struct op_counter_config *ctr,
struct op_system_config *sys,
int num_ctrs)
{
......@@ -135,10 +135,12 @@ static void pa6t_reg_setup(struct op_counter_config *ctr,
pr_debug("reset_value for pmc%u inited to 0x%lx\n",
pmc, reset_value[pmc]);
}
return 0;
}
/* configure registers on this cpu */
static void pa6t_cpu_setup(struct op_counter_config *ctr)
static int pa6t_cpu_setup(struct op_counter_config *ctr)
{
u64 mmcr0 = mmcr0_val;
u64 mmcr1 = mmcr1_val;
......@@ -154,9 +156,11 @@ static void pa6t_cpu_setup(struct op_counter_config *ctr)
mfspr(SPRN_PA6T_MMCR0));
pr_debug("setup on cpu %d, mmcr1 %016lx\n", smp_processor_id(),
mfspr(SPRN_PA6T_MMCR1));
return 0;
}
static void pa6t_start(struct op_counter_config *ctr)
static int pa6t_start(struct op_counter_config *ctr)
{
int i;
......@@ -174,6 +178,8 @@ static void pa6t_start(struct op_counter_config *ctr)
oprofile_running = 1;
pr_debug("start on cpu %d, mmcr0 %lx\n", smp_processor_id(), mmcr0);
return 0;
}
static void pa6t_stop(void)
......
......@@ -32,7 +32,7 @@ static u32 mmcr0_val;
static u64 mmcr1_val;
static u64 mmcra_val;
static void power4_reg_setup(struct op_counter_config *ctr,
static int power4_reg_setup(struct op_counter_config *ctr,
struct op_system_config *sys,
int num_ctrs)
{
......@@ -60,6 +60,8 @@ static void power4_reg_setup(struct op_counter_config *ctr,
mmcr0_val &= ~MMCR0_PROBLEM_DISABLE;
else
mmcr0_val |= MMCR0_PROBLEM_DISABLE;
return 0;
}
extern void ppc64_enable_pmcs(void);
......@@ -84,7 +86,7 @@ static inline int mmcra_must_set_sample(void)
return 0;
}
static void power4_cpu_setup(struct op_counter_config *ctr)
static int power4_cpu_setup(struct op_counter_config *ctr)
{
unsigned int mmcr0 = mmcr0_val;
unsigned long mmcra = mmcra_val;
......@@ -111,9 +113,11 @@ static void power4_cpu_setup(struct op_counter_config *ctr)
mfspr(SPRN_MMCR1));
dbg("setup on cpu %d, mmcra %lx\n", smp_processor_id(),
mfspr(SPRN_MMCRA));
return 0;
}
static void power4_start(struct op_counter_config *ctr)
static int power4_start(struct op_counter_config *ctr)
{
int i;
unsigned int mmcr0;
......@@ -148,6 +152,7 @@ static void power4_start(struct op_counter_config *ctr)
oprofile_running = 1;
dbg("start on cpu %d, mmcr0 %x\n", smp_processor_id(), mmcr0);
return 0;
}
static void power4_stop(void)
......
......@@ -88,7 +88,7 @@ static unsigned long reset_value[OP_MAX_COUNTER];
static int num_counters;
static void rs64_reg_setup(struct op_counter_config *ctr,
static int rs64_reg_setup(struct op_counter_config *ctr,
struct op_system_config *sys,
int num_ctrs)
{
......@@ -100,9 +100,10 @@ static void rs64_reg_setup(struct op_counter_config *ctr,
reset_value[i] = 0x80000000UL - ctr[i].count;
/* XXX setup user and kernel profiling */
return 0;
}
static void rs64_cpu_setup(struct op_counter_config *ctr)
static int rs64_cpu_setup(struct op_counter_config *ctr)
{
unsigned int mmcr0;
......@@ -125,9 +126,11 @@ static void rs64_cpu_setup(struct op_counter_config *ctr)
mfspr(SPRN_MMCR0));
dbg("setup on cpu %d, mmcr1 %lx\n", smp_processor_id(),
mfspr(SPRN_MMCR1));
return 0;
}
static void rs64_start(struct op_counter_config *ctr)
static int rs64_start(struct op_counter_config *ctr)
{
int i;
unsigned int mmcr0;
......@@ -155,6 +158,7 @@ static void rs64_start(struct op_counter_config *ctr)
mtspr(SPRN_MMCR0, mmcr0);
dbg("start on cpu %d, mmcr0 %x\n", smp_processor_id(), mmcr0);
return 0;
}
static void rs64_stop(void)
......
......@@ -22,6 +22,7 @@
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <asm/atomic.h>
#include <asm/spu.h>
......@@ -81,6 +82,8 @@ void destroy_spu_context(struct kref *kref)
spu_fini_csa(&ctx->csa);
if (ctx->gang)
spu_gang_remove_ctx(ctx->gang, ctx);
if (ctx->prof_priv_kref)
kref_put(ctx->prof_priv_kref, ctx->prof_priv_release);
BUG_ON(!list_empty(&ctx->rq));
atomic_dec(&nr_spu_contexts);
kfree(ctx);
......@@ -185,3 +188,20 @@ void spu_release_saved(struct spu_context *ctx)
spu_release(ctx);
}
/*
 * Attach profiler-private data to an SPU context.
 *
 * @ctx:               context that will carry the reference
 * @prof_info_kref:    kref embedded in the profiler's private data
 * @prof_info_release: release callback handed to kref_put() for that kref
 *
 * Both values are simply stored in the context; no reference is taken
 * here, and any previously stored pair is overwritten without being put.
 * destroy_spu_context() calls kref_put() with this pair when
 * ctx->prof_priv_kref is non-NULL.
 */
void spu_set_profile_private_kref(struct spu_context *ctx,
				  struct kref *prof_info_kref,
				  void (*prof_info_release)(struct kref *kref))
{
	ctx->prof_priv_release = prof_info_release;
	ctx->prof_priv_kref = prof_info_kref;
}
EXPORT_SYMBOL_GPL(spu_set_profile_private_kref);
/*
 * Return the profiler-private kref pointer currently stored in the
 * context (ctx->prof_priv_kref), as set by
 * spu_set_profile_private_kref().  No additional reference is taken.
 * The pointer is returned as void *, so callers convert it back to the
 * type they stored.
 */
void *spu_get_profile_private_kref(struct spu_context *ctx)
{
return ctx->prof_priv_kref;
}
EXPORT_SYMBOL_GPL(spu_get_profile_private_kref);
......@@ -274,6 +274,7 @@ static void spu_bind_context(struct spu *spu, struct spu_context *ctx)
ctx->spu = spu;
ctx->ops = &spu_hw_ops;
spu->pid = current->pid;
spu->tgid = current->tgid;
spu_associate_mm(spu, ctx->owner);
spu->ibox_callback = spufs_ibox_callback;
spu->wbox_callback = spufs_wbox_callback;
......@@ -456,6 +457,7 @@ static void spu_unbind_context(struct spu *spu, struct spu_context *ctx)
spu->dma_callback = NULL;
spu_associate_mm(spu, NULL);
spu->pid = 0;
spu->tgid = 0;
ctx->ops = &spu_backing_ops;
spu->flags = 0;
spu->ctx = NULL;
......
......@@ -85,6 +85,8 @@ struct spu_context {
struct list_head gang_list;
struct spu_gang *gang;
struct kref *prof_priv_kref;
void ( * prof_priv_release) (struct kref *kref);
/* owner thread */
pid_t tid;
......
......@@ -26,6 +26,7 @@
#include <linux/profile.h>
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/oprofile.h>
#include <linux/sched.h>
#include "oprofile_stats.h"
......
......@@ -20,27 +20,9 @@ void free_event_buffer(void);
/* wake up the process sleeping on the event file */
void wake_up_buffer_waiter(void);
/* Each escaped entry is prefixed by ESCAPE_CODE
* then one of the following codes, then the
* relevant data.
*/
#define ESCAPE_CODE ~0UL
#define CTX_SWITCH_CODE 1
#define CPU_SWITCH_CODE 2
#define COOKIE_SWITCH_CODE 3
#define KERNEL_ENTER_SWITCH_CODE 4
#define KERNEL_EXIT_SWITCH_CODE 5
#define MODULE_LOADED_CODE 6
#define CTX_TGID_CODE 7
#define TRACE_BEGIN_CODE 8
#define TRACE_END_CODE 9
#define INVALID_COOKIE ~0UL
#define NO_COOKIE 0UL
/* add data to the event buffer */
void add_event_entry(unsigned long data);
extern const struct file_operations event_buffer_fops;
/* mutex between sync_cpu_buffers() and the
......
......@@ -53,9 +53,24 @@ int oprofile_setup(void)
* us missing task deaths and eventually oopsing
* when trying to process the event buffer.
*/
if (oprofile_ops.sync_start) {
int sync_ret = oprofile_ops.sync_start();
switch (sync_ret) {
case 0:
goto post_sync;
case 1:
goto do_generic;
case -1:
goto out3;
default:
goto out3;
}
}
do_generic:
if ((err = sync_start()))
goto out3;
post_sync:
is_setup = 1;
mutex_unlock(&start_mutex);
return 0;
......@@ -118,7 +133,20 @@ void oprofile_stop(void)
void oprofile_shutdown(void)
{
mutex_lock(&start_mutex);
if (oprofile_ops.sync_stop) {
int sync_ret = oprofile_ops.sync_stop();
switch (sync_ret) {
case 0:
goto post_sync;
case 1:
goto do_generic;
default:
goto post_sync;
}
}
do_generic:
sync_stop();
post_sync:
if (oprofile_ops.shutdown)
oprofile_ops.shutdown();
is_setup = 0;
......
......@@ -39,14 +39,16 @@ struct op_system_config {
/* Per-arch configuration */
struct op_powerpc_model {
void (*reg_setup) (struct op_counter_config *,
int (*reg_setup) (struct op_counter_config *,
struct op_system_config *,
int num_counters);
void (*cpu_setup) (struct op_counter_config *);
void (*start) (struct op_counter_config *);
void (*global_start) (struct op_counter_config *);
int (*cpu_setup) (struct op_counter_config *);
int (*start) (struct op_counter_config *);
int (*global_start) (struct op_counter_config *);
void (*stop) (void);
void (*global_stop) (void);
int (*sync_start)(void);
int (*sync_stop)(void);
void (*handle_interrupt) (struct pt_regs *,
struct op_counter_config *);
int num_counters;
......
......@@ -138,6 +138,7 @@ struct spu {
struct spu_runqueue *rq;
unsigned long long timestamp;
pid_t pid;
pid_t tgid;
int class_0_pending;
spinlock_t register_lock;
......@@ -217,6 +218,20 @@ extern void spu_associate_mm(struct spu *spu, struct mm_struct *mm);
struct mm_struct;
extern void spu_flush_all_slbs(struct mm_struct *mm);
/* This interface allows a profiler (e.g., OProfile) to store a ref
* to spu context information that it creates. This caching technique
* avoids the need to recreate this information after a save/restore operation.
*
* Assumes the caller has already incremented the ref count on
* prof_info_kref; destroy_spu_context then calls kref_put on
* prof_info_kref (with prof_info_release) when the context is freed.
*/
void spu_set_profile_private_kref(struct spu_context *ctx,
struct kref *prof_info_kref,
void ( * prof_info_release) (struct kref *kref));
void *spu_get_profile_private_kref(struct spu_context *ctx);
/* system callbacks from the SPU */
struct spu_syscall_block {
u64 nr_ret;
......
......@@ -12,6 +12,7 @@
#ifdef CONFIG_PROFILING
#include <linux/dcache.h>
#include <linux/types.h>
struct dcookie_user;
......
......@@ -21,6 +21,7 @@
#define EM_SPARC32PLUS 18 /* Sun's "v8plus" */
#define EM_PPC 20 /* PowerPC */
#define EM_PPC64 21 /* PowerPC64 */
#define EM_SPU 23 /* Cell BE SPU */
#define EM_SH 42 /* SuperH */
#define EM_SPARCV9 43 /* SPARC v9 64-bit */
#define EM_IA_64 50 /* HP/Intel IA-64 */
......
......@@ -17,6 +17,26 @@
#include <linux/spinlock.h>
#include <asm/atomic.h>
/* Each escaped entry is prefixed by ESCAPE_CODE
* then one of the following codes, then the
* relevant data.
* These #defines live in this file so that arch-specific
* buffer sync'ing code can access them.
*/
#define ESCAPE_CODE ~0UL
#define CTX_SWITCH_CODE 1
#define CPU_SWITCH_CODE 2
#define COOKIE_SWITCH_CODE 3
#define KERNEL_ENTER_SWITCH_CODE 4
#define KERNEL_EXIT_SWITCH_CODE 5
#define MODULE_LOADED_CODE 6
#define CTX_TGID_CODE 7
#define TRACE_BEGIN_CODE 8
#define TRACE_END_CODE 9
#define XEN_ENTER_SWITCH_CODE 10
#define SPU_PROFILING_CODE 11
#define SPU_CTX_SWITCH_CODE 12
struct super_block;
struct dentry;
struct file_operations;
......@@ -35,6 +55,14 @@ struct oprofile_operations {
int (*start)(void);
/* Stop delivering interrupts. */
void (*stop)(void);
/* Arch-specific buffer sync functions.
* Return value = 0: Success
* Return value = -1: Failure
* Return value = 1: Run generic sync function
*/
int (*sync_start)(void);
int (*sync_stop)(void);
/* Initiate a stack backtrace. Optional. */
void (*backtrace)(struct pt_regs * const regs, unsigned int depth);
/* CPU identification string. */
......@@ -55,6 +83,13 @@ int oprofile_arch_init(struct oprofile_operations * ops);
*/
void oprofile_arch_exit(void);
/**
* Add data to the event buffer.
* The data passed is free-form, but typically consists of
* file offsets, dcookies, context information, and ESCAPE codes.
*/
void add_event_entry(unsigned long data);
/**
* Add a sample. This may be called from any context. Pass
* smp_processor_id() as cpu.
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册