提交 6a306e8b 编写于 作者: L Linus Torvalds

Merge branch 'release' of git://git.kernel.org/pub/scm/linux/kernel/git/aegl/linux-2.6

* 'release' of git://git.kernel.org/pub/scm/linux/kernel/git/aegl/linux-2.6:
  [IA64] Fix large MCA bootmem allocation
  [IA64] Simplify cpu_idle_wait
  [IA64] Synchronize RBS on PTRACE_ATTACH
  [IA64] Synchronize kernel RSE to user-space and back
  [IA64] Rename TIF_PERFMON_WORK back to TIF_NOTIFY_RESUME
  [IA64] Wire up timerfd_{create,settime,gettime} syscalls
......@@ -1573,7 +1573,7 @@ sys_call_table:
data8 sys_fchmodat
data8 sys_faccessat
data8 sys_pselect6
data8 sys_ppoll
data8 sys_ppoll // 1295
data8 sys_unshare
data8 sys_splice
data8 sys_set_robust_list
......@@ -1588,5 +1588,8 @@ sys_call_table:
data8 sys_signalfd
data8 sys_ni_syscall
data8 sys_eventfd
data8 sys_timerfd_create // 1310
data8 sys_timerfd_settime
data8 sys_timerfd_gettime
.org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls
......@@ -17,7 +17,7 @@
* Copyright (C) 2000 Intel
* Copyright (C) Chuck Fleckenstein <cfleck@co.intel.com>
*
* Copyright (C) 1999, 2004 Silicon Graphics, Inc.
* Copyright (C) 1999, 2004-2008 Silicon Graphics, Inc.
* Copyright (C) Vijay Chander <vijay@engr.sgi.com>
*
* Copyright (C) 2006 FUJITSU LIMITED
......@@ -1762,11 +1762,8 @@ format_mca_init_stack(void *mca_data, unsigned long offset,
/* Caller prevents this from being called after init */
static void * __init_refok mca_bootmem(void)
{
void *p;
p = alloc_bootmem(sizeof(struct ia64_mca_cpu) * NR_CPUS +
KERNEL_STACK_SIZE);
return (void *)ALIGN((unsigned long)p, KERNEL_STACK_SIZE);
return __alloc_bootmem(sizeof(struct ia64_mca_cpu),
KERNEL_STACK_SIZE, 0);
}
/* Do per-CPU MCA-related initialization. */
......@@ -1774,33 +1771,33 @@ void __cpuinit
ia64_mca_cpu_init(void *cpu_data)
{
void *pal_vaddr;
void *data;
long sz = sizeof(struct ia64_mca_cpu);
int cpu = smp_processor_id();
static int first_time = 1;
if (first_time) {
void *mca_data;
int cpu;
first_time = 0;
mca_data = mca_bootmem();
for (cpu = 0; cpu < NR_CPUS; cpu++) {
format_mca_init_stack(mca_data,
offsetof(struct ia64_mca_cpu, mca_stack),
"MCA", cpu);
format_mca_init_stack(mca_data,
offsetof(struct ia64_mca_cpu, init_stack),
"INIT", cpu);
__per_cpu_mca[cpu] = __pa(mca_data);
mca_data += sizeof(struct ia64_mca_cpu);
}
}
/*
* The MCA info structure was allocated earlier and its
* physical address saved in __per_cpu_mca[cpu]. Copy that
* address * to ia64_mca_data so we can access it as a per-CPU
* variable.
* Structure will already be allocated if cpu has been online,
* then offlined.
*/
__get_cpu_var(ia64_mca_data) = __per_cpu_mca[smp_processor_id()];
if (__per_cpu_mca[cpu]) {
data = __va(__per_cpu_mca[cpu]);
} else {
if (first_time) {
data = mca_bootmem();
first_time = 0;
} else
data = page_address(alloc_pages_node(numa_node_id(),
GFP_KERNEL, get_order(sz)));
if (!data)
panic("Could not allocate MCA memory for cpu %d\n",
cpu);
}
format_mca_init_stack(data, offsetof(struct ia64_mca_cpu, mca_stack),
"MCA", cpu);
format_mca_init_stack(data, offsetof(struct ia64_mca_cpu, init_stack),
"INIT", cpu);
__get_cpu_var(ia64_mca_data) = __per_cpu_mca[cpu] = __pa(data);
/*
* Stash away a copy of the PTE needed to map the per-CPU page.
......
......@@ -585,21 +585,6 @@ pfm_put_task(struct task_struct *task)
if (task != current) put_task_struct(task);
}
static inline void
pfm_set_task_notify(struct task_struct *task)
{
struct thread_info *info;
info = (struct thread_info *) ((char *) task + IA64_TASK_SIZE);
set_bit(TIF_PERFMON_WORK, &info->flags);
}
static inline void
pfm_clear_task_notify(void)
{
clear_thread_flag(TIF_PERFMON_WORK);
}
static inline void
pfm_reserve_page(unsigned long a)
{
......@@ -3724,7 +3709,7 @@ pfm_restart(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
PFM_SET_WORK_PENDING(task, 1);
pfm_set_task_notify(task);
tsk_set_notify_resume(task);
/*
* XXX: send reschedule if task runs on another CPU
......@@ -5082,7 +5067,7 @@ pfm_handle_work(void)
PFM_SET_WORK_PENDING(current, 0);
pfm_clear_task_notify();
tsk_clear_notify_resume(current);
regs = task_pt_regs(current);
......@@ -5450,7 +5435,7 @@ pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, str
* when coming from ctxsw, current still points to the
* previous task, therefore we must work with task and not current.
*/
pfm_set_task_notify(task);
tsk_set_notify_resume(task);
}
/*
* defer until state is changed (shorten spin window). the context is locked
......
......@@ -52,7 +52,6 @@
#include "sigframe.h"
void (*ia64_mark_idle)(int);
static DEFINE_PER_CPU(unsigned int, cpu_idle_state);
unsigned long boot_option_idle_override = 0;
EXPORT_SYMBOL(boot_option_idle_override);
......@@ -157,6 +156,17 @@ show_regs (struct pt_regs *regs)
show_stack(NULL, NULL);
}
void tsk_clear_notify_resume(struct task_struct *tsk)
{
#ifdef CONFIG_PERFMON
if (tsk->thread.pfm_needs_checking)
return;
#endif
if (test_ti_thread_flag(task_thread_info(tsk), TIF_RESTORE_RSE))
return;
clear_ti_thread_flag(task_thread_info(tsk), TIF_NOTIFY_RESUME);
}
void
do_notify_resume_user (sigset_t *unused, struct sigscratch *scr, long in_syscall)
{
......@@ -175,6 +185,10 @@ do_notify_resume_user (sigset_t *unused, struct sigscratch *scr, long in_syscall
/* deal with pending signal delivery */
if (test_thread_flag(TIF_SIGPENDING)||test_thread_flag(TIF_RESTORE_SIGMASK))
ia64_do_signal(scr, in_syscall);
/* copy user rbs to kernel rbs */
if (unlikely(test_thread_flag(TIF_RESTORE_RSE)))
ia64_sync_krbs();
}
static int pal_halt = 1;
......@@ -239,33 +253,23 @@ static inline void play_dead(void)
}
#endif /* CONFIG_HOTPLUG_CPU */
void cpu_idle_wait(void)
static void do_nothing(void *unused)
{
unsigned int cpu, this_cpu = get_cpu();
cpumask_t map;
cpumask_t tmp = current->cpus_allowed;
set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
put_cpu();
cpus_clear(map);
for_each_online_cpu(cpu) {
per_cpu(cpu_idle_state, cpu) = 1;
cpu_set(cpu, map);
}
__get_cpu_var(cpu_idle_state) = 0;
}
wmb();
do {
ssleep(1);
for_each_online_cpu(cpu) {
if (cpu_isset(cpu, map) && !per_cpu(cpu_idle_state, cpu))
cpu_clear(cpu, map);
}
cpus_and(map, map, cpu_online_map);
} while (!cpus_empty(map));
set_cpus_allowed(current, tmp);
/*
* cpu_idle_wait - Used to ensure that all the CPUs discard old value of
* pm_idle and update to new pm_idle value. Required while changing pm_idle
* handler on SMP systems.
*
* Caller must have changed pm_idle to the new value before the call. Old
* pm_idle value will not be used by any CPU after the return of this function.
*/
void cpu_idle_wait(void)
{
smp_mb();
/* kick all the CPUs so that they exit out of pm_idle */
smp_call_function(do_nothing, NULL, 0, 1);
}
EXPORT_SYMBOL_GPL(cpu_idle_wait);
......@@ -293,9 +297,6 @@ cpu_idle (void)
#ifdef CONFIG_SMP
min_xtp();
#endif
if (__get_cpu_var(cpu_idle_state))
__get_cpu_var(cpu_idle_state) = 0;
rmb();
if (mark_idle)
(*mark_idle)(1);
......
......@@ -547,6 +547,129 @@ ia64_sync_user_rbs (struct task_struct *child, struct switch_stack *sw,
return 0;
}
static long
ia64_sync_kernel_rbs (struct task_struct *child, struct switch_stack *sw,
unsigned long user_rbs_start, unsigned long user_rbs_end)
{
unsigned long addr, val;
long ret;
/* now copy word for word from user rbs to kernel rbs: */
for (addr = user_rbs_start; addr < user_rbs_end; addr += 8) {
if (access_process_vm(child, addr, &val, sizeof(val), 0)
!= sizeof(val))
return -EIO;
ret = ia64_poke(child, sw, user_rbs_end, addr, val);
if (ret < 0)
return ret;
}
return 0;
}
typedef long (*syncfunc_t)(struct task_struct *, struct switch_stack *,
unsigned long, unsigned long);
static void do_sync_rbs(struct unw_frame_info *info, void *arg)
{
struct pt_regs *pt;
unsigned long urbs_end;
syncfunc_t fn = arg;
if (unw_unwind_to_user(info) < 0)
return;
pt = task_pt_regs(info->task);
urbs_end = ia64_get_user_rbs_end(info->task, pt, NULL);
fn(info->task, info->sw, pt->ar_bspstore, urbs_end);
}
/*
* when a thread is stopped (ptraced), debugger might change thread's user
* stack (change memory directly), and we must avoid the RSE stored in kernel
* to override user stack (user space's RSE is newer than kernel's in the
* case). To workaround the issue, we copy kernel RSE to user RSE before the
* task is stopped, so user RSE has updated data. we then copy user RSE to
* kernel after the task is resummed from traced stop and kernel will use the
* newer RSE to return to user. TIF_RESTORE_RSE is the flag to indicate we need
* synchronize user RSE to kernel.
*/
void ia64_ptrace_stop(void)
{
if (test_and_set_tsk_thread_flag(current, TIF_RESTORE_RSE))
return;
tsk_set_notify_resume(current);
unw_init_running(do_sync_rbs, ia64_sync_user_rbs);
}
/*
* This is called to read back the register backing store.
*/
void ia64_sync_krbs(void)
{
clear_tsk_thread_flag(current, TIF_RESTORE_RSE);
tsk_clear_notify_resume(current);
unw_init_running(do_sync_rbs, ia64_sync_kernel_rbs);
}
/*
* After PTRACE_ATTACH, a thread's register backing store area in user
* space is assumed to contain correct data whenever the thread is
* stopped. arch_ptrace_stop takes care of this on tracing stops.
* But if the child was already stopped for job control when we attach
* to it, then it might not ever get into ptrace_stop by the time we
* want to examine the user memory containing the RBS.
*/
void
ptrace_attach_sync_user_rbs (struct task_struct *child)
{
int stopped = 0;
struct unw_frame_info info;
/*
* If the child is in TASK_STOPPED, we need to change that to
* TASK_TRACED momentarily while we operate on it. This ensures
* that the child won't be woken up and return to user mode while
* we are doing the sync. (It can only be woken up for SIGKILL.)
*/
read_lock(&tasklist_lock);
if (child->signal) {
spin_lock_irq(&child->sighand->siglock);
if (child->state == TASK_STOPPED &&
!test_and_set_tsk_thread_flag(child, TIF_RESTORE_RSE)) {
tsk_set_notify_resume(child);
child->state = TASK_TRACED;
stopped = 1;
}
spin_unlock_irq(&child->sighand->siglock);
}
read_unlock(&tasklist_lock);
if (!stopped)
return;
unw_init_from_blocked_task(&info, child);
do_sync_rbs(&info, ia64_sync_user_rbs);
/*
* Now move the child back into TASK_STOPPED if it should be in a
* job control stop, so that SIGCONT can be used to wake it up.
*/
read_lock(&tasklist_lock);
if (child->signal) {
spin_lock_irq(&child->sighand->siglock);
if (child->state == TASK_TRACED &&
(child->signal->flags & SIGNAL_STOP_STOPPED)) {
child->state = TASK_STOPPED;
}
spin_unlock_irq(&child->sighand->siglock);
}
read_unlock(&tasklist_lock);
}
static inline int
thread_matches (struct task_struct *thread, unsigned long addr)
{
......@@ -1422,6 +1545,7 @@ sys_ptrace (long request, pid_t pid, unsigned long addr, unsigned long data)
struct task_struct *child;
struct switch_stack *sw;
long ret;
struct unw_frame_info info;
lock_kernel();
ret = -EPERM;
......@@ -1453,6 +1577,8 @@ sys_ptrace (long request, pid_t pid, unsigned long addr, unsigned long data)
if (request == PTRACE_ATTACH) {
ret = ptrace_attach(child);
if (!ret)
arch_ptrace_attach(child);
goto out_tsk;
}
......@@ -1481,6 +1607,11 @@ sys_ptrace (long request, pid_t pid, unsigned long addr, unsigned long data)
/* write the word at location addr */
urbs_end = ia64_get_user_rbs_end(child, pt, NULL);
ret = ia64_poke(child, sw, urbs_end, addr, data);
/* Make sure user RBS has the latest data */
unw_init_from_blocked_task(&info, child);
do_sync_rbs(&info, ia64_sync_user_rbs);
goto out_tsk;
case PTRACE_PEEKUSR:
......@@ -1634,6 +1765,10 @@ syscall_trace_enter (long arg0, long arg1, long arg2, long arg3,
&& (current->ptrace & PT_PTRACED))
syscall_trace();
/* copy user rbs to kernel rbs */
if (test_thread_flag(TIF_RESTORE_RSE))
ia64_sync_krbs();
if (unlikely(current->audit_context)) {
long syscall;
int arch;
......@@ -1671,4 +1806,8 @@ syscall_trace_leave (long arg0, long arg1, long arg2, long arg3,
|| test_thread_flag(TIF_SINGLESTEP))
&& (current->ptrace & PT_PTRACED))
syscall_trace();
/* copy user rbs to kernel rbs */
if (test_thread_flag(TIF_RESTORE_RSE))
ia64_sync_krbs();
}
......@@ -292,6 +292,7 @@ struct switch_stack {
unsigned long, long);
extern void ia64_flush_fph (struct task_struct *);
extern void ia64_sync_fph (struct task_struct *);
extern void ia64_sync_krbs(void);
extern long ia64_sync_user_rbs (struct task_struct *, struct switch_stack *,
unsigned long, unsigned long);
......@@ -303,6 +304,16 @@ struct switch_stack {
extern void ia64_increment_ip (struct pt_regs *pt);
extern void ia64_decrement_ip (struct pt_regs *pt);
extern void ia64_ptrace_stop(void);
#define arch_ptrace_stop(code, info) \
ia64_ptrace_stop()
#define arch_ptrace_stop_needed(code, info) \
(!test_thread_flag(TIF_RESTORE_RSE))
extern void ptrace_attach_sync_user_rbs (struct task_struct *);
#define arch_ptrace_attach(child) \
ptrace_attach_sync_user_rbs(child)
#endif /* !__KERNEL__ */
/* pt_all_user_regs is used for PTRACE_GETREGS PTRACE_SETREGS */
......
......@@ -71,6 +71,9 @@ struct thread_info {
#define alloc_task_struct() ((struct task_struct *)__get_free_pages(GFP_KERNEL | __GFP_COMP, KERNEL_STACK_SIZE_ORDER))
#define free_task_struct(tsk) free_pages((unsigned long) (tsk), KERNEL_STACK_SIZE_ORDER)
#define tsk_set_notify_resume(tsk) \
set_ti_thread_flag(task_thread_info(tsk), TIF_NOTIFY_RESUME)
extern void tsk_clear_notify_resume(struct task_struct *tsk);
#endif /* !__ASSEMBLY */
/*
......@@ -85,28 +88,30 @@ struct thread_info {
#define TIF_SYSCALL_AUDIT 3 /* syscall auditing active */
#define TIF_SINGLESTEP 4 /* restore singlestep on return to user mode */
#define TIF_RESTORE_SIGMASK 5 /* restore signal mask in do_signal() */
#define TIF_PERFMON_WORK 6 /* work for pfm_handle_work() */
#define TIF_NOTIFY_RESUME 6 /* resumption notification requested */
#define TIF_POLLING_NRFLAG 16 /* true if poll_idle() is polling TIF_NEED_RESCHED */
#define TIF_MEMDIE 17
#define TIF_MCA_INIT 18 /* this task is processing MCA or INIT */
#define TIF_DB_DISABLED 19 /* debug trap disabled for fsyscall */
#define TIF_FREEZE 20 /* is freezing for suspend */
#define TIF_RESTORE_RSE 21 /* user RBS is newer than kernel RBS */
#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP)
#define _TIF_SYSCALL_TRACEAUDIT (_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP)
#define _TIF_RESTORE_SIGMASK (1 << TIF_RESTORE_SIGMASK)
#define _TIF_PERFMON_WORK (1 << TIF_PERFMON_WORK)
#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
#define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG)
#define _TIF_MCA_INIT (1 << TIF_MCA_INIT)
#define _TIF_DB_DISABLED (1 << TIF_DB_DISABLED)
#define _TIF_FREEZE (1 << TIF_FREEZE)
#define _TIF_RESTORE_RSE (1 << TIF_RESTORE_RSE)
/* "work to do on user-return" bits */
#define TIF_ALLWORK_MASK (_TIF_SIGPENDING|_TIF_PERFMON_WORK|_TIF_SYSCALL_AUDIT|\
#define TIF_ALLWORK_MASK (_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SYSCALL_AUDIT|\
_TIF_NEED_RESCHED| _TIF_SYSCALL_TRACE|\
_TIF_RESTORE_SIGMASK)
/* like TIF_ALLWORK_BITS but sans TIF_SYSCALL_TRACE or TIF_SYSCALL_AUDIT */
......
......@@ -299,11 +299,14 @@
#define __NR_signalfd 1307
#define __NR_timerfd 1308
#define __NR_eventfd 1309
#define __NR_timerfd_create 1310
#define __NR_timerfd_settime 1311
#define __NR_timerfd_gettime 1312
#ifdef __KERNEL__
#define NR_syscalls 286 /* length of syscall table */
#define NR_syscalls 289 /* length of syscall table */
/*
* The following defines stop scripts/checksyscalls.sh from complaining about
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册