Commit c2daa3be, authored by Peter Zijlstra, committed by Ingo Molnar

sched, x86: Provide a per-cpu preempt_count implementation

Convert x86 to use a per-cpu preemption count. The reason for doing so
is that accessing per-cpu variables is a lot cheaper than accessing
thread_info variables.
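As a rough illustration of the access-pattern difference (a hypothetical userspace sketch, not kernel code: a __thread variable stands in for a %gs-relative per-cpu variable, and fake_thread_info / fake_current_thread_info are made-up stand-ins for the kernel's thread_info lookup), the per-cpu style touch is a single symbol-relative memory operation, while the thread_info route first has to locate the structure and then poke a field inside it:

/*
 * Hypothetical userspace model only -- it shows the two access patterns,
 * not the real kernel implementation.
 */
#include <stdio.h>

struct fake_thread_info {
        int preempt_count;                      /* old scheme: field behind a pointer */
};

static __thread int fake_percpu_preempt_count; /* new scheme: direct TLS-like access */
static __thread struct fake_thread_info fake_ti;

static struct fake_thread_info *fake_current_thread_info(void)
{
        /* the real kernel derives this from the stack or a per-cpu pointer */
        return &fake_ti;
}

int main(void)
{
        /* old: pointer lookup, then increment of a struct member */
        fake_current_thread_info()->preempt_count += 1;

        /* new: one direct increment of a thread-local (per-cpu-like) counter */
        fake_percpu_preempt_count += 1;

        printf("%d %d\n", fake_current_thread_info()->preempt_count,
               fake_percpu_preempt_count);
        return 0;
}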

We still need to save/restore the actual preemption count due to
PREEMPT_ACTIVE so we place the per-cpu __preempt_count variable in the
same cache-line as the other hot __switch_to() variables such as
current_task.

NOTE: this save/restore is required even for !PREEMPT kernels as
cond_resched() also relies on preempt_count's PREEMPT_ACTIVE to ignore
task_struct::state.

Also rename thread_info::preempt_count to ensure nobody is
'accidentally' still poking at it.
Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/n/tip-gzn5rfsf8trgjoqx8hyayy3q@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Parent a233f112
@@ -5,4 +5,3 @@ genhdr-y += unistd_64.h
 genhdr-y += unistd_x32.h
 
 generic-y += clkdev.h
-generic-y += preempt.h
#ifndef __ASM_PREEMPT_H
#define __ASM_PREEMPT_H

#include <asm/rmwcc.h>
#include <asm/percpu.h>
#include <linux/thread_info.h>

DECLARE_PER_CPU(int, __preempt_count);

/*
 * We mask the PREEMPT_NEED_RESCHED bit so as not to confuse all current users
 * that think a non-zero value indicates we cannot preempt.
 */
static __always_inline int preempt_count(void)
{
        return __this_cpu_read_4(__preempt_count) & ~PREEMPT_NEED_RESCHED;
}

static __always_inline void preempt_count_set(int pc)
{
        __this_cpu_write_4(__preempt_count, pc);
}

/*
 * must be macros to avoid header recursion hell
 */
#define task_preempt_count(p) \
        (task_thread_info(p)->saved_preempt_count & ~PREEMPT_NEED_RESCHED)

#define init_task_preempt_count(p) do { \
        task_thread_info(p)->saved_preempt_count = PREEMPT_DISABLED; \
} while (0)

#define init_idle_preempt_count(p, cpu) do { \
        task_thread_info(p)->saved_preempt_count = PREEMPT_ENABLED; \
        per_cpu(__preempt_count, (cpu)) = PREEMPT_ENABLED; \
} while (0)

/*
 * We fold the NEED_RESCHED bit into the preempt count such that
 * preempt_enable() can decrement and test for needing to reschedule with a
 * single instruction.
 *
 * We invert the actual bit, so that when the decrement hits 0 we know we both
 * need to resched (the bit is cleared) and can resched (no preempt count).
 */
static __always_inline void set_preempt_need_resched(void)
{
        __this_cpu_and_4(__preempt_count, ~PREEMPT_NEED_RESCHED);
}

static __always_inline void clear_preempt_need_resched(void)
{
        __this_cpu_or_4(__preempt_count, PREEMPT_NEED_RESCHED);
}

static __always_inline bool test_preempt_need_resched(void)
{
        return !(__this_cpu_read_4(__preempt_count) & PREEMPT_NEED_RESCHED);
}

/*
 * The various preempt_count add/sub methods
 */
static __always_inline void __preempt_count_add(int val)
{
        __this_cpu_add_4(__preempt_count, val);
}

static __always_inline void __preempt_count_sub(int val)
{
        __this_cpu_add_4(__preempt_count, -val);
}

static __always_inline bool __preempt_count_dec_and_test(void)
{
        GEN_UNARY_RMWcc("decl", __preempt_count, __percpu_arg(0), "e");
}

/*
 * Returns true when we need to resched -- even if we can not.
 */
static __always_inline bool need_resched(void)
{
        return unlikely(test_preempt_need_resched());
}

/*
 * Returns true when we need to resched and can (barring IRQ state).
 */
static __always_inline bool should_resched(void)
{
        return unlikely(!__this_cpu_read_4(__preempt_count));
}

#endif /* __ASM_PREEMPT_H */
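To make the inverted-bit trick above concrete, here is a hedged userspace model (the MODEL_* names and the exact bit value are illustrative, not the kernel's): because the PREEMPT_NEED_RESCHED bit is kept set while no reschedule is needed, a plain decrement only reaches zero when the preempt count is zero and the bit has been cleared, so one decl plus a zero test covers both conditions.

/*
 * Userspace model of the folded, inverted need-resched bit.
 * Names and constants are stand-ins, not kernel APIs.
 */
#include <assert.h>
#include <stdbool.h>

#define MODEL_NEED_RESCHED      0x80000000u     /* inverted: set == no resched needed */

static unsigned int model_preempt_count = MODEL_NEED_RESCHED;   /* "preempt enabled" state */

static void model_set_need_resched(void)   { model_preempt_count &= ~MODEL_NEED_RESCHED; }
static void model_clear_need_resched(void) { model_preempt_count |= MODEL_NEED_RESCHED; }
static void model_preempt_disable(void)    { model_preempt_count++; }

/* preempt_enable(): one decrement, one test against zero */
static bool model_preempt_enable_needs_resched(void)
{
        return --model_preempt_count == 0;
}

int main(void)
{
        model_preempt_disable();
        model_set_need_resched();
        /* count is 1 and the inverted bit is clear: decrement hits 0 -> resched */
        assert(model_preempt_enable_needs_resched());

        model_clear_need_resched();
        model_preempt_disable();
        /* no resched requested: decrement leaves the inverted bit set, result != 0 */
        assert(!model_preempt_enable_needs_resched());
        return 0;
}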
@@ -28,8 +28,7 @@ struct thread_info {
 	__u32			flags;		/* low level flags */
 	__u32			status;		/* thread synchronous flags */
 	__u32			cpu;		/* current CPU */
-	int			preempt_count;	/* 0 => preemptable,
-						   <0 => BUG */
+	int			saved_preempt_count;
 	mm_segment_t		addr_limit;
 	struct restart_block	restart_block;
 	void __user		*sysenter_return;
@@ -49,7 +48,7 @@ struct thread_info {
 	.exec_domain	= &default_exec_domain,	\
 	.flags		= 0,			\
 	.cpu		= 0,			\
-	.preempt_count	= INIT_PREEMPT_COUNT,	\
+	.saved_preempt_count = INIT_PREEMPT_COUNT,	\
 	.addr_limit	= KERNEL_DS,		\
 	.restart_block	= {			\
 		.fn = do_no_restart_syscall,	\
......
@@ -32,7 +32,6 @@ void common(void) {
 	OFFSET(TI_flags, thread_info, flags);
 	OFFSET(TI_status, thread_info, status);
 	OFFSET(TI_addr_limit, thread_info, addr_limit);
-	OFFSET(TI_preempt_count, thread_info, preempt_count);
 
 	BLANK();
 	OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx);
......
@@ -1095,6 +1095,9 @@ DEFINE_PER_CPU(char *, irq_stack_ptr) =
 DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1;
 
+DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
+EXPORT_PER_CPU_SYMBOL(__preempt_count);
+
 DEFINE_PER_CPU(struct task_struct *, fpu_owner_task);
 
 /*
@@ -1169,6 +1172,8 @@ void debug_stack_reset(void)
 DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
 EXPORT_PER_CPU_SYMBOL(current_task);
+DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
+EXPORT_PER_CPU_SYMBOL(__preempt_count);
 DEFINE_PER_CPU(struct task_struct *, fpu_owner_task);
 
 #ifdef CONFIG_CC_STACKPROTECTOR
......
@@ -362,12 +362,9 @@ END(ret_from_exception)
 #ifdef CONFIG_PREEMPT
 ENTRY(resume_kernel)
 	DISABLE_INTERRUPTS(CLBR_ANY)
-	cmpl $0,TI_preempt_count(%ebp)	# non-zero preempt_count ?
-	jnz restore_all
 need_resched:
-	movl TI_flags(%ebp), %ecx	# need_resched set ?
-	testb $_TIF_NEED_RESCHED, %cl
-	jz restore_all
+	cmpl $0,PER_CPU_VAR(__preempt_count)
+	jnz restore_all
 	testl $X86_EFLAGS_IF,PT_EFLAGS(%esp)	# interrupts off (exception path) ?
 	jz restore_all
 	call preempt_schedule_irq
......
@@ -1118,10 +1118,8 @@ retint_signal:
 	/* Returning to kernel space. Check if we need preemption */
 	/* rcx: threadinfo. interrupts off. */
 ENTRY(retint_kernel)
-	cmpl $0,TI_preempt_count(%rcx)
-	jnz  retint_restore_args
-	bt $TIF_NEED_RESCHED,TI_flags(%rcx)
-	jnc  retint_restore_args
+	cmpl $0,PER_CPU_VAR(__preempt_count)
+	jnz  retint_restore_args
 	bt   $9,EFLAGS-ARGOFFSET(%rsp)	/* interrupts off? */
 	jnc  retint_restore_args
 	call preempt_schedule_irq
......
@@ -100,9 +100,6 @@ execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq)
 	irqctx->tinfo.task = curctx->tinfo.task;
 	irqctx->tinfo.previous_esp = current_stack_pointer;
 
-	/* Copy the preempt_count so that the [soft]irq checks work. */
-	irqctx->tinfo.preempt_count = curctx->tinfo.preempt_count;
-
 	if (unlikely(overflow))
 		call_on_stack(print_stack_overflow, isp);
@@ -131,7 +128,6 @@ void irq_ctx_init(int cpu)
 					       THREAD_SIZE_ORDER));
 	memset(&irqctx->tinfo, 0, sizeof(struct thread_info));
 	irqctx->tinfo.cpu		= cpu;
-	irqctx->tinfo.preempt_count	= HARDIRQ_OFFSET;
 	irqctx->tinfo.addr_limit	= MAKE_MM_SEG(0);
 
 	per_cpu(hardirq_ctx, cpu) = irqctx;
......
@@ -291,6 +291,14 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	if (get_kernel_rpl() && unlikely(prev->iopl != next->iopl))
 		set_iopl_mask(next->iopl);
 
+	/*
+	 * If it were not for PREEMPT_ACTIVE we could guarantee that the
+	 * preempt_count of all tasks was equal here and this would not be
+	 * needed.
+	 */
+	task_thread_info(prev_p)->saved_preempt_count = this_cpu_read(__preempt_count);
+	this_cpu_write(__preempt_count, task_thread_info(next_p)->saved_preempt_count);
+
 	/*
 	 * Now maybe handle debug registers and/or IO bitmaps
 	 */
......
@@ -363,6 +363,14 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	this_cpu_write(old_rsp, next->usersp);
 	this_cpu_write(current_task, next_p);
 
+	/*
+	 * If it were not for PREEMPT_ACTIVE we could guarantee that the
+	 * preempt_count of all tasks was equal here and this would not be
+	 * needed.
+	 */
+	task_thread_info(prev_p)->saved_preempt_count = this_cpu_read(__preempt_count);
+	this_cpu_write(__preempt_count, task_thread_info(next_p)->saved_preempt_count);
+
 	this_cpu_write(kernel_stack,
 		  (unsigned long)task_stack_page(next_p) +
 		  THREAD_SIZE - KERNEL_STACK_OFFSET);
......
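The two __switch_to() hunks above follow the same pattern. The sketch below is a hedged userspace model of that handoff (the model_* names are hypothetical stand-ins, not kernel APIs): each task keeps a saved_preempt_count, and the context switch parks the outgoing task's live count there and loads the incoming task's saved value into the single per-cpu counter. Since PREEMPT_ACTIVE can be set in the count of a task that is being preempted, the two values are not guaranteed to be equal at this point, which is why the copy is needed.

/*
 * Userspace model of the saved_preempt_count handoff in __switch_to().
 * All names here are illustrative stand-ins, not kernel APIs.
 */
#include <assert.h>

struct model_task {
        int saved_preempt_count;        /* role of thread_info::saved_preempt_count */
};

static int model_percpu_preempt_count; /* role of this CPU's __preempt_count */

static void model_switch_to(struct model_task *prev, struct model_task *next)
{
        /* park the outgoing task's live count, load the incoming task's count */
        prev->saved_preempt_count = model_percpu_preempt_count;
        model_percpu_preempt_count = next->saved_preempt_count;
}

int main(void)
{
        struct model_task a = { .saved_preempt_count = 0 };
        struct model_task b = { .saved_preempt_count = 0 };

        model_percpu_preempt_count = 42;        /* e.g. a count with PREEMPT_ACTIVE folded in */
        model_switch_to(&a, &b);                /* switch away from a ... */
        assert(a.saved_preempt_count == 42 && model_percpu_preempt_count == 0);

        model_switch_to(&b, &a);                /* ... and back */
        assert(model_percpu_preempt_count == 42);
        return 0;
}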