提交 ea714547 编写于 作者: Jiri Olsa 提交者: Ingo Molnar

x86: Separate out entry text section

Put x86 entry code into a separate link section: .entry.text.

Separating the entry text section seems to have performance
benefits - caused by more efficient instruction cache usage.

Running hackbench with perf stat --repeat showed that the change
compresses the icache footprint. The icache load miss rate went
down by about 15%:

 before patch:
         19417627  L1-icache-load-misses      ( +-   0.147% )

 after patch:
         16490788  L1-icache-load-misses      ( +-   0.180% )

The motivation of the patch was to fix a particular kprobes
bug that relates to the entry text section; the performance
advantage was discovered accidentally.

Whole perf output follows:

 - results for current tip tree:

  Performance counter stats for './hackbench/hackbench 10' (500 runs):

         19417627  L1-icache-load-misses      ( +-   0.147% )
       2676914223  instructions             #      0.497 IPC     ( +- 0.079% )
       5389516026  cycles                     ( +-   0.144% )

      0.206267711  seconds time elapsed   ( +-   0.138% )

 - results for current tip tree with the patch applied:

  Performance counter stats for './hackbench/hackbench 10' (500 runs):

         16490788  L1-icache-load-misses      ( +-   0.180% )
       2717734941  instructions             #      0.502 IPC     ( +- 0.079% )
       5414756975  cycles                     ( +-   0.148% )

      0.206747566  seconds time elapsed   ( +-   0.137% )

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Nick Piggin <npiggin@kernel.dk>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: masami.hiramatsu.pt@hitachi.com
Cc: ananth@in.ibm.com
Cc: davem@davemloft.net
Cc: 2nddept-manager@sdl.hitachi.co.jp
LKML-Reference: <20110307181039.GB15197@jolsa.redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
上级 86cb2ec7
...@@ -25,6 +25,8 @@ ...@@ -25,6 +25,8 @@
#define sysretl_audit ia32_ret_from_sys_call #define sysretl_audit ia32_ret_from_sys_call
#endif #endif
.section .entry.text, "ax"
#define IA32_NR_syscalls ((ia32_syscall_end - ia32_sys_call_table)/8) #define IA32_NR_syscalls ((ia32_syscall_end - ia32_sys_call_table)/8)
.macro IA32_ARG_FIXUP noebp=0 .macro IA32_ARG_FIXUP noebp=0
......
...@@ -65,6 +65,8 @@ ...@@ -65,6 +65,8 @@
#define sysexit_audit syscall_exit_work #define sysexit_audit syscall_exit_work
#endif #endif
.section .entry.text, "ax"
/* /*
* We use macros for low-level operations which need to be overridden * We use macros for low-level operations which need to be overridden
* for paravirtualization. The following will never clobber any registers: * for paravirtualization. The following will never clobber any registers:
...@@ -788,7 +790,7 @@ ENDPROC(ptregs_clone) ...@@ -788,7 +790,7 @@ ENDPROC(ptregs_clone)
*/ */
.section .init.rodata,"a" .section .init.rodata,"a"
ENTRY(interrupt) ENTRY(interrupt)
.text .section .entry.text, "ax"
.p2align 5 .p2align 5
.p2align CONFIG_X86_L1_CACHE_SHIFT .p2align CONFIG_X86_L1_CACHE_SHIFT
ENTRY(irq_entries_start) ENTRY(irq_entries_start)
...@@ -807,7 +809,7 @@ vector=FIRST_EXTERNAL_VECTOR ...@@ -807,7 +809,7 @@ vector=FIRST_EXTERNAL_VECTOR
.endif .endif
.previous .previous
.long 1b .long 1b
.text .section .entry.text, "ax"
vector=vector+1 vector=vector+1
.endif .endif
.endr .endr
......
...@@ -61,6 +61,8 @@ ...@@ -61,6 +61,8 @@
#define __AUDIT_ARCH_LE 0x40000000 #define __AUDIT_ARCH_LE 0x40000000
.code64 .code64
.section .entry.text, "ax"
#ifdef CONFIG_FUNCTION_TRACER #ifdef CONFIG_FUNCTION_TRACER
#ifdef CONFIG_DYNAMIC_FTRACE #ifdef CONFIG_DYNAMIC_FTRACE
ENTRY(mcount) ENTRY(mcount)
...@@ -744,7 +746,7 @@ END(stub_rt_sigreturn) ...@@ -744,7 +746,7 @@ END(stub_rt_sigreturn)
*/ */
.section .init.rodata,"a" .section .init.rodata,"a"
ENTRY(interrupt) ENTRY(interrupt)
.text .section .entry.text
.p2align 5 .p2align 5
.p2align CONFIG_X86_L1_CACHE_SHIFT .p2align CONFIG_X86_L1_CACHE_SHIFT
ENTRY(irq_entries_start) ENTRY(irq_entries_start)
...@@ -763,7 +765,7 @@ vector=FIRST_EXTERNAL_VECTOR ...@@ -763,7 +765,7 @@ vector=FIRST_EXTERNAL_VECTOR
.endif .endif
.previous .previous
.quad 1b .quad 1b
.text .section .entry.text
vector=vector+1 vector=vector+1
.endif .endif
.endr .endr
......
...@@ -105,6 +105,7 @@ SECTIONS ...@@ -105,6 +105,7 @@ SECTIONS
SCHED_TEXT SCHED_TEXT
LOCK_TEXT LOCK_TEXT
KPROBES_TEXT KPROBES_TEXT
ENTRY_TEXT
IRQENTRY_TEXT IRQENTRY_TEXT
*(.fixup) *(.fixup)
*(.gnu.warning) *(.gnu.warning)
......
...@@ -11,6 +11,7 @@ extern char _sinittext[], _einittext[]; ...@@ -11,6 +11,7 @@ extern char _sinittext[], _einittext[];
extern char _end[]; extern char _end[];
extern char __per_cpu_load[], __per_cpu_start[], __per_cpu_end[]; extern char __per_cpu_load[], __per_cpu_start[], __per_cpu_end[];
extern char __kprobes_text_start[], __kprobes_text_end[]; extern char __kprobes_text_start[], __kprobes_text_end[];
extern char __entry_text_start[], __entry_text_end[];
extern char __initdata_begin[], __initdata_end[]; extern char __initdata_begin[], __initdata_end[];
extern char __start_rodata[], __end_rodata[]; extern char __start_rodata[], __end_rodata[];
......
...@@ -424,6 +424,12 @@ ...@@ -424,6 +424,12 @@
*(.kprobes.text) \ *(.kprobes.text) \
VMLINUX_SYMBOL(__kprobes_text_end) = .; VMLINUX_SYMBOL(__kprobes_text_end) = .;
#define ENTRY_TEXT \
ALIGN_FUNCTION(); \
VMLINUX_SYMBOL(__entry_text_start) = .; \
*(.entry.text) \
VMLINUX_SYMBOL(__entry_text_end) = .;
#ifdef CONFIG_FUNCTION_GRAPH_TRACER #ifdef CONFIG_FUNCTION_GRAPH_TRACER
#define IRQENTRY_TEXT \ #define IRQENTRY_TEXT \
ALIGN_FUNCTION(); \ ALIGN_FUNCTION(); \
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册