提交 9d8e2277 编写于 作者: J Jan Beulich 提交者: Ingo Molnar

x86-64: Handle byte-wise tail copying in memcpy() without a loop

While hard to measure, reducing the number of possibly/likely
mis-predicted branches can generally be expected to be slightly
better.

Other than apparent at the first glance, this also doesn't grow
the function size (the alignment gap to the next function just
gets smaller).
Signed-off-by: NJan Beulich <jbeulich@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: http://lkml.kernel.org/r/4F218584020000780006F422@nat28.tlf.novell.comSigned-off-by: NIngo Molnar <mingo@elte.hu>
上级 2ab56091
......@@ -164,18 +164,19 @@ ENTRY(memcpy)
retq
.p2align 4
.Lless_3bytes:
cmpl $0, %edx
je .Lend
subl $1, %edx
jb .Lend
/*
* Move data from 1 bytes to 3 bytes.
*/
.Lloop_1:
movb (%rsi), %r8b
movb %r8b, (%rdi)
incq %rdi
incq %rsi
decl %edx
jnz .Lloop_1
movzbl (%rsi), %ecx
jz .Lstore_1byte
movzbq 1(%rsi), %r8
movzbq (%rsi, %rdx), %r9
movb %r8b, 1(%rdi)
movb %r9b, (%rdi, %rdx)
.Lstore_1byte:
movb %cl, (%rdi)
.Lend:
retq
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册