提交 ee28ef86 编写于 作者: K kvn

6708714: Optimize long LShift on 32-bits x86

Summary: For small (1-3 bit) left shifts of long values in the 32-bit VM, use sequences of add+adc instructions instead of shld+shl on new AMD CPUs.
Reviewed-by: never
Contributed-by: shrinivas.joshi@amd.com
上级 e4ccd055
...@@ -307,6 +307,10 @@ void VM_Version::get_processor_features() { ...@@ -307,6 +307,10 @@ void VM_Version::get_processor_features() {
// Use it on new AMD cpus starting from Opteron. // Use it on new AMD cpus starting from Opteron.
UseAddressNop = true; UseAddressNop = true;
} }
if( supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift) ) {
// Use it on new AMD cpus starting from Opteron.
UseNewLongLShift = true;
}
if( FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper) ) { if( FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper) ) {
if( supports_sse4a() ) { if( supports_sse4a() ) {
UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron
......
...@@ -4754,6 +4754,33 @@ operand immI_32_63() %{ ...@@ -4754,6 +4754,33 @@ operand immI_32_63() %{
interface(CONST_INTER); interface(CONST_INTER);
%} %}
// Integer immediate operand that matches a ConI node only when its value is
// exactly 1. Used as the shift-count operand of shlL_eReg_1.
operand immI_1() %{
// Restrict the match to the constant 1.
predicate( n->get_int() == 1 );
match(ConI);
op_cost(0);
format %{ %}
interface(CONST_INTER);
%}
// Integer immediate operand that matches a ConI node only when its value is
// exactly 2. Used as the shift-count operand of shlL_eReg_2.
operand immI_2() %{
// Restrict the match to the constant 2.
predicate( n->get_int() == 2 );
match(ConI);
op_cost(0);
format %{ %}
interface(CONST_INTER);
%}
// Integer immediate operand that matches a ConI node only when its value is
// exactly 3. Used as the shift-count operand of shlL_eReg_3.
operand immI_3() %{
// Restrict the match to the constant 3.
predicate( n->get_int() == 3 );
match(ConI);
op_cost(0);
format %{ %}
interface(CONST_INTER);
%}
// Pointer Immediate // Pointer Immediate
operand immP() %{ operand immP() %{
match(ConP); match(ConP);
...@@ -8943,6 +8970,63 @@ instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ ...@@ -8943,6 +8970,63 @@ instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
ins_pipe( ialu_reg_long_mem ); ins_pipe( ialu_reg_long_mem );
%} %}
// Shift Left Long by 1
// Selected only when UseNewLongLShift is set. Implements (LShiftL dst 1) in
// place by adding the long to itself: ADD doubles the low word and ADC doubles
// the high word while folding in the carry produced by the ADD.
// The condition codes are clobbered (KILL cr).
// NOTE(review): ins_cost(100) is presumably meant to make the matcher prefer
// this rule over the generic shlL_eReg_1_31 rule -- confirm against that
// rule's cost.
instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
predicate(UseNewLongLShift);
match(Set dst (LShiftL dst cnt));
effect(KILL cr);
ins_cost(100);
format %{ "ADD $dst.lo,$dst.lo\n\t"
"ADC $dst.hi,$dst.hi" %}
ins_encode %{
// Low word: dst.lo += dst.lo; the bit shifted out lands in the carry flag.
__ addl($dst$$Register,$dst$$Register);
// High word: dst.hi += dst.hi + carry. Must directly follow the ADD so the
// carry is still intact.
__ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
%}
ins_pipe( ialu_reg_long );
%}
// Shift Left Long by 2
// Selected only when UseNewLongLShift is set. Implements (LShiftL dst 2) in
// place as two consecutive doublings of the long; each doubling is an ADD on
// the low word followed by an ADC on the high word that folds in the carry.
// The condition codes are clobbered (KILL cr).
// NOTE(review): ins_cost(100) is presumably meant to make the matcher prefer
// this rule over the generic shlL_eReg_1_31 rule -- confirm against that
// rule's cost.
instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
predicate(UseNewLongLShift);
match(Set dst (LShiftL dst cnt));
effect(KILL cr);
ins_cost(100);
format %{ "ADD $dst.lo,$dst.lo\n\t"
"ADC $dst.hi,$dst.hi\n\t"
"ADD $dst.lo,$dst.lo\n\t"
"ADC $dst.hi,$dst.hi" %}
ins_encode %{
// First doubling (shift by 1): low word, then high word plus carry.
__ addl($dst$$Register,$dst$$Register);
__ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
// Second doubling (total shift of 2).
__ addl($dst$$Register,$dst$$Register);
__ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
%}
ins_pipe( ialu_reg_long );
%}
// Shift Left Long by 3
// Selected only when UseNewLongLShift is set. Implements (LShiftL dst 3) in
// place as three consecutive doublings of the long; each doubling is an ADD on
// the low word followed by an ADC on the high word that folds in the carry.
// The condition codes are clobbered (KILL cr).
// NOTE(review): ins_cost(100) is presumably meant to make the matcher prefer
// this rule over the generic shlL_eReg_1_31 rule -- confirm against that
// rule's cost.
instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
predicate(UseNewLongLShift);
match(Set dst (LShiftL dst cnt));
effect(KILL cr);
ins_cost(100);
format %{ "ADD $dst.lo,$dst.lo\n\t"
"ADC $dst.hi,$dst.hi\n\t"
"ADD $dst.lo,$dst.lo\n\t"
"ADC $dst.hi,$dst.hi\n\t"
"ADD $dst.lo,$dst.lo\n\t"
"ADC $dst.hi,$dst.hi" %}
ins_encode %{
// First doubling (shift by 1): low word, then high word plus carry.
__ addl($dst$$Register,$dst$$Register);
__ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
// Second doubling (total shift of 2).
__ addl($dst$$Register,$dst$$Register);
__ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
// Third doubling (total shift of 3).
__ addl($dst$$Register,$dst$$Register);
__ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
%}
ins_pipe( ialu_reg_long );
%}
// Shift Left Long by 1-31 // Shift Left Long by 1-31
instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
match(Set dst (LShiftL dst cnt)); match(Set dst (LShiftL dst cnt));
......
...@@ -946,6 +946,9 @@ class CommandLineFlags { ...@@ -946,6 +946,9 @@ class CommandLineFlags {
diagnostic(bool, UseIncDec, true, \ diagnostic(bool, UseIncDec, true, \
"Use INC, DEC instructions on x86") \ "Use INC, DEC instructions on x86") \
\ \
product(bool, UseNewLongLShift, false, \
"Use optimized bitwise shift left") \
\
product(bool, UseStoreImmI16, true, \ product(bool, UseStoreImmI16, true, \
"Use store immediate 16-bits value instruction on x86") \ "Use store immediate 16-bits value instruction on x86") \
\ \
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册