提交 6348dd84 编写于 作者: N never

6921969: optimize 64 long multiply for case with high bits zero

Reviewed-by: never, twisti, kvn, rasbold
Contributed-by: Hiroshi Yamauchi <yamauchi@google.com>
上级 b357f538
......@@ -235,6 +235,11 @@ reg_class xdb_reg7( XMM7a,XMM7b );
//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description
source_hpp %{
// Declaration of the helper called from the predicate() clauses of the
// mulL_eReg_lhi0, mulL_eReg_rhi0 and mulL_eReg_hi0 instructions.
// Must be visible to the DFA in dfa_x86_32.cpp
extern bool is_operand_hi32_zero(Node* n);
%}
source %{
#define RELOC_IMM32 Assembler::imm_operand
#define RELOC_DISP32 Assembler::disp32_operand
......@@ -1485,6 +1490,21 @@ const RegMask Matcher::method_handle_invoke_SP_save_mask() {
return EBP_REG_mask;
}
// Reports whether the upper 32 bits of the long value produced by node n are
// statically known to be zero.
//
// Two node shapes are recognized:
//   - Op_LoadUI2L: always accepted.
//   - Op_AndL whose second input is a constant with no bit set in the upper
//     32 bits: the AND clears the upper half regardless of the first input.
// Every other node yields false, i.e. "not known", not "known to be non-zero".
bool is_operand_hi32_zero(Node* n) {
  switch (n->Opcode()) {
    case Op_LoadUI2L:
      return true;

    case Op_AndL: {
      Node* mask = n->in(2);
      if (!mask->is_Con()) {
        return false;
      }
      // Only a mask confined to the low 32 bits guarantees a zero high word.
      return (mask->get_long() & 0xFFFFFFFF00000000LL) == 0LL;
    }

    default:
      return false;
  }
}
%}
//----------ENCODING BLOCK-----------------------------------------------------
......@@ -8599,6 +8619,63 @@ instruct mulL_eReg(eADXRegL dst, eRegL src, eRegI tmp, eFlagsReg cr) %{
ins_pipe( pipe_slow );
%}
// Multiply Register Long where the left operand's high 32 bits are zero.
// Selected only when is_operand_hi32_zero() holds for the MulL node's first
// input (n->in(1)), so the x_hi * y_lo cross term is omitted.
instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, eRegI tmp, eFlagsReg cr) %{
predicate(is_operand_hi32_zero(n->in(1)));
match(Set dst (MulL dst src));
effect(KILL cr, TEMP tmp);
ins_cost(2*100+2*400);
// Notation: x is the left operand (dst, held in EDX:EAX), y is the right operand (src).
// Basic idea: lo(result) = lo(x_lo * y_lo)
// hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
format %{ "MOV $tmp,$src.hi\n\t"
"IMUL $tmp,EAX\n\t"
"MUL EDX:EAX,$src.lo\n\t"
"ADD EDX,$tmp" %}
ins_encode %{
// tmp = y_hi
__ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
// tmp = lo(x_lo * y_hi); EAX still holds x_lo at this point
__ imull($tmp$$Register, rax);
// EDX:EAX = x_lo * y_lo; this overwrites EAX, so it must come after the IMUL above
__ mull($src$$Register);
// fold the cross term into the high half of the result
__ addl(rdx, $tmp$$Register);
%}
ins_pipe( pipe_slow );
%}
// Multiply Register Long where the right operand's high 32 bits are zero.
// Selected only when is_operand_hi32_zero() holds for the MulL node's second
// input (n->in(2)), so the x_lo * y_hi cross term is omitted.
instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, eRegI tmp, eFlagsReg cr) %{
predicate(is_operand_hi32_zero(n->in(2)));
match(Set dst (MulL dst src));
effect(KILL cr, TEMP tmp);
ins_cost(2*100+2*400);
// Notation: x is the left operand (dst, held in EDX:EAX), y is the right operand (src).
// Basic idea: lo(result) = lo(x_lo * y_lo)
// hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
format %{ "MOV $tmp,$src.lo\n\t"
"IMUL $tmp,EDX\n\t"
"MUL EDX:EAX,$src.lo\n\t"
"ADD EDX,$tmp" %}
ins_encode %{
// tmp = y_lo
__ movl($tmp$$Register, $src$$Register);
// tmp = lo(x_hi * y_lo); EDX still holds x_hi at this point
__ imull($tmp$$Register, rdx);
// EDX:EAX = x_lo * y_lo; this overwrites EDX, so it must come after the IMUL above
__ mull($src$$Register);
// fold the cross term into the high half of the result
__ addl(rdx, $tmp$$Register);
%}
ins_pipe( pipe_slow );
%}
// Multiply Register Long where the left and the right operands' high 32 bits are zero.
// Selected only when is_operand_hi32_zero() holds for both inputs of the MulL node,
// so both cross terms are omitted and no temporary register is needed.
instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
match(Set dst (MulL dst src));
effect(KILL cr);
ins_cost(1*400);
// Notation: x is the left operand (dst, held in EDX:EAX), y is the right operand (src).
// Basic idea: lo(result) = lo(x_lo * y_lo)
// hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
format %{ "MUL EDX:EAX,$src.lo\n\t" %}
ins_encode %{
// EDX:EAX = x_lo * y_lo, which is already the complete 64-bit result
__ mull($src$$Register);
%}
ins_pipe( pipe_slow );
%}
// Multiply Register Long by small constant
instruct mulL_eReg_con(eADXRegL dst, immL_127 src, eRegI tmp, eFlagsReg cr) %{
match(Set dst (MulL dst src));
......
/*
* Copyright 2010 Google, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
*/
/*
* @test
* @bug 6921969
* @summary Tests shorter long multiply sequences when the high 32 bits of long operands are known to be zero on x86_32
* @run main/othervm -Xbatch -XX:-Inline -XX:CompileOnly=.testNormal,.testLeftOptimized,.testRightOptimized,.testOptimized,.testLeftOptimized_LoadUI2L,.testRightOptimized_LoadUI2L,.testOptimized_LoadUI2L TestMultiplyLongHiZero
*/
// This test must run without any command line arguments: a numeric first
// argument replaces every factor used by main(), and the hard-coded expected
// products below would then no longer match.
/**
 * Multiplies pairs of int factors widened to long in several ways (plain
 * sign-extension, masking one or both factors to their low 32 bits, and
 * masking values loaded from an int array) and compares each product with a
 * precomputed constant.
 */
public class TestMultiplyLongHiZero {

    /** Keeps only the low 32 bits of a long; ANDing an int with it zero-extends the int. */
    private static final long MASK_32 = 0x00000000FFFFFFFFL;

    /**
     * Verifies that the product under test and a freshly computed product of
     * the two factors both equal the expected constant.
     *
     * @throws RuntimeException listing all three values in hex when they differ
     */
    private static void check(long leftFactor, long rightFactor, long optimizedProduct, long constantProduct) {
        final long normalProduct = leftFactor * rightFactor; // unaffected by the new optimization
        final boolean allEqual = optimizedProduct == constantProduct
                && normalProduct == constantProduct;
        if (allEqual) {
            return;
        }
        throw new RuntimeException("Not all three products are equal: "
                + Long.toHexString(normalProduct) + ", "
                + Long.toHexString(optimizedProduct) + ", "
                + Long.toHexString(constantProduct));
    }

    /**
     * Returns the first command line argument parsed as an int, or
     * {@code v} when there is no argument or it is not a valid int.
     */
    private static int initInt(String[] args, int v) {
        if (args.length == 0) {
            return v;
        }
        try {
            return Integer.parseInt(args[0]);
        } catch (NumberFormatException ignored) {
            // Deliberate: an unparsable argument falls back to the default.
            return v;
        }
    }

    /** Both factors sign-extended; neither has a known-zero high word. */
    private static void testNormal(int leftFactor, int rightFactor, long constantProduct) {
        final long product = (long) leftFactor * (long) rightFactor; // unaffected by the new optimization
        check((long) leftFactor, (long) rightFactor, product, constantProduct);
    }

    /** Left factor masked to 32 bits, right factor sign-extended. */
    private static void testLeftOptimized(int leftFactor, int rightFactor, long constantProduct) {
        final long product = (leftFactor & MASK_32) * (long) rightFactor; // left factor optimized
        check((leftFactor & MASK_32), (long) rightFactor, product, constantProduct);
    }

    /** Left factor sign-extended, right factor masked to 32 bits. */
    private static void testRightOptimized(int leftFactor, int rightFactor, long constantProduct) {
        final long product = (long) leftFactor * (rightFactor & MASK_32); // right factor optimized
        check((long) leftFactor, (rightFactor & MASK_32), product, constantProduct);
    }

    /** Both factors masked to 32 bits. */
    private static void testOptimized(int leftFactor, int rightFactor, long constantProduct) {
        final long product = (leftFactor & MASK_32) * (rightFactor & MASK_32); // both factors optimized
        check((leftFactor & MASK_32), (rightFactor & MASK_32), product, constantProduct);
    }

    /** Like testLeftOptimized, but the masked left factor is read from factors[0]. */
    private static void testLeftOptimized_LoadUI2L(int leftFactor, int rightFactor, long constantProduct, int[] factors) {
        final long product = (factors[0] & MASK_32) * (long) rightFactor; // left factor optimized
        check((leftFactor & MASK_32), (long) rightFactor, product, constantProduct);
    }

    /** Like testRightOptimized, but the masked right factor is read from factors[1]. */
    private static void testRightOptimized_LoadUI2L(int leftFactor, int rightFactor, long constantProduct, int[] factors) {
        final long product = (long) leftFactor * (factors[1] & MASK_32); // right factor optimized
        check((long) leftFactor, (rightFactor & MASK_32), product, constantProduct);
    }

    /** Like testOptimized, but both masked factors are read from the array. */
    private static void testOptimized_LoadUI2L(int leftFactor, int rightFactor, long constantProduct, int[] factors) {
        final long product = (factors[0] & MASK_32) * (factors[1] & MASK_32); // both factors optimized
        check((leftFactor & MASK_32), (rightFactor & MASK_32), product, constantProduct);
    }

    /**
     * Runs every multiply variant for one pair of factors against the
     * expected product for that variant.
     */
    private static void test(int leftFactor, int rightFactor,
                             long normalConstantProduct,
                             long leftOptimizedConstantProduct,
                             long rightOptimizedConstantProduct,
                             long optimizedConstantProduct) {
        final int[] factors = { leftFactor, rightFactor };

        testNormal(leftFactor, rightFactor, normalConstantProduct);
        testLeftOptimized(leftFactor, rightFactor, leftOptimizedConstantProduct);
        testRightOptimized(leftFactor, rightFactor, rightOptimizedConstantProduct);
        testOptimized(leftFactor, rightFactor, optimizedConstantProduct);

        testLeftOptimized_LoadUI2L(leftFactor, rightFactor, leftOptimizedConstantProduct, factors);
        testRightOptimized_LoadUI2L(leftFactor, rightFactor, rightOptimizedConstantProduct, factors);
        testOptimized_LoadUI2L(leftFactor, rightFactor, optimizedConstantProduct, factors);
    }

    public static void main(String[] args) {
        for (int iteration = 0; iteration < 100000; ++iteration) { // Trigger compilation
            // Defaults (used when no argument is given): 1, 3, -1, Integer.MAX_VALUE.
            final int f0 = initInt(args, 1);
            final int f1 = initInt(args, 3);
            final int f2 = initInt(args, -1);
            final int f3 = initInt(args, 0x7FFFFFFF);

            test(f0, f1, 3L, 3L, 3L, 3L);
            test(f0, f2, -1L, -1L, 0xFFFFFFFFL, 0xFFFFFFFFL);
            test(f0, f3, 0x7FFFFFFFL, 0x7FFFFFFFL, 0x7FFFFFFFL, 0x7FFFFFFFL);
            test(f1, f2, -3L, -3L, 0x2FFFFFFFDL, 0x2FFFFFFFDL);
            test(f1, f3, 0x17FFFFFFDL, 0x17FFFFFFDL, 0x17FFFFFFDL, 0x17FFFFFFDL);
            test(f2, f3, 0xFFFFFFFF80000001L, 0x7FFFFFFE80000001L,
                 0xFFFFFFFF80000001L, 0x7FFFFFFE80000001L);
        }
    }
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册