提交 781a22bc 编写于 作者: K kvn

6987135: Performance regression on Intel platform with 32-bits edition between 6u13 and 6u14.

Summary: Use hardware DIV instruction for long division by constant when it is faster than code with multiply.
Reviewed-by: never
上级 764ce4b7
......@@ -1843,6 +1843,12 @@ bool Matcher::is_spillable_arg( int reg ) {
return can_be_java_arg(reg);
}
bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // Prefer the hardware SDIVX instruction over the multiply-based
  // sequence whenever the CPU reports a fast integer divide.
  // The divisor value itself is not consulted on this platform.
  const bool hw_div_is_fast = VM_Version::has_fast_idiv();
  return hw_div_is_fast;
}
// Register for DIVI projection of divmodI
RegMask Matcher::divI_proj_mask() {
ShouldNotReachHere();
......
/*
* Copyright (c) 1997, 2009, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
......@@ -80,7 +80,8 @@ void VM_Version::initialize() {
FLAG_SET_DEFAULT(InteriorEntryAlignment, 4);
}
if (is_niagara1_plus()) {
if (AllocatePrefetchStyle > 0 && FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
if (has_blk_init() && AllocatePrefetchStyle > 0 &&
FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
// Use BIS instruction for allocation prefetch.
FLAG_SET_DEFAULT(AllocatePrefetchStyle, 3);
if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
......@@ -118,16 +119,18 @@ void VM_Version::initialize() {
#endif
char buf[512];
jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s",
jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
(has_v8() ? ", has_v8" : ""),
(has_v9() ? ", has_v9" : ""),
(has_hardware_popc() ? ", popc" : ""),
(has_vis1() ? ", has_vis1" : ""),
(has_vis2() ? ", has_vis2" : ""),
(has_blk_init() ? ", has_blk_init" : ""),
(is_ultra3() ? ", is_ultra3" : ""),
(is_sun4v() ? ", is_sun4v" : ""),
(is_niagara1() ? ", is_niagara1" : ""),
(is_niagara1_plus() ? ", is_niagara1_plus" : ""),
(is_sparc64() ? ", is_sparc64" : ""),
(!has_hardware_mul32() ? ", no-mul32" : ""),
(!has_hardware_div32() ? ", no-div32" : ""),
(!has_hardware_fsmuld() ? ", no-fsmuld" : ""));
......
/*
* Copyright (c) 1997, 2009, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
......@@ -33,7 +33,9 @@ protected:
v9_instructions = 5,
vis1_instructions = 6,
vis2_instructions = 7,
sun4v_instructions = 8
sun4v_instructions = 8,
blk_init_instructions = 9,
fmaf_instructions = 10
};
enum Feature_Flag_Set {
......@@ -49,6 +51,8 @@ protected:
vis1_instructions_m = 1 << vis1_instructions,
vis2_instructions_m = 1 << vis2_instructions,
sun4v_m = 1 << sun4v_instructions,
blk_init_instructions_m = 1 << blk_init_instructions,
fmaf_instructions_m = 1 << fmaf_instructions,
generic_v8_m = v8_instructions_m | hardware_mul32_m | hardware_div32_m | hardware_fsmuld_m,
generic_v9_m = generic_v8_m | v9_instructions_m,
......@@ -67,6 +71,7 @@ protected:
static int platform_features(int features);
// True when the sun4v bit (sun4v_m) is set in the supplied feature mask.
static bool is_niagara1(int features) { return (features & sun4v_m) != 0; }
// True when the FMAF bit (fmaf_instructions_m) is set in the supplied feature mask.
static bool is_sparc64(int features) { return (features & fmaf_instructions_m) != 0; }
// Fixed upper bound used for niagara1 processor counts (always 32).
static int maximum_niagara1_processor_count() { return 32; }
// Returns true if the platform is in the niagara line and
......@@ -86,6 +91,7 @@ public:
// Each accessor below tests a single bit of the cached _features mask.
// True when the hardware_popc_m bit is set.
static bool has_hardware_popc() { return (_features & hardware_popc_m) != 0; }
// True when the vis1_instructions_m bit is set.
static bool has_vis1() { return (_features & vis1_instructions_m) != 0; }
// True when the vis2_instructions_m bit is set.
static bool has_vis2() { return (_features & vis2_instructions_m) != 0; }
// True when the blk_init_instructions_m bit is set.
static bool has_blk_init() { return (_features & blk_init_instructions_m) != 0; }
// Compare-and-exchange support is reported exactly when has_v9() is true.
static bool supports_compare_and_exchange()
{ return has_v9(); }
......@@ -93,8 +99,10 @@ public:
// True only when every bit of ultra3_m is present in _features.
static bool is_ultra3() { return (_features & ultra3_m) == ultra3_m; }
// True when the sun4v_m bit is set in _features.
static bool is_sun4v() { return (_features & sun4v_m) != 0; }
// Applies the mask-taking is_niagara1(int) overload to the cached _features.
static bool is_niagara1() { return is_niagara1(_features); }
// Applies the mask-taking is_sparc64(int) overload to the cached _features.
static bool is_sparc64() { return is_sparc64(_features); }
// Reported for v9 processors other than ultra3.
static bool has_fast_fxtof() { return has_v9() && !is_ultra3(); }
// Integer divide is treated as fast on niagara1-plus and sparc64 processors.
static bool has_fast_idiv() { return is_niagara1_plus() || is_sparc64(); }
// Returns the stored feature description string (_features_str).
static const char* cpu_features() { return _features_str; }
......
......@@ -1288,7 +1288,7 @@ void Assembler::imull(Register dst, Register src, int value) {
if (is8bit(value)) {
emit_byte(0x6B);
emit_byte(0xC0 | encode);
emit_byte(value);
emit_byte(value & 0xFF);
} else {
emit_byte(0x69);
emit_byte(0xC0 | encode);
......@@ -3903,7 +3903,7 @@ void Assembler::imulq(Register dst, Register src, int value) {
if (is8bit(value)) {
emit_byte(0x6B);
emit_byte(0xC0 | encode);
emit_byte(value);
emit_byte(value & 0xFF);
} else {
emit_byte(0x69);
emit_byte(0xC0 | encode);
......
......@@ -446,6 +446,10 @@ public:
// True when the CPU_LZCNT bit is set in _cpuFeatures.
static bool supports_lzcnt() { return (_cpuFeatures & CPU_LZCNT) != 0; }
// True when the CPU_SSE4A bit is set in _cpuFeatures.
static bool supports_sse4a() { return (_cpuFeatures & CPU_SSE4A) != 0; }
// Intel Core and newer CPUs have a fast IDIV instruction (excluding Atom).
// The check requires an Intel CPU of family 6 with SSE3 support whose
// model is not 0x1C (presumably the Atom model number -- TODO confirm).
static bool has_fast_idiv() { return is_intel() && cpu_family() == 6 &&
supports_sse3() && _model != 0x1C; }
// Compare-and-exchange is unconditionally reported as supported here.
static bool supports_compare_and_exchange() { return true; }
// Returns the stored feature description string (_features_str).
static const char* cpu_features() { return _features_str; }
......
......@@ -1508,6 +1508,16 @@ bool Matcher::is_spillable_arg( int reg ) {
return can_be_java_arg(reg);
}
bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // The hardware integer DIV sequence is only worthwhile on CPUs
  // where it beats the multiply-based code.
  if (!VM_Version::has_fast_idiv()) {
    return false;
  }
  // The constant divisor must survive a round trip through 32 bits.
  const bool fits_in_32_bits = (divisor == (int)divisor);
  // min_jint is rejected so that negating a negative divisor always
  // yields a correct positive 32-bit value.
  return fits_in_32_bits && divisor != min_jint;
}
// Register for DIVI projection of divmodI
RegMask Matcher::divI_proj_mask() {
return EAX_REG_mask;
......@@ -1546,6 +1556,9 @@ bool is_operand_hi32_zero(Node* n) {
return true;
}
}
if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
return true;
}
return false;
}
......@@ -2309,9 +2322,11 @@ encode %{
enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
emit_opcode( cbuf, 0x8B ); // Move
emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
emit_d8(cbuf,$primary);
emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
emit_d8(cbuf,$cnt$$constant-32);
if( $cnt$$constant > 32 ) { // Shift, if not by zero
emit_d8(cbuf,$primary);
emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
emit_d8(cbuf,$cnt$$constant-32);
}
emit_d8(cbuf,$primary);
emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
emit_d8(cbuf,31);
......@@ -8842,6 +8857,103 @@ instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI
ins_pipe( pipe_slow );
%}
// Divide Register Long by a 32-bit constant using the hardware IDIV
// instruction (no special case since divisor != -1).
//
// Operands, as declared below:
//   dst  - 64-bit dividend, overwritten with the quotient
//          (shown as EDX:EAX in the format text)
//   imm  - the constant divisor (immL32)
//   tmp  - temporary loaded with abs(imm)
//   tmp2 - temporary used on the slow path for a saved 32-bit word
//   cr   - flags register, killed
instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, eRegI tmp, eRegI tmp2, eFlagsReg cr ) %{
match(Set dst (DivL dst imm));
effect( TEMP tmp, TEMP tmp2, KILL cr );
ins_cost(1000);
format %{ "MOV $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
"CMP $tmp,EDX\n\t"
"JA,s fast\n\t"
"MOV $tmp2,EAX\n\t"
"MOV EAX,EDX\n\t"
"SAR EDX,31\n\t"
"IDIV $tmp\n\t"
"XCHG EAX,$tmp2 \n\t"
"IDIV $tmp\n\t"
"CDQ\n\t"
"ADD EDX,$tmp2\n\t"
"JMP,s done\n"
"fast:\n\t"
"IDIV $tmp\n\t"
"XOR EDX,EDX\n"
"done:\n\t"
"NEG EDX:EAX # if $imm < 0" %}
ins_encode %{
int con = (int)$imm$$constant;
// Divisors of 0, -1 and min_jint are not expected to reach this rule.
assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
// Divide by the magnitude of the constant; its sign is applied at the end.
int pcon = (con > 0) ? con : -con;
Label Lfast, Ldone;
__ movl($tmp$$Register, pcon);
// Take the fast path when abs(imm) is above (unsigned compare) the
// high word of the dividend.
__ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
__ jccb(Assembler::above, Lfast);
// Slow path: two chained divides, high word first.
__ movl($tmp2$$Register, $dst$$Register); // save the low word
__ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register)); // low register = high word
__ sarl(HIGH_FROM_LOW($dst$$Register), 31); // src sign
__ idivl($tmp$$Register); // high word / abs(imm)
// Bring the saved low word back and park the high-word quotient in tmp2.
__ xchgl($dst$$Register, $tmp2$$Register);
__ idivl($tmp$$Register); // (remainder : low word) / abs(imm)
// Sign-extend the low quotient, then add the high-word quotient into
// the high half of the result.
__ cdql();
__ addl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
__ jmpb(Ldone);
__ bind(Lfast);
// fast path: src is positive and result fits into 32 bit
// NOTE(review): IDIV is a signed divide; with high word < abs(imm) the
// quotient is only bounded by 2^32 -- confirm it always fits in a signed
// 32-bit value here, otherwise IDIV would raise a divide error.
__ idivl($tmp$$Register);
// Clear the high half of the result.
__ xorl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
__ bind(Ldone);
// The divide used abs(imm); negate the 64-bit result for a negative constant.
if (con < 0) {
__ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
}
%}
ins_pipe( pipe_slow );
%}
// Remainder Register Long by a 32-bit constant using the hardware IDIV
// instruction (remainder fit into 32 bits).
//
// Operands, as declared below:
//   dst  - 64-bit dividend, overwritten with the remainder
//          (shown as EDX:EAX in the format text)
//   imm  - the constant divisor (immL32); only abs(imm) is used and no
//          sign fix-up is emitted for a negative constant
//   tmp  - temporary loaded with abs(imm)
//   tmp2 - temporary used on the slow path to save the low word
//   cr   - flags register, killed
instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, eRegI tmp, eRegI tmp2, eFlagsReg cr ) %{
match(Set dst (ModL dst imm));
effect( TEMP tmp, TEMP tmp2, KILL cr );
ins_cost(1000);
format %{ "MOV $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
"CMP $tmp,EDX\n\t"
"JA,s fast\n\t"
"MOV $tmp2,EAX\n\t"
"MOV EAX,EDX\n\t"
"SAR EDX,31\n\t"
"IDIV $tmp\n\t"
"MOV EAX,$tmp2\n"
"fast:\n\t"
"IDIV $tmp\n\t"
"MOV EAX,EDX\n\t"
"SAR EDX,31\n\t" %}
ins_encode %{
int con = (int)$imm$$constant;
// Divisors of 0, -1 and min_jint are not expected to reach this rule.
assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
// Only the magnitude of the constant takes part in the divide.
int pcon = (con > 0) ? con : -con;
Label Lfast;
__ movl($tmp$$Register, pcon);
// Skip the high-word reduction when abs(imm) is above (unsigned compare)
// the high word of the dividend.
__ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
__ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit
// Slow path: first reduce the high word modulo abs(imm).
__ movl($tmp2$$Register, $dst$$Register); // save the low word
__ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register)); // low register = high word
__ sarl(HIGH_FROM_LOW($dst$$Register), 31); // src sign
__ idivl($tmp$$Register); // high word / abs(imm)
// Restore the low word, then fall through into the shared final divide.
__ movl($dst$$Register, $tmp2$$Register);
__ bind(Lfast);
// NOTE(review): IDIV is a signed divide; the quotient of this divide is
// only bounded by 2^32 -- confirm it always fits in a signed 32-bit
// value, otherwise IDIV would raise a divide error.
__ idivl($tmp$$Register);
// Move the remainder into the low word and sign-extend it into the high word.
__ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
__ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign
%}
ins_pipe( pipe_slow );
%}
// Integer Shift Instructions
// Shift Left by one
instruct shlI_eReg_1(eRegI dst, immI1 shift, eFlagsReg cr) %{
......
......@@ -2065,6 +2065,13 @@ bool Matcher::is_spillable_arg(int reg)
return can_be_java_arg(reg);
}
bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // In 64-bit mode the multiply-based code for a constant divisor
  // (it uses MulHiL) is faster than the hardware DIV instruction,
  // so the hardware divide is never requested.
  const bool prefer_hardware_div = false;
  return prefer_hardware_div;
}
// Register for DIVI projection of divmodI
RegMask Matcher::divI_proj_mask() {
return INT_RAX_REG_mask;
......
/*
* Copyright (c) 2006, 2009, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
......@@ -65,10 +65,6 @@ int VM_Version::platform_features(int features) {
// getisax(2), SI_ARCHITECTURE_32, and SI_ARCHITECTURE_64 are
// supported on Solaris 10 and later.
if (os::Solaris::supports_getisax()) {
#ifndef PRODUCT
if (PrintMiscellaneous && Verbose)
tty->print_cr("getisax(2) supported.");
#endif
// Check 32-bit architecture.
do_sysinfo(SI_ARCHITECTURE_32, "sparc", &features, v8_instructions_m);
......@@ -81,6 +77,11 @@ int VM_Version::platform_features(int features) {
uint_t avn = os::Solaris::getisax(&av, 1);
assert(avn == 1, "should only return one av");
#ifndef PRODUCT
if (PrintMiscellaneous && Verbose)
tty->print_cr("getisax(2) returned: " PTR32_FORMAT, av);
#endif
if (av & AV_SPARC_MUL32) features |= hardware_mul32_m;
if (av & AV_SPARC_DIV32) features |= hardware_div32_m;
if (av & AV_SPARC_FSMULD) features |= hardware_fsmuld_m;
......@@ -88,11 +89,22 @@ int VM_Version::platform_features(int features) {
if (av & AV_SPARC_POPC) features |= hardware_popc_m;
if (av & AV_SPARC_VIS) features |= vis1_instructions_m;
if (av & AV_SPARC_VIS2) features |= vis2_instructions_m;
// Next values are not defined before Solaris 10
// but Solaris 8 is used for jdk6 update builds.
#ifndef AV_SPARC_ASI_BLK_INIT
#define AV_SPARC_ASI_BLK_INIT 0x0080 /* ASI_BLK_INIT_xxx ASI */
#endif
#ifndef AV_SPARC_FMAF
#define AV_SPARC_FMAF 0x0100 /* Sparc64 Fused Multiply-Add */
#endif
if (av & AV_SPARC_ASI_BLK_INIT) features |= blk_init_instructions_m;
if (av & AV_SPARC_FMAF) features |= fmaf_instructions_m;
} else {
// getisax(2) failed, use the old legacy code.
#ifndef PRODUCT
if (PrintMiscellaneous && Verbose)
tty->print_cr("getisax(2) not supported.");
tty->print_cr("getisax(2) is not supported.");
#endif
char tmp;
......
/*
* Copyright (c) 1997, 2009, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
......@@ -388,7 +388,8 @@ static Node *transform_long_divide( PhaseGVN *phase, Node *dividend, jlong divis
if (!d_pos) {
q = new (phase->C, 3) SubLNode(phase->longcon(0), phase->transform(q));
}
} else {
} else if ( !Matcher::use_asm_for_ldiv_by_con(d) ) { // Use hardware DIV instruction when
// it is faster than code generated below.
// Attempt the jlong constant divide -> multiply transform found in
// "Division by Invariant Integers using Multiplication"
// by Granlund and Montgomery
......@@ -558,7 +559,7 @@ Node *DivLNode::Ideal( PhaseGVN *phase, bool can_reshape) {
set_req(0,NULL); // Dividing by a not-zero constant; no faulting
// Dividing by MININT does not optimize as a power-of-2 shift.
// Dividing by MINLONG does not optimize as a power-of-2 shift.
if( l == min_jlong ) return NULL;
return transform_long_divide( phase, in(1), l );
......@@ -1062,7 +1063,7 @@ Node *ModLNode::Ideal(PhaseGVN *phase, bool can_reshape) {
// Fell thru, the unroll case is not appropriate. Transform the modulo
// into a long multiply/int multiply/subtract case
// Cannot handle mod 0, and min_jint isn't handled by the transform
// Cannot handle mod 0, and min_jlong isn't handled by the transform
if( con == 0 || con == min_jlong ) return NULL;
// Get the absolute value of the constant; at this point, we can use this
......@@ -1075,7 +1076,7 @@ Node *ModLNode::Ideal(PhaseGVN *phase, bool can_reshape) {
// If this is a power of two, then maybe we can mask it
if( is_power_of_2_long(pos_con) ) {
log2_con = log2_long(pos_con);
log2_con = exact_log2_long(pos_con);
const Type *dt = phase->type(in(1));
const TypeLong *dtl = dt->isa_long();
......@@ -1088,7 +1089,7 @@ Node *ModLNode::Ideal(PhaseGVN *phase, bool can_reshape) {
// Save in(1) so that it cannot be changed or deleted
hook->init_req(0, in(1));
// Divide using the transform from DivI to MulL
// Divide using the transform from DivL to MulL
Node *result = transform_long_divide( phase, in(1), pos_con );
if (result != NULL) {
Node *divide = phase->transform(result);
......
/*
* Copyright (c) 1997, 2009, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
......@@ -298,6 +298,10 @@ public:
// Register for MODL projection of divmodL
static RegMask modL_proj_mask();
// Returns true when division of a long by the given constant divisor
// should use the hardware DIV instruction, i.e. when it is faster than
// the code that uses multiply.
static bool use_asm_for_ldiv_by_con( jlong divisor );
static const RegMask method_handle_invoke_SP_save_mask();
// Java-Interpreter calling convention
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册