提交 0d7e738b 编写于 作者: T twisti

7006044: materialize cheap non-oop pointers on 64-bit SPARC

Summary: After 6961690 we load non-oop pointers for the constant table which could easily be materialized in a few instructions.
Reviewed-by: never, kvn
上级 2d52ec7b
......@@ -23,6 +23,7 @@
*/
#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "assembler_sparc.inline.hpp"
#include "gc_interface/collectedHeap.inline.hpp"
#include "interpreter/interpreter.hpp"
......@@ -1327,37 +1328,38 @@ void MacroAssembler::patchable_sethi(const AddressLiteral& addrlit, Register d)
}
int MacroAssembler::size_of_sethi(address a, bool worst_case) {
int MacroAssembler::insts_for_sethi(address a, bool worst_case) {
#ifdef _LP64
if (worst_case) return 7;
intptr_t iaddr = (intptr_t)a;
int hi32 = (int)(iaddr >> 32);
int lo32 = (int)(iaddr);
int inst_count;
if (hi32 == 0 && lo32 >= 0)
inst_count = 1;
else if (hi32 == -1)
inst_count = 2;
if (worst_case) return 7;
intptr_t iaddr = (intptr_t) a;
int msb32 = (int) (iaddr >> 32);
int lsb32 = (int) (iaddr);
int count;
if (msb32 == 0 && lsb32 >= 0)
count = 1;
else if (msb32 == -1)
count = 2;
else {
inst_count = 2;
if ( hi32 & 0x3ff )
inst_count++;
if ( lo32 & 0xFFFFFC00 ) {
if( (lo32 >> 20) & 0xfff ) inst_count += 2;
if( (lo32 >> 10) & 0x3ff ) inst_count += 2;
count = 2;
if (msb32 & 0x3ff)
count++;
if (lsb32 & 0xFFFFFC00 ) {
if ((lsb32 >> 20) & 0xfff) count += 2;
if ((lsb32 >> 10) & 0x3ff) count += 2;
}
}
return BytesPerInstWord * inst_count;
return count;
#else
return BytesPerInstWord;
return 1;
#endif
}
int MacroAssembler::worst_case_size_of_set() {
return size_of_sethi(NULL, true) + 1;
int MacroAssembler::worst_case_insts_for_set() {
return insts_for_sethi(NULL, true) + 1;
}
// Keep in sync with MacroAssembler::insts_for_internal_set
void MacroAssembler::internal_set(const AddressLiteral& addrlit, Register d, bool ForceRelocatable) {
intptr_t value = addrlit.value();
......@@ -1379,6 +1381,23 @@ void MacroAssembler::internal_set(const AddressLiteral& addrlit, Register d, boo
}
}
// Keep in sync with MacroAssembler::internal_set
int MacroAssembler::insts_for_internal_set(intptr_t value) {
// can optimize
if (-4096 <= value && value <= 4095) {
return 1;
}
if (inv_hi22(hi22(value)) == value) {
return insts_for_sethi((address) value);
}
int count = insts_for_sethi((address) value);
AddressLiteral al(value);
if (al.low10() != 0) {
count++;
}
return count;
}
void MacroAssembler::set(const AddressLiteral& al, Register d) {
internal_set(al, d, false);
}
......@@ -1443,11 +1462,11 @@ void MacroAssembler::set64(jlong value, Register d, Register tmp) {
}
}
int MacroAssembler::size_of_set64(jlong value) {
int MacroAssembler::insts_for_set64(jlong value) {
v9_dep();
int hi = (int)(value >> 32);
int lo = (int)(value & ~0);
int hi = (int) (value >> 32);
int lo = (int) (value & ~0);
int count = 0;
// (Matcher::isSimpleConstant64 knows about the following optimizations.)
......
......@@ -1884,23 +1884,24 @@ public:
void sethi(const AddressLiteral& addrlit, Register d);
void patchable_sethi(const AddressLiteral& addrlit, Register d);
// compute the size of a sethi/set
static int size_of_sethi( address a, bool worst_case = false );
static int worst_case_size_of_set();
// compute the number of instructions for a sethi/set
static int insts_for_sethi( address a, bool worst_case = false );
static int worst_case_insts_for_set();
// set may be either setsw or setuw (high 32 bits may be zero or sign)
private:
void internal_set(const AddressLiteral& al, Register d, bool ForceRelocatable);
static int insts_for_internal_set(intptr_t value);
public:
void set(const AddressLiteral& addrlit, Register d);
void set(intptr_t value, Register d);
void set(address addr, Register d, RelocationHolder const& rspec);
static int insts_for_set(intptr_t value) { return insts_for_internal_set(value); }
void patchable_set(const AddressLiteral& addrlit, Register d);
void patchable_set(intptr_t value, Register d);
void set64(jlong value, Register d, Register tmp);
// Compute size of set64.
static int size_of_set64(jlong value);
static int insts_for_set64(jlong value);
// sign-extend 32 to 64
inline void signx( Register s, Register d ) { sra( s, G0, d); }
......
......@@ -1086,9 +1086,9 @@ void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
uint MachConstantBaseNode::size(PhaseRegAlloc*) const {
if (UseRDPCForConstantTableBase) {
// This is really the worst case but generally it's only 1 instruction.
return 4 /*rdpc*/ + 4 /*sub*/ + MacroAssembler::worst_case_size_of_set();
return (1 /*rdpc*/ + 1 /*sub*/ + MacroAssembler::worst_case_insts_for_set()) * BytesPerInstWord;
} else {
return MacroAssembler::worst_case_size_of_set();
return MacroAssembler::worst_case_insts_for_set() * BytesPerInstWord;
}
}
......@@ -1240,7 +1240,7 @@ const Pipeline * MachEpilogNode::pipeline() const {
int MachEpilogNode::safepoint_offset() const {
assert( do_polling(), "no return for this epilog node");
return MacroAssembler::size_of_sethi(os::get_polling_page());
return MacroAssembler::insts_for_sethi(os::get_polling_page()) * BytesPerInstWord;
}
//=============================================================================
......@@ -3553,7 +3553,8 @@ operand immP() %{
interface(CONST_INTER);
%}
// Pointer Immediate: 32 or 64-bit
#ifdef _LP64
// Pointer Immediate: 64-bit
operand immP_set() %{
predicate(!VM_Version::is_niagara1_plus());
match(ConP);
......@@ -3564,10 +3565,21 @@ operand immP_set() %{
interface(CONST_INTER);
%}
// Pointer Immediate: 32 or 64-bit
// Pointer Immediate: 64-bit
// From Niagara2 processors on a load should be better than materializing.
operand immP_load() %{
predicate(VM_Version::is_niagara1_plus());
predicate(VM_Version::is_niagara1_plus() && (n->bottom_type()->isa_oop_ptr() || (MacroAssembler::insts_for_set(n->get_ptr()) > 3)));
match(ConP);
op_cost(5);
// formats are generated automatically for constants and base registers
format %{ %}
interface(CONST_INTER);
%}
// Pointer Immediate: 64-bit
operand immP_no_oop_cheap() %{
predicate(VM_Version::is_niagara1_plus() && !n->bottom_type()->isa_oop_ptr() && (MacroAssembler::insts_for_set(n->get_ptr()) <= 3));
match(ConP);
op_cost(5);
......@@ -3575,6 +3587,7 @@ operand immP_load() %{
format %{ %}
interface(CONST_INTER);
%}
#endif
operand immP13() %{
predicate((-4096 < n->get_ptr()) && (n->get_ptr() <= 4095));
......@@ -3673,7 +3686,7 @@ operand immL_32bits() %{
// Long Immediate: cheap (materialize in <= 3 instructions)
operand immL_cheap() %{
predicate(!VM_Version::is_niagara1_plus() || MacroAssembler::size_of_set64(n->get_long()) <= 3);
predicate(!VM_Version::is_niagara1_plus() || MacroAssembler::insts_for_set64(n->get_long()) <= 3);
match(ConL);
op_cost(0);
......@@ -3683,7 +3696,7 @@ operand immL_cheap() %{
// Long Immediate: expensive (materialize in > 3 instructions)
operand immL_expensive() %{
predicate(VM_Version::is_niagara1_plus() && MacroAssembler::size_of_set64(n->get_long()) > 3);
predicate(VM_Version::is_niagara1_plus() && MacroAssembler::insts_for_set64(n->get_long()) > 3);
match(ConL);
op_cost(0);
......@@ -6094,8 +6107,18 @@ instruct loadConP_load(iRegP dst, immP_load con) %{
ins_cost(MEMORY_REF_COST);
format %{ "LD [$constanttablebase + $constantoffset],$dst\t! load from constant table: ptr=$con" %}
ins_encode %{
RegisterOrConstant con_offset = __ ensure_simm13_or_reg($constantoffset($con), $dst$$Register);
__ ld_ptr($constanttablebase, con_offset, $dst$$Register);
RegisterOrConstant con_offset = __ ensure_simm13_or_reg($constantoffset($con), $dst$$Register);
__ ld_ptr($constanttablebase, con_offset, $dst$$Register);
%}
ins_pipe(loadConP);
%}
instruct loadConP_no_oop_cheap(iRegP dst, immP_no_oop_cheap con) %{
match(Set dst con);
ins_cost(DEFAULT_COST * 3/2);
format %{ "SET $con,$dst\t! non-oop ptr" %}
ins_encode %{
__ set($con$$constant, $dst$$Register);
%}
ins_pipe(loadConP);
%}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册