提交 eb01dfa1 编写于 作者: K kvn

7079329: Adjust allocation prefetching for T4

Summary: On T4, two BIS instructions should be issued to prefetch 64 bytes
Reviewed-by: iveresov, phh, twisti
上级 2161afbd
......@@ -886,7 +886,11 @@ class Assembler : public AbstractAssembler {
enum ASIs { // page 72, v9
ASI_PRIMARY = 0x80,
ASI_PRIMARY_LITTLE = 0x88
ASI_PRIMARY_LITTLE = 0x88,
// Block initializing store
ASI_ST_BLKINIT_PRIMARY = 0xE2,
// Most-Recently-Used (MRU) BIS variant
ASI_ST_BLKINIT_MRU_PRIMARY = 0xF2
// add more from book as needed
};
......
......@@ -471,9 +471,6 @@ extern bool can_branch_register( Node *bol, Node *cmp );
source %{
#define __ _masm.
// Block initializing store
#define ASI_BLK_INIT_QUAD_LDD_P 0xE2
// tertiary op of a LoadP or StoreP encoding
#define REGP_OP true
......@@ -6269,6 +6266,7 @@ instruct loadConD(regD dst, immD con, o7RegI tmp) %{
instruct prefetchr( memory mem ) %{
match( PrefetchRead mem );
ins_cost(MEMORY_REF_COST);
size(4);
format %{ "PREFETCH $mem,0\t! Prefetch read-many" %}
opcode(Assembler::prefetch_op3);
......@@ -6277,9 +6275,9 @@ instruct prefetchr( memory mem ) %{
%}
instruct prefetchw( memory mem ) %{
predicate(AllocatePrefetchStyle != 3 );
match( PrefetchWrite mem );
ins_cost(MEMORY_REF_COST);
size(4);
format %{ "PREFETCH $mem,2\t! Prefetch write-many (and read)" %}
opcode(Assembler::prefetch_op3);
......@@ -6287,24 +6285,62 @@ instruct prefetchw( memory mem ) %{
ins_pipe(iload_mem);
%}
// Use BIS instruction to prefetch.
instruct prefetchw_bis( memory mem ) %{
predicate(AllocatePrefetchStyle == 3);
match( PrefetchWrite mem );
// Prefetch instructions for allocation.
instruct prefetchAlloc( memory mem ) %{
predicate(AllocatePrefetchInstr == 0);
match( PrefetchAllocation mem );
ins_cost(MEMORY_REF_COST);
size(4);
format %{ "STXA G0,$mem\t! // Block initializing store" %}
format %{ "PREFETCH $mem,2\t! Prefetch allocation" %}
opcode(Assembler::prefetch_op3);
ins_encode( form3_mem_prefetch_write( mem ) );
ins_pipe(iload_mem);
%}
// Use the BIS (block initializing store) instruction to prefetch for allocation.
// BIS could fault, so space must be reserved at the end of the TLAB.
instruct prefetchAlloc_bis( iRegP dst ) %{
predicate(AllocatePrefetchInstr == 1);
match( PrefetchAllocation dst );
ins_cost(MEMORY_REF_COST);
size(4);
format %{ "STXA [$dst]\t! // Prefetch allocation using BIS" %}
ins_encode %{
Register base = as_Register($mem$$base);
int disp = $mem$$disp;
if (disp != 0) {
__ add(base, AllocatePrefetchStepSize, base);
}
__ stxa(G0, base, G0, ASI_BLK_INIT_QUAD_LDD_P);
__ stxa(G0, $dst$$Register, G0, Assembler::ASI_ST_BLKINIT_PRIMARY);
%}
ins_pipe(istore_mem_reg);
%}
// Next code is used for finding next cache line address to prefetch.
#ifndef _LP64
// Non-_LP64 variant of cacheLineAdr (this definition sits in the
// "#ifndef _LP64" branch). It matches the ideal subtree
//   CastX2P (AndI (CastP2X src) mask)
// i.e. a pointer cast to an integer, ANDed with the immediate $mask, and
// cast back to a pointer, and encodes the whole pattern as a single and3.
// Per the format string the result is used as the next cache line address.
instruct cacheLineAdr( iRegP dst, iRegP src, immI13 mask ) %{
match(Set dst (CastX2P (AndI (CastP2X src) mask)));
ins_cost(DEFAULT_COST);
size(4);
format %{ "AND $src,$mask,$dst\t! next cache line address" %}
ins_encode %{
// dst = src & mask, with the mask supplied as an immediate constant.
__ and3($src$$Register, $mask$$constant, $dst$$Register);
%}
ins_pipe(ialu_reg_imm);
%}
#else
// _LP64 variant of cacheLineAdr (this definition sits in the "#else"
// branch of "#ifndef _LP64"). It matches the ideal subtree
//   CastX2P (AndL (CastP2X src) mask)
// i.e. a pointer cast to an integer, ANDed with the immediate $mask, and
// cast back to a pointer, and encodes the whole pattern as a single and3.
// Per the format string the result is used as the next cache line address.
instruct cacheLineAdr( iRegP dst, iRegP src, immL13 mask ) %{
match(Set dst (CastX2P (AndL (CastP2X src) mask)));
ins_cost(DEFAULT_COST);
size(4);
format %{ "AND $src,$mask,$dst\t! next cache line address" %}
ins_encode %{
// dst = src & mask, with the mask supplied as an immediate constant.
__ and3($src$$Register, $mask$$constant, $dst$$Register);
%}
ins_pipe(ialu_reg_imm);
%}
#endif
//----------Store Instructions-------------------------------------------------
// Store Byte
instruct storeB(memory mem, iRegI src) %{
......
......@@ -44,20 +44,31 @@ void VM_Version::initialize() {
PrefetchScanIntervalInBytes = prefetch_scan_interval_in_bytes();
PrefetchFieldsAhead = prefetch_fields_ahead();
assert(0 <= AllocatePrefetchInstr && AllocatePrefetchInstr <= 1, "invalid value");
if( AllocatePrefetchInstr < 0 ) AllocatePrefetchInstr = 0;
if( AllocatePrefetchInstr > 1 ) AllocatePrefetchInstr = 0;
// Allocation prefetch settings
intx cache_line_size = L1_data_cache_line_size();
intx cache_line_size = prefetch_data_size();
if( cache_line_size > AllocatePrefetchStepSize )
AllocatePrefetchStepSize = cache_line_size;
if( FLAG_IS_DEFAULT(AllocatePrefetchLines) )
AllocatePrefetchLines = 3; // Optimistic value
assert( AllocatePrefetchLines > 0, "invalid value");
assert(AllocatePrefetchLines > 0, "invalid value");
if( AllocatePrefetchLines < 1 ) // set valid value in product VM
AllocatePrefetchLines = 1; // Conservative value
AllocatePrefetchLines = 3;
assert(AllocateInstancePrefetchLines > 0, "invalid value");
if( AllocateInstancePrefetchLines < 1 ) // set valid value in product VM
AllocateInstancePrefetchLines = 1;
AllocatePrefetchDistance = allocate_prefetch_distance();
AllocatePrefetchStyle = allocate_prefetch_style();
assert(AllocatePrefetchDistance % AllocatePrefetchStepSize == 0, "invalid value");
assert((AllocatePrefetchDistance % AllocatePrefetchStepSize) == 0 &&
(AllocatePrefetchDistance > 0), "invalid value");
if ((AllocatePrefetchDistance % AllocatePrefetchStepSize) != 0 ||
(AllocatePrefetchDistance <= 0)) {
AllocatePrefetchDistance = AllocatePrefetchStepSize;
}
if (AllocatePrefetchStyle == 3 && !has_blk_init()) {
warning("BIS instructions are not available on this CPU");
......@@ -99,19 +110,42 @@ void VM_Version::initialize() {
FLAG_SET_DEFAULT(InteriorEntryAlignment, 4);
}
if (is_niagara_plus()) {
if (has_blk_init() && AllocatePrefetchStyle > 0 &&
FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
// Use BIS instruction for allocation prefetch.
FLAG_SET_DEFAULT(AllocatePrefetchStyle, 3);
if (has_blk_init() && UseTLAB &&
FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
// Use BIS instruction for TLAB allocation prefetch.
FLAG_SET_ERGO(intx, AllocatePrefetchInstr, 1);
if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
FLAG_SET_ERGO(intx, AllocatePrefetchStyle, 3);
}
if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
// Use smaller prefetch distance on N2 with BIS
// Use smaller prefetch distance with BIS
FLAG_SET_DEFAULT(AllocatePrefetchDistance, 64);
}
}
if (is_T4()) {
// Double number of prefetched cache lines on T4
// since L2 cache line size is smaller (32 bytes).
if (FLAG_IS_DEFAULT(AllocatePrefetchLines)) {
FLAG_SET_ERGO(intx, AllocatePrefetchLines, AllocatePrefetchLines*2);
}
if (FLAG_IS_DEFAULT(AllocateInstancePrefetchLines)) {
FLAG_SET_ERGO(intx, AllocateInstancePrefetchLines, AllocateInstancePrefetchLines*2);
}
}
if (AllocatePrefetchStyle != 3 && FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
// Use different prefetch distance without BIS
FLAG_SET_DEFAULT(AllocatePrefetchDistance, 256);
}
if (AllocatePrefetchInstr == 1) {
// Need a space at the end of TLAB for BIS since it
// will fault when accessing memory outside of heap.
// +1 for rounding up to next cache line, +1 to be safe
int lines = AllocatePrefetchLines + 2;
int step_size = AllocatePrefetchStepSize;
int distance = AllocatePrefetchDistance;
_reserve_for_allocation_prefetch = (distance + step_size*lines)/(int)HeapWordSize;
}
}
#endif
}
......@@ -185,14 +219,20 @@ void VM_Version::initialize() {
#ifndef PRODUCT
if (PrintMiscellaneous && Verbose) {
tty->print("Allocation: ");
tty->print("Allocation");
if (AllocatePrefetchStyle <= 0) {
tty->print_cr("no prefetching");
tty->print_cr(": no prefetching");
} else {
tty->print(" prefetching: ");
if (AllocatePrefetchInstr == 0) {
tty->print("PREFETCH");
} else if (AllocatePrefetchInstr == 1) {
tty->print("BIS");
}
if (AllocatePrefetchLines > 1) {
tty->print_cr("PREFETCH %d, %d lines of size %d bytes", AllocatePrefetchDistance, AllocatePrefetchLines, AllocatePrefetchStepSize);
tty->print_cr(" at distance %d, %d lines of %d bytes", AllocatePrefetchDistance, AllocatePrefetchLines, AllocatePrefetchStepSize);
} else {
tty->print_cr("PREFETCH %d, one line", AllocatePrefetchDistance);
tty->print_cr(" at distance %d, one line of %d bytes", AllocatePrefetchDistance, AllocatePrefetchStepSize);
}
}
if (PrefetchCopyIntervalInBytes > 0) {
......
......@@ -121,6 +121,7 @@ public:
// Returns true if the platform is in the niagara line (T series)
// and newer than the niagara1.
static bool is_niagara_plus() { return is_T_family(_features) && !is_T1_model(_features); }
static bool is_T4() { return is_T_family(_features) && has_cbcond(); }
// Fujitsu SPARC64
static bool is_sparc64() { return (_features & sparc64_family_m) != 0; }
......@@ -130,13 +131,17 @@ public:
static bool has_fast_fxtof() { return is_niagara() || is_sparc64() || has_v9() && !is_ultra3(); }
static bool has_fast_idiv() { return is_niagara_plus() || is_sparc64(); }
// T4 and newer Sparc have fast RDPC instruction.
static bool has_fast_rdpc() { return is_niagara_plus() && has_cbcond(); }
static bool has_fast_rdpc() { return is_T4(); }
// T4 and newer Sparc have Most-Recently-Used (MRU) BIS.
static bool has_mru_blk_init() { return has_blk_init() && is_T4(); }
static const char* cpu_features() { return _features_str; }
static intx L1_data_cache_line_size() {
return 64; // default prefetch block size on sparc
static intx prefetch_data_size() {
return is_T4() ? 32 : 64; // default prefetch block size on sparc
}
// Prefetch
......
......@@ -2315,7 +2315,7 @@ void Assembler::prefetchnta(Address src) {
}
void Assembler::prefetchr(Address src) {
NOT_LP64(assert(VM_Version::supports_3dnow_prefetch(), "must support"));
assert(VM_Version::supports_3dnow_prefetch(), "must support");
InstructionMark im(this);
prefetch_prefix(src);
emit_byte(0x0D);
......@@ -2347,7 +2347,7 @@ void Assembler::prefetcht2(Address src) {
}
void Assembler::prefetchw(Address src) {
NOT_LP64(assert(VM_Version::supports_3dnow_prefetch(), "must support"));
assert(VM_Version::supports_3dnow_prefetch(), "must support");
InstructionMark im(this);
prefetch_prefix(src);
emit_byte(0x0D);
......
......@@ -557,14 +557,16 @@ void VM_Version::get_processor_features() {
if( !supports_sse() && supports_3dnow_prefetch() ) AllocatePrefetchInstr = 3;
// Allocation prefetch settings
intx cache_line_size = L1_data_cache_line_size();
intx cache_line_size = prefetch_data_size();
if( cache_line_size > AllocatePrefetchStepSize )
AllocatePrefetchStepSize = cache_line_size;
if( FLAG_IS_DEFAULT(AllocatePrefetchLines) )
AllocatePrefetchLines = 3; // Optimistic value
assert(AllocatePrefetchLines > 0, "invalid value");
if( AllocatePrefetchLines < 1 ) // set valid value in product VM
AllocatePrefetchLines = 1; // Conservative value
AllocatePrefetchLines = 3;
assert(AllocateInstancePrefetchLines > 0, "invalid value");
if( AllocateInstancePrefetchLines < 1 ) // set valid value in product VM
AllocateInstancePrefetchLines = 1;
AllocatePrefetchDistance = allocate_prefetch_distance();
AllocatePrefetchStyle = allocate_prefetch_style();
......@@ -601,10 +603,11 @@ void VM_Version::get_processor_features() {
tty->print_cr("Logical CPUs per core: %u",
logical_processors_per_package());
tty->print_cr("UseSSE=%d",UseSSE);
tty->print("Allocation: ");
tty->print("Allocation");
if (AllocatePrefetchStyle <= 0 || UseSSE == 0 && !supports_3dnow_prefetch()) {
tty->print_cr("no prefetching");
tty->print_cr(": no prefetching");
} else {
tty->print(" prefetching: ");
if (UseSSE == 0 && supports_3dnow_prefetch()) {
tty->print("PREFETCHW");
} else if (UseSSE >= 1) {
......@@ -619,9 +622,9 @@ void VM_Version::get_processor_features() {
}
}
if (AllocatePrefetchLines > 1) {
tty->print_cr(" %d, %d lines with step %d bytes", AllocatePrefetchDistance, AllocatePrefetchLines, AllocatePrefetchStepSize);
tty->print_cr(" at distance %d, %d lines of %d bytes", AllocatePrefetchDistance, AllocatePrefetchLines, AllocatePrefetchStepSize);
} else {
tty->print_cr(" %d, one line", AllocatePrefetchDistance);
tty->print_cr(" at distance %d, one line of %d bytes", AllocatePrefetchDistance, AllocatePrefetchStepSize);
}
}
......
......@@ -419,7 +419,7 @@ public:
return result;
}
static intx L1_data_cache_line_size() {
static intx prefetch_data_size() {
intx result = 0;
if (is_intel()) {
result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
......
......@@ -7325,8 +7325,9 @@ instruct prefetchr( memory mem ) %{
ins_cost(100);
format %{ "PREFETCHR $mem\t! Prefetch into level 1 cache for read" %}
opcode(0x0F, 0x0d); /* Opcode 0F 0d /0 */
ins_encode(OpcP, OpcS, RMopc_Mem(0x00,mem));
ins_encode %{
__ prefetchr($mem$$Address);
%}
ins_pipe(ialu_mem);
%}
......@@ -7336,8 +7337,9 @@ instruct prefetchrNTA( memory mem ) %{
ins_cost(100);
format %{ "PREFETCHNTA $mem\t! Prefetch into non-temporal cache for read" %}
opcode(0x0F, 0x18); /* Opcode 0F 18 /0 */
ins_encode(OpcP, OpcS, RMopc_Mem(0x00,mem));
ins_encode %{
__ prefetchnta($mem$$Address);
%}
ins_pipe(ialu_mem);
%}
......@@ -7347,8 +7349,9 @@ instruct prefetchrT0( memory mem ) %{
ins_cost(100);
format %{ "PREFETCHT0 $mem\t! Prefetch into L1 and L2 caches for read" %}
opcode(0x0F, 0x18); /* Opcode 0F 18 /1 */
ins_encode(OpcP, OpcS, RMopc_Mem(0x01,mem));
ins_encode %{
__ prefetcht0($mem$$Address);
%}
ins_pipe(ialu_mem);
%}
......@@ -7358,8 +7361,9 @@ instruct prefetchrT2( memory mem ) %{
ins_cost(100);
format %{ "PREFETCHT2 $mem\t! Prefetch into L2 cache for read" %}
opcode(0x0F, 0x18); /* Opcode 0F 18 /3 */
ins_encode(OpcP, OpcS, RMopc_Mem(0x03,mem));
ins_encode %{
__ prefetcht2($mem$$Address);
%}
ins_pipe(ialu_mem);
%}
......@@ -7374,46 +7378,86 @@ instruct prefetchw0( memory mem ) %{
%}
instruct prefetchw( memory mem ) %{
predicate(UseSSE==0 && VM_Version::supports_3dnow_prefetch() || AllocatePrefetchInstr==3);
predicate(UseSSE==0 && VM_Version::supports_3dnow_prefetch());
match( PrefetchWrite mem );
ins_cost(100);
format %{ "PREFETCHW $mem\t! Prefetch into L1 cache and mark modified" %}
opcode(0x0F, 0x0D); /* Opcode 0F 0D /1 */
ins_encode(OpcP, OpcS, RMopc_Mem(0x01,mem));
ins_encode %{
__ prefetchw($mem$$Address);
%}
ins_pipe(ialu_mem);
%}
instruct prefetchwNTA( memory mem ) %{
predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
predicate(UseSSE>=1);
match(PrefetchWrite mem);
ins_cost(100);
format %{ "PREFETCHNTA $mem\t! Prefetch into non-temporal cache for write" %}
opcode(0x0F, 0x18); /* Opcode 0F 18 /0 */
ins_encode(OpcP, OpcS, RMopc_Mem(0x00,mem));
ins_encode %{
__ prefetchnta($mem$$Address);
%}
ins_pipe(ialu_mem);
%}
// Prefetch instructions for allocation.
// Matches PrefetchAllocation when UseSSE==0 and AllocatePrefetchInstr!=3.
// The encoding is empty (ins_encode() with no arguments, size 0, cost 0),
// so no prefetch instruction is emitted for the node in this configuration.
instruct prefetchAlloc0( memory mem ) %{
predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
match(PrefetchAllocation mem);
ins_cost(0);
size(0);
format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
ins_encode();
ins_pipe(empty);
%}
// Matches PrefetchAllocation when AllocatePrefetchInstr==3 and encodes it
// as a PREFETCHW of the address described by $mem, emitted through the
// assembler's prefetchw() routine.
instruct prefetchAlloc( memory mem ) %{
predicate(AllocatePrefetchInstr==3);
match( PrefetchAllocation mem );
ins_cost(100);
format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
ins_encode %{
__ prefetchw($mem$$Address);
%}
ins_pipe(ialu_mem);
%}
instruct prefetchwT0( memory mem ) %{
instruct prefetchAllocNTA( memory mem ) %{
predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
match(PrefetchAllocation mem);
ins_cost(100);
format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
ins_encode %{
__ prefetchnta($mem$$Address);
%}
ins_pipe(ialu_mem);
%}
instruct prefetchAllocT0( memory mem ) %{
predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
match(PrefetchWrite mem);
match(PrefetchAllocation mem);
ins_cost(100);
format %{ "PREFETCHT0 $mem\t! Prefetch into L1 and L2 caches for write" %}
opcode(0x0F, 0x18); /* Opcode 0F 18 /1 */
ins_encode(OpcP, OpcS, RMopc_Mem(0x01,mem));
format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
ins_encode %{
__ prefetcht0($mem$$Address);
%}
ins_pipe(ialu_mem);
%}
instruct prefetchwT2( memory mem ) %{
instruct prefetchAllocT2( memory mem ) %{
predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
match(PrefetchWrite mem);
match(PrefetchAllocation mem);
ins_cost(100);
format %{ "PREFETCHT2 $mem\t! Prefetch into L2 cache for write" %}
opcode(0x0F, 0x18); /* Opcode 0F 18 /3 */
ins_encode(OpcP, OpcS, RMopc_Mem(0x03,mem));
format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
ins_encode %{
__ prefetcht2($mem$$Address);
%}
ins_pipe(ialu_mem);
%}
......
......@@ -6617,8 +6617,9 @@ instruct prefetchr( memory mem ) %{
ins_cost(125);
format %{ "PREFETCHR $mem\t# Prefetch into level 1 cache" %}
opcode(0x0F, 0x0D); /* Opcode 0F 0D /0 */
ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x00, mem));
ins_encode %{
__ prefetchr($mem$$Address);
%}
ins_pipe(ialu_mem);
%}
......@@ -6628,8 +6629,9 @@ instruct prefetchrNTA( memory mem ) %{
ins_cost(125);
format %{ "PREFETCHNTA $mem\t# Prefetch into non-temporal cache for read" %}
opcode(0x0F, 0x18); /* Opcode 0F 18 /0 */
ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x00, mem));
ins_encode %{
__ prefetchnta($mem$$Address);
%}
ins_pipe(ialu_mem);
%}
......@@ -6639,8 +6641,9 @@ instruct prefetchrT0( memory mem ) %{
ins_cost(125);
format %{ "PREFETCHT0 $mem\t# prefetch into L1 and L2 caches for read" %}
opcode(0x0F, 0x18); /* Opcode 0F 18 /1 */
ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x01, mem));
ins_encode %{
__ prefetcht0($mem$$Address);
%}
ins_pipe(ialu_mem);
%}
......@@ -6650,52 +6653,70 @@ instruct prefetchrT2( memory mem ) %{
ins_cost(125);
format %{ "PREFETCHT2 $mem\t# prefetch into L2 caches for read" %}
opcode(0x0F, 0x18); /* Opcode 0F 18 /3 */
ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x03, mem));
ins_encode %{
__ prefetcht2($mem$$Address);
%}
ins_pipe(ialu_mem);
%}
instruct prefetchw( memory mem ) %{
predicate(AllocatePrefetchInstr==3);
instruct prefetchwNTA( memory mem ) %{
match(PrefetchWrite mem);
ins_cost(125);
format %{ "PREFETCHW $mem\t# Prefetch into level 1 cache and mark modified" %}
opcode(0x0F, 0x0D); /* Opcode 0F 0D /1 */
ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x01, mem));
format %{ "PREFETCHNTA $mem\t# Prefetch to non-temporal cache for write" %}
ins_encode %{
__ prefetchnta($mem$$Address);
%}
ins_pipe(ialu_mem);
%}
instruct prefetchwNTA( memory mem ) %{
// Prefetch instructions for allocation.
// Matches PrefetchAllocation when AllocatePrefetchInstr==3 and encodes it
// as a PREFETCHW of the address described by $mem, emitted through the
// assembler's prefetchw() routine.
instruct prefetchAlloc( memory mem ) %{
predicate(AllocatePrefetchInstr==3);
match(PrefetchAllocation mem);
ins_cost(125);
format %{ "PREFETCHW $mem\t# Prefetch allocation into level 1 cache and mark modified" %}
ins_encode %{
__ prefetchw($mem$$Address);
%}
ins_pipe(ialu_mem);
%}
instruct prefetchAllocNTA( memory mem ) %{
predicate(AllocatePrefetchInstr==0);
match(PrefetchWrite mem);
match(PrefetchAllocation mem);
ins_cost(125);
format %{ "PREFETCHNTA $mem\t# Prefetch to non-temporal cache for write" %}
opcode(0x0F, 0x18); /* Opcode 0F 18 /0 */
ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x00, mem));
format %{ "PREFETCHNTA $mem\t# Prefetch allocation to non-temporal cache for write" %}
ins_encode %{
__ prefetchnta($mem$$Address);
%}
ins_pipe(ialu_mem);
%}
instruct prefetchwT0( memory mem ) %{
instruct prefetchAllocT0( memory mem ) %{
predicate(AllocatePrefetchInstr==1);
match(PrefetchWrite mem);
match(PrefetchAllocation mem);
ins_cost(125);
format %{ "PREFETCHT0 $mem\t# Prefetch to level 1 and 2 caches for write" %}
opcode(0x0F, 0x18); /* Opcode 0F 18 /1 */
ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x01, mem));
format %{ "PREFETCHT0 $mem\t# Prefetch allocation to level 1 and 2 caches for write" %}
ins_encode %{
__ prefetcht0($mem$$Address);
%}
ins_pipe(ialu_mem);
%}
instruct prefetchwT2( memory mem ) %{
instruct prefetchAllocT2( memory mem ) %{
predicate(AllocatePrefetchInstr==2);
match(PrefetchWrite mem);
match(PrefetchAllocation mem);
ins_cost(125);
format %{ "PREFETCHT2 $mem\t# Prefetch to level 2 cache for write" %}
opcode(0x0F, 0x18); /* Opcode 0F 18 /3 */
ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x03, mem));
format %{ "PREFETCHT2 $mem\t# Prefetch allocation to level 2 cache for write" %}
ins_encode %{
__ prefetcht2($mem$$Address);
%}
ins_pipe(ialu_mem);
%}
......
......@@ -3390,7 +3390,9 @@ int MatchNode::needs_ideal_memory_edge(FormDict &globals) const {
"ClearArray"
};
int cnt = sizeof(needs_ideal_memory_list)/sizeof(char*);
if( strcmp(_opType,"PrefetchRead")==0 || strcmp(_opType,"PrefetchWrite")==0 )
if( strcmp(_opType,"PrefetchRead")==0 ||
strcmp(_opType,"PrefetchWrite")==0 ||
strcmp(_opType,"PrefetchAllocation")==0 )
return 1;
if( _lChild ) {
const char *opType = _lChild->_opType;
......
......@@ -124,16 +124,7 @@ public:
// Reserve space at the end of TLAB
static size_t end_reserve() {
int reserve_size = typeArrayOopDesc::header_size(T_INT);
if (AllocatePrefetchStyle == 3) {
// BIS is used to prefetch - we need a space for it.
// +1 for rounding up to next cache line +1 to be safe
int lines = AllocatePrefetchLines + 2;
int step_size = AllocatePrefetchStepSize;
int distance = AllocatePrefetchDistance;
int prefetch_end = (distance + step_size*lines)/(int)HeapWordSize;
reserve_size = MAX2(reserve_size, prefetch_end);
}
return reserve_size;
return MAX2(reserve_size, VM_Version::reserve_for_allocation_prefetch());
}
static size_t alignment_reserve() { return align_object_size(end_reserve()); }
static size_t alignment_reserve_in_bytes() { return alignment_reserve() * HeapWordSize; }
......
......@@ -196,6 +196,7 @@ macro(Phi)
macro(PopCountI)
macro(PopCountL)
macro(PowD)
macro(PrefetchAllocation)
macro(PrefetchRead)
macro(PrefetchWrite)
macro(Proj)
......
......@@ -1590,7 +1590,7 @@ Node* PhaseMacroExpand::prefetch_allocation(Node* i_o, Node*& needgc_false,
prefetch_adr = new (C, 4) AddPNode( old_pf_wm, new_pf_wmt,
_igvn.MakeConX(distance) );
transform_later(prefetch_adr);
prefetch = new (C, 3) PrefetchWriteNode( i_o, prefetch_adr );
prefetch = new (C, 3) PrefetchAllocationNode( i_o, prefetch_adr );
transform_later(prefetch);
distance += step_size;
i_o = prefetch;
......@@ -1611,13 +1611,14 @@ Node* PhaseMacroExpand::prefetch_allocation(Node* i_o, Node*& needgc_false,
contended_phi_rawmem = pf_phi_rawmem;
i_o = pf_phi_abio;
} else if( UseTLAB && AllocatePrefetchStyle == 3 ) {
// Insert a prefetch for each allocation only on the fast-path
// Insert a prefetch for each allocation.
// This code is used for Sparc with BIS.
Node *pf_region = new (C, 3) RegionNode(3);
Node *pf_phi_rawmem = new (C, 3) PhiNode( pf_region, Type::MEMORY,
TypeRawPtr::BOTTOM );
// Generate several prefetch instructions only for arrays.
uint lines = (length != NULL) ? AllocatePrefetchLines : 1;
// Generate several prefetch instructions.
uint lines = (length != NULL) ? AllocatePrefetchLines : AllocateInstancePrefetchLines;
uint step_size = AllocatePrefetchStepSize;
uint distance = AllocatePrefetchDistance;
......@@ -1634,7 +1635,7 @@ Node* PhaseMacroExpand::prefetch_allocation(Node* i_o, Node*& needgc_false,
transform_later(cache_adr);
// Prefetch
Node *prefetch = new (C, 3) PrefetchWriteNode( contended_phi_rawmem, cache_adr );
Node *prefetch = new (C, 3) PrefetchAllocationNode( contended_phi_rawmem, cache_adr );
prefetch->set_req(0, needgc_false);
transform_later(prefetch);
contended_phi_rawmem = prefetch;
......@@ -1644,7 +1645,7 @@ Node* PhaseMacroExpand::prefetch_allocation(Node* i_o, Node*& needgc_false,
prefetch_adr = new (C, 4) AddPNode( cache_adr, cache_adr,
_igvn.MakeConX(distance) );
transform_later(prefetch_adr);
prefetch = new (C, 3) PrefetchWriteNode( contended_phi_rawmem, prefetch_adr );
prefetch = new (C, 3) PrefetchAllocationNode( contended_phi_rawmem, prefetch_adr );
transform_later(prefetch);
distance += step_size;
contended_phi_rawmem = prefetch;
......@@ -1653,15 +1654,15 @@ Node* PhaseMacroExpand::prefetch_allocation(Node* i_o, Node*& needgc_false,
// Insert a prefetch for each allocation only on the fast-path
Node *prefetch_adr;
Node *prefetch;
// Generate several prefetch instructions only for arrays.
uint lines = (length != NULL) ? AllocatePrefetchLines : 1;
// Generate several prefetch instructions.
uint lines = (length != NULL) ? AllocatePrefetchLines : AllocateInstancePrefetchLines;
uint step_size = AllocatePrefetchStepSize;
uint distance = AllocatePrefetchDistance;
for ( uint i = 0; i < lines; i++ ) {
prefetch_adr = new (C, 4) AddPNode( old_eden_top, new_eden_top,
_igvn.MakeConX(distance) );
transform_later(prefetch_adr);
prefetch = new (C, 3) PrefetchWriteNode( i_o, prefetch_adr );
prefetch = new (C, 3) PrefetchAllocationNode( i_o, prefetch_adr );
// Do not let it float too high, since if eden_top == eden_end,
// both might be null.
if( i == 0 ) { // Set control for first prefetch, next follows it
......
......@@ -826,6 +826,7 @@ static void match_alias_type(Compile* C, Node* n, Node* m) {
switch (n->Opcode()) {
case Op_PrefetchRead:
case Op_PrefetchWrite:
case Op_PrefetchAllocation:
nidx = Compile::AliasIdxRaw;
nat = TypeRawPtr::BOTTOM;
break;
......
......@@ -1278,6 +1278,16 @@ public:
virtual int Opcode() const;
virtual uint ideal_reg() const { return NotAMachineReg; }
virtual uint match_edge(uint idx) const { return idx==2; }
virtual const Type *bottom_type() const { return Type::ABIO; }
};
// Allocation prefetch which may fault, so the TLAB size has to be adjusted.
// Constructed from a memory (or i_o) input and an address input.
class PrefetchAllocationNode : public Node {
public:
PrefetchAllocationNode(Node *mem, Node *adr) : Node(0,mem,adr) {}
virtual int Opcode() const;
// The node does not produce a value in a machine register.
virtual uint ideal_reg() const { return NotAMachineReg; }
// Only input index 2 is a match edge (presumably the adr input, given the
// Node(0,mem,adr) construction above).
virtual uint match_edge(uint idx) const { return idx==2; }
// The bottom type depends on the prefetch style: Type::MEMORY when
// AllocatePrefetchStyle == 3, Type::ABIO for every other style.
virtual const Type *bottom_type() const { return ( AllocatePrefetchStyle == 3 ) ? Type::MEMORY : Type::ABIO; }
};
......
......@@ -2897,8 +2897,11 @@ class CommandLineFlags {
product(intx, AllocatePrefetchDistance, -1, \
"Distance to prefetch ahead of allocation pointer") \
\
product(intx, AllocatePrefetchLines, 1, \
"Number of lines to prefetch ahead of allocation pointer") \
product(intx, AllocatePrefetchLines, 3, \
"Number of lines to prefetch ahead of array allocation pointer") \
\
product(intx, AllocateInstancePrefetchLines, 1, \
"Number of lines to prefetch ahead of instance allocation pointer") \
\
product(intx, AllocatePrefetchStepSize, 16, \
"Step size in bytes of sequential prefetch instructions") \
......
......@@ -46,6 +46,7 @@ const char* Abstract_VM_Version::_s_vm_release = Abstract_VM_Version::vm_release
const char* Abstract_VM_Version::_s_internal_vm_info_string = Abstract_VM_Version::internal_vm_info_string();
bool Abstract_VM_Version::_supports_cx8 = false;
unsigned int Abstract_VM_Version::_logical_processors_per_package = 1U;
int Abstract_VM_Version::_reserve_for_allocation_prefetch = 0;
#ifndef HOTSPOT_RELEASE_VERSION
#error HOTSPOT_RELEASE_VERSION must be defined
......
......@@ -44,6 +44,7 @@ class Abstract_VM_Version: AllStatic {
static bool _initialized;
static int _parallel_worker_threads;
static bool _parallel_worker_threads_initialized;
static int _reserve_for_allocation_prefetch;
static unsigned int nof_parallel_worker_threads(unsigned int num,
unsigned int dem,
......@@ -77,6 +78,12 @@ class Abstract_VM_Version: AllStatic {
return _logical_processors_per_package;
}
// Space needed at the end of a TLAB for prefetch instructions
// which may fault when accessing memory outside of the heap.
// Returns the static _reserve_for_allocation_prefetch value, which is
// initialized to 0. The SPARC VM_Version::initialize() sets it to
// (distance + step_size*lines)/HeapWordSize when AllocatePrefetchInstr == 1,
// so the value is expressed in heap words.
static int reserve_for_allocation_prefetch() {
return _reserve_for_allocation_prefetch;
}
// ARCH specific policy for the BiasedLocking
static bool use_biased_locking() { return true; }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册