diff --git a/src/cpu/sparc/vm/sparc.ad b/src/cpu/sparc/vm/sparc.ad index 0243793479a38d516370456face93ec95db2b5d8..7f495a9569f1dec7615b0541229900865cde20ed 100644 --- a/src/cpu/sparc/vm/sparc.ad +++ b/src/cpu/sparc/vm/sparc.ad @@ -471,6 +471,9 @@ extern bool can_branch_register( Node *bol, Node *cmp ); source %{ #define __ _masm. +// Block initializing store +#define ASI_BLK_INIT_QUAD_LDD_P 0xE2 + // tertiary op of a LoadP or StoreP encoding #define REGP_OP true @@ -6147,6 +6150,7 @@ instruct prefetchr( memory mem ) %{ %} instruct prefetchw( memory mem ) %{ + predicate(AllocatePrefetchStyle != 3 ); match( PrefetchWrite mem ); ins_cost(MEMORY_REF_COST); @@ -6156,6 +6160,23 @@ instruct prefetchw( memory mem ) %{ ins_pipe(iload_mem); %} +// Use BIS instruction to prefetch. +instruct prefetchw_bis( memory mem ) %{ + predicate(AllocatePrefetchStyle == 3); + match( PrefetchWrite mem ); + ins_cost(MEMORY_REF_COST); + + format %{ "STXA G0,$mem\t! // Block initializing store" %} + ins_encode %{ + Register base = as_Register($mem$$base); + int disp = $mem$$disp; + if (disp != 0) { + __ add(base, AllocatePrefetchStepSize, base); + } + __ stxa(G0, base, G0, ASI_BLK_INIT_QUAD_LDD_P); + %} + ins_pipe(istore_mem_reg); +%} //----------Store Instructions------------------------------------------------- // Store Byte diff --git a/src/cpu/sparc/vm/vm_version_sparc.cpp b/src/cpu/sparc/vm/vm_version_sparc.cpp index fb0cfdc5ea046e68b8649f7619faef58b2b1b5e1..9458c1c012f0d861372c9f4afb58c0536dfa3fc9 100644 --- a/src/cpu/sparc/vm/vm_version_sparc.cpp +++ b/src/cpu/sparc/vm/vm_version_sparc.cpp @@ -86,9 +86,19 @@ void VM_Version::initialize() { if (FLAG_IS_DEFAULT(InteriorEntryAlignment)) { FLAG_SET_DEFAULT(InteriorEntryAlignment, 4); } - if (is_niagara1_plus() && FLAG_IS_DEFAULT(AllocatePrefetchDistance)) { - // Use smaller prefetch distance on N2 - FLAG_SET_DEFAULT(AllocatePrefetchDistance, 256); + if (is_niagara1_plus()) { + if (AllocatePrefetchStyle > 0 && FLAG_IS_DEFAULT(AllocatePrefetchStyle)) { + // Use BIS instruction for allocation prefetch. + FLAG_SET_DEFAULT(AllocatePrefetchStyle, 3); + if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) { + // Use smaller prefetch distance on N2 with BIS + FLAG_SET_DEFAULT(AllocatePrefetchDistance, 64); + } + } + if (AllocatePrefetchStyle != 3 && FLAG_IS_DEFAULT(AllocatePrefetchDistance)) { + // Use different prefetch distance without BIS + FLAG_SET_DEFAULT(AllocatePrefetchDistance, 256); + } } #endif if (FLAG_IS_DEFAULT(OptoLoopAlignment)) { diff --git a/src/share/vm/memory/threadLocalAllocBuffer.hpp b/src/share/vm/memory/threadLocalAllocBuffer.hpp index 8007cb497a54a756deb5c7726523199c3989c03f..0b903ffa5a9794a7d1c1976e64abe3c028cd3d08 100644 --- a/src/share/vm/memory/threadLocalAllocBuffer.hpp +++ b/src/share/vm/memory/threadLocalAllocBuffer.hpp @@ -111,7 +111,22 @@ public: // Allocate size HeapWords. The memory is NOT initialized to zero. inline HeapWord* allocate(size_t size); - static size_t alignment_reserve() { return align_object_size(typeArrayOopDesc::header_size(T_INT)); } + + // Reserve space at the end of TLAB + static size_t end_reserve() { + int reserve_size = typeArrayOopDesc::header_size(T_INT); + if (AllocatePrefetchStyle == 3) { + // BIS is used to prefetch - we need a space for it. + // +1 for rounding up to next cache line +1 to be safe + int lines = AllocatePrefetchLines + 2; + int step_size = AllocatePrefetchStepSize; + int distance = AllocatePrefetchDistance; + int prefetch_end = (distance + step_size*lines)/(int)HeapWordSize; + reserve_size = MAX2(reserve_size, prefetch_end); + } + return reserve_size; + } + static size_t alignment_reserve() { return align_object_size(end_reserve()); } static size_t alignment_reserve_in_bytes() { return alignment_reserve() * HeapWordSize; } // Return tlab size or remaining space in eden such that the diff --git a/src/share/vm/opto/macro.cpp b/src/share/vm/opto/macro.cpp index 2fdc335b918d25c8554b2a6a0049257d90f4349f..32046b07ad722e092e25a8963390ca4c7e8989b8 100644 --- a/src/share/vm/opto/macro.cpp +++ b/src/share/vm/opto/macro.cpp @@ -1487,11 +1487,11 @@ Node* PhaseMacroExpand::prefetch_allocation(Node* i_o, Node*& needgc_false, Node*& contended_phi_rawmem, Node* old_eden_top, Node* new_eden_top, Node* length) { + enum { fall_in_path = 1, pf_path = 2 }; if( UseTLAB && AllocatePrefetchStyle == 2 ) { // Generate prefetch allocation with watermark check. // As an allocation hits the watermark, we will prefetch starting // at a "distance" away from watermark. - enum { fall_in_path = 1, pf_path = 2 }; Node *pf_region = new (C, 3) RegionNode(3); Node *pf_phi_rawmem = new (C, 3) PhiNode( pf_region, Type::MEMORY, @@ -1570,6 +1570,45 @@ Node* PhaseMacroExpand::prefetch_allocation(Node* i_o, Node*& needgc_false, needgc_false = pf_region; contended_phi_rawmem = pf_phi_rawmem; i_o = pf_phi_abio; + } else if( UseTLAB && AllocatePrefetchStyle == 3 ) { + // Insert a prefetch for each allocation only on the fast-path + Node *pf_region = new (C, 3) RegionNode(3); + Node *pf_phi_rawmem = new (C, 3) PhiNode( pf_region, Type::MEMORY, + TypeRawPtr::BOTTOM ); + + // Generate several prefetch instructions only for arrays. + uint lines = (length != NULL) ? AllocatePrefetchLines : 1; + uint step_size = AllocatePrefetchStepSize; + uint distance = AllocatePrefetchDistance; + + // Next cache address. + Node *cache_adr = new (C, 4) AddPNode(old_eden_top, old_eden_top, + _igvn.MakeConX(distance)); + transform_later(cache_adr); + cache_adr = new (C, 2) CastP2XNode(needgc_false, cache_adr); + transform_later(cache_adr); + Node* mask = _igvn.MakeConX(~(intptr_t)(step_size-1)); + cache_adr = new (C, 3) AndXNode(cache_adr, mask); + transform_later(cache_adr); + cache_adr = new (C, 2) CastX2PNode(cache_adr); + transform_later(cache_adr); + + // Prefetch + Node *prefetch = new (C, 3) PrefetchWriteNode( contended_phi_rawmem, cache_adr ); + prefetch->set_req(0, needgc_false); + transform_later(prefetch); + contended_phi_rawmem = prefetch; + Node *prefetch_adr; + distance = step_size; + for ( uint i = 1; i < lines; i++ ) { + prefetch_adr = new (C, 4) AddPNode( cache_adr, cache_adr, + _igvn.MakeConX(distance) ); + transform_later(prefetch_adr); + prefetch = new (C, 3) PrefetchWriteNode( contended_phi_rawmem, prefetch_adr ); + transform_later(prefetch); + distance += step_size; + contended_phi_rawmem = prefetch; + } } else if( AllocatePrefetchStyle > 0 ) { // Insert a prefetch for each allocation only on the fast-path Node *prefetch_adr; diff --git a/src/share/vm/opto/memnode.hpp b/src/share/vm/opto/memnode.hpp index 918a8353310ddf03779e5d33fbc220af2f89cece..86ee853b5aefbc3ac2de6a66c8b80a9816fe1023 100644 --- a/src/share/vm/opto/memnode.hpp +++ b/src/share/vm/opto/memnode.hpp @@ -1244,5 +1244,5 @@ public: virtual int Opcode() const; virtual uint ideal_reg() const { return NotAMachineReg; } virtual uint match_edge(uint idx) const { return idx==2; } - virtual const Type *bottom_type() const { return Type::ABIO; } + virtual const Type *bottom_type() const { return ( AllocatePrefetchStyle == 3 ) ? Type::MEMORY : Type::ABIO; } }; diff --git a/src/share/vm/runtime/globals.hpp b/src/share/vm/runtime/globals.hpp index b4991f62dd1a32bceba752776437024b40404dec..861d89cb6997371867a525eb34d6a81de3fd60c1 100644 --- a/src/share/vm/runtime/globals.hpp +++ b/src/share/vm/runtime/globals.hpp @@ -2708,7 +2708,8 @@ class CommandLineFlags { product(intx, AllocatePrefetchStyle, 1, \ "0 = no prefetch, " \ "1 = prefetch instructions for each allocation, " \ - "2 = use TLAB watermark to gate allocation prefetch") \ + "2 = use TLAB watermark to gate allocation prefetch, " \ + "3 = use BIS instruction on Sparc for allocation prefetch") \ \ product(intx, AllocatePrefetchDistance, -1, \ "Distance to prefetch ahead of allocation pointer") \