From 4fec4ec72c921787231525c02b444059b7bfc246 Mon Sep 17 00:00:00 2001 From: goetz Date: Tue, 26 Nov 2013 18:38:19 -0800 Subject: [PATCH] 8028515: PPPC64 (part 113.2): opto: Introduce LoadFence/StoreFence. Summary: Use new nodes for loadFence/storeFence intrinsics in C2. Reviewed-by: kvn, dholmes --- make/jprt.properties | 72 ++++++++++++++++++++++++++++++ src/cpu/sparc/vm/sparc.ad | 2 + src/cpu/x86/vm/x86_32.ad | 2 + src/cpu/x86/vm/x86_64.ad | 2 + src/share/vm/adlc/formssel.cpp | 14 +++--- src/share/vm/opto/classes.hpp | 2 + src/share/vm/opto/library_call.cpp | 4 +- src/share/vm/opto/matcher.cpp | 4 +- src/share/vm/opto/memnode.cpp | 30 ++++++++----- src/share/vm/opto/memnode.hpp | 22 +++++++++ src/share/vm/runtime/vmStructs.cpp | 2 + 11 files changed, 135 insertions(+), 21 deletions(-) diff --git a/make/jprt.properties b/make/jprt.properties index 9109ded6f..a0548f2b3 100644 --- a/make/jprt.properties +++ b/make/jprt.properties @@ -329,9 +329,81 @@ jprt.my.linux.i586.test.targets.embedded = \ # The complete list of test targets for jprt # Note: no PPC or ARM tests at this stage +jprt.my.linux.armvfpsflt.test.targets.embedded = \ + linux_armvfpsflt_2.6-{productEmb|fastdebugEmb}-{c1|c2}-scimark, \ + linux_armvfpsflt_2.6-{productEmb|fastdebugEmb}-{c1|c2}-GCBasher_default, \ + linux_armvfpsflt_2.6-{productEmb|fastdebugEmb}-{c1|c2}-GCBasher_SerialGC, \ + linux_armvfpsflt_2.6-{productEmb|fastdebugEmb}-{c1|c2}-GCBasher_ParallelGC, \ + linux_armvfpsflt_2.6-{productEmb|fastdebugEmb}-{c1|c2}-GCBasher_ParNewGC, \ + linux_armvfpsflt_2.6-{productEmb|fastdebugEmb}-{c1|c2}-GCBasher_CMS, \ + linux_armvfpsflt_2.6-{productEmb|fastdebugEmb}-{c1|c2}-GCBasher_G1, \ + linux_armvfpsflt_2.6-{productEmb|fastdebugEmb}-{c1|c2}-GCBasher_ParOldGC, \ + linux_armvfpsflt_2.6-productEmb-{c1|c2}-GCOld_default, \ + linux_armvfpsflt_2.6-productEmb-{c1|c2}-GCOld_SerialGC, \ + linux_armvfpsflt_2.6-productEmb-{c1|c2}-GCOld_ParallelGC, \ + linux_armvfpsflt_2.6-productEmb-{c1|c2}-GCOld_ParNewGC, \ + 
linux_armvfpsflt_2.6-productEmb-{c1|c2}-GCOld_CMS, \ + linux_armvfpsflt_2.6-productEmb-{c1|c2}-GCOld_G1, \ + linux_armvfpsflt_2.6-productEmb-{c1|c2}-GCOld_ParOldGC, \ + linux_armvfpsflt_2.6-{productEmb|fastdebugEmb}-c1-jbb_default, \ + linux_armvfpsflt_2.6-{productEmb|fastdebugEmb}-c2-jbb_default_nontiered, \ + linux_armvfpsflt_2.6-{productEmb|fastdebugEmb}-c1-jbb_ParallelGC, \ + linux_armvfpsflt_2.6-{productEmb|fastdebugEmb}-c1-jbb_CMS, \ + linux_armvfpsflt_2.6-{productEmb|fastdebugEmb}-c1-jbb_G1, \ + linux_armvfpsflt_2.6-{productEmb|fastdebugEmb}-c1-jbb_ParOldGC + +# QEMU Emulators for ARM VFP HFLT +jprt.my.linux.armvfphflt.test.targets.embedded = \ + linux_armvfphflt_2.6-{productEmb|fastdebugEmb}-{c1|c2}-scimark, \ + linux_armvfphflt_2.6-{productEmb|fastdebugEmb}-{c1|c2}-GCBasher_default, \ + linux_armvfphflt_2.6-{productEmb|fastdebugEmb}-{c1|c2}-GCBasher_SerialGC, \ + linux_armvfphflt_2.6-{productEmb|fastdebugEmb}-{c1|c2}-GCBasher_ParallelGC, \ + linux_armvfphflt_2.6-{productEmb|fastdebugEmb}-{c1|c2}-GCBasher_ParNewGC, \ + linux_armvfphflt_2.6-{productEmb|fastdebugEmb}-{c1|c2}-GCBasher_CMS, \ + linux_armvfphflt_2.6-{productEmb|fastdebugEmb}-{c1|c2}-GCBasher_G1, \ + linux_armvfphflt_2.6-{productEmb|fastdebugEmb}-{c1|c2}-GCBasher_ParOldGC, \ + linux_armvfphflt_2.6-productEmb-{c1|c2}-GCOld_default, \ + linux_armvfphflt_2.6-productEmb-{c1|c2}-GCOld_SerialGC, \ + linux_armvfphflt_2.6-productEmb-{c1|c2}-GCOld_ParallelGC, \ + linux_armvfphflt_2.6-productEmb-{c1|c2}-GCOld_ParNewGC, \ + linux_armvfphflt_2.6-productEmb-{c1|c2}-GCOld_CMS, \ + linux_armvfphflt_2.6-productEmb-{c1|c2}-GCOld_G1, \ + linux_armvfphflt_2.6-productEmb-{c1|c2}-GCOld_ParOldGC, \ + linux_armvfphflt_2.6-{productEmb|fastdebugEmb}-c1-jbb_default, \ + linux_armvfphflt_2.6-{productEmb|fastdebugEmb}-c2-jbb_default_nontiered, \ + linux_armvfphflt_2.6-{productEmb|fastdebugEmb}-c1-jbb_ParallelGC, \ + linux_armvfphflt_2.6-{productEmb|fastdebugEmb}-c1-jbb_CMS, \ + 
linux_armvfphflt_2.6-{productEmb|fastdebugEmb}-c1-jbb_G1, \ + linux_armvfphflt_2.6-{productEmb|fastdebugEmb}-c1-jbb_ParOldGC + +jprt.my.linux.ppc.test.targets.embedded = \ + linux_ppc_2.6-{productEmb|fastdebugEmb}-{c1|c2}-scimark, \ + linux_ppc_2.6-{productEmb|fastdebugEmb}-{c1|c2}-GCBasher_default, \ + linux_ppc_2.6-{productEmb|fastdebugEmb}-{c1|c2}-GCBasher_SerialGC, \ + linux_ppc_2.6-{productEmb|fastdebugEmb}-{c1|c2}-GCBasher_ParallelGC, \ + linux_ppc_2.6-{productEmb|fastdebugEmb}-{c1|c2}-GCBasher_ParNewGC, \ + linux_ppc_2.6-{productEmb|fastdebugEmb}-{c1|c2}-GCBasher_CMS, \ + linux_ppc_2.6-{productEmb|fastdebugEmb}-{c1|c2}-GCBasher_G1, \ + linux_ppc_2.6-{productEmb|fastdebugEmb}-{c1|c2}-GCBasher_ParOldGC, \ + linux_ppc_2.6-productEmb-{c1|c2}-GCOld_default, \ + linux_ppc_2.6-productEmb-{c1|c2}-GCOld_SerialGC, \ + linux_ppc_2.6-productEmb-{c1|c2}-GCOld_ParallelGC, \ + linux_ppc_2.6-productEmb-{c1|c2}-GCOld_ParNewGC, \ + linux_ppc_2.6-productEmb-{c1|c2}-GCOld_CMS, \ + linux_ppc_2.6-productEmb-{c1|c2}-GCOld_G1, \ + linux_ppc_2.6-productEmb-{c1|c2}-GCOld_ParOldGC, \ + linux_ppc_2.6-{productEmb|fastdebugEmb}-c1-jbb_default, \ + linux_ppc_2.6-{productEmb|fastdebugEmb}-c2-jbb_default_nontiered, \ + linux_ppc_2.6-{productEmb|fastdebugEmb}-c1-jbb_ParallelGC, \ + linux_ppc_2.6-{productEmb|fastdebugEmb}-c1-jbb_CMS, \ + linux_ppc_2.6-{productEmb|fastdebugEmb}-c1-jbb_G1, \ + linux_ppc_2.6-{productEmb|fastdebugEmb}-c1-jbb_ParOldGC jprt.test.targets.standard = \ ${jprt.my.linux.i586.test.targets.embedded}, \ + ${jprt.my.linux.armvfpsflt.test.targets.embedded}, \ + ${jprt.my.linux.armvfphflt.test.targets.embedded}, \ + ${jprt.my.linux.ppc.test.targets.embedded}, \ ${jprt.my.solaris.sparcv9.test.targets}, \ ${jprt.my.solaris.x64.test.targets}, \ ${jprt.my.linux.i586.test.targets}, \ diff --git a/src/cpu/sparc/vm/sparc.ad b/src/cpu/sparc/vm/sparc.ad index a84af2e8e..8dfe76b5a 100644 --- a/src/cpu/sparc/vm/sparc.ad +++ b/src/cpu/sparc/vm/sparc.ad @@ -6651,6 +6651,7 @@ instruct 
decodeKlass_not_null(iRegP dst, iRegN src) %{ instruct membar_acquire() %{ match(MemBarAcquire); + match(LoadFence); ins_cost(4*MEMORY_REF_COST); size(0); @@ -6671,6 +6672,7 @@ instruct membar_acquire_lock() %{ instruct membar_release() %{ match(MemBarRelease); + match(StoreFence); ins_cost(4*MEMORY_REF_COST); size(0); diff --git a/src/cpu/x86/vm/x86_32.ad b/src/cpu/x86/vm/x86_32.ad index 00a9861df..e9d34e948 100644 --- a/src/cpu/x86/vm/x86_32.ad +++ b/src/cpu/x86/vm/x86_32.ad @@ -7096,6 +7096,7 @@ instruct storeSSL(stackSlotL dst, eRegL src) %{ instruct membar_acquire() %{ match(MemBarAcquire); + match(LoadFence); ins_cost(400); size(0); @@ -7116,6 +7117,7 @@ instruct membar_acquire_lock() %{ instruct membar_release() %{ match(MemBarRelease); + match(StoreFence); ins_cost(400); size(0); diff --git a/src/cpu/x86/vm/x86_64.ad b/src/cpu/x86/vm/x86_64.ad index b94f5a542..12280ddb1 100644 --- a/src/cpu/x86/vm/x86_64.ad +++ b/src/cpu/x86/vm/x86_64.ad @@ -6345,6 +6345,7 @@ instruct popCountL_mem(rRegI dst, memory mem, rFlagsReg cr) %{ instruct membar_acquire() %{ match(MemBarAcquire); + match(LoadFence); ins_cost(0); size(0); @@ -6367,6 +6368,7 @@ instruct membar_acquire_lock() instruct membar_release() %{ match(MemBarRelease); + match(StoreFence); ins_cost(0); size(0); diff --git a/src/share/vm/adlc/formssel.cpp b/src/share/vm/adlc/formssel.cpp index b20835815..904b888c2 100644 --- a/src/share/vm/adlc/formssel.cpp +++ b/src/share/vm/adlc/formssel.cpp @@ -648,6 +648,8 @@ bool InstructForm::is_wide_memory_kill(FormDict &globals) const { if( strcmp(_matrule->_opType,"MemBarReleaseLock") == 0 ) return true; if( strcmp(_matrule->_opType,"MemBarAcquireLock") == 0 ) return true; if( strcmp(_matrule->_opType,"MemBarStoreStore") == 0 ) return true; + if( strcmp(_matrule->_opType,"StoreFence") == 0 ) return true; + if( strcmp(_matrule->_opType,"LoadFence") == 0 ) return true; return false; } @@ -4054,13 +4056,15 @@ bool MatchRule::is_ideal_fastlock() const { bool 
MatchRule::is_ideal_membar() const { if( !_opType ) return false; return - !strcmp(_opType,"MemBarAcquire" ) || - !strcmp(_opType,"MemBarRelease" ) || + !strcmp(_opType,"MemBarAcquire") || + !strcmp(_opType,"MemBarRelease") || !strcmp(_opType,"MemBarAcquireLock") || !strcmp(_opType,"MemBarReleaseLock") || - !strcmp(_opType,"MemBarVolatile" ) || - !strcmp(_opType,"MemBarCPUOrder" ) || - !strcmp(_opType,"MemBarStoreStore" ); + !strcmp(_opType,"LoadFence" ) || + !strcmp(_opType,"StoreFence") || + !strcmp(_opType,"MemBarVolatile") || + !strcmp(_opType,"MemBarCPUOrder") || + !strcmp(_opType,"MemBarStoreStore"); } bool MatchRule::is_ideal_loadPC() const { diff --git a/src/share/vm/opto/classes.hpp b/src/share/vm/opto/classes.hpp index 002d2db63..79aa1b8b6 100644 --- a/src/share/vm/opto/classes.hpp +++ b/src/share/vm/opto/classes.hpp @@ -175,9 +175,11 @@ macro(MathExactI) macro(MathExactL) macro(MaxI) macro(MemBarAcquire) +macro(LoadFence) macro(MemBarAcquireLock) macro(MemBarCPUOrder) macro(MemBarRelease) +macro(StoreFence) macro(MemBarReleaseLock) macro(MemBarVolatile) macro(MemBarStoreStore) diff --git a/src/share/vm/opto/library_call.cpp b/src/share/vm/opto/library_call.cpp index 14f7015bd..8ad2fb7cf 100644 --- a/src/share/vm/opto/library_call.cpp +++ b/src/share/vm/opto/library_call.cpp @@ -3105,10 +3105,10 @@ bool LibraryCallKit::inline_unsafe_fence(vmIntrinsics::ID id) { insert_mem_bar(Op_MemBarCPUOrder); switch(id) { case vmIntrinsics::_loadFence: - insert_mem_bar(Op_MemBarAcquire); + insert_mem_bar(Op_LoadFence); return true; case vmIntrinsics::_storeFence: - insert_mem_bar(Op_MemBarRelease); + insert_mem_bar(Op_StoreFence); return true; case vmIntrinsics::_fullFence: insert_mem_bar(Op_MemBarVolatile); diff --git a/src/share/vm/opto/matcher.cpp b/src/share/vm/opto/matcher.cpp index 87eb22446..708711e2a 100644 --- a/src/share/vm/opto/matcher.cpp +++ b/src/share/vm/opto/matcher.cpp @@ -2333,7 +2333,7 @@ void Matcher::validate_null_checks( ) { bool 
Matcher::post_store_load_barrier(const Node* vmb) { Compile* C = Compile::current(); assert(vmb->is_MemBar(), ""); - assert(vmb->Opcode() != Op_MemBarAcquire, ""); + assert(vmb->Opcode() != Op_MemBarAcquire && vmb->Opcode() != Op_LoadFence, ""); const MemBarNode* membar = vmb->as_MemBar(); // Get the Ideal Proj node, ctrl, that can be used to iterate forward @@ -2378,7 +2378,7 @@ bool Matcher::post_store_load_barrier(const Node* vmb) { if (x->is_MemBar()) { // We must retain this membar if there is an upcoming volatile // load, which will be followed by acquire membar. - if (xop == Op_MemBarAcquire) { + if (xop == Op_MemBarAcquire || xop == Op_LoadFence) { return false; } else { // For other kinds of barriers, check by pretending we diff --git a/src/share/vm/opto/memnode.cpp b/src/share/vm/opto/memnode.cpp index db9568624..939827b89 100644 --- a/src/share/vm/opto/memnode.cpp +++ b/src/share/vm/opto/memnode.cpp @@ -1002,9 +1002,13 @@ Node* MemNode::can_see_stored_value(Node* st, PhaseTransform* phase) const { // a synchronized region. 
while (current->is_Proj()) { int opc = current->in(0)->Opcode(); - if ((final && (opc == Op_MemBarAcquire || opc == Op_MemBarAcquireLock)) || - opc == Op_MemBarRelease || opc == Op_MemBarCPUOrder || - opc == Op_MemBarReleaseLock) { + if ((final && (opc == Op_MemBarAcquire || + opc == Op_MemBarAcquireLock || + opc == Op_LoadFence)) || + opc == Op_MemBarRelease || + opc == Op_StoreFence || + opc == Op_MemBarReleaseLock || + opc == Op_MemBarCPUOrder) { Node* mem = current->in(0)->in(TypeFunc::Memory); if (mem->is_MergeMem()) { MergeMemNode* merge = mem->as_MergeMem(); @@ -2973,15 +2977,17 @@ uint MemBarNode::cmp( const Node &n ) const { //------------------------------make------------------------------------------- MemBarNode* MemBarNode::make(Compile* C, int opcode, int atp, Node* pn) { switch (opcode) { - case Op_MemBarAcquire: return new(C) MemBarAcquireNode(C, atp, pn); - case Op_MemBarRelease: return new(C) MemBarReleaseNode(C, atp, pn); - case Op_MemBarAcquireLock: return new(C) MemBarAcquireLockNode(C, atp, pn); - case Op_MemBarReleaseLock: return new(C) MemBarReleaseLockNode(C, atp, pn); - case Op_MemBarVolatile: return new(C) MemBarVolatileNode(C, atp, pn); - case Op_MemBarCPUOrder: return new(C) MemBarCPUOrderNode(C, atp, pn); - case Op_Initialize: return new(C) InitializeNode(C, atp, pn); - case Op_MemBarStoreStore: return new(C) MemBarStoreStoreNode(C, atp, pn); - default: ShouldNotReachHere(); return NULL; + case Op_MemBarAcquire: return new(C) MemBarAcquireNode(C, atp, pn); + case Op_LoadFence: return new(C) LoadFenceNode(C, atp, pn); + case Op_MemBarRelease: return new(C) MemBarReleaseNode(C, atp, pn); + case Op_StoreFence: return new(C) StoreFenceNode(C, atp, pn); + case Op_MemBarAcquireLock: return new(C) MemBarAcquireLockNode(C, atp, pn); + case Op_MemBarReleaseLock: return new(C) MemBarReleaseLockNode(C, atp, pn); + case Op_MemBarVolatile: return new(C) MemBarVolatileNode(C, atp, pn); + case Op_MemBarCPUOrder: return new(C) MemBarCPUOrderNode(C, 
atp, pn); + case Op_Initialize: return new(C) InitializeNode(C, atp, pn); + case Op_MemBarStoreStore: return new(C) MemBarStoreStoreNode(C, atp, pn); + default: ShouldNotReachHere(); return NULL; } } diff --git a/src/share/vm/opto/memnode.hpp b/src/share/vm/opto/memnode.hpp index 750a79af8..7c92e1c5a 100644 --- a/src/share/vm/opto/memnode.hpp +++ b/src/share/vm/opto/memnode.hpp @@ -994,6 +994,17 @@ public: virtual int Opcode() const; }; +// "Acquire" - no following ref can move before (but earlier refs can +// follow, like an early Load stalled in cache). Requires multi-cpu +// visibility. Inserted independent of any load, as required +// for intrinsic sun.misc.Unsafe.loadFence(). +class LoadFenceNode: public MemBarNode { +public: + LoadFenceNode(Compile* C, int alias_idx, Node* precedent) + : MemBarNode(C, alias_idx, precedent) {} + virtual int Opcode() const; +}; + // "Release" - no earlier ref can move after (but later refs can move // up, like a speculative pipelined cache-hitting Load). Requires // multi-cpu visibility. Inserted before a volatile store. @@ -1004,6 +1015,17 @@ public: virtual int Opcode() const; }; +// "Release" - no earlier ref can move after (but later refs can move +// up, like a speculative pipelined cache-hitting Load). Requires +// multi-cpu visibility. Inserted independent of any store, as required +// for intrinsic sun.misc.Unsafe.storeFence(). +class StoreFenceNode: public MemBarNode { +public: + StoreFenceNode(Compile* C, int alias_idx, Node* precedent) + : MemBarNode(C, alias_idx, precedent) {} + virtual int Opcode() const; +}; + // "Acquire" - no following ref can move before (but earlier refs can // follow, like an early Load stalled in cache). Requires multi-cpu // visibility. Inserted after a FastLock. 
diff --git a/src/share/vm/runtime/vmStructs.cpp b/src/share/vm/runtime/vmStructs.cpp index 1867ba513..b67187c6e 100644 --- a/src/share/vm/runtime/vmStructs.cpp +++ b/src/share/vm/runtime/vmStructs.cpp @@ -1820,6 +1820,8 @@ typedef BinaryTreeDictionary MetablockTreeDictionary; declare_c2_type(MemBarNode, MultiNode) \ declare_c2_type(MemBarAcquireNode, MemBarNode) \ declare_c2_type(MemBarReleaseNode, MemBarNode) \ + declare_c2_type(LoadFenceNode, MemBarNode) \ + declare_c2_type(StoreFenceNode, MemBarNode) \ declare_c2_type(MemBarVolatileNode, MemBarNode) \ declare_c2_type(MemBarCPUOrderNode, MemBarNode) \ declare_c2_type(InitializeNode, MemBarNode) \ -- GitLab