From 8eda69f2bbe185c2a55467349c033ca859e612ef Mon Sep 17 00:00:00 2001
From: goetz
Date: Thu, 16 Jan 2014 14:25:51 +0100
Subject: [PATCH] 8029101: PPC64 (part 211): ordering of Independent Reads of Independent Writes

Reviewed-by: dholmes, kvn
Contributed-by: martin.doerr@sap.com
---
 src/cpu/ppc/vm/globalDefinitions_ppc.hpp         |  3 +++
 src/share/vm/interpreter/bytecodeInterpreter.cpp |  3 +++
 src/share/vm/opto/library_call.cpp               | 16 ++++++++++++----
 src/share/vm/opto/parse.hpp                      |  5 ++++-
 src/share/vm/opto/parse1.cpp                     |  9 ++++++++-
 src/share/vm/opto/parse3.cpp                     | 14 +++++++++++++-
 src/share/vm/prims/unsafe.cpp                    |  3 +++
 src/share/vm/utilities/globalDefinitions.hpp     | 11 +++++++++++
 8 files changed, 57 insertions(+), 7 deletions(-)

diff --git a/src/cpu/ppc/vm/globalDefinitions_ppc.hpp b/src/cpu/ppc/vm/globalDefinitions_ppc.hpp
index aa8b89f90..4cd5efdd4 100644
--- a/src/cpu/ppc/vm/globalDefinitions_ppc.hpp
+++ b/src/cpu/ppc/vm/globalDefinitions_ppc.hpp
@@ -37,4 +37,7 @@ const int StackAlignmentInBytes = 16;
 // signatures accordingly.
 const bool CCallingConventionRequiresIntsAsLongs = true;
 
+// The PPC CPUs are NOT multiple-copy-atomic.
+#define CPU_NOT_MULTIPLE_COPY_ATOMIC
+
 #endif // CPU_PPC_VM_GLOBALDEFINITIONS_PPC_HPP
diff --git a/src/share/vm/interpreter/bytecodeInterpreter.cpp b/src/share/vm/interpreter/bytecodeInterpreter.cpp
index 73b4886a3..73c1575cd 100644
--- a/src/share/vm/interpreter/bytecodeInterpreter.cpp
+++ b/src/share/vm/interpreter/bytecodeInterpreter.cpp
@@ -2034,6 +2034,9 @@ run:
           TosState tos_type = cache->flag_state();
           int field_offset = cache->f2_as_index();
           if (cache->is_volatile()) {
+            if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
+              OrderAccess::fence();
+            }
             if (tos_type == atos) {
               VERIFY_OOP(obj->obj_field_acquire(field_offset));
               SET_STACK_OBJECT(obj->obj_field_acquire(field_offset), -1);
diff --git a/src/share/vm/opto/library_call.cpp b/src/share/vm/opto/library_call.cpp
index 8ad2fb7cf..66141a1f9 100644
--- a/src/share/vm/opto/library_call.cpp
+++ b/src/share/vm/opto/library_call.cpp
@@ -2627,8 +2627,13 @@ bool LibraryCallKit::inline_unsafe_access(bool is_native_ptr, bool is_store, Bas
     // rough approximation of type.
     need_mem_bar = true;
     // For Stores, place a memory ordering barrier now.
-    if (is_store)
+    if (is_store) {
       insert_mem_bar(Op_MemBarRelease);
+    } else {
+      if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
+        insert_mem_bar(Op_MemBarVolatile);
+      }
+    }
   }
 
   // Memory barrier to prevent normal and 'unsafe' accesses from
@@ -2717,10 +2722,13 @@ bool LibraryCallKit::inline_unsafe_access(bool is_native_ptr, bool is_store, Bas
   }
 
   if (is_volatile) {
-    if (!is_store)
+    if (!is_store) {
       insert_mem_bar(Op_MemBarAcquire);
-    else
-      insert_mem_bar(Op_MemBarVolatile);
+    } else {
+      if (!support_IRIW_for_not_multiple_copy_atomic_cpu) {
+        insert_mem_bar(Op_MemBarVolatile);
+      }
+    }
   }
 
   if (need_mem_bar) insert_mem_bar(Op_MemBarCPUOrder);
diff --git a/src/share/vm/opto/parse.hpp b/src/share/vm/opto/parse.hpp
index f60e6a540..5a599e77c 100644
--- a/src/share/vm/opto/parse.hpp
+++ b/src/share/vm/opto/parse.hpp
@@ -330,7 +330,8 @@ class Parse : public GraphKit {
 
   GraphKit      _exits;             // Record all normal returns and throws here.
   bool          _wrote_final;       // Did we write a final field?
-  bool          _count_invocations; // update and test invocation counter
+  bool          _wrote_volatile;    // Did we write a volatile field?
+  bool          _count_invocations; // update and test invocation counter
   bool          _method_data_update; // update method data oop
   Node*         _alloc_with_final;   // An allocation node with final field
 
@@ -373,6 +374,8 @@ class Parse : public GraphKit {
   GraphKit&     exits()                    { return _exits; }
   bool          wrote_final() const        { return _wrote_final; }
   void          set_wrote_final(bool z)    { _wrote_final = z; }
+  bool          wrote_volatile() const     { return _wrote_volatile; }
+  void          set_wrote_volatile(bool z) { _wrote_volatile = z; }
   bool          count_invocations() const  { return _count_invocations; }
   bool          method_data_update() const { return _method_data_update; }
   Node*         alloc_with_final() const   { return _alloc_with_final; }
diff --git a/src/share/vm/opto/parse1.cpp b/src/share/vm/opto/parse1.cpp
index ac432ea3e..81e1c96c7 100644
--- a/src/share/vm/opto/parse1.cpp
+++ b/src/share/vm/opto/parse1.cpp
@@ -390,6 +390,7 @@ Parse::Parse(JVMState* caller, ciMethod* parse_method, float expected_uses, Pars
   _expected_uses = expected_uses;
   _depth = 1 + (caller->has_method() ? caller->depth() : 0);
   _wrote_final = false;
+  _wrote_volatile = false;
   _alloc_with_final = NULL;
   _entry_bci = InvocationEntryBci;
   _tf = NULL;
@@ -907,7 +908,13 @@ void Parse::do_exits() {
   Node* iophi = _exits.i_o();
   _exits.set_i_o(gvn().transform(iophi));
 
-  if (wrote_final()) {
+  // On PPC64, also add MemBarRelease for constructors which write
+  // volatile fields. As support_IRIW_for_not_multiple_copy_atomic_cpu
+  // is set on PPC64, no sync instruction is issued after volatile
+  // stores. We want to guarantee the same behaviour as on platforms
+  // with total store order, although this is not required by the Java
+  // memory model. So as with finals, we add a barrier here.
+  if (wrote_final() PPC64_ONLY(|| (wrote_volatile() && method()->is_initializer()))) {
     // This method (which must be a constructor by the rules of Java)
     // wrote a final.  The effects of all initializations must be
     // committed to memory before any code after the constructor
diff --git a/src/share/vm/opto/parse3.cpp b/src/share/vm/opto/parse3.cpp
index a1b0929af..3deb9f426 100644
--- a/src/share/vm/opto/parse3.cpp
+++ b/src/share/vm/opto/parse3.cpp
@@ -227,6 +227,9 @@ void Parse::do_get_xxx(Node* obj, ciField* field, bool is_field) {
   } else {
     type = Type::get_const_basic_type(bt);
   }
+  if (support_IRIW_for_not_multiple_copy_atomic_cpu && field->is_volatile()) {
+    insert_mem_bar(Op_MemBarVolatile);   // StoreLoad barrier
+  }
   // Build the load.
   //
   MemNode::MemOrd mo = is_vol ? MemNode::acquire : MemNode::unordered;
@@ -317,7 +320,16 @@ void Parse::do_put_xxx(Node* obj, ciField* field, bool is_field) {
   // If reference is volatile, prevent following volatiles ops from
   // floating up before the volatile write.
   if (is_vol) {
-    insert_mem_bar(Op_MemBarVolatile);   // Use fat membar
+    // If not multiple copy atomic, we do the MemBarVolatile before the load.
+    if (!support_IRIW_for_not_multiple_copy_atomic_cpu) {
+      insert_mem_bar(Op_MemBarVolatile);   // Use fat membar
+    }
+    // Remember we wrote a volatile field.
+    // For not multiple copy atomic cpu (ppc64) a barrier should be issued
+    // in constructors which have such stores. See do_exits() in parse1.cpp.
+    if (is_field) {
+      set_wrote_volatile(true);
+    }
   }
 
   // If the field is final, the rules of Java say we are in <init> or <clinit>.
diff --git a/src/share/vm/prims/unsafe.cpp b/src/share/vm/prims/unsafe.cpp
index 2e432264b..8daafcd89 100644
--- a/src/share/vm/prims/unsafe.cpp
+++ b/src/share/vm/prims/unsafe.cpp
@@ -162,6 +162,9 @@ jint Unsafe_invocation_key_to_method_slot(jint key) {
 
 #define GET_FIELD_VOLATILE(obj, offset, type_name, v) \
   oop p = JNIHandles::resolve(obj); \
+  if (support_IRIW_for_not_multiple_copy_atomic_cpu) { \
+    OrderAccess::fence(); \
+  } \
   volatile type_name v = OrderAccess::load_acquire((volatile type_name*)index_oop_from_field_offset_long(p, offset));
 
 #define SET_FIELD_VOLATILE(obj, offset, type_name, x) \
diff --git a/src/share/vm/utilities/globalDefinitions.hpp b/src/share/vm/utilities/globalDefinitions.hpp
index e522a98d7..6461d8303 100644
--- a/src/share/vm/utilities/globalDefinitions.hpp
+++ b/src/share/vm/utilities/globalDefinitions.hpp
@@ -398,6 +398,17 @@ const uint64_t KlassEncodingMetaspaceMax = (uint64_t(max_juint) + 1) << LogKlass
 #define PLATFORM_NATIVE_STACK_WALKING_SUPPORTED 1
 #endif
 
+// To assure the IRIW property on processors that are not multiple copy
+// atomic, sync instructions must be issued between volatile reads to
+// assure their ordering, instead of after volatile stores.
+// (See "A Tutorial Introduction to the ARM and POWER Relaxed Memory Models"
+// by Luc Maranget, Susmit Sarkar and Peter Sewell, INRIA/Cambridge)
+#ifdef CPU_NOT_MULTIPLE_COPY_ATOMIC
+const bool support_IRIW_for_not_multiple_copy_atomic_cpu = true;
+#else
+const bool support_IRIW_for_not_multiple_copy_atomic_cpu = false;
+#endif
+
 // The byte alignment to be used by Arena::Amalloc.  See bugid 4169348.
 // Note: this value must be a power of 2
-- 
GitLab
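
As an illustrative aside, not part of the patch itself: the IRIW ("Independent Reads of Independent Writes") shape the title refers to can be sketched as a small litmus test. The snippet below is a self-contained C++11 analogue using std::atomic with sequentially consistent accesses in place of Java volatile; every identifier in it is invented for the example. Java volatile must make the forbidden outcome impossible, i.e. the two reader threads may never disagree on the order of the two independent writes. On a multiple-copy-atomic CPU this follows from the barrier after each volatile store; on PPC64, which is not multiple-copy-atomic, the patch instead issues the heavyweight sync before each volatile load (and OrderAccess::fence() before Unsafe volatile gets and interpreter getfields), which is what support_IRIW_for_not_multiple_copy_atomic_cpu selects.

// IRIW litmus test, sketched with C++11 std::atomic as an analogue for
// Java volatile. Illustration only; not HotSpot code.
#include <atomic>
#include <cstdio>
#include <thread>

std::atomic<int> x(0), y(0);   // the two independently written variables
int r1, r2, r3, r4;            // values observed by the two readers

int main() {
  std::thread w1([] { x.store(1); });                       // writer 1: x = 1
  std::thread w2([] { y.store(1); });                       // writer 2: y = 1
  std::thread rd1([] { r1 = x.load(); r2 = y.load(); });    // reader 1: x then y
  std::thread rd2([] { r3 = y.load(); r4 = x.load(); });    // reader 2: y then x
  w1.join(); w2.join(); rd1.join(); rd2.join();
  // Under sequential consistency (the guarantee volatile must provide),
  // the readers cannot observe the two writes in opposite orders.
  bool iriw_violated = (r1 == 1 && r2 == 0 && r3 == 1 && r4 == 0);
  std::printf("IRIW violated: %s\n", iriw_violated ? "yes" : "no");
  return 0;
}

The same shape explains why the barrier cannot simply stay after the store on PPC64: a barrier after each write orders each writer with its own later accesses, but on a non-multiple-copy-atomic machine it does not make both writes visible to all readers at the same time, so the full sync has to sit in front of the volatile reads instead.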