Commit 8eda69f2 authored by goetz

8029101: PPC64 (part 211): ordering of Independent Reads of Independent Writes

Reviewed-by: dholmes, kvn
Contributed-by: martin.doerr@sap.com
Parent fbd11f67
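For context, IRIW ("Independent Reads of Independent Writes") is the property this change preserves for Java volatile fields on PPC64: two readers must not observe two independent volatile writes in opposite orders. A minimal, illustrative Java litmus test follows (class and field names are made up for this note and are not part of the patch):

```java
// Hypothetical IRIW litmus test. With the four methods running on four threads,
// the outcome r1 == 1, r2 == 0, r3 == 1, r4 == 0 is forbidden by the Java memory
// model for volatile fields, because volatile accesses form a single total order.
// On PPC64, which is not multiple-copy-atomic, this only holds if a sync
// instruction is issued between the two volatile reads in each reader --
// which is what this patch arranges.
class IRIWLitmus {
    volatile int x = 0;
    volatile int y = 0;
    int r1, r2, r3, r4;

    void writer1() { x = 1; }          // thread 1
    void writer2() { y = 1; }          // thread 2
    void reader1() { r1 = x; r2 = y; } // thread 3
    void reader2() { r3 = y; r4 = x; } // thread 4
}
```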
@@ -37,4 +37,7 @@ const int StackAlignmentInBytes = 16;
 // signatures accordingly.
 const bool CCallingConventionRequiresIntsAsLongs = true;
+// The PPC CPUs are NOT multiple-copy-atomic.
+#define CPU_NOT_MULTIPLE_COPY_ATOMIC
 #endif // CPU_PPC_VM_GLOBALDEFINITIONS_PPC_HPP
@@ -2034,6 +2034,9 @@ run:
       TosState tos_type = cache->flag_state();
       int field_offset = cache->f2_as_index();
       if (cache->is_volatile()) {
+        if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
+          OrderAccess::fence();
+        }
         if (tos_type == atos) {
           VERIFY_OOP(obj->obj_field_acquire(field_offset));
           SET_STACK_OBJECT(obj->obj_field_acquire(field_offset), -1);
@@ -2627,8 +2627,13 @@ bool LibraryCallKit::inline_unsafe_access(bool is_native_ptr, bool is_store, Bas
     // rough approximation of type.
     need_mem_bar = true;
     // For Stores, place a memory ordering barrier now.
-    if (is_store)
+    if (is_store) {
       insert_mem_bar(Op_MemBarRelease);
+    } else {
+      if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
+        insert_mem_bar(Op_MemBarVolatile);
+      }
+    }
   }
   // Memory barrier to prevent normal and 'unsafe' accesses from
@@ -2717,10 +2722,13 @@ bool LibraryCallKit::inline_unsafe_access(bool is_native_ptr, bool is_store, Bas
   }
   if (is_volatile) {
-    if (!is_store)
+    if (!is_store) {
       insert_mem_bar(Op_MemBarAcquire);
-    else
-      insert_mem_bar(Op_MemBarVolatile);
+    } else {
+      if (!support_IRIW_for_not_multiple_copy_atomic_cpu) {
+        insert_mem_bar(Op_MemBarVolatile);
+      }
+    }
   }
   if (need_mem_bar) insert_mem_bar(Op_MemBarCPUOrder);
@@ -330,7 +330,8 @@ class Parse : public GraphKit {
   GraphKit _exits;            // Record all normal returns and throws here.
   bool _wrote_final;          // Did we write a final field?
+  bool _wrote_volatile;       // Did we write a volatile field?
   bool _count_invocations;    // update and test invocation counter
   bool _method_data_update;   // update method data oop
   Node* _alloc_with_final;    // An allocation node with final field
@@ -373,6 +374,8 @@ class Parse : public GraphKit {
   GraphKit& exits() { return _exits; }
   bool wrote_final() const { return _wrote_final; }
   void set_wrote_final(bool z) { _wrote_final = z; }
+  bool wrote_volatile() const { return _wrote_volatile; }
+  void set_wrote_volatile(bool z) { _wrote_volatile = z; }
   bool count_invocations() const { return _count_invocations; }
   bool method_data_update() const { return _method_data_update; }
   Node* alloc_with_final() const { return _alloc_with_final; }
@@ -390,6 +390,7 @@ Parse::Parse(JVMState* caller, ciMethod* parse_method, float expected_uses, Pars
   _expected_uses = expected_uses;
   _depth = 1 + (caller->has_method() ? caller->depth() : 0);
   _wrote_final = false;
+  _wrote_volatile = false;
   _alloc_with_final = NULL;
   _entry_bci = InvocationEntryBci;
   _tf = NULL;
@@ -907,7 +908,13 @@ void Parse::do_exits() {
   Node* iophi = _exits.i_o();
   _exits.set_i_o(gvn().transform(iophi));
-  if (wrote_final()) {
+  // On PPC64, also add MemBarRelease for constructors which write
+  // volatile fields. As support_IRIW_for_not_multiple_copy_atomic_cpu
+  // is set on PPC64, no sync instruction is issued after volatile
+  // stores. We want to guarantee the same behaviour as on platforms
+  // with total store order, although this is not required by the Java
+  // memory model. So as with finals, we add a barrier here.
+  if (wrote_final() PPC64_ONLY(|| (wrote_volatile() && method()->is_initializer()))) {
     // This method (which must be a constructor by the rules of Java)
     // wrote a final. The effects of all initializations must be
     // committed to memory before any code after the constructor
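The do_exits() change above only matters for constructors that store to volatile fields. A minimal sketch of the Java pattern it targets (the class is illustrative, not part of the patch):

```java
// With support_IRIW_for_not_multiple_copy_atomic_cpu set, no StoreLoad barrier
// follows a volatile store, so a constructor like this would otherwise end
// without any trailing barrier on PPC64. The patch makes the parser emit a
// MemBarRelease at the end of <init>, just as it already does for final fields.
class Holder {
    volatile int state;

    Holder(int initial) {
        state = initial; // volatile store inside the constructor
    }
}
```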
@@ -227,6 +227,9 @@ void Parse::do_get_xxx(Node* obj, ciField* field, bool is_field) {
   } else {
     type = Type::get_const_basic_type(bt);
   }
+  if (support_IRIW_for_not_multiple_copy_atomic_cpu && field->is_volatile()) {
+    insert_mem_bar(Op_MemBarVolatile);   // StoreLoad barrier
+  }
   // Build the load.
   //
   MemNode::MemOrd mo = is_vol ? MemNode::acquire : MemNode::unordered;
@@ -317,7 +320,16 @@ void Parse::do_put_xxx(Node* obj, ciField* field, bool is_field) {
   // If reference is volatile, prevent following volatiles ops from
   // floating up before the volatile write.
   if (is_vol) {
-    insert_mem_bar(Op_MemBarVolatile); // Use fat membar
+    // If not multiple copy atomic, we do the MemBarVolatile before the load.
+    if (!support_IRIW_for_not_multiple_copy_atomic_cpu) {
+      insert_mem_bar(Op_MemBarVolatile); // Use fat membar
+    }
+    // Remember we wrote a volatile field.
+    // For not multiple copy atomic cpu (ppc64) a barrier should be issued
+    // in constructors which have such stores. See do_exits() in parse1.cpp.
+    if (is_field) {
+      set_wrote_volatile(true);
+    }
   }
   // If the field is final, the rules of Java say we are in <init> or <clinit>.
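Taken together, the do_get_xxx()/do_put_xxx() changes move the expensive StoreLoad barrier from after volatile stores to before volatile loads. A hedged summary in Java terms (the class exists only for illustration; the comments restate the hunks above):

```java
class VolatileFieldAccess {
    volatile int v;

    // Volatile load:
    //   default scheme:           acquiring load, no leading barrier
    //   IRIW scheme (e.g. PPC64): MemBarVolatile (sync) inserted before the load,
    //                             then the acquiring load
    int read() { return v; }

    // Volatile store:
    //   default scheme:           MemBarRelease before, MemBarVolatile (StoreLoad) after
    //   IRIW scheme (e.g. PPC64): MemBarRelease before, no trailing MemBarVolatile;
    //                             the parser also records set_wrote_volatile(true)
    void write(int x) { v = x; }
}
```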
@@ -162,6 +162,9 @@ jint Unsafe_invocation_key_to_method_slot(jint key) {
 #define GET_FIELD_VOLATILE(obj, offset, type_name, v) \
   oop p = JNIHandles::resolve(obj); \
+  if (support_IRIW_for_not_multiple_copy_atomic_cpu) { \
+    OrderAccess::fence(); \
+  } \
   volatile type_name v = OrderAccess::load_acquire((volatile type_name*)index_oop_from_field_offset_long(p, offset));
 #define SET_FIELD_VOLATILE(obj, offset, type_name, x) \
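GET_FIELD_VOLATILE backs the native Unsafe get*Volatile entry points, so the extra fence also covers volatile-style reads done through sun.misc.Unsafe. A small, illustrative JDK 8-era usage follows (the demo class is made up; only the Unsafe calls are real API):

```java
import java.lang.reflect.Field;
import sun.misc.Unsafe;

// Illustrative only: in the VM's native implementation, a volatile read through
// Unsafe now executes fence-then-load-acquire on PPC64, matching plain volatile
// field reads.
public class UnsafeVolatileReadDemo {
    static class Box { volatile int value; }

    public static void main(String[] args) throws Exception {
        Field f = Unsafe.class.getDeclaredField("theUnsafe");
        f.setAccessible(true);
        Unsafe unsafe = (Unsafe) f.get(null);

        Box box = new Box();
        long offset = unsafe.objectFieldOffset(Box.class.getDeclaredField("value"));

        unsafe.putIntVolatile(box, offset, 42);     // volatile store
        int v = unsafe.getIntVolatile(box, offset); // volatile load
        System.out.println(v);
    }
}
```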
@@ -398,6 +398,17 @@ const uint64_t KlassEncodingMetaspaceMax = (uint64_t(max_juint) + 1) << LogKlass
 #define PLATFORM_NATIVE_STACK_WALKING_SUPPORTED 1
 #endif
+// To assure the IRIW property on processors that are not multiple copy
+// atomic, sync instructions must be issued between volatile reads to
+// assure their ordering, instead of after volatile stores.
+// (See "A Tutorial Introduction to the ARM and POWER Relaxed Memory Models"
+// by Luc Maranget, Susmit Sarkar and Peter Sewell, INRIA/Cambridge)
+#ifdef CPU_NOT_MULTIPLE_COPY_ATOMIC
+const bool support_IRIW_for_not_multiple_copy_atomic_cpu = true;
+#else
+const bool support_IRIW_for_not_multiple_copy_atomic_cpu = false;
+#endif
 // The byte alignment to be used by Arena::Amalloc. See bugid 4169348.
 // Note: this value must be a power of 2