Merge

c7dd6873 · twisti · 61efc6c2 · 02e6b195 · c7dd6873 · c7dd6873
14 changed file
--- a/src/cpu/x86/vm/stubGenerator_x86_32.cpp
+++ b/src/cpu/x86/vm/stubGenerator_x86_32.cpp
@@ -1498,27 +1498,29 @@ class StubGenerator: public StubCodeGenerator {
    __ movptr(elem_klass, elem_klass_addr); // query the object klass
    generate_type_check(elem_klass, ckoff_arg, ckval_arg, temp,
                        &L_store_element, NULL);
-      // (On fall-through, we have failed the element type check.)
+    // (On fall-through, we have failed the element type check.)
    // ======== end loop ========

    // It was a real error; we must depend on the caller to finish the job.
    // Register "count" = -1 * number of *remaining* oops, length_arg = *total* oops.
    // Emit GC store barriers for the oops we have copied (length_arg + count),
    // and report their number to the caller.
+    assert_different_registers(to, count, rax);
+    Label L_post_barrier;
    __ addl(count, length_arg);         // transfers = (length - remaining)
    __ movl2ptr(rax, count);            // save the value
-    __ notptr(rax);                     // report (-1^K) to caller
-    __ movptr(to, to_arg);              // reload
-    assert_different_registers(to, count, rax);
-    gen_write_ref_array_post_barrier(to, count);
-    __ jmpb(L_done);
+    __ notptr(rax);                     // report (-1^K) to caller (does not affect flags)
+    __ jccb(Assembler::notZero, L_post_barrier);
+    __ jmp(L_done); // K == 0, nothing was copied, skip post barrier

    // Come here on success only.
    __ BIND(L_do_card_marks);
+    __ xorptr(rax, rax);                // return 0 on success
    __ movl2ptr(count, length_arg);
-    __ movptr(to, to_arg);                // reload
+
+    __ BIND(L_post_barrier);
+    __ movptr(to, to_arg);              // reload
    gen_write_ref_array_post_barrier(to, count);
-    __ xorptr(rax, rax);                  // return 0 on success

    // Common exit point (success or failure).
    __ BIND(L_done);

--- a/src/cpu/x86/vm/stubGenerator_x86_64.cpp
+++ b/src/cpu/x86/vm/stubGenerator_x86_64.cpp
@@ -1217,27 +1217,28 @@ class StubGenerator: public StubCodeGenerator {
  //
  //  Input:
  //     start    - register containing starting address of destination array
-  //     end      - register containing ending address of destination array
+  //     count    - elements count
  //     scratch  - scratch register
  //
  //  The input registers are overwritten.
-  //  The ending address is inclusive.
-  void  gen_write_ref_array_post_barrier(Register start, Register end, Register scratch) {
-    assert_different_registers(start, end, scratch);
+  //
+  void  gen_write_ref_array_post_barrier(Register start, Register count, Register scratch) {
+    assert_different_registers(start, count, scratch);
    BarrierSet* bs = Universe::heap()->barrier_set();
    switch (bs->kind()) {
      case BarrierSet::G1SATBCT:
      case BarrierSet::G1SATBCTLogging:
-
        {
-          __ pusha();                      // push registers (overkill)
-          // must compute element count unless barrier set interface is changed (other platforms supply count)
-          assert_different_registers(start, end, scratch);
-          __ lea(scratch, Address(end, BytesPerHeapOop));
-          __ subptr(scratch, start);               // subtract start to get #bytes
-          __ shrptr(scratch, LogBytesPerHeapOop);  // convert to element count
-          __ mov(c_rarg0, start);
-          __ mov(c_rarg1, scratch);
+          __ pusha();             // push registers (overkill)
+          if (c_rarg0 == count) { // On win64 c_rarg0 == rcx
+            assert_different_registers(c_rarg1, start);
+            __ mov(c_rarg1, count);
+            __ mov(c_rarg0, start);
+          } else {
+            assert_different_registers(c_rarg0, count);
+            __ mov(c_rarg0, start);
+            __ mov(c_rarg1, count);
+          }
          __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post), 2);
          __ popa();
        }
@@ -1249,22 +1250,16 @@ class StubGenerator: public StubCodeGenerator {
          assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");

          Label L_loop;
+          const Register end = count;

-           __ shrptr(start, CardTableModRefBS::card_shift);
-           __ addptr(end, BytesPerHeapOop);
-           __ shrptr(end, CardTableModRefBS::card_shift);
-           __ subptr(end, start); // number of bytes to copy
-
-          intptr_t disp = (intptr_t) ct->byte_map_base;
-          if (Assembler::is_simm32(disp)) {
-            Address cardtable(noreg, noreg, Address::no_scale, disp);
-            __ lea(scratch, cardtable);
-          } else {
-            ExternalAddress cardtable((address)disp);
-            __ lea(scratch, cardtable);
-          }
+          __ leaq(end, Address(start, count, TIMES_OOP, 0));  // end == start+count*oop_size
+          __ subptr(end, BytesPerHeapOop); // end - 1 to make inclusive
+          __ shrptr(start, CardTableModRefBS::card_shift);
+          __ shrptr(end,   CardTableModRefBS::card_shift);
+          __ subptr(end, start); // end --> cards count

-          const Register count = end; // 'end' register contains bytes count now
+          int64_t disp = (int64_t) ct->byte_map_base;
+          __ mov64(scratch, disp);
          __ addptr(start, scratch);
        __ BIND(L_loop);
          __ movb(Address(start, count, Address::times_1), 0);
@@ -1916,8 +1911,7 @@ class StubGenerator: public StubCodeGenerator {

  __ BIND(L_exit);
    if (is_oop) {
-      __ leaq(end_to, Address(saved_to, dword_count, Address::times_4, -4));
-      gen_write_ref_array_post_barrier(saved_to, end_to, rax);
+      gen_write_ref_array_post_barrier(saved_to, dword_count, rax);
    }
    restore_arg_regs();
    inc_counter_np(SharedRuntime::_jint_array_copy_ctr); // Update counter after rscratch1 is free
@@ -2012,12 +2006,10 @@ class StubGenerator: public StubCodeGenerator {
    // Copy in multi-bytes chunks
    copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);

-   __ bind(L_exit);
-     if (is_oop) {
-       Register end_to = rdx;
-       __ leaq(end_to, Address(to, dword_count, Address::times_4, -4));
-       gen_write_ref_array_post_barrier(to, end_to, rax);
-     }
+  __ BIND(L_exit);
+    if (is_oop) {
+      gen_write_ref_array_post_barrier(to, dword_count, rax);
+    }
    restore_arg_regs();
    inc_counter_np(SharedRuntime::_jint_array_copy_ctr); // Update counter after rscratch1 is free
    __ xorptr(rax, rax); // return 0
@@ -2055,6 +2047,7 @@ class StubGenerator: public StubCodeGenerator {
    const Register end_from    = from; // source array end address
    const Register end_to      = rcx;  // destination array end address
    const Register saved_to    = to;
+    const Register saved_count = r11;
    // End pointers are inclusive, and if count is not zero they point
    // to the last unit copied:  end_to[0] := end_from[0]

@@ -2072,6 +2065,8 @@ class StubGenerator: public StubCodeGenerator {
                      // r9 and r10 may be used to save non-volatile registers
    // 'from', 'to' and 'qword_count' are now valid
    if (is_oop) {
+      // Save to and count for store barrier
+      __ movptr(saved_count, qword_count);
      // no registers are destroyed by this call
      gen_write_ref_array_pre_barrier(to, qword_count, dest_uninitialized);
    }
@@ -2104,7 +2099,7 @@ class StubGenerator: public StubCodeGenerator {

    if (is_oop) {
    __ BIND(L_exit);
-      gen_write_ref_array_post_barrier(saved_to, end_to, rax);
+      gen_write_ref_array_post_barrier(saved_to, saved_count, rax);
    }
    restore_arg_regs();
    if (is_oop) {
@@ -2187,8 +2182,7 @@ class StubGenerator: public StubCodeGenerator {

    if (is_oop) {
    __ BIND(L_exit);
-      __ lea(rcx, Address(to, saved_count, Address::times_8, -8));
-      gen_write_ref_array_post_barrier(to, rcx, rax);
+      gen_write_ref_array_post_barrier(to, saved_count, rax);
    }
    restore_arg_regs();
    if (is_oop) {
@@ -2375,20 +2369,20 @@ class StubGenerator: public StubCodeGenerator {
    // Register rdx = -1 * number of *remaining* oops, r14 = *total* oops.
    // Emit GC store barriers for the oops we have copied (r14 + rdx),
    // and report their number to the caller.
-    assert_different_registers(rax, r14_length, count, to, end_to, rcx);
-    __ lea(end_to, to_element_addr);
-    __ addptr(end_to, -heapOopSize);      // make an inclusive end pointer
-    gen_write_ref_array_post_barrier(to, end_to, rscratch1);
-    __ movptr(rax, r14_length);           // original oops
-    __ addptr(rax, count);                // K = (original - remaining) oops
-    __ notptr(rax);                       // report (-1^K) to caller
-    __ jmp(L_done);
+    assert_different_registers(rax, r14_length, count, to, end_to, rcx, rscratch1);
+    Label L_post_barrier;
+    __ addptr(r14_length, count);     // K = (original - remaining) oops
+    __ movptr(rax, r14_length);       // save the value
+    __ notptr(rax);                   // report (-1^K) to caller (does not affect flags)
+    __ jccb(Assembler::notZero, L_post_barrier);
+    __ jmp(L_done); // K == 0, nothing was copied, skip post barrier

    // Come here on success only.
    __ BIND(L_do_card_marks);
-    __ addptr(end_to, -heapOopSize);         // make an inclusive end pointer
-    gen_write_ref_array_post_barrier(to, end_to, rscratch1);
-    __ xorptr(rax, rax);                  // return 0 on success
+    __ xorptr(rax, rax);              // return 0 on success
+
+    __ BIND(L_post_barrier);
+    gen_write_ref_array_post_barrier(to, r14_length, rscratch1);

    // Common exit point (success or failure).
    __ BIND(L_done);

--- a/src/share/tools/hsdis/hsdis.c
+++ b/src/share/tools/hsdis/hsdis.c
@@ -27,6 +27,7 @@
   HotSpot PrintAssembly option.
 */

+#include <config.h> /* required by bfd.h */
 #include <libiberty.h>
 #include <bfd.h>
 #include <dis-asm.h>

--- a/src/share/vm/c1/c1_Compiler.cpp
+++ b/src/share/vm/c1/c1_Compiler.cpp
@@ -77,30 +77,42 @@ void Compiler::initialize() {
 }


-BufferBlob* Compiler::build_buffer_blob() {
+BufferBlob* Compiler::get_buffer_blob(ciEnv* env) {
+  // Allocate buffer blob once at startup since allocation for each
+  // compilation seems to be too expensive (at least on Intel win32).
+  BufferBlob* buffer_blob = CompilerThread::current()->get_buffer_blob();
+  if (buffer_blob != NULL) {
+    return buffer_blob;
+  }
+
  // setup CodeBuffer.  Preallocate a BufferBlob of size
  // NMethodSizeLimit plus some extra space for constants.
  int code_buffer_size = Compilation::desired_max_code_buffer_size() +
    Compilation::desired_max_constant_size();
-  BufferBlob* blob = BufferBlob::create("Compiler1 temporary CodeBuffer",
-                                        code_buffer_size);
-  guarantee(blob != NULL, "must create initial code buffer");
-  return blob;
+
+  buffer_blob = BufferBlob::create("Compiler1 temporary CodeBuffer",
+                                   code_buffer_size);
+  if (buffer_blob == NULL) {
+    CompileBroker::handle_full_code_cache();
+    env->record_failure("CodeCache is full");
+  } else {
+    CompilerThread::current()->set_buffer_blob(buffer_blob);
+  }
+
+  return buffer_blob;
 }


 void Compiler::compile_method(ciEnv* env, ciMethod* method, int entry_bci) {
-  // Allocate buffer blob once at startup since allocation for each
-  // compilation seems to be too expensive (at least on Intel win32).
-  BufferBlob* buffer_blob = CompilerThread::current()->get_buffer_blob();
+  BufferBlob* buffer_blob = Compiler::get_buffer_blob(env);
  if (buffer_blob == NULL) {
-    buffer_blob = build_buffer_blob();
-    CompilerThread::current()->set_buffer_blob(buffer_blob);
+    return;
  }

  if (!is_initialized()) {
    initialize();
  }
+
  // invoke compilation
  {
    // We are nested here because we need for the destructor

--- a/src/share/vm/c1/c1_Compiler.hpp
+++ b/src/share/vm/c1/c1_Compiler.hpp
@@ -46,7 +46,7 @@ class Compiler: public AbstractCompiler {

  virtual bool is_c1()                           { return true; };

-  BufferBlob* build_buffer_blob();
+  BufferBlob* get_buffer_blob(ciEnv* env);

  // Missing feature tests
  virtual bool supports_native()                 { return true; }

--- a/src/share/vm/code/codeCache.cpp
+++ b/src/share/vm/code/codeCache.cpp
@@ -622,6 +622,15 @@ address CodeCache::last_address() {
  return (address)_heap->high();
 }

+/**
+ * Returns the reverse free ratio. E.g., if 25% (1/4) of the code cache
+ * is free, reverse_free_ratio() returns 4.
+ */
+double CodeCache::reverse_free_ratio() {
+  double unallocated_capacity = (double)(CodeCache::unallocated_capacity() - CodeCacheMinimumFreeSpace);
+  double max_capacity = (double)CodeCache::max_capacity();
+  return max_capacity / unallocated_capacity;
+}

 void icache_init();


--- a/src/share/vm/code/codeCache.hpp
+++ b/src/share/vm/code/codeCache.hpp
@@ -163,6 +163,7 @@ class CodeCache : AllStatic {
  static size_t  max_capacity()                  { return _heap->max_capacity(); }
  static size_t  unallocated_capacity()          { return _heap->unallocated_capacity(); }
  static bool    needs_flushing()                { return unallocated_capacity() < CodeCacheFlushingMinimumFreeSpace; }
+  static double  reverse_free_ratio();

  static bool needs_cache_clean()                { return _needs_cache_clean; }
  static void set_needs_cache_clean(bool v)      { _needs_cache_clean = v;    }

--- a/src/share/vm/opto/loopnode.hpp
+++ b/src/share/vm/opto/loopnode.hpp
@@ -965,7 +965,7 @@ public:
  // Has use internal to the vector set (ie. not in a phi at the loop head)
  bool has_use_internal_to_set( Node* n, VectorSet& vset, IdealLoopTree *loop );
  // clone "n" for uses that are outside of loop
-  void clone_for_use_outside_loop( IdealLoopTree *loop, Node* n, Node_List& worklist );
+  int  clone_for_use_outside_loop( IdealLoopTree *loop, Node* n, Node_List& worklist );
  // clone "n" for special uses that are in the not_peeled region
  void clone_for_special_use_inside_loop( IdealLoopTree *loop, Node* n,
                                          VectorSet& not_peel, Node_List& sink_list, Node_List& worklist );

--- a/src/share/vm/opto/loopopts.cpp
+++ b/src/share/vm/opto/loopopts.cpp
@@ -1939,8 +1939,8 @@ bool PhaseIdealLoop::has_use_internal_to_set( Node* n, VectorSet& vset, IdealLoo

 //------------------------------ clone_for_use_outside_loop -------------------------------------
 // clone "n" for uses that are outside of loop
-void PhaseIdealLoop::clone_for_use_outside_loop( IdealLoopTree *loop, Node* n, Node_List& worklist ) {
-
+int PhaseIdealLoop::clone_for_use_outside_loop( IdealLoopTree *loop, Node* n, Node_List& worklist ) {
+  int cloned = 0;
  assert(worklist.size() == 0, "should be empty");
  for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
    Node* use = n->fast_out(j);
@@ -1960,6 +1960,7 @@ void PhaseIdealLoop::clone_for_use_outside_loop( IdealLoopTree *loop, Node* n, N
    // clone "n" and insert it between the inputs of "n" and the use outside the loop
    Node* n_clone = n->clone();
    _igvn.replace_input_of(use, j, n_clone);
+    cloned++;
    Node* use_c;
    if (!use->is_Phi()) {
      use_c = has_ctrl(use) ? get_ctrl(use) : use->in(0);
@@ -1977,6 +1978,7 @@ void PhaseIdealLoop::clone_for_use_outside_loop( IdealLoopTree *loop, Node* n, N
    }
 #endif
  }
+  return cloned;
 }


@@ -2495,6 +2497,7 @@ bool PhaseIdealLoop::partial_peel( IdealLoopTree *loop, Node_List &old_new ) {

  // Evacuate nodes in peel region into the not_peeled region if possible
  uint new_phi_cnt = 0;
+  uint cloned_for_outside_use = 0;
  for (i = 0; i < peel_list.size();) {
    Node* n = peel_list.at(i);
 #if !defined(PRODUCT)
@@ -2513,8 +2516,7 @@ bool PhaseIdealLoop::partial_peel( IdealLoopTree *loop, Node_List &old_new ) {
          // if not pinned and not a load (which maybe anti-dependent on a store)
          // and not a CMove (Matcher expects only bool->cmove).
          if ( n->in(0) == NULL && !n->is_Load() && !n->is_CMove() ) {
-            clone_for_use_outside_loop( loop, n, worklist );
-
+            cloned_for_outside_use += clone_for_use_outside_loop( loop, n, worklist );
            sink_list.push(n);
            peel     >>= n->_idx; // delete n from peel set.
            not_peel <<= n->_idx; // add n to not_peel set.
@@ -2551,6 +2553,12 @@ bool PhaseIdealLoop::partial_peel( IdealLoopTree *loop, Node_List &old_new ) {
    // Inhibit more partial peeling on this loop
    assert(!head->is_partial_peel_loop(), "not partial peeled");
    head->mark_partial_peel_failed();
+    if (cloned_for_outside_use > 0) {
+      // Terminate this round of loop opts because
+      // the graph outside this loop was changed.
+      C->set_major_progress();
+      return true;
+    }
    return false;
  }


--- a/src/share/vm/runtime/advancedThresholdPolicy.cpp
+++ b/src/share/vm/runtime/advancedThresholdPolicy.cpp
@@ -68,7 +68,7 @@ void AdvancedThresholdPolicy::initialize() {
  }
 #endif

-
+  set_increase_threshold_at_ratio();
  set_start_time(os::javaTimeMillis());
 }

@@ -205,6 +205,17 @@ double AdvancedThresholdPolicy::threshold_scale(CompLevel level, int feedback_k)
  double queue_size = CompileBroker::queue_size(level);
  int comp_count = compiler_count(level);
  double k = queue_size / (feedback_k * comp_count) + 1;
+
+  // Increase C1 compile threshold when the code cache is filled more
+  // than specified by IncreaseFirstTierCompileThresholdAt percentage.
+  // The main intention is to keep enough free space for C2 compiled code
+  // to achieve peak performance if the code cache is under stress.
+  if ((TieredStopAtLevel == CompLevel_full_optimization) && (level != CompLevel_full_optimization))  {
+    double current_reverse_free_ratio = CodeCache::reverse_free_ratio();
+    if (current_reverse_free_ratio > _increase_threshold_at_ratio) {
+      k *= exp(current_reverse_free_ratio - _increase_threshold_at_ratio);
+    }
+  }
  return k;
 }


--- a/src/share/vm/runtime/advancedThresholdPolicy.hpp
+++ b/src/share/vm/runtime/advancedThresholdPolicy.hpp
@@ -201,9 +201,12 @@ class AdvancedThresholdPolicy : public SimpleThresholdPolicy {
  // Is method profiled enough?
  bool is_method_profiled(Method* method);

+  double _increase_threshold_at_ratio;
+
 protected:
  void print_specific(EventType type, methodHandle mh, methodHandle imh, int bci, CompLevel level);

+  void set_increase_threshold_at_ratio() { _increase_threshold_at_ratio = 100 / (100 - (double)IncreaseFirstTierCompileThresholdAt); }
  void set_start_time(jlong t) { _start_time = t;    }
  jlong start_time() const     { return _start_time; }


--- a/src/share/vm/runtime/arguments.cpp
+++ b/src/share/vm/runtime/arguments.cpp
@@ -2629,6 +2629,16 @@ jint Arguments::parse_each_vm_init_arg(const JavaVMInitArgs* args,
        return JNI_EINVAL;
      }
      FLAG_SET_CMDLINE(uintx, ReservedCodeCacheSize, (uintx)long_ReservedCodeCacheSize);
+      //-XX:IncreaseFirstTierCompileThresholdAt=
+      } else if (match_option(option, "-XX:IncreaseFirstTierCompileThresholdAt=", &tail)) {
+        uintx uint_IncreaseFirstTierCompileThresholdAt = 0;
+        if (!parse_uintx(tail, &uint_IncreaseFirstTierCompileThresholdAt, 0) || uint_IncreaseFirstTierCompileThresholdAt > 99) {
+          jio_fprintf(defaultStream::error_stream(),
+                      "Invalid value for IncreaseFirstTierCompileThresholdAt: %s. Should be between 0 and 99.\n",
+                      option->optionString);
+          return JNI_EINVAL;
+        }
+        FLAG_SET_CMDLINE(uintx, IncreaseFirstTierCompileThresholdAt, (uintx)uint_IncreaseFirstTierCompileThresholdAt);
    // -green
    } else if (match_option(option, "-green", &tail)) {
      jio_fprintf(defaultStream::error_stream(),

--- a/src/share/vm/runtime/globals.hpp
+++ b/src/share/vm/runtime/globals.hpp
@@ -3436,6 +3436,10 @@ class CommandLineFlags {
          "Start profiling in interpreter if the counters exceed tier 3"    \
          "thresholds by the specified percentage")                         \
                                                                            \
+  product(uintx, IncreaseFirstTierCompileThresholdAt, 50,                   \
+          "Increase the compile threshold for C1 compilation if the code"   \
+          "cache is filled by the specified percentage.")                   \
+                                                                            \
  product(intx, TieredRateUpdateMinTime, 1,                                 \
          "Minimum rate sampling interval (in milliseconds)")               \
                                                                            \

--- a/test/compiler/8010927/Test8010927.java
+++ b/test/compiler/8010927/Test8010927.java
+/*
+ * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 8010927
+ * @summary Kitchensink crashed with SIGSEGV, Problematic frame: v ~StubRoutines::checkcast_arraycopy
+ * @library /testlibrary/whitebox /testlibrary
+ * @build Test8010927
+ * @run main ClassFileInstaller sun.hotspot.WhiteBox
+ * @run main/othervm -XX:+UnlockDiagnosticVMOptions -XX:+IgnoreUnrecognizedVMOptions -XX:+WhiteBoxAPI -Xbootclasspath/a:. -Xmx64m -XX:NewSize=20971520 -XX:MaxNewSize=32m -XX:-UseTLAB -XX:-UseParNewGC -XX:-UseAdaptiveSizePolicy Test8010927
+ */
+
+import sun.hotspot.WhiteBox;
+import java.lang.reflect.Field;
+import sun.misc.Unsafe;
+
+/**
+ * The test creates uncommitted space between oldgen and young gen
+ * by specifying MaxNewSize bigger than NewSize.
+ * NewSize = 20971520 = (512*4K) * 10 for 4k pages
+ * Then it tries to execute arraycopy() with elements type check
+ * to the array at the end of survive space near unused space.
+ */
+
+public class Test8010927 {
+
+  private static final Unsafe U;
+
+  static {
+    try {
+      Field unsafe = Unsafe.class.getDeclaredField("theUnsafe");
+      unsafe.setAccessible(true);
+      U = (Unsafe) unsafe.get(null);
+    } catch (Exception e) {
+      throw new Error(e);
+    }
+  }
+
+  public static Object[] o;
+
+  public static final boolean debug = Boolean.getBoolean("debug");
+
+  // 2 different obect arrays but same element types
+  static Test8010927[] masterA;
+  static Object[] masterB;
+  static final Test8010927 elem = new Test8010927();
+  static final WhiteBox wb = WhiteBox.getWhiteBox();
+
+  static final int obj_header_size = U.ARRAY_OBJECT_BASE_OFFSET;
+  static final int heap_oop_size = wb.getHeapOopSize();
+  static final int card_size = 512;
+  static final int one_card = (card_size - obj_header_size)/heap_oop_size;
+
+  static final int surv_size = 2112 * 1024;
+
+  // The size is big to not fit into survive space.
+  static final Object[] cache = new Object[(surv_size / card_size)];
+
+  public static void main(String[] args) {
+    masterA = new Test8010927[one_card];
+    masterB = new Object[one_card];
+    for (int i = 0; i < one_card; ++i) {
+      masterA[i] = elem;
+      masterB[i] = elem;
+    }
+
+    // Move cache[] to the old gen.
+    long low_limit = wb.getObjectAddress(cache);
+    System.gc();
+    // Move 'cache' to oldgen.
+    long upper_limit = wb.getObjectAddress(cache);
+    if ((low_limit - upper_limit) > 0) { // substaction works with unsigned values
+      // OldGen is placed before youngger for ParallelOldGC.
+      upper_limit = low_limit + 21000000l; // +20971520
+    }
+    // Each A[one_card] size is 512 bytes,
+    // it will take about 40000 allocations to trigger GC.
+    // cache[] has 8192 elements so GC should happen
+    // each 5th iteration.
+    for(long l = 0; l < 20; l++) {
+      fill_heap();
+      if (debug) {
+        System.out.println("test oop_disjoint_arraycopy");
+      }
+      testA_arraycopy();
+      if (debug) {
+        System.out.println("test checkcast_arraycopy");
+      }
+      testB_arraycopy();
+      // Execute arraycopy to the topmost array in young gen
+      if (debug) {
+        int top_index = get_top_address(low_limit, upper_limit);
+        if (top_index >= 0) {
+          long addr = wb.getObjectAddress(cache[top_index]);
+          System.out.println("top_addr: 0x" + Long.toHexString(addr) + ", 0x" + Long.toHexString(addr + 512));
+        }
+      }
+    }
+  }
+  static void fill_heap() {
+    for (int i = 0; i < cache.length; ++i) {
+      o = new Test8010927[one_card];
+      System.arraycopy(masterA, 0, o, 0, masterA.length);
+      cache[i] = o;
+    }
+    for (long j = 0; j < 256; ++j) {
+      o = new Long[10000]; // to trigger GC
+    }
+  }
+  static void testA_arraycopy() {
+    for (int i = 0; i < cache.length; ++i) {
+      System.arraycopy(masterA, 0, cache[i], 0, masterA.length);
+    }
+  }
+  static void testB_arraycopy() {
+    for (int i = 0; i < cache.length; ++i) {
+      System.arraycopy(masterB, 0, cache[i], 0, masterB.length);
+    }
+  }
+  static int get_top_address(long min, long max) {
+    int index = -1;
+    long addr = min;
+    for (int i = 0; i < cache.length; ++i) {
+      long test = wb.getObjectAddress(cache[i]);
+      if (((test - addr) > 0) && ((max - test) > 0)) { // substaction works with unsigned values
+        addr = test;
+        index = i;
+      }
+    }
+    return index;
+  }
+}