Commit f9d9730e authored by acorn

Merge

......@@ -249,8 +249,6 @@ void AbstractAssembler::block_comment(const char* comment) {
bool MacroAssembler::needs_explicit_null_check(intptr_t offset) {
// Exception handler checks the nmethod's implicit null checks table
// only when this method returns false.
#ifndef SPARC
// Sparc does not have based addressing
if (UseCompressedOops) {
// The first page after heap_base is unmapped and
// the 'offset' is equal to [heap_base + offset] for
......@@ -261,7 +259,6 @@ bool MacroAssembler::needs_explicit_null_check(intptr_t offset) {
offset = (intptr_t)(pointer_delta((void*)offset, (void*)heap_base, 1));
}
}
#endif // SPARC
return offset < 0 || os::vm_page_size() <= offset;
}
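
The check above decides whether an access at a given offset can rely on the hardware trap taken in the unmapped page at the base of the address space: with compressed oops, a fault at heap_base + offset is first folded back into the plain field offset. A stand-alone sketch of that decision follows; it is not HotSpot code, and kPageSize and kHeapBase are invented stand-ins for os::vm_page_size() and the narrow-oop heap base.

#include <cstdint>
#include <cstdio>

static const int64_t kPageSize = 4096;                 // stand-in for os::vm_page_size()
static const int64_t kHeapBase = int64_t(1) << 32;     // hypothetical narrow-oop heap base

static bool needs_explicit_null_check(int64_t offset, bool compressed_oops) {
  if (compressed_oops && offset >= kHeapBase && offset < kHeapBase + kPageSize) {
    // The faulting address was really heap_base + field_offset; recover the field offset.
    offset -= kHeapBase;
  }
  // Only faults inside the first (unmapped) page can serve as implicit null checks.
  return offset < 0 || offset >= kPageSize;
}

int main() {
  printf("%d\n", (int)needs_explicit_null_check(8, false));              // 0: implicit check is enough
  printf("%d\n", (int)needs_explicit_null_check(kHeapBase + 8, true));   // 0: folded back into page zero
  printf("%d\n", (int)needs_explicit_null_check(kPageSize + 8, false));  // 1: explicit check required
  return 0;
}
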
......
......@@ -49,7 +49,7 @@ bool ciMethodBlocks::is_block_start(int bci) {
// first half. Returns the range beginning at bci.
ciBlock *ciMethodBlocks::split_block_at(int bci) {
ciBlock *former_block = block_containing(bci);
ciBlock *new_block = new(_arena) ciBlock(_method, _num_blocks++, this, former_block->start_bci());
ciBlock *new_block = new(_arena) ciBlock(_method, _num_blocks++, former_block->start_bci());
_blocks->append(new_block);
assert(former_block != NULL, "must not be NULL");
new_block->set_limit_bci(bci);
......@@ -83,7 +83,7 @@ ciBlock *ciMethodBlocks::make_block_at(int bci) {
if (cb == NULL ) {
// This is our first time visiting this bytecode. Create
// a fresh block and assign it this starting point.
ciBlock *nb = new(_arena) ciBlock(_method, _num_blocks++, this, bci);
ciBlock *nb = new(_arena) ciBlock(_method, _num_blocks++, bci);
_blocks->append(nb);
_bci_to_block[bci] = nb;
return nb;
......@@ -98,6 +98,11 @@ ciBlock *ciMethodBlocks::make_block_at(int bci) {
}
}
ciBlock *ciMethodBlocks::make_dummy_block() {
ciBlock *dum = new(_arena) ciBlock(_method, -1, 0);
return dum;
}
void ciMethodBlocks::do_analysis() {
ciBytecodeStream s(_method);
ciBlock *cur_block = block_containing(0);
......@@ -253,7 +258,7 @@ ciMethodBlocks::ciMethodBlocks(Arena *arena, ciMethod *meth): _method(meth),
Copy::zero_to_words((HeapWord*) _bci_to_block, b2bsize / sizeof(HeapWord));
// create initial block covering the entire method
ciBlock *b = new(arena) ciBlock(_method, _num_blocks++, this, 0);
ciBlock *b = new(arena) ciBlock(_method, _num_blocks++, 0);
_blocks->append(b);
_bci_to_block[0] = b;
......@@ -334,7 +339,7 @@ void ciMethodBlocks::dump() {
#endif
ciBlock::ciBlock(ciMethod *method, int index, ciMethodBlocks *mb, int start_bci) :
ciBlock::ciBlock(ciMethod *method, int index, int start_bci) :
#ifndef PRODUCT
_method(method),
#endif
......
......@@ -48,6 +48,8 @@ public:
int num_blocks() { return _num_blocks;}
void clear_processed();
ciBlock *make_dummy_block(); // a block not associated with a bci
#ifndef PRODUCT
void dump();
#endif
......@@ -81,7 +83,7 @@ public:
fall_through_bci = -1
};
ciBlock(ciMethod *method, int index, ciMethodBlocks *mb, int start_bci);
ciBlock(ciMethod *method, int index, int start_bci);
int start_bci() const { return _start_bci; }
int limit_bci() const { return _limit_bci; }
int control_bci() const { return _control_bci; }
......@@ -94,7 +96,6 @@ public:
int ex_limit_bci() const { return _ex_limit_bci; }
bool contains(int bci) const { return start_bci() <= bci && bci < limit_bci(); }
// flag handling
bool processed() const { return (_flags & Processed) != 0; }
bool is_handler() const { return (_flags & Handler) != 0; }
......
......@@ -338,8 +338,10 @@ ciTypeFlow::StateVector::StateVector(ciTypeFlow* analyzer) {
}
_trap_bci = -1;
_trap_index = 0;
_def_locals.clear();
}
// ------------------------------------------------------------------
// ciTypeFlow::get_start_state
//
......@@ -735,7 +737,7 @@ void ciTypeFlow::StateVector::do_multianewarray(ciBytecodeStream* str) {
void ciTypeFlow::StateVector::do_new(ciBytecodeStream* str) {
bool will_link;
ciKlass* klass = str->get_klass(will_link);
if (!will_link) {
if (!will_link || str->is_unresolved_klass()) {
trap(str, klass, str->get_klass_index());
} else {
push_object(klass);
......@@ -1268,7 +1270,9 @@ bool ciTypeFlow::StateVector::apply_one_bytecode(ciBytecodeStream* str) {
}
case Bytecodes::_iinc:
{
check_int(local(str->get_index()));
int lnum = str->get_index();
check_int(local(lnum));
store_to_local(lnum);
break;
}
case Bytecodes::_iload: load_local_int(str->get_index()); break;
......@@ -1506,6 +1510,46 @@ void ciTypeFlow::StateVector::print_on(outputStream* st) const {
}
#endif
// ------------------------------------------------------------------
// ciTypeFlow::SuccIter::next
//
void ciTypeFlow::SuccIter::next() {
int succ_ct = _pred->successors()->length();
int next = _index + 1;
if (next < succ_ct) {
_index = next;
_succ = _pred->successors()->at(next);
return;
}
for (int i = next - succ_ct; i < _pred->exceptions()->length(); i++) {
// Do not compile any code for unloaded exception types.
// Following compiler passes are responsible for doing this also.
ciInstanceKlass* exception_klass = _pred->exc_klasses()->at(i);
if (exception_klass->is_loaded()) {
_index = next;
_succ = _pred->exceptions()->at(i);
return;
}
next++;
}
_index = -1;
_succ = NULL;
}
// ------------------------------------------------------------------
// ciTypeFlow::SuccIter::set_succ
//
void ciTypeFlow::SuccIter::set_succ(Block* succ) {
int succ_ct = _pred->successors()->length();
if (_index < succ_ct) {
_pred->successors()->at_put(_index, succ);
} else {
int idx = _index - succ_ct;
_pred->exceptions()->at_put(idx, succ);
}
}
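
SuccIter::next above walks the block's normal successors first and then its exception successors, skipping any exception whose klass is not loaded, while keeping a single combined index so that set_succ and is_normal_ctrl can tell the two regions apart. The sketch below mirrors that walk with toy types; Succ and SuccIterSketch are assumptions, not the ci* classes.

#include <cstdio>
#include <vector>

struct Succ { int id; bool loaded; };

struct SuccIterSketch {
  const std::vector<Succ>& normals;       // plays the role of successors()
  const std::vector<Succ>& exceptions;    // plays the role of exceptions()/exc_klasses()
  int index;                              // combined index, -1 when done
  const Succ* succ;

  SuccIterSketch(const std::vector<Succ>& n, const std::vector<Succ>& e)
      : normals(n), exceptions(e), index(-1), succ(nullptr) { next(); }

  bool done() const { return index < 0; }
  bool is_normal_ctrl() const { return index < (int)normals.size(); }

  void next() {
    int n = (int)normals.size();
    int i = index + 1;
    if (i < n) { index = i; succ = &normals[i]; return; }
    for (int k = i - n; k < (int)exceptions.size(); k++) {
      if (exceptions[k].loaded) {          // skip successors whose exception type is unloaded
        index = i; succ = &exceptions[k]; return;
      }
      i++;
    }
    index = -1; succ = nullptr;            // exhausted both lists
  }
};

int main() {
  std::vector<Succ> normals = { {1, true}, {2, true} };
  std::vector<Succ> excs    = { {3, false}, {4, true} };   // #3 is unloaded and therefore skipped
  for (SuccIterSketch it(normals, excs); !it.done(); it.next())
    printf("succ %d%s\n", it.succ->id, it.is_normal_ctrl() ? "" : " (exception)");
  return 0;
}
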
// ciTypeFlow::Block
//
// A basic block.
......@@ -1526,10 +1570,11 @@ ciTypeFlow::Block::Block(ciTypeFlow* outer,
_jsrs = new_jsrs;
_next = NULL;
_on_work_list = false;
_pre_order = -1; assert(!has_pre_order(), "");
_private_copy = false;
_backedge_copy = false;
_exception_entry = false;
_trap_bci = -1;
_trap_index = 0;
df_init();
if (CITraceTypeFlow) {
tty->print_cr(">> Created new block");
......@@ -1541,55 +1586,13 @@ ciTypeFlow::Block::Block(ciTypeFlow* outer,
}
// ------------------------------------------------------------------
// ciTypeFlow::Block::clone_loop_head
//
ciTypeFlow::Block*
ciTypeFlow::Block::clone_loop_head(ciTypeFlow* analyzer,
int branch_bci,
ciTypeFlow::Block* target,
ciTypeFlow::JsrSet* jsrs) {
// Loop optimizations are not performed on Tier1 compiles. Do nothing.
if (analyzer->env()->comp_level() < CompLevel_full_optimization) {
return target;
}
// The current block ends with a branch.
//
// If the target block appears to be the test-clause of a for loop, and
// it is not too large, and it has not yet been cloned, clone it.
// The pre-existing copy becomes the private clone used only by
// the initial iteration of the loop. (We know we are simulating
// the initial iteration right now, since we have never calculated
// successors before for this block.)
if (branch_bci <= start()
&& (target->limit() - target->start()) <= CICloneLoopTestLimit
&& target->private_copy_count() == 0) {
// Setting the private_copy bit ensures that the target block cannot be
// reached by any other paths, such as fall-in from the loop body.
// The private copy will be accessible only on successor lists
// created up to this point.
target->set_private_copy(true);
if (CITraceTypeFlow) {
tty->print(">> Cloning a test-clause block ");
print_value_on(tty);
tty->cr();
}
// If the target is the current block, then later on a new copy of the
// target block will be created when its bytecodes are reached by
// an alternate path. (This is the case for loops with the loop
// head at the bci-wise bottom of the loop, as with pre-1.4.2 javac.)
//
// Otherwise, duplicate the target block now and use it immediately.
// (The case for loops with the loop head at the bci-wise top of the
// loop, as with 1.4.2 javac.)
//
// In either case, the new copy of the block will remain public.
if (target != this) {
target = analyzer->block_at(branch_bci, jsrs);
}
}
return target;
// ciTypeFlow::Block::df_init
void ciTypeFlow::Block::df_init() {
_pre_order = -1; assert(!has_pre_order(), "");
_post_order = -1; assert(!has_post_order(), "");
_loop = NULL;
_irreducible_entry = false;
_rpo_next = NULL;
}
// ------------------------------------------------------------------
......@@ -1644,7 +1647,6 @@ ciTypeFlow::Block::successors(ciBytecodeStream* str,
case Bytecodes::_ifnull: case Bytecodes::_ifnonnull:
// Our successors are the branch target and the next bci.
branch_bci = str->get_dest();
clone_loop_head(analyzer, branch_bci, this, jsrs);
_successors =
new (arena) GrowableArray<Block*>(arena, 2, 0, NULL);
assert(_successors->length() == IF_NOT_TAKEN, "");
......@@ -1658,14 +1660,7 @@ ciTypeFlow::Block::successors(ciBytecodeStream* str,
_successors =
new (arena) GrowableArray<Block*>(arena, 1, 0, NULL);
assert(_successors->length() == GOTO_TARGET, "");
target = analyzer->block_at(branch_bci, jsrs);
// If the target block has not been visited yet, and looks like
// a two-way branch, attempt to clone it if it is a loop head.
if (target->_successors != NULL
&& target->_successors->length() == (IF_TAKEN + 1)) {
target = clone_loop_head(analyzer, branch_bci, target, jsrs);
}
_successors->append(target);
_successors->append(analyzer->block_at(branch_bci, jsrs));
break;
case Bytecodes::_jsr:
......@@ -1801,65 +1796,60 @@ void ciTypeFlow::Block::compute_exceptions() {
}
// ------------------------------------------------------------------
// ciTypeFlow::Block::is_simpler_than
//
// A relation used to order our work list. We work on a block earlier
// if it has a smaller jsr stack or it occurs earlier in the program
// text.
// ciTypeFlow::Block::set_backedge_copy
// Use this only to make a pre-existing public block into a backedge copy.
void ciTypeFlow::Block::set_backedge_copy(bool z) {
assert(z || (z == is_backedge_copy()), "cannot make a backedge copy public");
_backedge_copy = z;
}
// ------------------------------------------------------------------
// ciTypeFlow::Block::is_clonable_exit
//
// Note: maybe we should redo this functionality to make blocks
// which correspond to exceptions lower priority.
bool ciTypeFlow::Block::is_simpler_than(ciTypeFlow::Block* other) {
if (other == NULL) {
return true;
} else {
int size1 = _jsrs->size();
int size2 = other->_jsrs->size();
if (size1 < size2) {
return true;
} else if (size2 < size1) {
return false;
} else {
#if 0
if (size1 > 0) {
int r1 = _jsrs->record_at(0)->return_address();
int r2 = _jsrs->record_at(0)->return_address();
if (r1 < r2) {
return true;
} else if (r2 < r1) {
return false;
} else {
int e1 = _jsrs->record_at(0)->return_address();
int e2 = _jsrs->record_at(0)->return_address();
if (e1 < e2) {
return true;
} else if (e2 < e1) {
return false;
}
}
// At most 2 normal successors, one of which continues looping,
// and all exceptional successors must exit.
bool ciTypeFlow::Block::is_clonable_exit(ciTypeFlow::Loop* lp) {
int normal_cnt = 0;
int in_loop_cnt = 0;
for (SuccIter iter(this); !iter.done(); iter.next()) {
Block* succ = iter.succ();
if (iter.is_normal_ctrl()) {
if (++normal_cnt > 2) return false;
if (lp->contains(succ->loop())) {
if (++in_loop_cnt > 1) return false;
}
#endif
return (start() <= other->start());
} else {
if (lp->contains(succ->loop())) return false;
}
}
return in_loop_cnt == 1;
}
// ------------------------------------------------------------------
// ciTypeFlow::Block::set_private_copy
// Use this only to make a pre-existing public block into a private copy.
void ciTypeFlow::Block::set_private_copy(bool z) {
assert(z || (z == is_private_copy()), "cannot make a private copy public");
_private_copy = z;
// ciTypeFlow::Block::looping_succ
//
ciTypeFlow::Block* ciTypeFlow::Block::looping_succ(ciTypeFlow::Loop* lp) {
assert(successors()->length() <= 2, "at most 2 normal successors");
for (SuccIter iter(this); !iter.done(); iter.next()) {
Block* succ = iter.succ();
if (lp->contains(succ->loop())) {
return succ;
}
}
return NULL;
}
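
Taken together, is_clonable_exit and looping_succ above accept a loop head only if it has at most two normal successors, exactly one of which stays in the loop, and no exceptional successor stays in the loop. A stand-alone model of that test, using a hypothetical Edge record in place of the real Block/Loop types:

#include <cstdio>
#include <vector>

struct Edge { bool normal; bool stays_in_loop; };

static bool is_clonable_exit(const std::vector<Edge>& succs) {
  int normal_cnt = 0, in_loop_cnt = 0;
  for (const Edge& e : succs) {
    if (e.normal) {
      if (++normal_cnt > 2) return false;               // more than two normal successors
      if (e.stays_in_loop && ++in_loop_cnt > 1) return false;
    } else if (e.stays_in_loop) {
      return false;                                     // exceptional successors must exit
    }
  }
  return in_loop_cnt == 1;                              // exactly one way to keep looping
}

int main() {
  std::vector<Edge> while_head = { {true, true}, {true, false}, {false, false} };
  std::vector<Edge> three_way  = { {true, true}, {true, false}, {true, false} };
  printf("%d %d\n", is_clonable_exit(while_head), is_clonable_exit(three_way));  // prints: 1 0
  return 0;
}
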
#ifndef PRODUCT
// ------------------------------------------------------------------
// ciTypeFlow::Block::print_value_on
void ciTypeFlow::Block::print_value_on(outputStream* st) const {
if (has_pre_order()) st->print("#%-2d ", pre_order());
if (has_rpo()) st->print("rpo#%-2d ", rpo());
st->print("[%d - %d)", start(), limit());
if (is_loop_head()) st->print(" lphd");
if (is_irreducible_entry()) st->print(" irred");
if (_jsrs->size() > 0) { st->print("/"); _jsrs->print_on(st); }
if (is_private_copy()) st->print("/private_copy");
if (is_backedge_copy()) st->print("/backedge_copy");
}
// ------------------------------------------------------------------
......@@ -1871,6 +1861,16 @@ void ciTypeFlow::Block::print_on(outputStream* st) const {
st->print_cr(" ==================================================== ");
st->print (" ");
print_value_on(st);
st->print(" Stored locals: "); def_locals()->print_on(st, outer()->method()->max_locals()); tty->cr();
if (loop() && loop()->parent() != NULL) {
st->print(" loops:");
Loop* lp = loop();
do {
st->print(" %d<-%d", lp->head()->pre_order(),lp->tail()->pre_order());
if (lp->is_irreducible()) st->print("(ir)");
lp = lp->parent();
} while (lp->parent() != NULL);
}
st->cr();
_state->print_on(st);
if (_successors == NULL) {
......@@ -1907,6 +1907,21 @@ void ciTypeFlow::Block::print_on(outputStream* st) const {
}
#endif
#ifndef PRODUCT
// ------------------------------------------------------------------
// ciTypeFlow::LocalSet::print_on
void ciTypeFlow::LocalSet::print_on(outputStream* st, int limit) const {
st->print("{");
for (int i = 0; i < max; i++) {
if (test(i)) st->print(" %d", i);
}
if (limit > max) {
st->print(" %d..%d ", max, limit);
}
st->print(" }");
}
#endif
// ciTypeFlow
//
// This is a pass over the bytecodes which computes the following:
......@@ -1922,12 +1937,11 @@ ciTypeFlow::ciTypeFlow(ciEnv* env, ciMethod* method, int osr_bci) {
_max_locals = method->max_locals();
_max_stack = method->max_stack();
_code_size = method->code_size();
_has_irreducible_entry = false;
_osr_bci = osr_bci;
_failure_reason = NULL;
assert(start_bci() >= 0 && start_bci() < code_size() , "correct osr_bci argument");
_work_list = NULL;
_next_pre_order = 0;
_ciblock_count = _methodBlocks->num_blocks();
_idx_to_blocklist = NEW_ARENA_ARRAY(arena(), GrowableArray<Block*>*, _ciblock_count);
......@@ -1949,12 +1963,6 @@ ciTypeFlow::Block* ciTypeFlow::work_list_next() {
_work_list = next_block->next();
next_block->set_next(NULL);
next_block->set_on_work_list(false);
if (!next_block->has_pre_order()) {
// Assign "pre_order" as each new block is taken from the work list.
// This number may be used by following phases to order block visits.
assert(!have_block_count(), "must not have mapped blocks yet");
next_block->set_pre_order(_next_pre_order++);
}
return next_block;
}
......@@ -1962,30 +1970,37 @@ ciTypeFlow::Block* ciTypeFlow::work_list_next() {
// ciTypeFlow::add_to_work_list
//
// Add a basic block to our work list.
// List is kept sorted by decreasing post-order (same as increasing RPO)
void ciTypeFlow::add_to_work_list(ciTypeFlow::Block* block) {
assert(!block->is_on_work_list(), "must not already be on work list");
if (CITraceTypeFlow) {
tty->print(">> Adding block%s ", block->has_pre_order() ? " (again)" : "");
tty->print(">> Adding block ");
block->print_value_on(tty);
tty->print_cr(" to the work list : ");
}
block->set_on_work_list(true);
if (block->is_simpler_than(_work_list)) {
// decreasing post order sort
Block* prev = NULL;
Block* current = _work_list;
int po = block->post_order();
while (current != NULL) {
if (!current->has_post_order() || po > current->post_order())
break;
prev = current;
current = current->next();
}
if (prev == NULL) {
block->set_next(_work_list);
_work_list = block;
} else {
Block *temp = _work_list;
while (!block->is_simpler_than(temp->next())) {
if (CITraceTypeFlow) {
tty->print(".");
}
temp = temp->next();
}
block->set_next(temp->next());
temp->set_next(block);
block->set_next(current);
prev->set_next(block);
}
if (CITraceTypeFlow) {
tty->cr();
}
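
The insertion above keeps the work list sorted by decreasing post-order, i.e. increasing RPO, and a block with no post-order yet is treated as smallest so new entries go ahead of it. A minimal stand-alone model with a hypothetical Node type (post_order < 0 standing in for "not assigned"):

#include <cstdio>

struct Node { int post_order; Node* next; };   // post_order < 0: not assigned yet

static Node* insert_sorted(Node* head, Node* blk) {
  Node* prev = nullptr;
  Node* cur  = head;
  int po = blk->post_order;
  while (cur != nullptr) {
    // Insert before the first entry with a smaller post-order (or none assigned).
    if (cur->post_order < 0 || po > cur->post_order) break;
    prev = cur;
    cur  = cur->next;
  }
  blk->next = cur;
  if (prev == nullptr) return blk;             // new head of the work list
  prev->next = blk;
  return head;
}

int main() {
  Node a{5, nullptr}, b{3, nullptr}, c{7, nullptr};
  Node* list = nullptr;
  list = insert_sorted(list, &a);
  list = insert_sorted(list, &b);
  list = insert_sorted(list, &c);
  for (Node* n = list; n != nullptr; n = n->next) printf("%d ", n->post_order);  // 7 5 3
  printf("\n");
  return 0;
}
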
......@@ -2008,7 +2023,7 @@ ciTypeFlow::Block* ciTypeFlow::block_at(int bci, ciTypeFlow::JsrSet* jsrs, Creat
assert(ciblk->start_bci() == bci, "bad ciBlock boundaries");
Block* block = get_block_for(ciblk->index(), jsrs, option);
assert(block == NULL? (option == no_create): block->is_private_copy() == (option == create_private_copy), "create option consistent with result");
assert(block == NULL? (option == no_create): block->is_backedge_copy() == (option == create_backedge_copy), "create option consistent with result");
if (CITraceTypeFlow) {
if (block != NULL) {
......@@ -2072,8 +2087,9 @@ void ciTypeFlow::flow_exceptions(GrowableArray<ciTypeFlow::Block*>* exceptions,
}
if (block->meet_exception(exception_klass, state)) {
// Block was modified. Add it to the work list.
if (!block->is_on_work_list()) {
// Block was modified and has PO. Add it to the work list.
if (block->has_post_order() &&
!block->is_on_work_list()) {
add_to_work_list(block);
}
}
......@@ -2091,8 +2107,9 @@ void ciTypeFlow::flow_successors(GrowableArray<ciTypeFlow::Block*>* successors,
for (int i = 0; i < len; i++) {
Block* block = successors->at(i);
if (block->meet(state)) {
// Block was modified. Add it to the work list.
if (!block->is_on_work_list()) {
// Block was modified and has PO. Add it to the work list.
if (block->has_post_order() &&
!block->is_on_work_list()) {
add_to_work_list(block);
}
}
......@@ -2133,6 +2150,111 @@ bool ciTypeFlow::can_trap(ciBytecodeStream& str) {
return true;
}
// ------------------------------------------------------------------
// ciTypeFlow::clone_loop_heads
//
// Clone the loop heads
bool ciTypeFlow::clone_loop_heads(Loop* lp, StateVector* temp_vector, JsrSet* temp_set) {
bool rslt = false;
for (PreorderLoops iter(loop_tree_root()); !iter.done(); iter.next()) {
lp = iter.current();
Block* head = lp->head();
if (lp == loop_tree_root() ||
lp->is_irreducible() ||
!head->is_clonable_exit(lp))
continue;
// check not already cloned
if (head->backedge_copy_count() != 0)
continue;
// check _no_ shared head below us
Loop* ch;
for (ch = lp->child(); ch != NULL && ch->head() != head; ch = ch->sibling());
if (ch != NULL)
continue;
// Clone head
Block* new_head = head->looping_succ(lp);
Block* clone = clone_loop_head(lp, temp_vector, temp_set);
// Update lp's info
clone->set_loop(lp);
lp->set_head(new_head);
lp->set_tail(clone);
// And move original head into outer loop
head->set_loop(lp->parent());
rslt = true;
}
return rslt;
}
// ------------------------------------------------------------------
// ciTypeFlow::clone_loop_head
//
// Clone lp's head and replace tail's successors with clone.
//
// |
// v
// head <-> body
// |
// v
// exit
//
// new_head
//
// |
// v
// head ----------\
// | |
// | v
// | clone <-> body
// | |
// | /--/
// | |
// v v
// exit
//
ciTypeFlow::Block* ciTypeFlow::clone_loop_head(Loop* lp, StateVector* temp_vector, JsrSet* temp_set) {
Block* head = lp->head();
Block* tail = lp->tail();
if (CITraceTypeFlow) {
tty->print(">> Requesting clone of loop head "); head->print_value_on(tty);
tty->print(" for predecessor "); tail->print_value_on(tty);
tty->cr();
}
Block* clone = block_at(head->start(), head->jsrs(), create_backedge_copy);
assert(clone->backedge_copy_count() == 1, "one backedge copy for all back edges");
assert(!clone->has_pre_order(), "just created");
clone->set_next_pre_order();
// Insert clone after (orig) tail in reverse post order
clone->set_rpo_next(tail->rpo_next());
tail->set_rpo_next(clone);
// tail->head becomes tail->clone
for (SuccIter iter(tail); !iter.done(); iter.next()) {
if (iter.succ() == head) {
iter.set_succ(clone);
break;
}
}
flow_block(tail, temp_vector, temp_set);
if (head == tail) {
// For self-loops, clone->head becomes clone->clone
flow_block(clone, temp_vector, temp_set);
for (SuccIter iter(clone); !iter.done(); iter.next()) {
if (iter.succ() == head) {
iter.set_succ(clone);
break;
}
}
}
flow_block(clone, temp_vector, temp_set);
return clone;
}
// ------------------------------------------------------------------
// ciTypeFlow::flow_block
......@@ -2159,11 +2281,14 @@ void ciTypeFlow::flow_block(ciTypeFlow::Block* block,
// Grab the state from the current block.
block->copy_state_into(state);
state->def_locals()->clear();
GrowableArray<Block*>* exceptions = block->exceptions();
GrowableArray<ciInstanceKlass*>* exc_klasses = block->exc_klasses();
bool has_exceptions = exceptions->length() > 0;
bool exceptions_used = false;
ciBytecodeStream str(method());
str.reset_to_bci(start);
Bytecodes::Code code;
......@@ -2172,6 +2297,7 @@ void ciTypeFlow::flow_block(ciTypeFlow::Block* block,
// Check for exceptional control flow from this point.
if (has_exceptions && can_trap(str)) {
flow_exceptions(exceptions, exc_klasses, state);
exceptions_used = true;
}
// Apply the effects of the current bytecode to our state.
bool res = state->apply_one_bytecode(&str);
......@@ -2189,9 +2315,14 @@ void ciTypeFlow::flow_block(ciTypeFlow::Block* block,
block->print_on(tty);
}
// Save set of locals defined in this block
block->def_locals()->add(state->def_locals());
// Record (no) successors.
block->successors(&str, state, jsrs);
assert(!has_exceptions || exceptions_used, "Not removing exceptions");
// Discontinue interpretation of this Block.
return;
}
......@@ -2202,6 +2333,7 @@ void ciTypeFlow::flow_block(ciTypeFlow::Block* block,
// Check for exceptional control flow from this point.
if (has_exceptions && can_trap(str)) {
flow_exceptions(exceptions, exc_klasses, state);
exceptions_used = true;
}
// Fix the JsrSet to reflect effect of the bytecode.
......@@ -2218,10 +2350,305 @@ void ciTypeFlow::flow_block(ciTypeFlow::Block* block,
successors = block->successors(&str, NULL, NULL);
}
// Save set of locals defined in this block
block->def_locals()->add(state->def_locals());
// Remove untaken exception paths
if (!exceptions_used)
exceptions->clear();
// Pass our state to successors.
flow_successors(successors, state);
}
// ------------------------------------------------------------------
// ciTypeFlow::PostorderLoops::next
//
// Advance to the next loop in the tree using a postorder, left-to-right traversal.
void ciTypeFlow::PostorderLoops::next() {
assert(!done(), "must not be done.");
if (_current->sibling() != NULL) {
_current = _current->sibling();
while (_current->child() != NULL) {
_current = _current->child();
}
} else {
_current = _current->parent();
}
}
// ------------------------------------------------------------------
// ciTypeFlow::PreorderLoops::next
//
// Advance to the next loop in the tree using a preorder, left-to-right traversal.
void ciTypeFlow::PreorderLoops::next() {
assert(!done(), "must not be done.");
if (_current->child() != NULL) {
_current = _current->child();
} else if (_current->sibling() != NULL) {
_current = _current->sibling();
} else {
while (_current != _root && _current->sibling() == NULL) {
_current = _current->parent();
}
if (_current == _root) {
_current = NULL;
assert(done(), "must be done.");
} else {
assert(_current->sibling() != NULL, "must be more to do");
_current = _current->sibling();
}
}
}
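
Both iterators above walk a loop tree threaded through child/sibling/parent pointers: a parent points at its first child, and children of one parent are chained through their sibling links. The sketch below builds such a tree with an invented LoopNode type and reproduces the postorder walk; it is an illustration, not the ci* code.

#include <cstdio>

struct LoopNode {
  int id;
  LoopNode* parent;
  LoopNode* child;      // first child
  LoopNode* sibling;    // next child of the same parent
  explicit LoopNode(int i) : id(i), parent(nullptr), child(nullptr), sibling(nullptr) {}
};

static void add_child(LoopNode* p, LoopNode* c) {
  c->parent  = p;
  c->sibling = p->child;   // push onto the parent's child list
  p->child   = c;
}

// Postorder: start at the leftmost leaf, then visit sibling subtrees, then the parent.
static void postorder(LoopNode* root) {
  LoopNode* cur = root;
  while (cur->child != nullptr) cur = cur->child;
  while (cur != nullptr) {
    printf("%d ", cur->id);
    if (cur == root) break;
    if (cur->sibling != nullptr) {
      cur = cur->sibling;
      while (cur->child != nullptr) cur = cur->child;   // descend to that subtree's leftmost leaf
    } else {
      cur = cur->parent;
    }
  }
  printf("\n");
}

int main() {
  LoopNode root(0), a(1), b(2), c(3);
  add_child(&root, &a);
  add_child(&root, &b);   // b becomes the first child, a its sibling
  add_child(&b, &c);
  postorder(&root);       // prints: 3 2 1 0
  return 0;
}
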
// ------------------------------------------------------------------
// ciTypeFlow::Loop::sorted_merge
//
// Merge the branch lp into this branch, sorting on the loop head
// pre_orders. Returns the leaf of the merged branch.
// Child and sibling pointers will be setup later.
// Sort is (looking from leaf towards the root)
// descending on primary key: loop head's pre_order, and
// ascending on secondary key: loop tail's pre_order.
ciTypeFlow::Loop* ciTypeFlow::Loop::sorted_merge(Loop* lp) {
Loop* leaf = this;
Loop* prev = NULL;
Loop* current = leaf;
while (lp != NULL) {
int lp_pre_order = lp->head()->pre_order();
// Find insertion point for "lp"
while (current != NULL) {
if (current == lp)
return leaf; // Already in list
if (current->head()->pre_order() < lp_pre_order)
break;
if (current->head()->pre_order() == lp_pre_order &&
current->tail()->pre_order() > lp->tail()->pre_order()) {
break;
}
prev = current;
current = current->parent();
}
Loop* next_lp = lp->parent(); // Save future list of items to insert
// Insert lp before current
lp->set_parent(current);
if (prev != NULL) {
prev->set_parent(lp);
} else {
leaf = lp;
}
prev = lp; // Inserted item is new prev[ious]
lp = next_lp; // Next item to insert
}
return leaf;
}
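
sorted_merge above splices one parent-linked chain of loops into another, keeping the result sorted descending on the head's pre_order and skipping entries that are already present; the insertion scan is never restarted because both inputs are already sorted. The simplified stand-alone model below uses a single integer key per node (ignoring the tail pre_order tie-break) and a hypothetical Chain type:

#include <cstdio>

struct Chain { int key; Chain* parent; };   // "parent" plays the role of Loop::parent()

static Chain* sorted_merge(Chain* leaf, Chain* lp) {
  Chain* prev = nullptr;
  Chain* cur  = leaf;
  while (lp != nullptr) {
    // Find the insertion point for lp (chains are sorted descending on key).
    while (cur != nullptr) {
      if (cur == lp) return leaf;            // already present in the merged chain
      if (cur->key < lp->key) break;         // insert lp just before cur
      prev = cur;
      cur  = cur->parent;
    }
    Chain* next_lp = lp->parent;             // remember the rest of lp's chain
    lp->parent = cur;
    if (prev != nullptr) prev->parent = lp; else leaf = lp;
    prev = lp;                               // continue scanning from here
    lp   = next_lp;
  }
  return leaf;
}

int main() {
  // Chain A: 9 -> 4 -> 1 and chain B: 7 -> 3 -> 1, sharing the final node.
  Chain one{1, nullptr}, four{4, &one}, nine{9, &four};
  Chain three{3, &one}, seven{7, &three};
  Chain* merged = sorted_merge(&nine, &seven);
  for (Chain* c = merged; c != nullptr; c = c->parent) printf("%d ", c->key);  // 9 7 4 3 1
  printf("\n");
  return 0;
}
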
// ------------------------------------------------------------------
// ciTypeFlow::build_loop_tree
//
// Incrementally build loop tree.
void ciTypeFlow::build_loop_tree(Block* blk) {
assert(!blk->is_post_visited(), "precondition");
Loop* innermost = NULL; // merge of loop tree branches over all successors
for (SuccIter iter(blk); !iter.done(); iter.next()) {
Loop* lp = NULL;
Block* succ = iter.succ();
if (!succ->is_post_visited()) {
// Found a backedge: this block is being post-visited, but the successor is not
// yet post-visited, so the successor is still on the DFS path
assert(succ->pre_order() <= blk->pre_order(), "should be backedge");
// Create a LoopNode to mark this loop.
lp = new (arena()) Loop(succ, blk);
if (succ->loop() == NULL)
succ->set_loop(lp);
// succ->loop will be updated to innermost loop on a later call, when blk==succ
} else { // Nested loop
lp = succ->loop();
// If succ is loop head, find outer loop.
while (lp != NULL && lp->head() == succ) {
lp = lp->parent();
}
if (lp == NULL) {
// Infinite loop; its parent is the root
lp = loop_tree_root();
}
}
// Check for irreducible loop.
// Successor has already been visited. If the successor's loop head
// has already been post-visited, then this is another entry into the loop.
while (lp->head()->is_post_visited() && lp != loop_tree_root()) {
_has_irreducible_entry = true;
lp->set_irreducible(succ);
if (!succ->is_on_work_list()) {
// Assume irreducible entries need more data flow
add_to_work_list(succ);
}
lp = lp->parent();
assert(lp != NULL, "nested loop must have parent by now");
}
// Merge loop tree branch for all successors.
innermost = innermost == NULL ? lp : innermost->sorted_merge(lp);
} // end loop
if (innermost == NULL) {
assert(blk->successors()->length() == 0, "CFG exit");
blk->set_loop(loop_tree_root());
} else if (innermost->head() == blk) {
// If loop header, complete the tree pointers
if (blk->loop() != innermost) {
#ifdef ASSERT
assert(blk->loop()->head() == innermost->head(), "same head");
Loop* dl;
for (dl = innermost; dl != NULL && dl != blk->loop(); dl = dl->parent());
assert(dl == blk->loop(), "blk->loop() already in innermost list");
#endif
blk->set_loop(innermost);
}
innermost->def_locals()->add(blk->def_locals());
Loop* l = innermost;
Loop* p = l->parent();
while (p && l->head() == blk) {
l->set_sibling(p->child()); // Put self on parents 'next child'
p->set_child(l); // Make self the first child of parent
p->def_locals()->add(l->def_locals());
l = p; // Walk up the parent chain
p = l->parent();
}
} else {
blk->set_loop(innermost);
innermost->def_locals()->add(blk->def_locals());
}
}
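
The backedge test in build_loop_tree above is the standard one: an edge whose target has been visited but not yet post-visited must point back into the current DFS path, so the target is a loop head. The recursive stand-alone sketch below applies the same visited-but-not-post-visited test while scanning edges (the real code applies it when the predecessor is post-visited); the toy CFG and node ids are made up.

#include <cstdio>
#include <vector>

static void dfs(int blk, const std::vector<std::vector<int>>& succ,
                std::vector<bool>& visited, std::vector<bool>& post_visited) {
  visited[blk] = true;
  for (int s : succ[blk]) {
    if (!visited[s]) {
      dfs(s, succ, visited, post_visited);
    } else if (!post_visited[s]) {
      // Target is still on the DFS path, so this edge closes a loop headed by s.
      printf("backedge %d -> %d (loop head %d)\n", blk, s, s);
    }
  }
  post_visited[blk] = true;
}

int main() {
  // Toy CFG: 0 -> 1, 1 -> 2, 2 -> {1, 3}; the edge 2 -> 1 is the backedge.
  std::vector<std::vector<int>> succ = { {1}, {2}, {1, 3}, {} };
  std::vector<bool> visited(4, false), post_visited(4, false);
  dfs(0, succ, visited, post_visited);   // prints: backedge 2 -> 1 (loop head 1)
  return 0;
}
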
// ------------------------------------------------------------------
// ciTypeFlow::Loop::contains
//
// Returns true if lp is a nested loop or this loop itself.
bool ciTypeFlow::Loop::contains(ciTypeFlow::Loop* lp) const {
assert(lp != NULL, "");
if (this == lp || head() == lp->head()) return true;
int depth1 = depth();
int depth2 = lp->depth();
if (depth1 > depth2)
return false;
while (depth1 < depth2) {
depth2--;
lp = lp->parent();
}
return this == lp;
}
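
contains() and depth() above implement the usual ancestor test on a parent-linked tree: compute both depths, raise the deeper node until the depths match, then compare pointers. A tiny stand-alone sketch with a hypothetical Node type:

#include <cstdio>

struct Node { Node* parent; };

static int depth(const Node* n) {
  int d = 0;
  for (const Node* p = n->parent; p != nullptr; p = p->parent) d++;
  return d;
}

static bool contains(const Node* outer, const Node* inner) {
  if (outer == inner) return true;
  int d1 = depth(outer), d2 = depth(inner);
  if (d1 > d2) return false;                     // an ancestor can never be deeper
  while (d1 < d2) { inner = inner->parent; d2--; }
  return outer == inner;
}

int main() {
  Node root{nullptr}, a{&root}, b{&a}, c{&root};
  printf("%d %d\n", contains(&a, &b), contains(&c, &b));  // prints: 1 0
  return 0;
}
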
// ------------------------------------------------------------------
// ciTypeFlow::Loop::depth
//
// Loop depth
int ciTypeFlow::Loop::depth() const {
int dp = 0;
for (Loop* lp = this->parent(); lp != NULL; lp = lp->parent())
dp++;
return dp;
}
#ifndef PRODUCT
// ------------------------------------------------------------------
// ciTypeFlow::Loop::print
void ciTypeFlow::Loop::print(outputStream* st, int indent) const {
for (int i = 0; i < indent; i++) st->print(" ");
st->print("%d<-%d %s",
is_root() ? 0 : this->head()->pre_order(),
is_root() ? 0 : this->tail()->pre_order(),
is_irreducible()?" irr":"");
st->print(" defs: ");
def_locals()->print_on(st, _head->outer()->method()->max_locals());
st->cr();
for (Loop* ch = child(); ch != NULL; ch = ch->sibling())
ch->print(st, indent+2);
}
#endif
// ------------------------------------------------------------------
// ciTypeFlow::df_flow_types
//
// Perform the depth first type flow analysis. Helper for flow_types.
void ciTypeFlow::df_flow_types(Block* start,
bool do_flow,
StateVector* temp_vector,
JsrSet* temp_set) {
int dft_len = 100;
GrowableArray<Block*> stk(arena(), dft_len, 0, NULL);
ciBlock* dummy = _methodBlocks->make_dummy_block();
JsrSet* root_set = new JsrSet(NULL, 0);
Block* root_head = new (arena()) Block(this, dummy, root_set);
Block* root_tail = new (arena()) Block(this, dummy, root_set);
root_head->set_pre_order(0);
root_head->set_post_order(0);
root_tail->set_pre_order(max_jint);
root_tail->set_post_order(max_jint);
set_loop_tree_root(new (arena()) Loop(root_head, root_tail));
stk.push(start);
_next_pre_order = 0; // initialize pre_order counter
_rpo_list = NULL;
int next_po = 0; // initialize post_order counter
// Compute RPO and the control flow graph
int size;
while ((size = stk.length()) > 0) {
Block* blk = stk.top(); // Leave node on stack
if (!blk->is_visited()) {
// forward arc in graph
assert (!blk->has_pre_order(), "");
blk->set_next_pre_order();
if (_next_pre_order >= MaxNodeLimit / 2) {
// Too many basic blocks. Bail out.
// This can happen when try/finally constructs are nested to depth N,
// and there is O(2**N) cloning of jsr bodies. See bug 4697245!
// "MaxNodeLimit / 2" is used because probably the parser will
// generate at least twice that many nodes and bail out.
record_failure("too many basic blocks");
return;
}
if (do_flow) {
flow_block(blk, temp_vector, temp_set);
if (failing()) return; // Watch for bailouts.
}
} else if (!blk->is_post_visited()) {
// cross or back arc
for (SuccIter iter(blk); !iter.done(); iter.next()) {
Block* succ = iter.succ();
if (!succ->is_visited()) {
stk.push(succ);
}
}
if (stk.length() == size) {
// There were no additional children, post visit node now
stk.pop(); // Remove node from stack
build_loop_tree(blk);
blk->set_post_order(next_po++); // Assign post order
prepend_to_rpo_list(blk);
assert(blk->is_post_visited(), "");
if (blk->is_loop_head() && !blk->is_on_work_list()) {
// Assume loop heads need more data flow
add_to_work_list(blk);
}
}
} else {
stk.pop(); // Remove post-visited node from stack
}
}
}
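
df_flow_types above runs the depth-first walk iteratively with an explicit stack: a block left on the stack is pre-visited first (pre_order assigned, optionally flowed), on a later pass its unvisited successors are pushed, and once no new successors appear it is post-visited and prepended to the RPO list. The stand-alone sketch below reproduces that control structure on a toy four-node CFG; every name in it is an assumption.

#include <cstdio>
#include <vector>

int main() {
  // Toy CFG: 0 -> {1,2}, 1 -> {3}, 2 -> {3}, 3 -> {}
  std::vector<std::vector<int>> succ = { {1, 2}, {3}, {3}, {} };
  int n = (int)succ.size();
  std::vector<int> pre(n, -1), post(n, -1), rpo;
  int next_pre = 0, next_post = 0;
  std::vector<int> stk = {0};

  while (!stk.empty()) {
    int blk = stk.back();                     // leave the node on the stack
    if (pre[blk] < 0) {
      pre[blk] = next_pre++;                  // forward arc: first visit (flow would happen here)
    } else if (post[blk] < 0) {
      size_t size = stk.size();
      for (int s : succ[blk])
        if (pre[s] < 0) stk.push_back(s);     // push unvisited successors
      if (stk.size() == size) {               // no new children: post-visit now
        stk.pop_back();
        post[blk] = next_post++;
        rpo.insert(rpo.begin(), blk);         // prepend, yielding reverse post-order
      }
    } else {
      stk.pop_back();                         // already post-visited (duplicate entry)
    }
  }
  for (int b : rpo) printf("%d ", b);         // prints: 0 1 2 3
  printf("\n");
  return 0;
}
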
// ------------------------------------------------------------------
// ciTypeFlow::flow_types
//
......@@ -2233,91 +2660,93 @@ void ciTypeFlow::flow_types() {
JsrSet* temp_set = new JsrSet(NULL, 16);
// Create the method entry block.
Block* block = block_at(start_bci(), temp_set);
block->set_pre_order(_next_pre_order++);
assert(block->is_start(), "start block must have order #0");
Block* start = block_at(start_bci(), temp_set);
// Load the initial state into it.
const StateVector* start_state = get_start_state();
if (failing()) return;
block->meet(start_state);
add_to_work_list(block);
start->meet(start_state);
// Trickle away.
while (!work_list_empty()) {
Block* block = work_list_next();
flow_block(block, temp_vector, temp_set);
// Depth first visit
df_flow_types(start, true /*do flow*/, temp_vector, temp_set);
if (failing()) return;
assert(_rpo_list == start, "must be start");
// Any loops found?
if (loop_tree_root()->child() != NULL &&
env()->comp_level() >= CompLevel_full_optimization) {
// Loop optimizations are not performed on Tier1 compiles.
bool changed = clone_loop_heads(loop_tree_root(), temp_vector, temp_set);
// If some loop heads were cloned, recompute postorder and loop tree
if (changed) {
loop_tree_root()->set_child(NULL);
for (Block* blk = _rpo_list; blk != NULL;) {
Block* next = blk->rpo_next();
blk->df_init();
blk = next;
}
df_flow_types(start, false /*no flow*/, temp_vector, temp_set);
}
}
// NodeCountCutoff is the number of nodes at which the parser
// will bail out. Probably if we already have lots of BBs,
// the parser will generate at least twice that many nodes and bail out.
// Therefore, this is a conservatively large limit at which to
// bail out in the pre-parse typeflow pass.
int block_limit = MaxNodeLimit / 2;
if (CITraceTypeFlow) {
tty->print_cr("\nLoop tree");
loop_tree_root()->print();
}
if (_next_pre_order >= block_limit) {
// Too many basic blocks. Bail out.
//
// This can happen when try/finally constructs are nested to depth N,
// and there is O(2**N) cloning of jsr bodies. See bug 4697245!
record_failure("too many basic blocks");
return;
}
// Continue flow analysis until fixed point reached
// Watch for bailouts.
if (failing()) return;
debug_only(int max_block = _next_pre_order;)
while (!work_list_empty()) {
Block* blk = work_list_next();
assert (blk->has_post_order(), "post order assigned above");
flow_block(blk, temp_vector, temp_set);
assert (max_block == _next_pre_order, "no new blocks");
assert (!failing(), "no more bailouts");
}
}
// ------------------------------------------------------------------
// ciTypeFlow::map_blocks
//
// Create the block map, which indexes blocks in pre_order.
// Create the block map, which indexes blocks in reverse post-order.
void ciTypeFlow::map_blocks() {
assert(_block_map == NULL, "single initialization");
int pre_order_limit = _next_pre_order;
_block_map = NEW_ARENA_ARRAY(arena(), Block*, pre_order_limit);
assert(pre_order_limit == block_count(), "");
int po;
for (po = 0; po < pre_order_limit; po++) {
debug_only(_block_map[po] = NULL);
}
ciMethodBlocks *mblks = _methodBlocks;
ciBlock* current = NULL;
int limit_bci = code_size();
for (int bci = 0; bci < limit_bci; bci++) {
ciBlock* ciblk = mblks->block_containing(bci);
if (ciblk != NULL && ciblk != current) {
current = ciblk;
int curidx = ciblk->index();
int block_count = (_idx_to_blocklist[curidx] == NULL) ? 0 : _idx_to_blocklist[curidx]->length();
for (int i = 0; i < block_count; i++) {
Block* block = _idx_to_blocklist[curidx]->at(i);
if (!block->has_pre_order()) continue;
int po = block->pre_order();
assert(_block_map[po] == NULL, "unique ref to block");
assert(0 <= po && po < pre_order_limit, "");
_block_map[po] = block;
}
}
}
for (po = 0; po < pre_order_limit; po++) {
assert(_block_map[po] != NULL, "must not drop any blocks");
Block* block = _block_map[po];
int block_ct = _next_pre_order;
_block_map = NEW_ARENA_ARRAY(arena(), Block*, block_ct);
assert(block_ct == block_count(), "");
Block* blk = _rpo_list;
for (int m = 0; m < block_ct; m++) {
int rpo = blk->rpo();
assert(rpo == m, "should be sequential");
_block_map[rpo] = blk;
blk = blk->rpo_next();
}
assert(blk == NULL, "should be done");
for (int j = 0; j < block_ct; j++) {
assert(_block_map[j] != NULL, "must not drop any blocks");
Block* block = _block_map[j];
// Remove dead blocks from successor lists:
for (int e = 0; e <= 1; e++) {
GrowableArray<Block*>* l = e? block->exceptions(): block->successors();
for (int i = 0; i < l->length(); i++) {
Block* s = l->at(i);
if (!s->has_pre_order()) {
for (int k = 0; k < l->length(); k++) {
Block* s = l->at(k);
if (!s->has_post_order()) {
if (CITraceTypeFlow) {
tty->print("Removing dead %s successor of #%d: ", (e? "exceptional": "normal"), block->pre_order());
s->print_value_on(tty);
tty->cr();
}
l->remove(s);
--i;
--k;
}
}
}
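
map_blocks above simply walks the RPO-linked list to fill the block map, relying on Block::rpo() being block_count - post_order - 1, so the entry block (post-visited last) lands at index 0. A tiny stand-alone illustration with a toy Blk type:

#include <cstdio>
#include <vector>

struct Blk { int post_order; Blk* rpo_next; };

int main() {
  const int block_count = 3;
  // The rpo list starts with the entry block, which has the highest post-order.
  Blk b2{0, nullptr}, b1{1, &b2}, b0{2, &b1};
  std::vector<Blk*> block_map(block_count, nullptr);
  int m = 0;
  for (Blk* blk = &b0; blk != nullptr; blk = blk->rpo_next, m++) {
    int rpo = block_count - blk->post_order - 1;   // same formula as Block::rpo()
    block_map[rpo] = blk;                          // rpo comes out equal to m
  }
  for (int j = 0; j < block_count; j++)
    printf("rpo %d -> post_order %d\n", j, block_map[j]->post_order);
  return 0;
}
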
......@@ -2329,7 +2758,7 @@ void ciTypeFlow::map_blocks() {
//
// Find a block with this ciBlock which has a compatible JsrSet.
// If no such block exists, create it, unless the option is no_create.
// If the option is create_private_copy, always create a fresh private copy.
// If the option is create_backedge_copy, always create a fresh backedge copy.
ciTypeFlow::Block* ciTypeFlow::get_block_for(int ciBlockIndex, ciTypeFlow::JsrSet* jsrs, CreateOption option) {
Arena* a = arena();
GrowableArray<Block*>* blocks = _idx_to_blocklist[ciBlockIndex];
......@@ -2342,11 +2771,11 @@ ciTypeFlow::Block* ciTypeFlow::get_block_for(int ciBlockIndex, ciTypeFlow::JsrSe
_idx_to_blocklist[ciBlockIndex] = blocks;
}
if (option != create_private_copy) {
if (option != create_backedge_copy) {
int len = blocks->length();
for (int i = 0; i < len; i++) {
Block* block = blocks->at(i);
if (!block->is_private_copy() && block->is_compatible_with(jsrs)) {
if (!block->is_backedge_copy() && block->is_compatible_with(jsrs)) {
return block;
}
}
......@@ -2357,15 +2786,15 @@ ciTypeFlow::Block* ciTypeFlow::get_block_for(int ciBlockIndex, ciTypeFlow::JsrSe
// We did not find a compatible block. Create one.
Block* new_block = new (a) Block(this, _methodBlocks->block(ciBlockIndex), jsrs);
if (option == create_private_copy) new_block->set_private_copy(true);
if (option == create_backedge_copy) new_block->set_backedge_copy(true);
blocks->append(new_block);
return new_block;
}
// ------------------------------------------------------------------
// ciTypeFlow::private_copy_count
// ciTypeFlow::backedge_copy_count
//
int ciTypeFlow::private_copy_count(int ciBlockIndex, ciTypeFlow::JsrSet* jsrs) const {
int ciTypeFlow::backedge_copy_count(int ciBlockIndex, ciTypeFlow::JsrSet* jsrs) const {
GrowableArray<Block*>* blocks = _idx_to_blocklist[ciBlockIndex];
if (blocks == NULL) {
......@@ -2376,7 +2805,7 @@ int ciTypeFlow::private_copy_count(int ciBlockIndex, ciTypeFlow::JsrSet* jsrs) c
int len = blocks->length();
for (int i = 0; i < len; i++) {
Block* block = blocks->at(i);
if (block->is_private_copy() && block->is_compatible_with(jsrs)) {
if (block->is_backedge_copy() && block->is_compatible_with(jsrs)) {
count++;
}
}
......@@ -2405,10 +2834,12 @@ void ciTypeFlow::do_flow() {
if (failing()) {
return;
}
map_blocks();
if (CIPrintTypeFlow || CITraceTypeFlow) {
print_on(tty);
rpo_print_on(tty);
}
map_blocks();
}
// ------------------------------------------------------------------
......@@ -2466,4 +2897,19 @@ void ciTypeFlow::print_on(outputStream* st) const {
st->print_cr("********************************************************");
st->cr();
}
void ciTypeFlow::rpo_print_on(outputStream* st) const {
st->print_cr("********************************************************");
st->print ("TypeFlow for ");
method()->name()->print_symbol_on(st);
int limit_bci = code_size();
st->print_cr(" %d bytes", limit_bci);
for (Block* blk = _rpo_list; blk != NULL; blk = blk->rpo_next()) {
blk->print_on(st);
st->print_cr("--------------------------------------------------------");
st->cr();
}
st->print_cr("********************************************************");
st->cr();
}
#endif
......@@ -34,11 +34,13 @@ private:
int _max_locals;
int _max_stack;
int _code_size;
bool _has_irreducible_entry;
const char* _failure_reason;
public:
class StateVector;
class Loop;
class Block;
// Build a type flow analyzer
......@@ -55,6 +57,7 @@ public:
int max_stack() const { return _max_stack; }
int max_cells() const { return _max_locals + _max_stack; }
int code_size() const { return _code_size; }
bool has_irreducible_entry() const { return _has_irreducible_entry; }
// Represents information about an "active" jsr call. This
// class represents a call to the routine at some entry address
......@@ -125,6 +128,19 @@ public:
void print_on(outputStream* st) const PRODUCT_RETURN;
};
class LocalSet VALUE_OBJ_CLASS_SPEC {
private:
enum Constants { max = 63 };
uint64_t _bits;
public:
LocalSet() : _bits(0) {}
void add(uint32_t i) { if (i < (uint32_t)max) _bits |= (1LL << i); }
void add(LocalSet* ls) { _bits |= ls->_bits; }
bool test(uint32_t i) const { return i < (uint32_t)max ? (_bits>>i)&1U : true; }
void clear() { _bits = 0; }
void print_on(outputStream* st, int limit) const PRODUCT_RETURN;
};
// Used as a combined index for locals and temps
enum Cell {
Cell_0, Cell_max = INT_MAX
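
The LocalSet declared above is a 64-bit bitmap over the first 63 local slots: add() records a store, add(LocalSet*) unions per-block sets into enclosing loops, and test() answers conservatively true for any slot the bitmap cannot represent. A stand-alone copy of that idea follows; the class name and the driver are illustrative additions, only the semantics follow the declaration above.

#include <cstdint>
#include <cstdio>

class LocalSetSketch {
 private:
  enum Constants { max = 63 };
  uint64_t _bits;
 public:
  LocalSetSketch() : _bits(0) {}
  void add(uint32_t i)         { if (i < (uint32_t)max) _bits |= (uint64_t(1) << i); }
  void add(LocalSetSketch* ls) { _bits |= ls->_bits; }
  bool test(uint32_t i) const  { return i < (uint32_t)max ? ((_bits >> i) & 1) != 0 : true; }
  void clear()                 { _bits = 0; }
};

int main() {
  LocalSetSketch block_defs, loop_defs;
  block_defs.add(2);            // local #2 is stored somewhere in this block
  loop_defs.add(&block_defs);   // union the per-block set into the enclosing loop's set
  printf("%d %d %d\n", loop_defs.test(2), loop_defs.test(5), loop_defs.test(70));
  // prints: 1 0 1  (slot 70 is beyond the bitmap, so it is conservatively "defined")
  return 0;
}
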
......@@ -142,6 +158,8 @@ public:
int _trap_bci;
int _trap_index;
LocalSet _def_locals; // For entire block
static ciType* type_meet_internal(ciType* t1, ciType* t2, ciTypeFlow* analyzer);
public:
......@@ -181,6 +199,9 @@ public:
int monitor_count() const { return _monitor_count; }
void set_monitor_count(int mc) { _monitor_count = mc; }
LocalSet* def_locals() { return &_def_locals; }
const LocalSet* def_locals() const { return &_def_locals; }
static Cell start_cell() { return (Cell)0; }
static Cell next_cell(Cell c) { return (Cell)(((int)c) + 1); }
Cell limit_cell() const {
......@@ -250,6 +271,10 @@ public:
return type->basic_type() == T_DOUBLE;
}
void store_to_local(int lnum) {
_def_locals.add((uint) lnum);
}
void push_translate(ciType* type);
void push_int() {
......@@ -358,6 +383,7 @@ public:
"must be reference type or return address");
overwrite_local_double_long(index);
set_type_at(local(index), type);
store_to_local(index);
}
void load_local_double(int index) {
......@@ -376,6 +402,8 @@ public:
overwrite_local_double_long(index);
set_type_at(local(index), type);
set_type_at(local(index+1), type2);
store_to_local(index);
store_to_local(index+1);
}
void load_local_float(int index) {
......@@ -388,6 +416,7 @@ public:
assert(is_float(type), "must be float type");
overwrite_local_double_long(index);
set_type_at(local(index), type);
store_to_local(index);
}
void load_local_int(int index) {
......@@ -400,6 +429,7 @@ public:
assert(is_int(type), "must be int type");
overwrite_local_double_long(index);
set_type_at(local(index), type);
store_to_local(index);
}
void load_local_long(int index) {
......@@ -418,6 +448,8 @@ public:
overwrite_local_double_long(index);
set_type_at(local(index), type);
set_type_at(local(index+1), type2);
store_to_local(index);
store_to_local(index+1);
}
// Stop interpretation of this path with a trap.
......@@ -450,13 +482,31 @@ public:
};
// Parameter for "find_block" calls:
// Describes the difference between a public and private copy.
// Describes the difference between a public and backedge copy.
enum CreateOption {
create_public_copy,
create_private_copy,
create_backedge_copy,
no_create
};
// Successor iterator
class SuccIter : public StackObj {
private:
Block* _pred;
int _index;
Block* _succ;
public:
SuccIter() : _pred(NULL), _index(-1), _succ(NULL) {}
SuccIter(Block* pred) : _pred(pred), _index(-1), _succ(NULL) { next(); }
int index() { return _index; }
Block* pred() { return _pred; } // Return predecessor
bool done() { return _index < 0; } // Finished?
Block* succ() { return _succ; } // Return current successor
void next(); // Advance
void set_succ(Block* succ); // Update current successor
bool is_normal_ctrl() { return index() < _pred->successors()->length(); }
};
// A basic block
class Block : public ResourceObj {
private:
......@@ -470,15 +520,24 @@ public:
int _trap_bci;
int _trap_index;
// A reasonable approximation to pre-order, provided to the client.
// pre_order, assigned at first visit. Used as block ID and "visited" tag
int _pre_order;
// Has this block been cloned for some special purpose?
bool _private_copy;
// A post-order, used to compute the reverse post order (RPO) provided to the client
int _post_order; // used to compute rpo
// Has this block been cloned for a loop backedge?
bool _backedge_copy;
// A pointer used for our internal work list
Block* _next;
bool _on_work_list;
Block* _next;
bool _on_work_list; // on the work list
Block* _rpo_next; // Reverse post order list
// Loop info
Loop* _loop; // nearest loop
bool _irreducible_entry; // entry to irreducible loop
bool _exception_entry; // entry to exception handler
ciBlock* ciblock() const { return _ciblock; }
StateVector* state() const { return _state; }
......@@ -504,10 +563,11 @@ public:
int start() const { return _ciblock->start_bci(); }
int limit() const { return _ciblock->limit_bci(); }
int control() const { return _ciblock->control_bci(); }
JsrSet* jsrs() const { return _jsrs; }
bool is_private_copy() const { return _private_copy; }
void set_private_copy(bool z);
int private_copy_count() const { return outer()->private_copy_count(ciblock()->index(), _jsrs); }
bool is_backedge_copy() const { return _backedge_copy; }
void set_backedge_copy(bool z);
int backedge_copy_count() const { return outer()->backedge_copy_count(ciblock()->index(), _jsrs); }
// access to entry state
int stack_size() const { return _state->stack_size(); }
......@@ -515,6 +575,20 @@ public:
ciType* local_type_at(int i) const { return _state->local_type_at(i); }
ciType* stack_type_at(int i) const { return _state->stack_type_at(i); }
// Data flow on locals
bool is_invariant_local(uint v) const {
assert(is_loop_head(), "only loop heads");
// Find outermost loop with same loop head
Loop* lp = loop();
while (lp->parent() != NULL) {
if (lp->parent()->head() != lp->head()) break;
lp = lp->parent();
}
return !lp->def_locals()->test(v);
}
LocalSet* def_locals() { return _state->def_locals(); }
const LocalSet* def_locals() const { return _state->def_locals(); }
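
is_invariant_local above first climbs to the outermost loop that shares the same head (nested loops can reuse one head block) and then asks whether that loop, including its children, ever stores the local. A small stand-alone model with a hypothetical LoopInfo node and a plain 64-bit def set:

#include <cstdint>
#include <cstdio>

struct LoopInfo {
  LoopInfo* parent;
  int       head_id;      // id of the loop's head block
  uint64_t  def_locals;   // bit i set: local i is stored somewhere in the loop
};

static bool is_invariant_local(const LoopInfo* lp, uint32_t v) {
  while (lp->parent != nullptr && lp->parent->head_id == lp->head_id)
    lp = lp->parent;                              // outermost loop with the same head
  return ((lp->def_locals >> v) & 1) == 0;        // never stored => invariant
}

int main() {
  LoopInfo outer = { nullptr, 1, uint64_t(1) << 3 };   // outer loop stores local 3
  LoopInfo inner = { &outer, 1, 0 };                   // same head block
  printf("%d %d\n", is_invariant_local(&inner, 3), is_invariant_local(&inner, 4));  // prints: 0 1
  return 0;
}
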
// Get the successors for this Block.
GrowableArray<Block*>* successors(ciBytecodeStream* str,
StateVector* state,
......@@ -524,13 +598,6 @@ public:
return _successors;
}
// Helper function for "successors" when making private copies of
// loop heads for C2.
Block * clone_loop_head(ciTypeFlow* analyzer,
int branch_bci,
Block* target,
JsrSet* jsrs);
// Get the exceptional successors for this Block.
GrowableArray<Block*>* exceptions() {
if (_exceptions == NULL) {
......@@ -584,17 +651,126 @@ public:
bool is_on_work_list() const { return _on_work_list; }
bool has_pre_order() const { return _pre_order >= 0; }
void set_pre_order(int po) { assert(!has_pre_order() && po >= 0, ""); _pre_order = po; }
void set_pre_order(int po) { assert(!has_pre_order(), ""); _pre_order = po; }
int pre_order() const { assert(has_pre_order(), ""); return _pre_order; }
void set_next_pre_order() { set_pre_order(outer()->inc_next_pre_order()); }
bool is_start() const { return _pre_order == outer()->start_block_num(); }
// A ranking used in determining order within the work list.
bool is_simpler_than(Block* other);
// Reverse post order
void df_init();
bool has_post_order() const { return _post_order >= 0; }
void set_post_order(int po) { assert(!has_post_order() && po >= 0, ""); _post_order = po; }
void reset_post_order(int o){ _post_order = o; }
int post_order() const { assert(has_post_order(), ""); return _post_order; }
bool has_rpo() const { return has_post_order() && outer()->have_block_count(); }
int rpo() const { assert(has_rpo(), ""); return outer()->block_count() - post_order() - 1; }
void set_rpo_next(Block* b) { _rpo_next = b; }
Block* rpo_next() { return _rpo_next; }
// Loops
Loop* loop() const { return _loop; }
void set_loop(Loop* lp) { _loop = lp; }
bool is_loop_head() const { return _loop && _loop->head() == this; }
void set_irreducible_entry(bool c) { _irreducible_entry = c; }
bool is_irreducible_entry() const { return _irreducible_entry; }
bool is_visited() const { return has_pre_order(); }
bool is_post_visited() const { return has_post_order(); }
bool is_clonable_exit(Loop* lp);
Block* looping_succ(Loop* lp); // Successor inside of loop
bool is_single_entry_loop_head() const {
if (!is_loop_head()) return false;
for (Loop* lp = loop(); lp != NULL && lp->head() == this; lp = lp->parent())
if (lp->is_irreducible()) return false;
return true;
}
void print_value_on(outputStream* st) const PRODUCT_RETURN;
void print_on(outputStream* st) const PRODUCT_RETURN;
};
// Loop
class Loop : public ResourceObj {
private:
Loop* _parent;
Loop* _sibling; // List of siblings, null terminated
Loop* _child; // Head of child list threaded thru sibling pointer
Block* _head; // Head of loop
Block* _tail; // Tail of loop
bool _irreducible;
LocalSet _def_locals;
public:
Loop(Block* head, Block* tail) :
_head(head), _tail(tail),
_parent(NULL), _sibling(NULL), _child(NULL),
_irreducible(false), _def_locals() {}
Loop* parent() const { return _parent; }
Loop* sibling() const { return _sibling; }
Loop* child() const { return _child; }
Block* head() const { return _head; }
Block* tail() const { return _tail; }
void set_parent(Loop* p) { _parent = p; }
void set_sibling(Loop* s) { _sibling = s; }
void set_child(Loop* c) { _child = c; }
void set_head(Block* hd) { _head = hd; }
void set_tail(Block* tl) { _tail = tl; }
int depth() const; // nesting depth
// Returns true if lp is a nested loop or us.
bool contains(Loop* lp) const;
bool contains(Block* blk) const { return contains(blk->loop()); }
// Data flow on locals
LocalSet* def_locals() { return &_def_locals; }
const LocalSet* def_locals() const { return &_def_locals; }
// Merge the branch lp into this branch, sorting on the loop head
// pre_orders. Returns the new branch.
Loop* sorted_merge(Loop* lp);
// Mark non-single entry to loop
void set_irreducible(Block* entry) {
_irreducible = true;
entry->set_irreducible_entry(true);
}
bool is_irreducible() const { return _irreducible; }
bool is_root() const { return _tail->pre_order() == max_jint; }
void print(outputStream* st = tty, int indent = 0) const PRODUCT_RETURN;
};
// Postorder iteration over the loop tree.
class PostorderLoops : public StackObj {
private:
Loop* _root;
Loop* _current;
public:
PostorderLoops(Loop* root) : _root(root), _current(root) {
while (_current->child() != NULL) {
_current = _current->child();
}
}
bool done() { return _current == NULL; } // Finished iterating?
void next(); // Advance to next loop
Loop* current() { return _current; } // Return current loop.
};
// Preorder iteration over the loop tree.
class PreorderLoops : public StackObj {
private:
Loop* _root;
Loop* _current;
public:
PreorderLoops(Loop* root) : _root(root), _current(root) {}
bool done() { return _current == NULL; } // Finished iterating?
void next(); // Advance to next loop
Loop* current() { return _current; } // Return current loop.
};
// Standard indexes of successors, for various bytecodes.
enum {
FALL_THROUGH = 0, // normal control
......@@ -619,6 +795,12 @@ private:
// Tells if a given instruction is able to generate an exception edge.
bool can_trap(ciBytecodeStream& str);
// Clone the loop heads. Returns true if any cloning occurred.
bool clone_loop_heads(Loop* lp, StateVector* temp_vector, JsrSet* temp_set);
// Clone lp's head and replace tail's successors with clone.
Block* clone_loop_head(Loop* lp, StateVector* temp_vector, JsrSet* temp_set);
public:
// Return the block beginning at bci which has a JsrSet compatible
// with jsrs.
......@@ -627,8 +809,8 @@ public:
// block factory
Block* get_block_for(int ciBlockIndex, JsrSet* jsrs, CreateOption option = create_public_copy);
// How many of the blocks have the private_copy bit set?
int private_copy_count(int ciBlockIndex, JsrSet* jsrs) const;
// How many of the blocks have the backedge_copy bit set?
int backedge_copy_count(int ciBlockIndex, JsrSet* jsrs) const;
// Return an existing block containing bci which has a JsrSet compatible
// with jsrs, or NULL if there is none.
......@@ -651,11 +833,18 @@ public:
return _block_map[po]; }
Block* start_block() const { return pre_order_at(start_block_num()); }
int start_block_num() const { return 0; }
Block* rpo_at(int rpo) const { assert(0 <= rpo && rpo < block_count(), "out of bounds");
return _block_map[rpo]; }
int next_pre_order() { return _next_pre_order; }
int inc_next_pre_order() { return _next_pre_order++; }
private:
// A work list used during flow analysis.
Block* _work_list;
// List of blocks in reverse post order
Block* _rpo_list;
// Next Block::_pre_order. After mapping, doubles as block_count.
int _next_pre_order;
......@@ -668,6 +857,15 @@ private:
// Add a basic block to our work list.
void add_to_work_list(Block* block);
// Prepend a basic block to rpo list.
void prepend_to_rpo_list(Block* blk) {
blk->set_rpo_next(_rpo_list);
_rpo_list = blk;
}
// Root of the loop tree
Loop* _loop_tree_root;
// State used for make_jsr_record
int _jsr_count;
GrowableArray<JsrRecord*>* _jsr_records;
......@@ -677,6 +875,9 @@ public:
// does not already exist.
JsrRecord* make_jsr_record(int entry_address, int return_address);
void set_loop_tree_root(Loop* ltr) { _loop_tree_root = ltr; }
Loop* loop_tree_root() { return _loop_tree_root; }
private:
// Get the initial state for start_bci:
const StateVector* get_start_state();
......@@ -703,6 +904,15 @@ private:
// necessary.
void flow_types();
// Perform the depth first type flow analysis. Helper for flow_types.
void df_flow_types(Block* start,
bool do_flow,
StateVector* temp_vector,
JsrSet* temp_set);
// Incrementally build loop tree.
void build_loop_tree(Block* blk);
// Create the block map, which indexes blocks in pre_order.
void map_blocks();
......@@ -711,4 +921,6 @@ public:
void do_flow();
void print_on(outputStream* st) const PRODUCT_RETURN;
void rpo_print_on(outputStream* st) const PRODUCT_RETURN;
};
......@@ -1350,11 +1350,7 @@ bool nmethod::can_unload(BoolObjectClosure* is_alive,
return false;
}
}
if (!UseParallelOldGC || !VerifyParallelOldWithMarkSweep) {
// Cannot do this test if verification of the UseParallelOldGC
// code using the PSMarkSweep code is being done.
assert(unloading_occurred, "Inconsistency in unloading");
}
assert(unloading_occurred, "Inconsistency in unloading");
make_unloaded(is_alive, obj);
return true;
}
......
......@@ -210,10 +210,6 @@ void ParallelScavengeHeap::post_initialize() {
PSScavenge::initialize();
if (UseParallelOldGC) {
PSParallelCompact::post_initialize();
if (VerifyParallelOldWithMarkSweep) {
// Will be used for verification of par old.
PSMarkSweep::initialize();
}
} else {
PSMarkSweep::initialize();
}
......@@ -402,7 +398,7 @@ HeapWord* ParallelScavengeHeap::mem_allocate(
return result;
}
if (!is_tlab &&
size >= (young_gen()->eden_space()->capacity_in_words() / 2)) {
size >= (young_gen()->eden_space()->capacity_in_words(Thread::current()) / 2)) {
result = old_gen()->allocate(size, is_tlab);
if (result != NULL) {
return result;
......
......@@ -146,7 +146,7 @@ void RefProcTaskExecutor::execute(ProcessTask& task)
{
ParallelScavengeHeap* heap = PSParallelCompact::gc_heap();
uint parallel_gc_threads = heap->gc_task_manager()->workers();
ChunkTaskQueueSet* qset = ParCompactionManager::chunk_array();
RegionTaskQueueSet* qset = ParCompactionManager::region_array();
ParallelTaskTerminator terminator(parallel_gc_threads, qset);
GCTaskQueue* q = GCTaskQueue::create();
for(uint i=0; i<parallel_gc_threads; i++) {
......@@ -205,38 +205,38 @@ void StealMarkingTask::do_it(GCTaskManager* manager, uint which) {
}
//
// StealChunkCompactionTask
// StealRegionCompactionTask
//
StealChunkCompactionTask::StealChunkCompactionTask(ParallelTaskTerminator* t) :
_terminator(t) {};
StealRegionCompactionTask::StealRegionCompactionTask(ParallelTaskTerminator* t):
_terminator(t) {}
void StealChunkCompactionTask::do_it(GCTaskManager* manager, uint which) {
void StealRegionCompactionTask::do_it(GCTaskManager* manager, uint which) {
assert(Universe::heap()->is_gc_active(), "called outside gc");
NOT_PRODUCT(TraceTime tm("StealChunkCompactionTask",
NOT_PRODUCT(TraceTime tm("StealRegionCompactionTask",
PrintGCDetails && TraceParallelOldGCTasks, true, gclog_or_tty));
ParCompactionManager* cm =
ParCompactionManager::gc_thread_compaction_manager(which);
// Has to drain stacks first because there may be chunks on
// Has to drain stacks first because there may be regions
// preloaded onto the stack and this thread may never have
// done a draining task. Are the draining tasks needed?
cm->drain_chunk_stacks();
cm->drain_region_stacks();
size_t chunk_index = 0;
size_t region_index = 0;
int random_seed = 17;
// If we're the termination task, try 10 rounds of stealing before
// setting the termination flag
while(true) {
if (ParCompactionManager::steal(which, &random_seed, chunk_index)) {
PSParallelCompact::fill_and_update_chunk(cm, chunk_index);
cm->drain_chunk_stacks();
if (ParCompactionManager::steal(which, &random_seed, region_index)) {
PSParallelCompact::fill_and_update_region(cm, region_index);
cm->drain_region_stacks();
} else {
if (terminator()->offer_termination()) {
break;
......@@ -249,11 +249,10 @@ void StealChunkCompactionTask::do_it(GCTaskManager* manager, uint which) {
UpdateDensePrefixTask::UpdateDensePrefixTask(
PSParallelCompact::SpaceId space_id,
size_t chunk_index_start,
size_t chunk_index_end) :
_space_id(space_id), _chunk_index_start(chunk_index_start),
_chunk_index_end(chunk_index_end)
{}
size_t region_index_start,
size_t region_index_end) :
_space_id(space_id), _region_index_start(region_index_start),
_region_index_end(region_index_end) {}
void UpdateDensePrefixTask::do_it(GCTaskManager* manager, uint which) {
......@@ -265,8 +264,8 @@ void UpdateDensePrefixTask::do_it(GCTaskManager* manager, uint which) {
PSParallelCompact::update_and_deadwood_in_dense_prefix(cm,
_space_id,
_chunk_index_start,
_chunk_index_end);
_region_index_start,
_region_index_end);
}
void DrainStacksCompactionTask::do_it(GCTaskManager* manager, uint which) {
......@@ -278,6 +277,6 @@ void DrainStacksCompactionTask::do_it(GCTaskManager* manager, uint which) {
ParCompactionManager* cm =
ParCompactionManager::gc_thread_compaction_manager(which);
// Process any chunks already in the compaction managers stacks.
cm->drain_chunk_stacks();
// Process any regions already in the compaction managers stacks.
cm->drain_region_stacks();
}
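The two do_it bodies above share one pattern: drain the thread-local region work first, then repeatedly steal a region index from another thread's queue, and only give up (offer termination) once stealing fails. A minimal single-threaded sketch of that loop, using std::deque in place of the HotSpot task queues; every name below is an illustrative stand-in, not the real API:
#include <cstddef>
#include <cstdio>
#include <deque>
#include <vector>
// Illustrative stand-in for a per-thread region stack.
typedef std::deque<size_t> RegionStack;
// Placeholder for PSParallelCompact::fill_and_update_region.
static void process_region(size_t region_index) {
  std::printf("processing region %zu\n", region_index);
}
// Drain this thread's own stack completely.
static void drain_region_stack(RegionStack& mine) {
  while (!mine.empty()) {
    size_t region_index = mine.back();
    mine.pop_back();
    process_region(region_index);
  }
}
// Try to take one region index from any other thread's stack.
static bool steal_region(std::vector<RegionStack>& all, size_t self,
                         size_t& region_index) {
  for (size_t i = 0; i < all.size(); ++i) {
    if (i != self && !all[i].empty()) {
      region_index = all[i].front();    // steal from the opposite end
      all[i].pop_front();
      return true;
    }
  }
  return false;                         // nothing left anywhere
}
int main() {
  std::vector<RegionStack> stacks(2);
  stacks[0].push_back(1);               // preloaded regions
  stacks[1].push_back(2);
  stacks[1].push_back(3);
  const size_t self = 0;
  drain_region_stack(stacks[self]);     // drain first, as the task does
  size_t region_index = 0;
  while (steal_region(stacks, self, region_index)) {
    process_region(region_index);
    drain_region_stack(stacks[self]);   // stolen work may push more regions
  }
  return 0;                             // stands in for offer_termination()
}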
......@@ -188,18 +188,18 @@ class StealMarkingTask : public GCTask {
};
//
// StealChunkCompactionTask
// StealRegionCompactionTask
//
// This task is used to distribute work to idle threads.
//
class StealChunkCompactionTask : public GCTask {
class StealRegionCompactionTask : public GCTask {
private:
ParallelTaskTerminator* const _terminator;
public:
StealChunkCompactionTask(ParallelTaskTerminator* t);
StealRegionCompactionTask(ParallelTaskTerminator* t);
char* name() { return (char *)"steal-chunk-task"; }
char* name() { return (char *)"steal-region-task"; }
ParallelTaskTerminator* terminator() { return _terminator; }
virtual void do_it(GCTaskManager* manager, uint which);
......@@ -215,15 +215,15 @@ class StealChunkCompactionTask : public GCTask {
class UpdateDensePrefixTask : public GCTask {
private:
PSParallelCompact::SpaceId _space_id;
size_t _chunk_index_start;
size_t _chunk_index_end;
size_t _region_index_start;
size_t _region_index_end;
public:
char* name() { return (char *)"update-dense_prefix-task"; }
UpdateDensePrefixTask(PSParallelCompact::SpaceId space_id,
size_t chunk_index_start,
size_t chunk_index_end);
size_t region_index_start,
size_t region_index_end);
virtual void do_it(GCTaskManager* manager, uint which);
};
......@@ -231,17 +231,17 @@ class UpdateDensePrefixTask : public GCTask {
//
// DrainStacksCompactionTask
//
// This task processes chunks that have been added to the stacks of each
// This task processes regions that have been added to the stacks of each
// compaction manager.
//
// Trying to use one draining thread does not work because there are no
// guarantees about which task will be picked up by which thread. For example,
// if thread A gets all the preloaded chunks, thread A may not get a draining
// if thread A gets all the preloaded regions, thread A may not get a draining
// task (they may all be done by other threads).
//
class DrainStacksCompactionTask : public GCTask {
public:
char* name() { return (char *)"drain-chunk-task"; }
char* name() { return (char *)"drain-region-task"; }
virtual void do_it(GCTaskManager* manager, uint which);
};
......@@ -30,7 +30,7 @@ ParCompactionManager** ParCompactionManager::_manager_array = NULL;
OopTaskQueueSet* ParCompactionManager::_stack_array = NULL;
ObjectStartArray* ParCompactionManager::_start_array = NULL;
ParMarkBitMap* ParCompactionManager::_mark_bitmap = NULL;
ChunkTaskQueueSet* ParCompactionManager::_chunk_array = NULL;
RegionTaskQueueSet* ParCompactionManager::_region_array = NULL;
ParCompactionManager::ParCompactionManager() :
_action(CopyAndUpdate) {
......@@ -46,13 +46,13 @@ ParCompactionManager::ParCompactionManager() :
// We want the overflow stack to be permanent
_overflow_stack = new (ResourceObj::C_HEAP) GrowableArray<oop>(10, true);
#ifdef USE_ChunkTaskQueueWithOverflow
chunk_stack()->initialize();
#ifdef USE_RegionTaskQueueWithOverflow
region_stack()->initialize();
#else
chunk_stack()->initialize();
region_stack()->initialize();
// We want the overflow stack to be permanent
_chunk_overflow_stack =
_region_overflow_stack =
new (ResourceObj::C_HEAP) GrowableArray<size_t>(10, true);
#endif
......@@ -86,18 +86,18 @@ void ParCompactionManager::initialize(ParMarkBitMap* mbm) {
_stack_array = new OopTaskQueueSet(parallel_gc_threads);
guarantee(_stack_array != NULL, "Could not initialize promotion manager");
_chunk_array = new ChunkTaskQueueSet(parallel_gc_threads);
guarantee(_chunk_array != NULL, "Could not initialize promotion manager");
_region_array = new RegionTaskQueueSet(parallel_gc_threads);
guarantee(_region_array != NULL, "Could not initialize promotion manager");
// Create and register the ParCompactionManager(s) for the worker threads.
for(uint i=0; i<parallel_gc_threads; i++) {
_manager_array[i] = new ParCompactionManager();
guarantee(_manager_array[i] != NULL, "Could not create ParCompactionManager");
stack_array()->register_queue(i, _manager_array[i]->marking_stack());
#ifdef USE_ChunkTaskQueueWithOverflow
chunk_array()->register_queue(i, _manager_array[i]->chunk_stack()->task_queue());
#ifdef USE_RegionTaskQueueWithOverflow
region_array()->register_queue(i, _manager_array[i]->region_stack()->task_queue());
#else
chunk_array()->register_queue(i, _manager_array[i]->chunk_stack());
region_array()->register_queue(i, _manager_array[i]->region_stack());
#endif
}
......@@ -153,31 +153,31 @@ oop ParCompactionManager::retrieve_for_scanning() {
return NULL;
}
// Save chunk on a stack
void ParCompactionManager::save_for_processing(size_t chunk_index) {
// Save region on a stack
void ParCompactionManager::save_for_processing(size_t region_index) {
#ifdef ASSERT
const ParallelCompactData& sd = PSParallelCompact::summary_data();
ParallelCompactData::ChunkData* const chunk_ptr = sd.chunk(chunk_index);
assert(chunk_ptr->claimed(), "must be claimed");
assert(chunk_ptr->_pushed++ == 0, "should only be pushed once");
ParallelCompactData::RegionData* const region_ptr = sd.region(region_index);
assert(region_ptr->claimed(), "must be claimed");
assert(region_ptr->_pushed++ == 0, "should only be pushed once");
#endif
chunk_stack_push(chunk_index);
region_stack_push(region_index);
}
void ParCompactionManager::chunk_stack_push(size_t chunk_index) {
void ParCompactionManager::region_stack_push(size_t region_index) {
#ifdef USE_ChunkTaskQueueWithOverflow
chunk_stack()->save(chunk_index);
#ifdef USE_RegionTaskQueueWithOverflow
region_stack()->save(region_index);
#else
if(!chunk_stack()->push(chunk_index)) {
chunk_overflow_stack()->push(chunk_index);
if(!region_stack()->push(region_index)) {
region_overflow_stack()->push(region_index);
}
#endif
}
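region_stack_push above falls back to a growable overflow stack whenever the fixed-size task queue rejects the push, which is what lets save_for_processing promise that it never fails. A small sketch of the same fallback, with a bounded vector standing in for RegionTaskQueue (illustrative names only):
#include <cassert>
#include <cstddef>
#include <vector>
// Bounded queue stand-in: push() fails when the queue is full.
class BoundedQueue {
 public:
  explicit BoundedQueue(size_t capacity) : _capacity(capacity) {}
  bool push(size_t v) {
    if (_data.size() >= _capacity) return false;
    _data.push_back(v);
    return true;
  }
 private:
  size_t _capacity;
  std::vector<size_t> _data;
};
int main() {
  BoundedQueue region_stack(2);
  std::vector<size_t> region_overflow_stack;        // unbounded fallback
  for (size_t region_index = 0; region_index < 5; ++region_index) {
    if (!region_stack.push(region_index)) {
      region_overflow_stack.push_back(region_index); // never lose a region
    }
  }
  assert(region_overflow_stack.size() == 3);         // regions 2..4 overflowed
  return 0;
}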
bool ParCompactionManager::retrieve_for_processing(size_t& chunk_index) {
#ifdef USE_ChunkTaskQueueWithOverflow
return chunk_stack()->retrieve(chunk_index);
bool ParCompactionManager::retrieve_for_processing(size_t& region_index) {
#ifdef USE_RegionTaskQueueWithOverflow
return region_stack()->retrieve(region_index);
#else
// Should not be used in the parallel case
ShouldNotReachHere();
......@@ -230,14 +230,14 @@ void ParCompactionManager::drain_marking_stacks(OopClosure* blk) {
assert(overflow_stack()->length() == 0, "Sanity");
}
void ParCompactionManager::drain_chunk_overflow_stack() {
size_t chunk_index = (size_t) -1;
while(chunk_stack()->retrieve_from_overflow(chunk_index)) {
PSParallelCompact::fill_and_update_chunk(this, chunk_index);
void ParCompactionManager::drain_region_overflow_stack() {
size_t region_index = (size_t) -1;
while(region_stack()->retrieve_from_overflow(region_index)) {
PSParallelCompact::fill_and_update_region(this, region_index);
}
}
void ParCompactionManager::drain_chunk_stacks() {
void ParCompactionManager::drain_region_stacks() {
#ifdef ASSERT
ParallelScavengeHeap* heap = (ParallelScavengeHeap*)Universe::heap();
assert(heap->kind() == CollectedHeap::ParallelScavengeHeap, "Sanity");
......@@ -249,42 +249,42 @@ void ParCompactionManager::drain_chunk_stacks() {
#if 1 // def DO_PARALLEL - the serial code hasn't been updated
do {
#ifdef USE_ChunkTaskQueueWithOverflow
#ifdef USE_RegionTaskQueueWithOverflow
// Drain overflow stack first, so other threads can steal from
// claimed stack while we work.
size_t chunk_index = (size_t) -1;
while(chunk_stack()->retrieve_from_overflow(chunk_index)) {
PSParallelCompact::fill_and_update_chunk(this, chunk_index);
size_t region_index = (size_t) -1;
while(region_stack()->retrieve_from_overflow(region_index)) {
PSParallelCompact::fill_and_update_region(this, region_index);
}
while (chunk_stack()->retrieve_from_stealable_queue(chunk_index)) {
PSParallelCompact::fill_and_update_chunk(this, chunk_index);
while (region_stack()->retrieve_from_stealable_queue(region_index)) {
PSParallelCompact::fill_and_update_region(this, region_index);
}
} while (!chunk_stack()->is_empty());
} while (!region_stack()->is_empty());
#else
// Drain overflow stack first, so other threads can steal from
// claimed stack while we work.
while(!chunk_overflow_stack()->is_empty()) {
size_t chunk_index = chunk_overflow_stack()->pop();
PSParallelCompact::fill_and_update_chunk(this, chunk_index);
while(!region_overflow_stack()->is_empty()) {
size_t region_index = region_overflow_stack()->pop();
PSParallelCompact::fill_and_update_region(this, region_index);
}
size_t chunk_index = -1;
size_t region_index = -1;
// region_index is an out parameter (passed by reference).
while (chunk_stack()->pop_local(chunk_index)) {
while (region_stack()->pop_local(region_index)) {
// It would be nice to assert about the type of objects we might
// pop, but they can come from anywhere, unfortunately.
PSParallelCompact::fill_and_update_chunk(this, chunk_index);
PSParallelCompact::fill_and_update_region(this, region_index);
}
} while((chunk_stack()->size() != 0) ||
(chunk_overflow_stack()->length() != 0));
} while((region_stack()->size() != 0) ||
(region_overflow_stack()->length() != 0));
#endif
#ifdef USE_ChunkTaskQueueWithOverflow
assert(chunk_stack()->is_empty(), "Sanity");
#ifdef USE_RegionTaskQueueWithOverflow
assert(region_stack()->is_empty(), "Sanity");
#else
assert(chunk_stack()->size() == 0, "Sanity");
assert(chunk_overflow_stack()->length() == 0, "Sanity");
assert(region_stack()->size() == 0, "Sanity");
assert(region_overflow_stack()->length() == 0, "Sanity");
#endif
#else
oop obj;
......
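In both variants of drain_region_stacks the private overflow storage is emptied before the local end of the stealable queue, so other threads can keep stealing while this thread works, and the outer do/while repeats because filling one region can push more. A condensed single-threaded sketch of that ordering; the names are placeholders for the HotSpot types:
#include <cstddef>
#include <deque>
#include <vector>
static std::vector<size_t> region_overflow_stack;  // private to this thread
static std::deque<size_t>  region_stack;           // stealable by other threads
// Placeholder for PSParallelCompact::fill_and_update_region; filling a
// region may discover and push further regions.
static void fill_and_update_region(size_t /*region_index*/) {}
static void drain_region_stacks() {
  do {
    // Drain the private overflow stack first, so other threads can keep
    // stealing from the shared queue while we work.
    while (!region_overflow_stack.empty()) {
      size_t region_index = region_overflow_stack.back();
      region_overflow_stack.pop_back();
      fill_and_update_region(region_index);
    }
    // Then drain the local end of the stealable queue.
    while (!region_stack.empty()) {
      size_t region_index = region_stack.back();
      region_stack.pop_back();
      fill_and_update_region(region_index);
    }
  } while (!region_stack.empty() || !region_overflow_stack.empty());
}
int main() {
  region_stack.push_back(0);
  region_overflow_stack.push_back(1);
  drain_region_stacks();
  return 0;
}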
......@@ -52,7 +52,7 @@ class ParCompactionManager : public CHeapObj {
friend class ParallelTaskTerminator;
friend class ParMarkBitMap;
friend class PSParallelCompact;
friend class StealChunkCompactionTask;
friend class StealRegionCompactionTask;
friend class UpdateAndFillClosure;
friend class RefProcTaskExecutor;
......@@ -72,27 +72,27 @@ class ParCompactionManager : public CHeapObj {
// ------------------------ End don't putback if not needed
private:
static ParCompactionManager** _manager_array;
static OopTaskQueueSet* _stack_array;
static ObjectStartArray* _start_array;
static ChunkTaskQueueSet* _chunk_array;
static PSOldGen* _old_gen;
OopTaskQueue _marking_stack;
GrowableArray<oop>* _overflow_stack;
static ParCompactionManager** _manager_array;
static OopTaskQueueSet* _stack_array;
static ObjectStartArray* _start_array;
static RegionTaskQueueSet* _region_array;
static PSOldGen* _old_gen;
OopTaskQueue _marking_stack;
GrowableArray<oop>* _overflow_stack;
// Is there a way to reuse the _marking_stack for the
// saving empty chunks? For now just create a different
// saving empty regions? For now just create a different
// type of TaskQueue.
#ifdef USE_ChunkTaskQueueWithOverflow
ChunkTaskQueueWithOverflow _chunk_stack;
#ifdef USE_RegionTaskQueueWithOverflow
RegionTaskQueueWithOverflow _region_stack;
#else
ChunkTaskQueue _chunk_stack;
GrowableArray<size_t>* _chunk_overflow_stack;
RegionTaskQueue _region_stack;
GrowableArray<size_t>* _region_overflow_stack;
#endif
#if 1 // does this happen enough to need a per thread stack?
GrowableArray<Klass*>* _revisit_klass_stack;
GrowableArray<Klass*>* _revisit_klass_stack;
#endif
static ParMarkBitMap* _mark_bitmap;
......@@ -100,21 +100,22 @@ class ParCompactionManager : public CHeapObj {
static PSOldGen* old_gen() { return _old_gen; }
static ObjectStartArray* start_array() { return _start_array; }
static OopTaskQueueSet* stack_array() { return _stack_array; }
static OopTaskQueueSet* stack_array() { return _stack_array; }
static void initialize(ParMarkBitMap* mbm);
protected:
// Array of tasks. Needed by the ParallelTaskTerminator.
static ChunkTaskQueueSet* chunk_array() { return _chunk_array; }
OopTaskQueue* marking_stack() { return &_marking_stack; }
GrowableArray<oop>* overflow_stack() { return _overflow_stack; }
#ifdef USE_ChunkTaskQueueWithOverflow
ChunkTaskQueueWithOverflow* chunk_stack() { return &_chunk_stack; }
static RegionTaskQueueSet* region_array() { return _region_array; }
OopTaskQueue* marking_stack() { return &_marking_stack; }
GrowableArray<oop>* overflow_stack() { return _overflow_stack; }
#ifdef USE_RegionTaskQueueWithOverflow
RegionTaskQueueWithOverflow* region_stack() { return &_region_stack; }
#else
ChunkTaskQueue* chunk_stack() { return &_chunk_stack; }
GrowableArray<size_t>* chunk_overflow_stack() { return _chunk_overflow_stack; }
RegionTaskQueue* region_stack() { return &_region_stack; }
GrowableArray<size_t>* region_overflow_stack() {
return _region_overflow_stack;
}
#endif
// Pushes onto the marking stack. If the marking stack is full,
......@@ -123,9 +124,9 @@ class ParCompactionManager : public CHeapObj {
// Do not implement an equivalent stack_pop. Deal with the
// marking stack and overflow stack directly.
// Pushes onto the chunk stack. If the chunk stack is full,
// pushes onto the chunk overflow stack.
void chunk_stack_push(size_t chunk_index);
// Pushes onto the region stack. If the region stack is full,
// pushes onto the region overflow stack.
void region_stack_push(size_t region_index);
public:
Action action() { return _action; }
......@@ -160,10 +161,10 @@ class ParCompactionManager : public CHeapObj {
// Get an oop for scanning. Returns NULL if no oops were found.
oop retrieve_for_scanning();
// Save chunk for later processing. Must not fail.
void save_for_processing(size_t chunk_index);
// Get a chunk for processing. Returns false if no chunks were found.
bool retrieve_for_processing(size_t& chunk_index);
// Save region for later processing. Must not fail.
void save_for_processing(size_t region_index);
// Get a region for processing. Returns false if no regions were found.
bool retrieve_for_processing(size_t& region_index);
// Access function for compaction managers
static ParCompactionManager* gc_thread_compaction_manager(int index);
......@@ -172,18 +173,18 @@ class ParCompactionManager : public CHeapObj {
return stack_array()->steal(queue_num, seed, t);
}
static bool steal(int queue_num, int* seed, ChunkTask& t) {
return chunk_array()->steal(queue_num, seed, t);
static bool steal(int queue_num, int* seed, RegionTask& t) {
return region_array()->steal(queue_num, seed, t);
}
// Process tasks remaining on any stack
void drain_marking_stacks(OopClosure *blk);
// Process tasks remaining on any stack
void drain_chunk_stacks();
void drain_region_stacks();
// Process tasks remaining on any stack
void drain_chunk_overflow_stack();
void drain_region_overflow_stack();
// Debugging support
#ifdef ASSERT
......
......@@ -35,9 +35,7 @@ void PSMarkSweep::initialize() {
_ref_processor = new ReferenceProcessor(mr,
true, // atomic_discovery
false); // mt_discovery
if (!UseParallelOldGC || !VerifyParallelOldWithMarkSweep) {
_counters = new CollectorCounters("PSMarkSweep", 1);
}
_counters = new CollectorCounters("PSMarkSweep", 1);
}
// This method contains all heap specific policy for invoking mark sweep.
......@@ -518,9 +516,6 @@ void PSMarkSweep::mark_sweep_phase1(bool clear_all_softrefs) {
follow_stack();
// Process reference objects found during marking
// Skipping the reference processing for VerifyParallelOldWithMarkSweep
// affects the marking (makes it different).
{
ReferencePolicy *soft_ref_policy;
if (clear_all_softrefs) {
......
......@@ -152,20 +152,15 @@ void PSMarkSweepDecorator::precompact() {
oop(q)->forward_to(oop(compact_top));
assert(oop(q)->is_gc_marked(), "encoding the pointer should preserve the mark");
} else {
// Don't clear the mark since it confuses parallel old
// verification.
if (!UseParallelOldGC || !VerifyParallelOldWithMarkSweep) {
// if the object isn't moving we can just set the mark to the default
// mark and handle it specially later on.
oop(q)->init_mark();
}
// if the object isn't moving we can just set the mark to the default
// mark and handle it specially later on.
oop(q)->init_mark();
assert(oop(q)->forwardee() == NULL, "should be forwarded to NULL");
}
// Update object start array
if (!UseParallelOldGC || !VerifyParallelOldWithMarkSweep) {
if (start_array)
start_array->allocate_block(compact_top);
if (start_array) {
start_array->allocate_block(compact_top);
}
VALIDATE_MARK_SWEEP_ONLY(MarkSweep::register_live_oop(oop(q), size));
......@@ -219,19 +214,14 @@ void PSMarkSweepDecorator::precompact() {
assert(oop(q)->is_gc_marked(), "encoding the pointer should preserve the mark");
} else {
// if the object isn't moving we can just set the mark to the default
// Don't clear the mark since it confuses parallel old
// verification.
if (!UseParallelOldGC || !VerifyParallelOldWithMarkSweep) {
// mark and handle it specially later on.
oop(q)->init_mark();
}
// mark and handle it specially later on.
oop(q)->init_mark();
assert(oop(q)->forwardee() == NULL, "should be forwarded to NULL");
}
if (!UseParallelOldGC || !VerifyParallelOldWithMarkSweep) {
// Update object start array
if (start_array)
start_array->allocate_block(compact_top);
// Update object start array
if (start_array) {
start_array->allocate_block(compact_top);
}
VALIDATE_MARK_SWEEP_ONLY(MarkSweep::register_live_oop(oop(q), sz));
......
......@@ -152,9 +152,7 @@ void PSOldGen::precompact() {
assert(heap->kind() == CollectedHeap::ParallelScavengeHeap, "Sanity");
// Reset start array first.
debug_only(if (!UseParallelOldGC || !VerifyParallelOldWithMarkSweep) {)
start_array()->reset();
debug_only(})
object_mark_sweep()->precompact();
......
......@@ -28,43 +28,31 @@
#include <math.h>
// All sizes are in HeapWords.
const size_t ParallelCompactData::Log2ChunkSize = 9; // 512 words
const size_t ParallelCompactData::ChunkSize = (size_t)1 << Log2ChunkSize;
const size_t ParallelCompactData::ChunkSizeBytes = ChunkSize << LogHeapWordSize;
const size_t ParallelCompactData::ChunkSizeOffsetMask = ChunkSize - 1;
const size_t ParallelCompactData::ChunkAddrOffsetMask = ChunkSizeBytes - 1;
const size_t ParallelCompactData::ChunkAddrMask = ~ChunkAddrOffsetMask;
const size_t ParallelCompactData::Log2RegionSize = 9; // 512 words
const size_t ParallelCompactData::RegionSize = (size_t)1 << Log2RegionSize;
const size_t ParallelCompactData::RegionSizeBytes =
RegionSize << LogHeapWordSize;
const size_t ParallelCompactData::RegionSizeOffsetMask = RegionSize - 1;
const size_t ParallelCompactData::RegionAddrOffsetMask = RegionSizeBytes - 1;
const size_t ParallelCompactData::RegionAddrMask = ~RegionAddrOffsetMask;
// 32-bit: 128 words covers 4 bitmap words
// 64-bit: 128 words covers 2 bitmap words
const size_t ParallelCompactData::Log2BlockSize = 7; // 128 words
const size_t ParallelCompactData::BlockSize = (size_t)1 << Log2BlockSize;
const size_t ParallelCompactData::BlockOffsetMask = BlockSize - 1;
const size_t ParallelCompactData::BlockMask = ~BlockOffsetMask;
const ParallelCompactData::RegionData::region_sz_t
ParallelCompactData::RegionData::dc_shift = 27;
const size_t ParallelCompactData::BlocksPerChunk = ChunkSize / BlockSize;
const ParallelCompactData::RegionData::region_sz_t
ParallelCompactData::RegionData::dc_mask = ~0U << dc_shift;
const ParallelCompactData::ChunkData::chunk_sz_t
ParallelCompactData::ChunkData::dc_shift = 27;
const ParallelCompactData::RegionData::region_sz_t
ParallelCompactData::RegionData::dc_one = 0x1U << dc_shift;
const ParallelCompactData::ChunkData::chunk_sz_t
ParallelCompactData::ChunkData::dc_mask = ~0U << dc_shift;
const ParallelCompactData::RegionData::region_sz_t
ParallelCompactData::RegionData::los_mask = ~dc_mask;
const ParallelCompactData::ChunkData::chunk_sz_t
ParallelCompactData::ChunkData::dc_one = 0x1U << dc_shift;
const ParallelCompactData::RegionData::region_sz_t
ParallelCompactData::RegionData::dc_claimed = 0x8U << dc_shift;
const ParallelCompactData::ChunkData::chunk_sz_t
ParallelCompactData::ChunkData::los_mask = ~dc_mask;
const ParallelCompactData::ChunkData::chunk_sz_t
ParallelCompactData::ChunkData::dc_claimed = 0x8U << dc_shift;
const ParallelCompactData::ChunkData::chunk_sz_t
ParallelCompactData::ChunkData::dc_completed = 0xcU << dc_shift;
#ifdef ASSERT
short ParallelCompactData::BlockData::_cur_phase = 0;
#endif
const ParallelCompactData::RegionData::region_sz_t
ParallelCompactData::RegionData::dc_completed = 0xcU << dc_shift;
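With Log2RegionSize = 9 each region covers 512 heap words, and the masks defined above let the summary code convert between addresses, region indices and in-region offsets with shifts instead of division. A self-contained sketch of that arithmetic over plain word offsets; the constants mirror the ones above, the helper names are illustrative:
#include <cassert>
#include <cstddef>
const size_t Log2RegionSize       = 9;                     // 512 words
const size_t RegionSize           = (size_t)1 << Log2RegionSize;
const size_t RegionSizeOffsetMask = RegionSize - 1;
// Word offset from the start of the covered heap range -> region index.
static size_t addr_to_region_idx(size_t word_offset) {
  return word_offset >> Log2RegionSize;
}
// Word offset within the region that contains word_offset.
static size_t region_offset(size_t word_offset) {
  return word_offset & RegionSizeOffsetMask;
}
// Round a word offset down/up to a region boundary.
static size_t region_align_down(size_t word_offset) {
  return word_offset & ~RegionSizeOffsetMask;
}
static size_t region_align_up(size_t word_offset) {
  return (word_offset + RegionSizeOffsetMask) & ~RegionSizeOffsetMask;
}
int main() {
  const size_t w = 3 * RegionSize + 17;      // a word in the fourth region
  assert(addr_to_region_idx(w) == 3);
  assert(region_offset(w) == 17);
  assert(region_align_down(w) == 3 * RegionSize);
  assert(region_align_up(w) == 4 * RegionSize);
  return 0;
}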
SpaceInfo PSParallelCompact::_space_info[PSParallelCompact::last_space_id];
bool PSParallelCompact::_print_phases = false;
......@@ -100,99 +88,12 @@ GrowableArray<HeapWord*>* PSParallelCompact::_last_gc_live_oops_moved_to = NULL;
GrowableArray<size_t> * PSParallelCompact::_last_gc_live_oops_size = NULL;
#endif
// XXX beg - verification code; only works while we also mark in object headers
static void
verify_mark_bitmap(ParMarkBitMap& _mark_bitmap)
{
ParallelScavengeHeap* heap = PSParallelCompact::gc_heap();
PSPermGen* perm_gen = heap->perm_gen();
PSOldGen* old_gen = heap->old_gen();
PSYoungGen* young_gen = heap->young_gen();
MutableSpace* perm_space = perm_gen->object_space();
MutableSpace* old_space = old_gen->object_space();
MutableSpace* eden_space = young_gen->eden_space();
MutableSpace* from_space = young_gen->from_space();
MutableSpace* to_space = young_gen->to_space();
// 'from_space' here is the survivor space at the lower address.
if (to_space->bottom() < from_space->bottom()) {
from_space = to_space;
to_space = young_gen->from_space();
}
HeapWord* boundaries[12];
unsigned int bidx = 0;
const unsigned int bidx_max = sizeof(boundaries) / sizeof(boundaries[0]);
boundaries[0] = perm_space->bottom();
boundaries[1] = perm_space->top();
boundaries[2] = old_space->bottom();
boundaries[3] = old_space->top();
boundaries[4] = eden_space->bottom();
boundaries[5] = eden_space->top();
boundaries[6] = from_space->bottom();
boundaries[7] = from_space->top();
boundaries[8] = to_space->bottom();
boundaries[9] = to_space->top();
boundaries[10] = to_space->end();
boundaries[11] = to_space->end();
BitMap::idx_t beg_bit = 0;
BitMap::idx_t end_bit;
BitMap::idx_t tmp_bit;
const BitMap::idx_t last_bit = _mark_bitmap.size();
do {
HeapWord* addr = _mark_bitmap.bit_to_addr(beg_bit);
if (_mark_bitmap.is_marked(beg_bit)) {
oop obj = (oop)addr;
assert(obj->is_gc_marked(), "obj header is not marked");
end_bit = _mark_bitmap.find_obj_end(beg_bit, last_bit);
const size_t size = _mark_bitmap.obj_size(beg_bit, end_bit);
assert(size == (size_t)obj->size(), "end bit wrong?");
beg_bit = _mark_bitmap.find_obj_beg(beg_bit + 1, last_bit);
assert(beg_bit > end_bit, "bit set in middle of an obj");
} else {
if (addr >= boundaries[bidx] && addr < boundaries[bidx + 1]) {
// a dead object in the current space.
oop obj = (oop)addr;
end_bit = _mark_bitmap.addr_to_bit(addr + obj->size());
assert(!obj->is_gc_marked(), "obj marked in header, not in bitmap");
tmp_bit = beg_bit + 1;
beg_bit = _mark_bitmap.find_obj_beg(tmp_bit, end_bit);
assert(beg_bit == end_bit, "beg bit set in unmarked obj");
beg_bit = _mark_bitmap.find_obj_end(tmp_bit, end_bit);
assert(beg_bit == end_bit, "end bit set in unmarked obj");
} else if (addr < boundaries[bidx + 2]) {
// addr is between top in the current space and bottom in the next.
end_bit = beg_bit + pointer_delta(boundaries[bidx + 2], addr);
tmp_bit = beg_bit;
beg_bit = _mark_bitmap.find_obj_beg(tmp_bit, end_bit);
assert(beg_bit == end_bit, "beg bit set above top");
beg_bit = _mark_bitmap.find_obj_end(tmp_bit, end_bit);
assert(beg_bit == end_bit, "end bit set above top");
bidx += 2;
} else if (bidx < bidx_max - 2) {
bidx += 2; // ???
} else {
tmp_bit = beg_bit;
beg_bit = _mark_bitmap.find_obj_beg(tmp_bit, last_bit);
assert(beg_bit == last_bit, "beg bit set outside heap");
beg_bit = _mark_bitmap.find_obj_end(tmp_bit, last_bit);
assert(beg_bit == last_bit, "end bit set outside heap");
}
}
} while (beg_bit < last_bit);
}
// XXX end - verification code; only works while we also mark in object headers
#ifndef PRODUCT
const char* PSParallelCompact::space_names[] = {
"perm", "old ", "eden", "from", "to "
};
void PSParallelCompact::print_chunk_ranges()
void PSParallelCompact::print_region_ranges()
{
tty->print_cr("space bottom top end new_top");
tty->print_cr("------ ---------- ---------- ---------- ----------");
......@@ -203,31 +104,31 @@ void PSParallelCompact::print_chunk_ranges()
SIZE_FORMAT_W(10) " " SIZE_FORMAT_W(10) " "
SIZE_FORMAT_W(10) " " SIZE_FORMAT_W(10) " ",
id, space_names[id],
summary_data().addr_to_chunk_idx(space->bottom()),
summary_data().addr_to_chunk_idx(space->top()),
summary_data().addr_to_chunk_idx(space->end()),
summary_data().addr_to_chunk_idx(_space_info[id].new_top()));
summary_data().addr_to_region_idx(space->bottom()),
summary_data().addr_to_region_idx(space->top()),
summary_data().addr_to_region_idx(space->end()),
summary_data().addr_to_region_idx(_space_info[id].new_top()));
}
}
void
print_generic_summary_chunk(size_t i, const ParallelCompactData::ChunkData* c)
print_generic_summary_region(size_t i, const ParallelCompactData::RegionData* c)
{
#define CHUNK_IDX_FORMAT SIZE_FORMAT_W(7)
#define CHUNK_DATA_FORMAT SIZE_FORMAT_W(5)
#define REGION_IDX_FORMAT SIZE_FORMAT_W(7)
#define REGION_DATA_FORMAT SIZE_FORMAT_W(5)
ParallelCompactData& sd = PSParallelCompact::summary_data();
size_t dci = c->destination() ? sd.addr_to_chunk_idx(c->destination()) : 0;
tty->print_cr(CHUNK_IDX_FORMAT " " PTR_FORMAT " "
CHUNK_IDX_FORMAT " " PTR_FORMAT " "
CHUNK_DATA_FORMAT " " CHUNK_DATA_FORMAT " "
CHUNK_DATA_FORMAT " " CHUNK_IDX_FORMAT " %d",
size_t dci = c->destination() ? sd.addr_to_region_idx(c->destination()) : 0;
tty->print_cr(REGION_IDX_FORMAT " " PTR_FORMAT " "
REGION_IDX_FORMAT " " PTR_FORMAT " "
REGION_DATA_FORMAT " " REGION_DATA_FORMAT " "
REGION_DATA_FORMAT " " REGION_IDX_FORMAT " %d",
i, c->data_location(), dci, c->destination(),
c->partial_obj_size(), c->live_obj_size(),
c->data_size(), c->source_chunk(), c->destination_count());
c->data_size(), c->source_region(), c->destination_count());
#undef CHUNK_IDX_FORMAT
#undef CHUNK_DATA_FORMAT
#undef REGION_IDX_FORMAT
#undef REGION_DATA_FORMAT
}
void
......@@ -236,14 +137,14 @@ print_generic_summary_data(ParallelCompactData& summary_data,
HeapWord* const end_addr)
{
size_t total_words = 0;
size_t i = summary_data.addr_to_chunk_idx(beg_addr);
const size_t last = summary_data.addr_to_chunk_idx(end_addr);
size_t i = summary_data.addr_to_region_idx(beg_addr);
const size_t last = summary_data.addr_to_region_idx(end_addr);
HeapWord* pdest = 0;
while (i <= last) {
ParallelCompactData::ChunkData* c = summary_data.chunk(i);
ParallelCompactData::RegionData* c = summary_data.region(i);
if (c->data_size() != 0 || c->destination() != pdest) {
print_generic_summary_chunk(i, c);
print_generic_summary_region(i, c);
total_words += c->data_size();
pdest = c->destination();
}
......@@ -265,16 +166,16 @@ print_generic_summary_data(ParallelCompactData& summary_data,
}
void
print_initial_summary_chunk(size_t i,
const ParallelCompactData::ChunkData* c,
bool newline = true)
print_initial_summary_region(size_t i,
const ParallelCompactData::RegionData* c,
bool newline = true)
{
tty->print(SIZE_FORMAT_W(5) " " PTR_FORMAT " "
SIZE_FORMAT_W(5) " " SIZE_FORMAT_W(5) " "
SIZE_FORMAT_W(5) " " SIZE_FORMAT_W(5) " %d",
i, c->destination(),
c->partial_obj_size(), c->live_obj_size(),
c->data_size(), c->source_chunk(), c->destination_count());
c->data_size(), c->source_region(), c->destination_count());
if (newline) tty->cr();
}
......@@ -285,47 +186,48 @@ print_initial_summary_data(ParallelCompactData& summary_data,
return;
}
const size_t chunk_size = ParallelCompactData::ChunkSize;
HeapWord* const top_aligned_up = summary_data.chunk_align_up(space->top());
const size_t end_chunk = summary_data.addr_to_chunk_idx(top_aligned_up);
const ParallelCompactData::ChunkData* c = summary_data.chunk(end_chunk - 1);
const size_t region_size = ParallelCompactData::RegionSize;
typedef ParallelCompactData::RegionData RegionData;
HeapWord* const top_aligned_up = summary_data.region_align_up(space->top());
const size_t end_region = summary_data.addr_to_region_idx(top_aligned_up);
const RegionData* c = summary_data.region(end_region - 1);
HeapWord* end_addr = c->destination() + c->data_size();
const size_t live_in_space = pointer_delta(end_addr, space->bottom());
// Print (and count) the full chunks at the beginning of the space.
size_t full_chunk_count = 0;
size_t i = summary_data.addr_to_chunk_idx(space->bottom());
while (i < end_chunk && summary_data.chunk(i)->data_size() == chunk_size) {
print_initial_summary_chunk(i, summary_data.chunk(i));
++full_chunk_count;
// Print (and count) the full regions at the beginning of the space.
size_t full_region_count = 0;
size_t i = summary_data.addr_to_region_idx(space->bottom());
while (i < end_region && summary_data.region(i)->data_size() == region_size) {
print_initial_summary_region(i, summary_data.region(i));
++full_region_count;
++i;
}
size_t live_to_right = live_in_space - full_chunk_count * chunk_size;
size_t live_to_right = live_in_space - full_region_count * region_size;
double max_reclaimed_ratio = 0.0;
size_t max_reclaimed_ratio_chunk = 0;
size_t max_reclaimed_ratio_region = 0;
size_t max_dead_to_right = 0;
size_t max_live_to_right = 0;
// Print the 'reclaimed ratio' for chunks while there is something live in the
// chunk or to the right of it. The remaining chunks are empty (and
// Print the 'reclaimed ratio' for regions while there is something live in
// the region or to the right of it. The remaining regions are empty (and
// uninteresting), and computing the ratio will result in division by 0.
while (i < end_chunk && live_to_right > 0) {
c = summary_data.chunk(i);
HeapWord* const chunk_addr = summary_data.chunk_to_addr(i);
const size_t used_to_right = pointer_delta(space->top(), chunk_addr);
while (i < end_region && live_to_right > 0) {
c = summary_data.region(i);
HeapWord* const region_addr = summary_data.region_to_addr(i);
const size_t used_to_right = pointer_delta(space->top(), region_addr);
const size_t dead_to_right = used_to_right - live_to_right;
const double reclaimed_ratio = double(dead_to_right) / live_to_right;
if (reclaimed_ratio > max_reclaimed_ratio) {
max_reclaimed_ratio = reclaimed_ratio;
max_reclaimed_ratio_chunk = i;
max_reclaimed_ratio_region = i;
max_dead_to_right = dead_to_right;
max_live_to_right = live_to_right;
}
print_initial_summary_chunk(i, c, false);
print_initial_summary_region(i, c, false);
tty->print_cr(" %12.10f " SIZE_FORMAT_W(10) " " SIZE_FORMAT_W(10),
reclaimed_ratio, dead_to_right, live_to_right);
......@@ -333,14 +235,14 @@ print_initial_summary_data(ParallelCompactData& summary_data,
++i;
}
// Any remaining chunks are empty. Print one more if there is one.
if (i < end_chunk) {
print_initial_summary_chunk(i, summary_data.chunk(i));
// Any remaining regions are empty. Print one more if there is one.
if (i < end_region) {
print_initial_summary_region(i, summary_data.region(i));
}
tty->print_cr("max: " SIZE_FORMAT_W(4) " d2r=" SIZE_FORMAT_W(10) " "
"l2r=" SIZE_FORMAT_W(10) " max_ratio=%14.12f",
max_reclaimed_ratio_chunk, max_dead_to_right,
max_reclaimed_ratio_region, max_dead_to_right,
max_live_to_right, max_reclaimed_ratio);
}
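The "reclaimed ratio" printed per region above is dead_to_right / live_to_right: how many dead words would be squeezed out for each live word that still has to move if the dense prefix ended at this region. A tiny worked example of the computation; the numbers are made up for illustration:
#include <cassert>
#include <cstddef>
#include <cstdio>
int main() {
  // Suppose the space's top is 10,000 words past this region's start and
  // 6,000 live words remain at or to the right of the region.
  const size_t used_to_right = 10000;
  const size_t live_to_right = 6000;
  const size_t dead_to_right = used_to_right - live_to_right;   // 4,000 words
  // Ratio of reclaimable dead space to live data that must still move.
  const double reclaimed_ratio = double(dead_to_right) / live_to_right;
  std::printf("reclaimed ratio = %.4f\n", reclaimed_ratio);     // 0.6667
  assert(dead_to_right == 4000);
  return 0;
}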
......@@ -372,13 +274,9 @@ ParallelCompactData::ParallelCompactData()
{
_region_start = 0;
_chunk_vspace = 0;
_chunk_data = 0;
_chunk_count = 0;
_block_vspace = 0;
_block_data = 0;
_block_count = 0;
_region_vspace = 0;
_region_data = 0;
_region_count = 0;
}
bool ParallelCompactData::initialize(MemRegion covered_region)
......@@ -387,18 +285,12 @@ bool ParallelCompactData::initialize(MemRegion covered_region)
const size_t region_size = covered_region.word_size();
DEBUG_ONLY(_region_end = _region_start + region_size;)
assert(chunk_align_down(_region_start) == _region_start,
assert(region_align_down(_region_start) == _region_start,
"region start not aligned");
assert((region_size & ChunkSizeOffsetMask) == 0,
"region size not a multiple of ChunkSize");
bool result = initialize_chunk_data(region_size);
assert((region_size & RegionSizeOffsetMask) == 0,
"region size not a multiple of RegionSize");
// Initialize the block data if it will be used for updating pointers, or if
// this is a debug build.
if (!UseParallelOldGCChunkPointerCalc || trueInDebug) {
result = result && initialize_block_data(region_size);
}
bool result = initialize_region_data(region_size);
return result;
}
......@@ -429,25 +321,13 @@ ParallelCompactData::create_vspace(size_t count, size_t element_size)
return 0;
}
bool ParallelCompactData::initialize_chunk_data(size_t region_size)
{
const size_t count = (region_size + ChunkSizeOffsetMask) >> Log2ChunkSize;
_chunk_vspace = create_vspace(count, sizeof(ChunkData));
if (_chunk_vspace != 0) {
_chunk_data = (ChunkData*)_chunk_vspace->reserved_low_addr();
_chunk_count = count;
return true;
}
return false;
}
bool ParallelCompactData::initialize_block_data(size_t region_size)
bool ParallelCompactData::initialize_region_data(size_t region_size)
{
const size_t count = (region_size + BlockOffsetMask) >> Log2BlockSize;
_block_vspace = create_vspace(count, sizeof(BlockData));
if (_block_vspace != 0) {
_block_data = (BlockData*)_block_vspace->reserved_low_addr();
_block_count = count;
const size_t count = (region_size + RegionSizeOffsetMask) >> Log2RegionSize;
_region_vspace = create_vspace(count, sizeof(RegionData));
if (_region_vspace != 0) {
_region_data = (RegionData*)_region_vspace->reserved_low_addr();
_region_count = count;
return true;
}
return false;
......@@ -455,38 +335,27 @@ bool ParallelCompactData::initialize_block_data(size_t region_size)
void ParallelCompactData::clear()
{
if (_block_data) {
memset(_block_data, 0, _block_vspace->committed_size());
}
memset(_chunk_data, 0, _chunk_vspace->committed_size());
memset(_region_data, 0, _region_vspace->committed_size());
}
void ParallelCompactData::clear_range(size_t beg_chunk, size_t end_chunk) {
assert(beg_chunk <= _chunk_count, "beg_chunk out of range");
assert(end_chunk <= _chunk_count, "end_chunk out of range");
assert(ChunkSize % BlockSize == 0, "ChunkSize not a multiple of BlockSize");
void ParallelCompactData::clear_range(size_t beg_region, size_t end_region) {
assert(beg_region <= _region_count, "beg_region out of range");
assert(end_region <= _region_count, "end_region out of range");
const size_t chunk_cnt = end_chunk - beg_chunk;
if (_block_data) {
const size_t blocks_per_chunk = ChunkSize / BlockSize;
const size_t beg_block = beg_chunk * blocks_per_chunk;
const size_t block_cnt = chunk_cnt * blocks_per_chunk;
memset(_block_data + beg_block, 0, block_cnt * sizeof(BlockData));
}
memset(_chunk_data + beg_chunk, 0, chunk_cnt * sizeof(ChunkData));
const size_t region_cnt = end_region - beg_region;
memset(_region_data + beg_region, 0, region_cnt * sizeof(RegionData));
}
HeapWord* ParallelCompactData::partial_obj_end(size_t chunk_idx) const
HeapWord* ParallelCompactData::partial_obj_end(size_t region_idx) const
{
const ChunkData* cur_cp = chunk(chunk_idx);
const ChunkData* const end_cp = chunk(chunk_count() - 1);
const RegionData* cur_cp = region(region_idx);
const RegionData* const end_cp = region(region_count() - 1);
HeapWord* result = chunk_to_addr(chunk_idx);
HeapWord* result = region_to_addr(region_idx);
if (cur_cp < end_cp) {
do {
result += cur_cp->partial_obj_size();
} while (cur_cp->partial_obj_size() == ChunkSize && ++cur_cp < end_cp);
} while (cur_cp->partial_obj_size() == RegionSize && ++cur_cp < end_cp);
}
return result;
}
......@@ -494,56 +363,56 @@ HeapWord* ParallelCompactData::partial_obj_end(size_t chunk_idx) const
void ParallelCompactData::add_obj(HeapWord* addr, size_t len)
{
const size_t obj_ofs = pointer_delta(addr, _region_start);
const size_t beg_chunk = obj_ofs >> Log2ChunkSize;
const size_t end_chunk = (obj_ofs + len - 1) >> Log2ChunkSize;
const size_t beg_region = obj_ofs >> Log2RegionSize;
const size_t end_region = (obj_ofs + len - 1) >> Log2RegionSize;
DEBUG_ONLY(Atomic::inc_ptr(&add_obj_count);)
DEBUG_ONLY(Atomic::add_ptr(len, &add_obj_size);)
if (beg_chunk == end_chunk) {
// All in one chunk.
_chunk_data[beg_chunk].add_live_obj(len);
if (beg_region == end_region) {
// All in one region.
_region_data[beg_region].add_live_obj(len);
return;
}
// First chunk.
const size_t beg_ofs = chunk_offset(addr);
_chunk_data[beg_chunk].add_live_obj(ChunkSize - beg_ofs);
// First region.
const size_t beg_ofs = region_offset(addr);
_region_data[beg_region].add_live_obj(RegionSize - beg_ofs);
klassOop klass = ((oop)addr)->klass();
// Middle chunks--completely spanned by this object.
for (size_t chunk = beg_chunk + 1; chunk < end_chunk; ++chunk) {
_chunk_data[chunk].set_partial_obj_size(ChunkSize);
_chunk_data[chunk].set_partial_obj_addr(addr);
// Middle regions--completely spanned by this object.
for (size_t region = beg_region + 1; region < end_region; ++region) {
_region_data[region].set_partial_obj_size(RegionSize);
_region_data[region].set_partial_obj_addr(addr);
}
// Last chunk.
const size_t end_ofs = chunk_offset(addr + len - 1);
_chunk_data[end_chunk].set_partial_obj_size(end_ofs + 1);
_chunk_data[end_chunk].set_partial_obj_addr(addr);
// Last region.
const size_t end_ofs = region_offset(addr + len - 1);
_region_data[end_region].set_partial_obj_size(end_ofs + 1);
_region_data[end_region].set_partial_obj_addr(addr);
}
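add_obj splits an object's length across every region it touches: the first region is credited with the words up to its boundary, each fully spanned middle region records partial_obj_size == RegionSize, and the last region records the trailing end_ofs + 1 words. A worked sketch of that split with RegionSize = 512 words; the vector of counters is purely illustrative:
#include <cassert>
#include <cstddef>
#include <vector>
const size_t Log2RegionSize = 9;
const size_t RegionSize     = (size_t)1 << Log2RegionSize;   // 512 words
// live[i] accumulates the live words attributed to region i.
static void add_obj(std::vector<size_t>& live, size_t obj_ofs, size_t len) {
  const size_t beg_region = obj_ofs >> Log2RegionSize;
  const size_t end_region = (obj_ofs + len - 1) >> Log2RegionSize;
  if (beg_region == end_region) {           // all in one region
    live[beg_region] += len;
    return;
  }
  const size_t beg_ofs = obj_ofs & (RegionSize - 1);
  live[beg_region] += RegionSize - beg_ofs; // first region: up to the boundary
  for (size_t r = beg_region + 1; r < end_region; ++r) {
    live[r] += RegionSize;                  // middle regions: completely spanned
  }
  const size_t end_ofs = (obj_ofs + len - 1) & (RegionSize - 1);
  live[end_region] += end_ofs + 1;          // last region: trailing words
}
int main() {
  std::vector<size_t> live(4, 0);
  // Object starting 100 words into region 0, 1000 words long:
  // 412 words in region 0, 512 in region 1, 76 in region 2.
  add_obj(live, 100, 1000);
  assert(live[0] == 412 && live[1] == 512 && live[2] == 76);
  assert(live[0] + live[1] + live[2] == 1000);
  return 0;
}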
void
ParallelCompactData::summarize_dense_prefix(HeapWord* beg, HeapWord* end)
{
assert(chunk_offset(beg) == 0, "not ChunkSize aligned");
assert(chunk_offset(end) == 0, "not ChunkSize aligned");
assert(region_offset(beg) == 0, "not RegionSize aligned");
assert(region_offset(end) == 0, "not RegionSize aligned");
size_t cur_chunk = addr_to_chunk_idx(beg);
const size_t end_chunk = addr_to_chunk_idx(end);
size_t cur_region = addr_to_region_idx(beg);
const size_t end_region = addr_to_region_idx(end);
HeapWord* addr = beg;
while (cur_chunk < end_chunk) {
_chunk_data[cur_chunk].set_destination(addr);
_chunk_data[cur_chunk].set_destination_count(0);
_chunk_data[cur_chunk].set_source_chunk(cur_chunk);
_chunk_data[cur_chunk].set_data_location(addr);
while (cur_region < end_region) {
_region_data[cur_region].set_destination(addr);
_region_data[cur_region].set_destination_count(0);
_region_data[cur_region].set_source_region(cur_region);
_region_data[cur_region].set_data_location(addr);
// Update live_obj_size so the chunk appears completely full.
size_t live_size = ChunkSize - _chunk_data[cur_chunk].partial_obj_size();
_chunk_data[cur_chunk].set_live_obj_size(live_size);
// Update live_obj_size so the region appears completely full.
size_t live_size = RegionSize - _region_data[cur_region].partial_obj_size();
_region_data[cur_region].set_live_obj_size(live_size);
++cur_chunk;
addr += ChunkSize;
++cur_region;
addr += RegionSize;
}
}
......@@ -552,7 +421,7 @@ bool ParallelCompactData::summarize(HeapWord* target_beg, HeapWord* target_end,
HeapWord** target_next,
HeapWord** source_next) {
// This is too strict.
// assert(chunk_offset(source_beg) == 0, "not ChunkSize aligned");
// assert(region_offset(source_beg) == 0, "not RegionSize aligned");
if (TraceParallelOldGCSummaryPhase) {
tty->print_cr("tb=" PTR_FORMAT " te=" PTR_FORMAT " "
......@@ -564,125 +433,93 @@ bool ParallelCompactData::summarize(HeapWord* target_beg, HeapWord* target_end,
source_next != 0 ? *source_next : (HeapWord*) 0);
}
size_t cur_chunk = addr_to_chunk_idx(source_beg);
const size_t end_chunk = addr_to_chunk_idx(chunk_align_up(source_end));
size_t cur_region = addr_to_region_idx(source_beg);
const size_t end_region = addr_to_region_idx(region_align_up(source_end));
HeapWord *dest_addr = target_beg;
while (cur_chunk < end_chunk) {
size_t words = _chunk_data[cur_chunk].data_size();
while (cur_region < end_region) {
size_t words = _region_data[cur_region].data_size();
#if 1
assert(pointer_delta(target_end, dest_addr) >= words,
"source region does not fit into target region");
#else
// XXX - need some work on the corner cases here. If the chunk does not
// fit, then must either make sure any partial_obj from the chunk fits, or
// 'undo' the initial part of the partial_obj that is in the previous chunk.
// XXX - need some work on the corner cases here. If the region does not
// fit, then must either make sure any partial_obj from the region fits, or
// "undo" the initial part of the partial_obj that is in the previous
// region.
if (dest_addr + words >= target_end) {
// Let the caller know where to continue.
*target_next = dest_addr;
*source_next = chunk_to_addr(cur_chunk);
*source_next = region_to_addr(cur_region);
return false;
}
#endif // #if 1
_chunk_data[cur_chunk].set_destination(dest_addr);
_region_data[cur_region].set_destination(dest_addr);
// Set the destination_count for cur_chunk, and if necessary, update
// source_chunk for a destination chunk. The source_chunk field is updated
// if cur_chunk is the first (left-most) chunk to be copied to a destination
// chunk.
// Set the destination_count for cur_region, and if necessary, update
// source_region for a destination region. The source_region field is
// updated if cur_region is the first (left-most) region to be copied to a
// destination region.
//
// The destination_count calculation is a bit subtle. A chunk that has data
// that compacts into itself does not count itself as a destination. This
// maintains the invariant that a zero count means the chunk is available
// and can be claimed and then filled.
// The destination_count calculation is a bit subtle. A region that has
// data that compacts into itself does not count itself as a destination.
// This maintains the invariant that a zero count means the region is
// available and can be claimed and then filled.
if (words > 0) {
HeapWord* const last_addr = dest_addr + words - 1;
const size_t dest_chunk_1 = addr_to_chunk_idx(dest_addr);
const size_t dest_chunk_2 = addr_to_chunk_idx(last_addr);
const size_t dest_region_1 = addr_to_region_idx(dest_addr);
const size_t dest_region_2 = addr_to_region_idx(last_addr);
#if 0
// Initially assume that the destination chunks will be the same and
// Initially assume that the destination regions will be the same and
// adjust the value below if necessary. Under this assumption, if
// cur_chunk == dest_chunk_2, then cur_chunk will be compacted completely
// into itself.
uint destination_count = cur_chunk == dest_chunk_2 ? 0 : 1;
if (dest_chunk_1 != dest_chunk_2) {
// Destination chunks differ; adjust destination_count.
// cur_region == dest_region_2, then cur_region will be compacted
// completely into itself.
uint destination_count = cur_region == dest_region_2 ? 0 : 1;
if (dest_region_1 != dest_region_2) {
// Destination regions differ; adjust destination_count.
destination_count += 1;
// Data from cur_chunk will be copied to the start of dest_chunk_2.
_chunk_data[dest_chunk_2].set_source_chunk(cur_chunk);
} else if (chunk_offset(dest_addr) == 0) {
// Data from cur_chunk will be copied to the start of the destination
// chunk.
_chunk_data[dest_chunk_1].set_source_chunk(cur_chunk);
// Data from cur_region will be copied to the start of dest_region_2.
_region_data[dest_region_2].set_source_region(cur_region);
} else if (region_offset(dest_addr) == 0) {
// Data from cur_region will be copied to the start of the destination
// region.
_region_data[dest_region_1].set_source_region(cur_region);
}
#else
// Initially assume that the destination chunks will be different and
// Initially assume that the destination regions will be different and
// adjust the value below if necessary. Under this assumption, if
// cur_chunk == dest_chunk2, then cur_chunk will be compacted partially
// into dest_chunk_1 and partially into itself.
uint destination_count = cur_chunk == dest_chunk_2 ? 1 : 2;
if (dest_chunk_1 != dest_chunk_2) {
// Data from cur_chunk will be copied to the start of dest_chunk_2.
_chunk_data[dest_chunk_2].set_source_chunk(cur_chunk);
// cur_region == dest_region2, then cur_region will be compacted partially
// into dest_region_1 and partially into itself.
uint destination_count = cur_region == dest_region_2 ? 1 : 2;
if (dest_region_1 != dest_region_2) {
// Data from cur_region will be copied to the start of dest_region_2.
_region_data[dest_region_2].set_source_region(cur_region);
} else {
// Destination chunks are the same; adjust destination_count.
// Destination regions are the same; adjust destination_count.
destination_count -= 1;
if (chunk_offset(dest_addr) == 0) {
// Data from cur_chunk will be copied to the start of the destination
// chunk.
_chunk_data[dest_chunk_1].set_source_chunk(cur_chunk);
if (region_offset(dest_addr) == 0) {
// Data from cur_region will be copied to the start of the destination
// region.
_region_data[dest_region_1].set_source_region(cur_region);
}
}
#endif // #if 0
_chunk_data[cur_chunk].set_destination_count(destination_count);
_chunk_data[cur_chunk].set_data_location(chunk_to_addr(cur_chunk));
_region_data[cur_region].set_destination_count(destination_count);
_region_data[cur_region].set_data_location(region_to_addr(cur_region));
dest_addr += words;
}
++cur_chunk;
++cur_region;
}
*target_next = dest_addr;
return true;
}
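The destination_count branches above boil down to a small rule: assume the region's data spills across two destination regions, then subtract when both destinations coincide, so that a final count of zero still means the region compacts entirely into itself and can be claimed and filled at once. A compact sketch of just that rule over region indices; the helper and the example indices are illustrative:
#include <cassert>
#include <cstddef>
// Destination count for one source region, following the branch structure
// above: assume two distinct destination regions, then correct downwards.
// All three arguments are region indices (no heap state).
static unsigned destination_count(size_t cur_region,
                                  size_t dest_region_1,
                                  size_t dest_region_2) {
  unsigned count = (cur_region == dest_region_2) ? 1 : 2;
  if (dest_region_1 == dest_region_2) {
    count -= 1;                 // data lands in a single destination region
  }
  return count;
}
int main() {
  // Region 7 compacts entirely into itself: available immediately (count 0).
  assert(destination_count(7, 7, 7) == 0);
  // Region 7 compacts entirely into region 3: one destination.
  assert(destination_count(7, 3, 3) == 1);
  // Region 7 spills from region 6 into itself: only region 6 counts.
  assert(destination_count(7, 6, 7) == 1);
  // Region 7 spills across regions 3 and 4: two destinations.
  assert(destination_count(7, 3, 4) == 2);
  return 0;
}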
bool ParallelCompactData::partial_obj_ends_in_block(size_t block_index) {
HeapWord* block_addr = block_to_addr(block_index);
HeapWord* block_end_addr = block_addr + BlockSize;
size_t chunk_index = addr_to_chunk_idx(block_addr);
HeapWord* partial_obj_end_addr = partial_obj_end(chunk_index);
// An object that ends at the end of the block, ends
// in the block (the last word of the object is to
// the left of the end).
if ((block_addr < partial_obj_end_addr) &&
(partial_obj_end_addr <= block_end_addr)) {
return true;
}
return false;
}
HeapWord* ParallelCompactData::calc_new_pointer(HeapWord* addr) {
HeapWord* result = NULL;
if (UseParallelOldGCChunkPointerCalc) {
result = chunk_calc_new_pointer(addr);
} else {
result = block_calc_new_pointer(addr);
}
return result;
}
// This method is too complicated (and expensive) to be called
// for every reference.
// Try to restructure this so that a NULL is returned if
// the object is dead.  But don't waste the cycles to explicitly check
// that it is dead since only live objects should be passed in.
HeapWord* ParallelCompactData::chunk_calc_new_pointer(HeapWord* addr) {
assert(addr != NULL, "Should detect NULL oop earlier");
assert(PSParallelCompact::gc_heap()->is_in(addr), "addr not in heap");
#ifdef ASSERT
......@@ -692,30 +529,30 @@ HeapWord* ParallelCompactData::chunk_calc_new_pointer(HeapWord* addr) {
#endif
assert(PSParallelCompact::mark_bitmap()->is_marked(addr), "obj not marked");
// Chunk covering the object.
size_t chunk_index = addr_to_chunk_idx(addr);
const ChunkData* const chunk_ptr = chunk(chunk_index);
HeapWord* const chunk_addr = chunk_align_down(addr);
// Region covering the object.
size_t region_index = addr_to_region_idx(addr);
const RegionData* const region_ptr = region(region_index);
HeapWord* const region_addr = region_align_down(addr);
assert(addr < chunk_addr + ChunkSize, "Chunk does not cover object");
assert(addr_to_chunk_ptr(chunk_addr) == chunk_ptr, "sanity check");
assert(addr < region_addr + RegionSize, "Region does not cover object");
assert(addr_to_region_ptr(region_addr) == region_ptr, "sanity check");
HeapWord* result = chunk_ptr->destination();
HeapWord* result = region_ptr->destination();
// If all the data in the chunk is live, then the new location of the object
// can be calculated from the destination of the chunk plus the offset of the
// object in the chunk.
if (chunk_ptr->data_size() == ChunkSize) {
result += pointer_delta(addr, chunk_addr);
// If all the data in the region is live, then the new location of the object
// can be calculated from the destination of the region plus the offset of the
// object in the region.
if (region_ptr->data_size() == RegionSize) {
result += pointer_delta(addr, region_addr);
return result;
}
// The new location of the object is
// chunk destination +
// size of the partial object extending onto the chunk +
// sizes of the live objects in the Chunk that are to the left of addr
const size_t partial_obj_size = chunk_ptr->partial_obj_size();
HeapWord* const search_start = chunk_addr + partial_obj_size;
// region destination +
// size of the partial object extending onto the region +
// sizes of the live objects in the Region that are to the left of addr
const size_t partial_obj_size = region_ptr->partial_obj_size();
HeapWord* const search_start = region_addr + partial_obj_size;
const ParMarkBitMap* bitmap = PSParallelCompact::mark_bitmap();
size_t live_to_left = bitmap->live_words_in_range(search_start, oop(addr));
......@@ -725,50 +562,6 @@ HeapWord* ParallelCompactData::chunk_calc_new_pointer(HeapWord* addr) {
return result;
}
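For a region that is not completely live, the forwarding address computed above is: region destination + the partial object spilling in from the left + the live words between that point and the object. A sketch of the same formula over word offsets, with a toy bitmap standing in for ParMarkBitMap::live_words_in_range; all names and numbers are illustrative:
#include <cassert>
#include <cstddef>
#include <vector>
// Toy model: live[w] is true if heap word w is live.
static size_t live_words_in_range(const std::vector<bool>& live,
                                  size_t beg, size_t end) {
  size_t n = 0;
  for (size_t w = beg; w < end; ++w) {
    if (live[w]) ++n;
  }
  return n;
}
// New location of the object starting at word 'addr' inside a region that
// starts at 'region_addr', given the region's summary data.
static size_t calc_new_pointer(const std::vector<bool>& live,
                               size_t addr,
                               size_t region_addr,
                               size_t region_destination,
                               size_t partial_obj_size) {
  const size_t search_start = region_addr + partial_obj_size;
  const size_t live_to_left = live_words_in_range(live, search_start, addr);
  return region_destination + partial_obj_size + live_to_left;
}
int main() {
  // Region starts at word 512 and compacts to word 100. Four words of a
  // partial object spill in from the left; two more live words precede
  // the object at word 530.
  std::vector<bool> live(1024, false);
  live[520] = live[521] = true;           // two live words left of the object
  const size_t new_addr = calc_new_pointer(live, 530, 512, 100, 4);
  assert(new_addr == 100 + 4 + 2);
  return 0;
}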
HeapWord* ParallelCompactData::block_calc_new_pointer(HeapWord* addr) {
assert(addr != NULL, "Should detect NULL oop earlier");
assert(PSParallelCompact::gc_heap()->is_in(addr), "addr not in heap");
#ifdef ASSERT
if (PSParallelCompact::mark_bitmap()->is_unmarked(addr)) {
gclog_or_tty->print_cr("calc_new_pointer:: addr " PTR_FORMAT, addr);
}
#endif
assert(PSParallelCompact::mark_bitmap()->is_marked(addr), "obj not marked");
// Chunk covering the object.
size_t chunk_index = addr_to_chunk_idx(addr);
const ChunkData* const chunk_ptr = chunk(chunk_index);
HeapWord* const chunk_addr = chunk_align_down(addr);
assert(addr < chunk_addr + ChunkSize, "Chunk does not cover object");
assert(addr_to_chunk_ptr(chunk_addr) == chunk_ptr, "sanity check");
HeapWord* result = chunk_ptr->destination();
// If all the data in the chunk is live, then the new location of the object
// can be calculated from the destination of the chunk plus the offset of the
// object in the chunk.
if (chunk_ptr->data_size() == ChunkSize) {
result += pointer_delta(addr, chunk_addr);
return result;
}
// The new location of the object is
// chunk destination +
// block offset +
// sizes of the live objects in the Block that are to the left of addr
const size_t block_offset = addr_to_block_ptr(addr)->offset();
HeapWord* const search_start = chunk_addr + block_offset;
const ParMarkBitMap* bitmap = PSParallelCompact::mark_bitmap();
size_t live_to_left = bitmap->live_words_in_range(search_start, oop(addr));
result += block_offset + live_to_left;
assert(result <= addr, "object cannot move to the right");
assert(result == chunk_calc_new_pointer(addr), "Should match");
return result;
}
klassOop ParallelCompactData::calc_new_klass(klassOop old_klass) {
klassOop updated_klass;
if (PSParallelCompact::should_update_klass(old_klass)) {
......@@ -792,15 +585,14 @@ void ParallelCompactData::verify_clear(const PSVirtualSpace* vspace)
void ParallelCompactData::verify_clear()
{
verify_clear(_chunk_vspace);
verify_clear(_block_vspace);
verify_clear(_region_vspace);
}
#endif // #ifdef ASSERT
#ifdef NOT_PRODUCT
ParallelCompactData::ChunkData* debug_chunk(size_t chunk_index) {
ParallelCompactData::RegionData* debug_region(size_t region_index) {
ParallelCompactData& sd = PSParallelCompact::summary_data();
return sd.chunk(chunk_index);
return sd.region(region_index);
}
#endif
......@@ -953,10 +745,10 @@ PSParallelCompact::clear_data_covering_space(SpaceId id)
const idx_t end_bit = BitMap::word_align_up(_mark_bitmap.addr_to_bit(top));
_mark_bitmap.clear_range(beg_bit, end_bit);
const size_t beg_chunk = _summary_data.addr_to_chunk_idx(bot);
const size_t end_chunk =
_summary_data.addr_to_chunk_idx(_summary_data.chunk_align_up(max_top));
_summary_data.clear_range(beg_chunk, end_chunk);
const size_t beg_region = _summary_data.addr_to_region_idx(bot);
const size_t end_region =
_summary_data.addr_to_region_idx(_summary_data.region_align_up(max_top));
_summary_data.clear_range(beg_region, end_region);
}
void PSParallelCompact::pre_compact(PreGCValues* pre_gc_values)
......@@ -1072,19 +864,19 @@ HeapWord*
PSParallelCompact::compute_dense_prefix_via_density(const SpaceId id,
bool maximum_compaction)
{
const size_t chunk_size = ParallelCompactData::ChunkSize;
const size_t region_size = ParallelCompactData::RegionSize;
const ParallelCompactData& sd = summary_data();
const MutableSpace* const space = _space_info[id].space();
HeapWord* const top_aligned_up = sd.chunk_align_up(space->top());
const ChunkData* const beg_cp = sd.addr_to_chunk_ptr(space->bottom());
const ChunkData* const end_cp = sd.addr_to_chunk_ptr(top_aligned_up);
HeapWord* const top_aligned_up = sd.region_align_up(space->top());
const RegionData* const beg_cp = sd.addr_to_region_ptr(space->bottom());
const RegionData* const end_cp = sd.addr_to_region_ptr(top_aligned_up);
// Skip full chunks at the beginning of the space--they are necessarily part
// Skip full regions at the beginning of the space--they are necessarily part
// of the dense prefix.
size_t full_count = 0;
const ChunkData* cp;
for (cp = beg_cp; cp < end_cp && cp->data_size() == chunk_size; ++cp) {
const RegionData* cp;
for (cp = beg_cp; cp < end_cp && cp->data_size() == region_size; ++cp) {
++full_count;
}
......@@ -1093,7 +885,7 @@ PSParallelCompact::compute_dense_prefix_via_density(const SpaceId id,
const bool interval_ended = gcs_since_max > HeapMaximumCompactionInterval;
if (maximum_compaction || cp == end_cp || interval_ended) {
_maximum_compaction_gc_num = total_invocations();
return sd.chunk_to_addr(cp);
return sd.region_to_addr(cp);
}
HeapWord* const new_top = _space_info[id].new_top();
......@@ -1116,52 +908,53 @@ PSParallelCompact::compute_dense_prefix_via_density(const SpaceId id,
}
// XXX - Use binary search?
HeapWord* dense_prefix = sd.chunk_to_addr(cp);
const ChunkData* full_cp = cp;
const ChunkData* const top_cp = sd.addr_to_chunk_ptr(space->top() - 1);
HeapWord* dense_prefix = sd.region_to_addr(cp);
const RegionData* full_cp = cp;
const RegionData* const top_cp = sd.addr_to_region_ptr(space->top() - 1);
while (cp < end_cp) {
HeapWord* chunk_destination = cp->destination();
const size_t cur_deadwood = pointer_delta(dense_prefix, chunk_destination);
HeapWord* region_destination = cp->destination();
const size_t cur_deadwood = pointer_delta(dense_prefix, region_destination);
if (TraceParallelOldGCDensePrefix && Verbose) {
tty->print_cr("c#=" SIZE_FORMAT_W(4) " dst=" PTR_FORMAT " "
"dp=" SIZE_FORMAT_W(8) " " "cdw=" SIZE_FORMAT_W(8),
sd.chunk(cp), chunk_destination,
sd.region(cp), region_destination,
dense_prefix, cur_deadwood);
}
if (cur_deadwood >= deadwood_goal) {
// Found the chunk that has the correct amount of deadwood to the left.
// This typically occurs after crossing a fairly sparse set of chunks, so
// iterate backwards over those sparse chunks, looking for the chunk that
// has the lowest density of live objects 'to the right.'
size_t space_to_left = sd.chunk(cp) * chunk_size;
// Found the region that has the correct amount of deadwood to the left.
// This typically occurs after crossing a fairly sparse set of regions, so
// iterate backwards over those sparse regions, looking for the region
// that has the lowest density of live objects 'to the right.'
size_t space_to_left = sd.region(cp) * region_size;
size_t live_to_left = space_to_left - cur_deadwood;
size_t space_to_right = space_capacity - space_to_left;
size_t live_to_right = space_live - live_to_left;
double density_to_right = double(live_to_right) / space_to_right;
while (cp > full_cp) {
--cp;
const size_t prev_chunk_live_to_right = live_to_right - cp->data_size();
const size_t prev_chunk_space_to_right = space_to_right + chunk_size;
double prev_chunk_density_to_right =
double(prev_chunk_live_to_right) / prev_chunk_space_to_right;
if (density_to_right <= prev_chunk_density_to_right) {
const size_t prev_region_live_to_right = live_to_right -
cp->data_size();
const size_t prev_region_space_to_right = space_to_right + region_size;
double prev_region_density_to_right =
double(prev_region_live_to_right) / prev_region_space_to_right;
if (density_to_right <= prev_region_density_to_right) {
return dense_prefix;
}
if (TraceParallelOldGCDensePrefix && Verbose) {
tty->print_cr("backing up from c=" SIZE_FORMAT_W(4) " d2r=%10.8f "
"pc_d2r=%10.8f", sd.chunk(cp), density_to_right,
prev_chunk_density_to_right);
"pc_d2r=%10.8f", sd.region(cp), density_to_right,
prev_region_density_to_right);
}
dense_prefix -= chunk_size;
live_to_right = prev_chunk_live_to_right;
space_to_right = prev_chunk_space_to_right;
density_to_right = prev_chunk_density_to_right;
dense_prefix -= region_size;
live_to_right = prev_region_live_to_right;
space_to_right = prev_region_space_to_right;
density_to_right = prev_region_density_to_right;
}
return dense_prefix;
}
dense_prefix += chunk_size;
dense_prefix += region_size;
++cp;
}
......@@ -1174,8 +967,8 @@ void PSParallelCompact::print_dense_prefix_stats(const char* const algorithm,
const bool maximum_compaction,
HeapWord* const addr)
{
const size_t chunk_idx = summary_data().addr_to_chunk_idx(addr);
ChunkData* const cp = summary_data().chunk(chunk_idx);
const size_t region_idx = summary_data().addr_to_region_idx(addr);
RegionData* const cp = summary_data().region(region_idx);
const MutableSpace* const space = _space_info[id].space();
HeapWord* const new_top = _space_info[id].new_top();
......@@ -1191,7 +984,7 @@ void PSParallelCompact::print_dense_prefix_stats(const char* const algorithm,
"d2l=" SIZE_FORMAT " d2l%%=%6.4f "
"d2r=" SIZE_FORMAT " l2r=" SIZE_FORMAT
" ratio=%10.8f",
algorithm, addr, chunk_idx,
algorithm, addr, region_idx,
space_live,
dead_to_left, dead_to_left_pct,
dead_to_right, live_to_right,
......@@ -1253,52 +1046,52 @@ double PSParallelCompact::dead_wood_limiter(double density, size_t min_percent)
return MAX2(limit, 0.0);
}
ParallelCompactData::ChunkData*
PSParallelCompact::first_dead_space_chunk(const ChunkData* beg,
const ChunkData* end)
ParallelCompactData::RegionData*
PSParallelCompact::first_dead_space_region(const RegionData* beg,
const RegionData* end)
{
const size_t chunk_size = ParallelCompactData::ChunkSize;
const size_t region_size = ParallelCompactData::RegionSize;
ParallelCompactData& sd = summary_data();
size_t left = sd.chunk(beg);
size_t right = end > beg ? sd.chunk(end) - 1 : left;
size_t left = sd.region(beg);
size_t right = end > beg ? sd.region(end) - 1 : left;
// Binary search.
while (left < right) {
// Equivalent to (left + right) / 2, but does not overflow.
const size_t middle = left + (right - left) / 2;
ChunkData* const middle_ptr = sd.chunk(middle);
RegionData* const middle_ptr = sd.region(middle);
HeapWord* const dest = middle_ptr->destination();
HeapWord* const addr = sd.chunk_to_addr(middle);
HeapWord* const addr = sd.region_to_addr(middle);
assert(dest != NULL, "sanity");
assert(dest <= addr, "must move left");
if (middle > left && dest < addr) {
right = middle - 1;
} else if (middle < right && middle_ptr->data_size() == chunk_size) {
} else if (middle < right && middle_ptr->data_size() == region_size) {
left = middle + 1;
} else {
return middle_ptr;
}
}
return sd.chunk(left);
return sd.region(left);
}
ParallelCompactData::ChunkData*
PSParallelCompact::dead_wood_limit_chunk(const ChunkData* beg,
const ChunkData* end,
size_t dead_words)
ParallelCompactData::RegionData*
PSParallelCompact::dead_wood_limit_region(const RegionData* beg,
const RegionData* end,
size_t dead_words)
{
ParallelCompactData& sd = summary_data();
size_t left = sd.chunk(beg);
size_t right = end > beg ? sd.chunk(end) - 1 : left;
size_t left = sd.region(beg);
size_t right = end > beg ? sd.region(end) - 1 : left;
// Binary search.
while (left < right) {
// Equivalent to (left + right) / 2, but does not overflow.
const size_t middle = left + (right - left) / 2;
ChunkData* const middle_ptr = sd.chunk(middle);
RegionData* const middle_ptr = sd.region(middle);
HeapWord* const dest = middle_ptr->destination();
HeapWord* const addr = sd.chunk_to_addr(middle);
HeapWord* const addr = sd.region_to_addr(middle);
assert(dest != NULL, "sanity");
assert(dest <= addr, "must move left");
......@@ -1311,13 +1104,13 @@ PSParallelCompact::dead_wood_limit_chunk(const ChunkData* beg,
return middle_ptr;
}
}
return sd.chunk(left);
return sd.region(left);
}
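// Side note on the midpoint computation used by both binary searches above: a
// small standalone sketch of why 'left + (right - left) / 2' is preferred over
// '(left + right) / 2', which can wrap around for very large indices.
#include <cstddef>

static size_t midpoint(size_t left, size_t right) {
  // Never overflows when left <= right, since (right - left) <= right.
  return left + (right - left) / 2;
}
// With left and right both near SIZE_MAX, (left + right) / 2 would wrap to a
// small value, while midpoint(left, right) still lies between the two bounds.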
// The result is valid during the summary phase, after the initial summarization
// of each space into itself, and before final summarization.
inline double
PSParallelCompact::reclaimed_ratio(const ChunkData* const cp,
PSParallelCompact::reclaimed_ratio(const RegionData* const cp,
HeapWord* const bottom,
HeapWord* const top,
HeapWord* const new_top)
......@@ -1331,12 +1124,13 @@ PSParallelCompact::reclaimed_ratio(const ChunkData* const cp,
assert(top >= new_top, "summary data problem?");
assert(new_top > bottom, "space is empty; should not be here");
assert(new_top >= cp->destination(), "sanity");
assert(top >= sd.chunk_to_addr(cp), "sanity");
assert(top >= sd.region_to_addr(cp), "sanity");
HeapWord* const destination = cp->destination();
const size_t dense_prefix_live = pointer_delta(destination, bottom);
const size_t compacted_region_live = pointer_delta(new_top, destination);
const size_t compacted_region_used = pointer_delta(top, sd.chunk_to_addr(cp));
const size_t compacted_region_used = pointer_delta(top,
sd.region_to_addr(cp));
const size_t reclaimable = compacted_region_used - compacted_region_live;
const double divisor = dense_prefix_live + 1.25 * compacted_region_live;
......@@ -1344,39 +1138,40 @@ PSParallelCompact::reclaimed_ratio(const ChunkData* const cp,
}
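// A standalone sketch (illustrative only) of the reclaimed_ratio() heuristic
// above, using plain word offsets instead of HeapWord* and RegionData. The
// quantities mirror the ones computed above; the final division is an assumed
// reading of the return statement, which falls outside this excerpt.
#include <cstddef>

static double reclaimed_ratio_sketch(size_t bottom,        // space bottom (word offset)
                                     size_t top,           // current space top
                                     size_t new_top,       // top after compaction
                                     size_t region_addr,   // candidate region start
                                     size_t destination) { // candidate region destination
  // Live words left of the candidate boundary: kept in place, but still
  // scanned to update interior pointers.
  const size_t dense_prefix_live     = destination - bottom;
  // Live words right of the boundary: these must actually be copied.
  const size_t compacted_region_live = new_top - destination;
  // Words currently occupied from the candidate region's start up to top.
  const size_t compacted_region_used = top - region_addr;
  // Dead words that compacting the right-hand side would squeeze out.
  const size_t reclaimable           = compacted_region_used - compacted_region_live;

  // Cost estimate: copying live data is weighted more heavily (1.25x) than
  // updating the dense prefix in place, as in the code above.
  const double divisor = dense_prefix_live + 1.25 * compacted_region_live;
  return double(reclaimable) / divisor;
}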
// Return the address of the end of the dense prefix, a.k.a. the start of the
// compacted region. The address is always on a chunk boundary.
// compacted region. The address is always on a region boundary.
//
// Completely full chunks at the left are skipped, since no compaction can occur
// in those chunks. Then the maximum amount of dead wood to allow is computed,
// based on the density (amount live / capacity) of the generation; the chunk
// with approximately that amount of dead space to the left is identified as the
// limit chunk. Chunks between the last completely full chunk and the limit
// chunk are scanned and the one that has the best (maximum) reclaimed_ratio()
// is selected.
// Completely full regions at the left are skipped, since no compaction can
// occur in those regions. Then the maximum amount of dead wood to allow is
// computed, based on the density (amount live / capacity) of the generation;
// the region with approximately that amount of dead space to the left is
// identified as the limit region. Regions between the last completely full
// region and the limit region are scanned and the one that has the best
// (maximum) reclaimed_ratio() is selected.
HeapWord*
PSParallelCompact::compute_dense_prefix(const SpaceId id,
bool maximum_compaction)
{
const size_t chunk_size = ParallelCompactData::ChunkSize;
const size_t region_size = ParallelCompactData::RegionSize;
const ParallelCompactData& sd = summary_data();
const MutableSpace* const space = _space_info[id].space();
HeapWord* const top = space->top();
HeapWord* const top_aligned_up = sd.chunk_align_up(top);
HeapWord* const top_aligned_up = sd.region_align_up(top);
HeapWord* const new_top = _space_info[id].new_top();
HeapWord* const new_top_aligned_up = sd.chunk_align_up(new_top);
HeapWord* const new_top_aligned_up = sd.region_align_up(new_top);
HeapWord* const bottom = space->bottom();
const ChunkData* const beg_cp = sd.addr_to_chunk_ptr(bottom);
const ChunkData* const top_cp = sd.addr_to_chunk_ptr(top_aligned_up);
const ChunkData* const new_top_cp = sd.addr_to_chunk_ptr(new_top_aligned_up);
const RegionData* const beg_cp = sd.addr_to_region_ptr(bottom);
const RegionData* const top_cp = sd.addr_to_region_ptr(top_aligned_up);
const RegionData* const new_top_cp =
sd.addr_to_region_ptr(new_top_aligned_up);
// Skip full chunks at the beginning of the space--they are necessarily part
// Skip full regions at the beginning of the space--they are necessarily part
// of the dense prefix.
const ChunkData* const full_cp = first_dead_space_chunk(beg_cp, new_top_cp);
assert(full_cp->destination() == sd.chunk_to_addr(full_cp) ||
const RegionData* const full_cp = first_dead_space_region(beg_cp, new_top_cp);
assert(full_cp->destination() == sd.region_to_addr(full_cp) ||
space->is_empty(), "no dead space allowed to the left");
assert(full_cp->data_size() < chunk_size || full_cp == new_top_cp - 1,
"chunk must have dead space");
assert(full_cp->data_size() < region_size || full_cp == new_top_cp - 1,
"region must have dead space");
// The gc number is saved whenever a maximum compaction is done, and used to
// determine when the maximum compaction interval has expired. This avoids
......@@ -1387,7 +1182,7 @@ PSParallelCompact::compute_dense_prefix(const SpaceId id,
total_invocations() == HeapFirstMaximumCompactionCount;
if (maximum_compaction || full_cp == top_cp || interval_ended) {
_maximum_compaction_gc_num = total_invocations();
return sd.chunk_to_addr(full_cp);
return sd.region_to_addr(full_cp);
}
const size_t space_live = pointer_delta(new_top, bottom);
......@@ -1413,15 +1208,15 @@ PSParallelCompact::compute_dense_prefix(const SpaceId id,
dead_wood_max, dead_wood_limit);
}
// Locate the chunk with the desired amount of dead space to the left.
const ChunkData* const limit_cp =
dead_wood_limit_chunk(full_cp, top_cp, dead_wood_limit);
// Locate the region with the desired amount of dead space to the left.
const RegionData* const limit_cp =
dead_wood_limit_region(full_cp, top_cp, dead_wood_limit);
// Scan from the first chunk with dead space to the limit chunk and find the
// Scan from the first region with dead space to the limit region and find the
// one with the best (largest) reclaimed ratio.
double best_ratio = 0.0;
const ChunkData* best_cp = full_cp;
for (const ChunkData* cp = full_cp; cp < limit_cp; ++cp) {
const RegionData* best_cp = full_cp;
for (const RegionData* cp = full_cp; cp < limit_cp; ++cp) {
double tmp_ratio = reclaimed_ratio(cp, bottom, top, new_top);
if (tmp_ratio > best_ratio) {
best_cp = cp;
......@@ -1430,18 +1225,18 @@ PSParallelCompact::compute_dense_prefix(const SpaceId id,
}
#if 0
// Something to consider: if the chunk with the best ratio is 'close to' the
// first chunk w/free space, choose the first chunk with free space
// ("first-free"). The first-free chunk is usually near the start of the
// Something to consider: if the region with the best ratio is 'close to' the
// first region w/free space, choose the first region with free space
// ("first-free"). The first-free region is usually near the start of the
// heap, which means we are copying most of the heap already, so copy a bit
// more to get complete compaction.
if (pointer_delta(best_cp, full_cp, sizeof(ChunkData)) < 4) {
if (pointer_delta(best_cp, full_cp, sizeof(RegionData)) < 4) {
_maximum_compaction_gc_num = total_invocations();
best_cp = full_cp;
}
#endif // #if 0
return sd.chunk_to_addr(best_cp);
return sd.region_to_addr(best_cp);
}
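// The helpers used throughout compute_dense_prefix() -- region_align_up(),
// addr_to_region_idx(), region_to_addr() -- are plain power-of-two address
// arithmetic. A minimal sketch in word offsets; the region size below is an
// assumed power of two for illustration, not ParallelCompactData::RegionSize.
#include <cstddef>

static const size_t kRegionSizeWords = 512;  // assumption: any power of two

static size_t region_align_up_sketch(size_t word_off) {
  return (word_off + kRegionSizeWords - 1) & ~(kRegionSizeWords - 1);
}
static size_t addr_to_region_idx_sketch(size_t word_off) {
  return word_off / kRegionSizeWords;        // equivalently, a right shift
}
static size_t region_to_addr_sketch(size_t region_idx) {
  return region_idx * kRegionSizeWords;
}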
void PSParallelCompact::summarize_spaces_quick()
......@@ -1459,9 +1254,9 @@ void PSParallelCompact::summarize_spaces_quick()
void PSParallelCompact::fill_dense_prefix_end(SpaceId id)
{
HeapWord* const dense_prefix_end = dense_prefix(id);
const ChunkData* chunk = _summary_data.addr_to_chunk_ptr(dense_prefix_end);
const RegionData* region = _summary_data.addr_to_region_ptr(dense_prefix_end);
const idx_t dense_prefix_bit = _mark_bitmap.addr_to_bit(dense_prefix_end);
if (dead_space_crosses_boundary(chunk, dense_prefix_bit)) {
if (dead_space_crosses_boundary(region, dense_prefix_bit)) {
// Only enough dead space is filled so that any remaining dead space to the
// left is larger than the minimum filler object. (The remainder is filled
// during the copy/update phase.)
......@@ -1552,7 +1347,7 @@ PSParallelCompact::summarize_space(SpaceId id, bool maximum_compaction)
fill_dense_prefix_end(id);
}
// Compute the destination of each Chunk, and thus each object.
// Compute the destination of each Region, and thus each object.
_summary_data.summarize_dense_prefix(space->bottom(), dense_prefix_end);
_summary_data.summarize(dense_prefix_end, space->end(),
dense_prefix_end, space->top(),
......@@ -1560,19 +1355,19 @@ PSParallelCompact::summarize_space(SpaceId id, bool maximum_compaction)
}
if (TraceParallelOldGCSummaryPhase) {
const size_t chunk_size = ParallelCompactData::ChunkSize;
const size_t region_size = ParallelCompactData::RegionSize;
HeapWord* const dense_prefix_end = _space_info[id].dense_prefix();
const size_t dp_chunk = _summary_data.addr_to_chunk_idx(dense_prefix_end);
const size_t dp_region = _summary_data.addr_to_region_idx(dense_prefix_end);
const size_t dp_words = pointer_delta(dense_prefix_end, space->bottom());
HeapWord* const new_top = _space_info[id].new_top();
const HeapWord* nt_aligned_up = _summary_data.chunk_align_up(new_top);
const HeapWord* nt_aligned_up = _summary_data.region_align_up(new_top);
const size_t cr_words = pointer_delta(nt_aligned_up, dense_prefix_end);
tty->print_cr("id=%d cap=" SIZE_FORMAT " dp=" PTR_FORMAT " "
"dp_chunk=" SIZE_FORMAT " " "dp_count=" SIZE_FORMAT " "
"dp_region=" SIZE_FORMAT " " "dp_count=" SIZE_FORMAT " "
"cr_count=" SIZE_FORMAT " " "nt=" PTR_FORMAT,
id, space->capacity_in_words(), dense_prefix_end,
dp_chunk, dp_words / chunk_size,
cr_words / chunk_size, new_top);
dp_region, dp_words / region_size,
cr_words / region_size, new_top);
}
}
......@@ -1584,11 +1379,6 @@ void PSParallelCompact::summary_phase(ParCompactionManager* cm,
// trace("2");
#ifdef ASSERT
if (VerifyParallelOldWithMarkSweep &&
(PSParallelCompact::total_invocations() %
VerifyParallelOldWithMarkSweepInterval) == 0) {
verify_mark_bitmap(_mark_bitmap);
}
if (TraceParallelOldGCMarkingPhase) {
tty->print_cr("add_obj_count=" SIZE_FORMAT " "
"add_obj_bytes=" SIZE_FORMAT,
......@@ -1605,7 +1395,7 @@ void PSParallelCompact::summary_phase(ParCompactionManager* cm,
if (TraceParallelOldGCSummaryPhase) {
tty->print_cr("summary_phase: after summarizing each space to self");
Universe::print();
NOT_PRODUCT(print_chunk_ranges());
NOT_PRODUCT(print_region_ranges());
if (Verbose) {
NOT_PRODUCT(print_initial_summary_data(_summary_data, _space_info));
}
......@@ -1651,14 +1441,15 @@ void PSParallelCompact::summary_phase(ParCompactionManager* cm,
space->bottom(), space->top(),
new_top_addr);
// Clear the source_chunk field for each chunk in the space.
// Clear the source_region field for each region in the space.
HeapWord* const new_top = _space_info[id].new_top();
HeapWord* const clear_end = _summary_data.chunk_align_up(new_top);
ChunkData* beg_chunk = _summary_data.addr_to_chunk_ptr(space->bottom());
ChunkData* end_chunk = _summary_data.addr_to_chunk_ptr(clear_end);
while (beg_chunk < end_chunk) {
beg_chunk->set_source_chunk(0);
++beg_chunk;
HeapWord* const clear_end = _summary_data.region_align_up(new_top);
RegionData* beg_region =
_summary_data.addr_to_region_ptr(space->bottom());
RegionData* end_region = _summary_data.addr_to_region_ptr(clear_end);
while (beg_region < end_region) {
beg_region->set_source_region(0);
++beg_region;
}
// Reset the new_top value for the space.
......@@ -1666,243 +1457,16 @@ void PSParallelCompact::summary_phase(ParCompactionManager* cm,
}
}
// Fill in the block data after any changes to the chunks have
// been made.
#ifdef ASSERT
summarize_blocks(cm, perm_space_id);
summarize_blocks(cm, old_space_id);
#else
if (!UseParallelOldGCChunkPointerCalc) {
summarize_blocks(cm, perm_space_id);
summarize_blocks(cm, old_space_id);
}
#endif
if (TraceParallelOldGCSummaryPhase) {
tty->print_cr("summary_phase: after final summarization");
Universe::print();
NOT_PRODUCT(print_chunk_ranges());
NOT_PRODUCT(print_region_ranges());
if (Verbose) {
NOT_PRODUCT(print_generic_summary_data(_summary_data, _space_info));
}
}
}
// Fill in the BlockData.
// Iterate over the spaces and within each space iterate over
// the chunks and fill in the BlockData for each chunk.
void PSParallelCompact::summarize_blocks(ParCompactionManager* cm,
SpaceId first_compaction_space_id) {
#if 0
DEBUG_ONLY(ParallelCompactData::BlockData::set_cur_phase(1);)
for (SpaceId cur_space_id = first_compaction_space_id;
cur_space_id != last_space_id;
cur_space_id = next_compaction_space_id(cur_space_id)) {
// Iterate over the chunks in the space
size_t start_chunk_index =
_summary_data.addr_to_chunk_idx(space(cur_space_id)->bottom());
BitBlockUpdateClosure bbu(mark_bitmap(),
cm,
start_chunk_index);
// Iterate over blocks.
for (size_t chunk_index = start_chunk_index;
chunk_index < _summary_data.chunk_count() &&
_summary_data.chunk_to_addr(chunk_index) < space(cur_space_id)->top();
chunk_index++) {
// Reset the closure for the new chunk. Note that the closure
// maintains some data that does not get reset for each chunk
// so a new instance of the closure is not appropriate.
bbu.reset_chunk(chunk_index);
// Start the iteration with the first live object. This
// may return the end of the chunk. That is acceptable since
// it will properly limit the iterations.
ParMarkBitMap::idx_t left_offset = mark_bitmap()->addr_to_bit(
_summary_data.first_live_or_end_in_chunk(chunk_index));
// End the iteration at the end of the chunk.
HeapWord* chunk_addr = _summary_data.chunk_to_addr(chunk_index);
HeapWord* chunk_end = chunk_addr + ParallelCompactData::ChunkSize;
ParMarkBitMap::idx_t right_offset =
mark_bitmap()->addr_to_bit(chunk_end);
// Blocks that have no objects starting in them can be
// skipped because their data will never be used.
if (left_offset < right_offset) {
// Iterate through the objects in the chunk.
ParMarkBitMap::idx_t last_offset =
mark_bitmap()->pair_iterate(&bbu, left_offset, right_offset);
// If last_offset is less than right_offset, then the iterations
// terminated while it was looking for an end bit. "last_offset"
// is then the offset for the last start bit. In this situation
// the "offset" field for the next block to the right (_cur_block + 1)
// will not have been updated although there may be live data
// to the left of the chunk.
size_t cur_block_plus_1 = bbu.cur_block() + 1;
HeapWord* cur_block_plus_1_addr =
_summary_data.block_to_addr(bbu.cur_block()) +
ParallelCompactData::BlockSize;
HeapWord* last_offset_addr = mark_bitmap()->bit_to_addr(last_offset);
#if 1 // This code works. The else doesn't but should. Why does it?
// The current block (cur_block()) has already been updated.
// The last block that may need to be updated is either the
// next block (current block + 1) or the block where the
// last object starts (which can be greater than the
// next block if there were no objects found in intervening
// blocks).
size_t last_block =
MAX2(bbu.cur_block() + 1,
_summary_data.addr_to_block_idx(last_offset_addr));
#else
// The current block has already been updated. The only block
// that remains to be updated is the block where the last
// object in the chunk starts.
size_t last_block = _summary_data.addr_to_block_idx(last_offset_addr);
#endif
assert_bit_is_start(last_offset);
assert((last_block == _summary_data.block_count()) ||
(_summary_data.block(last_block)->raw_offset() == 0),
"Should not have been set");
// Is the last block still in the current chunk? If still
// in this chunk, update the last block (the counting that
// included the current block is meant for the offset of the last
// block). If not in this chunk, do nothing. Should not
// update a block in the next chunk.
if (ParallelCompactData::chunk_contains_block(bbu.chunk_index(),
last_block)) {
if (last_offset < right_offset) {
// The last object started in this chunk but ends beyond
// this chunk. Update the block for this last object.
assert(mark_bitmap()->is_marked(last_offset), "Should be marked");
// No end bit was found. The closure takes care of
// the cases where
// an object crosses over into the next block
// an object starts and ends in the next block
// It does not handle the case where an object is
// the first object in a later block and extends
// past the end of the chunk (i.e., the closure
// only handles complete objects that are in the range
// it is given). That object is handed back here
// for any special consideration necessary.
//
// Is the first bit in the last block a start or end bit?
//
// If the partial object ends in the last block L,
// then the 1st bit in L may be an end bit.
//
// Else does the last object start in a block after the current
// block? A block AA will already have been updated if an
// object ends in the next block AA+1. An object found to end in
// the AA+1 is the trigger that updates AA. Objects are being
// counted in the current block for updating a following
// block. An object may start in a later block
// but may extend beyond the last block in the chunk.
// Updates are only done when the end of an object has been
// found. If the last object (covered by block L) starts
// beyond the current block, then no object ends in L (otherwise
// L would be the current block). So the first bit in L is
// a start bit.
//
// Else the last object starts in the current block and ends
// beyond the chunk. The current block has already been
// updated and there is no later block (with an object
// starting in it) that needs to be updated.
//
if (_summary_data.partial_obj_ends_in_block(last_block)) {
_summary_data.block(last_block)->set_end_bit_offset(
bbu.live_data_left());
} else if (last_offset_addr >= cur_block_plus_1_addr) {
// The start of the object is in a later block
// (to the right of the current block and there are no
// complete live objects to the left of this last object
// within the chunk).
// The first bit in the block is for the start of the
// last object.
_summary_data.block(last_block)->set_start_bit_offset(
bbu.live_data_left());
} else {
// The start of the last object was found in
// the current chunk (which has already
// been updated).
assert(bbu.cur_block() ==
_summary_data.addr_to_block_idx(last_offset_addr),
"Should be a block already processed");
}
#ifdef ASSERT
// Is there enough block information to find this object?
// The destination of the chunk has not been set so the
// values returned by calc_new_pointer() and
// block_calc_new_pointer() will only be
// offsets. But they should agree.
HeapWord* moved_obj_with_chunks =
_summary_data.chunk_calc_new_pointer(last_offset_addr);
HeapWord* moved_obj_with_blocks =
_summary_data.calc_new_pointer(last_offset_addr);
assert(moved_obj_with_chunks == moved_obj_with_blocks,
"Block calculation is wrong");
#endif
} else if (last_block < _summary_data.block_count()) {
// Iterations ended looking for a start bit (but
// did not run off the end of the block table).
_summary_data.block(last_block)->set_start_bit_offset(
bbu.live_data_left());
}
}
#ifdef ASSERT
// Is there enough block information to find this object?
HeapWord* left_offset_addr = mark_bitmap()->bit_to_addr(left_offset);
HeapWord* moved_obj_with_chunks =
_summary_data.calc_new_pointer(left_offset_addr);
HeapWord* moved_obj_with_blocks =
_summary_data.calc_new_pointer(left_offset_addr);
assert(moved_obj_with_chunks == moved_obj_with_blocks,
"Block calculation is wrong");
#endif
// Is there another block after the end of this chunk?
#ifdef ASSERT
if (last_block < _summary_data.block_count()) {
// No object may have been found in a block. If that
// block is at the end of the chunk, the iteration will
// terminate without incrementing the current block so
// that the current block is not the last block in the
// chunk. That situation precludes asserting that the
// current block is the last block in the chunk. Assert
// the lesser condition that the current block does not
// exceed the chunk.
assert(_summary_data.block_to_addr(last_block) <=
(_summary_data.chunk_to_addr(chunk_index) +
ParallelCompactData::ChunkSize),
"Chunk and block inconsistency");
assert(last_offset <= right_offset, "Iteration over ran end");
}
#endif
}
#ifdef ASSERT
if (PrintGCDetails && Verbose) {
if (_summary_data.chunk(chunk_index)->partial_obj_size() == 1) {
size_t first_block =
chunk_index / ParallelCompactData::BlocksPerChunk;
gclog_or_tty->print_cr("first_block " PTR_FORMAT
" _offset " PTR_FORMAT
" _first_is_start_bit %d",
first_block,
_summary_data.block(first_block)->raw_offset(),
_summary_data.block(first_block)->first_is_start_bit());
}
}
#endif
}
}
DEBUG_ONLY(ParallelCompactData::BlockData::set_cur_phase(16);)
#endif // #if 0
}
// This method should contain all heap-specific policy for invoking a full
// collection. invoke_no_policy() will only attempt to compact the heap; it
// will do nothing further. If we need to bail out for policy reasons, scavenge
......@@ -1937,18 +1501,9 @@ void PSParallelCompact::invoke(bool maximum_heap_compaction) {
}
}
bool ParallelCompactData::chunk_contains(size_t chunk_index, HeapWord* addr) {
size_t addr_chunk_index = addr_to_chunk_idx(addr);
return chunk_index == addr_chunk_index;
}
bool ParallelCompactData::chunk_contains_block(size_t chunk_index,
size_t block_index) {
size_t first_block_in_chunk = chunk_index * BlocksPerChunk;
size_t last_block_in_chunk = (chunk_index + 1) * BlocksPerChunk - 1;
return (first_block_in_chunk <= block_index) &&
(block_index <= last_block_in_chunk);
bool ParallelCompactData::region_contains(size_t region_index, HeapWord* addr) {
size_t addr_region_index = addr_to_region_idx(addr);
return region_index == addr_region_index;
}
// This method contains no policy. You should probably
......@@ -2038,39 +1593,9 @@ void PSParallelCompact::invoke_no_policy(bool maximum_heap_compaction) {
}
#endif // #ifndef PRODUCT
#ifdef ASSERT
if (VerifyParallelOldWithMarkSweep &&
(PSParallelCompact::total_invocations() %
VerifyParallelOldWithMarkSweepInterval) == 0) {
gclog_or_tty->print_cr("Verify marking with mark_sweep_phase1()");
if (PrintGCDetails && Verbose) {
gclog_or_tty->print_cr("mark_sweep_phase1:");
}
// Clear the discovered lists so that discovered objects
// don't look like they have been discovered twice.
ref_processor()->clear_discovered_references();
PSMarkSweep::allocate_stacks();
MemRegion mr = Universe::heap()->reserved_region();
PSMarkSweep::ref_processor()->enable_discovery();
PSMarkSweep::mark_sweep_phase1(maximum_heap_compaction);
}
#endif
bool max_on_system_gc = UseMaximumCompactionOnSystemGC && is_system_gc;
summary_phase(vmthread_cm, maximum_heap_compaction || max_on_system_gc);
#ifdef ASSERT
if (VerifyParallelOldWithMarkSweep &&
(PSParallelCompact::total_invocations() %
VerifyParallelOldWithMarkSweepInterval) == 0) {
if (PrintGCDetails && Verbose) {
gclog_or_tty->print_cr("mark_sweep_phase2:");
}
PSMarkSweep::mark_sweep_phase2();
}
#endif
COMPILER2_PRESENT(assert(DerivedPointerTable::is_active(), "Sanity"));
COMPILER2_PRESENT(DerivedPointerTable::set_active(false));
......@@ -2078,28 +1603,6 @@ void PSParallelCompact::invoke_no_policy(bool maximum_heap_compaction) {
// needed by the compaction for filling holes in the dense prefix.
adjust_roots();
#ifdef ASSERT
if (VerifyParallelOldWithMarkSweep &&
(PSParallelCompact::total_invocations() %
VerifyParallelOldWithMarkSweepInterval) == 0) {
// Do a separate verify phase so that the verify
// code can use the forwarding pointers to
// check the new pointer calculation. The restore_marks()
// has to be done before the real compact.
vmthread_cm->set_action(ParCompactionManager::VerifyUpdate);
compact_perm(vmthread_cm);
compact_serial(vmthread_cm);
vmthread_cm->set_action(ParCompactionManager::ResetObjects);
compact_perm(vmthread_cm);
compact_serial(vmthread_cm);
vmthread_cm->set_action(ParCompactionManager::UpdateAndCopy);
// For debugging only
PSMarkSweep::restore_marks();
PSMarkSweep::deallocate_stacks();
}
#endif
compaction_start.update();
// Does the perm gen always have to be done serially because
// klasses are used in the update of an object?
......@@ -2349,7 +1852,7 @@ void PSParallelCompact::marking_phase(ParCompactionManager* cm,
ParallelScavengeHeap* heap = gc_heap();
uint parallel_gc_threads = heap->gc_task_manager()->workers();
TaskQueueSetSuper* qset = ParCompactionManager::chunk_array();
TaskQueueSetSuper* qset = ParCompactionManager::region_array();
ParallelTaskTerminator terminator(parallel_gc_threads, qset);
PSParallelCompact::MarkAndPushClosure mark_and_push_closure(cm);
......@@ -2487,8 +1990,9 @@ void PSParallelCompact::compact_perm(ParCompactionManager* cm) {
move_and_update(cm, perm_space_id);
}
void PSParallelCompact::enqueue_chunk_draining_tasks(GCTaskQueue* q,
uint parallel_gc_threads) {
void PSParallelCompact::enqueue_region_draining_tasks(GCTaskQueue* q,
uint parallel_gc_threads)
{
TraceTime tm("drain task setup", print_phases(), true, gclog_or_tty);
const unsigned int task_count = MAX2(parallel_gc_threads, 1U);
......@@ -2496,13 +2000,13 @@ void PSParallelCompact::enqueue_chunk_draining_tasks(GCTaskQueue* q,
q->enqueue(new DrainStacksCompactionTask());
}
// Find all chunks that are available (can be filled immediately) and
// Find all regions that are available (can be filled immediately) and
// distribute them to the thread stacks. The iteration is done in reverse
// order (high to low) so the chunks will be removed in ascending order.
// order (high to low) so the regions will be removed in ascending order.
const ParallelCompactData& sd = PSParallelCompact::summary_data();
size_t fillable_chunks = 0; // A count for diagnostic purposes.
size_t fillable_regions = 0; // A count for diagnostic purposes.
unsigned int which = 0; // The worker thread number.
for (unsigned int id = to_space_id; id > perm_space_id; --id) {
......@@ -2510,25 +2014,26 @@ void PSParallelCompact::enqueue_chunk_draining_tasks(GCTaskQueue* q,
MutableSpace* const space = space_info->space();
HeapWord* const new_top = space_info->new_top();
const size_t beg_chunk = sd.addr_to_chunk_idx(space_info->dense_prefix());
const size_t end_chunk = sd.addr_to_chunk_idx(sd.chunk_align_up(new_top));
assert(end_chunk > 0, "perm gen cannot be empty");
const size_t beg_region = sd.addr_to_region_idx(space_info->dense_prefix());
const size_t end_region =
sd.addr_to_region_idx(sd.region_align_up(new_top));
assert(end_region > 0, "perm gen cannot be empty");
for (size_t cur = end_chunk - 1; cur >= beg_chunk; --cur) {
if (sd.chunk(cur)->claim_unsafe()) {
for (size_t cur = end_region - 1; cur >= beg_region; --cur) {
if (sd.region(cur)->claim_unsafe()) {
ParCompactionManager* cm = ParCompactionManager::manager_array(which);
cm->save_for_processing(cur);
if (TraceParallelOldGCCompactionPhase && Verbose) {
const size_t count_mod_8 = fillable_chunks & 7;
const size_t count_mod_8 = fillable_regions & 7;
if (count_mod_8 == 0) gclog_or_tty->print("fillable: ");
gclog_or_tty->print(" " SIZE_FORMAT_W(7), cur);
if (count_mod_8 == 7) gclog_or_tty->cr();
}
NOT_PRODUCT(++fillable_chunks;)
NOT_PRODUCT(++fillable_regions;)
// Assign chunks to threads in round-robin fashion.
// Assign regions to threads in round-robin fashion.
if (++which == task_count) {
which = 0;
}
......@@ -2537,8 +2042,8 @@ void PSParallelCompact::enqueue_chunk_draining_tasks(GCTaskQueue* q,
}
if (TraceParallelOldGCCompactionPhase) {
if (Verbose && (fillable_chunks & 7) != 0) gclog_or_tty->cr();
gclog_or_tty->print_cr("%u initially fillable chunks", fillable_chunks);
if (Verbose && (fillable_regions & 7) != 0) gclog_or_tty->cr();
gclog_or_tty->print_cr("%u initially fillable regions", fillable_regions);
}
}
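// A minimal sketch (illustrative only, not part of this changeset) of the
// round-robin handout above: claimed region indices are dealt to the worker
// stacks one at a time, wrapping the worker number when it reaches the task
// count. The container types and names are assumptions.
#include <cstddef>
#include <vector>

static void deal_round_robin(const std::vector<size_t>& claimed_regions,
                             std::vector<std::vector<size_t> >& worker_stacks) {
  const size_t task_count = worker_stacks.size();
  size_t which = 0;                       // the worker thread number
  for (size_t i = 0; i < claimed_regions.size(); ++i) {
    worker_stacks[which].push_back(claimed_regions[i]);
    if (++which == task_count) {          // wrap around, as in the loop above
      which = 0;
    }
  }
}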
......@@ -2551,7 +2056,7 @@ void PSParallelCompact::enqueue_dense_prefix_tasks(GCTaskQueue* q,
ParallelCompactData& sd = PSParallelCompact::summary_data();
// Iterate over all the spaces adding tasks for updating
// chunks in the dense prefix. Assume that 1 gc thread
// regions in the dense prefix. Assume that 1 gc thread
// will work on opening the gaps and the remaining gc threads
// will work on the dense prefix.
SpaceId space_id = old_space_id;
......@@ -2565,30 +2070,31 @@ void PSParallelCompact::enqueue_dense_prefix_tasks(GCTaskQueue* q,
continue;
}
// The dense prefix is before this chunk.
size_t chunk_index_end_dense_prefix =
sd.addr_to_chunk_idx(dense_prefix_end);
ChunkData* const dense_prefix_cp = sd.chunk(chunk_index_end_dense_prefix);
// The dense prefix is before this region.
size_t region_index_end_dense_prefix =
sd.addr_to_region_idx(dense_prefix_end);
RegionData* const dense_prefix_cp =
sd.region(region_index_end_dense_prefix);
assert(dense_prefix_end == space->end() ||
dense_prefix_cp->available() ||
dense_prefix_cp->claimed(),
"The chunk after the dense prefix should always be ready to fill");
"The region after the dense prefix should always be ready to fill");
size_t chunk_index_start = sd.addr_to_chunk_idx(space->bottom());
size_t region_index_start = sd.addr_to_region_idx(space->bottom());
// Is there dense prefix work?
size_t total_dense_prefix_chunks =
chunk_index_end_dense_prefix - chunk_index_start;
// How many chunks of the dense prefix should be given to
size_t total_dense_prefix_regions =
region_index_end_dense_prefix - region_index_start;
// How many regions of the dense prefix should be given to
// each thread?
if (total_dense_prefix_chunks > 0) {
if (total_dense_prefix_regions > 0) {
uint tasks_for_dense_prefix = 1;
if (UseParallelDensePrefixUpdate) {
if (total_dense_prefix_chunks <=
if (total_dense_prefix_regions <=
(parallel_gc_threads * PAR_OLD_DENSE_PREFIX_OVER_PARTITIONING)) {
// Don't over partition. This assumes that
// PAR_OLD_DENSE_PREFIX_OVER_PARTITIONING is a small integer value
// so there are not many chunks to process.
// so there are not many regions to process.
tasks_for_dense_prefix = parallel_gc_threads;
} else {
// Over partition
......@@ -2596,50 +2102,50 @@ void PSParallelCompact::enqueue_dense_prefix_tasks(GCTaskQueue* q,
PAR_OLD_DENSE_PREFIX_OVER_PARTITIONING;
}
}
size_t chunks_per_thread = total_dense_prefix_chunks /
size_t regions_per_thread = total_dense_prefix_regions /
tasks_for_dense_prefix;
// Give each thread at least 1 chunk.
if (chunks_per_thread == 0) {
chunks_per_thread = 1;
// Give each thread at least 1 region.
if (regions_per_thread == 0) {
regions_per_thread = 1;
}
for (uint k = 0; k < tasks_for_dense_prefix; k++) {
if (chunk_index_start >= chunk_index_end_dense_prefix) {
if (region_index_start >= region_index_end_dense_prefix) {
break;
}
// chunk_index_end is not processed
size_t chunk_index_end = MIN2(chunk_index_start + chunks_per_thread,
chunk_index_end_dense_prefix);
// region_index_end is not processed
size_t region_index_end = MIN2(region_index_start + regions_per_thread,
region_index_end_dense_prefix);
q->enqueue(new UpdateDensePrefixTask(
space_id,
chunk_index_start,
chunk_index_end));
chunk_index_start = chunk_index_end;
region_index_start,
region_index_end));
region_index_start = region_index_end;
}
}
// This gets any part of the dense prefix that did not
// fit evenly.
if (chunk_index_start < chunk_index_end_dense_prefix) {
if (region_index_start < region_index_end_dense_prefix) {
q->enqueue(new UpdateDensePrefixTask(
space_id,
chunk_index_start,
chunk_index_end_dense_prefix));
region_index_start,
region_index_end_dense_prefix));
}
space_id = next_compaction_space_id(space_id);
} // End tasks for dense prefix
}
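// A standalone sketch (illustrative only) of how the dense-prefix regions are
// carved into per-task [start, end) slices above: divide evenly, give each
// task at least one region, and hand any part that does not fit evenly to one
// final task. The vector-of-pairs representation is an assumption.
#include <cstddef>
#include <utility>
#include <vector>

static std::vector<std::pair<size_t, size_t> >
slice_dense_prefix(size_t region_index_start,
                   size_t region_index_end,     // one past the last dense-prefix region
                   size_t tasks_for_dense_prefix) {
  std::vector<std::pair<size_t, size_t> > slices;
  const size_t total = region_index_end - region_index_start;
  if (total == 0 || tasks_for_dense_prefix == 0) return slices;

  size_t regions_per_thread = total / tasks_for_dense_prefix;
  if (regions_per_thread == 0) regions_per_thread = 1;   // at least one region each

  size_t start = region_index_start;
  for (size_t k = 0; k < tasks_for_dense_prefix && start < region_index_end; ++k) {
    const size_t end = (region_index_end - start > regions_per_thread)
                           ? start + regions_per_thread
                           : region_index_end;           // end is exclusive
    slices.push_back(std::make_pair(start, end));
    start = end;
  }
  if (start < region_index_end) {                        // leftover regions
    slices.push_back(std::make_pair(start, region_index_end));
  }
  return slices;
}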
void PSParallelCompact::enqueue_chunk_stealing_tasks(
void PSParallelCompact::enqueue_region_stealing_tasks(
GCTaskQueue* q,
ParallelTaskTerminator* terminator_ptr,
uint parallel_gc_threads) {
TraceTime tm("steal task setup", print_phases(), true, gclog_or_tty);
// Once a thread has drained its stack, it should try to steal regions from
// Once a thread has drained it's stack, it should try to steal regions from
// other threads.
if (parallel_gc_threads > 1) {
for (uint j = 0; j < parallel_gc_threads; j++) {
q->enqueue(new StealChunkCompactionTask(terminator_ptr));
q->enqueue(new StealRegionCompactionTask(terminator_ptr));
}
}
}
......@@ -2654,13 +2160,13 @@ void PSParallelCompact::compact() {
PSOldGen* old_gen = heap->old_gen();
old_gen->start_array()->reset();
uint parallel_gc_threads = heap->gc_task_manager()->workers();
TaskQueueSetSuper* qset = ParCompactionManager::chunk_array();
TaskQueueSetSuper* qset = ParCompactionManager::region_array();
ParallelTaskTerminator terminator(parallel_gc_threads, qset);
GCTaskQueue* q = GCTaskQueue::create();
enqueue_chunk_draining_tasks(q, parallel_gc_threads);
enqueue_region_draining_tasks(q, parallel_gc_threads);
enqueue_dense_prefix_tasks(q, parallel_gc_threads);
enqueue_chunk_stealing_tasks(q, &terminator, parallel_gc_threads);
enqueue_region_stealing_tasks(q, &terminator, parallel_gc_threads);
{
TraceTime tm_pc("par compact", print_phases(), true, gclog_or_tty);
......@@ -2676,9 +2182,9 @@ void PSParallelCompact::compact() {
WaitForBarrierGCTask::destroy(fin);
#ifdef ASSERT
// Verify that all chunks have been processed before the deferred updates.
// Verify that all regions have been processed before the deferred updates.
// Note that perm_space_id is skipped; this type of verification is not
// valid until the perm gen is compacted by chunks.
// valid until the perm gen is compacted by regions.
for (unsigned int id = old_space_id; id < last_space_id; ++id) {
verify_complete(SpaceId(id));
}
......@@ -2697,42 +2203,42 @@ void PSParallelCompact::compact() {
#ifdef ASSERT
void PSParallelCompact::verify_complete(SpaceId space_id) {
// All Chunks between space bottom() to new_top() should be marked as filled
// and all Chunks between new_top() and top() should be available (i.e.,
// All Regions between space bottom() to new_top() should be marked as filled
// and all Regions between new_top() and top() should be available (i.e.,
// should have been emptied).
ParallelCompactData& sd = summary_data();
SpaceInfo si = _space_info[space_id];
HeapWord* new_top_addr = sd.chunk_align_up(si.new_top());
HeapWord* old_top_addr = sd.chunk_align_up(si.space()->top());
const size_t beg_chunk = sd.addr_to_chunk_idx(si.space()->bottom());
const size_t new_top_chunk = sd.addr_to_chunk_idx(new_top_addr);
const size_t old_top_chunk = sd.addr_to_chunk_idx(old_top_addr);
HeapWord* new_top_addr = sd.region_align_up(si.new_top());
HeapWord* old_top_addr = sd.region_align_up(si.space()->top());
const size_t beg_region = sd.addr_to_region_idx(si.space()->bottom());
const size_t new_top_region = sd.addr_to_region_idx(new_top_addr);
const size_t old_top_region = sd.addr_to_region_idx(old_top_addr);
bool issued_a_warning = false;
size_t cur_chunk;
for (cur_chunk = beg_chunk; cur_chunk < new_top_chunk; ++cur_chunk) {
const ChunkData* const c = sd.chunk(cur_chunk);
size_t cur_region;
for (cur_region = beg_region; cur_region < new_top_region; ++cur_region) {
const RegionData* const c = sd.region(cur_region);
if (!c->completed()) {
warning("chunk " SIZE_FORMAT " not filled: "
warning("region " SIZE_FORMAT " not filled: "
"destination_count=" SIZE_FORMAT,
cur_chunk, c->destination_count());
cur_region, c->destination_count());
issued_a_warning = true;
}
}
for (cur_chunk = new_top_chunk; cur_chunk < old_top_chunk; ++cur_chunk) {
const ChunkData* const c = sd.chunk(cur_chunk);
for (cur_region = new_top_region; cur_region < old_top_region; ++cur_region) {
const RegionData* const c = sd.region(cur_region);
if (!c->available()) {
warning("chunk " SIZE_FORMAT " not empty: "
warning("region " SIZE_FORMAT " not empty: "
"destination_count=" SIZE_FORMAT,
cur_chunk, c->destination_count());
cur_region, c->destination_count());
issued_a_warning = true;
}
}
if (issued_a_warning) {
print_chunk_ranges();
print_region_ranges();
}
}
#endif // #ifdef ASSERT
......@@ -2933,46 +2439,47 @@ void PSParallelCompact::print_new_location_of_heap_address(HeapWord* q) {
}
#endif //VALIDATE_MARK_SWEEP
// Update interior oops in the ranges of chunks [beg_chunk, end_chunk).
// Update interior oops in the ranges of regions [beg_region, end_region).
void
PSParallelCompact::update_and_deadwood_in_dense_prefix(ParCompactionManager* cm,
SpaceId space_id,
size_t beg_chunk,
size_t end_chunk) {
size_t beg_region,
size_t end_region) {
ParallelCompactData& sd = summary_data();
ParMarkBitMap* const mbm = mark_bitmap();
HeapWord* beg_addr = sd.chunk_to_addr(beg_chunk);
HeapWord* const end_addr = sd.chunk_to_addr(end_chunk);
assert(beg_chunk <= end_chunk, "bad chunk range");
HeapWord* beg_addr = sd.region_to_addr(beg_region);
HeapWord* const end_addr = sd.region_to_addr(end_region);
assert(beg_region <= end_region, "bad region range");
assert(end_addr <= dense_prefix(space_id), "not in the dense prefix");
#ifdef ASSERT
// Claim the chunks to avoid triggering an assert when they are marked as
// Claim the regions to avoid triggering an assert when they are marked as
// filled.
for (size_t claim_chunk = beg_chunk; claim_chunk < end_chunk; ++claim_chunk) {
assert(sd.chunk(claim_chunk)->claim_unsafe(), "claim() failed");
for (size_t claim_region = beg_region; claim_region < end_region; ++claim_region) {
assert(sd.region(claim_region)->claim_unsafe(), "claim() failed");
}
#endif // #ifdef ASSERT
if (beg_addr != space(space_id)->bottom()) {
// Find the first live object or block of dead space that *starts* in this
// range of chunks. If a partial object crosses onto the chunk, skip it; it
// will be marked for 'deferred update' when the object head is processed.
// If dead space crosses onto the chunk, it is also skipped; it will be
// filled when the prior chunk is processed. If neither of those apply, the
// first word in the chunk is the start of a live object or dead space.
// range of regions. If a partial object crosses onto the region, skip it;
// it will be marked for 'deferred update' when the object head is
// processed. If dead space crosses onto the region, it is also skipped; it
// will be filled when the prior region is processed. If neither of those
// apply, the first word in the region is the start of a live object or dead
// space.
assert(beg_addr > space(space_id)->bottom(), "sanity");
const ChunkData* const cp = sd.chunk(beg_chunk);
const RegionData* const cp = sd.region(beg_region);
if (cp->partial_obj_size() != 0) {
beg_addr = sd.partial_obj_end(beg_chunk);
beg_addr = sd.partial_obj_end(beg_region);
} else if (dead_space_crosses_boundary(cp, mbm->addr_to_bit(beg_addr))) {
beg_addr = mbm->find_obj_beg(beg_addr, end_addr);
}
}
if (beg_addr < end_addr) {
// A live object or block of dead space starts in this range of Chunks.
// A live object or block of dead space starts in this range of Regions.
HeapWord* const dense_prefix_end = dense_prefix(space_id);
// Create closures and iterate.
......@@ -2986,10 +2493,10 @@ PSParallelCompact::update_and_deadwood_in_dense_prefix(ParCompactionManager* cm,
}
}
// Mark the chunks as filled.
ChunkData* const beg_cp = sd.chunk(beg_chunk);
ChunkData* const end_cp = sd.chunk(end_chunk);
for (ChunkData* cp = beg_cp; cp < end_cp; ++cp) {
// Mark the regions as filled.
RegionData* const beg_cp = sd.region(beg_region);
RegionData* const end_cp = sd.region(end_region);
for (RegionData* cp = beg_cp; cp < end_cp; ++cp) {
cp->set_completed();
}
}
......@@ -3021,13 +2528,13 @@ void PSParallelCompact::update_deferred_objects(ParCompactionManager* cm,
const MutableSpace* const space = space_info->space();
assert(space_info->dense_prefix() >= space->bottom(), "dense_prefix not set");
HeapWord* const beg_addr = space_info->dense_prefix();
HeapWord* const end_addr = sd.chunk_align_up(space_info->new_top());
HeapWord* const end_addr = sd.region_align_up(space_info->new_top());
const ChunkData* const beg_chunk = sd.addr_to_chunk_ptr(beg_addr);
const ChunkData* const end_chunk = sd.addr_to_chunk_ptr(end_addr);
const ChunkData* cur_chunk;
for (cur_chunk = beg_chunk; cur_chunk < end_chunk; ++cur_chunk) {
HeapWord* const addr = cur_chunk->deferred_obj_addr();
const RegionData* const beg_region = sd.addr_to_region_ptr(beg_addr);
const RegionData* const end_region = sd.addr_to_region_ptr(end_addr);
const RegionData* cur_region;
for (cur_region = beg_region; cur_region < end_region; ++cur_region) {
HeapWord* const addr = cur_region->deferred_obj_addr();
if (addr != NULL) {
if (start_array != NULL) {
start_array->allocate_block(addr);
......@@ -3073,45 +2580,45 @@ PSParallelCompact::skip_live_words(HeapWord* beg, HeapWord* end, size_t count)
HeapWord*
PSParallelCompact::first_src_addr(HeapWord* const dest_addr,
size_t src_chunk_idx)
size_t src_region_idx)
{
ParMarkBitMap* const bitmap = mark_bitmap();
const ParallelCompactData& sd = summary_data();
const size_t ChunkSize = ParallelCompactData::ChunkSize;
const size_t RegionSize = ParallelCompactData::RegionSize;
assert(sd.is_chunk_aligned(dest_addr), "not aligned");
assert(sd.is_region_aligned(dest_addr), "not aligned");
const ChunkData* const src_chunk_ptr = sd.chunk(src_chunk_idx);
const size_t partial_obj_size = src_chunk_ptr->partial_obj_size();
HeapWord* const src_chunk_destination = src_chunk_ptr->destination();
const RegionData* const src_region_ptr = sd.region(src_region_idx);
const size_t partial_obj_size = src_region_ptr->partial_obj_size();
HeapWord* const src_region_destination = src_region_ptr->destination();
assert(dest_addr >= src_chunk_destination, "wrong src chunk");
assert(src_chunk_ptr->data_size() > 0, "src chunk cannot be empty");
assert(dest_addr >= src_region_destination, "wrong src region");
assert(src_region_ptr->data_size() > 0, "src region cannot be empty");
HeapWord* const src_chunk_beg = sd.chunk_to_addr(src_chunk_idx);
HeapWord* const src_chunk_end = src_chunk_beg + ChunkSize;
HeapWord* const src_region_beg = sd.region_to_addr(src_region_idx);
HeapWord* const src_region_end = src_region_beg + RegionSize;
HeapWord* addr = src_chunk_beg;
if (dest_addr == src_chunk_destination) {
// Return the first live word in the source chunk.
HeapWord* addr = src_region_beg;
if (dest_addr == src_region_destination) {
// Return the first live word in the source region.
if (partial_obj_size == 0) {
addr = bitmap->find_obj_beg(addr, src_chunk_end);
assert(addr < src_chunk_end, "no objects start in src chunk");
addr = bitmap->find_obj_beg(addr, src_region_end);
assert(addr < src_region_end, "no objects start in src region");
}
return addr;
}
// Must skip some live data.
size_t words_to_skip = dest_addr - src_chunk_destination;
assert(src_chunk_ptr->data_size() > words_to_skip, "wrong src chunk");
size_t words_to_skip = dest_addr - src_region_destination;
assert(src_region_ptr->data_size() > words_to_skip, "wrong src region");
if (partial_obj_size >= words_to_skip) {
// All the live words to skip are part of the partial object.
addr += words_to_skip;
if (partial_obj_size == words_to_skip) {
// Find the first live word past the partial object.
addr = bitmap->find_obj_beg(addr, src_chunk_end);
assert(addr < src_chunk_end, "wrong src chunk");
addr = bitmap->find_obj_beg(addr, src_region_end);
assert(addr < src_region_end, "wrong src region");
}
return addr;
}
......@@ -3122,63 +2629,64 @@ PSParallelCompact::first_src_addr(HeapWord* const dest_addr,
addr += partial_obj_size;
}
// Skip over live words due to objects that start in the chunk.
addr = skip_live_words(addr, src_chunk_end, words_to_skip);
assert(addr < src_chunk_end, "wrong src chunk");
// Skip over live words due to objects that start in the region.
addr = skip_live_words(addr, src_region_end, words_to_skip);
assert(addr < src_region_end, "wrong src region");
return addr;
}
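// A minimal sketch (illustrative only) of the two-step skip performed by
// first_src_addr() above, in plain word counts: live words covered by the
// partial object are skipped with simple arithmetic, and any remainder is
// skipped object by object via the mark bitmap, modeled here as a callback.
// The template/callback shape is an assumption, not the HotSpot interface.
#include <cstddef>

// Returns the word offset, within the source region, of the first live word
// still to be copied, given how many live words must be skipped.
template <typename SkipLiveWordsFn>
static size_t first_src_offset_sketch(size_t partial_obj_size,
                                      size_t words_to_skip,
                                      SkipLiveWordsFn skip_live_words) {
  if (partial_obj_size >= words_to_skip) {
    // The skip ends inside (or exactly at the end of) the partial object; the
    // code above additionally advances to the next object start when the two
    // are equal, a bitmap lookup elided from this sketch.
    return words_to_skip;
  }
  // Skip the whole partial object, then let the bitmap-based helper skip the
  // remaining live words; it returns the resulting word offset.
  return skip_live_words(partial_obj_size, words_to_skip - partial_obj_size);
}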
void PSParallelCompact::decrement_destination_counts(ParCompactionManager* cm,
size_t beg_chunk,
size_t beg_region,
HeapWord* end_addr)
{
ParallelCompactData& sd = summary_data();
ChunkData* const beg = sd.chunk(beg_chunk);
HeapWord* const end_addr_aligned_up = sd.chunk_align_up(end_addr);
ChunkData* const end = sd.addr_to_chunk_ptr(end_addr_aligned_up);
size_t cur_idx = beg_chunk;
for (ChunkData* cur = beg; cur < end; ++cur, ++cur_idx) {
assert(cur->data_size() > 0, "chunk must have live data");
RegionData* const beg = sd.region(beg_region);
HeapWord* const end_addr_aligned_up = sd.region_align_up(end_addr);
RegionData* const end = sd.addr_to_region_ptr(end_addr_aligned_up);
size_t cur_idx = beg_region;
for (RegionData* cur = beg; cur < end; ++cur, ++cur_idx) {
assert(cur->data_size() > 0, "region must have live data");
cur->decrement_destination_count();
if (cur_idx <= cur->source_chunk() && cur->available() && cur->claim()) {
if (cur_idx <= cur->source_region() && cur->available() && cur->claim()) {
cm->save_for_processing(cur_idx);
}
}
}
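// A simplified model (not the HotSpot implementation) of the handshake above:
// each source region keeps a count of destination regions still expecting
// data from it; the worker that drops the count to zero may claim the region
// and push it onto its own work stack for filling. std::atomic and the queue
// type are used purely for illustration, and the 'cur_idx <= source_region()'
// refinement from the code above is omitted.
#include <atomic>
#include <cstddef>
#include <vector>

struct RegionStateSketch {
  std::atomic<int>  destination_count;   // copies still to be taken from this region
  std::atomic<bool> claimed;             // has some worker already queued it?
};

static void note_destination_filled(RegionStateSketch& r,
                                    std::vector<size_t>& my_work_stack,
                                    size_t region_idx) {
  // fetch_sub returns the previous value, so '== 1' means it just hit zero,
  // i.e. the region has become available.
  const bool available = r.destination_count.fetch_sub(1) == 1;
  bool expected = false;
  if (available && r.claimed.compare_exchange_strong(expected, true)) {
    my_work_stack.push_back(region_idx); // exactly one worker enqueues it
  }
}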
size_t PSParallelCompact::next_src_chunk(MoveAndUpdateClosure& closure,
SpaceId& src_space_id,
HeapWord*& src_space_top,
HeapWord* end_addr)
size_t PSParallelCompact::next_src_region(MoveAndUpdateClosure& closure,
SpaceId& src_space_id,
HeapWord*& src_space_top,
HeapWord* end_addr)
{
typedef ParallelCompactData::ChunkData ChunkData;
typedef ParallelCompactData::RegionData RegionData;
ParallelCompactData& sd = PSParallelCompact::summary_data();
const size_t chunk_size = ParallelCompactData::ChunkSize;
size_t src_chunk_idx = 0;
// Skip empty chunks (if any) up to the top of the space.
HeapWord* const src_aligned_up = sd.chunk_align_up(end_addr);
ChunkData* src_chunk_ptr = sd.addr_to_chunk_ptr(src_aligned_up);
HeapWord* const top_aligned_up = sd.chunk_align_up(src_space_top);
const ChunkData* const top_chunk_ptr = sd.addr_to_chunk_ptr(top_aligned_up);
while (src_chunk_ptr < top_chunk_ptr && src_chunk_ptr->data_size() == 0) {
++src_chunk_ptr;
}
if (src_chunk_ptr < top_chunk_ptr) {
// The next source chunk is in the current space. Update src_chunk_idx and
// the source address to match src_chunk_ptr.
src_chunk_idx = sd.chunk(src_chunk_ptr);
HeapWord* const src_chunk_addr = sd.chunk_to_addr(src_chunk_idx);
if (src_chunk_addr > closure.source()) {
closure.set_source(src_chunk_addr);
const size_t region_size = ParallelCompactData::RegionSize;
size_t src_region_idx = 0;
// Skip empty regions (if any) up to the top of the space.
HeapWord* const src_aligned_up = sd.region_align_up(end_addr);
RegionData* src_region_ptr = sd.addr_to_region_ptr(src_aligned_up);
HeapWord* const top_aligned_up = sd.region_align_up(src_space_top);
const RegionData* const top_region_ptr =
sd.addr_to_region_ptr(top_aligned_up);
while (src_region_ptr < top_region_ptr && src_region_ptr->data_size() == 0) {
++src_region_ptr;
}
if (src_region_ptr < top_region_ptr) {
// The next source region is in the current space. Update src_region_idx
// and the source address to match src_region_ptr.
src_region_idx = sd.region(src_region_ptr);
HeapWord* const src_region_addr = sd.region_to_addr(src_region_idx);
if (src_region_addr > closure.source()) {
closure.set_source(src_region_addr);
}
return src_chunk_idx;
return src_region_idx;
}
// Switch to a new source space and find the first non-empty chunk.
// Switch to a new source space and find the first non-empty region.
unsigned int space_id = src_space_id + 1;
assert(space_id < last_space_id, "not enough spaces");
......@@ -3187,14 +2695,14 @@ size_t PSParallelCompact::next_src_chunk(MoveAndUpdateClosure& closure,
do {
MutableSpace* space = _space_info[space_id].space();
HeapWord* const bottom = space->bottom();
const ChunkData* const bottom_cp = sd.addr_to_chunk_ptr(bottom);
const RegionData* const bottom_cp = sd.addr_to_region_ptr(bottom);
// Iterate over the spaces that do not compact into themselves.
if (bottom_cp->destination() != bottom) {
HeapWord* const top_aligned_up = sd.chunk_align_up(space->top());
const ChunkData* const top_cp = sd.addr_to_chunk_ptr(top_aligned_up);
HeapWord* const top_aligned_up = sd.region_align_up(space->top());
const RegionData* const top_cp = sd.addr_to_region_ptr(top_aligned_up);
for (const ChunkData* src_cp = bottom_cp; src_cp < top_cp; ++src_cp) {
for (const RegionData* src_cp = bottom_cp; src_cp < top_cp; ++src_cp) {
if (src_cp->live_obj_size() > 0) {
// Found it.
assert(src_cp->destination() == destination,
......@@ -3204,9 +2712,9 @@ size_t PSParallelCompact::next_src_chunk(MoveAndUpdateClosure& closure,
src_space_id = SpaceId(space_id);
src_space_top = space->top();
const size_t src_chunk_idx = sd.chunk(src_cp);
closure.set_source(sd.chunk_to_addr(src_chunk_idx));
return src_chunk_idx;
const size_t src_region_idx = sd.region(src_cp);
closure.set_source(sd.region_to_addr(src_region_idx));
return src_region_idx;
} else {
assert(src_cp->data_size() == 0, "sanity");
}
......@@ -3214,38 +2722,38 @@ size_t PSParallelCompact::next_src_chunk(MoveAndUpdateClosure& closure,
}
} while (++space_id < last_space_id);
assert(false, "no source chunk was found");
assert(false, "no source region was found");
return 0;
}
void PSParallelCompact::fill_chunk(ParCompactionManager* cm, size_t chunk_idx)
void PSParallelCompact::fill_region(ParCompactionManager* cm, size_t region_idx)
{
typedef ParMarkBitMap::IterationStatus IterationStatus;
const size_t ChunkSize = ParallelCompactData::ChunkSize;
const size_t RegionSize = ParallelCompactData::RegionSize;
ParMarkBitMap* const bitmap = mark_bitmap();
ParallelCompactData& sd = summary_data();
ChunkData* const chunk_ptr = sd.chunk(chunk_idx);
RegionData* const region_ptr = sd.region(region_idx);
// Get the items needed to construct the closure.
HeapWord* dest_addr = sd.chunk_to_addr(chunk_idx);
HeapWord* dest_addr = sd.region_to_addr(region_idx);
SpaceId dest_space_id = space_id(dest_addr);
ObjectStartArray* start_array = _space_info[dest_space_id].start_array();
HeapWord* new_top = _space_info[dest_space_id].new_top();
assert(dest_addr < new_top, "sanity");
const size_t words = MIN2(pointer_delta(new_top, dest_addr), ChunkSize);
const size_t words = MIN2(pointer_delta(new_top, dest_addr), RegionSize);
// Get the source chunk and related info.
size_t src_chunk_idx = chunk_ptr->source_chunk();
SpaceId src_space_id = space_id(sd.chunk_to_addr(src_chunk_idx));
// Get the source region and related info.
size_t src_region_idx = region_ptr->source_region();
SpaceId src_space_id = space_id(sd.region_to_addr(src_region_idx));
HeapWord* src_space_top = _space_info[src_space_id].space()->top();
MoveAndUpdateClosure closure(bitmap, cm, start_array, dest_addr, words);
closure.set_source(first_src_addr(dest_addr, src_chunk_idx));
closure.set_source(first_src_addr(dest_addr, src_region_idx));
// Adjust src_chunk_idx to prepare for decrementing destination counts (the
// destination count is not decremented when a chunk is copied to itself).
if (src_chunk_idx == chunk_idx) {
src_chunk_idx += 1;
// Adjust src_region_idx to prepare for decrementing destination counts (the
// destination count is not decremented when a region is copied to itself).
if (src_region_idx == region_idx) {
src_region_idx += 1;
}
if (bitmap->is_unmarked(closure.source())) {
......@@ -3255,32 +2763,33 @@ void PSParallelCompact::fill_chunk(ParCompactionManager* cm, size_t chunk_idx)
HeapWord* const old_src_addr = closure.source();
closure.copy_partial_obj();
if (closure.is_full()) {
decrement_destination_counts(cm, src_chunk_idx, closure.source());
chunk_ptr->set_deferred_obj_addr(NULL);
chunk_ptr->set_completed();
decrement_destination_counts(cm, src_region_idx, closure.source());
region_ptr->set_deferred_obj_addr(NULL);
region_ptr->set_completed();
return;
}
HeapWord* const end_addr = sd.chunk_align_down(closure.source());
if (sd.chunk_align_down(old_src_addr) != end_addr) {
// The partial object was copied from more than one source chunk.
decrement_destination_counts(cm, src_chunk_idx, end_addr);
HeapWord* const end_addr = sd.region_align_down(closure.source());
if (sd.region_align_down(old_src_addr) != end_addr) {
// The partial object was copied from more than one source region.
decrement_destination_counts(cm, src_region_idx, end_addr);
// Move to the next source chunk, possibly switching spaces as well. All
// Move to the next source region, possibly switching spaces as well. All
// args except end_addr may be modified.
src_chunk_idx = next_src_chunk(closure, src_space_id, src_space_top,
end_addr);
src_region_idx = next_src_region(closure, src_space_id, src_space_top,
end_addr);
}
}
do {
HeapWord* const cur_addr = closure.source();
HeapWord* const end_addr = MIN2(sd.chunk_align_up(cur_addr + 1),
HeapWord* const end_addr = MIN2(sd.region_align_up(cur_addr + 1),
src_space_top);
IterationStatus status = bitmap->iterate(&closure, cur_addr, end_addr);
if (status == ParMarkBitMap::incomplete) {
// The last obj that starts in the source chunk does not end in the chunk.
// The last obj that starts in the source region does not end in the
// region.
assert(closure.source() < end_addr, "sanity");
HeapWord* const obj_beg = closure.source();
HeapWord* const range_end = MIN2(obj_beg + closure.words_remaining(),
......@@ -3299,28 +2808,28 @@ void PSParallelCompact::fill_chunk(ParCompactionManager* cm, size_t chunk_idx)
if (status == ParMarkBitMap::would_overflow) {
// The last object did not fit. Note that interior oop updates were
// deferred, then copy enough of the object to fill the chunk.
chunk_ptr->set_deferred_obj_addr(closure.destination());
// deferred, then copy enough of the object to fill the region.
region_ptr->set_deferred_obj_addr(closure.destination());
status = closure.copy_until_full(); // copies from closure.source()
decrement_destination_counts(cm, src_chunk_idx, closure.source());
chunk_ptr->set_completed();
decrement_destination_counts(cm, src_region_idx, closure.source());
region_ptr->set_completed();
return;
}
if (status == ParMarkBitMap::full) {
decrement_destination_counts(cm, src_chunk_idx, closure.source());
chunk_ptr->set_deferred_obj_addr(NULL);
chunk_ptr->set_completed();
decrement_destination_counts(cm, src_region_idx, closure.source());
region_ptr->set_deferred_obj_addr(NULL);
region_ptr->set_completed();
return;
}
decrement_destination_counts(cm, src_chunk_idx, end_addr);
decrement_destination_counts(cm, src_region_idx, end_addr);
// Move to the next source chunk, possibly switching spaces as well. All
// Move to the next source region, possibly switching spaces as well. All
// args except end_addr may be modified.
src_chunk_idx = next_src_chunk(closure, src_space_id, src_space_top,
end_addr);
src_region_idx = next_src_region(closure, src_space_id, src_space_top,
end_addr);
} while (true);
}
......@@ -3352,15 +2861,15 @@ PSParallelCompact::move_and_update(ParCompactionManager* cm, SpaceId space_id) {
}
#endif
const size_t beg_chunk = sd.addr_to_chunk_idx(beg_addr);
const size_t dp_chunk = sd.addr_to_chunk_idx(dp_addr);
if (beg_chunk < dp_chunk) {
update_and_deadwood_in_dense_prefix(cm, space_id, beg_chunk, dp_chunk);
const size_t beg_region = sd.addr_to_region_idx(beg_addr);
const size_t dp_region = sd.addr_to_region_idx(dp_addr);
if (beg_region < dp_region) {
update_and_deadwood_in_dense_prefix(cm, space_id, beg_region, dp_region);
}
// The destination of the first live object that starts in the chunk is one
// past the end of the partial object entering the chunk (if any).
HeapWord* const dest_addr = sd.partial_obj_end(dp_chunk);
// The destination of the first live object that starts in the region is one
// past the end of the partial object entering the region (if any).
HeapWord* const dest_addr = sd.partial_obj_end(dp_region);
HeapWord* const new_top = _space_info[space_id].new_top();
assert(new_top >= dest_addr, "bad new_top value");
const size_t words = pointer_delta(new_top, dest_addr);
......@@ -3469,172 +2978,6 @@ UpdateOnlyClosure::do_addr(HeapWord* addr, size_t words) {
return ParMarkBitMap::incomplete;
}
BitBlockUpdateClosure::BitBlockUpdateClosure(ParMarkBitMap* mbm,
ParCompactionManager* cm,
size_t chunk_index) :
ParMarkBitMapClosure(mbm, cm),
_live_data_left(0),
_cur_block(0) {
_chunk_start =
PSParallelCompact::summary_data().chunk_to_addr(chunk_index);
_chunk_end =
PSParallelCompact::summary_data().chunk_to_addr(chunk_index) +
ParallelCompactData::ChunkSize;
_chunk_index = chunk_index;
_cur_block =
PSParallelCompact::summary_data().addr_to_block_idx(_chunk_start);
}
bool BitBlockUpdateClosure::chunk_contains_cur_block() {
return ParallelCompactData::chunk_contains_block(_chunk_index, _cur_block);
}
void BitBlockUpdateClosure::reset_chunk(size_t chunk_index) {
DEBUG_ONLY(ParallelCompactData::BlockData::set_cur_phase(7);)
ParallelCompactData& sd = PSParallelCompact::summary_data();
_chunk_index = chunk_index;
_live_data_left = 0;
_chunk_start = sd.chunk_to_addr(chunk_index);
_chunk_end = sd.chunk_to_addr(chunk_index) + ParallelCompactData::ChunkSize;
// The first block in this chunk
size_t first_block = sd.addr_to_block_idx(_chunk_start);
size_t partial_live_size = sd.chunk(chunk_index)->partial_obj_size();
// Set the offset to 0. By definition it should have that value
// but it may have been written while processing an earlier chunk.
if (partial_live_size == 0) {
// No live object extends onto the chunk. The first bit
// in the bit map for the first chunk must be a start bit.
// Although there may not be any marked bits, it is safe
// to set it as a start bit.
sd.block(first_block)->set_start_bit_offset(0);
sd.block(first_block)->set_first_is_start_bit(true);
} else if (sd.partial_obj_ends_in_block(first_block)) {
sd.block(first_block)->set_end_bit_offset(0);
sd.block(first_block)->set_first_is_start_bit(false);
} else {
// The partial object extends beyond the first block.
// There is no object starting in the first block
// so the offset and bit parity are not needed.
// Set the bit parity to start bit so assertions
// work when no bit is found.
sd.block(first_block)->set_end_bit_offset(0);
sd.block(first_block)->set_first_is_start_bit(false);
}
_cur_block = first_block;
#ifdef ASSERT
if (sd.block(first_block)->first_is_start_bit()) {
assert(!sd.partial_obj_ends_in_block(first_block),
"Partial object cannot end in first block");
}
if (PrintGCDetails && Verbose) {
if (partial_live_size == 1) {
gclog_or_tty->print_cr("first_block " PTR_FORMAT
" _offset " PTR_FORMAT
" _first_is_start_bit %d",
first_block,
sd.block(first_block)->raw_offset(),
sd.block(first_block)->first_is_start_bit());
}
}
#endif
DEBUG_ONLY(ParallelCompactData::BlockData::set_cur_phase(17);)
}
// This method is called when an object has been found (both beginning
// and end of the object) in the range of iteration. This method is
// calculating the words of live data to the left of a block. That live
// data includes any object starting to the left of the block (i.e.,
// the live-data-to-the-left of block AAA will include the full size
// of any object entering AAA).
ParMarkBitMapClosure::IterationStatus
BitBlockUpdateClosure::do_addr(HeapWord* addr, size_t words) {
// add the size to the block data.
HeapWord* obj = addr;
ParallelCompactData& sd = PSParallelCompact::summary_data();
assert(bitmap()->obj_size(obj) == words, "bad size");
assert(_chunk_start <= obj, "object is not in chunk");
assert(obj + words <= _chunk_end, "object is not in chunk");
// Update the live data to the left
size_t prev_live_data_left = _live_data_left;
_live_data_left = _live_data_left + words;
// Is this object in the current block.
size_t block_of_obj = sd.addr_to_block_idx(obj);
size_t block_of_obj_last = sd.addr_to_block_idx(obj + words - 1);
HeapWord* block_of_obj_last_addr = sd.block_to_addr(block_of_obj_last);
if (_cur_block < block_of_obj) {
//
// No object crossed the block boundary and this object was found
// on the other side of the block boundary. Update the offset for
// the new block with the data size that does not include this object.
//
// The first bit in block_of_obj is a start bit except in the
// case where the partial object for the chunk extends into
// this block.
if (sd.partial_obj_ends_in_block(block_of_obj)) {
sd.block(block_of_obj)->set_end_bit_offset(prev_live_data_left);
} else {
sd.block(block_of_obj)->set_start_bit_offset(prev_live_data_left);
}
// Does this object pass beyond its block?
if (block_of_obj < block_of_obj_last) {
// Object crosses block boundary. Two blocks need to be updated:
// the current block where the object started
// the block where the object ends
//
// The offset for blocks with no objects starting in them
// (e.g., blocks between _cur_block and block_of_obj_last)
// should not be needed.
// Note that block_of_obj_last may be in another chunk. If so,
// it should be overwritten later. This is a problem (writing
// into a block in a later chunk) for parallel execution.
assert(obj < block_of_obj_last_addr,
"Object should start in previous block");
// obj is crossing into block_of_obj_last so the first bit
// is an end bit.
sd.block(block_of_obj_last)->set_end_bit_offset(_live_data_left);
_cur_block = block_of_obj_last;
} else {
// _first_is_start_bit has already been set correctly
// in the if-then-else above so don't reset it here.
_cur_block = block_of_obj;
}
} else {
// The current block only changes if the object extends beyond
// the block it starts in.
//
// The object starts in the current block.
// Does this object pass beyond the end of it?
if (block_of_obj < block_of_obj_last) {
// Object crosses block boundary.
// See note above on possible blocks between block_of_obj and
// block_of_obj_last
assert(obj < block_of_obj_last_addr,
"Object should start in previous block");
sd.block(block_of_obj_last)->set_end_bit_offset(_live_data_left);
_cur_block = block_of_obj_last;
}
}
// Return incomplete if there are more blocks to be done.
if (chunk_contains_cur_block()) {
return ParMarkBitMap::incomplete;
}
return ParMarkBitMap::complete;
}
// Verify the new location using the forwarding pointer
// from MarkSweep::mark_sweep_phase2(). Set the mark_word
// to the initial value.
......@@ -3707,12 +3050,3 @@ PSParallelCompact::next_compaction_space_id(SpaceId id) {
return last_space_id;
}
}
// Here temporarily for debugging
#ifdef ASSERT
size_t ParallelCompactData::block_idx(BlockData* block) {
size_t index = pointer_delta(block,
PSParallelCompact::summary_data()._block_data, sizeof(BlockData));
return index;
}
#endif
......@@ -76,87 +76,80 @@ class ParallelCompactData
{
public:
// Sizes are in HeapWords, unless indicated otherwise.
static const size_t Log2ChunkSize;
static const size_t ChunkSize;
static const size_t ChunkSizeBytes;
// Mask for the bits in a size_t to get an offset within a chunk.
static const size_t ChunkSizeOffsetMask;
// Mask for the bits in a pointer to get an offset within a chunk.
static const size_t ChunkAddrOffsetMask;
// Mask for the bits in a pointer to get the address of the start of a chunk.
static const size_t ChunkAddrMask;
static const size_t Log2BlockSize;
static const size_t BlockSize;
static const size_t BlockOffsetMask;
static const size_t BlockMask;
static const size_t BlocksPerChunk;
class ChunkData
static const size_t Log2RegionSize;
static const size_t RegionSize;
static const size_t RegionSizeBytes;
// Mask for the bits in a size_t to get an offset within a region.
static const size_t RegionSizeOffsetMask;
// Mask for the bits in a pointer to get an offset within a region.
static const size_t RegionAddrOffsetMask;
// Mask for the bits in a pointer to get the address of the start of a region.
static const size_t RegionAddrMask;
class RegionData
{
public:
// Destination address of the chunk.
// Destination address of the region.
HeapWord* destination() const { return _destination; }
// The first chunk containing data destined for this chunk.
size_t source_chunk() const { return _source_chunk; }
// The first region containing data destined for this region.
size_t source_region() const { return _source_region; }
// The object (if any) starting in this chunk and ending in a different
// chunk that could not be updated during the main (parallel) compaction
// The object (if any) starting in this region and ending in a different
// region that could not be updated during the main (parallel) compaction
// phase. This is different from _partial_obj_addr, which is an object that
// extends onto a source chunk. However, the two uses do not overlap in
// extends onto a source region. However, the two uses do not overlap in
// time, so the same field is used to save space.
HeapWord* deferred_obj_addr() const { return _partial_obj_addr; }
// The starting address of the partial object extending onto the chunk.
// The starting address of the partial object extending onto the region.
HeapWord* partial_obj_addr() const { return _partial_obj_addr; }
// Size of the partial object extending onto the chunk (words).
// Size of the partial object extending onto the region (words).
size_t partial_obj_size() const { return _partial_obj_size; }
// Size of live data that lies within this chunk due to objects that start
// in this chunk (words). This does not include the partial object
// extending onto the chunk (if any), or the part of an object that extends
// onto the next chunk (if any).
// Size of live data that lies within this region due to objects that start
// in this region (words). This does not include the partial object
// extending onto the region (if any), or the part of an object that extends
// onto the next region (if any).
size_t live_obj_size() const { return _dc_and_los & los_mask; }
// Total live data that lies within the chunk (words).
// Total live data that lies within the region (words).
size_t data_size() const { return partial_obj_size() + live_obj_size(); }
// The destination_count is the number of other chunks to which data from
// this chunk will be copied. At the end of the summary phase, the valid
// The destination_count is the number of other regions to which data from
// this region will be copied. At the end of the summary phase, the valid
// values of destination_count are
//
// 0 - data from the chunk will be compacted completely into itself, or the
// chunk is empty. The chunk can be claimed and then filled.
// 1 - data from the chunk will be compacted into 1 other chunk; some
// data from the chunk may also be compacted into the chunk itself.
// 2 - data from the chunk will be copied to 2 other chunks.
// 0 - data from the region will be compacted completely into itself, or the
// region is empty. The region can be claimed and then filled.
// 1 - data from the region will be compacted into 1 other region; some
// data from the region may also be compacted into the region itself.
// 2 - data from the region will be copied to 2 other regions.
//
// During compaction as chunks are emptied, the destination_count is
// During compaction as regions are emptied, the destination_count is
// decremented (atomically) and when it reaches 0, it can be claimed and
// then filled.
//
// A chunk is claimed for processing by atomically changing the
// destination_count to the claimed value (dc_claimed). After a chunk has
// A region is claimed for processing by atomically changing the
// destination_count to the claimed value (dc_claimed). After a region has
// been filled, the destination_count should be set to the completed value
// (dc_completed).
inline uint destination_count() const;
inline uint destination_count_raw() const;
// The location of the java heap data that corresponds to this chunk.
// The location of the java heap data that corresponds to this region.
inline HeapWord* data_location() const;
// The highest address referenced by objects in this chunk.
// The highest address referenced by objects in this region.
inline HeapWord* highest_ref() const;
// Whether this chunk is available to be claimed, has been claimed, or has
// Whether this region is available to be claimed, has been claimed, or has
// been completed.
//
// Minor subtlety: claimed() returns true if the chunk is marked
// completed(), which is desirable since a chunk must be claimed before it
// Minor subtlety: claimed() returns true if the region is marked
// completed(), which is desirable since a region must be claimed before it
// can be completed.
bool available() const { return _dc_and_los < dc_one; }
bool claimed() const { return _dc_and_los >= dc_claimed; }
......@@ -164,11 +157,11 @@ public:
// These are not atomic.
void set_destination(HeapWord* addr) { _destination = addr; }
void set_source_chunk(size_t chunk) { _source_chunk = chunk; }
void set_source_region(size_t region) { _source_region = region; }
void set_deferred_obj_addr(HeapWord* addr) { _partial_obj_addr = addr; }
void set_partial_obj_addr(HeapWord* addr) { _partial_obj_addr = addr; }
void set_partial_obj_size(size_t words) {
_partial_obj_size = (chunk_sz_t) words;
_partial_obj_size = (region_sz_t) words;
}
inline void set_destination_count(uint count);
......@@ -184,101 +177,35 @@ public:
inline bool claim();
private:
// The type used to represent object sizes within a chunk.
typedef uint chunk_sz_t;
// The type used to represent object sizes within a region.
typedef uint region_sz_t;
// Constants for manipulating the _dc_and_los field, which holds both the
// destination count and live obj size. The live obj size lives at the
// least significant end so no masking is necessary when adding.
static const chunk_sz_t dc_shift; // Shift amount.
static const chunk_sz_t dc_mask; // Mask for destination count.
static const chunk_sz_t dc_one; // 1, shifted appropriately.
static const chunk_sz_t dc_claimed; // Chunk has been claimed.
static const chunk_sz_t dc_completed; // Chunk has been completed.
static const chunk_sz_t los_mask; // Mask for live obj size.
HeapWord* _destination;
size_t _source_chunk;
HeapWord* _partial_obj_addr;
chunk_sz_t _partial_obj_size;
chunk_sz_t volatile _dc_and_los;
static const region_sz_t dc_shift; // Shift amount.
static const region_sz_t dc_mask; // Mask for destination count.
static const region_sz_t dc_one; // 1, shifted appropriately.
static const region_sz_t dc_claimed; // Region has been claimed.
static const region_sz_t dc_completed; // Region has been completed.
static const region_sz_t los_mask; // Mask for live obj size.
HeapWord* _destination;
size_t _source_region;
HeapWord* _partial_obj_addr;
region_sz_t _partial_obj_size;
region_sz_t volatile _dc_and_los;
#ifdef ASSERT
// These enable optimizations that are only partially implemented. Use
// debug builds to prevent the code fragments from breaking.
HeapWord* _data_location;
HeapWord* _highest_ref;
HeapWord* _data_location;
HeapWord* _highest_ref;
#endif // #ifdef ASSERT
#ifdef ASSERT
public:
uint _pushed; // 0 until chunk is pushed onto a worker's stack
private:
#endif
};
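// --- Standalone sketch (not part of the HotSpot sources) ---------------------
// A minimal illustration of the packed _dc_and_los encoding declared above:
// the live-object size sits in the low bits (so additions need no masking) and
// the destination count sits in the topmost bits, so adding dc_mask is the same
// as subtracting dc_one. The value chosen for dc_shift here is an assumption
// for illustration; the real constants are defined in psParallelCompact.cpp.
#include <cassert>
#include <cstdint>
#include <cstdio>

typedef uint32_t region_sz_t;
const region_sz_t dc_shift = 27;                             // assumed value
const region_sz_t dc_mask  = ~region_sz_t(0) << dc_shift;    // count bits
const region_sz_t dc_one   = region_sz_t(1) << dc_shift;
const region_sz_t los_mask = ~dc_mask;                       // live-obj-size bits

region_sz_t pack(region_sz_t count, region_sz_t live_words) {
  assert(live_words <= los_mask && "live size would overflow its field");
  return (count << dc_shift) | live_words;
}

int main() {
  region_sz_t dc_and_los = pack(2, 1000);   // 2 destination regions, 1000 live words

  // Adding a live object touches only the low bits; no masking needed.
  dc_and_los += 24;
  std::printf("live size = %u\n", dc_and_los & los_mask);                       // 1024

  // Decrement the destination count by adding dc_mask (wraps to -dc_one).
  dc_and_los += dc_mask;
  assert((dc_and_los & dc_mask) == dc_one);
  std::printf("destination count = %u\n", (dc_and_los & dc_mask) >> dc_shift);  // 1
  return 0;
}
// -----------------------------------------------------------------------------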
// 'Blocks' allow shorter sections of the bitmap to be searched. Each Block
// holds an offset, which is the amount of live data in the Chunk to the left
// of the first live object in the Block. This amount of live data will
// include any object extending into the block. The first block in
// a chunk does not include any partial object extending into
// the chunk.
//
// The offset also encodes the
// 'parity' of the first 1 bit in the Block: a positive offset means the
// first 1 bit marks the start of an object, a negative offset means the first
// 1 bit marks the end of an object.
class BlockData
{
public:
typedef short int blk_ofs_t;
blk_ofs_t offset() const { return _offset >= 0 ? _offset : -_offset; }
blk_ofs_t raw_offset() const { return _offset; }
void set_first_is_start_bit(bool v) { _first_is_start_bit = v; }
#if 0
// The need for this method was anticipated but it is
// never actually used. Do not include it for now. If
// it is needed, consider the problem of what is passed
// as "v". To avoid warning errors the method set_start_bit_offset()
// was changed to take a size_t as the parameter and to do the
// check for the possible overflow. Doing the cast in these
// methods better confines the potential problems caused by
// the size of the field to this class.
void set_raw_offset(blk_ofs_t v) { _offset = v; }
#endif
void set_start_bit_offset(size_t val) {
assert(val >= 0, "sanity");
_offset = (blk_ofs_t) val;
assert(val == (size_t) _offset, "Value is too large");
_first_is_start_bit = true;
}
void set_end_bit_offset(size_t val) {
assert(val >= 0, "sanity");
_offset = (blk_ofs_t) val;
assert(val == (size_t) _offset, "Value is too large");
_offset = - _offset;
_first_is_start_bit = false;
}
bool first_is_start_bit() {
assert(_set_phase > 0, "Not initialized");
return _first_is_start_bit;
}
bool first_is_end_bit() {
assert(_set_phase > 0, "Not initialized");
return !_first_is_start_bit;
}
uint _pushed; // 0 until region is pushed onto a worker's stack
private:
blk_ofs_t _offset;
// This is temporary until the mark_bitmap is separated into
// a start bit array and an end bit array.
bool _first_is_start_bit;
#ifdef ASSERT
short _set_phase;
static short _cur_phase;
public:
static void set_cur_phase(short v) { _cur_phase = v; }
#endif
};
......@@ -286,27 +213,21 @@ public:
ParallelCompactData();
bool initialize(MemRegion covered_region);
size_t chunk_count() const { return _chunk_count; }
size_t region_count() const { return _region_count; }
// Convert chunk indices to/from ChunkData pointers.
inline ChunkData* chunk(size_t chunk_idx) const;
inline size_t chunk(const ChunkData* const chunk_ptr) const;
// Convert region indices to/from RegionData pointers.
inline RegionData* region(size_t region_idx) const;
inline size_t region(const RegionData* const region_ptr) const;
// Returns true if the given address is contained within the chunk
bool chunk_contains(size_t chunk_index, HeapWord* addr);
size_t block_count() const { return _block_count; }
inline BlockData* block(size_t n) const;
// Returns true if the given block is in the given chunk.
static bool chunk_contains_block(size_t chunk_index, size_t block_index);
// Returns true if the given address is contained within the region
bool region_contains(size_t region_index, HeapWord* addr);
void add_obj(HeapWord* addr, size_t len);
void add_obj(oop p, size_t len) { add_obj((HeapWord*)p, len); }
// Fill in the chunks covering [beg, end) so that no data moves; i.e., the
// destination of chunk n is simply the start of chunk n. The argument beg
// must be chunk-aligned; end need not be.
// Fill in the regions covering [beg, end) so that no data moves; i.e., the
// destination of region n is simply the start of region n. The argument beg
// must be region-aligned; end need not be.
void summarize_dense_prefix(HeapWord* beg, HeapWord* end);
bool summarize(HeapWord* target_beg, HeapWord* target_end,
......@@ -314,48 +235,33 @@ public:
HeapWord** target_next, HeapWord** source_next = 0);
void clear();
void clear_range(size_t beg_chunk, size_t end_chunk);
void clear_range(size_t beg_region, size_t end_region);
void clear_range(HeapWord* beg, HeapWord* end) {
clear_range(addr_to_chunk_idx(beg), addr_to_chunk_idx(end));
clear_range(addr_to_region_idx(beg), addr_to_region_idx(end));
}
// Return the number of words between addr and the start of the chunk
// Return the number of words between addr and the start of the region
// containing addr.
inline size_t chunk_offset(const HeapWord* addr) const;
// Convert addresses to/from a chunk index or chunk pointer.
inline size_t addr_to_chunk_idx(const HeapWord* addr) const;
inline ChunkData* addr_to_chunk_ptr(const HeapWord* addr) const;
inline HeapWord* chunk_to_addr(size_t chunk) const;
inline HeapWord* chunk_to_addr(size_t chunk, size_t offset) const;
inline HeapWord* chunk_to_addr(const ChunkData* chunk) const;
inline HeapWord* chunk_align_down(HeapWord* addr) const;
inline HeapWord* chunk_align_up(HeapWord* addr) const;
inline bool is_chunk_aligned(HeapWord* addr) const;
// Analogous to chunk_offset() for blocks.
size_t block_offset(const HeapWord* addr) const;
size_t addr_to_block_idx(const HeapWord* addr) const;
size_t addr_to_block_idx(const oop obj) const {
return addr_to_block_idx((HeapWord*) obj);
}
inline BlockData* addr_to_block_ptr(const HeapWord* addr) const;
inline HeapWord* block_to_addr(size_t block) const;
inline size_t region_offset(const HeapWord* addr) const;
// Convert addresses to/from a region index or region pointer.
inline size_t addr_to_region_idx(const HeapWord* addr) const;
inline RegionData* addr_to_region_ptr(const HeapWord* addr) const;
inline HeapWord* region_to_addr(size_t region) const;
inline HeapWord* region_to_addr(size_t region, size_t offset) const;
inline HeapWord* region_to_addr(const RegionData* region) const;
inline HeapWord* region_align_down(HeapWord* addr) const;
inline HeapWord* region_align_up(HeapWord* addr) const;
inline bool is_region_aligned(HeapWord* addr) const;
// Return the address one past the end of the partial object.
HeapWord* partial_obj_end(size_t chunk_idx) const;
HeapWord* partial_obj_end(size_t region_idx) const;
// Return the new location of the object p after
// the compaction.
HeapWord* calc_new_pointer(HeapWord* addr);
// Same as calc_new_pointer() using blocks.
HeapWord* block_calc_new_pointer(HeapWord* addr);
// Same as calc_new_pointer() using chunks.
HeapWord* chunk_calc_new_pointer(HeapWord* addr);
HeapWord* calc_new_pointer(oop p) {
return calc_new_pointer((HeapWord*) p);
}
......@@ -363,22 +269,13 @@ public:
// Return the updated address for the given klass
klassOop calc_new_klass(klassOop);
// Given a block returns true if the partial object for the
// corresponding chunk ends in the block. Returns false otherwise.
// If there is no partial object, returns false.
bool partial_obj_ends_in_block(size_t block_index);
// Returns the block index for the block
static size_t block_idx(BlockData* block);
#ifdef ASSERT
void verify_clear(const PSVirtualSpace* vspace);
void verify_clear();
#endif // #ifdef ASSERT
private:
bool initialize_block_data(size_t region_size);
bool initialize_chunk_data(size_t region_size);
bool initialize_region_data(size_t region_size);
PSVirtualSpace* create_vspace(size_t count, size_t element_size);
private:
......@@ -387,74 +284,70 @@ private:
HeapWord* _region_end;
#endif // #ifdef ASSERT
PSVirtualSpace* _chunk_vspace;
ChunkData* _chunk_data;
size_t _chunk_count;
PSVirtualSpace* _block_vspace;
BlockData* _block_data;
size_t _block_count;
PSVirtualSpace* _region_vspace;
RegionData* _region_data;
size_t _region_count;
};
inline uint
ParallelCompactData::ChunkData::destination_count_raw() const
ParallelCompactData::RegionData::destination_count_raw() const
{
return _dc_and_los & dc_mask;
}
inline uint
ParallelCompactData::ChunkData::destination_count() const
ParallelCompactData::RegionData::destination_count() const
{
return destination_count_raw() >> dc_shift;
}
inline void
ParallelCompactData::ChunkData::set_destination_count(uint count)
ParallelCompactData::RegionData::set_destination_count(uint count)
{
assert(count <= (dc_completed >> dc_shift), "count too large");
const chunk_sz_t live_sz = (chunk_sz_t) live_obj_size();
const region_sz_t live_sz = (region_sz_t) live_obj_size();
_dc_and_los = (count << dc_shift) | live_sz;
}
inline void ParallelCompactData::ChunkData::set_live_obj_size(size_t words)
inline void ParallelCompactData::RegionData::set_live_obj_size(size_t words)
{
assert(words <= los_mask, "would overflow");
_dc_and_los = destination_count_raw() | (chunk_sz_t)words;
_dc_and_los = destination_count_raw() | (region_sz_t)words;
}
inline void ParallelCompactData::ChunkData::decrement_destination_count()
inline void ParallelCompactData::RegionData::decrement_destination_count()
{
assert(_dc_and_los < dc_claimed, "already claimed");
assert(_dc_and_los >= dc_one, "count would go negative");
Atomic::add((int)dc_mask, (volatile int*)&_dc_and_los);
}
inline HeapWord* ParallelCompactData::ChunkData::data_location() const
inline HeapWord* ParallelCompactData::RegionData::data_location() const
{
DEBUG_ONLY(return _data_location;)
NOT_DEBUG(return NULL;)
}
inline HeapWord* ParallelCompactData::ChunkData::highest_ref() const
inline HeapWord* ParallelCompactData::RegionData::highest_ref() const
{
DEBUG_ONLY(return _highest_ref;)
NOT_DEBUG(return NULL;)
}
inline void ParallelCompactData::ChunkData::set_data_location(HeapWord* addr)
inline void ParallelCompactData::RegionData::set_data_location(HeapWord* addr)
{
DEBUG_ONLY(_data_location = addr;)
}
inline void ParallelCompactData::ChunkData::set_completed()
inline void ParallelCompactData::RegionData::set_completed()
{
assert(claimed(), "must be claimed first");
_dc_and_los = dc_completed | (chunk_sz_t) live_obj_size();
_dc_and_los = dc_completed | (region_sz_t) live_obj_size();
}
// MT-unsafe claiming of a chunk. Should only be used during single threaded
// MT-unsafe claiming of a region. Should only be used during single threaded
// execution.
inline bool ParallelCompactData::ChunkData::claim_unsafe()
inline bool ParallelCompactData::RegionData::claim_unsafe()
{
if (available()) {
_dc_and_los |= dc_claimed;
......@@ -463,13 +356,13 @@ inline bool ParallelCompactData::ChunkData::claim_unsafe()
return false;
}
inline void ParallelCompactData::ChunkData::add_live_obj(size_t words)
inline void ParallelCompactData::RegionData::add_live_obj(size_t words)
{
assert(words <= (size_t)los_mask - live_obj_size(), "overflow");
Atomic::add((int) words, (volatile int*) &_dc_and_los);
}
inline void ParallelCompactData::ChunkData::set_highest_ref(HeapWord* addr)
inline void ParallelCompactData::RegionData::set_highest_ref(HeapWord* addr)
{
#ifdef ASSERT
HeapWord* tmp = _highest_ref;
......@@ -479,7 +372,7 @@ inline void ParallelCompactData::ChunkData::set_highest_ref(HeapWord* addr)
#endif // #ifdef ASSERT
}
inline bool ParallelCompactData::ChunkData::claim()
inline bool ParallelCompactData::RegionData::claim()
{
const int los = (int) live_obj_size();
const int old = Atomic::cmpxchg(dc_claimed | los,
......@@ -487,119 +380,85 @@ inline bool ParallelCompactData::ChunkData::claim()
return old == los;
}
inline ParallelCompactData::ChunkData*
ParallelCompactData::chunk(size_t chunk_idx) const
inline ParallelCompactData::RegionData*
ParallelCompactData::region(size_t region_idx) const
{
assert(chunk_idx <= chunk_count(), "bad arg");
return _chunk_data + chunk_idx;
assert(region_idx <= region_count(), "bad arg");
return _region_data + region_idx;
}
inline size_t
ParallelCompactData::chunk(const ChunkData* const chunk_ptr) const
ParallelCompactData::region(const RegionData* const region_ptr) const
{
assert(chunk_ptr >= _chunk_data, "bad arg");
assert(chunk_ptr <= _chunk_data + chunk_count(), "bad arg");
return pointer_delta(chunk_ptr, _chunk_data, sizeof(ChunkData));
}
inline ParallelCompactData::BlockData*
ParallelCompactData::block(size_t n) const {
assert(n < block_count(), "bad arg");
return _block_data + n;
assert(region_ptr >= _region_data, "bad arg");
assert(region_ptr <= _region_data + region_count(), "bad arg");
return pointer_delta(region_ptr, _region_data, sizeof(RegionData));
}
inline size_t
ParallelCompactData::chunk_offset(const HeapWord* addr) const
ParallelCompactData::region_offset(const HeapWord* addr) const
{
assert(addr >= _region_start, "bad addr");
assert(addr <= _region_end, "bad addr");
return (size_t(addr) & ChunkAddrOffsetMask) >> LogHeapWordSize;
return (size_t(addr) & RegionAddrOffsetMask) >> LogHeapWordSize;
}
inline size_t
ParallelCompactData::addr_to_chunk_idx(const HeapWord* addr) const
ParallelCompactData::addr_to_region_idx(const HeapWord* addr) const
{
assert(addr >= _region_start, "bad addr");
assert(addr <= _region_end, "bad addr");
return pointer_delta(addr, _region_start) >> Log2ChunkSize;
return pointer_delta(addr, _region_start) >> Log2RegionSize;
}
inline ParallelCompactData::ChunkData*
ParallelCompactData::addr_to_chunk_ptr(const HeapWord* addr) const
inline ParallelCompactData::RegionData*
ParallelCompactData::addr_to_region_ptr(const HeapWord* addr) const
{
return chunk(addr_to_chunk_idx(addr));
return region(addr_to_region_idx(addr));
}
inline HeapWord*
ParallelCompactData::chunk_to_addr(size_t chunk) const
ParallelCompactData::region_to_addr(size_t region) const
{
assert(chunk <= _chunk_count, "chunk out of range");
return _region_start + (chunk << Log2ChunkSize);
assert(region <= _region_count, "region out of range");
return _region_start + (region << Log2RegionSize);
}
inline HeapWord*
ParallelCompactData::chunk_to_addr(const ChunkData* chunk) const
ParallelCompactData::region_to_addr(const RegionData* region) const
{
return chunk_to_addr(pointer_delta(chunk, _chunk_data, sizeof(ChunkData)));
return region_to_addr(pointer_delta(region, _region_data,
sizeof(RegionData)));
}
inline HeapWord*
ParallelCompactData::chunk_to_addr(size_t chunk, size_t offset) const
ParallelCompactData::region_to_addr(size_t region, size_t offset) const
{
assert(chunk <= _chunk_count, "chunk out of range");
assert(offset < ChunkSize, "offset too big"); // This may be too strict.
return chunk_to_addr(chunk) + offset;
assert(region <= _region_count, "region out of range");
assert(offset < RegionSize, "offset too big"); // This may be too strict.
return region_to_addr(region) + offset;
}
inline HeapWord*
ParallelCompactData::chunk_align_down(HeapWord* addr) const
ParallelCompactData::region_align_down(HeapWord* addr) const
{
assert(addr >= _region_start, "bad addr");
assert(addr < _region_end + ChunkSize, "bad addr");
return (HeapWord*)(size_t(addr) & ChunkAddrMask);
assert(addr < _region_end + RegionSize, "bad addr");
return (HeapWord*)(size_t(addr) & RegionAddrMask);
}
inline HeapWord*
ParallelCompactData::chunk_align_up(HeapWord* addr) const
ParallelCompactData::region_align_up(HeapWord* addr) const
{
assert(addr >= _region_start, "bad addr");
assert(addr <= _region_end, "bad addr");
return chunk_align_down(addr + ChunkSizeOffsetMask);
return region_align_down(addr + RegionSizeOffsetMask);
}
inline bool
ParallelCompactData::is_chunk_aligned(HeapWord* addr) const
{
return chunk_offset(addr) == 0;
}
inline size_t
ParallelCompactData::block_offset(const HeapWord* addr) const
{
assert(addr >= _region_start, "bad addr");
assert(addr <= _region_end, "bad addr");
return pointer_delta(addr, _region_start) & BlockOffsetMask;
}
inline size_t
ParallelCompactData::addr_to_block_idx(const HeapWord* addr) const
{
assert(addr >= _region_start, "bad addr");
assert(addr <= _region_end, "bad addr");
return pointer_delta(addr, _region_start) >> Log2BlockSize;
}
inline ParallelCompactData::BlockData*
ParallelCompactData::addr_to_block_ptr(const HeapWord* addr) const
{
return block(addr_to_block_idx(addr));
}
inline HeapWord*
ParallelCompactData::block_to_addr(size_t block) const
ParallelCompactData::is_region_aligned(HeapWord* addr) const
{
assert(block < _block_count, "block out of range");
return _region_start + (block << Log2BlockSize);
return region_offset(addr) == 0;
}
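// --- Standalone sketch (not part of the HotSpot sources) ---------------------
// A minimal illustration of the address arithmetic used by the conversions
// above: region index, word offset within a region, and align-down/align-up
// are all shifts and masks on the raw address. The 8-byte HeapWord and the
// 512-word region size below are assumptions for illustration; the real
// Log2RegionSize is defined in psParallelCompact.cpp.
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstdio>

const size_t LogHeapWordSize      = 3;                          // 8-byte words assumed
const size_t Log2RegionSize       = 9;                          // 512 words per region
const size_t RegionSize           = size_t(1) << Log2RegionSize;
const size_t RegionSizeBytes      = RegionSize << LogHeapWordSize;
const size_t RegionSizeOffsetMask = RegionSize - 1;             // offset in words
const size_t RegionAddrOffsetMask = RegionSizeBytes - 1;        // offset in bytes
const size_t RegionAddrMask       = ~RegionAddrOffsetMask;      // region start bits

int main() {
  const uintptr_t region_start = 0x100000;                      // plays the role of _region_start
  const uintptr_t addr         = region_start + 5 * RegionSizeBytes + 40;

  // addr_to_region_idx: word distance from the start of the space, shifted.
  size_t region_idx  = ((addr - region_start) >> LogHeapWordSize) >> Log2RegionSize;
  // region_offset: byte offset within the region, converted to words.
  size_t word_offset = (addr & RegionAddrOffsetMask) >> LogHeapWordSize;
  // region_align_down / region_align_up.
  uintptr_t down = addr & RegionAddrMask;
  uintptr_t up   = (addr + (RegionSizeOffsetMask << LogHeapWordSize)) & RegionAddrMask;

  assert(region_idx == 5 && word_offset == 5);
  std::printf("idx=%zu offset=%zu down=%#lx up=%#lx\n",
              region_idx, word_offset, (unsigned long)down, (unsigned long)up);
  return 0;
}
// -----------------------------------------------------------------------------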
// Abstract closure for use with ParMarkBitMap::iterate(), which will invoke the
......@@ -687,45 +546,15 @@ inline void ParMarkBitMapClosure::decrement_words_remaining(size_t words) {
_words_remaining -= words;
}
// Closure for updating the block data during the summary phase.
class BitBlockUpdateClosure: public ParMarkBitMapClosure {
// ParallelCompactData::BlockData::blk_ofs_t _live_data_left;
size_t _live_data_left;
size_t _cur_block;
HeapWord* _chunk_start;
HeapWord* _chunk_end;
size_t _chunk_index;
public:
BitBlockUpdateClosure(ParMarkBitMap* mbm,
ParCompactionManager* cm,
size_t chunk_index);
size_t cur_block() { return _cur_block; }
size_t chunk_index() { return _chunk_index; }
size_t live_data_left() { return _live_data_left; }
// Returns true if the first bit in the current block (cur_block) is
// a start bit.
// Returns true if the current block is within the chunk for the closure;
bool chunk_contains_cur_block();
// Set the chunk index and related chunk values for
// a new chunk.
void reset_chunk(size_t chunk_index);
virtual IterationStatus do_addr(HeapWord* addr, size_t words);
};
// The UseParallelOldGC collector is a stop-the-world garbage
// collector that does parts of the collection using parallel threads.
// The collection includes the tenured generation and the young
// generation. The permanent generation is collected at the same
// time as the other two generations but the permanent generation
// is collect by a single GC thread. The permanent generation is
// collected serially because of the requirement that during the
// processing of a klass AAA, any objects reference by AAA must
// already have been processed. This requirement is enforced by
// a left (lower address) to right (higher address) sliding compaction.
// The UseParallelOldGC collector is a stop-the-world garbage collector that
// does parts of the collection using parallel threads. The collection includes
// the tenured generation and the young generation. The permanent generation is
// collected at the same time as the other two generations but the permanent
// generation is collected by a single GC thread. The permanent generation is
// collected serially because of the requirement that during the processing of a
// klass AAA, any objects referenced by AAA must already have been processed.
// This requirement is enforced by a left (lower address) to right (higher
// address) sliding compaction.
//
// There are four phases of the collection.
//
......@@ -740,81 +569,75 @@ class BitBlockUpdateClosure: public ParMarkBitMapClosure {
// - move the objects to their destination
// - update some references and reinitialize some variables
//
// These three phases are invoked in PSParallelCompact::invoke_no_policy().
// The marking phase is implemented in PSParallelCompact::marking_phase()
// and does a complete marking of the heap.
// The summary phase is implemented in PSParallelCompact::summary_phase().
// The move and update phase is implemented in PSParallelCompact::compact().
// These three phases are invoked in PSParallelCompact::invoke_no_policy(). The
// marking phase is implemented in PSParallelCompact::marking_phase() and does a
// complete marking of the heap. The summary phase is implemented in
// PSParallelCompact::summary_phase(). The move and update phase is implemented
// in PSParallelCompact::compact().
//
// A space that is being collected is divided into chunks and with
// each chunk is associated an object of type ParallelCompactData.
// Each chunk is of a fixed size and typically will contain more than
// 1 object and may have parts of objects at the front and back of the
// chunk.
// A space that is being collected is divided into regions and with each region
// is associated an object of type ParallelCompactData. Each region is of a
// fixed size and typically will contain more than 1 object and may have parts
// of objects at the front and back of the region.
//
// chunk -----+---------------------+----------
// region -----+---------------------+----------
// objects covered [ AAA )[ BBB )[ CCC )[ DDD )
//
// The marking phase does a complete marking of all live objects in the
// heap. The marking also compiles the size of the data for
// all live objects covered by the chunk. This size includes the
// part of any live object spanning onto the chunk (part of AAA
// if it is live) from the front, all live objects contained in the chunk
// (BBB and/or CCC if they are live), and the part of any live objects
// covered by the chunk that extends off the chunk (part of DDD if it is
// live). The marking phase uses multiple GC threads and marking is
// done in a bit array of type ParMarkBitMap. The marking of the
// bit map is done atomically as is the accumulation of the size of the
// live objects covered by a chunk.
// The marking phase does a complete marking of all live objects in the heap.
// The marking also compiles the size of the data for all live objects covered
// by the region. This size includes the part of any live object spanning onto
// the region (part of AAA if it is live) from the front, all live objects
// contained in the region (BBB and/or CCC if they are live), and the part of
// any live objects covered by the region that extends off the region (part of
// DDD if it is live). The marking phase uses multiple GC threads and marking
// is done in a bit array of type ParMarkBitMap. The marking of the bit map is
// done atomically as is the accumulation of the size of the live objects
// covered by a region.
//
// The summary phase calculates the total live data to the left of
// each chunk XXX. Based on that total and the bottom of the space,
// it can calculate the starting location of the live data in XXX.
// The summary phase calculates for each chunk XXX quantities such as
// The summary phase calculates the total live data to the left of each region
// XXX. Based on that total and the bottom of the space, it can calculate the
// starting location of the live data in XXX. The summary phase calculates for
// each region XXX quantities such as
//
// - the amount of live data at the beginning of a chunk from an object
// entering the chunk.
// - the location of the first live data on the chunk
// - a count of the number of chunks receiving live data from XXX.
// - the amount of live data at the beginning of a region from an object
// entering the region.
// - the location of the first live data on the region
// - a count of the number of regions receiving live data from XXX.
//
// See ParallelCompactData for precise details. The summary phase also
// calculates the dense prefix for the compaction. The dense prefix
// is a portion at the beginning of the space that is not moved. The
// objects in the dense prefix do need to have their object references
// updated. See method summarize_dense_prefix().
// calculates the dense prefix for the compaction. The dense prefix is a
// portion at the beginning of the space that is not moved. The objects in the
// dense prefix do need to have their object references updated. See method
// summarize_dense_prefix().
//
// The summary phase is done using 1 GC thread.
//
// The compaction phase moves objects to their new location and updates
// all references in the object.
//
// A current exception is that objects that cross a chunk boundary
// are moved but do not have their references updated. References are
// not updated because it cannot easily be determined if the klass
// pointer KKK for the object AAA has been updated. KKK likely resides
// in a chunk to the left of the chunk containing AAA. These AAA's
// have their references updated at the end in a clean up phase.
// See the method PSParallelCompact::update_deferred_objects(). An
// alternate strategy is being investigated for this deferral of updating.
// The compaction phase moves objects to their new location and updates all
// references in the object.
//
// Compaction is done on a chunk basis. A chunk that is ready to be
// filled is put on a ready list and GC threads take chunks off the list
// and fill them. A chunk is ready to be filled if it is
// empty of live objects. Such a chunk may have been initially
// empty (only contained
// dead objects) or may have had all its live objects copied out already.
// A chunk that compacts into itself is also ready for filling. The
// ready list is initially filled with empty chunks and chunks compacting
// into themselves. There is always at least 1 chunk that can be put on
// the ready list. The chunks are atomically added and removed from
// the ready list.
// A current exception is that objects that cross a region boundary are moved
// but do not have their references updated. References are not updated because
// it cannot easily be determined if the klass pointer KKK for the object AAA
// has been updated. KKK likely resides in a region to the left of the region
// containing AAA. These AAA's have their references updated at the end in a
// clean up phase. See the method PSParallelCompact::update_deferred_objects().
// An alternate strategy is being investigated for this deferral of updating.
//
// Compaction is done on a region basis. A region that is ready to be filled is
// put on a ready list and GC threads take regions off the list and fill them. A
// region is ready to be filled if it is empty of live objects. Such a region may
// have been initially empty (only contained dead objects) or may have had all
// its live objects copied out already. A region that compacts into itself is
// also ready for filling. The ready list is initially filled with empty
// regions and regions compacting into themselves. There is always at least 1
// region that can be put on the ready list. The regions are atomically added
// and removed from the ready list.
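// --- Standalone sketch (not part of the HotSpot sources) ---------------------
// The summary calculation described above can be pictured as a prefix sum over
// per-region live sizes: everything live to the left of region i is compacted
// down toward the bottom of the space, and that total is the destination of
// region i's first live word. This sketch ignores the dense prefix, partial
// objects and multiple spaces; the region size and live counts are made-up
// values for illustration.
#include <cstddef>
#include <cstdio>
#include <vector>

int main() {
  const std::size_t region_size = 512;                 // words, assumed
  const std::size_t bottom      = 0;                   // word index of the space bottom
  // live_words[i] plays the role of partial_obj_size() + live_obj_size().
  std::vector<std::size_t> live_words = {512, 300, 0, 128, 512};

  std::size_t live_to_left = 0;
  for (std::size_t i = 0; i < live_words.size(); ++i) {
    std::size_t destination = bottom + live_to_left;   // where region i's data goes
    std::printf("region %zu: live=%zu source word=%zu dest word=%zu\n",
                i, live_words[i], bottom + i * region_size, destination);
    live_to_left += live_words[i];
  }
  return 0;
}
// In the real data structure this destination, together with the number of
// regions it spills into (destination_count), is what lets each region be
// claimed and filled independently during the parallel compaction phase.
// -----------------------------------------------------------------------------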
class PSParallelCompact : AllStatic {
public:
// Convenient access to type names.
typedef ParMarkBitMap::idx_t idx_t;
typedef ParallelCompactData::ChunkData ChunkData;
typedef ParallelCompactData::BlockData BlockData;
typedef ParallelCompactData::RegionData RegionData;
typedef enum {
perm_space_id, old_space_id, eden_space_id,
......@@ -977,26 +800,26 @@ class PSParallelCompact : AllStatic {
// not reclaimed).
static double dead_wood_limiter(double density, size_t min_percent);
// Find the first (left-most) chunk in the range [beg, end) that has at least
// Find the first (left-most) region in the range [beg, end) that has at least
// dead_words of dead space to the left. The argument beg must be the first
// chunk in the space that is not completely live.
static ChunkData* dead_wood_limit_chunk(const ChunkData* beg,
const ChunkData* end,
size_t dead_words);
// region in the space that is not completely live.
static RegionData* dead_wood_limit_region(const RegionData* beg,
const RegionData* end,
size_t dead_words);
// Return a pointer to the first chunk in the range [beg, end) that is not
// Return a pointer to the first region in the range [beg, end) that is not
// completely full.
static ChunkData* first_dead_space_chunk(const ChunkData* beg,
const ChunkData* end);
static RegionData* first_dead_space_region(const RegionData* beg,
const RegionData* end);
// Return a value indicating the benefit or 'yield' if the compacted region
// were to start (or equivalently if the dense prefix were to end) at the
// candidate chunk. Higher values are better.
// candidate region. Higher values are better.
//
// The value is based on the amount of space reclaimed vs. the costs of (a)
// updating references in the dense prefix plus (b) copying objects and
// updating references in the compacted region.
static inline double reclaimed_ratio(const ChunkData* const candidate,
static inline double reclaimed_ratio(const RegionData* const candidate,
HeapWord* const bottom,
HeapWord* const top,
HeapWord* const new_top);
......@@ -1005,9 +828,9 @@ class PSParallelCompact : AllStatic {
static HeapWord* compute_dense_prefix(const SpaceId id,
bool maximum_compaction);
// Return true if dead space crosses onto the specified Chunk; bit must be the
// bit index corresponding to the first word of the Chunk.
static inline bool dead_space_crosses_boundary(const ChunkData* chunk,
// Return true if dead space crosses onto the specified Region; bit must be
// the bit index corresponding to the first word of the Region.
static inline bool dead_space_crosses_boundary(const RegionData* region,
idx_t bit);
// Summary phase utility routine to fill dead space (if any) at the dense
......@@ -1019,12 +842,6 @@ class PSParallelCompact : AllStatic {
static void summarize_space(SpaceId id, bool maximum_compaction);
static void summary_phase(ParCompactionManager* cm, bool maximum_compaction);
static bool block_first_offset(size_t block_index, idx_t* block_offset_ptr);
// Fill in the BlockData
static void summarize_blocks(ParCompactionManager* cm,
SpaceId first_compaction_space_id);
// The space that is compacted after space_id.
static SpaceId next_compaction_space_id(SpaceId space_id);
......@@ -1038,16 +855,16 @@ class PSParallelCompact : AllStatic {
static void compact_perm(ParCompactionManager* cm);
static void compact();
// Add available chunks to the stack and draining tasks to the task queue.
static void enqueue_chunk_draining_tasks(GCTaskQueue* q,
uint parallel_gc_threads);
// Add available regions to the stack and draining tasks to the task queue.
static void enqueue_region_draining_tasks(GCTaskQueue* q,
uint parallel_gc_threads);
// Add dense prefix update tasks to the task queue.
static void enqueue_dense_prefix_tasks(GCTaskQueue* q,
uint parallel_gc_threads);
// Add chunk stealing tasks to the task queue.
static void enqueue_chunk_stealing_tasks(
// Add region stealing tasks to the task queue.
static void enqueue_region_stealing_tasks(
GCTaskQueue* q,
ParallelTaskTerminator* terminator_ptr,
uint parallel_gc_threads);
......@@ -1154,56 +971,56 @@ class PSParallelCompact : AllStatic {
// Move and update the live objects in the specified space.
static void move_and_update(ParCompactionManager* cm, SpaceId space_id);
// Process the end of the given chunk range in the dense prefix.
// Process the end of the given region range in the dense prefix.
// This includes saving any object not updated.
static void dense_prefix_chunks_epilogue(ParCompactionManager* cm,
size_t chunk_start_index,
size_t chunk_end_index,
idx_t exiting_object_offset,
idx_t chunk_offset_start,
idx_t chunk_offset_end);
// Update a chunk in the dense prefix. For each live object
// in the chunk, update its interior references. For each
static void dense_prefix_regions_epilogue(ParCompactionManager* cm,
size_t region_start_index,
size_t region_end_index,
idx_t exiting_object_offset,
idx_t region_offset_start,
idx_t region_offset_end);
// Update a region in the dense prefix. For each live object
// in the region, update its interior references. For each
// dead object, fill it with deadwood. Dead space at the end
// of a chunk range will be filled to the start of the next
// live object regardless of the chunk_index_end. None of the
// of a region range will be filled to the start of the next
// live object regardless of the region_index_end. None of the
// objects in the dense prefix move and dead space is dead
// (holds only dead objects that don't need any processing), so
// dead space can be filled in any order.
static void update_and_deadwood_in_dense_prefix(ParCompactionManager* cm,
SpaceId space_id,
size_t chunk_index_start,
size_t chunk_index_end);
size_t region_index_start,
size_t region_index_end);
// Return the address of the count + 1st live word in the range [beg, end).
static HeapWord* skip_live_words(HeapWord* beg, HeapWord* end, size_t count);
// Return the address of the word to be copied to dest_addr, which must be
// aligned to a chunk boundary.
// aligned to a region boundary.
static HeapWord* first_src_addr(HeapWord* const dest_addr,
size_t src_chunk_idx);
size_t src_region_idx);
// Determine the next source chunk, set closure.source() to the start of the
// new chunk and return the chunk index. Parameter end_addr is the address one
// Determine the next source region, set closure.source() to the start of the
// new region and return the region index. Parameter end_addr is the address one
// beyond the end of source range just processed. If necessary, switch to a
// new source space and set src_space_id (in-out parameter) and src_space_top
// (out parameter) accordingly.
static size_t next_src_chunk(MoveAndUpdateClosure& closure,
SpaceId& src_space_id,
HeapWord*& src_space_top,
HeapWord* end_addr);
static size_t next_src_region(MoveAndUpdateClosure& closure,
SpaceId& src_space_id,
HeapWord*& src_space_top,
HeapWord* end_addr);
// Decrement the destination count for each non-empty source chunk in the
// range [beg_chunk, chunk(chunk_align_up(end_addr))).
// Decrement the destination count for each non-empty source region in the
// range [beg_region, region(region_align_up(end_addr))).
static void decrement_destination_counts(ParCompactionManager* cm,
size_t beg_chunk,
size_t beg_region,
HeapWord* end_addr);
// Fill a chunk, copying objects from one or more source chunks.
static void fill_chunk(ParCompactionManager* cm, size_t chunk_idx);
static void fill_and_update_chunk(ParCompactionManager* cm, size_t chunk) {
fill_chunk(cm, chunk);
// Fill a region, copying objects from one or more source regions.
static void fill_region(ParCompactionManager* cm, size_t region_idx);
static void fill_and_update_region(ParCompactionManager* cm, size_t region) {
fill_region(cm, region);
}
// Update the deferred objects in the space.
......@@ -1259,7 +1076,7 @@ class PSParallelCompact : AllStatic {
#ifndef PRODUCT
// Debugging support.
static const char* space_names[last_space_id];
static void print_chunk_ranges();
static void print_region_ranges();
static void print_dense_prefix_stats(const char* const algorithm,
const SpaceId id,
const bool maximum_compaction,
......@@ -1267,7 +1084,7 @@ class PSParallelCompact : AllStatic {
#endif // #ifndef PRODUCT
#ifdef ASSERT
// Verify that all the chunks have been emptied.
// Verify that all the regions have been emptied.
static void verify_complete(SpaceId space_id);
#endif // #ifdef ASSERT
};
......@@ -1376,17 +1193,17 @@ inline double PSParallelCompact::normal_distribution(double density) {
}
inline bool
PSParallelCompact::dead_space_crosses_boundary(const ChunkData* chunk,
PSParallelCompact::dead_space_crosses_boundary(const RegionData* region,
idx_t bit)
{
assert(bit > 0, "cannot call this for the first bit/chunk");
assert(_summary_data.chunk_to_addr(chunk) == _mark_bitmap.bit_to_addr(bit),
assert(bit > 0, "cannot call this for the first bit/region");
assert(_summary_data.region_to_addr(region) == _mark_bitmap.bit_to_addr(bit),
"sanity check");
// Dead space crosses the boundary if (1) a partial object does not extend
// onto the chunk, (2) an object does not start at the beginning of the chunk,
// and (3) an object does not end at the end of the prior chunk.
return chunk->partial_obj_size() == 0 &&
// onto the region, (2) an object does not start at the beginning of the
// region, and (3) an object does not end at the end of the prior region.
return region->partial_obj_size() == 0 &&
!_mark_bitmap.is_obj_beg(bit) &&
!_mark_bitmap.is_obj_end(bit - 1);
}
......
......@@ -123,8 +123,6 @@ void PSPermGen::move_and_update(ParCompactionManager* cm) {
void PSPermGen::precompact() {
// Reset start array first.
debug_only(if (!UseParallelOldGC || !VerifyParallelOldWithMarkSweep) {)
_start_array.reset();
debug_only(})
object_mark_sweep()->precompact();
}
......@@ -50,7 +50,8 @@ class ImmutableSpace: public CHeapObj {
size_t capacity_in_bytes() const { return capacity_in_words() * HeapWordSize; }
// Size computations. Sizes are in heapwords.
size_t capacity_in_words() const { return pointer_delta(end(), bottom()); }
size_t capacity_in_words() const { return pointer_delta(end(), bottom()); }
virtual size_t capacity_in_words(Thread*) const { return capacity_in_words(); }
// Iteration.
virtual void oop_iterate(OopClosure* cl);
......
......@@ -23,13 +23,6 @@
*/
inline void MarkSweep::mark_object(oop obj) {
#ifndef SERIALGC
if (UseParallelOldGC && VerifyParallelOldWithMarkSweep) {
assert(PSParallelCompact::mark_bitmap()->is_marked(obj),
"Should be marked in the marking bitmap");
}
#endif // SERIALGC
// some marks may contain information we need to preserve so we store them away
// and overwrite the mark. We'll restore it at the end of markSweep.
markOop mark = obj->mark();
......
......@@ -181,6 +181,25 @@ size_t MutableNUMASpace::unsafe_max_tlab_alloc(Thread *thr) const {
return lgrp_spaces()->at(i)->space()->free_in_bytes();
}
size_t MutableNUMASpace::capacity_in_words(Thread* thr) const {
guarantee(thr != NULL, "No thread");
int lgrp_id = thr->lgrp_id();
if (lgrp_id == -1) {
if (lgrp_spaces()->length() > 0) {
return capacity_in_words() / lgrp_spaces()->length();
} else {
assert(false, "There should be at least one locality group");
return 0;
}
}
int i = lgrp_spaces()->find(&lgrp_id, LGRPSpace::equals);
if (i == -1) {
return 0;
}
return lgrp_spaces()->at(i)->space()->capacity_in_words();
}
// Check if the NUMA topology has changed. Add and remove spaces if needed.
// The update can be forced by setting the force parameter equal to true.
bool MutableNUMASpace::update_layout(bool force) {
......@@ -722,7 +741,8 @@ HeapWord* MutableNUMASpace::allocate(size_t size) {
i = os::random() % lgrp_spaces()->length();
}
MutableSpace *s = lgrp_spaces()->at(i)->space();
LGRPSpace* ls = lgrp_spaces()->at(i);
MutableSpace *s = ls->space();
HeapWord *p = s->allocate(size);
if (p != NULL) {
......@@ -743,6 +763,9 @@ HeapWord* MutableNUMASpace::allocate(size_t size) {
*(int*)i = 0;
}
}
if (p == NULL) {
ls->set_allocation_failed();
}
return p;
}
......@@ -761,7 +784,8 @@ HeapWord* MutableNUMASpace::cas_allocate(size_t size) {
if (i == -1) {
i = os::random() % lgrp_spaces()->length();
}
MutableSpace *s = lgrp_spaces()->at(i)->space();
LGRPSpace *ls = lgrp_spaces()->at(i);
MutableSpace *s = ls->space();
HeapWord *p = s->cas_allocate(size);
if (p != NULL) {
size_t remainder = pointer_delta(s->end(), p + size);
......@@ -790,6 +814,9 @@ HeapWord* MutableNUMASpace::cas_allocate(size_t size) {
*(int*)i = 0;
}
}
if (p == NULL) {
ls->set_allocation_failed();
}
return p;
}
......
......@@ -60,6 +60,7 @@ class MutableNUMASpace : public MutableSpace {
MutableSpace* _space;
MemRegion _invalid_region;
AdaptiveWeightedAverage *_alloc_rate;
bool _allocation_failed;
struct SpaceStats {
size_t _local_space, _remote_space, _unbiased_space, _uncommited_space;
......@@ -81,7 +82,7 @@ class MutableNUMASpace : public MutableSpace {
char* last_page_scanned() { return _last_page_scanned; }
void set_last_page_scanned(char* p) { _last_page_scanned = p; }
public:
LGRPSpace(int l) : _lgrp_id(l), _last_page_scanned(NULL) {
LGRPSpace(int l) : _lgrp_id(l), _last_page_scanned(NULL), _allocation_failed(false) {
_space = new MutableSpace();
_alloc_rate = new AdaptiveWeightedAverage(NUMAChunkResizeWeight);
}
......@@ -103,8 +104,21 @@ class MutableNUMASpace : public MutableSpace {
return *(int*)lgrp_id_value == p->lgrp_id();
}
// Report a failed allocation.
void set_allocation_failed() { _allocation_failed = true; }
void sample() {
alloc_rate()->sample(space()->used_in_bytes());
// If there was a failed allocation, make the allocation rate equal
// to the size of the whole chunk. This ensures the progress of
// the adaptation process.
size_t alloc_rate_sample;
if (_allocation_failed) {
alloc_rate_sample = space()->capacity_in_bytes();
_allocation_failed = false;
} else {
alloc_rate_sample = space()->used_in_bytes();
}
alloc_rate()->sample(alloc_rate_sample);
}
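// --- Standalone sketch (not part of the HotSpot sources) ---------------------
// A rough illustration of why sampling the whole chunk after a failed
// allocation keeps the adaptation moving. The simple exponentially weighted
// average below stands in for AdaptiveWeightedAverage, and the weight and byte
// values are assumptions for illustration only.
#include <cstdio>

struct Ewa {                                   // new_avg = avg + w * (sample - avg)
  double avg;
  double weight;
  explicit Ewa(double w) : avg(0.0), weight(w) {}
  void sample(double v) { avg += weight * (v - avg); }
};

int main() {
  const double capacity = 8.0 * 1024 * 1024;   // this lgroup chunk's capacity (assumed)
  const double used     = 1.0 * 1024 * 1024;   // whatever used_in_bytes() reports (assumed)

  Ewa old_behaviour(0.2), new_behaviour(0.2);
  for (int epoch = 0; epoch < 5; ++epoch) {
    old_behaviour.sample(used);                // sample what happens to be used
    new_behaviour.sample(capacity);            // failed allocation: sample the whole chunk
  }
  std::printf("average without the fix: %.0f bytes\n", old_behaviour.avg);
  std::printf("average with the fix:    %.0f bytes\n", new_behaviour.avg);
  return 0;
}
// -----------------------------------------------------------------------------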
MemRegion invalid_region() const { return _invalid_region; }
......@@ -190,6 +204,9 @@ class MutableNUMASpace : public MutableSpace {
virtual void ensure_parsability();
virtual size_t used_in_words() const;
virtual size_t free_in_words() const;
using MutableSpace::capacity_in_words;
virtual size_t capacity_in_words(Thread* thr) const;
virtual size_t tlab_capacity(Thread* thr) const;
virtual size_t unsafe_max_tlab_alloc(Thread* thr) const;
......
......@@ -586,6 +586,7 @@ locknode.hpp subnode.hpp
loopTransform.cpp addnode.hpp
loopTransform.cpp allocation.inline.hpp
loopTransform.cpp connode.hpp
loopTransform.cpp compileLog.hpp
loopTransform.cpp divnode.hpp
loopTransform.cpp loopnode.hpp
loopTransform.cpp mulnode.hpp
......@@ -601,6 +602,7 @@ loopnode.cpp addnode.hpp
loopnode.cpp allocation.inline.hpp
loopnode.cpp callnode.hpp
loopnode.cpp ciMethodData.hpp
loopnode.cpp compileLog.hpp
loopnode.cpp connode.hpp
loopnode.cpp divnode.hpp
loopnode.cpp loopnode.hpp
......
......@@ -25,19 +25,6 @@
#include "incls/_precompiled.incl"
#include "incls/_bytecodeInfo.cpp.incl"
// These variables are declared in parse1.cpp
extern int explicit_null_checks_inserted;
extern int explicit_null_checks_elided;
extern int explicit_null_checks_inserted_old;
extern int explicit_null_checks_elided_old;
extern int nodes_created_old;
extern int nodes_created;
extern int methods_parsed_old;
extern int methods_parsed;
extern int methods_seen;
extern int methods_seen_old;
//=============================================================================
//------------------------------InlineTree-------------------------------------
InlineTree::InlineTree( Compile* c, const InlineTree *caller_tree, ciMethod* callee, JVMState* caller_jvms, int caller_bci, float site_invoke_ratio )
......@@ -517,27 +504,3 @@ InlineTree* InlineTree::find_subtree_from_root(InlineTree* root, JVMState* jvms,
}
return iltp;
}
// ----------------------------------------------------------------------------
#ifndef PRODUCT
static void per_method_stats() {
// Compute difference between this method's cumulative totals and old totals
int explicit_null_checks_cur = explicit_null_checks_inserted - explicit_null_checks_inserted_old;
int elided_null_checks_cur = explicit_null_checks_elided - explicit_null_checks_elided_old;
// Print differences
if( explicit_null_checks_cur )
tty->print_cr("XXX Explicit NULL checks inserted: %d", explicit_null_checks_cur);
if( elided_null_checks_cur )
tty->print_cr("XXX Explicit NULL checks removed at parse time: %d", elided_null_checks_cur);
// Store the current cumulative totals
nodes_created_old = nodes_created;
methods_parsed_old = methods_parsed;
methods_seen_old = methods_seen;
explicit_null_checks_inserted_old = explicit_null_checks_inserted;
explicit_null_checks_elided_old = explicit_null_checks_elided;
}
#endif
......@@ -1034,6 +1034,39 @@ AllocateNode::AllocateNode(Compile* C, const TypeFunc *atype,
//=============================================================================
uint AllocateArrayNode::size_of() const { return sizeof(*this); }
// Retrieve the length from the AllocateArrayNode. Narrow the type with a
// CastII, if appropriate. If we are not allowed to create new nodes, and
// a CastII is appropriate, return NULL.
Node *AllocateArrayNode::make_ideal_length(const TypeOopPtr* oop_type, PhaseTransform *phase, bool allow_new_nodes) {
Node *length = in(AllocateNode::ALength);
assert(length != NULL, "length is not null");
const TypeInt* length_type = phase->find_int_type(length);
const TypeAryPtr* ary_type = oop_type->isa_aryptr();
if (ary_type != NULL && length_type != NULL) {
const TypeInt* narrow_length_type = ary_type->narrow_size_type(length_type);
if (narrow_length_type != length_type) {
// Assert one of:
// - the narrow_length is 0
// - the narrow_length is not wider than length
assert(narrow_length_type == TypeInt::ZERO ||
(narrow_length_type->_hi <= length_type->_hi &&
narrow_length_type->_lo >= length_type->_lo),
"narrow type must be narrower than length type");
// Return NULL if new nodes are not allowed
if (!allow_new_nodes) return NULL;
// Create a cast which is control dependent on the initialization to
// propagate the fact that the array length must be positive.
length = new (phase->C, 2) CastIINode(length, narrow_length_type);
length->set_req(0, initialization()->proj_out(0));
}
}
return length;
}
//=============================================================================
uint LockNode::size_of() const { return sizeof(*this); }
......
......@@ -755,6 +755,15 @@ public:
virtual int Opcode() const;
virtual uint size_of() const; // Size is bigger
// Dig the length operand out of an array allocation site.
Node* Ideal_length() {
return in(AllocateNode::ALength);
}
// Dig the length operand out of an array allocation site and narrow the
// type with a CastII, if necessary.
Node* make_ideal_length(const TypeOopPtr* ary_type, PhaseTransform *phase, bool can_create = true);
// Pattern-match a possible usage of AllocateArrayNode.
// Return null if no allocation is recognized.
static AllocateArrayNode* Ideal_array_allocation(Node* ptr, PhaseTransform* phase) {
......@@ -762,12 +771,6 @@ public:
return (allo == NULL || !allo->is_AllocateArray())
? NULL : allo->as_AllocateArray();
}
// Dig the length operand out of a (possible) array allocation site.
static Node* Ideal_length(Node* ptr, PhaseTransform* phase) {
AllocateArrayNode* allo = Ideal_array_allocation(ptr, phase);
return (allo == NULL) ? NULL : allo->in(AllocateNode::ALength);
}
};
//------------------------------AbstractLockNode-----------------------------------
......
......@@ -1665,7 +1665,11 @@ Node *PhiNode::Ideal(PhaseGVN *phase, bool can_reshape) {
// compress paths and change unreachable cycles to TOP
// If not, we can update the input infinitely along a MergeMem cycle
// Equivalent code is in MemNode::Ideal_common
Node *m = phase->transform(n);
Node *m = phase->transform(n);
if (outcnt() == 0) { // Above transform() may kill us!
progress = phase->C->top();
break;
}
// If transformed to a MergeMem, get the desired slice
// Otherwise the returned node represents memory for every slice
Node *new_mem = (m->is_MergeMem()) ?
......@@ -1765,6 +1769,51 @@ Node *PhiNode::Ideal(PhaseGVN *phase, bool can_reshape) {
}
}
#ifdef _LP64
// Push DecodeN down through phi.
// The rest of the phi graph will be transformed by splitting EncodeP nodes up through the phis.
if (UseCompressedOops && can_reshape && progress == NULL) {
bool may_push = true;
bool has_decodeN = false;
Node* in_decodeN = NULL;
for (uint i=1; i<req(); ++i) {// For all paths in
Node *ii = in(i);
if (ii->is_DecodeN() && ii->bottom_type() == bottom_type()) {
has_decodeN = true;
in_decodeN = ii->in(1);
} else if (!ii->is_Phi()) {
may_push = false;
}
}
if (has_decodeN && may_push) {
PhaseIterGVN *igvn = phase->is_IterGVN();
// Note: in_decodeN is used only to define the type of new phi here.
PhiNode *new_phi = PhiNode::make_blank(in(0), in_decodeN);
uint orig_cnt = req();
for (uint i=1; i<req(); ++i) {// For all paths in
Node *ii = in(i);
Node* new_ii = NULL;
if (ii->is_DecodeN()) {
assert(ii->bottom_type() == bottom_type(), "sanity");
new_ii = ii->in(1);
} else {
assert(ii->is_Phi(), "sanity");
if (ii->as_Phi() == this) {
new_ii = new_phi;
} else {
new_ii = new (phase->C, 2) EncodePNode(ii, in_decodeN->bottom_type());
igvn->register_new_node_with_optimizer(new_ii);
}
}
new_phi->set_req(i, new_ii);
}
igvn->register_new_node_with_optimizer(new_phi, this);
progress = new (phase->C, 2) DecodeNNode(new_phi, bottom_type());
}
}
#endif
return progress; // Return any progress
}
......
......@@ -467,6 +467,7 @@ Compile::Compile( ciEnv* ci_env, C2Compiler* compiler, ciMethod* target, int osr
}
}
set_print_assembly(print_opto_assembly);
set_parsed_irreducible_loop(false);
#endif
if (ProfileTraps) {
......@@ -550,6 +551,8 @@ Compile::Compile( ciEnv* ci_env, C2Compiler* compiler, ciMethod* target, int osr
rethrow_exceptions(kit.transfer_exceptions_into_jvms());
}
print_method("Before RemoveUseless");
// Remove clutter produced by parsing.
if (!failing()) {
ResourceMark rm;
......@@ -615,8 +618,6 @@ Compile::Compile( ciEnv* ci_env, C2Compiler* compiler, ciMethod* target, int osr
if (failing()) return;
NOT_PRODUCT( verify_graph_edges(); )
print_method("Before Matching");
#ifndef PRODUCT
if (PrintIdeal) {
ttyLocker ttyl; // keep the following output all in one block
......@@ -720,6 +721,7 @@ Compile::Compile( ciEnv* ci_env,
TraceTime t1(NULL, &_t_totalCompilation, TimeCompiler, false);
TraceTime t2(NULL, &_t_stubCompilation, TimeCompiler, false);
set_print_assembly(PrintFrameConverterAssembly);
set_parsed_irreducible_loop(false);
#endif
CompileWrapper cw(this);
Init(/*AliasLevel=*/ 0);
......@@ -2073,6 +2075,44 @@ static void final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &fpu ) {
}
#ifdef _LP64
case Op_CastPP:
if (n->in(1)->is_DecodeN() && UseImplicitNullCheckForNarrowOop) {
Compile* C = Compile::current();
Node* in1 = n->in(1);
const Type* t = n->bottom_type();
Node* new_in1 = in1->clone();
new_in1->as_DecodeN()->set_type(t);
if (!Matcher::clone_shift_expressions) {
//
// x86, ARM and friends can handle 2 adds in addressing mode
// and Matcher can fold a DecodeN node into address by using
// a narrow oop directly and do implicit NULL check in address:
//
// [R12 + narrow_oop_reg<<3 + offset]
// NullCheck narrow_oop_reg
//
// On other platforms (Sparc) we have to keep new DecodeN node and
// use it to do implicit NULL check in address:
//
// decode_not_null narrow_oop_reg, base_reg
// [base_reg + offset]
// NullCheck base_reg
//
// Pin the new DecodeN node to the non-null path on these platforms (Sparc)
// to record which NULL check the new DecodeN node corresponds to,
// so that it can be used as the value in implicit_null_check().
//
new_in1->set_req(0, n->in(0));
}
n->subsume_by(new_in1);
if (in1->outcnt() == 0) {
in1->disconnect_inputs(NULL);
}
}
break;
case Op_CmpP:
// Do this transformation here to preserve CmpPNode::sub() and
// other TypePtr related Ideal optimizations (for example, ptr nullness).
......@@ -2092,24 +2132,44 @@ static void final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &fpu ) {
} else if (in2->Opcode() == Op_ConP) {
const Type* t = in2->bottom_type();
if (t == TypePtr::NULL_PTR && UseImplicitNullCheckForNarrowOop) {
if (Matcher::clone_shift_expressions) {
// x86, ARM and friends can handle 2 adds in addressing mode.
// Decode a narrow oop and do implicit NULL check in address
// [R12 + narrow_oop_reg<<3 + offset]
new_in2 = ConNode::make(C, TypeNarrowOop::NULL_PTR);
} else {
// Don't replace CmpP(o ,null) if 'o' is used in AddP
// to generate implicit NULL check on Sparc where
// narrow oops can't be used in address.
uint i = 0;
for (; i < in1->outcnt(); i++) {
if (in1->raw_out(i)->is_AddP())
break;
}
if (i >= in1->outcnt()) {
new_in2 = ConNode::make(C, TypeNarrowOop::NULL_PTR);
}
}
new_in2 = ConNode::make(C, TypeNarrowOop::NULL_PTR);
//
// This transformation together with CastPP transformation above
// will generate code for implicit NULL checks for compressed oops.
//
// The original code after Optimize()
//
// LoadN memory, narrow_oop_reg
// decode narrow_oop_reg, base_reg
// CmpP base_reg, NULL
// CastPP base_reg // NotNull
// Load [base_reg + offset], val_reg
//
// after these transformations will be
//
// LoadN memory, narrow_oop_reg
// CmpN narrow_oop_reg, NULL
// decode_not_null narrow_oop_reg, base_reg
// Load [base_reg + offset], val_reg
//
// and the uncommon path (== NULL) will use narrow_oop_reg directly
// since narrow oops can be used in debug info now (see the code in
// final_graph_reshaping_walk()).
//
// At the end the code will be matched to
// on x86:
//
// Load_narrow_oop memory, narrow_oop_reg
// Load [R12 + narrow_oop_reg<<3 + offset], val_reg
// NullCheck narrow_oop_reg
//
// and on sparc:
//
// Load_narrow_oop memory, narrow_oop_reg
// decode_not_null narrow_oop_reg, base_reg
// Load [base_reg + offset], val_reg
// NullCheck base_reg
//
} else if (t->isa_oopptr()) {
new_in2 = ConNode::make(C, t->make_narrowoop());
}
......@@ -2126,6 +2186,49 @@ static void final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &fpu ) {
}
}
break;
case Op_DecodeN:
assert(!n->in(1)->is_EncodeP(), "should be optimized out");
break;
case Op_EncodeP: {
Node* in1 = n->in(1);
if (in1->is_DecodeN()) {
n->subsume_by(in1->in(1));
} else if (in1->Opcode() == Op_ConP) {
Compile* C = Compile::current();
const Type* t = in1->bottom_type();
if (t == TypePtr::NULL_PTR) {
n->subsume_by(ConNode::make(C, TypeNarrowOop::NULL_PTR));
} else if (t->isa_oopptr()) {
n->subsume_by(ConNode::make(C, t->make_narrowoop()));
}
}
if (in1->outcnt() == 0) {
in1->disconnect_inputs(NULL);
}
break;
}
case Op_Phi:
if (n->as_Phi()->bottom_type()->isa_narrowoop()) {
// The EncodeP optimization may create a Phi with the same edges
// for all paths. It is not handled well by the Register Allocator.
Node* unique_in = n->in(1);
assert(unique_in != NULL, "");
uint cnt = n->req();
for (uint i = 2; i < cnt; i++) {
Node* m = n->in(i);
assert(m != NULL, "");
if (unique_in != m)
unique_in = NULL;
}
if (unique_in != NULL) {
n->subsume_by(unique_in);
}
}
break;
#endif
case Op_ModI:
......
......@@ -160,6 +160,7 @@ class Compile : public Phase {
bool _print_assembly; // True if we should dump assembly code for this compilation
#ifndef PRODUCT
bool _trace_opto_output;
bool _parsed_irreducible_loop; // True if ciTypeFlow detected irreducible loops during parsing
#endif
// Compilation environment.
......@@ -319,6 +320,8 @@ class Compile : public Phase {
}
#ifndef PRODUCT
bool trace_opto_output() const { return _trace_opto_output; }
bool parsed_irreducible_loop() const { return _parsed_irreducible_loop; }
void set_parsed_irreducible_loop(bool z) { _parsed_irreducible_loop = z; }
#endif
void begin_method() {
......
......@@ -433,8 +433,8 @@ Node *ConstraintCastNode::Ideal_DU_postCCP( PhaseCCP *ccp ) {
// If not converting int->oop, throw away cast after constant propagation
Node *CastPPNode::Ideal_DU_postCCP( PhaseCCP *ccp ) {
const Type *t = ccp->type(in(1));
if (!t->isa_oop_ptr()) {
return NULL; // do not transform raw pointers
if (!t->isa_oop_ptr() || in(1)->is_DecodeN()) {
return NULL; // do not transform raw pointers or narrow oops
}
return ConstraintCastNode::Ideal_DU_postCCP(ccp);
}
......
......@@ -795,7 +795,7 @@ ciMethod* Parse::optimize_inlining(ciMethod* caller, int bci, ciInstanceKlass* k
ciInstanceKlass *ikl = receiver_type->klass()->as_instance_klass();
if (ikl->is_loaded() && ikl->is_initialized() && !ikl->is_interface() &&
(ikl == actual_receiver || ikl->is_subclass_of(actual_receiver))) {
(ikl == actual_receiver || ikl->is_subtype_of(actual_receiver))) {
// ikl is a same or better type than the original actual_receiver,
// e.g. static receiver from bytecodes.
actual_receiver = ikl;
......
......@@ -587,7 +587,7 @@ PreserveJVMState::PreserveJVMState(GraphKit* kit, bool clone_map) {
#ifdef ASSERT
_bci = kit->bci();
Parse* parser = kit->is_Parse();
int block = (parser == NULL || parser->block() == NULL) ? -1 : parser->block()->pre_order();
int block = (parser == NULL || parser->block() == NULL) ? -1 : parser->block()->rpo();
_block = block;
#endif
}
......@@ -596,7 +596,7 @@ PreserveJVMState::~PreserveJVMState() {
#ifdef ASSERT
assert(kit->bci() == _bci, "bci must not shift");
Parse* parser = kit->is_Parse();
int block = (parser == NULL || parser->block() == NULL) ? -1 : parser->block()->pre_order();
int block = (parser == NULL || parser->block() == NULL) ? -1 : parser->block()->rpo();
assert(block == _block, "block must not shift");
#endif
kit->set_map(_map);
......@@ -1049,10 +1049,19 @@ Node* GraphKit::load_object_klass(Node* obj) {
//-------------------------load_array_length-----------------------------------
Node* GraphKit::load_array_length(Node* array) {
// Special-case a fresh allocation to avoid building nodes:
Node* alen = AllocateArrayNode::Ideal_length(array, &_gvn);
if (alen != NULL) return alen;
Node *r_adr = basic_plus_adr(array, arrayOopDesc::length_offset_in_bytes());
return _gvn.transform( new (C, 3) LoadRangeNode(0, immutable_memory(), r_adr, TypeInt::POS));
AllocateArrayNode* alloc = AllocateArrayNode::Ideal_array_allocation(array, &_gvn);
Node *alen;
if (alloc == NULL) {
Node *r_adr = basic_plus_adr(array, arrayOopDesc::length_offset_in_bytes());
alen = _gvn.transform( new (C, 3) LoadRangeNode(0, immutable_memory(), r_adr, TypeInt::POS));
} else {
alen = alloc->Ideal_length();
Node* ccast = alloc->make_ideal_length(_gvn.type(array)->is_aryptr(), &_gvn);
if (ccast != alen) {
alen = _gvn.transform(ccast);
}
}
return alen;
}
//------------------------------do_null_check----------------------------------
......@@ -2847,20 +2856,18 @@ Node* GraphKit::set_output_for_allocation(AllocateNode* alloc,
assert(just_allocated_object(control()) == javaoop, "just allocated");
#ifdef ASSERT
{ // Verify that the AllocateNode::Ideal_foo recognizers work:
Node* kn = alloc->in(AllocateNode::KlassNode);
Node* ln = alloc->in(AllocateNode::ALength);
assert(AllocateNode::Ideal_klass(rawoop, &_gvn) == kn,
"Ideal_klass works");
assert(AllocateNode::Ideal_klass(javaoop, &_gvn) == kn,
"Ideal_klass works");
{ // Verify that the AllocateNode::Ideal_allocation recognizers work:
assert(AllocateNode::Ideal_allocation(rawoop, &_gvn) == alloc,
"Ideal_allocation works");
assert(AllocateNode::Ideal_allocation(javaoop, &_gvn) == alloc,
"Ideal_allocation works");
if (alloc->is_AllocateArray()) {
assert(AllocateArrayNode::Ideal_length(rawoop, &_gvn) == ln,
"Ideal_length works");
assert(AllocateArrayNode::Ideal_length(javaoop, &_gvn) == ln,
"Ideal_length works");
assert(AllocateArrayNode::Ideal_array_allocation(rawoop, &_gvn) == alloc->as_AllocateArray(),
"Ideal_allocation works");
assert(AllocateArrayNode::Ideal_array_allocation(javaoop, &_gvn) == alloc->as_AllocateArray(),
"Ideal_allocation works");
} else {
assert(ln->is_top(), "no length, please");
assert(alloc->in(AllocateNode::ALength)->is_top(), "no length, please");
}
}
#endif //ASSERT
......@@ -3109,25 +3116,20 @@ Node* GraphKit::new_array(Node* klass_node, // array klass (maybe variable)
// (This happens via a non-constant argument to inline_native_newArray.)
// In any case, the value of klass_node provides the desired array type.
const TypeInt* length_type = _gvn.find_int_type(length);
const TypeInt* narrow_length_type = NULL;
const TypeOopPtr* ary_type = _gvn.type(klass_node)->is_klassptr()->as_instance_type();
if (ary_type->isa_aryptr() && length_type != NULL) {
// Try to get a better type than POS for the size
ary_type = ary_type->is_aryptr()->cast_to_size(length_type);
narrow_length_type = ary_type->is_aryptr()->size();
if (narrow_length_type == length_type)
narrow_length_type = NULL;
}
Node* javaoop = set_output_for_allocation(alloc, ary_type, raw_mem_only);
// Cast length on remaining path to be positive:
if (narrow_length_type != NULL) {
Node* ccast = new (C, 2) CastIINode(length, narrow_length_type);
ccast->set_req(0, control());
_gvn.set_type_bottom(ccast);
record_for_igvn(ccast);
if (map()->find_edge(length) >= 0) {
// Cast length on remaining path to be as narrow as possible
if (map()->find_edge(length) >= 0) {
Node* ccast = alloc->make_ideal_length(ary_type, &_gvn);
if (ccast != length) {
_gvn.set_type_bottom(ccast);
record_for_igvn(ccast);
replace_in_map(length, ccast);
}
}
......
......@@ -485,8 +485,9 @@ uint PhaseChaitin::build_ifg_physical( ResourceArea *a ) {
// Liveout things are presumed live for the whole block. We accumulate
// 'area' accordingly. If they get killed in the block, we'll subtract
// the unused part of the block from the area.
double cost = b->_freq * double(last_inst-last_phi);
assert( cost >= 0, "negative spill cost" );
int inst_count = last_inst - last_phi;
double cost = (inst_count <= 0) ? 0.0 : b->_freq * double(inst_count);
assert(!(cost < 0.0), "negative spill cost" );
IndexSetIterator elements(&liveout);
uint lidx;
while ((lidx = elements.next()) != 0) {
......@@ -590,7 +591,7 @@ uint PhaseChaitin::build_ifg_physical( ResourceArea *a ) {
} else { // Else it is live
// A DEF also ends 'area' partway through the block.
lrgs(r)._area -= cost;
assert( lrgs(r)._area >= 0, "negative spill area" );
assert(!(lrgs(r)._area < 0.0), "negative spill area" );
// Insure high score for immediate-use spill copies so they get a color
if( n->is_SpillCopy()
......@@ -703,8 +704,9 @@ uint PhaseChaitin::build_ifg_physical( ResourceArea *a ) {
} // End of if normal register-allocated value
cost -= b->_freq; // Area remaining in the block
if( cost < 0.0 ) cost = 0.0; // Cost goes negative in the Phi area
// Area remaining in the block
inst_count--;
cost = (inst_count <= 0) ? 0.0 : b->_freq * double(inst_count);
// Make all inputs live
if( !n->is_Phi() ) { // Phi function uses come from prior block
......@@ -751,7 +753,7 @@ uint PhaseChaitin::build_ifg_physical( ResourceArea *a ) {
assert( pressure[0] == count_int_pressure (&liveout), "" );
assert( pressure[1] == count_float_pressure(&liveout), "" );
}
assert( lrg._area >= 0, "negative spill area" );
assert(!(lrg._area < 0.0), "negative spill area" );
}
}
} // End of reverse pass over all instructions in block
......
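The change above replaces last_inst - last_phi, which can be zero for a block containing nothing but Phis, with an inst_count that is clamped before being multiplied by the block frequency, and the per-instruction update now recomputes the cost from the decremented count rather than subtracting b->_freq and clamping afterwards. A small standalone sketch of just that clamping (toy numbers, not the real liveness bookkeeping):

#include <cstdio>

// Remaining cost at a point in the block: freq * (instructions still to scan),
// clamped so it can never go negative (e.g. for blocks containing only Phis).
static double remaining_cost(double freq, int inst_count) {
  return (inst_count <= 0) ? 0.0 : freq * (double)inst_count;
}

int main() {
  double freq = 2.0;

  // A block whose only instructions are Phis: last_inst == last_phi.
  std::printf("phi-only block: %.1f\n", remaining_cost(freq, 0));   // 0.0, not negative

  // A block with 4 real instructions, scanned in reverse: the cost is
  // recomputed from the decremented count instead of subtracting freq
  // and clamping afterwards.
  for (int inst_count = 4; inst_count >= 0; inst_count--) {
    std::printf("inst_count=%d cost=%.1f\n", inst_count, remaining_cost(freq, inst_count));
  }
  return 0;
}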
......@@ -1012,6 +1012,8 @@ void PhaseIdealLoop::do_unroll( IdealLoopTree *loop, Node_List &old_new, bool ad
if (!has_ctrl(old))
set_loop(nnn, loop);
}
loop->record_for_igvn();
}
//------------------------------do_maximally_unroll----------------------------
......
......@@ -1279,7 +1279,7 @@ void IdealLoopTree::counted_loop( PhaseIdealLoop *phase ) {
// Visit all children, looking for Phis
for (DUIterator i = cl->outs(); cl->has_out(i); i++) {
Node *out = cl->out(i);
if (!out->is_Phi()) continue; // Looking for phis
if (!out->is_Phi() || out == phi) continue; // Looking for other phis
PhiNode* phi2 = out->as_Phi();
Node *incr2 = phi2->in( LoopNode::LoopBackControl );
// Look for induction variables of the form: X += constant
......@@ -1388,6 +1388,37 @@ void IdealLoopTree::dump( ) const {
#endif
static void log_loop_tree(IdealLoopTree* root, IdealLoopTree* loop, CompileLog* log) {
if (loop == root) {
if (loop->_child != NULL) {
log->begin_head("loop_tree");
log->end_head();
if( loop->_child ) log_loop_tree(root, loop->_child, log);
log->tail("loop_tree");
assert(loop->_next == NULL, "what?");
}
} else {
Node* head = loop->_head;
log->begin_head("loop");
log->print(" idx='%d' ", head->_idx);
if (loop->_irreducible) log->print("irreducible='1' ");
if (head->is_Loop()) {
if (head->as_Loop()->is_inner_loop()) log->print("inner_loop='1' ");
if (head->as_Loop()->is_partial_peel_loop()) log->print("partial_peel_loop='1' ");
}
if (head->is_CountedLoop()) {
CountedLoopNode* cl = head->as_CountedLoop();
if (cl->is_pre_loop()) log->print("pre_loop='%d' ", cl->main_idx());
if (cl->is_main_loop()) log->print("main_loop='%d' ", cl->_idx);
if (cl->is_post_loop()) log->print("post_loop='%d' ", cl->main_idx());
}
log->end_head();
if( loop->_child ) log_loop_tree(root, loop->_child, log);
log->tail("loop");
if( loop->_next ) log_loop_tree(root, loop->_next, log);
}
}
//=============================================================================
//------------------------------PhaseIdealLoop---------------------------------
// Create a PhaseLoop. Build the ideal Loop tree. Map each Ideal Node to
......@@ -1624,10 +1655,13 @@ PhaseIdealLoop::PhaseIdealLoop( PhaseIterGVN &igvn, const PhaseIdealLoop *verify
// Cleanup any modified bits
_igvn.optimize();
// Do not repeat loop optimizations if irreducible loops are present
// by claiming no-progress.
if( _has_irreducible_loops )
C->clear_major_progress();
// disable assert until issue with split_flow_path is resolved (6742111)
// assert(!_has_irreducible_loops || C->parsed_irreducible_loop() || C->is_osr_compilation(),
// "shouldn't introduce irreducible loops");
if (C->log() != NULL) {
log_loop_tree(_ltree_root, _ltree_root, C->log());
}
}
#ifndef PRODUCT
......@@ -2732,11 +2766,7 @@ void PhaseIdealLoop::dump( ) const {
}
void PhaseIdealLoop::dump( IdealLoopTree *loop, uint idx, Node_List &rpo_list ) const {
// Indent by loop nesting depth
for( uint x = 0; x < loop->_nest; x++ )
tty->print(" ");
tty->print_cr("---- Loop N%d-N%d ----", loop->_head->_idx,loop->_tail->_idx);
loop->dump_head();
// Now scan for CFG nodes in the same loop
for( uint j=idx; j > 0; j-- ) {
......
......@@ -192,6 +192,8 @@ public:
int is_main_no_pre_loop() const { return _loop_flags & Main_Has_No_Pre_Loop; }
void set_main_no_pre_loop() { _loop_flags |= Main_Has_No_Pre_Loop; }
int main_idx() const { return _main_idx; }
void set_pre_loop (CountedLoopNode *main) { assert(is_normal_loop(),""); _loop_flags |= Pre ; _main_idx = main->_idx; }
void set_main_loop ( ) { assert(is_normal_loop(),""); _loop_flags |= Main; }
......
......@@ -2667,6 +2667,10 @@ void PhaseIdealLoop::reorg_offsets( IdealLoopTree *loop ) {
// Fix this by adjusting to use the post-increment trip counter.
Node *phi = cl->phi();
if( !phi ) return; // Dead infinite loop
// Shape messed up, probably by iteration_split_impl
if (phi->in(LoopNode::LoopBackControl) != cl->incr()) return;
bool progress = true;
while (progress) {
progress = false;
......
......@@ -273,7 +273,7 @@ void Matcher::match( ) {
find_shared( C->root() );
find_shared( C->top() );
C->print_method("Before Matching", 2);
C->print_method("Before Matching");
// Swap out to old-space; emptying new-space
Arena *old = C->node_arena()->move_contents(C->old_arena());
......@@ -840,7 +840,7 @@ Node *Matcher::xform( Node *n, int max_stack ) {
_new2old_map.map(m->_idx, n);
#endif
if (m->in(0) != NULL) // m might be top
collect_null_checks(m);
collect_null_checks(m, n);
} else { // Else just a regular 'ol guy
m = n->clone(); // So just clone into new-space
#ifdef ASSERT
......@@ -1478,12 +1478,19 @@ MachNode *Matcher::ReduceInst( State *s, int rule, Node *&mem ) {
m = _mem_node;
assert(m != NULL && m->is_Mem(), "expecting memory node");
}
if (m->adr_type() != mach->adr_type()) {
const Type* mach_at = mach->adr_type();
// A DecodeN node consumed by an address may have a different type
// than its input. Don't compare types in that case.
if (m->adr_type() != mach_at && m->in(MemNode::Address)->is_AddP() &&
m->in(MemNode::Address)->in(AddPNode::Address)->is_DecodeN()) {
mach_at = m->adr_type();
}
if (m->adr_type() != mach_at) {
m->dump();
tty->print_cr("mach:");
mach->dump(1);
}
assert(m->adr_type() == mach->adr_type(), "matcher should not change adr type");
assert(m->adr_type() == mach_at, "matcher should not change adr type");
}
#endif
}
......@@ -1995,7 +2002,7 @@ void Matcher::dump_old2new_map() {
// it. Used by later implicit-null-check handling. Actually collects
// either an IfTrue or IfFalse for the common NOT-null path, AND the ideal
// value being tested.
void Matcher::collect_null_checks( Node *proj ) {
void Matcher::collect_null_checks( Node *proj, Node *orig_proj ) {
Node *iff = proj->in(0);
if( iff->Opcode() == Op_If ) {
// During matching If's have Bool & Cmp side-by-side
......@@ -2008,20 +2015,47 @@ void Matcher::collect_null_checks( Node *proj ) {
if (ct == TypePtr::NULL_PTR ||
(opc == Op_CmpN && ct == TypeNarrowOop::NULL_PTR)) {
bool push_it = false;
if( proj->Opcode() == Op_IfTrue ) {
extern int all_null_checks_found;
all_null_checks_found++;
if( b->_test._test == BoolTest::ne ) {
_null_check_tests.push(proj);
_null_check_tests.push(cmp->in(1));
push_it = true;
}
} else {
assert( proj->Opcode() == Op_IfFalse, "" );
if( b->_test._test == BoolTest::eq ) {
_null_check_tests.push(proj);
_null_check_tests.push(cmp->in(1));
push_it = true;
}
}
if( push_it ) {
_null_check_tests.push(proj);
Node* val = cmp->in(1);
#ifdef _LP64
if (UseCompressedOops && !Matcher::clone_shift_expressions &&
val->bottom_type()->isa_narrowoop()) {
//
// Look for DecodeN node which should be pinned to orig_proj.
// On platforms (Sparc) which cannot handle 2 adds
// in addressing mode we have to keep a DecodeN node and
// use it to do implicit NULL check in address.
//
// DecodeN node was pinned to non-null path (orig_proj) during
// CastPP transformation in final_graph_reshaping_impl().
//
uint cnt = orig_proj->outcnt();
for (uint i = 0; i < orig_proj->outcnt(); i++) {
Node* d = orig_proj->raw_out(i);
if (d->is_DecodeN() && d->in(1) == val) {
val = d;
val->set_req(0, NULL); // Unpin now.
break;
}
}
}
#endif
_null_check_tests.push(val);
}
}
}
}
......
......@@ -166,7 +166,7 @@ public:
// List of IfFalse or IfTrue Nodes that indicate a taken null test.
// List is valid in the post-matching space.
Node_List _null_check_tests;
void collect_null_checks( Node *proj );
void collect_null_checks( Node *proj, Node *orig_proj );
void validate_null_checks( );
Matcher( Node_List &proj_list );
......
......@@ -1887,6 +1887,38 @@ const Type *LoadRangeNode::Value( PhaseTransform *phase ) const {
return tap->size();
}
//-------------------------------Ideal---------------------------------------
// Feed through the length in AllocateArray(...length...)._length.
Node *LoadRangeNode::Ideal(PhaseGVN *phase, bool can_reshape) {
Node* p = MemNode::Ideal_common(phase, can_reshape);
if (p) return (p == NodeSentinel) ? NULL : p;
// Take apart the address into an oop and an offset.
// Return 'this' if we cannot.
Node* adr = in(MemNode::Address);
intptr_t offset = 0;
Node* base = AddPNode::Ideal_base_and_offset(adr, phase, offset);
if (base == NULL) return NULL;
const TypeAryPtr* tary = phase->type(adr)->isa_aryptr();
if (tary == NULL) return NULL;
// We can fetch the length directly through an AllocateArrayNode.
// This works even if the length is not constant (clone or newArray).
if (offset == arrayOopDesc::length_offset_in_bytes()) {
AllocateArrayNode* alloc = AllocateArrayNode::Ideal_array_allocation(base, phase);
if (alloc != NULL) {
Node* allocated_length = alloc->Ideal_length();
Node* len = alloc->make_ideal_length(tary, phase);
if (allocated_length != len) {
// New CastII improves on this.
return len;
}
}
}
return NULL;
}
//------------------------------Identity---------------------------------------
// Feed through the length in AllocateArray(...length...)._length.
Node* LoadRangeNode::Identity( PhaseTransform *phase ) {
......@@ -1905,15 +1937,22 @@ Node* LoadRangeNode::Identity( PhaseTransform *phase ) {
// We can fetch the length directly through an AllocateArrayNode.
// This works even if the length is not constant (clone or newArray).
if (offset == arrayOopDesc::length_offset_in_bytes()) {
Node* allocated_length = AllocateArrayNode::Ideal_length(base, phase);
if (allocated_length != NULL) {
return allocated_length;
AllocateArrayNode* alloc = AllocateArrayNode::Ideal_array_allocation(base, phase);
if (alloc != NULL) {
Node* allocated_length = alloc->Ideal_length();
// Do not allow make_ideal_length to allocate a CastII node.
Node* len = alloc->make_ideal_length(tary, phase, false);
if (allocated_length == len) {
// Return allocated_length only if it would not be improved by a CastII.
return allocated_length;
}
}
}
return this;
}
//=============================================================================
//---------------------------StoreNode::make-----------------------------------
// Polymorphic factory method:
......
......@@ -241,6 +241,7 @@ public:
virtual int Opcode() const;
virtual const Type *Value( PhaseTransform *phase ) const;
virtual Node *Identity( PhaseTransform *phase );
virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
};
//------------------------------LoadLNode--------------------------------------
......
......@@ -167,9 +167,19 @@ class Parse : public GraphKit {
int start() const { return flow()->start(); }
int limit() const { return flow()->limit(); }
int pre_order() const { return flow()->pre_order(); }
int rpo() const { return flow()->rpo(); }
int start_sp() const { return flow()->stack_size(); }
bool is_loop_head() const { return flow()->is_loop_head(); }
bool is_SEL_head() const { return flow()->is_single_entry_loop_head(); }
bool is_SEL_backedge(Block* pred) const{ return is_SEL_head() && pred->rpo() >= rpo(); }
bool is_invariant_local(uint i) const {
const JVMState* jvms = start_map()->jvms();
if (!jvms->is_loc(i)) return false;
return flow()->is_invariant_local(i - jvms->locoff());
}
bool can_elide_SEL_phi(uint i) const { assert(is_SEL_head(),""); return is_invariant_local(i); }
const Type* peek(int off=0) const { return stack_type_at(start_sp() - (off+1)); }
const Type* stack_type_at(int i) const;
......@@ -305,7 +315,7 @@ class Parse : public GraphKit {
// entry_bci() -- see osr_bci, etc.
ciTypeFlow* flow() const { return _flow; }
// blocks() -- see pre_order_at, start_block, etc.
// blocks() -- see rpo_at, start_block, etc.
int block_count() const { return _block_count; }
GraphKit& exits() { return _exits; }
......@@ -330,12 +340,12 @@ class Parse : public GraphKit {
// Must this parse be aborted?
bool failing() { return C->failing(); }
Block* pre_order_at(int po) {
assert(0 <= po && po < _block_count, "oob");
return &_blocks[po];
Block* rpo_at(int rpo) {
assert(0 <= rpo && rpo < _block_count, "oob");
return &_blocks[rpo];
}
Block* start_block() {
return pre_order_at(flow()->start_block()->pre_order());
return rpo_at(flow()->start_block()->rpo());
}
// Can return NULL if the flow pass did not complete a block.
Block* successor_for_bci(int bci) {
......@@ -359,9 +369,6 @@ class Parse : public GraphKit {
// Parse all the basic blocks.
void do_all_blocks();
// Helper for do_all_blocks; makes one pass in pre-order.
void visit_blocks();
// Parse the current basic block
void do_one_block();
......
......@@ -29,17 +29,17 @@
// the most. Some of the non-static variables are needed in bytecodeInfo.cpp
// and eventually should be encapsulated in a proper class (gri 8/18/98).
int nodes_created = 0; int nodes_created_old = 0;
int methods_parsed = 0; int methods_parsed_old = 0;
int methods_seen = 0; int methods_seen_old = 0;
int explicit_null_checks_inserted = 0, explicit_null_checks_inserted_old = 0;
int explicit_null_checks_elided = 0, explicit_null_checks_elided_old = 0;
int nodes_created = 0;
int methods_parsed = 0;
int methods_seen = 0;
int blocks_parsed = 0;
int blocks_seen = 0;
int explicit_null_checks_inserted = 0;
int explicit_null_checks_elided = 0;
int all_null_checks_found = 0, implicit_null_checks = 0;
int implicit_null_throws = 0;
int parse_idx = 0;
size_t parse_arena = 0;
int reclaim_idx = 0;
int reclaim_in = 0;
int reclaim_node = 0;
......@@ -61,6 +61,7 @@ void Parse::print_statistics() {
tty->cr();
if (methods_seen != methods_parsed)
tty->print_cr("Reasons for parse failures (NOT cumulative):");
tty->print_cr("Blocks parsed: %d Blocks seen: %d", blocks_parsed, blocks_seen);
if( explicit_null_checks_inserted )
tty->print_cr("%d original NULL checks - %d elided (%2d%%); optimizer leaves %d,", explicit_null_checks_inserted, explicit_null_checks_elided, (100*explicit_null_checks_elided)/explicit_null_checks_inserted, all_null_checks_found);
......@@ -373,6 +374,12 @@ Parse::Parse(JVMState* caller, ciMethod* parse_method, float expected_uses)
C->record_method_not_compilable_all_tiers(_flow->failure_reason());
}
#ifndef PRODUCT
if (_flow->has_irreducible_entry()) {
C->set_parsed_irreducible_loop(true);
}
#endif
if (_expected_uses <= 0) {
_prof_factor = 1;
} else {
......@@ -556,118 +563,93 @@ Parse::Parse(JVMState* caller, ciMethod* parse_method, float expected_uses)
set_map(entry_map);
do_exits();
// Collect a few more statistics.
parse_idx += C->unique();
parse_arena += C->node_arena()->used();
if (log) log->done("parse nodes='%d' memory='%d'",
C->unique(), C->node_arena()->used());
}
//---------------------------do_all_blocks-------------------------------------
void Parse::do_all_blocks() {
_blocks_merged = 0;
_blocks_parsed = 0;
bool has_irreducible = flow()->has_irreducible_entry();
int old_blocks_merged = -1;
int old_blocks_parsed = -1;
// Walk over all blocks in Reverse Post-Order.
while (true) {
bool progress = false;
for (int rpo = 0; rpo < block_count(); rpo++) {
Block* block = rpo_at(rpo);
for (int tries = 0; ; tries++) {
visit_blocks();
if (failing()) return; // Check for bailout
if (block->is_parsed()) continue;
// No need for a work list. The outer loop is hardly ever repeated.
// The following loop traverses the blocks in a reasonable pre-order,
// as produced by the ciTypeFlow pass.
if (!block->is_merged()) {
// Dead block, no state reaches this block
continue;
}
// This loop can be taken more than once if there are two entries to
// a loop (irreducible CFG), and the edge which ciTypeFlow chose
// as the first predecessor to the loop goes dead in the parser,
// due to parse-time optimization. (Could happen with obfuscated code.)
// Prepare to parse this block.
load_state_from(block);
// Look for progress, or the lack of it:
if (_blocks_parsed == block_count()) {
// That's all, folks.
if (TraceOptoParse) {
tty->print_cr("All blocks parsed.");
if (stopped()) {
// Block is dead.
continue;
}
break;
}
// How much work was done this time around?
int new_blocks_merged = _blocks_merged - old_blocks_merged;
int new_blocks_parsed = _blocks_parsed - old_blocks_parsed;
if (new_blocks_merged == 0) {
if (TraceOptoParse) {
tty->print_cr("All live blocks parsed; %d dead blocks.", block_count() - _blocks_parsed);
blocks_parsed++;
progress = true;
if (block->is_loop_head() || block->is_handler() || has_irreducible && !block->is_ready()) {
// Not all preds have been parsed. We must build phis everywhere.
// (Note that dead locals do not get phis built, ever.)
ensure_phis_everywhere();
// Leave behind an undisturbed copy of the map, for future merges.
set_map(clone_map());
}
// No new blocks have become parseable. Some blocks are just dead.
break;
if (control()->is_Region() && !block->is_loop_head() && !has_irreducible && !block->is_handler()) {
// In the absence of irreducible loops, the Region and Phis
// associated with a merge that doesn't involve a backedge can
// be simplified now since the RPO parsing order guarantees
// that any path which was supposed to reach here has already
// been parsed or must be dead.
Node* c = control();
Node* result = _gvn.transform_no_reclaim(control());
if (c != result && TraceOptoParse) {
tty->print_cr("Block #%d replace %d with %d", block->rpo(), c->_idx, result->_idx);
}
if (result != top()) {
record_for_igvn(result);
}
}
// Parse the block.
do_one_block();
// Check for bailouts.
if (failing()) return;
}
assert(new_blocks_parsed > 0, "must make progress");
assert(tries < block_count(), "the pre-order cannot be this bad!");
old_blocks_merged = _blocks_merged;
old_blocks_parsed = _blocks_parsed;
// with irreducible loops multiple passes might be necessary to parse everything
if (!has_irreducible || !progress) {
break;
}
}
blocks_seen += block_count();
#ifndef PRODUCT
// Make sure there are no half-processed blocks remaining.
// Every remaining unprocessed block is dead and may be ignored now.
for (int po = 0; po < block_count(); po++) {
Block* block = pre_order_at(po);
for (int rpo = 0; rpo < block_count(); rpo++) {
Block* block = rpo_at(rpo);
if (!block->is_parsed()) {
if (TraceOptoParse) {
tty->print("Skipped dead block %d at bci:%d", po, block->start());
assert(!block->is_merged(), "no half-processed blocks");
tty->print_cr("Skipped dead block %d at bci:%d", rpo, block->start());
}
assert(!block->is_merged(), "no half-processed blocks");
}
}
#endif
}
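The rewritten do_all_blocks above replaces the old visit_blocks sweep with a reverse-post-order pass that repeats only while it makes progress, which matters exactly when the CFG has irreducible entries: a block may become reachable (merged) only on a later pass. A minimal standalone sketch of that control pattern with a toy Block type (none of these names are HotSpot API):

#include <vector>
#include <cstdio>

struct ToyBlock {
  bool merged = false;    // some predecessor has delivered state to this block
  bool parsed = false;
  std::vector<int> succs; // successor indices in RPO numbering
};

// Repeatedly sweep blocks in RPO; with a reducible CFG one pass is enough,
// but with irreducible entries a block may only become "merged" after a
// later block has been parsed, so we iterate until a pass makes no progress.
static void parse_all(std::vector<ToyBlock>& blocks, bool has_irreducible) {
  while (true) {
    bool progress = false;
    for (ToyBlock& b : blocks) {
      if (b.parsed || !b.merged) continue;
      b.parsed = true;
      progress = true;
      for (int s : b.succs) blocks[s].merged = true;  // "merge" state into successors
    }
    if (!has_irreducible || !progress) break;
  }
}

int main() {
  // Block 2 is only reachable through block 3, i.e. through a block that
  // comes later in RPO, mimicking a second entry into a loop.
  std::vector<ToyBlock> blocks(4);
  blocks[0].merged = true;
  blocks[0].succs = {1};
  blocks[1].succs = {3};
  blocks[3].succs = {2};
  parse_all(blocks, /*has_irreducible=*/true);
  for (size_t i = 0; i < blocks.size(); i++)
    std::printf("block %zu parsed=%d\n", i, (int)blocks[i].parsed);
  return 0;
}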
//---------------------------visit_blocks--------------------------------------
void Parse::visit_blocks() {
// Walk over all blocks, parsing every one that has been reached (merged).
for (int po = 0; po < block_count(); po++) {
Block* block = pre_order_at(po);
if (block->is_parsed()) {
// Do not parse twice.
continue;
}
if (!block->is_merged()) {
// No state on this block. It had not yet been reached.
// Delay reaching it until later.
continue;
}
// Prepare to parse this block.
load_state_from(block);
if (stopped()) {
// Block is dead.
continue;
}
if (!block->is_ready() || block->is_handler()) {
// Not all preds have been parsed. We must build phis everywhere.
// (Note that dead locals do not get phis built, ever.)
ensure_phis_everywhere();
// Leave behind an undisturbed copy of the map, for future merges.
set_map(clone_map());
}
// Ready or not, parse the block.
do_one_block();
// Check for bailouts.
if (failing()) return;
}
}
//-------------------------------build_exits----------------------------------
// Build normal and exceptional exit merge points.
void Parse::build_exits() {
......@@ -1134,24 +1116,24 @@ void Parse::init_blocks() {
_blocks = NEW_RESOURCE_ARRAY(Block, _block_count);
Copy::zero_to_bytes(_blocks, sizeof(Block)*_block_count);
int po;
int rpo;
// Initialize the structs.
for (po = 0; po < block_count(); po++) {
Block* block = pre_order_at(po);
block->init_node(this, po);
for (rpo = 0; rpo < block_count(); rpo++) {
Block* block = rpo_at(rpo);
block->init_node(this, rpo);
}
// Collect predecessor and successor information.
for (po = 0; po < block_count(); po++) {
Block* block = pre_order_at(po);
for (rpo = 0; rpo < block_count(); rpo++) {
Block* block = rpo_at(rpo);
block->init_graph(this);
}
}
//-------------------------------init_node-------------------------------------
void Parse::Block::init_node(Parse* outer, int po) {
_flow = outer->flow()->pre_order_at(po);
void Parse::Block::init_node(Parse* outer, int rpo) {
_flow = outer->flow()->rpo_at(rpo);
_pred_count = 0;
_preds_parsed = 0;
_count = 0;
......@@ -1177,7 +1159,7 @@ void Parse::Block::init_graph(Parse* outer) {
int p = 0;
for (int i = 0; i < ns+ne; i++) {
ciTypeFlow::Block* tf2 = (i < ns) ? tfs->at(i) : tfe->at(i-ns);
Block* block2 = outer->pre_order_at(tf2->pre_order());
Block* block2 = outer->rpo_at(tf2->rpo());
_successors[i] = block2;
// Accumulate pred info for the other block, too.
......@@ -1368,10 +1350,11 @@ void Parse::do_one_block() {
int nt = b->all_successors();
tty->print("Parsing block #%d at bci [%d,%d), successors: ",
block()->pre_order(), block()->start(), block()->limit());
block()->rpo(), block()->start(), block()->limit());
for (int i = 0; i < nt; i++) {
tty->print((( i < ns) ? " %d" : " %d(e)"), b->successor_at(i)->pre_order());
tty->print((( i < ns) ? " %d" : " %d(e)"), b->successor_at(i)->rpo());
}
if (b->is_loop_head()) tty->print(" lphd");
tty->print_cr("");
}
......@@ -1501,7 +1484,7 @@ void Parse::handle_missing_successor(int target_bci) {
#ifndef PRODUCT
Block* b = block();
int trap_bci = b->flow()->has_trap()? b->flow()->trap_bci(): -1;
tty->print_cr("### Missing successor at bci:%d for block #%d (trap_bci:%d)", target_bci, b->pre_order(), trap_bci);
tty->print_cr("### Missing successor at bci:%d for block #%d (trap_bci:%d)", target_bci, b->rpo(), trap_bci);
#endif
ShouldNotReachHere();
}
......@@ -1509,7 +1492,7 @@ void Parse::handle_missing_successor(int target_bci) {
//--------------------------merge_common---------------------------------------
void Parse::merge_common(Parse::Block* target, int pnum) {
if (TraceOptoParse) {
tty->print("Merging state at block #%d bci:%d", target->pre_order(), target->start());
tty->print("Merging state at block #%d bci:%d", target->rpo(), target->start());
}
// Zap extra stack slots to top
......@@ -1534,6 +1517,7 @@ void Parse::merge_common(Parse::Block* target, int pnum) {
// which must not be allowed into this block's map.)
if (pnum > PhiNode::Input // Known multiple inputs.
|| target->is_handler() // These have unpredictable inputs.
|| target->is_loop_head() // Known multiple inputs
|| control()->is_Region()) { // We must hide this guy.
// Add a Region to start the new basic block. Phis will be added
// later lazily.
......@@ -1575,15 +1559,21 @@ void Parse::merge_common(Parse::Block* target, int pnum) {
// Compute where to merge into
// Merge incoming control path
r->set_req(pnum, newin->control());
r->init_req(pnum, newin->control());
if (pnum == 1) { // Last merge for this Region?
_gvn.transform_no_reclaim(r);
if (!block()->flow()->is_irreducible_entry()) {
Node* result = _gvn.transform_no_reclaim(r);
if (r != result && TraceOptoParse) {
tty->print_cr("Block #%d replace %d with %d", block()->rpo(), r->_idx, result->_idx);
}
}
record_for_igvn(r);
}
// Update all the non-control inputs to map:
assert(TypeFunc::Parms == newin->jvms()->locoff(), "parser map should contain only youngest jvms");
bool check_elide_phi = target->is_SEL_backedge(save_block);
for (uint j = 1; j < newin->req(); j++) {
Node* m = map()->in(j); // Current state of target.
Node* n = newin->in(j); // Incoming change to target state.
......@@ -1603,7 +1593,11 @@ void Parse::merge_common(Parse::Block* target, int pnum) {
merge_memory_edges(n->as_MergeMem(), pnum, nophi);
continue;
default: // All normal stuff
if (phi == NULL) phi = ensure_phi(j, nophi);
if (phi == NULL) {
if (!check_elide_phi || !target->can_elide_SEL_phi(j)) {
phi = ensure_phi(j, nophi);
}
}
break;
}
}
......@@ -1736,9 +1730,13 @@ void Parse::ensure_phis_everywhere() {
uint nof_monitors = map()->jvms()->nof_monitors();
assert(TypeFunc::Parms == map()->jvms()->locoff(), "parser map should contain only youngest jvms");
bool check_elide_phi = block()->is_SEL_head();
for (uint i = TypeFunc::Parms; i < monoff; i++) {
ensure_phi(i);
if (!check_elide_phi || !block()->can_elide_SEL_phi(i)) {
ensure_phi(i);
}
}
// Even monitors need Phis, though they are well-structured.
// This is true for OSR methods, and also for the rare cases where
// a monitor object is the subject of a replace_in_map operation.
......
......@@ -100,16 +100,17 @@ Node* Parse::array_addressing(BasicType type, int vals, const Type* *result2) {
// Do the range check
if (GenerateRangeChecks && need_range_check) {
// Range is constant in array-oop, so we can use the original state of mem
Node* len = load_array_length(ary);
Node* tst;
if (sizetype->_hi <= 0) {
// If the greatest array bound is negative, we can conclude that we're
// The greatest array bound is negative, so we can conclude that we're
// compiling unreachable code, but the unsigned compare trick used below
// only works with non-negative lengths. Instead, hack "tst" to be zero so
// the uncommon_trap path will always be taken.
tst = _gvn.intcon(0);
} else {
// Range is constant in array-oop, so we can use the original state of mem
Node* len = load_array_length(ary);
// Test length vs index (standard trick using unsigned compare)
Node* chk = _gvn.transform( new (C, 3) CmpUNode(idx, len) );
BoolTest::mask btest = BoolTest::lt;
......@@ -137,9 +138,12 @@ Node* Parse::array_addressing(BasicType type, int vals, const Type* *result2) {
// Check for always knowing you are throwing a range-check exception
if (stopped()) return top();
Node* ptr = array_element_address( ary, idx, type, sizetype);
Node* ptr = array_element_address(ary, idx, type, sizetype);
if (result2 != NULL) *result2 = elemtype;
assert(ptr != top(), "top should go hand-in-hand with stopped");
return ptr;
}
......
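The "standard trick using unsigned compare" mentioned above folds the two bounds checks into one: a negative index converts to a huge unsigned value, so a single unsigned idx < len rejects both idx < 0 and idx >= len (assuming len itself is non-negative, as array lengths are). A tiny standalone illustration in plain C++ rather than IR nodes:

#include <cstdint>
#include <cstdio>

// One unsigned compare covers both "idx < 0" and "idx >= len",
// because a negative int converts to a very large unsigned value.
static bool in_range(int32_t idx, int32_t len) {
  return static_cast<uint32_t>(idx) < static_cast<uint32_t>(len);
}

int main() {
  std::printf("%d %d %d %d\n",
              in_range(0, 4),    // 1: valid
              in_range(3, 4),    // 1: valid
              in_range(4, 4),    // 0: too large
              in_range(-1, 4));  // 0: negative wraps to 0xFFFFFFFF
  return 0;
}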
......@@ -3157,17 +3157,18 @@ static jint max_array_length(BasicType etype) {
// Narrow the given size type to the index range for the given array base type.
// Return NULL if the resulting int type becomes empty.
const TypeInt* TypeAryPtr::narrow_size_type(const TypeInt* size, BasicType elem) {
const TypeInt* TypeAryPtr::narrow_size_type(const TypeInt* size) const {
jint hi = size->_hi;
jint lo = size->_lo;
jint min_lo = 0;
jint max_hi = max_array_length(elem);
jint max_hi = max_array_length(elem()->basic_type());
//if (index_not_size) --max_hi; // type of a valid array index, FTR
bool chg = false;
if (lo < min_lo) { lo = min_lo; chg = true; }
if (hi > max_hi) { hi = max_hi; chg = true; }
// Negative length arrays will produce weird intermediate dead fast-path code
if (lo > hi)
return NULL;
return TypeInt::ZERO;
if (!chg)
return size;
return TypeInt::make(lo, hi, Type::WidenMin);
......@@ -3176,9 +3177,7 @@ const TypeInt* TypeAryPtr::narrow_size_type(const TypeInt* size, BasicType elem)
//-------------------------------cast_to_size----------------------------------
const TypeAryPtr* TypeAryPtr::cast_to_size(const TypeInt* new_size) const {
assert(new_size != NULL, "");
new_size = narrow_size_type(new_size, elem()->basic_type());
if (new_size == NULL) // Negative length arrays will produce weird
new_size = TypeInt::ZERO; // intermediate dead fast-path goo
new_size = narrow_size_type(new_size);
if (new_size == size()) return this;
const TypeAry* new_ary = TypeAry::make(elem(), new_size);
return make(ptr(), const_oop(), new_ary, klass(), klass_is_exact(), _offset, _instance_id);
......
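narrow_size_type above clamps the size type's bounds to the legal array-length range and, after this change, returns TypeInt::ZERO instead of NULL when the clamped interval is empty, so cast_to_size no longer needs its own dead-path special case. A standalone sketch of the clamping arithmetic on plain integer intervals (the IntRange type and max_len value are illustrative):

#include <algorithm>
#include <cstdio>

struct IntRange { int lo, hi; };   // stand-in for TypeInt bounds

// Clamp a candidate array-size range to [0, max_len]; an empty result
// collapses to the single value 0 (the analogue of TypeInt::ZERO).
static IntRange narrow_size(IntRange size, int max_len) {
  int lo = std::max(size.lo, 0);
  int hi = std::min(size.hi, max_len);
  if (lo > hi) return IntRange{0, 0};   // dead fast-path code becomes length 0
  return IntRange{lo, hi};
}

int main() {
  IntRange a = narrow_size(IntRange{-5, 10}, 1000);  // -> [0, 10]
  IntRange b = narrow_size(IntRange{-8, -2}, 1000);  // empty -> [0, 0]
  std::printf("[%d,%d] [%d,%d]\n", a.lo, a.hi, b.lo, b.hi);
  return 0;
}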
......@@ -840,6 +840,7 @@ public:
virtual const TypeOopPtr *cast_to_instance_id(int instance_id) const;
virtual const TypeAryPtr* cast_to_size(const TypeInt* size) const;
virtual const TypeInt* narrow_size_type(const TypeInt* size) const;
virtual bool empty(void) const; // TRUE if type is vacuous
virtual const TypePtr *add_offset( intptr_t offset ) const;
......@@ -865,7 +866,6 @@ public:
}
static const TypeAryPtr *_array_body_type[T_CONFLICT+1];
// sharpen the type of an int which is used as an array size
static const TypeInt* narrow_size_type(const TypeInt* size, BasicType elem);
#ifndef PRODUCT
virtual void dump2( Dict &d, uint depth, outputStream *st ) const; // Specialized per-Type dumping
#endif
......
......@@ -121,7 +121,7 @@ JvmtiEnvBase::JvmtiEnvBase() : _env_event_enable() {
JvmtiEventController::env_initialize((JvmtiEnv*)this);
#ifdef JVMTI_TRACE
_jvmti_external.functions = strlen(TraceJVMTI)? &jvmtiTrace_Interface : &jvmti_Interface;
_jvmti_external.functions = TraceJVMTI != NULL ? &jvmtiTrace_Interface : &jvmti_Interface;
#else
_jvmti_external.functions = &jvmti_Interface;
#endif
......
......@@ -73,7 +73,7 @@ void JvmtiTrace::initialize() {
const char *very_end;
const char *curr;
if (strlen(TraceJVMTI)) {
if (TraceJVMTI != NULL) {
curr = TraceJVMTI;
} else {
curr = ""; // hack in fixed tracing here
......
......@@ -365,8 +365,11 @@ bool CommandLineFlags::ccstrAtPut(char* name, size_t len, ccstr* value, FlagValu
if (result == NULL) return false;
if (!result->is_ccstr()) return false;
ccstr old_value = result->get_ccstr();
char* new_value = NEW_C_HEAP_ARRAY(char, strlen(*value)+1);
strcpy(new_value, *value);
char* new_value = NULL;
if (*value != NULL) {
new_value = NEW_C_HEAP_ARRAY(char, strlen(*value)+1);
strcpy(new_value, *value);
}
result->set_ccstr(new_value);
if (result->origin == DEFAULT && old_value != NULL) {
// Prior value is NOT heap allocated, but was a literal constant.
......
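The ccstrAtPut change above exists because ccstr flags such as TraceJVMTI and SyncKnobs now default to NULL rather than "", so the write path has to tolerate a NULL incoming value instead of unconditionally calling strlen/strcpy on it. The pattern is just NULL-safe string duplication; a minimal sketch using plain malloc in place of NEW_C_HEAP_ARRAY:

#include <cstdlib>
#include <cstring>

// Duplicate a C string, propagating NULL instead of crashing on it.
// (Stand-in for the NEW_C_HEAP_ARRAY + strcpy sequence in ccstrAtPut.)
static char* dup_or_null(const char* value) {
  if (value == NULL) return NULL;
  char* copy = static_cast<char*>(std::malloc(std::strlen(value) + 1));
  if (copy != NULL) std::strcpy(copy, value);
  return copy;
}

int main() {
  char* a = dup_or_null("compileOnly=java/lang/Object");
  char* b = dup_or_null(NULL);       // the new default for TraceJVMTI
  std::free(a);
  std::free(b);                      // free(NULL) is a no-op
  return 0;
}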
......@@ -707,7 +707,7 @@ class CommandLineFlags {
diagnostic(bool, PrintAssembly, false, \
"Print assembly code (using external disassembler.so)") \
\
diagnostic(ccstr, PrintAssemblyOptions, false, \
diagnostic(ccstr, PrintAssemblyOptions, NULL, \
"Options string passed to disassembler.so") \
\
diagnostic(bool, PrintNMethods, false, \
......@@ -848,7 +848,7 @@ class CommandLineFlags {
"Use LWP-based instead of libthread-based synchronization " \
"(SPARC only)") \
\
product(ccstr, SyncKnobs, "", \
product(ccstr, SyncKnobs, NULL, \
"(Unstable) Various monitor synchronization tunables") \
\
product(intx, EmitSync, 0, \
......@@ -1032,7 +1032,7 @@ class CommandLineFlags {
notproduct(bool, TraceJVMCalls, false, \
"Trace JVM calls") \
\
product(ccstr, TraceJVMTI, "", \
product(ccstr, TraceJVMTI, NULL, \
"Trace flags for JVMTI functions and events") \
\
/* This option can change an EMCP method into an obsolete method. */ \
......@@ -1157,10 +1157,6 @@ class CommandLineFlags {
"In the Parallel Old garbage collector use parallel dense" \
" prefix update") \
\
develop(bool, UseParallelOldGCChunkPointerCalc, true, \
"In the Parallel Old garbage collector use chucks to calculate" \
" new object locations") \
\
product(uintx, HeapMaximumCompactionInterval, 20, \
"How often should we maximally compact the heap (not allowing " \
"any dead space)") \
......@@ -1189,21 +1185,14 @@ class CommandLineFlags {
product(uintx, ParallelCMSThreads, 0, \
"Max number of threads CMS will use for concurrent work") \
\
develop(bool, VerifyParallelOldWithMarkSweep, false, \
"Use the MarkSweep code to verify phases of Parallel Old") \
\
develop(uintx, VerifyParallelOldWithMarkSweepInterval, 1, \
"Interval at which the MarkSweep code is used to verify " \
"phases of Parallel Old") \
\
develop(bool, ParallelOldMTUnsafeMarkBitMap, false, \
"Use the Parallel Old MT unsafe in marking the bitmap") \
\
develop(bool, ParallelOldMTUnsafeUpdateLiveData, false, \
"Use the Parallel Old MT unsafe in update of live size") \
\
develop(bool, TraceChunkTasksQueuing, false, \
"Trace the queuing of the chunk tasks") \
develop(bool, TraceRegionTasksQueuing, false, \
"Trace the queuing of the region tasks") \
\
product(uintx, ParallelMarkingThreads, 0, \
"Number of marking threads concurrent gc will use") \
......
......@@ -109,72 +109,72 @@ void ParallelTaskTerminator::reset_for_reuse() {
}
}
bool ChunkTaskQueueWithOverflow::is_empty() {
return (_chunk_queue.size() == 0) &&
bool RegionTaskQueueWithOverflow::is_empty() {
return (_region_queue.size() == 0) &&
(_overflow_stack->length() == 0);
}
bool ChunkTaskQueueWithOverflow::stealable_is_empty() {
return _chunk_queue.size() == 0;
bool RegionTaskQueueWithOverflow::stealable_is_empty() {
return _region_queue.size() == 0;
}
bool ChunkTaskQueueWithOverflow::overflow_is_empty() {
bool RegionTaskQueueWithOverflow::overflow_is_empty() {
return _overflow_stack->length() == 0;
}
void ChunkTaskQueueWithOverflow::initialize() {
_chunk_queue.initialize();
void RegionTaskQueueWithOverflow::initialize() {
_region_queue.initialize();
assert(_overflow_stack == 0, "Creating memory leak");
_overflow_stack =
new (ResourceObj::C_HEAP) GrowableArray<ChunkTask>(10, true);
new (ResourceObj::C_HEAP) GrowableArray<RegionTask>(10, true);
}
void ChunkTaskQueueWithOverflow::save(ChunkTask t) {
if (TraceChunkTasksQueuing && Verbose) {
void RegionTaskQueueWithOverflow::save(RegionTask t) {
if (TraceRegionTasksQueuing && Verbose) {
gclog_or_tty->print_cr("CTQ: save " PTR_FORMAT, t);
}
if(!_chunk_queue.push(t)) {
if(!_region_queue.push(t)) {
_overflow_stack->push(t);
}
}
// Note that using this method will retrieve all chunks
// Note that using this method will retrieve all regions
// that have been saved but that it will always check
// the overflow stack. It may be more efficient to
// check the stealable queue and the overflow stack
// separately.
bool ChunkTaskQueueWithOverflow::retrieve(ChunkTask& chunk_task) {
bool result = retrieve_from_overflow(chunk_task);
bool RegionTaskQueueWithOverflow::retrieve(RegionTask& region_task) {
bool result = retrieve_from_overflow(region_task);
if (!result) {
result = retrieve_from_stealable_queue(chunk_task);
result = retrieve_from_stealable_queue(region_task);
}
if (TraceChunkTasksQueuing && Verbose && result) {
if (TraceRegionTasksQueuing && Verbose && result) {
gclog_or_tty->print_cr(" CTQ: retrieve " PTR_FORMAT, result);
}
return result;
}
bool ChunkTaskQueueWithOverflow::retrieve_from_stealable_queue(
ChunkTask& chunk_task) {
bool result = _chunk_queue.pop_local(chunk_task);
if (TraceChunkTasksQueuing && Verbose) {
gclog_or_tty->print_cr("CTQ: retrieve_stealable " PTR_FORMAT, chunk_task);
bool RegionTaskQueueWithOverflow::retrieve_from_stealable_queue(
RegionTask& region_task) {
bool result = _region_queue.pop_local(region_task);
if (TraceRegionTasksQueuing && Verbose) {
gclog_or_tty->print_cr("CTQ: retrieve_stealable " PTR_FORMAT, region_task);
}
return result;
}
bool ChunkTaskQueueWithOverflow::retrieve_from_overflow(
ChunkTask& chunk_task) {
bool
RegionTaskQueueWithOverflow::retrieve_from_overflow(RegionTask& region_task) {
bool result;
if (!_overflow_stack->is_empty()) {
chunk_task = _overflow_stack->pop();
region_task = _overflow_stack->pop();
result = true;
} else {
chunk_task = (ChunkTask) NULL;
region_task = (RegionTask) NULL;
result = false;
}
if (TraceChunkTasksQueuing && Verbose) {
gclog_or_tty->print_cr("CTQ: retrieve_stealable " PTR_FORMAT, chunk_task);
if (TraceRegionTasksQueuing && Verbose) {
gclog_or_tty->print_cr("CTQ: retrieve_stealable " PTR_FORMAT, region_task);
}
return result;
}
......@@ -557,32 +557,32 @@ class StarTask {
typedef GenericTaskQueue<StarTask> OopStarTaskQueue;
typedef GenericTaskQueueSet<StarTask> OopStarTaskQueueSet;
typedef size_t ChunkTask; // index for chunk
typedef GenericTaskQueue<ChunkTask> ChunkTaskQueue;
typedef GenericTaskQueueSet<ChunkTask> ChunkTaskQueueSet;
typedef size_t RegionTask; // index for region
typedef GenericTaskQueue<RegionTask> RegionTaskQueue;
typedef GenericTaskQueueSet<RegionTask> RegionTaskQueueSet;
class ChunkTaskQueueWithOverflow: public CHeapObj {
class RegionTaskQueueWithOverflow: public CHeapObj {
protected:
ChunkTaskQueue _chunk_queue;
GrowableArray<ChunkTask>* _overflow_stack;
RegionTaskQueue _region_queue;
GrowableArray<RegionTask>* _overflow_stack;
public:
ChunkTaskQueueWithOverflow() : _overflow_stack(NULL) {}
RegionTaskQueueWithOverflow() : _overflow_stack(NULL) {}
// Initialize both stealable queue and overflow
void initialize();
// Save first to stealable queue and then to overflow
void save(ChunkTask t);
void save(RegionTask t);
// Retrieve first from overflow and then from stealable queue
bool retrieve(ChunkTask& chunk_index);
bool retrieve(RegionTask& region_index);
// Retrieve from stealable queue
bool retrieve_from_stealable_queue(ChunkTask& chunk_index);
bool retrieve_from_stealable_queue(RegionTask& region_index);
// Retrieve from overflow
bool retrieve_from_overflow(ChunkTask& chunk_index);
bool retrieve_from_overflow(RegionTask& region_index);
bool is_empty();
bool stealable_is_empty();
bool overflow_is_empty();
juint stealable_size() { return _chunk_queue.size(); }
ChunkTaskQueue* task_queue() { return &_chunk_queue; }
juint stealable_size() { return _region_queue.size(); }
RegionTaskQueue* task_queue() { return &_region_queue; }
};
#define USE_ChunkTaskQueueWithOverflow
#define USE_RegionTaskQueueWithOverflow
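RegionTaskQueueWithOverflow above pairs a bounded, stealable queue with an unbounded private overflow stack: save() prefers the queue and spills to the overflow, retrieve() drains the overflow first. A minimal standalone model of that shape, with std::deque and std::vector standing in for GenericTaskQueue and GrowableArray (the capacity and names are illustrative):

#include <cstddef>
#include <cstdio>
#include <deque>
#include <vector>

typedef size_t RegionTask;   // index of a region, as in the real code

// Toy version: a capacity-limited "stealable" queue plus an overflow stack.
class ToyRegionQueueWithOverflow {
  std::deque<RegionTask> _queue;      // bounded; would be stealable in HotSpot
  std::vector<RegionTask> _overflow;  // private overflow stack
  size_t _capacity;
 public:
  explicit ToyRegionQueueWithOverflow(size_t capacity) : _capacity(capacity) {}

  // Save first to the stealable queue, then to overflow if the queue is full.
  void save(RegionTask t) {
    if (_queue.size() < _capacity) _queue.push_back(t);
    else                           _overflow.push_back(t);
  }

  // Retrieve first from overflow, then from the stealable queue.
  bool retrieve(RegionTask& t) {
    if (!_overflow.empty()) { t = _overflow.back(); _overflow.pop_back(); return true; }
    if (!_queue.empty())    { t = _queue.front();   _queue.pop_front();   return true; }
    return false;
  }

  bool is_empty() const { return _queue.empty() && _overflow.empty(); }
};

int main() {
  ToyRegionQueueWithOverflow q(2);
  for (RegionTask r = 0; r < 4; r++) q.save(r);          // 0,1 to queue; 2,3 overflow
  RegionTask t;
  while (q.retrieve(t)) std::printf("region %zu\n", t);  // 3,2 then 0,1
  return 0;
}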
/*
* Copyright 2008 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*/
/*
* @test
* @bug 6711100
* @summary 64bit fastdebug server vm crashes with assert(_base == Int,"Not an Int")
* @run main/othervm -Xcomp -XX:CompileOnly=Test.<init> Test
*/
public class Test {
static byte b;
// The server compiler chokes on compiling
// this method when f() is not inlined
public Test() {
b = (new byte[1])[(new byte[f()])[-1]];
}
protected static int f() {
return 1;
}
public static void main(String[] args) {
try {
Test t = new Test();
} catch (ArrayIndexOutOfBoundsException e) {
}
}
}