Commit f9d9730e, Author: acorn

Merge

@@ -249,8 +249,6 @@ void AbstractAssembler::block_comment(const char* comment) {
 bool MacroAssembler::needs_explicit_null_check(intptr_t offset) {
   // Exception handler checks the nmethod's implicit null checks table
   // only when this method returns false.
-#ifndef SPARC
-  // Sparc does not have based addressing
   if (UseCompressedOops) {
     // The first page after heap_base is unmapped and
     // the 'offset' is equal to [heap_base + offset] for
@@ -261,7 +259,6 @@ bool MacroAssembler::needs_explicit_null_check(intptr_t offset) {
       offset = (intptr_t)(pointer_delta((void*)offset, (void*)heap_base, 1));
     }
   }
-#endif // SPARC
   return offset < 0 || os::vm_page_size() <= offset;
 }
...
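The hunk above drops the SPARC special case from needs_explicit_null_check(). The surrounding logic is easier to follow outside diff form; the sketch below is a simplified standalone model, not the HotSpot sources, and it assumes invented values for the page size and the compressed-oop heap base to show how a fault address of the form heap_base + field_offset is rebased before the unmapped-page test.

#include <cstdint>

// Assumed stand-ins for os::vm_page_size() and the narrow-oop heap base.
const intptr_t page_size = 4096;
const intptr_t heap_base = 0x800000000L;

// Returns true when the compiler must emit an explicit null check, because a
// fault at this offset would not land in a guaranteed-unmapped page.
bool needs_explicit_null_check(intptr_t offset, bool use_compressed_oops) {
  if (use_compressed_oops) {
    // Decoding a null narrow oop yields heap_base, so loading a field at
    // [null + offset] faults at heap_base + offset.  Rebase such addresses to
    // a plain field offset; the first page above heap_base stays unmapped.
    if (offset >= heap_base) {
      offset -= heap_base;
    }
  }
  // Only offsets inside the protected first page can rely on the signal
  // handler and the nmethod's implicit null check table.
  return offset < 0 || offset >= page_size;
}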
@@ -49,7 +49,7 @@ bool ciMethodBlocks::is_block_start(int bci) {
 // first half.  Returns the range beginning at bci.
 ciBlock *ciMethodBlocks::split_block_at(int bci) {
   ciBlock *former_block = block_containing(bci);
-  ciBlock *new_block = new(_arena) ciBlock(_method, _num_blocks++, this, former_block->start_bci());
+  ciBlock *new_block = new(_arena) ciBlock(_method, _num_blocks++, former_block->start_bci());
   _blocks->append(new_block);
   assert(former_block != NULL, "must not be NULL");
   new_block->set_limit_bci(bci);
@@ -83,7 +83,7 @@ ciBlock *ciMethodBlocks::make_block_at(int bci) {
   if (cb == NULL ) {
     // This is our first time visiting this bytecode.  Create
     // a fresh block and assign it this starting point.
-    ciBlock *nb = new(_arena) ciBlock(_method, _num_blocks++, this, bci);
+    ciBlock *nb = new(_arena) ciBlock(_method, _num_blocks++, bci);
     _blocks->append(nb);
     _bci_to_block[bci] = nb;
     return nb;
@@ -98,6 +98,11 @@ ciBlock *ciMethodBlocks::make_block_at(int bci) {
   }
 }

+ciBlock *ciMethodBlocks::make_dummy_block() {
+  ciBlock *dum = new(_arena) ciBlock(_method, -1, 0);
+  return dum;
+}
+
 void ciMethodBlocks::do_analysis() {
   ciBytecodeStream s(_method);
   ciBlock *cur_block = block_containing(0);
@@ -253,7 +258,7 @@ ciMethodBlocks::ciMethodBlocks(Arena *arena, ciMethod *meth): _method(meth),
   Copy::zero_to_words((HeapWord*) _bci_to_block, b2bsize / sizeof(HeapWord));
   // create initial block covering the entire method
-  ciBlock *b = new(arena) ciBlock(_method, _num_blocks++, this, 0);
+  ciBlock *b = new(arena) ciBlock(_method, _num_blocks++, 0);
   _blocks->append(b);
   _bci_to_block[0] = b;
@@ -334,7 +339,7 @@ void ciMethodBlocks::dump() {
 #endif

-ciBlock::ciBlock(ciMethod *method, int index, ciMethodBlocks *mb, int start_bci) :
+ciBlock::ciBlock(ciMethod *method, int index, int start_bci) :
 #ifndef PRODUCT
   _method(method),
 #endif
...
@@ -48,6 +48,8 @@ public:
   int num_blocks()  { return _num_blocks;}
   void clear_processed();

+  ciBlock *make_dummy_block(); // a block not associated with a bci
+
 #ifndef PRODUCT
   void dump();
 #endif
@@ -81,7 +83,7 @@ public:
     fall_through_bci = -1
   };

-  ciBlock(ciMethod *method, int index, ciMethodBlocks *mb, int start_bci);
+  ciBlock(ciMethod *method, int index, int start_bci);

   int start_bci() const     { return _start_bci; }
   int limit_bci() const     { return _limit_bci; }
   int control_bci() const   { return _control_bci; }
@@ -94,7 +96,6 @@ public:
   int ex_limit_bci() const  { return _ex_limit_bci; }
   bool contains(int bci) const { return start_bci() <= bci && bci < limit_bci(); }

   // flag handling
   bool processed() const    { return (_flags & Processed) != 0; }
   bool is_handler() const   { return (_flags & Handler) != 0; }
...
(The diff for this file has been collapsed and is not shown.)
@@ -34,11 +34,13 @@ private:
   int _max_locals;
   int _max_stack;
   int _code_size;
+  bool      _has_irreducible_entry;

   const char* _failure_reason;

 public:
   class StateVector;
+  class Loop;
   class Block;

   // Build a type flow analyzer
@@ -55,6 +57,7 @@ public:
   int max_stack() const  { return _max_stack; }
   int max_cells() const  { return _max_locals + _max_stack; }
   int code_size() const  { return _code_size; }
+  bool has_irreducible_entry() const { return _has_irreducible_entry; }

   // Represents information about an "active" jsr call.  This
   // class represents a call to the routine at some entry address
@@ -125,6 +128,19 @@ public:
     void print_on(outputStream* st) const PRODUCT_RETURN;
   };

+  class LocalSet VALUE_OBJ_CLASS_SPEC {
+  private:
+    enum Constants { max = 63 };
+    uint64_t _bits;
+  public:
+    LocalSet() : _bits(0) {}
+    void add(uint32_t i)        { if (i < (uint32_t)max) _bits |= (1LL << i); }
+    void add(LocalSet* ls)      { _bits |= ls->_bits; }
+    bool test(uint32_t i) const { return i < (uint32_t)max ? (_bits>>i)&1U : true; }
+    void clear()                { _bits = 0; }
+    void print_on(outputStream* st, int limit) const  PRODUCT_RETURN;
+  };
+
   // Used as a combined index for locals and temps
   enum Cell {
     Cell_0, Cell_max = INT_MAX
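LocalSet, added above, is a fixed-size bit set over local-variable indices: slots 0..62 are tracked exactly, and anything at or beyond `max` conservatively reports as defined. The following is a hedged sketch of the same idea outside the VM, with hypothetical names, just to show the intended semantics.

#include <cstdint>

// Sketch of the LocalSet idea: a 64-bit mask over local slots, conservative
// for indices it cannot represent.
class LocalBitSet {
  static const uint32_t max_tracked = 63;
  uint64_t bits_;
public:
  LocalBitSet() : bits_(0) {}
  void add(uint32_t i)        { if (i < max_tracked) bits_ |= (uint64_t(1) << i); }
  void add_all(const LocalBitSet& other) { bits_ |= other.bits_; }
  // Untracked indices answer true so callers never treat a local as
  // loop-invariant without proof.
  bool test(uint32_t i) const { return i < max_tracked ? ((bits_ >> i) & 1) != 0 : true; }
  void clear()                { bits_ = 0; }
};

// Usage: record stores to locals 2 and 70; only 2 is tracked exactly, 70 is
// conservatively "defined":
//   LocalBitSet defs; defs.add(2); defs.add(70);
//   defs.test(2) == true, defs.test(70) == true, defs.test(3) == false.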
@@ -142,6 +158,8 @@ public:
     int         _trap_bci;
     int         _trap_index;

+    LocalSet    _def_locals;  // For entire block
+
     static ciType* type_meet_internal(ciType* t1, ciType* t2, ciTypeFlow* analyzer);

   public:
@@ -181,6 +199,9 @@ public:
     int monitor_count() const       { return _monitor_count; }
     void set_monitor_count(int mc)  { _monitor_count = mc; }

+    LocalSet* def_locals() { return &_def_locals; }
+    const LocalSet* def_locals() const { return &_def_locals; }
+
     static Cell start_cell() { return (Cell)0; }
     static Cell next_cell(Cell c) { return (Cell)(((int)c) + 1); }
     Cell limit_cell() const {
@@ -250,6 +271,10 @@ public:
       return type->basic_type() == T_DOUBLE;
     }

+    void store_to_local(int lnum) {
+      _def_locals.add((uint) lnum);
+    }
+
     void push_translate(ciType* type);

     void push_int() {
@@ -358,6 +383,7 @@ public:
              "must be reference type or return address");
       overwrite_local_double_long(index);
       set_type_at(local(index), type);
+      store_to_local(index);
     }

     void load_local_double(int index) {
@@ -376,6 +402,8 @@ public:
       overwrite_local_double_long(index);
       set_type_at(local(index), type);
       set_type_at(local(index+1), type2);
+      store_to_local(index);
+      store_to_local(index+1);
     }

     void load_local_float(int index) {
@@ -388,6 +416,7 @@ public:
       assert(is_float(type), "must be float type");
       overwrite_local_double_long(index);
       set_type_at(local(index), type);
+      store_to_local(index);
     }

     void load_local_int(int index) {
@@ -400,6 +429,7 @@ public:
       assert(is_int(type), "must be int type");
       overwrite_local_double_long(index);
       set_type_at(local(index), type);
+      store_to_local(index);
     }

     void load_local_long(int index) {
@@ -418,6 +448,8 @@ public:
       overwrite_local_double_long(index);
       set_type_at(local(index), type);
       set_type_at(local(index+1), type2);
+      store_to_local(index);
+      store_to_local(index+1);
     }

     // Stop interpretation of this path with a trap.
@@ -450,13 +482,31 @@ public:
   };

   // Parameter for "find_block" calls:
-  // Describes the difference between a public and private copy.
+  // Describes the difference between a public and backedge copy.
   enum CreateOption {
     create_public_copy,
-    create_private_copy,
+    create_backedge_copy,
     no_create
   };

+  // Successor iterator
+  class SuccIter : public StackObj {
+  private:
+    Block* _pred;
+    int    _index;
+    Block* _succ;
+  public:
+    SuccIter()            : _pred(NULL), _index(-1), _succ(NULL) {}
+    SuccIter(Block* pred) : _pred(pred), _index(-1), _succ(NULL) { next(); }
+    int    index()     { return _index; }
+    Block* pred()      { return _pred; }        // Return predecessor
+    bool   done()      { return _index < 0; }   // Finished?
+    Block* succ()      { return _succ; }        // Return current successor
+    void   next();                              // Advance
+    void   set_succ(Block* succ);               // Update current successor
+    bool   is_normal_ctrl() { return index() < _pred->successors()->length(); }
+  };
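SuccIter, introduced above, walks a predecessor block's successors (and, past the end of the normal list, its exception successors), and set_succ() lets the caller redirect the edge it is currently positioned on. A hedged, hypothetical usage sketch of the pattern this enables when a loop head is cloned for a backedge copy:

// Hypothetical helper: retarget every edge from 'tail' that currently points
// at 'head' so it points at 'head_clone' instead.
void retarget_backedges(ciTypeFlow::Block* tail,
                        ciTypeFlow::Block* head,
                        ciTypeFlow::Block* head_clone) {
  for (ciTypeFlow::SuccIter iter(tail); !iter.done(); iter.next()) {
    if (iter.succ() == head) {
      iter.set_succ(head_clone);   // rewrite this successor edge in place
    }
  }
}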
   // A basic block
   class Block : public ResourceObj {
   private:
@@ -470,15 +520,24 @@ public:
     int _trap_bci;
     int _trap_index;

-    // A reasonable approximation to pre-order, provided to the client.
+    // pre_order, assigned at first visit. Used as block ID and "visited" tag
     int _pre_order;

+    // A post-order, used to compute the reverse post order (RPO) provided to the client
+    int _post_order;  // used to compute rpo
+
-    // Has this block been cloned for some special purpose?
-    bool _private_copy;
+    // Has this block been cloned for a loop backedge?
+    bool _backedge_copy;

     // A pointer used for our internal work list
     Block* _next;
-    bool _on_work_list;
+    bool _on_work_list;      // on the work list
+    Block* _rpo_next;        // Reverse post order list
+
+    // Loop info
+    Loop* _loop;                  // nearest loop
+    bool _irreducible_entry;      // entry to irreducible loop
+    bool _exception_entry;        // entry to exception handler

     ciBlock* ciblock() const     { return _ciblock; }
     StateVector* state() const   { return _state; }
@@ -504,10 +563,11 @@ public:
     int start() const   { return _ciblock->start_bci(); }
     int limit() const   { return _ciblock->limit_bci(); }
     int control() const { return _ciblock->control_bci(); }
+    JsrSet* jsrs() const { return _jsrs; }

-    bool is_private_copy() const  { return _private_copy; }
-    void set_private_copy(bool z);
-    int  private_copy_count() const { return outer()->private_copy_count(ciblock()->index(), _jsrs); }
+    bool is_backedge_copy() const { return _backedge_copy; }
+    void set_backedge_copy(bool z);
+    int  backedge_copy_count() const { return outer()->backedge_copy_count(ciblock()->index(), _jsrs); }

     // access to entry state
     int stack_size() const { return _state->stack_size(); }
@@ -515,6 +575,20 @@ public:
     ciType* local_type_at(int i) const { return _state->local_type_at(i); }
     ciType* stack_type_at(int i) const { return _state->stack_type_at(i); }

+    // Data flow on locals
+    bool is_invariant_local(uint v) const {
+      assert(is_loop_head(), "only loop heads");
+      // Find outermost loop with same loop head
+      Loop* lp = loop();
+      while (lp->parent() != NULL) {
+        if (lp->parent()->head() != lp->head()) break;
+        lp = lp->parent();
+      }
+      return !lp->def_locals()->test(v);
+    }
+    LocalSet* def_locals() { return _state->def_locals(); }
+    const LocalSet* def_locals() const { return _state->def_locals(); }
+
     // Get the successors for this Block.
     GrowableArray<Block*>* successors(ciBytecodeStream* str,
                                       StateVector* state,
@@ -524,13 +598,6 @@ public:
       return _successors;
     }

-    // Helper function for "successors" when making private copies of
-    // loop heads for C2.
-    Block * clone_loop_head(ciTypeFlow* analyzer,
-                            int branch_bci,
-                            Block* target,
-                            JsrSet* jsrs);
-
     // Get the exceptional successors for this Block.
     GrowableArray<Block*>* exceptions() {
       if (_exceptions == NULL) {
@@ -584,17 +651,126 @@ public:
     bool   is_on_work_list() const  { return _on_work_list; }

     bool   has_pre_order() const  { return _pre_order >= 0; }
-    void   set_pre_order(int po)  { assert(!has_pre_order() && po >= 0, ""); _pre_order = po; }
+    void   set_pre_order(int po)  { assert(!has_pre_order(), ""); _pre_order = po; }
     int    pre_order() const      { assert(has_pre_order(), ""); return _pre_order; }
+    void   set_next_pre_order()   { set_pre_order(outer()->inc_next_pre_order()); }
     bool   is_start() const       { return _pre_order == outer()->start_block_num(); }

-    // A ranking used in determining order within the work list.
-    bool   is_simpler_than(Block* other);
+    // Reverse post order
+    void   df_init();
+    bool   has_post_order() const { return _post_order >= 0; }
+    void   set_post_order(int po) { assert(!has_post_order() && po >= 0, ""); _post_order = po; }
+    void   reset_post_order(int o){ _post_order = o; }
+    int    post_order() const     { assert(has_post_order(), ""); return _post_order; }
+
+    bool   has_rpo() const        { return has_post_order() && outer()->have_block_count(); }
+    int    rpo() const            { assert(has_rpo(), ""); return outer()->block_count() - post_order() - 1; }
+    void   set_rpo_next(Block* b) { _rpo_next = b; }
+    Block* rpo_next()             { return _rpo_next; }
+
+    // Loops
+    Loop*  loop() const                  { return _loop; }
+    void   set_loop(Loop* lp)            { _loop = lp; }
+    bool   is_loop_head() const          { return _loop && _loop->head() == this; }
+    void   set_irreducible_entry(bool c) { _irreducible_entry = c; }
+    bool   is_irreducible_entry() const  { return _irreducible_entry; }
+    bool   is_visited() const            { return has_pre_order(); }
+    bool   is_post_visited() const       { return has_post_order(); }
+    bool   is_clonable_exit(Loop* lp);
+    Block* looping_succ(Loop* lp);       // Successor inside of loop
+    bool   is_single_entry_loop_head() const {
+      if (!is_loop_head()) return false;
+      for (Loop* lp = loop(); lp != NULL && lp->head() == this; lp = lp->parent())
+        if (lp->is_irreducible()) return false;
+      return true;
+    }

     void   print_value_on(outputStream* st) const PRODUCT_RETURN;
     void   print_on(outputStream* st) const PRODUCT_RETURN;
   };
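The Block changes above add a post-order number alongside the existing pre-order: pre_order is assigned when a block is first visited (and doubles as its ID and "visited" tag), post_order is assigned when the depth-first walk finishes the block, and rpo() is derived as block_count() - post_order() - 1. A small hedged illustration of that arithmetic on a made-up four-block graph:

#include <cassert>

int main() {
  // Suppose a depth-first walk over 4 blocks finishes them in the order
  // D, C, B, A, so post_order(D)=0, post_order(C)=1, post_order(B)=2,
  // post_order(A)=3.
  const int block_count  = 4;
  const int post_order_A = 3;
  const int post_order_D = 0;

  // rpo = block_count - post_order - 1, matching Block::rpo() above.
  int rpo_A = block_count - post_order_A - 1;  // 0: the entry block comes first
  int rpo_D = block_count - post_order_D - 1;  // 3: the last-finished block comes last

  assert(rpo_A == 0 && rpo_D == 3);
  return 0;
}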
+  // Loop
+  class Loop : public ResourceObj {
+  private:
+    Loop* _parent;
+    Loop* _sibling;  // List of siblings, null terminated
+    Loop* _child;    // Head of child list threaded thru sibling pointer
+    Block* _head;    // Head of loop
+    Block* _tail;    // Tail of loop
+    bool   _irreducible;
+    LocalSet _def_locals;
+
+  public:
+    Loop(Block* head, Block* tail) :
+      _head(head),   _tail(tail),
+      _parent(NULL), _sibling(NULL), _child(NULL),
+      _irreducible(false), _def_locals() {}
+
+    Loop* parent()  const { return _parent; }
+    Loop* sibling() const { return _sibling; }
+    Loop* child()   const { return _child; }
+    Block* head()   const { return _head; }
+    Block* tail()   const { return _tail; }
+    void set_parent(Loop* p)  { _parent = p; }
+    void set_sibling(Loop* s) { _sibling = s; }
+    void set_child(Loop* c)   { _child = c; }
+    void set_head(Block* hd)  { _head = hd; }
+    void set_tail(Block* tl)  { _tail = tl; }
+
+    int depth() const;  // nesting depth
+
+    // Returns true if lp is a nested loop or us.
+    bool contains(Loop* lp) const;
+    bool contains(Block* blk) const { return contains(blk->loop()); }
+
+    // Data flow on locals
+    LocalSet* def_locals() { return &_def_locals; }
+    const LocalSet* def_locals() const { return &_def_locals; }
+
+    // Merge the branch lp into this branch, sorting on the loop head
+    // pre_orders. Returns the new branch.
+    Loop* sorted_merge(Loop* lp);
+
+    // Mark non-single entry to loop
+    void set_irreducible(Block* entry) {
+      _irreducible = true;
+      entry->set_irreducible_entry(true);
+    }
+    bool is_irreducible() const { return _irreducible; }
+
+    bool is_root() const { return _tail->pre_order() == max_jint; }
+
+    void print(outputStream* st = tty, int indent = 0) const PRODUCT_RETURN;
+  };
+
+  // Postorder iteration over the loop tree.
+  class PostorderLoops : public StackObj {
+  private:
+    Loop* _root;
+    Loop* _current;
+  public:
+    PostorderLoops(Loop* root) : _root(root), _current(root) {
+      while (_current->child() != NULL) {
+        _current = _current->child();
+      }
+    }
+    bool done() { return _current == NULL; }  // Finished iterating?
+    void next();                              // Advance to next loop
+    Loop* current() { return _current; }      // Return current loop.
+  };
+
+  // Preorder iteration over the loop tree.
+  class PreorderLoops : public StackObj {
+  private:
+    Loop* _root;
+    Loop* _current;
+  public:
+    PreorderLoops(Loop* root) : _root(root), _current(root) {}
+    bool done() { return _current == NULL; }  // Finished iterating?
+    void next();                              // Advance to next loop
+    Loop* current() { return _current; }      // Return current loop.
+  };
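The Loop tree added above is threaded through three pointers: each loop keeps a parent link, a head-of-children link, and a sibling link that chains the children of one parent. PostorderLoops starts at the leftmost descendant so children are visited before their parents; PreorderLoops starts at the root. A hedged, hypothetical usage sketch, assuming a populated ciTypeFlow* flow:

// Hypothetical pass: visit every loop innermost-first and union its defined
// locals into its parent, the way a bottom-up propagation might work.
void propagate_def_locals(ciTypeFlow* flow) {
  for (ciTypeFlow::PostorderLoops iter(flow->loop_tree_root());
       !iter.done(); iter.next()) {
    ciTypeFlow::Loop* lp = iter.current();
    if (lp->parent() != NULL) {
      // Children precede their parent in postorder, so the parent's set
      // accumulates everything defined in nested loops.
      lp->parent()->def_locals()->add(lp->def_locals());
    }
  }
}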
   // Standard indexes of successors, for various bytecodes.
   enum {
     FALL_THROUGH = 0,  // normal control
@@ -619,6 +795,12 @@ private:
   // Tells if a given instruction is able to generate an exception edge.
   bool can_trap(ciBytecodeStream& str);

+  // Clone the loop heads. Returns true if any cloning occurred.
+  bool clone_loop_heads(Loop* lp, StateVector* temp_vector, JsrSet* temp_set);
+
+  // Clone lp's head and replace tail's successors with clone.
+  Block* clone_loop_head(Loop* lp, StateVector* temp_vector, JsrSet* temp_set);
+
 public:
   // Return the block beginning at bci which has a JsrSet compatible
   // with jsrs.
@@ -627,8 +809,8 @@ public:
   // block factory
   Block* get_block_for(int ciBlockIndex, JsrSet* jsrs, CreateOption option = create_public_copy);

-  // How many of the blocks have the private_copy bit set?
-  int private_copy_count(int ciBlockIndex, JsrSet* jsrs) const;
+  // How many of the blocks have the backedge_copy bit set?
+  int backedge_copy_count(int ciBlockIndex, JsrSet* jsrs) const;

   // Return an existing block containing bci which has a JsrSet compatible
   // with jsrs, or NULL if there is none.
@@ -651,11 +833,18 @@ public:
                                  return _block_map[po]; }
   Block* start_block() const   { return pre_order_at(start_block_num()); }
   int start_block_num() const  { return 0; }
+  Block* rpo_at(int rpo) const { assert(0 <= rpo && rpo < block_count(), "out of bounds");
+                                 return _block_map[rpo]; }
+  int next_pre_order()         { return _next_pre_order; }
+  int inc_next_pre_order()     { return _next_pre_order++; }

 private:
   // A work list used during flow analysis.
   Block* _work_list;

+  // List of blocks in reverse post order
+  Block* _rpo_list;
+
   // Next Block::_pre_order.  After mapping, doubles as block_count.
   int _next_pre_order;
@@ -668,6 +857,15 @@ private:
   // Add a basic block to our work list.
   void add_to_work_list(Block* block);

+  // Prepend a basic block to rpo list.
+  void prepend_to_rpo_list(Block* blk) {
+    blk->set_rpo_next(_rpo_list);
+    _rpo_list = blk;
+  }
+
+  // Root of the loop tree
+  Loop* _loop_tree_root;
+
   // State used for make_jsr_record
   int _jsr_count;
   GrowableArray<JsrRecord*>* _jsr_records;
@@ -677,6 +875,9 @@ public:
   // does not already exist.
   JsrRecord* make_jsr_record(int entry_address, int return_address);

+  void set_loop_tree_root(Loop* ltr) { _loop_tree_root = ltr; }
+  Loop* loop_tree_root()             { return _loop_tree_root; }
+
 private:
   // Get the initial state for start_bci:
   const StateVector* get_start_state();
@@ -703,6 +904,15 @@ private:
   // necessary.
   void flow_types();

+  // Perform the depth first type flow analysis. Helper for flow_types.
+  void df_flow_types(Block* start,
+                     bool do_flow,
+                     StateVector* temp_vector,
+                     JsrSet* temp_set);
+
+  // Incrementally build loop tree.
+  void build_loop_tree(Block* blk);
+
   // Create the block map, which indexes blocks in pre_order.
   void map_blocks();
@@ -711,4 +921,6 @@ public:
   void do_flow();

   void print_on(outputStream* st) const PRODUCT_RETURN;
+
+  void rpo_print_on(outputStream* st) const PRODUCT_RETURN;
 };
@@ -1350,11 +1350,7 @@ bool nmethod::can_unload(BoolObjectClosure* is_alive,
       return false;
     }
   }

-  if (!UseParallelOldGC || !VerifyParallelOldWithMarkSweep) {
-    // Cannot do this test if verification of the UseParallelOldGC
-    // code using the PSMarkSweep code is being done.
   assert(unloading_occurred, "Inconsistency in unloading");
-  }
   make_unloaded(is_alive, obj);
   return true;
 }
...
@@ -210,10 +210,6 @@ void ParallelScavengeHeap::post_initialize() {
   PSScavenge::initialize();
   if (UseParallelOldGC) {
     PSParallelCompact::post_initialize();
-    if (VerifyParallelOldWithMarkSweep) {
-      // Will be used for verification of par old.
-      PSMarkSweep::initialize();
-    }
   } else {
     PSMarkSweep::initialize();
   }
@@ -402,7 +398,7 @@ HeapWord* ParallelScavengeHeap::mem_allocate(
       return result;
     }
     if (!is_tlab &&
-        size >= (young_gen()->eden_space()->capacity_in_words() / 2)) {
+        size >= (young_gen()->eden_space()->capacity_in_words(Thread::current()) / 2)) {
       result = old_gen()->allocate(size, is_tlab);
       if (result != NULL) {
         return result;
...
@@ -146,7 +146,7 @@ void RefProcTaskExecutor::execute(ProcessTask& task)
 {
   ParallelScavengeHeap* heap = PSParallelCompact::gc_heap();
   uint parallel_gc_threads = heap->gc_task_manager()->workers();
-  ChunkTaskQueueSet* qset = ParCompactionManager::chunk_array();
+  RegionTaskQueueSet* qset = ParCompactionManager::region_array();
   ParallelTaskTerminator terminator(parallel_gc_threads, qset);
   GCTaskQueue* q = GCTaskQueue::create();
   for(uint i=0; i<parallel_gc_threads; i++) {
@@ -205,38 +205,38 @@ void StealMarkingTask::do_it(GCTaskManager* manager, uint which) {
 }

 //
-// StealChunkCompactionTask
+// StealRegionCompactionTask
 //

-StealChunkCompactionTask::StealChunkCompactionTask(ParallelTaskTerminator* t) :
-  _terminator(t) {};
+StealRegionCompactionTask::StealRegionCompactionTask(ParallelTaskTerminator* t):
+  _terminator(t) {}

-void StealChunkCompactionTask::do_it(GCTaskManager* manager, uint which) {
+void StealRegionCompactionTask::do_it(GCTaskManager* manager, uint which) {
   assert(Universe::heap()->is_gc_active(), "called outside gc");

-  NOT_PRODUCT(TraceTime tm("StealChunkCompactionTask",
+  NOT_PRODUCT(TraceTime tm("StealRegionCompactionTask",
     PrintGCDetails && TraceParallelOldGCTasks, true, gclog_or_tty));

   ParCompactionManager* cm =
     ParCompactionManager::gc_thread_compaction_manager(which);

-  // Has to drain stacks first because there may be chunks on
+  // Has to drain stacks first because there may be regions on
   // preloaded onto the stack and this thread may never have
   // done a draining task.  Are the draining tasks needed?

-  cm->drain_chunk_stacks();
+  cm->drain_region_stacks();

-  size_t chunk_index = 0;
+  size_t region_index = 0;
   int random_seed = 17;

   // If we're the termination task, try 10 rounds of stealing before
   // setting the termination flag
   while(true) {
-    if (ParCompactionManager::steal(which, &random_seed, chunk_index)) {
-      PSParallelCompact::fill_and_update_chunk(cm, chunk_index);
-      cm->drain_chunk_stacks();
+    if (ParCompactionManager::steal(which, &random_seed, region_index)) {
+      PSParallelCompact::fill_and_update_region(cm, region_index);
+      cm->drain_region_stacks();
     } else {
       if (terminator()->offer_termination()) {
         break;
@@ -249,11 +249,10 @@ void StealChunkCompactionTask::do_it(GCTaskManager* manager, uint which) {
 UpdateDensePrefixTask::UpdateDensePrefixTask(
                                    PSParallelCompact::SpaceId space_id,
-                                   size_t chunk_index_start,
-                                   size_t chunk_index_end) :
-  _space_id(space_id), _chunk_index_start(chunk_index_start),
-  _chunk_index_end(chunk_index_end)
-{}
+                                   size_t region_index_start,
+                                   size_t region_index_end) :
+  _space_id(space_id), _region_index_start(region_index_start),
+  _region_index_end(region_index_end) {}

 void UpdateDensePrefixTask::do_it(GCTaskManager* manager, uint which) {
@@ -265,8 +264,8 @@ void UpdateDensePrefixTask::do_it(GCTaskManager* manager, uint which) {
   PSParallelCompact::update_and_deadwood_in_dense_prefix(cm,
                                                          _space_id,
-                                                         _chunk_index_start,
-                                                         _chunk_index_end);
+                                                         _region_index_start,
+                                                         _region_index_end);
 }

 void DrainStacksCompactionTask::do_it(GCTaskManager* manager, uint which) {
@@ -278,6 +277,6 @@ void DrainStacksCompactionTask::do_it(GCTaskManager* manager, uint which) {
   ParCompactionManager* cm =
     ParCompactionManager::gc_thread_compaction_manager(which);

-  // Process any chunks already in the compaction managers stacks.
-  cm->drain_chunk_stacks();
+  // Process any regions already in the compaction managers stacks.
+  cm->drain_region_stacks();
 }
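StealRegionCompactionTask above follows the usual drain-then-steal pattern: a worker first drains the region stack it was preloaded with, then repeatedly tries to steal a region index from another worker's queue, and only offers termination when stealing fails. The following is a hedged standalone sketch of that control flow with ordinary containers rather than the HotSpot task classes, and a deliberately simplified terminator.

#include <cstddef>
#include <deque>
#include <vector>

// Simplified stand-in for the ParallelTaskTerminator protocol.
struct Terminator { bool offer_termination() { return true; } };

// Try to take one region index from some other worker's queue.
bool steal_from_someone(std::vector<std::deque<size_t> >& queues,
                        size_t self, size_t& region_index) {
  for (size_t q = 0; q < queues.size(); ++q) {
    if (q != self && !queues[q].empty()) {
      region_index = queues[q].front();
      queues[q].pop_front();
      return true;
    }
  }
  return false;
}

void compact_worker(std::vector<std::deque<size_t> >& queues, size_t self,
                    Terminator& term, void (*fill_region)(size_t)) {
  // Drain work that was preloaded onto our own stack first.
  while (!queues[self].empty()) {
    size_t r = queues[self].back();
    queues[self].pop_back();
    fill_region(r);
  }
  // Then alternate stealing and processing until everyone agrees to stop.
  size_t region_index = 0;
  while (true) {
    if (steal_from_someone(queues, self, region_index)) {
      fill_region(region_index);
    } else if (term.offer_termination()) {
      break;
    }
  }
}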
@@ -188,18 +188,18 @@ class StealMarkingTask : public GCTask {
 };

 //
-// StealChunkCompactionTask
+// StealRegionCompactionTask
 //
 // This task is used to distribute work to idle threads.
 //

-class StealChunkCompactionTask : public GCTask {
+class StealRegionCompactionTask : public GCTask {
  private:
   ParallelTaskTerminator* const _terminator;
  public:
-  StealChunkCompactionTask(ParallelTaskTerminator* t);
+  StealRegionCompactionTask(ParallelTaskTerminator* t);

-  char* name() { return (char *)"steal-chunk-task"; }
+  char* name() { return (char *)"steal-region-task"; }
   ParallelTaskTerminator* terminator() { return _terminator; }

   virtual void do_it(GCTaskManager* manager, uint which);
@@ -215,15 +215,15 @@ class StealChunkCompactionTask : public GCTask {
 class UpdateDensePrefixTask : public GCTask {
  private:
   PSParallelCompact::SpaceId _space_id;
-  size_t _chunk_index_start;
-  size_t _chunk_index_end;
+  size_t _region_index_start;
+  size_t _region_index_end;

  public:
   char* name() { return (char *)"update-dense_prefix-task"; }

   UpdateDensePrefixTask(PSParallelCompact::SpaceId space_id,
-                        size_t chunk_index_start,
-                        size_t chunk_index_end);
+                        size_t region_index_start,
+                        size_t region_index_end);

   virtual void do_it(GCTaskManager* manager, uint which);
 };
@@ -231,17 +231,17 @@ class UpdateDensePrefixTask : public GCTask {
 //
 // DrainStacksCompactionTask
 //
-// This task processes chunks that have been added to the stacks of each
+// This task processes regions that have been added to the stacks of each
 // compaction manager.
 //
 // Trying to use one draining thread does not work because there are no
 // guarantees about which task will be picked up by which thread.  For example,
-// if thread A gets all the preloaded chunks, thread A may not get a draining
+// if thread A gets all the preloaded regions, thread A may not get a draining
 // task (they may all be done by other threads).
 //

 class DrainStacksCompactionTask : public GCTask {
  public:
-  char* name() { return (char *)"drain-chunk-task"; }
+  char* name() { return (char *)"drain-region-task"; }
   virtual void do_it(GCTaskManager* manager, uint which);
 };
@@ -30,7 +30,7 @@ ParCompactionManager** ParCompactionManager::_manager_array = NULL;
 OopTaskQueueSet*     ParCompactionManager::_stack_array = NULL;
 ObjectStartArray*    ParCompactionManager::_start_array = NULL;
 ParMarkBitMap*       ParCompactionManager::_mark_bitmap = NULL;
-ChunkTaskQueueSet*   ParCompactionManager::_chunk_array = NULL;
+RegionTaskQueueSet*  ParCompactionManager::_region_array = NULL;

 ParCompactionManager::ParCompactionManager() :
     _action(CopyAndUpdate) {
@@ -46,13 +46,13 @@ ParCompactionManager::ParCompactionManager() :
   // We want the overflow stack to be permanent
   _overflow_stack = new (ResourceObj::C_HEAP) GrowableArray<oop>(10, true);
-#ifdef USE_ChunkTaskQueueWithOverflow
-  chunk_stack()->initialize();
+#ifdef USE_RegionTaskQueueWithOverflow
+  region_stack()->initialize();
 #else
-  chunk_stack()->initialize();
+  region_stack()->initialize();

   // We want the overflow stack to be permanent
-  _chunk_overflow_stack =
+  _region_overflow_stack =
     new (ResourceObj::C_HEAP) GrowableArray<size_t>(10, true);
 #endif
@@ -86,18 +86,18 @@ void ParCompactionManager::initialize(ParMarkBitMap* mbm) {
   _stack_array = new OopTaskQueueSet(parallel_gc_threads);
   guarantee(_stack_array != NULL, "Count not initialize promotion manager");
-  _chunk_array = new ChunkTaskQueueSet(parallel_gc_threads);
-  guarantee(_chunk_array != NULL, "Count not initialize promotion manager");
+  _region_array = new RegionTaskQueueSet(parallel_gc_threads);
+  guarantee(_region_array != NULL, "Count not initialize promotion manager");

   // Create and register the ParCompactionManager(s) for the worker threads.
   for(uint i=0; i<parallel_gc_threads; i++) {
     _manager_array[i] = new ParCompactionManager();
     guarantee(_manager_array[i] != NULL, "Could not create ParCompactionManager");
     stack_array()->register_queue(i, _manager_array[i]->marking_stack());
-#ifdef USE_ChunkTaskQueueWithOverflow
-    chunk_array()->register_queue(i, _manager_array[i]->chunk_stack()->task_queue());
+#ifdef USE_RegionTaskQueueWithOverflow
+    region_array()->register_queue(i, _manager_array[i]->region_stack()->task_queue());
 #else
-    chunk_array()->register_queue(i, _manager_array[i]->chunk_stack());
+    region_array()->register_queue(i, _manager_array[i]->region_stack());
 #endif
   }
@@ -153,31 +153,31 @@ oop ParCompactionManager::retrieve_for_scanning() {
   return NULL;
 }

-// Save chunk on a stack
-void ParCompactionManager::save_for_processing(size_t chunk_index) {
+// Save region on a stack
+void ParCompactionManager::save_for_processing(size_t region_index) {
 #ifdef ASSERT
   const ParallelCompactData& sd = PSParallelCompact::summary_data();
-  ParallelCompactData::ChunkData* const chunk_ptr = sd.chunk(chunk_index);
-  assert(chunk_ptr->claimed(), "must be claimed");
-  assert(chunk_ptr->_pushed++ == 0, "should only be pushed once");
+  ParallelCompactData::RegionData* const region_ptr = sd.region(region_index);
+  assert(region_ptr->claimed(), "must be claimed");
+  assert(region_ptr->_pushed++ == 0, "should only be pushed once");
 #endif
-  chunk_stack_push(chunk_index);
+  region_stack_push(region_index);
 }

-void ParCompactionManager::chunk_stack_push(size_t chunk_index) {
+void ParCompactionManager::region_stack_push(size_t region_index) {
-#ifdef USE_ChunkTaskQueueWithOverflow
-  chunk_stack()->save(chunk_index);
+#ifdef USE_RegionTaskQueueWithOverflow
+  region_stack()->save(region_index);
 #else
-  if(!chunk_stack()->push(chunk_index)) {
-    chunk_overflow_stack()->push(chunk_index);
+  if(!region_stack()->push(region_index)) {
+    region_overflow_stack()->push(region_index);
   }
 #endif
 }
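region_stack_push() above shows the overflow pattern used when the fixed-capacity, stealable task queue is full: the push is tried on the bounded queue first and falls back to a growable, thread-private overflow stack that gets drained locally later. A hedged sketch of that pattern with ordinary containers, not the HotSpot queue types:

#include <cstddef>
#include <vector>

// Bounded queue plus growable overflow, mirroring the push fallback above.
class RegionWorkList {
  std::vector<size_t> bounded_;    // stand-in for the stealable task queue
  std::vector<size_t> overflow_;   // thread-private overflow stack
  size_t capacity_;
public:
  explicit RegionWorkList(size_t capacity) : capacity_(capacity) {}

  void push(size_t region_index) {
    if (bounded_.size() < capacity_) {
      bounded_.push_back(region_index);   // normal case: stays stealable
    } else {
      overflow_.push_back(region_index);  // queue full: spill to overflow
    }
  }

  // Overflowed work is drained by the owning thread before it offers
  // termination.
  bool pop_overflow(size_t& region_index) {
    if (overflow_.empty()) return false;
    region_index = overflow_.back();
    overflow_.pop_back();
    return true;
  }
};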
-bool ParCompactionManager::retrieve_for_processing(size_t& chunk_index) {
-#ifdef USE_ChunkTaskQueueWithOverflow
-  return chunk_stack()->retrieve(chunk_index);
+bool ParCompactionManager::retrieve_for_processing(size_t& region_index) {
+#ifdef USE_RegionTaskQueueWithOverflow
+  return region_stack()->retrieve(region_index);
 #else
   // Should not be used in the parallel case
   ShouldNotReachHere();
@@ -230,14 +230,14 @@ void ParCompactionManager::drain_marking_stacks(OopClosure* blk) {
   assert(overflow_stack()->length() == 0, "Sanity");
 }

-void ParCompactionManager::drain_chunk_overflow_stack() {
-  size_t chunk_index = (size_t) -1;
-  while(chunk_stack()->retrieve_from_overflow(chunk_index)) {
-    PSParallelCompact::fill_and_update_chunk(this, chunk_index);
+void ParCompactionManager::drain_region_overflow_stack() {
+  size_t region_index = (size_t) -1;
+  while(region_stack()->retrieve_from_overflow(region_index)) {
+    PSParallelCompact::fill_and_update_region(this, region_index);
   }
 }

-void ParCompactionManager::drain_chunk_stacks() {
+void ParCompactionManager::drain_region_stacks() {
 #ifdef ASSERT
   ParallelScavengeHeap* heap = (ParallelScavengeHeap*)Universe::heap();
   assert(heap->kind() == CollectedHeap::ParallelScavengeHeap, "Sanity");
@@ -249,42 +249,42 @@ void ParCompactionManager::drain_chunk_stacks() {
 #if 1 // def DO_PARALLEL - the serial code hasn't been updated
   do {
-#ifdef USE_ChunkTaskQueueWithOverflow
+#ifdef USE_RegionTaskQueueWithOverflow
     // Drain overflow stack first, so other threads can steal from
     // claimed stack while we work.
-    size_t chunk_index = (size_t) -1;
-    while(chunk_stack()->retrieve_from_overflow(chunk_index)) {
-      PSParallelCompact::fill_and_update_chunk(this, chunk_index);
+    size_t region_index = (size_t) -1;
+    while(region_stack()->retrieve_from_overflow(region_index)) {
+      PSParallelCompact::fill_and_update_region(this, region_index);
     }

-    while (chunk_stack()->retrieve_from_stealable_queue(chunk_index)) {
-      PSParallelCompact::fill_and_update_chunk(this, chunk_index);
+    while (region_stack()->retrieve_from_stealable_queue(region_index)) {
+      PSParallelCompact::fill_and_update_region(this, region_index);
     }
-  } while (!chunk_stack()->is_empty());
+  } while (!region_stack()->is_empty());
 #else
     // Drain overflow stack first, so other threads can steal from
     // claimed stack while we work.
-    while(!chunk_overflow_stack()->is_empty()) {
-      size_t chunk_index = chunk_overflow_stack()->pop();
-      PSParallelCompact::fill_and_update_chunk(this, chunk_index);
+    while(!region_overflow_stack()->is_empty()) {
+      size_t region_index = region_overflow_stack()->pop();
+      PSParallelCompact::fill_and_update_region(this, region_index);
     }

-    size_t chunk_index = -1;
+    size_t region_index = -1;
     // obj is a reference!!!
-    while (chunk_stack()->pop_local(chunk_index)) {
+    while (region_stack()->pop_local(region_index)) {
       // It would be nice to assert about the type of objects we might
       // pop, but they can come from anywhere, unfortunately.
-      PSParallelCompact::fill_and_update_chunk(this, chunk_index);
+      PSParallelCompact::fill_and_update_region(this, region_index);
     }
-  } while((chunk_stack()->size() != 0) ||
-          (chunk_overflow_stack()->length() != 0));
+  } while((region_stack()->size() != 0) ||
+          (region_overflow_stack()->length() != 0));
 #endif

-#ifdef USE_ChunkTaskQueueWithOverflow
-  assert(chunk_stack()->is_empty(), "Sanity");
+#ifdef USE_RegionTaskQueueWithOverflow
+  assert(region_stack()->is_empty(), "Sanity");
 #else
-  assert(chunk_stack()->size() == 0, "Sanity");
-  assert(chunk_overflow_stack()->length() == 0, "Sanity");
+  assert(region_stack()->size() == 0, "Sanity");
+  assert(region_overflow_stack()->length() == 0, "Sanity");
 #endif
 #else
   oop obj;
...
@@ -52,7 +52,7 @@ class ParCompactionManager : public CHeapObj {
   friend class ParallelTaskTerminator;
   friend class ParMarkBitMap;
   friend class PSParallelCompact;
-  friend class StealChunkCompactionTask;
+  friend class StealRegionCompactionTask;
   friend class UpdateAndFillClosure;
   friend class RefProcTaskExecutor;
@@ -75,20 +75,20 @@ class ParCompactionManager : public CHeapObj {
   static ParCompactionManager** _manager_array;
   static OopTaskQueueSet*       _stack_array;
   static ObjectStartArray*      _start_array;
-  static ChunkTaskQueueSet*     _chunk_array;
+  static RegionTaskQueueSet*    _region_array;
   static PSOldGen*              _old_gen;

   OopTaskQueue                  _marking_stack;
   GrowableArray<oop>*           _overflow_stack;
   // Is there a way to reuse the _marking_stack for the
-  // saving empty chunks?  For now just create a different
+  // saving empty regions?  For now just create a different
   // type of TaskQueue.
-#ifdef USE_ChunkTaskQueueWithOverflow
-  ChunkTaskQueueWithOverflow    _chunk_stack;
+#ifdef USE_RegionTaskQueueWithOverflow
+  RegionTaskQueueWithOverflow   _region_stack;
 #else
-  ChunkTaskQueue                _chunk_stack;
-  GrowableArray<size_t>*        _chunk_overflow_stack;
+  RegionTaskQueue               _region_stack;
+  GrowableArray<size_t>*        _region_overflow_stack;
 #endif

 #if 1  // does this happen enough to need a per thread stack?
@@ -106,15 +106,16 @@ class ParCompactionManager : public CHeapObj {
  protected:
   // Array of tasks.  Needed by the ParallelTaskTerminator.
-  static ChunkTaskQueueSet* chunk_array() { return _chunk_array; }
+  static RegionTaskQueueSet* region_array() { return _region_array; }

   OopTaskQueue* marking_stack() { return &_marking_stack; }
   GrowableArray<oop>* overflow_stack() { return _overflow_stack; }
-#ifdef USE_ChunkTaskQueueWithOverflow
-  ChunkTaskQueueWithOverflow* chunk_stack() { return &_chunk_stack; }
+#ifdef USE_RegionTaskQueueWithOverflow
+  RegionTaskQueueWithOverflow* region_stack() { return &_region_stack; }
 #else
-  ChunkTaskQueue* chunk_stack() { return &_chunk_stack; }
-  GrowableArray<size_t>* chunk_overflow_stack() { return _chunk_overflow_stack; }
+  RegionTaskQueue* region_stack() { return &_region_stack; }
+  GrowableArray<size_t>* region_overflow_stack() {
+    return _region_overflow_stack;
+  }
 #endif

   // Pushes onto the marking stack.  If the marking stack is full,
@@ -123,9 +124,9 @@ class ParCompactionManager : public CHeapObj {
   // Do not implement an equivalent stack_pop.  Deal with the
   // marking stack and overflow stack directly.

-  // Pushes onto the chunk stack.  If the chunk stack is full,
-  // pushes onto the chunk overflow stack.
-  void chunk_stack_push(size_t chunk_index);
+  // Pushes onto the region stack.  If the region stack is full,
+  // pushes onto the region overflow stack.
+  void region_stack_push(size_t region_index);

  public:
   Action action() { return _action; }
@@ -160,10 +161,10 @@ class ParCompactionManager : public CHeapObj {
   // Get a oop for scanning.  If returns null, no oop were found.
   oop retrieve_for_scanning();

-  // Save chunk for later processing.  Must not fail.
-  void save_for_processing(size_t chunk_index);
+  // Save region for later processing.  Must not fail.
+  void save_for_processing(size_t region_index);

-  // Get a chunk for processing.  If returns null, no chunk were found.
-  bool retrieve_for_processing(size_t& chunk_index);
+  // Get a region for processing.  If returns null, no region were found.
+  bool retrieve_for_processing(size_t& region_index);

   // Access function for compaction managers
   static ParCompactionManager* gc_thread_compaction_manager(int index);
@@ -172,18 +173,18 @@ class ParCompactionManager : public CHeapObj {
     return stack_array()->steal(queue_num, seed, t);
   }

-  static bool steal(int queue_num, int* seed, ChunkTask& t) {
-    return chunk_array()->steal(queue_num, seed, t);
+  static bool steal(int queue_num, int* seed, RegionTask& t) {
+    return region_array()->steal(queue_num, seed, t);
   }

   // Process tasks remaining on any stack
   void drain_marking_stacks(OopClosure *blk);

   // Process tasks remaining on any stack
-  void drain_chunk_stacks();
+  void drain_region_stacks();

   // Process tasks remaining on any stack
-  void drain_chunk_overflow_stack();
+  void drain_region_overflow_stack();

   // Debugging support
 #ifdef ASSERT
...
@@ -35,9 +35,7 @@ void PSMarkSweep::initialize() {
   _ref_processor = new ReferenceProcessor(mr,
     true,    // atomic_discovery
     false);  // mt_discovery
-  if (!UseParallelOldGC || !VerifyParallelOldWithMarkSweep) {
   _counters = new CollectorCounters("PSMarkSweep", 1);
-  }
 }

 // This method contains all heap specific policy for invoking mark sweep.
@@ -518,9 +516,6 @@ void PSMarkSweep::mark_sweep_phase1(bool clear_all_softrefs) {
   follow_stack();

   // Process reference objects found during marking
-  // Skipping the reference processing for VerifyParallelOldWithMarkSweep
-  // affects the marking (makes it different).
   {
     ReferencePolicy *soft_ref_policy;
     if (clear_all_softrefs) {
...
@@ -152,19 +152,14 @@ void PSMarkSweepDecorator::precompact() {
       oop(q)->forward_to(oop(compact_top));
       assert(oop(q)->is_gc_marked(), "encoding the pointer should preserve the mark");
     } else {
-      // Don't clear the mark since it's confuses parallel old
-      // verification.
-      if (!UseParallelOldGC || !VerifyParallelOldWithMarkSweep) {
       // if the object isn't moving we can just set the mark to the default
       // mark and handle it specially later on.
       oop(q)->init_mark();
-      }
       assert(oop(q)->forwardee() == NULL, "should be forwarded to NULL");
     }

     // Update object start array
-    if (!UseParallelOldGC || !VerifyParallelOldWithMarkSweep) {
-      if (start_array)
+    if (start_array) {
       start_array->allocate_block(compact_top);
     }
@@ -219,18 +214,13 @@ void PSMarkSweepDecorator::precompact() {
       assert(oop(q)->is_gc_marked(), "encoding the pointer should preserve the mark");
     } else {
       // if the object isn't moving we can just set the mark to the default
-      // Don't clear the mark since it's confuses parallel old
-      // verification.
-      if (!UseParallelOldGC || !VerifyParallelOldWithMarkSweep) {
       // mark and handle it specially later on.
       oop(q)->init_mark();
-      }
       assert(oop(q)->forwardee() == NULL, "should be forwarded to NULL");
     }

-    if (!UseParallelOldGC || !VerifyParallelOldWithMarkSweep) {
     // Update object start array
-    if (start_array)
+    if (start_array) {
       start_array->allocate_block(compact_top);
     }
...
@@ -152,9 +152,7 @@ void PSOldGen::precompact() {
   assert(heap->kind() == CollectedHeap::ParallelScavengeHeap, "Sanity");

   // Reset start array first.
-  debug_only(if (!UseParallelOldGC || !VerifyParallelOldWithMarkSweep) {)
   start_array()->reset();
-  debug_only(})

   object_mark_sweep()->precompact();
...
@@ -123,8 +123,6 @@ void PSPermGen::move_and_update(ParCompactionManager* cm) {
 void PSPermGen::precompact() {
   // Reset start array first.
-  debug_only(if (!UseParallelOldGC || !VerifyParallelOldWithMarkSweep) {)
   _start_array.reset();
-  debug_only(})
   object_mark_sweep()->precompact();
 }
@@ -51,6 +51,7 @@ class ImmutableSpace: public CHeapObj {
   // Size computations.  Sizes are in heapwords.
   size_t capacity_in_words() const { return pointer_delta(end(), bottom()); }
+  virtual size_t capacity_in_words(Thread*) const { return capacity_in_words(); }

   // Iteration.
   virtual void oop_iterate(OopClosure* cl);
...
@@ -23,13 +23,6 @@
  */

 inline void MarkSweep::mark_object(oop obj) {
-#ifndef SERIALGC
-  if (UseParallelOldGC && VerifyParallelOldWithMarkSweep) {
-    assert(PSParallelCompact::mark_bitmap()->is_marked(obj),
-           "Should be marked in the marking bitmap");
-  }
-#endif // SERIALGC
-
   // some marks may contain information we need to preserve so we store them away
   // and overwrite the mark.  We'll restore it at the end of markSweep.
   markOop mark = obj->mark();
...
@@ -181,6 +181,25 @@ size_t MutableNUMASpace::unsafe_max_tlab_alloc(Thread *thr) const {
   return lgrp_spaces()->at(i)->space()->free_in_bytes();
 }

+size_t MutableNUMASpace::capacity_in_words(Thread* thr) const {
+  guarantee(thr != NULL, "No thread");
+  int lgrp_id = thr->lgrp_id();
+  if (lgrp_id == -1) {
+    if (lgrp_spaces()->length() > 0) {
+      return capacity_in_words() / lgrp_spaces()->length();
+    } else {
+      assert(false, "There should be at least one locality group");
+      return 0;
+    }
+  }
+  int i = lgrp_spaces()->find(&lgrp_id, LGRPSpace::equals);
+  if (i == -1) {
+    return 0;
+  }
+  return lgrp_spaces()->at(i)->space()->capacity_in_words();
+}
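capacity_in_words(Thread*) above reports the capacity of the NUMA chunk owned by the caller's locality group, or an even share when the thread has no group yet; the earlier mem_allocate() hunk divides this per-thread figure by two when deciding whether a request is too large for eden. A hedged sketch of that policy check with invented sizes:

#include <cstddef>

// Simplified model: would this allocation be steered straight to the old
// generation?  'per_thread_eden_capacity_words' plays the role of
// eden_space()->capacity_in_words(Thread::current()).
bool goes_to_old_gen(size_t request_words,
                     bool is_tlab,
                     size_t per_thread_eden_capacity_words) {
  // On a NUMA heap each thread effectively sees only its locality group's
  // chunk of eden, so the "half of eden" threshold shrinks accordingly.
  return !is_tlab && request_words >= per_thread_eden_capacity_words / 2;
}

// Example with invented numbers: a request of 3M words against a 4M-word
// per-group eden chunk exceeds the halved threshold and goes to old gen:
//   goes_to_old_gen(3u << 20, false, 4u << 20) == true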
// Check if the NUMA topology has changed. Add and remove spaces if needed. // Check if the NUMA topology has changed. Add and remove spaces if needed.
// The update can be forced by setting the force parameter equal to true. // The update can be forced by setting the force parameter equal to true.
bool MutableNUMASpace::update_layout(bool force) { bool MutableNUMASpace::update_layout(bool force) {
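Editor's note: the new capacity_in_words(Thread*) gives each thread a capacity figure that matches the NUMA layout. A minimal standalone sketch of the same lookup, using hypothetical types rather than the HotSpot classes (LgrpSpace and capacity_in_words_for are illustrative names only):

#include <cstddef>
#include <vector>

// Hypothetical stand-in for one locality-group space.
struct LgrpSpace {
  int    lgrp_id;
  size_t capacity_in_words;
};

// A thread without a locality group (-1) is given an even share of the total;
// otherwise it sees only the capacity of its own group's space.
size_t capacity_in_words_for(const std::vector<LgrpSpace>& spaces,
                             size_t total_capacity_in_words,
                             int thread_lgrp_id) {
  if (thread_lgrp_id == -1) {
    return spaces.empty() ? 0 : total_capacity_in_words / spaces.size();
  }
  for (const LgrpSpace& s : spaces) {
    if (s.lgrp_id == thread_lgrp_id) {
      return s.capacity_in_words;
    }
  }
  return 0;  // unknown group: report no capacity, like the i == -1 branch above
}

Reporting only an even share to an unbound thread avoids advertising the whole space to a thread that will eventually allocate from a single group.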
...@@ -722,7 +741,8 @@ HeapWord* MutableNUMASpace::allocate(size_t size) { ...@@ -722,7 +741,8 @@ HeapWord* MutableNUMASpace::allocate(size_t size) {
i = os::random() % lgrp_spaces()->length(); i = os::random() % lgrp_spaces()->length();
} }
MutableSpace *s = lgrp_spaces()->at(i)->space(); LGRPSpace* ls = lgrp_spaces()->at(i);
MutableSpace *s = ls->space();
HeapWord *p = s->allocate(size); HeapWord *p = s->allocate(size);
if (p != NULL) { if (p != NULL) {
...@@ -743,6 +763,9 @@ HeapWord* MutableNUMASpace::allocate(size_t size) { ...@@ -743,6 +763,9 @@ HeapWord* MutableNUMASpace::allocate(size_t size) {
*(int*)i = 0; *(int*)i = 0;
} }
} }
if (p == NULL) {
ls->set_allocation_failed();
}
return p; return p;
} }
...@@ -761,7 +784,8 @@ HeapWord* MutableNUMASpace::cas_allocate(size_t size) { ...@@ -761,7 +784,8 @@ HeapWord* MutableNUMASpace::cas_allocate(size_t size) {
if (i == -1) { if (i == -1) {
i = os::random() % lgrp_spaces()->length(); i = os::random() % lgrp_spaces()->length();
} }
MutableSpace *s = lgrp_spaces()->at(i)->space(); LGRPSpace *ls = lgrp_spaces()->at(i);
MutableSpace *s = ls->space();
HeapWord *p = s->cas_allocate(size); HeapWord *p = s->cas_allocate(size);
if (p != NULL) { if (p != NULL) {
size_t remainder = pointer_delta(s->end(), p + size); size_t remainder = pointer_delta(s->end(), p + size);
...@@ -790,6 +814,9 @@ HeapWord* MutableNUMASpace::cas_allocate(size_t size) { ...@@ -790,6 +814,9 @@ HeapWord* MutableNUMASpace::cas_allocate(size_t size) {
*(int*)i = 0; *(int*)i = 0;
} }
} }
if (p == NULL) {
ls->set_allocation_failed();
}
return p; return p;
} }
......
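Editor's note: the functional change in both allocate() and cas_allocate() is simply to remember that the preferred group's chunk ran out. A rough standalone sketch of that pattern, with hypothetical types (GroupSpace, numa_allocate) and a simplified bump-pointer allocator, not the HotSpot implementation:

#include <atomic>
#include <cstddef>

// Hypothetical per-group space with lock-free bump-pointer allocation.
struct GroupSpace {
  std::atomic<char*> top{nullptr};
  char*              end = nullptr;
  bool               allocation_failed = false;

  char* cas_allocate(size_t bytes) {
    char* old_top = top.load();
    for (;;) {
      if (old_top == nullptr || old_top + bytes > end) {
        return nullptr;                      // the chunk is exhausted
      }
      if (top.compare_exchange_weak(old_top, old_top + bytes)) {
        return old_top;                      // success: we own [old_top, old_top + bytes)
      }
      // compare_exchange_weak reloaded old_top; retry with the fresh value.
    }
  }
};

char* numa_allocate(GroupSpace& preferred, size_t bytes) {
  char* p = preferred.cas_allocate(bytes);
  if (p == nullptr) {
    preferred.allocation_failed = true;      // remembered until the next sample()
  }
  return p;
}

The flag is deliberately not reset on the allocation path; it is consumed by sample() in the header change below, which is what drives chunk resizing.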
...@@ -60,6 +60,7 @@ class MutableNUMASpace : public MutableSpace { ...@@ -60,6 +60,7 @@ class MutableNUMASpace : public MutableSpace {
MutableSpace* _space; MutableSpace* _space;
MemRegion _invalid_region; MemRegion _invalid_region;
AdaptiveWeightedAverage *_alloc_rate; AdaptiveWeightedAverage *_alloc_rate;
bool _allocation_failed;
struct SpaceStats { struct SpaceStats {
size_t _local_space, _remote_space, _unbiased_space, _uncommited_space; size_t _local_space, _remote_space, _unbiased_space, _uncommited_space;
...@@ -81,7 +82,7 @@ class MutableNUMASpace : public MutableSpace { ...@@ -81,7 +82,7 @@ class MutableNUMASpace : public MutableSpace {
char* last_page_scanned() { return _last_page_scanned; } char* last_page_scanned() { return _last_page_scanned; }
void set_last_page_scanned(char* p) { _last_page_scanned = p; } void set_last_page_scanned(char* p) { _last_page_scanned = p; }
public: public:
LGRPSpace(int l) : _lgrp_id(l), _last_page_scanned(NULL) { LGRPSpace(int l) : _lgrp_id(l), _last_page_scanned(NULL), _allocation_failed(false) {
_space = new MutableSpace(); _space = new MutableSpace();
_alloc_rate = new AdaptiveWeightedAverage(NUMAChunkResizeWeight); _alloc_rate = new AdaptiveWeightedAverage(NUMAChunkResizeWeight);
} }
...@@ -103,8 +104,21 @@ class MutableNUMASpace : public MutableSpace { ...@@ -103,8 +104,21 @@ class MutableNUMASpace : public MutableSpace {
return *(int*)lgrp_id_value == p->lgrp_id(); return *(int*)lgrp_id_value == p->lgrp_id();
} }
// Report a failed allocation.
void set_allocation_failed() { _allocation_failed = true; }
void sample() { void sample() {
alloc_rate()->sample(space()->used_in_bytes()); // If there was a failed allocation, make the allocation rate equal
// to the size of the whole chunk. This ensures the progress of
// the adaptation process.
size_t alloc_rate_sample;
if (_allocation_failed) {
alloc_rate_sample = space()->capacity_in_bytes();
_allocation_failed = false;
} else {
alloc_rate_sample = space()->used_in_bytes();
}
alloc_rate()->sample(alloc_rate_sample);
} }
MemRegion invalid_region() const { return _invalid_region; } MemRegion invalid_region() const { return _invalid_region; }
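Editor's note: the reasoning in the new sample() comment is worth spelling out: once a chunk is full, used_in_bytes() can never exceed the chunk size, so the weighted average would stall and the resizing policy would stop growing that chunk. A small sketch of the idea with a hypothetical weighted-average type (the real AdaptiveWeightedAverage is configured by NUMAChunkResizeWeight; the 0.3 weight below is arbitrary):

#include <cstddef>

// Simplified stand-in for AdaptiveWeightedAverage: an exponentially weighted
// moving average with weight w in (0, 1].
struct WeightedAverage {
  double avg;
  double w;
  explicit WeightedAverage(double weight) : avg(0.0), w(weight) {}
  void sample(double v) { avg += w * (v - avg); }
};

struct GroupStats {
  size_t used_bytes = 0;
  size_t capacity_bytes = 0;
  bool   allocation_failed = false;
  WeightedAverage alloc_rate{0.3};

  void sample() {
    // A failed allocation means demand was at least the whole chunk, so feed the
    // full capacity into the average instead of the (necessarily capped) usage.
    size_t s = allocation_failed ? capacity_bytes : used_bytes;
    allocation_failed = false;
    alloc_rate.sample(static_cast<double>(s));
  }
};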
...@@ -190,6 +204,9 @@ class MutableNUMASpace : public MutableSpace { ...@@ -190,6 +204,9 @@ class MutableNUMASpace : public MutableSpace {
virtual void ensure_parsability(); virtual void ensure_parsability();
virtual size_t used_in_words() const; virtual size_t used_in_words() const;
virtual size_t free_in_words() const; virtual size_t free_in_words() const;
using MutableSpace::capacity_in_words;
virtual size_t capacity_in_words(Thread* thr) const;
virtual size_t tlab_capacity(Thread* thr) const; virtual size_t tlab_capacity(Thread* thr) const;
virtual size_t unsafe_max_tlab_alloc(Thread* thr) const; virtual size_t unsafe_max_tlab_alloc(Thread* thr) const;
......
...@@ -586,6 +586,7 @@ locknode.hpp subnode.hpp ...@@ -586,6 +586,7 @@ locknode.hpp subnode.hpp
loopTransform.cpp addnode.hpp loopTransform.cpp addnode.hpp
loopTransform.cpp allocation.inline.hpp loopTransform.cpp allocation.inline.hpp
loopTransform.cpp connode.hpp loopTransform.cpp connode.hpp
loopTransform.cpp compileLog.hpp
loopTransform.cpp divnode.hpp loopTransform.cpp divnode.hpp
loopTransform.cpp loopnode.hpp loopTransform.cpp loopnode.hpp
loopTransform.cpp mulnode.hpp loopTransform.cpp mulnode.hpp
...@@ -601,6 +602,7 @@ loopnode.cpp addnode.hpp ...@@ -601,6 +602,7 @@ loopnode.cpp addnode.hpp
loopnode.cpp allocation.inline.hpp loopnode.cpp allocation.inline.hpp
loopnode.cpp callnode.hpp loopnode.cpp callnode.hpp
loopnode.cpp ciMethodData.hpp loopnode.cpp ciMethodData.hpp
loopnode.cpp compileLog.hpp
loopnode.cpp connode.hpp loopnode.cpp connode.hpp
loopnode.cpp divnode.hpp loopnode.cpp divnode.hpp
loopnode.cpp loopnode.hpp loopnode.cpp loopnode.hpp
......
...@@ -25,19 +25,6 @@ ...@@ -25,19 +25,6 @@
#include "incls/_precompiled.incl" #include "incls/_precompiled.incl"
#include "incls/_bytecodeInfo.cpp.incl" #include "incls/_bytecodeInfo.cpp.incl"
// These variables are declared in parse1.cpp
extern int explicit_null_checks_inserted;
extern int explicit_null_checks_elided;
extern int explicit_null_checks_inserted_old;
extern int explicit_null_checks_elided_old;
extern int nodes_created_old;
extern int nodes_created;
extern int methods_parsed_old;
extern int methods_parsed;
extern int methods_seen;
extern int methods_seen_old;
//============================================================================= //=============================================================================
//------------------------------InlineTree------------------------------------- //------------------------------InlineTree-------------------------------------
InlineTree::InlineTree( Compile* c, const InlineTree *caller_tree, ciMethod* callee, JVMState* caller_jvms, int caller_bci, float site_invoke_ratio ) InlineTree::InlineTree( Compile* c, const InlineTree *caller_tree, ciMethod* callee, JVMState* caller_jvms, int caller_bci, float site_invoke_ratio )
...@@ -517,27 +504,3 @@ InlineTree* InlineTree::find_subtree_from_root(InlineTree* root, JVMState* jvms, ...@@ -517,27 +504,3 @@ InlineTree* InlineTree::find_subtree_from_root(InlineTree* root, JVMState* jvms,
} }
return iltp; return iltp;
} }
// ----------------------------------------------------------------------------
#ifndef PRODUCT
static void per_method_stats() {
// Compute difference between this method's cumulative totals and old totals
int explicit_null_checks_cur = explicit_null_checks_inserted - explicit_null_checks_inserted_old;
int elided_null_checks_cur = explicit_null_checks_elided - explicit_null_checks_elided_old;
// Print differences
if( explicit_null_checks_cur )
tty->print_cr("XXX Explicit NULL checks inserted: %d", explicit_null_checks_cur);
if( elided_null_checks_cur )
tty->print_cr("XXX Explicit NULL checks removed at parse time: %d", elided_null_checks_cur);
// Store the current cumulative totals
nodes_created_old = nodes_created;
methods_parsed_old = methods_parsed;
methods_seen_old = methods_seen;
explicit_null_checks_inserted_old = explicit_null_checks_inserted;
explicit_null_checks_elided_old = explicit_null_checks_elided;
}
#endif
...@@ -1034,6 +1034,39 @@ AllocateNode::AllocateNode(Compile* C, const TypeFunc *atype, ...@@ -1034,6 +1034,39 @@ AllocateNode::AllocateNode(Compile* C, const TypeFunc *atype,
//============================================================================= //=============================================================================
uint AllocateArrayNode::size_of() const { return sizeof(*this); } uint AllocateArrayNode::size_of() const { return sizeof(*this); }
// Retrieve the length from the AllocateArrayNode. Narrow the type with a
// CastII, if appropriate. If we are not allowed to create new nodes, and
// a CastII is appropriate, return NULL.
Node *AllocateArrayNode::make_ideal_length(const TypeOopPtr* oop_type, PhaseTransform *phase, bool allow_new_nodes) {
Node *length = in(AllocateNode::ALength);
assert(length != NULL, "length is not null");
const TypeInt* length_type = phase->find_int_type(length);
const TypeAryPtr* ary_type = oop_type->isa_aryptr();
if (ary_type != NULL && length_type != NULL) {
const TypeInt* narrow_length_type = ary_type->narrow_size_type(length_type);
if (narrow_length_type != length_type) {
// Assert one of:
// - the narrow_length is 0
// - the narrow_length is not wider than length
assert(narrow_length_type == TypeInt::ZERO ||
(narrow_length_type->_hi <= length_type->_hi &&
narrow_length_type->_lo >= length_type->_lo),
"narrow type must be narrower than length type");
// Return NULL if new nodes are not allowed
if (!allow_new_nodes) return NULL;
// Create a cast which is control dependent on the initialization to
// propagate the fact that the array length must be positive.
length = new (phase->C, 2) CastIINode(length, narrow_length_type);
length->set_req(0, initialization()->proj_out(0));
}
}
return length;
}
//============================================================================= //=============================================================================
uint LockNode::size_of() const { return sizeof(*this); } uint LockNode::size_of() const { return sizeof(*this); }
......
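Editor's note: the core of make_ideal_length() is a range intersection, and a CastII is only created when that intersection is strictly narrower than what is already known. A simplified, non-C2 sketch of that decision (IntRange and narrow_length are illustrative names, standing in for TypeInt and the narrow_size_type() logic):

#include <algorithm>
#include <optional>

// Stand-in for TypeInt: a closed integer interval [lo, hi].
struct IntRange { int lo, hi; };

// Intersect the length's known range with the range the array type can legally
// have. Only when that narrows the range is a cast worth creating; if new nodes
// are not allowed at this point, report failure instead.
std::optional<IntRange> narrow_length(IntRange length,
                                      IntRange legal_array_size,
                                      bool allow_new_nodes) {
  IntRange narrowed{std::max(length.lo, legal_array_size.lo),
                    std::min(length.hi, legal_array_size.hi)};
  if (narrowed.lo == length.lo && narrowed.hi == length.hi) {
    return length;          // already as narrow as it can get; reuse the input
  }
  if (!allow_new_nodes) {
    return std::nullopt;    // caller must cope without the narrowed value
  }
  return narrowed;          // caller would wrap the length in a CastII here
}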
...@@ -755,6 +755,15 @@ public: ...@@ -755,6 +755,15 @@ public:
virtual int Opcode() const; virtual int Opcode() const;
virtual uint size_of() const; // Size is bigger virtual uint size_of() const; // Size is bigger
// Dig the length operand out of an array allocation site.
Node* Ideal_length() {
return in(AllocateNode::ALength);
}
// Dig the length operand out of an array allocation site and narrow the
// type with a CastII, if necessary
Node* make_ideal_length(const TypeOopPtr* ary_type, PhaseTransform *phase, bool can_create = true);
// Pattern-match a possible usage of AllocateArrayNode. // Pattern-match a possible usage of AllocateArrayNode.
// Return null if no allocation is recognized. // Return null if no allocation is recognized.
static AllocateArrayNode* Ideal_array_allocation(Node* ptr, PhaseTransform* phase) { static AllocateArrayNode* Ideal_array_allocation(Node* ptr, PhaseTransform* phase) {
...@@ -762,12 +771,6 @@ public: ...@@ -762,12 +771,6 @@ public:
return (allo == NULL || !allo->is_AllocateArray()) return (allo == NULL || !allo->is_AllocateArray())
? NULL : allo->as_AllocateArray(); ? NULL : allo->as_AllocateArray();
} }
// Dig the length operand out of a (possible) array allocation site.
static Node* Ideal_length(Node* ptr, PhaseTransform* phase) {
AllocateArrayNode* allo = Ideal_array_allocation(ptr, phase);
return (allo == NULL) ? NULL : allo->in(AllocateNode::ALength);
}
}; };
//------------------------------AbstractLockNode----------------------------------- //------------------------------AbstractLockNode-----------------------------------
......
...@@ -1666,6 +1666,10 @@ Node *PhiNode::Ideal(PhaseGVN *phase, bool can_reshape) { ...@@ -1666,6 +1666,10 @@ Node *PhiNode::Ideal(PhaseGVN *phase, bool can_reshape) {
// If not, we can update the input infinitely along a MergeMem cycle // If not, we can update the input infinitely along a MergeMem cycle
// Equivalent code is in MemNode::Ideal_common // Equivalent code is in MemNode::Ideal_common
Node *m = phase->transform(n); Node *m = phase->transform(n);
if (outcnt() == 0) { // Above transform() may kill us!
progress = phase->C->top();
break;
}
// If tranformed to a MergeMem, get the desired slice // If tranformed to a MergeMem, get the desired slice
// Otherwise the returned node represents memory for every slice // Otherwise the returned node represents memory for every slice
Node *new_mem = (m->is_MergeMem()) ? Node *new_mem = (m->is_MergeMem()) ?
...@@ -1765,6 +1769,51 @@ Node *PhiNode::Ideal(PhaseGVN *phase, bool can_reshape) { ...@@ -1765,6 +1769,51 @@ Node *PhiNode::Ideal(PhaseGVN *phase, bool can_reshape) {
} }
} }
#ifdef _LP64
// Push DecodeN down through phi.
// The rest of the phi graph will be transformed by splitting EncodeP nodes up through the phis.
if (UseCompressedOops && can_reshape && progress == NULL) {
bool may_push = true;
bool has_decodeN = false;
Node* in_decodeN = NULL;
for (uint i=1; i<req(); ++i) {// For all paths in
Node *ii = in(i);
if (ii->is_DecodeN() && ii->bottom_type() == bottom_type()) {
has_decodeN = true;
in_decodeN = ii->in(1);
} else if (!ii->is_Phi()) {
may_push = false;
}
}
if (has_decodeN && may_push) {
PhaseIterGVN *igvn = phase->is_IterGVN();
// Note: in_decodeN is used only to define the type of new phi here.
PhiNode *new_phi = PhiNode::make_blank(in(0), in_decodeN);
uint orig_cnt = req();
for (uint i=1; i<req(); ++i) {// For all paths in
Node *ii = in(i);
Node* new_ii = NULL;
if (ii->is_DecodeN()) {
assert(ii->bottom_type() == bottom_type(), "sanity");
new_ii = ii->in(1);
} else {
assert(ii->is_Phi(), "sanity");
if (ii->as_Phi() == this) {
new_ii = new_phi;
} else {
new_ii = new (phase->C, 2) EncodePNode(ii, in_decodeN->bottom_type());
igvn->register_new_node_with_optimizer(new_ii);
}
}
new_phi->set_req(i, new_ii);
}
igvn->register_new_node_with_optimizer(new_phi, this);
progress = new (phase->C, 2) DecodeNNode(new_phi, bottom_type());
}
}
#endif
return progress; // Return any progress return progress; // Return any progress
} }
......
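Editor's note: the DecodeN push above only fires when every phi input is either a DecodeN of the same type or another phi. In rough terms it rewrites phi(DecodeN(x), y) into DecodeN(phi(x, EncodeP(y))), so the 64-bit decode happens once after the merge instead of on every incoming path. A toy, non-C2 sketch of that shape; Node, make and push_decode_through_phi are hypothetical, and it ignores self-referential phis and the type bookkeeping the real code handles:

#include <memory>
#include <string>
#include <vector>

// Toy IR node, only expressive enough to show the shape of the rewrite.
struct Node {
  std::string op;                            // "Phi", "DecodeN", "EncodeP", or a leaf
  std::vector<std::shared_ptr<Node>> in;
};
using NodePtr = std::shared_ptr<Node>;

static NodePtr make(std::string op, std::vector<NodePtr> in = {}) {
  return std::make_shared<Node>(Node{std::move(op), std::move(in)});
}

// phi(DecodeN(x), y, ...)  ==>  DecodeN(phi(x, EncodeP(y), ...))
NodePtr push_decode_through_phi(const NodePtr& phi) {
  bool has_decode = false;
  for (const NodePtr& ii : phi->in) {
    if (ii->op == "DecodeN") has_decode = true;
  }
  if (!has_decode) return phi;               // nothing to push
  NodePtr new_phi = make("Phi");
  for (const NodePtr& ii : phi->in) {
    new_phi->in.push_back(ii->op == "DecodeN" ? ii->in[0]
                                              : make("EncodeP", {ii}));
  }
  return make("DecodeN", {new_phi});
}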
...@@ -160,6 +160,7 @@ class Compile : public Phase { ...@@ -160,6 +160,7 @@ class Compile : public Phase {
bool _print_assembly; // True if we should dump assembly code for this compilation bool _print_assembly; // True if we should dump assembly code for this compilation
#ifndef PRODUCT #ifndef PRODUCT
bool _trace_opto_output; bool _trace_opto_output;
bool _parsed_irreducible_loop; // True if ciTypeFlow detected irreducible loops during parsing
#endif #endif
// Compilation environment. // Compilation environment.
...@@ -319,6 +320,8 @@ class Compile : public Phase { ...@@ -319,6 +320,8 @@ class Compile : public Phase {
} }
#ifndef PRODUCT #ifndef PRODUCT
bool trace_opto_output() const { return _trace_opto_output; } bool trace_opto_output() const { return _trace_opto_output; }
bool parsed_irreducible_loop() const { return _parsed_irreducible_loop; }
void set_parsed_irreducible_loop(bool z) { _parsed_irreducible_loop = z; }
#endif #endif
void begin_method() { void begin_method() {
......
...@@ -433,8 +433,8 @@ Node *ConstraintCastNode::Ideal_DU_postCCP( PhaseCCP *ccp ) { ...@@ -433,8 +433,8 @@ Node *ConstraintCastNode::Ideal_DU_postCCP( PhaseCCP *ccp ) {
// If not converting int->oop, throw away cast after constant propagation // If not converting int->oop, throw away cast after constant propagation
Node *CastPPNode::Ideal_DU_postCCP( PhaseCCP *ccp ) { Node *CastPPNode::Ideal_DU_postCCP( PhaseCCP *ccp ) {
const Type *t = ccp->type(in(1)); const Type *t = ccp->type(in(1));
if (!t->isa_oop_ptr()) { if (!t->isa_oop_ptr() || in(1)->is_DecodeN()) {
return NULL; // do not transform raw pointers return NULL; // do not transform raw pointers or narrow oops
} }
return ConstraintCastNode::Ideal_DU_postCCP(ccp); return ConstraintCastNode::Ideal_DU_postCCP(ccp);
} }
......
...@@ -795,7 +795,7 @@ ciMethod* Parse::optimize_inlining(ciMethod* caller, int bci, ciInstanceKlass* k ...@@ -795,7 +795,7 @@ ciMethod* Parse::optimize_inlining(ciMethod* caller, int bci, ciInstanceKlass* k
ciInstanceKlass *ikl = receiver_type->klass()->as_instance_klass(); ciInstanceKlass *ikl = receiver_type->klass()->as_instance_klass();
if (ikl->is_loaded() && ikl->is_initialized() && !ikl->is_interface() && if (ikl->is_loaded() && ikl->is_initialized() && !ikl->is_interface() &&
(ikl == actual_receiver || ikl->is_subclass_of(actual_receiver))) { (ikl == actual_receiver || ikl->is_subtype_of(actual_receiver))) {
// ikl is a same or better type than the original actual_receiver, // ikl is a same or better type than the original actual_receiver,
// e.g. static receiver from bytecodes. // e.g. static receiver from bytecodes.
actual_receiver = ikl; actual_receiver = ikl;
......
...@@ -1012,6 +1012,8 @@ void PhaseIdealLoop::do_unroll( IdealLoopTree *loop, Node_List &old_new, bool ad ...@@ -1012,6 +1012,8 @@ void PhaseIdealLoop::do_unroll( IdealLoopTree *loop, Node_List &old_new, bool ad
if (!has_ctrl(old)) if (!has_ctrl(old))
set_loop(nnn, loop); set_loop(nnn, loop);
} }
loop->record_for_igvn();
} }
//------------------------------do_maximally_unroll---------------------------- //------------------------------do_maximally_unroll----------------------------
......