Merge

4619dfcb · iveresov · df8a5db9 · dcd17c07 · 4619dfcb · 4619dfcb
7 changed file
--- a/src/share/vm/opto/chaitin.cpp
+++ b/src/share/vm/opto/chaitin.cpp
@@ -575,6 +575,9 @@ void PhaseChaitin::Register_Allocate() {
  // Peephole remove copies
  post_allocate_copy_removal();

+  // Merge multidefs if multiple defs representing the same value are used in a single block.
+  merge_multidefs();
+
 #ifdef ASSERT
  // Veify the graph after RA.
  verify(&live_arena);

--- a/src/share/vm/opto/chaitin.hpp
+++ b/src/share/vm/opto/chaitin.hpp
@@ -578,6 +578,32 @@ private:
  // Extend the node to LRG mapping
  void add_reference( const Node *node, const Node *old_node);

+  // Record the first use of a def in the block for a register.
+  class RegDefUse {
+    Node* _def;
+    Node* _first_use;
+  public:
+    RegDefUse() : _def(NULL), _first_use(NULL) { }
+    Node* def() const       { return _def;       }
+    Node* first_use() const { return _first_use; }
+
+    void update(Node* def, Node* use) {
+      if (_def != def) {
+        _def = def;
+        _first_use = use;
+      }
+    }
+    void clear() {
+      _def = NULL;
+      _first_use = NULL;
+    }
+  };
+  typedef GrowableArray<RegDefUse> RegToDefUseMap;
+  int possibly_merge_multidef(Node *n, uint k, Block *block, RegToDefUseMap& reg2defuse);
+
+  // Merge nodes that are a part of a multidef lrg and produce the same value within a block.
+  void merge_multidefs();
+
 private:

  static int _final_loads, _final_stores, _final_copies, _final_memoves;

--- a/src/share/vm/opto/machnode.hpp
+++ b/src/share/vm/opto/machnode.hpp
@@ -558,6 +558,29 @@ public:
 #endif
 };

+// MachMergeNode is similar to a PhiNode in a sense it merges multiple values,
+// however it doesn't have a control input and is more like a MergeMem.
+// It is inserted after the register allocation is done to ensure that nodes use single
+// definition of a multidef lrg in a block.
+class MachMergeNode : public MachIdealNode {
+public:
+  MachMergeNode(Node *n1) {
+    init_class_id(Class_MachMerge);
+    add_req(NULL);
+    add_req(n1);
+  }
+  virtual const RegMask &out_RegMask() const { return in(1)->out_RegMask(); }
+  virtual const RegMask &in_RegMask(uint idx) const { return in(1)->in_RegMask(idx); }
+  virtual const class Type *bottom_type() const { return in(1)->bottom_type(); }
+  virtual uint ideal_reg() const { return bottom_type()->ideal_reg(); }
+  virtual uint oper_input_base() const { return 1; }
+  virtual void emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { }
+  virtual uint size(PhaseRegAlloc *ra_) const { return 0; }
+#ifndef PRODUCT
+  virtual const char *Name() const { return "MachMerge"; }
+#endif
+};
+
 //------------------------------MachBranchNode--------------------------------
 // Abstract machine branch Node
 class MachBranchNode : public MachIdealNode {

--- a/src/share/vm/opto/node.hpp
+++ b/src/share/vm/opto/node.hpp
@@ -98,6 +98,7 @@ class MachReturnNode;
 class MachSafePointNode;
 class MachSpillCopyNode;
 class MachTempNode;
+class MachMergeNode;
 class Matcher;
 class MemBarNode;
 class MemBarStoreStoreNode;
@@ -591,6 +592,7 @@ public:
      DEFINE_CLASS_ID(MachTemp,         Mach, 3)
      DEFINE_CLASS_ID(MachConstantBase, Mach, 4)
      DEFINE_CLASS_ID(MachConstant,     Mach, 5)
+      DEFINE_CLASS_ID(MachMerge,        Mach, 6)

    DEFINE_CLASS_ID(Type,  Node, 2)
      DEFINE_CLASS_ID(Phi,   Type, 0)
@@ -761,6 +763,7 @@ public:
  DEFINE_CLASS_QUERY(MachSafePoint)
  DEFINE_CLASS_QUERY(MachSpillCopy)
  DEFINE_CLASS_QUERY(MachTemp)
+  DEFINE_CLASS_QUERY(MachMerge)
  DEFINE_CLASS_QUERY(Mem)
  DEFINE_CLASS_QUERY(MemBar)
  DEFINE_CLASS_QUERY(MemBarStoreStore)

--- a/src/share/vm/opto/phase.cpp
+++ b/src/share/vm/opto/phase.cpp
@@ -74,6 +74,7 @@ elapsedTimer   Phase::_t_buildIFGphysical;
 elapsedTimer   Phase::_t_computeLive;
 elapsedTimer   Phase::_t_regAllocSplit;
 elapsedTimer   Phase::_t_postAllocCopyRemoval;
+elapsedTimer   Phase::_t_mergeMultidefs;
 elapsedTimer   Phase::_t_fixupSpills;

 // Subtimers for _t_output
@@ -136,11 +137,12 @@ void Phase::print_timers() {
    tty->print_cr ("      computeLive    : %3.3f sec", Phase::_t_computeLive.seconds());
    tty->print_cr ("      regAllocSplit  : %3.3f sec", Phase::_t_regAllocSplit.seconds());
    tty->print_cr ("      postAllocCopyRemoval: %3.3f sec", Phase::_t_postAllocCopyRemoval.seconds());
+    tty->print_cr ("      mergeMultidefs: %3.3f sec", Phase::_t_mergeMultidefs.seconds());
    tty->print_cr ("      fixupSpills    : %3.3f sec", Phase::_t_fixupSpills.seconds());
    double regalloc_subtotal = Phase::_t_ctorChaitin.seconds() +
      Phase::_t_buildIFGphysical.seconds() + Phase::_t_computeLive.seconds() +
      Phase::_t_regAllocSplit.seconds()    + Phase::_t_fixupSpills.seconds() +
-      Phase::_t_postAllocCopyRemoval.seconds();
+      Phase::_t_postAllocCopyRemoval.seconds() + Phase::_t_mergeMultidefs.seconds();
    double percent_of_regalloc = ((regalloc_subtotal == 0.0) ? 0.0 : (regalloc_subtotal / Phase::_t_registerAllocation.seconds() * 100.0));
    tty->print_cr ("      subtotal       : %3.3f sec,  %3.2f %%", regalloc_subtotal, percent_of_regalloc);
  }

--- a/src/share/vm/opto/phase.hpp
+++ b/src/share/vm/opto/phase.hpp
@@ -109,6 +109,7 @@ protected:
  static elapsedTimer   _t_computeLive;
  static elapsedTimer   _t_regAllocSplit;
  static elapsedTimer   _t_postAllocCopyRemoval;
+  static elapsedTimer   _t_mergeMultidefs;
  static elapsedTimer   _t_fixupSpills;

 // Subtimers for _t_output

--- a/src/share/vm/opto/postaloc.cpp
+++ b/src/share/vm/opto/postaloc.cpp
@@ -263,20 +263,6 @@ int PhaseChaitin::elide_copy( Node *n, int k, Block *current_block, Node_List &v
  // intermediate copies might be illegal, i.e., value is stored down to stack
  // then reloaded BUT survives in a register the whole way.
  Node *val = skip_copies(n->in(k));
-
-  if (val == x && nk_idx != 0 &&
-      regnd[nk_reg] != NULL && regnd[nk_reg] != x &&
-      _lrg_map.live_range_id(x) == _lrg_map.live_range_id(regnd[nk_reg])) {
-    // When rematerialzing nodes and stretching lifetimes, the
-    // allocator will reuse the original def for multidef LRG instead
-    // of the current reaching def because it can't know it's safe to
-    // do so.  After allocation completes if they are in the same LRG
-    // then it should use the current reaching def instead.
-    n->set_req(k, regnd[nk_reg]);
-    blk_adjust += yank_if_dead(val, current_block, &value, &regnd);
-    val = skip_copies(n->in(k));
-  }
-
  if (val == x) return blk_adjust; // No progress?

  int n_regs = RegMask::num_registers(val->ideal_reg());
@@ -382,6 +368,94 @@ bool PhaseChaitin::eliminate_copy_of_constant(Node* val, Node* n,
  return false;
 }

+// The algorithms works as follows:
+// We traverse the block top to bottom. possibly_merge_multidef() is invoked for every input edge k
+// of the instruction n. We check to see if the input is a multidef lrg. If it is, we record the fact that we've
+// seen a definition (coming as an input) and add that fact to the reg2defuse array. The array maps registers to their
+// current reaching definitions (we track only multidefs though). With each definition we also associate the first
+// instruction we saw use it. If we encounter the situation when we observe an def (an input) that is a part of the
+// same lrg but is different from the previous seen def we merge the two with a MachMerge node and substitute
+// all the uses that we've seen so far to use the merge. After that we keep replacing the new defs in the same lrg
+// as they get encountered with the merge node and keep adding these defs to the merge inputs.
+void PhaseChaitin::merge_multidefs() {
+  NOT_PRODUCT( Compile::TracePhase t3("mergeMultidefs", &_t_mergeMultidefs, TimeCompiler); )
+  ResourceMark rm;
+  // Keep track of the defs seen in registers and collect their uses in the block.
+  RegToDefUseMap reg2defuse(_max_reg, _max_reg, RegDefUse());
+  for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
+    Block* block = _cfg.get_block(i);
+    for (uint j = 1; j < block->number_of_nodes(); j++) {
+      Node* n = block->get_node(j);
+      if (n->is_Phi()) continue;
+      for (uint k = 1; k < n->req(); k++) {
+        j += possibly_merge_multidef(n, k, block, reg2defuse);
+      }
+      // Null out the value produced by the instruction itself, since we're only interested in defs
+      // implicitly defined by the uses. We are actually interested in tracking only redefinitions
+      // of the multidef lrgs in the same register. For that matter it's enough to track changes in
+      // the base register only and ignore other effects of multi-register lrgs and fat projections.
+      // It is also ok to ignore defs coming from singledefs. After an implicit overwrite by one of
+      // those our register is guaranteed to be used by another lrg and we won't attempt to merge it.
+      uint lrg = _lrg_map.live_range_id(n);
+      if (lrg > 0 && lrgs(lrg).is_multidef()) {
+        OptoReg::Name reg = lrgs(lrg).reg();
+        reg2defuse.at(reg).clear();
+      }
+    }
+    // Clear reg->def->use tracking for the next block
+    for (int j = 0; j < reg2defuse.length(); j++) {
+      reg2defuse.at(j).clear();
+    }
+  }
+}
+
+int PhaseChaitin::possibly_merge_multidef(Node *n, uint k, Block *block, RegToDefUseMap& reg2defuse) {
+  int blk_adjust = 0;
+
+  uint lrg = _lrg_map.live_range_id(n->in(k));
+  if (lrg > 0 && lrgs(lrg).is_multidef()) {
+    OptoReg::Name reg = lrgs(lrg).reg();
+
+    Node* def = reg2defuse.at(reg).def();
+    if (def != NULL && lrg == _lrg_map.live_range_id(def) && def != n->in(k)) {
+      // Same lrg but different node, we have to merge.
+      MachMergeNode* merge;
+      if (def->is_MachMerge()) { // is it already a merge?
+        merge = def->as_MachMerge();
+      } else {
+        merge = new (C) MachMergeNode(def);
+
+        // Insert the merge node into the block before the first use.
+        uint use_index = block->find_node(reg2defuse.at(reg).first_use());
+        block->insert_node(merge, use_index++);
+
+        // Let the allocator know about the new node, use the same lrg
+        _lrg_map.extend(merge->_idx, lrg);
+        blk_adjust++;
+
+        // Fixup all the uses (there is at least one) that happened between the first
+        // use and before the current one.
+        for (; use_index < block->number_of_nodes(); use_index++) {
+          Node* use = block->get_node(use_index);
+          if (use == n) {
+            break;
+          }
+          use->replace_edge(def, merge);
+        }
+      }
+      if (merge->find_edge(n->in(k)) == -1) {
+        merge->add_req(n->in(k));
+      }
+      n->set_req(k, merge);
+    }
+
+    // update the uses
+    reg2defuse.at(reg).update(n->in(k), n);
+  }
+
+  return blk_adjust;
+}
+

 //------------------------------post_allocate_copy_removal---------------------
 // Post-Allocation peephole copy removal.  We do this in 1 pass over the