提交 4619dfcb 编写于 作者: I iveresov

Merge

...@@ -575,6 +575,9 @@ void PhaseChaitin::Register_Allocate() { ...@@ -575,6 +575,9 @@ void PhaseChaitin::Register_Allocate() {
// Peephole remove copies // Peephole remove copies
post_allocate_copy_removal(); post_allocate_copy_removal();
// Merge multidefs if multiple defs representing the same value are used in a single block.
merge_multidefs();
#ifdef ASSERT #ifdef ASSERT
// Verify the graph after RA. // Verify the graph after RA.
verify(&live_arena); verify(&live_arena);
......
...@@ -578,6 +578,32 @@ private: ...@@ -578,6 +578,32 @@ private:
// Extend the node to LRG mapping // Extend the node to LRG mapping
void add_reference( const Node *node, const Node *old_node); void add_reference( const Node *node, const Node *old_node);
// Per-register bookkeeping: the def currently being followed for the register
// and the first node in the block that was observed using that def.
class RegDefUse {
  Node* _def;        // def being tracked, NULL when nothing is tracked
  Node* _first_use;  // use recorded together with _def
public:
  RegDefUse() : _def(NULL), _first_use(NULL) { }

  Node* def() const       { return _def; }
  Node* first_use() const { return _first_use; }

  // Begin tracking def with use as its first use. Reporting the def that is
  // already tracked changes nothing, so the originally recorded use is kept.
  void update(Node* def, Node* use) {
    if (_def == def) {
      return;
    }
    _def = def;
    _first_use = use;
  }

  // Forget the tracked def and its first use.
  void clear() {
    _first_use = NULL;
    _def = NULL;
  }
};
// Table of RegDefUse entries; merge_multidefs() sizes it with _max_reg and indexes it
// by the register assigned to a live range.
typedef GrowableArray<RegDefUse> RegToDefUseMap;
// Examines input edge k of node n within block. If that input belongs to a multidef lrg and
// differs from the def already tracked for the lrg's register, n is rerouted through a
// MachMerge node. Returns the number of nodes inserted into block, which the caller adds
// to its current position in the block.
int possibly_merge_multidef(Node *n, uint k, Block *block, RegToDefUseMap& reg2defuse);
// Merge nodes that are a part of a multidef lrg and produce the same value within a block.
void merge_multidefs();
private: private:
static int _final_loads, _final_stores, _final_copies, _final_memoves; static int _final_loads, _final_stores, _final_copies, _final_memoves;
......
...@@ -558,6 +558,29 @@ public: ...@@ -558,6 +558,29 @@ public:
#endif #endif
}; };
// MachMergeNode is similar to a PhiNode in a sense it merges multiple values,
// however it doesn't have a control input and is more like a MergeMem.
// It is inserted after the register allocation is done to ensure that nodes use single
// definition of a multidef lrg in a block.
class MachMergeNode : public MachIdealNode {
public:
MachMergeNode(Node *n1) {
init_class_id(Class_MachMerge);
add_req(NULL);
add_req(n1);
}
virtual const RegMask &out_RegMask() const { return in(1)->out_RegMask(); }
virtual const RegMask &in_RegMask(uint idx) const { return in(1)->in_RegMask(idx); }
virtual const class Type *bottom_type() const { return in(1)->bottom_type(); }
virtual uint ideal_reg() const { return bottom_type()->ideal_reg(); }
virtual uint oper_input_base() const { return 1; }
virtual void emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { }
virtual uint size(PhaseRegAlloc *ra_) const { return 0; }
#ifndef PRODUCT
virtual const char *Name() const { return "MachMerge"; }
#endif
};
//------------------------------MachBranchNode-------------------------------- //------------------------------MachBranchNode--------------------------------
// Abstract machine branch Node // Abstract machine branch Node
class MachBranchNode : public MachIdealNode { class MachBranchNode : public MachIdealNode {
......
...@@ -98,6 +98,7 @@ class MachReturnNode; ...@@ -98,6 +98,7 @@ class MachReturnNode;
class MachSafePointNode; class MachSafePointNode;
class MachSpillCopyNode; class MachSpillCopyNode;
class MachTempNode; class MachTempNode;
class MachMergeNode;
class Matcher; class Matcher;
class MemBarNode; class MemBarNode;
class MemBarStoreStoreNode; class MemBarStoreStoreNode;
...@@ -591,6 +592,7 @@ public: ...@@ -591,6 +592,7 @@ public:
DEFINE_CLASS_ID(MachTemp, Mach, 3) DEFINE_CLASS_ID(MachTemp, Mach, 3)
DEFINE_CLASS_ID(MachConstantBase, Mach, 4) DEFINE_CLASS_ID(MachConstantBase, Mach, 4)
DEFINE_CLASS_ID(MachConstant, Mach, 5) DEFINE_CLASS_ID(MachConstant, Mach, 5)
DEFINE_CLASS_ID(MachMerge, Mach, 6)
DEFINE_CLASS_ID(Type, Node, 2) DEFINE_CLASS_ID(Type, Node, 2)
DEFINE_CLASS_ID(Phi, Type, 0) DEFINE_CLASS_ID(Phi, Type, 0)
...@@ -761,6 +763,7 @@ public: ...@@ -761,6 +763,7 @@ public:
DEFINE_CLASS_QUERY(MachSafePoint) DEFINE_CLASS_QUERY(MachSafePoint)
DEFINE_CLASS_QUERY(MachSpillCopy) DEFINE_CLASS_QUERY(MachSpillCopy)
DEFINE_CLASS_QUERY(MachTemp) DEFINE_CLASS_QUERY(MachTemp)
DEFINE_CLASS_QUERY(MachMerge)
DEFINE_CLASS_QUERY(Mem) DEFINE_CLASS_QUERY(Mem)
DEFINE_CLASS_QUERY(MemBar) DEFINE_CLASS_QUERY(MemBar)
DEFINE_CLASS_QUERY(MemBarStoreStore) DEFINE_CLASS_QUERY(MemBarStoreStore)
......
...@@ -74,6 +74,7 @@ elapsedTimer Phase::_t_buildIFGphysical; ...@@ -74,6 +74,7 @@ elapsedTimer Phase::_t_buildIFGphysical;
elapsedTimer Phase::_t_computeLive; elapsedTimer Phase::_t_computeLive;
elapsedTimer Phase::_t_regAllocSplit; elapsedTimer Phase::_t_regAllocSplit;
elapsedTimer Phase::_t_postAllocCopyRemoval; elapsedTimer Phase::_t_postAllocCopyRemoval;
elapsedTimer Phase::_t_mergeMultidefs;
elapsedTimer Phase::_t_fixupSpills; elapsedTimer Phase::_t_fixupSpills;
// Subtimers for _t_output // Subtimers for _t_output
...@@ -136,11 +137,12 @@ void Phase::print_timers() { ...@@ -136,11 +137,12 @@ void Phase::print_timers() {
tty->print_cr (" computeLive : %3.3f sec", Phase::_t_computeLive.seconds()); tty->print_cr (" computeLive : %3.3f sec", Phase::_t_computeLive.seconds());
tty->print_cr (" regAllocSplit : %3.3f sec", Phase::_t_regAllocSplit.seconds()); tty->print_cr (" regAllocSplit : %3.3f sec", Phase::_t_regAllocSplit.seconds());
tty->print_cr (" postAllocCopyRemoval: %3.3f sec", Phase::_t_postAllocCopyRemoval.seconds()); tty->print_cr (" postAllocCopyRemoval: %3.3f sec", Phase::_t_postAllocCopyRemoval.seconds());
tty->print_cr (" mergeMultidefs: %3.3f sec", Phase::_t_mergeMultidefs.seconds());
tty->print_cr (" fixupSpills : %3.3f sec", Phase::_t_fixupSpills.seconds()); tty->print_cr (" fixupSpills : %3.3f sec", Phase::_t_fixupSpills.seconds());
double regalloc_subtotal = Phase::_t_ctorChaitin.seconds() + double regalloc_subtotal = Phase::_t_ctorChaitin.seconds() +
Phase::_t_buildIFGphysical.seconds() + Phase::_t_computeLive.seconds() + Phase::_t_buildIFGphysical.seconds() + Phase::_t_computeLive.seconds() +
Phase::_t_regAllocSplit.seconds() + Phase::_t_fixupSpills.seconds() + Phase::_t_regAllocSplit.seconds() + Phase::_t_fixupSpills.seconds() +
Phase::_t_postAllocCopyRemoval.seconds(); Phase::_t_postAllocCopyRemoval.seconds() + Phase::_t_mergeMultidefs.seconds();
double percent_of_regalloc = ((regalloc_subtotal == 0.0) ? 0.0 : (regalloc_subtotal / Phase::_t_registerAllocation.seconds() * 100.0)); double percent_of_regalloc = ((regalloc_subtotal == 0.0) ? 0.0 : (regalloc_subtotal / Phase::_t_registerAllocation.seconds() * 100.0));
tty->print_cr (" subtotal : %3.3f sec, %3.2f %%", regalloc_subtotal, percent_of_regalloc); tty->print_cr (" subtotal : %3.3f sec, %3.2f %%", regalloc_subtotal, percent_of_regalloc);
} }
......
...@@ -109,6 +109,7 @@ protected: ...@@ -109,6 +109,7 @@ protected:
static elapsedTimer _t_computeLive; static elapsedTimer _t_computeLive;
static elapsedTimer _t_regAllocSplit; static elapsedTimer _t_regAllocSplit;
static elapsedTimer _t_postAllocCopyRemoval; static elapsedTimer _t_postAllocCopyRemoval;
static elapsedTimer _t_mergeMultidefs;
static elapsedTimer _t_fixupSpills; static elapsedTimer _t_fixupSpills;
// Subtimers for _t_output // Subtimers for _t_output
......
...@@ -263,20 +263,6 @@ int PhaseChaitin::elide_copy( Node *n, int k, Block *current_block, Node_List &v ...@@ -263,20 +263,6 @@ int PhaseChaitin::elide_copy( Node *n, int k, Block *current_block, Node_List &v
// intermediate copies might be illegal, i.e., value is stored down to stack // intermediate copies might be illegal, i.e., value is stored down to stack
// then reloaded BUT survives in a register the whole way. // then reloaded BUT survives in a register the whole way.
Node *val = skip_copies(n->in(k)); Node *val = skip_copies(n->in(k));
if (val == x && nk_idx != 0 &&
regnd[nk_reg] != NULL && regnd[nk_reg] != x &&
_lrg_map.live_range_id(x) == _lrg_map.live_range_id(regnd[nk_reg])) {
// When rematerialzing nodes and stretching lifetimes, the
// allocator will reuse the original def for multidef LRG instead
// of the current reaching def because it can't know it's safe to
// do so. After allocation completes if they are in the same LRG
// then it should use the current reaching def instead.
n->set_req(k, regnd[nk_reg]);
blk_adjust += yank_if_dead(val, current_block, &value, &regnd);
val = skip_copies(n->in(k));
}
if (val == x) return blk_adjust; // No progress? if (val == x) return blk_adjust; // No progress?
int n_regs = RegMask::num_registers(val->ideal_reg()); int n_regs = RegMask::num_registers(val->ideal_reg());
...@@ -382,6 +368,94 @@ bool PhaseChaitin::eliminate_copy_of_constant(Node* val, Node* n, ...@@ -382,6 +368,94 @@ bool PhaseChaitin::eliminate_copy_of_constant(Node* val, Node* n,
return false; return false;
} }
// The algorithms works as follows:
// We traverse the block top to bottom. possibly_merge_multidef() is invoked for every input edge k
// of the instruction n. We check to see if the input is a multidef lrg. If it is, we record the fact that we've
// seen a definition (coming as an input) and add that fact to the reg2defuse array. The array maps registers to their
// current reaching definitions (we track only multidefs though). With each definition we also associate the first
// instruction we saw use it. If we encounter the situation when we observe an def (an input) that is a part of the
// same lrg but is different from the previous seen def we merge the two with a MachMerge node and substitute
// all the uses that we've seen so far to use the merge. After that we keep replacing the new defs in the same lrg
// as they get encountered with the merge node and keep adding these defs to the merge inputs.
void PhaseChaitin::merge_multidefs() {
NOT_PRODUCT( Compile::TracePhase t3("mergeMultidefs", &_t_mergeMultidefs, TimeCompiler); )
ResourceMark rm;
// Keep track of the defs seen in registers and collect their uses in the block.
RegToDefUseMap reg2defuse(_max_reg, _max_reg, RegDefUse());
for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
Block* block = _cfg.get_block(i);
for (uint j = 1; j < block->number_of_nodes(); j++) {
Node* n = block->get_node(j);
if (n->is_Phi()) continue;
for (uint k = 1; k < n->req(); k++) {
j += possibly_merge_multidef(n, k, block, reg2defuse);
}
// Null out the value produced by the instruction itself, since we're only interested in defs
// implicitly defined by the uses. We are actually interested in tracking only redefinitions
// of the multidef lrgs in the same register. For that matter it's enough to track changes in
// the base register only and ignore other effects of multi-register lrgs and fat projections.
// It is also ok to ignore defs coming from singledefs. After an implicit overwrite by one of
// those our register is guaranteed to be used by another lrg and we won't attempt to merge it.
uint lrg = _lrg_map.live_range_id(n);
if (lrg > 0 && lrgs(lrg).is_multidef()) {
OptoReg::Name reg = lrgs(lrg).reg();
reg2defuse.at(reg).clear();
}
}
// Clear reg->def->use tracking for the next block
for (int j = 0; j < reg2defuse.length(); j++) {
reg2defuse.at(j).clear();
}
}
}
// Inspects input edge k of node n. Nothing happens unless that input belongs to a multidef
// lrg. If the register of that lrg already tracks a different def of the same lrg, n is made
// to read a MachMerge node instead: an existing merge is reused, otherwise a new one is
// inserted in front of the first recorded use and the uses seen since then are redirected
// to it. Finally the tracking slot is updated with n's (possibly rewritten) input.
// Returns the number of nodes inserted into block (0 or 1).
int PhaseChaitin::possibly_merge_multidef(Node *n, uint k, Block *block, RegToDefUseMap& reg2defuse) {
  const uint lrg = _lrg_map.live_range_id(n->in(k));
  if (lrg == 0 || !lrgs(lrg).is_multidef()) {
    return 0;  // only inputs from multidef live ranges are tracked
  }

  int inserted = 0;
  RegDefUse& tracked = reg2defuse.at(lrgs(lrg).reg());
  Node* const prev_def = tracked.def();

  if (prev_def != NULL && lrg == _lrg_map.live_range_id(prev_def) && prev_def != n->in(k)) {
    // Same lrg reached through a different node: the two defs have to be merged.
    MachMergeNode* merge = NULL;
    if (!prev_def->is_MachMerge()) {
      merge = new (C) MachMergeNode(prev_def);

      // Place the merge in the block right before the first use of the previous def.
      uint pos = block->find_node(tracked.first_use());
      block->insert_node(merge, pos);
      pos++;
      // Make the new node known to the allocator under the same lrg.
      // NOTE(review): the merge is registered with the block's node list and the lrg map
      // only; confirm whether the CFG's node-to-block mapping has to learn about it too.
      _lrg_map.extend(merge->_idx, lrg);
      inserted++;

      // Redirect every use (there is at least one) located between the first use
      // and the current instruction.
      while (pos < block->number_of_nodes()) {
        Node* earlier_use = block->get_node(pos);
        if (earlier_use == n) {
          break;
        }
        earlier_use->replace_edge(prev_def, merge);
        pos++;
      }
    } else {
      // The tracked def is already a merge, keep extending it.
      merge = prev_def->as_MachMerge();
    }

    if (merge->find_edge(n->in(k)) == -1) {
      merge->add_req(n->in(k));
    }
    n->set_req(k, merge);
  }

  // Record the def n now reads (the merge, if one was installed above) and n as its use.
  tracked.update(n->in(k), n);
  return inserted;
}
//------------------------------post_allocate_copy_removal--------------------- //------------------------------post_allocate_copy_removal---------------------
// Post-Allocation peephole copy removal. We do this in 1 pass over the // Post-Allocation peephole copy removal. We do this in 1 pass over the
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册