Commit 564ba317 authored by adlertz

8023003: Cleanup the public interface to PhaseCFG

Summary: public methods that don't need to be public should be private.
Reviewed-by: kvn, twisti
Parent c4a87fef
This diff is collapsed.
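For orientation before the hunks below: the commit's pattern is to make PhaseCFG's raw fields (_num_blocks, _blocks, _root, _broot, _node_latency) private, rename some of them, and route all call sites through small accessors. A self-contained toy of that pattern (not HotSpot code; std::vector stands in for Block_List):

#include <cstdio>
#include <vector>

struct Block { int pre_order; };

class PhaseCFG {
 private:
  std::vector<Block*> _blocks;      // was a public Block_List
  unsigned _number_of_blocks = 0;   // was the public field _num_blocks
 public:
  void add_block(Block* block) { _blocks.push_back(block); _number_of_blocks++; }
  Block* get_block(unsigned pos) const { return _blocks[pos]; }
  unsigned number_of_blocks() const { return _number_of_blocks; }
};

int main() {
  PhaseCFG cfg;
  Block b0{0}, b1{1};
  cfg.add_block(&b0);
  cfg.add_block(&b1);
  // Call sites change from _cfg->_blocks[i] to the accessor form:
  for (unsigned i = 0; i < cfg.number_of_blocks(); i++) {
    std::printf("B%d\n", cfg.get_block(i)->pre_order);
  }
  return 0;
}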
......@@ -348,20 +348,77 @@ class Block : public CFGElement {
class PhaseCFG : public Phase {
friend class VMStructs;
private:
// Root of whole program
RootNode* _root;
// The block containing the root node
Block* _root_block;
// List of basic blocks that are created during CFG creation
Block_List _blocks;
// Count of basic blocks
uint _number_of_blocks;
// Arena for the blocks to be stored in
Arena* _block_arena;
// The matcher for this compilation
Matcher& _matcher;
// Map nodes to owning basic block
Block_Array _node_to_block_mapping;
// Loop from the root
CFGLoop* _root_loop;
// Outermost loop frequency
float _outer_loop_frequency;
// Per node latency estimation, valid only during GCM
GrowableArray<uint>* _node_latency;
// Build a proper-looking CFG. Return the count of basic blocks
uint build_cfg();
// Perform DFS search.
// Build the dominator tree so that we know where we can move instructions
void build_dominator_tree();
// Estimate block frequencies based on IfNode probabilities, so that we know where we want to move instructions
void estimate_block_frequency();
// Global Code Motion. See Click's PLDI95 paper. Place Nodes in specific
// basic blocks; i.e. _node_to_block_mapping now maps _idx for all Nodes to some Block.
// Move nodes to ensure correctness from GVN and also try to move nodes out of loops.
void global_code_motion();
// Schedule Nodes early in their basic blocks.
bool schedule_early(VectorSet &visited, Node_List &roots);
// For each node, find the latest block it can be scheduled into
// and then select the cheapest block between the latest and earliest
// block to place the node.
void schedule_late(VectorSet &visited, Node_List &stack);
// Compute the (backwards) latency of a node from a single use
int latency_from_use(Node *n, const Node *def, Node *use);
// Compute the (backwards) latency of a node from the uses of this instruction
void partial_latency_of_defs(Node *n);
// Compute the instruction global latency with a backwards walk
void compute_latencies_backwards(VectorSet &visited, Node_List &stack);
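The three latency declarations above describe one backward pass: a node's latency is the maximum over its uses of the use's latency plus that use's cost. A minimal sketch of the idea (not HotSpot code; nodes are assumed topologically ordered with defs before uses, and op_latency is an assumed per-instruction cost):

#include <algorithm>
#include <vector>

struct SketchNode {
  std::vector<int> uses;  // indices of nodes consuming this node's value
  int op_latency = 1;     // assumed cost of executing this node
};

std::vector<int> latencies_backwards(const std::vector<SketchNode>& nodes) {
  std::vector<int> latency(nodes.size(), 0);
  // Reverse topological order: every use is finished before its def.
  for (int i = (int)nodes.size() - 1; i >= 0; i--) {
    int l = 0;
    for (int u : nodes[i].uses) {
      l = std::max(l, latency[u] + nodes[u].op_latency);
    }
    latency[i] = l;  // nodes with no uses keep latency 0
  }
  return latency;
}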
// Pick a block between early and late that is a cheaper alternative
// to late. Helper for schedule_late.
Block* hoist_to_cheaper_block(Block* LCA, Block* early, Node* self);
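hoist_to_cheaper_block walks the dominator path between the latest legal block (the LCA of the uses) and the earliest one, keeping the least frequently executed candidate. A sketch of just that selection (not HotSpot code; the real method also filters candidates, e.g. to avoid moving into deeper loops, and early is assumed to dominate lca):

struct SketchBlock {
  SketchBlock* idom;  // immediate dominator, null at the root
  double freq;        // estimated execution frequency
};

SketchBlock* cheapest_between(SketchBlock* lca, SketchBlock* early) {
  SketchBlock* best = lca;
  // Walk from the latest block up the dominator tree to the earliest.
  for (SketchBlock* b = lca; b != nullptr; b = b->idom) {
    if (b->freq < best->freq) {
      best = b;
    }
    if (b == early) {
      break;
    }
  }
  return best;
}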
// Perform a Depth First Search (DFS).
// Setup 'vertex' as DFS to vertex mapping.
// Setup 'semi' as vertex to DFS mapping.
// Set 'parent' to DFS parent.
uint DFS( Tarjan *tarjan );
uint do_DFS(Tarjan* tarjan, uint rpo_counter);
// Helper function to insert a node into a block
void schedule_node_into_block( Node *n, Block *b );
......@@ -372,7 +429,8 @@ class PhaseCFG : public Phase {
void schedule_pinned_nodes( VectorSet &visited );
// I'll need a few machine-specific GotoNodes. Clone from this one.
MachNode *_goto;
// Used when building the CFG and creating end nodes for blocks.
MachNode* _goto;
Block* insert_anti_dependences(Block* LCA, Node* load, bool verify = false);
void verify_anti_dependences(Block* LCA, Node* load) {
......@@ -380,17 +438,77 @@ class PhaseCFG : public Phase {
insert_anti_dependences(LCA, load, true);
}
bool move_to_next(Block* bx, uint b_index);
void move_to_end(Block* bx, uint b_index);
void insert_goto_at(uint block_no, uint succ_no);
// Check for NeverBranch at block end. This needs to become a GOTO to the
// true target. NeverBranch nodes are treated as conditional branches that
// always go the same direction for most of the optimizer and are used to give
// a fake exit path to infinite loops. At this late stage they need to turn
// into Gotos so that when you enter the infinite loop you indeed hang.
void convert_NeverBranch_to_Goto(Block *b);
CFGLoop* create_loop_tree();
#ifndef PRODUCT
bool _trace_opto_pipelining; // tracing flag
#endif
public:
PhaseCFG(Arena* arena, RootNode* root, Matcher& matcher);
uint _num_blocks; // Count of basic blocks
Block_List _blocks; // List of basic blocks
RootNode *_root; // Root of whole program
Block *_broot; // Basic block of root
uint _rpo_ctr;
CFGLoop* _root_loop;
float _outer_loop_freq; // Outermost loop frequency
void set_latency_for_node(Node* node, int latency) {
_node_latency->at_put_grow(node->_idx, latency);
}
uint get_latency_for_node(Node* node) {
return _node_latency->at_grow(node->_idx);
}
// Get the outermost loop frequency
float get_outer_loop_frequency() const {
return _outer_loop_frequency;
}
// Get the root node of the CFG
RootNode* get_root_node() const {
return _root;
}
// Get the block of the root node
Block* get_root_block() const {
return _root_block;
}
// Adds a block at a position and moves the later ones one step
void add_block_at(uint pos, Block* block) {
_blocks.insert(pos, block);
_number_of_blocks++;
}
// Adds a block to the top of the block list
void add_block(Block* block) {
_blocks.push(block);
_number_of_blocks++;
}
// Clear the list of blocks
void clear_blocks() {
_blocks.reset();
_number_of_blocks = 0;
}
// Get the block at position pos in _blocks
Block* get_block(uint pos) const {
return _blocks[pos];
}
// Number of blocks
uint number_of_blocks() const {
return _number_of_blocks;
}
// Set which block this node should reside in
void map_node_to_block(const Node* node, Block* block) {
......@@ -412,72 +530,26 @@ class PhaseCFG : public Phase {
return (_node_to_block_mapping.lookup(node->_idx) != NULL);
}
// Per node latency estimation, valid only during GCM
GrowableArray<uint> *_node_latency;
#ifndef PRODUCT
bool _trace_opto_pipelining; // tracing flag
#endif
#ifdef ASSERT
Unique_Node_List _raw_oops;
#endif
// Build dominators
void Dominators();
// Estimate block frequencies based on IfNode probabilities
void Estimate_Block_Frequency();
// Global Code Motion. See Click's PLDI95 paper. Place Nodes in specific
// basic blocks; i.e. _node_to_block_mapping now maps _idx for all Nodes to some Block.
void GlobalCodeMotion( Matcher &m, uint unique, Node_List &proj_list );
// Do global code motion by first building dominator tree and estimate block frequency
// Returns true on success
bool do_global_code_motion();
// Compute the (backwards) latency of a node from the uses
void latency_from_uses(Node *n);
// Compute the (backwards) latency of a node from a single use
int latency_from_use(Node *n, const Node *def, Node *use);
// Compute the (backwards) latency of a node from the uses of this instruction
void partial_latency_of_defs(Node *n);
// Schedule Nodes early in their basic blocks.
bool schedule_early(VectorSet &visited, Node_List &roots);
// For each node, find the latest block it can be scheduled into
// and then select the cheapest block between the latest and earliest
// block to place the node.
void schedule_late(VectorSet &visited, Node_List &stack);
// Pick a block between early and late that is a cheaper alternative
// to late. Helper for schedule_late.
Block* hoist_to_cheaper_block(Block* LCA, Block* early, Node* self);
// Compute the instruction global latency with a backwards walk
void ComputeLatenciesBackwards(VectorSet &visited, Node_List &stack);
// Set loop alignment
void set_loop_alignment();
// Remove empty basic blocks
void remove_empty();
void remove_empty_blocks();
void fixup_flow();
bool move_to_next(Block* bx, uint b_index);
void move_to_end(Block* bx, uint b_index);
void insert_goto_at(uint block_no, uint succ_no);
// Check for NeverBranch at block end. This needs to become a GOTO to the
// true target. NeverBranch nodes are treated as conditional branches that
// always go the same direction for most of the optimizer and are used to give
// a fake exit path to infinite loops. At this late stage they need to turn
// into Gotos so that when you enter the infinite loop you indeed hang.
void convert_NeverBranch_to_Goto(Block *b);
CFGLoop* create_loop_tree();
// Insert a node into a block, and update the _bbs
void insert( Block *b, uint idx, Node *n ) {
// Insert a node into a block at index and map the node to the block
void insert(Block *b, uint idx, Node *n) {
b->_nodes.insert( idx, n );
map_node_to_block(n, b);
}
......
......@@ -87,7 +87,6 @@
// OptoReg::Bad for not-callee-saved.
//------------------------------OopFlow----------------------------------------
// Structure to pass around
struct OopFlow : public ResourceObj {
short *_callees; // Array mapping register to callee-saved
......@@ -119,7 +118,6 @@ struct OopFlow : public ResourceObj {
OopMap *build_oop_map( Node *n, int max_reg, PhaseRegAlloc *regalloc, int* live );
};
//------------------------------compute_reach----------------------------------
// Given reaching-defs for this block start, compute it for this block end
void OopFlow::compute_reach( PhaseRegAlloc *regalloc, int max_reg, Dict *safehash ) {
......@@ -177,7 +175,6 @@ void OopFlow::compute_reach( PhaseRegAlloc *regalloc, int max_reg, Dict *safehas
}
}
//------------------------------merge------------------------------------------
// Merge the given flow into the 'this' flow
void OopFlow::merge( OopFlow *flow, int max_reg ) {
assert( _b == NULL, "merging into a happy flow" );
......@@ -197,14 +194,12 @@ void OopFlow::merge( OopFlow *flow, int max_reg ) {
}
//------------------------------clone------------------------------------------
void OopFlow::clone( OopFlow *flow, int max_size ) {
_b = flow->_b;
memcpy( _callees, flow->_callees, sizeof(short)*max_size);
memcpy( _defs , flow->_defs , sizeof(Node*)*max_size);
}
//------------------------------make-------------------------------------------
OopFlow *OopFlow::make( Arena *A, int max_size, Compile* C ) {
short *callees = NEW_ARENA_ARRAY(A,short,max_size+1);
Node **defs = NEW_ARENA_ARRAY(A,Node*,max_size+1);
......@@ -215,7 +210,6 @@ OopFlow *OopFlow::make( Arena *A, int max_size, Compile* C ) {
return flow;
}
//------------------------------bit twiddlers----------------------------------
static int get_live_bit( int *live, int reg ) {
return live[reg>>LogBitsPerInt] & (1<<(reg&(BitsPerInt-1))); }
static void set_live_bit( int *live, int reg ) {
......@@ -223,7 +217,6 @@ static void set_live_bit( int *live, int reg ) {
static void clr_live_bit( int *live, int reg ) {
live[reg>>LogBitsPerInt] &= ~(1<<(reg&(BitsPerInt-1))); }
//------------------------------build_oop_map----------------------------------
// Build an oopmap from the current flow info
OopMap *OopFlow::build_oop_map( Node *n, int max_reg, PhaseRegAlloc *regalloc, int* live ) {
int framesize = regalloc->_framesize;
......@@ -412,19 +405,18 @@ OopMap *OopFlow::build_oop_map( Node *n, int max_reg, PhaseRegAlloc *regalloc, i
return omap;
}
//------------------------------do_liveness------------------------------------
// Compute backwards liveness on registers
static void do_liveness( PhaseRegAlloc *regalloc, PhaseCFG *cfg, Block_List *worklist, int max_reg_ints, Arena *A, Dict *safehash ) {
int *live = NEW_ARENA_ARRAY(A, int, (cfg->_num_blocks+1) * max_reg_ints);
int *tmp_live = &live[cfg->_num_blocks * max_reg_ints];
Node *root = cfg->C->root();
static void do_liveness(PhaseRegAlloc* regalloc, PhaseCFG* cfg, Block_List* worklist, int max_reg_ints, Arena* A, Dict* safehash) {
int* live = NEW_ARENA_ARRAY(A, int, (cfg->number_of_blocks() + 1) * max_reg_ints);
int* tmp_live = &live[cfg->number_of_blocks() * max_reg_ints];
Node* root = cfg->get_root_node();
// On CISC platforms, get the node representing the stack pointer that regalloc
// used for spills
Node *fp = NodeSentinel;
if (UseCISCSpill && root->req() > 1) {
fp = root->in(1)->in(TypeFunc::FramePtr);
}
memset( live, 0, cfg->_num_blocks * (max_reg_ints<<LogBytesPerInt) );
memset(live, 0, cfg->number_of_blocks() * (max_reg_ints << LogBytesPerInt));
// Push preds onto worklist
for (uint i = 1; i < root->req(); i++) {
Block* block = cfg->get_block_for_node(root->in(i));
......@@ -549,29 +541,32 @@ static void do_liveness( PhaseRegAlloc *regalloc, PhaseCFG *cfg, Block_List *wor
// Scan for any missing safepoints. Happens with infinite loops
// a la ZKM.jar
uint i;
for( i=1; i<cfg->_num_blocks; i++ ) {
Block *b = cfg->_blocks[i];
for (i = 1; i < cfg->number_of_blocks(); i++) {
Block* block = cfg->get_block(i);
uint j;
for( j=1; j<b->_nodes.size(); j++ )
if( b->_nodes[j]->jvms() &&
(*safehash)[b->_nodes[j]] == NULL )
for (j = 1; j < block->_nodes.size(); j++) {
if (block->_nodes[j]->jvms() && (*safehash)[block->_nodes[j]] == NULL) {
break;
if( j<b->_nodes.size() ) break;
}
if( i == cfg->_num_blocks )
}
if (j < block->_nodes.size()) {
break;
}
}
if (i == cfg->number_of_blocks()) {
break; // Got 'em all
}
#ifndef PRODUCT
if( PrintOpto && Verbose )
tty->print_cr("retripping live calc");
#endif
// Force the issue (expensively): recheck everybody
for( i=1; i<cfg->_num_blocks; i++ )
worklist->push(cfg->_blocks[i]);
for (i = 1; i < cfg->number_of_blocks(); i++) {
worklist->push(cfg->get_block(i));
}
}
}
//------------------------------BuildOopMaps-----------------------------------
// Collect GC mask info - where are all the OOPs?
void Compile::BuildOopMaps() {
NOT_PRODUCT( TracePhase t3("bldOopMaps", &_t_buildOopMaps, TimeCompiler); )
......@@ -592,12 +587,12 @@ void Compile::BuildOopMaps() {
OopFlow *free_list = NULL; // Free, unused
// Array mapping blocks to completed oopflows
OopFlow **flows = NEW_ARENA_ARRAY(A, OopFlow*, _cfg->_num_blocks);
memset( flows, 0, _cfg->_num_blocks*sizeof(OopFlow*) );
OopFlow **flows = NEW_ARENA_ARRAY(A, OopFlow*, _cfg->number_of_blocks());
memset( flows, 0, _cfg->number_of_blocks() * sizeof(OopFlow*) );
// Do the first block 'by hand' to prime the worklist
Block *entry = _cfg->_blocks[1];
Block *entry = _cfg->get_block(1);
OopFlow *rootflow = OopFlow::make(A,max_reg,this);
// Initialize to 'bottom' (not 'top')
memset( rootflow->_callees, OptoReg::Bad, max_reg*sizeof(short) );
......@@ -623,7 +618,9 @@ void Compile::BuildOopMaps() {
Block *b = worklist.pop();
// Ignore root block
if( b == _cfg->_broot ) continue;
if (b == _cfg->get_root_block()) {
continue;
}
// Block is already done? Happens if block has several predecessors,
// it can get on the worklist more than once.
if( flows[b->_pre_order] ) continue;
......
This diff is collapsed.
......@@ -34,8 +34,6 @@
#include "opto/matcher.hpp"
#include "opto/regmask.hpp"
//=============================================================================
//------------------------------Dump-------------------------------------------
#ifndef PRODUCT
void PhaseCoalesce::dump(Node *n) const {
// Being a const function means I cannot use 'Find'
......@@ -43,12 +41,11 @@ void PhaseCoalesce::dump(Node *n) const {
tty->print("L%d/N%d ",r,n->_idx);
}
//------------------------------dump-------------------------------------------
void PhaseCoalesce::dump() const {
// I know I have a block layout now, so I can print blocks in a loop
for( uint i=0; i<_phc._cfg._num_blocks; i++ ) {
for( uint i=0; i<_phc._cfg.number_of_blocks(); i++ ) {
uint j;
Block *b = _phc._cfg._blocks[i];
Block* b = _phc._cfg.get_block(i);
// Print a nice block header
tty->print("B%d: ",b->_pre_order);
for( j=1; j<b->num_preds(); j++ )
......@@ -85,7 +82,6 @@ void PhaseCoalesce::dump() const {
}
#endif
//------------------------------combine_these_two------------------------------
// Combine the live ranges def'd by these 2 Nodes. N2 is an input to N1.
void PhaseCoalesce::combine_these_two(Node *n1, Node *n2) {
uint lr1 = _phc._lrg_map.find(n1);
......@@ -127,18 +123,15 @@ void PhaseCoalesce::combine_these_two(Node *n1, Node *n2) {
}
}
//------------------------------coalesce_driver--------------------------------
// Copy coalescing
void PhaseCoalesce::coalesce_driver( ) {
void PhaseCoalesce::coalesce_driver() {
verify();
// Coalesce from high frequency to low
for( uint i=0; i<_phc._cfg._num_blocks; i++ )
coalesce( _phc._blks[i] );
for (uint i = 0; i < _phc._cfg.number_of_blocks(); i++) {
coalesce(_phc._blks[i]);
}
}
//------------------------------insert_copy_with_overlap-----------------------
// I am inserting copies to come out of SSA form. In the general case, I am
// doing a parallel renaming. I'm in the Named world now, so I can't do a
// general parallel renaming. All the copies now use "names" (live-ranges)
......@@ -216,7 +209,6 @@ void PhaseAggressiveCoalesce::insert_copy_with_overlap( Block *b, Node *copy, ui
b->_nodes.insert(last_use_idx+1,copy);
}
//------------------------------insert_copies----------------------------------
void PhaseAggressiveCoalesce::insert_copies( Matcher &matcher ) {
// We do LRGs compressing and fix a liveout data only here since the other
// place in Split() is guarded by the assert which we never hit.
......@@ -225,8 +217,8 @@ void PhaseAggressiveCoalesce::insert_copies( Matcher &matcher ) {
for (uint lrg = 1; lrg < _phc._lrg_map.max_lrg_id(); lrg++) {
uint compressed_lrg = _phc._lrg_map.find(lrg);
if (lrg != compressed_lrg) {
for (uint bidx = 0; bidx < _phc._cfg._num_blocks; bidx++) {
IndexSet *liveout = _phc._live->live(_phc._cfg._blocks[bidx]);
for (uint bidx = 0; bidx < _phc._cfg.number_of_blocks(); bidx++) {
IndexSet *liveout = _phc._live->live(_phc._cfg.get_block(bidx));
if (liveout->member(lrg)) {
liveout->remove(lrg);
liveout->insert(compressed_lrg);
......@@ -239,10 +231,10 @@ void PhaseAggressiveCoalesce::insert_copies( Matcher &matcher ) {
// Nodes with index less than '_unique' are original, non-virtual Nodes.
_unique = C->unique();
for( uint i=0; i<_phc._cfg._num_blocks; i++ ) {
for (uint i = 0; i < _phc._cfg.number_of_blocks(); i++) {
C->check_node_count(NodeLimitFudgeFactor, "out of nodes in coalesce");
if (C->failing()) return;
Block *b = _phc._cfg._blocks[i];
Block *b = _phc._cfg.get_block(i);
uint cnt = b->num_preds(); // Number of inputs to the Phi
for( uint l = 1; l<b->_nodes.size(); l++ ) {
......@@ -403,8 +395,7 @@ void PhaseAggressiveCoalesce::insert_copies( Matcher &matcher ) {
} // End of for all blocks
}
//=============================================================================
//------------------------------coalesce---------------------------------------
// Aggressive (but pessimistic) copy coalescing of a single block
// The following coalesce pass represents a single round of aggressive
......@@ -464,20 +455,16 @@ void PhaseAggressiveCoalesce::coalesce( Block *b ) {
} // End of for all instructions in block
}
//=============================================================================
//------------------------------PhaseConservativeCoalesce----------------------
PhaseConservativeCoalesce::PhaseConservativeCoalesce(PhaseChaitin &chaitin) : PhaseCoalesce(chaitin) {
_ulr.initialize(_phc._lrg_map.max_lrg_id());
}
//------------------------------verify-----------------------------------------
void PhaseConservativeCoalesce::verify() {
#ifdef ASSERT
_phc.set_was_low();
#endif
}
//------------------------------union_helper-----------------------------------
void PhaseConservativeCoalesce::union_helper( Node *lr1_node, Node *lr2_node, uint lr1, uint lr2, Node *src_def, Node *dst_copy, Node *src_copy, Block *b, uint bindex ) {
// Join live ranges. Merge larger into smaller. Union lr2 into lr1 in the
// union-find tree
......@@ -520,7 +507,6 @@ void PhaseConservativeCoalesce::union_helper( Node *lr1_node, Node *lr2_node, ui
}
}
//------------------------------compute_separating_interferences---------------
// Factored code from copy_copy that computes extra interferences from
// lengthening a live range by double-coalescing.
uint PhaseConservativeCoalesce::compute_separating_interferences(Node *dst_copy, Node *src_copy, Block *b, uint bindex, RegMask &rm, uint reg_degree, uint rm_size, uint lr1, uint lr2 ) {
......@@ -586,7 +572,6 @@ uint PhaseConservativeCoalesce::compute_separating_interferences(Node *dst_copy,
return reg_degree;
}
//------------------------------update_ifg-------------------------------------
void PhaseConservativeCoalesce::update_ifg(uint lr1, uint lr2, IndexSet *n_lr1, IndexSet *n_lr2) {
// Some original neighbors of lr1 might have gone away
// because the constrained register mask prevented them.
......@@ -616,7 +601,6 @@ void PhaseConservativeCoalesce::update_ifg(uint lr1, uint lr2, IndexSet *n_lr1,
lrgs(neighbor).inc_degree( lrg1.compute_degree(lrgs(neighbor)) );
}
//------------------------------record_bias------------------------------------
static void record_bias( const PhaseIFG *ifg, int lr1, int lr2 ) {
// Tag copy bias here
if( !ifg->lrgs(lr1)._copy_bias )
......@@ -625,7 +609,6 @@ static void record_bias( const PhaseIFG *ifg, int lr1, int lr2 ) {
ifg->lrgs(lr2)._copy_bias = lr1;
}
//------------------------------copy_copy--------------------------------------
// See if I can coalesce a series of multiple copies together. I need the
// final dest copy and the original src copy. They can be the same Node.
// Compute the compatible register masks.
......@@ -785,7 +768,6 @@ bool PhaseConservativeCoalesce::copy_copy(Node *dst_copy, Node *src_copy, Block
return true;
}
//------------------------------coalesce---------------------------------------
// Conservative (but pessimistic) copy coalescing of a single block
void PhaseConservativeCoalesce::coalesce( Block *b ) {
// Bail out on infrequent blocks
......
......@@ -2136,7 +2136,9 @@ void Compile::Optimize() {
//------------------------------Code_Gen---------------------------------------
// Given a graph, generate code for it
void Compile::Code_Gen() {
if (failing()) return;
if (failing()) {
return;
}
// Perform instruction selection. You might think we could reclaim Matcher
// memory PDQ, but actually the Matcher is used in generating spill code.
......@@ -2148,12 +2150,11 @@ void Compile::Code_Gen() {
// nodes. Mapping is only valid at the root of each matched subtree.
NOT_PRODUCT( verify_graph_edges(); )
Node_List proj_list;
Matcher m(proj_list);
_matcher = &m;
Matcher matcher;
_matcher = &matcher;
{
TracePhase t2("matcher", &_t_matcher, true);
m.match();
matcher.match();
}
// In debug mode can dump m._nodes.dump() for mapping of ideal to machine
// nodes. Mapping is only valid at the root of each matched subtree.
......@@ -2161,31 +2162,26 @@ void Compile::Code_Gen() {
// If you have too many nodes, or if matching has failed, bail out
check_node_count(0, "out of nodes matching instructions");
if (failing()) return;
if (failing()) {
return;
}
// Build a proper-looking CFG
PhaseCFG cfg(node_arena(), root(), m);
PhaseCFG cfg(node_arena(), root(), matcher);
_cfg = &cfg;
{
NOT_PRODUCT( TracePhase t2("scheduler", &_t_scheduler, TimeCompiler); )
cfg.Dominators();
if (failing()) return;
NOT_PRODUCT( verify_graph_edges(); )
cfg.Estimate_Block_Frequency();
cfg.GlobalCodeMotion(m,unique(),proj_list);
if (failing()) return;
bool success = cfg.do_global_code_motion();
if (!success) {
return;
}
print_method(PHASE_GLOBAL_CODE_MOTION, 2);
NOT_PRODUCT( verify_graph_edges(); )
debug_only( cfg.verify(); )
}
NOT_PRODUCT( verify_graph_edges(); )
PhaseChaitin regalloc(unique(), cfg, m);
PhaseChaitin regalloc(unique(), cfg, matcher);
_regalloc = &regalloc;
{
TracePhase t2("regalloc", &_t_registerAllocation, true);
......@@ -2206,7 +2202,7 @@ void Compile::Code_Gen() {
// can now safely remove it.
{
NOT_PRODUCT( TracePhase t2("blockOrdering", &_t_blockOrdering, TimeCompiler); )
cfg.remove_empty();
cfg.remove_empty_blocks();
if (do_freq_based_layout()) {
PhaseBlockLayout layout(cfg);
} else {
......@@ -2253,38 +2249,50 @@ void Compile::dump_asm(int *pcs, uint pc_limit) {
_regalloc->dump_frame();
Node *n = NULL;
for( uint i=0; i<_cfg->_num_blocks; i++ ) {
if (VMThread::should_terminate()) { cut_short = true; break; }
Block *b = _cfg->_blocks[i];
if (b->is_connector() && !Verbose) continue;
n = b->_nodes[0];
if (pcs && n->_idx < pc_limit)
for (uint i = 0; i < _cfg->number_of_blocks(); i++) {
if (VMThread::should_terminate()) {
cut_short = true;
break;
}
Block* block = _cfg->get_block(i);
if (block->is_connector() && !Verbose) {
continue;
}
n = block->_nodes[0];
if (pcs && n->_idx < pc_limit) {
tty->print("%3.3x ", pcs[n->_idx]);
else
} else {
tty->print(" ");
b->dump_head(_cfg);
if (b->is_connector()) {
}
block->dump_head(_cfg);
if (block->is_connector()) {
tty->print_cr(" # Empty connector block");
} else if (b->num_preds() == 2 && b->pred(1)->is_CatchProj() && b->pred(1)->as_CatchProj()->_con == CatchProjNode::fall_through_index) {
} else if (block->num_preds() == 2 && block->pred(1)->is_CatchProj() && block->pred(1)->as_CatchProj()->_con == CatchProjNode::fall_through_index) {
tty->print_cr(" # Block is sole successor of call");
}
// For all instructions
Node *delay = NULL;
for( uint j = 0; j<b->_nodes.size(); j++ ) {
if (VMThread::should_terminate()) { cut_short = true; break; }
n = b->_nodes[j];
for (uint j = 0; j < block->_nodes.size(); j++) {
if (VMThread::should_terminate()) {
cut_short = true;
break;
}
n = block->_nodes[j];
if (valid_bundle_info(n)) {
Bundle *bundle = node_bundling(n);
Bundle* bundle = node_bundling(n);
if (bundle->used_in_unconditional_delay()) {
delay = n;
continue;
}
if (bundle->starts_bundle())
if (bundle->starts_bundle()) {
starts_bundle = '+';
}
}
if (WizardMode) n->dump();
if (WizardMode) {
n->dump();
}
if( !n->is_Region() && // Don't print in the Assembly
!n->is_Phi() && // a few noisily useless nodes
......
......@@ -32,9 +32,6 @@
// Portions of code courtesy of Clifford Click
// Optimization - Graph Style
//------------------------------Tarjan-----------------------------------------
// A data structure that holds all the information needed to find dominators.
struct Tarjan {
Block *_block; // Basic block for this info
......@@ -60,23 +57,21 @@ struct Tarjan {
};
//------------------------------Dominator--------------------------------------
// Compute the dominator tree of the CFG. The CFG must already have been
// constructed. This is the Lengauer & Tarjan O(E-alpha(E,V)) algorithm.
void PhaseCFG::Dominators( ) {
void PhaseCFG::build_dominator_tree() {
// Pre-grow the blocks array, prior to the ResourceMark kicking in
_blocks.map(_num_blocks,0);
_blocks.map(number_of_blocks(), 0);
ResourceMark rm;
// Setup mappings from my Graph to Tarjan's stuff and back
// Note: Tarjan uses 1-based arrays
Tarjan *tarjan = NEW_RESOURCE_ARRAY(Tarjan,_num_blocks+1);
Tarjan* tarjan = NEW_RESOURCE_ARRAY(Tarjan, number_of_blocks() + 1);
// Tarjan's algorithm, almost verbatim:
// Step 1:
_rpo_ctr = _num_blocks;
uint dfsnum = DFS( tarjan );
if( dfsnum-1 != _num_blocks ) {// Check for unreachable loops!
uint dfsnum = do_DFS(tarjan, number_of_blocks());
if (dfsnum - 1 != number_of_blocks()) { // Check for unreachable loops!
// If the returned dfsnum does not match the number of blocks, then we
// must have some unreachable loops. These can be made at any time by
// IterGVN. They are cleaned up by CCP or the loop opts, but the last
......@@ -93,14 +88,13 @@ void PhaseCFG::Dominators( ) {
C->record_method_not_compilable("unreachable loop");
return;
}
_blocks._cnt = _num_blocks;
_blocks._cnt = number_of_blocks();
// Tarjan is using 1-based arrays, so these are some initialize flags
tarjan[0]._size = tarjan[0]._semi = 0;
tarjan[0]._label = &tarjan[0];
uint i;
for( i=_num_blocks; i>=2; i-- ) { // For all vertices in DFS order
for (uint i = number_of_blocks(); i >= 2; i--) { // For all vertices in DFS order
Tarjan *w = &tarjan[i]; // Get vertex from DFS
// Step 2:
......@@ -130,19 +124,19 @@ void PhaseCFG::Dominators( ) {
}
// Step 4:
for( i=2; i <= _num_blocks; i++ ) {
for (uint i = 2; i <= number_of_blocks(); i++) {
Tarjan *w = &tarjan[i];
if( w->_dom != &tarjan[w->_semi] )
w->_dom = w->_dom->_dom;
w->_dom_next = w->_dom_child = NULL; // Initialize for building tree later
}
// No immediate dominator for the root
Tarjan *w = &tarjan[_broot->_pre_order];
Tarjan *w = &tarjan[get_root_block()->_pre_order];
w->_dom = NULL;
w->_dom_next = w->_dom_child = NULL; // Initialize for building tree later
// Convert the dominator tree array into my kind of graph
for( i=1; i<=_num_blocks;i++){// For all Tarjan vertices
for(uint i = 1; i <= number_of_blocks(); i++){ // For all Tarjan vertices
Tarjan *t = &tarjan[i]; // Handy access
Tarjan *tdom = t->_dom; // Handy access to immediate dominator
if( tdom ) { // Root has no immediate dominator
......@@ -152,11 +146,10 @@ void PhaseCFG::Dominators( ) {
} else
t->_block->_idom = NULL; // Root
}
w->setdepth( _num_blocks+1 ); // Set depth in dominator tree
w->setdepth(number_of_blocks() + 1); // Set depth in dominator tree
}
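For readers who do not want to page through Tarjan's four steps: the same idom array can be computed by the simpler iterative scheme of Cooper, Harvey and Kennedy, sketched below (not HotSpot code, and asymptotically slower than Lengauer-Tarjan; blocks are assumed numbered in reverse post-order with the root as 0, which the RPO numbering from do_DFS provides):

#include <vector>

static int intersect(const std::vector<int>& idom, int a, int b) {
  // Walk both fingers up the partial dominator tree until they meet.
  while (a != b) {
    while (a > b) a = idom[a];
    while (b > a) b = idom[b];
  }
  return a;
}

std::vector<int> compute_idoms(const std::vector<std::vector<int>>& preds) {
  const int n = (int)preds.size();
  std::vector<int> idom(n, -1);
  idom[0] = 0;  // the root is its own (only) dominator
  bool changed = true;
  while (changed) {
    changed = false;
    for (int b = 1; b < n; b++) {  // visit in reverse post-order
      int new_idom = -1;
      for (int p : preds[b]) {
        if (idom[p] == -1) continue;  // predecessor not yet processed
        new_idom = (new_idom == -1) ? p : intersect(idom, new_idom, p);
      }
      if (new_idom != -1 && idom[b] != new_idom) {
        idom[b] = new_idom;
        changed = true;
      }
    }
  }
  return idom;
}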
//----------------------------Block_Stack--------------------------------------
class Block_Stack {
private:
struct Block_Descr {
......@@ -214,7 +207,6 @@ class Block_Stack {
}
};
//-------------------------most_frequent_successor-----------------------------
// Find the index into the b->succs[] array of the most frequent successor.
uint Block_Stack::most_frequent_successor( Block *b ) {
uint freq_idx = 0;
......@@ -258,40 +250,38 @@ uint Block_Stack::most_frequent_successor( Block *b ) {
return freq_idx;
}
//------------------------------DFS--------------------------------------------
// Perform DFS search. Setup 'vertex' as DFS to vertex mapping. Setup
// 'semi' as vertex to DFS mapping. Set 'parent' to DFS parent.
uint PhaseCFG::DFS( Tarjan *tarjan ) {
Block *b = _broot;
uint PhaseCFG::do_DFS(Tarjan *tarjan, uint rpo_counter) {
Block* root_block = get_root_block();
uint pre_order = 1;
// Allocate stack of size _num_blocks+1 to avoid frequent realloc
Block_Stack bstack(tarjan, _num_blocks+1);
// Allocate stack of size number_of_blocks() + 1 to avoid frequent realloc
Block_Stack bstack(tarjan, number_of_blocks() + 1);
// Push on stack the state for the first block
bstack.push(pre_order, b);
bstack.push(pre_order, root_block);
++pre_order;
while (bstack.is_nonempty()) {
if (!bstack.last_successor()) {
// Walk over all successors in pre-order (DFS).
Block *s = bstack.next_successor();
if (s->_pre_order == 0) { // Check for no-pre-order, not-visited
Block* next_block = bstack.next_successor();
if (next_block->_pre_order == 0) { // Check for no-pre-order, not-visited
// Push on stack the state of successor
bstack.push(pre_order, s);
bstack.push(pre_order, next_block);
++pre_order;
}
}
else {
// Build a reverse post-order in the CFG _blocks array
Block *stack_top = bstack.pop();
stack_top->_rpo = --_rpo_ctr;
stack_top->_rpo = --rpo_counter;
_blocks.map(stack_top->_rpo, stack_top);
}
}
return pre_order;
}
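What do_DFS produces is a reverse post-order numbering: a block is numbered by counting rpo_counter down as it is popped, so that, ignoring back edges, every predecessor is numbered before its successors. A minimal stand-alone version of the same loop (not HotSpot code; succs[b] lists the successors of block b and block 0 is the root):

#include <vector>

std::vector<int> reverse_post_order(const std::vector<std::vector<int>>& succs) {
  const int n = (int)succs.size();
  std::vector<int> rpo(n, -1);
  std::vector<size_t> next(n, 0);   // per-block cursor over successors
  std::vector<bool> visited(n, false);
  std::vector<int> stack;
  int counter = n;
  stack.push_back(0);
  visited[0] = true;
  while (!stack.empty()) {
    int b = stack.back();
    if (next[b] < succs[b].size()) {  // successors left: go deeper
      int s = succs[b][next[b]++];
      if (!visited[s]) {
        visited[s] = true;
        stack.push_back(s);
      }
    } else {                          // all successors done: number on pop
      rpo[b] = --counter;
      stack.pop_back();
    }
  }
  return rpo;  // blocks unreachable from 0 keep -1, cf. the dfsnum check above
}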
//------------------------------COMPRESS---------------------------------------
void Tarjan::COMPRESS()
{
assert( _ancestor != 0, "" );
......@@ -303,14 +293,12 @@ void Tarjan::COMPRESS()
}
}
//------------------------------EVAL-------------------------------------------
Tarjan *Tarjan::EVAL() {
if( !_ancestor ) return _label;
COMPRESS();
return (_ancestor->_label->_semi >= _label->_semi) ? _label : _ancestor->_label;
}
//------------------------------LINK-------------------------------------------
void Tarjan::LINK( Tarjan *w, Tarjan *tarjan0 ) {
Tarjan *s = w;
while( w->_label->_semi < s->_child->_label->_semi ) {
......@@ -333,7 +321,6 @@ void Tarjan::LINK( Tarjan *w, Tarjan *tarjan0 ) {
}
}
//------------------------------setdepth---------------------------------------
void Tarjan::setdepth( uint stack_size ) {
Tarjan **top = NEW_RESOURCE_ARRAY(Tarjan*, stack_size);
Tarjan **next = top;
......@@ -362,8 +349,7 @@ void Tarjan::setdepth( uint stack_size ) {
} while (last < top);
}
//*********************** DOMINATORS ON THE SEA OF NODES***********************
//------------------------------NTarjan----------------------------------------
// Compute dominators on the Sea of Nodes form
// A data structure that holds all the information needed to find dominators.
struct NTarjan {
Node *_control; // Control node associated with this info
......@@ -396,7 +382,6 @@ struct NTarjan {
#endif
};
//------------------------------Dominator--------------------------------------
// Compute the dominator tree of the sea of nodes. This version walks all CFG
// nodes (using the is_CFG() call) and places them in a dominator tree. Thus,
// it needs a count of the CFG nodes for the mapping table. This is the
......@@ -517,7 +502,6 @@ void PhaseIdealLoop::Dominators() {
}
}
//------------------------------DFS--------------------------------------------
// Perform DFS search. Setup 'vertex' as DFS to vertex mapping. Setup
// 'semi' as vertex to DFS mapping. Set 'parent' to DFS parent.
int NTarjan::DFS( NTarjan *ntarjan, VectorSet &visited, PhaseIdealLoop *pil, uint *dfsorder) {
......@@ -560,7 +544,6 @@ int NTarjan::DFS( NTarjan *ntarjan, VectorSet &visited, PhaseIdealLoop *pil, uin
return dfsnum;
}
//------------------------------COMPRESS---------------------------------------
void NTarjan::COMPRESS()
{
assert( _ancestor != 0, "" );
......@@ -572,14 +555,12 @@ void NTarjan::COMPRESS()
}
}
//------------------------------EVAL-------------------------------------------
NTarjan *NTarjan::EVAL() {
if( !_ancestor ) return _label;
COMPRESS();
return (_ancestor->_label->_semi >= _label->_semi) ? _label : _ancestor->_label;
}
//------------------------------LINK-------------------------------------------
void NTarjan::LINK( NTarjan *w, NTarjan *ntarjan0 ) {
NTarjan *s = w;
while( w->_label->_semi < s->_child->_label->_semi ) {
......@@ -602,7 +583,6 @@ void NTarjan::LINK( NTarjan *w, NTarjan *ntarjan0 ) {
}
}
//------------------------------setdepth---------------------------------------
void NTarjan::setdepth( uint stack_size, uint *dom_depth ) {
NTarjan **top = NEW_RESOURCE_ARRAY(NTarjan*, stack_size);
NTarjan **next = top;
......@@ -631,7 +611,6 @@ void NTarjan::setdepth( uint stack_size, uint *dom_depth ) {
} while (last < top);
}
//------------------------------dump-------------------------------------------
#ifndef PRODUCT
void NTarjan::dump(int offset) const {
// Dump the data from this node
......
This diff is collapsed.
......@@ -416,7 +416,7 @@ void IdealGraphPrinter::visit_node(Node *n, bool edges, VectorSet* temp_set) {
if (C->cfg() != NULL) {
Block* block = C->cfg()->get_block_for_node(node);
if (block == NULL) {
print_prop("block", C->cfg()->_blocks[0]->_pre_order);
print_prop("block", C->cfg()->get_block(0)->_pre_order);
} else {
print_prop("block", block->_pre_order);
}
......@@ -637,10 +637,10 @@ void IdealGraphPrinter::walk_nodes(Node *start, bool edges, VectorSet* temp_set)
if (C->cfg() != NULL) {
// once we have a CFG there are some nodes that aren't really
// reachable but are in the CFG so add them here.
for (uint i = 0; i < C->cfg()->_blocks.size(); i++) {
Block *b = C->cfg()->_blocks[i];
for (uint s = 0; s < b->_nodes.size(); s++) {
nodeStack.push(b->_nodes[s]);
for (uint i = 0; i < C->cfg()->number_of_blocks(); i++) {
Block* block = C->cfg()->get_block(i);
for (uint s = 0; s < block->_nodes.size(); s++) {
nodeStack.push(block->_nodes[s]);
}
}
}
......@@ -698,24 +698,24 @@ void IdealGraphPrinter::print(Compile* compile, const char *name, Node *node, in
tail(EDGES_ELEMENT);
if (C->cfg() != NULL) {
head(CONTROL_FLOW_ELEMENT);
for (uint i = 0; i < C->cfg()->_blocks.size(); i++) {
Block *b = C->cfg()->_blocks[i];
for (uint i = 0; i < C->cfg()->number_of_blocks(); i++) {
Block* block = C->cfg()->get_block(i);
begin_head(BLOCK_ELEMENT);
print_attr(BLOCK_NAME_PROPERTY, b->_pre_order);
print_attr(BLOCK_NAME_PROPERTY, block->_pre_order);
end_head();
head(SUCCESSORS_ELEMENT);
for (uint s = 0; s < b->_num_succs; s++) {
for (uint s = 0; s < block->_num_succs; s++) {
begin_elem(SUCCESSOR_ELEMENT);
print_attr(BLOCK_NAME_PROPERTY, b->_succs[s]->_pre_order);
print_attr(BLOCK_NAME_PROPERTY, block->_succs[s]->_pre_order);
end_elem();
}
tail(SUCCESSORS_ELEMENT);
head(NODES_ELEMENT);
for (uint s = 0; s < b->_nodes.size(); s++) {
for (uint s = 0; s < block->_nodes.size(); s++) {
begin_elem(NODE_ELEMENT);
print_attr(NODE_ID_PROPERTY, get_node_id(b->_nodes[s]));
print_attr(NODE_ID_PROPERTY, get_node_id(block->_nodes[s]));
end_elem();
}
tail(NODES_ELEMENT);
......
This diff is collapsed.
......@@ -501,7 +501,7 @@ Node *Block::select(PhaseCFG *cfg, Node_List &worklist, GrowableArray<int> &read
n_choice = 1;
}
uint n_latency = cfg->_node_latency->at_grow(n->_idx);
uint n_latency = cfg->get_latency_for_node(n);
uint n_score = n->req(); // Many inputs get high score to break ties
// Keep best latency found
......@@ -797,7 +797,7 @@ bool Block::schedule_local(PhaseCFG *cfg, Matcher &matcher, GrowableArray<int> &
Node *n = _nodes[j];
int idx = n->_idx;
tty->print("# ready cnt:%3d ", ready_cnt.at(idx));
tty->print("latency:%3d ", cfg->_node_latency->at_grow(idx));
tty->print("latency:%3d ", cfg->get_latency_for_node(n));
tty->print("%4d: %s\n", idx, n->Name());
}
}
......@@ -825,7 +825,7 @@ bool Block::schedule_local(PhaseCFG *cfg, Matcher &matcher, GrowableArray<int> &
#ifndef PRODUCT
if (cfg->trace_opto_pipelining()) {
tty->print("# select %d: %s", n->_idx, n->Name());
tty->print(", latency:%d", cfg->_node_latency->at_grow(n->_idx));
tty->print(", latency:%d", cfg->get_latency_for_node(n));
n->dump();
if (Verbose) {
tty->print("# ready list:");
......
......@@ -30,9 +30,6 @@
#include "opto/machnode.hpp"
//=============================================================================
//------------------------------PhaseLive--------------------------------------
// Compute live-in/live-out. We use a totally incremental algorithm. The LIVE
// problem is monotonic. The steady-state solution looks like this: pull a
block from the worklist. It has a set of deltas - values which are newly
......@@ -53,9 +50,9 @@ void PhaseLive::compute(uint maxlrg) {
// Init the sparse live arrays. This data is live on exit from here!
// The _live info is the live-out info.
_live = (IndexSet*)_arena->Amalloc(sizeof(IndexSet)*_cfg._num_blocks);
_live = (IndexSet*)_arena->Amalloc(sizeof(IndexSet) * _cfg.number_of_blocks());
uint i;
for( i=0; i<_cfg._num_blocks; i++ ) {
for (i = 0; i < _cfg.number_of_blocks(); i++) {
_live[i].initialize(_maxlrg);
}
......@@ -65,14 +62,14 @@ void PhaseLive::compute(uint maxlrg) {
// Does the memory used by _defs and _deltas get reclaimed? Does it matter? TT
// Array of values defined locally in blocks
_defs = NEW_RESOURCE_ARRAY(IndexSet,_cfg._num_blocks);
for( i=0; i<_cfg._num_blocks; i++ ) {
_defs = NEW_RESOURCE_ARRAY(IndexSet,_cfg.number_of_blocks());
for (i = 0; i < _cfg.number_of_blocks(); i++) {
_defs[i].initialize(_maxlrg);
}
// Array of delta-set pointers, indexed by block pre_order-1.
_deltas = NEW_RESOURCE_ARRAY(IndexSet*,_cfg._num_blocks);
memset( _deltas, 0, sizeof(IndexSet*)* _cfg._num_blocks);
_deltas = NEW_RESOURCE_ARRAY(IndexSet*,_cfg.number_of_blocks());
memset( _deltas, 0, sizeof(IndexSet*)* _cfg.number_of_blocks());
_free_IndexSet = NULL;
......@@ -80,31 +77,32 @@ void PhaseLive::compute(uint maxlrg) {
VectorSet first_pass(Thread::current()->resource_area());
// Outer loop: must compute local live-in sets and push into predecessors.
uint iters = _cfg._num_blocks; // stat counters
for( uint j=_cfg._num_blocks; j>0; j-- ) {
Block *b = _cfg._blocks[j-1];
for (uint j = _cfg.number_of_blocks(); j > 0; j--) {
Block* block = _cfg.get_block(j - 1);
// Compute the local live-in set. Start with any new live-out bits.
IndexSet *use = getset( b );
IndexSet *def = &_defs[b->_pre_order-1];
IndexSet* use = getset(block);
IndexSet* def = &_defs[block->_pre_order-1];
DEBUG_ONLY(IndexSet *def_outside = getfreeset();)
uint i;
for( i=b->_nodes.size(); i>1; i-- ) {
Node *n = b->_nodes[i-1];
if( n->is_Phi() ) break;
for (i = block->_nodes.size(); i > 1; i--) {
Node* n = block->_nodes[i-1];
if (n->is_Phi()) {
break;
}
uint r = _names[n->_idx];
assert(!def_outside->member(r), "Use of external LRG overlaps the same LRG defined in this block");
def->insert( r );
use->remove( r );
uint cnt = n->req();
for( uint k=1; k<cnt; k++ ) {
for (uint k = 1; k < cnt; k++) {
Node *nk = n->in(k);
uint nkidx = nk->_idx;
if (_cfg.get_block_for_node(nk) != b) {
if (_cfg.get_block_for_node(nk) != block) {
uint u = _names[nkidx];
use->insert( u );
DEBUG_ONLY(def_outside->insert( u );)
use->insert(u);
DEBUG_ONLY(def_outside->insert(u);)
}
}
}
......@@ -113,41 +111,38 @@ void PhaseLive::compute(uint maxlrg) {
_free_IndexSet = def_outside; // Drop onto free list
#endif
// Remove anything defined by Phis and the block start instruction
for( uint k=i; k>0; k-- ) {
uint r = _names[b->_nodes[k-1]->_idx];
def->insert( r );
use->remove( r );
for (uint k = i; k > 0; k--) {
uint r = _names[block->_nodes[k - 1]->_idx];
def->insert(r);
use->remove(r);
}
// Push these live-in things to predecessors
for( uint l=1; l<b->num_preds(); l++ ) {
Block *p = _cfg.get_block_for_node(b->pred(l));
add_liveout( p, use, first_pass );
for (uint l = 1; l < block->num_preds(); l++) {
Block* p = _cfg.get_block_for_node(block->pred(l));
add_liveout(p, use, first_pass);
// PhiNode uses go in the live-out set of prior blocks.
for( uint k=i; k>0; k-- )
add_liveout( p, _names[b->_nodes[k-1]->in(l)->_idx], first_pass );
for (uint k = i; k > 0; k--) {
add_liveout(p, _names[block->_nodes[k-1]->in(l)->_idx], first_pass);
}
freeset( b );
first_pass.set(b->_pre_order);
}
freeset(block);
first_pass.set(block->_pre_order);
// Inner loop: blocks that picked up new live-out values to be propagated
while( _worklist->size() ) {
// !!!!!
// #ifdef ASSERT
iters++;
// #endif
Block *b = _worklist->pop();
IndexSet *delta = getset(b);
while (_worklist->size()) {
Block* block = _worklist->pop();
IndexSet *delta = getset(block);
assert( delta->count(), "missing delta set" );
// Add new-live-in to predecessors live-out sets
for (uint l = 1; l < b->num_preds(); l++) {
Block* block = _cfg.get_block_for_node(b->pred(l));
add_liveout(block, delta, first_pass);
for (uint l = 1; l < block->num_preds(); l++) {
Block* predecessor = _cfg.get_block_for_node(block->pred(l));
add_liveout(predecessor, delta, first_pass);
}
freeset(b);
freeset(block);
} // End of while-worklist-not-empty
} // End of for-all-blocks-outer-loop
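The outer/inner loop pair above is an incremental solver for the classic backward liveness equations: live-in(b) = use(b) | (live-out(b) - def(b)), and live-out(b) is the union of live-in(s) over all successors s. A dense fixpoint version for reference (not HotSpot code; PhaseLive instead keeps sparse IndexSets and pushes only deltas to predecessors, and kMaxLrg is an assumed bound on live range ids):

#include <bitset>
#include <vector>

constexpr size_t kMaxLrg = 1024;  // assumed upper bound on live range ids
using LrgSet = std::bitset<kMaxLrg>;

struct LiveBlock {
  std::vector<int> succs;  // successor block indices
  LrgSet use, def;         // computed locally, as in the outer loop above
  LrgSet live_in, live_out;
};

void compute_liveness(std::vector<LiveBlock>& blocks) {
  bool changed = true;
  while (changed) {  // LIVE is monotonic, so this reaches a fixpoint
    changed = false;
    for (int b = (int)blocks.size() - 1; b >= 0; b--) {  // backward order converges faster
      LrgSet out;
      for (int s : blocks[b].succs) {
        out |= blocks[s].live_in;
      }
      LrgSet in = blocks[b].use | (out & ~blocks[b].def);
      if (in != blocks[b].live_in || out != blocks[b].live_out) {
        blocks[b].live_in = in;
        blocks[b].live_out = out;
        changed = true;
      }
    }
  }
}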
......@@ -155,7 +150,7 @@ void PhaseLive::compute(uint maxlrg) {
// We explicitly clear all of the IndexSets which we are about to release.
// This allows us to recycle their internal memory into IndexSet's free list.
for( i=0; i<_cfg._num_blocks; i++ ) {
for (i = 0; i < _cfg.number_of_blocks(); i++) {
_defs[i].clear();
if (_deltas[i]) {
// Is this always true?
......@@ -171,13 +166,11 @@ void PhaseLive::compute(uint maxlrg) {
}
//------------------------------stats------------------------------------------
#ifndef PRODUCT
void PhaseLive::stats(uint iters) const {
}
#endif
//------------------------------getset-----------------------------------------
// Get an IndexSet for a block. Return existing one, if any. Make a new
// empty one if a prior one does not exist.
IndexSet *PhaseLive::getset( Block *p ) {
......@@ -188,7 +181,6 @@ IndexSet *PhaseLive::getset( Block *p ) {
return delta; // Return set of new live-out items
}
//------------------------------getfreeset-------------------------------------
// Pull from free list, or allocate. Internal allocation on the returned set
// is always from thread local storage.
IndexSet *PhaseLive::getfreeset( ) {
......@@ -207,7 +199,6 @@ IndexSet *PhaseLive::getfreeset( ) {
return f;
}
//------------------------------freeset----------------------------------------
// Free an IndexSet from a block.
void PhaseLive::freeset( const Block *p ) {
IndexSet *f = _deltas[p->_pre_order-1];
......@@ -216,7 +207,6 @@ void PhaseLive::freeset( const Block *p ) {
_deltas[p->_pre_order-1] = NULL;
}
//------------------------------add_liveout------------------------------------
// Add a live-out value to a given blocks live-out set. If it is new, then
// also add it to the delta set and stick the block on the worklist.
void PhaseLive::add_liveout( Block *p, uint r, VectorSet &first_pass ) {
......@@ -233,8 +223,6 @@ void PhaseLive::add_liveout( Block *p, uint r, VectorSet &first_pass ) {
}
}
//------------------------------add_liveout------------------------------------
// Add a vector of live-out values to a given blocks live-out set.
void PhaseLive::add_liveout( Block *p, IndexSet *lo, VectorSet &first_pass ) {
IndexSet *live = &_live[p->_pre_order-1];
......@@ -262,7 +250,6 @@ void PhaseLive::add_liveout( Block *p, IndexSet *lo, VectorSet &first_pass ) {
}
#ifndef PRODUCT
//------------------------------dump-------------------------------------------
// Dump the live-out set for a block
void PhaseLive::dump( const Block *b ) const {
tty->print("Block %d: ",b->_pre_order);
......@@ -275,18 +262,19 @@ void PhaseLive::dump( const Block *b ) const {
tty->print("\n");
}
//------------------------------verify_base_ptrs-------------------------------
// Verify that base pointers and derived pointers are still sane.
void PhaseChaitin::verify_base_ptrs( ResourceArea *a ) const {
#ifdef ASSERT
Unique_Node_List worklist(a);
for( uint i = 0; i < _cfg._num_blocks; i++ ) {
Block *b = _cfg._blocks[i];
for( uint j = b->end_idx() + 1; j > 1; j-- ) {
Node *n = b->_nodes[j-1];
if( n->is_Phi() ) break;
for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
Block* block = _cfg.get_block(i);
for (uint j = block->end_idx() + 1; j > 1; j--) {
Node* n = block->_nodes[j-1];
if (n->is_Phi()) {
break;
}
// Found a safepoint?
if( n->is_MachSafePoint() ) {
if (n->is_MachSafePoint()) {
MachSafePointNode *sfpt = n->as_MachSafePoint();
JVMState* jvms = sfpt->jvms();
if (jvms != NULL) {
......@@ -358,7 +346,6 @@ void PhaseChaitin::verify_base_ptrs( ResourceArea *a ) const {
#endif
}
//------------------------------verify-------------------------------------
// Verify that graphs and base pointers are still sane.
void PhaseChaitin::verify( ResourceArea *a, bool verify_ifg ) const {
#ifdef ASSERT
......
......@@ -67,8 +67,8 @@ const uint Matcher::_begin_rematerialize = _BEGIN_REMATERIALIZE;
const uint Matcher::_end_rematerialize = _END_REMATERIALIZE;
//---------------------------Matcher-------------------------------------------
Matcher::Matcher( Node_List &proj_list ) :
PhaseTransform( Phase::Ins_Select ),
Matcher::Matcher()
: PhaseTransform( Phase::Ins_Select ),
#ifdef ASSERT
_old2new_map(C->comp_arena()),
_new2old_map(C->comp_arena()),
......@@ -78,7 +78,7 @@ Matcher::Matcher( Node_List &proj_list ) :
_swallowed(swallowed),
_begin_inst_chain_rule(_BEGIN_INST_CHAIN_RULE),
_end_inst_chain_rule(_END_INST_CHAIN_RULE),
_must_clone(must_clone), _proj_list(proj_list),
_must_clone(must_clone),
_register_save_policy(register_save_policy),
_c_reg_save_policy(c_reg_save_policy),
_register_save_type(register_save_type),
......@@ -1304,8 +1304,9 @@ MachNode *Matcher::match_sfpt( SafePointNode *sfpt ) {
for (int i = begin_out_arg_area; i < out_arg_limit_per_call; i++)
proj->_rout.Insert(OptoReg::Name(i));
}
if( proj->_rout.is_NotEmpty() )
_proj_list.push(proj);
if (proj->_rout.is_NotEmpty()) {
push_projection(proj);
}
}
// Transfer the safepoint information from the call to the mcall
// Move the JVMState list
......@@ -1685,14 +1686,15 @@ MachNode *Matcher::ReduceInst( State *s, int rule, Node *&mem ) {
}
// If the _leaf is an AddP, insert the base edge
if( leaf->is_AddP() )
if (leaf->is_AddP()) {
mach->ins_req(AddPNode::Base,leaf->in(AddPNode::Base));
}
uint num_proj = _proj_list.size();
uint number_of_projections_prior = number_of_projections();
// Perform any 1-to-many expansions required
MachNode *ex = mach->Expand(s,_proj_list, mem);
if( ex != mach ) {
MachNode *ex = mach->Expand(s, _projection_list, mem);
if (ex != mach) {
assert(ex->ideal_reg() == mach->ideal_reg(), "ideal types should match");
if( ex->in(1)->is_Con() )
ex->in(1)->set_req(0, C->root());
......@@ -1713,7 +1715,7 @@ MachNode *Matcher::ReduceInst( State *s, int rule, Node *&mem ) {
// generated belatedly during spill code generation.
if (_allocation_started) {
guarantee(ex == mach, "no expand rules during spill generation");
guarantee(_proj_list.size() == num_proj, "no allocation during spill generation");
guarantee(number_of_projections_prior == number_of_projections(), "no allocation during spill generation");
}
if (leaf->is_Con() || leaf->is_DecodeNarrowPtr()) {
......
......@@ -88,7 +88,7 @@ class Matcher : public PhaseTransform {
Node *transform( Node *dummy );
Node_List &_proj_list; // For Machine nodes killing many values
Node_List _projection_list; // For Machine nodes killing many values
Node_Array _shared_nodes;
......@@ -183,10 +183,30 @@ public:
void collect_null_checks( Node *proj, Node *orig_proj );
void validate_null_checks( );
Matcher( Node_List &proj_list );
Matcher();
// Get a projection node at position pos
Node* get_projection(uint pos) {
return _projection_list[pos];
}
// Push a projection node onto the projection list
void push_projection(Node* node) {
_projection_list.push(node);
}
Node* pop_projection() {
return _projection_list.pop();
}
// Number of nodes in the projection list
uint number_of_projections() const {
return _projection_list.size();
}
// Select instructions for entire method
void match( );
void match();
// Helper for match
OptoReg::Name warp_incoming_stk_arg( VMReg reg );
......
This diff is collapsed.
......@@ -1643,8 +1643,8 @@ void PhasePeephole::do_transform() {
bool method_name_not_printed = true;
// Examine each basic block
for( uint block_number = 1; block_number < _cfg._num_blocks; ++block_number ) {
Block *block = _cfg._blocks[block_number];
for (uint block_number = 1; block_number < _cfg.number_of_blocks(); ++block_number) {
Block* block = _cfg.get_block(block_number);
bool block_not_printed = true;
// and each instruction within a block
......
......@@ -405,50 +405,53 @@ void PhaseChaitin::post_allocate_copy_removal() {
// Need a mapping from basic blocks to Node_Lists. We need a Node_List to
// map from register number to value-producing Node.
Node_List **blk2value = NEW_RESOURCE_ARRAY( Node_List *, _cfg._num_blocks+1);
memset( blk2value, 0, sizeof(Node_List*)*(_cfg._num_blocks+1) );
Node_List **blk2value = NEW_RESOURCE_ARRAY( Node_List *, _cfg.number_of_blocks() + 1);
memset(blk2value, 0, sizeof(Node_List*) * (_cfg.number_of_blocks() + 1));
// Need a mapping from basic blocks to Node_Lists. We need a Node_List to
// map from register number to register-defining Node.
Node_List **blk2regnd = NEW_RESOURCE_ARRAY( Node_List *, _cfg._num_blocks+1);
memset( blk2regnd, 0, sizeof(Node_List*)*(_cfg._num_blocks+1) );
Node_List **blk2regnd = NEW_RESOURCE_ARRAY( Node_List *, _cfg.number_of_blocks() + 1);
memset(blk2regnd, 0, sizeof(Node_List*) * (_cfg.number_of_blocks() + 1));
// We keep unused Node_Lists on a free_list to avoid wasting
// memory.
GrowableArray<Node_List*> free_list = GrowableArray<Node_List*>(16);
// For all blocks
for( uint i = 0; i < _cfg._num_blocks; i++ ) {
for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
uint j;
Block *b = _cfg._blocks[i];
Block* block = _cfg.get_block(i);
// Count of Phis in block
uint phi_dex;
for( phi_dex = 1; phi_dex < b->_nodes.size(); phi_dex++ ) {
Node *phi = b->_nodes[phi_dex];
if( !phi->is_Phi() )
for (phi_dex = 1; phi_dex < block->_nodes.size(); phi_dex++) {
Node* phi = block->_nodes[phi_dex];
if (!phi->is_Phi()) {
break;
}
}
// If any predecessor has not been visited, we do not know the state
// of registers at the start. Check for this, while updating copies
// along Phi input edges
bool missing_some_inputs = false;
Block *freed = NULL;
for( j = 1; j < b->num_preds(); j++ ) {
Block *pb = _cfg.get_block_for_node(b->pred(j));
for (j = 1; j < block->num_preds(); j++) {
Block* pb = _cfg.get_block_for_node(block->pred(j));
// Remove copies along phi edges
for( uint k=1; k<phi_dex; k++ )
elide_copy( b->_nodes[k], j, b, *blk2value[pb->_pre_order], *blk2regnd[pb->_pre_order], false );
if( blk2value[pb->_pre_order] ) { // Have a mapping on this edge?
for (uint k = 1; k < phi_dex; k++) {
elide_copy(block->_nodes[k], j, block, *blk2value[pb->_pre_order], *blk2regnd[pb->_pre_order], false);
}
if (blk2value[pb->_pre_order]) { // Have a mapping on this edge?
// See if this predecessor's mappings have been used by everybody
// who wants them. If so, free 'em.
uint k;
for( k=0; k<pb->_num_succs; k++ ) {
Block *pbsucc = pb->_succs[k];
if( !blk2value[pbsucc->_pre_order] && pbsucc != b )
for (k = 0; k < pb->_num_succs; k++) {
Block* pbsucc = pb->_succs[k];
if (!blk2value[pbsucc->_pre_order] && pbsucc != block) {
break; // Found a future user
}
if( k >= pb->_num_succs ) { // No more uses, free!
}
if (k >= pb->_num_succs) { // No more uses, free!
freed = pb; // Record last block freed
free_list.push(blk2value[pb->_pre_order]);
free_list.push(blk2regnd[pb->_pre_order]);
......@@ -467,20 +470,20 @@ void PhaseChaitin::post_allocate_copy_removal() {
value.map(_max_reg,NULL);
regnd.map(_max_reg,NULL);
// Set mappings as OUR mappings
blk2value[b->_pre_order] = &value;
blk2regnd[b->_pre_order] = &regnd;
blk2value[block->_pre_order] = &value;
blk2regnd[block->_pre_order] = &regnd;
// Initialize value & regnd for this block
if( missing_some_inputs ) {
if (missing_some_inputs) {
// Some predecessor has not yet been visited; zap map to empty
for( uint k = 0; k < (uint)_max_reg; k++ ) {
for (uint k = 0; k < (uint)_max_reg; k++) {
value.map(k,NULL);
regnd.map(k,NULL);
}
} else {
if( !freed ) { // Didn't get a freebie prior block
// Must clone some data
freed = _cfg.get_block_for_node(b->pred(1));
freed = _cfg.get_block_for_node(block->pred(1));
Node_List &f_value = *blk2value[freed->_pre_order];
Node_List &f_regnd = *blk2regnd[freed->_pre_order];
for( uint k = 0; k < (uint)_max_reg; k++ ) {
......@@ -489,9 +492,11 @@ void PhaseChaitin::post_allocate_copy_removal() {
}
}
// Merge all inputs together, setting to NULL any conflicts.
for( j = 1; j < b->num_preds(); j++ ) {
Block *pb = _cfg.get_block_for_node(b->pred(j));
if( pb == freed ) continue; // Did self already via freelist
for (j = 1; j < block->num_preds(); j++) {
Block* pb = _cfg.get_block_for_node(block->pred(j));
if (pb == freed) {
continue; // Did self already via freelist
}
Node_List &p_regnd = *blk2regnd[pb->_pre_order];
for( uint k = 0; k < (uint)_max_reg; k++ ) {
if( regnd[k] != p_regnd[k] ) { // Conflict on reaching defs?
......@@ -503,9 +508,9 @@ void PhaseChaitin::post_allocate_copy_removal() {
}
// For all Phi's
for( j = 1; j < phi_dex; j++ ) {
for (j = 1; j < phi_dex; j++) {
uint k;
Node *phi = b->_nodes[j];
Node *phi = block->_nodes[j];
uint pidx = _lrg_map.live_range_id(phi);
OptoReg::Name preg = lrgs(_lrg_map.live_range_id(phi)).reg();
......@@ -516,8 +521,8 @@ void PhaseChaitin::post_allocate_copy_removal() {
if( phi != x && u != x ) // Found a different input
u = u ? NodeSentinel : x; // Capture unique input, or NodeSentinel for 2nd input
}
if( u != NodeSentinel ) { // Junk Phi. Remove
b->_nodes.remove(j--);
if (u != NodeSentinel) { // Junk Phi. Remove
block->_nodes.remove(j--);
phi_dex--;
_cfg.unmap_node_from_block(phi);
phi->replace_by(u);
......@@ -547,13 +552,13 @@ void PhaseChaitin::post_allocate_copy_removal() {
}
// For all remaining instructions
for( j = phi_dex; j < b->_nodes.size(); j++ ) {
Node *n = b->_nodes[j];
for (j = phi_dex; j < block->_nodes.size(); j++) {
Node* n = block->_nodes[j];
if( n->outcnt() == 0 && // Dead?
if(n->outcnt() == 0 && // Dead?
n != C->top() && // (ignore TOP, it has no du info)
!n->is_Proj() ) { // fat-proj kills
j -= yank_if_dead(n,b,&value,&regnd);
j -= yank_if_dead(n, block, &value, &regnd);
continue;
}
......@@ -598,8 +603,9 @@ void PhaseChaitin::post_allocate_copy_removal() {
const uint two_adr = n->is_Mach() ? n->as_Mach()->two_adr() : 0;
// Remove copies along input edges
for( k = 1; k < n->req(); k++ )
j -= elide_copy( n, k, b, value, regnd, two_adr!=k );
for (k = 1; k < n->req(); k++) {
j -= elide_copy(n, k, block, value, regnd, two_adr != k);
}
// Unallocated Nodes define no registers
uint lidx = _lrg_map.live_range_id(n);
......@@ -630,8 +636,8 @@ void PhaseChaitin::post_allocate_copy_removal() {
// then 'n' is a useless copy. Do not update the register->node
// mapping so 'n' will go dead.
if( value[nreg] != val ) {
if (eliminate_copy_of_constant(val, n, b, value, regnd, nreg, OptoReg::Bad)) {
j -= replace_and_yank_if_dead(n, nreg, b, value, regnd);
if (eliminate_copy_of_constant(val, n, block, value, regnd, nreg, OptoReg::Bad)) {
j -= replace_and_yank_if_dead(n, nreg, block, value, regnd);
} else {
// Update the mapping: record new Node defined by the register
regnd.map(nreg,n);
......@@ -640,8 +646,8 @@ void PhaseChaitin::post_allocate_copy_removal() {
value.map(nreg,val);
}
} else if( !may_be_copy_of_callee(n) ) {
assert( n->is_Copy(), "" );
j -= replace_and_yank_if_dead(n, nreg, b, value, regnd);
assert(n->is_Copy(), "");
j -= replace_and_yank_if_dead(n, nreg, block, value, regnd);
}
} else if (RegMask::is_vector(n_ideal_reg)) {
// If Node 'n' does not change the value mapped by the register,
......@@ -660,7 +666,7 @@ void PhaseChaitin::post_allocate_copy_removal() {
}
} else if (n->is_Copy()) {
// Note: vector can't be constant and can't be copy of callee.
j -= replace_and_yank_if_dead(n, nreg, b, value, regnd);
j -= replace_and_yank_if_dead(n, nreg, block, value, regnd);
}
} else {
// If the value occupies a register pair, record same info
......@@ -674,18 +680,18 @@ void PhaseChaitin::post_allocate_copy_removal() {
tmp.Remove(nreg);
nreg_lo = tmp.find_first_elem();
}
if( value[nreg] != val || value[nreg_lo] != val ) {
if (eliminate_copy_of_constant(val, n, b, value, regnd, nreg, nreg_lo)) {
j -= replace_and_yank_if_dead(n, nreg, b, value, regnd);
if (value[nreg] != val || value[nreg_lo] != val) {
if (eliminate_copy_of_constant(val, n, block, value, regnd, nreg, nreg_lo)) {
j -= replace_and_yank_if_dead(n, nreg, block, value, regnd);
} else {
regnd.map(nreg , n );
regnd.map(nreg_lo, n );
value.map(nreg ,val);
value.map(nreg_lo,val);
}
} else if( !may_be_copy_of_callee(n) ) {
assert( n->is_Copy(), "" );
j -= replace_and_yank_if_dead(n, nreg, b, value, regnd);
} else if (!may_be_copy_of_callee(n)) {
assert(n->is_Copy(), "");
j -= replace_and_yank_if_dead(n, nreg, block, value, regnd);
}
}
......
......@@ -529,13 +529,13 @@ uint PhaseChaitin::Split(uint maxlrg, ResourceArea* split_arena) {
// a Def is UP or DOWN. UP means that it should get a register (ie -
// it is always in LRP regions), and DOWN means that it is probably
// on the stack (ie - it crosses HRP regions).
Node ***Reaches = NEW_SPLIT_ARRAY( Node**, _cfg._num_blocks+1 );
bool **UP = NEW_SPLIT_ARRAY( bool*, _cfg._num_blocks+1 );
Node ***Reaches = NEW_SPLIT_ARRAY( Node**, _cfg.number_of_blocks() + 1);
bool **UP = NEW_SPLIT_ARRAY( bool*, _cfg.number_of_blocks() + 1);
Node **debug_defs = NEW_SPLIT_ARRAY( Node*, spill_cnt );
VectorSet **UP_entry= NEW_SPLIT_ARRAY( VectorSet*, spill_cnt );
// Initialize Reaches & UP
for( bidx = 0; bidx < _cfg._num_blocks+1; bidx++ ) {
for (bidx = 0; bidx < _cfg.number_of_blocks() + 1; bidx++) {
Reaches[bidx] = NEW_SPLIT_ARRAY( Node*, spill_cnt );
UP[bidx] = NEW_SPLIT_ARRAY( bool, spill_cnt );
Node **Reachblock = Reaches[bidx];
......@@ -555,13 +555,13 @@ uint PhaseChaitin::Split(uint maxlrg, ResourceArea* split_arena) {
//----------PASS 1----------
//----------Propagation & Node Insertion Code----------
// Walk the Blocks in RPO for DEF & USE info
for( bidx = 0; bidx < _cfg._num_blocks; bidx++ ) {
for( bidx = 0; bidx < _cfg.number_of_blocks(); bidx++ ) {
if (C->check_node_count(spill_cnt, out_of_nodes)) {
return 0;
}
b = _cfg._blocks[bidx];
b = _cfg.get_block(bidx);
// Reaches & UP arrays for this block
Reachblock = Reaches[b->_pre_order];
UPblock = UP[b->_pre_order];
......@@ -1394,8 +1394,8 @@ uint PhaseChaitin::Split(uint maxlrg, ResourceArea* split_arena) {
// DEBUG
#ifdef ASSERT
// Validate all live range index assignments
for (bidx = 0; bidx < _cfg._num_blocks; bidx++) {
b = _cfg._blocks[bidx];
for (bidx = 0; bidx < _cfg.number_of_blocks(); bidx++) {
b = _cfg.get_block(bidx);
for (insidx = 0; insidx <= b->end_idx(); insidx++) {
Node *n = b->_nodes[insidx];
uint defidx = _lrg_map.find(n);
......
......@@ -1096,10 +1096,10 @@ typedef BinaryTreeDictionary<Metablock, FreeList> MetablockTreeDictionary;
\
c2_nonstatic_field(MachCallRuntimeNode, _name, const char*) \
\
c2_nonstatic_field(PhaseCFG, _num_blocks, uint) \
c2_nonstatic_field(PhaseCFG, _number_of_blocks, uint) \
c2_nonstatic_field(PhaseCFG, _blocks, Block_List) \
c2_nonstatic_field(PhaseCFG, _node_to_block_mapping, Block_Array) \
c2_nonstatic_field(PhaseCFG, _broot, Block*) \
c2_nonstatic_field(PhaseCFG, _root_block, Block*) \
\
c2_nonstatic_field(PhaseRegAlloc, _node_regs, OptoRegPair*) \
c2_nonstatic_field(PhaseRegAlloc, _node_regs_max_index, uint) \
......