coalesce.cpp 31.4 KB
Newer Older
D
duke 已提交
1
/*
2
 * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
D
duke 已提交
3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
19 20 21
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
D
duke 已提交
22 23 24
 *
 */

25 26 27 28 29 30 31 32 33 34 35
#include "precompiled.hpp"
#include "memory/allocation.inline.hpp"
#include "opto/block.hpp"
#include "opto/cfgnode.hpp"
#include "opto/chaitin.hpp"
#include "opto/coalesce.hpp"
#include "opto/connode.hpp"
#include "opto/indexSet.hpp"
#include "opto/machnode.hpp"
#include "opto/matcher.hpp"
#include "opto/regmask.hpp"
D
duke 已提交
36 37 38 39

//=============================================================================
//------------------------------Dump-------------------------------------------
#ifndef PRODUCT
40
void PhaseCoalesce::dump(Node *n) const {
D
duke 已提交
41
  // Being a const function means I cannot use 'Find'
42
  uint r = _phc._lrg_map.find(n);
D
duke 已提交
43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89
  tty->print("L%d/N%d ",r,n->_idx);
}

//------------------------------dump-------------------------------------------
void PhaseCoalesce::dump() const {
  // I know I have a block layout now, so I can print blocks in a loop
  for( uint i=0; i<_phc._cfg._num_blocks; i++ ) {
    uint j;
    Block *b = _phc._cfg._blocks[i];
    // Print a nice block header
    tty->print("B%d: ",b->_pre_order);
    for( j=1; j<b->num_preds(); j++ )
      tty->print("B%d ", _phc._cfg._bbs[b->pred(j)->_idx]->_pre_order);
    tty->print("-> ");
    for( j=0; j<b->_num_succs; j++ )
      tty->print("B%d ",b->_succs[j]->_pre_order);
    tty->print(" IDom: B%d/#%d\n", b->_idom ? b->_idom->_pre_order : 0, b->_dom_depth);
    uint cnt = b->_nodes.size();
    for( j=0; j<cnt; j++ ) {
      Node *n = b->_nodes[j];
      dump( n );
      tty->print("\t%s\t",n->Name());

      // Dump the inputs
      uint k;                   // Exit value of loop
      for( k=0; k<n->req(); k++ ) // For all required inputs
        if( n->in(k) ) dump( n->in(k) );
        else tty->print("_ ");
      int any_prec = 0;
      for( ; k<n->len(); k++ )          // For all precedence inputs
        if( n->in(k) ) {
          if( !any_prec++ ) tty->print(" |");
          dump( n->in(k) );
        }

      // Dump node-specific info
      n->dump_spec(tty);
      tty->print("\n");

    }
    tty->print("\n");
  }
}
#endif

//------------------------------combine_these_two------------------------------
// Combine the live ranges def'd by these 2 Nodes.  N2 is an input to N1.
90 91 92
void PhaseCoalesce::combine_these_two(Node *n1, Node *n2) {
  uint lr1 = _phc._lrg_map.find(n1);
  uint lr2 = _phc._lrg_map.find(n2);
D
duke 已提交
93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160
  if( lr1 != lr2 &&             // Different live ranges already AND
      !_phc._ifg->test_edge_sq( lr1, lr2 ) ) {  // Do not interfere
    LRG *lrg1 = &_phc.lrgs(lr1);
    LRG *lrg2 = &_phc.lrgs(lr2);
    // Not an oop->int cast; oop->oop, int->int, AND int->oop are OK.

    // Now, why is int->oop OK?  We end up declaring a raw-pointer as an oop
    // and in general that's a bad thing.  However, int->oop conversions only
    // happen at GC points, so the lifetime of the misclassified raw-pointer
    // is from the CheckCastPP (that converts it to an oop) backwards up
    // through a merge point and into the slow-path call, and around the
    // diamond up to the heap-top check and back down into the slow-path call.
    // The misclassified raw pointer is NOT live across the slow-path call,
    // and so does not appear in any GC info, so the fact that it is
    // misclassified is OK.

    if( (lrg1->_is_oop || !lrg2->_is_oop) && // not an oop->int cast AND
        // Compatible final mask
        lrg1->mask().overlap( lrg2->mask() ) ) {
      // Merge larger into smaller.
      if( lr1 > lr2 ) {
        uint  tmp =  lr1;  lr1 =  lr2;  lr2 =  tmp;
        Node   *n =   n1;   n1 =   n2;   n2 =    n;
        LRG *ltmp = lrg1; lrg1 = lrg2; lrg2 = ltmp;
      }
      // Union lr2 into lr1
      _phc.Union( n1, n2 );
      if (lrg1->_maxfreq < lrg2->_maxfreq)
        lrg1->_maxfreq = lrg2->_maxfreq;
      // Merge in the IFG
      _phc._ifg->Union( lr1, lr2 );
      // Combine register restrictions
      lrg1->AND(lrg2->mask());
    }
  }
}

//------------------------------coalesce_driver--------------------------------
// Copy coalescing
void PhaseCoalesce::coalesce_driver( ) {

  verify();
  // Coalesce from high frequency to low
  for( uint i=0; i<_phc._cfg._num_blocks; i++ )
    coalesce( _phc._blks[i] );

}

//------------------------------insert_copy_with_overlap-----------------------
// I am inserting copies to come out of SSA form.  In the general case, I am
// doing a parallel renaming.  I'm in the Named world now, so I can't do a
// general parallel renaming.  All the copies now use  "names" (live-ranges)
// to carry values instead of the explicit use-def chains.  Suppose I need to
// insert 2 copies into the same block.  They copy L161->L128 and L128->L132.
// If I insert them in the wrong order then L128 will get clobbered before it
// can get used by the second copy.  This cannot happen in the SSA model;
// direct use-def chains get me the right value.  It DOES happen in the named
// model so I have to handle the reordering of copies.
//
// In general, I need to topo-sort the placed copies to avoid conflicts.
// Its possible to have a closed cycle of copies (e.g., recirculating the same
// values around a loop).  In this case I need a temp to break the cycle.
void PhaseAggressiveCoalesce::insert_copy_with_overlap( Block *b, Node *copy, uint dst_name, uint src_name ) {

  // Scan backwards for the locations of the last use of the dst_name.
  // I am about to clobber the dst_name, so the copy must be inserted
  // after the last use.  Last use is really first-use on a backwards scan.
  uint i = b->end_idx()-1;
161
  while(1) {
D
duke 已提交
162 163 164
    Node *n = b->_nodes[i];
    // Check for end of virtual copies; this is also the end of the
    // parallel renaming effort.
165 166 167
    if (n->_idx < _unique) {
      break;
    }
D
duke 已提交
168
    uint idx = n->is_Copy();
K
kvn 已提交
169
    assert( idx || n->is_Con() || n->is_MachProj(), "Only copies during parallel renaming" );
170 171 172
    if (idx && _phc._lrg_map.find(n->in(idx)) == dst_name) {
      break;
    }
D
duke 已提交
173 174 175 176 177 178 179 180 181 182
    i--;
  }
  uint last_use_idx = i;

  // Also search for any kill of src_name that exits the block.
  // Since the copy uses src_name, I have to come before any kill.
  uint kill_src_idx = b->end_idx();
  // There can be only 1 kill that exits any block and that is
  // the last kill.  Thus it is the first kill on a backwards scan.
  i = b->end_idx()-1;
183
  while (1) {
D
duke 已提交
184 185 186
    Node *n = b->_nodes[i];
    // Check for end of virtual copies; this is also the end of the
    // parallel renaming effort.
187 188 189
    if (n->_idx < _unique) {
      break;
    }
K
kvn 已提交
190
    assert( n->is_Copy() || n->is_Con() || n->is_MachProj(), "Only copies during parallel renaming" );
191
    if (_phc._lrg_map.find(n) == src_name) {
D
duke 已提交
192 193 194 195 196 197
      kill_src_idx = i;
      break;
    }
    i--;
  }
  // Need a temp?  Last use of dst comes after the kill of src?
198
  if (last_use_idx >= kill_src_idx) {
D
duke 已提交
199 200 201
    // Need to break a cycle with a temp
    uint idx = copy->is_Copy();
    Node *tmp = copy->clone();
202 203 204 205
    uint max_lrg_id = _phc._lrg_map.max_lrg_id();
    _phc.new_lrg(tmp, max_lrg_id);
    _phc._lrg_map.set_max_lrg_id(max_lrg_id + 1);

D
duke 已提交
206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222
    // Insert new temp between copy and source
    tmp ->set_req(idx,copy->in(idx));
    copy->set_req(idx,tmp);
    // Save source in temp early, before source is killed
    b->_nodes.insert(kill_src_idx,tmp);
    _phc._cfg._bbs.map( tmp->_idx, b );
    last_use_idx++;
  }

  // Insert just after last use
  b->_nodes.insert(last_use_idx+1,copy);
}

//------------------------------insert_copies----------------------------------
void PhaseAggressiveCoalesce::insert_copies( Matcher &matcher ) {
  // We do LRGs compressing and fix a liveout data only here since the other
  // place in Split() is guarded by the assert which we never hit.
223
  _phc._lrg_map.compress_uf_map_for_nodes();
D
duke 已提交
224
  // Fix block's liveout data for compressed live ranges.
225 226 227 228
  for (uint lrg = 1; lrg < _phc._lrg_map.max_lrg_id(); lrg++) {
    uint compressed_lrg = _phc._lrg_map.find(lrg);
    if (lrg != compressed_lrg) {
      for (uint bidx = 0; bidx < _phc._cfg._num_blocks; bidx++) {
D
duke 已提交
229
        IndexSet *liveout = _phc._live->live(_phc._cfg._blocks[bidx]);
230
        if (liveout->member(lrg)) {
D
duke 已提交
231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255
          liveout->remove(lrg);
          liveout->insert(compressed_lrg);
        }
      }
    }
  }

  // All new nodes added are actual copies to replace virtual copies.
  // Nodes with index less than '_unique' are original, non-virtual Nodes.
  _unique = C->unique();

  for( uint i=0; i<_phc._cfg._num_blocks; i++ ) {
    Block *b = _phc._cfg._blocks[i];
    uint cnt = b->num_preds();  // Number of inputs to the Phi

    for( uint l = 1; l<b->_nodes.size(); l++ ) {
      Node *n = b->_nodes[l];

      // Do not use removed-copies, use copied value instead
      uint ncnt = n->req();
      for( uint k = 1; k<ncnt; k++ ) {
        Node *copy = n->in(k);
        uint cidx = copy->is_Copy();
        if( cidx ) {
          Node *def = copy->in(cidx);
256 257 258
          if (_phc._lrg_map.find(copy) == _phc._lrg_map.find(def)) {
            n->set_req(k, def);
          }
D
duke 已提交
259 260 261 262 263 264 265
        }
      }

      // Remove any explicit copies that get coalesced.
      uint cidx = n->is_Copy();
      if( cidx ) {
        Node *def = n->in(cidx);
266
        if (_phc._lrg_map.find(n) == _phc._lrg_map.find(def)) {
D
duke 已提交
267 268 269 270 271 272 273 274
          n->replace_by(def);
          n->set_req(cidx,NULL);
          b->_nodes.remove(l);
          l--;
          continue;
        }
      }

275
      if (n->is_Phi()) {
D
duke 已提交
276
        // Get the chosen name for the Phi
277
        uint phi_name = _phc._lrg_map.find(n);
D
duke 已提交
278
        // Ignore the pre-allocated specials
279 280 281
        if (!phi_name) {
          continue;
        }
D
duke 已提交
282
        // Check for mismatch inputs to Phi
283
        for (uint j = 1; j < cnt; j++) {
D
duke 已提交
284
          Node *m = n->in(j);
285 286
          uint src_name = _phc._lrg_map.find(m);
          if (src_name != phi_name) {
D
duke 已提交
287 288 289 290 291 292 293 294 295 296
            Block *pred = _phc._cfg._bbs[b->pred(j)->_idx];
            Node *copy;
            assert(!m->is_Con() || m->is_Mach(), "all Con must be Mach");
            // Rematerialize constants instead of copying them
            if( m->is_Mach() && m->as_Mach()->is_Con() &&
                m->as_Mach()->rematerialize() ) {
              copy = m->clone();
              // Insert the copy in the predecessor basic block
              pred->add_inst(copy);
              // Copy any flags as well
297
              _phc.clone_projs(pred, pred->end_idx(), m, copy, _phc._lrg_map);
D
duke 已提交
298 299
            } else {
              const RegMask *rm = C->matcher()->idealreg2spillmask[m->ideal_reg()];
300
              copy = new (C) MachSpillCopyNode(m, *rm, *rm);
D
duke 已提交
301 302 303 304
              // Find a good place to insert.  Kinda tricky, use a subroutine
              insert_copy_with_overlap(pred,copy,phi_name,src_name);
            }
            // Insert the copy in the use-def chain
305
            n->set_req(j, copy);
D
duke 已提交
306 307
            _phc._cfg._bbs.map( copy->_idx, pred );
            // Extend ("register allocate") the names array for the copy.
308
            _phc._lrg_map.extend(copy->_idx, phi_name);
D
duke 已提交
309 310 311 312 313 314 315 316
          } // End of if Phi names do not match
        } // End of for all inputs to Phi
      } else { // End of if Phi

        // Now check for 2-address instructions
        uint idx;
        if( n->is_Mach() && (idx=n->as_Mach()->two_adr()) ) {
          // Get the chosen name for the Node
317 318
          uint name = _phc._lrg_map.find(n);
          assert (name, "no 2-address specials");
D
duke 已提交
319 320
          // Check for name mis-match on the 2-address input
          Node *m = n->in(idx);
321
          if (_phc._lrg_map.find(m) != name) {
D
duke 已提交
322 323 324 325
            Node *copy;
            assert(!m->is_Con() || m->is_Mach(), "all Con must be Mach");
            // At this point it is unsafe to extend live ranges (6550579).
            // Rematerialize only constants as we do for Phi above.
326 327
            if(m->is_Mach() && m->as_Mach()->is_Con() &&
               m->as_Mach()->rematerialize()) {
D
duke 已提交
328 329
              copy = m->clone();
              // Insert the copy in the basic block, just before us
330 331
              b->_nodes.insert(l++, copy);
              if(_phc.clone_projs(b, l, m, copy, _phc._lrg_map)) {
D
duke 已提交
332
                l++;
333
              }
D
duke 已提交
334 335
            } else {
              const RegMask *rm = C->matcher()->idealreg2spillmask[m->ideal_reg()];
336
              copy = new (C) MachSpillCopyNode(m, *rm, *rm);
D
duke 已提交
337
              // Insert the copy in the basic block, just before us
338
              b->_nodes.insert(l++, copy);
D
duke 已提交
339 340
            }
            // Insert the copy in the use-def chain
341
            n->set_req(idx, copy);
D
duke 已提交
342
            // Extend ("register allocate") the names array for the copy.
343
            _phc._lrg_map.extend(copy->_idx, name);
D
duke 已提交
344 345 346 347 348 349
            _phc._cfg._bbs.map( copy->_idx, b );
          }

        } // End of is two-adr

        // Insert a copy at a debug use for a lrg which has high frequency
350
        if (b->_freq < OPTO_DEBUG_SPLIT_FREQ || b->is_uncommon(_phc._cfg._bbs)) {
D
duke 已提交
351 352 353 354 355 356 357
          // Walk the debug inputs to the node and check for lrg freq
          JVMState* jvms = n->jvms();
          uint debug_start = jvms ? jvms->debug_start() : 999999;
          uint debug_end   = jvms ? jvms->debug_end()   : 999999;
          for(uint inpidx = debug_start; inpidx < debug_end; inpidx++) {
            // Do not split monitors; they are only needed for debug table
            // entries and need no code.
358 359 360
            if (jvms->is_monitor_use(inpidx)) {
              continue;
            }
D
duke 已提交
361
            Node *inp = n->in(inpidx);
362
            uint nidx = _phc._lrg_map.live_range_id(inp);
D
duke 已提交
363 364 365
            LRG &lrg = lrgs(nidx);

            // If this lrg has a high frequency use/def
366
            if( lrg._maxfreq >= _phc.high_frequency_lrg() ) {
D
duke 已提交
367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388
              // If the live range is also live out of this block (like it
              // would be for a fast/slow idiom), the normal spill mechanism
              // does an excellent job.  If it is not live out of this block
              // (like it would be for debug info to uncommon trap) splitting
              // the live range now allows a better allocation in the high
              // frequency blocks.
              //   Build_IFG_virtual has converted the live sets to
              // live-IN info, not live-OUT info.
              uint k;
              for( k=0; k < b->_num_succs; k++ )
                if( _phc._live->live(b->_succs[k])->member( nidx ) )
                  break;      // Live in to some successor block?
              if( k < b->_num_succs )
                continue;     // Live out; do not pre-split
              // Split the lrg at this use
              const RegMask *rm = C->matcher()->idealreg2spillmask[inp->ideal_reg()];
              Node *copy = new (C) MachSpillCopyNode( inp, *rm, *rm );
              // Insert the copy in the use-def chain
              n->set_req(inpidx, copy );
              // Insert the copy in the basic block, just before us
              b->_nodes.insert( l++, copy );
              // Extend ("register allocate") the names array for the copy.
389 390 391 392
              uint max_lrg_id = _phc._lrg_map.max_lrg_id();
              _phc.new_lrg(copy, max_lrg_id);
              _phc._lrg_map.set_max_lrg_id(max_lrg_id + 1);
              _phc._cfg._bbs.map(copy->_idx, b);
D
duke 已提交
393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454
              //tty->print_cr("Split a debug use in Aggressive Coalesce");
            }  // End of if high frequency use/def
          }  // End of for all debug inputs
        }  // End of if low frequency safepoint

      } // End of if Phi

    } // End of for all instructions
  } // End of for all blocks
}

//=============================================================================
//------------------------------coalesce---------------------------------------
// Aggressive (but pessimistic) copy coalescing of a single block

// The following coalesce pass represents a single round of aggressive
// pessimistic coalesce.  "Aggressive" means no attempt to preserve
// colorability when coalescing.  This occasionally means more spills, but
// it also means fewer rounds of coalescing for better code - and that means
// faster compiles.

// "Pessimistic" means we do not hit the fixed point in one pass (and we are
// reaching for the least fixed point to boot).  This is typically solved
// with a few more rounds of coalescing, but the compiler must run fast.  We
// could optimistically coalescing everything touching PhiNodes together
// into one big live range, then check for self-interference.  Everywhere
// the live range interferes with self it would have to be split.  Finding
// the right split points can be done with some heuristics (based on
// expected frequency of edges in the live range).  In short, it's a real
// research problem and the timeline is too short to allow such research.
// Further thoughts: (1) build the LR in a pass, (2) find self-interference
// in another pass, (3) per each self-conflict, split, (4) split by finding
// the low-cost cut (min-cut) of the LR, (5) edges in the LR are weighted
// according to the GCM algorithm (or just exec freq on CFG edges).

void PhaseAggressiveCoalesce::coalesce( Block *b ) {
  // Copies are still "virtual" - meaning we have not made them explicitly
  // copies.  Instead, Phi functions of successor blocks have mis-matched
  // live-ranges.  If I fail to coalesce, I'll have to insert a copy to line
  // up the live-ranges.  Check for Phis in successor blocks.
  uint i;
  for( i=0; i<b->_num_succs; i++ ) {
    Block *bs = b->_succs[i];
    // Find index of 'b' in 'bs' predecessors
    uint j=1;
    while( _phc._cfg._bbs[bs->pred(j)->_idx] != b ) j++;
    // Visit all the Phis in successor block
    for( uint k = 1; k<bs->_nodes.size(); k++ ) {
      Node *n = bs->_nodes[k];
      if( !n->is_Phi() ) break;
      combine_these_two( n, n->in(j) );
    }
  } // End of for all successor blocks


  // Check _this_ block for 2-address instructions and copies.
  uint cnt = b->end_idx();
  for( i = 1; i<cnt; i++ ) {
    Node *n = b->_nodes[i];
    uint idx;
    // 2-address instructions have a virtual Copy matching their input
    // to their output
455
    if (n->is_Mach() && (idx = n->as_Mach()->two_adr())) {
D
duke 已提交
456
      MachNode *mach = n->as_Mach();
457
      combine_these_two(mach, mach->in(idx));
D
duke 已提交
458 459 460 461 462 463
    }
  } // End of for all instructions in block
}

//=============================================================================
//------------------------------PhaseConservativeCoalesce----------------------
464 465
PhaseConservativeCoalesce::PhaseConservativeCoalesce(PhaseChaitin &chaitin) : PhaseCoalesce(chaitin) {
  _ulr.initialize(_phc._lrg_map.max_lrg_id());
D
duke 已提交
466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484
}

//------------------------------verify-----------------------------------------
void PhaseConservativeCoalesce::verify() {
#ifdef ASSERT
  _phc.set_was_low();
#endif
}

//------------------------------union_helper-----------------------------------
void PhaseConservativeCoalesce::union_helper( Node *lr1_node, Node *lr2_node, uint lr1, uint lr2, Node *src_def, Node *dst_copy, Node *src_copy, Block *b, uint bindex ) {
  // Join live ranges.  Merge larger into smaller.  Union lr2 into lr1 in the
  // union-find tree
  _phc.Union( lr1_node, lr2_node );

  // Single-def live range ONLY if both live ranges are single-def.
  // If both are single def, then src_def powers one live range
  // and def_copy powers the other.  After merging, src_def powers
  // the combined live range.
485 486
  lrgs(lr1)._def = (lrgs(lr1).is_multidef() ||
                        lrgs(lr2).is_multidef() )
D
duke 已提交
487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544
    ? NodeSentinel : src_def;
  lrgs(lr2)._def = NULL;    // No def for lrg 2
  lrgs(lr2).Clear();        // Force empty mask for LRG 2
  //lrgs(lr2)._size = 0;      // Live-range 2 goes dead
  lrgs(lr1)._is_oop |= lrgs(lr2)._is_oop;
  lrgs(lr2)._is_oop = 0;    // In particular, not an oop for GC info

  if (lrgs(lr1)._maxfreq < lrgs(lr2)._maxfreq)
    lrgs(lr1)._maxfreq = lrgs(lr2)._maxfreq;

  // Copy original value instead.  Intermediate copies go dead, and
  // the dst_copy becomes useless.
  int didx = dst_copy->is_Copy();
  dst_copy->set_req( didx, src_def );
  // Add copy to free list
  // _phc.free_spillcopy(b->_nodes[bindex]);
  assert( b->_nodes[bindex] == dst_copy, "" );
  dst_copy->replace_by( dst_copy->in(didx) );
  dst_copy->set_req( didx, NULL);
  b->_nodes.remove(bindex);
  if( bindex < b->_ihrp_index ) b->_ihrp_index--;
  if( bindex < b->_fhrp_index ) b->_fhrp_index--;

  // Stretched lr1; add it to liveness of intermediate blocks
  Block *b2 = _phc._cfg._bbs[src_copy->_idx];
  while( b != b2 ) {
    b = _phc._cfg._bbs[b->pred(1)->_idx];
    _phc._live->live(b)->insert(lr1);
  }
}

//------------------------------compute_separating_interferences---------------
// Factored code from copy_copy that computes extra interferences from
// lengthening a live range by double-coalescing.
uint PhaseConservativeCoalesce::compute_separating_interferences(Node *dst_copy, Node *src_copy, Block *b, uint bindex, RegMask &rm, uint reg_degree, uint rm_size, uint lr1, uint lr2 ) {

  assert(!lrgs(lr1)._fat_proj, "cannot coalesce fat_proj");
  assert(!lrgs(lr2)._fat_proj, "cannot coalesce fat_proj");
  Node *prev_copy = dst_copy->in(dst_copy->is_Copy());
  Block *b2 = b;
  uint bindex2 = bindex;
  while( 1 ) {
    // Find previous instruction
    bindex2--;                  // Chain backwards 1 instruction
    while( bindex2 == 0 ) {     // At block start, find prior block
      assert( b2->num_preds() == 2, "cannot double coalesce across c-flow" );
      b2 = _phc._cfg._bbs[b2->pred(1)->_idx];
      bindex2 = b2->end_idx()-1;
    }
    // Get prior instruction
    assert(bindex2 < b2->_nodes.size(), "index out of bounds");
    Node *x = b2->_nodes[bindex2];
    if( x == prev_copy ) {      // Previous copy in copy chain?
      if( prev_copy == src_copy)// Found end of chain and all interferences
        break;                  // So break out of loop
      // Else work back one in copy chain
      prev_copy = prev_copy->in(prev_copy->is_Copy());
    } else {                    // Else collect interferences
545
      uint lidx = _phc._lrg_map.find(x);
D
duke 已提交
546
      // Found another def of live-range being stretched?
547 548 549 550 551 552
      if(lidx == lr1) {
        return max_juint;
      }
      if(lidx == lr2) {
        return max_juint;
      }
D
duke 已提交
553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578

      // If we attempt to coalesce across a bound def
      if( lrgs(lidx).is_bound() ) {
        // Do not let the coalesced LRG expect to get the bound color
        rm.SUBTRACT( lrgs(lidx).mask() );
        // Recompute rm_size
        rm_size = rm.Size();
        //if( rm._flags ) rm_size += 1000000;
        if( reg_degree >= rm_size ) return max_juint;
      }
      if( rm.overlap(lrgs(lidx).mask()) ) {
        // Insert lidx into union LRG; returns TRUE if actually inserted
        if( _ulr.insert(lidx) ) {
          // Infinite-stack neighbors do not alter colorability, as they
          // can always color to some other color.
          if( !lrgs(lidx).mask().is_AllStack() ) {
            // If this coalesce will make any new neighbor uncolorable,
            // do not coalesce.
            if( lrgs(lidx).just_lo_degree() )
              return max_juint;
            // Bump our degree
            if( ++reg_degree >= rm_size )
              return max_juint;
          } // End of if not infinite-stack neighbor
        } // End of if actually inserted
      } // End of if live range overlaps
T
twisti 已提交
579 580
    } // End of else collect interferences for 1 node
  } // End of while forever, scan back for interferences
D
duke 已提交
581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626
  return reg_degree;
}

//------------------------------update_ifg-------------------------------------
void PhaseConservativeCoalesce::update_ifg(uint lr1, uint lr2, IndexSet *n_lr1, IndexSet *n_lr2) {
  // Some original neighbors of lr1 might have gone away
  // because the constrained register mask prevented them.
  // Remove lr1 from such neighbors.
  IndexSetIterator one(n_lr1);
  uint neighbor;
  LRG &lrg1 = lrgs(lr1);
  while ((neighbor = one.next()) != 0)
    if( !_ulr.member(neighbor) )
      if( _phc._ifg->neighbors(neighbor)->remove(lr1) )
        lrgs(neighbor).inc_degree( -lrg1.compute_degree(lrgs(neighbor)) );


  // lr2 is now called (coalesced into) lr1.
  // Remove lr2 from the IFG.
  IndexSetIterator two(n_lr2);
  LRG &lrg2 = lrgs(lr2);
  while ((neighbor = two.next()) != 0)
    if( _phc._ifg->neighbors(neighbor)->remove(lr2) )
      lrgs(neighbor).inc_degree( -lrg2.compute_degree(lrgs(neighbor)) );

  // Some neighbors of intermediate copies now interfere with the
  // combined live range.
  IndexSetIterator three(&_ulr);
  while ((neighbor = three.next()) != 0)
    if( _phc._ifg->neighbors(neighbor)->insert(lr1) )
      lrgs(neighbor).inc_degree( lrg1.compute_degree(lrgs(neighbor)) );
}

//------------------------------record_bias------------------------------------
static void record_bias( const PhaseIFG *ifg, int lr1, int lr2 ) {
  // Tag copy bias here
  if( !ifg->lrgs(lr1)._copy_bias )
    ifg->lrgs(lr1)._copy_bias = lr2;
  if( !ifg->lrgs(lr2)._copy_bias )
    ifg->lrgs(lr2)._copy_bias = lr1;
}

//------------------------------copy_copy--------------------------------------
// See if I can coalesce a series of multiple copies together.  I need the
// final dest copy and the original src copy.  They can be the same Node.
// Compute the compatible register masks.
627
bool PhaseConservativeCoalesce::copy_copy(Node *dst_copy, Node *src_copy, Block *b, uint bindex) {
D
duke 已提交
628

629 630 631 632 633 634
  if (!dst_copy->is_SpillCopy()) {
    return false;
  }
  if (!src_copy->is_SpillCopy()) {
    return false;
  }
D
duke 已提交
635
  Node *src_def = src_copy->in(src_copy->is_Copy());
636 637
  uint lr1 = _phc._lrg_map.find(dst_copy);
  uint lr2 = _phc._lrg_map.find(src_def);
D
duke 已提交
638 639

  // Same live ranges already?
640 641 642
  if (lr1 == lr2) {
    return false;
  }
D
duke 已提交
643 644

  // Interfere?
645 646 647
  if (_phc._ifg->test_edge_sq(lr1, lr2)) {
    return false;
  }
D
duke 已提交
648 649

  // Not an oop->int cast; oop->oop, int->int, AND int->oop are OK.
650
  if (!lrgs(lr1)._is_oop && lrgs(lr2)._is_oop) { // not an oop->int cast
D
duke 已提交
651
    return false;
652
  }
D
duke 已提交
653 654 655

  // Coalescing between an aligned live range and a mis-aligned live range?
  // No, no!  Alignment changes how we count degree.
656
  if (lrgs(lr1)._fat_proj != lrgs(lr2)._fat_proj) {
D
duke 已提交
657
    return false;
658
  }
D
duke 已提交
659 660 661 662

  // Sort; use smaller live-range number
  Node *lr1_node = dst_copy;
  Node *lr2_node = src_def;
663
  if (lr1 > lr2) {
D
duke 已提交
664 665 666 667 668 669 670 671 672 673 674
    uint tmp = lr1; lr1 = lr2; lr2 = tmp;
    lr1_node = src_def;  lr2_node = dst_copy;
  }

  // Check for compatibility of the 2 live ranges by
  // intersecting their allowed register sets.
  RegMask rm = lrgs(lr1).mask();
  rm.AND(lrgs(lr2).mask());
  // Number of bits free
  uint rm_size = rm.Size();

675 676 677 678 679 680 681 682
  if (UseFPUForSpilling && rm.is_AllStack() ) {
    // Don't coalesce when frequency difference is large
    Block *dst_b = _phc._cfg._bbs[dst_copy->_idx];
    Block *src_def_b = _phc._cfg._bbs[src_def->_idx];
    if (src_def_b->_freq > 10*dst_b->_freq )
      return false;
  }

D
duke 已提交
683 684 685 686 687 688
  // If we can use any stack slot, then effective size is infinite
  if( rm.is_AllStack() ) rm_size += 1000000;
  // Incompatible masks, no way to coalesce
  if( rm_size == 0 ) return false;

  // Another early bail-out test is when we are double-coalescing and the
T
twisti 已提交
689
  // 2 copies are separated by some control flow.
D
duke 已提交
690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803
  if( dst_copy != src_copy ) {
    Block *src_b = _phc._cfg._bbs[src_copy->_idx];
    Block *b2 = b;
    while( b2 != src_b ) {
      if( b2->num_preds() > 2 ){// Found merge-point
        _phc._lost_opp_cflow_coalesce++;
        // extra record_bias commented out because Chris believes it is not
        // productive.  Since we can record only 1 bias, we want to choose one
        // that stands a chance of working and this one probably does not.
        //record_bias( _phc._lrgs, lr1, lr2 );
        return false;           // To hard to find all interferences
      }
      b2 = _phc._cfg._bbs[b2->pred(1)->_idx];
    }
  }

  // Union the two interference sets together into '_ulr'
  uint reg_degree = _ulr.lrg_union( lr1, lr2, rm_size, _phc._ifg, rm );

  if( reg_degree >= rm_size ) {
    record_bias( _phc._ifg, lr1, lr2 );
    return false;
  }

  // Now I need to compute all the interferences between dst_copy and
  // src_copy.  I'm not willing visit the entire interference graph, so
  // I limit my search to things in dst_copy's block or in a straight
  // line of previous blocks.  I give up at merge points or when I get
  // more interferences than my degree.  I can stop when I find src_copy.
  if( dst_copy != src_copy ) {
    reg_degree = compute_separating_interferences(dst_copy, src_copy, b, bindex, rm, rm_size, reg_degree, lr1, lr2 );
    if( reg_degree == max_juint ) {
      record_bias( _phc._ifg, lr1, lr2 );
      return false;
    }
  } // End of if dst_copy & src_copy are different


  // ---- THE COMBINED LRG IS COLORABLE ----

  // YEAH - Now coalesce this copy away
  assert( lrgs(lr1).num_regs() == lrgs(lr2).num_regs(),   "" );

  IndexSet *n_lr1 = _phc._ifg->neighbors(lr1);
  IndexSet *n_lr2 = _phc._ifg->neighbors(lr2);

  // Update the interference graph
  update_ifg(lr1, lr2, n_lr1, n_lr2);

  _ulr.remove(lr1);

  // Uncomment the following code to trace Coalescing in great detail.
  //
  //if (false) {
  //  tty->cr();
  //  tty->print_cr("#######################################");
  //  tty->print_cr("union %d and %d", lr1, lr2);
  //  n_lr1->dump();
  //  n_lr2->dump();
  //  tty->print_cr("resulting set is");
  //  _ulr.dump();
  //}

  // Replace n_lr1 with the new combined live range.  _ulr will use
  // n_lr1's old memory on the next iteration.  n_lr2 is cleared to
  // send its internal memory to the free list.
  _ulr.swap(n_lr1);
  _ulr.clear();
  n_lr2->clear();

  lrgs(lr1).set_degree( _phc._ifg->effective_degree(lr1) );
  lrgs(lr2).set_degree( 0 );

  // Join live ranges.  Merge larger into smaller.  Union lr2 into lr1 in the
  // union-find tree
  union_helper( lr1_node, lr2_node, lr1, lr2, src_def, dst_copy, src_copy, b, bindex );
  // Combine register restrictions
  lrgs(lr1).set_mask(rm);
  lrgs(lr1).compute_set_mask_size();
  lrgs(lr1)._cost += lrgs(lr2)._cost;
  lrgs(lr1)._area += lrgs(lr2)._area;

  // While its uncommon to successfully coalesce live ranges that started out
  // being not-lo-degree, it can happen.  In any case the combined coalesced
  // live range better Simplify nicely.
  lrgs(lr1)._was_lo = 1;

  // kinda expensive to do all the time
  //tty->print_cr("warning: slow verify happening");
  //_phc._ifg->verify( &_phc );
  return true;
}

//------------------------------coalesce---------------------------------------
// Conservative (but pessimistic) copy coalescing of a single block
void PhaseConservativeCoalesce::coalesce( Block *b ) {
  // Bail out on infrequent blocks
  if( b->is_uncommon(_phc._cfg._bbs) )
    return;
  // Check this block for copies.
  for( uint i = 1; i<b->end_idx(); i++ ) {
    // Check for actual copies on inputs.  Coalesce a copy into its
    // input if use and copy's input are compatible.
    Node *copy1 = b->_nodes[i];
    uint idx1 = copy1->is_Copy();
    if( !idx1 ) continue;       // Not a copy

    if( copy_copy(copy1,copy1,b,i) ) {
      i--;                      // Retry, same location in block
      PhaseChaitin::_conserv_coalesce++;  // Collect stats on success
      continue;
    }
  }
}