coalesce.cpp 30.2 KB
Newer Older
D
duke 已提交
1
/*
2
 * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
D
duke 已提交
3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
19 20 21
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
D
duke 已提交
22 23 24
 *
 */

25 26 27 28 29 30 31 32 33 34 35
#include "precompiled.hpp"
#include "memory/allocation.inline.hpp"
#include "opto/block.hpp"
#include "opto/cfgnode.hpp"
#include "opto/chaitin.hpp"
#include "opto/coalesce.hpp"
#include "opto/connode.hpp"
#include "opto/indexSet.hpp"
#include "opto/machnode.hpp"
#include "opto/matcher.hpp"
#include "opto/regmask.hpp"
D
duke 已提交
36 37

#ifndef PRODUCT
38
void PhaseCoalesce::dump(Node *n) const {
D
duke 已提交
39
  // Being a const function means I cannot use 'Find'
40
  uint r = _phc._lrg_map.find(n);
D
duke 已提交
41 42 43 44 45
  tty->print("L%d/N%d ",r,n->_idx);
}

void PhaseCoalesce::dump() const {
  // I know I have a block layout now, so I can print blocks in a loop
46
  for( uint i=0; i<_phc._cfg.number_of_blocks(); i++ ) {
D
duke 已提交
47
    uint j;
48
    Block* b = _phc._cfg.get_block(i);
D
duke 已提交
49 50 51
    // Print a nice block header
    tty->print("B%d: ",b->_pre_order);
    for( j=1; j<b->num_preds(); j++ )
52
      tty->print("B%d ", _phc._cfg.get_block_for_node(b->pred(j))->_pre_order);
D
duke 已提交
53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85
    tty->print("-> ");
    for( j=0; j<b->_num_succs; j++ )
      tty->print("B%d ",b->_succs[j]->_pre_order);
    tty->print(" IDom: B%d/#%d\n", b->_idom ? b->_idom->_pre_order : 0, b->_dom_depth);
    uint cnt = b->_nodes.size();
    for( j=0; j<cnt; j++ ) {
      Node *n = b->_nodes[j];
      dump( n );
      tty->print("\t%s\t",n->Name());

      // Dump the inputs
      uint k;                   // Exit value of loop
      for( k=0; k<n->req(); k++ ) // For all required inputs
        if( n->in(k) ) dump( n->in(k) );
        else tty->print("_ ");
      int any_prec = 0;
      for( ; k<n->len(); k++ )          // For all precedence inputs
        if( n->in(k) ) {
          if( !any_prec++ ) tty->print(" |");
          dump( n->in(k) );
        }

      // Dump node-specific info
      n->dump_spec(tty);
      tty->print("\n");

    }
    tty->print("\n");
  }
}
#endif

// Combine the live ranges def'd by these 2 Nodes.  N2 is an input to N1.
86 87 88
void PhaseCoalesce::combine_these_two(Node *n1, Node *n2) {
  uint lr1 = _phc._lrg_map.find(n1);
  uint lr2 = _phc._lrg_map.find(n2);
D
duke 已提交
89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126
  if( lr1 != lr2 &&             // Different live ranges already AND
      !_phc._ifg->test_edge_sq( lr1, lr2 ) ) {  // Do not interfere
    LRG *lrg1 = &_phc.lrgs(lr1);
    LRG *lrg2 = &_phc.lrgs(lr2);
    // Not an oop->int cast; oop->oop, int->int, AND int->oop are OK.

    // Now, why is int->oop OK?  We end up declaring a raw-pointer as an oop
    // and in general that's a bad thing.  However, int->oop conversions only
    // happen at GC points, so the lifetime of the misclassified raw-pointer
    // is from the CheckCastPP (that converts it to an oop) backwards up
    // through a merge point and into the slow-path call, and around the
    // diamond up to the heap-top check and back down into the slow-path call.
    // The misclassified raw pointer is NOT live across the slow-path call,
    // and so does not appear in any GC info, so the fact that it is
    // misclassified is OK.

    if( (lrg1->_is_oop || !lrg2->_is_oop) && // not an oop->int cast AND
        // Compatible final mask
        lrg1->mask().overlap( lrg2->mask() ) ) {
      // Merge larger into smaller.
      if( lr1 > lr2 ) {
        uint  tmp =  lr1;  lr1 =  lr2;  lr2 =  tmp;
        Node   *n =   n1;   n1 =   n2;   n2 =    n;
        LRG *ltmp = lrg1; lrg1 = lrg2; lrg2 = ltmp;
      }
      // Union lr2 into lr1
      _phc.Union( n1, n2 );
      if (lrg1->_maxfreq < lrg2->_maxfreq)
        lrg1->_maxfreq = lrg2->_maxfreq;
      // Merge in the IFG
      _phc._ifg->Union( lr1, lr2 );
      // Combine register restrictions
      lrg1->AND(lrg2->mask());
    }
  }
}

// Copy coalescing
127
void PhaseCoalesce::coalesce_driver() {
D
duke 已提交
128 129
  verify();
  // Coalesce from high frequency to low
130 131 132
  for (uint i = 0; i < _phc._cfg.number_of_blocks(); i++) {
    coalesce(_phc._blks[i]);
  }
D
duke 已提交
133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153
}

// I am inserting copies to come out of SSA form.  In the general case, I am
// doing a parallel renaming.  I'm in the Named world now, so I can't do a
// general parallel renaming.  All the copies now use  "names" (live-ranges)
// to carry values instead of the explicit use-def chains.  Suppose I need to
// insert 2 copies into the same block.  They copy L161->L128 and L128->L132.
// If I insert them in the wrong order then L128 will get clobbered before it
// can get used by the second copy.  This cannot happen in the SSA model;
// direct use-def chains get me the right value.  It DOES happen in the named
// model so I have to handle the reordering of copies.
//
// In general, I need to topo-sort the placed copies to avoid conflicts.
// Its possible to have a closed cycle of copies (e.g., recirculating the same
// values around a loop).  In this case I need a temp to break the cycle.
void PhaseAggressiveCoalesce::insert_copy_with_overlap( Block *b, Node *copy, uint dst_name, uint src_name ) {

  // Scan backwards for the locations of the last use of the dst_name.
  // I am about to clobber the dst_name, so the copy must be inserted
  // after the last use.  Last use is really first-use on a backwards scan.
  uint i = b->end_idx()-1;
154
  while(1) {
D
duke 已提交
155 156 157
    Node *n = b->_nodes[i];
    // Check for end of virtual copies; this is also the end of the
    // parallel renaming effort.
158 159 160
    if (n->_idx < _unique) {
      break;
    }
D
duke 已提交
161
    uint idx = n->is_Copy();
K
kvn 已提交
162
    assert( idx || n->is_Con() || n->is_MachProj(), "Only copies during parallel renaming" );
163 164 165
    if (idx && _phc._lrg_map.find(n->in(idx)) == dst_name) {
      break;
    }
D
duke 已提交
166 167 168 169 170 171 172 173 174 175
    i--;
  }
  uint last_use_idx = i;

  // Also search for any kill of src_name that exits the block.
  // Since the copy uses src_name, I have to come before any kill.
  uint kill_src_idx = b->end_idx();
  // There can be only 1 kill that exits any block and that is
  // the last kill.  Thus it is the first kill on a backwards scan.
  i = b->end_idx()-1;
176
  while (1) {
D
duke 已提交
177 178 179
    Node *n = b->_nodes[i];
    // Check for end of virtual copies; this is also the end of the
    // parallel renaming effort.
180 181 182
    if (n->_idx < _unique) {
      break;
    }
K
kvn 已提交
183
    assert( n->is_Copy() || n->is_Con() || n->is_MachProj(), "Only copies during parallel renaming" );
184
    if (_phc._lrg_map.find(n) == src_name) {
D
duke 已提交
185 186 187 188 189 190
      kill_src_idx = i;
      break;
    }
    i--;
  }
  // Need a temp?  Last use of dst comes after the kill of src?
191
  if (last_use_idx >= kill_src_idx) {
D
duke 已提交
192 193 194
    // Need to break a cycle with a temp
    uint idx = copy->is_Copy();
    Node *tmp = copy->clone();
195 196 197 198
    uint max_lrg_id = _phc._lrg_map.max_lrg_id();
    _phc.new_lrg(tmp, max_lrg_id);
    _phc._lrg_map.set_max_lrg_id(max_lrg_id + 1);

D
duke 已提交
199 200 201 202 203
    // Insert new temp between copy and source
    tmp ->set_req(idx,copy->in(idx));
    copy->set_req(idx,tmp);
    // Save source in temp early, before source is killed
    b->_nodes.insert(kill_src_idx,tmp);
204
    _phc._cfg.map_node_to_block(tmp, b);
D
duke 已提交
205 206 207 208 209 210 211 212 213 214
    last_use_idx++;
  }

  // Insert just after last use
  b->_nodes.insert(last_use_idx+1,copy);
}

void PhaseAggressiveCoalesce::insert_copies( Matcher &matcher ) {
  // We do LRGs compressing and fix a liveout data only here since the other
  // place in Split() is guarded by the assert which we never hit.
215
  _phc._lrg_map.compress_uf_map_for_nodes();
D
duke 已提交
216
  // Fix block's liveout data for compressed live ranges.
217 218 219
  for (uint lrg = 1; lrg < _phc._lrg_map.max_lrg_id(); lrg++) {
    uint compressed_lrg = _phc._lrg_map.find(lrg);
    if (lrg != compressed_lrg) {
220 221
      for (uint bidx = 0; bidx < _phc._cfg.number_of_blocks(); bidx++) {
        IndexSet *liveout = _phc._live->live(_phc._cfg.get_block(bidx));
222
        if (liveout->member(lrg)) {
D
duke 已提交
223 224 225 226 227 228 229 230 231 232 233
          liveout->remove(lrg);
          liveout->insert(compressed_lrg);
        }
      }
    }
  }

  // All new nodes added are actual copies to replace virtual copies.
  // Nodes with index less than '_unique' are original, non-virtual Nodes.
  _unique = C->unique();

234
  for (uint i = 0; i < _phc._cfg.number_of_blocks(); i++) {
235 236
    C->check_node_count(NodeLimitFudgeFactor, "out of nodes in coalesce");
    if (C->failing()) return;
237
    Block *b = _phc._cfg.get_block(i);
D
duke 已提交
238 239 240 241 242 243 244 245 246 247 248 249
    uint cnt = b->num_preds();  // Number of inputs to the Phi

    for( uint l = 1; l<b->_nodes.size(); l++ ) {
      Node *n = b->_nodes[l];

      // Do not use removed-copies, use copied value instead
      uint ncnt = n->req();
      for( uint k = 1; k<ncnt; k++ ) {
        Node *copy = n->in(k);
        uint cidx = copy->is_Copy();
        if( cidx ) {
          Node *def = copy->in(cidx);
250 251 252
          if (_phc._lrg_map.find(copy) == _phc._lrg_map.find(def)) {
            n->set_req(k, def);
          }
D
duke 已提交
253 254 255 256 257 258 259
        }
      }

      // Remove any explicit copies that get coalesced.
      uint cidx = n->is_Copy();
      if( cidx ) {
        Node *def = n->in(cidx);
260
        if (_phc._lrg_map.find(n) == _phc._lrg_map.find(def)) {
D
duke 已提交
261 262 263 264 265 266 267 268
          n->replace_by(def);
          n->set_req(cidx,NULL);
          b->_nodes.remove(l);
          l--;
          continue;
        }
      }

269
      if (n->is_Phi()) {
D
duke 已提交
270
        // Get the chosen name for the Phi
271
        uint phi_name = _phc._lrg_map.find(n);
D
duke 已提交
272
        // Ignore the pre-allocated specials
273 274 275
        if (!phi_name) {
          continue;
        }
D
duke 已提交
276
        // Check for mismatch inputs to Phi
277
        for (uint j = 1; j < cnt; j++) {
D
duke 已提交
278
          Node *m = n->in(j);
279 280
          uint src_name = _phc._lrg_map.find(m);
          if (src_name != phi_name) {
281
            Block *pred = _phc._cfg.get_block_for_node(b->pred(j));
D
duke 已提交
282 283 284 285 286 287 288 289 290
            Node *copy;
            assert(!m->is_Con() || m->is_Mach(), "all Con must be Mach");
            // Rematerialize constants instead of copying them
            if( m->is_Mach() && m->as_Mach()->is_Con() &&
                m->as_Mach()->rematerialize() ) {
              copy = m->clone();
              // Insert the copy in the predecessor basic block
              pred->add_inst(copy);
              // Copy any flags as well
291
              _phc.clone_projs(pred, pred->end_idx(), m, copy, _phc._lrg_map);
D
duke 已提交
292 293
            } else {
              const RegMask *rm = C->matcher()->idealreg2spillmask[m->ideal_reg()];
294
              copy = new (C) MachSpillCopyNode(m, *rm, *rm);
D
duke 已提交
295 296 297 298
              // Find a good place to insert.  Kinda tricky, use a subroutine
              insert_copy_with_overlap(pred,copy,phi_name,src_name);
            }
            // Insert the copy in the use-def chain
299
            n->set_req(j, copy);
300
            _phc._cfg.map_node_to_block(copy, pred);
D
duke 已提交
301
            // Extend ("register allocate") the names array for the copy.
302
            _phc._lrg_map.extend(copy->_idx, phi_name);
D
duke 已提交
303 304 305 306 307 308 309 310
          } // End of if Phi names do not match
        } // End of for all inputs to Phi
      } else { // End of if Phi

        // Now check for 2-address instructions
        uint idx;
        if( n->is_Mach() && (idx=n->as_Mach()->two_adr()) ) {
          // Get the chosen name for the Node
311 312
          uint name = _phc._lrg_map.find(n);
          assert (name, "no 2-address specials");
D
duke 已提交
313 314
          // Check for name mis-match on the 2-address input
          Node *m = n->in(idx);
315
          if (_phc._lrg_map.find(m) != name) {
D
duke 已提交
316 317 318 319
            Node *copy;
            assert(!m->is_Con() || m->is_Mach(), "all Con must be Mach");
            // At this point it is unsafe to extend live ranges (6550579).
            // Rematerialize only constants as we do for Phi above.
320 321
            if(m->is_Mach() && m->as_Mach()->is_Con() &&
               m->as_Mach()->rematerialize()) {
D
duke 已提交
322 323
              copy = m->clone();
              // Insert the copy in the basic block, just before us
324 325
              b->_nodes.insert(l++, copy);
              if(_phc.clone_projs(b, l, m, copy, _phc._lrg_map)) {
D
duke 已提交
326
                l++;
327
              }
D
duke 已提交
328 329
            } else {
              const RegMask *rm = C->matcher()->idealreg2spillmask[m->ideal_reg()];
330
              copy = new (C) MachSpillCopyNode(m, *rm, *rm);
D
duke 已提交
331
              // Insert the copy in the basic block, just before us
332
              b->_nodes.insert(l++, copy);
D
duke 已提交
333 334
            }
            // Insert the copy in the use-def chain
335
            n->set_req(idx, copy);
D
duke 已提交
336
            // Extend ("register allocate") the names array for the copy.
337
            _phc._lrg_map.extend(copy->_idx, name);
338
            _phc._cfg.map_node_to_block(copy, b);
D
duke 已提交
339 340 341 342 343
          }

        } // End of is two-adr

        // Insert a copy at a debug use for a lrg which has high frequency
344
        if (b->_freq < OPTO_DEBUG_SPLIT_FREQ || b->is_uncommon(&_phc._cfg)) {
D
duke 已提交
345 346 347 348 349 350 351
          // Walk the debug inputs to the node and check for lrg freq
          JVMState* jvms = n->jvms();
          uint debug_start = jvms ? jvms->debug_start() : 999999;
          uint debug_end   = jvms ? jvms->debug_end()   : 999999;
          for(uint inpidx = debug_start; inpidx < debug_end; inpidx++) {
            // Do not split monitors; they are only needed for debug table
            // entries and need no code.
352 353 354
            if (jvms->is_monitor_use(inpidx)) {
              continue;
            }
D
duke 已提交
355
            Node *inp = n->in(inpidx);
356
            uint nidx = _phc._lrg_map.live_range_id(inp);
D
duke 已提交
357 358 359
            LRG &lrg = lrgs(nidx);

            // If this lrg has a high frequency use/def
360
            if( lrg._maxfreq >= _phc.high_frequency_lrg() ) {
D
duke 已提交
361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382
              // If the live range is also live out of this block (like it
              // would be for a fast/slow idiom), the normal spill mechanism
              // does an excellent job.  If it is not live out of this block
              // (like it would be for debug info to uncommon trap) splitting
              // the live range now allows a better allocation in the high
              // frequency blocks.
              //   Build_IFG_virtual has converted the live sets to
              // live-IN info, not live-OUT info.
              uint k;
              for( k=0; k < b->_num_succs; k++ )
                if( _phc._live->live(b->_succs[k])->member( nidx ) )
                  break;      // Live in to some successor block?
              if( k < b->_num_succs )
                continue;     // Live out; do not pre-split
              // Split the lrg at this use
              const RegMask *rm = C->matcher()->idealreg2spillmask[inp->ideal_reg()];
              Node *copy = new (C) MachSpillCopyNode( inp, *rm, *rm );
              // Insert the copy in the use-def chain
              n->set_req(inpidx, copy );
              // Insert the copy in the basic block, just before us
              b->_nodes.insert( l++, copy );
              // Extend ("register allocate") the names array for the copy.
383 384 385
              uint max_lrg_id = _phc._lrg_map.max_lrg_id();
              _phc.new_lrg(copy, max_lrg_id);
              _phc._lrg_map.set_max_lrg_id(max_lrg_id + 1);
386
              _phc._cfg.map_node_to_block(copy, b);
D
duke 已提交
387 388 389 390 391 392 393 394 395 396 397
              //tty->print_cr("Split a debug use in Aggressive Coalesce");
            }  // End of if high frequency use/def
          }  // End of for all debug inputs
        }  // End of if low frequency safepoint

      } // End of if Phi

    } // End of for all instructions
  } // End of for all blocks
}

398

D
duke 已提交
399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430
// Aggressive (but pessimistic) copy coalescing of a single block

// The following coalesce pass represents a single round of aggressive
// pessimistic coalesce.  "Aggressive" means no attempt to preserve
// colorability when coalescing.  This occasionally means more spills, but
// it also means fewer rounds of coalescing for better code - and that means
// faster compiles.

// "Pessimistic" means we do not hit the fixed point in one pass (and we are
// reaching for the least fixed point to boot).  This is typically solved
// with a few more rounds of coalescing, but the compiler must run fast.  We
// could optimistically coalescing everything touching PhiNodes together
// into one big live range, then check for self-interference.  Everywhere
// the live range interferes with self it would have to be split.  Finding
// the right split points can be done with some heuristics (based on
// expected frequency of edges in the live range).  In short, it's a real
// research problem and the timeline is too short to allow such research.
// Further thoughts: (1) build the LR in a pass, (2) find self-interference
// in another pass, (3) per each self-conflict, split, (4) split by finding
// the low-cost cut (min-cut) of the LR, (5) edges in the LR are weighted
// according to the GCM algorithm (or just exec freq on CFG edges).

void PhaseAggressiveCoalesce::coalesce( Block *b ) {
  // Copies are still "virtual" - meaning we have not made them explicitly
  // copies.  Instead, Phi functions of successor blocks have mis-matched
  // live-ranges.  If I fail to coalesce, I'll have to insert a copy to line
  // up the live-ranges.  Check for Phis in successor blocks.
  uint i;
  for( i=0; i<b->_num_succs; i++ ) {
    Block *bs = b->_succs[i];
    // Find index of 'b' in 'bs' predecessors
    uint j=1;
431 432 433 434
    while (_phc._cfg.get_block_for_node(bs->pred(j)) != b) {
      j++;
    }

D
duke 已提交
435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450
    // Visit all the Phis in successor block
    for( uint k = 1; k<bs->_nodes.size(); k++ ) {
      Node *n = bs->_nodes[k];
      if( !n->is_Phi() ) break;
      combine_these_two( n, n->in(j) );
    }
  } // End of for all successor blocks


  // Check _this_ block for 2-address instructions and copies.
  uint cnt = b->end_idx();
  for( i = 1; i<cnt; i++ ) {
    Node *n = b->_nodes[i];
    uint idx;
    // 2-address instructions have a virtual Copy matching their input
    // to their output
451
    if (n->is_Mach() && (idx = n->as_Mach()->two_adr())) {
D
duke 已提交
452
      MachNode *mach = n->as_Mach();
453
      combine_these_two(mach, mach->in(idx));
D
duke 已提交
454 455 456 457
    }
  } // End of for all instructions in block
}

458 459
PhaseConservativeCoalesce::PhaseConservativeCoalesce(PhaseChaitin &chaitin) : PhaseCoalesce(chaitin) {
  _ulr.initialize(_phc._lrg_map.max_lrg_id());
D
duke 已提交
460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476
}

void PhaseConservativeCoalesce::verify() {
#ifdef ASSERT
  _phc.set_was_low();
#endif
}

void PhaseConservativeCoalesce::union_helper( Node *lr1_node, Node *lr2_node, uint lr1, uint lr2, Node *src_def, Node *dst_copy, Node *src_copy, Block *b, uint bindex ) {
  // Join live ranges.  Merge larger into smaller.  Union lr2 into lr1 in the
  // union-find tree
  _phc.Union( lr1_node, lr2_node );

  // Single-def live range ONLY if both live ranges are single-def.
  // If both are single def, then src_def powers one live range
  // and def_copy powers the other.  After merging, src_def powers
  // the combined live range.
477 478
  lrgs(lr1)._def = (lrgs(lr1).is_multidef() ||
                        lrgs(lr2).is_multidef() )
D
duke 已提交
479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502
    ? NodeSentinel : src_def;
  lrgs(lr2)._def = NULL;    // No def for lrg 2
  lrgs(lr2).Clear();        // Force empty mask for LRG 2
  //lrgs(lr2)._size = 0;      // Live-range 2 goes dead
  lrgs(lr1)._is_oop |= lrgs(lr2)._is_oop;
  lrgs(lr2)._is_oop = 0;    // In particular, not an oop for GC info

  if (lrgs(lr1)._maxfreq < lrgs(lr2)._maxfreq)
    lrgs(lr1)._maxfreq = lrgs(lr2)._maxfreq;

  // Copy original value instead.  Intermediate copies go dead, and
  // the dst_copy becomes useless.
  int didx = dst_copy->is_Copy();
  dst_copy->set_req( didx, src_def );
  // Add copy to free list
  // _phc.free_spillcopy(b->_nodes[bindex]);
  assert( b->_nodes[bindex] == dst_copy, "" );
  dst_copy->replace_by( dst_copy->in(didx) );
  dst_copy->set_req( didx, NULL);
  b->_nodes.remove(bindex);
  if( bindex < b->_ihrp_index ) b->_ihrp_index--;
  if( bindex < b->_fhrp_index ) b->_fhrp_index--;

  // Stretched lr1; add it to liveness of intermediate blocks
503
  Block *b2 = _phc._cfg.get_block_for_node(src_copy);
D
duke 已提交
504
  while( b != b2 ) {
505
    b = _phc._cfg.get_block_for_node(b->pred(1));
D
duke 已提交
506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523
    _phc._live->live(b)->insert(lr1);
  }
}

// Factored code from copy_copy that computes extra interferences from
// lengthening a live range by double-coalescing.
uint PhaseConservativeCoalesce::compute_separating_interferences(Node *dst_copy, Node *src_copy, Block *b, uint bindex, RegMask &rm, uint reg_degree, uint rm_size, uint lr1, uint lr2 ) {

  assert(!lrgs(lr1)._fat_proj, "cannot coalesce fat_proj");
  assert(!lrgs(lr2)._fat_proj, "cannot coalesce fat_proj");
  Node *prev_copy = dst_copy->in(dst_copy->is_Copy());
  Block *b2 = b;
  uint bindex2 = bindex;
  while( 1 ) {
    // Find previous instruction
    bindex2--;                  // Chain backwards 1 instruction
    while( bindex2 == 0 ) {     // At block start, find prior block
      assert( b2->num_preds() == 2, "cannot double coalesce across c-flow" );
524
      b2 = _phc._cfg.get_block_for_node(b2->pred(1));
D
duke 已提交
525 526 527 528 529 530 531 532 533 534 535
      bindex2 = b2->end_idx()-1;
    }
    // Get prior instruction
    assert(bindex2 < b2->_nodes.size(), "index out of bounds");
    Node *x = b2->_nodes[bindex2];
    if( x == prev_copy ) {      // Previous copy in copy chain?
      if( prev_copy == src_copy)// Found end of chain and all interferences
        break;                  // So break out of loop
      // Else work back one in copy chain
      prev_copy = prev_copy->in(prev_copy->is_Copy());
    } else {                    // Else collect interferences
536
      uint lidx = _phc._lrg_map.find(x);
D
duke 已提交
537
      // Found another def of live-range being stretched?
538 539 540 541 542 543
      if(lidx == lr1) {
        return max_juint;
      }
      if(lidx == lr2) {
        return max_juint;
      }
D
duke 已提交
544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569

      // If we attempt to coalesce across a bound def
      if( lrgs(lidx).is_bound() ) {
        // Do not let the coalesced LRG expect to get the bound color
        rm.SUBTRACT( lrgs(lidx).mask() );
        // Recompute rm_size
        rm_size = rm.Size();
        //if( rm._flags ) rm_size += 1000000;
        if( reg_degree >= rm_size ) return max_juint;
      }
      if( rm.overlap(lrgs(lidx).mask()) ) {
        // Insert lidx into union LRG; returns TRUE if actually inserted
        if( _ulr.insert(lidx) ) {
          // Infinite-stack neighbors do not alter colorability, as they
          // can always color to some other color.
          if( !lrgs(lidx).mask().is_AllStack() ) {
            // If this coalesce will make any new neighbor uncolorable,
            // do not coalesce.
            if( lrgs(lidx).just_lo_degree() )
              return max_juint;
            // Bump our degree
            if( ++reg_degree >= rm_size )
              return max_juint;
          } // End of if not infinite-stack neighbor
        } // End of if actually inserted
      } // End of if live range overlaps
T
twisti 已提交
570 571
    } // End of else collect interferences for 1 node
  } // End of while forever, scan back for interferences
D
duke 已提交
572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614
  return reg_degree;
}

void PhaseConservativeCoalesce::update_ifg(uint lr1, uint lr2, IndexSet *n_lr1, IndexSet *n_lr2) {
  // Some original neighbors of lr1 might have gone away
  // because the constrained register mask prevented them.
  // Remove lr1 from such neighbors.
  IndexSetIterator one(n_lr1);
  uint neighbor;
  LRG &lrg1 = lrgs(lr1);
  while ((neighbor = one.next()) != 0)
    if( !_ulr.member(neighbor) )
      if( _phc._ifg->neighbors(neighbor)->remove(lr1) )
        lrgs(neighbor).inc_degree( -lrg1.compute_degree(lrgs(neighbor)) );


  // lr2 is now called (coalesced into) lr1.
  // Remove lr2 from the IFG.
  IndexSetIterator two(n_lr2);
  LRG &lrg2 = lrgs(lr2);
  while ((neighbor = two.next()) != 0)
    if( _phc._ifg->neighbors(neighbor)->remove(lr2) )
      lrgs(neighbor).inc_degree( -lrg2.compute_degree(lrgs(neighbor)) );

  // Some neighbors of intermediate copies now interfere with the
  // combined live range.
  IndexSetIterator three(&_ulr);
  while ((neighbor = three.next()) != 0)
    if( _phc._ifg->neighbors(neighbor)->insert(lr1) )
      lrgs(neighbor).inc_degree( lrg1.compute_degree(lrgs(neighbor)) );
}

static void record_bias( const PhaseIFG *ifg, int lr1, int lr2 ) {
  // Tag copy bias here
  if( !ifg->lrgs(lr1)._copy_bias )
    ifg->lrgs(lr1)._copy_bias = lr2;
  if( !ifg->lrgs(lr2)._copy_bias )
    ifg->lrgs(lr2)._copy_bias = lr1;
}

// See if I can coalesce a series of multiple copies together.  I need the
// final dest copy and the original src copy.  They can be the same Node.
// Compute the compatible register masks.
615
bool PhaseConservativeCoalesce::copy_copy(Node *dst_copy, Node *src_copy, Block *b, uint bindex) {
D
duke 已提交
616

617 618 619 620 621 622
  if (!dst_copy->is_SpillCopy()) {
    return false;
  }
  if (!src_copy->is_SpillCopy()) {
    return false;
  }
D
duke 已提交
623
  Node *src_def = src_copy->in(src_copy->is_Copy());
624 625
  uint lr1 = _phc._lrg_map.find(dst_copy);
  uint lr2 = _phc._lrg_map.find(src_def);
D
duke 已提交
626 627

  // Same live ranges already?
628 629 630
  if (lr1 == lr2) {
    return false;
  }
D
duke 已提交
631 632

  // Interfere?
633 634 635
  if (_phc._ifg->test_edge_sq(lr1, lr2)) {
    return false;
  }
D
duke 已提交
636 637

  // Not an oop->int cast; oop->oop, int->int, AND int->oop are OK.
638
  if (!lrgs(lr1)._is_oop && lrgs(lr2)._is_oop) { // not an oop->int cast
D
duke 已提交
639
    return false;
640
  }
D
duke 已提交
641 642 643

  // Coalescing between an aligned live range and a mis-aligned live range?
  // No, no!  Alignment changes how we count degree.
644
  if (lrgs(lr1)._fat_proj != lrgs(lr2)._fat_proj) {
D
duke 已提交
645
    return false;
646
  }
D
duke 已提交
647 648 649 650

  // Sort; use smaller live-range number
  Node *lr1_node = dst_copy;
  Node *lr2_node = src_def;
651
  if (lr1 > lr2) {
D
duke 已提交
652 653 654 655 656 657 658 659 660 661 662
    uint tmp = lr1; lr1 = lr2; lr2 = tmp;
    lr1_node = src_def;  lr2_node = dst_copy;
  }

  // Check for compatibility of the 2 live ranges by
  // intersecting their allowed register sets.
  RegMask rm = lrgs(lr1).mask();
  rm.AND(lrgs(lr2).mask());
  // Number of bits free
  uint rm_size = rm.Size();

663 664
  if (UseFPUForSpilling && rm.is_AllStack() ) {
    // Don't coalesce when frequency difference is large
665 666
    Block *dst_b = _phc._cfg.get_block_for_node(dst_copy);
    Block *src_def_b = _phc._cfg.get_block_for_node(src_def);
667 668 669 670
    if (src_def_b->_freq > 10*dst_b->_freq )
      return false;
  }

D
duke 已提交
671 672 673 674 675 676
  // If we can use any stack slot, then effective size is infinite
  if( rm.is_AllStack() ) rm_size += 1000000;
  // Incompatible masks, no way to coalesce
  if( rm_size == 0 ) return false;

  // Another early bail-out test is when we are double-coalescing and the
T
twisti 已提交
677
  // 2 copies are separated by some control flow.
D
duke 已提交
678
  if( dst_copy != src_copy ) {
679
    Block *src_b = _phc._cfg.get_block_for_node(src_copy);
D
duke 已提交
680 681 682 683 684 685 686 687 688 689
    Block *b2 = b;
    while( b2 != src_b ) {
      if( b2->num_preds() > 2 ){// Found merge-point
        _phc._lost_opp_cflow_coalesce++;
        // extra record_bias commented out because Chris believes it is not
        // productive.  Since we can record only 1 bias, we want to choose one
        // that stands a chance of working and this one probably does not.
        //record_bias( _phc._lrgs, lr1, lr2 );
        return false;           // To hard to find all interferences
      }
690
      b2 = _phc._cfg.get_block_for_node(b2->pred(1));
D
duke 已提交
691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773
    }
  }

  // Union the two interference sets together into '_ulr'
  uint reg_degree = _ulr.lrg_union( lr1, lr2, rm_size, _phc._ifg, rm );

  if( reg_degree >= rm_size ) {
    record_bias( _phc._ifg, lr1, lr2 );
    return false;
  }

  // Now I need to compute all the interferences between dst_copy and
  // src_copy.  I'm not willing visit the entire interference graph, so
  // I limit my search to things in dst_copy's block or in a straight
  // line of previous blocks.  I give up at merge points or when I get
  // more interferences than my degree.  I can stop when I find src_copy.
  if( dst_copy != src_copy ) {
    reg_degree = compute_separating_interferences(dst_copy, src_copy, b, bindex, rm, rm_size, reg_degree, lr1, lr2 );
    if( reg_degree == max_juint ) {
      record_bias( _phc._ifg, lr1, lr2 );
      return false;
    }
  } // End of if dst_copy & src_copy are different


  // ---- THE COMBINED LRG IS COLORABLE ----

  // YEAH - Now coalesce this copy away
  assert( lrgs(lr1).num_regs() == lrgs(lr2).num_regs(),   "" );

  IndexSet *n_lr1 = _phc._ifg->neighbors(lr1);
  IndexSet *n_lr2 = _phc._ifg->neighbors(lr2);

  // Update the interference graph
  update_ifg(lr1, lr2, n_lr1, n_lr2);

  _ulr.remove(lr1);

  // Uncomment the following code to trace Coalescing in great detail.
  //
  //if (false) {
  //  tty->cr();
  //  tty->print_cr("#######################################");
  //  tty->print_cr("union %d and %d", lr1, lr2);
  //  n_lr1->dump();
  //  n_lr2->dump();
  //  tty->print_cr("resulting set is");
  //  _ulr.dump();
  //}

  // Replace n_lr1 with the new combined live range.  _ulr will use
  // n_lr1's old memory on the next iteration.  n_lr2 is cleared to
  // send its internal memory to the free list.
  _ulr.swap(n_lr1);
  _ulr.clear();
  n_lr2->clear();

  lrgs(lr1).set_degree( _phc._ifg->effective_degree(lr1) );
  lrgs(lr2).set_degree( 0 );

  // Join live ranges.  Merge larger into smaller.  Union lr2 into lr1 in the
  // union-find tree
  union_helper( lr1_node, lr2_node, lr1, lr2, src_def, dst_copy, src_copy, b, bindex );
  // Combine register restrictions
  lrgs(lr1).set_mask(rm);
  lrgs(lr1).compute_set_mask_size();
  lrgs(lr1)._cost += lrgs(lr2)._cost;
  lrgs(lr1)._area += lrgs(lr2)._area;

  // While its uncommon to successfully coalesce live ranges that started out
  // being not-lo-degree, it can happen.  In any case the combined coalesced
  // live range better Simplify nicely.
  lrgs(lr1)._was_lo = 1;

  // kinda expensive to do all the time
  //tty->print_cr("warning: slow verify happening");
  //_phc._ifg->verify( &_phc );
  return true;
}

// Conservative (but pessimistic) copy coalescing of a single block
void PhaseConservativeCoalesce::coalesce( Block *b ) {
  // Bail out on infrequent blocks
774
  if (b->is_uncommon(&_phc._cfg)) {
D
duke 已提交
775
    return;
776
  }
D
duke 已提交
777 778 779 780 781 782 783 784 785 786 787 788 789 790 791
  // Check this block for copies.
  for( uint i = 1; i<b->end_idx(); i++ ) {
    // Check for actual copies on inputs.  Coalesce a copy into its
    // input if use and copy's input are compatible.
    Node *copy1 = b->_nodes[i];
    uint idx1 = copy1->is_Copy();
    if( !idx1 ) continue;       // Not a copy

    if( copy_copy(copy1,copy1,b,i) ) {
      i--;                      // Retry, same location in block
      PhaseChaitin::_conserv_coalesce++;  // Collect stats on success
      continue;
    }
  }
}