lcm.cpp 41.0 KB
Newer Older
D
duke 已提交
1
/*
2
 * Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved.
D
duke 已提交
3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
19 20 21
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
D
duke 已提交
22 23 24
 *
 */

25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44
#include "precompiled.hpp"
#include "memory/allocation.inline.hpp"
#include "opto/block.hpp"
#include "opto/c2compiler.hpp"
#include "opto/callnode.hpp"
#include "opto/cfgnode.hpp"
#include "opto/machnode.hpp"
#include "opto/runtime.hpp"
#ifdef TARGET_ARCH_MODEL_x86_32
# include "adfiles/ad_x86_32.hpp"
#endif
#ifdef TARGET_ARCH_MODEL_x86_64
# include "adfiles/ad_x86_64.hpp"
#endif
#ifdef TARGET_ARCH_MODEL_sparc
# include "adfiles/ad_sparc.hpp"
#endif
#ifdef TARGET_ARCH_MODEL_zero
# include "adfiles/ad_zero.hpp"
#endif
45 46 47
#ifdef TARGET_ARCH_MODEL_arm
# include "adfiles/ad_arm.hpp"
#endif
48 49 50 51 52
#ifdef TARGET_ARCH_MODEL_ppc_32
# include "adfiles/ad_ppc_32.hpp"
#endif
#ifdef TARGET_ARCH_MODEL_ppc_64
# include "adfiles/ad_ppc_64.hpp"
53
#endif
D
duke 已提交
54

55
// Optimization - Graph Style
D
duke 已提交
56 57 58 59 60 61

//------------------------------implicit_null_check----------------------------
// Detect implicit-null-check opportunities.  Basically, find NULL checks
// with suitable memory ops nearby.  Use the memory op to do the NULL check.
// I can generate a memory op if there is not one nearby.
// The proj is the control projection for the not-null case.
62 63
// The val is the pointer being checked for nullness or
// decodeHeapOop_not_null node if it did not fold into address.
64
void PhaseCFG::implicit_null_check(Block* block, Node *proj, Node *val, int allowed_reasons) {
D
duke 已提交
65 66 67 68 69 70
  // Assume if null check need for 0 offset then always needed
  // Intel solaris doesn't support any null checks yet and no
  // mechanism exists (yet) to set the switches at an os_cpu level
  if( !ImplicitNullChecks || MacroAssembler::needs_explicit_null_check(0)) return;

  // Make sure the ptr-is-null path appears to be uncommon!
71
  float f = block->end()->as_MachIf()->_prob;
D
duke 已提交
72 73 74 75 76 77 78 79 80
  if( proj->Opcode() == Op_IfTrue ) f = 1.0f - f;
  if( f > PROB_UNLIKELY_MAG(4) ) return;

  uint bidx = 0;                // Capture index of value into memop
  bool was_store;               // Memory op is a store op

  // Get the successor block for if the test ptr is non-null
  Block* not_null_block;  // this one goes with the proj
  Block* null_block;
81 82 83
  if (block->get_node(block->number_of_nodes()-1) == proj) {
    null_block     = block->_succs[0];
    not_null_block = block->_succs[1];
D
duke 已提交
84
  } else {
85 86 87
    assert(block->get_node(block->number_of_nodes()-2) == proj, "proj is one or the other");
    not_null_block = block->_succs[0];
    null_block     = block->_succs[1];
D
duke 已提交
88
  }
89 90 91
  while (null_block->is_Empty() == Block::empty_with_goto) {
    null_block     = null_block->_succs[0];
  }
D
duke 已提交
92 93 94 95 96 97 98

  // Search the exception block for an uncommon trap.
  // (See Parse::do_if and Parse::do_ifnull for the reason
  // we need an uncommon trap.  Briefly, we need a way to
  // detect failure of this optimization, as in 6366351.)
  {
    bool found_trap = false;
99
    for (uint i1 = 0; i1 < null_block->number_of_nodes(); i1++) {
100
      Node* nn = null_block->get_node(i1);
D
duke 已提交
101
      if (nn->is_MachCall() &&
T
twisti 已提交
102
          nn->as_MachCall()->entry_point() == SharedRuntime::uncommon_trap_blob()->entry_point()) {
D
duke 已提交
103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125
        const Type* trtype = nn->in(TypeFunc::Parms)->bottom_type();
        if (trtype->isa_int() && trtype->is_int()->is_con()) {
          jint tr_con = trtype->is_int()->get_con();
          Deoptimization::DeoptReason reason = Deoptimization::trap_request_reason(tr_con);
          Deoptimization::DeoptAction action = Deoptimization::trap_request_action(tr_con);
          assert((int)reason < (int)BitsPerInt, "recode bit map");
          if (is_set_nth_bit(allowed_reasons, (int) reason)
              && action != Deoptimization::Action_none) {
            // This uncommon trap is sure to recompile, eventually.
            // When that happens, C->too_many_traps will prevent
            // this transformation from happening again.
            found_trap = true;
          }
        }
        break;
      }
    }
    if (!found_trap) {
      // We did not find an uncommon trap.
      return;
    }
  }

126 127 128 129 130 131 132
  // Check for decodeHeapOop_not_null node which did not fold into address
  bool is_decoden = ((intptr_t)val) & 1;
  val = (Node*)(((intptr_t)val) & ~1);

  assert(!is_decoden || (val->in(0) == NULL) && val->is_Mach() &&
         (val->as_Mach()->ideal_Opcode() == Op_DecodeN), "sanity");

D
duke 已提交
133 134 135 136 137 138 139 140 141
  // Search the successor block for a load or store who's base value is also
  // the tested value.  There may be several.
  Node_List *out = new Node_List(Thread::current()->resource_area());
  MachNode *best = NULL;        // Best found so far
  for (DUIterator i = val->outs(); val->has_out(i); i++) {
    Node *m = val->out(i);
    if( !m->is_Mach() ) continue;
    MachNode *mach = m->as_Mach();
    was_store = false;
142 143
    int iop = mach->ideal_Opcode();
    switch( iop ) {
D
duke 已提交
144
    case Op_LoadB:
145
    case Op_LoadUB:
146
    case Op_LoadUS:
D
duke 已提交
147 148 149 150 151
    case Op_LoadD:
    case Op_LoadF:
    case Op_LoadI:
    case Op_LoadL:
    case Op_LoadP:
152
    case Op_LoadN:
D
duke 已提交
153 154
    case Op_LoadS:
    case Op_LoadKlass:
155
    case Op_LoadNKlass:
D
duke 已提交
156 157 158
    case Op_LoadRange:
    case Op_LoadD_unaligned:
    case Op_LoadL_unaligned:
159
      assert(mach->in(2) == val, "should be address");
D
duke 已提交
160 161 162 163 164 165 166 167 168
      break;
    case Op_StoreB:
    case Op_StoreC:
    case Op_StoreCM:
    case Op_StoreD:
    case Op_StoreF:
    case Op_StoreI:
    case Op_StoreL:
    case Op_StoreP:
169
    case Op_StoreN:
170
    case Op_StoreNKlass:
D
duke 已提交
171 172 173 174 175 176 177
      was_store = true;         // Memory op is a store op
      // Stores will have their address in slot 2 (memory in slot 1).
      // If the value being nul-checked is in another slot, it means we
      // are storing the checked value, which does NOT check the value!
      if( mach->in(2) != val ) continue;
      break;                    // Found a memory op?
    case Op_StrComp:
C
cfang 已提交
178 179
    case Op_StrEquals:
    case Op_StrIndexOf:
180
    case Op_AryEq:
181
    case Op_EncodeISOArray:
D
duke 已提交
182 183 184 185 186 187
      // Not a legit memory op for implicit null check regardless of
      // embedded loads
      continue;
    default:                    // Also check for embedded loads
      if( !mach->needs_anti_dependence_check() )
        continue;               // Not an memory op; skip it
188 189 190 191 192 193
      if( must_clone[iop] ) {
        // Do not move nodes which produce flags because
        // RA will try to clone it to place near branch and
        // it will cause recompilation, see clone_node().
        continue;
      }
194
      {
195 196
        // Check that value is used in memory address in
        // instructions with embedded load (CmpP val1,(val2+off)).
197 198 199 200 201 202 203 204 205 206 207 208 209
        Node* base;
        Node* index;
        const MachOper* oper = mach->memory_inputs(base, index);
        if (oper == NULL || oper == (MachOper*)-1) {
          continue;             // Not an memory op; skip it
        }
        if (val == base ||
            val == index && val->bottom_type()->isa_narrowoop()) {
          break;                // Found it
        } else {
          continue;             // Skip it
        }
      }
D
duke 已提交
210 211 212 213 214 215 216 217
      break;
    }
    // check if the offset is not too high for implicit exception
    {
      intptr_t offset = 0;
      const TypePtr *adr_type = NULL;  // Do not need this return value here
      const Node* base = mach->get_base_and_disp(offset, adr_type);
      if (base == NULL || base == NodeSentinel) {
218 219 220 221
        // Narrow oop address doesn't have base, only index
        if( val->bottom_type()->isa_narrowoop() &&
            MacroAssembler::needs_explicit_null_check(offset) )
          continue;             // Give up if offset is beyond page size
D
duke 已提交
222 223
        // cannot reason about it; is probably not implicit null exception
      } else {
224
        const TypePtr* tptr;
225 226
        if (UseCompressedOops && (Universe::narrow_oop_shift() == 0 ||
                                  Universe::narrow_klass_shift() == 0)) {
227
          // 32-bits narrow oop can be the base of address expressions
228
          tptr = base->get_ptr_type();
229 230 231 232
        } else {
          // only regular oops are expected here
          tptr = base->bottom_type()->is_ptr();
        }
D
duke 已提交
233 234 235 236 237 238 239 240 241 242
        // Give up if offset is not a compile-time constant
        if( offset == Type::OffsetBot || tptr->_offset == Type::OffsetBot )
          continue;
        offset += tptr->_offset; // correct if base is offseted
        if( MacroAssembler::needs_explicit_null_check(offset) )
          continue;             // Give up is reference is beyond 4K page size
      }
    }

    // Check ctrl input to see if the null-check dominates the memory op
243
    Block *cb = get_block_for_node(mach);
D
duke 已提交
244 245
    cb = cb->_idom;             // Always hoist at least 1 block
    if( !was_store ) {          // Stores can be hoisted only one block
246
      while( cb->_dom_depth > (block->_dom_depth + 1))
D
duke 已提交
247 248 249 250
        cb = cb->_idom;         // Hoist loads as far as we want
      // The non-null-block should dominate the memory op, too. Live
      // range spilling will insert a spill in the non-null-block if it is
      // needs to spill the memory op for an implicit null check.
251
      if (cb->_dom_depth == (block->_dom_depth + 1)) {
D
duke 已提交
252 253 254 255
        if (cb != not_null_block) continue;
        cb = cb->_idom;
      }
    }
256
    if( cb != block ) continue;
D
duke 已提交
257 258 259 260 261

    // Found a memory user; see if it can be hoisted to check-block
    uint vidx = 0;              // Capture index of value into memop
    uint j;
    for( j = mach->req()-1; j > 0; j-- ) {
262 263 264 265 266
      if( mach->in(j) == val ) {
        vidx = j;
        // Ignore DecodeN val which could be hoisted to where needed.
        if( is_decoden ) continue;
      }
D
duke 已提交
267
      // Block of memory-op input
268 269
      Block *inb = get_block_for_node(mach->in(j));
      Block *b = block;          // Start from nul check
D
duke 已提交
270 271 272 273 274 275 276 277
      while( b != inb && b->_dom_depth > inb->_dom_depth )
        b = b->_idom;           // search upwards for input
      // See if input dominates null check
      if( b != inb )
        break;
    }
    if( j > 0 )
      continue;
278
    Block *mb = get_block_for_node(mach);
D
duke 已提交
279 280 281 282 283 284
    // Hoisting stores requires more checks for the anti-dependence case.
    // Give up hoisting if we have to move the store past any load.
    if( was_store ) {
      Block *b = mb;            // Start searching here for a local load
      // mach use (faulting) trying to hoist
      // n might be blocker to hoisting
285
      while( b != block ) {
D
duke 已提交
286
        uint k;
287
        for( k = 1; k < b->number_of_nodes(); k++ ) {
288
          Node *n = b->get_node(k);
D
duke 已提交
289 290 291 292
          if( n->needs_anti_dependence_check() &&
              n->in(LoadNode::Memory) == mach->in(StoreNode::Memory) )
            break;              // Found anti-dependent load
        }
293
        if( k < b->number_of_nodes() )
D
duke 已提交
294 295 296
          break;                // Found anti-dependent load
        // Make sure control does not do a merge (would have to check allpaths)
        if( b->num_preds() != 2 ) break;
297
        b = get_block_for_node(b->pred(1)); // Move up to predecessor block
D
duke 已提交
298
      }
299
      if( b != block ) continue;
D
duke 已提交
300 301 302 303 304 305 306 307 308
    }

    // Make sure this memory op is not already being used for a NullCheck
    Node *e = mb->end();
    if( e->is_MachNullCheck() && e->in(1) == mach )
      continue;                 // Already being used as a NULL check

    // Found a candidate!  Pick one with least dom depth - the highest
    // in the dom tree should be closest to the null check.
309
    if (best == NULL || get_block_for_node(mach)->_dom_depth < get_block_for_node(best)->_dom_depth) {
D
duke 已提交
310 311 312 313 314
      best = mach;
      bidx = vidx;
    }
  }
  // No candidate!
315 316 317
  if (best == NULL) {
    return;
  }
D
duke 已提交
318 319 320 321 322

  // ---- Found an implicit null check
  extern int implicit_null_checks;
  implicit_null_checks++;

323 324
  if( is_decoden ) {
    // Check if we need to hoist decodeHeapOop_not_null first.
325 326
    Block *valb = get_block_for_node(val);
    if( block != valb && block->_dom_depth < valb->_dom_depth ) {
327 328
      // Hoist it up to the end of the test block.
      valb->find_remove(val);
329 330
      block->add_inst(val);
      map_node_to_block(val, block);
331 332 333 334
      // DecodeN on x86 may kill flags. Check for flag-killing projections
      // that also need to be hoisted.
      for (DUIterator_Fast jmax, j = val->fast_outs(jmax); j < jmax; j++) {
        Node* n = val->fast_out(j);
K
kvn 已提交
335
        if( n->is_MachProj() ) {
336 337 338
          get_block_for_node(n)->find_remove(n);
          block->add_inst(n);
          map_node_to_block(n, block);
339 340 341 342
        }
      }
    }
  }
D
duke 已提交
343
  // Hoist the memory candidate up to the end of the test block.
344
  Block *old_block = get_block_for_node(best);
D
duke 已提交
345
  old_block->find_remove(best);
346 347
  block->add_inst(best);
  map_node_to_block(best, block);
D
duke 已提交
348 349

  // Move the control dependence
350
  if (best->in(0) && best->in(0) == old_block->head())
351
    best->set_req(0, block->head());
D
duke 已提交
352 353 354 355 356

  // Check for flag-killing projections that also need to be hoisted
  // Should be DU safe because no edge updates.
  for (DUIterator_Fast jmax, j = best->fast_outs(jmax); j < jmax; j++) {
    Node* n = best->fast_out(j);
K
kvn 已提交
357
    if( n->is_MachProj() ) {
358 359 360
      get_block_for_node(n)->find_remove(n);
      block->add_inst(n);
      map_node_to_block(n, block);
D
duke 已提交
361 362 363 364 365 366 367 368 369 370 371 372
    }
  }

  // proj==Op_True --> ne test; proj==Op_False --> eq test.
  // One of two graph shapes got matched:
  //   (IfTrue  (If (Bool NE (CmpP ptr NULL))))
  //   (IfFalse (If (Bool EQ (CmpP ptr NULL))))
  // NULL checks are always branch-if-eq.  If we see a IfTrue projection
  // then we are replacing a 'ne' test with a 'eq' NULL check test.
  // We need to flip the projections to keep the same semantics.
  if( proj->Opcode() == Op_IfTrue ) {
    // Swap order of projections in basic block to swap branch targets
373 374 375 376
    Node *tmp1 = block->get_node(block->end_idx()+1);
    Node *tmp2 = block->get_node(block->end_idx()+2);
    block->map_node(tmp2, block->end_idx()+1);
    block->map_node(tmp1, block->end_idx()+2);
377
    Node *tmp = new (C) Node(C->top()); // Use not NULL input
D
duke 已提交
378 379 380 381 382 383 384 385 386 387 388
    tmp1->replace_by(tmp);
    tmp2->replace_by(tmp1);
    tmp->replace_by(tmp2);
    tmp->destruct();
  }

  // Remove the existing null check; use a new implicit null check instead.
  // Since schedule-local needs precise def-use info, we need to correct
  // it as well.
  Node *old_tst = proj->in(0);
  MachNode *nul_chk = new (C) MachNullCheckNode(old_tst->in(0),best,bidx);
389 390
  block->map_node(nul_chk, block->end_idx());
  map_node_to_block(nul_chk, block);
D
duke 已提交
391 392 393 394 395 396 397
  // Redirect users of old_test to nul_chk
  for (DUIterator_Last i2min, i2 = old_tst->last_outs(i2min); i2 >= i2min; --i2)
    old_tst->last_out(i2)->set_req(0, nul_chk);
  // Clean-up any dead code
  for (uint i3 = 0; i3 < old_tst->req(); i3++)
    old_tst->set_req(i3, NULL);

398 399
  latency_from_uses(nul_chk);
  latency_from_uses(best);
D
duke 已提交
400 401 402 403 404 405 406 407 408 409 410 411 412
}


//------------------------------select-----------------------------------------
// Select a nice fellow from the worklist to schedule next. If there is only
// one choice, then use it. Projections take top priority for correctness
// reasons - if I see a projection, then it is next.  There are a number of
// other special cases, for instructions that consume condition codes, et al.
// These are chosen immediately. Some instructions are required to immediately
// precede the last instruction in the block, and these are taken last. Of the
// remaining cases (most), choose the instruction with the greatest latency
// (that is, the most number of pseudo-cycles required to the end of the
// routine). If there is a tie, choose the instruction with the most inputs.
413
Node* PhaseCFG::select(Block* block, Node_List &worklist, GrowableArray<int> &ready_cnt, VectorSet &next_call, uint sched_slot) {
D
duke 已提交
414 415 416 417 418 419 420 421 422 423 424 425

  // If only a single entry on the stack, use it
  uint cnt = worklist.size();
  if (cnt == 1) {
    Node *n = worklist[0];
    worklist.map(0,worklist.pop());
    return n;
  }

  uint choice  = 0; // Bigger is most important
  uint latency = 0; // Bigger is scheduled first
  uint score   = 0; // Bigger is better
426
  int idx = -1;     // Index in worklist
427
  int cand_cnt = 0; // Candidate count
D
duke 已提交
428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446

  for( uint i=0; i<cnt; i++ ) { // Inspect entire worklist
    // Order in worklist is used to break ties.
    // See caller for how this is used to delay scheduling
    // of induction variable increments to after the other
    // uses of the phi are scheduled.
    Node *n = worklist[i];      // Get Node on worklist

    int iop = n->is_Mach() ? n->as_Mach()->ideal_Opcode() : 0;
    if( n->is_Proj() ||         // Projections always win
        n->Opcode()== Op_Con || // So does constant 'Top'
        iop == Op_CreateEx ||   // Create-exception must start block
        iop == Op_CheckCastPP
        ) {
      worklist.map(i,worklist.pop());
      return n;
    }

    // Final call in a block must be adjacent to 'catch'
447
    Node *e = block->end();
D
duke 已提交
448 449 450 451 452 453 454
    if( e->is_Catch() && e->in(0)->in(0) == n )
      continue;

    // Memory op for an implicit null check has to be at the end of the block
    if( e->is_MachNullCheck() && e->in(1) == n )
      continue;

455 456 457 458 459
    // Schedule IV increment last.
    if (e->is_Mach() && e->as_Mach()->ideal_Opcode() == Op_CountedLoopEnd &&
        e->in(1)->in(1) == n && n->is_iteratively_computed())
      continue;

D
duke 已提交
460 461 462 463 464 465 466 467 468 469 470 471 472
    uint n_choice  = 2;

    // See if this instruction is consumed by a branch. If so, then (as the
    // branch is the last instruction in the basic block) force it to the
    // end of the basic block
    if ( must_clone[iop] ) {
      // See if any use is a branch
      bool found_machif = false;

      for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
        Node* use = n->fast_out(j);

        // The use is a conditional branch, make them adjacent
473
        if (use->is_MachIf() && get_block_for_node(use) == block) {
D
duke 已提交
474 475 476 477
          found_machif = true;
          break;
        }

478 479 480
        // For nodes that produce a FlagsProj, make the node adjacent to the
        // use of the FlagsProj
        if (use->is_FlagsProj() && get_block_for_node(use) == block) {
D
duke 已提交
481 482 483 484 485 486
          found_machif = true;
          break;
        }

        // More than this instruction pending for successor to be ready,
        // don't choose this if other opportunities are ready
487
        if (ready_cnt.at(use->_idx) > 1)
D
duke 已提交
488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512
          n_choice = 1;
      }

      // loop terminated, prefer not to use this instruction
      if (found_machif)
        continue;
    }

    // See if this has a predecessor that is "must_clone", i.e. sets the
    // condition code. If so, choose this first
    for (uint j = 0; j < n->req() ; j++) {
      Node *inn = n->in(j);
      if (inn) {
        if (inn->is_Mach() && must_clone[inn->as_Mach()->ideal_Opcode()] ) {
          n_choice = 3;
          break;
        }
      }
    }

    // MachTemps should be scheduled last so they are near their uses
    if (n->is_MachTemp()) {
      n_choice = 1;
    }

513
    uint n_latency = get_latency_for_node(n);
D
duke 已提交
514 515 516
    uint n_score   = n->req();   // Many inputs get high score to break ties

    // Keep best latency found
517 518 519 520 521 522 523 524
    cand_cnt++;
    if (choice < n_choice ||
        (choice == n_choice &&
         ((StressLCM && Compile::randomized_select(cand_cnt)) ||
          (!StressLCM &&
           (latency < n_latency ||
            (latency == n_latency &&
             (score < n_score))))))) {
D
duke 已提交
525 526 527 528 529 530 531
      choice  = n_choice;
      latency = n_latency;
      score   = n_score;
      idx     = i;               // Also keep index in worklist
    }
  } // End of for all ready nodes in worklist

532 533
  assert(idx >= 0, "index should be set");
  Node *n = worklist[(uint)idx];      // Get the winner
D
duke 已提交
534

535
  worklist.map((uint)idx, worklist.pop());     // Compress worklist
D
duke 已提交
536 537 538 539 540
  return n;
}


//------------------------------set_next_call----------------------------------
541
void PhaseCFG::set_next_call(Block* block, Node* n, VectorSet& next_call) {
D
duke 已提交
542 543 544 545
  if( next_call.test_set(n->_idx) ) return;
  for( uint i=0; i<n->len(); i++ ) {
    Node *m = n->in(i);
    if( !m ) continue;  // must see all nodes in block that precede call
546 547
    if (get_block_for_node(m) == block) {
      set_next_call(block, m, next_call);
548
    }
D
duke 已提交
549 550 551 552 553 554 555 556 557
  }
}

//------------------------------needed_for_next_call---------------------------
// Set the flag 'next_call' for each Node that is needed for the next call to
// be scheduled.  This flag lets me bias scheduling so Nodes needed for the
// next subroutine call get priority - basically it moves things NOT needed
// for the next call till after the call.  This prevents me from trying to
// carry lots of stuff live across a call.
558
void PhaseCFG::needed_for_next_call(Block* block, Node* this_call, VectorSet& next_call) {
D
duke 已提交
559 560 561 562
  // Find the next control-defining Node in this block
  Node* call = NULL;
  for (DUIterator_Fast imax, i = this_call->fast_outs(imax); i < imax; i++) {
    Node* m = this_call->fast_out(i);
563
    if (get_block_for_node(m) == block && // Local-block user
D
duke 已提交
564
        m != this_call &&       // Not self-start node
565
        m->is_MachCall()) {
D
duke 已提交
566 567
      call = m;
      break;
568
    }
D
duke 已提交
569 570 571
  }
  if (call == NULL)  return;    // No next call (e.g., block end is near)
  // Set next-call for all inputs to this call
572
  set_next_call(block, call, next_call);
D
duke 已提交
573 574
}

575
//------------------------------add_call_kills-------------------------------------
576 577
// helper function that adds caller save registers to MachProjNode
static void add_call_kills(MachProjNode *proj, RegMask& regs, const char* save_policy, bool exclude_soe) {
578 579 580 581 582 583 584 585 586 587 588 589 590 591
  // Fill in the kill mask for the call
  for( OptoReg::Name r = OptoReg::Name(0); r < _last_Mach_Reg; r=OptoReg::add(r,1) ) {
    if( !regs.Member(r) ) {     // Not already defined by the call
      // Save-on-call register?
      if ((save_policy[r] == 'C') ||
          (save_policy[r] == 'A') ||
          ((save_policy[r] == 'E') && exclude_soe)) {
        proj->_rout.Insert(r);
      }
    }
  }
}


D
duke 已提交
592
//------------------------------sched_call-------------------------------------
593
uint PhaseCFG::sched_call(Block* block, uint node_cnt, Node_List& worklist, GrowableArray<int>& ready_cnt, MachCallNode* mcall, VectorSet& next_call) {
D
duke 已提交
594 595 596 597 598 599 600
  RegMask regs;

  // Schedule all the users of the call right now.  All the users are
  // projection Nodes, so they must be scheduled next to the call.
  // Collect all the defined registers.
  for (DUIterator_Fast imax, i = mcall->fast_outs(imax); i < imax; i++) {
    Node* n = mcall->fast_out(i);
K
kvn 已提交
601
    assert( n->is_MachProj(), "" );
602 603 604
    int n_cnt = ready_cnt.at(n->_idx)-1;
    ready_cnt.at_put(n->_idx, n_cnt);
    assert( n_cnt == 0, "" );
D
duke 已提交
605
    // Schedule next to call
606
    block->map_node(n, node_cnt++);
D
duke 已提交
607 608 609 610 611
    // Collect defined registers
    regs.OR(n->out_RegMask());
    // Check for scheduling the next control-definer
    if( n->bottom_type() == Type::CONTROL )
      // Warm up next pile of heuristic bits
612
      needed_for_next_call(block, n, next_call);
D
duke 已提交
613 614 615 616

    // Children of projections are now all ready
    for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
      Node* m = n->fast_out(j); // Get user
617
      if(get_block_for_node(m) != block) {
618 619
        continue;
      }
D
duke 已提交
620
      if( m->is_Phi() ) continue;
621 622 623
      int m_cnt = ready_cnt.at(m->_idx)-1;
      ready_cnt.at_put(m->_idx, m_cnt);
      if( m_cnt == 0 )
D
duke 已提交
624 625 626 627 628 629 630
        worklist.push(m);
    }

  }

  // Act as if the call defines the Frame Pointer.
  // Certainly the FP is alive and well after the call.
631
  regs.Insert(_matcher.c_frame_pointer());
D
duke 已提交
632 633 634 635

  // Set all registers killed and not already defined by the call.
  uint r_cnt = mcall->tf()->range()->cnt();
  int op = mcall->ideal_Opcode();
636 637 638
  MachProjNode *proj = new (C) MachProjNode( mcall, r_cnt+1, RegMask::Empty, MachProjNode::fat_proj );
  map_node_to_block(proj, block);
  block->insert_node(proj, node_cnt++);
D
duke 已提交
639 640 641 642 643 644 645 646

  // Select the right register save policy.
  const char * save_policy;
  switch (op) {
    case Op_CallRuntime:
    case Op_CallLeaf:
    case Op_CallLeafNoFP:
      // Calling C code so use C calling convention
647
      save_policy = _matcher._c_reg_save_policy;
D
duke 已提交
648 649 650 651 652
      break;

    case Op_CallStaticJava:
    case Op_CallDynamicJava:
      // Calling Java code so use Java calling convention
653
      save_policy = _matcher._register_save_policy;
D
duke 已提交
654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669
      break;

    default:
      ShouldNotReachHere();
  }

  // When using CallRuntime mark SOE registers as killed by the call
  // so values that could show up in the RegisterMap aren't live in a
  // callee saved register since the register wouldn't know where to
  // find them.  CallLeaf and CallLeafNoFP are ok because they can't
  // have debug info on them.  Strictly speaking this only needs to be
  // done for oops since idealreg2debugmask takes care of debug info
  // references but there no way to handle oops differently than other
  // pointers as far as the kill mask goes.
  bool exclude_soe = op == Op_CallRuntime;

670 671 672 673 674 675 676 677 678 679
  // If the call is a MethodHandle invoke, we need to exclude the
  // register which is used to save the SP value over MH invokes from
  // the mask.  Otherwise this register could be used for
  // deoptimization information.
  if (op == Op_CallStaticJava) {
    MachCallStaticJavaNode* mcallstaticjava = (MachCallStaticJavaNode*) mcall;
    if (mcallstaticjava->_method_handle_invoke)
      proj->_rout.OR(Matcher::method_handle_invoke_SP_save_mask());
  }

680
  add_call_kills(proj, regs, save_policy, exclude_soe);
D
duke 已提交
681 682 683 684 685 686 687

  return node_cnt;
}


//------------------------------schedule_local---------------------------------
// Topological sort within a block.  Someday become a real scheduler.
688
bool PhaseCFG::schedule_local(Block* block, GrowableArray<int>& ready_cnt, VectorSet& next_call) {
D
duke 已提交
689 690 691 692 693 694
  // Already "sorted" are the block start Node (as the first entry), and
  // the block-ending Node and any trailing control projections.  We leave
  // these alone.  PhiNodes and ParmNodes are made to follow the block start
  // Node.  Everything else gets topo-sorted.

#ifndef PRODUCT
695 696 697
    if (trace_opto_pipelining()) {
      tty->print_cr("# --- schedule_local B%d, before: ---", block->_pre_order);
      for (uint i = 0;i < block->number_of_nodes(); i++) {
D
duke 已提交
698
        tty->print("# ");
699
        block->get_node(i)->fast_dump();
D
duke 已提交
700 701 702 703 704 705
      }
      tty->print_cr("#");
    }
#endif

  // RootNode is already sorted
706 707 708
  if (block->number_of_nodes() == 1) {
    return true;
  }
D
duke 已提交
709 710

  // Move PhiNodes and ParmNodes from 1 to cnt up to the start
711
  uint node_cnt = block->end_idx();
D
duke 已提交
712 713 714
  uint phi_cnt = 1;
  uint i;
  for( i = 1; i<node_cnt; i++ ) { // Scan for Phi
715
    Node *n = block->get_node(i);
D
duke 已提交
716
    if( n->is_Phi() ||          // Found a PhiNode or ParmNode
717
        (n->is_Proj()  && n->in(0) == block->head()) ) {
D
duke 已提交
718
      // Move guy at 'phi_cnt' to the end; makes a hole at phi_cnt
719 720
      block->map_node(block->get_node(phi_cnt), i);
      block->map_node(n, phi_cnt++);  // swap Phi/Parm up front
D
duke 已提交
721 722 723 724 725 726
    } else {                    // All others
      // Count block-local inputs to 'n'
      uint cnt = n->len();      // Input count
      uint local = 0;
      for( uint j=0; j<cnt; j++ ) {
        Node *m = n->in(j);
727
        if( m && get_block_for_node(m) == block && !m->is_top() )
D
duke 已提交
728 729
          local++;              // One more block-local input
      }
730
      ready_cnt.at_put(n->_idx, local); // Count em up
D
duke 已提交
731

732
#ifdef ASSERT
733
      if( UseConcMarkSweepGC || UseG1GC ) {
D
duke 已提交
734
        if( n->is_Mach() && n->as_Mach()->ideal_Opcode() == Op_StoreCM ) {
735 736 737 738
          // Check the precedence edges
          for (uint prec = n->req(); prec < n->len(); prec++) {
            Node* oop_store = n->in(prec);
            if (oop_store != NULL) {
739
              assert(get_block_for_node(oop_store)->_dom_depth <= block->_dom_depth, "oop_store must dominate card-mark");
740 741
            }
          }
D
duke 已提交
742 743
        }
      }
744 745 746 747
#endif

      // A few node types require changing a required edge to a precedence edge
      // before allocation.
748 749 750
      if( n->is_Mach() && n->req() > TypeFunc::Parms &&
          (n->as_Mach()->ideal_Opcode() == Op_MemBarAcquire ||
           n->as_Mach()->ideal_Opcode() == Op_MemBarVolatile) ) {
751 752 753 754 755 756
        // MemBarAcquire could be created without Precedent edge.
        // del_req() replaces the specified edge with the last input edge
        // and then removes the last edge. If the specified edge > number of
        // edges the last edge will be moved outside of the input edges array
        // and the edge will be lost. This is why this code should be
        // executed only when Precedent (== TypeFunc::Parms) edge is present.
D
duke 已提交
757 758 759 760 761 762
        Node *x = n->in(TypeFunc::Parms);
        n->del_req(TypeFunc::Parms);
        n->add_prec(x);
      }
    }
  }
763 764
  for(uint i2=i; i2< block->number_of_nodes(); i2++ ) // Trailing guys get zapped count
    ready_cnt.at_put(block->get_node(i2)->_idx, 0);
D
duke 已提交
765 766 767 768

  // All the prescheduled guys do not hold back internal nodes
  uint i3;
  for(i3 = 0; i3<phi_cnt; i3++ ) {  // For all pre-scheduled
769
    Node *n = block->get_node(i3);       // Get pre-scheduled
D
duke 已提交
770 771
    for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
      Node* m = n->fast_out(j);
772
      if (get_block_for_node(m) == block) { // Local-block user
773 774 775
        int m_cnt = ready_cnt.at(m->_idx)-1;
        ready_cnt.at_put(m->_idx, m_cnt);   // Fix ready count
      }
D
duke 已提交
776 777 778 779 780 781 782
    }
  }

  Node_List delay;
  // Make a worklist
  Node_List worklist;
  for(uint i4=i3; i4<node_cnt; i4++ ) {    // Put ready guys on worklist
783
    Node *m = block->get_node(i4);
784
    if( !ready_cnt.at(m->_idx) ) {   // Zero ready count?
D
duke 已提交
785 786 787 788 789
      if (m->is_iteratively_computed()) {
        // Push induction variable increments last to allow other uses
        // of the phi to be scheduled first. The select() method breaks
        // ties in scheduling by worklist order.
        delay.push(m);
790 791 792 793
      } else if (m->is_Mach() && m->as_Mach()->ideal_Opcode() == Op_CreateEx) {
        // Force the CreateEx to the top of the list so it's processed
        // first and ends up at the start of the block.
        worklist.insert(0, m);
D
duke 已提交
794 795 796 797 798 799 800 801 802 803 804
      } else {
        worklist.push(m);         // Then on to worklist!
      }
    }
  }
  while (delay.size()) {
    Node* d = delay.pop();
    worklist.push(d);
  }

  // Warm up the 'next_call' heuristic bits
805
  needed_for_next_call(block, block->head(), next_call);
D
duke 已提交
806 807

#ifndef PRODUCT
808 809 810
    if (trace_opto_pipelining()) {
      for (uint j=0; j< block->number_of_nodes(); j++) {
        Node     *n = block->get_node(j);
D
duke 已提交
811
        int     idx = n->_idx;
812
        tty->print("#   ready cnt:%3d  ", ready_cnt.at(idx));
813
        tty->print("latency:%3d  ", get_latency_for_node(n));
D
duke 已提交
814 815 816 817 818
        tty->print("%4d: %s\n", idx, n->Name());
      }
    }
#endif

819
  uint max_idx = (uint)ready_cnt.length();
D
duke 已提交
820 821 822 823
  // Pull from worklist and schedule
  while( worklist.size() ) {    // Worklist is not ready

#ifndef PRODUCT
824
    if (trace_opto_pipelining()) {
D
duke 已提交
825 826 827 828 829 830 831 832 833 834
      tty->print("#   ready list:");
      for( uint i=0; i<worklist.size(); i++ ) { // Inspect entire worklist
        Node *n = worklist[i];      // Get Node on worklist
        tty->print(" %d", n->_idx);
      }
      tty->cr();
    }
#endif

    // Select and pop a ready guy from worklist
835 836
    Node* n = select(block, worklist, ready_cnt, next_call, phi_cnt);
    block->map_node(n, phi_cnt++);    // Schedule him next
D
duke 已提交
837 838

#ifndef PRODUCT
839
    if (trace_opto_pipelining()) {
D
duke 已提交
840
      tty->print("#    select %d: %s", n->_idx, n->Name());
841
      tty->print(", latency:%d", get_latency_for_node(n));
D
duke 已提交
842 843 844 845 846 847 848 849 850 851 852 853 854 855
      n->dump();
      if (Verbose) {
        tty->print("#   ready list:");
        for( uint i=0; i<worklist.size(); i++ ) { // Inspect entire worklist
          Node *n = worklist[i];      // Get Node on worklist
          tty->print(" %d", n->_idx);
        }
        tty->cr();
      }
    }

#endif
    if( n->is_MachCall() ) {
      MachCallNode *mcall = n->as_MachCall();
856
      phi_cnt = sched_call(block, phi_cnt, worklist, ready_cnt, mcall, next_call);
D
duke 已提交
857 858
      continue;
    }
859 860 861

    if (n->is_Mach() && n->as_Mach()->has_call()) {
      RegMask regs;
862
      regs.Insert(_matcher.c_frame_pointer());
863 864
      regs.OR(n->out_RegMask());

865 866 867
      MachProjNode *proj = new (C) MachProjNode( n, 1, RegMask::Empty, MachProjNode::fat_proj );
      map_node_to_block(proj, block);
      block->insert_node(proj, phi_cnt++);
868

869
      add_call_kills(proj, regs, _matcher._c_reg_save_policy, false);
870 871
    }

D
duke 已提交
872 873 874
    // Children are now all ready
    for (DUIterator_Fast i5max, i5 = n->fast_outs(i5max); i5 < i5max; i5++) {
      Node* m = n->fast_out(i5); // Get user
875
      if (get_block_for_node(m) != block) {
876 877
        continue;
      }
D
duke 已提交
878
      if( m->is_Phi() ) continue;
879
      if (m->_idx >= max_idx) { // new node, skip it
880 881 882
        assert(m->is_MachProj() && n->is_Mach() && n->as_Mach()->has_call(), "unexpected node types");
        continue;
      }
883 884 885
      int m_cnt = ready_cnt.at(m->_idx)-1;
      ready_cnt.at_put(m->_idx, m_cnt);
      if( m_cnt == 0 )
D
duke 已提交
886 887 888 889
        worklist.push(m);
    }
  }

890
  if( phi_cnt != block->end_idx() ) {
D
duke 已提交
891 892 893 894 895 896 897 898 899 900 901 902
    // did not schedule all.  Retry, Bailout, or Die
    if (C->subsume_loads() == true && !C->failing()) {
      // Retry with subsume_loads == false
      // If this is the first failure, the sentinel string will "stick"
      // to the Compile object, and the C2Compiler will see it and retry.
      C->record_failure(C2Compiler::retry_no_subsuming_loads());
    }
    // assert( phi_cnt == end_idx(), "did not schedule all" );
    return false;
  }

#ifndef PRODUCT
903
  if (trace_opto_pipelining()) {
D
duke 已提交
904 905
    tty->print_cr("#");
    tty->print_cr("# after schedule_local");
906
    for (uint i = 0;i < block->number_of_nodes();i++) {
D
duke 已提交
907
      tty->print("# ");
908
      block->get_node(i)->fast_dump();
D
duke 已提交
909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933
    }
    tty->cr();
  }
#endif


  return true;
}

//--------------------------catch_cleanup_fix_all_inputs-----------------------
static void catch_cleanup_fix_all_inputs(Node *use, Node *old_def, Node *new_def) {
  for (uint l = 0; l < use->len(); l++) {
    if (use->in(l) == old_def) {
      if (l < use->req()) {
        use->set_req(l, new_def);
      } else {
        use->rm_prec(l);
        use->add_prec(new_def);
        l--;
      }
    }
  }
}

//------------------------------catch_cleanup_find_cloned_def------------------
934
Node* PhaseCFG::catch_cleanup_find_cloned_def(Block *use_blk, Node *def, Block *def_blk, int n_clone_idx) {
D
duke 已提交
935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959
  assert( use_blk != def_blk, "Inter-block cleanup only");

  // The use is some block below the Catch.  Find and return the clone of the def
  // that dominates the use. If there is no clone in a dominating block, then
  // create a phi for the def in a dominating block.

  // Find which successor block dominates this use.  The successor
  // blocks must all be single-entry (from the Catch only; I will have
  // split blocks to make this so), hence they all dominate.
  while( use_blk->_dom_depth > def_blk->_dom_depth+1 )
    use_blk = use_blk->_idom;

  // Find the successor
  Node *fixup = NULL;

  uint j;
  for( j = 0; j < def_blk->_num_succs; j++ )
    if( use_blk == def_blk->_succs[j] )
      break;

  if( j == def_blk->_num_succs ) {
    // Block at same level in dom-tree is not a successor.  It needs a
    // PhiNode, the PhiNode uses from the def and IT's uses need fixup.
    Node_Array inputs = new Node_List(Thread::current()->resource_area());
    for(uint k = 1; k < use_blk->num_preds(); k++) {
960 961
      Block* block = get_block_for_node(use_blk->pred(k));
      inputs.map(k, catch_cleanup_find_cloned_def(block, def, def_blk, n_clone_idx));
D
duke 已提交
962 963 964 965 966
    }

    // Check to see if the use_blk already has an identical phi inserted.
    // If it exists, it will be at the first position since all uses of a
    // def are processed together.
967
    Node *phi = use_blk->get_node(1);
D
duke 已提交
968 969 970 971 972 973 974 975 976 977 978 979 980 981
    if( phi->is_Phi() ) {
      fixup = phi;
      for (uint k = 1; k < use_blk->num_preds(); k++) {
        if (phi->in(k) != inputs[k]) {
          // Not a match
          fixup = NULL;
          break;
        }
      }
    }

    // If an existing PhiNode was not found, make a new one.
    if (fixup == NULL) {
      Node *new_phi = PhiNode::make(use_blk->head(), def);
982
      use_blk->insert_node(new_phi, 1);
983
      map_node_to_block(new_phi, use_blk);
D
duke 已提交
984 985 986 987 988 989 990 991
      for (uint k = 1; k < use_blk->num_preds(); k++) {
        new_phi->set_req(k, inputs[k]);
      }
      fixup = new_phi;
    }

  } else {
    // Found the use just below the Catch.  Make it use the clone.
992
    fixup = use_blk->get_node(n_clone_idx);
D
duke 已提交
993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011
  }

  return fixup;
}

//--------------------------catch_cleanup_intra_block--------------------------
// Fix all input edges in use that reference "def".  The use is in the same
// block as the def and both have been cloned in each successor block.
static void catch_cleanup_intra_block(Node *use, Node *def, Block *blk, int beg, int n_clone_idx) {

  // Both the use and def have been cloned. For each successor block,
  // get the clone of the use, and make its input the clone of the def
  // found in that block.

  uint use_idx = blk->find_node(use);
  uint offset_idx = use_idx - beg;
  for( uint k = 0; k < blk->_num_succs; k++ ) {
    // Get clone in each successor block
    Block *sb = blk->_succs[k];
1012
    Node *clone = sb->get_node(offset_idx+1);
D
duke 已提交
1013 1014 1015
    assert( clone->Opcode() == use->Opcode(), "" );

    // Make use-clone reference the def-clone
1016
    catch_cleanup_fix_all_inputs(clone, def, sb->get_node(n_clone_idx));
D
duke 已提交
1017 1018 1019 1020 1021 1022
  }
}

//------------------------------catch_cleanup_inter_block---------------------
// Fix all input edges in use that reference "def".  The use is in a different
// block than the def.
1023
void PhaseCFG::catch_cleanup_inter_block(Node *use, Block *use_blk, Node *def, Block *def_blk, int n_clone_idx) {
D
duke 已提交
1024 1025
  if( !use_blk ) return;        // Can happen if the use is a precedence edge

1026
  Node *new_def = catch_cleanup_find_cloned_def(use_blk, def, def_blk, n_clone_idx);
D
duke 已提交
1027 1028 1029 1030 1031 1032
  catch_cleanup_fix_all_inputs(use, def, new_def);
}

//------------------------------call_catch_cleanup-----------------------------
// If we inserted any instructions between a Call and his CatchNode,
// clone the instructions on all paths below the Catch.
1033
void PhaseCFG::call_catch_cleanup(Block* block) {
D
duke 已提交
1034 1035

  // End of region to clone
1036 1037
  uint end = block->end_idx();
  if( !block->get_node(end)->is_Catch() ) return;
D
duke 已提交
1038 1039
  // Start of region to clone
  uint beg = end;
1040 1041
  while(!block->get_node(beg-1)->is_MachProj() ||
        !block->get_node(beg-1)->in(0)->is_MachCall() ) {
D
duke 已提交
1042 1043 1044 1045 1046 1047 1048 1049
    beg--;
    assert(beg > 0,"Catch cleanup walking beyond block boundary");
  }
  // Range of inserted instructions is [beg, end)
  if( beg == end ) return;

  // Clone along all Catch output paths.  Clone area between the 'beg' and
  // 'end' indices.
1050 1051
  for( uint i = 0; i < block->_num_succs; i++ ) {
    Block *sb = block->_succs[i];
D
duke 已提交
1052 1053
    // Clone the entire area; ignoring the edge fixup for now.
    for( uint j = end; j > beg; j-- ) {
1054 1055
      // It is safe here to clone a node with anti_dependence
      // since clones dominate on each path.
1056
      Node *clone = block->get_node(j-1)->clone();
1057
      sb->insert_node(clone, 1);
1058
      map_node_to_block(clone, sb);
D
duke 已提交
1059 1060 1061 1062 1063 1064 1065
    }
  }


  // Fixup edges.  Check the def-use info per cloned Node
  for(uint i2 = beg; i2 < end; i2++ ) {
    uint n_clone_idx = i2-beg+1; // Index of clone of n in each successor block
1066
    Node *n = block->get_node(i2);        // Node that got cloned
D
duke 已提交
1067 1068 1069 1070 1071 1072 1073 1074
    // Need DU safe iterator because of edge manipulation in calls.
    Unique_Node_List *out = new Unique_Node_List(Thread::current()->resource_area());
    for (DUIterator_Fast j1max, j1 = n->fast_outs(j1max); j1 < j1max; j1++) {
      out->push(n->fast_out(j1));
    }
    uint max = out->size();
    for (uint j = 0; j < max; j++) {// For all users
      Node *use = out->pop();
1075
      Block *buse = get_block_for_node(use);
D
duke 已提交
1076 1077 1078
      if( use->is_Phi() ) {
        for( uint k = 1; k < use->req(); k++ )
          if( use->in(k) == n ) {
1079 1080
            Block* b = get_block_for_node(buse->pred(k));
            Node *fixup = catch_cleanup_find_cloned_def(b, n, block, n_clone_idx);
D
duke 已提交
1081 1082 1083
            use->set_req(k, fixup);
          }
      } else {
1084 1085
        if (block == buse) {
          catch_cleanup_intra_block(use, n, block, beg, n_clone_idx);
D
duke 已提交
1086
        } else {
1087
          catch_cleanup_inter_block(use, buse, n, block, n_clone_idx);
D
duke 已提交
1088 1089 1090 1091 1092 1093 1094 1095
        }
      }
    } // End for all users

  } // End of for all Nodes in cloned area

  // Remove the now-dead cloned ops
  for(uint i3 = beg; i3 < end; i3++ ) {
1096 1097
    block->get_node(beg)->disconnect_inputs(NULL, C);
    block->remove_node(beg);
D
duke 已提交
1098 1099 1100
  }

  // If the successor blocks have a CreateEx node, move it back to the top
1101 1102
  for(uint i4 = 0; i4 < block->_num_succs; i4++ ) {
    Block *sb = block->_succs[i4];
D
duke 已提交
1103 1104 1105
    uint new_cnt = end - beg;
    // Remove any newly created, but dead, nodes.
    for( uint j = new_cnt; j > 0; j-- ) {
1106
      Node *n = sb->get_node(j);
D
duke 已提交
1107 1108
      if (n->outcnt() == 0 &&
          (!n->is_Proj() || n->as_Proj()->in(0)->outcnt() == 1) ){
1109
        n->disconnect_inputs(NULL, C);
1110
        sb->remove_node(j);
D
duke 已提交
1111 1112 1113 1114 1115
        new_cnt--;
      }
    }
    // If any newly created nodes remain, move the CreateEx node to the top
    if (new_cnt > 0) {
1116
      Node *cex = sb->get_node(1+new_cnt);
D
duke 已提交
1117
      if( cex->is_Mach() && cex->as_Mach()->ideal_Opcode() == Op_CreateEx ) {
1118 1119
        sb->remove_node(1+new_cnt);
        sb->insert_node(cex, 1);
D
duke 已提交
1120 1121 1122 1123
      }
    }
  }
}