7004555: Add new policy for one iteration loops

Summary: Add new policy for one iteration loops (mostly formal pre- loops). Reviewed-by: never

7004555: Add new policy for one iteration loops
Summary: Add new policy for one iteration loops (mostly formal pre- loops). Reviewed-by: never
8082c8d0 · kvn · 5413f3b9 · 8082c8d0 · 8082c8d0 · 8082c8d0
3 changed file
--- a/src/share/vm/opto/loopTransform.cpp
+++ b/src/share/vm/opto/loopTransform.cpp
@@ -63,6 +63,46 @@ void IdealLoopTree::record_for_igvn() {
  }
 }

+//------------------------------compute_exact_trip_count-----------------------
+// Compute loop exact trip count if possible. Do not recalculate trip count for
+// split loops (pre-main-post) which have their limits and inits behind Opaque node.
+void IdealLoopTree::compute_exact_trip_count( PhaseIdealLoop *phase ) {
+  if (!_head->as_Loop()->is_valid_counted_loop()) {
+    return;
+  }
+  CountedLoopNode* cl = _head->as_CountedLoop();
+  // Trip count may become nonexact for iteration split loops since
+  // RCE modifies limits. Note, _trip_count value is not reset since
+  // it is used to limit unrolling of main loop.
+  cl->set_nonexact_trip_count();
+
+  // Loop's test should be part of loop.
+  if (!phase->is_member(this, phase->get_ctrl(cl->loopexit()->in(CountedLoopEndNode::TestValue))))
+    return; // Infinite loop
+
+#ifdef ASSERT
+  BoolTest::mask bt = cl->loopexit()->test_trip();
+  assert(bt == BoolTest::lt || bt == BoolTest::gt ||
+         bt == BoolTest::ne, "canonical test is expected");
+#endif
+
+  Node* init_n = cl->init_trip();
+  Node* limit_n = cl->limit();
+  if (init_n  != NULL &&  init_n->is_Con() &&
+      limit_n != NULL && limit_n->is_Con()) {
+    // Use longs to avoid integer overflow.
+    int stride_con  = cl->stride_con();
+    long init_con   = cl->init_trip()->get_int();
+    long limit_con  = cl->limit()->get_int();
+    int stride_m    = stride_con - (stride_con > 0 ? 1 : -1);
+    long trip_count = (limit_con - init_con + stride_m)/stride_con;
+    if (trip_count > 0 && (julong)trip_count < (julong)max_juint) {
+      // Set exact trip count.
+      cl->set_exact_trip_count((uint)trip_count);
+    }
+  }
+}
+
 //------------------------------compute_profile_trip_cnt----------------------------
 // Compute loop trip count from profile data as
 //    (backedge_count + loop_exit_count) / loop_exit_count
@@ -533,29 +573,15 @@ bool IdealLoopTree::policy_maximally_unroll( PhaseIdealLoop *phase ) const {
  if (!cl->is_valid_counted_loop())
    return false; // Malformed counted loop

-  Node *init_n = cl->init_trip();
-  Node *limit_n = cl->limit();
-
-  // Non-constant bounds
-  if (init_n   == NULL || !init_n->is_Con()  ||
-      limit_n  == NULL || !limit_n->is_Con()) {
+  if (!cl->has_exact_trip_count()) {
+    // Trip count is not exact.
    return false;
  }
-  // Use longs to avoid integer overflow.
-  int  stride_con = cl->stride_con();
-  long init_con   = cl->init_trip()->get_int();
-  long limit_con  = cl->limit()->get_int();
-  int  stride_m   = stride_con - (stride_con > 0 ? 1 : -1);
-  long trip_cnt   = (limit_con - init_con + stride_m)/stride_con;
-
-  // Note, max_jint is used to indicate unknown trip count.
-  if (trip_cnt <= 0 || trip_cnt >= (long)max_jint) {
-    // return a false (no maximally unroll) and the regular unroll/peel
-    // route will make a small mess which CCP will fold away.
-    return false;
-  }
-  uint trip_count = (uint)trip_cnt;
-  cl->set_trip_count(trip_count);
+
+  uint trip_count = cl->trip_count();
+  // Note, max_juint is used to indicate unknown trip count.
+  assert(trip_count > 1, "one iteration loop should be optimized out already");
+  assert(trip_count < max_juint, "exact trip_count should be less than max_uint.");

  // Real policy: if we maximally unroll, does it get too big?
  // Allow the unrolled mess to get larger than standard loop
@@ -1096,7 +1122,7 @@ void PhaseIdealLoop::do_unroll( IdealLoopTree *loop, Node_List &old_new, bool ad
    tty->print("Unrolling ");
    loop->dump_head();
  } else if (TraceLoopOpts) {
-    if (loop_head->trip_count() < LoopUnrollLimit) {
+    if (loop_head->trip_count() < (uint)LoopUnrollLimit) {
      tty->print("Unroll  %d(%2d) ", loop_head->unrolled_count()*2, loop_head->trip_count());
    } else {
      tty->print("Unroll  %d     ", loop_head->unrolled_count()*2);
@@ -1801,6 +1827,17 @@ bool IdealLoopTree::policy_do_remove_empty_loop( PhaseIdealLoop *phase ) {

  // main and post loops have explicitly created zero trip guard
  bool needs_guard = !cl->is_main_loop() && !cl->is_post_loop();
+  if (needs_guard) {
+    // Skip guard if values not overlap.
+    const TypeInt* init_t = phase->_igvn.type(cl->init_trip())->is_int();
+    const TypeInt* limit_t = phase->_igvn.type(cl->limit())->is_int();
+    int  stride_con = cl->stride_con();
+    if (stride_con > 0) {
+      needs_guard = (init_t->_hi >= limit_t->_lo);
+    } else {
+      needs_guard = (init_t->_lo <= limit_t->_hi);
+    }
+  }
  if (needs_guard) {
    // Check for an obvious zero trip guard.
    Node* inctrl = PhaseIdealLoop::skip_loop_predicates(cl->in(LoopNode::EntryControl));
@@ -1846,12 +1883,49 @@ bool IdealLoopTree::policy_do_remove_empty_loop( PhaseIdealLoop *phase ) {
  return true;
 }

+//------------------------------policy_do_one_iteration_loop-------------------
+// Convert one iteration loop into normal code.
+bool IdealLoopTree::policy_do_one_iteration_loop( PhaseIdealLoop *phase ) {
+  if (!_head->as_Loop()->is_valid_counted_loop())
+    return false; // Only for counted loop
+
+  CountedLoopNode *cl = _head->as_CountedLoop();
+  if (!cl->has_exact_trip_count() || cl->trip_count() != 1) {
+    return false;
+  }
+
+#ifndef PRODUCT
+  if(TraceLoopOpts) {
+    tty->print("OneIteration ");
+    this->dump_head();
+  }
+#endif
+
+  Node *init_n = cl->init_trip();
+#ifdef ASSERT
+  // Loop boundaries should be constant since trip count is exact.
+  assert(init_n->get_int() + cl->stride_con() >= cl->limit()->get_int(), "should be one iteration");
+#endif
+  // Replace the phi at loop head with the value of the init_trip.
+  // Then the CountedLoopEnd will collapse (backedge will not be taken)
+  // and all loop-invariant uses of the exit values will be correct.
+  phase->_igvn.replace_node(cl->phi(), cl->init_trip());
+  phase->C->set_major_progress();
+  return true;
+}

 //=============================================================================
 //------------------------------iteration_split_impl---------------------------
 bool IdealLoopTree::iteration_split_impl( PhaseIdealLoop *phase, Node_List &old_new ) {
+  // Compute exact loop trip count if possible.
+  compute_exact_trip_count(phase);
+
+  // Convert one iteration loop into normal code.
+  if (policy_do_one_iteration_loop(phase))
+    return true;
+
  // Check and remove empty loops (spam micro-benchmarks)
-  if( policy_do_remove_empty_loop(phase) )
+  if (policy_do_remove_empty_loop(phase))
    return true;  // Here we removed an empty loop

  bool should_peel = policy_peeling(phase); // Should we peel?
@@ -1860,40 +1934,40 @@ bool IdealLoopTree::iteration_split_impl( PhaseIdealLoop *phase, Node_List &old_

  // Non-counted loops may be peeled; exactly 1 iteration is peeled.
  // This removes loop-invariant tests (usually null checks).
-  if( !_head->is_CountedLoop() ) { // Non-counted loop
+  if (!_head->is_CountedLoop()) { // Non-counted loop
    if (PartialPeelLoop && phase->partial_peel(this, old_new)) {
      // Partial peel succeeded so terminate this round of loop opts
      return false;
    }
-    if( should_peel ) {            // Should we peel?
+    if (should_peel) {            // Should we peel?
 #ifndef PRODUCT
      if (PrintOpto) tty->print_cr("should_peel");
 #endif
      phase->do_peeling(this,old_new);
-    } else if( should_unswitch ) {
+    } else if (should_unswitch) {
      phase->do_unswitching(this, old_new);
    }
    return true;
  }
  CountedLoopNode *cl = _head->as_CountedLoop();

-  if( !cl->loopexit() ) return true; // Ignore various kinds of broken loops
+  if (!cl->loopexit()) return true; // Ignore various kinds of broken loops

  // Do nothing special to pre- and post- loops
-  if( cl->is_pre_loop() || cl->is_post_loop() ) return true;
+  if (cl->is_pre_loop() || cl->is_post_loop()) return true;

  // Compute loop trip count from profile data
  compute_profile_trip_cnt(phase);

  // Before attempting fancy unrolling, RCE or alignment, see if we want
  // to completely unroll this loop or do loop unswitching.
-  if( cl->is_normal_loop() ) {
+  if (cl->is_normal_loop()) {
    if (should_unswitch) {
      phase->do_unswitching(this, old_new);
      return true;
    }
    bool should_maximally_unroll =  policy_maximally_unroll(phase);
-    if( should_maximally_unroll ) {
+    if (should_maximally_unroll) {
      // Here we did some unrolling and peeling.  Eventually we will
      // completely unroll this loop and it will no longer be a loop.
      phase->do_maximally_unroll(this,old_new);
@@ -1937,14 +2011,14 @@ bool IdealLoopTree::iteration_split_impl( PhaseIdealLoop *phase, Node_List &old_
  // If we have any of these conditions (RCE, alignment, unrolling) met, then
  // we switch to the pre-/main-/post-loop model.  This model also covers
  // peeling.
-  if( should_rce || should_align || should_unroll ) {
-    if( cl->is_normal_loop() )  // Convert to 'pre/main/post' loops
+  if (should_rce || should_align || should_unroll) {
+    if (cl->is_normal_loop())  // Convert to 'pre/main/post' loops
      phase->insert_pre_post_loops(this,old_new, !may_rce_align);

    // Adjust the pre- and main-loop limits to let the pre and post loops run
    // with full checks, but the main-loop with no checks.  Remove said
    // checks from the main body.
-    if( should_rce )
+    if (should_rce)
      phase->do_range_check(this,old_new);

    // Double loop body for unrolling.  Adjust the minimum-trip test (will do
@@ -1952,16 +2026,16 @@ bool IdealLoopTree::iteration_split_impl( PhaseIdealLoop *phase, Node_List &old_
    // an even number of trips).  If we are peeling, we might enable some RCE
    // and we'd rather unroll the post-RCE'd loop SO... do not unroll if
    // peeling.
-      if( should_unroll && !should_peel )
-        phase->do_unroll(this,old_new, true);
+    if (should_unroll && !should_peel)
+      phase->do_unroll(this,old_new, true);

    // Adjust the pre-loop limits to align the main body
    // iterations.
-    if( should_align )
+    if (should_align)
      Unimplemented();

  } else {                      // Else we have an unchanged counted loop
-    if( should_peel )           // Might want to peel but do nothing else
+    if (should_peel)           // Might want to peel but do nothing else
      phase->do_peeling(this,old_new);
  }
  return true;

--- a/src/share/vm/opto/loopnode.cpp
+++ b/src/share/vm/opto/loopnode.cpp
@@ -1450,6 +1450,21 @@ void IdealLoopTree::dump_head( ) const {
  if (_head->is_CountedLoop()) {
    CountedLoopNode *cl = _head->as_CountedLoop();
    tty->print(" counted");
+
+    Node* init_n = cl->init_trip();
+    if (init_n  != NULL &&  init_n->is_Con())
+      tty->print(" [%d,", cl->init_trip()->get_int());
+    else
+      tty->print(" [int,");
+    Node* limit_n = cl->limit();
+    if (limit_n  != NULL &&  limit_n->is_Con())
+      tty->print("%d),", cl->limit()->get_int());
+    else
+      tty->print("int),");
+    int stride_con  = cl->stride_con();
+    if (stride_con > 0) tty->print("+");
+    tty->print("%d", stride_con);
+
    if (cl->is_pre_loop ()) tty->print(" pre" );
    if (cl->is_main_loop()) tty->print(" main");
    if (cl->is_post_loop()) tty->print(" post");

--- a/src/share/vm/opto/loopnode.hpp
+++ b/src/share/vm/opto/loopnode.hpp
@@ -57,7 +57,12 @@ class LoopNode : public RegionNode {
 protected:
  short _loop_flags;
  // Names for flag bitfields
-  enum { pre_post_main=0, inner_loop=8, partial_peel_loop=16, partial_peel_failed=32  };
+  enum { Normal=0, Pre=1, Main=2, Post=3, PreMainPostFlagsMask=3,
+         MainHasNoPreLoop=4,
+         HasExactTripCount=8,
+         InnerLoop=16,
+         PartialPeelLoop=32,
+         PartialPeelFailed=64 };
  char _unswitch_count;
  enum { _unswitch_max=3 };

@@ -65,13 +70,13 @@ public:
  // Names for edge indices
  enum { Self=0, EntryControl, LoopBackControl };

-  int is_inner_loop() const { return _loop_flags & inner_loop; }
-  void set_inner_loop() { _loop_flags |= inner_loop; }
+  int is_inner_loop() const { return _loop_flags & InnerLoop; }
+  void set_inner_loop() { _loop_flags |= InnerLoop; }

-  int is_partial_peel_loop() const { return _loop_flags & partial_peel_loop; }
-  void set_partial_peel_loop() { _loop_flags |= partial_peel_loop; }
-  int partial_peel_has_failed() const { return _loop_flags & partial_peel_failed; }
-  void mark_partial_peel_failed() { _loop_flags |= partial_peel_failed; }
+  int is_partial_peel_loop() const { return _loop_flags & PartialPeelLoop; }
+  void set_partial_peel_loop() { _loop_flags |= PartialPeelLoop; }
+  int partial_peel_has_failed() const { return _loop_flags & PartialPeelFailed; }
+  void mark_partial_peel_failed() { _loop_flags |= PartialPeelFailed; }

  int unswitch_max() { return _unswitch_max; }
  int unswitch_count() { return _unswitch_count; }
@@ -137,8 +142,8 @@ class CountedLoopNode : public LoopNode {
  // the Main CountedLoop.  Used to assert that we understand the graph shape.
  node_idx_t _main_idx;

-  // Known trip count calculated by policy_maximally_unroll
-  int   _trip_count;
+  // Known trip count calculated by compute_exact_trip_count()
+  uint  _trip_count;

  // Expected trip count from profile data
  float _profile_trip_cnt;
@@ -152,7 +157,7 @@ class CountedLoopNode : public LoopNode {

 public:
  CountedLoopNode( Node *entry, Node *backedge )
-    : LoopNode(entry, backedge), _trip_count(max_jint),
+    : LoopNode(entry, backedge), _main_idx(0), _trip_count(max_juint),
      _profile_trip_cnt(COUNT_UNKNOWN), _unrolled_count_log2(0),
      _node_count_before_unroll(0) {
    init_class_id(Class_CountedLoop);
@@ -194,13 +199,12 @@ public:

  // A 'main' loop that is ONLY unrolled or peeled, never RCE'd or
  // Aligned, may be missing it's pre-loop.
-  enum { Normal=0, Pre=1, Main=2, Post=3, PrePostFlagsMask=3, Main_Has_No_Pre_Loop=4 };
-  int is_normal_loop() const { return (_loop_flags&PrePostFlagsMask) == Normal; }
-  int is_pre_loop   () const { return (_loop_flags&PrePostFlagsMask) == Pre;    }
-  int is_main_loop  () const { return (_loop_flags&PrePostFlagsMask) == Main;   }
-  int is_post_loop  () const { return (_loop_flags&PrePostFlagsMask) == Post;   }
-  int is_main_no_pre_loop() const { return _loop_flags & Main_Has_No_Pre_Loop; }
-  void set_main_no_pre_loop() { _loop_flags |= Main_Has_No_Pre_Loop; }
+  int is_normal_loop() const { return (_loop_flags&PreMainPostFlagsMask) == Normal; }
+  int is_pre_loop   () const { return (_loop_flags&PreMainPostFlagsMask) == Pre;    }
+  int is_main_loop  () const { return (_loop_flags&PreMainPostFlagsMask) == Main;   }
+  int is_post_loop  () const { return (_loop_flags&PreMainPostFlagsMask) == Post;   }
+  int is_main_no_pre_loop() const { return _loop_flags & MainHasNoPreLoop; }
+  void set_main_no_pre_loop() { _loop_flags |= MainHasNoPreLoop; }

  int main_idx() const { return _main_idx; }

@@ -208,10 +212,19 @@ public:
  void set_pre_loop  (CountedLoopNode *main) { assert(is_normal_loop(),""); _loop_flags |= Pre ; _main_idx = main->_idx; }
  void set_main_loop (                     ) { assert(is_normal_loop(),""); _loop_flags |= Main;                         }
  void set_post_loop (CountedLoopNode *main) { assert(is_normal_loop(),""); _loop_flags |= Post; _main_idx = main->_idx; }
-  void set_normal_loop(                    ) { _loop_flags &= ~PrePostFlagsMask; }
+  void set_normal_loop(                    ) { _loop_flags &= ~PreMainPostFlagsMask; }

-  void set_trip_count(int tc) { _trip_count = tc; }
-  int trip_count()            { return _trip_count; }
+  void set_trip_count(uint tc) { _trip_count = tc; }
+  uint trip_count()            { return _trip_count; }
+
+  bool has_exact_trip_count() const { return (_loop_flags & HasExactTripCount) != 0; }
+  void set_exact_trip_count(uint tc) {
+    _trip_count = tc;
+    _loop_flags |= HasExactTripCount;
+  }
+  void set_nonexact_trip_count() {
+    _loop_flags &= ~HasExactTripCount;
+  }

  void set_profile_trip_cnt(float ptc) { _profile_trip_cnt = ptc; }
  float profile_trip_cnt()             { return _profile_trip_cnt; }
@@ -384,6 +397,9 @@ public:
  // Micro-benchmark spamming.  Remove empty loops.
  bool policy_do_remove_empty_loop( PhaseIdealLoop *phase );

+  // Convert one iteration loop into normal code.
+  bool policy_do_one_iteration_loop( PhaseIdealLoop *phase );
+
  // Return TRUE or FALSE if the loop should be peeled or not.  Peel if we can
  // make some loop-invariant test (usually a null-check) happen before the
  // loop.
@@ -412,6 +428,9 @@ public:
  // Return TRUE if "iff" is a range check.
  bool is_range_check_if(IfNode *iff, PhaseIdealLoop *phase, Invariance& invar) const;

+  // Compute loop exact trip count if possible
+  void compute_exact_trip_count( PhaseIdealLoop *phase );
+
  // Compute loop trip count from profile data
  void compute_profile_trip_cnt( PhaseIdealLoop *phase );