8029302: Performance regression in Math.pow intrinsic

Summary: Added special case for x^y where y == 2 Reviewed-by: kvn

8029302: Performance regression in Math.pow intrinsic
Summary: Added special case for x^y where y == 2 Reviewed-by: kvn
0fd06556 · adlertz · 58699dca · 0fd06556 · 0fd06556
隐藏空白更改
内联并排

Showing with 52 addition and 19 deletion

src/cpu/x86/vm/macroAssembler_x86.cpp src/cpu/x86/vm/macroAssembler_x86.cpp +2 -0

src/share/vm/opto/library_call.cpp src/share/vm/opto/library_call.cpp +50 -19

未找到文件。
--- a/src/cpu/x86/vm/macroAssembler_x86.cpp
+++ b/src/cpu/x86/vm/macroAssembler_x86.cpp
@@ -3152,10 +3152,12 @@ void MacroAssembler::fast_pow() {
  // if fast computation is not possible, result is NaN. Requires
  // fallback from user of this macro.
  // increase precision for intermediate steps of the computation
+  BLOCK_COMMENT("fast_pow {");
  increase_precision();
  fyl2x();                 // Stack: (Y*log2(X)) ...
  pow_exp_core_encoding(); // Stack: exp(X) ...
  restore_precision();
+  BLOCK_COMMENT("} fast_pow");
 }

 void MacroAssembler::fast_exp() {

--- a/src/share/vm/opto/library_call.cpp
+++ b/src/share/vm/opto/library_call.cpp
@@ -216,7 +216,7 @@ class LibraryCallKit : public GraphKit {
  bool inline_math_subtractExactL(bool is_decrement);
  bool inline_exp();
  bool inline_pow();
-  void finish_pow_exp(Node* result, Node* x, Node* y, const TypeFunc* call_type, address funcAddr, const char* funcName);
+  Node* finish_pow_exp(Node* result, Node* x, Node* y, const TypeFunc* call_type, address funcAddr, const char* funcName);
  bool inline_min_max(vmIntrinsics::ID id);
  Node* generate_min_max(vmIntrinsics::ID id, Node* x, Node* y);
  // This returns Type::AnyPtr, RawPtr, or OopPtr.
@@ -1678,7 +1678,7 @@ bool LibraryCallKit::inline_trig(vmIntrinsics::ID id) {
  return true;
 }

-void LibraryCallKit::finish_pow_exp(Node* result, Node* x, Node* y, const TypeFunc* call_type, address funcAddr, const char* funcName) {
+Node* LibraryCallKit::finish_pow_exp(Node* result, Node* x, Node* y, const TypeFunc* call_type, address funcAddr, const char* funcName) {
  //-------------------
  //result=(result.isNaN())? funcAddr():result;
  // Check: If isNaN() by checking result!=result? then either trap
@@ -1694,7 +1694,7 @@ void LibraryCallKit::finish_pow_exp(Node* result, Node* x, Node* y, const TypeFu
      uncommon_trap(Deoptimization::Reason_intrinsic,
                    Deoptimization::Action_make_not_entrant);
    }
-    set_result(result);
+    return result;
  } else {
    // If this inlining ever returned NaN in the past, we compile a call
    // to the runtime to properly handle corner cases
@@ -1724,9 +1724,10 @@ void LibraryCallKit::finish_pow_exp(Node* result, Node* x, Node* y, const TypeFu

      result_region->init_req(2, control());
      result_val->init_req(2, value);
-      set_result(result_region, result_val);
+      set_control(_gvn.transform(result_region));
+      return result_val;
    } else {
-      set_result(result);
+      return result;
    }
  }
 }
@@ -1738,7 +1739,8 @@ bool LibraryCallKit::inline_exp() {
  Node* arg = round_double_node(argument(0));
  Node* n   = _gvn.transform(new (C) ExpDNode(C, control(), arg));

-  finish_pow_exp(n, arg, NULL, OptoRuntime::Math_D_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dexp), "EXP");
+  n = finish_pow_exp(n, arg, NULL, OptoRuntime::Math_D_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dexp), "EXP");
+  set_result(n);

  C->set_has_split_ifs(true); // Has chance for split-if optimization
  return true;
@@ -1748,27 +1750,48 @@ bool LibraryCallKit::inline_exp() {
 // Inline power instructions, if possible.
 bool LibraryCallKit::inline_pow() {
  // Pseudocode for pow
-  // if (x <= 0.0) {
-  //   long longy = (long)y;
-  //   if ((double)longy == y) { // if y is long
-  //     if (y + 1 == y) longy = 0; // huge number: even
-  //     result = ((1&longy) == 0)?-DPow(abs(x), y):DPow(abs(x), y);
+  // if (y == 2) {
+  //   return x * x;
+  // } else {
+  //   if (x <= 0.0) {
+  //     long longy = (long)y;
+  //     if ((double)longy == y) { // if y is long
+  //       if (y + 1 == y) longy = 0; // huge number: even
+  //       result = ((1&longy) == 0)?-DPow(abs(x), y):DPow(abs(x), y);
+  //     } else {
+  //       result = NaN;
+  //     }
  //   } else {
-  //     result = NaN;
+  //     result = DPow(x,y);
  //   }
-  // } else {
-  //   result = DPow(x,y);
-  // }
-  // if (result != result)?  {
-  //   result = uncommon_trap() or runtime_call();
+  //   if (result != result)?  {
+  //     result = uncommon_trap() or runtime_call();
+  //   }
+  //   return result;
  // }
-  // return result;

  Node* x = round_double_node(argument(0));
  Node* y = round_double_node(argument(2));

  Node* result = NULL;

+  Node*   const_two_node = makecon(TypeD::make(2.0));
+  Node*   cmp_node       = _gvn.transform(new (C) CmpDNode(y, const_two_node));
+  Node*   bool_node      = _gvn.transform(new (C) BoolNode(cmp_node, BoolTest::eq));
+  IfNode* if_node        = create_and_xform_if(control(), bool_node, PROB_STATIC_INFREQUENT, COUNT_UNKNOWN);
+  Node*   if_true        = _gvn.transform(new (C) IfTrueNode(if_node));
+  Node*   if_false       = _gvn.transform(new (C) IfFalseNode(if_node));
+
+  RegionNode* region_node = new (C) RegionNode(3);
+  region_node->init_req(1, if_true);
+
+  Node* phi_node = new (C) PhiNode(region_node, Type::DOUBLE);
+  // special case for x^y where y == 2, we can convert it to x * x
+  phi_node->init_req(1, _gvn.transform(new (C) MulDNode(x, x)));
+
+  // set control to if_false since we will now process the false branch
+  set_control(if_false);
+
  if (!too_many_traps(Deoptimization::Reason_intrinsic)) {
    // Short form: skip the fancy tests and just check for NaN result.
    result = _gvn.transform(new (C) PowDNode(C, control(), x, y));
@@ -1892,7 +1915,15 @@ bool LibraryCallKit::inline_pow() {
    result = _gvn.transform(phi);
  }

-  finish_pow_exp(result, x, y, OptoRuntime::Math_DD_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dpow), "POW");
+  result = finish_pow_exp(result, x, y, OptoRuntime::Math_DD_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dpow), "POW");
+
+  // control from finish_pow_exp is now input to the region node
+  region_node->set_req(2, control());
+  // the result from finish_pow_exp is now input to the phi node
+  phi_node->init_req(2, _gvn.transform(result));
+  set_control(_gvn.transform(region_node));
+  record_for_igvn(region_node);
+  set_result(_gvn.transform(phi_node));

  C->set_has_split_ifs(true); // Has chance for split-if optimization
  return true;