codegen for fused elementwise operation (#19520)

* test=develop codegen for fused elementwise operation * fix test=develop

codegen for fused elementwise operation (#19520)
* test=develop codegen for fused elementwise operation * fix test=develop
ed8f44ea · wangchaochaohu · GitHub · 25c0eb28 · ed8f44ea · ed8f44ea
7 changed files
--- a/paddle/fluid/framework/ir/CMakeLists.txt
+++ b/paddle/fluid/framework/ir/CMakeLists.txt
@@ -30,6 +30,8 @@ function(pass_library TARGET DEST)
    endif()
 endfunction()
+cc_library(codegen SRCS codegen.cc DEPS codegen_helper)  # CodeGen: assembles the fused kernel source string
+cc_library(codegen_helper SRCS codegen_helper.cc DEPS graph node graph_helper)  # OperationExpression and the operator template table
 cc_library(node SRCS node.cc DEPS proto_desc)
 cc_library(graph SRCS graph.cc DEPS node pretty_log)
 cc_library(graph_helper SRCS graph_helper.cc DEPS graph)
@@ -107,6 +109,7 @@ set(GLOB_PASS_LIB ${PASS_LIBRARY} CACHE INTERNAL "Global PASS library")
 cc_library(pass_builder SRCS pass_builder.cc DEPS pass)
+cc_test(codegen_test SRCS codegen_test.cc DEPS codegen_helper codegen)  # unit test for CodeGen::GetKernelCode
 cc_test(node_test SRCS node_test.cc DEPS node)
 cc_test(pass_test SRCS pass_test.cc DEPS graph pass graph_helper)
 cc_test(graph_test SRCS graph_test.cc DEPS graph graph_helper op_registry)

--- a/paddle/fluid/framework/ir/codegen.cc
+++ b/paddle/fluid/framework/ir/codegen.cc
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#include "paddle/fluid/framework/ir/codegen.h"
+#include <set>
+#include <sstream>
+#include "paddle/fluid/framework/ir/codegen_helper.h"
+namespace paddle {
+namespace framework {
+namespace ir {
+// Builds the kernel name and parameter list for a group of operations, e.g.
+//   fuse_kernel(int N , const T* var1, const T* var2, T* var3)
+//
+// expression: the operations being fused.
+// Returns the text "fuse_kernel(int N " followed by one ", const T* var<id>"
+// per id that is only ever read, one ", T* var<id>" per id that is written by
+// some operation, and a closing ")". Ids are emitted in ascending order.
+std::string CodeGen::GetDeclarationCode(
+    std::vector<OperationExpression> expression) {
+  // Ordered sets deduplicate the ids and fix the parameter order.
+  std::set<int> input_ids;
+  std::set<int> output_ids;
+  for (auto& op : expression) {
+    for (int id : op.GetInputIds()) {
+      input_ids.insert(id);
+    }
+    output_ids.insert(op.GetOutputId());
+  }
+
+  std::stringstream ret;
+  ret << "fuse_kernel";
+  ret << R"((int N )";
+  for (int id : input_ids) {
+    // An id that some operation writes is declared once, as a mutable
+    // output below, so it must not also appear as a const input.
+    if (output_ids.count(id) == 0) {
+      ret << R"(, const T* var)" << id;
+    }
+  }
+  for (int id : output_ids) {
+    ret << R"(, T* var)" << id;
+  }
+  ret << R"())";
+  return ret.str();
+}
+// Returns the indented statement "int offset = idx;" followed by a newline;
+// the generated expressions index every variable with `offset`.
+std::string CodeGen::GetOffsetCode() {
+  std::string line(indentation);
+  line += "int offset = idx;";
+  line += '\n';
+  return line;
+}
+// Concatenates the statement produced by each operation's GetExpression(),
+// in the order the operations appear in `expression`.
+std::string CodeGen::GetComputeCode(
+    std::vector<OperationExpression> expression) {
+  std::string body;
+  for (auto& op : expression) {
+    body += op.GetExpression();
+  }
+  return body;
+}
+// Assembles the complete kernel source for a group of operations.
+// The operations are emitted in the order given, so `expression` must already
+// list each operation after the ones that produce its inputs.
+// Layout: const_kernel_start, declaration, const_kernel_mid, offset statement,
+// compute statements, const_kernel_end.
+std::string CodeGen::GetKernelCode(
+    std::vector<OperationExpression> expression) {
+  std::string kernel(const_kernel_start);
+  kernel += GetDeclarationCode(expression);
+  kernel += const_kernel_mid;
+  kernel += GetOffsetCode();
+  kernel += GetComputeCode(expression);
+  kernel += const_kernel_end;
+  return kernel;
+}
+}  // namespace ir
+}  // namespace framework
+}  // namespace paddle
--- a/paddle/fluid/framework/ir/codegen.h
+++ b/paddle/fluid/framework/ir/codegen.h
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#pragma once
+#include <string>
+#include <vector>
+#include "paddle/fluid/framework/ir/codegen_helper.h"
+namespace paddle {
+namespace framework {
+namespace ir {
+// Generates kernel source text for a sequence of OperationExpression objects.
+class CodeGen {
+ public:
+  // Returns the full kernel source for the given operations.
+  std::string GetKernelCode(std::vector<OperationExpression> expression);
+
+ private:
+  // Kernel name plus parameter list derived from the operations' ids.
+  std::string GetDeclarationCode(std::vector<OperationExpression> expression);
+  // Statement that defines the per-element `offset` variable.
+  std::string GetOffsetCode();
+  // One statement per operation, in order.
+  std::string GetComputeCode(std::vector<OperationExpression> expression);
+};
+}  // namespace ir
+}  // namespace framework
+}  // namespace paddle
--- a/paddle/fluid/framework/ir/codegen_helper.cc
+++ b/paddle/fluid/framework/ir/codegen_helper.cc
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License. */
+#include "paddle/fluid/framework/ir/codegen_helper.h"
+#include <algorithm>
+#include <sstream>
+#include <string>
+#include <vector>
+namespace paddle {
+namespace framework {
+namespace ir {
+// Stores the operand ids, the result id and the operator-table key that
+// describe a single operation.
+OperationExpression::OperationExpression(std::vector<int> input_ids,
+                                         int output_id,
+                                         std::string search_operation)
+    : input_ids_(input_ids),
+      output_id_(output_id),
+      search_operation_(search_operation) {}
+// Renders this operation as one statement of the form
+//   "    var<out>[offset] = <rhs>;\n"
+// where <rhs> is the operator_cuda_table template for search_operation_ with
+// the i-th "$" placeholder replaced by "<input_ids_[i]>[offset]".
+//
+// Returns an empty string, after logging to std::cerr, when the operation is
+// not in the table or when the template has more placeholders than there are
+// input ids. Surplus input ids are ignored.
+std::string OperationExpression::GetExpression() {
+  std::stringstream ret;
+  auto entry = operator_cuda_table.find(search_operation_);
+  if (entry == operator_cuda_table.end()) {
+    std::cerr << "Not supportted operation, " << search_operation_ << std::endl;
+    return ret.str();
+  }
+
+  // Work on a copy so the shared template in the table is left untouched.
+  std::string rhs = entry->second;
+  const std::string replaced_str = "$";
+  size_t count = 0;
+  auto pos = rhs.find(replaced_str);
+  while (pos != std::string::npos) {
+    if (count >= input_ids_.size()) {
+      // Indexing input_ids_ here would read past its end.
+      std::cerr << "Operation " << search_operation_
+                << " needs more inputs than the " << input_ids_.size()
+                << " provided" << std::endl;
+      return ret.str();
+    }
+    const std::string operand =
+        std::to_string(input_ids_[count]) + R"([offset])";
+    rhs.replace(pos, replaced_str.length(), operand);
+    // Resume after the inserted text; it never contains a placeholder.
+    pos = rhs.find(replaced_str, pos + operand.length());
+    ++count;
+  }
+
+  auto lhs = std::string(indentation) + "var" + std::to_string(output_id_) +
+             R"([offset])";
+  ret << lhs << R"( = )" << rhs << R"(;)" << std::endl;
+  return ret.str();
+}
+}  // namespace ir
+}  // namespace framework
+}  // namespace paddle
--- a/paddle/fluid/framework/ir/codegen_helper.h
+++ b/paddle/fluid/framework/ir/codegen_helper.h
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#pragma once
+#include <iostream>
+#include <string>
+#include <unordered_map>
+#include <vector>
+namespace paddle {
+namespace framework {
+namespace ir {
+static std::unordered_map<std::string, std::string> operator_cuda_table = {  // op name -> right-hand-side template; OperationExpression::GetExpression replaces each "$", in order, with "<input id>[offset]"
+    {"elementwise_add", "var$ + var$"},  // two placeholders: consumes two input ids
+    {"elementwise_sub", "var$ - var$"},
+    {"elementwise_mul", "var$ * var$"},
+    {"elementwise_div", "var$ / var$"},
+    {"elementwise_min", "real_min(var$, var$)"},
+    {"elementwise_max", "real_max(var$, var$)"},
+    {"relu", "real_max(var$, 0)"},  // one placeholder: consumes one input id
+    {"sigmoid", "1.0 / (1.0 + real_exp(-var$))"}};
+// Describes one operation of a fused computation: the ids of the variables it
+// reads, the id of the variable it writes, and the operator_cuda_table key
+// that selects its expression template. A fused computation is a sequence of
+// these objects.
+class OperationExpression {
+ public:
+  // input_ids: ids substituted, in order, for the template's placeholders.
+  // output_id: id of the variable the generated statement assigns to.
+  // search_operation: key looked up in operator_cuda_table.
+  OperationExpression(std::vector<int> input_ids, int output_id,
+                      std::string search_operation);
+  // Returns the generated assignment statement for this operation.
+  std::string GetExpression();
+  // Read-only accessors; const so they can be used on const objects.
+  std::vector<int> GetInputIds() const { return input_ids_; }
+  int GetOutputId() const { return output_id_; }
+
+ private:
+  std::vector<int> input_ids_;
+  int output_id_;
+  std::string search_operation_;
+};
+static const char indentation[] = R"(    )";  // four spaces, prepended to every generated statement
+static const char const_kernel_start[] = R"(
+template <typename T>
+extern "C" __global__ void
+)";  // text emitted before the kernel signature. NOTE(review): standard C++ does not allow a template to have C linkage; confirm how this source is compiled
+static const char const_kernel_mid[] = R"(
+{
+  for(int idx = blockIdx.x * blockDim.x + threadIdx.x;
+      idx < N;
+      idx += gridDim.x * blockDim.x) {
+)";  // opens the kernel body and the loop that steps idx up to N
+static const char const_kernel_end[] = R"(
+}
+}
+)";  // closes the loop and the kernel body opened by const_kernel_mid
+}  // namespace ir
+}  // namespace framework
+}  // namespace paddle
--- a/paddle/fluid/framework/ir/codegen_test.cc
+++ b/paddle/fluid/framework/ir/codegen_test.cc
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "paddle/fluid/framework/ir/codegen.h"
+#include <gtest/gtest.h>
+#include <string>
+#include <vector>
+#include "paddle/fluid/framework/ir/codegen_helper.h"
+#ifdef PADDLE_WITH_CUDA
+// Fuses three operations and checks the generated kernel text:
+//   var3 = var1 * var2; var5 = var3 + var4; var6 = sigmoid(var5)
+TEST(codegen, cuda) {
+  std::vector<int> mul_input{1, 2};
+  std::vector<int> add_input{3, 4};
+  std::vector<int> sigmoid_input{5};
+  int mul_out = 3;
+  int add_out = 5;
+  int sigmoid_out = 6;
+  std::string op1 = "elementwise_mul";
+  std::string op2 = "elementwise_add";
+  std::string op3 = "sigmoid";
+  paddle::framework::ir::OperationExpression opexp1(mul_input, mul_out, op1);
+  paddle::framework::ir::OperationExpression opexp2(add_input, add_out, op2);
+  paddle::framework::ir::OperationExpression opexp3(sigmoid_input, sigmoid_out,
+                                                    op3);
+  std::vector<paddle::framework::ir::OperationExpression> fused_op = {
+      opexp1, opexp2, opexp3};
+  paddle::framework::ir::CodeGen codegen;
+  std::string result = codegen.GetKernelCode(fused_op);
+  std::cout << result << std::endl;
+
+  // Ids 3 and 5 are produced inside the group, so only 1, 2 and 4 are const
+  // inputs; every written id (3, 5, 6) is a mutable parameter.
+  EXPECT_NE(result.find("fuse_kernel(int N , const T* var1, const T* var2, "
+                        "const T* var4, T* var3, T* var5, T* var6)"),
+            std::string::npos);
+  EXPECT_NE(result.find("    int offset = idx;\n"), std::string::npos);
+  EXPECT_NE(result.find("    var3[offset] = var1[offset] * var2[offset];\n"),
+            std::string::npos);
+  EXPECT_NE(result.find("    var5[offset] = var3[offset] + var4[offset];\n"),
+            std::string::npos);
+  EXPECT_NE(
+      result.find(
+          "    var6[offset] = 1.0 / (1.0 + real_exp(-var5[offset]));\n"),
+      std::string::npos);
+}
+#endif
--- a/paddle/fluid/operators/math.h
+++ b/paddle/fluid/operators/math.h
@@ -38,5 +38,9 @@ inline HOSTDEVICE float real_log(float x) { return ::logf(x); }
 inline HOSTDEVICE double real_log(double x) { return ::log(x); }
+inline HOSTDEVICE float real_min(float x, float y) { return ::fminf(x, y); }  // float overload: forwards to ::fminf
+inline HOSTDEVICE double real_min(double x, double y) { return ::fmin(x, y); }  // double overload: forwards to ::fmin
 }  // namespace operators
 }  // namespace paddle