diff --git a/paddle/fluid/framework/ir/CMakeLists.txt b/paddle/fluid/framework/ir/CMakeLists.txt
index 271e075386c58b53ead4150cfbd3899eefd36204..792df4b30d7f144cc27b0f5281baa8060abd4441 100644
--- a/paddle/fluid/framework/ir/CMakeLists.txt
+++ b/paddle/fluid/framework/ir/CMakeLists.txt
@@ -30,6 +30,8 @@ function(pass_library TARGET DEST)
   endif()
 endfunction()
 
+# codegen_helper must be declared before codegen, which lists it in DEPS.
+cc_library(codegen_helper SRCS codegen_helper.cc DEPS graph node graph_helper)
+cc_library(codegen SRCS codegen.cc DEPS codegen_helper)
 cc_library(node SRCS node.cc DEPS proto_desc)
 cc_library(graph SRCS graph.cc DEPS node pretty_log)
 cc_library(graph_helper SRCS graph_helper.cc DEPS graph)
@@ -107,6 +109,7 @@ set(GLOB_PASS_LIB ${PASS_LIBRARY} CACHE INTERNAL "Global PASS library")
 
 cc_library(pass_builder SRCS pass_builder.cc DEPS pass)
+cc_test(codegen_test SRCS codegen_test.cc DEPS codegen_helper codegen)
 cc_test(node_test SRCS node_test.cc DEPS node)
 cc_test(pass_test SRCS pass_test.cc DEPS graph pass graph_helper)
 cc_test(graph_test SRCS graph_test.cc DEPS graph graph_helper op_registry)
diff --git a/paddle/fluid/framework/ir/codegen.cc b/paddle/fluid/framework/ir/codegen.cc
new file mode 100644
index 0000000000000000000000000000000000000000..c3e5efccba570192453d4336ea36a9a550e5be4d
--- /dev/null
+++ b/paddle/fluid/framework/ir/codegen.cc
@@ -0,0 +1,96 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
*/ +#include "paddle/fluid/framework/ir/codegen.h" +#include +#include +#include "paddle/fluid/framework/ir/codegen_helper.h" +namespace paddle { +namespace framework { +namespace ir { + +// we get the parameter list code for the expression information +std::string CodeGen::GetDeclarationCode( + std::vector expression) { + std::stringstream ret; + ret << "fuse_kernel"; + ret << R"((int N )"; + std::set input_ids; + std::set output_ids; + std::vector last_output_idis; + + for (size_t i = 0; i < expression.size(); i++) { + std::vector tmp_input = expression[i].GetInputIds(); + for (size_t j = 0; j < tmp_input.size(); j++) { + int id = tmp_input[j]; + input_ids.insert(id); + } + int tmp_output = expression[i].GetOutputId(); + output_ids.insert(tmp_output); + } + + std::set::iterator it = input_ids.begin(); + while (it != input_ids.end()) { + int var_index = *it; + if (output_ids.find(var_index) != output_ids.end()) { + input_ids.erase(it++); + } else { + it++; + } + } + + for (it = input_ids.begin(); it != input_ids.end(); it++) { + int var_index = *it; + ret << R"(, const T* var)" << var_index; + } + + for (it = output_ids.begin(); it != output_ids.end(); it++) { + int var_index = *it; + ret << R"(, T* var)" << var_index; + } + + ret << R"())"; + + return ret.str(); +} + +std::string CodeGen::GetOffsetCode() { + std::stringstream ret; + ret << indentation << "int offset = idx;" << std::endl; + return ret.str(); +} + +std::string CodeGen::GetComputeCode( + std::vector expression) { + // get the right experssion code using suffix expression + std::stringstream ret; + for (size_t i = 0; i < expression.size(); i++) { + ret << expression[i].GetExpression(); + } + return ret.str(); +} +// in order to get the right result of expression, we need to calculate, we +// store the expression as +// suffix Expressions using vector +std::string CodeGen::GetKernelCode( + std::vector expression) { + auto declaration_code = GetDeclarationCode(expression); + auto offset_code = 
GetOffsetCode(); + auto compute_code = GetComputeCode(expression); + auto cuda_kernel = const_kernel_start + declaration_code + const_kernel_mid + + offset_code + compute_code + const_kernel_end; + return cuda_kernel; +} +} // namespace ir +} // namespace framework +} // namespace paddle diff --git a/paddle/fluid/framework/ir/codegen.h b/paddle/fluid/framework/ir/codegen.h new file mode 100644 index 0000000000000000000000000000000000000000..975d48885e72a3b6f6aa5cf89fa943118593834e --- /dev/null +++ b/paddle/fluid/framework/ir/codegen.h @@ -0,0 +1,36 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ +#pragma once +#include +#include +#include "paddle/fluid/framework/ir/codegen_helper.h" + +namespace paddle { +namespace framework { +namespace ir { + +class CodeGen { + public: + std::string GetKernelCode(std::vector expression); + + private: + std::string GetDeclarationCode( + std::vector expression); + std::string GetOffsetCode(); + std::string GetComputeCode( + std::vector expression); +}; +} // namespace ir +} // namespace framework +} // namespace paddle diff --git a/paddle/fluid/framework/ir/codegen_helper.cc b/paddle/fluid/framework/ir/codegen_helper.cc new file mode 100644 index 0000000000000000000000000000000000000000..8f14549eb717835063bba66503c269729ca2773d --- /dev/null +++ b/paddle/fluid/framework/ir/codegen_helper.cc @@ -0,0 +1,61 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. */ +#include "paddle/fluid/framework/ir/codegen_helper.h" +#include +#include +#include +#include +namespace paddle { +namespace framework { +namespace ir { + +OperationExpression::OperationExpression(std::vector input_ids, + int output_id, + std::string search_operation) { + input_ids_ = input_ids; + output_id_ = output_id; + search_operation_ = search_operation; +} + +// we Traverse the graph and get the group , all input id and output id is +// unique for the node which belong the group +std::string OperationExpression::GetExpression() { + std::stringstream ret; + if (operator_cuda_table.find(search_operation_) == + operator_cuda_table.end()) { + std::cerr << "Not supportted operation, " << search_operation_ << std::endl; + } else { + auto rhs = operator_cuda_table[search_operation_]; + std::string replaced_str = "$"; + int count = 0; + auto pos = rhs.find(replaced_str); + while (pos != -1) { + auto index = input_ids_[count]; + rhs.replace(pos, replaced_str.length(), + std::to_string(index) + R"([offset])"); + pos = rhs.find(replaced_str); + count++; + } + auto lhs = std::string(indentation) + "var" + std::to_string(output_id_) + + R"([offset])"; + auto equal_split = R"( = )"; + auto semicolon = R"(;)"; + ret << lhs << equal_split << rhs << semicolon << std::endl; + } + + return ret.str(); +} +} // namespace ir +} // namespace framework +} // namespace paddle diff --git 
a/paddle/fluid/framework/ir/codegen_helper.h b/paddle/fluid/framework/ir/codegen_helper.h new file mode 100644 index 0000000000000000000000000000000000000000..be8d3c8ac26fcde9e8964475709d604822c70688 --- /dev/null +++ b/paddle/fluid/framework/ir/codegen_helper.h @@ -0,0 +1,70 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ +#pragma once + +#include +#include +#include +#include + +namespace paddle { +namespace framework { +namespace ir { +static std::unordered_map operator_cuda_table = { + {"elementwise_add", "var$ + var$"}, + {"elementwise_sub", "var$ - var$"}, + {"elementwise_mul", "var$ * var$"}, + {"elementwise_div", "var$ / var$"}, + {"elementwise_min", "real_min(var$, var$)"}, + {"elementwise_max", "real_max(var$, var$)"}, + {"relu", "real_max(var$, 0)"}, + {"sigmoid", "1.0 / (1.0 + real_exp(-var$))"}}; + +// op computation is composed by single or many operation +class OperationExpression { + public: + OperationExpression(std::vector input_ids, int output_id, + std::string search_oprtation); + std::string GetExpression(); + std::vector GetInputIds() { return input_ids_; } + int GetOutputId() { return output_id_; } + + private: + std::vector input_ids_; + int output_id_; + std::string search_operation_; +}; + +static const char indentation[] = R"( )"; + +static const char const_kernel_start[] = R"( +template +extern "C" __global__ void +)"; + +static const char const_kernel_mid[] = R"( +{ + for(int idx = 
blockIdx.x * blockDim.x + threadIdx.x; + idx < N; + idx += gridDim.x * blockDim.x) { + +)"; + +static const char const_kernel_end[] = R"( +} +} +)"; +} // namespace ir +} // namespace framework +} // namespace paddle diff --git a/paddle/fluid/framework/ir/codegen_test.cc b/paddle/fluid/framework/ir/codegen_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..8fd5fde3df2c1a1876b346f747f9158a3d40499b --- /dev/null +++ b/paddle/fluid/framework/ir/codegen_test.cc @@ -0,0 +1,43 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "paddle/fluid/framework/ir/codegen.h" +#include +#include +#include +#include "paddle/fluid/framework/ir/codegen_helper.h" +#ifdef PADDLE_WITH_CUDA +TEST(codegen, cuda) { + std::vector mul_input{1, 2}; + std::vector add_input{3, 4}; + std::vector sigmod_input{5}; + int mul_out = 3; + int add_out = 5; + int sigmod_out = 6; + + std::string op1 = "elementwise_mul"; + std::string op2 = "elementwise_add"; + std::string op3 = "sigmoid"; + paddle::framework::ir::OperationExpression opexp1(mul_input, mul_out, op1); + paddle::framework::ir::OperationExpression opexp2(add_input, add_out, op2); + paddle::framework::ir::OperationExpression opexp3(sigmod_input, sigmod_out, + op3); + + std::vector fused_op = { + opexp1, opexp2, opexp3}; + paddle::framework::ir::CodeGen codegen; + std::string result = codegen.GetKernelCode(fused_op); + std::cout << result << std::endl; +} +#endif diff --git a/paddle/fluid/operators/math.h b/paddle/fluid/operators/math.h index 8cc24200d37dffaff1deda2f0181e5875141add0..3b28928a52892db865523c71ea72b234bd1a5edc 100644 --- a/paddle/fluid/operators/math.h +++ b/paddle/fluid/operators/math.h @@ -38,5 +38,9 @@ inline HOSTDEVICE float real_log(float x) { return ::logf(x); } inline HOSTDEVICE double real_log(double x) { return ::log(x); } +inline HOSTDEVICE float real_min(float x, float y) { return ::fminf(x, y); } + +inline HOSTDEVICE double real_min(double x, double y) { return ::fmin(x, y); } + } // namespace operators } // namespace paddle