From b21770a2aae5f944171fde79471c661d83895374 Mon Sep 17 00:00:00 2001 From: nhzlx Date: Wed, 20 Mar 2019 12:25:33 +0000 Subject: [PATCH] cherry-pick from feature/anakin-engine: Add subgraph fuse support and anakin engine #16018 --- .../inference/anakin/convert/CMakeLists.txt | 20 +- .../inference/anakin/convert/activation.cc | 4 - .../inference/anakin/convert/activation.h | 11 +- .../fluid/inference/anakin/convert/concat.cc | 2 +- .../fluid/inference/anakin/convert/conv2d.cc | 5 +- .../inference/anakin/convert/conv2d_fusion.cc | 113 ++++++++ .../inference/anakin/convert/conv2d_fusion.h | 35 +++ .../inference/anakin/convert/elementwise.cc | 57 ++++ .../inference/anakin/convert/elementwise.h | 37 +++ paddle/fluid/inference/anakin/convert/fc.cc | 71 ++++- paddle/fluid/inference/anakin/convert/fc.h | 18 +- .../inference/anakin/convert/op_converter.h | 8 - paddle/fluid/inference/anakin/convert/relu.cc | 47 +++ paddle/fluid/inference/anakin/convert/relu.h | 37 +++ .../anakin/convert/test_activation_op.cc | 3 - .../anakin/convert/test_concat_op.cc | 8 +- .../anakin/convert/test_conv2d_op.cc | 8 +- .../anakin/convert/test_elementwise_op.cc | 50 ++++ .../inference/anakin/convert/test_fc_op.cc | 10 +- .../inference/anakin/convert/test_relu_op.cc | 50 ++++ .../inference/anakin/convert/ut_helper.h | 7 +- paddle/fluid/inference/anakin/engine.cc | 17 +- paddle/fluid/inference/anakin/engine.h | 8 +- paddle/fluid/inference/anakin/op_teller.cc | 15 +- .../inference/analysis/ir_pass_manager.cc | 12 +- .../analysis/ir_passes/CMakeLists.txt | 12 + .../ir_passes/anakin_subgraph_pass.cc | 273 ++++++++++++++++++ .../analysis/ir_passes/anakin_subgraph_pass.h | 37 +++ .../analysis/ir_passes/subgraph_detector.cc | 4 +- .../ir_params_sync_among_devices_pass.cc | 1 + paddle/fluid/inference/api/CMakeLists.txt | 4 + .../fluid/inference/api/analysis_predictor.cc | 13 + paddle/fluid/operators/CMakeLists.txt | 4 + paddle/fluid/operators/anakin/CMakeLists.txt | 2 + .../operators/anakin/anakin_engine_op.cc | 54 ++++ .../fluid/operators/anakin/anakin_engine_op.h | 152 ++++++++++ 36 files changed, 1129 insertions(+), 80 deletions(-) create mode 100644 paddle/fluid/inference/anakin/convert/conv2d_fusion.cc create mode 100644 paddle/fluid/inference/anakin/convert/conv2d_fusion.h create mode 100644 paddle/fluid/inference/anakin/convert/elementwise.cc create mode 100644 paddle/fluid/inference/anakin/convert/elementwise.h create mode 100644 paddle/fluid/inference/anakin/convert/relu.cc create mode 100644 paddle/fluid/inference/anakin/convert/relu.h create mode 100644 paddle/fluid/inference/anakin/convert/test_elementwise_op.cc create mode 100644 paddle/fluid/inference/anakin/convert/test_relu_op.cc create mode 100644 paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.cc create mode 100644 paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.h create mode 100644 paddle/fluid/operators/anakin/CMakeLists.txt create mode 100644 paddle/fluid/operators/anakin/anakin_engine_op.cc create mode 100644 paddle/fluid/operators/anakin/anakin_engine_op.h diff --git a/paddle/fluid/inference/anakin/convert/CMakeLists.txt b/paddle/fluid/inference/anakin/convert/CMakeLists.txt index 268bb0b2933..dc7fb7f79c0 100644 --- a/paddle/fluid/inference/anakin/convert/CMakeLists.txt +++ b/paddle/fluid/inference/anakin/convert/CMakeLists.txt @@ -1,8 +1,12 @@ -cc_library(anakin_op_converter SRCS fc.cc conv2d.cc activation.cc pool2d.cc concat.cc split.cc DEPS anakin_engine framework_proto scope op_registry) -cc_test(test_anakin_fc SRCS 
test_fc_op.cc DEPS ${FLUID_CORE_MODULES} ${GLOB_OPERATOR_DEPS} anakin_op_converter mul_op) -cc_test(test_anakin_conv2d SRCS test_conv2d_op.cc DEPS ${FLUID_CORE_MODULES} ${GLOB_OPERATOR_DEPS} anakin_op_converter conv_op im2col vol2col depthwise_conv SERIAL) -cc_test(test_anakin_activation SRCS test_activation_op.cc DEPS ${FLUID_CORE_MODULES} ${GLOB_OPERATOR_DEPS} activation_op anakin_op_converter - SERIAL) -cc_test(test_anakin_pool2d SRCS test_pool2d_op.cc DEPS ${FLUID_CORE_MODULES} ${GLOB_OPERATOR_DEPS} anakin_op_converter pool_op pooling) -cc_test(test_anakin_concat SRCS test_concat_op.cc DEPS ${FLUID_CORE_MODULES} ${GLOB_OPERATOR_DEPS} anakin_op_converter concat_op concat_and_split) -cc_test(test_anakin_split SRCS test_split_op.cc DEPS ${FLUID_CORE_MODULES} ${GLOB_OPERATOR_DEPS} anakin_op_converter split_op concat_and_split) +cc_library(anakin_op_converter SRCS fc.cc conv2d.cc conv2d_fusion.cc +elementwise.cc activation.cc pool2d.cc concat.cc split.cc relu.cc DEPS anakin_engine framework_proto scope op_registry) +cc_test(test_anakin_fc SRCS test_fc_op.cc DEPS anakin_op_converter mul_op) +cc_test(test_anakin_conv2d SRCS test_conv2d_op.cc DEPS anakin_op_converter conv_op im2col vol2col depthwise_conv SERIAL) +cc_test(test_anakin_activation SRCS test_activation_op.cc DEPS activation_op anakin_op_converter SERIAL) +cc_test(test_anakin_pool2d SRCS test_pool2d_op.cc DEPS anakin_op_converter pool_op pooling) +cc_test(test_anakin_concat SRCS test_concat_op.cc DEPS anakin_op_converter concat_op concat_and_split) +cc_test(test_anakin_split SRCS test_split_op.cc DEPS anakin_op_converter split_op concat_and_split) +cc_test(test_anakin_elementwise SRCS test_elementwise_op.cc DEPS +anakin_op_converter elementwise_add_op) + +cc_test(test_anakin_relu SRCS test_relu_op.cc DEPS activation_op anakin_op_converter SERIAL) diff --git a/paddle/fluid/inference/anakin/convert/activation.cc b/paddle/fluid/inference/anakin/convert/activation.cc index 786869e3dbe..4939d28045b 100644 --- a/paddle/fluid/inference/anakin/convert/activation.cc +++ b/paddle/fluid/inference/anakin/convert/activation.cc @@ -45,15 +45,11 @@ void ActivationOpConverter::operator()(const framework::proto::OpDesc &op, auto output_name = op_desc.Output("Out").front(); engine_->AddOp(op_name, "Activation", {input_name}, {output_name}); engine_->AddOpAttr(op_name, "type", anakin_op_type_); - if (op_type_ == "relu") { - engine_->AddOpAttr(op_name, "alpha", 0); - } } } // namespace anakin } // namespace inference } // namespace paddle -REGISTER_ANAKIN_OP_CONVERTER(relu, ReluOpConverter); REGISTER_ANAKIN_OP_CONVERTER(sigmoid, SigmoidOpConverter); REGISTER_ANAKIN_OP_CONVERTER(tanh, TanhOpConverter); diff --git a/paddle/fluid/inference/anakin/convert/activation.h b/paddle/fluid/inference/anakin/convert/activation.h index f1db154a12c..527b0d3360b 100644 --- a/paddle/fluid/inference/anakin/convert/activation.h +++ b/paddle/fluid/inference/anakin/convert/activation.h @@ -34,13 +34,8 @@ class ActivationOpConverter : public AnakinOpConverter { private: std::string op_type_; std::string anakin_op_type_; - std::map anakin_ops_type_{ - {"relu", "Relu"}, {"tanh", "TanH"}, {"sigmoid", "Sigmoid"}}; -}; - -class ReluOpConverter : public ActivationOpConverter { - public: - ReluOpConverter() : ActivationOpConverter("relu") {} + std::map anakin_ops_type_{{"tanh", "TanH"}, + {"sigmoid", "Sigmoid"}}; }; class TanhOpConverter : public ActivationOpConverter { @@ -50,7 +45,7 @@ class TanhOpConverter : public ActivationOpConverter { class SigmoidOpConverter : public 
ActivationOpConverter { public: - SigmoidOpConverter() : ActivationOpConverter("tanh") {} + SigmoidOpConverter() : ActivationOpConverter("sigmoid") {} }; } // namespace anakin } // namespace inference diff --git a/paddle/fluid/inference/anakin/convert/concat.cc b/paddle/fluid/inference/anakin/convert/concat.cc index dbd0c6bc0d8..42253071373 100644 --- a/paddle/fluid/inference/anakin/convert/concat.cc +++ b/paddle/fluid/inference/anakin/convert/concat.cc @@ -32,8 +32,8 @@ void ConcatOpConverter::operator()(const framework::proto::OpDesc &op, const framework::Scope &scope, bool test_mode) { framework::OpDesc op_desc(op, nullptr); - auto input_names = op_desc.Input("X"); int axis = boost::get(op_desc.GetAttr("axis")); + auto input_names = op_desc.Input("X"); PADDLE_ENFORCE(axis > 0, "The axis attr of Concat op should be large than 0 for trt"); diff --git a/paddle/fluid/inference/anakin/convert/conv2d.cc b/paddle/fluid/inference/anakin/convert/conv2d.cc index af0a39fdfd9..b99c6e71c4d 100644 --- a/paddle/fluid/inference/anakin/convert/conv2d.cc +++ b/paddle/fluid/inference/anakin/convert/conv2d.cc @@ -51,10 +51,11 @@ void Conv2dOpConverter::operator()(const framework::proto::OpDesc &op, PADDLE_ENFORCE_EQ(weight_tensor->dims().size(), 4UL); // const int n_output = weight_tensor->dims()[0]; - const int n_input = weight_tensor->dims()[1]; + // const int n_input = weight_tensor->dims()[1]; const int filter_h = weight_tensor->dims()[2]; const int filter_w = weight_tensor->dims()[3]; - auto filter_num = n_input * filter_h * filter_w; + // auto filter_num = n_input * filter_h * filter_w ; + auto filter_num = weight_tensor->dims()[0]; engine_->AddOpAttr(op_name, "filter_num", filter_num); engine_->AddOpAttr>(op_name, "kernel_size", {filter_h, filter_w}); auto strides = boost::get>(op_desc.GetAttr("strides")); diff --git a/paddle/fluid/inference/anakin/convert/conv2d_fusion.cc b/paddle/fluid/inference/anakin/convert/conv2d_fusion.cc new file mode 100644 index 00000000000..4d105430dd2 --- /dev/null +++ b/paddle/fluid/inference/anakin/convert/conv2d_fusion.cc @@ -0,0 +1,113 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "paddle/fluid/inference/anakin/convert/conv2d_fusion.h" +#include +#include +#include + +using anakin::graph::GraphGlobalMem; +using anakin::AK_FLOAT; +using anakin::saber::NV; +using anakin::saber::Shape; +using anakin::PTuple; + +namespace paddle { +namespace inference { +namespace anakin { + +void Conv2dFusionOpConverter::operator()(const framework::proto::OpDesc &op, + const framework::Scope &scope, + bool test_mode) { + framework::OpDesc op_desc(op, nullptr); + PADDLE_ENFORCE_EQ(op_desc.Input("Input").size(), 1UL); + PADDLE_ENFORCE_EQ(op_desc.Input("Filter").size(), 1UL); + PADDLE_ENFORCE_EQ(op_desc.Input("Bias").size(), 1UL); + PADDLE_ENFORCE_EQ(op_desc.Output("Output").size(), 1UL); + + auto input_name = op_desc.Input("Input").front(); + auto output_name = op_desc.Output("Output").front(); + auto op_name = op_desc.Type() + ":" + op_desc.Output("Output").front(); + engine_->AddOp(op_name, "Convolution", {input_name}, {output_name}); + + auto *filter_v = scope.FindVar(op_desc.Input("Filter").front()); + PADDLE_ENFORCE_NOT_NULL(filter_v); + auto *filter_t = filter_v->GetMutable(); + + auto *b_v = scope.FindVar(op_desc.Input("Bias").front()); + PADDLE_ENFORCE_NOT_NULL(b_v); + auto *b_t = b_v->GetMutable(); + + std::unique_ptr weight_tensor( + new framework::LoDTensor()); + weight_tensor->Resize(filter_t->dims()); + TensorCopySync((*filter_t), platform::CPUPlace(), weight_tensor.get()); + + PADDLE_ENFORCE_EQ(weight_tensor->dims().size(), 4UL); + + // const int n_output = weight_tensor->dims()[0]; + // const int n_input = weight_tensor->dims()[1]; + const int filter_h = weight_tensor->dims()[2]; + const int filter_w = weight_tensor->dims()[3]; + // auto filter_num = n_input * filter_h * filter_w ; + auto filter_num = weight_tensor->dims()[0]; + engine_->AddOpAttr(op_name, "filter_num", filter_num); + engine_->AddOpAttr>(op_name, "kernel_size", {filter_h, filter_w}); + auto strides = boost::get>(op_desc.GetAttr("strides")); + engine_->AddOpAttr>(op_name, "strides", strides); + auto paddings = boost::get>(op_desc.GetAttr("paddings")); + engine_->AddOpAttr>(op_name, "padding", paddings); + auto dilations = boost::get>(op_desc.GetAttr("dilations")); + engine_->AddOpAttr>(op_name, "dilation_rate", dilations); + const int groups = boost::get(op_desc.GetAttr("groups")); + engine_->AddOpAttr(op_name, "group", groups); + engine_->AddOpAttr(op_name, "axis", 1); + engine_->AddOpAttr(op_name, "bias_term", true); + + auto weight_shape = framework::vectorize2int(filter_t->dims()); + Shape anakin_shape(weight_shape); + auto *weight1 = + GraphGlobalMem::Global().template new_block(anakin_shape); + float *cpu_data = static_cast(weight1->h_tensor().mutable_data()); + std::copy_n(weight_tensor->data(), weight_tensor->numel(), cpu_data); + weight1->d_tensor().set_shape(anakin_shape); + weight1->d_tensor().copy_from(weight1->h_tensor()); + engine_->AddOpAttr(op_name, "weight_1", *weight1); + + auto bias_shape = framework::vectorize2int(b_t->dims()); + framework::LoDTensor bias_tensor; + bias_tensor.Resize(b_t->dims()); + TensorCopySync((*b_t), platform::CPUPlace(), &bias_tensor); + auto *bias_data = bias_tensor.data(); + bias_shape.insert(bias_shape.begin(), 1); + bias_shape.insert(bias_shape.begin(), 1); + bias_shape.insert(bias_shape.begin(), 1); + // bias_shape.push_back(1); + // bias_shape.push_back(1); + Shape anakin_bias_shape(bias_shape); + + auto *weight2 = GraphGlobalMem::Global().template new_block( + anakin_bias_shape); + float *cpu_data2 = 
static_cast<float *>(weight2->h_tensor().mutable_data()); + std::copy_n(bias_data, bias_tensor.numel(), cpu_data2); + weight2->d_tensor().set_shape(anakin_bias_shape); + weight2->d_tensor().copy_from(weight2->h_tensor()); + engine_->AddOpAttr(op_name, "weight_2", *weight2); +} + +} // namespace anakin +} // namespace inference +} // namespace paddle + +REGISTER_ANAKIN_OP_CONVERTER(conv2d_fusion, Conv2dFusionOpConverter); diff --git a/paddle/fluid/inference/anakin/convert/conv2d_fusion.h b/paddle/fluid/inference/anakin/convert/conv2d_fusion.h new file mode 100644 index 00000000000..07359b9cba0 --- /dev/null +++ b/paddle/fluid/inference/anakin/convert/conv2d_fusion.h @@ -0,0 +1,35 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "paddle/fluid/inference/anakin/convert/op_converter.h" + +namespace paddle { +namespace inference { +namespace anakin { + +class Conv2dFusionOpConverter : public AnakinOpConverter { + public: + Conv2dFusionOpConverter() = default; + + virtual void operator()(const framework::proto::OpDesc &op, + const framework::Scope &scope, + bool test_mode) override; + virtual ~Conv2dFusionOpConverter() {} +}; + +} // namespace anakin +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/anakin/convert/elementwise.cc b/paddle/fluid/inference/anakin/convert/elementwise.cc new file mode 100644 index 00000000000..d400e617d87 --- /dev/null +++ b/paddle/fluid/inference/anakin/convert/elementwise.cc @@ -0,0 +1,57 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
+ +#include "paddle/fluid/inference/anakin/convert/elementwise.h" +#include +#include +#include + +using anakin::graph::GraphGlobalMem; +using anakin::AK_FLOAT; +using anakin::Precision; +using anakin::saber::NV; +using anakin::saber::X86; +using anakin::saber::Shape; +using anakin::PBlock; +using anakin::PTuple; + +namespace paddle { +namespace inference { +namespace anakin { + +void ElementwiseAddOpConverter::operator()(const framework::proto::OpDesc &op, + const framework::Scope &scope, + bool test_mode) { + framework::OpDesc op_desc(op, nullptr); + PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1); + PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1); // Y is a weight + PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1); + + auto x_name = op_desc.Input("X").front(); + auto y_name = op_desc.Input("Y").front(); + auto out_name = op_desc.Output("Out").front(); + auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front(); + + engine_->AddOp(op_name, "Eltwise", {x_name, y_name}, {out_name}); + std::string elementwise_type = "Add"; + engine_->AddOpAttr(op_name, "type", elementwise_type); + std::vector coeff = {1.0, 1.0}; + engine_->AddOpAttr>(op_name, "coeff", coeff); +} + +} // namespace anakin +} // namespace inference +} // namespace paddle + +REGISTER_ANAKIN_OP_CONVERTER(elementwise_add, ElementwiseAddOpConverter); diff --git a/paddle/fluid/inference/anakin/convert/elementwise.h b/paddle/fluid/inference/anakin/convert/elementwise.h new file mode 100644 index 00000000000..d41a593803d --- /dev/null +++ b/paddle/fluid/inference/anakin/convert/elementwise.h @@ -0,0 +1,37 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once + +#include "paddle/fluid/inference/anakin/convert/op_converter.h" + +namespace paddle { +namespace inference { +namespace anakin { + +class ElementwiseAddOpConverter : public AnakinOpConverter { + public: + ElementwiseAddOpConverter() = default; + + virtual void operator()(const framework::proto::OpDesc &op, + const framework::Scope &scope, + bool test_mode) override; + virtual ~ElementwiseAddOpConverter() {} + + private: +}; + +} // namespace anakin +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/anakin/convert/fc.cc b/paddle/fluid/inference/anakin/convert/fc.cc index c88e3af33c9..8638c5b5f05 100644 --- a/paddle/fluid/inference/anakin/convert/fc.cc +++ b/paddle/fluid/inference/anakin/convert/fc.cc @@ -14,6 +14,8 @@ #include "paddle/fluid/inference/anakin/convert/fc.h" #include +#include +#include using anakin::graph::GraphGlobalMem; using anakin::AK_FLOAT; @@ -24,28 +26,39 @@ namespace paddle { namespace inference { namespace anakin { -void FcOpConverter::operator()(const framework::proto::OpDesc &op, - const framework::Scope &scope, bool test_mode) { +void FcBaseOpConverter::operator()(const framework::proto::OpDesc &op, + const framework::Scope &scope, + bool test_mode) { framework::OpDesc op_desc(op, nullptr); - PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1); - PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1); - PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1); + auto input_names = op_desc.InputNames(); + bool with_bias = input_names.size() == 3; + + std::string w_name = "Y"; + std::string i_name = "X"; + if (with_bias) { + w_name = "W"; + i_name = "Input"; + } - auto x_name = op_desc.Input("X").front(); auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front(); - auto *y_v = scope.FindVar(op_desc.Input("Y").front()); + + // get weights + auto *y_v = scope.FindVar(op_desc.Input(w_name).front()); PADDLE_ENFORCE_NOT_NULL(y_v); auto *y_t = y_v->GetMutable(); - auto input_name = op_desc.Input("X").front(); + auto input_name = op_desc.Input(i_name).front(); auto output_name = op_desc.Output("Out").front(); - auto weight_shape = framework::vectorize2int(y_t->dims()); engine_->AddOp(op_name, "Dense", {input_name}, {output_name}); - engine_->AddOpAttr(op_name, "bias_term", false); + engine_->AddOpAttr(op_name, "bias_term", with_bias); engine_->AddOpAttr(op_name, "axis", 1); + + auto weight_shape = framework::vectorize2int(y_t->dims()); int out_dim = weight_shape[1]; engine_->AddOpAttr(op_name, "out_dim", out_dim); + const int w_m = weight_shape[0]; + const int w_k = weight_shape[1]; weight_shape.push_back(1); weight_shape.push_back(1); @@ -54,18 +67,54 @@ void FcOpConverter::operator()(const framework::proto::OpDesc &op, framework::LoDTensor weight_tensor; weight_tensor.Resize(y_t->dims()); TensorCopySync((*y_t), platform::CPUPlace(), &weight_tensor); + auto *weight_data = weight_tensor.data(); + PADDLE_ENFORCE(w_m * w_k == weight_tensor.numel()); + std::vector trans_weight_data(weight_tensor.numel()); + for (int i = 0; i < w_m; i++) { + for (int j = 0; j < w_k; j++) { + trans_weight_data[i + j * w_m] = weight_data[i * w_k + j]; + } + } auto *weight1 = GraphGlobalMem::Global().template new_block(anakin_shape); float *cpu_data = static_cast(weight1->h_tensor().mutable_data()); - std::copy_n(weight_tensor.data(), weight_tensor.numel(), cpu_data); + std::copy_n(trans_weight_data.data(), weight_tensor.numel(), cpu_data); weight1->d_tensor().set_shape(anakin_shape); weight1->d_tensor().copy_from(weight1->h_tensor()); 
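+ // The transpose above reorders the fluid mul weight from (in_dim, out_dim)
+ // to (out_dim, in_dim), the layout Anakin's Dense op consumes, before it is
+ // bound as weight_1 below.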
engine_->AddOpAttr(op_name, "weight_1", *weight1); + + // get bias + if (with_bias) { + auto *b_v = scope.FindVar(op_desc.Input("Bias").front()); + PADDLE_ENFORCE_NOT_NULL(b_v); + auto *b_t = b_v->GetMutable(); + + auto bias_shape = framework::vectorize2int(b_t->dims()); + framework::LoDTensor bias_tensor; + bias_tensor.Resize(b_t->dims()); + TensorCopySync((*b_t), platform::CPUPlace(), &bias_tensor); + auto *bias_data = bias_tensor.data(); + bias_shape.insert(bias_shape.begin(), 1); + bias_shape.insert(bias_shape.begin(), 1); + bias_shape.insert(bias_shape.begin(), 1); + // bias_shape.push_back(1); + // bias_shape.push_back(1); + Shape anakin_bias_shape(bias_shape); + + auto *weight2 = GraphGlobalMem::Global().template new_block( + anakin_bias_shape); + float *cpu_data2 = static_cast(weight2->h_tensor().mutable_data()); + std::copy_n(bias_data, bias_tensor.numel(), cpu_data2); + weight2->d_tensor().set_shape(anakin_bias_shape); + weight2->d_tensor().copy_from(weight2->h_tensor()); + engine_->AddOpAttr(op_name, "weight_2", *weight2); + } } } // namespace anakin } // namespace inference } // namespace paddle +REGISTER_ANAKIN_OP_CONVERTER(mul, MulOpConverter); REGISTER_ANAKIN_OP_CONVERTER(fc, FcOpConverter); diff --git a/paddle/fluid/inference/anakin/convert/fc.h b/paddle/fluid/inference/anakin/convert/fc.h index e67c8dedb31..060c649b19e 100644 --- a/paddle/fluid/inference/anakin/convert/fc.h +++ b/paddle/fluid/inference/anakin/convert/fc.h @@ -20,14 +20,26 @@ namespace paddle { namespace inference { namespace anakin { -class FcOpConverter : public AnakinOpConverter { +class FcBaseOpConverter : public AnakinOpConverter { public: - FcOpConverter() = default; + FcBaseOpConverter() = default; virtual void operator()(const framework::proto::OpDesc &op, const framework::Scope &scope, bool test_mode) override; - virtual ~FcOpConverter() {} + virtual ~FcBaseOpConverter() {} +}; + +// with bias +class FcOpConverter : public FcBaseOpConverter { + public: + FcOpConverter() = default; +}; + +// without bias +class MulOpConverter : public FcBaseOpConverter { + public: + MulOpConverter() = default; }; } // namespace anakin diff --git a/paddle/fluid/inference/anakin/convert/op_converter.h b/paddle/fluid/inference/anakin/convert/op_converter.h index ee4d7948783..0d214d82eb8 100644 --- a/paddle/fluid/inference/anakin/convert/op_converter.h +++ b/paddle/fluid/inference/anakin/convert/op_converter.h @@ -47,14 +47,6 @@ class AnakinOpConverter { std::string op_type = op_desc.Type(); AnakinOpConverter *it = nullptr; - if (op_type == "mul") { - PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1UL); - std::string Y = op_desc.Input("Y")[0]; - if (parameters.count(Y)) { - it = Registry::Global().Lookup("fc"); - } - } - if (!it) { it = Registry::Global().Lookup(op_type); } diff --git a/paddle/fluid/inference/anakin/convert/relu.cc b/paddle/fluid/inference/anakin/convert/relu.cc new file mode 100644 index 00000000000..2ce96db1804 --- /dev/null +++ b/paddle/fluid/inference/anakin/convert/relu.cc @@ -0,0 +1,47 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/inference/anakin/convert/relu.h" +#include +#include + +using anakin::graph::GraphGlobalMem; +using anakin::AK_FLOAT; +using anakin::saber::NV; +using anakin::saber::Shape; + +namespace paddle { +namespace inference { +namespace anakin { + +void ReluOpConverter::operator()(const framework::proto::OpDesc &op, + const framework::Scope &scope, + bool test_mode) { + framework::OpDesc op_desc(op, nullptr); + PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1); + PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1); + + auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front(); + auto input_name = op_desc.Input("X").front(); + auto output_name = op_desc.Output("Out").front(); + + engine_->AddOp(op_name, "ReLU", {input_name}, {output_name}); + engine_->AddOpAttr(op_name, "alpha", 0); +} + +} // namespace anakin +} // namespace inference +} // namespace paddle + +REGISTER_ANAKIN_OP_CONVERTER(relu, ReluOpConverter); diff --git a/paddle/fluid/inference/anakin/convert/relu.h b/paddle/fluid/inference/anakin/convert/relu.h new file mode 100644 index 00000000000..54c4c2316eb --- /dev/null +++ b/paddle/fluid/inference/anakin/convert/relu.h @@ -0,0 +1,37 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once + +#include +#include +#include "paddle/fluid/inference/anakin/convert/op_converter.h" + +namespace paddle { +namespace inference { +namespace anakin { + +class ReluOpConverter : public AnakinOpConverter { + public: + ReluOpConverter() = default; + + virtual void operator()(const framework::proto::OpDesc &op, + const framework::Scope &scope, + bool test_mode) override; + virtual ~ReluOpConverter() {} +}; + +} // namespace anakin +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/anakin/convert/test_activation_op.cc b/paddle/fluid/inference/anakin/convert/test_activation_op.cc index 356dfea29f6..6a81ec54ec4 100644 --- a/paddle/fluid/inference/anakin/convert/test_activation_op.cc +++ b/paddle/fluid/inference/anakin/convert/test_activation_op.cc @@ -41,16 +41,13 @@ static void test_activation_op(const std::string &op_type) { validator.Execute(5); } -TEST(relu_op, test) { test_activation_op("relu"); } TEST(sigm_op, test) { test_activation_op("sigmoid"); } TEST(tanh_op, test) { test_activation_op("tanh"); } } // namespace anakin } // namespace inference } // namespace paddle -USE_OP(relu); USE_OP(sigmoid); USE_OP(tanh); -USE_ANAKIN_CONVERTER(relu); USE_ANAKIN_CONVERTER(sigmoid); USE_ANAKIN_CONVERTER(tanh); diff --git a/paddle/fluid/inference/anakin/convert/test_concat_op.cc b/paddle/fluid/inference/anakin/convert/test_concat_op.cc index 780e9acee12..0944e8dbdc9 100644 --- a/paddle/fluid/inference/anakin/convert/test_concat_op.cc +++ b/paddle/fluid/inference/anakin/convert/test_concat_op.cc @@ -25,10 +25,10 @@ TEST(concat_op, test) { std::unordered_set parameters({""}); framework::Scope scope; AnakinConvertValidation validator(parameters, scope); - validator.DeclInputVar("concat_x1", {1, 10, 3, 1}); - validator.DeclInputVar("concat_x2", {1, 3, 3, 1}); - validator.DeclInputVar("concat_x3", {1, 7, 3, 1}); - validator.DeclOutputVar("concat_out", {1, 20, 3, 1}); + validator.DeclInputVar("concat_x1", {1, 2, 1, 1}); + validator.DeclInputVar("concat_x2", {1, 3, 1, 1}); + validator.DeclInputVar("concat_x3", {1, 1, 1, 1}); + validator.DeclOutputVar("concat_out", {1, 6, 1, 1}); // Prepare Op description framework::OpDesc desc; diff --git a/paddle/fluid/inference/anakin/convert/test_conv2d_op.cc b/paddle/fluid/inference/anakin/convert/test_conv2d_op.cc index 3049d3d0334..76f11c7b958 100644 --- a/paddle/fluid/inference/anakin/convert/test_conv2d_op.cc +++ b/paddle/fluid/inference/anakin/convert/test_conv2d_op.cc @@ -28,9 +28,9 @@ TEST(conv2d_op, test) { std::unordered_set parameters({"conv2d-Y"}); framework::Scope scope; AnakinConvertValidation validator(parameters, scope); - validator.DeclInputVar("conv2d-X", {1, 2, 5, 5}); - validator.DeclParamVar("conv2d-Y", {3, 2, 3, 3}); - validator.DeclOutputVar("conv2d-Out", {1, 3, 5, 5}); + validator.DeclInputVar("conv2d-X", {1, 3, 3, 3}); + validator.DeclParamVar("conv2d-Y", {4, 3, 1, 1}); + validator.DeclOutputVar("conv2d-Out", {1, 4, 3, 3}); // Prepare Op description framework::OpDesc desc; @@ -40,7 +40,7 @@ TEST(conv2d_op, test) { desc.SetOutput("Output", {"conv2d-Out"}); const std::vector strides({1, 1}); - const std::vector paddings({1, 1}); + const std::vector paddings({0, 0}); const std::vector dilations({1, 1}); const int groups = 1; diff --git a/paddle/fluid/inference/anakin/convert/test_elementwise_op.cc b/paddle/fluid/inference/anakin/convert/test_elementwise_op.cc new file mode 100644 index 00000000000..a4298500c46 --- /dev/null +++ b/paddle/fluid/inference/anakin/convert/test_elementwise_op.cc @@ -0,0 
+1,50 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include "paddle/fluid/inference/anakin/convert/op_converter.h" +#include "paddle/fluid/inference/anakin/convert/ut_helper.h" + +namespace paddle { +namespace inference { +namespace anakin { + +TEST(elementwise_op, native) { + std::unordered_set parameters; + framework::Scope scope; + AnakinConvertValidation validator(parameters, scope); + validator.DeclInputVar("elementwise_add_x", {1, 1, 2, 2}); + validator.DeclInputVar("elementwise_y", {1, 1, 2, 2}); + validator.DeclOutputVar("elementwise_out", {1, 1, 2, 2}); + + // Prepare Op description + framework::OpDesc desc; + desc.SetType("elementwise_add"); + desc.SetInput("X", {"elementwise_add_x"}); + desc.SetInput("Y", {"elementwise_y"}); + desc.SetOutput("Out", {"elementwise_out"}); + + int axis = -1; + desc.SetAttr("axis", axis); + + validator.SetOp(*desc.Proto()); + validator.Execute(1); +} + +} // namespace anakin +} // namespace inference +} // namespace paddle + +USE_OP(elementwise_add); +USE_ANAKIN_CONVERTER(elementwise_add); diff --git a/paddle/fluid/inference/anakin/convert/test_fc_op.cc b/paddle/fluid/inference/anakin/convert/test_fc_op.cc index b4e20e1e470..c72974cd596 100644 --- a/paddle/fluid/inference/anakin/convert/test_fc_op.cc +++ b/paddle/fluid/inference/anakin/convert/test_fc_op.cc @@ -27,9 +27,9 @@ TEST(fc_op, test) { std::unordered_set parameters({"mul_y"}); framework::Scope scope; AnakinConvertValidation validator(parameters, scope); - validator.DeclInputVar("mul_x", {1, 1, 1, 1}); - validator.DeclParamVar("mul_y", {1, 2}); - validator.DeclOutputVar("mul_out", {1, 1, 1, 2}); + validator.DeclInputVar("mul_x", {1, 1, 2, 2}); + validator.DeclParamVar("mul_y", {4, 2}); + validator.DeclOutputVar("mul_out", {1, 2}); // Prepare Op description framework::OpDesc desc; @@ -37,8 +37,8 @@ TEST(fc_op, test) { desc.SetInput("X", {"mul_x"}); desc.SetInput("Y", {"mul_y"}); desc.SetOutput("Out", {"mul_out"}); - int num_flatten_dims = 3; - desc.SetAttr("x_num_col_dims", num_flatten_dims); + // int num_flatten_dims = 3; + // desc.SetAttr("x_num_col_dims", num_flatten_dims); validator.SetOp(*desc.Proto()); validator.Execute(10); diff --git a/paddle/fluid/inference/anakin/convert/test_relu_op.cc b/paddle/fluid/inference/anakin/convert/test_relu_op.cc new file mode 100644 index 00000000000..1695c0dbf05 --- /dev/null +++ b/paddle/fluid/inference/anakin/convert/test_relu_op.cc @@ -0,0 +1,50 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. */ + +#include <gtest/gtest.h> +#include "paddle/fluid/inference/anakin/convert/op_converter.h" +#include "paddle/fluid/inference/anakin/convert/relu.h" +#include "paddle/fluid/inference/anakin/convert/ut_helper.h" + +namespace paddle { +namespace inference { +namespace anakin { + +static void test_activation_op(const std::string &op_type) { + auto *converter = Registry<AnakinOpConverter>::Global().Lookup(op_type); + PADDLE_ENFORCE(converter != nullptr); + std::unordered_set<std::string> parameters; + framework::Scope scope; + AnakinConvertValidation validator(parameters, scope); + validator.DeclInputVar("act-X", {10, 6, 1, 1}); + validator.DeclOutputVar("act-Out", {10, 6, 1, 1}); + framework::OpDesc desc; + desc.SetType(op_type); + desc.SetInput("X", {"act-X"}); + desc.SetOutput("Out", {"act-Out"}); + + LOG(INFO) << "set OP"; + validator.SetOp(*desc.Proto()); + LOG(INFO) << "execute"; + + validator.Execute(5); +} + +TEST(relu_op, test) { test_activation_op("relu"); } +} // namespace anakin +} // namespace inference +} // namespace paddle + +USE_OP(relu); +USE_ANAKIN_CONVERTER(relu); diff --git a/paddle/fluid/inference/anakin/convert/ut_helper.h b/paddle/fluid/inference/anakin/convert/ut_helper.h index e4fc72a2539..621b9c601ff 100644 --- a/paddle/fluid/inference/anakin/convert/ut_helper.h +++ b/paddle/fluid/inference/anakin/convert/ut_helper.h @@ -161,10 +161,6 @@ class AnakinConvertValidation { framework::TensorToVector(*tensor, ctx, &fluid_out); fluid_outputs.push_back(fluid_out); - // size_t fluid_out_size = fluid_out.size(); - /*for (size_t i = 0; i < fluid_out_size; i++) { - std::cout << fluid_out[i] << std::endl; - }*/ outputs.insert({output, tensor}); } @@ -180,8 +176,7 @@ class AnakinConvertValidation { size_t anakin_out_size = anakin_out.size(); auto fluid_out = fluid_outputs[i_output++]; for (size_t i = 0; i < anakin_out_size; i++) { - LOG(INFO) << "Output[" << i << "]: anakin[" << anakin_out[i] << "], " - << "fluid[" << fluid_out[i] << "]"; + EXPECT_LT(std::abs(fluid_out[i] - anakin_out[i]), 1e-3); } } } diff --git a/paddle/fluid/inference/anakin/engine.cc b/paddle/fluid/inference/anakin/engine.cc index 6549991474f..fb95bd03e07 100644 --- a/paddle/fluid/inference/anakin/engine.cc +++ b/paddle/fluid/inference/anakin/engine.cc @@ -68,29 +68,34 @@ void AnakinEngine<TargetT, PrecisionType, RunType>::Execute( auto *tensor = input.second; auto *data = tensor->data<float>(); auto shape = framework::vectorize2int(tensor->dims()); - ::anakin::saber::Shape anakin_shape(shape); auto *anakin_input = net_->get_in(input.first); + auto anakin_input_shape = anakin_input->valid_shape(); + PADDLE_ENFORCE_EQ(tensor->numel(), anakin_input_shape.count(), + "the fluid input size should be equal to the anakin input size"); ::anakin::saber::Tensor<TargetT> tmp_anakin_tensor(data, TargetT(), 0, - anakin_shape); - anakin_input->share_from(tmp_anakin_tensor); + anakin_input_shape); + anakin_input->copy_from(tmp_anakin_tensor); } for (const auto &output : outputs) { auto *tensor = output.second; auto *data = tensor->data<float>(); auto shape = framework::vectorize2int(tensor->dims()); - ::anakin::saber::Shape anakin_shape(shape); auto *anakin_output = net_->get_out(output.first); + auto anakin_output_shape = anakin_output->valid_shape(); + PADDLE_ENFORCE_EQ(tensor->numel(), anakin_output_shape.count(), + "the fluid output size should be equal to the anakin output size"); ::anakin::saber::Tensor<TargetT> tmp_anakin_tensor(data, TargetT(), 0, - anakin_shape); + anakin_output_shape); anakin_output->share_from(tmp_anakin_tensor); } net_->prediction(); +
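// prediction() only enqueues the engine's work on the CUDA stream; block +
// here until the shared output buffers are fully written. +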
cudaDeviceSynchronize(); } template void AnakinEngine::Freeze() { - PADDLE_ENFORCE(graph_->Freeze(), "Freeze anakin subgraph."); + PADDLE_ENFORCE(graph_->Freeze_v3(), "Freeze anakin subgraph."); } template diff --git a/paddle/fluid/inference/anakin/engine.h b/paddle/fluid/inference/anakin/engine.h index d8f32f57be5..408ad3b9f62 100644 --- a/paddle/fluid/inference/anakin/engine.h +++ b/paddle/fluid/inference/anakin/engine.h @@ -46,6 +46,9 @@ namespace anakin { template class AnakinEngine { + using NetT = ::anakin::Net; + using GraphT = ::anakin::graph::Graph; + public: explicit AnakinEngine(bool need_summary = false); ~AnakinEngine(); @@ -61,16 +64,15 @@ class AnakinEngine { PADDLE_ENFORCE(graph_->AddOpAttr(op_name, attr_name, attr_value), "Add operation's attribution."); } - + NetT *Net() { return net_.get(); } std::unique_ptr Clone(); void Freeze(); void Optimize(); + void Save(std::string path) { graph_->save(path); } void Execute(const std::map &inputs, const std::map &outputs); private: - using NetT = ::anakin::Net; - using GraphT = ::anakin::graph::Graph; std::unique_ptr graph_; std::unique_ptr net_; }; diff --git a/paddle/fluid/inference/anakin/op_teller.cc b/paddle/fluid/inference/anakin/op_teller.cc index 928661b0a9c..015bcc066e1 100644 --- a/paddle/fluid/inference/anakin/op_teller.cc +++ b/paddle/fluid/inference/anakin/op_teller.cc @@ -20,7 +20,18 @@ namespace anakin { // Just tell by the op_types. struct SimpleOpTypeSetTeller : public Teller { - SimpleOpTypeSetTeller() {} + SimpleOpTypeSetTeller() { + // teller_set.insert("mul"); + teller_set.insert("fc"); + teller_set.insert("conv2d_fusion"); + teller_set.insert("split"); + teller_set.insert("relu"); + teller_set.insert("pool2d"); + teller_set.insert("elementwise_add"); + teller_set.insert("concat"); + teller_set.insert("tanh"); + // teller_set.insert("conv2d"); + } bool operator()(const std::string& op_type, const framework::OpDesc& desc) override { @@ -28,7 +39,7 @@ struct SimpleOpTypeSetTeller : public Teller { } private: - std::unordered_set teller_set{{"mul"}}; + std::unordered_set teller_set; }; bool OpTeller::Tell(const std::string& op_type, const framework::OpDesc& desc) { diff --git a/paddle/fluid/inference/analysis/ir_pass_manager.cc b/paddle/fluid/inference/analysis/ir_pass_manager.cc index 8fd86b2cc56..07423a14855 100644 --- a/paddle/fluid/inference/analysis/ir_pass_manager.cc +++ b/paddle/fluid/inference/analysis/ir_pass_manager.cc @@ -13,8 +13,11 @@ // limitations under the License. 
#include "paddle/fluid/inference/analysis/ir_pass_manager.h" +#include #include #include +#include +#include #include #include "paddle/fluid/framework/ir/fuse_pass_base.h" #include "paddle/fluid/framework/ir/graph.h" @@ -63,7 +66,14 @@ void IRPassManager::CreatePasses(Argument *argument, } else if (pass_name == "cpu_quantize_pass") { pass->Set("quant_var_scales", new VarQuantScale(argument->quant_var_scales())); - } else if (pass_name == "tensorrt_subgraph_pass") { + } + + if (pass_name == "anakin_subgraph_pass") { + pass->Set("program", + new framework::ProgramDesc *(&argument->main_program())); + } + + if (pass_name == "tensorrt_subgraph_pass") { pass->Set("workspace_size", new int(argument->tensorrt_workspace_size())); pass->Set("max_batch_size", new int(argument->tensorrt_max_batch_size())); pass->Set("min_subgraph_size", diff --git a/paddle/fluid/inference/analysis/ir_passes/CMakeLists.txt b/paddle/fluid/inference/analysis/ir_passes/CMakeLists.txt index 410a90132aa..bd9f08d84e4 100644 --- a/paddle/fluid/inference/analysis/ir_passes/CMakeLists.txt +++ b/paddle/fluid/inference/analysis/ir_passes/CMakeLists.txt @@ -14,3 +14,15 @@ if (WITH_GPU AND TENSORRT_FOUND) file(APPEND ${pass_file} "USE_PASS(tensorrt_subgraph_pass);\n") set(INFER_IR_PASSES ${INFER_IR_PASSES} tensorrt_subgraph_pass CACHE INTERNAL "") endif() + +if (WITH_ANAKIN_SUBGRAPH) + cc_library(anakin_subgraph_pass SRCS anakin_subgraph_pass.cc DEPS subgraph_detector anakin_op_teller) + + set(analysis_deps ${analysis_deps} + subgraph_detector anakin_subgraph_pass + CACHE INTERNAL "") + + set(pass_file ${PADDLE_BINARY_DIR}/paddle/fluid/inference/api/paddle_inference_pass.h) + file(APPEND ${pass_file} "USE_PASS(anakin_subgraph_pass);\n") + set(INFER_IR_PASSES ${INFER_IR_PASSES} anakin_subgraph_pass CACHE INTERNAL "") +endif() diff --git a/paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.cc b/paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.cc new file mode 100644 index 00000000000..f57b89baab7 --- /dev/null +++ b/paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.cc @@ -0,0 +1,273 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <algorithm> +#include <memory> +#include <set> +#include <string> +#include <unordered_map> +#include <unordered_set> +#include <vector> + +#include "paddle/fluid/framework/ir/graph_pattern_detector.h" +#include "paddle/fluid/inference/anakin/op_teller.h" +#include "paddle/fluid/inference/analysis/helper.h" +#include "paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.h" +#include "paddle/fluid/inference/analysis/ir_passes/subgraph_detector.h" +#include "paddle/fluid/string/pretty_log.h" + +namespace paddle { +namespace inference { +namespace analysis { + +using framework::ir::Node; + +std::vector<std::string> ExtractAnakinParameters( + const std::unordered_set<Node *> &nodes); + +std::unique_ptr<framework::ir::Graph> analysis::AnakinSubgraphPass::ApplyImpl( + std::unique_ptr<framework::ir::Graph> graph) const { + framework::ir::FusePassBase::Init("anakin_subgraph_pass", graph.get()); + + auto teller = [](const framework::ir::Node *node) { + if (!node->IsOp() || !node->Op()) return false; + return anakin::OpTeller::Global().Tell(node->Op()->Type(), *node->Op()); + }; + + SubGraphFuser fuser(graph.get(), teller, 0); + fuser(); + + for (auto *node : graph->Nodes()) { + if (node->IsOp() && !Agent(node).subgraph()->empty()) { + CreateAnakinOp(node, graph.get()); + std::unordered_set<const Node *> nodes2remove( + Agent(node).subgraph()->begin(), Agent(node).subgraph()->end()); + framework::ir::GraphSafeRemoveNodes(graph.get(), nodes2remove); + } + } + + std::unordered_set<const Node *> nodes2remove; + for (auto *node : graph->Nodes()) { + if (node->IsOp() && Agent(node).deleted()) { + nodes2remove.insert(node); + } + } + framework::ir::GraphSafeRemoveNodes(graph.get(), nodes2remove); + + return graph; +} + +std::string GenerateAnakinEngineKey( + const std::set<std::string> &engine_inputs, + const std::set<std::string> &engine_outputs) { + std::string engine_hash_key = ""; + for (auto name : engine_inputs) { + engine_hash_key += name; + } + for (auto name : engine_outputs) { + engine_hash_key += name; + } + auto engine_key = std::to_string(std::hash<std::string>()(engine_hash_key)); + return engine_key; +} + +void AnakinSubgraphPass::CreateAnakinOp(framework::ir::Node *node, + Graph *graph) const { + auto *op_desc = node->Op(); + auto &subgraph = *Agent(node).subgraph(); + PADDLE_ENFORCE(!subgraph.empty()); + + framework::ProgramDesc *program_desc = + Get<framework::ProgramDesc *>("program"); + // Add a new block for the AnakinEngineOp. + const framework::BlockDesc &main_block = + program_desc->Block(framework::kRootBlockIndex); + // const framework::BlockDesc& main_block = program_desc->Block(0); + framework::BlockDesc *new_block = program_desc->AppendBlock(main_block); + + // A fake block desc. + framework::proto::BlockDesc block_proto; + framework::BlockDesc block_desc(nullptr, &block_proto); + block_desc.Proto()->set_parent_idx(-1); + block_desc.Proto()->set_idx(0); + string::PrettyLogDetail("--- detect a sub-graph with %d nodes", + subgraph.size()); + + for (auto *node : subgraph) { + auto *new_block_op = new_block->AppendOp(); + auto *op = block_desc.AppendOp(); + *new_block_op->Proto() = *node->Op()->Proto(); + *op->Proto() = *node->Op()->Proto(); + } + + // Then, we will use the input_names_with_id and output_names_with_id to + // generate the engine key. + // So, we use set instead of unordered_set here to ensure that the engine key + // is deterministic.
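+ // e.g. inputs {"x", "y"} and outputs {"z"} always concatenate to "xyz",
+ // so repeated runs over the same subgraph produce the same engine_key.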
+ std::set<std::string> input_names; + std::set<std::string> input_names_with_id; + for (auto *x : node->inputs) { + input_names.insert(x->Name()); + input_names_with_id.insert(x->Name() + std::to_string(x->id())); + } + op_desc->SetInput( + "Xs", std::vector<std::string>(input_names.begin(), input_names.end())); + + std::set<std::string> output_names; + std::set<std::string> output_names_with_id; + for (auto *x : node->outputs) { + output_names.insert(x->Name()); + output_names_with_id.insert(x->Name() + std::to_string(x->id())); + } + + op_desc->SetOutput( + "Ys", std::vector<std::string>(output_names.begin(), output_names.end())); + op_desc->SetType("anakin_engine"); + + std::unordered_map<std::string, std::string> output_name_map; + + // The following procedure is used to rename all the intermediate + // variables and the output variables of the subgraph. + // Why do we do this? + // During the transition from a fluid OP to an anakin OP, we map + // the input and output Tensor (fluid data structure) of the fluid OP + // to the corresponding engine tensor (Anakin data structure) through the + // Tensor name. When we set up the engine tensor for a variable, we must + // ensure that it has not been set before. + // If a variable in the fluid graph is not only the input of one OP + // but also the output of another, there will be problems. + // So we have to rename the variables in the subgraph to make sure + // each one is either an OP's input or an OP's output. + + auto &subgraph_nodes = *Agent(node).subgraph(); + for (size_t index = 0; index < block_desc.OpSize(); ++index) { + framework::proto::OpDesc *op = block_desc.Op(index)->Proto(); + auto correspond_node = subgraph_nodes[index]; + PADDLE_ENFORCE_EQ(correspond_node->Name(), op->type()); + + std::unordered_map<std::string, int> var2id; + for (auto *in_var : correspond_node->inputs) { + var2id[in_var->Name()] = in_var->id(); + } + // rename the input variables of the op inside the subgraph + for (int i = 0; i < op->inputs_size(); i++) { + // one input + auto *in_var = op->mutable_inputs(i); + std::vector<std::string> replaced_names; + for (int k = 0; k < in_var->arguments_size(); k++) { // all the arguments + std::string arg_value = in_var->arguments(k); + std::string arg_value_with_id = + arg_value + std::to_string(var2id[arg_value]); + if (input_names_with_id.count(arg_value_with_id)) { + replaced_names.push_back(arg_value); + } else { + replaced_names.push_back(arg_value_with_id); + } + } + in_var->clear_arguments(); + for (size_t k = 0; k < replaced_names.size(); k++) { + in_var->add_arguments(replaced_names[k]); + } + } + var2id.clear(); + for (auto out_var : correspond_node->outputs) { + var2id[out_var->Name()] = out_var->id(); + } + + // rename the output variables of the op inside the subgraph + for (int i = 0; i < op->outputs_size(); i++) { + framework::proto::OpDesc_Var *out_var = op->mutable_outputs(i); + std::vector<std::string> replaced_names; + for (int k = 0; k < out_var->arguments_size(); k++) { + std::string arg_value = out_var->arguments(k); + std::string arg_value_with_id = + arg_value + std::to_string(var2id[arg_value]); + if (output_names_with_id.count(arg_value_with_id)) { + output_name_map[arg_value] = arg_value_with_id; + } + replaced_names.push_back(arg_value_with_id); + } + out_var->clear_arguments(); + for (size_t k = 0; k < replaced_names.size(); k++) { + out_var->add_arguments(replaced_names[k]); + } + } + } + + // When the anakin engine runs at the end of the operation, + // output_mapping helps us copy the data from the renamed output tensor + // to the fluid Tensor.
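+ // e.g. an internal variable "conv_out" with node id 7 is referred to as
+ // "conv_out7" inside the engine block; output_name_map remembers that
+ // mapping so the engine op can find the renamed output at runtime.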
+ std::vector<std::string> output_mapping; + for (auto name : output_names) { + PADDLE_ENFORCE(output_name_map.count(name) != 0); + output_mapping.push_back(output_name_map[name]); + } + + auto *vars = block_desc.Proto()->mutable_vars(); + for (framework::ir::Node *node : graph->Nodes()) { + if (node->IsVar() && node->Var()) { + *vars->Add() = *node->Var()->Proto(); + } + } + + PADDLE_ENFORCE(!block_desc.Proto()->vars().empty(), + "the block has no var-desc"); + PADDLE_ENFORCE(!output_mapping.empty()); + op_desc->SetBlockAttr("sub_block", new_block); + SetAttr(op_desc->Proto(), "subgraph", + block_desc.Proto()->SerializeAsString()); + // Set attrs + SetAttr(op_desc->Proto(), "parameters", + ExtractAnakinParameters(graph->Nodes())); + SetAttr(op_desc->Proto(), "output_name_mapping", output_mapping); + auto engine_key = + GenerateAnakinEngineKey(input_names_with_id, output_names_with_id); + + SetAttr(op_desc->Proto(), "engine_key", engine_key); +} + +std::vector<std::string> ExtractAnakinParameters( + const std::unordered_set<Node *> &nodes) { + // We can judge whether a variable is a parameter by + // its persistable property, but sometimes the persistable property + // of a feed op's output is also true, so we have to filter those out. + std::vector<std::string> feed_outputs; + for (const auto &node : nodes) { + if (!node->IsOp()) continue; + std::string op_type = node->Op()->Type(); + if (op_type == "feed") { + std::vector<std::string> output_names = node->Op()->OutputArgumentNames(); + std::copy(output_names.begin(), output_names.end(), + std::back_inserter(feed_outputs)); + } + } + + std::vector<std::string> parameters; + for (const auto &node : nodes) { + if (!node->IsVar()) continue; + if (node->Var()->Persistable() && + std::find(feed_outputs.begin(), feed_outputs.end(), node->Name()) == + feed_outputs.end()) { + parameters.push_back(node->Name()); + } + } + return parameters; +} + +} // namespace analysis +} // namespace inference +} // namespace paddle + +REGISTER_PASS(anakin_subgraph_pass, + paddle::inference::analysis::AnakinSubgraphPass); diff --git a/paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.h b/paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.h new file mode 100644 index 00000000000..5f45ff579d5 --- /dev/null +++ b/paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.h @@ -0,0 +1,37 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
+ +#pragma once +#include +#include +#include "paddle/fluid/framework/ir/pass.h" + +namespace paddle { +namespace inference { +namespace analysis { + +class AnakinSubgraphPass : public framework::ir::FusePassBase { + public: + std::unique_ptr ApplyImpl( + std::unique_ptr graph) const override; + + private: + void CreateAnakinOp(framework::ir::Node *x, + framework::ir::Graph *graph) const; + void CleanIntermediateOutputs(framework::ir::Node *node); +}; + +} // namespace analysis +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/analysis/ir_passes/subgraph_detector.cc b/paddle/fluid/inference/analysis/ir_passes/subgraph_detector.cc index 96befe7f8a5..76b1671601e 100644 --- a/paddle/fluid/inference/analysis/ir_passes/subgraph_detector.cc +++ b/paddle/fluid/inference/analysis/ir_passes/subgraph_detector.cc @@ -14,6 +14,8 @@ limitations under the License. */ #include "paddle/fluid/inference/analysis/ir_passes/subgraph_detector.h" #include +#include +#include #include #include "paddle/fluid/framework/ir/graph_helper.h" #include "paddle/fluid/framework/ir/graph_pattern_detector.h" @@ -418,7 +420,7 @@ void SubGraphFuser::ReplaceNodesWithSubGraphs() { // Node that contains this subgraph 2. Mark the nodes inside the sub-graph // as deleted. 3. Replace the deleted node with the new Block Node. framework::OpDesc empty_desc; - empty_desc.SetType("tensorrt_engine"); + empty_desc.SetType("anakin_engine"); auto *block_node = graph_->CreateOpNode(&empty_desc); Agent(block_node).set_subgraph({}); auto io = ExtractInputAndOutputOfSubGraph(subgraph); diff --git a/paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc b/paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc index d13ec7608c3..8360963f736 100644 --- a/paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc +++ b/paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc @@ -30,6 +30,7 @@ void IrParamsSyncAmongDevicesPass::RunImpl(Argument *argument) { // The parameters are on the cpu, therefore, synchronization is not necessary. 
if (!argument->use_gpu()) return; + return; auto &graph = argument->main_graph(); std::vector repetitive_params; diff --git a/paddle/fluid/inference/api/CMakeLists.txt b/paddle/fluid/inference/api/CMakeLists.txt index 85755fc471a..38313754ea9 100644 --- a/paddle/fluid/inference/api/CMakeLists.txt +++ b/paddle/fluid/inference/api/CMakeLists.txt @@ -27,6 +27,10 @@ if(WITH_GPU AND TENSORRT_FOUND) set(inference_deps ${inference_deps} tensorrt_engine tensorrt_converter) endif() +if (WITH_ANAKIN_SUBGRAPH) + set(inference_deps ${inference_deps} anakin_op_converter anakin_engine) +endif() + add_subdirectory(details) cc_library(analysis_config SRCS analysis_config.cc DEPS lod_tensor paddle_pass_builder) diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index b58c60e96a0..aee1d951a1e 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -43,6 +43,8 @@ #endif +#include "paddle/fluid/inference/anakin/convert/op_converter.h" + DECLARE_bool(profile); namespace paddle { @@ -805,3 +807,14 @@ USE_TRT_CONVERTER(prelu); USE_TRT_CONVERTER(conv2d_transpose); USE_TRT_CONVERTER(leaky_relu); #endif + +USE_ANAKIN_CONVERTER(fc); +USE_ANAKIN_CONVERTER(conv2d); +USE_ANAKIN_CONVERTER(concat); +USE_ANAKIN_CONVERTER(split); +USE_ANAKIN_CONVERTER(relu); +USE_ANAKIN_CONVERTER(sigmoid); +USE_ANAKIN_CONVERTER(tanh); +USE_ANAKIN_CONVERTER(pool2d); +USE_ANAKIN_CONVERTER(conv2d_fusion); +USE_ANAKIN_CONVERTER(elementwise_add); diff --git a/paddle/fluid/operators/CMakeLists.txt b/paddle/fluid/operators/CMakeLists.txt index 651c5e6e758..8ccb6d5a1c0 100644 --- a/paddle/fluid/operators/CMakeLists.txt +++ b/paddle/fluid/operators/CMakeLists.txt @@ -34,6 +34,10 @@ if (WITH_GPU AND TENSORRT_FOUND) add_subdirectory(tensorrt) endif() +if (WITH_ANAKIN_SUBGRAPH) + add_subdirectory(anakin) +endif() + SET(OP_HEADER_DEPS xxhash) if (WITH_GPU) SET(OP_HEADER_DEPS ${OP_HEADER_DEPS} cub) diff --git a/paddle/fluid/operators/anakin/CMakeLists.txt b/paddle/fluid/operators/anakin/CMakeLists.txt new file mode 100644 index 00000000000..5eacefc645b --- /dev/null +++ b/paddle/fluid/operators/anakin/CMakeLists.txt @@ -0,0 +1,2 @@ +op_library(anakin_engine_op DEPS anakin_engine anakin_op_converter) +# file(APPEND ${pybind_file} "USE_NO_KERNEL_OP(anakin_engine);\n") diff --git a/paddle/fluid/operators/anakin/anakin_engine_op.cc b/paddle/fluid/operators/anakin/anakin_engine_op.cc new file mode 100644 index 00000000000..48b0490d041 --- /dev/null +++ b/paddle/fluid/operators/anakin/anakin_engine_op.cc @@ -0,0 +1,54 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#ifdef PADDLE_WITH_CUDA + +#include <string> +#include <vector> + +#include "paddle/fluid/operators/anakin/anakin_engine_op.h" + +namespace paddle { + +namespace operators { + +class AnakinEngineOpMaker : public framework::OpProtoAndCheckerMaker { + public: + void Make() override { + AddInput("Xs", "A list of inputs.").AsDuplicable(); + AddOutput("Ys", "A list of outputs").AsDuplicable(); + AddAttr<std::string>("subgraph", "the subgraph."); + AddAttr<std::string>( + "engine_key", + "The engine_key here is used to distinguish different Anakin engines"); + AddAttr<framework::BlockDesc *>("sub_block", "the anakin sub-block"); + AddComment("Anakin engine operator."); + } +}; + +class AnakinEngineInferVarType : public framework::VarTypeInference { + public: + void operator()(const framework::OpDesc &op_desc, + framework::BlockDesc *block) const override {} +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; + +REGISTER_OPERATOR(anakin_engine, ops::AnakinEngineOp, ops::AnakinEngineOpMaker, + ops::AnakinEngineInferVarType); + +#endif // PADDLE_WITH_CUDA diff --git a/paddle/fluid/operators/anakin/anakin_engine_op.h b/paddle/fluid/operators/anakin/anakin_engine_op.h new file mode 100644 index 00000000000..93569367ffd --- /dev/null +++ b/paddle/fluid/operators/anakin/anakin_engine_op.h @@ -0,0 +1,152 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License.
*/ + +#pragma once + +#ifdef PADDLE_WITH_CUDA + +#include <map> +#include <memory> +#include <string> +#include <unordered_map> +#include <unordered_set> +#include <vector> + +#include "paddle/fluid/framework/executor.h" +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/framework/operator.h" +#include "paddle/fluid/inference/anakin/convert/op_converter.h" +#include "paddle/fluid/inference/anakin/engine.h" +#include "paddle/fluid/inference/analysis/helper.h" + +namespace paddle { +namespace operators { + +using FluidDT = framework::proto::VarType_Type; +using inference::Singleton; + +using anakin::graph::GraphGlobalMem; +using anakin::AK_FLOAT; +using anakin::Precision; +using anakin::saber::NV; +using anakin::saber::X86; +using anakin::saber::Shape; +using anakin::PBlock; +using anakin::PTuple; +using inference::anakin::AnakinEngine; + +class AnakinEngineOp : public framework::OperatorBase { + using AnakinNvEngineT = AnakinEngine<NV, Precision::FP32>; + + private: + std::vector<std::string> input_names_; + std::unordered_set<std::string> param_names_; + mutable std::unique_ptr<AnakinNvEngineT> anakin_engine_; + std::string engine_key_; + + public: + AnakinEngineOp(const std::string &type, + const framework::VariableNameMap &inputs, + const framework::VariableNameMap &outputs, + const framework::AttributeMap &attrs) + : framework::OperatorBase(type, inputs, outputs, attrs) { + input_names_ = Inputs("Xs"); + engine_key_ = Attr<std::string>("engine_key"); + auto params = Attr<std::vector<std::string>>("parameters"); + for (const auto &param : params) { + param_names_.insert(param); + } + } + + protected: + void RunImpl(const framework::Scope &scope, + const platform::Place &dev_place) const override { + RunAnakin(scope, dev_place); + } + + void RunAnakin(const framework::Scope &scope, + const platform::Place &dev_place) const { + if (anakin_engine_.get() == nullptr) { + anakin_engine_.reset(new AnakinNvEngineT(true)); + Prepare(scope, dev_place, anakin_engine_.get()); + } + + auto *engine = anakin_engine_.get(); + PADDLE_ENFORCE(!input_names_.empty(), "should pass at least one input"); + + std::vector<std::string> output_maps = + Attr<std::vector<std::string>>("output_name_mapping"); + + std::map<std::string, framework::LoDTensor *> inputs; + // Convert input tensors from fluid to the engine. + for (const auto &x : Inputs("Xs")) { + if (param_names_.count(x)) continue; + auto &t = + inference::analysis::GetFromScope<framework::LoDTensor>(scope, x); + auto t_shape = framework::vectorize(t.dims()); + inputs.insert({x, &t}); + } + + std::map<std::string, framework::LoDTensor *> outputs; + int output_index = 0; + for (const auto &y : Outputs("Ys")) { + std::vector<int> ddim = + engine->Net()->get_out(output_maps[output_index])->valid_shape(); + // We need the anakin output shape to resize the fluid output tensor. + auto *fluid_v = scope.FindVar(y); + PADDLE_ENFORCE_NOT_NULL(fluid_v, "no output variable called %s", y); + auto *fluid_t = fluid_v->GetMutable<framework::LoDTensor>(); + fluid_t->Resize(framework::make_ddim(ddim)); + fluid_t->mutable_data<float>(boost::get<platform::CUDAPlace>(dev_place)); + outputs.insert({output_maps[output_index], fluid_t}); + output_index += 1; + } + engine->Execute(inputs, outputs); + } + + void Prepare(const framework::Scope &scope, const platform::Place &dev_place, + AnakinNvEngineT *engine) const { + LOG(INFO) << "Prepare Anakin engine (Optimize model structure, Select OP " + "kernel etc).
This process may cost a lot of time."; + framework::proto::BlockDesc block_desc; + block_desc.ParseFromString(Attr<std::string>("subgraph")); + + std::vector<std::string> output_maps = + Attr<std::vector<std::string>>("output_name_mapping"); + + inference::Singleton<inference::anakin::AnakinOpConverter>::Global() + .ConvertBlock(block_desc, param_names_, scope, engine); + engine->Freeze(); + engine->Optimize(); + + for (const auto &x : Inputs("Xs")) { + if (param_names_.count(x)) continue; + auto &t = + inference::analysis::GetFromScope<framework::LoDTensor>(scope, x); + auto t_shape = framework::vectorize2int(t.dims()); + // all input shapes should be 4-D + if (t_shape.size() == 2) { + t_shape.push_back(1); + t_shape.push_back(1); + } + engine->SetInputShape(x, t_shape); + } + engine->InitGraph(); + } +}; + +} // namespace operators +} // namespace paddle + +#endif // PADDLE_WITH_CUDA -- GitLab
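
For reference, a minimal sketch of how the Anakin subgraph path added by this patch might be exercised from the C++ inference API. The pass name matches the REGISTER_PASS above; the model path is a placeholder, and the AnalysisConfig calls are assumptions based on the inference API of this period, not anything introduced by this patch.

#include "paddle/fluid/inference/api/paddle_inference_api.h"

int main() {
  paddle::AnalysisConfig config;
  config.SetModel("/path/to/model_dir");  // hypothetical model directory
  config.EnableUseGpu(100 /* initial pool, MB */, 0 /* GPU id */);
  // Run the Anakin subgraph pass ahead of the remaining IR passes
  // (the build must enable WITH_ANAKIN_SUBGRAPH for the pass to exist).
  config.pass_builder()->InsertPass(0, "anakin_subgraph_pass");
  auto predictor = paddle::CreatePaddlePredictor(config);
  // ... fill PaddleTensor inputs and call predictor->Run() as usual.
  return 0;
}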