From 07dcf2856cdf49af2415432c07dea6545992940d Mon Sep 17 00:00:00 2001 From: nhzlx Date: Wed, 20 Mar 2019 13:36:56 +0000 Subject: [PATCH] git cherry-pick from feature/anakin-engine: update anakin subgraph #16278 --- .../inference/anakin/convert/CMakeLists.txt | 10 +-- .../fluid/inference/anakin/convert/dropout.cc | 66 +++++++++++++++++++ .../fluid/inference/anakin/convert/dropout.h | 37 +++++++++++ .../inference/anakin/convert/elementwise.cc | 33 +++++++++- .../inference/anakin/convert/elementwise.h | 12 ++++ .../inference/anakin/convert/im2sequence.cc | 62 +++++++++++++++++ .../inference/anakin/convert/im2sequence.h | 37 +++++++++++ .../fluid/inference/anakin/convert/pool2d.cc | 6 +- paddle/fluid/inference/anakin/convert/sum.cc | 54 +++++++++++++++ paddle/fluid/inference/anakin/convert/sum.h | 37 +++++++++++ .../anakin/convert/test_dropout_op.cc | 53 +++++++++++++++ .../anakin/convert/test_elementwise_op.cc | 22 ++++--- .../anakin/convert/test_im2sequence_op.cc | 55 ++++++++++++++++ .../inference/anakin/convert/test_sum_op.cc | 48 ++++++++++++++ paddle/fluid/inference/anakin/op_teller.cc | 5 +- .../ir_passes/anakin_subgraph_pass.cc | 2 +- .../ir_passes/tensorrt_subgraph_pass.cc | 4 +- .../fluid/inference/api/analysis_predictor.cc | 5 +- .../inference/api/paddle_analysis_config.h | 2 +- .../inference/api/paddle_pass_builder.cc | 1 + paddle/fluid/platform/device_context.cc | 19 ++++-- paddle/fluid/platform/device_context.h | 5 +- 22 files changed, 546 insertions(+), 29 deletions(-) create mode 100644 paddle/fluid/inference/anakin/convert/dropout.cc create mode 100644 paddle/fluid/inference/anakin/convert/dropout.h create mode 100644 paddle/fluid/inference/anakin/convert/im2sequence.cc create mode 100644 paddle/fluid/inference/anakin/convert/im2sequence.h create mode 100644 paddle/fluid/inference/anakin/convert/sum.cc create mode 100644 paddle/fluid/inference/anakin/convert/sum.h create mode 100644 paddle/fluid/inference/anakin/convert/test_dropout_op.cc create mode 100644 paddle/fluid/inference/anakin/convert/test_im2sequence_op.cc create mode 100644 paddle/fluid/inference/anakin/convert/test_sum_op.cc diff --git a/paddle/fluid/inference/anakin/convert/CMakeLists.txt b/paddle/fluid/inference/anakin/convert/CMakeLists.txt index da9ffa5bb..9cfe6671f 100644 --- a/paddle/fluid/inference/anakin/convert/CMakeLists.txt +++ b/paddle/fluid/inference/anakin/convert/CMakeLists.txt @@ -1,7 +1,5 @@ cc_library(anakin_op_converter SRCS fc.cc conv2d.cc conv2d_fusion.cc - elementwise.cc activation.cc pool2d.cc concat.cc split.cc relu.cc softmax.cc -batch_norm.cc reshape.cc flatten.cc transpose.cc density_prior_box.cc -detection_out.cc scale.cc DEPS anakin_engine framework_proto scope op_registry) + elementwise.cc activation.cc pool2d.cc concat.cc split.cc relu.cc softmax.cc batch_norm.cc reshape.cc flatten.cc transpose.cc density_prior_box.cc detection_out.cc scale.cc dropout.cc im2sequence.cc sum.cc DEPS anakin_engine framework_proto scope op_registry) cc_test(test_anakin_fc SRCS test_fc_op.cc DEPS anakin_op_converter mul_op) cc_test(test_anakin_conv2d SRCS test_conv2d_op.cc DEPS anakin_op_converter conv_op im2col vol2col depthwise_conv) @@ -9,11 +7,13 @@ cc_test(test_anakin_activation SRCS test_activation_op.cc DEPS activation_op ana cc_test(test_anakin_pool2d SRCS test_pool2d_op.cc DEPS anakin_op_converter pool_op pooling) cc_test(test_anakin_concat SRCS test_concat_op.cc DEPS anakin_op_converter concat_op concat_and_split) cc_test(test_anakin_split SRCS test_split_op.cc DEPS anakin_op_converter split_op concat_and_split) -cc_test(test_anakin_elementwise SRCS test_elementwise_op.cc DEPS anakin_op_converter elementwise_add_op) +cc_test(test_anakin_elementwise SRCS test_elementwise_op.cc DEPS anakin_op_converter elementwise_add_op elementwise_mul_op) cc_test(test_anakin_relu SRCS test_relu_op.cc DEPS activation_op anakin_op_converter SERIAL) cc_test(test_anakin_softmax SRCS test_softmax_op.cc DEPS anakin_op_converter softmax_op softmax) cc_test(test_anakin_reshape SRCS test_reshape_op.cc DEPS anakin_op_converter reshape_op) cc_test(test_anakin_flatten SRCS test_flatten_op.cc DEPS anakin_op_converter flatten_op reshape_op) cc_test(test_anakin_transpose SRCS test_transpose_op.cc DEPS anakin_op_converter transpose_op) cc_test(test_anakin_batch_norm SRCS test_batch_norm_op.cc DEPS anakin_op_converter batch_norm_op) -cc_test(test_anakin_scale SRCS test_scale_op.cc DEPS anakin_op_converter scale_op math_function) +cc_test(test_anakin_dropout SRCS test_dropout_op.cc DEPS anakin_op_converter dropout_op) +cc_test(test_anakin_im2sequence SRCS test_im2sequence_op.cc DEPS anakin_op_converter im2sequence_op im2col) +cc_test(test_anakin_sum SRCS test_sum_op.cc DEPS anakin_op_converter sum_op selected_rows_functor) diff --git a/paddle/fluid/inference/anakin/convert/dropout.cc b/paddle/fluid/inference/anakin/convert/dropout.cc new file mode 100644 index 000000000..ed6d7f756 --- /dev/null +++ b/paddle/fluid/inference/anakin/convert/dropout.cc @@ -0,0 +1,66 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/inference/anakin/convert/dropout.h" +#include +#include +#include + +using anakin::graph::GraphGlobalMem; +using anakin::AK_FLOAT; +using anakin::Precision; +using anakin::saber::NV; +using anakin::saber::X86; +using anakin::saber::Shape; +using anakin::PBlock; +using anakin::PTuple; + +namespace paddle { +namespace inference { +namespace anakin { + +void DropoutOpConverter::operator()(const framework::proto::OpDesc &op, + const framework::Scope &scope, + bool test_mode) { + framework::OpDesc op_desc(op, nullptr); + PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1); + PADDLE_ENFORCE_EQ(op_desc.Output("Mask").size(), 1); + PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1); + + auto x_name = op_desc.Input("X").front(); + auto out_name = op_desc.Output("Out").front(); + auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front(); + + engine_->AddOp(op_name, "Scale", {x_name}, {out_name}); + + auto dropout_prob = boost::get(op_desc.GetAttr("dropout_prob")); + auto factor = 1 - dropout_prob; + Shape shape1(std::vector({1, 1, 1, 1})); + auto *weight1 = + GraphGlobalMem::Global().template new_block(shape1); + auto *factor_data = static_cast(weight1->h_tensor().mutable_data()); + float weight1_data[] = {factor}; + std::copy(std::begin(weight1_data), std::end(weight1_data), factor_data); + + engine_->AddOpAttr(op_name, "weight_1", *weight1); + engine_->AddOpAttr(op_name, "axis", 0); + engine_->AddOpAttr(op_name, "num_axes", 0); + engine_->AddOpAttr(op_name, "bias_term", false); +} + +} // namespace anakin +} // namespace inference +} // namespace paddle + +REGISTER_ANAKIN_OP_CONVERTER(dropout, DropoutOpConverter); diff --git a/paddle/fluid/inference/anakin/convert/dropout.h b/paddle/fluid/inference/anakin/convert/dropout.h new file mode 100644 index 000000000..2a0fb6e76 --- /dev/null +++ b/paddle/fluid/inference/anakin/convert/dropout.h @@ -0,0 +1,37 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "paddle/fluid/inference/anakin/convert/op_converter.h" + +namespace paddle { +namespace inference { +namespace anakin { + +class DropoutOpConverter : public AnakinOpConverter { + public: + DropoutOpConverter() = default; + + virtual void operator()(const framework::proto::OpDesc &op, + const framework::Scope &scope, + bool test_mode) override; + virtual ~DropoutOpConverter() {} + + private: +}; + +} // namespace anakin +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/anakin/convert/elementwise.cc b/paddle/fluid/inference/anakin/convert/elementwise.cc index d400e617d..55b12390b 100644 --- a/paddle/fluid/inference/anakin/convert/elementwise.cc +++ b/paddle/fluid/inference/anakin/convert/elementwise.cc @@ -35,7 +35,7 @@ void ElementwiseAddOpConverter::operator()(const framework::proto::OpDesc &op, bool test_mode) { framework::OpDesc op_desc(op, nullptr); PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1); - PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1); // Y is a weight + PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1); PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1); auto x_name = op_desc.Input("X").front(); @@ -50,8 +50,39 @@ void ElementwiseAddOpConverter::operator()(const framework::proto::OpDesc &op, engine_->AddOpAttr>(op_name, "coeff", coeff); } +void ElementwiseMulOpConverter::operator()(const framework::proto::OpDesc &op, + const framework::Scope &scope, + bool test_mode) { + framework::OpDesc op_desc(op, nullptr); + PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1); + PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1); + PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1); + + auto x_name = op_desc.Input("X").front(); + auto y_name = op_desc.Input("Y").front(); + auto out_name = op_desc.Output("Out").front(); + auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front(); + + engine_->AddOp(op_name, "Scale", {x_name, y_name}, {out_name}); + // Fill a number to weight_1 as a placeholder. + Shape shape1(std::vector({1, 1, 1, 1})); + auto *weight1 = + GraphGlobalMem::Global().template new_block(shape1); + auto *placeholder_data = + static_cast(weight1->h_tensor().mutable_data()); + float weight1_data[] = {1}; + std::copy(std::begin(weight1_data), std::end(weight1_data), placeholder_data); + engine_->AddOpAttr(op_name, "weight_1", *weight1); + + auto axis = boost::get(op_desc.GetAttr("axis")); + engine_->AddOpAttr(op_name, "axis", axis); + engine_->AddOpAttr(op_name, "num_axes", 1); + engine_->AddOpAttr(op_name, "bias_term", false); +} + } // namespace anakin } // namespace inference } // namespace paddle REGISTER_ANAKIN_OP_CONVERTER(elementwise_add, ElementwiseAddOpConverter); +REGISTER_ANAKIN_OP_CONVERTER(elementwise_mul, ElementwiseMulOpConverter); diff --git a/paddle/fluid/inference/anakin/convert/elementwise.h b/paddle/fluid/inference/anakin/convert/elementwise.h index d41a59380..47525e41d 100644 --- a/paddle/fluid/inference/anakin/convert/elementwise.h +++ b/paddle/fluid/inference/anakin/convert/elementwise.h @@ -32,6 +32,18 @@ class ElementwiseAddOpConverter : public AnakinOpConverter { private: }; +class ElementwiseMulOpConverter : public AnakinOpConverter { + public: + ElementwiseMulOpConverter() = default; + + virtual void operator()(const framework::proto::OpDesc &op, + const framework::Scope &scope, + bool test_mode) override; + virtual ~ElementwiseMulOpConverter() {} + + private: +}; + } // namespace anakin } // namespace inference } // namespace paddle diff --git a/paddle/fluid/inference/anakin/convert/im2sequence.cc b/paddle/fluid/inference/anakin/convert/im2sequence.cc new file mode 100644 index 000000000..568d7e474 --- /dev/null +++ b/paddle/fluid/inference/anakin/convert/im2sequence.cc @@ -0,0 +1,62 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/inference/anakin/convert/im2sequence.h" +#include +#include +#include + +using anakin::graph::GraphGlobalMem; +using anakin::AK_FLOAT; +using anakin::Precision; +using anakin::saber::NV; +using anakin::saber::X86; +using anakin::saber::Shape; +using anakin::PBlock; +using anakin::PTuple; + +namespace paddle { +namespace inference { +namespace anakin { + +void Im2SequenceConverter::operator()(const framework::proto::OpDesc &op, + const framework::Scope &scope, + bool test_mode) { + framework::OpDesc op_desc(op, nullptr); + PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1); + PADDLE_ENFORCE_EQ(op_desc.Output("Y").size(), 0); + PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1); + + auto x_name = op_desc.Input("X").front(); + auto out_name = op_desc.Output("Out").front(); + auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front(); + + engine_->AddOp(op_name, "Im2Sequence", {x_name}, {out_name}); + + std::vector dilations = {1, 1}; + auto paddings = boost::get>(op_desc.GetAttr("paddings")); + auto strides = boost::get>(op_desc.GetAttr("strides")); + auto kernels = boost::get>(op_desc.GetAttr("kernels")); + + engine_->AddOpAttr>(op_name, "paddings", paddings); + engine_->AddOpAttr>(op_name, "strides", strides); + engine_->AddOpAttr>(op_name, "window_size", kernels); + engine_->AddOpAttr>(op_name, "dilations", dilations); +} + +} // namespace anakin +} // namespace inference +} // namespace paddle + +REGISTER_ANAKIN_OP_CONVERTER(im2sequence, Im2SequenceConverter); diff --git a/paddle/fluid/inference/anakin/convert/im2sequence.h b/paddle/fluid/inference/anakin/convert/im2sequence.h new file mode 100644 index 000000000..3003eac2c --- /dev/null +++ b/paddle/fluid/inference/anakin/convert/im2sequence.h @@ -0,0 +1,37 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "paddle/fluid/inference/anakin/convert/op_converter.h" + +namespace paddle { +namespace inference { +namespace anakin { + +class Im2SequenceConverter : public AnakinOpConverter { + public: + Im2SequenceConverter() = default; + + virtual void operator()(const framework::proto::OpDesc &op, + const framework::Scope &scope, + bool test_mode) override; + virtual ~Im2SequenceConverter() {} + + private: +}; + +} // namespace anakin +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/anakin/convert/pool2d.cc b/paddle/fluid/inference/anakin/convert/pool2d.cc index 3dee589ac..9b01d56a1 100644 --- a/paddle/fluid/inference/anakin/convert/pool2d.cc +++ b/paddle/fluid/inference/anakin/convert/pool2d.cc @@ -55,7 +55,11 @@ void Pool2dOpConverter::operator()(const framework::proto::OpDesc &op, if (pool_type == "max") { anakin_pool_type = "MAX"; } else if (pool_type == "avg") { - anakin_pool_type = "AVGEXC"; + if (paddings[0] || paddings[1]) { + anakin_pool_type = "AVGEXC"; + } else { + anakin_pool_type = "AVG"; + } } else { PADDLE_THROW("TensorRT unsupported pooling type!"); } diff --git a/paddle/fluid/inference/anakin/convert/sum.cc b/paddle/fluid/inference/anakin/convert/sum.cc new file mode 100644 index 000000000..df9104cf4 --- /dev/null +++ b/paddle/fluid/inference/anakin/convert/sum.cc @@ -0,0 +1,54 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/inference/anakin/convert/sum.h" +#include +#include +#include + +using anakin::graph::GraphGlobalMem; +using anakin::AK_FLOAT; +using anakin::Precision; +using anakin::saber::NV; +using anakin::saber::X86; +using anakin::saber::Shape; +using anakin::PBlock; +using anakin::PTuple; + +namespace paddle { +namespace inference { +namespace anakin { + +void SumOpConverter::operator()(const framework::proto::OpDesc &op, + const framework::Scope &scope, bool test_mode) { + framework::OpDesc op_desc(op, nullptr); + PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 2); + PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1); + + auto input_names = op_desc.Input("X"); + auto out_name = op_desc.Output("Out").front(); + auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front(); + + std::vector coeff = {1, 1}; + std::string elementwise_type = "Add"; + engine_->AddOp(op_name, "Eltwise", input_names, {out_name}); + engine_->AddOpAttr>(op_name, "coeff", coeff); + engine_->AddOpAttr(op_name, "type", elementwise_type); +} + +} // namespace anakin +} // namespace inference +} // namespace paddle + +REGISTER_ANAKIN_OP_CONVERTER(sum, SumOpConverter); diff --git a/paddle/fluid/inference/anakin/convert/sum.h b/paddle/fluid/inference/anakin/convert/sum.h new file mode 100644 index 000000000..ddecc4b3b --- /dev/null +++ b/paddle/fluid/inference/anakin/convert/sum.h @@ -0,0 +1,37 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "paddle/fluid/inference/anakin/convert/op_converter.h" + +namespace paddle { +namespace inference { +namespace anakin { + +class SumOpConverter : public AnakinOpConverter { + public: + SumOpConverter() = default; + + virtual void operator()(const framework::proto::OpDesc &op, + const framework::Scope &scope, + bool test_mode) override; + virtual ~SumOpConverter() {} + + private: +}; + +} // namespace anakin +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/anakin/convert/test_dropout_op.cc b/paddle/fluid/inference/anakin/convert/test_dropout_op.cc new file mode 100644 index 000000000..ded279bf4 --- /dev/null +++ b/paddle/fluid/inference/anakin/convert/test_dropout_op.cc @@ -0,0 +1,53 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include "paddle/fluid/inference/anakin/convert/dropout.h" +#include "paddle/fluid/inference/anakin/convert/op_converter.h" +#include "paddle/fluid/inference/anakin/convert/ut_helper.h" + +namespace paddle { +namespace inference { +namespace anakin { + +TEST(dropout_op, native) { + std::unordered_set parameters; + framework::Scope scope; + AnakinConvertValidation validator(parameters, scope); + validator.DeclInputVar("x", {1, 1, 2, 2}); + validator.DeclOutputVar("out", {1, 1, 2, 2}); + validator.DeclOutputVar("mask", {1, 1, 2, 2}); + + // Prepare Op description + framework::OpDesc desc; + desc.SetType("dropout"); + desc.SetInput("X", {"x"}); + desc.SetOutput("Out", {"out"}); + desc.SetOutput("Mask", {"mask"}); + + float dropout_prob = 0.5; + desc.SetAttr("dropout_prob", dropout_prob); + desc.SetAttr("is_test", true); + + validator.SetOp(*desc.Proto()); + std::unordered_set neglected_output = {"mask"}; + validator.Execute(1, neglected_output); +} + +} // namespace anakin +} // namespace inference +} // namespace paddle + +USE_OP(dropout); +USE_ANAKIN_CONVERTER(dropout); diff --git a/paddle/fluid/inference/anakin/convert/test_elementwise_op.cc b/paddle/fluid/inference/anakin/convert/test_elementwise_op.cc index a4298500c..eec8b96f7 100644 --- a/paddle/fluid/inference/anakin/convert/test_elementwise_op.cc +++ b/paddle/fluid/inference/anakin/convert/test_elementwise_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include +#include "paddle/fluid/inference/anakin/convert/elementwise.h" #include "paddle/fluid/inference/anakin/convert/op_converter.h" #include "paddle/fluid/inference/anakin/convert/ut_helper.h" @@ -20,20 +21,20 @@ namespace paddle { namespace inference { namespace anakin { -TEST(elementwise_op, native) { +static void test_elementwise_op(const std::string &op_type) { std::unordered_set parameters; framework::Scope scope; AnakinConvertValidation validator(parameters, scope); - validator.DeclInputVar("elementwise_add_x", {1, 1, 2, 2}); - validator.DeclInputVar("elementwise_y", {1, 1, 2, 2}); - validator.DeclOutputVar("elementwise_out", {1, 1, 2, 2}); + validator.DeclInputVar("x", {1, 1, 2, 2}); + validator.DeclInputVar("y", {1, 1, 2, 2}); + validator.DeclOutputVar("out", {1, 1, 2, 2}); // Prepare Op description framework::OpDesc desc; - desc.SetType("elementwise_add"); - desc.SetInput("X", {"elementwise_add_x"}); - desc.SetInput("Y", {"elementwise_y"}); - desc.SetOutput("Out", {"elementwise_out"}); + desc.SetType(op_type); + desc.SetInput("X", {"x"}); + desc.SetInput("Y", {"y"}); + desc.SetOutput("Out", {"out"}); int axis = -1; desc.SetAttr("axis", axis); @@ -42,9 +43,14 @@ TEST(elementwise_op, native) { validator.Execute(1); } +TEST(elementwise_op, native_add) { test_elementwise_op("elementwise_add"); } +TEST(elementwise_op, native_mul) { test_elementwise_op("elementwise_mul"); } + } // namespace anakin } // namespace inference } // namespace paddle USE_OP(elementwise_add); USE_ANAKIN_CONVERTER(elementwise_add); +USE_OP(elementwise_mul); +USE_ANAKIN_CONVERTER(elementwise_mul); diff --git a/paddle/fluid/inference/anakin/convert/test_im2sequence_op.cc b/paddle/fluid/inference/anakin/convert/test_im2sequence_op.cc new file mode 100644 index 000000000..2f6c48629 --- /dev/null +++ b/paddle/fluid/inference/anakin/convert/test_im2sequence_op.cc @@ -0,0 +1,55 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include "paddle/fluid/inference/anakin/convert/im2sequence.h" +#include "paddle/fluid/inference/anakin/convert/op_converter.h" +#include "paddle/fluid/inference/anakin/convert/ut_helper.h" + +namespace paddle { +namespace inference { +namespace anakin { + +TEST(im2sequence_op, native) { + std::unordered_set parameters; + framework::Scope scope; + AnakinConvertValidation validator(parameters, scope); + + std::vector kernels = {6, 1}; + std::vector strides = {1, 1}; + std::vector paddings = {0, 0, 0, 0}; + + validator.DeclInputVar("x", {1, 1, 2, 2}); + validator.DeclOutputVar("out", {1, 1 * kernels[0] * kernels[1]}); + + // Prepare Op description + framework::OpDesc desc; + desc.SetType("im2sequence"); + desc.SetInput("X", {"x"}); + desc.SetOutput("Out", {"out"}); + + desc.SetAttr("kernels", kernels); + desc.SetAttr("strides", strides); + desc.SetAttr("paddings", paddings); + + validator.SetOp(*desc.Proto()); + validator.Execute(1); +} + +} // namespace anakin +} // namespace inference +} // namespace paddle + +USE_OP(im2sequence); +USE_ANAKIN_CONVERTER(im2sequence); diff --git a/paddle/fluid/inference/anakin/convert/test_sum_op.cc b/paddle/fluid/inference/anakin/convert/test_sum_op.cc new file mode 100644 index 000000000..a93539303 --- /dev/null +++ b/paddle/fluid/inference/anakin/convert/test_sum_op.cc @@ -0,0 +1,48 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include "paddle/fluid/inference/anakin/convert/op_converter.h" +#include "paddle/fluid/inference/anakin/convert/sum.h" +#include "paddle/fluid/inference/anakin/convert/ut_helper.h" +#include "paddle/fluid/operators/sum_op.h" + +namespace paddle { +namespace inference { +namespace anakin { + +TEST(sum, native) { + std::unordered_set parameters; + framework::Scope scope; + AnakinConvertValidation validator(parameters, scope); + validator.DeclInputVar("sum_x1", {1, 2, 1, 2}); + validator.DeclInputVar("sum_x2", {1, 2, 1, 2}); + validator.DeclOutputVar("sum_out", {1, 2, 1, 2}); + + // Prepare Op description + framework::OpDesc desc; + desc.SetType("sum"); + desc.SetInput("X", {"sum_x1", "sum_x2"}); + desc.SetOutput("Out", {"sum_out"}); + + validator.SetOp(*desc.Proto()); + validator.Execute(1); +} + +} // namespace anakin +} // namespace inference +} // namespace paddle + +USE_OP(sum); +USE_ANAKIN_CONVERTER(sum); diff --git a/paddle/fluid/inference/anakin/op_teller.cc b/paddle/fluid/inference/anakin/op_teller.cc index 3270f5b57..90cf021de 100644 --- a/paddle/fluid/inference/anakin/op_teller.cc +++ b/paddle/fluid/inference/anakin/op_teller.cc @@ -28,6 +28,7 @@ struct SimpleOpTypeSetTeller : public Teller { teller_set.insert("relu"); teller_set.insert("pool2d"); teller_set.insert("elementwise_add"); + teller_set.insert("elementwise_mul"); teller_set.insert("concat"); teller_set.insert("tanh"); teller_set.insert("conv2d"); @@ -38,7 +39,9 @@ struct SimpleOpTypeSetTeller : public Teller { teller_set.insert("transpose2"); teller_set.insert("density_prior_box"); teller_set.insert("detection_out"); - teller_set.insert("scale"); + teller_set.insert("dropout"); + teller_set.insert("sigmoid"); + teller_set.insert("sum"); } bool operator()(const std::string& op_type, diff --git a/paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.cc b/paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.cc index de41e05f1..0cd1d327e 100644 --- a/paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.cc +++ b/paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.cc @@ -47,7 +47,7 @@ std::unique_ptr analysis::AnakinSubgraphPass::ApplyImpl( return anakin::OpTeller::Global().Tell(node->Op()->Type(), *node->Op()); }; - SubGraphFuser fuser(graph.get(), teller, 0 /* min_subgraph_size */); + SubGraphFuser fuser(graph.get(), teller, 6 /* min_subgraph_size */); fuser(); std::vector graph_param_names = diff --git a/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc b/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc index 69d6ab102..00490fbb1 100644 --- a/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc +++ b/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc @@ -210,13 +210,14 @@ void TensorRtSubgraphPass::CreateTensorRTOp( SetAttr(op_desc->Proto(), "parameters", params); auto enable_int8 = Get("enable_int8"); + auto use_static_engine = Get("use_static_engine"); auto engine_key = GenerateEngineKey(input_names_with_id, output_names_with_id, std::to_string(0)); // Get "" when there is no cached calibration table data. bool load_from_memory = Get("model_from_memory"); std::string calibration_data = ""; - if (!load_from_memory) { + if (!load_from_memory && use_static_engine) { calibration_data = GetTrtCalibTableData( Get("model_opt_cache_dir"), engine_key, enable_int8); } @@ -240,7 +241,6 @@ void TensorRtSubgraphPass::CreateTensorRTOp( calibrator.reset(new tensorrt::TRTInt8Calibrator(calibration_data)); } - bool use_static_engine = Get("use_static_engine"); // When in int8 mode and calibration_mode, the program just produce the // calibration table data. bool calibration_mode = (enable_int8 && calibration_data.size() == 0); diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index bcae080bc..4278d579e 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -828,12 +828,13 @@ USE_ANAKIN_CONVERTER(sigmoid); USE_ANAKIN_CONVERTER(tanh); USE_ANAKIN_CONVERTER(pool2d); USE_ANAKIN_CONVERTER(elementwise_add); +USE_ANAKIN_CONVERTER(elementwise_mul); USE_ANAKIN_CONVERTER(batch_norm); USE_ANAKIN_CONVERTER(flatten); USE_ANAKIN_CONVERTER(reshape); USE_ANAKIN_CONVERTER(transpose); USE_ANAKIN_CONVERTER(softmax); - USE_ANAKIN_CONVERTER(detection_out); USE_ANAKIN_CONVERTER(density_prior_box); -USE_ANAKIN_CONVERTER(scale); +USE_ANAKIN_CONVERTER(dropout); +USE_ANAKIN_CONVERTER(sum); diff --git a/paddle/fluid/inference/api/paddle_analysis_config.h b/paddle/fluid/inference/api/paddle_analysis_config.h index 9a29f8f77..23df507aa 100644 --- a/paddle/fluid/inference/api/paddle_analysis_config.h +++ b/paddle/fluid/inference/api/paddle_analysis_config.h @@ -138,7 +138,7 @@ struct AnalysisConfig { void EnableTensorRtEngine(int workspace_size = 1 << 20, int max_batch_size = 1, int min_subgraph_size = 3, Precision precision = Precision::kFloat32, - bool use_static = true); + bool use_static = false); /** A boolean state telling whether the TensorRT engine is used. */ bool tensorrt_engine_enabled() const { return use_tensorrt_; } diff --git a/paddle/fluid/inference/api/paddle_pass_builder.cc b/paddle/fluid/inference/api/paddle_pass_builder.cc index 182aa1b6b..28d02e8fe 100644 --- a/paddle/fluid/inference/api/paddle_pass_builder.cc +++ b/paddle/fluid/inference/api/paddle_pass_builder.cc @@ -80,6 +80,7 @@ const std::vector kAnakinSubgraphPasses({ "conv_elementwise_add_fuse_pass", // "conv_bn_fuse_pass", // "conv_elementwise_add_fuse_pass", // + "fc_gru_fuse_pass", // "anakin_subgraph_pass", }); diff --git a/paddle/fluid/platform/device_context.cc b/paddle/fluid/platform/device_context.cc index d54a3e867..961e8271e 100644 --- a/paddle/fluid/platform/device_context.cc +++ b/paddle/fluid/platform/device_context.cc @@ -233,6 +233,8 @@ void CudnnHolder::ReallocateWorkspace(size_t required_workspace_len) { paddle::memory::Allocator::kScratchpad); } +std::once_flag CUDADeviceContext::init_cudnn_; + CUDADeviceContext::CUDADeviceContext(CUDAPlace place) : place_(place), cudnn_holder_(nullptr) { CUDADeviceGuard guard(place_.device); @@ -252,10 +254,6 @@ CUDADeviceContext::CUDADeviceContext(CUDAPlace place) #endif } - if (dynload::HasCUDNN()) { - cudnn_holder_.reset(new CudnnHolder(&stream_, place)); - } - driver_version_ = GetCUDADriverVersion(place_.device); runtime_version_ = GetCUDARuntimeVersion(place_.device); @@ -348,12 +346,21 @@ bool CUDADeviceContext::tensor_core_available() const { return cublas_tensor_core_handle_ != nullptr; } +CudnnHolder* CUDADeviceContext::cudnn_holder() const { + std::call_once(init_cudnn_, [&]() { + if (dynload::HasCUDNN()) { + cudnn_holder_.reset(new CudnnHolder(&stream_, place_)); + } + }); + return cudnn_holder_.get(); +} + cudnnHandle_t CUDADeviceContext::cudnn_handle() const { - return cudnn_holder_->cudnn_handle(); + return cudnn_holder()->cudnn_handle(); } CudnnWorkspaceHandle CUDADeviceContext::cudnn_workspace_handle() const { - return CudnnWorkspaceHandle(cudnn_holder_.get()); + return CudnnWorkspaceHandle(cudnn_holder()); } cudaStream_t CUDADeviceContext::stream() const { return stream_; } diff --git a/paddle/fluid/platform/device_context.h b/paddle/fluid/platform/device_context.h index 19ce769ab..162c64552 100644 --- a/paddle/fluid/platform/device_context.h +++ b/paddle/fluid/platform/device_context.h @@ -290,9 +290,11 @@ class CUDADeviceContext : public DeviceContext { private: CUDAPlace place_; + static std::once_flag init_cudnn_; + std::unique_ptr eigen_device_; std::unique_ptr eigen_stream_; - std::unique_ptr cudnn_holder_; + mutable std::unique_ptr cudnn_holder_; cudaStream_t stream_; std::unique_ptr cublas_handle_; @@ -315,6 +317,7 @@ class CUDADeviceContext : public DeviceContext { // StreamCallbackManager is thread-safe std::unique_ptr callback_manager_; + CudnnHolder* cudnn_holder() const; DISABLE_COPY_AND_ASSIGN(CUDADeviceContext); }; -- GitLab