From 88770542cbeaa9e788981f82f0ebcc6a1b25dd2c Mon Sep 17 00:00:00 2001 From: Shixiaowei02 <39303645+Shixiaowei02@users.noreply.github.com> Date: Thu, 28 Mar 2019 16:11:43 +0000 Subject: [PATCH] resolve conflicts with the develop branch test=release/1.4 (cherry picked from commit bddb2cd315e73c459fcd553caf726c5d56dd96eb) --- cmake/external/protobuf.cmake | 2 +- .../inference/anakin/convert/CMakeLists.txt | 3 +- .../inference/anakin/convert/activation.cc | 1 + .../inference/anakin/convert/activation.h | 1 + .../inference/anakin/convert/batch_norm.cc | 1 + .../inference/anakin/convert/batch_norm.h | 1 + .../fluid/inference/anakin/convert/concat.cc | 1 + .../fluid/inference/anakin/convert/concat.h | 1 + .../fluid/inference/anakin/convert/conv2d.cc | 1 + .../fluid/inference/anakin/convert/conv2d.h | 1 + .../inference/anakin/convert/conv2d_fusion.cc | 1 + .../inference/anakin/convert/conv2d_fusion.h | 1 + .../anakin/convert/density_prior_box.cc | 6 ++-- .../anakin/convert/density_prior_box.h | 1 + .../inference/anakin/convert/detection_out.cc | 1 + .../inference/anakin/convert/detection_out.h | 1 + .../fluid/inference/anakin/convert/dropout.cc | 1 + .../fluid/inference/anakin/convert/dropout.h | 1 + .../inference/anakin/convert/elementwise.cc | 12 +++---- .../inference/anakin/convert/elementwise.h | 2 ++ paddle/fluid/inference/anakin/convert/fc.cc | 1 + paddle/fluid/inference/anakin/convert/fc.h | 1 + .../fluid/inference/anakin/convert/flatten.cc | 1 + .../fluid/inference/anakin/convert/flatten.h | 1 + .../inference/anakin/convert/im2sequence.cc | 1 + .../inference/anakin/convert/im2sequence.h | 1 + .../inference/anakin/convert/op_converter.h | 17 +++++----- .../fluid/inference/anakin/convert/pool2d.cc | 1 + .../fluid/inference/anakin/convert/pool2d.h | 1 + paddle/fluid/inference/anakin/convert/relu.cc | 1 + paddle/fluid/inference/anakin/convert/relu.h | 1 + .../fluid/inference/anakin/convert/reshape.cc | 1 + .../fluid/inference/anakin/convert/reshape.h | 1 + .../fluid/inference/anakin/convert/scale.cc | 1 + paddle/fluid/inference/anakin/convert/scale.h | 1 + .../fluid/inference/anakin/convert/softmax.cc | 11 ++++++- .../fluid/inference/anakin/convert/softmax.h | 1 + .../fluid/inference/anakin/convert/split.cc | 1 + paddle/fluid/inference/anakin/convert/split.h | 1 + paddle/fluid/inference/anakin/convert/sum.cc | 1 + paddle/fluid/inference/anakin/convert/sum.h | 1 + .../inference/anakin/convert/transpose.cc | 1 + .../inference/anakin/convert/transpose.h | 1 + .../inference/anakin/convert/ut_helper.h | 17 +++++++++- paddle/fluid/inference/anakin/engine.cc | 1 - paddle/fluid/inference/analysis/argument.h | 1 + .../ir_passes/anakin_subgraph_pass.cc | 16 +++++----- .../analysis/ir_passes/subgraph_util.cc | 30 +++++++++++++---- .../analysis/ir_passes/subgraph_util.h | 1 + .../ir_passes/tensorrt_subgraph_pass.cc | 19 ++++++----- paddle/fluid/inference/api/analysis_config.cc | 7 ++-- .../fluid/inference/api/analysis_predictor.cc | 1 + .../inference/api/paddle_analysis_config.h | 4 ++- .../fluid/operators/anakin/anakin_engine_op.h | 32 ------------------- 54 files changed, 136 insertions(+), 82 deletions(-) diff --git a/cmake/external/protobuf.cmake b/cmake/external/protobuf.cmake index bc7fe5454f5..69da9b98198 100644 --- a/cmake/external/protobuf.cmake +++ b/cmake/external/protobuf.cmake @@ -201,7 +201,7 @@ FUNCTION(build_protobuf TARGET_NAME BUILD_FOR_HOST) SET(OPTIONAL_ARGS ${OPTIONAL_ARGS} "-DCMAKE_GENERATOR_PLATFORM=x64") ENDIF() - SET(PROTOBUF_REPO "https://github.com/google/protobuf.git") + SET(PROTOBUF_REPO "https://github.com/protocolbuffers/protobuf.git") SET(PROTOBUF_TAG "9f75c5aa851cd877fb0d93ccc31b8567a6706546") ExternalProject_Add( diff --git a/paddle/fluid/inference/anakin/convert/CMakeLists.txt b/paddle/fluid/inference/anakin/convert/CMakeLists.txt index 1e7f5ac799d..d3d1522dccf 100644 --- a/paddle/fluid/inference/anakin/convert/CMakeLists.txt +++ b/paddle/fluid/inference/anakin/convert/CMakeLists.txt @@ -1,5 +1,4 @@ -cc_library(anakin_op_converter SRCS fc.cc conv2d.cc conv2d_fusion.cc - elementwise.cc activation.cc pool2d.cc concat.cc split.cc relu.cc softmax.cc batch_norm.cc reshape.cc flatten.cc transpose.cc density_prior_box.cc detection_out.cc scale.cc dropout.cc im2sequence.cc sum.cc DEPS anakin_engine framework_proto scope op_registry) +cc_library(anakin_op_converter SRCS fc.cc conv2d.cc conv2d_fusion.cc elementwise.cc activation.cc pool2d.cc concat.cc split.cc relu.cc softmax.cc batch_norm.cc reshape.cc flatten.cc transpose.cc density_prior_box.cc detection_out.cc scale.cc dropout.cc im2sequence.cc sum.cc DEPS anakin_engine framework_proto scope op_registry) cc_test(test_anakin_fc SRCS test_fc_op.cc DEPS anakin_op_converter mul_op SERIAL) cc_test(test_anakin_conv2d SRCS test_conv2d_op.cc DEPS anakin_op_converter conv_op im2col vol2col depthwise_conv SERIAL) diff --git a/paddle/fluid/inference/anakin/convert/activation.cc b/paddle/fluid/inference/anakin/convert/activation.cc index c85b958d7b8..a9aeb19ffd5 100644 --- a/paddle/fluid/inference/anakin/convert/activation.cc +++ b/paddle/fluid/inference/anakin/convert/activation.cc @@ -34,6 +34,7 @@ ActivationOpConverter::ActivationOpConverter(const std::string &op_type) } void ActivationOpConverter::operator()(const framework::proto::OpDesc &op, + const framework::BlockDesc &block_desc, const framework::Scope &scope, bool test_mode) { framework::OpDesc op_desc(op, nullptr); diff --git a/paddle/fluid/inference/anakin/convert/activation.h b/paddle/fluid/inference/anakin/convert/activation.h index 49a4518bef4..592a3d5bd9d 100644 --- a/paddle/fluid/inference/anakin/convert/activation.h +++ b/paddle/fluid/inference/anakin/convert/activation.h @@ -27,6 +27,7 @@ class ActivationOpConverter : public AnakinOpConverter { explicit ActivationOpConverter(const std::string &op_type); virtual void operator()(const framework::proto::OpDesc &op, + const framework::BlockDesc &block_desc, const framework::Scope &scope, bool test_mode) override; virtual ~ActivationOpConverter() {} diff --git a/paddle/fluid/inference/anakin/convert/batch_norm.cc b/paddle/fluid/inference/anakin/convert/batch_norm.cc index 94014802bdb..38cf6172027 100644 --- a/paddle/fluid/inference/anakin/convert/batch_norm.cc +++ b/paddle/fluid/inference/anakin/convert/batch_norm.cc @@ -29,6 +29,7 @@ namespace inference { namespace anakin { void BatchNormOpConverter::operator()(const framework::proto::OpDesc &op, + const framework::BlockDesc &block_desc, const framework::Scope &scope, bool test_mode) { framework::OpDesc op_desc(op, nullptr); diff --git a/paddle/fluid/inference/anakin/convert/batch_norm.h b/paddle/fluid/inference/anakin/convert/batch_norm.h index cee5c43ae76..c56735f15b4 100644 --- a/paddle/fluid/inference/anakin/convert/batch_norm.h +++ b/paddle/fluid/inference/anakin/convert/batch_norm.h @@ -25,6 +25,7 @@ class BatchNormOpConverter : public AnakinOpConverter { BatchNormOpConverter() = default; virtual void operator()(const framework::proto::OpDesc &op, + const framework::BlockDesc &block_desc, const framework::Scope &scope, bool test_mode) override; virtual ~BatchNormOpConverter() {} diff --git a/paddle/fluid/inference/anakin/convert/concat.cc b/paddle/fluid/inference/anakin/convert/concat.cc index e2d1111acbb..ae90c083690 100644 --- a/paddle/fluid/inference/anakin/convert/concat.cc +++ b/paddle/fluid/inference/anakin/convert/concat.cc @@ -29,6 +29,7 @@ namespace inference { namespace anakin { void ConcatOpConverter::operator()(const framework::proto::OpDesc &op, + const framework::BlockDesc &block_desc, const framework::Scope &scope, bool test_mode) { framework::OpDesc op_desc(op, nullptr); diff --git a/paddle/fluid/inference/anakin/convert/concat.h b/paddle/fluid/inference/anakin/convert/concat.h index 4ff2b6d85b7..974ff689bfe 100644 --- a/paddle/fluid/inference/anakin/convert/concat.h +++ b/paddle/fluid/inference/anakin/convert/concat.h @@ -25,6 +25,7 @@ class ConcatOpConverter : public AnakinOpConverter { ConcatOpConverter() = default; virtual void operator()(const framework::proto::OpDesc &op, + const framework::BlockDesc &block_desc, const framework::Scope &scope, bool test_mode) override; virtual ~ConcatOpConverter() {} diff --git a/paddle/fluid/inference/anakin/convert/conv2d.cc b/paddle/fluid/inference/anakin/convert/conv2d.cc index b99c6e71c4d..308f14604b9 100644 --- a/paddle/fluid/inference/anakin/convert/conv2d.cc +++ b/paddle/fluid/inference/anakin/convert/conv2d.cc @@ -28,6 +28,7 @@ namespace inference { namespace anakin { void Conv2dOpConverter::operator()(const framework::proto::OpDesc &op, + const framework::BlockDesc &block_desc, const framework::Scope &scope, bool test_mode) { framework::OpDesc op_desc(op, nullptr); diff --git a/paddle/fluid/inference/anakin/convert/conv2d.h b/paddle/fluid/inference/anakin/convert/conv2d.h index 75a30c10d48..dca5d19f468 100644 --- a/paddle/fluid/inference/anakin/convert/conv2d.h +++ b/paddle/fluid/inference/anakin/convert/conv2d.h @@ -25,6 +25,7 @@ class Conv2dOpConverter : public AnakinOpConverter { Conv2dOpConverter() = default; virtual void operator()(const framework::proto::OpDesc &op, + const framework::BlockDesc &block_desc, const framework::Scope &scope, bool test_mode) override; virtual ~Conv2dOpConverter() {} diff --git a/paddle/fluid/inference/anakin/convert/conv2d_fusion.cc b/paddle/fluid/inference/anakin/convert/conv2d_fusion.cc index 4d105430dd2..fa1ab0efeeb 100644 --- a/paddle/fluid/inference/anakin/convert/conv2d_fusion.cc +++ b/paddle/fluid/inference/anakin/convert/conv2d_fusion.cc @@ -28,6 +28,7 @@ namespace inference { namespace anakin { void Conv2dFusionOpConverter::operator()(const framework::proto::OpDesc &op, + const framework::BlockDesc &block_desc, const framework::Scope &scope, bool test_mode) { framework::OpDesc op_desc(op, nullptr); diff --git a/paddle/fluid/inference/anakin/convert/conv2d_fusion.h b/paddle/fluid/inference/anakin/convert/conv2d_fusion.h index 07359b9cba0..0d9ef28183b 100644 --- a/paddle/fluid/inference/anakin/convert/conv2d_fusion.h +++ b/paddle/fluid/inference/anakin/convert/conv2d_fusion.h @@ -25,6 +25,7 @@ class Conv2dFusionOpConverter : public AnakinOpConverter { Conv2dFusionOpConverter() = default; virtual void operator()(const framework::proto::OpDesc &op, + const framework::BlockDesc &block_desc, const framework::Scope &scope, bool test_mode) override; virtual ~Conv2dFusionOpConverter() {} diff --git a/paddle/fluid/inference/anakin/convert/density_prior_box.cc b/paddle/fluid/inference/anakin/convert/density_prior_box.cc index 35e02919aa7..30796f75924 100644 --- a/paddle/fluid/inference/anakin/convert/density_prior_box.cc +++ b/paddle/fluid/inference/anakin/convert/density_prior_box.cc @@ -27,9 +27,9 @@ namespace paddle { namespace inference { namespace anakin { -void DensityPriorBoxOpConverter::operator()(const framework::proto::OpDesc& op, - const framework::Scope& scope, - bool test_mode) { +void DensityPriorBoxOpConverter::operator()( + const framework::proto::OpDesc& op, const framework::BlockDesc& block_desc, + const framework::Scope& scope, bool test_mode) { framework::OpDesc op_desc(op, nullptr); auto input_name = op_desc.Input("Input").front(); auto image_name = op_desc.Input("Image").front(); diff --git a/paddle/fluid/inference/anakin/convert/density_prior_box.h b/paddle/fluid/inference/anakin/convert/density_prior_box.h index 44265cbf2e9..bf9210711a0 100644 --- a/paddle/fluid/inference/anakin/convert/density_prior_box.h +++ b/paddle/fluid/inference/anakin/convert/density_prior_box.h @@ -27,6 +27,7 @@ class DensityPriorBoxOpConverter : public AnakinOpConverter { DensityPriorBoxOpConverter() = default; virtual void operator()(const framework::proto::OpDesc &op, + const framework::BlockDesc &block_desc, const framework::Scope &scope, bool test_mode) override; virtual ~DensityPriorBoxOpConverter() {} diff --git a/paddle/fluid/inference/anakin/convert/detection_out.cc b/paddle/fluid/inference/anakin/convert/detection_out.cc index 67636651017..262ad28a654 100644 --- a/paddle/fluid/inference/anakin/convert/detection_out.cc +++ b/paddle/fluid/inference/anakin/convert/detection_out.cc @@ -26,6 +26,7 @@ namespace inference { namespace anakin { void DetectionOutOpConverter::operator()(const framework::proto::OpDesc &op, + const framework::BlockDesc &block_desc, const framework::Scope &scope, bool test_mode) { framework::OpDesc op_desc(op, nullptr); diff --git a/paddle/fluid/inference/anakin/convert/detection_out.h b/paddle/fluid/inference/anakin/convert/detection_out.h index 5bf1c3ecbc8..ca78f10fdc2 100644 --- a/paddle/fluid/inference/anakin/convert/detection_out.h +++ b/paddle/fluid/inference/anakin/convert/detection_out.h @@ -27,6 +27,7 @@ class DetectionOutOpConverter : public AnakinOpConverter { DetectionOutOpConverter() = default; virtual void operator()(const framework::proto::OpDesc &op, + const framework::BlockDesc &block_desc, const framework::Scope &scope, bool test_mode) override; virtual ~DetectionOutOpConverter() {} diff --git a/paddle/fluid/inference/anakin/convert/dropout.cc b/paddle/fluid/inference/anakin/convert/dropout.cc index ed6d7f7561c..bc9b26dcf27 100644 --- a/paddle/fluid/inference/anakin/convert/dropout.cc +++ b/paddle/fluid/inference/anakin/convert/dropout.cc @@ -31,6 +31,7 @@ namespace inference { namespace anakin { void DropoutOpConverter::operator()(const framework::proto::OpDesc &op, + const framework::BlockDesc &block_desc, const framework::Scope &scope, bool test_mode) { framework::OpDesc op_desc(op, nullptr); diff --git a/paddle/fluid/inference/anakin/convert/dropout.h b/paddle/fluid/inference/anakin/convert/dropout.h index 2a0fb6e76ac..11412e217ef 100644 --- a/paddle/fluid/inference/anakin/convert/dropout.h +++ b/paddle/fluid/inference/anakin/convert/dropout.h @@ -25,6 +25,7 @@ class DropoutOpConverter : public AnakinOpConverter { DropoutOpConverter() = default; virtual void operator()(const framework::proto::OpDesc &op, + const framework::BlockDesc &block_desc, const framework::Scope &scope, bool test_mode) override; virtual ~DropoutOpConverter() {} diff --git a/paddle/fluid/inference/anakin/convert/elementwise.cc b/paddle/fluid/inference/anakin/convert/elementwise.cc index 55b12390baf..fe9a896d826 100644 --- a/paddle/fluid/inference/anakin/convert/elementwise.cc +++ b/paddle/fluid/inference/anakin/convert/elementwise.cc @@ -30,9 +30,9 @@ namespace paddle { namespace inference { namespace anakin { -void ElementwiseAddOpConverter::operator()(const framework::proto::OpDesc &op, - const framework::Scope &scope, - bool test_mode) { +void ElementwiseAddOpConverter::operator()( + const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc, + const framework::Scope &scope, bool test_mode) { framework::OpDesc op_desc(op, nullptr); PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1); PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1); @@ -50,9 +50,9 @@ void ElementwiseAddOpConverter::operator()(const framework::proto::OpDesc &op, engine_->AddOpAttr>(op_name, "coeff", coeff); } -void ElementwiseMulOpConverter::operator()(const framework::proto::OpDesc &op, - const framework::Scope &scope, - bool test_mode) { +void ElementwiseMulOpConverter::operator()( + const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc, + const framework::Scope &scope, bool test_mode) { framework::OpDesc op_desc(op, nullptr); PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1); PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1); diff --git a/paddle/fluid/inference/anakin/convert/elementwise.h b/paddle/fluid/inference/anakin/convert/elementwise.h index 47525e41daa..e4664493a9d 100644 --- a/paddle/fluid/inference/anakin/convert/elementwise.h +++ b/paddle/fluid/inference/anakin/convert/elementwise.h @@ -25,6 +25,7 @@ class ElementwiseAddOpConverter : public AnakinOpConverter { ElementwiseAddOpConverter() = default; virtual void operator()(const framework::proto::OpDesc &op, + const framework::BlockDesc &block_desc, const framework::Scope &scope, bool test_mode) override; virtual ~ElementwiseAddOpConverter() {} @@ -37,6 +38,7 @@ class ElementwiseMulOpConverter : public AnakinOpConverter { ElementwiseMulOpConverter() = default; virtual void operator()(const framework::proto::OpDesc &op, + const framework::BlockDesc &block_desc, const framework::Scope &scope, bool test_mode) override; virtual ~ElementwiseMulOpConverter() {} diff --git a/paddle/fluid/inference/anakin/convert/fc.cc b/paddle/fluid/inference/anakin/convert/fc.cc index 2514eb1e093..a80a1a47e91 100644 --- a/paddle/fluid/inference/anakin/convert/fc.cc +++ b/paddle/fluid/inference/anakin/convert/fc.cc @@ -27,6 +27,7 @@ namespace inference { namespace anakin { void FcBaseOpConverter::operator()(const framework::proto::OpDesc &op, + const framework::BlockDesc &block_desc, const framework::Scope &scope, bool test_mode) { framework::OpDesc op_desc(op, nullptr); diff --git a/paddle/fluid/inference/anakin/convert/fc.h b/paddle/fluid/inference/anakin/convert/fc.h index 060c649b19e..fb461908b35 100644 --- a/paddle/fluid/inference/anakin/convert/fc.h +++ b/paddle/fluid/inference/anakin/convert/fc.h @@ -25,6 +25,7 @@ class FcBaseOpConverter : public AnakinOpConverter { FcBaseOpConverter() = default; virtual void operator()(const framework::proto::OpDesc &op, + const framework::BlockDesc &block_desc, const framework::Scope &scope, bool test_mode) override; virtual ~FcBaseOpConverter() {} diff --git a/paddle/fluid/inference/anakin/convert/flatten.cc b/paddle/fluid/inference/anakin/convert/flatten.cc index c6c372bbef8..7f5c1510960 100644 --- a/paddle/fluid/inference/anakin/convert/flatten.cc +++ b/paddle/fluid/inference/anakin/convert/flatten.cc @@ -26,6 +26,7 @@ namespace inference { namespace anakin { void FlattenOpConverter::operator()(const framework::proto::OpDesc &op, + const framework::BlockDesc &block_desc, const framework::Scope &scope, bool test_mode) { framework::OpDesc op_desc(op, nullptr); diff --git a/paddle/fluid/inference/anakin/convert/flatten.h b/paddle/fluid/inference/anakin/convert/flatten.h index 1ace76b1638..c9cc0006eb2 100644 --- a/paddle/fluid/inference/anakin/convert/flatten.h +++ b/paddle/fluid/inference/anakin/convert/flatten.h @@ -25,6 +25,7 @@ class FlattenOpConverter : public AnakinOpConverter { FlattenOpConverter() = default; virtual void operator()(const framework::proto::OpDesc &op, + const framework::BlockDesc &block_desc, const framework::Scope &scope, bool test_mode) override; virtual ~FlattenOpConverter() {} diff --git a/paddle/fluid/inference/anakin/convert/im2sequence.cc b/paddle/fluid/inference/anakin/convert/im2sequence.cc index 568d7e4746f..2cc330c3829 100644 --- a/paddle/fluid/inference/anakin/convert/im2sequence.cc +++ b/paddle/fluid/inference/anakin/convert/im2sequence.cc @@ -31,6 +31,7 @@ namespace inference { namespace anakin { void Im2SequenceConverter::operator()(const framework::proto::OpDesc &op, + const framework::BlockDesc &block_desc, const framework::Scope &scope, bool test_mode) { framework::OpDesc op_desc(op, nullptr); diff --git a/paddle/fluid/inference/anakin/convert/im2sequence.h b/paddle/fluid/inference/anakin/convert/im2sequence.h index 3003eac2c6f..714679c1d96 100644 --- a/paddle/fluid/inference/anakin/convert/im2sequence.h +++ b/paddle/fluid/inference/anakin/convert/im2sequence.h @@ -25,6 +25,7 @@ class Im2SequenceConverter : public AnakinOpConverter { Im2SequenceConverter() = default; virtual void operator()(const framework::proto::OpDesc &op, + const framework::BlockDesc &block_desc, const framework::Scope &scope, bool test_mode) override; virtual ~Im2SequenceConverter() {} diff --git a/paddle/fluid/inference/anakin/convert/op_converter.h b/paddle/fluid/inference/anakin/convert/op_converter.h index 45db4221747..1ca62658ef2 100644 --- a/paddle/fluid/inference/anakin/convert/op_converter.h +++ b/paddle/fluid/inference/anakin/convert/op_converter.h @@ -40,8 +40,10 @@ class AnakinOpConverter { AnakinOpConverter() = default; virtual void operator()(const framework::proto::OpDesc &op, + const framework::BlockDesc &block_desc, const framework::Scope &scope, bool test_mode) {} void ConvertOp(const framework::proto::OpDesc &op, + const framework::BlockDesc &block_desc, const std::unordered_set ¶meters, const framework::Scope &scope, AnakinNvEngine *engine, bool test_mode = false) { @@ -58,16 +60,17 @@ class AnakinOpConverter { } PADDLE_ENFORCE_NOT_NULL(it, "no OpConverter for optype [%s]", op_type); it->SetEngine(engine); - (*it)(op, scope, test_mode); + (*it)(op, block_desc, scope, test_mode); } - void ConvertBlock(const framework::proto::BlockDesc &block, + void ConvertBlock(framework::BlockDesc *block_desc, const std::unordered_set ¶meters, const framework::Scope &scope, AnakinNvEngine *engine) { std::unique_lock lock(mutex_); - for (auto i = 0; i < block.ops_size(); i++) { - auto &op = block.ops(i); - ConvertOp(op, parameters, scope, engine); + framework::proto::BlockDesc *block = block_desc->Proto(); + for (auto i = 0; i < block->ops_size(); i++) { + auto &op = block->ops(i); + ConvertOp(op, *block_desc, parameters, scope, engine); } } @@ -77,9 +80,7 @@ class AnakinOpConverter { const std::vector &inputs, const std::unordered_set ¶meters, const std::vector &outputs, AnakinNvEngine *engine) { - framework::proto::BlockDesc *block_proto = block_desc->Proto(); - ConvertBlock(*block_proto, parameters, *scope, engine); - + ConvertBlock(block_desc, parameters, *scope, engine); engine->Freeze(); // if the max_batch size int max_batch_size = engine->GetMaxBatchSize(); diff --git a/paddle/fluid/inference/anakin/convert/pool2d.cc b/paddle/fluid/inference/anakin/convert/pool2d.cc index 9b01d56a126..87eefe712a5 100644 --- a/paddle/fluid/inference/anakin/convert/pool2d.cc +++ b/paddle/fluid/inference/anakin/convert/pool2d.cc @@ -31,6 +31,7 @@ namespace inference { namespace anakin { void Pool2dOpConverter::operator()(const framework::proto::OpDesc &op, + const framework::BlockDesc &block_desc, const framework::Scope &scope, bool test_mode) { framework::OpDesc op_desc(op, nullptr); diff --git a/paddle/fluid/inference/anakin/convert/pool2d.h b/paddle/fluid/inference/anakin/convert/pool2d.h index 1931a03c7ac..ec28e48ac84 100644 --- a/paddle/fluid/inference/anakin/convert/pool2d.h +++ b/paddle/fluid/inference/anakin/convert/pool2d.h @@ -25,6 +25,7 @@ class Pool2dOpConverter : public AnakinOpConverter { Pool2dOpConverter() = default; virtual void operator()(const framework::proto::OpDesc &op, + const framework::BlockDesc &block_desc, const framework::Scope &scope, bool test_mode) override; virtual ~Pool2dOpConverter() {} diff --git a/paddle/fluid/inference/anakin/convert/relu.cc b/paddle/fluid/inference/anakin/convert/relu.cc index 2ce96db1804..993437d014b 100644 --- a/paddle/fluid/inference/anakin/convert/relu.cc +++ b/paddle/fluid/inference/anakin/convert/relu.cc @@ -26,6 +26,7 @@ namespace inference { namespace anakin { void ReluOpConverter::operator()(const framework::proto::OpDesc &op, + const framework::BlockDesc &block_desc, const framework::Scope &scope, bool test_mode) { framework::OpDesc op_desc(op, nullptr); diff --git a/paddle/fluid/inference/anakin/convert/relu.h b/paddle/fluid/inference/anakin/convert/relu.h index 54c4c2316eb..6ede5065119 100644 --- a/paddle/fluid/inference/anakin/convert/relu.h +++ b/paddle/fluid/inference/anakin/convert/relu.h @@ -27,6 +27,7 @@ class ReluOpConverter : public AnakinOpConverter { ReluOpConverter() = default; virtual void operator()(const framework::proto::OpDesc &op, + const framework::BlockDesc &block_desc, const framework::Scope &scope, bool test_mode) override; virtual ~ReluOpConverter() {} diff --git a/paddle/fluid/inference/anakin/convert/reshape.cc b/paddle/fluid/inference/anakin/convert/reshape.cc index eee36d2f37e..17e0a1acb5f 100644 --- a/paddle/fluid/inference/anakin/convert/reshape.cc +++ b/paddle/fluid/inference/anakin/convert/reshape.cc @@ -26,6 +26,7 @@ namespace inference { namespace anakin { void ReshapeOpConverter::operator()(const framework::proto::OpDesc &op, + const framework::BlockDesc &block_desc, const framework::Scope &scope, bool test_mode) { framework::OpDesc op_desc(op, nullptr); diff --git a/paddle/fluid/inference/anakin/convert/reshape.h b/paddle/fluid/inference/anakin/convert/reshape.h index 970e8ce5572..9ce2ea2a4f3 100644 --- a/paddle/fluid/inference/anakin/convert/reshape.h +++ b/paddle/fluid/inference/anakin/convert/reshape.h @@ -25,6 +25,7 @@ class ReshapeOpConverter : public AnakinOpConverter { ReshapeOpConverter() = default; virtual void operator()(const framework::proto::OpDesc &op, + const framework::BlockDesc &block_desc, const framework::Scope &scope, bool test_mode) override; virtual ~ReshapeOpConverter() {} diff --git a/paddle/fluid/inference/anakin/convert/scale.cc b/paddle/fluid/inference/anakin/convert/scale.cc index 6f3aa8c5d11..dd68af4f79a 100644 --- a/paddle/fluid/inference/anakin/convert/scale.cc +++ b/paddle/fluid/inference/anakin/convert/scale.cc @@ -26,6 +26,7 @@ namespace inference { namespace anakin { void ScaleOpConverter::operator()(const framework::proto::OpDesc &op, + const framework::BlockDesc &block_desc, const framework::Scope &scope, bool test_mode) { framework::OpDesc op_desc(op, nullptr); diff --git a/paddle/fluid/inference/anakin/convert/scale.h b/paddle/fluid/inference/anakin/convert/scale.h index b858e3c5124..ba3bcdd2149 100644 --- a/paddle/fluid/inference/anakin/convert/scale.h +++ b/paddle/fluid/inference/anakin/convert/scale.h @@ -27,6 +27,7 @@ class ScaleOpConverter : public AnakinOpConverter { ScaleOpConverter() = default; virtual void operator()(const framework::proto::OpDesc &op, + const framework::BlockDesc &block_desc, const framework::Scope &scope, bool test_mode) override; virtual ~ScaleOpConverter() {} diff --git a/paddle/fluid/inference/anakin/convert/softmax.cc b/paddle/fluid/inference/anakin/convert/softmax.cc index d5cd8908ebf..a6c1e971b16 100644 --- a/paddle/fluid/inference/anakin/convert/softmax.cc +++ b/paddle/fluid/inference/anakin/convert/softmax.cc @@ -24,6 +24,7 @@ namespace inference { namespace anakin { void SoftMaxOpConverter::operator()(const framework::proto::OpDesc &op, + const framework::BlockDesc &block_desc, const framework::Scope &scope, bool test_mode) { framework::OpDesc op_desc(op, nullptr); @@ -32,8 +33,16 @@ void SoftMaxOpConverter::operator()(const framework::proto::OpDesc &op, auto input = op_desc.Input("X").front(); auto output = op_desc.Output("Out").front(); auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front(); + + auto input_var_desc = block_desc.FindVar(input); + PADDLE_ENFORCE(input_var_desc, + "Cant find %s variable When runing Anakin Softmax converter.", + input); + auto input_shape_in_fluid = input_var_desc->GetShape(); + size_t input_dims = input_shape_in_fluid.size(); + engine_->AddOp(op_name, "Softmax", {input}, {output}); - engine_->AddOpAttr(op_name, "axis", 2); + engine_->AddOpAttr(op_name, "axis", static_cast(input_dims - 1)); } } // namespace anakin diff --git a/paddle/fluid/inference/anakin/convert/softmax.h b/paddle/fluid/inference/anakin/convert/softmax.h index 0508da0c6fe..a16356d5bb6 100644 --- a/paddle/fluid/inference/anakin/convert/softmax.h +++ b/paddle/fluid/inference/anakin/convert/softmax.h @@ -25,6 +25,7 @@ class SoftMaxOpConverter : public AnakinOpConverter { SoftMaxOpConverter() = default; virtual void operator()(const framework::proto::OpDesc &op, + const framework::BlockDesc &block_desc, const framework::Scope &scope, bool test_mode) override; virtual ~SoftMaxOpConverter() {} diff --git a/paddle/fluid/inference/anakin/convert/split.cc b/paddle/fluid/inference/anakin/convert/split.cc index b8464a766d2..ec582c18126 100644 --- a/paddle/fluid/inference/anakin/convert/split.cc +++ b/paddle/fluid/inference/anakin/convert/split.cc @@ -30,6 +30,7 @@ namespace inference { namespace anakin { void SplitOpConverter::operator()(const framework::proto::OpDesc &op, + const framework::BlockDesc &block_desc, const framework::Scope &scope, bool test_mode) { framework::OpDesc op_desc(op, nullptr); diff --git a/paddle/fluid/inference/anakin/convert/split.h b/paddle/fluid/inference/anakin/convert/split.h index a4c6a14e621..184112e589e 100644 --- a/paddle/fluid/inference/anakin/convert/split.h +++ b/paddle/fluid/inference/anakin/convert/split.h @@ -25,6 +25,7 @@ class SplitOpConverter : public AnakinOpConverter { SplitOpConverter() = default; virtual void operator()(const framework::proto::OpDesc &op, + const framework::BlockDesc &block_desc, const framework::Scope &scope, bool test_mode) override; virtual ~SplitOpConverter() {} diff --git a/paddle/fluid/inference/anakin/convert/sum.cc b/paddle/fluid/inference/anakin/convert/sum.cc index df9104cf463..2a4178e2371 100644 --- a/paddle/fluid/inference/anakin/convert/sum.cc +++ b/paddle/fluid/inference/anakin/convert/sum.cc @@ -31,6 +31,7 @@ namespace inference { namespace anakin { void SumOpConverter::operator()(const framework::proto::OpDesc &op, + const framework::BlockDesc &block_desc, const framework::Scope &scope, bool test_mode) { framework::OpDesc op_desc(op, nullptr); PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 2); diff --git a/paddle/fluid/inference/anakin/convert/sum.h b/paddle/fluid/inference/anakin/convert/sum.h index ddecc4b3bcb..b5d402b77fc 100644 --- a/paddle/fluid/inference/anakin/convert/sum.h +++ b/paddle/fluid/inference/anakin/convert/sum.h @@ -25,6 +25,7 @@ class SumOpConverter : public AnakinOpConverter { SumOpConverter() = default; virtual void operator()(const framework::proto::OpDesc &op, + const framework::BlockDesc &block_desc, const framework::Scope &scope, bool test_mode) override; virtual ~SumOpConverter() {} diff --git a/paddle/fluid/inference/anakin/convert/transpose.cc b/paddle/fluid/inference/anakin/convert/transpose.cc index 6a887401034..f35372fe5c3 100644 --- a/paddle/fluid/inference/anakin/convert/transpose.cc +++ b/paddle/fluid/inference/anakin/convert/transpose.cc @@ -28,6 +28,7 @@ namespace inference { namespace anakin { void TransposeOpConverter::operator()(const framework::proto::OpDesc &op, + const framework::BlockDesc &block_desc, const framework::Scope &scope, bool test_mode) { framework::OpDesc op_desc(op, nullptr); diff --git a/paddle/fluid/inference/anakin/convert/transpose.h b/paddle/fluid/inference/anakin/convert/transpose.h index 62d26b6a9cc..bacbf152bc1 100644 --- a/paddle/fluid/inference/anakin/convert/transpose.h +++ b/paddle/fluid/inference/anakin/convert/transpose.h @@ -25,6 +25,7 @@ class TransposeOpConverter : public AnakinOpConverter { TransposeOpConverter() = default; virtual void operator()(const framework::proto::OpDesc &op, + const framework::BlockDesc &block_desc, const framework::Scope &scope, bool test_mode) override; virtual ~TransposeOpConverter() {} diff --git a/paddle/fluid/inference/anakin/convert/ut_helper.h b/paddle/fluid/inference/anakin/convert/ut_helper.h index e0371d95347..029aff6704f 100644 --- a/paddle/fluid/inference/anakin/convert/ut_helper.h +++ b/paddle/fluid/inference/anakin/convert/ut_helper.h @@ -22,6 +22,7 @@ limitations under the License. */ #include #include +#include "paddle/fluid/framework/block_desc.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/tensor_util.h" @@ -112,6 +113,17 @@ class AnakinConvertValidation { auto* x_tensor = x->GetMutable(); x_tensor->Resize(framework::make_ddim(dim_vec)); RandomizeTensor(x_tensor, place_, ctx); + + std::vector dim_vec_int64; + for (auto& ele : dim_vec) { + dim_vec_int64.push_back(static_cast(ele)); + } + + // Add var_desc to block_desc + auto* block_desc = program_desc_.MutableBlock(framework::kRootBlockIndex); + + auto* var_desc = block_desc->Var(name); + var_desc->SetShape(dim_vec_int64); } void SetOp(const framework::proto::OpDesc& desc) { @@ -119,8 +131,10 @@ class AnakinConvertValidation { op_desc_.reset(new framework::OpDesc(desc, nullptr)); // should init anakin engine here. + auto& block_desc = program_desc_.Block(framework::kRootBlockIndex); Singleton::Global().ConvertOp( - desc, parameters_, *scope_, engine_.get(), true /*test_mode*/); + desc, block_desc, parameters_, *scope_, engine_.get(), + true /*test_mode*/); engine_->Freeze(); std::map> temp_max_input_shape; @@ -194,6 +208,7 @@ class AnakinConvertValidation { cudaStream_t stream_; std::unique_ptr op_; std::unique_ptr op_desc_; + framework::ProgramDesc program_desc_; const std::unordered_set& parameters_; framework::Scope* scope_; platform::CUDAPlace place_; diff --git a/paddle/fluid/inference/anakin/engine.cc b/paddle/fluid/inference/anakin/engine.cc index ccf78ad7e56..ba044c9401a 100644 --- a/paddle/fluid/inference/anakin/engine.cc +++ b/paddle/fluid/inference/anakin/engine.cc @@ -91,7 +91,6 @@ void AnakinEngine::Execute( " or equal to the real input shape, Please set the max " "input shape using EnableAnakinEngine"); anakin_input->reshape(fluid_input_shape); - ::anakin::saber::Tensor tmp_anakin_tensor(data, TargetT(), 0, fluid_input_shape); anakin_input->copy_from(tmp_anakin_tensor); diff --git a/paddle/fluid/inference/analysis/argument.h b/paddle/fluid/inference/analysis/argument.h index 29f16943e0c..a736ca393cc 100644 --- a/paddle/fluid/inference/analysis/argument.h +++ b/paddle/fluid/inference/analysis/argument.h @@ -168,6 +168,7 @@ struct Argument { DECL_ARGUMENT_FIELD(anakin_max_input_shape, AnakinMaxInputShape, anakin_max_shape_t); DECL_ARGUMENT_FIELD(anakin_max_batch_size, AnakinMaxBatchSize, int); + DECL_ARGUMENT_FIELD(anakin_min_subgraph_size, AnakinMinSubgraphSize, int); DECL_ARGUMENT_FIELD(use_anakin, UseAnakin, bool); // Memory optimized related. diff --git a/paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.cc b/paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.cc index 38612d5cc3d..b8d8b6fed8c 100644 --- a/paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.cc +++ b/paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.cc @@ -151,13 +151,20 @@ void AnakinSubgraphPass::CreateAnakinOp( op_desc->SetType("anakin_engine"); std::unordered_map output_name_map; + std::unordered_map graph_var_map; + + for (framework::ir::Node *node : graph->Nodes()) { + if (node->IsVar() && node->Var()) { + graph_var_map[node->Name()] = node; + } + } auto &subgraph_nodes = *Agent(node).subgraph(); // The following procedure is used to rename all the intermediate // variables and the output variables of the subgraph. RenameAndGetOutputs(subgraph_nodes, &block_desc, input_names_with_id, &output_names_with_id, &output_names, &output_name_map, - false); + graph_var_map, false); // When anakin engine runs at the end of the operation, // output_mapping help us copy the data from the renamed ITensor @@ -168,13 +175,6 @@ void AnakinSubgraphPass::CreateAnakinOp( output_mapping.push_back(output_name_map[name]); } - auto *vars = block_desc.Proto()->mutable_vars(); - for (framework::ir::Node *node : graph->Nodes()) { - if (node->IsVar() && node->Var()) { - *vars->Add() = *node->Var()->Proto(); - } - } - PADDLE_ENFORCE(!block_desc.Proto()->vars().empty(), "the block has no var-desc"); PADDLE_ENFORCE(!output_mapping.empty()); diff --git a/paddle/fluid/inference/analysis/ir_passes/subgraph_util.cc b/paddle/fluid/inference/analysis/ir_passes/subgraph_util.cc index a17ee1b707a..33b6d0980b7 100644 --- a/paddle/fluid/inference/analysis/ir_passes/subgraph_util.cc +++ b/paddle/fluid/inference/analysis/ir_passes/subgraph_util.cc @@ -60,6 +60,7 @@ void RenameAndGetOutputs( std::set *output_names_with_id, std::set *output_names, std::unordered_map *output_name_map, + const std::unordered_map &graph_var_map, bool is_trt) { //// In the normal case, the paddle-trt exists bug when runing the googlenet. // When there are more than two convolutions of 1 * 1 with the same input, the @@ -69,6 +70,13 @@ void RenameAndGetOutputs( std::unordered_map same_hierarchy_conv2d_num_map; + auto set_var_shape = [&](const std::string &arg_value) { + auto arg_var_node = graph_var_map.find(arg_value); + PADDLE_ENFORCE(arg_var_node != graph_var_map.end()); + auto *var_t = block_desc->Var(arg_value); + var_t->SetShape(arg_var_node->second->Var()->GetShape()); + }; + for (size_t index = 0; index < block_desc->OpSize(); ++index) { framework::proto::OpDesc *op = block_desc->Op(index)->Proto(); framework::OpDesc op_desc(*op, nullptr); @@ -87,14 +95,20 @@ void RenameAndGetOutputs( auto *in_var = op->mutable_inputs(i); std::vector replaced_names; for (int k = 0; k < in_var->arguments_size(); k++) { // all the arguments - std::string arg_value = in_var->arguments(k); - std::string arg_value_with_id = + const std::string arg_value = in_var->arguments(k); + const std::string arg_value_with_id = arg_value + std::to_string(var2id[arg_value]); + + bool is_var_in_graph = graph_var_map.count(arg_value); + if (input_names_with_id.count(arg_value_with_id)) { replaced_names.push_back(arg_value); } else { replaced_names.push_back(arg_value_with_id); } + if (is_var_in_graph) { + set_var_shape(arg_value); + } } in_var->clear_arguments(); for (size_t k = 0; k < replaced_names.size(); k++) { @@ -105,7 +119,6 @@ void RenameAndGetOutputs( for (auto out_var : correspond_node->outputs) { var2id[out_var->Name()] = out_var->id(); } - if (op_desc.Type() == "conv2d" && is_trt) { auto input_var_name = op_desc.Input("Input").front(); auto filter_var_name = op_desc.Input("Filter").front(); @@ -125,15 +138,20 @@ void RenameAndGetOutputs( same_hierarchy_conv2d_num_map[input_var_name] += 1; } } - // rename for the output variables of op inside subgraph for (int i = 0; i < op->outputs_size(); i++) { framework::proto::OpDesc_Var *out_var = op->mutable_outputs(i); std::vector replaced_names; for (int k = 0; k < out_var->arguments_size(); k++) { - std::string arg_value = out_var->arguments(k); - std::string arg_value_with_id = + const std::string arg_value = out_var->arguments(k); + const std::string arg_value_with_id = arg_value + std::to_string(var2id[arg_value]); + + bool is_var_in_graph = graph_var_map.count(arg_value); + if (is_var_in_graph) { + set_var_shape(arg_value); + } + if (output_names_with_id->count(arg_value_with_id)) { (*output_name_map)[arg_value] = arg_value_with_id; } diff --git a/paddle/fluid/inference/analysis/ir_passes/subgraph_util.h b/paddle/fluid/inference/analysis/ir_passes/subgraph_util.h index 3cf21bf5f42..bb445027821 100644 --- a/paddle/fluid/inference/analysis/ir_passes/subgraph_util.h +++ b/paddle/fluid/inference/analysis/ir_passes/subgraph_util.h @@ -42,6 +42,7 @@ void RenameAndGetOutputs( std::set *output_names_with_id, std::set *output_names, std::unordered_map *output_name_map, + const std::unordered_map &graph_var_map, bool is_trt = true); } // namespace analysis diff --git a/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc b/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc index 019098a5dd0..67650a352d8 100644 --- a/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc +++ b/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc @@ -142,6 +142,13 @@ void TensorRtSubgraphPass::CreateTensorRTOp( } std::unordered_map output_name_map; + std::unordered_map graph_var_map; + + for (framework::ir::Node *node : graph->Nodes()) { + if (node->IsVar() && node->Var()) { + graph_var_map[node->Name()] = node; + } + } auto &subgraph_nodes = *Agent(node).subgraph(); // The following procedure is used to rename all the intermediate @@ -157,7 +164,8 @@ void TensorRtSubgraphPass::CreateTensorRTOp( // So we have to rename the variable in the subgraph to make sure // it is either an OP's input or an OP's output. RenameAndGetOutputs(subgraph_nodes, &block_desc, input_names_with_id, - &output_names_with_id, &output_names, &output_name_map); + &output_names_with_id, &output_names, &output_name_map, + graph_var_map); // When tensorrt engine runs at the end of the operation, // output_mapping help us copy the data from the renamed ITensor @@ -168,14 +176,6 @@ void TensorRtSubgraphPass::CreateTensorRTOp( output_mapping.push_back(output_name_map[name]); } PADDLE_ENFORCE(!output_mapping.empty()); - - auto *vars = block_desc.Proto()->mutable_vars(); - for (framework::ir::Node *node : graph->Nodes()) { - if (node->IsVar() && node->Var()) { - *vars->Add() = *node->Var()->Proto(); - } - } - PADDLE_ENFORCE(!block_desc.Proto()->vars().empty(), "the block has no var-desc"); @@ -213,7 +213,6 @@ void TensorRtSubgraphPass::CreateTensorRTOp( SetAttr(op_desc->Proto(), "enable_int8", enable_int8); SetAttr(op_desc->Proto(), "engine_key", engine_key); std::string trt_engine_serialized_data = ""; - SetAttr(op_desc->Proto(), "engine_serialized_data", trt_engine_serialized_data); diff --git a/paddle/fluid/inference/api/analysis_config.cc b/paddle/fluid/inference/api/analysis_config.cc index aee94e12340..e5036d94019 100644 --- a/paddle/fluid/inference/api/analysis_config.cc +++ b/paddle/fluid/inference/api/analysis_config.cc @@ -115,6 +115,7 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) { CP_MEMBER(use_anakin_); CP_MEMBER(anakin_max_batchsize_); CP_MEMBER(anakin_max_input_shape_); + CP_MEMBER(anakin_min_subgraph_size_); // Ir related. CP_MEMBER(enable_ir_optim_); @@ -322,6 +323,7 @@ std::string AnalysisConfig::SerializeInfoCache() { ss << specify_input_name_; ss << cpu_math_library_num_threads_; ss << use_anakin_; + ss << anakin_min_subgraph_size_; return ss.str(); } @@ -393,10 +395,11 @@ void AnalysisConfig::SwitchIrDebug(int x) { Update(); } void AnalysisConfig::EnableAnakinEngine( - int max_batch_size, - std::map> max_input_shape) { + int max_batch_size, std::map> max_input_shape, + int min_subgraph_size) { anakin_max_batchsize_ = max_batch_size; anakin_max_input_shape_ = max_input_shape; + anakin_min_subgraph_size_ = min_subgraph_size; use_anakin_ = true; Update(); } diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index 7d8e9fe8bfa..6942604b072 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -385,6 +385,7 @@ void AnalysisPredictor::PrepareArgument() { if (config_.use_gpu() && config_.anakin_engine_enabled()) { argument_.SetAnakinMaxBatchSize(config_.anakin_max_batchsize_); argument_.SetAnakinMaxInputShape(config_.anakin_max_input_shape_); + argument_.SetAnakinMinSubgraphSize(config_.anakin_min_subgraph_size_); LOG(INFO) << "Anakin subgraph engine is enabled"; } diff --git a/paddle/fluid/inference/api/paddle_analysis_config.h b/paddle/fluid/inference/api/paddle_analysis_config.h index 2ad4add2945..c67c4b5bd0b 100644 --- a/paddle/fluid/inference/api/paddle_analysis_config.h +++ b/paddle/fluid/inference/api/paddle_analysis_config.h @@ -151,7 +151,8 @@ struct AnalysisConfig { */ void EnableAnakinEngine( int max_batch_size = 1, - std::map> max_input_shape = {}); + std::map> max_input_shape = {}, + int min_subgraph_size = 6); /** A boolean state indicating whether the Anakin sub-graph engine is used. */ @@ -288,6 +289,7 @@ struct AnalysisConfig { bool use_anakin_{false}; int anakin_max_batchsize_; + int anakin_min_subgraph_size_{6}; std::map> anakin_max_input_shape_; std::map engine_opt_info_; diff --git a/paddle/fluid/operators/anakin/anakin_engine_op.h b/paddle/fluid/operators/anakin/anakin_engine_op.h index 9d5b4f6f54c..e4feb14b227 100644 --- a/paddle/fluid/operators/anakin/anakin_engine_op.h +++ b/paddle/fluid/operators/anakin/anakin_engine_op.h @@ -120,40 +120,8 @@ class AnakinEngineOp : public framework::OperatorBase { inference::Singleton::Global() .Get(engine_key_); } - return anakin_engine_; } - - void Prepare(const framework::Scope &scope, const platform::Place &dev_place, - AnakinNvEngineT *engine) const { - LOG(INFO) << "Prepare Anakin engine (Optimize model structure, Select OP " - "kernel etc). This process may cost a lot of time."; - framework::proto::BlockDesc block_desc; - block_desc.ParseFromString(Attr("subgraph")); - - std::vector output_maps = - Attr>("output_name_mapping"); - - inference::Singleton::Global() - .ConvertBlock(block_desc, param_names_, scope, engine); - engine->Freeze(); - for (const auto &x : Inputs("Xs")) { - if (param_names_.count(x)) continue; - auto &t = - inference::analysis::GetFromScope(scope, x); - auto t_shape = framework::vectorize2int(t.dims()); - // all input shape should be 4 dims - if (t_shape.size() == 2) { - t_shape.push_back(1); - t_shape.push_back(1); - } - engine->SetInputShape(x, t_shape); - } - - engine->Optimize(); - - engine->InitGraph(); - } }; } // namespace operators -- GitLab