diff --git a/cmake/external/protobuf.cmake b/cmake/external/protobuf.cmake index bc7fe5454f5883108e43b4ca47920995dc13a1ff..69da9b98198de358348621ecdb444f2f81c7757f 100644 --- a/cmake/external/protobuf.cmake +++ b/cmake/external/protobuf.cmake @@ -201,7 +201,7 @@ FUNCTION(build_protobuf TARGET_NAME BUILD_FOR_HOST) SET(OPTIONAL_ARGS ${OPTIONAL_ARGS} "-DCMAKE_GENERATOR_PLATFORM=x64") ENDIF() - SET(PROTOBUF_REPO "https://github.com/google/protobuf.git") + SET(PROTOBUF_REPO "https://github.com/protocolbuffers/protobuf.git") SET(PROTOBUF_TAG "9f75c5aa851cd877fb0d93ccc31b8567a6706546") ExternalProject_Add( diff --git a/paddle/fluid/inference/anakin/convert/CMakeLists.txt b/paddle/fluid/inference/anakin/convert/CMakeLists.txt index 1e7f5ac799de0d7a1debec0529d262f021bba790..d3d1522dccf0d8af4f26eec4e0c57257279880e0 100644 --- a/paddle/fluid/inference/anakin/convert/CMakeLists.txt +++ b/paddle/fluid/inference/anakin/convert/CMakeLists.txt @@ -1,5 +1,4 @@ -cc_library(anakin_op_converter SRCS fc.cc conv2d.cc conv2d_fusion.cc - elementwise.cc activation.cc pool2d.cc concat.cc split.cc relu.cc softmax.cc batch_norm.cc reshape.cc flatten.cc transpose.cc density_prior_box.cc detection_out.cc scale.cc dropout.cc im2sequence.cc sum.cc DEPS anakin_engine framework_proto scope op_registry) +cc_library(anakin_op_converter SRCS fc.cc conv2d.cc conv2d_fusion.cc elementwise.cc activation.cc pool2d.cc concat.cc split.cc relu.cc softmax.cc batch_norm.cc reshape.cc flatten.cc transpose.cc density_prior_box.cc detection_out.cc scale.cc dropout.cc im2sequence.cc sum.cc DEPS anakin_engine framework_proto scope op_registry) cc_test(test_anakin_fc SRCS test_fc_op.cc DEPS anakin_op_converter mul_op SERIAL) cc_test(test_anakin_conv2d SRCS test_conv2d_op.cc DEPS anakin_op_converter conv_op im2col vol2col depthwise_conv SERIAL) diff --git a/paddle/fluid/inference/anakin/convert/activation.cc b/paddle/fluid/inference/anakin/convert/activation.cc index c85b958d7b85cb3e21df8714c89eee10b9b3fecc..a9aeb19ffd5f04c03df593e8f48976e7fa6155ab 100644 --- a/paddle/fluid/inference/anakin/convert/activation.cc +++ b/paddle/fluid/inference/anakin/convert/activation.cc @@ -34,6 +34,7 @@ ActivationOpConverter::ActivationOpConverter(const std::string &op_type) } void ActivationOpConverter::operator()(const framework::proto::OpDesc &op, + const framework::BlockDesc &block_desc, const framework::Scope &scope, bool test_mode) { framework::OpDesc op_desc(op, nullptr); diff --git a/paddle/fluid/inference/anakin/convert/activation.h b/paddle/fluid/inference/anakin/convert/activation.h index 49a4518bef418491a7fbc0bcde403bf047f774bd..592a3d5bd9d1272aae8a13d0d0acc77f8990c6b3 100644 --- a/paddle/fluid/inference/anakin/convert/activation.h +++ b/paddle/fluid/inference/anakin/convert/activation.h @@ -27,6 +27,7 @@ class ActivationOpConverter : public AnakinOpConverter { explicit ActivationOpConverter(const std::string &op_type); virtual void operator()(const framework::proto::OpDesc &op, + const framework::BlockDesc &block_desc, const framework::Scope &scope, bool test_mode) override; virtual ~ActivationOpConverter() {} diff --git a/paddle/fluid/inference/anakin/convert/batch_norm.cc b/paddle/fluid/inference/anakin/convert/batch_norm.cc index 94014802bdbe1792e9eaba28d7134624dd3edc90..38cf6172027b3b200a378a61b6d5b395cc571de7 100644 --- a/paddle/fluid/inference/anakin/convert/batch_norm.cc +++ b/paddle/fluid/inference/anakin/convert/batch_norm.cc @@ -29,6 +29,7 @@ namespace inference { namespace anakin { void BatchNormOpConverter::operator()(const framework::proto::OpDesc &op, + const framework::BlockDesc &block_desc, const framework::Scope &scope, bool test_mode) { framework::OpDesc op_desc(op, nullptr); diff --git a/paddle/fluid/inference/anakin/convert/batch_norm.h b/paddle/fluid/inference/anakin/convert/batch_norm.h index cee5c43ae76bf28284118380ca4c861d5cbedd1c..c56735f15b435b46cf9f623bd284b5731a36c327 100644 --- a/paddle/fluid/inference/anakin/convert/batch_norm.h +++ b/paddle/fluid/inference/anakin/convert/batch_norm.h @@ -25,6 +25,7 @@ class BatchNormOpConverter : public AnakinOpConverter { BatchNormOpConverter() = default; virtual void operator()(const framework::proto::OpDesc &op, + const framework::BlockDesc &block_desc, const framework::Scope &scope, bool test_mode) override; virtual ~BatchNormOpConverter() {} diff --git a/paddle/fluid/inference/anakin/convert/concat.cc b/paddle/fluid/inference/anakin/convert/concat.cc index e2d1111acbb60690167530a25aeaf59858b71987..ae90c083690da6e108a05460de68be2eb0cd9b48 100644 --- a/paddle/fluid/inference/anakin/convert/concat.cc +++ b/paddle/fluid/inference/anakin/convert/concat.cc @@ -29,6 +29,7 @@ namespace inference { namespace anakin { void ConcatOpConverter::operator()(const framework::proto::OpDesc &op, + const framework::BlockDesc &block_desc, const framework::Scope &scope, bool test_mode) { framework::OpDesc op_desc(op, nullptr); diff --git a/paddle/fluid/inference/anakin/convert/concat.h b/paddle/fluid/inference/anakin/convert/concat.h index 4ff2b6d85b758efc7529c5034a34e094ee06cccb..974ff689bfef681f8993d5dbb0dbbbdde91f33bd 100644 --- a/paddle/fluid/inference/anakin/convert/concat.h +++ b/paddle/fluid/inference/anakin/convert/concat.h @@ -25,6 +25,7 @@ class ConcatOpConverter : public AnakinOpConverter { ConcatOpConverter() = default; virtual void operator()(const framework::proto::OpDesc &op, + const framework::BlockDesc &block_desc, const framework::Scope &scope, bool test_mode) override; virtual ~ConcatOpConverter() {} diff --git a/paddle/fluid/inference/anakin/convert/conv2d.cc b/paddle/fluid/inference/anakin/convert/conv2d.cc index b99c6e71c4dfd2b567d85904f57ebecf0ed9a1cc..308f14604b9c83f2278499359328109d31f9ff17 100644 --- a/paddle/fluid/inference/anakin/convert/conv2d.cc +++ b/paddle/fluid/inference/anakin/convert/conv2d.cc @@ -28,6 +28,7 @@ namespace inference { namespace anakin { void Conv2dOpConverter::operator()(const framework::proto::OpDesc &op, + const framework::BlockDesc &block_desc, const framework::Scope &scope, bool test_mode) { framework::OpDesc op_desc(op, nullptr); diff --git a/paddle/fluid/inference/anakin/convert/conv2d.h b/paddle/fluid/inference/anakin/convert/conv2d.h index 75a30c10d481762fe5579ccb4d79feeba73dc98a..dca5d19f468ac6d6e2f4bcda8ecaa3922d80e6b1 100644 --- a/paddle/fluid/inference/anakin/convert/conv2d.h +++ b/paddle/fluid/inference/anakin/convert/conv2d.h @@ -25,6 +25,7 @@ class Conv2dOpConverter : public AnakinOpConverter { Conv2dOpConverter() = default; virtual void operator()(const framework::proto::OpDesc &op, + const framework::BlockDesc &block_desc, const framework::Scope &scope, bool test_mode) override; virtual ~Conv2dOpConverter() {} diff --git a/paddle/fluid/inference/anakin/convert/conv2d_fusion.cc b/paddle/fluid/inference/anakin/convert/conv2d_fusion.cc index 4d105430dd298076fa8aa4c1925329c3a0e356a1..fa1ab0efeeb5cacd112ca1b644735eaaf49e55f8 100644 --- a/paddle/fluid/inference/anakin/convert/conv2d_fusion.cc +++ b/paddle/fluid/inference/anakin/convert/conv2d_fusion.cc @@ -28,6 +28,7 @@ namespace inference { namespace anakin { void Conv2dFusionOpConverter::operator()(const framework::proto::OpDesc &op, + const framework::BlockDesc &block_desc, const framework::Scope &scope, bool test_mode) { framework::OpDesc op_desc(op, nullptr); diff --git a/paddle/fluid/inference/anakin/convert/conv2d_fusion.h b/paddle/fluid/inference/anakin/convert/conv2d_fusion.h index 07359b9cba05bf7c885eb38d64816bdb718a6aba..0d9ef28183b309c4b50714fcbe64e24c5d9dfbaa 100644 --- a/paddle/fluid/inference/anakin/convert/conv2d_fusion.h +++ b/paddle/fluid/inference/anakin/convert/conv2d_fusion.h @@ -25,6 +25,7 @@ class Conv2dFusionOpConverter : public AnakinOpConverter { Conv2dFusionOpConverter() = default; virtual void operator()(const framework::proto::OpDesc &op, + const framework::BlockDesc &block_desc, const framework::Scope &scope, bool test_mode) override; virtual ~Conv2dFusionOpConverter() {} diff --git a/paddle/fluid/inference/anakin/convert/density_prior_box.cc b/paddle/fluid/inference/anakin/convert/density_prior_box.cc index 35e02919aa70c211da5d4a5785a9833747d99ce2..30796f7592427191a4396a154be62838b7e666ad 100644 --- a/paddle/fluid/inference/anakin/convert/density_prior_box.cc +++ b/paddle/fluid/inference/anakin/convert/density_prior_box.cc @@ -27,9 +27,9 @@ namespace paddle { namespace inference { namespace anakin { -void DensityPriorBoxOpConverter::operator()(const framework::proto::OpDesc& op, - const framework::Scope& scope, - bool test_mode) { +void DensityPriorBoxOpConverter::operator()( + const framework::proto::OpDesc& op, const framework::BlockDesc& block_desc, + const framework::Scope& scope, bool test_mode) { framework::OpDesc op_desc(op, nullptr); auto input_name = op_desc.Input("Input").front(); auto image_name = op_desc.Input("Image").front(); diff --git a/paddle/fluid/inference/anakin/convert/density_prior_box.h b/paddle/fluid/inference/anakin/convert/density_prior_box.h index 44265cbf2e968e8821bc1a9ae3225c9b7d405235..bf9210711a0f69595c241803cd40d42770ccd5d7 100644 --- a/paddle/fluid/inference/anakin/convert/density_prior_box.h +++ b/paddle/fluid/inference/anakin/convert/density_prior_box.h @@ -27,6 +27,7 @@ class DensityPriorBoxOpConverter : public AnakinOpConverter { DensityPriorBoxOpConverter() = default; virtual void operator()(const framework::proto::OpDesc &op, + const framework::BlockDesc &block_desc, const framework::Scope &scope, bool test_mode) override; virtual ~DensityPriorBoxOpConverter() {} diff --git a/paddle/fluid/inference/anakin/convert/detection_out.cc b/paddle/fluid/inference/anakin/convert/detection_out.cc index 67636651017cfb18967cf8dc76d4f4a552fbd021..262ad28a654609cddde979d387621bb0c7c1a7f9 100644 --- a/paddle/fluid/inference/anakin/convert/detection_out.cc +++ b/paddle/fluid/inference/anakin/convert/detection_out.cc @@ -26,6 +26,7 @@ namespace inference { namespace anakin { void DetectionOutOpConverter::operator()(const framework::proto::OpDesc &op, + const framework::BlockDesc &block_desc, const framework::Scope &scope, bool test_mode) { framework::OpDesc op_desc(op, nullptr); diff --git a/paddle/fluid/inference/anakin/convert/detection_out.h b/paddle/fluid/inference/anakin/convert/detection_out.h index 5bf1c3ecbc89795d075301a2fd568312236bd874..ca78f10fdc2a7c7064ae0399e7f1afff1383ce67 100644 --- a/paddle/fluid/inference/anakin/convert/detection_out.h +++ b/paddle/fluid/inference/anakin/convert/detection_out.h @@ -27,6 +27,7 @@ class DetectionOutOpConverter : public AnakinOpConverter { DetectionOutOpConverter() = default; virtual void operator()(const framework::proto::OpDesc &op, + const framework::BlockDesc &block_desc, const framework::Scope &scope, bool test_mode) override; virtual ~DetectionOutOpConverter() {} diff --git a/paddle/fluid/inference/anakin/convert/dropout.cc b/paddle/fluid/inference/anakin/convert/dropout.cc index ed6d7f7561cb78666855146864b33254026926ef..bc9b26dcf2733369e558cde2954e9d0caaba86b0 100644 --- a/paddle/fluid/inference/anakin/convert/dropout.cc +++ b/paddle/fluid/inference/anakin/convert/dropout.cc @@ -31,6 +31,7 @@ namespace inference { namespace anakin { void DropoutOpConverter::operator()(const framework::proto::OpDesc &op, + const framework::BlockDesc &block_desc, const framework::Scope &scope, bool test_mode) { framework::OpDesc op_desc(op, nullptr); diff --git a/paddle/fluid/inference/anakin/convert/dropout.h b/paddle/fluid/inference/anakin/convert/dropout.h index 2a0fb6e76ac8354d884f9d815a4df785248e6475..11412e217ef5fa77bd22d7530d88be1347f2616f 100644 --- a/paddle/fluid/inference/anakin/convert/dropout.h +++ b/paddle/fluid/inference/anakin/convert/dropout.h @@ -25,6 +25,7 @@ class DropoutOpConverter : public AnakinOpConverter { DropoutOpConverter() = default; virtual void operator()(const framework::proto::OpDesc &op, + const framework::BlockDesc &block_desc, const framework::Scope &scope, bool test_mode) override; virtual ~DropoutOpConverter() {} diff --git a/paddle/fluid/inference/anakin/convert/elementwise.cc b/paddle/fluid/inference/anakin/convert/elementwise.cc index 55b12390baf90a9365fd4d197b19a3c5cd675afd..fe9a896d8266e06250b712be0c75290c039e9a08 100644 --- a/paddle/fluid/inference/anakin/convert/elementwise.cc +++ b/paddle/fluid/inference/anakin/convert/elementwise.cc @@ -30,9 +30,9 @@ namespace paddle { namespace inference { namespace anakin { -void ElementwiseAddOpConverter::operator()(const framework::proto::OpDesc &op, - const framework::Scope &scope, - bool test_mode) { +void ElementwiseAddOpConverter::operator()( + const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc, + const framework::Scope &scope, bool test_mode) { framework::OpDesc op_desc(op, nullptr); PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1); PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1); @@ -50,9 +50,9 @@ void ElementwiseAddOpConverter::operator()(const framework::proto::OpDesc &op, engine_->AddOpAttr>(op_name, "coeff", coeff); } -void ElementwiseMulOpConverter::operator()(const framework::proto::OpDesc &op, - const framework::Scope &scope, - bool test_mode) { +void ElementwiseMulOpConverter::operator()( + const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc, + const framework::Scope &scope, bool test_mode) { framework::OpDesc op_desc(op, nullptr); PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1); PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1); diff --git a/paddle/fluid/inference/anakin/convert/elementwise.h b/paddle/fluid/inference/anakin/convert/elementwise.h index 47525e41daafcbca0c7c86bad44066f18a3ac79c..e4664493a9d3ce1ed9a0c79a05fb466c4e781b3e 100644 --- a/paddle/fluid/inference/anakin/convert/elementwise.h +++ b/paddle/fluid/inference/anakin/convert/elementwise.h @@ -25,6 +25,7 @@ class ElementwiseAddOpConverter : public AnakinOpConverter { ElementwiseAddOpConverter() = default; virtual void operator()(const framework::proto::OpDesc &op, + const framework::BlockDesc &block_desc, const framework::Scope &scope, bool test_mode) override; virtual ~ElementwiseAddOpConverter() {} @@ -37,6 +38,7 @@ class ElementwiseMulOpConverter : public AnakinOpConverter { ElementwiseMulOpConverter() = default; virtual void operator()(const framework::proto::OpDesc &op, + const framework::BlockDesc &block_desc, const framework::Scope &scope, bool test_mode) override; virtual ~ElementwiseMulOpConverter() {} diff --git a/paddle/fluid/inference/anakin/convert/fc.cc b/paddle/fluid/inference/anakin/convert/fc.cc index 2514eb1e093b4e05b7e6b2814cfd8185b3aede6c..a80a1a47e91aa085935b5febb3858e028f396091 100644 --- a/paddle/fluid/inference/anakin/convert/fc.cc +++ b/paddle/fluid/inference/anakin/convert/fc.cc @@ -27,6 +27,7 @@ namespace inference { namespace anakin { void FcBaseOpConverter::operator()(const framework::proto::OpDesc &op, + const framework::BlockDesc &block_desc, const framework::Scope &scope, bool test_mode) { framework::OpDesc op_desc(op, nullptr); diff --git a/paddle/fluid/inference/anakin/convert/fc.h b/paddle/fluid/inference/anakin/convert/fc.h index 060c649b19ef335a9e926eb205ec691a2a188fe1..fb461908b35e0111065e1a46c52306c64ace7d7c 100644 --- a/paddle/fluid/inference/anakin/convert/fc.h +++ b/paddle/fluid/inference/anakin/convert/fc.h @@ -25,6 +25,7 @@ class FcBaseOpConverter : public AnakinOpConverter { FcBaseOpConverter() = default; virtual void operator()(const framework::proto::OpDesc &op, + const framework::BlockDesc &block_desc, const framework::Scope &scope, bool test_mode) override; virtual ~FcBaseOpConverter() {} diff --git a/paddle/fluid/inference/anakin/convert/flatten.cc b/paddle/fluid/inference/anakin/convert/flatten.cc index c6c372bbef87de7f38c1f66a21c170cabac8c0ed..7f5c1510960d1014c33bd565939812fe7c7dfc06 100644 --- a/paddle/fluid/inference/anakin/convert/flatten.cc +++ b/paddle/fluid/inference/anakin/convert/flatten.cc @@ -26,6 +26,7 @@ namespace inference { namespace anakin { void FlattenOpConverter::operator()(const framework::proto::OpDesc &op, + const framework::BlockDesc &block_desc, const framework::Scope &scope, bool test_mode) { framework::OpDesc op_desc(op, nullptr); diff --git a/paddle/fluid/inference/anakin/convert/flatten.h b/paddle/fluid/inference/anakin/convert/flatten.h index 1ace76b16381980a9eaec12806e0bc94d7b1fb85..c9cc0006eb2448917bbcc0952f5e2cae72b73de1 100644 --- a/paddle/fluid/inference/anakin/convert/flatten.h +++ b/paddle/fluid/inference/anakin/convert/flatten.h @@ -25,6 +25,7 @@ class FlattenOpConverter : public AnakinOpConverter { FlattenOpConverter() = default; virtual void operator()(const framework::proto::OpDesc &op, + const framework::BlockDesc &block_desc, const framework::Scope &scope, bool test_mode) override; virtual ~FlattenOpConverter() {} diff --git a/paddle/fluid/inference/anakin/convert/im2sequence.cc b/paddle/fluid/inference/anakin/convert/im2sequence.cc index 568d7e4746f11b13ce8ea9e5a47a1b43d1c12693..2cc330c3829f6033229748523c3df750b951626f 100644 --- a/paddle/fluid/inference/anakin/convert/im2sequence.cc +++ b/paddle/fluid/inference/anakin/convert/im2sequence.cc @@ -31,6 +31,7 @@ namespace inference { namespace anakin { void Im2SequenceConverter::operator()(const framework::proto::OpDesc &op, + const framework::BlockDesc &block_desc, const framework::Scope &scope, bool test_mode) { framework::OpDesc op_desc(op, nullptr); diff --git a/paddle/fluid/inference/anakin/convert/im2sequence.h b/paddle/fluid/inference/anakin/convert/im2sequence.h index 3003eac2c6f416663c3e7c4c3e297b6347edfb47..714679c1d9601136f1f54287bb58d611e852f3fe 100644 --- a/paddle/fluid/inference/anakin/convert/im2sequence.h +++ b/paddle/fluid/inference/anakin/convert/im2sequence.h @@ -25,6 +25,7 @@ class Im2SequenceConverter : public AnakinOpConverter { Im2SequenceConverter() = default; virtual void operator()(const framework::proto::OpDesc &op, + const framework::BlockDesc &block_desc, const framework::Scope &scope, bool test_mode) override; virtual ~Im2SequenceConverter() {} diff --git a/paddle/fluid/inference/anakin/convert/op_converter.h b/paddle/fluid/inference/anakin/convert/op_converter.h index 45db4221747128cd7f6d26c8830fa75ebf81ac72..1ca62658ef26ffebcc068c91ece7d9bbed0a348f 100644 --- a/paddle/fluid/inference/anakin/convert/op_converter.h +++ b/paddle/fluid/inference/anakin/convert/op_converter.h @@ -40,8 +40,10 @@ class AnakinOpConverter { AnakinOpConverter() = default; virtual void operator()(const framework::proto::OpDesc &op, + const framework::BlockDesc &block_desc, const framework::Scope &scope, bool test_mode) {} void ConvertOp(const framework::proto::OpDesc &op, + const framework::BlockDesc &block_desc, const std::unordered_set ¶meters, const framework::Scope &scope, AnakinNvEngine *engine, bool test_mode = false) { @@ -58,16 +60,17 @@ class AnakinOpConverter { } PADDLE_ENFORCE_NOT_NULL(it, "no OpConverter for optype [%s]", op_type); it->SetEngine(engine); - (*it)(op, scope, test_mode); + (*it)(op, block_desc, scope, test_mode); } - void ConvertBlock(const framework::proto::BlockDesc &block, + void ConvertBlock(framework::BlockDesc *block_desc, const std::unordered_set ¶meters, const framework::Scope &scope, AnakinNvEngine *engine) { std::unique_lock lock(mutex_); - for (auto i = 0; i < block.ops_size(); i++) { - auto &op = block.ops(i); - ConvertOp(op, parameters, scope, engine); + framework::proto::BlockDesc *block = block_desc->Proto(); + for (auto i = 0; i < block->ops_size(); i++) { + auto &op = block->ops(i); + ConvertOp(op, *block_desc, parameters, scope, engine); } } @@ -77,9 +80,7 @@ class AnakinOpConverter { const std::vector &inputs, const std::unordered_set ¶meters, const std::vector &outputs, AnakinNvEngine *engine) { - framework::proto::BlockDesc *block_proto = block_desc->Proto(); - ConvertBlock(*block_proto, parameters, *scope, engine); - + ConvertBlock(block_desc, parameters, *scope, engine); engine->Freeze(); // if the max_batch size int max_batch_size = engine->GetMaxBatchSize(); diff --git a/paddle/fluid/inference/anakin/convert/pool2d.cc b/paddle/fluid/inference/anakin/convert/pool2d.cc index 9b01d56a126b2ebc194f5b5bb5b2f52c298a316e..87eefe712a5ad2acd8c9b5abe521c832ad2c1ef2 100644 --- a/paddle/fluid/inference/anakin/convert/pool2d.cc +++ b/paddle/fluid/inference/anakin/convert/pool2d.cc @@ -31,6 +31,7 @@ namespace inference { namespace anakin { void Pool2dOpConverter::operator()(const framework::proto::OpDesc &op, + const framework::BlockDesc &block_desc, const framework::Scope &scope, bool test_mode) { framework::OpDesc op_desc(op, nullptr); diff --git a/paddle/fluid/inference/anakin/convert/pool2d.h b/paddle/fluid/inference/anakin/convert/pool2d.h index 1931a03c7ac236b4e57236cd1eb2947110f279a8..ec28e48ac848eff1d37c39063725624bf7d65723 100644 --- a/paddle/fluid/inference/anakin/convert/pool2d.h +++ b/paddle/fluid/inference/anakin/convert/pool2d.h @@ -25,6 +25,7 @@ class Pool2dOpConverter : public AnakinOpConverter { Pool2dOpConverter() = default; virtual void operator()(const framework::proto::OpDesc &op, + const framework::BlockDesc &block_desc, const framework::Scope &scope, bool test_mode) override; virtual ~Pool2dOpConverter() {} diff --git a/paddle/fluid/inference/anakin/convert/relu.cc b/paddle/fluid/inference/anakin/convert/relu.cc index 2ce96db1804a3d6d6d1afac79e4e1fc55ed4c35d..993437d014b1f951dac94da7a3179b4bcb63466d 100644 --- a/paddle/fluid/inference/anakin/convert/relu.cc +++ b/paddle/fluid/inference/anakin/convert/relu.cc @@ -26,6 +26,7 @@ namespace inference { namespace anakin { void ReluOpConverter::operator()(const framework::proto::OpDesc &op, + const framework::BlockDesc &block_desc, const framework::Scope &scope, bool test_mode) { framework::OpDesc op_desc(op, nullptr); diff --git a/paddle/fluid/inference/anakin/convert/relu.h b/paddle/fluid/inference/anakin/convert/relu.h index 54c4c2316eb32ef70696a2477211008e04892552..6ede506511917c80faa59d40ee0a7bfff194da97 100644 --- a/paddle/fluid/inference/anakin/convert/relu.h +++ b/paddle/fluid/inference/anakin/convert/relu.h @@ -27,6 +27,7 @@ class ReluOpConverter : public AnakinOpConverter { ReluOpConverter() = default; virtual void operator()(const framework::proto::OpDesc &op, + const framework::BlockDesc &block_desc, const framework::Scope &scope, bool test_mode) override; virtual ~ReluOpConverter() {} diff --git a/paddle/fluid/inference/anakin/convert/reshape.cc b/paddle/fluid/inference/anakin/convert/reshape.cc index eee36d2f37ea79c841ac8bf60c6e533069d06240..17e0a1acb5f4e08e848e91bbb051757d85796c0a 100644 --- a/paddle/fluid/inference/anakin/convert/reshape.cc +++ b/paddle/fluid/inference/anakin/convert/reshape.cc @@ -26,6 +26,7 @@ namespace inference { namespace anakin { void ReshapeOpConverter::operator()(const framework::proto::OpDesc &op, + const framework::BlockDesc &block_desc, const framework::Scope &scope, bool test_mode) { framework::OpDesc op_desc(op, nullptr); diff --git a/paddle/fluid/inference/anakin/convert/reshape.h b/paddle/fluid/inference/anakin/convert/reshape.h index 970e8ce5572572bd18c34eeffa902fa2495c1cce..9ce2ea2a4f3f8802225fe8ca8ed602c9f7d27968 100644 --- a/paddle/fluid/inference/anakin/convert/reshape.h +++ b/paddle/fluid/inference/anakin/convert/reshape.h @@ -25,6 +25,7 @@ class ReshapeOpConverter : public AnakinOpConverter { ReshapeOpConverter() = default; virtual void operator()(const framework::proto::OpDesc &op, + const framework::BlockDesc &block_desc, const framework::Scope &scope, bool test_mode) override; virtual ~ReshapeOpConverter() {} diff --git a/paddle/fluid/inference/anakin/convert/scale.cc b/paddle/fluid/inference/anakin/convert/scale.cc index 6f3aa8c5d1111dc2829e241c9331eeb521003c03..dd68af4f79a6d1e8add04bde6a6890bca1b00d14 100644 --- a/paddle/fluid/inference/anakin/convert/scale.cc +++ b/paddle/fluid/inference/anakin/convert/scale.cc @@ -26,6 +26,7 @@ namespace inference { namespace anakin { void ScaleOpConverter::operator()(const framework::proto::OpDesc &op, + const framework::BlockDesc &block_desc, const framework::Scope &scope, bool test_mode) { framework::OpDesc op_desc(op, nullptr); diff --git a/paddle/fluid/inference/anakin/convert/scale.h b/paddle/fluid/inference/anakin/convert/scale.h index b858e3c512494f80c7c3818a570e43d90d65251b..ba3bcdd21494a4eeb6190aa8383e17e1b828b5f3 100644 --- a/paddle/fluid/inference/anakin/convert/scale.h +++ b/paddle/fluid/inference/anakin/convert/scale.h @@ -27,6 +27,7 @@ class ScaleOpConverter : public AnakinOpConverter { ScaleOpConverter() = default; virtual void operator()(const framework::proto::OpDesc &op, + const framework::BlockDesc &block_desc, const framework::Scope &scope, bool test_mode) override; virtual ~ScaleOpConverter() {} diff --git a/paddle/fluid/inference/anakin/convert/softmax.cc b/paddle/fluid/inference/anakin/convert/softmax.cc index d5cd8908ebf623f0334a3b4df2b19147c63f77a3..a6c1e971b16fa7fe6a074bcb2cdf391410f8871f 100644 --- a/paddle/fluid/inference/anakin/convert/softmax.cc +++ b/paddle/fluid/inference/anakin/convert/softmax.cc @@ -24,6 +24,7 @@ namespace inference { namespace anakin { void SoftMaxOpConverter::operator()(const framework::proto::OpDesc &op, + const framework::BlockDesc &block_desc, const framework::Scope &scope, bool test_mode) { framework::OpDesc op_desc(op, nullptr); @@ -32,8 +33,16 @@ void SoftMaxOpConverter::operator()(const framework::proto::OpDesc &op, auto input = op_desc.Input("X").front(); auto output = op_desc.Output("Out").front(); auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front(); + + auto input_var_desc = block_desc.FindVar(input); + PADDLE_ENFORCE(input_var_desc, + "Cant find %s variable When runing Anakin Softmax converter.", + input); + auto input_shape_in_fluid = input_var_desc->GetShape(); + size_t input_dims = input_shape_in_fluid.size(); + engine_->AddOp(op_name, "Softmax", {input}, {output}); - engine_->AddOpAttr(op_name, "axis", 2); + engine_->AddOpAttr(op_name, "axis", static_cast(input_dims - 1)); } } // namespace anakin diff --git a/paddle/fluid/inference/anakin/convert/softmax.h b/paddle/fluid/inference/anakin/convert/softmax.h index 0508da0c6fecaf29b7376005904235dadf04ea28..a16356d5bb61ac2f3b4f7751e257ce36ca604bf1 100644 --- a/paddle/fluid/inference/anakin/convert/softmax.h +++ b/paddle/fluid/inference/anakin/convert/softmax.h @@ -25,6 +25,7 @@ class SoftMaxOpConverter : public AnakinOpConverter { SoftMaxOpConverter() = default; virtual void operator()(const framework::proto::OpDesc &op, + const framework::BlockDesc &block_desc, const framework::Scope &scope, bool test_mode) override; virtual ~SoftMaxOpConverter() {} diff --git a/paddle/fluid/inference/anakin/convert/split.cc b/paddle/fluid/inference/anakin/convert/split.cc index b8464a766d21e93426eb4a00b8caab2af5470055..ec582c1812623cd4bcefa2097015ba258f6bacbb 100644 --- a/paddle/fluid/inference/anakin/convert/split.cc +++ b/paddle/fluid/inference/anakin/convert/split.cc @@ -30,6 +30,7 @@ namespace inference { namespace anakin { void SplitOpConverter::operator()(const framework::proto::OpDesc &op, + const framework::BlockDesc &block_desc, const framework::Scope &scope, bool test_mode) { framework::OpDesc op_desc(op, nullptr); diff --git a/paddle/fluid/inference/anakin/convert/split.h b/paddle/fluid/inference/anakin/convert/split.h index a4c6a14e62168ffaf5ff67b5cf953d477ff9e34d..184112e589e2bbdb30bc7a5d2cd053b7f3732a58 100644 --- a/paddle/fluid/inference/anakin/convert/split.h +++ b/paddle/fluid/inference/anakin/convert/split.h @@ -25,6 +25,7 @@ class SplitOpConverter : public AnakinOpConverter { SplitOpConverter() = default; virtual void operator()(const framework::proto::OpDesc &op, + const framework::BlockDesc &block_desc, const framework::Scope &scope, bool test_mode) override; virtual ~SplitOpConverter() {} diff --git a/paddle/fluid/inference/anakin/convert/sum.cc b/paddle/fluid/inference/anakin/convert/sum.cc index df9104cf4631d86e0cbd87cb0e93a96d984953f5..2a4178e2371389b44557d44ea526c7cc4a731d16 100644 --- a/paddle/fluid/inference/anakin/convert/sum.cc +++ b/paddle/fluid/inference/anakin/convert/sum.cc @@ -31,6 +31,7 @@ namespace inference { namespace anakin { void SumOpConverter::operator()(const framework::proto::OpDesc &op, + const framework::BlockDesc &block_desc, const framework::Scope &scope, bool test_mode) { framework::OpDesc op_desc(op, nullptr); PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 2); diff --git a/paddle/fluid/inference/anakin/convert/sum.h b/paddle/fluid/inference/anakin/convert/sum.h index ddecc4b3bcb84f83af95e77399847f191c785563..b5d402b77fcf555ffaf910f8c9d1b7337181a64b 100644 --- a/paddle/fluid/inference/anakin/convert/sum.h +++ b/paddle/fluid/inference/anakin/convert/sum.h @@ -25,6 +25,7 @@ class SumOpConverter : public AnakinOpConverter { SumOpConverter() = default; virtual void operator()(const framework::proto::OpDesc &op, + const framework::BlockDesc &block_desc, const framework::Scope &scope, bool test_mode) override; virtual ~SumOpConverter() {} diff --git a/paddle/fluid/inference/anakin/convert/transpose.cc b/paddle/fluid/inference/anakin/convert/transpose.cc index 6a887401034f9d8c0b8b6aa3eeffb6579e395029..f35372fe5c315ec68bc80a6d03c5931899ff7555 100644 --- a/paddle/fluid/inference/anakin/convert/transpose.cc +++ b/paddle/fluid/inference/anakin/convert/transpose.cc @@ -28,6 +28,7 @@ namespace inference { namespace anakin { void TransposeOpConverter::operator()(const framework::proto::OpDesc &op, + const framework::BlockDesc &block_desc, const framework::Scope &scope, bool test_mode) { framework::OpDesc op_desc(op, nullptr); diff --git a/paddle/fluid/inference/anakin/convert/transpose.h b/paddle/fluid/inference/anakin/convert/transpose.h index 62d26b6a9cc9885682f5750df32018596f014b33..bacbf152bc12319e6296677500b17d55d9772412 100644 --- a/paddle/fluid/inference/anakin/convert/transpose.h +++ b/paddle/fluid/inference/anakin/convert/transpose.h @@ -25,6 +25,7 @@ class TransposeOpConverter : public AnakinOpConverter { TransposeOpConverter() = default; virtual void operator()(const framework::proto::OpDesc &op, + const framework::BlockDesc &block_desc, const framework::Scope &scope, bool test_mode) override; virtual ~TransposeOpConverter() {} diff --git a/paddle/fluid/inference/anakin/convert/ut_helper.h b/paddle/fluid/inference/anakin/convert/ut_helper.h index e0371d95347a521f499dd9454d284907b3048a04..029aff6704ff1015e5c2378a2202c94043df990d 100644 --- a/paddle/fluid/inference/anakin/convert/ut_helper.h +++ b/paddle/fluid/inference/anakin/convert/ut_helper.h @@ -22,6 +22,7 @@ limitations under the License. */ #include #include +#include "paddle/fluid/framework/block_desc.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/tensor_util.h" @@ -112,6 +113,17 @@ class AnakinConvertValidation { auto* x_tensor = x->GetMutable(); x_tensor->Resize(framework::make_ddim(dim_vec)); RandomizeTensor(x_tensor, place_, ctx); + + std::vector dim_vec_int64; + for (auto& ele : dim_vec) { + dim_vec_int64.push_back(static_cast(ele)); + } + + // Add var_desc to block_desc + auto* block_desc = program_desc_.MutableBlock(framework::kRootBlockIndex); + + auto* var_desc = block_desc->Var(name); + var_desc->SetShape(dim_vec_int64); } void SetOp(const framework::proto::OpDesc& desc) { @@ -119,8 +131,10 @@ class AnakinConvertValidation { op_desc_.reset(new framework::OpDesc(desc, nullptr)); // should init anakin engine here. + auto& block_desc = program_desc_.Block(framework::kRootBlockIndex); Singleton::Global().ConvertOp( - desc, parameters_, *scope_, engine_.get(), true /*test_mode*/); + desc, block_desc, parameters_, *scope_, engine_.get(), + true /*test_mode*/); engine_->Freeze(); std::map> temp_max_input_shape; @@ -194,6 +208,7 @@ class AnakinConvertValidation { cudaStream_t stream_; std::unique_ptr op_; std::unique_ptr op_desc_; + framework::ProgramDesc program_desc_; const std::unordered_set& parameters_; framework::Scope* scope_; platform::CUDAPlace place_; diff --git a/paddle/fluid/inference/anakin/engine.cc b/paddle/fluid/inference/anakin/engine.cc index ccf78ad7e56306d24af829c45c888021f4e3fbc4..ba044c9401a5f0fb5a839c1766fdd9d412d42212 100644 --- a/paddle/fluid/inference/anakin/engine.cc +++ b/paddle/fluid/inference/anakin/engine.cc @@ -91,7 +91,6 @@ void AnakinEngine::Execute( " or equal to the real input shape, Please set the max " "input shape using EnableAnakinEngine"); anakin_input->reshape(fluid_input_shape); - ::anakin::saber::Tensor tmp_anakin_tensor(data, TargetT(), 0, fluid_input_shape); anakin_input->copy_from(tmp_anakin_tensor); diff --git a/paddle/fluid/inference/analysis/argument.h b/paddle/fluid/inference/analysis/argument.h index 29f16943e0c13fbe080e8e073b081583f1d14d11..a736ca393ccb7168a9faf650a6bce13f35fffca8 100644 --- a/paddle/fluid/inference/analysis/argument.h +++ b/paddle/fluid/inference/analysis/argument.h @@ -168,6 +168,7 @@ struct Argument { DECL_ARGUMENT_FIELD(anakin_max_input_shape, AnakinMaxInputShape, anakin_max_shape_t); DECL_ARGUMENT_FIELD(anakin_max_batch_size, AnakinMaxBatchSize, int); + DECL_ARGUMENT_FIELD(anakin_min_subgraph_size, AnakinMinSubgraphSize, int); DECL_ARGUMENT_FIELD(use_anakin, UseAnakin, bool); // Memory optimized related. diff --git a/paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.cc b/paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.cc index 38612d5cc3d093885144f3b1cd6107232885b645..b8d8b6fed8ca237e87cfc67979ec6ddd340b8916 100644 --- a/paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.cc +++ b/paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.cc @@ -151,13 +151,20 @@ void AnakinSubgraphPass::CreateAnakinOp( op_desc->SetType("anakin_engine"); std::unordered_map output_name_map; + std::unordered_map graph_var_map; + + for (framework::ir::Node *node : graph->Nodes()) { + if (node->IsVar() && node->Var()) { + graph_var_map[node->Name()] = node; + } + } auto &subgraph_nodes = *Agent(node).subgraph(); // The following procedure is used to rename all the intermediate // variables and the output variables of the subgraph. RenameAndGetOutputs(subgraph_nodes, &block_desc, input_names_with_id, &output_names_with_id, &output_names, &output_name_map, - false); + graph_var_map, false); // When anakin engine runs at the end of the operation, // output_mapping help us copy the data from the renamed ITensor @@ -168,13 +175,6 @@ void AnakinSubgraphPass::CreateAnakinOp( output_mapping.push_back(output_name_map[name]); } - auto *vars = block_desc.Proto()->mutable_vars(); - for (framework::ir::Node *node : graph->Nodes()) { - if (node->IsVar() && node->Var()) { - *vars->Add() = *node->Var()->Proto(); - } - } - PADDLE_ENFORCE(!block_desc.Proto()->vars().empty(), "the block has no var-desc"); PADDLE_ENFORCE(!output_mapping.empty()); diff --git a/paddle/fluid/inference/analysis/ir_passes/subgraph_util.cc b/paddle/fluid/inference/analysis/ir_passes/subgraph_util.cc index a17ee1b707a7f950cddc62373a9a57c793d5528f..7c4aab06a1d2b3fadc76b46c7e95cea7818c56e2 100644 --- a/paddle/fluid/inference/analysis/ir_passes/subgraph_util.cc +++ b/paddle/fluid/inference/analysis/ir_passes/subgraph_util.cc @@ -60,6 +60,7 @@ void RenameAndGetOutputs( std::set *output_names_with_id, std::set *output_names, std::unordered_map *output_name_map, + const std::unordered_map &graph_var_map, bool is_trt) { //// In the normal case, the paddle-trt exists bug when runing the googlenet. // When there are more than two convolutions of 1 * 1 with the same input, the @@ -69,6 +70,15 @@ void RenameAndGetOutputs( std::unordered_map same_hierarchy_conv2d_num_map; + auto add_block_var = [&](const std::string &graph_arg, + const std::string &block_arg) { + auto arg_var_node = graph_var_map.find(graph_arg); + PADDLE_ENFORCE(arg_var_node != graph_var_map.end()); + auto *var_t = block_desc->Var(block_arg); + var_t->SetShape(arg_var_node->second->Var()->GetShape()); + var_t->SetDataType(arg_var_node->second->Var()->GetDataType()); + }; + for (size_t index = 0; index < block_desc->OpSize(); ++index) { framework::proto::OpDesc *op = block_desc->Op(index)->Proto(); framework::OpDesc op_desc(*op, nullptr); @@ -87,13 +97,20 @@ void RenameAndGetOutputs( auto *in_var = op->mutable_inputs(i); std::vector replaced_names; for (int k = 0; k < in_var->arguments_size(); k++) { // all the arguments - std::string arg_value = in_var->arguments(k); - std::string arg_value_with_id = + const std::string arg_value = in_var->arguments(k); + const std::string arg_value_with_id = arg_value + std::to_string(var2id[arg_value]); + if (input_names_with_id.count(arg_value_with_id)) { replaced_names.push_back(arg_value); + if (graph_var_map.count(arg_value)) { + add_block_var(arg_value, arg_value); + } } else { replaced_names.push_back(arg_value_with_id); + if (graph_var_map.count(arg_value)) { + add_block_var(arg_value, arg_value_with_id); + } } } in_var->clear_arguments(); @@ -105,7 +122,6 @@ void RenameAndGetOutputs( for (auto out_var : correspond_node->outputs) { var2id[out_var->Name()] = out_var->id(); } - if (op_desc.Type() == "conv2d" && is_trt) { auto input_var_name = op_desc.Input("Input").front(); auto filter_var_name = op_desc.Input("Filter").front(); @@ -125,15 +141,18 @@ void RenameAndGetOutputs( same_hierarchy_conv2d_num_map[input_var_name] += 1; } } - // rename for the output variables of op inside subgraph for (int i = 0; i < op->outputs_size(); i++) { framework::proto::OpDesc_Var *out_var = op->mutable_outputs(i); std::vector replaced_names; for (int k = 0; k < out_var->arguments_size(); k++) { - std::string arg_value = out_var->arguments(k); - std::string arg_value_with_id = + const std::string arg_value = out_var->arguments(k); + const std::string arg_value_with_id = arg_value + std::to_string(var2id[arg_value]); + + if (graph_var_map.count(arg_value)) { + add_block_var(arg_value, arg_value_with_id); + } if (output_names_with_id->count(arg_value_with_id)) { (*output_name_map)[arg_value] = arg_value_with_id; } diff --git a/paddle/fluid/inference/analysis/ir_passes/subgraph_util.h b/paddle/fluid/inference/analysis/ir_passes/subgraph_util.h index 3cf21bf5f426a7142626e6ae1db6ee478418d08a..bb445027821096689965096c69b8183dd9da403c 100644 --- a/paddle/fluid/inference/analysis/ir_passes/subgraph_util.h +++ b/paddle/fluid/inference/analysis/ir_passes/subgraph_util.h @@ -42,6 +42,7 @@ void RenameAndGetOutputs( std::set *output_names_with_id, std::set *output_names, std::unordered_map *output_name_map, + const std::unordered_map &graph_var_map, bool is_trt = true); } // namespace analysis diff --git a/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc b/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc index 019098a5dd0d372a690955698a2ab6a4039a2416..67650a352d8b8239da228462c21877ff440147b8 100644 --- a/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc +++ b/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc @@ -142,6 +142,13 @@ void TensorRtSubgraphPass::CreateTensorRTOp( } std::unordered_map output_name_map; + std::unordered_map graph_var_map; + + for (framework::ir::Node *node : graph->Nodes()) { + if (node->IsVar() && node->Var()) { + graph_var_map[node->Name()] = node; + } + } auto &subgraph_nodes = *Agent(node).subgraph(); // The following procedure is used to rename all the intermediate @@ -157,7 +164,8 @@ void TensorRtSubgraphPass::CreateTensorRTOp( // So we have to rename the variable in the subgraph to make sure // it is either an OP's input or an OP's output. RenameAndGetOutputs(subgraph_nodes, &block_desc, input_names_with_id, - &output_names_with_id, &output_names, &output_name_map); + &output_names_with_id, &output_names, &output_name_map, + graph_var_map); // When tensorrt engine runs at the end of the operation, // output_mapping help us copy the data from the renamed ITensor @@ -168,14 +176,6 @@ void TensorRtSubgraphPass::CreateTensorRTOp( output_mapping.push_back(output_name_map[name]); } PADDLE_ENFORCE(!output_mapping.empty()); - - auto *vars = block_desc.Proto()->mutable_vars(); - for (framework::ir::Node *node : graph->Nodes()) { - if (node->IsVar() && node->Var()) { - *vars->Add() = *node->Var()->Proto(); - } - } - PADDLE_ENFORCE(!block_desc.Proto()->vars().empty(), "the block has no var-desc"); @@ -213,7 +213,6 @@ void TensorRtSubgraphPass::CreateTensorRTOp( SetAttr(op_desc->Proto(), "enable_int8", enable_int8); SetAttr(op_desc->Proto(), "engine_key", engine_key); std::string trt_engine_serialized_data = ""; - SetAttr(op_desc->Proto(), "engine_serialized_data", trt_engine_serialized_data); diff --git a/paddle/fluid/inference/api/analysis_config.cc b/paddle/fluid/inference/api/analysis_config.cc index aee94e12340597e981ac385a01335d2ffa069191..e5036d940197ef012cbfd8f52700c8aeb54fb6c5 100644 --- a/paddle/fluid/inference/api/analysis_config.cc +++ b/paddle/fluid/inference/api/analysis_config.cc @@ -115,6 +115,7 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) { CP_MEMBER(use_anakin_); CP_MEMBER(anakin_max_batchsize_); CP_MEMBER(anakin_max_input_shape_); + CP_MEMBER(anakin_min_subgraph_size_); // Ir related. CP_MEMBER(enable_ir_optim_); @@ -322,6 +323,7 @@ std::string AnalysisConfig::SerializeInfoCache() { ss << specify_input_name_; ss << cpu_math_library_num_threads_; ss << use_anakin_; + ss << anakin_min_subgraph_size_; return ss.str(); } @@ -393,10 +395,11 @@ void AnalysisConfig::SwitchIrDebug(int x) { Update(); } void AnalysisConfig::EnableAnakinEngine( - int max_batch_size, - std::map> max_input_shape) { + int max_batch_size, std::map> max_input_shape, + int min_subgraph_size) { anakin_max_batchsize_ = max_batch_size; anakin_max_input_shape_ = max_input_shape; + anakin_min_subgraph_size_ = min_subgraph_size; use_anakin_ = true; Update(); } diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index 7d8e9fe8bfada743388afd3ae4eedb5d84961706..6942604b0723f8665f0e8b058d48a5356a1a01f4 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -385,6 +385,7 @@ void AnalysisPredictor::PrepareArgument() { if (config_.use_gpu() && config_.anakin_engine_enabled()) { argument_.SetAnakinMaxBatchSize(config_.anakin_max_batchsize_); argument_.SetAnakinMaxInputShape(config_.anakin_max_input_shape_); + argument_.SetAnakinMinSubgraphSize(config_.anakin_min_subgraph_size_); LOG(INFO) << "Anakin subgraph engine is enabled"; } diff --git a/paddle/fluid/inference/api/paddle_analysis_config.h b/paddle/fluid/inference/api/paddle_analysis_config.h index 2ad4add2945d65037829e0bb453372e38a04421c..c67c4b5bd0bfeea6d022f9e821f6d0b877c71d7a 100644 --- a/paddle/fluid/inference/api/paddle_analysis_config.h +++ b/paddle/fluid/inference/api/paddle_analysis_config.h @@ -151,7 +151,8 @@ struct AnalysisConfig { */ void EnableAnakinEngine( int max_batch_size = 1, - std::map> max_input_shape = {}); + std::map> max_input_shape = {}, + int min_subgraph_size = 6); /** A boolean state indicating whether the Anakin sub-graph engine is used. */ @@ -288,6 +289,7 @@ struct AnalysisConfig { bool use_anakin_{false}; int anakin_max_batchsize_; + int anakin_min_subgraph_size_{6}; std::map> anakin_max_input_shape_; std::map engine_opt_info_; diff --git a/paddle/fluid/operators/anakin/anakin_engine_op.h b/paddle/fluid/operators/anakin/anakin_engine_op.h index 9d5b4f6f54ccfc9802cef6abac428e28a72ac293..e4feb14b2271a50c8e8fb7ce4c81dd6c99042e21 100644 --- a/paddle/fluid/operators/anakin/anakin_engine_op.h +++ b/paddle/fluid/operators/anakin/anakin_engine_op.h @@ -120,40 +120,8 @@ class AnakinEngineOp : public framework::OperatorBase { inference::Singleton::Global() .Get(engine_key_); } - return anakin_engine_; } - - void Prepare(const framework::Scope &scope, const platform::Place &dev_place, - AnakinNvEngineT *engine) const { - LOG(INFO) << "Prepare Anakin engine (Optimize model structure, Select OP " - "kernel etc). This process may cost a lot of time."; - framework::proto::BlockDesc block_desc; - block_desc.ParseFromString(Attr("subgraph")); - - std::vector output_maps = - Attr>("output_name_mapping"); - - inference::Singleton::Global() - .ConvertBlock(block_desc, param_names_, scope, engine); - engine->Freeze(); - for (const auto &x : Inputs("Xs")) { - if (param_names_.count(x)) continue; - auto &t = - inference::analysis::GetFromScope(scope, x); - auto t_shape = framework::vectorize2int(t.dims()); - // all input shape should be 4 dims - if (t_shape.size() == 2) { - t_shape.push_back(1); - t_shape.push_back(1); - } - engine->SetInputShape(x, t_shape); - } - - engine->Optimize(); - - engine->InitGraph(); - } }; } // namespace operators