From 39c85064a27a6a6ab0d8eed8d8e996caf5302ff8 Mon Sep 17 00:00:00 2001 From: czr-gc <96037699+czr-gc@users.noreply.github.com> Date: Wed, 16 Nov 2022 09:53:30 +0800 Subject: [PATCH] feat(ipu): add paddle inference support for model_runtime. (#47364) --- .../ir/ipu/inference_process_pass.cc | 4 ++ paddle/fluid/inference/analysis/argument.h | 3 + .../analysis/passes/ir_graph_build_pass.cc | 3 + paddle/fluid/inference/api/analysis_config.cc | 11 +++- .../fluid/inference/api/analysis_predictor.cc | 2 + .../inference/api/paddle_analysis_config.h | 11 +++- .../inference/tests/api/ipu_resnet50_test.cc | 63 +++++++++++++++++++ paddle/fluid/pybind/inference_api.cc | 3 +- 8 files changed, 95 insertions(+), 5 deletions(-) diff --git a/paddle/fluid/framework/ir/ipu/inference_process_pass.cc b/paddle/fluid/framework/ir/ipu/inference_process_pass.cc index 11679c95b1..0213d20d30 100644 --- a/paddle/fluid/framework/ir/ipu/inference_process_pass.cc +++ b/paddle/fluid/framework/ir/ipu/inference_process_pass.cc @@ -86,6 +86,10 @@ void InferenceProcessPass::ApplyImpl(ir::Graph* graph) const { } } + // Set executor + ipu_strategy_instance_->enable_model_runtime_executor = + graph->Get("enable_model_runtime_executor"); + // Set available memory proportion for matmul/conv ipu_strategy_instance_->available_memory_proportion = graph->Get("available_memory_proportion"); diff --git a/paddle/fluid/inference/analysis/argument.h b/paddle/fluid/inference/analysis/argument.h index 76b51f5890..496cd9d1e2 100644 --- a/paddle/fluid/inference/analysis/argument.h +++ b/paddle/fluid/inference/analysis/argument.h @@ -353,6 +353,9 @@ struct Argument { DECL_ARGUMENT_FIELD(ipu_custom_patterns, IpuCustomPatterns, std::vector>); + DECL_ARGUMENT_FIELD(ipu_enable_model_runtime_executor, + IpuEnableModelRuntimeExecutor, + bool); // npu related DECL_ARGUMENT_FIELD(use_npu, UseNpu, bool); diff --git a/paddle/fluid/inference/analysis/passes/ir_graph_build_pass.cc b/paddle/fluid/inference/analysis/passes/ir_graph_build_pass.cc index e07eaa6461..18f5c9e4a9 100644 --- a/paddle/fluid/inference/analysis/passes/ir_graph_build_pass.cc +++ b/paddle/fluid/inference/analysis/passes/ir_graph_build_pass.cc @@ -97,6 +97,9 @@ void IrGraphBuildPass::RunImpl(Argument *argument) { &argument->ipu_custom_ops_info()); argument->main_graph().SetNotOwned("custom_patterns", &argument->ipu_custom_patterns()); + argument->main_graph().SetNotOwned( + "enable_model_runtime_executor", + &argument->ipu_enable_model_runtime_executor()); } } #endif diff --git a/paddle/fluid/inference/api/analysis_config.cc b/paddle/fluid/inference/api/analysis_config.cc index 08d569635b..7d243c6df2 100755 --- a/paddle/fluid/inference/api/analysis_config.cc +++ b/paddle/fluid/inference/api/analysis_config.cc @@ -205,11 +205,13 @@ void AnalysisConfig::EnableIpu(int ipu_device_num, void AnalysisConfig::SetIpuConfig(bool ipu_enable_fp16, int ipu_replica_num, float ipu_available_memory_proportion, - bool ipu_enable_half_partial) { + bool ipu_enable_half_partial, + bool ipu_enable_model_runtime_executor) { ipu_enable_fp16_ = ipu_enable_fp16; ipu_replica_num_ = ipu_replica_num; ipu_available_memory_proportion_ = ipu_available_memory_proportion; ipu_enable_half_partial_ = ipu_enable_half_partial; + ipu_enable_model_runtime_executor_ = ipu_enable_model_runtime_executor; Update(); } @@ -284,7 +286,7 @@ void AnalysisConfig::LoadIpuConfig(const std::string &config_path) { if (ipu_config_mapper_.find(key) == ipu_config_mapper_.end()) { PADDLE_THROW(platform::errors::InvalidArgument( - "invalid key {} in IPU config", key)); + "invalid key {} in IPU config: ", key)); } switch (ipu_config_mapper_.at(key)) { case ipu_config_code::ipu_device_num: @@ -317,6 +319,9 @@ void AnalysisConfig::LoadIpuConfig(const std::string &config_path) { case ipu_config_code::ipu_custom_patterns: ipu_custom_patterns_ = string2vector(value); break; + case ipu_config_code::ipu_enable_model_runtime_executor: + ipu_enable_model_runtime_executor_ = string2bool(value); + break; default: PADDLE_THROW(platform::errors::InvalidArgument( @@ -482,6 +487,7 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) { CP_MEMBER(ipu_replica_num_); CP_MEMBER(ipu_available_memory_proportion_); CP_MEMBER(ipu_enable_half_partial_); + CP_MEMBER(ipu_enable_model_runtime_executor_); CP_MEMBER(ipu_custom_ops_info_); CP_MEMBER(ipu_custom_patterns_); @@ -1061,6 +1067,7 @@ std::string AnalysisConfig::SerializeInfoCache() { ss << ipu_replica_num_; ss << ipu_available_memory_proportion_; ss << ipu_enable_half_partial_; + ss << ipu_enable_model_runtime_executor_; for (auto custom_op : ipu_custom_ops_info_) for (auto attr : custom_op) ss << attr; ss << ";"; diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index 48dc6f0afc..d2b0ba0a5f 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -1185,6 +1185,8 @@ void AnalysisPredictor::PrepareArgument() { argument_.SetIpuAvailableMemoryProportion( config_.ipu_available_memory_proportion_); argument_.SetIpuEnableHalfPartial(config_.ipu_enable_half_partial_); + argument_.SetIpuEnableModelRuntimeExecutor( + config_.ipu_enable_model_runtime_executor_); argument_.SetIpuCustomOpsInfo(config_.ipu_custom_ops_info_); argument_.SetIpuCustomPatterns(config_.ipu_custom_patterns_); #endif diff --git a/paddle/fluid/inference/api/paddle_analysis_config.h b/paddle/fluid/inference/api/paddle_analysis_config.h index af34bfc796..0fef4f6ced 100644 --- a/paddle/fluid/inference/api/paddle_analysis_config.h +++ b/paddle/fluid/inference/api/paddle_analysis_config.h @@ -297,7 +297,8 @@ struct PD_INFER_DECL AnalysisConfig { ipu_available_memory_proportion, ipu_enable_half_partial, ipu_custom_ops_info, - ipu_custom_patterns + ipu_custom_patterns, + ipu_enable_model_runtime_executor, }; /// @@ -323,11 +324,14 @@ struct PD_INFER_DECL AnalysisConfig { /// matmul/conv. /// \param ipu_enable_half_partial enable fp16 partial for matmul, only work /// with fp16. + /// \param ipu_enable_model_runtime_executor whether to use model_runtime + /// executor. /// void SetIpuConfig(bool ipu_enable_fp16 = false, int ipu_replica_num = 1, float ipu_available_memory_proportion = 1.0, - bool ipu_enable_half_partial = false); + bool ipu_enable_half_partial = false, + bool ipu_enable_model_runtime_executor = false); /// /// \brief Set IPU custom ops and patterns. @@ -1176,6 +1180,7 @@ struct PD_INFER_DECL AnalysisConfig { int ipu_replica_num_{1}; float ipu_available_memory_proportion_{1.0}; bool ipu_enable_half_partial_{false}; + bool ipu_enable_model_runtime_executor_{false}; std::vector> ipu_custom_ops_info_; std::vector> ipu_custom_patterns_; @@ -1190,6 +1195,8 @@ struct PD_INFER_DECL AnalysisConfig { {"ipu_available_memory_proportion", ipu_config_code::ipu_available_memory_proportion}, {"ipu_enable_half_partial", ipu_config_code::ipu_enable_half_partial}, + {"ipu_enable_model_runtime_executor", + ipu_config_code::ipu_enable_model_runtime_executor}, {"ipu_custom_ops_info", ipu_config_code::ipu_custom_ops_info}, {"ipu_custom_patterns", ipu_config_code::ipu_custom_patterns}}; diff --git a/paddle/fluid/inference/tests/api/ipu_resnet50_test.cc b/paddle/fluid/inference/tests/api/ipu_resnet50_test.cc index cc37ae0695..ab7d8bd368 100644 --- a/paddle/fluid/inference/tests/api/ipu_resnet50_test.cc +++ b/paddle/fluid/inference/tests/api/ipu_resnet50_test.cc @@ -111,5 +111,68 @@ TEST(Analyzer_Resnet50_ipu, compare_results_2_batch) { } } +// multi threading +TEST(Analyzer_Resnet50_ipu, model_runtime_multi_thread) { + std::string model_dir = FLAGS_infer_model + "/" + "model"; + AnalysisConfig config; + const int thread_num = 10; + // ipu_device_num, ipu_micro_batch_size, ipu_enable_pipelining + config.EnableIpu(1, 1, false); + config.SetIpuConfig(false, 1, 1.0, false, true); + config.SetModel(model_dir + "/model", model_dir + "/params"); + + auto main_predictor = CreatePaddlePredictor(config); + std::vector> inputs; + std::vector> outputs; + std::vector predictors; + std::vector threads; + outputs.resize(thread_num); + inputs.resize(thread_num); + + const int batch = 1; + const int channel = 3; + const int height = 318; + const int width = 318; + const int input_num = batch * channel * height * width; + std::vector input(input_num, 1); + + PaddleTensor in; + in.shape = {batch, channel, height, width}; + in.data = + PaddleBuf(static_cast(input.data()), input_num * sizeof(float)); + in.dtype = PaddleDType::FLOAT32; + + for (int i = 0; i < thread_num; ++i) { + inputs[i].emplace_back(in); + predictors.emplace_back(std::move(main_predictor->Clone())); + } + + auto run = [](PaddlePredictor* predictor, + std::vector& input, + std::vector& output) { + ASSERT_TRUE(predictor->Run(input, &output)); + }; + + for (int i = 0; i < thread_num; ++i) { + threads.emplace_back( + run, predictors[i].get(), std::ref(inputs[i]), std::ref(outputs[i])); + } + + for (int i = 0; i < thread_num; ++i) { + threads[i].join(); + } + + const size_t expected_size = 1; + for (int i = 0; i < thread_num; ++i) { + EXPECT_EQ(outputs[i].size(), expected_size); + float* data_o = static_cast(outputs[i][0].data.data()); + + for (size_t j = 0; j < outputs[i][0].data.length() / sizeof(float); + j += 10) { + EXPECT_NEAR( + (data_o[j] - truth_values[j / 10]) / truth_values[j / 10], 0., 12e-5); + } + } +} } // namespace inference } // namespace paddle diff --git a/paddle/fluid/pybind/inference_api.cc b/paddle/fluid/pybind/inference_api.cc index 2bfe221659..83db629dc8 100644 --- a/paddle/fluid/pybind/inference_api.cc +++ b/paddle/fluid/pybind/inference_api.cc @@ -678,7 +678,8 @@ void BindAnalysisConfig(py::module *m) { py::arg("ipu_enable_fp16") = false, py::arg("ipu_replica_num") = 1, py::arg("ipu_available_memory_proportion") = 1.0, - py::arg("ipu_enable_half_partial") = false) + py::arg("ipu_enable_half_partial") = false, + py::arg("ipu_enable_model_runtime_executor") = false) .def("set_ipu_custom_info", &AnalysisConfig::SetIpuCustomInfo, py::arg("ipu_custom_ops_info") = -- GitLab