diff --git a/paddle/fluid/framework/ir/ipu/inference_process_pass.cc b/paddle/fluid/framework/ir/ipu/inference_process_pass.cc
index 11679c95b1133f9b02e0ba7aae9e0e6e82932a05..0213d20d30c712fef38849bebc65fee7938219bd 100644
--- a/paddle/fluid/framework/ir/ipu/inference_process_pass.cc
+++ b/paddle/fluid/framework/ir/ipu/inference_process_pass.cc
@@ -86,6 +86,10 @@ void InferenceProcessPass::ApplyImpl(ir::Graph* graph) const {
     }
   }
 
+  // Set executor
+  ipu_strategy_instance_->enable_model_runtime_executor =
+      graph->Get<bool>("enable_model_runtime_executor");
+
   // Set available memory proportion for matmul/conv
   ipu_strategy_instance_->available_memory_proportion =
       graph->Get<float>("available_memory_proportion");
diff --git a/paddle/fluid/inference/analysis/argument.h b/paddle/fluid/inference/analysis/argument.h
index 76b51f5890ff39ac673ec9c014fcd882b371adc6..496cd9d1e2d5304382878f5895b786e155d0533f 100644
--- a/paddle/fluid/inference/analysis/argument.h
+++ b/paddle/fluid/inference/analysis/argument.h
@@ -353,6 +353,9 @@ struct Argument {
   DECL_ARGUMENT_FIELD(ipu_custom_patterns,
                       IpuCustomPatterns,
                       std::vector<std::vector<std::string>>);
+  DECL_ARGUMENT_FIELD(ipu_enable_model_runtime_executor,
+                      IpuEnableModelRuntimeExecutor,
+                      bool);
 
   // npu related
   DECL_ARGUMENT_FIELD(use_npu, UseNpu, bool);
diff --git a/paddle/fluid/inference/analysis/passes/ir_graph_build_pass.cc b/paddle/fluid/inference/analysis/passes/ir_graph_build_pass.cc
index e07eaa64615c88b1a8af59d05bb56a808028217f..18f5c9e4a9c6c91385f8429d6c146cb64236cd77 100644
--- a/paddle/fluid/inference/analysis/passes/ir_graph_build_pass.cc
+++ b/paddle/fluid/inference/analysis/passes/ir_graph_build_pass.cc
@@ -97,6 +97,9 @@ void IrGraphBuildPass::RunImpl(Argument *argument) {
                                        &argument->ipu_custom_ops_info());
     argument->main_graph().SetNotOwned("custom_patterns",
                                        &argument->ipu_custom_patterns());
+    argument->main_graph().SetNotOwned(
+        "enable_model_runtime_executor",
+        &argument->ipu_enable_model_runtime_executor());
   }
 }
 #endif
diff --git a/paddle/fluid/inference/api/analysis_config.cc b/paddle/fluid/inference/api/analysis_config.cc
index 08d569635b0c9309ad03f7bf62e61693a34facb4..7d243c6df2a55d382b6a73bb809708b2a437ef15 100755
--- a/paddle/fluid/inference/api/analysis_config.cc
+++ b/paddle/fluid/inference/api/analysis_config.cc
@@ -205,11 +205,13 @@ void AnalysisConfig::EnableIpu(int ipu_device_num,
 void AnalysisConfig::SetIpuConfig(bool ipu_enable_fp16,
                                   int ipu_replica_num,
                                   float ipu_available_memory_proportion,
-                                  bool ipu_enable_half_partial) {
+                                  bool ipu_enable_half_partial,
+                                  bool ipu_enable_model_runtime_executor) {
   ipu_enable_fp16_ = ipu_enable_fp16;
   ipu_replica_num_ = ipu_replica_num;
   ipu_available_memory_proportion_ = ipu_available_memory_proportion;
   ipu_enable_half_partial_ = ipu_enable_half_partial;
+  ipu_enable_model_runtime_executor_ = ipu_enable_model_runtime_executor;
 
   Update();
 }
@@ -284,7 +286,7 @@ void AnalysisConfig::LoadIpuConfig(const std::string &config_path) {
 
     if (ipu_config_mapper_.find(key) == ipu_config_mapper_.end()) {
       PADDLE_THROW(platform::errors::InvalidArgument(
-          "invalid key {} in IPU config", key));
+          "invalid key %s in IPU config", key));
     }
     switch (ipu_config_mapper_.at(key)) {
       case ipu_config_code::ipu_device_num:
@@ -317,6 +319,9 @@
       case ipu_config_code::ipu_custom_patterns:
         ipu_custom_patterns_ = string2vector(value);
         break;
+      case ipu_config_code::ipu_enable_model_runtime_executor:
+        ipu_enable_model_runtime_executor_ = string2bool(value);
+        break;
       default:
         PADDLE_THROW(platform::errors::InvalidArgument(
@@ -482,6 +487,7 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
   CP_MEMBER(ipu_replica_num_);
   CP_MEMBER(ipu_available_memory_proportion_);
   CP_MEMBER(ipu_enable_half_partial_);
+  CP_MEMBER(ipu_enable_model_runtime_executor_);
   CP_MEMBER(ipu_custom_ops_info_);
   CP_MEMBER(ipu_custom_patterns_);
 
@@ -1061,6 +1067,7 @@ std::string AnalysisConfig::SerializeInfoCache() {
   ss << ipu_replica_num_;
   ss << ipu_available_memory_proportion_;
   ss << ipu_enable_half_partial_;
+  ss << ipu_enable_model_runtime_executor_;
   for (auto custom_op : ipu_custom_ops_info_)
     for (auto attr : custom_op) ss << attr;
   ss << ";";
diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
index 48dc6f0afcda7c2da4d55d2db5612d70701fdfac..d2b0ba0a5fcf86d8abd16ff91088edaa68a65ceb 100644
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -1185,6 +1185,8 @@ void AnalysisPredictor::PrepareArgument() {
   argument_.SetIpuAvailableMemoryProportion(
       config_.ipu_available_memory_proportion_);
   argument_.SetIpuEnableHalfPartial(config_.ipu_enable_half_partial_);
+  argument_.SetIpuEnableModelRuntimeExecutor(
+      config_.ipu_enable_model_runtime_executor_);
   argument_.SetIpuCustomOpsInfo(config_.ipu_custom_ops_info_);
   argument_.SetIpuCustomPatterns(config_.ipu_custom_patterns_);
 #endif
diff --git a/paddle/fluid/inference/api/paddle_analysis_config.h b/paddle/fluid/inference/api/paddle_analysis_config.h
index af34bfc796d21e507209cd411e9b2082abe03b45..0fef4f6ced5fdf465359a68cfc2402be9844365d 100644
--- a/paddle/fluid/inference/api/paddle_analysis_config.h
+++ b/paddle/fluid/inference/api/paddle_analysis_config.h
@@ -297,7 +297,8 @@ struct PD_INFER_DECL AnalysisConfig {
     ipu_available_memory_proportion,
     ipu_enable_half_partial,
     ipu_custom_ops_info,
-    ipu_custom_patterns
+    ipu_custom_patterns,
+    ipu_enable_model_runtime_executor,
   };
 
   ///
@@ -323,11 +324,14 @@ struct PD_INFER_DECL AnalysisConfig {
   /// matmul/conv.
   /// \param ipu_enable_half_partial enable fp16 partial for matmul, only work
   /// with fp16.
+  /// \param ipu_enable_model_runtime_executor whether to use model_runtime
+  /// executor.
   ///
   void SetIpuConfig(bool ipu_enable_fp16 = false,
                     int ipu_replica_num = 1,
                     float ipu_available_memory_proportion = 1.0,
-                    bool ipu_enable_half_partial = false);
+                    bool ipu_enable_half_partial = false,
+                    bool ipu_enable_model_runtime_executor = false);
 
   ///
   /// \brief Set IPU custom ops and patterns.
@@ -1176,6 +1180,7 @@ struct PD_INFER_DECL AnalysisConfig {
   int ipu_replica_num_{1};
   float ipu_available_memory_proportion_{1.0};
   bool ipu_enable_half_partial_{false};
+  bool ipu_enable_model_runtime_executor_{false};
   std::vector<std::vector<std::string>> ipu_custom_ops_info_;
   std::vector<std::vector<std::string>> ipu_custom_patterns_;
 
@@ -1190,6 +1195,8 @@ struct PD_INFER_DECL AnalysisConfig {
       {"ipu_available_memory_proportion",
        ipu_config_code::ipu_available_memory_proportion},
       {"ipu_enable_half_partial", ipu_config_code::ipu_enable_half_partial},
+      {"ipu_enable_model_runtime_executor",
+       ipu_config_code::ipu_enable_model_runtime_executor},
       {"ipu_custom_ops_info", ipu_config_code::ipu_custom_ops_info},
       {"ipu_custom_patterns", ipu_config_code::ipu_custom_patterns}};
diff --git a/paddle/fluid/inference/tests/api/ipu_resnet50_test.cc b/paddle/fluid/inference/tests/api/ipu_resnet50_test.cc
index cc37ae0695d3d1a3f5a0863565d5dda089d6bccc..ab7d8bd368e13f2a490ad55f11712e88ec76cc38 100644
--- a/paddle/fluid/inference/tests/api/ipu_resnet50_test.cc
+++ b/paddle/fluid/inference/tests/api/ipu_resnet50_test.cc
@@ -111,5 +111,68 @@ TEST(Analyzer_Resnet50_ipu, compare_results_2_batch) {
   }
 }
 
+// multi threading
+TEST(Analyzer_Resnet50_ipu, model_runtime_multi_thread) {
+  std::string model_dir = FLAGS_infer_model + "/" + "model";
+  AnalysisConfig config;
+  const int thread_num = 10;
+  // ipu_device_num, ipu_micro_batch_size, ipu_enable_pipelining
+  config.EnableIpu(1, 1, false);
+  config.SetIpuConfig(false, 1, 1.0, false, true);
+  config.SetModel(model_dir + "/model", model_dir + "/params");
+
+  auto main_predictor = CreatePaddlePredictor(config);
+  std::vector<std::vector<PaddleTensor>> inputs;
+  std::vector<std::vector<PaddleTensor>> outputs;
+  std::vector<std::unique_ptr<PaddlePredictor>> predictors;
+  std::vector<std::thread> threads;
+  outputs.resize(thread_num);
+  inputs.resize(thread_num);
+
+  const int batch = 1;
+  const int channel = 3;
+  const int height = 318;
+  const int width = 318;
+  const int input_num = batch * channel * height * width;
+  std::vector<float> input(input_num, 1);
+
+  PaddleTensor in;
+  in.shape = {batch, channel, height, width};
+  in.data =
+      PaddleBuf(static_cast<void*>(input.data()), input_num * sizeof(float));
+  in.dtype = PaddleDType::FLOAT32;
+
+  for (int i = 0; i < thread_num; ++i) {
+    inputs[i].emplace_back(in);
+    predictors.emplace_back(std::move(main_predictor->Clone()));
+  }
+
+  auto run = [](PaddlePredictor* predictor,
+                std::vector<PaddleTensor>& input,
+                std::vector<PaddleTensor>& output) {
+    ASSERT_TRUE(predictor->Run(input, &output));
+  };
+
+  for (int i = 0; i < thread_num; ++i) {
+    threads.emplace_back(
+        run, predictors[i].get(), std::ref(inputs[i]), std::ref(outputs[i]));
+  }
+
+  for (int i = 0; i < thread_num; ++i) {
+    threads[i].join();
+  }
+
+  const size_t expected_size = 1;
+  for (int i = 0; i < thread_num; ++i) {
+    EXPECT_EQ(outputs[i].size(), expected_size);
+    float* data_o = static_cast<float*>(outputs[i][0].data.data());
+
+    for (size_t j = 0; j < outputs[i][0].data.length() / sizeof(float);
+         j += 10) {
+      EXPECT_NEAR(
+          (data_o[j] - truth_values[j / 10]) / truth_values[j / 10], 0., 12e-5);
+    }
+  }
+}
 }  // namespace inference
 }  // namespace paddle
diff --git a/paddle/fluid/pybind/inference_api.cc b/paddle/fluid/pybind/inference_api.cc
index 2bfe221659adee4fdc202ca4b3c9a8b984377518..83db629dc89f23537746d0ba12e31007115d4ba7 100644
--- a/paddle/fluid/pybind/inference_api.cc
+++ b/paddle/fluid/pybind/inference_api.cc
@@ -678,7 +678,8 @@ void BindAnalysisConfig(py::module *m) {
           py::arg("ipu_enable_fp16") = false,
           py::arg("ipu_replica_num") = 1,
           py::arg("ipu_available_memory_proportion") = 1.0,
-          py::arg("ipu_enable_half_partial") = false)
py::arg("ipu_enable_half_partial") = false, + py::arg("ipu_enable_model_runtime_executor") = false) .def("set_ipu_custom_info", &AnalysisConfig::SetIpuCustomInfo, py::arg("ipu_custom_ops_info") =