未验证 提交 39c85064 编写于 作者: C czr-gc 提交者: GitHub

feat(ipu): add paddle inference support for model_runtime. (#47364)

上级 5859d0a6
...@@ -86,6 +86,10 @@ void InferenceProcessPass::ApplyImpl(ir::Graph* graph) const { ...@@ -86,6 +86,10 @@ void InferenceProcessPass::ApplyImpl(ir::Graph* graph) const {
} }
} }
// Set executor
ipu_strategy_instance_->enable_model_runtime_executor =
graph->Get<bool>("enable_model_runtime_executor");
// Set available memory proportion for matmul/conv // Set available memory proportion for matmul/conv
ipu_strategy_instance_->available_memory_proportion = ipu_strategy_instance_->available_memory_proportion =
graph->Get<float>("available_memory_proportion"); graph->Get<float>("available_memory_proportion");
......
...@@ -353,6 +353,9 @@ struct Argument { ...@@ -353,6 +353,9 @@ struct Argument {
DECL_ARGUMENT_FIELD(ipu_custom_patterns, DECL_ARGUMENT_FIELD(ipu_custom_patterns,
IpuCustomPatterns, IpuCustomPatterns,
std::vector<std::vector<std::string>>); std::vector<std::vector<std::string>>);
DECL_ARGUMENT_FIELD(ipu_enable_model_runtime_executor,
IpuEnableModelRuntimeExecutor,
bool);
// npu related // npu related
DECL_ARGUMENT_FIELD(use_npu, UseNpu, bool); DECL_ARGUMENT_FIELD(use_npu, UseNpu, bool);
......
...@@ -97,6 +97,9 @@ void IrGraphBuildPass::RunImpl(Argument *argument) { ...@@ -97,6 +97,9 @@ void IrGraphBuildPass::RunImpl(Argument *argument) {
&argument->ipu_custom_ops_info()); &argument->ipu_custom_ops_info());
argument->main_graph().SetNotOwned("custom_patterns", argument->main_graph().SetNotOwned("custom_patterns",
&argument->ipu_custom_patterns()); &argument->ipu_custom_patterns());
argument->main_graph().SetNotOwned(
"enable_model_runtime_executor",
&argument->ipu_enable_model_runtime_executor());
} }
} }
#endif #endif
......
...@@ -205,11 +205,13 @@ void AnalysisConfig::EnableIpu(int ipu_device_num, ...@@ -205,11 +205,13 @@ void AnalysisConfig::EnableIpu(int ipu_device_num,
void AnalysisConfig::SetIpuConfig(bool ipu_enable_fp16, void AnalysisConfig::SetIpuConfig(bool ipu_enable_fp16,
int ipu_replica_num, int ipu_replica_num,
float ipu_available_memory_proportion, float ipu_available_memory_proportion,
bool ipu_enable_half_partial) { bool ipu_enable_half_partial,
bool ipu_enable_model_runtime_executor) {
ipu_enable_fp16_ = ipu_enable_fp16; ipu_enable_fp16_ = ipu_enable_fp16;
ipu_replica_num_ = ipu_replica_num; ipu_replica_num_ = ipu_replica_num;
ipu_available_memory_proportion_ = ipu_available_memory_proportion; ipu_available_memory_proportion_ = ipu_available_memory_proportion;
ipu_enable_half_partial_ = ipu_enable_half_partial; ipu_enable_half_partial_ = ipu_enable_half_partial;
ipu_enable_model_runtime_executor_ = ipu_enable_model_runtime_executor;
Update(); Update();
} }
...@@ -284,7 +286,7 @@ void AnalysisConfig::LoadIpuConfig(const std::string &config_path) { ...@@ -284,7 +286,7 @@ void AnalysisConfig::LoadIpuConfig(const std::string &config_path) {
if (ipu_config_mapper_.find(key) == ipu_config_mapper_.end()) { if (ipu_config_mapper_.find(key) == ipu_config_mapper_.end()) {
PADDLE_THROW(platform::errors::InvalidArgument( PADDLE_THROW(platform::errors::InvalidArgument(
"invalid key {} in IPU config", key)); "invalid key {} in IPU config: ", key));
} }
switch (ipu_config_mapper_.at(key)) { switch (ipu_config_mapper_.at(key)) {
case ipu_config_code::ipu_device_num: case ipu_config_code::ipu_device_num:
...@@ -317,6 +319,9 @@ void AnalysisConfig::LoadIpuConfig(const std::string &config_path) { ...@@ -317,6 +319,9 @@ void AnalysisConfig::LoadIpuConfig(const std::string &config_path) {
case ipu_config_code::ipu_custom_patterns: case ipu_config_code::ipu_custom_patterns:
ipu_custom_patterns_ = string2vector(value); ipu_custom_patterns_ = string2vector(value);
break; break;
case ipu_config_code::ipu_enable_model_runtime_executor:
ipu_enable_model_runtime_executor_ = string2bool(value);
break;
default: default:
PADDLE_THROW(platform::errors::InvalidArgument( PADDLE_THROW(platform::errors::InvalidArgument(
...@@ -482,6 +487,7 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) { ...@@ -482,6 +487,7 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
CP_MEMBER(ipu_replica_num_); CP_MEMBER(ipu_replica_num_);
CP_MEMBER(ipu_available_memory_proportion_); CP_MEMBER(ipu_available_memory_proportion_);
CP_MEMBER(ipu_enable_half_partial_); CP_MEMBER(ipu_enable_half_partial_);
CP_MEMBER(ipu_enable_model_runtime_executor_);
CP_MEMBER(ipu_custom_ops_info_); CP_MEMBER(ipu_custom_ops_info_);
CP_MEMBER(ipu_custom_patterns_); CP_MEMBER(ipu_custom_patterns_);
...@@ -1061,6 +1067,7 @@ std::string AnalysisConfig::SerializeInfoCache() { ...@@ -1061,6 +1067,7 @@ std::string AnalysisConfig::SerializeInfoCache() {
ss << ipu_replica_num_; ss << ipu_replica_num_;
ss << ipu_available_memory_proportion_; ss << ipu_available_memory_proportion_;
ss << ipu_enable_half_partial_; ss << ipu_enable_half_partial_;
ss << ipu_enable_model_runtime_executor_;
for (auto custom_op : ipu_custom_ops_info_) for (auto custom_op : ipu_custom_ops_info_)
for (auto attr : custom_op) ss << attr; for (auto attr : custom_op) ss << attr;
ss << ";"; ss << ";";
......
...@@ -1185,6 +1185,8 @@ void AnalysisPredictor::PrepareArgument() { ...@@ -1185,6 +1185,8 @@ void AnalysisPredictor::PrepareArgument() {
argument_.SetIpuAvailableMemoryProportion( argument_.SetIpuAvailableMemoryProportion(
config_.ipu_available_memory_proportion_); config_.ipu_available_memory_proportion_);
argument_.SetIpuEnableHalfPartial(config_.ipu_enable_half_partial_); argument_.SetIpuEnableHalfPartial(config_.ipu_enable_half_partial_);
argument_.SetIpuEnableModelRuntimeExecutor(
config_.ipu_enable_model_runtime_executor_);
argument_.SetIpuCustomOpsInfo(config_.ipu_custom_ops_info_); argument_.SetIpuCustomOpsInfo(config_.ipu_custom_ops_info_);
argument_.SetIpuCustomPatterns(config_.ipu_custom_patterns_); argument_.SetIpuCustomPatterns(config_.ipu_custom_patterns_);
#endif #endif
......
...@@ -297,7 +297,8 @@ struct PD_INFER_DECL AnalysisConfig { ...@@ -297,7 +297,8 @@ struct PD_INFER_DECL AnalysisConfig {
ipu_available_memory_proportion, ipu_available_memory_proportion,
ipu_enable_half_partial, ipu_enable_half_partial,
ipu_custom_ops_info, ipu_custom_ops_info,
ipu_custom_patterns ipu_custom_patterns,
ipu_enable_model_runtime_executor,
}; };
/// ///
...@@ -323,11 +324,14 @@ struct PD_INFER_DECL AnalysisConfig { ...@@ -323,11 +324,14 @@ struct PD_INFER_DECL AnalysisConfig {
/// matmul/conv. /// matmul/conv.
/// \param ipu_enable_half_partial enable fp16 partial for matmul, only work /// \param ipu_enable_half_partial enable fp16 partial for matmul, only work
/// with fp16. /// with fp16.
/// \param ipu_enable_model_runtime_executor whether to use model_runtime
/// executor.
/// ///
void SetIpuConfig(bool ipu_enable_fp16 = false, void SetIpuConfig(bool ipu_enable_fp16 = false,
int ipu_replica_num = 1, int ipu_replica_num = 1,
float ipu_available_memory_proportion = 1.0, float ipu_available_memory_proportion = 1.0,
bool ipu_enable_half_partial = false); bool ipu_enable_half_partial = false,
bool ipu_enable_model_runtime_executor = false);
/// ///
/// \brief Set IPU custom ops and patterns. /// \brief Set IPU custom ops and patterns.
...@@ -1176,6 +1180,7 @@ struct PD_INFER_DECL AnalysisConfig { ...@@ -1176,6 +1180,7 @@ struct PD_INFER_DECL AnalysisConfig {
int ipu_replica_num_{1}; int ipu_replica_num_{1};
float ipu_available_memory_proportion_{1.0}; float ipu_available_memory_proportion_{1.0};
bool ipu_enable_half_partial_{false}; bool ipu_enable_half_partial_{false};
bool ipu_enable_model_runtime_executor_{false};
std::vector<std::vector<std::string>> ipu_custom_ops_info_; std::vector<std::vector<std::string>> ipu_custom_ops_info_;
std::vector<std::vector<std::string>> ipu_custom_patterns_; std::vector<std::vector<std::string>> ipu_custom_patterns_;
...@@ -1190,6 +1195,8 @@ struct PD_INFER_DECL AnalysisConfig { ...@@ -1190,6 +1195,8 @@ struct PD_INFER_DECL AnalysisConfig {
{"ipu_available_memory_proportion", {"ipu_available_memory_proportion",
ipu_config_code::ipu_available_memory_proportion}, ipu_config_code::ipu_available_memory_proportion},
{"ipu_enable_half_partial", ipu_config_code::ipu_enable_half_partial}, {"ipu_enable_half_partial", ipu_config_code::ipu_enable_half_partial},
{"ipu_enable_model_runtime_executor",
ipu_config_code::ipu_enable_model_runtime_executor},
{"ipu_custom_ops_info", ipu_config_code::ipu_custom_ops_info}, {"ipu_custom_ops_info", ipu_config_code::ipu_custom_ops_info},
{"ipu_custom_patterns", ipu_config_code::ipu_custom_patterns}}; {"ipu_custom_patterns", ipu_config_code::ipu_custom_patterns}};
......
...@@ -111,5 +111,68 @@ TEST(Analyzer_Resnet50_ipu, compare_results_2_batch) { ...@@ -111,5 +111,68 @@ TEST(Analyzer_Resnet50_ipu, compare_results_2_batch) {
} }
} }
// Verify that a predictor configured with the model_runtime executor can be
// cloned and run concurrently from multiple threads, and that every clone
// produces results matching the reference values.
TEST(Analyzer_Resnet50_ipu, model_runtime_multi_thread) {
  std::string model_dir = FLAGS_infer_model + "/" + "model";
  AnalysisConfig config;
  const int thread_num = 10;
  // ipu_device_num, ipu_micro_batch_size, ipu_enable_pipelining
  config.EnableIpu(1, 1, false);
  // ipu_enable_fp16, ipu_replica_num, ipu_available_memory_proportion,
  // ipu_enable_half_partial, ipu_enable_model_runtime_executor
  config.SetIpuConfig(false, 1, 1.0, false, true);
  config.SetModel(model_dir + "/model", model_dir + "/params");
  auto main_predictor = CreatePaddlePredictor(config);
  std::vector<std::vector<PaddleTensor>> inputs;
  std::vector<std::vector<PaddleTensor>> outputs;
  std::vector<decltype(main_predictor)> predictors;
  std::vector<std::thread> threads;
  outputs.resize(thread_num);
  inputs.resize(thread_num);
  const int batch = 1;
  const int channel = 3;
  const int height = 318;
  const int width = 318;
  const int input_num = batch * channel * height * width;
  std::vector<float> input(input_num, 1);
  PaddleTensor in;
  in.shape = {batch, channel, height, width};
  // NOTE: every per-thread input tensor aliases the same read-only buffer;
  // `input` outlives all threads because they are joined before it goes out
  // of scope.
  in.data =
      PaddleBuf(static_cast<void*>(input.data()), input_num * sizeof(float));
  in.dtype = PaddleDType::FLOAT32;
  for (int i = 0; i < thread_num; ++i) {
    inputs[i].emplace_back(in);
    // Clone() already yields a prvalue, so wrapping it in std::move was
    // redundant (clang-tidy: performance-move-const-arg).
    predictors.emplace_back(main_predictor->Clone());
  }
  auto run = [](PaddlePredictor* predictor,
                std::vector<PaddleTensor>& input,
                std::vector<PaddleTensor>& output) {
    ASSERT_TRUE(predictor->Run(input, &output));
  };
  for (int i = 0; i < thread_num; ++i) {
    threads.emplace_back(
        run, predictors[i].get(), std::ref(inputs[i]), std::ref(outputs[i]));
  }
  for (int i = 0; i < thread_num; ++i) {
    threads[i].join();
  }
  const size_t expected_size = 1;
  for (int i = 0; i < thread_num; ++i) {
    EXPECT_EQ(outputs[i].size(), expected_size);
    float* data_o = static_cast<float*>(outputs[i][0].data.data());
    // Spot-check every 10th output element against the reference values with
    // a relative tolerance of 12e-5.
    for (size_t j = 0; j < outputs[i][0].data.length() / sizeof(float);
         j += 10) {
      EXPECT_NEAR(
          (data_o[j] - truth_values[j / 10]) / truth_values[j / 10], 0., 12e-5);
    }
  }
}
} // namespace inference } // namespace inference
} // namespace paddle } // namespace paddle
...@@ -678,7 +678,8 @@ void BindAnalysisConfig(py::module *m) { ...@@ -678,7 +678,8 @@ void BindAnalysisConfig(py::module *m) {
py::arg("ipu_enable_fp16") = false, py::arg("ipu_enable_fp16") = false,
py::arg("ipu_replica_num") = 1, py::arg("ipu_replica_num") = 1,
py::arg("ipu_available_memory_proportion") = 1.0, py::arg("ipu_available_memory_proportion") = 1.0,
py::arg("ipu_enable_half_partial") = false) py::arg("ipu_enable_half_partial") = false,
py::arg("ipu_enable_model_runtime_executor") = false)
.def("set_ipu_custom_info", .def("set_ipu_custom_info",
&AnalysisConfig::SetIpuCustomInfo, &AnalysisConfig::SetIpuCustomInfo,
py::arg("ipu_custom_ops_info") = py::arg("ipu_custom_ops_info") =
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册