未验证 提交 39c85064 编写于 作者: C czr-gc 提交者: GitHub

feat(ipu): add paddle inference support for model_runtime. (#47364)

上级 5859d0a6
......@@ -86,6 +86,10 @@ void InferenceProcessPass::ApplyImpl(ir::Graph* graph) const {
}
}
// Set executor
ipu_strategy_instance_->enable_model_runtime_executor =
graph->Get<bool>("enable_model_runtime_executor");
// Set available memory proportion for matmul/conv
ipu_strategy_instance_->available_memory_proportion =
graph->Get<float>("available_memory_proportion");
......
......@@ -353,6 +353,9 @@ struct Argument {
DECL_ARGUMENT_FIELD(ipu_custom_patterns,
IpuCustomPatterns,
std::vector<std::vector<std::string>>);
DECL_ARGUMENT_FIELD(ipu_enable_model_runtime_executor,
IpuEnableModelRuntimeExecutor,
bool);
// npu related
DECL_ARGUMENT_FIELD(use_npu, UseNpu, bool);
......
......@@ -97,6 +97,9 @@ void IrGraphBuildPass::RunImpl(Argument *argument) {
&argument->ipu_custom_ops_info());
argument->main_graph().SetNotOwned("custom_patterns",
&argument->ipu_custom_patterns());
argument->main_graph().SetNotOwned(
"enable_model_runtime_executor",
&argument->ipu_enable_model_runtime_executor());
}
}
#endif
......
......@@ -205,11 +205,13 @@ void AnalysisConfig::EnableIpu(int ipu_device_num,
// Configure IPU-specific inference options.
//
// \param ipu_enable_fp16 run the graph in fp16.
// \param ipu_replica_num number of graph replicas.
// \param ipu_available_memory_proportion available-memory proportion for
//        matmul/conv.
// \param ipu_enable_half_partial fp16 partials for matmul (fp16 only).
// \param ipu_enable_model_runtime_executor use the model_runtime executor
//        instead of the default one.
//
// All parameters carry defaults in the declaration, so existing callers
// remain source-compatible.
void AnalysisConfig::SetIpuConfig(bool ipu_enable_fp16,
                                  int ipu_replica_num,
                                  float ipu_available_memory_proportion,
                                  bool ipu_enable_half_partial,
                                  bool ipu_enable_model_runtime_executor) {
  ipu_enable_fp16_ = ipu_enable_fp16;
  ipu_replica_num_ = ipu_replica_num;
  ipu_available_memory_proportion_ = ipu_available_memory_proportion;
  ipu_enable_half_partial_ = ipu_enable_half_partial;
  ipu_enable_model_runtime_executor_ = ipu_enable_model_runtime_executor;
  // Re-derive any dependent configuration after the options change.
  Update();
}
......@@ -284,7 +286,7 @@ void AnalysisConfig::LoadIpuConfig(const std::string &config_path) {
if (ipu_config_mapper_.find(key) == ipu_config_mapper_.end()) {
PADDLE_THROW(platform::errors::InvalidArgument(
"invalid key {} in IPU config", key));
"invalid key {} in IPU config: ", key));
}
switch (ipu_config_mapper_.at(key)) {
case ipu_config_code::ipu_device_num:
......@@ -317,6 +319,9 @@ void AnalysisConfig::LoadIpuConfig(const std::string &config_path) {
case ipu_config_code::ipu_custom_patterns:
ipu_custom_patterns_ = string2vector(value);
break;
case ipu_config_code::ipu_enable_model_runtime_executor:
ipu_enable_model_runtime_executor_ = string2bool(value);
break;
default:
PADDLE_THROW(platform::errors::InvalidArgument(
......@@ -482,6 +487,7 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
CP_MEMBER(ipu_replica_num_);
CP_MEMBER(ipu_available_memory_proportion_);
CP_MEMBER(ipu_enable_half_partial_);
CP_MEMBER(ipu_enable_model_runtime_executor_);
CP_MEMBER(ipu_custom_ops_info_);
CP_MEMBER(ipu_custom_patterns_);
......@@ -1061,6 +1067,7 @@ std::string AnalysisConfig::SerializeInfoCache() {
ss << ipu_replica_num_;
ss << ipu_available_memory_proportion_;
ss << ipu_enable_half_partial_;
ss << ipu_enable_model_runtime_executor_;
for (auto custom_op : ipu_custom_ops_info_)
for (auto attr : custom_op) ss << attr;
ss << ";";
......
......@@ -1185,6 +1185,8 @@ void AnalysisPredictor::PrepareArgument() {
argument_.SetIpuAvailableMemoryProportion(
config_.ipu_available_memory_proportion_);
argument_.SetIpuEnableHalfPartial(config_.ipu_enable_half_partial_);
argument_.SetIpuEnableModelRuntimeExecutor(
config_.ipu_enable_model_runtime_executor_);
argument_.SetIpuCustomOpsInfo(config_.ipu_custom_ops_info_);
argument_.SetIpuCustomPatterns(config_.ipu_custom_patterns_);
#endif
......
......@@ -297,7 +297,8 @@ struct PD_INFER_DECL AnalysisConfig {
ipu_available_memory_proportion,
ipu_enable_half_partial,
ipu_custom_ops_info,
ipu_custom_patterns
ipu_custom_patterns,
ipu_enable_model_runtime_executor,
};
///
......@@ -323,11 +324,14 @@ struct PD_INFER_DECL AnalysisConfig {
/// matmul/conv.
/// \param ipu_enable_half_partial enable fp16 partial for matmul, only work
/// with fp16.
/// \param ipu_enable_model_runtime_executor whether to use model_runtime
/// executor.
///
void SetIpuConfig(bool ipu_enable_fp16 = false,
int ipu_replica_num = 1,
float ipu_available_memory_proportion = 1.0,
bool ipu_enable_half_partial = false);
bool ipu_enable_half_partial = false,
bool ipu_enable_model_runtime_executor = false);
///
/// \brief Set IPU custom ops and patterns.
......@@ -1176,6 +1180,7 @@ struct PD_INFER_DECL AnalysisConfig {
int ipu_replica_num_{1};
float ipu_available_memory_proportion_{1.0};
bool ipu_enable_half_partial_{false};
bool ipu_enable_model_runtime_executor_{false};
std::vector<std::vector<std::string>> ipu_custom_ops_info_;
std::vector<std::vector<std::string>> ipu_custom_patterns_;
......@@ -1190,6 +1195,8 @@ struct PD_INFER_DECL AnalysisConfig {
{"ipu_available_memory_proportion",
ipu_config_code::ipu_available_memory_proportion},
{"ipu_enable_half_partial", ipu_config_code::ipu_enable_half_partial},
{"ipu_enable_model_runtime_executor",
ipu_config_code::ipu_enable_model_runtime_executor},
{"ipu_custom_ops_info", ipu_config_code::ipu_custom_ops_info},
{"ipu_custom_patterns", ipu_config_code::ipu_custom_patterns}};
......
......@@ -111,5 +111,68 @@ TEST(Analyzer_Resnet50_ipu, compare_results_2_batch) {
}
}
// Multi-threading test: verify that the model_runtime executor produces
// correct results when the same model is cloned and run concurrently
// from several threads.
TEST(Analyzer_Resnet50_ipu, model_runtime_multi_thread) {
  std::string model_dir = FLAGS_infer_model + "/" + "model";
  AnalysisConfig config;
  const int thread_num = 10;
  // ipu_device_num, ipu_micro_batch_size, ipu_enable_pipelining
  config.EnableIpu(1, 1, false);
  // ipu_enable_fp16, ipu_replica_num, ipu_available_memory_proportion,
  // ipu_enable_half_partial, ipu_enable_model_runtime_executor
  config.SetIpuConfig(false, 1, 1.0, false, true);
  config.SetModel(model_dir + "/model", model_dir + "/params");
  auto main_predictor = CreatePaddlePredictor(config);

  std::vector<std::vector<PaddleTensor>> inputs(thread_num);
  std::vector<std::vector<PaddleTensor>> outputs(thread_num);
  std::vector<decltype(main_predictor)> predictors;
  predictors.reserve(thread_num);
  std::vector<std::thread> threads;
  threads.reserve(thread_num);

  // A single all-ones image shared by every thread. Note the PaddleBuf
  // wraps `input` without owning it, so `input` must outlive all runs.
  const int batch = 1;
  const int channel = 3;
  const int height = 318;
  const int width = 318;
  const int input_num = batch * channel * height * width;
  std::vector<float> input(input_num, 1);
  PaddleTensor in;
  in.shape = {batch, channel, height, width};
  in.data =
      PaddleBuf(static_cast<void*>(input.data()), input_num * sizeof(float));
  in.dtype = PaddleDType::FLOAT32;

  for (int i = 0; i < thread_num; ++i) {
    inputs[i].emplace_back(in);
    // Clone() returns by value; std::move on an rvalue is redundant.
    predictors.emplace_back(main_predictor->Clone());
  }

  // Each thread runs its own cloned predictor on its own input/output slot,
  // so there is no shared mutable state between threads.
  auto run = [](PaddlePredictor* predictor,
                std::vector<PaddleTensor>& input,
                std::vector<PaddleTensor>& output) {
    ASSERT_TRUE(predictor->Run(input, &output));
  };
  for (int i = 0; i < thread_num; ++i) {
    threads.emplace_back(
        run, predictors[i].get(), std::ref(inputs[i]), std::ref(outputs[i]));
  }
  for (auto& t : threads) {
    t.join();
  }

  // Every thread must yield exactly one output tensor whose sampled values
  // match the reference within a small relative tolerance.
  const size_t expected_size = 1;
  for (int i = 0; i < thread_num; ++i) {
    EXPECT_EQ(outputs[i].size(), expected_size);
    float* data_o = static_cast<float*>(outputs[i][0].data.data());
    for (size_t j = 0; j < outputs[i][0].data.length() / sizeof(float);
         j += 10) {
      EXPECT_NEAR(
          (data_o[j] - truth_values[j / 10]) / truth_values[j / 10], 0., 12e-5);
    }
  }
}
} // namespace inference
} // namespace paddle
......@@ -678,7 +678,8 @@ void BindAnalysisConfig(py::module *m) {
py::arg("ipu_enable_fp16") = false,
py::arg("ipu_replica_num") = 1,
py::arg("ipu_available_memory_proportion") = 1.0,
py::arg("ipu_enable_half_partial") = false)
py::arg("ipu_enable_half_partial") = false,
py::arg("ipu_enable_model_runtime_executor") = false)
.def("set_ipu_custom_info",
&AnalysisConfig::SetIpuCustomInfo,
py::arg("ipu_custom_ops_info") =
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册