From a6b4bee3da00b7c7a257995459ce98ee28b0c9ec Mon Sep 17 00:00:00 2001
From: Allen Guo
Date: Fri, 30 Sep 2022 15:57:00 +0800
Subject: [PATCH] [IPU] paddle-inference support custom-ops (#45235)

* paddle-inference support custom-ops

Co-authored-by: Zhixin Yao

* fix tolower

Co-authored-by: Zhixin Yao
---
 .../framework/ir/ipu/infer_shape_pass.cc      |   4 +
 .../ir/ipu/inference_dtype_transfer_pass.cc   |  25 ++++
 .../ir/ipu/inference_process_pass.cc          |  32 +++++
 paddle/fluid/inference/analysis/argument.h    |   6 +
 .../analysis/passes/ir_graph_build_pass.cc    |   4 +
 paddle/fluid/inference/api/analysis_config.cc | 124 +++++++++++++++++-
 .../fluid/inference/api/analysis_predictor.cc |   2 +
 .../inference/api/paddle_analysis_config.h    |  51 +++++++
 .../fluid/platform/device/ipu/ipu_backend.cc  |  13 +-
 paddle/fluid/pybind/inference_api.cc          |   8 ++
 10 files changed, 267 insertions(+), 2 deletions(-)

diff --git a/paddle/fluid/framework/ir/ipu/infer_shape_pass.cc b/paddle/fluid/framework/ir/ipu/infer_shape_pass.cc
index 9b2793c3034..ed5ac1a4c09 100644
--- a/paddle/fluid/framework/ir/ipu/infer_shape_pass.cc
+++ b/paddle/fluid/framework/ir/ipu/infer_shape_pass.cc
@@ -53,6 +53,10 @@ void InferShapePass::ApplyImpl(ir::Graph* graph) const {
       if (node->Var()->GetDataType() == proto::VarType::INT64) {
         node->Var()->SetDataType(proto::VarType::INT32);
       }
+      // float64->float32
+      if (node->Var()->GetDataType() == proto::VarType::FP64) {
+        node->Var()->SetDataType(proto::VarType::FP32);
+      }
     }
   }

diff --git a/paddle/fluid/framework/ir/ipu/inference_dtype_transfer_pass.cc b/paddle/fluid/framework/ir/ipu/inference_dtype_transfer_pass.cc
index a0820afc2d8..76b2792e3df 100644
--- a/paddle/fluid/framework/ir/ipu/inference_dtype_transfer_pass.cc
+++ b/paddle/fluid/framework/ir/ipu/inference_dtype_transfer_pass.cc
@@ -37,11 +37,30 @@ void InferenceDtypeTransferPass::ApplyImpl(ir::Graph* graph) const {
     VLOG(10) << "Transfer var to fp16...";
     auto* scope = ipu_backend->GetScope();

+    // Record specific vars to skip
+    std::set<std::string> skip_var_lists;
+    for (auto* node : graph->Nodes()) {
+      if (node->IsOp()) {
+        // The clip op's attrs `max` and `min` only support FP32
+        if (node->Op()->Type() == "popart_clip") {
+          auto min_tensor_name = node->Op()->InputArgumentNames()[1];
+          auto max_tensor_name = node->Op()->InputArgumentNames()[2];
+          skip_var_lists.insert(min_tensor_name);
+          skip_var_lists.insert(max_tensor_name);
+        }
+      }
+    }
+
     std::unordered_set<std::string> used_var_names;
     for (auto* node : graph->Nodes()) {
       if (node->IsVar()) {
         auto var_desc = node->Var();
         if (var_desc->GetDataType() == proto::VarType::FP32) {
+          // Skip specific vars
+          if (skip_var_lists.find(var_desc->Name()) != skip_var_lists.end()) {
+            continue;
+          }
+          // Transfer the dtypes of var_desc
           var_desc->SetDataType(proto::VarType::FP16);
           VLOG(10) << "Transfer the VarDesc of " << var_desc->Name() << " to "
@@ -81,6 +100,12 @@ void InferenceDtypeTransferPass::ApplyImpl(ir::Graph* graph) const {
           }
         }
         if (op_desc->Type() == "popart_constant") {
+          // Skip specific constant
+          auto output_var_name = node->outputs[0]->Var()->Name();
+          if (skip_var_lists.find(output_var_name) != skip_var_lists.end()) {
+            continue;
+          }
+
           // Transfer the dtype of fill_constant Op
           if (op_desc->GetAttrIfExists<int>("dtype") == 1) {
             op_desc->SetAttr("dtype", 10);
diff --git a/paddle/fluid/framework/ir/ipu/inference_process_pass.cc b/paddle/fluid/framework/ir/ipu/inference_process_pass.cc
index 55a4e320ea2..11679c95b11 100644
--- a/paddle/fluid/framework/ir/ipu/inference_process_pass.cc
+++ b/paddle/fluid/framework/ir/ipu/inference_process_pass.cc
@@ -93,6 +93,33 @@ void InferenceProcessPass::ApplyImpl(ir::Graph* graph) const {
   // Set tiles_per_ipu for IPUMODEL
   ipu_strategy_instance_->tiles_per_ipu = 128;

+  // Set Cache path
+  auto* ipu_cache_path = getenv("IPU_CACHE_PATH");
+  if (ipu_cache_path) {
+    ipu_strategy_instance_->popart_options.enableEngineCaching = true;
+    ipu_strategy_instance_->popart_options.cachePath =
+        std::string{ipu_cache_path};
+  }
+
+  // custom ops and patterns
+  std::unordered_set<std::string> custom_op_names;
+  auto custom_ops_info =
+      graph->Get<std::vector<std::vector<std::string>>>("custom_ops_info");
+  for (auto custom_op : custom_ops_info) {
+    ipu_strategy_instance_->AddCustomOp(
+        custom_op[0], custom_op[1], custom_op[2], atoi(custom_op[3].c_str()));
+    custom_op_names.insert(custom_op[0]);
+  }
+  auto patterns =
+      graph->Get<std::vector<std::vector<std::string>>>("custom_patterns");
+  for (auto pattern : patterns) {
+    if (pattern[1] == "True") {
+      ipu_strategy_instance_->EnablePattern(pattern[0]);
+    } else if (pattern[1] == "False") {
+      ipu_strategy_instance_->DisablePattern(pattern[0]);
+    }
+  }
+
   ipu_backend->SetIpuStrategy(*(ipu_strategy_instance_.get()));

   // Get feed_list and fetch list
@@ -140,6 +167,11 @@ void InferenceProcessPass::ApplyImpl(ir::Graph* graph) const {
           "feed_list",
           new std::vector<std::string>(feed_list.begin(), feed_list.end()));
     }
+    if (pass_name == "popart_canonicalization_pass") {
+      pass->Set("custom_ops",
+                new std::unordered_set<std::string>(custom_op_names.begin(),
+                                                    custom_op_names.end()));
+    }
     pass->Apply(graph);
   }

diff --git a/paddle/fluid/inference/analysis/argument.h b/paddle/fluid/inference/analysis/argument.h
index 39e844aacb1..c6c636c2c67 100755
--- a/paddle/fluid/inference/analysis/argument.h
+++ b/paddle/fluid/inference/analysis/argument.h
@@ -341,6 +341,12 @@ struct Argument {
                       IpuAvailableMemoryProportion,
                       float);
   DECL_ARGUMENT_FIELD(ipu_enable_half_partial, IpuEnableHalfPartial, bool);
+  DECL_ARGUMENT_FIELD(ipu_custom_ops_info,
+                      IpuCustomOpsInfo,
+                      std::vector<std::vector<std::string>>);
+  DECL_ARGUMENT_FIELD(ipu_custom_patterns,
+                      IpuCustomPatterns,
+                      std::vector<std::vector<std::string>>);

   // npu related
   DECL_ARGUMENT_FIELD(use_npu, UseNpu, bool);
diff --git a/paddle/fluid/inference/analysis/passes/ir_graph_build_pass.cc b/paddle/fluid/inference/analysis/passes/ir_graph_build_pass.cc
index 5070328394b..cd93238ff2b 100644
--- a/paddle/fluid/inference/analysis/passes/ir_graph_build_pass.cc
+++ b/paddle/fluid/inference/analysis/passes/ir_graph_build_pass.cc
@@ -92,6 +92,10 @@ void IrGraphBuildPass::RunImpl(Argument *argument) {
                                    &argument->ipu_available_memory_proportion());
     argument->main_graph().SetNotOwned("enable_half_partial",
                                        &argument->ipu_enable_half_partial());
+    argument->main_graph().SetNotOwned("custom_ops_info",
+                                       &argument->ipu_custom_ops_info());
+    argument->main_graph().SetNotOwned("custom_patterns",
+                                       &argument->ipu_custom_patterns());
   }
 }
 #endif
diff --git a/paddle/fluid/inference/api/analysis_config.cc b/paddle/fluid/inference/api/analysis_config.cc
index f8c53353ca2..68864ad1528 100755
--- a/paddle/fluid/inference/api/analysis_config.cc
+++ b/paddle/fluid/inference/api/analysis_config.cc
@@ -22,6 +22,7 @@
 #include "paddle/fluid/platform/cpu_info.h"
 #include "paddle/fluid/platform/device/gpu/gpu_info.h"
 #include "paddle/fluid/platform/enforce.h"
+#include "paddle/utils/string/split.h"

 #ifdef PADDLE_WITH_TENSORRT
 #include "paddle/fluid/inference/tensorrt/helper.h"
@@ -208,6 +209,120 @@ void AnalysisConfig::SetIpuConfig(bool ipu_enable_fp16,
   Update();
 }

+void AnalysisConfig::SetIpuCustomInfo(
+    const std::vector<std::vector<std::string>> &ipu_custom_ops_info,
+    const std::map<std::string, bool> &ipu_custom_patterns) {
+  ipu_custom_ops_info_ = ipu_custom_ops_info;
+  for (auto iter = ipu_custom_patterns.begin();
+       iter != ipu_custom_patterns.end();
+       iter++) {
+    if (iter->second == true) {
+      ipu_custom_patterns_.push_back(
+          std::vector<std::string>{iter->first, "True"});
+    } else if (iter->second == false) {
+      ipu_custom_patterns_.push_back(
+          std::vector<std::string>{iter->first, "False"});
+    }
+  }
+
+  Update();
+}
+
+void AnalysisConfig::LoadIpuConfig(const std::string &config_path) {
+  std::ifstream fin(config_path, std::ios::in);
+  PADDLE_ENFORCE_EQ(
+      static_cast<bool>(fin.is_open()),
+      true,
+      platform::errors::NotFound(
+          "Cannot open file %s, please confirm whether the file is normal.",
+          config_path));
+  std::string line;
+  while (std::getline(fin, line)) {
+    // remove all space
+    line.erase(std::remove(line.begin(), line.end(), ' '), line.end());
+
+    std::string key;
+    std::string value;
+    std::istringstream stream(line);
+    // Split string to key and value based on the first `,`
+    std::getline(stream, key, ',');
+    std::getline(stream, value);
+
+    auto string2bool = [](std::string s) {
+      std::transform(s.begin(), s.end(), s.begin(), [](unsigned char c) {
+        return ::tolower(c);
+      });
+      return s == "true" || s == "1";
+    };
+
+    // ipu_custom_ops_info:
+    // [[paddle_op_name, popart_op_name, domain, version], [paddle_op_name,
+    // popart_op_name, domain, version]...]
+    // ipu_custom_patterns:
+    // [[pattern_name, enable_pattern], [pattern_name, enable_pattern]...]
+    auto string2vector = [](std::string s) {
+      std::vector<std::vector<std::string>> custom_info;
+      s.erase(0, 1);
+      s.pop_back();
+
+      std::string one;
+      std::istringstream s_stream(s);
+      while (std::getline(s_stream, one, ']')) {
+        if (!one.empty()) {
+          // remove `[`
+          one.erase(0, 1);
+          custom_info.push_back(paddle::string::Split(one, ','));
+        }
+      }
+      return custom_info;
+    };
+
+    if (ipu_config_mapper_.find(key) == ipu_config_mapper_.end()) {
+      PADDLE_THROW(platform::errors::InvalidArgument(
+          "invalid key %s in IPU config", key));
+    }
+    switch (ipu_config_mapper_.at(key)) {
+      case ipu_config_code::ipu_device_num:
+        ipu_device_num_ = std::stoi(value);
+        break;
+      case ipu_config_code::ipu_micro_batch_size:
+        ipu_micro_batch_size_ = std::stoi(value);
+        break;
+      case ipu_config_code::ipu_enable_pipelining:
+        ipu_enable_pipelining_ = string2bool(value);
+        break;
+      case ipu_config_code::ipu_batches_per_step:
+        ipu_batches_per_step_ = std::stoi(value);
+        break;
+      case ipu_config_code::ipu_enable_fp16:
+        ipu_enable_fp16_ = string2bool(value);
+        break;
+      case ipu_config_code::ipu_replica_num:
+        ipu_replica_num_ = std::stoi(value);
+        break;
+      case ipu_config_code::ipu_available_memory_proportion:
+        ipu_available_memory_proportion_ = std::stof(value);
+        break;
+      case ipu_config_code::ipu_enable_half_partial:
+        ipu_enable_half_partial_ = string2bool(value);
+        break;
+      case ipu_config_code::ipu_custom_ops_info:
+        ipu_custom_ops_info_ = string2vector(value);
+        break;
+      case ipu_config_code::ipu_custom_patterns:
+        ipu_custom_patterns_ = string2vector(value);
+        break;
+
+      default:
+        PADDLE_THROW(platform::errors::InvalidArgument(
+            "invalid key %s in IPU config", key));
+        break;
+    }
+  }
+
+  Update();
+}
+
 void AnalysisConfig::EnableONNXRuntime() {
 #ifdef PADDLE_WITH_ONNXRUNTIME
   use_onnxruntime_ = true;
@@ -358,6 +473,8 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
   CP_MEMBER(ipu_replica_num_);
   CP_MEMBER(ipu_available_memory_proportion_);
   CP_MEMBER(ipu_enable_half_partial_);
+  CP_MEMBER(ipu_custom_ops_info_);
+  CP_MEMBER(ipu_custom_patterns_);

   // fleet exe related
   CP_MEMBER(dist_config_);
@@ -914,7 +1031,12 @@ std::string AnalysisConfig::SerializeInfoCache() {
   ss << ipu_replica_num_;
   ss << ipu_available_memory_proportion_;
   ss << ipu_enable_half_partial_;
-
+  for (auto custom_op : ipu_custom_ops_info_)
+    for (auto attr : custom_op) ss << attr;
+  ss << ";";
+  for (auto pattern : ipu_custom_patterns_)
+    for (auto attr : pattern) ss << attr;
+  ss << ";";
   for (auto &op : mixed_black_list_) ss << op.c_str();
   return ss.str();
 }
diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
index 5805b6a2be3..df1858f6e8b 100644
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -1161,6 +1161,8 @@ void AnalysisPredictor::PrepareArgument() {
     argument_.SetIpuAvailableMemoryProportion(
         config_.ipu_available_memory_proportion_);
     argument_.SetIpuEnableHalfPartial(config_.ipu_enable_half_partial_);
+    argument_.SetIpuCustomOpsInfo(config_.ipu_custom_ops_info_);
+    argument_.SetIpuCustomPatterns(config_.ipu_custom_patterns_);
 #endif

   argument_.SetUseNpu(config_.use_npu_);
diff --git a/paddle/fluid/inference/api/paddle_analysis_config.h b/paddle/fluid/inference/api/paddle_analysis_config.h
index b92802a7bf0..d2463d875b6 100644
--- a/paddle/fluid/inference/api/paddle_analysis_config.h
+++ b/paddle/fluid/inference/api/paddle_analysis_config.h
@@ -289,6 +289,22 @@ struct PD_INFER_DECL AnalysisConfig {
                  const std::string& precision = "int16",
                  bool adaptive_seqlen = false);

+  ///
+  /// \brief configs of IPU
+  ///
+  enum class ipu_config_code {
+    ipu_device_num,
+    ipu_micro_batch_size,
+    ipu_enable_pipelining,
+    ipu_batches_per_step,
+    ipu_enable_fp16,
+    ipu_replica_num,
+    ipu_available_memory_proportion,
+    ipu_enable_half_partial,
+    ipu_custom_ops_info,
+    ipu_custom_patterns
+  };
+
   ///
   /// \brief Turn on IPU.
   ///
@@ -318,6 +334,25 @@ struct PD_INFER_DECL AnalysisConfig {
                  float ipu_available_memory_proportion = 1.0,
                  bool ipu_enable_half_partial = false);

+  ///
+  /// \brief Set IPU custom ops and patterns.
+  ///
+  /// \param custom_ops_info the mapping from Paddle custom ops to PopART ops,
+  /// e.g. {{paddle_op_name, popart_op_name, op_domain, op_version}}.
+  /// \param custom_patterns the enable states of PopART patterns,
+  /// e.g. {{pattern_name, enable_pattern}}
+  ///
+  void SetIpuCustomInfo(
+      const std::vector<std::vector<std::string>>& ipu_custom_ops_info = {},
+      const std::map<std::string, bool>& ipu_custom_patterns = {});
+
+  ///
+  /// \brief Load IPU config from a configuration file.
+  ///
+  /// \param config_path the path of the IPU config file.
+  ///
+  void LoadIpuConfig(const std::string& config_path);
+
   ///
   /// \brief Set XPU device id.
   ///
@@ -1113,6 +1148,22 @@ struct PD_INFER_DECL AnalysisConfig {
   float ipu_available_memory_proportion_{1.0};
   bool ipu_enable_half_partial_{false};
+  std::vector<std::vector<std::string>> ipu_custom_ops_info_;
+  std::vector<std::vector<std::string>> ipu_custom_patterns_;
+
+  const std::unordered_map<std::string, ipu_config_code> ipu_config_mapper_ = {
+      {"ipu_device_num", ipu_config_code::ipu_device_num},
+      {"ipu_micro_batch_size", ipu_config_code::ipu_micro_batch_size},
+      {"ipu_enable_pipelining", ipu_config_code::ipu_enable_pipelining},
+      {"ipu_batches_per_step", ipu_config_code::ipu_batches_per_step},
+      {"ipu_enable_fp16", ipu_config_code::ipu_enable_fp16},
+      {"ipu_replica_num", ipu_config_code::ipu_replica_num},
+      {"ipu_available_memory_proportion",
+       ipu_config_code::ipu_available_memory_proportion},
+      {"ipu_enable_half_partial", ipu_config_code::ipu_enable_half_partial},
+      {"ipu_custom_ops_info", ipu_config_code::ipu_custom_ops_info},
+      {"ipu_custom_patterns", ipu_config_code::ipu_custom_patterns}};
+
   // If the config is already used on a predictor, it becomes invalid.
   // Any config can only be used with one predictor.
   // Variables held by config can take up a lot of memory in some cases.
diff --git a/paddle/fluid/platform/device/ipu/ipu_backend.cc b/paddle/fluid/platform/device/ipu/ipu_backend.cc
index 30ee14c4489..fb8686601a1 100644
--- a/paddle/fluid/platform/device/ipu/ipu_backend.cc
+++ b/paddle/fluid/platform/device/ipu/ipu_backend.cc
@@ -52,9 +52,20 @@ void IpuBackend::Compile(framework::ir::Graph* graph,
   if (ipu_strategy_->is_training) {
     compiler_->LowerOptimizer(scope_);
   }
+
+  // the environment variable IPU_ONNX_DUMP_PATH has higher priority
+  std::string onnx_dump_path;
   if (!ipu_strategy_->onnx_dump_path.empty()) {
-    SaveModelProto(ipu_strategy_->onnx_dump_path);
+    onnx_dump_path = ipu_strategy_->onnx_dump_path;
+  }
+  auto* ipu_onnx_dump_path = getenv("IPU_ONNX_DUMP_PATH");
+  if (ipu_onnx_dump_path) {
+    onnx_dump_path = std::string{ipu_onnx_dump_path};
   }
+  if (!onnx_dump_path.empty()) {
+    SaveModelProto(onnx_dump_path);
+  }
+
   executor_->SetCompilerResources(compiler_->GetResources());
   executor_->Prepare(compiler_->GetModelProto());
   is_compiled_ = true;
diff --git a/paddle/fluid/pybind/inference_api.cc b/paddle/fluid/pybind/inference_api.cc
index 221d0af5428..2193b0fa5f7 100644
--- a/paddle/fluid/pybind/inference_api.cc
+++ b/paddle/fluid/pybind/inference_api.cc
@@ -673,6 +673,14 @@ void BindAnalysisConfig(py::module *m) {
            py::arg("ipu_replica_num") = 1,
            py::arg("ipu_available_memory_proportion") = 1.0,
            py::arg("ipu_enable_half_partial") = false)
+      .def("set_ipu_custom_info",
+           &AnalysisConfig::SetIpuCustomInfo,
+           py::arg("ipu_custom_ops_info") =
+               std::vector<std::vector<std::string>>({}),
+           py::arg("ipu_custom_patterns") = std::map<std::string, bool>({}))
+      .def("load_ipu_config",
+           &AnalysisConfig::LoadIpuConfig,
+           py::arg("config_path"))
       .def("disable_gpu", &AnalysisConfig::DisableGpu)
       .def("enable_onnxruntime", &AnalysisConfig::EnableONNXRuntime)
       .def("disable_onnxruntime", &AnalysisConfig::DisableONNXRuntime)
--
GitLab
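
A minimal usage sketch (not part of the patch) of the two AnalysisConfig entry points added above, SetIpuCustomInfo and LoadIpuConfig, assuming an IPU build of Paddle Inference. The model path, the custom-op mapping ("custom_relu" to "Relu" in domain "custom.ops"), and the pattern name "SomePattern" are illustrative placeholders, not values taken from the patch:

// Usage sketch only -- not part of the patch. Op, domain, and pattern names
// below are placeholders; use the ones registered by your custom-op library.
#include "paddle/fluid/inference/api/paddle_analysis_config.h"

int main() {
  paddle::AnalysisConfig config;
  config.SetModel("/path/to/model");  // placeholder model directory
  config.EnableIpu(/*ipu_device_num=*/1, /*ipu_micro_batch_size=*/1);

  // Map the Paddle custom op "custom_relu" to the PopART op "Relu"
  // (domain "custom.ops", version 1) and disable one PopART pattern.
  config.SetIpuCustomInfo({{"custom_relu", "Relu", "custom.ops", "1"}},
                          {{"SomePattern", false}});

  // Alternatively, load every IPU option from a text file that LoadIpuConfig
  // parses as one `key,value` pair per line, e.g.:
  //   ipu_device_num,1
  //   ipu_enable_fp16,false
  //   ipu_custom_ops_info,[[custom_relu,Relu,custom.ops,1]]
  //   ipu_custom_patterns,[[SomePattern,False]]
  // config.LoadIpuConfig("ipu_config.txt");
  return 0;
}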