Unverified commit a6b4bee3, authored by Allen Guo, committed by GitHub

[IPU] paddle-inference support custom-ops (#45235)

* paddle-inference support custom-ops
Co-authored-by: Zhixin Yao <zhixiny@graphcore.ai>

* fix tolower
Co-authored-by: Zhixin Yao <zhixiny@graphcore.ai>
Parent 04eb211a
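For context, a minimal sketch of how the API added in this commit might be used from C++. The model directory, the op mapping ("custom_relu" → PopART "Relu" in domain "custom.ops"), and the pattern name "TiedGather" are illustrative, not part of the commit:

```cpp
#include "paddle_inference_api.h"  // header name may vary by install layout

int main() {
  paddle_infer::Config config("./model_dir");  // hypothetical model directory
  config.EnableIpu(/*ipu_device_num=*/1, /*ipu_micro_batch_size=*/1);
  // Register a (hypothetical) Paddle custom op "custom_relu" as the PopART op
  // "Relu" in domain "custom.ops", opset version 1, and toggle one pattern.
  config.SetIpuCustomInfo({{"custom_relu", "Relu", "custom.ops", "1"}},
                          {{"TiedGather", true}});
  auto predictor = paddle_infer::CreatePredictor(config);
  return 0;
}
```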
@@ -53,6 +53,10 @@ void InferShapePass::ApplyImpl(ir::Graph* graph) const {
      if (node->Var()->GetDataType() == proto::VarType::INT64) {
        node->Var()->SetDataType(proto::VarType::INT32);
      }
      // float64 -> float32
      if (node->Var()->GetDataType() == proto::VarType::FP64) {
        node->Var()->SetDataType(proto::VarType::FP32);
      }
    }
  }
......
@@ -37,11 +37,30 @@ void InferenceDtypeTransferPass::ApplyImpl(ir::Graph* graph) const {
  VLOG(10) << "Transfer var to fp16...";
  auto* scope = ipu_backend->GetScope();
  // Record specific vars to skip
  std::set<std::string> skip_var_lists;
  for (auto* node : graph->Nodes()) {
    if (node->IsOp()) {
      // The clip op's attrs `max` and `min` only support FP32
      if (node->Op()->Type() == "popart_clip") {
        auto min_tensor_name = node->Op()->InputArgumentNames()[1];
        auto max_tensor_name = node->Op()->InputArgumentNames()[2];
        skip_var_lists.insert(min_tensor_name);
        skip_var_lists.insert(max_tensor_name);
      }
    }
  }
  std::unordered_set<std::string> used_var_names;
  for (auto* node : graph->Nodes()) {
    if (node->IsVar()) {
      auto var_desc = node->Var();
      if (var_desc->GetDataType() == proto::VarType::FP32) {
        // Skip specific vars
        if (skip_var_lists.find(var_desc->Name()) != skip_var_lists.end()) {
          continue;
        }
        // Transfer the dtypes of var_desc
        var_desc->SetDataType(proto::VarType::FP16);
        VLOG(10) << "Transfer the VarDesc of " << var_desc->Name() << " to "
@@ -81,6 +100,12 @@ void InferenceDtypeTransferPass::ApplyImpl(ir::Graph* graph) const {
      }
    }
    if (op_desc->Type() == "popart_constant") {
      // Skip specific constants
      auto output_var_name = node->outputs[0]->Var()->Name();
      if (skip_var_lists.find(output_var_name) != skip_var_lists.end()) {
        continue;
      }
      // Transfer the dtype of fill_constant Op
      if (op_desc->GetAttrIfExists<int>("dtype") == 1) {
        op_desc->SetAttr("dtype", 10);
......
@@ -93,6 +93,33 @@ void InferenceProcessPass::ApplyImpl(ir::Graph* graph) const {
  // Set tiles_per_ipu for IPUMODEL
  ipu_strategy_instance_->tiles_per_ipu = 128;
  // Set cache path
  auto* ipu_cache_path = getenv("IPU_CACHE_PATH");
  if (ipu_cache_path) {
    ipu_strategy_instance_->popart_options.enableEngineCaching = true;
    ipu_strategy_instance_->popart_options.cachePath =
        std::string{ipu_cache_path};
  }
  // Custom ops and patterns
  std::unordered_set<std::string> custom_op_names;
  auto custom_ops_info =
      graph->Get<std::vector<std::vector<std::string>>>("custom_ops_info");
  for (auto custom_op : custom_ops_info) {
    ipu_strategy_instance_->AddCustomOp(
        custom_op[0], custom_op[1], custom_op[2], atoi(custom_op[3].c_str()));
    custom_op_names.insert(custom_op[0]);
  }
  auto patterns =
      graph->Get<std::vector<std::vector<std::string>>>("custom_patterns");
  for (auto pattern : patterns) {
    if (pattern[1] == "True") {
      ipu_strategy_instance_->EnablePattern(pattern[0]);
    } else if (pattern[1] == "False") {
      ipu_strategy_instance_->DisablePattern(pattern[0]);
    }
  }
  ipu_backend->SetIpuStrategy(*(ipu_strategy_instance_.get()));
  // Get feed_list and fetch list
@@ -140,6 +167,11 @@ void InferenceProcessPass::ApplyImpl(ir::Graph* graph) const {
          "feed_list",
          new std::vector<std::string>(feed_list.begin(), feed_list.end()));
    }
    if (pass_name == "popart_canonicalization_pass") {
      pass->Set("custom_ops",
                new std::unordered_set<std::string>(custom_op_names.begin(),
                                                    custom_op_names.end()));
    }
    pass->Apply(graph);
  }
......
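As the pass above shows, PopART engine caching is enabled by the `IPU_CACHE_PATH` environment variable rather than by an API call. A minimal sketch, assuming a hypothetical cache directory; the variable must be set before the predictor is created so the pass sees it:

```cpp
#include <cstdlib>  // setenv

int main() {
  // Read by InferenceProcessPass, which then sets
  // popart_options.enableEngineCaching and popart_options.cachePath.
  setenv("IPU_CACHE_PATH", "/tmp/ipu_cache", /*overwrite=*/1);
  // ... build paddle_infer::Config and CreatePredictor as usual ...
  return 0;
}
```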
@@ -341,6 +341,12 @@ struct Argument {
                      IpuAvailableMemoryProportion,
                      float);
  DECL_ARGUMENT_FIELD(ipu_enable_half_partial, IpuEnableHalfPartial, bool);
  DECL_ARGUMENT_FIELD(ipu_custom_ops_info,
                      IpuCustomOpsInfo,
                      std::vector<std::vector<std::string>>);
  DECL_ARGUMENT_FIELD(ipu_custom_patterns,
                      IpuCustomPatterns,
                      std::vector<std::vector<std::string>>);

  // npu related
  DECL_ARGUMENT_FIELD(use_npu, UseNpu, bool);
......
@@ -92,6 +92,10 @@ void IrGraphBuildPass::RunImpl(Argument *argument) {
        &argument->ipu_available_memory_proportion());
    argument->main_graph().SetNotOwned("enable_half_partial",
                                       &argument->ipu_enable_half_partial());
    argument->main_graph().SetNotOwned("custom_ops_info",
                                       &argument->ipu_custom_ops_info());
    argument->main_graph().SetNotOwned("custom_patterns",
                                       &argument->ipu_custom_patterns());
  }
}
#endif
......
@@ -22,6 +22,7 @@
#include "paddle/fluid/platform/cpu_info.h"
#include "paddle/fluid/platform/device/gpu/gpu_info.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/utils/string/split.h"

#ifdef PADDLE_WITH_TENSORRT
#include "paddle/fluid/inference/tensorrt/helper.h"

@@ -208,6 +209,120 @@ void AnalysisConfig::SetIpuConfig(bool ipu_enable_fp16,
  Update();
}
void AnalysisConfig::SetIpuCustomInfo(
    const std::vector<std::vector<std::string>> &ipu_custom_ops_info,
    const std::map<std::string, bool> &ipu_custom_patterns) {
  ipu_custom_ops_info_ = ipu_custom_ops_info;
  for (auto iter = ipu_custom_patterns.begin();
       iter != ipu_custom_patterns.end();
       iter++) {
    ipu_custom_patterns_.push_back(std::vector<std::string>{
        iter->first, iter->second ? "True" : "False"});
  }
  Update();
}
void AnalysisConfig::LoadIpuConfig(const std::string &config_path) {
  std::ifstream fin(config_path, std::ios::in);
  PADDLE_ENFORCE_EQ(
      static_cast<bool>(fin.is_open()),
      true,
      platform::errors::NotFound(
          "Cannot open file %s, please confirm whether the file is normal.",
          config_path));
  std::string line;
  while (std::getline(fin, line)) {
    // Remove all spaces
    line.erase(std::remove(line.begin(), line.end(), ' '), line.end());
    std::string key;
    std::string value;
    std::istringstream stream(line);
    // Split the line into key and value at the first `,`
    std::getline(stream, key, ',');
    std::getline(stream, value);
    auto string2bool = [](std::string s) {
      std::transform(s.begin(), s.end(), s.begin(), [](unsigned char c) {
        return ::tolower(c);
      });
      return s == "true" || s == "1";
    };
    // ipu_custom_ops_info:
    //   [[paddle_op_name, popart_op_name, domain, version], ...]
    // ipu_custom_patterns:
    //   [[pattern_name, enable_pattern], ...]
    auto string2vector = [](std::string s) {
      std::vector<std::vector<std::string>> custom_info;
      // Strip the outer `[` and `]`
      s.erase(0, 1);
      s.pop_back();
      std::string one;
      std::istringstream s_stream(s);
      while (std::getline(s_stream, one, ']')) {
        if (!one.empty()) {
          // Remove the leading `[` (or the `,` between entries)
          one.erase(0, 1);
          custom_info.push_back(paddle::string::Split(one, ','));
        }
      }
      return custom_info;
    };
    if (ipu_config_mapper_.find(key) == ipu_config_mapper_.end()) {
      PADDLE_THROW(platform::errors::InvalidArgument(
          "invalid key %s in IPU config", key));
    }
    switch (ipu_config_mapper_.at(key)) {
      case ipu_config_code::ipu_device_num:
        ipu_device_num_ = std::stoi(value);
        break;
      case ipu_config_code::ipu_micro_batch_size:
        ipu_micro_batch_size_ = std::stoi(value);
        break;
      case ipu_config_code::ipu_enable_pipelining:
        ipu_enable_pipelining_ = string2bool(value);
        break;
      case ipu_config_code::ipu_batches_per_step:
        ipu_batches_per_step_ = std::stoi(value);
        break;
      case ipu_config_code::ipu_enable_fp16:
        ipu_enable_fp16_ = string2bool(value);
        break;
      case ipu_config_code::ipu_replica_num:
        ipu_replica_num_ = std::stoi(value);
        break;
      case ipu_config_code::ipu_available_memory_proportion:
        ipu_available_memory_proportion_ = std::stof(value);
        break;
      case ipu_config_code::ipu_enable_half_partial:
        ipu_enable_half_partial_ = string2bool(value);
        break;
      case ipu_config_code::ipu_custom_ops_info:
        ipu_custom_ops_info_ = string2vector(value);
        break;
      case ipu_config_code::ipu_custom_patterns:
        ipu_custom_patterns_ = string2vector(value);
        break;
      default:
        PADDLE_THROW(platform::errors::InvalidArgument(
            "invalid key %s in IPU config", key));
        break;
    }
  }
  Update();
}
void AnalysisConfig::EnableONNXRuntime() {
#ifdef PADDLE_WITH_ONNXRUNTIME
  use_onnxruntime_ = true;
@@ -358,6 +473,8 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
  CP_MEMBER(ipu_replica_num_);
  CP_MEMBER(ipu_available_memory_proportion_);
  CP_MEMBER(ipu_enable_half_partial_);
  CP_MEMBER(ipu_custom_ops_info_);
  CP_MEMBER(ipu_custom_patterns_);

  // fleet exe related
  CP_MEMBER(dist_config_);
@@ -914,7 +1031,12 @@ std::string AnalysisConfig::SerializeInfoCache() {
  ss << ipu_replica_num_;
  ss << ipu_available_memory_proportion_;
  ss << ipu_enable_half_partial_;
  for (auto custom_op : ipu_custom_ops_info_)
    for (auto attr : custom_op) ss << attr;
  ss << ";";
  for (auto pattern : ipu_custom_patterns_)
    for (auto attr : pattern) ss << attr;
  ss << ";";

  for (auto &op : mixed_black_list_) ss << op.c_str();

  return ss.str();
}
......
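Given the parser above (one `key,value` pair per line, all spaces stripped, booleans as case-insensitive `true`/`1`, list values wrapped in `[[...]]`), a config file for LoadIpuConfig might look as follows. This is a hedged sketch: the file name, model path, and values are illustrative, and the "TiedGather" pattern name is an assumption:

```cpp
// Contents of a hypothetical "ipu.config" file:
//
//   ipu_device_num,1
//   ipu_micro_batch_size,1
//   ipu_enable_fp16,false
//   ipu_custom_ops_info,[[custom_relu,Relu,custom.ops,1]]
//   ipu_custom_patterns,[[TiedGather,True]]

paddle_infer::Config config("./model_dir");
config.EnableIpu();
config.LoadIpuConfig("ipu.config");
```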
@@ -1161,6 +1161,8 @@ void AnalysisPredictor::PrepareArgument() {
    argument_.SetIpuAvailableMemoryProportion(
        config_.ipu_available_memory_proportion_);
    argument_.SetIpuEnableHalfPartial(config_.ipu_enable_half_partial_);
    argument_.SetIpuCustomOpsInfo(config_.ipu_custom_ops_info_);
    argument_.SetIpuCustomPatterns(config_.ipu_custom_patterns_);
#endif

  argument_.SetUseNpu(config_.use_npu_);
@@ -289,6 +289,22 @@ struct PD_INFER_DECL AnalysisConfig {
                const std::string& precision = "int16",
                bool adaptive_seqlen = false);

  ///
  /// \brief Config codes for the IPU options accepted by LoadIpuConfig.
  ///
  enum class ipu_config_code {
    ipu_device_num,
    ipu_micro_batch_size,
    ipu_enable_pipelining,
    ipu_batches_per_step,
    ipu_enable_fp16,
    ipu_replica_num,
    ipu_available_memory_proportion,
    ipu_enable_half_partial,
    ipu_custom_ops_info,
    ipu_custom_patterns
  };

  ///
  /// \brief Turn on IPU.
  ///
@@ -318,6 +334,25 @@ struct PD_INFER_DECL AnalysisConfig {
                 float ipu_available_memory_proportion = 1.0,
                 bool ipu_enable_half_partial = false);

  ///
  /// \brief Set IPU custom ops and patterns.
  ///
  /// \param ipu_custom_ops_info the mapping from Paddle custom ops to PopART
  /// ops, e.g. {{paddle_op_name, popart_op_name, op_domain, op_version}}.
  /// \param ipu_custom_patterns the names of PopART patterns and whether to
  /// enable them, e.g. {{pattern_name, enable_pattern}}.
  ///
  void SetIpuCustomInfo(
      const std::vector<std::vector<std::string>>& ipu_custom_ops_info = {},
      const std::map<std::string, bool>& ipu_custom_patterns = {});

  ///
  /// \brief Load IPU config from a configuration file.
  ///
  /// \param config_path the path of the IPU configuration file.
  ///
  void LoadIpuConfig(const std::string& config_path);

  ///
  /// \brief Set XPU device id.
  ///
@@ -1113,6 +1148,22 @@ struct PD_INFER_DECL AnalysisConfig {
  float ipu_available_memory_proportion_{1.0};
  bool ipu_enable_half_partial_{false};
  std::vector<std::vector<std::string>> ipu_custom_ops_info_;
  std::vector<std::vector<std::string>> ipu_custom_patterns_;
  const std::unordered_map<std::string, ipu_config_code> ipu_config_mapper_ = {
      {"ipu_device_num", ipu_config_code::ipu_device_num},
      {"ipu_micro_batch_size", ipu_config_code::ipu_micro_batch_size},
      {"ipu_enable_pipelining", ipu_config_code::ipu_enable_pipelining},
      {"ipu_batches_per_step", ipu_config_code::ipu_batches_per_step},
      {"ipu_enable_fp16", ipu_config_code::ipu_enable_fp16},
      {"ipu_replica_num", ipu_config_code::ipu_replica_num},
      {"ipu_available_memory_proportion",
       ipu_config_code::ipu_available_memory_proportion},
      {"ipu_enable_half_partial", ipu_config_code::ipu_enable_half_partial},
      {"ipu_custom_ops_info", ipu_config_code::ipu_custom_ops_info},
      {"ipu_custom_patterns", ipu_config_code::ipu_custom_patterns}};

  // If the config is already used on a predictor, it becomes invalid.
  // Any config can only be used with one predictor.
  // Variables held by config can take up a lot of memory in some cases.
......
@@ -52,9 +52,20 @@ void IpuBackend::Compile(framework::ir::Graph* graph,
  if (ipu_strategy_->is_training) {
    compiler_->LowerOptimizer(scope_);
  }

  // The environment variable IPU_ONNX_DUMP_PATH has higher priority
  std::string onnx_dump_path;
  if (!ipu_strategy_->onnx_dump_path.empty()) {
    onnx_dump_path = ipu_strategy_->onnx_dump_path;
  }
  auto* ipu_onnx_dump_path = getenv("IPU_ONNX_DUMP_PATH");
  if (ipu_onnx_dump_path) {
    onnx_dump_path = std::string{ipu_onnx_dump_path};
  }
  if (!onnx_dump_path.empty()) {
    SaveModelProto(onnx_dump_path);
  }

  executor_->SetCompilerResources(compiler_->GetResources());
  executor_->Prepare(compiler_->GetModelProto());
  is_compiled_ = true;
......
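As the change above shows, `IPU_ONNX_DUMP_PATH` overrides `ipu_strategy_->onnx_dump_path`. A minimal sketch, with a hypothetical output path; set it before compilation so IpuBackend::Compile sees it:

```cpp
#include <cstdlib>  // setenv

int main() {
  // IpuBackend::Compile dumps the serialized ONNX model to this path,
  // even when onnx_dump_path is unset in the IPU strategy.
  setenv("IPU_ONNX_DUMP_PATH", "/tmp/ipu_model.onnx", /*overwrite=*/1);
  // ... compile and run the IPU program as usual ...
  return 0;
}
```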
@@ -673,6 +673,14 @@ void BindAnalysisConfig(py::module *m) {
           py::arg("ipu_replica_num") = 1,
           py::arg("ipu_available_memory_proportion") = 1.0,
           py::arg("ipu_enable_half_partial") = false)
      .def("set_ipu_custom_info",
           &AnalysisConfig::SetIpuCustomInfo,
           py::arg("ipu_custom_ops_info") =
               std::vector<std::vector<std::string>>({}),
           py::arg("ipu_custom_patterns") = std::map<std::string, bool>({}))
      .def("load_ipu_config",
           &AnalysisConfig::LoadIpuConfig,
           py::arg("config_path"))
      .def("disable_gpu", &AnalysisConfig::DisableGpu)
      .def("enable_onnxruntime", &AnalysisConfig::EnableONNXRuntime)
      .def("disable_onnxruntime", &AnalysisConfig::DisableONNXRuntime)
......