Unverified commit bd89be12, authored by Allen Guo, committed by GitHub

[IPU] paddle-inference support custom-ops (#45235) (#46868)

* paddle-inference support custom-ops
Co-authored-by: Zhixin Yao <zhixiny@graphcore.ai>

* fix tolower
Co-authored-by: Zhixin Yao <zhixiny@graphcore.ai>
Parent a1cdbad1
@@ -53,6 +53,10 @@ void InferShapePass::ApplyImpl(ir::Graph* graph) const {
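// int64->int32 (IPU/PopART has limited 64-bit support, so 64-bit
// var dtypes are narrowed to their 32-bit counterparts for inference)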
if (node->Var()->GetDataType() == proto::VarType::INT64) {
node->Var()->SetDataType(proto::VarType::INT32);
}
// float64->float32
if (node->Var()->GetDataType() == proto::VarType::FP64) {
node->Var()->SetDataType(proto::VarType::FP32);
}
}
}
......
@@ -37,11 +37,30 @@ void InferenceDtypeTransferPass::ApplyImpl(ir::Graph* graph) const {
VLOG(10) << "Transfer var to fp16...";
auto* scope = ipu_backend->GetScope();
// Record specific vars to skip
std::set<std::string> skip_var_lists;
for (auto* node : graph->Nodes()) {
if (node->IsOp()) {
// The clip op's attrs `max` and `min` only support FP32
if (node->Op()->Type() == "popart_clip") {
auto min_tensor_name = node->Op()->InputArgumentNames()[1];
auto max_tensor_name = node->Op()->InputArgumentNames()[2];
skip_var_lists.insert(min_tensor_name);
skip_var_lists.insert(max_tensor_name);
}
}
}
std::unordered_set<std::string> used_var_names;
for (auto* node : graph->Nodes()) {
if (node->IsVar()) {
auto var_desc = node->Var();
if (var_desc->GetDataType() == proto::VarType::FP32) {
// Skip specific vars
if (skip_var_lists.find(var_desc->Name()) != skip_var_lists.end()) {
continue;
}
// Transfer the dtypes of var_desc
var_desc->SetDataType(proto::VarType::FP16);
VLOG(10) << "Transfer the VarDesc of " << var_desc->Name() << " to "
@@ -81,6 +100,12 @@ void InferenceDtypeTransferPass::ApplyImpl(ir::Graph* graph) const {
}
}
if (op_desc->Type() == "popart_constant") {
// Skip specific constant
auto output_var_name = node->outputs[0]->Var()->Name();
if (skip_var_lists.find(output_var_name) != skip_var_lists.end()) {
continue;
}
// Transfer the dtype of fill_constant Op
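// (the `dtype` attr appears to hold ONNX tensor dtype codes:
// 1 = FLOAT, 10 = FLOAT16)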
if (op_desc->GetAttrIfExists<int>("dtype") == 1) {
op_desc->SetAttr("dtype", 10);
......
@@ -93,6 +93,33 @@ void InferenceProcessPass::ApplyImpl(ir::Graph* graph) const {
// Set tiles_per_ipu for IPUMODEL
ipu_strategy_instance_->tiles_per_ipu = 128;
// Set Cache path
auto* ipu_cache_path = getenv("IPU_CACHE_PATH");
if (ipu_cache_path) {
ipu_strategy_instance_->popart_options.enableEngineCaching = true;
ipu_strategy_instance_->popart_options.cachePath =
std::string{ipu_cache_path};
}
// custom ops and patterns
std::unordered_set<std::string> custom_op_names;
auto custom_ops_info =
graph->Get<std::vector<std::vector<std::string>>>("custom_ops_info");
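// Each entry: {paddle_op_name, popart_op_name, op_domain, op_version}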
for (auto custom_op : custom_ops_info) {
ipu_strategy_instance_->AddCustomOp(
custom_op[0], custom_op[1], custom_op[2], atoi(custom_op[3].c_str()));
custom_op_names.insert(custom_op[0]);
}
auto patterns =
graph->Get<std::vector<std::vector<std::string>>>("custom_patterns");
for (auto pattern : patterns) {
if (pattern[1] == "True") {
ipu_strategy_instance_->EnablePattern(pattern[0]);
} else if (pattern[1] == "False") {
ipu_strategy_instance_->DisablePattern(pattern[0]);
}
}
ipu_backend->SetIpuStrategy(*(ipu_strategy_instance_.get()));
// Get feed_list and fetch list
@@ -140,6 +167,11 @@ void InferenceProcessPass::ApplyImpl(ir::Graph* graph) const {
"feed_list",
new std::vector<std::string>(feed_list.begin(), feed_list.end()));
}
if (pass_name == "popart_canonicalization_pass") {
pass->Set("custom_ops",
new std::unordered_set<std::string>(custom_op_names.begin(),
custom_op_names.end()));
}
pass->Apply(graph);
}
......
@@ -343,6 +343,12 @@ struct Argument {
IpuAvailableMemoryProportion,
float);
DECL_ARGUMENT_FIELD(ipu_enable_half_partial, IpuEnableHalfPartial, bool);
DECL_ARGUMENT_FIELD(ipu_custom_ops_info,
IpuCustomOpsInfo,
std::vector<std::vector<std::string>>);
DECL_ARGUMENT_FIELD(ipu_custom_patterns,
IpuCustomPatterns,
std::vector<std::vector<std::string>>);
// npu related
DECL_ARGUMENT_FIELD(use_npu, UseNpu, bool);
......
@@ -92,6 +92,10 @@ void IrGraphBuildPass::RunImpl(Argument *argument) {
&argument->ipu_available_memory_proportion());
argument->main_graph().SetNotOwned("enable_half_partial",
&argument->ipu_enable_half_partial());
argument->main_graph().SetNotOwned("custom_ops_info",
&argument->ipu_custom_ops_info());
argument->main_graph().SetNotOwned("custom_patterns",
&argument->ipu_custom_patterns());
}
}
#endif
......
@@ -22,6 +22,7 @@
#include "paddle/fluid/platform/cpu_info.h"
#include "paddle/fluid/platform/device/gpu/gpu_info.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/utils/string/split.h"
#ifdef PADDLE_WITH_TENSORRT
#include "paddle/fluid/inference/tensorrt/helper.h"
@@ -208,6 +209,120 @@ void AnalysisConfig::SetIpuConfig(bool ipu_enable_fp16,
Update();
}
void AnalysisConfig::SetIpuCustomInfo(
const std::vector<std::vector<std::string>> &ipu_custom_ops_info,
const std::map<std::string, bool> &ipu_custom_patterns) {
ipu_custom_ops_info_ = ipu_custom_ops_info;
for (const auto &pattern : ipu_custom_patterns) {
ipu_custom_patterns_.push_back(std::vector<std::string>{
pattern.first, pattern.second ? "True" : "False"});
}
Update();
}
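A minimal usage sketch of this API (the model paths, custom op name, domain, and pattern name below are illustrative assumptions, not part of this change):

```cpp
#include "paddle_inference_api.h"

int main() {
  paddle_infer::Config config;
  config.SetModel("model.pdmodel", "model.pdiparams");  // hypothetical paths
  config.EnableIpu(/*ipu_device_num=*/1, /*ipu_micro_batch_size=*/1);
  // Map the Paddle custom op `custom_relu` to the PopART op `Relu`
  // (domain `custom.ops`, version 1) and enable one PopART pattern.
  config.SetIpuCustomInfo({{"custom_relu", "Relu", "custom.ops", "1"}},
                          {{"TiedGather", true}});
  auto predictor = paddle_infer::CreatePredictor(config);
  return 0;
}
```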
void AnalysisConfig::LoadIpuConfig(const std::string &config_path) {
std::ifstream fin(config_path, std::ios::in);
PADDLE_ENFORCE_EQ(
static_cast<bool>(fin.is_open()),
true,
platform::errors::NotFound(
"Cannot open file %s, please confirm whether the file is normal.",
config_path));
std::string line;
while (std::getline(fin, line)) {
// Remove all spaces
line.erase(std::remove(line.begin(), line.end(), ' '), line.end());
std::string key;
std::string value;
std::istringstream stream(line);
// Split string to key and value based on the first `,`
std::getline(stream, key, ',');
std::getline(stream, value);
auto string2bool = [](std::string s) {
std::transform(s.begin(), s.end(), s.begin(), [](unsigned char c) {
return ::tolower(c);
});
return s == "true" || s == "1";
};
// ipu_custom_ops_info:
// [[paddle_op_name, popart_op_name, domain, version], [paddle_op_name,
// popart_op_name, domain, version]...]
// ipu_custom_patterns:
// [[paddle_op_name, enable_pattern], [paddle_op_name, enable_pattern]...]
auto string2vector = [](std::string s) {
std::vector<std::vector<std::string>> custom_info;
s.erase(0, 1);
s.pop_back();
std::string one;
std::istringstream s_stream(s);
while (std::getline(s_stream, one, ']')) {
if (!one.empty()) {
// Strip the leading `[` (and the `,` separator before every
// entry after the first)
one.erase(0, one.find('[') + 1);
custom_info.push_back(paddle::string::Split(one, ','));
}
}
return custom_info;
};
if (ipu_config_mapper_.find(key) == ipu_config_mapper_.end()) {
PADDLE_THROW(platform::errors::InvalidArgument(
"Invalid key %s in IPU config.", key));
}
switch (ipu_config_mapper_.at(key)) {
case ipu_config_code::ipu_device_num:
ipu_device_num_ = std::stoi(value);
break;
case ipu_config_code::ipu_micro_batch_size:
ipu_micro_batch_size_ = std::stoi(value);
break;
case ipu_config_code::ipu_enable_pipelining:
ipu_enable_pipelining_ = string2bool(value);
break;
case ipu_config_code::ipu_batches_per_step:
ipu_batches_per_step_ = std::stoi(value);
break;
case ipu_config_code::ipu_enable_fp16:
ipu_enable_fp16_ = string2bool(value);
break;
case ipu_config_code::ipu_replica_num:
ipu_replica_num_ = std::stoi(value);
break;
case ipu_config_code::ipu_available_memory_proportion:
ipu_available_memory_proportion_ = std::stof(value);
break;
case ipu_config_code::ipu_enable_half_partial:
ipu_enable_half_partial_ = string2bool(value);
break;
case ipu_config_code::ipu_custom_ops_info:
ipu_custom_ops_info_ = string2vector(value);
break;
case ipu_config_code::ipu_custom_patterns:
ipu_custom_patterns_ = string2vector(value);
break;
default:
PADDLE_THROW(platform::errors::InvalidArgument(
"Invalid key %s in IPU config.", key));
break;
}
}
Update();
}
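Given the parser above, the configuration file is plain text with one `key,value` pair per line; spaces are stripped, and list-valued fields use nested brackets that are split on `]` and `,`. A plausible example file (op and pattern names illustrative):

```
ipu_device_num,1
ipu_micro_batch_size,1
ipu_enable_fp16,false
ipu_custom_ops_info,[[custom_relu,Relu,custom.ops,1]]
ipu_custom_patterns,[[TiedGather,True]]
```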
void AnalysisConfig::EnableONNXRuntime() {
#ifdef PADDLE_WITH_ONNXRUNTIME
use_onnxruntime_ = true;
@@ -358,6 +473,8 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
CP_MEMBER(ipu_replica_num_);
CP_MEMBER(ipu_available_memory_proportion_);
CP_MEMBER(ipu_enable_half_partial_);
CP_MEMBER(ipu_custom_ops_info_);
CP_MEMBER(ipu_custom_patterns_);
// fleet exe related
CP_MEMBER(dist_config_);
@@ -910,7 +1027,12 @@ std::string AnalysisConfig::SerializeInfoCache() {
ss << ipu_replica_num_;
ss << ipu_available_memory_proportion_;
ss << ipu_enable_half_partial_;
for (auto custom_op : ipu_custom_ops_info_)
for (auto attr : custom_op) ss << attr;
ss << ";";
for (auto pattern : ipu_custom_patterns_)
for (auto attr : pattern) ss << attr;
ss << ";";
for (auto &op : mixed_black_list_) ss << op.c_str();
return ss.str();
}
......
@@ -1162,6 +1162,8 @@ void AnalysisPredictor::PrepareArgument() {
argument_.SetIpuAvailableMemoryProportion(
config_.ipu_available_memory_proportion_);
argument_.SetIpuEnableHalfPartial(config_.ipu_enable_half_partial_);
argument_.SetIpuCustomOpsInfo(config_.ipu_custom_ops_info_);
argument_.SetIpuCustomPatterns(config_.ipu_custom_patterns_);
#endif
argument_.SetUseNpu(config_.use_npu_);
......
@@ -282,6 +282,22 @@ struct PD_INFER_DECL AnalysisConfig {
const std::string& precision = "int16",
bool adaptive_seqlen = false);
///
/// \brief Configuration codes of IPU.
///
enum class ipu_config_code {
ipu_device_num,
ipu_micro_batch_size,
ipu_enable_pipelining,
ipu_batches_per_step,
ipu_enable_fp16,
ipu_replica_num,
ipu_available_memory_proportion,
ipu_enable_half_partial,
ipu_custom_ops_info,
ipu_custom_patterns
};
///
/// \brief Turn on IPU.
///
@@ -311,6 +327,25 @@ struct PD_INFER_DECL AnalysisConfig {
float ipu_available_memory_proportion = 1.0,
bool ipu_enable_half_partial = false);
///
/// \brief Set IPU custom ops and patterns.
///
/// \param ipu_custom_ops_info the mapping between Paddle custom ops and
/// PopART ops, e.g. {{paddle_op_name, popart_op_name, op_domain,
/// op_version}}.
/// \param ipu_custom_patterns the PopART patterns to enable or disable,
/// e.g. {{pattern_name, enable_pattern}}.
///
void SetIpuCustomInfo(
const std::vector<std::vector<std::string>>& ipu_custom_ops_info = {},
const std::map<std::string, bool>& ipu_custom_patterns = {});
///
/// \brief Load IPU config from configuration file.
///
/// \param config_path the configuration file path for IPU.
///
void LoadIpuConfig(const std::string& config_path);
///
/// \brief Set XPU device id.
///
@@ -1111,6 +1146,22 @@ struct PD_INFER_DECL AnalysisConfig {
float ipu_available_memory_proportion_{1.0};
bool ipu_enable_half_partial_{false};
std::vector<std::vector<std::string>> ipu_custom_ops_info_;
std::vector<std::vector<std::string>> ipu_custom_patterns_;
const std::unordered_map<std::string, ipu_config_code> ipu_config_mapper_ = {
{"ipu_device_num", ipu_config_code::ipu_device_num},
{"ipu_micro_batch_size", ipu_config_code::ipu_micro_batch_size},
{"ipu_enable_pipelining", ipu_config_code::ipu_enable_pipelining},
{"ipu_batches_per_step", ipu_config_code::ipu_batches_per_step},
{"ipu_enable_fp16", ipu_config_code::ipu_enable_fp16},
{"ipu_replica_num", ipu_config_code::ipu_replica_num},
{"ipu_available_memory_proportion",
ipu_config_code::ipu_available_memory_proportion},
{"ipu_enable_half_partial", ipu_config_code::ipu_enable_half_partial},
{"ipu_custom_ops_info", ipu_config_code::ipu_custom_ops_info},
{"ipu_custom_patterns", ipu_config_code::ipu_custom_patterns}};
// If the config is already used on a predictor, it becomes invalid.
// Any config can only be used with one predictor.
// Variables held by config can take up a lot of memory in some cases.
......
@@ -52,9 +52,20 @@ void IpuBackend::Compile(framework::ir::Graph* graph,
if (ipu_strategy_->is_training) {
compiler_->LowerOptimizer(scope_);
}
// The environment variable IPU_ONNX_DUMP_PATH has higher priority
std::string onnx_dump_path;
if (!ipu_strategy_->onnx_dump_path.empty()) {
onnx_dump_path = ipu_strategy_->onnx_dump_path;
}
auto* ipu_onnx_dump_path = getenv("IPU_ONNX_DUMP_PATH");
if (ipu_onnx_dump_path) {
onnx_dump_path = std::string{ipu_onnx_dump_path};
}
if (!onnx_dump_path.empty()) {
SaveModelProto(onnx_dump_path);
}
executor_->SetCompilerResources(compiler_->GetResources());
executor_->Prepare(compiler_->GetModelProto());
is_compiled_ = true;
......
@@ -666,6 +666,14 @@ void BindAnalysisConfig(py::module *m) {
py::arg("ipu_replica_num") = 1,
py::arg("ipu_available_memory_proportion") = 1.0,
py::arg("ipu_enable_half_partial") = false)
.def("set_ipu_custom_info",
&AnalysisConfig::SetIpuCustomInfo,
py::arg("ipu_custom_ops_info") =
std::vector<std::vector<std::string>>({}),
py::arg("ipu_custom_patterns") = std::map<std::string, bool>({}))
.def("load_ipu_config",
&AnalysisConfig::LoadIpuConfig,
py::arg("config_path"))
.def("disable_gpu", &AnalysisConfig::DisableGpu)
.def("enable_onnxruntime", &AnalysisConfig::EnableONNXRuntime)
.def("disable_onnxruntime", &AnalysisConfig::DisableONNXRuntime)
......