提交 8121b3ec 编写于 作者: N nhzlx

Cherry-pick: add an auto_config_layout argument to the Anakin subgraph engine (#16846)

test=release/1.4
上级 a6fb066f
......@@ -36,13 +36,14 @@ template <typename TargetT, Precision PrecisionType, OpRunType RunType>
AnakinEngine<TargetT, PrecisionType, RunType>::AnakinEngine(
bool need_summary, int device, int max_batch_size,
std::map<std::string, std::vector<int>> max_input_shape,
std::vector<std::string> program_inputs)
std::vector<std::string> program_inputs, bool auto_config_layout)
: graph_(new AnakinGraphT<TargetT, PrecisionType>()),
net_(new AnakinNetT<TargetT, PrecisionType, RunType>(need_summary)) {
device_ = device;
max_batch_size_ = max_batch_size;
max_input_shape_ = max_input_shape;
program_inputs_ = program_inputs;
auto_config_layout_ = auto_config_layout;
}
template <typename TargetT, Precision PrecisionType, OpRunType RunType>
......@@ -57,7 +58,7 @@ void AnakinEngine<TargetT, PrecisionType, RunType>::SetInputShape(
template <typename TargetT, Precision PrecisionType, OpRunType RunType>
void AnakinEngine<TargetT, PrecisionType, RunType>::InitNet() {
net_->init(*graph_);
net_->init(*graph_, auto_config_layout_);
}
template <typename TargetT, Precision PrecisionType, OpRunType RunType>
......
......@@ -58,7 +58,8 @@ class AnakinEngine {
explicit AnakinEngine(
bool need_summary = false, int device = 0, int max_batch_size = 1,
std::map<std::string, std::vector<int>> max_input_shape = {},
std::vector<std::string> program_inputs = {});
std::vector<std::string> program_inputs = {},
bool auto_config_layout = false);
~AnakinEngine();
void InitNet();
void SetInputShape(const std::string &name, std::vector<int> shape);
......@@ -120,6 +121,8 @@ class AnakinEngine {
std::unique_ptr<NetT> net_;
std::vector<std::string> program_inputs_;
std::unordered_map<std::string, float> tensor_scales_;
// Always false in GPU mode, but true in most CPU cases.
bool auto_config_layout_;
};
template <typename TargetT, ::anakin::Precision PrecisionType>
......@@ -138,10 +141,11 @@ class AnakinEngineManager {
AnakinEngineT *Create(bool need_summary, int device, int max_batch_size,
std::map<std::string, std::vector<int>> max_input_shape,
std::vector<std::string> program_inputs,
std::string engine_name) {
bool auto_config_layout, std::string engine_name) {
std::unique_lock<std::mutex> lk(mut_);
auto *p = new AnakinEngine<TargetT, PrecisionType>(
need_summary, device, max_batch_size, max_input_shape, program_inputs);
need_summary, device, max_batch_size, max_input_shape, program_inputs,
auto_config_layout);
engines_[engine_name].reset(p);
return p;
}
......
......@@ -171,6 +171,7 @@ struct Argument {
DECL_ARGUMENT_FIELD(anakin_min_subgraph_size, AnakinMinSubgraphSize, int);
DECL_ARGUMENT_FIELD(anakin_precision_mode, AnakinPrecisionMode,
AnalysisConfig::Precision);
DECL_ARGUMENT_FIELD(anakin_auto_config_layout, AnakinAutoConfigLayout, bool);
DECL_ARGUMENT_FIELD(use_anakin, UseAnakin, bool);
DECL_ARGUMENT_FIELD(anakin_passes_filter, AnakinPassesFilter,
std::vector<std::string>);
......
......@@ -128,6 +128,8 @@ void IRPassManager::CreatePasses(Argument *argument,
pass->Set("enable_int8", new bool(enable_int8));
pass->Set("anakin_ops_filter",
new std::vector<std::string>(argument->anakin_ops_filter()));
pass->Set("auto_config_layout",
new bool(argument->anakin_auto_config_layout()));
}
pre_pass = pass_name;
......
......@@ -226,18 +226,20 @@ void AnakinSubgraphPass::CreateAnakinEngine(
auto max_batch_size = Get<int>("max_batch_size");
auto max_input_shape =
Get<std::map<std::string, std::vector<int>>>("max_input_shape");
bool auto_config_layout = Get<bool>("auto_config_layout");
if (use_gpu) {
#ifdef PADDLE_WITH_CUDA
inference::Singleton<
anakin::AnakinEngineManager<::anakin::saber::NV, PrecisionT>>::Global()
.Create(true, Get<int>("gpu_device_id"), max_batch_size,
max_input_shape, program_inputs, engine_key);
max_input_shape, program_inputs, false, engine_key);
#endif
} else {
inference::Singleton<
anakin::AnakinEngineManager<::anakin::saber::X86, PrecisionT>>::Global()
.Create(true, Get<int>("gpu_device_id"), max_batch_size,
max_input_shape, program_inputs, engine_key);
max_input_shape, program_inputs, auto_config_layout,
engine_key);
}
auto *scope = param_scope();
......
......@@ -117,6 +117,7 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
CP_MEMBER(anakin_max_input_shape_);
CP_MEMBER(anakin_min_subgraph_size_);
CP_MEMBER(anakin_precision_mode_);
CP_MEMBER(anakin_auto_config_layout_);
CP_MEMBER(anakin_passes_filter_);
CP_MEMBER(anakin_ops_filter_);
......@@ -398,7 +399,7 @@ void AnalysisConfig::SwitchIrDebug(int x) {
void AnalysisConfig::EnableAnakinEngine(
int max_batch_size, std::map<std::string, std::vector<int>> max_input_shape,
int min_subgraph_size, AnalysisConfig::Precision precision_mode,
std::vector<std::string> passes_filter,
bool auto_config_layout, std::vector<std::string> passes_filter,
std::vector<std::string> ops_filter) {
anakin_max_batchsize_ = max_batch_size;
anakin_max_input_shape_ = max_input_shape;
......@@ -407,6 +408,7 @@ void AnalysisConfig::EnableAnakinEngine(
anakin_ops_filter_ = ops_filter;
use_anakin_ = true;
anakin_precision_mode_ = precision_mode;
anakin_auto_config_layout_ = auto_config_layout;
Update();
}
} // namespace paddle
......@@ -387,6 +387,7 @@ void AnalysisPredictor::PrepareArgument() {
argument_.SetAnakinMaxInputShape(config_.anakin_max_input_shape_);
argument_.SetAnakinMinSubgraphSize(config_.anakin_min_subgraph_size_);
argument_.SetAnakinPrecisionMode(config_.anakin_precision_mode_);
argument_.SetAnakinAutoConfigLayout(config_.anakin_auto_config_layout_);
argument_.SetAnakinPassesFilter(config_.anakin_passes_filter_);
argument_.SetAnakinOpsFilter(config_.anakin_ops_filter_);
LOG(INFO) << "Anakin subgraph engine is enabled";
......
......@@ -153,6 +153,7 @@ struct AnalysisConfig {
int max_batch_size = 1,
std::map<std::string, std::vector<int>> max_input_shape = {},
int min_subgraph_size = 6, Precision precision = Precision::kFloat32,
bool auto_config_layout = false,
std::vector<std::string> passes_filter = {},
std::vector<std::string> ops_filter = {});
......@@ -294,6 +295,7 @@ struct AnalysisConfig {
int anakin_min_subgraph_size_{6};
std::map<std::string, std::vector<int>> anakin_max_input_shape_;
Precision anakin_precision_mode_;
bool anakin_auto_config_layout_{false};
std::vector<std::string> anakin_passes_filter_;
std::vector<std::string> anakin_ops_filter_;
std::map<std::string, std::string> engine_opt_info_;
......
......@@ -78,11 +78,9 @@ const std::vector<std::string> kAnakinSubgraphPasses({
"fillconstant_elementwisemul_fuse", //
"fc_fuse_pass", //
"conv_elementwise_add_fuse_pass", //
// "conv_bn_fuse_pass", //
// "conv_elementwise_add_fuse_pass", //
"fc_gru_fuse_pass", //
"anakin_subgraph_pass", //
"fc_gru_fuse_pass", //
"fc_gru_fuse_pass", //
"anakin_subgraph_pass", //
"fc_gru_fuse_pass", //
});
GpuPassStrategy::GpuPassStrategy() : PassStrategy({}) {
......
......@@ -236,6 +236,7 @@ void BindAnalysisConfig(py::module *m) {
std::map<std::string, std::vector<int>>(),
py::arg("min_subgraph_size") = 6,
py::arg("precision_mode") = AnalysisConfig::Precision::kFloat32,
py::arg("auto_config_layout") = false,
py::arg("passes_filter") = std::vector<std::string>(),
py::arg("ops_filter") = std::vector<std::string>())
.def("tensorrt_engine_enabled", &AnalysisConfig::tensorrt_engine_enabled)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册