Commit c79f06d3 authored by nhzlx

cherry-pick from feature/anakin-engine: add batch interface for pd-anakin #16178

Parent 69d37f81
......
@@ -90,10 +90,12 @@ class AnakinOpConverter {
       for (int i = 0; i < var_shape.size(); i++) {
         input_shape.push_back(var_shape[i]);
       }
-      input_shape[0] = 1;
+      input_shape[0] = engine->GetMaxBatch();
       engine->SetInputShape(input, input_shape);
     }
     // engine->Graph()->RegistAllOut();
     engine->Optimize();
     engine->InitGraph();
   }
......
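Note: the converter previously pinned every input's batch dimension to 1; it now registers the engine-wide maximum, so Anakin reserves worst-case buffers once, when the graph is optimized. A minimal sketch of the contract this creates, with hypothetical names that are not the Paddle or Anakin API:

#include <cassert>
#include <vector>

// Stand-in for the engine-side bookkeeping the hunk above sets up.
struct EngineStub {
  int max_batch;                      // what GetMaxBatch() would return
  std::vector<int> registered_shape;  // dim 0 set to max_batch at build time

  void Run(const std::vector<int> &actual_shape) {
    // Any batch up to max_batch fits the preallocated buffers; a larger
    // one would overflow what was reserved at Optimize() time.
    assert(actual_shape[0] <= max_batch && "batch exceeds registered maximum");
  }
};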
......
@@ -34,10 +34,12 @@ namespace anakin {
 template <typename TargetT, Precision PrecisionType, OpRunType RunType>
 AnakinEngine<TargetT, PrecisionType, RunType>::AnakinEngine(bool need_summary,
-                                                            int device)
+                                                            int device,
+                                                            int max_batch_size)
     : graph_(new AnakinGraphT<TargetT, PrecisionType>()),
       net_(new AnakinNetT<TargetT, PrecisionType, RunType>(need_summary)) {
   device_ = device;
+  max_batch_size_ = max_batch_size;
 }

 template <typename TargetT, Precision PrecisionType, OpRunType RunType>
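Note: the widened constructor can also be called directly (values illustrative; the manager's Create further down is the usual path):

// An FP32 engine on GPU 0, sized for batches of up to 8 (illustrative).
AnakinEngine<NV, Precision::FP32> engine(/*need_summary=*/false,
                                         /*device=*/0,
                                         /*max_batch_size=*/8);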
......
@@ -71,8 +73,8 @@ void AnakinEngine<TargetT, PrecisionType, RunType>::Execute(
   for (const auto &input : inputs) {
     auto *tensor = input.second;
     auto *data = tensor->data<float>();
-    auto fluid_input_shape = framework::vectorize2int(tensor->dims());
+    auto fluid_input_shape = framework::vectorize2int(tensor->dims());
     auto *anakin_input = net_->get_in(input.first);
     auto net_shape = anakin_input->shape();
     if (tensor->numel() > net_shape.count()) {
......
@@ -84,11 +86,13 @@ void AnakinEngine<TargetT, PrecisionType, RunType>::Execute(
     anakin_input->reshape(fluid_input_shape);
     net_shape = anakin_input->shape();
     ::anakin::saber::Tensor<TargetT> tmp_anakin_tensor(data, TargetT(), 0,
-                                                       net_shape);
-    anakin_input->share_from(tmp_anakin_tensor);
+                                                       // net_shape);
+                                                       fluid_input_shape);
+    anakin_input->copy_from(tmp_anakin_tensor);
   }
+  cudaDeviceSynchronize();
   net_->prediction();

   for (const auto &output : outputs) {
     platform::CUDAPlace gpu_place(device_);
......
@@ -98,12 +102,10 @@ void AnakinEngine<TargetT, PrecisionType, RunType>::Execute(
     auto anakin_output_shape = anakin_output->valid_shape();
     tensor->Resize(framework::make_ddim(anakin_output_shape));
     auto *fluid_data = tensor->mutable_data<float>(gpu_place);
     memory::Copy(gpu_place, static_cast<void *>(fluid_data), gpu_place,
                  static_cast<void *>(anakin_data),
                  tensor->numel() * sizeof(float), stream);
   }
-  cudaDeviceSynchronize();
 }
......
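Note: two behavioral changes land in Execute() here. The temporary Anakin tensor is now built with the fluid input's shape (the old net_shape argument is left commented out) and its contents are copied in with copy_from rather than aliased with share_from, plausibly because the engine buffer is sized for the maximum batch while a request may be smaller. And cudaDeviceSynchronize() moves from after the output copies to before prediction(), so input transfers complete before inference starts. Condensed, reusing the hunk's variables (the request shape is illustrative):

// Condensed from the hunks above; {3, 128} is an illustrative request
// shape, smaller than the registered max-batch shape.
std::vector<int> fluid_input_shape = {3, 128};
anakin_input->reshape(fluid_input_shape);      // shrink the max-batch view
::anakin::saber::Tensor<TargetT> tmp(data, TargetT(), 0, fluid_input_shape);
anakin_input->copy_from(tmp);                  // deep copy, no aliasing
cudaDeviceSynchronize();                       // input copies land first
net_->prediction();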
......
@@ -55,7 +55,8 @@ class AnakinEngine {
   using GraphT = ::anakin::graph::Graph<TargetT, PrecisionType>;

  public:
-  explicit AnakinEngine(bool need_summary = false, int device = 0);
+  explicit AnakinEngine(bool need_summary = false, int device = 0,
+                        int max_batch_size = 1);
   ~AnakinEngine();
   void InitGraph();
   void SetInputShape(const std::string &name, std::vector<int> shape);
......
@@ -70,10 +71,12 @@ class AnakinEngine {
                     "Add operation's attribution.");
   }
   NetT *Net() { return net_.get(); }
   GraphT *Graph() { return graph_.get(); }
   std::unique_ptr<AnakinEngine> Clone();
   void Freeze();
   void Optimize();
   void Save(std::string path) { graph_->save(path); }
+  int GetMaxBatch() { return max_batch_size_; }
   // void SaveSerializedData(std::string& data) { graph_->save_to_string(data);
   // }
   // void LoadSerializedData(const std::string& data) {
......
@@ -83,6 +86,7 @@ class AnakinEngine {
                cudaStream_t stream);

  private:
+  int max_batch_size_;
   int device_;
   std::unique_ptr<GraphT> graph_;
   std::unique_ptr<NetT> net_;
......
@@ -100,10 +104,11 @@ class AnakinEngineManager {
     return engines_.at(name).get();
   }

-  AnakinNvEngineT *Create(bool need_summary, int device,
+  AnakinNvEngineT *Create(bool need_summary, int device, int max_batch_size,
                          std::string engine_name) {
     std::unique_lock<std::mutex> lk(mut_);
-    auto *p = new AnakinEngine<NV, Precision::FP32>(need_summary, device);
+    auto *p = new AnakinEngine<NV, Precision::FP32>(need_summary, device,
+                                                    max_batch_size);
     engines_[engine_name].reset(p);
     return p;
   }
......
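Note: a call site of the updated factory, mirroring the AnakinSubgraphPass hunk further down (the engine key is illustrative):

auto *engine =
    inference::Singleton<anakin::AnakinEngineManager>::Global().Create(
        /*need_summary=*/true, /*device=*/0, /*max_batch_size=*/8,
        /*engine_name=*/"anakin_subgraph_0");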
......
@@ -150,6 +150,7 @@ struct Argument {
   DECL_ARGUMENT_FIELD(tensorrt_use_static_engine, TensorRtUseStaticEngine,
                       bool);
+  DECL_ARGUMENT_FIELD(anakin_max_batch_size, AnakinMaxBatchSize, int);
   DECL_ARGUMENT_FIELD(use_anakin, UseAnakin, bool);

   // Memory optimized related.
......
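Note: as the remaining hunks of this commit show, DECL_ARGUMENT_FIELD generates a snake_case getter and a PascalCase setter for the declared field. The new field is used at both ends of the pipeline:

// Producer (analysis_predictor.cc, below):
argument_.SetAnakinMaxBatchSize(config_.anakin_max_batchsize_);
// Consumer (ir_pass_manager.cc, below):
pass->Set("max_batch_size", new int(argument->anakin_max_batch_size()));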
......
@@ -77,6 +77,7 @@ void IRPassManager::CreatePasses(Argument *argument,
       pass->Set("engine_opt_info", new std::map<std::string, std::string>(
                                        argument->engine_opt_info()));
       pass->Set("predictor_id", new int(argument->predictor_id()));
+      pass->Set("max_batch_size", new int(argument->anakin_max_batch_size()));
     }

     if (pass_name == "tensorrt_subgraph_pass") {
......
@@ -91,16 +92,20 @@ void IRPassManager::CreatePasses(Argument *argument,
                               AnalysisConfig::Precision::kInt8;
       pass->Set("enable_int8", new bool(enable_int8));
-      std::string model_opt_cache_dir =
-          argument->Has("model_dir")
-              ? argument->model_dir()
-              : GetDirRoot(argument->model_program_path());
-      pass->Set(
-          "model_opt_cache_dir",
-          new std::string(GetOrCreateModelOptCacheDir(model_opt_cache_dir)));
+      bool use_static_engine = argument->tensorrt_use_static_engine();
+      bool model_from_memory = argument->model_from_memory();
+      if ((!model_from_memory && use_static_engine)) {
+        std::string model_opt_cache_dir =
+            argument->Has("model_dir")
+                ? argument->model_dir()
+                : GetDirRoot(argument->model_program_path());
+        pass->Set(
+            "model_opt_cache_dir",
+            new std::string(GetOrCreateModelOptCacheDir(model_opt_cache_dir)));
+      }
       pass->Set("gpu_device_id", new int(argument->gpu_device_id()));
-      pass->Set("use_static_engine",
-                new bool(argument->tensorrt_use_static_engine()));
+      pass->Set("use_static_engine", new bool(use_static_engine));
       pass->Set("model_from_memory", new bool(argument->model_from_memory()));
       pass->Set("engine_opt_info", new std::map<std::string, std::string>(
                                        argument->engine_opt_info()));
......
......
@@ -256,10 +256,11 @@ void AnakinSubgraphPass::CreateAnakinOp(
       input_names_with_id, output_names_with_id, std::to_string(predictor_id));
   SetAttr(op_desc->Proto(), "engine_key", engine_key);

+  int max_batch_size = Get<int>("max_batch_size");
   auto *anakin_engine =
       inference::Singleton<anakin::AnakinEngineManager>::Global().Create(
-          true, Get<int>("gpu_device_id"), engine_key);
+          true, Get<int>("gpu_device_id"), max_batch_size, engine_key);

   auto *scope = param_scope();
   std::unordered_set<std::string> param_set(params.begin(), params.end());
......
......
@@ -245,8 +245,11 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
       trt_engine_serialized_data.empty()) {
     std::copy(params.begin(), params.end(),
               std::back_inserter(*repetitive_params));
-    trt_engine_serialized_data = GetTrtEngineSerializedData(
-        Get<std::string>("model_opt_cache_dir"), engine_key);
+    if (use_static_engine && !load_from_memory) {
+      trt_engine_serialized_data = GetTrtEngineSerializedData(
+          Get<std::string>("model_opt_cache_dir"), engine_key);
+    }

     if (trt_engine_serialized_data.empty()) {
       LOG(INFO) << "Prepare TRT engine (Optimize model structure, Select OP "
......
@@ -267,10 +270,13 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
     trt_engine_serialized_data =
         std::string((const char *)serialized_engine_data->data(),
                     serialized_engine_data->size());
-    SaveTrtEngineSerializedDataToFile(
-        GetTrtEngineSerializedPath(Get<std::string>("model_opt_cache_dir"),
-                                   engine_key),
-        trt_engine_serialized_data);
+    if (use_static_engine && !load_from_memory) {
+      SaveTrtEngineSerializedDataToFile(
+          GetTrtEngineSerializedPath(Get<std::string>("model_opt_cache_dir"),
+                                     engine_key),
+          trt_engine_serialized_data);
+    }
   } else {
     LOG(INFO) << "Load TRT Optimized Info from "
               << GetTrtEngineSerializedPath(
......
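Note: both TensorRT hunks apply the same policy: the on-disk serialized-engine cache is only read or written when a static engine is requested and the model was not loaded from memory (an in-memory model has no cache directory to key against). The policy in isolation, with hypothetical helper names standing in for the real functions:

#include <string>

// Hypothetical helpers standing in for GetTrtEngineSerializedData,
// SaveTrtEngineSerializedDataToFile, and the engine build step.
std::string LoadCache(const std::string &dir, const std::string &key);
void SaveCache(const std::string &dir, const std::string &key,
               const std::string &data);
std::string BuildAndSerializeEngine();

void DemoCachePolicy(bool use_static_engine, bool load_from_memory,
                     const std::string &cache_dir, const std::string &key) {
  bool cache_usable = use_static_engine && !load_from_memory;
  std::string serialized;
  if (cache_usable) serialized = LoadCache(cache_dir, key);    // read side
  if (serialized.empty()) {
    serialized = BuildAndSerializeEngine();                    // slow path
    if (cache_usable) SaveCache(cache_dir, key, serialized);   // write side
  }
}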
......
@@ -109,6 +109,9 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
   CP_MEMBER(use_mkldnn_);
   CP_MEMBER(mkldnn_enabled_op_types_);

+  CP_MEMBER(use_anakin_);
+  CP_MEMBER(anakin_max_batchsize_);
+
   // Ir related.
   CP_MEMBER(enable_ir_optim_);
   CP_MEMBER(use_feed_fetch_ops_);
......
@@ -352,7 +355,8 @@ void AnalysisConfig::SwitchIrDebug(int x) {
   ir_debug_ = x;
   Update();
 }
-void AnalysisConfig::EnableAnakinEngine() {
+void AnalysisConfig::EnableAnakinEngine(int max_batch_size) {
+  anakin_max_batchsize_ = max_batch_size;
   use_anakin_ = true;
   Update();
 }
......
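Note: from the application side, the new parameter is passed when enabling the engine; omitting it keeps the old behavior via the default of 1. A minimal sketch (model path and pool size are made up; the API shape matches this era of the Paddle inference library):

#include "paddle/fluid/inference/api/paddle_inference_api.h"

int main() {
  paddle::AnalysisConfig config;
  config.SetModel("./mobilenet_v1");                // hypothetical model dir
  config.EnableUseGpu(100 /* MB pool */, 0 /* gpu id */);
  config.EnableAnakinEngine(/*max_batch_size=*/8);  // new parameter
  auto predictor = paddle::CreatePaddlePredictor(config);
  return predictor != nullptr ? 0 : 1;
}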
......
@@ -379,6 +379,7 @@ void AnalysisPredictor::OptimizeInferenceProgram() {
   }
   if (config_.use_gpu() && config_.anakin_engine_enabled()) {
+    argument_.SetAnakinMaxBatchSize(config_.anakin_max_batchsize_);
     LOG(INFO) << "Anakin subgraph engine is enabled";
   }
......
......
@@ -145,7 +145,7 @@ struct AnalysisConfig {
   /**
    * \brief Turn on the usage of Anakin sub-graph engine.
    */
-  void EnableAnakinEngine();
+  void EnableAnakinEngine(int max_batch_size = 1);

   /** A boolean state indicating whether the Anakin sub-graph engine is used.
    */
......
@@ -270,6 +270,7 @@ struct AnalysisConfig {
   mutable std::unique_ptr<PassStrategy> pass_builder_;
   bool use_anakin_{false};
+  int anakin_max_batchsize_;
   std::map<std::string, std::string> engine_opt_info_;
 };
......