Unverified commit ae576f3c, authored by Zhaolong Xing, committed by GitHub

fix: when using the load-model-from-memory mode, RAM usage is too high (#17788)

test=develop
Parent 5efe8c72
@@ -63,6 +63,16 @@ struct Argument {
   using anakin_max_shape_t = std::map<std::string, std::vector<int>>;
   bool Has(const std::string& key) const { return valid_fields_.count(key); }
+  void PartiallyRelease() {
+    if (Has("model_program_path")) {
+      if (Has("model_from_memory") && model_from_memory()) {
+        model_program_path().clear();
+        model_program_path().shrink_to_fit();
+        model_params_path().clear();
+        model_params_path().shrink_to_fit();
+      }
+    }
+  }
 #define DECL_ARGUMENT_FIELD(field__, Field, type__) \
  public:                                            \
...
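Note: the release pattern above pairs clear() with shrink_to_fit() because clear() only resets the string's size; the heap buffer (which holds the whole serialized program/params in memory mode) is normally kept. shrink_to_fit() is a non-binding request, but in practice it returns that buffer to the allocator. A minimal standalone illustration (not part of the patch):

#include <iostream>
#include <string>

int main() {
  std::string buf(100 << 20, 'x');  // ~100 MB held by the string
  buf.clear();                      // size is now 0, capacity is usually kept
  std::cout << "after clear:         capacity = " << buf.capacity() << '\n';
  buf.shrink_to_fit();              // non-binding request to release the buffer
  std::cout << "after shrink_to_fit: capacity = " << buf.capacity() << '\n';
  return 0;
}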
@@ -87,6 +87,7 @@ void IRPassManager::CreatePasses(Argument *argument,
       bool enable_int8 = argument->tensorrt_precision_mode() ==
                          AnalysisConfig::Precision::kInt8;
+      pass->Set("predictor_id", new int(argument->predictor_id()));
       bool use_calib_mode = argument->tensorrt_use_calib_mode();
       pass->Set("enable_int8", new bool(enable_int8));
       pass->Set("use_calib_mode", new bool(use_calib_mode));
...
@@ -199,8 +199,12 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
   SetAttr(op_desc->Proto(), "parameters", params);
   auto use_static_engine = Get<bool>("use_static_engine");
+  // TODO(NHZlX)
+  // There are models with the same structure but different parameters;
+  // when running in the 'use_serialize' mode, there is a bug.
   auto engine_key = GenerateEngineKey(input_names_with_id, output_names_with_id,
                                       std::to_string(0));
+  auto predictor_id = Get<int>("predictor_id");
   // Get "" when there is no cached calibration table data.
   bool load_from_memory = Get<bool>("model_from_memory");
@@ -214,6 +218,7 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
   SetAttr(op_desc->Proto(), "enable_int8", enable_int8);
   SetAttr(op_desc->Proto(), "use_calib_mode", use_calib_mode);
   SetAttr(op_desc->Proto(), "engine_key", engine_key);
+  SetAttr(op_desc->Proto(), "predictor_id", predictor_id);
   std::string trt_engine_serialized_data = "";
   SetAttr(op_desc->Proto(), "engine_serialized_data",
           trt_engine_serialized_data);
@@ -233,15 +238,20 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
   std::copy(params.begin(), params.end(),
             std::back_inserter(*repetitive_params));
-  bool need_serialize = (use_static_engine && !load_from_memory);
+  tensorrt::TensorRTEngine *trt_engine =
+      inference::Singleton<inference::tensorrt::TRTEngineManager>::Global()
+          .Create(engine_key + std::to_string(predictor_id),
+                  Get<int>("max_batch_size"), Get<int>("workspace_size"),
+                  enable_int8, calibrator.get(), Get<int>("gpu_device_id"));
+  bool need_serialize = (use_static_engine && !load_from_memory);
   if (need_serialize) {
     trt_engine_serialized_data = GetTrtEngineSerializedData(
         Get<std::string>("model_opt_cache_dir"), engine_key);
     // we can load the engine info serialized before from the disk.
     if (!trt_engine_serialized_data.empty()) {
-      SetAttr(op_desc->Proto(), "engine_serialized_data",
-              trt_engine_serialized_data);
+      trt_engine->Deserialize(trt_engine_serialized_data);
       LOG(INFO) << "Load TRT Optimized Info from "
                 << GetTrtEngineSerializedPath(
                        Get<std::string>("model_opt_cache_dir"), engine_key);
@@ -254,10 +264,7 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
   // 2. already load serialized trt engine info.
   LOG(INFO) << "Prepare TRT engine (Optimize model structure, Select OP "
                "kernel etc). This process may cost a lot of time.";
-  std::unique_ptr<tensorrt::TensorRTEngine> trt_engine(
-      new tensorrt::TensorRTEngine(
-          Get<int>("max_batch_size"), Get<int>("workspace_size"), enable_int8,
-          calibrator.get(), Get<int>("gpu_device_id")));
   auto *scope = param_scope();
   framework::BlockDesc block_desc_temp(nullptr, block_desc.Proto());
   std::unordered_set<std::string> param_set(params.begin(), params.end());
@@ -265,20 +272,18 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
       .ConvertBlockToTRTEngine(
           &block_desc_temp, *scope,
           std::vector<std::string>(input_names.begin(), input_names.end()),
-          param_set, output_mapping, trt_engine.get());
+          param_set, output_mapping, trt_engine);
+  if (need_serialize) {
     nvinfer1::IHostMemory *serialized_engine_data = trt_engine->Serialize();
     trt_engine_serialized_data =
         std::string((const char *)serialized_engine_data->data(),
                     serialized_engine_data->size());
-  if (need_serialize) {
     SaveTrtEngineSerializedDataToFile(
         GetTrtEngineSerializedPath(Get<std::string>("model_opt_cache_dir"),
                                    engine_key),
         trt_engine_serialized_data);
   }
-  SetAttr(op_desc->Proto(), "engine_serialized_data",
-          trt_engine_serialized_data);
 }
 }  // namespace analysis
...
@@ -69,7 +69,7 @@ void IrParamsSyncAmongDevicesPass::RunImpl(Argument *argument) {
       // Copy the parameter data to a tmp tensor.
       TensorCopySync(*t, cpu_place, &temp_tensor);
       // Reallocation the space on GPU
-      t->mutable_data<float>(place);
+      t->clear();
       // Copy parameter data to newly allocated GPU space.
       TensorCopySync(temp_tensor, place, t);
...
@@ -87,10 +87,12 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
   // Model related.
   CP_MEMBER(model_dir_);
-  CP_MEMBER(prog_file_);
-  CP_MEMBER(params_file_);
   CP_MEMBER(model_from_memory_);  // the memory model reuses prog_file_ and
                                   // params_file_ fields.
+  prog_file_ = std::move(other.prog_file_);
+  params_file_ = std::move(other.params_file_);
   // Gpu related.
   CP_MEMBER(use_gpu_);
   CP_MEMBER(device_id_);
@@ -439,4 +441,12 @@ void AnalysisConfig::EnableAnakinEngine(
   anakin_auto_config_layout_ = auto_config_layout;
   Update();
 }
+void AnalysisConfig::PartiallyRelease() {
+  prog_file_.clear();
+  prog_file_.shrink_to_fit();
+  params_file_.clear();
+  params_file_.shrink_to_fit();
+}
 }  // namespace paddle
@@ -444,6 +444,10 @@ void AnalysisPredictor::OptimizeInferenceProgram() {
   ARGUMENT_CHECK_FIELD((&argument_), ir_analyzed_program);
   inference_program_.reset(
       new framework::ProgramDesc(argument_.ir_analyzed_program()));
+  // The config and the argument take up a lot of memory;
+  // once the predictor is fully set up, we release that storage.
+  argument_.PartiallyRelease();
+  config_.PartiallyRelease();
   LOG(INFO) << "== optimize end ==";
 }
@@ -451,6 +455,8 @@ template <>
 std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
     AnalysisConfig, PaddleEngineKind::kAnalysis>(const AnalysisConfig &config) {
   VLOG(3) << "create AnalysisConfig";
+  PADDLE_ENFORCE(config.is_valid(),
+                 "Note: Each config can only be used for one predictor.");
   if (config.use_gpu()) {
     // 1. GPU memory
     PADDLE_ENFORCE_GE(config.memory_pool_init_size_mb(), 0.f);
@@ -480,6 +486,8 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
   }
   std::unique_ptr<PaddlePredictor> predictor(new AnalysisPredictor(config));
+  // Each config can only be used for one predictor.
+  config.SetInValid();
   auto predictor_p = dynamic_cast<AnalysisPredictor *>(predictor.get());
   if (!predictor_p->Init(nullptr)) {
...
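With these checks in place a config is consumed by the predictor it creates: CreatePaddlePredictor marks it invalid via SetInValid(), and a second predictor needs a freshly built config (this is what the updated tests below do with cfg/cfg1). A rough usage sketch of the memory-loading mode, assuming the public AnalysisConfig / CreatePaddlePredictor API; the helper name, buffers, and GPU settings here are illustrative only:

#include <memory>
#include <string>

#include "paddle/fluid/inference/api/paddle_inference_api.h"

std::unique_ptr<paddle::PaddlePredictor> BuildPredictor(
    const std::string &prog_buf, const std::string &params_buf) {
  paddle::AnalysisConfig config;
  // Load-from-memory mode: the config keeps copies of these buffers in
  // prog_file_ / params_file_, which PartiallyRelease() frees after the
  // predictor finishes program optimization.
  config.SetModelBuffer(prog_buf.data(), prog_buf.size(),
                        params_buf.data(), params_buf.size());
  config.EnableUseGpu(100 /*MB*/, 0 /*device_id*/);
  // After this call the config is marked invalid (SetInValid); reusing it
  // for another predictor would trip the PADDLE_ENFORCE added above.
  return paddle::CreatePaddlePredictor(config);
}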
@@ -232,6 +232,8 @@ struct AnalysisConfig {
                                 bool force_update_static_cache = false);
   /** Tell whether the memory optimization is activated. */
   bool enable_memory_optim() const;
+  void SetInValid() const { is_valid_ = false; }
+  bool is_valid() const { return is_valid_; }
   friend class ::paddle::AnalysisPredictor;
@@ -239,6 +241,7 @@ struct AnalysisConfig {
    * Get a pass builder for customize the passes in IR analysis phase.
    */
   PassStrategy* pass_builder() const;
+  void PartiallyRelease();
  protected:
   // Update the config.
@@ -249,8 +252,8 @@ struct AnalysisConfig {
  protected:
   // Model pathes.
   std::string model_dir_;
-  std::string prog_file_;
-  std::string params_file_;
+  mutable std::string prog_file_;
+  mutable std::string params_file_;
   // GPU related.
   bool use_gpu_{false};
@@ -312,6 +315,11 @@ struct AnalysisConfig {
   bool use_mkldnn_quantizer_{false};
   std::shared_ptr<MkldnnQuantizerConfig> mkldnn_quantizer_config_;
+  // If the config has already been used by a predictor, it becomes invalid.
+  mutable bool is_valid_{true};
+  // Any config can only be used by one predictor.
+  // The variables held by a config can take up a lot of memory in some cases,
+  // so we release that memory once the predictor has been set up.
 };
 }  // namespace paddle
@@ -109,6 +109,7 @@ GpuPassStrategy::GpuPassStrategy() : PassStrategy({}) {
         "conv_affine_channel_fuse_pass",            //
         "conv_eltwiseadd_affine_channel_fuse_pass", //
         "conv_bn_fuse_pass",                        //
+        "conv_eltwiseadd_bn_fuse_pass",             //
 #if CUDNN_VERSION >= 7100  // To run conv_fusion, the version of cudnn must be
                            // guaranteed at least v7
         "conv_elementwise_add_act_fuse_pass",       //
...
@@ -170,6 +170,7 @@ class OpConverter {
       engine->DeclareOutput(output);
     }
     engine->FreezeNetwork();
+    engine->ClearWeights();
   }
   void RreplenishLayerAndOutput(
...
@@ -149,6 +149,12 @@ class TensorRTEngine {
   std::unordered_map<std::string /*name*/, std::unique_ptr<framework::Tensor>>
       weight_map;
+  void ClearWeights() {
+    for (auto& weight_pair : weight_map) {
+      weight_pair.second.reset(nullptr);
+    }
+  }
  private:
   // Each ICudaEngine object is bound to a specific GPU when it is instantiated,
   // ensure that the thread is associated with the correct device by calling
@@ -213,6 +219,39 @@ class TensorRTEngine {
 #define TRT_ENGINE_ADD_LAYER(engine__, layer__, ARGS...) \
   engine__->network()->add##layer__(ARGS);
+class TRTEngineManager {
+ public:
+  bool Empty() const { return engines_.size() == 0; }
+  bool Has(const std::string& name) const {
+    if (engines_.count(name) == 0) return false;
+    return engines_.at(name).get() != nullptr;
+  }
+  TensorRTEngine* Get(const std::string& name) const {
+    return engines_.at(name).get();
+  }
+  TensorRTEngine* Create(std::string name, int max_batch, int max_workspace,
+                         bool enable_int8 = false,
+                         TRTInt8Calibrator* calibrator = nullptr,
+                         int device_id = 0,
+                         nvinfer1::ILogger& logger = NaiveLogger::Global()) {
+    auto* p = new TensorRTEngine(max_batch, max_workspace, enable_int8,
+                                 calibrator, device_id, logger);
+    engines_[name].reset(p);
+    return p;
+  }
+  void DeleteAll() {
+    for (auto& item : engines_) {
+      item.second.reset(nullptr);
+    }
+  }
+ private:
+  std::unordered_map<std::string, std::unique_ptr<TensorRTEngine>> engines_;
+};
 }  // namespace tensorrt
 }  // namespace inference
 }  // namespace paddle
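TRTEngineManager is a plain name-keyed registry: the subgraph pass Create()s an engine under engine_key + predictor_id and TensorRTEngineOp later Get()s the same instance, so the serialized engine no longer has to travel through the op's engine_serialized_data attribute. A stripped-down, self-contained sketch of that pattern (Engine and EngineRegistry here are stand-ins, not the real classes):

#include <iostream>
#include <memory>
#include <string>
#include <unordered_map>

// Stand-in for TensorRTEngine; only the registry mechanics are shown.
struct Engine {
  explicit Engine(int max_batch) : max_batch(max_batch) {}
  int max_batch;
};

class EngineRegistry {
 public:
  static EngineRegistry &Global() {
    static EngineRegistry instance;
    return instance;
  }
  bool Has(const std::string &key) const { return engines_.count(key) > 0; }
  Engine *Get(const std::string &key) const { return engines_.at(key).get(); }
  Engine *Create(const std::string &key, int max_batch) {
    engines_[key].reset(new Engine(max_batch));
    return engines_[key].get();
  }

 private:
  std::unordered_map<std::string, std::unique_ptr<Engine>> engines_;
};

int main() {
  // The pass side: build the engine once, keyed by engine_key + predictor_id.
  const std::string key = std::string("a_engine") + std::to_string(1);
  EngineRegistry::Global().Create(key, /*max_batch=*/8);

  // The op side: look up the same instance instead of deserializing a blob.
  if (EngineRegistry::Global().Has(key)) {
    std::cout << "max_batch = "
              << EngineRegistry::Global().Get(key)->max_batch << '\n';
  }
  return 0;
}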
@@ -31,7 +31,7 @@ struct SimpleOpTypeSetTeller : public Teller {
   std::unordered_set<std::string> teller_set{
       {"mul", "conv2d", "pool2d", "relu", "softmax", "sigmoid",
        "depthwise_conv2d", "batch_norm", "concat", "tanh", "pad",
-       "elementwise_add", "elementwise_mul", "dropout", "split", "prelu",
+       "elementwise_add", "elementwise_mul", "dropout", "prelu",
        "conv2d_transpose", "leaky_relu", "fc"}};
 };
...
@@ -177,11 +177,15 @@ TEST(Analyzer_Pyramid_DNN, compare_zero_copy) {
   AnalysisConfig cfg;
   SetConfig(&cfg);
+  AnalysisConfig cfg1;
+  SetConfig(&cfg1);
   std::vector<std::vector<PaddleTensor>> input_slots_all;
   SetInput(&input_slots_all);
   std::vector<std::string> outputs_name;
   outputs_name.emplace_back("cos_sim_2.tmp_0");
   CompareAnalysisAndZeroCopy(reinterpret_cast<PaddlePredictor::Config *>(&cfg),
+                             reinterpret_cast<PaddlePredictor::Config *>(&cfg1),
                              input_slots_all, outputs_name);
 }
...
@@ -293,11 +293,15 @@ TEST(Analyzer_rnn1, compare_zero_copy) {
   AnalysisConfig cfg;
   SetConfig(&cfg);
+  AnalysisConfig cfg1;
+  SetConfig(&cfg1);
   std::vector<std::vector<PaddleTensor>> input_slots_all;
   SetInput(&input_slots_all);
   std::vector<std::string> outputs_name;
   outputs_name.emplace_back("final_output.tmp_1");
   CompareAnalysisAndZeroCopy(reinterpret_cast<PaddlePredictor::Config *>(&cfg),
+                             reinterpret_cast<PaddlePredictor::Config *>(&cfg1),
                              input_slots_all, outputs_name);
 }
...
@@ -39,10 +39,17 @@ TEST(Analyzer, save_model) {
   mkdir(optimModelPath.c_str(), 0777);
   SaveOptimModel(&cfg, optimModelPath);
-  cfg.pass_builder()->ClearPasses();
-  int origin_num_ops = GetNumOps(cfg);
-  cfg.SetModel(optimModelPath + "/model", optimModelPath + "/params");
-  int fused_num_ops = GetNumOps(cfg);
+  // Each config can only be applied to one predictor.
+  AnalysisConfig cfg2;
+  SetConfig(&cfg2);
+  cfg2.pass_builder()->ClearPasses();
+  cfg2.SetModel(optimModelPath + "/model", optimModelPath + "/params");
+  int origin_num_ops = GetNumOps(cfg2);
+  AnalysisConfig cfg3;
+  SetConfig(&cfg3);
+  cfg3.SetModel(optimModelPath + "/model", optimModelPath + "/params");
+  int fused_num_ops = GetNumOps(cfg3);
   CHECK_LE(fused_num_ops, origin_num_ops);
 }
...
@@ -215,11 +215,15 @@ TEST(Analyzer_seq_pool1, compare_zero_copy) {
   AnalysisConfig cfg;
   SetConfig(&cfg);
+  AnalysisConfig cfg1;
+  SetConfig(&cfg1);
   std::vector<std::vector<PaddleTensor>> input_slots_all;
   SetInput(&input_slots_all);
   std::vector<std::string> outputs_name;
   outputs_name.emplace_back(out_var_name);
   CompareAnalysisAndZeroCopy(reinterpret_cast<PaddlePredictor::Config *>(&cfg),
+                             reinterpret_cast<PaddlePredictor::Config *>(&cfg1),
                              input_slots_all, outputs_name);
 }
...
@@ -534,7 +534,7 @@ void CompareNativeAndAnalysis(
 }
 void CompareAnalysisAndZeroCopy(
-    PaddlePredictor::Config *config,
+    PaddlePredictor::Config *config, PaddlePredictor::Config *config1,
     const std::vector<std::vector<PaddleTensor>> &inputs,
     const std::vector<std::string> &outputs_name) {
   int batch_size = FLAGS_batch_size;
@@ -544,8 +544,8 @@ void CompareAnalysisAndZeroCopy(
   predictor->Run(inputs[0], &analysis_outputs, batch_size);
   // analysis + zero_copy
   std::vector<ZeroCopyTensor> zerocopy_outputs;
-  reinterpret_cast<AnalysisConfig *>(config)->SwitchUseFeedFetchOps(false);
-  predictor = CreateTestPredictor(config, true);
+  reinterpret_cast<AnalysisConfig *>(config1)->SwitchUseFeedFetchOps(false);
+  predictor = CreateTestPredictor(config1, true);
   ConvertPaddleTensorToZeroCopyTensor(predictor.get(), inputs[0]);
   predictor->ZeroCopyRun();
   for (size_t i = 0; i < outputs_name.size(); i++) {
...
@@ -43,7 +43,7 @@ class TensorRTEngineOp : public framework::OperatorBase {
  private:
   std::vector<std::string> input_names_;
   std::unordered_set<std::string> param_names_;
-  mutable std::unique_ptr<TensorRTEngine> trt_engine_;
+  mutable TensorRTEngine *trt_engine_{nullptr};
   int max_batch_size_;
   int workspace_size_;
   std::unique_ptr<TRTInt8Calibrator> calibrator_;
@@ -51,8 +51,8 @@ class TensorRTEngineOp : public framework::OperatorBase {
   bool use_calib_mode_;
   std::string calibration_data_;
   std::string engine_key_;
-  std::string engine_serialized_data_;
   bool calibration_mode_;
+  int predictor_id_;
   int device_id_;
  public:
@@ -69,7 +69,7 @@ class TensorRTEngineOp : public framework::OperatorBase {
     use_calib_mode_ = Attr<bool>("use_calib_mode");
     calibration_data_ = Attr<std::string>("calibration_data");
     engine_key_ = Attr<std::string>("engine_key");
-    engine_serialized_data_ = Attr<std::string>("engine_serialized_data");
+    predictor_id_ = Attr<int>("predictor_id");
     auto params = Attr<std::vector<std::string>>("parameters");
     for (const auto &param : params) {
@@ -84,16 +84,14 @@ class TensorRTEngineOp : public framework::OperatorBase {
     if (enable_int8_ && calibration_data_.size()) {
       calibrator_.reset(new TRTInt8Calibrator(calibration_data_));
     }
-    if (!calibration_mode_ && !engine_serialized_data_.empty()) {
-      trt_engine_.reset(new inference::tensorrt::TensorRTEngine(
-          max_batch_size_, workspace_size_, enable_int8_, calibrator_.get(),
-          device_id_));
-      PADDLE_ENFORCE(engine_serialized_data_.size(),
-                     "TRT serialized data should not be empty here,"
-                     "there must be error when generate serialized data in TRT "
-                     "subgraph detect pass.");
-      trt_engine_->Deserialize(engine_serialized_data_);
+    bool has_engine =
+        inference::Singleton<inference::tensorrt::TRTEngineManager>::Global()
+            .Has(engine_key_ + std::to_string(predictor_id_));
+    if (!calibration_mode_ && has_engine) {
+      trt_engine_ =
+          inference::Singleton<inference::tensorrt::TRTEngineManager>::Global()
+              .Get(engine_key_ + std::to_string(predictor_id_));
     }
   }
@@ -239,12 +237,14 @@ class TensorRTEngineOp : public framework::OperatorBase {
   TensorRTEngine *GetEngine(const framework::Scope &scope,
                             const platform::Place &dev_place) const {
     if (!trt_engine_) {
-      trt_engine_.reset(new inference::tensorrt::TensorRTEngine(
-          max_batch_size_, workspace_size_, enable_int8_, calibrator_.get(),
-          device_id_));
-      PrepareTRTEngine(scope, trt_engine_.get());
+      trt_engine_ =
+          inference::Singleton<inference::tensorrt::TRTEngineManager>::Global()
+              .Create(engine_key_ + std::to_string(predictor_id_),
+                      max_batch_size_, workspace_size_, enable_int8_,
+                      calibrator_.get(), device_id_);
+      PrepareTRTEngine(scope, trt_engine_);
     }
-    return trt_engine_.get();
+    return trt_engine_;
   }
   void PrepareTRTEngine(const framework::Scope &scope,
...
@@ -102,6 +102,7 @@ TEST(TensorRTEngineOp, manual) {
   engine_op_desc.SetAttr("workspace_size", static_cast<int>(1 << 20));
   engine_op_desc.SetAttr("parameters", std::vector<std::string>({}));
   engine_op_desc.SetAttr("engine_key", std::string("a_engine"));
+  engine_op_desc.SetAttr("predictor_id", 1);
   engine_op_desc.SetAttr("calibration_data", std::string(""));
   engine_op_desc.SetAttr("enable_int8", static_cast<bool>(false));
   engine_op_desc.SetAttr("use_calib_mode", static_cast<bool>(false));
@@ -201,6 +202,7 @@ void Execute(int batch_size, int input_dim, int output_dim, int nlayers = 1) {
   engine_op_desc.SetAttr("parameters",
                          std::vector<std::string>({"y0", "y1", "y2", "y3"}));
   engine_op_desc.SetAttr("engine_key", std::string("b_engine"));
+  engine_op_desc.SetAttr("predictor_id", 1);
   engine_op_desc.SetAttr("calibration_data", std::string(""));
   engine_op_desc.SetAttr("enable_int8", static_cast<bool>(false));
   engine_op_desc.SetAttr("use_calib_mode", static_cast<bool>(false));
...