Unverified commit bdce552b, authored by Zhang Jun, committed by GitHub

update trt workspace size param (#44469)

* update trt workspace size param

* update

* update

* update

* use int64_t

* use int64_t

* update

* update
Parent 54d98963
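This commit widens the TensorRT workspace-size parameter from int to int64_t throughout the inference API and raises the default from 1 << 20 (1 MiB) to 1 << 30 (1 GiB). A signed 32-bit int tops out at 2^31 - 1 bytes, so workspaces of 2 GiB or more cannot be represented in the old type. A minimal sketch of the truncation this avoids (illustrative only, not part of the patch):

#include <cstdint>
#include <iostream>

int main() {
  // 4 GiB expressed in a 64-bit type: fine.
  int64_t ws64 = int64_t{1} << 32;  // 4294967296
  // Squeezing it back into 32 bits loses the value entirely.
  int ws32 = static_cast<int>(ws64);
  std::cout << ws64 << " -> " << ws32 << "\n";  // prints "4294967296 -> 0"
  return 0;
}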
@@ -216,7 +216,7 @@ struct Argument {
   DECL_ARGUMENT_FIELD(tensorrt_use_dla, TensorRtUseDLA, bool);
   DECL_ARGUMENT_FIELD(tensorrt_dla_core, TensorRtDLACore, int);
   DECL_ARGUMENT_FIELD(tensorrt_max_batch_size, TensorRtMaxBatchSize, int);
-  DECL_ARGUMENT_FIELD(tensorrt_workspace_size, TensorRtWorkspaceSize, int);
+  DECL_ARGUMENT_FIELD(tensorrt_workspace_size, TensorRtWorkspaceSize, int64_t);
   DECL_ARGUMENT_FIELD(tensorrt_min_subgraph_size, TensorRtMinSubgraphSize, int);
   DECL_ARGUMENT_FIELD(tensorrt_disabled_ops,
                       TensorRtDisabledOPs,
...
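For orientation, DECL_ARGUMENT_FIELD(field, Field, type) declares a typed slot on Argument plus accessors, so changing the third argument to int64_t changes the stored field's type everywhere it is read. A rough sketch of the expansion (simplified and assumed; the real macro in argument.h also enforces validity on access):

// Simplified expansion sketch -- illustrative, not the actual macro body.
#define DECL_ARGUMENT_FIELD(field__, Field__, type__)  \
 public:                                               \
  type__& field__() { return field__##_; }             \
  void Set##Field__(const type__& x) {                 \
    field__##_ = x;                                    \
    field__##_valid_ = true;                           \
  }                                                    \
  bool field__##_valid() { return field__##_valid_; }  \
                                                       \
 private:                                              \
  type__ field__##_;                                   \
  bool field__##_valid_{false};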
@@ -133,7 +133,8 @@ void IRPassManager::CreatePasses(Argument *argument,
           argument->bfloat16_enabled_op_types()));
 #endif
     } else if (pass_name == "tensorrt_subgraph_pass") {
-      pass->Set("workspace_size", new int(argument->tensorrt_workspace_size()));
+      pass->Set("workspace_size",
+                new int64_t(argument->tensorrt_workspace_size()));
       pass->Set("max_batch_size", new int(argument->tensorrt_max_batch_size()));
       pass->Set("min_subgraph_size",
                 new int(argument->tensorrt_min_subgraph_size()));
...
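pass->Set stores a heap-allocated, type-erased attribute that the subgraph pass later reads back with Get<T>(); the stored type and the requested T must match exactly, which is why the Set and Get sides change to int64_t together. A toy model of that contract (std::any stands in here; this is not Paddle's actual Pass attribute implementation):

#include <any>
#include <cstdint>
#include <iostream>
#include <map>
#include <string>

int main() {
  std::map<std::string, std::any> pass_attrs;
  pass_attrs["workspace_size"] = int64_t{1} << 31;  // stored as int64_t

  // Matching read succeeds:
  std::cout << std::any_cast<int64_t>(pass_attrs["workspace_size"]) << "\n";

  // A stale read as int throws, just as a leftover Get<int> would fail:
  try {
    std::any_cast<int>(pass_attrs["workspace_size"]);
  } catch (const std::bad_any_cast&) {
    std::cout << "type mismatch caught\n";
  }
  return 0;
}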
 // Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
...
@@ -378,7 +379,7 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
   op_desc->SetBlockAttr("sub_block", new_block);
   op_desc->SetAttr("subgraph", block_desc.Proto()->SerializeAsString());
   op_desc->SetAttr("max_batch_size", max_batch_size);
-  op_desc->SetAttr("workspace_size", Get<int>("workspace_size"));
+  op_desc->SetAttr("workspace_size", Get<int64_t>("workspace_size"));
   op_desc->SetAttr("gpu_id", Get<int>("gpu_device_id"));
   op_desc->SetAttr("output_name_mapping", output_mapping);
   op_desc->SetAttr("origin_output_dims", renamed_output_dims);
@@ -499,7 +500,7 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
   inference::Singleton<inference::tensorrt::TRTEngineManager>::Global()
       .Create(engine_key + std::to_string(predictor_id),
               max_batch_size,
-              Get<int>("workspace_size"),
+              Get<int64_t>("workspace_size"),
               precision_mode,
               calibrator.get(),
               Get<int>("gpu_device_id"),
...
@@ -517,7 +517,7 @@ MkldnnQuantizerConfig *AnalysisConfig::mkldnn_quantizer_config() const {
 }
 
 void AnalysisConfig::EnableTensorRtEngine(
-    int workspace_size,
+    int64_t workspace_size,
     int max_batch_size,
     int min_subgraph_size,
     AnalysisConfig::Precision precision_mode,
...
@@ -523,7 +523,7 @@ struct PD_INFER_DECL AnalysisConfig {
   /// quantization).
   ///
   ///
-  void EnableTensorRtEngine(int workspace_size = 1 << 20,
+  void EnableTensorRtEngine(int64_t workspace_size = 1 << 30,
                             int max_batch_size = 1,
                             int min_subgraph_size = 3,
                             Precision precision = Precision::kFloat32,
@@ -967,7 +967,7 @@ struct PD_INFER_DECL AnalysisConfig {
   bool use_tensorrt_{false};
   // For workspace_size, refer it from here:
   // https://docs.nvidia.com/deeplearning/sdk/tensorrt-developer-guide/index.html#troubleshooting
-  int tensorrt_workspace_size_{1 << 30};
+  int64_t tensorrt_workspace_size_{1 << 30};
   // While TensorRT allows an engine optimized for a given max batch size
   // to run at any smaller size, the performance for those smaller
   // sizes may not be as well-optimized. Therefore, Max batch is best
...
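A hedged usage sketch of the widened C++ API: with an int64_t parameter a caller can now request a workspace at or above 2 GiB. The model directory and GPU memory-pool size below are placeholders, and the include path depends on how the inference library is installed:

#include <cstdint>
#include "paddle_inference_api.h"  // paddle_infer::Config aliases AnalysisConfig

int main() {
  paddle_infer::Config config("model_dir");  // placeholder model directory
  config.EnableUseGpu(256 /* MB pool */, 0 /* device id */);
  // 4 GiB workspace: representable only after the int64_t change.
  config.EnableTensorRtEngine(int64_t{1} << 32,
                              1,   // max_batch_size
                              3,   // min_subgraph_size
                              paddle_infer::PrecisionType::kFloat32,
                              false,   // use_static
                              false);  // use_calib_mode
  auto predictor = paddle_infer::CreatePredictor(config);
  return predictor ? 0 : 1;
}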
@@ -214,7 +214,7 @@ PADDLE_CAPI_EXPORT extern bool PD_SpecifyInputName(
 PADDLE_CAPI_EXPORT extern void PD_EnableTensorRtEngine(
     PD_AnalysisConfig* config,
-    int workspace_size,
+    int64_t workspace_size,
     int max_batch_size,
     int min_subgraph_size,
     Precision precision,
...
@@ -243,7 +243,7 @@ bool PD_SpecifyInputName(const PD_AnalysisConfig* config) {
 }
 
 void PD_EnableTensorRtEngine(PD_AnalysisConfig* config,
-                             int workspace_size,
+                             int64_t workspace_size,
                              int max_batch_size,
                              int min_subgraph_size,
                              Precision precision,
...
@@ -219,7 +219,7 @@ PD_Bool PD_ConfigIrOptim(__pd_keep PD_Config* pd_config) {
 }
 
 void PD_ConfigEnableTensorRtEngine(__pd_keep PD_Config* pd_config,
-                                   int32_t workspace_size,
+                                   int64_t workspace_size,
                                    int32_t max_batch_size,
                                    int32_t min_subgraph_size,
                                    PD_PrecisionType precision,
...
@@ -329,7 +329,7 @@ PADDLE_CAPI_EXPORT extern PD_Bool PD_ConfigIrOptim(
 ///
 PADDLE_CAPI_EXPORT extern void PD_ConfigEnableTensorRtEngine(
     __pd_keep PD_Config* pd_config,
-    int32_t workspace_size,
+    int64_t workspace_size,
     int32_t max_batch_size,
     int32_t min_subgraph_size,
     PD_PrecisionType precision,
...
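The same widening is mirrored in both C interfaces. A sketch of a call through the newer PD_Config API, written as C++ to match the other examples; the trailing use_static / use_calib_mode arguments follow the C++ signature and are an assumption here, since the hunk above is truncated:

#include <cstdint>
#include "pd_inference_api.h"  // umbrella header for the C API

int main() {
  PD_Config* config = PD_ConfigCreate();
  PD_ConfigEnableUseGpu(config, 256 /* MB pool */, 0 /* device id */);
  PD_ConfigEnableTensorRtEngine(config,
                                int64_t{1} << 32,  // 4 GiB workspace
                                1,                 // max_batch_size
                                3,                 // min_subgraph_size
                                PD_PRECISION_FLOAT32,
                                0,   // use_static (assumed trailing arg)
                                0);  // use_calib_mode (assumed trailing arg)
  PD_ConfigDestroy(config);
  return 0;
}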
@@ -79,7 +79,7 @@ class TRTConvertValidation {
   TRTConvertValidation(int max_batch_size,
                        const std::unordered_set<std::string>& parameters,
                        framework::Scope& scope,  // NOLINT
-                       int workspace_size = 1 << 10,
+                       int64_t workspace_size = 1 << 30,
                        bool if_add_batch = true)
       : parameters_(parameters),
         scope_(scope),
...
@@ -206,7 +206,7 @@ class TensorRTEngine {
   TensorRTEngine(
       int max_batch,
-      int max_workspace,
+      int64_t max_workspace,
       AnalysisConfig::Precision precision = AnalysisConfig::Precision::kFloat32,
       TRTInt8Calibrator* calibrator = nullptr,
       int device_id = 0,
@@ -672,7 +672,7 @@ class TensorRTEngine {
   // the runtime batch size
   static int runtime_batch_;
   // the max memory size the engine uses
-  int max_workspace_;
+  int64_t max_workspace_;
   AnalysisConfig::Precision precision_;
   TRTInt8Calibrator* calibrator_;
@@ -767,7 +767,7 @@ class TRTEngineManager {
   TensorRTEngine* Create(
       std::string name,
       int max_batch,
-      int max_workspace,
+      int64_t max_workspace,
       AnalysisConfig::Precision precision = AnalysisConfig::Precision::kFloat32,
       TRTInt8Calibrator* calibrator = nullptr,
       int device_id = 0,
...
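Inside the engine the widened value is ultimately handed to TensorRT itself, whose builder-config setter takes a size_t. A hedged sketch of that hand-off (builder and config creation elided; setMaxWorkspaceSize is the pre-TensorRT-8.4 spelling of this limit):

#include <cstdint>
#include <NvInfer.h>

void SetWorkspace(nvinfer1::IBuilderConfig* config, int64_t max_workspace) {
  // With a 32-bit parameter, any value >= 2 GiB would have wrapped negative
  // long before reaching this call; int64_t passes through unharmed.
  config->setMaxWorkspaceSize(static_cast<std::size_t>(max_workspace));
}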
@@ -34,7 +34,7 @@ class TensorRTEngineOpMaker : public framework::OpProtoAndCheckerMaker {
         "engine_key",
         "The engine_key here is used to distinguish different TRT Engines");
     AddAttr<int>("max_batch_size", "the maximum batch size.");
-    AddAttr<int>("workspace_size", "the workspace size.");
+    AddAttr<int64_t>("workspace_size", "the workspace size.").AsExtra();
     AddAttr<framework::BlockDesc *>("sub_block", "the trt block");
     AddAttr<bool>("enable_int8", "whether swith to int8 mode");
     AddComment("TensorRT engine operator.");
...
@@ -177,7 +177,7 @@ class TensorRTEngineOp : public framework::OperatorBase {
   std::vector<std::string> runtime_input_names_;
   mutable TensorRTEngine *trt_engine_{nullptr};
   int max_batch_size_;
-  int workspace_size_;
+  int64_t workspace_size_;
   std::unique_ptr<TRTInt8Calibrator> calibrator_;
   bool enable_int8_;
   bool enable_fp16_;
@@ -207,7 +207,7 @@ class TensorRTEngineOp : public framework::OperatorBase {
       : framework::OperatorBase(type, inputs, outputs, attrs) {
     input_names_ = Inputs("Xs");
     max_batch_size_ = Attr<int>("max_batch_size");
-    workspace_size_ = Attr<int>("workspace_size");
+    workspace_size_ = Attr<int64_t>("workspace_size");
     device_id_ = Attr<int>("gpu_id");
     enable_int8_ = Attr<bool>("enable_int8");
     enable_fp16_ = Attr<bool>("enable_fp16");
...
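The AddAttr<int64_t> declaration above and this Attr<int64_t> read must name the same type: operator attributes live in a variant, and fetching the wrong alternative fails at runtime. A toy illustration, with std::variant standing in for the framework's Attribute type:

#include <cstdint>
#include <iostream>
#include <variant>

// Stand-in for framework::Attribute, which distinguishes int from int64_t.
using Attribute = std::variant<int, int64_t, bool>;

int main() {
  Attribute workspace_size = int64_t{1} << 31;  // declared/stored as int64_t
  // std::get<int>(workspace_size) would throw std::bad_variant_access here,
  // just as a stale Attr<int>("workspace_size") would fail after this patch.
  std::cout << std::get<int64_t>(workspace_size) << "\n";  // 2147483648
  return 0;
}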
@@ -107,7 +107,7 @@ void DynamicShapeTest(bool allow_build_at_runtime) {
   engine_op_desc.SetBlockAttr("sub_block", &block_desc);
   engine_op_desc.SetAttr("max_batch_size", static_cast<int>(2));
-  engine_op_desc.SetAttr("workspace_size", static_cast<int>(1 << 20));
+  engine_op_desc.SetAttr("workspace_size", static_cast<int64_t>(1 << 20));
   engine_op_desc.SetAttr("parameters", std::vector<std::string>({}));
   engine_op_desc.SetAttr("engine_key", std::string("a_engine"));
   engine_op_desc.SetAttr("calibration_engine_key",
@@ -259,7 +259,7 @@ void Execute(int batch_size, int input_dim, int output_dim, int nlayers = 1) {
   engine_op_desc.SetBlockAttr("sub_block", &block_desc);
   engine_op_desc.SetAttr("max_batch_size", static_cast<int>(batch_size));
-  engine_op_desc.SetAttr("workspace_size", static_cast<int>(1 << 20));
+  engine_op_desc.SetAttr("workspace_size", static_cast<int64_t>(1 << 20));
   engine_op_desc.SetAttr("parameters",
                          std::vector<std::string>({"y0", "y1", "y2", "y3"}));
   engine_op_desc.SetAttr("engine_key", std::string("b_engine"));
...
@@ -687,7 +687,7 @@ void BindAnalysisConfig(py::module *m) {
       .def("specify_input_name", &AnalysisConfig::specify_input_name)
       .def("enable_tensorrt_engine",
           &AnalysisConfig::EnableTensorRtEngine,
-           py::arg("workspace_size") = 1 << 20,
+           py::arg("workspace_size") = 1 << 30,
           py::arg("max_batch_size") = 1,
           py::arg("min_subgraph_size") = 3,
           py::arg("precision_mode") = AnalysisConfig::Precision::kFloat32,
...
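On the Python side the bound default moves from 1 << 20 to 1 << 30 to match the C++ header. The literal 1 << 30 still fits in a 32-bit int, so it converts to the int64_t parameter as-is, while callers of enable_tensorrt_engine can now pass workspace sizes of 2 GiB and above as ordinary Python integers.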