Unverified · Commit bdce552b authored by Zhang Jun, committed by GitHub

update trt workspace size param (#44469)

* update trt workspace size param

* update

* update

* update

* use int64_t

* use int64_t

* update

* update
Parent 54d98963
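The motivation is plain integer arithmetic: workspace sizes are byte counts, and a 32-bit `int` tops out just above 2 GiB, so a workspace of 2 GiB or more cannot pass through an `int` parameter without truncation. A minimal standalone sketch of the failure mode this change avoids (not part of the commit itself):

```cpp
#include <cstdint>
#include <iostream>

int main() {
  // 1 << 20 bytes = 1 MiB (the old default); 1 << 30 bytes = 1 GiB (the new one).
  std::cout << (1 << 20) << " " << (1 << 30) << "\n";  // 1048576 1073741824

  // A 4 GiB workspace does not fit in a 32-bit int: shifting 1 << 32 as an
  // int is undefined behavior, so the value must be built as a 64-bit type.
  int64_t four_gib = int64_t{1} << 32;
  std::cout << four_gib << "\n";                         // 4294967296
  std::cout << static_cast<int32_t>(four_gib) << "\n";   // wraps to 0 on
                                                         // two's-complement targets
  return 0;
}
```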
@@ -216,7 +216,7 @@ struct Argument {
   DECL_ARGUMENT_FIELD(tensorrt_use_dla, TensorRtUseDLA, bool);
   DECL_ARGUMENT_FIELD(tensorrt_dla_core, TensorRtDLACore, int);
   DECL_ARGUMENT_FIELD(tensorrt_max_batch_size, TensorRtMaxBatchSize, int);
-  DECL_ARGUMENT_FIELD(tensorrt_workspace_size, TensorRtWorkspaceSize, int);
+  DECL_ARGUMENT_FIELD(tensorrt_workspace_size, TensorRtWorkspaceSize, int64_t);
   DECL_ARGUMENT_FIELD(tensorrt_min_subgraph_size, TensorRtMinSubgraphSize, int);
   DECL_ARGUMENT_FIELD(tensorrt_disabled_ops,
                       TensorRtDisabledOPs,
......
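For readers unfamiliar with `argument.h`: `DECL_ARGUMENT_FIELD` is a declaration macro, so retyping its third argument retypes the stored field and its accessors in one place. A hypothetical simplification of how such a macro might expand (the real Paddle macro also tracks whether the field has been set and differs in detail):

```cpp
#include <cstdint>

// Hypothetical sketch of a DECL_ARGUMENT_FIELD-style macro, not Paddle's.
#define DECL_ARGUMENT_FIELD(field__, Field__, type__)     \
 public:                                                  \
  type__& field__() { return field__##_; }                \
  void Set##Field__(const type__& x) { field__##_ = x; }  \
                                                          \
 private:                                                 \
  type__ field__##_;

struct Argument {
  // Retyping the third macro argument retypes the member and both accessors.
  DECL_ARGUMENT_FIELD(tensorrt_workspace_size, TensorRtWorkspaceSize, int64_t);
};

int main() {
  Argument arg;
  arg.SetTensorRtWorkspaceSize(int64_t{1} << 32);  // 4 GiB now fits
  return arg.tensorrt_workspace_size() > 0 ? 0 : 1;
}
```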
@@ -133,7 +133,8 @@ void IRPassManager::CreatePasses(Argument *argument,
                                  argument->bfloat16_enabled_op_types()));
 #endif
     } else if (pass_name == "tensorrt_subgraph_pass") {
-      pass->Set("workspace_size", new int(argument->tensorrt_workspace_size()));
+      pass->Set("workspace_size",
+                new int64_t(argument->tensorrt_workspace_size()));
       pass->Set("max_batch_size", new int(argument->tensorrt_max_batch_size()));
       pass->Set("min_subgraph_size",
                 new int(argument->tensorrt_min_subgraph_size()));
......
@@ -378,7 +379,7 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
   op_desc->SetBlockAttr("sub_block", new_block);
   op_desc->SetAttr("subgraph", block_desc.Proto()->SerializeAsString());
   op_desc->SetAttr("max_batch_size", max_batch_size);
-  op_desc->SetAttr("workspace_size", Get<int>("workspace_size"));
+  op_desc->SetAttr("workspace_size", Get<int64_t>("workspace_size"));
   op_desc->SetAttr("gpu_id", Get<int>("gpu_device_id"));
   op_desc->SetAttr("output_name_mapping", output_mapping);
   op_desc->SetAttr("origin_output_dims", renamed_output_dims);
@@ -499,7 +500,7 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
       inference::Singleton<inference::tensorrt::TRTEngineManager>::Global()
           .Create(engine_key + std::to_string(predictor_id),
                   max_batch_size,
-                  Get<int>("workspace_size"),
+                  Get<int64_t>("workspace_size"),
                   precision_mode,
                   calibrator.get(),
                   Get<int>("gpu_device_id"),
......
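Note that the `new int64_t(...)` in `CreatePasses` and the `Get<int64_t>(...)` in the subgraph pass have to change together: pass attributes are stored type-erased, and reading one back with a different `T` than it was set with fails at runtime rather than being converted. A freestanding sketch of that contract (illustrative only, not Paddle's actual `Pass` attribute code):

```cpp
#include <any>
#include <cstdint>
#include <iostream>
#include <map>
#include <memory>
#include <string>

// Illustrative only: attributes are stored type-erased, so Get<T> must
// name exactly the type that was given to Set.
class PassAttrs {
 public:
  template <typename T>
  void Set(const std::string& name, T* value) {
    attrs_[name] = std::shared_ptr<T>(value);
  }

  template <typename T>
  T Get(const std::string& name) const {
    // std::any_cast throws std::bad_any_cast on a type mismatch, e.g. if
    // the value was Set as int64_t* but read back with Get<int>.
    return *std::any_cast<std::shared_ptr<T>>(attrs_.at(name));
  }

 private:
  std::map<std::string, std::any> attrs_;
};

int main() {
  PassAttrs pass;
  pass.Set("workspace_size", new int64_t(int64_t{1} << 32));  // 4 GiB
  std::cout << pass.Get<int64_t>("workspace_size") << "\n";   // OK: 4294967296
  // pass.Get<int>("workspace_size");  // would throw std::bad_any_cast
  return 0;
}
```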
@@ -517,7 +517,7 @@ MkldnnQuantizerConfig *AnalysisConfig::mkldnn_quantizer_config() const {
 }

 void AnalysisConfig::EnableTensorRtEngine(
-    int workspace_size,
+    int64_t workspace_size,
     int max_batch_size,
     int min_subgraph_size,
     AnalysisConfig::Precision precision_mode,
......
@@ -523,7 +523,7 @@ struct PD_INFER_DECL AnalysisConfig {
   /// quantization).
   ///
   ///
-  void EnableTensorRtEngine(int workspace_size = 1 << 20,
+  void EnableTensorRtEngine(int64_t workspace_size = 1 << 30,
                             int max_batch_size = 1,
                             int min_subgraph_size = 3,
                             Precision precision = Precision::kFloat32,
@@ -967,7 +967,7 @@ struct PD_INFER_DECL AnalysisConfig {
   bool use_tensorrt_{false};
   // For workspace_size, refer it from here:
   // https://docs.nvidia.com/deeplearning/sdk/tensorrt-developer-guide/index.html#troubleshooting
-  int tensorrt_workspace_size_{1 << 30};
+  int64_t tensorrt_workspace_size_{1 << 30};
   // While TensorRT allows an engine optimized for a given max batch size
   // to run at any smaller size, the performance for those smaller
   // sizes may not be as well-optimized. Therefore, Max batch is best
......
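Two things change at this API surface: the parameter type, and the default, which moves from `1 << 20` bytes (1 MiB) to `1 << 30` bytes (1 GiB), matching the `tensorrt_workspace_size_{1 << 30}` member initializer that already existed. A hedged usage sketch of the widened parameter (the include path, model directory, and GPU settings below are placeholders, not taken from the diff):

```cpp
#include "paddle_inference_api.h"  // header name/path may vary by install

int main() {
  paddle::AnalysisConfig config;
  config.SetModel("model_dir");  // placeholder model directory
  config.EnableUseGpu(/*memory_pool_init_size_mb=*/100, /*device_id=*/0);

  // A 4 GiB workspace: representable only because the parameter is int64_t.
  config.EnableTensorRtEngine(/*workspace_size=*/int64_t{1} << 32,
                              /*max_batch_size=*/1,
                              /*min_subgraph_size=*/3);
  return 0;
}
```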
@@ -214,7 +214,7 @@ PADDLE_CAPI_EXPORT extern bool PD_SpecifyInputName(

 PADDLE_CAPI_EXPORT extern void PD_EnableTensorRtEngine(
     PD_AnalysisConfig* config,
-    int workspace_size,
+    int64_t workspace_size,
     int max_batch_size,
     int min_subgraph_size,
     Precision precision,
......
@@ -243,7 +243,7 @@ bool PD_SpecifyInputName(const PD_AnalysisConfig* config) {
 }

 void PD_EnableTensorRtEngine(PD_AnalysisConfig* config,
-                             int workspace_size,
+                             int64_t workspace_size,
                              int max_batch_size,
                              int min_subgraph_size,
                              Precision precision,
......
@@ -219,7 +219,7 @@ PD_Bool PD_ConfigIrOptim(__pd_keep PD_Config* pd_config) {
 }

 void PD_ConfigEnableTensorRtEngine(__pd_keep PD_Config* pd_config,
-                                   int32_t workspace_size,
+                                   int64_t workspace_size,
                                    int32_t max_batch_size,
                                    int32_t min_subgraph_size,
                                    PD_PrecisionType precision,
......
@@ -329,7 +329,7 @@ PADDLE_CAPI_EXPORT extern PD_Bool PD_ConfigIrOptim(
 ///
 PADDLE_CAPI_EXPORT extern void PD_ConfigEnableTensorRtEngine(
     __pd_keep PD_Config* pd_config,
-    int32_t workspace_size,
+    int64_t workspace_size,
     int32_t max_batch_size,
     int32_t min_subgraph_size,
     PD_PrecisionType precision,
......
@@ -79,7 +79,7 @@ class TRTConvertValidation {
   TRTConvertValidation(int max_batch_size,
                        const std::unordered_set<std::string>& parameters,
                        framework::Scope& scope,  // NOLINT
-                       int workspace_size = 1 << 10,
+                       int64_t workspace_size = 1 << 30,
                        bool if_add_batch = true)
       : parameters_(parameters),
         scope_(scope),
......
@@ -206,7 +206,7 @@ class TensorRTEngine {
   TensorRTEngine(
       int max_batch,
-      int max_workspace,
+      int64_t max_workspace,
       AnalysisConfig::Precision precision = AnalysisConfig::Precision::kFloat32,
       TRTInt8Calibrator* calibrator = nullptr,
       int device_id = 0,
@@ -672,7 +672,7 @@ class TensorRTEngine {
   // the runtime batch size
   static int runtime_batch_;
   // the max memory size the engine uses
-  int max_workspace_;
+  int64_t max_workspace_;
   AnalysisConfig::Precision precision_;
   TRTInt8Calibrator* calibrator_;
@@ -767,7 +767,7 @@ class TRTEngineManager {
   TensorRTEngine* Create(
       std::string name,
       int max_batch,
-      int max_workspace,
+      int64_t max_workspace,
       AnalysisConfig::Precision precision = AnalysisConfig::Precision::kFloat32,
       TRTInt8Calibrator* calibrator = nullptr,
       int device_id = 0,
......
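`max_workspace_` eventually reaches TensorRT's builder configuration, whose workspace setter takes a `std::size_t`, so carrying the value as `int64_t` end to end avoids truncating it before TensorRT ever sees it. A sketch of that hand-off, assuming the pre-8.4 `nvinfer1::IBuilderConfig` API (the helper name is illustrative, not Paddle's):

```cpp
#include <cstdint>

#include <NvInfer.h>  // TensorRT headers; requires a TensorRT install

// Illustrative helper: forward a 64-bit workspace budget to the builder.
// With a 32-bit parameter upstream, any budget >= 2 GiB would have been
// mangled long before this call was made.
void SetWorkspace(nvinfer1::IBuilderConfig* builder_config,
                  int64_t max_workspace_bytes) {
  builder_config->setMaxWorkspaceSize(
      static_cast<std::size_t>(max_workspace_bytes));
}
```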
@@ -34,7 +34,7 @@ class TensorRTEngineOpMaker : public framework::OpProtoAndCheckerMaker {
         "engine_key",
         "The engine_key here is used to distinguish different TRT Engines");
     AddAttr<int>("max_batch_size", "the maximum batch size.");
-    AddAttr<int>("workspace_size", "the workspace size.");
+    AddAttr<int64_t>("workspace_size", "the workspace size.").AsExtra();
     AddAttr<framework::BlockDesc *>("sub_block", "the trt block");
     AddAttr<bool>("enable_int8", "whether swith to int8 mode");
     AddComment("TensorRT engine operator.");
......
@@ -177,7 +177,7 @@ class TensorRTEngineOp : public framework::OperatorBase {
   std::vector<std::string> runtime_input_names_;
   mutable TensorRTEngine *trt_engine_{nullptr};
   int max_batch_size_;
-  int workspace_size_;
+  int64_t workspace_size_;
   std::unique_ptr<TRTInt8Calibrator> calibrator_;
   bool enable_int8_;
   bool enable_fp16_;
@@ -207,7 +207,7 @@ class TensorRTEngineOp : public framework::OperatorBase {
       : framework::OperatorBase(type, inputs, outputs, attrs) {
     input_names_ = Inputs("Xs");
     max_batch_size_ = Attr<int>("max_batch_size");
-    workspace_size_ = Attr<int>("workspace_size");
+    workspace_size_ = Attr<int64_t>("workspace_size");
     device_id_ = Attr<int>("gpu_id");
     enable_int8_ = Attr<bool>("enable_int8");
     enable_fp16_ = Attr<bool>("enable_fp16");
......
@@ -107,7 +107,7 @@ void DynamicShapeTest(bool allow_build_at_runtime) {
   engine_op_desc.SetBlockAttr("sub_block", &block_desc);
   engine_op_desc.SetAttr("max_batch_size", static_cast<int>(2));
-  engine_op_desc.SetAttr("workspace_size", static_cast<int>(1 << 20));
+  engine_op_desc.SetAttr("workspace_size", static_cast<int64_t>(1 << 20));
   engine_op_desc.SetAttr("parameters", std::vector<std::string>({}));
   engine_op_desc.SetAttr("engine_key", std::string("a_engine"));
   engine_op_desc.SetAttr("calibration_engine_key",
@@ -259,7 +259,7 @@ void Execute(int batch_size, int input_dim, int output_dim, int nlayers = 1) {
   engine_op_desc.SetBlockAttr("sub_block", &block_desc);
   engine_op_desc.SetAttr("max_batch_size", static_cast<int>(batch_size));
-  engine_op_desc.SetAttr("workspace_size", static_cast<int>(1 << 20));
+  engine_op_desc.SetAttr("workspace_size", static_cast<int64_t>(1 << 20));
   engine_op_desc.SetAttr("parameters",
                          std::vector<std::string>({"y0", "y1", "y2", "y3"}));
   engine_op_desc.SetAttr("engine_key", std::string("b_engine"));
......
@@ -687,7 +687,7 @@ void BindAnalysisConfig(py::module *m) {
       .def("specify_input_name", &AnalysisConfig::specify_input_name)
       .def("enable_tensorrt_engine",
            &AnalysisConfig::EnableTensorRtEngine,
-           py::arg("workspace_size") = 1 << 20,
+           py::arg("workspace_size") = 1 << 30,
            py::arg("max_batch_size") = 1,
            py::arg("min_subgraph_size") = 3,
            py::arg("precision_mode") = AnalysisConfig::Precision::kFloat32,
......
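The pybind default moves in lockstep, so Python callers that omit `workspace_size` now get 1 GiB instead of 1 MiB. A minimal pybind11 sketch of the same binding pattern (the module and function names are invented for illustration):

```cpp
#include <cstdint>

#include <pybind11/pybind11.h>

namespace py = pybind11;

// Illustrative stand-in for EnableTensorRtEngine: an int64_t parameter
// with a keyword default, so demo.enable_trt() returns 1 << 30.
int64_t EnableTrt(int64_t workspace_size) { return workspace_size; }

PYBIND11_MODULE(demo, m) {
  m.def("enable_trt", &EnableTrt, py::arg("workspace_size") = 1 << 30);
}
```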