diff --git a/paddle/fluid/inference/analysis/argument.h b/paddle/fluid/inference/analysis/argument.h
index 717737749a96beb220d271e96051d2ce8c4addc2..997022abde3f9c500098573d47dc08c1e7e107e6 100644
--- a/paddle/fluid/inference/analysis/argument.h
+++ b/paddle/fluid/inference/analysis/argument.h
@@ -216,7 +216,7 @@ struct Argument {
   DECL_ARGUMENT_FIELD(tensorrt_use_dla, TensorRtUseDLA, bool);
   DECL_ARGUMENT_FIELD(tensorrt_dla_core, TensorRtDLACore, int);
   DECL_ARGUMENT_FIELD(tensorrt_max_batch_size, TensorRtMaxBatchSize, int);
-  DECL_ARGUMENT_FIELD(tensorrt_workspace_size, TensorRtWorkspaceSize, int);
+  DECL_ARGUMENT_FIELD(tensorrt_workspace_size, TensorRtWorkspaceSize, int64_t);
   DECL_ARGUMENT_FIELD(tensorrt_min_subgraph_size, TensorRtMinSubgraphSize, int);
   DECL_ARGUMENT_FIELD(tensorrt_disabled_ops,
                       TensorRtDisabledOPs,
diff --git a/paddle/fluid/inference/analysis/ir_pass_manager.cc b/paddle/fluid/inference/analysis/ir_pass_manager.cc
index 3c04638003cdd0c31c1e9f3aeb1cd9cf07130db6..723a787722143dc8d497c9a143469bfa7b53edd3 100644
--- a/paddle/fluid/inference/analysis/ir_pass_manager.cc
+++ b/paddle/fluid/inference/analysis/ir_pass_manager.cc
@@ -133,7 +133,8 @@ void IRPassManager::CreatePasses(Argument *argument,
           argument->bfloat16_enabled_op_types()));
 #endif
   } else if (pass_name == "tensorrt_subgraph_pass") {
-    pass->Set("workspace_size", new int(argument->tensorrt_workspace_size()));
+    pass->Set("workspace_size",
+              new int64_t(argument->tensorrt_workspace_size()));
     pass->Set("max_batch_size", new int(argument->tensorrt_max_batch_size()));
     pass->Set("min_subgraph_size",
               new int(argument->tensorrt_min_subgraph_size()));
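Why the widening matters: a signed 32-bit `int` tops out at 2^31 - 1 bytes (just under 2 GiB), so any TensorRT workspace request of 2 GiB or more overflows before it ever reaches the engine. A minimal standalone sketch of the failure mode (not part of the patch; names are illustrative):

```cpp
#include <cstdint>
#include <iostream>

int main() {
  // A 4 GiB workspace request, held in the widened 64-bit field.
  int64_t workspace64 = int64_t{1} << 32;
  // The same value forced through the old 32-bit field wraps modulo 2^32
  // (well-defined since C++20, and what two's-complement targets do anyway).
  int workspace32 = static_cast<int>(workspace64);
  std::cout << workspace64 << " -> " << workspace32 << "\n";  // 4294967296 -> 0
  return 0;
}
```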
diff --git a/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc b/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
index d39eadc7cc8f19d95719a7103a8dd5a5db6aa340..30697e66864821c233f16e83e28ac2972056125e 100644
--- a/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
+++ b/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
@@ -1,3 +1,4 @@
+
 // Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
@@ -378,7 +379,7 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
   op_desc->SetBlockAttr("sub_block", new_block);
   op_desc->SetAttr("subgraph", block_desc.Proto()->SerializeAsString());
   op_desc->SetAttr("max_batch_size", max_batch_size);
-  op_desc->SetAttr("workspace_size", Get<int>("workspace_size"));
+  op_desc->SetAttr("workspace_size", Get<int64_t>("workspace_size"));
   op_desc->SetAttr("gpu_id", Get<int>("gpu_device_id"));
   op_desc->SetAttr("output_name_mapping", output_mapping);
   op_desc->SetAttr("origin_output_dims", renamed_output_dims);
@@ -499,7 +500,7 @@
       inference::Singleton<inference::tensorrt::TRTEngineManager>::Global()
           .Create(engine_key + std::to_string(predictor_id),
                   max_batch_size,
-                  Get<int>("workspace_size"),
+                  Get<int64_t>("workspace_size"),
                   precision_mode,
                   calibrator.get(),
                   Get<int>("gpu_device_id"),
diff --git a/paddle/fluid/inference/api/analysis_config.cc b/paddle/fluid/inference/api/analysis_config.cc
index 742ce01e8458c1a11635260d581c52fb06e5ca9e..24925901312605e60cc71bd0db7c8a2b55eda814 100644
--- a/paddle/fluid/inference/api/analysis_config.cc
+++ b/paddle/fluid/inference/api/analysis_config.cc
@@ -517,7 +517,7 @@ MkldnnQuantizerConfig *AnalysisConfig::mkldnn_quantizer_config() const {
 }
 
 void AnalysisConfig::EnableTensorRtEngine(
-    int workspace_size,
+    int64_t workspace_size,
     int max_batch_size,
     int min_subgraph_size,
     AnalysisConfig::Precision precision_mode,
diff --git a/paddle/fluid/inference/api/paddle_analysis_config.h b/paddle/fluid/inference/api/paddle_analysis_config.h
index 08d0e073babc18c5691be32a5642efaa15ff098d..b925a0c361f94ba059b1868aee9c180dad84e58a 100644
--- a/paddle/fluid/inference/api/paddle_analysis_config.h
+++ b/paddle/fluid/inference/api/paddle_analysis_config.h
@@ -523,7 +523,7 @@ struct PD_INFER_DECL AnalysisConfig {
   /// quantization).
   ///
   ///
-  void EnableTensorRtEngine(int workspace_size = 1 << 20,
+  void EnableTensorRtEngine(int64_t workspace_size = 1 << 30,
                             int max_batch_size = 1,
                             int min_subgraph_size = 3,
                             Precision precision = Precision::kFloat32,
@@ -967,7 +967,7 @@ struct PD_INFER_DECL AnalysisConfig {
   bool use_tensorrt_{false};
   // For workspace_size, refer it from here:
   // https://docs.nvidia.com/deeplearning/sdk/tensorrt-developer-guide/index.html#troubleshooting
-  int tensorrt_workspace_size_{1 << 30};
+  int64_t tensorrt_workspace_size_{1 << 30};
   // While TensorRT allows an engine optimized for a given max batch size
   // to run at any smaller size, the performance for those smaller
   // sizes may not be as well-optimized. Therefore, Max batch is best
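With the public signature widened, callers can now request a workspace past the old 2 GiB ceiling. A hedged usage sketch (the surrounding setup is illustrative; the `EnableTensorRtEngine` arguments follow the header above):

```cpp
#include <cstdint>

#include "paddle/fluid/inference/api/paddle_analysis_config.h"

// Sketch: configure TensorRT with a 4 GiB workspace, which only round-trips
// intact now that workspace_size is int64_t end to end.
void ConfigureTrt(paddle::AnalysisConfig* config) {
  config->EnableUseGpu(/*memory_pool_init_size_mb=*/100, /*device_id=*/0);
  config->EnableTensorRtEngine(/*workspace_size=*/int64_t{1} << 32,
                               /*max_batch_size=*/1,
                               /*min_subgraph_size=*/3,
                               paddle::AnalysisConfig::Precision::kFloat32,
                               /*use_static=*/false,
                               /*use_calib_mode=*/false);
}
```

Note also that the default grows from `1 << 20` (1 MiB) to `1 << 30` (1 GiB); `1 << 30` still fits in a plain `int`, so the literal needs no `LL` suffix.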
diff --git a/paddle/fluid/inference/capi/paddle_c_api.h b/paddle/fluid/inference/capi/paddle_c_api.h
index 1b8bd08b76bad27fa5695e5076106797e2dd6c19..25ede726b144b02d46b98dd04f4adb5e4e9abd89 100644
--- a/paddle/fluid/inference/capi/paddle_c_api.h
+++ b/paddle/fluid/inference/capi/paddle_c_api.h
@@ -214,7 +214,7 @@ PADDLE_CAPI_EXPORT extern bool PD_SpecifyInputName(
 
 PADDLE_CAPI_EXPORT extern void PD_EnableTensorRtEngine(
     PD_AnalysisConfig* config,
-    int workspace_size,
+    int64_t workspace_size,
     int max_batch_size,
     int min_subgraph_size,
     Precision precision,
diff --git a/paddle/fluid/inference/capi/pd_config.cc b/paddle/fluid/inference/capi/pd_config.cc
index b6d865ff3490c395467e4ad26efd52c7f16c7bd8..45fd2e45c19914220cd85f01e9bd1b67a1404f90 100644
--- a/paddle/fluid/inference/capi/pd_config.cc
+++ b/paddle/fluid/inference/capi/pd_config.cc
@@ -243,7 +243,7 @@ bool PD_SpecifyInputName(const PD_AnalysisConfig* config) {
 }
 
 void PD_EnableTensorRtEngine(PD_AnalysisConfig* config,
-                             int workspace_size,
+                             int64_t workspace_size,
                              int max_batch_size,
                              int min_subgraph_size,
                              Precision precision,
diff --git a/paddle/fluid/inference/capi_exp/pd_config.cc b/paddle/fluid/inference/capi_exp/pd_config.cc
index a72497940d9da171208f43c4fef8de129f4bbb7d..b183ba8c63b25cf067976cd70c47c021cb27fd52 100644
--- a/paddle/fluid/inference/capi_exp/pd_config.cc
+++ b/paddle/fluid/inference/capi_exp/pd_config.cc
@@ -219,7 +219,7 @@ PD_Bool PD_ConfigIrOptim(__pd_keep PD_Config* pd_config) {
 }
 
 void PD_ConfigEnableTensorRtEngine(__pd_keep PD_Config* pd_config,
-                                   int32_t workspace_size,
+                                   int64_t workspace_size,
                                    int32_t max_batch_size,
                                    int32_t min_subgraph_size,
                                    PD_PrecisionType precision,
diff --git a/paddle/fluid/inference/capi_exp/pd_config.h b/paddle/fluid/inference/capi_exp/pd_config.h
index 9e06d8c72f0480c19b595ea3f0af97b14cf29077..a7054d53908389bf7102213bf118c874ec5aa05d 100644
--- a/paddle/fluid/inference/capi_exp/pd_config.h
+++ b/paddle/fluid/inference/capi_exp/pd_config.h
@@ -329,7 +329,7 @@ PADDLE_CAPI_EXPORT extern PD_Bool PD_ConfigIrOptim(
 ///
 PADDLE_CAPI_EXPORT extern void PD_ConfigEnableTensorRtEngine(
     __pd_keep PD_Config* pd_config,
-    int32_t workspace_size,
+    int64_t workspace_size,
     int32_t max_batch_size,
     int32_t min_subgraph_size,
     PD_PrecisionType precision,
diff --git a/paddle/fluid/inference/tensorrt/convert/ut_helper.h b/paddle/fluid/inference/tensorrt/convert/ut_helper.h
index 9b80aeb1d493887a4326a9025fd8c4a72b26abfe..d65273ac018892cd716481d5a13f155b5e5c8681 100644
--- a/paddle/fluid/inference/tensorrt/convert/ut_helper.h
+++ b/paddle/fluid/inference/tensorrt/convert/ut_helper.h
@@ -79,7 +79,7 @@ class TRTConvertValidation {
   TRTConvertValidation(int max_batch_size,
                        const std::unordered_set<std::string>& parameters,
                        framework::Scope& scope,  // NOLINT
-                       int workspace_size = 1 << 10,
+                       int64_t workspace_size = 1 << 30,
                        bool if_add_batch = true)
       : parameters_(parameters),
         scope_(scope),
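Both C APIs pick up the widening, but only `capi_exp` changes a user-visible parameter type (`int32_t` to `int64_t`). A hedged sketch against `capi_exp` (constructor, enum, and helper names follow `pd_config.h` as of this patch; error handling omitted):

```cpp
#include <cstdint>

#include "paddle/fluid/inference/capi_exp/pd_config.h"

int main() {
  PD_Config* config = PD_ConfigCreate();
  PD_ConfigEnableUseGpu(config, /*memory_pool_init_size_mb=*/100, /*gpu_id=*/0);
  // 3 GiB: representable in int64_t, truncated under the old int32_t parameter.
  PD_ConfigEnableTensorRtEngine(config,
                                /*workspace_size=*/int64_t{3} << 30,
                                /*max_batch_size=*/1,
                                /*min_subgraph_size=*/3,
                                /*precision=*/PD_PRECISION_FLOAT32,
                                /*use_static=*/0,
                                /*use_calib_mode=*/0);
  PD_ConfigDestroy(config);
  return 0;
}
```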
diff --git a/paddle/fluid/inference/tensorrt/engine.h b/paddle/fluid/inference/tensorrt/engine.h
index fcd28ec749cd8f8c50ad298ac9ec68f96612ff03..56a8987e641a67d177b23e2549ad15273fe52403 100644
--- a/paddle/fluid/inference/tensorrt/engine.h
+++ b/paddle/fluid/inference/tensorrt/engine.h
@@ -206,7 +206,7 @@ class TensorRTEngine {
 
   TensorRTEngine(
       int max_batch,
-      int max_workspace,
+      int64_t max_workspace,
       AnalysisConfig::Precision precision = AnalysisConfig::Precision::kFloat32,
       TRTInt8Calibrator* calibrator = nullptr,
       int device_id = 0,
@@ -672,7 +672,7 @@
   // the runtime batch size
   static int runtime_batch_;
   // the max memory size the engine uses
-  int max_workspace_;
+  int64_t max_workspace_;
 
   AnalysisConfig::Precision precision_;
   TRTInt8Calibrator* calibrator_;
@@ -767,7 +767,7 @@
   TensorRTEngine* Create(
       std::string name,
       int max_batch,
-      int max_workspace,
+      int64_t max_workspace,
       AnalysisConfig::Precision precision = AnalysisConfig::Precision::kFloat32,
       TRTInt8Calibrator* calibrator = nullptr,
       int device_id = 0,
diff --git a/paddle/fluid/operators/tensorrt/tensorrt_engine_op.cc b/paddle/fluid/operators/tensorrt/tensorrt_engine_op.cc
index 5ef047cd06914f4100d5da6c53669df57ab11755..b229c4aed79b21958450a173d60e91e4fd2309b8 100644
--- a/paddle/fluid/operators/tensorrt/tensorrt_engine_op.cc
+++ b/paddle/fluid/operators/tensorrt/tensorrt_engine_op.cc
@@ -34,7 +34,7 @@
       "engine_key",
       "The engine_key here is used to distinguish different TRT Engines");
   AddAttr<int>("max_batch_size", "the maximum batch size.");
-  AddAttr<int>("workspace_size", "the workspace size.");
+  AddAttr<int64_t>("workspace_size", "the workspace size.").AsExtra();
   AddAttr<framework::BlockDesc*>("sub_block", "the trt block");
   AddAttr<bool>("enable_int8", "whether swith to int8 mode");
   AddComment("TensorRT engine operator.");
diff --git a/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h b/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h
index b13996b6fab78a58ee60bba736db32e5dfa193c2..9b05faf8df47af016ff79cfe8092da935a489d93 100644
--- a/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h
+++ b/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h
@@ -177,7 +177,7 @@ class TensorRTEngineOp : public framework::OperatorBase {
   std::vector<std::string> runtime_input_names_;
   mutable TensorRTEngine *trt_engine_{nullptr};
   int max_batch_size_;
-  int workspace_size_;
+  int64_t workspace_size_;
   std::unique_ptr<TRTInt8Calibrator> calibrator_;
   bool enable_int8_;
   bool enable_fp16_;
@@ -207,7 +207,7 @@ class TensorRTEngineOp : public framework::OperatorBase {
       : framework::OperatorBase(type, inputs, outputs, attrs) {
     input_names_ = Inputs("Xs");
     max_batch_size_ = Attr<int>("max_batch_size");
-    workspace_size_ = Attr<int>("workspace_size");
+    workspace_size_ = Attr<int64_t>("workspace_size");
     device_id_ = Attr<int>("gpu_id");
     enable_int8_ = Attr<bool>("enable_int8");
     enable_fp16_ = Attr<bool>("enable_fp16");
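One subtlety the `engine.h` and operator changes guard against: widening only the constructor parameter is not enough, since initializing a still-`int` member from it would narrow the value again, and `Attr<int64_t>` must match the type declared via `AddAttr<int64_t>` exactly. A reduced stand-in for the pattern (not the real `TensorRTEngine`):

```cpp
#include <cstdint>

// Reduced stand-in for the engine: parameter and member widen together, so a
// >= 2 GiB request survives from the op attribute all the way to the builder.
class EngineSketch {
 public:
  explicit EngineSketch(int64_t max_workspace)
      : max_workspace_(max_workspace) {}

  int64_t max_workspace() const { return max_workspace_; }

 private:
  int64_t max_workspace_;  // previously `int`, where values >= 2^31 wrapped
};
```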
std::vector({"y0", "y1", "y2", "y3"})); engine_op_desc.SetAttr("engine_key", std::string("b_engine")); diff --git a/paddle/fluid/pybind/inference_api.cc b/paddle/fluid/pybind/inference_api.cc index 3d2595860353e86318f9790bd09a26aae0f707f1..14975ac337aed61f355d6d60d02d54fb43d6d80e 100644 --- a/paddle/fluid/pybind/inference_api.cc +++ b/paddle/fluid/pybind/inference_api.cc @@ -687,7 +687,7 @@ void BindAnalysisConfig(py::module *m) { .def("specify_input_name", &AnalysisConfig::specify_input_name) .def("enable_tensorrt_engine", &AnalysisConfig::EnableTensorRtEngine, - py::arg("workspace_size") = 1 << 20, + py::arg("workspace_size") = 1 << 30, py::arg("max_batch_size") = 1, py::arg("min_subgraph_size") = 3, py::arg("precision_mode") = AnalysisConfig::Precision::kFloat32,