diff --git a/paddle/fluid/inference/analysis/argument.h b/paddle/fluid/inference/analysis/argument.h
index 4cac76b47bc0c1f35fbdf4efed5ec8b942bc3a41..f44c877b939659c88f7c772df32a4c63ab622f3a 100644
--- a/paddle/fluid/inference/analysis/argument.h
+++ b/paddle/fluid/inference/analysis/argument.h
@@ -250,9 +250,6 @@ struct Argument {
                       TensorRtAllowBuildAtRuntime,
                       bool);
   DECL_ARGUMENT_FIELD(tensorrt_use_inspector, TensorRtUseInspector, bool);
-  DECL_ARGUMENT_FIELD(tensorrt_use_sparse_weights,
-                      TensorRtUseSparseWeights,
-                      bool);
 
   DECL_ARGUMENT_FIELD(use_dlnne, UseDlnne, bool);
   DECL_ARGUMENT_FIELD(dlnne_min_subgraph_size, DlnneMinSubgraphSize, int);
diff --git a/paddle/fluid/inference/analysis/ir_pass_manager.cc b/paddle/fluid/inference/analysis/ir_pass_manager.cc
index 6408e5ca2d3623c0f466fac8a606e9f8703be681..4051511906b1be2b735cb2985faed7f9eb910e1b 100644
--- a/paddle/fluid/inference/analysis/ir_pass_manager.cc
+++ b/paddle/fluid/inference/analysis/ir_pass_manager.cc
@@ -213,8 +213,6 @@ void IRPassManager::CreatePasses(Argument *argument,
       pass->Set("use_static_engine", new bool(use_static_engine));
       pass->Set("model_from_memory", new bool(argument->model_from_memory()));
       pass->Set("use_inspector", new bool(argument->tensorrt_use_inspector()));
-      pass->Set("use_sparse_weights",
-                new bool(argument->tensorrt_use_sparse_weights()));
 
       // tuned trt dynamic_shape
       pass->Set("trt_shape_range_info_path",
diff --git a/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc b/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
index 9d39b6e6118271de8f39be99e53e06a7f855b2bc..89e1c1934932f4765ad92b1100ba6a99b48b9141 100644
--- a/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
+++ b/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
@@ -524,7 +524,6 @@ std::string TensorRtSubgraphPass::CreateTensorRTOp(
   op_desc->SetAttr("allow_build_at_runtime", allow_build_at_runtime);
   op_desc->SetAttr("shape_range_info_path", shape_range_info_path);
   op_desc->SetAttr("use_inspector", Get<bool>("use_inspector"));
-  op_desc->SetAttr("use_sparse_weights", Get<bool>("use_sparse_weights"));
   op_desc->SetAttr("model_precision", Get<int>("model_precision"));
   op_desc->SetAttr("with_dynamic_shape", with_dynamic_shape);
 
@@ -665,7 +664,6 @@ std::string TensorRtSubgraphPass::CreateTensorRTOp(
   trt_engine->SetUseDLA(Get<bool>("trt_use_dla"));
   trt_engine->SetDLACore(Get<int>("trt_dla_core"));
   trt_engine->SetUseInspector(Get<bool>("use_inspector"));
-  trt_engine->SetUseSparseWeights(Get<bool>("use_sparse_weights"));
   trt_engine->SetWithErnie(
       graph->Has(framework::ir::kEmbEltwiseLayernormPass) &&
       graph->Has(framework::ir::kMultiheadMatmulPass));
diff --git a/paddle/fluid/inference/api/analysis_config.cc b/paddle/fluid/inference/api/analysis_config.cc
index ae281c5e9de44af801006f939003abb646869e78..af7b8574b863d1c9e36eeb4e18daa6573b28987e 100644
--- a/paddle/fluid/inference/api/analysis_config.cc
+++ b/paddle/fluid/inference/api/analysis_config.cc
@@ -451,7 +451,6 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
   CP_MEMBER(collect_shape_range_info_);
   CP_MEMBER(shape_range_info_path_);
   CP_MEMBER(trt_use_inspector_);
-  CP_MEMBER(trt_use_sparse_weights_);
   CP_MEMBER(trt_engine_memory_sharing_);
   CP_MEMBER(trt_engine_memory_sharing_identifier_);
   // Dlnne related
@@ -806,10 +805,6 @@ void AnalysisConfig::EnableTensorRtDLA(int dla_core) {
 
 void AnalysisConfig::EnableTensorRtInspector() { trt_use_inspector_ = true; }
 
-void AnalysisConfig::EnableTensorRtSparseWeights() {
-  trt_use_sparse_weights_ = true;
-}
-
 void AnalysisConfig::Exp_DisableTensorRtOPs(
     const std::vector<std::string> &ops) {
   trt_disabled_ops_.insert(trt_disabled_ops_.end(), ops.begin(), ops.end());
diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
index 032b5d7734ce7d18a8fc19925463c2967668b1ee..d7f08cdf7ceceb6acac42d08a07423b79d6dd53a 100644
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -1397,7 +1397,6 @@ void AnalysisPredictor::PrepareArgument() {
     argument_->SetTensorRtAllowBuildAtRuntime(
         config_.trt_allow_build_at_runtime());
     argument_->SetTensorRtUseInspector(config_.trt_use_inspector_);
-    argument_->SetTensorRtUseSparseWeights(config_.trt_use_sparse_weights_);
     argument_->SetTrtEngineMemorySharing(config_.trt_engine_memory_sharing());
   }
 
diff --git a/paddle/fluid/inference/api/paddle_analysis_config.h b/paddle/fluid/inference/api/paddle_analysis_config.h
index 28934864c8cfd02a8fef9633991c85766355984b..b30838c3680e91095d1d81fad66d7bbf5b8a95f5 100644
--- a/paddle/fluid/inference/api/paddle_analysis_config.h
+++ b/paddle/fluid/inference/api/paddle_analysis_config.h
@@ -742,9 +742,6 @@ struct PD_INFER_DECL AnalysisConfig {
   void EnableTensorRtInspector();
   bool tensorrt_inspector_enabled() { return trt_use_inspector_; }
 
-  void EnableTensorRtSparseWeights();
-  bool tensorrt_sparse_weights_enabled() { return trt_use_sparse_weights_; }
-
   void EnableDlnne(
       int min_subgraph_size = 3,
       int max_batch_size = 1,
@@ -1121,7 +1118,6 @@ struct PD_INFER_DECL AnalysisConfig {
   // tune to get dynamic_shape info.
   bool trt_tuned_dynamic_shape_{false};
   bool trt_use_inspector_{false};
-  bool trt_use_sparse_weights_{false};
 
   // In CollectShapeInfo mode, we will collect the shape information of
   // all intermediate tensors in the compute graph and calculate the
diff --git a/paddle/fluid/inference/tensorrt/engine.cc b/paddle/fluid/inference/tensorrt/engine.cc
index 2864adffdaa520dc4bf80cd1f3fbfabc294a1fd7..55ec446dcc73ce3f256e16c8c08fbf2304703879 100644
--- a/paddle/fluid/inference/tensorrt/engine.cc
+++ b/paddle/fluid/inference/tensorrt/engine.cc
@@ -357,7 +357,6 @@ void TensorRTEngine::FreezeNetwork() {
           "opt_shape, false /*disable_trt_plugin_fp16*/)'";
     }
   }
-
 #if IS_TRT_VERSION_GE(8200)
   if (use_inspector_) {
     infer_builder_config_->setProfilingVerbosity(
@@ -369,9 +368,6 @@ void TensorRTEngine::FreezeNetwork() {
   infer_engine_.reset(infer_builder_->buildEngineWithConfig(
       *network(), *infer_builder_config_));
 #else
-  if (use_sparse_weights_) {
-    infer_builder_config_->setFlag(nvinfer1::BuilderFlag::kSPARSE_WEIGHTS);
-  }
   ihost_memory_.reset(infer_builder_->buildSerializedNetwork(
       *network(), *infer_builder_config_));
   infer_ptr<nvinfer1::IRuntime> runtime(createInferRuntime(&logger_));
diff --git a/paddle/fluid/inference/tensorrt/engine.h b/paddle/fluid/inference/tensorrt/engine.h
index a0d146c9b6dad415e67a7b0f542483f25e3dac45..f789324e9e0d663204b4c2322529a3da0a20fcd3 100644
--- a/paddle/fluid/inference/tensorrt/engine.h
+++ b/paddle/fluid/inference/tensorrt/engine.h
@@ -738,9 +738,6 @@ class TensorRTEngine {
   void GetEngineInfo();
 
   void SetUseInspector(bool use_inspector) { use_inspector_ = use_inspector; }
-  void SetUseSparseWeights(bool use_sparse_weights) {
-    use_sparse_weights_ = use_sparse_weights;
-  }
   void SetScope(const framework::Scope& scope) { scope_ = &scope; }
 
   void SetContextMemorySharing(bool context_memory_sharing) {
@@ -829,7 +826,6 @@ class TensorRTEngine {
 #endif
   std::mutex mutex_;
   bool use_inspector_;
-  bool use_sparse_weights_{false};
 
  public:
   thread_local static int predictor_id_per_thread;
diff --git a/paddle/fluid/pybind/inference_api.cc b/paddle/fluid/pybind/inference_api.cc
index 0995bc832ac883b052520c980b48e438a19187f1..09e1e42cfbdd951eec6be4fa8db0f23f266a8474 100644
--- a/paddle/fluid/pybind/inference_api.cc
+++ b/paddle/fluid/pybind/inference_api.cc
@@ -879,10 +879,6 @@ void BindAnalysisConfig(py::module *m) {
            &AnalysisConfig::EnableTensorRtInspector)
       .def("tensorrt_inspector_enabled",
            &AnalysisConfig::tensorrt_inspector_enabled)
-      .def("enable_tensorrt_sparse_weights",
-           &AnalysisConfig::EnableTensorRtSparseWeights)
-      .def("tensorrt_sparse_weights_enabled",
-           &AnalysisConfig::tensorrt_sparse_weights_enabled)
       .def("tensorrt_engine_enabled", &AnalysisConfig::tensorrt_engine_enabled)
       .def("enable_dlnne",
           &AnalysisConfig::EnableDlnne,
diff --git a/test/ir/inference/test_trt_inference_predictor.py b/test/ir/inference/test_trt_inference_predictor.py
index ee0b8b48626bbf4f8ca5fbccd1cecfee8618a28e..e334e5eabfd74e761c0e6343c4731d3cb976cdd8 100644
--- a/test/ir/inference/test_trt_inference_predictor.py
+++ b/test/ir/inference/test_trt_inference_predictor.py
@@ -84,8 +84,6 @@ class BackendPaddle:
         # enable memory optim
         if not self.args.enable_tune:
             config.enable_memory_optim()
-        if self.args.enable_trt_sparse_weights:
-            config.enable_tensorrt_sparse_weights()
 
         config.set_cpu_math_library_num_threads(self.args.cpu_threads)
         config.switch_ir_optim(True)
@@ -260,9 +258,6 @@ def parse_args():
     parser.add_argument('--enable_dynamic_shape', type=str2bool, default=True)
     parser.add_argument('--enable_tune', type=str2bool, default=False)
     parser.add_argument('--enable_profile', type=str2bool, default=False)
-    parser.add_argument(
-        '--enable_trt_sparse_weights', type=str2bool, default=False
-    )
    parser.add_argument('--enable_benchmark', type=str2bool, default=True)
     parser.add_argument('--save_result', type=str2bool, default=False)
     parser.add_argument('--return_result', type=str2bool, default=False)
@@ -313,13 +308,6 @@ def run_infer(model_path):
     backend.load(conf)
     backend.predict()
 
-    # run inference predictor, enable trt sparse weights
-    conf.enable_tune = False
-    conf.enable_trt_sparse_weights = True
-    backend = BackendPaddle()
-    backend.load(conf)
-    backend.predict()
-
 
 class ConvBNLayer(paddle.nn.Layer):
     def __init__(
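
Note: for reference, below is a minimal sketch of the user-facing API this patch removes, reconstructed from the pybind bindings and the deleted test-harness code above. The model paths and TensorRT engine settings are illustrative placeholders, not taken from the patch; per the engine.cc hunk, the kSPARSE_WEIGHTS flag was only set on the buildSerializedNetwork path, so the option appears to have required a TensorRT 8.x build.

    from paddle.inference import Config, create_predictor

    # Hypothetical model files; any Paddle inference model works here.
    config = Config("./model.pdmodel", "./model.pdiparams")
    config.enable_use_gpu(256, 0)  # memory pool size (MB), GPU device id
    config.enable_tensorrt_engine(max_batch_size=1, min_subgraph_size=3)

    # The two bindings below are deleted by this patch and are no longer
    # available on AnalysisConfig after it lands.
    config.enable_tensorrt_sparse_weights()
    assert config.tensorrt_sparse_weights_enabled()

    predictor = create_predictor(config)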