diff --git a/paddle/fluid/inference/analysis/ir_pass_manager.cc b/paddle/fluid/inference/analysis/ir_pass_manager.cc
index 26bca9b1e54ecb854b944e7b1311d4877f2c6796..a4e263e2f464c4021b049093c49ddaecb056284f 100644
--- a/paddle/fluid/inference/analysis/ir_pass_manager.cc
+++ b/paddle/fluid/inference/analysis/ir_pass_manager.cc
@@ -114,13 +114,25 @@ void IRPassManager::CreatePasses(Argument *argument,
               "When you are in TRT INT8 mode, and load model from "
               "memory, you should set optim_cache_dir using "
               "config.SetOptimCacheDir()"));
-      PADDLE_ENFORCE_EQ(
-          !(model_from_memory && use_static_engine), true,
-          platform::errors::PreconditionNotMet(
-              "When you are using Paddle-TRT, and also using load model "
-              "from memory, you should set the use_static to false."));
+      if (model_from_memory && use_static_engine) {
+        PADDLE_ENFORCE_EQ(
+            optim_cache_dir.empty(), false,
+            platform::errors::PreconditionNotMet(
+                "When you are using Paddle-TRT, loading the model from "
+                "memory, and setting use_static to true, "
+                "you must set optim_cache_dir using "
+                "config.SetOptimCacheDir()."));
+      }
 
       if (!optim_cache_dir.empty()) {
+        if (!PathExists(optim_cache_dir)) {
+          PADDLE_ENFORCE_NE(
+              MKDIR(optim_cache_dir.c_str()), -1,
+              platform::errors::PreconditionNotMet(
+                  "Cannot create the optimize cache directory: %s. Make sure "
+                  "you have write permission.",
+                  optim_cache_dir));
+        }
         pass->Set("model_opt_cache_dir", new std::string(optim_cache_dir));
       } else if (use_static_engine || enable_int8) {
         std::string model_opt_cache_dir =
diff --git a/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc b/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
index a450ebdf8919637f6753306e61c2467171cd9654..75111701f1f388d81d273fdc14847ead6772e581 100644
--- a/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
+++ b/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
@@ -250,7 +250,6 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
   auto predictor_id = Get<int>("predictor_id");
 
   // Get "" when there is no cached calibration table data.
-  bool load_from_memory = Get<bool>("model_from_memory");
   std::string calibration_data = "";
   if (enable_int8 && use_calib_mode) {
     calibration_data = GetTrtCalibTableData(
@@ -323,8 +322,7 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
       graph->Has(framework::ir::kEmbEltwiseLayernormPass) &&
       graph->Has(framework::ir::kMultiheadMatmulPass));
 
-  bool need_serialize = (use_static_engine && !load_from_memory);
-  if (need_serialize) {
+  if (use_static_engine) {
     trt_engine_serialized_data = GetTrtEngineSerializedData(
         Get<std::string>("model_opt_cache_dir"), engine_key);
     // we can load the engine info serialized before from the disk.
@@ -352,7 +350,7 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
           std::vector<std::string>(input_names.begin(), input_names.end()),
           param_set, output_mapping, trt_engine);
 
-  if (need_serialize) {
+  if (use_static_engine) {
     nvinfer1::IHostMemory *serialized_engine_data = trt_engine->Serialize();
     trt_engine_serialized_data =
         std::string((const char *)serialized_engine_data->data(),
diff --git a/paddle/fluid/inference/tests/api/trt_dynamic_shape_test.cc b/paddle/fluid/inference/tests/api/trt_dynamic_shape_test.cc
index 552aefac9b6da590d3684b50d827a0b78b0f8817..55ee2082e69593e6a4226c7cc2b11916f7c0f814 100644
--- a/paddle/fluid/inference/tests/api/trt_dynamic_shape_test.cc
+++ b/paddle/fluid/inference/tests/api/trt_dynamic_shape_test.cc
@@ -21,17 +21,32 @@ limitations under the License. */
 namespace paddle {
 namespace inference {
 
-void TestDynamic(bool with_dynamic = true) {
+void TestDynamic(bool with_dynamic = true, bool delete_cache = true,
+                 bool delete_conv_bn = false) {
   std::string model_dir =
       FLAGS_infer_model + "/conv_bn_swish_split_gelu/conv_bn_swish_split_gelu";
+
+  std::string opt_cache_dir = model_dir + "/my_cache";
+  if (delete_cache) {
+    delete_cache_files(opt_cache_dir);
+  }
+
   AnalysisConfig config;
   config.EnableUseGpu(100, 0);
-  config.SetModel(model_dir + "/model", model_dir + "/params");
+  std::string buffer_prog, buffer_param;
+  ReadBinaryFile(model_dir + "/model", &buffer_prog);
+  ReadBinaryFile(model_dir + "/params", &buffer_param);
+  config.SetModelBuffer(&buffer_prog[0], buffer_prog.size(), &buffer_param[0],
+                        buffer_param.size());
+  config.SetOptimCacheDir(opt_cache_dir);
 
   config.SwitchUseFeedFetchOps(false);
   // Set the input's min, max, opt shape
   config.EnableTensorRtEngine(1 << 30, 1, 1,
-                              AnalysisConfig::Precision::kFloat32, false, true);
+                              AnalysisConfig::Precision::kFloat32, true, true);
+  if (delete_conv_bn) {
+    config.pass_builder()->DeletePass("conv_bn_fuse_pass");
+  }
   if (with_dynamic) {
     std::map<std::string, std::vector<int>> min_input_shape = {
         {"image", {1, 1, 3, 3}}};
@@ -130,6 +145,12 @@ void TestDynamic2() {
 
 TEST(AnalysisPredictor, trt_dynamic) { TestDynamic(true); }
 TEST(AnalysisPredictor, trt_static) { TestDynamic(false); }
+TEST(AnalysisPredictor, trt_memory_serialize) {
+  // serialize
+  TestDynamic(false, true, true);
+  // deserialize
+  TestDynamic(false, false, true);
+}
 TEST(AnalysisPredictor, trt_dynamic2) { TestDynamic2(); }
 
 }  // namespace inference
diff --git a/paddle/fluid/inference/tests/api/trt_test_helper.h b/paddle/fluid/inference/tests/api/trt_test_helper.h
index 1abde733581218bed5c9058ca505ec8c6d965f1e..aaa285b2fc2c9e991ac6c4433a5dc82dcb353ca4 100644
--- a/paddle/fluid/inference/tests/api/trt_test_helper.h
+++ b/paddle/fluid/inference/tests/api/trt_test_helper.h
@@ -148,6 +148,7 @@ void delete_cache_files(std::string path) {
       remove(file_rm.c_str());
     }
   }
+  remove(path.c_str());
 }
 
 }  // namespace inference
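
Usage sketch (not part of the patch): the test change above exercises the behavior this patch enables, i.e. loading the program/params from memory while still serializing the TensorRT engine into a user-supplied optimization cache directory. The sketch below mirrors that flow with the public AnalysisConfig API; the model path and the ReadFileToString helper are illustrative placeholders, and the include path depends on how the inference library is packaged.

#include <fstream>
#include <sstream>
#include <string>

#include "paddle/fluid/inference/api/paddle_inference_api.h"

// Illustrative helper (not part of this patch): read a whole file into a string.
static std::string ReadFileToString(const std::string &path) {
  std::ifstream fin(path, std::ios::binary);
  std::ostringstream buf;
  buf << fin.rdbuf();
  return buf.str();
}

int main() {
  // Placeholder model location; replace with a real inference model.
  const std::string model_dir = "./conv_bn_swish_split_gelu";
  std::string prog = ReadFileToString(model_dir + "/model");
  std::string params = ReadFileToString(model_dir + "/params");

  paddle::AnalysisConfig config;
  config.EnableUseGpu(100, 0);
  // Load the model from memory instead of from disk.
  config.SetModelBuffer(prog.data(), prog.size(), params.data(), params.size());
  // With this patch, use_static=true combined with model-from-memory requires
  // an explicit cache dir; the serialized TRT engine is written there.
  config.SetOptimCacheDir(model_dir + "/my_cache");
  config.EnableTensorRtEngine(1 << 30 /* workspace_size */, 1 /* max_batch */,
                              1 /* min_subgraph_size */,
                              paddle::AnalysisConfig::Precision::kFloat32,
                              true /* use_static */, false /* use_calib_mode */);

  auto predictor = paddle::CreatePaddlePredictor(config);
  // Run inference as usual; a later run with the same config deserializes the
  // engine from my_cache instead of rebuilding it.
  return 0;
}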