diff --git a/paddle/fluid/inference/analysis/CMakeLists.txt b/paddle/fluid/inference/analysis/CMakeLists.txt index eb89fc5e1124e97b082d6299e3efc44591a8b01b..0c73778b201d77a6e8a35a38d17f2a86d5faaca9 100644 --- a/paddle/fluid/inference/analysis/CMakeLists.txt +++ b/paddle/fluid/inference/analysis/CMakeLists.txt @@ -7,16 +7,17 @@ set(analysis_deps # analysis_deps can be extended accross the project add_subdirectory(ir_passes) add_subdirectory(passes) -cc_library(ir_pass_manager SRCS ir_pass_manager.cc DEPS graph pass ${INFER_IR_PASSES}) +cc_library(analysis_helper SRCS helper.cc DEPS framework_proto proto_desc graph paddle_fluid_api) + +cc_library(ir_pass_manager SRCS ir_pass_manager.cc DEPS graph pass ${INFER_IR_PASSES} analysis_helper) cc_library(argument SRCS argument.cc DEPS scope proto_desc) cc_library(analysis_pass SRCS analysis_pass.cc DEPS proto_desc) cc_library(analysis SRCS analyzer.cc - helper.cc analysis_pass - DEPS ${analysis_deps} + DEPS ${analysis_deps} analysis_helper ) cc_test(test_dot SRCS dot_tester.cc DEPS analysis) diff --git a/paddle/fluid/inference/analysis/analyzer_tester.cc b/paddle/fluid/inference/analysis/analyzer_tester.cc index 48fc5dda2a5bfa24d679d4bf655e580dafc614b3..84a0c3374c66f85313828332099cb372e14c7c83 100644 --- a/paddle/fluid/inference/analysis/analyzer_tester.cc +++ b/paddle/fluid/inference/analysis/analyzer_tester.cc @@ -30,6 +30,7 @@ TEST(Analyzer, analysis_without_tensorrt) { Argument argument; argument.SetModelDir(FLAGS_inference_model_dir); argument.SetIrAnalysisPasses({"infer_clean_graph_pass"}); + argument.SetUseGPU(false); Analyzer analyser; analyser.Run(&argument); @@ -41,6 +42,7 @@ TEST(Analyzer, analysis_with_tensorrt) { argument.SetTensorRtWorkspaceSize(1 << 20); argument.SetModelDir(FLAGS_inference_model_dir); argument.SetIrAnalysisPasses({"infer_clean_graph_pass"}); + argument.SetUseGPU(false); Analyzer analyser; analyser.Run(&argument); diff --git a/paddle/fluid/inference/analysis/argument.h b/paddle/fluid/inference/analysis/argument.h index d7a2f3d1e3a3251263c8670aef5db538fa2c48ea..21203e2d9f4e4cd22ea49ea7b6808aff07e70eff 100644 --- a/paddle/fluid/inference/analysis/argument.h +++ b/paddle/fluid/inference/analysis/argument.h @@ -116,6 +116,7 @@ struct Argument { std::vector); DECL_ARGUMENT_FIELD(use_gpu, UseGPU, bool); + DECL_ARGUMENT_FIELD(gpu_device_id, GPUDeviceId, int); DECL_ARGUMENT_FIELD(use_tensorrt, UseTensorRT, bool); DECL_ARGUMENT_FIELD(tensorrt_node_teller, TensorRtNodeTeller, std::function); diff --git a/paddle/fluid/inference/analysis/ir_passes/CMakeLists.txt b/paddle/fluid/inference/analysis/ir_passes/CMakeLists.txt index c71cff889ed7cdb95f79b9bc89a9ca5ab370271c..822c7799bb3ae6d79da6cf2a7b3c8c9b20353ed7 100644 --- a/paddle/fluid/inference/analysis/ir_passes/CMakeLists.txt +++ b/paddle/fluid/inference/analysis/ir_passes/CMakeLists.txt @@ -4,4 +4,6 @@ set(analysis_deps ${analysis_deps} subgraph_detector tensorrt_subgraph_pass CACHE INTERNAL "") +set(pass_file ${PADDLE_BINARY_DIR}/paddle/fluid/inference/api/paddle_inference_pass.h) +file(APPEND ${pass_file} "USE_PASS(tensorrt_subgraph_pass);\n") set(INFER_IR_PASSES ${INFER_IR_PASSES} tensorrt_subgraph_pass CACHE INTERNAL "") diff --git a/paddle/fluid/inference/analysis/passes/ir_graph_build_pass.cc b/paddle/fluid/inference/analysis/passes/ir_graph_build_pass.cc index a30fef08b5726c965637e2fb489bdb2036bd2a8d..d5e0d90de1da8e54e2411c266f7a8c09c33b0336 100644 --- a/paddle/fluid/inference/analysis/passes/ir_graph_build_pass.cc +++ b/paddle/fluid/inference/analysis/passes/ir_graph_build_pass.cc @@ -30,15 +30,28 @@ void IrGraphBuildPass::RunImpl(Argument *argument) { if (!argument->scope_valid()) { argument->SetScope(new framework::Scope); } + PADDLE_ENFORCE(argument->use_gpu_valid()); + + // The load program should run on the same device with the inference program, + // so that the parameters will on the same device, or they will keep copying + // between difference devices. + platform::Place place; + if (argument->use_gpu()) { + PADDLE_ENFORCE(argument->gpu_device_id_valid()); + place = platform::CUDAPlace(argument->gpu_device_id()); + } else { + place = platform::CPUPlace(); + } if (argument->model_dir_valid()) { - auto program = LoadModel(argument->model_dir(), argument->scope_ptr()); + auto program = + LoadModel(argument->model_dir(), argument->scope_ptr(), place); argument->SetMainProgram(program.release()); } else if (argument->model_program_path_valid() && argument->model_params_path_valid()) { auto program = LoadModel(argument->model_program_path(), argument->model_params_path(), - argument->scope_ptr()); + argument->scope_ptr(), place); argument->SetMainProgram(program.release()); } else { PADDLE_THROW( @@ -52,16 +65,15 @@ void IrGraphBuildPass::RunImpl(Argument *argument) { } std::unique_ptr IrGraphBuildPass::LoadModel( - const std::string &path, framework::Scope *scope) { - platform::CPUPlace place; + const std::string &path, framework::Scope *scope, + const platform::Place &place) { framework::Executor exe(place); return Load(&exe, scope, path); } std::unique_ptr IrGraphBuildPass::LoadModel( const std::string &program_path, const std::string ¶ms_path, - framework::Scope *scope) { - platform::CPUPlace place; + framework::Scope *scope, const platform::Place &place) { framework::Executor exe(place); return Load(&exe, scope, program_path, params_path); } diff --git a/paddle/fluid/inference/analysis/passes/ir_graph_build_pass.h b/paddle/fluid/inference/analysis/passes/ir_graph_build_pass.h index 3291e4f6ad3ca3079e672350805cab1f1e7b2413..271e64fce579bc9001b1dd632576571cec949752 100644 --- a/paddle/fluid/inference/analysis/passes/ir_graph_build_pass.h +++ b/paddle/fluid/inference/analysis/passes/ir_graph_build_pass.h @@ -17,6 +17,7 @@ #include #include "paddle/fluid/framework/scope.h" #include "paddle/fluid/inference/analysis/analysis_pass.h" +#include "paddle/fluid/platform/place.h" namespace paddle { namespace inference { @@ -32,11 +33,12 @@ class IrGraphBuildPass : public AnalysisPass { std::string repr() const override; private: - std::unique_ptr LoadModel(const std::string &path, - framework::Scope *scope); + std::unique_ptr LoadModel( + const std::string &path, framework::Scope *scope, + const platform::Place &place); std::unique_ptr LoadModel( const std::string &program_path, const std::string ¶ms_path, - framework::Scope *scope); + framework::Scope *scope, const platform::Place &place); std::string model_binary_str_; }; diff --git a/paddle/fluid/inference/api/CMakeLists.txt b/paddle/fluid/inference/api/CMakeLists.txt index 82f74a269a5915dfa1d97a28f5ae15a12ea0b154..e9969b84f33483b048951f704de1e13e51cbeaea 100644 --- a/paddle/fluid/inference/api/CMakeLists.txt +++ b/paddle/fluid/inference/api/CMakeLists.txt @@ -27,11 +27,10 @@ endif() cc_library(reset_tensor_array SRCS details/reset_tensor_array.cc DEPS lod_tensor scope) cc_library(analysis_config SRCS analysis_config.cc DEPS lod_tensor paddle_pass_builder) cc_library(paddle_pass_builder SRCS paddle_pass_builder.cc) -cc_library(paddle_inference_api SRCS api.cc api_impl.cc helper.cc DEPS lod_tensor scope paddle_pass_builder reset_tensor_array analysis_config analysis_config paddle_pass_builder) -cc_library(analysis_predictor SRCS analysis_predictor.cc DEPS paddle_inference_api analysis naive_executor zero_copy_tensor reset_tensor_array analysis_config paddle_pass_builder) -cc_library(zero_copy_tensor SRCS details/zero_copy_tensor.cc DEPS paddle_inference_api) -cc_library(zero_copy_tensor_dummy SRCS details/zero_copy_tensor_dummy.cc DEPS paddle_inference_api) - +cc_library(analysis_predictor SRCS analysis_predictor.cc DEPS paddle_inference_api analysis naive_executor zero_copy_tensor reset_tensor_array analysis_config paddle_pass_builder ir_pass_manager) +cc_library(zero_copy_tensor SRCS details/zero_copy_tensor.cc DEPS scope lod_tensor enforce) +cc_library(zero_copy_tensor_dummy SRCS details/zero_copy_tensor_dummy.cc) +cc_library(paddle_inference_api SRCS api.cc api_impl.cc helper.cc DEPS lod_tensor scope paddle_pass_builder reset_tensor_array analysis_config analysis_config paddle_pass_builder DEPS zero_copy_tensor) cc_test(test_paddle_inference_api SRCS api_tester.cc diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index ee1d1d839cbd52ecf6010046056e97c86f3e141c..cb14d2a2602808bd35106ed2bafcf7975f549597 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -285,6 +285,7 @@ void AnalysisPredictor::OptimizeInferenceProgram() { status_program_optimized_ = true; argument_.SetUseGPU(config_.use_gpu); + argument_.SetGPUDeviceId(config_.device); // Analyze inference_program if (!config_.model_dir.empty()) { argument_.SetModelDir(config_.model_dir); @@ -491,8 +492,7 @@ bool AnalysisPredictor::LoadParameters() { } // Use NaiveExecutor to Load parameters. - platform::CPUPlace place; - framework::NaiveExecutor e(place); + framework::NaiveExecutor e(place_); e.Prepare(scope_.get(), *load_program, 0, false); e.Run(); VLOG(3) << "get " << scope_->LocalVarNames().size() << " vars after load"; diff --git a/paddle/fluid/inference/api/paddle_pass_builder.h b/paddle/fluid/inference/api/paddle_pass_builder.h index 825bee833bf918067497f56adebbbcaf55f892a2..12e3a6f42e14010feedbbb5d8f8a98f60cea4556 100644 --- a/paddle/fluid/inference/api/paddle_pass_builder.h +++ b/paddle/fluid/inference/api/paddle_pass_builder.h @@ -116,8 +116,12 @@ class CpuPassStrategy : public PassStrategy { class GpuPassStrategy : public PassStrategy { public: GpuPassStrategy() : PassStrategy({}) { + // TODO(NHZlX) Problem with Data synchronization between GPU and CPU + // When running in GPU mode, the parameters are all on GPU. But the + // opearations of "conv_bn_fuse_pass" are on CPU. passes_.assign({ - "infer_clean_graph_pass", "conv_bn_fuse_pass", + "infer_clean_graph_pass", + // "infer_clean_graph_pass", "conv_bn_fuse_pass", }); } diff --git a/paddle/fluid/inference/tests/api/CMakeLists.txt b/paddle/fluid/inference/tests/api/CMakeLists.txt index cf2a61ea61b6bb4566e5057a5c17630298d3f88e..e8bd13037ed6c2c3c639b76f6f3561921fb6ee37 100644 --- a/paddle/fluid/inference/tests/api/CMakeLists.txt +++ b/paddle/fluid/inference/tests/api/CMakeLists.txt @@ -1,5 +1,9 @@ set(INFERENCE_EXTRA_DEPS paddle_inference_api paddle_fluid_api ir_pass_manager analysis_predictor) +if(WITH_GPU AND TENSORRT_FOUND) + set(INFERENCE_EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} analysis ${analysis_deps} ir_pass_manager analysis_predictor) +endif() + function(download_model install_dir model_name) if (NOT EXISTS ${install_dir}) inference_download_and_uncompress(${install_dir} ${INFERENCE_URL} ${model_name}) @@ -75,11 +79,11 @@ endif() inference_analysis_api_test(test_analyzer_ocr ${OCR_INSTALL_DIR} analyzer_vis_tester.cc) # resnet50 -inference_analysis_api_test_with_fake_data(test_analyzer_resnet50 +inference_analysis_api_test_with_fake_data(test_analyzer_resnet50 "${INFERENCE_DEMO_INSTALL_DIR}/resnet50" analyzer_resnet50_tester.cc "resnet50_model.tar.gz") # mobilenet with depthwise_conv op -inference_analysis_api_test_with_fake_data(test_analyzer_mobilenet +inference_analysis_api_test_with_fake_data(test_analyzer_mobilenet "${INFERENCE_DEMO_INSTALL_DIR}/mobilenet_depthwise_conv" analyzer_resnet50_tester.cc "mobilenet_model.tar.gz") # anakin @@ -89,15 +93,15 @@ if (WITH_ANAKIN AND WITH_MKL) # only needed in CI set(ANAKIN_RNN1_INSTALL_DIR "${ANAKIN_INSTALL_DIR}/rnn1") inference_download(${ANAKIN_RNN1_INSTALL_DIR} ${INFERENCE_URL} "anakin_test%2Fditu_rnn.anakin2.model.bin") inference_download(${ANAKIN_RNN1_INSTALL_DIR} ${INFERENCE_URL} "anakin_test%2Fditu_rnn_data.txt") - cc_test(test_anakin_rnn1 SRCS anakin_rnn1_tester.cc - ARGS --model=${ANAKIN_RNN1_INSTALL_DIR}/anakin_test%2Fditu_rnn.anakin2.model.bin + cc_test(test_anakin_rnn1 SRCS anakin_rnn1_tester.cc + ARGS --model=${ANAKIN_RNN1_INSTALL_DIR}/anakin_test%2Fditu_rnn.anakin2.model.bin --datapath=${ANAKIN_RNN1_INSTALL_DIR}/anakin_test%2Fditu_rnn_data.txt DEPS inference_anakin_api_shared SERIAL) # anakin mobilenet if(WITH_GPU) set(ANAKIN_MOBILENET_INSTALL_DIR "${ANAKIN_INSTALL_DIR}/mobilenet") inference_download(${ANAKIN_MOBILENET_INSTALL_DIR} ${INFERENCE_URL} "mobilenet_v2.anakin.bin") - cc_test(test_anakin_mobilenet SRCS anakin_mobilenet_tester.cc + cc_test(test_anakin_mobilenet SRCS anakin_mobilenet_tester.cc ARGS --model=${ANAKIN_MOBILENET_INSTALL_DIR}/mobilenet_v2.anakin.bin DEPS inference_anakin_api_shared dynload_cuda SERIAL) endif() @@ -109,6 +113,6 @@ if(WITH_GPU AND TENSORRT_FOUND) inference_download_and_uncompress(${TRT_MODEL_INSTALL_DIR} ${INFERENCE_URL}/tensorrt_test "trt_test_models.tar.gz") endif() inference_analysis_test(test_trt_models SRCS trt_models_tester.cc - EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} analysis ${analysis_deps} ir_pass_manager analysis_predictor + EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} ARGS --infer_model=${TRT_MODEL_INSTALL_DIR}/trt_test_models SERIAL) endif() diff --git a/paddle/fluid/inference/tests/api/trt_models_tester.cc b/paddle/fluid/inference/tests/api/trt_models_tester.cc index 922feba10fec5d1d13b47dbce064fce2e01d8998..ef612ce6148329c33f194842945bb5438afcf645 100644 --- a/paddle/fluid/inference/tests/api/trt_models_tester.cc +++ b/paddle/fluid/inference/tests/api/trt_models_tester.cc @@ -145,5 +145,3 @@ TEST(TensorRT_mobilenet, analysis) { } // namespace inference } // namespace paddle - -USE_PASS(tensorrt_subgraph_pass);