未验证 提交 05280674 编写于 作者: Z Zhaolong Xing 提交者: GitHub

Merge pull request #14500 from NHZlX/refine_trt

Fix gpu load model and demo_ci on trt
...@@ -7,16 +7,17 @@ set(analysis_deps # analysis_deps can be extended accross the project ...@@ -7,16 +7,17 @@ set(analysis_deps # analysis_deps can be extended accross the project
add_subdirectory(ir_passes) add_subdirectory(ir_passes)
add_subdirectory(passes) add_subdirectory(passes)
cc_library(ir_pass_manager SRCS ir_pass_manager.cc DEPS graph pass ${INFER_IR_PASSES}) cc_library(analysis_helper SRCS helper.cc DEPS framework_proto proto_desc graph paddle_fluid_api)
cc_library(ir_pass_manager SRCS ir_pass_manager.cc DEPS graph pass ${INFER_IR_PASSES} analysis_helper)
cc_library(argument SRCS argument.cc DEPS scope proto_desc) cc_library(argument SRCS argument.cc DEPS scope proto_desc)
cc_library(analysis_pass SRCS analysis_pass.cc DEPS proto_desc) cc_library(analysis_pass SRCS analysis_pass.cc DEPS proto_desc)
cc_library(analysis SRCS cc_library(analysis SRCS
analyzer.cc analyzer.cc
helper.cc
analysis_pass analysis_pass
DEPS ${analysis_deps} DEPS ${analysis_deps} analysis_helper
) )
cc_test(test_dot SRCS dot_tester.cc DEPS analysis) cc_test(test_dot SRCS dot_tester.cc DEPS analysis)
......
...@@ -30,6 +30,7 @@ TEST(Analyzer, analysis_without_tensorrt) { ...@@ -30,6 +30,7 @@ TEST(Analyzer, analysis_without_tensorrt) {
Argument argument; Argument argument;
argument.SetModelDir(FLAGS_inference_model_dir); argument.SetModelDir(FLAGS_inference_model_dir);
argument.SetIrAnalysisPasses({"infer_clean_graph_pass"}); argument.SetIrAnalysisPasses({"infer_clean_graph_pass"});
argument.SetUseGPU(false);
Analyzer analyser; Analyzer analyser;
analyser.Run(&argument); analyser.Run(&argument);
...@@ -41,6 +42,7 @@ TEST(Analyzer, analysis_with_tensorrt) { ...@@ -41,6 +42,7 @@ TEST(Analyzer, analysis_with_tensorrt) {
argument.SetTensorRtWorkspaceSize(1 << 20); argument.SetTensorRtWorkspaceSize(1 << 20);
argument.SetModelDir(FLAGS_inference_model_dir); argument.SetModelDir(FLAGS_inference_model_dir);
argument.SetIrAnalysisPasses({"infer_clean_graph_pass"}); argument.SetIrAnalysisPasses({"infer_clean_graph_pass"});
argument.SetUseGPU(false);
Analyzer analyser; Analyzer analyser;
analyser.Run(&argument); analyser.Run(&argument);
......
...@@ -116,6 +116,7 @@ struct Argument { ...@@ -116,6 +116,7 @@ struct Argument {
std::vector<std::string>); std::vector<std::string>);
DECL_ARGUMENT_FIELD(use_gpu, UseGPU, bool); DECL_ARGUMENT_FIELD(use_gpu, UseGPU, bool);
DECL_ARGUMENT_FIELD(gpu_device_id, GPUDeviceId, int);
DECL_ARGUMENT_FIELD(use_tensorrt, UseTensorRT, bool); DECL_ARGUMENT_FIELD(use_tensorrt, UseTensorRT, bool);
DECL_ARGUMENT_FIELD(tensorrt_node_teller, TensorRtNodeTeller, DECL_ARGUMENT_FIELD(tensorrt_node_teller, TensorRtNodeTeller,
std::function<bool(const framework::ir::Node*)>); std::function<bool(const framework::ir::Node*)>);
......
...@@ -4,4 +4,6 @@ set(analysis_deps ${analysis_deps} ...@@ -4,4 +4,6 @@ set(analysis_deps ${analysis_deps}
subgraph_detector tensorrt_subgraph_pass subgraph_detector tensorrt_subgraph_pass
CACHE INTERNAL "") CACHE INTERNAL "")
set(pass_file ${PADDLE_BINARY_DIR}/paddle/fluid/inference/api/paddle_inference_pass.h)
file(APPEND ${pass_file} "USE_PASS(tensorrt_subgraph_pass);\n")
set(INFER_IR_PASSES ${INFER_IR_PASSES} tensorrt_subgraph_pass CACHE INTERNAL "") set(INFER_IR_PASSES ${INFER_IR_PASSES} tensorrt_subgraph_pass CACHE INTERNAL "")
...@@ -30,15 +30,28 @@ void IrGraphBuildPass::RunImpl(Argument *argument) { ...@@ -30,15 +30,28 @@ void IrGraphBuildPass::RunImpl(Argument *argument) {
if (!argument->scope_valid()) { if (!argument->scope_valid()) {
argument->SetScope(new framework::Scope); argument->SetScope(new framework::Scope);
} }
PADDLE_ENFORCE(argument->use_gpu_valid());
// The load program should run on the same device with the inference program,
// so that the parameters will on the same device, or they will keep copying
// between difference devices.
platform::Place place;
if (argument->use_gpu()) {
PADDLE_ENFORCE(argument->gpu_device_id_valid());
place = platform::CUDAPlace(argument->gpu_device_id());
} else {
place = platform::CPUPlace();
}
if (argument->model_dir_valid()) { if (argument->model_dir_valid()) {
auto program = LoadModel(argument->model_dir(), argument->scope_ptr()); auto program =
LoadModel(argument->model_dir(), argument->scope_ptr(), place);
argument->SetMainProgram(program.release()); argument->SetMainProgram(program.release());
} else if (argument->model_program_path_valid() && } else if (argument->model_program_path_valid() &&
argument->model_params_path_valid()) { argument->model_params_path_valid()) {
auto program = auto program =
LoadModel(argument->model_program_path(), argument->model_params_path(), LoadModel(argument->model_program_path(), argument->model_params_path(),
argument->scope_ptr()); argument->scope_ptr(), place);
argument->SetMainProgram(program.release()); argument->SetMainProgram(program.release());
} else { } else {
PADDLE_THROW( PADDLE_THROW(
...@@ -52,16 +65,15 @@ void IrGraphBuildPass::RunImpl(Argument *argument) { ...@@ -52,16 +65,15 @@ void IrGraphBuildPass::RunImpl(Argument *argument) {
} }
std::unique_ptr<framework::ProgramDesc> IrGraphBuildPass::LoadModel( std::unique_ptr<framework::ProgramDesc> IrGraphBuildPass::LoadModel(
const std::string &path, framework::Scope *scope) { const std::string &path, framework::Scope *scope,
platform::CPUPlace place; const platform::Place &place) {
framework::Executor exe(place); framework::Executor exe(place);
return Load(&exe, scope, path); return Load(&exe, scope, path);
} }
std::unique_ptr<framework::ProgramDesc> IrGraphBuildPass::LoadModel( std::unique_ptr<framework::ProgramDesc> IrGraphBuildPass::LoadModel(
const std::string &program_path, const std::string &params_path, const std::string &program_path, const std::string &params_path,
framework::Scope *scope) { framework::Scope *scope, const platform::Place &place) {
platform::CPUPlace place;
framework::Executor exe(place); framework::Executor exe(place);
return Load(&exe, scope, program_path, params_path); return Load(&exe, scope, program_path, params_path);
} }
......
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
#include <string> #include <string>
#include "paddle/fluid/framework/scope.h" #include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/inference/analysis/analysis_pass.h" #include "paddle/fluid/inference/analysis/analysis_pass.h"
#include "paddle/fluid/platform/place.h"
namespace paddle { namespace paddle {
namespace inference { namespace inference {
...@@ -32,11 +33,12 @@ class IrGraphBuildPass : public AnalysisPass { ...@@ -32,11 +33,12 @@ class IrGraphBuildPass : public AnalysisPass {
std::string repr() const override; std::string repr() const override;
private: private:
std::unique_ptr<framework::ProgramDesc> LoadModel(const std::string &path, std::unique_ptr<framework::ProgramDesc> LoadModel(
framework::Scope *scope); const std::string &path, framework::Scope *scope,
const platform::Place &place);
std::unique_ptr<framework::ProgramDesc> LoadModel( std::unique_ptr<framework::ProgramDesc> LoadModel(
const std::string &program_path, const std::string &params_path, const std::string &program_path, const std::string &params_path,
framework::Scope *scope); framework::Scope *scope, const platform::Place &place);
std::string model_binary_str_; std::string model_binary_str_;
}; };
......
...@@ -27,11 +27,10 @@ endif() ...@@ -27,11 +27,10 @@ endif()
cc_library(reset_tensor_array SRCS details/reset_tensor_array.cc DEPS lod_tensor scope) cc_library(reset_tensor_array SRCS details/reset_tensor_array.cc DEPS lod_tensor scope)
cc_library(analysis_config SRCS analysis_config.cc DEPS lod_tensor paddle_pass_builder) cc_library(analysis_config SRCS analysis_config.cc DEPS lod_tensor paddle_pass_builder)
cc_library(paddle_pass_builder SRCS paddle_pass_builder.cc) cc_library(paddle_pass_builder SRCS paddle_pass_builder.cc)
cc_library(paddle_inference_api SRCS api.cc api_impl.cc helper.cc DEPS lod_tensor scope paddle_pass_builder reset_tensor_array analysis_config analysis_config paddle_pass_builder) cc_library(analysis_predictor SRCS analysis_predictor.cc DEPS paddle_inference_api analysis naive_executor zero_copy_tensor reset_tensor_array analysis_config paddle_pass_builder ir_pass_manager)
cc_library(analysis_predictor SRCS analysis_predictor.cc DEPS paddle_inference_api analysis naive_executor zero_copy_tensor reset_tensor_array analysis_config paddle_pass_builder) cc_library(zero_copy_tensor SRCS details/zero_copy_tensor.cc DEPS scope lod_tensor enforce)
cc_library(zero_copy_tensor SRCS details/zero_copy_tensor.cc DEPS paddle_inference_api) cc_library(zero_copy_tensor_dummy SRCS details/zero_copy_tensor_dummy.cc)
cc_library(zero_copy_tensor_dummy SRCS details/zero_copy_tensor_dummy.cc DEPS paddle_inference_api) cc_library(paddle_inference_api SRCS api.cc api_impl.cc helper.cc DEPS lod_tensor scope paddle_pass_builder reset_tensor_array analysis_config analysis_config paddle_pass_builder DEPS zero_copy_tensor)
cc_test(test_paddle_inference_api cc_test(test_paddle_inference_api
SRCS api_tester.cc SRCS api_tester.cc
......
...@@ -285,6 +285,7 @@ void AnalysisPredictor::OptimizeInferenceProgram() { ...@@ -285,6 +285,7 @@ void AnalysisPredictor::OptimizeInferenceProgram() {
status_program_optimized_ = true; status_program_optimized_ = true;
argument_.SetUseGPU(config_.use_gpu); argument_.SetUseGPU(config_.use_gpu);
argument_.SetGPUDeviceId(config_.device);
// Analyze inference_program // Analyze inference_program
if (!config_.model_dir.empty()) { if (!config_.model_dir.empty()) {
argument_.SetModelDir(config_.model_dir); argument_.SetModelDir(config_.model_dir);
...@@ -491,8 +492,7 @@ bool AnalysisPredictor::LoadParameters() { ...@@ -491,8 +492,7 @@ bool AnalysisPredictor::LoadParameters() {
} }
// Use NaiveExecutor to Load parameters. // Use NaiveExecutor to Load parameters.
platform::CPUPlace place; framework::NaiveExecutor e(place_);
framework::NaiveExecutor e(place);
e.Prepare(scope_.get(), *load_program, 0, false); e.Prepare(scope_.get(), *load_program, 0, false);
e.Run(); e.Run();
VLOG(3) << "get " << scope_->LocalVarNames().size() << " vars after load"; VLOG(3) << "get " << scope_->LocalVarNames().size() << " vars after load";
......
...@@ -116,8 +116,12 @@ class CpuPassStrategy : public PassStrategy { ...@@ -116,8 +116,12 @@ class CpuPassStrategy : public PassStrategy {
class GpuPassStrategy : public PassStrategy { class GpuPassStrategy : public PassStrategy {
public: public:
GpuPassStrategy() : PassStrategy({}) { GpuPassStrategy() : PassStrategy({}) {
// TODO(NHZlX) Problem with Data synchronization between GPU and CPU
// When running in GPU mode, the parameters are all on GPU. But the
// opearations of "conv_bn_fuse_pass" are on CPU.
passes_.assign({ passes_.assign({
"infer_clean_graph_pass", "conv_bn_fuse_pass", "infer_clean_graph_pass",
// "infer_clean_graph_pass", "conv_bn_fuse_pass",
}); });
} }
......
set(INFERENCE_EXTRA_DEPS paddle_inference_api paddle_fluid_api ir_pass_manager analysis_predictor) set(INFERENCE_EXTRA_DEPS paddle_inference_api paddle_fluid_api ir_pass_manager analysis_predictor)
if(WITH_GPU AND TENSORRT_FOUND)
set(INFERENCE_EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} analysis ${analysis_deps} ir_pass_manager analysis_predictor)
endif()
function(download_model install_dir model_name) function(download_model install_dir model_name)
if (NOT EXISTS ${install_dir}) if (NOT EXISTS ${install_dir})
inference_download_and_uncompress(${install_dir} ${INFERENCE_URL} ${model_name}) inference_download_and_uncompress(${install_dir} ${INFERENCE_URL} ${model_name})
...@@ -75,11 +79,11 @@ endif() ...@@ -75,11 +79,11 @@ endif()
inference_analysis_api_test(test_analyzer_ocr ${OCR_INSTALL_DIR} analyzer_vis_tester.cc) inference_analysis_api_test(test_analyzer_ocr ${OCR_INSTALL_DIR} analyzer_vis_tester.cc)
# resnet50 # resnet50
inference_analysis_api_test_with_fake_data(test_analyzer_resnet50 inference_analysis_api_test_with_fake_data(test_analyzer_resnet50
"${INFERENCE_DEMO_INSTALL_DIR}/resnet50" analyzer_resnet50_tester.cc "resnet50_model.tar.gz") "${INFERENCE_DEMO_INSTALL_DIR}/resnet50" analyzer_resnet50_tester.cc "resnet50_model.tar.gz")
# mobilenet with depthwise_conv op # mobilenet with depthwise_conv op
inference_analysis_api_test_with_fake_data(test_analyzer_mobilenet inference_analysis_api_test_with_fake_data(test_analyzer_mobilenet
"${INFERENCE_DEMO_INSTALL_DIR}/mobilenet_depthwise_conv" analyzer_resnet50_tester.cc "mobilenet_model.tar.gz") "${INFERENCE_DEMO_INSTALL_DIR}/mobilenet_depthwise_conv" analyzer_resnet50_tester.cc "mobilenet_model.tar.gz")
# anakin # anakin
...@@ -89,15 +93,15 @@ if (WITH_ANAKIN AND WITH_MKL) # only needed in CI ...@@ -89,15 +93,15 @@ if (WITH_ANAKIN AND WITH_MKL) # only needed in CI
set(ANAKIN_RNN1_INSTALL_DIR "${ANAKIN_INSTALL_DIR}/rnn1") set(ANAKIN_RNN1_INSTALL_DIR "${ANAKIN_INSTALL_DIR}/rnn1")
inference_download(${ANAKIN_RNN1_INSTALL_DIR} ${INFERENCE_URL} "anakin_test%2Fditu_rnn.anakin2.model.bin") inference_download(${ANAKIN_RNN1_INSTALL_DIR} ${INFERENCE_URL} "anakin_test%2Fditu_rnn.anakin2.model.bin")
inference_download(${ANAKIN_RNN1_INSTALL_DIR} ${INFERENCE_URL} "anakin_test%2Fditu_rnn_data.txt") inference_download(${ANAKIN_RNN1_INSTALL_DIR} ${INFERENCE_URL} "anakin_test%2Fditu_rnn_data.txt")
cc_test(test_anakin_rnn1 SRCS anakin_rnn1_tester.cc cc_test(test_anakin_rnn1 SRCS anakin_rnn1_tester.cc
ARGS --model=${ANAKIN_RNN1_INSTALL_DIR}/anakin_test%2Fditu_rnn.anakin2.model.bin ARGS --model=${ANAKIN_RNN1_INSTALL_DIR}/anakin_test%2Fditu_rnn.anakin2.model.bin
--datapath=${ANAKIN_RNN1_INSTALL_DIR}/anakin_test%2Fditu_rnn_data.txt --datapath=${ANAKIN_RNN1_INSTALL_DIR}/anakin_test%2Fditu_rnn_data.txt
DEPS inference_anakin_api_shared SERIAL) DEPS inference_anakin_api_shared SERIAL)
# anakin mobilenet # anakin mobilenet
if(WITH_GPU) if(WITH_GPU)
set(ANAKIN_MOBILENET_INSTALL_DIR "${ANAKIN_INSTALL_DIR}/mobilenet") set(ANAKIN_MOBILENET_INSTALL_DIR "${ANAKIN_INSTALL_DIR}/mobilenet")
inference_download(${ANAKIN_MOBILENET_INSTALL_DIR} ${INFERENCE_URL} "mobilenet_v2.anakin.bin") inference_download(${ANAKIN_MOBILENET_INSTALL_DIR} ${INFERENCE_URL} "mobilenet_v2.anakin.bin")
cc_test(test_anakin_mobilenet SRCS anakin_mobilenet_tester.cc cc_test(test_anakin_mobilenet SRCS anakin_mobilenet_tester.cc
ARGS --model=${ANAKIN_MOBILENET_INSTALL_DIR}/mobilenet_v2.anakin.bin ARGS --model=${ANAKIN_MOBILENET_INSTALL_DIR}/mobilenet_v2.anakin.bin
DEPS inference_anakin_api_shared dynload_cuda SERIAL) DEPS inference_anakin_api_shared dynload_cuda SERIAL)
endif() endif()
...@@ -109,6 +113,6 @@ if(WITH_GPU AND TENSORRT_FOUND) ...@@ -109,6 +113,6 @@ if(WITH_GPU AND TENSORRT_FOUND)
inference_download_and_uncompress(${TRT_MODEL_INSTALL_DIR} ${INFERENCE_URL}/tensorrt_test "trt_test_models.tar.gz") inference_download_and_uncompress(${TRT_MODEL_INSTALL_DIR} ${INFERENCE_URL}/tensorrt_test "trt_test_models.tar.gz")
endif() endif()
inference_analysis_test(test_trt_models SRCS trt_models_tester.cc inference_analysis_test(test_trt_models SRCS trt_models_tester.cc
EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} analysis ${analysis_deps} ir_pass_manager analysis_predictor EXTRA_DEPS ${INFERENCE_EXTRA_DEPS}
ARGS --infer_model=${TRT_MODEL_INSTALL_DIR}/trt_test_models SERIAL) ARGS --infer_model=${TRT_MODEL_INSTALL_DIR}/trt_test_models SERIAL)
endif() endif()
...@@ -145,5 +145,3 @@ TEST(TensorRT_mobilenet, analysis) { ...@@ -145,5 +145,3 @@ TEST(TensorRT_mobilenet, analysis) {
} // namespace inference } // namespace inference
} // namespace paddle } // namespace paddle
USE_PASS(tensorrt_subgraph_pass);
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册