提交 c45ac9fe 编写于 作者: M MRXLT

trt code

上级 d198eb35
...@@ -31,10 +31,10 @@ message( "WITH_GPU = ${WITH_GPU}") ...@@ -31,10 +31,10 @@ message( "WITH_GPU = ${WITH_GPU}")
# Paddle Version should be one of: # Paddle Version should be one of:
# latest: latest develop build # latest: latest develop build
# version number like 1.5.2 # version number like 1.5.2
SET(PADDLE_VERSION "1.7.2") SET(PADDLE_VERSION "1.8.1")
if (WITH_GPU) if (WITH_GPU)
SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}-gpu-cuda${CUDA_VERSION_MAJOR}-cudnn7-avx-mkl") SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}-gpu-cuda10-cudnn7-avx-mkl")
else() else()
if (WITH_AVX) if (WITH_AVX)
if (WITH_MKLML) if (WITH_MKLML)
...@@ -92,8 +92,15 @@ LINK_DIRECTORIES(${PADDLE_INSTALL_DIR}/third_party/install/mkldnn/lib) ...@@ -92,8 +92,15 @@ LINK_DIRECTORIES(${PADDLE_INSTALL_DIR}/third_party/install/mkldnn/lib)
ADD_LIBRARY(openblas STATIC IMPORTED GLOBAL) ADD_LIBRARY(openblas STATIC IMPORTED GLOBAL)
SET_PROPERTY(TARGET openblas PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/third_party/install/openblas/lib/libopenblas.a) SET_PROPERTY(TARGET openblas PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/third_party/install/openblas/lib/libopenblas.a)
ADD_LIBRARY(paddle_fluid STATIC IMPORTED GLOBAL) ADD_LIBRARY(paddle_fluid SHARED IMPORTED GLOBAL)
SET_PROPERTY(TARGET paddle_fluid PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/lib/libpaddle_fluid.a) SET_PROPERTY(TARGET paddle_fluid PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/lib/libpaddle_fluid.so)
ADD_LIBRARY(nvinfer SHARED IMPORTED GLOBAL)
SET_PROPERTY(TARGET nvinfer PROPERTY IMPORTED_LOCATION /paddle/third_party/TensorRT-6.0.1.5/lib/libnvinfer.so)
ADD_LIBRARY(nvinfer_plugin SHARED IMPORTED GLOBAL)
SET_PROPERTY(TARGET nvinfer_plugin PROPERTY IMPORTED_LOCATION /paddle/third_party/TensorRT-6.0.1.5/lib/libnvinfer_plugin.so)
ADD_LIBRARY(xxhash STATIC IMPORTED GLOBAL) ADD_LIBRARY(xxhash STATIC IMPORTED GLOBAL)
SET_PROPERTY(TARGET xxhash PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/third_party/install/xxhash/lib/libxxhash.a) SET_PROPERTY(TARGET xxhash PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/third_party/install/xxhash/lib/libxxhash.a)
...@@ -101,4 +108,4 @@ SET_PROPERTY(TARGET xxhash PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/thir ...@@ -101,4 +108,4 @@ SET_PROPERTY(TARGET xxhash PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/thir
LIST(APPEND external_project_dependencies paddle) LIST(APPEND external_project_dependencies paddle)
LIST(APPEND paddle_depend_libs LIST(APPEND paddle_depend_libs
xxhash) xxhash nvinfer nvinfer_plugin)
...@@ -553,7 +553,9 @@ class CloneDBReloadableInferEngine ...@@ -553,7 +553,9 @@ class CloneDBReloadableInferEngine
}; };
template <typename FluidFamilyCore> template <typename FluidFamilyCore>
class FluidInferEngine : public CloneDBReloadableInferEngine<FluidFamilyCore> { // class FluidInferEngine : public CloneDBReloadableInferEngine<FluidFamilyCore>
// {
class FluidInferEngine : public DBReloadableInferEngine<FluidFamilyCore> {
public: public:
FluidInferEngine() {} FluidInferEngine() {}
~FluidInferEngine() {} ~FluidInferEngine() {}
...@@ -603,13 +605,13 @@ class VersionedInferEngine : public InferEngine { ...@@ -603,13 +605,13 @@ class VersionedInferEngine : public InferEngine {
LOG(ERROR) << "Failed generate engine with type:" << engine_type; LOG(ERROR) << "Failed generate engine with type:" << engine_type;
return -1; return -1;
} }
VLOG(2) << "FLAGS_logtostderr " << FLAGS_logtostderr; VLOG(2) << "FLGS_logtostderr " << FLAGS_logtostderr;
int tmp = FLAGS_logtostderr; int tmp = FLAGS_logtostderr;
if (engine->proc_initialize(conf, version) != 0) { if (engine->proc_initialize(conf, version) != 0) {
LOG(ERROR) << "Failed initialize engine, type:" << engine_type; LOG(ERROR) << "Failed initialize engine, type:" << engine_type;
return -1; return -1;
} }
VLOG(2) << "FLAGS_logtostderr " << FLAGS_logtostderr; VLOG(2) << "FLGS_logtostderr " << FLAGS_logtostderr;
FLAGS_logtostderr = tmp; FLAGS_logtostderr = tmp;
auto r = _versions.insert(std::make_pair(engine->version(), engine)); auto r = _versions.insert(std::make_pair(engine->version(), engine));
if (!r.second) { if (!r.second) {
......
...@@ -190,7 +190,7 @@ class FluidGpuAnalysisDirCore : public FluidFamilyCore { ...@@ -190,7 +190,7 @@ class FluidGpuAnalysisDirCore : public FluidFamilyCore {
paddle::AnalysisConfig analysis_config; paddle::AnalysisConfig analysis_config;
analysis_config.SetModel(data_path); analysis_config.SetModel(data_path);
analysis_config.EnableUseGpu(100, FLAGS_gpuid); analysis_config.EnableUseGpu(1500, FLAGS_gpuid);
analysis_config.SwitchSpecifyInputNames(true); analysis_config.SwitchSpecifyInputNames(true);
analysis_config.SetCpuMathLibraryNumThreads(1); analysis_config.SetCpuMathLibraryNumThreads(1);
...@@ -198,11 +198,58 @@ class FluidGpuAnalysisDirCore : public FluidFamilyCore { ...@@ -198,11 +198,58 @@ class FluidGpuAnalysisDirCore : public FluidFamilyCore {
analysis_config.EnableMemoryOptim(); analysis_config.EnableMemoryOptim();
} }
if (params.enable_ir_optimization()) { /*
analysis_config.SwitchIrOptim(true); if (params.enable_ir_optimization()) {
} else { analysis_config.SwitchIrOptim(true);
analysis_config.SwitchIrOptim(false); } else {
} analysis_config.SwitchIrOptim(false);
}
*/
int min_seq_len = 1;
int max_seq_len = 512;
int opt_seq_len = 128;
int head_number = 12;
int batch = 50;
std::vector<int> min_in_shape = {batch, min_seq_len, 1};
std::vector<int> max_in_shape = {batch, max_seq_len, 1};
std::vector<int> opt_in_shape = {batch, opt_seq_len, 1};
std::string input1_name = "src_text_a_ids";
std::string input2_name = "pos_text_a_ids";
std::string input3_name = "sent_text_a_ids";
std::string input4_name = "stack_0.tmp_0";
std::map<std::string, std::vector<int>> min_input_shape = {
{input1_name, min_in_shape},
{input2_name, min_in_shape},
{input3_name, min_in_shape},
{input4_name, {batch, head_number, min_seq_len, min_seq_len}},
};
std::map<std::string, std::vector<int>> max_input_shape = {
{input1_name, max_in_shape},
{input2_name, max_in_shape},
{input3_name, max_in_shape},
{input4_name, {batch, head_number, max_seq_len, max_seq_len}},
};
std::map<std::string, std::vector<int>> opt_input_shape = {
{input1_name, opt_in_shape},
{input2_name, opt_in_shape},
{input3_name, opt_in_shape},
{input4_name, {batch, head_number, opt_seq_len, opt_seq_len}},
};
analysis_config.EnableTensorRtEngine(
1 << 30,
batch,
5,
paddle::AnalysisConfig::Precision::kHalf,
true,
true);
analysis_config.SetTRTDynamicShapeInfo(
min_input_shape, max_input_shape, opt_input_shape);
AutoLock lock(GlobalPaddleCreateMutex::instance()); AutoLock lock(GlobalPaddleCreateMutex::instance());
_core = _core =
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册