Commit c45ac9fe authored by MRXLT

trt code

Parent d198eb35
@@ -31,10 +31,10 @@ message( "WITH_GPU = ${WITH_GPU}")
 # Paddle Version should be one of:
 # latest: latest develop build
 # version number like 1.5.2
-SET(PADDLE_VERSION "1.7.2")
+SET(PADDLE_VERSION "1.8.1")
 if (WITH_GPU)
-    SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}-gpu-cuda${CUDA_VERSION_MAJOR}-cudnn7-avx-mkl")
+    SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}-gpu-cuda10-cudnn7-avx-mkl")
 else()
     if (WITH_AVX)
         if (WITH_MKLML)
@@ -92,8 +92,15 @@ LINK_DIRECTORIES(${PADDLE_INSTALL_DIR}/third_party/install/mkldnn/lib)
 ADD_LIBRARY(openblas STATIC IMPORTED GLOBAL)
 SET_PROPERTY(TARGET openblas PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/third_party/install/openblas/lib/libopenblas.a)
-ADD_LIBRARY(paddle_fluid STATIC IMPORTED GLOBAL)
-SET_PROPERTY(TARGET paddle_fluid PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/lib/libpaddle_fluid.a)
+ADD_LIBRARY(paddle_fluid SHARED IMPORTED GLOBAL)
+SET_PROPERTY(TARGET paddle_fluid PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/lib/libpaddle_fluid.so)
+ADD_LIBRARY(nvinfer SHARED IMPORTED GLOBAL)
+SET_PROPERTY(TARGET nvinfer PROPERTY IMPORTED_LOCATION /paddle/third_party/TensorRT-6.0.1.5/lib/libnvinfer.so)
+ADD_LIBRARY(nvinfer_plugin SHARED IMPORTED GLOBAL)
+SET_PROPERTY(TARGET nvinfer_plugin PROPERTY IMPORTED_LOCATION /paddle/third_party/TensorRT-6.0.1.5/lib/libnvinfer_plugin.so)
 ADD_LIBRARY(xxhash STATIC IMPORTED GLOBAL)
 SET_PROPERTY(TARGET xxhash PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/third_party/install/xxhash/lib/libxxhash.a)
@@ -101,4 +108,4 @@ SET_PROPERTY(TARGET xxhash PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/thir
 LIST(APPEND external_project_dependencies paddle)
 LIST(APPEND paddle_depend_libs
-    xxhash)
+    xxhash nvinfer nvinfer_plugin)
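With this change `libpaddle_fluid` and the TensorRT runtime become shared imported targets resolved from hard-coded paths, so the serving binary now depends on them at load time. A minimal standalone sketch (not part of the commit) that probes those paths with `dlopen` before starting the server; the paths mirror the `IMPORTED_LOCATION` values above, and the program builds with `g++ probe.cc -ldl`:

```cpp
#include <dlfcn.h>
#include <cstdio>

// Probe a shared object and report why the loader cannot resolve it.
static bool probe(const char* path) {
  void* handle = dlopen(path, RTLD_LAZY | RTLD_LOCAL);
  if (handle == nullptr) {
    std::fprintf(stderr, "cannot load %s: %s\n", path, dlerror());
    return false;
  }
  dlclose(handle);
  return true;
}

int main() {
  // Same locations the CMake hunks import from.
  bool ok = probe("/paddle/third_party/TensorRT-6.0.1.5/lib/libnvinfer.so") &&
            probe("/paddle/third_party/TensorRT-6.0.1.5/lib/libnvinfer_plugin.so");
  return ok ? 0 : 1;
}
```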
@@ -553,7 +553,9 @@ class CloneDBReloadableInferEngine
 };
 template <typename FluidFamilyCore>
-class FluidInferEngine : public CloneDBReloadableInferEngine<FluidFamilyCore> {
+// class FluidInferEngine : public CloneDBReloadableInferEngine<FluidFamilyCore>
+// {
+class FluidInferEngine : public DBReloadableInferEngine<FluidFamilyCore> {
  public:
   FluidInferEngine() {}
   ~FluidInferEngine() {}
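The hunk swaps the base class of `FluidInferEngine`: as the names suggest, the `DBReloadable` flavor has each worker thread load the model from disk itself, while the `Clone` flavor loads once and clones the in-memory core per thread. A toy sketch of that pattern with hypothetical `Toy*` names, loosely modeled on the real classes rather than taken from them:

```cpp
#include <memory>
#include <string>

// Hypothetical stand-in for a FluidFamilyCore: owns one loaded model.
struct ToyCore {
  int load(const std::string& dir) { /* read model files */ return 0; }
  std::shared_ptr<ToyCore> clone() { return std::make_shared<ToyCore>(*this); }
};

// DBReloadable flavor: every worker thread pays a full load from disk.
template <typename Core>
struct ToyDBReloadableEngine {
  int thread_initialize(const std::string& dir) {
    core = std::make_shared<Core>();
    return core->load(dir);
  }
  std::shared_ptr<Core> core;
};

// Clone flavor: one master load; workers clone the in-memory core instead.
template <typename Core>
struct ToyCloneEngine : ToyDBReloadableEngine<Core> {
  int thread_initialize(const std::shared_ptr<Core>& master) {
    this->core = master->clone();
    return this->core ? 0 : -1;
  }
};
```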
@@ -603,13 +605,13 @@ class VersionedInferEngine : public InferEngine {
       LOG(ERROR) << "Failed generate engine with type:" << engine_type;
       return -1;
     }
-    VLOG(2) << "FLAGS_logtostderr " << FLAGS_logtostderr;
+    VLOG(2) << "FLGS_logtostderr " << FLAGS_logtostderr;
     int tmp = FLAGS_logtostderr;
     if (engine->proc_initialize(conf, version) != 0) {
       LOG(ERROR) << "Failed initialize engine, type:" << engine_type;
       return -1;
     }
-    VLOG(2) << "FLAGS_logtostderr " << FLAGS_logtostderr;
+    VLOG(2) << "FLGS_logtostderr " << FLAGS_logtostderr;
     FLAGS_logtostderr = tmp;
     auto r = _versions.insert(std::make_pair(engine->version(), engine));
     if (!r.second) {
......
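The `_versions.insert` call above relies on `std::map::insert` refusing to overwrite an existing key: the returned pair's `.second` is `false` when the version is already registered. A self-contained sketch of that duplicate-version check, with a toy engine type in place of the real one:

```cpp
#include <cstdint>
#include <iostream>
#include <map>

struct ToyEngine {
  uint64_t version;
};

int main() {
  std::map<uint64_t, ToyEngine*> versions;
  ToyEngine a{1}, b{1};

  versions.insert(std::make_pair(a.version, &a));
  // Second insert with the same key fails: r.second is false and the map
  // still points at the first engine, mirroring the error path above.
  auto r = versions.insert(std::make_pair(b.version, &b));
  if (!r.second) {
    std::cerr << "duplicate engine version " << b.version << "\n";
    return -1;
  }
  return 0;
}
```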
@@ -190,7 +190,7 @@ class FluidGpuAnalysisDirCore : public FluidFamilyCore {
     paddle::AnalysisConfig analysis_config;
     analysis_config.SetModel(data_path);
-    analysis_config.EnableUseGpu(100, FLAGS_gpuid);
+    analysis_config.EnableUseGpu(1500, FLAGS_gpuid);
     analysis_config.SwitchSpecifyInputNames(true);
     analysis_config.SetCpuMathLibraryNumThreads(1);
@@ -198,11 +198,58 @@ class FluidGpuAnalysisDirCore : public FluidFamilyCore {
       analysis_config.EnableMemoryOptim();
     }
+    /*
     if (params.enable_ir_optimization()) {
       analysis_config.SwitchIrOptim(true);
     } else {
       analysis_config.SwitchIrOptim(false);
     }
+    */
+    int min_seq_len = 1;
+    int max_seq_len = 512;
+    int opt_seq_len = 128;
+    int head_number = 12;
+    int batch = 50;
+    std::vector<int> min_in_shape = {batch, min_seq_len, 1};
+    std::vector<int> max_in_shape = {batch, max_seq_len, 1};
+    std::vector<int> opt_in_shape = {batch, opt_seq_len, 1};
+    std::string input1_name = "src_text_a_ids";
+    std::string input2_name = "pos_text_a_ids";
+    std::string input3_name = "sent_text_a_ids";
+    std::string input4_name = "stack_0.tmp_0";
+    std::map<std::string, std::vector<int>> min_input_shape = {
+        {input1_name, min_in_shape},
+        {input2_name, min_in_shape},
+        {input3_name, min_in_shape},
+        {input4_name, {batch, head_number, min_seq_len, min_seq_len}},
+    };
+    std::map<std::string, std::vector<int>> max_input_shape = {
+        {input1_name, max_in_shape},
+        {input2_name, max_in_shape},
+        {input3_name, max_in_shape},
+        {input4_name, {batch, head_number, max_seq_len, max_seq_len}},
+    };
+    std::map<std::string, std::vector<int>> opt_input_shape = {
+        {input1_name, opt_in_shape},
+        {input2_name, opt_in_shape},
+        {input3_name, opt_in_shape},
+        {input4_name, {batch, head_number, opt_seq_len, opt_seq_len}},
+    };
+    analysis_config.EnableTensorRtEngine(
+        1 << 30,
+        batch,
+        5,
+        paddle::AnalysisConfig::Precision::kHalf,
+        true,
+        true);
+    analysis_config.SetTRTDynamicShapeInfo(
+        min_input_shape, max_input_shape, opt_input_shape);
     AutoLock lock(GlobalPaddleCreateMutex::instance());
     _core =
......
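The heart of the commit is this TensorRT path: a 1 GB workspace, max batch size 50, FP16 precision, and dynamic-shape ranges for an ERNIE/BERT-style model whose sequence length varies between 1 and 512 tokens, with 128 as the length TensorRT optimizes its kernels for. A self-contained sketch of the same configuration against the Paddle 1.8 C++ inference API; the model directory `./ernie_model` and device id 0 are assumptions, while the input names and shape ranges come straight from the diff:

```cpp
#include <map>
#include <string>
#include <vector>

#include "paddle_inference_api.h"  // Paddle 1.8 AnalysisConfig / predictor API

int main() {
  paddle::AnalysisConfig config;
  config.SetModel("./ernie_model");  // hypothetical model directory
  // 1500 MB initial GPU memory pool on device 0 (the diff raises 100 -> 1500).
  config.EnableUseGpu(1500, 0);
  config.SwitchSpecifyInputNames(true);
  config.SetCpuMathLibraryNumThreads(1);

  // TensorRT subgraph engine: 1 GB workspace, max batch 50, offload subgraphs
  // of at least 5 ops, FP16 precision; the two trailing flags (use_static,
  // use_calib_mode) are passed as true, matching the diff.
  const int batch = 50, heads = 12;
  config.EnableTensorRtEngine(1 << 30, batch, 5,
                              paddle::AnalysisConfig::Precision::kHalf,
                              true, true);

  // Dynamic shapes: three token-id inputs of [batch, seq, 1] plus an
  // attention-mask-like input of [batch, heads, seq, seq].
  auto shapes = [&](int seq) {
    return std::map<std::string, std::vector<int>>{
        {"src_text_a_ids", {batch, seq, 1}},
        {"pos_text_a_ids", {batch, seq, 1}},
        {"sent_text_a_ids", {batch, seq, 1}},
        {"stack_0.tmp_0", {batch, heads, seq, seq}},
    };
  };
  config.SetTRTDynamicShapeInfo(shapes(/*min*/ 1), shapes(/*max*/ 512),
                                shapes(/*opt*/ 128));

  auto predictor = paddle::CreatePaddlePredictor(config);
  return predictor ? 0 : 1;
}
```

Note that the diff also comments out the `enable_ir_optimization()` switch: the TensorRT subgraph pass only runs with IR optimization enabled, so the engine no longer lets the request parameters turn it off.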