提交 3cdad373 编写于 作者: M MRXLT

add trt

上级 460c43b8
...@@ -54,6 +54,7 @@ option(SERVER "Compile Paddle Serving Server" OFF) ...@@ -54,6 +54,7 @@ option(SERVER "Compile Paddle Serving Server" OFF)
option(APP "Compile Paddle Serving App package" OFF) option(APP "Compile Paddle Serving App package" OFF)
option(WITH_ELASTIC_CTR "Compile ELASITC-CTR solution" OFF) option(WITH_ELASTIC_CTR "Compile ELASITC-CTR solution" OFF)
option(PACK "Compile for whl" OFF) option(PACK "Compile for whl" OFF)
option(WITH_TRT "Compile Paddle Serving with TRT" OFF)
set(WITH_MKLML ${WITH_MKL}) set(WITH_MKLML ${WITH_MKL})
if (NOT DEFINED WITH_MKLDNN) if (NOT DEFINED WITH_MKLDNN)
......
...@@ -34,7 +34,10 @@ message( "WITH_GPU = ${WITH_GPU}") ...@@ -34,7 +34,10 @@ message( "WITH_GPU = ${WITH_GPU}")
SET(PADDLE_VERSION "1.8.1") SET(PADDLE_VERSION "1.8.1")
if (WITH_GPU) if (WITH_GPU)
SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}-gpu-cuda10-cudnn7-avx-mkl") if (WITH_TRT)
SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}-gpu-cuda10.1-cudnn7.6-avx-mkl-trt6")
else()
SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}-gpu-cuda10-cudnn7-avx-mkl")
else() else()
if (WITH_AVX) if (WITH_AVX)
if (WITH_MKLML) if (WITH_MKLML)
......
...@@ -44,6 +44,7 @@ message EngineDesc { ...@@ -44,6 +44,7 @@ message EngineDesc {
optional bool static_optimization = 14; optional bool static_optimization = 14;
optional bool force_update_static_cache = 15; optional bool force_update_static_cache = 15;
optional bool enable_ir_optimization = 16; optional bool enable_ir_optimization = 16;
optional bool use_trt = 17;
}; };
// model_toolkit conf // model_toolkit conf
......
...@@ -178,7 +178,7 @@ class FluidGpuNativeCore : public FluidFamilyCore { ...@@ -178,7 +178,7 @@ class FluidGpuNativeCore : public FluidFamilyCore {
} }
}; };
class FluidGpuAnalysisDirCore : public FluidFamilyCore { class FluidTRTAnalysisDirCore : public FluidFamilyCore {
public: public:
int create(const predictor::InferEngineCreationParams& params) { int create(const predictor::InferEngineCreationParams& params) {
std::string data_path = params.get_path(); std::string data_path = params.get_path();
...@@ -198,13 +198,7 @@ class FluidGpuAnalysisDirCore : public FluidFamilyCore { ...@@ -198,13 +198,7 @@ class FluidGpuAnalysisDirCore : public FluidFamilyCore {
analysis_config.EnableMemoryOptim(); analysis_config.EnableMemoryOptim();
} }
/* #if 0
if (params.enable_ir_optimization()) {
analysis_config.SwitchIrOptim(true);
} else {
analysis_config.SwitchIrOptim(false);
}
*/
int min_seq_len = 1; int min_seq_len = 1;
int max_seq_len = 512; int max_seq_len = 512;
...@@ -241,16 +235,18 @@ class FluidGpuAnalysisDirCore : public FluidFamilyCore { ...@@ -241,16 +235,18 @@ class FluidGpuAnalysisDirCore : public FluidFamilyCore {
{input4_name, {batch, head_number, opt_seq_len, opt_seq_len}}, {input4_name, {batch, head_number, opt_seq_len, opt_seq_len}},
}; };
analysis_config.EnableTensorRtEngine(
1 << 30,
batch,
5,
paddle::AnalysisConfig::Precision::kHalf,
true,
true);
analysis_config.SetTRTDynamicShapeInfo( analysis_config.SetTRTDynamicShapeInfo(
min_input_shape, max_input_shape, opt_input_shape); min_input_shape, max_input_shape, opt_input_shape);
#endif
if (params.use_trt()) {
analysis_config.EnableTensorRtEngine(
1 << 30,
batch,
5,
paddle::AnalysisConfig::Precision::kFloat32,
true,
true);
}
AutoLock lock(GlobalPaddleCreateMutex::instance()); AutoLock lock(GlobalPaddleCreateMutex::instance());
_core = _core =
paddle::CreatePaddlePredictor<paddle::AnalysisConfig>(analysis_config); paddle::CreatePaddlePredictor<paddle::AnalysisConfig>(analysis_config);
......
...@@ -73,6 +73,8 @@ def serve_args(): ...@@ -73,6 +73,8 @@ def serve_args():
default=False, default=False,
action="store_true", action="store_true",
help="Use Multi-language-service") help="Use Multi-language-service")
parser.add_argument(
"--use_trt", default=False, action="store_true", help="Use TensorRT")
return parser.parse_args() return parser.parse_args()
...@@ -195,6 +197,7 @@ class Server(object): ...@@ -195,6 +197,7 @@ class Server(object):
self.cur_path = os.getcwd() self.cur_path = os.getcwd()
self.use_local_bin = False self.use_local_bin = False
self.gpuid = 0 self.gpuid = 0
self.use_trt = False
self.model_config_paths = None # for multi-model in a workflow self.model_config_paths = None # for multi-model in a workflow
def set_max_concurrency(self, concurrency): def set_max_concurrency(self, concurrency):
...@@ -245,6 +248,9 @@ class Server(object): ...@@ -245,6 +248,9 @@ class Server(object):
def set_gpuid(self, gpuid=0): def set_gpuid(self, gpuid=0):
self.gpuid = gpuid self.gpuid = gpuid
def use_trt(self):
self.use_trt = True
def _prepare_engine(self, model_config_paths, device): def _prepare_engine(self, model_config_paths, device):
if self.model_toolkit_conf == None: if self.model_toolkit_conf == None:
self.model_toolkit_conf = server_sdk.ModelToolkitConf() self.model_toolkit_conf = server_sdk.ModelToolkitConf()
...@@ -264,6 +270,7 @@ class Server(object): ...@@ -264,6 +270,7 @@ class Server(object):
engine.enable_ir_optimization = self.ir_optimization engine.enable_ir_optimization = self.ir_optimization
engine.static_optimization = False engine.static_optimization = False
engine.force_update_static_cache = False engine.force_update_static_cache = False
engine.use_trt = self.use_trt
if device == "cpu": if device == "cpu":
engine.type = "FLUID_CPU_ANALYSIS_DIR" engine.type = "FLUID_CPU_ANALYSIS_DIR"
......
...@@ -64,6 +64,8 @@ def start_gpu_card_model(index, gpuid, args): # pylint: disable=doc-string-miss ...@@ -64,6 +64,8 @@ def start_gpu_card_model(index, gpuid, args): # pylint: disable=doc-string-miss
server.set_memory_optimize(mem_optim) server.set_memory_optimize(mem_optim)
server.set_ir_optimize(ir_optim) server.set_ir_optimize(ir_optim)
server.set_max_body_size(max_body_size) server.set_max_body_size(max_body_size)
if args.use_trt:
server.use_trt()
server.load_model_config(model) server.load_model_config(model)
server.prepare_server(workdir=workdir, port=port, device=device) server.prepare_server(workdir=workdir, port=port, device=device)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册