From 46d7a8da57453c26521660208fa90bb6826c3ad1 Mon Sep 17 00:00:00 2001
From: MRXLT
Date: Tue, 12 May 2020 11:52:09 +0800
Subject: [PATCH] add ir optim for cpu

---
 core/configure/proto/server_configure.proto |  1 +
 core/predictor/framework/infer.h            | 14 ++++++++++++++
 .../include/fluid_cpu_engine.h              |  6 ++++++
 .../include/fluid_gpu_engine.h              |  6 ++++++
 python/paddle_serving_server/__init__.py    |  7 ++++++-
 python/paddle_serving_server/serve.py       |  4 ++++
 6 files changed, 37 insertions(+), 1 deletion(-)

diff --git a/core/configure/proto/server_configure.proto b/core/configure/proto/server_configure.proto
index 4bdc2330..89560226 100644
--- a/core/configure/proto/server_configure.proto
+++ b/core/configure/proto/server_configure.proto
@@ -43,6 +43,7 @@ message EngineDesc {
   optional bool enable_memory_optimization = 13;
   optional bool static_optimization = 14;
   optional bool force_update_static_cache = 15;
+  optional bool enable_ir_optimization = 16;
 };
 
 // model_toolkit conf
diff --git a/core/predictor/framework/infer.h b/core/predictor/framework/infer.h
index 4bb3be9a..e8c0ff47 100644
--- a/core/predictor/framework/infer.h
+++ b/core/predictor/framework/infer.h
@@ -35,6 +35,7 @@ class InferEngineCreationParams {
   InferEngineCreationParams() {
     _path = "";
     _enable_memory_optimization = false;
+    _enable_ir_optimization = false;
     _static_optimization = false;
     _force_update_static_cache = false;
   }
@@ -45,10 +46,16 @@ class InferEngineCreationParams {
     _enable_memory_optimization = enable_memory_optimization;
   }
 
+  void set_enable_ir_optimization(bool enable_ir_optimization) {
+    _enable_ir_optimization = enable_ir_optimization;
+  }
+
   bool enable_memory_optimization() const {
     return _enable_memory_optimization;
   }
 
+  bool enable_ir_optimization() const { return _enable_ir_optimization; }
+
   void set_static_optimization(bool static_optimization = false) {
     _static_optimization = static_optimization;
   }
@@ -68,6 +75,7 @@ class InferEngineCreationParams {
               << "model_path = " << _path << ", "
               << "enable_memory_optimization = " << _enable_memory_optimization
               << ", "
+              << "enable_ir_optimization = " << _enable_ir_optimization << ", "
               << "static_optimization = " << _static_optimization << ", "
               << "force_update_static_cache = " << _force_update_static_cache;
   }
@@ -75,6 +83,7 @@ class InferEngineCreationParams {
  private:
   std::string _path;
   bool _enable_memory_optimization;
+  bool _enable_ir_optimization;
   bool _static_optimization;
   bool _force_update_static_cache;
 };
@@ -150,6 +159,11 @@ class ReloadableInferEngine : public InferEngine {
       force_update_static_cache = conf.force_update_static_cache();
     }
 
+    if (conf.has_enable_ir_optimization()) {
+      _infer_engine_params.set_enable_ir_optimization(
+          conf.enable_ir_optimization());
+    }
+
     _infer_engine_params.set_path(_model_data_path);
     if (enable_memory_optimization) {
       _infer_engine_params.set_enable_memory_optimization(true);
diff --git a/paddle_inference/inferencer-fluid-cpu/include/fluid_cpu_engine.h b/paddle_inference/inferencer-fluid-cpu/include/fluid_cpu_engine.h
index 24148e37..a4d8dda7 100644
--- a/paddle_inference/inferencer-fluid-cpu/include/fluid_cpu_engine.h
+++ b/paddle_inference/inferencer-fluid-cpu/include/fluid_cpu_engine.h
@@ -194,6 +194,12 @@ class FluidCpuAnalysisDirCore : public FluidFamilyCore {
       analysis_config.EnableMemoryOptim();
     }
 
+    if (params.enable_ir_optimization()) {
+      analysis_config.SwitchIrOptim(true);
+    } else {
+      analysis_config.SwitchIrOptim(false);
+    }
+
     AutoLock lock(GlobalPaddleCreateMutex::instance());
     _core =
         paddle::CreatePaddlePredictor(analysis_config);
diff --git a/paddle_inference/inferencer-fluid-gpu/include/fluid_gpu_engine.h b/paddle_inference/inferencer-fluid-gpu/include/fluid_gpu_engine.h
index a3fa3654..2fc6ae58 100644
--- a/paddle_inference/inferencer-fluid-gpu/include/fluid_gpu_engine.h
+++ b/paddle_inference/inferencer-fluid-gpu/include/fluid_gpu_engine.h
@@ -198,6 +198,12 @@ class FluidGpuAnalysisDirCore : public FluidFamilyCore {
      analysis_config.EnableMemoryOptim();
     }
 
+    if (params.enable_ir_optimization()) {
+      analysis_config.SwitchIrOptim(true);
+    } else {
+      analysis_config.SwitchIrOptim(false);
+    }
+
     AutoLock lock(GlobalPaddleCreateMutex::instance());
     _core =
         paddle::CreatePaddlePredictor(analysis_config);
diff --git a/python/paddle_serving_server/__init__.py b/python/paddle_serving_server/__init__.py
index a58fb11a..f4f3248c 100644
--- a/python/paddle_serving_server/__init__.py
+++ b/python/paddle_serving_server/__init__.py
@@ -127,6 +127,7 @@ class Server(object):
         self.model_toolkit_conf = None
         self.resource_conf = None
         self.memory_optimization = False
+        self.ir_optimization = False
         self.model_conf = None
         self.workflow_fn = "workflow.prototxt"
         self.resource_fn = "resource.prototxt"
@@ -175,6 +176,9 @@ class Server(object):
     def set_memory_optimize(self, flag=False):
         self.memory_optimization = flag
 
+    def set_ir_optimize(self, flag=False):
+        self.ir_optimization = flag
+
     def check_local_bin(self):
         if "SERVING_BIN" in os.environ:
             self.use_local_bin = True
@@ -195,6 +199,7 @@ class Server(object):
         engine.enable_batch_align = 0
         engine.model_data_path = model_config_path
         engine.enable_memory_optimization = self.memory_optimization
+        engine.enable_ir_optimization = self.ir_optimization
         engine.static_optimization = False
         engine.force_update_static_cache = False
 
@@ -244,7 +249,7 @@ class Server(object):
         workflow_oi_config_path = None
         if isinstance(model_config_paths, str):
             # If there is only one model path, use the default infer_op.
-            # Because there are several infer_op type, we need to find 
+            # Because there are several infer_op type, we need to find
             # it from workflow_conf.
             default_engine_names = [
                 'general_infer_0', 'general_dist_kv_infer_0',
diff --git a/python/paddle_serving_server/serve.py b/python/paddle_serving_server/serve.py
index 395177a8..58a13013 100644
--- a/python/paddle_serving_server/serve.py
+++ b/python/paddle_serving_server/serve.py
@@ -41,6 +41,8 @@ def parse_args():  # pylint: disable=doc-string-missing
         "--device", type=str, default="cpu", help="Type of device")
     parser.add_argument(
         "--mem_optim", type=bool, default=False, help="Memory optimize")
+    parser.add_argument(
+        "--ir_optim", type=bool, default=False, help="Graph optimize")
     parser.add_argument(
         "--max_body_size",
         type=int,
@@ -57,6 +59,7 @@ def start_standard_model():  # pylint: disable=doc-string-missing
     workdir = args.workdir
     device = args.device
     mem_optim = args.mem_optim
+    ir_optim = args.ir_optim
     max_body_size = args.max_body_size
 
     if model == "":
@@ -78,6 +81,7 @@ def start_standard_model():  # pylint: disable=doc-string-missing
     server.set_op_sequence(op_seq_maker.get_op_sequence())
     server.set_num_threads(thread_num)
     server.set_memory_optimize(mem_optim)
+    server.set_ir_optimize(ir_optim)
     server.set_max_body_size(max_body_size)
     server.set_port(port)
 
-- 
GitLab
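
Usage sketch (not part of the patch): the new option is exposed through
Server.set_ir_optimize() and the --ir_optim flag added above. The snippet
below shows how it might be used, assuming the usual paddle_serving_server
workflow (OpMaker/OpSeqMaker, load_model_config, prepare_server, run_server)
from the project's examples; the model directory "uci_housing_model" is a
hypothetical placeholder.

    # Sketch only: set_ir_optimize() comes from this patch; the surrounding
    # workflow and the model directory name are assumptions based on the
    # existing paddle_serving_server examples.
    from paddle_serving_server import OpMaker, OpSeqMaker, Server

    op_maker = OpMaker()
    op_seq_maker = OpSeqMaker()
    op_seq_maker.add_op(op_maker.create('general_reader'))
    op_seq_maker.add_op(op_maker.create('general_infer'))
    op_seq_maker.add_op(op_maker.create('general_response'))

    server = Server()
    server.set_op_sequence(op_seq_maker.get_op_sequence())
    server.set_num_threads(4)
    server.set_memory_optimize(False)  # existing memory-optimization switch
    server.set_ir_optimize(True)       # new: maps to SwitchIrOptim in the engine
    server.load_model_config("uci_housing_model")  # hypothetical model dir
    server.prepare_server(workdir="workdir", port=9292, device="cpu")
    server.run_server()

From the command line the same switch would be reached via the new flag, e.g.
"python -m paddle_serving_server.serve --model uci_housing_model --port 9292
--ir_optim True" (note that argparse's type=bool treats any non-empty string
as True).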