diff --git a/core/configure/proto/server_configure.proto b/core/configure/proto/server_configure.proto
index 4bdc233099cffbc7949a6b5cf8627fe6461f565c..8956022685090c94be2037445c646e9fbffd1a5c 100644
--- a/core/configure/proto/server_configure.proto
+++ b/core/configure/proto/server_configure.proto
@@ -43,6 +43,7 @@ message EngineDesc {
   optional bool enable_memory_optimization = 13;
   optional bool static_optimization = 14;
   optional bool force_update_static_cache = 15;
+  optional bool enable_ir_optimization = 16;
 };
 
 // model_toolkit conf
diff --git a/core/predictor/framework/infer.h b/core/predictor/framework/infer.h
index 4bb3be9ad2c3dc7ef94a32200b014325aceedf45..e8c0ff47d86f081516a35576655f843a28b0591b 100644
--- a/core/predictor/framework/infer.h
+++ b/core/predictor/framework/infer.h
@@ -35,6 +35,7 @@ class InferEngineCreationParams {
   InferEngineCreationParams() {
     _path = "";
     _enable_memory_optimization = false;
+    _enable_ir_optimization = false;
     _static_optimization = false;
     _force_update_static_cache = false;
   }
@@ -45,10 +46,16 @@
     _enable_memory_optimization = enable_memory_optimization;
   }
 
+  void set_enable_ir_optimization(bool enable_ir_optimization) {
+    _enable_ir_optimization = enable_ir_optimization;
+  }
+
   bool enable_memory_optimization() const {
     return _enable_memory_optimization;
   }
 
+  bool enable_ir_optimization() const { return _enable_ir_optimization; }
+
   void set_static_optimization(bool static_optimization = false) {
     _static_optimization = static_optimization;
   }
@@ -68,6 +75,7 @@
               << "model_path = " << _path << ", "
               << "enable_memory_optimization = " << _enable_memory_optimization
               << ", "
+              << "enable_ir_optimization = " << _enable_ir_optimization << ", "
               << "static_optimization = " << _static_optimization << ", "
               << "force_update_static_cache = " << _force_update_static_cache;
   }
@@ -75,6 +83,7 @@
  private:
   std::string _path;
   bool _enable_memory_optimization;
+  bool _enable_ir_optimization;
   bool _static_optimization;
   bool _force_update_static_cache;
 };
@@ -150,6 +159,11 @@ class ReloadableInferEngine : public InferEngine {
       force_update_static_cache = conf.force_update_static_cache();
     }
 
+    if (conf.has_enable_ir_optimization()) {
+      _infer_engine_params.set_enable_ir_optimization(
+          conf.enable_ir_optimization());
+    }
+
     _infer_engine_params.set_path(_model_data_path);
     if (enable_memory_optimization) {
       _infer_engine_params.set_enable_memory_optimization(true);
diff --git a/paddle_inference/inferencer-fluid-cpu/include/fluid_cpu_engine.h b/paddle_inference/inferencer-fluid-cpu/include/fluid_cpu_engine.h
index 24148e374e51cb42cb0d8d1423e0ca009e9e8294..a4d8dda71a7977185106bb1552cb8f39ef6bc50e 100644
--- a/paddle_inference/inferencer-fluid-cpu/include/fluid_cpu_engine.h
+++ b/paddle_inference/inferencer-fluid-cpu/include/fluid_cpu_engine.h
@@ -194,6 +194,12 @@ class FluidCpuAnalysisDirCore : public FluidFamilyCore {
       analysis_config.EnableMemoryOptim();
     }
 
+    if (params.enable_ir_optimization()) {
+      analysis_config.SwitchIrOptim(true);
+    } else {
+      analysis_config.SwitchIrOptim(false);
+    }
+
     AutoLock lock(GlobalPaddleCreateMutex::instance());
     _core = paddle::CreatePaddlePredictor(analysis_config);
diff --git a/paddle_inference/inferencer-fluid-gpu/include/fluid_gpu_engine.h b/paddle_inference/inferencer-fluid-gpu/include/fluid_gpu_engine.h
index a3fa365444a40d505b16b22e702d4a8b69699073..2fc6ae587ff26f5f05ff9332f08067ab49d06254 100644
--- a/paddle_inference/inferencer-fluid-gpu/include/fluid_gpu_engine.h
+++ b/paddle_inference/inferencer-fluid-gpu/include/fluid_gpu_engine.h
@@ -198,6 +198,12 @@ class FluidGpuAnalysisDirCore : public FluidFamilyCore {
       analysis_config.EnableMemoryOptim();
     }
 
+    if (params.enable_ir_optimization()) {
+      analysis_config.SwitchIrOptim(true);
+    } else {
+      analysis_config.SwitchIrOptim(false);
+    }
+
     AutoLock lock(GlobalPaddleCreateMutex::instance());
     _core = paddle::CreatePaddlePredictor(analysis_config);
diff --git a/python/paddle_serving_server/__init__.py b/python/paddle_serving_server/__init__.py
index a58fb11ac3ee1fbe5086ae4381f6d6208c0c73ec..f4f3248c8174487b51554f27182c0c0cd9740d25 100644
--- a/python/paddle_serving_server/__init__.py
+++ b/python/paddle_serving_server/__init__.py
@@ -127,6 +127,7 @@ class Server(object):
         self.model_toolkit_conf = None
         self.resource_conf = None
         self.memory_optimization = False
+        self.ir_optimization = False
         self.model_conf = None
         self.workflow_fn = "workflow.prototxt"
         self.resource_fn = "resource.prototxt"
@@ -175,6 +176,9 @@
     def set_memory_optimize(self, flag=False):
         self.memory_optimization = flag
 
+    def set_ir_optimize(self, flag=False):
+        self.ir_optimization = flag
+
     def check_local_bin(self):
         if "SERVING_BIN" in os.environ:
             self.use_local_bin = True
@@ -195,6 +199,7 @@
             engine.enable_batch_align = 0
             engine.model_data_path = model_config_path
             engine.enable_memory_optimization = self.memory_optimization
+            engine.enable_ir_optimization = self.ir_optimization
             engine.static_optimization = False
             engine.force_update_static_cache = False
 
@@ -244,7 +249,7 @@
         workflow_oi_config_path = None
         if isinstance(model_config_paths, str):
            # If there is only one model path, use the default infer_op.
-            # Because there are several infer_op type, we need to find 
+            # Because there are several infer_op type, we need to find
            # it from workflow_conf.
            default_engine_names = [
                 'general_infer_0', 'general_dist_kv_infer_0',
diff --git a/python/paddle_serving_server/serve.py b/python/paddle_serving_server/serve.py
index 395177a8c77e5c608c2e0364b1d43ac534172d66..58a1301384a7eb9b991139b225294cbe0cb198f1 100644
--- a/python/paddle_serving_server/serve.py
+++ b/python/paddle_serving_server/serve.py
@@ -41,6 +41,8 @@ def parse_args():  # pylint: disable=doc-string-missing
         "--device", type=str, default="cpu", help="Type of device")
     parser.add_argument(
         "--mem_optim", type=bool, default=False, help="Memory optimize")
+    parser.add_argument(
+        "--ir_optim", type=bool, default=False, help="Graph optimize")
     parser.add_argument(
         "--max_body_size",
         type=int,
@@ -57,6 +59,7 @@ def start_standard_model():  # pylint: disable=doc-string-missing
     workdir = args.workdir
     device = args.device
     mem_optim = args.mem_optim
+    ir_optim = args.ir_optim
     max_body_size = args.max_body_size
 
     if model == "":
@@ -78,6 +81,7 @@ def start_standard_model():  # pylint: disable=doc-string-missing
     server.set_op_sequence(op_seq_maker.get_op_sequence())
     server.set_num_threads(thread_num)
     server.set_memory_optimize(mem_optim)
+    server.set_ir_optimize(ir_optim)
     server.set_max_body_size(max_body_size)
     server.set_port(port)
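
For context, here is a minimal sketch of how the new switch would be exercised from the Python API once this patch is applied. Only set_ir_optimize() and the --ir_optim flag come from this diff; the OpMaker/OpSeqMaker boilerplate follows the usual paddle_serving_server pattern, and the "uci_housing_model" directory, workdir and port are illustrative assumptions.

# Illustrative only: wiring the new IR-optimization switch into a standard
# paddle_serving_server setup. Model directory, workdir and port are assumed.
from paddle_serving_server import OpMaker, OpSeqMaker, Server

op_maker = OpMaker()
read_op = op_maker.create('general_reader')
infer_op = op_maker.create('general_infer')
response_op = op_maker.create('general_response')

op_seq_maker = OpSeqMaker()
op_seq_maker.add_op(read_op)
op_seq_maker.add_op(infer_op)
op_seq_maker.add_op(response_op)

server = Server()
server.set_op_sequence(op_seq_maker.get_op_sequence())
server.set_memory_optimize(True)  # existing switch -> EnableMemoryOptim()
server.set_ir_optimize(True)      # new switch -> SwitchIrOptim(true) in the fluid engine
server.load_model_config("uci_housing_model")  # assumed model directory
server.prepare_server(workdir="workdir", port=9292, device="cpu")
server.run_server()

The same option is exposed on the command line as --ir_optim (for example, python -m paddle_serving_server.serve --model uci_housing_model --port 9292 --ir_optim True), which start_standard_model() forwards to set_ir_optimize().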