diff --git a/paddle_inference/paddle/include/paddle_engine.h b/paddle_inference/paddle/include/paddle_engine.h index 7cc8120f4eb818905c303b22a0b00d6b205bddb4..c76147b6842b9f01b3b4f65785102766d3940aef 100644 --- a/paddle_inference/paddle/include/paddle_engine.h +++ b/paddle_inference/paddle/include/paddle_engine.h @@ -266,6 +266,7 @@ class PaddleInferenceEngine : public EngineCore { if (engine_conf.has_use_xpu() && engine_conf.use_xpu()) { // 2 MB l3 cache config.EnableXpu(2 * 1024 * 1024); + config.SetXpuDeviceId(gpu_id); } if (engine_conf.has_enable_memory_optimization() && diff --git a/python/paddle_serving_app/local_predict.py b/python/paddle_serving_app/local_predict.py index afe4ba62d69850482e82ba97d43ac747e0f69aaf..7de419530462b59f733f6ecc81e8b2fd9ce61b80 100644 --- a/python/paddle_serving_app/local_predict.py +++ b/python/paddle_serving_app/local_predict.py @@ -219,6 +219,7 @@ class LocalPredictor(object): if use_xpu: # 2MB l3 cache config.enable_xpu(8 * 1024 * 1024) + config.set_xpu_device_id(gpu_id) # set cpu low precision if not use_gpu and not use_lite: if precision_type == paddle_infer.PrecisionType.Int8: diff --git a/python/pipeline/local_service_handler.py b/python/pipeline/local_service_handler.py index d04b96547e9fb2f7fa35d0983b6cb046f505e698..d9df5e3091053a62c98fd108a5985a1e518a7767 100644 --- a/python/pipeline/local_service_handler.py +++ b/python/pipeline/local_service_handler.py @@ -280,6 +280,10 @@ class LocalServiceHandler(object): server.set_gpuid(gpuid) # TODO: support arm or arm + xpu later server.set_device(self._device_name) + if self._use_xpu: + server.set_xpu() + if self._use_lite: + server.set_lite() server.set_op_sequence(op_seq_maker.get_op_sequence()) server.set_num_threads(thread_num)