diff --git a/deploy/cpp/docs/Jetson_build.md b/deploy/cpp/docs/Jetson_build.md
index 43dae340df172f7491e5cc31dadb8f7d6c3f8fd6..7d52250a12fe0547b69a2873a55944889526f8a3 100644
--- a/deploy/cpp/docs/Jetson_build.md
+++ b/deploy/cpp/docs/Jetson_build.md
@@ -159,7 +159,7 @@ CUDNN_LIB=/usr/lib/aarch64-linux-gnu/
 | --image_dir | 要预测的图片文件夹路径 |
 | --video_file | 要预测的视频文件路径 |
 | --camera_id | Option | 用来预测的摄像头ID,默认为-1(表示不使用摄像头预测)|
-| --use_gpu | 是否使用 GPU 预测, 支持值为0或1(默认值为0)|
+| --device | 运行时的设备,可选择`CPU/GPU/XPU`,默认为`CPU`|
 | --gpu_id | 指定进行推理的GPU device id(默认值为0)|
 | --run_mode | 使用GPU时,默认为fluid, 可选(fluid/trt_fp32/trt_fp16/trt_int8)|
 | --batch_size |预测时的batch size,在指定`image_dir`时有效 |
@@ -183,7 +183,7 @@ CUDNN_LIB=/usr/lib/aarch64-linux-gnu/
 `样例二`:
 ```shell
 #使用 `GPU`预测视频`/root/projects/videos/test.mp4`
-./main --model_dir=/root/projects/models/yolov3_darknet --video_path=/root/projects/images/test.mp4 --use_gpu=1
+./main --model_dir=/root/projects/models/yolov3_darknet --video_path=/root/projects/images/test.mp4 --device=GPU
 ```
 视频文件目前支持`.mp4`格式的预测,`可视化预测结果`会保存在当前目录下`output.mp4`文件中。
diff --git a/deploy/cpp/docs/linux_build.md b/deploy/cpp/docs/linux_build.md
index 5631e8a27b20cec9e280c651fb0cfbe15c0ce504..440d6b9d83686ec597d71ecc1b6b62f1dc3f55d1 100755
--- a/deploy/cpp/docs/linux_build.md
+++ b/deploy/cpp/docs/linux_build.md
@@ -101,7 +101,7 @@ make
 | --image_dir | 要预测的图片文件夹路径 |
 | --video_file | 要预测的视频文件路径 |
 | --camera_id | Option | 用来预测的摄像头ID,默认为-1(表示不使用摄像头预测)|
-| --use_gpu | 是否使用 GPU 预测, 支持值为0或1(默认值为0)|
+| --device | 运行时的设备,可选择`CPU/GPU/XPU`,默认为`CPU`|
 | --gpu_id | 指定进行推理的GPU device id(默认值为0)|
 | --run_mode | 使用GPU时,默认为fluid, 可选(fluid/trt_fp32/trt_fp16/trt_int8)|
 | --batch_size | 预测时的batch size,在指定`image_dir`时有效 |
@@ -125,7 +125,7 @@ make
 `样例二`:
 ```shell
 #使用 `GPU`预测视频`/root/projects/videos/test.mp4`
-./build/main --model_dir=/root/projects/models/yolov3_darknet --video_path=/root/projects/images/test.mp4 --use_gpu=1
+./build/main --model_dir=/root/projects/models/yolov3_darknet --video_path=/root/projects/images/test.mp4 --device=GPU
 ```
 视频文件目前支持`.mp4`格式的预测,`可视化预测结果`会保存在当前目录下`output.mp4`文件中。
diff --git a/deploy/cpp/docs/windows_vs2019_build.md b/deploy/cpp/docs/windows_vs2019_build.md
index 6a9f1f9d9806c0fde15c8aa472100dd30e55e967..bd65848be1b5bfd13e44437c0ec3d78e6100d7a3 100755
--- a/deploy/cpp/docs/windows_vs2019_build.md
+++ b/deploy/cpp/docs/windows_vs2019_build.md
@@ -96,7 +96,7 @@ cd D:\projects\PaddleDetection\deploy\cpp\out\build\x64-Release
 | --image_dir | 要预测的图片文件夹路径 |
 | --video_file | 要预测的视频文件路径 |
 | --camera_id | Option | 用来预测的摄像头ID,默认为-1(表示不使用摄像头预测)|
-| --use_gpu | 是否使用 GPU 预测, 支持值为0或1(默认值为0)|
+| --device | 运行时的设备,可选择`CPU/GPU/XPU`,默认为`CPU`|
 | --gpu_id | 指定进行推理的GPU device id(默认值为0)|
 | --run_mode | 使用GPU时,默认为fluid, 可选(fluid/trt_fp32/trt_fp16/trt_int8)|
 | --batch_size | 预测时的batch size,在指定`image_dir`时有效 |
@@ -122,7 +122,7 @@ cd D:\projects\PaddleDetection\deploy\cpp\out\build\x64-Release
 `样例二`:
 ```shell
 #使用`GPU`测试视频 `D:\\videos\\test.mp4`
-.\main --model_dir=D:\\models\\yolov3_darknet --video_path=D:\\videos\\test.mp4 --use_gpu=1
+.\main --model_dir=D:\\models\\yolov3_darknet --video_path=D:\\videos\\test.mp4 --device=GPU
 ```
 视频文件目前支持`.mp4`格式的预测,`可视化预测结果`会保存在当前目录下`output.mp4`文件中。
diff --git a/deploy/cpp/include/object_detector.h b/deploy/cpp/include/object_detector.h
index 0a5c4d9a187878f22b5805d03e35ade9145d2a15..2b86ba94527d2aeefa96269a5cadcffdd7470335 100644
--- a/deploy/cpp/include/object_detector.h
+++ b/deploy/cpp/include/object_detector.h
@@ -58,7 +58,7 @@ cv::Mat VisualizeResult(const cv::Mat& img,
 class ObjectDetector {
  public:
   explicit ObjectDetector(const std::string& model_dir,
-                          bool use_gpu=false,
+                          const std::string& device="CPU",
                           bool use_mkldnn=false,
                           int cpu_threads=1,
                           const std::string& run_mode="fluid",
@@ -68,7 +68,7 @@ class ObjectDetector {
                           const int trt_max_shape=1280,
                           const int trt_opt_shape=640,
                           bool trt_calib_mode=false) {
-    this->use_gpu_ = use_gpu;
+    this->device_ = device;
     this->gpu_id_ = gpu_id;
     this->cpu_math_library_num_threads_ = cpu_threads;
     this->use_mkldnn_ = use_mkldnn;
@@ -106,7 +106,7 @@ class ObjectDetector {
   }
 
  private:
-  bool use_gpu_ = false;
+  std::string device_ = "CPU";
   int gpu_id_ = 0;
   int cpu_math_library_num_threads_ = 1;
   bool use_mkldnn_ = false;
diff --git a/deploy/cpp/src/main.cc b/deploy/cpp/src/main.cc
index b429a8727dd340195c12484ca7c2dea449dd247d..4869d9960f8a853a79327094dc8c35936ed226e9 100644
--- a/deploy/cpp/src/main.cc
+++ b/deploy/cpp/src/main.cc
@@ -22,6 +22,7 @@
 #include
 #include
 #include
+#include <algorithm>
 
 #ifdef _WIN32
 #include
@@ -41,7 +42,8 @@ DEFINE_string(image_dir, "", "Dir of input image, `image_file` has a higher prio
 DEFINE_int32(batch_size, 1, "batch_size");
 DEFINE_string(video_file, "", "Path of input video, `video_file` or `camera_id` has a highest priority.");
 DEFINE_int32(camera_id, -1, "Device id of camera to predict");
-DEFINE_bool(use_gpu, false, "Infering with GPU or CPU");
+DEFINE_bool(use_gpu, false, "Deprecated, please use `--device` to set the device you want to run.");
+DEFINE_string(device, "CPU", "Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU.");
 DEFINE_double(threshold, 0.5, "Threshold of score.");
 DEFINE_string(output_dir, "output", "Directory of output visualization files.");
 DEFINE_string(run_mode, "fluid", "Mode of running(fluid/trt_fp32/trt_fp16/trt_int8)");
@@ -56,7 +58,7 @@ DEFINE_bool(trt_calib_mode, false, "If the model is produced by TRT offline quan
 
 void PrintBenchmarkLog(std::vector<double> det_time, int img_num){
   LOG(INFO) << "----------------------- Config info -----------------------";
-  LOG(INFO) << "runtime_device: " << (FLAGS_use_gpu ? "gpu" : "cpu");
+  LOG(INFO) << "runtime_device: " << FLAGS_device;
   LOG(INFO) << "ir_optim: " << "True";
   LOG(INFO) << "enable_memory_optim: " << "True";
   int has_trt = FLAGS_run_mode.find("trt");
@@ -78,7 +80,7 @@ void PrintBenchmarkLog(std::vector<double> det_time, int img_num){
   LOG(INFO) << "model_name: " << FLAGS_model_dir.substr(FLAGS_model_dir.find_last_of('/') + 1);
   LOG(INFO) << "----------------------- Perf info ------------------------";
   LOG(INFO) << "Total number of predicted data: " << img_num
-            << " and total time spent(s): "
+            << " and total time spent(ms): "
            << std::accumulate(det_time.begin(), det_time.end(), 0);
   LOG(INFO) << "preproce_time(ms): " << det_time[0] / img_num
             << ", inference_time(ms): " << det_time[1] / img_num
@@ -358,8 +360,17 @@ int main(int argc, char** argv) {
     std::cout << "run_mode should be 'fluid', 'trt_fp32', 'trt_fp16' or 'trt_int8'.";
     return -1;
   }
+  transform(FLAGS_device.begin(),FLAGS_device.end(),FLAGS_device.begin(),::toupper);
+  if (!(FLAGS_device == "CPU" || FLAGS_device == "GPU" || FLAGS_device == "XPU")) {
+    std::cout << "device should be 'CPU', 'GPU' or 'XPU'.";
+    return -1;
+  }
+  if (FLAGS_use_gpu) {
+    std::cout << "Deprecated, please use `--device` to set the device you want to run.";
+    return -1;
+  }
   // Load model and create a object detector
-  PaddleDetection::ObjectDetector det(FLAGS_model_dir, FLAGS_use_gpu, FLAGS_use_mkldnn,
+  PaddleDetection::ObjectDetector det(FLAGS_model_dir, FLAGS_device, FLAGS_use_mkldnn,
                                       FLAGS_cpu_threads, FLAGS_run_mode, FLAGS_batch_size,FLAGS_gpu_id,
                                       FLAGS_trt_min_shape, FLAGS_trt_max_shape, FLAGS_trt_opt_shape,
                                       FLAGS_trt_calib_mode);
diff --git a/deploy/cpp/src/object_detector.cc b/deploy/cpp/src/object_detector.cc
index 1c104bf7c554d108b097a1f92107484da09419ee..1839badc6f60274587f010243b6f6752633de868 100644
--- a/deploy/cpp/src/object_detector.cc
+++ b/deploy/cpp/src/object_detector.cc
@@ -30,7 +30,7 @@ void ObjectDetector::LoadModel(const std::string& model_dir,
   std::string prog_file = model_dir + OS_PATH_SEP + "model.pdmodel";
   std::string params_file = model_dir + OS_PATH_SEP + "model.pdiparams";
   config.SetModel(prog_file, params_file);
-  if (this->use_gpu_) {
+  if (this->device_ == "GPU") {
     config.EnableUseGpu(200, this->gpu_id_);
     config.SwitchIrOptim(true);
     // use tensorrt
@@ -73,6 +73,8 @@ void ObjectDetector::LoadModel(const std::string& model_dir,
       }
     }
 
+  } else if (this->device_ == "XPU"){
+    config.EnableXpu(10*1024*1024);
   } else {
     config.DisableGpu();
     if (this->use_mkldnn_) {
diff --git a/deploy/python/README.md b/deploy/python/README.md
index 3f4cd5832f6142909ee755a3a7edd3767ef79fcd..131a5f112365502579a0e2487459331fb1f74215 100644
--- a/deploy/python/README.md
+++ b/deploy/python/README.md
@@ -21,26 +21,26 @@ PaddleDetection在训练过程包括网络的前向和优化器相关参数,
 在终端输入以下命令进行预测:
 
 ```bash
-python deploy/python/infer.py --model_dir=./inference/yolov3_mobilenet_v1_roadsign --image_file=./demo/road554.png --use_gpu=True
+python deploy/python/infer.py --model_dir=./inference/yolov3_mobilenet_v1_roadsign --image_file=./demo/road554.png --device=GPU
 ```
 
 参数说明如下:
 
 | 参数 | 是否必须|含义 |
 |-------|-------|----------|
-| --model_dir | Yes|上述导出的模型路径 |
-| --image_file | Option |需要预测的图片 |
+| --model_dir | Yes| 上述导出的模型路径 |
+| --image_file | Option | 需要预测的图片 |
 | --image_dir | Option | 要预测的图片文件夹路径 |
-| --video_file | Option |需要预测的视频 |
+| --video_file | Option | 需要预测的视频 |
 | --camera_id | Option | 用来预测的摄像头ID,默认为-1(表示不使用摄像头预测,可设置为:0 - (摄像头数目-1) ),预测过程中在可视化界面按`q`退出输出预测结果到:output/output.mp4|
-| --use_gpu | No |是否GPU,默认为False|
-| --run_mode | No |使用GPU时,默认为fluid, 可选(fluid/trt_fp32/trt_fp16/trt_int8)|
-| --batch_size | No |预测时的batch size,在指定`image_dir`时有效 |
-| --threshold | No|预测得分的阈值,默认为0.5|
-| --output_dir | No|可视化结果保存的根目录,默认为output/|
-| --run_benchmark | No| 是否运行benchmark,同时需指定`--image_file`或`--image_dir` |
-| --enable_mkldnn | No | CPU预测中是否开启MKLDNN加速 |
-| --cpu_threads | No| 设置cpu线程数,默认为1 |
+| --device | Option | 运行时的设备,可选择`CPU/GPU/XPU`,默认为`CPU`|
+| --run_mode | Option |使用GPU时,默认为fluid, 可选(fluid/trt_fp32/trt_fp16/trt_int8)|
+| --batch_size | Option |预测时的batch size,在指定`image_dir`时有效,默认为1 |
+| --threshold | Option|预测得分的阈值,默认为0.5|
+| --output_dir | Option|可视化结果保存的根目录,默认为output/|
+| --run_benchmark | Option| 是否运行benchmark,同时需指定`--image_file`或`--image_dir`,默认为False |
+| --enable_mkldnn | Option | CPU预测中是否开启MKLDNN加速,默认为False |
+| --cpu_threads | Option| 设置cpu线程数,默认为1 |
 
 说明:
diff --git a/deploy/python/infer.py b/deploy/python/infer.py
index 91a515e355cd7d62c0a16350b5a46f3991e08bc3..84f52b07b763dde975d6bc59bc16c912c246dac3 100644
--- a/deploy/python/infer.py
+++ b/deploy/python/infer.py
@@ -49,7 +49,7 @@ class Detector(object):
     Args:
         config (object): config of model, defined by `Config(model_dir)`
         model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
-        use_gpu (bool): whether use gpu
+        device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
         run_mode (str): mode of running(fluid/trt_fp32/trt_fp16)
         batch_size (int): size of pre batch in inference
         trt_min_shape (int): min shape for dynamic shape in trt
@@ -62,7 +62,7 @@ class Detector(object):
     def __init__(self,
                  pred_config,
                  model_dir,
-                 use_gpu=False,
+                 device='CPU',
                  run_mode='fluid',
                  batch_size=1,
                  trt_min_shape=1,
@@ -77,7 +77,7 @@ class Detector(object):
             run_mode=run_mode,
             batch_size=batch_size,
             min_subgraph_size=self.pred_config.min_subgraph_size,
-            use_gpu=use_gpu,
+            device=device,
             use_dynamic_shape=self.pred_config.use_dynamic_shape,
             trt_min_shape=trt_min_shape,
             trt_max_shape=trt_max_shape,
@@ -177,7 +177,7 @@ class DetectorSOLOv2(Detector):
     Args:
         config (object): config of model, defined by `Config(model_dir)`
         model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
-        use_gpu (bool): whether use gpu
+        device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
         run_mode (str): mode of running(fluid/trt_fp32/trt_fp16)
         batch_size (int): size of pre batch in inference
         trt_min_shape (int): min shape for dynamic shape in trt
@@ -189,7 +189,7 @@ class DetectorSOLOv2(Detector):
     def __init__(self,
                  pred_config,
                  model_dir,
-                 use_gpu=False,
+                 device='CPU',
                  run_mode='fluid',
                  batch_size=1,
                  trt_min_shape=1,
@@ -204,7 +204,7 @@ class DetectorSOLOv2(Detector):
             run_mode=run_mode,
             batch_size=batch_size,
             min_subgraph_size=self.pred_config.min_subgraph_size,
-            use_gpu=use_gpu,
+            device=device,
             use_dynamic_shape=self.pred_config.use_dynamic_shape,
             trt_min_shape=trt_min_shape,
             trt_max_shape=trt_max_shape,
@@ -352,7 +352,7 @@ class PredictConfig():
 def load_predictor(model_dir,
                    run_mode='fluid',
                    batch_size=1,
-                   use_gpu=False,
+                   device='CPU',
                    min_subgraph_size=3,
                    use_dynamic_shape=False,
                    trt_min_shape=1,
@@ -364,7 +364,7 @@ def load_predictor(model_dir,
     """set AnalysisConfig, generate AnalysisPredictor
     Args:
         model_dir (str): root path of __model__ and __params__
-        use_gpu (bool): whether use gpu
+        device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
         run_mode (str): mode of running(fluid/trt_fp32/trt_fp16/trt_int8)
         use_dynamic_shape (bool): use dynamic shape or not
         trt_min_shape (int): min shape for dynamic shape in trt
@@ -375,25 +375,22 @@ def load_predictor(model_dir,
     Returns:
         predictor (PaddlePredictor): AnalysisPredictor
     Raises:
-        ValueError: predict by TensorRT need use_gpu == True.
+        ValueError: predict by TensorRT need device == 'GPU'.
     """
-    if not use_gpu and not run_mode == 'fluid':
+    if device != 'GPU' and run_mode != 'fluid':
         raise ValueError(
-            "Predict by TensorRT mode: {}, expect use_gpu==True, but use_gpu == {}"
-            .format(run_mode, use_gpu))
+            "Predict by TensorRT mode: {}, expect device=='GPU', but device == {}"
+            .format(run_mode, device))
     config = Config(
         os.path.join(model_dir, 'model.pdmodel'),
         os.path.join(model_dir, 'model.pdiparams'))
-    precision_map = {
-        'trt_int8': Config.Precision.Int8,
-        'trt_fp32': Config.Precision.Float32,
-        'trt_fp16': Config.Precision.Half
-    }
-    if use_gpu:
+    if device == 'GPU':
         # initial GPU memory(M), device ID
         config.enable_use_gpu(200, 0)
         # optimize graph and fuse op
         config.switch_ir_optim(True)
+    elif device == 'XPU':
+        config.enable_xpu(10 * 1024 * 1024)
     else:
         config.disable_gpu()
         config.set_cpu_math_library_num_threads(cpu_threads)
@@ -408,6 +405,11 @@ def load_predictor(model_dir,
             )
             pass
 
+    precision_map = {
+        'trt_int8': Config.Precision.Int8,
+        'trt_fp32': Config.Precision.Float32,
+        'trt_fp16': Config.Precision.Half
+    }
     if run_mode in precision_map.keys():
         config.enable_tensorrt_engine(
             workspace_size=1 << 10,
@@ -582,7 +584,7 @@ def main():
     detector = Detector(
         pred_config,
         FLAGS.model_dir,
-        use_gpu=FLAGS.use_gpu,
+        device=FLAGS.device,
         run_mode=FLAGS.run_mode,
         batch_size=FLAGS.batch_size,
         trt_min_shape=FLAGS.trt_min_shape,
@@ -595,7 +597,7 @@ def main():
     detector = DetectorSOLOv2(
         pred_config,
         FLAGS.model_dir,
-        use_gpu=FLAGS.use_gpu,
+        device=FLAGS.device,
         run_mode=FLAGS.run_mode,
         batch_size=FLAGS.batch_size,
         trt_min_shape=FLAGS.trt_min_shape,
@@ -645,5 +647,9 @@ if __name__ == '__main__':
     parser = argsparser()
     FLAGS = parser.parse_args()
     print_arguments(FLAGS)
+    FLAGS.device = FLAGS.device.upper()
+    assert FLAGS.device in ['CPU', 'GPU', 'XPU'
+                            ], "device should be CPU, GPU or XPU"
+    assert not FLAGS.use_gpu, "use_gpu has been deprecated, please use --device"
 
     main()
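The reworked `load_predictor` above, like the C++ `LoadModel` earlier in this patch, reduces device selection to one normalized string: GPU turns on `enable_use_gpu` plus IR optimization, XPU goes through `enable_xpu`, and anything else falls back to the CPU path. The snippet below is only a minimal sketch of that dispatch, assuming a Paddle 2.x `paddle.inference` install; `make_config` is an illustrative helper name rather than code from this patch, and the memory sizes simply echo the values used above.

```python
# Minimal sketch of the CPU/GPU/XPU dispatch introduced by this patch
# (illustrative helper, not code from the repository).
from paddle.inference import Config, create_predictor


def make_config(model_dir, device="CPU"):
    # model.pdmodel / model.pdiparams are the exported inference files used above
    config = Config(model_dir + "/model.pdmodel", model_dir + "/model.pdiparams")
    device = device.upper()                  # the flag is normalized before use
    if device == "GPU":
        config.enable_use_gpu(200, 0)        # initial GPU memory (MB), device id
        config.switch_ir_optim(True)
    elif device == "XPU":
        config.enable_xpu(10 * 1024 * 1024)  # XPU L3 workspace size
    else:
        config.disable_gpu()                 # plain CPU path; MKLDNN/threads set elsewhere
    return config


# e.g. predictor = create_predictor(make_config("./inference/yolov3_darknet", "GPU"))
```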
diff --git a/deploy/python/keypoint_det_unite_infer.py b/deploy/python/keypoint_det_unite_infer.py
index 6ff335971ff0521a52dadef9155dd5a4e9deff0a..d0321873d56ff2f012b1f8acd056c8f7dad8dff8 100644
--- a/deploy/python/keypoint_det_unite_infer.py
+++ b/deploy/python/keypoint_det_unite_infer.py
@@ -156,7 +156,7 @@ def main():
     detector = Detector(
         pred_config,
         FLAGS.det_model_dir,
-        use_gpu=FLAGS.use_gpu,
+        device=FLAGS.device,
         run_mode=FLAGS.run_mode,
         trt_min_shape=FLAGS.trt_min_shape,
         trt_max_shape=FLAGS.trt_max_shape,
@@ -169,7 +169,7 @@ def main():
     topdown_keypoint_detector = KeyPoint_Detector(
         pred_config,
         FLAGS.keypoint_model_dir,
-        use_gpu=FLAGS.use_gpu,
+        device=FLAGS.device,
         run_mode=FLAGS.run_mode,
         trt_min_shape=FLAGS.trt_min_shape,
         trt_max_shape=FLAGS.trt_max_shape,
@@ -193,5 +193,8 @@ if __name__ == '__main__':
     parser = argsparser()
     FLAGS = parser.parse_args()
     print_arguments(FLAGS)
+    FLAGS.device = FLAGS.device.upper()
+    assert FLAGS.device in ['CPU', 'GPU', 'XPU'
+                            ], "device should be CPU, GPU or XPU"
 
     main()
diff --git a/deploy/python/keypoint_infer.py b/deploy/python/keypoint_infer.py
index c1f5e15bf0237a6a2a60e6345759afb03f4110ae..74bd84a164cb5bbad2e911c1e662edf1253c64f3 100644
--- a/deploy/python/keypoint_infer.py
+++ b/deploy/python/keypoint_infer.py
@@ -44,7 +44,7 @@ class KeyPoint_Detector(object):
     Args:
         config (object): config of model, defined by `Config(model_dir)`
         model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
-        use_gpu (bool): whether use gpu
+        device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
         run_mode (str): mode of running(fluid/trt_fp32/trt_fp16)
         trt_min_shape (int): min shape for dynamic shape in trt
         trt_max_shape (int): max shape for dynamic shape in trt
@@ -56,7 +56,7 @@ class KeyPoint_Detector(object):
     def __init__(self,
                  pred_config,
                  model_dir,
-                 use_gpu=False,
+                 device='CPU',
                  run_mode='fluid',
                  trt_min_shape=1,
                  trt_max_shape=1280,
@@ -69,7 +69,7 @@ class KeyPoint_Detector(object):
             model_dir,
             run_mode=run_mode,
             min_subgraph_size=self.pred_config.min_subgraph_size,
-            use_gpu=use_gpu,
+            device=device,
             use_dynamic_shape=self.pred_config.use_dynamic_shape,
             trt_min_shape=trt_min_shape,
             trt_max_shape=trt_max_shape,
@@ -236,7 +236,7 @@ class PredictConfig_KeyPoint():
 def load_predictor(model_dir,
                    run_mode='fluid',
                    batch_size=1,
-                   use_gpu=False,
+                   device='CPU',
                    min_subgraph_size=3,
                    use_dynamic_shape=False,
                    trt_min_shape=1,
@@ -248,7 +248,7 @@ def load_predictor(model_dir,
     """set AnalysisConfig, generate AnalysisPredictor
     Args:
         model_dir (str): root path of __model__ and __params__
-        use_gpu (bool): whether use gpu
+        device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
         run_mode (str): mode of running(fluid/trt_fp32/trt_fp16/trt_int8)
         use_dynamic_shape (bool): use dynamic shape or not
         trt_min_shape (int): min shape for dynamic shape in trt
@@ -259,25 +259,22 @@ def load_predictor(model_dir,
     Returns:
         predictor (PaddlePredictor): AnalysisPredictor
     Raises:
-        ValueError: predict by TensorRT need use_gpu == True.
+        ValueError: predict by TensorRT need device == 'GPU'.
     """
-    if not use_gpu and not run_mode == 'fluid':
+    if device != 'GPU' and run_mode != 'fluid':
         raise ValueError(
-            "Predict by TensorRT mode: {}, expect use_gpu==True, but use_gpu == {}"
-            .format(run_mode, use_gpu))
+            "Predict by TensorRT mode: {}, expect device=='GPU', but device == {}"
+            .format(run_mode, device))
     config = Config(
         os.path.join(model_dir, 'model.pdmodel'),
         os.path.join(model_dir, 'model.pdiparams'))
-    precision_map = {
-        'trt_int8': Config.Precision.Int8,
-        'trt_fp32': Config.Precision.Float32,
-        'trt_fp16': Config.Precision.Half
-    }
-    if use_gpu:
+    if device == 'GPU':
         # initial GPU memory(M), device ID
         config.enable_use_gpu(200, 0)
         # optimize graph and fuse op
         config.switch_ir_optim(True)
+    elif device == 'XPU':
+        config.enable_xpu(10 * 1024 * 1024)
     else:
         config.disable_gpu()
         config.set_cpu_math_library_num_threads(cpu_threads)
@@ -292,6 +289,11 @@ def load_predictor(model_dir,
             )
             pass
 
+    precision_map = {
+        'trt_int8': Config.Precision.Int8,
+        'trt_fp32': Config.Precision.Float32,
+        'trt_fp16': Config.Precision.Half
+    }
     if run_mode in precision_map.keys():
         config.enable_tensorrt_engine(
             workspace_size=1 << 10,
@@ -381,7 +383,7 @@ def main():
     detector = KeyPoint_Detector(
         pred_config,
         FLAGS.model_dir,
-        use_gpu=FLAGS.use_gpu,
+        device=FLAGS.device,
         run_mode=FLAGS.run_mode,
         trt_min_shape=FLAGS.trt_min_shape,
         trt_max_shape=FLAGS.trt_max_shape,
@@ -427,5 +429,9 @@ if __name__ == '__main__':
     parser = argsparser()
     FLAGS = parser.parse_args()
     print_arguments(FLAGS)
+    FLAGS.device = FLAGS.device.upper()
+    assert FLAGS.device in ['CPU', 'GPU', 'XPU'
+                            ], "device should be CPU, GPU or XPU"
+    assert not FLAGS.use_gpu, "use_gpu has been deprecated, please use --device"
 
     main()
diff --git a/deploy/python/topdown_unite_utils.py b/deploy/python/topdown_unite_utils.py
index 02d3c604975e2c0b2b5333719d1e7b0c236e0723..ff34c5e8f43281367abd4069c56c1adb495c258e 100644
--- a/deploy/python/topdown_unite_utils.py
+++ b/deploy/python/topdown_unite_utils.py
@@ -68,10 +68,11 @@ def argsparser():
         default='fluid',
         help="mode of running(fluid/trt_fp32/trt_fp16/trt_int8)")
     parser.add_argument(
-        "--use_gpu",
-        type=ast.literal_eval,
-        default=False,
-        help="Whether to predict with GPU.")
+        "--device",
+        type=str,
+        default='cpu',
+        help="Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU."
+    )
     parser.add_argument(
         "--run_benchmark",
         type=ast.literal_eval,
diff --git a/deploy/python/utils.py b/deploy/python/utils.py
index aedcab194fcbb468cef1550e3f6a77ca93c5aea1..411f55a889ce82dae83ba5f5aea3ccf37f95a80d 100644
--- a/deploy/python/utils.py
+++ b/deploy/python/utils.py
@@ -59,11 +59,17 @@ def argsparser():
         type=str,
         default='fluid',
         help="mode of running(fluid/trt_fp32/trt_fp16/trt_int8)")
+    parser.add_argument(
+        "--device",
+        type=str,
+        default='cpu',
+        help="Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU."
+    )
     parser.add_argument(
         "--use_gpu",
         type=ast.literal_eval,
         default=False,
-        help="Whether to predict with GPU.")
+        help="Deprecated, please use `--device`.")
     parser.add_argument(
         "--run_benchmark",
         type=ast.literal_eval,
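Every Python entry point touched above now accepts `--device`, keeps `--use_gpu` only as a deprecated switch, and normalizes and validates the value immediately after parsing. A condensed, stand-alone sketch of that flag-handling pattern follows; the argument definitions mirror `utils.argsparser()`, while the wrapper script itself is illustrative only.

```python
# Illustrative stand-alone version of the flag handling added to the Python entry points.
import argparse
import ast

parser = argparse.ArgumentParser()
parser.add_argument(
    "--device", type=str, default="cpu",
    help="Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU.")
parser.add_argument(
    "--use_gpu", type=ast.literal_eval, default=False,
    help="Deprecated, please use `--device`.")

FLAGS = parser.parse_args()
FLAGS.device = FLAGS.device.upper()   # cpu/gpu/xpu accepted in any case
assert FLAGS.device in ['CPU', 'GPU', 'XPU'], "device should be CPU, GPU or XPU"
assert not FLAGS.use_gpu, "use_gpu has been deprecated, please use --device"
```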
diff --git a/static/deploy/cpp/docs/Jetson_build.md b/static/deploy/cpp/docs/Jetson_build.md
index 386cb0bcb6a697620afe877d4584e600e022050f..a1017371c5403c5bc33f77fb926da395f1b85364 100644
--- a/static/deploy/cpp/docs/Jetson_build.md
+++ b/static/deploy/cpp/docs/Jetson_build.md
@@ -153,7 +153,7 @@ CUDNN_LIB=/usr/lib/aarch64-linux-gnu/
 | --image_file | 要预测的图片文件路径 |
 | --video_path | 要预测的视频文件路径 |
 | --camera_id | Option | 用来预测的摄像头ID,默认为-1(表示不使用摄像头预测)|
-| --use_gpu | 是否使用 GPU 预测, 支持值为0或1(默认值为0)|
+| --device | 运行时的设备,可选择`CPU/GPU/XPU`,默认为`CPU`|
 | --gpu_id | 指定进行推理的GPU device id(默认值为0)|
 | --run_mode | 使用GPU时,默认为fluid, 可选(fluid/trt_fp32/trt_fp16/trt_int8)|
 | --run_benchmark | 是否重复预测来进行benchmark测速 |
@@ -174,7 +174,7 @@ CUDNN_LIB=/usr/lib/aarch64-linux-gnu/
 `样例二`:
 ```shell
 #使用 `GPU`预测视频`/root/projects/videos/test.mp4`
-./main --model_dir=/root/projects/models/yolov3_darknet --video_path=/root/projects/images/test.mp4 --use_gpu=1
+./main --model_dir=/root/projects/models/yolov3_darknet --video_path=/root/projects/images/test.mp4 --device=GPU
 ```
 视频文件目前支持`.mp4`格式的预测,`可视化预测结果`会保存在当前目录下`output.mp4`文件中。
diff --git a/static/deploy/cpp/docs/linux_build.md b/static/deploy/cpp/docs/linux_build.md
index c63eadefd7b9792efaa3616658a37ddb5ba09d72..67bb718f58fe90764e9bcaa03a86b9fe253e13a7 100644
--- a/static/deploy/cpp/docs/linux_build.md
+++ b/static/deploy/cpp/docs/linux_build.md
@@ -100,7 +100,7 @@ make
 | --image_file | 要预测的图片文件路径 |
 | --video_path | 要预测的视频文件路径 |
 | --camera_id | Option | 用来预测的摄像头ID,默认为-1(表示不使用摄像头预测)|
-| --use_gpu | 是否使用 GPU 预测, 支持值为0或1(默认值为0)|
+| --device | 运行时的设备,可选择`CPU/GPU/XPU`,默认为`CPU`|
 | --gpu_id | 指定进行推理的GPU device id(默认值为0)|
 | --run_mode | 使用GPU时,默认为fluid, 可选(fluid/trt_fp32/trt_fp16/trt_int8)|
 | --run_benchmark | 是否重复预测来进行benchmark测速 |
@@ -121,6 +121,6 @@ make
 `样例二`:
 ```shell
 #使用 `GPU`预测视频`/root/projects/videos/test.mp4`
-./build/main --model_dir=/root/projects/models/yolov3_darknet --video_path=/root/projects/images/test.mp4 --use_gpu=1
+./build/main --model_dir=/root/projects/models/yolov3_darknet --video_path=/root/projects/images/test.mp4 --device=GPU
 ```
 视频文件目前支持`.mp4`格式的预测,`可视化预测结果`会保存在当前目录下`output.mp4`文件中。
diff --git a/static/deploy/cpp/docs/windows_vs2019_build.md b/static/deploy/cpp/docs/windows_vs2019_build.md
index 7964c68d623071fa5b277d32e089db9f42123195..9073871466d6b625ba4ac95abff108c028e277f6 100644
--- a/static/deploy/cpp/docs/windows_vs2019_build.md
+++ b/static/deploy/cpp/docs/windows_vs2019_build.md
@@ -95,7 +95,7 @@ cd D:\projects\PaddleDetection\deploy\cpp\out\build\x64-Release
 | --image_file | 要预测的图片文件路径 |
 | --video_path | 要预测的视频文件路径 |
 | --camera_id | Option | 用来预测的摄像头ID,默认为-1(表示不使用摄像头预测)|
-| --use_gpu | 是否使用 GPU 预测, 支持值为0或1(默认值为0)|
+| --device | 运行时的设备,可选择`CPU/GPU/XPU`,默认为`CPU`|
 | --gpu_id | 指定进行推理的GPU device id(默认值为0)|
 | --run_mode | 使用GPU时,默认为fluid, 可选(fluid/trt_fp32/trt_fp16/trt_int8)|
 | --run_benchmark | 是否重复预测来进行benchmark测速 |
@@ -118,7 +118,7 @@ cd D:\projects\PaddleDetection\deploy\cpp\out\build\x64-Release
 `样例二`:
 ```shell
 #使用`GPU`测试视频 `D:\\videos\\test.mp4`
-.\main --model_dir=D:\\models\\yolov3_darknet --video_path=D:\\videos\\test.mp4 --use_gpu=1
+.\main --model_dir=D:\\models\\yolov3_darknet --video_path=D:\\videos\\test.mp4 --device=GPU
 ```
 视频文件目前支持`.mp4`格式的预测,`可视化预测结果`会保存在当前目录下`output.mp4`文件中。
diff --git a/static/deploy/cpp/include/object_detector.h b/static/deploy/cpp/include/object_detector.h
index 55778a9d3ef29a75d3d9aecc792c251cb3b4e65f..b0173989dd80782f1243dce9250ca5f7aee634c5 100644
--- a/static/deploy/cpp/include/object_detector.h
+++ b/static/deploy/cpp/include/object_detector.h
@@ -56,20 +56,20 @@ cv::Mat VisualizeResult(const cv::Mat& img,
 class ObjectDetector {
  public:
   explicit ObjectDetector(const std::string& model_dir,
-                          bool use_gpu=false,
+                          const std::string& device,
                           const std::string& run_mode="fluid",
                           const int gpu_id=0,
                           bool trt_calib_mode=false) {
     config_.load_config(model_dir);
     threshold_ = config_.draw_threshold_;
     preprocessor_.Init(config_.preprocess_info_, config_.arch_);
-    LoadModel(model_dir, use_gpu, config_.min_subgraph_size_, 1, run_mode, gpu_id, trt_calib_mode);
+    LoadModel(model_dir, device, config_.min_subgraph_size_, 1, run_mode, gpu_id, trt_calib_mode);
   }
 
   // Load Paddle inference model
   void LoadModel(
     const std::string& model_dir,
-    bool use_gpu,
+    const std::string& device,
     const int min_subgraph_size,
     const int batch_size = 1,
     const std::string& run_mode = "fluid",
diff --git a/static/deploy/cpp/src/main.cc b/static/deploy/cpp/src/main.cc
index 3ca468e277f7c7cf0c62daa957e8c9e470decc82..c1d5693978d1b667e344ab891f7839b50ca2b93f 100644
--- a/static/deploy/cpp/src/main.cc
+++ b/static/deploy/cpp/src/main.cc
@@ -19,6 +19,7 @@
 #include
 #include
 #include
+#include <algorithm>
 
 #ifdef _WIN32
 #include
@@ -35,7 +36,8 @@
 DEFINE_string(model_dir, "", "Path of inference model");
 DEFINE_string(image_file, "", "Path of input image");
 DEFINE_string(video_path, "", "Path of input video");
-DEFINE_bool(use_gpu, false, "Infering with GPU or CPU");
+DEFINE_bool(use_gpu, false, "Deprecated, please use `--device` to set the device you want to run.");
+DEFINE_string(device, "CPU", "Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU.");
 DEFINE_bool(use_camera, false, "Use camera or not");
 DEFINE_string(run_mode, "fluid", "Mode of running(fluid/trt_fp32/trt_fp16)");
 DEFINE_int32(gpu_id, 0, "Device id of GPU to execute");
@@ -204,9 +206,18 @@ int main(int argc, char** argv) {
     std::cout << "run_mode should be 'fluid', 'trt_fp32', 'trt_fp16' or 'trt_int8'.";
     return -1;
   }
+  transform(FLAGS_device.begin(),FLAGS_device.end(),FLAGS_device.begin(),::toupper);
+  if (!(FLAGS_device == "CPU" || FLAGS_device == "GPU" || FLAGS_device == "XPU")) {
+    std::cout << "device should be 'CPU', 'GPU' or 'XPU'.";
+    return -1;
+  }
+  if (FLAGS_use_gpu) {
+    std::cout << "Deprecated, please use `--device` to set the device you want to run.";
+    return -1;
+  }
   // Load model and create a object detector
-  PaddleDetection::ObjectDetector det(FLAGS_model_dir, FLAGS_use_gpu,
+  PaddleDetection::ObjectDetector det(FLAGS_model_dir, FLAGS_device,
                                       FLAGS_run_mode, FLAGS_gpu_id, FLAGS_trt_calib_mode);
   // Do inference on input video or image
   if (!FLAGS_video_path.empty() || FLAGS_use_camera) {
diff --git a/static/deploy/cpp/src/object_detector.cc b/static/deploy/cpp/src/object_detector.cc
index 4e86d8d6bd822e56682de8f65d8f2c35845a95e4..f257d8021ed10e1f1f3b5fc1a726bc5118e3b13b 100644
--- a/static/deploy/cpp/src/object_detector.cc
+++ b/static/deploy/cpp/src/object_detector.cc
@@ -21,7 +21,7 @@ namespace PaddleDetection {
 
 // Load Model and create model predictor
 void ObjectDetector::LoadModel(const std::string& model_dir,
-                               bool use_gpu,
+                               const std::string& device,
                                const int min_subgraph_size,
                                const int batch_size,
                                const std::string& run_mode,
@@ -31,7 +31,7 @@ void ObjectDetector::LoadModel(const std::string& model_dir,
   std::string prog_file = model_dir + OS_PATH_SEP + "__model__";
   std::string params_file = model_dir + OS_PATH_SEP + "__params__";
   config.SetModel(prog_file, params_file);
-  if (use_gpu) {
+  if (device == "GPU") {
     config.EnableUseGpu(100, gpu_id);
     config.SwitchIrOptim(true);
     if (run_mode != "fluid") {
@@ -51,6 +51,8 @@ void ObjectDetector::LoadModel(const std::string& model_dir,
           false,
           trt_calib_mode);
     }
+  } else if (device == "XPU"){
+    config.EnableXpu(10*1024*1024);
   } else {
     config.DisableGpu();
   }
diff --git a/static/deploy/python/README.md b/static/deploy/python/README.md
index 2e8d761bc58db7d29f499c6888fd2396633f3343..ad3718b93903a1f814ede4e24f689bb8fac74b8e 100644
--- a/static/deploy/python/README.md
+++ b/static/deploy/python/README.md
@@ -45,7 +45,7 @@ python deploy/python/infer.py --model_dir=/path/to/models --image_file=/path/to/
 | --image_file | Option |需要预测的图片 |
 | --video_file | Option |需要预测的视频 |
 | --camera_id | Option | 用来预测的摄像头ID,默认为-1(表示不使用摄像头预测,可设置为:0 - (摄像头数目-1) ),预测过程中在可视化界面按`q`退出输出预测结果到:output/output.mp4|
-| --use_gpu |No|是否GPU,默认为False|
+| --device | Option | 运行时的设备,可选择`CPU/GPU`,默认为`CPU`|
 | --run_mode |No|使用GPU时,默认为fluid, 可选(fluid/trt_fp32/trt_fp16/trt_int8)|
 | --threshold |No|预测得分的阈值,默认为0.5|
 | --output_dir |No|可视化结果保存的根目录,默认为output/|
diff --git a/static/deploy/python/infer.py b/static/deploy/python/infer.py
index 2da31b5e56c3795ed4e8d299129f6615d529a7b2..3f6e72a484c092e8ed4efbe7578c6fbb56321b83 100644
--- a/static/deploy/python/infer.py
+++ b/static/deploy/python/infer.py
@@ -55,7 +55,7 @@ class Detector(object):
     Args:
         config (object): config of model, defined by `Config(model_dir)`
         model_dir (str): root path of __model__, __params__ and infer_cfg.yml
-        use_gpu (bool): whether use gpu
+        device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
         run_mode (str): mode of running(fluid/trt_fp32/trt_fp16)
         threshold (float): threshold to reserve the result for output.
     """
@@ -63,20 +63,20 @@ class Detector(object):
     def __init__(self,
                  config,
                  model_dir,
-                 use_gpu=False,
+                 device='CPU',
                  run_mode='fluid',
                  threshold=0.5,
                  trt_calib_mode=False):
         self.config = config
         if self.config.use_python_inference:
             self.executor, self.program, self.fecth_targets = load_executor(
-                model_dir, use_gpu=use_gpu)
+                model_dir, device=device)
         else:
             self.predictor = load_predictor(
                 model_dir,
                 run_mode=run_mode,
                 min_subgraph_size=self.config.min_subgraph_size,
-                use_gpu=use_gpu,
+                device=device,
                 trt_calib_mode=trt_calib_mode)
@@ -221,14 +221,14 @@ class DetectorSOLOv2(Detector):
     def __init__(self,
                  config,
                  model_dir,
-                 use_gpu=False,
+                 device='CPU',
                  run_mode='fluid',
                  threshold=0.5,
                  trt_calib_mode=False):
         super(DetectorSOLOv2, self).__init__(
             config=config,
             model_dir=model_dir,
-            use_gpu=use_gpu,
+            device=device,
             run_mode=run_mode,
             threshold=threshold,
             trt_calib_mode=trt_calib_mode)
@@ -382,24 +382,24 @@ class Config():
 def load_predictor(model_dir,
                    run_mode='fluid',
                    batch_size=1,
-                   use_gpu=False,
+                   device='CPU',
                    min_subgraph_size=3,
                    trt_calib_mode=False):
     """set AnalysisConfig, generate AnalysisPredictor
     Args:
         model_dir (str): root path of __model__ and __params__
-        use_gpu (bool): whether use gpu
+        device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
         trt_calib_mode (bool): If the model is produced by TRT offline quantitative
             calibration, trt_calib_mode need to set True
     Returns:
         predictor (PaddlePredictor): AnalysisPredictor
     Raises:
-        ValueError: predict by TensorRT need use_gpu == True.
+        ValueError: predict by TensorRT need device == GPU.
     """
-    if not use_gpu and not run_mode == 'fluid':
+    if device != 'GPU' and not run_mode == 'fluid':
         raise ValueError(
-            "Predict by TensorRT mode: {}, expect use_gpu==True, but use_gpu == {}"
-            .format(run_mode, use_gpu))
+            "Predict by TensorRT mode: {}, expect device==GPU, but device == {}"
+            .format(run_mode, device))
     precision_map = {
         'trt_int8': fluid.core.AnalysisConfig.Precision.Int8,
         'trt_fp32': fluid.core.AnalysisConfig.Precision.Float32,
@@ -408,11 +408,13 @@ def load_predictor(model_dir,
     config = fluid.core.AnalysisConfig(
         os.path.join(model_dir, '__model__'),
         os.path.join(model_dir, '__params__'))
-    if use_gpu:
+    if device == 'GPU':
         # initial GPU memory(M), device ID
         config.enable_use_gpu(100, 0)
         # optimize graph and fuse op
         config.switch_ir_optim(True)
+    elif device == 'XPU':
+        config.enable_xpu(10 * 1024 * 1024)
     else:
         config.disable_gpu()
@@ -435,8 +437,8 @@ def load_predictor(model_dir,
     return predictor
 
 
-def load_executor(model_dir, use_gpu=False):
-    if use_gpu:
+def load_executor(model_dir, device='CPU'):
+    if device == 'GPU':
         place = fluid.CUDAPlace(0)
     else:
         place = fluid.CPUPlace()
@@ -539,14 +541,14 @@ def main():
     detector = Detector(
         config,
         FLAGS.model_dir,
-        use_gpu=FLAGS.use_gpu,
+        device=FLAGS.device,
         run_mode=FLAGS.run_mode,
         trt_calib_mode=FLAGS.trt_calib_mode)
     if config.arch == 'SOLOv2':
         detector = DetectorSOLOv2(
             config,
             FLAGS.model_dir,
-            use_gpu=FLAGS.use_gpu,
+            device=FLAGS.device,
             run_mode=FLAGS.run_mode,
             trt_calib_mode=FLAGS.trt_calib_mode)
     # predict from image
@@ -584,11 +586,18 @@ if __name__ == '__main__':
         type=str,
         default='fluid',
         help="mode of running(fluid/trt_fp32/trt_fp16/trt_int8)")
+    parser.add_argument(
+        "--device",
+        type=str,
+        default='cpu',
+        help="Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU."
+    )
     parser.add_argument(
         "--use_gpu",
         type=ast.literal_eval,
         default=False,
-        help="Whether to predict with GPU.")
+        help="Deprecated, please use `--device` to set the device you want to run."
+    )
     parser.add_argument(
         "--run_benchmark",
         type=ast.literal_eval,
@@ -612,5 +621,9 @@ if __name__ == '__main__':
     print_arguments(FLAGS)
     if FLAGS.image_file != '' and FLAGS.video_file != '':
         assert "Cannot predict image and video at the same time"
+    FLAGS.device = FLAGS.device.upper()
+    assert FLAGS.device in ['CPU', 'GPU', 'XPU'
+                            ], "device should be CPU, GPU or XPU"
+    assert not FLAGS.use_gpu, "use_gpu has been deprecated, please use --device"
 
     main()
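In the static-graph `infer.py`, the executor path (`use_python_inference`) only distinguishes CPU from GPU, while XPU requests are served on the `AnalysisConfig` path via `enable_xpu`. A rough sketch of that place selection follows, assuming the legacy `paddle.fluid` API the file already imports; `pick_place` is an illustrative name, not part of the patch.

```python
# Sketch of the device-to-place mapping used by the static-graph executor path.
import paddle.fluid as fluid


def pick_place(device="CPU"):
    if device.upper() == "GPU":
        return fluid.CUDAPlace(0)   # first GPU card, matching load_executor above
    return fluid.CPUPlace()         # CPU fallback; XPU goes through load_predictor instead


exe = fluid.Executor(pick_place("CPU"))
```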