add xpu and npu support for keypoint_detection series. (#1656)

eddc6adf · houj04 · GitHub · 8ca88e56 · eddc6adf · eddc6adf
3 changed files
--- a/modules/image/keypoint_detection/hand_pose_localization/model.py
+++ b/modules/image/keypoint_detection/hand_pose_localization/model.py
@@ -8,9 +8,9 @@ __all__ = ['Model']
 class Model():
    # 初始化函数
-    def __init__(self, modelpath, use_gpu=False, use_mkldnn=True, combined=True):
+    def __init__(self, modelpath, use_gpu=False, use_mkldnn=True, combined=True, use_device=None):
        # 加载模型预测器
-        self.predictor = self.load_model(modelpath, use_gpu, use_mkldnn, combined)
+        self.predictor = self.load_model(modelpath, use_gpu, use_mkldnn, combined, use_device)
        # 获取模型的输入输出
        self.input_names = self.predictor.get_input_names()
@@ -18,18 +18,16 @@ class Model():
        self.input_handle = self.predictor.get_input_handle(self.input_names[0])
        self.output_handle = self.predictor.get_output_handle(self.output_names[0])
-    # 模型加载函数
+    def _get_device_id(self, places):
-    def load_model(self, modelpath, use_gpu, use_mkldnn, combined):
+        try:
-        # 对运行位置进行配置
+            places = os.environ[places]
-        if use_gpu:
+            id = int(places)
-            try:
+        except:
-                int(os.environ.get('CUDA_VISIBLE_DEVICES'))
+            id = -1
-            except Exception:
+        return id
-                print(
-                    'Error! Unable to use GPU. Please set the environment variables "CUDA_VISIBLE_DEVICES=GPU_id" to use GPU.'
-                )
-                use_gpu = False
+    # 模型加载函数
+    def load_model(self, modelpath, use_gpu, use_mkldnn, combined, use_device):
        # 加载模型参数
        if combined:
            model = os.path.join(modelpath, "__model__")
@@ -38,13 +36,50 @@ class Model():
        else:
            config = Config(modelpath)
-        # 设置参数
+        # 对运行位置进行配置
-        if use_gpu:
+        if use_device is not None:
-            config.enable_use_gpu(100, 0)
+            if use_device == "cpu":
+                if use_mkldnn:
+                    config.enable_mkldnn()
+            elif use_device == "xpu":
+                xpu_id = self._get_device_id("XPU_VISIBLE_DEVICES")
+                if xpu_id != -1:
+                    config.enable_xpu(100)
+                else:
+                    print(
+                        'Error! Unable to use XPU. Please set the environment variables "XPU_VISIBLE_DEVICES=XPU_id" to use XPU.'
+                    )
+            elif use_device == "npu":
+                npu_id = self._get_device_id("FLAGS_selected_npus")
+                if npu_id != -1:
+                    config.enable_npu(device_id=npu_id)
+                else:
+                    print(
+                        'Error! Unable to use NPU. Please set the environment variables "FLAGS_selected_npus=NPU_id" to use NPU.'
+                    )
+            elif use_device == "gpu":
+                gpu_id = self._get_device_id("CUDA_VISIBLE_DEVICES")
+                if gpu_id != -1:
+                    config.enable_use_gpu(100, gpu_id)
+                else:
+                    print(
+                        'Error! Unable to use GPU. Please set the environment variables "CUDA_VISIBLE_DEVICES=GPU_id" to use GPU.'
+                    )
+            else:
+                raise Exception("Unsupported device: " + use_device)
        else:
-            config.disable_gpu()
+            if use_gpu:
-            if use_mkldnn:
+                gpu_id = self._get_device_id("CUDA_VISIBLE_DEVICES")
-                config.enable_mkldnn()
+                if gpu_id != -1:
+                    config.enable_use_gpu(100, gpu_id)
+                else:
+                    print(
+                        'Error! Unable to use GPU. Please set the environment variables "CUDA_VISIBLE_DEVICES=GPU_id" to use GPU.'
+                    )
+            else:
+                if use_mkldnn:
+                    config.enable_mkldnn()
        config.disable_glog_info()
        config.switch_ir_optim(True)
        config.enable_memory_optim()

--- a/modules/image/keypoint_detection/hand_pose_localization/module.py
+++ b/modules/image/keypoint_detection/hand_pose_localization/module.py
@@ -18,12 +18,13 @@ from hand_pose_localization.processor import base64_to_cv2, Processor
 )
 class Hand_Pose_Localization(Module):
    # 初始化函数
-    def __init__(self, name=None, use_gpu=False):
+    def __init__(self, name=None, use_gpu=False, use_device=None):
        # 设置模型路径
        self.model_path = os.path.join(self.directory, "hand_pose_localization")
        # 加载模型
-        self.model = Model(modelpath=self.model_path, use_gpu=use_gpu, use_mkldnn=False, combined=True)
+        self.model = Model(
+            modelpath=self.model_path, use_gpu=use_gpu, use_mkldnn=False, combined=True, use_device=use_device)
    # 关键点检测函数
    def keypoint_detection(self, images=None, paths=None, batch_size=1, output_dir='output', visualization=False):

--- a/modules/image/keypoint_detection/human_pose_estimation_resnet50_mpii/module.py
+++ b/modules/image/keypoint_detection/human_pose_estimation_resnet50_mpii/module.py
@@ -9,7 +9,10 @@ import argparse
 import numpy as np
 import paddle.fluid as fluid
 import paddlehub as hub
-from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor
+from paddle.inference import Config
+from paddle.inference import create_predictor
 from paddlehub.module.module import moduleinfo, runnable, serving
 from human_pose_estimation_resnet50_mpii.processor import base64_to_cv2, postprocess
@@ -30,26 +33,53 @@ class HumanPoseEstimation(hub.Module):
        self.default_pretrained_model_path = os.path.join(self.directory, "pose-resnet50-mpii-384x384")
        self._set_config()
+    def _get_device_id(self, places):
+        try:
+            places = os.environ[places]
+            id = int(places)
+        except:
+            id = -1
+        return id
    def _set_config(self):
        """
        predictor config setting
        """
-        cpu_config = AnalysisConfig(self.default_pretrained_model_path)
+        # create default cpu predictor
+        cpu_config = Config(self.default_pretrained_model_path)
        cpu_config.disable_glog_info()
        cpu_config.disable_gpu()
-        self.cpu_predictor = create_paddle_predictor(cpu_config)
+        self.cpu_predictor = create_predictor(cpu_config)
-        try:
+        # create predictors using various types of devices
-            _places = os.environ["CUDA_VISIBLE_DEVICES"]
-            int(_places[0])
+        # npu
-            use_gpu = True
+        npu_id = self._get_device_id("FLAGS_selected_npus")
-        except:
+        if npu_id != -1:
-            use_gpu = False
+            # use npu
-        if use_gpu:
+            npu_config = Config(self.default_pretrained_model_path)
-            gpu_config = AnalysisConfig(self.default_pretrained_model_path)
+            npu_config.disable_glog_info()
+            npu_config.enable_npu(device_id=npu_id)
+            self.npu_predictor = create_predictor(npu_config)
+        # gpu
+        gpu_id = self._get_device_id("CUDA_VISIBLE_DEVICES")
+        if gpu_id != -1:
+            # use gpu
+            gpu_config = Config(self.default_pretrained_model_path)
            gpu_config.disable_glog_info()
-            gpu_config.enable_use_gpu(memory_pool_init_size_mb=1000, device_id=0)
+            gpu_config.enable_use_gpu(memory_pool_init_size_mb=1000, device_id=gpu_id)
-            self.gpu_predictor = create_paddle_predictor(gpu_config)
+            self.gpu_predictor = create_predictor(gpu_config)
+        # xpu
+        xpu_id = self._get_device_id("XPU_VISIBLE_DEVICES")
+        if xpu_id != -1:
+            # use xpu
+            xpu_config = Config(self.default_pretrained_model_path)
+            xpu_config.disable_glog_info()
+            xpu_config.enable_xpu(100)
+            self.xpu_predictor = create_predictor(xpu_config)
    def keypoint_detection(self,
                           images=None,
@@ -57,7 +87,8 @@ class HumanPoseEstimation(hub.Module):
                           batch_size=1,
                           use_gpu=False,
                           output_dir='output_pose',
-                           visualization=False):
+                           visualization=False,
+                           use_device=None):
        """
        API for human pose estimation and tracking.
@@ -68,12 +99,33 @@ class HumanPoseEstimation(hub.Module):
            use_gpu (bool): Whether to use gpu.
            output_dir (str): The path to store output images.
            visualization (bool): Whether to save image or not.
+            use_device (str): use cpu, gpu, xpu or npu, overwrites use_gpu flag.
        Returns:
            res (list[dict]): each element of res is a dict, keys contains 'path', 'data', the corresponding valus are:
                path (str): the path of original image.
                data (OrderedDict): The key points of human pose.
        """
+        # real predictor to use
+        if use_device is not None:
+            if use_device == "cpu":
+                predictor = self.cpu_predictor
+            elif use_device == "xpu":
+                predictor = self.xpu_predictor
+            elif use_device == "npu":
+                predictor = self.npu_predictor
+            elif use_device == "gpu":
+                predictor = self.gpu_predictor
+            else:
+                raise Exception("Unsupported device: " + use_device)
+        else:
+            # use_device is not set, therefore follow use_gpu
+            if use_gpu:
+                predictor = self.gpu_predictor
+            else:
+                predictor = self.cpu_predictor
        all_data = list()
        for yield_data in reader(images, paths):
            all_data.append(yield_data)
@@ -92,9 +144,18 @@ class HumanPoseEstimation(hub.Module):
                    pass
            # feed batch image
            batch_image = np.array([data['image'] for data in batch_data])
-            batch_image = PaddleTensor(batch_image.copy())
-            output = self.gpu_predictor.run([batch_image]) if use_gpu else self.cpu_predictor.run([batch_image])
+            input_names = predictor.get_input_names()
-            output = np.expand_dims(output[0].as_ndarray(), axis=1)
+            input_tensor = predictor.get_input_handle(input_names[0])
+            input_tensor.reshape(batch_image.shape)
+            input_tensor.copy_from_cpu(batch_image.copy())
+            predictor.run()
+            output_names = predictor.get_output_names()
+            output_handle = predictor.get_output_handle(output_names[0])
+            predictor_output = output_handle.copy_to_cpu()
+            output = np.expand_dims(predictor_output, axis=1)
            # postprocess one by one
            for i in range(len(batch_data)):
                out = postprocess(
@@ -157,7 +218,8 @@ class HumanPoseEstimation(hub.Module):
            batch_size=args.batch_size,
            use_gpu=args.use_gpu,
            output_dir=args.output_dir,
-            visualization=args.visualization)
+            visualization=args.visualization,
+            use_device=args.use_device)
        return results
    def add_module_config_arg(self):
@@ -171,6 +233,10 @@ class HumanPoseEstimation(hub.Module):
        self.arg_config_group.add_argument(
            '--visualization', type=ast.literal_eval, default=False, help="whether to save output as images.")
        self.arg_config_group.add_argument('--batch_size', type=ast.literal_eval, default=1, help="batch size.")
+        self.arg_config_group.add_argument(
+            '--use_device',
+            choices=["cpu", "gpu", "xpu", "npu"],
+            help="use cpu, gpu, xpu or npu. overwrites use_gpu flag.")
    def add_module_input_arg(self):
        """