Unverified commit 83136bb0, authored by duanyanhui, committed by GitHub

[NPU] cherry-pick fix npu tipc (#8212)

* fix npu tipc script

* fix typo

* fix conflict

* add npu inference support

* fix typo
Parent 09a611b0
@@ -1315,7 +1315,7 @@ if __name__ == '__main__':
     parser = argsparser()
     FLAGS = parser.parse_args()
     FLAGS.device = FLAGS.device.upper()
-    assert FLAGS.device in ['CPU', 'GPU', 'XPU'
-                            ], "device should be CPU, GPU or XPU"
+    assert FLAGS.device in ['CPU', 'GPU', 'XPU', 'NPU'
+                            ], "device should be CPU, GPU, XPU or NPU"
     main()
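This same normalize-then-assert check recurs in every entry script touched below. As a standalone sketch (the argparse scaffolding here is ours; the check itself mirrors the hunks):

```python
import argparse

parser = argparse.ArgumentParser()
parser.add_argument(
    "--device",
    type=str,
    default='cpu',
    help="Choose the device you want to run, it can be: CPU/GPU/XPU/NPU, default is CPU.")
FLAGS = parser.parse_args()

# Upper-casing first makes the check case-insensitive ('npu' -> 'NPU').
FLAGS.device = FLAGS.device.upper()
assert FLAGS.device in ['CPU', 'GPU', 'XPU', 'NPU'
                        ], "device should be CPU, GPU, XPU or NPU"
```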
@@ -41,7 +41,7 @@ class SkeletonActionRecognizer(Detector):
     """
     Args:
         model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
-        device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
+        device (str): Choose the device you want to run, it can be: CPU/GPU/XPU/NPU, default is CPU
         run_mode (str): mode of running(paddle/trt_fp32/trt_fp16)
         batch_size (int): size of per batch in inference
         trt_min_shape (int): min shape for dynamic shape in trt
@@ -285,7 +285,7 @@ class DetActionRecognizer(object):
     """
     Args:
         model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
-        device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
+        device (str): Choose the device you want to run, it can be: CPU/GPU/XPU/NPU, default is CPU
         run_mode (str): mode of running(paddle/trt_fp32/trt_fp16)
         batch_size (int): size of per batch in inference
         trt_min_shape (int): min shape for dynamic shape in trt
@@ -454,7 +454,7 @@ class ClsActionRecognizer(AttrDetector):
     """
     Args:
         model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
-        device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
+        device (str): Choose the device you want to run, it can be: CPU/GPU/XPU/NPU, default is CPU
         run_mode (str): mode of running(paddle/trt_fp32/trt_fp16)
         batch_size (int): size of per batch in inference
         trt_min_shape (int): min shape for dynamic shape in trt
@@ -684,8 +684,8 @@ if __name__ == '__main__':
     FLAGS = parser.parse_args()
     print_arguments(FLAGS)
     FLAGS.device = FLAGS.device.upper()
-    assert FLAGS.device in ['CPU', 'GPU', 'XPU'
-                            ], "device should be CPU, GPU or XPU"
+    assert FLAGS.device in ['CPU', 'GPU', 'XPU', 'NPU'
+                            ], "device should be CPU, GPU, XPU or NPU"
     assert not FLAGS.use_gpu, "use_gpu has been deprecated, please use --device"
     main()
@@ -42,7 +42,7 @@ class AttrDetector(Detector):
     """
     Args:
         model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
-        device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
+        device (str): Choose the device you want to run, it can be: CPU/GPU/XPU/NPU, default is CPU
         run_mode (str): mode of running(paddle/trt_fp32/trt_fp16)
         batch_size (int): size of per batch in inference
         trt_min_shape (int): min shape for dynamic shape in trt
@@ -341,8 +341,8 @@ if __name__ == '__main__':
     FLAGS = parser.parse_args()
     print_arguments(FLAGS)
     FLAGS.device = FLAGS.device.upper()
-    assert FLAGS.device in ['CPU', 'GPU', 'XPU'
-                            ], "device should be CPU, GPU or XPU"
+    assert FLAGS.device in ['CPU', 'GPU', 'XPU', 'NPU'
+                            ], "device should be CPU, GPU, XPU or NPU"
     assert not FLAGS.use_gpu, "use_gpu has been deprecated, please use --device"
     main()
@@ -32,7 +32,7 @@ class ReID(object):
     Args:
         pred_config (object): config of model, defined by `Config(model_dir)`
         model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
-        device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
+        device (str): Choose the device you want to run, it can be: CPU/GPU/XPU/NPU, default is CPU
         run_mode (str): mode of running(paddle/trt_fp32/trt_fp16)
         batch_size (int): size of per batch in inference, default 50 means at most
             50 sub images can be made a batch and sent into ReID model
......
@@ -47,7 +47,7 @@ class VideoActionRecognizer(object):
     """
     Args:
         model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
-        device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
+        device (str): Choose the device you want to run, it can be: CPU/GPU/XPU/NPU, default is CPU
         run_mode (str): mode of running(paddle/trt_fp32/trt_fp16)
         batch_size (int): size of per batch in inference
         trt_min_shape (int): min shape for dynamic shape in trt
@@ -105,6 +105,10 @@ class VideoActionRecognizer(object):
         if device == "GPU" or device == "gpu":
             self.config.enable_use_gpu(8000, 0)
+        elif device == "XPU" or device == "xpu":
+            self.config.enable_xpu(10 * 1024 * 1024)
+        elif device == "NPU" or device == "npu":
+            self.config.enable_custom_device('npu')
         else:
             self.config.disable_gpu()
             if self.enable_mkldnn:
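For context, the new branch maps the device string onto Paddle Inference `Config` calls. A minimal sketch of the same mapping as a free function (the helper name `apply_device` is ours; it assumes a Paddle build with XPU and custom-device support):

```python
from paddle.inference import Config

def apply_device(config, device):
    # Mirrors the device branching the hunk above adds to VideoActionRecognizer.
    device = device.upper()
    if device == "GPU":
        config.enable_use_gpu(8000, 0)       # 8000 MB initial GPU memory pool, device id 0
    elif device == "XPU":
        config.enable_xpu(10 * 1024 * 1024)  # 10 MB L3 workspace, as in the diff
    elif device == "NPU":
        config.enable_custom_device('npu')   # NPU runs through Paddle's custom-device plugin
    else:
        config.disable_gpu()                 # fall back to CPU
```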
@@ -308,7 +312,7 @@ if __name__ == '__main__':
     FLAGS = parser.parse_args()
     print_arguments(FLAGS)
     FLAGS.device = FLAGS.device.upper()
-    assert FLAGS.device in ['CPU', 'GPU', 'XPU'
-                            ], "device should be CPU, GPU or XPU"
+    assert FLAGS.device in ['CPU', 'GPU', 'XPU', 'NPU'
+                            ], "device should be CPU, GPU, XPU or NPU"
     main()
@@ -41,7 +41,7 @@ class VehicleAttr(AttrDetector):
     """
     Args:
         model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
-        device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
+        device (str): Choose the device you want to run, it can be: CPU/GPU/XPU/NPU, default is CPU
         run_mode (str): mode of running(paddle/trt_fp32/trt_fp16)
         batch_size (int): size of per batch in inference
         trt_min_shape (int): min shape for dynamic shape in trt
@@ -143,8 +143,8 @@ if __name__ == '__main__':
     FLAGS = parser.parse_args()
     print_arguments(FLAGS)
     FLAGS.device = FLAGS.device.upper()
-    assert FLAGS.device in ['CPU', 'GPU', 'XPU'
-                            ], "device should be CPU, GPU or XPU"
+    assert FLAGS.device in ['CPU', 'GPU', 'XPU', 'NPU'
+                            ], "device should be CPU, GPU, XPU or NPU"
     assert not FLAGS.use_gpu, "use_gpu has been deprecated, please use --device"
     main()
@@ -325,7 +325,7 @@ if __name__ == '__main__':
     parser = argsparser()
     FLAGS = parser.parse_args()
     FLAGS.device = FLAGS.device.upper()
-    assert FLAGS.device in ['CPU', 'GPU', 'XPU'
-                            ], "device should be CPU, GPU or XPU"
+    assert FLAGS.device in ['CPU', 'GPU', 'XPU', 'NPU'
+                            ], "device should be CPU, GPU, XPU or NPU"
     main()
@@ -70,7 +70,7 @@ class Detector(object):
     Args:
         pred_config (object): config of model, defined by `Config(model_dir)`
         model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
-        device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
+        device (str): Choose the device you want to run, it can be: CPU/GPU/XPU/NPU, default is CPU
         run_mode (str): mode of running(paddle/trt_fp32/trt_fp16)
         batch_size (int): size of per batch in inference
         trt_min_shape (int): min shape for dynamic shape in trt
@@ -400,7 +400,7 @@ def load_predictor(model_dir,
     """set AnalysisConfig, generate AnalysisPredictor
     Args:
         model_dir (str): root path of __model__ and __params__
-        device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
+        device (str): Choose the device you want to run, it can be: CPU/GPU/XPU/NPU, default is CPU
         run_mode (str): mode of running(paddle/trt_fp32/trt_fp16/trt_int8)
         use_dynamic_shape (bool): use dynamic shape or not
         trt_min_shape (int): min shape for dynamic shape in trt
@@ -432,8 +432,13 @@ def load_predictor(model_dir,
         # optimize graph and fuse op
         config.switch_ir_optim(True)
     elif device == 'XPU':
-        config.enable_lite_engine()
+        if config.lite_engine_enabled():
+            config.enable_lite_engine()
         config.enable_xpu(10 * 1024 * 1024)
+    elif device == 'NPU':
+        if config.lite_engine_enabled():
+            config.enable_lite_engine()
+        config.enable_custom_device('npu')
     else:
         config.disable_gpu()
         config.set_cpu_math_library_num_threads(cpu_threads)
......
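The `load_predictor` hunk applies the same idea, additionally guarding `config.enable_lite_engine()` behind `config.lite_engine_enabled()`. A hedged end-to-end sketch (`build_npu_predictor` is our own wrapper name, not PaddleDetection API):

```python
from paddle.inference import Config, create_predictor

def build_npu_predictor(model_dir):
    # Assumes the exported model files use PaddleDetection's default names.
    config = Config(model_dir + '/model.pdmodel', model_dir + '/model.pdiparams')
    if config.lite_engine_enabled():  # only touch the Lite path when it is available
        config.enable_lite_engine()
    config.enable_custom_device('npu')
    return create_predictor(config)
```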
@@ -45,7 +45,7 @@ class JDE_Detector(Detector):
     """
     Args:
         model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
-        device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
+        device (str): Choose the device you want to run, it can be: CPU/GPU/XPU/NPU, default is CPU
         run_mode (str): mode of running(paddle/trt_fp32/trt_fp16)
         batch_size (int): size of per batch in inference
         trt_min_shape (int): min shape for dynamic shape in trt
@@ -502,7 +502,7 @@ if __name__ == '__main__':
     FLAGS = parser.parse_args()
     print_arguments(FLAGS)
     FLAGS.device = FLAGS.device.upper()
-    assert FLAGS.device in ['CPU', 'GPU', 'XPU'
-                            ], "device should be CPU, GPU or XPU"
+    assert FLAGS.device in ['CPU', 'GPU', 'XPU', 'NPU'
+                            ], "device should be CPU, GPU, XPU or NPU"
     main()
@@ -46,7 +46,7 @@ class SDE_Detector(Detector):
     Args:
         model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
         tracker_config (str): tracker config path
-        device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
+        device (str): Choose the device you want to run, it can be: CPU/GPU/XPU/NPU, default is CPU
         run_mode (str): mode of running(paddle/trt_fp32/trt_fp16)
         batch_size (int): size of per batch in inference
         trt_min_shape (int): min shape for dynamic shape in trt
@@ -946,7 +946,7 @@ if __name__ == '__main__':
     FLAGS = parser.parse_args()
     print_arguments(FLAGS)
     FLAGS.device = FLAGS.device.upper()
-    assert FLAGS.device in ['CPU', 'GPU', 'XPU'
-                            ], "device should be CPU, GPU or XPU"
+    assert FLAGS.device in ['CPU', 'GPU', 'XPU', 'NPU'
+                            ], "device should be CPU, GPU, XPU or NPU"
     main()
@@ -64,7 +64,7 @@ def argsparser():
         "--device",
         type=str,
         default='cpu',
-        help="Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU."
+        help="Choose the device you want to run, it can be: CPU/GPU/XPU/NPU, default is CPU."
     )
     parser.add_argument(
         "--use_gpu",
......
@@ -50,7 +50,7 @@ class KeyPointDetector(Detector):
     """
     Args:
         model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
-        device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
+        device (str): Choose the device you want to run, it can be: CPU/GPU/XPU/NPU, default is CPU
         run_mode (str): mode of running(paddle/trt_fp32/trt_fp16)
         batch_size (int): size of per batch in inference
         trt_min_shape (int): min shape for dynamic shape in trt
@@ -408,8 +408,8 @@ if __name__ == '__main__':
     FLAGS = parser.parse_args()
     print_arguments(FLAGS)
     FLAGS.device = FLAGS.device.upper()
-    assert FLAGS.device in ['CPU', 'GPU', 'XPU'
-                            ], "device should be CPU, GPU or XPU"
+    assert FLAGS.device in ['CPU', 'GPU', 'XPU', 'NPU'
+                            ], "device should be CPU, GPU, XPU or NPU"
     assert not FLAGS.use_gpu, "use_gpu has been deprecated, please use --device"
     main()
@@ -65,7 +65,7 @@ class CenterTrack(Detector):
     """
     Args:
         model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
-        device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
+        device (str): Choose the device you want to run, it can be: CPU/GPU/XPU/NPU, default is CPU
         run_mode (str): mode of running(paddle/trt_fp32/trt_fp16)
         batch_size (int): size of per batch in inference
         trt_min_shape (int): min shape for dynamic shape in trt
@@ -173,8 +173,7 @@ class CenterTrack(Detector):
         #inputs = create_inputs(im, im_info)
         inputs = {}
         inputs['image'] = np.array((im, )).astype('float32')
-        inputs['im_shape'] = np.array(
-            (im_info['im_shape'], )).astype('float32')
+        inputs['im_shape'] = np.array((im_info['im_shape'], )).astype('float32')
         inputs['scale_factor'] = np.array(
             (im_info['scale_factor'], )).astype('float32')
@@ -256,8 +255,8 @@ class CenterTrack(Detector):
         return preds

     def tracking(self, inputs, det_results):
-        result = self.centertrack_post_process(
-            det_results, inputs, self.tracker.out_thresh)
+        result = self.centertrack_post_process(det_results, inputs,
+                                               self.tracker.out_thresh)
         online_targets = self.tracker.update(result)
         online_tlwhs, online_scores, online_ids = [], [], []
@@ -292,10 +291,7 @@ class CenterTrack(Detector):
         tracking_tensor = self.predictor.get_output_handle(output_names[2])
         np_tracking = tracking_tensor.copy_to_cpu()
-        result = dict(
-            bboxes=np_bboxes,
-            cts=np_cts,
-            tracking=np_tracking)
+        result = dict(bboxes=np_bboxes, cts=np_cts, tracking=np_tracking)
         return result

     def predict_image(self,
@@ -333,8 +329,8 @@ class CenterTrack(Detector):
             # tracking
             result_warmup = self.tracking(inputs, det_result)
             self.det_times.tracking_time_s.start()
-            online_tlwhs, online_scores, online_ids = self.tracking(inputs,
-                                                                    det_result)
+            online_tlwhs, online_scores, online_ids = self.tracking(
+                inputs, det_result)
             self.det_times.tracking_time_s.end()
             self.det_times.img_num += 1
@@ -358,8 +354,8 @@ class CenterTrack(Detector):
             # tracking process
             self.det_times.tracking_time_s.start()
-            online_tlwhs, online_scores, online_ids = self.tracking(inputs,
-                                                                    det_result)
+            online_tlwhs, online_scores, online_ids = self.tracking(
+                inputs, det_result)
             self.det_times.tracking_time_s.end()
             self.det_times.img_num += 1
@@ -499,7 +495,7 @@ if __name__ == '__main__':
     FLAGS = parser.parse_args()
     print_arguments(FLAGS)
     FLAGS.device = FLAGS.device.upper()
-    assert FLAGS.device in ['CPU', 'GPU', 'XPU'
-                            ], "device should be CPU, GPU or XPU"
+    assert FLAGS.device in ['CPU', 'GPU', 'XPU', 'NPU'
+                            ], "device should be CPU, GPU, XPU or NPU"
     main()
@@ -45,7 +45,7 @@ class JDE_Detector(Detector):
     """
     Args:
         model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
-        device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
+        device (str): Choose the device you want to run, it can be: CPU/GPU/XPU/NPU, default is CPU
         run_mode (str): mode of running(paddle/trt_fp32/trt_fp16)
         batch_size (int): size of per batch in inference
         trt_min_shape (int): min shape for dynamic shape in trt
@@ -375,7 +375,7 @@ if __name__ == '__main__':
     FLAGS = parser.parse_args()
     print_arguments(FLAGS)
     FLAGS.device = FLAGS.device.upper()
-    assert FLAGS.device in ['CPU', 'GPU', 'XPU'
-                            ], "device should be CPU, GPU or XPU"
+    assert FLAGS.device in ['CPU', 'GPU', 'XPU', 'NPU'
+                            ], "device should be CPU, GPU, XPU or NPU"
     main()
@@ -295,7 +295,7 @@ if __name__ == '__main__':
     FLAGS = parser.parse_args()
     print_arguments(FLAGS)
     FLAGS.device = FLAGS.device.upper()
-    assert FLAGS.device in ['CPU', 'GPU', 'XPU'
-                            ], "device should be CPU, GPU or XPU"
+    assert FLAGS.device in ['CPU', 'GPU', 'XPU', 'NPU'
+                            ], "device should be CPU, GPU, XPU or NPU"
     main()
@@ -78,7 +78,7 @@ def argsparser():
         "--device",
         type=str,
         default='cpu',
-        help="Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU."
+        help="Choose the device you want to run, it can be: CPU/GPU/XPU/NPU, default is CPU."
     )
     parser.add_argument(
         "--run_benchmark",
......
@@ -40,7 +40,7 @@ class SDE_Detector(Detector):
     Args:
         model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
         tracker_config (str): tracker config path
-        device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
+        device (str): Choose the device you want to run, it can be: CPU/GPU/XPU/NPU, default is CPU
         run_mode (str): mode of running(paddle/trt_fp32/trt_fp16)
         batch_size (int): size of per batch in inference
         trt_min_shape (int): min shape for dynamic shape in trt
@@ -516,7 +516,7 @@ if __name__ == '__main__':
     FLAGS = parser.parse_args()
     print_arguments(FLAGS)
     FLAGS.device = FLAGS.device.upper()
-    assert FLAGS.device in ['CPU', 'GPU', 'XPU'
-                            ], "device should be CPU, GPU or XPU"
+    assert FLAGS.device in ['CPU', 'GPU', 'XPU', 'NPU'
+                            ], "device should be CPU, GPU, XPU or NPU"
     main()
@@ -64,7 +64,7 @@ def argsparser():
         "--device",
         type=str,
         default='cpu',
-        help="Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU."
+        help="Choose the device you want to run, it can be: CPU/GPU/XPU/NPU, default is CPU."
     )
     parser.add_argument(
         "--use_gpu",
......
@@ -33,6 +33,8 @@ FILENAME=$1
 # change gpu to npu in tipc txt configs
 sed -i "s/use_gpu:True/use_npu:True/g" $FILENAME
 sed -i "s/--device:gpu|cpu/--device:npu|cpu/g" $FILENAME
+sed -i "s/--device:gpu/--device:npu/g" $FILENAME
+sed -i "s/--device:cpu|gpu/--device:cpu|npu/g" $FILENAME
 sed -i "s/trainer:pact_train/trainer:norm_train/g" $FILENAME
 sed -i "s/trainer:fpgm_train/trainer:norm_train/g" $FILENAME
 sed -i "s/--slim_config _template_pact/ /g" $FILENAME
@@ -49,8 +51,8 @@ grep -n '.yml' $FILENAME | cut -d ":" -f 1 \
     | while read line_num ; do
         train_cmd=$(func_parser_value "${lines[line_num-1]}")
         trainer_config=$(func_parser_config ${train_cmd})
-        echo ${trainer_config}
         sed -i 's/use_gpu/use_npu/g' "$REPO_ROOT_PATH/$trainer_config"
+        sed -i 's/aligned: True/aligned: False/g' "$REPO_ROOT_PATH/$trainer_config"
         # find use_gpu in those included yaml
         sub_datalinee=`cat $REPO_ROOT_PATH/$trainer_config`
         IFS=$'\n'
@@ -60,9 +62,10 @@ grep -n '.yml' $FILENAME | cut -d ":" -f 1 \
             sub_config=${sub_lines[sub_line_num-1]}
             dst=${#sub_config}-5
             sub_path=$(func_parser_dir "${trainer_config}")
-            sub_config_path="${REPO_ROOT_PATH}${sub_path}/${sub_config:3:${dst}}"
-            echo ${sub_config_path}
+            sub_config_name=$(echo "$sub_config" | awk -F"'" '{ print $2 }')
+            sub_config_path="${REPO_ROOT_PATH}${sub_path}/${sub_config_name}"
             sed -i 's/use_gpu/use_npu/g' "$sub_config_path"
+            sed -i 's/aligned: True/aligned: False/g' "$sub_config_path"
         done
     done
 # pass parameters to test_train_inference_python.sh
......
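The sed calls above amount to a plain text rewrite of the top-level TIPC config (the recursive yml edits are handled by the loops). A rough Python equivalent for illustration only (the function name is ours, and the bare `--device:gpu` rule is deliberately applied last):

```python
from pathlib import Path

def rewrite_tipc_config_for_npu(path):
    text = Path(path).read_text()
    text = text.replace("use_gpu:True", "use_npu:True")
    # Pipe-joined variants first, so the bare --device:gpu rule cannot clobber them.
    text = text.replace("--device:gpu|cpu", "--device:npu|cpu")
    text = text.replace("--device:cpu|gpu", "--device:cpu|npu")
    text = text.replace("--device:gpu", "--device:npu")
    Path(path).write_text(text)
```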