未验证 提交 6042fcd8 编写于 作者: D duanyanhui 提交者: GitHub

[npu-tipc] fix npu tipc (#8196)

* add npu inference support

* change aligned=false for npu

* fix typo
上级 d0076cb9
......@@ -1315,7 +1315,7 @@ if __name__ == '__main__':
parser = argsparser()
FLAGS = parser.parse_args()
FLAGS.device = FLAGS.device.upper()
assert FLAGS.device in ['CPU', 'GPU', 'XPU'
], "device should be CPU, GPU or XPU"
assert FLAGS.device in ['CPU', 'GPU', 'XPU', 'NPU'
], "device should be CPU, GPU, XPU or NPU"
main()
......@@ -41,7 +41,7 @@ class SkeletonActionRecognizer(Detector):
"""
Args:
model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
device (str): Choose the device you want to run, it can be: CPU/GPU/XPU/NPU, default is CPU
run_mode (str): mode of running(paddle/trt_fp32/trt_fp16)
batch_size (int): size of each batch in inference
trt_min_shape (int): min shape for dynamic shape in trt
......@@ -285,7 +285,7 @@ class DetActionRecognizer(object):
"""
Args:
model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
device (str): Choose the device you want to run, it can be: CPU/GPU/XPU/NPU, default is CPU
run_mode (str): mode of running(paddle/trt_fp32/trt_fp16)
batch_size (int): size of each batch in inference
trt_min_shape (int): min shape for dynamic shape in trt
......@@ -454,7 +454,7 @@ class ClsActionRecognizer(AttrDetector):
"""
Args:
model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
device (str): Choose the device you want to run, it can be: CPU/GPU/XPU/NPU, default is CPU
run_mode (str): mode of running(paddle/trt_fp32/trt_fp16)
batch_size (int): size of each batch in inference
trt_min_shape (int): min shape for dynamic shape in trt
......@@ -684,8 +684,8 @@ if __name__ == '__main__':
FLAGS = parser.parse_args()
print_arguments(FLAGS)
FLAGS.device = FLAGS.device.upper()
assert FLAGS.device in ['CPU', 'GPU', 'XPU'
], "device should be CPU, GPU or XPU"
assert FLAGS.device in ['CPU', 'GPU', 'XPU', 'NPU'
], "device should be CPU, GPU, NPU or XPU"
assert not FLAGS.use_gpu, "use_gpu has been deprecated, please use --device"
main()
......@@ -42,7 +42,7 @@ class AttrDetector(Detector):
"""
Args:
model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
device (str): Choose the device you want to run, it can be: CPU/GPU/XPU/NPU, default is CPU
run_mode (str): mode of running(paddle/trt_fp32/trt_fp16)
batch_size (int): size of each batch in inference
trt_min_shape (int): min shape for dynamic shape in trt
......@@ -341,8 +341,8 @@ if __name__ == '__main__':
FLAGS = parser.parse_args()
print_arguments(FLAGS)
FLAGS.device = FLAGS.device.upper()
assert FLAGS.device in ['CPU', 'GPU', 'XPU'
], "device should be CPU, GPU or XPU"
assert FLAGS.device in ['CPU', 'GPU', 'XPU', 'NPU'
], "device should be CPU, GPU, XPU or NPU"
assert not FLAGS.use_gpu, "use_gpu has been deprecated, please use --device"
main()
......@@ -32,7 +32,7 @@ class ReID(object):
Args:
pred_config (object): config of model, defined by `Config(model_dir)`
model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
device (str): Choose the device you want to run, it can be: CPU/GPU/XPU/NPU, default is CPU
run_mode (str): mode of running(paddle/trt_fp32/trt_fp16)
batch_size (int): size of each batch in inference; the default of 50 means at most
50 sub-images can be grouped into one batch and sent into the ReID model
......
......@@ -47,7 +47,7 @@ class VideoActionRecognizer(object):
"""
Args:
model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
device (str): Choose the device you want to run, it can be: CPU/GPU/XPU/NPU, default is CPU
run_mode (str): mode of running(paddle/trt_fp32/trt_fp16)
batch_size (int): size of each batch in inference
trt_min_shape (int): min shape for dynamic shape in trt
......@@ -105,6 +105,10 @@ class VideoActionRecognizer(object):
if device == "GPU" or device == "gpu":
self.config.enable_use_gpu(8000, 0)
elif device == "XPU" or device == "xpu":
self.config.enable_xpu(10 * 1024 * 1024)
elif device == "NPU" or device == "npu":
self.config.enable_custom_device('npu')
else:
self.config.disable_gpu()
if self.enable_mkldnn:
......@@ -308,7 +312,7 @@ if __name__ == '__main__':
FLAGS = parser.parse_args()
print_arguments(FLAGS)
FLAGS.device = FLAGS.device.upper()
assert FLAGS.device in ['CPU', 'GPU', 'XPU'
], "device should be CPU, GPU or XPU"
assert FLAGS.device in ['CPU', 'GPU', 'XPU', 'NPU'
], "device should be CPU, GPU, XPU or NPU"
main()
......@@ -41,7 +41,7 @@ class VehicleAttr(AttrDetector):
"""
Args:
model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
device (str): Choose the device you want to run, it can be: CPU/GPU/XPU/NPU, default is CPU
run_mode (str): mode of running(paddle/trt_fp32/trt_fp16)
batch_size (int): size of each batch in inference
trt_min_shape (int): min shape for dynamic shape in trt
......@@ -143,8 +143,8 @@ if __name__ == '__main__':
FLAGS = parser.parse_args()
print_arguments(FLAGS)
FLAGS.device = FLAGS.device.upper()
assert FLAGS.device in ['CPU', 'GPU', 'XPU'
], "device should be CPU, GPU or XPU"
assert FLAGS.device in ['CPU', 'GPU', 'XPU', 'NPU'
], "device should be CPU, GPU, NPU or XPU"
assert not FLAGS.use_gpu, "use_gpu has been deprecated, please use --device"
main()
......@@ -325,7 +325,7 @@ if __name__ == '__main__':
parser = argsparser()
FLAGS = parser.parse_args()
FLAGS.device = FLAGS.device.upper()
assert FLAGS.device in ['CPU', 'GPU', 'XPU'
], "device should be CPU, GPU or XPU"
assert FLAGS.device in ['CPU', 'GPU', 'XPU', 'NPU'
], "device should be CPU, GPU, NPU or XPU"
main()
......@@ -70,7 +70,7 @@ class Detector(object):
Args:
pred_config (object): config of model, defined by `Config(model_dir)`
model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
device (str): Choose the device you want to run, it can be: CPU/GPU/XPU/NPU, default is CPU
run_mode (str): mode of running(paddle/trt_fp32/trt_fp16)
batch_size (int): size of each batch in inference
trt_min_shape (int): min shape for dynamic shape in trt
......@@ -400,7 +400,7 @@ def load_predictor(model_dir,
"""set AnalysisConfig, generate AnalysisPredictor
Args:
model_dir (str): root path of __model__ and __params__
device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
device (str): Choose the device you want to run, it can be: CPU/GPU/XPU/NPU, default is CPU
run_mode (str): mode of running(paddle/trt_fp32/trt_fp16/trt_int8)
use_dynamic_shape (bool): use dynamic shape or not
trt_min_shape (int): min shape for dynamic shape in trt
......@@ -432,8 +432,13 @@ def load_predictor(model_dir,
# optimize graph and fuse op
config.switch_ir_optim(True)
elif device == 'XPU':
config.enable_lite_engine()
if config.lite_engine_enabled():
config.enable_lite_engine()
config.enable_xpu(10 * 1024 * 1024)
elif device == 'NPU':
if config.lite_engine_enabled():
config.enable_lite_engine()
config.enable_custom_device('npu')
else:
config.disable_gpu()
config.set_cpu_math_library_num_threads(cpu_threads)
......
......@@ -50,7 +50,7 @@ class KeyPointDetector(Detector):
"""
Args:
model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
device (str): Choose the device you want to run, it can be: CPU/GPU/XPU/NPU, default is CPU
run_mode (str): mode of running(paddle/trt_fp32/trt_fp16)
batch_size (int): size of each batch in inference
trt_min_shape (int): min shape for dynamic shape in trt
......@@ -408,8 +408,8 @@ if __name__ == '__main__':
FLAGS = parser.parse_args()
print_arguments(FLAGS)
FLAGS.device = FLAGS.device.upper()
assert FLAGS.device in ['CPU', 'GPU', 'XPU'
], "device should be CPU, GPU or XPU"
assert FLAGS.device in ['CPU', 'GPU', 'XPU', 'NPU'
], "device should be CPU, GPU, XPU or NPU"
assert not FLAGS.use_gpu, "use_gpu has been deprecated, please use --device"
main()
......@@ -65,7 +65,7 @@ class CenterTrack(Detector):
"""
Args:
model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
device (str): Choose the device you want to run, it can be: CPU/GPU/XPU/NPU, default is CPU
run_mode (str): mode of running(paddle/trt_fp32/trt_fp16)
batch_size (int): size of each batch in inference
trt_min_shape (int): min shape for dynamic shape in trt
......@@ -130,7 +130,7 @@ class CenterTrack(Detector):
vertical_ratio=vertical_ratio,
track_thresh=track_thresh,
pre_thresh=pre_thresh)
self.pre_image = None
def get_additional_inputs(self, dets, meta, with_hm=True):
......@@ -173,11 +173,10 @@ class CenterTrack(Detector):
#inputs = create_inputs(im, im_info)
inputs = {}
inputs['image'] = np.array((im, )).astype('float32')
inputs['im_shape'] = np.array(
(im_info['im_shape'], )).astype('float32')
inputs['im_shape'] = np.array((im_info['im_shape'], )).astype('float32')
inputs['scale_factor'] = np.array(
(im_info['scale_factor'], )).astype('float32')
inputs['trans_input'] = im_info['trans_input']
inputs['inp_width'] = im_info['inp_width']
inputs['inp_height'] = im_info['inp_height']
......@@ -185,7 +184,7 @@ class CenterTrack(Detector):
inputs['scale'] = im_info['scale']
inputs['out_height'] = im_info['out_height']
inputs['out_width'] = im_info['out_width']
if self.pre_image is None:
self.pre_image = inputs['image']
# initializing tracker for the first frame
......@@ -196,7 +195,7 @@ class CenterTrack(Detector):
# render input heatmap from tracker status
pre_hm = self.get_additional_inputs(
self.tracker.tracks, inputs, with_hm=True)
inputs['pre_hm'] = pre_hm #.to_tensor(pre_hm)
inputs['pre_hm'] = pre_hm #.to_tensor(pre_hm)
input_names = self.predictor.get_input_names()
for i in range(len(input_names)):
......@@ -256,8 +255,8 @@ class CenterTrack(Detector):
return preds
def tracking(self, inputs, det_results):
result = self.centertrack_post_process(
det_results, inputs, self.tracker.out_thresh)
result = self.centertrack_post_process(det_results, inputs,
self.tracker.out_thresh)
online_targets = self.tracker.update(result)
online_tlwhs, online_scores, online_ids = [], [], []
......@@ -292,10 +291,7 @@ class CenterTrack(Detector):
tracking_tensor = self.predictor.get_output_handle(output_names[2])
np_tracking = tracking_tensor.copy_to_cpu()
result = dict(
bboxes=np_bboxes,
cts=np_cts,
tracking=np_tracking)
result = dict(bboxes=np_bboxes, cts=np_cts, tracking=np_tracking)
return result
def predict_image(self,
......@@ -333,8 +329,8 @@ class CenterTrack(Detector):
# tracking
result_warmup = self.tracking(inputs, det_result)
self.det_times.tracking_time_s.start()
online_tlwhs, online_scores, online_ids = self.tracking(inputs,
det_result)
online_tlwhs, online_scores, online_ids = self.tracking(
inputs, det_result)
self.det_times.tracking_time_s.end()
self.det_times.img_num += 1
......@@ -358,8 +354,8 @@ class CenterTrack(Detector):
# tracking process
self.det_times.tracking_time_s.start()
online_tlwhs, online_scores, online_ids = self.tracking(inputs,
det_result)
online_tlwhs, online_scores, online_ids = self.tracking(
inputs, det_result)
self.det_times.tracking_time_s.end()
self.det_times.img_num += 1
......@@ -499,7 +495,7 @@ if __name__ == '__main__':
FLAGS = parser.parse_args()
print_arguments(FLAGS)
FLAGS.device = FLAGS.device.upper()
assert FLAGS.device in ['CPU', 'GPU', 'XPU'
], "device should be CPU, GPU or XPU"
assert FLAGS.device in ['CPU', 'GPU', 'XPU', 'NPU'
], "device should be CPU, GPU, NPU or XPU"
main()
......@@ -45,7 +45,7 @@ class JDE_Detector(Detector):
"""
Args:
model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
device (str): Choose the device you want to run, it can be: CPU/GPU/XPU/NPU, default is CPU
run_mode (str): mode of running(paddle/trt_fp32/trt_fp16)
batch_size (int): size of each batch in inference
trt_min_shape (int): min shape for dynamic shape in trt
......@@ -375,7 +375,7 @@ if __name__ == '__main__':
FLAGS = parser.parse_args()
print_arguments(FLAGS)
FLAGS.device = FLAGS.device.upper()
assert FLAGS.device in ['CPU', 'GPU', 'XPU'
], "device should be CPU, GPU or XPU"
assert FLAGS.device in ['CPU', 'GPU', 'XPU', 'NPU'
], "device should be CPU, GPU, NPU or XPU"
main()
......@@ -295,7 +295,7 @@ if __name__ == '__main__':
FLAGS = parser.parse_args()
print_arguments(FLAGS)
FLAGS.device = FLAGS.device.upper()
assert FLAGS.device in ['CPU', 'GPU', 'XPU'
], "device should be CPU, GPU or XPU"
assert FLAGS.device in ['CPU', 'GPU', 'XPU', 'NPU'
], "device should be CPU, GPU, NPU or XPU"
main()
......@@ -78,7 +78,7 @@ def argsparser():
"--device",
type=str,
default='cpu',
help="Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU."
help="Choose the device you want to run, it can be: CPU/GPU/XPU/NPU, default is CPU."
)
parser.add_argument(
"--run_benchmark",
......
......@@ -40,7 +40,7 @@ class SDE_Detector(Detector):
Args:
model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
tracker_config (str): tracker config path
device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
device (str): Choose the device you want to run, it can be: CPU/GPU/XPU/NPU, default is CPU
run_mode (str): mode of running(paddle/trt_fp32/trt_fp16)
batch_size (int): size of each batch in inference
trt_min_shape (int): min shape for dynamic shape in trt
......@@ -516,7 +516,7 @@ if __name__ == '__main__':
FLAGS = parser.parse_args()
print_arguments(FLAGS)
FLAGS.device = FLAGS.device.upper()
assert FLAGS.device in ['CPU', 'GPU', 'XPU'
], "device should be CPU, GPU or XPU"
assert FLAGS.device in ['CPU', 'GPU', 'XPU', 'NPU'
], "device should be CPU, GPU, NPU or XPU"
main()
......@@ -64,7 +64,7 @@ def argsparser():
"--device",
type=str,
default='cpu',
help="Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU."
help="Choose the device you want to run, it can be: CPU/GPU/XPU/NPU, default is CPU."
)
parser.add_argument(
"--use_gpu",
......
......@@ -49,8 +49,8 @@ grep -n '.yml' $FILENAME | cut -d ":" -f 1 \
| while read line_num ; do
train_cmd=$(func_parser_value "${lines[line_num-1]}")
trainer_config=$(func_parser_config ${train_cmd})
echo ${trainer_config}
sed -i 's/use_gpu/use_npu/g' "$REPO_ROOT_PATH/$trainer_config"
sed -i 's/aligned: True/aligned: False/g' "$REPO_ROOT_PATH/$trainer_config"
# find use_gpu in the included yaml files
sub_datalinee=`cat $REPO_ROOT_PATH/$trainer_config`
IFS=$'\n'
......@@ -60,9 +60,10 @@ grep -n '.yml' $FILENAME | cut -d ":" -f 1 \
sub_config=${sub_lines[sub_line_num-1]}
dst=${#sub_config}-5
sub_path=$(func_parser_dir "${trainer_config}")
sub_config_path="${REPO_ROOT_PATH}${sub_path}/${sub_config:3:${dst}}"
echo ${sub_config_path}
sub_config_name=$(echo "$sub_config" | awk -F"'" '{ print $2 }')
sub_config_path="${REPO_ROOT_PATH}${sub_path}/${sub_config_name}"
sed -i 's/use_gpu/use_npu/g' "$sub_config_path"
sed -i 's/aligned: True/aligned: False/g' "$sub_config_path"
done
done
# pass parameters to test_train_inference_python.sh
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册