diff --git a/deploy/cpp/docs/Jetson_build.md b/deploy/cpp/docs/Jetson_build.md
index 43dae340df172f7491e5cc31dadb8f7d6c3f8fd6..7d52250a12fe0547b69a2873a55944889526f8a3 100644
--- a/deploy/cpp/docs/Jetson_build.md
+++ b/deploy/cpp/docs/Jetson_build.md
@@ -159,7 +159,7 @@ CUDNN_LIB=/usr/lib/aarch64-linux-gnu/
 | --image_dir | 要预测的图片文件夹路径 |
 | --video_file | 要预测的视频文件路径 |
 | --camera_id | Option | 用来预测的摄像头ID,默认为-1(表示不使用摄像头预测)|
-| --use_gpu | 是否使用 GPU 预测, 支持值为0或1(默认值为0)|
+| --device | 运行时的设备,可选择`CPU/GPU/XPU`,默认为`CPU`|
 | --gpu_id | 指定进行推理的GPU device id(默认值为0)|
 | --run_mode | 使用GPU时,默认为fluid, 可选(fluid/trt_fp32/trt_fp16/trt_int8)|
 | --batch_size |预测时的batch size,在指定`image_dir`时有效 |
@@ -183,7 +183,7 @@ CUDNN_LIB=/usr/lib/aarch64-linux-gnu/
 `样例二`:
 ```shell
 #使用 `GPU`预测视频`/root/projects/videos/test.mp4`
-./main --model_dir=/root/projects/models/yolov3_darknet --video_path=/root/projects/images/test.mp4 --use_gpu=1
+./main --model_dir=/root/projects/models/yolov3_darknet --video_path=/root/projects/images/test.mp4 --device=GPU
 ```
 视频文件目前支持`.mp4`格式的预测,`可视化预测结果`会保存在当前目录下`output.mp4`文件中。
diff --git a/deploy/cpp/docs/linux_build.md b/deploy/cpp/docs/linux_build.md
index 5631e8a27b20cec9e280c651fb0cfbe15c0ce504..440d6b9d83686ec597d71ecc1b6b62f1dc3f55d1 100755
--- a/deploy/cpp/docs/linux_build.md
+++ b/deploy/cpp/docs/linux_build.md
@@ -101,7 +101,7 @@ make
 | --image_dir | 要预测的图片文件夹路径 |
 | --video_file | 要预测的视频文件路径 |
 | --camera_id | Option | 用来预测的摄像头ID,默认为-1(表示不使用摄像头预测)|
-| --use_gpu | 是否使用 GPU 预测, 支持值为0或1(默认值为0)|
+| --device | 运行时的设备,可选择`CPU/GPU/XPU`,默认为`CPU`|
 | --gpu_id | 指定进行推理的GPU device id(默认值为0)|
 | --run_mode | 使用GPU时,默认为fluid, 可选(fluid/trt_fp32/trt_fp16/trt_int8)|
 | --batch_size | 预测时的batch size,在指定`image_dir`时有效 |
@@ -125,7 +125,7 @@ make
 `样例二`:
 ```shell
 #使用 `GPU`预测视频`/root/projects/videos/test.mp4`
-./build/main --model_dir=/root/projects/models/yolov3_darknet --video_path=/root/projects/images/test.mp4 --use_gpu=1
+./build/main --model_dir=/root/projects/models/yolov3_darknet --video_path=/root/projects/images/test.mp4 --device=GPU
 ```
 视频文件目前支持`.mp4`格式的预测,`可视化预测结果`会保存在当前目录下`output.mp4`文件中。
diff --git a/deploy/cpp/docs/windows_vs2019_build.md b/deploy/cpp/docs/windows_vs2019_build.md
index 6a9f1f9d9806c0fde15c8aa472100dd30e55e967..bd65848be1b5bfd13e44437c0ec3d78e6100d7a3 100755
--- a/deploy/cpp/docs/windows_vs2019_build.md
+++ b/deploy/cpp/docs/windows_vs2019_build.md
@@ -96,7 +96,7 @@ cd D:\projects\PaddleDetection\deploy\cpp\out\build\x64-Release
 | --image_dir | 要预测的图片文件夹路径 |
 | --video_file | 要预测的视频文件路径 |
 | --camera_id | Option | 用来预测的摄像头ID,默认为-1(表示不使用摄像头预测)|
-| --use_gpu | 是否使用 GPU 预测, 支持值为0或1(默认值为0)|
+| --device | 运行时的设备,可选择`CPU/GPU/XPU`,默认为`CPU`|
 | --gpu_id | 指定进行推理的GPU device id(默认值为0)|
 | --run_mode | 使用GPU时,默认为fluid, 可选(fluid/trt_fp32/trt_fp16/trt_int8)|
 | --batch_size | 预测时的batch size,在指定`image_dir`时有效 |
@@ -122,7 +122,7 @@ cd D:\projects\PaddleDetection\deploy\cpp\out\build\x64-Release
 `样例二`:
 ```shell
 #使用`GPU`测试视频 `D:\\videos\\test.mp4`
-.\main --model_dir=D:\\models\\yolov3_darknet --video_path=D:\\videos\\test.mp4 --use_gpu=1
+.\main --model_dir=D:\\models\\yolov3_darknet --video_path=D:\\videos\\test.mp4 --device=GPU
 ```
 视频文件目前支持`.mp4`格式的预测,`可视化预测结果`会保存在当前目录下`output.mp4`文件中。
diff --git a/deploy/cpp/include/object_detector.h b/deploy/cpp/include/object_detector.h
index 0a5c4d9a187878f22b5805d03e35ade9145d2a15..2b86ba94527d2aeefa96269a5cadcffdd7470335 100644
--- a/deploy/cpp/include/object_detector.h
+++ b/deploy/cpp/include/object_detector.h
@@ -58,7 +58,7 @@ cv::Mat VisualizeResult(const cv::Mat& img,
 class ObjectDetector {
  public:
   explicit ObjectDetector(const std::string& model_dir,
-                          bool use_gpu=false,
+                          const std::string& device="CPU",
                           bool use_mkldnn=false,
                           int cpu_threads=1,
                           const std::string& run_mode="fluid",
@@ -68,7 +68,7 @@ class ObjectDetector {
                           const int trt_max_shape=1280,
                           const int trt_opt_shape=640,
                           bool trt_calib_mode=false) {
-    this->use_gpu_ = use_gpu;
+    this->device_ = device;
     this->gpu_id_ = gpu_id;
     this->cpu_math_library_num_threads_ = cpu_threads;
     this->use_mkldnn_ = use_mkldnn;
@@ -106,7 +106,7 @@ class ObjectDetector {
   }
 
  private:
-  bool use_gpu_ = false;
+  std::string device_ = "CPU";
   int gpu_id_ = 0;
   int cpu_math_library_num_threads_ = 1;
   bool use_mkldnn_ = false;
diff --git a/deploy/cpp/src/main.cc b/deploy/cpp/src/main.cc
index b429a8727dd340195c12484ca7c2dea449dd247d..4869d9960f8a853a79327094dc8c35936ed226e9 100644
--- a/deploy/cpp/src/main.cc
+++ b/deploy/cpp/src/main.cc
@@ -22,6 +22,7 @@
 #include
 #include
 #include
+#include <algorithm>
 
 #ifdef _WIN32
 #include
@@ -41,7 +42,8 @@ DEFINE_string(image_dir, "", "Dir of input image, `image_file` has a higher prio
 DEFINE_int32(batch_size, 1, "batch_size");
 DEFINE_string(video_file, "", "Path of input video, `video_file` or `camera_id` has a highest priority.");
 DEFINE_int32(camera_id, -1, "Device id of camera to predict");
-DEFINE_bool(use_gpu, false, "Infering with GPU or CPU");
+DEFINE_bool(use_gpu, false, "Deprecated, please use `--device` to set the device you want to run.");
+DEFINE_string(device, "CPU", "Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU.");
 DEFINE_double(threshold, 0.5, "Threshold of score.");
 DEFINE_string(output_dir, "output", "Directory of output visualization files.");
 DEFINE_string(run_mode, "fluid", "Mode of running(fluid/trt_fp32/trt_fp16/trt_int8)");
@@ -56,7 +58,7 @@ DEFINE_bool(trt_calib_mode, false, "If the model is produced by TRT offline quan
 
 void PrintBenchmarkLog(std::vector<double> det_time, int img_num){
   LOG(INFO) << "----------------------- Config info -----------------------";
-  LOG(INFO) << "runtime_device: " << (FLAGS_use_gpu ? "gpu" : "cpu");
+  LOG(INFO) << "runtime_device: " << FLAGS_device;
   LOG(INFO) << "ir_optim: " << "True";
   LOG(INFO) << "enable_memory_optim: " << "True";
   int has_trt = FLAGS_run_mode.find("trt");
@@ -78,7 +80,7 @@ void PrintBenchmarkLog(std::vector<double> det_time, int img_num){
   LOG(INFO) << "model_name: " << FLAGS_model_dir.substr(FLAGS_model_dir.find_last_of('/') + 1);
   LOG(INFO) << "----------------------- Perf info ------------------------";
   LOG(INFO) << "Total number of predicted data: " << img_num
-            << " and total time spent(s): "
+            << " and total time spent(ms): "
            << std::accumulate(det_time.begin(), det_time.end(), 0);
   LOG(INFO) << "preproce_time(ms): " << det_time[0] / img_num
             << ", inference_time(ms): " << det_time[1] / img_num
@@ -358,8 +360,17 @@ int main(int argc, char** argv) {
     std::cout << "run_mode should be 'fluid', 'trt_fp32', 'trt_fp16' or 'trt_int8'.";
     return -1;
   }
+  transform(FLAGS_device.begin(),FLAGS_device.end(),FLAGS_device.begin(),::toupper);
+  if (!(FLAGS_device == "CPU" || FLAGS_device == "GPU" || FLAGS_device == "XPU")) {
+    std::cout << "device should be 'CPU', 'GPU' or 'XPU'.";
+    return -1;
+  }
+  if (FLAGS_use_gpu) {
+    std::cout << "Deprecated, please use `--device` to set the device you want to run.";
+    return -1;
+  }
   // Load model and create a object detector
-  PaddleDetection::ObjectDetector det(FLAGS_model_dir, FLAGS_use_gpu, FLAGS_use_mkldnn,
+  PaddleDetection::ObjectDetector det(FLAGS_model_dir, FLAGS_device, FLAGS_use_mkldnn,
                                       FLAGS_cpu_threads, FLAGS_run_mode, FLAGS_batch_size,FLAGS_gpu_id,
                                       FLAGS_trt_min_shape, FLAGS_trt_max_shape, FLAGS_trt_opt_shape,
                                       FLAGS_trt_calib_mode);
diff --git a/deploy/cpp/src/object_detector.cc b/deploy/cpp/src/object_detector.cc
index 1c104bf7c554d108b097a1f92107484da09419ee..1839badc6f60274587f010243b6f6752633de868 100644
--- a/deploy/cpp/src/object_detector.cc
+++ b/deploy/cpp/src/object_detector.cc
@@ -30,7 +30,7 @@ void ObjectDetector::LoadModel(const std::string& model_dir,
   std::string prog_file = model_dir + OS_PATH_SEP + "model.pdmodel";
   std::string params_file = model_dir + OS_PATH_SEP + "model.pdiparams";
   config.SetModel(prog_file, params_file);
-  if (this->use_gpu_) {
+  if (this->device_ == "GPU") {
     config.EnableUseGpu(200, this->gpu_id_);
     config.SwitchIrOptim(true);
     // use tensorrt
@@ -73,6 +73,8 @@ void ObjectDetector::LoadModel(const std::string& model_dir,
       }
     }
 
+  } else if (this->device_ == "XPU"){
+    config.EnableXpu(10*1024*1024);
   } else {
     config.DisableGpu();
     if (this->use_mkldnn_) {
diff --git a/deploy/python/README.md b/deploy/python/README.md
index 3f4cd5832f6142909ee755a3a7edd3767ef79fcd..131a5f112365502579a0e2487459331fb1f74215 100644
--- a/deploy/python/README.md
+++ b/deploy/python/README.md
@@ -21,26 +21,26 @@ PaddleDetection在训练过程包括网络的前向和优化器相关参数,
 在终端输入以下命令进行预测:
 
 ```bash
-python deploy/python/infer.py --model_dir=./inference/yolov3_mobilenet_v1_roadsign --image_file=./demo/road554.png --use_gpu=True
+python deploy/python/infer.py --model_dir=./inference/yolov3_mobilenet_v1_roadsign --image_file=./demo/road554.png --device=GPU
 ```
 
 参数说明如下:
 
 | 参数 | 是否必须|含义 |
 |-------|-------|----------|
-| --model_dir | Yes|上述导出的模型路径 |
-| --image_file | Option |需要预测的图片 |
+| --model_dir | Yes| 上述导出的模型路径 |
+| --image_file | Option | 需要预测的图片 |
 | --image_dir | Option | 要预测的图片文件夹路径 |
-| --video_file | Option |需要预测的视频 |
+| --video_file | Option | 需要预测的视频 |
 | --camera_id | Option | 用来预测的摄像头ID,默认为-1(表示不使用摄像头预测,可设置为:0 - (摄像头数目-1) ),预测过程中在可视化界面按`q`退出输出预测结果到:output/output.mp4|
-| --use_gpu | No |是否GPU,默认为False|
-| --run_mode | No |使用GPU时,默认为fluid, 可选(fluid/trt_fp32/trt_fp16/trt_int8)|
-| --batch_size | No |预测时的batch size,在指定`image_dir`时有效 |
-| --threshold | No|预测得分的阈值,默认为0.5|
-| --output_dir | No|可视化结果保存的根目录,默认为output/|
-| --run_benchmark | No| 是否运行benchmark,同时需指定`--image_file`或`--image_dir` |
-| --enable_mkldnn | No | CPU预测中是否开启MKLDNN加速 |
-| --cpu_threads | No| 设置cpu线程数,默认为1 |
+| --device | Option | 运行时的设备,可选择`CPU/GPU/XPU`,默认为`CPU`|
+| --run_mode | Option |使用GPU时,默认为fluid, 可选(fluid/trt_fp32/trt_fp16/trt_int8)|
+| --batch_size | Option |预测时的batch size,在指定`image_dir`时有效,默认为1 |
+| --threshold | Option|预测得分的阈值,默认为0.5|
+| --output_dir | Option|可视化结果保存的根目录,默认为output/|
+| --run_benchmark | Option| 是否运行benchmark,同时需指定`--image_file`或`--image_dir`,默认为False |
+| --enable_mkldnn | Option | CPU预测中是否开启MKLDNN加速,默认为False |
+| --cpu_threads | Option| 设置cpu线程数,默认为1 |
 
 说明:
diff --git a/deploy/python/infer.py b/deploy/python/infer.py
index 91a515e355cd7d62c0a16350b5a46f3991e08bc3..84f52b07b763dde975d6bc59bc16c912c246dac3 100644
--- a/deploy/python/infer.py
+++ b/deploy/python/infer.py
@@ -49,7 +49,7 @@ class Detector(object):
     Args:
         config (object): config of model, defined by `Config(model_dir)`
         model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
-        use_gpu (bool): whether use gpu
+        device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
         run_mode (str): mode of running(fluid/trt_fp32/trt_fp16)
         batch_size (int): size of pre batch in inference
         trt_min_shape (int): min shape for dynamic shape in trt
@@ -62,7 +62,7 @@ class Detector(object):
     def __init__(self,
                  pred_config,
                  model_dir,
-                 use_gpu=False,
+                 device='CPU',
                  run_mode='fluid',
                  batch_size=1,
                  trt_min_shape=1,
@@ -77,7 +77,7 @@ class Detector(object):
             run_mode=run_mode,
             batch_size=batch_size,
             min_subgraph_size=self.pred_config.min_subgraph_size,
-            use_gpu=use_gpu,
+            device=device,
             use_dynamic_shape=self.pred_config.use_dynamic_shape,
             trt_min_shape=trt_min_shape,
             trt_max_shape=trt_max_shape,
@@ -177,7 +177,7 @@ class DetectorSOLOv2(Detector):
     Args:
         config (object): config of model, defined by `Config(model_dir)`
         model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
-        use_gpu (bool): whether use gpu
+        device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
         run_mode (str): mode of running(fluid/trt_fp32/trt_fp16)
         batch_size (int): size of pre batch in inference
         trt_min_shape (int): min shape for dynamic shape in trt
@@ -189,7 +189,7 @@ class DetectorSOLOv2(Detector):
     def __init__(self,
                  pred_config,
                  model_dir,
-                 use_gpu=False,
+                 device='CPU',
                  run_mode='fluid',
                  batch_size=1,
                  trt_min_shape=1,
@@ -204,7 +204,7 @@ class DetectorSOLOv2(Detector):
             run_mode=run_mode,
             batch_size=batch_size,
             min_subgraph_size=self.pred_config.min_subgraph_size,
-            use_gpu=use_gpu,
+            device=device,
             use_dynamic_shape=self.pred_config.use_dynamic_shape,
             trt_min_shape=trt_min_shape,
             trt_max_shape=trt_max_shape,
@@ -352,7 +352,7 @@ class PredictConfig():
 def load_predictor(model_dir,
                    run_mode='fluid',
                    batch_size=1,
-                   use_gpu=False,
+                   device='CPU',
                    min_subgraph_size=3,
                    use_dynamic_shape=False,
                    trt_min_shape=1,
@@ -364,7 +364,7 @@ def load_predictor(model_dir,
     """set AnalysisConfig, generate AnalysisPredictor
     Args:
         model_dir (str): root path of __model__ and __params__
-        use_gpu (bool): whether use gpu
+        device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
         run_mode (str): mode of running(fluid/trt_fp32/trt_fp16/trt_int8)
         use_dynamic_shape (bool): use dynamic shape or not
         trt_min_shape (int): min shape for dynamic shape in trt
@@ -375,25 +375,22 @@ def load_predictor(model_dir,
     Returns:
         predictor (PaddlePredictor): AnalysisPredictor
     Raises:
-        ValueError: predict by TensorRT need use_gpu == True.
+        ValueError: predict by TensorRT need device == 'GPU'.
     """
-    if not use_gpu and not run_mode == 'fluid':
+    if device != 'GPU' and run_mode != 'fluid':
         raise ValueError(
-            "Predict by TensorRT mode: {}, expect use_gpu==True, but use_gpu == {}"
-            .format(run_mode, use_gpu))
+            "Predict by TensorRT mode: {}, expect device=='GPU', but device == {}"
+            .format(run_mode, device))
     config = Config(
         os.path.join(model_dir, 'model.pdmodel'),
         os.path.join(model_dir, 'model.pdiparams'))
-    precision_map = {
-        'trt_int8': Config.Precision.Int8,
-        'trt_fp32': Config.Precision.Float32,
-        'trt_fp16': Config.Precision.Half
-    }
-    if use_gpu:
+    if device == 'GPU':
         # initial GPU memory(M), device ID
         config.enable_use_gpu(200, 0)
         # optimize graph and fuse op
         config.switch_ir_optim(True)
+    elif device == 'XPU':
+        config.enable_xpu(10 * 1024 * 1024)
     else:
         config.disable_gpu()
         config.set_cpu_math_library_num_threads(cpu_threads)
@@ -408,6 +405,11 @@ def load_predictor(model_dir,
             )
             pass
 
+    precision_map = {
+        'trt_int8': Config.Precision.Int8,
+        'trt_fp32': Config.Precision.Float32,
+        'trt_fp16': Config.Precision.Half
+    }
     if run_mode in precision_map.keys():
         config.enable_tensorrt_engine(
             workspace_size=1 << 10,
@@ -582,7 +584,7 @@ def main():
     detector = Detector(
         pred_config,
         FLAGS.model_dir,
-        use_gpu=FLAGS.use_gpu,
+        device=FLAGS.device,
         run_mode=FLAGS.run_mode,
         batch_size=FLAGS.batch_size,
         trt_min_shape=FLAGS.trt_min_shape,
@@ -595,7 +597,7 @@ def main():
     detector = DetectorSOLOv2(
         pred_config,
         FLAGS.model_dir,
-        use_gpu=FLAGS.use_gpu,
+        device=FLAGS.device,
         run_mode=FLAGS.run_mode,
         batch_size=FLAGS.batch_size,
         trt_min_shape=FLAGS.trt_min_shape,
@@ -645,5 +647,9 @@ if __name__ == '__main__':
     parser = argsparser()
     FLAGS = parser.parse_args()
     print_arguments(FLAGS)
+    FLAGS.device = FLAGS.device.upper()
+    assert FLAGS.device in ['CPU', 'GPU', 'XPU'
+                            ], "device should be CPU, GPU or XPU"
+    assert not FLAGS.use_gpu, "use_gpu has been deprecated, please use --device"
 
     main()
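The reworked `load_predictor` above, like the C++ `LoadModel` earlier in this patch, reduces device selection to one normalized string: GPU turns on `enable_use_gpu` plus IR optimization, XPU goes through `enable_xpu`, and anything else falls back to the CPU path. The snippet below is only a minimal sketch of that dispatch, assuming a Paddle 2.x `paddle.inference` install; `make_config` is an illustrative helper name rather than code from this patch, and the memory sizes simply echo the values used above.

```python
# Minimal sketch of the CPU/GPU/XPU dispatch introduced by this patch
# (illustrative helper, not code from the repository).
from paddle.inference import Config, create_predictor


def make_config(model_dir, device="CPU"):
    # model.pdmodel / model.pdiparams are the exported inference files used above
    config = Config(model_dir + "/model.pdmodel", model_dir + "/model.pdiparams")
    device = device.upper()                  # the flag is normalized before use
    if device == "GPU":
        config.enable_use_gpu(200, 0)        # initial GPU memory (MB), device id
        config.switch_ir_optim(True)
    elif device == "XPU":
        config.enable_xpu(10 * 1024 * 1024)  # XPU L3 workspace size
    else:
        config.disable_gpu()                 # plain CPU path; MKLDNN/threads set elsewhere
    return config


# e.g. predictor = create_predictor(make_config("./inference/yolov3_darknet", "GPU"))
```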
diff --git a/deploy/python/keypoint_det_unite_infer.py b/deploy/python/keypoint_det_unite_infer.py
index 6ff335971ff0521a52dadef9155dd5a4e9deff0a..d0321873d56ff2f012b1f8acd056c8f7dad8dff8 100644
--- a/deploy/python/keypoint_det_unite_infer.py
+++ b/deploy/python/keypoint_det_unite_infer.py
@@ -156,7 +156,7 @@ def main():
     detector = Detector(
         pred_config,
         FLAGS.det_model_dir,
-        use_gpu=FLAGS.use_gpu,
+        device=FLAGS.device,
         run_mode=FLAGS.run_mode,
         trt_min_shape=FLAGS.trt_min_shape,
         trt_max_shape=FLAGS.trt_max_shape,
@@ -169,7 +169,7 @@ def main():
     topdown_keypoint_detector = KeyPoint_Detector(
         pred_config,
         FLAGS.keypoint_model_dir,
-        use_gpu=FLAGS.use_gpu,
+        device=FLAGS.device,
         run_mode=FLAGS.run_mode,
         trt_min_shape=FLAGS.trt_min_shape,
         trt_max_shape=FLAGS.trt_max_shape,
@@ -193,5 +193,8 @@ if __name__ == '__main__':
     parser = argsparser()
     FLAGS = parser.parse_args()
     print_arguments(FLAGS)
+    FLAGS.device = FLAGS.device.upper()
+    assert FLAGS.device in ['CPU', 'GPU', 'XPU'
+                            ], "device should be CPU, GPU or XPU"
 
     main()
diff --git a/deploy/python/keypoint_infer.py b/deploy/python/keypoint_infer.py
index c1f5e15bf0237a6a2a60e6345759afb03f4110ae..74bd84a164cb5bbad2e911c1e662edf1253c64f3 100644
--- a/deploy/python/keypoint_infer.py
+++ b/deploy/python/keypoint_infer.py
@@ -44,7 +44,7 @@ class KeyPoint_Detector(object):
     Args:
         config (object): config of model, defined by `Config(model_dir)`
         model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
-        use_gpu (bool): whether use gpu
+        device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
         run_mode (str): mode of running(fluid/trt_fp32/trt_fp16)
         trt_min_shape (int): min shape for dynamic shape in trt
         trt_max_shape (int): max shape for dynamic shape in trt
@@ -56,7 +56,7 @@ class KeyPoint_Detector(object):
     def __init__(self,
                  pred_config,
                  model_dir,
-                 use_gpu=False,
+                 device='CPU',
                  run_mode='fluid',
                  trt_min_shape=1,
                  trt_max_shape=1280,
@@ -69,7 +69,7 @@ class KeyPoint_Detector(object):
             model_dir,
             run_mode=run_mode,
             min_subgraph_size=self.pred_config.min_subgraph_size,
-            use_gpu=use_gpu,
+            device=device,
             use_dynamic_shape=self.pred_config.use_dynamic_shape,
             trt_min_shape=trt_min_shape,
             trt_max_shape=trt_max_shape,
@@ -236,7 +236,7 @@ class PredictConfig_KeyPoint():
 def load_predictor(model_dir,
                    run_mode='fluid',
                    batch_size=1,
-                   use_gpu=False,
+                   device='CPU',
                    min_subgraph_size=3,
                    use_dynamic_shape=False,
                    trt_min_shape=1,
@@ -248,7 +248,7 @@ def load_predictor(model_dir,
     """set AnalysisConfig, generate AnalysisPredictor
     Args:
         model_dir (str): root path of __model__ and __params__
-        use_gpu (bool): whether use gpu
+        device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
         run_mode (str): mode of running(fluid/trt_fp32/trt_fp16/trt_int8)
         use_dynamic_shape (bool): use dynamic shape or not
         trt_min_shape (int): min shape for dynamic shape in trt
@@ -259,25 +259,22 @@ def load_predictor(model_dir,
     Returns:
         predictor (PaddlePredictor): AnalysisPredictor
     Raises:
-        ValueError: predict by TensorRT need use_gpu == True.
+        ValueError: predict by TensorRT need device == 'GPU'.
     """
-    if not use_gpu and not run_mode == 'fluid':
+    if device != 'GPU' and run_mode != 'fluid':
         raise ValueError(
-            "Predict by TensorRT mode: {}, expect use_gpu==True, but use_gpu == {}"
-            .format(run_mode, use_gpu))
+            "Predict by TensorRT mode: {}, expect device=='GPU', but device == {}"
+            .format(run_mode, device))
     config = Config(
         os.path.join(model_dir, 'model.pdmodel'),
         os.path.join(model_dir, 'model.pdiparams'))
-    precision_map = {
-        'trt_int8': Config.Precision.Int8,
-        'trt_fp32': Config.Precision.Float32,
-        'trt_fp16': Config.Precision.Half
-    }
-    if use_gpu:
+    if device == 'GPU':
         # initial GPU memory(M), device ID
         config.enable_use_gpu(200, 0)
         # optimize graph and fuse op
         config.switch_ir_optim(True)
+    elif device == 'XPU':
+        config.enable_xpu(10 * 1024 * 1024)
     else:
         config.disable_gpu()
         config.set_cpu_math_library_num_threads(cpu_threads)
@@ -292,6 +289,11 @@ def load_predictor(model_dir,
             )
             pass
 
+    precision_map = {
+        'trt_int8': Config.Precision.Int8,
+        'trt_fp32': Config.Precision.Float32,
+        'trt_fp16': Config.Precision.Half
+    }
     if run_mode in precision_map.keys():
         config.enable_tensorrt_engine(
             workspace_size=1 << 10,
@@ -381,7 +383,7 @@ def main():
     detector = KeyPoint_Detector(
         pred_config,
         FLAGS.model_dir,
-        use_gpu=FLAGS.use_gpu,
+        device=FLAGS.device,
         run_mode=FLAGS.run_mode,
         trt_min_shape=FLAGS.trt_min_shape,
         trt_max_shape=FLAGS.trt_max_shape,
@@ -427,5 +429,9 @@ if __name__ == '__main__':
     parser = argsparser()
     FLAGS = parser.parse_args()
     print_arguments(FLAGS)
+    FLAGS.device = FLAGS.device.upper()
+    assert FLAGS.device in ['CPU', 'GPU', 'XPU'
+                            ], "device should be CPU, GPU or XPU"
+    assert not FLAGS.use_gpu, "use_gpu has been deprecated, please use --device"
 
     main()
diff --git a/deploy/python/topdown_unite_utils.py b/deploy/python/topdown_unite_utils.py
index 02d3c604975e2c0b2b5333719d1e7b0c236e0723..ff34c5e8f43281367abd4069c56c1adb495c258e 100644
--- a/deploy/python/topdown_unite_utils.py
+++ b/deploy/python/topdown_unite_utils.py
@@ -68,10 +68,11 @@ def argsparser():
         default='fluid',
         help="mode of running(fluid/trt_fp32/trt_fp16/trt_int8)")
     parser.add_argument(
-        "--use_gpu",
-        type=ast.literal_eval,
-        default=False,
-        help="Whether to predict with GPU.")
+        "--device",
+        type=str,
+        default='cpu',
+        help="Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU."
+    )
     parser.add_argument(
         "--run_benchmark",
         type=ast.literal_eval,
diff --git a/deploy/python/utils.py b/deploy/python/utils.py
index aedcab194fcbb468cef1550e3f6a77ca93c5aea1..411f55a889ce82dae83ba5f5aea3ccf37f95a80d 100644
--- a/deploy/python/utils.py
+++ b/deploy/python/utils.py
@@ -59,11 +59,17 @@ def argsparser():
         type=str,
         default='fluid',
         help="mode of running(fluid/trt_fp32/trt_fp16/trt_int8)")
+    parser.add_argument(
+        "--device",
+        type=str,
+        default='cpu',
+        help="Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU."
+    )
     parser.add_argument(
         "--use_gpu",
         type=ast.literal_eval,
         default=False,
-        help="Whether to predict with GPU.")
+        help="Deprecated, please use `--device`.")
     parser.add_argument(
         "--run_benchmark",
         type=ast.literal_eval,
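Every Python entry point touched above now accepts `--device`, keeps `--use_gpu` only as a deprecated switch, and normalizes and validates the value immediately after parsing. A condensed, stand-alone sketch of that flag-handling pattern follows; the argument definitions mirror `utils.argsparser()`, while the wrapper script itself is illustrative only.

```python
# Illustrative stand-alone version of the flag handling added to the Python entry points.
import argparse
import ast

parser = argparse.ArgumentParser()
parser.add_argument(
    "--device", type=str, default="cpu",
    help="Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU.")
parser.add_argument(
    "--use_gpu", type=ast.literal_eval, default=False,
    help="Deprecated, please use `--device`.")

FLAGS = parser.parse_args()
FLAGS.device = FLAGS.device.upper()   # cpu/gpu/xpu accepted in any case
assert FLAGS.device in ['CPU', 'GPU', 'XPU'], "device should be CPU, GPU or XPU"
assert not FLAGS.use_gpu, "use_gpu has been deprecated, please use --device"
```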
diff --git a/static/deploy/cpp/docs/Jetson_build.md b/static/deploy/cpp/docs/Jetson_build.md
index 386cb0bcb6a697620afe877d4584e600e022050f..a1017371c5403c5bc33f77fb926da395f1b85364 100644
--- a/static/deploy/cpp/docs/Jetson_build.md
+++ b/static/deploy/cpp/docs/Jetson_build.md
@@ -153,7 +153,7 @@ CUDNN_LIB=/usr/lib/aarch64-linux-gnu/
 | --image_file | 要预测的图片文件路径 |
 | --video_path | 要预测的视频文件路径 |
 | --camera_id | Option | 用来预测的摄像头ID,默认为-1(表示不使用摄像头预测)|
-| --use_gpu | 是否使用 GPU 预测, 支持值为0或1(默认值为0)|
+| --device | 运行时的设备,可选择`CPU/GPU/XPU`,默认为`CPU`|
 | --gpu_id | 指定进行推理的GPU device id(默认值为0)|
 | --run_mode | 使用GPU时,默认为fluid, 可选(fluid/trt_fp32/trt_fp16/trt_int8)|
 | --run_benchmark | 是否重复预测来进行benchmark测速 |
@@ -174,7 +174,7 @@ CUDNN_LIB=/usr/lib/aarch64-linux-gnu/
 `样例二`:
 ```shell
 #使用 `GPU`预测视频`/root/projects/videos/test.mp4`
-./main --model_dir=/root/projects/models/yolov3_darknet --video_path=/root/projects/images/test.mp4 --use_gpu=1
+./main --model_dir=/root/projects/models/yolov3_darknet --video_path=/root/projects/images/test.mp4 --device=GPU
 ```
 视频文件目前支持`.mp4`格式的预测,`可视化预测结果`会保存在当前目录下`output.mp4`文件中。
diff --git a/static/deploy/cpp/docs/linux_build.md b/static/deploy/cpp/docs/linux_build.md
index c63eadefd7b9792efaa3616658a37ddb5ba09d72..67bb718f58fe90764e9bcaa03a86b9fe253e13a7 100644
--- a/static/deploy/cpp/docs/linux_build.md
+++ b/static/deploy/cpp/docs/linux_build.md
@@ -100,7 +100,7 @@ make
 | --image_file | 要预测的图片文件路径 |
 | --video_path | 要预测的视频文件路径 |
 | --camera_id | Option | 用来预测的摄像头ID,默认为-1(表示不使用摄像头预测)|
-| --use_gpu | 是否使用 GPU 预测, 支持值为0或1(默认值为0)|
+| --device | 运行时的设备,可选择`CPU/GPU/XPU`,默认为`CPU`|
 | --gpu_id | 指定进行推理的GPU device id(默认值为0)|
 | --run_mode | 使用GPU时,默认为fluid, 可选(fluid/trt_fp32/trt_fp16/trt_int8)|
 | --run_benchmark | 是否重复预测来进行benchmark测速 |
@@ -121,6 +121,6 @@ make
 `样例二`:
 ```shell
 #使用 `GPU`预测视频`/root/projects/videos/test.mp4`
-./build/main --model_dir=/root/projects/models/yolov3_darknet --video_path=/root/projects/images/test.mp4 --use_gpu=1
+./build/main --model_dir=/root/projects/models/yolov3_darknet --video_path=/root/projects/images/test.mp4 --device=GPU
 ```
 视频文件目前支持`.mp4`格式的预测,`可视化预测结果`会保存在当前目录下`output.mp4`文件中。
diff --git a/static/deploy/cpp/docs/windows_vs2019_build.md b/static/deploy/cpp/docs/windows_vs2019_build.md
index 7964c68d623071fa5b277d32e089db9f42123195..9073871466d6b625ba4ac95abff108c028e277f6 100644
--- a/static/deploy/cpp/docs/windows_vs2019_build.md
+++ b/static/deploy/cpp/docs/windows_vs2019_build.md
@@ -95,7 +95,7 @@ cd D:\projects\PaddleDetection\deploy\cpp\out\build\x64-Release
 | --image_file | 要预测的图片文件路径 |
 | --video_path | 要预测的视频文件路径 |
 | --camera_id | Option | 用来预测的摄像头ID,默认为-1(表示不使用摄像头预测)|
-| --use_gpu | 是否使用 GPU 预测, 支持值为0或1(默认值为0)|
+| --device | 运行时的设备,可选择`CPU/GPU/XPU`,默认为`CPU`|
 | --gpu_id | 指定进行推理的GPU device id(默认值为0)|
 | --run_mode | 使用GPU时,默认为fluid, 可选(fluid/trt_fp32/trt_fp16/trt_int8)|
 | --run_benchmark | 是否重复预测来进行benchmark测速 |
@@ -118,7 +118,7 @@ cd D:\projects\PaddleDetection\deploy\cpp\out\build\x64-Release
 `样例二`:
 ```shell
 #使用`GPU`测试视频 `D:\\videos\\test.mp4`
-.\main --model_dir=D:\\models\\yolov3_darknet --video_path=D:\\videos\\test.mp4 --use_gpu=1
+.\main --model_dir=D:\\models\\yolov3_darknet --video_path=D:\\videos\\test.mp4 --device=GPU
 ```
 视频文件目前支持`.mp4`格式的预测,`可视化预测结果`会保存在当前目录下`output.mp4`文件中。
diff --git a/static/deploy/cpp/include/object_detector.h b/static/deploy/cpp/include/object_detector.h
index 55778a9d3ef29a75d3d9aecc792c251cb3b4e65f..b0173989dd80782f1243dce9250ca5f7aee634c5 100644
--- a/static/deploy/cpp/include/object_detector.h
+++ b/static/deploy/cpp/include/object_detector.h
@@ -56,20 +56,20 @@ cv::Mat VisualizeResult(const cv::Mat& img,
 class ObjectDetector {
  public:
   explicit ObjectDetector(const std::string& model_dir,
-                          bool use_gpu=false,
+                          const std::string& device,
                           const std::string& run_mode="fluid",
                           const int gpu_id=0,
                           bool trt_calib_mode=false) {
     config_.load_config(model_dir);
     threshold_ = config_.draw_threshold_;
     preprocessor_.Init(config_.preprocess_info_, config_.arch_);
-    LoadModel(model_dir, use_gpu, config_.min_subgraph_size_, 1, run_mode, gpu_id, trt_calib_mode);
+    LoadModel(model_dir, device, config_.min_subgraph_size_, 1, run_mode, gpu_id, trt_calib_mode);
   }
 
   // Load Paddle inference model
   void LoadModel(
     const std::string& model_dir,
-    bool use_gpu,
+    const std::string& device,
     const int min_subgraph_size,
     const int batch_size = 1,
     const std::string& run_mode = "fluid",
diff --git a/static/deploy/cpp/src/main.cc b/static/deploy/cpp/src/main.cc
index 3ca468e277f7c7cf0c62daa957e8c9e470decc82..c1d5693978d1b667e344ab891f7839b50ca2b93f 100644
--- a/static/deploy/cpp/src/main.cc
+++ b/static/deploy/cpp/src/main.cc
@@ -19,6 +19,7 @@
 #include
 #include
 #include
+#include <algorithm>
 
 #ifdef _WIN32
 #include
@@ -35,7 +36,8 @@
 DEFINE_string(model_dir, "", "Path of inference model");
 DEFINE_string(image_file, "", "Path of input image");
 DEFINE_string(video_path, "", "Path of input video");
-DEFINE_bool(use_gpu, false, "Infering with GPU or CPU");
+DEFINE_bool(use_gpu, false, "Deprecated, please use `--device` to set the device you want to run.");
+DEFINE_string(device, "CPU", "Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU.");
 DEFINE_bool(use_camera, false, "Use camera or not");
 DEFINE_string(run_mode, "fluid", "Mode of running(fluid/trt_fp32/trt_fp16)");
 DEFINE_int32(gpu_id, 0, "Device id of GPU to execute");
@@ -204,9 +206,18 @@ int main(int argc, char** argv) {
     std::cout << "run_mode should be 'fluid', 'trt_fp32', 'trt_fp16' or 'trt_int8'.";
     return -1;
   }
+  transform(FLAGS_device.begin(),FLAGS_device.end(),FLAGS_device.begin(),::toupper);
+  if (!(FLAGS_device == "CPU" || FLAGS_device == "GPU" || FLAGS_device == "XPU")) {
+    std::cout << "device should be 'CPU', 'GPU' or 'XPU'.";
+    return -1;
+  }
+  if (FLAGS_use_gpu) {
+    std::cout << "Deprecated, please use `--device` to set the device you want to run.";
+    return -1;
+  }
   // Load model and create a object detector
-  PaddleDetection::ObjectDetector det(FLAGS_model_dir, FLAGS_use_gpu,
+  PaddleDetection::ObjectDetector det(FLAGS_model_dir, FLAGS_device,
                                       FLAGS_run_mode, FLAGS_gpu_id, FLAGS_trt_calib_mode);
   // Do inference on input video or image
   if (!FLAGS_video_path.empty() || FLAGS_use_camera) {
diff --git a/static/deploy/cpp/src/object_detector.cc b/static/deploy/cpp/src/object_detector.cc
index 4e86d8d6bd822e56682de8f65d8f2c35845a95e4..f257d8021ed10e1f1f3b5fc1a726bc5118e3b13b 100644
--- a/static/deploy/cpp/src/object_detector.cc
+++ b/static/deploy/cpp/src/object_detector.cc
@@ -21,7 +21,7 @@ namespace PaddleDetection {
 
 // Load Model and create model predictor
 void ObjectDetector::LoadModel(const std::string& model_dir,
-                               bool use_gpu,
+                               const std::string& device,
                                const int min_subgraph_size,
                                const int batch_size,
                                const std::string& run_mode,
@@ -31,7 +31,7 @@ void ObjectDetector::LoadModel(const std::string& model_dir,
   std::string prog_file = model_dir + OS_PATH_SEP + "__model__";
   std::string params_file = model_dir + OS_PATH_SEP + "__params__";
   config.SetModel(prog_file, params_file);
-  if (use_gpu) {
+  if (device == "GPU") {
     config.EnableUseGpu(100, gpu_id);
     config.SwitchIrOptim(true);
     if (run_mode != "fluid") {
@@ -51,6 +51,8 @@ void ObjectDetector::LoadModel(const std::string& model_dir,
           false,
           trt_calib_mode);
     }
+  } else if (device == "XPU"){
+    config.EnableXpu(10*1024*1024);
   } else {
     config.DisableGpu();
   }
diff --git a/static/deploy/python/README.md b/static/deploy/python/README.md
index 2e8d761bc58db7d29f499c6888fd2396633f3343..ad3718b93903a1f814ede4e24f689bb8fac74b8e 100644
--- a/static/deploy/python/README.md
+++ b/static/deploy/python/README.md
@@ -45,7 +45,7 @@ python deploy/python/infer.py --model_dir=/path/to/models --image_file=/path/to/
 | --image_file | Option |需要预测的图片 |
 | --video_file | Option |需要预测的视频 |
 | --camera_id | Option | 用来预测的摄像头ID,默认为-1(表示不使用摄像头预测,可设置为:0 - (摄像头数目-1) ),预测过程中在可视化界面按`q`退出输出预测结果到:output/output.mp4|
-| --use_gpu |No|是否GPU,默认为False|
+| --device | Option | 运行时的设备,可选择`CPU/GPU`,默认为`CPU`|
 | --run_mode |No|使用GPU时,默认为fluid, 可选(fluid/trt_fp32/trt_fp16/trt_int8)|
 | --threshold |No|预测得分的阈值,默认为0.5|
 | --output_dir |No|可视化结果保存的根目录,默认为output/|
diff --git a/static/deploy/python/infer.py b/static/deploy/python/infer.py
index 2da31b5e56c3795ed4e8d299129f6615d529a7b2..3f6e72a484c092e8ed4efbe7578c6fbb56321b83 100644
--- a/static/deploy/python/infer.py
+++ b/static/deploy/python/infer.py
@@ -55,7 +55,7 @@ class Detector(object):
     Args:
         config (object): config of model, defined by `Config(model_dir)`
         model_dir (str): root path of __model__, __params__ and infer_cfg.yml
-        use_gpu (bool): whether use gpu
+        device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
         run_mode (str): mode of running(fluid/trt_fp32/trt_fp16)
         threshold (float): threshold to reserve the result for output.
     """
@@ -63,20 +63,20 @@ class Detector(object):
     def __init__(self,
                  config,
                  model_dir,
-                 use_gpu=False,
+                 device='CPU',
                  run_mode='fluid',
                  threshold=0.5,
                  trt_calib_mode=False):
         self.config = config
         if self.config.use_python_inference:
             self.executor, self.program, self.fecth_targets = load_executor(
-                model_dir, use_gpu=use_gpu)
+                model_dir, device=device)
         else:
             self.predictor = load_predictor(
                 model_dir,
                 run_mode=run_mode,
                 min_subgraph_size=self.config.min_subgraph_size,
-                use_gpu=use_gpu,
+                device=device,
                 trt_calib_mode=trt_calib_mode)
@@ -221,14 +221,14 @@ class DetectorSOLOv2(Detector):
     def __init__(self,
                  config,
                  model_dir,
-                 use_gpu=False,
+                 device='CPU',
                  run_mode='fluid',
                  threshold=0.5,
                  trt_calib_mode=False):
         super(DetectorSOLOv2, self).__init__(
             config=config,
             model_dir=model_dir,
-            use_gpu=use_gpu,
+            device=device,
             run_mode=run_mode,
             threshold=threshold,
             trt_calib_mode=trt_calib_mode)
@@ -382,24 +382,24 @@ class Config():
 def load_predictor(model_dir,
                    run_mode='fluid',
                    batch_size=1,
-                   use_gpu=False,
+                   device='CPU',
                    min_subgraph_size=3,
                    trt_calib_mode=False):
     """set AnalysisConfig, generate AnalysisPredictor
     Args:
         model_dir (str): root path of __model__ and __params__
-        use_gpu (bool): whether use gpu
+        device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
         trt_calib_mode (bool): If the model is produced by TRT offline quantitative
             calibration, trt_calib_mode need to set True
     Returns:
         predictor (PaddlePredictor): AnalysisPredictor
     Raises:
-        ValueError: predict by TensorRT need use_gpu == True.
+        ValueError: predict by TensorRT need device == GPU.
     """
-    if not use_gpu and not run_mode == 'fluid':
+    if device != 'GPU' and not run_mode == 'fluid':
         raise ValueError(
-            "Predict by TensorRT mode: {}, expect use_gpu==True, but use_gpu == {}"
-            .format(run_mode, use_gpu))
+            "Predict by TensorRT mode: {}, expect device==GPU, but device == {}"
+            .format(run_mode, device))
     precision_map = {
         'trt_int8': fluid.core.AnalysisConfig.Precision.Int8,
         'trt_fp32': fluid.core.AnalysisConfig.Precision.Float32,
@@ -408,11 +408,13 @@ def load_predictor(model_dir,
     config = fluid.core.AnalysisConfig(
         os.path.join(model_dir, '__model__'),
         os.path.join(model_dir, '__params__'))
-    if use_gpu:
+    if device == 'GPU':
         # initial GPU memory(M), device ID
         config.enable_use_gpu(100, 0)
         # optimize graph and fuse op
         config.switch_ir_optim(True)
+    elif device == 'XPU':
+        config.enable_xpu(10 * 1024 * 1024)
     else:
         config.disable_gpu()
@@ -435,8 +437,8 @@ def load_predictor(model_dir,
     return predictor
 
 
-def load_executor(model_dir, use_gpu=False):
-    if use_gpu:
+def load_executor(model_dir, device='CPU'):
+    if device == 'GPU':
         place = fluid.CUDAPlace(0)
     else:
         place = fluid.CPUPlace()
@@ -539,14 +541,14 @@ def main():
     detector = Detector(
         config,
         FLAGS.model_dir,
-        use_gpu=FLAGS.use_gpu,
+        device=FLAGS.device,
         run_mode=FLAGS.run_mode,
         trt_calib_mode=FLAGS.trt_calib_mode)
     if config.arch == 'SOLOv2':
         detector = DetectorSOLOv2(
             config,
             FLAGS.model_dir,
-            use_gpu=FLAGS.use_gpu,
+            device=FLAGS.device,
             run_mode=FLAGS.run_mode,
             trt_calib_mode=FLAGS.trt_calib_mode)
     # predict from image
@@ -584,11 +586,18 @@ if __name__ == '__main__':
         type=str,
         default='fluid',
         help="mode of running(fluid/trt_fp32/trt_fp16/trt_int8)")
+    parser.add_argument(
+        "--device",
+        type=str,
+        default='cpu',
+        help="Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU."
+    )
     parser.add_argument(
         "--use_gpu",
         type=ast.literal_eval,
         default=False,
-        help="Whether to predict with GPU.")
+        help="Deprecated, please use `--device` to set the device you want to run."
+    )
     parser.add_argument(
         "--run_benchmark",
         type=ast.literal_eval,
@@ -612,5 +621,9 @@ if __name__ == '__main__':
     print_arguments(FLAGS)
     if FLAGS.image_file != '' and FLAGS.video_file != '':
         assert "Cannot predict image and video at the same time"
+    FLAGS.device = FLAGS.device.upper()
+    assert FLAGS.device in ['CPU', 'GPU', 'XPU'
+                            ], "device should be CPU, GPU or XPU"
+    assert not FLAGS.use_gpu, "use_gpu has been deprecated, please use --device"
 
     main()
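In the static-graph `infer.py`, the executor path (`use_python_inference`) only distinguishes CPU from GPU, while XPU requests are served on the `AnalysisConfig` path via `enable_xpu`. A rough sketch of that place selection follows, assuming the legacy `paddle.fluid` API the file already imports; `pick_place` is an illustrative name, not part of the patch.

```python
# Sketch of the device-to-place mapping used by the static-graph executor path.
import paddle.fluid as fluid


def pick_place(device="CPU"):
    if device.upper() == "GPU":
        return fluid.CUDAPlace(0)   # first GPU card, matching load_executor above
    return fluid.CPUPlace()         # CPU fallback; XPU goes through load_predictor instead


exe = fluid.Executor(pick_place("CPU"))
```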