Unverified commit 44b83132, authored by Guanghua Yu, committed by GitHub

support xpu inference (#3307)

* support xpu inference
Parent 5146077c
...@@ -159,7 +159,7 @@ CUDNN_LIB=/usr/lib/aarch64-linux-gnu/ ...@@ -159,7 +159,7 @@ CUDNN_LIB=/usr/lib/aarch64-linux-gnu/
| --image_dir | Path of the image directory to predict | | --image_dir | Path of the image directory to predict |
| --video_file | Path of the video file to predict | | --video_file | Path of the video file to predict |
| --camera_id | Option | Camera ID used for prediction, default is -1 (do not predict from a camera) | | --camera_id | Option | Camera ID used for prediction, default is -1 (do not predict from a camera) |
| --use_gpu | Whether to use GPU for inference, 0 or 1 (default: 0) | | --device | Runtime device, one of `CPU/GPU/XPU` (default: `CPU`) |
| --gpu_id | GPU device id used for inference (default: 0) | | --gpu_id | GPU device id used for inference (default: 0) |
| --run_mode | Run mode when using GPU, default is fluid, options: fluid/trt_fp32/trt_fp16/trt_int8 | | --run_mode | Run mode when using GPU, default is fluid, options: fluid/trt_fp32/trt_fp16/trt_int8 |
| --batch_size | Batch size for inference, effective when `image_dir` is specified | | --batch_size | Batch size for inference, effective when `image_dir` is specified |
...@@ -183,7 +183,7 @@ CUDNN_LIB=/usr/lib/aarch64-linux-gnu/ ...@@ -183,7 +183,7 @@ CUDNN_LIB=/usr/lib/aarch64-linux-gnu/
`Example 2`: `Example 2`:
```shell ```shell
# Predict the video `/root/projects/videos/test.mp4` with `GPU` # Predict the video `/root/projects/videos/test.mp4` with `GPU`
./main --model_dir=/root/projects/models/yolov3_darknet --video_path=/root/projects/images/test.mp4 --use_gpu=1 ./main --model_dir=/root/projects/models/yolov3_darknet --video_path=/root/projects/images/test.mp4 --device=GPU
``` ```
Video prediction currently supports the `.mp4` format only; the `visualized prediction results` are saved to `output.mp4` in the current directory. Video prediction currently supports the `.mp4` format only; the `visualized prediction results` are saved to `output.mp4` in the current directory.
......
...@@ -101,7 +101,7 @@ make ...@@ -101,7 +101,7 @@ make
| --image_dir | Path of the image directory to predict | | --image_dir | Path of the image directory to predict |
| --video_file | Path of the video file to predict | | --video_file | Path of the video file to predict |
| --camera_id | Option | Camera ID used for prediction, default is -1 (do not predict from a camera) | | --camera_id | Option | Camera ID used for prediction, default is -1 (do not predict from a camera) |
| --use_gpu | Whether to use GPU for inference, 0 or 1 (default: 0) | | --device | Runtime device, one of `CPU/GPU/XPU` (default: `CPU`) |
| --gpu_id | GPU device id used for inference (default: 0) | | --gpu_id | GPU device id used for inference (default: 0) |
| --run_mode | Run mode when using GPU, default is fluid, options: fluid/trt_fp32/trt_fp16/trt_int8 | | --run_mode | Run mode when using GPU, default is fluid, options: fluid/trt_fp32/trt_fp16/trt_int8 |
| --batch_size | Batch size for inference, effective when `image_dir` is specified | | --batch_size | Batch size for inference, effective when `image_dir` is specified |
...@@ -125,7 +125,7 @@ make ...@@ -125,7 +125,7 @@ make
`Example 2`: `Example 2`:
```shell ```shell
# Predict the video `/root/projects/videos/test.mp4` with `GPU` # Predict the video `/root/projects/videos/test.mp4` with `GPU`
./build/main --model_dir=/root/projects/models/yolov3_darknet --video_path=/root/projects/images/test.mp4 --use_gpu=1 ./build/main --model_dir=/root/projects/models/yolov3_darknet --video_path=/root/projects/images/test.mp4 --device=GPU
``` ```
Video prediction currently supports the `.mp4` format only; the `visualized prediction results` are saved to `output.mp4` in the current directory. Video prediction currently supports the `.mp4` format only; the `visualized prediction results` are saved to `output.mp4` in the current directory.
......
...@@ -96,7 +96,7 @@ cd D:\projects\PaddleDetection\deploy\cpp\out\build\x64-Release ...@@ -96,7 +96,7 @@ cd D:\projects\PaddleDetection\deploy\cpp\out\build\x64-Release
| --image_dir | Path of the image directory to predict | | --image_dir | Path of the image directory to predict |
| --video_file | Path of the video file to predict | | --video_file | Path of the video file to predict |
| --camera_id | Option | Camera ID used for prediction, default is -1 (do not predict from a camera) | | --camera_id | Option | Camera ID used for prediction, default is -1 (do not predict from a camera) |
| --use_gpu | Whether to use GPU for inference, 0 or 1 (default: 0) | | --device | Runtime device, one of `CPU/GPU/XPU` (default: `CPU`) |
| --gpu_id | GPU device id used for inference (default: 0) | | --gpu_id | GPU device id used for inference (default: 0) |
| --run_mode | Run mode when using GPU, default is fluid, options: fluid/trt_fp32/trt_fp16/trt_int8 | | --run_mode | Run mode when using GPU, default is fluid, options: fluid/trt_fp32/trt_fp16/trt_int8 |
| --batch_size | Batch size for inference, effective when `image_dir` is specified | | --batch_size | Batch size for inference, effective when `image_dir` is specified |
...@@ -122,7 +122,7 @@ cd D:\projects\PaddleDetection\deploy\cpp\out\build\x64-Release ...@@ -122,7 +122,7 @@ cd D:\projects\PaddleDetection\deploy\cpp\out\build\x64-Release
`Example 2`: `Example 2`:
```shell ```shell
# Test the video `D:\\videos\\test.mp4` with `GPU` # Test the video `D:\\videos\\test.mp4` with `GPU`
.\main --model_dir=D:\\models\\yolov3_darknet --video_path=D:\\videos\\test.mp4 --use_gpu=1 .\main --model_dir=D:\\models\\yolov3_darknet --video_path=D:\\videos\\test.mp4 --device=GPU
``` ```
Video prediction currently supports the `.mp4` format only; the `visualized prediction results` are saved to `output.mp4` in the current directory. Video prediction currently supports the `.mp4` format only; the `visualized prediction results` are saved to `output.mp4` in the current directory.
......
...@@ -58,7 +58,7 @@ cv::Mat VisualizeResult(const cv::Mat& img, ...@@ -58,7 +58,7 @@ cv::Mat VisualizeResult(const cv::Mat& img,
class ObjectDetector { class ObjectDetector {
public: public:
explicit ObjectDetector(const std::string& model_dir, explicit ObjectDetector(const std::string& model_dir,
bool use_gpu=false, const std::string& device="CPU",
bool use_mkldnn=false, bool use_mkldnn=false,
int cpu_threads=1, int cpu_threads=1,
const std::string& run_mode="fluid", const std::string& run_mode="fluid",
...@@ -68,7 +68,7 @@ class ObjectDetector { ...@@ -68,7 +68,7 @@ class ObjectDetector {
const int trt_max_shape=1280, const int trt_max_shape=1280,
const int trt_opt_shape=640, const int trt_opt_shape=640,
bool trt_calib_mode=false) { bool trt_calib_mode=false) {
this->use_gpu_ = use_gpu; this->device_ = device;
this->gpu_id_ = gpu_id; this->gpu_id_ = gpu_id;
this->cpu_math_library_num_threads_ = cpu_threads; this->cpu_math_library_num_threads_ = cpu_threads;
this->use_mkldnn_ = use_mkldnn; this->use_mkldnn_ = use_mkldnn;
...@@ -106,7 +106,7 @@ class ObjectDetector { ...@@ -106,7 +106,7 @@ class ObjectDetector {
} }
private: private:
bool use_gpu_ = false; std::string device_ = "CPU";
int gpu_id_ = 0; int gpu_id_ = 0;
int cpu_math_library_num_threads_ = 1; int cpu_math_library_num_threads_ = 1;
bool use_mkldnn_ = false; bool use_mkldnn_ = false;
......
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
#include <sys/types.h> #include <sys/types.h>
#include <sys/stat.h> #include <sys/stat.h>
#include <math.h> #include <math.h>
#include <algorithm>
#ifdef _WIN32 #ifdef _WIN32
#include <direct.h> #include <direct.h>
...@@ -41,7 +42,8 @@ DEFINE_string(image_dir, "", "Dir of input image, `image_file` has a higher prio ...@@ -41,7 +42,8 @@ DEFINE_string(image_dir, "", "Dir of input image, `image_file` has a higher prio
DEFINE_int32(batch_size, 1, "batch_size"); DEFINE_int32(batch_size, 1, "batch_size");
DEFINE_string(video_file, "", "Path of input video, `video_file` or `camera_id` has a highest priority."); DEFINE_string(video_file, "", "Path of input video, `video_file` or `camera_id` has a highest priority.");
DEFINE_int32(camera_id, -1, "Device id of camera to predict"); DEFINE_int32(camera_id, -1, "Device id of camera to predict");
DEFINE_bool(use_gpu, false, "Infering with GPU or CPU"); DEFINE_bool(use_gpu, false, "Deprecated, please use `--device` to set the device you want to run.");
DEFINE_string(device, "CPU", "Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU.");
DEFINE_double(threshold, 0.5, "Threshold of score."); DEFINE_double(threshold, 0.5, "Threshold of score.");
DEFINE_string(output_dir, "output", "Directory of output visualization files."); DEFINE_string(output_dir, "output", "Directory of output visualization files.");
DEFINE_string(run_mode, "fluid", "Mode of running(fluid/trt_fp32/trt_fp16/trt_int8)"); DEFINE_string(run_mode, "fluid", "Mode of running(fluid/trt_fp32/trt_fp16/trt_int8)");
...@@ -56,7 +58,7 @@ DEFINE_bool(trt_calib_mode, false, "If the model is produced by TRT offline quan ...@@ -56,7 +58,7 @@ DEFINE_bool(trt_calib_mode, false, "If the model is produced by TRT offline quan
void PrintBenchmarkLog(std::vector<double> det_time, int img_num){ void PrintBenchmarkLog(std::vector<double> det_time, int img_num){
LOG(INFO) << "----------------------- Config info -----------------------"; LOG(INFO) << "----------------------- Config info -----------------------";
LOG(INFO) << "runtime_device: " << (FLAGS_use_gpu ? "gpu" : "cpu"); LOG(INFO) << "runtime_device: " << FLAGS_device;
LOG(INFO) << "ir_optim: " << "True"; LOG(INFO) << "ir_optim: " << "True";
LOG(INFO) << "enable_memory_optim: " << "True"; LOG(INFO) << "enable_memory_optim: " << "True";
int has_trt = FLAGS_run_mode.find("trt"); int has_trt = FLAGS_run_mode.find("trt");
...@@ -78,7 +80,7 @@ void PrintBenchmarkLog(std::vector<double> det_time, int img_num){ ...@@ -78,7 +80,7 @@ void PrintBenchmarkLog(std::vector<double> det_time, int img_num){
LOG(INFO) << "model_name: " << FLAGS_model_dir.substr(FLAGS_model_dir.find_last_of('/') + 1); LOG(INFO) << "model_name: " << FLAGS_model_dir.substr(FLAGS_model_dir.find_last_of('/') + 1);
LOG(INFO) << "----------------------- Perf info ------------------------"; LOG(INFO) << "----------------------- Perf info ------------------------";
LOG(INFO) << "Total number of predicted data: " << img_num LOG(INFO) << "Total number of predicted data: " << img_num
<< " and total time spent(s): " << " and total time spent(ms): "
<< std::accumulate(det_time.begin(), det_time.end(), 0); << std::accumulate(det_time.begin(), det_time.end(), 0);
LOG(INFO) << "preproce_time(ms): " << det_time[0] / img_num LOG(INFO) << "preproce_time(ms): " << det_time[0] / img_num
<< ", inference_time(ms): " << det_time[1] / img_num << ", inference_time(ms): " << det_time[1] / img_num
...@@ -358,8 +360,17 @@ int main(int argc, char** argv) { ...@@ -358,8 +360,17 @@ int main(int argc, char** argv) {
std::cout << "run_mode should be 'fluid', 'trt_fp32', 'trt_fp16' or 'trt_int8'."; std::cout << "run_mode should be 'fluid', 'trt_fp32', 'trt_fp16' or 'trt_int8'.";
return -1; return -1;
} }
transform(FLAGS_device.begin(),FLAGS_device.end(),FLAGS_device.begin(),::toupper);
if (!(FLAGS_device == "CPU" || FLAGS_device == "GPU" || FLAGS_device == "XPU")) {
std::cout << "device should be 'CPU', 'GPU' or 'XPU'.";
return -1;
}
if (FLAGS_use_gpu) {
std::cout << "Deprecated, please use `--device` to set the device you want to run.";
return -1;
}
// Load model and create a object detector // Load model and create a object detector
PaddleDetection::ObjectDetector det(FLAGS_model_dir, FLAGS_use_gpu, FLAGS_use_mkldnn, PaddleDetection::ObjectDetector det(FLAGS_model_dir, FLAGS_device, FLAGS_use_mkldnn,
FLAGS_cpu_threads, FLAGS_run_mode, FLAGS_batch_size,FLAGS_gpu_id, FLAGS_cpu_threads, FLAGS_run_mode, FLAGS_batch_size,FLAGS_gpu_id,
FLAGS_trt_min_shape, FLAGS_trt_max_shape, FLAGS_trt_opt_shape, FLAGS_trt_min_shape, FLAGS_trt_max_shape, FLAGS_trt_opt_shape,
FLAGS_trt_calib_mode); FLAGS_trt_calib_mode);
......
...@@ -30,7 +30,7 @@ void ObjectDetector::LoadModel(const std::string& model_dir, ...@@ -30,7 +30,7 @@ void ObjectDetector::LoadModel(const std::string& model_dir,
std::string prog_file = model_dir + OS_PATH_SEP + "model.pdmodel"; std::string prog_file = model_dir + OS_PATH_SEP + "model.pdmodel";
std::string params_file = model_dir + OS_PATH_SEP + "model.pdiparams"; std::string params_file = model_dir + OS_PATH_SEP + "model.pdiparams";
config.SetModel(prog_file, params_file); config.SetModel(prog_file, params_file);
if (this->use_gpu_) { if (this->device_ == "GPU") {
config.EnableUseGpu(200, this->gpu_id_); config.EnableUseGpu(200, this->gpu_id_);
config.SwitchIrOptim(true); config.SwitchIrOptim(true);
// use tensorrt // use tensorrt
...@@ -73,6 +73,8 @@ void ObjectDetector::LoadModel(const std::string& model_dir, ...@@ -73,6 +73,8 @@ void ObjectDetector::LoadModel(const std::string& model_dir,
} }
} }
} else if (this->device_ == "XPU"){
config.EnableXpu(10*1024*1024);
} else { } else {
config.DisableGpu(); config.DisableGpu();
if (this->use_mkldnn_) { if (this->use_mkldnn_) {
......
...@@ -21,26 +21,26 @@ During training, PaddleDetection includes the forward network and optimizer-related parameters, ...@@ -21,26 +21,26 @@ During training, PaddleDetection includes the forward network and optimizer-related parameters,
Run the following command in the terminal to predict: Run the following command in the terminal to predict:
```bash ```bash
python deploy/python/infer.py --model_dir=./inference/yolov3_mobilenet_v1_roadsign --image_file=./demo/road554.png --use_gpu=True python deploy/python/infer.py --model_dir=./inference/yolov3_mobilenet_v1_roadsign --image_file=./demo/road554.png --device=GPU
``` ```
The parameters are described as follows: The parameters are described as follows:
| Parameter | Required | Description | | Parameter | Required | Description |
|-------|-------|----------| |-------|-------|----------|
| --model_dir | Yes | Path of the exported model above | | --model_dir | Yes | Path of the exported model above |
| --image_file | Option | Image file to predict | | --image_file | Option | Image file to predict |
| --image_dir | Option | Path of the image directory to predict | | --image_dir | Option | Path of the image directory to predict |
| --video_file | Option | Video file to predict | | --video_file | Option | Video file to predict |
| --camera_id | Option | Camera ID used for prediction, default is -1 (do not predict from a camera; can be set to 0 - (number of cameras - 1)); press `q` in the visualization window during prediction to exit and write the results to output/output.mp4 | | --camera_id | Option | Camera ID used for prediction, default is -1 (do not predict from a camera; can be set to 0 - (number of cameras - 1)); press `q` in the visualization window during prediction to exit and write the results to output/output.mp4 |
| --use_gpu | No | Whether to use GPU, default is False | | --device | Option | Runtime device, one of `CPU/GPU/XPU` (default: `CPU`) |
| --run_mode | No | Run mode when using GPU, default is fluid, options: fluid/trt_fp32/trt_fp16/trt_int8 | | --run_mode | Option | Run mode when using GPU, default is fluid, options: fluid/trt_fp32/trt_fp16/trt_int8 |
| --batch_size | No | Batch size for inference, effective when `image_dir` is specified | | --batch_size | Option | Batch size for inference, effective when `image_dir` is specified, default is 1 |
| --threshold | No | Threshold of the prediction score, default is 0.5 | | --threshold | Option | Threshold of the prediction score, default is 0.5 |
| --output_dir | No | Root directory for saving visualization results, default is output/ | | --output_dir | Option | Root directory for saving visualization results, default is output/ |
| --run_benchmark | No | Whether to run benchmark; `--image_file` or `--image_dir` must also be specified | | --run_benchmark | Option | Whether to run benchmark; `--image_file` or `--image_dir` must also be specified, default is False |
| --enable_mkldnn | No | Whether to enable MKLDNN acceleration for CPU inference | | --enable_mkldnn | Option | Whether to enable MKLDNN acceleration for CPU inference, default is False |
| --cpu_threads | No | Number of CPU threads, default is 1 | | --cpu_threads | Option | Number of CPU threads, default is 1 |
Notes: Notes:
......
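Before the Python diffs below, it may help to see the post-parse handling the new flag relies on. The following is a minimal sketch, assuming an argparse-style `flags` object with `device` and `use_gpu` attributes; it mirrors the `__main__` changes made to `deploy/python/infer.py` further down rather than reproducing that file verbatim.

```python
from types import SimpleNamespace

def check_device(flags):
    """Normalize and validate the --device value (mirrors the __main__ changes in this commit)."""
    # Accept lower- or upper-case input: --device=gpu behaves like --device=GPU.
    flags.device = flags.device.upper()
    assert flags.device in ['CPU', 'GPU', 'XPU'], "device should be CPU, GPU or XPU"
    # The old boolean flag is rejected outright rather than silently mapped.
    assert not flags.use_gpu, "use_gpu has been deprecated, please use --device"
    return flags

# e.g. check_device(SimpleNamespace(device='xpu', use_gpu=False)).device == 'XPU'
```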
...@@ -49,7 +49,7 @@ class Detector(object): ...@@ -49,7 +49,7 @@ class Detector(object):
Args: Args:
config (object): config of model, defined by `Config(model_dir)` config (object): config of model, defined by `Config(model_dir)`
model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
use_gpu (bool): whether use gpu device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
run_mode (str): mode of running(fluid/trt_fp32/trt_fp16) run_mode (str): mode of running(fluid/trt_fp32/trt_fp16)
batch_size (int): size of pre batch in inference batch_size (int): size of pre batch in inference
trt_min_shape (int): min shape for dynamic shape in trt trt_min_shape (int): min shape for dynamic shape in trt
...@@ -62,7 +62,7 @@ class Detector(object): ...@@ -62,7 +62,7 @@ class Detector(object):
def __init__(self, def __init__(self,
pred_config, pred_config,
model_dir, model_dir,
use_gpu=False, device='CPU',
run_mode='fluid', run_mode='fluid',
batch_size=1, batch_size=1,
trt_min_shape=1, trt_min_shape=1,
...@@ -77,7 +77,7 @@ class Detector(object): ...@@ -77,7 +77,7 @@ class Detector(object):
run_mode=run_mode, run_mode=run_mode,
batch_size=batch_size, batch_size=batch_size,
min_subgraph_size=self.pred_config.min_subgraph_size, min_subgraph_size=self.pred_config.min_subgraph_size,
use_gpu=use_gpu, device=device,
use_dynamic_shape=self.pred_config.use_dynamic_shape, use_dynamic_shape=self.pred_config.use_dynamic_shape,
trt_min_shape=trt_min_shape, trt_min_shape=trt_min_shape,
trt_max_shape=trt_max_shape, trt_max_shape=trt_max_shape,
...@@ -177,7 +177,7 @@ class DetectorSOLOv2(Detector): ...@@ -177,7 +177,7 @@ class DetectorSOLOv2(Detector):
Args: Args:
config (object): config of model, defined by `Config(model_dir)` config (object): config of model, defined by `Config(model_dir)`
model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
use_gpu (bool): whether use gpu device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
run_mode (str): mode of running(fluid/trt_fp32/trt_fp16) run_mode (str): mode of running(fluid/trt_fp32/trt_fp16)
batch_size (int): size of pre batch in inference batch_size (int): size of pre batch in inference
trt_min_shape (int): min shape for dynamic shape in trt trt_min_shape (int): min shape for dynamic shape in trt
...@@ -189,7 +189,7 @@ class DetectorSOLOv2(Detector): ...@@ -189,7 +189,7 @@ class DetectorSOLOv2(Detector):
def __init__(self, def __init__(self,
pred_config, pred_config,
model_dir, model_dir,
use_gpu=False, device='CPU',
run_mode='fluid', run_mode='fluid',
batch_size=1, batch_size=1,
trt_min_shape=1, trt_min_shape=1,
...@@ -204,7 +204,7 @@ class DetectorSOLOv2(Detector): ...@@ -204,7 +204,7 @@ class DetectorSOLOv2(Detector):
run_mode=run_mode, run_mode=run_mode,
batch_size=batch_size, batch_size=batch_size,
min_subgraph_size=self.pred_config.min_subgraph_size, min_subgraph_size=self.pred_config.min_subgraph_size,
use_gpu=use_gpu, device=device,
use_dynamic_shape=self.pred_config.use_dynamic_shape, use_dynamic_shape=self.pred_config.use_dynamic_shape,
trt_min_shape=trt_min_shape, trt_min_shape=trt_min_shape,
trt_max_shape=trt_max_shape, trt_max_shape=trt_max_shape,
...@@ -352,7 +352,7 @@ class PredictConfig(): ...@@ -352,7 +352,7 @@ class PredictConfig():
def load_predictor(model_dir, def load_predictor(model_dir,
run_mode='fluid', run_mode='fluid',
batch_size=1, batch_size=1,
use_gpu=False, device='CPU',
min_subgraph_size=3, min_subgraph_size=3,
use_dynamic_shape=False, use_dynamic_shape=False,
trt_min_shape=1, trt_min_shape=1,
...@@ -364,7 +364,7 @@ def load_predictor(model_dir, ...@@ -364,7 +364,7 @@ def load_predictor(model_dir,
"""set AnalysisConfig, generate AnalysisPredictor """set AnalysisConfig, generate AnalysisPredictor
Args: Args:
model_dir (str): root path of __model__ and __params__ model_dir (str): root path of __model__ and __params__
use_gpu (bool): whether use gpu device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
run_mode (str): mode of running(fluid/trt_fp32/trt_fp16/trt_int8) run_mode (str): mode of running(fluid/trt_fp32/trt_fp16/trt_int8)
use_dynamic_shape (bool): use dynamic shape or not use_dynamic_shape (bool): use dynamic shape or not
trt_min_shape (int): min shape for dynamic shape in trt trt_min_shape (int): min shape for dynamic shape in trt
...@@ -375,25 +375,22 @@ def load_predictor(model_dir, ...@@ -375,25 +375,22 @@ def load_predictor(model_dir,
Returns: Returns:
predictor (PaddlePredictor): AnalysisPredictor predictor (PaddlePredictor): AnalysisPredictor
Raises: Raises:
ValueError: predict by TensorRT need use_gpu == True. ValueError: predict by TensorRT need device == 'GPU'.
""" """
if not use_gpu and not run_mode == 'fluid': if device != 'GPU' and run_mode != 'fluid':
raise ValueError( raise ValueError(
"Predict by TensorRT mode: {}, expect use_gpu==True, but use_gpu == {}" "Predict by TensorRT mode: {}, expect device=='GPU', but device == {}"
.format(run_mode, use_gpu)) .format(run_mode, device))
config = Config( config = Config(
os.path.join(model_dir, 'model.pdmodel'), os.path.join(model_dir, 'model.pdmodel'),
os.path.join(model_dir, 'model.pdiparams')) os.path.join(model_dir, 'model.pdiparams'))
precision_map = { if device == 'GPU':
'trt_int8': Config.Precision.Int8,
'trt_fp32': Config.Precision.Float32,
'trt_fp16': Config.Precision.Half
}
if use_gpu:
# initial GPU memory(M), device ID # initial GPU memory(M), device ID
config.enable_use_gpu(200, 0) config.enable_use_gpu(200, 0)
# optimize graph and fuse op # optimize graph and fuse op
config.switch_ir_optim(True) config.switch_ir_optim(True)
elif device == 'XPU':
config.enable_xpu(10 * 1024 * 1024)
else: else:
config.disable_gpu() config.disable_gpu()
config.set_cpu_math_library_num_threads(cpu_threads) config.set_cpu_math_library_num_threads(cpu_threads)
...@@ -408,6 +405,11 @@ def load_predictor(model_dir, ...@@ -408,6 +405,11 @@ def load_predictor(model_dir,
) )
pass pass
precision_map = {
'trt_int8': Config.Precision.Int8,
'trt_fp32': Config.Precision.Float32,
'trt_fp16': Config.Precision.Half
}
if run_mode in precision_map.keys(): if run_mode in precision_map.keys():
config.enable_tensorrt_engine( config.enable_tensorrt_engine(
workspace_size=1 << 10, workspace_size=1 << 10,
...@@ -582,7 +584,7 @@ def main(): ...@@ -582,7 +584,7 @@ def main():
detector = Detector( detector = Detector(
pred_config, pred_config,
FLAGS.model_dir, FLAGS.model_dir,
use_gpu=FLAGS.use_gpu, device=FLAGS.device,
run_mode=FLAGS.run_mode, run_mode=FLAGS.run_mode,
batch_size=FLAGS.batch_size, batch_size=FLAGS.batch_size,
trt_min_shape=FLAGS.trt_min_shape, trt_min_shape=FLAGS.trt_min_shape,
...@@ -595,7 +597,7 @@ def main(): ...@@ -595,7 +597,7 @@ def main():
detector = DetectorSOLOv2( detector = DetectorSOLOv2(
pred_config, pred_config,
FLAGS.model_dir, FLAGS.model_dir,
use_gpu=FLAGS.use_gpu, device=FLAGS.device,
run_mode=FLAGS.run_mode, run_mode=FLAGS.run_mode,
batch_size=FLAGS.batch_size, batch_size=FLAGS.batch_size,
trt_min_shape=FLAGS.trt_min_shape, trt_min_shape=FLAGS.trt_min_shape,
...@@ -645,5 +647,9 @@ if __name__ == '__main__': ...@@ -645,5 +647,9 @@ if __name__ == '__main__':
parser = argsparser() parser = argsparser()
FLAGS = parser.parse_args() FLAGS = parser.parse_args()
print_arguments(FLAGS) print_arguments(FLAGS)
FLAGS.device = FLAGS.device.upper()
assert FLAGS.device in ['CPU', 'GPU', 'XPU'
], "device should be CPU, GPU or XPU"
assert not FLAGS.use_gpu, "use_gpu has been deprecated, please use --device"
main() main()
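To condense the `load_predictor` change above: the boolean `use_gpu` switch becomes a three-way dispatch on the device string. Below is a minimal sketch of that branch using the `paddle.inference` API (assuming Paddle 2.x is installed); the 200 MB GPU memory pool and 10 MB XPU L3 workspace follow the values used in this commit, while the helper name `build_config` and the MKLDNN handling are illustrative.

```python
# Minimal sketch of the device dispatch in load_predictor() after this commit.
import os
from paddle.inference import Config, create_predictor

def build_config(model_dir, device='CPU', cpu_threads=1, enable_mkldnn=False):
    config = Config(
        os.path.join(model_dir, 'model.pdmodel'),
        os.path.join(model_dir, 'model.pdiparams'))
    if device == 'GPU':
        # initial GPU memory pool (MB) and device id, as in the commit
        config.enable_use_gpu(200, 0)
        config.switch_ir_optim(True)
    elif device == 'XPU':
        # XPU L3 workspace size in bytes (10 MB, as in the commit)
        config.enable_xpu(10 * 1024 * 1024)
    else:
        config.disable_gpu()
        config.set_cpu_math_library_num_threads(cpu_threads)
        if enable_mkldnn:
            config.enable_mkldnn()
    return config

# predictor = create_predictor(build_config('./inference_model', device='XPU'))
```

Moving the TensorRT `precision_map` below this branch, as the diff does, keeps the CPU and XPU paths untouched when TensorRT options change.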
...@@ -156,7 +156,7 @@ def main(): ...@@ -156,7 +156,7 @@ def main():
detector = Detector( detector = Detector(
pred_config, pred_config,
FLAGS.det_model_dir, FLAGS.det_model_dir,
use_gpu=FLAGS.use_gpu, device=FLAGS.device,
run_mode=FLAGS.run_mode, run_mode=FLAGS.run_mode,
trt_min_shape=FLAGS.trt_min_shape, trt_min_shape=FLAGS.trt_min_shape,
trt_max_shape=FLAGS.trt_max_shape, trt_max_shape=FLAGS.trt_max_shape,
...@@ -169,7 +169,7 @@ def main(): ...@@ -169,7 +169,7 @@ def main():
topdown_keypoint_detector = KeyPoint_Detector( topdown_keypoint_detector = KeyPoint_Detector(
pred_config, pred_config,
FLAGS.keypoint_model_dir, FLAGS.keypoint_model_dir,
use_gpu=FLAGS.use_gpu, device=FLAGS.device,
run_mode=FLAGS.run_mode, run_mode=FLAGS.run_mode,
trt_min_shape=FLAGS.trt_min_shape, trt_min_shape=FLAGS.trt_min_shape,
trt_max_shape=FLAGS.trt_max_shape, trt_max_shape=FLAGS.trt_max_shape,
...@@ -193,5 +193,8 @@ if __name__ == '__main__': ...@@ -193,5 +193,8 @@ if __name__ == '__main__':
parser = argsparser() parser = argsparser()
FLAGS = parser.parse_args() FLAGS = parser.parse_args()
print_arguments(FLAGS) print_arguments(FLAGS)
FLAGS.device = FLAGS.device.upper()
assert FLAGS.device in ['CPU', 'GPU', 'XPU'
], "device should be CPU, GPU or XPU"
main() main()
...@@ -44,7 +44,7 @@ class KeyPoint_Detector(object): ...@@ -44,7 +44,7 @@ class KeyPoint_Detector(object):
Args: Args:
config (object): config of model, defined by `Config(model_dir)` config (object): config of model, defined by `Config(model_dir)`
model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
use_gpu (bool): whether use gpu device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
run_mode (str): mode of running(fluid/trt_fp32/trt_fp16) run_mode (str): mode of running(fluid/trt_fp32/trt_fp16)
trt_min_shape (int): min shape for dynamic shape in trt trt_min_shape (int): min shape for dynamic shape in trt
trt_max_shape (int): max shape for dynamic shape in trt trt_max_shape (int): max shape for dynamic shape in trt
...@@ -56,7 +56,7 @@ class KeyPoint_Detector(object): ...@@ -56,7 +56,7 @@ class KeyPoint_Detector(object):
def __init__(self, def __init__(self,
pred_config, pred_config,
model_dir, model_dir,
use_gpu=False, device='CPU',
run_mode='fluid', run_mode='fluid',
trt_min_shape=1, trt_min_shape=1,
trt_max_shape=1280, trt_max_shape=1280,
...@@ -69,7 +69,7 @@ class KeyPoint_Detector(object): ...@@ -69,7 +69,7 @@ class KeyPoint_Detector(object):
model_dir, model_dir,
run_mode=run_mode, run_mode=run_mode,
min_subgraph_size=self.pred_config.min_subgraph_size, min_subgraph_size=self.pred_config.min_subgraph_size,
use_gpu=use_gpu, device=device,
use_dynamic_shape=self.pred_config.use_dynamic_shape, use_dynamic_shape=self.pred_config.use_dynamic_shape,
trt_min_shape=trt_min_shape, trt_min_shape=trt_min_shape,
trt_max_shape=trt_max_shape, trt_max_shape=trt_max_shape,
...@@ -236,7 +236,7 @@ class PredictConfig_KeyPoint(): ...@@ -236,7 +236,7 @@ class PredictConfig_KeyPoint():
def load_predictor(model_dir, def load_predictor(model_dir,
run_mode='fluid', run_mode='fluid',
batch_size=1, batch_size=1,
use_gpu=False, device='CPU',
min_subgraph_size=3, min_subgraph_size=3,
use_dynamic_shape=False, use_dynamic_shape=False,
trt_min_shape=1, trt_min_shape=1,
...@@ -248,7 +248,7 @@ def load_predictor(model_dir, ...@@ -248,7 +248,7 @@ def load_predictor(model_dir,
"""set AnalysisConfig, generate AnalysisPredictor """set AnalysisConfig, generate AnalysisPredictor
Args: Args:
model_dir (str): root path of __model__ and __params__ model_dir (str): root path of __model__ and __params__
use_gpu (bool): whether use gpu device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
run_mode (str): mode of running(fluid/trt_fp32/trt_fp16/trt_int8) run_mode (str): mode of running(fluid/trt_fp32/trt_fp16/trt_int8)
use_dynamic_shape (bool): use dynamic shape or not use_dynamic_shape (bool): use dynamic shape or not
trt_min_shape (int): min shape for dynamic shape in trt trt_min_shape (int): min shape for dynamic shape in trt
...@@ -259,25 +259,22 @@ def load_predictor(model_dir, ...@@ -259,25 +259,22 @@ def load_predictor(model_dir,
Returns: Returns:
predictor (PaddlePredictor): AnalysisPredictor predictor (PaddlePredictor): AnalysisPredictor
Raises: Raises:
ValueError: predict by TensorRT need use_gpu == True. ValueError: predict by TensorRT need device == 'GPU'.
""" """
if not use_gpu and not run_mode == 'fluid': if device != 'GPU' and run_mode != 'fluid':
raise ValueError( raise ValueError(
"Predict by TensorRT mode: {}, expect use_gpu==True, but use_gpu == {}" "Predict by TensorRT mode: {}, expect device=='GPU', but device == {}"
.format(run_mode, use_gpu)) .format(run_mode, device))
config = Config( config = Config(
os.path.join(model_dir, 'model.pdmodel'), os.path.join(model_dir, 'model.pdmodel'),
os.path.join(model_dir, 'model.pdiparams')) os.path.join(model_dir, 'model.pdiparams'))
precision_map = { if device == 'GPU':
'trt_int8': Config.Precision.Int8,
'trt_fp32': Config.Precision.Float32,
'trt_fp16': Config.Precision.Half
}
if use_gpu:
# initial GPU memory(M), device ID # initial GPU memory(M), device ID
config.enable_use_gpu(200, 0) config.enable_use_gpu(200, 0)
# optimize graph and fuse op # optimize graph and fuse op
config.switch_ir_optim(True) config.switch_ir_optim(True)
elif device == 'XPU':
config.enable_xpu(10 * 1024 * 1024)
else: else:
config.disable_gpu() config.disable_gpu()
config.set_cpu_math_library_num_threads(cpu_threads) config.set_cpu_math_library_num_threads(cpu_threads)
...@@ -292,6 +289,11 @@ def load_predictor(model_dir, ...@@ -292,6 +289,11 @@ def load_predictor(model_dir,
) )
pass pass
precision_map = {
'trt_int8': Config.Precision.Int8,
'trt_fp32': Config.Precision.Float32,
'trt_fp16': Config.Precision.Half
}
if run_mode in precision_map.keys(): if run_mode in precision_map.keys():
config.enable_tensorrt_engine( config.enable_tensorrt_engine(
workspace_size=1 << 10, workspace_size=1 << 10,
...@@ -381,7 +383,7 @@ def main(): ...@@ -381,7 +383,7 @@ def main():
detector = KeyPoint_Detector( detector = KeyPoint_Detector(
pred_config, pred_config,
FLAGS.model_dir, FLAGS.model_dir,
use_gpu=FLAGS.use_gpu, device=FLAGS.device,
run_mode=FLAGS.run_mode, run_mode=FLAGS.run_mode,
trt_min_shape=FLAGS.trt_min_shape, trt_min_shape=FLAGS.trt_min_shape,
trt_max_shape=FLAGS.trt_max_shape, trt_max_shape=FLAGS.trt_max_shape,
...@@ -427,5 +429,9 @@ if __name__ == '__main__': ...@@ -427,5 +429,9 @@ if __name__ == '__main__':
parser = argsparser() parser = argsparser()
FLAGS = parser.parse_args() FLAGS = parser.parse_args()
print_arguments(FLAGS) print_arguments(FLAGS)
FLAGS.device = FLAGS.device.upper()
assert FLAGS.device in ['CPU', 'GPU', 'XPU'
], "device should be CPU, GPU or XPU"
assert not FLAGS.use_gpu, "use_gpu has been deprecated, please use --device"
main() main()
...@@ -68,10 +68,11 @@ def argsparser(): ...@@ -68,10 +68,11 @@ def argsparser():
default='fluid', default='fluid',
help="mode of running(fluid/trt_fp32/trt_fp16/trt_int8)") help="mode of running(fluid/trt_fp32/trt_fp16/trt_int8)")
parser.add_argument( parser.add_argument(
"--use_gpu", "--device",
type=ast.literal_eval, type=str,
default=False, default='cpu',
help="Whether to predict with GPU.") help="Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU."
)
parser.add_argument( parser.add_argument(
"--run_benchmark", "--run_benchmark",
type=ast.literal_eval, type=ast.literal_eval,
......
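For reference, a stand-alone sketch of the flag definitions this commit adds to the Python deploy tools: `--device` is the new switch, and `--use_gpu` is kept only so that old command lines fail with a clear message rather than silently running on CPU. The parser scaffolding here is illustrative; only the two `add_argument` calls mirror the diff.

```python
# Sketch of the CLI flags added to the Python deploy tools in this commit.
import argparse
import ast

parser = argparse.ArgumentParser()
parser.add_argument(
    "--device",
    type=str,
    default='cpu',
    help="Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU.")
parser.add_argument(
    "--use_gpu",
    type=ast.literal_eval,
    default=False,
    help="Deprecated, please use `--device`.")

# Example: `--device gpu` parses as 'gpu' and is upper-cased by the caller before use.
FLAGS = parser.parse_args()
```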
...@@ -59,11 +59,17 @@ def argsparser(): ...@@ -59,11 +59,17 @@ def argsparser():
type=str, type=str,
default='fluid', default='fluid',
help="mode of running(fluid/trt_fp32/trt_fp16/trt_int8)") help="mode of running(fluid/trt_fp32/trt_fp16/trt_int8)")
parser.add_argument(
"--device",
type=str,
default='cpu',
help="Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU."
)
parser.add_argument( parser.add_argument(
"--use_gpu", "--use_gpu",
type=ast.literal_eval, type=ast.literal_eval,
default=False, default=False,
help="Whether to predict with GPU.") help="Deprecated, please use `--device`.")
parser.add_argument( parser.add_argument(
"--run_benchmark", "--run_benchmark",
type=ast.literal_eval, type=ast.literal_eval,
......
...@@ -153,7 +153,7 @@ CUDNN_LIB=/usr/lib/aarch64-linux-gnu/ ...@@ -153,7 +153,7 @@ CUDNN_LIB=/usr/lib/aarch64-linux-gnu/
| --image_file | Path of the image file to predict | | --image_file | Path of the image file to predict |
| --video_path | Path of the video file to predict | | --video_path | Path of the video file to predict |
| --camera_id | Option | Camera ID used for prediction, default is -1 (do not predict from a camera) | | --camera_id | Option | Camera ID used for prediction, default is -1 (do not predict from a camera) |
| --use_gpu | Whether to use GPU for inference, 0 or 1 (default: 0) | | --device | Runtime device, one of `CPU/GPU/XPU` (default: `CPU`) |
| --gpu_id | GPU device id used for inference (default: 0) | | --gpu_id | GPU device id used for inference (default: 0) |
| --run_mode | Run mode when using GPU, default is fluid, options: fluid/trt_fp32/trt_fp16/trt_int8 | | --run_mode | Run mode when using GPU, default is fluid, options: fluid/trt_fp32/trt_fp16/trt_int8 |
| --run_benchmark | Whether to repeat inference to benchmark speed | | --run_benchmark | Whether to repeat inference to benchmark speed |
...@@ -174,7 +174,7 @@ CUDNN_LIB=/usr/lib/aarch64-linux-gnu/ ...@@ -174,7 +174,7 @@ CUDNN_LIB=/usr/lib/aarch64-linux-gnu/
`Example 2`: `Example 2`:
```shell ```shell
# Predict the video `/root/projects/videos/test.mp4` with `GPU` # Predict the video `/root/projects/videos/test.mp4` with `GPU`
./main --model_dir=/root/projects/models/yolov3_darknet --video_path=/root/projects/images/test.mp4 --use_gpu=1 ./main --model_dir=/root/projects/models/yolov3_darknet --video_path=/root/projects/images/test.mp4 --device=GPU
``` ```
Video prediction currently supports the `.mp4` format only; the `visualized prediction results` are saved to `output.mp4` in the current directory. Video prediction currently supports the `.mp4` format only; the `visualized prediction results` are saved to `output.mp4` in the current directory.
......
...@@ -100,7 +100,7 @@ make ...@@ -100,7 +100,7 @@ make
| --image_file | Path of the image file to predict | | --image_file | Path of the image file to predict |
| --video_path | Path of the video file to predict | | --video_path | Path of the video file to predict |
| --camera_id | Option | Camera ID used for prediction, default is -1 (do not predict from a camera) | | --camera_id | Option | Camera ID used for prediction, default is -1 (do not predict from a camera) |
| --use_gpu | Whether to use GPU for inference, 0 or 1 (default: 0) | | --device | Runtime device, one of `CPU/GPU/XPU` (default: `CPU`) |
| --gpu_id | GPU device id used for inference (default: 0) | | --gpu_id | GPU device id used for inference (default: 0) |
| --run_mode | Run mode when using GPU, default is fluid, options: fluid/trt_fp32/trt_fp16/trt_int8 | | --run_mode | Run mode when using GPU, default is fluid, options: fluid/trt_fp32/trt_fp16/trt_int8 |
| --run_benchmark | Whether to repeat inference to benchmark speed | | --run_benchmark | Whether to repeat inference to benchmark speed |
...@@ -121,6 +121,6 @@ make ...@@ -121,6 +121,6 @@ make
`Example 2`: `Example 2`:
```shell ```shell
# Predict the video `/root/projects/videos/test.mp4` with `GPU` # Predict the video `/root/projects/videos/test.mp4` with `GPU`
./build/main --model_dir=/root/projects/models/yolov3_darknet --video_path=/root/projects/images/test.mp4 --use_gpu=1 ./build/main --model_dir=/root/projects/models/yolov3_darknet --video_path=/root/projects/images/test.mp4 --device=GPU
``` ```
Video prediction currently supports the `.mp4` format only; the `visualized prediction results` are saved to `output.mp4` in the current directory. Video prediction currently supports the `.mp4` format only; the `visualized prediction results` are saved to `output.mp4` in the current directory.
...@@ -95,7 +95,7 @@ cd D:\projects\PaddleDetection\deploy\cpp\out\build\x64-Release ...@@ -95,7 +95,7 @@ cd D:\projects\PaddleDetection\deploy\cpp\out\build\x64-Release
| --image_file | Path of the image file to predict | | --image_file | Path of the image file to predict |
| --video_path | Path of the video file to predict | | --video_path | Path of the video file to predict |
| --camera_id | Option | Camera ID used for prediction, default is -1 (do not predict from a camera) | | --camera_id | Option | Camera ID used for prediction, default is -1 (do not predict from a camera) |
| --use_gpu | Whether to use GPU for inference, 0 or 1 (default: 0) | | --device | Runtime device, one of `CPU/GPU/XPU` (default: `CPU`) |
| --gpu_id | GPU device id used for inference (default: 0) | | --gpu_id | GPU device id used for inference (default: 0) |
| --run_mode | Run mode when using GPU, default is fluid, options: fluid/trt_fp32/trt_fp16/trt_int8 | | --run_mode | Run mode when using GPU, default is fluid, options: fluid/trt_fp32/trt_fp16/trt_int8 |
| --run_benchmark | Whether to repeat inference to benchmark speed | | --run_benchmark | Whether to repeat inference to benchmark speed |
...@@ -118,7 +118,7 @@ cd D:\projects\PaddleDetection\deploy\cpp\out\build\x64-Release ...@@ -118,7 +118,7 @@ cd D:\projects\PaddleDetection\deploy\cpp\out\build\x64-Release
`Example 2`: `Example 2`:
```shell ```shell
# Test the video `D:\\videos\\test.mp4` with `GPU` # Test the video `D:\\videos\\test.mp4` with `GPU`
.\main --model_dir=D:\\models\\yolov3_darknet --video_path=D:\\videos\\test.mp4 --use_gpu=1 .\main --model_dir=D:\\models\\yolov3_darknet --video_path=D:\\videos\\test.mp4 --device=GPU
``` ```
Video prediction currently supports the `.mp4` format only; the `visualized prediction results` are saved to `output.mp4` in the current directory. Video prediction currently supports the `.mp4` format only; the `visualized prediction results` are saved to `output.mp4` in the current directory.
......
...@@ -56,20 +56,20 @@ cv::Mat VisualizeResult(const cv::Mat& img, ...@@ -56,20 +56,20 @@ cv::Mat VisualizeResult(const cv::Mat& img,
class ObjectDetector { class ObjectDetector {
public: public:
explicit ObjectDetector(const std::string& model_dir, explicit ObjectDetector(const std::string& model_dir,
bool use_gpu=false, const std::string& device,
const std::string& run_mode="fluid", const std::string& run_mode="fluid",
const int gpu_id=0, const int gpu_id=0,
bool trt_calib_mode=false) { bool trt_calib_mode=false) {
config_.load_config(model_dir); config_.load_config(model_dir);
threshold_ = config_.draw_threshold_; threshold_ = config_.draw_threshold_;
preprocessor_.Init(config_.preprocess_info_, config_.arch_); preprocessor_.Init(config_.preprocess_info_, config_.arch_);
LoadModel(model_dir, use_gpu, config_.min_subgraph_size_, 1, run_mode, gpu_id, trt_calib_mode); LoadModel(model_dir, device, config_.min_subgraph_size_, 1, run_mode, gpu_id, trt_calib_mode);
} }
// Load Paddle inference model // Load Paddle inference model
void LoadModel( void LoadModel(
const std::string& model_dir, const std::string& model_dir,
bool use_gpu, const std::string& device,
const int min_subgraph_size, const int min_subgraph_size,
const int batch_size = 1, const int batch_size = 1,
const std::string& run_mode = "fluid", const std::string& run_mode = "fluid",
......
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
#include <vector> #include <vector>
#include <sys/types.h> #include <sys/types.h>
#include <sys/stat.h> #include <sys/stat.h>
#include <algorithm>
#ifdef _WIN32 #ifdef _WIN32
#include <direct.h> #include <direct.h>
...@@ -35,7 +36,8 @@ ...@@ -35,7 +36,8 @@
DEFINE_string(model_dir, "", "Path of inference model"); DEFINE_string(model_dir, "", "Path of inference model");
DEFINE_string(image_file, "", "Path of input image"); DEFINE_string(image_file, "", "Path of input image");
DEFINE_string(video_path, "", "Path of input video"); DEFINE_string(video_path, "", "Path of input video");
DEFINE_bool(use_gpu, false, "Infering with GPU or CPU"); DEFINE_bool(use_gpu, false, "Deprecated, please use `--device` to set the device you want to run.");
DEFINE_string(device, "CPU", "Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU.");
DEFINE_bool(use_camera, false, "Use camera or not"); DEFINE_bool(use_camera, false, "Use camera or not");
DEFINE_string(run_mode, "fluid", "Mode of running(fluid/trt_fp32/trt_fp16)"); DEFINE_string(run_mode, "fluid", "Mode of running(fluid/trt_fp32/trt_fp16)");
DEFINE_int32(gpu_id, 0, "Device id of GPU to execute"); DEFINE_int32(gpu_id, 0, "Device id of GPU to execute");
...@@ -204,9 +206,18 @@ int main(int argc, char** argv) { ...@@ -204,9 +206,18 @@ int main(int argc, char** argv) {
std::cout << "run_mode should be 'fluid', 'trt_fp32', 'trt_fp16' or 'trt_int8'."; std::cout << "run_mode should be 'fluid', 'trt_fp32', 'trt_fp16' or 'trt_int8'.";
return -1; return -1;
} }
transform(FLAGS_device.begin(),FLAGS_device.end(),FLAGS_device.begin(),::toupper);
if (!(FLAGS_device == "CPU" || FLAGS_device == "GPU" || FLAGS_device == "XPU")) {
std::cout << "device should be 'CPU', 'GPU' or 'XPU'.";
return -1;
}
if (FLAGS_use_gpu) {
std::cout << "Deprecated, please use `--device` to set the device you want to run.";
return -1;
}
// Load model and create a object detector // Load model and create a object detector
PaddleDetection::ObjectDetector det(FLAGS_model_dir, FLAGS_use_gpu, PaddleDetection::ObjectDetector det(FLAGS_model_dir, FLAGS_device,
FLAGS_run_mode, FLAGS_gpu_id, FLAGS_trt_calib_mode); FLAGS_run_mode, FLAGS_gpu_id, FLAGS_trt_calib_mode);
// Do inference on input video or image // Do inference on input video or image
if (!FLAGS_video_path.empty() || FLAGS_use_camera) { if (!FLAGS_video_path.empty() || FLAGS_use_camera) {
......
...@@ -21,7 +21,7 @@ namespace PaddleDetection { ...@@ -21,7 +21,7 @@ namespace PaddleDetection {
// Load Model and create model predictor // Load Model and create model predictor
void ObjectDetector::LoadModel(const std::string& model_dir, void ObjectDetector::LoadModel(const std::string& model_dir,
bool use_gpu, const std::string& device,
const int min_subgraph_size, const int min_subgraph_size,
const int batch_size, const int batch_size,
const std::string& run_mode, const std::string& run_mode,
...@@ -31,7 +31,7 @@ void ObjectDetector::LoadModel(const std::string& model_dir, ...@@ -31,7 +31,7 @@ void ObjectDetector::LoadModel(const std::string& model_dir,
std::string prog_file = model_dir + OS_PATH_SEP + "__model__"; std::string prog_file = model_dir + OS_PATH_SEP + "__model__";
std::string params_file = model_dir + OS_PATH_SEP + "__params__"; std::string params_file = model_dir + OS_PATH_SEP + "__params__";
config.SetModel(prog_file, params_file); config.SetModel(prog_file, params_file);
if (use_gpu) { if (device == "GPU") {
config.EnableUseGpu(100, gpu_id); config.EnableUseGpu(100, gpu_id);
config.SwitchIrOptim(true); config.SwitchIrOptim(true);
if (run_mode != "fluid") { if (run_mode != "fluid") {
...@@ -51,6 +51,8 @@ void ObjectDetector::LoadModel(const std::string& model_dir, ...@@ -51,6 +51,8 @@ void ObjectDetector::LoadModel(const std::string& model_dir,
false, false,
trt_calib_mode); trt_calib_mode);
} }
} else if (device == "XPU"){
config.EnableXpu(10*1024*1024);
} else { } else {
config.DisableGpu(); config.DisableGpu();
} }
......
...@@ -45,7 +45,7 @@ python deploy/python/infer.py --model_dir=/path/to/models --image_file=/path/to/ ...@@ -45,7 +45,7 @@ python deploy/python/infer.py --model_dir=/path/to/models --image_file=/path/to/
| --image_file | Option | Image file to predict | | --image_file | Option | Image file to predict |
| --video_file | Option | Video file to predict | | --video_file | Option | Video file to predict |
| --camera_id | Option | Camera ID used for prediction, default is -1 (do not predict from a camera; can be set to 0 - (number of cameras - 1)); press `q` in the visualization window during prediction to exit and write the results to output/output.mp4 | | --camera_id | Option | Camera ID used for prediction, default is -1 (do not predict from a camera; can be set to 0 - (number of cameras - 1)); press `q` in the visualization window during prediction to exit and write the results to output/output.mp4 |
| --use_gpu | No | Whether to use GPU, default is False | | --device | Option | Runtime device, one of `CPU/GPU` (default: `CPU`) |
| --run_mode | No | Run mode when using GPU, default is fluid, options: fluid/trt_fp32/trt_fp16/trt_int8 | | --run_mode | No | Run mode when using GPU, default is fluid, options: fluid/trt_fp32/trt_fp16/trt_int8 |
| --threshold | No | Threshold of the prediction score, default is 0.5 | | --threshold | No | Threshold of the prediction score, default is 0.5 |
| --output_dir | No | Root directory for saving visualization results, default is output/ | | --output_dir | No | Root directory for saving visualization results, default is output/ |
......
...@@ -55,7 +55,7 @@ class Detector(object): ...@@ -55,7 +55,7 @@ class Detector(object):
Args: Args:
config (object): config of model, defined by `Config(model_dir)` config (object): config of model, defined by `Config(model_dir)`
model_dir (str): root path of __model__, __params__ and infer_cfg.yml model_dir (str): root path of __model__, __params__ and infer_cfg.yml
use_gpu (bool): whether use gpu device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
run_mode (str): mode of running(fluid/trt_fp32/trt_fp16) run_mode (str): mode of running(fluid/trt_fp32/trt_fp16)
threshold (float): threshold to reserve the result for output. threshold (float): threshold to reserve the result for output.
""" """
...@@ -63,20 +63,20 @@ class Detector(object): ...@@ -63,20 +63,20 @@ class Detector(object):
def __init__(self, def __init__(self,
config, config,
model_dir, model_dir,
use_gpu=False, device='CPU',
run_mode='fluid', run_mode='fluid',
threshold=0.5, threshold=0.5,
trt_calib_mode=False): trt_calib_mode=False):
self.config = config self.config = config
if self.config.use_python_inference: if self.config.use_python_inference:
self.executor, self.program, self.fecth_targets = load_executor( self.executor, self.program, self.fecth_targets = load_executor(
model_dir, use_gpu=use_gpu) model_dir, device=device)
else: else:
self.predictor = load_predictor( self.predictor = load_predictor(
model_dir, model_dir,
run_mode=run_mode, run_mode=run_mode,
min_subgraph_size=self.config.min_subgraph_size, min_subgraph_size=self.config.min_subgraph_size,
use_gpu=use_gpu, device=device,
trt_calib_mode=trt_calib_mode) trt_calib_mode=trt_calib_mode)
def preprocess(self, im): def preprocess(self, im):
...@@ -221,14 +221,14 @@ class DetectorSOLOv2(Detector): ...@@ -221,14 +221,14 @@ class DetectorSOLOv2(Detector):
def __init__(self, def __init__(self,
config, config,
model_dir, model_dir,
use_gpu=False, device='CPU',
run_mode='fluid', run_mode='fluid',
threshold=0.5, threshold=0.5,
trt_calib_mode=False): trt_calib_mode=False):
super(DetectorSOLOv2, self).__init__( super(DetectorSOLOv2, self).__init__(
config=config, config=config,
model_dir=model_dir, model_dir=model_dir,
use_gpu=use_gpu, device=device,
run_mode=run_mode, run_mode=run_mode,
threshold=threshold, threshold=threshold,
trt_calib_mode=trt_calib_mode) trt_calib_mode=trt_calib_mode)
...@@ -382,24 +382,24 @@ class Config(): ...@@ -382,24 +382,24 @@ class Config():
def load_predictor(model_dir, def load_predictor(model_dir,
run_mode='fluid', run_mode='fluid',
batch_size=1, batch_size=1,
use_gpu=False, device='CPU',
min_subgraph_size=3, min_subgraph_size=3,
trt_calib_mode=False): trt_calib_mode=False):
"""set AnalysisConfig, generate AnalysisPredictor """set AnalysisConfig, generate AnalysisPredictor
Args: Args:
model_dir (str): root path of __model__ and __params__ model_dir (str): root path of __model__ and __params__
use_gpu (bool): whether use gpu device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
trt_calib_mode (bool): If the model is produced by TRT offline quantitative trt_calib_mode (bool): If the model is produced by TRT offline quantitative
calibration, trt_calib_mode need to set True calibration, trt_calib_mode need to set True
Returns: Returns:
predictor (PaddlePredictor): AnalysisPredictor predictor (PaddlePredictor): AnalysisPredictor
Raises: Raises:
ValueError: predict by TensorRT need use_gpu == True. ValueError: predict by TensorRT need device == GPU.
""" """
if not use_gpu and not run_mode == 'fluid': if device != 'GPU' and not run_mode == 'fluid':
raise ValueError( raise ValueError(
"Predict by TensorRT mode: {}, expect use_gpu==True, but use_gpu == {}" "Predict by TensorRT mode: {}, expect device==GPU, but device == {}"
.format(run_mode, use_gpu)) .format(run_mode, device))
precision_map = { precision_map = {
'trt_int8': fluid.core.AnalysisConfig.Precision.Int8, 'trt_int8': fluid.core.AnalysisConfig.Precision.Int8,
'trt_fp32': fluid.core.AnalysisConfig.Precision.Float32, 'trt_fp32': fluid.core.AnalysisConfig.Precision.Float32,
...@@ -408,11 +408,13 @@ def load_predictor(model_dir, ...@@ -408,11 +408,13 @@ def load_predictor(model_dir,
config = fluid.core.AnalysisConfig( config = fluid.core.AnalysisConfig(
os.path.join(model_dir, '__model__'), os.path.join(model_dir, '__model__'),
os.path.join(model_dir, '__params__')) os.path.join(model_dir, '__params__'))
if use_gpu: if device == 'GPU':
# initial GPU memory(M), device ID # initial GPU memory(M), device ID
config.enable_use_gpu(100, 0) config.enable_use_gpu(100, 0)
# optimize graph and fuse op # optimize graph and fuse op
config.switch_ir_optim(True) config.switch_ir_optim(True)
elif device == 'XPU':
config.enable_xpu(10 * 1024 * 1024)
else: else:
config.disable_gpu() config.disable_gpu()
...@@ -435,8 +437,8 @@ def load_predictor(model_dir, ...@@ -435,8 +437,8 @@ def load_predictor(model_dir,
return predictor return predictor
def load_executor(model_dir, use_gpu=False): def load_executor(model_dir, device='CPU'):
if use_gpu: if device == 'GPU':
place = fluid.CUDAPlace(0) place = fluid.CUDAPlace(0)
else: else:
place = fluid.CPUPlace() place = fluid.CPUPlace()
...@@ -539,14 +541,14 @@ def main(): ...@@ -539,14 +541,14 @@ def main():
detector = Detector( detector = Detector(
config, config,
FLAGS.model_dir, FLAGS.model_dir,
use_gpu=FLAGS.use_gpu, device=FLAGS.device,
run_mode=FLAGS.run_mode, run_mode=FLAGS.run_mode,
trt_calib_mode=FLAGS.trt_calib_mode) trt_calib_mode=FLAGS.trt_calib_mode)
if config.arch == 'SOLOv2': if config.arch == 'SOLOv2':
detector = DetectorSOLOv2( detector = DetectorSOLOv2(
config, config,
FLAGS.model_dir, FLAGS.model_dir,
use_gpu=FLAGS.use_gpu, device=FLAGS.device,
run_mode=FLAGS.run_mode, run_mode=FLAGS.run_mode,
trt_calib_mode=FLAGS.trt_calib_mode) trt_calib_mode=FLAGS.trt_calib_mode)
# predict from image # predict from image
...@@ -584,11 +586,18 @@ if __name__ == '__main__': ...@@ -584,11 +586,18 @@ if __name__ == '__main__':
type=str, type=str,
default='fluid', default='fluid',
help="mode of running(fluid/trt_fp32/trt_fp16/trt_int8)") help="mode of running(fluid/trt_fp32/trt_fp16/trt_int8)")
parser.add_argument(
"--device",
type=str,
default='cpu',
help="Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU."
)
parser.add_argument( parser.add_argument(
"--use_gpu", "--use_gpu",
type=ast.literal_eval, type=ast.literal_eval,
default=False, default=False,
help="Whether to predict with GPU.") help="Deprecated, please use `--device` to set the device you want to run."
)
parser.add_argument( parser.add_argument(
"--run_benchmark", "--run_benchmark",
type=ast.literal_eval, type=ast.literal_eval,
...@@ -612,5 +621,9 @@ if __name__ == '__main__': ...@@ -612,5 +621,9 @@ if __name__ == '__main__':
print_arguments(FLAGS) print_arguments(FLAGS)
if FLAGS.image_file != '' and FLAGS.video_file != '': if FLAGS.image_file != '' and FLAGS.video_file != '':
assert "Cannot predict image and video at the same time" assert "Cannot predict image and video at the same time"
FLAGS.device = FLAGS.device.upper()
assert FLAGS.device in ['CPU', 'GPU', 'XPU'
], "device should be CPU, GPU or XPU"
assert not FLAGS.use_gpu, "use_gpu has been deprecated, please use --device"
main() main()
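One behavioural detail worth calling out: the TensorRT precondition is unchanged in spirit but re-expressed in terms of the device string, so any `trt_*` run mode still requires a GPU. A minimal sketch of that guard, with the error message taken from the diff (the helper name is illustrative):

```python
# Sketch of the TensorRT guard after the switch to a device string:
# any trt_* run mode still requires device == 'GPU'.
def check_trt_requirements(device, run_mode='fluid'):
    if device != 'GPU' and run_mode != 'fluid':
        raise ValueError(
            "Predict by TensorRT mode: {}, expect device=='GPU', but device == {}"
            .format(run_mode, device))

check_trt_requirements('GPU', 'trt_fp16')    # fine
# check_trt_requirements('XPU', 'trt_fp16')  # raises ValueError
```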