Unverified · Commit 44b83132 authored by Guanghua Yu, committed by GitHub

support xpu inference (#3307)

* support xpu inference
Parent 5146077c
......@@ -159,7 +159,7 @@ CUDNN_LIB=/usr/lib/aarch64-linux-gnu/
| --image_dir | Path of the image directory to predict |
| --video_file | Path of the video file to predict |
| --camera_id | Camera ID used for prediction; default is -1 (the camera is not used) |
| --use_gpu | Whether to use GPU for prediction; supported values are 0 or 1 (default 0) |
| --device | Runtime device, one of `CPU/GPU/XPU`; default is `CPU` |
| --gpu_id | GPU device id used for inference (default 0) |
| --run_mode | Run mode when using GPU; default is fluid, options are (fluid/trt_fp32/trt_fp16/trt_int8) |
| --batch_size | Batch size for prediction; effective when `image_dir` is specified |
......@@ -183,7 +183,7 @@ CUDNN_LIB=/usr/lib/aarch64-linux-gnu/
`Example 2`:
```shell
# Use `GPU` to predict the video `/root/projects/videos/test.mp4`
./main --model_dir=/root/projects/models/yolov3_darknet --video_path=/root/projects/images/test.mp4 --use_gpu=1
./main --model_dir=/root/projects/models/yolov3_darknet --video_path=/root/projects/images/test.mp4 --device=GPU
```
Video files are currently supported in `.mp4` format only; the `visualized prediction result` is saved to the `output.mp4` file in the current directory.
......
......@@ -101,7 +101,7 @@ make
| --image_dir | Path of the image directory to predict |
| --video_file | Path of the video file to predict |
| --camera_id | Camera ID used for prediction; default is -1 (the camera is not used) |
| --use_gpu | Whether to use GPU for prediction; supported values are 0 or 1 (default 0) |
| --device | Runtime device, one of `CPU/GPU/XPU`; default is `CPU` |
| --gpu_id | GPU device id used for inference (default 0) |
| --run_mode | Run mode when using GPU; default is fluid, options are (fluid/trt_fp32/trt_fp16/trt_int8) |
| --batch_size | Batch size for prediction; effective when `image_dir` is specified |
......@@ -125,7 +125,7 @@ make
`Example 2`:
```shell
# Use `GPU` to predict the video `/root/projects/videos/test.mp4`
./build/main --model_dir=/root/projects/models/yolov3_darknet --video_path=/root/projects/images/test.mp4 --use_gpu=1
./build/main --model_dir=/root/projects/models/yolov3_darknet --video_path=/root/projects/images/test.mp4 --device=GPU
```
Video files are currently supported in `.mp4` format only; the `visualized prediction result` is saved to the `output.mp4` file in the current directory.
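Since `--device` also accepts `XPU`, the same video prediction can presumably run on an XPU device. This is a sketch, assuming the Paddle inference library was built with XPU support and the same model and video paths as Example 2:

```shell
# Hypothetical XPU run with the same model and video as Example 2
./build/main --model_dir=/root/projects/models/yolov3_darknet --video_path=/root/projects/images/test.mp4 --device=XPU
```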
......
......@@ -96,7 +96,7 @@ cd D:\projects\PaddleDetection\deploy\cpp\out\build\x64-Release
| --image_dir | Path of the image directory to predict |
| --video_file | Path of the video file to predict |
| --camera_id | Camera ID used for prediction; default is -1 (the camera is not used) |
| --use_gpu | Whether to use GPU for prediction; supported values are 0 or 1 (default 0) |
| --device | Runtime device, one of `CPU/GPU/XPU`; default is `CPU` |
| --gpu_id | GPU device id used for inference (default 0) |
| --run_mode | Run mode when using GPU; default is fluid, options are (fluid/trt_fp32/trt_fp16/trt_int8) |
| --batch_size | Batch size for prediction; effective when `image_dir` is specified |
......@@ -122,7 +122,7 @@ cd D:\projects\PaddleDetection\deploy\cpp\out\build\x64-Release
`Example 2`:
```shell
# Use `GPU` to test the video `D:\\videos\\test.mp4`
.\main --model_dir=D:\\models\\yolov3_darknet --video_path=D:\\videos\\test.mp4 --use_gpu=1
.\main --model_dir=D:\\models\\yolov3_darknet --video_path=D:\\videos\\test.mp4 --device=GPU
```
Video files are currently supported in `.mp4` format only; the `visualized prediction result` is saved to the `output.mp4` file in the current directory.
......
......@@ -58,7 +58,7 @@ cv::Mat VisualizeResult(const cv::Mat& img,
class ObjectDetector {
public:
explicit ObjectDetector(const std::string& model_dir,
bool use_gpu=false,
const std::string& device="CPU",
bool use_mkldnn=false,
int cpu_threads=1,
const std::string& run_mode="fluid",
......@@ -68,7 +68,7 @@ class ObjectDetector {
const int trt_max_shape=1280,
const int trt_opt_shape=640,
bool trt_calib_mode=false) {
this->use_gpu_ = use_gpu;
this->device_ = device;
this->gpu_id_ = gpu_id;
this->cpu_math_library_num_threads_ = cpu_threads;
this->use_mkldnn_ = use_mkldnn;
......@@ -106,7 +106,7 @@ class ObjectDetector {
}
private:
bool use_gpu_ = false;
std::string device_ = "CPU";
int gpu_id_ = 0;
int cpu_math_library_num_threads_ = 1;
bool use_mkldnn_ = false;
......
......@@ -22,6 +22,7 @@
#include <sys/types.h>
#include <sys/stat.h>
#include <math.h>
#include <algorithm>
#ifdef _WIN32
#include <direct.h>
......@@ -41,7 +42,8 @@ DEFINE_string(image_dir, "", "Dir of input image, `image_file` has a higher prio
DEFINE_int32(batch_size, 1, "batch_size");
DEFINE_string(video_file, "", "Path of input video, `video_file` or `camera_id` has a highest priority.");
DEFINE_int32(camera_id, -1, "Device id of camera to predict");
DEFINE_bool(use_gpu, false, "Infering with GPU or CPU");
DEFINE_bool(use_gpu, false, "Deprecated, please use `--device` to set the device you want to run.");
DEFINE_string(device, "CPU", "Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU.");
DEFINE_double(threshold, 0.5, "Threshold of score.");
DEFINE_string(output_dir, "output", "Directory of output visualization files.");
DEFINE_string(run_mode, "fluid", "Mode of running(fluid/trt_fp32/trt_fp16/trt_int8)");
......@@ -56,7 +58,7 @@ DEFINE_bool(trt_calib_mode, false, "If the model is produced by TRT offline quan
void PrintBenchmarkLog(std::vector<double> det_time, int img_num){
LOG(INFO) << "----------------------- Config info -----------------------";
LOG(INFO) << "runtime_device: " << (FLAGS_use_gpu ? "gpu" : "cpu");
LOG(INFO) << "runtime_device: " << FLAGS_device;
LOG(INFO) << "ir_optim: " << "True";
LOG(INFO) << "enable_memory_optim: " << "True";
int has_trt = FLAGS_run_mode.find("trt");
......@@ -78,7 +80,7 @@ void PrintBenchmarkLog(std::vector<double> det_time, int img_num){
LOG(INFO) << "model_name: " << FLAGS_model_dir.substr(FLAGS_model_dir.find_last_of('/') + 1);
LOG(INFO) << "----------------------- Perf info ------------------------";
LOG(INFO) << "Total number of predicted data: " << img_num
<< " and total time spent(s): "
<< " and total time spent(ms): "
<< std::accumulate(det_time.begin(), det_time.end(), 0);
LOG(INFO) << "preproce_time(ms): " << det_time[0] / img_num
<< ", inference_time(ms): " << det_time[1] / img_num
......@@ -358,8 +360,17 @@ int main(int argc, char** argv) {
std::cout << "run_mode should be 'fluid', 'trt_fp32', 'trt_fp16' or 'trt_int8'.";
return -1;
}
transform(FLAGS_device.begin(),FLAGS_device.end(),FLAGS_device.begin(),::toupper);
if (!(FLAGS_device == "CPU" || FLAGS_device == "GPU" || FLAGS_device == "XPU")) {
std::cout << "device should be 'CPU', 'GPU' or 'XPU'.";
return -1;
}
if (FLAGS_use_gpu) {
std::cout << "Deprecated, please use `--device` to set the device you want to run.";
return -1;
}
// Load model and create a object detector
PaddleDetection::ObjectDetector det(FLAGS_model_dir, FLAGS_use_gpu, FLAGS_use_mkldnn,
PaddleDetection::ObjectDetector det(FLAGS_model_dir, FLAGS_device, FLAGS_use_mkldnn,
FLAGS_cpu_threads, FLAGS_run_mode, FLAGS_batch_size,FLAGS_gpu_id,
FLAGS_trt_min_shape, FLAGS_trt_max_shape, FLAGS_trt_opt_shape,
FLAGS_trt_calib_mode);
......
......@@ -30,7 +30,7 @@ void ObjectDetector::LoadModel(const std::string& model_dir,
std::string prog_file = model_dir + OS_PATH_SEP + "model.pdmodel";
std::string params_file = model_dir + OS_PATH_SEP + "model.pdiparams";
config.SetModel(prog_file, params_file);
if (this->use_gpu_) {
if (this->device_ == "GPU") {
config.EnableUseGpu(200, this->gpu_id_);
config.SwitchIrOptim(true);
// use tensorrt
......@@ -73,6 +73,8 @@ void ObjectDetector::LoadModel(const std::string& model_dir,
}
}
} else if (this->device_ == "XPU"){
config.EnableXpu(10*1024*1024);
} else {
config.DisableGpu();
if (this->use_mkldnn_) {
......
......@@ -21,26 +21,26 @@ The PaddleDetection training process includes the network forward pass and optimizer-related parameters,
Run the following command in the terminal to make predictions:
```bash
python deploy/python/infer.py --model_dir=./inference/yolov3_mobilenet_v1_roadsign --image_file=./demo/road554.png --use_gpu=True
python deploy/python/infer.py --model_dir=./inference/yolov3_mobilenet_v1_roadsign --image_file=./demo/road554.png --device=GPU
```
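`--device` also accepts `XPU`; a hypothetical variant of the same command, assuming the Paddle inference library was built with XPU support:

```bash
# Hypothetical run of the same model on an XPU device
python deploy/python/infer.py --model_dir=./inference/yolov3_mobilenet_v1_roadsign --image_file=./demo/road554.png --device=XPU
```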
The parameters are described as follows:
| Parameter | Required | Description |
|-------|-------|----------|
| --model_dir | Yes | Path of the model exported above |
| --image_file | Option | Image to predict |
| --model_dir | Yes | Path of the model exported above |
| --image_file | Option | Image to predict |
| --image_dir | Option | Path of the image directory to predict |
| --video_file | Option | Video to predict |
| --video_file | Option | Video to predict |
| --camera_id | Option | Camera ID used for prediction, default -1 (the camera is not used; can be set to 0 - (number of cameras - 1)); during prediction, press `q` in the visualization window to quit and save the result to output/output.mp4 |
| --use_gpu | No | Whether to use GPU, default False |
| --run_mode | No | Run mode when using GPU; default is fluid, options (fluid/trt_fp32/trt_fp16/trt_int8) |
| --batch_size | No | Batch size for prediction; effective when `image_dir` is specified |
| --threshold | No | Score threshold for prediction, default 0.5 |
| --output_dir | No | Root directory for saving visualization results, default output/ |
| --run_benchmark | No | Whether to run the benchmark; `--image_file` or `--image_dir` must also be specified |
| --enable_mkldnn | No | Whether to enable MKLDNN acceleration for CPU prediction |
| --cpu_threads | No | Number of CPU threads, default 1 |
| --device | Option | Runtime device, one of `CPU/GPU/XPU`; default is `CPU` |
| --run_mode | Option | Run mode when using GPU; default is fluid, options (fluid/trt_fp32/trt_fp16/trt_int8) |
| --batch_size | Option | Batch size for prediction; effective when `image_dir` is specified, default 1 |
| --threshold | Option | Score threshold for prediction, default 0.5 |
| --output_dir | Option | Root directory for saving visualization results, default output/ |
| --run_benchmark | Option | Whether to run the benchmark; `--image_file` or `--image_dir` must also be specified, default False |
| --enable_mkldnn | Option | Whether to enable MKLDNN acceleration for CPU prediction, default False |
| --cpu_threads | Option | Number of CPU threads, default 1 |
Notes:
......
......@@ -49,7 +49,7 @@ class Detector(object):
Args:
config (object): config of model, defined by `Config(model_dir)`
model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
use_gpu (bool): whether use gpu
device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
run_mode (str): mode of running(fluid/trt_fp32/trt_fp16)
batch_size (int): size of pre batch in inference
trt_min_shape (int): min shape for dynamic shape in trt
......@@ -62,7 +62,7 @@ class Detector(object):
def __init__(self,
pred_config,
model_dir,
use_gpu=False,
device='CPU',
run_mode='fluid',
batch_size=1,
trt_min_shape=1,
......@@ -77,7 +77,7 @@ class Detector(object):
run_mode=run_mode,
batch_size=batch_size,
min_subgraph_size=self.pred_config.min_subgraph_size,
use_gpu=use_gpu,
device=device,
use_dynamic_shape=self.pred_config.use_dynamic_shape,
trt_min_shape=trt_min_shape,
trt_max_shape=trt_max_shape,
......@@ -177,7 +177,7 @@ class DetectorSOLOv2(Detector):
Args:
config (object): config of model, defined by `Config(model_dir)`
model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
use_gpu (bool): whether use gpu
device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
run_mode (str): mode of running(fluid/trt_fp32/trt_fp16)
batch_size (int): size of pre batch in inference
trt_min_shape (int): min shape for dynamic shape in trt
......@@ -189,7 +189,7 @@ class DetectorSOLOv2(Detector):
def __init__(self,
pred_config,
model_dir,
use_gpu=False,
device='CPU',
run_mode='fluid',
batch_size=1,
trt_min_shape=1,
......@@ -204,7 +204,7 @@ class DetectorSOLOv2(Detector):
run_mode=run_mode,
batch_size=batch_size,
min_subgraph_size=self.pred_config.min_subgraph_size,
use_gpu=use_gpu,
device=device,
use_dynamic_shape=self.pred_config.use_dynamic_shape,
trt_min_shape=trt_min_shape,
trt_max_shape=trt_max_shape,
......@@ -352,7 +352,7 @@ class PredictConfig():
def load_predictor(model_dir,
run_mode='fluid',
batch_size=1,
use_gpu=False,
device='CPU',
min_subgraph_size=3,
use_dynamic_shape=False,
trt_min_shape=1,
......@@ -364,7 +364,7 @@ def load_predictor(model_dir,
"""set AnalysisConfig, generate AnalysisPredictor
Args:
model_dir (str): root path of __model__ and __params__
use_gpu (bool): whether use gpu
device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
run_mode (str): mode of running(fluid/trt_fp32/trt_fp16/trt_int8)
use_dynamic_shape (bool): use dynamic shape or not
trt_min_shape (int): min shape for dynamic shape in trt
......@@ -375,25 +375,22 @@ def load_predictor(model_dir,
Returns:
predictor (PaddlePredictor): AnalysisPredictor
Raises:
ValueError: predict by TensorRT need use_gpu == True.
ValueError: predict by TensorRT need device == 'GPU'.
"""
if not use_gpu and not run_mode == 'fluid':
if device != 'GPU' and run_mode != 'fluid':
raise ValueError(
"Predict by TensorRT mode: {}, expect use_gpu==True, but use_gpu == {}"
.format(run_mode, use_gpu))
"Predict by TensorRT mode: {}, expect device=='GPU', but device == {}"
.format(run_mode, device))
config = Config(
os.path.join(model_dir, 'model.pdmodel'),
os.path.join(model_dir, 'model.pdiparams'))
precision_map = {
'trt_int8': Config.Precision.Int8,
'trt_fp32': Config.Precision.Float32,
'trt_fp16': Config.Precision.Half
}
if use_gpu:
if device == 'GPU':
# initial GPU memory(M), device ID
config.enable_use_gpu(200, 0)
# optimize graph and fuse op
config.switch_ir_optim(True)
elif device == 'XPU':
config.enable_xpu(10 * 1024 * 1024)
else:
config.disable_gpu()
config.set_cpu_math_library_num_threads(cpu_threads)
......@@ -408,6 +405,11 @@ def load_predictor(model_dir,
)
pass
precision_map = {
'trt_int8': Config.Precision.Int8,
'trt_fp32': Config.Precision.Float32,
'trt_fp16': Config.Precision.Half
}
if run_mode in precision_map.keys():
config.enable_tensorrt_engine(
workspace_size=1 << 10,
......@@ -582,7 +584,7 @@ def main():
detector = Detector(
pred_config,
FLAGS.model_dir,
use_gpu=FLAGS.use_gpu,
device=FLAGS.device,
run_mode=FLAGS.run_mode,
batch_size=FLAGS.batch_size,
trt_min_shape=FLAGS.trt_min_shape,
......@@ -595,7 +597,7 @@ def main():
detector = DetectorSOLOv2(
pred_config,
FLAGS.model_dir,
use_gpu=FLAGS.use_gpu,
device=FLAGS.device,
run_mode=FLAGS.run_mode,
batch_size=FLAGS.batch_size,
trt_min_shape=FLAGS.trt_min_shape,
......@@ -645,5 +647,9 @@ if __name__ == '__main__':
parser = argsparser()
FLAGS = parser.parse_args()
print_arguments(FLAGS)
FLAGS.device = FLAGS.device.upper()
assert FLAGS.device in ['CPU', 'GPU', 'XPU'
], "device should be CPU, GPU or XPU"
assert not FLAGS.use_gpu, "use_gpu has been deprecated, please use --device"
main()
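Because `FLAGS.device` is upper-cased before the check, the flag is effectively case-insensitive, while passing `--use_gpu=True` now trips the deprecation assert. A sketch of an invocation the new guard accepts, reusing the paths from the example above:

```bash
# `--device=xpu` is normalized to XPU; `--use_gpu=True` would now fail the assert
python deploy/python/infer.py --model_dir=./inference/yolov3_mobilenet_v1_roadsign --image_file=./demo/road554.png --device=xpu
```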
......@@ -156,7 +156,7 @@ def main():
detector = Detector(
pred_config,
FLAGS.det_model_dir,
use_gpu=FLAGS.use_gpu,
device=FLAGS.device,
run_mode=FLAGS.run_mode,
trt_min_shape=FLAGS.trt_min_shape,
trt_max_shape=FLAGS.trt_max_shape,
......@@ -169,7 +169,7 @@ def main():
topdown_keypoint_detector = KeyPoint_Detector(
pred_config,
FLAGS.keypoint_model_dir,
use_gpu=FLAGS.use_gpu,
device=FLAGS.device,
run_mode=FLAGS.run_mode,
trt_min_shape=FLAGS.trt_min_shape,
trt_max_shape=FLAGS.trt_max_shape,
......@@ -193,5 +193,8 @@ if __name__ == '__main__':
parser = argsparser()
FLAGS = parser.parse_args()
print_arguments(FLAGS)
FLAGS.device = FLAGS.device.upper()
assert FLAGS.device in ['CPU', 'GPU', 'XPU'
], "device should be CPU, GPU or XPU"
main()
......@@ -44,7 +44,7 @@ class KeyPoint_Detector(object):
Args:
config (object): config of model, defined by `Config(model_dir)`
model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
use_gpu (bool): whether use gpu
device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
run_mode (str): mode of running(fluid/trt_fp32/trt_fp16)
trt_min_shape (int): min shape for dynamic shape in trt
trt_max_shape (int): max shape for dynamic shape in trt
......@@ -56,7 +56,7 @@ class KeyPoint_Detector(object):
def __init__(self,
pred_config,
model_dir,
use_gpu=False,
device='CPU',
run_mode='fluid',
trt_min_shape=1,
trt_max_shape=1280,
......@@ -69,7 +69,7 @@ class KeyPoint_Detector(object):
model_dir,
run_mode=run_mode,
min_subgraph_size=self.pred_config.min_subgraph_size,
use_gpu=use_gpu,
device=device,
use_dynamic_shape=self.pred_config.use_dynamic_shape,
trt_min_shape=trt_min_shape,
trt_max_shape=trt_max_shape,
......@@ -236,7 +236,7 @@ class PredictConfig_KeyPoint():
def load_predictor(model_dir,
run_mode='fluid',
batch_size=1,
use_gpu=False,
device='CPU',
min_subgraph_size=3,
use_dynamic_shape=False,
trt_min_shape=1,
......@@ -248,7 +248,7 @@ def load_predictor(model_dir,
"""set AnalysisConfig, generate AnalysisPredictor
Args:
model_dir (str): root path of __model__ and __params__
use_gpu (bool): whether use gpu
device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
run_mode (str): mode of running(fluid/trt_fp32/trt_fp16/trt_int8)
use_dynamic_shape (bool): use dynamic shape or not
trt_min_shape (int): min shape for dynamic shape in trt
......@@ -259,25 +259,22 @@ def load_predictor(model_dir,
Returns:
predictor (PaddlePredictor): AnalysisPredictor
Raises:
ValueError: predict by TensorRT need use_gpu == True.
ValueError: predict by TensorRT need device == 'GPU'.
"""
if not use_gpu and not run_mode == 'fluid':
if device != 'GPU' and run_mode != 'fluid':
raise ValueError(
"Predict by TensorRT mode: {}, expect use_gpu==True, but use_gpu == {}"
.format(run_mode, use_gpu))
"Predict by TensorRT mode: {}, expect device=='GPU', but device == {}"
.format(run_mode, device))
config = Config(
os.path.join(model_dir, 'model.pdmodel'),
os.path.join(model_dir, 'model.pdiparams'))
precision_map = {
'trt_int8': Config.Precision.Int8,
'trt_fp32': Config.Precision.Float32,
'trt_fp16': Config.Precision.Half
}
if use_gpu:
if device == 'GPU':
# initial GPU memory(M), device ID
config.enable_use_gpu(200, 0)
# optimize graph and fuse op
config.switch_ir_optim(True)
elif device == 'XPU':
config.enable_xpu(10 * 1024 * 1024)
else:
config.disable_gpu()
config.set_cpu_math_library_num_threads(cpu_threads)
......@@ -292,6 +289,11 @@ def load_predictor(model_dir,
)
pass
precision_map = {
'trt_int8': Config.Precision.Int8,
'trt_fp32': Config.Precision.Float32,
'trt_fp16': Config.Precision.Half
}
if run_mode in precision_map.keys():
config.enable_tensorrt_engine(
workspace_size=1 << 10,
......@@ -381,7 +383,7 @@ def main():
detector = KeyPoint_Detector(
pred_config,
FLAGS.model_dir,
use_gpu=FLAGS.use_gpu,
device=FLAGS.device,
run_mode=FLAGS.run_mode,
trt_min_shape=FLAGS.trt_min_shape,
trt_max_shape=FLAGS.trt_max_shape,
......@@ -427,5 +429,9 @@ if __name__ == '__main__':
parser = argsparser()
FLAGS = parser.parse_args()
print_arguments(FLAGS)
FLAGS.device = FLAGS.device.upper()
assert FLAGS.device in ['CPU', 'GPU', 'XPU'
], "device should be CPU, GPU or XPU"
assert not FLAGS.use_gpu, "use_gpu has been deprecated, please use --device"
main()
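The keypoint predictor gets the same normalization and validation. Assuming this file is `deploy/python/keypoint_infer.py` and that it exposes the usual `--image_file` input flag (both assumptions, not confirmed by this diff), an invocation might look like:

```bash
# Hypothetical: the script path, model path, and --image_file flag are assumptions, not shown in this diff
python deploy/python/keypoint_infer.py --model_dir=./output_inference/your_keypoint_model --image_file=./demo/example.jpg --device=GPU
```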
......@@ -68,10 +68,11 @@ def argsparser():
default='fluid',
help="mode of running(fluid/trt_fp32/trt_fp16/trt_int8)")
parser.add_argument(
"--use_gpu",
type=ast.literal_eval,
default=False,
help="Whether to predict with GPU.")
"--device",
type=str,
default='cpu',
help="Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU."
)
parser.add_argument(
"--run_benchmark",
type=ast.literal_eval,
......
......@@ -59,11 +59,17 @@ def argsparser():
type=str,
default='fluid',
help="mode of running(fluid/trt_fp32/trt_fp16/trt_int8)")
parser.add_argument(
"--device",
type=str,
default='cpu',
help="Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU."
)
parser.add_argument(
"--use_gpu",
type=ast.literal_eval,
default=False,
help="Whether to predict with GPU.")
help="Deprecated, please use `--device`.")
parser.add_argument(
"--run_benchmark",
type=ast.literal_eval,
......
......@@ -153,7 +153,7 @@ CUDNN_LIB=/usr/lib/aarch64-linux-gnu/
| --image_file | Path of the image file to predict |
| --video_path | Path of the video file to predict |
| --camera_id | Camera ID used for prediction; default is -1 (the camera is not used) |
| --use_gpu | Whether to use GPU for prediction; supported values are 0 or 1 (default 0) |
| --device | Runtime device, one of `CPU/GPU/XPU`; default is `CPU` |
| --gpu_id | GPU device id used for inference (default 0) |
| --run_mode | Run mode when using GPU; default is fluid, options are (fluid/trt_fp32/trt_fp16/trt_int8) |
| --run_benchmark | Whether to run prediction repeatedly for benchmark timing |
......@@ -174,7 +174,7 @@ CUDNN_LIB=/usr/lib/aarch64-linux-gnu/
`Example 2`:
```shell
# Use `GPU` to predict the video `/root/projects/videos/test.mp4`
./main --model_dir=/root/projects/models/yolov3_darknet --video_path=/root/projects/images/test.mp4 --use_gpu=1
./main --model_dir=/root/projects/models/yolov3_darknet --video_path=/root/projects/images/test.mp4 --device=GPU
```
Video files are currently supported in `.mp4` format only; the `visualized prediction result` is saved to the `output.mp4` file in the current directory.
......
......@@ -100,7 +100,7 @@ make
| --image_file | Path of the image file to predict |
| --video_path | Path of the video file to predict |
| --camera_id | Camera ID used for prediction; default is -1 (the camera is not used) |
| --use_gpu | Whether to use GPU for prediction; supported values are 0 or 1 (default 0) |
| --device | Runtime device, one of `CPU/GPU/XPU`; default is `CPU` |
| --gpu_id | GPU device id used for inference (default 0) |
| --run_mode | Run mode when using GPU; default is fluid, options are (fluid/trt_fp32/trt_fp16/trt_int8) |
| --run_benchmark | Whether to run prediction repeatedly for benchmark timing |
......@@ -121,6 +121,6 @@ make
`Example 2`:
```shell
# Use `GPU` to predict the video `/root/projects/videos/test.mp4`
./build/main --model_dir=/root/projects/models/yolov3_darknet --video_path=/root/projects/images/test.mp4 --use_gpu=1
./build/main --model_dir=/root/projects/models/yolov3_darknet --video_path=/root/projects/images/test.mp4 --device=GPU
```
Video files are currently supported in `.mp4` format only; the `visualized prediction result` is saved to the `output.mp4` file in the current directory.
......@@ -95,7 +95,7 @@ cd D:\projects\PaddleDetection\deploy\cpp\out\build\x64-Release
| --image_file | Path of the image file to predict |
| --video_path | Path of the video file to predict |
| --camera_id | Camera ID used for prediction; default is -1 (the camera is not used) |
| --use_gpu | Whether to use GPU for prediction; supported values are 0 or 1 (default 0) |
| --device | Runtime device, one of `CPU/GPU/XPU`; default is `CPU` |
| --gpu_id | GPU device id used for inference (default 0) |
| --run_mode | Run mode when using GPU; default is fluid, options are (fluid/trt_fp32/trt_fp16/trt_int8) |
| --run_benchmark | Whether to run prediction repeatedly for benchmark timing |
......@@ -118,7 +118,7 @@ cd D:\projects\PaddleDetection\deploy\cpp\out\build\x64-Release
`Example 2`:
```shell
# Use `GPU` to test the video `D:\\videos\\test.mp4`
.\main --model_dir=D:\\models\\yolov3_darknet --video_path=D:\\videos\\test.mp4 --use_gpu=1
.\main --model_dir=D:\\models\\yolov3_darknet --video_path=D:\\videos\\test.mp4 --device=GPU
```
Video files are currently supported in `.mp4` format only; the `visualized prediction result` is saved to the `output.mp4` file in the current directory.
......
......@@ -56,20 +56,20 @@ cv::Mat VisualizeResult(const cv::Mat& img,
class ObjectDetector {
public:
explicit ObjectDetector(const std::string& model_dir,
bool use_gpu=false,
const std::string& device,
const std::string& run_mode="fluid",
const int gpu_id=0,
bool trt_calib_mode=false) {
config_.load_config(model_dir);
threshold_ = config_.draw_threshold_;
preprocessor_.Init(config_.preprocess_info_, config_.arch_);
LoadModel(model_dir, use_gpu, config_.min_subgraph_size_, 1, run_mode, gpu_id, trt_calib_mode);
LoadModel(model_dir, device, config_.min_subgraph_size_, 1, run_mode, gpu_id, trt_calib_mode);
}
// Load Paddle inference model
void LoadModel(
const std::string& model_dir,
bool use_gpu,
const std::string& device,
const int min_subgraph_size,
const int batch_size = 1,
const std::string& run_mode = "fluid",
......
......@@ -19,6 +19,7 @@
#include <vector>
#include <sys/types.h>
#include <sys/stat.h>
#include <algorithm>
#ifdef _WIN32
#include <direct.h>
......@@ -35,7 +36,8 @@
DEFINE_string(model_dir, "", "Path of inference model");
DEFINE_string(image_file, "", "Path of input image");
DEFINE_string(video_path, "", "Path of input video");
DEFINE_bool(use_gpu, false, "Infering with GPU or CPU");
DEFINE_bool(use_gpu, false, "Deprecated, please use `--device` to set the device you want to run.");
DEFINE_string(device, "CPU", "Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU.");
DEFINE_bool(use_camera, false, "Use camera or not");
DEFINE_string(run_mode, "fluid", "Mode of running(fluid/trt_fp32/trt_fp16)");
DEFINE_int32(gpu_id, 0, "Device id of GPU to execute");
......@@ -204,9 +206,18 @@ int main(int argc, char** argv) {
std::cout << "run_mode should be 'fluid', 'trt_fp32', 'trt_fp16' or 'trt_int8'.";
return -1;
}
transform(FLAGS_device.begin(),FLAGS_device.end(),FLAGS_device.begin(),::toupper);
if (!(FLAGS_device == "CPU" || FLAGS_device == "GPU" || FLAGS_device == "XPU")) {
std::cout << "device should be 'CPU', 'GPU' or 'XPU'.";
return -1;
}
if (FLAGS_use_gpu) {
std::cout << "Deprecated, please use `--device` to set the device you want to run.";
return -1;
}
// Load model and create a object detector
PaddleDetection::ObjectDetector det(FLAGS_model_dir, FLAGS_use_gpu,
PaddleDetection::ObjectDetector det(FLAGS_model_dir, FLAGS_device,
FLAGS_run_mode, FLAGS_gpu_id, FLAGS_trt_calib_mode);
// Do inference on input video or image
if (!FLAGS_video_path.empty() || FLAGS_use_camera) {
......
......@@ -21,7 +21,7 @@ namespace PaddleDetection {
// Load Model and create model predictor
void ObjectDetector::LoadModel(const std::string& model_dir,
bool use_gpu,
const std::string& device,
const int min_subgraph_size,
const int batch_size,
const std::string& run_mode,
......@@ -31,7 +31,7 @@ void ObjectDetector::LoadModel(const std::string& model_dir,
std::string prog_file = model_dir + OS_PATH_SEP + "__model__";
std::string params_file = model_dir + OS_PATH_SEP + "__params__";
config.SetModel(prog_file, params_file);
if (use_gpu) {
if (device == "GPU") {
config.EnableUseGpu(100, gpu_id);
config.SwitchIrOptim(true);
if (run_mode != "fluid") {
......@@ -51,6 +51,8 @@ void ObjectDetector::LoadModel(const std::string& model_dir,
false,
trt_calib_mode);
}
} else if (device == "XPU"){
config.EnableXpu(10*1024*1024);
} else {
config.DisableGpu();
}
......
......@@ -45,7 +45,7 @@ python deploy/python/infer.py --model_dir=/path/to/models --image_file=/path/to/
| --image_file | Option | Image to predict |
| --video_file | Option | Video to predict |
| --camera_id | Option | Camera ID used for prediction, default -1 (the camera is not used; can be set to 0 - (number of cameras - 1)); during prediction, press `q` in the visualization window to quit and save the result to output/output.mp4 |
| --use_gpu | No | Whether to use GPU, default False |
| --device | Option | Runtime device, one of `CPU/GPU`; default is `CPU` |
| --run_mode | No | Run mode when using GPU; default is fluid, options (fluid/trt_fp32/trt_fp16/trt_int8) |
| --threshold | No | Score threshold for prediction, default 0.5 |
| --output_dir | No | Root directory for saving visualization results, default output/ |
......
......@@ -55,7 +55,7 @@ class Detector(object):
Args:
config (object): config of model, defined by `Config(model_dir)`
model_dir (str): root path of __model__, __params__ and infer_cfg.yml
use_gpu (bool): whether use gpu
device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
run_mode (str): mode of running(fluid/trt_fp32/trt_fp16)
threshold (float): threshold to reserve the result for output.
"""
......@@ -63,20 +63,20 @@ class Detector(object):
def __init__(self,
config,
model_dir,
use_gpu=False,
device='CPU',
run_mode='fluid',
threshold=0.5,
trt_calib_mode=False):
self.config = config
if self.config.use_python_inference:
self.executor, self.program, self.fecth_targets = load_executor(
model_dir, use_gpu=use_gpu)
model_dir, device=device)
else:
self.predictor = load_predictor(
model_dir,
run_mode=run_mode,
min_subgraph_size=self.config.min_subgraph_size,
use_gpu=use_gpu,
device=device,
trt_calib_mode=trt_calib_mode)
def preprocess(self, im):
......@@ -221,14 +221,14 @@ class DetectorSOLOv2(Detector):
def __init__(self,
config,
model_dir,
use_gpu=False,
device='CPU',
run_mode='fluid',
threshold=0.5,
trt_calib_mode=False):
super(DetectorSOLOv2, self).__init__(
config=config,
model_dir=model_dir,
use_gpu=use_gpu,
device=device,
run_mode=run_mode,
threshold=threshold,
trt_calib_mode=trt_calib_mode)
......@@ -382,24 +382,24 @@ class Config():
def load_predictor(model_dir,
run_mode='fluid',
batch_size=1,
use_gpu=False,
device='CPU',
min_subgraph_size=3,
trt_calib_mode=False):
"""set AnalysisConfig, generate AnalysisPredictor
Args:
model_dir (str): root path of __model__ and __params__
use_gpu (bool): whether use gpu
device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
trt_calib_mode (bool): If the model is produced by TRT offline quantitative
calibration, trt_calib_mode need to set True
Returns:
predictor (PaddlePredictor): AnalysisPredictor
Raises:
ValueError: predict by TensorRT need use_gpu == True.
ValueError: predict by TensorRT need device == GPU.
"""
if not use_gpu and not run_mode == 'fluid':
if device != 'GPU' and not run_mode == 'fluid':
raise ValueError(
"Predict by TensorRT mode: {}, expect use_gpu==True, but use_gpu == {}"
.format(run_mode, use_gpu))
"Predict by TensorRT mode: {}, expect device==GPU, but device == {}"
.format(run_mode, device))
precision_map = {
'trt_int8': fluid.core.AnalysisConfig.Precision.Int8,
'trt_fp32': fluid.core.AnalysisConfig.Precision.Float32,
......@@ -408,11 +408,13 @@ def load_predictor(model_dir,
config = fluid.core.AnalysisConfig(
os.path.join(model_dir, '__model__'),
os.path.join(model_dir, '__params__'))
if use_gpu:
if device == 'GPU':
# initial GPU memory(M), device ID
config.enable_use_gpu(100, 0)
# optimize graph and fuse op
config.switch_ir_optim(True)
elif device == 'XPU':
config.enable_xpu(10 * 1024 * 1024)
else:
config.disable_gpu()
......@@ -435,8 +437,8 @@ def load_predictor(model_dir,
return predictor
def load_executor(model_dir, use_gpu=False):
if use_gpu:
def load_executor(model_dir, device='CPU'):
if device == 'GPU':
place = fluid.CUDAPlace(0)
else:
place = fluid.CPUPlace()
......@@ -539,14 +541,14 @@ def main():
detector = Detector(
config,
FLAGS.model_dir,
use_gpu=FLAGS.use_gpu,
device=FLAGS.device,
run_mode=FLAGS.run_mode,
trt_calib_mode=FLAGS.trt_calib_mode)
if config.arch == 'SOLOv2':
detector = DetectorSOLOv2(
config,
FLAGS.model_dir,
use_gpu=FLAGS.use_gpu,
device=FLAGS.device,
run_mode=FLAGS.run_mode,
trt_calib_mode=FLAGS.trt_calib_mode)
# predict from image
......@@ -584,11 +586,18 @@ if __name__ == '__main__':
type=str,
default='fluid',
help="mode of running(fluid/trt_fp32/trt_fp16/trt_int8)")
parser.add_argument(
"--device",
type=str,
default='cpu',
help="Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU."
)
parser.add_argument(
"--use_gpu",
type=ast.literal_eval,
default=False,
help="Whether to predict with GPU.")
help="Deprecated, please use `--device` to set the device you want to run."
)
parser.add_argument(
"--run_benchmark",
type=ast.literal_eval,
......@@ -612,5 +621,9 @@ if __name__ == '__main__':
print_arguments(FLAGS)
if FLAGS.image_file != '' and FLAGS.video_file != '':
assert "Cannot predict image and video at the same time"
FLAGS.device = FLAGS.device.upper()
assert FLAGS.device in ['CPU', 'GPU', 'XPU'
], "device should be CPU, GPU or XPU"
assert not FLAGS.use_gpu, "use_gpu has been deprecated, please use --device"
main()