Unverified commit 64ebffc4, authored by xiaoluomi, committed by GitHub

fix detection infer (#1751)

Parent da3ef32e
@@ -12,6 +12,18 @@ TrainDataset:
   anno_path: annotations/instances_val2017.json
   dataset_dir: dataset/coco/
 
+EvalDataset:
+  !COCODataSet
+  image_dir: val2017
+  anno_path: annotations/instances_val2017.json
+  dataset_dir: dataset/coco/
+
+TestDataset:
+  !COCODataSet
+  image_dir: val2017
+  anno_path: annotations/instances_val2017.json
+  dataset_dir: dataset/coco/
+
 worker_num: 0
 # preprocess reader in test
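The `EvalDataset` and `TestDataset` blocks added here follow the PaddleDetection reader-config convention: the `!COCODataSet` tag tells the config loader which dataset class to instantiate. A minimal sketch of how such a config is consumed, mirroring the `create("EvalReader")` call further down in this diff (the config path is a hypothetical placeholder):

```python
# Minimal sketch, assuming ppdet (PaddleDetection) is installed.
# "configs/ppyoloe_reader.yml" is a hypothetical path for the config above.
from ppdet.core.workspace import load_config, create

reader_cfg = load_config("configs/ppyoloe_reader.yml")

# The !COCODataSet tag makes load_config build a COCODataSet instance,
# so the entry can be handed straight to the reader factory.
val_loader = create("EvalReader")(
    reader_cfg["EvalDataset"], reader_cfg["worker_num"], return_list=True)

for batch in val_loader:
    # Each batch holds the preprocessed model inputs, e.g. "image",
    # "scale_factor", and "im_shape" for COCO-style detection readers.
    break
```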
@@ -64,7 +64,8 @@ def argsparser():
         "--device",
         type=str,
         default="GPU",
-        help="Choose the device you want to run, it can be: CPU/GPU/XPU, default is GPU",
+        help=
+        "Choose the device you want to run, it can be: CPU/GPU/XPU, default is GPU",
     )
     parser.add_argument(
         "--use_dynamic_shape",
@@ -270,8 +271,8 @@ def load_predictor(
             dynamic_shape_file = os.path.join(FLAGS.model_path,
                                               "dynamic_shape.txt")
             if os.path.exists(dynamic_shape_file):
-                config.enable_tuned_tensorrt_dynamic_shape(dynamic_shape_file,
-                                                           True)
+                config.enable_tuned_tensorrt_dynamic_shape(
+                    dynamic_shape_file, True)
                 print("trt set dynamic shape done!")
             else:
                 config.collect_shape_range_info(dynamic_shape_file)
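The reshaped call above sits inside a two-pass TensorRT dynamic-shape flow: on the first run no `dynamic_shape.txt` exists, so `collect_shape_range_info` records the tensor shape ranges observed during inference (and the script sets `rerun_flag`); on later runs the tuned file is loaded with `enable_tuned_tensorrt_dynamic_shape`. A minimal standalone sketch of that flow (model directory and filenames are hypothetical):

```python
# Minimal sketch of the two-pass tuned-dynamic-shape flow, assuming a
# Paddle build with TensorRT support. Paths below are hypothetical.
import os
from paddle.inference import Config, PrecisionType, create_predictor

model_path = "output_model"  # hypothetical exported-model directory
config = Config(
    os.path.join(model_path, "model.pdmodel"),
    os.path.join(model_path, "model.pdiparams"))
config.enable_use_gpu(1000, 0)
config.enable_tensorrt_engine(
    workspace_size=1 << 30,
    max_batch_size=1,
    min_subgraph_size=3,
    precision_mode=PrecisionType.Float32)

dynamic_shape_file = os.path.join(model_path, "dynamic_shape.txt")
if os.path.exists(dynamic_shape_file):
    # Pass 2: reuse the tuned shape ranges; True allows the TRT engine
    # to be rebuilt at runtime if an unseen shape shows up.
    config.enable_tuned_tensorrt_dynamic_shape(dynamic_shape_file, True)
else:
    # Pass 1: record min/max/opt shapes during a warm-up run, then rerun
    # the script so the tuned file is picked up.
    config.collect_shape_range_info(dynamic_shape_file)

predictor = create_predictor(config)
```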
@@ -284,48 +285,6 @@ def load_predictor(
     return predictor, rerun_flag
 
 
-def get_current_memory_mb():
-    """
-    It is used to Obtain the memory usage of the CPU and GPU during the running of the program.
-    And this function Current program is time-consuming.
-    """
-    try:
-        pkg.require('pynvml')
-    except:
-        from pip._internal import main
-        main(['install', 'pynvml'])
-    try:
-        pkg.require('psutil')
-    except:
-        from pip._internal import main
-        main(['install', 'psutil'])
-    try:
-        pkg.require('GPUtil')
-    except:
-        from pip._internal import main
-        main(['install', 'GPUtil'])
-    import pynvml
-    import psutil
-    import GPUtil
-
-    gpu_id = int(os.environ.get("CUDA_VISIBLE_DEVICES", 0))
-    pid = os.getpid()
-    p = psutil.Process(pid)
-    info = p.memory_full_info()
-    cpu_mem = info.uss / 1024.0 / 1024.0
-    gpu_mem = 0
-    gpu_percent = 0
-    gpus = GPUtil.getGPUs()
-    if gpu_id is not None and len(gpus) > 0:
-        gpu_percent = gpus[gpu_id].load
-        pynvml.nvmlInit()
-        handle = pynvml.nvmlDeviceGetHandleByIndex(0)
-        meminfo = pynvml.nvmlDeviceGetMemoryInfo(handle)
-        gpu_mem = meminfo.used / 1024.0 / 1024.0
-    return round(cpu_mem, 4), round(gpu_mem, 4)
-
-
 def predict_image(predictor,
                   image_file,
                   image_shape=[640, 640],
@@ -353,6 +312,7 @@ def predict_image(predictor,
     predict_time = 0.0
     time_min = float("inf")
     time_max = float("-inf")
+    paddle.device.cuda.synchronize()
     for i in range(repeats):
         start_time = time.time()
         predictor.run()
@@ -367,13 +327,8 @@ def predict_image(predictor,
         time_min = min(time_min, timed)
         time_max = max(time_max, timed)
         predict_time += timed
-        cpu_mem, gpu_mem = get_current_memory_mb()
-        cpu_mems += cpu_mem
-        gpu_mems += gpu_mem
 
     time_avg = predict_time / repeats
-    print("[Benchmark]Avg cpu_mem:{} MB, avg gpu_mem: {} MB".format(
-        cpu_mems / repeats, gpu_mems / repeats))
     print("[Benchmark]Inference time(ms): min={}, max={}, avg={}".format(
         round(time_min * 1000, 2),
         round(time_max * 1000, 1), round(time_avg * 1000, 1)))
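The `paddle.device.cuda.synchronize()` inserted before the benchmark loop matters because GPU work is launched asynchronously: without draining the queue first, the first `time.time()` reading would absorb whatever kernels were still pending from warm-up. A minimal sketch of the timing pattern, assuming a `predictor` returned by `load_predictor` with its inputs already copied to the input handles:

```python
# Minimal synchronized wall-clock timing sketch; `predictor` is assumed
# to be a paddle.inference predictor with inputs already set.
import time
import paddle

repeats = 100  # assumed benchmark repeat count
paddle.device.cuda.synchronize()  # drain queued GPU work before timing

times = []
for _ in range(repeats):
    start = time.time()
    predictor.run()
    # copy_to_cpu blocks until the GPU finishes, so the interval covers
    # the full inference; an explicit synchronize would also work.
    _ = predictor.get_output_handle(
        predictor.get_output_names()[0]).copy_to_cpu()
    times.append((time.time() - start) * 1000)

print("min={:.2f} ms, max={:.2f} ms, avg={:.2f} ms".format(
    min(times), max(times), sum(times) / len(times)))
```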
@@ -406,6 +361,7 @@ def eval(predictor, val_loader, metric, rerun_flag=False):
         for i, _ in enumerate(input_names):
             input_tensor = predictor.get_input_handle(input_names[i])
             input_tensor.copy_from_cpu(data_all[input_names[i]])
+        paddle.device.cuda.synchronize()
         start_time = time.time()
         predictor.run()
         np_boxes = boxes_tensor.copy_to_cpu()
@@ -418,9 +374,6 @@ def eval(predictor, val_loader, metric, rerun_flag=False):
         time_min = min(time_min, timed)
         time_max = max(time_max, timed)
         predict_time += timed
-        cpu_mem, gpu_mem = get_current_memory_mb()
-        cpu_mems += cpu_mem
-        gpu_mems += gpu_mem
         if not FLAGS.include_nms:
             postprocess = PPYOLOEPostProcess(
                 score_threshold=0.3, nms_threshold=0.6)
@@ -436,8 +389,6 @@ def eval(predictor, val_loader, metric, rerun_flag=False):
     map_res = metric.get_results()
     metric.reset()
     time_avg = predict_time / sample_nums
-    print("[Benchmark]Avg cpu_mem:{} MB, avg gpu_mem: {} MB".format(
-        cpu_mems / sample_nums, gpu_mems / sample_nums))
     print("[Benchmark]Inference time(ms): min={}, max={}, avg={}".format(
         round(time_min * 1000, 2),
         round(time_max * 1000, 1), round(time_avg * 1000, 1)))
@@ -473,9 +424,10 @@ def main():
     dataset = reader_cfg["EvalDataset"]
     global val_loader
-    val_loader = create("EvalReader")(reader_cfg["EvalDataset"],
-                                      reader_cfg["worker_num"],
-                                      return_list=True)
+    val_loader = create("EvalReader")(
+        reader_cfg["EvalDataset"],
+        reader_cfg["worker_num"],
+        return_list=True)
     clsid2catid = {v: k for k, v in dataset.catid2clsid.items()}
     anno_file = dataset.get_anno()
     metric = COCOMetric(