
@@ -58,7 +58,7 @@
### Directory structure
```shell
-PTDN/
+test_tipc/
├── configs/ # configuration files
    ├── det_mv3_db.yml # yml file for testing training of the mobile ppocr detection model
    ├── det_r50_vd_db.yml # yml file for testing training of the server ppocr detection model
@@ -69,7 +69,7 @@ PTDN/
    ├── ppocr_sys_server_params.txt # parameter config file for testing the server ppocr detection + recognition pipeline
    ├── ppocr_det_server_params.txt # parameter config file for testing the server ppocr detection model
    ├── ppocr_rec_server_params.txt # parameter config file for testing the server ppocr recognition model
- ├── ...
+ ├── ...
├── results/ # pre-saved prediction results, used for accuracy comparison against the actual prediction results
    ├── python_ppocr_det_mobile_results_fp32.txt # pre-saved fp32 python prediction results of the mobile ppocr detection model
    ├── python_ppocr_det_mobile_results_fp16.txt # pre-saved fp16 python prediction results of the mobile ppocr detection model
diff --git a/PTDN/results/cpp_ppocr_det_mobile_results_fp16.txt b/test_tipc/results/cpp_ppocr_det_mobile_results_fp16.txt
similarity index 100%
rename from PTDN/results/cpp_ppocr_det_mobile_results_fp16.txt
rename to test_tipc/results/cpp_ppocr_det_mobile_results_fp16.txt
diff --git a/PTDN/results/cpp_ppocr_det_mobile_results_fp32.txt b/test_tipc/results/cpp_ppocr_det_mobile_results_fp32.txt
similarity index 100%
rename from PTDN/results/cpp_ppocr_det_mobile_results_fp32.txt
rename to test_tipc/results/cpp_ppocr_det_mobile_results_fp32.txt
diff --git a/PTDN/results/python_ppocr_det_mobile_results_fp16.txt b/test_tipc/results/python_ppocr_det_mobile_results_fp16.txt
similarity index 100%
rename from PTDN/results/python_ppocr_det_mobile_results_fp16.txt
rename to test_tipc/results/python_ppocr_det_mobile_results_fp16.txt
diff --git a/PTDN/results/python_ppocr_det_mobile_results_fp32.txt b/test_tipc/results/python_ppocr_det_mobile_results_fp32.txt
similarity index 100%
rename from PTDN/results/python_ppocr_det_mobile_results_fp32.txt
rename to test_tipc/results/python_ppocr_det_mobile_results_fp32.txt
diff --git a/PTDN/test_inference_cpp.sh b/test_tipc/test_inference_cpp.sh
similarity index 99%
rename from PTDN/test_inference_cpp.sh
rename to test_tipc/test_inference_cpp.sh
index 124bdacb7dad04bdea07a62ba9c86b248be5a06d..3f8b54b189349aa9c011a56f6f12752b771ce43e 100644
--- a/PTDN/test_inference_cpp.sh
+++ b/test_tipc/test_inference_cpp.sh
@@ -1,5 +1,5 @@
#!/bin/bash
-source tests/common_func.sh
+source test_tipc/common_func.sh
FILENAME=$1
dataline=$(awk 'NR==52, NR==66{print}' $FILENAME)
@@ -35,7 +35,7 @@ cpp_benchmark_key=$(func_parser_key "${lines[14]}")
cpp_benchmark_value=$(func_parser_value "${lines[14]}")
-LOG_PATH="./tests/output"
+LOG_PATH="./test_tipc/output"
mkdir -p ${LOG_PATH}
status_log="${LOG_PATH}/results_cpp.log"
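
As a usage note on the renamed paths (the params file name below is illustrative, not taken from this patch), the C++ inference test is typically driven as:
```shell
# illustrative: run the C++ inference test against a TIPC params file
bash test_tipc/test_inference_cpp.sh ./test_tipc/configs/ppocr_det_mobile_params.txt
```
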
diff --git a/test_tipc/test_lite.sh b/test_tipc/test_lite.sh
new file mode 100644
index 0000000000000000000000000000000000000000..832003ba302fe86995e20029cdb019e72d9ce162
--- /dev/null
+++ b/test_tipc/test_lite.sh
@@ -0,0 +1,69 @@
+#!/bin/bash
+source ./common_func.sh
+export LD_LIBRARY_PATH=${PWD}:$LD_LIBRARY_PATH
+
+FILENAME=$1
+dataline=$(awk 'NR==101, NR==110{print}' $FILENAME)
+echo $dataline
+# parser params
+IFS=$'\n'
+lines=(${dataline})
+
+# parser lite inference
+lite_inference_cmd=$(func_parser_value "${lines[1]}")
+lite_model_dir_list=$(func_parser_value "${lines[2]}")
+lite_cpu_threads_list=$(func_parser_value "${lines[3]}")
+lite_batch_size_list=$(func_parser_value "${lines[4]}")
+lite_power_mode_list=$(func_parser_value "${lines[5]}")
+lite_infer_img_dir_list=$(func_parser_value "${lines[6]}")
+lite_config_dir=$(func_parser_value "${lines[7]}")
+lite_rec_dict_dir=$(func_parser_value "${lines[8]}")
+lite_benchmark_value=$(func_parser_value "${lines[9]}")
+
+LOG_PATH="./output"
+mkdir -p ${LOG_PATH}
+status_log="${LOG_PATH}/results.log"
+
+
+function func_lite(){
+ IFS='|'
+ _script=$1
+ _lite_model=$2
+ _log_path=$3
+ _img_dir=$4
+ _config=$5
+    if [[ $_lite_model =~ "slim" ]]; then
+ precision="INT8"
+ else
+ precision="FP32"
+ fi
+ is_single_img=$(echo $_img_dir | grep -E ".jpg|.jpeg|.png|.JPEG|.JPG")
+ if [[ "$is_single_img" != "" ]]; then
+ single_img="True"
+ else
+ single_img="False"
+ fi
+
+ # lite inference
+ for num_threads in ${lite_cpu_threads_list[*]}; do
+ for power_mode in ${lite_power_mode_list[*]}; do
+ for batchsize in ${lite_batch_size_list[*]}; do
+                model_name=$(echo $_lite_model | awk -F "/" '{print $NF}')
+ _save_log_path="${_log_path}/lite_${model_name}_precision_${precision}_batchsize_${batchsize}_threads_${num_threads}_powermode_${power_mode}_singleimg_${single_img}.log"
+                command="${_script} ${_lite_model} ${precision} ${num_threads} ${batchsize} ${power_mode} ${_img_dir} ${_config} ${lite_benchmark_value} > ${_save_log_path} 2>&1"
+ eval ${command}
+ status_check $? "${command}" "${status_log}"
+ done
+ done
+ done
+}
+
+
+echo "################### run test ###################"
+IFS="|"
+for lite_model in ${lite_model_dir_list[*]}; do
+ #run lite inference
+ for img_dir in ${lite_infer_img_dir_list[*]}; do
+ func_lite "${lite_inference_cmd}" "${lite_model}" "${LOG_PATH}" "${img_dir}" "${lite_config_dir}"
+ done
+done
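
For context, test_lite.sh sources ./common_func.sh and prepends ${PWD} to LD_LIBRARY_PATH, so it is meant to be run from a directory that already holds the script, common_func.sh, the exported lite models and the demo binary (typically pushed to the target device). A minimal sketch of such an invocation, with an assumed working directory and params file name, is:
```shell
# illustrative only: invoke the lite test driver from its own directory
cd /data/local/tmp/test_lite   # assumed on-device working directory
bash test_lite.sh ppocr_det_mobile_params.txt
```
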
diff --git a/test_tipc/test_paddle2onnx.sh b/test_tipc/test_paddle2onnx.sh
new file mode 100644
index 0000000000000000000000000000000000000000..5dc6e65ec81e6b8674877fc686c8b3650ce93a59
--- /dev/null
+++ b/test_tipc/test_paddle2onnx.sh
@@ -0,0 +1,76 @@
+#!/bin/bash
+source test_tipc/common_func.sh
+
+FILENAME=$1
+
+dataline=$(cat ${FILENAME})
+lines=(${dataline})
+# common params
+model_name=$(func_parser_value "${lines[1]}")
+python=$(func_parser_value "${lines[2]}")
+
+
+# parser params
+dataline=$(awk 'NR==111, NR==123{print}' $FILENAME)
+IFS=$'\n'
+lines=(${dataline})
+
+# parser paddle2onnx
+paddle2onnx_cmd=$(func_parser_value "${lines[1]}")
+infer_model_dir_key=$(func_parser_key "${lines[2]}")
+infer_model_dir_value=$(func_parser_value "${lines[2]}")
+model_filename_key=$(func_parser_key "${lines[3]}")
+model_filename_value=$(func_parser_value "${lines[3]}")
+params_filename_key=$(func_parser_key "${lines[4]}")
+params_filename_value=$(func_parser_value "${lines[4]}")
+save_file_key=$(func_parser_key "${lines[5]}")
+save_file_value=$(func_parser_value "${lines[5]}")
+opset_version_key=$(func_parser_key "${lines[6]}")
+opset_version_value=$(func_parser_value "${lines[6]}")
+enable_onnx_checker_key=$(func_parser_key "${lines[7]}")
+enable_onnx_checker_value=$(func_parser_value "${lines[7]}")
+# parser onnx inference
+inference_py=$(func_parser_value "${lines[8]}")
+use_gpu_key=$(func_parser_key "${lines[9]}")
+use_gpu_value=$(func_parser_value "${lines[9]}")
+det_model_key=$(func_parser_key "${lines[10]}")
+image_dir_key=$(func_parser_key "${lines[11]}")
+image_dir_value=$(func_parser_value "${lines[11]}")
+
+
+LOG_PATH="./test_tipc/output"
+mkdir -p ./test_tipc/output
+status_log="${LOG_PATH}/results_paddle2onnx.log"
+
+
+function func_paddle2onnx(){
+ IFS='|'
+ _script=$1
+
+ # paddle2onnx
+ _save_log_path="${LOG_PATH}/paddle2onnx_infer_cpu.log"
+ set_dirname=$(func_set_params "${infer_model_dir_key}" "${infer_model_dir_value}")
+ set_model_filename=$(func_set_params "${model_filename_key}" "${model_filename_value}")
+ set_params_filename=$(func_set_params "${params_filename_key}" "${params_filename_value}")
+ set_save_model=$(func_set_params "${save_file_key}" "${save_file_value}")
+ set_opset_version=$(func_set_params "${opset_version_key}" "${opset_version_value}")
+ set_enable_onnx_checker=$(func_set_params "${enable_onnx_checker_key}" "${enable_onnx_checker_value}")
+    trans_model_cmd="${paddle2onnx_cmd} ${set_dirname} ${set_model_filename} ${set_params_filename} ${set_save_model} ${set_opset_version} ${set_enable_onnx_checker}"
+ eval $trans_model_cmd
+ last_status=${PIPESTATUS[0]}
+ status_check $last_status "${trans_model_cmd}" "${status_log}"
+ # python inference
+ set_gpu=$(func_set_params "${use_gpu_key}" "${use_gpu_value}")
+ set_model_dir=$(func_set_params "${det_model_key}" "${save_file_value}")
+ set_img_dir=$(func_set_params "${image_dir_key}" "${image_dir_value}")
+ infer_model_cmd="${python} ${inference_py} ${set_gpu} ${set_img_dir} ${set_model_dir} --use_onnx=True > ${_save_log_path} 2>&1 "
+    eval $infer_model_cmd
+    last_status=${PIPESTATUS[0]}
+    status_check $last_status "${infer_model_cmd}" "${status_log}"
+}
+
+
+echo "################### run test ###################"
+
+export Count=0
+IFS="|"
+func_paddle2onnx
\ No newline at end of file
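For reference, the conversion command assembled in func_paddle2onnx expands to roughly the following shape; the directory names and opset value are placeholders read from the params file, not values defined by this patch:
```shell
# illustrative expansion of trans_model_cmd (all paths/values are placeholders)
paddle2onnx --model_dir=./inference/ch_ppocr_mobile_v2.0_det_infer/ \
    --model_filename=inference.pdmodel \
    --params_filename=inference.pdiparams \
    --save_file=./inference/det_mobile_onnx/model.onnx \
    --opset_version=10 \
    --enable_onnx_checker=True
```
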
diff --git a/PTDN/test_serving.sh b/test_tipc/test_serving.sh
similarity index 98%
rename from PTDN/test_serving.sh
rename to test_tipc/test_serving.sh
index ec79a46c9bf4b51c16b1c0ddfff41b772b13b0ae..be7b594c3848c423937c59336ce3bf686f8f228d 100644
--- a/PTDN/test_serving.sh
+++ b/test_tipc/test_serving.sh
@@ -1,5 +1,5 @@
#!/bin/bash
-source tests/common_func.sh
+source test_tipc/common_func.sh
FILENAME=$1
dataline=$(awk 'NR==67, NR==83{print}' $FILENAME)
@@ -36,8 +36,8 @@ web_precision_key=$(func_parser_key "${lines[15]}")
web_precision_list=$(func_parser_value "${lines[15]}")
pipeline_py=$(func_parser_value "${lines[16]}")
-LOG_PATH="../../tests/output"
-mkdir -p ./tests/output
+LOG_PATH="../../test_tipc/output"
+mkdir -p ./test_tipc/output
status_log="${LOG_PATH}/results_serving.log"
function func_serving(){
diff --git a/PTDN/test_train_inference_python.sh b/test_tipc/test_train_inference_python.sh
similarity index 91%
rename from PTDN/test_train_inference_python.sh
rename to test_tipc/test_train_inference_python.sh
index 28cc037801bb4c1f1bcc10a74855b8c146197f4d..eaeaf9684b1fed6738149d61d3697232e105a72f 100644
--- a/PTDN/test_train_inference_python.sh
+++ b/test_tipc/test_train_inference_python.sh
@@ -1,8 +1,8 @@
#!/bin/bash
-source tests/common_func.sh
+source test_tipc/common_func.sh
FILENAME=$1
-# MODE be one of ['lite_train_infer' 'whole_infer' 'whole_train_infer', 'infer', 'klquant_infer']
+# MODE be one of ['lite_train_lite_infer' 'lite_train_whole_infer' 'whole_train_whole_infer', 'whole_infer', 'klquant_whole_infer']
MODE=$2
dataline=$(awk 'NR==1, NR==51{print}' $FILENAME)
@@ -59,6 +59,7 @@ export_key1=$(func_parser_key "${lines[33]}")
export_value1=$(func_parser_value "${lines[33]}")
export_key2=$(func_parser_key "${lines[34]}")
export_value2=$(func_parser_value "${lines[34]}")
+inference_dir=$(func_parser_value "${lines[35]}")
# parser inference model
infer_model_dir_list=$(func_parser_value "${lines[36]}")
@@ -88,7 +89,7 @@ infer_key1=$(func_parser_key "${lines[50]}")
infer_value1=$(func_parser_value "${lines[50]}")
# parser klquant_infer
-if [ ${MODE} = "klquant_infer" ]; then
+if [ ${MODE} = "klquant_whole_infer" ]; then
dataline=$(awk 'NR==82, NR==98{print}' $FILENAME)
lines=(${dataline})
# parser inference model
@@ -119,7 +120,7 @@ if [ ${MODE} = "klquant_infer" ]; then
infer_value1=$(func_parser_value "${lines[15]}")
fi
-LOG_PATH="./tests/output"
+LOG_PATH="./test_tipc/output"
mkdir -p ${LOG_PATH}
status_log="${LOG_PATH}/results_python.log"
@@ -202,7 +203,7 @@ function func_inference(){
done
}
-if [ ${MODE} = "infer" ] || [ ${MODE} = "klquant_infer" ]; then
+if [ ${MODE} = "whole_infer" ] || [ ${MODE} = "klquant_whole_infer" ]; then
GPUID=$3
if [ ${#GPUID} -le 0 ];then
env=" "
@@ -245,6 +246,7 @@ else
for gpu in ${gpu_list[*]}; do
use_gpu=${USE_GPU_KEY[Count]}
Count=$(($Count + 1))
+ ips=""
if [ ${gpu} = "-1" ];then
env=""
elif [ ${#gpu} -le 1 ];then
@@ -264,6 +266,11 @@ else
env=" "
fi
for autocast in ${autocast_list[*]}; do
+ if [ ${autocast} = "amp" ]; then
+ set_amp_config="Global.use_amp=True Global.scale_loss=1024.0 Global.use_dynamic_loss_scaling=True"
+ else
+ set_amp_config=" "
+ fi
for trainer in ${trainer_list[*]}; do
flag_quant=False
if [ ${trainer} = ${pact_key} ]; then
@@ -290,7 +297,6 @@ else
if [ ${run_train} = "null" ]; then
continue
fi
-
set_autocast=$(func_set_params "${autocast_key}" "${autocast}")
set_epoch=$(func_set_params "${epoch_key}" "${epoch_num}")
set_pretrain=$(func_set_params "${pretrain_model_key}" "${pretrain_model_value}")
@@ -306,11 +312,11 @@ else
set_save_model=$(func_set_params "${save_model_key}" "${save_log}")
if [ ${#gpu} -le 2 ];then # train with cpu or single gpu
- cmd="${python} ${run_train} ${set_use_gpu} ${set_save_model} ${set_epoch} ${set_pretrain} ${set_autocast} ${set_batchsize} ${set_train_params1} "
- elif [ ${#gpu} -le 15 ];then # train with multi-gpu
- cmd="${python} -m paddle.distributed.launch --gpus=${gpu} ${run_train} ${set_save_model} ${set_epoch} ${set_pretrain} ${set_autocast} ${set_batchsize} ${set_train_params1}"
+ cmd="${python} ${run_train} ${set_use_gpu} ${set_save_model} ${set_epoch} ${set_pretrain} ${set_autocast} ${set_batchsize} ${set_train_params1} ${set_amp_config} "
+ elif [ ${#ips} -le 26 ];then # train with multi-gpu
+ cmd="${python} -m paddle.distributed.launch --gpus=${gpu} ${run_train} ${set_use_gpu} ${set_save_model} ${set_epoch} ${set_pretrain} ${set_autocast} ${set_batchsize} ${set_train_params1} ${set_amp_config}"
else # train with multi-machine
- cmd="${python} -m paddle.distributed.launch --ips=${ips} --gpus=${gpu} ${run_train} ${set_save_model} ${set_pretrain} ${set_epoch} ${set_autocast} ${set_batchsize} ${set_train_params1}"
+ cmd="${python} -m paddle.distributed.launch --ips=${ips} --gpus=${gpu} ${run_train} ${set_use_gpu} ${set_save_model} ${set_pretrain} ${set_epoch} ${set_autocast} ${set_batchsize} ${set_train_params1} ${set_amp_config}"
fi
# run train
eval "unset CUDA_VISIBLE_DEVICES"
@@ -342,7 +348,13 @@ else
#run inference
eval $env
save_infer_path="${save_log}"
- func_inference "${python}" "${inference_py}" "${save_infer_path}" "${LOG_PATH}" "${train_infer_img_dir}" "${flag_quant}"
+ if [ ${inference_dir} != "null" ] && [ ${inference_dir} != '##' ]; then
+ infer_model_dir="${save_infer_path}/${inference_dir}"
+ else
+ infer_model_dir=${save_infer_path}
+ fi
+ func_inference "${python}" "${inference_py}" "${infer_model_dir}" "${LOG_PATH}" "${train_infer_img_dir}" "${flag_quant}"
+
eval "unset CUDA_VISIBLE_DEVICES"
fi
done # done with: for trainer in ${trainer_list[*]}; do
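
As a usage sketch for the renamed modes (the params file path is an assumption), a lite-train / lite-infer run would be launched as:
```shell
# illustrative: exercise the lite_train_lite_infer chain with the new mode names
bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ppocr_det_mobile_params.txt lite_train_lite_infer
```
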
diff --git a/tools/infer/predict_cls.py b/tools/infer/predict_cls.py
index 1c68494861e60b4aaef541a4e247071944cf420c..a25cac2600e67667badc76c648c1fcda12981a0f 100755
--- a/tools/infer/predict_cls.py
+++ b/tools/infer/predict_cls.py
@@ -47,6 +47,7 @@ class TextClassifier(object):
self.postprocess_op = build_post_process(postprocess_params)
self.predictor, self.input_tensor, self.output_tensors, _ = \
utility.create_predictor(args, 'cls', logger)
+ self.use_onnx = args.use_onnx
def resize_norm_img(self, img):
imgC, imgH, imgW = self.cls_image_shape
@@ -100,10 +101,16 @@ class TextClassifier(object):
norm_img_batch = np.concatenate(norm_img_batch)
norm_img_batch = norm_img_batch.copy()
- self.input_tensor.copy_from_cpu(norm_img_batch)
- self.predictor.run()
- prob_out = self.output_tensors[0].copy_to_cpu()
- self.predictor.try_shrink_memory()
+ if self.use_onnx:
+ input_dict = {}
+ input_dict[self.input_tensor.name] = norm_img_batch
+ outputs = self.predictor.run(self.output_tensors, input_dict)
+ prob_out = outputs[0]
+ else:
+ self.input_tensor.copy_from_cpu(norm_img_batch)
+ self.predictor.run()
+ prob_out = self.output_tensors[0].copy_to_cpu()
+ self.predictor.try_shrink_memory()
cls_result = self.postprocess_op(prob_out)
elapse += time.time() - starttime
for rno in range(len(cls_result)):
diff --git a/tools/infer/predict_det.py b/tools/infer/predict_det.py
index b24ad2bbb504caf1f262b4e47625348ce32d6fce..5dfe8d648f06f6382e8e101a6002f7f1b7441323 100755
--- a/tools/infer/predict_det.py
+++ b/tools/infer/predict_det.py
@@ -38,6 +38,7 @@ class TextDetector(object):
def __init__(self, args):
self.args = args
self.det_algorithm = args.det_algorithm
+ self.use_onnx = args.use_onnx
pre_process_list = [{
'DetResizeForTest': {
'limit_side_len': args.det_limit_side_len,
@@ -100,7 +101,12 @@ class TextDetector(object):
else:
logger.info("unknown det_algorithm:{}".format(self.det_algorithm))
sys.exit(0)
-
+ if self.use_onnx:
+ pre_process_list[0] = {
+ 'DetResizeForTest': {
+ 'image_shape': [640, 640]
+ }
+ }
self.preprocess_op = create_operators(pre_process_list)
self.postprocess_op = build_post_process(postprocess_params)
self.predictor, self.input_tensor, self.output_tensors, self.config = utility.create_predictor(
@@ -198,15 +204,19 @@ class TextDetector(object):
if self.args.benchmark:
self.autolog.times.stamp()
-
- self.input_tensor.copy_from_cpu(img)
- self.predictor.run()
- outputs = []
- for output_tensor in self.output_tensors:
- output = output_tensor.copy_to_cpu()
- outputs.append(output)
- if self.args.benchmark:
- self.autolog.times.stamp()
+ if self.use_onnx:
+ input_dict = {}
+ input_dict[self.input_tensor.name] = img
+ outputs = self.predictor.run(self.output_tensors, input_dict)
+ else:
+ self.input_tensor.copy_from_cpu(img)
+ self.predictor.run()
+ outputs = []
+ for output_tensor in self.output_tensors:
+ output = output_tensor.copy_to_cpu()
+ outputs.append(output)
+ if self.args.benchmark:
+ self.autolog.times.stamp()
preds = {}
if self.det_algorithm == "EAST":
diff --git a/tools/infer/predict_rec.py b/tools/infer/predict_rec.py
index 936994a215d10d543537b29cb41bfa42b42590c7..41982e3403b11dd4a1893f89af11a9201e0e15d7 100755
--- a/tools/infer/predict_rec.py
+++ b/tools/infer/predict_rec.py
@@ -73,6 +73,7 @@ class TextRecognizer(object):
self.predictor, self.input_tensor, self.output_tensors, self.config = \
utility.create_predictor(args, 'rec', logger)
self.benchmark = args.benchmark
+ self.use_onnx = args.use_onnx
if args.benchmark:
import auto_log
pid = os.getpid()
@@ -106,8 +107,9 @@ class TextRecognizer(object):
return norm_img.astype(np.float32) / 128. - 1.
assert imgC == img.shape[2]
- max_wh_ratio = max(max_wh_ratio, imgW / imgH)
imgW = int((32 * max_wh_ratio))
+ if self.use_onnx:
+ imgW = 100
h, w = img.shape[:2]
ratio = w / float(h)
if math.ceil(imgH * ratio) > imgW:
@@ -297,51 +299,72 @@ class TextRecognizer(object):
gsrm_slf_attn_bias1_list,
gsrm_slf_attn_bias2_list,
]
- input_names = self.predictor.get_input_names()
- for i in range(len(input_names)):
- input_tensor = self.predictor.get_input_handle(input_names[
- i])
- input_tensor.copy_from_cpu(inputs[i])
- self.predictor.run()
- outputs = []
- for output_tensor in self.output_tensors:
- output = output_tensor.copy_to_cpu()
- outputs.append(output)
- if self.benchmark:
- self.autolog.times.stamp()
- preds = {"predict": outputs[2]}
+ if self.use_onnx:
+ input_dict = {}
+ input_dict[self.input_tensor.name] = norm_img_batch
+ outputs = self.predictor.run(self.output_tensors,
+ input_dict)
+ preds = {"predict": outputs[2]}
+ else:
+ input_names = self.predictor.get_input_names()
+ for i in range(len(input_names)):
+ input_tensor = self.predictor.get_input_handle(
+ input_names[i])
+ input_tensor.copy_from_cpu(inputs[i])
+ self.predictor.run()
+ outputs = []
+ for output_tensor in self.output_tensors:
+ output = output_tensor.copy_to_cpu()
+ outputs.append(output)
+ if self.benchmark:
+ self.autolog.times.stamp()
+ preds = {"predict": outputs[2]}
elif self.rec_algorithm == "SAR":
valid_ratios = np.concatenate(valid_ratios)
inputs = [
norm_img_batch,
valid_ratios,
]
- input_names = self.predictor.get_input_names()
- for i in range(len(input_names)):
- input_tensor = self.predictor.get_input_handle(input_names[
- i])
- input_tensor.copy_from_cpu(inputs[i])
- self.predictor.run()
- outputs = []
- for output_tensor in self.output_tensors:
- output = output_tensor.copy_to_cpu()
- outputs.append(output)
- if self.benchmark:
- self.autolog.times.stamp()
- preds = outputs[0]
- else:
- self.input_tensor.copy_from_cpu(norm_img_batch)
- self.predictor.run()
- outputs = []
- for output_tensor in self.output_tensors:
- output = output_tensor.copy_to_cpu()
- outputs.append(output)
- if self.benchmark:
- self.autolog.times.stamp()
- if len(outputs) != 1:
- preds = outputs
+ if self.use_onnx:
+ input_dict = {}
+ input_dict[self.input_tensor.name] = norm_img_batch
+ outputs = self.predictor.run(self.output_tensors,
+ input_dict)
+ preds = outputs[0]
else:
+ input_names = self.predictor.get_input_names()
+ for i in range(len(input_names)):
+ input_tensor = self.predictor.get_input_handle(
+ input_names[i])
+ input_tensor.copy_from_cpu(inputs[i])
+ self.predictor.run()
+ outputs = []
+ for output_tensor in self.output_tensors:
+ output = output_tensor.copy_to_cpu()
+ outputs.append(output)
+ if self.benchmark:
+ self.autolog.times.stamp()
preds = outputs[0]
+ else:
+ if self.use_onnx:
+ input_dict = {}
+ input_dict[self.input_tensor.name] = norm_img_batch
+ outputs = self.predictor.run(self.output_tensors,
+ input_dict)
+ preds = outputs[0]
+ else:
+ self.input_tensor.copy_from_cpu(norm_img_batch)
+ self.predictor.run()
+ outputs = []
+ for output_tensor in self.output_tensors:
+ output = output_tensor.copy_to_cpu()
+ outputs.append(output)
+ if self.benchmark:
+ self.autolog.times.stamp()
+ if len(outputs) != 1:
+ preds = outputs
+ else:
+ preds = outputs[0]
rec_result = self.postprocess_op(preds)
for rno in range(len(rec_result)):
rec_res[indices[beg_img_no + rno]] = rec_result[rno]
diff --git a/tools/infer/utility.py b/tools/infer/utility.py
index 41a3c0f14b6378751a367a3709ad7943ee981a4e..a3cac647982b77f5ee54d8681b07e677987d9ccb 100755
--- a/tools/infer/utility.py
+++ b/tools/infer/utility.py
@@ -121,6 +121,7 @@ def init_args():
parser.add_argument("--save_log_path", type=str, default="./log_output/")
parser.add_argument("--show_log", type=str2bool, default=True)
+ parser.add_argument("--use_onnx", type=str2bool, default=False)
return parser
@@ -144,152 +145,163 @@ def create_predictor(args, mode, logger):
if model_dir is None:
logger.info("not find {} model file path {}".format(mode, model_dir))
sys.exit(0)
- model_file_path = model_dir + "/inference.pdmodel"
- params_file_path = model_dir + "/inference.pdiparams"
- if not os.path.exists(model_file_path):
- raise ValueError("not find model file path {}".format(model_file_path))
- if not os.path.exists(params_file_path):
- raise ValueError("not find params file path {}".format(
- params_file_path))
-
- config = inference.Config(model_file_path, params_file_path)
-
- if hasattr(args, 'precision'):
- if args.precision == "fp16" and args.use_tensorrt:
- precision = inference.PrecisionType.Half
- elif args.precision == "int8":
- precision = inference.PrecisionType.Int8
- else:
- precision = inference.PrecisionType.Float32
+ if args.use_onnx:
+ import onnxruntime as ort
+ model_file_path = model_dir
+ if not os.path.exists(model_file_path):
+ raise ValueError("not find model file path {}".format(
+ model_file_path))
+ sess = ort.InferenceSession(model_file_path)
+ return sess, sess.get_inputs()[0], None, None
+
else:
- precision = inference.PrecisionType.Float32
-
- if args.use_gpu:
- gpu_id = get_infer_gpuid()
- if gpu_id is None:
- raise ValueError(
- "Not found GPU in current device. Please check your device or set args.use_gpu as False"
- )
- config.enable_use_gpu(args.gpu_mem, 0)
- if args.use_tensorrt:
- config.enable_tensorrt_engine(
- precision_mode=precision,
- max_batch_size=args.max_batch_size,
- min_subgraph_size=args.min_subgraph_size)
- # skip the minmum trt subgraph
- if mode == "det":
- min_input_shape = {
- "x": [1, 3, 50, 50],
- "conv2d_92.tmp_0": [1, 120, 20, 20],
- "conv2d_91.tmp_0": [1, 24, 10, 10],
- "conv2d_59.tmp_0": [1, 96, 20, 20],
- "nearest_interp_v2_1.tmp_0": [1, 256, 10, 10],
- "nearest_interp_v2_2.tmp_0": [1, 256, 20, 20],
- "conv2d_124.tmp_0": [1, 256, 20, 20],
- "nearest_interp_v2_3.tmp_0": [1, 64, 20, 20],
- "nearest_interp_v2_4.tmp_0": [1, 64, 20, 20],
- "nearest_interp_v2_5.tmp_0": [1, 64, 20, 20],
- "elementwise_add_7": [1, 56, 2, 2],
- "nearest_interp_v2_0.tmp_0": [1, 256, 2, 2]
- }
- max_input_shape = {
- "x": [1, 3, 2000, 2000],
- "conv2d_92.tmp_0": [1, 120, 400, 400],
- "conv2d_91.tmp_0": [1, 24, 200, 200],
- "conv2d_59.tmp_0": [1, 96, 400, 400],
- "nearest_interp_v2_1.tmp_0": [1, 256, 200, 200],
- "conv2d_124.tmp_0": [1, 256, 400, 400],
- "nearest_interp_v2_2.tmp_0": [1, 256, 400, 400],
- "nearest_interp_v2_3.tmp_0": [1, 64, 400, 400],
- "nearest_interp_v2_4.tmp_0": [1, 64, 400, 400],
- "nearest_interp_v2_5.tmp_0": [1, 64, 400, 400],
- "elementwise_add_7": [1, 56, 400, 400],
- "nearest_interp_v2_0.tmp_0": [1, 256, 400, 400]
- }
- opt_input_shape = {
- "x": [1, 3, 640, 640],
- "conv2d_92.tmp_0": [1, 120, 160, 160],
- "conv2d_91.tmp_0": [1, 24, 80, 80],
- "conv2d_59.tmp_0": [1, 96, 160, 160],
- "nearest_interp_v2_1.tmp_0": [1, 256, 80, 80],
- "nearest_interp_v2_2.tmp_0": [1, 256, 160, 160],
- "conv2d_124.tmp_0": [1, 256, 160, 160],
- "nearest_interp_v2_3.tmp_0": [1, 64, 160, 160],
- "nearest_interp_v2_4.tmp_0": [1, 64, 160, 160],
- "nearest_interp_v2_5.tmp_0": [1, 64, 160, 160],
- "elementwise_add_7": [1, 56, 40, 40],
- "nearest_interp_v2_0.tmp_0": [1, 256, 40, 40]
- }
- min_pact_shape = {
- "nearest_interp_v2_26.tmp_0": [1, 256, 20, 20],
- "nearest_interp_v2_27.tmp_0": [1, 64, 20, 20],
- "nearest_interp_v2_28.tmp_0": [1, 64, 20, 20],
- "nearest_interp_v2_29.tmp_0": [1, 64, 20, 20]
- }
- max_pact_shape = {
- "nearest_interp_v2_26.tmp_0": [1, 256, 400, 400],
- "nearest_interp_v2_27.tmp_0": [1, 64, 400, 400],
- "nearest_interp_v2_28.tmp_0": [1, 64, 400, 400],
- "nearest_interp_v2_29.tmp_0": [1, 64, 400, 400]
- }
- opt_pact_shape = {
- "nearest_interp_v2_26.tmp_0": [1, 256, 160, 160],
- "nearest_interp_v2_27.tmp_0": [1, 64, 160, 160],
- "nearest_interp_v2_28.tmp_0": [1, 64, 160, 160],
- "nearest_interp_v2_29.tmp_0": [1, 64, 160, 160]
- }
- min_input_shape.update(min_pact_shape)
- max_input_shape.update(max_pact_shape)
- opt_input_shape.update(opt_pact_shape)
- elif mode == "rec":
- min_input_shape = {"x": [1, 3, 32, 10]}
- max_input_shape = {"x": [args.rec_batch_num, 3, 32, 2000]}
- opt_input_shape = {"x": [args.rec_batch_num, 3, 32, 320]}
- elif mode == "cls":
- min_input_shape = {"x": [1, 3, 48, 10]}
- max_input_shape = {"x": [args.rec_batch_num, 3, 48, 2000]}
- opt_input_shape = {"x": [args.rec_batch_num, 3, 48, 320]}
+ model_file_path = model_dir + "/inference.pdmodel"
+ params_file_path = model_dir + "/inference.pdiparams"
+ if not os.path.exists(model_file_path):
+ raise ValueError("not find model file path {}".format(
+ model_file_path))
+ if not os.path.exists(params_file_path):
+ raise ValueError("not find params file path {}".format(
+ params_file_path))
+
+ config = inference.Config(model_file_path, params_file_path)
+
+ if hasattr(args, 'precision'):
+ if args.precision == "fp16" and args.use_tensorrt:
+ precision = inference.PrecisionType.Half
+ elif args.precision == "int8":
+ precision = inference.PrecisionType.Int8
+ else:
+ precision = inference.PrecisionType.Float32
else:
- min_input_shape = {"x": [1, 3, 10, 10]}
- max_input_shape = {"x": [1, 3, 1000, 1000]}
- opt_input_shape = {"x": [1, 3, 500, 500]}
- config.set_trt_dynamic_shape_info(min_input_shape, max_input_shape,
- opt_input_shape)
+ precision = inference.PrecisionType.Float32
+
+ if args.use_gpu:
+ gpu_id = get_infer_gpuid()
+ if gpu_id is None:
+ raise ValueError(
+ "Not found GPU in current device. Please check your device or set args.use_gpu as False"
+ )
+ config.enable_use_gpu(args.gpu_mem, 0)
+ if args.use_tensorrt:
+ config.enable_tensorrt_engine(
+ precision_mode=precision,
+ max_batch_size=args.max_batch_size,
+ min_subgraph_size=args.min_subgraph_size)
+            # skip the minimum trt subgraph
+ if mode == "det":
+ min_input_shape = {
+ "x": [1, 3, 50, 50],
+ "conv2d_92.tmp_0": [1, 120, 20, 20],
+ "conv2d_91.tmp_0": [1, 24, 10, 10],
+ "conv2d_59.tmp_0": [1, 96, 20, 20],
+ "nearest_interp_v2_1.tmp_0": [1, 256, 10, 10],
+ "nearest_interp_v2_2.tmp_0": [1, 256, 20, 20],
+ "conv2d_124.tmp_0": [1, 256, 20, 20],
+ "nearest_interp_v2_3.tmp_0": [1, 64, 20, 20],
+ "nearest_interp_v2_4.tmp_0": [1, 64, 20, 20],
+ "nearest_interp_v2_5.tmp_0": [1, 64, 20, 20],
+ "elementwise_add_7": [1, 56, 2, 2],
+ "nearest_interp_v2_0.tmp_0": [1, 256, 2, 2]
+ }
+ max_input_shape = {
+ "x": [1, 3, 2000, 2000],
+ "conv2d_92.tmp_0": [1, 120, 400, 400],
+ "conv2d_91.tmp_0": [1, 24, 200, 200],
+ "conv2d_59.tmp_0": [1, 96, 400, 400],
+ "nearest_interp_v2_1.tmp_0": [1, 256, 200, 200],
+ "conv2d_124.tmp_0": [1, 256, 400, 400],
+ "nearest_interp_v2_2.tmp_0": [1, 256, 400, 400],
+ "nearest_interp_v2_3.tmp_0": [1, 64, 400, 400],
+ "nearest_interp_v2_4.tmp_0": [1, 64, 400, 400],
+ "nearest_interp_v2_5.tmp_0": [1, 64, 400, 400],
+ "elementwise_add_7": [1, 56, 400, 400],
+ "nearest_interp_v2_0.tmp_0": [1, 256, 400, 400]
+ }
+ opt_input_shape = {
+ "x": [1, 3, 640, 640],
+ "conv2d_92.tmp_0": [1, 120, 160, 160],
+ "conv2d_91.tmp_0": [1, 24, 80, 80],
+ "conv2d_59.tmp_0": [1, 96, 160, 160],
+ "nearest_interp_v2_1.tmp_0": [1, 256, 80, 80],
+ "nearest_interp_v2_2.tmp_0": [1, 256, 160, 160],
+ "conv2d_124.tmp_0": [1, 256, 160, 160],
+ "nearest_interp_v2_3.tmp_0": [1, 64, 160, 160],
+ "nearest_interp_v2_4.tmp_0": [1, 64, 160, 160],
+ "nearest_interp_v2_5.tmp_0": [1, 64, 160, 160],
+ "elementwise_add_7": [1, 56, 40, 40],
+ "nearest_interp_v2_0.tmp_0": [1, 256, 40, 40]
+ }
+ min_pact_shape = {
+ "nearest_interp_v2_26.tmp_0": [1, 256, 20, 20],
+ "nearest_interp_v2_27.tmp_0": [1, 64, 20, 20],
+ "nearest_interp_v2_28.tmp_0": [1, 64, 20, 20],
+ "nearest_interp_v2_29.tmp_0": [1, 64, 20, 20]
+ }
+ max_pact_shape = {
+ "nearest_interp_v2_26.tmp_0": [1, 256, 400, 400],
+ "nearest_interp_v2_27.tmp_0": [1, 64, 400, 400],
+ "nearest_interp_v2_28.tmp_0": [1, 64, 400, 400],
+ "nearest_interp_v2_29.tmp_0": [1, 64, 400, 400]
+ }
+ opt_pact_shape = {
+ "nearest_interp_v2_26.tmp_0": [1, 256, 160, 160],
+ "nearest_interp_v2_27.tmp_0": [1, 64, 160, 160],
+ "nearest_interp_v2_28.tmp_0": [1, 64, 160, 160],
+ "nearest_interp_v2_29.tmp_0": [1, 64, 160, 160]
+ }
+ min_input_shape.update(min_pact_shape)
+ max_input_shape.update(max_pact_shape)
+ opt_input_shape.update(opt_pact_shape)
+ elif mode == "rec":
+ min_input_shape = {"x": [1, 3, 32, 10]}
+ max_input_shape = {"x": [args.rec_batch_num, 3, 32, 2000]}
+ opt_input_shape = {"x": [args.rec_batch_num, 3, 32, 320]}
+ elif mode == "cls":
+ min_input_shape = {"x": [1, 3, 48, 10]}
+ max_input_shape = {"x": [args.rec_batch_num, 3, 48, 2000]}
+ opt_input_shape = {"x": [args.rec_batch_num, 3, 48, 320]}
+ else:
+ min_input_shape = {"x": [1, 3, 10, 10]}
+ max_input_shape = {"x": [1, 3, 1000, 1000]}
+ opt_input_shape = {"x": [1, 3, 500, 500]}
+ config.set_trt_dynamic_shape_info(min_input_shape, max_input_shape,
+ opt_input_shape)
- else:
- config.disable_gpu()
- if hasattr(args, "cpu_threads"):
- config.set_cpu_math_library_num_threads(args.cpu_threads)
else:
- # default cpu threads as 10
- config.set_cpu_math_library_num_threads(10)
- if args.enable_mkldnn:
- # cache 10 different shapes for mkldnn to avoid memory leak
- config.set_mkldnn_cache_capacity(10)
- config.enable_mkldnn()
- if args.precision == "fp16":
- config.enable_mkldnn_bfloat16()
- # enable memory optim
- config.enable_memory_optim()
- config.disable_glog_info()
-
- config.delete_pass("conv_transpose_eltwiseadd_bn_fuse_pass")
- if mode == 'table':
- config.delete_pass("fc_fuse_pass") # not supported for table
- config.switch_use_feed_fetch_ops(False)
- config.switch_ir_optim(True)
-
- # create predictor
- predictor = inference.create_predictor(config)
- input_names = predictor.get_input_names()
- for name in input_names:
- input_tensor = predictor.get_input_handle(name)
- output_names = predictor.get_output_names()
- output_tensors = []
- for output_name in output_names:
- output_tensor = predictor.get_output_handle(output_name)
- output_tensors.append(output_tensor)
- return predictor, input_tensor, output_tensors, config
+ config.disable_gpu()
+ if hasattr(args, "cpu_threads"):
+ config.set_cpu_math_library_num_threads(args.cpu_threads)
+ else:
+ # default cpu threads as 10
+ config.set_cpu_math_library_num_threads(10)
+ if args.enable_mkldnn:
+ # cache 10 different shapes for mkldnn to avoid memory leak
+ config.set_mkldnn_cache_capacity(10)
+ config.enable_mkldnn()
+ if args.precision == "fp16":
+ config.enable_mkldnn_bfloat16()
+ # enable memory optim
+ config.enable_memory_optim()
+ config.disable_glog_info()
+
+ config.delete_pass("conv_transpose_eltwiseadd_bn_fuse_pass")
+ if mode == 'table':
+ config.delete_pass("fc_fuse_pass") # not supported for table
+ config.switch_use_feed_fetch_ops(False)
+ config.switch_ir_optim(True)
+
+ # create predictor
+ predictor = inference.create_predictor(config)
+ input_names = predictor.get_input_names()
+ for name in input_names:
+ input_tensor = predictor.get_input_handle(name)
+ output_names = predictor.get_output_names()
+ output_tensors = []
+ for output_name in output_names:
+ output_tensor = predictor.get_output_handle(output_name)
+ output_tensors.append(output_tensor)
+ return predictor, input_tensor, output_tensors, config
def get_infer_gpuid():
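
With the use_onnx branch above, create_predictor returns an onnxruntime InferenceSession and its first input handle, and the predict_* scripts feed it a dict keyed by the input name. An illustrative end-to-end call through the CLI (model and image paths are assumptions) looks like:
```shell
# illustrative: run text detection through onnxruntime instead of the Paddle predictor
python3 tools/infer/predict_det.py \
    --use_onnx=True \
    --use_gpu=False \
    --det_model_dir=./inference/det_mobile_onnx/model.onnx \
    --image_dir=./doc/imgs/
```
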
diff --git a/tools/program.py b/tools/program.py
index 798e6dff297ad1149942488cca1d5540f1924867..d110f70704028948dff2bc889e07d128e0bc94ea 100755
--- a/tools/program.py
+++ b/tools/program.py
@@ -159,7 +159,8 @@ def train(config,
eval_class,
pre_best_model_dict,
logger,
- vdl_writer=None):
+ vdl_writer=None,
+ scaler=None):
cal_metric_during_train = config['Global'].get('cal_metric_during_train',
False)
log_smooth_window = config['Global']['log_smooth_window']
@@ -211,33 +212,49 @@ def train(config,
for epoch in range(start_epoch, epoch_num + 1):
train_dataloader = build_dataloader(
config, 'Train', device, logger, seed=epoch)
- train_batch_cost = 0.0
train_reader_cost = 0.0
- batch_sum = 0
- batch_start = time.time()
+ train_run_cost = 0.0
+ total_samples = 0
+ reader_start = time.time()
max_iter = len(train_dataloader) - 1 if platform.system(
) == "Windows" else len(train_dataloader)
for idx, batch in enumerate(train_dataloader):
profiler.add_profiler_step(profiler_options)
- train_reader_cost += time.time() - batch_start
+ train_reader_cost += time.time() - reader_start
if idx >= max_iter:
break
lr = optimizer.get_lr()
images = batch[0]
if use_srn:
model_average = True
- if model_type == 'table' or extra_input:
- preds = model(images, data=batch[1:])
+
+ train_start = time.time()
+ # use amp
+ if scaler:
+ with paddle.amp.auto_cast():
+ if model_type == 'table' or extra_input:
+ preds = model(images, data=batch[1:])
+ else:
+ preds = model(images)
else:
- preds = model(images)
+ if model_type == 'table' or extra_input:
+ preds = model(images, data=batch[1:])
+ else:
+ preds = model(images)
loss = loss_class(preds, batch)
avg_loss = loss['loss']
- avg_loss.backward()
- optimizer.step()
+
+ if scaler:
+ scaled_avg_loss = scaler.scale(avg_loss)
+ scaled_avg_loss.backward()
+ scaler.minimize(optimizer, scaled_avg_loss)
+ else:
+ avg_loss.backward()
+ optimizer.step()
optimizer.clear_grad()
- train_batch_cost += time.time() - batch_start
- batch_sum += len(images)
+ train_run_cost += time.time() - train_start
+ total_samples += len(images)
if not isinstance(lr_scheduler, float):
lr_scheduler.step()
@@ -268,12 +285,13 @@ def train(config,
logs = train_stats.log()
strs = 'epoch: [{}/{}], iter: {}, {}, reader_cost: {:.5f} s, batch_cost: {:.5f} s, samples: {}, ips: {:.5f}'.format(
epoch, epoch_num, global_step, logs, train_reader_cost /
- print_batch_step, train_batch_cost / print_batch_step,
- batch_sum, batch_sum / train_batch_cost)
+ print_batch_step, (train_reader_cost + train_run_cost) /
+ print_batch_step, total_samples,
+ total_samples / (train_reader_cost + train_run_cost))
logger.info(strs)
- train_batch_cost = 0.0
train_reader_cost = 0.0
- batch_sum = 0
+ train_run_cost = 0.0
+ total_samples = 0
# eval
if global_step > start_eval_step and \
(global_step - start_eval_step) % eval_batch_step == 0 and dist.get_rank() == 0:
@@ -326,7 +344,7 @@ def train(config,
global_step)
global_step += 1
optimizer.clear_grad()
- batch_start = time.time()
+ reader_start = time.time()
if dist.get_rank() == 0:
save_model(
model,
@@ -367,7 +385,11 @@ def eval(model,
with paddle.no_grad():
total_frame = 0.0
total_time = 0.0
- pbar = tqdm(total=len(valid_dataloader), desc='eval model:')
+ pbar = tqdm(
+ total=len(valid_dataloader),
+ desc='eval model:',
+ position=0,
+ leave=True)
max_iter = len(valid_dataloader) - 1 if platform.system(
) == "Windows" else len(valid_dataloader)
for idx, batch in enumerate(valid_dataloader):
@@ -436,8 +458,6 @@ def get_center(model, eval_dataloader, post_process_class):
batch = [item.numpy() for item in batch]
# Obtain usable results from post-processing methods
- total_time += time.time() - start
- # Evaluate the results of the current batch
post_result = post_process_class(preds, batch[1])
#update char_center
@@ -480,11 +500,6 @@ def preprocess(is_train=False):
'CLS', 'PGNet', 'Distillation', 'NRTR', 'TableAttn', 'SAR', 'PSE',
'SEED'
]
- windows_not_support_list = ['PSE']
- if platform.system() == "Windows" and alg in windows_not_support_list:
- logger.warning('{} is not support in Windows now'.format(
- windows_not_support_list))
- sys.exit()
device = 'gpu:{}'.format(dist.ParallelEnv().dev_id) if use_gpu else 'cpu'
device = paddle.set_device(device)
diff --git a/tools/train.py b/tools/train.py
index 05d295aa99718c25b94a123c23d08c2904fe8c6a..d182af2988cb29511be40a079d2b3e06605ebe28 100755
--- a/tools/train.py
+++ b/tools/train.py
@@ -102,10 +102,27 @@ def main(config, device, logger, vdl_writer):
if valid_dataloader is not None:
logger.info('valid dataloader has {} iters'.format(
len(valid_dataloader)))
+
+ use_amp = config["Global"].get("use_amp", False)
+ if use_amp:
+ AMP_RELATED_FLAGS_SETTING = {
+ 'FLAGS_cudnn_batchnorm_spatial_persistent': 1,
+ 'FLAGS_max_inplace_grad_add': 8,
+ }
+ paddle.fluid.set_flags(AMP_RELATED_FLAGS_SETTING)
+ scale_loss = config["Global"].get("scale_loss", 1.0)
+ use_dynamic_loss_scaling = config["Global"].get(
+ "use_dynamic_loss_scaling", False)
+ scaler = paddle.amp.GradScaler(
+ init_loss_scaling=scale_loss,
+ use_dynamic_loss_scaling=use_dynamic_loss_scaling)
+ else:
+ scaler = None
+
# start train
program.train(config, train_dataloader, valid_dataloader, device, model,
loss_class, optimizer, lr_scheduler, post_process_class,
- eval_class, pre_best_model_dict, logger, vdl_writer)
+ eval_class, pre_best_model_dict, logger, vdl_writer, scaler)
def test_reader(config, device, logger):
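
Finally, the AMP switches that tools/train.py now reads from Global are the same ones the TIPC script injects via set_amp_config; an illustrative single-GPU run enabling them from the command line would be:
```shell
# illustrative: turn on the new AMP path via -o overrides
python3 tools/train.py -c configs/det/det_mv3_db.yml \
    -o Global.use_amp=True Global.scale_loss=1024.0 Global.use_dynamic_loss_scaling=True
```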