#!/bin/bash
# Shared helpers for the TIPC test scripts: parsing "key:value" config lines,
# building "key=value" command-line overrides and logging command status.

#######################################
# Print the key part of a "key:value" config line.
# Arguments: $1 - config line
# Outputs:   the text before the first ':' (stdout)
#######################################
function func_parser_key(){
    local strs=$1
    local -a array
    # `read -a` splits on ':' only for this one command, so the caller's IFS
    # is untouched and glob characters in the line are never expanded.
    IFS=":" read -r -a array <<< "${strs}"
    printf '%s\n' "${array[0]}"
}

#######################################
# Print the value part of a "key:value" config line.
# Only the field between the first and the second ':' is returned.
# Arguments: $1 - config line
# Outputs:   the second ':'-separated field, or an empty line (stdout)
#######################################
function func_parser_value(){
    local strs=$1
    local -a array
    IFS=":" read -r -a array <<< "${strs}"
    printf '%s\n' "${array[1]}"
}

#######################################
# Build a "key=value" argument, or a single blank when it must be omitted.
# Arguments: $1 - key, $2 - value
# Outputs:   "key=value", or " " when the key is "null" or the value is
#            "null", a single space, or empty (stdout)
#######################################
function func_set_params(){
    local key=$1
    local value=$2
    if [[ "${key}" == "null" ]]; then
        echo " "
    elif [[ "${value}" == "null" || "${value}" == " " || -z "${value}" ]]; then
        echo " "
    else
        printf '%s\n' "${key}=${value}"
    fi
}

#######################################
# Pick the value configured for the current run mode from a line of the form
# "key:modeA=valueA|modeB=valueB".
# Globals:   MODE (read) - the run mode to look up
# Arguments: $1 - config line
# Outputs:   the value of the first entry whose mode equals MODE, followed by
#            an empty line; only the empty line when no entry matches (stdout)
#######################################
function func_parser_params(){
    local strs=$1
    local -a fields candidates pair
    local candidate
    IFS=":" read -r -a fields <<< "${strs}"
    IFS="|" read -r -a candidates <<< "${fields[1]}"
    for candidate in "${candidates[@]}"; do
        IFS="=" read -r -a pair <<< "${candidate}"
        # Quoted right-hand side: compare literally, not as a glob pattern.
        if [[ "${pair[0]}" == "${MODE}" ]]; then
            printf '%s\n' "${pair[1]}"
            break
        fi
    done
    # The trailing empty line is historical output; callers capture the
    # result with $(...), which strips it.
    printf '\n'
}

#######################################
# Report whether a command succeeded, on stdout and appended to a log file.
# Arguments: $1 - exit code of the command
#            $2 - the command line that was run
#            $3 - log file to append to
#######################################
function status_check(){
    local last_status=$1   # the exit code
    local run_command=$2
    local run_log=$3
    local message
    # A non-numeric or empty status fails the test and is reported as a failure.
    if [ "${last_status}" -eq 0 ]; then
        message="\033[33m Run successfully with command - ${run_command}! \033[0m"
    else
        message="\033[33m Run failed with command - ${run_command}! \033[0m"
    fi
    if [[ -n "${run_log}" ]]; then
        echo -e "${message}" | tee -a "${run_log}"
    else
        echo -e "${message}"
    fi
}
0000000000000000000000000000000000000000..7eeb244c54584045a49de63c319d24814e63a64f --- /dev/null +++ b/test_tipc/configs/cyclegan/train_infer_python.txt @@ -0,0 +1,51 @@ +===========================train_params=========================== +model_name:cyclegan +python:python3.7 +gpu_list:0|0,1 +## +auto_cast:null +epochs:lite_train_lite_infer=5|whole_train_whole_infer=200 +output_dir:./output/ +dataset.train.batch_size:lite_train_lite_infer=1|whole_train_whole_infer=1 +pretrained_model:null +train_model_name:cyclegan_horse2zebra*/*checkpoint.pdparams +train_infer_img_dir:./data/horse2zebra/test +null:null +## +trainer:norm_train +norm_train:tools/main.py -c configs/cyclegan_horse2zebra.yaml -o +pact_train:null +fpgm_train:null +distill_train:null +null:null +null:null +## +===========================eval_params=========================== +eval:null +null:null +## +===========================infer_params=========================== +--output_dir:./output/ +load:null +norm_export:tools/export_model.py -c configs/cyclegan_horse2zebra.yaml --inputs_size="-1,3,-1,-1;-1,3,-1,-1" --load +quant_export:null +fpgm_export:null +distill_export:null +export1:null +export2:null +inference_dir:cycleganmodel_netG_A +train_model:./inference/cyclegan_horse2zebra/cycleganmodel_netG_A +infer_export:null +infer_quant:False +inference:tools/inference.py --model_type cyclegan -c configs/cyclegan_horse2zebra.yaml +--device:gpu +null:null +null:null +null:null +null:null +null:null +--model_path: +null:null +null:null +--benchmark:True +null:null \ No newline at end of file diff --git a/test_tipc/configs/fom/train_infer_python.txt b/test_tipc/configs/fom/train_infer_python.txt new file mode 100644 index 0000000000000000000000000000000000000000..d2303556a722ebb31788749b6e3bc2d1fcedeebd --- /dev/null +++ b/test_tipc/configs/fom/train_infer_python.txt @@ -0,0 +1,51 @@ +===========================train_params=========================== +model_name:fom +python:python3.7 +gpu_list:0 +## 
+auto_cast:null +epochs:lite_train_lite_infer=10|whole_train_whole_infer=100 +output_dir:./output/ +dataset.train.batch_size:lite_train_lite_infer=8|whole_train_whole_infer=8 +pretrained_model:null +train_model_name:firstorder_vox_256*/*checkpoint.pdparams +train_infer_img_dir:./data/firstorder_vox_256/test +null:null +## +trainer:norm_train +norm_train:tools/main.py -c configs/firstorder_vox_256.yaml -o +pact_train:null +fpgm_train:null +distill_train:null +null:null +null:null +## +===========================eval_params=========================== +eval:null +null:null +## +===========================infer_params=========================== +--output_dir:./output/ +load:null +norm_export:tools/export_model.py -c configs/firstorder_vox_256.yaml --inputs_size="1,3,256,256;1,3,256,256;1,10,2;1,10,2,2" --load +quant_export:null +fpgm_export:null +distill_export:null +export1:null +export2:null +inference_dir:fom_dy2st +train_model:./inference/fom_dy2st/ +infer_export:null +infer_quant:False +inference:tools/fom_infer.py --driving_path data/first_order/Voxceleb/test --output_path infer_output/fom +--device:gpu +null:null +null:null +null:null +null:null +null:null +--model_path: +null:null +null:null +--benchmark:True +null:null \ No newline at end of file diff --git a/test_tipc/configs/pix2pix/train_infer_python.txt b/test_tipc/configs/pix2pix/train_infer_python.txt new file mode 100644 index 0000000000000000000000000000000000000000..ed5cf514b08963d176597f740ec1d216989a5eda --- /dev/null +++ b/test_tipc/configs/pix2pix/train_infer_python.txt @@ -0,0 +1,51 @@ +===========================train_params=========================== +model_name:pix2pix +python:python3.7 +gpu_list:0|0,1 +## +auto_cast:null +epochs:lite_train_lite_infer=5|whole_train_whole_infer=200 +output_dir:./output/ +dataset.train.batch_size:lite_train_lite_infer=1|whole_train_whole_infer=1 +pretrained_model:null +train_model_name:pix2pix_facades*/*checkpoint.pdparams 
+train_infer_img_dir:./data/facades/test +null:null +## +trainer:norm_train +norm_train:tools/main.py -c configs/pix2pix_facades.yaml -o +pact_train:null +fpgm_train:null +distill_train:null +null:null +null:null +## +===========================eval_params=========================== +eval:null +null:null +## +===========================infer_params=========================== +--output_dir:./output/ +load:null +norm_export:tools/export_model.py -c configs/pix2pix_facades.yaml --inputs_size="-1,3,-1,-1" --load +quant_export:null +fpgm_export:null +distill_export:null +export1:null +export2:null +inference_dir:pix2pixmodel_netG +train_model:./inference/pix2pix_facade/pix2pixmodel_netG +infer_export:null +infer_quant:False +inference:tools/inference.py --model_type pix2pix -c configs/pix2pix_facades.yaml +--device:cpu +null:null +null:null +null:null +null:null +null:null +--model_path: +null:null +null:null +--benchmark:True +null:null \ No newline at end of file diff --git a/test_tipc/configs/stylegan2/train_infer_python.txt b/test_tipc/configs/stylegan2/train_infer_python.txt new file mode 100644 index 0000000000000000000000000000000000000000..99cfccf7db2e148e42b708543e4179544a5a7058 --- /dev/null +++ b/test_tipc/configs/stylegan2/train_infer_python.txt @@ -0,0 +1,51 @@ +===========================train_params=========================== +model_name:stylegan2 +python:python3.7 +gpu_list:0 +## +auto_cast:null +total_iters:lite_train_lite_infer=10|whole_train_whole_infer=800 +output_dir:./output/ +dataset.train.batch_size:lite_train_lite_infer=3|whole_train_whole_infer=3 +pretrained_model:null +train_model_name:stylegan_v2_256_ffhq*/*checkpoint.pdparams +train_infer_img_dir:null +null:null +## +trainer:norm_train +norm_train:tools/main.py -c configs/stylegan_v2_256_ffhq.yaml -o +pact_train:null +fpgm_train:null +distill_train:null +null:null +null:null +## +===========================eval_params=========================== +eval:null +null:null +## 
+===========================infer_params=========================== +--output_dir:./output/ +load:null +norm_export:tools/export_model.py -c configs/stylegan_v2_256_ffhq.yaml --inputs_size="1,1,512;1,1" --load +quant_export:null +fpgm_export:null +distill_export:null +export1:null +export2:null +inference_dir:stylegan2model_gen +train_model:./inference/stylegan2/stylegan2model_gen +infer_export:null +infer_quant:False +inference:tools/inference.py --model_type stylegan2 -c configs/stylegan_v2_256_ffhq.yaml +--device:gpu +null:null +null:null +null:null +null:null +null:null +--model_path: +null:null +null:null +--benchmark:True +null:null \ No newline at end of file diff --git a/test_tipc/docs/test_train_inference_python.md b/test_tipc/docs/test_train_inference_python.md new file mode 100644 index 0000000000000000000000000000000000000000..daca3c3476eb92a61f01b263ec959e8a7d3e626d --- /dev/null +++ b/test_tipc/docs/test_train_inference_python.md @@ -0,0 +1,127 @@ +# Linux端基础训练预测功能测试 + +Linux端基础训练预测功能测试的主程序为`test_train_inference_python.sh`,可以测试基于Python的模型训练、评估、推理等基本功能。 + + +## 1. 测试结论汇总 + +- 训练相关: + +| 算法论文 | 模型名称 | 模型类型 | 基础
训练预测 | 更多
训练方式 | 模型压缩 | 其他预测部署 | +| :--- | :--- | :----: | :--------: | :---- | :---- | :---- | +| Pix2Pix |Pix2Pix | 生成 | 支持 | 多机多卡 | | | +| CycleGAN |CycleGAN | 生成 | 支持 | 多机多卡 | | | +| StyleGAN2 |StyleGAN2 | 生成 | 支持 | 多机多卡 | | | +| FOMM |FOMM | 生成 | 支持 | 多机多卡 | | | +| BasicVSR |BasicVSR | 超分 | 支持 | 多机多卡 | | | +|PP-MSVSR|PP-MSVSR | 超分| + +- 预测相关:预测功能汇总如下, + +| 模型类型 |device | batchsize | tensorrt | mkldnn | cpu多线程 | +| ---- | ---- | ---- | :----: | :----: | :----: | +| 正常模型 | GPU | 1/6 | fp32 | - | - | + + + +## 2. 测试流程 + +运行环境配置请参考[文档](../../docs/zh_CN/install.md)的内容配置运行环境。 + +### 2.1 安装依赖 +- 安装PaddlePaddle >= 2.1 +- 安装PaddleGAN依赖 + ``` + pip install -v -e . + ``` +- 安装autolog(规范化日志输出工具) + ``` + git clone https://github.com/LDOUBLEV/AutoLog + cd AutoLog + pip3 install -r requirements.txt + python3 setup.py bdist_wheel + pip3 install ./dist/auto_log-1.0.0-py3-none-any.whl + cd ../ + ``` + + +### 2.2 功能测试 +先运行`prepare.sh`准备数据和模型,然后运行`test_train_inference_python.sh`进行测试,最终在```test_tipc/output```目录下生成`python_infer_*.log`格式的日志文件。 + + +`test_train_inference_python.sh`包含4种运行模式,每种模式的运行数据不同,分别用于测试速度和精度,分别是: + +- 模式1:lite_train_lite_infer,使用少量数据训练,用于快速验证训练到预测的走通流程,不验证精度和速度; +```shell +bash test_tipc/prepare.sh ./test_tipc/configs/basicvsr/train_infer_python.txt 'lite_train_lite_infer' +bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/basicvsr/train_infer_python.txt 'lite_train_lite_infer' +``` + +- 模式2:lite_train_whole_infer,使用少量数据训练,一定量数据预测,用于验证训练后的模型执行预测,预测速度是否合理; +```shell +bash test_tipc/prepare.sh ./test_tipc/configs/basicvsr/train_infer_python.txt 'lite_train_whole_infer' +bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/basicvsr/train_infer_python.txt 'lite_train_whole_infer' +``` + +- 模式3:whole_infer,不训练,全量数据预测,走通开源模型评估、动转静,检查inference model预测时间和精度; +```shell +bash test_tipc/prepare.sh ./test_tipc/configs/basicvsr/train_infer_python.txt 'whole_infer' +bash test_tipc/test_train_inference_python.sh 
./test_tipc/configs/basicvsr/train_infer_python.txt 'whole_infer' +``` + +- 模式4:whole_train_whole_infer,CE: 全量数据训练,全量数据预测,验证模型训练精度,预测精度,预测速度; +```shell +bash test_tipc/prepare.sh ./test_tipc/configs/basicvsr/train_infer_python.txt 'whole_train_whole_infer' +bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/basicvsr/train_infer_python.txt 'whole_train_whole_infer' +``` + +运行相应指令后,在`test_tipc/output`文件夹下自动会保存运行日志。如'lite_train_lite_infer'模式下,会运行训练+inference的链条,因此,在`test_tipc/output`文件夹有以下文件: +``` +test_tipc/output/ +|- results_python.log # 运行指令状态的日志 +|- norm_train_gpus_0_autocast_null/ # GPU 0号卡上正常训练的训练日志和模型保存文件夹 +...... +``` + +其中`results_python.log`中包含了每条指令的运行状态,如果运行成功会输出: +``` +Run successfully with command - python3.7 tools/main.py -c configs/basicvsr_reds.yaml -o dataset.train.dataset.num_clips=2 output_dir=./test_tipc/output/norm_train_gpus_0_autocast_null total_iters=5 dataset.train.batch_size=1 ! +-=Run successfully with command - python3.7 tools/export_model.py -c configs/basicvsr_reds.yaml --inputs_size="1,6,3,180,320" --load ./test_tipc/output/norm_train_gpus_0_autocast_null/basicvsr_reds-2021-11-22-07-18/iter_1_checkpoint.pdparams --output_dir ./test_tipc/output/norm_train_gpus_0_autocast_null! +...... +``` +如果运行失败,会输出: +``` +Run failed with command - python3.7 tools/main.py -c configs/basicvsr_reds.yaml -o dataset.train.dataset.num_clips=2 output_dir=./test_tipc/output/norm_train_gpus_0_autocast_null total_iters=5 dataset.train.batch_size=1 ! ! +Run failed with command - python3.7 tools/export_model.py -c configs/basicvsr_reds.yaml --inputs_size="1,6,3,180,320" --load ./test_tipc/output/norm_train_gpus_0_autocast_null/basicvsr_reds-2021-11-22-07-18/iter_1_checkpoint.pdparams --output_dir ./test_tipc/output/norm_train_gpus_0_autocast_null! +...... 
#!/bin/bash
# Download the datasets and pre-exported inference models a TIPC run needs.
#
# Usage: bash test_tipc/prepare.sh <config_file> <mode>
#
# MODE be one of ['lite_train_lite_infer' 'lite_train_whole_infer'
#                 'whole_train_whole_infer' 'whole_infer']
#
# Everything is stored under ./data and ./inference, relative to the
# directory the script is started from.

FILENAME=$1
MODE=$2

# Without a readable config file `cat`/`read` would block on stdin.
if [[ -z "${FILENAME}" || ! -r "${FILENAME}" ]]; then
    echo "Usage: bash $0 <config_file> <mode>" >&2
    exit 1
fi

# parser params: keep every non-empty config line, indexed from 0.
# Reading line by line avoids glob expansion of lines that contain '*'.
lines=()
while IFS= read -r line || [[ -n "${line}" ]]; do
    if [[ -n "${line}" ]]; then
        lines+=("${line}")
    fi
done < "${FILENAME}"

# Print the second ':'-separated field of a "key:value" config line.
function func_parser_value(){
    local strs=$1
    local -a array
    IFS=":" read -r -a array <<< "${strs}"
    printf '%s\n' "${array[1]}"
}

# The training params
model_name=$(func_parser_value "${lines[1]}")

readonly DATASET_URL="https://paddlegan.bj.bcebos.com/datasets"
readonly STATIC_MODEL_URL="https://paddlegan.bj.bcebos.com/static_model"

#######################################
# Download an archive (skipped when the file is already present) and unpack
# it inside the destination directory.
# The unpacking runs in a subshell, so the caller's working directory is
# never changed, even when extraction fails.
# Arguments: $1 - archive URL, $2 - destination directory
# Returns:   status of the extraction
#######################################
function fetch_and_unpack(){
    local url=$1
    local dest=$2
    local archive=${url##*/}
    # The wget status is deliberately not checked: with -nc an archive that
    # is already on disk is reused by the extraction below.
    wget -nc -P "${dest}" "${url}" --no-check-certificate
    (
        cd "${dest}" || exit 1
        case "${archive}" in
            *.tar.gz) tar -xzf "${archive}" ;;
            *.zip)    unzip "${archive}" ;;
            *)        tar xf "${archive}" ;;
        esac
    )
}

# Small training sets used by both lite_train_* modes.
function prepare_lite_train_data(){
    case "${model_name}" in
        pix2pix)
            rm -rf ./data/facades*
            # Only lite_train_lite_infer also clears earlier pix2pix downloads.
            if [[ "${MODE}" == "lite_train_lite_infer" ]]; then
                rm -rf ./data/pix2pix*
            fi
            fetch_and_unpack "${DATASET_URL}/pix2pix_facade_lite.tar" ./data/
            ;;
        cyclegan)
            rm -rf ./data/horse2zebra*
            fetch_and_unpack "${DATASET_URL}/cyclegan_horse2zebra_lite.tar" ./data/
            ;;
        stylegan2)
            rm -rf ./data/ffhq*
            fetch_and_unpack "${DATASET_URL}/ffhq.tar" ./data/
            ;;
        fom)
            rm -rf ./data/first_order*
            fetch_and_unpack "${DATASET_URL}/fom_lite.tar" ./data/
            ;;
        basicvsr)
            # NOTE(review): whole_infer cleans ./data/basic* instead of
            # ./data/REDS* - confirm which directory the lite archive creates.
            rm -rf ./data/REDS*
            fetch_and_unpack "${DATASET_URL}/basicvsr_lite.tar" ./data/
            ;;
    esac
}

# Full training sets; only pix2pix and cyclegan are handled in this mode.
function prepare_whole_train_data(){
    case "${model_name}" in
        pix2pix)
            rm -rf ./data/facades*
            fetch_and_unpack "http://efrosgans.eecs.berkeley.edu/pix2pix/datasets/facades.tar.gz" ./data/
            ;;
        cyclegan)
            rm -rf ./data/horse2zebra*
            fetch_and_unpack "https://people.eecs.berkeley.edu/~taesung_park/CycleGAN/datasets/horse2zebra.zip" ./data/
            ;;
    esac
}

# Test data plus pre-exported inference models for whole_infer.
function prepare_whole_infer_data(){
    case "${model_name}" in
        pix2pix)
            rm -rf ./data/facades*
            fetch_and_unpack "${DATASET_URL}/facades_test.tar" ./data \
                && mv ./data/facades_test ./data/facades
            fetch_and_unpack "${STATIC_MODEL_URL}/pix2pix_facade.tar" ./inference
            ;;
        cyclegan)
            rm -rf ./data/horse2zebra*
            fetch_and_unpack "${DATASET_URL}/cyclegan_horse2zebra_test.tar" ./data \
                && mv ./data/cyclegan_test ./data/horse2zebra
            fetch_and_unpack "${STATIC_MODEL_URL}/cyclegan_horse2zebra.tar" ./inference
            ;;
        fom)
            rm -rf ./data/first_order*
            fetch_and_unpack "${DATASET_URL}/fom_lite_test.tar" ./data/
            fetch_and_unpack "${STATIC_MODEL_URL}/fom_dy2st.tar" ./inference
            ;;
        stylegan2)
            rm -rf ./data/ffhq*
            fetch_and_unpack "${DATASET_URL}/ffhq.tar" ./data/
            fetch_and_unpack "${STATIC_MODEL_URL}/stylegan2_1024.tar" ./inference
            ;;
        basicvsr)
            rm -rf ./data/basic*
            rm -rf ./inference/basic*
            fetch_and_unpack "${DATASET_URL}/basicvsr_lite_test.tar" ./data/
            fetch_and_unpack "${STATIC_MODEL_URL}/basicvsr.tar" ./inference
            ;;
    esac
}

case "${MODE}" in
    lite_train_lite_infer|lite_train_whole_infer)
        prepare_lite_train_data
        ;;
    whole_train_whole_infer)
        prepare_whole_train_data
        ;;
    whole_infer)
        prepare_whole_infer_data
        ;;
    *)
        # Nothing is downloaded for an unknown mode; say so instead of
        # finishing silently.
        echo "prepare.sh: unknown mode '${MODE}', nothing prepared" >&2
        ;;
esac
训练预测 | 更多
训练方式 | 模型压缩 | 其他预测部署 | +| :--- | :--- | :----: | :--------: | :---- | :---- | :---- | +| Pix2Pix |Pix2Pix | 生成 | 支持 | 多机多卡 | | | +| CycleGAN |CycleGAN | 生成 | 支持 | 多机多卡 | | | +| StyleGAN2 |StyleGAN2 | 生成 | 支持 | 多机多卡 | | | +| FOMM |FOMM | 生成 | 支持 | 多机多卡 | | | +| BasicVSR |BasicVSR | 超分 | 支持 | 多机多卡 | | | +|PP-MSVSR|PP-MSVSR | 超分| + + + + +## 3. 一键测试工具使用 +### 目录介绍 + +```shell +test_tipc/ +├── configs/ # 配置文件目录 + ├── basicvsr_reds.yaml # 测试basicvsr模型训练的yaml文件 + ├── cyclegan_horse2zebra.yaml # 测试cyclegan模型训练的yaml文件 + ├── firstorder_vox_256.yaml # 测试fomm模型训练的yaml文件 + ├── pix2pix_facades.yaml # 测试pix2pix模型训练的yaml文件 + ├── stylegan_v2_256_ffhq.yaml # 测试stylegan模型训练的yaml文件 + + ├── ... +├── results/ # 预先保存的预测结果,用于和实际预测结果进行精度比对 + ├── python_basicvsr_results_fp32.txt # 预存的basicvsr模型python预测fp32精度的结果 + ├── python_cyclegan_results_fp32.txt # 预存的cyclegan模型python预测fp32精度的结果 + ├── python_pix2pix_results_fp32.txt # 预存的pix2pix模型python预测的fp32精度的结果 + ├── python_stylegan_results_fp32.txt # 预存的stylegan模型python预测的fp32精度的结果 + ├── ... +├── prepare.sh # 完成test_*.sh运行所需要的数据和模型下载 +├── test_train_inference_python.sh # 测试python训练预测的主程序 +├── compare_results.py # 用于对比log中的预测结果与results中的预存结果精度误差是否在限定范围内 +└── readme.md # 使用文档 +``` + +### 测试流程 +使用本工具,可以测试不同功能的支持情况,以及预测结果是否对齐,测试流程如下: +
+ +
+ +1. 运行prepare.sh准备测试所需数据和模型; +2. 运行要测试的功能对应的测试脚本`test_*.sh`,产出log,由log可以看到不同配置是否运行成功; +3. 用`compare_results.py`对比log中的预测结果和预存在results目录下的结果,判断预测精度是否符合预期(在误差范围内)。 + +其中,有4个测试主程序,功能如下: +- `test_train_inference_python.sh`:测试基于Python的模型训练、评估、推理等基本功能。 + + + +#### 更多教程 +各功能测试中涉及混合精度、裁剪、量化等训练相关,及mkldnn、Tensorrt等多种预测相关参数配置,请点击下方相应链接了解更多细节和使用教程: +[test_train_inference_python 使用](docs/test_train_inference_python.md) diff --git a/test_tipc/test_train_inference_python.sh b/test_tipc/test_train_inference_python.sh new file mode 100644 index 0000000000000000000000000000000000000000..3ab490cf4b4860f3a3d92a2abaffd5b14949d59c --- /dev/null +++ b/test_tipc/test_train_inference_python.sh @@ -0,0 +1,297 @@ +#!/bin/bash +source test_tipc/common_func.sh + +FILENAME=$1 +# MODE be one of ['lite_train_lite_infer' 'lite_train_whole_infer' 'whole_train_whole_infer', 'whole_infer'] +MODE=$2 + +dataline=$(awk 'NR==1, NR==51{print}' $FILENAME) + +# parser params +IFS=$'\n' +lines=(${dataline}) +# The training params +model_name=$(func_parser_value "${lines[1]}") +python=$(func_parser_value "${lines[2]}") +gpu_list=$(func_parser_value "${lines[3]}") + +autocast_list=$(func_parser_value "${lines[5]}") +epoch_key=$(func_parser_key "${lines[6]}") +epoch_num=$(func_parser_params "${lines[6]}") +save_model_key=$(func_parser_key "${lines[7]}") +train_batch_key=$(func_parser_key "${lines[8]}") +train_batch_value=$(func_parser_params "${lines[8]}") +pretrain_model_key=$(func_parser_key "${lines[9]}") +pretrain_model_value=$(func_parser_value "${lines[9]}") +train_model_name=$(func_parser_value "${lines[10]}") +train_infer_img_dir=$(func_parser_value "${lines[11]}") +train_param_key1=$(func_parser_key "${lines[12]}") +train_param_value1=$(func_parser_value "${lines[12]}") + +trainer_list=$(func_parser_value "${lines[14]}") + +trainer_norm=$(func_parser_key "${lines[15]}") +norm_trainer=$(func_parser_value "${lines[15]}") + +trainer_key1=$(func_parser_key "${lines[19]}") 
trainer_value1=$(func_parser_value "${lines[19]}")
trainer_key2=$(func_parser_key "${lines[20]}")
trainer_value2=$(func_parser_value "${lines[20]}")

# eval params
eval_py=$(func_parser_value "${lines[23]}")
eval_key1=$(func_parser_key "${lines[24]}")
eval_value1=$(func_parser_value "${lines[24]}")

# export params
save_infer_key=$(func_parser_key "${lines[27]}")
export_weight=$(func_parser_value "${lines[28]}")
norm_export=$(func_parser_value "${lines[29]}")

inference_dir=$(func_parser_value "${lines[35]}")

# parser inference model
infer_model_dir_list=$(func_parser_value "${lines[36]}")
infer_export_list=$(func_parser_value "${lines[37]}")
infer_is_quant=$(func_parser_value "${lines[38]}")
# parser inference
inference_py=$(func_parser_value "${lines[39]}")
use_gpu_key=$(func_parser_key "${lines[40]}")
use_gpu_list=$(func_parser_value "${lines[40]}")
use_mkldnn_key=$(func_parser_key "${lines[41]}")
use_mkldnn_list=$(func_parser_value "${lines[41]}")
cpu_threads_key=$(func_parser_key "${lines[42]}")
cpu_threads_list=$(func_parser_value "${lines[42]}")
batch_size_key=$(func_parser_key "${lines[43]}")
batch_size_list=$(func_parser_value "${lines[43]}")
use_trt_key=$(func_parser_key "${lines[44]}")
use_trt_list=$(func_parser_value "${lines[44]}")
precision_key=$(func_parser_key "${lines[45]}")
precision_list=$(func_parser_value "${lines[45]}")
infer_model_key=$(func_parser_key "${lines[46]}")
image_dir_key=$(func_parser_key "${lines[47]}")
infer_img_dir=$(func_parser_value "${lines[47]}")
save_log_key=$(func_parser_key "${lines[48]}")
infer_key1=$(func_parser_key "${lines[50]}")
infer_value1=$(func_parser_value "${lines[50]}")

LOG_PATH="./test_tipc/output"
mkdir -p ${LOG_PATH}
status_log="${LOG_PATH}/results_python.log"

#######################################
# Run the inference script once for every device / precision / batch-size
# combination given by the parsed config lists, and log each run's status.
# Globals (read): use_gpu_list, use_mkldnn_list, cpu_threads_list,
#                 batch_size_list, use_trt_list, precision_list, the matching
#                 *_key variables, infer_key1, infer_value1, status_log
# Arguments:
#   $1 - python interpreter
#   $2 - inference script together with its fixed arguments
#   $3 - directory of the exported inference model
#   $4 - directory that receives the per-run log files
#   $5 - image directory
#   $6 - "True" when the model is quantized (optional, defaults to "False")
#######################################
function func_inference(){
    IFS='|'
    _python=$1
    _script=$2
    _model_dir=$3
    _log_path=$4
    _img_dir=$5
    # Default to "False" so the tests below never see an empty operand.
    _flag_quant=${6:-False}
    # inference
    for use_gpu in ${use_gpu_list[*]}; do
        if [ ${use_gpu} = "False" ] || [ ${use_gpu} = "cpu" ]; then
            for use_mkldnn in ${use_mkldnn_list[*]}; do
                if [ ${use_mkldnn} = "False" ] && [ "${_flag_quant}" = "True" ]; then
                    continue
                fi
                for threads in ${cpu_threads_list[*]}; do
                    for batch_size in ${batch_size_list[*]}; do
                        for precision in ${precision_list[*]}; do
                            if [ ${use_mkldnn} = "False" ] && [ ${precision} = "fp16" ]; then
                                continue
                            fi # skip when enable fp16 but disable mkldnn
                            if [ "${_flag_quant}" = "True" ] && [ ${precision} != "int8" ]; then
                                continue
                            fi # skip when quant model inference but precision is not int8
                            set_precision=$(func_set_params "${precision_key}" "${precision}")

                            _save_log_path="${_log_path}/python_infer_cpu_usemkldnn_${use_mkldnn}_threads_${threads}_precision_${precision}_batchsize_${batch_size}.log"
                            set_infer_data=$(func_set_params "${image_dir_key}" "${_img_dir}")
                            # NOTE(review): benchmark_key / benchmark_value are not assigned in the
                            # visible part of this script, so set_benchmark is always blank - confirm.
                            set_benchmark=$(func_set_params "${benchmark_key}" "${benchmark_value}")
                            set_batchsize=$(func_set_params "${batch_size_key}" "${batch_size}")
                            set_cpu_threads=$(func_set_params "${cpu_threads_key}" "${threads}")
                            set_model_dir=$(func_set_params "${infer_model_key}" "${_model_dir}")
                            set_infer_params1=$(func_set_params "${infer_key1}" "${infer_value1}")
                            # NOTE(review): unlike the GPU branch, only the device and the model
                            # directory are passed here; the other set_* values are unused - confirm.
                            command="${_python} ${_script} ${use_gpu_key}=${use_gpu} ${set_model_dir} > ${_save_log_path} 2>&1 "
                            eval $command
                            last_status=${PIPESTATUS[0]}
                            eval "cat ${_save_log_path}"
                            status_check $last_status "${command}" "${status_log}"
                        done
                    done
                done
            done
        elif [ ${use_gpu} = "True" ] || [ ${use_gpu} = "gpu" ]; then
            for use_trt in ${use_trt_list[*]}; do
                for precision in ${precision_list[*]}; do
                    if [[ "${_flag_quant}" = "False" ]] && [[ ${precision} =~ "int8" ]]; then
                        continue
                    fi
                    if [[ ${precision} =~ "fp16" || ${precision} =~ "int8" ]] && [ ${use_trt} = "False" ]; then
                        continue
                    fi
                    if [[ ${use_trt} = "False" || ${precision} =~ "int8" ]] && [ "${_flag_quant}" = "True" ]; then
                        continue
                    fi
                    for batch_size in ${batch_size_list[*]}; do
                        _save_log_path="${_log_path}/python_infer_gpu_usetrt_${use_trt}_precision_${precision}_batchsize_${batch_size}.log"
                        set_infer_data=$(func_set_params "${image_dir_key}" "${_img_dir}")
                        set_benchmark=$(func_set_params "${benchmark_key}" "${benchmark_value}")
                        set_batchsize=$(func_set_params "${batch_size_key}" "${batch_size}")
                        set_tensorrt=$(func_set_params "${use_trt_key}" "${use_trt}")
                        set_precision=$(func_set_params "${precision_key}" "${precision}")
                        set_model_dir=$(func_set_params "${infer_model_key}" "${_model_dir}")
                        set_infer_params1=$(func_set_params "${infer_key1}" "${infer_value1}")
                        command="${_python} ${_script} ${use_gpu_key}=${use_gpu} ${set_tensorrt} ${set_precision} ${set_model_dir} ${set_batchsize} ${set_infer_data} ${set_benchmark} ${set_infer_params1} > ${_save_log_path} 2>&1 "
                        eval $command
                        last_status=${PIPESTATUS[0]}
                        eval "cat ${_save_log_path}"
                        status_check $last_status "${command}" "${status_log}"

                    done
                done
            done
        else
            echo "Does not support hardware other than CPU and GPU Currently!"
        fi
    done
}

if [ ${MODE} = "whole_infer" ]; then
    GPUID=$3
    if [ ${#GPUID} -le 0 ];then
        env=" "
    else
        env="export CUDA_VISIBLE_DEVICES=${GPUID}"
    fi
    # set CUDA_VISIBLE_DEVICES
    eval $env
    export Count=0
    IFS="|"
    infer_run_exports=(${infer_export_list})
    infer_quant_flag=(${infer_is_quant})
    for infer_model in ${infer_model_dir_list[*]}; do
        # run export
        if [ ${infer_run_exports[Count]} != "null" ];then
            save_infer_dir=$(dirname $infer_model)
            set_export_weight=$(func_set_params "${export_weight}" "${infer_model}")
            set_save_infer_key="${save_infer_key} ${save_infer_dir}"
            export_cmd="${python} ${infer_run_exports[Count]} ${set_export_weight} ${set_save_infer_key}"
            echo ${infer_run_exports[Count]}
            echo $export_cmd
            eval $export_cmd
            status_export=$?
            status_check $status_export "${export_cmd}" "${status_log}"
        else
            save_infer_dir=${infer_model}
        fi
        #run inference
        # Pass the per-model quant flag; it used to be parsed but never handed
        # over, which left the flag empty inside func_inference.
        func_inference "${python}" "${inference_py}" "${save_infer_dir}" "${LOG_PATH}" "${infer_img_dir}" "${infer_quant_flag[Count]}"
        Count=$(($Count + 1))
    done
else
    IFS="|"
    export Count=0
    # NOTE(review): train_use_gpu_value / train_use_gpu_key / autocast_key are
    # not assigned in the visible part of this script - confirm.
    USE_GPU_KEY=(${train_use_gpu_value})
    for gpu in ${gpu_list[*]}; do
        train_use_gpu=${USE_GPU_KEY[Count]}
        Count=$(($Count + 1))
        ips=""
        # The length of the gpu entry selects the launch style:
        # "-1" cpu, one character single gpu, up to 15 characters multi-gpu,
        # longer entries are "ips;gpus" for multi-machine training.
        if [ ${gpu} = "-1" ];then
            env=""
        elif [ ${#gpu} -le 1 ];then
            env="export CUDA_VISIBLE_DEVICES=${gpu}"
            eval ${env}
        elif [ ${#gpu} -le 15 ];then
            IFS=","
            array=(${gpu})
            env="export CUDA_VISIBLE_DEVICES=${gpu}"
            IFS="|"
        else
            IFS=";"
            array=(${gpu})
            ips=${array[0]}
            gpu=${array[1]}
            IFS="|"
            env=" "
        fi
        for autocast in ${autocast_list[*]}; do
            if [ ${autocast} = "amp" ]; then
                set_amp_config="Global.use_amp=True Global.scale_loss=1024.0 Global.use_dynamic_loss_scaling=True"
            else
                set_amp_config=" "
            fi
            for trainer in ${trainer_list[*]}; do
                flag_quant=False
                run_train=${norm_trainer}
                run_export=${norm_export}

                if [ ${run_train} = "null" ]; then
                    continue
                fi
                set_autocast=$(func_set_params "${autocast_key}" "${autocast}")
                set_epoch=$(func_set_params "${epoch_key}" "${epoch_num}")
                set_pretrain=$(func_set_params "${pretrain_model_key}" "${pretrain_model_value}")
                set_batchsize=$(func_set_params "${train_batch_key}" "${train_batch_value}")
                set_train_params1=$(func_set_params "${train_param_key1}" "${train_param_value1}")
                set_use_gpu=$(func_set_params "${train_use_gpu_key}" "${train_use_gpu}")
                if [ ${#ips} -le 26 ];then
                    save_log="${LOG_PATH}/${trainer}_gpus_${gpu}_autocast_${autocast}"
                    nodes=1
                else
                    IFS=","
                    ips_array=(${ips})
                    IFS="|"
                    nodes=${#ips_array[@]}
                    save_log="${LOG_PATH}/${trainer}_gpus_${gpu}_autocast_${autocast}_nodes_${nodes}"
                fi
                set_save_model=$(func_set_params "${save_model_key}" "${save_log}")
                if [ ${#gpu} -le 2 ];then  # train with cpu or single gpu
                    cmd="${python} ${run_train} ${set_use_gpu} ${set_save_model} ${set_epoch} ${set_pretrain} ${set_autocast} ${set_batchsize} ${set_train_params1} ${set_amp_config} "
                elif [ ${#ips} -le 26 ];then  # train with multi-gpu
                    cmd="${python} -m paddle.distributed.launch --gpus=${gpu} ${run_train} ${set_use_gpu} ${set_save_model} ${set_epoch} ${set_pretrain} ${set_autocast} ${set_batchsize} ${set_train_params1} ${set_amp_config}"
                else     # train with multi-machine
                    cmd="${python} -m paddle.distributed.launch --ips=${ips} --gpus=${gpu} ${run_train} ${set_use_gpu} ${set_save_model} ${set_pretrain} ${set_epoch} ${set_autocast} ${set_batchsize} ${set_train_params1} ${set_amp_config}"
                fi
                # run train
                eval "unset CUDA_VISIBLE_DEVICES"
                eval $cmd
                # Capture the training status before anything else runs;
                # reading $? after the echo below would report echo's status.
                train_status=$?
                echo $cmd
                status_check ${train_status} "${cmd}" "${status_log}"

                set_eval_pretrain=$(func_set_params "${pretrain_model_key}" "${save_log}/${train_model_name}")
                # save norm trained models to set pretrain for pact training and fpgm training

                # run eval
                if [ ${eval_py} != "null" ]; then
                    set_eval_params1=$(func_set_params "${eval_key1}" "${eval_value1}")
                    eval_cmd="${python} ${eval_py} ${set_eval_pretrain} ${set_use_gpu} ${set_eval_params1}"
                    eval $eval_cmd
                    status_check $? "${eval_cmd}" "${status_log}"
                fi
                # run export model
                if [ ${run_export} != "null" ]; then
                    # run export model
                    save_infer_path="${save_log}"
                    set_export_weight="${save_log}/${train_model_name}"
                    # Unquoted on purpose: the echo expands the wildcards in
                    # train_model_name to the checkpoint path on disk.
                    set_export_weight_path=$( echo ${set_export_weight})
                    set_save_infer_key="${save_infer_key} ${save_infer_path}"
                    export_cmd="${python} ${run_export} ${set_export_weight_path} ${set_save_infer_key}"
                    eval "$export_cmd"
                    status_check $? "${export_cmd}" "${status_log}"

                    #run inference
                    eval $env
                    save_infer_path="${save_log}"
                    if [ ${inference_dir} != "null" ] && [ ${inference_dir} != '##' ]; then
                        infer_model_dir="${save_infer_path}/${inference_dir}"
                    else
                        infer_model_dir=${save_infer_path}
                    fi
                    func_inference "${python}" "${inference_py}" "${infer_model_dir}" "${LOG_PATH}" "${train_infer_img_dir}" "${flag_quant}"

                    eval "unset CUDA_VISIBLE_DEVICES"
                fi
            done  # done with:    for trainer in ${trainer_list[*]}; do
        done      # done with:    for autocast in ${autocast_list[*]}; do
    done          # done with:    for gpu in ${gpu_list[*]}; do
fi  # end if [ ${MODE} = "whole_infer" ]; then