Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
s920243400
PaddleOCR
提交
caa7799b
P
PaddleOCR
项目概览
s920243400
/
PaddleOCR
与 Fork 源项目一致
Fork自
PaddlePaddle / PaddleOCR
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleOCR
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
caa7799b
编写于
11月 23, 2021
作者:
D
Double_V
提交者:
GitHub
11月 23, 2021
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #4670 from hysunflower/update_ocr_scripts_for_benchmark
Update ocr scripts for benchmark
上级
0d933e9a
d4c2a685
变更
2
显示空白变更内容
内联
并排
Showing
2 changed file
with
25 addition
and
24 deletion
+25
-24
benchmark/run_benchmark_det.sh
benchmark/run_benchmark_det.sh
+19
-21
benchmark/run_det.sh
benchmark/run_det.sh
+6
-3
未找到文件。
benchmark/run_benchmark_det.sh
浏览文件 @
caa7799b
...
@@ -6,27 +6,35 @@ function _set_params(){
...
@@ -6,27 +6,35 @@ function _set_params(){
run_mode
=
${
1
:-
"sp"
}
# 单卡sp|多卡mp
run_mode
=
${
1
:-
"sp"
}
# 单卡sp|多卡mp
batch_size
=
${
2
:-
"64"
}
batch_size
=
${
2
:-
"64"
}
fp_item
=
${
3
:-
"fp32"
}
# fp32|fp16
fp_item
=
${
3
:-
"fp32"
}
# fp32|fp16
max_
iter
=
${
4
:-
"10"
}
# 可选,如果需要修改代码提前中断
max_
epoch
=
${
4
:-
"10"
}
# 可选,如果需要修改代码提前中断
model_
name
=
${
5
:-
"model_name
"
}
model_
item
=
${
5
:-
"model_item
"
}
run_log_path
=
${
TRAIN_LOG_DIR
:-
$(
pwd
)
}
# TRAIN_LOG_DIR 后续QA设置该参数
run_log_path
=
${
TRAIN_LOG_DIR
:-
$(
pwd
)
}
# TRAIN_LOG_DIR 后续QA设置该参数
# 日志解析所需参数
base_batch_size
=
${
batch_size
}
mission_name
=
"OCR"
direction_id
=
"0"
ips_unit
=
"images/sec"
skip_steps
=
2
# 解析日志,有些模型前几个step耗时长,需要跳过 (必填)
keyword
=
"ips:"
# 解析日志,筛选出数据所在行的关键字 (必填)
index
=
"1"
model_name
=
${
model_item
}
_
${
run_mode
}
_bs
${
batch_size
}
_
${
fp_item
}
# model_item 用于yml文件名匹配,model_name 用于数据入库前端展示
# 以下不用修改
# 以下不用修改
device
=
${
CUDA_VISIBLE_DEVICES
//,/
}
device
=
${
CUDA_VISIBLE_DEVICES
//,/
}
arr
=(
${
device
}
)
arr
=(
${
device
}
)
num_gpu_devices
=
${#
arr
[*]
}
num_gpu_devices
=
${#
arr
[*]
}
log_file
=
${
run_log_path
}
/
${
model_
name
}
_
${
run_mode
}
_bs
${
batch_size
}
_
${
fp_item
}
_
${
num_gpu_devices
}
log_file
=
${
run_log_path
}
/
${
model_
item
}
_
${
run_mode
}
_bs
${
batch_size
}
_
${
fp_item
}
_
${
num_gpu_devices
}
}
}
function
_train
(){
function
_train
(){
echo
"Train on
${
num_gpu_devices
}
GPUs"
echo
"Train on
${
num_gpu_devices
}
GPUs"
echo
"current CUDA_VISIBLE_DEVICES=
$CUDA_VISIBLE_DEVICES
, gpus=
$num_gpu_devices
, batch_size=
$batch_size
"
echo
"current CUDA_VISIBLE_DEVICES=
$CUDA_VISIBLE_DEVICES
, gpus=
$num_gpu_devices
, batch_size=
$batch_size
"
train_cmd
=
"-c configs/det/
${
model_
name
}
.yml -o Train.loader.batch_size_per_card=
${
batch_size
}
Global.epoch_num=
${
max_iter
}
Global.eval_batch_step=[0,20000] Global.print_batch_step=2"
train_cmd
=
"-c configs/det/
${
model_
item
}
.yml -o Train.loader.batch_size_per_card=
${
batch_size
}
Global.epoch_num=
${
max_epoch
}
Global.eval_batch_step=[0,20000] Global.print_batch_step=2"
case
${
run_mode
}
in
case
${
run_mode
}
in
sp
)
sp
)
train_cmd
=
"python
3.7
tools/train.py "
${
train_cmd
}
""
train_cmd
=
"python tools/train.py "
${
train_cmd
}
""
;;
;;
mp
)
mp
)
train_cmd
=
"python
3.7
-m paddle.distributed.launch --log_dir=./mylog --gpus=
$CUDA_VISIBLE_DEVICES
tools/train.py
${
train_cmd
}
"
train_cmd
=
"python -m paddle.distributed.launch --log_dir=./mylog --gpus=
$CUDA_VISIBLE_DEVICES
tools/train.py
${
train_cmd
}
"
;;
;;
*
)
echo
"choose run_mode(sp or mp)"
;
exit
1
;
*
)
echo
"choose run_mode(sp or mp)"
;
exit
1
;
esac
esac
...
@@ -46,17 +54,7 @@ function _train(){
...
@@ -46,17 +54,7 @@ function _train(){
fi
fi
}
}
function
_analysis_log
(){
source
${
BENCHMARK_ROOT
}
/scripts/run_model.sh
# 在该脚本中会对符合benchmark规范的log使用analysis.py 脚本进行性能数据解析;该脚本在连调时可从benchmark repo中下载https://github.com/PaddlePaddle/benchmark/blob/master/scripts/run_model.sh;如果不联调只想要产出训练log可以注掉本行,提交时需打开
analysis_cmd
=
"python3.7 benchmark/analysis.py --filename
${
log_file
}
--mission_name
${
model_name
}
--run_mode
${
run_mode
}
--direction_id 0 --keyword 'ips:' --base_batch_size
${
batch_size
}
--skip_steps 1 --gpu_num
${
num_gpu_devices
}
--index 1 --model_mode=-1 --ips_unit=samples/sec"
eval
$analysis_cmd
}
function
_kill_process
(){
kill
-9
`
ps
-ef
|grep
'python3.7'
|awk
'{print $2}'
`
}
_set_params
$@
_set_params
$@
_train
#_train # 如果只想产出训练log,不解析,可取消注释
_analysis_log
_run
# 该函数在run_model.sh中,执行时会调用_train; 如果不联调只想要产出训练log可以注掉本行,提交时需打开
_kill_process
\ No newline at end of file
benchmark/run_det.sh
浏览文件 @
caa7799b
#!/bin/bash
# 提供可稳定复现性能的脚本,默认在标准docker环境内py37执行: paddlepaddle/paddle:latest-gpu-cuda10.1-cudnn7 paddle=2.1.2 py=37
# 提供可稳定复现性能的脚本,默认在标准docker环境内py37执行: paddlepaddle/paddle:latest-gpu-cuda10.1-cudnn7 paddle=2.1.2 py=37
# 执行目录: ./PaddleOCR
# 执行目录: ./PaddleOCR
# 1 安装该模型需要的依赖 (如需开启优化策略请注明)
# 1 安装该模型需要的依赖 (如需开启优化策略请注明)
python
3.7
-m
pip
install
-r
requirements.txt
python
-m
pip
install
-r
requirements.txt
# 2 拷贝该模型需要数据、预训练模型
# 2 拷贝该模型需要数据、预训练模型
wget
-P
./train_data/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/icdar2015.tar
&&
cd
train_data
&&
tar
xf icdar2015.tar
&&
cd
../
wget
-P
./train_data/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/icdar2015.tar
&&
cd
train_data
&&
tar
xf icdar2015.tar
&&
cd
../
wget
-P
./pretrain_models/ https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNet50_vd_pretrained.pdparams
wget
-P
./pretrain_models/ https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNet50_vd_pretrained.pdparams
...
@@ -17,11 +18,13 @@ for model_mode in ${model_mode_list[@]}; do
...
@@ -17,11 +18,13 @@ for model_mode in ${model_mode_list[@]}; do
for
bs_item
in
${
bs_list
[@]
}
;
do
for
bs_item
in
${
bs_list
[@]
}
;
do
echo
"index is speed, 1gpus, begin,
${
model_name
}
"
echo
"index is speed, 1gpus, begin,
${
model_name
}
"
run_mode
=
sp
run_mode
=
sp
CUDA_VISIBLE_DEVICES
=
0 bash benchmark/run_benchmark_det.sh
${
run_mode
}
${
bs_item
}
${
fp_item
}
2
${
model_mode
}
# (5min)
log_name
=
ocr_
${
model_mode
}
_
${
run_mode
}
_bs
${
bs_item
}
_
${
fp_item
}
CUDA_VISIBLE_DEVICES
=
0 bash benchmark/run_benchmark_det.sh
${
run_mode
}
${
bs_item
}
${
fp_item
}
1
${
model_mode
}
|
tee
${
log_path
}
/
${
log_name
}
_speed_1gpus 2>&1
# (5min)
sleep
60
sleep
60
echo
"index is speed, 8gpus, run_mode is multi_process, begin,
${
model_name
}
"
echo
"index is speed, 8gpus, run_mode is multi_process, begin,
${
model_name
}
"
run_mode
=
mp
run_mode
=
mp
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7 bash benchmark/run_benchmark_det.sh
${
run_mode
}
${
bs_item
}
${
fp_item
}
2
${
model_mode
}
log_name
=
ocr_
${
model_mode
}
_
${
run_mode
}
_bs
${
bs_item
}
_
${
fp_item
}
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7 bash benchmark/run_benchmark_det.sh
${
run_mode
}
${
bs_item
}
${
fp_item
}
2
${
model_mode
}
|
tee
${
log_path
}
/
${
log_name
}
_speed_8gpus8p 2>&1
sleep
60
sleep
60
done
done
done
done
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录