提交 438e1bd3 编写于 作者: H Hui Zhang

add benmark scripts

上级 068c2b14
old-pd_env.txt
pd_env.txt
# Benchmark Test
## Data
* Aishell
## Docker
```
registry.baidubce.com/paddlepaddle/paddle 2.1.1-gpu-cuda10.2-cudnn7 59d5ec1de486
```
#!/bin/bash
# collect env info
bash ../../utils/pd_env_collect.sh
# 提供可稳定复现性能的脚本,默认在标准docker环境内py37执行: paddlepaddle/paddle:latest-gpu-cuda10.1-cudnn7 paddle=2.1.2 py=37
# 执行目录:需说明
cd **
# 1 安装该模型需要的依赖 (如需开启优化策略请注明)
pip install ...
# 2 拷贝该模型需要数据、预训练模型
# 3 批量运行(如不方便批量,1,2需放到单个模型中)
model_mode_list=(MobileNetv1 MobileNetv2)
fp_item_list=(fp32 fp16)
bs_item=(32 64 96)
for model_mode in ${model_mode_list[@]}; do
for fp_item in ${fp_item_list[@]}; do
for bs_item in ${bs_list[@]}
do
echo "index is speed, 1gpus, begin, ${model_name}"
run_mode=sp
CUDA_VISIBLE_DEVICES=0 bash run_benchmark.sh ${run_mode} ${bs_item} ${fp_item} 500 ${model_mode} # (5min)
sleep 60
echo "index is speed, 8gpus, run_mode is multi_process, begin, ${model_name}"
run_mode=mp
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 bash run_benchmark.sh ${run_mode} ${bs_item} ${fp_item} 500 ${model_mode}
sleep 60
done
done
done
#!/bin/bash
set -xe
# 运行示例:CUDA_VISIBLE_DEVICES=0 bash run_benchmark.sh ${run_mode} ${bs_item} ${fp_item} 500 ${model_mode}
# 参数说明
function _set_params(){
run_mode=${1:-"sp"} # 单卡sp|多卡mp
batch_size=${2:-"64"}
fp_item=${3:-"fp32"} # fp32|fp16
max_iter=${4:-"500"} # 可选,如果需要修改代码提前中断
model_name=${5:-"model_name"}
run_log_path=${TRAIN_LOG_DIR:-$(pwd)} # TRAIN_LOG_DIR 后续QA设置该参数
# 以下不用修改
device=${CUDA_VISIBLE_DEVICES//,/ }
arr=(${device})
num_gpu_devices=${#arr[*]}
log_file=${run_log_path}/${model_name}_${run_mode}_bs${batch_size}_${fp_item}_${num_gpu_devices}
}
function _train(){
echo "Train on ${num_gpu_devices} GPUs"
echo "current CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES, gpus=$num_gpu_devices, batch_size=$batch_size"
train_cmd="--model_name=${model_name}
--batch_size=${batch_size}
--fp=${fp_item} \
--max_iter=${max_iter} "
case ${run_mode} in
sp) train_cmd="python -u tools/train.py "${train_cmd}" ;;
mp)
train_cmd="python -m paddle.distributed.launch --log_dir=./mylog --gpus=$CUDA_VISIBLE_DEVICES tools/train.py "${train_cmd}"
log_parse_file="mylog/workerlog.0" ;;
*) echo "choose run_mode(sp or mp)"; exit 1;
esac
# 以下不用修改
timeout 15m ${train_cmd} > ${log_file} 2>&1
if [ $? -ne 0 ];then
echo -e "${model_name}, FAIL"
export job_fail_flag=1
else
echo -e "${model_name}, SUCCESS"
export job_fail_flag=0
fi
kill -9 `ps -ef|grep 'python'|awk '{print $2}'`
if [ $run_mode = "mp" -a -d mylog ]; then
rm ${log_file}
cp mylog/workerlog.0 ${log_file}
fi
}
_set_params $@
_train
#!/usr/bin/env bash
unset GREP_OPTIONS
set -u # Check for undefined variables
die() {
# Print a message and exit with code 1.
#
# Usage: die <error_message>
# e.g., die "Something bad happened."
echo $@
exit 1
}
echo "Collecting system information..."
OUTPUT_FILE=pd_env.txt
python_bin_path=$(which python || which python3 || die "Cannot find Python binary")
{
echo
echo '== check python ==================================================='
} >> ${OUTPUT_FILE}
cat <<EOF > /tmp/check_python.py
import platform
print("""python version: %s
python branch: %s
python build version: %s
python compiler version: %s
python implementation: %s
""" % (
platform.python_version(),
platform.python_branch(),
platform.python_build(),
platform.python_compiler(),
platform.python_implementation(),
))
EOF
${python_bin_path} /tmp/check_python.py 2>&1 >> ${OUTPUT_FILE}
{
echo
echo '== check os platform ==============================================='
} >> ${OUTPUT_FILE}
cat <<EOF > /tmp/check_os.py
import platform
print("""os: %s
os kernel version: %s
os release version: %s
os platform: %s
linux distribution: %s
linux os distribution: %s
mac version: %s
uname: %s
architecture: %s
machine: %s
""" % (
platform.system(),
platform.version(),
platform.release(),
platform.platform(),
platform.linux_distribution(),
platform.dist(),
platform.mac_ver(),
platform.uname(),
platform.architecture(),
platform.machine(),
))
EOF
${python_bin_path} /tmp/check_os.py 2>&1 >> ${OUTPUT_FILE}
{
echo
echo '== are we in docker ============================================='
num=`cat /proc/1/cgroup | grep docker | wc -l`;
if [ $num -ge 1 ]; then
echo "Yes"
else
echo "No"
fi
echo
echo '== compiler ====================================================='
c++ --version 2>&1
echo
echo '== check pips ==================================================='
pip list 2>&1 | grep "proto\|numpy\|paddlepaddle"
echo
echo '== check for virtualenv ========================================='
${python_bin_path} -c "import sys;print(hasattr(sys, \"real_prefix\"))"
echo
echo '== paddlepaddle import ============================================'
} >> ${OUTPUT_FILE}
cat <<EOF > /tmp/check_pd.py
import paddle as pd;
pd.set_device('cpu')
print("pd.version.full_version = %s" % pd.version.full_version)
print("pd.version.commit = %s" % pd.version.commit)
print("pd.__version__ = %s" % pd.__version__)
print("Sanity check: %r" % pd.zeros([1,2,3])[:1])
EOF
${python_bin_path} /tmp/check_pd.py 2>&1 >> ${OUTPUT_FILE}
LD_DEBUG=libs ${python_bin_path} -c "import paddle" 2>>${OUTPUT_FILE} > /tmp/loadedlibs
{
grep libcudnn.so /tmp/loadedlibs
echo
echo '== env =========================================================='
if [ -z ${LD_LIBRARY_PATH+x} ]; then
echo "LD_LIBRARY_PATH is unset";
else
echo LD_LIBRARY_PATH ${LD_LIBRARY_PATH} ;
fi
if [ -z ${DYLD_LIBRARY_PATH+x} ]; then
echo "DYLD_LIBRARY_PATH is unset";
else
echo DYLD_LIBRARY_PATH ${DYLD_LIBRARY_PATH} ;
fi
echo
echo '== nvidia-smi ==================================================='
nvidia-smi 2>&1
echo
echo '== cuda libs ==================================================='
} >> ${OUTPUT_FILE}
find /usr/local -type f -name 'libcudart*' 2>/dev/null | grep cuda | grep -v "\\.cache" >> ${OUTPUT_FILE}
find /usr/local -type f -name 'libudnn*' 2>/dev/null | grep cuda | grep -v "\\.cache" >> ${OUTPUT_FILE}
{
echo
echo '== paddlepaddle installed from info =================='
pip show paddlepaddle-gpu
echo
echo '== python version =============================================='
echo '(major, minor, micro, releaselevel, serial)'
python -c 'import sys; print(sys.version_info[:])'
echo
echo '== bazel version ==============================================='
bazel version
echo '== cmake version ==============================================='
cmake --version
} >> ${OUTPUT_FILE}
# Remove any words with google.
mv $OUTPUT_FILE old-$OUTPUT_FILE
grep -v -i google old-${OUTPUT_FILE} > $OUTPUT_FILE
echo "Wrote environment to ${OUTPUT_FILE}. You can review the contents of that file."
echo "and use it to populate the fields in the github issue template."
echo
echo "cat ${OUTPUT_FILE}"
echo
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册