#!/bin/bash
#
# Launch model training through ${BIN_DIR}/train.py.
#
# Usage: CUDA_VISIBLE_DEVICES=0 <this script> config_path ckpt_name
#
# Environment read by this script: MAIN_ROOT, BIN_DIR, CUDA_VISIBLE_DEVICES.
#
# The variables below are defaults that are forwarded to train.py.
# NOTE(review): presumably they can be overridden on the command line via the
# sourced utils/parse_options.sh (e.g. --seed 1) -- confirm against that file.

# Option string forwarded as --profiler-options (empty by default).
profiler_options=

# Values forwarded as --benchmark-batch-size / --benchmark-max-step.
benchmark_batch_size=0
benchmark_max_step=0

# seed may break model convergence
# A non-zero seed additionally enables FLAGS_cudnn_deterministic before training.
seed=0

# Pull in the project's option parser; abort if it cannot be loaded.
# The path is quoted so a MAIN_ROOT containing spaces or glob characters
# still resolves to a single file.
source "${MAIN_ROOT}/utils/parse_options.sh" || exit 1

# Number of GPUs = number of comma-separated fields in CUDA_VISIBLE_DEVICES
# (0 when the variable is unset or empty). The value is quoted and emitted
# with printf so it is never subject to word splitting or globbing.
ngpu=$(printf '%s\n' "${CUDA_VISIBLE_DEVICES:-}" | awk -F ',' '{print NF}')
echo "using $ngpu gpus..."

# A non-zero seed requests a reproducible run: export
# FLAGS_cudnn_deterministic=True so the training process inherits it.
# An empty or unset seed is treated like 0 (no-op) rather than producing a
# "[: unary operator expected" error from an unquoted expansion.
if [ "${seed:-0}" != 0 ]; then
    export FLAGS_cudnn_deterministic=True
    echo "using seed $seed & FLAGS_cudnn_deterministic=True ..."
fi

# Exactly two positional arguments are required: config_path and ckpt_name.
# Print the usage line and fail otherwise. The exit status is 1 (a valid
# 0-255 status, matching the script's other failure exits) instead of the
# out-of-range "-1".
if [ $# -ne 2 ]; then
    echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name"
    exit 1
fi

# Training output is written under ./exp; make sure the directory exists.
mkdir -p exp

# Positional arguments: the training config path, then the checkpoint name.
ckpt_name="$2"
config_path="$1"

# Run training. Every expansion is quoted so that empty values or values
# containing whitespace are passed to train.py as exactly one argument each.
python3 -u "${BIN_DIR}/train.py" \
    --seed "${seed}" \
    --nproc "${ngpu}" \
    --config "${config_path}" \
    --output "exp/${ckpt_name}" \
    --profiler-options "${profiler_options}" \
    --benchmark-batch-size "${benchmark_batch_size}" \
    --benchmark-max-step "${benchmark_max_step}"
# Capture the trainer's exit status right away: any later command (including
# the `if` below) overwrites $?, which previously hid training failures.
train_status=$?

# Undo the determinism flag that was exported for a non-zero seed.
if [ "${seed:-0}" != 0 ]; then
    unset FLAGS_cudnn_deterministic
fi

# Propagate a training failure to the caller.
if [ "${train_status}" -ne 0 ]; then
    echo "Failed in training!"
    exit 1
fi

exit 0