train.sh 661 字节
Newer Older
H
Hui Zhang 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
#!/usr/bin/env bash
# Launch a training run for one experiment.
#
# Usage: CUDA_VISIBLE_DEVICES=0 train.sh config_path ckpt_name
#   config_path  forwarded to train.py as --config
#   ckpt_name    experiment name; forwarded as --output exp/<ckpt_name>
#
# Environment:
#   CUDA_VISIBLE_DEVICES  comma-separated list; its field count is used as
#                         the number of GPUs (empty/unset counts as 0)
#   BIN_DIR               directory that contains train.py

# Exactly two positional arguments are required.
if [ "$#" -ne 2 ]; then
    echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name" >&2
    # Exit statuses must be in 0-255; `exit -1` is not portable.
    exit 1
fi

# Count the comma-separated fields; an empty value yields NF == 0.
ngpu=$(printf '%s\n' "${CUDA_VISIBLE_DEVICES:-}" | awk -F "," '{print NF}')
echo "using $ngpu gpus..."

config_path=$1
ckpt_name=$2

# `device` used to be echoed without ever being assigned. Derive it from the
# GPU count unless the caller already exported a value. Within this script it
# is only used for the log line below.
if [ -z "${device:-}" ]; then
    if [ "${ngpu}" -eq 0 ]; then
        device=cpu
    else
        device=gpu
    fi
fi
echo "using ${device}..."

# Experiment outputs are written below ./exp.
mkdir -p exp

18 19 20
# seed may break model convergence
seed=0
if [ "${seed}" != 0 ]; then
    # Exported so the python child process inherits it.
    # NOTE(review): presumably requests deterministic cuDNN kernels - confirm
    # against the framework documentation.
    export FLAGS_cudnn_deterministic=True
fi

# BIN_DIR must come from the environment; abort early with a clear message
# instead of trying to run "/train.py".
: "${BIN_DIR:?BIN_DIR must be set to the directory containing train.py}"

python3 -u "${BIN_DIR}/train.py" \
--nproc "${ngpu}" \
--config "${config_path}" \
--output "exp/${ckpt_name}" \
--seed "${seed}"
# Capture the training status right away: the cleanup `if` below overwrites
# $? (an `if` whose condition is false returns 0), which previously hid
# training failures.
train_status=$?

if [ "${seed}" != 0 ]; then
    unset FLAGS_cudnn_deterministic
fi

if [ "${train_status}" -ne 0 ]; then
    echo "Failed in training!"
    exit 1
fi

exit 0