#!/bin/sh
#
# pcloud_train.sh - split the cloud data set for this node, then run training.
#
# Usage:
#   pcloud_train.sh DATA_PATH MODEL_PATH NUM_CPU NUM_GPU IS_LOCAL
#
# Positional arguments:
#   $1  DATA_PATH   directory holding cloud.train.manifest, cloud.dev.manifest,
#                   cloud.train.tar, cloud.dev.tar, vocab.txt and mean_std.npz
#   $2  MODEL_PATH  passed to train.py as --output_model_dir
#   $3  NUM_CPU     passed to train.py as --num_threads_data
#   $4  NUM_GPU     passed to train.py as --trainer_count
#   $5  IS_LOCAL    passed to train.py as --is_local
#
# The script is kept free of bash-only syntax so it also runs under plain sh.

DATA_PATH=$1
MODEL_PATH=$2
NUM_CPU=$3
NUM_GPU=$4
IS_LOCAL=$5

# Input files, all located directly under DATA_PATH.
TRAIN_MANI="${DATA_PATH}/cloud.train.manifest"
DEV_MANI="${DATA_PATH}/cloud.dev.manifest"
TRAIN_TAR="${DATA_PATH}/cloud.train.tar"
DEV_TAR="${DATA_PATH}/cloud.dev.tar"
VOCAB_PATH="${DATA_PATH}/vocab.txt"
MEAN_STD_FILE="${DATA_PATH}/mean_std.npz"

# Split the training data for each pcloud node.
# split_data.py receives the full training manifest and data tar; the manifest
# it writes to /local.train.manifest is the one train.py is later given as
# --train_manifest_path.
# Abort on failure so training never starts with a missing or stale manifest.
python ./cloud/split_data.py \
  --in_manifest_path="${TRAIN_MANI}" \
  --data_tar_path="${TRAIN_TAR}" \
  --out_manifest_path='/local.train.manifest' || {
  echo "pcloud_train.sh: splitting the training data failed" >&2
  exit 1
}

# Split the dev data for each pcloud node.
# split_data.py receives the full dev manifest and data tar; the manifest it
# writes to /local.dev.manifest is the one train.py is later given as
# --dev_manifest_path.
# Abort on failure so training never starts with a missing or stale manifest.
python ./cloud/split_data.py \
  --in_manifest_path="${DEV_MANI}" \
  --data_tar_path="${DEV_TAR}" \
  --out_manifest_path='/local.dev.manifest' || {
  echo "pcloud_train.sh: splitting the dev data failed" >&2
  exit 1
}

# Run training on the GPU using the per-node manifests written by the split
# steps. This is the last command, so its exit status becomes the script's
# exit status.
python train.py \
  --use_gpu=1 \
  --trainer_count="${NUM_GPU}" \
  --num_threads_data="${NUM_CPU}" \
  --is_local="${IS_LOCAL}" \
  --mean_std_filepath="${MEAN_STD_FILE}" \
  --train_manifest_path='/local.train.manifest' \
  --dev_manifest_path='/local.dev.manifest' \
  --vocab_filepath="${VOCAB_PATH}" \
  --output_model_dir="${MODEL_PATH}"