Commit 56d195fd authored by W wanghaoshuang

Implement uploading data in submit scripts and fix issues

Parent e9424471
@@ -21,21 +21,8 @@ Then we can get the job name 'deepspeech20170727130129' from the last line
```
$ paddlecloud logs -n 10000 deepspeech20170727130129
$ ==========================deepspeech20170727130129-trainer-6vk3m==========================
label selector: paddle-job-pserver=deepspeech20170727130129, desired: 1
running pod list: [('Running', '10.1.3.6')]
label selector: paddle-job=deepspeech20170727130129, desired: 1
running pod list: [('Running', '10.1.83.14')]
Starting training job: /pfs/dlnel/home/****@baidu.com/jobs/deepspeech20170727130129, num_gradient_servers: 1, trainer_id: 0, version: v2
I0727 05:01:42.969719 25 Util.cpp:166] commandline: --num_gradient_servers=1 --ports_num_for_sparse=1 --use_gpu=1 --trainer_id=0 --pservers=10.1.3.6 --trainer_count=4 --num_passes=1 --ports_num=1 --port=7164
[INFO 2017-07-27 05:01:50,279 layers.py:2430] output for __conv_0__: c = 32, h = 81, w = 54, size = 139968
[WARNING 2017-07-27 05:01:50,280 layers.py:2789] brelu is not recommend for batch normalization's activation, maybe the relu is better
[INFO 2017-07-27 05:01:50,283 layers.py:2430] output for __conv_1__: c = 32, h = 41, w = 54, size = 70848
I0727 05:01:50.316176 25 MultiGradientMachine.cpp:99] numLogicalDevices=1 numThreads=4 numDevices=4
I0727 05:01:50.454787 25 GradientMachine.cpp:85] Initing parameters..
I0727 05:01:50.690007 25 GradientMachine.cpp:92] Init parameters done.
```
[More options and commands for paddle cloud](https://github.com/PaddlePaddle/cloud/blob/develop/doc/usage_cn.md)
## Run DS2 with customized data
Edit the path variables at the top of the submit script below, then run it. The script uploads your data to cloud storage, skipping any files that are already there, and submits the training job.
TRAIN_MANIFEST="/home/work/wanghaoshuang/ds2/pcloud/models/deep_speech_2/datasets/manifest.dev"
TEST_MANIFEST="/home/work/wanghaoshuang/ds2/pcloud/models/deep_speech_2/datasets/manifest.dev"
VOCAB_PATH="/home/work/wanghaoshuang/ds2/pcloud/models/deep_speech_2/datasets/vocab/eng_vocab.txt"
MEAN_STD_PATH="/home/work/wanghaoshuang/ds2/pcloud/models/deep_speech_2/mean_std.npz"
CLOUD_DATA_DIR="/pfs/dlnel/home/wanghaoshuang@baidu.com/deepspeech2/data"
CLOUD_MODEL_DIR="/pfs/dlnel/home/wanghaoshuang@baidu.com/deepspeech2/model"
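# The DS2 source tree sits one directory above this script.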
DS2_PATH=${PWD%/*}
rm -rf ./tmp
mkdir ./tmp
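# Upload the mean/std statistics file only if it is not already in cloud storage.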
paddlecloud ls ${CLOUD_DATA_DIR}/mean_std.npz
if [ $? -ne 0 ]; then
cp -f ${MEAN_STD_PATH} ./tmp/mean_std.npz
paddlecloud file put ./tmp/mean_std.npz ${CLOUD_DATA_DIR}/
fi
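# Upload the vocabulary file only if it is missing from cloud storage.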
paddlecloud ls ${CLOUD_DATA_DIR}/vocab.txt
if [ $? -ne 0 ]; then
cp -f ${VOCAB_PATH} ./tmp/vocab.txt
paddlecloud file put ./tmp/vocab.txt ${CLOUD_DATA_DIR}/
fi
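# Pack and upload the training data only if its cloud manifest is missing.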
paddlecloud ls ${CLOUD_DATA_DIR}/cloud.train.manifest
if [ $? -ne 0 ]; then
python prepare_data.py \
--manifest_path=${TRAIN_MANIFEST} \
--out_tar_path="./tmp/cloud.train.tar" \
--out_manifest_path="tmp/cloud.train.manifest"
paddlecloud file put ./tmp/cloud.train.tar ${CLOUD_DATA_DIR}/
paddlecloud file put ./tmp/cloud.train.manifest ${CLOUD_DATA_DIR}/
fi
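# Pack and upload the test data only if its cloud manifest is missing.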
paddlecloud ls ${CLOUD_DATA_DIR}/cloud.test.manifest
if [ $? -ne 0 ]; then
python prepare_data.py \
--manifest_path=${TEST_MANIFEST} \
--out_tar_path="./tmp/cloud.test.tar" \
--out_manifest_path="tmp/cloud.test.manifest"
paddlecloud file put ./tmp/cloud.test.tar ${CLOUD_DATA_DIR}/
paddlecloud file put ./tmp/cloud.test.manifest ${CLOUD_DATA_DIR}/
fi
rm -rf ./tmp
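# Submit the job under a unique, timestamped name.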
JOB_NAME=deepspeech`date +%Y%m%d%H%M%S`
cp pcloud_train.sh ${DS2_PATH}
paddlecloud submit \
-image bootstrapper:5000/wanghaoshuang/pcloud_ds2:latest-gpu-cudnn \
-jobname ${JOB_NAME} \
-cpu 4 \
-gpu 4 \
@@ -13,5 +58,5 @@ paddlecloud submit \
-pservers 1 \
-psmemory 10Gi \
-passes 1 \
-entry "sh pcloud_train.sh" \
.
-entry "sh pcloud_train.sh ${CLOUD_DATA_DIR} ${CLOUD_MODEl_DIR}" \
${DS2_PATH}
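With the variables above filled in, a typical round trip looks like this (a minimal sketch: `pcloud_submit.sh` is an assumed name for the submit script above, and the job name is whatever the submit step prints on its last line):

```
sh pcloud_submit.sh                                   # uploads data and submits the job
paddlecloud logs -n 10000 deepspeech20170727130129    # tail the trainer logs
```

The entry point `pcloud_train.sh`, shown next, receives `${CLOUD_DATA_DIR}` and `${CLOUD_MODEL_DIR}` as its two positional arguments: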
DATA_PATH=$1
MODEL_PATH=$2
TRAIN_MANI=${DATA_PATH}/cloud.train.manifest
DEV_MANI=${DATA_PATH}/cloud.test.manifest
TRAIN_TAR=${DATA_PATH}/cloud.train.tar
DEV_TAR=${DATA_PATH}/cloud.test.tar
VOCAB_PATH=${DATA_PATH}/eng_vocab.txt
MEAN_STD_FILE=${DATA_PATH}/mean_std.npz
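# Unpack the DS2 sources from deepspeech.tar.gz.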
tar -xzf deepspeech.tar.gz
rm -rf ./cloud/data/*
# split train data for each pcloud node
python ./cloud/split_data.py \
--in_manifest_path=$TRAIN_MANI \
--data_tar_path=$TRAIN_TAR \
--out_manifest_path='./local.train.manifest'
# split dev data for each pcloud node
python ./cloud/split_data.py \
--in_manifest_path=$DEV_MANI \
--data_tar_path=$DEV_TAR \
--out_manifest_path='./local.test.manifest'
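# Launch training against the locally split manifests.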
python train.py \
--use_gpu=1 \
--trainer_count=4 \
--batch_size=256 \
--mean_std_filepath=$MEAN_STD_FILE \
--train_manifest_path='./local.train.manifest' \
--dev_manifest_path='./local.test.manifest' \
--vocab_filepath=$VOCAB_PATH \
@@ -25,12 +25,12 @@ parser.add_argument(
help="Manifest of target data. (default: %(default)s)")
parser.add_argument(
"--out_tar_path",
default="./data/dev.tar",
default="./tmp/cloud.train.tar",
type=str,
help="Output tar file path. (default: %(default)s)")
parser.add_argument(
"--out_manifest_path",
default="./data/dev.mani",
default="./tmp/cloud.train.manifest",
type=str,
help="Manifest of output data. (default: %(default)s)")
args = parser.parse_args()
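Judging from its flags, `prepare_data.py` packs the files listed in a manifest into a single tar and writes a new manifest that points into it. The submit script drives it like this (a usage sketch; `${TRAIN_MANIFEST}` is the user-set path from above):

```
python prepare_data.py \
    --manifest_path=${TRAIN_MANIFEST} \
    --out_tar_path="./tmp/cloud.train.tar" \
    --out_manifest_path="./tmp/cloud.train.manifest"
```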
@@ -11,17 +11,17 @@ import argparse
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument(
"--in_manifest_path",
default='./cloud.train.manifest',
type=str,
help="Input manifest path. (default: %(default)s)")
parser.add_argument(
"--data_tar_path",
default='./cloud.train.tar',
type=str,
help="Data tar file path. (default: %(default)s)")
parser.add_argument(
"--out_manifest_path",
default='./local.train.manifest',
type=str,
help="Out manifest file path. (default: %(default)s)")
args = parser.parse_args()
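`split_data.py` runs on every cloud node from `pcloud_train.sh`, turning the shared cloud manifest and data tar into a node-local manifest (a usage sketch mirroring the call in the train script; how each node selects its shard, e.g. via a trainer-id environment variable, is not visible in this diff and is only an assumption):

```
python ./cloud/split_data.py \
    --in_manifest_path=${DATA_PATH}/cloud.train.manifest \
    --data_tar_path=${DATA_PATH}/cloud.train.tar \
    --out_manifest_path='./local.train.manifest'
```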