提交 417a8b79 编写于 作者: Hui Zhang

fix ted egs

上级 25c07e3f
# TED En-Zh
## Dataset
| Data Subset | Duration in Seconds |
| --- | --- |
| data/manifest.train | 0.942 ~ 60 |
| data/manifest.dev | 1.151 ~ 39 |
| data/manifest.test | 1.1 ~ 42.746 |
...@@ -7,37 +7,37 @@ stop_stage=100 ...@@ -7,37 +7,37 @@ stop_stage=100
nbpe=8000 nbpe=8000
bpemode=unigram bpemode=unigram
bpeprefix="data/bpe_${bpemode}_${nbpe}" bpeprefix="data/bpe_${bpemode}_${nbpe}"
DATA_DIR= data_dir=/mnt/dataset/TED_EnZh
source ${MAIN_ROOT}/utils/parse_options.sh source ${MAIN_ROOT}/utils/parse_options.sh
mkdir -p data
TARGET_DIR=${MAIN_ROOT}/examples/dataset TARGET_DIR=${MAIN_ROOT}/examples/dataset
mkdir -p ${TARGET_DIR} mkdir -p ${TARGET_DIR}
mkdir -p data
if [ ! -d ${SOURCE_DIR} ]; then
echo "Error: Dataset is not avaiable. Please download and unzip the dataset"
echo "Download Link: https://pan.baidu.com/s/18L-59wgeS96WkObISrytQQ Passwd: bva0"
echo "The tree of the directory should be:"
echo "."
echo "|-- En-Zh"
echo "|-- test-segment"
echo " |-- tst2010"
echo " |-- ..."
echo "|-- train-split"
echo " |-- train-segment"
echo "|-- README.md"
exit 1
fi
if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then
if [ ! -e ${data_dir} ]; then
echo "Error: Dataset is not avaiable. Please download and unzip the dataset"
echo "Download Link: https://pan.baidu.com/s/18L-59wgeS96WkObISrytQQ Passwd: bva0"
echo "The tree of the directory should be:"
echo "."
echo "|-- En-Zh"
echo "|-- test-segment"
echo " |-- tst2010"
echo " |-- ..."
echo "|-- train-split"
echo " |-- train-segment"
echo "|-- README.md"
exit 1
fi
# generate manifests # generate manifests
python3 ${TARGET_DIR}/ted_en_zh/ted_en_zh.py \ python3 ${TARGET_DIR}/ted_en_zh/ted_en_zh.py \
--manifest_prefix="data/manifest" \ --manifest_prefix="data/manifest" \
--src_dir="${DATA_DIR}" --src_dir="${data_dir}"
echo "Complete raw data pre-process." echo "Complete raw data pre-process."
fi fi
......
...@@ -16,7 +16,7 @@ echo "checkpoint name ${ckpt}" ...@@ -16,7 +16,7 @@ echo "checkpoint name ${ckpt}"
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
# prepare data # prepare data
bash ./local/data.sh --DATA_DIR ${data_path} || exit -1 bash ./local/data.sh --data_dir ${data_path} || exit -1
fi fi
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册