Commit b69021f9 authored by H Hui Zhang

fix ds2 scripts and bugs

Parent 09ab9f71
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Inferer for DeepSpeech2 model."""
from deepspeech.exps.deepspeech2.config import get_cfg_defaults
from deepspeech.exps.deepspeech2.model import DeepSpeech2Tester as Tester
from deepspeech.training.cli import default_argument_parser
from deepspeech.utils.utility import print_arguments
# TODO(hui zhang): dynamic load
def main_sp(config, args):
exp = Tester(config, args)
exp.setup()
exp.run_test()
def main(config, args):
main_sp(config, args)
if __name__ == "__main__":
parser = default_argument_parser()
args = parser.parse_args()
print_arguments(args, globals())
# https://yaml.org/type/float.html
config = get_cfg_defaults()
if args.config:
config.merge_from_file(args.config)
if args.opts:
config.merge_from_list(args.opts)
config.freeze()
print(config)
if args.dump_config:
with open(args.dump_config, 'w') as f:
print(config, file=f)
main(config, args)
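These merge_from_file / merge_from_list / freeze calls follow the yacs CfgNode pattern; a minimal sketch of the same precedence chain, assuming yacs and purely illustrative keys (not the project's actual schema):

from yacs.config import CfgNode

config = CfgNode()
config.data = CfgNode()
config.data.batch_size = 64                        # built-in default

config.merge_from_file("conf/deepspeech2.yaml")    # YAML file overrides defaults
config.merge_from_list(["data.batch_size", "32"])  # CLI opts override the YAML
config.freeze()                                    # immutable from here on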
......@@ -113,6 +113,7 @@ class DeepSpeech2Trainer(Trainer):
if self.parallel:
model = paddle.DataParallel(model)
logger.info(f"{model}")
layer_tools.print_params(model, logger.info)
grad_clip = ClipGradByGlobalNormWithLog(
......@@ -192,7 +193,7 @@ class DeepSpeech2Tester(DeepSpeech2Trainer):
trans.append(''.join([chr(i) for i in ids]))
return trans
def compute_metrics(self, audio, texts, audio_len, texts_len):
def compute_metrics(self, audio, audio_len, texts, texts_len):
cfg = self.config.decoding
errors_sum, len_refs, num_ins = 0.0, 0, 0
errors_func = error_rate.char_errors if cfg.error_rate_type == 'cer' else error_rate.word_errors
......@@ -253,7 +254,7 @@ class DeepSpeech2Tester(DeepSpeech2Trainer):
msg = "Test: "
msg += "epoch: {}, ".format(self.epoch)
msg += "step: {}, ".format(self.iteration)
msg += ", Final error rate [%s] (%d/%d) = %f" % (
msg += "Final error rate [%s] (%d/%d) = %f" % (
error_rate_type, num_ins, num_ins, errors_sum / len_refs)
logger.info(msg)
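For context, the error rate reported here is edit distance over reference length; a toy character-error-rate computation in plain Python (illustrative only, not the project's error_rate module):

def char_errors(ref: str, hyp: str):
    """Levenshtein distance between ref and hyp, plus the ref length."""
    m, n = len(ref), len(hyp)
    dp = list(range(n + 1))
    for i in range(1, m + 1):
        prev, dp[0] = dp[0], i
        for j in range(1, n + 1):
            cur = dp[j]
            dp[j] = min(dp[j] + 1,                          # deletion
                        dp[j - 1] + 1,                      # insertion
                        prev + (ref[i - 1] != hyp[j - 1]))  # substitution
            prev = cur
    return float(dp[n]), m

errors, len_ref = char_errors("你好世界", "你好视界")
cer = errors / len_ref  # 1 substitution over 4 chars = 0.25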
......@@ -319,8 +320,9 @@ class DeepSpeech2Tester(DeepSpeech2Trainer):
config.defrost()
# return raw text
config.data.manifest = config.data.test_manifest
config.data.keep_transcription_text = True
config.data.augmentation_config = ""
config.data.manifest = config.data.test_manifest
test_dataset = ManifestDataset.from_config(config)
# return text ord id
......
......@@ -131,8 +131,8 @@ class FeatureNormalizer(object):
def _read_mean_std_from_file(self, filepath, eps=1e-20):
"""Load mean and std from file."""
mean, istd = load_cmvn(filepath, filetype='json')
self._mean = mean
self._istd = istd
self._mean = np.expand_dims(mean, axis=-1)
self._istd = np.expand_dims(istd, axis=-1)
def write_to_file(self, filepath):
"""Write the mean and stddev to the file.
......
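The expand_dims change lets the per-dimension statistics broadcast against (feature_dim, time) spectrograms; a minimal numpy sketch, with the 161-bin linear-spectrogram shape assumed for illustration:

import numpy as np

feat = np.random.randn(161, 300)      # (feature_dim, time) spectrogram
mean = np.random.randn(161)           # per-dim mean from mean_std.json
istd = np.random.rand(161)            # per-dim inverse stddev

mean = np.expand_dims(mean, axis=-1)  # (161, 1): broadcasts over time
istd = np.expand_dims(istd, axis=-1)
normalized = (feat - mean) * istd     # elementwise CMVN
assert normalized.shape == feat.shape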
......@@ -60,7 +60,7 @@ class SpeechCollator():
# else text is string, convert to unicode ord
tokens = []
if self._keep_transcription_text:
assert isinstance(text, str), type(text)
assert isinstance(text, str), (type(text), text)
tokens = [ord(t) for t in text]
else:
tokens = text # token ids
......
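This ord-based encoding pairs with the tester's chr-based decoding shown above; a minimal round-trip sketch in plain Python:

# When keep_transcription_text is True, the collator emits unicode
# code points and the tester turns them back into text.
text = "你好"
token_ids = [ord(ch) for ch in text]          # collator side
decoded = ''.join(chr(i) for i in token_ids)  # tester side
assert decoded == text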
......@@ -154,9 +154,9 @@ class DeepSpeech2Model(nn.Layer):
assert (self.encoder.output_size == rnn_size * 2)
self.decoder = CTCDecoder(
odim=dict_size + 1, # <blank> is append after vocab
odim=dict_size, # <blank> is in vocab
enc_n_units=self.encoder.output_size,
blank_id=dict_size, # last token is <blank>
blank_id=0, # first token is <blank>
dropout_rate=0.0,
reduction=True, # sum
batch_average=True) # sum / batch_size
......
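The change moves <blank> from an extra id appended after the vocabulary to the first vocabulary entry; a toy illustration of the two conventions (vocab contents assumed):

# Old convention: blank appended after the vocab.
vocab_old = ["a", "b", "c"]
odim_old = len(vocab_old) + 1   # 4 outputs
blank_old = len(vocab_old)      # blank id == 3, the last token

# New convention: blank is already the first token in the vocab.
vocab_new = ["<blank>", "a", "b", "c"]
odim_new = len(vocab_new)       # 4 outputs
blank_new = 0                   # blank id == 0, the first token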
......@@ -63,7 +63,7 @@ class U2BaseModel(nn.Module):
default = CfgNode()
# allow add new item when merge_with_file
default.cmvn_file = ""
default.cmvn_file_type = "npz"
default.cmvn_file_type = "json"
default.input_dim = 0
default.output_dim = 0
# encoder related
......
......@@ -40,7 +40,8 @@ def sequence_mask(x_len, max_len=None, dtype='float32'):
[[1., 1., 0., 0.],
[1., 1., 1., 1.]]
"""
assert x_len.dim() == 1
# TODO(Hui Zhang): jit does not support Tensor.dim() and Tensor.ndim
# assert x_len.dim() == 1, (x_len.dim(), x_len)
max_len = max_len or x_len.max()
x_len = paddle.unsqueeze(x_len, -1)
row_vector = paddle.arange(max_len)
......
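A usage sketch matching the docstring example, assuming sequence_mask is in scope and paddle is available:

import paddle

x_len = paddle.to_tensor([2, 4])
mask = sequence_mask(x_len)  # shape (2, 4), dtype float32
# [[1., 1., 0., 0.],
#  [1., 1., 1., 1.]]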
......@@ -127,7 +127,12 @@ class Trainer():
@mp_tools.rank_zero_only
def save(self, tag=None, infos: dict=None):
"""Save checkpoint (model parameters and optimizer states).
Args:
tag (int or str, optional): None to use the step number, otherwise the given tag, e.g. the epoch. Defaults to None.
infos (dict, optional): meta data to save. Defaults to None.
"""
infos = infos if infos else dict()
infos.update({
"step": self.iteration,
......@@ -220,7 +225,7 @@ class Trainer():
'epoch', {'cv_loss': cv_loss,
'lr': self.lr_scheduler()}, self.epoch)
self.save(infos={'val_loss': cv_loss})
self.save(tag=self.epoch, infos={'val_loss': cv_loss})
self.lr_scheduler.step()
self.new_epoch()
......
......@@ -7,14 +7,20 @@ data:
vocab_filepath: data/vocab.txt
augmentation_config: conf/augmentation.json
batch_size: 64 # one gpu
max_duration: 27.0
min_duration: 0.0
min_input_len: 0.0
max_input_len: 27.0 # second
min_output_len: 0.0
max_output_len: 400.0
min_output_input_ratio: 0.05
max_output_input_ratio: 10.0
specgram_type: linear
target_sample_rate: 16000
max_freq: None
n_fft: None
stride_ms: 10.0
window_ms: 20.0
delta_delta: False
dither: 1.0
use_dB_normalization: True
target_dB: -20
random_seed: 0
......@@ -36,6 +42,7 @@ training:
lr_decay: 0.83
weight_decay: 1e-06
global_grad_clip: 5.0
log_interval: 100
decoding:
batch_size: 128
......
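The new min/max length and ratio fields above presumably gate which utterances enter a batch; a hedged sketch of such a filter (field names taken from the config, logic assumed, not the project's actual loader):

def keep_utterance(duration_s, n_tokens,
                   min_input_len=0.0, max_input_len=27.0,
                   min_output_len=0.0, max_output_len=400.0,
                   min_output_input_ratio=0.05, max_output_input_ratio=10.0):
    """Filter one manifest entry by input/output length constraints."""
    if not (min_input_len <= duration_s <= max_input_len):
        return False
    if not (min_output_len <= n_tokens <= max_output_len):
        return False
    ratio = n_tokens / max(duration_s, 1e-9)
    return min_output_input_ratio <= ratio <= max_output_input_ratio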
#! /usr/bin/env bash
if [ $# != 2 ];then
echo "usage: ${0} ckpt_dir avg_num"
exit -1
fi
ckpt_dir=${1}
average_num=${2}
decode_checkpoint=${ckpt_dir}/avg_${average_num}.pdparams
python3 -u ${MAIN_ROOT}/utils/avg_model.py \
--dst_model ${decode_checkpoint} \
--ckpt_dir ${ckpt_dir} \
--num ${average_num} \
--val_best
if [ $? -ne 0 ]; then
echo "Failed in avg ckpt!"
exit 1
fi
exit 0
\ No newline at end of file
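utils/avg_model.py itself is not shown in this diff; at its core, checkpoint averaging is a parameter-wise mean over several .pdparams files, sketched below (paddle assumed available, paths illustrative):

import paddle

def average_checkpoints(paths):
    """Average parameter tensors across several .pdparams files."""
    avg = None
    for path in paths:
        state = paddle.load(path)
        if avg is None:
            avg = {k: v.astype('float64') for k, v in state.items()}
        else:
            for k in avg:
                avg[k] += state[k].astype('float64')
    return {k: (v / len(paths)).astype('float32') for k, v in avg.items()}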
File mode changed from 100644 to 100755
......@@ -14,7 +14,7 @@ if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then
python3 ${TARGET_DIR}/aishell/aishell.py \
--manifest_prefix="data/manifest" \
--target_dir="${TARGET_DIR}/aishell"
if [ $? -ne 0 ]; then
echo "Prepare Aishell failed. Terminated."
exit 1
......@@ -33,7 +33,7 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
--count_threshold=0 \
--vocab_path="data/vocab.txt" \
--manifest_paths "data/manifest.train.raw"
if [ $? -ne 0 ]; then
echo "Build vocabulary failed. Terminated."
exit 1
......@@ -43,19 +43,19 @@ fi
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
# compute mean and stddev for normalizer
num_workers=$(nproc)
python3 ${MAIN_ROOT}/utils/compute_mean_std.py \
--manifest_path="data/manifest.train.raw" \
--specgram_type="fbank" \
--feat_dim=80 \
--specgram_type="linear" \
--delta_delta=false \
--stride_ms=10.0 \
--window_ms=25.0 \
--window_ms=20.0 \
--sample_rate=16000 \
--use_dB_normalization=False \
--num_samples=-1 \
--num_workers=16 \
--num_workers=${num_workers} \
--output_path="data/mean_std.json"
if [ $? -ne 0 ]; then
echo "Compute mean and stddev failed. Terminated."
exit 1
......@@ -74,7 +74,7 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
--manifest_path="data/manifest.${dataset}.raw" \
--output_path="data/manifest.${dataset}"
done
if [ $? -ne 0 ]; then
echo "Formt mnaifest failed. Terminated."
exit 1
......
#! /usr/bin/env bash
if [ $# != 2 ];then
echo "usage: export ckpt_path jit_model_path"
if [ $# != 3 ];then
echo "usage: $0 config_path ckpt_prefix jit_model_path"
exit -1
fi
ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
echo "using $ngpu gpus..."
config_path=$1
ckpt_path_prefix=$2
jit_model_export_path=$3
device=gpu
if [ ${ngpu} == 0 ];then
device=cpu
fi
python3 -u ${BIN_DIR}/export.py \
--config conf/deepspeech2.yaml \
--checkpoint_path ${1} \
--export_path ${2}
--device ${device} \
--nproc ${ngpu} \
--config ${config_path} \
--checkpoint_path ${ckpt_path_prefix} \
--export_path ${jit_model_export_path}
if [ $? -ne 0 ]; then
echo "Failed in evaluation!"
echo "Failed in export!"
exit 1
fi
......
File mode changed from 100644 to 100755
#! /usr/bin/env bash
if [[ $# != 1 ]]; then
echo "usage: $0 ckpt-path"
if [ $# != 2 ];then
echo "usage: ${0} config_path ckpt_path_prefix"
exit -1
fi
ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
echo "using $ngpu gpus..."
device=gpu
if [ ${ngpu} == 0 ];then
device=cpu
fi
config_path=$1
ckpt_prefix=$2
# download language model
bash local/download_lm_ch.sh
if [ $? -ne 0 ]; then
exit 1
exit 1
fi
python3 -u ${BIN_DIR}/infer.py \
--device 'gpu' \
python3 -u ${BIN_DIR}/test.py \
--device ${device} \
--nproc 1 \
--config conf/deepspeech2.yaml \
--checkpoint_path ${1}
--config ${config_path} \
--result_file ${ckpt_prefix}.rsl \
--checkpoint_path ${ckpt_prefix}
if [ $? -ne 0 ]; then
echo "Failed in inference!"
echo "Failed in evaluation!"
exit 1
fi
......
#! /usr/bin/env bash
# train model
# if you wish to resume from an existing model, uncomment --init_from_pretrained_model
#export FLAGS_sync_nccl_allreduce=0
if [ $# != 2 ];then
echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name"
exit -1
fi
ngpu=$(echo ${CUDA_VISIBLE_DEVICES} | python -c 'import sys; a = sys.stdin.read(); print(len(a.split(",")));')
ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
echo "using $ngpu gpus..."
config_path=$1
ckpt_name=$2
device=gpu
if [ ${ngpu} == 0 ];then
device=cpu
fi
mkdir -p exp
python3 -u ${BIN_DIR}/train.py \
--device 'gpu' \
--device ${device} \
--nproc ${ngpu} \
--config conf/deepspeech2.yaml \
--output ckpt-${1}
--config ${config_path} \
--output exp/${ckpt_name}
if [ $? -ne 0 ]; then
echo "Failed in training!"
exit 1
fi
exit 0
File mode changed from 100644 to 100755
#!/bin/bash
set -e
source path.sh
# only demos
# prepare data
bash ./local/data.sh
gpus=0
stage=0
stop_stage=100
conf_path=conf/deepspeech2.yaml
ckpt=$(basename ${conf_path} | awk -F'.' '{print $1}')
avg_num=1
avg_ckpt=avg_${avg_num}
source ${MAIN_ROOT}/utils/parse_options.sh || exit 1;
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
# prepare data
bash ./local/data.sh || exit -1
fi
# train model
CUDA_VISIBLE_DEVICES=0,1,2,3 bash ./local/train.sh baseline
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
# train model, all `ckpt` under `exp` dir
CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt}
fi
# test model
CUDA_VISIBLE_DEVICES=0 bash ./local/test.sh
if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
# avg n best model
./local/avg.sh exp/${ckpt}/checkpoints ${avg_num}
fi
# infer model
CUDA_VISIBLE_DEVICES=0 bash ./local/infer.sh ckpt/checkpoints/step-3284
if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
# test ckpt avg_n
CUDA_VISIBLE_DEVICES=${gpus} ./local/test.sh ${conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} || exit -1
fi
# export model
bash ./local/export.sh ckpt/checkpoints/step-3284 jit.model
if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
# export ckpt avg_n
CUDA_VISIBLE_DEVICES=${gpus} ./local/export.sh ${conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} exp/${ckpt}/checkpoints/${avg_ckpt}.jit
fi
../../s0/local/data.sh
\ No newline at end of file
#! /usr/bin/env bash
stage=-1
stop_stage=100
source ${MAIN_ROOT}/utils/parse_options.sh
mkdir -p data
TARGET_DIR=${MAIN_ROOT}/examples/dataset
mkdir -p ${TARGET_DIR}
if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then
# download data, generate manifests
python3 ${TARGET_DIR}/aishell/aishell.py \
--manifest_prefix="data/manifest" \
--target_dir="${TARGET_DIR}/aishell"
if [ $? -ne 0 ]; then
echo "Prepare Aishell failed. Terminated."
exit 1
fi
for dataset in train dev test; do
mv data/manifest.${dataset} data/manifest.${dataset}.raw
done
fi
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
# download data, generate manifests
# build vocabulary
python3 ${MAIN_ROOT}/utils/build_vocab.py \
--unit_type="char" \
--count_threshold=0 \
--vocab_path="data/vocab.txt" \
--manifest_paths "data/manifest.train.raw"
if [ $? -ne 0 ]; then
echo "Build vocabulary failed. Terminated."
exit 1
fi
fi
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
# compute mean and stddev for normalizer
num_workers=$(nproc)
python3 ${MAIN_ROOT}/utils/compute_mean_std.py \
--manifest_path="data/manifest.train.raw" \
--specgram_type="fbank" \
--feat_dim=80 \
--delta_delta=false \
--stride_ms=10.0 \
--window_ms=25.0 \
--sample_rate=16000 \
--use_dB_normalization=False \
--num_samples=-1 \
--num_workers=${num_workers} \
--output_path="data/mean_std.json"
if [ $? -ne 0 ]; then
echo "Compute mean and stddev failed. Terminated."
exit 1
fi
fi
if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
# format manifest with tokenids, vocab size
for dataset in train dev test; do
python3 ${MAIN_ROOT}/utils/format_data.py \
--feat_type "raw" \
--cmvn_path "data/mean_std.json" \
--unit_type "char" \
--vocab_path="data/vocab.txt" \
--manifest_path="data/manifest.${dataset}.raw" \
--output_path="data/manifest.${dataset}"
done
if [ $? -ne 0 ]; then
echo "Formt mnaifest failed. Terminated."
exit 1
fi
fi
echo "Aishell data preparation done."
exit 0
......@@ -24,6 +24,7 @@ import distutils.util
import io
import json
import os
from multiprocessing.pool import Pool
import soundfile
......@@ -122,42 +123,36 @@ def main():
if args.target_dir.startswith('~'):
args.target_dir = os.path.expanduser(args.target_dir)
prepare_dataset(
url=URL_TEST_CLEAN,
md5sum=MD5_TEST_CLEAN,
target_dir=os.path.join(args.target_dir, "test-clean"),
manifest_path=args.manifest_prefix + ".test-clean")
prepare_dataset(
url=URL_DEV_CLEAN,
md5sum=MD5_DEV_CLEAN,
target_dir=os.path.join(args.target_dir, "dev-clean"),
manifest_path=args.manifest_prefix + ".dev-clean")
tasks = [
(URL_TEST_CLEAN, MD5_TEST_CLEAN, os.path.join(args.target_dir,
"test-clean"),
args.manifest_prefix + ".test-clean"),
(URL_DEV_CLEAN, MD5_DEV_CLEAN, os.path.join(
args.target_dir, "dev-clean"), args.manifest_prefix + ".dev-clean"),
]
if args.full_download:
prepare_dataset(
url=URL_TRAIN_CLEAN_100,
md5sum=MD5_TRAIN_CLEAN_100,
target_dir=os.path.join(args.target_dir, "train-clean-100"),
manifest_path=args.manifest_prefix + ".train-clean-100")
prepare_dataset(
url=URL_TEST_OTHER,
md5sum=MD5_TEST_OTHER,
target_dir=os.path.join(args.target_dir, "test-other"),
manifest_path=args.manifest_prefix + ".test-other")
prepare_dataset(
url=URL_DEV_OTHER,
md5sum=MD5_DEV_OTHER,
target_dir=os.path.join(args.target_dir, "dev-other"),
manifest_path=args.manifest_prefix + ".dev-other")
prepare_dataset(
url=URL_TRAIN_CLEAN_360,
md5sum=MD5_TRAIN_CLEAN_360,
target_dir=os.path.join(args.target_dir, "train-clean-360"),
manifest_path=args.manifest_prefix + ".train-clean-360")
prepare_dataset(
url=URL_TRAIN_OTHER_500,
md5sum=MD5_TRAIN_OTHER_500,
target_dir=os.path.join(args.target_dir, "train-other-500"),
manifest_path=args.manifest_prefix + ".train-other-500")
tasks.extend([
(URL_TRAIN_CLEAN_100, MD5_TRAIN_CLEAN_100,
os.path.join(args.target_dir, "train-clean-100"),
args.manifest_prefix + ".train-clean-100"),
(URL_TEST_OTHER, MD5_TEST_OTHER, os.path.join(args.target_dir,
"test-other"),
args.manifest_prefix + ".test-other"),
(URL_DEV_OTHER, MD5_DEV_OTHER, os.path.join(args.target_dir,
"dev-other"),
args.manifest_prefix + ".dev-other"),
(URL_TRAIN_CLEAN_360, MD5_TRAIN_CLEAN_360,
os.path.join(args.target_dir, "train-clean-360"),
args.manifest_prefix + ".train-clean-360"),
(URL_TRAIN_OTHER_500, MD5_TRAIN_OTHER_500,
os.path.join(args.target_dir, "train-other-500"),
args.manifest_prefix + ".train-other-500"),
])
with Pool(7) as pool:
pool.starmap(prepare_dataset, tasks)
print("Data download and manifest prepare done!")
if __name__ == '__main__':
......
......@@ -23,6 +23,7 @@ import codecs
import io
import json
import os
from multiprocessing.pool import Pool
import soundfile
......@@ -103,16 +104,18 @@ def main():
if args.target_dir.startswith('~'):
args.target_dir = os.path.expanduser(args.target_dir)
prepare_dataset(
url=URL_TRAIN_CLEAN,
md5sum=MD5_TRAIN_CLEAN,
target_dir=os.path.join(args.target_dir, "train-clean"),
manifest_path=args.manifest_prefix + ".train-clean")
prepare_dataset(
url=URL_DEV_CLEAN,
md5sum=MD5_DEV_CLEAN,
target_dir=os.path.join(args.target_dir, "dev-clean"),
manifest_path=args.manifest_prefix + ".dev-clean")
tasks = [
(URL_TRAIN_CLEAN, MD5_TRAIN_CLEAN,
os.path.join(args.target_dir, "train-clean"),
args.manifest_prefix + ".train-clean"),
(URL_DEV_CLEAN, MD5_DEV_CLEAN, os.path.join(
args.target_dir, "dev-clean"), args.manifest_prefix + ".dev-clean"),
]
with Pool(2) as pool:
pool.starmap(prepare_dataset, tasks)
print("Data download and manifest prepare done!")
if __name__ == '__main__':
......
......@@ -7,14 +7,20 @@ data:
vocab_filepath: data/vocab.txt
augmentation_config: conf/augmentation.json
batch_size: 20
max_duration: 27.0
min_duration: 0.0
min_input_len: 0.0
max_input_len: 27.0 # second
min_output_len: 0.0
max_output_len: 400.0
min_output_input_ratio: 0.05
max_output_input_ratio: 10.0
specgram_type: linear
target_sample_rate: 16000
max_freq: None
n_fft: None
stride_ms: 10.0
window_ms: 20.0
delta_delta: False
dither: 1.0
use_dB_normalization: True
target_dB: -20
random_seed: 0
......@@ -22,18 +28,22 @@ data:
sortagrad: True
shuffle_method: batch_shuffle
num_workers: 0
model:
num_conv_layers: 2
num_rnn_layers: 3
rnn_layer_size: 2048
use_gru: False
share_rnn_weights: True
share_rnn_weights: True
training:
n_epoch: 50
lr: 1e-3
lr_decay: 0.83
weight_decay: 1e-06
global_grad_clip: 5.0
log_interval: 100
decoding:
batch_size: 128
error_rate_type: wer
......
#! /usr/bin/env bash
if [ $# != 2 ];then
echo "usage: ${0} ckpt_dir avg_num"
exit -1
fi
ckpt_dir=${1}
average_num=${2}
decode_checkpoint=${ckpt_dir}/avg_${average_num}.pdparams
python3 -u ${MAIN_ROOT}/utils/avg_model.py \
--dst_model ${decode_checkpoint} \
--ckpt_dir ${ckpt_dir} \
--num ${average_num} \
--val_best
if [ $? -ne 0 ]; then
echo "Failed in avg ckpt!"
exit 1
fi
exit 0
\ No newline at end of file
#! /usr/bin/env bash
stage=-1
stop_stage=100
unit_type=char
source ${MAIN_ROOT}/utils/parse_options.sh
mkdir -p data
TARGET_DIR=${MAIN_ROOT}/examples/dataset
mkdir -p ${TARGET_DIR}
# download data, generate manifests
PYTHONPATH=.:$PYTHONPATH python3 ${TARGET_DIR}/librispeech/librispeech.py \
--manifest_prefix="data/manifest" \
--target_dir="${TARGET_DIR}/librispeech" \
--full_download="True"
if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then
# download data, generate manifests
python3 ${TARGET_DIR}/librispeech/librispeech.py \
--manifest_prefix="data/manifest" \
--target_dir="${TARGET_DIR}/librispeech" \
--full_download="True"
if [ $? -ne 0 ]; then
echo "Prepare LibriSpeech failed. Terminated."
exit 1
fi
for set in train-clean-100 train-clean-360 train-other-500 dev-clean dev-other test-clean test-other; do
mv data/manifest.${set} data/manifest.${set}.raw
done
for set in train-clean-100 train-clean-360 train-other-500; do
cat data/manifest.${set}.raw >> data/manifest.train.raw
done
if [ $? -ne 0 ]; then
echo "Prepare LibriSpeech failed. Terminated."
exit 1
for set in dev-clean dev-other; do
cat data/manifest.${set}.raw >> data/manifest.dev.raw
done
for set in test-clean test-other; do
cat data/manifest.${set}.raw >> data/manifest.test.raw
done
fi
cat data/manifest.train-* | shuf > data/manifest.train
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
# build vocabulary
python3 ${MAIN_ROOT}/utils/build_vocab.py \
--unit_type ${unit_type} \
--count_threshold=0 \
--vocab_path="data/vocab.txt" \
--manifest_paths="data/manifest.train.raw"
if [ $? -ne 0 ]; then
echo "Build vocabulary failed. Terminated."
exit 1
fi
fi
# build vocabulary
python3 ${MAIN_ROOT}/utils/build_vocab.py \
--count_threshold=0 \
--vocab_path="data/vocab.txt" \
--manifest_paths="data/manifest.train"
if [ $? -ne 0 ]; then
echo "Build vocabulary failed. Terminated."
exit 1
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
# compute mean and stddev for normalizer
num_workers=$(nproc)
python3 ${MAIN_ROOT}/utils/compute_mean_std.py \
--manifest_path="data/manifest.train.raw" \
--num_samples=-1 \
--specgram_type="linear" \
--delta_delta=false \
--sample_rate=16000 \
--stride_ms=10.0 \
--window_ms=20.0 \
--use_dB_normalization=False \
--num_workers=${num_workers} \
--output_path="data/mean_std.json"
if [ $? -ne 0 ]; then
echo "Compute mean and stddev failed. Terminated."
exit 1
fi
fi
# compute mean and stddev for normalizer
python3 ${MAIN_ROOT}/utils/compute_mean_std.py \
--manifest_path="data/manifest.train" \
--num_samples=2000 \
--specgram_type="linear" \
--output_path="data/mean_std.npz"
if [ $? -ne 0 ]; then
echo "Compute mean and stddev failed. Terminated."
exit 1
if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
# format manifest with tokenids, vocab size
for set in train dev test dev-clean dev-other test-clean test-other; do
{
python3 ${MAIN_ROOT}/utils/format_data.py \
--feat_type "raw" \
--cmvn_path "data/mean_std.json" \
--unit_type ${unit_type} \
--vocab_path="data/vocab.txt" \
--manifest_path="data/manifest.${set}.raw" \
--output_path="data/manifest.${set}"
if [ $? -ne 0 ]; then
echo "Formt mnaifest.${set} failed. Terminated."
exit 1
fi
}&
done
wait
fi
echo "LibriSpeech Data preparation done."
......
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
......@@ -3,18 +3,24 @@ data:
train_manifest: data/manifest.tiny
dev_manifest: data/manifest.tiny
test_manifest: data/manifest.tiny
mean_std_filepath: data/mean_std.npz
mean_std_filepath: data/mean_std.json
vocab_filepath: data/vocab.txt
augmentation_config: conf/augmentation.json
batch_size: 4
max_duration: 27.0
min_duration: 0.0
min_input_len: 0.0
max_input_len: 27.0
min_output_len: 0.0
max_output_len: 400.0
min_output_input_ratio: 0.05
max_output_input_ratio: 10.0
specgram_type: linear
target_sample_rate: 16000
max_freq: None
n_fft: None
stride_ms: 10.0
window_ms: 20.0
delta_delta: False
dither: 1.0
use_dB_normalization: True
target_dB: -20
random_seed: 0
......@@ -22,18 +28,22 @@ data:
sortagrad: True
shuffle_method: batch_shuffle
num_workers: 0
model:
num_conv_layers: 2
num_rnn_layers: 3
rnn_layer_size: 2048
use_gru: False
share_rnn_weights: True
training:
n_epoch: 20
lr: 1e-5
lr_decay: 1.0
weight_decay: 1e-06
global_grad_clip: 5.0
log_interval: 1
decoding:
batch_size: 128
error_rate_type: wer
......
#! /usr/bin/env bash
if [ $# != 2 ];then
echo "usage: ${0} ckpt_dir avg_num"
exit -1
fi
ckpt_dir=${1}
average_num=${2}
decode_checkpoint=${ckpt_dir}/avg_${average_num}.pdparams
python3 -u ${MAIN_ROOT}/utils/avg_model.py \
--dst_model ${decode_checkpoint} \
--ckpt_dir ${ckpt_dir} \
--num ${average_num} \
--val_best
if [ $? -ne 0 ]; then
echo "Failed in avg ckpt!"
exit 1
fi
exit 0
\ No newline at end of file
......@@ -3,10 +3,7 @@
stage=-1
stop_stage=100
# bpemode (unigram or bpe)
nbpe=200
bpemode=unigram
bpeprefix="data/bpe_${bpemode}_${nbpe}"
unit_type=char
source ${MAIN_ROOT}/utils/parse_options.sh
......@@ -32,10 +29,8 @@ fi
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
# build vocabulary
python3 ${MAIN_ROOT}/utils/build_vocab.py \
--unit_type "spm" \
--spm_vocab_size=${nbpe} \
--spm_mode ${bpemode} \
--spm_model_prefix ${bpeprefix} \
--unit_type ${unit_type} \
--count_threshold=0 \
--vocab_path="data/vocab.txt" \
--manifest_paths="data/manifest.tiny.raw"
......@@ -51,12 +46,11 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
python3 ${MAIN_ROOT}/utils/compute_mean_std.py \
--manifest_path="data/manifest.tiny.raw" \
--num_samples=64 \
--specgram_type="fbank" \
--feat_dim=80 \
--specgram_type="linear" \
--delta_delta=false \
--sample_rate=16000 \
--stride_ms=10.0 \
--window_ms=25.0 \
--window_ms=20.0 \
--use_dB_normalization=False \
--num_workers=2 \
--output_path="data/mean_std.json"
......@@ -73,8 +67,7 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
python3 ${MAIN_ROOT}/utils/format_data.py \
--feat_type "raw" \
--cmvn_path "data/mean_std.json" \
--unit_type "spm" \
--spm_model_prefix ${bpeprefix} \
--unit_type ${unit_type} \
--vocab_path="data/vocab.txt" \
--manifest_path="data/manifest.tiny.raw" \
--output_path="data/manifest.tiny"
......
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
......@@ -2,6 +2,7 @@
set -e
source path.sh
gpus=0
stage=0
stop_stage=100
conf_path=conf/deepspeech2.yaml
......@@ -18,7 +19,7 @@ fi
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
# train model, all `ckpt` under `exp` dir
CUDA_VISIBLE_DEVICES=0 ./local/train.sh ${conf_path} ${ckpt}
CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt}
fi
if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
......@@ -28,10 +29,10 @@ fi
if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
# test ckpt avg_n
CUDA_VISIBLE_DEVICES=0 ./local/test.sh ${conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} || exit -1
CUDA_VISIBLE_DEVICES=${gpus} ./local/test.sh ${conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} || exit -1
fi
if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
# export ckpt avg_n
CUDA_VISIBLE_DEVICES= ./local/export.sh ${conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} exp/${ckpt}/checkpoints/${avg_ckpt}.jit
CUDA_VISIBLE_DEVICES=${gpus} ./local/export.sh ${conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} exp/${ckpt}/checkpoints/${avg_ckpt}.jit
fi
../../s0/local/data.sh
\ No newline at end of file
#! /usr/bin/env bash
stage=-1
stop_stage=100
# bpemode (unigram or bpe)
nbpe=200
bpemode=unigram
bpeprefix="data/bpe_${bpemode}_${nbpe}"
source ${MAIN_ROOT}/utils/parse_options.sh
mkdir -p data
TARGET_DIR=${MAIN_ROOT}/examples/dataset
mkdir -p ${TARGET_DIR}
if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then
# download data, generate manifests
python3 ${TARGET_DIR}/librispeech/librispeech.py \
--manifest_prefix="data/manifest" \
--target_dir="${TARGET_DIR}/librispeech" \
--full_download="False"
if [ $? -ne 0 ]; then
echo "Prepare LibriSpeech failed. Terminated."
exit 1
fi
head -n 64 data/manifest.dev-clean > data/manifest.tiny.raw
fi
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
# build vocabulary
python3 ${MAIN_ROOT}/utils/build_vocab.py \
--unit_type "spm" \
--spm_vocab_size=${nbpe} \
--spm_mode ${bpemode} \
--spm_model_prefix ${bpeprefix} \
--vocab_path="data/vocab.txt" \
--manifest_paths="data/manifest.tiny.raw"
if [ $? -ne 0 ]; then
echo "Build vocabulary failed. Terminated."
exit 1
fi
fi
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
# compute mean and stddev for normalizer
python3 ${MAIN_ROOT}/utils/compute_mean_std.py \
--manifest_path="data/manifest.tiny.raw" \
--num_samples=64 \
--specgram_type="fbank" \
--feat_dim=80 \
--delta_delta=false \
--sample_rate=16000 \
--stride_ms=10.0 \
--window_ms=25.0 \
--use_dB_normalization=False \
--num_workers=2 \
--output_path="data/mean_std.json"
if [ $? -ne 0 ]; then
echo "Compute mean and stddev failed. Terminated."
exit 1
fi
fi
if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
# format manifest with tokenids, vocab size
python3 ${MAIN_ROOT}/utils/format_data.py \
--feat_type "raw" \
--cmvn_path "data/mean_std.json" \
--unit_type "spm" \
--spm_model_prefix ${bpeprefix} \
--vocab_path="data/vocab.txt" \
--manifest_path="data/manifest.tiny.raw" \
--output_path="data/manifest.tiny"
if [ $? -ne 0 ]; then
echo "Formt mnaifest failed. Terminated."
exit 1
fi
fi
echo "LibriSpeech Data preparation done."
exit 0