diff --git a/deepspeech/exps/deepspeech2/bin/export.py b/deepspeech/exps/deepspeech2/bin/export.py index 8ae987947487937598a1186c6ea64170933168b2..f8764fde37f718b55fe84590368b28e9e96470cc 100644 --- a/deepspeech/exps/deepspeech2/bin/export.py +++ b/deepspeech/exps/deepspeech2/bin/export.py @@ -34,6 +34,7 @@ if __name__ == "__main__": args = parser.parse_args() if args.model_type is None: args.model_type = 'offline' + print("model_type:{}".format(args.model_type)) print_arguments(args) # https://yaml.org/type/float.html diff --git a/deepspeech/exps/deepspeech2/bin/test.py b/deepspeech/exps/deepspeech2/bin/test.py index 78a99b892a3ac5996899726af73349ab4212d786..376e18e38b0258d506fe19bff8bdb66ff9e0ccdf 100644 --- a/deepspeech/exps/deepspeech2/bin/test.py +++ b/deepspeech/exps/deepspeech2/bin/test.py @@ -35,6 +35,7 @@ if __name__ == "__main__": print_arguments(args, globals()) if args.model_type is None: args.model_type = 'offline' + print("model_type:{}".format(args.model_type)) # https://yaml.org/type/float.html config = get_cfg_defaults(args.model_type) diff --git a/deepspeech/exps/deepspeech2/bin/train.py b/deepspeech/exps/deepspeech2/bin/train.py index dcfa62f45510cd6c9abb42cc4f6828278e8b81a2..69ff043a08d28171711543afcbd51bcd571e69d2 100644 --- a/deepspeech/exps/deepspeech2/bin/train.py +++ b/deepspeech/exps/deepspeech2/bin/train.py @@ -39,6 +39,7 @@ if __name__ == "__main__": args = parser.parse_args() if args.model_type is None: args.model_type = 'offline' + print("model_type:{}".format(args.model_type)) print_arguments(args, globals()) # https://yaml.org/type/float.html diff --git a/deepspeech/exps/deepspeech2/config.py b/deepspeech/exps/deepspeech2/config.py index 66516b35dbf54278edf09ccf0a286531690f03bf..53358014c78afe21ac6ad66f44924d03fa9c4ba8 100644 --- a/deepspeech/exps/deepspeech2/config.py +++ b/deepspeech/exps/deepspeech2/config.py @@ -23,26 +23,14 @@ from deepspeech.models.ds2_online import DeepSpeech2ModelOnline def get_cfg_defaults(model_type='offline'): _C = CfgNode() + _C.data = ManifestDataset.params() + _C.collator = SpeechCollator.params() + _C.training = DeepSpeech2Trainer.params() + _C.decoding = DeepSpeech2Tester.params() if (model_type == 'offline'): - _C.data = ManifestDataset.params() - - _C.collator = SpeechCollator.params() - _C.model = DeepSpeech2Model.params() - - _C.training = DeepSpeech2Trainer.params() - - _C.decoding = DeepSpeech2Tester.params() else: - _C.data = ManifestDataset.params() - - _C.collator = SpeechCollator.params() - _C.model = DeepSpeech2ModelOnline.params() - - _C.training = DeepSpeech2Trainer.params() - - _C.decoding = DeepSpeech2Tester.params() """Get a yacs CfgNode object with default values for my_project.""" # Return a clone so that the defaults will not be altered # This is for the "local variable" use pattern diff --git a/deepspeech/exps/deepspeech2/model.py b/deepspeech/exps/deepspeech2/model.py index 1fd47bd1e2e9a39215cca11539fe9fb0f979b98a..4acfad86bdce5f6d957821e4f5e28f848fef8d1e 100644 --- a/deepspeech/exps/deepspeech2/model.py +++ b/deepspeech/exps/deepspeech2/model.py @@ -134,7 +134,6 @@ class DeepSpeech2Trainer(Trainer): use_gru=config.model.use_gru, share_rnn_weights=config.model.share_rnn_weights) elif self.args.model_type == 'online': - print("fc_layers_size_list", config.model.fc_layers_size_list) model = DeepSpeech2ModelOnline( feat_size=self.train_loader.collate_fn.feature_size, dict_size=self.train_loader.collate_fn.vocab_size, diff --git a/deepspeech/models/ds2_online/deepspeech2.py b/deepspeech/models/ds2_online/deepspeech2.py index d97e95740bbbe535eb9a9ff20b12885dcbb88a8f..bed9c41d39e93a8c48a6418486e2dd5bb4421496 100644 --- a/deepspeech/models/ds2_online/deepspeech2.py +++ b/deepspeech/models/ds2_online/deepspeech2.py @@ -174,6 +174,7 @@ class CRNNEncoder(nn.Layer): num_chunk = (max_len + padding_len - chunk_size) / chunk_stride + 1 num_chunk = int(num_chunk) chunk_state_list = [None] * self.num_rnn_layers + final_chunk_state_list = None for i in range(0, num_chunk): start = i * chunk_stride end = start + chunk_size @@ -366,4 +367,4 @@ class DeepSpeech2InferModelOnline(DeepSpeech2ModelOnline): eouts_chunk, eouts_chunk_lens, final_state_list = self.encoder.forward_chunk( audio_chunk, audio_chunk_lens, chunk_state_list) probs_chunk = self.decoder.softmax(eouts_chunk) - return probs_chunk, final_state_list + return probs_chunk, eouts_chunk_lens, final_state_list diff --git a/examples/tiny/s0/local/export.sh b/examples/tiny/s0/local/export.sh index 6955239c72e1c9adbc4c9ef7a6e5e5a75dfbee76..2e09e5f5e76a7f7cdf9cca8fbb91d66bb48aea0c 100755 --- a/examples/tiny/s0/local/export.sh +++ b/examples/tiny/s0/local/export.sh @@ -1,7 +1,7 @@ #!/bin/bash if [ $# != 4 ];then - echo "usage: $0 config_path ckpt_prefix jit_model_path" + echo "usage: $0 config_path ckpt_prefix jit_model_path model_type" exit -1 fi diff --git a/examples/tiny/s0/local/test.sh b/examples/tiny/s0/local/test.sh index 2f74491a101e56a1347450b79cb883d99ddee75e..b5b68c599c45ab50aa12ee35c120e02fb68740b4 100755 --- a/examples/tiny/s0/local/test.sh +++ b/examples/tiny/s0/local/test.sh @@ -1,7 +1,7 @@ #!/bin/bash if [ $# != 3 ];then - echo "usage: ${0} config_path ckpt_path_prefix" + echo "usage: ${0} config_path ckpt_path_prefix model_type" exit -1 fi diff --git a/examples/tiny/s0/local/train.sh b/examples/tiny/s0/local/train.sh index 1d49dcd1d65124731e755dca2358e2001e3ad468..c6a631800378640434dbe952035840895c1b23b5 100755 --- a/examples/tiny/s0/local/train.sh +++ b/examples/tiny/s0/local/train.sh @@ -1,7 +1,7 @@ #!/bin/bash if [ $# != 3 ];then - echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name" + echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name model_type" exit -1 fi diff --git a/examples/tiny/s0/run_online.sh b/examples/tiny/s0/run_online.sh deleted file mode 100755 index 3f5ecbb669c769ecbc0cf8132af91045c6bb0d6e..0000000000000000000000000000000000000000 --- a/examples/tiny/s0/run_online.sh +++ /dev/null @@ -1,41 +0,0 @@ -#!/bin/bash -set -e -source path.sh - -gpus=7 -stage=1 -stop_stage=1 -conf_path=conf/deepspeech2_online.yaml -avg_num=1 -model_type=online #online | offline - -source ${MAIN_ROOT}/utils/parse_options.sh || exit 1; - -avg_ckpt=avg_${avg_num} -ckpt=$(basename ${conf_path} | awk -F'.' '{print $1}') ###ckpt = deepspeech2 -echo "checkpoint name ${ckpt}" - -if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then - # prepare data - bash ./local/data.sh || exit -1 -fi - -if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then - # train model, all `ckpt` under `exp` dir - CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt} ${model_type} -fi - -if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then - # avg n best model - avg.sh exp/${ckpt}/checkpoints ${avg_num} -fi - -if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then - # test ckpt avg_n - CUDA_VISIBLE_DEVICES=${gpus} ./local/test.sh ${conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} ${model_type} || exit -1 -fi - -if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then - # export ckpt avg_n - CUDA_VISIBLE_DEVICES=${gpus} ./local/export.sh ${conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} exp/${ckpt}/checkpoints/${avg_ckpt}.jit ${model_type} -fi