提交 cda440e6 编写于 作者: T tianhao zhang

use reverse_weight in decode.yaml

上级 c98b5dd1
......@@ -9,7 +9,7 @@ Acoustic Model | Training Data | Token-based | Size | Descriptions | CER | WER |
[Ds2 Online Aishell ASR0 Model](https://paddlespeech.bj.bcebos.com/s2t/aishell/asr0/asr0_deepspeech2_online_aishell_fbank161_ckpt_0.2.1.model.tar.gz) | Aishell Dataset | Char-based | 491 MB | 2 Conv + 5 LSTM layers | 0.0666 |-| 151 h | [D2 Online Aishell ASR0](../../examples/aishell/asr0) | onnx/inference/python |
[Ds2 Offline Aishell ASR0 Model](https://paddlespeech.bj.bcebos.com/s2t/aishell/asr0/asr0_deepspeech2_offline_aishell_ckpt_1.0.1.model.tar.gz)| Aishell Dataset | Char-based | 1.4 GB | 2 Conv + 5 bidirectional LSTM layers| 0.0554 |-| 151 h | [Ds2 Offline Aishell ASR0](../../examples/aishell/asr0) | inference/python |
[Conformer Online Wenetspeech ASR1 Model](https://paddlespeech.bj.bcebos.com/s2t/wenetspeech/asr1/asr1_chunk_conformer_wenetspeech_ckpt_1.0.0a.model.tar.gz) | WenetSpeech Dataset | Char-based | 457 MB | Encoder:Conformer, Decoder:Transformer, Decoding method: Attention rescoring| 0.11 (test\_net) 0.1879 (test\_meeting) |-| 10000 h |- | python |
[Conformer U2PP Online Wenetspeech ASR1 Model](https://paddlespeech.bj.bcebos.com/s2t/wenetspeech/asr1/asr1_chunk_conformer_u2pp_wenetspeech_ckpt_1.1.1.model.tar.gz) | WenetSpeech Dataset | Char-based | 476 MB | Encoder:Conformer, Decoder:BiTransformer, Decoding method: Attention rescoring| 0.047198 (aishell test\_-1) 0.059212 (aishell test\_16) |-| 10000 h |- | python |
[Conformer U2PP Online Wenetspeech ASR1 Model](https://paddlespeech.bj.bcebos.com/s2t/wenetspeech/asr1/asr1_chunk_conformer_u2pp_wenetspeech_ckpt_1.1.4.model.tar.gz) | WenetSpeech Dataset | Char-based | 476 MB | Encoder:Conformer, Decoder:BiTransformer, Decoding method: Attention rescoring| 0.047198 (aishell test\_-1) 0.059212 (aishell test\_16) |-| 10000 h |- | python |
[Conformer Online Aishell ASR1 Model](https://paddlespeech.bj.bcebos.com/s2t/aishell/asr1/asr1_chunk_conformer_aishell_ckpt_0.2.0.model.tar.gz) | Aishell Dataset | Char-based | 189 MB | Encoder:Conformer, Decoder:Transformer, Decoding method: Attention rescoring| 0.0544 |-| 151 h | [Conformer Online Aishell ASR1](../../examples/aishell/asr1) | python |
[Conformer Offline Aishell ASR1 Model](https://paddlespeech.bj.bcebos.com/s2t/aishell/asr1/asr1_conformer_aishell_ckpt_1.0.1.model.tar.gz) | Aishell Dataset | Char-based | 189 MB | Encoder:Conformer, Decoder:Transformer, Decoding method: Attention rescoring | 0.0460 |-| 151 h | [Conformer Offline Aishell ASR1](../../examples/aishell/asr1) | python |
[Transformer Aishell ASR1 Model](https://paddlespeech.bj.bcebos.com/s2t/aishell/asr1/asr1_transformer_aishell_ckpt_0.1.1.model.tar.gz) | Aishell Dataset | Char-based | 128 MB | Encoder:Transformer, Decoder:Transformer, Decoding method: Attention rescoring | 0.0523 || 151 h | [Transformer Aishell ASR1](../../examples/aishell/asr1) | python |
......
......@@ -69,11 +69,11 @@ asr_dynamic_pretrained_models = {
},
},
"conformer_u2pp_wenetspeech-zh-16k": {
'1.1': {
'1.3': {
'url':
'https://paddlespeech.bj.bcebos.com/s2t/wenetspeech/asr1/asr1_chunk_conformer_u2pp_wenetspeech_ckpt_1.1.1.model.tar.gz',
'https://paddlespeech.bj.bcebos.com/s2t/wenetspeech/asr1/asr1_chunk_conformer_u2pp_wenetspeech_ckpt_1.1.3.model.tar.gz',
'md5':
'eae678c04ed3b3f89672052fdc0c5e10',
'662b347e1d2131b7a4dc5398365e2134',
'cfg_path':
'model.yaml',
'ckpt_path':
......@@ -89,11 +89,11 @@ asr_dynamic_pretrained_models = {
},
},
"conformer_u2pp_online_wenetspeech-zh-16k": {
'1.1': {
'1.4': {
'url':
'https://paddlespeech.bj.bcebos.com/s2t/wenetspeech/asr1/asr1_chunk_conformer_u2pp_wenetspeech_ckpt_1.1.2.model.tar.gz',
'https://paddlespeech.bj.bcebos.com/s2t/wenetspeech/asr1/asr1_chunk_conformer_u2pp_wenetspeech_ckpt_1.1.4.model.tar.gz',
'md5':
'925d047e9188dea7f421a718230c9ae3',
'3100fc1eac5779486cab859366992d0b',
'cfg_path':
'model.yaml',
'ckpt_path':
......
......@@ -39,7 +39,6 @@ class U2Infer():
self.preprocess_conf = config.preprocess_config
self.preprocess_args = {"train": False}
self.preprocessing = Transformation(self.preprocess_conf)
self.reverse_weight = getattr(config.model_conf, 'reverse_weight', 0.0)
self.text_feature = TextFeaturizer(
unit_type=config.unit_type,
vocab=config.vocab_filepath,
......@@ -81,6 +80,7 @@ class U2Infer():
xs = paddle.to_tensor(feat, dtype='float32').unsqueeze(0)
decode_config = self.config.decode
logger.info(f"decode cfg: {decode_config}")
reverse_weight = getattr(decode_config, 'reverse_weight', 0.0)
result_transcripts = self.model.decode(
xs,
ilen,
......@@ -91,7 +91,7 @@ class U2Infer():
decoding_chunk_size=decode_config.decoding_chunk_size,
num_decoding_left_chunks=decode_config.num_decoding_left_chunks,
simulate_streaming=decode_config.simulate_streaming,
reverse_weight=decode_config.reverse_weight)
reverse_weight=reverse_weight)
rsl = result_transcripts[0][0]
utt = Path(self.audio_file).name
logger.info(f"hyp: {utt} {rsl}")
......
......@@ -79,6 +79,7 @@ class U2Infer():
xs = paddle.to_tensor(feat, dtype='float32').unsqueeze(0)
decode_config = self.config.decode
logger.info(f"decode cfg: {decode_config}")
reverse_weight = getattr(decode_config, 'reverse_weight', 0.0)
result_transcripts = self.model.decode(
xs,
ilen,
......@@ -89,7 +90,7 @@ class U2Infer():
decoding_chunk_size=decode_config.decoding_chunk_size,
num_decoding_left_chunks=decode_config.num_decoding_left_chunks,
simulate_streaming=decode_config.simulate_streaming,
reverse_weight=decode_config.reverse_weight)
reverse_weight=reverse_weight)
rsl = result_transcripts[0][0]
utt = Path(self.audio_file).name
logger.info(f"hyp: {utt} {result_transcripts[0][0]}")
......
......@@ -337,6 +337,7 @@ class U2Tester(U2Trainer):
errors_sum, len_refs, num_ins = 0.0, 0, 0
errors_func = error_rate.char_errors if decode_config.error_rate_type == 'cer' else error_rate.word_errors
error_rate_func = error_rate.cer if decode_config.error_rate_type == 'cer' else error_rate.wer
reverse_weight = getattr(decode_config, 'reverse_weight', 0.0)
start_time = time.time()
target_transcripts = self.id2token(texts, texts_len, self.text_feature)
......@@ -351,7 +352,7 @@ class U2Tester(U2Trainer):
decoding_chunk_size=decode_config.decoding_chunk_size,
num_decoding_left_chunks=decode_config.num_decoding_left_chunks,
simulate_streaming=decode_config.simulate_streaming,
reverse_weight=decode_config.reverse_weight)
reverse_weight=reverse_weight)
decode_time = time.time() - start_time
for utt, target, result, rec_tids in zip(
......
......@@ -580,6 +580,7 @@ class PaddleASRConnectionHanddler:
self.update_result()
beam_size = self.ctc_decode_config.beam_size
reverse_weight = getattr(self.ctc_decode_config, 'reverse_weight', 0.0)
hyps = self.searcher.get_hyps()
if hyps is None or len(hyps) == 0:
logger.info("No Hyps!")
......@@ -613,7 +614,7 @@ class PaddleASRConnectionHanddler:
# ctc score in ln domain
# (beam_size, max_hyps_len, vocab_size)
decoder_out, r_decoder_out = self.model.forward_attention_decoder(
hyps_pad, hyps_lens, self.encoder_out, self.model.reverse_weight)
hyps_pad, hyps_lens, self.encoder_out, reverse_weight)
decoder_out = decoder_out.numpy()
# r_decoder_out will be 0.0, if reverse_weight is 0.0 or decoder is a
......@@ -631,13 +632,12 @@ class PaddleASRConnectionHanddler:
# last decoder output token is `eos`, for laste decoder input token.
score += decoder_out[i][len(hyp[0])][self.model.eos]
if self.model.reverse_weight > 0:
if reverse_weight > 0:
r_score = 0.0
for j, w in enumerate(hyp[0]):
r_score += r_decoder_out[i][len(hyp[0]) - j - 1][w]
r_score += r_decoder_out[i][len(hyp[0])][self.model.eos]
score = score * (1 - self.model.reverse_weight
) + r_score * self.model.reverse_weight
score = score * (1 - reverse_weight) + r_score * reverse_weight
# add ctc score (which in ln domain)
score += hyp[1] * self.ctc_decode_config.ctc_weight
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册