add collactor and evaluation code for ST

ac0ae57e · Junkun · 03231519 · ac0ae57e · ac0ae57e · ac0ae57e
5 changed file
--- a/deepspeech/exps/u2_st/model.py
+++ b/deepspeech/exps/u2_st/model.py
@@ -24,7 +24,6 @@ from typing import Tuple

 import numpy as np
 import paddle
-import sacrebleu
 from paddle import distributed as dist
 from paddle.io import DataLoader
 from yacs.config import CfgNode
@@ -32,6 +31,7 @@ from yacs.config import CfgNode
 from deepspeech.io.collator_st import KaldiPrePorocessedCollator
 from deepspeech.io.collator_st import SpeechCollator
 from deepspeech.io.collator_st import TripletKaldiPrePorocessedCollator
+from deepspeech.io.collator_st import TripletSpeechCollator
 from deepspeech.io.dataset import ManifestDataset
 from deepspeech.io.dataset import TripletManifestDataset
 from deepspeech.io.sampler import SortagradBatchSampler
@@ -40,6 +40,7 @@ from deepspeech.models.u2_st import U2STModel
 from deepspeech.training.gradclip import ClipGradByGlobalNormWithLog
 from deepspeech.training.scheduler import WarmupLR
 from deepspeech.training.trainer import Trainer
+from deepspeech.utils import bleu_score
 from deepspeech.utils import ctc_utils
 from deepspeech.utils import error_rate
 from deepspeech.utils import layer_tools
@@ -248,7 +249,11 @@ class U2STTrainer(Trainer):
        dev_dataset = Dataset.from_config(config)

        if config.collator.raw_wav:
-            TestCollator = Collator = SpeechCollator
+            if config.model.model_conf.asr_weight > 0.:
+                Collator = TripletSpeechCollator
+                TestCollator = SpeechCollator
+            else:
+                TestCollator = Collator = SpeechCollator
            # Not yet implement the mtl loader for raw_wav.
        else:
            if config.model.model_conf.asr_weight > 0.:
@@ -393,7 +398,7 @@ class U2STTester(U2STTrainer):
                lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm',  # Filepath for language model.
                decoding_method='attention',  # Decoding method. Options: 'attention', 'ctc_greedy_search',
                # 'ctc_prefix_beam_search', 'attention_rescoring'
-                error_rate_type='wer',  # Error rate type for evaluation. Options `wer`, 'cer'
+                error_rate_type='bleu',  # Error rate type for evaluation. Options `bleu`, 'char_bleu'
                num_proc_bsearch=8,  # # of CPUs for beam search.
                beam_size=10,  # Beam search width.
                batch_size=16,  # decoding batch size
@@ -428,10 +433,10 @@ class U2STTester(U2STTrainer):
                                    audio_len,
                                    texts,
                                    texts_len,
+                                    bleu_func,
                                    fout=None):
        cfg = self.config.decoding
        len_refs, num_ins = 0, 0
-        bleu_func = sacrebleu.corpus_bleu

        start_time = time.time()
        text_feature = self.test_loader.collate_fn.text_feature
@@ -487,6 +492,9 @@ class U2STTester(U2STTrainer):
        self.model.eval()
        logger.info(f"Test Total Examples: {len(self.test_loader.dataset)}")

+        cfg = self.config.decoding
+        bleu_func = bleu_score.char_bleu if cfg.error_rate_type == 'char-bleu' else bleu_score.bleu
+
        stride_ms = self.test_loader.collate_fn.stride_ms
        hyps, refs = [], []
        len_refs, num_ins = 0, 0
@@ -495,7 +503,7 @@ class U2STTester(U2STTrainer):
        with open(self.args.result_file, 'w') as fout:
            for i, batch in enumerate(self.test_loader):
                metrics = self.compute_translation_metrics(
-                    *batch, fout=fout)
+                    *batch, bleu_func=bleu_func, fout=fout)
                hyps += metrics['hyps']
                refs += metrics['refs']
                bleu = metrics['bleu']
@@ -504,19 +512,16 @@ class U2STTester(U2STTrainer):
                len_refs += metrics['len_refs']
                num_ins += metrics['num_ins']
                rtf = num_time / (num_frames * stride_ms)
-                logger.info("RTF: %f, BELU (%d) = %f" %
-                            (rtf, num_ins, bleu))
+                logger.info("RTF: %f, BELU (%d) = %f" % (rtf, num_ins, bleu))

        rtf = num_time / (num_frames * stride_ms)
        msg = "Test: "
        msg += "epoch: {}, ".format(self.epoch)
        msg += "step: {}, ".format(self.iteration)
        msg += "RTF: {}, ".format(rtf)
-        msg += "Test set [%s]: %s" % (
-            len(hyps), str(sacrebleu.corpus_bleu(hyps, [refs])))
+        msg += "Test set [%s]: %s" % (len(hyps), str(bleu_func(hyps, [refs])))
        logger.info(msg)
-        bleu_meta_path = os.path.splitext(
-            self.args.result_file)[0] + '.bleu'
+        bleu_meta_path = os.path.splitext(self.args.result_file)[0] + '.bleu'
        err_type_str = "BLEU"
        with open(bleu_meta_path, 'w') as f:
            data = json.dumps({
@@ -527,7 +532,7 @@ class U2STTester(U2STTrainer):
                "rtf":
                rtf,
                err_type_str:
-                sacrebleu.corpus_bleu(hyps, [refs]).score,
+                bleu_func(hyps, [refs]).score,
                "dataset_hour": (num_frames * stride_ms) / 1000.0 / 3600.0,
                "process_hour":
                num_time / 1000.0 / 3600.0,

--- a/deepspeech/io/collator_st.py
+++ b/deepspeech/io/collator_st.py
--- a/deepspeech/io/dataset.py
+++ b/deepspeech/io/dataset.py
@@ -19,9 +19,7 @@ from yacs.config import CfgNode
 from deepspeech.frontend.utility import read_manifest
 from deepspeech.utils.log import Log

-__all__ = [
-    "ManifestDataset",
-]
+__all__ = ["ManifestDataset", "TripletManifestDataset"]

 logger = Log(__name__).getlog()

@@ -105,3 +103,16 @@ class ManifestDataset(Dataset):
    def __getitem__(self, idx):
        instance = self._manifest[idx]
        return instance["utt"], instance["feat"], instance["text"]
+
+
+class TripletManifestDataset(ManifestDataset):
+    """
+        For Joint Training of Speech Translation and ASR.
+        text: translation,
+        text1: transcript.
+    """
+
+    def __getitem__(self, idx):
+        instance = self._manifest[idx]
+        return instance["utt"], instance["feat"], instance["text"], instance[
+            "text1"]
--- a/deepspeech/models/u2_st.py
+++ b/deepspeech/models/u2_st.py
--- a/deepspeech/utils/bleu_score.py
+++ b/deepspeech/utils/bleu_score.py
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""This module provides functions to calculate bleu score in different level.
+e.g. wer for word-level, cer for char-level.
+"""
+import numpy as np
+import sacrebleu
+
+__all__ = ['bleu', 'char_bleu']
+
+
+def bleu(hypothesis, reference):
+    """Calculate BLEU. BLEU compares reference text and
+    hypothesis text in word-level using scarebleu.
+
+   
+
+    :param reference: The reference sentences.
+    :type reference: list[list[str]]
+    :param hypothesis: The hypothesis sentence.
+    :type hypothesis: list[str]
+    :raises ValueError: If the reference length is zero.
+    """
+
+    return sacrebleu.corpus_bleu(hypothesis, reference)
+
+def char_bleu(hypothesis, reference):
+    """Calculate BLEU. BLEU compares reference text and
+    hypothesis text in char-level using scarebleu.
+
+   
+
+    :param reference: The reference sentences.
+    :type reference: list[list[str]]
+    :param hypothesis: The hypothesis sentence.
+    :type hypothesis: list[str]
+    :raises ValueError: If the reference number is zero.
+    """
+    hypothesis =[' '.join(list(hyp.replace(' ', ''))) for hyp in hypothesis]
+    reference = [[' '.join(list(ref_i.replace(' ', ''))) for ref_i in ref ]for ref in reference ]
+
+    return sacrebleu.corpus_bleu(hypothesis, reference)
\ No newline at end of file