Add docs and adjust some code.

a0843941 · yangyaming · 39dbcb4d · a0843941 · a0843941 · a0843941
隐藏空白更改
内联并排

Showing 4 changed files with 5 additions and 11 deletions

data_utils/data.py data_utils/data.py +2 -2

infer.py infer.py +1 -3

test.py test.py +1 -3

tools/tune.py tools/tune.py +1 -3

未找到文件。
--- a/data_utils/data.py
+++ b/data_utils/data.py
@@ -103,8 +103,8 @@ class DataGenerator(object):
        :type filename: basestring | file
        :param transcript: Transcription text.
        :type transcript: basestring
-        :return: Tuple of audio feature tensor and list of token ids for
+        :return: Tuple of audio feature tensor and data of transcription part,
-                 transcription.
+                 where transcription part could be token ids or text.
        :rtype: tuple of (2darray, list)
        """
        if filename.startswith('tar:'):

--- a/infer.py
+++ b/infer.py
@@ -103,9 +103,7 @@ def infer():
        num_processes=args.num_proc_bsearch)
    error_rate_func = cer if args.error_rate_type == 'cer' else wer
-    target_transcripts = [
+    target_transcripts = [transcript for _, transcript in infer_data]
-        transcript for _, transcript in infer_data
-    ]
    for target, result in zip(target_transcripts, result_transcripts):
        print("\nTarget Transcription: %s\nOutput Transcription: %s" %
              (target, result))

--- a/test.py
+++ b/test.py
@@ -104,9 +104,7 @@ def evaluate():
            vocab_list=vocab_list,
            language_model_path=args.lang_model_path,
            num_processes=args.num_proc_bsearch)
-        target_transcripts = [
+        target_transcripts = [transcript for _, transcript in infer_data]
-            transcript for _, transcript in infer_data
-        ]
        for target, result in zip(target_transcripts, result_transcripts):
            error_sum += error_rate_func(target, result)
            num_ins += 1

--- a/tools/tune.py
+++ b/tools/tune.py
@@ -164,9 +164,7 @@ def tune():
            for i in xrange(len(infer_data))
        ]
-        target_transcripts = [
+        target_transcripts = [transcript for _, transcript in infer_data]
-            transcript for _, transcript in infer_data
-        ]
        num_ins += len(target_transcripts)
        # grid search