From 83a5e0f8d662da35b0af6b639456ee68f4e54489 Mon Sep 17 00:00:00 2001 From: Hui Zhang Date: Thu, 23 Sep 2021 03:00:41 +0000 Subject: [PATCH] using jsonlines to read manifest and dump decode result --- deepspeech/frontend/utility.py | 37 +++++++++++++++------------------- 1 file changed, 16 insertions(+), 21 deletions(-) diff --git a/deepspeech/frontend/utility.py b/deepspeech/frontend/utility.py index c6781cd4..926abf18 100644 --- a/deepspeech/frontend/utility.py +++ b/deepspeech/frontend/utility.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. """Contains data helper functions.""" -import codecs import json import math from typing import List @@ -92,26 +91,22 @@ def read_manifest( """ manifest = [] - for json_line in codecs.open(manifest_path, 'r', 'utf-8'): - try: - json_data = json.loads(json_line) - except Exception as e: - raise IOError("Error reading manifest: %s" % str(e)) - - feat_len = json_data["feat_shape"][ - 0] if 'feat_shape' in json_data else 1.0 - token_len = json_data["token_shape"][ - 0] if 'token_shape' in json_data else 1.0 - conditions = [ - feat_len >= min_input_len, - feat_len <= max_input_len, - token_len >= min_output_len, - token_len <= max_output_len, - token_len / feat_len >= min_output_input_ratio, - token_len / feat_len <= max_output_input_ratio, - ] - if all(conditions): - manifest.append(json_data) + with jsonlines.open(manifest_path, 'r') as reader: + for json_data in reader: + feat_len = json_data["feat_shape"][ + 0] if 'feat_shape' in json_data else 1.0 + token_len = json_data["token_shape"][ + 0] if 'token_shape' in json_data else 1.0 + conditions = [ + feat_len >= min_input_len, + feat_len <= max_input_len, + token_len >= min_output_len, + token_len <= max_output_len, + token_len / feat_len >= min_output_input_ratio, + token_len / feat_len <= max_output_input_ratio, + ] + if all(conditions): + manifest.append(json_data) return manifest -- GitLab