提交 83a5e0f8 作者: Hui Zhang

Use jsonlines to read the manifest and dump decode results

上级 33b87902
...@@ -12,7 +12,6 @@ ...@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
"""Contains data helper functions.""" """Contains data helper functions."""
import codecs
import json import json
import math import math
from typing import List from typing import List
...@@ -92,26 +91,22 @@ def read_manifest( ...@@ -92,26 +91,22 @@ def read_manifest(
""" """
manifest = [] manifest = []
for json_line in codecs.open(manifest_path, 'r', 'utf-8'): with jsonlines.open(manifest_path, 'r') as reader:
try: for json_data in reader:
json_data = json.loads(json_line) feat_len = json_data["feat_shape"][
except Exception as e: 0] if 'feat_shape' in json_data else 1.0
raise IOError("Error reading manifest: %s" % str(e)) token_len = json_data["token_shape"][
0] if 'token_shape' in json_data else 1.0
feat_len = json_data["feat_shape"][ conditions = [
0] if 'feat_shape' in json_data else 1.0 feat_len >= min_input_len,
token_len = json_data["token_shape"][ feat_len <= max_input_len,
0] if 'token_shape' in json_data else 1.0 token_len >= min_output_len,
conditions = [ token_len <= max_output_len,
feat_len >= min_input_len, token_len / feat_len >= min_output_input_ratio,
feat_len <= max_input_len, token_len / feat_len <= max_output_input_ratio,
token_len >= min_output_len, ]
token_len <= max_output_len, if all(conditions):
token_len / feat_len >= min_output_input_ratio, manifest.append(json_data)
token_len / feat_len <= max_output_input_ratio,
]
if all(conditions):
manifest.append(json_data)
return manifest return manifest
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册