提交 83a5e0f8 编写于 作者: H Hui Zhang

using jsonlines to read manifest and dump decode result

上级 33b87902
......@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains data helper functions."""
import codecs
import json
import math
from typing import List
......@@ -92,26 +91,22 @@ def read_manifest(
"""
manifest = []
for json_line in codecs.open(manifest_path, 'r', 'utf-8'):
try:
json_data = json.loads(json_line)
except Exception as e:
raise IOError("Error reading manifest: %s" % str(e))
feat_len = json_data["feat_shape"][
0] if 'feat_shape' in json_data else 1.0
token_len = json_data["token_shape"][
0] if 'token_shape' in json_data else 1.0
conditions = [
feat_len >= min_input_len,
feat_len <= max_input_len,
token_len >= min_output_len,
token_len <= max_output_len,
token_len / feat_len >= min_output_input_ratio,
token_len / feat_len <= max_output_input_ratio,
]
if all(conditions):
manifest.append(json_data)
with jsonlines.open(manifest_path, 'r') as reader:
for json_data in reader:
feat_len = json_data["feat_shape"][
0] if 'feat_shape' in json_data else 1.0
token_len = json_data["token_shape"][
0] if 'token_shape' in json_data else 1.0
conditions = [
feat_len >= min_input_len,
feat_len <= max_input_len,
token_len >= min_output_len,
token_len <= max_output_len,
token_len / feat_len >= min_output_input_ratio,
token_len / feat_len <= max_output_input_ratio,
]
if all(conditions):
manifest.append(json_data)
return manifest
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册