提交 18db3cf7 编写于 作者: Y Yibing Liu

Handle the special tokens when scoring CER

上级 ff1cc191
......@@ -16,10 +16,18 @@ def parse_args():
default='cer',
choices=['cer', 'wer'],
help="Error rate type. (default: %(default)s)")
parser.add_argument(
'--special_tokens',
type=str,
default='<SPOKEN_NOISE>',
help="Special tokens in scoring CER, seperated by space. "
"They shouldn't be splitted and should be treated as one special "
"character. Example: '<SPOKEN_NOISE> <bos> <eos>' "
"(default: %(default)s)")
parser.add_argument(
'--ref', type=str, required=True, help="The ground truth text.")
parser.add_argument(
'--hyp', type=str, required=True, help="The decoding result.")
'--hyp', type=str, required=True, help="The decoding result text.")
args = parser.parse_args()
return args
......@@ -31,6 +39,8 @@ if __name__ == '__main__':
sum_errors, sum_ref_len = 0.0, 0
sent_cnt, not_in_ref_cnt = 0, 0
special_tokens = args.special_tokens.split(" ")
with open(args.ref, "r") as ref_txt:
line = ref_txt.readline()
while line:
......@@ -51,6 +61,8 @@ if __name__ == '__main__':
continue
if args.error_rate_type == 'cer':
for sp_tok in special_tokens:
sent = sent.replace(sp_tok, '\0')
errors, ref_len = char_errors(
ref_dict[key].decode("utf8"),
sent.decode("utf8"),
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册