提交 bc923000 编写于 作者: G guosheng

Make args support escape-sequence strings (e.g. hex codes such as "\x01") in Transformer

上级 13b27afa
import argparse
import ast
import numpy as np
from functools import partial
import paddle
import paddle.fluid as fluid
......@@ -58,7 +59,8 @@ def parse_args():
"provided in util.py to do this.")
parser.add_argument(
"--token_delimiter",
type=str,
type=partial(
str.decode, encoding="string-escape"),
default=" ",
help="The delimiter used to split tokens in source or target sentences. "
"For EN-DE BPE data we provided, use spaces as token delimiter.; "
......@@ -540,7 +542,7 @@ def fast_infer(test_data, trg_idx2word, use_wordpiece):
trg_idx2word[idx]
for idx in post_process_seq(
np.array(seq_ids)[sub_start:sub_end])
]) if not use_wordpiece else util.subword_ids_to_str(
]) if not use_wordpiece else util.subtoken_ids_to_str(
post_process_seq(np.array(seq_ids)[sub_start:sub_end]),
trg_idx2word))
scores[i].append(np.array(seq_scores)[sub_end - 1])
......
......@@ -4,6 +4,7 @@ import argparse
import ast
import numpy as np
import multiprocessing
from functools import partial
import paddle
import paddle.fluid as fluid
......@@ -78,7 +79,8 @@ def parse_args():
help="The <bos>, <eos> and <unk> tokens in the dictionary.")
parser.add_argument(
"--token_delimiter",
type=str,
type=partial(
str.decode, encoding="string-escape"),
default=" ",
help="The delimiter used to split tokens in source or target sentences. "
"For EN-DE BPE data we provided, use spaces as token delimiter. "
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册