提交 bc923000 编写于 作者: G guosheng

Make the --token_delimiter argument accept escaped strings (e.g. hex codes such as "\x01") in Transformer

上级 13b27afa
import argparse import argparse
import ast import ast
import numpy as np import numpy as np
from functools import partial
import paddle import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
...@@ -58,7 +59,8 @@ def parse_args(): ...@@ -58,7 +59,8 @@ def parse_args():
"provided in util.py to do this.") "provided in util.py to do this.")
parser.add_argument( parser.add_argument(
"--token_delimiter", "--token_delimiter",
type=str, type=partial(
str.decode, encoding="string-escape"),
default=" ", default=" ",
help="The delimiter used to split tokens in source or target sentences. " help="The delimiter used to split tokens in source or target sentences. "
"For EN-DE BPE data we provided, use spaces as token delimiter.; " "For EN-DE BPE data we provided, use spaces as token delimiter.; "
...@@ -540,7 +542,7 @@ def fast_infer(test_data, trg_idx2word, use_wordpiece): ...@@ -540,7 +542,7 @@ def fast_infer(test_data, trg_idx2word, use_wordpiece):
trg_idx2word[idx] trg_idx2word[idx]
for idx in post_process_seq( for idx in post_process_seq(
np.array(seq_ids)[sub_start:sub_end]) np.array(seq_ids)[sub_start:sub_end])
]) if not use_wordpiece else util.subword_ids_to_str( ]) if not use_wordpiece else util.subtoken_ids_to_str(
post_process_seq(np.array(seq_ids)[sub_start:sub_end]), post_process_seq(np.array(seq_ids)[sub_start:sub_end]),
trg_idx2word)) trg_idx2word))
scores[i].append(np.array(seq_scores)[sub_end - 1]) scores[i].append(np.array(seq_scores)[sub_end - 1])
......
...@@ -4,6 +4,7 @@ import argparse ...@@ -4,6 +4,7 @@ import argparse
import ast import ast
import numpy as np import numpy as np
import multiprocessing import multiprocessing
from functools import partial
import paddle import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
...@@ -78,7 +79,8 @@ def parse_args(): ...@@ -78,7 +79,8 @@ def parse_args():
help="The <bos>, <eos> and <unk> tokens in the dictionary.") help="The <bos>, <eos> and <unk> tokens in the dictionary.")
parser.add_argument( parser.add_argument(
"--token_delimiter", "--token_delimiter",
type=str, type=partial(
str.decode, encoding="string-escape"),
default=" ", default=" ",
help="The delimiter used to split tokens in source or target sentences. " help="The delimiter used to split tokens in source or target sentences. "
"For EN-DE BPE data we provided, use spaces as token delimiter. " "For EN-DE BPE data we provided, use spaces as token delimiter. "
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册