Make the --token_delimiter argument support escaped (e.g. hex code) strings in Transformer

bc923000 · guosheng · 13b27afa · bc923000 · bc923000
2 changed files
--- a/fluid/neural_machine_translation/transformer/infer.py
+++ b/fluid/neural_machine_translation/transformer/infer.py
 import argparse
 import ast
 import numpy as np
+from functools import partial

 import paddle
 import paddle.fluid as fluid
@@ -58,7 +59,8 @@ def parse_args():
        "provided in util.py to do this.")
    parser.add_argument(
        "--token_delimiter",
-        type=str,
+        type=partial(
+            str.decode, encoding="string-escape"),
        default=" ",
        help="The delimiter used to split tokens in source or target sentences. "
        "For EN-DE BPE data we provided, use spaces as token delimiter.; "
@@ -540,7 +542,7 @@ def fast_infer(test_data, trg_idx2word, use_wordpiece):
                    trg_idx2word[idx]
                    for idx in post_process_seq(
                        np.array(seq_ids)[sub_start:sub_end])
-                ]) if not use_wordpiece else util.subword_ids_to_str(
+                ]) if not use_wordpiece else util.subtoken_ids_to_str(
                    post_process_seq(np.array(seq_ids)[sub_start:sub_end]),
                    trg_idx2word))
                scores[i].append(np.array(seq_scores)[sub_end - 1])

--- a/fluid/neural_machine_translation/transformer/train.py
+++ b/fluid/neural_machine_translation/transformer/train.py
@@ -4,6 +4,7 @@ import argparse
 import ast
 import numpy as np
 import multiprocessing
+from functools import partial

 import paddle
 import paddle.fluid as fluid
@@ -78,7 +79,8 @@ def parse_args():
        help="The <bos>, <eos> and <unk> tokens in the dictionary.")
    parser.add_argument(
        "--token_delimiter",
-        type=str,
+        type=partial(
+            str.decode, encoding="string-escape"),
        default=" ",
        help="The delimiter used to split tokens in source or target sentences. "
        "For EN-DE BPE data we provided, use spaces as token delimiter. "