From ad4c773b183d614112f5f66dfc6a11d3a249e557 Mon Sep 17 00:00:00 2001 From: Nyakku Shigure Date: Tue, 18 Oct 2022 10:27:15 +0800 Subject: [PATCH] [CodeStyle][py2] remove `compat` module (to_text) (#47036) * [CodeStyle][py2] remove `compat` module (to_text) * remove some unnecessary decode * remove to_text definition and unittest * Revert "remove to_text definition and unittest" This reverts commit a6b69cb8dca8b9b031ce10ea32d1040e7e0dd267. * remove an assertion * empty commit --- python/paddle/dataset/conll05.py | 5 ++--- python/paddle/dataset/movielens.py | 7 +++---- python/paddle/dataset/wmt14.py | 5 ++--- python/paddle/dataset/wmt16.py | 9 ++++---- .../distributed/passes/ps_trainer_pass.py | 3 +-- python/paddle/fluid/backward.py | 19 +++++++---------- python/paddle/fluid/compiler.py | 11 +++++----- .../slim/quantization/quantization_pass.py | 5 ++--- python/paddle/fluid/dygraph/io.py | 1 - python/paddle/fluid/framework.py | 21 ++++++++----------- .../fleet/parameter_server/ir/trainer_pass.py | 3 +-- python/paddle/fluid/io.py | 5 ++--- python/paddle/fluid/layers/control_flow.py | 5 ++--- .../fluid/tests/unittests/dist_transformer.py | 7 +++---- python/paddle/text/datasets/conll05.py | 5 ++--- python/paddle/text/datasets/movielens.py | 7 +++---- python/paddle/text/datasets/wmt14.py | 5 ++--- python/paddle/text/datasets/wmt16.py | 9 ++++---- 18 files changed, 55 insertions(+), 77 deletions(-) diff --git a/python/paddle/dataset/conll05.py b/python/paddle/dataset/conll05.py index b98b5d4bde..0ffdce8ae8 100644 --- a/python/paddle/dataset/conll05.py +++ b/python/paddle/dataset/conll05.py @@ -23,7 +23,6 @@ to initialize SRL model. import tarfile import gzip import paddle.dataset.common -import paddle.compat as cpt import paddle.utils.deprecated as deprecated from six.moves import zip, range @@ -90,8 +89,8 @@ def corpus_reader(data_path, words_name, props_name): labels = [] one_seg = [] for word, label in zip(words_file, props_file): - word = cpt.to_text(word.strip()) - label = cpt.to_text(label.strip().split()) + word = word.strip().decode() + label = label.strip().decode().split() if len(label) == 0: # end of sentence for i in range(len(one_seg[0])): diff --git a/python/paddle/dataset/movielens.py b/python/paddle/dataset/movielens.py index 590d20b741..db07525d31 100644 --- a/python/paddle/dataset/movielens.py +++ b/python/paddle/dataset/movielens.py @@ -29,7 +29,6 @@ import paddle.utils.deprecated as deprecated import re import functools import six -import paddle.compat as cpt __all__ = [] @@ -112,7 +111,7 @@ def __initialize_meta_info__(): categories_set = set() with package.open('ml-1m/movies.dat') as movie_file: for i, line in enumerate(movie_file): - line = cpt.to_text(line, encoding='latin') + line = line.decode(encoding='latin') movie_id, title, categories = line.strip().split('::') categories = categories.split('|') for c in categories: @@ -137,7 +136,7 @@ def __initialize_meta_info__(): USER_INFO = dict() with package.open('ml-1m/users.dat') as user_file: for line in user_file: - line = cpt.to_text(line, encoding='latin') + line = line.decode(encoding='latin') uid, gender, age, job, _ = line.strip().split("::") USER_INFO[int(uid)] = UserInfo(index=uid, gender=gender, @@ -152,7 +151,7 @@ def __reader__(rand_seed=0, test_ratio=0.1, is_test=False): with zipfile.ZipFile(file=fn) as package: with package.open('ml-1m/ratings.dat') as rating: for line in rating: - line = cpt.to_text(line, encoding='latin') + line = line.decode(encoding='latin') if (np.random.random() < test_ratio) == is_test: uid, mov_id, rating, _ = line.strip().split("::") uid = int(uid) diff --git a/python/paddle/dataset/wmt14.py b/python/paddle/dataset/wmt14.py index cdc516bee2..0007bc2770 100644 --- a/python/paddle/dataset/wmt14.py +++ b/python/paddle/dataset/wmt14.py @@ -24,7 +24,6 @@ import six import tarfile import paddle.dataset.common -import paddle.compat as cpt import paddle.utils.deprecated as deprecated __all__ = [] @@ -52,7 +51,7 @@ def __read_to_dict(tar_file, dict_size): out_dict = dict() for line_count, line in enumerate(fd): if line_count < size: - out_dict[cpt.to_text(line.strip())] = line_count + out_dict[line.strip().decode()] = line_count else: break return out_dict @@ -84,7 +83,7 @@ def reader_creator(tar_file, file_name, dict_size): ] for name in names: for line in f.extractfile(name): - line = cpt.to_text(line) + line = line.decode() line_split = line.strip().split('\t') if len(line_split) != 2: continue diff --git a/python/paddle/dataset/wmt16.py b/python/paddle/dataset/wmt16.py index 64ddc22147..e208f34249 100644 --- a/python/paddle/dataset/wmt16.py +++ b/python/paddle/dataset/wmt16.py @@ -34,7 +34,6 @@ import tarfile from collections import defaultdict import paddle -import paddle.compat as cpt import paddle.utils.deprecated as deprecated __all__ = [] @@ -54,7 +53,7 @@ def __build_dict(tar_file, dict_size, save_path, lang): word_dict = defaultdict(int) with tarfile.open(tar_file, mode="r") as f: for line in f.extractfile("wmt16/train"): - line = cpt.to_text(line) + line = line.decode() line_split = line.strip().split("\t") if len(line_split) != 2: continue sen = line_split[0] if lang == "en" else line_split[1] @@ -83,9 +82,9 @@ def __load_dict(tar_file, dict_size, lang, reverse=False): with open(dict_path, "rb") as fdict: for idx, line in enumerate(fdict): if reverse: - word_dict[idx] = cpt.to_text(line.strip()) + word_dict[idx] = line.strip().decode() else: - word_dict[cpt.to_text(line.strip())] = idx + word_dict[line.strip().decode()] = idx return word_dict @@ -116,7 +115,7 @@ def reader_creator(tar_file, file_name, src_dict_size, trg_dict_size, src_lang): with tarfile.open(tar_file, mode="r") as f: for line in f.extractfile(file_name): - line = cpt.to_text(line) + line = line.decode() line_split = line.strip().split("\t") if len(line_split) != 2: continue diff --git a/python/paddle/distributed/passes/ps_trainer_pass.py b/python/paddle/distributed/passes/ps_trainer_pass.py index 40276dec9a..95f4e980cf 100755 --- a/python/paddle/distributed/passes/ps_trainer_pass.py +++ b/python/paddle/distributed/passes/ps_trainer_pass.py @@ -14,7 +14,6 @@ import os import paddle -import paddle.compat as cpt from ..ps.utils.public import * from paddle.framework import core from paddle.distributed.passes.pass_base import PassBase, register_pass @@ -707,7 +706,7 @@ class PsGpuPass(PassBase): if op.type != "pull_box_sparse" and op.type != "pull_gpups_sparse": continue grad_op_desc, op_grad_to_var = core.get_grad_op_desc( - op.desc, cpt.to_text(set()), []) + op.desc, set(), []) for op_desc in grad_op_desc: new_op_desc = program.global_block().desc._insert_op( insert_index + 1) diff --git a/python/paddle/fluid/backward.py b/python/paddle/fluid/backward.py index e9babc79c3..c1d82f0be3 100755 --- a/python/paddle/fluid/backward.py +++ b/python/paddle/fluid/backward.py @@ -423,10 +423,8 @@ def _some_in_set_(cands, s): """ if len(cands) == 0: return False - literal_set = cpt.to_text(s) - literal_cands = cpt.to_text(cands) - for c in literal_cands: - if c in literal_set: + for c in cands: + if c in s: return True return False @@ -437,7 +435,6 @@ def _strip_grad_suffix_(name): e.g. x@GRAD ==> x y@GRAD@RENAME@1 ==> y """ - name = cpt.to_text(name) pos = name.find(core.grad_var_suffix()) new_name = name[:pos] if pos != -1 else name new_pos = name.rfind('grad/') @@ -449,7 +446,7 @@ def _append_grad_suffix_(name): Append grad suffix to the given variable name e.g. x ==> x@GRAD """ - return cpt.to_text(name) + core.grad_var_suffix() + return name + core.grad_var_suffix() def _accumulate_gradients_by_sum_op_(var_name, @@ -967,7 +964,7 @@ def _append_backward_ops_with_checkpoints_(block, "invoke op: %s" % _pretty_op_desc_(op.desc, "with_sub_block")) grad_op_desc, op_grad_to_var = core.get_grad_op_desc( - op.desc, cpt.to_text(no_grad_dict[block.idx]), []) + op.desc, no_grad_dict[block.idx], []) # record the mapping between fwd and bwd if grad_op_id_to_fwd_op is not None: @@ -993,7 +990,7 @@ def _append_backward_ops_with_checkpoints_(block, "invoke op: %s" % _pretty_op_desc_(op.desc, "with_sub_block")) grad_op_desc, op_grad_to_var = core.get_grad_op_desc( - op.desc, cpt.to_text(no_grad_dict[block.idx]), []) + op.desc, no_grad_dict[block.idx], []) # record the mapping between fwd and bwd if grad_op_id_to_fwd_op is not None: @@ -1055,7 +1052,7 @@ def _append_backward_ops_with_checkpoints_(block, # 3.c. add backward ops for all ops in current segment for op_desc in reversed(added_descs): grad_op_desc, op_grad_to_var = core.get_grad_op_desc( - op_desc, cpt.to_text(no_grad_dict[block.idx]), []) + op_desc, no_grad_dict[block.idx], []) # record the mapping between fwd and bwd if grad_op_id_to_fwd_op is not None: @@ -1239,7 +1236,7 @@ def _append_backward_ops_(block, # Getting op's corresponding grad_op grad_op_desc, op_grad_to_var = core.get_grad_op_desc( - op.desc, cpt.to_text(no_grad_dict[block.idx]), grad_sub_block_list) + op.desc, no_grad_dict[block.idx], grad_sub_block_list) # record the mapping between fwd and bwd if grad_op_id_to_fwd_op is not None: @@ -1841,7 +1838,7 @@ def append_backward(loss, params_and_grads = [] op_role_var_attr_name = core.op_proto_and_checker_maker.kOpRoleVarAttrName() for param in parameters: - if cpt.to_text(param) not in grad_info_map: + if param not in grad_info_map: continue grad_info = grad_info_map[param] grad_block = grad_info[1] diff --git a/python/paddle/fluid/compiler.py b/python/paddle/fluid/compiler.py index db4b2c2df2..82a58713f2 100644 --- a/python/paddle/fluid/compiler.py +++ b/python/paddle/fluid/compiler.py @@ -17,7 +17,6 @@ import os import six import sys import warnings -from .. import compat as cpt from . import framework from .framework import _get_paddle_place, _get_paddle_place_list from .framework import cuda_places, cpu_places, xpu_places @@ -418,10 +417,10 @@ class CompiledProgram(object): for node in self._graph.nodes(): if node.is_var() and node.var() is not None and node.var().persistable() and \ node.var().type() != core.VarDesc.VarType.RAW: - name = cpt.to_text(node.name()) + name = node.name() if self._program is not None and _should_broadcast_or_not_exists( self._program, name): - self._persistable_vars.append(cpt.to_text(node.name())) + self._persistable_vars.append(node.name()) places = list(map(_place_obj, places)) @@ -433,9 +432,9 @@ class CompiledProgram(object): return core.ParallelExecutor( places, self._persistable_vars, - cpt.to_text(self._loss_name) if self._loss_name else six.u(''), - self._scope, self._local_scopes, self._exec_strategy, - self._build_strategy, self._graph) + self._loss_name if self._loss_name else six.u(''), self._scope, + self._local_scopes, self._exec_strategy, self._build_strategy, + self._graph) def _compile_inference(self): return core.create_paddle_predictor(self._infer_config) diff --git a/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py b/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py index 952ea62c7c..abd6a7822d 100644 --- a/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py +++ b/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py @@ -18,7 +18,6 @@ try: from tqdm import tqdm except: from .utils import tqdm -from ..... import compat as cpt from .... import core from ....framework import IrGraph from ....framework import IrNode @@ -400,7 +399,7 @@ class QuantizationTransformPass(object): def _create_global_step(self, graph): if self._weight_quantize_type == 'range_abs_max' or \ self._activation_quantize_type == 'range_abs_max': - counter_name = cpt.to_text('@STEP_COUNTER@') + counter_name = '@STEP_COUNTER@' for node in graph.all_var_nodes(): if node.name() == counter_name: self._global_step = node @@ -1339,7 +1338,7 @@ class ConvertToInt8Pass(object): def _convert_to_int8(self, graph, var_node): int8_var_node_name = var_node.name() + ".int8" int8_var_node = graph.create_persistable_node( - name=cpt.to_text(int8_var_node_name), + name=int8_var_node_name, var_type=var_node.type(), shape=var_node.shape(), var_dtype=core.VarDesc.VarType.INT8) diff --git a/python/paddle/fluid/dygraph/io.py b/python/paddle/fluid/dygraph/io.py index 6671499e1c..ea617e253f 100644 --- a/python/paddle/fluid/dygraph/io.py +++ b/python/paddle/fluid/dygraph/io.py @@ -125,7 +125,6 @@ def _append_loaded_suffix(name): e.g. x ==> x.load_0, x.load_0 ==> x.load_0.load_0 """ suffix = LOADED_VAR_SUFFIX - name = cpt.to_text(name) new_name = unique_name.generate_with_ignorable_key('.'.join((name, suffix))) return new_name diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py index e709a02b38..a7308789a5 100644 --- a/python/paddle/fluid/framework.py +++ b/python/paddle/fluid/framework.py @@ -1390,7 +1390,6 @@ class Variable(object): self.error_clip = error_clip is_new_var = False - name = cpt.to_text(name) self.desc = self.block.desc.find_var(name.encode()) if self.desc is None: @@ -1757,8 +1756,7 @@ class Variable(object): if with_details: additional_attr = ("error_clip", ) for attr_name in additional_attr: - res_str += "%s: %s\n" % (attr_name, - cpt.to_text(getattr(self, attr_name))) + res_str += "%s: %s\n" % (attr_name, getattr(self, attr_name)) return res_str @@ -1900,7 +1898,7 @@ class Variable(object): dtype='float32') print("name of current Var is: {}".format(new_variable.name)) """ - return cpt.to_text(self.desc.name()) + return self.desc.name() @property def grad_name(self): @@ -2807,7 +2805,7 @@ class Operator(object): elif isinstance(arg, six.binary_type): in_arg_names.append(arg.decode()) elif isinstance(arg, (Variable, core.VarBase)): - in_arg_names.append(cpt.to_text(arg.name)) + in_arg_names.append(arg.name) else: raise TypeError( "The type of '%s' in operator %s should be " @@ -2843,7 +2841,7 @@ class Operator(object): if isinstance(arg, six.string_types): out_arg_names.append(arg) else: - out_arg_names.append(cpt.to_text(arg.name)) + out_arg_names.append(arg.name) # TODO(minqiyang): could we remove variable's op in static mode? if not _non_static_mode(): if isinstance(arg, six.string_types): @@ -3660,8 +3658,8 @@ class Block(object): Rename variable in vars and ops' inputs and outputs Args: - name(str): the name that need to be renamed. - new_name(str): the name that need to rename to. + name(bytes): the name that need to be renamed. + new_name(bytes): the name that need to rename to. Raises: ValueError: If this block doesn't have this the giving name, @@ -3671,8 +3669,8 @@ class Block(object): Returns: Variable: the Variable with the giving name. """ - name = cpt.to_text(name) - new_name = cpt.to_text(new_name) + name = name.decode() + new_name = new_name.decode() if not self.has_var(name): raise ValueError("var %s is not in current block" % name) @@ -6643,8 +6641,7 @@ class Parameter(Variable): additional_attr = ("trainable", "optimize_attr", "regularizer", "do_model_average", "need_clip") for attr_name in additional_attr: - res_str += "%s: %s\n" % (attr_name, - cpt.to_text(getattr(self, attr_name))) + res_str += "%s: %s\n" % (attr_name, getattr(self, attr_name)) else: res_str = Variable.to_string(self, throw_on_error, False) return res_str diff --git a/python/paddle/fluid/incubate/fleet/parameter_server/ir/trainer_pass.py b/python/paddle/fluid/incubate/fleet/parameter_server/ir/trainer_pass.py index e4de708820..c54fe06720 100644 --- a/python/paddle/fluid/incubate/fleet/parameter_server/ir/trainer_pass.py +++ b/python/paddle/fluid/incubate/fleet/parameter_server/ir/trainer_pass.py @@ -23,7 +23,6 @@ from functools import reduce import paddle.fluid as fluid import paddle.fluid.core as core import paddle.fluid.framework as framework -import paddle.compat as cpt from paddle.fluid.transpiler.details.program_utils import delete_ops from paddle.fluid.incubate.fleet.parameter_server.ir.public import _get_optimize_ops @@ -593,7 +592,7 @@ def ps_gpu_pass(program): if op.type != "pull_box_sparse" and op.type != "pull_gpups_sparse": continue grad_op_desc, op_grad_to_var = core.get_grad_op_desc( - op.desc, cpt.to_text(set()), []) + op.desc, set(), []) for op_desc in grad_op_desc: new_op_desc = program.global_block().desc.append_op() new_op_desc.copy_from(op_desc) diff --git a/python/paddle/fluid/io.py b/python/paddle/fluid/io.py index 8aa28519c9..6123df295f 100644 --- a/python/paddle/fluid/io.py +++ b/python/paddle/fluid/io.py @@ -42,7 +42,6 @@ from .reader import * from . import dataloader from .dataloader import * from . import core -from .. import compat as cpt from paddle.utils import deprecated from paddle.fluid.framework import static_only @@ -1660,7 +1659,7 @@ def _save_persistable_nodes(executor, dirname, graph): persistable_nodes = [] all_persistable_nodes = graph.all_persistable_nodes() for node in all_persistable_nodes: - name = cpt.to_text(node.name()) + name = node.name() if name not in persistable_node_names: persistable_node_names.add(name) persistable_nodes.append(node) @@ -1695,7 +1694,7 @@ def _load_persistable_nodes(executor, dirname, graph): persistable_nodes = [] all_persistable_nodes = graph.all_persistable_nodes() for node in all_persistable_nodes: - name = cpt.to_text(node.name()) + name = node.name() if name not in persistable_node_names: persistable_node_names.add(name) persistable_nodes.append(node) diff --git a/python/paddle/fluid/layers/control_flow.py b/python/paddle/fluid/layers/control_flow.py index 289cb9b904..0f0ae4b082 100755 --- a/python/paddle/fluid/layers/control_flow.py +++ b/python/paddle/fluid/layers/control_flow.py @@ -2431,11 +2431,10 @@ class ConditionalBlock(object): for inner_input_name in params: inner_var = parent_block._find_var_recursive(inner_input_name) if inner_var: - param_list.append(cpt.to_text(inner_var.name)) + param_list.append(inner_var.name) grad_op_desc, op_grad_to_var = core.get_grad_op_desc( - conditional_block_op.desc, cpt.to_text(set()), - [grad_sub_block.desc]) + conditional_block_op.desc, set(), [grad_sub_block.desc]) # append op_desc in grad_op_descs to target_block op_role_attr_name = core.op_proto_and_checker_maker.kOpRoleAttrName() diff --git a/python/paddle/fluid/tests/unittests/dist_transformer.py b/python/paddle/fluid/tests/unittests/dist_transformer.py index 32478b8c5f..8b52f1f734 100644 --- a/python/paddle/fluid/tests/unittests/dist_transformer.py +++ b/python/paddle/fluid/tests/unittests/dist_transformer.py @@ -26,7 +26,6 @@ import tarfile import paddle.fluid as fluid import paddle.fluid.layers as layers from test_dist_base import TestDistRunnerBase, runtime_main, RUN_STEP -import paddle.compat as cpt const_para_attr = fluid.ParamAttr(initializer=fluid.initializer.Constant(0.001)) const_bias_attr = const_para_attr @@ -856,7 +855,7 @@ class DataReader(object): f = tarfile.open(fpaths[0], "r") for line in f.extractfile(tar_fname): - line = cpt.to_text(line) + line = line.decode() fields = line.strip("\n").split(self._field_delimiter) if (not self._only_src and len(fields) == 2) or (self._only_src @@ -869,7 +868,7 @@ class DataReader(object): with open(fpath, "rb") as f: for line in f: - line = cpt.to_text(line) + line = line.decode() fields = line.strip("\n").split(self._field_delimiter) if (not self._only_src and len(fields) == 2) or (self._only_src @@ -881,7 +880,7 @@ class DataReader(object): word_dict = {} with open(dict_path, "rb") as fdict: for idx, line in enumerate(fdict): - line = cpt.to_text(line) + line = line.decode() if reverse: word_dict[idx] = line.strip("\n") else: diff --git a/python/paddle/text/datasets/conll05.py b/python/paddle/text/datasets/conll05.py index 3e8889a126..ee475e7fc5 100644 --- a/python/paddle/text/datasets/conll05.py +++ b/python/paddle/text/datasets/conll05.py @@ -17,7 +17,6 @@ import tarfile import numpy as np from paddle.io import Dataset -import paddle.compat as cpt from paddle.dataset.common import _check_exists_and_download __all__ = [] @@ -182,8 +181,8 @@ class Conll05st(Dataset): labels = [] one_seg = [] for word, label in zip(words_file, props_file): - word = cpt.to_text(word.strip()) - label = cpt.to_text(label.strip().split()) + word = word.strip().decode() + label = label.strip().decode().split() if len(label) == 0: # end of sentence for i in range(len(one_seg[0])): diff --git a/python/paddle/text/datasets/movielens.py b/python/paddle/text/datasets/movielens.py index c3d1de135b..f735d22f91 100644 --- a/python/paddle/text/datasets/movielens.py +++ b/python/paddle/text/datasets/movielens.py @@ -17,7 +17,6 @@ import zipfile import re from paddle.io import Dataset -import paddle.compat as cpt from paddle.dataset.common import _check_exists_and_download __all__ = [] @@ -161,7 +160,7 @@ class Movielens(Dataset): categories_set = set() with package.open('ml-1m/movies.dat') as movie_file: for i, line in enumerate(movie_file): - line = cpt.to_text(line, encoding='latin') + line = line.decode(encoding='latin') movie_id, title, categories = line.strip().split('::') categories = categories.split('|') for c in categories: @@ -180,7 +179,7 @@ class Movielens(Dataset): with package.open('ml-1m/users.dat') as user_file: for line in user_file: - line = cpt.to_text(line, encoding='latin') + line = line.decode(encoding='latin') uid, gender, age, job, _ = line.strip().split("::") self.user_info[int(uid)] = UserInfo(index=uid, gender=gender, @@ -193,7 +192,7 @@ class Movielens(Dataset): with zipfile.ZipFile(self.data_file) as package: with package.open('ml-1m/ratings.dat') as rating: for line in rating: - line = cpt.to_text(line, encoding='latin') + line = line.decode(encoding='latin') if (np.random.random() < self.test_ratio) == is_test: uid, mov_id, rating, _ = line.strip().split("::") uid = int(uid) diff --git a/python/paddle/text/datasets/wmt14.py b/python/paddle/text/datasets/wmt14.py index bfb665570c..715e3794f0 100644 --- a/python/paddle/text/datasets/wmt14.py +++ b/python/paddle/text/datasets/wmt14.py @@ -17,7 +17,6 @@ import numpy as np import six from paddle.io import Dataset -import paddle.compat as cpt from paddle.dataset.common import _check_exists_and_download __all__ = [] @@ -111,7 +110,7 @@ class WMT14(Dataset): out_dict = dict() for line_count, line in enumerate(fd): if line_count < size: - out_dict[cpt.to_text(line.strip())] = line_count + out_dict[line.strip().decode()] = line_count else: break return out_dict @@ -140,7 +139,7 @@ class WMT14(Dataset): ] for name in names: for line in f.extractfile(name): - line = cpt.to_text(line) + line = line.decode() line_split = line.strip().split('\t') if len(line_split) != 2: continue diff --git a/python/paddle/text/datasets/wmt16.py b/python/paddle/text/datasets/wmt16.py index 40e464cb0e..66aa78c783 100644 --- a/python/paddle/text/datasets/wmt16.py +++ b/python/paddle/text/datasets/wmt16.py @@ -22,7 +22,6 @@ from collections import defaultdict import paddle from paddle.io import Dataset -import paddle.compat as cpt from paddle.dataset.common import _check_exists_and_download __all__ = [] @@ -152,16 +151,16 @@ class WMT16(Dataset): with open(dict_path, "rb") as fdict: for idx, line in enumerate(fdict): if reverse: - word_dict[idx] = cpt.to_text(line.strip()) + word_dict[idx] = line.strip().decode() else: - word_dict[cpt.to_text(line.strip())] = idx + word_dict[line.strip().decode()] = idx return word_dict def _build_dict(self, dict_path, dict_size, lang): word_dict = defaultdict(int) with tarfile.open(self.data_file, mode="r") as f: for line in f.extractfile("wmt16/train"): - line = cpt.to_text(line) + line = line.decode() line_split = line.strip().split("\t") if len(line_split) != 2: continue sen = line_split[0] if self.lang == "en" else line_split[1] @@ -195,7 +194,7 @@ class WMT16(Dataset): self.trg_ids_next = [] with tarfile.open(self.data_file, mode="r") as f: for line in f.extractfile("wmt16/{}".format(self.mode)): - line = cpt.to_text(line) + line = line.decode() line_split = line.strip().split("\t") if len(line_split) != 2: continue -- GitLab