diff --git a/python/paddle/dataset/conll05.py b/python/paddle/dataset/conll05.py
index b98b5d4bde6f06da6ee2b38b6c1de47a6986fd39..0ffdce8ae8cb0cab52d1dd62fc6db5d73120e44a 100644
--- a/python/paddle/dataset/conll05.py
+++ b/python/paddle/dataset/conll05.py
@@ -23,7 +23,6 @@ to initialize SRL model.
 import tarfile
 import gzip
 import paddle.dataset.common
-import paddle.compat as cpt
 import paddle.utils.deprecated as deprecated
 from six.moves import zip, range
 
@@ -90,8 +89,8 @@ def corpus_reader(data_path, words_name, props_name):
         labels = []
         one_seg = []
         for word, label in zip(words_file, props_file):
-            word = cpt.to_text(word.strip())
-            label = cpt.to_text(label.strip().split())
+            word = word.strip().decode()
+            label = label.strip().decode().split()
 
             if len(label) == 0:  # end of sentence
                 for i in range(len(one_seg[0])):
diff --git a/python/paddle/dataset/movielens.py b/python/paddle/dataset/movielens.py
index 590d20b741fd350b3c1adc0846f8e93115cc9de8..db07525d3113006855826a1b58235548483cc356 100644
--- a/python/paddle/dataset/movielens.py
+++ b/python/paddle/dataset/movielens.py
@@ -29,7 +29,6 @@ import paddle.utils.deprecated as deprecated
 import re
 import functools
 import six
-import paddle.compat as cpt
 
 __all__ = []
 
@@ -112,7 +111,7 @@ def __initialize_meta_info__():
         categories_set = set()
         with package.open('ml-1m/movies.dat') as movie_file:
             for i, line in enumerate(movie_file):
-                line = cpt.to_text(line, encoding='latin')
+                line = line.decode(encoding='latin')
                 movie_id, title, categories = line.strip().split('::')
                 categories = categories.split('|')
                 for c in categories:
@@ -137,7 +136,7 @@ def __initialize_meta_info__():
         USER_INFO = dict()
         with package.open('ml-1m/users.dat') as user_file:
             for line in user_file:
-                line = cpt.to_text(line, encoding='latin')
+                line = line.decode(encoding='latin')
                 uid, gender, age, job, _ = line.strip().split("::")
                 USER_INFO[int(uid)] = UserInfo(index=uid,
                                                gender=gender,
@@ -152,7 +151,7 @@ def __reader__(rand_seed=0, test_ratio=0.1, is_test=False):
     with zipfile.ZipFile(file=fn) as package:
         with package.open('ml-1m/ratings.dat') as rating:
             for line in rating:
-                line = cpt.to_text(line, encoding='latin')
+                line = line.decode(encoding='latin')
                 if (np.random.random() < test_ratio) == is_test:
                     uid, mov_id, rating, _ = line.strip().split("::")
                     uid = int(uid)
diff --git a/python/paddle/dataset/wmt14.py b/python/paddle/dataset/wmt14.py
index cdc516bee2d24fecaa30298885ae07b70d5eb419..0007bc2770d489a871cbe20503de37bec39742b0 100644
--- a/python/paddle/dataset/wmt14.py
+++ b/python/paddle/dataset/wmt14.py
@@ -24,7 +24,6 @@ import six
 import tarfile
 
 import paddle.dataset.common
-import paddle.compat as cpt
 import paddle.utils.deprecated as deprecated
 
 __all__ = []
@@ -52,7 +51,7 @@ def __read_to_dict(tar_file, dict_size):
         out_dict = dict()
         for line_count, line in enumerate(fd):
             if line_count < size:
-                out_dict[cpt.to_text(line.strip())] = line_count
+                out_dict[line.strip().decode()] = line_count
             else:
                 break
         return out_dict
@@ -84,7 +83,7 @@ def reader_creator(tar_file, file_name, dict_size):
         ]
         for name in names:
             for line in f.extractfile(name):
-                line = cpt.to_text(line)
+                line = line.decode()
                 line_split = line.strip().split('\t')
                 if len(line_split) != 2:
                     continue
diff --git a/python/paddle/dataset/wmt16.py b/python/paddle/dataset/wmt16.py
index 64ddc2214716e6b95bb6b87473b3acabe178357d..e208f342490c3e80b1711a72d261178c897c44aa 100644
--- a/python/paddle/dataset/wmt16.py
+++ b/python/paddle/dataset/wmt16.py
@@ -34,7 +34,6 @@ import tarfile
 from collections import defaultdict
 
 import paddle
-import paddle.compat as cpt
 import paddle.utils.deprecated as deprecated
 
 __all__ = []
@@ -54,7 +53,7 @@ def __build_dict(tar_file, dict_size, save_path, lang):
     word_dict = defaultdict(int)
     with tarfile.open(tar_file, mode="r") as f:
         for line in f.extractfile("wmt16/train"):
-            line = cpt.to_text(line)
+            line = line.decode()
             line_split = line.strip().split("\t")
             if len(line_split) != 2: continue
             sen = line_split[0] if lang == "en" else line_split[1]
@@ -83,9 +82,9 @@ def __load_dict(tar_file, dict_size, lang, reverse=False):
    with open(dict_path, "rb") as fdict:
        for idx, line in enumerate(fdict):
            if reverse:
-                word_dict[idx] = cpt.to_text(line.strip())
+                word_dict[idx] = line.strip().decode()
            else:
-                word_dict[cpt.to_text(line.strip())] = idx
+                word_dict[line.strip().decode()] = idx
 
    return word_dict
 
@@ -116,7 +115,7 @@ def reader_creator(tar_file, file_name, src_dict_size, trg_dict_size, src_lang):
    with tarfile.open(tar_file, mode="r") as f:
        for line in f.extractfile(file_name):
-            line = cpt.to_text(line)
+            line = line.decode()
            line_split = line.strip().split("\t")
            if len(line_split) != 2:
                continue
diff --git a/python/paddle/distributed/passes/ps_trainer_pass.py b/python/paddle/distributed/passes/ps_trainer_pass.py
index 40276dec9a557aa14e8d702458a1fc8e37ae3d30..95f4e980cf46b75b8e2a902f5d7bddc6639c9aaa 100755
--- a/python/paddle/distributed/passes/ps_trainer_pass.py
+++ b/python/paddle/distributed/passes/ps_trainer_pass.py
@@ -14,7 +14,6 @@
 
 import os
 import paddle
-import paddle.compat as cpt
 from ..ps.utils.public import *
 from paddle.framework import core
 from paddle.distributed.passes.pass_base import PassBase, register_pass
@@ -707,7 +706,7 @@ class PsGpuPass(PassBase):
             if op.type != "pull_box_sparse" and op.type != "pull_gpups_sparse":
                 continue
             grad_op_desc, op_grad_to_var = core.get_grad_op_desc(
-                op.desc, cpt.to_text(set()), [])
+                op.desc, set(), [])
             for op_desc in grad_op_desc:
                 new_op_desc = program.global_block().desc._insert_op(
                     insert_index + 1)
diff --git a/python/paddle/fluid/backward.py b/python/paddle/fluid/backward.py
index e9babc79c3ac88b0e30b96f61d7d534748cb03a3..c1d82f0be31e749c07a47076e5bc1d4b6ef99a6e 100755
--- a/python/paddle/fluid/backward.py
+++ b/python/paddle/fluid/backward.py
@@ -423,10 +423,8 @@ def _some_in_set_(cands, s):
     """
     if len(cands) == 0:
         return False
-    literal_set = cpt.to_text(s)
-    literal_cands = cpt.to_text(cands)
-    for c in literal_cands:
-        if c in literal_set:
+    for c in cands:
+        if c in s:
             return True
     return False
 
@@ -437,7 +435,6 @@ def _strip_grad_suffix_(name):
     e.g. x@GRAD ==> x
          y@GRAD@RENAME@1 ==> y
     """
-    name = cpt.to_text(name)
    pos = name.find(core.grad_var_suffix())
    new_name = name[:pos] if pos != -1 else name
    new_pos = name.rfind('grad/')
@@ -449,7 +446,7 @@ def _append_grad_suffix_(name):
     Append grad suffix to the given variable name
     e.g. x ==> x@GRAD
     """
-    return cpt.to_text(name) + core.grad_var_suffix()
+    return name + core.grad_var_suffix()
 
 
 def _accumulate_gradients_by_sum_op_(var_name,
@@ -967,7 +964,7 @@ def _append_backward_ops_with_checkpoints_(block,
                 "invoke op: %s" %
                 _pretty_op_desc_(op.desc, "with_sub_block"))
         grad_op_desc, op_grad_to_var = core.get_grad_op_desc(
-            op.desc, cpt.to_text(no_grad_dict[block.idx]), [])
+            op.desc, no_grad_dict[block.idx], [])
 
         # record the mapping between fwd and bwd
         if grad_op_id_to_fwd_op is not None:
@@ -993,7 +990,7 @@ def _append_backward_ops_with_checkpoints_(block,
                 "invoke op: %s" %
                 _pretty_op_desc_(op.desc, "with_sub_block"))
         grad_op_desc, op_grad_to_var = core.get_grad_op_desc(
-            op.desc, cpt.to_text(no_grad_dict[block.idx]), [])
+            op.desc, no_grad_dict[block.idx], [])
 
         # record the mapping between fwd and bwd
         if grad_op_id_to_fwd_op is not None:
@@ -1055,7 +1052,7 @@ def _append_backward_ops_with_checkpoints_(block,
         # 3.c. add backward ops for all ops in current segment
         for op_desc in reversed(added_descs):
             grad_op_desc, op_grad_to_var = core.get_grad_op_desc(
-                op_desc, cpt.to_text(no_grad_dict[block.idx]), [])
+                op_desc, no_grad_dict[block.idx], [])
 
             # record the mapping between fwd and bwd
             if grad_op_id_to_fwd_op is not None:
@@ -1239,7 +1236,7 @@ def _append_backward_ops_(block,
 
         # Getting op's corresponding grad_op
         grad_op_desc, op_grad_to_var = core.get_grad_op_desc(
-            op.desc, cpt.to_text(no_grad_dict[block.idx]), grad_sub_block_list)
+            op.desc, no_grad_dict[block.idx], grad_sub_block_list)
 
         # record the mapping between fwd and bwd
         if grad_op_id_to_fwd_op is not None:
@@ -1841,7 +1838,7 @@ def append_backward(loss,
     params_and_grads = []
     op_role_var_attr_name = core.op_proto_and_checker_maker.kOpRoleVarAttrName()
     for param in parameters:
-        if cpt.to_text(param) not in grad_info_map:
+        if param not in grad_info_map:
             continue
         grad_info = grad_info_map[param]
         grad_block = grad_info[1]
diff --git a/python/paddle/fluid/compiler.py b/python/paddle/fluid/compiler.py
index db4b2c2df2eeb8e0a04e42a139bd288c92fcdd46..82a58713f28fce07127ebcec3b4b33915813f560 100644
--- a/python/paddle/fluid/compiler.py
+++ b/python/paddle/fluid/compiler.py
@@ -17,7 +17,6 @@ import os
 import six
 import sys
 import warnings
-from .. import compat as cpt
 from . import framework
 from .framework import _get_paddle_place, _get_paddle_place_list
 from .framework import cuda_places, cpu_places, xpu_places
@@ -418,10 +417,10 @@ class CompiledProgram(object):
         for node in self._graph.nodes():
             if node.is_var() and node.var() is not None and node.var().persistable() and \
                     node.var().type() != core.VarDesc.VarType.RAW:
-                name = cpt.to_text(node.name())
+                name = node.name()
                 if self._program is not None and _should_broadcast_or_not_exists(
                         self._program, name):
-                    self._persistable_vars.append(cpt.to_text(node.name()))
+                    self._persistable_vars.append(node.name())
 
         places = list(map(_place_obj, places))
 
@@ -433,9 +432,9 @@ class CompiledProgram(object):
         return core.ParallelExecutor(
             places, self._persistable_vars,
-            cpt.to_text(self._loss_name) if self._loss_name else six.u(''),
-            self._scope, self._local_scopes, self._exec_strategy,
-            self._build_strategy, self._graph)
+            self._loss_name if self._loss_name else six.u(''), self._scope,
+            self._local_scopes, self._exec_strategy, self._build_strategy,
+            self._graph)
 
     def _compile_inference(self):
         return core.create_paddle_predictor(self._infer_config)
diff --git a/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py b/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py
index 952ea62c7c28b25dd8754996d4e66c4a96338fd8..abd6a7822d872ee33d37fa8ac41fac34ccd311cf 100644
--- a/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py
+++ b/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py
@@ -18,7 +18,6 @@ try:
     from tqdm import tqdm
 except:
     from .utils import tqdm
-from ..... import compat as cpt
 from .... import core
 from ....framework import IrGraph
 from ....framework import IrNode
@@ -400,7 +399,7 @@ class QuantizationTransformPass(object):
     def _create_global_step(self, graph):
         if self._weight_quantize_type == 'range_abs_max' or \
                 self._activation_quantize_type == 'range_abs_max':
-            counter_name = cpt.to_text('@STEP_COUNTER@')
+            counter_name = '@STEP_COUNTER@'
             for node in graph.all_var_nodes():
                 if node.name() == counter_name:
                     self._global_step = node
@@ -1339,7 +1338,7 @@ class ConvertToInt8Pass(object):
     def _convert_to_int8(self, graph, var_node):
         int8_var_node_name = var_node.name() + ".int8"
         int8_var_node = graph.create_persistable_node(
-            name=cpt.to_text(int8_var_node_name),
+            name=int8_var_node_name,
             var_type=var_node.type(),
             shape=var_node.shape(),
             var_dtype=core.VarDesc.VarType.INT8)
diff --git a/python/paddle/fluid/dygraph/io.py b/python/paddle/fluid/dygraph/io.py
index 6671499e1c4ef9078203d11571522291cc2e0b74..ea617e253f4f0adfd28bd9acfa32ab75b807573a 100644
--- a/python/paddle/fluid/dygraph/io.py
+++ b/python/paddle/fluid/dygraph/io.py
@@ -125,7 +125,6 @@ def _append_loaded_suffix(name):
     e.g. x ==> x.load_0, x.load_0 ==> x.load_0.load_0
     """
     suffix = LOADED_VAR_SUFFIX
-    name = cpt.to_text(name)
     new_name = unique_name.generate_with_ignorable_key('.'.join((name, suffix)))
     return new_name
 
diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py
index e709a02b388af020b1942c371b20b484fd02975f..a7308789a58fc481dc90164e3781a7acf6fdfdae 100644
--- a/python/paddle/fluid/framework.py
+++ b/python/paddle/fluid/framework.py
@@ -1390,7 +1390,6 @@ class Variable(object):
         self.error_clip = error_clip
 
         is_new_var = False
-        name = cpt.to_text(name)
         self.desc = self.block.desc.find_var(name.encode())
 
         if self.desc is None:
@@ -1757,8 +1756,7 @@ class Variable(object):
         if with_details:
             additional_attr = ("error_clip", )
             for attr_name in additional_attr:
-                res_str += "%s: %s\n" % (attr_name,
-                                         cpt.to_text(getattr(self, attr_name)))
+                res_str += "%s: %s\n" % (attr_name, getattr(self, attr_name))
 
         return res_str
 
@@ -1900,7 +1898,7 @@ class Variable(object):
                                                        dtype='float32')
             print("name of current Var is: {}".format(new_variable.name))
         """
-        return cpt.to_text(self.desc.name())
+        return self.desc.name()
 
     @property
     def grad_name(self):
@@ -2807,7 +2805,7 @@ class Operator(object):
                 elif isinstance(arg, six.binary_type):
                     in_arg_names.append(arg.decode())
                 elif isinstance(arg, (Variable, core.VarBase)):
-                    in_arg_names.append(cpt.to_text(arg.name))
+                    in_arg_names.append(arg.name)
                 else:
                     raise TypeError(
                         "The type of '%s' in operator %s should be "
@@ -2843,7 +2841,7 @@ class Operator(object):
                 if isinstance(arg, six.string_types):
                     out_arg_names.append(arg)
                 else:
-                    out_arg_names.append(cpt.to_text(arg.name))
+                    out_arg_names.append(arg.name)
                 # TODO(minqiyang): could we remove variable's op in static mode?
                 if not _non_static_mode():
                     if isinstance(arg, six.string_types):
@@ -3660,8 +3658,8 @@ class Block(object):
         Rename variable in vars and ops' inputs and outputs
 
         Args:
-            name(str): the name that need to be renamed.
-            new_name(str): the name that need to rename to.
+            name(bytes): the name that need to be renamed.
+            new_name(bytes): the name that need to rename to.
 
         Raises:
             ValueError: If this block doesn't have this the giving name,
@@ -3670,8 +3668,8 @@ class Block(object):
         Returns:
             Variable: the Variable with the giving name.
""" - name = cpt.to_text(name) - new_name = cpt.to_text(new_name) + name = name.decode() + new_name = new_name.decode() if not self.has_var(name): raise ValueError("var %s is not in current block" % name) @@ -6643,8 +6641,7 @@ class Parameter(Variable): additional_attr = ("trainable", "optimize_attr", "regularizer", "do_model_average", "need_clip") for attr_name in additional_attr: - res_str += "%s: %s\n" % (attr_name, - cpt.to_text(getattr(self, attr_name))) + res_str += "%s: %s\n" % (attr_name, getattr(self, attr_name)) else: res_str = Variable.to_string(self, throw_on_error, False) return res_str diff --git a/python/paddle/fluid/incubate/fleet/parameter_server/ir/trainer_pass.py b/python/paddle/fluid/incubate/fleet/parameter_server/ir/trainer_pass.py index e4de7088202df7cf5de01ac347a68acecf09dae5..c54fe06720d68d759d49fbe9caf07020490b520f 100644 --- a/python/paddle/fluid/incubate/fleet/parameter_server/ir/trainer_pass.py +++ b/python/paddle/fluid/incubate/fleet/parameter_server/ir/trainer_pass.py @@ -23,7 +23,6 @@ from functools import reduce import paddle.fluid as fluid import paddle.fluid.core as core import paddle.fluid.framework as framework -import paddle.compat as cpt from paddle.fluid.transpiler.details.program_utils import delete_ops from paddle.fluid.incubate.fleet.parameter_server.ir.public import _get_optimize_ops @@ -593,7 +592,7 @@ def ps_gpu_pass(program): if op.type != "pull_box_sparse" and op.type != "pull_gpups_sparse": continue grad_op_desc, op_grad_to_var = core.get_grad_op_desc( - op.desc, cpt.to_text(set()), []) + op.desc, set(), []) for op_desc in grad_op_desc: new_op_desc = program.global_block().desc.append_op() new_op_desc.copy_from(op_desc) diff --git a/python/paddle/fluid/io.py b/python/paddle/fluid/io.py index 8aa28519c9ff2f9fabdb4ae714342ec181537eb3..6123df295fd2d87d8ee987a805745c1e80b5c166 100644 --- a/python/paddle/fluid/io.py +++ b/python/paddle/fluid/io.py @@ -42,7 +42,6 @@ from .reader import * from . import dataloader from .dataloader import * from . import core -from .. 
 from paddle.utils import deprecated
 from paddle.fluid.framework import static_only
 
@@ -1660,7 +1659,7 @@ def _save_persistable_nodes(executor, dirname, graph):
     persistable_nodes = []
     all_persistable_nodes = graph.all_persistable_nodes()
     for node in all_persistable_nodes:
-        name = cpt.to_text(node.name())
+        name = node.name()
         if name not in persistable_node_names:
             persistable_node_names.add(name)
             persistable_nodes.append(node)
@@ -1695,7 +1694,7 @@ def _load_persistable_nodes(executor, dirname, graph):
     persistable_nodes = []
     all_persistable_nodes = graph.all_persistable_nodes()
     for node in all_persistable_nodes:
-        name = cpt.to_text(node.name())
+        name = node.name()
         if name not in persistable_node_names:
             persistable_node_names.add(name)
             persistable_nodes.append(node)
diff --git a/python/paddle/fluid/layers/control_flow.py b/python/paddle/fluid/layers/control_flow.py
index 289cb9b9044abdfeccb26cb14486fba6473dd522..0f0ae4b08232bc50cd207891debc67b63f62bdb4 100755
--- a/python/paddle/fluid/layers/control_flow.py
+++ b/python/paddle/fluid/layers/control_flow.py
@@ -2431,11 +2431,10 @@ class ConditionalBlock(object):
         for inner_input_name in params:
             inner_var = parent_block._find_var_recursive(inner_input_name)
             if inner_var:
-                param_list.append(cpt.to_text(inner_var.name))
+                param_list.append(inner_var.name)
 
         grad_op_desc, op_grad_to_var = core.get_grad_op_desc(
-            conditional_block_op.desc, cpt.to_text(set()),
-            [grad_sub_block.desc])
+            conditional_block_op.desc, set(), [grad_sub_block.desc])
 
         # append op_desc in grad_op_descs to target_block
         op_role_attr_name = core.op_proto_and_checker_maker.kOpRoleAttrName()
diff --git a/python/paddle/fluid/tests/unittests/dist_transformer.py b/python/paddle/fluid/tests/unittests/dist_transformer.py
index 32478b8c5fada9bab5d87729327eb4172ad90b3a..8b52f1f7341d45bfd7dcd2952f19808336035433 100644
--- a/python/paddle/fluid/tests/unittests/dist_transformer.py
+++ b/python/paddle/fluid/tests/unittests/dist_transformer.py
@@ -26,7 +26,6 @@ import tarfile
 import paddle.fluid as fluid
 import paddle.fluid.layers as layers
 from test_dist_base import TestDistRunnerBase, runtime_main, RUN_STEP
-import paddle.compat as cpt
 
 const_para_attr = fluid.ParamAttr(initializer=fluid.initializer.Constant(0.001))
 const_bias_attr = const_para_attr
@@ -856,7 +855,7 @@ class DataReader(object):
             f = tarfile.open(fpaths[0], "r")
 
             for line in f.extractfile(tar_fname):
-                line = cpt.to_text(line)
+                line = line.decode()
                 fields = line.strip("\n").split(self._field_delimiter)
                 if (not self._only_src and len(fields) == 2) or (self._only_src
@@ -869,7 +868,7 @@ class DataReader(object):
 
             with open(fpath, "rb") as f:
                 for line in f:
-                    line = cpt.to_text(line)
+                    line = line.decode()
                     fields = line.strip("\n").split(self._field_delimiter)
                     if (not self._only_src and len(fields) == 2) or (self._only_src
@@ -881,7 +880,7 @@ class DataReader(object):
         word_dict = {}
         with open(dict_path, "rb") as fdict:
             for idx, line in enumerate(fdict):
-                line = cpt.to_text(line)
+                line = line.decode()
                 if reverse:
                     word_dict[idx] = line.strip("\n")
                 else:
diff --git a/python/paddle/text/datasets/conll05.py b/python/paddle/text/datasets/conll05.py
index 3e8889a1269d7c63c840765f975cddee1eee0aba..ee475e7fc5d43ea253d699a4a5b0d509846459e4 100644
--- a/python/paddle/text/datasets/conll05.py
+++ b/python/paddle/text/datasets/conll05.py
@@ -17,7 +17,6 @@ import tarfile
 import numpy as np
 
 from paddle.io import Dataset
-import paddle.compat as cpt
 from paddle.dataset.common import _check_exists_and_download
 
 __all__ = []
@@ -182,8 +181,8 @@ class Conll05st(Dataset):
             labels = []
             one_seg = []
             for word, label in zip(words_file, props_file):
-                word = cpt.to_text(word.strip())
-                label = cpt.to_text(label.strip().split())
+                word = word.strip().decode()
+                label = label.strip().decode().split()
 
                 if len(label) == 0:  # end of sentence
                     for i in range(len(one_seg[0])):
diff --git a/python/paddle/text/datasets/movielens.py b/python/paddle/text/datasets/movielens.py
index c3d1de135b3005de93bdadd3a29160c0b483922c..f735d22f91f2863a02c0a50ee208e32fc4fcde09 100644
--- a/python/paddle/text/datasets/movielens.py
+++ b/python/paddle/text/datasets/movielens.py
@@ -17,7 +17,6 @@ import zipfile
 import re
 
 from paddle.io import Dataset
-import paddle.compat as cpt
 from paddle.dataset.common import _check_exists_and_download
 
 __all__ = []
@@ -161,7 +160,7 @@ class Movielens(Dataset):
         categories_set = set()
         with package.open('ml-1m/movies.dat') as movie_file:
             for i, line in enumerate(movie_file):
-                line = cpt.to_text(line, encoding='latin')
+                line = line.decode(encoding='latin')
                 movie_id, title, categories = line.strip().split('::')
                 categories = categories.split('|')
                 for c in categories:
@@ -180,7 +179,7 @@ class Movielens(Dataset):
 
         with package.open('ml-1m/users.dat') as user_file:
             for line in user_file:
-                line = cpt.to_text(line, encoding='latin')
+                line = line.decode(encoding='latin')
                 uid, gender, age, job, _ = line.strip().split("::")
                 self.user_info[int(uid)] = UserInfo(index=uid,
                                                     gender=gender,
@@ -193,7 +192,7 @@ class Movielens(Dataset):
         with zipfile.ZipFile(self.data_file) as package:
             with package.open('ml-1m/ratings.dat') as rating:
                 for line in rating:
-                    line = cpt.to_text(line, encoding='latin')
+                    line = line.decode(encoding='latin')
                     if (np.random.random() < self.test_ratio) == is_test:
                         uid, mov_id, rating, _ = line.strip().split("::")
                         uid = int(uid)
diff --git a/python/paddle/text/datasets/wmt14.py b/python/paddle/text/datasets/wmt14.py
index bfb665570ca2966a81a1ce94f430113c70c55b72..715e3794f09a30d073333d2b6ab5b060f5108b43 100644
--- a/python/paddle/text/datasets/wmt14.py
+++ b/python/paddle/text/datasets/wmt14.py
@@ -17,7 +17,6 @@ import numpy as np
 import six
 
 from paddle.io import Dataset
-import paddle.compat as cpt
 from paddle.dataset.common import _check_exists_and_download
 
 __all__ = []
@@ -111,7 +110,7 @@ class WMT14(Dataset):
             out_dict = dict()
             for line_count, line in enumerate(fd):
                 if line_count < size:
-                    out_dict[cpt.to_text(line.strip())] = line_count
+                    out_dict[line.strip().decode()] = line_count
                 else:
                     break
             return out_dict
@@ -140,7 +139,7 @@ class WMT14(Dataset):
             ]
             for name in names:
                 for line in f.extractfile(name):
-                    line = cpt.to_text(line)
+                    line = line.decode()
                     line_split = line.strip().split('\t')
                     if len(line_split) != 2:
                         continue
diff --git a/python/paddle/text/datasets/wmt16.py b/python/paddle/text/datasets/wmt16.py
index 40e464cb0ebae45f275fb7af883fc4bca03cc131..66aa78c7833b7a8f5345491e9d04503ee8f15bb4 100644
--- a/python/paddle/text/datasets/wmt16.py
+++ b/python/paddle/text/datasets/wmt16.py
@@ -22,7 +22,6 @@ from collections import defaultdict
 
 import paddle
 from paddle.io import Dataset
-import paddle.compat as cpt
 from paddle.dataset.common import _check_exists_and_download
 
 __all__ = []
@@ -152,16 +151,16 @@ class WMT16(Dataset):
         with open(dict_path, "rb") as fdict:
             for idx, line in enumerate(fdict):
                 if reverse:
-                    word_dict[idx] = cpt.to_text(line.strip())
+                    word_dict[idx] = line.strip().decode()
                 else:
-                    word_dict[cpt.to_text(line.strip())] = idx
+                    word_dict[line.strip().decode()] = idx
 
        return word_dict
 
     def _build_dict(self, dict_path, dict_size, lang):
         word_dict = defaultdict(int)
         with tarfile.open(self.data_file, mode="r") as f:
             for line in f.extractfile("wmt16/train"):
-                line = cpt.to_text(line)
+                line = line.decode()
                 line_split = line.strip().split("\t")
                 if len(line_split) != 2: continue
                 sen = line_split[0] if self.lang == "en" else line_split[1]
@@ -195,7 +194,7 @@ class WMT16(Dataset):
         self.trg_ids_next = []
         with tarfile.open(self.data_file, mode="r") as f:
             for line in f.extractfile("wmt16/{}".format(self.mode)):
-                line = cpt.to_text(line)
+                line = line.decode()
                 line_split = line.strip().split("\t")
                 if len(line_split) != 2:
                     continue
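
Reviewer note (not part of the patch): every change above relies on the same equivalence between the removed Python 2/3 helper `paddle.compat.to_text` and plain Python 3 `bytes.decode()`. The sketch below models that equivalence so the mechanical rewrites are easy to check; `legacy_to_text` is a simplified stand-in written for illustration, not the real `paddle.compat` implementation, and it only covers the input types the touched call sites actually pass (bytes, str, list/set, optional encoding).

# Simplified stand-in for the removed paddle.compat.to_text helper.
# NOT the original implementation; it only models the cases exercised
# by the call sites changed in this diff.
def legacy_to_text(obj, encoding='utf-8'):
    if isinstance(obj, bytes):
        return obj.decode(encoding)
    if isinstance(obj, (list, set)):
        # to_text decoded container elements one by one.
        return type(obj)(legacy_to_text(item, encoding) for item in obj)
    return obj  # str (and anything else) passes through unchanged

# Plain bytes -> str, as in wmt14.py / wmt16.py / dist_transformer.py.
assert legacy_to_text(b"hello\n".strip()) == b"hello\n".strip().decode()

# A list of bytes, as in conll05.py: the old code split first and decoded
# the resulting list; the new code decodes first, then splits. Both yield
# the same list of str, because bytes.split() and str.split() agree on
# ASCII whitespace.
label = b"B-A0 I-A0 O\n"
assert legacy_to_text(label.strip().split()) == label.strip().decode().split()

# Non-default encoding, as in movielens.py ('latin' aliases ISO-8859-1).
assert legacy_to_text(b"caf\xe9", encoding='latin') == b"caf\xe9".decode(encoding='latin')

The remaining call sites (core.get_grad_op_desc, the grad_info_map lookup, node.name()) simply drop the wrapper: with Python 2 support gone those values are already str, and to_text applied to an empty set or a set of str returned an equal set, so passing the set through unchanged preserves behavior.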