From ad4c773b183d614112f5f66dfc6a11d3a249e557 Mon Sep 17 00:00:00 2001
From: Nyakku Shigure <sigure.qaq@gmail.com>
Date: Tue, 18 Oct 2022 10:27:15 +0800
Subject: [PATCH] [CodeStyle][py2] remove `compat` module (to_text) (#47036)

* [CodeStyle][py2] remove `compat` module (to_text)

* remove some unnecessary decode

* remove to_text definition and unittest

* Revert "remove to_text definition and unittest"

This reverts commit a6b69cb8dca8b9b031ce10ea32d1040e7e0dd267.

* remove an assertion

* empty commit
---
 python/paddle/dataset/conll05.py              |  5 ++---
 python/paddle/dataset/movielens.py            |  7 +++----
 python/paddle/dataset/wmt14.py                |  5 ++---
 python/paddle/dataset/wmt16.py                |  9 ++++----
 .../distributed/passes/ps_trainer_pass.py     |  3 +--
 python/paddle/fluid/backward.py               | 19 +++++++----------
 python/paddle/fluid/compiler.py               | 11 +++++-----
 .../slim/quantization/quantization_pass.py    |  5 ++---
 python/paddle/fluid/dygraph/io.py             |  1 -
 python/paddle/fluid/framework.py              | 21 ++++++++-----------
 .../fleet/parameter_server/ir/trainer_pass.py |  3 +--
 python/paddle/fluid/io.py                     |  5 ++---
 python/paddle/fluid/layers/control_flow.py    |  5 ++---
 .../fluid/tests/unittests/dist_transformer.py |  7 +++----
 python/paddle/text/datasets/conll05.py        |  5 ++---
 python/paddle/text/datasets/movielens.py      |  7 +++----
 python/paddle/text/datasets/wmt14.py          |  5 ++---
 python/paddle/text/datasets/wmt16.py          |  9 ++++----
 18 files changed, 55 insertions(+), 77 deletions(-)

diff --git a/python/paddle/dataset/conll05.py b/python/paddle/dataset/conll05.py
index b98b5d4bde6..0ffdce8ae8c 100644
--- a/python/paddle/dataset/conll05.py
+++ b/python/paddle/dataset/conll05.py
@@ -23,7 +23,6 @@ to initialize SRL model.
 import tarfile
 import gzip
 import paddle.dataset.common
-import paddle.compat as cpt
 import paddle.utils.deprecated as deprecated
 from six.moves import zip, range
 
@@ -90,8 +89,8 @@ def corpus_reader(data_path, words_name, props_name):
             labels = []
             one_seg = []
             for word, label in zip(words_file, props_file):
-                word = cpt.to_text(word.strip())
-                label = cpt.to_text(label.strip().split())
+                word = word.strip().decode()
+                label = label.strip().decode().split()
 
                 if len(label) == 0:  # end of sentence
                     for i in range(len(one_seg[0])):
diff --git a/python/paddle/dataset/movielens.py b/python/paddle/dataset/movielens.py
index 590d20b741f..db07525d311 100644
--- a/python/paddle/dataset/movielens.py
+++ b/python/paddle/dataset/movielens.py
@@ -29,7 +29,6 @@ import paddle.utils.deprecated as deprecated
 import re
 import functools
 import six
-import paddle.compat as cpt
 
 __all__ = []
 
@@ -112,7 +111,7 @@ def __initialize_meta_info__():
                 categories_set = set()
                 with package.open('ml-1m/movies.dat') as movie_file:
                     for i, line in enumerate(movie_file):
-                        line = cpt.to_text(line, encoding='latin')
+                        line = line.decode(encoding='latin')
                         movie_id, title, categories = line.strip().split('::')
                         categories = categories.split('|')
                         for c in categories:
@@ -137,7 +136,7 @@ def __initialize_meta_info__():
                 USER_INFO = dict()
                 with package.open('ml-1m/users.dat') as user_file:
                     for line in user_file:
-                        line = cpt.to_text(line, encoding='latin')
+                        line = line.decode(encoding='latin')
                         uid, gender, age, job, _ = line.strip().split("::")
                         USER_INFO[int(uid)] = UserInfo(index=uid,
                                                        gender=gender,
@@ -152,7 +151,7 @@ def __reader__(rand_seed=0, test_ratio=0.1, is_test=False):
     with zipfile.ZipFile(file=fn) as package:
         with package.open('ml-1m/ratings.dat') as rating:
             for line in rating:
-                line = cpt.to_text(line, encoding='latin')
+                line = line.decode(encoding='latin')
                 if (np.random.random() < test_ratio) == is_test:
                     uid, mov_id, rating, _ = line.strip().split("::")
                     uid = int(uid)
diff --git a/python/paddle/dataset/wmt14.py b/python/paddle/dataset/wmt14.py
index cdc516bee2d..0007bc2770d 100644
--- a/python/paddle/dataset/wmt14.py
+++ b/python/paddle/dataset/wmt14.py
@@ -24,7 +24,6 @@ import six
 import tarfile
 
 import paddle.dataset.common
-import paddle.compat as cpt
 import paddle.utils.deprecated as deprecated
 
 __all__ = []
@@ -52,7 +51,7 @@ def __read_to_dict(tar_file, dict_size):
         out_dict = dict()
         for line_count, line in enumerate(fd):
             if line_count < size:
-                out_dict[cpt.to_text(line.strip())] = line_count
+                out_dict[line.strip().decode()] = line_count
             else:
                 break
         return out_dict
@@ -84,7 +83,7 @@ def reader_creator(tar_file, file_name, dict_size):
             ]
             for name in names:
                 for line in f.extractfile(name):
-                    line = cpt.to_text(line)
+                    line = line.decode()
                     line_split = line.strip().split('\t')
                     if len(line_split) != 2:
                         continue
diff --git a/python/paddle/dataset/wmt16.py b/python/paddle/dataset/wmt16.py
index 64ddc221471..e208f342490 100644
--- a/python/paddle/dataset/wmt16.py
+++ b/python/paddle/dataset/wmt16.py
@@ -34,7 +34,6 @@ import tarfile
 from collections import defaultdict
 
 import paddle
-import paddle.compat as cpt
 import paddle.utils.deprecated as deprecated
 
 __all__ = []
@@ -54,7 +53,7 @@ def __build_dict(tar_file, dict_size, save_path, lang):
     word_dict = defaultdict(int)
     with tarfile.open(tar_file, mode="r") as f:
         for line in f.extractfile("wmt16/train"):
-            line = cpt.to_text(line)
+            line = line.decode()
             line_split = line.strip().split("\t")
             if len(line_split) != 2: continue
             sen = line_split[0] if lang == "en" else line_split[1]
@@ -83,9 +82,9 @@ def __load_dict(tar_file, dict_size, lang, reverse=False):
     with open(dict_path, "rb") as fdict:
         for idx, line in enumerate(fdict):
             if reverse:
-                word_dict[idx] = cpt.to_text(line.strip())
+                word_dict[idx] = line.strip().decode()
             else:
-                word_dict[cpt.to_text(line.strip())] = idx
+                word_dict[line.strip().decode()] = idx
     return word_dict
 
 
@@ -116,7 +115,7 @@ def reader_creator(tar_file, file_name, src_dict_size, trg_dict_size, src_lang):
 
         with tarfile.open(tar_file, mode="r") as f:
             for line in f.extractfile(file_name):
-                line = cpt.to_text(line)
+                line = line.decode()
                 line_split = line.strip().split("\t")
                 if len(line_split) != 2:
                     continue
diff --git a/python/paddle/distributed/passes/ps_trainer_pass.py b/python/paddle/distributed/passes/ps_trainer_pass.py
index 40276dec9a5..95f4e980cf4 100755
--- a/python/paddle/distributed/passes/ps_trainer_pass.py
+++ b/python/paddle/distributed/passes/ps_trainer_pass.py
@@ -14,7 +14,6 @@
 
 import os
 import paddle
-import paddle.compat as cpt
 from ..ps.utils.public import *
 from paddle.framework import core
 from paddle.distributed.passes.pass_base import PassBase, register_pass
@@ -707,7 +706,7 @@ class PsGpuPass(PassBase):
             if op.type != "pull_box_sparse" and op.type != "pull_gpups_sparse":
                 continue
             grad_op_desc, op_grad_to_var = core.get_grad_op_desc(
-                op.desc, cpt.to_text(set()), [])
+                op.desc, set(), [])
             for op_desc in grad_op_desc:
                 new_op_desc = program.global_block().desc._insert_op(
                     insert_index + 1)
diff --git a/python/paddle/fluid/backward.py b/python/paddle/fluid/backward.py
index e9babc79c3a..c1d82f0be31 100755
--- a/python/paddle/fluid/backward.py
+++ b/python/paddle/fluid/backward.py
@@ -423,10 +423,8 @@ def _some_in_set_(cands, s):
     """
     if len(cands) == 0:
         return False
-    literal_set = cpt.to_text(s)
-    literal_cands = cpt.to_text(cands)
-    for c in literal_cands:
-        if c in literal_set:
+    for c in cands:
+        if c in s:
             return True
     return False
 
@@ -437,7 +435,6 @@ def _strip_grad_suffix_(name):
     e.g. x@GRAD ==> x
          y@GRAD@RENAME@1 ==> y
     """
-    name = cpt.to_text(name)
     pos = name.find(core.grad_var_suffix())
     new_name = name[:pos] if pos != -1 else name
     new_pos = name.rfind('grad/')
@@ -449,7 +446,7 @@ def _append_grad_suffix_(name):
     Append grad suffix to the given variable name
     e.g. x ==> x@GRAD
     """
-    return cpt.to_text(name) + core.grad_var_suffix()
+    return name + core.grad_var_suffix()
 
 
 def _accumulate_gradients_by_sum_op_(var_name,
@@ -967,7 +964,7 @@ def _append_backward_ops_with_checkpoints_(block,
                                 "invoke op: %s" %
                                 _pretty_op_desc_(op.desc, "with_sub_block"))
             grad_op_desc, op_grad_to_var = core.get_grad_op_desc(
-                op.desc, cpt.to_text(no_grad_dict[block.idx]), [])
+                op.desc, no_grad_dict[block.idx], [])
 
             # record the mapping between fwd and bwd
             if grad_op_id_to_fwd_op is not None:
@@ -993,7 +990,7 @@ def _append_backward_ops_with_checkpoints_(block,
                                 "invoke op: %s" %
                                 _pretty_op_desc_(op.desc, "with_sub_block"))
             grad_op_desc, op_grad_to_var = core.get_grad_op_desc(
-                op.desc, cpt.to_text(no_grad_dict[block.idx]), [])
+                op.desc, no_grad_dict[block.idx], [])
 
             # record the mapping between fwd and bwd
             if grad_op_id_to_fwd_op is not None:
@@ -1055,7 +1052,7 @@ def _append_backward_ops_with_checkpoints_(block,
         # 3.c. add backward ops for all ops in current segment
         for op_desc in reversed(added_descs):
             grad_op_desc, op_grad_to_var = core.get_grad_op_desc(
-                op_desc, cpt.to_text(no_grad_dict[block.idx]), [])
+                op_desc, no_grad_dict[block.idx], [])
 
             # record the mapping between fwd and bwd
             if grad_op_id_to_fwd_op is not None:
@@ -1239,7 +1236,7 @@ def _append_backward_ops_(block,
 
         # Getting op's corresponding grad_op
         grad_op_desc, op_grad_to_var = core.get_grad_op_desc(
-            op.desc, cpt.to_text(no_grad_dict[block.idx]), grad_sub_block_list)
+            op.desc, no_grad_dict[block.idx], grad_sub_block_list)
 
         # record the mapping between fwd and bwd
         if grad_op_id_to_fwd_op is not None:
@@ -1841,7 +1838,7 @@ def append_backward(loss,
     params_and_grads = []
     op_role_var_attr_name = core.op_proto_and_checker_maker.kOpRoleVarAttrName()
     for param in parameters:
-        if cpt.to_text(param) not in grad_info_map:
+        if param not in grad_info_map:
             continue
         grad_info = grad_info_map[param]
         grad_block = grad_info[1]
diff --git a/python/paddle/fluid/compiler.py b/python/paddle/fluid/compiler.py
index db4b2c2df2e..82a58713f28 100644
--- a/python/paddle/fluid/compiler.py
+++ b/python/paddle/fluid/compiler.py
@@ -17,7 +17,6 @@ import os
 import six
 import sys
 import warnings
-from .. import compat as cpt
 from . import framework
 from .framework import _get_paddle_place, _get_paddle_place_list
 from .framework import cuda_places, cpu_places, xpu_places
@@ -418,10 +417,10 @@ class CompiledProgram(object):
         for node in self._graph.nodes():
             if node.is_var() and node.var() is not None and node.var().persistable() and \
                     node.var().type() != core.VarDesc.VarType.RAW:
-                name = cpt.to_text(node.name())
+                name = node.name()
                 if self._program is not None and _should_broadcast_or_not_exists(
                         self._program, name):
-                    self._persistable_vars.append(cpt.to_text(node.name()))
+                    self._persistable_vars.append(node.name())
 
         places = list(map(_place_obj, places))
 
@@ -433,9 +432,9 @@ class CompiledProgram(object):
 
         return core.ParallelExecutor(
             places, self._persistable_vars,
-            cpt.to_text(self._loss_name) if self._loss_name else six.u(''),
-            self._scope, self._local_scopes, self._exec_strategy,
-            self._build_strategy, self._graph)
+            self._loss_name if self._loss_name else six.u(''), self._scope,
+            self._local_scopes, self._exec_strategy, self._build_strategy,
+            self._graph)
 
     def _compile_inference(self):
         return core.create_paddle_predictor(self._infer_config)
diff --git a/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py b/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py
index 952ea62c7c2..abd6a7822d8 100644
--- a/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py
+++ b/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py
@@ -18,7 +18,6 @@ try:
     from tqdm import tqdm
 except:
     from .utils import tqdm
-from ..... import compat as cpt
 from .... import core
 from ....framework import IrGraph
 from ....framework import IrNode
@@ -400,7 +399,7 @@ class QuantizationTransformPass(object):
     def _create_global_step(self, graph):
         if self._weight_quantize_type == 'range_abs_max' or \
                 self._activation_quantize_type == 'range_abs_max':
-            counter_name = cpt.to_text('@STEP_COUNTER@')
+            counter_name = '@STEP_COUNTER@'
             for node in graph.all_var_nodes():
                 if node.name() == counter_name:
                     self._global_step = node
@@ -1339,7 +1338,7 @@ class ConvertToInt8Pass(object):
     def _convert_to_int8(self, graph, var_node):
         int8_var_node_name = var_node.name() + ".int8"
         int8_var_node = graph.create_persistable_node(
-            name=cpt.to_text(int8_var_node_name),
+            name=int8_var_node_name,
             var_type=var_node.type(),
             shape=var_node.shape(),
             var_dtype=core.VarDesc.VarType.INT8)
diff --git a/python/paddle/fluid/dygraph/io.py b/python/paddle/fluid/dygraph/io.py
index 6671499e1c4..ea617e253f4 100644
--- a/python/paddle/fluid/dygraph/io.py
+++ b/python/paddle/fluid/dygraph/io.py
@@ -125,7 +125,6 @@ def _append_loaded_suffix(name):
     e.g. x ==> x.load_0, x.load_0 ==> x.load_0.load_0
     """
     suffix = LOADED_VAR_SUFFIX
-    name = cpt.to_text(name)
     new_name = unique_name.generate_with_ignorable_key('.'.join((name, suffix)))
     return new_name
 
diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py
index e709a02b388..a7308789a58 100644
--- a/python/paddle/fluid/framework.py
+++ b/python/paddle/fluid/framework.py
@@ -1390,7 +1390,6 @@ class Variable(object):
         self.error_clip = error_clip
 
         is_new_var = False
-        name = cpt.to_text(name)
         self.desc = self.block.desc.find_var(name.encode())
 
         if self.desc is None:
@@ -1757,8 +1756,7 @@ class Variable(object):
         if with_details:
             additional_attr = ("error_clip", )
             for attr_name in additional_attr:
-                res_str += "%s: %s\n" % (attr_name,
-                                         cpt.to_text(getattr(self, attr_name)))
+                res_str += "%s: %s\n" % (attr_name, getattr(self, attr_name))
 
         return res_str
 
@@ -1900,7 +1898,7 @@ class Variable(object):
                                                 dtype='float32')
             print("name of current Var is: {}".format(new_variable.name))
         """
-        return cpt.to_text(self.desc.name())
+        return self.desc.name()
 
     @property
     def grad_name(self):
@@ -2807,7 +2805,7 @@ class Operator(object):
                             elif isinstance(arg, six.binary_type):
                                 in_arg_names.append(arg.decode())
                             elif isinstance(arg, (Variable, core.VarBase)):
-                                in_arg_names.append(cpt.to_text(arg.name))
+                                in_arg_names.append(arg.name)
                             else:
                                 raise TypeError(
                                     "The type of '%s' in operator %s should be "
@@ -2843,7 +2841,7 @@ class Operator(object):
                         if isinstance(arg, six.string_types):
                             out_arg_names.append(arg)
                         else:
-                            out_arg_names.append(cpt.to_text(arg.name))
+                            out_arg_names.append(arg.name)
                         # TODO(minqiyang): could we remove variable's op in static mode?
                         if not _non_static_mode():
                             if isinstance(arg, six.string_types):
@@ -3660,8 +3658,8 @@ class Block(object):
         Rename variable in vars and ops' inputs and outputs
 
         Args:
-            name(str): the name that need to be renamed.
-            new_name(str): the name that need to rename to.
+            name(bytes): the name that needs to be renamed.
+            new_name(bytes): the new name to rename it to.
 
         Raises:
             ValueError: If this block doesn't have this the giving name,
@@ -3671,8 +3669,8 @@ class Block(object):
         Returns:
             Variable: the Variable with the giving name.
         """
-        name = cpt.to_text(name)
-        new_name = cpt.to_text(new_name)
+        name = name.decode()
+        new_name = new_name.decode()
 
         if not self.has_var(name):
             raise ValueError("var %s is not in current block" % name)
@@ -6643,8 +6641,7 @@ class Parameter(Variable):
             additional_attr = ("trainable", "optimize_attr", "regularizer",
                                "do_model_average", "need_clip")
             for attr_name in additional_attr:
-                res_str += "%s: %s\n" % (attr_name,
-                                         cpt.to_text(getattr(self, attr_name)))
+                res_str += "%s: %s\n" % (attr_name, getattr(self, attr_name))
         else:
             res_str = Variable.to_string(self, throw_on_error, False)
         return res_str
diff --git a/python/paddle/fluid/incubate/fleet/parameter_server/ir/trainer_pass.py b/python/paddle/fluid/incubate/fleet/parameter_server/ir/trainer_pass.py
index e4de7088202..c54fe06720d 100644
--- a/python/paddle/fluid/incubate/fleet/parameter_server/ir/trainer_pass.py
+++ b/python/paddle/fluid/incubate/fleet/parameter_server/ir/trainer_pass.py
@@ -23,7 +23,6 @@ from functools import reduce
 import paddle.fluid as fluid
 import paddle.fluid.core as core
 import paddle.fluid.framework as framework
-import paddle.compat as cpt
 
 from paddle.fluid.transpiler.details.program_utils import delete_ops
 from paddle.fluid.incubate.fleet.parameter_server.ir.public import _get_optimize_ops
@@ -593,7 +592,7 @@ def ps_gpu_pass(program):
             if op.type != "pull_box_sparse" and op.type != "pull_gpups_sparse":
                 continue
             grad_op_desc, op_grad_to_var = core.get_grad_op_desc(
-                op.desc, cpt.to_text(set()), [])
+                op.desc, set(), [])
             for op_desc in grad_op_desc:
                 new_op_desc = program.global_block().desc.append_op()
                 new_op_desc.copy_from(op_desc)
diff --git a/python/paddle/fluid/io.py b/python/paddle/fluid/io.py
index 8aa28519c9f..6123df295fd 100644
--- a/python/paddle/fluid/io.py
+++ b/python/paddle/fluid/io.py
@@ -42,7 +42,6 @@ from .reader import *
 from . import dataloader
 from .dataloader import *
 from . import core
-from .. import compat as cpt
 from paddle.utils import deprecated
 from paddle.fluid.framework import static_only
 
@@ -1660,7 +1659,7 @@ def _save_persistable_nodes(executor, dirname, graph):
     persistable_nodes = []
     all_persistable_nodes = graph.all_persistable_nodes()
     for node in all_persistable_nodes:
-        name = cpt.to_text(node.name())
+        name = node.name()
         if name not in persistable_node_names:
             persistable_node_names.add(name)
             persistable_nodes.append(node)
@@ -1695,7 +1694,7 @@ def _load_persistable_nodes(executor, dirname, graph):
     persistable_nodes = []
     all_persistable_nodes = graph.all_persistable_nodes()
     for node in all_persistable_nodes:
-        name = cpt.to_text(node.name())
+        name = node.name()
         if name not in persistable_node_names:
             persistable_node_names.add(name)
             persistable_nodes.append(node)
diff --git a/python/paddle/fluid/layers/control_flow.py b/python/paddle/fluid/layers/control_flow.py
index 289cb9b9044..0f0ae4b0823 100755
--- a/python/paddle/fluid/layers/control_flow.py
+++ b/python/paddle/fluid/layers/control_flow.py
@@ -2431,11 +2431,10 @@ class ConditionalBlock(object):
         for inner_input_name in params:
             inner_var = parent_block._find_var_recursive(inner_input_name)
             if inner_var:
-                param_list.append(cpt.to_text(inner_var.name))
+                param_list.append(inner_var.name)
 
         grad_op_desc, op_grad_to_var = core.get_grad_op_desc(
-            conditional_block_op.desc, cpt.to_text(set()),
-            [grad_sub_block.desc])
+            conditional_block_op.desc, set(), [grad_sub_block.desc])
 
         # append op_desc in grad_op_descs to target_block
         op_role_attr_name = core.op_proto_and_checker_maker.kOpRoleAttrName()
diff --git a/python/paddle/fluid/tests/unittests/dist_transformer.py b/python/paddle/fluid/tests/unittests/dist_transformer.py
index 32478b8c5fa..8b52f1f7341 100644
--- a/python/paddle/fluid/tests/unittests/dist_transformer.py
+++ b/python/paddle/fluid/tests/unittests/dist_transformer.py
@@ -26,7 +26,6 @@ import tarfile
 import paddle.fluid as fluid
 import paddle.fluid.layers as layers
 from test_dist_base import TestDistRunnerBase, runtime_main, RUN_STEP
-import paddle.compat as cpt
 
 const_para_attr = fluid.ParamAttr(initializer=fluid.initializer.Constant(0.001))
 const_bias_attr = const_para_attr
@@ -856,7 +855,7 @@ class DataReader(object):
 
             f = tarfile.open(fpaths[0], "r")
             for line in f.extractfile(tar_fname):
-                line = cpt.to_text(line)
+                line = line.decode()
                 fields = line.strip("\n").split(self._field_delimiter)
                 if (not self._only_src
                         and len(fields) == 2) or (self._only_src
@@ -869,7 +868,7 @@ class DataReader(object):
 
                 with open(fpath, "rb") as f:
                     for line in f:
-                        line = cpt.to_text(line)
+                        line = line.decode()
                         fields = line.strip("\n").split(self._field_delimiter)
                         if (not self._only_src
                                 and len(fields) == 2) or (self._only_src
@@ -881,7 +880,7 @@ class DataReader(object):
         word_dict = {}
         with open(dict_path, "rb") as fdict:
             for idx, line in enumerate(fdict):
-                line = cpt.to_text(line)
+                line = line.decode()
                 if reverse:
                     word_dict[idx] = line.strip("\n")
                 else:
diff --git a/python/paddle/text/datasets/conll05.py b/python/paddle/text/datasets/conll05.py
index 3e8889a1269..ee475e7fc5d 100644
--- a/python/paddle/text/datasets/conll05.py
+++ b/python/paddle/text/datasets/conll05.py
@@ -17,7 +17,6 @@ import tarfile
 import numpy as np
 
 from paddle.io import Dataset
-import paddle.compat as cpt
 from paddle.dataset.common import _check_exists_and_download
 
 __all__ = []
@@ -182,8 +181,8 @@ class Conll05st(Dataset):
             labels = []
             one_seg = []
             for word, label in zip(words_file, props_file):
-                word = cpt.to_text(word.strip())
-                label = cpt.to_text(label.strip().split())
+                word = word.strip().decode()
+                label = label.strip().decode().split()
 
                 if len(label) == 0:  # end of sentence
                     for i in range(len(one_seg[0])):
diff --git a/python/paddle/text/datasets/movielens.py b/python/paddle/text/datasets/movielens.py
index c3d1de135b3..f735d22f91f 100644
--- a/python/paddle/text/datasets/movielens.py
+++ b/python/paddle/text/datasets/movielens.py
@@ -17,7 +17,6 @@ import zipfile
 import re
 
 from paddle.io import Dataset
-import paddle.compat as cpt
 from paddle.dataset.common import _check_exists_and_download
 
 __all__ = []
@@ -161,7 +160,7 @@ class Movielens(Dataset):
                 categories_set = set()
                 with package.open('ml-1m/movies.dat') as movie_file:
                     for i, line in enumerate(movie_file):
-                        line = cpt.to_text(line, encoding='latin')
+                        line = line.decode(encoding='latin')
                         movie_id, title, categories = line.strip().split('::')
                         categories = categories.split('|')
                         for c in categories:
@@ -180,7 +179,7 @@ class Movielens(Dataset):
 
                 with package.open('ml-1m/users.dat') as user_file:
                     for line in user_file:
-                        line = cpt.to_text(line, encoding='latin')
+                        line = line.decode(encoding='latin')
                         uid, gender, age, job, _ = line.strip().split("::")
                         self.user_info[int(uid)] = UserInfo(index=uid,
                                                             gender=gender,
@@ -193,7 +192,7 @@ class Movielens(Dataset):
         with zipfile.ZipFile(self.data_file) as package:
             with package.open('ml-1m/ratings.dat') as rating:
                 for line in rating:
-                    line = cpt.to_text(line, encoding='latin')
+                    line = line.decode(encoding='latin')
                     if (np.random.random() < self.test_ratio) == is_test:
                         uid, mov_id, rating, _ = line.strip().split("::")
                         uid = int(uid)
diff --git a/python/paddle/text/datasets/wmt14.py b/python/paddle/text/datasets/wmt14.py
index bfb665570ca..715e3794f09 100644
--- a/python/paddle/text/datasets/wmt14.py
+++ b/python/paddle/text/datasets/wmt14.py
@@ -17,7 +17,6 @@ import numpy as np
 import six
 
 from paddle.io import Dataset
-import paddle.compat as cpt
 from paddle.dataset.common import _check_exists_and_download
 
 __all__ = []
@@ -111,7 +110,7 @@ class WMT14(Dataset):
             out_dict = dict()
             for line_count, line in enumerate(fd):
                 if line_count < size:
-                    out_dict[cpt.to_text(line.strip())] = line_count
+                    out_dict[line.strip().decode()] = line_count
                 else:
                     break
             return out_dict
@@ -140,7 +139,7 @@ class WMT14(Dataset):
             ]
             for name in names:
                 for line in f.extractfile(name):
-                    line = cpt.to_text(line)
+                    line = line.decode()
                     line_split = line.strip().split('\t')
                     if len(line_split) != 2:
                         continue
diff --git a/python/paddle/text/datasets/wmt16.py b/python/paddle/text/datasets/wmt16.py
index 40e464cb0eb..66aa78c7833 100644
--- a/python/paddle/text/datasets/wmt16.py
+++ b/python/paddle/text/datasets/wmt16.py
@@ -22,7 +22,6 @@ from collections import defaultdict
 
 import paddle
 from paddle.io import Dataset
-import paddle.compat as cpt
 from paddle.dataset.common import _check_exists_and_download
 
 __all__ = []
@@ -152,16 +151,16 @@ class WMT16(Dataset):
         with open(dict_path, "rb") as fdict:
             for idx, line in enumerate(fdict):
                 if reverse:
-                    word_dict[idx] = cpt.to_text(line.strip())
+                    word_dict[idx] = line.strip().decode()
                 else:
-                    word_dict[cpt.to_text(line.strip())] = idx
+                    word_dict[line.strip().decode()] = idx
         return word_dict
 
     def _build_dict(self, dict_path, dict_size, lang):
         word_dict = defaultdict(int)
         with tarfile.open(self.data_file, mode="r") as f:
             for line in f.extractfile("wmt16/train"):
-                line = cpt.to_text(line)
+                line = line.decode()
                 line_split = line.strip().split("\t")
                 if len(line_split) != 2: continue
                 sen = line_split[0] if self.lang == "en" else line_split[1]
@@ -195,7 +194,7 @@ class WMT16(Dataset):
         self.trg_ids_next = []
         with tarfile.open(self.data_file, mode="r") as f:
             for line in f.extractfile("wmt16/{}".format(self.mode)):
-                line = cpt.to_text(line)
+                line = line.decode()
                 line_split = line.strip().split("\t")
                 if len(line_split) != 2:
                     continue
-- 
GitLab