Commit 6abe819f authored by minqiyang

Fix pybind11 problem

Fix str and bytes problem
Fix sorted problem
Fix math problem
Fix CI problem
Parent 1f618c4f
@@ -82,7 +82,10 @@ class DefaultValueSetter {
  public:
   explicit DefaultValueSetter(T default_value)
       : default_value_(default_value) {}
-  void operator()(T& value) const { value = default_value_; }
+  void operator()(T* value) const {
+    PADDLE_ENFORCE(value != nullptr, "Can not set default value to nullptr");
+    *value = default_value_;
+  }
  private:
   T default_value_;
@@ -199,6 +202,7 @@ struct ExtractAttribute<int64_t> {
 template <typename T>
 class TypedAttrChecker {
   typedef std::function<void(T&)> ValueChecker;
+  typedef std::function<void(T*)> ValueSetter;
  public:
   explicit TypedAttrChecker(const std::string& attr_name)
@@ -241,7 +245,7 @@ class TypedAttrChecker {
                      "Attribute '%s' is required!", attr_name_);
       // default_value_setter_ has no more than one element
       T val;
-      (default_value_setter_[0])(val);
+      (default_value_setter_[0])(&val);
       attr_map[attr_name_] = val;
     }
     Attribute& attr = attr_map.at(attr_name_);
@@ -255,7 +259,7 @@ class TypedAttrChecker {
  private:
   std::string attr_name_;
   std::vector<ValueChecker> value_checkers_;
-  std::vector<ValueChecker> default_value_setter_;
+  std::vector<ValueSetter> default_value_setter_;
 };
 // check whether op's all attributes fit their own limits
...
@@ -202,6 +202,57 @@ std::vector<std::string> OpDesc::AttrNames() const {
 }
 void OpDesc::SetAttr(const std::string &name, const Attribute &v) {
+  // NOTICE(minqiyang): pybind11 will take the empty list in python as
+  // the std::vector<int> type in C++; so we have to change the attr's type
+  // here if we meet this issue
+  proto::AttrType attr_type = static_cast<proto::AttrType>(v.which() - 1);
+  if (attr_type == proto::AttrType::INTS &&
+      boost::get<std::vector<int>>(v).size() == 0u) {
+    proto::OpProto proto = OpInfoMap::Instance().Get(Type()).Proto();
+    // Find current attr via attr name and set the correct attribute value
+    for (int i = 0; i != proto.attrs_size(); ++i) {
+      const proto::OpProto::Attr &attr = proto.attrs(i);
+      if (attr.name() == name) {
+        switch (attr.type()) {
+          case proto::AttrType::BOOLEANS: {
+            VLOG(11) << "SetAttr: " << Type() << ", " << name
+                     << " from INTS to BOOLEANS";
+            this->attrs_[name] = std::vector<bool>();
+            break;
+          }
+          case proto::AttrType::INTS: {
+            VLOG(11) << "SetAttr: " << Type() << ", " << name
+                     << " from INTS to INTS";
+            this->attrs_[name] = std::vector<int>();
+            break;
+          }
+          case proto::AttrType::FLOATS: {
+            VLOG(11) << "SetAttr: " << Type() << ", " << name
+                     << " from INTS to FLOATS";
+            this->attrs_[name] = std::vector<float>();
+            break;
+          }
+          case proto::AttrType::STRINGS: {
+            VLOG(11) << "SetAttr: " << Type() << ", " << name
+                     << " from INTS to STRINGS";
+            this->attrs_[name] = std::vector<std::string>();
+            break;
+          }
+          case proto::AttrType::BLOCKS: {
+            VLOG(11) << "SetAttr: " << Type() << ", " << name
+                     << " from INTS to BLOCKS";
+            this->SetBlocksAttr(name, std::vector<BlockDesc *>());
+            return;
+          }
+          default:
+            PADDLE_THROW("Wrong attr type %d", attr.type());
+        }
+        need_update_ = true;
+        return;
+      }
+    }
+  }
   this->attrs_[name] = v;
   need_update_ = true;
 }
...
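Note: the pybind11 pitfall handled above is visible from the Python side. An empty Python list carries no element type, so pybind11's STL casters hand it to OpDesc::SetAttr as std::vector<int> (an INTS attribute) regardless of the type the op proto declares. A minimal sketch of triggering it; the op and attribute names here are illustrative only, not taken from the diff:

import paddle.fluid as fluid

prog = fluid.Program()
op = prog.global_block().desc.append_op()
op.set_type("sum")
# Under Python 3, [] reaches C++ as std::vector<int>. Without the type
# correction in OpDesc::SetAttr, an empty value for an attribute declared
# as BOOLEANS/FLOATS/STRINGS/BLOCKS would be recorded as INTS and fail the
# attribute checker later.
op.set_attr("some_list_attr", [])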
@@ -205,11 +205,7 @@ void BindBlockDesc(pybind11::module *m) {
 void BindVarDsec(pybind11::module *m) {
   pybind11::class_<pd::VarDesc> var_desc(*m, "VarDesc", "");
   var_desc
-      .def("name",
-           [](pd::VarDesc &self) {
-             pybind11::bytes name = self.Name();
-             return name;
-           },
+      .def("name", [](pd::VarDesc &self) { return self.Name(); },
            pybind11::return_value_policy::reference)
       .def("set_name", &pd::VarDesc::SetName)
       .def("set_shape", &pd::VarDesc::SetShape)
...
@@ -54,6 +54,8 @@ limitations under the License. */
 #include "paddle/fluid/platform/gpu_info.h"
 #endif
+
+#include "pybind11/stl.h"
 // disable auto conversion to list in Python
 PYBIND11_MAKE_OPAQUE(paddle::framework::LoDTensorArray);
...
@@ -53,7 +53,7 @@ def reader_creator(filename, sub_name, cycle=False):
             yield (sample / 255.0).astype(numpy.float32), int(label)
     def reader():
-        with tarfile.open(filename, mode='r') as f:
+        with tarfile.open(filename, mode='rb') as f:
             names = (each_item.name for each_item in f
                      if sub_name in each_item.name)
...
@@ -20,6 +20,7 @@ import shutil
 import sys
 import importlib
 import paddle.dataset
+import paddle.fluid.compat as cpt
 import six.moves.cPickle as pickle
 import glob
@@ -93,7 +94,7 @@ def download(url, module_name, md5sum, save_name=None):
                 total_length = int(total_length)
                 for data in r.iter_content(chunk_size=4096):
                     dl += len(data)
-                    f.write(data)
+                    f.write(cpt.to_literal_str(data))
                     done = int(50 * dl / total_length)
                     sys.stdout.write("\r[%s%s]" % ('=' * done,
                                                    ' ' * (50 - done)))
...
@@ -56,7 +56,7 @@ def batch_images_from_tar(data_file,
     :type data_file: string
     :param dataset_name: 'train','test' or 'valid'
     :type dataset_name: string
     :param img2label: a dic with image file name as key
                       and image's label as value
     :type img2label: dic
     :param num_per_batch: image number per batch file
@@ -88,7 +88,7 @@ def batch_images_from_tar(data_file,
             output['data'] = data
             pickle.dump(
                 output,
-                open('%s/batch_%d' % (out_path, file_id), 'w'),
+                open('%s/batch_%d' % (out_path, file_id), 'wb'),
                 protocol=pickle.HIGHEST_PROTOCOL)
             file_id += 1
             data = []
@@ -99,7 +99,7 @@ def batch_images_from_tar(data_file,
         output['data'] = data
         pickle.dump(
             output,
-            open('%s/batch_%d' % (out_path, file_id), 'w'),
+            open('%s/batch_%d' % (out_path, file_id), 'wb'),
             protocol=pickle.HIGHEST_PROTOCOL)
     with open(meta_file, 'a') as meta:
@@ -113,7 +113,7 @@ def load_image_bytes(bytes, is_color=True):
     Load an color or gray image from bytes array.
     Example usage:
     .. code-block:: python
         with open('cat.jpg') as f:
@@ -137,7 +137,7 @@ def load_image(file, is_color=True):
     Load an color or gray image from the file path.
     Example usage:
     .. code-block:: python
         im = load_image('cat.jpg')
@@ -161,16 +161,16 @@ def load_image(file, is_color=True):
 def resize_short(im, size):
     """
     Resize an image so that the length of shorter edge is size.
     Example usage:
     .. code-block:: python
         im = load_image('cat.jpg')
         im = resize_short(im, 256)
     :param im: the input image with HWC layout.
     :type im: ndarray
     :param size: the shorter edge size of image after resizing.
@@ -193,17 +193,17 @@ def to_chw(im, order=(2, 0, 1)):
     according the order (2,0,1).
     Example usage:
     .. code-block:: python
         im = load_image('cat.jpg')
         im = resize_short(im, 256)
         im = to_chw(im)
     :param im: the input image with HWC layout.
     :type im: ndarray
     :param order: the transposed order.
     :type order: tuple|list
     """
     assert len(im.shape) == len(order)
     im = im.transpose(order)
@@ -215,11 +215,11 @@ def center_crop(im, size, is_color=True):
     Crop the center of image with size.
     Example usage:
     .. code-block:: python
         im = center_crop(im, 224)
     :param im: the input image with HWC layout.
     :type im: ndarray
     :param size: the cropping size.
@@ -243,11 +243,11 @@ def random_crop(im, size, is_color=True):
     Randomly crop input image with size.
     Example usage:
     .. code-block:: python
         im = random_crop(im, 224)
     :param im: the input image with HWC layout.
     :type im: ndarray
     :param size: the cropping size.
@@ -272,11 +272,11 @@ def left_right_flip(im, is_color=True):
     Return the flipped image.
     Example usage:
     .. code-block:: python
         im = left_right_flip(im)
     :param im: input image with HWC layout or HW layout for gray image
     :type im: ndarray
     :param is_color: whether input image is color or not
@@ -299,7 +299,7 @@ def simple_transform(im,
     resizing, croping and flipping.
     Example usage:
     .. code-block:: python
         im = simple_transform(im, 256, 224, True)
@@ -314,7 +314,7 @@ def simple_transform(im,
     :type is_train: bool
     :param is_color: whether the image is color or not.
     :type is_color: bool
     :param mean: the mean values, which can be element-wise mean values or
                  mean values per channel.
     :type mean: numpy array | list
     """
@@ -332,7 +332,7 @@ def simple_transform(im,
         im = im.astype('float32')
         if mean is not None:
             mean = np.array(mean, dtype=np.float32)
             # mean value, may be one value per channel
             if mean.ndim == 1 and is_color:
                 mean = mean[:, np.newaxis, np.newaxis]
             elif mean.ndim == 1:
@@ -357,7 +357,7 @@ def load_and_transform(filename,
     for the transform operations.
     Example usage:
     .. code-block:: python
         im = load_and_transform('cat.jpg', 256, 224, True)
@@ -372,7 +372,7 @@ def load_and_transform(filename,
     :type is_train: bool
     :param is_color: whether the image is color or not.
     :type is_color: bool
     :param mean: the mean values, which can be element-wise mean values or
                  mean values per channel.
     :type mean: numpy array | list
     """
...
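Note: the 'w' to 'wb' changes matter because pickle.dump writes bytes; under Python 3 a text-mode handle raises TypeError. A standalone illustration:

import os
import pickle
import tempfile

payload = {'data': [1, 2, 3]}
path = os.path.join(tempfile.gettempdir(), 'batch_0')

# Binary mode works on both Python 2 and 3; under Python 3, mode 'w' would
# fail inside pickle.dump with "write() argument must be str, not bytes".
with open(path, 'wb') as f:
    pickle.dump(payload, f, protocol=pickle.HIGHEST_PROTOCOL)
with open(path, 'rb') as f:
    assert pickle.load(f) == payload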
@@ -21,6 +21,8 @@ import paddle.dataset.common
 import subprocess
 import numpy
 import platform
+import six
+import tempfile
 from six.moves import range
 __all__ = ['train', 'test', 'convert']
@@ -46,23 +48,28 @@ def reader_creator(image_filename, label_filename, buffer_size):
         # According to http://stackoverflow.com/a/38061619/724872, we
         # cannot use standard package gzip here.
-        m = subprocess.Popen([zcat_cmd, image_filename], stdout=subprocess.PIPE)
-        m.stdout.read(16)  # skip some magic bytes
+        tmp_image_file = tempfile.TemporaryFile(prefix='paddle_dataset')
+        m = subprocess.Popen(
+            [zcat_cmd, image_filename], stdout=tmp_image_file).communicate()
+        tmp_image_file.seek(16)  # skip some magic bytes
-        l = subprocess.Popen([zcat_cmd, label_filename], stdout=subprocess.PIPE)
-        l.stdout.read(8)  # skip some magic bytes
+        # Python3 will not take stdout as file
+        tmp_label_file = tempfile.TemporaryFile(prefix='paddle_dataset')
+        l = subprocess.Popen(
+            [zcat_cmd, label_filename], stdout=tmp_label_file).communicate()
+        tmp_label_file.seek(8)  # skip some magic bytes
         try:  # reader could be break.
             while True:
                 labels = numpy.fromfile(
-                    l.stdout, 'ubyte', count=buffer_size).astype("int")
+                    tmp_label_file, 'ubyte', count=buffer_size).astype("int")
                 if labels.size != buffer_size:
                     break  # numpy.fromfile returns empty slice after EOF.
                 images = numpy.fromfile(
-                    m.stdout, 'ubyte', count=buffer_size * 28 * 28).reshape(
-                        (buffer_size, 28 * 28)).astype('float32')
+                    tmp_image_file, 'ubyte', count=buffer_size * 28 *
+                    28).reshape((buffer_size, 28 * 28)).astype('float32')
                 images = images / 255.0 * 2.0 - 1.0
...
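Note: the temporary-file dance above exists because numpy.fromfile wants a real, seekable file; pointing it at Popen.stdout is not reliable under Python 3. A sketch of the pattern (the archive name is illustrative):

import subprocess
import tempfile

import numpy

# Decompress into a seekable temporary file instead of a pipe.
tmp = tempfile.TemporaryFile(prefix='paddle_dataset')
subprocess.Popen(
    ['gzip', '-dc', 'train-images-idx3-ubyte.gz'], stdout=tmp).communicate()
tmp.seek(16)  # skip the IDX header magic bytes
images = numpy.fromfile(tmp, 'ubyte', count=100 * 28 * 28)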
@@ -71,7 +71,7 @@ def load_data(filename, feature_num=14, ratio=0.8):
         return
     data = np.fromfile(filename, sep=' ')
-    data = data.reshape(data.shape[0] / feature_num, feature_num)
+    data = data.reshape(data.shape[0] // feature_num, feature_num)
     maximums, minimums, avgs = data.max(axis=0), data.min(axis=0), data.sum(
         axis=0) / data.shape[0]
     feature_range(maximums[:-1], minimums[:-1])
...
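Note: this `/` to `//` swap is the recurring Python 3 fix in this commit. True division returns a float on Python 3, and numpy rejects float dimensions:

# Python 2: 506 / 14 == 36 (int). Python 3: 506 / 14 == 36.142857... (float),
# which numpy.reshape rejects as a dimension. Floor division floors on both.
assert 506 // 14 == 36
assert isinstance(506 / 14, float)  # Python 3 semantics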
@@ -29,6 +29,7 @@ Multi30K: Multilingual English-German Image Descriptions.
 """
 import os
+import six
 import tarfile
 import gzip
 from collections import defaultdict
@@ -120,7 +121,7 @@ def reader_creator(tar_file, file_name, src_dict_size, trg_dict_size, src_lang):
     with tarfile.open(tar_file, mode="r") as f:
         for line in f.extractfile(file_name):
-            line_split = line.strip().split("\t")
+            line_split = line.strip().split(six.b("\t"))
             if len(line_split) != 2:
                 continue
             src_words = line_split[src_col].split()
...
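Note: lines read from a tarfile member are bytes under Python 3, and bytes.split needs a bytes separator; six.b("\t") is "\t" on Python 2 and b"\t" on Python 3. Standalone:

import six

line = six.b('ein Mann\tA man\n')
parts = line.strip().split(six.b('\t'))
assert parts == [six.b('ein Mann'), six.b('A man')]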
@@ -17,6 +17,7 @@ from . import core
 import collections
 import copy
 import six
+from . import compat as cpt
 from . import unique_name
 __all__ = ['append_backward']
@@ -75,10 +76,10 @@ def _infer_var_data_type_(grad_var_name, block):
     """
     Infer the data type of given grad variable
     """
-    grad_var = block.desc.find_var(grad_var_name.encode("ascii"))
-    fwd_name = _strip_grad_suffix_(grad_var_name.encode("ascii"))
-    if block.desc.has_var_recursive(fwd_name):
-        fwd_var = block.desc.find_var_recursive(fwd_name.encode("ascii"))
+    grad_var = block.desc.find_var(cpt.to_bytes(grad_var_name))
+    fwd_name = _strip_grad_suffix_(grad_var_name)
+    if block.desc.has_var_recursive(cpt.to_bytes(fwd_name)):
+        fwd_var = block.desc.find_var_recursive(cpt.to_bytes(fwd_name))
         grad_var.set_dtype(fwd_var.dtype())
     else:
         grad_var.set_dtype(core.VarDesc.VarType.FP32)
@@ -102,8 +103,10 @@ def _some_in_set_(cands, s):
     """
     if len(cands) == 0:
         return False
-    for c in cands:
-        if c in s:
+    literal_set = cpt.to_literal_str(s)
+    literal_cands = cpt.to_literal_str(cands)
+    for c in literal_cands:
+        if c in literal_set:
             return True
     return False
@@ -114,9 +117,8 @@ def _strip_grad_suffix_(name):
     e.g. x@GRAD ==> x
          y@GRAD@RENAME@1 ==> y
     """
-    if isinstance(name, six.text_type):
-        name = name.encode()
-    pos = name.find(six.b(core.grad_var_suffix()))
+    name = cpt.to_literal_str(name)
+    pos = name.find(core.grad_var_suffix())
     return name[:pos] if pos != -1 else name
@@ -125,9 +127,7 @@ def _append_grad_suffix_(name):
     Append grad suffix to the given variable name
     e.g. x ==> x@GRAD
     """
-    if isinstance(name, six.text_type):
-        name = name.encode()
-    return name + six.b(core.grad_var_suffix())
+    return cpt.to_literal_str(name) + core.grad_var_suffix()
 def _addup_repetitive_outputs_(op_descs):
@@ -364,7 +364,8 @@ def _append_backward_ops_(block,
         # Getting op's corresponding grad_op
         grad_op_desc, op_grad_to_var = core.get_grad_op_desc(
-            op.desc, no_grad_dict[block.idx], grad_sub_block_list)
+            op.desc,
+            cpt.to_literal_str(no_grad_dict[block.idx]), grad_sub_block_list)
         grad_op_descs.extend(grad_op_desc)
         grad_to_var.update(op_grad_to_var)
@@ -411,11 +412,10 @@ def _append_backward_vars_(block, start_op_idx, grad_to_var, grad_info_map):
         new_vars = set()
         # create new gradient variables
         for grad_var_name in op_desc.output_arg_names():
-            grad_var_name = grad_var_name.encode("ascii")
-            if block.desc.has_var_recursive(
-                    grad_var_name) or grad_var_name == core.empty_var_name():
+            if block.desc.has_var_recursive(cpt.to_bytes(
+                    grad_var_name)) or grad_var_name == core.empty_var_name():
                 continue
-            block.desc.var(grad_var_name)
+            block.desc.var(cpt.to_bytes(grad_var_name))
             new_vars.add(grad_var_name)
             if grad_var_name not in grad_to_var:
                 continue
@@ -597,11 +597,12 @@ def append_backward(loss, parameter_list=None, no_grad_set=None,
         parameters = parameter_list
     else:
         params = program.global_block().all_parameters()
+        program.global_block().iter_parameters()
         parameters = [param.name for param in params]
     params_and_grads = []
     for param in parameters:
-        if param not in grad_info_map:
+        if cpt.to_literal_str(param) not in grad_info_map:
             continue
         grad_info = grad_info_map[param]
         grad_block = grad_info[1]
...
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import math

import six


# str and bytes related functions
def to_literal_str(obj):
    if isinstance(obj, list):
        return [_to_literal_str(item) for item in obj]
    elif isinstance(obj, set):
        return set([_to_literal_str(item) for item in obj])
    else:
        return _to_literal_str(obj)


def _to_literal_str(obj):
    if isinstance(obj, six.binary_type):
        return obj.decode('latin-1')
    elif isinstance(obj, six.text_type):
        return obj
    else:
        return six.u(obj)


def to_bytes(obj):
    if isinstance(obj, list):
        return [_to_bytes(item) for item in obj]
    elif isinstance(obj, set):
        return set([_to_bytes(item) for item in obj])
    else:
        return _to_bytes(obj)


def _to_bytes(obj):
    if isinstance(obj, six.text_type):
        return obj.encode('latin-1')
    elif isinstance(obj, six.binary_type):
        return obj
    else:
        return six.b(obj)


# math related functions
def round(x, d=0):
    """
    Compatible round that keeps Python 2's round-half-away-from-zero
    behaviour when running under Python 3.

    Args:
        x (float): the number to round.
        d (int): the number of decimal places, defaults to 0.

    Returns:
        float: the rounded result of x.
    """
    p = 10**d
    return float(math.floor((x * p) + math.copysign(0.5, x))) / p


def floor_division(x, y):
    return x // y
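A quick usage sketch of the compat module above:

import paddle.fluid.compat as cpt

# Literal strings for Python-side use, bytes for C++ bindings that
# expect std::string.
assert cpt.to_literal_str(b'fc_0.w_0') == 'fc_0.w_0'
assert cpt.to_bytes('fc_0.w_0') == b'fc_0.w_0'
assert cpt.to_literal_str([b'a', b'b']) == ['a', 'b']

# Python 2-style rounding under Python 3: half rounds away from zero,
# where the Python 3 builtin would round half to even.
assert cpt.round(0.5) == 1.0
assert cpt.round(-0.5) == -1.0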
@@ -19,6 +19,7 @@ import six
 import numpy as np
+from . import compat as cpt
 from .proto import framework_pb2
 try:
     from . import core
@@ -87,7 +88,7 @@ def convert_np_dtype_to_dtype_(np_dtype):
     elif dtype == np.uint8:
         return core.VarDesc.VarType.UINT8
     else:
-        raise ValueError("Not supported numpy dtype " + six.binary_type(dtype))
+        raise ValueError("Not supported numpy dtype %s" % dtype)
 def dtype_is_floating(dtype):
@@ -198,11 +199,11 @@ class Variable(object):
         if name is None:
             name = unique_name.generate('_generated_var')
         is_new_var = False
-        name = name if isinstance(name, six.binary_type) else name.encode()
-        self.desc = self.block.desc.find_var(name)
+        name = cpt.to_literal_str(name)
+        self.desc = self.block.desc.find_var(cpt.to_bytes(name))
         if self.desc is None:
-            self.desc = self.block.desc.var(name)
+            self.desc = self.block.desc.var(cpt.to_bytes(name))
             is_new_var = True
         if is_new_var:
@@ -325,7 +326,7 @@ class Variable(object):
     @property
     def name(self):
-        return self.desc.name()
+        return cpt.to_literal_str(self.desc.name())
     @name.setter
     def name(self, new_name):
@@ -529,10 +530,7 @@ class Operator(object):
                     elif isinstance(arg, six.binary_type):
                         in_arg_names.append(arg.decode())
                     else:
-                        if isinstance(arg.name, six.string_types):
-                            in_arg_names.append(arg.name)
-                        elif isinstance(arg.name, six.binary_type):
-                            in_arg_names.append(arg.name.decode())
+                        in_arg_names.append(cpt.to_literal_str(arg.name))
                 self.desc.set_input(in_proto.name, in_arg_names)
             else:
                 self.desc.set_input(in_proto.name, [])
@@ -561,12 +559,7 @@ class Operator(object):
                         (out_proto.name, len(out_args)))
                 out_arg_names = []
                 for arg in out_args:
-                    if isinstance(arg.name, six.string_types):
-                        out_arg_names.append(arg.name)
-                    elif isinstance(arg.name, six.binary_type):
-                        out_arg_names.append(arg.name.decode())
-                    else:
-                        out_arg_names.append(six.u(arg.name))
+                    out_arg_names.append(cpt.to_literal_str(arg.name))
                     arg.op = self
                 self.desc.set_output(out_proto.name, out_arg_names)
@@ -994,6 +987,9 @@ class Block(object):
         Returns:
             Variable: the Variable with the given name.
         """
+        name = cpt.to_literal_str(name)
+        new_name = cpt.to_literal_str(new_name)
+
         if not self.has_var(name):
             raise ValueError("var %s is not in current block" % name)
         v = self.var(name)
@@ -1012,9 +1008,9 @@ class Block(object):
         else:
             raise ValueError("unsupported var type: %s", type(v))
         orig_var_type = v.type
-        self.desc._rename_var(name, new_name)
+        self.desc._rename_var(cpt.to_bytes(name), cpt.to_bytes(new_name))
         # NOTE: v is destroyed by C++ after calling _rename_var.
-        d = self.desc.find_var(new_name)
+        d = self.desc.find_var(cpt.to_bytes(new_name))
         if var_type == "Parameter":
             var = Parameter(
                 self,
@@ -1045,7 +1041,7 @@ class Block(object):
     def _remove_var(self, name):
         self._sync_with_cpp()
-        self.desc._remove_var(name)
+        self.desc._remove_var(cpt.to_bytes(name))
         del self.vars[name]
     def create_parameter(self, *args, **kwargs):
@@ -1128,7 +1124,7 @@ class Block(object):
         # sync variables removed from c++ end
         for var in list(self.vars.keys()):
-            if not self.desc.find_var(var):
+            if not self.desc.find_var(cpt.to_bytes(var)):
                 self.vars.pop(var)
         # sync operators from cpp
...
@@ -106,7 +106,8 @@ class Graph(object):
     def _rank_repr(self):
         ranks = sorted(
             list(self.rank_groups.items()),
-            cmp=lambda a, b: a[1].priority > b[1].priority)
+            key=functools.cmp_to_key(
+                lambda a, b: a[1].priority > b[1].priority))
         repr = []
         for x in ranks:
             repr.append(str(x[1]))
...
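Note: Python 3 removes the cmp= argument of sorted(); functools.cmp_to_key wraps an old-style comparator into a key function. The comparator above returns a boolean, which only distinguishes "greater" from "not greater"; the conventional form returns negative/zero/positive:

import functools

items = [('a', 3), ('b', 1), ('c', 2)]
print(sorted(items, key=functools.cmp_to_key(lambda a, b: a[1] - b[1])))
# [('b', 1), ('c', 2), ('a', 3)]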
@@ -600,25 +600,15 @@ def save_inference_model(dirname,
             # "./infer_model".
     """
-    if isinstance(feeded_var_names, six.binary_type):
+    if isinstance(feeded_var_names, six.string_types):
         feeded_var_names = [feeded_var_names]
-    elif isinstance(feeded_var_names, six.text_type):
-        feeded_var_names = [feeded_var_names.encode()]
     else:
         if len(feeded_var_names) > 0:
             # TODO(paddle-dev): polish these code blocks
             if not (bool(feeded_var_names) and all(
-                    isinstance(name, six.binary_type)
+                    isinstance(name, six.string_types)
                     for name in feeded_var_names)):
-                if not (all(
-                        isinstance(name, six.text_type)
-                        for name in feeded_var_names)):
-                    raise ValueError(
-                        "'feed_var_names' should be a list of str.")
-                else:
-                    feeded_var_names = [
-                        name.encode() for name in feeded_var_names
-                    ]
+                raise ValueError("'feed_var_names' should be a list of str.")
     if isinstance(target_vars, Variable):
         target_vars = [target_vars]
...
@@ -751,7 +751,7 @@ def open_files(filenames,
     else:
         buffer_size = int(buffer_size)
-    if isinstance(filenames, basestring):
+    if isinstance(filenames, six.string_types):
         filenames = [filenames]
     dtypes = [convert_np_dtype_to_dtype_(dt) for dt in dtypes]
     shape_concat = []
...
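Note: basestring exists only on Python 2; six.string_types is (str, unicode) there and (str,) on Python 3, so one isinstance check covers both. A hypothetical helper showing the pattern:

import six

def normalize_filenames(filenames):
    # Accept a single path or a list of paths on Python 2 and 3 alike.
    if isinstance(filenames, six.string_types):
        filenames = [filenames]
    return filenames

assert normalize_filenames('a.recordio') == ['a.recordio']
assert normalize_filenames(['a', 'b']) == ['a', 'b']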
@@ -360,7 +360,7 @@ def dynamic_lstm(input,
     """
     helper = LayerHelper('lstm', **locals())
-    size = size / 4
+    size = size // 4
     weight = helper.create_parameter(
         attr=helper.param_attr, shape=[size, 4 * size], dtype=dtype)
     bias_size = [1, 7 * size]
@@ -1498,7 +1498,7 @@ def conv2d(input,
         raise ValueError("use_cudnn should be True or False")
     input_shape = input.shape
-    filter_shape = [num_filters, num_filter_channels] + filter_size
+    filter_shape = [num_filters, int(num_filter_channels)] + filter_size
     def _get_default_param_initializer():
         std = (2.0 / (filter_size[0]**2 * num_channels))**0.5
@@ -2669,15 +2669,15 @@ def beam_search(pre_ids,
     Refer to `Beam search <https://en.wikipedia.org/wiki/Beam_search>`_
     for more details.
     This layer does the search in beams for one time step. Specifically, it
     selects the top-K candidate word ids of current step from :attr:`ids`
     according to their :attr:`scores` for all source sentences, where K is
     :attr:`beam_size` and :attr:`ids, scores` are predicted results from the
     computation cell. Additionally, :attr:`pre_ids` and :attr:`pre_scores` are
     the output of beam_search at previous step, they are needed for special use
     to handle ended candidate translations.
     Note that the :attr:`scores` passed in should be accumulated scores, and
     length penalty should be done with extra operators before calculating the
     accumulated scores if needed, also suggest finding top-K before it and
@@ -3878,7 +3878,7 @@ def nce(input,
 def hsigmoid(input, label, num_classes, param_attr=None, bias_attr=None):
     """
     The hierarchical sigmoid operator is used to accelerate the training
     process of language model. This operator organizes the classes into a
     complete binary tree, each leaf node represents a class(a word) and each
     internal node acts as a binary classifier. For each word there's a unique
     path from root to it's leaf node, hsigmoid calculate the cost for each
@@ -3888,9 +3888,9 @@ def hsigmoid(input, label, num_classes, param_attr=None, bias_attr=None):
     Refer to `Hierarchical Probabilistic Neural Network Language Model
     <http://www.iro.umontreal.ca/~lisa/pointeurs/hierarchical-nnlm-aistats05.pdf>`_
     Args:
         input (Variable): The input tensor variable with shape
             :math:`[N \\times D]`, where :math:`N` is the size of mini-batch,
             and :math:`D` is the feature size.
         label (Variable): The tensor variable contains labels of training data.
@@ -3898,7 +3898,7 @@ def hsigmoid(input, label, num_classes, param_attr=None, bias_attr=None):
         num_classes: (int), The number of classes, must not be less than 2.
         param_attr (ParamAttr|list of ParamAttr, default None): The parameter
             attribute for learnable parameters/weights of this layer.
         bias_attr (ParamAttr|list of ParamAttr, default None): The parameter
             attribute for the bias of this layer. If it is set to False, no
             bias will be applied.
@@ -5293,23 +5293,23 @@ def rank_loss(label, left, right, name=None):
     is a pairwise ranking model with a training sample consisting of a pair
     of documents, A and B. Label P indicates whether A is ranked higher than B
     or not:
     P = {0, 1} or {0, 0.5, 1}, where 0.5 means that there is no information
     about the rank of the input pair.
     Rank loss layer takes three inputs: left (o_i), right (o_j) and
     label (P_{i,j}). The inputs respectively represent RankNet's output scores
     for documents A and B and the value of label P. The following equation
     computes rank loss C_{i,j} from the inputs:
     $$
     C_{i,j} = -\tilde{P_{ij}} * o_{i,j} + \log(1 + e^{o_{i,j}}) \\
     o_{i,j} = o_i - o_j \\
     \tilde{P_{i,j}} = \left \{0, 0.5, 1 \right \} \ or \ \left \{0, 1 \right \}
     $$
     Rank loss layer takes batch inputs with size batch_size (batch_size >= 1).
     Args:
         label (Variable): Indicates whether A is ranked higher than B or not.
         left (Variable): RankNet's output score for doc A.
...
@@ -17,6 +17,7 @@ import multiprocessing
 from . import core
 from . import framework
 from . import executor
+from . import compat as cpt
 import warnings
 import sys
 import six
@@ -154,11 +155,14 @@ class ParallelExecutor(object):
         self.executor = core.ParallelExecutor(
             self._places,
             set([
-                p.name for p in main.global_block().iter_parameters()
+                cpt.to_literal_str(p.name)
+                for p in main.global_block().iter_parameters()
                 if not p.stop_gradient
             ]),
-            set(self.persistable_vars), main.desc, loss_name
-            if loss_name else '', scope, local_scopes, exec_strategy,
+            set(cpt.to_literal_str(var)
+                for var in self.persistable_vars), main.desc,
+            cpt.to_literal_str(loss_name)
+            if loss_name else six.u(''), scope, local_scopes, exec_strategy,
             build_strategy, num_trainers, trainer_id)
         self.scope = scope
@@ -270,7 +274,8 @@ class ParallelExecutor(object):
         self.executor.feed_tensors_into_local_scopes(res)
         fetch_var_name = '@FETCHED_VAR_NAME@'
-        self.executor.run(fetch_list, fetch_var_name)
+        self.executor.run(
+            cpt.to_literal_str(fetch_list), cpt.to_literal_str(fetch_var_name))
         arr = self.scope.find_var(fetch_var_name).get_lod_tensor_array()
         if self.is_dist:
...
@@ -30,7 +30,7 @@ images per class.
 import itertools
 import numpy
-import paddle.v2.dataset.common
+import paddle.dataset.common
 import tarfile
 from six.moves import cPickle as pickle
 from six.moves import zip
@@ -78,6 +78,6 @@ def train10(batch_size=None):
     :rtype: callable
     """
     return reader_creator(
-        paddle.v2.dataset.common.download(CIFAR10_URL, 'cifar', CIFAR10_MD5),
+        paddle.dataset.common.download(CIFAR10_URL, 'cifar', CIFAR10_MD5),
         'data_batch',
         batch_size=batch_size)
@@ -60,7 +60,7 @@ def resnet_cifar10(input, depth=32):
         return tmp
     assert (depth - 2) % 6 == 0
-    n = (depth - 2) / 6
+    n = (depth - 2) // 6
     conv1 = conv_bn_layer(
         input=input, ch_out=16, filter_size=3, stride=1, padding=1)
     res1 = layer_warp(basicblock, conv1, 16, 16, n, 1)
...
@@ -15,6 +15,7 @@
 import unittest
 import numpy as np
 import random
+import six
 import time
 import itertools
 import collections
@@ -26,15 +27,13 @@ from paddle.fluid.op import Operator
 from paddle.fluid.executor import Executor
 from paddle.fluid.framework import Program, OpProtoHolder, Variable
 from testsuite import create_op, set_input, append_input_output, append_loss_ops
-from functools import reduce
-from six.moves import zip
 def randomize_probability(batch_size, class_num, dtype='float32'):
     prob = np.random.uniform(
         0.1, 1.0, size=(batch_size, class_num)).astype(dtype)
     prob_sum = prob.sum(axis=1)
-    for i in range(len(prob)):
+    for i in six.moves.xrange(len(prob)):
         prob[i] /= prob_sum[i]
     return prob
@@ -51,7 +50,7 @@ def get_numeric_gradient(place,
     set_input(scope, op, inputs, place)
     def product(dim):
-        return reduce(lambda a, b: a * b, dim, 1)
+        return six.moves.reduce(lambda a, b: a * b, dim, 1)
     def get_output():
         sum = []
@@ -103,7 +102,7 @@ def get_numeric_gradient(place,
     # we only compute gradient of one element each time.
     # we use a for loop to compute the gradient of every element.
-    for i in range(tensor_size):
+    for i in six.moves.xrange(tensor_size):
         if in_place:
             set_input(scope, op, inputs, place)
@@ -161,7 +160,7 @@ class OpTest(unittest.TestCase):
             assert isinstance(
                 numpy_dict,
                 dict), "self.inputs, self.outputs must be numpy_dict"
-            for var_name, var_value in numpy_dict.items():
+            for var_name, var_value in six.iteritems(numpy_dict):
                 if isinstance(var_value, (np.ndarray, np.generic)):
                     self.try_call_once(var_value.dtype)
                 elif isinstance(var_value, (list, tuple)):
@@ -225,7 +224,7 @@ class OpTest(unittest.TestCase):
     def _get_io_vars(self, block, numpy_inputs):
         inputs = {}
-        for name, value in numpy_inputs.items():
+        for name, value in six.iteritems(numpy_inputs):
             if isinstance(value, list):
                 var_list = [
                     block.var(sub_name) for sub_name, sub_value in value
@@ -268,7 +267,7 @@ class OpTest(unittest.TestCase):
         # if the fetch_list is customized by user, we use it directly.
         # if not, fill the fetch_list by the user configured outputs in test.
         if len(fetch_list) == 0:
-            for var_name, var in outputs.items():
+            for var_name, var in six.iteritems(outputs):
                 if isinstance(var, list):
                     for v in var:
                         fetch_list.append(v)
@@ -371,7 +370,7 @@ class OpTest(unittest.TestCase):
     def __assert_is_close(self, numeric_grads, analytic_grads, names,
                           max_relative_error, msg_prefix):
-        for a, b, name in zip(numeric_grads, analytic_grads, names):
+        for a, b, name in six.moves.zip(numeric_grads, analytic_grads, names):
             abs_a = np.abs(a)
             abs_a[abs_a < 1e-3] = 1
...
@@ -14,7 +14,7 @@
 import unittest
 import paddle.fluid as fluid
-import paddle.v2 as paddle
+import paddle as paddle
 import numpy as np
...
@@ -16,9 +16,12 @@ import time
 import unittest
 import os
 import sys
+import six
 import signal
 import subprocess
+import paddle.fluid.compat as cpt
+
 class TestDistBase(unittest.TestCase):
     def setUp(self):
@@ -78,7 +81,7 @@ class TestDistBase(unittest.TestCase):
             env=env_local)
         local_proc.wait()
         out, err = local_proc.communicate()
-        local_ret = out
+        local_ret = cpt.to_literal_str(out)
         sys.stderr.write('local_loss: %s\n' % local_ret)
         sys.stderr.write('local_stderr: %s\n' % err)
@@ -116,7 +119,7 @@ class TestDistBase(unittest.TestCase):
         tr1_proc.wait()
         out, err = tr0_proc.communicate()
         sys.stderr.write('dist_stderr: %s\n' % err)
-        loss_data0 = out
+        loss_data0 = cpt.to_literal_str(out)
         sys.stderr.write('dist_loss: %s\n' % loss_data0)
         lines = loss_data0.split("\n")
         dist_first_loss = eval(lines[0].replace(" ", ","))[0]
...
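Note: without an encoding argument, Popen.communicate() returns bytes on Python 3, so the later loss_data0.split("\n") would raise TypeError; cpt.to_literal_str performs the decode. Standalone:

import subprocess

out, err = subprocess.Popen(
    ['echo', '0.25'], stdout=subprocess.PIPE).communicate()
# Python 3: out is b'0.25\n'; split('\n') needs a str, so decode first.
text = out.decode() if isinstance(out, bytes) else out
print(text.split('\n')[0])  # 0.25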
@@ -29,11 +29,11 @@ def max_pool2D_forward_naive(x,
     if global_pool == 1:
         ksize = [H, W]
-    H_out = (H - ksize[0] + 2 * paddings[0] + strides[0] - 1
-             ) / strides[0] + 1 if ceil_mode else (H - ksize[0] + 2 *
-                                                   paddings[0]) / strides[0] + 1
-    W_out = (W - ksize[1] + 2 * paddings[1] + strides[1] - 1
-             ) / strides[1] + 1 if ceil_mode else (W - ksize[1] + 2 *
-                                                   paddings[1]) / strides[1] + 1
+    H_out = (H - ksize[0] + 2 * paddings[0] + strides[0] - 1
+             ) // strides[0] + 1 if ceil_mode else (
+                 H - ksize[0] + 2 * paddings[0]) // strides[0] + 1
+    W_out = (W - ksize[1] + 2 * paddings[1] + strides[1] - 1
+             ) // strides[1] + 1 if ceil_mode else (
+                 W - ksize[1] + 2 * paddings[1]) // strides[1] + 1
     out = np.zeros((N, C, H_out, W_out))
     for i in range(H_out):
         for j in range(W_out):
@@ -57,11 +57,11 @@ def avg_pool2D_forward_naive(x,
     if global_pool == 1:
         ksize = [H, W]
-    H_out = (H - ksize[0] + 2 * paddings[0] + strides[0] - 1
-             ) / strides[0] + 1 if ceil_mode else (H - ksize[0] + 2 *
-                                                   paddings[0]) / strides[0] + 1
-    W_out = (W - ksize[1] + 2 * paddings[1] + strides[1] - 1
-             ) / strides[1] + 1 if ceil_mode else (W - ksize[1] + 2 *
-                                                   paddings[1]) / strides[1] + 1
+    H_out = (H - ksize[0] + 2 * paddings[0] + strides[0] - 1
+             ) // strides[0] + 1 if ceil_mode else (
+                 H - ksize[0] + 2 * paddings[0]) // strides[0] + 1
+    W_out = (W - ksize[1] + 2 * paddings[1] + strides[1] - 1
+             ) // strides[1] + 1 if ceil_mode else (
+                 W - ksize[1] + 2 * paddings[1]) // strides[1] + 1
     out = np.zeros((N, C, H_out, W_out))
     for i in range(H_out):
         for j in range(W_out):
...
@@ -13,7 +13,7 @@
 # limitations under the License.
 import paddle.fluid as fluid
-import paddle.v2 as paddle
+import paddle as paddle
 import numpy as np
 import unittest
...
@@ -17,6 +17,7 @@ import paddle.fluid as fluid
 import paddle.fluid.core as core
 from paddle.fluid.layers.control_flow import lod_rank_table
 import numpy
+import functools
 class TestReorderLoDTensor(unittest.TestCase):
@@ -101,7 +102,8 @@ class TestReorderLoDTensor(unittest.TestCase):
         rank_table = []  # list of (index, length)
         for i in range(len(ref_lod)):
             rank_table.append((i, ref_lod[i]))
-        rank_table = sorted(rank_table, lambda x, y: y[1] - x[1])
+        rank_table = sorted(
+            rank_table, key=functools.cmp_to_key(lambda x, y: y[1] - x[1]))
         # compute the input sequence info according to input_lod
         input_value, input_lod = self.data[self.data_desc[0][0]]
...
@@ -16,6 +16,7 @@ import unittest
 import numpy as np
 import math
 import sys
+import paddle.fluid.compat as cpt
 from op_test import OpTest
@@ -59,10 +60,10 @@ class TestROIPoolOp(OpTest):
         for i in range(self.rois_num):
             roi = self.rois[i]
             roi_batch_id = roi[0]
-            roi_start_w = int(round(roi[1] * self.spatial_scale))
-            roi_start_h = int(round(roi[2] * self.spatial_scale))
-            roi_end_w = int(round(roi[3] * self.spatial_scale))
-            roi_end_h = int(round(roi[4] * self.spatial_scale))
+            roi_start_w = int(cpt.round(roi[1] * self.spatial_scale))
+            roi_start_h = int(cpt.round(roi[2] * self.spatial_scale))
+            roi_end_w = int(cpt.round(roi[3] * self.spatial_scale))
+            roi_end_h = int(cpt.round(roi[4] * self.spatial_scale))
             roi_height = int(max(roi_end_h - roi_start_h + 1, 1))
             roi_width = int(max(roi_end_w - roi_start_w + 1, 1))
@@ -97,8 +98,8 @@ class TestROIPoolOp(OpTest):
                         for w in range(wstart, wend):
                             if x_i[c, h, w] > out_data[i, c, ph, pw]:
                                 out_data[i, c, ph, pw] = x_i[c, h, w]
-                                argmax_data[i, c, ph, pw] = h * \
-                                    self.width + w
+                                argmax_data[i, c, ph,
+                                            pw] = h * self.width + w
         self.outs = out_data.astype('float32')
         self.argmaxes = argmax_data.astype('int64')
@@ -110,14 +111,14 @@ class TestROIPoolOp(OpTest):
             self.rois_lod[0].append(bno + 1)
             for i in range(bno + 1):
                 x1 = np.random.random_integers(
-                    0, self.width / self.spatial_scale - self.pooled_width)
+                    0, self.width // self.spatial_scale - self.pooled_width)
                 y1 = np.random.random_integers(
-                    0, self.height / self.spatial_scale - self.pooled_height)
-                x2 = np.random.random_integers(x1 + self.pooled_width,
-                                               self.width / self.spatial_scale)
-                y2 = np.random.random_integers(y1 + self.pooled_height,
-                                               self.height / self.spatial_scale)
+                    0, self.height // self.spatial_scale - self.pooled_height)
+                x2 = np.random.random_integers(x1 + self.pooled_width,
+                                               self.width // self.spatial_scale)
+                y2 = np.random.random_integers(
+                    y1 + self.pooled_height, self.height // self.spatial_scale)
                 roi = [bno, x1, y1, x2, y2]
                 rois.append(roi)
...
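Note: cpt.round is needed here because Python 3 switched the builtin round to banker's rounding (half to even), which would shift the ROI boundaries relative to the Python 2 reference result:

import paddle.fluid.compat as cpt

assert round(0.5) == 0 and round(1.5) == 2    # Python 3 builtin: half to even
assert cpt.round(0.5) == 1.0 and cpt.round(1.5) == 2.0  # half away from zero
assert cpt.round(-0.5) == -1.0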
@@ -27,7 +27,7 @@ def unpool2dmax_forward_naive(input, indices, ksize, strides, paddings):
             for h in range(s2):
                 for w in range(s3):
                     index = indices[nidx, cidx, h, w]
-                    hidx = (index - index % out_wsize) / out_wsize
+                    hidx = (index - index % out_wsize) // out_wsize
                     widx = index % out_wsize
                     out[nidx, cidx, int(hidx), int(widx)] = \
                         input[nidx, cidx, h, w]
@@ -41,9 +41,9 @@ class TestUnpoolOp(OpTest):
         self.init_test_case()
         pre_input = np.random.random(self.shape).astype("float32")
         nsize, csize, hsize, wsize = pre_input.shape
-        hsize_out = (hsize - self.ksize[0] + 2 * self.paddings[0]) / \
+        hsize_out = (hsize - self.ksize[0] + 2 * self.paddings[0]) // \
             self.strides[0] + 1
-        wsize_out = (wsize - self.ksize[1] + 2 * self.paddings[1]) / \
+        wsize_out = (wsize - self.ksize[1] + 2 * self.paddings[1]) // \
             self.strides[1] + 1
         input = np.zeros((nsize, csize, hsize_out, wsize_out))
         indices = np.zeros((nsize, csize, hsize_out, wsize_out))
@@ -62,7 +62,7 @@ class TestUnpoolOp(OpTest):
                         input[nidx, cidx, i, j] = x_masked.max()
                         arg = x_masked.argmax()
                         indices[nidx, cidx, i, j] = \
-                            (r_start + arg / self.ksize[1]) * wsize + \
+                            (r_start + arg // self.ksize[1]) * wsize + \
                             c_start + arg % self.ksize[1]
         output = self.unpool2d_forward_naive(input, indices, self.ksize, \
             self.strides, self.paddings).astype("float32")
...
@@ -132,7 +132,7 @@ class CTCForward(object):
             for k in range(end - start):
                 j = k + start
                 if j & 1 == 1:
-                    label_idx = j / 2
+                    label_idx = j // 2
                     label_val = labels_a_sequence[label_idx, 0]
                     fv = self.log_add(forward_vars[i - 1, j],
                                       forward_vars[i - 1, j - 1])
...
@@ -12,12 +12,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import six
+
 def delete_ops(block, ops):
     try:
         start = list(block.ops).index(ops[0])
         end = list(block.ops).index(ops[-1])
-        [block._remove_op(start) for _ in range(end - start + 1)]
+        [block._remove_op(start) for _ in six.moves.range(end - start + 1)]
     except Exception as e:
         raise e
     block.program._sync_with_cpp()
...
@@ -1017,7 +1017,7 @@ class DistributeTranspiler(object):
         for i, block in enumerate(splited):
             size = block[1]
-            rows = size / orig_dim1_flatten
+            rows = size // orig_dim1_flatten
             splited_shape = [rows]
             if len(orig_shape) >= 2:
                 splited_shape.extend(orig_shape[1:])
...
@@ -13,6 +13,7 @@
 # limitations under the License.
 from __future__ import print_function
 import unittest
+import os
 import sys
...