Unverified commit c56ceffc authored by lilong12, committed by GitHub

Fix the compatibility issue between PY2 and PY3 (#21)

1. Be compatible with PY3.
2. Reformat code.
Parent commit: a36148cf
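The fixes below repeat a small set of compatibility idioms. Here is a minimal sketch collecting them in one place (the helper functions are hypothetical; only the patterns mirror the diff): `six.PY2` branches around `pickle.load`, `StringIO` vs. `BytesIO` for in-memory image data, and plain file iteration instead of `xreadlines()`.

```python
# Minimal sketch of the PY2/PY3 idioms applied throughout this commit
# (illustrative only; the helper names here are hypothetical).
import pickle

import six
from PIL import Image

try:
    from StringIO import StringIO   # PY2
except ImportError:
    from io import StringIO         # PY3
from io import BytesIO


def load_pickled(path):
    # PY3 needs encoding='bytes' to read pickle files written under PY2.
    with open(path, 'rb') as f:
        if six.PY2:
            return pickle.load(f)
        return pickle.load(f, encoding='bytes')


def open_image(raw_bytes):
    # PY2 wraps the raw byte string in StringIO; PY3 requires BytesIO.
    stream = StringIO(raw_bytes) if six.PY2 else BytesIO(raw_bytes)
    return Image.open(stream)


def iter_lines(path):
    # xreadlines() only exists on PY2 file objects; plain iteration
    # works on both interpreters.
    with open(path, 'r') as f:
        if six.PY2:
            for line in f.xreadlines():
                yield line.strip()
        else:
            for line in f:
                yield line.strip()
```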
......@@ -13,5 +13,6 @@
# limitations under the License.
from .entry import Entry
from .version import plsc_version as __version__
__all__ = ['Entry']
......@@ -35,9 +35,9 @@ config.warmup_epochs = 0
config.loss_type = "dist_arcface"
config.num_classes = 85742
config.image_shape = (3,112,112)
config.image_shape = (3, 112, 112)
config.margin = 0.5
config.scale = 64.0
config.lr = 0.1
config.lr_steps = (100000,160000,220000)
config.lr_steps = (100000, 160000, 220000)
config.emb_dim = 512
This diff has been collapsed.
......@@ -12,11 +12,14 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from . import resnet
from .resnet import *
from . import base_model
from . import dist_algo
from . import resnet
from .base_model import *
from .dist_algo import *
from .resnet import *
__all__ = []
__all__ += resnet.__all__
__all__ += base_model.__all__
__all__ += dist_algo.__all__
......@@ -13,14 +13,11 @@
# limitations under the License.
import math
import os
import numpy as np
import paddle
import paddle.fluid as fluid
from paddle.fluid import unique_name
from . import dist_algo
from . import dist_algo
__all__ = ["BaseModel"]
......@@ -32,21 +29,24 @@ class BaseModel(object):
which constructs the custom model. And we will add the
distributed fc layer for you automatically.
"""
def __init__(self):
super(BaseModel, self).__init__()
def build_network(self, input, label, is_train=True):
"""
Construct the custom model, and we will add the
distributed fc layer for you automatically.
Construct the custom model, and we will add the distributed fc layer
at the end of your model automatically.
"""
raise NotImplementedError(
"You must implement this method in your sub class.")
"You must implement this method in your subclass.")
def get_output(self,
input,
label,
num_classes,
num_ranks=1,
rank_id=0,
is_train=True,
param_attr=None,
bias_attr=None,
......@@ -55,6 +55,20 @@ class BaseModel(object):
scale=64.0):
"""
Add the distributed fc layer for the custom model.
Params:
input: input for the model
label: label for the input
num_classes: number of classes for the classifier
num_ranks: number of trainers, i.e., GPUs
rank_id: id for the current trainer, from 0 to num_ranks - 1
is_train: build the network for training or not
param_attr: param_attr for the weight parameter of fc
bias_attr: bias_attr for the bias parameter of fc
loss_type: loss type to use, one of dist_softmax, softmax, arcface
and dist_arcface
margin: the margin parameter for arcface and dist_arcface
scale: the scale parameter for arcface and dist_arcface
"""
supported_loss_types = ["dist_softmax", "dist_arcface",
"softmax", "arcface"]
......@@ -62,67 +76,75 @@ class BaseModel(object):
"Supported loss types: {}, but given: {}".format(
supported_loss_types, loss_type)
nranks = int(os.getenv("PADDLE_TRAINERS_NUM", 1))
rank_id = int(os.getenv("PADDLE_TRAINER_ID", 0))
emb = self.build_network(input, label, is_train)
prob = None
loss = None
if loss_type == "softmax":
loss, prob = self.fc_classify(emb,
label,
num_classes,
param_attr,
bias_attr)
loss, prob = BaseModel._fc_classify(emb,
label,
num_classes,
param_attr,
bias_attr)
elif loss_type == "arcface":
loss, prob = self.arcface(emb,
label,
num_classes,
param_attr,
margin,
scale)
loss, prob = BaseModel._arcface(emb,
label,
num_classes,
param_attr,
margin,
scale)
elif loss_type == "dist_arcface":
loss = dist_algo._distributed_arcface_classify(
x=emb, label=label, class_num=num_classes,
nranks=nranks, rank_id=rank_id, margin=margin,
logit_scale=scale, param_attr=param_attr)
prob = None
loss = dist_algo.distributed_arcface_classify(x=emb,
label=label,
class_num=num_classes,
nranks=num_ranks,
rank_id=rank_id,
margin=margin,
logit_scale=scale,
param_attr=param_attr)
elif loss_type == "dist_softmax":
loss = dist_algo._distributed_softmax_classify(
x=emb, label=label, class_num=num_classes,
nranks=nranks, rank_id=rank_id, param_attr=param_attr,
use_bias=True, bias_attr=bias_attr)
prob = None
loss = dist_algo.distributed_softmax_classify(x=emb,
label=label,
class_num=num_classes,
nranks=num_ranks,
rank_id=rank_id,
param_attr=param_attr,
use_bias=True,
bias_attr=bias_attr)
return emb, loss, prob
def fc_classify(self, input, label, out_dim, param_attr, bias_attr):
@staticmethod
def _fc_classify(input, label, out_dim, param_attr, bias_attr):
if param_attr is None:
stdv = 1.0 / math.sqrt(input.shape[1] * 1.0)
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv))
stddev = 1.0 / math.sqrt(input.shape[1] * 1.0)
param_attr = fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(-stddev, stddev))
out = fluid.layers.fc(input=input,
size=out_dim,
param_attr=param_attr,
bias_attr=bias_attr)
loss, prob = fluid.layers.softmax_with_cross_entropy(logits=out,
label=label, return_softmax=True)
loss, prob = fluid.layers.softmax_with_cross_entropy(
logits=out,
label=label,
return_softmax=True)
avg_loss = fluid.layers.mean(x=loss)
return avg_loss, prob
def arcface(self, input, label, out_dim, param_attr, margin, scale):
@staticmethod
def _arcface(input, label, out_dim, param_attr, margin, scale):
input_norm = fluid.layers.sqrt(
fluid.layers.reduce_sum(fluid.layers.square(input), dim=1))
input = fluid.layers.elementwise_div(input, input_norm, axis=0)
if param_attr is None:
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Xavier(
uniform=False, fan_in=0.0))
param_attr = fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Xavier(uniform=False, fan_in=0.0))
weight = fluid.layers.create_parameter(
shape=[input.shape[1], out_dim],
dtype='float32',
name=unique_name.generate('final_fc_w'),
attr=param_attr)
shape=[input.shape[1], out_dim],
dtype='float32',
name=unique_name.generate('final_fc_w'),
attr=param_attr)
weight_norm = fluid.layers.sqrt(
fluid.layers.reduce_sum(fluid.layers.square(weight), dim=0))
......@@ -137,10 +159,11 @@ class BaseModel(object):
logit = fluid.layers.scale(target_cos, scale=scale)
loss, prob = fluid.layers.softmax_with_cross_entropy(
logits=logit, label=label, return_softmax=True)
logits=logit,
label=label,
return_softmax=True)
avg_loss = fluid.layers.mean(x=loss)
one_hot.stop_gradient = True
return avg_loss, prob
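As the docstrings above describe, a user model only implements `build_network`, and `get_output` appends the (possibly distributed) fc layer and loss. A minimal sketch under that assumption; the class `MyModel`, its single fc layer, and the exact import path are hypothetical, while the signatures follow the code in this diff:

```python
import paddle.fluid as fluid
from plsc.models.base_model import BaseModel  # path assumed from this diff


class MyModel(BaseModel):
    # Hypothetical user model; only build_network has to be implemented.
    def __init__(self, emb_dim=512):
        super(MyModel, self).__init__()
        self.emb_dim = emb_dim

    def build_network(self, input, label, is_train=True):
        # Return the embedding; get_output adds the distributed fc layer
        # and the loss at the end of the model automatically.
        return fluid.layers.fc(input=input, size=self.emb_dim, act=None)


# Usage sketch (shapes and loss settings follow the defaults shown above):
# image = fluid.layers.data(name='image', shape=[3, 112, 112], dtype='float32')
# label = fluid.layers.data(name='label', shape=[1], dtype='int64')
# model = MyModel()
# emb, loss, prob = model.get_output(image, label,
#                                    num_classes=85742,
#                                    loss_type='dist_arcface',
#                                    margin=0.5, scale=64.0)
```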
......@@ -12,32 +12,38 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import division
from __future__ import print_function
import math
import logging
from six.moves import reduce
import paddle.fluid as fluid
from paddle.fluid.layer_helper import LayerHelper
from paddle.fluid.framework import Variable, default_startup_program
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.initializer import Normal, Constant
import paddle.fluid.layers.collective as collective
import paddle.fluid.layers.nn as nn
import paddle.fluid.layers.ops as ops
import paddle.fluid.layers as layers
import paddle.fluid.layers.collective as collective
from paddle.fluid.optimizer import Optimizer
import paddle.fluid.unique_name as unique_name
from paddle.fluid.framework import Variable, default_startup_program
from paddle.fluid.initializer import Normal
from paddle.fluid.layer_helper import LayerHelper
from paddle.fluid.optimizer import Optimizer
from paddle.fluid.param_attr import ParamAttr
from ..utils.fp16_utils import rewrite_program, update_role_var_grad
from ..utils.fp16_utils import update_loss_scaling, move_optimize_ops_back
from ..utils.fp16_lists import AutoMixedPrecisionLists
from six.moves import reduce
__all__ = ['distributed_arcface_classify', 'distributed_softmax_classify',
'DistributedClassificationOptimizer']
class DistributedClassificationOptimizer(Optimizer):
'''
A optimizer wrapper to generate backward network for distributed
"""
An optimizer wrapper to generate backward network for distributed
classification training of model parallelism.
'''
"""
def init_fp16_params(self, loss_type, fp16_user_dict):
# set default value for fp16_params_dict
fp16_params_dict = dict()
......@@ -261,15 +267,15 @@ class DistributedClassificationOptimizer(Optimizer):
})
def insert_commom_backward_op(self,
block,
index,
shard_logit,
shard_prob,
shard_label,
shard_dim,
op_role_key,
backward_role,
loss_backward_role):
block,
index,
shard_logit,
shard_prob,
shard_label,
shard_dim,
op_role_key,
backward_role,
loss_backward_role):
'''
Insert backward ops when not using mixed precision training.
Common to all loss types.
......@@ -421,10 +427,10 @@ class DistributedClassificationOptimizer(Optimizer):
class DistributedClassifier(object):
'''
"""
Toolkit for distributed classification, in which the parameter of the last
fully-connected layer is distributed to all trainers
'''
"""
def __init__(self, nclasses, nranks, rank_id, layer_helper):
self.nclasses = nclasses
......@@ -446,29 +452,29 @@ class DistributedClassifier(object):
dtype,
in_dim,
param_attr=None,
use_bias=True,
bias_attr=None,
transpose_weight=False,
use_bias=True):
transpose_weight=False):
if param_attr is None:
stdv = math.sqrt(2.0 / (in_dim + self.nclasses))
param_attr = ParamAttr(initializer=Normal(scale=stdv))
stddev = math.sqrt(2.0 / (in_dim + self.nclasses))
param_attr = ParamAttr(initializer=Normal(scale=stddev))
weight_shape = [self.shard_dim, in_dim
] if transpose_weight else [in_dim, self.shard_dim]
weight = self._layer_helper.create_parameter(
shape=weight_shape, dtype=dtype, attr=param_attr, is_bias=False)
# avoid distributed parameter allreduce gradients
# avoid allreducing gradients for distributed parameters
weight.is_distributed = True
# avoid distributed parameter broadcasting in startup program
# avoid broadcasting distributed parameters in startup program
default_startup_program().global_block().vars[
weight.name].is_distributed = True
bias = None
if use_bias:
bias = self._layer_helper.create_parameter(
shape=[self.shard_dim],
attr=bias_attr,
dtype=dtype,
is_bias=True)
bias = self._layer_helper.create_parameter(shape=[self.shard_dim],
attr=bias_attr,
dtype=dtype,
is_bias=True)
bias.is_distributed = True
default_startup_program().global_block().vars[
bias.name].is_distributed = True
......@@ -505,12 +511,11 @@ class DistributedClassifier(object):
use_bias=True,
bias_attr=None):
flatten_dim = reduce(lambda a, b: a * b, x.shape[1:], 1)
weight, bias = self.create_parameter(
dtype=x.dtype,
in_dim=flatten_dim,
param_attr=param_attr,
bias_attr=bias_attr,
use_bias=use_bias)
weight, bias = self.create_parameter(dtype=x.dtype,
in_dim=flatten_dim,
param_attr=param_attr,
bias_attr=bias_attr,
use_bias=use_bias)
x_all = collective._c_allgather(
x, nranks=self.nranks, use_calc_stream=True)
......@@ -551,11 +556,10 @@ class DistributedClassifier(object):
reference: ArcFace. https://arxiv.org/abs/1801.07698
'''
flatten_dim = reduce(lambda a, b: a * b, x.shape[1:], 1)
weight, bias = self.create_parameter(
dtype=x.dtype,
in_dim=flatten_dim,
param_attr=param_attr,
use_bias=False)
weight, bias = self.create_parameter(dtype=x.dtype,
in_dim=flatten_dim,
param_attr=param_attr,
use_bias=False)
# normalize x
x_l2 = ops.sqrt(nn.reduce_sum(ops.square(x), dim=1))
......@@ -566,12 +570,11 @@ class DistributedClassifier(object):
label_all = collective._c_allgather(
label, nranks=self.nranks, use_calc_stream=True)
label_all.stop_gradient = True
shard_label = nn.shard_index(
label_all,
index_num=self.nclasses,
nshards=self.nranks,
shard_id=self.rank_id,
ignore_value=-1)
shard_label = nn.shard_index(label_all,
index_num=self.nclasses,
nshards=self.nranks,
shard_id=self.rank_id,
ignore_value=-1)
# TODO check necessary
shard_label.stop_gradient = True
......@@ -605,16 +608,16 @@ class DistributedClassifier(object):
return avg_loss
def _distributed_softmax_classify(x,
label,
class_num,
nranks,
rank_id,
param_attr=None,
use_bias=True,
bias_attr=None,
name=None):
'''
def distributed_softmax_classify(x,
label,
class_num,
nranks,
rank_id,
param_attr=None,
use_bias=True,
bias_attr=None,
name=None):
"""
Classification layer with FC, softmax and cross entropy calculation, in a
distributed version for the case of a very large number of classes.
......@@ -652,26 +655,29 @@ def _distributed_softmax_classify(x,
class_num=1000,
nranks=8,
rank_id=0)
'''
"""
if name is None:
name = 'dist@softmax@rank@%05d' % rank_id
helper = LayerHelper(name, **locals())
classifier = DistributedClassifier(class_num, nranks, rank_id, helper)
return classifier.softmax_classify(x, label, param_attr, use_bias,
return classifier.softmax_classify(x,
label,
param_attr,
use_bias,
bias_attr)
def _distributed_arcface_classify(x,
label,
class_num,
nranks,
rank_id,
margin=0.5,
logit_scale=64.0,
param_attr=None,
name=None):
'''
def distributed_arcface_classify(x,
label,
class_num,
nranks,
rank_id,
margin=0.5,
logit_scale=64.0,
param_attr=None,
name=None):
"""
Classification layer with ArcFace loss, in a distributed version for the
case of a very large number of classes. The equation is
......@@ -719,14 +725,13 @@ def _distributed_arcface_classify(x,
class_num=1000,
nranks=8,
rank_id=0)
'''
"""
if name is None:
name = 'dist@arcface@rank@%05d' % rank_id
helper = LayerHelper(name, **locals())
classifier = DistributedClassifier(class_num, nranks, rank_id, helper)
return classifier.arcface_classify(
x=x,
label=label,
margin=margin,
logit_scale=logit_scale,
param_attr=param_attr)
return classifier.arcface_classify(x=x,
label=label,
margin=margin,
logit_scale=logit_scale,
param_attr=param_attr)
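A usage sketch of the renamed public entry point. The argument values follow the docstring example above; the import path is assumed from the package layout in this diff, and actually executing the collective ops requires a multi-trainer fleet environment.

```python
import paddle.fluid as fluid
from plsc.models import dist_algo  # import path assumed from this diff

input = fluid.layers.data(name='input', shape=[512], dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64')

# ArcFace classification with the fc weight sharded across 8 trainers.
loss = dist_algo.distributed_arcface_classify(x=input,
                                               label=label,
                                               class_num=1000,
                                               nranks=8,
                                               rank_id=0,
                                               margin=0.5,
                                               logit_scale=64.0)
```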
......@@ -12,14 +12,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import paddle.fluid as fluid
import math
import os
import numpy as np
from paddle.fluid import unique_name
from .base_model import BaseModel
from .base_model import BaseModel
__all__ = ["ResNet", "ResNet50", "ResNet101", "ResNet152"]
......@@ -33,12 +28,13 @@ class ResNet(BaseModel):
def build_network(self,
input,
label,
is_train):
is_train=True):
layers = self.layers
supported_layers = [50, 101, 152]
assert layers in supported_layers, \
"supported layers {}, but given {}".format(supported_layers, layers)
depth = None
if layers == 50:
depth = [3, 4, 14, 3]
elif layers == 101:
......@@ -59,21 +55,26 @@ class ResNet(BaseModel):
stride=2 if i == 0 else 1,
is_train=is_train)
bn = fluid.layers.batch_norm(input=conv, act=None, epsilon=2e-05,
is_test=False if is_train else True)
drop = fluid.layers.dropout(x=bn, dropout_prob=0.4,
dropout_implementation='upscale_in_train',
is_test=False if is_train else True)
bn = fluid.layers.batch_norm(input=conv,
act=None,
epsilon=2e-05,
is_test=False if is_train else True)
drop = fluid.layers.dropout(x=bn,
dropout_prob=0.4,
dropout_implementation='upscale_in_train',
is_test=False if is_train else True)
fc = fluid.layers.fc(
input=drop,
size=self.emb_dim,
act=None,
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Xavier(uniform=False, fan_in=0.0)),
initializer=fluid.initializer.Xavier(uniform=False,
fan_in=0.0)),
bias_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.ConstantInitializer()))
emb = fluid.layers.batch_norm(input=fc, act=None, epsilon=2e-05,
is_test=False if is_train else True)
emb = fluid.layers.batch_norm(input=fc,
act=None,
epsilon=2e-05,
is_test=False if is_train else True)
return emb
def conv_bn_layer(self,
......@@ -92,51 +93,79 @@ class ResNet(BaseModel):
stride=stride,
padding=pad,
groups=groups,
act=None,
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Xavier(
uniform=False, fan_in=0.0)),
bias_attr=False)
if act == 'prelu':
bn = fluid.layers.batch_norm(input=conv, act=None, epsilon=2e-05,
momentum=0.9, is_test=False if is_train else True)
return fluid.layers.prelu(bn, mode="all",
bn = fluid.layers.batch_norm(input=conv,
act=None,
epsilon=2e-05,
momentum=0.9,
is_test=False if is_train else True)
return fluid.layers.prelu(
bn,
mode="all",
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Constant(0.25)))
else:
return fluid.layers.batch_norm(input=conv, act=act, epsilon=2e-05,
is_test=False if is_train else True)
return fluid.layers.batch_norm(input=conv,
act=act,
epsilon=2e-05,
is_test=False if is_train else True)
def shortcut(self, input, ch_out, stride, is_train):
ch_in = input.shape[1]
if ch_in != ch_out or stride != 1:
return self.conv_bn_layer(input, ch_out, 1, stride,
is_train=is_train)
return self.conv_bn_layer(input,
ch_out,
1,
stride,
is_train=is_train)
else:
return input
def bottleneck_block(self, input, num_filters, stride, is_train):
if self.layers < 101:
bn1 = fluid.layers.batch_norm(input=input, act=None, epsilon=2e-05,
is_test=False if is_train else True)
conv1 = self.conv_bn_layer(
input=bn1, num_filters=num_filters, filter_size=3, pad=1,
act='prelu', is_train=is_train)
conv2 = self.conv_bn_layer(
input=conv1, num_filters=num_filters, filter_size=3,
stride=stride, pad=1, act=None, is_train=is_train)
bn1 = fluid.layers.batch_norm(input=input,
act=None,
epsilon=2e-05,
is_test=False if is_train else True)
conv1 = self.conv_bn_layer(input=bn1,
num_filters=num_filters,
filter_size=3,
pad=1,
act='prelu',
is_train=is_train)
conv2 = self.conv_bn_layer(input=conv1,
num_filters=num_filters,
filter_size=3,
stride=stride,
pad=1,
is_train=is_train)
else:
bn0 = fluid.layers.batch_norm(input=input, act=None, epsilon=2e-05,
is_test=False if is_train else True)
conv0 = self.conv_bn_layer(
input=bn0, num_filters=num_filters/4, filter_size=1, pad=0,
act='prelu', is_train=is_train)
conv1 = self.conv_bn_layer(
input=conv0, num_filters=num_filters/4, filter_size=3, pad=1,
act='prelu', is_train=is_train)
conv2 = self.conv_bn_layer(
input=conv1, num_filters=num_filters, filter_size=1,
stride=stride, pad=0, act=None, is_train=is_train)
bn0 = fluid.layers.batch_norm(input=input,
act=None,
epsilon=2e-05,
is_test=False if is_train else True)
conv0 = self.conv_bn_layer(input=bn0,
num_filters=num_filters / 4,
filter_size=1,
pad=0,
act='prelu',
is_train=is_train)
conv1 = self.conv_bn_layer(input=conv0,
num_filters=num_filters / 4,
filter_size=3,
pad=1,
act='prelu',
is_train=is_train)
conv2 = self.conv_bn_layer(input=conv1,
num_filters=num_filters,
filter_size=1,
stride=stride,
pad=0,
is_train=is_train)
short = self.shortcut(input, num_filters, stride, is_train=is_train)
return fluid.layers.elementwise_add(x=short, y=conv2, act=None)
......
......@@ -11,4 +11,3 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import math
import random
import pickle
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import base64
import functools
import math
import os
import pickle
import random
import numpy as np
import paddle
import six
from PIL import Image, ImageEnhance
try:
from StringIO import StringIO
except ImportError:
from io import StringIO
from io import BytesIO
random.seed(0)
......@@ -18,7 +36,6 @@ DATA_DIM = 112
THREAD = 8
BUF_SIZE = 10240
img_mean = np.array([127.5, 127.5, 127.5]).reshape((3, 1, 1))
img_std = np.array([128.0, 128.0, 128.0]).reshape((3, 1, 1))
......@@ -97,13 +114,13 @@ def RandomResizedCrop(img, size):
return img
def random_crop(img, size, scale=[0.08, 1.0], ratio=[3. / 4., 4. / 3.]):
def random_crop(img, size, scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.)):
aspect_ratio = math.sqrt(random.uniform(*ratio))
w = 1. * aspect_ratio
h = 1. / aspect_ratio
bound = min((float(img.size[0]) / img.size[1]) / (w**2),
(float(img.size[1]) / img.size[0]) / (h**2))
bound = min((float(img.size[0]) / img.size[1]) / (w ** 2),
(float(img.size[1]) / img.size[0]) / (h ** 2))
scale_max = min(scale[1], bound)
scale_min = min(scale[0], bound)
......@@ -150,12 +167,12 @@ def distort_color(img):
return img
def process_image_imagepath(sample,
class_dim,
color_jitter,
rotate,
rand_mirror,
normalize):
def process_image(sample,
class_dim,
color_jitter,
rotate,
rand_mirror,
normalize):
img_data = base64.b64decode(sample[0])
img = Image.open(StringIO(img_data))
......@@ -185,49 +202,62 @@ def process_image_imagepath(sample,
return img, sample[1]
def arc_iterator(file_list,
def arc_iterator(data_dir,
file_list,
class_dim,
color_jitter=False,
rotate=False,
rand_mirror=False,
normalize=False):
trainer_id = int(os.getenv("PADDLE_TRAINER_ID", "0"))
trainer_count = int(os.getenv("PADDLE_TRAINERS_NUM", "1"))
num_trainers = int(os.getenv("PADDLE_TRAINERS_NUM", "1"))
def reader():
with open(file_list, 'r') as f:
flist = f.readlines()
assert len(flist) % trainer_count == 0, \
"Number of files should be divisible by trainer count, " \
"run base64 file preprocessing tool first."
num_files_per_trainer = len(flist) // trainer_count
start = num_files_per_trainer * trainer_id
end = start + num_files_per_trainer
flist = flist[start:end]
for file in flist:
with open(file, 'r') as f:
for line in f.xreadlines():
line = line.strip().split('\t')
image, label = line[0], line[1]
yield image, label
mapper = functools.partial(process_image_imagepath,
class_dim=class_dim, color_jitter=color_jitter, rotate=rotate,
rand_mirror=rand_mirror, normalize=normalize)
assert len(flist) == num_trainers, \
"Please use process_base64_files.py to pre-process the dataset."
file = flist[trainer_id]
file = os.path.join(data_dir, file)
with open(file, 'r') as f:
if six.PY2:
for line in f.xreadlines():
line = line.strip().split('\t')
image, label = line[0], line[1]
yield image, label
else:
for line in f:
line = line.strip().split('\t')
image, label = line[0], line[1]
yield image, label
mapper = functools.partial(process_image,
class_dim=class_dim,
color_jitter=color_jitter,
rotate=rotate,
rand_mirror=rand_mirror,
normalize=normalize)
return paddle.reader.xmap_readers(mapper, reader, THREAD, BUF_SIZE)
def load_bin(path, image_size):
bins, issame_list = pickle.load(open(path, 'rb'))
if six.PY2:
bins, issame_list = pickle.load(open(path, 'rb'))
else:
bins, issame_list = pickle.load(open(path, 'rb'), encoding='bytes')
data_list = []
for flip in [0, 1]:
data = np.empty((len(issame_list)*2, 3, image_size[0], image_size[1]))
data = np.empty((len(issame_list) * 2, 3, image_size[0], image_size[1]))
data_list.append(data)
for i in xrange(len(issame_list)*2):
for i in range(len(issame_list) * 2):
_bin = bins[i]
if not isinstance(_bin, basestring):
_bin = _bin.tostring()
img_ori = Image.open(StringIO(_bin))
if six.PY2:
if not isinstance(_bin, six.string_types):
_bin = _bin.tostring()
img_ori = Image.open(StringIO(_bin))
else:
img_ori = Image.open(BytesIO(_bin))
for flip in [0, 1]:
img = img_ori.copy()
if flip == 1:
......@@ -241,13 +271,18 @@ def load_bin(path, image_size):
if i % 1000 == 0:
print('loading bin', i)
print(data_list[0].shape)
return (data_list, issame_list)
def train(data_dir, file_list, num_classes):
file_path = os.path.join(data_dir, file_list)
return arc_iterator(file_path, class_dim=num_classes, color_jitter=False,
rotate=False, rand_mirror=True, normalize=True)
return data_list, issame_list
def train(data_dir, num_classes):
file_path = os.path.join(data_dir, 'file_list.txt')
return arc_iterator(data_dir,
file_path,
class_dim=num_classes,
color_jitter=False,
rotate=False,
rand_mirror=True,
normalize=True)
def test(data_dir, datasets):
......
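With the new signature, `train()` takes only the data directory and class count, reads `file_list.txt` from it, and expects exactly one base64 data file per trainer, as produced by the preprocessing tool. A usage sketch; the import path is hypothetical, and the environment variables are normally set by the distributed launcher:

```python
import os

# Hypothetical import path; the module's real package location may differ.
from plsc.utils import base64_reader as reader

# The reader picks its file shard from these environment variables.
os.environ.setdefault("PADDLE_TRAINER_ID", "0")
os.environ.setdefault("PADDLE_TRAINERS_NUM", "1")

# './train_data' must contain file_list.txt plus one base64 data file
# per trainer, produced by process_base64_files.py.
train_reader = reader.train('./train_data', num_classes=85742)
for image, label in train_reader():
    # Each sample is a normalized CHW float image and its integer label.
    break
```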
import os
import functools
import math
import random
import os
import pickle
import functools
import random
import numpy as np
import paddle
import six
from PIL import Image, ImageEnhance
try:
from StringIO import StringIO
except ImportError:
from io import StringIO
from io import BytesIO
random.seed(0)
......@@ -123,13 +126,13 @@ def RandomResizedCrop(img, size):
return img
def random_crop(img, size, scale=[0.08, 1.0], ratio=[3. / 4., 4. / 3.]):
def random_crop(img, size, scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.)):
aspect_ratio = math.sqrt(random.uniform(*ratio))
w = 1. * aspect_ratio
h = 1. / aspect_ratio
bound = min((float(img.size[0]) / img.size[1]) / (w**2),
(float(img.size[1]) / img.size[0]) / (h**2))
bound = min((float(img.size[0]) / img.size[1]) / (w ** 2),
(float(img.size[1]) / img.size[0]) / (h ** 2))
scale_max = min(scale[1], bound)
scale_min = min(scale[0], bound)
......@@ -222,28 +225,37 @@ def arc_iterator(data,
def reader():
if shuffle:
random.shuffle(data)
for j in xrange(len(data)):
for j in range(len(data)):
path, label = data[j]
path = os.path.join(data_dir, path)
yield path, label
mapper = functools.partial(process_image_imagepath, class_dim=class_dim,
color_jitter=color_jitter, rotate=rotate,
rand_mirror=rand_mirror, normalize=normalize)
mapper = functools.partial(process_image_imagepath,
class_dim=class_dim,
color_jitter=color_jitter,
rotate=rotate,
rand_mirror=rand_mirror,
normalize=normalize)
return paddle.reader.xmap_readers(mapper, reader, THREAD, BUF_SIZE)
def load_bin(path, image_size):
bins, issame_list = pickle.load(open(path, 'rb'))
if six.PY2:
bins, issame_list = pickle.load(open(path, 'rb'))
else:
bins, issame_list = pickle.load(open(path, 'rb'), encoding='bytes')
data_list = []
for flip in [0, 1]:
data = np.empty((len(issame_list)*2, 3, image_size[0], image_size[1]))
data = np.empty((len(issame_list) * 2, 3, image_size[0], image_size[1]))
data_list.append(data)
for i in range(len(issame_list)*2):
for i in range(len(issame_list) * 2):
_bin = bins[i]
if not isinstance(_bin, six.string_types):
_bin = _bin.tostring()
img_ori = Image.open(StringIO(_bin))
if six.PY2:
if not isinstance(_bin, six.string_types):
_bin = _bin.tostring()
img_ori = Image.open(StringIO(_bin))
else:
img_ori = Image.open(BytesIO(_bin))
for flip in [0, 1]:
img = img_ori.copy()
if flip == 1:
......@@ -257,14 +269,19 @@ def load_bin(path, image_size):
if i % 1000 == 0:
print('loading bin', i)
print(data_list[0].shape)
return (data_list, issame_list)
return data_list, issame_list
def arc_train(data_dir, class_dim):
train_image_list = get_train_image_list(data_dir)
return arc_iterator(train_image_list, shuffle=True, class_dim=class_dim,
data_dir=data_dir, color_jitter=False, rotate=False, rand_mirror=True,
normalize=True)
return arc_iterator(train_image_list,
shuffle=True,
class_dim=class_dim,
data_dir=data_dir,
color_jitter=False,
rotate=False,
rand_mirror=True,
normalize=True)
def test(data_dir, datasets):
......
This diff has been collapsed.
This diff has been collapsed.
......@@ -12,4 +12,4 @@
# See the License for the specific language governing permissions and
# limitations under the License.
""" PLSC version string """
plsc_version = "0.1.0"
plsc_version = "0.0.0"
......@@ -12,29 +12,28 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
from __future__ import division
from __future__ import print_function
import os
import argparse
import random
import time
import math
import logging
import math
import os
import random
import sqlite3
import tempfile
import six
import time
import six
logging.basicConfig(level=logging.INFO,
format='[%(levelname)s %(asctime)s line:%(lineno)d] %(message)s',
datefmt='%d %b %Y %H:%M:%S')
format='[%(asctime)s - %(levelname)s] %(message)s',
datefmt='%d %b %Y %H:%M:%S')
logger = logging.getLogger()
parser = argparse.ArgumentParser(description="""
Tool to preprocess dataset in base64 format.""")
"""
We assume that the dataset directory contains a file-list file, and one
or more data files. Each line of the file-list file represents a data file.
......@@ -111,9 +110,9 @@ class Base64Preprocessor(object):
line = line.strip()
file_path = os.path.join(self.data_dir, line)
with open(file_path, 'r') as df:
for line in df.xreadlines():
line = line.strip().split('\t')
self.insert_to_db(cnt, line)
for line_local in df.xreadlines():
line_local = line_local.strip().split('\t')
self.insert_to_db(cnt, line_local)
cnt += 1
os.remove(file_path)
else:
......@@ -121,9 +120,9 @@ class Base64Preprocessor(object):
line = line.strip()
file_path = os.path.join(self.data_dir, line)
with open(file_path, 'r') as df:
for line in df:
line = line.strip().split('\t')
self.insert_to_db(cnt, line)
for line_local in df:
line_local = line_local.strip().split('\t')
self.insert_to_db(cnt, line_local)
cnt += 1
os.remove(file_path)
......@@ -143,19 +142,20 @@ class Base64Preprocessor(object):
start_time = time.time()
lines_per_rank = int(math.ceil(num/nranks))
lines_per_rank = int(math.ceil(num / nranks))
total_num = lines_per_rank * nranks
index = index + index[0:total_num - num]
assert len(index) == total_num
for rank in range(nranks):
start = rank * lines_per_rank
end = (rank + 1) * lines_per_rank # exclusive
end = (rank + 1) * lines_per_rank # exclusive
f_handler = open(os.path.join(self.data_dir,
".tmp_" + str(rank)), 'w')
".tmp_" + str(rank)), 'w')
for i in range(start, end):
idx = index[i]
sql_cmd = "SELECT DATA, LABEL FROM DATASET WHERE ID={};".format(idx)
sql_cmd = "SELECT DATA, LABEL FROM DATASET WHERE ID={};".format(
idx)
cursor = self.cursor.execute(sql_cmd)
for result in cursor:
data = result[0]
......@@ -174,7 +174,7 @@ class Base64Preprocessor(object):
line += '\n'
f_t.writelines(line)
os.rename(os.path.join(data_dir, ".tmp_" + str(rank)),
os.path.join(data_dir, "base64_rank_{}".format(rank)))
os.path.join(data_dir, "base64_rank_{}".format(rank)))
os.remove(file_list)
os.rename(temp_file_list, file_list)
......@@ -183,21 +183,16 @@ class Base64Preprocessor(object):
def close_db(self):
self.conn.close()
self.tempfile.close()
os.remove(self.sqlite3_file)
def main():
global args
obj = Base64Preprocessor(args.data_dir, args.file_list, args.nranks)
obj.shuffle_files()
obj.close_db()
#data_dir = args.data_dir
#file_list = args.file_list
#nranks = args.nranks
#names, file_num_map, num = get_image_info(data_dir, file_list)
#
if __name__ == "__main__":
main()
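For completeness, a sketch of running the preprocessing step that the new reader relies on (one shuffled base64 file per trainer). The constructor arguments mirror the positional call in `main()` above; the module name is the one the readers reference (`process_base64_files.py`). Invoking the script from the command line with the corresponding flags is the usual route.

```python
# Driving the preprocessor programmatically; arguments are positional as
# in main(): (data_dir, file_list, nranks).
from process_base64_files import Base64Preprocessor

preprocessor = Base64Preprocessor('./train_data', 'file_list.txt', 8)
preprocessor.shuffle_files()  # shuffles samples and writes base64_rank_* files
preprocessor.close_db()       # closes and removes the temporary sqlite3 database
```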