Make the tools compatible with Python 3 using six; the basic usage command (convert) runs successfully, but more testing may be needed

c8d90837 · jiazhenwei · 3c1fcc20 · c8d90837 · c8d90837 · c8d90837
16 changed files
--- a/.gitignore
+++ b/.gitignore
@@ -19,3 +19,5 @@ builds/
 mace/examples/android/macelibrary/src/main/cpp/mace/
 \.project/
+*swp
+*~
--- a/mace/python/tools/binary_codegen.py
+++ b/mace/python/tools/binary_codegen.py
@@ -20,6 +20,8 @@ import struct
 import jinja2
 import numpy as np
+import six
 # python mace/python/tools/binary_codegen.py \
 #     --binary_dirs=${BIN_FILE} \
@@ -39,11 +41,11 @@ def generate_cpp_source(binary_dirs, binary_file_name, variable_name):
        with open(binary_path, "rb") as f:
            binary_array = np.fromfile(f, dtype=np.uint8)
-        print "Generate binary from", binary_path
+        six.print_("Generate binary from", binary_path)
        idx = 0
        size, = struct.unpack("Q", binary_array[idx:idx + 8])
        idx += 8
-        for _ in xrange(size):
+        for _ in six.moves.range(size):
            key_size, = struct.unpack("i", binary_array[idx:idx + 4])
            idx += 4
            key, = struct.unpack(

--- a/mace/python/tools/convert_util.py
+++ b/mace/python/tools/convert_util.py
@@ -21,7 +21,7 @@ def mace_check(condition, msg):
 def roundup_div4(value):
-    return int((value + 3) / 4)
+    return int((value + 3) // 4)
 class OpenCLBufferType(enum.Enum):

--- a/mace/python/tools/converter.py
+++ b/mace/python/tools/converter.py
@@ -18,6 +18,8 @@ import hashlib
 import os.path
 import copy
+import six
 from mace.proto import mace_pb2
 from mace.python.tools import memory_optimizer
 from mace.python.tools import model_saver
@@ -25,7 +27,6 @@ from mace.python.tools.converter_tool import base_converter as cvt
 from mace.python.tools.converter_tool import transformer
 from mace.python.tools.convert_util import mace_check
 # ./bazel-bin/mace/python/tools/tf_converter --model_file quantized_test.pb \
 #                                            --output quantized_test_dsp.pb \
 #                                            --runtime dsp \
@@ -70,34 +71,39 @@ def parse_float_array_from_str(ints_str):
 def main(unused_args):
    if not os.path.isfile(FLAGS.model_file):
-        print("Input graph file '" + FLAGS.model_file + "' does not exist!")
+        six.print_("Input graph file '" +
+                   FLAGS.model_file +
+                   "' does not exist!", file=sys.stderr)
        sys.exit(-1)
    model_checksum = file_checksum(FLAGS.model_file)
    if FLAGS.model_checksum != "" and FLAGS.model_checksum != model_checksum:
-        print("Model checksum mismatch: %s != %s" % (model_checksum,
+        six.print_("Model checksum mismatch: %s != %s" %
-                                                     FLAGS.model_checksum))
+                   (model_checksum, FLAGS.model_checksum), file=sys.stderr)
        sys.exit(-1)
    weight_checksum = None
    if FLAGS.platform == 'caffe':
        if not os.path.isfile(FLAGS.weight_file):
-            print("Input weight file '" + FLAGS.weight_file +
+            six.print_("Input weight file '" + FLAGS.weight_file +
-                  "' does not exist!")
+                       "' does not exist!", file=sys.stderr)
            sys.exit(-1)
        weight_checksum = file_checksum(FLAGS.weight_file)
        if FLAGS.weight_checksum != "" and \
                FLAGS.weight_checksum != weight_checksum:
-            print("Weight checksum mismatch: %s != %s" %
+            six.print_("Weight checksum mismatch: %s != %s" %
-                  (weight_checksum, FLAGS.weight_checksum))
+                       (weight_checksum, FLAGS.weight_checksum),
+                       file=sys.stderr)
            sys.exit(-1)
    if FLAGS.platform not in ['tensorflow', 'caffe']:
-        print ("platform %s is not supported." % FLAGS.platform)
+        six.print_("platform %s is not supported." % FLAGS.platform,
+                   file=sys.stderr)
        sys.exit(-1)
    if FLAGS.runtime not in ['cpu', 'gpu', 'dsp', 'cpu+gpu']:
-        print ("runtime %s is not supported." % FLAGS.runtime)
+        six.print_("runtime %s is not supported." % FLAGS.runtime,
+                   file=sys.stderr)
        sys.exit(-1)
    option = cvt.ConverterOption()
@@ -117,7 +123,7 @@ def main(unused_args):
        input_node_ranges = []
    if len(input_node_names) != len(input_node_shapes):
        raise Exception('input node count and shape count do not match.')
-    for i in xrange(len(input_node_names)):
+    for i in six.moves.range(len(input_node_names)):
        input_node = cvt.NodeInfo()
        input_node.name = input_node_names[i]
        input_node.shape = parse_int_array_from_str(input_node_shapes[i])
@@ -126,7 +132,7 @@ def main(unused_args):
        option.add_input_node(input_node)
    output_node_names = FLAGS.output_node.split(',')
-    for i in xrange(len(output_node_names)):
+    for i in six.moves.range(len(output_node_names)):
        output_node = cvt.NodeInfo()
        output_node.name = output_node_names[i]
        option.add_output_node(output_node)
@@ -152,7 +158,8 @@ def main(unused_args):
                                                       FLAGS.model_file,
                                                       FLAGS.weight_file)
        else:
-            print("Mace do not support platorm %s yet." & FLAGS.platform)
+            six.print_("Mace do not support platorm %s yet." % FLAGS.platform,
+                       file=sys.stderr)
            exit(1)
        output_graph_def = converter.run()
@@ -166,9 +173,9 @@ def main(unused_args):
            mace_gpu_transformer = transformer.Transformer(
                option, output_graph_def)
            output_graph_def = mace_gpu_transformer.run()
-            print "start optimize gpu memory."
+            six.print_("start optimize gpu memory.")
            memory_optimizer.optimize_gpu_memory(output_graph_def)
-            print "GPU memory optimization done."
+            six.print_("GPU memory optimization done.")
            option.device = cvt.DeviceType.CPU.value
            option.data_type = parse_data_type(
@@ -177,11 +184,11 @@ def main(unused_args):
            mace_cpu_transformer = transformer.Transformer(
                option, cpu_graph_def)
            cpu_graph_def = mace_cpu_transformer.run()
-            print "start optimize cpu memory."
+            print("start optimize cpu memory.")
            memory_optimizer.optimize_cpu_memory(cpu_graph_def)
-            print "CPU memory optimization done."
+            print("CPU memory optimization done.")
-            print "Merge cpu and gpu ops together"
+            print("Merge cpu and gpu ops together")
            output_graph_def.op.extend(cpu_graph_def.op)
            output_graph_def.mem_arena.mem_block.extend(
                cpu_graph_def.mem_arena.mem_block)
@@ -192,7 +199,7 @@ def main(unused_args):
            for arg in cpu_graph_def.arg:
                if arg.name not in output_graph_arg_names:
                    output_graph_def.arg.extend(arg)
-            print "Merge done"
+            print("Merge done")
        else:
            option.device = device_type_map[FLAGS.runtime]
            option.data_type = parse_data_type(
@@ -201,7 +208,7 @@ def main(unused_args):
                option, output_graph_def)
            output_graph_def = mace_transformer.run()
-            print "start optimize memory."
+            print("start optimize memory.")
            if FLAGS.runtime == 'gpu':
                memory_optimizer.optimize_gpu_memory(output_graph_def)
            elif FLAGS.runtime == 'cpu':
@@ -209,7 +216,7 @@ def main(unused_args):
            else:
                mace_check(False, "runtime only support [gpu|cpu|dsp]")
-            print "Memory optimization done."
+            print("Memory optimization done.")
    model_saver.save_model(
        output_graph_def, model_checksum, weight_checksum,
@@ -238,7 +245,7 @@ def parse_args():
        type=str,
        default="",
        help="TensorFlow \'GraphDef\' file to load, "
-        "Caffe prototxt file to load.")
+             "Caffe prototxt file to load.")
    parser.add_argument(
        "--weight_file", type=str, default="", help="Caffe data file to load.")
    parser.add_argument(
@@ -302,7 +309,7 @@ def parse_args():
        type=str,
        default="file",
        help="[file|code] build models to code" +
-                "or `Protobuf` file.")
+             "or `Protobuf` file.")
    parser.add_argument(
        "--data_type",
        type=str,

--- a/mace/python/tools/converter_tool/caffe_converter.py
+++ b/mace/python/tools/converter_tool/caffe_converter.py
@@ -14,7 +14,9 @@
 import math
 import numpy as np
+import six
 import google.protobuf.text_format
 from mace.proto import mace_pb2
@@ -46,6 +48,7 @@ class CaffeOperator(object):
    Layer records caffe layer proto, while blobs records the weight data in
    format of numpy ndarray.
    """
    def __init__(self):
        self._layer = None
        self._blobs = None
@@ -92,6 +95,7 @@ class CaffeNet(object):
    """CaffeNet contains caffe operations. Output of each layer has unique
    name as we replace duplicated output name with unique one, while keep
    mace input/output name which user specifies unchanged."""
    def __init__(self):
        self._ops = {}
        self._consumers = {}
@@ -119,7 +123,7 @@ class CaffeNet(object):
        layer.bottom[:] = [self._alias_op_output_name.get(layer_input,
                                                          layer_input) for
                           layer_input in layer.bottom][:]
-        for i in xrange(len(layer.top)):
+        for i in six.moves.range(len(layer.top)):
            old_name = layer.top[i]
            if layer.type == 'Input':
                new_name = old_name
@@ -218,7 +222,7 @@ class CaffeConverter(base_converter.ConverterInterface):
    @staticmethod
    def replace_input_name(ops, src_name, dst_name):
        for op in ops:
-            for i in xrange(len(op.input)):
+            for i in six.moves.range(len(op.input)):
                if op.input[i] == src_name:
                    op.input[i] = dst_name
@@ -235,7 +239,7 @@ class CaffeConverter(base_converter.ConverterInterface):
        ops.reverse()
        visited = set()
        for op in ops:
-            for i in xrange(len(op.output)):
+            for i in six.moves.range(len(op.output)):
                original_output_name = op.output[i].split('#')[0]
                if original_output_name not in visited:
                    self.replace_input_name(
@@ -267,7 +271,7 @@ class CaffeConverter(base_converter.ConverterInterface):
                if len(layer.exclude):
                    phase = phase_map[layer.exclude[0].phase]
                if phase != 'test' or layer.type == 'Dropout':
-                    print ("Remove layer %s (%s)" % (layer.name, layer.type))
+                    print("Remove layer %s (%s)" % (layer.name, layer.type))
                    layers.layer.remove(layer)
                    changed = True
                    break
@@ -497,8 +501,8 @@ class CaffeConverter(base_converter.ConverterInterface):
            beta_value = scale_op.blobs[1]
        scale_value = (
-            (1.0 / np.vectorize(math.sqrt)(var_value + epsilon_value)) *
+                (1.0 / np.vectorize(math.sqrt)(var_value + epsilon_value)) *
-            gamma_value).reshape(-1)
+                gamma_value).reshape(-1)
        offset_value = ((-mean_value * scale_value) + beta_value).reshape(-1)
        input_names = [op.name + '_scale', op.name + '_offset']

--- a/mace/python/tools/converter_tool/shape_inference.py
+++ b/mace/python/tools/converter_tool/shape_inference.py
@@ -14,7 +14,9 @@
 import math
 import numpy as np
+import six
 from mace.python.tools.converter_tool.transformer import Transformer
 from mace.python.tools.converter_tool.base_converter import DataFormat
@@ -68,7 +70,7 @@ class ShapeInference(object):
                   "Op %s (%s) output count is different from "
                   "output shape count" % (
                       op.name, op.type))
-        for i in xrange(len(shapes)):
+        for i in six.moves.range(len(shapes)):
            output_name = op.output[i]
            output_shape = op.output_shape.add()
            output_shape.dims.extend(shapes[i])
@@ -174,8 +176,8 @@ class ShapeInference(object):
            mace_check(False,
                       "Mace can only infer shape for"
                       " NCHW input and OIHW filter")
-        print ("deconv layer %s (%s) input:%s filter:%s output:%s" %
+        print("deconv layer %s (%s) input:%s filter:%s output:%s" %
-               (op.name, op.type, input_shape, filter_shape, output_shape))
+              (op.name, op.type, input_shape, filter_shape, output_shape))
        self.add_output_shape(op, [output_shape])

--- a/mace/python/tools/converter_tool/tensorflow_converter.py
+++ b/mace/python/tools/converter_tool/tensorflow_converter.py
@@ -15,6 +15,7 @@
 import math
 import numpy as np
+import six
 import tensorflow as tf
 from enum import Enum

--- a/mace/python/tools/converter_tool/tf_dsp_converter.py
+++ b/mace/python/tools/converter_tool/tf_dsp_converter.py
@@ -18,6 +18,7 @@ from mace.python.tools.converter_tool import base_converter
 from mace.python.tools import graph_util
 from mace.python.tools.convert_util import mace_check
+import six
 import tensorflow as tf
 from tensorflow.core.framework import tensor_shape_pb2
 from operator import mul
@@ -506,8 +507,8 @@ def reverse_batch_to_space_and_biasadd(net_def):
                        for follow_op in follow_ops:
                            new_follow_op = mace_pb2.OperatorDef()
                            new_follow_op.CopyFrom(follow_op)
-                            for i in xrange(len(follow_op.input)):
+                            for i in six.moves.range(len(follow_op.input)):
-                                for k in xrange(3):
+                                for k in six.moves.range(3):
                                    if new_follow_op.input[i] == get_tensor_name_from_op(  # noqa
                                        biasadd_requantize_op.name, k):
                                        new_follow_op.input[i] = get_tensor_name_from_op(  # noqa
@@ -560,7 +561,7 @@ def add_input_output_info(net_def, input_node, output_node, graph, dtype):
    input_info.dims.extend(input_tensor.shape.as_list())
    input_info.data_type = dtype
    if dtype == mace_pb2.DT_UINT8:
-        for i in xrange(2):
+        for i in six.moves.range(2):
            input_info = net_def.input_info.add()
            input_info.dims.extend([1, 1, 1, 1])
            input_info.data_type = mace_pb2.DT_FLOAT
@@ -570,7 +571,7 @@ def add_input_output_info(net_def, input_node, output_node, graph, dtype):
    output_info.dims.extend(output_tensor.shape.as_list())
    output_info.data_type = dtype
    if dtype == mace_pb2.DT_UINT8:
-        for i in xrange(2):
+        for i in six.moves.range(2):
            output_info = net_def.output_info.add()
            output_info.dims.extend([1, 1, 1, 1])
            output_info.data_type = mace_pb2.DT_FLOAT

--- a/mace/python/tools/converter_tool/transformer.py
+++ b/mace/python/tools/converter_tool/transformer.py
@@ -14,9 +14,11 @@
 import enum
-import numpy as np
 import re
+import numpy as np
+import six
 from mace.proto import mace_pb2
 from mace.python.tools.converter_tool import base_converter
 from mace.python.tools.converter_tool.base_converter import ActivationType
@@ -191,14 +193,14 @@ class Transformer(base_converter.ConverterInterface):
    @staticmethod
    def replace(obj_list, source, target):
-        for i in xrange(len(obj_list)):
+        for i in six.moves.range(len(obj_list)):
            if obj_list[i] == source:
                obj_list[i] = target
    @staticmethod
    def transpose_shape(shape, order):
        transposed_shape = []
-        for i in xrange(len(order)):
+        for i in six.moves.range(len(order)):
            transposed_shape.append(shape[order[i]])
        shape[:] = transposed_shape[:]
@@ -208,7 +210,7 @@ class Transformer(base_converter.ConverterInterface):
    def get_tensor_shape(self, tensor):
        producer = self._producer[tensor]
-        for i in xrange(len(producer.output)):
+        for i in six.moves.range(len(producer.output)):
            if producer.output[i] == tensor:
                return list(producer.output_shape[i].dims)
@@ -247,7 +249,7 @@ class Transformer(base_converter.ConverterInterface):
                       "cannot remove op since len(op.output) "
                       "!= len(replace_op.output)")
-            for i in xrange(len(op.output)):
+            for i in six.moves.range(len(op.output)):
                for consumer_op in self._consumers.get(op.output[i], []):
                    self.replace(consumer_op.input,
                                 op.output[i],
@@ -255,7 +257,7 @@ class Transformer(base_converter.ConverterInterface):
            # if the op is output node, change replace_op output name to the op
            # output name
-            for i in xrange(len(op.output)):
+            for i in six.moves.range(len(op.output)):
                if op.output[i] in self._option.output_nodes:
                    for consumer in self._consumers.get(
                            replace_op.output[i], []):
@@ -508,15 +510,17 @@ class Transformer(base_converter.ConverterInterface):
                    idx = 0
                    filter_format = self.filter_format()
                    if filter_format == FilterFormat.HWIO:
-                        for hwi in xrange(filter.dims[0] * filter.dims[1]
+                        for hwi in six.moves.range(filter.dims[0]
-                                          * filter.dims[2]):
+                                                   * filter.dims[1]
-                            for o in xrange(filter.dims[3]):
+                                                   * filter.dims[2]):
+                            for o in six.moves.range(filter.dims[3]):
                                filter.float_data[idx] *= scale.float_data[o]
                                idx += 1
                    elif filter_format == FilterFormat.OIHW:
-                        for o in xrange(filter.dims[0]):
+                        for o in six.moves.range(filter.dims[0]):
-                            for hwi in xrange(filter.dims[1] * filter.dims[2]
+                            for hwi in six.moves.range(filter.dims[1]
-                                              * filter.dims[3]):
+                                                       * filter.dims[2]
+                                                       * filter.dims[3]):
                                filter.float_data[idx] *= scale.float_data[o]
                                idx += 1
                    else:
@@ -548,17 +552,18 @@ class Transformer(base_converter.ConverterInterface):
                    filter_format = self.filter_format()
                    if filter_format == FilterFormat.HWIO:
-                        for hw in xrange(filter.dims[0] * filter.dims[1]):
+                        for hw in six.moves.range(filter.dims[0]
-                            for i in xrange(filter.dims[2]):
+                                                  * filter.dims[1]):
-                                for o in xrange(filter.dims[3]):
+                            for i in six.moves.range(filter.dims[2]):
+                                for o in six.moves.range(filter.dims[3]):
                                    filter.float_data[idx] *= scale.float_data[
-                                        i * filter.dims[3] + o]
+                                                        i * filter.dims[3] + o]
                                    idx += 1
                    elif filter_format == FilterFormat.OIHW:
-                        for o in xrange(filter.dims[0]):
+                        for o in six.moves.range(filter.dims[0]):
-                            for i in xrange(filter.dims[1]):
+                            for i in six.moves.range(filter.dims[1]):
-                                for hw in xrange(filter.dims[2]
+                                for hw in six.moves.range(filter.dims[2]
-                                                 * filter.dims[3]):
+                                                          * filter.dims[3]):
                                    filter.float_data[idx] *= scale.float_data[
                                        i * filter.dims[0] + o]
                                    idx += 1
@@ -836,7 +841,7 @@ class Transformer(base_converter.ConverterInterface):
                        and len(self._consumers.get(conv_op.output[0], [])) == 1:  # noqa
                    b2s_op = self._consumers.get(conv_op.output[0])[0]
                    if b2s_op.type == MaceOp.BatchToSpaceND.name:
-                        print "Flatten atrous convolution"
+                        six.print_("Flatten atrous convolution")
                        # Add args.
                        padding_arg_values = ConverterUtil.get_arg(
                            op,
@@ -1167,7 +1172,7 @@ class Transformer(base_converter.ConverterInterface):
                            and len(self._consts[input].dims) == 2:
                        arg = ConverterUtil.get_arg(op, transpose_arg_names[i])
                        if arg is not None and arg.i == 1:
-                            print 'convert matmul'
+                            six.print_('convert matmul')
                            filter = self._consts[input]
                            filter_data = np.array(filter.float_data).reshape(
                                filter.dims)
@@ -1494,7 +1499,7 @@ class Transformer(base_converter.ConverterInterface):
                               weight.dims[0] != op.output_shape[0].dims[1]:
                                is_fc = False
                    if is_fc:
-                        print 'convert reshape and matmul to fc'
+                        print('convert reshape and matmul to fc')
                        self.safe_remove_node(op, input_op,
                                              remove_input_tensor=True)
                        for matmul_op in consumers:
@@ -1515,7 +1520,7 @@ class Transformer(base_converter.ConverterInterface):
                        producer.type != MaceOp.Reshape.name and \
                        len(producer.output_shape[0].dims) == 2 and \
                        weight.dims[0] == producer.output_shape[0].dims[1]:
-                    print 'convert matmul to fc'
+                    six.print_('convert matmul to fc')
                    op.type = MaceOp.FullyConnected.name
                    weight_data = np.array(weight.float_data).reshape(
                        weight.dims)
@@ -1840,7 +1845,7 @@ class Transformer(base_converter.ConverterInterface):
        if not self._option.quantize:
            return False
-        print ("Add default quantize info for ops like Pooling, Softmax")
+        print("Add default quantize info for ops like Pooling, Softmax")
        for op in self._model.op:
            if op.type in [MaceOp.Pooling.name,
                           MaceOp.Squeeze.name,
@@ -1897,7 +1902,7 @@ class Transformer(base_converter.ConverterInterface):
                    self.add_quantize_info(op, minval, maxval)
                self._quantize_activation_info[op.output[0]] = quantize_info
-        print ("Add default quantize info for input")
+        print("Add default quantize info for input")
        for input_node in self._option.input_nodes.values():
            if input_node.name not in self._quantize_activation_info:
                print("Input range %s: %s" % (input_node.name,

--- a/mace/python/tools/memory_optimizer.py
+++ b/mace/python/tools/memory_optimizer.py
@@ -14,6 +14,9 @@
 import sys
 import operator
+import six
 from mace.proto import mace_pb2
 from mace.python.tools.converter_tool import base_converter as cvt
@@ -60,7 +63,7 @@ class MemoryOptimizer(object):
        self.mem_ref_counter = {}
        ocl_mem_type_arg = ConverterUtil.get_arg(
            net_def, MaceKeyword.mace_opencl_mem_type)
-        self.cl_mem_type = ocl_mem_type_arg.i if ocl_mem_type_arg is not None\
+        self.cl_mem_type = ocl_mem_type_arg.i if ocl_mem_type_arg is not None \
            else None
        consumers = {}
@@ -126,8 +129,8 @@ class MemoryOptimizer(object):
    def get_total_optimized_mem_size(self):
        optimized_mem_size = 0
        for mem in self.mem_block:
-            print mem, MemoryTypeToStr(self.mem_block[mem].mem_type), \
+            print(mem, MemoryTypeToStr(self.mem_block[mem].mem_type),
-                self.mem_block[mem].block
+                  self.mem_block[mem].block)
            optimized_mem_size += self.mem_size(self.mem_block[mem])
        return optimized_mem_size
@@ -141,12 +144,14 @@ class MemoryOptimizer(object):
            if not self.op_need_optimize_memory(op):
                continue
            if not op.output_shape:
-                print("WARNING: There is no output shape information to "
+                six.print_("WARNING: There is no output shape information to "
-                      "do memory optimization. %s (%s)" % (op.name, op.type))
+                           "do memory optimization. %s (%s)" %
+                           (op.name, op.type), file=sys.stderr)
                return
            if len(op.output_shape) != len(op.output):
-                print('WARNING: the number of output shape is not equal to '
+                six.print_('WARNING: the number of output shape is '
-                      'the number of output.')
+                           'not equal to the number of output.',
+                           file=sys.stderr)
                return
            for i in range(len(op.output)):
                if self.is_memory_reuse_op(op):
@@ -181,7 +186,7 @@ class MemoryOptimizer(object):
                            # minimize add_mem_size; if best_mem_add_size is 0,
                            # then minimize waste_mem_size
                            if (best_mem_add_size > 0 and
-                                    add_mem_size < best_mem_add_size) \
+                                add_mem_size < best_mem_add_size) \
                                    or (best_mem_add_size == 0 and
                                        waste_mem_size < best_mem_waste_size):
                                best_mem_id = mid
@@ -209,7 +214,7 @@ class MemoryOptimizer(object):
                        self.mem_ref_counter[mem_id] += 1
            # de-ref input tensor mem
-            for idx in xrange(len(op.input)):
+            for idx in six.moves.range(len(op.input)):
                ipt = op.input[idx]
                if ipt in self.input_ref_counter:
                    self.input_ref_counter[ipt] -= 1
@@ -226,8 +231,8 @@ class MemoryOptimizer(object):
        print("total op: %d" % len(self.net_def.op))
        print("origin mem: %d, optimized mem: %d" % (
-              self.get_total_origin_mem_size(),
+            self.get_total_origin_mem_size(),
-              self.get_total_optimized_mem_size()))
+            self.get_total_optimized_mem_size()))
 class GPUMemoryOptimizer(MemoryOptimizer):

--- a/mace/python/tools/tf_ops_stats.py
+++ b/mace/python/tools/tf_ops_stats.py
@@ -16,8 +16,10 @@ import operator
 import functools
 import argparse
 import sys
-import six
 import copy
+import six
 import tensorflow as tf
 from tensorflow import gfile
 from tensorflow.core.framework import graph_pb2
@@ -51,7 +53,7 @@ def add_shape_info(input_graph_def, input_nodes, input_shapes):
            else:
                idx = input_nodes.index(node.name + ':0')
            input_shape = input_shapes[idx]
-            print input_shape
+            print(input_shape)
            placeholder_node = copy.deepcopy(node)
            placeholder_node.attr.clear()
            placeholder_node.attr['shape'].shape.dim.extend([
@@ -215,7 +217,7 @@ def main(unused_args):
 def parse_args():
-    '''Parses command line arguments.'''
+    """Parses command line arguments."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--input',

--- a/tools/common.py
+++ b/tools/common.py
@@ -15,6 +15,8 @@
 import enum
 import re
+import six
 ################################
 # log
@@ -33,24 +35,24 @@ class CMDColors:
 class MaceLogger:
    @staticmethod
    def header(message):
-        print CMDColors.PURPLE + message + CMDColors.ENDC
+        six.print_(CMDColors.PURPLE + message + CMDColors.ENDC)
    @staticmethod
    def summary(message):
-        print CMDColors.GREEN + message + CMDColors.ENDC
+        six.print_(CMDColors.GREEN + message + CMDColors.ENDC)
    @staticmethod
    def info(message):
-        print message
+        six.print_(message)
    @staticmethod
    def warning(message):
-        print CMDColors.YELLOW + 'WARNING:' + message + CMDColors.ENDC
+        six.print_(CMDColors.YELLOW + 'WARNING:' + message + CMDColors.ENDC)
    @staticmethod
    def error(module, message):
-        print CMDColors.RED + 'ERROR: [' + module + '] '\
+        six.print_(CMDColors.RED + 'ERROR: [' + module + '] '
-              + message + CMDColors.ENDC
+                   + message + CMDColors.ENDC)
        exit(1)
@@ -71,7 +73,7 @@ class StringFormatter:
        for row_idx in range(data_size):
            data_tuple = data[row_idx]
            ele_size = len(data_tuple)
-            assert(ele_size == column_size)
+            assert (ele_size == column_size)
            for i in range(ele_size):
                column_length[i] = max(column_length[i],
                                       len(str(data_tuple[i])) + 1)

--- a/tools/converter.py
+++ b/tools/converter.py
@@ -19,12 +19,12 @@ import os
 import re
 import sh
 import subprocess
-import six
 import sys
 import urllib
 import yaml
 from enum import Enum
+import six
 import sh_commands
 from sh_commands import BuildType
@@ -280,7 +280,7 @@ def get_opencl_mode(configs):
 def md5sum(str):
    md5 = hashlib.md5()
-    md5.update(str)
+    md5.update(str.encode('utf-8'))
    return md5.hexdigest()
@@ -670,7 +670,7 @@ def get_model_files(model_file_path,
        if not os.path.exists(model_file) or \
                sha256_checksum(model_file) != model_sha256_checksum:
            MaceLogger.info("Downloading model, please wait ...")
-            urllib.urlretrieve(model_file_path, model_file)
+            six.moves.urllib.request.urlretrieve(model_file_path, model_file)
            MaceLogger.info("Model downloaded successfully.")
    if sha256_checksum(model_file) != model_sha256_checksum:
@@ -684,7 +684,7 @@ def get_model_files(model_file_path,
        if not os.path.exists(weight_file) or \
                sha256_checksum(weight_file) != weight_sha256_checksum:
            MaceLogger.info("Downloading model weight, please wait ...")
-            urllib.urlretrieve(weight_file_path, weight_file)
+            six.moves.urllib.request.urlretrieve(weight_file_path, weight_file)
            MaceLogger.info("Model weight downloaded successfully.")
    if weight_file:
@@ -955,7 +955,7 @@ def build_quantize_stat(configs):
    quantize_stat_target = QUANTIZE_STAT_TARGET
    build_arg = ""
-    print (configs[YAMLKeyword.model_graph_format])
+    six.print_(configs[YAMLKeyword.model_graph_format])
    if configs[YAMLKeyword.model_graph_format] == ModelFormat.code:
        mace_check(os.path.exists(ENGINE_CODEGEN_DIR),
                   ModuleName.RUN,
@@ -1037,7 +1037,7 @@ def tuning(library_name, model_name, model_config,
           model_graph_format, model_data_format,
           target_abi, target_soc, serial_num,
           mace_lib_type):
-    print('* Tuning, it may take some time...')
+    six.print_('* Tuning, it may take some time...')
    build_tmp_binary_dir = get_build_binary_dir(library_name, target_abi)
    mace_run_name = MACE_RUN_STATIC_NAME
@@ -1107,7 +1107,7 @@ def tuning(library_name, model_name, model_config,
        CL_TUNED_PARAMETER_FILE_NAME,
        "%s/%s" % (model_output_dir, BUILD_TMP_OPENCL_BIN_DIR))
-    print('Tuning done\n')
+    six.print_('Tuning done\n')
 def run_specific_target(flags, configs, target_abi,
@@ -1362,8 +1362,8 @@ def run_quantize_stat(flags, configs):
            stdout=subprocess.PIPE)
        out, err = p.communicate()
        stdout = err + out
-        print stdout
+        six.print_(stdout)
-        print("Running finished!\n")
+        six.print_("Running finished!\n")
 def print_package_summary(package_path):

--- a/tools/falcon_cli.py
+++ b/tools/falcon_cli.py
@@ -15,6 +15,9 @@
 import json
 import socket
 import itertools
+import sys
+import six
 class FalconCli(object):
@@ -37,15 +40,16 @@ class FalconCli(object):
                buf_size=1000):
        try:
            return FalconCli((server, port), debug, buf_size)
-        except socket.error, exc:
+        except socket.error as exc:
-            print "error: connect to %s:%s error: %s" % (server, port, exc)
+            six.print_("error: connect to %s:%s error: %s"
+                       % (server, port, exc), file=sys.stderr)
    def call(self, name, *params):
        request = dict(
            id=next(self.id_counter), params=list(params), method=name)
        payload = json.dumps(request).encode()
        if self.debug:
-            print "--> req:", payload
+            six.print_("--> req:", payload)
        self.socket_.sendall(payload)
        response = self.stream.readline()
@@ -53,7 +57,7 @@ class FalconCli(object):
            raise Exception('empty response')
        if self.debug:
-            print "<-- resp:", response
+            six.print_("<-- resp:", response)
        response = json.loads(response.decode("utf8"))
        if response.get('error') is not None:

--- a/tools/generate_data.py
+++ b/tools/generate_data.py
@@ -18,6 +18,8 @@ import numpy as np
 import re
 import common
+import six
 # Validation Flow:
 # 1. Generate input data
 #    python generate_data.py \
@@ -32,7 +34,7 @@ def generate_data(name, shape, input_file, tensor_range, input_data_type):
    data = np.random.random(shape) * (tensor_range[1] - tensor_range[0]) \
        + tensor_range[0]
    input_file_name = common.formatted_file_name(input_file, name)
-    print 'Generate input file: ', input_file_name
+    six.print_('Generate input file: ', input_file_name)
    if input_data_type == 'float32':
        np_data_type = np.float32
    elif input_data_type == 'int32':
@@ -61,7 +63,7 @@ def generate_input_data(input_file, input_node, input_shape, input_ranges,
        input_range = [float(x) for x in input_ranges[i].split(',')]
        generate_data(input_names[i], shape, input_file, input_range,
                      input_data_types[i])
-    print "Generate input file done."
+    six.print_("Generate input file done.")
 def parse_args():

--- a/tools/sh_commands.py
+++ b/tools/sh_commands.py
@@ -27,6 +27,8 @@ import urllib
 import platform
 from enum import Enum
+import six
 import common
 sys.path.insert(0, "mace/python/tools")
@@ -37,9 +39,10 @@ try:
    from validate import validate
    from mace_engine_factory_codegen import gen_mace_engine_factory
 except Exception as e:
-    print("Import error:\n%s" % e)
+    six.print_("Import error:\n%s" % e, file=sys.stderr)
    exit(1)
 ################################
 # common
 ################################
@@ -57,7 +60,7 @@ def split_stdout(stdout_str):
 def make_output_processor(buff):
    def process_output(line):
-        print(line.rstrip())
+        six.print_(line.rstrip())
        buff.append(line)
    return process_output
@@ -95,7 +98,7 @@ def stdout_success(stdout):
    stdout_lines = stdout.split("\n")
    for line in stdout_lines:
        if "Aborted" in line or "FAILED" in line or \
-                        "Segmentation fault" in line:
+                "Segmentation fault" in line:
            return False
    return True
@@ -129,7 +132,7 @@ def get_soc_serialnos_map():
    soc_serialnos_map = {}
    for serialno in serialnos:
        props = adb_getprop_by_serialno(serialno)
-        soc_serialnos_map.setdefault(props["ro.board.platform"], [])\
+        soc_serialnos_map.setdefault(props["ro.board.platform"], []) \
            .append(serialno)
    return soc_serialnos_map
@@ -178,16 +181,16 @@ def adb_get_all_socs():
 def adb_push(src_path, dst_path, serialno):
-    print("Push %s to %s" % (src_path, dst_path))
+    six.print_("Push %s to %s" % (src_path, dst_path))
    sh.adb("-s", serialno, "push", src_path, dst_path)
 def adb_pull(src_path, dst_path, serialno):
-    print("Pull %s to %s" % (src_path, dst_path))
+    six.print_("Pull %s to %s" % (src_path, dst_path))
    try:
        sh.adb("-s", serialno, "pull", src_path, dst_path)
    except Exception as e:
-        print("Error msg: %s" % e.stderr)
+        six.print_("Error msg: %s" % e.stderr)
 def adb_run(abi,
@@ -204,14 +207,14 @@ def adb_run(abi,
    host_bin_full_path = "%s/%s" % (host_bin_path, bin_name)
    device_bin_full_path = "%s/%s" % (device_bin_path, bin_name)
    props = adb_getprop_by_serialno(serialno)
-    print(
+    six.print_(
        "====================================================================="
    )
-    print("Trying to lock device %s" % serialno)
+    six.print_("Trying to lock device %s" % serialno)
    with device_lock(serialno):
-        print("Run on device: %s, %s, %s" %
+        six.print_("Run on device: %s, %s, %s" %
-              (serialno, props["ro.board.platform"],
+                   (serialno, props["ro.board.platform"],
-               props["ro.product.model"]))
+                    props["ro.product.model"]))
        sh.adb("-s", serialno, "shell", "rm -rf %s" % device_bin_path)
        sh.adb("-s", serialno, "shell", "mkdir -p %s" % device_bin_path)
        adb_push(host_bin_full_path, device_bin_full_path, serialno)
@@ -223,7 +226,7 @@ def adb_run(abi,
        opencl_profiling = 1 if opencl_profiling else 0
        out_of_range_check = 1 if out_of_range_check else 0
-        print("Run %s" % device_bin_full_path)
+        six.print_("Run %s" % device_bin_full_path)
        stdout_buff = []
        process_output = make_output_processor(stdout_buff)
@@ -342,7 +345,7 @@ def bazel_build(target,
                address_sanitizer=False,
                symbol_hidden=True,
                extra_args=""):
-    print("* Build %s with ABI %s" % (target, abi))
+    six.print_("* Build %s with ABI %s" % (target, abi))
    if abi == "host":
        bazel_args = (
            "build",
@@ -372,12 +375,12 @@ def bazel_build(target,
    if symbol_hidden:
        bazel_args += ("--config", "symbol_hidden")
    if extra_args:
-        bazel_args += (extra_args, )
+        bazel_args += (extra_args,)
-        print bazel_args
+        six.print_(bazel_args)
    sh.bazel(
        _fg=True,
        *bazel_args)
-    print("Build done!\n")
+    six.print_("Build done!\n")
 def bazel_build_common(target, build_args=""):
@@ -414,7 +417,7 @@ def gen_encrypted_opencl_source(codegen_path="mace/codegen"):
 def gen_mace_engine_factory_source(model_tags,
                                   embed_model_data,
                                   codegen_path="mace/codegen"):
-    print("* Generate mace engine creator source")
+    six.print_("* Generate mace engine creator source")
    codegen_tools_dir = "%s/engine" % codegen_path
    sh.rm("-rf", codegen_tools_dir)
    sh.mkdir("-p", codegen_tools_dir)
@@ -423,7 +426,7 @@ def gen_mace_engine_factory_source(model_tags,
        "mace/python/tools",
        embed_model_data,
        codegen_tools_dir)
-    print("Generate mace engine creator source done!\n")
+    six.print_("Generate mace engine creator source done!\n")
 def pull_file_from_device(serial_num, file_path, file_name, output_dir):
@@ -452,14 +455,14 @@ def merge_opencl_binaries(binaries_dirs,
        if not os.path.exists(binary_path):
            continue
-        print 'generate opencl code from', binary_path
+        six.print_('generate opencl code from', binary_path)
        with open(binary_path, "rb") as f:
            binary_array = np.fromfile(f, dtype=np.uint8)
        idx = 0
        size, = struct.unpack("Q", binary_array[idx:idx + 8])
        idx += 8
-        for _ in xrange(size):
+        for _ in six.moves.range(size):
            key_size, = struct.unpack("i", binary_array[idx:idx + 4])
            idx += 4
            key, = struct.unpack(
@@ -481,7 +484,7 @@ def merge_opencl_binaries(binaries_dirs,
    output_byte_array = bytearray()
    data_size = len(kvs)
    output_byte_array.extend(struct.pack("Q", data_size))
-    for key, value in kvs.iteritems():
+    for key, value in six.iteritems(kvs):
        key_size = len(key)
        output_byte_array.extend(struct.pack("i", key_size))
        output_byte_array.extend(struct.pack(str(key_size) + "s", key))
@@ -508,14 +511,14 @@ def merge_opencl_parameters(binaries_dirs,
        if not os.path.exists(binary_path):
            continue
-        print 'generate opencl parameter from', binary_path
+        six.print_('generate opencl parameter from', binary_path)
        with open(binary_path, "rb") as f:
            binary_array = np.fromfile(f, dtype=np.uint8)
        idx = 0
        size, = struct.unpack("Q", binary_array[idx:idx + 8])
        idx += 8
-        for _ in xrange(size):
+        for _ in six.moves.range(size):
            key_size, = struct.unpack("i", binary_array[idx:idx + 4])
            idx += 4
            key, = struct.unpack(
@@ -529,7 +532,7 @@ def merge_opencl_parameters(binaries_dirs,
    output_byte_array = bytearray()
    data_size = len(kvs)
    output_byte_array.extend(struct.pack("Q", data_size))
-    for key, value in kvs.iteritems():
+    for key, value in six.iteritems(kvs):
        key_size = len(key)
        output_byte_array.extend(struct.pack("i", key_size))
        output_byte_array.extend(struct.pack(str(key_size) + "s", key))
@@ -637,11 +640,12 @@ def gen_random_input(model_output_dir,
        for i in range(len(input_file_list)):
            if input_file_list[i] is not None:
                dst_input_file = model_output_dir + '/' + \
-                        common.formatted_file_name(input_file_name,
+                                 common.formatted_file_name(input_file_name,
-                                                   input_name_list[i])
+                                                            input_name_list[i])
                if input_file_list[i].startswith("http://") or \
                        input_file_list[i].startswith("https://"):
-                    urllib.urlretrieve(input_file_list[i], dst_input_file)
+                    six.moves.urllib.request.urlretrieve(input_file_list[i],
+                                                         dst_input_file)
                else:
                    sh.cp("-f", input_file_list[i], dst_input_file)
@@ -701,12 +705,13 @@ def tuning_run(abi,
               runtime_failure_ratio=0.0,
               address_sanitizer=False,
               link_dynamic=False):
-    print("* Run '%s' with round=%s, restart_round=%s, tuning=%s, "
+    six.print_("* Run '%s' with round=%s, restart_round=%s, tuning=%s, "
-          "out_of_range_check=%s, omp_num_threads=%s, cpu_affinity_policy=%s, "
+               "out_of_range_check=%s, omp_num_threads=%s, "
-          "gpu_perf_hint=%s, gpu_priority_hint=%s" %
+               "cpu_affinity_policy=%s, gpu_perf_hint=%s, "
-          (model_tag, running_round, restart_round, str(tuning),
+               "gpu_priority_hint=%s" %
-           str(out_of_range_check), omp_num_threads, cpu_affinity_policy,
+               (model_tag, running_round, restart_round, str(tuning),
-           gpu_perf_hint, gpu_priority_hint))
+                str(out_of_range_check), omp_num_threads, cpu_affinity_policy,
+                gpu_perf_hint, gpu_priority_hint))
    mace_model_path = ""
    if model_graph_format == ModelFormat.file:
        mace_model_path = "%s/%s.pb" % (mace_model_dir, model_tag)
@@ -741,8 +746,8 @@ def tuning_run(abi,
            stdout=subprocess.PIPE)
        out, err = p.communicate()
        stdout = err + out
-        print stdout
+        six.print_(stdout)
-        print("Running finished!\n")
+        six.print_("Running finished!\n")
    else:
        sh.adb("-s", serialno, "shell", "mkdir", "-p", phone_data_dir)
        internal_storage_dir = create_internal_storage_dir(
@@ -853,7 +858,7 @@ def tuning_run(abi,
            adb_cmd_file,
            _fg=True)
-        print("Running finished!\n")
+        six.print_("Running finished!\n")
    return stdout
@@ -875,7 +880,7 @@ def validate_model(abi,
                   input_file_name="model_input",
                   output_file_name="model_out",
                   validation_threshold=0.9):
-    print("* Validate with %s" % platform)
+    six.print_("* Validate with %s" % platform)
    if abi != "host":
        for output_name in output_nodes:
            formatted_name = common.formatted_file_name(
@@ -913,7 +918,7 @@ def validate_model(abi,
        elif caffe_env == common.CaffeEnvType.DOCKER:
            docker_image_id = sh.docker("images", "-q", image_name)
            if not docker_image_id:
-                print("Build caffe docker")
+                six.print_("Build caffe docker")
                sh.docker("build", "-t", image_name,
                          "third_party/caffe")
@@ -925,31 +930,31 @@ def validate_model(abi,
                sh.docker("rm", "-f", container_name)
                container_id = ""
            if not container_id:
-                print("Run caffe container")
+                six.print_("Run caffe container")
                sh.docker(
-                        "run",
+                    "run",
-                        "-d",
+                    "-d",
-                        "-it",
+                    "-it",
-                        "--name",
+                    "--name",
-                        container_name,
+                    container_name,
-                        image_name,
+                    image_name,
-                        "/bin/bash")
+                    "/bin/bash")
            for input_name in input_nodes:
                formatted_input_name = common.formatted_file_name(
-                        input_file_name, input_name)
+                    input_file_name, input_name)
                sh.docker(
-                        "cp",
+                    "cp",
-                        "%s/%s" % (model_output_dir, formatted_input_name),
+                    "%s/%s" % (model_output_dir, formatted_input_name),
-                        "%s:/mace" % container_name)
+                    "%s:/mace" % container_name)
            for output_name in output_nodes:
                formatted_output_name = common.formatted_file_name(
-                        output_file_name, output_name)
+                    output_file_name, output_name)
                sh.docker(
-                        "cp",
+                    "cp",
-                        "%s/%s" % (model_output_dir, formatted_output_name),
+                    "%s/%s" % (model_output_dir, formatted_output_name),
-                        "%s:/mace" % container_name)
+                    "%s:/mace" % container_name)
            model_file_name = os.path.basename(model_file_path)
            weight_file_name = os.path.basename(weight_file_path)
            sh.docker("cp", "tools/common.py", "%s:/mace" % container_name)
@@ -976,22 +981,22 @@ def validate_model(abi,
                "--validation_threshold=%f" % validation_threshold,
                _fg=True)
-    print("Validation done!\n")
+    six.print_("Validation done!\n")
 ################################
 # library
 ################################
 def packaging_lib(libmace_output_dir, project_name):
-    print("* Package libs for %s" % project_name)
+    six.print_("* Package libs for %s" % project_name)
    tar_package_name = "libmace_%s.tar.gz" % project_name
    project_dir = "%s/%s" % (libmace_output_dir, project_name)
    tar_package_path = "%s/%s" % (project_dir, tar_package_name)
    if os.path.exists(tar_package_path):
        sh.rm("-rf", tar_package_path)
-    print("Start packaging '%s' libs into %s" % (project_name,
+    six.print_("Start packaging '%s' libs into %s" % (project_name,
-                                                 tar_package_path))
+                                                      tar_package_path))
    which_sys = platform.system()
    if which_sys == "Linux":
        sh.tar(
@@ -1009,7 +1014,7 @@ def packaging_lib(libmace_output_dir, project_name):
            "%s" % tar_package_path,
            glob.glob("%s/*" % project_dir),
            _fg=True)
-    print("Packaging Done!\n")
+    six.print_("Packaging Done!\n")
    return tar_package_path
@@ -1041,7 +1046,7 @@ def benchmark_model(abi,
                    gpu_priority_hint=3,
                    input_file_name="model_input",
                    link_dynamic=False):
-    print("* Benchmark for %s" % model_tag)
+    six.print_("* Benchmark for %s" % model_tag)
    mace_model_path = ""
    if model_graph_format == ModelFormat.file:
@@ -1153,7 +1158,7 @@ def benchmark_model(abi,
            adb_cmd_file,
            _fg=True)
-    print("Benchmark done!\n")
+    six.print_("Benchmark done!\n")
 def build_run_throughput_test(abi,
@@ -1173,7 +1178,7 @@ def build_run_throughput_test(abi,
                              phone_data_dir,
                              strip="always",
                              input_file_name="model_input"):
-    print("* Build and run throughput_test")
+    six.print_("* Build and run throughput_test")
    model_tag_build_flag = ""
    if cpu_model_tag:
@@ -1260,7 +1265,7 @@ def build_run_throughput_test(abi,
        "--run_seconds=%s" % run_seconds,
        _fg=True)
-    print("throughput_test done!\n")
+    six.print_("throughput_test done!\n")
 ################################