Merge branch 'pycodestyle' into 'master'

Enable Python style check. See merge request !361.

Merge branch 'pycodestyle' into 'master'
Enable Python style check. See merge request !361.
58f2516e · 叶剑武 · e54825c5 · 6da30d22 · 58f2516e · 58f2516e
22 changed files
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
 stages:
  - cpplint
+  - pycodestyle
  - ops_test
  - ops_benchmark
@@ -7,7 +8,12 @@ cpplint:
  stage: cpplint
  script:
    - curl -o cpplint.py https://raw.githubusercontent.com/google/styleguide/gh-pages/cpplint/cpplint.py
-    - python cpplint.py --linelength=80 --counting=detailed $(find mace -name *.h -or -name *.cc)
+    - python cpplint.py --linelength=80 --counting=detailed $(find mace -name "*.h" -or -name "*.cc")
+pycodestyle:
+  stage: pycodestyle
+  script:
+    - pycodestyle $(find -name "*.py")
 ops_test:
  stage: ops_test

--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -113,7 +113,8 @@ RUN pip install -i http://pypi.douban.com/simple/ --trusted-host pypi.douban.com
    scipy \
    jinja2 \
    pyyaml \
-    sh 
+    sh \
+    pycodestyle
 # Download tensorflow tools
 RUN wget http://cnbj1-inner-fds.api.xiaomi.net/mace/tool/transform_graph && \

--- a/mace/python/tools/binary_codegen.py
+++ b/mace/python/tools/binary_codegen.py
@@ -27,28 +27,30 @@ def generate_cpp_source():
        print "Generate binary from", binary_path
        idx = 0
-    size, = struct.unpack("Q", binary_array[idx:idx+8])
+        size, = struct.unpack("Q", binary_array[idx:idx + 8])
        idx += 8
        for _ in xrange(size):
-      key_size, = struct.unpack("i", binary_array[idx:idx+4])
+            key_size, = struct.unpack("i", binary_array[idx:idx + 4])
            idx += 4
-      key, = struct.unpack(str(key_size) + "s", binary_array[idx:idx+key_size])
+            key, = struct.unpack(
+                str(key_size) + "s", binary_array[idx:idx + key_size])
            idx += key_size
-      params_size, = struct.unpack("i", binary_array[idx:idx+4])
+            params_size, = struct.unpack("i", binary_array[idx:idx + 4])
            idx += 4
            data_map[key] = []
            count = params_size / 4
-      params = struct.unpack(str(count) + "i", binary_array[idx:idx+params_size])
+            params = struct.unpack(
+                str(count) + "i", binary_array[idx:idx + params_size])
            for i in params:
                data_map[key].append(i)
            idx += params_size
    env = jinja2.Environment(loader=jinja2.FileSystemLoader(sys.path[0]))
    return env.get_template('str2vec_maps.cc.jinja2').render(
-    maps = data_map,
+        maps=data_map,
-    data_type = 'unsigned int',
+        data_type='unsigned int',
-    variable_name = FLAGS.variable_name
+        variable_name=FLAGS.variable_name)
-  )
 def main(unused_args):
    cpp_binary_source = generate_cpp_source()
@@ -58,14 +60,12 @@ def main(unused_args):
    w_file.write(cpp_binary_source)
    w_file.close()
 def parse_args():
    """Parses command line arguments."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
-      "--binary_dirs",
+        "--binary_dirs", type=str, default="", help="The binaries file path.")
-      type=str,
-      default="",
-      help="The binaries file path.")
    parser.add_argument(
        "--binary_file_name",
        type=str,
@@ -75,7 +75,8 @@ def parse_args():
        "--output_path",
        type=str,
        default="",
-      help="The path of generated C++ source file which contains the binary.")
+        help="The path of generated C++ source file which contains the binary."
+    )
    parser.add_argument(
        "--variable_name",
        type=str,

--- a/mace/python/tools/caffe_converter_lib.py
+++ b/mace/python/tools/caffe_converter_lib.py
--- a/mace/python/tools/convert_util.py
+++ b/mace/python/tools/convert_util.py
@@ -26,4 +26,3 @@ def tf_dtype_2_mace_dtype(tf_dtype):
    if not mace_dtype:
        raise Exception("Not supported tensorflow dtype: " + tf_dtype)
    return mace_dtype
--- a/mace/python/tools/converter.py
+++ b/mace/python/tools/converter.py
@@ -4,10 +4,14 @@ import hashlib
 import os.path
 from mace.python.tools import source_converter_lib
-# ./bazel-bin/mace/python/tools/tf_converter --model_file quantized_test.pb --output quantized_test_dsp.pb --runtime dsp --input_dim input_node,1,28,28,3
+# ./bazel-bin/mace/python/tools/tf_converter --model_file quantized_test.pb \
+#                                            --output quantized_test_dsp.pb \
+#                                            --runtime dsp \
+#                                            --input_dim input_node,1,28,28,3
 FLAGS = None
 def file_checksum(fname):
    hash_func = hashlib.sha256()
    with open(fname, "rb") as f:
@@ -15,6 +19,7 @@ def file_checksum(fname):
            hash_func.update(chunk)
    return hash_func.hexdigest()
 def main(unused_args):
    if not os.path.isfile(FLAGS.model_file):
        print("Input graph file '" + FLAGS.model_file + "' does not exist!")
@@ -22,17 +27,21 @@ def main(unused_args):
    model_checksum = file_checksum(FLAGS.model_file)
    if FLAGS.model_checksum != "" and FLAGS.model_checksum != model_checksum:
-    print("Model checksum mismatch: %s != %s" % (model_checksum, FLAGS.model_checksum))
+        print("Model checksum mismatch: %s != %s" % (model_checksum,
+                                                     FLAGS.model_checksum))
        sys.exit(-1)
    if FLAGS.platform == 'caffe':
        if not os.path.isfile(FLAGS.weight_file):
-      print("Input weight file '" + FLAGS.weight_file + "' does not exist!")
+            print("Input weight file '" + FLAGS.weight_file +
+                  "' does not exist!")
            sys.exit(-1)
        weight_checksum = file_checksum(FLAGS.weight_file)
-    if FLAGS.weight_checksum != "" and FLAGS.weight_checksum != weight_checksum:
+        if FLAGS.weight_checksum != "" and \
-      print("Weight checksum mismatch: %s != %s" % (weight_checksum, FLAGS.weight_checksum))
+                FLAGS.weight_checksum != weight_checksum:
+            print("Weight checksum mismatch: %s != %s" %
+                  (weight_checksum, FLAGS.weight_checksum))
            sys.exit(-1)
        if FLAGS.runtime == 'dsp':
@@ -41,22 +50,27 @@ def main(unused_args):
        from mace.python.tools import caffe_converter_lib
        output_graph_def = caffe_converter_lib.convert_to_mace_pb(
-      FLAGS.model_file, FLAGS.weight_file, FLAGS.input_node, FLAGS.input_shape, FLAGS.output_node,
+            FLAGS.model_file, FLAGS.weight_file, FLAGS.input_node,
-      FLAGS.data_type, FLAGS.runtime, FLAGS.winograd)
+            FLAGS.input_shape, FLAGS.output_node, FLAGS.data_type,
+            FLAGS.runtime, FLAGS.winograd)
    elif FLAGS.platform == 'tensorflow':
        if FLAGS.runtime == 'dsp':
            from mace.python.tools import tf_dsp_converter_lib
            output_graph_def = tf_dsp_converter_lib.convert_to_mace_pb(
-        FLAGS.model_file, FLAGS.input_node, FLAGS.output_node, FLAGS.dsp_mode)
+                FLAGS.model_file, FLAGS.input_node, FLAGS.output_node,
+                FLAGS.dsp_mode)
        else:
            from mace.python.tools import tf_converter_lib
            output_graph_def = tf_converter_lib.convert_to_mace_pb(
-        FLAGS.model_file, FLAGS.input_node, FLAGS.input_shape, FLAGS.output_node,
+                FLAGS.model_file, FLAGS.input_node, FLAGS.input_shape,
-        FLAGS.data_type, FLAGS.runtime, FLAGS.winograd)
+                FLAGS.output_node, FLAGS.data_type, FLAGS.runtime,
+                FLAGS.winograd)
    if FLAGS.output_type == 'source':
-    source_converter_lib.convert_to_source(output_graph_def, model_checksum, FLAGS.template, FLAGS.obfuscate,
+        source_converter_lib.convert_to_source(
-      FLAGS.model_tag, FLAGS.output, FLAGS.runtime, FLAGS.embed_model_data)
+            output_graph_def, model_checksum, FLAGS.template, FLAGS.obfuscate,
+            FLAGS.model_tag, FLAGS.output, FLAGS.runtime,
+            FLAGS.embed_model_data)
    else:
        with open(FLAGS.output, "wb") as f:
            f.write(output_graph_def.SerializeToString())
@@ -65,6 +79,7 @@ def main(unused_args):
            f.write(str(output_graph_def))
    print("Model conversion is completed.")
 def str2bool(v):
    if v.lower() in ('yes', 'true', 't', 'y', '1'):
        return True
@@ -73,6 +88,7 @@ def str2bool(v):
    else:
        raise argparse.ArgumentTypeError('Boolean value expected.')
 def parse_args():
    """Parses command line arguments."""
    parser = argparse.ArgumentParser()
@@ -81,12 +97,10 @@ def parse_args():
        "--model_file",
        type=str,
        default="",
-    help="TensorFlow \'GraphDef\' file to load, Caffe prototxt file to load.")
+        help="TensorFlow \'GraphDef\' file to load, "
+        "Caffe prototxt file to load.")
    parser.add_argument(
-    "--weight_file",
+        "--weight_file", type=str, default="", help="Caffe data file to load.")
-    type=str,
-    default="",
-    help="Caffe data file to load.")
    parser.add_argument(
        "--model_checksum",
        type=str,
@@ -103,35 +117,23 @@ def parse_args():
        default="",
        help="File to save the output graph to.")
    parser.add_argument(
-    "--runtime",
+        "--runtime", type=str, default="cpu", help="Runtime: cpu/gpu/dsp")
-    type=str,
-    default="cpu",
-    help="Runtime: cpu/gpu/dsp")
    parser.add_argument(
        "--input_node",
        type=str,
        default="input_node",
        help="e.g., input_node")
    parser.add_argument(
-    "--output_node",
+        "--output_node", type=str, default="softmax", help="e.g., softmax")
-    type=str,
-    default="softmax",
-    help="e.g., softmax")
    parser.add_argument(
        "--data_type",
        type=str,
        default='DT_FLOAT',
        help="e.g., DT_HALF/DT_FLOAT")
    parser.add_argument(
-    "--output_type",
+        "--output_type", type=str, default="pb", help="output type: source/pb")
-    type=str,
-    default="pb",
-    help="output type: source/pb")
    parser.add_argument(
-    "--template",
+        "--template", type=str, default="", help="template path")
-    type=str,
-    default="",
-    help="template path")
    parser.add_argument(
        "--obfuscate",
        type=str2bool,
@@ -152,25 +154,13 @@ def parse_args():
        default=False,
        help="open winograd convolution or not")
    parser.add_argument(
-    "--dsp_mode",
+        "--dsp_mode", type=int, default=0, help="dsp run mode, defalut=0")
-    type=int,
-    default=0,
-    help="dsp run mode, defalut=0")
    parser.add_argument(
-    "--input_shape",
+        "--input_shape", type=str, default="", help="input shape.")
-    type=str,
-    default="",
-    help="input shape.")
    parser.add_argument(
-    "--platform",
+        "--platform", type=str, default="tensorflow", help="tensorflow/caffe")
-    type=str,
-    default="tensorflow",
-    help="tensorflow/caffe")
    parser.add_argument(
-    "--embed_model_data",
+        "--embed_model_data", type=str2bool, default=True, help="input shape.")
-    type=str2bool,
-    default=True,
-    help="input shape.")
    return parser.parse_known_args()

--- a/mace/python/tools/dsp_ops.py
+++ b/mace/python/tools/dsp_ops.py
 class DspOps(object):
    def __init__(self):
        self.dsp_ops = {
@@ -18,7 +17,7 @@ class DspOps(object):
            'QuantizedAvgPool': 'QuantizedAvgPool_8',
            'QuantizedConcat': 'QuantizedConcat_8',
            'QuantizedBiasAdd': 'QuantizedBiasAdd_8p8to32',
-      'QuantizedResizeBilinear' : 'QuantizedResizeBilinear_8',
+            'QuantizedResizeBilinear': 'QuantizedResizeBilinear_8',
            'QuantizedSpaceToBatchND': 'QuantizedSpaceToBatchND_8',
            'QuantizedBatchToSpaceND': 'QuantizedBatchToSpaceND_8',
            'QuantizedSoftmax': 'QuantizedSoftmax_8',
@@ -54,6 +53,7 @@ class DspOps(object):
            'Concat': 'Concat_f',
            'AddN': 'AddN_f',
        }
    def has_op(self, tf_op):
        return tf_op in self.dsp_ops
@@ -61,5 +61,3 @@ class DspOps(object):
        if tf_op not in self.dsp_ops:
            raise Exception('Could not map nn op for: ', tf_op)
        return self.dsp_ops[tf_op]
--- a/mace/python/tools/encrypt_opencl_codegen.py
+++ b/mace/python/tools/encrypt_opencl_codegen.py
@@ -11,10 +11,13 @@ FLAGS = None
 encrypt_lookup_table = "Xiaomi-AI-Platform-Mace"
 def encrypt_code(code_str):
    encrypted_arr = []
    for i in range(len(code_str)):
-    encrypted_char = hex(ord(code_str[i]) ^ ord(encrypt_lookup_table[i % len(encrypt_lookup_table)]))
+        encrypted_char = hex(
+            ord(code_str[i]) ^ ord(
+                encrypt_lookup_table[i % len(encrypt_lookup_table)]))
        encrypted_arr.append(encrypted_char)
    return encrypted_arr
@@ -45,7 +48,8 @@ def main(unused_args):
            encrypted_code_maps[file_name[:-3]] = encrypted_code_arr
    env = jinja2.Environment(loader=jinja2.FileSystemLoader(sys.path[0]))
-  cpp_cl_encrypted_kernel = env.get_template('str2vec_maps.cc.jinja2').render(
+    cpp_cl_encrypted_kernel = env.get_template(
+        'str2vec_maps.cc.jinja2').render(
            maps=encrypted_code_maps,
            data_type='unsigned char',
            variable_name='kEncryptedProgramMap')

--- a/mace/python/tools/graph_util.py
+++ b/mace/python/tools/graph_util.py
@@ -2,18 +2,21 @@ import tensorflow as tf
 from mace.proto import mace_pb2
 from collections import OrderedDict
 def sort_tf_node(node, nodes_map, ordered_nodes_map):
    if node.name not in ordered_nodes_map:
        for input_tensor_name in node.input:
            input_node_name = input_tensor_name.split(':')[
                0] if ':' in input_tensor_name else input_tensor_name
-            if input_node_name not in nodes_map or input_node_name in ordered_nodes_map:
+            if input_node_name not in nodes_map or \
+                    input_node_name in ordered_nodes_map:
                continue
            input_node = nodes_map[input_node_name]
            sort_tf_node(input_node, nodes_map, ordered_nodes_map)
        ordered_nodes_map[node.name] = node
 def sort_tf_graph(graph_def):
    nodes_map = {}
    ordered_nodes_map = OrderedDict()
@@ -31,13 +34,15 @@ def sort_mace_node(node, nodes_map, ordered_nodes_map):
        for input_tensor_name in node.input:
            input_node_name = input_tensor_name.split(':')[
                0] if ':' in input_tensor_name else input_tensor_name
-            if input_node_name not in nodes_map or input_node_name in ordered_nodes_map:
+            if input_node_name not in nodes_map or \
+                    input_node_name in ordered_nodes_map:
                continue
            input_node = nodes_map[input_node_name]
            sort_mace_node(input_node, nodes_map, ordered_nodes_map)
        ordered_nodes_map[node.name] = node
 def sort_mace_graph(graph_def, output_name):
    nodes_map = {}
    ordered_nodes_map = OrderedDict()

--- a/mace/python/tools/memory_optimizer.py
+++ b/mace/python/tools/memory_optimizer.py
@@ -2,6 +2,7 @@ import sys
 import operator
 from mace.proto import mace_pb2
 class MemoryOptimizer(object):
    def __init__(self, net_def):
        self.net_def = net_def
@@ -37,9 +38,9 @@ class MemoryOptimizer(object):
        mem_size = [0, 0]
        if op_type == 'WinogradTransform' or op_type == 'MatMul':
            mem_size[0] = output_shape[2] * output_shape[3]
-      mem_size[1] = output_shape[0] * int((output_shape[1]+3)/4)
+            mem_size[1] = output_shape[0] * int((output_shape[1] + 3) / 4)
        else:
-      mem_size[0] = output_shape[2] * int((output_shape[3]+3)/4)
+            mem_size[0] = output_shape[2] * int((output_shape[3] + 3) / 4)
            mem_size[1] = output_shape[0] * output_shape[1]
        return mem_size
@@ -51,13 +52,16 @@ class MemoryOptimizer(object):
            if self.is_buffer_image_op(op):
                continue
            if not op.output_shape:
-        print('WARNING: There is no output shape information to do memory optimization.')
+                print('WARNING: There is no output shape information to '
+                      'do memory optimization.')
                return
            if len(op.output_shape) != len(op.output):
-        print('WARNING: the number of output shape is not equal to the number of output.')
+                print('WARNING: the number of output shape is not equal to '
+                      'the number of output.')
                return
            for i in range(len(op.output)):
-        op_mem_size = self.get_mem_size(op.type, op.output_shape[i].dims)
+                op_mem_size = self.get_mem_size(op.type,
+                                                op.output_shape[i].dims)
                mem_id = -1
                if len(self.idle_mem) > 0:
                    best_mem_candidate_id = -1
@@ -65,16 +69,22 @@ class MemoryOptimizer(object):
                    best_mem_candidate_shape = []
                    for mid in self.idle_mem:
                        reuse_mem_size = self.mem_block[mid]
-            resize_mem_size = [max(reuse_mem_size[0], op_mem_size[0]), max(reuse_mem_size[1], op_mem_size[1])]
+                        resize_mem_size = [
-            delta_mem_area = self.mem_area(resize_mem_size) - self.mem_area(reuse_mem_size)
+                            max(reuse_mem_size[0], op_mem_size[0]),
+                            max(reuse_mem_size[1], op_mem_size[1])
+                        ]
+                        delta_mem_area = self.mem_area(
+                            resize_mem_size) - self.mem_area(reuse_mem_size)
                        if delta_mem_area < best_mem_candidate_delta_area:
                            best_mem_candidate_id = mid
                            best_mem_candidate_delta_area = delta_mem_area
                            best_mem_candidate_shape = resize_mem_size
-          if best_mem_candidate_delta_area <= self.mem_area(op_mem_size):
+                    if best_mem_candidate_delta_area <= self.mem_area(
+                            op_mem_size):
                        # reuse
-            self.mem_block[best_mem_candidate_id] = best_mem_candidate_shape
+                        self.mem_block[
+                            best_mem_candidate_id] = best_mem_candidate_shape
                        mem_id = best_mem_candidate_id
                        self.idle_mem.remove(mem_id)
@@ -113,7 +123,8 @@ class MemoryOptimizer(object):
            print mem, self.mem_block[mem]
            optimized_mem_size += reduce(operator.mul, self.mem_block[mem], 4)
-    print('origin mem: %d, optimized mem: %d', origin_mem_size, optimized_mem_size)
+        print('origin mem: %d, optimized mem: %d', origin_mem_size,
+              optimized_mem_size)
 def optimize_memory(net_def):

--- a/mace/python/tools/opencl_codegen.py
+++ b/mace/python/tools/opencl_codegen.py
@@ -27,37 +27,40 @@ def generate_cpp_source():
            binary_array = np.fromfile(f, dtype=np.uint8)
        idx = 0
-    size, = struct.unpack("Q", binary_array[idx:idx+8])
+        size, = struct.unpack("Q", binary_array[idx:idx + 8])
        idx += 8
        for _ in xrange(size):
-      key_size, = struct.unpack("i", binary_array[idx:idx+4])
+            key_size, = struct.unpack("i", binary_array[idx:idx + 4])
            idx += 4
-      key, = struct.unpack(str(key_size) + "s", binary_array[idx:idx+key_size])
+            key, = struct.unpack(
+                str(key_size) + "s", binary_array[idx:idx + key_size])
            idx += key_size
-      value_size, = struct.unpack("i", binary_array[idx:idx+4])
+            value_size, = struct.unpack("i", binary_array[idx:idx + 4])
            idx += 4
            maps[key] = []
-      value = struct.unpack(str(value_size) + "B",
+            value = struct.unpack(
-                            binary_array[idx:idx+value_size])
+                str(value_size) + "B", binary_array[idx:idx + value_size])
            idx += value_size
            for ele in value:
                maps[key].append(hex(ele))
-    cl_platform_info_path = os.path.join(binary_dir, FLAGS.platform_info_file_name)
+        cl_platform_info_path = os.path.join(binary_dir,
+                                             FLAGS.platform_info_file_name)
        with open(cl_platform_info_path, 'r') as f:
            curr_platform_info = f.read()
        if platform_info != "":
-      assert(curr_platform_info == platform_info)
+            assert (curr_platform_info == platform_info)
        platform_info = curr_platform_info
    env = jinja2.Environment(loader=jinja2.FileSystemLoader(sys.path[0]))
    return env.get_template('opencl_compiled_kernel.cc.jinja2').render(
-    maps = maps,
+        maps=maps,
-    data_type = 'unsigned char',
+        data_type='unsigned char',
-    variable_name = 'kCompiledProgramMap',
+        variable_name='kCompiledProgramMap',
-    platform_info = platform_info,
+        platform_info=platform_info,
    )
 def main(unused_args):
    cpp_cl_binary_source = generate_cpp_source()
@@ -90,7 +93,7 @@ def parse_args():
        "--output_path",
        type=str,
        default="./mace/examples/codegen/opencl/opencl_compiled_program.cc",
-      help="The path of generated C++ header file which contains cl binaries.")
+        help="The path of generated C++ header file for cl binaries.")
    return parser.parse_known_args()

--- a/mace/python/tools/source_converter_lib.py
+++ b/mace/python/tools/source_converter_lib.py
@@ -6,9 +6,9 @@ import hashlib
 from mace.proto import mace_pb2
 from jinja2 import Environment, FileSystemLoader
 GENERATED_NAME = set()
 def generate_obfuscated_name(namespace, name):
    md5 = hashlib.md5()
    md5.update(namespace)
@@ -22,31 +22,36 @@ def generate_obfuscated_name(namespace, name):
    GENERATED_NAME.add(name)
    return name
 def generate_tensor_map(tensors):
    tensor_map = {}
    for t in tensors:
-    if not tensor_map.has_key(t.name):
+        if t.name not in tensor_map:
            tensor_map[t.name] = generate_obfuscated_name("tensor", t.name)
    return tensor_map
 def generate_in_out_map(ops, tensor_map):
    in_out_map = {}
    for op in ops:
        op.name = generate_obfuscated_name("op", op.name)
        for input_name in op.input:
-        if not in_out_map.has_key(input_name):
+            if input_name not in in_out_map:
-          if tensor_map.has_key(input_name):
+                if input_name in tensor_map:
                    in_out_map[input_name] = tensor_map[input_name]
                else:
-            in_out_map[input_name] = generate_obfuscated_name("in", input_name)
+                    in_out_map[input_name] = generate_obfuscated_name(
+                        "in", input_name)
        for output_name in op.output:
-      if not in_out_map.has_key(output_name):
+            if output_name not in in_out_map:
-        if tensor_map.has_key(output_name):
+                if output_name in tensor_map:
                    in_out_map[output_name] = tensor_map[output_name]
                else:
-          in_out_map[output_name] = generate_obfuscated_name("out", output_name)
+                    in_out_map[output_name] = generate_obfuscated_name(
+                        "out", output_name)
    return in_out_map
 def obfuscate_name(net_def):
    input_node = "mace_input_node"
    output_node = "mace_output_node"
@@ -63,20 +68,22 @@ def obfuscate_name(net_def):
            if output_node not in op.output[i]:
                op.output[i] = in_out_map[op.output[i]]
 def rename_tensor(net_def):
    tensor_map = {}
    for t in net_def.tensors:
-    if not tensor_map.has_key(t.name):
+        if t.name not in tensor_map:
            tensor_map[t.name] = "_" + t.name[:-2].replace("/", "_")
            t.name = tensor_map[t.name]
    for op in net_def.op:
        for i in range(len(op.input)):
-      if tensor_map.has_key(op.input[i]):
+            if op.input[i] in tensor_map:
                op.input[i] = tensor_map[op.input[i]]
        for i in range(len(op.output)):
-      if tensor_map.has_key(op.output[i]):
+            if op.output[i] in tensor_map:
                op.output[i] = tensor_map[op.output[i]]
 class TensorInfo:
    def __init__(self, id, t, runtime):
        self.id = id
@@ -84,19 +91,26 @@ class TensorInfo:
        if t.data_type == mace_pb2.DT_FLOAT:
            if runtime == 'gpu':
                self.data_type = mace_pb2.DT_HALF
-        self.data = bytearray(np.array(t.float_data).astype(np.float16).tobytes())
+                self.data = bytearray(
+                    np.array(t.float_data).astype(np.float16).tobytes())
            else:
                self.data_type = mace_pb2.DT_FLOAT
-        self.data = bytearray(np.array(t.float_data).astype(np.float32).tobytes())
+                self.data = bytearray(
+                    np.array(t.float_data).astype(np.float32).tobytes())
        elif t.data_type == mace_pb2.DT_INT32:
-      self.data = bytearray(np.array(t.int32_data).astype(np.int32).tobytes())
+            self.data = bytearray(
+                np.array(t.int32_data).astype(np.int32).tobytes())
        elif t.data_type == mace_pb2.DT_UINT8:
-      self.data = bytearray(np.array(t.int32_data).astype(np.uint8).tolist())
+            self.data = bytearray(
+                np.array(t.int32_data).astype(np.uint8).tolist())
 def stringfy(value):
    return ', '.join('"{0}"'.format(w) for w in value)
-def convert_to_source(net_def, mode_pb_checksum, template_dir, obfuscate, model_tag, output, runtime, embed_model_data):
+def convert_to_source(net_def, mode_pb_checksum, template_dir, obfuscate,
+                      model_tag, output, runtime, embed_model_data):
    if obfuscate:
        obfuscate_name(net_def)
    else:
@@ -106,7 +120,8 @@ def convert_to_source(net_def, mode_pb_checksum, template_dir, obfuscate, model_
    print template_dir
    # Create the jinja2 environment.
-  j2_env = Environment(loader=FileSystemLoader(template_dir), trim_blocks=True)
+    j2_env = Environment(
+        loader=FileSystemLoader(template_dir), trim_blocks=True)
    j2_env.filters['stringfy'] = stringfy
    output_dir = os.path.dirname(output) + '/'
    # generate tensor source files
@@ -122,11 +137,11 @@ def convert_to_source(net_def, mode_pb_checksum, template_dir, obfuscate, model_
            model_data.extend(bytearray([0] * padding))
            offset += padding
        source = j2_env.get_template(template_name).render(
-      tensor_info = tensor_info,
+            tensor_info=tensor_info,
-      tensor = t,
+            tensor=t,
-      tag = model_tag,
+            tag=model_tag,
-      runtime = runtime,
+            runtime=runtime,
-      offset = offset,
+            offset=offset,
        )
        model_data.extend(tensor_info.data)
        offset += len(tensor_info.data)
@@ -137,11 +152,10 @@ def convert_to_source(net_def, mode_pb_checksum, template_dir, obfuscate, model_
    # generate tensor data
    template_name = 'tensor_data.jinja2'
    source = j2_env.get_template(template_name).render(
-    tag = model_tag,
+        tag=model_tag,
-    embed_model_data = embed_model_data,
+        embed_model_data=embed_model_data,
-    model_data_size = offset,
+        model_data_size=offset,
-    model_data = model_data
+        model_data=model_data)
-  )
    with open(output_dir + 'tensor_data' + '.cc', "wb") as f:
        f.write(source)
    if not embed_model_data:
@@ -155,11 +169,11 @@ def convert_to_source(net_def, mode_pb_checksum, template_dir, obfuscate, model_
    op_size = len(net_def.op)
    for start in range(0, op_size, 10):
        source = j2_env.get_template(template_name).render(
-      start = start,
+            start=start,
-      end = min(start+10, op_size),
+            end=min(start + 10, op_size),
-      net = net_def,
+            net=net_def,
-      tag = model_tag,
+            tag=model_tag,
-      runtime = runtime,
+            runtime=runtime,
        )
        with open(output_dir + 'op' + str(counter) + '.cc', "wb") as f:
            f.write(source)
@@ -167,21 +181,21 @@ def convert_to_source(net_def, mode_pb_checksum, template_dir, obfuscate, model_
    # generate model source files
    template_name = 'model.jinja2'
-  tensors = [TensorInfo(i, net_def.tensors[i], runtime) for i in range(len(net_def.tensors))]
+    tensors = [
+        TensorInfo(i, net_def.tensors[i], runtime)
+        for i in range(len(net_def.tensors))
+    ]
    source = j2_env.get_template(template_name).render(
-    tensors = tensors,
+        tensors=tensors,
-    net = net_def,
+        net=net_def,
-    tag = model_tag,
+        tag=model_tag,
-    runtime = runtime,
+        runtime=runtime,
-    model_pb_checksum = mode_pb_checksum
+        model_pb_checksum=mode_pb_checksum)
-  )
    with open(output, "wb") as f:
        f.write(source)
    # generate model header file
    template_name = 'model_header.jinja2'
-  source = j2_env.get_template(template_name).render(
+    source = j2_env.get_template(template_name).render(tag=model_tag, )
-    tag = model_tag,
-  )
    with open(output_dir + model_tag + '.h', "wb") as f:
        f.write(source)
--- a/mace/python/tools/tf_converter_lib.py
+++ b/mace/python/tools/tf_converter_lib.py
--- a/mace/python/tools/tf_dsp_converter_lib.py
+++ b/mace/python/tools/tf_dsp_converter_lib.py
--- a/mace/python/tools/tf_ops_stats.py
+++ b/mace/python/tools/tf_ops_stats.py
@@ -10,18 +10,21 @@ from tensorflow import gfile
 FLAGS = None
 def hist_inc(hist, key):
    if key in hist:
        hist[key] += 1
    else:
        hist[key] = 1
 def to_int_list(long_list):
    int_list = []
    for value in long_list:
        int_list.append(int(value))
    return int_list
 def main(unused_args):
    if not FLAGS.input or not gfile.Exists(FLAGS.input):
        print('Input graph file ' + FLAGS.input + ' does not exist!')
@@ -49,7 +52,9 @@ def main(unused_args):
                    tensor = output.eval()
                    tensor_shape = list(tensor.shape)
                    tensor_shapes[tensor_name] = tensor_shape
-          print("Const %s: %s, %d" % (tensor_name, tensor_shape, functools.reduce(operator.mul, tensor_shape, 1)))
+                    print("Const %s: %s, %d" %
+                          (tensor_name, tensor_shape,
+                           functools.reduce(operator.mul, tensor_shape, 1)))
                    if len(tensor_shape) == 1 and tensor_shape[0] < 10:
                        tensor_values[tensor_name] = list(tensor)
@@ -65,11 +70,16 @@ def main(unused_args):
                    if input_name.endswith('weights/read:0'):
                        ksize = input.shape.as_list()
                        break
-          if input_name.endswith('weights:0') and input_name in tensor_shapes:
+                    if input_name.endswith(
+                            'weights:0') and input_name in tensor_shapes:
                        ksize = tensor_shapes[input_name]
                        break
-        print('%s(padding=%s, strides=%s, ksize=%s, format=%s) %s => %s' % (op.type, padding, strides, ksize, data_format, op.inputs[0].shape, op.outputs[0].shape))
+                print(
-        key = '%s(padding=%s, strides=%s, ksize=%s, format=%s)' % (op.type, padding, strides, ksize, data_format)
+                    '%s(padding=%s, strides=%s, ksize=%s, format=%s) %s => %s'
+                    % (op.type, padding, strides, ksize, data_format,
+                       op.inputs[0].shape, op.outputs[0].shape))
+                key = '%s(padding=%s, strides=%s, ksize=%s, format=%s)' % (
+                    op.type, padding, strides, ksize, data_format)
                hist_inc(stats, key)
            elif op.type in ['FusedResizeAndPadConv2D']:
                padding = op.get_attr('padding')
@@ -78,20 +88,25 @@ def main(unused_args):
                ksize = 'Unknown'
                for input in op.inputs:
                    input_name = input.name
-          if input_name.endswith('weights:0') and input_name in tensor_shapes:
+                    if input_name.endswith(
+                            'weights:0') and input_name in tensor_shapes:
                        ksize = tensor_shapes[input_name]
                        break
-        key = '%s(padding=%s, strides=%s, ksize=%s, resize_align_corners=%s)' % (op.type, padding, strides, ksize, resize_align_corners)
+                key = '%s(padding=%s, strides=%s, ksize=%s, ' \
+                    'resize_align_corners=%s)' % (op.type, padding, strides,
+                                                  ksize, resize_align_corners)
                hist_inc(stats, key)
            elif op.type in ['ResizeBilinear']:
                align_corners = op.get_attr('align_corners')
                size = 'Unknown'
                for input in op.inputs:
                    input_name = input.name
-          if input_name.endswith('size:0') and input_name in tensor_values:
+                    if input_name.endswith(
+                            'size:0') and input_name in tensor_values:
                        size = tensor_values[input_name]
                        break
-        key = '%s(size=%s, align_corners=%s)' % (op.type, size, align_corners)
+                key = '%s(size=%s, align_corners=%s)' % (op.type, size,
+                                                         align_corners)
                print(key)
                hist_inc(stats, key)
            elif op.type in ['AvgPool', 'MaxPool']:
@@ -99,38 +114,47 @@ def main(unused_args):
                strides = to_int_list(op.get_attr('strides'))
                ksize = to_int_list(op.get_attr('ksize'))
                data_format = op.get_attr('data_format')
-        key = '%s(padding=%s, strides=%s, ksize=%s)' % (op.type, padding, strides, ksize)
+                key = '%s(padding=%s, strides=%s, ksize=%s)' % (op.type,
+                                                                padding,
+                                                                strides, ksize)
                hist_inc(stats, key)
            elif op.type in ['SpaceToBatchND', 'BatchToSpaceND']:
                block_shape = 'Unknown'
                for input in op.inputs:
                    input_name = input.name
-          if input_name.endswith('block_shape:0') and input_name in tensor_values:
+                    if input_name.endswith(
+                            'block_shape:0') and input_name in tensor_values:
                        block_shape = tensor_values[input_name]
                        break
                paddings = 'Unknown'
                for input in op.inputs:
                    input_name = input.name
-          if input_name.endswith('paddings:0') and input_name in tensor_values:
+                    if input_name.endswith(
+                            'paddings:0') and input_name in tensor_values:
                        paddings = tensor_values[input_name]
                        break
                crops = 'Unknown'
                for input in op.inputs:
                    input_name = input.name
-          if input_name.endswith('crops:0') and input_name in tensor_values:
+                    if input_name.endswith(
+                            'crops:0') and input_name in tensor_values:
                        paddings = tensor_values[input_name]
                        break
                if op.type == 'SpaceToBatchND':
-          key = '%s(block_shape=%s, paddings=%s)' % (op.type, block_shape, paddings)
+                    key = '%s(block_shape=%s, paddings=%s)' % (op.type,
+                                                               block_shape,
+                                                               paddings)
                else:
-          key = '%s(block_shape=%s, crops=%s)' % (op.type, block_shape, crops)
+                    key = '%s(block_shape=%s, crops=%s)' % (op.type,
+                                                            block_shape, crops)
                print(key)
                hist_inc(stats, key)
            elif op.type == 'Pad':
                paddings = 'Unknown'
                for input in op.inputs:
                    input_name = input.name
-          if input_name.endswith('paddings:0') and input_name in tensor_values:
+                    if input_name.endswith(
+                            'paddings:0') and input_name in tensor_values:
                        paddings = tensor_values[input_name]
                        break
                key = '%s(paddings=%s)' % (op.type, paddings)
@@ -142,6 +166,7 @@ def main(unused_args):
    for key, value in sorted(six.iteritems(stats)):
        print('%s: %d' % (key, value))
 def parse_args():
    '''Parses command line arguments.'''
    parser = argparse.ArgumentParser()
@@ -152,6 +177,7 @@ def parse_args():
        help='TensorFlow \'GraphDef\' file to load.')
    return parser.parse_known_args()
 # Script entry point. FLAGS is bound at module level because code in this
 # module reads it as a global (e.g. FLAGS.input); arguments the parser does
 # not recognise are forwarded to main() after the program name.
 if __name__ == '__main__':
    FLAGS, unparsed = parse_args()
    main(unused_args=[sys.argv[0]] + unparsed)
--- a/tools/bazel_adb_run.py
+++ b/tools/bazel_adb_run.py
@@ -7,7 +7,6 @@
 #     --target=//mace/ops:ops_test
 #     --stdout_processor=stdout_processor
 import argparse
 import random
 import re
@@ -15,15 +14,18 @@ import sys
 import sh_commands
 def stdout_processor(stdout, device_properties, abi):
    """Default stdout hook: accept the captured output and ignore it."""
    return None
 def ops_test_stdout_processor(stdout, device_properties, abi):
    """Raise if the captured output reports an abort or a failed test.

    Only `stdout` is inspected; `device_properties` and `abi` are accepted
    but unused here.

    Raises:
        Exception: "Command failed" when any line contains "Aborted" or
            "FAILED".
    """
    failure_markers = ("Aborted", "FAILED")
    for text in stdout.split("\n"):
        if any(marker in text for marker in failure_markers):
            raise Exception("Command failed")
 def ops_benchmark_stdout_processor(stdout, device_properties, abi):
    stdout_lines = stdout.split("\n")
    metrics = {}
@@ -33,17 +35,20 @@ def ops_benchmark_stdout_processor(stdout, device_properties, abi):
        line = line.strip()
        parts = line.split()
        if len(parts) == 5 and parts[0].startswith("BM_"):
-      metrics["%s.time_ms" % parts[0]] = str(float(parts[1])/1e6)
+            metrics["%s.time_ms" % parts[0]] = str(float(parts[1]) / 1e6)
            metrics["%s.input_mb_per_sec" % parts[0]] = parts[3]
            metrics["%s.gmacc_per_sec" % parts[0]] = parts[4]
    platform = device_properties["ro.board.platform"].replace(" ", "-")
    model = device_properties["ro.product.model"].replace(" ", "-")
-  tags = {"ro.board.platform": platform,
+    tags = {
+        "ro.board.platform": platform,
        "ro.product.model": model,
-          "abi": abi}
+        "abi": abi
-  sh_commands.falcon_push_metrics(metrics, tags=tags,
+    }
-                                  endpoint="mace_ops_benchmark")
+    sh_commands.falcon_push_metrics(
+        metrics, tags=tags, endpoint="mace_ops_benchmark")
 def parse_args():
    """Parses command line arguments."""
@@ -57,22 +62,16 @@ def parse_args():
        "--target_socs",
        type=str,
        default="all",
-      help="SoCs(ro.board.platform) to build, comma seperated list or all/random")
+        help="SoCs (ro.board.platform from getprop) to build, "
+        "comma seperated list or all/random")
    parser.add_argument(
-      "--target",
+        "--target", type=str, default="//...", help="Bazel target to build")
-      type=str,
-      default="//...",
-      help="Bazel target to build")
    # argparse applies `type` to the raw command-line string, and bool() of
    # any non-empty string is True, so type=bool turned "--run_target=False"
    # (or "0") into True. Parse the text explicitly instead; the non-string
    # default is left untouched by argparse.
    parser.add_argument(
        "--run_target",
        type=lambda v: v.strip().lower() in ("true", "t", "yes", "y", "1"),
        default=False,
        help="Whether to run the target")
-  parser.add_argument(
+    parser.add_argument("--args", type=str, default="", help="Command args")
-      "--args",
-      type=str,
-      default="",
-      help="Command args")
    parser.add_argument(
        "--stdout_processor",
        type=str,
@@ -80,6 +79,7 @@ def parse_args():
        help="Stdout processing function, default: stdout_processor")
    return parser.parse_known_args()
 def main(unused_args):
    target_socs = None
    if FLAGS.target_socs != "all" and FLAGS.target_socs != "random":
@@ -101,17 +101,25 @@ def main(unused_args):
        sh_commands.bazel_build(target, abi=target_abi)
        if FLAGS.run_target:
            for serialno in target_devices:
-        if target_abi not in set(sh_commands.adb_supported_abis(serialno)):
+                if target_abi not in set(
-          print("Skip device %s which does not support ABI %s" % (serialno, target_abi))
+                        sh_commands.adb_supported_abis(serialno)):
+                    print("Skip device %s which does not support ABI %s" %
+                          (serialno, target_abi))
                    continue
-        stdouts = sh_commands.adb_run(serialno, host_bin_path, bin_name,
+                stdouts = sh_commands.adb_run(
+                    serialno,
+                    host_bin_path,
+                    bin_name,
                    args=FLAGS.args,
                    opencl_profiling=1,
                    vlog_level=0,
                    device_bin_path="/data/local/tmp/mace",
                    out_of_range_check=1)
-        device_properties = sh_commands.adb_getprop_by_serialno(serialno)
+                device_properties = sh_commands.adb_getprop_by_serialno(
-        globals()[FLAGS.stdout_processor](stdouts, device_properties, target_abi)
+                    serialno)
+                globals()[FLAGS.stdout_processor](stdouts, device_properties,
+                                                  target_abi)
 if __name__ == "__main__":
    FLAGS, unparsed = parse_args()

--- a/tools/falcon_cli.py
+++ b/tools/falcon_cli.py
-#-*- coding:utf8 -*-
+import json
+import socket
+import itertools
-import json, socket, itertools
 class FalconCli(object):
    def __init__(self, addr, debug=True, buf_size=1000):
        self.socket_ = socket.create_connection(addr)
        self.stream = self.socket_.makefile()
@@ -16,16 +16,19 @@ class FalconCli(object):
        self.stream.close()
    @classmethod
-    def connect(cls, server="transfer.falcon.miliao.srv", port=8433, debug=True, buf_size=1000):
+    def connect(cls,
+                server="transfer.falcon.miliao.srv",
+                port=8433,
+                debug=True,
+                buf_size=1000):
        try:
            return FalconCli((server, port), debug, buf_size)
        except socket.error, exc:
-            print "error: connect to %s:%s error: %s" %(server, port, exc)
+            print "error: connect to %s:%s error: %s" % (server, port, exc)
    def call(self, name, *params):
-        request = dict(id=next(self.id_counter),
+        request = dict(
-                    params=list(params),
+            id=next(self.id_counter), params=list(params), method=name)
-                    method=name)
        payload = json.dumps(request).encode()
        if self.debug:
            print "--> req:", payload
@@ -49,7 +52,7 @@ class FalconCli(object):
        resp = []
        while True:
-            buf = lines[s:s+self.buf_size]
+            buf = lines[s:s + self.buf_size]
            s = s + self.buf_size
            if len(buf) == 0:
                break
@@ -57,4 +60,3 @@ class FalconCli(object):
            resp.append(r)
        return resp
--- a/tools/generate_data.py
+++ b/tools/generate_data.py
@@ -11,13 +11,16 @@ import re
 #        --input_file input_file
 #
 def generate_data(name, shape):
    np.random.seed()
    data = np.random.random(shape) * 2 - 1
-  input_file_name = FLAGS.input_file + "_" + re.sub('[^0-9a-zA-Z]+', '_', name)
+    input_file_name = FLAGS.input_file + "_" + re.sub('[^0-9a-zA-Z]+', '_',
+                                                      name)
    print 'Generate input file: ', input_file_name
    data.astype(np.float32).tofile(input_file_name)
 def main(unused_args):
    input_names = [name for name in FLAGS.input_node.split(',')]
    input_shapes = [shape for shape in FLAGS.input_shape.split(':')]
@@ -27,29 +30,21 @@ def main(unused_args):
        generate_data(input_names[i], shape)
    print "Generate input file done."
 def parse_args():
    """Parses command line arguments."""
    parser = argparse.ArgumentParser()
    parser.register("type", "bool", lambda v: v.lower() == "true")
    parser.add_argument(
-    "--input_file",
+        "--input_file", type=str, default="", help="input file.")
-    type=str,
-    default="",
-    help="input file.")
    parser.add_argument(
-    "--input_node",
+        "--input_node", type=str, default="input_node", help="input node")
-    type=str,
-    default="input_node",
-    help="input node")
    parser.add_argument(
-    "--input_shape",
+        "--input_shape", type=str, default="1,64,64,3", help="input shape.")
-    type=str,
-    default="1,64,64,3",
-    help="input shape.")
    return parser.parse_known_args()
 # Script entry point. FLAGS is bound at module level because generate_data()
 # and main() read it as a global; arguments the parser does not recognise are
 # forwarded to main() after the program name.
 if __name__ == '__main__':
    FLAGS, unparsed = parse_args()
    main(unused_args=[sys.argv[0]] + unparsed)
--- a/tools/mace_tools.py
+++ b/tools/mace_tools.py
@@ -34,7 +34,8 @@ def run_command(command):
        print("Stderr msg:\n{}".format(err))
    if result.returncode != 0:
-    raise Exception("Exit not 0 from bash with code: {}, command: {}".format(
+        raise Exception(
+            "Exit not 0 from bash with code: {}, command: {}".format(
                result.returncode, command))
@@ -63,10 +64,12 @@ def generate_version_code():
    command = "bash tools/generate_version_code.sh"
    run_command(command)
 def generate_opencl_source_code():
    """Run tools/generate_opencl_code.sh with the "source" argument."""
    run_command("bash tools/generate_opencl_code.sh source")
 def generate_opencl_binay_code(target_soc, model_output_dirs, pull_or_not):
    cl_bin_dirs = []
    for d in model_output_dirs:
@@ -79,6 +82,7 @@ def generate_opencl_binay_code(target_soc, model_output_dirs, pull_or_not):
            'binary', target_soc, cl_bin_dirs_str, int(pull_or_not))
    run_command(command)
 def generate_tuning_param_code(target_soc, model_output_dirs, pull_or_not):
    cl_bin_dirs = []
    for d in model_output_dirs:
@@ -91,20 +95,24 @@ def generate_tuning_param_code(target_soc, model_output_dirs, pull_or_not):
            target_soc, cl_bin_dirs_str, int(pull_or_not))
    run_command(command)
 def generate_code(target_soc, model_output_dirs, pull_or_not):
    """Generate the OpenCL binary code, then the tuning parameter code.

    Both generators receive the same three arguments, in the order given.
    """
    shared_args = (target_soc, model_output_dirs, pull_or_not)
    generate_opencl_binay_code(*shared_args)
    generate_tuning_param_code(*shared_args)
 def clear_env(target_soc):
    """Run tools/clear_env.sh with `target_soc` as its only argument."""
    run_command("bash tools/clear_env.sh {}".format(target_soc))
 def input_file_name(input_name):
    """Build the input file name used for `input_name`.

    The result is the INPUT_FILE_NAME environment variable, an underscore,
    then `input_name` with every run of characters outside [0-9a-zA-Z]
    collapsed into a single underscore.

    Raises:
        KeyError: if INPUT_FILE_NAME is not set in the environment.
    """
    prefix = os.environ['INPUT_FILE_NAME']
    sanitized = re.sub('[^0-9a-zA-Z]+', '_', input_name)
    return prefix + '_' + sanitized
-def generate_random_input(target_soc, model_output_dir,
-                          input_names, input_files):
+def generate_random_input(target_soc, model_output_dir, input_names,
+                          input_files):
    generate_data_or_not = True
    command = "bash tools/validate_tools.sh {} {} {}".format(
        target_soc, model_output_dir, int(generate_data_or_not))
@@ -122,16 +130,19 @@ def generate_random_input(target_soc, model_output_dir,
        else:
            input_name_list.append(input_names)
        if len(input_file_list) != len(input_name_list):
-      raise Exception('If input_files set, the input files should match the input names.')
+            raise Exception('If input_files set, the input files should '
+                            'match the input names.')
        for i in range(len(input_file_list)):
            if input_file_list[i] is not None:
-        dst_input_file = model_output_dir + '/' + input_file_name(input_name_list[i])
+                dst_input_file = model_output_dir + '/' + input_file_name(
+                    input_name_list[i])
                if input_file_list[i].startswith("http://") or \
                        input_file_list[i].startswith("https://"):
                    urllib.urlretrieve(input_file_list[i], dst_input_file)
                else:
                    shutil.copy(input_file_list[i], dst_input_file)
 def generate_model_code():
    command = "bash tools/generate_model_code.sh"
    run_command(command)
@@ -155,10 +166,17 @@ def tuning_run(model_name,
    # TODO(yejianwu) refactoring the hackish code
    stdout_buff = []
    process_output = sh_commands.make_output_processor(stdout_buff)
-  p = sh.bash("tools/tuning_run.sh", target_soc, model_output_dir,
+    p = sh.bash(
-              running_round, int(tuning),
+        "tools/tuning_run.sh",
-              restart_round, option_args, _out=process_output,
+        target_soc,
-              _bg=True, _err_to_out=True)
+        model_output_dir,
+        running_round,
+        int(tuning),
+        restart_round,
+        option_args,
+        _out=process_output,
+        _bg=True,
+        _err_to_out=True)
    p.wait()
    metrics = {}
    for line in stdout_buff:
@@ -166,18 +184,23 @@ def tuning_run(model_name,
        parts = line.split()
        if len(parts) == 6 and parts[0].startswith("time"):
            metrics["%s.create_net_ms" % model_name] = str(float(parts[1]))
-      metrics["%s.mace_engine_ctor_ms" % model_name] = str(float(parts[2]))
+            metrics["%s.mace_engine_ctor_ms" % model_name] = str(
+                float(parts[2]))
            metrics["%s.init_ms" % model_name] = str(float(parts[3]))
            metrics["%s.warmup_ms" % model_name] = str(float(parts[4]))
            if float(parts[5]) > 0:
-        metrics["%s.avg_latency_ms" % model_name] = str(float(parts[5]))
+                metrics["%s.avg_latency_ms" % model_name] = str(
-  tags = {"ro.board.platform": target_soc,
+                    float(parts[5]))
+    tags = {
+        "ro.board.platform": target_soc,
        "abi": target_abi,
        # "runtime": target_runtime, # TODO(yejianwu) Add the actual runtime
        "round": running_round,  # TODO(yejianwu) change this to source/binary
-          "tuning": tuning}
+        "tuning": tuning
-  sh_commands.falcon_push_metrics(metrics, endpoint="mace_model_benchmark",
+    }
-                                  tags=tags)
+    sh_commands.falcon_push_metrics(
+        metrics, endpoint="mace_model_benchmark", tags=tags)
 def benchmark_model(target_soc, model_output_dir, option_args=''):
    command = "bash tools/benchmark.sh {} {} \"{}\"".format(
@@ -188,8 +211,8 @@ def benchmark_model(target_soc, model_output_dir, option_args=''):
 def run_model(model_name, target_runtime, target_abi, target_soc,
              model_output_dir, running_round, restart_round, option_args):
    tuning_run(model_name, target_runtime, target_abi, target_soc,
-             model_output_dir, running_round, False,
+               model_output_dir, running_round, False, restart_round,
-             restart_round, option_args)
+               option_args)
 def generate_production_code(target_soc, model_output_dirs, pull_or_not):
@@ -251,8 +274,8 @@ def merge_libs_and_tuning_results(target_soc, output_dir, model_output_dirs):
    build_production_code()
    model_output_dirs_str = ",".join(model_output_dirs)
-  command = "bash tools/merge_libs.sh {} {} {}".format(target_soc, output_dir,
+    command = "bash tools/merge_libs.sh {} {} {}".format(
-                                                       model_output_dirs_str)
+        target_soc, output_dir, model_output_dirs_str)
    run_command(command)
@@ -260,6 +283,7 @@ def packaging_lib_file(output_dir):
    command = "bash tools/packaging_lib.sh {}".format(output_dir)
    run_command(command)
 def download_model_files(model_file_path,
                         model_output_dir,
                         weight_file_path=""):
@@ -270,10 +294,9 @@ def download_model_files(model_file_path,
    if weight_file_path.startswith("http://") or \
            weight_file_path.startswith("https://"):
-    os.environ[
+        os.environ["WEIGHT_FILE_PATH"] = model_output_dir + "/model.caffemodel"
-      "WEIGHT_FILE_PATH"] = model_output_dir + "/model.caffemodel"
+        urllib.urlretrieve(weight_file_path, os.environ["WEIGHT_FILE_PATH"])
-    urllib.urlretrieve(weight_file_path,
-      os.environ["WEIGHT_FILE_PATH"])
 def md5sum(str):
    md5 = hashlib.md5()
@@ -306,7 +329,10 @@ def parse_args():
        default=10,
        help="The model throughput test running seconds.")
    parser.add_argument(
-      "--restart_round", type=int, default=1, help="The model restart round.")
+        "--restart_round",
+        type=int,
+        default=1,
+        help="The model restart round.")
    parser.add_argument(
        "--tuning", type="bool", default="true", help="Tune opencl params.")
    parser.add_argument(
@@ -321,14 +347,16 @@ def parse_args():
        help="SoCs to build, comma seperated list (getprop ro.board.platform)")
    return parser.parse_known_args()
 def set_environment(configs):
    os.environ["EMBED_MODEL_DATA"] = str(configs["embed_model_data"])
    os.environ["VLOG_LEVEL"] = str(configs["vlog_level"])
-  os.environ["PROJECT_NAME"] = os.path.splitext(os.path.basename(
+    os.environ["PROJECT_NAME"] = os.path.splitext(
-    FLAGS.config))[0]
+        os.path.basename(FLAGS.config))[0]
    os.environ['INPUT_FILE_NAME'] = "model_input"
    os.environ['OUTPUT_FILE_NAME'] = "model_out"
 def main(unused_args):
    configs = parse_model_configs()
@@ -343,13 +371,16 @@ def main(unused_args):
        if not os.path.exists(FLAGS.output_dir):
            os.makedirs(FLAGS.output_dir)
        elif os.path.exists(os.path.join(FLAGS.output_dir, "libmace")):
-      shutil.rmtree(os.path.join(FLAGS.output_dir, os.environ["PROJECT_NAME"]))
+            shutil.rmtree(
-      os.makedirs(os.path.join(FLAGS.output_dir, os.environ["PROJECT_NAME"]))
+                os.path.join(FLAGS.output_dir, os.environ["PROJECT_NAME"]))
+            os.makedirs(
+                os.path.join(FLAGS.output_dir, os.environ["PROJECT_NAME"]))
        generate_version_code()
        generate_opencl_source_code()
-  option_args = ' '.join([arg for arg in unused_args if arg.startswith('--')])
+    option_args = ' '.join(
+        [arg for arg in unused_args if arg.startswith('--')])
    available_socs = sh_commands.adb_get_all_socs()
    target_socs = available_socs
@@ -362,10 +393,10 @@ def main(unused_args):
        target_socs = target_socs & socs
        missing_socs = socs.difference(target_socs)
        if len(missing_socs) > 0:
-      print("Error: devices with SoCs are not connected %s" % missing_socs)
+            print(
+                "Error: devices with SoCs are not connected %s" % missing_socs)
            exit(1)
    for target_soc in target_socs:
        for target_abi in configs["target_abis"]:
            global_runtime = get_global_runtime(configs)
@@ -373,28 +404,27 @@ def main(unused_args):
            os.environ["TARGET_ABI"] = target_abi
            model_output_dirs = []
            for model_name in configs["models"]:
-        print '=======================', model_name, '======================='
+                print '===================', model_name, '==================='
                # Transfer params by environment
                os.environ["MODEL_TAG"] = model_name
                model_config = configs["models"][model_name]
-        input_file_list = model_config.get("validation_inputs_data", [])
+                input_file_list = model_config.get("validation_inputs_data",
+                                                   [])
                for key in model_config:
                    if key in ['input_nodes', 'output_nodes'] and isinstance(
                            model_config[key], list):
                        os.environ[key.upper()] = ",".join(model_config[key])
-          elif key in ['input_shapes', 'output_shapes'] and isinstance(
+                    elif key in ['input_shapes', 'output_shapes'
-              model_config[key], list):
+                                 ] and isinstance(model_config[key], list):
                        os.environ[key.upper()] = ":".join(model_config[key])
                    else:
                        os.environ[key.upper()] = str(model_config[key])
                # Create model build directory
                model_path_digest = md5sum(model_config["model_file_path"])
-        model_output_dir = "%s/%s/%s/%s/%s/%s/%s" % (FLAGS.output_dir,
+                model_output_dir = "%s/%s/%s/%s/%s/%s/%s" % (
-                                                     os.environ["PROJECT_NAME"],
+                    FLAGS.output_dir, os.environ["PROJECT_NAME"], "build",
-                                                     "build", model_name,
+                    model_name, model_path_digest, target_soc, target_abi)
-                                                     model_path_digest,
-                                                     target_soc, target_abi)
                model_output_dirs.append(model_output_dir)
                if FLAGS.mode == "build" or FLAGS.mode == "all":
@@ -404,22 +434,27 @@ def main(unused_args):
                    clear_env(target_soc)
                download_model_files(model_config["model_file_path"],
-          model_output_dir, model_config.get("weight_file_path", ""))
+                                     model_output_dir,
+                                     model_config.get("weight_file_path", ""))
-        if FLAGS.mode == "build" or FLAGS.mode == "run" or FLAGS.mode == "validate"\
+                if FLAGS.mode == "build" or FLAGS.mode == "run" or \
-            or FLAGS.mode == "benchmark" or FLAGS.mode == "all":
+                        FLAGS.mode == "validate" or \
+                        FLAGS.mode == "benchmark" or FLAGS.mode == "all":
                    generate_random_input(target_soc, model_output_dir,
-            model_config['input_nodes'], input_file_list)
+                                          model_config['input_nodes'],
+                                          input_file_list)
                if FLAGS.mode == "build" or FLAGS.mode == "all":
                    generate_model_code()
                    build_mace_run_prod(model_name, global_runtime, target_abi,
-                              target_soc, model_output_dir, FLAGS.tuning)
+                                        target_soc, model_output_dir,
+                                        FLAGS.tuning)
-        if FLAGS.mode == "run" or FLAGS.mode == "validate" or FLAGS.mode == "all":
+                if FLAGS.mode == "run" or FLAGS.mode == "validate" or \
-          run_model(model_name, global_runtime, target_abi, target_soc,
+                        FLAGS.mode == "all":
-                    model_output_dir, FLAGS.round, FLAGS.restart_round,
+                    run_model(model_name, global_runtime, target_abi,
-                    option_args)
+                              target_soc, model_output_dir, FLAGS.round,
+                              FLAGS.restart_round, option_args)
                if FLAGS.mode == "benchmark":
                    benchmark_model(target_soc, model_output_dir, option_args)
@@ -427,14 +462,18 @@ def main(unused_args):
                if FLAGS.mode == "validate" or FLAGS.mode == "all":
                    validate_model(target_soc, model_output_dir)
-      if FLAGS.mode == "build" or FLAGS.mode == "merge" or FLAGS.mode == "all":
+            if FLAGS.mode == "build" or FLAGS.mode == "merge" or \
+                    FLAGS.mode == "all":
                merge_libs_and_tuning_results(
-            target_soc, FLAGS.output_dir + "/" + os.environ["PROJECT_NAME"],
+                    target_soc,
+                    FLAGS.output_dir + "/" + os.environ["PROJECT_NAME"],
                    model_output_dirs)
            if FLAGS.mode == "throughput_test":
-        merged_lib_file = FLAGS.output_dir + "/%s/%s/libmace_%s.%s.a" % \
+                merged_lib_file = FLAGS.output_dir + \
-            (os.environ["PROJECT_NAME"], target_abi, os.environ["PROJECT_NAME"], target_soc)
+                        "/%s/%s/libmace_%s.%s.a" % \
+                        (os.environ["PROJECT_NAME"], target_abi,
+                         os.environ["PROJECT_NAME"], target_soc)
                generate_random_input(target_soc, FLAGS.output_dir, [], [])
                for model_name in configs["models"]:
                    runtime = configs["models"][model_name]["runtime"]
@@ -449,4 +488,3 @@ def main(unused_args):
 # Script entry point. FLAGS is bound at module level because functions in
 # this module (e.g. set_environment and main) read it as a global; arguments
 # the parser does not recognise are forwarded to main() after the program
 # name.
 if __name__ == "__main__":
    FLAGS, unparsed = parse_args()
    main(unused_args=[sys.argv[0]] + unparsed)
--- a/tools/sh_commands.py
+++ b/tools/sh_commands.py
@@ -3,18 +3,22 @@ import re
 import time
 import falcon_cli
 ################################
 # common
 ################################
 def strip_invalid_utf8(str):
    """Pass `str` through `iconv -c -t UTF-8` (via sh) and return the result.

    NOTE(review): presumably relies on iconv's -c flag to drop characters
    that cannot be converted - confirm against the iconv manual.
    """
    iconv_args = ("-c", "-t", "UTF-8")
    return sh.iconv(str, *iconv_args)
 def make_output_processor(buff):
    """Return a per-line callback that echoes output and records it.

    The callback prints each line with surrounding whitespace stripped and
    appends the unmodified line to `buff`.
    """
    def process_output(line):
        echoed = line.strip()
        print(echoed)
        buff.append(line)

    return process_output
 ################################
 # adb commands
 ################################
@@ -23,11 +27,12 @@ def adb_split_stdout(stdout_str):
    # Filter out last empty line
    return [l.strip() for l in stdout_str.split('\n') if len(l.strip()) > 0]
 def adb_devices(target_socs=None):
    outputs = sh.grep(sh.adb("devices"), "^[A-Za-z0-9]\+[[:space:]]\+device$")
    raw_lists = sh.cut(outputs, "-f1")
    device_ids = adb_split_stdout(raw_lists)
-  if target_socs != None:
+    if target_socs is not None:
        target_socs_set = set(target_socs)
        target_devices = []
        for serialno in device_ids:
@@ -38,6 +43,7 @@ def adb_devices(target_socs=None):
    else:
        return device_ids
 def adb_getprop_by_serialno(serialno):
    outputs = sh.adb("-s", serialno, "shell", "getprop")
    raw_props = adb_split_stdout(outputs)
@@ -49,12 +55,14 @@ def adb_getprop_by_serialno(serialno):
            props[m.group(1)] = m.group(2)
    return props
 def adb_supported_abis(serialno):
    """Return the ABIs listed in the device's ro.product.cpu.abilist property.

    The property is read through adb_getprop_by_serialno, split on commas,
    and each entry is stripped of surrounding whitespace.
    """
    raw_abilist = adb_getprop_by_serialno(serialno)["ro.product.cpu.abilist"]
    return [entry.strip() for entry in raw_abilist.split(',')]
 def adb_get_all_socs():
    socs = []
    for d in adb_devices():
@@ -62,7 +70,10 @@ def adb_get_all_socs():
        socs.append(props["ro.board.platform"])
    return set(socs)
-def adb_run(serialno, host_bin_path, bin_name,
+def adb_run(serialno,
+            host_bin_path,
+            bin_name,
            args="",
            opencl_profiling=1,
            vlog_level=0,
@@ -71,7 +82,9 @@ def adb_run(serialno, host_bin_path, bin_name,
    host_bin_full_path = "%s/%s" % (host_bin_path, bin_name)
    device_bin_full_path = "%s/%s" % (device_bin_path, bin_name)
    props = adb_getprop_by_serialno(serialno)
-  print("=====================================================================")
+    print(
+        "====================================================================="
+    )
    print("Run on device: %s, %s, %s" % (serialno, props["ro.board.platform"],
                                         props["ro.product.model"]))
    sh.adb("-s", serialno, "shell", "rm -rf %s" % device_bin_path)
@@ -79,12 +92,19 @@ def adb_run(serialno, host_bin_path, bin_name,
    print("Push %s to %s" % (host_bin_full_path, device_bin_full_path))
    sh.adb("-s", serialno, "push", host_bin_full_path, device_bin_full_path)
    print("Run %s" % device_bin_full_path)
-  stdout_buff=[]
+    stdout_buff = []
    process_output = make_output_processor(stdout_buff)
-  p = sh.adb("-s", serialno, "shell",
+    p = sh.adb(
-             "MACE_OUT_OF_RANGE_CHECK=%d MACE_OPENCL_PROFILING=%d MACE_CPP_MIN_VLOG_LEVEL=%d %s %s" %
+        "-s",
-             (out_of_range_check, opencl_profiling, vlog_level, device_bin_full_path, args),
+        serialno,
-             _out=process_output, _bg=True, _err_to_out=True)
+        "shell",
+        "MACE_OUT_OF_RANGE_CHECK=%d MACE_OPENCL_PROFILING=%d "
+        "MACE_CPP_MIN_VLOG_LEVEL=%d %s %s" %
+        (out_of_range_check, opencl_profiling, vlog_level,
+         device_bin_full_path, args),
+        _out=process_output,
+        _bg=True,
+        _err_to_out=True)
    p.wait()
    return "".join(stdout_buff)
@@ -94,11 +114,14 @@ def adb_run(serialno, host_bin_path, bin_name,
 ################################
 def bazel_build(target, strip="always", abi="armeabi-v7a"):
    print("Build %s with ABI %s" % (target, abi))
-  stdout_buff=[]
+    stdout_buff = []
    process_output = make_output_processor(stdout_buff)
-  p= sh.bazel("build",
+    p = sh.bazel(
-              "-c", "opt",
+        "build",
-              "--strip", strip,
+        "-c",
+        "opt",
+        "--strip",
+        strip,
        "--verbose_failures",
        target,
        "--crosstool_top=//external:android/crosstool",
@@ -109,12 +132,17 @@ def bazel_build(target, strip="always", abi="armeabi-v7a"):
        "--copt=-DMACE_DISABLE_NO_TUNING_WARNING",
        "--copt=-Werror=return-type",
        "--copt=-O3",
-              "--define", "neon=true",
+        "--define",
-              "--define", "openmp=true",
+        "neon=true",
-              _out=process_output, _bg=True, _err_to_out=True)
+        "--define",
+        "openmp=true",
+        _out=process_output,
+        _bg=True,
+        _err_to_out=True)
    p.wait()
    return "".join(stdout_buff)
 def bazel_target_to_bin(target):
    # change //mace/a/b:c to bazel-bin/mace/a/b/c
    prefix, bin_name = target.split(':')
@@ -124,26 +152,32 @@ def bazel_target_to_bin(target):
    host_bin_path = "bazel-bin/%s" % prefix
    return host_bin_path, bin_name
 ################################
 # mace commands
 ################################
 # TODO this should be refactored
 def gen_encrypted_opencl_source(codegen_path="mace/codegen"):
    sh.mkdir("-p", "%s/opencl" % codegen_path)
-  sh.python("mace/python/tools/encrypt_opencl_codegen.py",
+    sh.python(
+        "mace/python/tools/encrypt_opencl_codegen.py",
        "--cl_kernel_dir=./mace/kernels/opencl/cl/",
        "--output_path=%s/opencl/opencl_encrypt_program.cc" % codegen_path)
 def gen_mace_version(codegen_path="mace/codegen"):
    """Run the version-source generator script for codegen_path.

    Creates <codegen_path>/version (mkdir -p), then runs
    mace/tools/git/gen_version_source.sh through bash with
    <codegen_path>/version/version.cc as its argument.
    """
    version_dir = "%s/version" % codegen_path
    version_source = "%s/version/version.cc" % codegen_path
    sh.mkdir("-p", version_dir)
    sh.bash("mace/tools/git/gen_version_source.sh", version_source)
 def gen_compiled_opencl_source(codegen_path="mace/codegen"):
    sh.mkdir("-p", "%s/opencl" % codegen_path)
-  sh.python("mace/python/tools/opencl_codegen.py",
+    sh.python(
+        "mace/python/tools/opencl_codegen.py",
        "--output_path=%s/opencl/opencl_compiled_program.cc" % codegen_path)
 ################################
 # falcon
 ################################
@@ -156,10 +190,10 @@ def falcon_tags(tags_dict):
            tags = tags + ",%s=%s" % (k, v)
    return tags
 def falcon_push_metrics(metrics, endpoint="mace_dev", tags={}):
-  cli = falcon_cli.FalconCli.connect(server="transfer.falcon.miliao.srv",
+    cli = falcon_cli.FalconCli.connect(
-                                     port=8433,
+        server="transfer.falcon.miliao.srv", port=8433, debug=False)
-                                     debug=False)
    ts = int(time.time())
    falcon_metrics = [{
        "endpoint": endpoint,
@@ -171,4 +205,3 @@ def falcon_push_metrics(metrics, endpoint="mace_dev", tags={}):
        "counterType": "GAUGE"
    } for key, value in metrics.iteritems()]
    cli.update(falcon_metrics)
--- a/tools/validate.py
+++ b/tools/validate.py
@@ -20,29 +20,33 @@ from scipy import stats
 #        --input_shape 1,64,64,3 \
 #        --output_shape 1,64,64,2
 def load_data(file):
    """Load the binary contents of `file` as a flat float32 array.

    When `file` is not an existing regular file, an empty array
    (np.empty([0])) is returned instead of raising.
    """
    if not os.path.isfile(file):
        return np.empty([0])
    return np.fromfile(file=file, dtype=np.float32)
 def format_output_name(name):
    """Collapse each run of characters outside [0-9a-zA-Z] in name to '_'."""
    non_alnum = re.compile('[^0-9a-zA-Z]+')
    return non_alnum.sub('_', name)
 def compare_output(output_name, mace_out_value, out_value):
    if mace_out_value.size != 0:
        out_value = out_value.reshape(-1)
        mace_out_value = mace_out_value.reshape(-1)
        assert len(out_value) == len(mace_out_value)
        similarity = (1 - spatial.distance.cosine(out_value, mace_out_value))
-    print output_name, 'MACE VS', FLAGS.platform.upper(), 'similarity: ', similarity
+        print output_name, 'MACE VS', FLAGS.platform.upper(
+        ), 'similarity: ', similarity
        if (FLAGS.mace_runtime == "cpu" and similarity > 0.999) or \
            (FLAGS.mace_runtime == "neon" and similarity > 0.999) or \
            (FLAGS.mace_runtime == "gpu" and similarity > 0.995) or \
                (FLAGS.mace_runtime == "dsp" and similarity > 0.930):
-      print '=======================Similarity Test Passed======================'
+            print '===================Similarity Test Passed=================='
        else:
-      print '=======================Similarity Test Failed======================'
+            print '===================Similarity Test Failed=================='
            sys.exit(-1)
    else:
        print '=======================Skip empty node==================='
@@ -66,21 +70,28 @@ def validate_tf_model(input_names, input_shapes, output_names):
                tf.import_graph_def(input_graph_def, name="")
                input_dict = {}
                for i in range(len(input_names)):
-          input_value = load_data(FLAGS.input_file + "_" + input_names[i])
+                    input_value = load_data(
+                        FLAGS.input_file + "_" + input_names[i])
                    input_value = input_value.reshape(input_shapes[i])
-          input_node = graph.get_tensor_by_name(input_names[i] + ':0')
+                    input_node = graph.get_tensor_by_name(
+                        input_names[i] + ':0')
                    input_dict[input_node] = input_value
                output_nodes = []
                for name in output_names:
-          output_nodes.extend([graph.get_tensor_by_name(name + ':0')])
+                    output_nodes.extend(
+                        [graph.get_tensor_by_name(name + ':0')])
                output_values = session.run(output_nodes, feed_dict=input_dict)
                for i in range(len(output_names)):
-          output_file_name = FLAGS.mace_out_file + "_" + format_output_name(output_names[i])
+                    output_file_name = FLAGS.mace_out_file + "_" + \
+                            format_output_name(output_names[i])
                    mace_out_value = load_data(output_file_name)
-          compare_output(output_names[i], mace_out_value, output_values[i])
+                    compare_output(output_names[i], mace_out_value,
+                                   output_values[i])
-def validate_caffe_model(input_names, input_shapes, output_names, output_shapes):
+def validate_caffe_model(input_names, input_shapes, output_names,
+                         output_shapes):
    os.environ['GLOG_minloglevel'] = '1'  # suppress Caffe verbose prints
    import caffe
    if not os.path.isfile(FLAGS.model_file):
@@ -96,7 +107,8 @@ def validate_caffe_model(input_names, input_shapes, output_names, output_shapes)
    for i in range(len(input_names)):
        input_value = load_data(FLAGS.input_file + "_" + input_names[i])
-    input_value = input_value.reshape(input_shapes[i]).transpose((0, 3, 1, 2))
+        input_value = input_value.reshape(input_shapes[i]).transpose((0, 3, 1,
+                                                                      2))
        input_blob_name = input_names[i]
        try:
            if input_names[i] in net.top_names:
@@ -110,16 +122,20 @@ def validate_caffe_model(input_names, input_shapes, output_names, output_shapes)
    for i in range(len(output_names)):
        value = net.blobs[net.top_names[output_names[i]][0]].data
        out_shape = output_shapes[i]
-    out_shape[1], out_shape[2], out_shape[3] = out_shape[3], out_shape[1], out_shape[2]
+        out_shape[1], out_shape[2], out_shape[3] = out_shape[3], out_shape[
+            1], out_shape[2]
        value = value.reshape(out_shape).transpose((0, 2, 3, 1))
-    output_file_name = FLAGS.mace_out_file + "_" + format_output_name(output_names[i])
+        output_file_name = FLAGS.mace_out_file + "_" + format_output_name(
+            output_names[i])
        mace_out_value = load_data(output_file_name)
        compare_output(output_names[i], mace_out_value, value)
 def main(unused_args):
    input_names = [name for name in FLAGS.input_node.split(',')]
    input_shape_strs = [shape for shape in FLAGS.input_shape.split(':')]
-  input_shapes = [[int(x) for x in shape.split(',')] for shape in input_shape_strs]
+    input_shapes = [[int(x) for x in shape.split(',')]
+                    for shape in input_shape_strs]
    output_names = [name for name in FLAGS.output_node.split(',')]
    assert len(input_names) == len(input_shapes)
@@ -127,18 +143,18 @@ def main(unused_args):
        validate_tf_model(input_names, input_shapes, output_names)
    elif FLAGS.platform == 'caffe':
        output_shape_strs = [shape for shape in FLAGS.output_shape.split(':')]
-    output_shapes = [[int(x) for x in shape.split(',')] for shape in output_shape_strs]
+        output_shapes = [[int(x) for x in shape.split(',')]
-    validate_caffe_model(input_names, input_shapes, output_names, output_shapes)
+                         for shape in output_shape_strs]
+        validate_caffe_model(input_names, input_shapes, output_names,
+                             output_shapes)
 def parse_args():
    """Parses command line arguments."""
    parser = argparse.ArgumentParser()
    parser.register("type", "bool", lambda v: v.lower() == "true")
    parser.add_argument(
-    "--platform",
+        "--platform", type=str, default="", help="Tensorflow or Caffe.")
-    type=str,
-    default="",
-    help="Tensorflow or Caffe.")
    parser.add_argument(
        "--model_file",
        type=str,
@@ -150,40 +166,22 @@ def parse_args():
        default="",
        help="caffe model file to load.")
    parser.add_argument(
-    "--input_file",
+        "--input_file", type=str, default="", help="input file.")
-    type=str,
-    default="",
-    help="input file.")
    parser.add_argument(
        "--mace_out_file",
        type=str,
        default="",
        help="mace output file to load.")
    parser.add_argument(
-    "--mace_runtime",
+        "--mace_runtime", type=str, default="gpu", help="mace runtime device.")
-    type=str,
-    default="gpu",
-    help="mace runtime device.")
    parser.add_argument(
-    "--input_shape",
+        "--input_shape", type=str, default="1,64,64,3", help="input shape.")
-    type=str,
-    default="1,64,64,3",
-    help="input shape.")
    parser.add_argument(
-    "--output_shape",
+        "--output_shape", type=str, default="1,64,64,2", help="output shape.")
-    type=str,
-    default="1,64,64,2",
-    help="output shape.")
    parser.add_argument(
-    "--input_node",
+        "--input_node", type=str, default="input_node", help="input node")
-    type=str,
-    default="input_node",
-    help="input node")
    parser.add_argument(
-    "--output_node",
+        "--output_node", type=str, default="output_node", help="output node")
-    type=str,
-    default="output_node",
-    help="output node")
    return parser.parse_known_args()
@@ -191,4 +189,3 @@ def parse_args():
 # Script entry point. FLAGS is bound here as a module-level global; helper
 # functions in this file (e.g. compare_output) read it directly.
 if __name__ == '__main__':
    # parse_args() returns parse_known_args(): (namespace, leftover args).
    FLAGS, unparsed = parse_args()
    # Re-attach the program name in front of the arguments argparse left over.
    main(unused_args=[sys.argv[0]] + unparsed)
--- a/tools/wino_conv.py
+++ b/tools/wino_conv.py
@@ -11,12 +11,8 @@ G_T = {}
 # f(2, 3)
 A_T[4] = np.array([[1, 1, 1, 0], [0, 1, -1, -1]]).astype(np.float32)
 A[4] = np.transpose(A_T[4])
-B_T[4] = np.array([
+B_T[4] = np.array([[1, 0, -1, 0], [0, 1, 1, 0], [0, -1, 1, 0],
-  [1, 0, -1, 0],
+                   [0, 1, 0, -1]]).astype(np.float32)
-  [0, 1, 1, 0],
-  [0, -1, 1, 0],
-  [0, 1, 0, -1]
-]).astype(np.float32)
 B[4] = np.transpose(B_T[4])
 G[4] = np.array([
    [1, 0, 0],
@@ -44,45 +40,45 @@ B_T[6] = np.array([
 ]).astype(np.float32)
 B[6] = np.transpose(B_T[6])
 G[6] = np.array([
-  [1/4.0 ,   0    ,  0    ],
+    [1 / 4.0, 0, 0],
-  [-1/6.0, -1/6.0 , -1/6.0],
+    [-1 / 6.0, -1 / 6.0, -1 / 6.0],
-  [-1/6.0,  1/6.0 , -1/6.0],
+    [-1 / 6.0, 1 / 6.0, -1 / 6.0],
-  [1/24.0, 1/12.0 , 1/6.0 ],
+    [1 / 24.0, 1 / 12.0, 1 / 6.0],
-  [1/24.0, -1/12.0, 1/6.0 ],
+    [1 / 24.0, -1 / 12.0, 1 / 6.0],
-  [ 0    ,  0     ,  1    ],
+    [0, 0, 1],
 ]).astype(np.float32)
 G_T[6] = np.transpose(G[6])
 # f(6, 3)
 A_T[8] = np.array([
-  [1, 1, 1 , 1 ,  1 ,  1  ,   1  , 0],
+    [1, 1, 1, 1, 1, 1, 1, 0],
-  [0, 1, -1, 2 , -2 , 1/2. , -1/2. , 0],
+    [0, 1, -1, 2, -2, 1 / 2., -1 / 2., 0],
-  [0, 1, 1 , 4 ,  4 , 1/4. ,  1/4. , 0],
+    [0, 1, 1, 4, 4, 1 / 4., 1 / 4., 0],
-  [0, 1, -1, 8 , -8 , 1/8. , -1/8. , 0],
+    [0, 1, -1, 8, -8, 1 / 8., -1 / 8., 0],
-  [0, 1, 1 , 16, 16 , 1/16., 1/16. , 0],
+    [0, 1, 1, 16, 16, 1 / 16., 1 / 16., 0],
-  [0, 1, -1, 32, -32, 1/32., -1/32., 1],
+    [0, 1, -1, 32, -32, 1 / 32., -1 / 32., 1],
 ]).astype(np.float32)
 A[8] = np.transpose(A_T[8])
 B_T[8] = np.array([
-  [1,  0  , -21/4.,   0  ,  21/4.,   0  , -1, 0],
+    [1, 0, -21 / 4., 0, 21 / 4., 0, -1, 0],
-  [0,  1  ,   1  , -17/4., -17/4.,   1  , 1 , 0],
+    [0, 1, 1, -17 / 4., -17 / 4., 1, 1, 0],
-  [0,  -1 ,   1  , 17/4. , -17/4.,  -1  , 1 , 0],
+    [0, -1, 1, 17 / 4., -17 / 4., -1, 1, 0],
-  [0, 1/2. ,  1/4. , -5/2. , -5/4.,   2  , 1 , 0],
+    [0, 1 / 2., 1 / 4., -5 / 2., -5 / 4., 2, 1, 0],
-  [0, -1/2.,  1/4. ,  5/2. , -5/4.,  -2  , 1 , 0],
+    [0, -1 / 2., 1 / 4., 5 / 2., -5 / 4., -2, 1, 0],
-  [0,  2  ,   4  , -5/2. ,  -5  ,  1/2. , 1 , 0],
+    [0, 2, 4, -5 / 2., -5, 1 / 2., 1, 0],
-  [0,  -2 ,   4  ,  5/2. ,  -5  , -1/2. , 1 , 0],
+    [0, -2, 4, 5 / 2., -5, -1 / 2., 1, 0],
-  [0,  -1 ,   0  , 21/4. ,   0  , -21/4., 0 , 1],
+    [0, -1, 0, 21 / 4., 0, -21 / 4., 0, 1],
 ]).astype(np.float32)
 B[8] = np.transpose(B_T[8])
 G[8] = np.array([
- [ 1    ,   0    ,  0  ],
+    [1, 0, 0],
- [-2/9. , -2/9.  , -2/9.],
+    [-2 / 9., -2 / 9., -2 / 9.],
- [-2/9. ,  2/9.  , -2/9.],
+    [-2 / 9., 2 / 9., -2 / 9.],
- [1/90. , 1/45.  , 2/45.],
+    [1 / 90., 1 / 45., 2 / 45.],
- [1/90. , -1/45. , 2/45.],
+    [1 / 90., -1 / 45., 2 / 45.],
- [32/45., 16/45. , 8/45.],
+    [32 / 45., 16 / 45., 8 / 45.],
- [32/45., -16/45., 8/45.],
+    [32 / 45., -16 / 45., 8 / 45.],
- [ 0    ,  0     ,  1   ],
+    [0, 0, 1],
 ]).astype(np.float32)
 G_T[8] = np.transpose(G[8])
@@ -112,7 +108,7 @@ def winograd_conv(m, r, input, filter):
        for c in range(C):
            u = np.dot(np.dot(G[alpha], filter[k, c, :, :]), G_T[alpha])
            for i in range(alpha):
-        for j in range(alpha) :
+                for j in range(alpha):
                    U[(i * alpha + j) * K + k, c] = u[i, j]
    print 'filter out: ', U.shape
@@ -129,24 +125,24 @@ def winograd_conv(m, r, input, filter):
            w_idx = t % rounded_w
            h_start = h_idx * m
            w_start = w_idx * m
-      h_end = min(h_start+alpha, input_shape[2])
+            h_end = min(h_start + alpha, input_shape[2])
-      w_end = min(w_start+alpha, input_shape[3])
+            w_end = min(w_start + alpha, input_shape[3])
            d = np.zeros((alpha, alpha))
            d[0:h_end-h_start, 0:w_end-w_start] = \
                input[n, c, h_start:h_end, w_start:w_end]
            v = np.dot(np.dot(B_T[alpha], d), B[alpha])
            for i in range(alpha):
                for j in range(alpha):
-          V[(i*alpha+j)*C + c, p] = v[i, j]
+                    V[(i * alpha + j) * C + c, p] = v[i, j]
    tmp = V.reshape(alpha_square, C, P, 1)
    print 'input out: ', tmp.shape
    tmp.astype(np.float32).tofile("C")
    M = np.zeros((alpha_square * K, P))
    for i in range(alpha_square):
-    u = U[i * K : (i+1) * K, :]
+        u = U[i * K:(i + 1) * K, :]
-    v = V[i * C : (i+1) * C, :]
+        v = V[i * C:(i + 1) * C, :]
-    M[i * K : (i+1) * K, :] = np.dot(u, v)
+        M[i * K:(i + 1) * K, :] = np.dot(u, v)
    print 'M shape: ', M.shape
    M.astype(np.float32).tofile("gemm")
@@ -156,7 +152,7 @@ def winograd_conv(m, r, input, filter):
            tm = np.zeros((alpha, alpha))
            for i in range(alpha):
                for j in range(alpha):
-          tm[i][j] = M[(i*alpha+j) * K + k, b]
+                    tm[i][j] = M[(i * alpha + j) * K + k, b]
            y = np.dot(np.dot(A_T[alpha], tm), A[alpha])
            for i in range(m):
                for j in range(m):
@@ -173,6 +169,7 @@ def winograd_conv(m, r, input, filter):
    return res
 def tf_conv(input, filter):
    conv_op = tf.nn.conv2d(input, filter, [1, 1, 1, 1], 'VALID')
    with tf.Session() as sess:
@@ -206,4 +203,3 @@ def main():
 if __name__ == '__main__':
    main()