Commit 6da30d22 authored by Liangliang He

Enable python style check

Parent: e54825c5
 stages:
   - cpplint
+  - pycodestyle
   - ops_test
   - ops_benchmark
@@ -7,7 +8,12 @@ cpplint:
   stage: cpplint
   script:
     - curl -o cpplint.py https://raw.githubusercontent.com/google/styleguide/gh-pages/cpplint/cpplint.py
-    - python cpplint.py --linelength=80 --counting=detailed $(find mace -name *.h -or -name *.cc)
+    - python cpplint.py --linelength=80 --counting=detailed $(find mace -name "*.h" -or -name "*.cc")
+
+pycodestyle:
+  stage: pycodestyle
+  script:
+    - pycodestyle $(find -name "*.py")
 
 ops_test:
   stage: ops_test
......
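Note: the new pycodestyle stage can be reproduced locally before pushing. A minimal sketch using pycodestyle's Python API (the checked paths are illustrative):

    # Lint all Python files under the given paths with default PEP 8 settings.
    import pycodestyle

    style = pycodestyle.StyleGuide()
    report = style.check_files(["mace", "tools"])  # recurses into directories
    print(report.total_errors)  # non-zero means style violations were found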
@@ -113,7 +113,8 @@ RUN pip install -i http://pypi.douban.com/simple/ --trusted-host pypi.douban.com
     scipy \
     jinja2 \
     pyyaml \
-    sh
+    sh \
+    pycodestyle
 
 # Download tensorflow tools
 RUN wget http://cnbj1-inner-fds.api.xiaomi.net/mace/tool/transform_graph && \
......
@@ -27,28 +27,30 @@ def generate_cpp_source():
     print "Generate binary from", binary_path
 
     idx = 0
-    size, = struct.unpack("Q", binary_array[idx:idx+8])
+    size, = struct.unpack("Q", binary_array[idx:idx + 8])
     idx += 8
     for _ in xrange(size):
-        key_size, = struct.unpack("i", binary_array[idx:idx+4])
+        key_size, = struct.unpack("i", binary_array[idx:idx + 4])
         idx += 4
-        key, = struct.unpack(str(key_size) + "s", binary_array[idx:idx+key_size])
+        key, = struct.unpack(
+            str(key_size) + "s", binary_array[idx:idx + key_size])
         idx += key_size
-        params_size, = struct.unpack("i", binary_array[idx:idx+4])
+        params_size, = struct.unpack("i", binary_array[idx:idx + 4])
         idx += 4
         data_map[key] = []
         count = params_size / 4
-        params = struct.unpack(str(count) + "i", binary_array[idx:idx+params_size])
+        params = struct.unpack(
+            str(count) + "i", binary_array[idx:idx + params_size])
         for i in params:
             data_map[key].append(i)
         idx += params_size
 
     env = jinja2.Environment(loader=jinja2.FileSystemLoader(sys.path[0]))
     return env.get_template('str2vec_maps.cc.jinja2').render(
-        maps = data_map,
-        data_type = 'unsigned int',
-        variable_name = FLAGS.variable_name
-    )
+        maps=data_map,
+        data_type='unsigned int',
+        variable_name=FLAGS.variable_name)
+
 
 def main(unused_args):
     cpp_binary_source = generate_cpp_source()
@@ -58,14 +60,12 @@ def main(unused_args):
     w_file.write(cpp_binary_source)
     w_file.close()
 
+
 def parse_args():
     """Parses command line arguments."""
     parser = argparse.ArgumentParser()
     parser.add_argument(
-        "--binary_dirs",
-        type=str,
-        default="",
-        help="The binaries file path.")
+        "--binary_dirs", type=str, default="", help="The binaries file path.")
     parser.add_argument(
         "--binary_file_name",
         type=str,
@@ -75,7 +75,8 @@ def parse_args():
         "--output_path",
         type=str,
         default="",
-        help="The path of generated C++ source file which contains the binary.")
+        help="The path of generated C++ source file which contains the binary."
+    )
     parser.add_argument(
         "--variable_name",
         type=str,
......
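Note: for reference, the blob parsed by generate_cpp_source above is a uint64 entry count followed, per entry, by an int32 key length, the key bytes, an int32 params byte size, and that many bytes of int32 params. A sketch that packs one such entry (key and values are illustrative):

    import struct

    key, params = b"conv_1", [1, 2, 3]
    blob = struct.pack("Q", 1)                            # entry count
    blob += struct.pack("i", len(key)) + key              # key length + key bytes
    blob += struct.pack("i", 4 * len(params))             # params size in bytes
    blob += struct.pack(str(len(params)) + "i", *params)  # int32 params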
@@ -26,4 +26,3 @@ def tf_dtype_2_mace_dtype(tf_dtype):
     if not mace_dtype:
         raise Exception("Not supported tensorflow dtype: " + tf_dtype)
     return mace_dtype
-
@@ -4,10 +4,14 @@ import hashlib
 import os.path
 from mace.python.tools import source_converter_lib
 
-# ./bazel-bin/mace/python/tools/tf_converter --model_file quantized_test.pb --output quantized_test_dsp.pb --runtime dsp --input_dim input_node,1,28,28,3
+# ./bazel-bin/mace/python/tools/tf_converter --model_file quantized_test.pb \
+#     --output quantized_test_dsp.pb \
+#     --runtime dsp \
+#     --input_dim input_node,1,28,28,3
 
 FLAGS = None
 
+
 def file_checksum(fname):
     hash_func = hashlib.sha256()
     with open(fname, "rb") as f:
@@ -15,6 +19,7 @@ def file_checksum(fname):
             hash_func.update(chunk)
     return hash_func.hexdigest()
 
+
 def main(unused_args):
     if not os.path.isfile(FLAGS.model_file):
         print("Input graph file '" + FLAGS.model_file + "' does not exist!")
@@ -22,17 +27,21 @@ def main(unused_args):
     model_checksum = file_checksum(FLAGS.model_file)
     if FLAGS.model_checksum != "" and FLAGS.model_checksum != model_checksum:
-        print("Model checksum mismatch: %s != %s" % (model_checksum, FLAGS.model_checksum))
+        print("Model checksum mismatch: %s != %s" % (model_checksum,
+                                                     FLAGS.model_checksum))
         sys.exit(-1)
 
     if FLAGS.platform == 'caffe':
         if not os.path.isfile(FLAGS.weight_file):
-            print("Input weight file '" + FLAGS.weight_file + "' does not exist!")
+            print("Input weight file '" + FLAGS.weight_file +
+                  "' does not exist!")
             sys.exit(-1)
 
         weight_checksum = file_checksum(FLAGS.weight_file)
-        if FLAGS.weight_checksum != "" and FLAGS.weight_checksum != weight_checksum:
-            print("Weight checksum mismatch: %s != %s" % (weight_checksum, FLAGS.weight_checksum))
+        if FLAGS.weight_checksum != "" and \
+                FLAGS.weight_checksum != weight_checksum:
+            print("Weight checksum mismatch: %s != %s" %
+                  (weight_checksum, FLAGS.weight_checksum))
             sys.exit(-1)
 
         if FLAGS.runtime == 'dsp':
@@ -41,22 +50,27 @@ def main(unused_args):
         from mace.python.tools import caffe_converter_lib
         output_graph_def = caffe_converter_lib.convert_to_mace_pb(
-            FLAGS.model_file, FLAGS.weight_file, FLAGS.input_node, FLAGS.input_shape, FLAGS.output_node,
-            FLAGS.data_type, FLAGS.runtime, FLAGS.winograd)
+            FLAGS.model_file, FLAGS.weight_file, FLAGS.input_node,
+            FLAGS.input_shape, FLAGS.output_node, FLAGS.data_type,
+            FLAGS.runtime, FLAGS.winograd)
     elif FLAGS.platform == 'tensorflow':
         if FLAGS.runtime == 'dsp':
             from mace.python.tools import tf_dsp_converter_lib
             output_graph_def = tf_dsp_converter_lib.convert_to_mace_pb(
-                FLAGS.model_file, FLAGS.input_node, FLAGS.output_node, FLAGS.dsp_mode)
+                FLAGS.model_file, FLAGS.input_node, FLAGS.output_node,
+                FLAGS.dsp_mode)
         else:
             from mace.python.tools import tf_converter_lib
             output_graph_def = tf_converter_lib.convert_to_mace_pb(
-                FLAGS.model_file, FLAGS.input_node, FLAGS.input_shape, FLAGS.output_node,
-                FLAGS.data_type, FLAGS.runtime, FLAGS.winograd)
+                FLAGS.model_file, FLAGS.input_node, FLAGS.input_shape,
+                FLAGS.output_node, FLAGS.data_type, FLAGS.runtime,
+                FLAGS.winograd)
 
     if FLAGS.output_type == 'source':
-        source_converter_lib.convert_to_source(output_graph_def, model_checksum, FLAGS.template, FLAGS.obfuscate,
-                                               FLAGS.model_tag, FLAGS.output, FLAGS.runtime, FLAGS.embed_model_data)
+        source_converter_lib.convert_to_source(
+            output_graph_def, model_checksum, FLAGS.template, FLAGS.obfuscate,
+            FLAGS.model_tag, FLAGS.output, FLAGS.runtime,
+            FLAGS.embed_model_data)
     else:
         with open(FLAGS.output, "wb") as f:
             f.write(output_graph_def.SerializeToString())
@@ -65,6 +79,7 @@ def main(unused_args):
             f.write(str(output_graph_def))
     print("Model conversion is completed.")
 
+
 def str2bool(v):
     if v.lower() in ('yes', 'true', 't', 'y', '1'):
         return True
@@ -73,6 +88,7 @@ def str2bool(v):
     else:
         raise argparse.ArgumentTypeError('Boolean value expected.')
 
+
 def parse_args():
     """Parses command line arguments."""
     parser = argparse.ArgumentParser()
@@ -81,12 +97,10 @@ def parse_args():
         "--model_file",
         type=str,
         default="",
-        help="TensorFlow \'GraphDef\' file to load, Caffe prototxt file to load.")
+        help="TensorFlow \'GraphDef\' file to load, "
+        "Caffe prototxt file to load.")
     parser.add_argument(
-        "--weight_file",
-        type=str,
-        default="",
-        help="Caffe data file to load.")
+        "--weight_file", type=str, default="", help="Caffe data file to load.")
     parser.add_argument(
         "--model_checksum",
         type=str,
@@ -103,35 +117,23 @@ def parse_args():
         default="",
         help="File to save the output graph to.")
     parser.add_argument(
-        "--runtime",
-        type=str,
-        default="cpu",
-        help="Runtime: cpu/gpu/dsp")
+        "--runtime", type=str, default="cpu", help="Runtime: cpu/gpu/dsp")
     parser.add_argument(
         "--input_node",
         type=str,
         default="input_node",
         help="e.g., input_node")
     parser.add_argument(
-        "--output_node",
-        type=str,
-        default="softmax",
-        help="e.g., softmax")
+        "--output_node", type=str, default="softmax", help="e.g., softmax")
     parser.add_argument(
         "--data_type",
         type=str,
         default='DT_FLOAT',
         help="e.g., DT_HALF/DT_FLOAT")
     parser.add_argument(
-        "--output_type",
-        type=str,
-        default="pb",
-        help="output type: source/pb")
+        "--output_type", type=str, default="pb", help="output type: source/pb")
     parser.add_argument(
-        "--template",
-        type=str,
-        default="",
-        help="template path")
+        "--template", type=str, default="", help="template path")
     parser.add_argument(
         "--obfuscate",
         type=str2bool,
@@ -152,25 +154,13 @@ def parse_args():
         default=False,
         help="open winograd convolution or not")
     parser.add_argument(
-        "--dsp_mode",
-        type=int,
-        default=0,
-        help="dsp run mode, defalut=0")
+        "--dsp_mode", type=int, default=0, help="dsp run mode, defalut=0")
     parser.add_argument(
-        "--input_shape",
-        type=str,
-        default="",
-        help="input shape.")
+        "--input_shape", type=str, default="", help="input shape.")
     parser.add_argument(
-        "--platform",
-        type=str,
-        default="tensorflow",
-        help="tensorflow/caffe")
+        "--platform", type=str, default="tensorflow", help="tensorflow/caffe")
     parser.add_argument(
-        "--embed_model_data",
-        type=str2bool,
-        default=True,
-        help="input shape.")
+        "--embed_model_data", type=str2bool, default=True, help="input shape.")
     return parser.parse_known_args()
......
 class DspOps(object):
     def __init__(self):
         self.dsp_ops = {
@@ -18,7 +17,7 @@ class DspOps(object):
             'QuantizedAvgPool': 'QuantizedAvgPool_8',
             'QuantizedConcat': 'QuantizedConcat_8',
             'QuantizedBiasAdd': 'QuantizedBiasAdd_8p8to32',
-            'QuantizedResizeBilinear' : 'QuantizedResizeBilinear_8',
+            'QuantizedResizeBilinear': 'QuantizedResizeBilinear_8',
             'QuantizedSpaceToBatchND': 'QuantizedSpaceToBatchND_8',
             'QuantizedBatchToSpaceND': 'QuantizedBatchToSpaceND_8',
             'QuantizedSoftmax': 'QuantizedSoftmax_8',
@@ -54,6 +53,7 @@ class DspOps(object):
             'Concat': 'Concat_f',
             'AddN': 'AddN_f',
         }
+
     def has_op(self, tf_op):
         return tf_op in self.dsp_ops
@@ -61,5 +61,3 @@ class DspOps(object):
         if tf_op not in self.dsp_ops:
             raise Exception('Could not map nn op for: ', tf_op)
         return self.dsp_ops[tf_op]
-
-
@@ -11,10 +11,13 @@ FLAGS = None
 encrypt_lookup_table = "Xiaomi-AI-Platform-Mace"
 
+
 def encrypt_code(code_str):
     encrypted_arr = []
     for i in range(len(code_str)):
-        encrypted_char = hex(ord(code_str[i]) ^ ord(encrypt_lookup_table[i % len(encrypt_lookup_table)]))
+        encrypted_char = hex(
+            ord(code_str[i]) ^ ord(
+                encrypt_lookup_table[i % len(encrypt_lookup_table)]))
         encrypted_arr.append(encrypted_char)
     return encrypted_arr
@@ -45,7 +48,8 @@ def main(unused_args):
         encrypted_code_maps[file_name[:-3]] = encrypted_code_arr
 
     env = jinja2.Environment(loader=jinja2.FileSystemLoader(sys.path[0]))
-    cpp_cl_encrypted_kernel = env.get_template('str2vec_maps.cc.jinja2').render(
+    cpp_cl_encrypted_kernel = env.get_template(
+        'str2vec_maps.cc.jinja2').render(
         maps=encrypted_code_maps,
         data_type='unsigned char',
        variable_name='kEncryptedProgramMap')
......
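Note: encrypt_code above is a repeating-key XOR, so applying the same key a second time restores the source. A minimal sketch:

    key = "Xiaomi-AI-Platform-Mace"

    def xor_with_key(s):
        # XOR each character against the key, cycling through the key.
        return ''.join(
            chr(ord(c) ^ ord(key[i % len(key)])) for i, c in enumerate(s))

    src = "__kernel void foo() {}"
    assert xor_with_key(xor_with_key(src)) == src  # round-trips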
@@ -2,18 +2,21 @@ import tensorflow as tf
 from mace.proto import mace_pb2
 from collections import OrderedDict
 
+
 def sort_tf_node(node, nodes_map, ordered_nodes_map):
     if node.name not in ordered_nodes_map:
         for input_tensor_name in node.input:
             input_node_name = input_tensor_name.split(':')[
                 0] if ':' in input_tensor_name else input_tensor_name
-            if input_node_name not in nodes_map or input_node_name in ordered_nodes_map:
+            if input_node_name not in nodes_map or \
+                    input_node_name in ordered_nodes_map:
                 continue
             input_node = nodes_map[input_node_name]
             sort_tf_node(input_node, nodes_map, ordered_nodes_map)
         ordered_nodes_map[node.name] = node
 
+
 def sort_tf_graph(graph_def):
     nodes_map = {}
     ordered_nodes_map = OrderedDict()
@@ -31,13 +34,15 @@ def sort_mace_node(node, nodes_map, ordered_nodes_map):
         for input_tensor_name in node.input:
             input_node_name = input_tensor_name.split(':')[
                 0] if ':' in input_tensor_name else input_tensor_name
-            if input_node_name not in nodes_map or input_node_name in ordered_nodes_map:
+            if input_node_name not in nodes_map or \
+                    input_node_name in ordered_nodes_map:
                 continue
             input_node = nodes_map[input_node_name]
             sort_mace_node(input_node, nodes_map, ordered_nodes_map)
         ordered_nodes_map[node.name] = node
 
+
 def sort_mace_graph(graph_def, output_name):
     nodes_map = {}
     ordered_nodes_map = OrderedDict()
......
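Note: sort_tf_node and sort_mace_node above are a depth-first post-order walk: a node is appended only after all of its producers, so iterating the OrderedDict yields a topological order. A toy sketch of the same idea with plain dicts (names are hypothetical):

    from collections import OrderedDict

    def sort_node(name, deps, ordered):
        if name in ordered:
            return
        for dep in deps.get(name, []):   # visit producers first
            sort_node(dep, deps, ordered)
        ordered[name] = True             # then emit the node itself

    ordered = OrderedDict()
    sort_node("c", {"c": ["a", "b"], "b": ["a"]}, ordered)
    print(list(ordered))  # ['a', 'b', 'c']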
@@ -2,6 +2,7 @@ import sys
 import operator
 from mace.proto import mace_pb2
 
+
 class MemoryOptimizer(object):
     def __init__(self, net_def):
         self.net_def = net_def
@@ -37,9 +38,9 @@ class MemoryOptimizer(object):
         mem_size = [0, 0]
         if op_type == 'WinogradTransform' or op_type == 'MatMul':
             mem_size[0] = output_shape[2] * output_shape[3]
-            mem_size[1] = output_shape[0] * int((output_shape[1]+3)/4)
+            mem_size[1] = output_shape[0] * int((output_shape[1] + 3) / 4)
         else:
-            mem_size[0] = output_shape[2] * int((output_shape[3]+3)/4)
+            mem_size[0] = output_shape[2] * int((output_shape[3] + 3) / 4)
             mem_size[1] = output_shape[0] * output_shape[1]
         return mem_size
@@ -51,13 +52,16 @@ class MemoryOptimizer(object):
             if self.is_buffer_image_op(op):
                 continue
             if not op.output_shape:
-                print('WARNING: There is no output shape information to do memory optimization.')
+                print('WARNING: There is no output shape information to '
+                      'do memory optimization.')
                 return
             if len(op.output_shape) != len(op.output):
-                print('WARNING: the number of output shape is not equal to the number of output.')
+                print('WARNING: the number of output shape is not equal to '
+                      'the number of output.')
                 return
             for i in range(len(op.output)):
-                op_mem_size = self.get_mem_size(op.type, op.output_shape[i].dims)
+                op_mem_size = self.get_mem_size(op.type,
+                                                op.output_shape[i].dims)
                 mem_id = -1
                 if len(self.idle_mem) > 0:
                     best_mem_candidate_id = -1
@@ -65,16 +69,22 @@ class MemoryOptimizer(object):
                     best_mem_candidate_shape = []
                     for mid in self.idle_mem:
                         reuse_mem_size = self.mem_block[mid]
-                        resize_mem_size = [max(reuse_mem_size[0], op_mem_size[0]), max(reuse_mem_size[1], op_mem_size[1])]
-                        delta_mem_area = self.mem_area(resize_mem_size) - self.mem_area(reuse_mem_size)
+                        resize_mem_size = [
+                            max(reuse_mem_size[0], op_mem_size[0]),
+                            max(reuse_mem_size[1], op_mem_size[1])
+                        ]
+                        delta_mem_area = self.mem_area(
+                            resize_mem_size) - self.mem_area(reuse_mem_size)
                         if delta_mem_area < best_mem_candidate_delta_area:
                             best_mem_candidate_id = mid
                             best_mem_candidate_delta_area = delta_mem_area
                             best_mem_candidate_shape = resize_mem_size
 
-                    if best_mem_candidate_delta_area <= self.mem_area(op_mem_size):
+                    if best_mem_candidate_delta_area <= self.mem_area(
+                            op_mem_size):
                         # reuse
-                        self.mem_block[best_mem_candidate_id] = best_mem_candidate_shape
+                        self.mem_block[
+                            best_mem_candidate_id] = best_mem_candidate_shape
                         mem_id = best_mem_candidate_id
                         self.idle_mem.remove(mem_id)
@@ -113,7 +123,8 @@ class MemoryOptimizer(object):
             print mem, self.mem_block[mem]
             optimized_mem_size += reduce(operator.mul, self.mem_block[mem], 4)
-        print('origin mem: %d, optimized mem: %d', origin_mem_size, optimized_mem_size)
+        print('origin mem: %d, optimized mem: %d', origin_mem_size,
+              optimized_mem_size)
 
+
 def optimize_memory(net_def):
......
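Note: the pair returned by get_mem_size above is a 2-D OpenCL image size, with the channel dimension packed four values per pixel, hence the (c + 3) / 4 rounding. An illustrative rework of the non-Winograd branch (NHWC output shape assumed):

    def image_block_size(output_shape):
        # width = W * ceil(C / 4) pixels, height = N * H pixels
        n, h, w, c = output_shape
        return [w * int((c + 3) / 4), n * h]

    print(image_block_size([1, 64, 64, 3]))  # [64, 64]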
@@ -27,37 +27,40 @@ def generate_cpp_source():
             binary_array = np.fromfile(f, dtype=np.uint8)
 
         idx = 0
-        size, = struct.unpack("Q", binary_array[idx:idx+8])
+        size, = struct.unpack("Q", binary_array[idx:idx + 8])
         idx += 8
         for _ in xrange(size):
-            key_size, = struct.unpack("i", binary_array[idx:idx+4])
+            key_size, = struct.unpack("i", binary_array[idx:idx + 4])
             idx += 4
-            key, = struct.unpack(str(key_size) + "s", binary_array[idx:idx+key_size])
+            key, = struct.unpack(
+                str(key_size) + "s", binary_array[idx:idx + key_size])
             idx += key_size
-            value_size, = struct.unpack("i", binary_array[idx:idx+4])
+            value_size, = struct.unpack("i", binary_array[idx:idx + 4])
             idx += 4
             maps[key] = []
-            value = struct.unpack(str(value_size) + "B",
-                                  binary_array[idx:idx+value_size])
+            value = struct.unpack(
+                str(value_size) + "B", binary_array[idx:idx + value_size])
             idx += value_size
             for ele in value:
                 maps[key].append(hex(ele))
 
-        cl_platform_info_path = os.path.join(binary_dir, FLAGS.platform_info_file_name)
+        cl_platform_info_path = os.path.join(binary_dir,
+                                             FLAGS.platform_info_file_name)
         with open(cl_platform_info_path, 'r') as f:
             curr_platform_info = f.read()
         if platform_info != "":
-            assert(curr_platform_info == platform_info)
+            assert (curr_platform_info == platform_info)
         platform_info = curr_platform_info
 
     env = jinja2.Environment(loader=jinja2.FileSystemLoader(sys.path[0]))
     return env.get_template('opencl_compiled_kernel.cc.jinja2').render(
-        maps = maps,
-        data_type = 'unsigned char',
-        variable_name = 'kCompiledProgramMap',
-        platform_info = platform_info,
+        maps=maps,
+        data_type='unsigned char',
+        variable_name='kCompiledProgramMap',
+        platform_info=platform_info,
     )
 
+
 def main(unused_args):
     cpp_cl_binary_source = generate_cpp_source()
@@ -90,7 +93,7 @@ def parse_args():
         "--output_path",
         type=str,
         default="./mace/examples/codegen/opencl/opencl_compiled_program.cc",
-        help="The path of generated C++ header file which contains cl binaries.")
+        help="The path of generated C++ header file for cl binaries.")
     return parser.parse_known_args()
......
@@ -6,9 +6,9 @@ import hashlib
 from mace.proto import mace_pb2
 from jinja2 import Environment, FileSystemLoader
 
 GENERATED_NAME = set()
 
+
 def generate_obfuscated_name(namespace, name):
     md5 = hashlib.md5()
     md5.update(namespace)
@@ -22,31 +22,36 @@ def generate_obfuscated_name(namespace, name):
     GENERATED_NAME.add(name)
     return name
 
+
 def generate_tensor_map(tensors):
     tensor_map = {}
     for t in tensors:
-        if not tensor_map.has_key(t.name):
+        if t.name not in tensor_map:
             tensor_map[t.name] = generate_obfuscated_name("tensor", t.name)
     return tensor_map
 
+
 def generate_in_out_map(ops, tensor_map):
     in_out_map = {}
     for op in ops:
         op.name = generate_obfuscated_name("op", op.name)
         for input_name in op.input:
-            if not in_out_map.has_key(input_name):
-                if tensor_map.has_key(input_name):
+            if input_name not in in_out_map:
+                if input_name in tensor_map:
                     in_out_map[input_name] = tensor_map[input_name]
                 else:
-                    in_out_map[input_name] = generate_obfuscated_name("in", input_name)
+                    in_out_map[input_name] = generate_obfuscated_name(
+                        "in", input_name)
         for output_name in op.output:
-            if not in_out_map.has_key(output_name):
-                if tensor_map.has_key(output_name):
+            if output_name not in in_out_map:
+                if output_name in tensor_map:
                     in_out_map[output_name] = tensor_map[output_name]
                 else:
-                    in_out_map[output_name] = generate_obfuscated_name("out", output_name)
+                    in_out_map[output_name] = generate_obfuscated_name(
+                        "out", output_name)
     return in_out_map
 
+
 def obfuscate_name(net_def):
     input_node = "mace_input_node"
     output_node = "mace_output_node"
@@ -63,20 +68,22 @@ def obfuscate_name(net_def):
             if output_node not in op.output[i]:
                 op.output[i] = in_out_map[op.output[i]]
 
+
 def rename_tensor(net_def):
     tensor_map = {}
     for t in net_def.tensors:
-        if not tensor_map.has_key(t.name):
+        if t.name not in tensor_map:
             tensor_map[t.name] = "_" + t.name[:-2].replace("/", "_")
         t.name = tensor_map[t.name]
     for op in net_def.op:
         for i in range(len(op.input)):
-            if tensor_map.has_key(op.input[i]):
+            if op.input[i] in tensor_map:
                 op.input[i] = tensor_map[op.input[i]]
         for i in range(len(op.output)):
-            if tensor_map.has_key(op.output[i]):
+            if op.output[i] in tensor_map:
                 op.output[i] = tensor_map[op.output[i]]
 
+
 class TensorInfo:
     def __init__(self, id, t, runtime):
         self.id = id
@@ -84,19 +91,26 @@ class TensorInfo:
         if t.data_type == mace_pb2.DT_FLOAT:
             if runtime == 'gpu':
                 self.data_type = mace_pb2.DT_HALF
-                self.data = bytearray(np.array(t.float_data).astype(np.float16).tobytes())
+                self.data = bytearray(
+                    np.array(t.float_data).astype(np.float16).tobytes())
             else:
                 self.data_type = mace_pb2.DT_FLOAT
-                self.data = bytearray(np.array(t.float_data).astype(np.float32).tobytes())
+                self.data = bytearray(
+                    np.array(t.float_data).astype(np.float32).tobytes())
         elif t.data_type == mace_pb2.DT_INT32:
-            self.data = bytearray(np.array(t.int32_data).astype(np.int32).tobytes())
+            self.data = bytearray(
+                np.array(t.int32_data).astype(np.int32).tobytes())
         elif t.data_type == mace_pb2.DT_UINT8:
-            self.data = bytearray(np.array(t.int32_data).astype(np.uint8).tolist())
+            self.data = bytearray(
+                np.array(t.int32_data).astype(np.uint8).tolist())
 
+
 def stringfy(value):
     return ', '.join('"{0}"'.format(w) for w in value)
 
+
-def convert_to_source(net_def, mode_pb_checksum, template_dir, obfuscate, model_tag, output, runtime, embed_model_data):
+def convert_to_source(net_def, mode_pb_checksum, template_dir, obfuscate,
+                      model_tag, output, runtime, embed_model_data):
     if obfuscate:
         obfuscate_name(net_def)
     else:
@@ -106,7 +120,8 @@ def convert_to_source(net_def, mode_pb_checksum, template_dir, obfuscate, model_
     print template_dir
 
     # Create the jinja2 environment.
-    j2_env = Environment(loader=FileSystemLoader(template_dir), trim_blocks=True)
+    j2_env = Environment(
+        loader=FileSystemLoader(template_dir), trim_blocks=True)
     j2_env.filters['stringfy'] = stringfy
     output_dir = os.path.dirname(output) + '/'
     # generate tensor source files
@@ -122,11 +137,11 @@ def convert_to_source(net_def, mode_pb_checksum, template_dir, obfuscate, model_
             model_data.extend(bytearray([0] * padding))
             offset += padding
         source = j2_env.get_template(template_name).render(
-            tensor_info = tensor_info,
-            tensor = t,
-            tag = model_tag,
-            runtime = runtime,
-            offset = offset,
+            tensor_info=tensor_info,
+            tensor=t,
+            tag=model_tag,
+            runtime=runtime,
+            offset=offset,
         )
         model_data.extend(tensor_info.data)
         offset += len(tensor_info.data)
@@ -137,11 +152,10 @@ def convert_to_source(net_def, mode_pb_checksum, template_dir, obfuscate, model_
     # generate tensor data
     template_name = 'tensor_data.jinja2'
     source = j2_env.get_template(template_name).render(
-        tag = model_tag,
-        embed_model_data = embed_model_data,
-        model_data_size = offset,
-        model_data = model_data
-    )
+        tag=model_tag,
+        embed_model_data=embed_model_data,
+        model_data_size=offset,
+        model_data=model_data)
     with open(output_dir + 'tensor_data' + '.cc', "wb") as f:
         f.write(source)
     if not embed_model_data:
@@ -155,11 +169,11 @@ def convert_to_source(net_def, mode_pb_checksum, template_dir, obfuscate, model_
     op_size = len(net_def.op)
     for start in range(0, op_size, 10):
         source = j2_env.get_template(template_name).render(
-            start = start,
-            end = min(start+10, op_size),
-            net = net_def,
-            tag = model_tag,
-            runtime = runtime,
+            start=start,
+            end=min(start + 10, op_size),
+            net=net_def,
+            tag=model_tag,
+            runtime=runtime,
         )
         with open(output_dir + 'op' + str(counter) + '.cc', "wb") as f:
             f.write(source)
@@ -167,21 +181,21 @@ def convert_to_source(net_def, mode_pb_checksum, template_dir, obfuscate, model_
     # generate model source files
     template_name = 'model.jinja2'
-    tensors = [TensorInfo(i, net_def.tensors[i], runtime) for i in range(len(net_def.tensors))]
+    tensors = [
+        TensorInfo(i, net_def.tensors[i], runtime)
+        for i in range(len(net_def.tensors))
+    ]
     source = j2_env.get_template(template_name).render(
-        tensors = tensors,
-        net = net_def,
-        tag = model_tag,
-        runtime = runtime,
-        model_pb_checksum = mode_pb_checksum
-    )
+        tensors=tensors,
+        net=net_def,
+        tag=model_tag,
+        runtime=runtime,
+        model_pb_checksum=mode_pb_checksum)
     with open(output, "wb") as f:
         f.write(source)
     # generate model header file
     template_name = 'model_header.jinja2'
-    source = j2_env.get_template(template_name).render(
-        tag = model_tag,
-    )
+    source = j2_env.get_template(template_name).render(tag=model_tag, )
     with open(output_dir + model_tag + '.h', "wb") as f:
         f.write(source)
@@ -10,18 +10,21 @@ from tensorflow import gfile
 FLAGS = None
 
+
 def hist_inc(hist, key):
     if key in hist:
         hist[key] += 1
     else:
         hist[key] = 1
 
+
 def to_int_list(long_list):
     int_list = []
     for value in long_list:
         int_list.append(int(value))
     return int_list
 
+
 def main(unused_args):
     if not FLAGS.input or not gfile.Exists(FLAGS.input):
         print('Input graph file ' + FLAGS.input + ' does not exist!')
@@ -49,7 +52,9 @@ def main(unused_args):
                 tensor = output.eval()
                 tensor_shape = list(tensor.shape)
                 tensor_shapes[tensor_name] = tensor_shape
-                print("Const %s: %s, %d" % (tensor_name, tensor_shape, functools.reduce(operator.mul, tensor_shape, 1)))
+                print("Const %s: %s, %d" %
+                      (tensor_name, tensor_shape,
+                       functools.reduce(operator.mul, tensor_shape, 1)))
                 if len(tensor_shape) == 1 and tensor_shape[0] < 10:
                     tensor_values[tensor_name] = list(tensor)
@@ -65,11 +70,16 @@ def main(unused_args):
                     if input_name.endswith('weights/read:0'):
                         ksize = input.shape.as_list()
                         break
-                    if input_name.endswith('weights:0') and input_name in tensor_shapes:
+                    if input_name.endswith(
+                            'weights:0') and input_name in tensor_shapes:
                        ksize = tensor_shapes[input_name]
                         break
-                print('%s(padding=%s, strides=%s, ksize=%s, format=%s) %s => %s' % (op.type, padding, strides, ksize, data_format, op.inputs[0].shape, op.outputs[0].shape))
-                key = '%s(padding=%s, strides=%s, ksize=%s, format=%s)' % (op.type, padding, strides, ksize, data_format)
+                print(
+                    '%s(padding=%s, strides=%s, ksize=%s, format=%s) %s => %s'
+                    % (op.type, padding, strides, ksize, data_format,
+                       op.inputs[0].shape, op.outputs[0].shape))
+                key = '%s(padding=%s, strides=%s, ksize=%s, format=%s)' % (
+                    op.type, padding, strides, ksize, data_format)
                 hist_inc(stats, key)
             elif op.type in ['FusedResizeAndPadConv2D']:
                 padding = op.get_attr('padding')
@@ -78,20 +88,25 @@ def main(unused_args):
                 ksize = 'Unknown'
                 for input in op.inputs:
                     input_name = input.name
-                    if input_name.endswith('weights:0') and input_name in tensor_shapes:
+                    if input_name.endswith(
+                            'weights:0') and input_name in tensor_shapes:
                         ksize = tensor_shapes[input_name]
                         break
-                key = '%s(padding=%s, strides=%s, ksize=%s, resize_align_corners=%s)' % (op.type, padding, strides, ksize, resize_align_corners)
+                key = '%s(padding=%s, strides=%s, ksize=%s, ' \
+                      'resize_align_corners=%s)' % (op.type, padding, strides,
+                                                    ksize, resize_align_corners)
                 hist_inc(stats, key)
             elif op.type in ['ResizeBilinear']:
                 align_corners = op.get_attr('align_corners')
                 size = 'Unknown'
                 for input in op.inputs:
                     input_name = input.name
-                    if input_name.endswith('size:0') and input_name in tensor_values:
+                    if input_name.endswith(
+                            'size:0') and input_name in tensor_values:
                         size = tensor_values[input_name]
                         break
-                key = '%s(size=%s, align_corners=%s)' % (op.type, size, align_corners)
+                key = '%s(size=%s, align_corners=%s)' % (op.type, size,
+                                                         align_corners)
                 print(key)
                 hist_inc(stats, key)
             elif op.type in ['AvgPool', 'MaxPool']:
@@ -99,38 +114,47 @@ def main(unused_args):
                 strides = to_int_list(op.get_attr('strides'))
                 ksize = to_int_list(op.get_attr('ksize'))
                 data_format = op.get_attr('data_format')
-                key = '%s(padding=%s, strides=%s, ksize=%s)' % (op.type, padding, strides, ksize)
+                key = '%s(padding=%s, strides=%s, ksize=%s)' % (op.type,
+                                                                padding,
+                                                                strides, ksize)
                 hist_inc(stats, key)
             elif op.type in ['SpaceToBatchND', 'BatchToSpaceND']:
                 block_shape = 'Unknown'
                 for input in op.inputs:
                     input_name = input.name
-                    if input_name.endswith('block_shape:0') and input_name in tensor_values:
+                    if input_name.endswith(
+                            'block_shape:0') and input_name in tensor_values:
                         block_shape = tensor_values[input_name]
                         break
                 paddings = 'Unknown'
                 for input in op.inputs:
                     input_name = input.name
-                    if input_name.endswith('paddings:0') and input_name in tensor_values:
+                    if input_name.endswith(
+                            'paddings:0') and input_name in tensor_values:
                         paddings = tensor_values[input_name]
                         break
                 crops = 'Unknown'
                 for input in op.inputs:
                     input_name = input.name
-                    if input_name.endswith('crops:0') and input_name in tensor_values:
+                    if input_name.endswith(
+                            'crops:0') and input_name in tensor_values:
                         paddings = tensor_values[input_name]
                         break
                 if op.type == 'SpaceToBatchND':
-                    key = '%s(block_shape=%s, paddings=%s)' % (op.type, block_shape, paddings)
+                    key = '%s(block_shape=%s, paddings=%s)' % (op.type,
+                                                               block_shape,
+                                                               paddings)
                 else:
-                    key = '%s(block_shape=%s, crops=%s)' % (op.type, block_shape, crops)
+                    key = '%s(block_shape=%s, crops=%s)' % (op.type,
+                                                            block_shape, crops)
                 print(key)
                 hist_inc(stats, key)
             elif op.type == 'Pad':
                 paddings = 'Unknown'
                 for input in op.inputs:
                     input_name = input.name
-                    if input_name.endswith('paddings:0') and input_name in tensor_values:
+                    if input_name.endswith(
+                            'paddings:0') and input_name in tensor_values:
                         paddings = tensor_values[input_name]
                         break
                 key = '%s(paddings=%s)' % (op.type, paddings)
@@ -142,6 +166,7 @@ def main(unused_args):
     for key, value in sorted(six.iteritems(stats)):
         print('%s: %d' % (key, value))
 
+
 def parse_args():
     '''Parses command line arguments.'''
     parser = argparse.ArgumentParser()
@@ -152,6 +177,7 @@ def parse_args():
         help='TensorFlow \'GraphDef\' file to load.')
     return parser.parse_known_args()
 
+
 if __name__ == '__main__':
     FLAGS, unparsed = parse_args()
     main(unused_args=[sys.argv[0]] + unparsed)
@@ -7,7 +7,6 @@
 # --target=//mace/ops:ops_test
 # --stdout_processor=stdout_processor
 
-
 import argparse
 import random
 import re
@@ -15,15 +14,18 @@ import sys
 import sh_commands
 
+
 def stdout_processor(stdout, device_properties, abi):
     pass
 
+
 def ops_test_stdout_processor(stdout, device_properties, abi):
     stdout_lines = stdout.split("\n")
     for line in stdout_lines:
         if "Aborted" in line or "FAILED" in line:
             raise Exception("Command failed")
 
+
 def ops_benchmark_stdout_processor(stdout, device_properties, abi):
     stdout_lines = stdout.split("\n")
     metrics = {}
@@ -33,17 +35,20 @@ def ops_benchmark_stdout_processor(stdout, device_properties, abi):
         line = line.strip()
         parts = line.split()
         if len(parts) == 5 and parts[0].startswith("BM_"):
-            metrics["%s.time_ms" % parts[0]] = str(float(parts[1])/1e6)
+            metrics["%s.time_ms" % parts[0]] = str(float(parts[1]) / 1e6)
             metrics["%s.input_mb_per_sec" % parts[0]] = parts[3]
             metrics["%s.gmacc_per_sec" % parts[0]] = parts[4]
 
     platform = device_properties["ro.board.platform"].replace(" ", "-")
     model = device_properties["ro.product.model"].replace(" ", "-")
-    tags = {"ro.board.platform": platform,
-            "ro.product.model": model,
-            "abi": abi}
-    sh_commands.falcon_push_metrics(metrics, tags=tags,
-                                    endpoint="mace_ops_benchmark")
+    tags = {
+        "ro.board.platform": platform,
+        "ro.product.model": model,
+        "abi": abi
+    }
+    sh_commands.falcon_push_metrics(
+        metrics, tags=tags, endpoint="mace_ops_benchmark")
 
+
 def parse_args():
     """Parses command line arguments."""
@@ -57,22 +62,16 @@ def parse_args():
         "--target_socs",
         type=str,
         default="all",
-        help="SoCs(ro.board.platform) to build, comma seperated list or all/random")
+        help="SoCs (ro.board.platform from getprop) to build, "
+        "comma seperated list or all/random")
     parser.add_argument(
-        "--target",
-        type=str,
-        default="//...",
-        help="Bazel target to build")
+        "--target", type=str, default="//...", help="Bazel target to build")
     parser.add_argument(
         "--run_target",
         type=bool,
         default=False,
         help="Whether to run the target")
-    parser.add_argument(
-        "--args",
-        type=str,
-        default="",
-        help="Command args")
+    parser.add_argument("--args", type=str, default="", help="Command args")
     parser.add_argument(
         "--stdout_processor",
         type=str,
@@ -80,6 +79,7 @@ def parse_args():
         help="Stdout processing function, default: stdout_processor")
     return parser.parse_known_args()
 
+
 def main(unused_args):
     target_socs = None
     if FLAGS.target_socs != "all" and FLAGS.target_socs != "random":
@@ -101,17 +101,25 @@ def main(unused_args):
         sh_commands.bazel_build(target, abi=target_abi)
         if FLAGS.run_target:
             for serialno in target_devices:
-                if target_abi not in set(sh_commands.adb_supported_abis(serialno)):
-                    print("Skip device %s which does not support ABI %s" % (serialno, target_abi))
+                if target_abi not in set(
+                        sh_commands.adb_supported_abis(serialno)):
+                    print("Skip device %s which does not support ABI %s" %
+                          (serialno, target_abi))
                     continue
-                stdouts = sh_commands.adb_run(serialno, host_bin_path, bin_name,
-                                              args=FLAGS.args,
-                                              opencl_profiling=1,
-                                              vlog_level=0,
-                                              device_bin_path="/data/local/tmp/mace",
-                                              out_of_range_check=1)
-                device_properties = sh_commands.adb_getprop_by_serialno(serialno)
-                globals()[FLAGS.stdout_processor](stdouts, device_properties, target_abi)
+                stdouts = sh_commands.adb_run(
+                    serialno,
+                    host_bin_path,
+                    bin_name,
+                    args=FLAGS.args,
+                    opencl_profiling=1,
+                    vlog_level=0,
+                    device_bin_path="/data/local/tmp/mace",
+                    out_of_range_check=1)
+                device_properties = sh_commands.adb_getprop_by_serialno(
+                    serialno)
+                globals()[FLAGS.stdout_processor](stdouts, device_properties,
                                                  target_abi)
 
+
 if __name__ == "__main__":
     FLAGS, unparsed = parse_args()
......
-#-*- coding:utf8 -*-
-
-import json, socket, itertools
+import json
+import socket
+import itertools
 
+
 class FalconCli(object):
     def __init__(self, addr, debug=True, buf_size=1000):
         self.socket_ = socket.create_connection(addr)
         self.stream = self.socket_.makefile()
@@ -16,16 +16,19 @@ class FalconCli(object):
         self.stream.close()
 
     @classmethod
-    def connect(cls, server="transfer.falcon.miliao.srv", port=8433, debug=True, buf_size=1000):
+    def connect(cls,
+                server="transfer.falcon.miliao.srv",
+                port=8433,
+                debug=True,
+                buf_size=1000):
         try:
             return FalconCli((server, port), debug, buf_size)
         except socket.error, exc:
-            print "error: connect to %s:%s error: %s" %(server, port, exc)
+            print "error: connect to %s:%s error: %s" % (server, port, exc)
 
     def call(self, name, *params):
-        request = dict(id=next(self.id_counter),
-                       params=list(params),
-                       method=name)
+        request = dict(
+            id=next(self.id_counter), params=list(params), method=name)
         payload = json.dumps(request).encode()
         if self.debug:
             print "--> req:", payload
@@ -49,7 +52,7 @@ class FalconCli(object):
         resp = []
         while True:
-            buf = lines[s:s+self.buf_size]
+            buf = lines[s:s + self.buf_size]
             s = s + self.buf_size
             if len(buf) == 0:
                 break
@@ -57,4 +60,3 @@ class FalconCli(object):
             resp.append(r)
 
         return resp
-
@@ -11,13 +11,16 @@ import re
 # --input_file input_file
 #
 
+
 def generate_data(name, shape):
     np.random.seed()
     data = np.random.random(shape) * 2 - 1
-    input_file_name = FLAGS.input_file + "_" + re.sub('[^0-9a-zA-Z]+', '_', name)
+    input_file_name = FLAGS.input_file + "_" + re.sub('[^0-9a-zA-Z]+', '_',
+                                                      name)
     print 'Generate input file: ', input_file_name
     data.astype(np.float32).tofile(input_file_name)
 
+
 def main(unused_args):
     input_names = [name for name in FLAGS.input_node.split(',')]
     input_shapes = [shape for shape in FLAGS.input_shape.split(':')]
@@ -27,29 +30,21 @@ def main(unused_args):
         generate_data(input_names[i], shape)
     print "Generate input file done."
 
+
 def parse_args():
     """Parses command line arguments."""
     parser = argparse.ArgumentParser()
     parser.register("type", "bool", lambda v: v.lower() == "true")
     parser.add_argument(
-        "--input_file",
-        type=str,
-        default="",
-        help="input file.")
+        "--input_file", type=str, default="", help="input file.")
     parser.add_argument(
-        "--input_node",
-        type=str,
-        default="input_node",
-        help="input node")
+        "--input_node", type=str, default="input_node", help="input node")
     parser.add_argument(
-        "--input_shape",
-        type=str,
-        default="1,64,64,3",
-        help="input shape.")
+        "--input_shape", type=str, default="1,64,64,3", help="input shape.")
     return parser.parse_known_args()
 
+
 if __name__ == '__main__':
     FLAGS, unparsed = parse_args()
     main(unused_args=[sys.argv[0]] + unparsed)
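Note: a sketch of what generate_data above produces for one node (the "model_input" file prefix is illustrative): float32 values drawn uniformly from [-1, 1), written to a file suffixed with the sanitized node name:

    import re
    import numpy as np

    name, shape = "input_node:0", [1, 64, 64, 3]
    data = np.random.random(shape) * 2 - 1       # uniform in [-1, 1)
    suffix = re.sub('[^0-9a-zA-Z]+', '_', name)  # "input_node_0"
    data.astype(np.float32).tofile("model_input" + "_" + suffix)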
...@@ -34,7 +34,8 @@ def run_command(command): ...@@ -34,7 +34,8 @@ def run_command(command):
print("Stderr msg:\n{}".format(err)) print("Stderr msg:\n{}".format(err))
if result.returncode != 0: if result.returncode != 0:
raise Exception("Exit not 0 from bash with code: {}, command: {}".format( raise Exception(
"Exit not 0 from bash with code: {}, command: {}".format(
result.returncode, command)) result.returncode, command))
...@@ -63,10 +64,12 @@ def generate_version_code(): ...@@ -63,10 +64,12 @@ def generate_version_code():
command = "bash tools/generate_version_code.sh" command = "bash tools/generate_version_code.sh"
run_command(command) run_command(command)
def generate_opencl_source_code(): def generate_opencl_source_code():
command = "bash tools/generate_opencl_code.sh source" command = "bash tools/generate_opencl_code.sh source"
run_command(command) run_command(command)
def generate_opencl_binay_code(target_soc, model_output_dirs, pull_or_not): def generate_opencl_binay_code(target_soc, model_output_dirs, pull_or_not):
cl_bin_dirs = [] cl_bin_dirs = []
for d in model_output_dirs: for d in model_output_dirs:
...@@ -79,6 +82,7 @@ def generate_opencl_binay_code(target_soc, model_output_dirs, pull_or_not): ...@@ -79,6 +82,7 @@ def generate_opencl_binay_code(target_soc, model_output_dirs, pull_or_not):
'binary', target_soc, cl_bin_dirs_str, int(pull_or_not)) 'binary', target_soc, cl_bin_dirs_str, int(pull_or_not))
run_command(command) run_command(command)
def generate_tuning_param_code(target_soc, model_output_dirs, pull_or_not): def generate_tuning_param_code(target_soc, model_output_dirs, pull_or_not):
cl_bin_dirs = [] cl_bin_dirs = []
for d in model_output_dirs: for d in model_output_dirs:
...@@ -91,20 +95,24 @@ def generate_tuning_param_code(target_soc, model_output_dirs, pull_or_not): ...@@ -91,20 +95,24 @@ def generate_tuning_param_code(target_soc, model_output_dirs, pull_or_not):
target_soc, cl_bin_dirs_str, int(pull_or_not)) target_soc, cl_bin_dirs_str, int(pull_or_not))
run_command(command) run_command(command)
def generate_code(target_soc, model_output_dirs, pull_or_not): def generate_code(target_soc, model_output_dirs, pull_or_not):
generate_opencl_binay_code(target_soc, model_output_dirs, pull_or_not) generate_opencl_binay_code(target_soc, model_output_dirs, pull_or_not)
generate_tuning_param_code(target_soc, model_output_dirs, pull_or_not) generate_tuning_param_code(target_soc, model_output_dirs, pull_or_not)
def clear_env(target_soc): def clear_env(target_soc):
command = "bash tools/clear_env.sh {}".format(target_soc) command = "bash tools/clear_env.sh {}".format(target_soc)
run_command(command) run_command(command)
def input_file_name(input_name): def input_file_name(input_name):
return os.environ['INPUT_FILE_NAME'] + '_' + \ return os.environ['INPUT_FILE_NAME'] + '_' + \
re.sub('[^0-9a-zA-Z]+', '_', input_name) re.sub('[^0-9a-zA-Z]+', '_', input_name)
def generate_random_input(target_soc, model_output_dir,
input_names, input_files): def generate_random_input(target_soc, model_output_dir, input_names,
input_files):
generate_data_or_not = True generate_data_or_not = True
command = "bash tools/validate_tools.sh {} {} {}".format( command = "bash tools/validate_tools.sh {} {} {}".format(
target_soc, model_output_dir, int(generate_data_or_not)) target_soc, model_output_dir, int(generate_data_or_not))
...@@ -122,16 +130,19 @@ def generate_random_input(target_soc, model_output_dir, ...@@ -122,16 +130,19 @@ def generate_random_input(target_soc, model_output_dir,
else: else:
input_name_list.append(input_names) input_name_list.append(input_names)
if len(input_file_list) != len(input_name_list): if len(input_file_list) != len(input_name_list):
raise Exception('If input_files set, the input files should match the input names.') raise Exception('If input_files set, the input files should '
'match the input names.')
for i in range(len(input_file_list)): for i in range(len(input_file_list)):
if input_file_list[i] is not None: if input_file_list[i] is not None:
dst_input_file = model_output_dir + '/' + input_file_name(input_name_list[i]) dst_input_file = model_output_dir + '/' + input_file_name(
input_name_list[i])
if input_file_list[i].startswith("http://") or \ if input_file_list[i].startswith("http://") or \
input_file_list[i].startswith("https://"): input_file_list[i].startswith("https://"):
urllib.urlretrieve(input_file_list[i], dst_input_file) urllib.urlretrieve(input_file_list[i], dst_input_file)
else: else:
shutil.copy(input_file_list[i], dst_input_file) shutil.copy(input_file_list[i], dst_input_file)
def generate_model_code(): def generate_model_code():
command = "bash tools/generate_model_code.sh" command = "bash tools/generate_model_code.sh"
run_command(command) run_command(command)
...@@ -155,10 +166,17 @@ def tuning_run(model_name, ...@@ -155,10 +166,17 @@ def tuning_run(model_name,
# TODO(yejianwu) refactoring the hackish code # TODO(yejianwu) refactoring the hackish code
stdout_buff = [] stdout_buff = []
process_output = sh_commands.make_output_processor(stdout_buff) process_output = sh_commands.make_output_processor(stdout_buff)
p = sh.bash("tools/tuning_run.sh", target_soc, model_output_dir, p = sh.bash(
running_round, int(tuning), "tools/tuning_run.sh",
restart_round, option_args, _out=process_output, target_soc,
_bg=True, _err_to_out=True) model_output_dir,
running_round,
int(tuning),
restart_round,
option_args,
_out=process_output,
_bg=True,
_err_to_out=True)
p.wait() p.wait()
metrics = {} metrics = {}
for line in stdout_buff: for line in stdout_buff:
...@@ -166,18 +184,23 @@ def tuning_run(model_name, ...@@ -166,18 +184,23 @@ def tuning_run(model_name,
parts = line.split() parts = line.split()
if len(parts) == 6 and parts[0].startswith("time"): if len(parts) == 6 and parts[0].startswith("time"):
metrics["%s.create_net_ms" % model_name] = str(float(parts[1])) metrics["%s.create_net_ms" % model_name] = str(float(parts[1]))
metrics["%s.mace_engine_ctor_ms" % model_name] = str(float(parts[2])) metrics["%s.mace_engine_ctor_ms" % model_name] = str(
float(parts[2]))
metrics["%s.init_ms" % model_name] = str(float(parts[3])) metrics["%s.init_ms" % model_name] = str(float(parts[3]))
metrics["%s.warmup_ms" % model_name] = str(float(parts[4])) metrics["%s.warmup_ms" % model_name] = str(float(parts[4]))
if float(parts[5]) > 0: if float(parts[5]) > 0:
metrics["%s.avg_latency_ms" % model_name] = str(float(parts[5])) metrics["%s.avg_latency_ms" % model_name] = str(
tags = {"ro.board.platform": target_soc, float(parts[5]))
tags = {
"ro.board.platform": target_soc,
"abi": target_abi, "abi": target_abi,
# "runtime": target_runtime, # TODO(yejianwu) Add the actual runtime # "runtime": target_runtime, # TODO(yejianwu) Add the actual runtime
"round": running_round, # TODO(yejianwu) change this to source/binary "round": running_round, # TODO(yejianwu) change this to source/binary
"tuning": tuning} "tuning": tuning
sh_commands.falcon_push_metrics(metrics, endpoint="mace_model_benchmark", }
tags=tags) sh_commands.falcon_push_metrics(
metrics, endpoint="mace_model_benchmark", tags=tags)
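A hedged sketch of the stdout line this parser expects from the run: six whitespace-separated fields, the first starting with "time", the rest millisecond values (the concrete numbers here are made up):

    line = "time: 1.2 3.4 5.6 7.8 9.0"   # hypothetical mace_run output line
    parts = line.split()
    assert len(parts) == 6 and parts[0].startswith("time")
    create_net_ms = float(parts[1])       # -> <model>.create_net_ms
    engine_ctor_ms = float(parts[2])      # -> <model>.mace_engine_ctor_ms
    init_ms = float(parts[3])             # -> <model>.init_ms
    warmup_ms = float(parts[4])           # -> <model>.warmup_ms
    avg_latency_ms = float(parts[5])      # pushed only when positive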
def benchmark_model(target_soc, model_output_dir, option_args=''): def benchmark_model(target_soc, model_output_dir, option_args=''):
command = "bash tools/benchmark.sh {} {} \"{}\"".format( command = "bash tools/benchmark.sh {} {} \"{}\"".format(
...@@ -188,8 +211,8 @@ def benchmark_model(target_soc, model_output_dir, option_args=''): ...@@ -188,8 +211,8 @@ def benchmark_model(target_soc, model_output_dir, option_args=''):
def run_model(model_name, target_runtime, target_abi, target_soc, def run_model(model_name, target_runtime, target_abi, target_soc,
model_output_dir, running_round, restart_round, option_args): model_output_dir, running_round, restart_round, option_args):
tuning_run(model_name, target_runtime, target_abi, target_soc, tuning_run(model_name, target_runtime, target_abi, target_soc,
model_output_dir, running_round, False, model_output_dir, running_round, False, restart_round,
restart_round, option_args) option_args)
def generate_production_code(target_soc, model_output_dirs, pull_or_not): def generate_production_code(target_soc, model_output_dirs, pull_or_not):
...@@ -251,8 +274,8 @@ def merge_libs_and_tuning_results(target_soc, output_dir, model_output_dirs): ...@@ -251,8 +274,8 @@ def merge_libs_and_tuning_results(target_soc, output_dir, model_output_dirs):
build_production_code() build_production_code()
model_output_dirs_str = ",".join(model_output_dirs) model_output_dirs_str = ",".join(model_output_dirs)
command = "bash tools/merge_libs.sh {} {} {}".format(target_soc, output_dir, command = "bash tools/merge_libs.sh {} {} {}".format(
model_output_dirs_str) target_soc, output_dir, model_output_dirs_str)
run_command(command) run_command(command)
...@@ -260,6 +283,7 @@ def packaging_lib_file(output_dir): ...@@ -260,6 +283,7 @@ def packaging_lib_file(output_dir):
command = "bash tools/packaging_lib.sh {}".format(output_dir) command = "bash tools/packaging_lib.sh {}".format(output_dir)
run_command(command) run_command(command)
def download_model_files(model_file_path, def download_model_files(model_file_path,
model_output_dir, model_output_dir,
weight_file_path=""): weight_file_path=""):
...@@ -270,10 +294,9 @@ def download_model_files(model_file_path, ...@@ -270,10 +294,9 @@ def download_model_files(model_file_path,
if weight_file_path.startswith("http://") or \ if weight_file_path.startswith("http://") or \
weight_file_path.startswith("https://"): weight_file_path.startswith("https://"):
os.environ[ os.environ["WEIGHT_FILE_PATH"] = model_output_dir + "/model.caffemodel"
"WEIGHT_FILE_PATH"] = model_output_dir + "/model.caffemodel" urllib.urlretrieve(weight_file_path, os.environ["WEIGHT_FILE_PATH"])
urllib.urlretrieve(weight_file_path,
os.environ["WEIGHT_FILE_PATH"])
def md5sum(str): def md5sum(str):
md5 = hashlib.md5() md5 = hashlib.md5()
...@@ -306,7 +329,10 @@ def parse_args(): ...@@ -306,7 +329,10 @@ def parse_args():
default=10, default=10,
help="The model throughput test running seconds.") help="The model throughput test running seconds.")
parser.add_argument( parser.add_argument(
"--restart_round", type=int, default=1, help="The model restart round.") "--restart_round",
type=int,
default=1,
help="The model restart round.")
parser.add_argument( parser.add_argument(
"--tuning", type="bool", default="true", help="Tune opencl params.") "--tuning", type="bool", default="true", help="Tune opencl params.")
parser.add_argument( parser.add_argument(
...@@ -321,14 +347,16 @@ def parse_args(): ...@@ -321,14 +347,16 @@ def parse_args():
help="SoCs to build, comma seperated list (getprop ro.board.platform)") help="SoCs to build, comma seperated list (getprop ro.board.platform)")
return parser.parse_known_args() return parser.parse_known_args()
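Note that type="bool" (used by --tuning above) is not a built-in argparse type; it works only if a converter is registered on the parser, as validate.py does further down. A minimal sketch under that assumption:

    import argparse
    parser = argparse.ArgumentParser()
    parser.register("type", "bool", lambda v: v.lower() == "true")
    parser.add_argument("--tuning", type="bool", default="true")
    print(parser.parse_args(["--tuning", "false"]).tuning)  # False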
def set_environment(configs): def set_environment(configs):
os.environ["EMBED_MODEL_DATA"] = str(configs["embed_model_data"]) os.environ["EMBED_MODEL_DATA"] = str(configs["embed_model_data"])
os.environ["VLOG_LEVEL"] = str(configs["vlog_level"]) os.environ["VLOG_LEVEL"] = str(configs["vlog_level"])
os.environ["PROJECT_NAME"] = os.path.splitext(os.path.basename( os.environ["PROJECT_NAME"] = os.path.splitext(
FLAGS.config))[0] os.path.basename(FLAGS.config))[0]
os.environ['INPUT_FILE_NAME'] = "model_input" os.environ['INPUT_FILE_NAME'] = "model_input"
os.environ['OUTPUT_FILE_NAME'] = "model_out" os.environ['OUTPUT_FILE_NAME'] = "model_out"
def main(unused_args): def main(unused_args):
configs = parse_model_configs() configs = parse_model_configs()
...@@ -343,13 +371,16 @@ def main(unused_args): ...@@ -343,13 +371,16 @@ def main(unused_args):
if not os.path.exists(FLAGS.output_dir): if not os.path.exists(FLAGS.output_dir):
os.makedirs(FLAGS.output_dir) os.makedirs(FLAGS.output_dir)
elif os.path.exists(os.path.join(FLAGS.output_dir, "libmace")): elif os.path.exists(os.path.join(FLAGS.output_dir, "libmace")):
shutil.rmtree(os.path.join(FLAGS.output_dir, os.environ["PROJECT_NAME"])) shutil.rmtree(
os.makedirs(os.path.join(FLAGS.output_dir, os.environ["PROJECT_NAME"])) os.path.join(FLAGS.output_dir, os.environ["PROJECT_NAME"]))
os.makedirs(
os.path.join(FLAGS.output_dir, os.environ["PROJECT_NAME"]))
generate_version_code() generate_version_code()
generate_opencl_source_code() generate_opencl_source_code()
option_args = ' '.join([arg for arg in unused_args if arg.startswith('--')]) option_args = ' '.join(
[arg for arg in unused_args if arg.startswith('--')])
available_socs = sh_commands.adb_get_all_socs() available_socs = sh_commands.adb_get_all_socs()
target_socs = available_socs target_socs = available_socs
...@@ -362,10 +393,10 @@ def main(unused_args): ...@@ -362,10 +393,10 @@ def main(unused_args):
target_socs = target_socs & socs target_socs = target_socs & socs
missing_socs = socs.difference(target_socs) missing_socs = socs.difference(target_socs)
if len(missing_socs) > 0: if len(missing_socs) > 0:
print("Error: devices with SoCs are not connected %s" % missing_socs) print(
"Error: devices with SoCs are not connected %s" % missing_socs)
exit(1) exit(1)
for target_soc in target_socs: for target_soc in target_socs:
for target_abi in configs["target_abis"]: for target_abi in configs["target_abis"]:
global_runtime = get_global_runtime(configs) global_runtime = get_global_runtime(configs)
...@@ -373,28 +404,27 @@ def main(unused_args): ...@@ -373,28 +404,27 @@ def main(unused_args):
os.environ["TARGET_ABI"] = target_abi os.environ["TARGET_ABI"] = target_abi
model_output_dirs = [] model_output_dirs = []
for model_name in configs["models"]: for model_name in configs["models"]:
print '=======================', model_name, '=======================' print '===================', model_name, '==================='
# Transfer params by environment # Transfer params by environment
os.environ["MODEL_TAG"] = model_name os.environ["MODEL_TAG"] = model_name
model_config = configs["models"][model_name] model_config = configs["models"][model_name]
input_file_list = model_config.get("validation_inputs_data", []) input_file_list = model_config.get("validation_inputs_data",
[])
for key in model_config: for key in model_config:
if key in ['input_nodes', 'output_nodes'] and isinstance( if key in ['input_nodes', 'output_nodes'] and isinstance(
model_config[key], list): model_config[key], list):
os.environ[key.upper()] = ",".join(model_config[key]) os.environ[key.upper()] = ",".join(model_config[key])
elif key in ['input_shapes', 'output_shapes'] and isinstance( elif key in ['input_shapes', 'output_shapes'
model_config[key], list): ] and isinstance(model_config[key], list):
os.environ[key.upper()] = ":".join(model_config[key]) os.environ[key.upper()] = ":".join(model_config[key])
else: else:
os.environ[key.upper()] = str(model_config[key]) os.environ[key.upper()] = str(model_config[key])
# Create model build directory # Create model build directory
model_path_digest = md5sum(model_config["model_file_path"]) model_path_digest = md5sum(model_config["model_file_path"])
model_output_dir = "%s/%s/%s/%s/%s/%s/%s" % (FLAGS.output_dir, model_output_dir = "%s/%s/%s/%s/%s/%s/%s" % (
os.environ["PROJECT_NAME"], FLAGS.output_dir, os.environ["PROJECT_NAME"], "build",
"build", model_name, model_name, model_path_digest, target_soc, target_abi)
model_path_digest,
target_soc, target_abi)
model_output_dirs.append(model_output_dir) model_output_dirs.append(model_output_dir)
if FLAGS.mode == "build" or FLAGS.mode == "all": if FLAGS.mode == "build" or FLAGS.mode == "all":
...@@ -404,22 +434,27 @@ def main(unused_args): ...@@ -404,22 +434,27 @@ def main(unused_args):
clear_env(target_soc) clear_env(target_soc)
download_model_files(model_config["model_file_path"], download_model_files(model_config["model_file_path"],
model_output_dir, model_config.get("weight_file_path", "")) model_output_dir,
model_config.get("weight_file_path", ""))
if FLAGS.mode == "build" or FLAGS.mode == "run" or FLAGS.mode == "validate"\ if FLAGS.mode == "build" or FLAGS.mode == "run" or \
or FLAGS.mode == "benchmark" or FLAGS.mode == "all": FLAGS.mode == "validate" or \
FLAGS.mode == "benchmark" or FLAGS.mode == "all":
generate_random_input(target_soc, model_output_dir, generate_random_input(target_soc, model_output_dir,
model_config['input_nodes'], input_file_list) model_config['input_nodes'],
input_file_list)
if FLAGS.mode == "build" or FLAGS.mode == "all": if FLAGS.mode == "build" or FLAGS.mode == "all":
generate_model_code() generate_model_code()
build_mace_run_prod(model_name, global_runtime, target_abi, build_mace_run_prod(model_name, global_runtime, target_abi,
target_soc, model_output_dir, FLAGS.tuning) target_soc, model_output_dir,
FLAGS.tuning)
if FLAGS.mode == "run" or FLAGS.mode == "validate" or FLAGS.mode == "all": if FLAGS.mode == "run" or FLAGS.mode == "validate" or \
run_model(model_name, global_runtime, target_abi, target_soc, FLAGS.mode == "all":
model_output_dir, FLAGS.round, FLAGS.restart_round, run_model(model_name, global_runtime, target_abi,
option_args) target_soc, model_output_dir, FLAGS.round,
FLAGS.restart_round, option_args)
if FLAGS.mode == "benchmark": if FLAGS.mode == "benchmark":
benchmark_model(target_soc, model_output_dir, option_args) benchmark_model(target_soc, model_output_dir, option_args)
...@@ -427,14 +462,18 @@ def main(unused_args): ...@@ -427,14 +462,18 @@ def main(unused_args):
if FLAGS.mode == "validate" or FLAGS.mode == "all": if FLAGS.mode == "validate" or FLAGS.mode == "all":
validate_model(target_soc, model_output_dir) validate_model(target_soc, model_output_dir)
if FLAGS.mode == "build" or FLAGS.mode == "merge" or FLAGS.mode == "all": if FLAGS.mode == "build" or FLAGS.mode == "merge" or \
FLAGS.mode == "all":
merge_libs_and_tuning_results( merge_libs_and_tuning_results(
target_soc, FLAGS.output_dir + "/" + os.environ["PROJECT_NAME"], target_soc,
FLAGS.output_dir + "/" + os.environ["PROJECT_NAME"],
model_output_dirs) model_output_dirs)
if FLAGS.mode == "throughput_test": if FLAGS.mode == "throughput_test":
merged_lib_file = FLAGS.output_dir + "/%s/%s/libmace_%s.%s.a" % \ merged_lib_file = FLAGS.output_dir + \
(os.environ["PROJECT_NAME"], target_abi, os.environ["PROJECT_NAME"], target_soc) "/%s/%s/libmace_%s.%s.a" % \
(os.environ["PROJECT_NAME"], target_abi,
os.environ["PROJECT_NAME"], target_soc)
generate_random_input(target_soc, FLAGS.output_dir, [], []) generate_random_input(target_soc, FLAGS.output_dir, [], [])
for model_name in configs["models"]: for model_name in configs["models"]:
runtime = configs["models"][model_name]["runtime"] runtime = configs["models"][model_name]["runtime"]
...@@ -449,4 +488,3 @@ def main(unused_args): ...@@ -449,4 +488,3 @@ def main(unused_args):
if __name__ == "__main__": if __name__ == "__main__":
FLAGS, unparsed = parse_args() FLAGS, unparsed = parse_args()
main(unused_args=[sys.argv[0]] + unparsed) main(unused_args=[sys.argv[0]] + unparsed)
...@@ -3,18 +3,22 @@ import re ...@@ -3,18 +3,22 @@ import re
import time import time
import falcon_cli import falcon_cli
################################ ################################
# common # common
################################ ################################
def strip_invalid_utf8(str): def strip_invalid_utf8(str):
return sh.iconv(str, "-c", "-t", "UTF-8") return sh.iconv(str, "-c", "-t", "UTF-8")
def make_output_processor(buff): def make_output_processor(buff):
def process_output(line): def process_output(line):
print(line.strip()) print(line.strip())
buff.append(line) buff.append(line)
return process_output return process_output
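A minimal usage sketch of this closure with the sh module, mirroring how the functions below call it (the command here is illustrative): the callback receives each line as the background process emits it, and the buffer is joined after wait().

    import sh
    stdout_buff = []
    p = sh.echo("hello", _out=make_output_processor(stdout_buff),
                _bg=True, _err_to_out=True)
    p.wait()
    output = "".join(stdout_buff)  # "hello\n"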
################################ ################################
# adb commands # adb commands
################################ ################################
...@@ -23,11 +27,12 @@ def adb_split_stdout(stdout_str): ...@@ -23,11 +27,12 @@ def adb_split_stdout(stdout_str):
# Filter out last empty line # Filter out last empty line
return [l.strip() for l in stdout_str.split('\n') if len(l.strip()) > 0] return [l.strip() for l in stdout_str.split('\n') if len(l.strip()) > 0]
def adb_devices(target_socs=None): def adb_devices(target_socs=None):
outputs = sh.grep(sh.adb("devices"), "^[A-Za-z0-9]\+[[:space:]]\+device$") outputs = sh.grep(sh.adb("devices"), "^[A-Za-z0-9]\+[[:space:]]\+device$")
raw_lists = sh.cut(outputs, "-f1") raw_lists = sh.cut(outputs, "-f1")
device_ids = adb_split_stdout(raw_lists) device_ids = adb_split_stdout(raw_lists)
if target_socs != None: if target_socs is not None:
target_socs_set = set(target_socs) target_socs_set = set(target_socs)
target_devices = [] target_devices = []
for serialno in device_ids: for serialno in device_ids:
...@@ -38,6 +43,7 @@ def adb_devices(target_socs=None): ...@@ -38,6 +43,7 @@ def adb_devices(target_socs=None):
else: else:
return device_ids return device_ids
def adb_getprop_by_serialno(serialno): def adb_getprop_by_serialno(serialno):
outputs = sh.adb("-s", serialno, "shell", "getprop") outputs = sh.adb("-s", serialno, "shell", "getprop")
raw_props = adb_split_stdout(outputs) raw_props = adb_split_stdout(outputs)
...@@ -49,12 +55,14 @@ def adb_getprop_by_serialno(serialno): ...@@ -49,12 +55,14 @@ def adb_getprop_by_serialno(serialno):
props[m.group(1)] = m.group(2) props[m.group(1)] = m.group(2)
return props return props
def adb_supported_abis(serialno): def adb_supported_abis(serialno):
props = adb_getprop_by_serialno(serialno) props = adb_getprop_by_serialno(serialno)
abilist_str = props["ro.product.cpu.abilist"] abilist_str = props["ro.product.cpu.abilist"]
abis = [abi.strip() for abi in abilist_str.split(',')] abis = [abi.strip() for abi in abilist_str.split(',')]
return abis return abis
def adb_get_all_socs(): def adb_get_all_socs():
socs = [] socs = []
for d in adb_devices(): for d in adb_devices():
...@@ -62,7 +70,10 @@ def adb_get_all_socs(): ...@@ -62,7 +70,10 @@ def adb_get_all_socs():
socs.append(props["ro.board.platform"]) socs.append(props["ro.board.platform"])
return set(socs) return set(socs)
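These helpers compose; a short sketch (the SoC names and ABI list here are hypothetical):

    socs = adb_get_all_socs()                # e.g. set(["msm8998", "sdm845"])
    for serialno in adb_devices(target_socs=socs):
        print(adb_supported_abis(serialno))  # e.g. ['arm64-v8a', 'armeabi-v7a', 'armeabi']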
def adb_run(serialno, host_bin_path, bin_name,
def adb_run(serialno,
host_bin_path,
bin_name,
args="", args="",
opencl_profiling=1, opencl_profiling=1,
vlog_level=0, vlog_level=0,
...@@ -71,7 +82,9 @@ def adb_run(serialno, host_bin_path, bin_name, ...@@ -71,7 +82,9 @@ def adb_run(serialno, host_bin_path, bin_name,
host_bin_full_path = "%s/%s" % (host_bin_path, bin_name) host_bin_full_path = "%s/%s" % (host_bin_path, bin_name)
device_bin_full_path = "%s/%s" % (device_bin_path, bin_name) device_bin_full_path = "%s/%s" % (device_bin_path, bin_name)
props = adb_getprop_by_serialno(serialno) props = adb_getprop_by_serialno(serialno)
print("=====================================================================") print(
"====================================================================="
)
print("Run on device: %s, %s, %s" % (serialno, props["ro.board.platform"], print("Run on device: %s, %s, %s" % (serialno, props["ro.board.platform"],
props["ro.product.model"])) props["ro.product.model"]))
sh.adb("-s", serialno, "shell", "rm -rf %s" % device_bin_path) sh.adb("-s", serialno, "shell", "rm -rf %s" % device_bin_path)
...@@ -79,12 +92,19 @@ def adb_run(serialno, host_bin_path, bin_name, ...@@ -79,12 +92,19 @@ def adb_run(serialno, host_bin_path, bin_name,
print("Push %s to %s" % (host_bin_full_path, device_bin_full_path)) print("Push %s to %s" % (host_bin_full_path, device_bin_full_path))
sh.adb("-s", serialno, "push", host_bin_full_path, device_bin_full_path) sh.adb("-s", serialno, "push", host_bin_full_path, device_bin_full_path)
print("Run %s" % device_bin_full_path) print("Run %s" % device_bin_full_path)
stdout_buff=[] stdout_buff = []
process_output = make_output_processor(stdout_buff) process_output = make_output_processor(stdout_buff)
p = sh.adb("-s", serialno, "shell", p = sh.adb(
"MACE_OUT_OF_RANGE_CHECK=%d MACE_OPENCL_PROFILING=%d MACE_CPP_MIN_VLOG_LEVEL=%d %s %s" % "-s",
(out_of_range_check, opencl_profiling, vlog_level, device_bin_full_path, args), serialno,
_out=process_output, _bg=True, _err_to_out=True) "shell",
"MACE_OUT_OF_RANGE_CHECK=%d MACE_OPENCL_PROFILING=%d "
"MACE_CPP_MIN_VLOG_LEVEL=%d %s %s" %
(out_of_range_check, opencl_profiling, vlog_level,
device_bin_full_path, args),
_out=process_output,
_bg=True,
_err_to_out=True)
p.wait() p.wait()
return "".join(stdout_buff) return "".join(stdout_buff)
...@@ -94,11 +114,14 @@ def adb_run(serialno, host_bin_path, bin_name, ...@@ -94,11 +114,14 @@ def adb_run(serialno, host_bin_path, bin_name,
################################ ################################
def bazel_build(target, strip="always", abi="armeabi-v7a"): def bazel_build(target, strip="always", abi="armeabi-v7a"):
print("Build %s with ABI %s" % (target, abi)) print("Build %s with ABI %s" % (target, abi))
stdout_buff=[] stdout_buff = []
process_output = make_output_processor(stdout_buff) process_output = make_output_processor(stdout_buff)
p= sh.bazel("build", p = sh.bazel(
"-c", "opt", "build",
"--strip", strip, "-c",
"opt",
"--strip",
strip,
"--verbose_failures", "--verbose_failures",
target, target,
"--crosstool_top=//external:android/crosstool", "--crosstool_top=//external:android/crosstool",
...@@ -109,12 +132,17 @@ def bazel_build(target, strip="always", abi="armeabi-v7a"): ...@@ -109,12 +132,17 @@ def bazel_build(target, strip="always", abi="armeabi-v7a"):
"--copt=-DMACE_DISABLE_NO_TUNING_WARNING", "--copt=-DMACE_DISABLE_NO_TUNING_WARNING",
"--copt=-Werror=return-type", "--copt=-Werror=return-type",
"--copt=-O3", "--copt=-O3",
"--define", "neon=true", "--define",
"--define", "openmp=true", "neon=true",
_out=process_output, _bg=True, _err_to_out=True) "--define",
"openmp=true",
_out=process_output,
_bg=True,
_err_to_out=True)
p.wait() p.wait()
return "".join(stdout_buff) return "".join(stdout_buff)
def bazel_target_to_bin(target): def bazel_target_to_bin(target):
# change //mace/a/b:c to bazel-bin/mace/a/b/c # change //mace/a/b:c to bazel-bin/mace/a/b/c
prefix, bin_name = target.split(':') prefix, bin_name = target.split(':')
...@@ -124,26 +152,32 @@ def bazel_target_to_bin(target): ...@@ -124,26 +152,32 @@ def bazel_target_to_bin(target):
host_bin_path = "bazel-bin/%s" % prefix host_bin_path = "bazel-bin/%s" % prefix
return host_bin_path, bin_name return host_bin_path, bin_name
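For example, using the label from the comment above:

    host_bin_path, bin_name = bazel_target_to_bin("//mace/a/b:c")
    # host_bin_path == "bazel-bin/mace/a/b", bin_name == "c"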
################################ ################################
# mace commands # mace commands
################################ ################################
# TODO this should be refactored # TODO this should be refactored
def gen_encrypted_opencl_source(codegen_path="mace/codegen"): def gen_encrypted_opencl_source(codegen_path="mace/codegen"):
sh.mkdir("-p", "%s/opencl" % codegen_path) sh.mkdir("-p", "%s/opencl" % codegen_path)
sh.python("mace/python/tools/encrypt_opencl_codegen.py", sh.python(
"mace/python/tools/encrypt_opencl_codegen.py",
"--cl_kernel_dir=./mace/kernels/opencl/cl/", "--cl_kernel_dir=./mace/kernels/opencl/cl/",
"--output_path=%s/opencl/opencl_encrypt_program.cc" % codegen_path) "--output_path=%s/opencl/opencl_encrypt_program.cc" % codegen_path)
def gen_mace_version(codegen_path="mace/codegen"): def gen_mace_version(codegen_path="mace/codegen"):
sh.mkdir("-p", "%s/version" % codegen_path) sh.mkdir("-p", "%s/version" % codegen_path)
sh.bash("mace/tools/git/gen_version_source.sh", sh.bash("mace/tools/git/gen_version_source.sh",
"%s/version/version.cc" % codegen_path) "%s/version/version.cc" % codegen_path)
def gen_compiled_opencl_source(codegen_path="mace/codegen"): def gen_compiled_opencl_source(codegen_path="mace/codegen"):
sh.mkdir("-p", "%s/opencl" % codegen_path) sh.mkdir("-p", "%s/opencl" % codegen_path)
sh.python("mace/python/tools/opencl_codegen.py", sh.python(
"mace/python/tools/opencl_codegen.py",
"--output_path=%s/opencl/opencl_compiled_program.cc" % codegen_path) "--output_path=%s/opencl/opencl_compiled_program.cc" % codegen_path)
################################ ################################
# falcon # falcon
################################ ################################
...@@ -156,10 +190,10 @@ def falcon_tags(tags_dict): ...@@ -156,10 +190,10 @@ def falcon_tags(tags_dict):
tags = tags + ",%s=%s" % (k, v) tags = tags + ",%s=%s" % (k, v)
return tags return tags
def falcon_push_metrics(metrics, endpoint="mace_dev", tags={}): def falcon_push_metrics(metrics, endpoint="mace_dev", tags={}):
cli = falcon_cli.FalconCli.connect(server="transfer.falcon.miliao.srv", cli = falcon_cli.FalconCli.connect(
port=8433, server="transfer.falcon.miliao.srv", port=8433, debug=False)
debug=False)
ts = int(time.time()) ts = int(time.time())
falcon_metrics = [{ falcon_metrics = [{
"endpoint": endpoint, "endpoint": endpoint,
...@@ -171,4 +205,3 @@ def falcon_push_metrics(metrics, endpoint="mace_dev", tags={}): ...@@ -171,4 +205,3 @@ def falcon_push_metrics(metrics, endpoint="mace_dev", tags={}):
"counterType": "GAUGE" "counterType": "GAUGE"
} for key, value in metrics.iteritems()] } for key, value in metrics.iteritems()]
cli.update(falcon_metrics) cli.update(falcon_metrics)
...@@ -20,29 +20,33 @@ from scipy import stats ...@@ -20,29 +20,33 @@ from scipy import stats
# --input_shape 1,64,64,3 \ # --input_shape 1,64,64,3 \
# --output_shape 1,64,64,2 # --output_shape 1,64,64,2
def load_data(file): def load_data(file):
if os.path.isfile(file): if os.path.isfile(file):
return np.fromfile(file=file, dtype=np.float32) return np.fromfile(file=file, dtype=np.float32)
else: else:
return np.empty([0]) return np.empty([0])
def format_output_name(name): def format_output_name(name):
return re.sub('[^0-9a-zA-Z]+', '_', name) return re.sub('[^0-9a-zA-Z]+', '_', name)
def compare_output(output_name, mace_out_value, out_value): def compare_output(output_name, mace_out_value, out_value):
if mace_out_value.size != 0: if mace_out_value.size != 0:
out_value = out_value.reshape(-1) out_value = out_value.reshape(-1)
mace_out_value = mace_out_value.reshape(-1) mace_out_value = mace_out_value.reshape(-1)
assert len(out_value) == len(mace_out_value) assert len(out_value) == len(mace_out_value)
similarity = (1 - spatial.distance.cosine(out_value, mace_out_value)) similarity = (1 - spatial.distance.cosine(out_value, mace_out_value))
print output_name, 'MACE VS', FLAGS.platform.upper(), 'similarity: ', similarity print output_name, 'MACE VS', FLAGS.platform.upper(
), 'similarity: ', similarity
if (FLAGS.mace_runtime == "cpu" and similarity > 0.999) or \ if (FLAGS.mace_runtime == "cpu" and similarity > 0.999) or \
(FLAGS.mace_runtime == "neon" and similarity > 0.999) or \ (FLAGS.mace_runtime == "neon" and similarity > 0.999) or \
(FLAGS.mace_runtime == "gpu" and similarity > 0.995) or \ (FLAGS.mace_runtime == "gpu" and similarity > 0.995) or \
(FLAGS.mace_runtime == "dsp" and similarity > 0.930): (FLAGS.mace_runtime == "dsp" and similarity > 0.930):
print '=======================Similarity Test Passed======================' print '===================Similarity Test Passed=================='
else: else:
print '=======================Similarity Test Failed======================' print '===================Similarity Test Failed=================='
sys.exit(-1) sys.exit(-1)
else: else:
print '=======================Skip empty node===================' print '=======================Skip empty node==================='
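The score above is plain cosine similarity over the flattened tensors; a small self-contained check (values made up):

    import numpy as np
    from scipy import spatial
    out_value = np.array([1.0, 2.0, 3.0])
    mace_out_value = np.array([1.0, 2.0, 3.01])
    similarity = 1 - spatial.distance.cosine(out_value, mace_out_value)
    print(similarity)  # ~0.99999, above the 0.999 cpu/neon threshold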
...@@ -66,21 +70,28 @@ def validate_tf_model(input_names, input_shapes, output_names): ...@@ -66,21 +70,28 @@ def validate_tf_model(input_names, input_shapes, output_names):
tf.import_graph_def(input_graph_def, name="") tf.import_graph_def(input_graph_def, name="")
input_dict = {} input_dict = {}
for i in range(len(input_names)): for i in range(len(input_names)):
input_value = load_data(FLAGS.input_file + "_" + input_names[i]) input_value = load_data(
FLAGS.input_file + "_" + input_names[i])
input_value = input_value.reshape(input_shapes[i]) input_value = input_value.reshape(input_shapes[i])
input_node = graph.get_tensor_by_name(input_names[i] + ':0') input_node = graph.get_tensor_by_name(
input_names[i] + ':0')
input_dict[input_node] = input_value input_dict[input_node] = input_value
output_nodes = [] output_nodes = []
for name in output_names: for name in output_names:
output_nodes.extend([graph.get_tensor_by_name(name + ':0')]) output_nodes.extend(
[graph.get_tensor_by_name(name + ':0')])
output_values = session.run(output_nodes, feed_dict=input_dict) output_values = session.run(output_nodes, feed_dict=input_dict)
for i in range(len(output_names)): for i in range(len(output_names)):
output_file_name = FLAGS.mace_out_file + "_" + format_output_name(output_names[i]) output_file_name = FLAGS.mace_out_file + "_" + \
format_output_name(output_names[i])
mace_out_value = load_data(output_file_name) mace_out_value = load_data(output_file_name)
compare_output(output_names[i], mace_out_value, output_values[i]) compare_output(output_names[i], mace_out_value,
output_values[i])
def validate_caffe_model(input_names, input_shapes, output_names, output_shapes): def validate_caffe_model(input_names, input_shapes, output_names,
output_shapes):
os.environ['GLOG_minloglevel'] = '1' # suppress Caffe verbose prints os.environ['GLOG_minloglevel'] = '1' # suppress Caffe verbose prints
import caffe import caffe
if not os.path.isfile(FLAGS.model_file): if not os.path.isfile(FLAGS.model_file):
...@@ -96,7 +107,8 @@ def validate_caffe_model(input_names, input_shapes, output_names, output_shapes) ...@@ -96,7 +107,8 @@ def validate_caffe_model(input_names, input_shapes, output_names, output_shapes)
for i in range(len(input_names)): for i in range(len(input_names)):
input_value = load_data(FLAGS.input_file + "_" + input_names[i]) input_value = load_data(FLAGS.input_file + "_" + input_names[i])
input_value = input_value.reshape(input_shapes[i]).transpose((0, 3, 1, 2)) input_value = input_value.reshape(input_shapes[i]).transpose((0, 3, 1,
2))
input_blob_name = input_names[i] input_blob_name = input_names[i]
try: try:
if input_names[i] in net.top_names: if input_names[i] in net.top_names:
...@@ -110,16 +122,20 @@ def validate_caffe_model(input_names, input_shapes, output_names, output_shapes) ...@@ -110,16 +122,20 @@ def validate_caffe_model(input_names, input_shapes, output_names, output_shapes)
for i in range(len(output_names)): for i in range(len(output_names)):
value = net.blobs[net.top_names[output_names[i]][0]].data value = net.blobs[net.top_names[output_names[i]][0]].data
out_shape = output_shapes[i] out_shape = output_shapes[i]
out_shape[1], out_shape[2], out_shape[3] = out_shape[3], out_shape[1], out_shape[2] out_shape[1], out_shape[2], out_shape[3] = out_shape[3], out_shape[
1], out_shape[2]
value = value.reshape(out_shape).transpose((0, 2, 3, 1)) value = value.reshape(out_shape).transpose((0, 2, 3, 1))
output_file_name = FLAGS.mace_out_file + "_" + format_output_name(output_names[i]) output_file_name = FLAGS.mace_out_file + "_" + format_output_name(
output_names[i])
mace_out_value = load_data(output_file_name) mace_out_value = load_data(output_file_name)
compare_output(output_names[i], mace_out_value, value) compare_output(output_names[i], mace_out_value, value)
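The transposes above convert between MACE's NHWC tensor layout and Caffe's NCHW layout; a minimal sketch with a dummy tensor:

    import numpy as np
    x_nhwc = np.zeros((1, 64, 64, 3))        # N, H, W, C as stored in input files
    x_nchw = x_nhwc.transpose((0, 3, 1, 2))  # N, C, H, W for Caffe blobs
    print(x_nchw.shape)                      # (1, 3, 64, 64)
    y_nhwc = x_nchw.transpose((0, 2, 3, 1))  # back to N, H, W, C for comparison
    print(y_nhwc.shape)                      # (1, 64, 64, 3)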
def main(unused_args): def main(unused_args):
input_names = [name for name in FLAGS.input_node.split(',')] input_names = [name for name in FLAGS.input_node.split(',')]
input_shape_strs = [shape for shape in FLAGS.input_shape.split(':')] input_shape_strs = [shape for shape in FLAGS.input_shape.split(':')]
input_shapes = [[int(x) for x in shape.split(',')] for shape in input_shape_strs] input_shapes = [[int(x) for x in shape.split(',')]
for shape in input_shape_strs]
output_names = [name for name in FLAGS.output_node.split(',')] output_names = [name for name in FLAGS.output_node.split(',')]
assert len(input_names) == len(input_shapes) assert len(input_names) == len(input_shapes)
...@@ -127,18 +143,18 @@ def main(unused_args): ...@@ -127,18 +143,18 @@ def main(unused_args):
validate_tf_model(input_names, input_shapes, output_names) validate_tf_model(input_names, input_shapes, output_names)
elif FLAGS.platform == 'caffe': elif FLAGS.platform == 'caffe':
output_shape_strs = [shape for shape in FLAGS.output_shape.split(':')] output_shape_strs = [shape for shape in FLAGS.output_shape.split(':')]
output_shapes = [[int(x) for x in shape.split(',')] for shape in output_shape_strs] output_shapes = [[int(x) for x in shape.split(',')]
validate_caffe_model(input_names, input_shapes, output_names, output_shapes) for shape in output_shape_strs]
validate_caffe_model(input_names, input_shapes, output_names,
output_shapes)
def parse_args(): def parse_args():
"""Parses command line arguments.""" """Parses command line arguments."""
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.register("type", "bool", lambda v: v.lower() == "true") parser.register("type", "bool", lambda v: v.lower() == "true")
parser.add_argument( parser.add_argument(
"--platform", "--platform", type=str, default="", help="Tensorflow or Caffe.")
type=str,
default="",
help="Tensorflow or Caffe.")
parser.add_argument( parser.add_argument(
"--model_file", "--model_file",
type=str, type=str,
...@@ -150,40 +166,22 @@ def parse_args(): ...@@ -150,40 +166,22 @@ def parse_args():
default="", default="",
help="caffe model file to load.") help="caffe model file to load.")
parser.add_argument( parser.add_argument(
"--input_file", "--input_file", type=str, default="", help="input file.")
type=str,
default="",
help="input file.")
parser.add_argument( parser.add_argument(
"--mace_out_file", "--mace_out_file",
type=str, type=str,
default="", default="",
help="mace output file to load.") help="mace output file to load.")
parser.add_argument( parser.add_argument(
"--mace_runtime", "--mace_runtime", type=str, default="gpu", help="mace runtime device.")
type=str,
default="gpu",
help="mace runtime device.")
parser.add_argument( parser.add_argument(
"--input_shape", "--input_shape", type=str, default="1,64,64,3", help="input shape.")
type=str,
default="1,64,64,3",
help="input shape.")
parser.add_argument( parser.add_argument(
"--output_shape", "--output_shape", type=str, default="1,64,64,2", help="output shape.")
type=str,
default="1,64,64,2",
help="output shape.")
parser.add_argument( parser.add_argument(
"--input_node", "--input_node", type=str, default="input_node", help="input node")
type=str,
default="input_node",
help="input node")
parser.add_argument( parser.add_argument(
"--output_node", "--output_node", type=str, default="output_node", help="output node")
type=str,
default="output_node",
help="output node")
return parser.parse_known_args() return parser.parse_known_args()
...@@ -191,4 +189,3 @@ def parse_args(): ...@@ -191,4 +189,3 @@ def parse_args():
if __name__ == '__main__': if __name__ == '__main__':
FLAGS, unparsed = parse_args() FLAGS, unparsed = parse_args()
main(unused_args=[sys.argv[0]] + unparsed) main(unused_args=[sys.argv[0]] + unparsed)
...@@ -11,12 +11,8 @@ G_T = {} ...@@ -11,12 +11,8 @@ G_T = {}
# f(2, 3) # f(2, 3)
A_T[4] = np.array([[1, 1, 1, 0], [0, 1, -1, -1]]).astype(np.float32) A_T[4] = np.array([[1, 1, 1, 0], [0, 1, -1, -1]]).astype(np.float32)
A[4] = np.transpose(A_T[4]) A[4] = np.transpose(A_T[4])
B_T[4] = np.array([ B_T[4] = np.array([[1, 0, -1, 0], [0, 1, 1, 0], [0, -1, 1, 0],
[1, 0, -1, 0], [0, 1, 0, -1]]).astype(np.float32)
[0, 1, 1, 0],
[0, -1, 1, 0],
[0, 1, 0, -1]
]).astype(np.float32)
B[4] = np.transpose(B_T[4]) B[4] = np.transpose(B_T[4])
G[4] = np.array([ G[4] = np.array([
[1, 0, 0], [1, 0, 0],
...@@ -44,45 +40,45 @@ B_T[6] = np.array([ ...@@ -44,45 +40,45 @@ B_T[6] = np.array([
]).astype(np.float32) ]).astype(np.float32)
B[6] = np.transpose(B_T[6]) B[6] = np.transpose(B_T[6])
G[6] = np.array([ G[6] = np.array([
[1/4.0 , 0 , 0 ], [1 / 4.0, 0, 0],
[-1/6.0, -1/6.0 , -1/6.0], [-1 / 6.0, -1 / 6.0, -1 / 6.0],
[-1/6.0, 1/6.0 , -1/6.0], [-1 / 6.0, 1 / 6.0, -1 / 6.0],
[1/24.0, 1/12.0 , 1/6.0 ], [1 / 24.0, 1 / 12.0, 1 / 6.0],
[1/24.0, -1/12.0, 1/6.0 ], [1 / 24.0, -1 / 12.0, 1 / 6.0],
[ 0 , 0 , 1 ], [0, 0, 1],
]).astype(np.float32) ]).astype(np.float32)
G_T[6] = np.transpose(G[6]) G_T[6] = np.transpose(G[6])
# f(6, 3) # f(6, 3)
A_T[8] = np.array([ A_T[8] = np.array([
[1, 1, 1 , 1 , 1 , 1 , 1 , 0], [1, 1, 1, 1, 1, 1, 1, 0],
[0, 1, -1, 2 , -2 , 1/2. , -1/2. , 0], [0, 1, -1, 2, -2, 1 / 2., -1 / 2., 0],
[0, 1, 1 , 4 , 4 , 1/4. , 1/4. , 0], [0, 1, 1, 4, 4, 1 / 4., 1 / 4., 0],
[0, 1, -1, 8 , -8 , 1/8. , -1/8. , 0], [0, 1, -1, 8, -8, 1 / 8., -1 / 8., 0],
[0, 1, 1 , 16, 16 , 1/16., 1/16. , 0], [0, 1, 1, 16, 16, 1 / 16., 1 / 16., 0],
[0, 1, -1, 32, -32, 1/32., -1/32., 1], [0, 1, -1, 32, -32, 1 / 32., -1 / 32., 1],
]).astype(np.float32) ]).astype(np.float32)
A[8] = np.transpose(A_T[8]) A[8] = np.transpose(A_T[8])
B_T[8] = np.array([ B_T[8] = np.array([
[1, 0 , -21/4., 0 , 21/4., 0 , -1, 0], [1, 0, -21 / 4., 0, 21 / 4., 0, -1, 0],
[0, 1 , 1 , -17/4., -17/4., 1 , 1 , 0], [0, 1, 1, -17 / 4., -17 / 4., 1, 1, 0],
[0, -1 , 1 , 17/4. , -17/4., -1 , 1 , 0], [0, -1, 1, 17 / 4., -17 / 4., -1, 1, 0],
[0, 1/2. , 1/4. , -5/2. , -5/4., 2 , 1 , 0], [0, 1 / 2., 1 / 4., -5 / 2., -5 / 4., 2, 1, 0],
[0, -1/2., 1/4. , 5/2. , -5/4., -2 , 1 , 0], [0, -1 / 2., 1 / 4., 5 / 2., -5 / 4., -2, 1, 0],
[0, 2 , 4 , -5/2. , -5 , 1/2. , 1 , 0], [0, 2, 4, -5 / 2., -5, 1 / 2., 1, 0],
[0, -2 , 4 , 5/2. , -5 , -1/2. , 1 , 0], [0, -2, 4, 5 / 2., -5, -1 / 2., 1, 0],
[0, -1 , 0 , 21/4. , 0 , -21/4., 0 , 1], [0, -1, 0, 21 / 4., 0, -21 / 4., 0, 1],
]).astype(np.float32) ]).astype(np.float32)
B[8] = np.transpose(B_T[8]) B[8] = np.transpose(B_T[8])
G[8] = np.array([ G[8] = np.array([
[ 1 , 0 , 0 ], [1, 0, 0],
[-2/9. , -2/9. , -2/9.], [-2 / 9., -2 / 9., -2 / 9.],
[-2/9. , 2/9. , -2/9.], [-2 / 9., 2 / 9., -2 / 9.],
[1/90. , 1/45. , 2/45.], [1 / 90., 1 / 45., 2 / 45.],
[1/90. , -1/45. , 2/45.], [1 / 90., -1 / 45., 2 / 45.],
[32/45., 16/45. , 8/45.], [32 / 45., 16 / 45., 8 / 45.],
[32/45., -16/45., 8/45.], [32 / 45., -16 / 45., 8 / 45.],
[ 0 , 0 , 1 ], [0, 0, 1],
]).astype(np.float32) ]).astype(np.float32)
G_T[8] = np.transpose(G[8]) G_T[8] = np.transpose(G[8])
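These are the standard Winograd minimal-filtering transforms: for output tile size $m$ and filter size $r$, the tile width is $\alpha = m + r - 1$ (hence the indices 4, 6 and 8 for F(2,3), F(4,3) and F(6,3)), and each output tile is

$$Y = A^T \left[ (G\,g\,G^T) \odot (B^T d\,B) \right] A,$$

where $g$ is the $r \times r$ filter, $d$ the $\alpha \times \alpha$ input tile, and $\odot$ elementwise multiplication; these correspond to the U, V and M stages computed per channel in winograd_conv below.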
...@@ -112,7 +108,7 @@ def winograd_conv(m, r, input, filter): ...@@ -112,7 +108,7 @@ def winograd_conv(m, r, input, filter):
for c in range(C): for c in range(C):
u = np.dot(np.dot(G[alpha], filter[k, c, :, :]), G_T[alpha]) u = np.dot(np.dot(G[alpha], filter[k, c, :, :]), G_T[alpha])
for i in range(alpha): for i in range(alpha):
for j in range(alpha) : for j in range(alpha):
U[(i * alpha + j) * K + k, c] = u[i, j] U[(i * alpha + j) * K + k, c] = u[i, j]
print 'filter out: ', U.shape print 'filter out: ', U.shape
...@@ -129,24 +125,24 @@ def winograd_conv(m, r, input, filter): ...@@ -129,24 +125,24 @@ def winograd_conv(m, r, input, filter):
w_idx = t % rounded_w w_idx = t % rounded_w
h_start = h_idx * m h_start = h_idx * m
w_start = w_idx * m w_start = w_idx * m
h_end = min(h_start+alpha, input_shape[2]) h_end = min(h_start + alpha, input_shape[2])
w_end = min(w_start+alpha, input_shape[3]) w_end = min(w_start + alpha, input_shape[3])
d = np.zeros((alpha, alpha)) d = np.zeros((alpha, alpha))
d[0:h_end-h_start, 0:w_end-w_start] = \ d[0:h_end-h_start, 0:w_end-w_start] = \
input[n, c, h_start:h_end, w_start:w_end] input[n, c, h_start:h_end, w_start:w_end]
v = np.dot(np.dot(B_T[alpha], d), B[alpha]) v = np.dot(np.dot(B_T[alpha], d), B[alpha])
for i in range(alpha): for i in range(alpha):
for j in range(alpha): for j in range(alpha):
V[(i*alpha+j)*C + c, p] = v[i, j] V[(i * alpha + j) * C + c, p] = v[i, j]
tmp = V.reshape(alpha_square, C, P, 1) tmp = V.reshape(alpha_square, C, P, 1)
print 'input out: ', tmp.shape print 'input out: ', tmp.shape
tmp.astype(np.float32).tofile("C") tmp.astype(np.float32).tofile("C")
M = np.zeros((alpha_square * K, P)) M = np.zeros((alpha_square * K, P))
for i in range(alpha_square): for i in range(alpha_square):
u = U[i * K : (i+1) * K, :] u = U[i * K:(i + 1) * K, :]
v = V[i * C : (i+1) * C, :] v = V[i * C:(i + 1) * C, :]
M[i * K : (i+1) * K, :] = np.dot(u, v) M[i * K:(i + 1) * K, :] = np.dot(u, v)
print 'M shape: ', M.shape print 'M shape: ', M.shape
M.astype(np.float32).tofile("gemm") M.astype(np.float32).tofile("gemm")
...@@ -156,7 +152,7 @@ def winograd_conv(m, r, input, filter): ...@@ -156,7 +152,7 @@ def winograd_conv(m, r, input, filter):
tm = np.zeros((alpha, alpha)) tm = np.zeros((alpha, alpha))
for i in range(alpha): for i in range(alpha):
for j in range(alpha): for j in range(alpha):
tm[i][j] = M[(i*alpha+j) * K + k, b] tm[i][j] = M[(i * alpha + j) * K + k, b]
y = np.dot(np.dot(A_T[alpha], tm), A[alpha]) y = np.dot(np.dot(A_T[alpha], tm), A[alpha])
for i in range(m): for i in range(m):
for j in range(m): for j in range(m):
...@@ -173,6 +169,7 @@ def winograd_conv(m, r, input, filter): ...@@ -173,6 +169,7 @@ def winograd_conv(m, r, input, filter):
return res return res
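Ignoring the cost of the transforms themselves, this is where the saving comes from: producing an $m \times m$ output tile with an $r \times r$ filter directly takes $m^2 r^2$ multiplications, while F(m, r) takes $(m+r-1)^2$. For the variants above: F(2,3) uses 16 instead of 36 (2.25x), F(4,3) uses 36 instead of 144 (4x), and F(6,3) uses 64 instead of 324 (about 5.1x).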
def tf_conv(input, filter): def tf_conv(input, filter):
conv_op = tf.nn.conv2d(input, filter, [1, 1, 1, 1], 'VALID') conv_op = tf.nn.conv2d(input, filter, [1, 1, 1, 1], 'VALID')
with tf.Session() as sess: with tf.Session() as sess:
...@@ -206,4 +203,3 @@ def main(): ...@@ -206,4 +203,3 @@ def main():
if __name__ == '__main__': if __name__ == '__main__':
main() main()