提交 6da30d22 编写于 作者: L Liangliang He

Enable python style check

上级 e54825c5
stages:
- cpplint
- pycodestyle
- ops_test
- ops_benchmark
......@@ -7,7 +8,12 @@ cpplint:
stage: cpplint
script:
- curl -o cpplint.py https://raw.githubusercontent.com/google/styleguide/gh-pages/cpplint/cpplint.py
- python cpplint.py --linelength=80 --counting=detailed $(find mace -name *.h -or -name *.cc)
- python cpplint.py --linelength=80 --counting=detailed $(find mace -name "*.h" -or -name "*.cc")
pycodestyle:
stage: pycodestyle
script:
- pycodestyle $(find -name "*.py")
ops_test:
stage: ops_test
......
......@@ -113,7 +113,8 @@ RUN pip install -i http://pypi.douban.com/simple/ --trusted-host pypi.douban.com
scipy \
jinja2 \
pyyaml \
sh
sh \
pycodestyle
# Download tensorflow tools
RUN wget http://cnbj1-inner-fds.api.xiaomi.net/mace/tool/transform_graph && \
......
......@@ -27,28 +27,30 @@ def generate_cpp_source():
print "Generate binary from", binary_path
idx = 0
size, = struct.unpack("Q", binary_array[idx:idx+8])
size, = struct.unpack("Q", binary_array[idx:idx + 8])
idx += 8
for _ in xrange(size):
key_size, = struct.unpack("i", binary_array[idx:idx+4])
key_size, = struct.unpack("i", binary_array[idx:idx + 4])
idx += 4
key, = struct.unpack(str(key_size) + "s", binary_array[idx:idx+key_size])
key, = struct.unpack(
str(key_size) + "s", binary_array[idx:idx + key_size])
idx += key_size
params_size, = struct.unpack("i", binary_array[idx:idx+4])
params_size, = struct.unpack("i", binary_array[idx:idx + 4])
idx += 4
data_map[key] = []
count = params_size / 4
params = struct.unpack(str(count) + "i", binary_array[idx:idx+params_size])
params = struct.unpack(
str(count) + "i", binary_array[idx:idx + params_size])
for i in params:
data_map[key].append(i)
idx += params_size
env = jinja2.Environment(loader=jinja2.FileSystemLoader(sys.path[0]))
return env.get_template('str2vec_maps.cc.jinja2').render(
maps = data_map,
data_type = 'unsigned int',
variable_name = FLAGS.variable_name
)
maps=data_map,
data_type='unsigned int',
variable_name=FLAGS.variable_name)
def main(unused_args):
cpp_binary_source = generate_cpp_source()
......@@ -58,14 +60,12 @@ def main(unused_args):
w_file.write(cpp_binary_source)
w_file.close()
def parse_args():
"""Parses command line arguments."""
parser = argparse.ArgumentParser()
parser.add_argument(
"--binary_dirs",
type=str,
default="",
help="The binaries file path.")
"--binary_dirs", type=str, default="", help="The binaries file path.")
parser.add_argument(
"--binary_file_name",
type=str,
......@@ -75,7 +75,8 @@ def parse_args():
"--output_path",
type=str,
default="",
help="The path of generated C++ source file which contains the binary.")
help="The path of generated C++ source file which contains the binary."
)
parser.add_argument(
"--variable_name",
type=str,
......
......@@ -26,4 +26,3 @@ def tf_dtype_2_mace_dtype(tf_dtype):
if not mace_dtype:
raise Exception("Not supported tensorflow dtype: " + tf_dtype)
return mace_dtype
......@@ -4,10 +4,14 @@ import hashlib
import os.path
from mace.python.tools import source_converter_lib
# ./bazel-bin/mace/python/tools/tf_converter --model_file quantized_test.pb --output quantized_test_dsp.pb --runtime dsp --input_dim input_node,1,28,28,3
# ./bazel-bin/mace/python/tools/tf_converter --model_file quantized_test.pb \
# --output quantized_test_dsp.pb \
# --runtime dsp \
# --input_dim input_node,1,28,28,3
FLAGS = None
def file_checksum(fname):
hash_func = hashlib.sha256()
with open(fname, "rb") as f:
......@@ -15,6 +19,7 @@ def file_checksum(fname):
hash_func.update(chunk)
return hash_func.hexdigest()
def main(unused_args):
if not os.path.isfile(FLAGS.model_file):
print("Input graph file '" + FLAGS.model_file + "' does not exist!")
......@@ -22,17 +27,21 @@ def main(unused_args):
model_checksum = file_checksum(FLAGS.model_file)
if FLAGS.model_checksum != "" and FLAGS.model_checksum != model_checksum:
print("Model checksum mismatch: %s != %s" % (model_checksum, FLAGS.model_checksum))
print("Model checksum mismatch: %s != %s" % (model_checksum,
FLAGS.model_checksum))
sys.exit(-1)
if FLAGS.platform == 'caffe':
if not os.path.isfile(FLAGS.weight_file):
print("Input weight file '" + FLAGS.weight_file + "' does not exist!")
print("Input weight file '" + FLAGS.weight_file +
"' does not exist!")
sys.exit(-1)
weight_checksum = file_checksum(FLAGS.weight_file)
if FLAGS.weight_checksum != "" and FLAGS.weight_checksum != weight_checksum:
print("Weight checksum mismatch: %s != %s" % (weight_checksum, FLAGS.weight_checksum))
if FLAGS.weight_checksum != "" and \
FLAGS.weight_checksum != weight_checksum:
print("Weight checksum mismatch: %s != %s" %
(weight_checksum, FLAGS.weight_checksum))
sys.exit(-1)
if FLAGS.runtime == 'dsp':
......@@ -41,22 +50,27 @@ def main(unused_args):
from mace.python.tools import caffe_converter_lib
output_graph_def = caffe_converter_lib.convert_to_mace_pb(
FLAGS.model_file, FLAGS.weight_file, FLAGS.input_node, FLAGS.input_shape, FLAGS.output_node,
FLAGS.data_type, FLAGS.runtime, FLAGS.winograd)
FLAGS.model_file, FLAGS.weight_file, FLAGS.input_node,
FLAGS.input_shape, FLAGS.output_node, FLAGS.data_type,
FLAGS.runtime, FLAGS.winograd)
elif FLAGS.platform == 'tensorflow':
if FLAGS.runtime == 'dsp':
from mace.python.tools import tf_dsp_converter_lib
output_graph_def = tf_dsp_converter_lib.convert_to_mace_pb(
FLAGS.model_file, FLAGS.input_node, FLAGS.output_node, FLAGS.dsp_mode)
FLAGS.model_file, FLAGS.input_node, FLAGS.output_node,
FLAGS.dsp_mode)
else:
from mace.python.tools import tf_converter_lib
output_graph_def = tf_converter_lib.convert_to_mace_pb(
FLAGS.model_file, FLAGS.input_node, FLAGS.input_shape, FLAGS.output_node,
FLAGS.data_type, FLAGS.runtime, FLAGS.winograd)
FLAGS.model_file, FLAGS.input_node, FLAGS.input_shape,
FLAGS.output_node, FLAGS.data_type, FLAGS.runtime,
FLAGS.winograd)
if FLAGS.output_type == 'source':
source_converter_lib.convert_to_source(output_graph_def, model_checksum, FLAGS.template, FLAGS.obfuscate,
FLAGS.model_tag, FLAGS.output, FLAGS.runtime, FLAGS.embed_model_data)
source_converter_lib.convert_to_source(
output_graph_def, model_checksum, FLAGS.template, FLAGS.obfuscate,
FLAGS.model_tag, FLAGS.output, FLAGS.runtime,
FLAGS.embed_model_data)
else:
with open(FLAGS.output, "wb") as f:
f.write(output_graph_def.SerializeToString())
......@@ -65,6 +79,7 @@ def main(unused_args):
f.write(str(output_graph_def))
print("Model conversion is completed.")
def str2bool(v):
if v.lower() in ('yes', 'true', 't', 'y', '1'):
return True
......@@ -73,6 +88,7 @@ def str2bool(v):
else:
raise argparse.ArgumentTypeError('Boolean value expected.')
def parse_args():
"""Parses command line arguments."""
parser = argparse.ArgumentParser()
......@@ -81,12 +97,10 @@ def parse_args():
"--model_file",
type=str,
default="",
help="TensorFlow \'GraphDef\' file to load, Caffe prototxt file to load.")
help="TensorFlow \'GraphDef\' file to load, "
"Caffe prototxt file to load.")
parser.add_argument(
"--weight_file",
type=str,
default="",
help="Caffe data file to load.")
"--weight_file", type=str, default="", help="Caffe data file to load.")
parser.add_argument(
"--model_checksum",
type=str,
......@@ -103,35 +117,23 @@ def parse_args():
default="",
help="File to save the output graph to.")
parser.add_argument(
"--runtime",
type=str,
default="cpu",
help="Runtime: cpu/gpu/dsp")
"--runtime", type=str, default="cpu", help="Runtime: cpu/gpu/dsp")
parser.add_argument(
"--input_node",
type=str,
default="input_node",
help="e.g., input_node")
parser.add_argument(
"--output_node",
type=str,
default="softmax",
help="e.g., softmax")
"--output_node", type=str, default="softmax", help="e.g., softmax")
parser.add_argument(
"--data_type",
type=str,
default='DT_FLOAT',
help="e.g., DT_HALF/DT_FLOAT")
parser.add_argument(
"--output_type",
type=str,
default="pb",
help="output type: source/pb")
"--output_type", type=str, default="pb", help="output type: source/pb")
parser.add_argument(
"--template",
type=str,
default="",
help="template path")
"--template", type=str, default="", help="template path")
parser.add_argument(
"--obfuscate",
type=str2bool,
......@@ -152,25 +154,13 @@ def parse_args():
default=False,
help="open winograd convolution or not")
parser.add_argument(
"--dsp_mode",
type=int,
default=0,
help="dsp run mode, defalut=0")
"--dsp_mode", type=int, default=0, help="dsp run mode, defalut=0")
parser.add_argument(
"--input_shape",
type=str,
default="",
help="input shape.")
"--input_shape", type=str, default="", help="input shape.")
parser.add_argument(
"--platform",
type=str,
default="tensorflow",
help="tensorflow/caffe")
"--platform", type=str, default="tensorflow", help="tensorflow/caffe")
parser.add_argument(
"--embed_model_data",
type=str2bool,
default=True,
help="input shape.")
"--embed_model_data", type=str2bool, default=True, help="input shape.")
return parser.parse_known_args()
......
class DspOps(object):
def __init__(self):
self.dsp_ops = {
......@@ -18,7 +17,7 @@ class DspOps(object):
'QuantizedAvgPool': 'QuantizedAvgPool_8',
'QuantizedConcat': 'QuantizedConcat_8',
'QuantizedBiasAdd': 'QuantizedBiasAdd_8p8to32',
'QuantizedResizeBilinear' : 'QuantizedResizeBilinear_8',
'QuantizedResizeBilinear': 'QuantizedResizeBilinear_8',
'QuantizedSpaceToBatchND': 'QuantizedSpaceToBatchND_8',
'QuantizedBatchToSpaceND': 'QuantizedBatchToSpaceND_8',
'QuantizedSoftmax': 'QuantizedSoftmax_8',
......@@ -54,6 +53,7 @@ class DspOps(object):
'Concat': 'Concat_f',
'AddN': 'AddN_f',
}
def has_op(self, tf_op):
return tf_op in self.dsp_ops
......@@ -61,5 +61,3 @@ class DspOps(object):
if tf_op not in self.dsp_ops:
raise Exception('Could not map nn op for: ', tf_op)
return self.dsp_ops[tf_op]
......@@ -11,10 +11,13 @@ FLAGS = None
encrypt_lookup_table = "Xiaomi-AI-Platform-Mace"
def encrypt_code(code_str):
    """XOR every character of `code_str` with the repeating lookup table.

    The character at position `i` is XOR-ed with the lookup-table character
    at position `i % len(encrypt_lookup_table)`.

    Returns:
        A list with one `hex(...)` string per input character, in order.
    """
    key = encrypt_lookup_table
    key_len = len(key)
    return [
        hex(ord(char) ^ ord(key[pos % key_len]))
        for pos, char in enumerate(code_str)
    ]
......@@ -45,7 +48,8 @@ def main(unused_args):
encrypted_code_maps[file_name[:-3]] = encrypted_code_arr
env = jinja2.Environment(loader=jinja2.FileSystemLoader(sys.path[0]))
cpp_cl_encrypted_kernel = env.get_template('str2vec_maps.cc.jinja2').render(
cpp_cl_encrypted_kernel = env.get_template(
'str2vec_maps.cc.jinja2').render(
maps=encrypted_code_maps,
data_type='unsigned char',
variable_name='kEncryptedProgramMap')
......
......@@ -2,18 +2,21 @@ import tensorflow as tf
from mace.proto import mace_pb2
from collections import OrderedDict
def sort_tf_node(node, nodes_map, ordered_nodes_map):
    """Insert `node` into `ordered_nodes_map` after the nodes it reads from.

    Args:
        node: object exposing `name` and an iterable `input` of tensor names
            (each optionally suffixed with ``:<index>``).
        nodes_map: mapping from node name to node.
        ordered_nodes_map: ordered mapping filled in place; a node is added
            only after all of its known, not-yet-added inputs were visited.
    """
    if node.name in ordered_nodes_map:
        return
    for tensor_name in node.input:
        # Drop the ":<index>" suffix, if any, to get the producing node name.
        producer_name = tensor_name.split(':')[0]
        if producer_name in nodes_map and \
                producer_name not in ordered_nodes_map:
            sort_tf_node(nodes_map[producer_name], nodes_map,
                         ordered_nodes_map)
    ordered_nodes_map[node.name] = node
def sort_tf_graph(graph_def):
nodes_map = {}
ordered_nodes_map = OrderedDict()
......@@ -31,13 +34,15 @@ def sort_mace_node(node, nodes_map, ordered_nodes_map):
for input_tensor_name in node.input:
input_node_name = input_tensor_name.split(':')[
0] if ':' in input_tensor_name else input_tensor_name
if input_node_name not in nodes_map or input_node_name in ordered_nodes_map:
if input_node_name not in nodes_map or \
input_node_name in ordered_nodes_map:
continue
input_node = nodes_map[input_node_name]
sort_mace_node(input_node, nodes_map, ordered_nodes_map)
ordered_nodes_map[node.name] = node
def sort_mace_graph(graph_def, output_name):
nodes_map = {}
ordered_nodes_map = OrderedDict()
......
......@@ -2,6 +2,7 @@ import sys
import operator
from mace.proto import mace_pb2
class MemoryOptimizer(object):
def __init__(self, net_def):
self.net_def = net_def
......@@ -37,9 +38,9 @@ class MemoryOptimizer(object):
mem_size = [0, 0]
if op_type == 'WinogradTransform' or op_type == 'MatMul':
mem_size[0] = output_shape[2] * output_shape[3]
mem_size[1] = output_shape[0] * int((output_shape[1]+3)/4)
mem_size[1] = output_shape[0] * int((output_shape[1] + 3) / 4)
else:
mem_size[0] = output_shape[2] * int((output_shape[3]+3)/4)
mem_size[0] = output_shape[2] * int((output_shape[3] + 3) / 4)
mem_size[1] = output_shape[0] * output_shape[1]
return mem_size
......@@ -51,13 +52,16 @@ class MemoryOptimizer(object):
if self.is_buffer_image_op(op):
continue
if not op.output_shape:
print('WARNING: There is no output shape information to do memory optimization.')
print('WARNING: There is no output shape information to '
'do memory optimization.')
return
if len(op.output_shape) != len(op.output):
print('WARNING: the number of output shape is not equal to the number of output.')
print('WARNING: the number of output shape is not equal to '
'the number of output.')
return
for i in range(len(op.output)):
op_mem_size = self.get_mem_size(op.type, op.output_shape[i].dims)
op_mem_size = self.get_mem_size(op.type,
op.output_shape[i].dims)
mem_id = -1
if len(self.idle_mem) > 0:
best_mem_candidate_id = -1
......@@ -65,16 +69,22 @@ class MemoryOptimizer(object):
best_mem_candidate_shape = []
for mid in self.idle_mem:
reuse_mem_size = self.mem_block[mid]
resize_mem_size = [max(reuse_mem_size[0], op_mem_size[0]), max(reuse_mem_size[1], op_mem_size[1])]
delta_mem_area = self.mem_area(resize_mem_size) - self.mem_area(reuse_mem_size)
resize_mem_size = [
max(reuse_mem_size[0], op_mem_size[0]),
max(reuse_mem_size[1], op_mem_size[1])
]
delta_mem_area = self.mem_area(
resize_mem_size) - self.mem_area(reuse_mem_size)
if delta_mem_area < best_mem_candidate_delta_area:
best_mem_candidate_id = mid
best_mem_candidate_delta_area = delta_mem_area
best_mem_candidate_shape = resize_mem_size
if best_mem_candidate_delta_area <= self.mem_area(op_mem_size):
if best_mem_candidate_delta_area <= self.mem_area(
op_mem_size):
# reuse
self.mem_block[best_mem_candidate_id] = best_mem_candidate_shape
self.mem_block[
best_mem_candidate_id] = best_mem_candidate_shape
mem_id = best_mem_candidate_id
self.idle_mem.remove(mem_id)
......@@ -113,7 +123,8 @@ class MemoryOptimizer(object):
print mem, self.mem_block[mem]
optimized_mem_size += reduce(operator.mul, self.mem_block[mem], 4)
print('origin mem: %d, optimized mem: %d', origin_mem_size, optimized_mem_size)
print('origin mem: %d, optimized mem: %d', origin_mem_size,
optimized_mem_size)
def optimize_memory(net_def):
......
......@@ -27,37 +27,40 @@ def generate_cpp_source():
binary_array = np.fromfile(f, dtype=np.uint8)
idx = 0
size, = struct.unpack("Q", binary_array[idx:idx+8])
size, = struct.unpack("Q", binary_array[idx:idx + 8])
idx += 8
for _ in xrange(size):
key_size, = struct.unpack("i", binary_array[idx:idx+4])
key_size, = struct.unpack("i", binary_array[idx:idx + 4])
idx += 4
key, = struct.unpack(str(key_size) + "s", binary_array[idx:idx+key_size])
key, = struct.unpack(
str(key_size) + "s", binary_array[idx:idx + key_size])
idx += key_size
value_size, = struct.unpack("i", binary_array[idx:idx+4])
value_size, = struct.unpack("i", binary_array[idx:idx + 4])
idx += 4
maps[key] = []
value = struct.unpack(str(value_size) + "B",
binary_array[idx:idx+value_size])
value = struct.unpack(
str(value_size) + "B", binary_array[idx:idx + value_size])
idx += value_size
for ele in value:
maps[key].append(hex(ele))
cl_platform_info_path = os.path.join(binary_dir, FLAGS.platform_info_file_name)
cl_platform_info_path = os.path.join(binary_dir,
FLAGS.platform_info_file_name)
with open(cl_platform_info_path, 'r') as f:
curr_platform_info = f.read()
if platform_info != "":
assert(curr_platform_info == platform_info)
assert (curr_platform_info == platform_info)
platform_info = curr_platform_info
env = jinja2.Environment(loader=jinja2.FileSystemLoader(sys.path[0]))
return env.get_template('opencl_compiled_kernel.cc.jinja2').render(
maps = maps,
data_type = 'unsigned char',
variable_name = 'kCompiledProgramMap',
platform_info = platform_info,
maps=maps,
data_type='unsigned char',
variable_name='kCompiledProgramMap',
platform_info=platform_info,
)
def main(unused_args):
cpp_cl_binary_source = generate_cpp_source()
......@@ -90,7 +93,7 @@ def parse_args():
"--output_path",
type=str,
default="./mace/examples/codegen/opencl/opencl_compiled_program.cc",
help="The path of generated C++ header file which contains cl binaries.")
help="The path of generated C++ header file for cl binaries.")
return parser.parse_known_args()
......
......@@ -6,9 +6,9 @@ import hashlib
from mace.proto import mace_pb2
from jinja2 import Environment, FileSystemLoader
GENERATED_NAME = set()
def generate_obfuscated_name(namespace, name):
md5 = hashlib.md5()
md5.update(namespace)
......@@ -22,31 +22,36 @@ def generate_obfuscated_name(namespace, name):
GENERATED_NAME.add(name)
return name
def generate_tensor_map(tensors):
    """Map each distinct tensor name to a generated obfuscated name.

    Args:
        tensors: iterable of objects exposing a `name` attribute.

    Returns:
        A dict keyed by tensor name; the value is the result of
        `generate_obfuscated_name("tensor", name)` for the first occurrence
        of that name.
    """
    obfuscated = {}
    for tensor in tensors:
        original_name = tensor.name
        if original_name in obfuscated:
            continue
        obfuscated[original_name] = generate_obfuscated_name(
            "tensor", original_name)
    return obfuscated
def generate_in_out_map(ops, tensor_map):
    """Rename every op and build a name map for all op inputs and outputs.

    Each op's `name` is replaced in place by
    `generate_obfuscated_name("op", op.name)`. For every input/output name
    seen for the first time, the mapped value is taken from `tensor_map`
    when present there, otherwise a new name is generated in the "in"
    (inputs) or "out" (outputs) namespace.

    Args:
        ops: iterable of objects exposing `name`, `input` and `output`.
        tensor_map: dict of already-assigned tensor names.

    Returns:
        A dict from original input/output name to its replacement.
    """
    name_map = {}
    for op in ops:
        op.name = generate_obfuscated_name("op", op.name)
        # Inputs are handled before outputs, as a name's namespace is fixed
        # by whichever side mentions it first.
        for namespace, tensor_names in (("in", op.input),
                                        ("out", op.output)):
            for tensor_name in tensor_names:
                if tensor_name in name_map:
                    continue
                if tensor_name in tensor_map:
                    name_map[tensor_name] = tensor_map[tensor_name]
                else:
                    name_map[tensor_name] = generate_obfuscated_name(
                        namespace, tensor_name)
    return name_map
def obfuscate_name(net_def):
input_node = "mace_input_node"
output_node = "mace_output_node"
......@@ -63,20 +68,22 @@ def obfuscate_name(net_def):
if output_node not in op.output[i]:
op.output[i] = in_out_map[op.output[i]]
def rename_tensor(net_def):
    """Rewrite tensor names in place and update op references to match.

    A tensor name becomes "_" followed by the name without its last two
    characters, with every "/" replaced by "_". Entries of each op's `input`
    and `output` that match an original tensor name are replaced by the new
    name; other entries are left untouched.

    Args:
        net_def: object exposing `tensors` (items with `name`) and `op`
            (items with index-assignable `input` and `output`).
    """
    renamed = {}
    for tensor in net_def.tensors:
        old_name = tensor.name
        if old_name not in renamed:
            renamed[old_name] = "_" + old_name[:-2].replace("/", "_")
        tensor.name = renamed[old_name]
    for op in net_def.op:
        for names in (op.input, op.output):
            for idx in range(len(names)):
                new_name = renamed.get(names[idx])
                if new_name is not None:
                    names[idx] = new_name
class TensorInfo:
def __init__(self, id, t, runtime):
self.id = id
......@@ -84,19 +91,26 @@ class TensorInfo:
if t.data_type == mace_pb2.DT_FLOAT:
if runtime == 'gpu':
self.data_type = mace_pb2.DT_HALF
self.data = bytearray(np.array(t.float_data).astype(np.float16).tobytes())
self.data = bytearray(
np.array(t.float_data).astype(np.float16).tobytes())
else:
self.data_type = mace_pb2.DT_FLOAT
self.data = bytearray(np.array(t.float_data).astype(np.float32).tobytes())
self.data = bytearray(
np.array(t.float_data).astype(np.float32).tobytes())
elif t.data_type == mace_pb2.DT_INT32:
self.data = bytearray(np.array(t.int32_data).astype(np.int32).tobytes())
self.data = bytearray(
np.array(t.int32_data).astype(np.int32).tobytes())
elif t.data_type == mace_pb2.DT_UINT8:
self.data = bytearray(np.array(t.int32_data).astype(np.uint8).tolist())
self.data = bytearray(
np.array(t.int32_data).astype(np.uint8).tolist())
def stringfy(value):
    """Join the items of `value` as double-quoted, comma-separated text.

    Registered by this file as the jinja2 filter named 'stringfy'.
    """
    quote = '"{0}"'.format
    return ', '.join(map(quote, value))
def convert_to_source(net_def, mode_pb_checksum, template_dir, obfuscate, model_tag, output, runtime, embed_model_data):
def convert_to_source(net_def, mode_pb_checksum, template_dir, obfuscate,
model_tag, output, runtime, embed_model_data):
if obfuscate:
obfuscate_name(net_def)
else:
......@@ -106,7 +120,8 @@ def convert_to_source(net_def, mode_pb_checksum, template_dir, obfuscate, model_
print template_dir
# Create the jinja2 environment.
j2_env = Environment(loader=FileSystemLoader(template_dir), trim_blocks=True)
j2_env = Environment(
loader=FileSystemLoader(template_dir), trim_blocks=True)
j2_env.filters['stringfy'] = stringfy
output_dir = os.path.dirname(output) + '/'
# generate tensor source files
......@@ -122,11 +137,11 @@ def convert_to_source(net_def, mode_pb_checksum, template_dir, obfuscate, model_
model_data.extend(bytearray([0] * padding))
offset += padding
source = j2_env.get_template(template_name).render(
tensor_info = tensor_info,
tensor = t,
tag = model_tag,
runtime = runtime,
offset = offset,
tensor_info=tensor_info,
tensor=t,
tag=model_tag,
runtime=runtime,
offset=offset,
)
model_data.extend(tensor_info.data)
offset += len(tensor_info.data)
......@@ -137,11 +152,10 @@ def convert_to_source(net_def, mode_pb_checksum, template_dir, obfuscate, model_
# generate tensor data
template_name = 'tensor_data.jinja2'
source = j2_env.get_template(template_name).render(
tag = model_tag,
embed_model_data = embed_model_data,
model_data_size = offset,
model_data = model_data
)
tag=model_tag,
embed_model_data=embed_model_data,
model_data_size=offset,
model_data=model_data)
with open(output_dir + 'tensor_data' + '.cc', "wb") as f:
f.write(source)
if not embed_model_data:
......@@ -155,11 +169,11 @@ def convert_to_source(net_def, mode_pb_checksum, template_dir, obfuscate, model_
op_size = len(net_def.op)
for start in range(0, op_size, 10):
source = j2_env.get_template(template_name).render(
start = start,
end = min(start+10, op_size),
net = net_def,
tag = model_tag,
runtime = runtime,
start=start,
end=min(start + 10, op_size),
net=net_def,
tag=model_tag,
runtime=runtime,
)
with open(output_dir + 'op' + str(counter) + '.cc', "wb") as f:
f.write(source)
......@@ -167,21 +181,21 @@ def convert_to_source(net_def, mode_pb_checksum, template_dir, obfuscate, model_
# generate model source files
template_name = 'model.jinja2'
tensors = [TensorInfo(i, net_def.tensors[i], runtime) for i in range(len(net_def.tensors))]
tensors = [
TensorInfo(i, net_def.tensors[i], runtime)
for i in range(len(net_def.tensors))
]
source = j2_env.get_template(template_name).render(
tensors = tensors,
net = net_def,
tag = model_tag,
runtime = runtime,
model_pb_checksum = mode_pb_checksum
)
tensors=tensors,
net=net_def,
tag=model_tag,
runtime=runtime,
model_pb_checksum=mode_pb_checksum)
with open(output, "wb") as f:
f.write(source)
# generate model header file
template_name = 'model_header.jinja2'
source = j2_env.get_template(template_name).render(
tag = model_tag,
)
source = j2_env.get_template(template_name).render(tag=model_tag, )
with open(output_dir + model_tag + '.h', "wb") as f:
f.write(source)
......@@ -10,18 +10,21 @@ from tensorflow import gfile
FLAGS = None
def hist_inc(hist, key):
    """Increment the counter stored under `key` in `hist`, starting at 1."""
    hist[key] = hist.get(key, 0) + 1
def to_int_list(long_list):
    """Return a new list holding `int(value)` for each item of `long_list`."""
    return [int(value) for value in long_list]
def main(unused_args):
if not FLAGS.input or not gfile.Exists(FLAGS.input):
print('Input graph file ' + FLAGS.input + ' does not exist!')
......@@ -49,7 +52,9 @@ def main(unused_args):
tensor = output.eval()
tensor_shape = list(tensor.shape)
tensor_shapes[tensor_name] = tensor_shape
print("Const %s: %s, %d" % (tensor_name, tensor_shape, functools.reduce(operator.mul, tensor_shape, 1)))
print("Const %s: %s, %d" %
(tensor_name, tensor_shape,
functools.reduce(operator.mul, tensor_shape, 1)))
if len(tensor_shape) == 1 and tensor_shape[0] < 10:
tensor_values[tensor_name] = list(tensor)
......@@ -65,11 +70,16 @@ def main(unused_args):
if input_name.endswith('weights/read:0'):
ksize = input.shape.as_list()
break
if input_name.endswith('weights:0') and input_name in tensor_shapes:
if input_name.endswith(
'weights:0') and input_name in tensor_shapes:
ksize = tensor_shapes[input_name]
break
print('%s(padding=%s, strides=%s, ksize=%s, format=%s) %s => %s' % (op.type, padding, strides, ksize, data_format, op.inputs[0].shape, op.outputs[0].shape))
key = '%s(padding=%s, strides=%s, ksize=%s, format=%s)' % (op.type, padding, strides, ksize, data_format)
print(
'%s(padding=%s, strides=%s, ksize=%s, format=%s) %s => %s'
% (op.type, padding, strides, ksize, data_format,
op.inputs[0].shape, op.outputs[0].shape))
key = '%s(padding=%s, strides=%s, ksize=%s, format=%s)' % (
op.type, padding, strides, ksize, data_format)
hist_inc(stats, key)
elif op.type in ['FusedResizeAndPadConv2D']:
padding = op.get_attr('padding')
......@@ -78,20 +88,25 @@ def main(unused_args):
ksize = 'Unknown'
for input in op.inputs:
input_name = input.name
if input_name.endswith('weights:0') and input_name in tensor_shapes:
if input_name.endswith(
'weights:0') and input_name in tensor_shapes:
ksize = tensor_shapes[input_name]
break
key = '%s(padding=%s, strides=%s, ksize=%s, resize_align_corners=%s)' % (op.type, padding, strides, ksize, resize_align_corners)
key = '%s(padding=%s, strides=%s, ksize=%s, ' \
'resize_align_corners=%s)' % (op.type, padding, strides,
ksize, resize_align_corners)
hist_inc(stats, key)
elif op.type in ['ResizeBilinear']:
align_corners = op.get_attr('align_corners')
size = 'Unknown'
for input in op.inputs:
input_name = input.name
if input_name.endswith('size:0') and input_name in tensor_values:
if input_name.endswith(
'size:0') and input_name in tensor_values:
size = tensor_values[input_name]
break
key = '%s(size=%s, align_corners=%s)' % (op.type, size, align_corners)
key = '%s(size=%s, align_corners=%s)' % (op.type, size,
align_corners)
print(key)
hist_inc(stats, key)
elif op.type in ['AvgPool', 'MaxPool']:
......@@ -99,38 +114,47 @@ def main(unused_args):
strides = to_int_list(op.get_attr('strides'))
ksize = to_int_list(op.get_attr('ksize'))
data_format = op.get_attr('data_format')
key = '%s(padding=%s, strides=%s, ksize=%s)' % (op.type, padding, strides, ksize)
key = '%s(padding=%s, strides=%s, ksize=%s)' % (op.type,
padding,
strides, ksize)
hist_inc(stats, key)
elif op.type in ['SpaceToBatchND', 'BatchToSpaceND']:
block_shape = 'Unknown'
for input in op.inputs:
input_name = input.name
if input_name.endswith('block_shape:0') and input_name in tensor_values:
if input_name.endswith(
'block_shape:0') and input_name in tensor_values:
block_shape = tensor_values[input_name]
break
paddings = 'Unknown'
for input in op.inputs:
input_name = input.name
if input_name.endswith('paddings:0') and input_name in tensor_values:
if input_name.endswith(
'paddings:0') and input_name in tensor_values:
paddings = tensor_values[input_name]
break
crops = 'Unknown'
for input in op.inputs:
input_name = input.name
if input_name.endswith('crops:0') and input_name in tensor_values:
if input_name.endswith(
'crops:0') and input_name in tensor_values:
paddings = tensor_values[input_name]
break
if op.type == 'SpaceToBatchND':
key = '%s(block_shape=%s, paddings=%s)' % (op.type, block_shape, paddings)
key = '%s(block_shape=%s, paddings=%s)' % (op.type,
block_shape,
paddings)
else:
key = '%s(block_shape=%s, crops=%s)' % (op.type, block_shape, crops)
key = '%s(block_shape=%s, crops=%s)' % (op.type,
block_shape, crops)
print(key)
hist_inc(stats, key)
elif op.type == 'Pad':
paddings = 'Unknown'
for input in op.inputs:
input_name = input.name
if input_name.endswith('paddings:0') and input_name in tensor_values:
if input_name.endswith(
'paddings:0') and input_name in tensor_values:
paddings = tensor_values[input_name]
break
key = '%s(paddings=%s)' % (op.type, paddings)
......@@ -142,6 +166,7 @@ def main(unused_args):
for key, value in sorted(six.iteritems(stats)):
print('%s: %d' % (key, value))
def parse_args():
'''Parses command line arguments.'''
parser = argparse.ArgumentParser()
......@@ -152,6 +177,7 @@ def parse_args():
help='TensorFlow \'GraphDef\' file to load.')
return parser.parse_known_args()
if __name__ == '__main__':
FLAGS, unparsed = parse_args()
main(unused_args=[sys.argv[0]] + unparsed)
......@@ -7,7 +7,6 @@
# --target=//mace/ops:ops_test
# --stdout_processor=stdout_processor
import argparse
import random
import re
......@@ -15,15 +14,18 @@ import sys
import sh_commands
def stdout_processor(stdout, device_properties, abi):
    """Default stdout handler: ignore all arguments and do nothing."""
    return None
def ops_test_stdout_processor(stdout, device_properties, abi):
    """Fail when any line of `stdout` reports an abort or a failed test.

    Args:
        stdout: captured output text, scanned line by line.
        device_properties: unused here.
        abi: unused here.

    Raises:
        Exception: "Command failed" if a line contains "Aborted" or "FAILED".
    """
    failure_markers = ("Aborted", "FAILED")
    for output_line in stdout.split("\n"):
        if any(marker in output_line for marker in failure_markers):
            raise Exception("Command failed")
def ops_benchmark_stdout_processor(stdout, device_properties, abi):
stdout_lines = stdout.split("\n")
metrics = {}
......@@ -33,17 +35,20 @@ def ops_benchmark_stdout_processor(stdout, device_properties, abi):
line = line.strip()
parts = line.split()
if len(parts) == 5 and parts[0].startswith("BM_"):
metrics["%s.time_ms" % parts[0]] = str(float(parts[1])/1e6)
metrics["%s.time_ms" % parts[0]] = str(float(parts[1]) / 1e6)
metrics["%s.input_mb_per_sec" % parts[0]] = parts[3]
metrics["%s.gmacc_per_sec" % parts[0]] = parts[4]
platform = device_properties["ro.board.platform"].replace(" ", "-")
model = device_properties["ro.product.model"].replace(" ", "-")
tags = {"ro.board.platform": platform,
tags = {
"ro.board.platform": platform,
"ro.product.model": model,
"abi": abi}
sh_commands.falcon_push_metrics(metrics, tags=tags,
endpoint="mace_ops_benchmark")
"abi": abi
}
sh_commands.falcon_push_metrics(
metrics, tags=tags, endpoint="mace_ops_benchmark")
def parse_args():
"""Parses command line arguments."""
......@@ -57,22 +62,16 @@ def parse_args():
"--target_socs",
type=str,
default="all",
help="SoCs(ro.board.platform) to build, comma seperated list or all/random")
help="SoCs (ro.board.platform from getprop) to build, "
"comma seperated list or all/random")
parser.add_argument(
"--target",
type=str,
default="//...",
help="Bazel target to build")
"--target", type=str, default="//...", help="Bazel target to build")
parser.add_argument(
"--run_target",
type=bool,
default=False,
help="Whether to run the target")
parser.add_argument(
"--args",
type=str,
default="",
help="Command args")
parser.add_argument("--args", type=str, default="", help="Command args")
parser.add_argument(
"--stdout_processor",
type=str,
......@@ -80,6 +79,7 @@ def parse_args():
help="Stdout processing function, default: stdout_processor")
return parser.parse_known_args()
def main(unused_args):
target_socs = None
if FLAGS.target_socs != "all" and FLAGS.target_socs != "random":
......@@ -101,17 +101,25 @@ def main(unused_args):
sh_commands.bazel_build(target, abi=target_abi)
if FLAGS.run_target:
for serialno in target_devices:
if target_abi not in set(sh_commands.adb_supported_abis(serialno)):
print("Skip device %s which does not support ABI %s" % (serialno, target_abi))
if target_abi not in set(
sh_commands.adb_supported_abis(serialno)):
print("Skip device %s which does not support ABI %s" %
(serialno, target_abi))
continue
stdouts = sh_commands.adb_run(serialno, host_bin_path, bin_name,
stdouts = sh_commands.adb_run(
serialno,
host_bin_path,
bin_name,
args=FLAGS.args,
opencl_profiling=1,
vlog_level=0,
device_bin_path="/data/local/tmp/mace",
out_of_range_check=1)
device_properties = sh_commands.adb_getprop_by_serialno(serialno)
globals()[FLAGS.stdout_processor](stdouts, device_properties, target_abi)
device_properties = sh_commands.adb_getprop_by_serialno(
serialno)
globals()[FLAGS.stdout_processor](stdouts, device_properties,
target_abi)
if __name__ == "__main__":
FLAGS, unparsed = parse_args()
......
#-*- coding:utf8 -*-
import json
import socket
import itertools
import json, socket, itertools
class FalconCli(object):
def __init__(self, addr, debug=True, buf_size=1000):
self.socket_ = socket.create_connection(addr)
self.stream = self.socket_.makefile()
......@@ -16,16 +16,19 @@ class FalconCli(object):
self.stream.close()
@classmethod
def connect(cls,
            server="transfer.falcon.miliao.srv",
            port=8433,
            debug=True,
            buf_size=1000):
    """Build a FalconCli connected to ``server``:``port``.

    ``debug`` and ``buf_size`` are passed straight to the constructor.
    If creating the client raises ``socket.error`` the error is printed
    and the method falls through, so the caller receives ``None``.
    """
    address = (server, port)
    try:
        return FalconCli(address, debug, buf_size)
    except socket.error as exc:
        # Connection problems are reported on stdout, not propagated.
        print("error: connect to %s:%s error: %s" % (server, port, exc))
def call(self, name, *params):
request = dict(id=next(self.id_counter),
params=list(params),
method=name)
request = dict(
id=next(self.id_counter), params=list(params), method=name)
payload = json.dumps(request).encode()
if self.debug:
print "--> req:", payload
......@@ -49,7 +52,7 @@ class FalconCli(object):
resp = []
while True:
buf = lines[s:s+self.buf_size]
buf = lines[s:s + self.buf_size]
s = s + self.buf_size
if len(buf) == 0:
break
......@@ -57,4 +60,3 @@ class FalconCli(object):
resp.append(r)
return resp
......@@ -11,13 +11,16 @@ import re
# --input_file input_file
#
def generate_data(name, shape):
np.random.seed()
data = np.random.random(shape) * 2 - 1
input_file_name = FLAGS.input_file + "_" + re.sub('[^0-9a-zA-Z]+', '_', name)
input_file_name = FLAGS.input_file + "_" + re.sub('[^0-9a-zA-Z]+', '_',
name)
print 'Generate input file: ', input_file_name
data.astype(np.float32).tofile(input_file_name)
def main(unused_args):
input_names = [name for name in FLAGS.input_node.split(',')]
input_shapes = [shape for shape in FLAGS.input_shape.split(':')]
......@@ -27,29 +30,21 @@ def main(unused_args):
generate_data(input_names[i], shape)
print "Generate input file done."
def parse_args():
    """Parse the command line flags of the input generator.

    Returns the ``(namespace, remaining)`` pair from ``parse_known_args``:
    the parsed flags plus the list of arguments the parser did not
    recognise.
    """
    parser = argparse.ArgumentParser()
    # Custom "bool" type: true only when the text is "true" (any case).
    parser.register("type", "bool", lambda v: v.lower() == "true")
    # (flag, default, help) for every string-valued option.
    string_flags = (
        ("--input_file", "", "input file."),
        ("--input_node", "input_node", "input node"),
        ("--input_shape", "1,64,64,3", "input shape."),
    )
    for flag, default_value, help_text in string_flags:
        parser.add_argument(
            flag, type=str, default=default_value, help=help_text)
    return parser.parse_known_args()
if __name__ == '__main__':
FLAGS, unparsed = parse_args()
main(unused_args=[sys.argv[0]] + unparsed)
......@@ -34,7 +34,8 @@ def run_command(command):
print("Stderr msg:\n{}".format(err))
if result.returncode != 0:
raise Exception("Exit not 0 from bash with code: {}, command: {}".format(
raise Exception(
"Exit not 0 from bash with code: {}, command: {}".format(
result.returncode, command))
......@@ -63,10 +64,12 @@ def generate_version_code():
command = "bash tools/generate_version_code.sh"
run_command(command)
def generate_opencl_source_code():
    """Run ``tools/generate_opencl_code.sh`` in ``source`` mode via bash."""
    run_command("bash tools/generate_opencl_code.sh source")
def generate_opencl_binay_code(target_soc, model_output_dirs, pull_or_not):
cl_bin_dirs = []
for d in model_output_dirs:
......@@ -79,6 +82,7 @@ def generate_opencl_binay_code(target_soc, model_output_dirs, pull_or_not):
'binary', target_soc, cl_bin_dirs_str, int(pull_or_not))
run_command(command)
def generate_tuning_param_code(target_soc, model_output_dirs, pull_or_not):
cl_bin_dirs = []
for d in model_output_dirs:
......@@ -91,20 +95,24 @@ def generate_tuning_param_code(target_soc, model_output_dirs, pull_or_not):
target_soc, cl_bin_dirs_str, int(pull_or_not))
run_command(command)
def generate_code(target_soc, model_output_dirs, pull_or_not):
    """Generate the OpenCL binary code, then the tuning parameter code.

    The three arguments are forwarded unchanged to both generators.
    """
    generators = (generate_opencl_binay_code, generate_tuning_param_code)
    for generate in generators:
        generate(target_soc, model_output_dirs, pull_or_not)
def clear_env(target_soc):
    """Run ``tools/clear_env.sh`` through bash for ``target_soc``."""
    run_command("bash tools/clear_env.sh {}".format(target_soc))
def input_file_name(input_name):
    """Return the data file name used for the input node ``input_name``.

    The name is the ``INPUT_FILE_NAME`` environment variable, an underscore,
    and ``input_name`` with each run of characters other than ASCII letters
    and digits collapsed to one underscore. Raises ``KeyError`` if the
    environment variable is not set.
    """
    prefix = os.environ['INPUT_FILE_NAME']
    sanitized = re.sub('[^0-9a-zA-Z]+', '_', input_name)
    return prefix + '_' + sanitized
def generate_random_input(target_soc, model_output_dir,
input_names, input_files):
def generate_random_input(target_soc, model_output_dir, input_names,
input_files):
generate_data_or_not = True
command = "bash tools/validate_tools.sh {} {} {}".format(
target_soc, model_output_dir, int(generate_data_or_not))
......@@ -122,16 +130,19 @@ def generate_random_input(target_soc, model_output_dir,
else:
input_name_list.append(input_names)
if len(input_file_list) != len(input_name_list):
raise Exception('If input_files set, the input files should match the input names.')
raise Exception('If input_files set, the input files should '
'match the input names.')
for i in range(len(input_file_list)):
if input_file_list[i] is not None:
dst_input_file = model_output_dir + '/' + input_file_name(input_name_list[i])
dst_input_file = model_output_dir + '/' + input_file_name(
input_name_list[i])
if input_file_list[i].startswith("http://") or \
input_file_list[i].startswith("https://"):
urllib.urlretrieve(input_file_list[i], dst_input_file)
else:
shutil.copy(input_file_list[i], dst_input_file)
def generate_model_code():
command = "bash tools/generate_model_code.sh"
run_command(command)
......@@ -155,10 +166,17 @@ def tuning_run(model_name,
# TODO(yejianwu) refactoring the hackish code
stdout_buff = []
process_output = sh_commands.make_output_processor(stdout_buff)
p = sh.bash("tools/tuning_run.sh", target_soc, model_output_dir,
running_round, int(tuning),
restart_round, option_args, _out=process_output,
_bg=True, _err_to_out=True)
p = sh.bash(
"tools/tuning_run.sh",
target_soc,
model_output_dir,
running_round,
int(tuning),
restart_round,
option_args,
_out=process_output,
_bg=True,
_err_to_out=True)
p.wait()
metrics = {}
for line in stdout_buff:
......@@ -166,18 +184,23 @@ def tuning_run(model_name,
parts = line.split()
if len(parts) == 6 and parts[0].startswith("time"):
metrics["%s.create_net_ms" % model_name] = str(float(parts[1]))
metrics["%s.mace_engine_ctor_ms" % model_name] = str(float(parts[2]))
metrics["%s.mace_engine_ctor_ms" % model_name] = str(
float(parts[2]))
metrics["%s.init_ms" % model_name] = str(float(parts[3]))
metrics["%s.warmup_ms" % model_name] = str(float(parts[4]))
if float(parts[5]) > 0:
metrics["%s.avg_latency_ms" % model_name] = str(float(parts[5]))
tags = {"ro.board.platform": target_soc,
metrics["%s.avg_latency_ms" % model_name] = str(
float(parts[5]))
tags = {
"ro.board.platform": target_soc,
"abi": target_abi,
# "runtime": target_runtime, # TODO(yejianwu) Add the actual runtime
"round": running_round, # TODO(yejianwu) change this to source/binary
"tuning": tuning}
sh_commands.falcon_push_metrics(metrics, endpoint="mace_model_benchmark",
tags=tags)
"tuning": tuning
}
sh_commands.falcon_push_metrics(
metrics, endpoint="mace_model_benchmark", tags=tags)
def benchmark_model(target_soc, model_output_dir, option_args=''):
command = "bash tools/benchmark.sh {} {} \"{}\"".format(
......@@ -188,8 +211,8 @@ def benchmark_model(target_soc, model_output_dir, option_args=''):
def run_model(model_name, target_runtime, target_abi, target_soc,
              model_output_dir, running_round, restart_round, option_args):
    """Run a model through ``tuning_run`` with ``False`` as the tuning argument.

    Every argument is forwarded positionally; ``False`` is inserted between
    ``running_round`` and ``restart_round``.
    """
    tuning = False
    tuning_run(model_name, target_runtime, target_abi, target_soc,
               model_output_dir, running_round, tuning, restart_round,
               option_args)
def generate_production_code(target_soc, model_output_dirs, pull_or_not):
......@@ -251,8 +274,8 @@ def merge_libs_and_tuning_results(target_soc, output_dir, model_output_dirs):
build_production_code()
model_output_dirs_str = ",".join(model_output_dirs)
command = "bash tools/merge_libs.sh {} {} {}".format(target_soc, output_dir,
model_output_dirs_str)
command = "bash tools/merge_libs.sh {} {} {}".format(
target_soc, output_dir, model_output_dirs_str)
run_command(command)
......@@ -260,6 +283,7 @@ def packaging_lib_file(output_dir):
command = "bash tools/packaging_lib.sh {}".format(output_dir)
run_command(command)
def download_model_files(model_file_path,
model_output_dir,
weight_file_path=""):
......@@ -270,10 +294,9 @@ def download_model_files(model_file_path,
if weight_file_path.startswith("http://") or \
weight_file_path.startswith("https://"):
os.environ[
"WEIGHT_FILE_PATH"] = model_output_dir + "/model.caffemodel"
urllib.urlretrieve(weight_file_path,
os.environ["WEIGHT_FILE_PATH"])
os.environ["WEIGHT_FILE_PATH"] = model_output_dir + "/model.caffemodel"
urllib.urlretrieve(weight_file_path, os.environ["WEIGHT_FILE_PATH"])
def md5sum(str):
md5 = hashlib.md5()
......@@ -306,7 +329,10 @@ def parse_args():
default=10,
help="The model throughput test running seconds.")
parser.add_argument(
"--restart_round", type=int, default=1, help="The model restart round.")
"--restart_round",
type=int,
default=1,
help="The model restart round.")
parser.add_argument(
"--tuning", type="bool", default="true", help="Tune opencl params.")
parser.add_argument(
......@@ -321,14 +347,16 @@ def parse_args():
help="SoCs to build, comma seperated list (getprop ro.board.platform)")
return parser.parse_known_args()
def set_environment(configs):
    """Export build settings as environment variables.

    ``EMBED_MODEL_DATA`` and ``VLOG_LEVEL`` come from ``configs`` (converted
    with ``str``), ``PROJECT_NAME`` is the base name of ``FLAGS.config``
    without its extension, and the input/output file name prefixes are set
    to fixed values.
    """
    env = os.environ
    env["EMBED_MODEL_DATA"] = str(configs["embed_model_data"])
    env["VLOG_LEVEL"] = str(configs["vlog_level"])
    config_basename = os.path.basename(FLAGS.config)
    project_name, _ = os.path.splitext(config_basename)
    env["PROJECT_NAME"] = project_name
    env['INPUT_FILE_NAME'] = "model_input"
    env['OUTPUT_FILE_NAME'] = "model_out"
def main(unused_args):
configs = parse_model_configs()
......@@ -343,13 +371,16 @@ def main(unused_args):
if not os.path.exists(FLAGS.output_dir):
os.makedirs(FLAGS.output_dir)
elif os.path.exists(os.path.join(FLAGS.output_dir, "libmace")):
shutil.rmtree(os.path.join(FLAGS.output_dir, os.environ["PROJECT_NAME"]))
os.makedirs(os.path.join(FLAGS.output_dir, os.environ["PROJECT_NAME"]))
shutil.rmtree(
os.path.join(FLAGS.output_dir, os.environ["PROJECT_NAME"]))
os.makedirs(
os.path.join(FLAGS.output_dir, os.environ["PROJECT_NAME"]))
generate_version_code()
generate_opencl_source_code()
option_args = ' '.join([arg for arg in unused_args if arg.startswith('--')])
option_args = ' '.join(
[arg for arg in unused_args if arg.startswith('--')])
available_socs = sh_commands.adb_get_all_socs()
target_socs = available_socs
......@@ -362,10 +393,10 @@ def main(unused_args):
target_socs = target_socs & socs
missing_socs = socs.difference(target_socs)
if len(missing_socs) > 0:
print("Error: devices with SoCs are not connected %s" % missing_socs)
print(
"Error: devices with SoCs are not connected %s" % missing_socs)
exit(1)
for target_soc in target_socs:
for target_abi in configs["target_abis"]:
global_runtime = get_global_runtime(configs)
......@@ -373,28 +404,27 @@ def main(unused_args):
os.environ["TARGET_ABI"] = target_abi
model_output_dirs = []
for model_name in configs["models"]:
print '=======================', model_name, '======================='
print '===================', model_name, '==================='
# Transfer params by environment
os.environ["MODEL_TAG"] = model_name
model_config = configs["models"][model_name]
input_file_list = model_config.get("validation_inputs_data", [])
input_file_list = model_config.get("validation_inputs_data",
[])
for key in model_config:
if key in ['input_nodes', 'output_nodes'] and isinstance(
model_config[key], list):
os.environ[key.upper()] = ",".join(model_config[key])
elif key in ['input_shapes', 'output_shapes'] and isinstance(
model_config[key], list):
elif key in ['input_shapes', 'output_shapes'
] and isinstance(model_config[key], list):
os.environ[key.upper()] = ":".join(model_config[key])
else:
os.environ[key.upper()] = str(model_config[key])
# Create model build directory
model_path_digest = md5sum(model_config["model_file_path"])
model_output_dir = "%s/%s/%s/%s/%s/%s/%s" % (FLAGS.output_dir,
os.environ["PROJECT_NAME"],
"build", model_name,
model_path_digest,
target_soc, target_abi)
model_output_dir = "%s/%s/%s/%s/%s/%s/%s" % (
FLAGS.output_dir, os.environ["PROJECT_NAME"], "build",
model_name, model_path_digest, target_soc, target_abi)
model_output_dirs.append(model_output_dir)
if FLAGS.mode == "build" or FLAGS.mode == "all":
......@@ -404,22 +434,27 @@ def main(unused_args):
clear_env(target_soc)
download_model_files(model_config["model_file_path"],
model_output_dir, model_config.get("weight_file_path", ""))
model_output_dir,
model_config.get("weight_file_path", ""))
if FLAGS.mode == "build" or FLAGS.mode == "run" or FLAGS.mode == "validate"\
or FLAGS.mode == "benchmark" or FLAGS.mode == "all":
if FLAGS.mode == "build" or FLAGS.mode == "run" or \
FLAGS.mode == "validate" or \
FLAGS.mode == "benchmark" or FLAGS.mode == "all":
generate_random_input(target_soc, model_output_dir,
model_config['input_nodes'], input_file_list)
model_config['input_nodes'],
input_file_list)
if FLAGS.mode == "build" or FLAGS.mode == "all":
generate_model_code()
build_mace_run_prod(model_name, global_runtime, target_abi,
target_soc, model_output_dir, FLAGS.tuning)
target_soc, model_output_dir,
FLAGS.tuning)
if FLAGS.mode == "run" or FLAGS.mode == "validate" or FLAGS.mode == "all":
run_model(model_name, global_runtime, target_abi, target_soc,
model_output_dir, FLAGS.round, FLAGS.restart_round,
option_args)
if FLAGS.mode == "run" or FLAGS.mode == "validate" or \
FLAGS.mode == "all":
run_model(model_name, global_runtime, target_abi,
target_soc, model_output_dir, FLAGS.round,
FLAGS.restart_round, option_args)
if FLAGS.mode == "benchmark":
benchmark_model(target_soc, model_output_dir, option_args)
......@@ -427,14 +462,18 @@ def main(unused_args):
if FLAGS.mode == "validate" or FLAGS.mode == "all":
validate_model(target_soc, model_output_dir)
if FLAGS.mode == "build" or FLAGS.mode == "merge" or FLAGS.mode == "all":
if FLAGS.mode == "build" or FLAGS.mode == "merge" or \
FLAGS.mode == "all":
merge_libs_and_tuning_results(
target_soc, FLAGS.output_dir + "/" + os.environ["PROJECT_NAME"],
target_soc,
FLAGS.output_dir + "/" + os.environ["PROJECT_NAME"],
model_output_dirs)
if FLAGS.mode == "throughput_test":
merged_lib_file = FLAGS.output_dir + "/%s/%s/libmace_%s.%s.a" % \
(os.environ["PROJECT_NAME"], target_abi, os.environ["PROJECT_NAME"], target_soc)
merged_lib_file = FLAGS.output_dir + \
"/%s/%s/libmace_%s.%s.a" % \
(os.environ["PROJECT_NAME"], target_abi,
os.environ["PROJECT_NAME"], target_soc)
generate_random_input(target_soc, FLAGS.output_dir, [], [])
for model_name in configs["models"]:
runtime = configs["models"][model_name]["runtime"]
......@@ -449,4 +488,3 @@ def main(unused_args):
if __name__ == "__main__":
FLAGS, unparsed = parse_args()
main(unused_args=[sys.argv[0]] + unparsed)
......@@ -3,18 +3,22 @@ import re
import time
import falcon_cli
################################
# common
################################
def strip_invalid_utf8(str):
    """Pass ``str`` through ``iconv -c -t UTF-8`` via ``sh``; return the result.

    NOTE(review): how ``sh`` feeds ``str`` to iconv (piped input or a file
    argument) depends on the caller's value - confirm at call sites.
    """
    iconv_options = ("-c", "-t", "UTF-8")
    return sh.iconv(str, *iconv_options)
def make_output_processor(buff):
    """Return a callback that echoes each line and records it in ``buff``.

    The callback prints the line with surrounding whitespace stripped and
    appends the original, unstripped line to ``buff``.
    """

    def process_output(line):
        stripped = line.strip()
        print(stripped)
        # Keep the raw line so callers can rebuild the exact output.
        buff.append(line)

    return process_output
################################
# adb commands
################################
......@@ -23,11 +27,12 @@ def adb_split_stdout(stdout_str):
# Filter out last empty line
return [l.strip() for l in stdout_str.split('\n') if len(l.strip()) > 0]
def adb_devices(target_socs=None):
outputs = sh.grep(sh.adb("devices"), "^[A-Za-z0-9]\+[[:space:]]\+device$")
raw_lists = sh.cut(outputs, "-f1")
device_ids = adb_split_stdout(raw_lists)
if target_socs != None:
if target_socs is not None:
target_socs_set = set(target_socs)
target_devices = []
for serialno in device_ids:
......@@ -38,6 +43,7 @@ def adb_devices(target_socs=None):
else:
return device_ids
def adb_getprop_by_serialno(serialno):
outputs = sh.adb("-s", serialno, "shell", "getprop")
raw_props = adb_split_stdout(outputs)
......@@ -49,12 +55,14 @@ def adb_getprop_by_serialno(serialno):
props[m.group(1)] = m.group(2)
return props
def adb_supported_abis(serialno):
    """Return the ABIs listed in the device's ``ro.product.cpu.abilist``.

    The property is read through ``adb_getprop_by_serialno``, split on
    commas, and each entry is stripped of surrounding whitespace.
    """
    abilist_str = adb_getprop_by_serialno(serialno)["ro.product.cpu.abilist"]
    abis = []
    for entry in abilist_str.split(','):
        abis.append(entry.strip())
    return abis
def adb_get_all_socs():
socs = []
for d in adb_devices():
......@@ -62,7 +70,10 @@ def adb_get_all_socs():
socs.append(props["ro.board.platform"])
return set(socs)
def adb_run(serialno, host_bin_path, bin_name,
def adb_run(serialno,
host_bin_path,
bin_name,
args="",
opencl_profiling=1,
vlog_level=0,
......@@ -71,7 +82,9 @@ def adb_run(serialno, host_bin_path, bin_name,
host_bin_full_path = "%s/%s" % (host_bin_path, bin_name)
device_bin_full_path = "%s/%s" % (device_bin_path, bin_name)
props = adb_getprop_by_serialno(serialno)
print("=====================================================================")
print(
"====================================================================="
)
print("Run on device: %s, %s, %s" % (serialno, props["ro.board.platform"],
props["ro.product.model"]))
sh.adb("-s", serialno, "shell", "rm -rf %s" % device_bin_path)
......@@ -79,12 +92,19 @@ def adb_run(serialno, host_bin_path, bin_name,
print("Push %s to %s" % (host_bin_full_path, device_bin_full_path))
sh.adb("-s", serialno, "push", host_bin_full_path, device_bin_full_path)
print("Run %s" % device_bin_full_path)
stdout_buff=[]
stdout_buff = []
process_output = make_output_processor(stdout_buff)
p = sh.adb("-s", serialno, "shell",
"MACE_OUT_OF_RANGE_CHECK=%d MACE_OPENCL_PROFILING=%d MACE_CPP_MIN_VLOG_LEVEL=%d %s %s" %
(out_of_range_check, opencl_profiling, vlog_level, device_bin_full_path, args),
_out=process_output, _bg=True, _err_to_out=True)
p = sh.adb(
"-s",
serialno,
"shell",
"MACE_OUT_OF_RANGE_CHECK=%d MACE_OPENCL_PROFILING=%d "
"MACE_CPP_MIN_VLOG_LEVEL=%d %s %s" %
(out_of_range_check, opencl_profiling, vlog_level,
device_bin_full_path, args),
_out=process_output,
_bg=True,
_err_to_out=True)
p.wait()
return "".join(stdout_buff)
......@@ -94,11 +114,14 @@ def adb_run(serialno, host_bin_path, bin_name,
################################
def bazel_build(target, strip="always", abi="armeabi-v7a"):
print("Build %s with ABI %s" % (target, abi))
stdout_buff=[]
stdout_buff = []
process_output = make_output_processor(stdout_buff)
p= sh.bazel("build",
"-c", "opt",
"--strip", strip,
p = sh.bazel(
"build",
"-c",
"opt",
"--strip",
strip,
"--verbose_failures",
target,
"--crosstool_top=//external:android/crosstool",
......@@ -109,12 +132,17 @@ def bazel_build(target, strip="always", abi="armeabi-v7a"):
"--copt=-DMACE_DISABLE_NO_TUNING_WARNING",
"--copt=-Werror=return-type",
"--copt=-O3",
"--define", "neon=true",
"--define", "openmp=true",
_out=process_output, _bg=True, _err_to_out=True)
"--define",
"neon=true",
"--define",
"openmp=true",
_out=process_output,
_bg=True,
_err_to_out=True)
p.wait()
return "".join(stdout_buff)
def bazel_target_to_bin(target):
# change //mace/a/b:c to bazel-bin/mace/a/b/c
prefix, bin_name = target.split(':')
......@@ -124,26 +152,32 @@ def bazel_target_to_bin(target):
host_bin_path = "bazel-bin/%s" % prefix
return host_bin_path, bin_name
################################
# mace commands
################################
# TODO this should be refactored
def gen_encrypted_opencl_source(codegen_path="mace/codegen"):
    """Create ``<codegen_path>/opencl`` and run the encrypt codegen script.

    The script reads kernels from ``./mace/kernels/opencl/cl/`` and is told
    to write ``opencl_encrypt_program.cc`` into the created directory.
    """
    sh.mkdir("-p", "%s/opencl" % codegen_path)
    script = "mace/python/tools/encrypt_opencl_codegen.py"
    kernel_dir_flag = "--cl_kernel_dir=./mace/kernels/opencl/cl/"
    output_flag = (
        "--output_path=%s/opencl/opencl_encrypt_program.cc" % codegen_path)
    sh.python(script, kernel_dir_flag, output_flag)
def gen_mace_version(codegen_path="mace/codegen"):
    """Create ``<codegen_path>/version`` and run the version source script.

    ``mace/tools/git/gen_version_source.sh`` is run through bash with the
    path of ``version.cc`` inside the created directory as its argument.
    """
    sh.mkdir("-p", "%s/version" % codegen_path)
    version_source = "%s/version/version.cc" % codegen_path
    sh.bash("mace/tools/git/gen_version_source.sh", version_source)
def gen_compiled_opencl_source(codegen_path="mace/codegen"):
    """Create ``<codegen_path>/opencl`` and run the OpenCL codegen script.

    The script is told to write ``opencl_compiled_program.cc`` into the
    created directory.
    """
    sh.mkdir("-p", "%s/opencl" % codegen_path)
    output_flag = (
        "--output_path=%s/opencl/opencl_compiled_program.cc" % codegen_path)
    sh.python("mace/python/tools/opencl_codegen.py", output_flag)
################################
# falcon
################################
......@@ -156,10 +190,10 @@ def falcon_tags(tags_dict):
tags = tags + ",%s=%s" % (k, v)
return tags
def falcon_push_metrics(metrics, endpoint="mace_dev", tags={}):
cli = falcon_cli.FalconCli.connect(server="transfer.falcon.miliao.srv",
port=8433,
debug=False)
cli = falcon_cli.FalconCli.connect(
server="transfer.falcon.miliao.srv", port=8433, debug=False)
ts = int(time.time())
falcon_metrics = [{
"endpoint": endpoint,
......@@ -171,4 +205,3 @@ def falcon_push_metrics(metrics, endpoint="mace_dev", tags={}):
"counterType": "GAUGE"
} for key, value in metrics.iteritems()]
cli.update(falcon_metrics)
......@@ -20,29 +20,33 @@ from scipy import stats
# --input_shape 1,64,64,3 \
# --output_shape 1,64,64,2
def load_data(file):
    """Load a raw float32 dump from ``file`` as a 1-D NumPy array.

    If ``file`` is not an existing regular file, an empty array created
    with ``np.empty([0])`` is returned instead of raising.
    """
    if not os.path.isfile(file):
        return np.empty([0])
    return np.fromfile(file=file, dtype=np.float32)
def format_output_name(name):
    """Return ``name`` with non-alphanumeric runs replaced by ``_``.

    Each maximal run of characters other than ASCII letters and digits
    becomes a single underscore.
    """
    non_alnum_run = re.compile('[^0-9a-zA-Z]+')
    return non_alnum_run.sub('_', name)
def compare_output(output_name, mace_out_value, out_value):
if mace_out_value.size != 0:
out_value = out_value.reshape(-1)
mace_out_value = mace_out_value.reshape(-1)
assert len(out_value) == len(mace_out_value)
similarity = (1 - spatial.distance.cosine(out_value, mace_out_value))
print output_name, 'MACE VS', FLAGS.platform.upper(), 'similarity: ', similarity
print output_name, 'MACE VS', FLAGS.platform.upper(
), 'similarity: ', similarity
if (FLAGS.mace_runtime == "cpu" and similarity > 0.999) or \
(FLAGS.mace_runtime == "neon" and similarity > 0.999) or \
(FLAGS.mace_runtime == "gpu" and similarity > 0.995) or \
(FLAGS.mace_runtime == "dsp" and similarity > 0.930):
print '=======================Similarity Test Passed======================'
print '===================Similarity Test Passed=================='
else:
print '=======================Similarity Test Failed======================'
print '===================Similarity Test Failed=================='
sys.exit(-1)
else:
print '=======================Skip empty node==================='
......@@ -66,21 +70,28 @@ def validate_tf_model(input_names, input_shapes, output_names):
tf.import_graph_def(input_graph_def, name="")
input_dict = {}
for i in range(len(input_names)):
input_value = load_data(FLAGS.input_file + "_" + input_names[i])
input_value = load_data(
FLAGS.input_file + "_" + input_names[i])
input_value = input_value.reshape(input_shapes[i])
input_node = graph.get_tensor_by_name(input_names[i] + ':0')
input_node = graph.get_tensor_by_name(
input_names[i] + ':0')
input_dict[input_node] = input_value
output_nodes = []
for name in output_names:
output_nodes.extend([graph.get_tensor_by_name(name + ':0')])
output_nodes.extend(
[graph.get_tensor_by_name(name + ':0')])
output_values = session.run(output_nodes, feed_dict=input_dict)
for i in range(len(output_names)):
output_file_name = FLAGS.mace_out_file + "_" + format_output_name(output_names[i])
output_file_name = FLAGS.mace_out_file + "_" + \
format_output_name(output_names[i])
mace_out_value = load_data(output_file_name)
compare_output(output_names[i], mace_out_value, output_values[i])
compare_output(output_names[i], mace_out_value,
output_values[i])
def validate_caffe_model(input_names, input_shapes, output_names, output_shapes):
def validate_caffe_model(input_names, input_shapes, output_names,
output_shapes):
os.environ['GLOG_minloglevel'] = '1' # suprress Caffe verbose prints
import caffe
if not os.path.isfile(FLAGS.model_file):
......@@ -96,7 +107,8 @@ def validate_caffe_model(input_names, input_shapes, output_names, output_shapes)
for i in range(len(input_names)):
input_value = load_data(FLAGS.input_file + "_" + input_names[i])
input_value = input_value.reshape(input_shapes[i]).transpose((0, 3, 1, 2))
input_value = input_value.reshape(input_shapes[i]).transpose((0, 3, 1,
2))
input_blob_name = input_names[i]
try:
if input_names[i] in net.top_names:
......@@ -110,16 +122,20 @@ def validate_caffe_model(input_names, input_shapes, output_names, output_shapes)
for i in range(len(output_names)):
value = net.blobs[net.top_names[output_names[i]][0]].data
out_shape = output_shapes[i]
out_shape[1], out_shape[2], out_shape[3] = out_shape[3], out_shape[1], out_shape[2]
out_shape[1], out_shape[2], out_shape[3] = out_shape[3], out_shape[
1], out_shape[2]
value = value.reshape(out_shape).transpose((0, 2, 3, 1))
output_file_name = FLAGS.mace_out_file + "_" + format_output_name(output_names[i])
output_file_name = FLAGS.mace_out_file + "_" + format_output_name(
output_names[i])
mace_out_value = load_data(output_file_name)
compare_output(output_names[i], mace_out_value, value)
def main(unused_args):
input_names = [name for name in FLAGS.input_node.split(',')]
input_shape_strs = [shape for shape in FLAGS.input_shape.split(':')]
input_shapes = [[int(x) for x in shape.split(',')] for shape in input_shape_strs]
input_shapes = [[int(x) for x in shape.split(',')]
for shape in input_shape_strs]
output_names = [name for name in FLAGS.output_node.split(',')]
assert len(input_names) == len(input_shapes)
......@@ -127,18 +143,18 @@ def main(unused_args):
validate_tf_model(input_names, input_shapes, output_names)
elif FLAGS.platform == 'caffe':
output_shape_strs = [shape for shape in FLAGS.output_shape.split(':')]
output_shapes = [[int(x) for x in shape.split(',')] for shape in output_shape_strs]
validate_caffe_model(input_names, input_shapes, output_names, output_shapes)
output_shapes = [[int(x) for x in shape.split(',')]
for shape in output_shape_strs]
validate_caffe_model(input_names, input_shapes, output_names,
output_shapes)
def parse_args():
"""Parses command line arguments."""
parser = argparse.ArgumentParser()
parser.register("type", "bool", lambda v: v.lower() == "true")
parser.add_argument(
"--platform",
type=str,
default="",
help="Tensorflow or Caffe.")
"--platform", type=str, default="", help="Tensorflow or Caffe.")
parser.add_argument(
"--model_file",
type=str,
......@@ -150,40 +166,22 @@ def parse_args():
default="",
help="caffe model file to load.")
parser.add_argument(
"--input_file",
type=str,
default="",
help="input file.")
"--input_file", type=str, default="", help="input file.")
parser.add_argument(
"--mace_out_file",
type=str,
default="",
help="mace output file to load.")
parser.add_argument(
"--mace_runtime",
type=str,
default="gpu",
help="mace runtime device.")
"--mace_runtime", type=str, default="gpu", help="mace runtime device.")
parser.add_argument(
"--input_shape",
type=str,
default="1,64,64,3",
help="input shape.")
"--input_shape", type=str, default="1,64,64,3", help="input shape.")
parser.add_argument(
"--output_shape",
type=str,
default="1,64,64,2",
help="output shape.")
"--output_shape", type=str, default="1,64,64,2", help="output shape.")
parser.add_argument(
"--input_node",
type=str,
default="input_node",
help="input node")
"--input_node", type=str, default="input_node", help="input node")
parser.add_argument(
"--output_node",
type=str,
default="output_node",
help="output node")
"--output_node", type=str, default="output_node", help="output node")
return parser.parse_known_args()
......@@ -191,4 +189,3 @@ def parse_args():
if __name__ == '__main__':
FLAGS, unparsed = parse_args()
main(unused_args=[sys.argv[0]] + unparsed)
......@@ -11,12 +11,8 @@ G_T = {}
# f(2, 3)
A_T[4] = np.array([[1, 1, 1, 0], [0, 1, -1, -1]]).astype(np.float32)
A[4] = np.transpose(A_T[4])
B_T[4] = np.array([
[1, 0, -1, 0],
[0, 1, 1, 0],
[0, -1, 1, 0],
[0, 1, 0, -1]
]).astype(np.float32)
B_T[4] = np.array([[1, 0, -1, 0], [0, 1, 1, 0], [0, -1, 1, 0],
[0, 1, 0, -1]]).astype(np.float32)
B[4] = np.transpose(B_T[4])
G[4] = np.array([
[1, 0, 0],
......@@ -44,45 +40,45 @@ B_T[6] = np.array([
]).astype(np.float32)
B[6] = np.transpose(B_T[6])
G[6] = np.array([
[1/4.0 , 0 , 0 ],
[-1/6.0, -1/6.0 , -1/6.0],
[-1/6.0, 1/6.0 , -1/6.0],
[1/24.0, 1/12.0 , 1/6.0 ],
[1/24.0, -1/12.0, 1/6.0 ],
[ 0 , 0 , 1 ],
[1 / 4.0, 0, 0],
[-1 / 6.0, -1 / 6.0, -1 / 6.0],
[-1 / 6.0, 1 / 6.0, -1 / 6.0],
[1 / 24.0, 1 / 12.0, 1 / 6.0],
[1 / 24.0, -1 / 12.0, 1 / 6.0],
[0, 0, 1],
]).astype(np.float32)
G_T[6] = np.transpose(G[6])
# f(6, 3)
A_T[8] = np.array([
[1, 1, 1 , 1 , 1 , 1 , 1 , 0],
[0, 1, -1, 2 , -2 , 1/2. , -1/2. , 0],
[0, 1, 1 , 4 , 4 , 1/4. , 1/4. , 0],
[0, 1, -1, 8 , -8 , 1/8. , -1/8. , 0],
[0, 1, 1 , 16, 16 , 1/16., 1/16. , 0],
[0, 1, -1, 32, -32, 1/32., -1/32., 1],
[1, 1, 1, 1, 1, 1, 1, 0],
[0, 1, -1, 2, -2, 1 / 2., -1 / 2., 0],
[0, 1, 1, 4, 4, 1 / 4., 1 / 4., 0],
[0, 1, -1, 8, -8, 1 / 8., -1 / 8., 0],
[0, 1, 1, 16, 16, 1 / 16., 1 / 16., 0],
[0, 1, -1, 32, -32, 1 / 32., -1 / 32., 1],
]).astype(np.float32)
A[8] = np.transpose(A_T[8])
B_T[8] = np.array([
[1, 0 , -21/4., 0 , 21/4., 0 , -1, 0],
[0, 1 , 1 , -17/4., -17/4., 1 , 1 , 0],
[0, -1 , 1 , 17/4. , -17/4., -1 , 1 , 0],
[0, 1/2. , 1/4. , -5/2. , -5/4., 2 , 1 , 0],
[0, -1/2., 1/4. , 5/2. , -5/4., -2 , 1 , 0],
[0, 2 , 4 , -5/2. , -5 , 1/2. , 1 , 0],
[0, -2 , 4 , 5/2. , -5 , -1/2. , 1 , 0],
[0, -1 , 0 , 21/4. , 0 , -21/4., 0 , 1],
[1, 0, -21 / 4., 0, 21 / 4., 0, -1, 0],
[0, 1, 1, -17 / 4., -17 / 4., 1, 1, 0],
[0, -1, 1, 17 / 4., -17 / 4., -1, 1, 0],
[0, 1 / 2., 1 / 4., -5 / 2., -5 / 4., 2, 1, 0],
[0, -1 / 2., 1 / 4., 5 / 2., -5 / 4., -2, 1, 0],
[0, 2, 4, -5 / 2., -5, 1 / 2., 1, 0],
[0, -2, 4, 5 / 2., -5, -1 / 2., 1, 0],
[0, -1, 0, 21 / 4., 0, -21 / 4., 0, 1],
]).astype(np.float32)
B[8] = np.transpose(B_T[8])
G[8] = np.array([
[ 1 , 0 , 0 ],
[-2/9. , -2/9. , -2/9.],
[-2/9. , 2/9. , -2/9.],
[1/90. , 1/45. , 2/45.],
[1/90. , -1/45. , 2/45.],
[32/45., 16/45. , 8/45.],
[32/45., -16/45., 8/45.],
[ 0 , 0 , 1 ],
[1, 0, 0],
[-2 / 9., -2 / 9., -2 / 9.],
[-2 / 9., 2 / 9., -2 / 9.],
[1 / 90., 1 / 45., 2 / 45.],
[1 / 90., -1 / 45., 2 / 45.],
[32 / 45., 16 / 45., 8 / 45.],
[32 / 45., -16 / 45., 8 / 45.],
[0, 0, 1],
]).astype(np.float32)
G_T[8] = np.transpose(G[8])
......@@ -112,7 +108,7 @@ def winograd_conv(m, r, input, filter):
for c in range(C):
u = np.dot(np.dot(G[alpha], filter[k, c, :, :]), G_T[alpha])
for i in range(alpha):
for j in range(alpha) :
for j in range(alpha):
U[(i * alpha + j) * K + k, c] = u[i, j]
print 'filter out: ', U.shape
......@@ -129,24 +125,24 @@ def winograd_conv(m, r, input, filter):
w_idx = t % rounded_w
h_start = h_idx * m
w_start = w_idx * m
h_end = min(h_start+alpha, input_shape[2])
w_end = min(w_start+alpha, input_shape[3])
h_end = min(h_start + alpha, input_shape[2])
w_end = min(w_start + alpha, input_shape[3])
d = np.zeros((alpha, alpha))
d[0:h_end-h_start, 0:w_end-w_start] = \
input[n, c, h_start:h_end, w_start:w_end]
v = np.dot(np.dot(B_T[alpha], d), B[alpha])
for i in range(alpha):
for j in range(alpha):
V[(i*alpha+j)*C + c, p] = v[i, j]
V[(i * alpha + j) * C + c, p] = v[i, j]
tmp = V.reshape(alpha_square, C, P, 1)
print 'input out: ', tmp.shape
tmp.astype(np.float32).tofile("C")
M = np.zeros((alpha_square * K, P))
for i in range(alpha_square):
u = U[i * K : (i+1) * K, :]
v = V[i * C : (i+1) * C, :]
M[i * K : (i+1) * K, :] = np.dot(u, v)
u = U[i * K:(i + 1) * K, :]
v = V[i * C:(i + 1) * C, :]
M[i * K:(i + 1) * K, :] = np.dot(u, v)
print 'M shape: ', M.shape
M.astype(np.float32).tofile("gemm")
......@@ -156,7 +152,7 @@ def winograd_conv(m, r, input, filter):
tm = np.zeros((alpha, alpha))
for i in range(alpha):
for j in range(alpha):
tm[i][j] = M[(i*alpha+j) * K + k, b]
tm[i][j] = M[(i * alpha + j) * K + k, b]
y = np.dot(np.dot(A_T[alpha], tm), A[alpha])
for i in range(m):
for j in range(m):
......@@ -173,6 +169,7 @@ def winograd_conv(m, r, input, filter):
return res
def tf_conv(input, filter):
conv_op = tf.nn.conv2d(input, filter, [1, 1, 1, 1], 'VALID')
with tf.Session() as sess:
......@@ -206,4 +203,3 @@ def main():
if __name__ == '__main__':
main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册