提交 6da30d22 编写于 作者: L Liangliang He

Enable python style check

上级 e54825c5
stages:
- cpplint
- pycodestyle
- ops_test
- ops_benchmark
......@@ -7,7 +8,12 @@ cpplint:
stage: cpplint
script:
- curl -o cpplint.py https://raw.githubusercontent.com/google/styleguide/gh-pages/cpplint/cpplint.py
- python cpplint.py --linelength=80 --counting=detailed $(find mace -name *.h -or -name *.cc)
- python cpplint.py --linelength=80 --counting=detailed $(find mace -name "*.h" -or -name "*.cc")
pycodestyle:
  stage: pycodestyle
  script:
    # Run the Python style checker over every *.py file below the checkout
    # root. The starting directory is passed to find explicitly: omitting it
    # is a GNU extension and fails on other find implementations.
    - pycodestyle $(find . -name "*.py")
ops_test:
stage: ops_test
......
......@@ -113,7 +113,8 @@ RUN pip install -i http://pypi.douban.com/simple/ --trusted-host pypi.douban.com
scipy \
jinja2 \
pyyaml \
sh
sh \
pycodestyle
# Download tensorflow tools
RUN wget http://cnbj1-inner-fds.api.xiaomi.net/mace/tool/transform_graph && \
......
......@@ -27,28 +27,30 @@ def generate_cpp_source():
print "Generate binary from", binary_path
idx = 0
size, = struct.unpack("Q", binary_array[idx:idx+8])
size, = struct.unpack("Q", binary_array[idx:idx + 8])
idx += 8
for _ in xrange(size):
key_size, = struct.unpack("i", binary_array[idx:idx+4])
key_size, = struct.unpack("i", binary_array[idx:idx + 4])
idx += 4
key, = struct.unpack(str(key_size) + "s", binary_array[idx:idx+key_size])
key, = struct.unpack(
str(key_size) + "s", binary_array[idx:idx + key_size])
idx += key_size
params_size, = struct.unpack("i", binary_array[idx:idx+4])
params_size, = struct.unpack("i", binary_array[idx:idx + 4])
idx += 4
data_map[key] = []
count = params_size / 4
params = struct.unpack(str(count) + "i", binary_array[idx:idx+params_size])
params = struct.unpack(
str(count) + "i", binary_array[idx:idx + params_size])
for i in params:
data_map[key].append(i)
idx += params_size
env = jinja2.Environment(loader=jinja2.FileSystemLoader(sys.path[0]))
return env.get_template('str2vec_maps.cc.jinja2').render(
maps = data_map,
data_type = 'unsigned int',
variable_name = FLAGS.variable_name
)
maps=data_map,
data_type='unsigned int',
variable_name=FLAGS.variable_name)
def main(unused_args):
cpp_binary_source = generate_cpp_source()
......@@ -58,14 +60,12 @@ def main(unused_args):
w_file.write(cpp_binary_source)
w_file.close()
def parse_args():
"""Parses command line arguments."""
parser = argparse.ArgumentParser()
parser.add_argument(
"--binary_dirs",
type=str,
default="",
help="The binaries file path.")
"--binary_dirs", type=str, default="", help="The binaries file path.")
parser.add_argument(
"--binary_file_name",
type=str,
......@@ -75,7 +75,8 @@ def parse_args():
"--output_path",
type=str,
default="",
help="The path of generated C++ source file which contains the binary.")
help="The path of generated C++ source file which contains the binary."
)
parser.add_argument(
"--variable_name",
type=str,
......
......@@ -5,32 +5,26 @@ import google.protobuf.text_format
import numpy as np
import math
# Integer ids of the supported pooling kinds; the id is what gets stored in
# the 'pooling_type' argument of a converted Pooling op.
pooling_type_mode = dict(AvgPool=1, MaxPool=2)
# Buffer type names mapped to integer ids. The id of a name is its position
# in the tuple below, so new names must be appended at the end to keep the
# existing ids unchanged.
# NOTE(review): presumably these ids have to match the buffer type values
# used by the runtime - confirm before reordering.
buffer_type_map = {
    type_name: type_id
    for type_id, type_name in enumerate((
        'CONV2D_FILTER',
        'IN_OUT_CHANNEL',
        'ARGUMENT',
        'IN_OUT_HEIGHT',
        'IN_OUT_WIDTH',
        'WINOGRAD_FILTER',
        'DW_CONV2D_FILTER',
        'WEIGHT_HEIGHT',
        'WEIGHT_WIDTH',
    ))
}
# Data type names accepted by convert_to_mace_pb, mapped to the matching
# mace_pb2 constants.
data_type_map = dict(
    DT_HALF=mace_pb2.DT_HALF,
    DT_FLOAT=mace_pb2.DT_FLOAT,
)
# Caffe activation layer types mapped to the name stored in the 'activation'
# argument of a converted op. Membership in this map is also how the
# converter recognises a child layer as an activation it can fuse.
activation_name_map = dict(
    ReLU='RELU',
    Sigmoid='SIGMOID',
    TanH='TANH',
)
MACE_INPUT_NODE_NAME = "mace_input_node"
......@@ -38,6 +32,7 @@ MACE_OUTPUT_NODE_NAME = "mace_output_node"
OPENCL_IMAGE_MAX_SIZE = 16384
class Operator(object):
def __init__(self, name, type, layer):
self.name = name
......@@ -54,37 +49,52 @@ class Operator(object):
def get_single_parent(self):
    """Return the only parent of this operator.

    Raises:
        Exception: if the operator does not have exactly one parent; the
            message carries the operator name and the actual parent count.
    """
    parent_count = len(self.parents)
    if parent_count == 1:
        return self.parents[0]
    raise Exception('Operation %s expected single parent, but got %s' %
                    (self.name, parent_count))
def BlobToNPArray(blob):
    """Return the contents of `blob.data` as a float32 numpy array.

    The array is reshaped to (num, channels, height, width) when `blob.num`
    is non-zero; otherwise the dimensions listed in `blob.shape.dim` are
    used.
    """
    values = np.asarray(blob.data, dtype=np.float32)
    if blob.num == 0:
        return values.reshape(blob.shape.dim)
    dims_4d = (blob.num, blob.channels, blob.height, blob.width)
    return values.reshape(dims_4d)
class Shapes(object):
@staticmethod
def conv_pool_shape(input_shape, filter_shape, paddings, strides, dilations, round_func, input_format='NHWC'):
def conv_pool_shape(input_shape,
filter_shape,
paddings,
strides,
dilations,
round_func,
input_format='NHWC'):
output_shape = np.zeros_like(input_shape)
output_shape[0] = input_shape[0]
if input_format == 'NHWC':
# input format: NHWC, filter format: HWOI
output_shape[1] = int(round_func((input_shape[1] + paddings[0] - filter_shape[0]
- (filter_shape[0] - 1) * (dilations[0] - 1)) / float(strides[0]))) + 1
output_shape[2] = int(round_func((input_shape[2] + paddings[1] - filter_shape[1]
- (filter_shape[1] - 1) * (dilations[1] - 1)) / float(strides[1]))) + 1
output_shape[1] = int(
round_func((input_shape[1] + paddings[0] - filter_shape[0] -
(filter_shape[0] - 1) *
(dilations[0] - 1)) / float(strides[0]))) + 1
output_shape[2] = int(
round_func((input_shape[2] + paddings[1] - filter_shape[1] -
(filter_shape[1] - 1) *
(dilations[1] - 1)) / float(strides[1]))) + 1
output_shape[3] = filter_shape[2]
elif input_format == 'NCHW':
# input format: NCHW, filter format: OIHW
output_shape[1] = filter_shape[0]
output_shape[2] = int(round_func((input_shape[2] + paddings[0] - filter_shape[2]
- (filter_shape[2] - 1) * (dilations[0] - 1)) / float(strides[0]))) + 1
output_shape[3] = int(round_func((input_shape[3] + paddings[1] - filter_shape[3]
- (filter_shape[3] - 1) * (dilations[1] - 1)) / float(strides[1]))) + 1
output_shape[2] = int(
round_func((input_shape[2] + paddings[0] - filter_shape[2] -
(filter_shape[2] - 1) *
(dilations[0] - 1)) / float(strides[0]))) + 1
output_shape[3] = int(
round_func((input_shape[3] + paddings[1] - filter_shape[3] -
(filter_shape[3] - 1) *
(dilations[1] - 1)) / float(strides[1]))) + 1
else:
raise Exception("format %s is not supported" % input_format)
......@@ -107,12 +117,19 @@ class Shapes(object):
@staticmethod
def slice_shape(input_shape, num_output, input_format='NHWC'):
    """Return the shape of one output of an even split along channels.

    Args:
        input_shape: 4-element shape of the tensor being sliced.
        num_output: number of equally sized outputs.
        input_format: 'NHWC' (channels at index 3) or 'NCHW' (channels at
            index 1).

    Returns:
        A new 4-element list equal to `input_shape` except that the channel
        dimension is divided by `num_output`.

    Raises:
        Exception: if `input_format` is neither 'NHWC' nor 'NCHW'.
    """
    if input_format == 'NHWC':
        channel_axis = 3
    elif input_format == 'NCHW':
        channel_axis = 1
    else:
        raise Exception("format %s is not supported" % input_format)
    output_shape = [
        input_shape[0], input_shape[1], input_shape[2], input_shape[3]
    ]
    # Floor division keeps the dimension an integer even where `/` means
    # true division; for integer operands it matches the previous result.
    output_shape[channel_axis] = input_shape[channel_axis] // num_output
    return output_shape
# outputs' name is [op.name + '_' + #]
class CaffeConverter(object):
def __init__(self, caffe_net, weights, net_def, dt, device, winograd):
......@@ -140,9 +157,10 @@ class CaffeConverter(object):
# Construct graph
# Only support single-output layer
# layer with single output often use the same top name.
self.ops.extend([Operator(layer.name, layer.type, layer) for layer in layers])
self.ops.extend(
[Operator(layer.name, layer.type, layer) for layer in layers])
self.ops_map = {op.name : op for op in self.ops}
self.ops_map = {op.name: op for op in self.ops}
output_op_map = {}
for layer in layers:
op = self.ops_map[layer.name]
......@@ -165,7 +183,6 @@ class CaffeConverter(object):
continue
output_op_map[output_name] = op
# Load weights
weights_layers = weights.layer
for layer in weights_layers:
......@@ -191,7 +208,7 @@ class CaffeConverter(object):
data_format_arg.s = 'NHWC'
op_def.name = op.name
op_def.type = mace_type
op_def.input.extend([name+':0' for name in self.inputs_map[op.name]])
op_def.input.extend([name + ':0' for name in self.inputs_map[op.name]])
return op_def
def remove_unused_layers(self, layers):
......@@ -274,7 +291,7 @@ class CaffeConverter(object):
op_def.name = name
op_def.type = 'BufferToImage'
op_def.input.extend([new_input_name])
op_def.output.extend([name+':0'])
op_def.output.extend([name + ':0'])
epsilon_arg = op_def.arg.add()
epsilon_arg.name = 'buffer_type'
......@@ -290,7 +307,7 @@ class CaffeConverter(object):
op_def = self.net_def.op.add()
op_def.name = output_name[:-2]
op_def.type = 'ImageToBuffer'
op_def.input.extend([name+':0'])
op_def.input.extend([name + ':0'])
op_def.output.extend([output_name])
epsilon_arg = op_def.arg.add()
......@@ -315,11 +332,16 @@ class CaffeConverter(object):
def add_stride_pad_kernel_arg(self, param, op_def):
try:
if len(param.stride) > 1 or len(param.kernel_size) > 1 or len(param.pad) > 1:
raise Exception('Mace does not support multiple stride/kernel_size/pad')
stride = [param.stride[0], param.stride[0]] if len(param.stride) else [1, 1]
pad = [param.pad[0] * 2, param.pad[0] * 2] if len(param.pad) else [0, 0]
kernel = [param.kernel_size[0], param.kernel_size[0]] if len(param.kernel_size) else [0, 0]
if len(param.stride) > 1 or len(param.kernel_size) > 1 or len(
param.pad) > 1:
raise Exception(
'Mace does not support multiple stride/kernel_size/pad')
stride = [param.stride[0],
param.stride[0]] if len(param.stride) else [1, 1]
pad = [param.pad[0] * 2,
param.pad[0] * 2] if len(param.pad) else [0, 0]
kernel = [param.kernel_size[0], param.kernel_size[0]] if len(
param.kernel_size) else [0, 0]
except TypeError:
stride = [param.stride, param.stride]
pad = [param.pad * 2, param.pad * 2]
......@@ -370,8 +392,10 @@ class CaffeConverter(object):
self.add_tensor(weight_tensor_name, weight_data)
if self.device == 'gpu':
buffer_type = "DW_CONV2D_FILTER" if is_depthwise else "CONV2D_FILTER"
output_name = self.add_buffer_to_image(weight_tensor_name, buffer_type)
buffer_type = "DW_CONV2D_FILTER" \
if is_depthwise else "CONV2D_FILTER"
output_name = self.add_buffer_to_image(weight_tensor_name,
buffer_type)
op_def.input.extend([output_name])
else:
op_def.input.extend([weight_tensor_name])
......@@ -382,7 +406,8 @@ class CaffeConverter(object):
bias_data = op.data[1].reshape(-1)
self.add_tensor(bias_tensor_name, bias_data)
if self.device == 'gpu':
output_name = self.add_buffer_to_image(bias_tensor_name, "ARGUMENT")
output_name = self.add_buffer_to_image(bias_tensor_name,
"ARGUMENT")
op_def.input.extend([output_name])
else:
op_def.input.extend([bias_tensor_name])
......@@ -401,14 +426,15 @@ class CaffeConverter(object):
self.resolved_ops.add(op.name)
input_format = 'NCHW' if self.device == 'neon' else 'NHWC'
output_shape = Shapes.conv_pool_shape(op.get_single_parent().output_shape_map[op.layer.bottom[0]],
weight_data.shape,
paddings, strides, dilations,
math.floor, input_format)
output_shape = Shapes.conv_pool_shape(
op.get_single_parent().output_shape_map[op.layer.bottom[0]],
weight_data.shape, paddings, strides, dilations, math.floor,
input_format)
op.output_shape_map[op.layer.top[0]] = output_shape
if len(self.ops_map[final_op.name].children) == 1 \
and self.ops_map[final_op.name].children[0].type in activation_name_map:
if len(self.ops_map[final_op.name].children) == 1 and \
self.ops_map[final_op.name].children[0].type \
in activation_name_map:
activation_op = self.ops_map[final_op.name].children[0]
if not is_depthwise:
op_def.type = "FusedConv2D"
......@@ -419,7 +445,7 @@ class CaffeConverter(object):
final_op.output_shape_map[final_op.layer.top[0]] = output_shape
self.resolved_ops.add(activation_op.name)
op_def.output.extend([final_op.name+':0'])
op_def.output.extend([final_op.name + ':0'])
self.add_output_shape(op_def, output_shape)
self.net_def.op.extend([op_def])
......@@ -443,17 +469,22 @@ class CaffeConverter(object):
input_format = 'NCHW' if self.device == 'neon' else 'NHWC'
output_shape = Shapes.conv_pool_shape(
op.get_single_parent().output_shape_map[op.layer.bottom[0]],
filter_shape, paddings, strides, dilations, math.floor, input_format)
width = output_shape[0] * ((output_shape[1] + 1)/2) * ((output_shape[2]+1)/2)
if self.winograd and dilations[0] == 1 and (dilations[0] == dilations[1]) and \
filter_shape, paddings, strides, dilations, math.floor,
input_format)
width = output_shape[0] * ((output_shape[1] + 1) / 2) * ((
output_shape[2] + 1) / 2)
if self.winograd and dilations[0] == 1 and \
(dilations[0] == dilations[1]) and \
(strides[0] == 1) and (strides[0] == strides[1]):
if self.device == 'gpu':
return filter_shape[0] == 3 and (filter_shape[0] == filter_shape[1]) and \
return filter_shape[0] == 3 and \
(filter_shape[0] == filter_shape[1]) and \
(16 * filter_shape[2] < OPENCL_IMAGE_MAX_SIZE) and \
(16 * filter_shape[3] < OPENCL_IMAGE_MAX_SIZE) and \
(width < OPENCL_IMAGE_MAX_SIZE)
elif self.device == 'neon':
return filter_shape[2] == 3 and (filter_shape[2] == filter_shape[3])
return filter_shape[2] == 3 and (
filter_shape[2] == filter_shape[3])
return False
def convert_winograd_conv(self, op):
......@@ -486,16 +517,20 @@ class CaffeConverter(object):
padding_arg.ints.extend(paddings)
wt_op.name = op.name + '_input_transform'
wt_op.type = 'WinogradTransform'
wt_op.input.extend([name+':0' for name in self.inputs_map[op.name]])
wt_op.input.extend([name + ':0' for name in self.inputs_map[op.name]])
wt_output_name = wt_op.name + ":0"
wt_op.output.extend([wt_output_name])
wt_output_shape = mace_pb2.OutputShape()
if self.device != 'neon':
wt_output_width = output_shape[0] * ((output_shape[1] + 1)/2) * ((output_shape[2]+1)/2)
wt_output_shape.dims.extend([16, filter_shape[3], wt_output_width, 1])
wt_output_width = output_shape[0] * ((
output_shape[1] + 1) / 2) * ((output_shape[2] + 1) / 2)
wt_output_shape.dims.extend(
[16, filter_shape[3], wt_output_width, 1])
else:
wt_output_width = output_shape[0] * ((output_shape[2] + 1)/2) * ((output_shape[3]+1)/2)
wt_output_shape.dims.extend([16, filter_shape[1], wt_output_width, 1])
wt_output_width = output_shape[0] * ((
output_shape[2] + 1) / 2) * ((output_shape[3] + 1) / 2)
wt_output_shape.dims.extend(
[16, filter_shape[1], wt_output_width, 1])
wt_op.output_shape.extend([wt_output_shape])
# MatMul
......@@ -510,9 +545,11 @@ class CaffeConverter(object):
matmul_op.output.extend([matmul_output_name])
matmul_output_shape = mace_pb2.OutputShape()
if self.device != 'neon':
matmul_output_shape.dims.extend([16, filter_shape[2], wt_output_width, 1])
matmul_output_shape.dims.extend(
[16, filter_shape[2], wt_output_width, 1])
else:
matmul_output_shape.dims.extend([16, filter_shape[0], wt_output_width, 1])
matmul_output_shape.dims.extend(
[16, filter_shape[0], wt_output_width, 1])
matmul_op.output_shape.extend([matmul_output_shape])
# Inverse transform
......@@ -525,10 +562,12 @@ class CaffeConverter(object):
batch_arg.i = output_shape[0]
height_arg = iwt_op.arg.add()
height_arg.name = 'height'
height_arg.i = output_shape[1] if self.device != 'neon' else output_shape[2]
height_arg.i = output_shape[
1] if self.device != 'neon' else output_shape[2]
width_arg = iwt_op.arg.add()
width_arg.name = 'width'
width_arg.i = output_shape[2] if self.device != 'neon' else output_shape[3]
width_arg.i = output_shape[
2] if self.device != 'neon' else output_shape[3]
iwt_op.name = op.name + '_inverse_transform'
iwt_op.type = 'WinogradInverseTransform'
iwt_op.input.extend([matmul_output_name])
......@@ -538,15 +577,17 @@ class CaffeConverter(object):
bias_tensor_name = op.name + '_bias:0'
bias_data = op.data[1].reshape(-1)
self.add_tensor(bias_tensor_name, bias_data)
output_name = self.add_buffer_to_image(bias_tensor_name, "ARGUMENT")
output_name = self.add_buffer_to_image(bias_tensor_name,
"ARGUMENT")
iwt_op.input.extend([output_name])
final_op = op
final_op.output_shape_map[final_op.layer.top[0]] = output_shape
self.resolved_ops.add(op.name)
if len(self.ops_map[final_op.name].children) == 1 \
and self.ops_map[final_op.name].children[0].type in activation_name_map:
if len(self.ops_map[final_op.name].children) == 1 and \
self.ops_map[final_op.name].children[0].type \
in activation_name_map:
activation_op = self.ops_map[final_op.name].children[0]
fused_act_arg = iwt_op.arg.add()
fused_act_arg.name = 'activation'
......@@ -555,7 +596,7 @@ class CaffeConverter(object):
final_op.output_shape_map[final_op.layer.top[0]] = output_shape
self.resolved_ops.add(activation_op.name)
iwt_op.output.extend([final_op.name+':0'])
iwt_op.output.extend([final_op.name + ':0'])
self.add_output_shape(iwt_op, output_shape)
self.net_def.op.extend([wt_op, matmul_op, iwt_op])
......@@ -577,11 +618,11 @@ class CaffeConverter(object):
if len(scale_op.data) == 2:
beta_value = scale_op.data[1]
scale_value = (
(1.0 / np.vectorize(math.sqrt)(var_value + epsilon_value)) *
scale_value = ((
1.0 / np.vectorize(math.sqrt)(var_value + epsilon_value)) *
gamma_value).reshape(-1)
offset_value = ((-mean_value * scale_value) + beta_value).reshape(-1)
input_names = [op.name+'_scale:0', op.name+'_offset:0']
input_names = [op.name + '_scale:0', op.name + '_offset:0']
self.add_tensor(input_names[0], scale_value)
self.add_tensor(input_names[1], offset_value)
......@@ -596,10 +637,12 @@ class CaffeConverter(object):
self.resolved_ops.add(scale_op.name)
final_op = scale_op
output_shape = op.get_single_parent().output_shape_map[op.layer.bottom[0]]
output_shape = op.get_single_parent().output_shape_map[op.layer.bottom[
0]]
if len(self.ops_map[final_op.name].children) == 1 \
and self.ops_map[final_op.name].children[0].type in activation_name_map:
and self.ops_map[final_op.name].children[0].type \
in activation_name_map:
activation_op = self.ops_map[final_op.name].children[0]
fused_act_arg = op_def.arg.add()
fused_act_arg.name = 'activation'
......@@ -616,7 +659,8 @@ class CaffeConverter(object):
param = op.layer.inner_product_param
try:
if param.axis != 1 or param.transpose:
raise ValueError('Do not support non-default axis and transpose '
raise ValueError(
'Do not support non-default axis and transpose '
'case for innner product')
except AttributeError:
pass
......@@ -626,20 +670,26 @@ class CaffeConverter(object):
if op.data[0].ndim not in [2, 4]:
raise ValueError('Unexpected weigth ndim.')
if op.data[0].ndim == 4 and list(op.data[0].shape[:2]) != [1, 1]:
raise ValueError('Do not support 4D weight with shape [1, 1, *, *]')
input_shape = op.get_single_parent().output_shape_map[op.layer.bottom[0]]
raise ValueError(
'Do not support 4D weight with shape [1, 1, *, *]')
input_shape = op.get_single_parent().output_shape_map[op.layer.bottom[
0]]
weight_data = op.data[0].reshape(-1, op.data[0].shape[-1])
assert weight_data.shape[1] == (input_shape[1] * input_shape[2] * input_shape[3])
assert weight_data.shape[1] == (
input_shape[1] * input_shape[2] * input_shape[3])
if self.device != 'neon':
weight_data = weight_data.reshape(-1, input_shape[3], input_shape[1], input_shape[2])
weight_data = weight_data.transpose((0, 2, 3, 1)).reshape(weight_data.shape[0], -1)
weight_data = weight_data.reshape(-1, input_shape[3],
input_shape[1], input_shape[2])
weight_data = weight_data.transpose((0, 2, 3, 1)).reshape(
weight_data.shape[0], -1)
self.add_tensor(weight_tensor_name, weight_data)
if self.device == 'gpu':
if (weight_data.shape[0] + 3) / 4 > OPENCL_IMAGE_MAX_SIZE \
and (weight_data.shape[1] + 3) / 4 > OPENCL_IMAGE_MAX_SIZE:
raise Exception('Mace gpu do not support FC with weight shape: '
+str(weight_data.shape))
if (weight_data.shape[0] + 3) / 4 > OPENCL_IMAGE_MAX_SIZE and \
(weight_data.shape[1] + 3) / 4 > OPENCL_IMAGE_MAX_SIZE:
raise Exception(
'Mace gpu do not support FC with weight shape: ' +
str(weight_data.shape))
if input_shape[3] % 4 == 0:
buffer_type = "WEIGHT_WIDTH"
else:
......@@ -650,9 +700,11 @@ class CaffeConverter(object):
if buffer_type == "WEIGHT_HEIGHT" and \
(weight_data.shape[0] + 3) / 4 > OPENCL_IMAGE_MAX_SIZE:
raise Exception('Mace gpu do not support FC with weight shape: '
+str(weight_data.shape))
output_name = self.add_buffer_to_image(weight_tensor_name, buffer_type)
raise Exception(
'Mace gpu do not support FC with weight shape: ' +
str(weight_data.shape))
output_name = self.add_buffer_to_image(weight_tensor_name,
buffer_type)
op_def.input.extend([output_name])
else:
op_def.input.extend([weight_tensor_name])
......@@ -663,18 +715,21 @@ class CaffeConverter(object):
bias_data = op.data[1].reshape(-1)
self.add_tensor(bias_tensor_name, bias_data)
if self.device == 'gpu':
output_name = self.add_buffer_to_image(bias_tensor_name, "ARGUMENT")
output_name = self.add_buffer_to_image(bias_tensor_name,
"ARGUMENT")
op_def.input.extend([output_name])
else:
op_def.input.extend([bias_tensor_name])
self.resolved_ops.add(op.name)
output_shape = Shapes.fully_connected_shape(input_shape, weight_data.shape)
output_shape = Shapes.fully_connected_shape(input_shape,
weight_data.shape)
op.output_shape_map[op.layer.top[0]] = output_shape
final_op = op
if len(self.ops_map[final_op.name].children) == 1 \
and self.ops_map[final_op.name].children[0].type in activation_name_map:
and self.ops_map[final_op.name].children[0].type \
in activation_name_map:
activation_op = self.ops_map[final_op.name].children[0]
fused_act_arg = op_def.arg.add()
fused_act_arg.name = 'activation'
......@@ -691,7 +746,8 @@ class CaffeConverter(object):
op_def = self.CommonConvert(op, 'Pooling')
param = op.layer.pooling_param
paddings, strides, kernels = self.add_stride_pad_kernel_arg(param, op_def)
paddings, strides, kernels = self.add_stride_pad_kernel_arg(
param, op_def)
if param.pool == caffe_pb2.PoolingParameter.MAX:
pooling_type = "MaxPool"
elif param.pool == caffe_pb2.PoolingParameter.AVE:
......@@ -700,7 +756,8 @@ class CaffeConverter(object):
pooling_type_arg.name = 'pooling_type'
pooling_type_arg.i = pooling_type_mode[pooling_type]
input_shape = op.get_single_parent().output_shape_map[op.layer.bottom[0]]
input_shape = op.get_single_parent().output_shape_map[op.layer.bottom[
0]]
if param.HasField('global_pooling') and param.global_pooling:
kernels = [input_shape[1], input_shape[2]]
......@@ -708,12 +765,18 @@ class CaffeConverter(object):
kernel_arg.name = 'kernels'
kernel_arg.ints.extend(kernels)
filter_shape = [kernels[0], kernels[1], input_shape[3], input_shape[3]] \
if self.device != 'neon' else \
[input_shape[1], input_shape[1], kernels[0], kernels[1]]
if self.device != 'neon':
filter_shape = [
kernels[0], kernels[1], input_shape[3], input_shape[3]
]
else:
filter_shape = [
input_shape[1], input_shape[1], kernels[0], kernels[1]
]
input_format = 'NCHW' if self.device == 'neon' else 'NHWC'
output_shape = Shapes.conv_pool_shape(input_shape, filter_shape,
paddings, strides, [1, 1], math.ceil, input_format)
paddings, strides, [1, 1],
math.ceil, input_format)
op.output_shape_map[op.layer.top[0]] = output_shape
op_def.output.extend([op.name + ':0'])
......@@ -727,7 +790,8 @@ class CaffeConverter(object):
activation_arg.name = 'activation'
activation_arg.s = activation_name_map[op.type]
op_def.output.extend([op.name + ':0'])
output_shape = op.get_single_parent().output_shape_map[op.layer.bottom[0]]
output_shape = op.get_single_parent().output_shape_map[op.layer.bottom[
0]]
op.output_shape_map[op.layer.top[0]] = output_shape
self.add_output_shape(op_def, output_shape)
self.net_def.op.extend([op_def])
......@@ -742,12 +806,14 @@ class CaffeConverter(object):
alpha_data = op.data[0].reshape(-1)
self.add_tensor(alpha_tensor_name, alpha_data)
if self.device == 'gpu':
output_name = self.add_buffer_to_image(alpha_tensor_name, "ARGUMENT")
output_name = self.add_buffer_to_image(alpha_tensor_name,
"ARGUMENT")
op_def.input.extend([output_name])
else:
op_def.input.extend([alpha_tensor_name])
op_def.output.extend([op.name + ':0'])
output_shape = op.get_single_parent().output_shape_map[op.layer.bottom[0]]
output_shape = op.get_single_parent().output_shape_map[op.layer.bottom[
0]]
op.output_shape_map[op.layer.top[0]] = output_shape
self.add_output_shape(op_def, output_shape)
self.net_def.op.extend([op_def])
......@@ -777,7 +843,8 @@ class CaffeConverter(object):
input_shapes = []
for i in range(len(op.parents)):
input_shapes.append(op.parents[i].output_shape_map[op.layer.bottom[i]])
input_shapes.append(
op.parents[i].output_shape_map[op.layer.bottom[i]])
output_shape = Shapes.concat_shape(input_shapes, axis_arg.i)
op.output_shape_map[op.layer.top[0]] = output_shape
self.add_output_shape(op_def, output_shape)
......@@ -808,7 +875,8 @@ class CaffeConverter(object):
if op.layer.HasField('slice_param'):
param = op.layer.slice_param
if param.HasField('axis') and param.axis != 1:
raise Exception('Mace do not support slice with axis ' + str(param.axis))
raise Exception(
'Mace do not support slice with axis ' + str(param.axis))
if len(param.slice_point) > 0:
raise Exception('Mace do not support slice with slice_point')
......@@ -820,11 +888,14 @@ class CaffeConverter(object):
num_outputs = len(op.layer.top)
input_channels = input_shape[axis_arg.i]
if (input_channels % num_outputs) != 0 or \
(self.device == 'gpu' and ((input_channels / num_outputs) % 4 != 0)):
raise Exception('Mace do not support slice with input shape '
+ str(input_shape) + ' and number of output ' + str(num_outputs))
(self.device == 'gpu' and
((input_channels / num_outputs) % 4 != 0)):
raise Exception(
'Mace do not support slice with input shape ' +
str(input_shape) + ' and number of output ' + str(num_outputs))
input_format = 'NCHW' if self.device == 'neon' else 'NHWC'
output_shape = Shapes.slice_shape(input_shape, num_outputs, input_format)
output_shape = Shapes.slice_shape(input_shape, num_outputs,
input_format)
for i in range(len(op.layer.top)):
op.output_shape_map[op.layer.top[i]] = output_shape
self.add_output_shape(op_def, output_shape)
......@@ -925,7 +996,8 @@ class CaffeConverter(object):
for i in range(len(input_nodes)):
input_op = self.ops_map[input_nodes[i]]
input_shape = input_shapes[i] if self.device != 'neon' else \
[input_shapes[i][0], input_shapes[i][3], input_shapes[i][1], input_shapes[i][2]]
[input_shapes[i][0], input_shapes[i][3],
input_shapes[i][1], input_shapes[i][2]]
if input_op.layer is not None:
input_op.output_shape_map[input_op.layer.top[0]] = input_shape
else:
......@@ -938,7 +1010,7 @@ class CaffeConverter(object):
op_def.name = name
op_def.type = 'Transpose'
op_def.input.extend([new_input_name])
op_def.output.extend([name+':0'])
op_def.output.extend([name + ':0'])
dims_arg = op_def.arg.add()
dims_arg.name = 'dims'
......@@ -954,7 +1026,7 @@ class CaffeConverter(object):
op_def = self.net_def.op.add()
op_def.name = output_name[:-2]
op_def.type = 'Transpose'
op_def.input.extend([name+':0'])
op_def.input.extend([name + ':0'])
op_def.output.extend([output_name])
dims_arg = op_def.arg.add()
......@@ -1008,7 +1080,8 @@ class CaffeConverter(object):
elif op.type in ['Softmax']:
self.convert_normal_op(op)
else:
raise Exception('Unknown Op: %s, type: %s' % (op.name, op.type))
raise Exception('Unknown Op: %s, type: %s' % (op.name,
op.type))
if self.device == 'gpu':
self.add_output_transform(output_nodes)
......@@ -1024,8 +1097,9 @@ class CaffeConverter(object):
print 'Unresolve Op: %s with type %s' % (op.name, op.type)
def convert_to_mace_pb(model_file, weight_file, input_node_str, input_shape_str,
output_node_str, data_type, device, winograd):
def convert_to_mace_pb(model_file, weight_file, input_node_str,
input_shape_str, output_node_str, data_type, device,
winograd):
net_def = mace_pb2.NetDef()
dt = data_type_map[data_type]
......@@ -1046,7 +1120,8 @@ def convert_to_mace_pb(model_file, weight_file, input_node_str, input_shape_str,
output_nodes = [x for x in output_node_str.split(',')]
assert len(input_nodes) == len(input_shapes)
converter = CaffeConverter(caffe_net, weights, net_def, dt, device, winograd)
converter = CaffeConverter(caffe_net, weights, net_def, dt, device,
winograd)
converter.convert(input_nodes, input_shapes, output_nodes)
print "PB Converted."
if device == 'gpu':
......@@ -1056,4 +1131,3 @@ def convert_to_mace_pb(model_file, weight_file, input_node_str, input_shape_str,
print "Memory optimization done."
return net_def
......@@ -26,4 +26,3 @@ def tf_dtype_2_mace_dtype(tf_dtype):
if not mace_dtype:
raise Exception("Not supported tensorflow dtype: " + tf_dtype)
return mace_dtype
......@@ -4,10 +4,14 @@ import hashlib
import os.path
from mace.python.tools import source_converter_lib
# ./bazel-bin/mace/python/tools/tf_converter --model_file quantized_test.pb --output quantized_test_dsp.pb --runtime dsp --input_dim input_node,1,28,28,3
# ./bazel-bin/mace/python/tools/tf_converter --model_file quantized_test.pb \
# --output quantized_test_dsp.pb \
# --runtime dsp \
# --input_dim input_node,1,28,28,3
FLAGS = None
def file_checksum(fname):
hash_func = hashlib.sha256()
with open(fname, "rb") as f:
......@@ -15,6 +19,7 @@ def file_checksum(fname):
hash_func.update(chunk)
return hash_func.hexdigest()
def main(unused_args):
if not os.path.isfile(FLAGS.model_file):
print("Input graph file '" + FLAGS.model_file + "' does not exist!")
......@@ -22,17 +27,21 @@ def main(unused_args):
model_checksum = file_checksum(FLAGS.model_file)
if FLAGS.model_checksum != "" and FLAGS.model_checksum != model_checksum:
print("Model checksum mismatch: %s != %s" % (model_checksum, FLAGS.model_checksum))
print("Model checksum mismatch: %s != %s" % (model_checksum,
FLAGS.model_checksum))
sys.exit(-1)
if FLAGS.platform == 'caffe':
if not os.path.isfile(FLAGS.weight_file):
print("Input weight file '" + FLAGS.weight_file + "' does not exist!")
print("Input weight file '" + FLAGS.weight_file +
"' does not exist!")
sys.exit(-1)
weight_checksum = file_checksum(FLAGS.weight_file)
if FLAGS.weight_checksum != "" and FLAGS.weight_checksum != weight_checksum:
print("Weight checksum mismatch: %s != %s" % (weight_checksum, FLAGS.weight_checksum))
if FLAGS.weight_checksum != "" and \
FLAGS.weight_checksum != weight_checksum:
print("Weight checksum mismatch: %s != %s" %
(weight_checksum, FLAGS.weight_checksum))
sys.exit(-1)
if FLAGS.runtime == 'dsp':
......@@ -41,22 +50,27 @@ def main(unused_args):
from mace.python.tools import caffe_converter_lib
output_graph_def = caffe_converter_lib.convert_to_mace_pb(
FLAGS.model_file, FLAGS.weight_file, FLAGS.input_node, FLAGS.input_shape, FLAGS.output_node,
FLAGS.data_type, FLAGS.runtime, FLAGS.winograd)
FLAGS.model_file, FLAGS.weight_file, FLAGS.input_node,
FLAGS.input_shape, FLAGS.output_node, FLAGS.data_type,
FLAGS.runtime, FLAGS.winograd)
elif FLAGS.platform == 'tensorflow':
if FLAGS.runtime == 'dsp':
from mace.python.tools import tf_dsp_converter_lib
output_graph_def = tf_dsp_converter_lib.convert_to_mace_pb(
FLAGS.model_file, FLAGS.input_node, FLAGS.output_node, FLAGS.dsp_mode)
FLAGS.model_file, FLAGS.input_node, FLAGS.output_node,
FLAGS.dsp_mode)
else:
from mace.python.tools import tf_converter_lib
output_graph_def = tf_converter_lib.convert_to_mace_pb(
FLAGS.model_file, FLAGS.input_node, FLAGS.input_shape, FLAGS.output_node,
FLAGS.data_type, FLAGS.runtime, FLAGS.winograd)
FLAGS.model_file, FLAGS.input_node, FLAGS.input_shape,
FLAGS.output_node, FLAGS.data_type, FLAGS.runtime,
FLAGS.winograd)
if FLAGS.output_type == 'source':
source_converter_lib.convert_to_source(output_graph_def, model_checksum, FLAGS.template, FLAGS.obfuscate,
FLAGS.model_tag, FLAGS.output, FLAGS.runtime, FLAGS.embed_model_data)
source_converter_lib.convert_to_source(
output_graph_def, model_checksum, FLAGS.template, FLAGS.obfuscate,
FLAGS.model_tag, FLAGS.output, FLAGS.runtime,
FLAGS.embed_model_data)
else:
with open(FLAGS.output, "wb") as f:
f.write(output_graph_def.SerializeToString())
......@@ -65,6 +79,7 @@ def main(unused_args):
f.write(str(output_graph_def))
print("Model conversion is completed.")
def str2bool(v):
if v.lower() in ('yes', 'true', 't', 'y', '1'):
return True
......@@ -73,6 +88,7 @@ def str2bool(v):
else:
raise argparse.ArgumentTypeError('Boolean value expected.')
def parse_args():
"""Parses command line arguments."""
parser = argparse.ArgumentParser()
......@@ -81,12 +97,10 @@ def parse_args():
"--model_file",
type=str,
default="",
help="TensorFlow \'GraphDef\' file to load, Caffe prototxt file to load.")
help="TensorFlow \'GraphDef\' file to load, "
"Caffe prototxt file to load.")
parser.add_argument(
"--weight_file",
type=str,
default="",
help="Caffe data file to load.")
"--weight_file", type=str, default="", help="Caffe data file to load.")
parser.add_argument(
"--model_checksum",
type=str,
......@@ -103,35 +117,23 @@ def parse_args():
default="",
help="File to save the output graph to.")
parser.add_argument(
"--runtime",
type=str,
default="cpu",
help="Runtime: cpu/gpu/dsp")
"--runtime", type=str, default="cpu", help="Runtime: cpu/gpu/dsp")
parser.add_argument(
"--input_node",
type=str,
default="input_node",
help="e.g., input_node")
parser.add_argument(
"--output_node",
type=str,
default="softmax",
help="e.g., softmax")
"--output_node", type=str, default="softmax", help="e.g., softmax")
parser.add_argument(
"--data_type",
type=str,
default='DT_FLOAT',
help="e.g., DT_HALF/DT_FLOAT")
parser.add_argument(
"--output_type",
type=str,
default="pb",
help="output type: source/pb")
"--output_type", type=str, default="pb", help="output type: source/pb")
parser.add_argument(
"--template",
type=str,
default="",
help="template path")
"--template", type=str, default="", help="template path")
parser.add_argument(
"--obfuscate",
type=str2bool,
......@@ -152,25 +154,13 @@ def parse_args():
default=False,
help="open winograd convolution or not")
parser.add_argument(
"--dsp_mode",
type=int,
default=0,
help="dsp run mode, defalut=0")
"--dsp_mode", type=int, default=0, help="dsp run mode, defalut=0")
parser.add_argument(
"--input_shape",
type=str,
default="",
help="input shape.")
"--input_shape", type=str, default="", help="input shape.")
parser.add_argument(
"--platform",
type=str,
default="tensorflow",
help="tensorflow/caffe")
"--platform", type=str, default="tensorflow", help="tensorflow/caffe")
parser.add_argument(
"--embed_model_data",
type=str2bool,
default=True,
help="input shape.")
"--embed_model_data", type=str2bool, default=True, help="input shape.")
return parser.parse_known_args()
......
class DspOps(object):
def __init__(self):
self.dsp_ops = {
......@@ -18,7 +17,7 @@ class DspOps(object):
'QuantizedAvgPool': 'QuantizedAvgPool_8',
'QuantizedConcat': 'QuantizedConcat_8',
'QuantizedBiasAdd': 'QuantizedBiasAdd_8p8to32',
'QuantizedResizeBilinear' : 'QuantizedResizeBilinear_8',
'QuantizedResizeBilinear': 'QuantizedResizeBilinear_8',
'QuantizedSpaceToBatchND': 'QuantizedSpaceToBatchND_8',
'QuantizedBatchToSpaceND': 'QuantizedBatchToSpaceND_8',
'QuantizedSoftmax': 'QuantizedSoftmax_8',
......@@ -54,6 +53,7 @@ class DspOps(object):
'Concat': 'Concat_f',
'AddN': 'AddN_f',
}
def has_op(self, tf_op):
    """Return True when ``tf_op`` is a key of the ``self.dsp_ops`` table."""
    return tf_op in self.dsp_ops
......@@ -61,5 +61,3 @@ class DspOps(object):
if tf_op not in self.dsp_ops:
raise Exception('Could not map nn op for: ', tf_op)
return self.dsp_ops[tf_op]
......@@ -11,10 +11,13 @@ FLAGS = None
# Key string that encrypt_code() XORs against, repeated cyclically.
encrypt_lookup_table = "Xiaomi-AI-Platform-Mace"


def encrypt_code(code_str):
    """XOR each character of ``code_str`` with the repeating lookup table.

    The character at position ``i`` is combined with
    ``encrypt_lookup_table[i % len(encrypt_lookup_table)]``.

    Args:
        code_str: text to encode; an empty string yields an empty list.

    Returns:
        A list with one ``hex()`` string (e.g. ``'0x39'``) per input
        character, in input order.
    """
    table_size = len(encrypt_lookup_table)
    return [
        hex(ord(char) ^ ord(encrypt_lookup_table[pos % table_size]))
        for pos, char in enumerate(code_str)
    ]
......@@ -45,7 +48,8 @@ def main(unused_args):
encrypted_code_maps[file_name[:-3]] = encrypted_code_arr
env = jinja2.Environment(loader=jinja2.FileSystemLoader(sys.path[0]))
cpp_cl_encrypted_kernel = env.get_template('str2vec_maps.cc.jinja2').render(
cpp_cl_encrypted_kernel = env.get_template(
'str2vec_maps.cc.jinja2').render(
maps=encrypted_code_maps,
data_type='unsigned char',
variable_name='kEncryptedProgramMap')
......
......@@ -2,18 +2,21 @@ import tensorflow as tf
from mace.proto import mace_pb2
from collections import OrderedDict
def sort_tf_node(node, nodes_map, ordered_nodes_map):
    """Insert ``node`` into ``ordered_nodes_map`` after the nodes it reads.

    Depth-first walk: every input of ``node`` that is known to
    ``nodes_map`` and not yet in ``ordered_nodes_map`` is inserted
    (recursively) before ``node`` itself. A node that is already present
    is left untouched.

    Args:
        node: object with a ``name`` and an iterable ``input`` of tensor
            names such as ``"conv:0"`` or ``"conv"``.
        nodes_map: mapping from node name to node.
        ordered_nodes_map: insertion-ordered mapping that is filled in
            place.

    Note:
        Recursion depth grows with the length of the longest input chain.
    """
    if node.name in ordered_nodes_map:
        return
    for input_tensor_name in node.input:
        # Everything before the first ':' is the producing node's name; a
        # name without ':' is returned unchanged by split().
        input_node_name = input_tensor_name.split(':', 1)[0]
        if input_node_name not in nodes_map or \
                input_node_name in ordered_nodes_map:
            continue
        sort_tf_node(nodes_map[input_node_name], nodes_map,
                     ordered_nodes_map)
    ordered_nodes_map[node.name] = node
def sort_tf_graph(graph_def):
nodes_map = {}
ordered_nodes_map = OrderedDict()
......@@ -31,13 +34,15 @@ def sort_mace_node(node, nodes_map, ordered_nodes_map):
for input_tensor_name in node.input:
input_node_name = input_tensor_name.split(':')[
0] if ':' in input_tensor_name else input_tensor_name
if input_node_name not in nodes_map or input_node_name in ordered_nodes_map:
if input_node_name not in nodes_map or \
input_node_name in ordered_nodes_map:
continue
input_node = nodes_map[input_node_name]
sort_mace_node(input_node, nodes_map, ordered_nodes_map)
ordered_nodes_map[node.name] = node
def sort_mace_graph(graph_def, output_name):
nodes_map = {}
ordered_nodes_map = OrderedDict()
......
......@@ -2,6 +2,7 @@ import sys
import operator
from mace.proto import mace_pb2
class MemoryOptimizer(object):
def __init__(self, net_def):
self.net_def = net_def
......@@ -37,9 +38,9 @@ class MemoryOptimizer(object):
mem_size = [0, 0]
if op_type == 'WinogradTransform' or op_type == 'MatMul':
mem_size[0] = output_shape[2] * output_shape[3]
mem_size[1] = output_shape[0] * int((output_shape[1]+3)/4)
mem_size[1] = output_shape[0] * int((output_shape[1] + 3) / 4)
else:
mem_size[0] = output_shape[2] * int((output_shape[3]+3)/4)
mem_size[0] = output_shape[2] * int((output_shape[3] + 3) / 4)
mem_size[1] = output_shape[0] * output_shape[1]
return mem_size
......@@ -51,13 +52,16 @@ class MemoryOptimizer(object):
if self.is_buffer_image_op(op):
continue
if not op.output_shape:
print('WARNING: There is no output shape information to do memory optimization.')
print('WARNING: There is no output shape information to '
'do memory optimization.')
return
if len(op.output_shape) != len(op.output):
print('WARNING: the number of output shape is not equal to the number of output.')
print('WARNING: the number of output shape is not equal to '
'the number of output.')
return
for i in range(len(op.output)):
op_mem_size = self.get_mem_size(op.type, op.output_shape[i].dims)
op_mem_size = self.get_mem_size(op.type,
op.output_shape[i].dims)
mem_id = -1
if len(self.idle_mem) > 0:
best_mem_candidate_id = -1
......@@ -65,16 +69,22 @@ class MemoryOptimizer(object):
best_mem_candidate_shape = []
for mid in self.idle_mem:
reuse_mem_size = self.mem_block[mid]
resize_mem_size = [max(reuse_mem_size[0], op_mem_size[0]), max(reuse_mem_size[1], op_mem_size[1])]
delta_mem_area = self.mem_area(resize_mem_size) - self.mem_area(reuse_mem_size)
resize_mem_size = [
max(reuse_mem_size[0], op_mem_size[0]),
max(reuse_mem_size[1], op_mem_size[1])
]
delta_mem_area = self.mem_area(
resize_mem_size) - self.mem_area(reuse_mem_size)
if delta_mem_area < best_mem_candidate_delta_area:
best_mem_candidate_id = mid
best_mem_candidate_delta_area = delta_mem_area
best_mem_candidate_shape = resize_mem_size
if best_mem_candidate_delta_area <= self.mem_area(op_mem_size):
if best_mem_candidate_delta_area <= self.mem_area(
op_mem_size):
# reuse
self.mem_block[best_mem_candidate_id] = best_mem_candidate_shape
self.mem_block[
best_mem_candidate_id] = best_mem_candidate_shape
mem_id = best_mem_candidate_id
self.idle_mem.remove(mem_id)
......@@ -113,7 +123,8 @@ class MemoryOptimizer(object):
print mem, self.mem_block[mem]
optimized_mem_size += reduce(operator.mul, self.mem_block[mem], 4)
print('origin mem: %d, optimized mem: %d', origin_mem_size, optimized_mem_size)
print('origin mem: %d, optimized mem: %d', origin_mem_size,
optimized_mem_size)
def optimize_memory(net_def):
......
......@@ -27,37 +27,40 @@ def generate_cpp_source():
binary_array = np.fromfile(f, dtype=np.uint8)
idx = 0
size, = struct.unpack("Q", binary_array[idx:idx+8])
size, = struct.unpack("Q", binary_array[idx:idx + 8])
idx += 8
for _ in xrange(size):
key_size, = struct.unpack("i", binary_array[idx:idx+4])
key_size, = struct.unpack("i", binary_array[idx:idx + 4])
idx += 4
key, = struct.unpack(str(key_size) + "s", binary_array[idx:idx+key_size])
key, = struct.unpack(
str(key_size) + "s", binary_array[idx:idx + key_size])
idx += key_size
value_size, = struct.unpack("i", binary_array[idx:idx+4])
value_size, = struct.unpack("i", binary_array[idx:idx + 4])
idx += 4
maps[key] = []
value = struct.unpack(str(value_size) + "B",
binary_array[idx:idx+value_size])
value = struct.unpack(
str(value_size) + "B", binary_array[idx:idx + value_size])
idx += value_size
for ele in value:
maps[key].append(hex(ele))
cl_platform_info_path = os.path.join(binary_dir, FLAGS.platform_info_file_name)
cl_platform_info_path = os.path.join(binary_dir,
FLAGS.platform_info_file_name)
with open(cl_platform_info_path, 'r') as f:
curr_platform_info = f.read()
if platform_info != "":
assert(curr_platform_info == platform_info)
assert (curr_platform_info == platform_info)
platform_info = curr_platform_info
env = jinja2.Environment(loader=jinja2.FileSystemLoader(sys.path[0]))
return env.get_template('opencl_compiled_kernel.cc.jinja2').render(
maps = maps,
data_type = 'unsigned char',
variable_name = 'kCompiledProgramMap',
platform_info = platform_info,
maps=maps,
data_type='unsigned char',
variable_name='kCompiledProgramMap',
platform_info=platform_info,
)
def main(unused_args):
cpp_cl_binary_source = generate_cpp_source()
......@@ -90,7 +93,7 @@ def parse_args():
"--output_path",
type=str,
default="./mace/examples/codegen/opencl/opencl_compiled_program.cc",
help="The path of generated C++ header file which contains cl binaries.")
help="The path of generated C++ header file for cl binaries.")
return parser.parse_known_args()
......
......@@ -6,9 +6,9 @@ import hashlib
from mace.proto import mace_pb2
from jinja2 import Environment, FileSystemLoader
GENERATED_NAME = set()
def generate_obfuscated_name(namespace, name):
md5 = hashlib.md5()
md5.update(namespace)
......@@ -22,31 +22,36 @@ def generate_obfuscated_name(namespace, name):
GENERATED_NAME.add(name)
return name
def generate_tensor_map(tensors):
    """Map each distinct tensor name to an obfuscated replacement.

    Args:
        tensors: iterable of objects exposing a ``name`` attribute.

    Returns:
        A dict keyed by original tensor name. Each value is whatever
        ``generate_obfuscated_name("tensor", name)`` returned the first
        time that name was seen; repeated names are not regenerated.
    """
    tensor_map = {}
    for t in tensors:
        if t.name not in tensor_map:
            tensor_map[t.name] = generate_obfuscated_name("tensor", t.name)
    return tensor_map
def generate_in_out_map(ops, tensor_map):
    """Obfuscate op names and build a rename table for their tensors.

    Every op's ``name`` is replaced in place with
    ``generate_obfuscated_name("op", name)``. Then each input name,
    followed by each output name, gets an entry in the result the first
    time it is seen: the ``tensor_map`` value when one exists, otherwise a
    freshly generated name in the ``"in"`` or ``"out"`` namespace.

    Args:
        ops: iterable of objects with ``name``, ``input`` and ``output``.
        tensor_map: existing tensor-name -> replacement mapping.

    Returns:
        A dict from original input/output name to its replacement.
    """
    in_out_map = {}
    for op in ops:
        op.name = generate_obfuscated_name("op", op.name)
        # Inputs are handled before outputs so that generated names are
        # requested in the same order as before.
        for namespace, tensor_names in (("in", op.input),
                                        ("out", op.output)):
            for tensor_name in tensor_names:
                if tensor_name in in_out_map:
                    continue
                if tensor_name in tensor_map:
                    in_out_map[tensor_name] = tensor_map[tensor_name]
                else:
                    in_out_map[tensor_name] = generate_obfuscated_name(
                        namespace, tensor_name)
    return in_out_map
def obfuscate_name(net_def):
input_node = "mace_input_node"
output_node = "mace_output_node"
......@@ -63,20 +68,22 @@ def obfuscate_name(net_def):
if output_node not in op.output[i]:
op.output[i] = in_out_map[op.output[i]]
def rename_tensor(net_def):
    """Rewrite tensor names in ``net_def`` in place.

    Each tensor name loses its last two characters, has ``/`` replaced by
    ``_`` and gets a leading ``_``. Op inputs and outputs that refer to a
    renamed tensor are updated to the new name.

    Args:
        net_def: object with ``tensors`` (items with ``name``) and ``op``
            (items with indexable ``input`` and ``output`` sequences).
    """
    tensor_map = {}
    for t in net_def.tensors:
        if t.name not in tensor_map:
            tensor_map[t.name] = "_" + t.name[:-2].replace("/", "_")
            # NOTE(review): the rendering this was recovered from had no
            # indentation; the assignment is assumed to sit inside the
            # `if`. Behavior is the same either way when tensor names are
            # unique - confirm against the real file.
            t.name = tensor_map[t.name]
    for op in net_def.op:
        for names in (op.input, op.output):
            for i, name in enumerate(names):
                if name in tensor_map:
                    names[i] = tensor_map[name]
class TensorInfo:
def __init__(self, id, t, runtime):
self.id = id
......@@ -84,19 +91,26 @@ class TensorInfo:
if t.data_type == mace_pb2.DT_FLOAT:
if runtime == 'gpu':
self.data_type = mace_pb2.DT_HALF
self.data = bytearray(np.array(t.float_data).astype(np.float16).tobytes())
self.data = bytearray(
np.array(t.float_data).astype(np.float16).tobytes())
else:
self.data_type = mace_pb2.DT_FLOAT
self.data = bytearray(np.array(t.float_data).astype(np.float32).tobytes())
self.data = bytearray(
np.array(t.float_data).astype(np.float32).tobytes())
elif t.data_type == mace_pb2.DT_INT32:
self.data = bytearray(np.array(t.int32_data).astype(np.int32).tobytes())
self.data = bytearray(
np.array(t.int32_data).astype(np.int32).tobytes())
elif t.data_type == mace_pb2.DT_UINT8:
self.data = bytearray(np.array(t.int32_data).astype(np.uint8).tolist())
self.data = bytearray(
np.array(t.int32_data).astype(np.uint8).tolist())
def stringfy(value):
    """Return the items of ``value`` double-quoted and joined by ``', '``.

    An empty iterable produces an empty string.
    """
    return ', '.join('"{0}"'.format(w) for w in value)
def convert_to_source(net_def, mode_pb_checksum, template_dir, obfuscate, model_tag, output, runtime, embed_model_data):
def convert_to_source(net_def, mode_pb_checksum, template_dir, obfuscate,
model_tag, output, runtime, embed_model_data):
if obfuscate:
obfuscate_name(net_def)
else:
......@@ -106,7 +120,8 @@ def convert_to_source(net_def, mode_pb_checksum, template_dir, obfuscate, model_
print template_dir
# Create the jinja2 environment.
j2_env = Environment(loader=FileSystemLoader(template_dir), trim_blocks=True)
j2_env = Environment(
loader=FileSystemLoader(template_dir), trim_blocks=True)
j2_env.filters['stringfy'] = stringfy
output_dir = os.path.dirname(output) + '/'
# generate tensor source files
......@@ -122,11 +137,11 @@ def convert_to_source(net_def, mode_pb_checksum, template_dir, obfuscate, model_
model_data.extend(bytearray([0] * padding))
offset += padding
source = j2_env.get_template(template_name).render(
tensor_info = tensor_info,
tensor = t,
tag = model_tag,
runtime = runtime,
offset = offset,
tensor_info=tensor_info,
tensor=t,
tag=model_tag,
runtime=runtime,
offset=offset,
)
model_data.extend(tensor_info.data)
offset += len(tensor_info.data)
......@@ -137,11 +152,10 @@ def convert_to_source(net_def, mode_pb_checksum, template_dir, obfuscate, model_
# generate tensor data
template_name = 'tensor_data.jinja2'
source = j2_env.get_template(template_name).render(
tag = model_tag,
embed_model_data = embed_model_data,
model_data_size = offset,
model_data = model_data
)
tag=model_tag,
embed_model_data=embed_model_data,
model_data_size=offset,
model_data=model_data)
with open(output_dir + 'tensor_data' + '.cc', "wb") as f:
f.write(source)
if not embed_model_data:
......@@ -155,11 +169,11 @@ def convert_to_source(net_def, mode_pb_checksum, template_dir, obfuscate, model_
op_size = len(net_def.op)
for start in range(0, op_size, 10):
source = j2_env.get_template(template_name).render(
start = start,
end = min(start+10, op_size),
net = net_def,
tag = model_tag,
runtime = runtime,
start=start,
end=min(start + 10, op_size),
net=net_def,
tag=model_tag,
runtime=runtime,
)
with open(output_dir + 'op' + str(counter) + '.cc', "wb") as f:
f.write(source)
......@@ -167,21 +181,21 @@ def convert_to_source(net_def, mode_pb_checksum, template_dir, obfuscate, model_
# generate model source files
template_name = 'model.jinja2'
tensors = [TensorInfo(i, net_def.tensors[i], runtime) for i in range(len(net_def.tensors))]
tensors = [
TensorInfo(i, net_def.tensors[i], runtime)
for i in range(len(net_def.tensors))
]
source = j2_env.get_template(template_name).render(
tensors = tensors,
net = net_def,
tag = model_tag,
runtime = runtime,
model_pb_checksum = mode_pb_checksum
)
tensors=tensors,
net=net_def,
tag=model_tag,
runtime=runtime,
model_pb_checksum=mode_pb_checksum)
with open(output, "wb") as f:
f.write(source)
# generate model header file
template_name = 'model_header.jinja2'
source = j2_env.get_template(template_name).render(
tag = model_tag,
)
source = j2_env.get_template(template_name).render(tag=model_tag, )
with open(output_dir + model_tag + '.h', "wb") as f:
f.write(source)
......@@ -8,15 +8,8 @@ from mace.python.tools import memory_optimizer
from tensorflow.core.framework import graph_pb2
from tensorflow.core.framework import tensor_shape_pb2
# Integer code for each padding name.
padding_mode = {'VALID': 0, 'SAME': 1, 'FULL': 2}
# Integer code for each pooling op type name.
pooling_type_mode = {'AvgPool': 1, 'MaxPool': 2}
# the order should be the same as
# eltwise type's in mace/kernels/eltwise.h
......@@ -34,25 +27,22 @@ math_type_mode = {
}
# Integer code for each buffer type name.
# NOTE(review): presumably looked up by add_buffer_to_image(), which is
# called with these names - confirm against its body.
buffer_type_map = {
    'CONV2D_FILTER': 0,
    'IN_OUT_CHANNEL': 1,
    'ARGUMENT': 2,
    'IN_OUT_HEIGHT': 3,
    'IN_OUT_WIDTH': 4,
    'WINOGRAD_FILTER': 5,
    'DW_CONV2D_FILTER': 6,
}
# Maps a data type name string to the matching mace_pb2 enum value.
data_type_map = {'DT_HALF': mace_pb2.DT_HALF, 'DT_FLOAT': mace_pb2.DT_FLOAT}
# Maps an activation op type name to the activation name string used in
# its place; note that 'Relu6' becomes 'RELUX'.
activation_name_map = {
    'Relu': 'RELU',
    'Sigmoid': 'SIGMOID',
    'Tanh': 'TANH',
    'Relu6': 'RELUX'
}
BATCH_NORM_ORDER = ["Add", "Rsqrt", "Mul", "Mul", "Mul", "Sub", "Add"]
......@@ -62,12 +52,14 @@ MACE_OUTPUT_NODE_NAME = "mace_output_node"
OPENCL_IMAGE_MAX_SIZE = 16384
def get_input_tensor(op, index):
    """Return input ``index`` of ``op``, looking through ``Reshape`` ops.

    While the selected tensor's producing op has type ``'Reshape'``, the
    tensor is replaced by that op's first input, so a chain of reshapes
    resolves to the tensor feeding the first of them.

    Args:
        op: object with an indexable ``inputs``; each input exposes the op
            that produced it as ``.op``, which in turn has ``type`` and
            ``inputs``.
        index: position in ``op.inputs`` to start from.

    Returns:
        The first tensor along that chain not produced by a ``Reshape``.
    """
    input_tensor = op.inputs[index]
    # Loop form of the original self-recursion on input 0 of each Reshape.
    while input_tensor.op.type == 'Reshape':
        input_tensor = input_tensor.op.inputs[0]
    return input_tensor
class TFConverter(object):
def __init__(self, tf_ops, net_def, dt, device, winograd):
self.net_def = net_def
......@@ -139,7 +131,7 @@ class TFConverter(object):
op_def.name = name
op_def.type = 'BufferToImage'
op_def.input.extend([new_input_name])
op_def.output.extend([name+':0'])
op_def.output.extend([name + ':0'])
epsilon_arg = op_def.arg.add()
epsilon_arg.name = 'buffer_type'
......@@ -156,7 +148,7 @@ class TFConverter(object):
op_def.name = name
op_def.type = 'Transpose'
op_def.input.extend([new_input_name])
op_def.output.extend([name+':0'])
op_def.output.extend([name + ':0'])
dims_arg = op_def.arg.add()
dims_arg.name = 'dims'
......@@ -172,7 +164,7 @@ class TFConverter(object):
op_def = self.net_def.op.add()
op_def.name = output_name[:-2]
op_def.type = 'ImageToBuffer'
op_def.input.extend([name+':0'])
op_def.input.extend([name + ':0'])
op_def.output.extend([output_name])
epsilon_arg = op_def.arg.add()
......@@ -185,7 +177,7 @@ class TFConverter(object):
op_def = self.net_def.op.add()
op_def.name = output_name[:-2]
op_def.type = 'Transpose'
op_def.input.extend([name+':0'])
op_def.input.extend([name + ':0'])
op_def.output.extend([output_name])
dims_arg = op_def.arg.add()
......@@ -237,7 +229,8 @@ class TFConverter(object):
tensor = self.net_def.tensors.add()
tf_tensor = op.outputs[0].eval()
if output_name in self.transpose_filter_tensor:
tf_tensor = tf_tensor.transpose(self.transpose_filter_tensor[output_name])
tf_tensor = tf_tensor.transpose(
self.transpose_filter_tensor[output_name])
if output_name in self.reshape_tensor:
tf_tensor = tf_tensor.reshape(self.reshape_tensor[output_name])
tensor.name = op.outputs[0].name
......@@ -262,9 +255,11 @@ class TFConverter(object):
output_shape = op.outputs[0].shape.as_list()
if len(output_shape) == 0 or output_shape[0] is None:
return False
width = output_shape[0] * ((output_shape[1] + 1)/2) * ((output_shape[2]+1)/2)
return self.winograd and op.type != 'DepthwiseConv2dNative' and self.device == 'gpu' and \
filter_shape[0] == 3 and (filter_shape[0] == filter_shape[1]) and \
width = output_shape[0] * ((output_shape[1] + 1) / 2) * ((
output_shape[2] + 1) / 2)
return self.winograd and op.type != 'DepthwiseConv2dNative' and \
self.device == 'gpu' and filter_shape[0] == 3 and \
(filter_shape[0] == filter_shape[1]) and \
(strides[0] == 1) and (strides[0] == strides[1]) and \
(16 * filter_shape[2] < OPENCL_IMAGE_MAX_SIZE) and \
(16 * filter_shape[3] < OPENCL_IMAGE_MAX_SIZE) and \
......@@ -276,7 +271,8 @@ class TFConverter(object):
output_shape = op.outputs[0].shape.as_list()
self.transpose_filter_tensor[filter_tensor.name] = (3, 2, 0, 1)
filter_name = self.add_buffer_to_image(op.inputs[1].name, "WINOGRAD_FILTER")
filter_name = self.add_buffer_to_image(op.inputs[1].name,
"WINOGRAD_FILTER")
# Input transform
wt_op = mace_pb2.OperatorDef()
......@@ -292,7 +288,8 @@ class TFConverter(object):
wt_output_name = wt_op.name + ":0"
wt_op.output.extend([wt_output_name])
wt_output_shape = mace_pb2.OutputShape()
wt_output_width = output_shape[0] * ((output_shape[1] + 1)/2) * ((output_shape[2]+1)/2)
wt_output_width = output_shape[0] * ((output_shape[1] + 1) / 2) * ((
output_shape[2] + 1) / 2)
wt_output_shape.dims.extend([16, filter_shape[2], wt_output_width, 1])
wt_op.output_shape.extend([wt_output_shape])
......@@ -307,7 +304,8 @@ class TFConverter(object):
matmul_output_name = matmul_op.name + ":0"
matmul_op.output.extend([matmul_output_name])
matmul_output_shape = mace_pb2.OutputShape()
matmul_output_shape.dims.extend([16, filter_shape[3], wt_output_width, 1])
matmul_output_shape.dims.extend(
[16, filter_shape[3], wt_output_width, 1])
matmul_op.output_shape.extend([matmul_output_shape])
# Inverse transform
......@@ -331,15 +329,17 @@ class TFConverter(object):
final_op = op
self.resolved_ops[op.name] = 1
if len(self.tf_graph[op.name]) == 1 and self.tf_graph[op.name][0].type == 'BiasAdd' :
if len(self.tf_graph[op.name]
) == 1 and self.tf_graph[op.name][0].type == 'BiasAdd':
bias_add_op = self.tf_graph[op.name][0]
output_name = self.add_buffer_to_image(get_input_tensor(bias_add_op, 1).name, "ARGUMENT")
output_name = self.add_buffer_to_image(
get_input_tensor(bias_add_op, 1).name, "ARGUMENT")
iwt_op.input.extend([output_name])
final_op = bias_add_op
self.resolved_ops[bias_add_op.name] = 1
if len(self.tf_graph[final_op.name]) == 1 \
and self.tf_graph[final_op.name][0].type in activation_name_map:
if len(self.tf_graph[final_op.name]) == 1 and \
self.tf_graph[final_op.name][0].type in activation_name_map:
activation_op = self.tf_graph[final_op.name][0]
fused_act_arg = iwt_op.arg.add()
fused_act_arg.name = 'activation'
......@@ -355,7 +355,6 @@ class TFConverter(object):
self.add_output_shape(final_op.outputs, iwt_op)
self.net_def.op.extend([wt_op, matmul_op, iwt_op])
def convert_conv2d(self, op):
op_def = mace_pb2.OperatorDef()
arg = op_def.arg.add()
......@@ -365,20 +364,28 @@ class TFConverter(object):
if op.type == 'DepthwiseConv2dNative':
op_def.type = 'DepthwiseConv2d'
if self.device == 'neon':
self.transpose_filter_tensor[get_input_tensor(op, 1).name] = (3, 2, 0, 1)
self.transpose_filter_tensor[get_input_tensor(
op, 1).name] = (3, 2, 0, 1)
else:
op_def.type = op.type
if self.device == 'neon':
self.transpose_filter_tensor[get_input_tensor(op, 1).name] = (3, 2, 0, 1)
self.transpose_filter_tensor[get_input_tensor(
op, 1).name] = (3, 2, 0, 1)
else:
self.transpose_filter_tensor[get_input_tensor(op, 1).name] = (0, 1, 3, 2)
self.transpose_filter_tensor[get_input_tensor(
op, 1).name] = (0, 1, 3, 2)
if self.device == 'gpu':
op_def.input.extend([op.inputs[0].name])
buffer_type = "DW_CONV2D_FILTER" if op_def.type == 'DepthwiseConv2d' else "CONV2D_FILTER"
output_name = self.add_buffer_to_image(get_input_tensor(op, 1).name, buffer_type)
if op_def.type == 'DepthwiseConv2d':
buffer_type = "DW_CONV2D_FILTER"
else:
buffer_type = "CONV2D_FILTER"
output_name = self.add_buffer_to_image(
get_input_tensor(op, 1).name, buffer_type)
op_def.input.extend([output_name])
else:
op_def.input.extend([get_input_tensor(op, i).name for i in range(len(op.inputs))])
op_def.input.extend(
[get_input_tensor(op, i).name for i in range(len(op.inputs))])
padding_arg = op_def.arg.add()
padding_arg.name = 'padding'
......@@ -395,18 +402,20 @@ class TFConverter(object):
final_op = op
self.resolved_ops[op.name] = 1
if len(self.tf_graph.get(op.name, [])) == 1 and self.tf_graph[op.name][0].type == 'BiasAdd':
if len(self.tf_graph.get(op.name, [])) == 1 and \
self.tf_graph[op.name][0].type == 'BiasAdd':
bias_add_op = self.tf_graph[op.name][0]
if self.device == 'gpu':
output_name = self.add_buffer_to_image(get_input_tensor(bias_add_op, 1).name, "ARGUMENT")
output_name = self.add_buffer_to_image(
get_input_tensor(bias_add_op, 1).name, "ARGUMENT")
op_def.input.extend([output_name])
else:
op_def.input.extend([get_input_tensor(bias_add_op, 1).name])
final_op = bias_add_op
self.resolved_ops[bias_add_op.name] = 1
if len(self.tf_graph.get(final_op.name, [])) == 1 \
and self.tf_graph[final_op.name][0].type in activation_name_map:
if len(self.tf_graph.get(final_op.name, [])) == 1 and \
self.tf_graph[final_op.name][0].type in activation_name_map:
activation_op = self.tf_graph[final_op.name][0]
if op_def.type == "Conv2D":
op_def.type = "FusedConv2D"
......@@ -450,17 +459,16 @@ class TFConverter(object):
var_value = get_input_tensor(op, 4).eval().astype(np.float32)
epsilon_value = op.get_attr('epsilon')
scale_value = (
(1.0 / np.vectorize(math.sqrt)(var_value + epsilon_value)) *
gamma_value)
scale_value = ((1.0 / np.vectorize(math.sqrt)
(var_value + epsilon_value)) * gamma_value)
offset_value = (-mean_value * scale_value) + beta_value
idx = gamma_tensor.name.rfind('/')
name_prefix = gamma_tensor.name[:idx] + '/'
input_names = [name_prefix+'scale:0', name_prefix+'offset:0']
self.add_tensor(input_names[0], gamma_value.shape,
gamma_tensor.dtype, scale_value)
self.add_tensor(input_names[1], gamma_value.shape,
gamma_tensor.dtype, offset_value)
input_names = [name_prefix + 'scale:0', name_prefix + 'offset:0']
self.add_tensor(input_names[0], gamma_value.shape, gamma_tensor.dtype,
scale_value)
self.add_tensor(input_names[1], gamma_value.shape, gamma_tensor.dtype,
offset_value)
op_def.input.extend([op.inputs[0].name])
if self.device == 'gpu':
......@@ -495,14 +503,15 @@ class TFConverter(object):
bn_ops = []
bn_ops.append(op)
for i in range(1, 3):
if len(self.tf_graph[bn_ops[i-1].name]) == 1 \
and self.tf_graph[bn_ops[i-1].name][0].type == BATCH_NORM_ORDER[i]:
bn_ops.append(self.tf_graph[bn_ops[i-1].name][0])
if len(self.tf_graph[bn_ops[i-1].name]) == 1 and \
self.tf_graph[bn_ops[i-1].name][0].type == BATCH_NORM_ORDER[i]:
bn_ops.append(self.tf_graph[bn_ops[i - 1].name][0])
else:
raise Exception('Invalid BatchNorm Op')
if len(self.tf_graph[bn_ops[2].name]) == 2 \
and self.tf_graph[bn_ops[2].name][0].type == BATCH_NORM_ORDER[3] \
and self.tf_graph[bn_ops[2].name][1].type == BATCH_NORM_ORDER[4]:
if len(self.tf_graph[bn_ops[2].name]) == 2 and \
self.tf_graph[bn_ops[2].name][0].type == \
BATCH_NORM_ORDER[3] and \
self.tf_graph[bn_ops[2].name][1].type == BATCH_NORM_ORDER[4]:
bn_ops.append(self.tf_graph[bn_ops[2].name][0])
bn_ops.append(self.tf_graph[bn_ops[2].name][1])
else:
......@@ -682,7 +691,8 @@ class TFConverter(object):
op_def.output.extend([output.name for output in op.outputs])
size_arg = op_def.arg.add()
size_arg.name = 'size'
size_arg.ints.extend(get_input_tensor(op, 1).eval().astype(np.int32).flat)
size_arg.ints.extend(
get_input_tensor(op, 1).eval().astype(np.int32).flat)
size_arg = op_def.arg.add()
size_arg.name = 'align_corners'
size_arg.i = op.get_attr('align_corners')
......@@ -712,7 +722,7 @@ class TFConverter(object):
else:
op_def.type = "CWise"
x_value = 0
if len(input_tensor1.shape)==4:
if len(input_tensor1.shape) == 4:
op_def.input.extend([op.inputs[1].name])
x_value = get_input_tensor(op, 0).eval().astype(np.float32)
else:
......@@ -752,7 +762,8 @@ class TFConverter(object):
op_def.type = "BiasAdd"
op_def.input.extend([op.inputs[0].name])
if self.device == 'gpu':
output_name = self.add_buffer_to_image(get_input_tensor(op, 1).name, "ARGUMENT")
output_name = self.add_buffer_to_image(
get_input_tensor(op, 1).name, "ARGUMENT")
op_def.input.extend([output_name])
else:
op_def.input.extend([get_input_tensor(op, 1).name])
......@@ -772,21 +783,24 @@ class TFConverter(object):
op_def.output.extend([output.name for output in op.outputs])
size_arg = op_def.arg.add()
size_arg.name = 'block_shape'
size_arg.ints.extend(get_input_tensor(op, 1).eval().astype(np.int32).flat)
size_arg.ints.extend(
get_input_tensor(op, 1).eval().astype(np.int32).flat)
size_arg = op_def.arg.add()
if b2s:
size_arg.name = 'crops'
else:
size_arg.name = 'paddings'
size_arg.ints.extend(get_input_tensor(op, 2).eval().astype(np.int32).flat)
size_arg.ints.extend(
get_input_tensor(op, 2).eval().astype(np.int32).flat)
self.add_output_shape(op.outputs, op_def)
self.resolved_ops[op.name] = 1
self.unused_tensor.add(get_input_tensor(op, 1).name)
self.unused_tensor.add(get_input_tensor(op, 2).name)
def is_atrous_conv2d(self, op):
    """Tell whether ``op`` starts a SpaceToBatchND -> Conv2D pattern.

    True only when ``op.type`` is ``'SpaceToBatchND'`` and
    ``self.tf_graph[op.name]`` holds exactly one op whose type is
    ``'Conv2D'``. ``self.tf_graph`` is consulted only for
    ``SpaceToBatchND`` ops.
    """
    if op.type != 'SpaceToBatchND':
        return False
    # NOTE(review): tf_graph presumably maps an op name to the ops that
    # consume its output - confirm where it is built.
    linked_ops = self.tf_graph[op.name]
    return len(linked_ops) == 1 and linked_ops[0].type == 'Conv2D'
def convert_atrous_conv2d(self, op):
op_def = mace_pb2.OperatorDef()
......@@ -796,10 +810,12 @@ class TFConverter(object):
conv_op = self.tf_graph[op.name][0]
op_def.name = conv_op.name
op_def.type = conv_op.type
self.transpose_filter_tensor[get_input_tensor(conv_op, 1).name] = (0, 1, 3, 2)
self.transpose_filter_tensor[get_input_tensor(conv_op,
1).name] = (0, 1, 3, 2)
if self.device == 'gpu':
op_def.input.extend([op.inputs[0].name])
output_name = self.add_buffer_to_image(get_input_tensor(conv_op, 1).name, "CONV2D_FILTER")
output_name = self.add_buffer_to_image(
get_input_tensor(conv_op, 1).name, "CONV2D_FILTER")
op_def.input.extend([output_name])
else:
op_def.input.extend([get_input_tensor(op, 0).name])
......@@ -807,7 +823,8 @@ class TFConverter(object):
dilation_arg = op_def.arg.add()
dilation_arg.name = 'dilations'
dilation_arg.ints.extend(get_input_tensor(op, 1).eval().astype(np.int32).flat)
dilation_arg.ints.extend(
get_input_tensor(op, 1).eval().astype(np.int32).flat)
padding_arg = op_def.arg.add()
padding_arg.name = 'padding'
padding_values = get_input_tensor(op, 2).eval().astype(np.int32).flat
......@@ -831,18 +848,20 @@ class TFConverter(object):
self.resolved_ops[op.name] = 1
self.resolved_ops[conv_op.name] = 1
if len(self.tf_graph[final_op.name]) == 1 and self.tf_graph[final_op.name][0].type == 'BiasAdd' :
if len(self.tf_graph[final_op.name]
) == 1 and self.tf_graph[final_op.name][0].type == 'BiasAdd':
bias_add_op = self.tf_graph[final_op.name][0]
if self.device == 'gpu':
output_name = self.add_buffer_to_image(get_input_tensor(bias_add_op, 1).name, "ARGUMENT")
output_name = self.add_buffer_to_image(
get_input_tensor(bias_add_op, 1).name, "ARGUMENT")
op_def.input.extend([output_name])
else:
op_def.input.extend([get_input_tensor(bias_add_op, 1).name])
final_op = bias_add_op
self.resolved_ops[bias_add_op.name] = 1
if len(self.tf_graph[final_op.name]) == 1 \
and self.tf_graph[final_op.name][0].type == 'BatchToSpaceND':
if len(self.tf_graph[final_op.name]) == 1 and \
self.tf_graph[final_op.name][0].type == 'BatchToSpaceND':
final_op = self.tf_graph[final_op.name][0]
self.resolved_ops[final_op.name] = 1
self.unused_tensor.add(get_input_tensor(final_op, 1).name)
......@@ -850,8 +869,8 @@ class TFConverter(object):
else:
raise Exception('Convert atrous conv error: no BatchToSpaceND op')
if len(self.tf_graph[final_op.name]) == 1 \
and self.tf_graph[final_op.name][0].type == 'Relu':
if len(self.tf_graph[final_op.name]) == 1 and \
self.tf_graph[final_op.name][0].type == 'Relu':
relu_op = self.tf_graph[final_op.name][0]
op_def.type = "FusedConv2D"
fused_relu_arg = op_def.arg.add()
......@@ -866,8 +885,10 @@ class TFConverter(object):
def is_softmax(self, op):
    """Tell whether `op` is a Softmax wrapped by Reshape ops on both sides.

    True only when `op.type` is 'Softmax', it has exactly one entry in
    `self.tf_parents` and exactly one entry in `self.tf_graph`, and both of
    those entries are of type 'Reshape'.
    """
    if op.type != 'Softmax':
        return False
    parents = self.tf_parents[op.name]
    if len(parents) != 1 or parents[0].type != 'Reshape':
        return False
    children = self.tf_graph[op.name]
    return len(children) == 1 and children[0].type == 'Reshape'
def convert_softmax(self, softmax_op):
op_def = self.net_def.op.add()
......@@ -890,7 +911,8 @@ class TFConverter(object):
children_ops = self.tf_graph[squeeze_op.name]
print children_ops
if len(children_ops) > 1 and children_ops[0].type == 'Shape':
self.unused_tensor.add(get_input_tensor(children_ops[1], 0).name)
self.unused_tensor.add(
get_input_tensor(children_ops[1], 0).name)
self.resolved_ops[children_ops[1].name] = 1
else:
op_def.input.extend([parent_reshape_op.inputs[0].name])
......@@ -999,11 +1021,13 @@ class TFConverter(object):
self.convert_global_avg_pooling(op)
self.unused_tensor.add(op.inputs[1].name)
else:
raise Exception('Unknown Op: %s, type: %s' % (op.name, op.type))
#elif op.type in ['']:
raise Exception('Unknown Op: %s, type: %s' % (op.name,
op.type))
# elif op.type in ['']:
# self.convert_normal_op(op)
else:
raise Exception('Unknown Op: %s, type: %s' % (op.name, op.type))
raise Exception('Unknown Op: %s, type: %s' % (op.name,
op.type))
for op in self.tf_ops:
if self.resolved_ops[op.name] == 1:
......@@ -1011,7 +1035,8 @@ class TFConverter(object):
elif op.type == 'Const':
self.convert_tensor(op)
else:
raise Exception('Unknown Op: %s, type: %s' % (op.name, op.type))
raise Exception('Unknown Op: %s, type: %s' % (op.name,
op.type))
if self.device == 'gpu':
self.add_gpu_output_transform(output_nodes)
......@@ -1026,6 +1051,7 @@ class TFConverter(object):
if self.resolved_ops[key] != 1:
print 'Unresolve Op: %s' % key
class Optimizer:
def __init__(self, net_def, device):
self.net_def = net_def
......@@ -1056,14 +1082,17 @@ class Optimizer:
for op in self.net_def.op:
if op.name in resolved_ops:
pass
elif op.type == 'DepthwiseConv2d' and len(op.output) == 1 \
and self.mace_graph[op.output[0]][0].type == 'FoldedBatchNorm':
elif op.type == 'DepthwiseConv2d' and len(op.output) == 1 and \
self.mace_graph[op.output[0]][0].type == 'FoldedBatchNorm':
depthwise_conv2d_op = op
folded_bn_op = self.mace_graph[op.output[0]][0]
weight_buffer_name = self.get_buffer_tensor_name(depthwise_conv2d_op.input[1])
weight_buffer_name = self.get_buffer_tensor_name(
depthwise_conv2d_op.input[1])
weight_tensor = self.tensor_map[weight_buffer_name]
scale_buffer_name = self.get_buffer_tensor_name(folded_bn_op.input[1])
offset_buffer_name = self.get_buffer_tensor_name(folded_bn_op.input[2])
scale_buffer_name = self.get_buffer_tensor_name(
folded_bn_op.input[1])
offset_buffer_name = self.get_buffer_tensor_name(
folded_bn_op.input[2])
scale_tensor = self.tensor_map[scale_buffer_name]
weight_shape = weight_tensor.dims
idx = 0
......@@ -1072,14 +1101,18 @@ class Optimizer:
for ic in range(weight_shape[1]):
for i in range(weight_shape[2]):
for j in range(weight_shape[3]):
weight_tensor.float_data[idx] *= scale_tensor.float_data[ic * weight_shape[0] + oc]
weight_tensor.float_data[
idx] *= scale_tensor.float_data[
ic * weight_shape[0] + oc]
idx += 1
else: # HWIO
for i in range(weight_shape[0]):
for j in range(weight_shape[1]):
for ic in range(weight_shape[2]):
for oc in range(weight_shape[3]):
weight_tensor.float_data[idx] *= scale_tensor.float_data[ic * weight_shape[3] + oc]
weight_tensor.float_data[
idx] *= scale_tensor.float_data[
ic * weight_shape[3] + oc]
idx += 1
new_tensors.append(weight_tensor)
......@@ -1129,6 +1162,7 @@ class Optimizer:
new_net = self.fold_batch_norm()
return new_net
def add_shape_info(input_graph_def, input_nodes, input_shapes):
inputs_replaced_graph = graph_pb2.GraphDef()
for node in input_graph_def.node:
......@@ -1138,7 +1172,8 @@ def add_shape_info(input_graph_def, input_nodes, input_shapes):
placeholder_node = copy.deepcopy(node)
placeholder_node.attr.clear()
placeholder_node.attr['shape'].shape.dim.extend([
tensor_shape_pb2.TensorShapeProto.Dim(size=i) for i in input_shape
tensor_shape_pb2.TensorShapeProto.Dim(size=i)
for i in input_shape
])
placeholder_node.attr['dtype'].CopyFrom(node.attr['dtype'])
inputs_replaced_graph.node.extend([placeholder_node])
......@@ -1147,7 +1182,8 @@ def add_shape_info(input_graph_def, input_nodes, input_shapes):
return inputs_replaced_graph
def convert_to_mace_pb(model_file, input_node, input_shape, output_node, data_type, device, winograd):
def convert_to_mace_pb(model_file, input_node, input_shape, output_node,
data_type, device, winograd):
net_def = mace_pb2.NetDef()
dt = data_type_map[data_type]
......@@ -1165,7 +1201,8 @@ def convert_to_mace_pb(model_file, input_node, input_shape, output_node, data_ty
output_nodes = [x for x in output_node.split(',')]
assert len(input_nodes) == len(input_shapes)
input_graph_def = add_shape_info(input_graph_def, input_nodes, input_shapes)
input_graph_def = add_shape_info(input_graph_def, input_nodes,
input_shapes)
with tf.Session() as session:
with session.graph.as_default() as graph:
tf.import_graph_def(input_graph_def, name="")
......
......@@ -6,8 +6,10 @@ from dsp_ops import DspOps
from mace.python.tools import graph_util
from mace.python.tools.convert_util import tf_dtype_2_mace_dtype
# converter --input ../libcv/quantized_model.pb --output quantized_model_dsp.pb \
# --runtime dsp --input_node input_node --output_node output_node
# converter --input ../libcv/quantized_model.pb \
# --output quantized_model_dsp.pb \
# --runtime dsp --input_node input_node \
# --output_node output_node
padding_mode = {
'NA': 0,
......@@ -18,24 +20,29 @@ padding_mode = {
'SAME_CAFFE': 5
}
def get_tensor_name_from_op(op_name, port):
    """Build the '<op_name>:<port>' tensor name for an op output port."""
    return ':'.join((op_name, str(port)))
def get_node_from_map(op_map, op_or_tensor_name):
    """Look up a node in `op_map` by op name or by '<op>:<port>' tensor name.

    Everything from the first ':' onwards is ignored when forming the key.
    """
    op_name, _, _ = op_or_tensor_name.partition(':')
    return op_map[op_name]
def get_op_and_port_from_tensor(tensor_name):
    """Split a '<op>:<port>' tensor name into (op name, integer port).

    Raises ValueError when the name does not contain exactly one ':' or the
    port part is not an integer literal.
    """
    op_name, port_text = tensor_name.split(':')
    return op_name, int(port_text)
def max_elem_size(tensor):
    """Return the product of the tensor's shape dims times `tensor.dtype.size`.

    An empty shape list yields `tensor.dtype.size` on its own.
    """
    dims = tensor.shape.as_list()
    if not dims:
        return tensor.dtype.size
    return reduce(mul, dims) * tensor.dtype.size
def find_dtype(tensor_dtype):
if tensor_dtype == tf.float32:
return mace_pb2.DT_FLOAT
......@@ -46,20 +53,24 @@ def find_dtype(tensor_dtype):
else:
raise Exception('Unsupported data type: ', tensor_dtype)
def has_padding_and_strides(op):
    """Tell whether the op's node_def carries both 'padding' and 'strides'."""
    attrs = op.node_def.attr
    return all(key in attrs for key in ('padding', 'strides'))
def is_node_flatten_reshape(op):
    """Tell whether `op` is a Reshape whose first output has a length-1 shape."""
    if op.type != 'Reshape':
        return False
    return len(op.outputs[0].shape) == 1
def get_input_tensor(op, index):
    """Return input `index` of `op`, looking through Reshape producers.

    While the candidate tensor is produced by a 'Reshape' op, it is replaced
    by that Reshape's first input, so the result is never a Reshape output.
    """
    tensor = op.inputs[index]
    while tensor.op.type == 'Reshape':
        tensor = tensor.op.inputs[0]
    return tensor
def add_shape_const_node(net_def, op, values, name):
print ('Add const node: ', op.name + '/' + name)
print('Add const node: ', op.name + '/' + name)
tensor = net_def.tensors.add()
node_name = op.name + '/' + name
tensor.name = node_name + ':0'
......@@ -69,8 +80,8 @@ def add_shape_const_node(net_def, op, values, name):
def convert_op_outputs(mace_op_def, tf_op):
mace_op_def.output_type.extend([tf_dtype_2_mace_dtype(output.dtype)
for output in tf_op.outputs])
mace_op_def.output_type.extend(
[tf_dtype_2_mace_dtype(output.dtype) for output in tf_op.outputs])
output_shapes = []
for output in tf_op.outputs:
output_shape = mace_pb2.OutputShape()
......@@ -81,13 +92,13 @@ def convert_op_outputs(mace_op_def, tf_op):
def convert_ops(unresolved_ops, resolved_ops, net_def, output_node, dsp_ops):
first_op = unresolved_ops[0]
print ('Op: ', first_op.name, first_op.type, first_op.outputs[0].shape)
print('Op: ', first_op.name, first_op.type, first_op.outputs[0].shape)
if first_op.name in resolved_ops:
pass
elif first_op.type == 'Const':
print ('Add const node: ', first_op.name)
print('Add const node: ', first_op.name)
tf_tensor = first_op.outputs[0].eval()
tensor = net_def.tensors.add()
tensor.name = first_op.outputs[0].name
......@@ -112,8 +123,8 @@ def convert_ops(unresolved_ops, resolved_ops, net_def, output_node, dsp_ops):
if len(first_op.outputs) > 0 and first_op.type == 'Dequantize' \
and len(first_op.outputs[0].consumers()) > 0 \
and (first_op.outputs[0].consumers()[0].type == 'SpaceToBatchND' \
or first_op.outputs[0].consumers()[0].type == 'BatchToSpaceND'):
and (first_op.outputs[0].consumers()[0].type == 'SpaceToBatchND' or
first_op.outputs[0].consumers()[0].type == 'BatchToSpaceND'):
input_tensor = first_op.inputs[0]
min_tensor = first_op.inputs[1]
max_tensor = first_op.inputs[2]
......@@ -133,13 +144,17 @@ def convert_ops(unresolved_ops, resolved_ops, net_def, output_node, dsp_ops):
op_def.input.append(input_tensor.name)
op_def.input.extend([t.name for t in s2b_op.inputs[1:]])
op_def.input.extend([min_tensor.name, max_tensor.name])
op_def.out_max_byte_size.extend([max_elem_size(out) for out in quantize_op.outputs])
op_def.out_max_byte_size.extend(
[max_elem_size(out) for out in quantize_op.outputs])
convert_op_outputs(op_def, quantize_op)
elif len(first_op.outputs) > 0 and first_op.type == 'QuantizedReshape' \
and len(first_op.outputs[0].consumers()) > 0 \
and first_op.outputs[0].consumers()[0].type == 'Dequantize' \
and len(first_op.outputs[0].consumers()[0].outputs[0].consumers()) > 0 \
and first_op.outputs[0].consumers()[0].outputs[0].consumers()[0].type == 'Softmax':
elif len(first_op.outputs) > 0 and \
first_op.type == 'QuantizedReshape' and \
len(first_op.outputs[0].consumers()) > 0 and \
first_op.outputs[0].consumers()[0].type == 'Dequantize' and \
len(first_op.outputs[0].consumers()[0].outputs[0].consumers()) \
> 0 and \
first_op.outputs[0].consumers()[0].outputs[0].consumers()[0].type \
== 'Softmax':
input_tensor = first_op.inputs[0]
min_tensor = first_op.inputs[2]
max_tensor = first_op.inputs[3]
......@@ -161,12 +176,14 @@ def convert_ops(unresolved_ops, resolved_ops, net_def, output_node, dsp_ops):
op_def.name = quantize_reshape_op.name
op_def.type = dsp_ops.map_nn_op('QuantizedSoftmax')
op_def.input.extend([input_tensor.name, min_tensor.name, max_tensor.name])
op_def.out_max_byte_size.extend([max_elem_size(out) for out in quantize_reshape_op.outputs])
op_def.input.extend(
[input_tensor.name, min_tensor.name, max_tensor.name])
op_def.out_max_byte_size.extend(
[max_elem_size(out) for out in quantize_reshape_op.outputs])
convert_op_outputs(op_def, quantize_reshape_op)
elif len(first_op.outputs) > 0 and first_op.type == 'Dequantize' \
and len(first_op.outputs[0].consumers()) > 0 \
and first_op.outputs[0].consumers()[0].type == 'Tanh':
elif len(first_op.outputs) > 0 and first_op.type == 'Dequantize' and \
len(first_op.outputs[0].consumers()) > 0 and \
first_op.outputs[0].consumers()[0].type == 'Tanh':
input_tensor = first_op.inputs[0]
min_tensor = first_op.inputs[1]
max_tensor = first_op.inputs[2]
......@@ -186,18 +203,24 @@ def convert_ops(unresolved_ops, resolved_ops, net_def, output_node, dsp_ops):
op_def.name = quantize_op.name
op_def.type = dsp_ops.map_nn_op('Quantized' + tanh_op.type)
op_def.input.extend([input_tensor.name, min_tensor.name, max_tensor.name])
op_def.out_max_byte_size.extend([max_elem_size(out) for out in quantize_op.outputs])
op_def.input.extend(
[input_tensor.name, min_tensor.name, max_tensor.name])
op_def.out_max_byte_size.extend(
[max_elem_size(out) for out in quantize_op.outputs])
convert_op_outputs(op_def, quantize_op)
# tanh is last op
else:
op_def.name = tanh_op.name + '/QuantizedTanh'
op_def.type = dsp_ops.map_nn_op('Quantized' + tanh_op.type)
op_def.input.extend([input_tensor.name, min_tensor.name, max_tensor.name])
op_def.out_max_byte_size.extend([max_elem_size(input_tensor),
op_def.input.extend(
[input_tensor.name, min_tensor.name, max_tensor.name])
op_def.out_max_byte_size.extend([
max_elem_size(input_tensor),
max_elem_size(min_tensor),
max_elem_size(max_tensor)])
op_def.output_type.extend([mace_pb2.DT_UINT8, mace_pb2.DT_FLOAT, mace_pb2.DT_FLOAT])
max_elem_size(max_tensor)
])
op_def.output_type.extend(
[mace_pb2.DT_UINT8, mace_pb2.DT_FLOAT, mace_pb2.DT_FLOAT])
output_shapes = []
for output in first_op.inputs:
output_shape = mace_pb2.OutputShape()
......@@ -208,31 +231,39 @@ def convert_ops(unresolved_ops, resolved_ops, net_def, output_node, dsp_ops):
new_tanh_op_def = net_def.op.add()
new_tanh_op_def.name = tanh_op.name
new_tanh_op_def.type = dsp_ops.map_nn_op('Dequantize')
new_tanh_op_def.input.extend([get_tensor_name_from_op(op_def.name, 0),
new_tanh_op_def.input.extend([
get_tensor_name_from_op(op_def.name, 0),
get_tensor_name_from_op(op_def.name, 1),
get_tensor_name_from_op(op_def.name, 2)])
new_tanh_op_def.out_max_byte_size.extend([max_elem_size(tanh_op.outputs[0])])
get_tensor_name_from_op(op_def.name, 2)
])
new_tanh_op_def.out_max_byte_size.extend(
[max_elem_size(tanh_op.outputs[0])])
convert_op_outputs(new_tanh_op_def, tanh_op)
elif has_padding_and_strides(first_op):
op_def.padding = padding_mode[first_op.get_attr('padding')]
op_def.input.extend([t.name for t in first_op.inputs])
if 'ksize' in first_op.node_def.attr:
ksize = first_op.get_attr('ksize')
ksize_tensor = add_shape_const_node(net_def, first_op, ksize, 'ksize')
ksize_tensor = add_shape_const_node(net_def, first_op, ksize,
'ksize')
op_def.input.extend([ksize_tensor])
strides = first_op.get_attr('strides')
strides_tensor = add_shape_const_node(net_def, first_op, strides, 'strides')
strides_tensor = add_shape_const_node(net_def, first_op, strides,
'strides')
op_def.input.extend([strides_tensor])
op_def.out_max_byte_size.extend([max_elem_size(out) for out in first_op.outputs])
op_def.out_max_byte_size.extend(
[max_elem_size(out) for out in first_op.outputs])
convert_op_outputs(op_def, first_op)
elif is_node_flatten_reshape(first_op):
op_def.type = 'Flatten'
op_def.input.extend([t.name for t in first_op.inputs])
op_def.out_max_byte_size.extend([max_elem_size(out) for out in first_op.outputs])
op_def.out_max_byte_size.extend(
[max_elem_size(out) for out in first_op.outputs])
convert_op_outputs(op_def, first_op)
elif dsp_ops.has_op(first_op.type):
op_def.input.extend([t.name for t in first_op.inputs])
op_def.out_max_byte_size.extend([max_elem_size(out) for out in first_op.outputs])
op_def.out_max_byte_size.extend(
[max_elem_size(out) for out in first_op.outputs])
convert_op_outputs(op_def, first_op)
else:
raise Exception('Unsupported op: ', first_op)
......@@ -241,12 +272,14 @@ def convert_ops(unresolved_ops, resolved_ops, net_def, output_node, dsp_ops):
del unresolved_ops[0]
def add_output_node(net_def, output_node):
    """Append an '__output__' op of type 'OUTPUT' to `net_def`.

    Its single input is the tensor name for port 0 of `output_node`.
    """
    output_op = net_def.op.add()
    output_op.name, output_op.type = '__output__', 'OUTPUT'
    output_tensor_name = get_tensor_name_from_op(output_node, 0)
    output_op.input.extend([output_tensor_name])
def reverse_batch_to_space_and_biasadd(net_def):
tensor_map = {}
for tensor in net_def.tensors:
......@@ -272,42 +305,65 @@ def reverse_batch_to_space_and_biasadd(net_def):
success = False
if op.type == 'Requantize_32to8':
biasadd_requantize_op = op
biasadd_op = get_node_from_map(op_map, biasadd_requantize_op.input[0])
biasadd_op = get_node_from_map(op_map,
biasadd_requantize_op.input[0])
if biasadd_op.type == 'QuantizedBiasAdd_8p8to32':
b2s_op = get_node_from_map(op_map, biasadd_op.input[0])
if b2s_op.type == 'QuantizedBatchToSpaceND_8':
conv_requantize_op = get_node_from_map(op_map, b2s_op.input[0])
conv_op = get_node_from_map(op_map, conv_requantize_op.input[0])
conv_requantize_op = get_node_from_map(
op_map, b2s_op.input[0])
conv_op = get_node_from_map(op_map,
conv_requantize_op.input[0])
if conv_op.type == 'QuantizedConv2d_8x8to32':
new_biasadd_op = mace_pb2.OperatorDef()
new_biasadd_op.CopyFrom(biasadd_op)
new_biasadd_op.input[0] = get_tensor_name_from_op(conv_requantize_op.name, 0)
new_biasadd_op.input[2] = get_tensor_name_from_op(conv_requantize_op.name, 1)
new_biasadd_op.input[3] = get_tensor_name_from_op(conv_requantize_op.name, 2)
new_biasadd_op.out_max_byte_size[0] = conv_requantize_op.out_max_byte_size[0] * 4
new_biasadd_op.input[0] = get_tensor_name_from_op(
conv_requantize_op.name, 0)
new_biasadd_op.input[2] = get_tensor_name_from_op(
conv_requantize_op.name, 1)
new_biasadd_op.input[3] = get_tensor_name_from_op(
conv_requantize_op.name, 2)
new_biasadd_op.out_max_byte_size[
0] = conv_requantize_op.out_max_byte_size[0] * 4
new_biasadd_requantize_op = mace_pb2.OperatorDef()
new_biasadd_requantize_op.CopyFrom(biasadd_requantize_op)
new_biasadd_requantize_op.out_max_byte_size[0] = new_biasadd_op.out_max_byte_size[0] / 4
new_biasadd_requantize_op.CopyFrom(
biasadd_requantize_op)
new_biasadd_requantize_op.out_max_byte_size[
0] = new_biasadd_op.out_max_byte_size[0] / 4
new_b2s_op = mace_pb2.OperatorDef()
new_b2s_op.CopyFrom(b2s_op)
new_b2s_op.input[0] = get_tensor_name_from_op(biasadd_requantize_op.name, 0)
new_b2s_op.input[3] = get_tensor_name_from_op(biasadd_requantize_op.name, 1)
new_b2s_op.input[4] = get_tensor_name_from_op(biasadd_requantize_op.name, 2)
new_ops.extend([new_biasadd_op, new_biasadd_requantize_op, new_b2s_op])
skip_ops = skip_ops.union([biasadd_op.name, biasadd_requantize_op.name, b2s_op.name])
new_b2s_op.input[0] = get_tensor_name_from_op(
biasadd_requantize_op.name, 0)
new_b2s_op.input[3] = get_tensor_name_from_op(
biasadd_requantize_op.name, 1)
new_b2s_op.input[4] = get_tensor_name_from_op(
biasadd_requantize_op.name, 2)
new_ops.extend([
new_biasadd_op, new_biasadd_requantize_op,
new_b2s_op
])
skip_ops = skip_ops.union([
biasadd_op.name, biasadd_requantize_op.name,
b2s_op.name
])
visited_ops.add(op.name)
follow_ops = consumers[get_tensor_name_from_op(biasadd_requantize_op.name, 0)]
follow_ops = consumers[get_tensor_name_from_op(
biasadd_requantize_op.name, 0)]
for follow_op in follow_ops:
new_follow_op = mace_pb2.OperatorDef()
new_follow_op.CopyFrom(follow_op)
for i in xrange(len(follow_op.input)):
for k in xrange(3):
if new_follow_op.input[i] == get_tensor_name_from_op(biasadd_requantize_op.name, k):
new_follow_op.input[i] = get_tensor_name_from_op(b2s_op.name, k)
if new_follow_op.input[
i] == get_tensor_name_from_op(
biasadd_requantize_op.name, k):
new_follow_op.input[
i] = get_tensor_name_from_op(
b2s_op.name, k)
new_ops.append(new_follow_op)
skip_ops.add(follow_op.name)
visited_ops.add(follow_op.name)
......@@ -321,6 +377,7 @@ def reverse_batch_to_space_and_biasadd(net_def):
return new_net_def
def add_node_id(net_def):
node_id_counter = 0
node_id_map = {}
......@@ -343,9 +400,12 @@ def add_node_id(net_def):
return net_def
def add_input_output_info(net_def, input_node, output_node, graph, dtype):
input_tensor = graph.get_tensor_by_name(get_tensor_name_from_op(input_node, 0))
output_tensor = graph.get_tensor_by_name(get_tensor_name_from_op(output_node, 0))
input_tensor = graph.get_tensor_by_name(
get_tensor_name_from_op(input_node, 0))
output_tensor = graph.get_tensor_by_name(
get_tensor_name_from_op(output_node, 0))
input_info = net_def.input_info.add()
input_info.dims.extend(input_tensor.shape.as_list())
......@@ -353,7 +413,7 @@ def add_input_output_info(net_def, input_node, output_node, graph, dtype):
if dtype == mace_pb2.DT_UINT8:
for i in xrange(2):
input_info = net_def.input_info.add()
input_info.dims.extend([1,1,1,1])
input_info.dims.extend([1, 1, 1, 1])
input_info.data_type = mace_pb2.DT_FLOAT
output_info = net_def.output_info.add()
......@@ -362,11 +422,12 @@ def add_input_output_info(net_def, input_node, output_node, graph, dtype):
if dtype == mace_pb2.DT_UINT8:
for i in xrange(2):
output_info = net_def.output_info.add()
output_info.dims.extend([1,1,1,1])
output_info.dims.extend([1, 1, 1, 1])
output_info.data_type = mace_pb2.DT_FLOAT
return net_def
def fuse_quantize(net_def, input_node, output_node):
tensor_map = {}
for tensor in net_def.tensors:
......@@ -397,18 +458,24 @@ def fuse_quantize(net_def, input_node, output_node):
elif o.type == 'Quantize':
quantize_op = o
if quantize_op is not None:
minf_op, maxf_op = consumers[get_tensor_name_from_op(flatten_op.name, 0)]
skip_ops = skip_ops.union([flatten_op.name, minf_op.name, maxf_op.name])
skip_tensors = skip_tensors.union([flatten_op.input[1], minf_op.input[1], maxf_op.input[1]])
minf_op, maxf_op = consumers[get_tensor_name_from_op(
flatten_op.name, 0)]
skip_ops = skip_ops.union(
[flatten_op.name, minf_op.name, maxf_op.name])
skip_tensors = skip_tensors.union(
[flatten_op.input[1], minf_op.input[1], maxf_op.input[1]])
quantize_op.type = 'AutoQuantize'
del quantize_op.input[1:]
new_net_def = mace_pb2.NetDef()
new_net_def.tensors.extend([tensor for tensor in net_def.tensors if tensor.name not in skip_tensors])
new_net_def.tensors.extend([
tensor for tensor in net_def.tensors if tensor.name not in skip_tensors
])
new_net_def.op.extend([op for op in net_def.op if op.name not in skip_ops])
new_net_def.op.extend(new_ops)
return new_net_def
def convert_to_mace_pb(model_file, input_node, output_node, dsp_mode):
"""
nnlib does not have batch norm, so use tensorflow optimizer to fold
......@@ -432,12 +499,14 @@ def convert_to_mace_pb(model_file, input_node, output_node, dsp_mode):
# convert const node
unresolved_ops = [op for op in ops if op.type == 'Const']
while len(unresolved_ops) > 0:
convert_ops(unresolved_ops, resolved_ops, net_def, output_node, dsp_ops)
convert_ops(unresolved_ops, resolved_ops, net_def, output_node,
dsp_ops)
# convert op node
unresolved_ops = [op for op in ops if op.type != 'Const']
while len(unresolved_ops) > 0:
convert_ops(unresolved_ops, resolved_ops, net_def, output_node, dsp_ops)
convert_ops(unresolved_ops, resolved_ops, net_def, output_node,
dsp_ops)
add_output_node(net_def, output_node)
net_def = reverse_batch_to_space_and_biasadd(net_def)
......@@ -447,11 +516,11 @@ def convert_to_mace_pb(model_file, input_node, output_node, dsp_mode):
net_def_with_node_id = add_node_id(sorted_net_def)
dtype = mace_pb2.DT_FLOAT
final_net_def = add_input_output_info(net_def_with_node_id, input_node, output_node, graph, dtype)
final_net_def = add_input_output_info(
net_def_with_node_id, input_node, output_node, graph, dtype)
arg = final_net_def.arg.add()
arg.name = 'dsp_mode'
arg.i = dsp_mode
return final_net_def
......@@ -10,18 +10,21 @@ from tensorflow import gfile
FLAGS = None
def hist_inc(hist, key):
    """Increment the count stored under `key` in the histogram dict `hist`.

    A key that is not present yet starts from zero, so its count becomes 1.
    The dict is updated in place; nothing is returned.
    """
    hist[key] = hist.get(key, 0) + 1
def to_int_list(long_list):
    """Return a new list with every element of `long_list` passed through int().

    Accepts any iterable; an empty input gives an empty list.
    """
    return [int(value) for value in long_list]
def main(unused_args):
if not FLAGS.input or not gfile.Exists(FLAGS.input):
print('Input graph file ' + FLAGS.input + ' does not exist!')
......@@ -49,7 +52,9 @@ def main(unused_args):
tensor = output.eval()
tensor_shape = list(tensor.shape)
tensor_shapes[tensor_name] = tensor_shape
print("Const %s: %s, %d" % (tensor_name, tensor_shape, functools.reduce(operator.mul, tensor_shape, 1)))
print("Const %s: %s, %d" %
(tensor_name, tensor_shape,
functools.reduce(operator.mul, tensor_shape, 1)))
if len(tensor_shape) == 1 and tensor_shape[0] < 10:
tensor_values[tensor_name] = list(tensor)
......@@ -65,11 +70,16 @@ def main(unused_args):
if input_name.endswith('weights/read:0'):
ksize = input.shape.as_list()
break
if input_name.endswith('weights:0') and input_name in tensor_shapes:
if input_name.endswith(
'weights:0') and input_name in tensor_shapes:
ksize = tensor_shapes[input_name]
break
print('%s(padding=%s, strides=%s, ksize=%s, format=%s) %s => %s' % (op.type, padding, strides, ksize, data_format, op.inputs[0].shape, op.outputs[0].shape))
key = '%s(padding=%s, strides=%s, ksize=%s, format=%s)' % (op.type, padding, strides, ksize, data_format)
print(
'%s(padding=%s, strides=%s, ksize=%s, format=%s) %s => %s'
% (op.type, padding, strides, ksize, data_format,
op.inputs[0].shape, op.outputs[0].shape))
key = '%s(padding=%s, strides=%s, ksize=%s, format=%s)' % (
op.type, padding, strides, ksize, data_format)
hist_inc(stats, key)
elif op.type in ['FusedResizeAndPadConv2D']:
padding = op.get_attr('padding')
......@@ -78,20 +88,25 @@ def main(unused_args):
ksize = 'Unknown'
for input in op.inputs:
input_name = input.name
if input_name.endswith('weights:0') and input_name in tensor_shapes:
if input_name.endswith(
'weights:0') and input_name in tensor_shapes:
ksize = tensor_shapes[input_name]
break
key = '%s(padding=%s, strides=%s, ksize=%s, resize_align_corners=%s)' % (op.type, padding, strides, ksize, resize_align_corners)
key = '%s(padding=%s, strides=%s, ksize=%s, ' \
'resize_align_corners=%s)' % (op.type, padding, strides,
ksize, resize_align_corners)
hist_inc(stats, key)
elif op.type in ['ResizeBilinear']:
align_corners = op.get_attr('align_corners')
size = 'Unknown'
for input in op.inputs:
input_name = input.name
if input_name.endswith('size:0') and input_name in tensor_values:
if input_name.endswith(
'size:0') and input_name in tensor_values:
size = tensor_values[input_name]
break
key = '%s(size=%s, align_corners=%s)' % (op.type, size, align_corners)
key = '%s(size=%s, align_corners=%s)' % (op.type, size,
align_corners)
print(key)
hist_inc(stats, key)
elif op.type in ['AvgPool', 'MaxPool']:
......@@ -99,38 +114,47 @@ def main(unused_args):
strides = to_int_list(op.get_attr('strides'))
ksize = to_int_list(op.get_attr('ksize'))
data_format = op.get_attr('data_format')
key = '%s(padding=%s, strides=%s, ksize=%s)' % (op.type, padding, strides, ksize)
key = '%s(padding=%s, strides=%s, ksize=%s)' % (op.type,
padding,
strides, ksize)
hist_inc(stats, key)
elif op.type in ['SpaceToBatchND', 'BatchToSpaceND']:
block_shape = 'Unknown'
for input in op.inputs:
input_name = input.name
if input_name.endswith('block_shape:0') and input_name in tensor_values:
if input_name.endswith(
'block_shape:0') and input_name in tensor_values:
block_shape = tensor_values[input_name]
break
paddings = 'Unknown'
for input in op.inputs:
input_name = input.name
if input_name.endswith('paddings:0') and input_name in tensor_values:
if input_name.endswith(
'paddings:0') and input_name in tensor_values:
paddings = tensor_values[input_name]
break
crops = 'Unknown'
for input in op.inputs:
    input_name = input.name
    if input_name.endswith(
            'crops:0') and input_name in tensor_values:
        # Record the value under `crops`. Assigning it to `paddings`
        # (as before) left `crops` stuck at 'Unknown' in the
        # BatchToSpaceND key and overwrote the paddings value.
        crops = tensor_values[input_name]
        break
if op.type == 'SpaceToBatchND':
key = '%s(block_shape=%s, paddings=%s)' % (op.type, block_shape, paddings)
key = '%s(block_shape=%s, paddings=%s)' % (op.type,
block_shape,
paddings)
else:
key = '%s(block_shape=%s, crops=%s)' % (op.type, block_shape, crops)
key = '%s(block_shape=%s, crops=%s)' % (op.type,
block_shape, crops)
print(key)
hist_inc(stats, key)
elif op.type == 'Pad':
paddings = 'Unknown'
for input in op.inputs:
input_name = input.name
if input_name.endswith('paddings:0') and input_name in tensor_values:
if input_name.endswith(
'paddings:0') and input_name in tensor_values:
paddings = tensor_values[input_name]
break
key = '%s(paddings=%s)' % (op.type, paddings)
......@@ -142,6 +166,7 @@ def main(unused_args):
for key, value in sorted(six.iteritems(stats)):
print('%s: %d' % (key, value))
def parse_args():
'''Parses command line arguments.'''
parser = argparse.ArgumentParser()
......@@ -152,6 +177,7 @@ def parse_args():
help='TensorFlow \'GraphDef\' file to load.')
return parser.parse_known_args()
if __name__ == '__main__':
FLAGS, unparsed = parse_args()
main(unused_args=[sys.argv[0]] + unparsed)
......@@ -7,7 +7,6 @@
# --target=//mace/ops:ops_test
# --stdout_processor=stdout_processor
import argparse
import random
import re
......@@ -15,15 +14,18 @@ import sys
import sh_commands
def stdout_processor(stdout, device_properties, abi):
    """Default stdout handler: ignores all of its arguments and does nothing."""
    return None
def ops_test_stdout_processor(stdout, device_properties, abi):
    """Raise if any line of `stdout` contains "Aborted" or "FAILED".

    `device_properties` and `abi` are accepted for signature compatibility
    with the other stdout processors and are not used here.

    Raises:
        Exception: with message "Command failed" on the first matching line.
    """
    failure_markers = ("Aborted", "FAILED")
    for text in stdout.split("\n"):
        if any(marker in text for marker in failure_markers):
            raise Exception("Command failed")
def ops_benchmark_stdout_processor(stdout, device_properties, abi):
stdout_lines = stdout.split("\n")
metrics = {}
......@@ -33,17 +35,20 @@ def ops_benchmark_stdout_processor(stdout, device_properties, abi):
line = line.strip()
parts = line.split()
if len(parts) == 5 and parts[0].startswith("BM_"):
metrics["%s.time_ms" % parts[0]] = str(float(parts[1])/1e6)
metrics["%s.time_ms" % parts[0]] = str(float(parts[1]) / 1e6)
metrics["%s.input_mb_per_sec" % parts[0]] = parts[3]
metrics["%s.gmacc_per_sec" % parts[0]] = parts[4]
platform = device_properties["ro.board.platform"].replace(" ", "-")
model = device_properties["ro.product.model"].replace(" ", "-")
tags = {"ro.board.platform": platform,
tags = {
"ro.board.platform": platform,
"ro.product.model": model,
"abi": abi}
sh_commands.falcon_push_metrics(metrics, tags=tags,
endpoint="mace_ops_benchmark")
"abi": abi
}
sh_commands.falcon_push_metrics(
metrics, tags=tags, endpoint="mace_ops_benchmark")
def parse_args():
"""Parses command line arguments."""
......@@ -57,22 +62,16 @@ def parse_args():
"--target_socs",
type=str,
default="all",
help="SoCs(ro.board.platform) to build, comma seperated list or all/random")
help="SoCs (ro.board.platform from getprop) to build, "
"comma seperated list or all/random")
parser.add_argument(
"--target",
type=str,
default="//...",
help="Bazel target to build")
"--target", type=str, default="//...", help="Bazel target to build")
parser.add_argument(
    "--run_target",
    # argparse applies `type` to the raw string, and bool("False") is True,
    # so type=bool turned every non-empty value into True. Parse the text
    # explicitly instead; the non-string default is left untouched.
    type=lambda v: str(v).lower() in ("true", "1", "yes", "y"),
    default=False,
    help="Whether to run the target")
parser.add_argument(
"--args",
type=str,
default="",
help="Command args")
parser.add_argument("--args", type=str, default="", help="Command args")
parser.add_argument(
"--stdout_processor",
type=str,
......@@ -80,6 +79,7 @@ def parse_args():
help="Stdout processing function, default: stdout_processor")
return parser.parse_known_args()
def main(unused_args):
target_socs = None
if FLAGS.target_socs != "all" and FLAGS.target_socs != "random":
......@@ -101,17 +101,25 @@ def main(unused_args):
sh_commands.bazel_build(target, abi=target_abi)
if FLAGS.run_target:
for serialno in target_devices:
if target_abi not in set(sh_commands.adb_supported_abis(serialno)):
print("Skip device %s which does not support ABI %s" % (serialno, target_abi))
if target_abi not in set(
sh_commands.adb_supported_abis(serialno)):
print("Skip device %s which does not support ABI %s" %
(serialno, target_abi))
continue
stdouts = sh_commands.adb_run(serialno, host_bin_path, bin_name,
stdouts = sh_commands.adb_run(
serialno,
host_bin_path,
bin_name,
args=FLAGS.args,
opencl_profiling=1,
vlog_level=0,
device_bin_path="/data/local/tmp/mace",
out_of_range_check=1)
device_properties = sh_commands.adb_getprop_by_serialno(serialno)
globals()[FLAGS.stdout_processor](stdouts, device_properties, target_abi)
device_properties = sh_commands.adb_getprop_by_serialno(
serialno)
globals()[FLAGS.stdout_processor](stdouts, device_properties,
target_abi)
if __name__ == "__main__":
FLAGS, unparsed = parse_args()
......
#-*- coding:utf8 -*-
import json
import socket
import itertools
import json, socket, itertools
class FalconCli(object):
def __init__(self, addr, debug=True, buf_size=1000):
self.socket_ = socket.create_connection(addr)
self.stream = self.socket_.makefile()
......@@ -16,16 +16,19 @@ class FalconCli(object):
self.stream.close()
@classmethod
def connect(cls,
            server="transfer.falcon.miliao.srv",
            port=8433,
            debug=True,
            buf_size=1000):
    """Open a FalconCli connected to `server`:`port`.

    Returns:
        A FalconCli instance, or None when the connection attempt raises
        socket.error (the error is printed rather than propagated).
    """
    try:
        return FalconCli((server, port), debug, buf_size)
    except socket.error as exc:
        # `except X as e` and a single parenthesised print argument behave
        # the same on Python 2.6+ and Python 3.
        print("error: connect to %s:%s error: %s" % (server, port, exc))
        return None
def call(self, name, *params):
request = dict(id=next(self.id_counter),
params=list(params),
method=name)
request = dict(
id=next(self.id_counter), params=list(params), method=name)
payload = json.dumps(request).encode()
if self.debug:
print "--> req:", payload
......@@ -49,7 +52,7 @@ class FalconCli(object):
resp = []
while True:
buf = lines[s:s+self.buf_size]
buf = lines[s:s + self.buf_size]
s = s + self.buf_size
if len(buf) == 0:
break
......@@ -57,4 +60,3 @@ class FalconCli(object):
resp.append(r)
return resp
......@@ -11,13 +11,16 @@ import re
# --input_file input_file
#
def generate_data(name, shape):
    """Write a random float32 array of `shape` to the input file for `name`.

    Values are drawn from np.random.random and rescaled to [-1, 1). The
    output path is FLAGS.input_file + "_" + `name`, with every run of
    non-alphanumeric characters in `name` collapsed to a single underscore.

    Args:
        name: Input node name used to derive the file suffix.
        shape: Shape passed to np.random.random.
    """
    # Re-seed with no argument so each invocation produces different data.
    np.random.seed()
    data = np.random.random(shape) * 2 - 1
    input_file_name = FLAGS.input_file + "_" + re.sub('[^0-9a-zA-Z]+', '_',
                                                      name)
    # Single-argument print call; the emitted text (including the double
    # space after the colon) matches the former two-item print statement.
    print('Generate input file:  %s' % input_file_name)
    data.astype(np.float32).tofile(input_file_name)
def main(unused_args):
input_names = [name for name in FLAGS.input_node.split(',')]
input_shapes = [shape for shape in FLAGS.input_shape.split(':')]
......@@ -27,29 +30,21 @@ def main(unused_args):
generate_data(input_names[i], shape)
print "Generate input file done."
def parse_args():
    """Parses command line arguments.

    Returns:
        The (namespace, remaining_args) pair from parse_known_args, so
        unrecognised options are returned rather than rejected.
    """
    parser = argparse.ArgumentParser()
    # Registers a "bool" type name; none of the options below use it.
    parser.register("type", "bool", lambda v: v.lower() == "true")
    parser.add_argument(
        "--input_file", type=str, default="", help="input file.")
    parser.add_argument(
        "--input_node", type=str, default="input_node", help="input node")
    parser.add_argument(
        "--input_shape", type=str, default="1,64,64,3", help="input shape.")
    return parser.parse_known_args()
if __name__ == '__main__':
FLAGS, unparsed = parse_args()
main(unused_args=[sys.argv[0]] + unparsed)
......@@ -34,7 +34,8 @@ def run_command(command):
print("Stderr msg:\n{}".format(err))
if result.returncode != 0:
raise Exception("Exit not 0 from bash with code: {}, command: {}".format(
raise Exception(
"Exit not 0 from bash with code: {}, command: {}".format(
result.returncode, command))
......@@ -63,10 +64,12 @@ def generate_version_code():
command = "bash tools/generate_version_code.sh"
run_command(command)
def generate_opencl_source_code():
    """Run tools/generate_opencl_code.sh in `source` mode via run_command."""
    command = "bash tools/generate_opencl_code.sh source"
    run_command(command)
def generate_opencl_binay_code(target_soc, model_output_dirs, pull_or_not):
cl_bin_dirs = []
for d in model_output_dirs:
......@@ -79,6 +82,7 @@ def generate_opencl_binay_code(target_soc, model_output_dirs, pull_or_not):
'binary', target_soc, cl_bin_dirs_str, int(pull_or_not))
run_command(command)
def generate_tuning_param_code(target_soc, model_output_dirs, pull_or_not):
cl_bin_dirs = []
for d in model_output_dirs:
......@@ -91,20 +95,24 @@ def generate_tuning_param_code(target_soc, model_output_dirs, pull_or_not):
target_soc, cl_bin_dirs_str, int(pull_or_not))
run_command(command)
def generate_code(target_soc, model_output_dirs, pull_or_not):
    """Generate the OpenCL binary code, then the tuning parameter code.

    All three arguments are forwarded unchanged to both generators.
    """
    generate_opencl_binay_code(target_soc, model_output_dirs, pull_or_not)
    generate_tuning_param_code(target_soc, model_output_dirs, pull_or_not)
def clear_env(target_soc):
    """Run tools/clear_env.sh for `target_soc` via run_command."""
    # NOTE(review): target_soc is interpolated into a shell command line
    # unquoted; presumably it is always a plain SoC identifier -- confirm.
    command = "bash tools/clear_env.sh {}".format(target_soc)
    run_command(command)
def input_file_name(input_name):
    """Return the input file base name for the node `input_name`.

    The result is $INPUT_FILE_NAME + "_" + `input_name`, with each run of
    non-alphanumeric characters in `input_name` replaced by one underscore.

    Raises:
        KeyError: if INPUT_FILE_NAME is not set in the environment.
    """
    return os.environ['INPUT_FILE_NAME'] + '_' + \
        re.sub('[^0-9a-zA-Z]+', '_', input_name)
def generate_random_input(target_soc, model_output_dir,
input_names, input_files):
def generate_random_input(target_soc, model_output_dir, input_names,
input_files):
generate_data_or_not = True
command = "bash tools/validate_tools.sh {} {} {}".format(
target_soc, model_output_dir, int(generate_data_or_not))
......@@ -122,16 +130,19 @@ def generate_random_input(target_soc, model_output_dir,
else:
input_name_list.append(input_names)
if len(input_file_list) != len(input_name_list):
raise Exception('If input_files set, the input files should match the input names.')
raise Exception('If input_files set, the input files should '
'match the input names.')
for i in range(len(input_file_list)):
if input_file_list[i] is not None:
dst_input_file = model_output_dir + '/' + input_file_name(input_name_list[i])
dst_input_file = model_output_dir + '/' + input_file_name(
input_name_list[i])
if input_file_list[i].startswith("http://") or \
input_file_list[i].startswith("https://"):
urllib.urlretrieve(input_file_list[i], dst_input_file)
else:
shutil.copy(input_file_list[i], dst_input_file)
def generate_model_code():
    """Run tools/generate_model_code.sh via run_command."""
    command = "bash tools/generate_model_code.sh"
    run_command(command)
......@@ -155,10 +166,17 @@ def tuning_run(model_name,
# TODO(yejianwu) refactoring the hackish code
stdout_buff = []
process_output = sh_commands.make_output_processor(stdout_buff)
p = sh.bash("tools/tuning_run.sh", target_soc, model_output_dir,
running_round, int(tuning),
restart_round, option_args, _out=process_output,
_bg=True, _err_to_out=True)
p = sh.bash(
"tools/tuning_run.sh",
target_soc,
model_output_dir,
running_round,
int(tuning),
restart_round,
option_args,
_out=process_output,
_bg=True,
_err_to_out=True)
p.wait()
metrics = {}
for line in stdout_buff:
......@@ -166,18 +184,23 @@ def tuning_run(model_name,
parts = line.split()
if len(parts) == 6 and parts[0].startswith("time"):
metrics["%s.create_net_ms" % model_name] = str(float(parts[1]))
metrics["%s.mace_engine_ctor_ms" % model_name] = str(float(parts[2]))
metrics["%s.mace_engine_ctor_ms" % model_name] = str(
float(parts[2]))
metrics["%s.init_ms" % model_name] = str(float(parts[3]))
metrics["%s.warmup_ms" % model_name] = str(float(parts[4]))
if float(parts[5]) > 0:
metrics["%s.avg_latency_ms" % model_name] = str(float(parts[5]))
tags = {"ro.board.platform": target_soc,
metrics["%s.avg_latency_ms" % model_name] = str(
float(parts[5]))
tags = {
"ro.board.platform": target_soc,
"abi": target_abi,
# "runtime": target_runtime, # TODO(yejianwu) Add the actual runtime
"round": running_round, # TODO(yejianwu) change this to source/binary
"tuning": tuning}
sh_commands.falcon_push_metrics(metrics, endpoint="mace_model_benchmark",
tags=tags)
"tuning": tuning
}
sh_commands.falcon_push_metrics(
metrics, endpoint="mace_model_benchmark", tags=tags)
def benchmark_model(target_soc, model_output_dir, option_args=''):
command = "bash tools/benchmark.sh {} {} \"{}\"".format(
......@@ -188,8 +211,8 @@ def benchmark_model(target_soc, model_output_dir, option_args=''):
def run_model(model_name, target_runtime, target_abi, target_soc,
              model_output_dir, running_round, restart_round, option_args):
    """Run a model through tuning_run with the seventh argument fixed to False.

    Every parameter is forwarded to tuning_run unchanged.
    """
    # NOTE(review): the literal False presumably lands in tuning_run's
    # `tuning` parameter -- confirm against its signature.
    tuning_run(model_name, target_runtime, target_abi, target_soc,
               model_output_dir, running_round, False, restart_round,
               option_args)
def generate_production_code(target_soc, model_output_dirs, pull_or_not):
......@@ -251,8 +274,8 @@ def merge_libs_and_tuning_results(target_soc, output_dir, model_output_dirs):
build_production_code()
model_output_dirs_str = ",".join(model_output_dirs)
command = "bash tools/merge_libs.sh {} {} {}".format(target_soc, output_dir,
model_output_dirs_str)
command = "bash tools/merge_libs.sh {} {} {}".format(
target_soc, output_dir, model_output_dirs_str)
run_command(command)
......@@ -260,6 +283,7 @@ def packaging_lib_file(output_dir):
command = "bash tools/packaging_lib.sh {}".format(output_dir)
run_command(command)
def download_model_files(model_file_path,
model_output_dir,
weight_file_path=""):
......@@ -270,10 +294,9 @@ def download_model_files(model_file_path,
if weight_file_path.startswith("http://") or \
weight_file_path.startswith("https://"):
os.environ[
"WEIGHT_FILE_PATH"] = model_output_dir + "/model.caffemodel"
urllib.urlretrieve(weight_file_path,
os.environ["WEIGHT_FILE_PATH"])
os.environ["WEIGHT_FILE_PATH"] = model_output_dir + "/model.caffemodel"
urllib.urlretrieve(weight_file_path, os.environ["WEIGHT_FILE_PATH"])
def md5sum(str):
md5 = hashlib.md5()
......@@ -306,7 +329,10 @@ def parse_args():
default=10,
help="The model throughput test running seconds.")
parser.add_argument(
"--restart_round", type=int, default=1, help="The model restart round.")
"--restart_round",
type=int,
default=1,
help="The model restart round.")
parser.add_argument(
"--tuning", type="bool", default="true", help="Tune opencl params.")
parser.add_argument(
......@@ -321,14 +347,16 @@ def parse_args():
help="SoCs to build, comma seperated list (getprop ro.board.platform)")
return parser.parse_known_args()
def set_environment(configs):
    """Export build settings as environment variables.

    Sets EMBED_MODEL_DATA and VLOG_LEVEL from `configs`, PROJECT_NAME from
    the base name of FLAGS.config with its extension removed, and the fixed
    INPUT_FILE_NAME / OUTPUT_FILE_NAME prefixes.

    Args:
        configs: Mapping that provides "embed_model_data" and "vlog_level".
    """
    os.environ["EMBED_MODEL_DATA"] = str(configs["embed_model_data"])
    os.environ["VLOG_LEVEL"] = str(configs["vlog_level"])
    os.environ["PROJECT_NAME"] = os.path.splitext(
        os.path.basename(FLAGS.config))[0]
    os.environ['INPUT_FILE_NAME'] = "model_input"
    os.environ['OUTPUT_FILE_NAME'] = "model_out"
def main(unused_args):
configs = parse_model_configs()
......@@ -343,13 +371,16 @@ def main(unused_args):
if not os.path.exists(FLAGS.output_dir):
os.makedirs(FLAGS.output_dir)
elif os.path.exists(os.path.join(FLAGS.output_dir, "libmace")):
shutil.rmtree(os.path.join(FLAGS.output_dir, os.environ["PROJECT_NAME"]))
os.makedirs(os.path.join(FLAGS.output_dir, os.environ["PROJECT_NAME"]))
shutil.rmtree(
os.path.join(FLAGS.output_dir, os.environ["PROJECT_NAME"]))
os.makedirs(
os.path.join(FLAGS.output_dir, os.environ["PROJECT_NAME"]))
generate_version_code()
generate_opencl_source_code()
option_args = ' '.join([arg for arg in unused_args if arg.startswith('--')])
option_args = ' '.join(
[arg for arg in unused_args if arg.startswith('--')])
available_socs = sh_commands.adb_get_all_socs()
target_socs = available_socs
......@@ -362,10 +393,10 @@ def main(unused_args):
target_socs = target_socs & socs
missing_socs = socs.difference(target_socs)
if len(missing_socs) > 0:
print("Error: devices with SoCs are not connected %s" % missing_socs)
print(
"Error: devices with SoCs are not connected %s" % missing_socs)
exit(1)
for target_soc in target_socs:
for target_abi in configs["target_abis"]:
global_runtime = get_global_runtime(configs)
......@@ -373,28 +404,27 @@ def main(unused_args):
os.environ["TARGET_ABI"] = target_abi
model_output_dirs = []
for model_name in configs["models"]:
print '=======================', model_name, '======================='
print '===================', model_name, '==================='
# Transfer params by environment
os.environ["MODEL_TAG"] = model_name
model_config = configs["models"][model_name]
input_file_list = model_config.get("validation_inputs_data", [])
input_file_list = model_config.get("validation_inputs_data",
[])
for key in model_config:
if key in ['input_nodes', 'output_nodes'] and isinstance(
model_config[key], list):
os.environ[key.upper()] = ",".join(model_config[key])
elif key in ['input_shapes', 'output_shapes'] and isinstance(
model_config[key], list):
elif key in ['input_shapes', 'output_shapes'
] and isinstance(model_config[key], list):
os.environ[key.upper()] = ":".join(model_config[key])
else:
os.environ[key.upper()] = str(model_config[key])
# Create model build directory
model_path_digest = md5sum(model_config["model_file_path"])
model_output_dir = "%s/%s/%s/%s/%s/%s/%s" % (FLAGS.output_dir,
os.environ["PROJECT_NAME"],
"build", model_name,
model_path_digest,
target_soc, target_abi)
model_output_dir = "%s/%s/%s/%s/%s/%s/%s" % (
FLAGS.output_dir, os.environ["PROJECT_NAME"], "build",
model_name, model_path_digest, target_soc, target_abi)
model_output_dirs.append(model_output_dir)
if FLAGS.mode == "build" or FLAGS.mode == "all":
......@@ -404,22 +434,27 @@ def main(unused_args):
clear_env(target_soc)
download_model_files(model_config["model_file_path"],
model_output_dir, model_config.get("weight_file_path", ""))
model_output_dir,
model_config.get("weight_file_path", ""))
if FLAGS.mode == "build" or FLAGS.mode == "run" or FLAGS.mode == "validate"\
or FLAGS.mode == "benchmark" or FLAGS.mode == "all":
if FLAGS.mode == "build" or FLAGS.mode == "run" or \
FLAGS.mode == "validate" or \
FLAGS.mode == "benchmark" or FLAGS.mode == "all":
generate_random_input(target_soc, model_output_dir,
model_config['input_nodes'], input_file_list)
model_config['input_nodes'],
input_file_list)
if FLAGS.mode == "build" or FLAGS.mode == "all":
generate_model_code()
build_mace_run_prod(model_name, global_runtime, target_abi,
target_soc, model_output_dir, FLAGS.tuning)
target_soc, model_output_dir,
FLAGS.tuning)
if FLAGS.mode == "run" or FLAGS.mode == "validate" or FLAGS.mode == "all":
run_model(model_name, global_runtime, target_abi, target_soc,
model_output_dir, FLAGS.round, FLAGS.restart_round,
option_args)
if FLAGS.mode == "run" or FLAGS.mode == "validate" or \
FLAGS.mode == "all":
run_model(model_name, global_runtime, target_abi,
target_soc, model_output_dir, FLAGS.round,
FLAGS.restart_round, option_args)
if FLAGS.mode == "benchmark":
benchmark_model(target_soc, model_output_dir, option_args)
......@@ -427,14 +462,18 @@ def main(unused_args):
if FLAGS.mode == "validate" or FLAGS.mode == "all":
validate_model(target_soc, model_output_dir)
if FLAGS.mode == "build" or FLAGS.mode == "merge" or FLAGS.mode == "all":
if FLAGS.mode == "build" or FLAGS.mode == "merge" or \
FLAGS.mode == "all":
merge_libs_and_tuning_results(
target_soc, FLAGS.output_dir + "/" + os.environ["PROJECT_NAME"],
target_soc,
FLAGS.output_dir + "/" + os.environ["PROJECT_NAME"],
model_output_dirs)
if FLAGS.mode == "throughput_test":
merged_lib_file = FLAGS.output_dir + "/%s/%s/libmace_%s.%s.a" % \
(os.environ["PROJECT_NAME"], target_abi, os.environ["PROJECT_NAME"], target_soc)
merged_lib_file = FLAGS.output_dir + \
"/%s/%s/libmace_%s.%s.a" % \
(os.environ["PROJECT_NAME"], target_abi,
os.environ["PROJECT_NAME"], target_soc)
generate_random_input(target_soc, FLAGS.output_dir, [], [])
for model_name in configs["models"]:
runtime = configs["models"][model_name]["runtime"]
......@@ -449,4 +488,3 @@ def main(unused_args):
if __name__ == "__main__":
FLAGS, unparsed = parse_args()
main(unused_args=[sys.argv[0]] + unparsed)
......@@ -3,18 +3,22 @@ import re
import time
import falcon_cli
################################
# common
################################
def strip_invalid_utf8(str):
    """Invoke `iconv -c -t UTF-8` on `str` through the sh module.

    Returns whatever sh.iconv returns.
    """
    # NOTE(review): `str` is passed as a positional argument to iconv, not
    # as standard input -- confirm this is the intended way to feed text.
    return sh.iconv(str, "-c", "-t", "UTF-8")
def make_output_processor(buff):
    """Return a per-line callback that echoes and records output.

    Args:
        buff: List that receives every line passed to the callback.

    Returns:
        A function taking one line; it prints the line stripped of
        surrounding whitespace and appends the unmodified line to `buff`.
    """
    def process_output(line):
        print(line.strip())
        # Keep the raw line (including its newline) for later parsing.
        buff.append(line)

    return process_output
################################
# adb commands
################################
......@@ -23,11 +27,12 @@ def adb_split_stdout(stdout_str):
# Filter out last empty line
return [l.strip() for l in stdout_str.split('\n') if len(l.strip()) > 0]
def adb_devices(target_socs=None):
outputs = sh.grep(sh.adb("devices"), "^[A-Za-z0-9]\+[[:space:]]\+device$")
raw_lists = sh.cut(outputs, "-f1")
device_ids = adb_split_stdout(raw_lists)
if target_socs != None:
if target_socs is not None:
target_socs_set = set(target_socs)
target_devices = []
for serialno in device_ids:
......@@ -38,6 +43,7 @@ def adb_devices(target_socs=None):
else:
return device_ids
def adb_getprop_by_serialno(serialno):
outputs = sh.adb("-s", serialno, "shell", "getprop")
raw_props = adb_split_stdout(outputs)
......@@ -49,12 +55,14 @@ def adb_getprop_by_serialno(serialno):
props[m.group(1)] = m.group(2)
return props
def adb_supported_abis(serialno):
    """Return the ABI names listed in the device's ro.product.cpu.abilist.

    Args:
        serialno: Device serial passed to adb_getprop_by_serialno.

    Returns:
        List of ABI strings, split on commas with whitespace stripped.
    """
    props = adb_getprop_by_serialno(serialno)
    # NOTE(review): assumes the property is always present; a missing key
    # would presumably raise KeyError -- confirm props is a plain dict.
    abilist_str = props["ro.product.cpu.abilist"]
    abis = [abi.strip() for abi in abilist_str.split(',')]
    return abis
def adb_get_all_socs():
socs = []
for d in adb_devices():
......@@ -62,7 +70,10 @@ def adb_get_all_socs():
socs.append(props["ro.board.platform"])
return set(socs)
def adb_run(serialno, host_bin_path, bin_name,
def adb_run(serialno,
host_bin_path,
bin_name,
args="",
opencl_profiling=1,
vlog_level=0,
......@@ -71,7 +82,9 @@ def adb_run(serialno, host_bin_path, bin_name,
host_bin_full_path = "%s/%s" % (host_bin_path, bin_name)
device_bin_full_path = "%s/%s" % (device_bin_path, bin_name)
props = adb_getprop_by_serialno(serialno)
print("=====================================================================")
print(
"====================================================================="
)
print("Run on device: %s, %s, %s" % (serialno, props["ro.board.platform"],
props["ro.product.model"]))
sh.adb("-s", serialno, "shell", "rm -rf %s" % device_bin_path)
......@@ -79,12 +92,19 @@ def adb_run(serialno, host_bin_path, bin_name,
print("Push %s to %s" % (host_bin_full_path, device_bin_full_path))
sh.adb("-s", serialno, "push", host_bin_full_path, device_bin_full_path)
print("Run %s" % device_bin_full_path)
stdout_buff=[]
stdout_buff = []
process_output = make_output_processor(stdout_buff)
p = sh.adb("-s", serialno, "shell",
"MACE_OUT_OF_RANGE_CHECK=%d MACE_OPENCL_PROFILING=%d MACE_CPP_MIN_VLOG_LEVEL=%d %s %s" %
(out_of_range_check, opencl_profiling, vlog_level, device_bin_full_path, args),
_out=process_output, _bg=True, _err_to_out=True)
p = sh.adb(
"-s",
serialno,
"shell",
"MACE_OUT_OF_RANGE_CHECK=%d MACE_OPENCL_PROFILING=%d "
"MACE_CPP_MIN_VLOG_LEVEL=%d %s %s" %
(out_of_range_check, opencl_profiling, vlog_level,
device_bin_full_path, args),
_out=process_output,
_bg=True,
_err_to_out=True)
p.wait()
return "".join(stdout_buff)
......@@ -94,11 +114,14 @@ def adb_run(serialno, host_bin_path, bin_name,
################################
def bazel_build(target, strip="always", abi="armeabi-v7a"):
print("Build %s with ABI %s" % (target, abi))
stdout_buff=[]
stdout_buff = []
process_output = make_output_processor(stdout_buff)
p= sh.bazel("build",
"-c", "opt",
"--strip", strip,
p = sh.bazel(
"build",
"-c",
"opt",
"--strip",
strip,
"--verbose_failures",
target,
"--crosstool_top=//external:android/crosstool",
......@@ -109,12 +132,17 @@ def bazel_build(target, strip="always", abi="armeabi-v7a"):
"--copt=-DMACE_DISABLE_NO_TUNING_WARNING",
"--copt=-Werror=return-type",
"--copt=-O3",
"--define", "neon=true",
"--define", "openmp=true",
_out=process_output, _bg=True, _err_to_out=True)
"--define",
"neon=true",
"--define",
"openmp=true",
_out=process_output,
_bg=True,
_err_to_out=True)
p.wait()
return "".join(stdout_buff)
def bazel_target_to_bin(target):
# change //mace/a/b:c to bazel-bin/mace/a/b/c
prefix, bin_name = target.split(':')
......@@ -124,26 +152,32 @@ def bazel_target_to_bin(target):
host_bin_path = "bazel-bin/%s" % prefix
return host_bin_path, bin_name
################################
# mace commands
################################
# TODO this should be refactored
def gen_encrypted_opencl_source(codegen_path="mace/codegen"):
    """Generate <codegen_path>/opencl/opencl_encrypt_program.cc.

    Creates the opencl directory if needed, then runs
    encrypt_opencl_codegen.py over ./mace/kernels/opencl/cl/.
    """
    sh.mkdir("-p", "%s/opencl" % codegen_path)
    sh.python(
        "mace/python/tools/encrypt_opencl_codegen.py",
        "--cl_kernel_dir=./mace/kernels/opencl/cl/",
        "--output_path=%s/opencl/opencl_encrypt_program.cc" % codegen_path)
def gen_mace_version(codegen_path="mace/codegen"):
    """Generate <codegen_path>/version/version.cc.

    Creates the version directory if needed, then runs
    mace/tools/git/gen_version_source.sh with the output path.
    """
    sh.mkdir("-p", "%s/version" % codegen_path)
    sh.bash("mace/tools/git/gen_version_source.sh",
            "%s/version/version.cc" % codegen_path)
def gen_compiled_opencl_source(codegen_path="mace/codegen"):
    """Generate <codegen_path>/opencl/opencl_compiled_program.cc.

    Creates the opencl directory if needed, then runs opencl_codegen.py
    with only the output path given.
    """
    sh.mkdir("-p", "%s/opencl" % codegen_path)
    sh.python(
        "mace/python/tools/opencl_codegen.py",
        "--output_path=%s/opencl/opencl_compiled_program.cc" % codegen_path)
################################
# falcon
################################
......@@ -156,10 +190,10 @@ def falcon_tags(tags_dict):
tags = tags + ",%s=%s" % (k, v)
return tags
def falcon_push_metrics(metrics, endpoint="mace_dev", tags={}):
cli = falcon_cli.FalconCli.connect(server="transfer.falcon.miliao.srv",
port=8433,
debug=False)
cli = falcon_cli.FalconCli.connect(
server="transfer.falcon.miliao.srv", port=8433, debug=False)
ts = int(time.time())
falcon_metrics = [{
"endpoint": endpoint,
......@@ -171,4 +205,3 @@ def falcon_push_metrics(metrics, endpoint="mace_dev", tags={}):
"counterType": "GAUGE"
} for key, value in metrics.iteritems()]
cli.update(falcon_metrics)
......@@ -20,29 +20,33 @@ from scipy import stats
# --input_shape 1,64,64,3 \
# --output_shape 1,64,64,2
def load_data(file):
    """Load a raw float32 dump from `file`.

    Args:
        file: Path to a file of raw float32 values.

    Returns:
        A 1-D float32 array read with np.fromfile, or an empty array
        (np.empty([0])) when `file` is not an existing regular file.
    """
    if os.path.isfile(file):
        return np.fromfile(file=file, dtype=np.float32)
    else:
        # A missing file yields an empty array rather than an error.
        return np.empty([0])
def format_output_name(name):
    """Return `name` with each non-alphanumeric run replaced by one "_"."""
    return re.sub('[^0-9a-zA-Z]+', '_', name)
def compare_output(output_name, mace_out_value, out_value):
if mace_out_value.size != 0:
out_value = out_value.reshape(-1)
mace_out_value = mace_out_value.reshape(-1)
assert len(out_value) == len(mace_out_value)
similarity = (1 - spatial.distance.cosine(out_value, mace_out_value))
print output_name, 'MACE VS', FLAGS.platform.upper(), 'similarity: ', similarity
print output_name, 'MACE VS', FLAGS.platform.upper(
), 'similarity: ', similarity
if (FLAGS.mace_runtime == "cpu" and similarity > 0.999) or \
(FLAGS.mace_runtime == "neon" and similarity > 0.999) or \
(FLAGS.mace_runtime == "gpu" and similarity > 0.995) or \
(FLAGS.mace_runtime == "dsp" and similarity > 0.930):
print '=======================Similarity Test Passed======================'
print '===================Similarity Test Passed=================='
else:
print '=======================Similarity Test Failed======================'
print '===================Similarity Test Failed=================='
sys.exit(-1)
else:
print '=======================Skip empty node==================='
......@@ -66,21 +70,28 @@ def validate_tf_model(input_names, input_shapes, output_names):
tf.import_graph_def(input_graph_def, name="")
input_dict = {}
for i in range(len(input_names)):
input_value = load_data(FLAGS.input_file + "_" + input_names[i])
input_value = load_data(
FLAGS.input_file + "_" + input_names[i])
input_value = input_value.reshape(input_shapes[i])
input_node = graph.get_tensor_by_name(input_names[i] + ':0')
input_node = graph.get_tensor_by_name(
input_names[i] + ':0')
input_dict[input_node] = input_value
output_nodes = []
for name in output_names:
output_nodes.extend([graph.get_tensor_by_name(name + ':0')])
output_nodes.extend(
[graph.get_tensor_by_name(name + ':0')])
output_values = session.run(output_nodes, feed_dict=input_dict)
for i in range(len(output_names)):
output_file_name = FLAGS.mace_out_file + "_" + format_output_name(output_names[i])
output_file_name = FLAGS.mace_out_file + "_" + \
format_output_name(output_names[i])
mace_out_value = load_data(output_file_name)
compare_output(output_names[i], mace_out_value, output_values[i])
compare_output(output_names[i], mace_out_value,
output_values[i])
def validate_caffe_model(input_names, input_shapes, output_names, output_shapes):
def validate_caffe_model(input_names, input_shapes, output_names,
output_shapes):
os.environ['GLOG_minloglevel'] = '1' # suprress Caffe verbose prints
import caffe
if not os.path.isfile(FLAGS.model_file):
......@@ -96,7 +107,8 @@ def validate_caffe_model(input_names, input_shapes, output_names, output_shapes)
for i in range(len(input_names)):
input_value = load_data(FLAGS.input_file + "_" + input_names[i])
input_value = input_value.reshape(input_shapes[i]).transpose((0, 3, 1, 2))
input_value = input_value.reshape(input_shapes[i]).transpose((0, 3, 1,
2))
input_blob_name = input_names[i]
try:
if input_names[i] in net.top_names:
......@@ -110,16 +122,20 @@ def validate_caffe_model(input_names, input_shapes, output_names, output_shapes)
for i in range(len(output_names)):
value = net.blobs[net.top_names[output_names[i]][0]].data
out_shape = output_shapes[i]
out_shape[1], out_shape[2], out_shape[3] = out_shape[3], out_shape[1], out_shape[2]
out_shape[1], out_shape[2], out_shape[3] = out_shape[3], out_shape[
1], out_shape[2]
value = value.reshape(out_shape).transpose((0, 2, 3, 1))
output_file_name = FLAGS.mace_out_file + "_" + format_output_name(output_names[i])
output_file_name = FLAGS.mace_out_file + "_" + format_output_name(
output_names[i])
mace_out_value = load_data(output_file_name)
compare_output(output_names[i], mace_out_value, value)
def main(unused_args):
input_names = [name for name in FLAGS.input_node.split(',')]
input_shape_strs = [shape for shape in FLAGS.input_shape.split(':')]
input_shapes = [[int(x) for x in shape.split(',')] for shape in input_shape_strs]
input_shapes = [[int(x) for x in shape.split(',')]
for shape in input_shape_strs]
output_names = [name for name in FLAGS.output_node.split(',')]
assert len(input_names) == len(input_shapes)
......@@ -127,18 +143,18 @@ def main(unused_args):
validate_tf_model(input_names, input_shapes, output_names)
elif FLAGS.platform == 'caffe':
output_shape_strs = [shape for shape in FLAGS.output_shape.split(':')]
output_shapes = [[int(x) for x in shape.split(',')] for shape in output_shape_strs]
validate_caffe_model(input_names, input_shapes, output_names, output_shapes)
output_shapes = [[int(x) for x in shape.split(',')]
for shape in output_shape_strs]
validate_caffe_model(input_names, input_shapes, output_names,
output_shapes)
def parse_args():
"""Parses command line arguments."""
parser = argparse.ArgumentParser()
parser.register("type", "bool", lambda v: v.lower() == "true")
parser.add_argument(
"--platform",
type=str,
default="",
help="Tensorflow or Caffe.")
"--platform", type=str, default="", help="Tensorflow or Caffe.")
parser.add_argument(
"--model_file",
type=str,
......@@ -150,40 +166,22 @@ def parse_args():
default="",
help="caffe model file to load.")
parser.add_argument(
"--input_file",
type=str,
default="",
help="input file.")
"--input_file", type=str, default="", help="input file.")
parser.add_argument(
"--mace_out_file",
type=str,
default="",
help="mace output file to load.")
parser.add_argument(
"--mace_runtime",
type=str,
default="gpu",
help="mace runtime device.")
"--mace_runtime", type=str, default="gpu", help="mace runtime device.")
parser.add_argument(
"--input_shape",
type=str,
default="1,64,64,3",
help="input shape.")
"--input_shape", type=str, default="1,64,64,3", help="input shape.")
parser.add_argument(
"--output_shape",
type=str,
default="1,64,64,2",
help="output shape.")
"--output_shape", type=str, default="1,64,64,2", help="output shape.")
parser.add_argument(
"--input_node",
type=str,
default="input_node",
help="input node")
"--input_node", type=str, default="input_node", help="input node")
parser.add_argument(
"--output_node",
type=str,
default="output_node",
help="output node")
"--output_node", type=str, default="output_node", help="output node")
return parser.parse_known_args()
......@@ -191,4 +189,3 @@ def parse_args():
if __name__ == '__main__':
FLAGS, unparsed = parse_args()
main(unused_args=[sys.argv[0]] + unparsed)
......@@ -11,12 +11,8 @@ G_T = {}
# f(2, 3)
A_T[4] = np.array([[1, 1, 1, 0], [0, 1, -1, -1]]).astype(np.float32)
A[4] = np.transpose(A_T[4])
B_T[4] = np.array([
[1, 0, -1, 0],
[0, 1, 1, 0],
[0, -1, 1, 0],
[0, 1, 0, -1]
]).astype(np.float32)
B_T[4] = np.array([[1, 0, -1, 0], [0, 1, 1, 0], [0, -1, 1, 0],
[0, 1, 0, -1]]).astype(np.float32)
B[4] = np.transpose(B_T[4])
G[4] = np.array([
[1, 0, 0],
......@@ -44,45 +40,45 @@ B_T[6] = np.array([
]).astype(np.float32)
B[6] = np.transpose(B_T[6])
G[6] = np.array([
[1/4.0 , 0 , 0 ],
[-1/6.0, -1/6.0 , -1/6.0],
[-1/6.0, 1/6.0 , -1/6.0],
[1/24.0, 1/12.0 , 1/6.0 ],
[1/24.0, -1/12.0, 1/6.0 ],
[ 0 , 0 , 1 ],
[1 / 4.0, 0, 0],
[-1 / 6.0, -1 / 6.0, -1 / 6.0],
[-1 / 6.0, 1 / 6.0, -1 / 6.0],
[1 / 24.0, 1 / 12.0, 1 / 6.0],
[1 / 24.0, -1 / 12.0, 1 / 6.0],
[0, 0, 1],
]).astype(np.float32)
G_T[6] = np.transpose(G[6])
# f(6, 3)
A_T[8] = np.array([
[1, 1, 1 , 1 , 1 , 1 , 1 , 0],
[0, 1, -1, 2 , -2 , 1/2. , -1/2. , 0],
[0, 1, 1 , 4 , 4 , 1/4. , 1/4. , 0],
[0, 1, -1, 8 , -8 , 1/8. , -1/8. , 0],
[0, 1, 1 , 16, 16 , 1/16., 1/16. , 0],
[0, 1, -1, 32, -32, 1/32., -1/32., 1],
[1, 1, 1, 1, 1, 1, 1, 0],
[0, 1, -1, 2, -2, 1 / 2., -1 / 2., 0],
[0, 1, 1, 4, 4, 1 / 4., 1 / 4., 0],
[0, 1, -1, 8, -8, 1 / 8., -1 / 8., 0],
[0, 1, 1, 16, 16, 1 / 16., 1 / 16., 0],
[0, 1, -1, 32, -32, 1 / 32., -1 / 32., 1],
]).astype(np.float32)
A[8] = np.transpose(A_T[8])
B_T[8] = np.array([
[1, 0 , -21/4., 0 , 21/4., 0 , -1, 0],
[0, 1 , 1 , -17/4., -17/4., 1 , 1 , 0],
[0, -1 , 1 , 17/4. , -17/4., -1 , 1 , 0],
[0, 1/2. , 1/4. , -5/2. , -5/4., 2 , 1 , 0],
[0, -1/2., 1/4. , 5/2. , -5/4., -2 , 1 , 0],
[0, 2 , 4 , -5/2. , -5 , 1/2. , 1 , 0],
[0, -2 , 4 , 5/2. , -5 , -1/2. , 1 , 0],
[0, -1 , 0 , 21/4. , 0 , -21/4., 0 , 1],
[1, 0, -21 / 4., 0, 21 / 4., 0, -1, 0],
[0, 1, 1, -17 / 4., -17 / 4., 1, 1, 0],
[0, -1, 1, 17 / 4., -17 / 4., -1, 1, 0],
[0, 1 / 2., 1 / 4., -5 / 2., -5 / 4., 2, 1, 0],
[0, -1 / 2., 1 / 4., 5 / 2., -5 / 4., -2, 1, 0],
[0, 2, 4, -5 / 2., -5, 1 / 2., 1, 0],
[0, -2, 4, 5 / 2., -5, -1 / 2., 1, 0],
[0, -1, 0, 21 / 4., 0, -21 / 4., 0, 1],
]).astype(np.float32)
B[8] = np.transpose(B_T[8])
G[8] = np.array([
[ 1 , 0 , 0 ],
[-2/9. , -2/9. , -2/9.],
[-2/9. , 2/9. , -2/9.],
[1/90. , 1/45. , 2/45.],
[1/90. , -1/45. , 2/45.],
[32/45., 16/45. , 8/45.],
[32/45., -16/45., 8/45.],
[ 0 , 0 , 1 ],
[1, 0, 0],
[-2 / 9., -2 / 9., -2 / 9.],
[-2 / 9., 2 / 9., -2 / 9.],
[1 / 90., 1 / 45., 2 / 45.],
[1 / 90., -1 / 45., 2 / 45.],
[32 / 45., 16 / 45., 8 / 45.],
[32 / 45., -16 / 45., 8 / 45.],
[0, 0, 1],
]).astype(np.float32)
G_T[8] = np.transpose(G[8])
......@@ -112,7 +108,7 @@ def winograd_conv(m, r, input, filter):
for c in range(C):
u = np.dot(np.dot(G[alpha], filter[k, c, :, :]), G_T[alpha])
for i in range(alpha):
for j in range(alpha) :
for j in range(alpha):
U[(i * alpha + j) * K + k, c] = u[i, j]
print 'filter out: ', U.shape
......@@ -129,24 +125,24 @@ def winograd_conv(m, r, input, filter):
w_idx = t % rounded_w
h_start = h_idx * m
w_start = w_idx * m
h_end = min(h_start+alpha, input_shape[2])
w_end = min(w_start+alpha, input_shape[3])
h_end = min(h_start + alpha, input_shape[2])
w_end = min(w_start + alpha, input_shape[3])
d = np.zeros((alpha, alpha))
d[0:h_end-h_start, 0:w_end-w_start] = \
input[n, c, h_start:h_end, w_start:w_end]
v = np.dot(np.dot(B_T[alpha], d), B[alpha])
for i in range(alpha):
for j in range(alpha):
V[(i*alpha+j)*C + c, p] = v[i, j]
V[(i * alpha + j) * C + c, p] = v[i, j]
tmp = V.reshape(alpha_square, C, P, 1)
print 'input out: ', tmp.shape
tmp.astype(np.float32).tofile("C")
M = np.zeros((alpha_square * K, P))
for i in range(alpha_square):
u = U[i * K : (i+1) * K, :]
v = V[i * C : (i+1) * C, :]
M[i * K : (i+1) * K, :] = np.dot(u, v)
u = U[i * K:(i + 1) * K, :]
v = V[i * C:(i + 1) * C, :]
M[i * K:(i + 1) * K, :] = np.dot(u, v)
print 'M shape: ', M.shape
M.astype(np.float32).tofile("gemm")
......@@ -156,7 +152,7 @@ def winograd_conv(m, r, input, filter):
tm = np.zeros((alpha, alpha))
for i in range(alpha):
for j in range(alpha):
tm[i][j] = M[(i*alpha+j) * K + k, b]
tm[i][j] = M[(i * alpha + j) * K + k, b]
y = np.dot(np.dot(A_T[alpha], tm), A[alpha])
for i in range(m):
for j in range(m):
......@@ -173,6 +169,7 @@ def winograd_conv(m, r, input, filter):
return res
def tf_conv(input, filter):
conv_op = tf.nn.conv2d(input, filter, [1, 1, 1, 1], 'VALID')
with tf.Session() as sess:
......@@ -206,4 +203,3 @@ def main():
if __name__ == '__main__':
main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册