提交 a66e0e35 编写于 作者: L liuqi

Finish gcn converter and validation.

上级 8d834826
...@@ -129,15 +129,21 @@ int main(int argc, char **argv) { ...@@ -129,15 +129,21 @@ int main(int argc, char **argv) {
// save output // save output
const Tensor *output = ws.GetTensor(output_node + ":0"); const Tensor *output = ws.GetTensor(output_node + ":0");
Tensor::MappingGuard output_guard(output); std::remove(output_file.c_str());
ofstream out_file(output_file, ios::binary); if (output != nullptr) {
out_file.write((const char *)(output->data<float>()), Tensor::MappingGuard output_guard(output);
output->size() * sizeof(float)); ofstream out_file(output_file, ios::binary);
out_file.flush(); out_file.write((const char *)(output->data<float>()),
out_file.close(); output->size() * sizeof(float));
VLOG(0) << "Output shape: [" out_file.flush();
<< output->dim(0) << ", " out_file.close();
<< output->dim(1) << ", " stringstream ss;
<< output->dim(2) << ", " ss << "Output shape: [";
<< output->dim(3) << "]"; for (int i = 0; i < output->dim_size(); ++i) {
ss << output->dim(i) << ", ";
}
ss << "]";
VLOG(0) << ss.str();
}
} }
\ No newline at end of file
...@@ -408,3 +408,81 @@ TEST_F(FusedConv2dOpTest, OPENCLHalfAlignedConvNxNS12) { ...@@ -408,3 +408,81 @@ TEST_F(FusedConv2dOpTest, OPENCLHalfAlignedConvNxNS12) {
TestHalfComplexConvNxNS12<DeviceType::OPENCL>({32, 32, 32, 64}); TestHalfComplexConvNxNS12<DeviceType::OPENCL>({32, 32, 32, 64});
} }
template<DeviceType D, typename T>
static void TestGeneralConvNxNS12(const std::vector<index_t> &image_shape,
const std::vector<index_t> &filter_shape) {
testing::internal::LogToStderr();
auto func = [&](int stride_h, int stride_w, Padding type) {
srand(time(NULL));
// generate random input
index_t batch = 1;
index_t height = image_shape[0];
index_t width = image_shape[1];
index_t input_channels = filter_shape[2];
index_t output_channels = filter_shape[3];
index_t kernel_h = filter_shape[0];
index_t kernel_w = filter_shape[1];
// Construct graph
OpsTestNet net;
OpDefBuilder("FusedConv2D", "FusedConv2dTest")
.Input("Input")
.Input("Filter")
.Input("Bias")
.Output("Output")
.AddIntsArg("strides", {stride_h, stride_w})
.AddIntArg("padding", type)
.AddIntsArg("dilations", {1, 1})
.AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
.Finalize(net.NewOperatorDef());
// Add input data
net.AddRandomInput<D, T>("Input", {batch, height, width, input_channels});
net.AddRandomInput<D, T>(
"Filter", {kernel_h, kernel_w, input_channels, output_channels});
net.AddRandomInput<D, T>("Bias", {output_channels});
// run on cpu
net.RunOp();
// Check
Tensor expected;
expected.Copy(*net.GetOutput("Output"));
// run on gpu
BufferToImage<D, T>(net, "Input", "InputImage", kernels::BufferType::IN_OUT);
BufferToImage<D, T>(net, "Filter", "FilterImage", kernels::BufferType::FILTER);
BufferToImage<D, T>(net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT);
OpDefBuilder("FusedConv2D", "FusedConv2dTest")
.Input("InputImage")
.Input("FilterImage")
.Input("BiasImage")
.Output("OutputImage")
.AddIntsArg("strides", {stride_h, stride_w})
.AddIntArg("padding", type)
.AddIntsArg("dilations", {1, 1})
.AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
.Finalize(net.NewOperatorDef());
// Run on device
net.RunOp(D);
ImageToBuffer<D, T>(net, "OutputImage", "OPENCLOutput", kernels::BufferType::IN_OUT);
ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 0.001);
};
for (int stride : {1, 2}) {
func(stride, stride, VALID);
func(stride, stride, SAME);
}
}
TEST_F(FusedConv2dOpTest, OPENCL7X7ConvNxNS12) {
TestGeneralConvNxNS12<DeviceType::OPENCL, float>({32, 32},
{7, 7, 3, 64});
}
TEST_F(FusedConv2dOpTest, OPENCL15X1ConvNxNS12) {
TestGeneralConvNxNS12<DeviceType::OPENCL, float>({40, 40},
{15, 1, 32, 64});
}
...@@ -24,7 +24,7 @@ def main(unused_args): ...@@ -24,7 +24,7 @@ def main(unused_args):
input_graph_def, FLAGS.input_node, FLAGS.output_node, FLAGS.prequantize) input_graph_def, FLAGS.input_node, FLAGS.output_node, FLAGS.prequantize)
else: else:
output_graph_def = tf_converter_lib.convert_to_mace_pb( output_graph_def = tf_converter_lib.convert_to_mace_pb(
input_graph_def, FLAGS.input_node, FLAGS.output_node, FLAGS.runtime) input_graph_def, FLAGS.input_node, FLAGS.output_node, FLAGS.data_type, FLAGS.runtime)
with gfile.GFile(FLAGS.output, "wb") as f: with gfile.GFile(FLAGS.output, "wb") as f:
f.write(output_graph_def.SerializeToString()) f.write(output_graph_def.SerializeToString())
...@@ -67,6 +67,11 @@ def parse_args(): ...@@ -67,6 +67,11 @@ def parse_args():
type=bool, type=bool,
default=False, default=False,
help="e.g., False") help="e.g., False")
parser.add_argument(
"--data_type",
type=str,
default='DT_FLOAT',
help="e.g., DT_HALF/DT_FLOAT")
return parser.parse_known_args() return parser.parse_known_args()
......
...@@ -19,6 +19,11 @@ buffer_type_map = { ...@@ -19,6 +19,11 @@ buffer_type_map = {
'ARGUMENT' : 2, 'ARGUMENT' : 2,
} }
data_type_map = {
'DT_HALF' : mace_pb2.DT_HALF,
'DT_FLOAT': mace_pb2.DT_FLOAT
}
def convert_tensor(op, tensor): def convert_tensor(op, tensor):
tf_tensor = op.outputs[0].eval() tf_tensor = op.outputs[0].eval()
tensor.name = op.outputs[0].name tensor.name = op.outputs[0].name
...@@ -42,7 +47,7 @@ def get_input_tensor(op, index): ...@@ -42,7 +47,7 @@ def get_input_tensor(op, index):
input_tensor = get_input_tensor(input_tensor.op, 0) input_tensor = get_input_tensor(input_tensor.op, 0)
return input_tensor return input_tensor
def add_buffer_to_image(input_name, input_type, net_def): def add_buffer_to_image(input_name, input_type, dt, net_def):
output_name = input_name[:-2] + "_b2i" + input_name[-2:] output_name = input_name[:-2] + "_b2i" + input_name[-2:]
op_def = net_def.op.add() op_def = net_def.op.add()
op_def.name = output_name[:-2] op_def.name = output_name[:-2]
...@@ -50,15 +55,34 @@ def add_buffer_to_image(input_name, input_type, net_def): ...@@ -50,15 +55,34 @@ def add_buffer_to_image(input_name, input_type, net_def):
op_def.input.extend([input_name]) op_def.input.extend([input_name])
op_def.output.extend([output_name]) op_def.output.extend([output_name])
epsilon_arg = op_def.arg.add() arg = op_def.arg.add()
epsilon_arg.name = 'buffer_type' arg.name = 'buffer_type'
epsilon_arg.i = buffer_type_map[input_type] arg.i = buffer_type_map[input_type]
epsilon_arg = op_def.arg.add() arg = op_def.arg.add()
epsilon_arg.name = 'mode' arg.name = 'mode'
epsilon_arg.i = 0 arg.i = 0
arg = op_def.arg.add()
arg.name = 'T'
arg.i = dt
return output_name
def add_image_to_buffer(input_name, input_type, dt, net_def):
output_name = input_name[:-2] + "_i2b" + input_name[-2:]
op_def = net_def.op.add()
op_def.name = output_name[:-2]
op_def.type = 'ImageToBuffer'
op_def.input.extend([input_name])
op_def.output.extend([output_name])
arg = op_def.arg.add()
arg.name = 'buffer_type'
arg.i = buffer_type_map[input_type]
arg = op_def.arg.add()
arg.name = 'T'
arg.i = dt
return output_name return output_name
def add_input_transform(name, net_def): def add_input_transform(name, dt, net_def):
new_input_name = "mace_input_node:0" new_input_name = "mace_input_node:0"
op_def = net_def.op.add() op_def = net_def.op.add()
op_def.name = name op_def.name = name
...@@ -70,6 +94,10 @@ def add_input_transform(name, net_def): ...@@ -70,6 +94,10 @@ def add_input_transform(name, net_def):
epsilon_arg.name = 'buffer_type' epsilon_arg.name = 'buffer_type'
epsilon_arg.i = buffer_type_map['IN_OUT'] epsilon_arg.i = buffer_type_map['IN_OUT']
arg = op_def.arg.add()
arg.name = 'T'
arg.i = dt
def add_output_transform(name, net_def): def add_output_transform(name, net_def):
output_name = "mace_output_node:0" output_name = "mace_output_node:0"
op_def = net_def.op.add() op_def = net_def.op.add()
...@@ -82,7 +110,7 @@ def add_output_transform(name, net_def): ...@@ -82,7 +110,7 @@ def add_output_transform(name, net_def):
epsilon_arg.name = 'buffer_type' epsilon_arg.name = 'buffer_type'
epsilon_arg.i = buffer_type_map['IN_OUT'] epsilon_arg.i = buffer_type_map['IN_OUT']
def convert_ops(unresolved_ops, net_def, device): def convert_ops(unresolved_ops, dt, net_def, device):
ops_count = len(unresolved_ops) ops_count = len(unresolved_ops)
resolved_count = 1 resolved_count = 1
...@@ -93,225 +121,223 @@ def convert_ops(unresolved_ops, net_def, device): ...@@ -93,225 +121,223 @@ def convert_ops(unresolved_ops, net_def, device):
elif first_op.type == 'Const': elif first_op.type == 'Const':
tensor = net_def.tensors.add() tensor = net_def.tensors.add()
convert_tensor(first_op, tensor) convert_tensor(first_op, tensor)
elif first_op.type == 'Conv2D' or first_op.type == 'DepthwiseConv2dNative': else:
op_def = net_def.op.add() op_def = net_def.op.add()
op_def.name = first_op.name arg = op_def.arg.add()
if first_op.type == 'DepthwiseConv2dNative': arg.name = 'T'
op_def.type = 'DepthwiseConv2d' arg.i = dt
else:
op_def.type = first_op.type if first_op.type == 'Conv2D' or first_op.type == 'DepthwiseConv2dNative':
if device == 'gpu': op_def.name = first_op.name
op_def.input.extend([first_op.inputs[0].name]) if first_op.type == 'DepthwiseConv2dNative':
output_name = add_buffer_to_image(first_op.inputs[1].name, "FILTER", net_def) op_def.type = 'DepthwiseConv2d'
op_def.input.extend([output_name]) else:
else: op_def.type = first_op.type
op_def.input.extend([input.name for input in first_op.inputs])
padding_arg = op_def.arg.add()
padding_arg.name = 'padding'
padding_arg.i = padding_mode[first_op.get_attr('padding')]
strides_arg = op_def.arg.add()
strides_arg.name = 'strides'
strides_arg.ints.extend(first_op.get_attr('strides')[1:3])
data_format_arg = op_def.arg.add()
data_format_arg.name = 'data_format'
data_format_arg.s = 'NHWC'
final_op = first_op
if ops_count >= 3 and unresolved_ops[1].type == 'Const' and unresolved_ops[2].type == 'BiasAdd' :
bias_tensor = unresolved_ops[1]
tensor = net_def.tensors.add()
convert_tensor(bias_tensor, tensor)
bias_add_op = unresolved_ops[2]
if device == 'gpu': if device == 'gpu':
output_name = add_buffer_to_image(bias_add_op.inputs[1].name, "ARGUMENT", net_def) op_def.input.extend([first_op.inputs[0].name])
output_name = add_buffer_to_image(first_op.inputs[1].name, "FILTER", dt, net_def)
op_def.input.extend([output_name]) op_def.input.extend([output_name])
else: else:
op_def.input.extend([bias_add_op.inputs[1].name]) op_def.input.extend([input.name for input in first_op.inputs])
final_op = bias_add_op
resolved_count = 3 padding_arg = op_def.arg.add()
padding_arg.name = 'padding'
if ops_count >= 4 and unresolved_ops[3].type == 'Relu': padding_arg.i = padding_mode[first_op.get_attr('padding')]
relu_op = unresolved_ops[3]; strides_arg = op_def.arg.add()
op_def.type = "FusedConv2D" strides_arg.name = 'strides'
final_op = relu_op strides_arg.ints.extend(first_op.get_attr('strides')[1:3])
resolved_count = 4 data_format_arg = op_def.arg.add()
data_format_arg.name = 'data_format'
op_def.output.extend([output.name for output in final_op.outputs]) data_format_arg.s = 'NHWC'
output_shapes = [] final_op = first_op
for output in final_op.outputs:
output_shape = mace_pb2.OutputShape() if ops_count >= 3 and unresolved_ops[1].type == 'Const' and unresolved_ops[2].type == 'BiasAdd' :
output_shape.dims.extend(output.shape.as_list()) bias_tensor = unresolved_ops[1]
output_shapes.append(output_shape) tensor = net_def.tensors.add()
op_def.output_shape.extend(output_shapes) convert_tensor(bias_tensor, tensor)
bias_add_op = unresolved_ops[2]
if device == 'gpu':
output_name = add_buffer_to_image(bias_add_op.inputs[1].name, "ARGUMENT", dt, net_def)
op_def.input.extend([output_name])
else:
op_def.input.extend([bias_add_op.inputs[1].name])
final_op = bias_add_op
resolved_count = 3
if ops_count >= 4 and unresolved_ops[3].type == 'Relu':
relu_op = unresolved_ops[3];
op_def.type = "FusedConv2D"
final_op = relu_op
resolved_count = 4
op_def.output.extend([output.name for output in final_op.outputs])
output_shapes = []
for output in final_op.outputs:
output_shape = mace_pb2.OutputShape()
output_shape.dims.extend(output.shape.as_list())
output_shapes.append(output_shape)
op_def.output_shape.extend(output_shapes)
elif first_op.type == 'FusedBatchNorm':
op_def.name = first_op.name
op_def.type = 'BatchNorm'
if device == 'gpu':
op_def.input.extend([first_op.inputs[0].name])
for i in range(1, len(first_op.inputs)):
output_name = add_buffer_to_image(first_op.inputs[i].name, "ARGUMENT", dt, net_def)
op_def.input.extend([output_name])
else:
op_def.input.extend([input.name for input in first_op.inputs])
op_def.output.extend([first_op.outputs[0].name])
elif first_op.type == 'FusedBatchNorm': output_shape = mace_pb2.OutputShape()
op_def = net_def.op.add() output_shape.dims.extend(first_op.outputs[0].shape.as_list())
op_def.name = first_op.name op_def.output_shape.extend([output_shape])
op_def.type = 'BatchNorm'
if device == 'gpu': epsilon_arg = op_def.arg.add()
epsilon_arg.name = 'epsilon'
epsilon_arg.f = first_op.get_attr('epsilon')
data_format_arg = op_def.arg.add()
data_format_arg.name = 'data_format'
data_format_arg.s = 'NHWC'
elif first_op.type == 'Add' and first_op.name.endswith(
'batchnorm/add') and ops_count > 7:
add_op = first_op
mul_op = unresolved_ops[2]
mul_1_op = unresolved_ops[3]
mul_2_op = unresolved_ops[4]
sub_op = unresolved_ops[5]
add_1_op = unresolved_ops[6]
# print (mul_op.type, mul_2_op.type, mul_1_op.type, sub_op.type)
if mul_op.type != 'Mul' or mul_2_op.type != 'Mul' or \
mul_1_op.type != 'Mul' or sub_op.type != 'Sub' or add_1_op.type != 'Add':
raise Exception('Invalid BatchNorm Op')
get_input_tensor(mul_1_op, 0)
input_name = get_input_tensor(mul_1_op, 0).name
gamma = get_input_tensor(mul_op, 1).name
beta = get_input_tensor(sub_op, 0).name
mean = get_input_tensor(mul_2_op, 0).name
variance = get_input_tensor(add_op, 0).name
epsilon = get_input_tensor(add_op, 1).name
op_def.name = first_op.name[:-4] # remove /add
op_def.type = 'BatchNorm'
op_def.input.extend([input_name, gamma, beta, mean, variance, epsilon])
op_def.output.extend([output.name for output in add_1_op.outputs])
output_shapes = []
for output in add_1_op.outputs:
output_shape = mace_pb2.OutputShape()
output_shape.dims.extend(output.shape.as_list())
output_shapes.append(output_shape)
op_def.output_shape.extend(output_shapes)
resolved_count = 7
elif first_op.type == 'Relu6':
op_def.name = first_op.name
op_def.type = 'Relu'
op_def.input.extend([input.name for input in first_op.inputs])
op_def.output.extend([output.name for output in first_op.outputs])
output_shapes = []
for output in first_op.outputs:
output_shape = mace_pb2.OutputShape()
output_shape.dims.extend(output.shape.as_list())
output_shapes.append(output_shape)
op_def.output_shape.extend(output_shapes)
max_limit_arg = op_def.arg.add()
max_limit_arg.name = 'max_limit'
max_limit_arg.f = 6
elif first_op.type == 'AvgPool' or first_op.type == 'MaxPool':
op_def.name = first_op.name
op_def.type = 'Pooling'
op_def.input.extend([input.name for input in first_op.inputs])
op_def.output.extend([output.name for output in first_op.outputs])
output_shapes = []
for output in first_op.outputs:
output_shape = mace_pb2.OutputShape()
output_shape.dims.extend(output.shape.as_list())
output_shapes.append(output_shape)
op_def.output_shape.extend(output_shapes)
pooling_type_arg = op_def.arg.add()
pooling_type_arg.name = 'pooling_type'
pooling_type_arg.i = pooling_type_mode[first_op.type]
padding_arg = op_def.arg.add()
padding_arg.name = 'padding'
padding_arg.i = padding_mode[first_op.get_attr('padding')]
strides_arg = op_def.arg.add()
strides_arg.name = 'strides'
strides_arg.ints.extend(first_op.get_attr('strides')[1:3])
kernels_arg = op_def.arg.add()
kernels_arg.name = 'kernels'
kernels_arg.ints.extend(first_op.get_attr('ksize')[1:3])
data_format_arg = op_def.arg.add()
data_format_arg.name = 'data_format'
data_format_arg.s = 'NHWC'
elif first_op.type == 'Add':
op_def.name = first_op.name
op_def.type = "AddN"
op_def.input.extend([input.name for input in first_op.inputs])
op_def.output.extend([output.name for output in first_op.outputs])
output_shapes = []
for output in first_op.outputs:
output_shape = mace_pb2.OutputShape()
output_shape.dims.extend(output.shape.as_list())
output_shapes.append(output_shape)
op_def.output_shape.extend(output_shapes)
elif first_op.type == 'ConcatV2':
op_def.name = first_op.name
op_def.type = "Concat"
op_def.input.extend([first_op.inputs[i].name for i in xrange(2)])
op_def.output.extend([output.name for output in first_op.outputs])
axis_arg = op_def.arg.add()
axis_arg.name = 'axis'
axis_arg.i = get_input_tensor(first_op, 2).eval().astype(np.int32)
output_shapes = []
for output in first_op.outputs:
output_shape = mace_pb2.OutputShape()
output_shape.dims.extend(output.shape.as_list())
output_shapes.append(output_shape)
op_def.output_shape.extend(output_shapes)
elif first_op.type == 'ResizeBilinear':
op_def.name = first_op.name
op_def.type = "ResizeBilinear"
op_def.input.extend([first_op.inputs[0].name]) op_def.input.extend([first_op.inputs[0].name])
for i in range(1, len(first_op.inputs)): op_def.output.extend([output.name for output in first_op.outputs])
output_name = add_buffer_to_image(first_op.inputs[i].name, "ARGUMENT", net_def) size_arg = op_def.arg.add()
op_def.input.extend([output_name]) size_arg.name = 'size'
else: size_arg.ints.extend(get_input_tensor(first_op, 1).eval().astype(np.int32).flat)
size_arg = op_def.arg.add()
size_arg.name = 'align_corners'
size_arg.i = first_op.get_attr('align_corners')
output_shapes = []
for output in first_op.outputs:
output_shape = mace_pb2.OutputShape()
output_shape.dims.extend(output.shape.as_list())
output_shapes.append(output_shape)
op_def.output_shape.extend(output_shapes)
elif first_op.type in ['Relu', 'SpaceToBatchND', 'BatchToSpaceND', 'BiasAdd']:
op_def.name = first_op.name
op_def.type = first_op.type
op_def.input.extend([input.name for input in first_op.inputs]) op_def.input.extend([input.name for input in first_op.inputs])
op_def.output.extend([first_op.outputs[0].name]) op_def.output.extend([output.name for output in first_op.outputs])
output_shapes = []
output_shape = mace_pb2.OutputShape() for output in first_op.outputs:
output_shape.dims.extend(first_op.outputs[0].shape.as_list()) output_shape = mace_pb2.OutputShape()
op_def.output_shape.extend([output_shape]) output_shape.dims.extend(output.shape.as_list())
output_shapes.append(output_shape)
epsilon_arg = op_def.arg.add() op_def.output_shape.extend(output_shapes)
epsilon_arg.name = 'epsilon' else:
epsilon_arg.f = first_op.get_attr('epsilon') raise Exception('Unknown Op: %s, type: %s' % (first_op.name, first_op.type))
data_format_arg = op_def.arg.add() pass
data_format_arg.name = 'data_format'
data_format_arg.s = 'NHWC'
elif first_op.type == 'Add' and first_op.name.endswith(
'batchnorm/add') and ops_count > 7:
add_op = first_op
mul_op = unresolved_ops[2]
mul_1_op = unresolved_ops[3]
mul_2_op = unresolved_ops[4]
sub_op = unresolved_ops[5]
add_1_op = unresolved_ops[6]
# print (mul_op.type, mul_2_op.type, mul_1_op.type, sub_op.type)
if mul_op.type != 'Mul' or mul_2_op.type != 'Mul' or \
mul_1_op.type != 'Mul' or sub_op.type != 'Sub' or add_1_op.type != 'Add':
raise Exception('Invalid BatchNorm Op')
get_input_tensor(mul_1_op, 0)
input_name = get_input_tensor(mul_1_op, 0).name
gamma = get_input_tensor(mul_op, 1).name
beta = get_input_tensor(sub_op, 0).name
mean = get_input_tensor(mul_2_op, 0).name
variance = get_input_tensor(add_op, 0).name
epsilon = get_input_tensor(add_op, 1).name
op_def = net_def.op.add()
op_def.name = first_op.name[:-4] # remove /add
op_def.type = 'BatchNorm'
op_def.input.extend([input_name, gamma, beta, mean, variance, epsilon])
op_def.output.extend([output.name for output in add_1_op.outputs])
output_shapes = []
for output in add_1_op.outputs:
output_shape = mace_pb2.OutputShape()
output_shape.dims.extend(output.shape.as_list())
output_shapes.append(output_shape)
op_def.output_shape.extend(output_shapes)
resolved_count = 7
elif first_op.type == 'Relu6':
op_def = net_def.op.add()
op_def.name = first_op.name
op_def.type = 'Relu'
op_def.input.extend([input.name for input in first_op.inputs])
op_def.output.extend([output.name for output in first_op.outputs])
output_shapes = []
for output in first_op.outputs:
output_shape = mace_pb2.OutputShape()
output_shape.dims.extend(output.shape.as_list())
output_shapes.append(output_shape)
op_def.output_shape.extend(output_shapes)
max_limit_arg = op_def.arg.add()
max_limit_arg.name = 'max_limit'
max_limit_arg.f = 6
elif first_op.type == 'AvgPool' or first_op.type == 'MaxPool':
op_def = net_def.op.add()
op_def.name = first_op.name
op_def.type = 'Pooling'
op_def.input.extend([input.name for input in first_op.inputs])
op_def.output.extend([output.name for output in first_op.outputs])
output_shapes = []
for output in first_op.outputs:
output_shape = mace_pb2.OutputShape()
output_shape.dims.extend(output.shape.as_list())
output_shapes.append(output_shape)
op_def.output_shape.extend(output_shapes)
pooling_type_arg = op_def.arg.add()
pooling_type_arg.name = 'pooling_type'
pooling_type_arg.i = pooling_type_mode[first_op.type]
padding_arg = op_def.arg.add()
padding_arg.name = 'padding'
padding_arg.i = padding_mode[first_op.get_attr('padding')]
strides_arg = op_def.arg.add()
strides_arg.name = 'strides'
strides_arg.ints.extend(first_op.get_attr('strides')[1:3])
kernels_arg = op_def.arg.add()
kernels_arg.name = 'kernels'
kernels_arg.ints.extend(first_op.get_attr('ksize')[1:3])
data_format_arg = op_def.arg.add()
data_format_arg.name = 'data_format'
data_format_arg.s = 'NHWC'
elif first_op.type == 'Add':
op_def = net_def.op.add()
op_def.name = first_op.name
op_def.type = "AddN"
op_def.input.extend([input.name for input in first_op.inputs])
op_def.output.extend([output.name for output in first_op.outputs])
output_shapes = []
for output in first_op.outputs:
output_shape = mace_pb2.OutputShape()
output_shape.dims.extend(output.shape.as_list())
output_shapes.append(output_shape)
op_def.output_shape.extend(output_shapes)
elif first_op.type == 'ConcatV2':
op_def = net_def.op.add()
op_def.name = first_op.name
op_def.type = "Concat"
op_def.input.extend([first_op.inputs[i].name for i in xrange(2)])
op_def.output.extend([output.name for output in first_op.outputs])
axis_arg = op_def.arg.add()
axis_arg.name = 'axis'
axis_arg.i = get_input_tensor(first_op, 2).eval().astype(np.int32)
output_shapes = []
for output in first_op.outputs:
output_shape = mace_pb2.OutputShape()
output_shape.dims.extend(output.shape.as_list())
output_shapes.append(output_shape)
op_def.output_shape.extend(output_shapes)
elif first_op.type == 'ResizeBilinear':
op_def = net_def.op.add()
op_def.name = first_op.name
op_def.type = "ResizeBilinear"
op_def.input.extend([first_op.inputs[0].name])
op_def.output.extend([output.name for output in first_op.outputs])
size_arg = op_def.arg.add()
size_arg.name = 'size'
size_arg.ints.extend(get_input_tensor(first_op, 1).eval().astype(np.int32).flat)
size_arg = op_def.arg.add()
size_arg.name = 'align_corners'
size_arg.i = first_op.get_attr('align_corners')
output_shapes = []
for output in first_op.outputs:
output_shape = mace_pb2.OutputShape()
output_shape.dims.extend(output.shape.as_list())
output_shapes.append(output_shape)
op_def.output_shape.extend(output_shapes)
elif first_op.type in ['Relu', 'SpaceToBatchND', 'BatchToSpaceND', 'BiasAdd']:
op_def = net_def.op.add()
op_def.name = first_op.name
op_def.type = first_op.type
op_def.input.extend([input.name for input in first_op.inputs])
op_def.output.extend([output.name for output in first_op.outputs])
output_shapes = []
for output in first_op.outputs:
output_shape = mace_pb2.OutputShape()
output_shape.dims.extend(output.shape.as_list())
output_shapes.append(output_shape)
op_def.output_shape.extend(output_shapes)
else:
raise Exception('Unknown Op: %s, type: %s' % (first_op.name, first_op.type))
pass
for i in range(resolved_count): for i in range(resolved_count):
del unresolved_ops[0] del unresolved_ops[0]
def convert_to_mace_pb(input_graph_def, input_node, output_node, device): def convert_to_mace_pb(input_graph_def, input_node, output_node, data_type, device):
net_def = mace_pb2.NetDef() net_def = mace_pb2.NetDef()
dt = data_type_map[data_type]
with tf.Session() as session: with tf.Session() as session:
with session.graph.as_default() as graph: with session.graph.as_default() as graph:
...@@ -319,9 +345,9 @@ def convert_to_mace_pb(input_graph_def, input_node, output_node, device): ...@@ -319,9 +345,9 @@ def convert_to_mace_pb(input_graph_def, input_node, output_node, device):
ops = graph.get_operations() ops = graph.get_operations()
unresolved_ops = ops unresolved_ops = ops
if device == 'gpu': if device == 'gpu':
add_input_transform(input_node, net_def) add_input_transform(input_node, dt, net_def)
while len(unresolved_ops) > 0: while len(unresolved_ops) > 0:
convert_ops(unresolved_ops, net_def, device) convert_ops(unresolved_ops, dt, net_def, device)
if device == 'gpu': if device == 'gpu':
add_output_transform(output_node, net_def) add_output_transform(output_node, net_def)
......
import argparse import argparse
import sys import sys
import os
import os.path
import tensorflow as tf import tensorflow as tf
import numpy as np import numpy as np
...@@ -25,13 +27,23 @@ def generate_data(shape): ...@@ -25,13 +27,23 @@ def generate_data(shape):
print "Generate input file done." print "Generate input file done."
def load_data(file): def load_data(file):
return np.fromfile(file=file, dtype=np.float32) if os.path.isfile(file):
return np.fromfile(file=file, dtype=np.float32)
else:
return np.empty([0])
def valid_output(out_shape, mace_out_file, tf_out_value): def valid_output(out_shape, mace_out_file, tf_out_value):
mace_out_value = load_data(mace_out_file) mace_out_value = load_data(mace_out_file)
mace_out_value = mace_out_value.reshape(out_shape) if mace_out_value.size != 0:
res = np.allclose(tf_out_value, mace_out_value, rtol=0, atol=1e-5) mace_out_value = mace_out_value.reshape(out_shape)
print 'Passed! Haha' if res else 'Failed! Oops' np.testing.assert_allclose(tf_out_value, mace_out_value, rtol=0, atol=1e-3)
res = np.allclose(tf_out_value, mace_out_value, rtol=0, atol=1e-3)
if res:
print '=======================Passed! Haha======================'
else:
print '=======================Failed! Oops======================'
else:
print '=======================Skip empty node==================='
def run_model(input_shape): def run_model(input_shape):
...@@ -55,6 +67,7 @@ def run_model(input_shape): ...@@ -55,6 +67,7 @@ def run_model(input_shape):
input_value = input_value.reshape(input_shape) input_value = input_value.reshape(input_shape)
output_value = session.run(output_node, feed_dict={input_node: [input_value]}) output_value = session.run(output_node, feed_dict={input_node: [input_value]})
# output_value.astype(np.float32).tofile( os.path.dirname(FLAGS.input_file) + '/tf_weight')
return output_value return output_value
def main(unused_args): def main(unused_args):
......
#!/bin/bash #!/bin/bash
# Must run at root dir of mace project. # Must run at root dir of mace project.
set -e
Usage() { Usage() {
echo 'Usage: bash tools/validate_gcn.sh tf_model_file' echo 'Usage: bash tools/validate_gcn.sh tf_model_file'
...@@ -16,23 +15,26 @@ MODEL_DIR=$(dirname ${TF_MODEL_FILE_PATH}) ...@@ -16,23 +15,26 @@ MODEL_DIR=$(dirname ${TF_MODEL_FILE_PATH})
MACE_MODEL_NAME='mace_model.pb' MACE_MODEL_NAME='mace_model.pb'
INPUT_FILE_NAME='model_input' INPUT_FILE_NAME='model_input'
OUTPUT_FILE_NAME='gcn.out' OUTPUT_FILE_NAME='gcn.out'
OUTPUT_LIST_FILE='gcn.list'
PHONE_DATA_DIR="/data/local/tmp/${MACE_MODEL_NAME}" PHONE_DATA_DIR="/data/local/tmp/${MACE_MODEL_NAME}"
KERNEL_DIR="${PHONE_DATA_DIR}/cl/" KERNEL_DIR="${PHONE_DATA_DIR}/cl/"
# Step 1: convert tf model to mace model # Step 1: Generate input data
echo "Step 1: convert tf model to mace model" echo "Step 1: Generate input data"
python tools/validate.py --generate_data true --random_seed 1 \
--input_file=${MODEL_DIR}/${INPUT_FILE_NAME} \
--input_shape=512,512,3
# Step 2: convert tf model to mace model
echo "Step 2: convert tf model to mace model"
bazel build //mace/python/tools:tf_converter bazel build //mace/python/tools:tf_converter
bazel-bin/mace/python/tools/tf_converter --input=${TF_MODEL_FILE_PATH} \ bazel-bin/mace/python/tools/tf_converter --input=${TF_MODEL_FILE_PATH} \
--output=${MODEL_DIR}/${MACE_MODEL_NAME} \ --output=${MODEL_DIR}/${MACE_MODEL_NAME} \
--input_node=input \ --input_node=input \
--output_node=GCN/br_result_2/fcn_br \ --output_node=GCN/br_result_2/fcn_br \
--data_type=DT_FLOAT \
--runtime=gpu --runtime=gpu
# Step 2: Generate input data
echo "Step 2: Generate input data"
python tools/validate.py --generate_data true --random_seed 1 \
--input_file=${MODEL_DIR}/${INPUT_FILE_NAME} \
--input_shape=512,512,3
# Step 3: Run model on the phone # Step 3: Run model on the phone
echo "Step 3: Run model on the phone" echo "Step 3: Run model on the phone"
...@@ -50,28 +52,29 @@ adb push bazel-bin/mace/examples/mace_run ${PHONE_DATA_DIR} ...@@ -50,28 +52,29 @@ adb push bazel-bin/mace/examples/mace_run ${PHONE_DATA_DIR}
num_threads=${1:-1} num_threads=${1:-1}
adb shell MACE_RUN_PARAMETER_PATH=${PHONE_DATA_DIR}/mace_run.config \ adb </dev/null shell MACE_RUN_PARAMETER_PATH=${PHONE_DATA_DIR}/mace_run.config \
MACE_KERNEL_PATH=$KERNEL_DIR \ MACE_KERNEL_PATH=$KERNEL_DIR \
OMP_NUM_THREADS=$num_threads \ OMP_NUM_THREADS=$num_threads \
${PHONE_DATA_DIR}/mace_run \ ${PHONE_DATA_DIR}/mace_run \
--model=${PHONE_DATA_DIR}/${MACE_MODEL_NAME} \ --model=${PHONE_DATA_DIR}/${MACE_MODEL_NAME} \
--input=mace_input_node \ --input=mace_input_node \
--output=mace_output_node \ --output=mace_output_node \
--input_shape=1,512,512,3\ --input_shape=1,512,512,3\
--input_file=${PHONE_DATA_DIR}/${MACE_INPUT_FILE_NAME} \ --input_file=${PHONE_DATA_DIR}/${INPUT_FILE_NAME} \
--output_file=${PHONE_DATA_DIR}/${OUTPUT_FILE_NAME} \ --output_file=${PHONE_DATA_DIR}/${OUTPUT_FILE_NAME} \
--device=OPENCL --device=OPENCL
# Step 4: pull the mace run result. # Step 4: pull the mace run result.
echo "Step 4: pull the mace run result." echo "Step 4: pull the mace run result."
adb pull ${PHONE_DATA_DIR}/${OUTPUT_FILE_NAME} ${MODEL_DIR} rm -rf ${MODEL_DIR}/${OUTPUT_FILE_NAME}
adb </dev/null pull ${PHONE_DATA_DIR}/${OUTPUT_FILE_NAME} ${MODEL_DIR}
# Step 5: validate the result # Step 5: validate the result
echo "Step 5: validate the result" echo "Step 5: validate the result"
python tools/validate.py --model_file ${TF_MODEL_FILE_PATH} \ python tools/validate.py --model_file ${TF_MODEL_FILE_PATH} \
--input_file ${MODEL_DIR}/${INPUT_FILE_NAME} \ --input_file ${MODEL_DIR}/${INPUT_FILE_NAME} \
--mace_out_file ${MODEL_DIR}/${OUTPUT_FILE_NAME} \ --mace_out_file ${MODEL_DIR}/${OUTPUT_FILE_NAME} \
--input_node input \ --input_node input \
--output_node GCN/br_result_2/fcn_br --output_node GCN/br_result_2/fcn_br\
--output_shape 1,512,512,2
import argparse
import sys
import tensorflow as tf
import numpy as np
from tensorflow import gfile
# Validation Flow:
# 1. Generate input data
# python validate_icnet.py --generate_data 1 \
# --random_seed 1
# 2. Use mace_run to run icnet on phone.
# 3. adb pull the result.
# 4. Compare output data of mace and tf
# python validate_icnet.py --model_file opt_icnet.pb \
# --tf_input_file input_file \
# --mace_out_file icnet.out
def generate_data(shape):
  """Create one random HWC input sample and dump it twice: once in TF
  layout (HWC) for the TF reference run, once in CHW layout for MACE."""
  np.random.seed(FLAGS.random_seed)
  hwc_sample = np.random.random(shape)
  print(FLAGS.tf_input_file)
  hwc_sample.astype(np.float32).tofile(FLAGS.tf_input_file)
  # MACE consumes channel-first data: HWC -> CHW.
  chw_sample = np.transpose(hwc_sample, axes=(2, 0, 1))
  chw_sample.astype(np.float32).tofile(FLAGS.mace_input_file)
  print("Generate input file done.")
def load_data(file):
  # The file is a raw binary blob of float32 values; return it as a
  # flat 1-D array (callers reshape it themselves).
  return np.fromfile(file, dtype=np.float32)
def valid_output(out_shape, mace_out_file, tf_out_value):
  """Compare the MACE output blob (NCHW on disk) against the TF reference
  value (NHWC in memory) and print a pass/fail verdict."""
  actual = load_data(mace_out_file).reshape(out_shape)
  # Bring the TF reference into the same NCHW layout before comparing.
  expected = np.transpose(tf_out_value, axes=(0, 3, 1, 2))
  matched = np.allclose(actual, expected, rtol=0, atol=1e-5)
  print('Passed! Haha' if matched else 'Failed! Oops')
def run_model(input_shape):
  """Run the frozen TF graph on the generated input file.

  Args:
    input_shape: [H, W, C] shape of a single sample; the batch dimension
      is added when feeding (the input is wrapped in a 1-element list).

  Returns:
    The numpy value of 'output_node:0', or -1 if the model file is missing.
  """
  if not gfile.Exists(FLAGS.model_file):
    print("Input graph file '" + FLAGS.model_file + "' does not exist!")
    return -1

  input_graph_def = tf.GraphDef()
  with gfile.Open(FLAGS.model_file, "rb") as f:
    data = f.read()
    input_graph_def.ParseFromString(data)

  with tf.Session() as session:
    with session.graph.as_default() as graph:
      # Import the GraphDef exactly once, inside the session's graph.
      # The original code also imported it at function scope first;
      # importing the same GraphDef twice into one graph produces
      # duplicate (auto-renamed) nodes, so that redundant import is removed.
      tf.import_graph_def(input_graph_def, name="")
      input_node = graph.get_tensor_by_name('input_node:0')
      output_node = graph.get_tensor_by_name('output_node:0')
      input_value = load_data(FLAGS.tf_input_file)
      input_value = input_value.reshape(input_shape)
      output_value = session.run(output_node,
                                 feed_dict={input_node: [input_value]})
      return output_value
def main(unused_args):
  """Entry point: either generate fresh input data, or run the TF
  reference model and validate the MACE output against it."""
  in_shape = [int(dim) for dim in FLAGS.input_shape.split(',')]
  out_shape = [int(dim) for dim in FLAGS.output_shape.split(',')]
  if not FLAGS.generate_data:
    tf_output = run_model(in_shape)
    valid_output(out_shape, FLAGS.mace_out_file, tf_output)
  else:
    generate_data(in_shape)
def parse_args():
  """Parses command line arguments."""
  parser = argparse.ArgumentParser()
  # String-valued "true"/"false" flags are converted via this registered type.
  parser.register("type", "bool", lambda v: v.lower() == "true")
  # Plain string flags: (name, default, help).
  string_flags = [
      ("--model_file", "", "TensorFlow \'GraphDef\' file to load."),
      ("--tf_input_file", "", "tensorflow input data to load."),
      ("--mace_input_file", "", "mace input data to load."),
      ("--mace_out_file", "", "mace output file to load."),
      ("--input_shape", "480,480,3", "input shape."),
      ("--output_shape", "1,2,480,480", "output shape."),
  ]
  for flag_name, flag_default, flag_help in string_flags:
    parser.add_argument(flag_name, type=str, default=flag_default,
                        help=flag_help)
  parser.add_argument(
      "--generate_data",
      type='bool',
      default="false",
      help="Random seed for generate test case.")
  parser.add_argument(
      "--random_seed",
      type=int,
      default="0",
      help="Random seed for generate test case.")
  return parser.parse_known_args()
if __name__ == '__main__':
  # Parse known flags into the module-global FLAGS; any unrecognized
  # arguments (plus argv[0]) are forwarded to main() untouched.
  FLAGS, unparsed = parse_args()
  main(unused_args=[sys.argv[0]] + unparsed)
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册