提交 4eb498a6 编写于 作者: L Liangliang He

Merge branch 'convert-gcn' into 'master'

Add gcn validation tools.

See merge request !149
...@@ -129,15 +129,21 @@ int main(int argc, char **argv) { ...@@ -129,15 +129,21 @@ int main(int argc, char **argv) {
// save output // save output
const Tensor *output = ws.GetTensor(output_node + ":0"); const Tensor *output = ws.GetTensor(output_node + ":0");
std::remove(output_file.c_str());
if (output != nullptr) {
Tensor::MappingGuard output_guard(output); Tensor::MappingGuard output_guard(output);
ofstream out_file(output_file, ios::binary); ofstream out_file(output_file, ios::binary);
out_file.write((const char *)(output->data<float>()), out_file.write((const char *)(output->data<float>()),
output->size() * sizeof(float)); output->size() * sizeof(float));
out_file.flush(); out_file.flush();
out_file.close(); out_file.close();
VLOG(0) << "Output shape: [" stringstream ss;
<< output->dim(0) << ", " ss << "Output shape: [";
<< output->dim(1) << ", " for (int i = 0; i < output->dim_size(); ++i) {
<< output->dim(2) << ", " ss << output->dim(i) << ", ";
<< output->dim(3) << "]";
}
ss << "]";
VLOG(0) << ss.str();
}
} }
\ No newline at end of file
...@@ -28,6 +28,11 @@ extern void Conv2dOpenclK3x3S2(const Tensor *input, const Tensor *filter, ...@@ -28,6 +28,11 @@ extern void Conv2dOpenclK3x3S2(const Tensor *input, const Tensor *filter,
const int *padding, const DataType dt, const int *padding, const DataType dt,
Tensor *output); Tensor *output);
extern void Conv2dOpencl(const Tensor *input, const Tensor *filter,
const Tensor *bias, const bool fused_relu,
const uint32_t stride, const int *padding,
const DataType dt, Tensor *output);
template<typename T> template<typename T>
void FusedConv2dFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input, void FusedConv2dFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
const Tensor *filter, const Tensor *filter,
...@@ -44,20 +49,15 @@ void FusedConv2dFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input, ...@@ -44,20 +49,15 @@ void FusedConv2dFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
{Conv2dOpenclK3x3S1, Conv2dOpenclK3x3S2}, {Conv2dOpenclK3x3S1, Conv2dOpenclK3x3S2},
{nullptr, nullptr}, {nullptr, nullptr},
{nullptr, nullptr}}; {nullptr, nullptr}};
index_t kernel_h = filter->dim(0); index_t kernel_h = filter->dim(0);
index_t kernel_w = filter->dim(1); index_t kernel_w = filter->dim(1);
if (kernel_h != kernel_w || kernel_h > 5 || strides_[0] != strides_[1] || if (!input->is_image() || strides_[0] != strides_[1] ||
strides_[0] > 2 || dilations_[0] != 1 || dilations_[1] != 1 || strides_[0] > 2 || dilations_[0] != 1 || dilations_[1] != 1) {
selector[kernel_h - 1][strides_[0] - 1] == nullptr) {
LOG(WARNING) << "OpenCL conv2d kernel with " LOG(WARNING) << "OpenCL conv2d kernel with "
<< "filter" << kernel_h << "x" << kernel_w << "," << "filter" << kernel_h << "x" << kernel_w << ","
<< " stride " << strides_[0] << "x" << strides_[1] << " stride " << strides_[0] << "x" << strides_[1]
<< " is not implemented yet, using slow version"; << " is not implemented yet, using slow version";
// TODO(heliangliang) The CPU/NEON kernel should map the buffer MACE_NOT_IMPLEMENTED;
FusedConv2dFunctor<DeviceType::CPU, T>(strides_, paddings_, dilations_)(
input, filter, bias, output);
return;
} }
std::vector<index_t> output_shape(4); std::vector<index_t> output_shape(4);
...@@ -66,16 +66,17 @@ void FusedConv2dFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input, ...@@ -66,16 +66,17 @@ void FusedConv2dFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
input->shape().data(), filter->shape().data(), dilations_, input->shape().data(), filter->shape().data(), dilations_,
strides_, paddings_, output_shape.data(), paddings.data()); strides_, paddings_, output_shape.data(), paddings.data());
if (input->is_image()) {
std::vector<size_t> output_image_shape; std::vector<size_t> output_image_shape;
CalImage2DShape(output_shape, BufferType::IN_OUT, output_image_shape); CalImage2DShape(output_shape, BufferType::IN_OUT, output_image_shape);
output->ResizeImage(output_shape, output_image_shape); output->ResizeImage(output_shape, output_image_shape);
} else {
output->Resize(output_shape);
}
if (kernel_h == kernel_w && kernel_h <= 5 &&
selector[kernel_h - 1][strides_[0] - 1] != nullptr) {
auto conv2d_func = selector[kernel_h - 1][strides_[0] - 1]; auto conv2d_func = selector[kernel_h - 1][strides_[0] - 1];
conv2d_func(input, filter, bias, true, paddings.data(), DataTypeToEnum<T>::value, output); conv2d_func(input, filter, bias, true, paddings.data(), DataTypeToEnum<T>::value, output);
} else {
Conv2dOpencl(input, filter, bias, true, strides_[0], paddings.data(), DataTypeToEnum<T>::value, output);
}
} }
template template
......
...@@ -408,3 +408,81 @@ TEST_F(FusedConv2dOpTest, OPENCLHalfAlignedConvNxNS12) { ...@@ -408,3 +408,81 @@ TEST_F(FusedConv2dOpTest, OPENCLHalfAlignedConvNxNS12) {
TestHalfComplexConvNxNS12<DeviceType::OPENCL>({32, 32, 32, 64}); TestHalfComplexConvNxNS12<DeviceType::OPENCL>({32, 32, 32, 64});
} }
// Cross-checks the general (arbitrary-kernel) FusedConv2D path: runs the op
// on CPU as the reference, then on device D via the image-based OpenCL path,
// and requires the two outputs to agree within 1e-3.
// image_shape  : {height, width} of the input (batch fixed at 1).
// filter_shape : {kernel_h, kernel_w, input_channels, output_channels}.
// Covers strides 1 and 2 with both VALID and SAME padding.
template<DeviceType D, typename T>
static void TestGeneralConvNxNS12(const std::vector<index_t> &image_shape,
                                  const std::vector<index_t> &filter_shape) {
  testing::internal::LogToStderr();
  auto func = [&](int stride_h, int stride_w, Padding type) {
    srand(time(NULL));

    // generate random input
    index_t batch = 1;
    index_t height = image_shape[0];
    index_t width = image_shape[1];
    index_t input_channels = filter_shape[2];
    index_t output_channels = filter_shape[3];
    index_t kernel_h = filter_shape[0];
    index_t kernel_w = filter_shape[1];

    // Construct graph (buffer-based operands; runs on CPU by default)
    OpsTestNet net;
    OpDefBuilder("FusedConv2D", "FusedConv2dTest")
        .Input("Input")
        .Input("Filter")
        .Input("Bias")
        .Output("Output")
        .AddIntsArg("strides", {stride_h, stride_w})
        .AddIntArg("padding", type)
        .AddIntsArg("dilations", {1, 1})
        .AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
        .Finalize(net.NewOperatorDef());

    // Add input data
    net.AddRandomInput<D, T>("Input", {batch, height, width, input_channels});
    net.AddRandomInput<D, T>(
        "Filter", {kernel_h, kernel_w, input_channels, output_channels});
    net.AddRandomInput<D, T>("Bias", {output_channels});

    // run on cpu
    net.RunOp();

    // Check: snapshot the CPU result before the GPU run overwrites the net.
    Tensor expected;
    expected.Copy(*net.GetOutput("Output"));

    // run on gpu: convert operands to OpenCL images, rebuild the op on the
    // image tensors, then convert the result back to a buffer for comparison.
    BufferToImage<D, T>(net, "Input", "InputImage", kernels::BufferType::IN_OUT);
    BufferToImage<D, T>(net, "Filter", "FilterImage", kernels::BufferType::FILTER);
    BufferToImage<D, T>(net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT);
    OpDefBuilder("FusedConv2D", "FusedConv2dTest")
        .Input("InputImage")
        .Input("FilterImage")
        .Input("BiasImage")
        .Output("OutputImage")
        .AddIntsArg("strides", {stride_h, stride_w})
        .AddIntArg("padding", type)
        .AddIntsArg("dilations", {1, 1})
        .AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
        .Finalize(net.NewOperatorDef());
    // Run on device
    net.RunOp(D);

    ImageToBuffer<D, T>(net, "OutputImage", "OPENCLOutput", kernels::BufferType::IN_OUT);
    // 1e-3 tolerance — looser than exact since GPU may use reduced precision.
    ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 0.001);
  };

  for (int stride : {1, 2}) {
    func(stride, stride, VALID);
    func(stride, stride, SAME);
  }
}
// 7x7 square kernel, strides 1 and 2: exercises the general OpenCL conv
// path (kernel size outside the specialized 1x1/3x3 selectors).
TEST_F(FusedConv2dOpTest, OPENCL7X7ConvNxNS12) {
  TestGeneralConvNxNS12<DeviceType::OPENCL, float>({32, 32},
                                                   {7, 7, 3, 64});
}
// 15x1 non-square kernel: verifies the general OpenCL conv path handles
// kernel_h != kernel_w, which the specialized kernels do not cover.
TEST_F(FusedConv2dOpTest, OPENCL15X1ConvNxNS12) {
  TestGeneralConvNxNS12<DeviceType::OPENCL, float>({40, 40},
                                                   {15, 1, 32, 64});
}
...@@ -24,7 +24,7 @@ def main(unused_args): ...@@ -24,7 +24,7 @@ def main(unused_args):
input_graph_def, FLAGS.input_node, FLAGS.output_node, FLAGS.prequantize) input_graph_def, FLAGS.input_node, FLAGS.output_node, FLAGS.prequantize)
else: else:
output_graph_def = tf_converter_lib.convert_to_mace_pb( output_graph_def = tf_converter_lib.convert_to_mace_pb(
input_graph_def, FLAGS.runtime) input_graph_def, FLAGS.input_node, FLAGS.output_node, FLAGS.data_type, FLAGS.runtime)
with gfile.GFile(FLAGS.output, "wb") as f: with gfile.GFile(FLAGS.output, "wb") as f:
f.write(output_graph_def.SerializeToString()) f.write(output_graph_def.SerializeToString())
...@@ -67,6 +67,11 @@ def parse_args(): ...@@ -67,6 +67,11 @@ def parse_args():
type=bool, type=bool,
default=False, default=False,
help="e.g., False") help="e.g., False")
parser.add_argument(
"--data_type",
type=str,
default='DT_FLOAT',
help="e.g., DT_HALF/DT_FLOAT")
return parser.parse_known_args() return parser.parse_known_args()
......
...@@ -19,6 +19,11 @@ buffer_type_map = { ...@@ -19,6 +19,11 @@ buffer_type_map = {
'ARGUMENT' : 2, 'ARGUMENT' : 2,
} }
# Maps the converter's --data_type flag strings to mace_pb2 DataType enum
# values; used to stamp the 'T' arg onto generated ops.
data_type_map = {
  'DT_HALF' : mace_pb2.DT_HALF,
  'DT_FLOAT': mace_pb2.DT_FLOAT
}
def convert_tensor(op, tensor): def convert_tensor(op, tensor):
tf_tensor = op.outputs[0].eval() tf_tensor = op.outputs[0].eval()
tensor.name = op.outputs[0].name tensor.name = op.outputs[0].name
...@@ -42,21 +47,70 @@ def get_input_tensor(op, index): ...@@ -42,21 +47,70 @@ def get_input_tensor(op, index):
input_tensor = get_input_tensor(input_tensor.op, 0) input_tensor = get_input_tensor(input_tensor.op, 0)
return input_tensor return input_tensor
def add_buffer_to_image(input_name, input_type, net_def): def add_buffer_to_image(input_name, input_type, dt, net_def):
output_name = input_name[:-2] + "_b2i" + input_name[-2:] output_name = input_name[:-2] + "_b2i" + input_name[-2:]
op_def = net_def.op.add() op_def = net_def.op.add()
op_def.name = output_name op_def.name = output_name[:-2]
op_def.type = 'BufferToImage' op_def.type = 'BufferToImage'
op_def.input.extend([input_name]) op_def.input.extend([input_name])
op_def.output.extend([output_name])
arg = op_def.arg.add()
arg.name = 'buffer_type'
arg.i = buffer_type_map[input_type]
arg = op_def.arg.add()
arg.name = 'mode'
arg.i = 0
arg = op_def.arg.add()
arg.name = 'T'
arg.i = dt
return output_name
def add_image_to_buffer(input_name, input_type, dt, net_def):
  """Append an ImageToBuffer op to net_def that converts the OpenCL image
  tensor `input_name` back into a plain buffer.

  Args:
    input_name: source tensor name, expected to end with a ':N' port suffix.
    input_type: key into buffer_type_map (e.g. 'IN_OUT', 'FILTER').
    dt: mace_pb2 DataType enum value for the op's 'T' arg.
    net_def: NetDef proto being built.

  Returns:
    The name of the newly created buffer tensor.
  """
  base, port = input_name[:-2], input_name[-2:]
  output_name = base + "_i2b" + port

  op_def = net_def.op.add()
  op_def.name = output_name[:-2]  # op name is the tensor name minus the port
  op_def.type = 'ImageToBuffer'
  op_def.input.extend([input_name])
  op_def.output.extend([output_name])

  buffer_type_arg = op_def.arg.add()
  buffer_type_arg.name = 'buffer_type'
  buffer_type_arg.i = buffer_type_map[input_type]

  data_type_arg = op_def.arg.add()
  data_type_arg.name = 'T'
  data_type_arg.i = dt

  return output_name
def add_input_transform(name, dt, net_def):
new_input_name = "mace_input_node:0"
op_def = net_def.op.add()
op_def.name = name
op_def.type = 'BufferToImage'
op_def.input.extend([new_input_name])
op_def.output.extend([name+':0'])
epsilon_arg = op_def.arg.add() epsilon_arg = op_def.arg.add()
epsilon_arg.name = 'buffer_type' epsilon_arg.name = 'buffer_type'
epsilon_arg.i = buffer_type_map[input_type] epsilon_arg.i = buffer_type_map['IN_OUT']
arg = op_def.arg.add()
arg.name = 'T'
arg.i = dt
def add_output_transform(name, net_def):
output_name = "mace_output_node:0"
op_def = net_def.op.add()
op_def.name = output_name[:-2]
op_def.type = 'ImageToBuffer'
op_def.input.extend([name+':0'])
op_def.output.extend([output_name])
epsilon_arg = op_def.arg.add() epsilon_arg = op_def.arg.add()
epsilon_arg.name = 'mode' epsilon_arg.name = 'buffer_type'
epsilon_arg.i = 0 epsilon_arg.i = buffer_type_map['IN_OUT']
return output_name
def convert_ops(unresolved_ops, net_def, device): def convert_ops(unresolved_ops, dt, net_def, device):
ops_count = len(unresolved_ops) ops_count = len(unresolved_ops)
resolved_count = 1 resolved_count = 1
...@@ -67,8 +121,13 @@ def convert_ops(unresolved_ops, net_def, device): ...@@ -67,8 +121,13 @@ def convert_ops(unresolved_ops, net_def, device):
elif first_op.type == 'Const': elif first_op.type == 'Const':
tensor = net_def.tensors.add() tensor = net_def.tensors.add()
convert_tensor(first_op, tensor) convert_tensor(first_op, tensor)
elif first_op.type == 'Conv2D' or first_op.type == 'DepthwiseConv2dNative': else:
op_def = net_def.op.add() op_def = net_def.op.add()
arg = op_def.arg.add()
arg.name = 'T'
arg.i = dt
if first_op.type == 'Conv2D' or first_op.type == 'DepthwiseConv2dNative':
op_def.name = first_op.name op_def.name = first_op.name
if first_op.type == 'DepthwiseConv2dNative': if first_op.type == 'DepthwiseConv2dNative':
op_def.type = 'DepthwiseConv2d' op_def.type = 'DepthwiseConv2d'
...@@ -76,7 +135,7 @@ def convert_ops(unresolved_ops, net_def, device): ...@@ -76,7 +135,7 @@ def convert_ops(unresolved_ops, net_def, device):
op_def.type = first_op.type op_def.type = first_op.type
if device == 'gpu': if device == 'gpu':
op_def.input.extend([first_op.inputs[0].name]) op_def.input.extend([first_op.inputs[0].name])
output_name = add_buffer_to_image(first_op.inputs[1].name, "FILTER", net_def) output_name = add_buffer_to_image(first_op.inputs[1].name, "FILTER", dt, net_def)
op_def.input.extend([output_name]) op_def.input.extend([output_name])
else: else:
op_def.input.extend([input.name for input in first_op.inputs]) op_def.input.extend([input.name for input in first_op.inputs])
...@@ -99,7 +158,7 @@ def convert_ops(unresolved_ops, net_def, device): ...@@ -99,7 +158,7 @@ def convert_ops(unresolved_ops, net_def, device):
bias_add_op = unresolved_ops[2] bias_add_op = unresolved_ops[2]
if device == 'gpu': if device == 'gpu':
output_name = add_buffer_to_image(bias_add_op.inputs[1].name, "ARGUMENT", net_def) output_name = add_buffer_to_image(bias_add_op.inputs[1].name, "ARGUMENT", dt, net_def)
op_def.input.extend([output_name]) op_def.input.extend([output_name])
else: else:
op_def.input.extend([bias_add_op.inputs[1].name]) op_def.input.extend([bias_add_op.inputs[1].name])
...@@ -121,13 +180,12 @@ def convert_ops(unresolved_ops, net_def, device): ...@@ -121,13 +180,12 @@ def convert_ops(unresolved_ops, net_def, device):
op_def.output_shape.extend(output_shapes) op_def.output_shape.extend(output_shapes)
elif first_op.type == 'FusedBatchNorm': elif first_op.type == 'FusedBatchNorm':
op_def = net_def.op.add()
op_def.name = first_op.name op_def.name = first_op.name
op_def.type = 'BatchNorm' op_def.type = 'BatchNorm'
if device == 'gpu': if device == 'gpu':
op_def.input.extend([first_op.inputs[0].name]) op_def.input.extend([first_op.inputs[0].name])
for i in range(1, len(first_op.inputs)): for i in range(1, len(first_op.inputs)):
output_name = add_buffer_to_image(first_op.inputs[i].name, "ARGUMENT", net_def) output_name = add_buffer_to_image(first_op.inputs[i].name, "ARGUMENT", dt, net_def)
op_def.input.extend([output_name]) op_def.input.extend([output_name])
else: else:
op_def.input.extend([input.name for input in first_op.inputs]) op_def.input.extend([input.name for input in first_op.inputs])
...@@ -164,7 +222,6 @@ def convert_ops(unresolved_ops, net_def, device): ...@@ -164,7 +222,6 @@ def convert_ops(unresolved_ops, net_def, device):
variance = get_input_tensor(add_op, 0).name variance = get_input_tensor(add_op, 0).name
epsilon = get_input_tensor(add_op, 1).name epsilon = get_input_tensor(add_op, 1).name
op_def = net_def.op.add()
op_def.name = first_op.name[:-4] # remove /add op_def.name = first_op.name[:-4] # remove /add
op_def.type = 'BatchNorm' op_def.type = 'BatchNorm'
op_def.input.extend([input_name, gamma, beta, mean, variance, epsilon]) op_def.input.extend([input_name, gamma, beta, mean, variance, epsilon])
...@@ -178,7 +235,6 @@ def convert_ops(unresolved_ops, net_def, device): ...@@ -178,7 +235,6 @@ def convert_ops(unresolved_ops, net_def, device):
resolved_count = 7 resolved_count = 7
elif first_op.type == 'Relu6': elif first_op.type == 'Relu6':
op_def = net_def.op.add()
op_def.name = first_op.name op_def.name = first_op.name
op_def.type = 'Relu' op_def.type = 'Relu'
op_def.input.extend([input.name for input in first_op.inputs]) op_def.input.extend([input.name for input in first_op.inputs])
...@@ -193,7 +249,6 @@ def convert_ops(unresolved_ops, net_def, device): ...@@ -193,7 +249,6 @@ def convert_ops(unresolved_ops, net_def, device):
max_limit_arg.name = 'max_limit' max_limit_arg.name = 'max_limit'
max_limit_arg.f = 6 max_limit_arg.f = 6
elif first_op.type == 'AvgPool' or first_op.type == 'MaxPool': elif first_op.type == 'AvgPool' or first_op.type == 'MaxPool':
op_def = net_def.op.add()
op_def.name = first_op.name op_def.name = first_op.name
op_def.type = 'Pooling' op_def.type = 'Pooling'
op_def.input.extend([input.name for input in first_op.inputs]) op_def.input.extend([input.name for input in first_op.inputs])
...@@ -220,7 +275,6 @@ def convert_ops(unresolved_ops, net_def, device): ...@@ -220,7 +275,6 @@ def convert_ops(unresolved_ops, net_def, device):
data_format_arg.name = 'data_format' data_format_arg.name = 'data_format'
data_format_arg.s = 'NHWC' data_format_arg.s = 'NHWC'
elif first_op.type == 'Add': elif first_op.type == 'Add':
op_def = net_def.op.add()
op_def.name = first_op.name op_def.name = first_op.name
op_def.type = "AddN" op_def.type = "AddN"
op_def.input.extend([input.name for input in first_op.inputs]) op_def.input.extend([input.name for input in first_op.inputs])
...@@ -232,7 +286,6 @@ def convert_ops(unresolved_ops, net_def, device): ...@@ -232,7 +286,6 @@ def convert_ops(unresolved_ops, net_def, device):
output_shapes.append(output_shape) output_shapes.append(output_shape)
op_def.output_shape.extend(output_shapes) op_def.output_shape.extend(output_shapes)
elif first_op.type == 'ConcatV2': elif first_op.type == 'ConcatV2':
op_def = net_def.op.add()
op_def.name = first_op.name op_def.name = first_op.name
op_def.type = "Concat" op_def.type = "Concat"
op_def.input.extend([first_op.inputs[i].name for i in xrange(2)]) op_def.input.extend([first_op.inputs[i].name for i in xrange(2)])
...@@ -247,7 +300,6 @@ def convert_ops(unresolved_ops, net_def, device): ...@@ -247,7 +300,6 @@ def convert_ops(unresolved_ops, net_def, device):
output_shapes.append(output_shape) output_shapes.append(output_shape)
op_def.output_shape.extend(output_shapes) op_def.output_shape.extend(output_shapes)
elif first_op.type == 'ResizeBilinear': elif first_op.type == 'ResizeBilinear':
op_def = net_def.op.add()
op_def.name = first_op.name op_def.name = first_op.name
op_def.type = "ResizeBilinear" op_def.type = "ResizeBilinear"
op_def.input.extend([first_op.inputs[0].name]) op_def.input.extend([first_op.inputs[0].name])
...@@ -257,7 +309,7 @@ def convert_ops(unresolved_ops, net_def, device): ...@@ -257,7 +309,7 @@ def convert_ops(unresolved_ops, net_def, device):
size_arg.ints.extend(get_input_tensor(first_op, 1).eval().astype(np.int32).flat) size_arg.ints.extend(get_input_tensor(first_op, 1).eval().astype(np.int32).flat)
size_arg = op_def.arg.add() size_arg = op_def.arg.add()
size_arg.name = 'align_corners' size_arg.name = 'align_corners'
size_arg.ints.extend(first_op.get_attr('align_corners')) size_arg.i = first_op.get_attr('align_corners')
output_shapes = [] output_shapes = []
for output in first_op.outputs: for output in first_op.outputs:
output_shape = mace_pb2.OutputShape() output_shape = mace_pb2.OutputShape()
...@@ -265,7 +317,6 @@ def convert_ops(unresolved_ops, net_def, device): ...@@ -265,7 +317,6 @@ def convert_ops(unresolved_ops, net_def, device):
output_shapes.append(output_shape) output_shapes.append(output_shape)
op_def.output_shape.extend(output_shapes) op_def.output_shape.extend(output_shapes)
elif first_op.type in ['Relu', 'SpaceToBatchND', 'BatchToSpaceND', 'BiasAdd']: elif first_op.type in ['Relu', 'SpaceToBatchND', 'BatchToSpaceND', 'BiasAdd']:
op_def = net_def.op.add()
op_def.name = first_op.name op_def.name = first_op.name
op_def.type = first_op.type op_def.type = first_op.type
op_def.input.extend([input.name for input in first_op.inputs]) op_def.input.extend([input.name for input in first_op.inputs])
...@@ -284,16 +335,21 @@ def convert_ops(unresolved_ops, net_def, device): ...@@ -284,16 +335,21 @@ def convert_ops(unresolved_ops, net_def, device):
del unresolved_ops[0] del unresolved_ops[0]
def convert_to_mace_pb(input_graph_def, device): def convert_to_mace_pb(input_graph_def, input_node, output_node, data_type, device):
net_def = mace_pb2.NetDef() net_def = mace_pb2.NetDef()
dt = data_type_map[data_type]
with tf.Session() as session: with tf.Session() as session:
with session.graph.as_default() as graph: with session.graph.as_default() as graph:
tf.import_graph_def(input_graph_def, name="") tf.import_graph_def(input_graph_def, name="")
ops = graph.get_operations() ops = graph.get_operations()
unresolved_ops = ops unresolved_ops = ops
if device == 'gpu':
add_input_transform(input_node, dt, net_def)
while len(unresolved_ops) > 0: while len(unresolved_ops) > 0:
convert_ops(unresolved_ops, net_def, device) convert_ops(unresolved_ops, dt, net_def, device)
if device == 'gpu':
add_output_transform(output_node, net_def)
print "PB Parsed." print "PB Parsed."
......
import argparse import argparse
import sys import sys
import os
import os.path
import tensorflow as tf import tensorflow as tf
import numpy as np import numpy as np
...@@ -13,28 +15,35 @@ from tensorflow import gfile ...@@ -13,28 +15,35 @@ from tensorflow import gfile
# 3. adb pull the result. # 3. adb pull the result.
# 4. Compare output data of mace and tf # 4. Compare output data of mace and tf
# python validate_icnet.py --model_file opt_icnet.pb \ # python validate_icnet.py --model_file opt_icnet.pb \
# --tf_input_file input_file \ # --input_file input_file \
# --mace_out_file icnet.out # --mace_out_file icnet.out
def generate_data(shape): def generate_data(shape):
np.random.seed(FLAGS.random_seed) np.random.seed(FLAGS.random_seed)
data = np.random.random(shape) data = np.random.random(shape)
print FLAGS.tf_input_file print FLAGS.input_file
data.astype(np.float32).tofile(FLAGS.tf_input_file) data.astype(np.float32).tofile(FLAGS.input_file)
mace_data = np.transpose(data, axes=(2, 0, 1))
mace_data.astype(np.float32).tofile(FLAGS.mace_input_file)
print "Generate input file done." print "Generate input file done."
def load_data(file): def load_data(file):
if os.path.isfile(file):
return np.fromfile(file=file, dtype=np.float32) return np.fromfile(file=file, dtype=np.float32)
else:
return np.empty([0])
def valid_output(out_shape, mace_out_file, tf_out_value): def valid_output(out_shape, mace_out_file, tf_out_value):
mace_out_value = load_data(mace_out_file) mace_out_value = load_data(mace_out_file)
if mace_out_value.size != 0:
mace_out_value = mace_out_value.reshape(out_shape) mace_out_value = mace_out_value.reshape(out_shape)
tf_out_data_t = np.transpose(tf_out_value, axes=(0, 3, 1, 2)) np.testing.assert_allclose(tf_out_value, mace_out_value, rtol=0, atol=1e-3)
res = np.allclose(mace_out_value, tf_out_data_t, rtol=0, atol=1e-5) res = np.allclose(tf_out_value, mace_out_value, rtol=0, atol=1e-3)
print 'Passed! Haha' if res else 'Failed! Oops' if res:
print '=======================Passed! Haha======================'
else:
print '=======================Failed! Oops======================'
else:
print '=======================Skip empty node==================='
def run_model(input_shape): def run_model(input_shape):
...@@ -51,13 +60,14 @@ def run_model(input_shape): ...@@ -51,13 +60,14 @@ def run_model(input_shape):
with tf.Session() as session: with tf.Session() as session:
with session.graph.as_default() as graph: with session.graph.as_default() as graph:
tf.import_graph_def(input_graph_def, name="") tf.import_graph_def(input_graph_def, name="")
input_node = graph.get_tensor_by_name('input_node:0') input_node = graph.get_tensor_by_name(FLAGS.input_node + ':0')
output_node = graph.get_tensor_by_name('output_node:0') output_node = graph.get_tensor_by_name(FLAGS.output_node + ':0')
input_value = load_data(FLAGS.tf_input_file) input_value = load_data(FLAGS.input_file)
input_value = input_value.reshape(input_shape) input_value = input_value.reshape(input_shape)
output_value = session.run(output_node, feed_dict={input_node: [input_value]}) output_value = session.run(output_node, feed_dict={input_node: [input_value]})
# output_value.astype(np.float32).tofile( os.path.dirname(FLAGS.input_file) + '/tf_weight')
return output_value return output_value
def main(unused_args): def main(unused_args):
...@@ -80,15 +90,10 @@ def parse_args(): ...@@ -80,15 +90,10 @@ def parse_args():
default="", default="",
help="TensorFlow \'GraphDef\' file to load.") help="TensorFlow \'GraphDef\' file to load.")
parser.add_argument( parser.add_argument(
"--tf_input_file", "--input_file",
type=str,
default="",
help="tensorflow input data to load.")
parser.add_argument(
"--mace_input_file",
type=str, type=str,
default="", default="",
help="mace input data to load.") help="input file.")
parser.add_argument( parser.add_argument(
"--mace_out_file", "--mace_out_file",
type=str, type=str,
...@@ -97,13 +102,23 @@ def parse_args(): ...@@ -97,13 +102,23 @@ def parse_args():
parser.add_argument( parser.add_argument(
"--input_shape", "--input_shape",
type=str, type=str,
default="480,480,3", default="512,512,3",
help="input shape.") help="input shape.")
parser.add_argument( parser.add_argument(
"--output_shape", "--output_shape",
type=str, type=str,
default="1,2,480,480", default="1,512,512,2",
help="output shape.") help="output shape.")
parser.add_argument(
"--input_node",
type=str,
default="input_node",
help="input node")
parser.add_argument(
"--output_node",
type=str,
default="output_node",
help="output node")
parser.add_argument( parser.add_argument(
"--generate_data", "--generate_data",
type='bool', type='bool',
......
#!/bin/bash
# Validate a GCN TensorFlow model against MACE: generate input, convert the
# model, run it on an attached Android device via adb, and compare outputs.
# Must run at root dir of mace project.

Usage() {
  echo 'Usage: bash tools/validate_gcn.sh tf_model_file'
}

if [ $# != 1 ];then
  Usage
  # exit status must be 0-255; 'exit -1' is non-portable (was: exit -1)
  exit 1
fi

TF_MODEL_FILE_PATH=$1
# Quote the path so model files in directories with spaces still work.
MODEL_DIR=$(dirname "${TF_MODEL_FILE_PATH}")
MACE_MODEL_NAME='mace_model.pb'
INPUT_FILE_NAME='model_input'
OUTPUT_FILE_NAME='gcn.out'
OUTPUT_LIST_FILE='gcn.list'
PHONE_DATA_DIR="/data/local/tmp/${MACE_MODEL_NAME}"
KERNEL_DIR="${PHONE_DATA_DIR}/cl/"

# Step 1: Generate input data
echo "Step 1: Generate input data"
python tools/validate.py --generate_data true --random_seed 1 \
 --input_file=${MODEL_DIR}/${INPUT_FILE_NAME} \
 --input_shape=512,512,3

# Step 2: convert tf model to mace model
echo "Step 2: convert tf model to mace model"
bazel build //mace/python/tools:tf_converter
bazel-bin/mace/python/tools/tf_converter --input=${TF_MODEL_FILE_PATH} \
 --output=${MODEL_DIR}/${MACE_MODEL_NAME} \
 --input_node=input \
 --output_node=GCN/br_result_2/fcn_br \
 --data_type=DT_FLOAT \
 --runtime=gpu

# Step 3: Run model on the phone
echo "Step 3: Run model on the phone"
bazel build -c opt --strip always mace/examples:mace_run \
 --crosstool_top=//external:android/crosstool \
 --host_crosstool_top=@bazel_tools//tools/cpp:toolchain \
 --cpu=arm64-v8a

adb shell "mkdir -p ${PHONE_DATA_DIR}"
adb shell "mkdir -p ${KERNEL_DIR}"
adb push mace/kernels/opencl/cl/* ${KERNEL_DIR}

adb push ${MODEL_DIR}/${MACE_MODEL_NAME} ${PHONE_DATA_DIR}
adb push ${MODEL_DIR}/${INPUT_FILE_NAME} ${PHONE_DATA_DIR}
adb push bazel-bin/mace/examples/mace_run ${PHONE_DATA_DIR}

# BUG FIX: this was 'num_threads=${1:-1}', but $1 is the TF model file path
# (the script requires exactly one positional arg above), so OMP_NUM_THREADS
# was being set to a file path. Default to a single thread.
num_threads=1

adb </dev/null shell MACE_RUN_PARAMETER_PATH=${PHONE_DATA_DIR}/mace_run.config \
 MACE_KERNEL_PATH=$KERNEL_DIR \
 OMP_NUM_THREADS=$num_threads \
 ${PHONE_DATA_DIR}/mace_run \
 --model=${PHONE_DATA_DIR}/${MACE_MODEL_NAME} \
 --input=mace_input_node \
 --output=mace_output_node \
 --input_shape=1,512,512,3 \
 --input_file=${PHONE_DATA_DIR}/${INPUT_FILE_NAME} \
 --output_file=${PHONE_DATA_DIR}/${OUTPUT_FILE_NAME} \
 --device=OPENCL

# Step 4: pull the mace run result.
echo "Step 4: pull the mace run result."
rm -rf ${MODEL_DIR}/${OUTPUT_FILE_NAME}
adb </dev/null pull ${PHONE_DATA_DIR}/${OUTPUT_FILE_NAME} ${MODEL_DIR}

# Step 5: validate the result
echo "Step 5: validate the result"
python tools/validate.py --model_file ${TF_MODEL_FILE_PATH} \
 --input_file ${MODEL_DIR}/${INPUT_FILE_NAME} \
 --mace_out_file ${MODEL_DIR}/${OUTPUT_FILE_NAME} \
 --input_node input \
 --output_node GCN/br_result_2/fcn_br \
 --output_shape 1,512,512,2
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册