From a022085eb47b19d2f82b11372ca0cda5c7e09847 Mon Sep 17 00:00:00 2001 From: yejianwu Date: Thu, 18 Jan 2018 15:51:46 +0800 Subject: [PATCH] move tools to libmace --- mace/codegen/BUILD | 12 - mace/examples/BUILD | 11 - mace/examples/mace_run.cc | 222 -------- mace/proto/mace.proto | 138 ----- mace/python/tools/BUILD | 46 -- mace/python/tools/binary_codegen.py | 85 --- mace/python/tools/convert_util.py | 29 - mace/python/tools/dsp_ops.py | 63 --- mace/python/tools/graph_util.py | 50 -- mace/python/tools/memory_optimizer.py | 90 --- mace/python/tools/model.template | 268 --------- mace/python/tools/opencl_codegen.py | 67 --- mace/python/tools/source_converter_lib.py | 145 ----- mace/python/tools/str2vec_maps.cc.tmpl | 27 - mace/python/tools/tf_converter.py | 116 ---- mace/python/tools/tf_converter_lib.py | 656 ---------------------- mace/python/tools/tf_dsp_converter_lib.py | 407 -------------- tools/create_mace_lib.sh | 27 - tools/export_lib.sh | 135 +++++ tools/gcn.config | 3 - tools/side_gcn.config | 2 - tools/validate_gcn.sh | 158 ------ 22 files changed, 135 insertions(+), 2622 deletions(-) delete mode 100644 mace/examples/mace_run.cc delete mode 100644 mace/proto/mace.proto delete mode 100644 mace/python/tools/binary_codegen.py delete mode 100644 mace/python/tools/convert_util.py delete mode 100644 mace/python/tools/dsp_ops.py delete mode 100644 mace/python/tools/graph_util.py delete mode 100644 mace/python/tools/memory_optimizer.py delete mode 100644 mace/python/tools/model.template delete mode 100644 mace/python/tools/opencl_codegen.py delete mode 100644 mace/python/tools/source_converter_lib.py delete mode 100644 mace/python/tools/str2vec_maps.cc.tmpl delete mode 100644 mace/python/tools/tf_converter.py delete mode 100644 mace/python/tools/tf_converter_lib.py delete mode 100644 mace/python/tools/tf_dsp_converter_lib.py delete mode 100755 tools/create_mace_lib.sh create mode 100755 tools/export_lib.sh delete mode 100644 tools/gcn.config delete mode 100644 
tools/side_gcn.config delete mode 100755 tools/validate_gcn.sh diff --git a/mace/codegen/BUILD b/mace/codegen/BUILD index fcb0b509..c6d0f9ff 100644 --- a/mace/codegen/BUILD +++ b/mace/codegen/BUILD @@ -21,18 +21,6 @@ cc_library( linkstatic = 1, ) -cc_library( - name = "generated_opencl_prod", - srcs = ["opencl/opencl_compiled_program.cc"], - linkstatic = 1, -) - -cc_library( - name = "generated_tuning_params", - srcs = ["tuning/tuning_params.cc"], - linkstatic = 1, -) - cc_library( name = "generated_version", srcs = ["version/version.cc"], diff --git a/mace/examples/BUILD b/mace/examples/BUILD index 09667cf4..233b59f1 100644 --- a/mace/examples/BUILD +++ b/mace/examples/BUILD @@ -24,14 +24,3 @@ cc_test( "//mace/core:test_benchmark_main", ], ) - -cc_binary( - name = "mace_run", - srcs = ["mace_run.cc"], - linkopts = if_neon_enabled(["-fopenmp"]), - linkstatic = 1, - deps = [ - "//mace/codegen:generated_models", - "//mace/utils:command_line_flags", - ], -) diff --git a/mace/examples/mace_run.cc b/mace/examples/mace_run.cc deleted file mode 100644 index 2d57d7f7..00000000 --- a/mace/examples/mace_run.cc +++ /dev/null @@ -1,222 +0,0 @@ -// -// Copyright (c) 2017 XiaoMi All rights reserved. 
-// - -/** - * Usage: - * mace_run --model=mobi_mace.pb \ - * --input=input_node \ - * --output=MobilenetV1/Logits/conv2d/convolution \ - * --input_shape=1,224,224,3 \ - * --output_shape=1,224,224,2 \ - * --input_file=input_data \ - * --output_file=mace.out \ - * --device=NEON - */ -#include -#include -#include -#include -#include -#include "mace/utils/command_line_flags.h" -#include "mace/utils/env_time.h" -#include "mace/utils/logging.h" - -#include "mace/core/public/mace.h" - -using namespace std; -using namespace mace; - -namespace mace { -namespace MACE_MODEL_TAG { - -extern NetDef CreateNet(); - -} -} - -void ParseShape(const string &str, vector *shape) { - string tmp = str; - while (!tmp.empty()) { - int dim = atoi(tmp.data()); - shape->push_back(dim); - size_t next_offset = tmp.find(","); - if (next_offset == string::npos) { - break; - } else { - tmp = tmp.substr(next_offset + 1); - } - } -} - -DeviceType ParseDeviceType(const string &device_str) { - if (device_str.compare("CPU") == 0) { - return DeviceType::CPU; - } else if (device_str.compare("NEON") == 0) { - return DeviceType::NEON; - } else if (device_str.compare("OPENCL") == 0) { - return DeviceType::OPENCL; - } else if (device_str.compare("HEXAGON") == 0) { - return DeviceType::HEXAGON; - } else { - return DeviceType::CPU; - } -} - -struct mallinfo LogMallinfoChange(struct mallinfo prev) { - struct mallinfo curr = mallinfo(); - if (prev.arena != curr.arena) { - LOG(INFO) << "Non-mmapped space allocated (bytes): " << curr.arena - << ", diff: " << ((int64_t)curr.arena - (int64_t)prev.arena); - } - if (prev.ordblks != curr.ordblks) { - LOG(INFO) << "Number of free chunks: " << curr.ordblks - << ", diff: " << ((int64_t)curr.ordblks - (int64_t)prev.ordblks); - } - if (prev.smblks != curr.smblks) { - LOG(INFO) << "Number of free fastbin blocks: " << curr.smblks - << ", diff: " << ((int64_t)curr.smblks - (int64_t)prev.smblks); - } - if (prev.hblks != curr.hblks) { - LOG(INFO) << "Number of mmapped regions: 
" << curr.hblks - << ", diff: " << ((int64_t)curr.hblks - (int64_t)prev.hblks); - } - if (prev.hblkhd != curr.hblkhd) { - LOG(INFO) << "Space allocated in mmapped regions (bytes): " << curr.hblkhd - << ", diff: " << ((int64_t)curr.hblkhd - (int64_t)prev.hblkhd); - } - if (prev.usmblks != curr.usmblks) { - LOG(INFO) << "Maximum total allocated space (bytes): " << curr.usmblks - << ", diff: " << ((int64_t)curr.usmblks - (int64_t)prev.usmblks); - } - if (prev.fsmblks != curr.fsmblks) { - LOG(INFO) << "Space in freed fastbin blocks (bytes): " << curr.fsmblks - << ", diff: " << ((int64_t)curr.fsmblks - (int64_t)prev.fsmblks); - } - if (prev.uordblks != curr.uordblks) { - LOG(INFO) << "Total allocated space (bytes): " << curr.uordblks - << ", diff: " - << ((int64_t)curr.uordblks - (int64_t)prev.uordblks); - } - if (prev.fordblks != curr.fordblks) { - LOG(INFO) << "Total free space (bytes): " << curr.fordblks << ", diff: " - << ((int64_t)curr.fordblks - (int64_t)prev.fordblks); - } - if (prev.keepcost != curr.keepcost) { - LOG(INFO) << "Top-most, releasable space (bytes): " << curr.keepcost - << ", diff: " - << ((int64_t)curr.keepcost - (int64_t)prev.keepcost); - } - return curr; -} - -int main(int argc, char **argv) { - string input_shape; - string output_shape; - string input_file; - string output_file; - string device; - int round = 1; - int malloc_check_cycle = -1; - - std::vector flag_list = { - Flag("input_shape", &input_shape, "input shape, separated by comma"), - Flag("output_shape", &output_shape, "output shape, separated by comma"), - Flag("input_file", &input_file, "input file name"), - Flag("output_file", &output_file, "output file name"), - Flag("device", &device, "CPU/NEON/OPENCL/HEXAGON"), - Flag("round", &round, "round"), - Flag("malloc_check_cycle", &malloc_check_cycle, - "malloc debug check cycle, -1 to disable"), - }; - - string usage = Flags::Usage(argv[0], flag_list); - const bool parse_result = Flags::Parse(&argc, argv, flag_list); - - if 
(!parse_result) { - LOG(ERROR) << usage; - return -1; - } - - VLOG(0) << "mace version: " << MaceVersion() << std::endl - << "mace git version: " << MaceGitVersion() << std::endl - << "input_shape: " << input_shape << std::endl - << "output_shape: " << output_shape << std::endl - << "input_file: " << input_file << std::endl - << "output_file: " << output_file << std::endl - << "device: " << device << std::endl - << "round: " << round << std::endl; - - vector input_shape_vec; - vector output_shape_vec; - ParseShape(input_shape, &input_shape_vec); - ParseShape(output_shape, &output_shape_vec); - - // load model - int64_t t0 = utils::NowMicros(); - NetDef net_def = mace::MACE_MODEL_TAG::CreateNet(); - int64_t t1 = utils::NowMicros(); - LOG(INFO) << "CreateNetDef duration: " << t1 - t0 << " us"; - int64_t init_micros = t1 - t0; - - DeviceType device_type = ParseDeviceType(device); - VLOG(1) << "Device Type" << device_type; - int64_t input_size = std::accumulate(input_shape_vec.begin(), - input_shape_vec.end(), 1, std::multiplies()); - int64_t output_size = std::accumulate(output_shape_vec.begin(), - output_shape_vec.end(), 1, std::multiplies()); - std::unique_ptr input_data(new float[input_size]); - std::unique_ptr output_data(new float[output_size]); - - // load input - ifstream in_file(input_file, ios::in | ios::binary); - if (in_file.is_open()) { - in_file.read(reinterpret_cast(input_data.get()), - input_size * sizeof(float)); - in_file.close(); - } else { - LOG(FATAL) << "Open input file failed"; - } - - // Init model - VLOG(0) << "Run init"; - t0 = utils::NowMicros(); - mace::MaceEngine engine(&net_def, device_type); - t1 = utils::NowMicros(); - init_micros += t1 - t0; - LOG(INFO) << "Net init duration: " << t1 - t0 << " us"; - - LOG(INFO) << "Total init duration: " << init_micros << " us"; - - VLOG(0) << "Warm up"; - t0 = utils::NowMicros(); - engine.Run(input_data.get(), input_shape_vec, output_data.get()); - t1 = utils::NowMicros(); - LOG(INFO) << "1st warm up 
run duration: " << t1 - t0 << " us"; - - if (round > 0) { - VLOG(0) << "Run model"; - t0 = utils::NowMicros(); - struct mallinfo prev = mallinfo(); - for (int i = 0; i < round; ++i) { - engine.Run(input_data.get(), input_shape_vec, output_data.get()); - if (malloc_check_cycle >= 1 && i % malloc_check_cycle == 0) { - LOG(INFO) << "=== check malloc info change #" << i << " ==="; - prev = LogMallinfoChange(prev); - } - } - t1 = utils::NowMicros(); - LOG(INFO) << "Avg duration: " << (t1 - t0) / round << " us"; - } - - if (output_data != nullptr) { - ofstream out_file(output_file, ios::binary); - out_file.write((const char *) (output_data.get()), - output_size * sizeof(float)); - out_file.flush(); - out_file.close(); - LOG(INFO) << "Write output file done."; - } else { - LOG(ERROR) << "output data is null"; - } -} diff --git a/mace/proto/mace.proto b/mace/proto/mace.proto deleted file mode 100644 index d3deb3e4..00000000 --- a/mace/proto/mace.proto +++ /dev/null @@ -1,138 +0,0 @@ -syntax = "proto2"; - -package mace; - -enum NetMode { - INIT = 0; - NORMAL = 1; -} - -enum DeviceType { - CPU = 0; // In default, we will use CPU. - NEON = 1; - OPENCL = 2; -} - -enum DataType { - DT_INVALID = 0; - - // Data types that all computation devices are expected to be - // capable to support. - DT_FLOAT = 1; - DT_DOUBLE = 2; - DT_INT32 = 3; - DT_UINT8 = 4; - DT_INT16 = 5; - DT_INT8 = 6; - DT_STRING = 7; - DT_INT64 = 8; - DT_UINT16 = 9; - DT_BOOL = 10; - DT_HALF = 19; - DT_UINT32 = 22; -} - -message TensorProto { - // The dimensions in the tensor. - repeated int64 dims = 1; - optional DataType data_type = 2 [default = DT_FLOAT]; - // For float - repeated float float_data = 3 [packed = true]; - // For int32, uint8, int8, uint16, int16, bool, and float16 - // Note about float16: in storage we will basically convert float16 byte-wise - // to unsigned short and then store them in the int32_data field. 
- repeated int32 int32_data = 4 [packed = true]; - // For bytes - optional bytes byte_data = 5; - // For strings - repeated bytes string_data = 6; - // For double - repeated double double_data = 9 [packed = true]; - // For int64 - repeated int64 int64_data = 10 [packed = true]; - // Optionally, a name for the tensor. - optional string name = 7; - - optional uint32 node_id = 100; -} - -message Argument { - optional string name = 1; - optional float f = 2; - optional int64 i = 3; - optional bytes s = 4; - repeated float floats = 5; - repeated int64 ints = 6; - repeated bytes strings = 7; -} - -// for hexagon mace-nnlib -message NodeInput { - optional int32 node_id = 1; - optional int32 output_port = 2; -} - -message OutputShape { - repeated int64 dims = 1; -} - -message OperatorDef { - repeated string input = 1; - repeated string output = 2; - optional string name = 3; - optional string type = 4; - repeated Argument arg = 5; - repeated OutputShape output_shape = 6; - repeated DataType output_type = 7; - - // Memory optimization: only support one single output op - optional int32 mem_id = 10 [default = -1]; - - // for hexagon mace-nnlib - optional uint32 node_id = 100; - optional uint32 op_id = 101; - optional uint32 padding = 102; - repeated NodeInput node_input = 103; - repeated int32 out_max_byte_size = 104; // only support 32-bit len -} - -// for memory optimization -message MemoryBlock { - optional int32 mem_id = 1; - optional uint32 x = 2; - optional uint32 y = 3; -} -message MemoryArena { - repeated MemoryBlock mem_block = 1; -} - -// for hexagon mace-nnlib -message InputInfo { - optional string name = 1; - optional int32 node_id = 2; - repeated int32 dims = 3; - optional int32 max_byte_size = 4; // only support 32-bit len - optional DataType data_type = 5 [default = DT_FLOAT]; -} -message OutputInfo { - optional string name = 1; - optional int32 node_id = 2; - repeated int32 dims = 3; - optional int32 max_byte_size = 4; // only support 32-bit len - optional 
DataType data_type = 5 [default = DT_FLOAT]; -} - -message NetDef { - optional string name = 1; - repeated OperatorDef op = 2; - optional string version = 3; - repeated Argument arg = 4; - repeated TensorProto tensors = 5; - - // for mem optimization - optional MemoryArena mem_arena = 10; - - // for hexagon mace-nnlib - repeated InputInfo input_info = 100; - repeated OutputInfo output_info = 101; -} diff --git a/mace/python/tools/BUILD b/mace/python/tools/BUILD index ad3944b9..4b898b54 100644 --- a/mace/python/tools/BUILD +++ b/mace/python/tools/BUILD @@ -1,49 +1,3 @@ -py_library( - name = "tf_converter_lib", - srcs = [ - "convert_util.py", - "graph_util.py", - "tf_converter_lib.py", - "tf_dsp_converter_lib.py", - ], - srcs_version = "PY2AND3", - deps = [ - ":memory_optimizer", - "//mace/proto:mace_py", - ], -) - -py_library( - name = "source_converter_lib", - srcs = [ - "source_converter_lib.py", - ], - srcs_version = "PY2AND3", - deps = [ - "//mace/proto:mace_py", - ], -) - -py_binary( - name = "tf_converter", - srcs = ["tf_converter.py"], - srcs_version = "PY2AND3", - deps = [ - ":tf_converter_lib", - ":source_converter_lib", - "@six_archive//:six", - ], -) - -py_binary( - name = "memory_optimizer", - srcs = ["memory_optimizer.py"], - srcs_version = "PY2AND3", - deps = [ - "//mace/proto:mace_py", - ], -) - py_binary( name = "caffe_ops_stats", srcs = ["caffe_ops_stats.py"], diff --git a/mace/python/tools/binary_codegen.py b/mace/python/tools/binary_codegen.py deleted file mode 100644 index a7cd756b..00000000 --- a/mace/python/tools/binary_codegen.py +++ /dev/null @@ -1,85 +0,0 @@ -import argparse -import os -import sys -import struct - -import jinja2 - -import numpy as np - -# python mace/python/tools/binary_codegen.py \ -# --binary_file=${BIN_FILE} --output_path=${CODE_GEN_PATH} --variable_name=kTuningParamsData - -FLAGS = None - - -def generate_cpp_source(): - data_map = {} - if not os.path.exists(FLAGS.binary_file): - env = 
jinja2.Environment(loader=jinja2.FileSystemLoader(sys.path[0])) - return env.get_template('str2vec_maps.cc.tmpl').render( - maps=data_map, - data_type='unsigned int', - variable_name=FLAGS.variable_name - ) - - with open(FLAGS.binary_file, "rb") as binary_file: - binary_array = np.fromfile(binary_file, dtype=np.uint8) - - idx = 0 - size, = struct.unpack("Q", binary_array[idx:idx+8]) - print size - idx += 8 - for _ in xrange(size): - key_size, = struct.unpack("i", binary_array[idx:idx+4]) - idx += 4 - key, = struct.unpack(str(key_size) + "s", binary_array[idx:idx+key_size]) - idx += key_size - params_size, = struct.unpack("i", binary_array[idx:idx+4]) - idx += 4 - data_map[key] = [] - count = params_size / 4 - params = struct.unpack(str(count) + "i", binary_array[idx:idx+params_size]) - for i in params: - data_map[key].append(i) - idx += params_size - - env = jinja2.Environment(loader=jinja2.FileSystemLoader(sys.path[0])) - return env.get_template('str2vec_maps.cc.tmpl').render( - maps = data_map, - data_type = 'unsigned int', - variable_name = FLAGS.variable_name - ) - -def main(unused_args): - cpp_binary_source = generate_cpp_source() - if os.path.isfile(FLAGS.output_path): - os.remove(FLAGS.output_path) - w_file = open(FLAGS.output_path, "w") - w_file.write(cpp_binary_source) - w_file.close() - -def parse_args(): - """Parses command line arguments.""" - parser = argparse.ArgumentParser() - parser.add_argument( - "--binary_file", - type=str, - default="", - help="The binaries file path.") - parser.add_argument( - "--output_path", - type=str, - default="", - help="The path of generated C++ source file which contains the binary.") - parser.add_argument( - "--variable_name", - type=str, - default="kTuningParamsData", - help="global variable name.") - return parser.parse_known_args() - - -if __name__ == '__main__': - FLAGS, unparsed = parse_args() - main(unused_args=[sys.argv[0]] + unparsed) diff --git a/mace/python/tools/convert_util.py 
b/mace/python/tools/convert_util.py deleted file mode 100644 index 53b31969..00000000 --- a/mace/python/tools/convert_util.py +++ /dev/null @@ -1,29 +0,0 @@ -import tensorflow as tf -from mace.proto import mace_pb2 - -TF_DTYPE_2_MACE_DTYPE_MAP = { - tf.float32: mace_pb2.DT_FLOAT, - tf.double: mace_pb2.DT_DOUBLE, - tf.half: mace_pb2.DT_HALF, - tf.int64: mace_pb2.DT_INT64, - tf.int32: mace_pb2.DT_INT32, - tf.qint32: mace_pb2.DT_INT32, - tf.int16: mace_pb2.DT_INT16, - tf.qint16: mace_pb2.DT_INT16, - tf.int8: mace_pb2.DT_INT8, - tf.qint8: mace_pb2.DT_INT8, - tf.quint16: mace_pb2.DT_UINT16, - tf.uint16: mace_pb2.DT_UINT16, - tf.quint8: mace_pb2.DT_UINT8, - tf.uint8: mace_pb2.DT_UINT8, - tf.string: mace_pb2.DT_STRING, - tf.bool: mace_pb2.DT_BOOL, -} - - -def tf_dtype_2_mace_dtype(tf_dtype): - mace_dtype = TF_DTYPE_2_MACE_DTYPE_MAP.get(tf_dtype, None) - if not mace_dtype: - raise Exception("Not supported tensorflow dtype: " + tf_dtype) - return mace_dtype - diff --git a/mace/python/tools/dsp_ops.py b/mace/python/tools/dsp_ops.py deleted file mode 100644 index bd79b53a..00000000 --- a/mace/python/tools/dsp_ops.py +++ /dev/null @@ -1,63 +0,0 @@ - -class DspOps(object): - def __init__(self): - self.dsp_ops = { - 'INPUT': 'INPUT"', - 'OUTPUT': 'OUTPUT', - 'NoOp': 'Nop', - 'FLATTEN': 'Flatten', - 'Identity': 'Nop', - 'Placeholder': 'INPUT', - 'Const': 'Const', - 'QuantizedConv2D': 'QuantizedConv2d_8x8to32', - 'QuantizedMatMul': 'QuantizedMatMul_8x8to32', - 'QuantizeDownAndShrinkRange': 'QuantizeDownAndShrinkRange_32to8', - 'QuantizedRelu': 'QuantizedRelu_8', - 'QuantizedReluX': 'QuantizedReluX_8', - 'QuantizedMaxPool': 'QuantizedMaxPool_8', - 'QuantizedAvgPool': 'QuantizedAvgPool_8', - 'QuantizedConcat': 'QuantizedConcat_8', - 'QuantizedBiasAdd': 'QuantizedBiasAdd_8p8to32', - 'QuantizedResizeBilinear' : 'QuantizedResizeBilinear_8', - 'QuantizedSpaceToBatchND': 'QuantizedSpaceToBatchND_8', - 'QuantizedBatchToSpaceND': 'QuantizedBatchToSpaceND_8', - 'Min': 'Min_f', - 'Max': 
'Max_f', - 'QuantizeV2': 'Quantize', - 'Dequantize': 'Dequantize', - 'Softmax': 'Softmax_f', - 'Reshape': 'Reshape', - 'QuantizedReshape': 'QuantizedReshape', - 'Sigmoid': 'Sigmoid_f', - 'Slice': 'Slice_f', - 'Add': 'Add_f', - 'Mul': 'Mul_f', - 'Requantize': 'Requantize_32to8', - 'RequantizationRange': 'RequantizationRange_32', - 'Sub': 'Sub_f', - 'Pack': 'Pack_int32', - 'StridedSlice': 'StridedSlice_f', - 'ExpandDims': 'ExpandDims_f', - 'QuantizedMul': 'QuantizedMul_8x8to32', - 'QuantizedAdd': 'QuantizedAdd_8p8to32', - 'Pad': 'Pad_f', - 'SpaceToBatchND': 'SpaceToBatchND_f', - 'BatchToSpaceND': 'BatchToSpaceND_f', - 'ResizeBilinear': 'ResizeBilinear_f', - 'ConcatV2': 'ConcatV2_f', - 'Conv2DBackpropInput': 'Deconv_f', - 'Tanh': 'Tanh_f', - 'Split': 'Split_f', - 'Transpose': 'Transpose_f', - 'Concat': 'Concat_f', - 'AddN': 'AddN_f', - } - def has_op(self, tf_op): - return tf_op in self.dsp_ops - - def map_nn_op(self, tf_op): - if tf_op not in self.dsp_ops: - raise Exception('Could not map nn op for: ', tf_op) - return self.dsp_ops[tf_op] - - diff --git a/mace/python/tools/graph_util.py b/mace/python/tools/graph_util.py deleted file mode 100644 index 0474256d..00000000 --- a/mace/python/tools/graph_util.py +++ /dev/null @@ -1,50 +0,0 @@ -import tensorflow as tf -from mace.proto import mace_pb2 -from collections import OrderedDict - -def sort_tf_node(node, nodes_map, ordered_nodes_map): - if node.name not in ordered_nodes_map: - for input_tensor_name in node.input: - input_node_name = input_tensor_name.split(':')[ - 0] if ':' in input_tensor_name else input_tensor_name - if input_node_name not in nodes_map or input_node_name in ordered_nodes_map: - continue - - input_node = nodes_map[input_node_name] - sort_tf_node(input_node, nodes_map, ordered_nodes_map) - ordered_nodes_map[node.name] = node - -def sort_tf_graph(graph_def): - nodes_map = {} - ordered_nodes_map = OrderedDict() - for node in graph_def.node: - nodes_map[node.name] = node - for node in graph_def.node: - 
sort_tf_node(node, nodes_map, ordered_nodes_map) - sorted_graph = tf.GraphDef() - sorted_graph.node.extend([node for node in ordered_nodes_map.values()]) - return sorted_graph - - -def sort_mace_node(node, nodes_map, ordered_nodes_map): - if node.name not in ordered_nodes_map: - for input_tensor_name in node.input: - input_node_name = input_tensor_name.split(':')[ - 0] if ':' in input_tensor_name else input_tensor_name - if input_node_name not in nodes_map or input_node_name in ordered_nodes_map: - continue - - input_node = nodes_map[input_node_name] - sort_mace_node(input_node, nodes_map, ordered_nodes_map) - ordered_nodes_map[node.name] = node - -def sort_mace_graph(graph_def, output_name): - nodes_map = {} - ordered_nodes_map = OrderedDict() - for node in graph_def.op: - nodes_map[node.name] = node - sort_mace_node(nodes_map[output_name], nodes_map, ordered_nodes_map) - sorted_graph = mace_pb2.NetDef() - sorted_graph.tensors.extend(graph_def.tensors) - sorted_graph.op.extend([node for node in ordered_nodes_map.values()]) - return sorted_graph \ No newline at end of file diff --git a/mace/python/tools/memory_optimizer.py b/mace/python/tools/memory_optimizer.py deleted file mode 100644 index 55648b29..00000000 --- a/mace/python/tools/memory_optimizer.py +++ /dev/null @@ -1,90 +0,0 @@ -import sys -import operator -from mace.proto import mace_pb2 - -class MemoryOptimizer(object): - def __init__(self, net_def): - self.net_def = net_def - self.idle_mem = set() - self.op_mem = {} # op_name->mem_id - self.mem_block = {} # mem_id->[x, y] - self.total_mem_count = 0 - self.ref_counter = {} - - consumers = {} - for op in net_def.op: - if self.is_buffer_image_op(op): - continue - for ipt in op.input: - if ipt not in consumers: - consumers[ipt] = [] - consumers[ipt].append(op) - # only ref op's output tensor - for op in net_def.op: - if self.is_buffer_image_op(op): - continue - tensor_name = op.output[0] - if tensor_name in consumers: - self.ref_counter[tensor_name] = 
len(consumers[tensor_name]) - else: - self.ref_counter[tensor_name] = 0 - - def is_buffer_image_op(self, op): - return op.type == 'BufferToImage' or op.type == 'ImageToBuffer' - - def optimize(self): - for op in self.net_def.op: - if self.is_buffer_image_op(op): - continue - if len(self.idle_mem) == 0: - # allocate new mem - mem_id = self.total_mem_count - self.total_mem_count += 1 - else: - # reuse mem - mem_id = self.idle_mem.pop() - - if not op.output_shape: - print('WARNING: There is no output shape information to do memory optimization.') - return - op.mem_id = mem_id - self.op_mem[op.output[0]] = mem_id - if mem_id not in self.mem_block: - self.mem_block[mem_id] = [0, 0] - mem_size = self.mem_block[mem_id] - mem_size[1] = max(mem_size[1], op.output_shape[0].dims[0] * op.output_shape[0].dims[1]) - mem_size[0] = max(mem_size[0], op.output_shape[0].dims[2] * int((op.output_shape[0].dims[3]+3)/4)) - - # de-ref input tensor mem - for ipt in op.input: - if ipt in self.ref_counter: - self.ref_counter[ipt] -= 1 - if self.ref_counter[ipt] == 0: - self.idle_mem.add(self.op_mem[ipt]) - elif self.ref_counter[ipt] < 0: - raise Exception('ref count is less than 0') - - for mem in self.mem_block: - arena = self.net_def.mem_arena - block = arena.mem_block.add() - block.mem_id = mem - block.x = self.mem_block[mem][0] - block.y = self.mem_block[mem][1] - - print('total op: %d', len(self.net_def.op)) - origin_mem_size = 0 - optimized_mem_size = 0 - for op in self.net_def.op: - if self.is_buffer_image_op(op): - continue - origin_mem_size += reduce(operator.mul, op.output_shape[0].dims, 1) - for mem in self.mem_block: - print mem, self.mem_block[mem] - optimized_mem_size += reduce(operator.mul, self.mem_block[mem], 4) - - print('origin mem: %d, optimized mem: %d', origin_mem_size, optimized_mem_size) - - -def optimize_memory(net_def): - mem_optimizer = MemoryOptimizer(net_def) - mem_optimizer.optimize() \ No newline at end of file diff --git a/mace/python/tools/model.template 
b/mace/python/tools/model.template deleted file mode 100644 index 06832877..00000000 --- a/mace/python/tools/model.template +++ /dev/null @@ -1,268 +0,0 @@ -// -// Copyright (c) 2017 XiaoMi All rights reserved. -// Generated by the mace converter. DO NOT EDIT! -// - -{% if mode == 0 %} -#include -#include "mace/core/public/mace.h" - -namespace mace { -namespace {{tag}} { - -{% if tensor_info.data_type != 'DT_UINT8' %} alignas(4) {% endif %} unsigned char {{ tensor_info.name }}[] = { -{% for d in tensor_info.data %}{{"0x%02X, " % d }}{%endfor%} -}; - -void Create{{tensor.name}}(std::vector &tensors) { - tensors.emplace_back(mace::ConstTensor( - {{ tensor.name|tojson }}, {{ tensor.name }}, - { {{ tensor.dims|join(', ') }} }, {{ tensor.data_type }}, {{ tensor.node_id }})); -} - -} // namespace {{tag}} -} // namespace mace - -{% elif mode == 1 %} -#include -#include -#include "mace/core/public/mace.h" - -namespace { -void UpdateOp(mace::OperatorDef &op, - const std::string &name, - const std::string &type, - const std::vector &inputs, - const std::vector &outputs, - const std::vector &output_types, - uint32_t node_id) { - op.set_name(name); - op.set_type(type); - op.set_input(inputs); - op.set_output(outputs); - op.set_output_type(output_types); - op.set_node_id(node_id); -} -} - -namespace mace { -namespace {{tag}} { - -{% for i in range(start, end) %} - -void CreateOperator{{i}}(mace::OperatorDef &op) { - mace::Argument *arg = nullptr; - {% for arg in net.op[i].arg %} - - arg = op.add_arg(); - arg->set_name({{ arg.name|tojson }}); - - {%- if arg.HasField('f') %} - arg->set_f({{ arg.f }}); - {%- endif %} - {%- if arg.HasField('i') %} - arg->set_i({{ arg.i }}); - {%- endif %} - {%- if arg.HasField('s') %} - arg->set_s({{ arg.s|tojson }}); - {%- endif %} - - {% if arg.floats|length != 0 %} - arg->set_floats({ {{ arg.floats|join(', ') }} }); - {% endif %} - {% if arg.ints|length != 0 %} - arg->set_ints({ {{ arg.ints|join(', ') }} }); - {% endif %} - {% if 
arg.strings|length != 0 %} - arg->set_strings({ {{ arg.strings|stringfy() }} }); - {% endif %} - {% endfor %} - - {% if net.op[i].HasField('mem_id') %} - op.set_mem_id({{net.op[i].mem_id}}); - {% endif %} - - {% for shape in net.op[i].output_shape %} - {% if shape.dims | length > 0 %} - op.add_output_shape(mace::OutputShape({ {{ shape.dims|join(', ') }} })); - {% endif %} - {% endfor %} - - std::vector output_types_int({ {{ net.op[i].output_type | join(', ') }} }); - std::vector output_types({{ net.op[i].output_type | length }}); - for (int k = 0; k < {{ net.op[i].output_type | length }}; ++k) { - output_types[k] = static_cast(output_types_int[k]); - } - UpdateOp(op, {{ net.op[i].name|tojson }}, {{ net.op[i].type|tojson}}, - { {{ net.op[i].input|stringfy }} }, - { {{ net.op[i].output|stringfy }} }, - output_types, - {{ net.op[i].node_id }}); - - {% if runtime == 'dsp' %} - op.set_padding({{ net.op[i].padding }}); - {% if net.op[i].node_input | length > 0 %} - std::vector input_node_ids({ {{ net.op[i].node_input | map(attribute='node_id') | join(', ') }} }); - std::vector input_output_ports({ {{ net.op[i].node_input | map(attribute='output_port') | join(', ')}} }); - - for (size_t i = 0; i < {{ net.op[i].node_input | length }}; ++i) { - mace::NodeInput input(input_node_ids[i], input_output_ports[i]); - op.add_node_input(input); - } - {% endif %} - {% if net.op[i].out_max_byte_size | length > 0 %} - std::vector out_max_byte_sizes {{ net.op[i].out_max_byte_size | replace('[', '{') | replace(']', '}') }}; - for (size_t i = 0; i < {{ net.op[i].out_max_byte_size | length }}; ++i) { - op.add_out_max_byte_size(out_max_byte_sizes[i]); - } - {% endif %} - {% endif %} - -} - -{% endfor %} - -} // namespace {{tag}} -} // namespace mace - -{% else %} -#include -#include -#include "mace/core/public/mace.h" - -namespace mace { -namespace {{tag}} { - -{% for tensor in tensors %} -extern void Create{{ tensor.name }}(std::vector &tensors); -{% endfor %} - - -{% for i in 
range(net.op|length) %} -extern void CreateOperator{{i}}(mace::OperatorDef &op); -{% endfor %} - -} // namespace {{ tag }} -} // namespace mace - - -namespace { - -{% if net.arg|length != 0 %} -void CreateNetArg(mace::NetDef &net_def) { - net_def.mutable_arg().reserve({{ net.arg|length }}); - mace::Argument *arg = nullptr; - {% for arg in net.arg %} - - arg = net_def.add_arg(); - arg->set_name({{ arg.name|tojson }}); - - {%- if arg.HasField('f') %} - arg->set_f({{ arg.f }}); - {% endif %} - - {%- if arg.HasField('i') %} - arg->set_i({{ arg.i }}); - {% endif %} - - {%- if arg.HasField('s') %} - arg->set_s({{ arg.s|tojson }}); - {% endif %} - - {% if arg.floats|length != 0 %} - arg->set_floats({ {{ arg.floats|join(', ') }} }); - {% endif %} - {% if arg.ints|length != 0 %} - arg->set_ints({ {{ arg.ints|join(', ') }} }); - {% endif %} - {% if arg.strings|length != 0 %} - arg->set_strings({ {{ arg.strings|stringfy() }} }); - {% endif %} - - {% endfor %} -} -{% endif %} - -{% if net.output_info | length > 0 %} -void CreateOutputInfo(mace::NetDef &net_def) { - std::vector> dims { {{net.output_info | map(attribute='dims') | join(', ') | replace('[', '{') | replace(']', '}') }} }; - - std::vector data_types_int { {{ net.output_info | map(attribute='data_type') | join(', ') }} }; - std::vector data_types({{ net.output_info | length }}); - for (int k = 0; k < {{ net.output_info | length }}; ++k) { - data_types[k] = static_cast(data_types_int[k]); - } - net_def.mutable_output_info().resize({{ net.output_info | length }}); - for (int i = 0; i < {{ net.output_info | length }}; ++i) { - net_def.mutable_output_info()[i].set_data_type(data_types[i]); - net_def.mutable_output_info()[i].set_dims(dims[i]); - } -} -{% endif %} - -void CreateOperators(std::vector &ops) { - ops.resize({{ net.op|length }}); - {% for i in range(net.op|length) %} - - mace::{{tag}}::CreateOperator{{i}}(ops[{{i}}]); - {% endfor %} - -} - -void CreateTensors(std::vector &tensors) { - tensors.reserve({{ 
net.tensors|length }}); - - {% for tensor in net.tensors %} - - mace::{{tag}}::Create{{tensor.name}}(tensors); - {% endfor %} - -} - - -{% if net.mem_arena.mem_block|length != 0 %} -void CreateMemoryArena(mace::MemoryArena &mem_arena) { - std::vector &mem_block = mem_arena.mutable_mem_block(); - mem_block.reserve({{ net.mem_arena.mem_block|length }}); - - {% for mem_blk in net.mem_arena.mem_block %} - mem_block.emplace_back(mace::MemoryBlock({{ mem_blk.mem_id }}, - {{mem_blk.x}}, - {{mem_blk.y}})); - {% endfor %} - -} -{% endif %} - -} - -namespace mace { -namespace {{tag}} { - -NetDef CreateNet() { - NetDef net_def; - net_def.set_name("{{ net.name}}"); - net_def.set_version("{{ net.version }}"); - - {% if net.arg|length != 0 %} - CreateNetArg(net_def); - {% endif %} - - CreateOperators(net_def.mutable_op()); - - CreateTensors(net_def.mutable_tensors()); - - {% if net.mem_arena.mem_block|length != 0 %} - CreateMemoryArena(net_def.mutable_mem_arena()); - {% endif %} - - {% if net.output_info | length > 0 %} - CreateOutputInfo(net_def); - {% endif %} - - return net_def; -} - -} // namespace {{tag}} -} // namespace mace -{% endif %} diff --git a/mace/python/tools/opencl_codegen.py b/mace/python/tools/opencl_codegen.py deleted file mode 100644 index a9d73c12..00000000 --- a/mace/python/tools/opencl_codegen.py +++ /dev/null @@ -1,67 +0,0 @@ -import argparse -import os -import sys - -import numpy as np - -import jinja2 - -# python mace/python/tools/opencl_codegen.py \ -# --cl_binary_dir=${CL_BIN_DIR} --output_path=${CL_HEADER_PATH} - -FLAGS = None - - -def generate_cpp_source(): - maps = {} - for file_name in os.listdir(FLAGS.cl_binary_dir): - file_path = os.path.join(FLAGS.cl_binary_dir, file_name) - if file_path[-4:] == ".bin": - # read binary - f = open(file_path, "rb") - binary_array = np.fromfile(f, dtype=np.uint8) - f.close() - - maps[file_name[:-4]] = [] - for ele in binary_array: - maps[file_name[:-4]].append(hex(ele)) - - env = 
jinja2.Environment(loader=jinja2.FileSystemLoader(sys.path[0])) - return env.get_template('str2vec_maps.cc.tmpl').render( - maps = maps, - data_type = 'unsigned char', - variable_name = 'kCompiledProgramMap' - ) - - -def main(unused_args): - if not os.path.exists(FLAGS.cl_binary_dir): - print("Input cl_binary_dir " + FLAGS.cl_binary_dir + " doesn't exist!") - - cpp_cl_binary_source = generate_cpp_source() - if os.path.isfile(FLAGS.output_path): - os.remove(FLAGS.output_path) - w_file = open(FLAGS.output_path, "w") - w_file.write(cpp_cl_binary_source) - w_file.close() - - -def parse_args(): - """Parses command line arguments.""" - parser = argparse.ArgumentParser() - parser.add_argument( - "--cl_binary_dir", - type=str, - default="./cl_bin/", - help="The cl binaries directory.") - parser.add_argument( - "--output_path", - type=str, - default="./mace/examples/codegen/opencl/opencl_compiled_program.cc", - help="The path of generated C++ header file which contains cl binaries.") - return parser.parse_known_args() - - -if __name__ == '__main__': - FLAGS, unparsed = parse_args() - main(unused_args=[sys.argv[0]] + unparsed) diff --git a/mace/python/tools/source_converter_lib.py b/mace/python/tools/source_converter_lib.py deleted file mode 100644 index c6be3a3e..00000000 --- a/mace/python/tools/source_converter_lib.py +++ /dev/null @@ -1,145 +0,0 @@ -import struct -import os -import uuid -import numpy as np - -from tensorflow import gfile -from mace.proto import mace_pb2 -from jinja2 import Environment, FileSystemLoader - - -GENERATED_NAME = set() - -def generate_random_name(): - name = '_' + uuid.uuid4().hex[:7].upper() - while name in GENERATED_NAME: - name = '_' + uuid.uuid4().hex[:7].upper() - GENERATED_NAME.add(name) - return name - -def generate_tensor_map(tensors): - tensor_map = {} - for t in tensors: - if not tensor_map.has_key(t.name): - tensor_map[t.name] = generate_random_name() - return tensor_map - -def generate_in_out_map(ops, tensor_map): - in_out_map = {} 
- for op in ops: - op.name = generate_random_name() - for input_name in op.input: - if not in_out_map.has_key(input_name): - if tensor_map.has_key(input_name): - in_out_map[input_name] = tensor_map[input_name] - else: - in_out_map[input_name] = generate_random_name() - for output_name in op.output: - if not in_out_map.has_key(output_name): - if tensor_map.has_key(output_name): - in_out_map[output_name] = tensor_map[output_name] - else: - in_out_map[output_name] = generate_random_name() - return in_out_map - -def obfuscate_name(net_def): - input_node = "mace_input_node" - output_node = "mace_output_node" - tensor_map = generate_tensor_map(net_def.tensors) - in_out_map = generate_in_out_map(net_def.op, tensor_map) - for t in net_def.tensors: - if input_node not in t.name and output_node not in t.name: - t.name = tensor_map[t.name] - for op in net_def.op: - for i in range(len(op.input)): - if input_node not in op.input[i]: - op.input[i] = in_out_map[op.input[i]] - for i in range(len(op.output)): - if output_node not in op.output[i]: - op.output[i] = in_out_map[op.output[i]] - -def rename_tensor(net_def): - tensor_map = {} - for t in net_def.tensors: - if not tensor_map.has_key(t.name): - tensor_map[t.name] = "_" + t.name[:-2].replace("/", "_") - t.name = tensor_map[t.name] - for op in net_def.op: - for i in range(len(op.input)): - if tensor_map.has_key(op.input[i]): - op.input[i] = tensor_map[op.input[i]] - for i in range(len(op.output)): - if tensor_map.has_key(op.output[i]): - op.output[i] = tensor_map[op.output[i]] - -class TensorInfo: - def __init__(self, t): - self.name = t.name - self.data_type = mace_pb2.DataType.Name(t.data_type) - if t.data_type == mace_pb2.DT_FLOAT: - self.data = bytearray(struct.pack('%sf' % len(t.float_data), *t.float_data)) - elif t.data_type == mace_pb2.DT_INT32: - self.data = bytearray(struct.pack('%si' % len(t.int32_data), *t.int32_data)) - elif t.data_type == mace_pb2.DT_UINT8: - self.data = 
bytearray(np.array(t.int32_data).astype(np.uint8).tolist()) - -def stringfy(value): - return ', '.join('"{0}"'.format(w) for w in value) - -def convert_to_source(net_def, template, obfuscate, model_tag, output, runtime): - if obfuscate: - obfuscate_name(net_def) - else: - rename_tensor(net_def) - - # Capture our current directory - template_dir = os.path.dirname(template) - template_name = os.path.basename(template) - print template_dir - - # Create the jinja2 environment. - j2_env = Environment(loader=FileSystemLoader(template_dir), - trim_blocks=True) - j2_env.filters['stringfy'] = stringfy - counter = 0 - output_dir = os.path.dirname(output) + '/' - # generate tensor source files - for t in net_def.tensors: - source = j2_env.get_template(template_name).render( - tensor_info = TensorInfo(t), - tensor = t, - tag = model_tag, - mode = 0, - runtime = runtime, - ) - with gfile.GFile(output_dir + 'tensor' + str(counter) + '.cc', "wb") as f: - f.write(source) - counter += 1 - - # generate op source files - counter = 0 - op_size = len(net_def.op) - for start in range(0, op_size, 10): - source = j2_env.get_template(template_name).render( - start = start, - end = min(start+10, op_size), - net = net_def, - tag = model_tag, - mode = 1, - runtime = runtime, - ) - with gfile.GFile(output_dir + 'op' + str(counter) + '.cc', "wb") as f: - f.write(source) - counter += 1 - - # generate model source files - tensors = [TensorInfo(t) for t in net_def.tensors] - source = j2_env.get_template(template_name).render( - tensors = tensors, - net = net_def, - tag = model_tag, - mode = 2, - runtime = runtime, - ) - with gfile.GFile(output, "wb") as f: - f.write(source) diff --git a/mace/python/tools/str2vec_maps.cc.tmpl b/mace/python/tools/str2vec_maps.cc.tmpl deleted file mode 100644 index 354af0aa..00000000 --- a/mace/python/tools/str2vec_maps.cc.tmpl +++ /dev/null @@ -1,27 +0,0 @@ -// -// Copyright (c) 2017 XiaoMi All rights reserved. 
-// - -// This is a generated file, DO NOT EDIT - -#include -#include -#include - -namespace mace { - -extern const std::map> {{variable_name}}= -{ - {% for key, value in maps.iteritems() %} - { - "{{key}}", - { - {%- for ele in value -%} - {{ele}}, - {%- endfor -%} - } - }, // {{key}} -{% endfor %} -}; - -} // namespace diff --git a/mace/python/tools/tf_converter.py b/mace/python/tools/tf_converter.py deleted file mode 100644 index 303fd143..00000000 --- a/mace/python/tools/tf_converter.py +++ /dev/null @@ -1,116 +0,0 @@ -import argparse -import sys -import tensorflow as tf -from tensorflow import gfile -from mace.proto import mace_pb2 -from mace.python.tools import tf_converter_lib -from mace.python.tools import tf_dsp_converter_lib -from mace.python.tools import source_converter_lib - -# ./bazel-bin/mace/python/tools/tf_converter --input quantized_test.pb --output quantized_test_dsp.pb --runtime dsp --input_dim input_node,1,28,28,3 - -FLAGS = None - -def main(unused_args): - if not gfile.Exists(FLAGS.input): - print("Input graph file '" + FLAGS.input + "' does not exist!") - return -1 - - input_graph_def = tf.GraphDef() - with gfile.Open(FLAGS.input, "rb") as f: - data = f.read() - input_graph_def.ParseFromString(data) - - if FLAGS.runtime == 'dsp': - output_graph_def = tf_dsp_converter_lib.convert_to_mace_pb( - input_graph_def, FLAGS.input_node, FLAGS.output_node, FLAGS.prequantize) - else: - output_graph_def = tf_converter_lib.convert_to_mace_pb( - input_graph_def, FLAGS.input_node, FLAGS.output_node, FLAGS.data_type, FLAGS.runtime) - - if FLAGS.output_type == 'source': - source_converter_lib.convert_to_source(output_graph_def, FLAGS.template, FLAGS.obfuscate, - FLAGS.model_tag, FLAGS.output, FLAGS.runtime) - else: - with gfile.GFile(FLAGS.output, "wb") as f: - f.write(output_graph_def.SerializeToString()) - with gfile.GFile(FLAGS.output + '_txt', "wb") as f: - # output_graph_def.ClearField('tensors') - f.write(str(output_graph_def)) - print("Model conversion 
is completed.") - -def str2bool(v): - if v.lower() in ('yes', 'true', 't', 'y', '1'): - return True - elif v.lower() in ('no', 'false', 'f', 'n', '0'): - return False - else: - raise argparse.ArgumentTypeError('Boolean value expected.') - -def parse_args(): - """Parses command line arguments.""" - parser = argparse.ArgumentParser() - parser.register("type", "bool", lambda v: v.lower() == "true") - parser.add_argument( - "--input", - type=str, - default="", - help="TensorFlow \'GraphDef\' file to load.") - parser.add_argument( - "--output", - type=str, - default="", - help="File to save the output graph to.") - parser.add_argument( - "--runtime", - type=str, - default="cpu", - help="Runtime: cpu/gpu/dsp") - parser.add_argument( - "--input_node", - type=str, - default="input_node", - help="e.g., input_node") - parser.add_argument( - "--output_node", - type=str, - default="softmax", - help="e.g., softmax") - parser.add_argument( - "--prequantize", - type=bool, - default=True, - help="e.g., True") - parser.add_argument( - "--data_type", - type=str, - default='DT_FLOAT', - help="e.g., DT_HALF/DT_FLOAT") - parser.add_argument( - "--output_type", - type=str, - default="pb", - help="output type: source/pb") - parser.add_argument( - "--template", - type=str, - default="", - help="template path") - parser.add_argument( - "--obfuscate", - type=str2bool, - nargs='?', - const=False, - default=False, - help="obfuscate model names") - parser.add_argument( - "--model_tag", - type=str, - default="", - help="model tag for generated function and namespace") - return parser.parse_known_args() - - -if __name__ == '__main__': - FLAGS, unparsed = parse_args() - main(unused_args=[sys.argv[0]] + unparsed) diff --git a/mace/python/tools/tf_converter_lib.py b/mace/python/tools/tf_converter_lib.py deleted file mode 100644 index 64bd7b9e..00000000 --- a/mace/python/tools/tf_converter_lib.py +++ /dev/null @@ -1,656 +0,0 @@ -from mace.proto import mace_pb2 -import tensorflow as tf -import numpy 
as np -import math -from mace.python.tools import memory_optimizer - -# TODO: support NCHW formt, now only support NHWC. -padding_mode = { - 'VALID': 0, - 'SAME': 1, - 'FULL': 2 -} -pooling_type_mode = { - 'AvgPool': 1, - 'MaxPool': 2 -} - -buffer_type_map = { - 'FILTER' : 0, - 'IN_OUT' : 1, - 'ARGUMENT' : 2, -} - -data_type_map = { - 'DT_HALF' : mace_pb2.DT_HALF, - 'DT_FLOAT': mace_pb2.DT_FLOAT -} - -BATCH_NORM_ORDER = ["Add", "Rsqrt", "Mul", "Mul", "Mul", "Sub", "Add"] - -MACE_INPUT_NODE_NAME = "mace_input_node" -MACE_OUTPUT_NODE_NAME = "mace_output_node" - -def get_input_tensor(op, index): - input_tensor = op.inputs[index] - if input_tensor.op.type == 'Reshape': - input_tensor = get_input_tensor(input_tensor.op, 0) - return input_tensor - -class TFConverter(object): - def __init__(self, tf_ops, net_def, dt, device): - self.net_def = net_def - self.tf_ops = tf_ops - self.dt = dt - self.device = device - self.tf_graph = {} - self.tf_parents = {} - self.resolved_ops = {} - self.unused_tensor = set() - self.ops = {} - - for op in tf_ops: - self.ops[op.name] = op - - for op in tf_ops: - self.resolved_ops[op.name] = 0 - for input in op.inputs: - input_name = input.name[:-2] - if input_name not in self.tf_graph: - self.tf_graph[input_name] = [] - self.tf_graph[input_name].append(op) - if op.name not in self.tf_parents: - self.tf_parents[op.name] = [] - self.tf_parents[op.name].append(self.ops[input_name]) - - def add_buffer_to_image(self, input_name, input_type): - output_name = input_name[:-2] + "_b2i" + input_name[-2:] - op_def = self.net_def.op.add() - op_def.name = output_name[:-2] - op_def.type = 'BufferToImage' - op_def.input.extend([input_name]) - op_def.output.extend([output_name]) - - arg = op_def.arg.add() - arg.name = 'buffer_type' - arg.i = buffer_type_map[input_type] - arg = op_def.arg.add() - arg.name = 'mode' - arg.i = 0 - arg = op_def.arg.add() - arg.name = 'T' - arg.i = self.dt - return output_name - - def add_image_to_buffer(self, input_name, 
input_type): - output_name = input_name[:-2] + "_i2b" + input_name[-2:] - op_def = self.net_def.op.add() - op_def.name = output_name[:-2] - op_def.type = 'ImageToBuffer' - op_def.input.extend([input_name]) - op_def.output.extend([output_name]) - - arg = op_def.arg.add() - arg.name = 'buffer_type' - arg.i = buffer_type_map[input_type] - arg = op_def.arg.add() - arg.name = 'T' - arg.i = self.dt - return output_name - - - def add_input_transform(self, name): - new_input_name = MACE_INPUT_NODE_NAME + ":0" - op_def = self.net_def.op.add() - op_def.name = name - op_def.type = 'BufferToImage' - op_def.input.extend([new_input_name]) - op_def.output.extend([name+':0']) - - epsilon_arg = op_def.arg.add() - epsilon_arg.name = 'buffer_type' - epsilon_arg.i = buffer_type_map['IN_OUT'] - - arg = op_def.arg.add() - arg.name = 'T' - arg.i = self.dt - - def add_output_transform(self, name): - output_name = MACE_OUTPUT_NODE_NAME + ":0" - op_def = self.net_def.op.add() - op_def.name = output_name[:-2] - op_def.type = 'ImageToBuffer' - op_def.input.extend([name+':0']) - op_def.output.extend([output_name]) - - epsilon_arg = op_def.arg.add() - epsilon_arg.name = 'buffer_type' - epsilon_arg.i = buffer_type_map['IN_OUT'] - - @staticmethod - def add_output_shape(outputs, op): - output_shapes = [] - for output in outputs: - if output.shape.num_elements() is not None: - output_shape = mace_pb2.OutputShape() - output_shape.dims.extend(output.shape.as_list()) - output_shapes.append(output_shape) - op.output_shape.extend(output_shapes) - - def add_tensor(self, name, shape, tf_dt, value): - tensor = self.net_def.tensors.add() - tensor.name = name - - shape = list(shape) - tensor.dims.extend(shape) - - if tf_dt == tf.float32: - tensor.data_type = mace_pb2.DT_FLOAT - tensor.float_data.extend(value.flat) - elif tf_dt == tf.int32: - tensor.data_type = mace_pb2.DT_INT32 - tensor.int32_data.extend(value.flat) - else: - raise Exception("Not supported tensor type: " + tf_dt.name) - - def 
convert_tensor(self, op): - if op.outputs[0].name not in self.unused_tensor: - tensor = self.net_def.tensors.add() - tf_tensor = op.outputs[0].eval() - tensor.name = op.outputs[0].name - - shape = list(tf_tensor.shape) - tensor.dims.extend(shape) - - tf_dt = op.get_attr('dtype') - if tf_dt == tf.float32: - tensor.data_type = mace_pb2.DT_FLOAT - tensor.float_data.extend(tf_tensor.astype(np.float32).flat) - elif tf_dt == tf.int32: - tensor.data_type = mace_pb2.DT_INT32 - tensor.int32_data.extend(tf_tensor.astype(np.int32).flat) - else: - raise Exception("Not supported tensor type: " + tf_dt.name) - self.resolved_ops[op.name] = 1 - - def convert_conv2d(self, op): - op_def = mace_pb2.OperatorDef() - arg = op_def.arg.add() - arg.name = 'T' - arg.i = self.dt - op_def.name = op.name - if op.type == 'DepthwiseConv2dNative': - op_def.type = 'DepthwiseConv2d' - else: - op_def.type = op.type - if self.device == 'gpu': - op_def.input.extend([op.inputs[0].name]) - output_name = self.add_buffer_to_image(op.inputs[1].name, "FILTER") - op_def.input.extend([output_name]) - else: - op_def.input.extend([input.name for input in op.inputs]) - - padding_arg = op_def.arg.add() - padding_arg.name = 'padding' - padding_arg.i = padding_mode[op.get_attr('padding')] - strides_arg = op_def.arg.add() - strides_arg.name = 'strides' - strides_arg.ints.extend(op.get_attr('strides')[1:3]) - data_format_arg = op_def.arg.add() - data_format_arg.name = 'data_format' - data_format_arg.s = 'NHWC' - final_op = op - self.resolved_ops[op.name] = 1 - - if len(self.tf_graph[op.name]) == 1 and self.tf_graph[op.name][0].type == 'BiasAdd' : - bias_add_op = self.tf_graph[op.name][0] - if self.device == 'gpu': - output_name = self.add_buffer_to_image(bias_add_op.inputs[1].name, "ARGUMENT") - op_def.input.extend([output_name]) - else: - op_def.input.extend([bias_add_op.inputs[1].name]) - final_op = bias_add_op - self.resolved_ops[bias_add_op.name] = 1 - - if len(self.tf_graph[final_op.name]) == 1 \ - and 
self.tf_graph[final_op.name][0].type == 'Relu': - relu_op = self.tf_graph[final_op.name][0] - op_def.type = "FusedConv2D" - final_op = relu_op - self.resolved_ops[relu_op.name] = 1 - - op_def.output.extend([output.name for output in final_op.outputs]) - self.add_output_shape(final_op.outputs, op_def) - self.net_def.op.extend([op_def]) - - def convert_fused_batchnorm(self, op): - op_def = mace_pb2.OperatorDef() - arg = op_def.arg.add() - arg.name = 'T' - arg.i = self.dt - data_format_arg = op_def.arg.add() - data_format_arg.name = 'data_format' - data_format_arg.s = 'NHWC' - op_def.name = op.name - op_def.type = 'FoldedBatchNorm' - - gamma_tensor = get_input_tensor(op, 1) - for i in range(1, 5): - input_tensor = get_input_tensor(op, i) - assert input_tensor.shape == gamma_tensor.shape - self.unused_tensor.add(input_tensor.name) - - gamma_value = get_input_tensor(op, 1).eval().astype(np.float32) - beta_value = get_input_tensor(op, 2).eval().astype(np.float32) - mean_value = get_input_tensor(op, 3).eval().astype(np.float32) - var_value = get_input_tensor(op, 4).eval().astype(np.float32) - epsilon_value = op.get_attr('epsilon') - - scale_value = ( - (1.0 / np.vectorize(math.sqrt)(var_value + epsilon_value)) * - gamma_value) - offset_value = (-mean_value * scale_value) + beta_value - idx = gamma_tensor.name.rfind('/') - name_prefix = gamma_tensor.name[:idx] + '/' - input_names = [name_prefix+'scale:0', name_prefix+'offset:0'] - self.add_tensor(input_names[0], gamma_value.shape, - gamma_tensor.dtype, scale_value) - self.add_tensor(input_names[1], gamma_value.shape, - gamma_tensor.dtype, offset_value) - - if self.device == 'gpu': - op_def.input.extend([op.inputs[0].name]) - for name in input_names: - output_name = self.add_buffer_to_image(name, "ARGUMENT") - op_def.input.extend([output_name]) - else: - op_def.input.extend([input.name for input in input_names]) - - self.resolved_ops[op.name] = 1 - final_op = op - - if len(self.tf_graph[op.name]) == 1 and 
self.tf_graph[op.name][0].type == 'Relu': - relu_op = self.tf_graph[op.name][0] - final_op = relu_op - fused_relu_arg = op_def.arg.add() - fused_relu_arg.name = 'fused_relu' - fused_relu_arg.i = 1 - self.resolved_ops[relu_op.name] = 1 - - op_def.output.extend([final_op.outputs[0].name]) - self.add_output_shape(final_op.outputs, op_def) - - self.net_def.op.extend([op_def]) - - def convert_batchnorm(self, op): - bn_ops = [] - bn_ops.append(op) - for i in range(1, 3): - if len(self.tf_graph[bn_ops[i-1].name]) == 1 \ - and self.tf_graph[bn_ops[i-1].name][0].type == BATCH_NORM_ORDER[i]: - bn_ops.append(self.tf_graph[bn_ops[i-1].name][0]) - else: - raise Exception('Invalid BatchNorm Op') - if len(self.tf_graph[bn_ops[2].name]) == 2 \ - and self.tf_graph[bn_ops[2].name][0].type == BATCH_NORM_ORDER[3] \ - and self.tf_graph[bn_ops[2].name][1].type == BATCH_NORM_ORDER[4]: - bn_ops.append(self.tf_graph[bn_ops[2].name][0]) - bn_ops.append(self.tf_graph[bn_ops[2].name][1]) - else: - raise Exception('Invalid BatchNorm Op') - bn_ops.append(self.tf_graph[bn_ops[4].name][0]) - bn_ops.append(self.tf_graph[bn_ops[3].name][0]) - - op_def = mace_pb2.OperatorDef() - arg = op_def.arg.add() - arg.name = 'T' - arg.i = self.dt - - input_name = get_input_tensor(bn_ops[3], 0).name - gamma = get_input_tensor(bn_ops[2], 1).name - beta = get_input_tensor(bn_ops[5], 0).name - mean = get_input_tensor(bn_ops[4], 0).name - variance = get_input_tensor(bn_ops[0], 0).name - - op_def.name = op.name[:-4] # remove /add - op_def.type = 'BatchNorm' - if self.device == 'gpu': - op_def.input.extend([input_name]) - for tensor_name in [gamma, beta, mean, variance]: - output_name = self.add_buffer_to_image(tensor_name, "ARGUMENT") - op_def.input.extend([output_name]) - else: - op_def.input.extend([input_name, gamma, beta, mean, variance]) - op_def.output.extend([output.name for output in bn_ops[6].outputs]) - self.add_output_shape(bn_ops[6].outputs, op_def) - epsilon_arg = op_def.arg.add() - epsilon_arg.name = 
'epsilon' - epsilon_arg.f = get_input_tensor(op, 1).eval().astype(np.float) - data_format_arg = op_def.arg.add() - data_format_arg.name = 'data_format' - data_format_arg.s = 'NHWC' - self.unused_tensor.add(get_input_tensor(op, 1).name) - - self.net_def.op.extend([op_def]) - for i in range(0, 7): - self.resolved_ops[bn_ops[i].name] = 1 - - def convert_pooling(self, op): - op_def = self.net_def.op.add() - arg = op_def.arg.add() - arg.name = 'T' - arg.i = self.dt - op_def.name = op.name - op_def.type = 'Pooling' - op_def.input.extend([input.name for input in op.inputs]) - op_def.output.extend([output.name for output in op.outputs]) - self.add_output_shape(op.outputs, op_def) - pooling_type_arg = op_def.arg.add() - pooling_type_arg.name = 'pooling_type' - pooling_type_arg.i = pooling_type_mode[op.type] - padding_arg = op_def.arg.add() - padding_arg.name = 'padding' - padding_arg.i = padding_mode[op.get_attr('padding')] - strides_arg = op_def.arg.add() - strides_arg.name = 'strides' - strides_arg.ints.extend(op.get_attr('strides')[1:3]) - kernels_arg = op_def.arg.add() - kernels_arg.name = 'kernels' - kernels_arg.ints.extend(op.get_attr('ksize')[1:3]) - data_format_arg = op_def.arg.add() - data_format_arg.name = 'data_format' - data_format_arg.s = 'NHWC' - self.resolved_ops[op.name] = 1 - - def convert_relu6(self, op): - op_def = self.net_def.op.add() - arg = op_def.arg.add() - arg.name = 'T' - arg.i = self.dt - op_def.name = op.name - op_def.type = 'Relu' - op_def.input.extend([input.name for input in op.inputs]) - op_def.output.extend([output.name for output in op.outputs]) - self.add_output_shape(op.outputs, op_def) - max_limit_arg = op_def.arg.add() - max_limit_arg.name = 'max_limit' - max_limit_arg.f = 6 - self.resolved_ops[op.name] = 1 - - def convert_add(self, op): - op_def = self.net_def.op.add() - arg = op_def.arg.add() - arg.name = 'T' - arg.i = self.dt - op_def.name = op.name - op_def.type = "AddN" - op_def.input.extend([input.name for input in op.inputs]) - 
op_def.output.extend([output.name for output in op.outputs]) - self.add_output_shape(op.outputs, op_def) - self.resolved_ops[op.name] = 1 - - def convert_concat(self, op): - op_def = self.net_def.op.add() - arg = op_def.arg.add() - arg.name = 'T' - arg.i = self.dt - op_def.name = op.name - op_def.type = "Concat" - op_def.input.extend([op.inputs[i].name for i in xrange(2)]) - op_def.output.extend([output.name for output in op.outputs]) - axis_arg = op_def.arg.add() - axis_arg.name = 'axis' - axis_arg.i = get_input_tensor(op, 2).eval().astype(np.int32) - self.add_output_shape(op.outputs, op_def) - self.resolved_ops[op.name] = 1 - self.unused_tensor.add(get_input_tensor(op, 2).name) - - def convert_resize_bilinear(self, op): - op_def = self.net_def.op.add() - arg = op_def.arg.add() - arg.name = 'T' - arg.i = self.dt - op_def.name = op.name - op_def.type = "ResizeBilinear" - op_def.input.extend([op.inputs[0].name]) - op_def.output.extend([output.name for output in op.outputs]) - size_arg = op_def.arg.add() - size_arg.name = 'size' - size_arg.ints.extend(get_input_tensor(op, 1).eval().astype(np.int32).flat) - size_arg = op_def.arg.add() - size_arg.name = 'align_corners' - size_arg.i = op.get_attr('align_corners') - self.add_output_shape(op.outputs, op_def) - self.resolved_ops[op.name] = 1 - self.unused_tensor.add(get_input_tensor(op, 1).name) - - def convert_bias_add(self, op): - op_def = mace_pb2.OperatorDef() - arg = op_def.arg.add() - arg.name = 'T' - arg.i = self.dt - op_def.name = op.name - op_def.type = "BiasAdd" - op_def.input.extend([op.inputs[0].name]) - if self.device == 'gpu': - output_name = self.add_buffer_to_image(op.inputs[1].name, "ARGUMENT") - op_def.input.extend([output_name]) - else: - op_def.input.extend([op.inputs[1].name]) - op_def.output.extend([output.name for output in op.outputs]) - self.add_output_shape(op.outputs, op_def) - self.net_def.op.extend([op_def]) - self.resolved_ops[op.name] = 1 - - def convert_space_to_batch(self, op, b2s): - 
op_def = self.net_def.op.add() - arg = op_def.arg.add() - arg.name = 'T' - arg.i = self.dt - op_def.name = op.name - op_def.type = op.type - op_def.input.extend([op.inputs[0].name]) - op_def.output.extend([output.name for output in op.outputs]) - size_arg = op_def.arg.add() - size_arg.name = 'block_shape' - size_arg.ints.extend(get_input_tensor(op, 1).eval().astype(np.int32).flat) - size_arg = op_def.arg.add() - if b2s: - size_arg.name = 'crops' - else: - size_arg.name = 'paddings' - size_arg.ints.extend(get_input_tensor(op, 2).eval().astype(np.int32).flat) - self.add_output_shape(op.outputs, op_def) - self.resolved_ops[op.name] = 1 - self.unused_tensor.add(get_input_tensor(op, 1).name) - self.unused_tensor.add(get_input_tensor(op, 2).name) - - def is_atrous_conv2d(self, op): - return op.type == 'SpaceToBatchND' and\ - len(self.tf_graph[op.name]) == 1 and self.tf_graph[op.name][0].type == 'Conv2D' - - def convert_atrous_conv2d(self, op): - op_def = mace_pb2.OperatorDef() - arg = op_def.arg.add() - arg.name = 'T' - arg.i = self.dt - conv_op = self.tf_graph[op.name][0] - op_def.name = conv_op.name - op_def.type = conv_op.type - if self.device == 'gpu': - op_def.input.extend([op.inputs[0].name]) - output_name = self.add_buffer_to_image(conv_op.inputs[1].name, "FILTER") - op_def.input.extend([output_name]) - else: - op_def.input.extend([op.inputs[0].name]) - op_def.input.extend([conv_op.inputs[1].name]) - - dilation_arg = op_def.arg.add() - dilation_arg.name = 'dilations' - dilation_arg.ints.extend(get_input_tensor(op, 1).eval().astype(np.int32).flat) - padding_arg = op_def.arg.add() - padding_arg.name = 'padding' - padding_values = get_input_tensor(op, 2).eval().astype(np.int32).flat - if len(padding_values) > 0 and padding_values[0] > 0: - padding_arg.i = padding_mode['SAME'] - else: - padding_arg.i = padding_mode['VALID'] - self.unused_tensor.add(get_input_tensor(op, 1).name) - self.unused_tensor.add(get_input_tensor(op, 2).name) - - strides_arg = op_def.arg.add() - 
strides_arg.name = 'strides' - strides_arg.ints.extend([1, 1]) - data_format_arg = op_def.arg.add() - data_format_arg.name = 'data_format' - data_format_arg.s = 'NHWC' - final_op = conv_op - self.resolved_ops[op.name] = 1 - self.resolved_ops[conv_op.name] = 1 - - if len(self.tf_graph[final_op.name]) == 1 and self.tf_graph[final_op.name][0].type == 'BiasAdd' : - bias_add_op = self.tf_graph[final_op.name][0] - if self.device == 'gpu': - output_name = self.add_buffer_to_image(bias_add_op.inputs[1].name, "ARGUMENT") - op_def.input.extend([output_name]) - else: - op_def.input.extend([bias_add_op.inputs[1].name]) - final_op = bias_add_op - self.resolved_ops[bias_add_op.name] = 1 - - if len(self.tf_graph[final_op.name]) == 1 \ - and self.tf_graph[final_op.name][0].type == 'BatchToSpaceND': - final_op = self.tf_graph[final_op.name][0] - self.resolved_ops[final_op.name] = 1 - self.unused_tensor.add(get_input_tensor(final_op, 1).name) - self.unused_tensor.add(get_input_tensor(final_op, 2).name) - else: - raise Exception('Convert atrous conv error: no BatchToSpaceND op') - - if len(self.tf_graph[final_op.name]) == 1 \ - and self.tf_graph[final_op.name][0].type == 'Relu': - relu_op = self.tf_graph[final_op.name][0] - op_def.type = "FusedConv2D" - final_op = relu_op - self.resolved_ops[relu_op.name] = 1 - - op_def.output.extend([output.name for output in final_op.outputs]) - self.add_output_shape(final_op.outputs, op_def) - self.net_def.op.extend([op_def]) - - def is_softmax(self, op): - return op.type == 'Softmax' and \ - len(self.tf_parents[op.name]) == 1 and self.tf_parents[op.name][0].type == 'Reshape' and \ - len(self.tf_graph[op.name]) == 1 and self.tf_graph[op.name][0].type == 'Reshape' - - def convert_softmax(self, softmax_op): - op_def = self.net_def.op.add() - arg = op_def.arg.add() - arg.name = 'T' - arg.i = self.dt - - # deal with first Reshape op - parent_reshape_op = self.tf_parents[softmax_op.name][0] - op_def.input.extend([parent_reshape_op.inputs[0].name]) - 
self.unused_tensor.add(get_input_tensor(parent_reshape_op, 1).name) - self.resolved_ops[parent_reshape_op.name] = 1 - - # deal with Softmax op - op_def.name = softmax_op.name - op_def.type = softmax_op.type - self.resolved_ops[softmax_op.name] = 1 - - # deal with last Reshape op - reshape_op = self.tf_graph[softmax_op.name][0] - self.unused_tensor.add(get_input_tensor(reshape_op, 1).name) - - op_def.output.extend([output.name for output in reshape_op.outputs]) - self.add_output_shape(reshape_op.outputs, op_def) - self.resolved_ops[reshape_op.name] = 1 - - def convert_normal_op(self, op): - op_def = self.net_def.op.add() - arg = op_def.arg.add() - arg.name = 'T' - arg.i = self.dt - op_def.name = op.name - op_def.type = op.type - op_def.input.extend([input.name for input in op.inputs]) - op_def.output.extend([output.name for output in op.outputs]) - self.add_output_shape(op.outputs, op_def) - self.resolved_ops[op.name] = 1 - - def convert(self, input_node, output_node): - if self.device == 'gpu': - self.add_input_transform(input_node) - - for op in self.tf_ops: - if self.resolved_ops[op.name] == 1: - continue - if op.type in ['Placeholder', 'Reshape', 'Identity']: - self.resolved_ops[op.name] = 1 - pass - elif op.type == 'Const': - pass - elif self.is_atrous_conv2d(op): - self.convert_atrous_conv2d(op) - elif op.type == 'Conv2D' or op.type == 'DepthwiseConv2dNative': - self.convert_conv2d(op) - elif op.type == 'FusedBatchNorm': - self.convert_fused_batchnorm(op) - elif op.type == 'Add' and op.name.endswith('batchnorm/add'): - self.convert_batchnorm(op) - elif op.type == 'AvgPool' or op.type == 'MaxPool': - self.convert_pooling(op) - elif op.type == 'Relu6': - self.convert_relu6(op) - elif op.type == 'Add': - self.convert_add(op) - elif op.type == 'ConcatV2': - self.convert_concat(op) - elif op.type == 'ResizeBilinear': - self.convert_resize_bilinear(op) - elif op.type == 'BiasAdd': - self.convert_bias_add(op) - elif op.type == 'SpaceToBatchND': - 
self.convert_space_to_batch(op, False) - elif op.type == 'BatchToSpaceND': - self.convert_space_to_batch(op, True) - elif self.is_softmax(op): - self.convert_softmax(op) - elif op.type in ['Relu']: - self.convert_normal_op(op) - else: - raise Exception('Unknown Op: %s, type: %s' % (op.name, op.type)) - - for op in self.tf_ops: - if self.resolved_ops[op.name] == 1: - continue - elif op.type == 'Const': - self.convert_tensor(op) - else: - raise Exception('Unknown Op: %s, type: %s' % (op.name, op.type)) - - if self.device == 'gpu': - self.add_output_transform(output_node) - - for key in self.resolved_ops: - if self.resolved_ops[key] != 1: - print 'Unresolve Op: %s' % key - -def convert_to_mace_pb(input_graph_def, input_node, output_node, data_type, device): - net_def = mace_pb2.NetDef() - dt = data_type_map[data_type] - - with tf.Session() as session: - with session.graph.as_default() as graph: - tf.import_graph_def(input_graph_def, name="") - ops = graph.get_operations() - converter = TFConverter(ops, net_def, dt, device) - converter.convert(input_node, output_node) - print "PB Converted, start optimize memory." - mem_optimizer = memory_optimizer.MemoryOptimizer(net_def) - mem_optimizer.optimize() - print "Memory optimization done." 
- - return net_def diff --git a/mace/python/tools/tf_dsp_converter_lib.py b/mace/python/tools/tf_dsp_converter_lib.py deleted file mode 100644 index 62cdfde8..00000000 --- a/mace/python/tools/tf_dsp_converter_lib.py +++ /dev/null @@ -1,407 +0,0 @@ -from mace.proto import mace_pb2 -import tensorflow as tf -from operator import mul -from dsp_ops import DspOps -from mace.python.tools import graph_util -from mace.python.tools.convert_util import tf_dtype_2_mace_dtype - -# converter --input ../libcv/quantized_icnet.pb --output quantized_icnet_dsp.pb \ -# --runtime dsp --input_node input_node --output_node output_node - -padding_mode = { - 'NA': 0, - 'SAME': 1, - 'VALID': 2, - 'MIRROR_REFLECT': 3, - 'MIRROR_SYMMETRIC': 4, - 'SAME_CAFFE': 5 -} - -def get_tensor_name_from_op(op_name, port): - return op_name + ':' + str(port) - -def get_node_from_map(op_map, op_or_tensor_name): - op_name = op_or_tensor_name.split(':')[0] - return op_map[op_name] - -def get_op_and_port_from_tensor(tensor_name): - op, port = tensor_name.split(':') - port = int(port) - return op, port - -def max_elem_size(tensor): - if len(tensor.shape.as_list()) == 0: - return tensor.dtype.size - else: - return reduce(mul, tensor.shape.as_list()) * tensor.dtype.size - -def find_dtype(tensor_dtype): - if tensor_dtype == tf.float32: - return mace_pb2.DT_FLOAT - elif tensor_dtype == tf.uint8 or tensor_dtype == tf.quint8: - return mace_pb2.DT_UINT8 - elif tensor_dtype == tf.int32 or tensor_dtype == tf.qint32: - return mace_pb2.DT_INT32 - else: - raise Exception('Unsupported data type: ', tensor_dtype) - -def has_padding_and_strides(op): - return 'padding' in op.node_def.attr and 'strides' in op.node_def.attr - -def is_node_flatten_reshape(op): - return op.type == 'Reshape' and len(op.outputs[0].shape) == 1 - -def get_input_tensor(op, index): - input_tensor = op.inputs[index] - if input_tensor.op.type == 'Reshape': - input_tensor = get_input_tensor(input_tensor.op, 0) - return input_tensor - -def 
add_shape_const_node(net_def, op, values, name): - print ('Add const node: ', op.name + '/' + name) - tensor = net_def.tensors.add() - node_name = op.name + '/' + name - tensor.name = node_name + ':0' - tensor.data_type = mace_pb2.DT_INT32 - tensor.dims.extend(values) - return tensor.name - - -def convert_op_outputs(mace_op_def, tf_op): - mace_op_def.output_type.extend([tf_dtype_2_mace_dtype(output.dtype) - for output in tf_op.outputs]) - output_shapes = [] - for output in tf_op.outputs: - output_shape = mace_pb2.OutputShape() - output_shape.dims.extend(output.shape.as_list()) - output_shapes.append(output_shape) - mace_op_def.output_shape.extend(output_shapes) - - -def convert_ops(unresolved_ops, resolved_ops, net_def, output_node, dsp_ops): - first_op = unresolved_ops[0] - print ('Op: ', first_op.name, first_op.type, first_op.outputs[0].shape) - - if first_op.name in resolved_ops: - pass - - elif first_op.type == 'Const': - print ('Add const node: ', first_op.name) - tf_tensor = first_op.outputs[0].eval() - tensor = net_def.tensors.add() - tensor.name = first_op.outputs[0].name - tensor.data_type = find_dtype(first_op.outputs[0].dtype) - shape = list(tf_tensor.shape) - if len(shape) > 0: - tensor.dims.extend(shape) - if first_op.outputs[0].dtype == tf.float32: - tensor.float_data.extend(tf_tensor.astype(float).flat) - elif first_op.outputs[0].dtype == tf.int32 or \ - first_op.outputs[0].dtype == tf.int8 or \ - first_op.outputs[0].dtype == tf.int16 or \ - first_op.outputs[0].dtype == tf.quint8 or \ - first_op.outputs[0].dtype == tf.quint16: - tensor.int32_data.extend(tf_tensor.astype(int).flat) - - else: - op_def = net_def.op.add() - op_def.name = first_op.name - op_def.type = dsp_ops.map_nn_op(first_op.type) - op_def.padding = padding_mode['NA'] - - if len(first_op.outputs) > 0 and first_op.type == 'Dequantize' \ - and len(first_op.outputs[0].consumers()) > 0 \ - and (first_op.outputs[0].consumers()[0].type == 'SpaceToBatchND' \ - or 
first_op.outputs[0].consumers()[0].type == 'BatchToSpaceND'): - input_tensor = first_op.inputs[0] - min_tensor = first_op.inputs[1] - max_tensor = first_op.inputs[2] - s2b_op = first_op.outputs[0].consumers()[0] - reshape_op = s2b_op.outputs[0].consumers()[0] - min_op = reshape_op.outputs[0].consumers()[0] - max_op = reshape_op.outputs[0].consumers()[1] - quantize_op = min_op.outputs[0].consumers()[0] - resolved_ops.add(s2b_op.name) - resolved_ops.add(reshape_op.name) - resolved_ops.add(min_op.name) - resolved_ops.add(max_op.name) - resolved_ops.add(quantize_op.name) - - op_def.name = quantize_op.name - op_def.type = dsp_ops.map_nn_op('Quantized' + s2b_op.type) - op_def.input.append(input_tensor.name) - op_def.input.extend([t.name for t in s2b_op.inputs[1:]]) - op_def.input.extend([min_tensor.name, max_tensor.name]) - op_def.out_max_byte_size.extend([max_elem_size(out) for out in quantize_op.outputs]) - convert_op_outputs(op_def, quantize_op) - elif has_padding_and_strides(first_op): - op_def.padding = padding_mode[first_op.get_attr('padding')] - op_def.input.extend([t.name for t in first_op.inputs]) - if 'ksize' in first_op.node_def.attr: - ksize = first_op.get_attr('ksize') - ksize_tensor = add_shape_const_node(net_def, first_op, ksize, 'ksize') - op_def.input.extend([ksize_tensor]) - strides = first_op.get_attr('strides') - strides_tensor = add_shape_const_node(net_def, first_op, strides, 'strides') - op_def.input.extend([strides_tensor]) - op_def.out_max_byte_size.extend([max_elem_size(out) for out in first_op.outputs]) - convert_op_outputs(op_def, first_op) - elif is_node_flatten_reshape(first_op): - op_def.type = 'Flatten' - op_def.input.extend([t.name for t in first_op.inputs]) - op_def.out_max_byte_size.extend([max_elem_size(out) for out in first_op.outputs]) - convert_op_outputs(op_def, first_op) - elif dsp_ops.has_op(first_op.type): - op_def.input.extend([t.name for t in first_op.inputs]) - op_def.out_max_byte_size.extend([max_elem_size(out) for out in 
first_op.outputs]) - convert_op_outputs(op_def, first_op) - else: - raise Exception('Unsupported op: ', first_op) - - resolved_ops.add(first_op.name) - - del unresolved_ops[0] - -def add_output_node(net_def, output_node): - op_def = net_def.op.add() - op_def.name = '__output__' - op_def.type = 'OUTPUT' - op_def.input.extend([get_tensor_name_from_op(output_node, 0)]) - -def reverse_batch_to_space_and_biasadd(net_def): - tensor_map = {} - for tensor in net_def.tensors: - tensor_map[tensor.name] = tensor - op_map = {} - for op in net_def.op: - op_map[op.name] = op - consumers = {} - for op in net_def.op: - for ipt in op.input: - if ipt not in consumers: - consumers[ipt] = [] - consumers[ipt].append(op) - - new_ops = [] - skip_ops = set() - visited_ops = set() - - for op in net_def.op: - if op.name in visited_ops: - pass - # pattern: QConv -> RR -> R -> QB2S -> QBiasAdd -> RR -> R - success = False - if op.type == 'Requantize_32to8': - biasadd_requantize_op = op - biasadd_op = get_node_from_map(op_map, biasadd_requantize_op.input[0]) - if biasadd_op.type == 'QuantizedBiasAdd_8p8to32': - b2s_op = get_node_from_map(op_map, biasadd_op.input[0]) - if b2s_op.type == 'QuantizedBatchToSpaceND_8': - conv_requantize_op = get_node_from_map(op_map, b2s_op.input[0]) - conv_op = get_node_from_map(op_map, conv_requantize_op.input[0]) - if conv_op.type == 'QuantizedConv2d_8x8to32': - new_biasadd_op = mace_pb2.OperatorDef() - new_biasadd_op.CopyFrom(biasadd_op) - new_biasadd_op.input[0] = get_tensor_name_from_op(conv_requantize_op.name, 0) - new_biasadd_op.input[2] = get_tensor_name_from_op(conv_requantize_op.name, 1) - new_biasadd_op.input[3] = get_tensor_name_from_op(conv_requantize_op.name, 2) - new_biasadd_op.out_max_byte_size[0] = conv_requantize_op.out_max_byte_size[0] * 4 - - new_biasadd_requantize_op = mace_pb2.OperatorDef() - new_biasadd_requantize_op.CopyFrom(biasadd_requantize_op) - new_biasadd_requantize_op.out_max_byte_size[0] = new_biasadd_op.out_max_byte_size[0] / 4 - - 
new_b2s_op = mace_pb2.OperatorDef() - new_b2s_op.CopyFrom(b2s_op) - new_b2s_op.input[0] = get_tensor_name_from_op(biasadd_requantize_op.name, 0) - new_b2s_op.input[3] = get_tensor_name_from_op(biasadd_requantize_op.name, 1) - new_b2s_op.input[4] = get_tensor_name_from_op(biasadd_requantize_op.name, 2) - - new_ops.extend([new_biasadd_op, new_biasadd_requantize_op, new_b2s_op]) - skip_ops = skip_ops.union([biasadd_op.name, biasadd_requantize_op.name, b2s_op.name]) - visited_ops.add(op.name) - - follow_ops = consumers[get_tensor_name_from_op(biasadd_requantize_op.name, 0)] - for follow_op in follow_ops: - new_follow_op = mace_pb2.OperatorDef() - new_follow_op.CopyFrom(follow_op) - for i in xrange(len(follow_op.input)): - for k in xrange(3): - if new_follow_op.input[i] == get_tensor_name_from_op(biasadd_requantize_op.name, k): - new_follow_op.input[i] = get_tensor_name_from_op(b2s_op.name, k) - new_ops.append(new_follow_op) - skip_ops.add(follow_op.name) - visited_ops.add(follow_op.name) - - visited_ops.add(op.name) - - new_net_def = mace_pb2.NetDef() - new_net_def.tensors.extend(tensor_map.values()) - new_net_def.op.extend([op for op in net_def.op if op.name not in skip_ops]) - new_net_def.op.extend(new_ops) - - return new_net_def - -def add_node_id(net_def): - node_id_counter = 0 - node_id_map = {} - for tensor in net_def.tensors: - tensor.node_id = node_id_counter - node_id_counter += 1 - tensor_op, port = get_op_and_port_from_tensor(tensor.name) - node_id_map[tensor_op] = tensor.node_id - - for op in net_def.op: - op.node_id = node_id_counter - node_id_counter += 1 - node_id_map[op.name] = op.node_id - for ipt in op.input: - op_name, port = get_op_and_port_from_tensor(ipt) - node_id = node_id_map[op_name] - node_input = op.node_input.add() - node_input.node_id = node_id - node_input.output_port = int(port) - - return net_def - -def add_input_output_info(net_def, input_node, output_node, graph, dtype): - input_tensor = 
graph.get_tensor_by_name(get_tensor_name_from_op(input_node, 0)) - output_tensor = graph.get_tensor_by_name(get_tensor_name_from_op(output_node, 0)) - - input_info = net_def.input_info.add() - input_info.dims.extend(input_tensor.shape.as_list()) - input_info.data_type = dtype - if dtype == mace_pb2.DT_UINT8: - for i in xrange(2): - input_info = net_def.input_info.add() - input_info.dims.extend([1,1,1,1]) - input_info.data_type = mace_pb2.DT_FLOAT - - output_info = net_def.output_info.add() - output_info.dims.extend(output_tensor.shape.as_list()) - output_info.data_type = dtype - if dtype == mace_pb2.DT_UINT8: - for i in xrange(2): - output_info = net_def.output_info.add() - output_info.dims.extend([1,1,1,1]) - output_info.data_type = mace_pb2.DT_FLOAT - - return net_def - -def strip_input_quantize_and_output_dequantize(net_def, input_node, output_node): - tensor_map = {} - for tensor in net_def.tensors: - tensor_map[tensor.name] = tensor - op_map = {} - for op in net_def.op: - op_map[op.name] = op - consumers = {} - for op in net_def.op: - for ipt in op.input: - if ipt not in consumers: - consumers[ipt] = [] - consumers[ipt].append(op) - - skip_ops = set() - new_ops = [] - skip_tensors = set() - - # INPUT->Flatten->Minf, Maxf->Quantize - for op in net_def.op: - if op.type == 'INPUT': - input_op = op - flatten_op = None - quantize_op = None - for o in consumers[get_tensor_name_from_op(input_op.name, 0)]: - if o.type == 'Flatten': - flatten_op = o - elif o.type == 'Quantize': - quantize_op = o - if quantize_op is not None: - minf_op, maxf_op = consumers[get_tensor_name_from_op(flatten_op.name, 0)] - skip_ops = skip_ops.union([input_op.name, flatten_op.name, minf_op.name, maxf_op.name, quantize_op.name]) - skip_tensors = skip_tensors.union([flatten_op.input[1], minf_op.input[1], maxf_op.input[1]]) - - new_input_op = mace_pb2.OperatorDef() - new_input_op.name = input_op.name - new_input_op.type = input_op.type - new_input_op.padding = input_op.padding - 
new_input_op.out_max_byte_size.extend([input_op.out_max_byte_size[0]/4, 4, 4]) - new_ops.append(new_input_op) - new_input_op.output_shape.extend([input_op.output_shape[0], - minf_op.output_shape[0], - maxf_op.output_shape[0]]) - new_input_op.output_type.extend([input_op.output_type[0], mace_pb2.DT_FLOAT, mace_pb2.DT_FLOAT]) - for follow_op in consumers[get_tensor_name_from_op(quantize_op.name, 0)]: - new_follow_op = mace_pb2.OperatorDef() - new_follow_op.CopyFrom(follow_op) - for i in xrange(len(follow_op.input)): - for k in xrange(3): - if new_follow_op.input[i] == get_tensor_name_from_op(quantize_op.name, k): - new_follow_op.input[i] = get_tensor_name_from_op(input_op.name, k) - new_ops.append(new_follow_op) - skip_ops.add(follow_op.name) - - elif op.type == 'OUTPUT': - output_op = op - dequantize_op = get_node_from_map(op_map, output_op.input[0]) - if dequantize_op.type == 'Dequantize': - skip_ops = skip_ops.union([dequantize_op.name, output_op.name]) - - new_output_op = mace_pb2.OperatorDef() - new_output_op.name = output_op.name - new_output_op.type = output_op.type - new_output_op.input.extend(dequantize_op.input) - new_ops.append(new_output_op) - - - - new_net_def = mace_pb2.NetDef() - new_net_def.tensors.extend([tensor for tensor in net_def.tensors if tensor.name not in skip_tensors]) - new_net_def.op.extend([op for op in net_def.op if op.name not in skip_ops]) - new_net_def.op.extend(new_ops) - return new_net_def - -def convert_to_mace_pb(input_graph_def, input_node, output_node, prequantize=False): - """ - nnlib does not have batch norm, so use tensorflow optimizer to fold - batch norm with convolution. The fold optimization reorders ops, so - we sort ops first by topology. 
- """ - input_graph_def = graph_util.sort_tf_graph(input_graph_def) - net_def = mace_pb2.NetDef() - - with tf.Session() as session: - with session.graph.as_default() as graph: - tf.import_graph_def(input_graph_def, name="") - ops = graph.get_operations() - dsp_ops = DspOps() - resolved_ops = set() - # convert const node - unresolved_ops = [op for op in ops if op.type == 'Const'] - while len(unresolved_ops) > 0: - convert_ops(unresolved_ops, resolved_ops, net_def, output_node, dsp_ops) - - # convert op node - unresolved_ops = [op for op in ops if op.type != 'Const'] - while len(unresolved_ops) > 0: - convert_ops(unresolved_ops, resolved_ops, net_def, output_node, dsp_ops) - - add_output_node(net_def, output_node) - # optimized_net_def = reverse_batch_to_space_and_biasadd(net_def) - - if prequantize: - print('Prequantize ...') - net_def = strip_input_quantize_and_output_dequantize(net_def, input_node, output_node) - - sorted_net_def = graph_util.sort_mace_graph(net_def, '__output__') - net_def_with_node_id = add_node_id(sorted_net_def) - - if prequantize: - dtype = mace_pb2.DT_UINT8 - else: - dtype = mace_pb2.DT_FLOAT - final_net_def = add_input_output_info(net_def_with_node_id, input_node, output_node, graph, dtype) - - return final_net_def - diff --git a/tools/create_mace_lib.sh b/tools/create_mace_lib.sh deleted file mode 100755 index 1bd56280..00000000 --- a/tools/create_mace_lib.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash -set +x -Usage() { - echo 'Usage: bash tools/create_mace_lib.sh tf_model_path image_size phone_version abi_version' -} - -if [ $# -lt 4 ];then - Usage - exit -1 -fi - -IMAGE_SIZE=$2 -PHONE_VERSION=$3 -ABI_VERSION=$4 -MACE_STATIC_LIB_DIR=libmace_${PHONE_VERSION}_gcn${IMAGE_SIZE}_${ABI_VERSION} -MACE_LIB_PATH=${MACE_STATIC_LIB_DIR}/lib/ -MACE_INCLUDE_PATH=${MACE_STATIC_LIB_DIR}/include/mace/core/public/ - -rm -rf mace/codegen/models mace/codegen/opencl mace/codegen/opencl_bin mace/codegen/tuning mace/codegen/version -rm -rf 
${MACE_STATIC_LIB_DIR} -mkdir -p ${MACE_LIB_PATH} -mkdir -p ${MACE_INCLUDE_PATH} - -sh ./tools/validate_gcn.sh $1 $2 -cp bazel-bin/mace/**/*.a ${MACE_LIB_PATH} -cp bazel-bin/mace/**/*.lo ${MACE_LIB_PATH} -cp mace/core/public/*.h ${MACE_INCLUDE_PATH} diff --git a/tools/export_lib.sh b/tools/export_lib.sh new file mode 100755 index 00000000..446330a5 --- /dev/null +++ b/tools/export_lib.sh @@ -0,0 +1,135 @@ +#!/bin/bash + +set -e + +Usage() { + echo "Usage: ./tools/export_lib.sh android_abi[armeabi-v7a/arm64-v8a] runtime[gpu/dsp] export_include_dir export_lib_dir" + echo "eg: ./tools/export_lib.sh armeabi-v7a gpu ../include ../lib/libmace_v7" +} + +if [ $# -lt 4 ]; then + Usage + exit -1 +fi + +# ANDROID_ABI=arm64-v8a +# ANDROID_ABI=armeabi-v7a +ANDROID_ABI=$1 +RUNTIME=$2 +EXPORT_INCLUDE_DIR=$3 +EXPORT_LIB_DIR=$4 + +if [ x"${RUNTIME}" = x"dsp" ]; then + DSP_MODE_BUILD_FLAGS="--define hexagon=true" +fi + +MACE_SOURCE_DIR=`/bin/pwd` +CODEGEN_DIR=${MACE_SOURCE_DIR}/mace/codegen +CL_CODEGEN_DIR=${CODEGEN_DIR}/opencl +VERSION_CODEGEN_DIR=${CODEGEN_DIR}/version +STRIP="--strip always" + +LIBMACE_NAME="libmace" +LIBMACE_DEV_NAME="libmace_dev" +LIBMACE_PROD_NAME="libmace_prod" + +libmace_targets=( + "//mace/ops:ops" + "//mace/kernels:kernels" + "//mace/codegen:generated_version" + "//mace/core:core" + "//mace/utils:logging" +) + +libmace_dev_targets=( + "//mace/codegen:generated_opencl_dev" + "//mace/core:opencl_dev" + "//mace/utils:tuner_dev" +) + +libmace_prod_targets=( + "//mace/core:opencl_prod" + "//mace/utils:tuner_prod" +) + +all_targets=(${libmace_targets[*]} ${libmace_dev_targets[*]} ${libmace_prod_targets[*]}) + +build_target() +{ + BAZEL_TARGET=$1 + bazel build --verbose_failures -c opt --strip always $BAZEL_TARGET \ + --crosstool_top=//external:android/crosstool \ + --host_crosstool_top=@bazel_tools//tools/cpp:toolchain \ + --cpu=$ANDROID_ABI \ + --copt="-std=c++11" \ + --copt="-D_GLIBCXX_USE_C99_MATH_TR1" \ + --copt="-Werror=return-type" \ 
--copt="-DMACE_OBFUSCATE_LITERALS" \ + $TUNING_MODE_BUILD_FLAGS \ + $DSP_MODE_BUILD_FLAGS || exit -1 +} + +merge_libs() +{ + CREATE_LIB_NAME=$1 + LIBS_LIST=$2 + echo "create /tmp/${CREATE_LIB_NAME}.a" > /tmp/${CREATE_LIB_NAME}.mri || exit -1 + + for lib_target in ${LIBS_LIST[*]} + do + lib_dir=`echo ${lib_target} | cut -d: -f1` + lib_dir=${lib_dir#//} + lib_name_prefix=lib`echo ${lib_target} | cut -d: -f2` + bin_path="${MACE_SOURCE_DIR}/bazel-bin/${lib_dir}/${lib_name_prefix}" + if [ -f "${bin_path}.a" ]; then + bin_path="${bin_path}.a" + else + bin_path="${bin_path}.lo" + fi + echo "addlib ${bin_path}" >> /tmp/${CREATE_LIB_NAME}.mri || exit -1 + done + + echo "save" >> /tmp/${CREATE_LIB_NAME}.mri || exit -1 + echo "end" >> /tmp/${CREATE_LIB_NAME}.mri || exit -1 + + $ANDROID_NDK_HOME/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/bin/aarch64-linux-android-ar \ + -M < /tmp/${CREATE_LIB_NAME}.mri || exit -1 +} + + +echo "Step 1: Generate encrypted opencl source" +python mace/python/tools/encrypt_opencl_codegen.py \ + --cl_kernel_dir=./mace/kernels/opencl/cl/ \ + --output_path=${CODEGEN_DIR}/opencl/opencl_encrypt_program.cc || exit -1 + + +echo "Step 2: Generate version source" +rm -rf ${VERSION_CODEGEN_DIR} +mkdir ${VERSION_CODEGEN_DIR} +bash mace/tools/git/gen_version_source.sh ${CODEGEN_DIR}/version/version.cc || exit -1 + + +echo "Step 3: Build libmace targets" +bazel clean +for target in ${all_targets[*]} +do + build_target ${target} +done + + +echo "Step 4: Create mri files and generate merged libs" +merge_libs "libmace" "${libmace_targets[*]}" +merge_libs "libmace_dev" "${libmace_dev_targets[*]}" +merge_libs "libmace_prod" "${libmace_prod_targets[*]}" + + +echo "Step 5: Export lib" +rm -rf ${EXPORT_INCLUDE_DIR} +mkdir -p ${EXPORT_INCLUDE_DIR}/mace/core/public +rm -rf ${EXPORT_LIB_DIR} +mkdir -p ${EXPORT_LIB_DIR} + +cp ${MACE_SOURCE_DIR}/mace/core/public/* ${EXPORT_INCLUDE_DIR}/mace/core/public || exit -1 +cp /tmp/libmace.a /tmp/libmace_dev.a 
/tmp/libmace_prod.a ${EXPORT_LIB_DIR}/ || exit -1 + +echo "Done!" diff --git a/tools/gcn.config b/tools/gcn.config deleted file mode 100644 index 0a4480d1..00000000 --- a/tools/gcn.config +++ /dev/null @@ -1,3 +0,0 @@ -TF_INPUT_NODE=input -TF_OUTPUT_NODE=softmax/Reshape_1 -TF_OUTPUT_BR_NODE=GCN/br_result_2/fcn_br \ No newline at end of file diff --git a/tools/side_gcn.config b/tools/side_gcn.config deleted file mode 100644 index c7e23e97..00000000 --- a/tools/side_gcn.config +++ /dev/null @@ -1,2 +0,0 @@ -TF_INPUT_NODE=input_node -TF_OUTPUT_NODE=softmax/Reshape_1 \ No newline at end of file diff --git a/tools/validate_gcn.sh b/tools/validate_gcn.sh deleted file mode 100755 index 9a12c969..00000000 --- a/tools/validate_gcn.sh +++ /dev/null @@ -1,158 +0,0 @@ -#!/bin/bash -# Must run at root dir of mace project. -set +x -Usage() { - echo 'Usage: bash tools/validate_gcn.sh tools/gcn.config tf_model_path model_tag image_size runtime[gpu/dsp] [tuning]' -} - -if [ $# -lt 5 ];then - Usage - exit -1 -fi - -source $1 - -TF_MODEL_FILE_PATH=$2 -MODEL_TAG=$3 -IMAGE_SIZE=$4 -RUNTIME=$5 -TUNING_OR_NOT=${6:-0} - -if [ x"$RUNTIME" = x"dsp" ]; then - DATA_TYPE="DT_UINT8" - DEVICE_TYPE="HEXAGON" - TF_OUTPUT_NODE=${TF_OUTPUT_BR_NODE} -else - DATA_TYPE="DT_HALF" - DEVICE_TYPE="OPENCL" -fi - -VLOG_LEVEL=0 -MODEL_DIR=$(dirname ${TF_MODEL_FILE_PATH}) -MACE_SOURCE_DIR=`/bin/pwd` -INPUT_FILE_NAME='model_input' -OUTPUT_FILE_NAME='gcn.out' -OUTPUT_LIST_FILE='gcn.list' -PHONE_DATA_DIR="/data/local/tmp/mace_gcn" -KERNEL_DIR="${PHONE_DATA_DIR}/cl/" -CODEGEN_DIR=${MACE_SOURCE_DIR}/mace/codegen -MODEL_CODEGEN_DIR=${CODEGEN_DIR}/models/${MODEL_TAG} -CL_CODEGEN_DIR=${CODEGEN_DIR}/opencl -CL_BIN_DIR=${CODEGEN_DIR}/opencl_bin -TUNING_CODEGEN_DIR=${CODEGEN_DIR}/tuning -VERSION_SOURCE_PATH=${CODEGEN_DIR}/version - -build_and_run() -{ - PRODUCTION_MODE=$1 - if [ "$PRODUCTION_MODE" = true ]; then - PRODUCTION_MODE_BUILD_FLAGS="--define production=true" - fi - - if [[ "${TUNING_OR_NOT}" != "0" && 
"$PRODUCTION_MODE" != true ]];then - tuning_flag=1 - round=0 # only warm up - else - tuning_flag=0 - round=2 - fi - - bazel build --verbose_failures -c opt --strip always mace/examples:mace_run \ - --crosstool_top=//external:android/crosstool \ - --host_crosstool_top=@bazel_tools//tools/cpp:toolchain \ - --cpu=armeabi-v7a \ - --copt="-std=c++11" \ - --copt="-D_GLIBCXX_USE_C99_MATH_TR1" \ - --copt="-Werror=return-type" \ - --copt="-DMACE_MODEL_TAG=${MODEL_TAG}" \ - --copt="-DMACE_OBFUSCATE_LITERALS" \ - $PRODUCTION_MODE_BUILD_FLAGS \ - --define hexagon=true || exit -1 - - adb shell "mkdir -p ${PHONE_DATA_DIR}" || exit -1 - if [ "$PRODUCTION_MODE" = false ]; then - adb shell "mkdir -p ${KERNEL_DIR}" || exit -1 - fi - adb push ${MODEL_DIR}/${INPUT_FILE_NAME} ${PHONE_DATA_DIR} || exit -1 - adb push bazel-bin/mace/examples/mace_run ${PHONE_DATA_DIR} || exit -1 - adb push mace/core/runtime/hexagon/libhexagon_controller.so ${PHONE_DATA_DIR} || exit -1 - - adb