diff --git a/mace/core/operator.h b/mace/core/operator.h index 137a2e1a002be96673adb6cbbf5e30df13a09b01..4cd52fb366753e21557b38f0baf675d5937cc0dd 100644 --- a/mace/core/operator.h +++ b/mace/core/operator.h @@ -91,8 +91,13 @@ class Operator : public OperatorBase { } for (const string &output_str : operator_def.output()) { - outputs_.push_back(MACE_CHECK_NOTNULL(ws->CreateTensor( - output_str, GetDeviceAllocator(D), DataTypeToEnum::v()))); + if (ws->HasTensor(output_str)) { + Tensor *found_tensor = ws->GetTensor(output_str); + outputs_.push_back(ws->GetTensor(output_str)); + } else { + outputs_.push_back(MACE_CHECK_NOTNULL(ws->CreateTensor( + output_str, GetDeviceAllocator(D), DataTypeToEnum::v()))); + } } } virtual bool Run() override = 0; diff --git a/mace/core/tensor.h b/mace/core/tensor.h index 2af45d6c8364c4578a9f506743974b31becaa50e..94f9522805c62a484157941b9c87b559c02fca6f 100644 --- a/mace/core/tensor.h +++ b/mace/core/tensor.h @@ -199,14 +199,20 @@ class Tensor { size_ = size; MACE_CHECK(data_ == nullptr, "Buffer must be unmapped before resize"); - if (is_image_) { - alloc_->DeleteImage(buffer_); - } else { + if (is_image_ && !image_shape_.empty()) { + MACE_ASSERT(image_shape_.size() == 2 + && image_shape_[0] >= image_shape[0] + || image_shape_[1] >= image_shape[1], + "image shape not large enough"); + } + if (!is_image_ && buffer_ != nullptr) { alloc_->Delete(buffer_); } is_image_ = true; - image_shape_ = image_shape; - buffer_ = alloc_->NewImage(image_shape, dtype_); + if (image_shape_.empty()) { + image_shape_ = image_shape; + buffer_ = alloc_->NewImage(image_shape, dtype_); + } } } @@ -226,6 +232,17 @@ class Tensor { } } + inline void AllocateImageMemory(const std::vector &image_shape) { + is_image_ = true; + if (image_shape_ != image_shape) { + if (buffer_ != nullptr) { + alloc_->DeleteImage(buffer_); + } + image_shape_ = image_shape; + buffer_ = alloc_->NewImage(image_shape, dtype_); + } + } + template inline void Copy(const T *src, index_t size) { MACE_CHECK(size == size_, "copy src and dst with different size."); diff --git a/mace/core/workspace.cc b/mace/core/workspace.cc index 2a172f3e93161c1e8fe8cc3c9f4105b5bd88a3fb..e8fc98f9933e9c389cdf98e652c6ac11ee3dab87 100644 --- a/mace/core/workspace.cc +++ b/mace/core/workspace.cc @@ -3,8 +3,8 @@ // #include "mace/core/workspace.h" -#include "mace/core/common.h" #include "mace/core/serializer.h" +#include "mace/core/proto_utils.h" namespace mace { @@ -63,6 +63,34 @@ void Workspace::LoadModelTensor(const NetDef &net_def, DeviceType type) { tensor_map_[tensor_proto.name()] = serializer.Deserialize(tensor_proto, type); } + if (type == DeviceType::OPENCL) { + CreateImageOutputTensor(net_def); + } +} + +void Workspace::CreateImageOutputTensor(const NetDef &net_def) { + if (!net_def.has_mem_arena() || net_def.mem_arena().mem_block_size() == 0) { + return; + } + std::map> mem_tensor_map; + const DataType dtype = static_cast( + ArgumentHelper::GetSingleArgument( + net_def.op(0), + "T", + static_cast(DT_FLOAT))); + for (auto &mem_block: net_def.mem_arena().mem_block()) { + string mem_block_name = MemBlockName(mem_block.mem_id()); + mem_tensor_map[mem_block_name].reset(new Tensor( + GetDeviceAllocator(DeviceType::OPENCL), + dtype)); + mem_tensor_map[mem_block_name]->AllocateImageMemory({mem_block.x(), + mem_block.y()}); + } + for (auto &op: net_def.op()) { + if (op.has_mem_id()) { + tensor_map_[op.output(0)] = mem_tensor_map[MemBlockName(op.mem_id())]; + } + } } } // namespace mace \ No newline at end of file diff --git a/mace/core/workspace.h b/mace/core/workspace.h index 291bc0590cbf55554d2086e3348c1c692b5db4ed..8a706b876ecae7affbc9288f2b242de627c2725a 100644 --- a/mace/core/workspace.h +++ b/mace/core/workspace.h @@ -13,7 +13,7 @@ namespace mace { class Workspace { public: - typedef map> TensorMap; + typedef map> TensorMap; Workspace() {} @@ -33,7 +33,13 @@ class Workspace { void LoadModelTensor(const NetDef &net_def, DeviceType type); + inline std::string MemBlockName(int mem_id) const { + return internal::MakeString("mem_block_", mem_id); + }; + private: + void CreateImageOutputTensor(const NetDef &net_def); + TensorMap tensor_map_; DISABLE_COPY_AND_ASSIGN(Workspace); diff --git a/mace/examples/mace_run.cc b/mace/examples/mace_run.cc index 8ca9765b99a73475fe1884278baac7995358b1a7..73fa676766c12f1cb49a79b0e930f5e441fd8f5b 100644 --- a/mace/examples/mace_run.cc +++ b/mace/examples/mace_run.cc @@ -101,9 +101,12 @@ int main(int argc, char **argv) { } // Init model + VLOG(0) << "Run init"; auto net = CreateNet(net_def, &ws, device_type, NetMode::INIT); net->Run(); + VLOG(0) << "Run model"; + // run model net = CreateNet(net_def, &ws, device_type); diff --git a/mace/proto/mace.proto b/mace/proto/mace.proto index 37a349433e75cc277186da49a5942ed4d78503bf..d3deb3e4bed4cf5a3755c0375d0efe93dd94c240 100644 --- a/mace/proto/mace.proto +++ b/mace/proto/mace.proto @@ -83,6 +83,7 @@ message OperatorDef { optional string type = 4; repeated Argument arg = 5; repeated OutputShape output_shape = 6; + repeated DataType output_type = 7; // Memory optimization: only support one single output op optional int32 mem_id = 10 [default = -1]; @@ -128,6 +129,9 @@ message NetDef { repeated Argument arg = 4; repeated TensorProto tensors = 5; + // for mem optimization + optional MemoryArena mem_arena = 10; + // for hexagon mace-nnlib repeated InputInfo input_info = 100; repeated OutputInfo output_info = 101; diff --git a/mace/python/tools/BUILD b/mace/python/tools/BUILD index 964ea528707a7a724f7f8fff4c12727f0609dcd6..b1bb214cb7153324924e05ddc81868c94f09b73a 100644 --- a/mace/python/tools/BUILD +++ b/mace/python/tools/BUILD @@ -1,9 +1,11 @@ py_library( name = "tf_converter_lib", srcs = [ + "convert_util.py", + "graph_util.py", "tf_converter_lib.py", "tf_dsp_converter_lib.py", - "graph_util.py"], + ], srcs_version = "PY2AND3", deps = [ "//mace/proto:mace_py", @@ -20,6 +22,15 @@ py_binary( ], ) +py_binary( + name = "memory_optimizer", + srcs = ["memory_optimizer.py"], + srcs_version = "PY2AND3", + deps = [ + "//mace/proto:mace_py", + ], +) + py_binary( name = "tf_ops_stats", srcs = ["tf_ops_stats.py"], diff --git a/mace/python/tools/convert_util.py b/mace/python/tools/convert_util.py new file mode 100644 index 0000000000000000000000000000000000000000..53b3196952e991e6163b83ff3ca14395bcba9856 --- /dev/null +++ b/mace/python/tools/convert_util.py @@ -0,0 +1,29 @@ +import tensorflow as tf +from mace.proto import mace_pb2 + +TF_DTYPE_2_MACE_DTYPE_MAP = { + tf.float32: mace_pb2.DT_FLOAT, + tf.double: mace_pb2.DT_DOUBLE, + tf.half: mace_pb2.DT_HALF, + tf.int64: mace_pb2.DT_INT64, + tf.int32: mace_pb2.DT_INT32, + tf.qint32: mace_pb2.DT_INT32, + tf.int16: mace_pb2.DT_INT16, + tf.qint16: mace_pb2.DT_INT16, + tf.int8: mace_pb2.DT_INT8, + tf.qint8: mace_pb2.DT_INT8, + tf.quint16: mace_pb2.DT_UINT16, + tf.uint16: mace_pb2.DT_UINT16, + tf.quint8: mace_pb2.DT_UINT8, + tf.uint8: mace_pb2.DT_UINT8, + tf.string: mace_pb2.DT_STRING, + tf.bool: mace_pb2.DT_BOOL, +} + + +def tf_dtype_2_mace_dtype(tf_dtype): + mace_dtype = TF_DTYPE_2_MACE_DTYPE_MAP.get(tf_dtype, None) + if not mace_dtype: + raise Exception("Not supported tensorflow dtype: " + tf_dtype) + return mace_dtype + diff --git a/mace/python/tools/memory_optimizer.py b/mace/python/tools/memory_optimizer.py new file mode 100644 index 0000000000000000000000000000000000000000..f64df5bada8f5acf1730a1f5e15227605cadad24 --- /dev/null +++ b/mace/python/tools/memory_optimizer.py @@ -0,0 +1,102 @@ +import sys +import operator +from mace.proto import mace_pb2 + +class MemoryOptimizer(object): + def __init__(self, net_def): + self.net_def = net_def + self.idle_mem = set() + self.op_mem = {} # op_name->mem_id + self.mem_block = {} # mem_id->[x, y] + self.total_mem_count = 0 + self.ref_counter = {} + + consumers = {} + for op in net_def.op: + if self.is_buffer_image_op(op): + continue + for ipt in op.input: + if ipt not in consumers: + consumers[ipt] = [] + consumers[ipt].append(op) + # only ref op's output tensor + for op in net_def.op: + if self.is_buffer_image_op(op): + continue + tensor_name = self._op_to_tensor(op) + if tensor_name in consumers: + self.ref_counter[tensor_name] = len(consumers[tensor_name]) + else: + self.ref_counter[tensor_name] = 0 + + def _op_to_tensor(self, op): + return op.name + ':0' + + def is_buffer_image_op(self, op): + return op.type == 'BufferToImage' or op.type == 'ImageToBuffer' + + def optimize(self): + for op in self.net_def.op: + if self.is_buffer_image_op(op): + continue + if len(self.idle_mem) == 0: + # allocate new mem + mem_id = self.total_mem_count + self.total_mem_count += 1 + else: + # reuse mem + mem_id = self.idle_mem.pop() + + op.mem_id = mem_id + self.op_mem[self._op_to_tensor(op)] = mem_id + if mem_id not in self.mem_block: + self.mem_block[mem_id] = [0, 0] + mem_size = self.mem_block[mem_id] + mem_size[1] = max(mem_size[1], op.output_shape[0].dims[0] * op.output_shape[0].dims[1]) + mem_size[0] = max(mem_size[0], op.output_shape[0].dims[2] * (op.output_shape[0].dims[3]+3)/4) + + # de-ref input tensor mem + for ipt in op.input: + if ipt in self.ref_counter: + self.ref_counter[ipt] -= 1 + if self.ref_counter[ipt] == 0: + self.idle_mem.add(self.op_mem[ipt]) + elif self.ref_counter[ipt] < 0: + raise Exception('ref count is less than 0') + + for mem in self.mem_block: + arena = net_def.mem_arena + block = arena.mem_block.add() + block.mem_id = mem + block.x = self.mem_block[mem][0] + block.y = self.mem_block[mem][1] + + print('total op: %d', len(self.net_def.op)) + origin_mem_size = 0 + optimized_mem_size = 0 + for op in self.net_def.op: + if self.is_buffer_image_op(op): + continue + origin_mem_size += reduce(operator.mul, op.output_shape[0].dims, 1) + for mem in self.mem_block: + optimized_mem_size += reduce(operator.mul, self.mem_block[mem], 4) + + print('origin mem: %d, optimized mem: %d', origin_mem_size, optimized_mem_size) + +if __name__ == '__main__': + model_file = sys.argv[1] + opt_model_file = sys.argv[2] + with open(model_file, "rb") as f: + net_def = mace_pb2.NetDef() + net_def.ParseFromString(f.read()) + optimizer = MemoryOptimizer(net_def) + optimizer.optimize() + + with open(opt_model_file, "wb") as f: + f.write(net_def.SerializeToString()) + with open(opt_model_file + '_txt', "wb") as f: + net_def.ClearField('tensors') + f.write(str(net_def)) + + + diff --git a/mace/python/tools/tf_converter_lib.py b/mace/python/tools/tf_converter_lib.py index 80b5ee4253a8de05a82435ec5f7593734b757115..f603d3b5c96e76ef52bee06911b467d9b0112a29 100644 --- a/mace/python/tools/tf_converter_lib.py +++ b/mace/python/tools/tf_converter_lib.py @@ -1,6 +1,7 @@ from mace.proto import mace_pb2 import tensorflow as tf import numpy as np +from mace.python.tools.convert_util import tf_dtype_2_mace_dtype # TODO: support NCHW formt, now only support NHWC. padding_mode = { @@ -110,6 +111,19 @@ def add_output_transform(name, net_def): epsilon_arg.name = 'buffer_type' epsilon_arg.i = buffer_type_map['IN_OUT'] + +def convert_op_outputs(mace_op_def, tf_op): + mace_op_def.output.extend([output.name for output in tf_op.outputs]) + mace_op_def.output_type.extend([tf_dtype_2_mace_dtype(output.dtype) + for output in tf_op.outputs]) + output_shapes = [] + for output in tf_op.outputs: + output_shape = mace_pb2.OutputShape() + output_shape.dims.extend(output.shape.as_list()) + output_shapes.append(output_shape) + mace_op_def.output_shape.extend(output_shapes) + + def convert_ops(unresolved_ops, dt, net_def, device): ops_count = len(unresolved_ops) resolved_count = 1 @@ -171,13 +185,7 @@ def convert_ops(unresolved_ops, dt, net_def, device): final_op = relu_op resolved_count = 4 - op_def.output.extend([output.name for output in final_op.outputs]) - output_shapes = [] - for output in final_op.outputs: - output_shape = mace_pb2.OutputShape() - output_shape.dims.extend(output.shape.as_list()) - output_shapes.append(output_shape) - op_def.output_shape.extend(output_shapes) + convert_op_outputs(op_def, final_op) elif first_op.type == 'FusedBatchNorm': op_def.name = first_op.name @@ -225,26 +233,15 @@ def convert_ops(unresolved_ops, dt, net_def, device): op_def.name = first_op.name[:-4] # remove /add op_def.type = 'BatchNorm' op_def.input.extend([input_name, gamma, beta, mean, variance, epsilon]) - op_def.output.extend([output.name for output in add_1_op.outputs]) - output_shapes = [] - for output in add_1_op.outputs: - output_shape = mace_pb2.OutputShape() - output_shape.dims.extend(output.shape.as_list()) - output_shapes.append(output_shape) - op_def.output_shape.extend(output_shapes) + convert_op_outputs(op_def, add_1_op) resolved_count = 7 elif first_op.type == 'Relu6': op_def.name = first_op.name op_def.type = 'Relu' op_def.input.extend([input.name for input in first_op.inputs]) - op_def.output.extend([output.name for output in first_op.outputs]) - output_shapes = [] - for output in first_op.outputs: - output_shape = mace_pb2.OutputShape() - output_shape.dims.extend(output.shape.as_list()) - output_shapes.append(output_shape) - op_def.output_shape.extend(output_shapes) + convert_op_outputs(op_def, first_op) + max_limit_arg = op_def.arg.add() max_limit_arg.name = 'max_limit' max_limit_arg.f = 6 @@ -252,13 +249,8 @@ def convert_ops(unresolved_ops, dt, net_def, device): op_def.name = first_op.name op_def.type = 'Pooling' op_def.input.extend([input.name for input in first_op.inputs]) - op_def.output.extend([output.name for output in first_op.outputs]) - output_shapes = [] - for output in first_op.outputs: - output_shape = mace_pb2.OutputShape() - output_shape.dims.extend(output.shape.as_list()) - output_shapes.append(output_shape) - op_def.output_shape.extend(output_shapes) + convert_op_outputs(op_def, first_op) + pooling_type_arg = op_def.arg.add() pooling_type_arg.name = 'pooling_type' pooling_type_arg.i = pooling_type_mode[first_op.type] @@ -278,55 +270,31 @@ def convert_ops(unresolved_ops, dt, net_def, device): op_def.name = first_op.name op_def.type = "AddN" op_def.input.extend([input.name for input in first_op.inputs]) - op_def.output.extend([output.name for output in first_op.outputs]) - output_shapes = [] - for output in first_op.outputs: - output_shape = mace_pb2.OutputShape() - output_shape.dims.extend(output.shape.as_list()) - output_shapes.append(output_shape) - op_def.output_shape.extend(output_shapes) + convert_op_outputs(op_def, first_op) elif first_op.type == 'ConcatV2': op_def.name = first_op.name op_def.type = "Concat" op_def.input.extend([first_op.inputs[i].name for i in xrange(2)]) - op_def.output.extend([output.name for output in first_op.outputs]) axis_arg = op_def.arg.add() axis_arg.name = 'axis' axis_arg.i = get_input_tensor(first_op, 2).eval().astype(np.int32) - output_shapes = [] - for output in first_op.outputs: - output_shape = mace_pb2.OutputShape() - output_shape.dims.extend(output.shape.as_list()) - output_shapes.append(output_shape) - op_def.output_shape.extend(output_shapes) + convert_op_outputs(op_def, first_op) elif first_op.type == 'ResizeBilinear': op_def.name = first_op.name op_def.type = "ResizeBilinear" op_def.input.extend([first_op.inputs[0].name]) - op_def.output.extend([output.name for output in first_op.outputs]) size_arg = op_def.arg.add() size_arg.name = 'size' size_arg.ints.extend(get_input_tensor(first_op, 1).eval().astype(np.int32).flat) size_arg = op_def.arg.add() size_arg.name = 'align_corners' size_arg.i = first_op.get_attr('align_corners') - output_shapes = [] - for output in first_op.outputs: - output_shape = mace_pb2.OutputShape() - output_shape.dims.extend(output.shape.as_list()) - output_shapes.append(output_shape) - op_def.output_shape.extend(output_shapes) + convert_op_outputs(op_def, first_op) elif first_op.type in ['Relu', 'SpaceToBatchND', 'BatchToSpaceND', 'BiasAdd']: op_def.name = first_op.name op_def.type = first_op.type op_def.input.extend([input.name for input in first_op.inputs]) - op_def.output.extend([output.name for output in first_op.outputs]) - output_shapes = [] - for output in first_op.outputs: - output_shape = mace_pb2.OutputShape() - output_shape.dims.extend(output.shape.as_list()) - output_shapes.append(output_shape) - op_def.output_shape.extend(output_shapes) + convert_op_outputs(op_def, first_op) else: raise Exception('Unknown Op: %s, type: %s' % (first_op.name, first_op.type)) pass diff --git a/mace/python/tools/tf_dsp_converter_lib.py b/mace/python/tools/tf_dsp_converter_lib.py index ced16ce853e8f49b9c968e09ed257a8e3bf815b5..209173e90f8930d2fe7abbc767def55ac93e5e9a 100644 --- a/mace/python/tools/tf_dsp_converter_lib.py +++ b/mace/python/tools/tf_dsp_converter_lib.py @@ -3,6 +3,7 @@ import tensorflow as tf from operator import mul from dsp_ops import DspOps from mace.python.tools import graph_util +from mace.python.tools.convert_util import tf_dtype_2_mace_dtype # converter --input ../libcv/quantized_icnet.pb --output quantized_icnet_dsp.pb \ # --runtime dsp --input_node input_node --output_node output_node @@ -65,6 +66,18 @@ def add_shape_const_node(net_def, op, values, name): tensor.dims.extend(values) return tensor.name + +def convert_op_outputs(mace_op_def, tf_op): + mace_op_def.output_type.extend([tf_dtype_2_mace_dtype(output.dtype) + for output in tf_op.outputs]) + output_shapes = [] + for output in tf_op.outputs: + output_shape = mace_pb2.OutputShape() + output_shape.dims.extend(output.shape.as_list()) + output_shapes.append(output_shape) + mace_op_def.output_shape.extend(output_shapes) + + def convert_ops(unresolved_ops, resolved_ops, net_def, output_node, dsp_ops): first_op = unresolved_ops[0] print ('Op: ', first_op.name, first_op.type, first_op.outputs[0].shape) @@ -120,6 +133,7 @@ def convert_ops(unresolved_ops, resolved_ops, net_def, output_node, dsp_ops): op_def.input.extend([t.name for t in s2b_op.inputs[1:]]) op_def.input.extend([min_tensor.name, max_tensor.name]) op_def.out_max_byte_size.extend([max_elem_size(out) for out in quantize_op.outputs]) + convert_op_outputs(op_def, quantize_op) elif has_padding_and_strides(first_op): op_def.padding = padding_mode[first_op.get_attr('padding')] op_def.input.extend([t.name for t in first_op.inputs]) @@ -131,13 +145,15 @@ def convert_ops(unresolved_ops, resolved_ops, net_def, output_node, dsp_ops): strides_tensor = add_shape_const_node(net_def, first_op, strides, 'strides') op_def.input.extend([strides_tensor]) op_def.out_max_byte_size.extend([max_elem_size(out) for out in first_op.outputs]) + convert_op_outputs(op_def, first_op) elif is_node_flatten_reshape(first_op): op_def.type = 'Flatten' op_def.input.extend([t.name for t in first_op.inputs]) - op_def.out_max_byte_size.extend([max_elem_size(out) for out in first_op.outputs]) + convert_op_outputs(op_def, first_op) elif dsp_ops.has_op(first_op.type): op_def.input.extend([t.name for t in first_op.inputs]) op_def.out_max_byte_size.extend([max_elem_size(out) for out in first_op.outputs]) + convert_op_outputs(op_def, first_op) else: raise Exception('Unsupported op: ', first_op) @@ -311,6 +327,10 @@ def strip_input_quantize_and_output_dequantize(net_def, input_node, output_node) new_input_op.padding = input_op.padding new_input_op.out_max_byte_size.extend([input_op.out_max_byte_size[0]/4, 4, 4]) new_ops.append(new_input_op) + new_input_op.output_shape.extend([input_op.output_shape[0], + minf_op.output_shape[0], + maxf_op.output_shape[0]]) + new_input_op.output_type.extend([input_op.output_type[0], mace_pb2.DT_FLOAT, mace_pb2.DT_FLOAT]) for follow_op in consumers[get_tensor_name_from_op(quantize_op.name, 0)]: new_follow_op = mace_pb2.OperatorDef() new_follow_op.CopyFrom(follow_op) diff --git a/tools/validate_gcn.sh b/tools/validate_gcn.sh index 524c752b8c70dedf4f332d2b3dfc7590bb66b93b..b62cb7848b9a3da8a58f94e6e74d29dae2637bf2 100644 --- a/tools/validate_gcn.sh +++ b/tools/validate_gcn.sh @@ -1,6 +1,6 @@ #!/bin/bash # Must run at root dir of mace project. - +set +x Usage() { echo 'Usage: bash tools/validate_gcn.sh tf_model_file' } @@ -13,6 +13,7 @@ fi TF_MODEL_FILE_PATH=$1 MODEL_DIR=$(dirname ${TF_MODEL_FILE_PATH}) MACE_MODEL_NAME='mace_model.pb' +MACE_OPT_MODEL_NAME='mace_opt_model.pb' INPUT_FILE_NAME='model_input' OUTPUT_FILE_NAME='gcn.out' OUTPUT_LIST_FILE='gcn.list' @@ -26,14 +27,17 @@ python tools/validate.py --generate_data true --random_seed 1 \ --input_shape=512,512,3 # Step 2: convert tf model to mace model -echo "Step 2: convert tf model to mace model" +echo "Step 2: convert tf model to mace model and optimize memory" bazel build //mace/python/tools:tf_converter bazel-bin/mace/python/tools/tf_converter --input=${TF_MODEL_FILE_PATH} \ --output=${MODEL_DIR}/${MACE_MODEL_NAME} \ --input_node=input \ --output_node=GCN/br_result_2/fcn_br \ - --data_type=DT_HALF\ + --data_type=DT_HALF \ --runtime=gpu +bazel build mace/python/tools:memory_optimizer +bazel-bin/mace/python/tools/memory_optimizer ${MODEL_DIR}/${MACE_MODEL_NAME} \ + ${MODEL_DIR}/${MACE_OPT_MODEL_NAME} # Step 3: Run model on the phone @@ -46,7 +50,7 @@ bazel build -c opt --strip always mace/examples:mace_run \ adb shell "mkdir -p ${PHONE_DATA_DIR}" adb shell "mkdir -p ${KERNEL_DIR}" adb push mace/kernels/opencl/cl/* ${KERNEL_DIR} -adb push ${MODEL_DIR}/${MACE_MODEL_NAME} ${PHONE_DATA_DIR} +adb push ${MODEL_DIR}/${MACE_OPT_MODEL_NAME} ${PHONE_DATA_DIR} adb push ${MODEL_DIR}/${INPUT_FILE_NAME} ${PHONE_DATA_DIR} adb push bazel-bin/mace/examples/mace_run ${PHONE_DATA_DIR} @@ -56,13 +60,14 @@ adb