提交 c761a7ba 编写于 作者: Y yejianwu

Remove the mace.pb.h dependency from the public API header; call Reserve() before filling protobuf repeated fields

上级 9f9a1e5f
......@@ -42,11 +42,11 @@ namespace mace {
#ifdef MACE_CPU_MODEL_TAG
namespace MACE_CPU_MODEL_TAG {
extern const unsigned char *LoadModelData(const std::string &model_data_file);
extern const unsigned char *LoadModelData(const char *model_data_file);
extern void UnloadModelData(const unsigned char *model_data);
extern NetDef CreateNet();
extern NetDef CreateNet(const unsigned char *model_data);
extern const std::string ModelChecksum();
......@@ -60,7 +60,7 @@ extern const unsigned char *LoadModelData(const char *model_data_file);
extern void UnloadModelData(const unsigned char *model_data);
extern NetDef CreateNet();
extern NetDef CreateNet(const unsigned char *model_data);
extern const std::string ModelChecksum();
......@@ -74,7 +74,7 @@ extern const unsigned char *LoadModelData(const char *model_data_file);
extern void UnloadModelData(const unsigned char *model_data);
extern NetDef CreateNet();
extern NetDef CreateNet(const unsigned char *model_data);
extern const std::string ModelChecksum();
......@@ -255,10 +255,10 @@ int Main(int argc, char **argv) {
const unsigned char *cpu_model_data =
mace::MACE_CPU_MODEL_TAG::LoadModelData(
FLAGS_cpu_model_data_file.c_str());
NetDef cpu_net_def = mace::MACE_CPU_MODEL_TAG::CreateNet();
NetDef cpu_net_def = mace::MACE_CPU_MODEL_TAG::CreateNet(cpu_model_data);
mace::MaceEngine cpu_engine(&cpu_net_def, DeviceType::CPU, input_names,
output_names, cpu_model_data);
output_names);
LOG(INFO) << "CPU Warm up run";
t0 = NowMicros();
......@@ -273,10 +273,10 @@ int Main(int argc, char **argv) {
const unsigned char *gpu_model_data =
mace::MACE_GPU_MODEL_TAG::LoadModelData(
FLAGS_gpu_model_data_file.c_str());
NetDef gpu_net_def = mace::MACE_GPU_MODEL_TAG::CreateNet();
NetDef gpu_net_def = mace::MACE_GPU_MODEL_TAG::CreateNet(gpu_model_data);
mace::MaceEngine gpu_engine(&gpu_net_def, DeviceType::GPU, input_names,
output_names, gpu_model_data);
output_names);
mace::MACE_GPU_MODEL_TAG::UnloadModelData(gpu_model_data);
LOG(INFO) << "GPU Warm up run";
......@@ -292,10 +292,10 @@ int Main(int argc, char **argv) {
const unsigned char *dsp_model_data =
mace::MACE_DSP_MODEL_TAG::LoadModelData(
FLAGS_dsp_model_data_file.c_str());
NetDef dsp_net_def = mace::MACE_DSP_MODEL_TAG::CreateNet();
NetDef dsp_net_def = mace::MACE_DSP_MODEL_TAG::CreateNet(dsp_model_data);
mace::MaceEngine dsp_engine(&dsp_net_def, DeviceType::HEXAGON, input_names,
output_names, dsp_model_data);
output_names);
mace::MACE_DSP_MODEL_TAG::UnloadModelData(dsp_model_data);
LOG(INFO) << "DSP Warm up run";
......
......@@ -17,7 +17,6 @@
#include <set>
#include "mace/kernels/conv_pool_2d_util.h"
#include "mace/proto/mace.pb.h"
#include "mace/utils/logging.h"
#include "mace/utils/string_util.h"
......
......@@ -323,11 +323,11 @@ MaceStatus CreateMaceEngineFromPB(const std::string &model_data_file,
return MaceStatus::MACE_INVALID_ARGS;
}
NetDef net_def;
net_def.ParseFromArray(&model_pb[0], model_pb.size());
std::shared_ptr<NetDef> net_def(new NetDef());
net_def->ParseFromArray(&model_pb[0], model_pb.size());
index_t model_data_size = 0;
for (auto &const_tensor : net_def.tensors()) {
for (auto &const_tensor : net_def->tensors()) {
model_data_size = std::max(
model_data_size,
static_cast<index_t>(const_tensor.offset() +
......@@ -340,7 +340,8 @@ MaceStatus CreateMaceEngineFromPB(const std::string &model_data_file,
model_data = LoadModelData(model_data_file, model_data_size);
engine->reset(new mace::MaceEngine(device_type));
status = (*engine)->Init(&net_def, input_nodes, output_nodes, model_data);
status = (*engine)->Init(
net_def.get(), input_nodes, output_nodes, model_data);
if (device_type == DeviceType::GPU || device_type == DeviceType::HEXAGON) {
UnloadModelData(model_data, model_data_size);
......
......@@ -27,6 +27,7 @@
#include <utility>
#include <vector>
#include "mace/core/macros.h"
#include "mace/public/mace.h"
#include "mace/public/mace_runtime.h"
#include "mace/utils/logging.h"
......@@ -135,6 +136,7 @@ void SetOpenMPThreadsAndAffinityCPUs(int omp_num_threads,
<< ", CPU core IDs: " << MakeString(cpu_ids);
omp_set_num_threads(omp_num_threads);
#else
MACE_UNUSED(omp_num_threads);
LOG(WARNING) << "Set OpenMP threads number failed: OpenMP not enabled.";
#endif
......
......@@ -9,12 +9,6 @@ enum NetMode {
NORMAL = 1;
}
// Device types. Values are explicit and non-contiguous (1 is not assigned).
enum DeviceType {
CPU = 0; // CPU is the default device.
GPU = 2;
HEXAGON = 3;
}
enum DataType {
DT_INVALID = 0;
......
......@@ -14,7 +14,4 @@ cc_library(
"mace_runtime.h",
],
copts = ["-Werror", "-Wextra", "-Wno-missing-field-initializers"],
deps = [
"//mace/proto:mace_cc",
],
)
......@@ -24,10 +24,13 @@
#include <string>
#include <vector>
#include "mace/proto/mace.pb.h"
namespace mace {
class OutputShape;
class NetDef;
// Device types exposed by the public API.
// Values are explicit and non-contiguous (1 is not assigned).
enum DeviceType {
  CPU = 0,
  GPU = 2,
  HEXAGON = 3
};
struct CallStats {
int64_t start_micros;
int64_t end_micros;
......@@ -111,12 +114,6 @@ class MaceEngine {
MaceEngine &operator=(const MaceEngine &) = delete;
};
const unsigned char *LoadModelData(const std::string &model_data_file,
const size_t &data_size);
void UnloadModelData(const unsigned char *model_data,
const size_t &data_size);
MaceStatus CreateMaceEngineFromPB(const std::string &model_data_file,
const std::vector<std::string> &input_nodes,
const std::vector<std::string> &output_nodes,
......
......@@ -36,13 +36,13 @@ from mace.python.tools.convert_util import mace_check
FLAGS = None
device_type_map = {'cpu': mace_pb2.CPU,
'gpu': mace_pb2.GPU,
'dsp': mace_pb2.HEXAGON}
device_type_map = {'cpu': cvt.DeviceType.CPU.value,
'gpu': cvt.DeviceType.GPU.value,
'dsp': cvt.DeviceType.HEXAGON.value}
device_data_type_map = {
mace_pb2.CPU: mace_pb2.DT_FLOAT,
mace_pb2.GPU: mace_pb2.DT_HALF,
mace_pb2.HEXAGON: mace_pb2.DT_UINT8
cvt.DeviceType.CPU.value: mace_pb2.DT_FLOAT,
cvt.DeviceType.GPU.value: mace_pb2.DT_HALF,
cvt.DeviceType.HEXAGON.value: mace_pb2.DT_UINT8
}
......@@ -131,8 +131,8 @@ def main(unused_args):
print("Transform model to one that can better run on device")
if not FLAGS.runtime:
cpu_graph_def = copy.deepcopy(output_graph_def)
option.device = mace_pb2.CPU
option.data_type = device_data_type_map[mace_pb2.CPU]
option.device = cvt.DeviceType.CPU.value
option.data_type = device_data_type_map[cvt.DeviceType.CPU.value]
option.disable_transpose_filters()
mace_cpu_transformer = transformer.Transformer(
option, cpu_graph_def)
......@@ -141,8 +141,8 @@ def main(unused_args):
memory_optimizer.optimize_cpu_memory(cpu_graph_def)
print "CPU memory optimization done."
option.device = mace_pb2.GPU
option.data_type = device_data_type_map[mace_pb2.GPU]
option.device = cvt.DeviceType.GPU.value
option.data_type = device_data_type_map[cvt.DeviceType.GPU.value]
option.enable_transpose_filters()
mace_gpu_transformer = transformer.Transformer(
option, output_graph_def)
......@@ -180,9 +180,9 @@ def main(unused_args):
if FLAGS.model_load_type == 'pb':
with open(FLAGS.pb_output, "wb") as f:
f.write(output_graph_def.SerializeToString())
with open(FLAGS.pb_output + '_txt', "wb") as f:
# output_graph_def.ClearField('tensors')
f.write(str(output_graph_def))
# with open(FLAGS.pb_output + '_txt', "wb") as f:
# # output_graph_def.ClearField('tensors')
# f.write(str(output_graph_def))
print("Model conversion is completed.")
......
......@@ -18,6 +18,12 @@ from enum import Enum
from mace.proto import mace_pb2
# Device identifiers for the converter tools. Callers read the integer through
# `.value` (e.g. `DeviceType.CPU.value`); values are explicit and 1 is unused.
class DeviceType(Enum):
CPU = 0
GPU = 2
HEXAGON = 3
class DataFormat(Enum):
NHWC = 0
NCHW = 1
......@@ -198,7 +204,7 @@ class ConverterOption(object):
self._input_nodes = {}
self._output_nodes = {}
self._data_type = mace_pb2.DT_FLOAT
self._device = mace_pb2.CPU
self._device = DeviceType.CPU.value
self._winograd_enabled = False
self._transformer_option = [
TransformerRule.REMOVE_USELESS_RESHAPE_OP,
......
......@@ -18,14 +18,15 @@ import numpy as np
from mace.proto import mace_pb2
from mace.python.tools.converter_tool import base_converter
from mace.python.tools.converter_tool.base_converter import EltwiseType
from mace.python.tools.converter_tool.base_converter import ActivationType
from mace.python.tools.converter_tool.base_converter import PaddingMode
from mace.python.tools.converter_tool.base_converter import ConverterUtil
from mace.python.tools.converter_tool.base_converter import DataFormat
from mace.python.tools.converter_tool.base_converter import DeviceType
from mace.python.tools.converter_tool.base_converter import EltwiseType
from mace.python.tools.converter_tool.base_converter import FilterFormat
from mace.python.tools.converter_tool.base_converter import MaceOp
from mace.python.tools.converter_tool.base_converter import MaceKeyword
from mace.python.tools.converter_tool.base_converter import ConverterUtil
from mace.python.tools.converter_tool.base_converter import MaceOp
from mace.python.tools.converter_tool.base_converter import PaddingMode
from mace.python.tools.converter_tool.base_converter import TransformerRule
from mace.python.tools.convert_util import mace_check
......@@ -114,7 +115,7 @@ class Transformer(base_converter.ConverterInterface):
self._producer = {}
self._target_data_format = DataFormat.NHWC
if self._option.device == mace_pb2.CPU:
if self._option.device == DeviceType.CPU.value:
self._target_data_format = DataFormat.NCHW
def run(self):
......@@ -488,7 +489,7 @@ class Transformer(base_converter.ConverterInterface):
net = self._model
filter_format = self.filter_format()
if self._option.device == mace_pb2.GPU:
if self._option.device == DeviceType.GPU.value:
for op in net.op:
if op.type == MaceOp.Conv2D.name \
and self.check_if_gpu_use_winograd_conv(op):
......@@ -809,7 +810,7 @@ class Transformer(base_converter.ConverterInterface):
op.input[input_idx] = output_name
def transform_buffer_image(self):
if self._option.device != mace_pb2.GPU:
if self._option.device != DeviceType.GPU.value:
return False
print("Transform buffer to image")
......@@ -935,7 +936,7 @@ class Transformer(base_converter.ConverterInterface):
def transform_global_conv_to_fc(self):
"""Transform global conv to fc should be placed after transposing
input/output and filter"""
if self._option.device == mace_pb2.GPU:
if self._option.device == DeviceType.GPU.value:
return False
net = self._model
......
......@@ -33,7 +33,7 @@ extern const unsigned char *LoadModelData(const std::string &model_data_file);
extern void UnloadModelData(const unsigned char *model_data);
extern NetDef CreateNet();
extern const std::shared_ptr<NetDef> CreateNet();
extern const std::string ModelName();
extern const std::string ModelChecksum();
......@@ -63,7 +63,7 @@ MaceStatus CreateMaceEngineFromCode(
return MaceStatus::MACE_INVALID_ARGS;
}
const unsigned char * model_data = nullptr;
NetDef net_def;
std::shared_ptr<NetDef> net_def;
MaceStatus status = MaceStatus::MACE_SUCCESS;
switch (model_name_map[model_name]) {
{% for i in range(model_tags |length) %}
......@@ -72,7 +72,7 @@ MaceStatus CreateMaceEngineFromCode(
mace::{{model_tags[i]}}::LoadModelData(model_data_file);
net_def = mace::{{model_tags[i]}}::CreateNet();
engine->reset(new mace::MaceEngine(device_type));
status = (*engine)->Init(&net_def, input_nodes, output_nodes, model_data);
status = (*engine)->Init(net_def.get(), input_nodes, output_nodes, model_data);
if (device_type == DeviceType::GPU || device_type == DeviceType::HEXAGON) {
mace::{{model_tags[i]}}::UnloadModelData(model_data);
}
......
......@@ -18,6 +18,7 @@
#include <string>
#include "mace/core/macros.h"
#include "mace/proto/mace.pb.h"
#include "mace/public/mace.h"
#include "mace/utils/env_time.h"
#include "mace/utils/logging.h"
......@@ -40,33 +41,37 @@ extern void CreateOperator{{i}}(mace::OperatorDef *op);
namespace {
{% if net.arg|length != 0 %}
void CreateNetArg(mace::NetDef &net_def) {
void CreateNetArg(NetDef *net_def) {
net_def->mutable_arg()->Reserve({{ net.arg|length }});
mace::Argument *arg = nullptr;
{% for arg in net.arg %}
{% for i in range(net.arg|length) %}
arg = net_def.add_arg();
arg->set_name({{ arg.name|tojson }});
arg = net_def->add_arg();
arg->set_name({{ net.arg[i].name|tojson }});
{%- if arg.HasField('f') %}
arg->set_f({{ arg.f }});
{%- if net.arg[i].HasField('f') %}
arg->set_f({{ net.arg[i].f }});
{% endif %}
{%- if arg.HasField('i') %}
arg->set_i({{ arg.i }});
{%- if net.arg[i].HasField('i') %}
arg->set_i({{ net.arg[i].i }});
{% endif %}
{%- if arg.HasField('s') %}
arg->set_s({{ arg.s|tojson }});
{%- if net.arg[i].HasField('s') %}
arg->set_s({{ net.arg[i].s|tojson }});
{% endif %}
{% for float_value in arg.floats %}
arg->add_floats({ {{ float_value }} });
arg->mutable_floats()->Reserve({{ net.arg[i].floats|length }});
{% for float_value in net.arg[i].floats %}
arg->add_floats({{ float_value }});
{% endfor %}
{% for int_value in arg.ints %}
arg->add_ints({ {{ int_value }} });
arg->mutable_ints()->Reserve({{ net.arg[i].ints|length }});
{% for int_value in net.arg[i].ints %}
arg->add_ints({{ int_value }});
{% endfor %}
{% for str_value in arg.strings %}
arg->add_strings({ {{ str_value }} });
arg->mutable_strings()->Reserve({{ net.arg[i].strings|length }});
{# Emit each string as a quoted C++ literal via tojson, matching how name and s are rendered in this template; an unquoted value would be generated as a bare identifier and fail to compile. #}
{% for str_value in net.arg[i].strings %}
arg->add_strings({{ str_value|tojson }});
{% endfor %}
{% endfor %}
......@@ -74,7 +79,7 @@ void CreateNetArg(mace::NetDef &net_def) {
{% endif %}
{% if net.output_info | length > 0 %}
void CreateOutputInfo(mace::NetDef &net_def) {
void CreateOutputInfo(NetDef *net_def) {
std::vector<std::vector<int>> dims { {{net.output_info | map(attribute='dims') | join(', ') | replace('[', '{') | replace(']', '}') }} };
std::vector<int> data_types_int { {{ net.output_info | map(attribute='data_type') | join(', ') }} };
......@@ -82,27 +87,31 @@ void CreateOutputInfo(mace::NetDef &net_def) {
for (int k = 0; k < {{ net.output_info | length }}; ++k) {
data_types[k] = static_cast<mace::DataType>(data_types_int[k]);
}
net_def->mutable_output_info()->Reserve({{ net.output_info | length }});
for (int i = 0; i < {{ net.output_info | length }}; ++i) {
net_def.add_output_info();
net_def.mutable_output_info(i)->set_data_type(data_types[i]);
for (int j = 0; j < (int)dims[i].size(); ++j) {
net_def.mutable_output_info(i)->add_dims(dims[i][j]);
auto output_info = net_def->add_output_info();
output_info->set_data_type(data_types[i]);
output_info->mutable_dims()->Reserve(dims[i].size());
for (size_t j = 0; j < dims[i].size(); ++j) {
output_info->add_dims(dims[i][j]);
}
}
}
{% endif %}
void CreateOperators(mace::NetDef *net_def) {
void CreateOperators(NetDef *net_def) {
MACE_LATENCY_LOGGER(1, "Create operators");
net_def->mutable_op()->Reserve({{ net.op|length }});
{% for i in range(net.op|length) %}
mace::{{tag}}::CreateOperator{{i}}(net_def->add_op());
{% endfor %}
}
void CreateTensors(mace::NetDef *net_def) {
void CreateTensors(NetDef *net_def) {
MACE_LATENCY_LOGGER(1, "Create tensors");
net_def->mutable_tensors()->Reserve({{ tensors|length }});
{% for tensor in tensors %}
mace::{{tag}}::CreateTensor{{tensor.id}}(net_def->add_tensors());
{% endfor %}
......@@ -110,6 +119,7 @@ void CreateTensors(mace::NetDef *net_def) {
{% if net.mem_arena.mem_block|length != 0 %}
void CreateMemoryArena(mace::MemoryArena *mem_arena) {
mem_arena->mutable_mem_block()->Reserve({{ net.mem_arena.mem_block|length }});
{% for i in range(net.mem_arena.mem_block|length) %}
mace::MemoryBlock* mem_block{{i}} = mem_arena->add_mem_block();
......@@ -127,25 +137,24 @@ void CreateMemoryArena(mace::MemoryArena *mem_arena) {
namespace {{tag}} {
NetDef CreateNet() {
const std::shared_ptr<NetDef> CreateNet() {
MACE_LATENCY_LOGGER(1, "Create net {{ net.name }}");
NetDef net_def;
std::shared_ptr<NetDef> net_def(new NetDef());
net_def->set_name("{{ net.name}}");
net_def->set_version("{{ net.version }}");
net_def.set_name("{{ net.name}}");
net_def.set_version("{{ net.version }}");
CreateOperators(&net_def);
CreateTensors(&net_def);
CreateOperators(net_def.get());
CreateTensors(net_def.get());
{% if net.arg|length != 0 %}
CreateNetArg(net_def);
CreateNetArg(net_def.get());
{% endif %}
{% if net.mem_arena.mem_block|length != 0 %}
CreateMemoryArena(net_def.mutable_mem_arena());
CreateMemoryArena(net_def->mutable_mem_arena());
{% endif %}
{% if net.output_info | length > 0 %}
CreateOutputInfo(net_def);
CreateOutputInfo(net_def.get());
{% endif %}
return net_def;
......
......@@ -17,6 +17,7 @@
#include <vector>
#include <string>
#include "mace/proto/mace.pb.h"
#include "mace/public/mace.h"
#include "mace/utils/env_time.h"
#include "mace/utils/logging.h"
......@@ -36,15 +37,19 @@ void UpdateOp(mace::OperatorDef *op,
op->set_type(type);
op->set_node_id(node_id);
op->mutable_input()->Reserve(inputs.size());
for (auto input : inputs) {
op->add_input(input);
}
op->mutable_output()->Reserve(outputs.size());
for (auto output : outputs) {
op->add_output(output);
}
op->mutable_output_type()->Reserve(output_types.size());
for (auto output_type : output_types) {
op->add_output_type(output_type);
}
op->mutable_mem_id()->Reserve(mem_ids.size());
for (auto mem_id : mem_ids) {
op->add_mem_id(mem_id);
}
......@@ -62,6 +67,7 @@ void CreateOperator{{i}}(mace::OperatorDef *op) {
MACE_LATENCY_LOGGER(2, "Create operator {{ net.op[i].name }}");
mace::Argument *arg = nullptr;
op->mutable_arg()->Reserve({{ net.op[i].arg|length }});
{% for arg in net.op[i].arg %}
arg = op->add_arg();
......@@ -77,23 +83,28 @@ void CreateOperator{{i}}(mace::OperatorDef *op) {
arg->set_s({{ arg.s|tojson }});
{%- endif %}
arg->mutable_floats()->Reserve({{ arg.floats|length }});
{% for float_value in arg.floats %}
arg->add_floats({{ float_value }});
{% endfor %}
arg->mutable_ints()->Reserve({{ arg.ints|length }});
{% for int_value in arg.ints %}
arg->add_ints({{ int_value }});
{% endfor %}
arg->mutable_strings()->Reserve({{ arg.strings|length }});
{# Emit each string as a quoted C++ literal via tojson, matching how arg.s is rendered in this template; an unquoted value would be generated as a bare identifier and fail to compile. #}
{% for str_value in arg.strings %}
arg->add_strings({{ str_value|tojson }});
{% endfor %}
{% endfor %}
{% for shape in net.op[i].output_shape %}
{% if shape.dims | length > 0 %}
op->mutable_output_shape()->Reserve({{ net.op[i].output_shape|length }});
mace::OutputShape * output_shape = nullptr;
{% for shape in net.op[i].output_shape %}
{% if shape.dims|length > 0 %}
output_shape = op->add_output_shape();
output_shape->mutable_dims()->Reserve({{ shape.dims|length }});
{% for dim in shape.dims %}
output_shape->add_dims({{ dim }});
{% endfor %}
......@@ -120,7 +131,8 @@ void CreateOperator{{i}}(mace::OperatorDef *op) {
std::vector<int> input_output_ports({ {{ net.op[i].node_input | map(attribute='output_port') | join(', ')}} });
mace::NodeInput *node_input = nullptr;
for (size_t i = 0; i < {{ net.op[i].node_input | length }}; ++i) {
op->mutable_node_input()->Reserve({{ net.op[i].node_input|length }});
for (size_t i = 0; i < {{ net.op[i].node_input|length }}; ++i) {
node_input = op->add_node_input();
node_input->set_node_id(input_node_ids[i]);
node_input->set_output_port(input_output_ports[i]);
......@@ -129,7 +141,8 @@ void CreateOperator{{i}}(mace::OperatorDef *op) {
{% if net.op[i].out_max_byte_size | length > 0 %}
std::vector<int> out_max_byte_sizes {{ net.op[i].out_max_byte_size | replace('[', '{') | replace(']', '}') }};
for (size_t i = 0; i < {{ net.op[i].out_max_byte_size | length }}; ++i) {
op->mutable_out_max_byte_size()->Reserve({{ net.op[i].out_max_byte_size|length }});
for (size_t i = 0; i < {{ net.op[i].out_max_byte_size|length }}; ++i) {
op->add_out_max_byte_size(out_max_byte_sizes[i]);
}
{% endif %}
......
......@@ -23,6 +23,12 @@
#include "mace/utils/logging.h"
namespace mace {
extern const unsigned char *LoadModelData(const std::string &model_data_file,
const size_t &data_size);
extern void UnloadModelData(const unsigned char *model_data,
const size_t &data_size);
namespace {{tag}} {
{% if embed_model_data %}
......
......@@ -17,6 +17,7 @@
#include <vector>
#include <string>
#include "mace/proto/mace.pb.h"
#include "mace/public/mace.h"
#include "mace/utils/env_time.h"
#include "mace/utils/logging.h"
......
......@@ -69,6 +69,7 @@ void BufferToImage(const std::string &input_name,
.AddIntArg("mode", mode)
.Finalize(&operator_def);
operator_def.mutable_mem_id()->Reserve(mem_ids.size());
for (auto mem_id : mem_ids) {
operator_def.add_mem_id(mem_id);
}
......@@ -113,6 +114,7 @@ void Conv3x3(const std::string &input_name,
.AddIntArg("device", static_cast<int>(device_type))
.Finalize(&operator_def);
operator_def.mutable_mem_id()->Reserve(mem_ids.size());
for (auto mem_id : mem_ids) {
operator_def.add_mem_id(mem_id);
}
......@@ -144,6 +146,7 @@ void AddTensor(const std::string &name,
NetDef *net_def) {
ConstTensor *tensor_ptr = net_def->add_tensors();
tensor_ptr->set_name(name);
tensor_ptr->mutable_dims()->Reserve(shape.size());
for (auto dim : shape) {
tensor_ptr->add_dims(dim);
}
......@@ -225,8 +228,10 @@ std::map<std::string, int> AddMemoryOptimization(
input_shapes[i][1]);
}
size_t input_size = input_names.size();
size_t output_size = output_names.size();
MemoryArena *mem_arena_ptr = net_def->mutable_mem_arena();
mem_arena_ptr->mutable_mem_block()->Reserve(input_size + output_size);
for (size_t i = 0; i < input_size; ++i) {
MemoryArena *mem_arena_ptr = net_def->mutable_mem_arena();
MemoryBlock *mem_blk_ptr = mem_arena_ptr->add_mem_block();
mem_blk_ptr->set_mem_id(mem_id);
mem_blk_ptr->set_x(in_mem_block_x);
......@@ -245,9 +250,7 @@ std::map<std::string, int> AddMemoryOptimization(
output_shapes[i][0] *
output_shapes[i][1]);
}
size_t output_size = output_names.size();
for (size_t i = 0; i < output_size; ++i) {
MemoryArena *mem_arena_ptr = net_def->mutable_mem_arena();
MemoryBlock *mem_blk_ptr = mem_arena_ptr->add_mem_block();
mem_blk_ptr->set_mem_id(mem_id);
mem_blk_ptr->set_x(out_mem_block_x);
......@@ -275,16 +278,17 @@ void MaceRunFunc(const int in_out_size) {
const std::vector<std::vector<int64_t>> output_shapes = {{1, 32, 32, 16}};
const std::vector<int64_t> filter_shape = {16, 16, 3, 3};
NetDef net_def;
std::shared_ptr<NetDef> net_def(new NetDef());
// Add memory optimization
auto mem_map = AddMemoryOptimization(input_names, output_names,
input_shapes, output_shapes,
&net_def);
net_def.get());
std::vector<half> data;
ops::test::GenerateRandomRealTypeData<half>(filter_shape, &data);
AddTensor<half>(filter_tensor_name, filter_shape, 0, data.size(), &net_def);
AddTensor<half>(
filter_tensor_name, filter_shape, 0, data.size(), net_def.get());
for (size_t i = 0; i < input_names.size(); ++i) {
std::string input_name = MakeString("mace_input_node_",
......@@ -293,16 +297,16 @@ void MaceRunFunc(const int in_out_size) {
mace::kernels::IN_OUT_CHANNEL,
{mem_map[input_names[i]]},
device,
&net_def);
net_def.get());
}
BufferToImage<half>(filter_tensor_name, filter_tensor_img_name,
mace::kernels::CONV2D_FILTER, {}, device,
&net_def, NetMode::INIT);
net_def.get(), NetMode::INIT);
for (size_t i = 0; i < output_names.size(); ++i) {
Conv3x3<half>(input_names[i], filter_tensor_img_name,
output_names[i], {mem_map[output_names[i]]},
device,
&net_def);
net_def.get());
}
for (size_t i = 0; i < output_names.size(); ++i) {
std::string output_name = MakeString("mace_output_node_",
......@@ -310,7 +314,7 @@ void MaceRunFunc(const int in_out_size) {
ImageToBuffer<float>(output_names[i], output_name,
mace::kernels::IN_OUT_CHANNEL,
device,
&net_def);
net_def.get());
}
const std::string file_path ="/data/local/tmp/mace";
......@@ -319,7 +323,7 @@ void MaceRunFunc(const int in_out_size) {
mace::SetKVStorageFactory(storage_factory);
MaceEngine engine(device);
MaceStatus status = engine.Init(&net_def, input_names, output_names,
MaceStatus status = engine.Init(net_def.get(), input_names, output_names,
reinterpret_cast<unsigned char *>(data.data()));
ASSERT_EQ(status, MaceStatus::MACE_SUCCESS);
......@@ -337,7 +341,7 @@ void MaceRunFunc(const int in_out_size) {
}
}
CheckOutputs<DeviceType::GPU, half>(net_def, inputs, outputs, data);
CheckOutputs<DeviceType::GPU, half>(*net_def, inputs, outputs, data);
}
} // namespace
......
......@@ -79,6 +79,7 @@ void BufferToImage(const std::string &input_name,
.AddIntArg("mode", mode)
.Finalize(&operator_def);
operator_def.mutable_mem_id()->Reserve(mem_ids.size());
for (auto mem_id : mem_ids) {
operator_def.add_mem_id(mem_id);
}
......@@ -124,6 +125,7 @@ void Conv3x3(const std::string &input_name,
.AddIntArg("device", static_cast<int>(device_type))
.Finalize(&operator_def);
operator_def.mutable_mem_id()->Reserve(mem_ids.size());
for (auto mem_id : mem_ids) {
operator_def.add_mem_id(mem_id);
}
......@@ -155,6 +157,7 @@ void AddTensor(const std::string &name,
NetDef *net_def) {
ConstTensor *tensor_ptr = net_def->add_tensors();
tensor_ptr->set_name(name);
tensor_ptr->mutable_dims()->Reserve(shape.size());
for (auto dim : shape) {
tensor_ptr->add_dims(dim);
}
......@@ -236,8 +239,10 @@ std::map<std::string, int> AddMemoryOptimization(
input_shapes[i][1]);
}
size_t input_size = input_names.size();
size_t output_size = output_names.size();
MemoryArena *mem_arena_ptr = net_def->mutable_mem_arena();
mem_arena_ptr->mutable_mem_block()->Reserve(input_size + output_size);
for (size_t i = 0; i < input_size; ++i) {
MemoryArena *mem_arena_ptr = net_def->mutable_mem_arena();
MemoryBlock *mem_blk_ptr = mem_arena_ptr->add_mem_block();
mem_blk_ptr->set_mem_id(mem_id);
mem_blk_ptr->set_x(in_mem_block_x);
......@@ -256,9 +261,7 @@ std::map<std::string, int> AddMemoryOptimization(
output_shapes[i][0] *
output_shapes[i][1]);
}
size_t output_size = output_names.size();
for (size_t i = 0; i < output_size; ++i) {
MemoryArena *mem_arena_ptr = net_def->mutable_mem_arena();
MemoryBlock *mem_blk_ptr = mem_arena_ptr->add_mem_block();
mem_blk_ptr->set_mem_id(mem_id);
mem_blk_ptr->set_x(out_mem_block_x);
......@@ -286,16 +289,16 @@ void MaceRun(const int in_out_size,
const DeviceType device = DeviceType::GPU;
NetDef net_def;
std::shared_ptr<NetDef> net_def(new NetDef());
// Add memory optimization
auto mem_map = AddMemoryOptimization(input_names, output_names,
input_shapes, output_shapes,
&net_def);
net_def.get());
std::vector<T> data;
ops::test::GenerateRandomRealTypeData<T>(filter_shape, &data);
AddTensor<T>(filter_tensor_name, filter_shape, 0, data.size(), &net_def);
AddTensor<T>(filter_tensor_name, filter_shape, 0, data.size(), net_def.get());
for (size_t i = 0; i < input_names.size(); ++i) {
std::string input_name = MakeString("mace_input_node_",
......@@ -304,15 +307,15 @@ void MaceRun(const int in_out_size,
mace::kernels::IN_OUT_CHANNEL,
{mem_map[input_names[i]]},
device,
&net_def);
net_def.get());
}
BufferToImage<half>(filter_tensor_name, filter_tensor_img_name,
mace::kernels::CONV2D_FILTER, {}, device,
&net_def, NetMode::INIT);
net_def.get(), NetMode::INIT);
for (size_t i = 0; i < output_names.size(); ++i) {
Conv3x3<half>(input_names[i], filter_tensor_img_name,
output_names[i], {mem_map[output_names[i]]},
device, &net_def);
device, net_def.get());
}
for (size_t i = 0; i < output_names.size(); ++i) {
std::string output_name = MakeString("mace_output_node_",
......@@ -320,11 +323,11 @@ void MaceRun(const int in_out_size,
ImageToBuffer<float>(output_names[i], output_name,
mace::kernels::IN_OUT_CHANNEL,
device,
&net_def);
net_def.get());
}
MaceEngine engine(device);
MaceStatus status = engine.Init(&net_def, input_names, output_names,
MaceStatus status = engine.Init(net_def.get(), input_names, output_names,
reinterpret_cast<unsigned char *>(data.data()));
ASSERT_EQ(status, MaceStatus::MACE_SUCCESS);
......@@ -342,7 +345,7 @@ void MaceRun(const int in_out_size,
}
}
CheckOutputs<DeviceType::GPU, T>(net_def, inputs, outputs, data);
CheckOutputs<DeviceType::GPU, T>(*net_def, inputs, outputs, data);
}
} // namespace
......
......@@ -31,7 +31,6 @@ cc_library(
copts = ["-Werror", "-Wextra", "-Wno-missing-field-initializers"],
deps = [
"//mace/public",
"//mace/proto:mace_cc",
],
)
......
......@@ -21,7 +21,6 @@
#include <vector>
#include <utility>
#include "mace/proto/mace.pb.h"
#include "mace/public/mace.h"
#include "mace/utils/env_time.h"
#include "mace/utils/string_util.h"
......
......@@ -865,7 +865,6 @@ def merge_libs(target_soc,
model_data_dir = "%s/data" % project_output_dir
hexagon_lib_file = "mace/third_party/nnlib/libhexagon_controller.so"
model_bin_dir = "%s/%s/" % (project_output_dir, abi)
mace_proto_dir = "%s/proto" % project_output_dir
if not os.path.exists(model_bin_dir):
sh.mkdir("-p", model_bin_dir)
......@@ -874,8 +873,6 @@ def merge_libs(target_soc,
sh.cp("-f", glob.glob("mace/public/*.h"), model_header_dir)
if not os.path.exists(model_data_dir):
sh.mkdir("-p", model_data_dir)
if not os.path.exists(mace_proto_dir):
sh.mkdir("-p", mace_proto_dir)
if hexagon_mode:
sh.cp("-f", hexagon_lib_file, model_bin_dir)
......@@ -894,8 +891,6 @@ def merge_libs(target_soc,
sh.cp("-f", "%s/%s.pb" % (mace_model_dirs_kv[model_name], model_name),
model_data_dir)
sh.cp("-f", "mace/proto/mace.proto", mace_proto_dir)
mri_stream = ""
if abi == "host":
mri_stream += "create %s/libmace_%s.a\n" % \
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册