diff --git a/mace/core/runtime/hexagon/hexagon_control_wrapper.cc b/mace/core/runtime/hexagon/hexagon_control_wrapper.cc index eab3a902d9ba3b8fbaa8d4e43ffa0cd72d2415ab..61fa97159ebe3af1165faa46a14d381ed884a468 100644 --- a/mace/core/runtime/hexagon/hexagon_control_wrapper.cc +++ b/mace/core/runtime/hexagon/hexagon_control_wrapper.cc @@ -3,8 +3,17 @@ // #include "mace/core/runtime/hexagon/hexagon_control_wrapper.h" -#include -#include +#include "mace/core/runtime/hexagon/hexagon_nn_ops.h" +#include +#include + +namespace { + inline int64_t NowMicros() { + struct timeval tv; + gettimeofday(&tv, nullptr); + return static_cast(tv.tv_sec) * 1000000 + tv.tv_usec; + } +} namespace mace { @@ -20,7 +29,7 @@ enum { int HexagonControlWrapper::GetVersion() { int version; - hexagon_nn_version(&version); + MACE_CHECK(hexagon_nn_version(&version) == 0, "get version error"); return version; } @@ -44,75 +53,121 @@ bool HexagonControlWrapper::Finalize() { return hexagon_controller_DeInitHexagon() == 0; } -bool HexagonControlWrapper::SetupGraph(const NetDef& net_def) { +bool HexagonControlWrapper::SetupGraph(const NetDef &net_def) { LOG(INFO) << "Hexagon setup graph"; + + int64_t t0 = NowMicros(); + // const node - for (const ConstTensor& tensor_proto: net_def.tensors()) { - vector tensor_shape(tensor_proto.dims().begin(), - tensor_proto.dims().end()); - while (tensor_shape.size() < 4) { - tensor_shape.insert(tensor_shape.begin(), 1); - } + std::thread const_thread([&]() { + std::cout << "thread function\n"; + vector const_node_list; + for (const ConstTensor &tensor_proto: net_def.tensors()) { + vector tensor_shape(tensor_proto.dims().begin(), + tensor_proto.dims().end()); + while (tensor_shape.size() < 4) { + tensor_shape.insert(tensor_shape.begin(), 1); + } - if (tensor_proto.data_type() == DataType::DT_INT32 - && tensor_proto.data_size() == 0) { - hexagon_nn_append_const_node(nn_id_, node_id(tensor_proto.node_id()), - tensor_shape[0], tensor_shape[1], - tensor_shape[2], tensor_shape[3], - NULL, - 0); - } else { - unique_ptr tensor = serializer_.Deserialize(tensor_proto, - DeviceType::CPU); - hexagon_nn_append_const_node(nn_id_, node_id(tensor_proto.node_id()), - tensor_shape[0], tensor_shape[1], - tensor_shape[2], tensor_shape[3], - reinterpret_cast( - tensor->raw_data()), - tensor->raw_size()); + hexagon_nn_const_node const_node; + const_node.node_id = node_id(tensor_proto.node_id()); + const_node.tensor.batches = tensor_shape[0]; + const_node.tensor.height = tensor_shape[1]; + const_node.tensor.width = tensor_shape[2]; + const_node.tensor.depth = tensor_shape[3]; + + if (tensor_proto.data_type() == DataType::DT_INT32 + && tensor_proto.data_size() == 0) { + const_node.tensor.data = NULL; + const_node.tensor.dataLen = 0; + } else { + const_node.tensor.data = + const_cast(tensor_proto.data()); + const_node.tensor.dataLen = + tensor_proto.data_size() * GetEnumTypeSize(tensor_proto.data_type()); + } + const_node_list.push_back(const_node); + // 255 is magic number: why fastrpc limits sequence length to that? + if (const_node_list.size() >= 250) { + MACE_CHECK(hexagon_nn_append_const_node_list(nn_id_, + const_node_list.data(), + const_node_list.size()) + == 0, "append const node error"); + const_node_list.clear(); + } } - VLOG(1) << "Const: " << tensor_proto.name() - << ", node_id: " << node_id(tensor_proto.node_id()) - << "\n\t shape: " << tensor_shape[0] << " " << tensor_shape[1] - << " " << tensor_shape[2] << " " << tensor_shape[3]; - } - // op node - for (const OperatorDef& op: net_def.op()) { - unsigned int op_id; - MACE_CHECK(hexagon_nn_op_name_to_id(op.type().data(), &op_id) == 0, - "invalid op: ", op.name(), ", type: ", op.type()); - vector inputs(op.node_input().size()); - for (size_t i = 0; i < op.node_input().size(); ++i) { - inputs[i].src_id = node_id(op.node_input()[i].node_id()); - inputs[i].output_idx = op.node_input()[i].output_port(); + if (!const_node_list.empty()) { + MACE_CHECK(hexagon_nn_append_const_node_list(nn_id_, + const_node_list.data(), + const_node_list.size()) == 0, + "append const node error"); } - vector outputs(op.out_max_byte_size().size()); - for (size_t i = 0; i < op.out_max_byte_size().size(); ++i) { - outputs[i].max_size = op.out_max_byte_size()[i]; - } - - hexagon_nn_padding_type padding_type = static_cast( - op.padding()); - - hexagon_nn_append_node(nn_id_, node_id(op.node_id()), op_id, padding_type, - inputs.data(), inputs.size(), - outputs.data(), outputs.size()); + const_node_list.clear(); + }); - if (VLOG_IS_ON(1)) { - VLOG(1) << "Op: " << op.name() - << ", type: " << op.type() - << ", node_id: " << node_id(op.node_id()) - << ", padding_type: " << padding_type; - - for (const auto &input: inputs) { - VLOG(1) << "\t input: " << input.src_id << ":" << input.output_idx; + // op node + std::thread op_thread([&]() { + OpMap op_map; + op_map.Init(); + vector op_node_list; + vector> cached_inputs; + vector> cached_outputs; + vector inputs; + vector outputs; + + for (const OperatorDef &op: net_def.op()) { + int op_id = op_map.GetOpId(op.type()); + inputs.resize(op.node_input().size()); + for (size_t i = 0; i < op.node_input().size(); ++i) { + inputs[i].src_id = node_id(op.node_input()[i].node_id()); + inputs[i].output_idx = op.node_input()[i].output_port(); + } + outputs.resize(op.out_max_byte_size().size()); + for (size_t i = 0; i < op.out_max_byte_size().size(); ++i) { + outputs[i].max_size = op.out_max_byte_size()[i]; } - for (const auto &output: outputs) { - VLOG(1) << "\t output: " << output.max_size; + cached_inputs.push_back(inputs); + cached_outputs.push_back(outputs); + + hexagon_nn_padding_type + padding_type = static_cast( + op.padding()); + + hexagon_nn_op_node op_node; + op_node.node_id = node_id(op.node_id()); + op_node.operation = op_id; + op_node.padding = padding_type; + op_node.inputs = cached_inputs.back().data(); + op_node.inputsLen = inputs.size(); + op_node.outputs = cached_outputs.back().data(); + op_node.outputsLen = outputs.size(); + + op_node_list.push_back(op_node); + if (op_node_list.size() >= 125) { + MACE_CHECK(hexagon_nn_append_node_list(nn_id_, + op_node_list.data(), + op_node_list.size()) == 0, + "append node error"); + op_node_list.clear(); + cached_inputs.clear(); + cached_outputs.clear(); } } - } + + if (!op_node_list.empty()) { + MACE_CHECK(hexagon_nn_append_node_list(nn_id_, + op_node_list.data(), + op_node_list.size()) == 0, + "append node error"); + } + op_node_list.clear(); + cached_inputs.clear(); + cached_outputs.clear(); + }); + + const_thread.join(); + op_thread.join(); // input info num_inputs_ = 0; @@ -146,7 +201,15 @@ bool HexagonControlWrapper::SetupGraph(const NetDef& net_def) { << "\n\t type: " << output_info.data_type(); } - return hexagon_nn_prepare(nn_id_) == 0; + int64_t t1 = NowMicros(); + + int res = hexagon_nn_prepare(nn_id_); + + int64_t t2 = NowMicros(); + + VLOG(0) << "Setup time: " << t1 - t0 << " " << t2 - t1; + + return res == 0; } bool HexagonControlWrapper::TeardownGraph() { @@ -159,35 +222,42 @@ bool HexagonControlWrapper::TeardownGraph() { void HexagonControlWrapper::PrintLog() { char *buf; if ((buf = new char[PRINT_BUFSIZE]) == NULL) return; - hexagon_nn_getlog(nn_id_, reinterpret_cast(buf), PRINT_BUFSIZE); + MACE_CHECK(hexagon_nn_getlog(nn_id_, + reinterpret_cast(buf), + PRINT_BUFSIZE) == 0, "print log error"); LOG(INFO) << string(buf); - delete []buf; + delete[]buf; } void HexagonControlWrapper::PrintGraph() { LOG(INFO) << "Print Graph"; char *buf; if ((buf = new char[PRINT_BUFSIZE]) == NULL) return; - hexagon_nn_snpprint(nn_id_, reinterpret_cast(buf), PRINT_BUFSIZE); + MACE_CHECK(hexagon_nn_snpprint(nn_id_, + reinterpret_cast(buf), + PRINT_BUFSIZE) == 0, "print graph error"); LOG(INFO) << string(buf); - delete []buf; + delete[]buf; } void HexagonControlWrapper::SetDebugLevel(int level) { LOG(INFO) << "Set debug level: " << level; - hexagon_nn_set_debug_level(nn_id_, level); + MACE_CHECK(hexagon_nn_set_debug_level(nn_id_, level) == 0, + "set debug level error"); } void HexagonControlWrapper::SetGraphMode(int mode) { LOG(INFO) << "Set dsp mode: " << mode; - hexagon_nn_set_graph_mode(nn_id_, mode); + MACE_CHECK(hexagon_nn_set_graph_mode(nn_id_, mode) == 0, "set mode error"); } void HexagonControlWrapper::GetPerfInfo() { LOG(INFO) << "Get perf info"; vector perf_info(MAX_NODE); unsigned int n_items = 0; - hexagon_nn_get_perfinfo(nn_id_, perf_info.data(), MAX_NODE, &n_items); + MACE_CHECK( + hexagon_nn_get_perfinfo(nn_id_, perf_info.data(), MAX_NODE, &n_items) == 0, + "get perf info error"); std::unordered_map node_id_counters; std::unordered_map> node_type_counters; @@ -197,8 +267,9 @@ void HexagonControlWrapper::GetPerfInfo() { for (int i = 0; i < n_items; ++i) { unsigned int node_id = perf_info[i].node_id; unsigned int node_type_id = perf_info[i].node_type; - node_id_counters[node_id] = ((static_cast(perf_info[i].counter_hi) << 32) - + perf_info[i].counter_lo) * 1.0f / perf_info[i].executions; + node_id_counters[node_id] = + ((static_cast(perf_info[i].counter_hi) << 32) + + perf_info[i].counter_lo) * 1.0f / perf_info[i].executions; char node_type_buf[MAX_NODE]; hexagon_nn_op_id_to_name(node_type_id, node_type_buf, MAX_NODE); @@ -216,7 +287,7 @@ void HexagonControlWrapper::GetPerfInfo() { total_duration += node_id_counters[node_id]; } - for (auto& node_type_counter: node_type_counters) { + for (auto &node_type_counter: node_type_counters) { LOG(INFO) << "node type: " << node_type_counter.first << ", time: " << node_type_counter.second.first << ", duration: " << node_type_counter.second.second; @@ -226,12 +297,13 @@ void HexagonControlWrapper::GetPerfInfo() { void HexagonControlWrapper::ResetPerfInfo() { LOG(INFO) << "Reset perf info"; - hexagon_nn_reset_perfinfo(nn_id_, NN_GRAPH_PERFEVENT_UTIME); + MACE_CHECK(hexagon_nn_reset_perfinfo(nn_id_, NN_GRAPH_PERFEVENT_UTIME) == 0, + "reset perf error"); } bool HexagonControlWrapper::ExecuteGraph(const Tensor &input_tensor, Tensor *output_tensor) { - LOG(INFO) << "Execute graph: " << nn_id_; + VLOG(2) << "Execute graph: " << nn_id_; // single input and single output MACE_ASSERT(num_inputs_ == 1, "Wrong inputs num"); MACE_ASSERT(num_outputs_ == 1, "Wrong outputs num"); @@ -255,6 +327,7 @@ bool HexagonControlWrapper::ExecuteGraph(const Tensor &input_tensor, output_tensor->raw_mutable_data()), output_tensor->raw_size(), &output_bytes); + MACE_CHECK(res == 0, "execute error"); MACE_ASSERT(output_shape == output_shapes_[0], "wrong output shape inferred"); @@ -299,16 +372,16 @@ bool HexagonControlWrapper::ExecuteGraphNew(const vector &input_tensors, outputs, num_outputs); for (int i = 0; i < num_outputs; ++i) { - vector output_shape {outputs[i].batches, outputs[i].height, - outputs[i].width, outputs[i].depth}; - MACE_ASSERT(output_shape == output_shapes_[i], + vector output_shape{outputs[i].batches, outputs[i].height, + outputs[i].width, outputs[i].depth}; + MACE_ASSERT(output_shape == output_shapes_[i], "wrong output shape inferred"); MACE_ASSERT(outputs[i].data_valid_len == (*output_tensors)[i].raw_size(), "wrong output bytes inferred."); } - delete [] inputs; - delete [] outputs; + delete[] inputs; + delete[] outputs; return res == 0; }; @@ -323,7 +396,10 @@ bool HexagonControlWrapper::ExecuteGraphPreQuantize(const Tensor &input_tensor, float *min_in_data = input_tensors[1].mutable_data(); input_tensors[2].Resize({1, 1, 1, 1}); float *max_in_data = input_tensors[2].mutable_data(); - quantizer_.Quantize(input_tensor, &input_tensors[0], min_in_data, max_in_data); + quantizer_.Quantize(input_tensor, + &input_tensors[0], + min_in_data, + max_in_data); if (!ExecuteGraphNew(input_tensors, &output_tensors)) { return false; } @@ -332,7 +408,10 @@ bool HexagonControlWrapper::ExecuteGraphPreQuantize(const Tensor &input_tensor, const float *min_out_data = output_tensors[1].data(); const float *max_out_data = output_tensors[2].data(); - quantizer_.DeQuantize(output_tensors[0], *min_out_data, *max_out_data, output_tensor); + quantizer_.DeQuantize(output_tensors[0], + *min_out_data, + *max_out_data, + output_tensor); return true; } diff --git a/mace/core/runtime/hexagon/hexagon_controller_dummy.cc b/mace/core/runtime/hexagon/hexagon_controller_dummy.cc index 94eb7fe936cfecaba6b446acf53ae6b4738ca4e2..fdc62edecb35e01a7ed2b4ccfa4f0c239e010fc9 100644 --- a/mace/core/runtime/hexagon/hexagon_controller_dummy.cc +++ b/mace/core/runtime/hexagon/hexagon_controller_dummy.cc @@ -21,7 +21,9 @@ __QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_set_graph_mode)(hexagon_nn_nn_ __QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_snpprint)(hexagon_nn_nn_id id, unsigned char* buf, int bufLen) __QAIC_HEADER_ATTRIBUTE { return 0; } __QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_getlog)(hexagon_nn_nn_id id, unsigned char* buf, int bufLen) __QAIC_HEADER_ATTRIBUTE { return 0; } __QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_append_node)(hexagon_nn_nn_id id, unsigned int node_id, unsigned int operation, hexagon_nn_padding_type padding, const hexagon_nn_input* inputs, int inputsLen, const hexagon_nn_output* outputs, int outputsLen) __QAIC_HEADER_ATTRIBUTE { return 0; } +__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_append_node_list)(hexagon_nn_nn_id id, const hexagon_nn_op_node* ops, int opsLen) __QAIC_HEADER_ATTRIBUTE { return 0; } __QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_append_const_node)(hexagon_nn_nn_id id, unsigned int node_id, unsigned int batches, unsigned int height, unsigned int width, unsigned int depth, const unsigned char* data, int dataLen) __QAIC_HEADER_ATTRIBUTE { return 0; } +__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_append_const_node_list)(hexagon_nn_nn_id id, const hexagon_nn_const_node* consts, int constsLen) __QAIC_HEADER_ATTRIBUTE { return 0; } __QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_prepare)(hexagon_nn_nn_id id) __QAIC_HEADER_ATTRIBUTE { return 0; } __QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_execute)(hexagon_nn_nn_id id, unsigned int batches_in, unsigned int height_in, unsigned int width_in, unsigned int depth_in, const unsigned char* data_in, int data_inLen, unsigned int* batches_out, unsigned int* height_out, unsigned int* width_out, unsigned int* depth_out, unsigned char* data_out, int data_outLen, unsigned int* data_len_out) __QAIC_HEADER_ATTRIBUTE { return 0; } __QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_teardown)(hexagon_nn_nn_id id) __QAIC_HEADER_ATTRIBUTE { return 0; } diff --git a/mace/core/runtime/hexagon/hexagon_nn.h b/mace/core/runtime/hexagon/hexagon_nn.h index b2190344892ea13bf9d33dd64f49cc946800f414..3bfd79c06063f492b8fecc72f5ba54e1eef73076 100644 --- a/mace/core/runtime/hexagon/hexagon_nn.h +++ b/mace/core/runtime/hexagon/hexagon_nn.h @@ -30,51 +30,66 @@ extern "C" { #define __QAIC_STRING1_OBJECT_DEFINED__ #define __STRING1_OBJECT__ typedef struct _cstring1_s { - char* data; - int dataLen; + char* data; + int dataLen; } _cstring1_t; #endif /* __QAIC_STRING1_OBJECT_DEFINED__ */ typedef struct hexagon_nn_input hexagon_nn_input; struct hexagon_nn_input { - unsigned int src_id; - unsigned int output_idx; + unsigned int src_id; + unsigned int output_idx; }; typedef struct hexagon_nn_output hexagon_nn_output; struct hexagon_nn_output { - unsigned int max_size; - unsigned int unused; + unsigned int max_size; + unsigned int unused; }; typedef struct hexagon_nn_perfinfo hexagon_nn_perfinfo; struct hexagon_nn_perfinfo { - unsigned int node_id; - unsigned int node_type; - unsigned int executions; - unsigned int unused; - unsigned int counter_lo; - unsigned int counter_hi; + unsigned int node_id; + unsigned int node_type; + unsigned int executions; + unsigned int unused; + unsigned int counter_lo; + unsigned int counter_hi; }; typedef int hexagon_nn_nn_id; enum hexagon_nn_padding_type { - NN_PAD_NA, - NN_PAD_SAME, - NN_PAD_VALID, - NN_PAD_MIRROR_REFLECT, - NN_PAD_MIRROR_SYMMETRIC, - NN_PAD_SAME_CAFFE, - _32BIT_PLACEHOLDER_hexagon_nn_padding_type = 0x7fffffff + NN_PAD_NA, + NN_PAD_SAME, + NN_PAD_VALID, + NN_PAD_MIRROR_REFLECT, + NN_PAD_MIRROR_SYMMETRIC, + NN_PAD_SAME_CAFFE, + _32BIT_PLACEHOLDER_hexagon_nn_padding_type = 0x7fffffff }; typedef enum hexagon_nn_padding_type hexagon_nn_padding_type; typedef struct hexagon_nn_tensordef hexagon_nn_tensordef; struct hexagon_nn_tensordef { - unsigned int batches; - unsigned int height; - unsigned int width; - unsigned int depth; - unsigned char* data; - int dataLen; - unsigned int data_valid_len; - unsigned int unused; + unsigned int batches; + unsigned int height; + unsigned int width; + unsigned int depth; + unsigned char* data; + int dataLen; + unsigned int data_valid_len; + unsigned int unused; +}; +typedef struct hexagon_nn_op_node hexagon_nn_op_node; +struct hexagon_nn_op_node { + unsigned int node_id; + unsigned int operation; + hexagon_nn_padding_type padding; + hexagon_nn_input* inputs; + int inputsLen; + hexagon_nn_output* outputs; + int outputsLen; +}; +typedef struct hexagon_nn_const_node hexagon_nn_const_node; +struct hexagon_nn_const_node { + unsigned int node_id; + hexagon_nn_tensordef tensor; }; __QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_config)(void) __QAIC_HEADER_ATTRIBUTE; __QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_init)(void) __QAIC_HEADER_ATTRIBUTE; @@ -83,7 +98,9 @@ __QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_set_graph_mode)(hexagon_nn_nn_ __QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_snpprint)(hexagon_nn_nn_id id, unsigned char* buf, int bufLen) __QAIC_HEADER_ATTRIBUTE; __QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_getlog)(hexagon_nn_nn_id id, unsigned char* buf, int bufLen) __QAIC_HEADER_ATTRIBUTE; __QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_append_node)(hexagon_nn_nn_id id, unsigned int node_id, unsigned int operation, hexagon_nn_padding_type padding, const hexagon_nn_input* inputs, int inputsLen, const hexagon_nn_output* outputs, int outputsLen) __QAIC_HEADER_ATTRIBUTE; +__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_append_node_list)(hexagon_nn_nn_id id, const hexagon_nn_op_node* ops, int opsLen) __QAIC_HEADER_ATTRIBUTE; __QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_append_const_node)(hexagon_nn_nn_id id, unsigned int node_id, unsigned int batches, unsigned int height, unsigned int width, unsigned int depth, const unsigned char* data, int dataLen) __QAIC_HEADER_ATTRIBUTE; +__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_append_const_node_list)(hexagon_nn_nn_id id, const hexagon_nn_const_node* consts, int constsLen) __QAIC_HEADER_ATTRIBUTE; __QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_prepare)(hexagon_nn_nn_id id) __QAIC_HEADER_ATTRIBUTE; __QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_execute)(hexagon_nn_nn_id id, unsigned int batches_in, unsigned int height_in, unsigned int width_in, unsigned int depth_in, const unsigned char* data_in, int data_inLen, unsigned int* batches_out, unsigned int* height_out, unsigned int* width_out, unsigned int* depth_out, unsigned char* data_out, int data_outLen, unsigned int* data_len_out) __QAIC_HEADER_ATTRIBUTE; __QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_teardown)(hexagon_nn_nn_id id) __QAIC_HEADER_ATTRIBUTE; diff --git a/mace/core/runtime/hexagon/hexagon_nn_ops.h b/mace/core/runtime/hexagon/hexagon_nn_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..dfb3b3861dc60ee1dbc2ef8706fb7c78888ca716 --- /dev/null +++ b/mace/core/runtime/hexagon/hexagon_nn_ops.h @@ -0,0 +1,48 @@ +// +// Copyright (c) 2018 XiaoMi All rights reserved. +// + +#ifndef LIBMACE_HEXAGON_NN_OPS_H +#define LIBMACE_HEXAGON_NN_OPS_H + +#include "mace/utils/logging.h" +#include + +namespace mace { + +#define OP_INVALID -1 + +typedef enum op_type_enum { +#define DEF_OP(NAME, ...) OP_##NAME, + +#include "mace/core/runtime/hexagon/ops.h" + NN_OPS_MAX + +#undef DEF_OP +} op_type; + +class OpMap { + public: + void Init() { +#define DEF_OP(NAME) \ + op_map_[#NAME] = OP_##NAME; + +#include "mace/core/runtime/hexagon/ops.h" + +#undef DEF_OP + } + + int GetOpId(const std::string &op_type) { + if (op_map_.find(op_type) != end(op_map_)) { + return op_map_[op_type]; + } else { + LOG(ERROR) << "DSP unsupoorted op type: " << op_type; + return OP_INVALID; + } + } + private: + std::unordered_map op_map_; +}; +} // namespace mace + +#endif // LIBMACE_HEXAGON_NN_OPS_H diff --git a/mace/core/runtime/hexagon/libhexagon_controller.so b/mace/core/runtime/hexagon/libhexagon_controller.so index 9ee02ba7d5993fec5db466c7675ed8e1e9951d78..42dbfabb2cb90dfdcff463f4d46cac981de0dacf 100755 Binary files a/mace/core/runtime/hexagon/libhexagon_controller.so and b/mace/core/runtime/hexagon/libhexagon_controller.so differ diff --git a/mace/core/runtime/hexagon/ops.h b/mace/core/runtime/hexagon/ops.h new file mode 100644 index 0000000000000000000000000000000000000000..79b503cd43b7a567c7052da1b6a754bb7a73bfa6 --- /dev/null +++ b/mace/core/runtime/hexagon/ops.h @@ -0,0 +1,181 @@ +/* + * You probably want to + * + * ## ##### ##### + * # # # # # # + * # # # # # # + * ###### # # # # + * # # # # # # + * # # ##### ##### + * + * + * # # #### ##### ###### #### + * ## # # # # # # # + * # # # # # # # ##### #### + * # # # # # # # # # + * # ## # # # # # # # + * # # #### ##### ###### #### + * + * + * ## ##### + * # # # + * # # # + * ###### # + * # # # + * # # # + * + * + * ##### # # ###### + * # # # # + * # ###### ##### + * # # # # + * # # # # + * # # # ###### + * + * + * ###### # # ##### + * # ## # # # + * ##### # # # # # + * # # # # # # + * # # ## # # + * ###### # # ##### + * + * otherwise the interface becomes incompatible. + */ +DEF_OP(INPUT) +DEF_OP(OUTPUT) +DEF_OP(Nop) +DEF_OP(Const) +DEF_OP(Check) +DEF_OP(Close_f) +DEF_OP(Close_quint8) +DEF_OP(Close_q_quint8) +DEF_OP(Close_int32) +DEF_OP(Close_qint32) +DEF_OP(PPrint_8) +DEF_OP(PPrint_32) +DEF_OP(PPrint_f) +DEF_OP(PreFree) +DEF_OP(Flatten) + +#ifndef DEF_OP_WREF +#define DEF_OP_WREF(NAME) DEF_OP(NAME) DEF_OP(NAME##_ref) +#define __SELF_DEF_OP_WREF +#endif + +DEF_OP_WREF(QuantizedConv2d_8x8to32) +DEF_OP_WREF(QuantizedMatMul_8x8to32) +DEF_OP_WREF(QuantizeDownAndShrinkRange_32to8) +DEF_OP_WREF(QuantizedRelu_8) +DEF_OP_WREF(QuantizedReluX_8) +DEF_OP_WREF(QuantizedMaxPool_8) +DEF_OP_WREF(QuantizedAvgPool_8) +DEF_OP_WREF(QuantizedConcat_8) +DEF_OP_WREF(QuantizedBiasAdd_8p8to32) +DEF_OP_WREF(Min_f) +DEF_OP_WREF(Max_f) +DEF_OP_WREF(Quantize) +DEF_OP_WREF(Dequantize) +DEF_OP_WREF(Supernode_8x8p8to8) + +DEF_OP(QuantizedFlatten) +DEF_OP(Softmax_f) +DEF_OP(Conv2d_f) +DEF_OP(MatMul_f) +DEF_OP(Relu_f) +DEF_OP(ReluX_f) +DEF_OP(AvgPool_f) +DEF_OP(MaxPool_f) +DEF_OP(Concat_f) +DEF_OP(BiasAdd_f) +DEF_OP(LRN_f) + +DEF_OP(Variable) +DEF_OP(Assign) +DEF_OP(Reshape) +DEF_OP(QuantizedReshape) +DEF_OP(Tanh_f) +DEF_OP(Sigmoid_f) +DEF_OP(Slice_8) +DEF_OP(Slice_f) +DEF_OP(QuantizedSlice_8) +DEF_OP(Add_f) +DEF_OP(Mul_f) +DEF_OP(Minimum_f) +DEF_OP(Maximum_f) + +DEF_OP_WREF(Requantize_32to8) +DEF_OP_WREF(RequantizationRange_32) + +DEF_OP(Neg_f) +DEF_OP(Sub_f) +DEF_OP(AddN_f) +DEF_OP(Range_int32) +DEF_OP(Rank_int32) +DEF_OP(Transpose_int32) +DEF_OP(Transpose_f) +DEF_OP(InstanceNorm_f) +DEF_OP_WREF(QuantizedInstanceNorm_8) +DEF_OP(Sub_int32) +DEF_OP(Add_int32) +DEF_OP(Split_f) +DEF_OP(Dequantize_qint32_f) +DEF_OP(PRelu_f) +DEF_OP_WREF(QuantizedPRelu_8) +DEF_OP(Sum_f) +DEF_OP(Prod_f) +DEF_OP(Mul_int32) +DEF_OP(LogicalAnd_int32) +DEF_OP(LogicalOr_int32) +DEF_OP(LogicalXor_int32) +DEF_OP(Shape_int32) +DEF_OP(Pack_int32) +DEF_OP(MirrorPad_f) +DEF_OP(ResizeNearestNeighbor_f) +DEF_OP(StridedSlice_int32) +DEF_OP(StridedSlice_f) +DEF_OP(ExpandDims_int32) +DEF_OP(ExpandDims_f) + +DEF_OP(LogSoftmax_f) +DEF_OP(Split_int32) +DEF_OP(QuantizedSplit_8) + +DEF_OP(Deconv_f) +DEF_OP_WREF(QuantizedDeconv_8x8to32) + +DEF_OP_WREF(QuantizedMul_8x8to32) +DEF_OP_WREF(QuantizedAdd_8p8to32) +DEF_OP_WREF(QuantizedSigmoid_8) +DEF_OP_WREF(QuantizedTanh_8) +DEF_OP_WREF(QuantizedSoftmax_8) +DEF_OP_WREF(QuantizedLRN_8) +DEF_OP_WREF(QuantizedSub_8p8to32) +DEF_OP_WREF(QuantizedMaximum_8) +DEF_OP_WREF(QuantizedMinimum_8) + +DEF_OP(Pad_f) +DEF_OP(SpaceToBatchND_f) +DEF_OP(BatchToSpaceND_f) +DEF_OP(QuantizedSpaceToBatchND_8) +DEF_OP(QuantizedBatchToSpaceND_8) +DEF_OP(QuantizedPad_8) +DEF_OP(ResizeBilinear_f) +DEF_OP(QuantizedResizeBilinear_8) +DEF_OP(ConcatV2_f) +DEF_OP(ConcatV2_int32) +DEF_OP(Prod_int32) +DEF_OP(Slice_int32) + +DEF_OP(QuantizedAdd_8p8to8) + +DEF_OP_WREF(AutoQuantize) +DEF_OP_WREF(QuantizedDepthwiseConv2d_8x8to32) +DEF_OP(DepthwiseConv2d_f) +DEF_OP(QuantizedBiasAdd_8p8to8) + +#ifdef __SELF_DEF_OP_WREF +#undef __SELF_DEF_OP_WREF +#undef DEF_OP_WREF +#endif + diff --git a/mace/core/tensor.h b/mace/core/tensor.h index c35115645aa2e252cef8fbfc16cdd3cd82dfb7d5..d3014e8b2633793120c5e1afe81e3a01a60b4f35 100644 --- a/mace/core/tensor.h +++ b/mace/core/tensor.h @@ -298,7 +298,7 @@ class Tensor { return type_size; } - inline void Copy(Tensor &other) { + inline void Copy(const Tensor &other) { alloc_ = other.alloc_; dtype_ = other.dtype_; ResizeLike(other);