diff --git a/mace/dsp/hexagon/android.min b/mace/dsp/hexagon/android.min deleted file mode 100644 index 14c9422c6795bacc95dac01609b4a7be19b615dc..0000000000000000000000000000000000000000 --- a/mace/dsp/hexagon/android.min +++ /dev/null @@ -1,37 +0,0 @@ -$(info ------------------------------------------) -$(info --- V = $(V)) -$(info --- GLUE_DIR = $(GLUE_DIR)) -$(info --- HEXAGON_SDK_ROOT = $(HEXAGON_SDK_ROOT)) -$(info ------------------------------------------) - -INCDIRS += ../../../libs/common/adspmsgd/ship/android_Release -LIBDIRS += ../../../libs/common/adspmsgd/ship/android_Release - -BUILD_DLLS=libhexagon_controller - -hexagon_controller_lib_QAICIDLS += \ -interface/hexagon_nn \ -$(MAKE_D_DSPCV_INCDIR)/dspCV - -# hexagon interface -hexagon_controller_lib_C_SRCS += \ -$V/hexagon_nn_stub \ -$V/dspCV_stub - -hexagon_controller_lib_DLLS += libcdsprpc -hexagon_controller_lib_LIBS += rpcmem adspmsgd -hexagon_controller_lib_LD_FLAGS += -llog -hexagon_controller_lib_DEFINES += VERIFY_PRINT_ERROR - -libhexagon_controller_QAICIDLS += $(hexagon_controller_lib_QAICIDLS) -libhexagon_controller_C_SRCS += $(hexagon_controller_lib_C_SRCS) -libhexagon_controller_DLLS += $(hexagon_controller_lib_DLLS) -libhexagon_controller_LIBS += $(hexagon_controller_lib_LIBS) -libhexagon_controller_LD_FLAGS += $(hexagon_controller_lib_LD_FLAGS) -libhexagon_controller_DEFINES += $(hexagon_controller_lib_DEFINES) - -BUILD_COPIES = \ - $(DLLS) \ - $(EXES) \ - $(LIBS) \ - $(SHIP_DIR)/ ; diff --git a/mace/dsp/hexagon/hexagon_controller.h b/mace/dsp/hexagon/hexagon_controller.h new file mode 100644 index 0000000000000000000000000000000000000000..3558f2aafe4a832c087af9a8f535ff78e5784e90 --- /dev/null +++ b/mace/dsp/hexagon/hexagon_controller.h @@ -0,0 +1,21 @@ +#ifndef MACE_DSP_HEXAGON_DSP_CONTROLLER_H_ +#define MACE_DSP_HEXAGON_DSP_CONTROLLER_H_ + +#include "hexagon_nn.h" + +#ifdef __cplusplus +extern "C" { +#else +#include +#endif // __cplusplus + +int 
hexagon_controller_InitHexagonWithMaxAttributes(int enable_dcvs, + int bus_usage); + +int hexagon_controller_DeInitHexagon(); + +#ifdef __cplusplus +} +#endif // __cplusplus + +#endif // MACE_DSP_HEXAGON_DSP_CONTROLLER_H_ \ No newline at end of file diff --git a/mace/dsp/hexagon/hexagon_nn.h b/mace/dsp/hexagon/hexagon_nn.h index 88502b1d2e3221595329b950be785408eaef3c41..acde71d98248a36b911e24c6520a638b638227e0 100644 --- a/mace/dsp/hexagon/hexagon_nn.h +++ b/mace/dsp/hexagon/hexagon_nn.h @@ -30,49 +30,51 @@ extern "C" { #define __QAIC_STRING1_OBJECT_DEFINED__ #define __STRING1_OBJECT__ typedef struct _cstring1_s { - char* data; - int dataLen; + char* data; + int dataLen; } _cstring1_t; #endif /* __QAIC_STRING1_OBJECT_DEFINED__ */ typedef struct hexagon_nn_input hexagon_nn_input; struct hexagon_nn_input { - unsigned int src_id; - unsigned int output_idx; + unsigned int src_id; + unsigned int output_idx; }; typedef struct hexagon_nn_output hexagon_nn_output; struct hexagon_nn_output { - unsigned int max_size; - unsigned int unused; + unsigned int max_size; + unsigned int unused; }; typedef struct hexagon_nn_perfinfo hexagon_nn_perfinfo; struct hexagon_nn_perfinfo { - unsigned int node_id; - unsigned int executions; - unsigned int counter_lo; - unsigned int counter_hi; + unsigned int node_id; + unsigned int node_type; + unsigned int executions; + unsigned int unused; + unsigned int counter_lo; + unsigned int counter_hi; }; typedef int hexagon_nn_nn_id; enum hexagon_nn_padding_type { - NN_PAD_NA, - NN_PAD_SAME, - NN_PAD_VALID, - NN_PAD_MIRROR_REFLECT, - NN_PAD_MIRROR_SYMMETRIC, - NN_PAD_SAME_CAFFE, - _32BIT_PLACEHOLDER_hexagon_nn_padding_type = 0x7fffffff + NN_PAD_NA, + NN_PAD_SAME, + NN_PAD_VALID, + NN_PAD_MIRROR_REFLECT, + NN_PAD_MIRROR_SYMMETRIC, + NN_PAD_SAME_CAFFE, + _32BIT_PLACEHOLDER_hexagon_nn_padding_type = 0x7fffffff }; typedef enum hexagon_nn_padding_type hexagon_nn_padding_type; typedef struct hexagon_nn_tensordef hexagon_nn_tensordef; struct 
hexagon_nn_tensordef { - unsigned int batches; - unsigned int height; - unsigned int width; - unsigned int depth; - unsigned char* data; - int dataLen; - unsigned int data_valid_len; - unsigned int unused; + unsigned int batches; + unsigned int height; + unsigned int width; + unsigned int depth; + unsigned char* data; + int dataLen; + unsigned int data_valid_len; + unsigned int unused; }; __QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_config)(void) __QAIC_HEADER_ATTRIBUTE; __QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_init)(void) __QAIC_HEADER_ATTRIBUTE; diff --git a/mace/dsp/hexagon/libhexagon_controller.so b/mace/dsp/hexagon/libhexagon_controller.so index 6b49b9d9ba3d83199dac6959f50c1cf4713df6bf..da6f257e152d486dc82e686e2fe6cad2f60bd173 100755 Binary files a/mace/dsp/hexagon/libhexagon_controller.so and b/mace/dsp/hexagon/libhexagon_controller.so differ diff --git a/mace/dsp/hexagon_control_wrapper.cc b/mace/dsp/hexagon_control_wrapper.cc index 8023827330d15ddc066cc94859977d7108437fe3..db3ff22a826c020e2f4651676026ffe8c7930682 100644 --- a/mace/dsp/hexagon_control_wrapper.cc +++ b/mace/dsp/hexagon_control_wrapper.cc @@ -7,6 +7,16 @@ namespace mace { +#define MAX_NODE 2048 * 8 + +enum { + NN_GRAPH_PERFEVENT_CYCLES = 0, + NN_GRAPH_PERFEVENT_USER0 = 1, + NN_GRAPH_PERFEVENT_USER1 = 2, + NN_GRAPH_PERFEVENT_HWPMU = 3, + NN_GRAPH_PERFEVENT_UTIME = 5, +}; + int HexagonControlWrapper::GetVersion() { int version; hexagon_nn_version(&version); @@ -20,15 +30,15 @@ bool HexagonControlWrapper::Config() { bool HexagonControlWrapper::Init() { LOG(INFO) << "Hexagon init"; - op_map_.Init(); - // TODO(liyin): dspCV init + hexagon_controller_InitHexagonWithMaxAttributes(0, 100); nn_id_ = hexagon_nn_init(); + ResetPerfInfo(); return true; } bool HexagonControlWrapper::Finalize() { LOG(INFO) << "Hexagon finalize"; - // TODO(liyin): dspCV deinit + hexagon_controller_DeInitHexagon(); return true; } @@ -36,7 +46,8 @@ bool HexagonControlWrapper::SetupGraph(NetDef net_def) { 
LOG(INFO) << "Hexagon setup graph"; // const node for (const TensorProto& tensor_proto: net_def.tensors()) { - vector tensor_shape(tensor_proto.dims().begin(), tensor_proto.dims().end()); + vector tensor_shape(tensor_proto.dims().begin(), + tensor_proto.dims().end()); while (tensor_shape.size() < 4) { tensor_shape.insert(tensor_shape.begin(), 1); } @@ -49,7 +60,8 @@ bool HexagonControlWrapper::SetupGraph(NetDef net_def) { NULL, 0); } else { - unique_ptr tensor = serializer_.Deserialize(tensor_proto, DeviceType::CPU); + unique_ptr tensor = serializer_.Deserialize(tensor_proto, + DeviceType::CPU); VLOG(0) << "Tensor size: " << tensor->size(); hexagon_nn_append_const_node(nn_id_, node_id(tensor_proto.node_id()), tensor_shape[0], tensor_shape[1], @@ -58,14 +70,17 @@ bool HexagonControlWrapper::SetupGraph(NetDef net_def) { tensor->raw_data()), tensor->raw_size()); } - VLOG(0) << "Const: " << tensor_proto.name() << ", node_id: " << node_id(tensor_proto.node_id()) - << "\n\t shape: " << tensor_shape[0] << " " << tensor_shape[1] << " " << tensor_shape[2] << " " << tensor_shape[3]; + VLOG(0) << "Const: " << tensor_proto.name() + << ", node_id: " << node_id(tensor_proto.node_id()) + << "\n\t shape: " << tensor_shape[0] << " " << tensor_shape[1] + << " " << tensor_shape[2] << " " << tensor_shape[3]; } // op node for (const OperatorDef& op: net_def.op()) { - int op_id = op_map_.GetOpId(op.type()); - MACE_CHECK(op_id != OP_INVALID, "invalid op: ", op.name()); + unsigned int op_id; + MACE_CHECK(hexagon_nn_op_name_to_id(op.type().data(), &op_id) == 0, + "invalid op: ", op.name()); vector inputs(op.node_input_size()); for (size_t i = 0; i < op.node_input_size(); ++i) { inputs[i].src_id = node_id(op.node_input(i).node_id()); @@ -80,9 +95,13 @@ bool HexagonControlWrapper::SetupGraph(NetDef net_def) { op.padding()); hexagon_nn_append_node(nn_id_, node_id(op.node_id()), op_id, padding_type, - inputs.data(), inputs.size(), outputs.data(), outputs.size()); + inputs.data(), 
inputs.size(), + outputs.data(), outputs.size()); - VLOG(0) << "Op: " << op.name() << ", type: " << op.type() << ", node_id: " << node_id(op.node_id()) << ", padding_type: " << padding_type; + VLOG(0) << "Op: " << op.name() + << ", type: " << op.type() + << ", node_id: " << node_id(op.node_id()) + << ", padding_type: " << padding_type; for (const auto& input: inputs) { VLOG(0) << "\t input: " << input.src_id << ":" << input.output_idx; } @@ -121,7 +140,6 @@ bool HexagonControlWrapper::SetupGraph(const std::string& model_file) { return SetupGraph(net_def); } - bool HexagonControlWrapper::TeardownGraph() { LOG(INFO) << "Hexagon teardown graph"; return hexagon_nn_teardown(nn_id_) == 0; @@ -156,15 +174,47 @@ void HexagonControlWrapper::SetDebugLevel(int level) { void HexagonControlWrapper::GetPerfInfo() { LOG(INFO) << "Get perf info"; - vector perf_info(10000); + vector perf_info(MAX_NODE); unsigned int n_items; - hexagon_nn_get_perfinfo(nn_id_, perf_info.data(), 10000, &n_items); + hexagon_nn_get_perfinfo(nn_id_, perf_info.data(), MAX_NODE, &n_items); + + std::unordered_map node_id_counters; + std::unordered_map> node_type_counters; + float total_duration = 0.0; for (int i = 0; i < n_items; ++i) { + unsigned int node_id = perf_info[i].node_id; + unsigned int node_type_id = perf_info[i].node_type; + node_id_counters[node_id] = ((static_cast(perf_info[i].counter_hi) << 32) + + perf_info[i].counter_lo) * 1.0f / perf_info[i].executions; + LOG(INFO) << "node id: " << perf_info[i].node_id + << ", node type: " << perf_info[i].node_type << ", executions: " << perf_info[i].executions - << ", counter_hi: " << perf_info[i].counter_hi - << ", counter_lo: " << perf_info[i].counter_lo; + << ", duration: " << node_id_counters[node_id]; + + + char node_type_buf[1280]; + hexagon_nn_op_id_to_name(node_type_id, node_type_buf, 1280); + std::string node_type(node_type_buf); + if (node_type_counters.find(node_type) == node_type_counters.end()) { + node_type_counters[node_type] = {0, 0.0}; 
+ } + ++node_type_counters[node_type].first; + node_type_counters[node_type].second += node_id_counters[node_id]; + total_duration += node_id_counters[node_id]; + } + + for (auto& node_type_counter: node_type_counters) { + LOG(INFO) << "node type: " << node_type_counter.first + << ", time: " << node_type_counter.second.first + << ", duration: " << node_type_counter.second.second; } + LOG(INFO) << "total duration: " << total_duration; +} + +void HexagonControlWrapper::ResetPerfInfo() { + LOG(INFO) << "Reset perf info"; + hexagon_nn_reset_perfinfo(nn_id_, NN_GRAPH_PERFEVENT_UTIME); } } // namespace mace \ No newline at end of file diff --git a/mace/dsp/hexagon_control_wrapper.h b/mace/dsp/hexagon_control_wrapper.h index 57f633e62a6c6530c623dcb63da168fe29b8dd3d..0547b977f2649a672cb6cfbe70b0af8cf3b0c682 100644 --- a/mace/dsp/hexagon_control_wrapper.h +++ b/mace/dsp/hexagon_control_wrapper.h @@ -5,7 +5,7 @@ #ifndef MACE_DSP_HEXAGON_CONTROL_WRAPPER_H_ #define MACE_DSP_HEXAGON_CONTROL_WRAPPER_H_ -#include "mace/dsp/hexagon/hexagon_nn.h" +#include "mace/dsp/hexagon/hexagon_controller.h" #include "mace/dsp/hexagon_nn_ops.h" #include "mace/core/common.h" #include "mace/core/tensor.h" @@ -57,18 +57,18 @@ class HexagonControlWrapper { void PrintLog(); void PrintGraph(); void GetPerfInfo(); + void ResetPerfInfo(); void SetDebugLevel(int level); private: // CAVEAT: Need offset as HVX library reserves some ids static constexpr int NODE_ID_OFFSET = 10000; - uint32_t node_id(uint32_t nodeid) { + inline uint32_t node_id(uint32_t nodeid) { return NODE_ID_OFFSET + nodeid; } int nn_id_; - OpMap op_map_; Serializer serializer_; vector input_shape_; diff --git a/mace/dsp/hexagon_control_wrapper_test.cc b/mace/dsp/hexagon_control_wrapper_test.cc index fcc4176e0020e2241453b54cfd1a1bdeb4981365..48a743c69ecdb09bb09ca95412fe8852a86a55eb 100644 --- a/mace/dsp/hexagon_control_wrapper_test.cc +++ b/mace/dsp/hexagon_control_wrapper_test.cc @@ -13,22 +13,34 @@ TEST(HexagonControlerWrapper, 
GetVersion) { HexagonControlWrapper wrapper; VLOG(0) << "version: " << wrapper.GetVersion(); wrapper.Init(); - wrapper.SetDebugLevel(3); + wrapper.SetDebugLevel(0); wrapper.Config(); - VLOG(0) << wrapper.SetupGraph("quantized_test_dsp.pb"); + VLOG(0) << wrapper.SetupGraph("quantized_icnet_dsp.pb"); wrapper.PrintGraph(); Tensor input_tensor; Tensor output_tensor; - input_tensor.Resize({1, 28, 28, 3}); + input_tensor.Resize({1, 480, 480, 3}); float *input_data = input_tensor.mutable_data(); for (int i = 0; i < input_tensor.size(); ++i) { - input_data[i] = i; + input_data[i] = i % 256; } - VLOG(0) << wrapper.ExecuteGraph(input_tensor, &output_tensor); - wrapper.PrintLog(); + wrapper.ResetPerfInfo(); + timeval tv1, tv2; + gettimeofday(&tv1, NULL); + int round = 2; + for (int i = 0; i < round; ++i) { + VLOG(0) << wrapper.ExecuteGraph(input_tensor, &output_tensor); + } + gettimeofday(&tv2, NULL); + VLOG(0) << "avg duration: " + << ((tv2.tv_sec - tv1.tv_sec) * 1000 + + (tv2.tv_usec - tv1.tv_usec) / 1000) / + round; + wrapper.GetPerfInfo(); + wrapper.PrintLog(); const float *output_data = output_tensor.data(); VLOG(0) << output_tensor.size() << output_tensor.dtype(); diff --git a/mace/dsp/hexagon_nn_ops.h b/mace/dsp/hexagon_nn_ops.h index 26622973d3e77a517929394026e9315d7dd19504..e66548d4d9b9e1f1a3f31e05d05e758e932a9c58 100644 --- a/mace/dsp/hexagon_nn_ops.h +++ b/mace/dsp/hexagon_nn_ops.h @@ -12,6 +12,9 @@ namespace mace { #define OP_INVALID -1 +// The following macros are deprecated unless we find that caching op meta in the stub +// is necessary for performance or other reasons. + typedef enum op_type_enum { #define DEF_OP(NAME, ...) OP_##NAME, @@ -21,6 +24,14 @@ typedef enum op_type_enum { #undef DEF_OP } op_type; + +#define DEF_OP(NAME,...) #NAME, +static const char *hexagon_nn_op_names[NN_OPS_MAX] = { +#include "mace/dsp/ops.h" +}; +#undef DEF_OP + + class OpMap { public: void Init() {