提交 fbf80ad5 编写于 作者: Y Yin Li

Fix perf op type

上级 c93b928a
# Print the key build settings ($(V), $(GLUE_DIR), $(HEXAGON_SDK_ROOT)) while
# the makefile is being read.
$(info ------------------------------------------)
$(info --- V = $(V))
$(info --- GLUE_DIR = $(GLUE_DIR))
$(info --- HEXAGON_SDK_ROOT = $(HEXAGON_SDK_ROOT))
$(info ------------------------------------------)
# Extra include and library search directories for the adspmsgd library.
INCDIRS += ../../../libs/common/adspmsgd/ship/android_Release
LIBDIRS += ../../../libs/common/adspmsgd/ship/android_Release
# Name of the shared library this makefile builds.
BUILD_DLLS=libhexagon_controller
# Interface descriptions used by the library (presumably IDL inputs for the
# QAIC tool, going by the variable name -- confirm against the SDK docs).
hexagon_controller_lib_QAICIDLS += \
interface/hexagon_nn \
$(MAKE_D_DSPCV_INCDIR)/dspCV
# hexagon interface
# Stub sources taken from the $(V) directory.
hexagon_controller_lib_C_SRCS += \
$V/hexagon_nn_stub \
$V/dspCV_stub
# Link-time dependencies and preprocessor defines for the library.
hexagon_controller_lib_DLLS += libcdsprpc
hexagon_controller_lib_LIBS += rpcmem adspmsgd
hexagon_controller_lib_LD_FLAGS += -llog
hexagon_controller_lib_DEFINES += VERIFY_PRINT_ERROR
# Forward the hexagon_controller_lib_* settings above to the
# libhexagon_controller_* variables of the target named in BUILD_DLLS.
libhexagon_controller_QAICIDLS += $(hexagon_controller_lib_QAICIDLS)
libhexagon_controller_C_SRCS += $(hexagon_controller_lib_C_SRCS)
libhexagon_controller_DLLS += $(hexagon_controller_lib_DLLS)
libhexagon_controller_LIBS += $(hexagon_controller_lib_LIBS)
libhexagon_controller_LD_FLAGS += $(hexagon_controller_lib_LD_FLAGS)
libhexagon_controller_DEFINES += $(hexagon_controller_lib_DEFINES)
# Artifacts followed by $(SHIP_DIR)/ (presumably "copy these into SHIP_DIR" in
# the SDK build rules -- confirm).
BUILD_COPIES = \
$(DLLS) \
$(EXES) \
$(LIBS) \
$(SHIP_DIR)/ ;
// Declarations of the Hexagon controller entry points. The header is usable
// from both C and C++: the functions get C linkage under C++, and <stdbool.h>
// is pulled in when compiled as C.
#ifndef MACE_DSP_HEXAGON_DSP_CONTROLLER_H_
#define MACE_DSP_HEXAGON_DSP_CONTROLLER_H_
#include "hexagon_nn.h"
#ifdef __cplusplus
extern "C" {
#else
#include <stdbool.h>
#endif // __cplusplus
// Initialises the Hexagon controller.
// enable_dcvs / bus_usage: presumably a DCVS on/off flag and a bus-usage
// setting -- TODO confirm meaning and valid ranges against the implementation.
// NOTE(review): the meaning of the int result is not visible from this header.
int hexagon_controller_InitHexagonWithMaxAttributes(int enable_dcvs,
int bus_usage);
// Counterpart of the init call above.
// NOTE(review): when compiled as C (before C23), the empty parameter list
// declares the function without a prototype; `(void)` would be stricter.
int hexagon_controller_DeInitHexagon();
#ifdef __cplusplus
}
#endif // __cplusplus
#endif // MACE_DSP_HEXAGON_DSP_CONTROLLER_H_
\ No newline at end of file
......@@ -30,49 +30,51 @@ extern "C" {
#define __QAIC_STRING1_OBJECT_DEFINED__
#define __STRING1_OBJECT__
/* Character buffer paired with its length. */
typedef struct _cstring1_s {
  char* data;   /* pointer to the characters */
  int dataLen;  /* length associated with data */
} _cstring1_t;
#endif /* __QAIC_STRING1_OBJECT_DEFINED__ */
/* One input of a graph node, expressed as a (source node, output index) pair. */
typedef struct hexagon_nn_input hexagon_nn_input;
struct hexagon_nn_input {
  unsigned int src_id;      /* id of the node the input comes from */
  unsigned int output_idx;  /* which output of that node is consumed */
};
/* One output of a graph node. */
typedef struct hexagon_nn_output hexagon_nn_output;
struct hexagon_nn_output {
  unsigned int max_size;  /* size limit for this output (presumably bytes -- confirm) */
  unsigned int unused;    /* not used; kept as part of the struct layout */
};
/* Per-node profiling record filled in by hexagon_nn_get_perfinfo. */
typedef struct hexagon_nn_perfinfo hexagon_nn_perfinfo;
struct hexagon_nn_perfinfo {
  unsigned int node_id;     /* id of the node the record belongs to */
  unsigned int node_type;   /* op id; can be passed to hexagon_nn_op_id_to_name */
  unsigned int executions;  /* number of executions covered by the counter */
  unsigned int unused;      /* not used; kept as part of the struct layout */
  unsigned int counter_lo;  /* low 32 bits of the 64-bit counter */
  unsigned int counter_hi;  /* high 32 bits of the 64-bit counter */
};
typedef int hexagon_nn_nn_id;
/* Padding modes that can be attached to a node when it is appended to a graph. */
enum hexagon_nn_padding_type {
  NN_PAD_NA,
  NN_PAD_SAME,
  NN_PAD_VALID,
  NN_PAD_MIRROR_REFLECT,
  NN_PAD_MIRROR_SYMMETRIC,
  NN_PAD_SAME_CAFFE,
  /* Not a real mode: widens the enum's value range to the full 31-bit span. */
  _32BIT_PLACEHOLDER_hexagon_nn_padding_type = 0x7fffffff
};
typedef enum hexagon_nn_padding_type hexagon_nn_padding_type;
/* Tensor descriptor: four dimensions plus a data buffer and its lengths. */
typedef struct hexagon_nn_tensordef hexagon_nn_tensordef;
struct hexagon_nn_tensordef {
  unsigned int batches;
  unsigned int height;
  unsigned int width;
  unsigned int depth;
  unsigned char* data;          /* tensor payload */
  int dataLen;                  /* length associated with data */
  unsigned int data_valid_len;  /* presumably the number of valid bytes in data -- confirm */
  unsigned int unused;          /* not used; kept as part of the struct layout */
};
__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_config)(void) __QAIC_HEADER_ATTRIBUTE;
__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_init)(void) __QAIC_HEADER_ATTRIBUTE;
......
......@@ -7,6 +7,16 @@
namespace mace {
// Number of hexagon_nn_perfinfo entries in the buffer used to fetch perf
// data. Parenthesized so the macro expands to a single value when it is used
// inside a larger expression (e.g. `x / MAX_NODE`).
#define MAX_NODE (2048 * 8)

// Event selectors for hexagon_nn_reset_perfinfo. Note that the value 4 is not
// assigned to any selector here.
enum {
  NN_GRAPH_PERFEVENT_CYCLES = 0,
  NN_GRAPH_PERFEVENT_USER0 = 1,
  NN_GRAPH_PERFEVENT_USER1 = 2,
  NN_GRAPH_PERFEVENT_HWPMU = 3,
  NN_GRAPH_PERFEVENT_UTIME = 5,
};
int HexagonControlWrapper::GetVersion() {
int version;
hexagon_nn_version(&version);
......@@ -20,15 +30,15 @@ bool HexagonControlWrapper::Config() {
// Initialises the wrapper: sets up the op map, brings up the Hexagon
// controller, stores the id returned by hexagon_nn_init() in nn_id_ and resets
// the perf counters. Always returns true.
// NOTE(review): none of the results of the calls below are checked, so a
// failed bring-up is not reported to the caller.
bool HexagonControlWrapper::Init() {
LOG(INFO) << "Hexagon init";
op_map_.Init();
// Controller bring-up; the arguments are (enable_dcvs, bus_usage).
hexagon_controller_InitHexagonWithMaxAttributes(0, 100);
nn_id_ = hexagon_nn_init();
ResetPerfInfo();
return true;
}
// Shuts down the Hexagon controller brought up in Init(). Always returns true.
// NOTE(review): the result of hexagon_controller_DeInitHexagon() is ignored.
bool HexagonControlWrapper::Finalize() {
LOG(INFO) << "Hexagon finalize";
// Controller tear-down, the counterpart of the init call made in Init().
hexagon_controller_DeInitHexagon();
return true;
}
......@@ -36,7 +46,8 @@ bool HexagonControlWrapper::SetupGraph(NetDef net_def) {
LOG(INFO) << "Hexagon setup graph";
// const node
for (const TensorProto& tensor_proto: net_def.tensors()) {
vector<int> tensor_shape(tensor_proto.dims().begin(), tensor_proto.dims().end());
vector<int> tensor_shape(tensor_proto.dims().begin(),
tensor_proto.dims().end());
while (tensor_shape.size() < 4) {
tensor_shape.insert(tensor_shape.begin(), 1);
}
......@@ -49,7 +60,8 @@ bool HexagonControlWrapper::SetupGraph(NetDef net_def) {
NULL,
0);
} else {
unique_ptr<Tensor> tensor = serializer_.Deserialize(tensor_proto, DeviceType::CPU);
unique_ptr<Tensor> tensor = serializer_.Deserialize(tensor_proto,
DeviceType::CPU);
VLOG(0) << "Tensor size: " << tensor->size();
hexagon_nn_append_const_node(nn_id_, node_id(tensor_proto.node_id()),
tensor_shape[0], tensor_shape[1],
......@@ -58,14 +70,17 @@ bool HexagonControlWrapper::SetupGraph(NetDef net_def) {
tensor->raw_data()),
tensor->raw_size());
}
VLOG(0) << "Const: " << tensor_proto.name() << ", node_id: " << node_id(tensor_proto.node_id())
<< "\n\t shape: " << tensor_shape[0] << " " << tensor_shape[1] << " " << tensor_shape[2] << " " << tensor_shape[3];
VLOG(0) << "Const: " << tensor_proto.name()
<< ", node_id: " << node_id(tensor_proto.node_id())
<< "\n\t shape: " << tensor_shape[0] << " " << tensor_shape[1]
<< " " << tensor_shape[2] << " " << tensor_shape[3];
}
// op node
for (const OperatorDef& op: net_def.op()) {
int op_id = op_map_.GetOpId(op.type());
MACE_CHECK(op_id != OP_INVALID, "invalid op: ", op.name());
unsigned int op_id;
MACE_CHECK(hexagon_nn_op_name_to_id(op.type().data(), &op_id) == 0,
"invalid op: ", op.name());
vector<hexagon_nn_input> inputs(op.node_input_size());
for (size_t i = 0; i < op.node_input_size(); ++i) {
inputs[i].src_id = node_id(op.node_input(i).node_id());
......@@ -80,9 +95,13 @@ bool HexagonControlWrapper::SetupGraph(NetDef net_def) {
op.padding());
hexagon_nn_append_node(nn_id_, node_id(op.node_id()), op_id, padding_type,
inputs.data(), inputs.size(), outputs.data(), outputs.size());
inputs.data(), inputs.size(),
outputs.data(), outputs.size());
VLOG(0) << "Op: " << op.name() << ", type: " << op.type() << ", node_id: " << node_id(op.node_id()) << ", padding_type: " << padding_type;
VLOG(0) << "Op: " << op.name()
<< ", type: " << op.type()
<< ", node_id: " << node_id(op.node_id())
<< ", padding_type: " << padding_type;
for (const auto& input: inputs) {
VLOG(0) << "\t input: " << input.src_id << ":" << input.output_idx;
}
......@@ -121,7 +140,6 @@ bool HexagonControlWrapper::SetupGraph(const std::string& model_file) {
return SetupGraph(net_def);
}
bool HexagonControlWrapper::TeardownGraph() {
LOG(INFO) << "Hexagon teardown graph";
return hexagon_nn_teardown(nn_id_) == 0;
......@@ -156,15 +174,47 @@ void HexagonControlWrapper::SetDebugLevel(int level) {
void HexagonControlWrapper::GetPerfInfo() {
LOG(INFO) << "Get perf info";
vector<hexagon_nn_perfinfo> perf_info(10000);
vector<hexagon_nn_perfinfo> perf_info(MAX_NODE);
unsigned int n_items;
hexagon_nn_get_perfinfo(nn_id_, perf_info.data(), 10000, &n_items);
hexagon_nn_get_perfinfo(nn_id_, perf_info.data(), MAX_NODE, &n_items);
std::unordered_map<uint32_t, float> node_id_counters;
std::unordered_map<std::string, std::pair<int, float>> node_type_counters;
float total_duration = 0.0;
for (int i = 0; i < n_items; ++i) {
unsigned int node_id = perf_info[i].node_id;
unsigned int node_type_id = perf_info[i].node_type;
node_id_counters[node_id] = ((static_cast<uint64_t>(perf_info[i].counter_hi) << 32)
+ perf_info[i].counter_lo) * 1.0f / perf_info[i].executions;
LOG(INFO) << "node id: " << perf_info[i].node_id
<< ", node type: " << perf_info[i].node_type
<< ", executions: " << perf_info[i].executions
<< ", counter_hi: " << perf_info[i].counter_hi
<< ", counter_lo: " << perf_info[i].counter_lo;
<< ", duration: " << node_id_counters[node_id];
char node_type_buf[1280];
hexagon_nn_op_id_to_name(node_type_id, node_type_buf, 1280);
std::string node_type(node_type_buf);
if (node_type_counters.find(node_type) == node_type_counters.end()) {
node_type_counters[node_type] = {0, 0.0};
}
++node_type_counters[node_type].first;
node_type_counters[node_type].second += node_id_counters[node_id];
total_duration += node_id_counters[node_id];
}
for (auto& node_type_counter: node_type_counters) {
LOG(INFO) << "node type: " << node_type_counter.first
<< ", time: " << node_type_counter.second.first
<< ", duration: " << node_type_counter.second.second;
}
LOG(INFO) << "total duration: " << total_duration;
}
// Resets the perf data of the graph identified by nn_id_, selecting
// NN_GRAPH_PERFEVENT_UTIME as the event for hexagon_nn_reset_perfinfo.
// NOTE(review): the result of hexagon_nn_reset_perfinfo is ignored.
void HexagonControlWrapper::ResetPerfInfo() {
LOG(INFO) << "Reset perf info";
hexagon_nn_reset_perfinfo(nn_id_, NN_GRAPH_PERFEVENT_UTIME);
}
} // namespace mace
\ No newline at end of file
......@@ -5,7 +5,7 @@
#ifndef MACE_DSP_HEXAGON_CONTROL_WRAPPER_H_
#define MACE_DSP_HEXAGON_CONTROL_WRAPPER_H_
#include "mace/dsp/hexagon/hexagon_nn.h"
#include "mace/dsp/hexagon/hexagon_controller.h"
#include "mace/dsp/hexagon_nn_ops.h"
#include "mace/core/common.h"
#include "mace/core/tensor.h"
......@@ -57,18 +57,18 @@ class HexagonControlWrapper {
void PrintLog();
void PrintGraph();
void GetPerfInfo();
void ResetPerfInfo();
void SetDebugLevel(int level);
private:
// CAVEAT: Need offset as HVX library reserves some ids
static constexpr int NODE_ID_OFFSET = 10000;
// Shifts a node id by NODE_ID_OFFSET, giving the id that is actually passed to
// the hexagon_nn calls.
inline uint32_t node_id(uint32_t nodeid) {
  return NODE_ID_OFFSET + nodeid;
}
int nn_id_;
OpMap op_map_;
Serializer serializer_;
vector<index_t> input_shape_;
......
......@@ -13,22 +13,34 @@ TEST(HexagonControlerWrapper, GetVersion) {
HexagonControlWrapper wrapper;
VLOG(0) << "version: " << wrapper.GetVersion();
wrapper.Init();
wrapper.SetDebugLevel(3);
wrapper.SetDebugLevel(0);
wrapper.Config();
VLOG(0) << wrapper.SetupGraph("quantized_test_dsp.pb");
VLOG(0) << wrapper.SetupGraph("quantized_icnet_dsp.pb");
wrapper.PrintGraph();
Tensor input_tensor;
Tensor output_tensor;
input_tensor.Resize({1, 28, 28, 3});
input_tensor.Resize({1, 480, 480, 3});
float *input_data = input_tensor.mutable_data<float>();
for (int i = 0; i < input_tensor.size(); ++i) {
input_data[i] = i;
input_data[i] = i % 256;
}
VLOG(0) << wrapper.ExecuteGraph(input_tensor, &output_tensor);
wrapper.PrintLog();
wrapper.ResetPerfInfo();
timeval tv1, tv2;
gettimeofday(&tv1, NULL);
int round = 2;
for (int i = 0; i < round; ++i) {
VLOG(0) << wrapper.ExecuteGraph(input_tensor, &output_tensor);
}
gettimeofday(&tv2, NULL);
VLOG(0) << "avg duration: "
<< ((tv2.tv_sec - tv1.tv_sec) * 1000 +
(tv2.tv_usec - tv1.tv_usec) / 1000) /
round;
wrapper.GetPerfInfo();
wrapper.PrintLog();
const float *output_data = output_tensor.data<float>();
VLOG(0) << output_tensor.size() << output_tensor.dtype();
......
......@@ -12,6 +12,9 @@ namespace mace {
#define OP_INVALID -1
// The following macros are deprecated unless we found cache op meta in stub
// is necessary for performance or other causes.
typedef enum op_type_enum {
#define DEF_OP(NAME, ...) OP_##NAME,
......@@ -21,6 +24,14 @@ typedef enum op_type_enum {
#undef DEF_OP
} op_type;
// Table of op names as strings: while ops.h is included below, each
// DEF_OP(NAME, ...) entry expands to the string literal "NAME" followed by a
// comma. Presumably the entries appear in the same order as the op_type
// enumerators, so an op_type value can index this table -- confirm.
// NOTE(review): a `static` array defined in a header gives every translation
// unit that includes it its own copy.
#define DEF_OP(NAME,...) #NAME,
static const char *hexagon_nn_op_names[NN_OPS_MAX] = {
#include "mace/dsp/ops.h"
};
#undef DEF_OP
class OpMap {
public:
void Init() {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册