diff --git a/mace/dsp/BUILD b/mace/dsp/BUILD index fad950de4a26e8769e271024d9d9ea33ff42a70c..40e81b05e5d6ad15837c36690e4bb69f5fe91c44 100644 --- a/mace/dsp/BUILD +++ b/mace/dsp/BUILD @@ -46,3 +46,19 @@ cc_test( ":dsp", ], ) + +cc_test( + name = "dsp_op_test", + testonly = 1, + srcs = glob(["test/*_test.cc"]), + copts = ["-std=c++11"], + linkopts = if_android([ + "-ldl", + "-lm", + ]), + linkstatic = 1, + deps = [ + "@gtest//:gtest_main", + ":dsp", + ], +) diff --git a/mace/dsp/hexagon_control_wrapper.cc b/mace/dsp/hexagon_control_wrapper.cc index c6a8c8ec6825b56d94ab2a269eae71024993f375..7c65e7e5212f0797e847b22fd67640bb58854f41 100644 --- a/mace/dsp/hexagon_control_wrapper.cc +++ b/mace/dsp/hexagon_control_wrapper.cc @@ -42,7 +42,7 @@ bool HexagonControlWrapper::Finalize() { return true; } -bool HexagonControlWrapper::SetupGraph(NetDef net_def) { +bool HexagonControlWrapper::SetupGraph(const NetDef& net_def) { LOG(INFO) << "Hexagon setup graph"; // const node for (const TensorProto& tensor_proto: net_def.tensors()) { @@ -181,21 +181,22 @@ void HexagonControlWrapper::GetPerfInfo() { std::unordered_map node_id_counters; std::unordered_map> node_type_counters; float total_duration = 0.0; + + VLOG(0) << "items: " << n_items; for (int i = 0; i < n_items; ++i) { unsigned int node_id = perf_info[i].node_id; unsigned int node_type_id = perf_info[i].node_type; node_id_counters[node_id] = ((static_cast(perf_info[i].counter_hi) << 32) + perf_info[i].counter_lo) * 1.0f / perf_info[i].executions; - LOG(INFO) << "node id: " << perf_info[i].node_id - << ", node type: " << perf_info[i].node_type - << ", executions: " << perf_info[i].executions - << ", duration: " << node_id_counters[node_id]; - - char node_type_buf[1280]; hexagon_nn_op_id_to_name(node_type_id, node_type_buf, 1280); std::string node_type(node_type_buf); + LOG(INFO) << "node id: " << perf_info[i].node_id + << ", node type: " << node_type + << ", executions: " << perf_info[i].executions + << ", duration: " << node_id_counters[node_id]; + if (node_type_counters.find(node_type) == node_type_counters.end()) { node_type_counters[node_type] = {0, 0.0}; } diff --git a/mace/dsp/hexagon_control_wrapper.h b/mace/dsp/hexagon_control_wrapper.h index 0547b977f2649a672cb6cfbe70b0af8cf3b0c682..a67e9903b7a42f6866fe1e1a63177586bfdfd326 100644 --- a/mace/dsp/hexagon_control_wrapper.h +++ b/mace/dsp/hexagon_control_wrapper.h @@ -21,7 +21,7 @@ class HexagonControlWrapper { bool Config(); bool Init(); bool Finalize(); - bool SetupGraph(NetDef net_def); + bool SetupGraph(const NetDef& net_def); bool SetupGraph(const std::string &model_file); bool ExecuteGraph(const Tensor &input_tensor, Tensor *output_tensor) { LOG(INFO) << "Execute graph: " << nn_id_; diff --git a/mace/dsp/test/quantized_relu_test.cc b/mace/dsp/test/quantized_relu_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..c3883d8d3d7fa21e697d571fff584b932f59ae20 --- /dev/null +++ b/mace/dsp/test/quantized_relu_test.cc @@ -0,0 +1,131 @@ +// +// Copyright (c) 2017 XiaoMi All rights reserved. +// + +#include "mace/dsp/hexagon_control_wrapper.h" +#include "gtest/gtest.h" + +using namespace mace; + +static NetDef BuildNetDef() { + NetDef net; + net.set_name("quantized_relu_test"); + // input op + OperatorDef *input_op = net.add_op(); + input_op->set_name("input_node"); + input_op->set_type("INPUT"); + input_op->set_node_id(0); + input_op->set_padding(0); + input_op->add_out_max_byte_size(1000); + + // relu op + OperatorDef *relu_op = net.add_op(); + relu_op->set_name("relu"); + relu_op->set_type("QuantizedRelu_8"); + relu_op->set_node_id(1); + relu_op->set_padding(0); + relu_op->add_input("input_node"); + relu_op->add_input("input_min"); + relu_op->add_input("input_max"); + relu_op->add_output("relu:0"); + relu_op->add_output("relu:1"); + relu_op->add_output("relu:2"); + NodeInput *input_node_input = relu_op->add_node_input(); + input_node_input->set_node_id(0); + input_node_input->set_output_port(0); + input_node_input = relu_op->add_node_input(); + input_node_input->set_node_id(10); + input_node_input->set_output_port(0); + input_node_input = relu_op->add_node_input(); + input_node_input->set_node_id(11); + input_node_input->set_output_port(0); + relu_op->add_out_max_byte_size(1000); + relu_op->add_out_max_byte_size(1000); + relu_op->add_out_max_byte_size(1000); + + // output op + OperatorDef *output_op = net.add_op(); + output_op->set_name("__output__"); + output_op->set_type("OUTPUT"); + output_op->set_op_id(2); + input_node_input = output_op->add_node_input(); + input_node_input->set_node_id(1); + input_node_input->set_output_port(0); + + // tensor + TensorProto *input_min_tensor = net.add_tensors(); + input_min_tensor->set_name("input_min"); + input_min_tensor->add_dims(1); + input_min_tensor->set_data_type(DataType::DT_FLOAT); + input_min_tensor->set_node_id(10); + input_min_tensor->add_float_data(-100.0); + + TensorProto *input_max_tensor = net.add_tensors(); + input_max_tensor->set_name("input_max"); + input_max_tensor->add_dims(1); + input_max_tensor->set_data_type(DataType::DT_FLOAT); + input_max_tensor->set_node_id(11); + input_max_tensor->add_float_data(100.0); + + // input & output info + InputInfo *input_info = net.add_input_info(); + input_info->set_name("input_node"); + input_info->set_node_id(0); + input_info->add_dims(1); + input_info->add_dims(1); + input_info->add_dims(1); + input_info->add_dims(5); + input_info->set_data_type(DataType::DT_UINT8); + input_info->set_max_byte_size(1000); + OutputInfo *output_info = net.add_output_info(); + output_info->set_name("output_node"); + output_info->set_node_id(1); + output_info->add_dims(1); + output_info->add_dims(1); + output_info->add_dims(1); + output_info->add_dims(5); + output_info->set_data_type(DataType::DT_UINT8); + output_info->set_max_byte_size(1000); + + return net; +} + +TEST(QuantizedReluTest, QuantizedRelu) { + testing::internal::LogToStderr(); + HexagonControlWrapper wrapper; + wrapper.Init(); + wrapper.SetDebugLevel(3); + wrapper.Config(); + + NetDef net = BuildNetDef(); + wrapper.SetupGraph(net); + + Allocator *cpu_allocator = GetDeviceAllocator(DeviceType::CPU); + Tensor input_tensor(cpu_allocator, DT_UINT8); + Tensor output_tensor(cpu_allocator, DT_UINT8); + input_tensor.Resize({1, 1, 1, 5}); + output_tensor.Resize({1, 1, 1, 5}); + uint8_t *input_data = input_tensor.mutable_data(); + const uint8_t *output_data = output_tensor.data(); + // -100.0 -50.0 0 50.0 100.0 -> s=0.782, q0=int(-fmin/s)=128, + // q=q0+f/s -> 0, 64, 128, 192, 256 + input_data[0] = 0; + input_data[1] = 64; + input_data[2] = 128; + input_data[3] = 192; + input_data[4] = 255; + + // 0, 0, 0, 50, 100 -> s=0.782, q0=128 + // q -> 128, 128, 128, 192, 255 + + VLOG(0) << wrapper.ExecuteGraph(input_tensor, &output_tensor); + wrapper.PrintLog(); + + for (int i = 0; i < output_tensor.size(); ++i) { + std::cout << (int32_t) output_data[i] << " "; + } + std::cout << std::endl; + + VLOG(0) << wrapper.TeardownGraph(); + wrapper.Finalize(); +} \ No newline at end of file diff --git a/mace/python/tools/tf_dsp_converter_lib.py b/mace/python/tools/tf_dsp_converter_lib.py index 4e0cb75a910e019ee100dde779fbe73ce5c52a2f..8f925059279d2b50b13fc28aaf1aca975ec67bc7 100644 --- a/mace/python/tools/tf_dsp_converter_lib.py +++ b/mace/python/tools/tf_dsp_converter_lib.py @@ -147,7 +147,7 @@ def convert_ops(unresolved_ops, resolved_ops, net_def, output_node, dsp_ops): def add_output_node(net_def, output_node): op_def = net_def.op.add() - op_def.name = 'output' + op_def.name = '__output__' op_def.type = 'OUTPUT' op_def.input.extend([get_tensor_name_from_op(output_node, 0)]) @@ -298,8 +298,8 @@ def convert_to_mace_pb(input_graph_def, input_node, output_node): add_output_node(net_def, output_node) # optimized_net_def = reverse_batch_to_space_and_biasadd(net_def) - # sorted_net_def = graph_util.sort_mace_graph(optimized_net_def, output_node) - net_def_with_node_id = add_node_id(net_def) + sorted_net_def = graph_util.sort_mace_graph(net_def, '__output__') + net_def_with_node_id = add_node_id(sorted_net_def) final_net_def = add_input_output_info(net_def_with_node_id, input_node, output_node, graph)