diff --git a/mace/core/runtime/hexagon/hexagon_control_wrapper.cc b/mace/core/runtime/hexagon/hexagon_control_wrapper.cc index 470d946105a32711249007c03a9de48ac2f52dd6..b3a0ff6fbd3bed960c91e42635f13cc711934f8d 100644 --- a/mace/core/runtime/hexagon/hexagon_control_wrapper.cc +++ b/mace/core/runtime/hexagon/hexagon_control_wrapper.cc @@ -13,6 +13,8 @@ // limitations under the License. #include +#include +#include #include // NOLINT(build/c++11) #include #include @@ -43,6 +45,53 @@ enum { NN_GRAPH_PERFEVENT_UTIME = 5, }; +namespace { +struct InputOutputMetadata{ + void Init(float min_val, float max_val, int needs_quantization) { + this->min_val = min_val; + this->max_val = max_val; + this->needs_quantization = needs_quantization; + } + float min_val; + float max_val; + int needs_quantization; +}; + +template +void AddInputMetadata(const T &data, hexagon_nn_tensordef *tensor) { + tensor->batches = 1; + tensor->height = 1; + tensor->width = 1; + tensor->depth = 1; + tensor->data = const_cast( + reinterpret_cast(&data)); + tensor->dataLen = sizeof(data); + tensor->data_valid_len = sizeof(data); + tensor->unused = 0; +} + +template +void AddOutputMetadata(const T &data, hexagon_nn_tensordef *tensor) { + tensor->data = const_cast( + reinterpret_cast(&data)); + tensor->dataLen = sizeof(data); +} + +template +std::string IntToString(const IntType v) { + std::stringstream stream; + stream << v; + return stream.str(); +} + +template +std::string FloatToString(const FloatType v, const int32_t precision) { + std::stringstream stream; + stream << std::fixed << std::setprecision(precision) << v; + return stream.str(); +} +} // namespace + int HexagonControlWrapper::GetVersion() { int version; MACE_CHECK(hexagon_nn_version(&version) == 0, "get version error"); @@ -299,9 +348,15 @@ void HexagonControlWrapper::GetPerfInfo() { std::unordered_map node_id_counters; std::unordered_map> node_type_counters; + std::vector node_types; float total_duration = 0.0; VLOG(1) << "items: " << 
n_items; + std::string run_order_title = "Sort by Run Order"; + const std::vector run_order_header = { + "Node Id", "Node Type", "Node Type Id", "Executions", "Duration(ms)" + }; + std::vector> run_order_data; for (unsigned int i = 0; i < n_items; ++i) { unsigned int node_id = perf_info[i].node_id; unsigned int node_type_id = perf_info[i].node_type; @@ -313,27 +368,48 @@ void HexagonControlWrapper::GetPerfInfo() { char node_type_buf[MACE_MAX_NODE]; hexagon_nn_op_id_to_name(node_type_id, node_type_buf, MACE_MAX_NODE); std::string node_type(node_type_buf); - LOG(INFO) << "node id: " << perf_info[i].node_id - << ", node type: " << node_type - << ", node type id: " << node_type_id - << ", executions: " << perf_info[i].executions - << ", duration: " << node_id_counters[node_id]; + if (node_type.compare("Const") == 0) continue; + std::vector tuple; + tuple.push_back(IntToString(perf_info[i].node_id)); + tuple.push_back(node_type); + tuple.push_back(IntToString(node_type_id)); + tuple.push_back(IntToString(perf_info[i].executions)); + tuple.push_back(FloatToString(node_id_counters[node_id] / 1000.0f, 3)); + run_order_data.emplace_back(tuple); if (node_type_counters.find(node_type) == node_type_counters.end()) { node_type_counters[node_type] = {0, 0.0}; + node_types.push_back(node_type); } ++node_type_counters[node_type].first; node_type_counters[node_type].second += node_id_counters[node_id]; - if (node_type.compare("Const") != 0) { - total_duration += node_id_counters[node_id]; - } + total_duration += node_id_counters[node_id]; } - for (auto &node_type_counter : node_type_counters) { - LOG(INFO) << "node type: " << node_type_counter.first - << ", time: " << node_type_counter.second.first - << ", duration: " << node_type_counter.second.second; + std::sort(node_types.begin(), node_types.end(), + [&](const std::string &lhs, const std::string &rhs) { + return node_type_counters[lhs].second + > node_type_counters[rhs].second; + }); + + std::string duration_title = "Sort by 
Duration"; + const std::vector duration_header = { + "Node Type", "Times", "Duration(ms)" + }; + std::vector> duration_data; + for (auto &node_type : node_types) { + auto node_type_counter = node_type_counters[node_type]; + std::vector tuple; + tuple.push_back(node_type); + tuple.push_back(IntToString(node_type_counter.first)); + tuple.push_back(FloatToString(node_type_counter.second / 1000.0f, 3)); + duration_data.emplace_back(tuple); } + + LOG(INFO) << mace::string_util::StringFormatter::Table( + run_order_title, run_order_header, run_order_data); + LOG(INFO) << mace::string_util::StringFormatter::Table( + duration_title, duration_header, duration_data); LOG(INFO) << "total duration: " << std::fixed << total_duration; } @@ -382,45 +458,64 @@ bool HexagonControlWrapper::ExecuteGraph(const Tensor &input_tensor, } bool HexagonControlWrapper::ExecuteGraphNew( - const std::vector &input_tensors, - std::vector *output_tensors) { + const std::vector &input_tensors, + std::vector *output_tensors) { LOG(INFO) << "Execute graph new: " << nn_id_; uint32_t num_inputs = static_cast(input_tensors.size()); uint32_t num_outputs = static_cast(output_tensors->size()); MACE_ASSERT(num_inputs_ == num_inputs, "Wrong inputs num"); MACE_ASSERT(num_outputs_ == num_outputs, "Wrong outputs num"); - hexagon_nn_tensordef *inputs = new hexagon_nn_tensordef[num_inputs]; - hexagon_nn_tensordef *outputs = new hexagon_nn_tensordef[num_outputs]; + std::vector inputs(num_inputs * NUM_METADATA); + std::vector outputs(num_outputs * NUM_METADATA); + std::vector input_metadata(num_inputs); + std::vector output_metadata(num_outputs); for (size_t i = 0; i < num_inputs; ++i) { - std::vector input_shape = input_tensors[i].shape(); - inputs[i].batches = static_cast(input_shape[0]); - inputs[i].height = static_cast(input_shape[1]); - inputs[i].width = static_cast(input_shape[2]); - inputs[i].depth = static_cast(input_shape[3]); - inputs[i].data = const_cast( - reinterpret_cast(input_tensors[i].raw_data())); 
- inputs[i].dataLen = static_cast(input_tensors[i].raw_size()); - inputs[i].data_valid_len = static_cast( - input_tensors[i].raw_size()); - inputs[i].unused = 0; + std::vector input_shape = input_tensors[i]->shape(); + size_t index = i * NUM_METADATA; + inputs[index].batches = static_cast(input_shape[0]); + inputs[index].height = static_cast(input_shape[1]); + inputs[index].width = static_cast(input_shape[2]); + inputs[index].depth = static_cast(input_shape[3]); + inputs[index].data = const_cast( + reinterpret_cast(input_tensors[i]->raw_data())); + inputs[index].dataLen = static_cast(input_tensors[i]->raw_size()); + inputs[index].data_valid_len = static_cast( + input_tensors[i]->raw_size()); + inputs[index].unused = 0; + input_metadata[i].Init(.0f, .0f, 1); + AddInputMetadata(input_metadata[i].min_val, &inputs[index + 1]); + AddInputMetadata(input_metadata[i].max_val, &inputs[index + 2]); + AddInputMetadata(input_metadata[i].needs_quantization, &inputs[index + 3]); } for (size_t i = 0; i < num_outputs; ++i) { - (*output_tensors)[i].SetDtype(output_data_types_[i]); - (*output_tensors)[i].Resize(output_shapes_[i]); - outputs[i].data = reinterpret_cast( - (*output_tensors)[i].raw_mutable_data()); - outputs[i].dataLen = static_cast((*output_tensors)[i].raw_size()); + size_t index = i * NUM_METADATA; + (*output_tensors)[i]->SetDtype(output_data_types_[i]); + (*output_tensors)[i]->Resize(output_shapes_[i]); + outputs[index].data = reinterpret_cast( + (*output_tensors)[i]->raw_mutable_data()); + outputs[index].dataLen = static_cast((*output_tensors)[i]->raw_size()); + output_metadata[i].Init(.0f, .0f, 1); + AddOutputMetadata(output_metadata[i].min_val, &outputs[index + 1]); + AddOutputMetadata(output_metadata[i].max_val, &outputs[index + 2]); + AddOutputMetadata(output_metadata[i].needs_quantization, + &outputs[index + 3]); } int res = - hexagon_nn_execute_new(nn_id_, inputs, num_inputs, outputs, num_outputs); + hexagon_nn_execute_new(nn_id_, + inputs.data(), + num_inputs 
* NUM_METADATA, + outputs.data(), + num_outputs * NUM_METADATA); for (size_t i = 0; i < num_outputs; ++i) { - std::vector output_shape{outputs[i].batches, outputs[i].height, - outputs[i].width, outputs[i].depth}; + size_t index = i * NUM_METADATA; + std::vector output_shape{ + outputs[index].batches, outputs[index].height, outputs[index].width, + outputs[index].depth}; MACE_ASSERT(output_shape.size() == output_shapes_[i].size(), "wrong output shape inferred"); for (size_t j = 0; j < output_shape.size(); ++j) { @@ -428,40 +523,12 @@ bool HexagonControlWrapper::ExecuteGraphNew( == output_shapes_[i][j], "wrong output shape inferred"); } - MACE_ASSERT(static_cast(outputs[i].data_valid_len) - == (*output_tensors)[i].raw_size(), + MACE_ASSERT(static_cast(outputs[index].data_valid_len) + == (*output_tensors)[i]->raw_size(), "wrong output bytes inferred."); } - delete[] inputs; - delete[] outputs; return res == 0; } -bool HexagonControlWrapper::ExecuteGraphPreQuantize(const Tensor &input_tensor, - Tensor *output_tensor) { - std::vector input_tensors(3); - std::vector output_tensors(3); - input_tensors[0].SetDtype(DT_UINT8); - output_tensors[0].SetDtype(DT_UINT8); - input_tensors[0].ResizeLike(input_tensor); - input_tensors[1].Resize({1, 1, 1, 1}); - float *min_in_data = input_tensors[1].mutable_data(); - input_tensors[2].Resize({1, 1, 1, 1}); - float *max_in_data = input_tensors[2].mutable_data(); - quantizer_.Quantize(input_tensor, &input_tensors[0], min_in_data, - max_in_data); - if (!ExecuteGraphNew(input_tensors, &output_tensors)) { - return false; - } - - output_tensor->ResizeLike(output_tensors[0]); - - const float *min_out_data = output_tensors[1].data(); - const float *max_out_data = output_tensors[2].data(); - quantizer_.DeQuantize(output_tensors[0], *min_out_data, *max_out_data, - output_tensor); - return true; -} - } // namespace mace diff --git a/mace/core/runtime/hexagon/hexagon_control_wrapper.h b/mace/core/runtime/hexagon/hexagon_control_wrapper.h index 
adcd5224e447ae565a1286246ae84e1be23d08f8..ed4fe373a5c3699cde05acaf49cb165850ddb536 100644 --- a/mace/core/runtime/hexagon/hexagon_control_wrapper.h +++ b/mace/core/runtime/hexagon/hexagon_control_wrapper.h @@ -33,10 +33,8 @@ class HexagonControlWrapper { bool Finalize(); bool SetupGraph(const NetDef &net_def, const unsigned char *model_data); bool ExecuteGraph(const Tensor &input_tensor, Tensor *output_tensor); - bool ExecuteGraphNew(const std::vector &input_tensors, - std::vector *output_tensors); - bool ExecuteGraphPreQuantize(const Tensor &input_tensor, - Tensor *output_tensor); + bool ExecuteGraphNew(const std::vector &input_tensors, + std::vector *output_tensors); bool TeardownGraph(); void PrintLog(); @@ -47,6 +45,7 @@ class HexagonControlWrapper { private: static constexpr int NODE_ID_OFFSET = 10000; + static constexpr int NUM_METADATA = 4; inline uint32_t node_id(uint32_t nodeid) { return NODE_ID_OFFSET + nodeid; } diff --git a/mace/libmace/mace.cc b/mace/libmace/mace.cc index 52584abb41d4438ffb4591030c8325611bdd7f08..a7494086c4e98c3263b61b9a8ca72576d05fa320 100644 --- a/mace/libmace/mace.cc +++ b/mace/libmace/mace.cc @@ -567,7 +567,7 @@ MaceStatus MaceEngine::Impl::Run( if (device_type_ == HEXAGON) { MACE_CHECK(input_tensors.size() == 1 && output_tensors.size() == 1, "HEXAGON not support multiple inputs and outputs yet."); - hexagon_controller_->ExecuteGraph(*input_tensors[0], output_tensors[0]); + hexagon_controller_->ExecuteGraphNew(input_tensors, &output_tensors); } else { #endif MACE_RETURN_IF_ERROR(net_->Run(run_metadata)); diff --git a/mace/python/tools/converter_tool/hexagon_converter.py b/mace/python/tools/converter_tool/hexagon_converter.py index 478c313c6f5b46d458ce1208d7744643373a0b1f..b42d6a3856d22de83468d083f90105090bf725f2 100644 --- a/mace/python/tools/converter_tool/hexagon_converter.py +++ b/mace/python/tools/converter_tool/hexagon_converter.py @@ -31,10 +31,8 @@ from operator import mul class HexagonOps(object): def __init__(self): 
def normalize_name(name):
    """Make a tensor-style name (e.g. 'op:0') safe to use as a plain node
    name by replacing every ':' with '_'.

    Used when deriving the graph's output node name for sorting, so that
    MACE tensor names (which embed an output port after ':') match the
    ':'-free names of Hexagon graph nodes.
    """
    return name.replace(':', '_')
self._option.check_nodes.values()[0].name + output_name = normalize_name(output_name) for op in self._model.op: if op.name.startswith(MaceKeyword.mace_output_node_name) \ and op.name.find(output_name) != -1: @@ -324,10 +316,9 @@ class HexagonConverter(base_converter.ConverterInterface): break mace_check(output_node is not None, "mace_output_node_* not found.") - op_def = self._model.op.add() - op_def.name = '__output__' - op_def.type = 'OUTPUT' - op_def.input.extend([get_tensor_name_from_op(output_node.name, 0)]) + del output_node.output_shape[:] + del output_node.output_type[:] + del output_node.out_max_byte_size[:] def add_node_id(self): node_id_counter = 0