From 07ea317d2804f8cda8444bb1a92c98f365539fc8 Mon Sep 17 00:00:00 2001 From: Bin Li Date: Wed, 10 Apr 2019 17:18:02 +0800 Subject: [PATCH] Support multiple inputs and outputs for Hexagon DSP --- .../runtime/hexagon/hexagon_control_wrapper.h | 14 ++-- .../runtime/hexagon/hexagon_dsp_wrapper.cc | 71 +++++++++++-------- .../runtime/hexagon/hexagon_dsp_wrapper.h | 6 +- .../runtime/hexagon/hexagon_hta_wrapper.cc | 50 +++++++------ .../runtime/hexagon/hexagon_hta_wrapper.h | 6 +- mace/libmace/mace.cc | 14 ++-- .../tools/converter_tool/hexagon_converter.py | 48 ++++++++----- mace/python/tools/layers_validate.py | 1 + 8 files changed, 129 insertions(+), 81 deletions(-) diff --git a/mace/core/runtime/hexagon/hexagon_control_wrapper.h b/mace/core/runtime/hexagon/hexagon_control_wrapper.h index eda740f4..0ab7e3f9 100644 --- a/mace/core/runtime/hexagon/hexagon_control_wrapper.h +++ b/mace/core/runtime/hexagon/hexagon_control_wrapper.h @@ -15,7 +15,9 @@ #ifndef MACE_CORE_RUNTIME_HEXAGON_HEXAGON_CONTROL_WRAPPER_H_ #define MACE_CORE_RUNTIME_HEXAGON_HEXAGON_CONTROL_WRAPPER_H_ +#include #include +#include #include #include @@ -25,17 +27,20 @@ namespace mace { struct InOutInfo { - InOutInfo(const std::vector &shape, + InOutInfo(const std::string &name, + const std::vector &shape, const DataType data_type, const float scale, const int32_t zero_point, std::unique_ptr tensor_u8) - : shape(shape), + : name(name), + shape(shape), data_type(data_type), scale(scale), zero_point(zero_point), tensor_u8(std::move(tensor_u8)) {} + std::string name; std::vector shape; DataType data_type; float scale; @@ -56,8 +61,9 @@ class HexagonControlWrapper { const unsigned char *model_data) = 0; virtual bool ExecuteGraph(const Tensor &input_tensor, Tensor *output_tensor) = 0; - virtual bool ExecuteGraphNew(const std::vector &input_tensors, - std::vector *output_tensors) = 0; + virtual bool ExecuteGraphNew( + const std::map &input_tensors, + std::map *output_tensors) = 0; virtual bool TeardownGraph() = 0; virtual void PrintLog() = 0; virtual void PrintGraph() = 0; diff --git a/mace/core/runtime/hexagon/hexagon_dsp_wrapper.cc b/mace/core/runtime/hexagon/hexagon_dsp_wrapper.cc index a98d9ad1..0b285ee2 100644 --- a/mace/core/runtime/hexagon/hexagon_dsp_wrapper.cc +++ b/mace/core/runtime/hexagon/hexagon_dsp_wrapper.cc @@ -14,6 +14,7 @@ #include #include +#include #include #include // NOLINT(build/c++11) #include @@ -239,7 +240,8 @@ bool HexagonDSPWrapper::SetupGraph(const NetDef &net_def, while (input_shape.size() < 4) { input_shape.insert(input_shape.begin(), 1); } - input_info_.emplace_back(input_shape, + input_info_.emplace_back(input_info.name(), + input_shape, input_info.data_type(), input_info.scale(), input_info.zero_point(), @@ -255,7 +257,8 @@ bool HexagonDSPWrapper::SetupGraph(const NetDef &net_def, while (output_shape.size() < 4) { output_shape.insert(output_shape.begin(), 1); } - output_info_.emplace_back(output_shape, + output_info_.emplace_back(output_info.name(), + output_shape, output_info.data_type(), output_info.scale(), output_info.zero_point(), @@ -396,8 +399,8 @@ bool HexagonDSPWrapper::ExecuteGraph(const Tensor &input_tensor, Tensor *output_tensor) { VLOG(2) << "Execute graph: " << nn_id_; // single input and single output - MACE_ASSERT(num_inputs_ == 1, "Wrong inputs num"); - MACE_ASSERT(num_outputs_ == 1, "Wrong outputs num"); + MACE_CHECK(num_inputs_ == 1, "Wrong inputs num"); + MACE_CHECK(num_outputs_ == 1, "Wrong outputs num"); output_tensor->SetDtype(output_info_[0].data_type); output_tensor->Resize(output_info_[0].shape); std::vector output_shape(4); @@ -419,26 +422,27 @@ bool HexagonDSPWrapper::ExecuteGraph(const Tensor &input_tensor, &output_bytes); MACE_CHECK(res == 0, "execute error"); - MACE_ASSERT(output_shape.size() == output_info_[0].shape.size(), - "wrong output shape inferred"); + MACE_CHECK(output_shape.size() == output_info_[0].shape.size(), + "wrong output shape inferred"); for (size_t i = 0; i < output_shape.size(); ++i) { - MACE_ASSERT(static_cast(output_shape[i]) - == output_info_[0].shape[i], - "wrong output shape inferred"); + MACE_CHECK(static_cast(output_shape[i]) + == output_info_[0].shape[i], + "wrong output shape inferred"); } - MACE_ASSERT(output_bytes == output_tensor->raw_size(), - "wrong output bytes inferred."); + MACE_CHECK(output_bytes == output_tensor->raw_size(), + "wrong output bytes inferred."); return res == 0; } bool HexagonDSPWrapper::ExecuteGraphNew( - const std::vector &input_tensors, - std::vector *output_tensors) { + const std::map &input_tensors, + std::map *output_tensors) { VLOG(2) << "Execute graph new: " << nn_id_; uint32_t num_inputs = static_cast(input_tensors.size()); uint32_t num_outputs = static_cast(output_tensors->size()); - MACE_ASSERT(num_inputs_ == num_inputs, "Wrong inputs num"); - MACE_ASSERT(num_outputs_ == num_outputs, "Wrong outputs num"); + MACE_CHECK(num_inputs_ == static_cast(num_inputs), "Wrong inputs num"); + MACE_CHECK(num_outputs_ == static_cast(num_outputs), + "Wrong outputs num"); std::vector inputs(num_inputs * kNumMetaData); std::vector outputs(num_outputs * kNumMetaData); @@ -447,17 +451,18 @@ bool HexagonDSPWrapper::ExecuteGraphNew( // transform mace input to hexagon input for (size_t i = 0; i < num_inputs; ++i) { - std::vector input_shape = input_tensors[i]->shape(); + const auto input_tensor = input_tensors.at(input_info_[i].name); + const auto &input_shape = input_tensor->shape(); size_t index = i * kNumMetaData; inputs[index].batches = static_cast(input_shape[0]); inputs[index].height = static_cast(input_shape[1]); inputs[index].width = static_cast(input_shape[2]); inputs[index].depth = static_cast(input_shape[3]); inputs[index].data = const_cast( - reinterpret_cast(input_tensors[i]->raw_data())); - inputs[index].dataLen = static_cast(input_tensors[i]->raw_size()); + reinterpret_cast(input_tensor->raw_data())); + inputs[index].dataLen = static_cast(input_tensor->raw_size()); inputs[index].data_valid_len = - static_cast(input_tensors[i]->raw_size()); + static_cast(input_tensor->raw_size()); inputs[index].unused = 0; input_metadata[i].Init(.0f, .0f, 1); AddInputMetadata(input_metadata[i].min_val, &inputs[index + 1]); @@ -467,13 +472,14 @@ bool HexagonDSPWrapper::ExecuteGraphNew( // transform mace output to hexagon output for (size_t i = 0; i < num_outputs; ++i) { + auto output_tensor = output_tensors->at(output_info_[i].name); size_t index = i * kNumMetaData; - (*output_tensors)[i]->SetDtype(output_info_[i].data_type); - (*output_tensors)[i]->Resize(output_info_[i].shape); + output_tensor->SetDtype(output_info_[i].data_type); + output_tensor->Resize(output_info_[i].shape); outputs[index].data = reinterpret_cast( - (*output_tensors)[i]->raw_mutable_data()); - outputs[index].dataLen = static_cast((*output_tensors)[i]->raw_size()); + output_tensor->raw_mutable_data()); + outputs[index].dataLen = static_cast(output_tensor->raw_size()); output_metadata[i].Init(.0f, .0f, 1); AddOutputMetadata(output_metadata[i].min_val, &outputs[index + 1]); @@ -495,17 +501,20 @@ bool HexagonDSPWrapper::ExecuteGraphNew( std::vector output_shape{ outputs[index].batches, outputs[index].height, outputs[index].width, outputs[index].depth}; - MACE_ASSERT(output_shape.size() == output_info_[i].shape.size(), + MACE_CHECK(output_shape.size() == output_info_[i].shape.size(), + output_shape.size(), " vs ", output_info_[i].shape.size(), "wrong output shape inferred"); for (size_t j = 0; j < output_shape.size(); ++j) { - MACE_ASSERT(static_cast(output_shape[j]) - == output_info_[i].shape[j], - "wrong output shape inferred"); + MACE_CHECK(static_cast(output_shape[j]) + == output_info_[i].shape[j], + output_shape[j], " vs ", output_info_[i].shape[j], + "wrong output shape inferred"); } - - MACE_ASSERT(static_cast(outputs[index].data_valid_len) - == (*output_tensors)[i]->raw_size(), - "wrong output bytes inferred."); + auto output_tensor = output_tensors->at(output_info_[i].name); + MACE_CHECK(static_cast(outputs[index].data_valid_len) + == output_tensor->raw_size(), + outputs[index].data_valid_len, " vs ", output_tensor->raw_size(), + " wrong output bytes inferred."); } return res == 0; diff --git a/mace/core/runtime/hexagon/hexagon_dsp_wrapper.h b/mace/core/runtime/hexagon/hexagon_dsp_wrapper.h index 2c46414b..f0877592 100644 --- a/mace/core/runtime/hexagon/hexagon_dsp_wrapper.h +++ b/mace/core/runtime/hexagon/hexagon_dsp_wrapper.h @@ -15,6 +15,8 @@ #ifndef MACE_CORE_RUNTIME_HEXAGON_HEXAGON_DSP_WRAPPER_H_ #define MACE_CORE_RUNTIME_HEXAGON_HEXAGON_DSP_WRAPPER_H_ +#include +#include #include #include "mace/core/runtime/hexagon/hexagon_control_wrapper.h" @@ -35,8 +37,8 @@ class HexagonDSPWrapper : public HexagonControlWrapper { const unsigned char *model_data) override; bool ExecuteGraph(const Tensor &input_tensor, Tensor *output_tensor) override; - bool ExecuteGraphNew(const std::vector &input_tensors, - std::vector *output_tensors) override; + bool ExecuteGraphNew(const std::map &input_tensors, + std::map *output_tensors) override; bool TeardownGraph() override; void PrintLog() override; void PrintGraph() override; diff --git a/mace/core/runtime/hexagon/hexagon_hta_wrapper.cc b/mace/core/runtime/hexagon/hexagon_hta_wrapper.cc index e3754f19..c4191e7f 100644 --- a/mace/core/runtime/hexagon/hexagon_hta_wrapper.cc +++ b/mace/core/runtime/hexagon/hexagon_hta_wrapper.cc @@ -16,6 +16,7 @@ #include #include +#include #include #include #include @@ -160,7 +161,8 @@ bool HexagonHTAWrapper::SetupGraph(const NetDef &net_def, while (input_shape.size() < 4) { input_shape.insert(input_shape.begin(), 1); } - input_info_.emplace_back(input_shape, + input_info_.emplace_back(input_info.name(), + input_shape, input_info.data_type(), input_info.scale(), input_info.zero_point(), @@ -176,7 +178,8 @@ bool HexagonHTAWrapper::SetupGraph(const NetDef &net_def, while (output_shape.size() < 4) { output_shape.insert(output_shape.begin(), 1); } - output_info_.emplace_back(output_shape, + output_info_.emplace_back(output_info.name(), + output_shape, output_info.data_type(), output_info.scale(), output_info.zero_point(), @@ -234,19 +237,21 @@ bool HexagonHTAWrapper::ExecuteGraph(const Tensor &input_tensor, } bool HexagonHTAWrapper::ExecuteGraphNew( - const std::vector &input_tensors, - std::vector *output_tensors) { + const std::map &input_tensors, + std::map *output_tensors) { VLOG(2) << "Execute graph new: " << nn_id_; uint32_t num_inputs = static_cast(input_tensors.size()); uint32_t num_outputs = static_cast(output_tensors->size()); - MACE_ASSERT(num_inputs_ == num_inputs, "Wrong inputs num"); - MACE_ASSERT(num_outputs_ == num_outputs, "Wrong outputs num"); + MACE_CHECK(num_inputs_ == static_cast(num_inputs), "Wrong inputs num"); + MACE_CHECK(num_outputs_ == static_cast(num_outputs), + "Wrong outputs num"); std::vector inputs(num_inputs); std::vector outputs(num_outputs); for (size_t i = 0; i < num_inputs; ++i) { - std::vector input_shape = input_tensors[i]->shape(); + const auto input_tensor = input_tensors.at(input_info_[i].name); + const auto &input_shape = input_tensor->shape(); inputs[i].batches = static_cast(input_shape[0]); inputs[i].height = static_cast(input_shape[1]); inputs[i].width = static_cast(input_shape[2]); @@ -254,10 +259,10 @@ bool HexagonHTAWrapper::ExecuteGraphNew( input_info_[i].tensor_u8->SetDtype(DT_UINT8); input_info_[i].tensor_u8->Resize(input_shape); - const float *input_data = input_tensors[i]->data(); + const float *input_data = input_tensor->data(); uint8_t *input_data_u8 = input_info_[i].tensor_u8->mutable_data(); QuantizeWithScaleAndZeropoint(input_data, - input_tensors[i]->size(), + input_tensor->size(), input_info_[i].scale, input_info_[i].zero_point, input_data_u8); @@ -272,8 +277,9 @@ bool HexagonHTAWrapper::ExecuteGraphNew( } for (size_t i = 0; i < num_outputs; ++i) { - (*output_tensors)[i]->SetDtype(output_info_[i].data_type); - (*output_tensors)[i]->Resize(output_info_[i].shape); + auto output_tensor = output_tensors->at(output_info_[i].name); + output_tensor->SetDtype(output_info_[i].data_type); + output_tensor->Resize(output_info_[i].shape); output_info_[i].tensor_u8->SetDtype(DT_UINT8); output_info_[i].tensor_u8->Resize(output_info_[i].shape); outputs[i].data = reinterpret_cast( @@ -292,19 +298,23 @@ bool HexagonHTAWrapper::ExecuteGraphNew( std::vector output_shape{ outputs[i].batches, outputs[i].height, outputs[i].width, outputs[i].depth}; - MACE_ASSERT(output_shape.size() == output_info_[i].shape.size(), - "wrong output shape inferred"); + MACE_CHECK(output_shape.size() == output_info_[i].shape.size(), + output_shape.size(), " vs ", output_info_[i].shape.size(), + "wrong output shape inferred"); for (size_t j = 0; j < output_shape.size(); ++j) { - MACE_ASSERT(static_cast(output_shape[j]) - == output_info_[i].shape[j], - "wrong output shape inferred"); + MACE_CHECK(static_cast(output_shape[j]) + == output_info_[i].shape[j], + output_shape[j], " vs ", output_info_[i].shape[j], + "wrong output shape inferred"); } - MACE_ASSERT(static_cast(outputs[i].data_valid_len) - == (*output_tensors)[i]->raw_size(), - "wrong output bytes inferred."); + auto output_tensor = output_tensors->at(output_info_[i].name); + MACE_CHECK(static_cast(outputs[i].data_valid_len) + == output_tensor->raw_size(), + outputs[i].data_valid_len, " vs ", output_tensor->raw_size(), + " wrong output bytes inferred."); const uint8_t *output_data_u8 = output_info_[i].tensor_u8->data(); - float *output_data = (*output_tensors)[i]->mutable_data(); + float *output_data = output_tensor->mutable_data(); Dequantize(output_data_u8, output_info_[i].tensor_u8->size(), output_info_[i].scale, diff --git a/mace/core/runtime/hexagon/hexagon_hta_wrapper.h b/mace/core/runtime/hexagon/hexagon_hta_wrapper.h index 26ea17bd..66d02e02 100644 --- a/mace/core/runtime/hexagon/hexagon_hta_wrapper.h +++ b/mace/core/runtime/hexagon/hexagon_hta_wrapper.h @@ -15,6 +15,8 @@ #ifndef MACE_CORE_RUNTIME_HEXAGON_HEXAGON_HTA_WRAPPER_H_ #define MACE_CORE_RUNTIME_HEXAGON_HEXAGON_HTA_WRAPPER_H_ +#include +#include #include #include "mace/core/runtime/hexagon/hexagon_control_wrapper.h" @@ -35,8 +37,8 @@ class HexagonHTAWrapper : public HexagonControlWrapper { const unsigned char *model_data) override; bool ExecuteGraph(const Tensor &input_tensor, Tensor *output_tensor) override; - bool ExecuteGraphNew(const std::vector &input_tensors, - std::vector *output_tensors) override; + bool ExecuteGraphNew(const std::map &input_tensors, + std::map *output_tensors) override; bool TeardownGraph() override; void PrintLog() override; void PrintGraph() override; diff --git a/mace/libmace/mace.cc b/mace/libmace/mace.cc index 0a44eb97..da43f5e2 100644 --- a/mace/libmace/mace.cc +++ b/mace/libmace/mace.cc @@ -736,8 +736,8 @@ MaceStatus MaceEngine::Impl::Run( std::map *outputs, RunMetadata *run_metadata) { MACE_CHECK_NOTNULL(outputs); - std::vector input_tensors; - std::vector output_tensors; + std::map input_tensors; + std::map output_tensors; for (auto &input : inputs) { if (input_info_map_.find(input.first) == input_info_map_.end()) { LOG(FATAL) << "'" << input.first @@ -746,7 +746,7 @@ MaceStatus MaceEngine::Impl::Run( } Tensor *input_tensor = ws_->GetTensor(input.first); MACE_RETURN_IF_ERROR(TransposeInput(input, input_tensor)); - input_tensors.push_back(input_tensor); + input_tensors[input.first] = input_tensor; } for (auto &output : *outputs) { if (output_info_map_.find(output.first) == output_info_map_.end()) { @@ -755,12 +755,14 @@ MaceStatus MaceEngine::Impl::Run( << MakeString(MapKeys(output_info_map_)); } Tensor *output_tensor = ws_->GetTensor(output.first); - output_tensors.push_back(output_tensor); + output_tensors[output.first] = output_tensor; } #if defined(MACE_ENABLE_HEXAGON) || defined(MACE_ENABLE_HTA) if (device_type_ == HEXAGON || device_type_ == HTA) { - MACE_CHECK(input_tensors.size() == 1 && output_tensors.size() == 1, - "HEXAGON not support multiple inputs and outputs yet."); + if (device_type_ == HTA) { + MACE_CHECK(input_tensors.size() == 1 && output_tensors.size() == 1, + "HTA not support multiple inputs and outputs yet."); + } hexagon_controller_->ExecuteGraphNew(input_tensors, &output_tensors); } else { #endif diff --git a/mace/python/tools/converter_tool/hexagon_converter.py b/mace/python/tools/converter_tool/hexagon_converter.py index 53598243..4f642abf 100644 --- a/mace/python/tools/converter_tool/hexagon_converter.py +++ b/mace/python/tools/converter_tool/hexagon_converter.py @@ -119,9 +119,10 @@ class HexagonConverter(base_converter.ConverterInterface): self._quantize_activation_info = quantize_activation_info def run(self): - mace_check(len(self._option.input_nodes) == 1 - and len(self._option.output_nodes) == 1, - 'dsp only support single input and output') + if self._option.device == DeviceType.HTA.value: + mace_check(len(self._option.input_nodes) == 1 + and len(self._option.output_nodes) == 1, + 'hta only support single input and output') for tensor in self._model.tensors: self._consts[tensor.name] = tensor @@ -129,13 +130,7 @@ class HexagonConverter(base_converter.ConverterInterface): # convert op node self.convert_ops() - self.add_input_output_node() - if not self._option.check_nodes: - output_name = list(self._option.output_nodes.values())[0].name - else: - output_name = list(self._option.check_nodes.values())[0].name - output_name = normalize_name(output_name) - self._model = graph_util.sort_mace_graph(self._model, output_name) + self.convert_input_output_node() self.add_node_id() @@ -399,21 +394,42 @@ class HexagonConverter(base_converter.ConverterInterface): elif op.input[i] == input_op + ':2': op.input[i] = input_max - def add_input_output_node(self): + def convert_input_output_node(self): + quantize_input_op = self._model.op[0] mace_check( - self._model.op[0].type == HexagonOp.QuantizeINPUT_f_to_8.name, + quantize_input_op.type == HexagonOp.QuantizeINPUT_f_to_8.name, "Not started with Quantize op.") - quantize_input_op = self._model.op[0] del quantize_input_op.input[:] - mace_check( - self._model.op[-1].type == HexagonOp.DequantizeOUTPUT_8tof.name, - "Not ended with Dequantize op.") dequantize_output_op = self._model.op[-1] + mace_check(dequantize_output_op.type + == HexagonOp.DequantizeOUTPUT_8tof.name, + "Not ended with Dequantize op.") + dequantize_input = [input for input in dequantize_output_op.input] + del dequantize_output_op.input[:] del dequantize_output_op.output_shape[:] del dequantize_output_op.output_type[:] del dequantize_output_op.out_max_byte_size[:] + index = 1 + while index < len(self._model.op) - 1: + op = self._model.op[index] + if op.type == HexagonOp.QuantizeINPUT_f_to_8.name: + quantize_input_op.output.extend(op.output) + quantize_input_op.output_shape.extend(op.output_shape) + quantize_input_op.output_type.extend(op.output_type) + quantize_input_op.out_max_byte_size.extend( + op.out_max_byte_size) + del self._model.op[index] + + elif op.type == HexagonOp.DequantizeOUTPUT_8tof.name: + dequantize_output_op.input.extend(op.input) + del self._model.op[index] + + index += 1 + # input order matters + dequantize_output_op.input.extend(dequantize_input) + if self._option.device == DeviceType.HTA.value: # replace QuantizeINPUT_f_to_8 with INPUT quantize_input_op.type = HexagonOp.INPUT.name diff --git a/mace/python/tools/layers_validate.py b/mace/python/tools/layers_validate.py index 3316580b..32316c4e 100644 --- a/mace/python/tools/layers_validate.py +++ b/mace/python/tools/layers_validate.py @@ -160,6 +160,7 @@ def main(unused_args): dequantize_op.node_input[0].node_id = op.node_id dequantize_op.node_input[1].node_id = op.node_id dequantize_op.node_input[2].node_id = op.node_id + del dequantize_op.node_input[3:] model_path = save_model_to_proto(net, normalize_op_name(op_name), FLAGS.output_dir) -- GitLab