From 6277e9e62a104ddd2f8f5e93ae764c5c992619a5 Mon Sep 17 00:00:00 2001 From: Bin Li Date: Fri, 22 Feb 2019 09:56:21 +0800 Subject: [PATCH] Support pre-quantization for hexagon --- .../hexagon/hexagon_control_wrapper.cc | 88 +++++++++++--- .../runtime/hexagon/hexagon_control_wrapper.h | 8 +- mace/core/runtime/hexagon/quantize.cc | 109 ------------------ mace/core/runtime/hexagon/quantize.h | 55 --------- mace/libmace/mace.cc | 2 +- mace/utils/quantize.h | 37 ++++++ 6 files changed, 117 insertions(+), 182 deletions(-) delete mode 100644 mace/core/runtime/hexagon/quantize.cc delete mode 100644 mace/core/runtime/hexagon/quantize.h diff --git a/mace/core/runtime/hexagon/hexagon_control_wrapper.cc b/mace/core/runtime/hexagon/hexagon_control_wrapper.cc index 5e0cb772..e19d98c7 100644 --- a/mace/core/runtime/hexagon/hexagon_control_wrapper.cc +++ b/mace/core/runtime/hexagon/hexagon_control_wrapper.cc @@ -24,6 +24,7 @@ #include "mace/core/runtime/hexagon/hexagon_control_wrapper.h" #include "mace/core/runtime/hexagon/hexagon_nn_ops.h" #include "mace/core/types.h" +#include "mace/utils/quantize.h" namespace { inline int64_t NowMicros() { @@ -247,6 +248,7 @@ bool HexagonControlWrapper::SetupGraph(const NetDef &net_def, input_data_types_.push_back(input_info.data_type()); num_inputs_ += 1; } + input_tensors_u8_.reserve(num_inputs_); // output info num_outputs_ = 0; @@ -264,6 +266,7 @@ bool HexagonControlWrapper::SetupGraph(const NetDef &net_def, << " " << output_shape[2] << " " << output_shape[3] << "\n\t type: " << output_info.data_type(); } + output_tensors_u8_.reserve(num_outputs_); int64_t t1 = NowMicros(); @@ -431,7 +434,8 @@ bool HexagonControlWrapper::ExecuteGraph(const Tensor &input_tensor, bool HexagonControlWrapper::ExecuteGraphNew( const std::vector &input_tensors, - std::vector *output_tensors) { + std::vector *output_tensors, + bool hexagon_quantize) { VLOG(2) << "Execute graph new: " << nn_id_; uint32_t num_inputs = static_cast(input_tensors.size()); uint32_t num_outputs = static_cast(output_tensors->size()); @@ -443,6 +447,7 @@ bool HexagonControlWrapper::ExecuteGraphNew( std::vector input_metadata(num_inputs); std::vector output_metadata(num_outputs); + // transform mace input to hexagon input for (size_t i = 0; i < num_inputs; ++i) { std::vector input_shape = input_tensors[i]->shape(); size_t index = i * NUM_METADATA; @@ -450,38 +455,81 @@ bool HexagonControlWrapper::ExecuteGraphNew( inputs[index].height = static_cast(input_shape[1]); inputs[index].width = static_cast(input_shape[2]); inputs[index].depth = static_cast(input_shape[3]); - inputs[index].data = const_cast( - reinterpret_cast(input_tensors[i]->raw_data())); - inputs[index].dataLen = static_cast(input_tensors[i]->raw_size()); - inputs[index].data_valid_len = static_cast( - input_tensors[i]->raw_size()); + if (hexagon_quantize) { + inputs[index].data = + const_cast(reinterpret_cast( + input_tensors[i]->raw_data())); + inputs[index].dataLen = static_cast(input_tensors[i]->raw_size()); + inputs[index].data_valid_len = + static_cast(input_tensors[i]->raw_size()); + input_metadata[i].Init(.0f, .0f, 1); + } else { + if (input_tensors_u8_.size() < i + 1) { + input_tensors_u8_.emplace_back(new Tensor()); + input_tensors_u8_[i]->SetDtype(DT_UINT8); + input_tensors_u8_[i]->Resize(input_shape); + } + + Quantize(*input_tensors[i], + input_tensors_u8_[i].get(), + &input_metadata[i].min_val, + &input_metadata[i].max_val); + + inputs[index].data = + const_cast(reinterpret_cast( + input_tensors_u8_[i]->raw_data())); + inputs[index].dataLen = + static_cast(input_tensors_u8_[i]->raw_size()); + inputs[index].data_valid_len = + static_cast(input_tensors_u8_[i]->raw_size()); + input_metadata[i].needs_quantization = 0; + } inputs[index].unused = 0; - input_metadata[i].Init(.0f, .0f, 1); AddInputMetadata(input_metadata[i].min_val, &inputs[index + 1]); AddInputMetadata(input_metadata[i].max_val, &inputs[index + 2]); AddInputMetadata(input_metadata[i].needs_quantization, &inputs[index + 3]); } + // transform mace output to hexagon output for (size_t i = 0; i < num_outputs; ++i) { size_t index = i * NUM_METADATA; (*output_tensors)[i]->SetDtype(output_data_types_[i]); (*output_tensors)[i]->Resize(output_shapes_[i]); - outputs[index].data = reinterpret_cast( - (*output_tensors)[i]->raw_mutable_data()); - outputs[index].dataLen = static_cast((*output_tensors)[i]->raw_size()); - output_metadata[i].Init(.0f, .0f, 1); + + if (hexagon_quantize) { + outputs[index].data = reinterpret_cast( + (*output_tensors)[i]->raw_mutable_data()); + outputs[index].dataLen = + static_cast((*output_tensors)[i]->raw_size()); + output_metadata[i].Init(.0f, .0f, 1); + } else { + if (output_tensors_u8_.size() < i + 1) { + output_tensors_u8_.emplace_back(new Tensor()); + output_tensors_u8_[i]->SetDtype(DT_UINT8); + output_tensors_u8_[i]->Resize(output_shapes_[i]); + } + + outputs[index].data = reinterpret_cast( + output_tensors_u8_[i]->raw_mutable_data()); + outputs[index].dataLen = + static_cast(output_tensors_u8_[i]->raw_size()); + output_metadata[i].Init(.0f, .0f, 0); + } + AddOutputMetadata(output_metadata[i].min_val, &outputs[index + 1]); AddOutputMetadata(output_metadata[i].max_val, &outputs[index + 2]); AddOutputMetadata(output_metadata[i].needs_quantization, &outputs[index + 3]); } + // Execute graph int res = hexagon_nn_execute_new(nn_id_, inputs.data(), num_inputs * NUM_METADATA, outputs.data(), num_outputs * NUM_METADATA); + // handle hexagon output for (size_t i = 0; i < num_outputs; ++i) { size_t index = i * NUM_METADATA; std::vector output_shape{ @@ -494,9 +542,21 @@ bool HexagonControlWrapper::ExecuteGraphNew( == output_shapes_[i][j], "wrong output shape inferred"); } - MACE_ASSERT(static_cast(outputs[index].data_valid_len) - == (*output_tensors)[i]->raw_size(), - "wrong output bytes inferred."); + + if (hexagon_quantize) { + MACE_ASSERT(static_cast(outputs[index].data_valid_len) + == (*output_tensors)[i]->raw_size(), + "wrong output bytes inferred."); + } else { + MACE_ASSERT(static_cast(outputs[index].data_valid_len) + == output_tensors_u8_[i]->raw_size(), + "wrong output bytes inferred."); + + DeQuantize(*output_tensors_u8_[i], + output_metadata[i].min_val, + output_metadata[i].max_val, + (*output_tensors)[i]); + } } return res == 0; diff --git a/mace/core/runtime/hexagon/hexagon_control_wrapper.h b/mace/core/runtime/hexagon/hexagon_control_wrapper.h index 1674e6cf..c74af257 100644 --- a/mace/core/runtime/hexagon/hexagon_control_wrapper.h +++ b/mace/core/runtime/hexagon/hexagon_control_wrapper.h @@ -15,9 +15,9 @@ #ifndef MACE_CORE_RUNTIME_HEXAGON_HEXAGON_CONTROL_WRAPPER_H_ #define MACE_CORE_RUNTIME_HEXAGON_HEXAGON_CONTROL_WRAPPER_H_ +#include #include -#include "mace/core/runtime/hexagon/quantize.h" #include "mace/core/tensor.h" #include "mace/public/mace.h" #include "third_party/nnlib/hexagon_nn.h" @@ -34,7 +34,8 @@ class HexagonControlWrapper { bool SetupGraph(const NetDef &net_def, const unsigned char *model_data); bool ExecuteGraph(const Tensor &input_tensor, Tensor *output_tensor); bool ExecuteGraphNew(const std::vector &input_tensors, - std::vector *output_tensors); + std::vector *output_tensors, + bool hexagon_quantize); bool TeardownGraph(); void PrintLog(); @@ -50,7 +51,6 @@ class HexagonControlWrapper { inline uint32_t node_id(uint32_t nodeid) { return NODE_ID_OFFSET + nodeid; } int nn_id_; - Quantizer quantizer_; std::vector> input_shapes_; std::vector> output_shapes_; @@ -58,6 +58,8 @@ class HexagonControlWrapper { std::vector output_data_types_; uint32_t num_inputs_; uint32_t num_outputs_; + std::vector> input_tensors_u8_; + std::vector> output_tensors_u8_; MACE_DISABLE_COPY_AND_ASSIGN(HexagonControlWrapper); }; diff --git a/mace/core/runtime/hexagon/quantize.cc b/mace/core/runtime/hexagon/quantize.cc deleted file mode 100644 index 31a62288..00000000 --- a/mace/core/runtime/hexagon/quantize.cc +++ /dev/null @@ -1,109 +0,0 @@ -// Copyright 2018 The MACE Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include - -#include "mace/core/runtime/hexagon/quantize.h" - -namespace mace { - -void Quantizer::Quantize(const Tensor &in_tensor, - Tensor *out_tensor, - float *min_out, - float *max_out) { - if (in_tensor.size() == 0) return; - const float *in_data = in_tensor.data(); - float min_in = in_data[0]; - float max_in = in_data[0]; - for (index_t i = 0; i < in_tensor.size(); ++i) { - min_in = std::min(min_in, in_data[i]); - max_in = std::max(max_in, in_data[i]); - } - Quantize(in_tensor, min_in, max_in, out_tensor, min_out, max_out); -} - -void Quantizer::Quantize(const Tensor &in_tensor, - const float min_in, - const float max_in, - Tensor *out_tensor, - float *min_out, - float *max_out) { - float stepsize; - float recip_stepsize; - QuantizeAdjustRange(min_in, max_in, min_out, max_out, &stepsize, - &recip_stepsize); - - const float *in = in_tensor.data(); - uint8_t *out = out_tensor->mutable_data(); - - for (int i = 0; i < in_tensor.size(); i++) { - const float inval = in[i]; - float ival = - static_cast((inval - *min_out) * recip_stepsize + 0.5f); - if (ival < 0) ival = 0; - if (ival > 255) ival = 255; - out[i] = static_cast(ival); - } -} - -void Quantizer::QuantizeAdjustRange(float min_in, - float max_in, - float *min_out, - float *max_out, - float *stepsize_out, - float *recip_stepsize_out) { - float minval = std::min(0.0f, min_in); - float maxval = std::max(0.0f, max_in); - float range = std::max(0.0001f, maxval - minval); - float recip_stepsize = 255.0f / range; - // make z(q0) integer - if (minval < 0.0f) { - float z = -minval * recip_stepsize; - float zi = floorf(z); - float zf = z - zi; - if (zf > 0.0001f && zf < 0.9999f) { - if (zi > 0.0f && (zi >= 254.0f || (zf - 1.0f) * minval > zf * maxval)) { - range = -255.0f * minval / zi; - maxval = minval + range; - } else { - range = 255.0f * maxval / (254.0f - zi); - minval = maxval - range; - } - recip_stepsize = 255.0f / range; - } - } - - *min_out = minval; - *max_out = maxval; - *stepsize_out = range / 255.0f; - *recip_stepsize_out = recip_stepsize; -} - -void Quantizer::DeQuantize(const Tensor &in_tensor, - const float min_in, - const float max_in, - Tensor *out_tensor) { - float range = std::max(0.0001f, max_in - min_in); - float stepsize = range / 255.0f; - - const uint8_t *in = in_tensor.data(); - float *out = out_tensor->mutable_data(); - - for (int i = 0; i < out_tensor->size(); ++i) { - out[i] = (in[i] * stepsize) + min_in; - } -} - - -} // namespace mace diff --git a/mace/core/runtime/hexagon/quantize.h b/mace/core/runtime/hexagon/quantize.h deleted file mode 100644 index f121b0d0..00000000 --- a/mace/core/runtime/hexagon/quantize.h +++ /dev/null @@ -1,55 +0,0 @@ -// Copyright 2018 The MACE Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef MACE_CORE_RUNTIME_HEXAGON_QUANTIZE_H_ -#define MACE_CORE_RUNTIME_HEXAGON_QUANTIZE_H_ - -#include "mace/core/tensor.h" - -namespace mace { - -class Quantizer { - public: - Quantizer() {} - ~Quantizer() {} - - void Quantize(const Tensor &in_tensor, - Tensor *out_tensor, - float *min_out, - float *max_out); - void Quantize(const Tensor &in_tensor, - const float min_in, - const float max_in, - Tensor *out_tensor, - float *min_out, - float *max_out); - void DeQuantize(const Tensor &in_tensor, - const float min_in, - const float max_in, - Tensor *out_tensor); - - private: - void QuantizeAdjustRange(float min_in, - float max_in, - float *min_out, - float *max_out, - float *stepsize, - float *recip_stepsize); - - MACE_DISABLE_COPY_AND_ASSIGN(Quantizer); -}; - -} // namespace mace - -#endif // MACE_CORE_RUNTIME_HEXAGON_QUANTIZE_H_ diff --git a/mace/libmace/mace.cc b/mace/libmace/mace.cc index ce8a1cc7..8f551154 100644 --- a/mace/libmace/mace.cc +++ b/mace/libmace/mace.cc @@ -702,7 +702,7 @@ MaceStatus MaceEngine::Impl::Run( if (device_type_ == HEXAGON) { MACE_CHECK(input_tensors.size() == 1 && output_tensors.size() == 1, "HEXAGON not support multiple inputs and outputs yet."); - hexagon_controller_->ExecuteGraphNew(input_tensors, &output_tensors); + hexagon_controller_->ExecuteGraphNew(input_tensors, &output_tensors, true); } else { #endif MACE_RETURN_IF_ERROR(net_->Run(run_metadata)); diff --git a/mace/utils/quantize.h b/mace/utils/quantize.h index 81d820cb..526d6528 100644 --- a/mace/utils/quantize.h +++ b/mace/utils/quantize.h @@ -123,6 +123,25 @@ inline void Quantize(const float *input, QuantizeWithScaleAndZeropoint(input, size, *scale, *zero_point, output); } +template +inline void Quantize(const Tensor &input, + Tensor *output, + float *min_out, + float *max_out) { + MACE_CHECK(input.size() != 0); + Tensor::MappingGuard input_guard(&input); + Tensor::MappingGuard output_guard(output); + auto *input_data = input.data(); + auto *output_data = output->mutable_data(); + float scale; + int32_t zero_point; + + Quantize(input_data, input.size(), false, output_data, &scale, &zero_point); + + *min_out = scale * (std::numeric_limits::lowest() - zero_point); + *max_out = scale * (std::numeric_limits::max() - zero_point); +} + template inline void Dequantize(const T *input, const index_t size, @@ -135,6 +154,24 @@ inline void Dequantize(const T *input, } } +template +inline void DeQuantize(const Tensor &input, + const float min_in, + const float max_in, + Tensor *output) { + MACE_CHECK(input.size() != 0); + Tensor::MappingGuard input_guard(&input); + Tensor::MappingGuard output_guard(output); + auto *input_data = input.data(); + auto *output_data = output->mutable_data(); + float scale; + int32_t zero_point; + + AdjustRange(min_in, max_in, false, &scale, &zero_point); + + Dequantize(input_data, input.size(), scale, zero_point, output_data); +} + inline void QuantizeMultiplier(double multiplier, int32_t* output_multiplier, int32_t* shift) { -- GitLab