提交 4b69b083 编写于 作者: 李寅

Merge branch 'pre_quant' into 'master'

Support pre-quantization for hexagon

See merge request !992
...@@ -24,6 +24,7 @@ ...@@ -24,6 +24,7 @@
#include "mace/core/runtime/hexagon/hexagon_control_wrapper.h" #include "mace/core/runtime/hexagon/hexagon_control_wrapper.h"
#include "mace/core/runtime/hexagon/hexagon_nn_ops.h" #include "mace/core/runtime/hexagon/hexagon_nn_ops.h"
#include "mace/core/types.h" #include "mace/core/types.h"
#include "mace/utils/quantize.h"
namespace { namespace {
inline int64_t NowMicros() { inline int64_t NowMicros() {
...@@ -247,6 +248,7 @@ bool HexagonControlWrapper::SetupGraph(const NetDef &net_def, ...@@ -247,6 +248,7 @@ bool HexagonControlWrapper::SetupGraph(const NetDef &net_def,
input_data_types_.push_back(input_info.data_type()); input_data_types_.push_back(input_info.data_type());
num_inputs_ += 1; num_inputs_ += 1;
} }
input_tensors_u8_.reserve(num_inputs_);
// output info // output info
num_outputs_ = 0; num_outputs_ = 0;
...@@ -264,6 +266,7 @@ bool HexagonControlWrapper::SetupGraph(const NetDef &net_def, ...@@ -264,6 +266,7 @@ bool HexagonControlWrapper::SetupGraph(const NetDef &net_def,
<< " " << output_shape[2] << " " << output_shape[3] << " " << output_shape[2] << " " << output_shape[3]
<< "\n\t type: " << output_info.data_type(); << "\n\t type: " << output_info.data_type();
} }
output_tensors_u8_.reserve(num_outputs_);
int64_t t1 = NowMicros(); int64_t t1 = NowMicros();
...@@ -431,7 +434,8 @@ bool HexagonControlWrapper::ExecuteGraph(const Tensor &input_tensor, ...@@ -431,7 +434,8 @@ bool HexagonControlWrapper::ExecuteGraph(const Tensor &input_tensor,
bool HexagonControlWrapper::ExecuteGraphNew( bool HexagonControlWrapper::ExecuteGraphNew(
const std::vector<Tensor *> &input_tensors, const std::vector<Tensor *> &input_tensors,
std::vector<Tensor *> *output_tensors) { std::vector<Tensor *> *output_tensors,
bool hexagon_quantize) {
VLOG(2) << "Execute graph new: " << nn_id_; VLOG(2) << "Execute graph new: " << nn_id_;
uint32_t num_inputs = static_cast<uint32_t>(input_tensors.size()); uint32_t num_inputs = static_cast<uint32_t>(input_tensors.size());
uint32_t num_outputs = static_cast<uint32_t>(output_tensors->size()); uint32_t num_outputs = static_cast<uint32_t>(output_tensors->size());
...@@ -443,6 +447,7 @@ bool HexagonControlWrapper::ExecuteGraphNew( ...@@ -443,6 +447,7 @@ bool HexagonControlWrapper::ExecuteGraphNew(
std::vector<InputOutputMetadata> input_metadata(num_inputs); std::vector<InputOutputMetadata> input_metadata(num_inputs);
std::vector<InputOutputMetadata> output_metadata(num_outputs); std::vector<InputOutputMetadata> output_metadata(num_outputs);
// transform mace input to hexagon input
for (size_t i = 0; i < num_inputs; ++i) { for (size_t i = 0; i < num_inputs; ++i) {
std::vector<index_t> input_shape = input_tensors[i]->shape(); std::vector<index_t> input_shape = input_tensors[i]->shape();
size_t index = i * NUM_METADATA; size_t index = i * NUM_METADATA;
...@@ -450,38 +455,81 @@ bool HexagonControlWrapper::ExecuteGraphNew( ...@@ -450,38 +455,81 @@ bool HexagonControlWrapper::ExecuteGraphNew(
inputs[index].height = static_cast<uint32_t>(input_shape[1]); inputs[index].height = static_cast<uint32_t>(input_shape[1]);
inputs[index].width = static_cast<uint32_t>(input_shape[2]); inputs[index].width = static_cast<uint32_t>(input_shape[2]);
inputs[index].depth = static_cast<uint32_t>(input_shape[3]); inputs[index].depth = static_cast<uint32_t>(input_shape[3]);
inputs[index].data = const_cast<unsigned char *>( if (hexagon_quantize) {
reinterpret_cast<const unsigned char *>(input_tensors[i]->raw_data())); inputs[index].data =
inputs[index].dataLen = static_cast<int>(input_tensors[i]->raw_size()); const_cast<unsigned char *>(reinterpret_cast<const unsigned char *>(
inputs[index].data_valid_len = static_cast<uint32_t>( input_tensors[i]->raw_data()));
input_tensors[i]->raw_size()); inputs[index].dataLen = static_cast<int>(input_tensors[i]->raw_size());
inputs[index].data_valid_len =
static_cast<uint32_t>(input_tensors[i]->raw_size());
input_metadata[i].Init(.0f, .0f, 1);
} else {
if (input_tensors_u8_.size() < i + 1) {
input_tensors_u8_.emplace_back(new Tensor());
input_tensors_u8_[i]->SetDtype(DT_UINT8);
input_tensors_u8_[i]->Resize(input_shape);
}
Quantize<uint8_t>(*input_tensors[i],
input_tensors_u8_[i].get(),
&input_metadata[i].min_val,
&input_metadata[i].max_val);
inputs[index].data =
const_cast<unsigned char *>(reinterpret_cast<const unsigned char *>(
input_tensors_u8_[i]->raw_data()));
inputs[index].dataLen =
static_cast<int>(input_tensors_u8_[i]->raw_size());
inputs[index].data_valid_len =
static_cast<uint32_t>(input_tensors_u8_[i]->raw_size());
input_metadata[i].needs_quantization = 0;
}
inputs[index].unused = 0; inputs[index].unused = 0;
input_metadata[i].Init(.0f, .0f, 1);
AddInputMetadata(input_metadata[i].min_val, &inputs[index + 1]); AddInputMetadata(input_metadata[i].min_val, &inputs[index + 1]);
AddInputMetadata(input_metadata[i].max_val, &inputs[index + 2]); AddInputMetadata(input_metadata[i].max_val, &inputs[index + 2]);
AddInputMetadata(input_metadata[i].needs_quantization, &inputs[index + 3]); AddInputMetadata(input_metadata[i].needs_quantization, &inputs[index + 3]);
} }
// transform mace output to hexagon output
for (size_t i = 0; i < num_outputs; ++i) { for (size_t i = 0; i < num_outputs; ++i) {
size_t index = i * NUM_METADATA; size_t index = i * NUM_METADATA;
(*output_tensors)[i]->SetDtype(output_data_types_[i]); (*output_tensors)[i]->SetDtype(output_data_types_[i]);
(*output_tensors)[i]->Resize(output_shapes_[i]); (*output_tensors)[i]->Resize(output_shapes_[i]);
outputs[index].data = reinterpret_cast<unsigned char *>(
(*output_tensors)[i]->raw_mutable_data()); if (hexagon_quantize) {
outputs[index].dataLen = static_cast<int>((*output_tensors)[i]->raw_size()); outputs[index].data = reinterpret_cast<unsigned char *>(
output_metadata[i].Init(.0f, .0f, 1); (*output_tensors)[i]->raw_mutable_data());
outputs[index].dataLen =
static_cast<int>((*output_tensors)[i]->raw_size());
output_metadata[i].Init(.0f, .0f, 1);
} else {
if (output_tensors_u8_.size() < i + 1) {
output_tensors_u8_.emplace_back(new Tensor());
output_tensors_u8_[i]->SetDtype(DT_UINT8);
output_tensors_u8_[i]->Resize(output_shapes_[i]);
}
outputs[index].data = reinterpret_cast<unsigned char *>(
output_tensors_u8_[i]->raw_mutable_data());
outputs[index].dataLen =
static_cast<int>(output_tensors_u8_[i]->raw_size());
output_metadata[i].Init(.0f, .0f, 0);
}
AddOutputMetadata(output_metadata[i].min_val, &outputs[index + 1]); AddOutputMetadata(output_metadata[i].min_val, &outputs[index + 1]);
AddOutputMetadata(output_metadata[i].max_val, &outputs[index + 2]); AddOutputMetadata(output_metadata[i].max_val, &outputs[index + 2]);
AddOutputMetadata(output_metadata[i].needs_quantization, AddOutputMetadata(output_metadata[i].needs_quantization,
&outputs[index + 3]); &outputs[index + 3]);
} }
// Execute graph
int res = hexagon_nn_execute_new(nn_id_, int res = hexagon_nn_execute_new(nn_id_,
inputs.data(), inputs.data(),
num_inputs * NUM_METADATA, num_inputs * NUM_METADATA,
outputs.data(), outputs.data(),
num_outputs * NUM_METADATA); num_outputs * NUM_METADATA);
// handle hexagon output
for (size_t i = 0; i < num_outputs; ++i) { for (size_t i = 0; i < num_outputs; ++i) {
size_t index = i * NUM_METADATA; size_t index = i * NUM_METADATA;
std::vector<uint32_t> output_shape{ std::vector<uint32_t> output_shape{
...@@ -494,9 +542,21 @@ bool HexagonControlWrapper::ExecuteGraphNew( ...@@ -494,9 +542,21 @@ bool HexagonControlWrapper::ExecuteGraphNew(
== output_shapes_[i][j], == output_shapes_[i][j],
"wrong output shape inferred"); "wrong output shape inferred");
} }
MACE_ASSERT(static_cast<index_t>(outputs[index].data_valid_len)
== (*output_tensors)[i]->raw_size(), if (hexagon_quantize) {
"wrong output bytes inferred."); MACE_ASSERT(static_cast<index_t>(outputs[index].data_valid_len)
== (*output_tensors)[i]->raw_size(),
"wrong output bytes inferred.");
} else {
MACE_ASSERT(static_cast<index_t>(outputs[index].data_valid_len)
== output_tensors_u8_[i]->raw_size(),
"wrong output bytes inferred.");
DeQuantize<uint8_t>(*output_tensors_u8_[i],
output_metadata[i].min_val,
output_metadata[i].max_val,
(*output_tensors)[i]);
}
} }
return res == 0; return res == 0;
......
...@@ -15,9 +15,9 @@ ...@@ -15,9 +15,9 @@
#ifndef MACE_CORE_RUNTIME_HEXAGON_HEXAGON_CONTROL_WRAPPER_H_ #ifndef MACE_CORE_RUNTIME_HEXAGON_HEXAGON_CONTROL_WRAPPER_H_
#define MACE_CORE_RUNTIME_HEXAGON_HEXAGON_CONTROL_WRAPPER_H_ #define MACE_CORE_RUNTIME_HEXAGON_HEXAGON_CONTROL_WRAPPER_H_
#include <memory>
#include <vector> #include <vector>
#include "mace/core/runtime/hexagon/quantize.h"
#include "mace/core/tensor.h" #include "mace/core/tensor.h"
#include "mace/public/mace.h" #include "mace/public/mace.h"
#include "third_party/nnlib/hexagon_nn.h" #include "third_party/nnlib/hexagon_nn.h"
...@@ -34,7 +34,8 @@ class HexagonControlWrapper { ...@@ -34,7 +34,8 @@ class HexagonControlWrapper {
bool SetupGraph(const NetDef &net_def, const unsigned char *model_data); bool SetupGraph(const NetDef &net_def, const unsigned char *model_data);
bool ExecuteGraph(const Tensor &input_tensor, Tensor *output_tensor); bool ExecuteGraph(const Tensor &input_tensor, Tensor *output_tensor);
bool ExecuteGraphNew(const std::vector<Tensor *> &input_tensors, bool ExecuteGraphNew(const std::vector<Tensor *> &input_tensors,
std::vector<Tensor *> *output_tensors); std::vector<Tensor *> *output_tensors,
bool hexagon_quantize);
bool TeardownGraph(); bool TeardownGraph();
void PrintLog(); void PrintLog();
...@@ -50,7 +51,6 @@ class HexagonControlWrapper { ...@@ -50,7 +51,6 @@ class HexagonControlWrapper {
inline uint32_t node_id(uint32_t nodeid) { return NODE_ID_OFFSET + nodeid; } inline uint32_t node_id(uint32_t nodeid) { return NODE_ID_OFFSET + nodeid; }
int nn_id_; int nn_id_;
Quantizer quantizer_;
std::vector<std::vector<index_t>> input_shapes_; std::vector<std::vector<index_t>> input_shapes_;
std::vector<std::vector<index_t>> output_shapes_; std::vector<std::vector<index_t>> output_shapes_;
...@@ -58,6 +58,8 @@ class HexagonControlWrapper { ...@@ -58,6 +58,8 @@ class HexagonControlWrapper {
std::vector<DataType> output_data_types_; std::vector<DataType> output_data_types_;
uint32_t num_inputs_; uint32_t num_inputs_;
uint32_t num_outputs_; uint32_t num_outputs_;
std::vector<std::unique_ptr<Tensor>> input_tensors_u8_;
std::vector<std::unique_ptr<Tensor>> output_tensors_u8_;
MACE_DISABLE_COPY_AND_ASSIGN(HexagonControlWrapper); MACE_DISABLE_COPY_AND_ASSIGN(HexagonControlWrapper);
}; };
......
// Copyright 2018 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <algorithm>
#include "mace/core/runtime/hexagon/quantize.h"
namespace mace {
void Quantizer::Quantize(const Tensor &in_tensor,
Tensor *out_tensor,
float *min_out,
float *max_out) {
if (in_tensor.size() == 0) return;
const float *in_data = in_tensor.data<float>();
float min_in = in_data[0];
float max_in = in_data[0];
for (index_t i = 0; i < in_tensor.size(); ++i) {
min_in = std::min(min_in, in_data[i]);
max_in = std::max(max_in, in_data[i]);
}
Quantize(in_tensor, min_in, max_in, out_tensor, min_out, max_out);
}
// Quantizes |in_tensor| (float) into |out_tensor| (uint8) using a
// caller-supplied value range. The range is first adjusted (see
// QuantizeAdjustRange) so that zero is exactly representable; the
// adjusted range is reported through |min_out|/|max_out|.
void Quantizer::Quantize(const Tensor &in_tensor,
                         const float min_in,
                         const float max_in,
                         Tensor *out_tensor,
                         float *min_out,
                         float *max_out) {
  float stepsize;
  float recip_stepsize;
  QuantizeAdjustRange(min_in, max_in, min_out, max_out, &stepsize,
                      &recip_stepsize);
  const float *in = in_tensor.data<float>();
  uint8_t *out = out_tensor->mutable_data<uint8_t>();
  for (int i = 0; i < in_tensor.size(); i++) {
    const float inval = in[i];
    // BUG FIX: the original cast this intermediate to uint8_t *before*
    // clamping, so values outside [0, 255] wrapped modulo 256 and the
    // clamp below was dead code. Keep the value in float, clamp, then
    // narrow once.
    float ival = (inval - *min_out) * recip_stepsize + 0.5f;
    if (ival < 0) ival = 0;
    if (ival > 255) ival = 255;
    out[i] = static_cast<uint8_t>(ival);
  }
}
// Adjusts a raw float value range before uint8 quantization.
// Guarantees on output: the range contains 0.0f, is at least 0.0001 wide,
// and the "zero point" (the quantized code of 0.0f) lands exactly on an
// integer step, so 0.0f round-trips without error.
// Outputs the adjusted [*min_out, *max_out] plus the step size
// (range / 255) and its reciprocal.
void Quantizer::QuantizeAdjustRange(float min_in,
                                    float max_in,
                                    float *min_out,
                                    float *max_out,
                                    float *stepsize_out,
                                    float *recip_stepsize_out) {
  // Force the range to include zero and never be degenerate.
  float minval = std::min(0.0f, min_in);
  float maxval = std::max(0.0f, max_in);
  float range = std::max(0.0001f, maxval - minval);
  float recip_stepsize = 255.0f / range;
  // make z(q0) integer
  if (minval < 0.0f) {
    // z is the (fractional) quantized code of 0.0f; zi/zf are its
    // integer and fractional parts.
    float z = -minval * recip_stepsize;
    float zi = floorf(z);
    float zf = z - zi;
    // Only adjust when the zero point is meaningfully fractional.
    if (zf > 0.0001f && zf < 0.9999f) {
      if (zi > 0.0f && (zi >= 254.0f || (zf - 1.0f) * minval > zf * maxval)) {
        // Stretch the max end so the zero point snaps down to zi:
        // with range = -255*minval/zi, -minval*(255/range) == zi exactly.
        range = -255.0f * minval / zi;
        maxval = minval + range;
      } else {
        // Stretch the min end so the zero point snaps up to zi + 1:
        // with range = 255*maxval/(254-zi), the new code of 0.0f is zi+1.
        range = 255.0f * maxval / (254.0f - zi);
        minval = maxval - range;
      }
      recip_stepsize = 255.0f / range;
    }
  }
  *min_out = minval;
  *max_out = maxval;
  *stepsize_out = range / 255.0f;
  *recip_stepsize_out = recip_stepsize;
}
// Converts a uint8 quantized tensor back to float, mapping codes
// 0..255 linearly onto [min_in, max_in]. The range is floored at
// 0.0001 to mirror the quantization side and avoid a zero step size.
void Quantizer::DeQuantize(const Tensor &in_tensor,
                           const float min_in,
                           const float max_in,
                           Tensor *out_tensor) {
  const float span = std::max(0.0001f, max_in - min_in);
  const float step = span / 255.0f;
  const uint8_t *src = in_tensor.data<uint8_t>();
  float *dst = out_tensor->mutable_data<float>();
  for (int i = 0; i < out_tensor->size(); ++i) {
    dst[i] = (src[i] * step) + min_in;
  }
}
} // namespace mace
// Copyright 2018 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_CORE_RUNTIME_HEXAGON_QUANTIZE_H_
#define MACE_CORE_RUNTIME_HEXAGON_QUANTIZE_H_
#include "mace/core/tensor.h"
namespace mace {
// Helper that converts float tensors to/from uint8 tensors for the
// Hexagon DSP runtime, using an asymmetric [min, max] range mapped
// linearly onto [0, 255].
class Quantizer {
 public:
  Quantizer() {}
  ~Quantizer() {}

  // Quantizes |in_tensor| (float) into |out_tensor| (uint8), deriving
  // the value range from the tensor's own min/max; the (possibly
  // adjusted) range is reported through |min_out|/|max_out|.
  void Quantize(const Tensor &in_tensor,
                Tensor *out_tensor,
                float *min_out,
                float *max_out);
  // Same as above, but with a caller-supplied input range.
  void Quantize(const Tensor &in_tensor,
                const float min_in,
                const float max_in,
                Tensor *out_tensor,
                float *min_out,
                float *max_out);
  // Converts a uint8 quantized tensor back to float over [min_in, max_in].
  void DeQuantize(const Tensor &in_tensor,
                  const float min_in,
                  const float max_in,
                  Tensor *out_tensor);

 private:
  // Widens [min_in, max_in] so it contains 0, is non-degenerate, and
  // the zero point lands on an integer quantization step.
  void QuantizeAdjustRange(float min_in,
                           float max_in,
                           float *min_out,
                           float *max_out,
                           float *stepsize,
                           float *recip_stepsize);

  MACE_DISABLE_COPY_AND_ASSIGN(Quantizer);
};
} // namespace mace
#endif // MACE_CORE_RUNTIME_HEXAGON_QUANTIZE_H_
...@@ -702,7 +702,7 @@ MaceStatus MaceEngine::Impl::Run( ...@@ -702,7 +702,7 @@ MaceStatus MaceEngine::Impl::Run(
if (device_type_ == HEXAGON) { if (device_type_ == HEXAGON) {
MACE_CHECK(input_tensors.size() == 1 && output_tensors.size() == 1, MACE_CHECK(input_tensors.size() == 1 && output_tensors.size() == 1,
"HEXAGON not support multiple inputs and outputs yet."); "HEXAGON not support multiple inputs and outputs yet.");
hexagon_controller_->ExecuteGraphNew(input_tensors, &output_tensors); hexagon_controller_->ExecuteGraphNew(input_tensors, &output_tensors, true);
} else { } else {
#endif #endif
MACE_RETURN_IF_ERROR(net_->Run(run_metadata)); MACE_RETURN_IF_ERROR(net_->Run(run_metadata));
......
...@@ -123,6 +123,25 @@ inline void Quantize(const float *input, ...@@ -123,6 +123,25 @@ inline void Quantize(const float *input,
QuantizeWithScaleAndZeropoint(input, size, *scale, *zero_point, output); QuantizeWithScaleAndZeropoint(input, size, *scale, *zero_point, output);
} }
// Quantizes the float tensor |input| into |output| with element type T.
// Scale and zero point come from the pointer-based Quantize overload
// (non-symmetric mode: third argument false); from those, the float
// values representable by T's full range are reported via
// |min_out|/|max_out|. Requires a non-empty input.
template<typename T>
inline void Quantize(const Tensor &input,
                     Tensor *output,
                     float *min_out,
                     float *max_out) {
  MACE_CHECK(input.size() != 0);
  Tensor::MappingGuard in_guard(&input);
  Tensor::MappingGuard out_guard(output);
  const float *src = input.data<float>();
  T *dst = output->mutable_data<T>();

  float scale;
  int32_t zero_point;
  Quantize(src, input.size(), false, dst, &scale, &zero_point);

  // Real-valued bounds of T's representable range under (scale, zero_point).
  *min_out = scale * (std::numeric_limits<T>::lowest() - zero_point);
  *max_out = scale * (std::numeric_limits<T>::max() - zero_point);
}
template<typename T> template<typename T>
inline void Dequantize(const T *input, inline void Dequantize(const T *input,
const index_t size, const index_t size,
...@@ -135,6 +154,24 @@ inline void Dequantize(const T *input, ...@@ -135,6 +154,24 @@ inline void Dequantize(const T *input,
} }
} }
// Dequantizes |input| (element type T) into the float tensor |output|.
// The (scale, zero_point) pair is reconstructed from [min_in, max_in]
// via AdjustRange<T> (non-symmetric mode), then applied element-wise by
// the pointer-based Dequantize helper. Requires a non-empty input.
template<typename T>
inline void DeQuantize(const Tensor &input,
                       const float min_in,
                       const float max_in,
                       Tensor *output) {
  MACE_CHECK(input.size() != 0);
  Tensor::MappingGuard in_guard(&input);
  Tensor::MappingGuard out_guard(output);
  const T *src = input.data<T>();
  float *dst = output->mutable_data<float>();

  float scale;
  int32_t zero_point;
  AdjustRange<T>(min_in, max_in, false, &scale, &zero_point);
  Dequantize(src, input.size(), scale, zero_point, dst);
}
inline void QuantizeMultiplier(double multiplier, inline void QuantizeMultiplier(double multiplier,
int32_t* output_multiplier, int32_t* output_multiplier,
int32_t* shift) { int32_t* shift) {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册