Commit de518dfc authored by 李寅

Refactor quantization utils

Parent fdcd657b
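At a high level, the patch replaces the separate min/max tensors that previously accompanied quantized data with a scale and zero point stored directly on Tensor. The new helpers implement the usual affine quantization relation, summarized here for orientation (the exact code follows in the diff):

  // real_value      = scale * (quantized_value - zero_point)
  // quantized_value = Saturate<T>(round(zero_point + real_value / scale))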
......@@ -106,21 +106,27 @@ class Tensor {
buffer_(nullptr),
is_buffer_owner_(true),
unused_(false),
name_("") {}
name_(""),
scale_(0.f),
zero_point_(0) {}
Tensor(BufferBase *buffer, DataType dtype)
: dtype_(dtype),
buffer_(buffer),
is_buffer_owner_(false),
unused_(false),
name_("") {}
name_(""),
scale_(0.f),
zero_point_(0) {}
Tensor(const BufferSlice &buffer_slice, DataType dtype)
: dtype_(dtype),
buffer_slice_(buffer_slice),
is_buffer_owner_(false),
unused_(false),
name_("") {
name_(""),
scale_(0.f),
zero_point_(0) {
buffer_ = &buffer_slice_;
}
......@@ -363,6 +369,22 @@ class Tensor {
MACE_DISABLE_COPY_AND_ASSIGN(MappingGuard);
};
inline float scale() const {
return scale_;
}
inline int32_t zero_point() const {
return zero_point_;
}
inline void SetScale(float scale) {
scale_ = scale;
}
inline void SetZeroPoint(int32_t zero_point) {
zero_point_ = zero_point;
}
private:
Allocator *allocator_;
DataType dtype_;
......@@ -373,6 +395,8 @@ class Tensor {
bool is_buffer_owner_;
bool unused_;
std::string name_;
float scale_;
int32_t zero_point_;
MACE_DISABLE_COPY_AND_ASSIGN(Tensor);
};
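The new scale()/zero_point() accessors let downstream code recover real values without carrying extra min/max tensors. A minimal sketch of a consumer, assuming a quantized uint8 tensor t (hypothetical helper, not part of this patch):

  inline float DequantizeOne(const Tensor &t, uint8_t q) {
    // Affine dequantization: real = scale * (q - zero_point).
    return t.scale() * (static_cast<int32_t>(q) - t.zero_point());
  }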
......
......@@ -28,31 +28,47 @@ namespace kernels {
template<typename T>
inline void AdjustRange(const float in_min_data,
const float in_max_data,
float *out_min_data,
float *out_max_data) {
const bool non_zero,
float *scale,
int32_t *zero_point) {
// Re-adjust the range so that it includes floating-point zero, then nudge
// it so that zero maps exactly to an integer quantized value.
const float quantized_max = std::numeric_limits<uint8_t>::max();
float out_min = fminf(0.f, in_min_data);
float out_max = fmaxf(0.f, in_max_data);
if (out_min < 0.f) {
float stepsize = (in_max_data - in_min_data) / quantized_max;
float quantized_zero = -in_min_data / stepsize;
float quantized_zero_near_int = roundf(quantized_zero);
if (fabs(quantized_zero - quantized_zero_near_int) > 1e-6) {
if (quantized_zero < quantized_zero_near_int) {
const T quantized_min = std::numeric_limits<T>::lowest();
const T quantized_max = std::numeric_limits<T>::max();
if (quantized_min < 0) {
    MACE_ASSERT(!non_zero,
                "Cannot nudge to a non-zero quantized value for signed types.");
}
float out_max = std::max(0.f, in_max_data);
float out_min = std::min(0.f, in_min_data);
  // Widen the range so that in_min_data quantizes to at least 1 (non-zero).
if (non_zero) {
out_min = std::min(out_min,
in_min_data - (out_max - in_min_data)
/ (quantized_max - quantized_min - 1));
}
*scale = (out_max - out_min) / (quantized_max - quantized_min);
const float kEps = 1e-6;
if (out_min < -kEps && out_max > kEps) {
float quantized_zero = -out_min / *scale;
    int32_t quantized_zero_near_int =
        static_cast<int32_t>(roundf(quantized_zero));
*zero_point = quantized_zero_near_int;
if (fabs(quantized_zero - quantized_zero_near_int) > kEps) {
if (quantized_zero < quantized_zero_near_int || non_zero) {
// keep out_max fixed, and move out_min
stepsize = out_max / (quantized_max - quantized_zero_near_int);
out_min = out_max - quantized_max * stepsize;
*scale = out_max / (quantized_max - quantized_zero_near_int);
} else {
// keep out_min fixed, and move out_max
stepsize = -out_min / quantized_zero_near_int;
out_max = out_min + quantized_max * stepsize;
*scale = -out_min / quantized_zero_near_int;
}
}
} else if (out_min > -kEps) {
*zero_point = quantized_min;
} else {
*zero_point = quantized_max;
}
*out_min_data = out_min;
*out_max_data = out_max;
}
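A worked instance of the nudging above, using the range from the existing tests (in_min = -3, in_max = 5, T = uint8_t, non_zero = false); numbers are rounded for readability:

  // raw scale       = (5 - (-3)) / 255                 ~= 0.031373
  // quantized_zero  = -out_min / scale = 3 / 0.031373  ~= 95.625 -> rounds to 96
  // 95.625 < 96, so out_max stays fixed and out_min moves:
  // scale           = out_max / (255 - 96) = 5 / 159   ~= 0.031447
  // implied out_min = -96 * 0.031447                   ~= -3.01887
  // which matches the expected_min value in the old test below.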
template<typename T>
......@@ -67,6 +83,50 @@ inline T Saturate(float value) {
}
}
inline void FindMinMax(const float *input,
const index_t size,
float *min_val, float *max_val) {
float max_v = std::numeric_limits<float>::lowest();
float min_v = std::numeric_limits<float>::max();
for (index_t i = 0; i < size; ++i) {
max_v = std::max(max_v, input[i]);
min_v = std::min(min_v, input[i]);
}
*min_val = min_v;
*max_val = max_v;
}
template<typename T>
inline void Quantize(const float *input,
const index_t size,
bool non_zero,
T *output,
float *scale,
int32_t *zero_point) {
float in_min_data;
float in_max_data;
FindMinMax(input, size, &in_min_data, &in_max_data);
AdjustRange<T>(in_min_data, in_max_data, non_zero,
scale, zero_point);
float recip_scale = 1 / *scale;
  for (index_t i = 0; i < size; ++i) {
output[i] = Saturate<T>(roundf(*zero_point + recip_scale * input[i]));
}
}
template<typename T>
inline void Dequantize(const T *input,
const index_t size,
const float scale,
const int32_t zero_point,
float *output) {
  for (index_t i = 0; i < size; ++i) {
output[i] = scale * (input[i] - zero_point);
}
}
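A minimal round-trip sketch of the two templates above (local names, illustrative only):

  std::vector<float> data = {-2.f, -1.f, 0.f, 1.f, 2.f, 3.f, 4.f};
  std::vector<uint8_t> q(data.size());
  std::vector<float> back(data.size());
  float scale;
  int32_t zero_point;
  Quantize<uint8_t>(data.data(), data.size(), /*non_zero=*/false,
                    q.data(), &scale, &zero_point);
  Dequantize<uint8_t>(q.data(), q.size(), scale, zero_point, back.data());
  // Each back[i] should match data[i] to within about one step (scale).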
template<DeviceType D, typename T>
struct QuantizeFunctor;
......@@ -75,26 +135,24 @@ struct QuantizeFunctor<CPU, uint8_t> {
QuantizeFunctor() {}
MaceStatus operator()(const Tensor *input,
const Tensor *in_min,
const Tensor *in_max,
Tensor *output,
Tensor *out_min,
Tensor *out_max,
StatsFuture *future) {
const bool non_zero,
Tensor *output,
StatsFuture *future) {
MACE_UNUSED(future);
Tensor::MappingGuard input_guard(input);
Tensor::MappingGuard output_guard(output);
const float *input_data = input->data<float>();
const float in_min_data = in_min->data<float>()[0];
const float in_max_data = in_max->data<float>()[0];
uint8_t *output_data = output->mutable_data<uint8_t>();
float *out_min_data = out_min->mutable_data<float>();
float *out_max_data = out_max->mutable_data<float>();
AdjustRange<uint8_t>(in_min_data, in_max_data, out_min_data, out_max_data);
float recip_stepsize = 255.f / (out_max_data[0] - out_min_data[0]);
for (int i = 0; i < input->size(); ++i) {
output_data[i] = Saturate<uint8_t>(roundf(
(input_data[i] - in_min_data) * recip_stepsize));
}
float scale;
int32_t zero_point;
Quantize(input_data,
input->size(),
non_zero,
output_data,
&scale,
&zero_point);
output->SetScale(scale);
output->SetZeroPoint(zero_point);
return MACE_SUCCESS;
}
......@@ -108,91 +166,18 @@ struct DequantizeFunctor<CPU, uint8_t> {
DequantizeFunctor() {}
MaceStatus operator()(const Tensor *input,
const Tensor *in_min,
const Tensor *in_max,
Tensor *output,
StatsFuture *future) {
Tensor *output,
StatsFuture *future) {
MACE_UNUSED(future);
Tensor::MappingGuard input_guard(input);
Tensor::MappingGuard output_guard(output);
const uint8_t *input_data = input->data<uint8_t>();
const float in_min_data = in_min->data<float>()[0];
const float in_max_data = in_max->data<float>()[0];
float *output_data = output->mutable_data<float>();
float stepsize = (in_max_data - in_min_data) / 255.0;
for (int i = 0; i < input->size(); ++i) {
output_data[i] = in_min_data + stepsize * input_data[i];
}
return MACE_SUCCESS;
}
};
template<DeviceType D, typename T>
struct RequantizeFunctor;
template<>
struct RequantizeFunctor<CPU, uint8_t> {
RequantizeFunctor() {}
MaceStatus operator()(const Tensor *input,
const Tensor *in_min,
const Tensor *in_max,
const Tensor *rerange_min,
const Tensor *rerange_max,
Tensor *output,
Tensor *out_min,
Tensor *out_max,
StatsFuture *future) {
MACE_UNUSED(future);
const int *input_data = input->data<int>();
const float in_min_data = in_min->data<float>()[0];
const float in_max_data = in_max->data<float>()[0];
float rerange_min_data;
float rerange_max_data;
int min_val = std::numeric_limits<int>::max();
int max_val = std::numeric_limits<int>::lowest();
double
si = (in_max_data - in_min_data) / std::numeric_limits<uint32_t>::max();
if (rerange_min == nullptr && rerange_max == nullptr) {
for (int i = 0; i < input->size(); ++i) {
min_val = std::min(min_val, input_data[i]);
max_val = std::max(max_val, input_data[i]);
}
rerange_min_data = min_val * si;
rerange_max_data = max_val * si;
} else {
rerange_min_data = rerange_min->data<float>()[0];
rerange_max_data = rerange_max->data<float>()[0];
}
uint8_t *output_data = output->mutable_data<uint8_t>();
float *out_min_data = out_min->mutable_data<float>();
float *out_max_data = out_max->mutable_data<float>();
AdjustRange<uint8_t>(rerange_min_data,
rerange_max_data,
out_min_data,
out_max_data);
/**
* f = qi * si = min_o + qo * so
* => qo = (qi * si - min_o) / so
* = qi * (si/so) - min_o / so
* = qi * (si / so) + zo
*
* zo = -min_o / so
*
*/
float so =
(out_max_data[0] - out_min_data[0]) / std::numeric_limits<uint8_t>::max();
double step_ratio = si / so;
float quantized_out_zero = -out_min_data[0] / so;
for (int i = 0; i < output->size(); ++i) {
output_data[i] =
Saturate<uint8_t>(roundf(
quantized_out_zero + input_data[i] * step_ratio));
}
Dequantize(input_data,
input->size(),
input->scale(),
input->zero_point(),
output_data);
return MACE_SUCCESS;
}
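The deleted RequantizeFunctor above folded dequantize and requantize into a single affine step, per its comment block: qo = qi * (si / so) + zo, with zo = -min_o / so. A standalone sketch of that mapping (hypothetical helper, kept here only to document the removed math):

  inline uint8_t RequantizeOne(int32_t qi, double si, float so, float zo) {
    // qo = qi * (si / so) + zo, rounded and saturated to the uint8 range.
    double step_ratio = si / so;
    return Saturate<uint8_t>(roundf(static_cast<float>(zo + qi * step_ratio)));
  }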
......
......@@ -45,7 +45,6 @@ extern void Register_Pooling(OperatorRegistryBase *op_registry);
extern void Register_Proposal(OperatorRegistryBase *op_registry);
extern void Register_Quantize(OperatorRegistryBase *op_registry);
extern void Register_ReduceMean(OperatorRegistryBase *op_registry);
extern void Register_Requantize(OperatorRegistryBase *op_registry);
extern void Register_Reshape(OperatorRegistryBase *op_registry);
extern void Register_ResizeBilinear(OperatorRegistryBase *op_registry);
extern void Register_Shape(OperatorRegistryBase *op_registry);
......@@ -96,7 +95,6 @@ OperatorRegistry::OperatorRegistry() : OperatorRegistryBase() {
ops::Register_Proposal(this);
ops::Register_Quantize(this);
ops::Register_ReduceMean(this);
ops::Register_Requantize(this);
ops::Register_Reshape(this);
ops::Register_ResizeBilinear(this);
ops::Register_Shape(this);
......
......@@ -33,13 +33,5 @@ void Register_Dequantize(OperatorRegistryBase *op_registry) {
DequantizeOp<DeviceType::CPU, uint8_t>);
}
void Register_Requantize(OperatorRegistryBase *op_registry) {
MACE_REGISTER_OPERATOR(op_registry, OpKeyBuilder("Requantize")
.Device(DeviceType::CPU)
.TypeConstraint<uint8_t>("T")
.Build(),
RequantizeOp<DeviceType::CPU, uint8_t>);
}
} // namespace ops
} // namespace mace
......@@ -21,39 +21,33 @@
namespace mace {
namespace ops {
template <DeviceType D, class T>
template<DeviceType D, class T>
class QuantizeOp : public Operator<D, T> {
public:
QuantizeOp(const OperatorDef &operator_def, Workspace *ws)
: Operator<D, T>(operator_def, ws) {}
: Operator<D, T>(operator_def, ws),
        non_zero_(static_cast<bool>(
            OperatorBase::GetOptionalArg<int>("non_zero", 0))) {}
MaceStatus Run(StatsFuture *future) override {
const Tensor *input = this->Input(INPUT);
const Tensor *in_min = this->Input(IN_MIN);
const Tensor *in_max = this->Input(IN_MAX);
MACE_CHECK(in_min->size() == 1, "min val tensor has more than 1 value");
MACE_CHECK(in_max->size() == 1, "max val tensor has more than 1 value");
Tensor *output = this->Output(OUTPUT);
Tensor *out_min = this->Output(OUT_MIN);
Tensor *out_max = this->Output(OUT_MAX);
MACE_RETURN_IF_ERROR(output->ResizeLike(input));
MACE_RETURN_IF_ERROR(out_min->ResizeLike(in_min));
MACE_RETURN_IF_ERROR(out_max->ResizeLike(in_max));
return functor_(input, in_min, in_max, output, out_min, out_max, future);
return functor_(input, non_zero_, output, future);
}
private:
kernels::QuantizeFunctor<D, T> functor_;
bool non_zero_;
protected:
MACE_OP_INPUT_TAGS(INPUT, IN_MIN, IN_MAX);
MACE_OP_OUTPUT_TAGS(OUTPUT, OUT_MIN, OUT_MAX);
MACE_OP_INPUT_TAGS(INPUT);
MACE_OP_OUTPUT_TAGS(OUTPUT);
};
template <DeviceType D, class T>
template<DeviceType D, class T>
class DequantizeOp : public Operator<D, T> {
public:
DequantizeOp(const OperatorDef &operator_def, Workspace *ws)
......@@ -61,70 +55,20 @@ class DequantizeOp : public Operator<D, T> {
MaceStatus Run(StatsFuture *future) override {
const Tensor *input = this->Input(INPUT);
const Tensor *in_min = this->Input(IN_MIN);
const Tensor *in_max = this->Input(IN_MAX);
MACE_CHECK(in_min->size() == 1, "min val tensor has more than 1 value");
MACE_CHECK(in_max->size() == 1, "max val tensor has more than 1 value");
Tensor *output = this->Output(OUTPUT);
MACE_RETURN_IF_ERROR(output->ResizeLike(input));
return functor_(input, in_min, in_max, output, future);
return functor_(input, output, future);
}
private:
kernels::DequantizeFunctor<D, T> functor_;
protected:
MACE_OP_INPUT_TAGS(INPUT, IN_MIN, IN_MAX);
MACE_OP_INPUT_TAGS(INPUT);
MACE_OP_OUTPUT_TAGS(OUTPUT);
};
template <DeviceType D, class T>
class RequantizeOp : public Operator<D, T> {
public:
RequantizeOp(const OperatorDef &operator_def, Workspace *ws)
: Operator<D, T>(operator_def, ws) {}
MaceStatus Run(StatsFuture *future) override {
const Tensor *input = this->Input(INPUT);
const Tensor *in_min = this->Input(IN_MIN);
const Tensor *in_max = this->Input(IN_MAX);
const Tensor *rerange_min = nullptr;
const Tensor *rerange_max = nullptr;
MACE_CHECK(in_min->size() == 1, "min val tensor has more than 1 value");
MACE_CHECK(in_max->size() == 1, "max val tensor has more than 1 value");
if (this->InputSize() >= 5) {
rerange_min = this->Input(RERANGE_MIN);
rerange_max = this->Input(RERANGE_MAX);
MACE_CHECK(rerange_min->size() == 1,
"rerange min val tensor has more than 1 value");
MACE_CHECK(rerange_max->size() == 1,
"rerange max val tensor has more than 1 value");
}
Tensor *output = this->Output(OUTPUT);
Tensor *out_min = this->Output(OUT_MIN);
Tensor *out_max = this->Output(OUT_MAX);
MACE_RETURN_IF_ERROR(output->ResizeLike(input));
MACE_RETURN_IF_ERROR(out_min->ResizeLike(in_min));
MACE_RETURN_IF_ERROR(out_max->ResizeLike(out_max));
return functor_(input, in_min, in_max, rerange_min, rerange_max, output,
out_min, out_max, future);
}
private:
kernels::RequantizeFunctor<D, T> functor_;
protected:
MACE_OP_INPUT_TAGS(INPUT, IN_MIN, IN_MAX, RERANGE_MIN, RERANGE_MAX);
MACE_OP_OUTPUT_TAGS(OUTPUT, OUT_MIN, OUT_MAX);
};
} // namespace ops
} // namespace mace
......
......@@ -19,190 +19,63 @@ namespace mace {
namespace ops {
namespace test {
class QuantizeTest : public OpsTestBase {};
namespace {
TEST_F(QuantizeTest, TestQuantize) {
// Construct graph
void TestQuantizeDequantize(const std::vector<float> &input, bool non_zero) {
OpsTestNet net;
// Add input data
net.AddInputFromArray<CPU, float>("Input", {1, 2, 3, 1},
{-2, -1, 1, 2, 3, 4});
net.AddInputFromArray<CPU, float>("InputMin", {1}, {-3});
net.AddInputFromArray<CPU, float>("InputMax", {1}, {5});
net.AddInputFromArray<CPU, float>("Input",
{static_cast<index_t>(input.size())},
input);
OpDefBuilder("Quantize", "QuantizeTest")
.Input("Input")
.Input("InputMin")
.Input("InputMax")
.Output("Output")
.Output("OutputMin")
.Output("OutputMax")
.OutputType({DT_UINT8, DT_FLOAT, DT_FLOAT})
.Output("QuantizeOutput")
.OutputType({DT_UINT8})
.AddIntArg("T", DT_UINT8)
.AddIntArg("non_zero", non_zero)
.Finalize(net.NewOperatorDef());
// Run
net.RunOp();
auto output = net.GetTensor("Output");
auto output_min = net.GetTensor("OutputMin");
auto output_max = net.GetTensor("OutputMax");
auto expected_output =
CreateTensor<uint8_t>({1, 2, 3, 1}, {32, 64, 127, 159, 191, 223});
auto expected_min = CreateTensor<float>({1}, {-3.01887});
auto expected_max = CreateTensor<float>({1}, {5});
ExpectTensorNear<uint8_t>(*expected_output, *output);
ExpectTensorNear<float>(*expected_min, *output_min);
ExpectTensorNear<float>(*expected_max, *output_max);
}
TEST_F(QuantizeTest, TestQuantizeTrend) {
// Construct graph
OpsTestNet net;
// Add input data
net.AddRandomInput<CPU, float>("Input", {100});
const float *input_data = net.GetTensor("Input")->data<float>();
net.AddInputFromArray<CPU, float>(
"InputMin", {1},
{*std::min_element(input_data,
input_data + net.GetTensor("Input")->size())});
net.AddInputFromArray<CPU, float>(
"InputMax", {1},
{*std::max_element(input_data,
input_data + net.GetTensor("Input")->size())});
OpDefBuilder("Quantize", "QuantizeTest")
.Input("Input")
.Input("InputMin")
.Input("InputMax")
.Output("Output")
.Output("OutputMin")
.Output("OutputMax")
.OutputType({DT_UINT8, DT_FLOAT, DT_FLOAT})
.AddIntArg("T", DT_UINT8)
.Finalize(net.NewOperatorDef());
// Run
net.RunOp();
auto output = net.GetTensor("Output");
const uint8_t *output_data = net.GetTensor("Output")->data<uint8_t>();
for (int i = 1; i < output->size(); ++i) {
if (input_data[i] > input_data[i - 1]) {
EXPECT_GE(output_data[i], output_data[i - 1]);
} else if (input_data[i] == input_data[i - 1]) {
EXPECT_EQ(output_data[i], output_data[i - 1]);
} else {
EXPECT_LE(output_data[i], output_data[i - 1]);
if (non_zero) {
Tensor *quantized_output = net.GetTensor("QuantizeOutput");
Tensor::MappingGuard guard(quantized_output);
const uint8_t *quantized_output_data = quantized_output->data<uint8_t>();
for (index_t i = 0; i < quantized_output->size(); ++i) {
EXPECT_GT(quantized_output_data[i], 0);
}
}
}
TEST_F(QuantizeTest, TestDequantize) {
// Construct graph
OpsTestNet net;
// Add input data
net.AddInputFromArray<CPU, uint8_t>("Input", {1, 2, 3, 1},
{32, 64, 127, 159, 191, 223});
net.AddInputFromArray<CPU, float>("InputMin", {1}, {-3.01887});
net.AddInputFromArray<CPU, float>("InputMax", {1}, {5});
OpDefBuilder("Dequantize", "DequantizeTest")
.Input("Input")
.Input("InputMin")
.Input("InputMax")
OpDefBuilder("Dequantize", "DeQuantizeTest")
.Input("QuantizeOutput")
.Output("Output")
.OutputType({DT_FLOAT})
.AddIntArg("T", DT_UINT8)
.Finalize(net.NewOperatorDef());
// Run
net.RunOp();
auto output = net.GetTensor("Output");
auto expected_output =
CreateTensor<float>({1, 2, 3, 1}, {-2, -1, 1, 2, 3, 4});
auto expected_min = CreateTensor<float>({1}, {-3.01887});
auto expected_max = CreateTensor<float>({1}, {5});
ExpectTensorNear<float>(*expected_output, *output, 0.1, 0.01);
}
TEST_F(QuantizeTest, TestRequantizeWithMinMax) {
// Construct graph
OpsTestNet net;
// Add input data
net.AddInputFromArray<CPU, int>(
"Input", {1, 2, 3, 1},
{-1073741824, -536870912, 536870912, 1073741824, 1610612736, 2147483647});
net.AddInputFromArray<CPU, float>("InputMin", {1}, {-3});
net.AddInputFromArray<CPU, float>("InputMax", {1}, {5});
net.AddInputFromArray<CPU, float>("RerangeMin", {1}, {-3.01887});
net.AddInputFromArray<CPU, float>("RerangeMax", {1}, {5});
OpDefBuilder("Requantize", "RequantizeTest")
.Input("Input")
.Input("InputMin")
.Input("InputMax")
.Input("RerangeMin")
.Input("RerangeMax")
.Output("Output")
.Output("OutputMin")
.Output("OutputMax")
.OutputType({DT_UINT8, DT_FLOAT, DT_FLOAT})
.AddIntArg("T", DT_UINT8)
.Finalize(net.NewOperatorDef());
// Run
net.RunOp();
auto output = net.GetTensor("Output");
auto expected_output =
CreateTensor<uint8_t>({1, 2, 3, 1}, {32, 64, 128, 160, 191, 223});
auto expected_min = CreateTensor<float>({1}, {-3.01887});
auto expected_max = CreateTensor<float>({1}, {5});
ExpectTensorNear<uint8_t>(*expected_output, *output);
ExpectTensorNear<float>(*net.GetTensor("Input"),
*net.GetTensor("Output"),
0.1);
}
TEST_F(QuantizeTest, TestRequantizeWithoutMinMax) {
// Construct graph
OpsTestNet net;
// Add input data
net.AddInputFromArray<CPU, int>(
"Input", {1, 2, 3, 1},
{-1073741824, -536870912, 536870912, 1073741824, 1610612736, 2147483647});
net.AddInputFromArray<CPU, float>("InputMin", {1}, {-3});
net.AddInputFromArray<CPU, float>("InputMax", {1}, {5});
} // namespace
OpDefBuilder("Requantize", "RequantizeTest")
.Input("Input")
.Input("InputMin")
.Input("InputMax")
.Output("Output")
.Output("OutputMin")
.Output("OutputMax")
.OutputType({DT_UINT8, DT_FLOAT, DT_FLOAT})
.AddIntArg("T", DT_UINT8)
.Finalize(net.NewOperatorDef());
// Run
net.RunOp();
class QuantizeTest : public OpsTestBase {};
auto output = net.GetTensor("Output");
auto expected_output =
CreateTensor<uint8_t>({1, 2, 3, 1}, {0, 43, 128, 170, 213, 255});
auto expected_min = CreateTensor<float>({1}, {-3.01887});
auto expected_max = CreateTensor<float>({1}, {5});
ExpectTensorNear<uint8_t>(*expected_output, *output);
TEST_F(QuantizeTest, TestQuantize) {
TestQuantizeDequantize({-2, -1, 0, 1, 2, 3, 4}, false);
TestQuantizeDequantize({-2, -1, 0, 1, 2, 3, 4}, true);
TestQuantizeDequantize({0, 1, 2, 3, 4}, false);
TestQuantizeDequantize({0, 1, 2, 3, 4}, true);
TestQuantizeDequantize({2, 3, 4, 5, 6}, false);
TestQuantizeDequantize({2, 3, 4, 5, 6}, true);
TestQuantizeDequantize({2, 4, 6, 8}, false);
TestQuantizeDequantize({2, 4, 6, 8}, true);
TestQuantizeDequantize({-2, -4, -6, -8}, false);
TestQuantizeDequantize({-2, -4, -6, -8}, true);
}
} // namespace test
......