Merge branch 'master' into 'master'

Refactor quantization utils See merge request !684

Merge branch 'master' into 'master'
Refactor quantization utils See merge request !684
fe978b15 · 李滨 · 7a9ee4ca · de518dfc · fe978b15 · fe978b15
6 changed files
--- a/mace/core/tensor.h
+++ b/mace/core/tensor.h
@@ -106,21 +106,27 @@ class Tensor {
        buffer_(nullptr),
        is_buffer_owner_(true),
        unused_(false),
-        name_("") {}
+        name_(""),
+        scale_(0.f),
+        zero_point_(0) {}

  Tensor(BufferBase *buffer, DataType dtype)
    : dtype_(dtype),
      buffer_(buffer),
      is_buffer_owner_(false),
      unused_(false),
-      name_("") {}
+      name_(""),
+      scale_(0.f),  // quantization scale; stays 0.f until SetScale()
+      zero_point_(0) {}  // quantization zero point; 0 until SetZeroPoint()

  Tensor(const BufferSlice &buffer_slice, DataType dtype)
      : dtype_(dtype),
        buffer_slice_(buffer_slice),
        is_buffer_owner_(false),
        unused_(false),
-        name_("") {
+        name_(""),
+        scale_(0.f),  // quantization scale; stays 0.f until SetScale()
+        zero_point_(0) {  // quantization zero point; 0 until SetZeroPoint()
    buffer_ = &buffer_slice_;
  }

@@ -363,6 +369,22 @@ class Tensor {
    MACE_DISABLE_COPY_AND_ASSIGN(MappingGuard);
  };

+  inline float scale() const {  // quantization scale last set by SetScale()
+    return scale_;
+  }
+
+  inline int32_t zero_point() const {  // value last set by SetZeroPoint()
+    return zero_point_;
+  }
+
+  inline void SetScale(float scale) {  // stores scale as-is, no validation
+    scale_ = scale;
+  }
+
+  inline void SetZeroPoint(int32_t zero_point) {  // stored as-is, unchecked
+    zero_point_ = zero_point;
+  }
+
 private:
  Allocator *allocator_;
  DataType dtype_;
@@ -373,6 +395,8 @@ class Tensor {
  bool is_buffer_owner_;
  bool unused_;
  std::string name_;
+  float scale_;
+  int32_t zero_point_;

  MACE_DISABLE_COPY_AND_ASSIGN(Tensor);
 };

--- a/mace/kernels/quantize.h
+++ b/mace/kernels/quantize.h
@@ -28,31 +28,61 @@ namespace kernels {
+// Derives affine quantization parameters for type T from a float range.
+// The range [in_min_data, in_max_data] is first widened to contain 0.f.
+// `*scale` receives the float step per quantized unit and `*zero_point` the
+// quantized value that stands for 0.f. The zero point is rounded to an
+// integer and, when rounding moved it, the scale is re-derived with one end
+// of the range held fixed. With `non_zero` set, the lower bound is pushed
+// down by one extra step, intended to keep in_min_data from quantizing to 0;
+// this is asserted to be requested only when T is unsigned.
 template<typename T>
 inline void AdjustRange(const float in_min_data,
                        const float in_max_data,
-                        float *out_min_data,
-                        float *out_max_data) {
+                        const bool non_zero,
+                        float *scale,
+                        int32_t *zero_point) {
  // re-range to make range include zero float and
  // make zero float as integer u8
-  const float quantized_max = std::numeric_limits<uint8_t>::max();
-  float out_min = fminf(0.f, in_min_data);
-  float out_max = fmaxf(0.f, in_max_data);
-  if (out_min < 0.f) {
-    float stepsize = (in_max_data - in_min_data) / quantized_max;
-    float quantized_zero = -in_min_data / stepsize;
-    float quantized_zero_near_int = roundf(quantized_zero);
-    if (fabs(quantized_zero - quantized_zero_near_int) > 1e-6) {
-      if (quantized_zero < quantized_zero_near_int) {
+  const T quantized_min = std::numeric_limits<T>::lowest();
+  const T quantized_max = std::numeric_limits<T>::max();
+  if (quantized_min < 0) {
+    MACE_ASSERT(!non_zero, "Cannot nudge to non_zero quantize value.");
+  }
+
+  float out_max = std::max(0.f, in_max_data);
+  float out_min = std::min(0.f, in_min_data);
+  // make in_min_data quantize as greater than 1
+  if (non_zero) {
+    out_min = std::min(out_min,
+                       in_min_data - (out_max - in_min_data)
+                           / (quantized_max - quantized_min - 1));
+  }
+
+  *scale = (out_max - out_min) / (quantized_max - quantized_min);
+  const float kEps = 1e-6;
+  if (out_min < -kEps && out_max > kEps) {
+    float quantized_zero = -out_min / *scale;
+    int32_t
+        quantized_zero_near_int = static_cast<int32_t>(roundf(quantized_zero));
+    *zero_point = quantized_zero_near_int;
+    if (fabs(quantized_zero - quantized_zero_near_int) > kEps) {
+      if (quantized_zero < quantized_zero_near_int || non_zero) {
        // keep out_max fixed, and move out_min
-        stepsize = out_max / (quantized_max - quantized_zero_near_int);
-        out_min = out_max - quantized_max * stepsize;
+        // if zero already sits on quantized_max the divisor is 0; keep scale
+        if (quantized_zero_near_int < quantized_max) {
+          *scale = out_max / (quantized_max - quantized_zero_near_int);
+        }
      } else {
        // keep out_min fixed, and move out_max
-        stepsize = -out_min / quantized_zero_near_int;
-        out_max = out_min + quantized_max * stepsize;
+        // if zero rounds to 0 the divisor is 0; keep the range-derived scale
+        if (quantized_zero_near_int > 0) {
+          *scale = -out_min / quantized_zero_near_int;
+        }
      }
    }
+  } else if (out_min > -kEps) {
+    *zero_point = quantized_min;
+  } else {
+    *zero_point = quantized_max;
  }
-  *out_min_data = out_min;
-  *out_max_data = out_max;
 }

 template<typename T>
@@ -67,6 +83,63 @@ inline T Saturate(float value) {
  }
 }

+// Finds the smallest and largest values among the `size` floats at `input`.
+// If `size` is 0 the outputs keep their sentinels: `*min_val` is the largest
+// finite float and `*max_val` is the lowest finite float.
+inline void FindMinMax(const float *input,
+                       const index_t size,
+                       float *min_val, float *max_val) {
+  float max_v = std::numeric_limits<float>::lowest();
+  float min_v = std::numeric_limits<float>::max();
+  for (index_t i = 0; i < size; ++i) {
+    max_v = std::max(max_v, input[i]);
+    min_v = std::min(min_v, input[i]);
+  }
+  *min_val = min_v;
+  *max_val = max_v;
+}
+
+// Quantizes `size` floats from `input` into `output` as values of type T.
+// The range is taken from the data itself (FindMinMax) and turned into
+// `*scale` / `*zero_point` by AdjustRange; each element is then stored as
+// Saturate<T>(round(zero_point + input / scale)). `non_zero` is forwarded to
+// AdjustRange unchanged.
+template<typename T>
+inline void Quantize(const float *input,
+                     const index_t size,
+                     bool non_zero,
+                     T *output,
+                     float *scale,
+                     int32_t *zero_point) {
+  float in_min_data;
+  float in_max_data;
+  FindMinMax(input, size, &in_min_data, &in_max_data);
+
+  AdjustRange<T>(in_min_data, in_max_data, non_zero,
+                 scale, zero_point);
+
+  // A degenerate range (e.g. all-zero or empty input) gives a zero scale;
+  // 1 / 0 is inf and inf * 0.f is NaN, so map everything to the zero point.
+  const float recip_scale = *scale > 0.f ? 1 / *scale : 0.f;
+  // Use index_t like `size` so large tensors are not truncated to int.
+  for (index_t i = 0; i < size; ++i) {
+    output[i] = Saturate<T>(roundf(*zero_point + recip_scale * input[i]));
+  }
+}
+
+// Converts `size` quantized values of type T back to floats:
+// output[i] = scale * (input[i] - zero_point).
+template<typename T>
+inline void Dequantize(const T *input,
+                       const index_t size,
+                       const float scale,
+                       const int32_t zero_point,
+                       float *output) {
+  for (index_t i = 0; i < size; ++i) {
+    output[i] = scale * (input[i] - zero_point);
+  }
+}
+
 template<DeviceType D, typename T>
 struct QuantizeFunctor;

@@ -75,26 +135,24 @@ struct QuantizeFunctor<CPU, uint8_t> {
  QuantizeFunctor() {}

  MaceStatus operator()(const Tensor *input,
-                  const Tensor *in_min,
-                  const Tensor *in_max,
-                  Tensor *output,
-                  Tensor *out_min,
-                  Tensor *out_max,
-                  StatsFuture *future) {
+                        const bool non_zero,
+                        Tensor *output,
+                        StatsFuture *future) {
    MACE_UNUSED(future);
+    Tensor::MappingGuard input_guard(input);
+    Tensor::MappingGuard output_guard(output);
    const float *input_data = input->data<float>();
-    const float in_min_data = in_min->data<float>()[0];
-    const float in_max_data = in_max->data<float>()[0];
    uint8_t *output_data = output->mutable_data<uint8_t>();
-    float *out_min_data = out_min->mutable_data<float>();
-    float *out_max_data = out_max->mutable_data<float>();
-
-    AdjustRange<uint8_t>(in_min_data, in_max_data, out_min_data, out_max_data);
-    float recip_stepsize = 255.f / (out_max_data[0] - out_min_data[0]);
-    for (int i = 0; i < input->size(); ++i) {
-      output_data[i] = Saturate<uint8_t>(roundf(
-        (input_data[i] - in_min_data) * recip_stepsize));
-    }
+    float scale;
+    int32_t zero_point;
+    Quantize(input_data,
+             input->size(),
+             non_zero,
+             output_data,
+             &scale,
+             &zero_point);
+    output->SetScale(scale);
+    output->SetZeroPoint(zero_point);

    return MACE_SUCCESS;
  }
@@ -108,91 +166,18 @@ struct DequantizeFunctor<CPU, uint8_t> {
  DequantizeFunctor() {}

  MaceStatus operator()(const Tensor *input,
-                  const Tensor *in_min,
-                  const Tensor *in_max,
-                  Tensor *output,
-                  StatsFuture *future) {
+                        Tensor *output,
+                        StatsFuture *future) {
    MACE_UNUSED(future);
+    Tensor::MappingGuard input_guard(input);
+    Tensor::MappingGuard output_guard(output);
    const uint8_t *input_data = input->data<uint8_t>();
-    const float in_min_data = in_min->data<float>()[0];
-    const float in_max_data = in_max->data<float>()[0];
    float *output_data = output->mutable_data<float>();
-
-    float stepsize = (in_max_data - in_min_data) / 255.0;
-    for (int i = 0; i < input->size(); ++i) {
-      output_data[i] = in_min_data + stepsize * input_data[i];
-    }
-
-    return MACE_SUCCESS;
-  }
-};
-
-template<DeviceType D, typename T>
-struct RequantizeFunctor;
-
-template<>
-struct RequantizeFunctor<CPU, uint8_t> {
-  RequantizeFunctor() {}
-
-  MaceStatus operator()(const Tensor *input,
-                  const Tensor *in_min,
-                  const Tensor *in_max,
-                  const Tensor *rerange_min,
-                  const Tensor *rerange_max,
-                  Tensor *output,
-                  Tensor *out_min,
-                  Tensor *out_max,
-                  StatsFuture *future) {
-    MACE_UNUSED(future);
-    const int *input_data = input->data<int>();
-    const float in_min_data = in_min->data<float>()[0];
-    const float in_max_data = in_max->data<float>()[0];
-
-    float rerange_min_data;
-    float rerange_max_data;
-    int min_val = std::numeric_limits<int>::max();
-    int max_val = std::numeric_limits<int>::lowest();
-    double
-      si = (in_max_data - in_min_data) / std::numeric_limits<uint32_t>::max();
-    if (rerange_min == nullptr && rerange_max == nullptr) {
-      for (int i = 0; i < input->size(); ++i) {
-        min_val = std::min(min_val, input_data[i]);
-        max_val = std::max(max_val, input_data[i]);
-      }
-      rerange_min_data = min_val * si;
-      rerange_max_data = max_val * si;
-    } else {
-      rerange_min_data = rerange_min->data<float>()[0];
-      rerange_max_data = rerange_max->data<float>()[0];
-    }
-
-    uint8_t *output_data = output->mutable_data<uint8_t>();
-    float *out_min_data = out_min->mutable_data<float>();
-    float *out_max_data = out_max->mutable_data<float>();
-
-    AdjustRange<uint8_t>(rerange_min_data,
-                         rerange_max_data,
-                         out_min_data,
-                         out_max_data);
-    /**
-     * f = qi * si = min_o + qo * so
-     * => qo = (qi * si - min_o) / so
-     *       = qi * (si/so) - min_o / so
-     *       = qi * (si / so) + zo
-     *
-     *    zo = -min_o / so
-     *
-     */
-    float so =
-      (out_max_data[0] - out_min_data[0]) / std::numeric_limits<uint8_t>::max();
-    double step_ratio = si / so;
-    float quantized_out_zero = -out_min_data[0] / so;
-
-    for (int i = 0; i < output->size(); ++i) {
-      output_data[i] =
-        Saturate<uint8_t>(roundf(
-          quantized_out_zero + input_data[i] * step_ratio));
-    }
+    Dequantize(input_data,
+               input->size(),
+               input->scale(),
+               input->zero_point(),
+               output_data);

    return MACE_SUCCESS;
  }

--- a/mace/ops/ops_register.cc
+++ b/mace/ops/ops_register.cc
@@ -45,7 +45,6 @@ extern void Register_Pooling(OperatorRegistryBase *op_registry);
 extern void Register_Proposal(OperatorRegistryBase *op_registry);
 extern void Register_Quantize(OperatorRegistryBase *op_registry);
 extern void Register_ReduceMean(OperatorRegistryBase *op_registry);
-extern void Register_Requantize(OperatorRegistryBase *op_registry);
 extern void Register_Reshape(OperatorRegistryBase *op_registry);
 extern void Register_ResizeBilinear(OperatorRegistryBase *op_registry);
 extern void Register_Shape(OperatorRegistryBase *op_registry);
@@ -96,7 +95,6 @@ OperatorRegistry::OperatorRegistry() : OperatorRegistryBase() {
  ops::Register_Proposal(this);
  ops::Register_Quantize(this);
  ops::Register_ReduceMean(this);
-  ops::Register_Requantize(this);
  ops::Register_Reshape(this);
  ops::Register_ResizeBilinear(this);
  ops::Register_Shape(this);

--- a/mace/ops/quantize.cc
+++ b/mace/ops/quantize.cc
@@ -33,13 +33,5 @@ void Register_Dequantize(OperatorRegistryBase *op_registry) {
                         DequantizeOp<DeviceType::CPU, uint8_t>);
 }

-void Register_Requantize(OperatorRegistryBase *op_registry) {
-  MACE_REGISTER_OPERATOR(op_registry, OpKeyBuilder("Requantize")
-                                          .Device(DeviceType::CPU)
-                                          .TypeConstraint<uint8_t>("T")
-                                          .Build(),
-                         RequantizeOp<DeviceType::CPU, uint8_t>);
-}
-
 }  // namespace ops
 }  // namespace mace
--- a/mace/ops/quantize.h
+++ b/mace/ops/quantize.h
@@ -21,39 +21,33 @@
 namespace mace {
 namespace ops {

-template <DeviceType D, class T>
+template<DeviceType D, class T>  // op wrapper around QuantizeFunctor<D, T>
 class QuantizeOp : public Operator<D, T> {
 public:
  QuantizeOp(const OperatorDef &operator_def, Workspace *ws)
-      : Operator<D, T>(operator_def, ws) {}
+      : Operator<D, T>(operator_def, ws),
+        non_zero_(  // optional int arg "non_zero", default 0, read as bool
+            static_cast<bool>(OperatorBase::GetOptionalArg<int>("non_zero",
+                                                                0))) {}

  MaceStatus Run(StatsFuture *future) override {
    const Tensor *input = this->Input(INPUT);
-    const Tensor *in_min = this->Input(IN_MIN);
-    const Tensor *in_max = this->Input(IN_MAX);
-
-    MACE_CHECK(in_min->size() == 1, "min val tensor has more than 1 value");
-    MACE_CHECK(in_max->size() == 1, "max val tensor has more than 1 value");
-
    Tensor *output = this->Output(OUTPUT);
-    Tensor *out_min = this->Output(OUT_MIN);
-    Tensor *out_max = this->Output(OUT_MAX);
    MACE_RETURN_IF_ERROR(output->ResizeLike(input));
-    MACE_RETURN_IF_ERROR(out_min->ResizeLike(in_min));
-    MACE_RETURN_IF_ERROR(out_max->ResizeLike(in_max));

-    return functor_(input, in_min, in_max, output, out_min, out_max, future);
+    return functor_(input, non_zero_, output, future);  // output sized above
  }

 private:
  kernels::QuantizeFunctor<D, T> functor_;
+  bool non_zero_;  // forwarded to functor_ on every Run()

 protected:
-  MACE_OP_INPUT_TAGS(INPUT, IN_MIN, IN_MAX);
-  MACE_OP_OUTPUT_TAGS(OUTPUT, OUT_MIN, OUT_MAX);
+  MACE_OP_INPUT_TAGS(INPUT);
+  MACE_OP_OUTPUT_TAGS(OUTPUT);
 };

-template <DeviceType D, class T>
+template<DeviceType D, class T>
 class DequantizeOp : public Operator<D, T> {
 public:
  DequantizeOp(const OperatorDef &operator_def, Workspace *ws)
@@ -61,70 +55,20 @@ class DequantizeOp : public Operator<D, T> {

  MaceStatus Run(StatsFuture *future) override {
    const Tensor *input = this->Input(INPUT);
-    const Tensor *in_min = this->Input(IN_MIN);
-    const Tensor *in_max = this->Input(IN_MAX);
-
-    MACE_CHECK(in_min->size() == 1, "min val tensor has more than 1 value");
-    MACE_CHECK(in_max->size() == 1, "max val tensor has more than 1 value");
-
    Tensor *output = this->Output(OUTPUT);
    MACE_RETURN_IF_ERROR(output->ResizeLike(input));

-    return functor_(input, in_min, in_max, output, future);
+    return functor_(input, output, future);
  }

 private:
  kernels::DequantizeFunctor<D, T> functor_;

 protected:
-  MACE_OP_INPUT_TAGS(INPUT, IN_MIN, IN_MAX);
+  MACE_OP_INPUT_TAGS(INPUT);
  MACE_OP_OUTPUT_TAGS(OUTPUT);
 };

-template <DeviceType D, class T>
-class RequantizeOp : public Operator<D, T> {
- public:
-  RequantizeOp(const OperatorDef &operator_def, Workspace *ws)
-      : Operator<D, T>(operator_def, ws) {}
-
-  MaceStatus Run(StatsFuture *future) override {
-    const Tensor *input = this->Input(INPUT);
-    const Tensor *in_min = this->Input(IN_MIN);
-    const Tensor *in_max = this->Input(IN_MAX);
-    const Tensor *rerange_min = nullptr;
-    const Tensor *rerange_max = nullptr;
-
-    MACE_CHECK(in_min->size() == 1, "min val tensor has more than 1 value");
-    MACE_CHECK(in_max->size() == 1, "max val tensor has more than 1 value");
-
-    if (this->InputSize() >= 5) {
-      rerange_min = this->Input(RERANGE_MIN);
-      rerange_max = this->Input(RERANGE_MAX);
-      MACE_CHECK(rerange_min->size() == 1,
-                 "rerange min val tensor has more than 1 value");
-      MACE_CHECK(rerange_max->size() == 1,
-                 "rerange max val tensor has more than 1 value");
-    }
-
-    Tensor *output = this->Output(OUTPUT);
-    Tensor *out_min = this->Output(OUT_MIN);
-    Tensor *out_max = this->Output(OUT_MAX);
-    MACE_RETURN_IF_ERROR(output->ResizeLike(input));
-    MACE_RETURN_IF_ERROR(out_min->ResizeLike(in_min));
-    MACE_RETURN_IF_ERROR(out_max->ResizeLike(out_max));
-
-    return functor_(input, in_min, in_max, rerange_min, rerange_max, output,
-                    out_min, out_max, future);
-  }
-
- private:
-  kernels::RequantizeFunctor<D, T> functor_;
-
- protected:
-  MACE_OP_INPUT_TAGS(INPUT, IN_MIN, IN_MAX, RERANGE_MIN, RERANGE_MAX);
-  MACE_OP_OUTPUT_TAGS(OUTPUT, OUT_MIN, OUT_MAX);
-};
-
 }  // namespace ops
 }  // namespace mace


--- a/mace/ops/quantize_test.cc
+++ b/mace/ops/quantize_test.cc
@@ -19,190 +19,63 @@ namespace mace {
 namespace ops {
 namespace test {

-class QuantizeTest : public OpsTestBase {};
+namespace {

-TEST_F(QuantizeTest, TestQuantize) {
-  // Construct graph
+void TestQuantizeDequantize(const std::vector<float> &input, bool non_zero) {
  OpsTestNet net;
-
-  // Add input data
-  net.AddInputFromArray<CPU, float>("Input", {1, 2, 3, 1},
-                                    {-2, -1, 1, 2, 3, 4});
-  net.AddInputFromArray<CPU, float>("InputMin", {1}, {-3});
-  net.AddInputFromArray<CPU, float>("InputMax", {1}, {5});
-
+  net.AddInputFromArray<CPU, float>("Input",
+                                    {static_cast<index_t>(input.size())},
+                                    input);
  OpDefBuilder("Quantize", "QuantizeTest")
      .Input("Input")
-      .Input("InputMin")
-      .Input("InputMax")
-      .Output("Output")
-      .Output("OutputMin")
-      .Output("OutputMax")
-      .OutputType({DT_UINT8, DT_FLOAT, DT_FLOAT})
+      .Output("QuantizeOutput")
+      .OutputType({DT_UINT8})
      .AddIntArg("T", DT_UINT8)
+      .AddIntArg("non_zero", non_zero)
      .Finalize(net.NewOperatorDef());

-  // Run
  net.RunOp();

-  auto output = net.GetTensor("Output");
-  auto output_min = net.GetTensor("OutputMin");
-  auto output_max = net.GetTensor("OutputMax");
-
-  auto expected_output =
-      CreateTensor<uint8_t>({1, 2, 3, 1}, {32, 64, 127, 159, 191, 223});
-  auto expected_min = CreateTensor<float>({1}, {-3.01887});
-  auto expected_max = CreateTensor<float>({1}, {5});
-
-  ExpectTensorNear<uint8_t>(*expected_output, *output);
-  ExpectTensorNear<float>(*expected_min, *output_min);
-  ExpectTensorNear<float>(*expected_max, *output_max);
-}
-
-TEST_F(QuantizeTest, TestQuantizeTrend) {
-  // Construct graph
-  OpsTestNet net;
-
-  // Add input data
-  net.AddRandomInput<CPU, float>("Input", {100});
-  const float *input_data = net.GetTensor("Input")->data<float>();
-  net.AddInputFromArray<CPU, float>(
-      "InputMin", {1},
-      {*std::min_element(input_data,
-                         input_data + net.GetTensor("Input")->size())});
-  net.AddInputFromArray<CPU, float>(
-      "InputMax", {1},
-      {*std::max_element(input_data,
-                         input_data + net.GetTensor("Input")->size())});
-
-  OpDefBuilder("Quantize", "QuantizeTest")
-      .Input("Input")
-      .Input("InputMin")
-      .Input("InputMax")
-      .Output("Output")
-      .Output("OutputMin")
-      .Output("OutputMax")
-      .OutputType({DT_UINT8, DT_FLOAT, DT_FLOAT})
-      .AddIntArg("T", DT_UINT8)
-      .Finalize(net.NewOperatorDef());
-
-  // Run
-  net.RunOp();
-
-  auto output = net.GetTensor("Output");
-
-  const uint8_t *output_data = net.GetTensor("Output")->data<uint8_t>();
-  for (int i = 1; i < output->size(); ++i) {
-    if (input_data[i] > input_data[i - 1]) {
-      EXPECT_GE(output_data[i], output_data[i - 1]);
-    } else if (input_data[i] == input_data[i - 1]) {
-      EXPECT_EQ(output_data[i], output_data[i - 1]);
-    } else {
-      EXPECT_LE(output_data[i], output_data[i - 1]);
+  if (non_zero) {
+    Tensor *quantized_output = net.GetTensor("QuantizeOutput");
+    Tensor::MappingGuard guard(quantized_output);
+    const uint8_t *quantized_output_data = quantized_output->data<uint8_t>();
+    for (index_t i = 0; i < quantized_output->size(); ++i) {
+      EXPECT_GT(quantized_output_data[i], 0);
    }
  }
-}
-
-TEST_F(QuantizeTest, TestDequantize) {
-  // Construct graph
-  OpsTestNet net;
-
-  // Add input data
-  net.AddInputFromArray<CPU, uint8_t>("Input", {1, 2, 3, 1},
-                                      {32, 64, 127, 159, 191, 223});
-  net.AddInputFromArray<CPU, float>("InputMin", {1}, {-3.01887});
-  net.AddInputFromArray<CPU, float>("InputMax", {1}, {5});

-  OpDefBuilder("Dequantize", "DequantizeTest")
-      .Input("Input")
-      .Input("InputMin")
-      .Input("InputMax")
+  OpDefBuilder("Dequantize", "DeQuantizeTest")
+      .Input("QuantizeOutput")
      .Output("Output")
      .OutputType({DT_FLOAT})
      .AddIntArg("T", DT_UINT8)
      .Finalize(net.NewOperatorDef());

-  // Run
-  net.RunOp();
-
-  auto output = net.GetTensor("Output");
-  auto expected_output =
-      CreateTensor<float>({1, 2, 3, 1}, {-2, -1, 1, 2, 3, 4});
-  auto expected_min = CreateTensor<float>({1}, {-3.01887});
-  auto expected_max = CreateTensor<float>({1}, {5});
-
-  ExpectTensorNear<float>(*expected_output, *output, 0.1, 0.01);
-}
-
-TEST_F(QuantizeTest, TestRequantizeWithMinMax) {
-  // Construct graph
-  OpsTestNet net;
-
-  // Add input data
-  net.AddInputFromArray<CPU, int>(
-      "Input", {1, 2, 3, 1},
-      {-1073741824, -536870912, 536870912, 1073741824, 1610612736, 2147483647});
-  net.AddInputFromArray<CPU, float>("InputMin", {1}, {-3});
-  net.AddInputFromArray<CPU, float>("InputMax", {1}, {5});
-  net.AddInputFromArray<CPU, float>("RerangeMin", {1}, {-3.01887});
-  net.AddInputFromArray<CPU, float>("RerangeMax", {1}, {5});
-
-  OpDefBuilder("Requantize", "RequantizeTest")
-      .Input("Input")
-      .Input("InputMin")
-      .Input("InputMax")
-      .Input("RerangeMin")
-      .Input("RerangeMax")
-      .Output("Output")
-      .Output("OutputMin")
-      .Output("OutputMax")
-      .OutputType({DT_UINT8, DT_FLOAT, DT_FLOAT})
-      .AddIntArg("T", DT_UINT8)
-      .Finalize(net.NewOperatorDef());
-
-  // Run
  net.RunOp();

  auto output = net.GetTensor("Output");
-  auto expected_output =
-      CreateTensor<uint8_t>({1, 2, 3, 1}, {32, 64, 128, 160, 191, 223});
-  auto expected_min = CreateTensor<float>({1}, {-3.01887});
-  auto expected_max = CreateTensor<float>({1}, {5});

-  ExpectTensorNear<uint8_t>(*expected_output, *output);
+  ExpectTensorNear<float>(*net.GetTensor("Input"),
+                          *net.GetTensor("Output"),
+                          0.1);
 }

-TEST_F(QuantizeTest, TestRequantizeWithoutMinMax) {
-  // Construct graph
-  OpsTestNet net;
-
-  // Add input data
-  net.AddInputFromArray<CPU, int>(
-      "Input", {1, 2, 3, 1},
-      {-1073741824, -536870912, 536870912, 1073741824, 1610612736, 2147483647});
-  net.AddInputFromArray<CPU, float>("InputMin", {1}, {-3});
-  net.AddInputFromArray<CPU, float>("InputMax", {1}, {5});
+}  // namespace

-  OpDefBuilder("Requantize", "RequantizeTest")
-      .Input("Input")
-      .Input("InputMin")
-      .Input("InputMax")
-      .Output("Output")
-      .Output("OutputMin")
-      .Output("OutputMax")
-      .OutputType({DT_UINT8, DT_FLOAT, DT_FLOAT})
-      .AddIntArg("T", DT_UINT8)
-      .Finalize(net.NewOperatorDef());
-
-  // Run
-  net.RunOp();
+class QuantizeTest : public OpsTestBase {};

-  auto output = net.GetTensor("Output");
-  auto expected_output =
-      CreateTensor<uint8_t>({1, 2, 3, 1}, {0, 43, 128, 170, 213, 255});
-  auto expected_min = CreateTensor<float>({1}, {-3.01887});
-  auto expected_max = CreateTensor<float>({1}, {5});
-  ExpectTensorNear<uint8_t>(*expected_output, *output);
+TEST_F(QuantizeTest, TestQuantize) {  // Quantize -> Dequantize round trips
+  TestQuantizeDequantize({-2, -1, 0, 1, 2, 3, 4}, false);  // range spans 0
+  TestQuantizeDequantize({-2, -1, 0, 1, 2, 3, 4}, true);
+  TestQuantizeDequantize({0, 1, 2, 3, 4}, false);  // range starts at 0
+  TestQuantizeDequantize({0, 1, 2, 3, 4}, true);
+  TestQuantizeDequantize({2, 3, 4, 5, 6}, false);  // all positive
+  TestQuantizeDequantize({2, 3, 4, 5, 6}, true);
+  TestQuantizeDequantize({2, 4, 6, 8}, false);  // all positive, step of 2
+  TestQuantizeDequantize({2, 4, 6, 8}, true);
+  TestQuantizeDequantize({-2, -4, -6, -8}, false);  // all negative
+  TestQuantizeDequantize({-2, -4, -6, -8}, true);
 }

 }  // namespace test