Commit 1e2f7619 authored by hjchen2

Refine: implement naive quantize and dequantize kernel

Parent a5360331
@@ -7,8 +7,8 @@ option(DEBUGING "enable debug mode" ON)
 option(USE_EXCEPTION "use std exception" OFF)
 option(LOG_PROFILE "log profile" OFF)
 # select the platform to build
-option(X86 "x86" OFF)
-option(CPU "armv7 with neon" ON)
+option(X86 "x86" ON)
+option(CPU "armv7 with neon" OFF)
 option(MALI_GPU "mali gpu" OFF)
 option(FPGA "fpga" OFF)
...
@@ -64,6 +64,9 @@ const char *G_OP_TYPE_BILINEAR_INTERP = "bilinear_interp";
 const char *G_OP_TYPE_FLATTEN = "flatten";
 const char *G_OP_TYPE_SHAPE = "shape";
+const char *G_OP_TYPE_QUANTIZE = "quantize";
+const char *G_OP_TYPE_DEQUANTIZE = "dequantize";
 
 std::unordered_map<
     std::string, std::pair<std::vector<std::string>, std::vector<std::string>>>
     op_input_output_key = {
@@ -112,6 +115,9 @@ std::unordered_map<
         {G_OP_TYPE_BILINEAR_INTERP, {{"OutSize", "X"}, {"Out"}}},
         {G_OP_TYPE_FLATTEN, {{"X"}, {"Out"}}},
         {G_OP_TYPE_SHAPE, {{"Input"}, {"Out"}}},
-        {G_OP_TYPE_CONV_TRANSPOSE, {{"Input"}, {"Output"}}}};
+        {G_OP_TYPE_CONV_TRANSPOSE, {{"Input"}, {"Output"}}},
+        {G_OP_TYPE_QUANTIZE, {{"X"}, {"Out", "OutScale"}}},
+        {G_OP_TYPE_DEQUANTIZE, {{"X", "Scale"}, {"Out"}}}
+};
 
 } // namespace paddle_mobile
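Each entry in op_input_output_key pairs an op type with the variable names it reads and writes, so the new quantize op declares one input "X" and two outputs "Out" and "OutScale". A self-contained sketch of that lookup (a hypothetical standalone copy, not the framework header):

// Sketch: how the op-type -> (inputs, outputs) registration above reads back.
#include <cstdio>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

int main() {
  std::unordered_map<
      std::string, std::pair<std::vector<std::string>, std::vector<std::string>>>
      io_key = {{"quantize", {{"X"}, {"Out", "OutScale"}}},
                {"dequantize", {{"X", "Scale"}, {"Out"}}}};
  for (const auto &out : io_key["quantize"].second) {
    printf("quantize output: %s\n", out.c_str());  // prints Out, then OutScale
  }
  return 0;
}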
@@ -82,8 +82,9 @@ enum PMStatus {
 enum RoundType {
   ROUND_UNK = 0,
-  ROUND_NEAREST_TOWARDS_ZERO = 1,
-  ROUND_NEAREST_TO_EVEN = 2
+  ROUND_NEAREST_AWAY_ZERO = 1,
+  ROUND_NEAREST_TOWARDS_ZERO = 2,
+  ROUND_NEAREST_TO_EVEN = 3
 };
 
 extern const char *G_OP_TYPE_CONV;
@@ -127,6 +128,9 @@ extern const char *G_OP_TYPE_FUSION_CONV_BN;
 extern const char *G_OP_TYPE_CONV_TRANSPOSE;
 extern const char *G_OP_TYPE_PRELU;
+extern const char *G_OP_TYPE_QUANTIZE;
+extern const char *G_OP_TYPE_DEQUANTIZE;
 
 extern std::unordered_map<
     std::string, std::pair<std::vector<std::string>, std::vector<std::string>>>
     op_input_output_key;
...
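The three rounding modes the enum now distinguishes only differ on halfway values like 2.5. A standalone sketch (not part of the commit) using standard C++ analogues:

// Sketch: standard-C++ equivalents of the three RoundType modes, at 2.5.
#include <cfenv>
#include <cmath>
#include <cstdio>

int main() {
  float value = 2.5f;
  // ROUND_NEAREST_AWAY_ZERO: ties round away from zero -> 3
  printf("away from zero: %.0f\n", std::round(value));
  // ROUND_NEAREST_TOWARDS_ZERO: the kernel below uses trunc, which drops
  // the fractional part entirely -> 2
  printf("towards zero:   %.0f\n", std::trunc(value));
  // ROUND_NEAREST_TO_EVEN: ties round to the even neighbor -> 2
  std::fesetround(FE_TONEAREST);
  printf("to even:        %.0f\n", std::nearbyint(value));
  return 0;
}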
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#ifdef PADDLE_MOBILE_CPU
+
 #include "operators/kernel/dequantize_kernel.h"
 
 namespace paddle_mobile {
@@ -26,7 +28,18 @@ template<>
 void DequantizeKernel<CPU, float>::Compute(
     const DequantizeParam<CPU> &param) const {
   // TODO
+  const Tensor *input = param.input_;
+  Tensor *output = param.out_;
+  float activation_scale = param.activation_scale_->data<float>()[0];
+  float weight_scale = param.weight_scale_;
+  const int32_t *x = input->data<const int32_t>();
+  float *y = output->mutable_data<float>();
+  for (size_t i = 0; i < output->numel(); ++i) {
+    y[i] = x[i] / activation_scale / weight_scale;
+  }
 }
 
 } // namespace paddle_mobile
 } // namespace operators
+
+#endif
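The dequantize arithmetic simply divides out the two scale factors that quantization multiplied in: the per-batch activation scale and the per-layer weight scale. A minimal standalone sketch with made-up numbers (not the framework API):

// Sketch of the dequantize arithmetic above; all values are hypothetical.
#include <cstdint>
#include <cstdio>

int main() {
  // Suppose quantize chose activation_scale = 127 / max_abs = 25.4
  // (max_abs = 5.0), and the weights were quantized with weight_scale = 12.7.
  float activation_scale = 25.4f;
  float weight_scale = 12.7f;
  int32_t accumulator = 3226;  // int8 x int8 products accumulated in int32
  // Recover the float value by dividing out both scales, as the kernel does.
  float y = accumulator / activation_scale / weight_scale;
  printf("dequantized: %f\n", y);  // ~10.0
  return 0;
}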
@@ -12,11 +12,67 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#ifdef PADDLE_MOBILE_CPU
+
 #include "operators/kernel/quantize_kernel.h"
+
+#include <cmath>
+#include <limits>
 
 namespace paddle_mobile {
 namespace operators {
 
+static float find_abs_max(const Tensor *input) {
+  float max_abs = 0.f;
+  const float *x = input->data<const float>();
+  for (size_t i = 0; i < input->numel(); ++i) {
+    float value = std::abs(x[i]);
+    if (value > max_abs) {
+      max_abs = value;
+    }
+  }
+  return max_abs;
+}
+
+static void quantize_round_to_even(const Tensor *input, const float scale,
+                                   Tensor *output) {
+  const float *x = input->data<const float>();
+  int8_t *y = output->mutable_data<int8_t>();
+  for (size_t i = 0; i < input->numel(); ++i) {
+    float value = x[i] * scale;
+    long long quant = std::llround(value);
+    if (std::abs(std::abs(std::round(value) - value) - 0.5f) > 0) {
+      // not a tie: the nearest integer is unambiguous
+      y[i] = quant;
+    } else if (quant % 2 == 0) {
+      // tie, and llround already landed on the even neighbor
+      y[i] = quant;
+    } else {
+      // tie on an odd value: step back towards zero to reach the even one
+      y[i] = quant + ((quant > 0) ? -1 : 1);
+    }
+  }
+}
+
+static void quantize_round_to_zero(const Tensor *input, const float scale,
+                                   Tensor *output) {
+  const float *x = input->data<const float>();
+  int8_t *y = output->mutable_data<int8_t>();
+  for (size_t i = 0; i < input->numel(); ++i) {
+    y[i] = std::trunc(x[i] * scale);
+  }
+}
+
+static void quantize_round_to_nearest(const Tensor *input, const float scale,
+                                      Tensor *output) {
+  const float *x = input->data<const float>();
+  int8_t *y = output->mutable_data<int8_t>();
+  for (size_t i = 0; i < input->numel(); ++i) {
+    y[i] = std::round(x[i] * scale);
+  }
+}
+
 template<>
 bool QuantizeKernel<CPU, float>::Init(QuantizeParam<CPU> *param) {
   return true;
@@ -26,7 +82,37 @@ template<>
 void QuantizeKernel<CPU, float>::Compute(
     const QuantizeParam<CPU> &param) const {
   // TODO
+  float max_abs = 0.f;
+  const Tensor *input = param.input_;
+  Tensor *output = param.out_;
+  Tensor *output_scale = param.online_scale_;
+  if (param.is_static_) {
+    max_abs = param.static_scale_;
+  } else {
+    max_abs = find_abs_max(input);
+  }
+  // clamp to avoid dividing by zero on an all-zero input
+  if (max_abs < std::numeric_limits<float>::min()) {
+    max_abs = std::numeric_limits<float>::min();
+  }
+  // only support int8 currently
+  float online_scale = 127 / max_abs;
+  output_scale->mutable_data<float>()[0] = online_scale;
+  switch (param.round_type_) {
+    case ROUND_NEAREST_TO_EVEN:
+      quantize_round_to_even(input, online_scale, output);
+      break;
+    case ROUND_NEAREST_TOWARDS_ZERO:
+      quantize_round_to_zero(input, online_scale, output);
+      break;
+    case ROUND_NEAREST_AWAY_ZERO:
+      quantize_round_to_nearest(input, online_scale, output);
+      break;
+    default:
+      LOG(kLOG_ERROR) << "round type is not supported.";
+      break;
+  }
 }
 
 } // namespace paddle_mobile
 } // namespace operators
+
+#endif
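The round-to-even branch is the subtle one: llround resolves ties away from zero, so odd ties need one step back towards zero. A standalone sanity check of the same logic (framework Tensor types replaced by a plain array; an illustrative sketch, not the committed kernel):

// Sketch: the round-to-even tie-breaking logic above, on halfway values.
#include <cmath>
#include <cstdint>
#include <cstdio>

static int8_t round_to_even(float value) {
  long long quant = std::llround(value);  // rounds ties away from zero
  if (std::abs(std::abs(std::round(value) - value) - 0.5f) > 0) {
    return quant;  // not a tie: nearest integer is unambiguous
  }
  if (quant % 2 == 0) {
    return quant;  // tie, and llround already landed on the even neighbor
  }
  return quant + ((quant > 0) ? -1 : 1);  // tie on odd: step towards zero
}

int main() {
  const float inputs[] = {2.5f, 3.5f, -2.5f, -3.5f, 2.4f};
  for (float v : inputs) {
    printf("%+.1f -> %+d\n", v, (int)round_to_even(v));
  }
  // Expected: +2.5 -> +2, +3.5 -> +4, -2.5 -> -2, -3.5 -> -4, +2.4 -> +2
  return 0;
}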
@@ -26,6 +26,15 @@ template<>
 void DequantizeKernel<X86, float>::Compute(
     const DequantizeParam<X86> &param) const {
   // TODO
+  const Tensor *input = param.input_;
+  Tensor *output = param.out_;
+  float activation_scale = param.activation_scale_->data<float>()[0];
+  float weight_scale = param.weight_scale_;
+  const int32_t *x = input->data<const int32_t>();
+  float *y = output->mutable_data<float>();
+  for (size_t i = 0; i < output->numel(); ++i) {
+    y[i] = x[i] / activation_scale / weight_scale;
+  }
 }
 
 } // namespace paddle_mobile
...
@@ -12,11 +12,67 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#ifdef PADDLE_MOBILE_X86
+
 #include "operators/kernel/quantize_kernel.h"
+
+#include <cmath>
+#include <limits>
 
 namespace paddle_mobile {
 namespace operators {
 
+static float find_abs_max(const Tensor *input) {
+  float max_abs = 0.f;
+  const float *x = input->data<const float>();
+  for (size_t i = 0; i < input->numel(); ++i) {
+    float value = std::abs(x[i]);
+    if (value > max_abs) {
+      max_abs = value;
+    }
+  }
+  return max_abs;
+}
+
+static void quantize_round_to_even(const Tensor *input, const float scale,
+                                   Tensor *output) {
+  const float *x = input->data<const float>();
+  int8_t *y = output->mutable_data<int8_t>();
+  for (size_t i = 0; i < input->numel(); ++i) {
+    float value = x[i] * scale;
+    long long quant = std::llround(value);
+    if (std::abs(std::abs(std::round(value) - value) - 0.5f) > 0) {
+      // not a tie: the nearest integer is unambiguous
+      y[i] = quant;
+    } else if (quant % 2 == 0) {
+      // tie, and llround already landed on the even neighbor
+      y[i] = quant;
+    } else {
+      // tie on an odd value: step back towards zero to reach the even one
+      y[i] = quant + ((quant > 0) ? -1 : 1);
+    }
+  }
+}
+
+static void quantize_round_to_zero(const Tensor *input, const float scale,
+                                   Tensor *output) {
+  const float *x = input->data<const float>();
+  int8_t *y = output->mutable_data<int8_t>();
+  for (size_t i = 0; i < input->numel(); ++i) {
+    y[i] = std::trunc(x[i] * scale);
+  }
+}
+
+static void quantize_round_to_nearest(const Tensor *input, const float scale,
+                                      Tensor *output) {
+  const float *x = input->data<const float>();
+  int8_t *y = output->mutable_data<int8_t>();
+  for (size_t i = 0; i < input->numel(); ++i) {
+    y[i] = std::round(x[i] * scale);
+  }
+}
+
 template<>
 bool QuantizeKernel<X86, float>::Init(QuantizeParam<X86> *param) {
   return true;
@@ -26,7 +82,37 @@ template<>
 void QuantizeKernel<X86, float>::Compute(
     const QuantizeParam<X86> &param) const {
   // TODO
+  float max_abs = 0.f;
+  const Tensor *input = param.input_;
+  Tensor *output = param.out_;
+  Tensor *output_scale = param.online_scale_;
+  if (param.is_static_) {
+    max_abs = param.static_scale_;
+  } else {
+    max_abs = find_abs_max(input);
+  }
+  // clamp to avoid dividing by zero on an all-zero input
+  if (max_abs < std::numeric_limits<float>::min()) {
+    max_abs = std::numeric_limits<float>::min();
+  }
+  // only support int8 currently
+  float online_scale = 127 / max_abs;
+  output_scale->mutable_data<float>()[0] = online_scale;
+  switch (param.round_type_) {
+    case ROUND_NEAREST_TO_EVEN:
+      quantize_round_to_even(input, online_scale, output);
+      break;
+    case ROUND_NEAREST_TOWARDS_ZERO:
+      quantize_round_to_zero(input, online_scale, output);
+      break;
+    case ROUND_NEAREST_AWAY_ZERO:
+      quantize_round_to_nearest(input, online_scale, output);
+      break;
+    default:
+      LOG(kLOG_ERROR) << "round type is not supported.";
+      break;
+  }
 }
 
 } // namespace paddle_mobile
 } // namespace operators
+
+#endif
@@ -2349,12 +2349,6 @@ class QuantizeParam : public OpParam {
     // online
     // scale = max(abs(x))
     online_scale_ = GetVarValue<GType>("OutScale", outputs, scope);
-    if (HasAttr("is_signed", attrs)) {
-      is_signed_ = GetAttr<bool>("signed", attrs);
-    }
-    if (HasAttr("mantissa", attrs)) {
-      mantissa_bits_ = GetAttr<bool>("mantissa", attrs);
-    }
     // offline
     if (HasAttr("static_scale", attrs)) {
       static_scale_ = GetAttr<float>("static_scale", attrs);
@@ -2372,11 +2366,6 @@ class QuantizeParam : public OpParam {
   RType *out_;
   //
   RType *online_scale_;
-  // signed quantize or unsigned quantize
-  bool is_signed_ = true;
-  // mantissa bit width
-  // for int8, mantissa bits is 7
-  int mantissa_bits_ = 7;
   // if static scale or not
   bool is_static_ = false;
   // quantize scale
...
@@ -17,8 +17,11 @@ limitations under the License. */
 #include "../test_include.h"
 
 int main() {
+#if defined(PADDLE_MOBILE_CPU)
+  paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
+#elif defined(PADDLE_MOBILE_X86)
   paddle_mobile::PaddleMobile<paddle_mobile::X86> paddle_mobile;
+#endif
   paddle_mobile.SetThreadNum(4);
   bool optimize = true;
   auto time1 = time();
...