提交 d3aef22d 编写于 作者: Y Yin Li

Add quantizer

上级 3738481f
......@@ -47,24 +47,33 @@ cc_library(
srcs = glob(
["*.cc",],
exclude=[
"logging.cc"
"logging.cc",
"opencl_allocator.cc",
]) + if_android([
"opencl_allocator.cc",
]),
hdrs = glob(
["*.h"],
exclude=[
"logging.h"
"logging.h",
"opencl_allocator.h",
]) + if_android([
"opencl_allocator.h",
]),
copts = ["-std=c++11"],
copts = ["-std=c++11"] + if_android([
"-D__USE_OPENCL",
]),
linkopts = if_android([
"-pie",
]),
deps = [
":logging",
":opencl_runtime",
"//mace/proto:cc_proto",
"//mace/proto:stats_proto",
"//mace/utils",
],
] + if_android([
":opencl_runtime",
]),
)
# Main program for tests
......
......@@ -3,7 +3,9 @@
//
#include "mace/core/allocator.h"
#ifdef __USE_OPENCL
#include "mace/core/opencl_allocator.h"
#endif
namespace mace {
......@@ -23,6 +25,8 @@ Allocator *GetDeviceAllocator(DeviceType type) {
MACE_REGISTER_ALLOCATOR(DeviceType::CPU, new CPUAllocator());
MACE_REGISTER_ALLOCATOR(DeviceType::NEON, new CPUAllocator());
#ifdef __USE_OPENCL
MACE_REGISTER_ALLOCATOR(DeviceType::OPENCL, new OpenCLAllocator());
#endif
} // namespace mace
......@@ -4,7 +4,9 @@
#include "mace/core/net.h"
#include "mace/utils/utils.h"
#ifdef __USE_OPENCL
#include "mace/core/runtime/opencl/opencl_runtime.h"
#endif
namespace mace {
......
......@@ -60,5 +60,6 @@ cc_test(
deps = [
"@gtest//:gtest_main",
":dsp",
"//mace/kernels:kernels",
],
)
......@@ -175,11 +175,11 @@ TEST(QuantizedAddTest, QuantizedAdd) {
VLOG(0) << wrapper.ExecuteGraph(input_tensor, &output_tensor);
wrapper.PrintLog();
// -120.0~176.47, [17, 146,232]
// -120.0~176.47, [17, 146, 229]
vector<uint8_t> expected {17, 146, 229};
for (int i = 0; i < output_tensor.size(); ++i) {
std::cout << (int32_t) output_data[i] << " ";
EXPECT_EQ(expected[i], output_data[i]);
}
std::cout << std::endl;
VLOG(0) << wrapper.TeardownGraph();
wrapper.Finalize();
......
......@@ -121,10 +121,10 @@ TEST(QuantizedReluTest, QuantizedRelu) {
VLOG(0) << wrapper.ExecuteGraph(input_tensor, &output_tensor);
wrapper.PrintLog();
vector<uint8_t> expected {128, 128, 128, 192, 255};
for (int i = 0; i < output_tensor.size(); ++i) {
std::cout << (int32_t) output_data[i] << " ";
EXPECT_EQ(expected[i], output_data[i]);
}
std::cout << std::endl;
VLOG(0) << wrapper.TeardownGraph();
wrapper.Finalize();
......
# Description:
# Mace dsp util.
#
package(
default_visibility = ["//visibility:public"],
)
licenses(["notice"]) # Apache 2.0
load("//mace:mace.bzl", "if_android")
# Utility library for the DSP module: all sources in this directory
# except the unit tests (those build into :util_test below).
cc_library(
    name = "util",
    srcs = glob([
        "*.cc",
    ], exclude = [
        # keep test sources out of the library target
        "*_test.cc",
    ]),
    hdrs = glob([
        "*.h",
    ]),
    copts = ["-std=c++11"],
    deps = [
        # for Tensor / Allocator and friends
        "//mace/core:core",
    ],
)
# Unit tests for :util (gtest-based, one binary for all *_test.cc files).
cc_test(
    name = "util_test",
    testonly = 1,
    srcs = glob(["*_test.cc"]),
    copts = ["-std=c++11"],
    # Android needs explicit libdl/libm when linking statically
    linkopts = if_android([
        "-ldl",
        "-lm",
    ]),
    linkstatic = 1,
    deps = [
        "@gtest//:gtest_main",
        ":util",
    ],
)
//
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include "mace/dsp/util/quantize.h"
namespace mace {
// Quantize the float values of |in_tensor| (whose observed range is
// [min_in, max_in]) into uint8 values written to |out_tensor|.
// The actual range used, adjusted so that 0.0f maps exactly onto an
// integer code, is returned through |min_out| / |max_out|.
// |out_tensor| must already be resized to match |in_tensor|.
void Quantizer::Quantize(const Tensor &in_tensor,
                         const float min_in,
                         const float max_in,
                         Tensor *out_tensor,
                         float *min_out,
                         float *max_out) {
  float stepsize;
  float recip_stepsize;
  // Widen/shift the range so zero is exactly representable.
  QuantizeAdjustRange(min_in, max_in,
                      min_out, max_out,
                      &stepsize, &recip_stepsize);

  const float *in = in_tensor.data<float>();
  uint8_t *out = out_tensor->mutable_data<uint8_t>();

  for (int i = 0; i < in_tensor.size(); i++) {
    const float inval = in[i];
    // Round to nearest code point. Clamp in the float domain BEFORE the
    // narrowing cast: the previous code cast to uint8_t first, which made
    // the clamp dead code and left out-of-range inputs (e.g. values below
    // *min_out) undefined/wrapped instead of saturated.
    float ival = (inval - *min_out) * recip_stepsize + 0.5f;
    if (ival < 0) ival = 0;
    if (ival > 255) ival = 255;
    out[i] = static_cast<uint8_t>(ival);
  }
}
// Adjust the requested quantization range [min_in, max_in] to a range
// [*min_out, *max_out] in which 0.0f falls exactly on an integer code,
// and report the resulting quantization step (and its reciprocal).
// The range is first extended to include 0 and padded to at least 1e-4
// to avoid division by zero; 254 (not 255) steps are used initially so
// rounding the zero point up still leaves 255 codes of headroom.
void Quantizer::QuantizeAdjustRange(float min_in,
                                    float max_in,
                                    float *min_out,
                                    float *max_out,
                                    float *stepsize_out,
                                    float *recip_stepsize_out) {
  // Range must contain zero so that 0.0f can map to an exact code.
  float minval = std::min(0.0f, min_in);
  float maxval = std::max(0.0f, max_in);
  // Guard against a degenerate (empty) range.
  float range = fmaxf(0.0001f, maxval - minval);
  float stepsize = range / 254.0f;
  float recip_stepsize = 254.0f / range;
  // round quantized_zero up so min_out <= minval
  int quantized_zero = ((0.0f - minval) * recip_stepsize) + 0.999;
  // Shift the range so code `quantized_zero` is exactly 0.0f,
  // then span the full 255 steps upward from the new minimum.
  float newmin = -quantized_zero * stepsize;
  float newmax = 255.0f * stepsize + newmin;
  *min_out = newmin;
  *max_out = newmax;
  *stepsize_out = stepsize;
  *recip_stepsize_out = recip_stepsize;
}
// Map the uint8 codes in |in_tensor| back to floats in |out_tensor|,
// interpreting them linearly over the range [min_in, max_in] with
// 255 steps. |out_tensor| must already be resized to match.
void Quantizer::DeQuantize(const Tensor &in_tensor,
                           const float min_in,
                           const float max_in,
                           Tensor *out_tensor) {
  // Pad a degenerate range to avoid a zero step size.
  const float span = std::max(0.0001f, max_in - min_in);
  const float scale = span / 255.0f;

  const uint8_t *src = in_tensor.data<uint8_t>();
  float *dst = out_tensor->mutable_data<float>();

  for (int idx = 0; idx < in_tensor.size(); ++idx) {
    dst[idx] = (src[idx] * scale) + min_in;
  }
}
} // namespace mace
\ No newline at end of file
//
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#ifndef MACE_DSP_UTIL_QUANTIZE_H_
#define MACE_DSP_UTIL_QUANTIZE_H_
#include "mace/core/common.h"
#include "mace/core/tensor.h"
namespace mace {
// Linear uint8 quantizer/dequantizer for float tensors, matching the
// range-adjustment convention used by the DSP (Hexagon-style) ops:
// the quantization range is adjusted so 0.0f maps to an exact code.
class Quantizer {
 public:
  Quantizer() {}
  ~Quantizer() {}

  // Quantize |in_tensor| (floats, observed range [min_in, max_in]) into
  // uint8 codes in |out_tensor|; the adjusted range actually used is
  // returned via |min_out| / |max_out|.
  void Quantize(const Tensor &in_tensor,
                const float min_in, const float max_in,
                Tensor *out_tensor,
                float *min_out, float *max_out);
  // Inverse of Quantize: map uint8 codes in |in_tensor| back to floats
  // in |out_tensor| using the linear range [min_in, max_in].
  void DeQuantize(const Tensor &in_tensor,
                  const float min_in, const float max_in,
                  Tensor *out_tensor);

 private:
  // Extend/shift [min_in, max_in] so zero is exactly representable;
  // outputs the adjusted range plus the step size and its reciprocal.
  void QuantizeAdjustRange(float min_in,
                           float max_in,
                           float *min_out,
                           float *max_out,
                           float *stepsize,
                           float *recip_stepsize);
};
} // namespace mace
#endif // MACE_DSP_UTIL_QUANTIZE_H_
//
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include "mace/dsp/util/quantize.h"
#include "gtest/gtest.h"
using namespace mace;
// Round-trip test: quantize five known floats, check the exact uint8
// codes, then dequantize and check the result is within 1.0 of the
// original (one quantization step for a ~150-wide range is ~0.6).
TEST(QuantizeTest, QuantizeAndDequantize) {
  testing::internal::LogToStderr();
  Quantizer quantizer;
  Allocator *allocator = GetDeviceAllocator(DeviceType::CPU);

  // Input: five floats spanning [-50, 100].
  Tensor in_tensor(allocator, DataType::DT_FLOAT);
  vector<index_t> shape {5};
  in_tensor.Resize(shape);
  float *in_data = in_tensor.mutable_data<float>();
  in_data[0] = -50.0;
  in_data[1] = -10.0;
  in_data[2] = 20.0;
  in_data[3] = 80.0;
  in_data[4] = 100.0;

  Tensor quantized_tensor(allocator, DataType::DT_UINT8);
  quantized_tensor.Resize(shape);
  uint8_t *quantized_data = quantized_tensor.mutable_data<uint8_t>();
  float min_out, max_out;
  quantizer.Quantize(in_tensor, -50.0, 100.0, &quantized_tensor, &min_out, &max_out);

  // Expected codes for the zero-adjusted range produced by
  // QuantizeAdjustRange(-50, 100, ...).
  vector<uint8_t> expected_quantize_data {0, 68, 119, 220, 254};
  for (int i = 0; i < quantized_tensor.size(); ++i) {
    EXPECT_EQ(expected_quantize_data[i], quantized_data[i]);
  }

  Tensor dequantized_tensor(allocator, DataType::DT_FLOAT);
  dequantized_tensor.Resize(shape);
  float *dequantized_data = dequantized_tensor.mutable_data<float>();
  quantizer.DeQuantize(quantized_tensor, min_out, max_out, &dequantized_tensor);

  // Round trip should recover inputs to within one quantization step.
  for (int i = 0; i < dequantized_tensor.size(); ++i) {
    EXPECT_NEAR(in_data[i], dequantized_data[i], 1);
  }
}
......@@ -8,11 +8,12 @@ package(
licenses(["notice"]) # Apache 2.0
load("//mace:mace.bzl", "if_android")
load("//mace:mace.bzl", "if_android_arm64")
cc_library(
name = "kernels",
srcs = glob(["*.cc"]) + if_android(glob(["neon/*.cc", "opencl/*.cc"])),
hdrs = glob(["*.h"]) + if_android(glob(["neon/*.h", "opencl/*.h"])),
srcs = glob(["*.cc"]) + if_android(glob(["opencl/*.cc"])) + if_android_arm64(glob(["neon/*.cc"])),
hdrs = glob(["*.h"]) + if_android(glob(["opencl/*.cc"])) + if_android_arm64(glob(["neon/*.cc"])),
copts = [
"-std=c++11",
"-fopenmp",
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册