diff --git a/mace/core/BUILD b/mace/core/BUILD index 5adf9010ec01e7d69292a92de492eaaf64f4654c..77a3e0dd8ae3c9a21aa0ebc973867cba07910cfd 100644 --- a/mace/core/BUILD +++ b/mace/core/BUILD @@ -47,24 +47,33 @@ cc_library( srcs = glob( ["*.cc",], exclude=[ - "logging.cc" + "logging.cc", + "opencl_allocator.cc", + ]) + if_android([ + "opencl_allocator.cc", ]), hdrs = glob( ["*.h"], exclude=[ - "logging.h" + "logging.h", + "opencl_allocator.h", + ]) + if_android([ + "opencl_allocator.h", ]), - copts = ["-std=c++11"], + copts = ["-std=c++11"] + if_android([ + "-D__USE_OPENCL", + ]), linkopts = if_android([ "-pie", ]), deps = [ ":logging", - ":opencl_runtime", "//mace/proto:cc_proto", "//mace/proto:stats_proto", "//mace/utils", - ], + ] + if_android([ + ":opencl_runtime", + ]), ) # Main program for tests diff --git a/mace/core/allocator.cc b/mace/core/allocator.cc index 707ea4cb0e0a3dd267e229b6a7f52e39d42e9773..84bdeb86fd87f66ef5caee92cc959f84bd19a197 100644 --- a/mace/core/allocator.cc +++ b/mace/core/allocator.cc @@ -3,7 +3,9 @@ // #include "mace/core/allocator.h" +#ifdef __USE_OPENCL #include "mace/core/opencl_allocator.h" +#endif namespace mace { @@ -23,6 +25,8 @@ Allocator *GetDeviceAllocator(DeviceType type) { MACE_REGISTER_ALLOCATOR(DeviceType::CPU, new CPUAllocator()); MACE_REGISTER_ALLOCATOR(DeviceType::NEON, new CPUAllocator()); +#ifdef __USE_OPENCL MACE_REGISTER_ALLOCATOR(DeviceType::OPENCL, new OpenCLAllocator()); +#endif } // namespace mace diff --git a/mace/core/net.cc b/mace/core/net.cc index f93089a18a2e18cc0c147b8df1e94fe79538d17c..bd3d45c20a631571354122ee64d84cc73ca686f9 100644 --- a/mace/core/net.cc +++ b/mace/core/net.cc @@ -4,7 +4,9 @@ #include "mace/core/net.h" #include "mace/utils/utils.h" +#ifdef __USE_OPENCL #include "mace/core/runtime/opencl/opencl_runtime.h" +#endif namespace mace { diff --git a/mace/dsp/BUILD b/mace/dsp/BUILD index 40e81b05e5d6ad15837c36690e4bb69f5fe91c44..814d18744d47da77e28a51c9676acb260d47c23c 100644 --- a/mace/dsp/BUILD +++ 
b/mace/dsp/BUILD @@ -60,5 +60,6 @@ cc_test( deps = [ "@gtest//:gtest_main", ":dsp", + "//mace/kernels:kernels", ], ) diff --git a/mace/dsp/test/quantized_add_test.cc b/mace/dsp/test/quantized_add_test.cc index 3d89f45bfa3f8facf9979ac0f61a11c27c44131a..f30d8424f68c1613064f3c7531b9685a41a0f215 100644 --- a/mace/dsp/test/quantized_add_test.cc +++ b/mace/dsp/test/quantized_add_test.cc @@ -175,11 +175,11 @@ TEST(QuantizedAddTest, QuantizedAdd) { VLOG(0) << wrapper.ExecuteGraph(input_tensor, &output_tensor); wrapper.PrintLog(); - // -120.0~176.47, [17, 146,232] + // -120.0~176.47, [17, 146, 229] + vector expected {17, 146, 229}; for (int i = 0; i < output_tensor.size(); ++i) { - std::cout << (int32_t) output_data[i] << " "; + EXPECT_EQ(expected[i], output_data[i]); } - std::cout << std::endl; VLOG(0) << wrapper.TeardownGraph(); wrapper.Finalize(); diff --git a/mace/dsp/test/quantized_relu_test.cc b/mace/dsp/test/quantized_relu_test.cc index c3883d8d3d7fa21e697d571fff584b932f59ae20..685be71d9b51ab3a833579654da568552d310a0f 100644 --- a/mace/dsp/test/quantized_relu_test.cc +++ b/mace/dsp/test/quantized_relu_test.cc @@ -121,10 +121,10 @@ TEST(QuantizedReluTest, QuantizedRelu) { VLOG(0) << wrapper.ExecuteGraph(input_tensor, &output_tensor); wrapper.PrintLog(); + vector expected {128, 128, 128, 192, 255}; for (int i = 0; i < output_tensor.size(); ++i) { - std::cout << (int32_t) output_data[i] << " "; + EXPECT_EQ(expected[i], output_data[i]); } - std::cout << std::endl; VLOG(0) << wrapper.TeardownGraph(); wrapper.Finalize(); diff --git a/mace/dsp/util/BUILD b/mace/dsp/util/BUILD new file mode 100644 index 0000000000000000000000000000000000000000..4a75e104fccca2214cd0ffbf014a8c224614d9f4 --- /dev/null +++ b/mace/dsp/util/BUILD @@ -0,0 +1,43 @@ +# Description: +# Mace dsp util. 
+#
+
+package(
+    default_visibility = ["//visibility:public"],
+)
+
+licenses(["notice"])  # Apache 2.0
+
+load("//mace:mace.bzl", "if_android")
+
+cc_library(
+    name = "util",
+    srcs = glob([
+        "*.cc",
+    ], exclude = [
+        "*_test.cc",
+    ]),
+    hdrs = glob([
+        "*.h",
+    ]),
+    copts = ["-std=c++11"],
+    deps = [
+        "//mace/core:core",
+    ],
+)
+
+cc_test(
+    name = "util_test",
+    testonly = 1,
+    srcs = glob(["*_test.cc"]),
+    copts = ["-std=c++11"],
+    linkopts = if_android([
+        "-ldl",
+        "-lm",
+    ]),
+    linkstatic = 1,
+    deps = [
+        "@gtest//:gtest_main",
+        ":util",
+    ],
+)
diff --git a/mace/dsp/util/quantize.cc b/mace/dsp/util/quantize.cc
new file mode 100644
index 0000000000000000000000000000000000000000..42063b4f05f8bfac40d000cbd399c7131baa3d60
--- /dev/null
+++ b/mace/dsp/util/quantize.cc
@@ -0,0 +1,96 @@
+//
+// Copyright (c) 2017 XiaoMi All rights reserved.
+//
+
+#include "mace/dsp/util/quantize.h"
+
+#include <algorithm>
+
+namespace mace {
+
+// Converts the float elements of in_tensor to uint8 codes in out_tensor.
+//
+// [min_in, max_in] is the requested value range. QuantizeAdjustRange adjusts
+// it, and the range that was actually used is written to *min_out and
+// *max_out; pass that pair to DeQuantize to map the codes back.
+//
+// out_tensor is not resized here: it must already provide at least
+// in_tensor.size() uint8 elements.
+void Quantizer::Quantize(const Tensor &in_tensor,
+                         const float min_in,
+                         const float max_in,
+                         Tensor *out_tensor,
+                         float *min_out,
+                         float *max_out) {
+  float stepsize;
+  float recip_stepsize;
+  QuantizeAdjustRange(min_in, max_in,
+                      min_out, max_out,
+                      &stepsize, &recip_stepsize);
+
+  const float *in = in_tensor.data<float>();
+  uint8_t *out = out_tensor->mutable_data<uint8_t>();
+
+  for (int i = 0; i < in_tensor.size(); i++) {
+    // +0.5f so that the truncating cast below rounds to the nearest code.
+    const float scaled = (in[i] - *min_out) * recip_stepsize + 0.5f;
+    // Clamp while still in float, so out-of-range values (and NaN, which
+    // std::max(0.0f, x) turns into 0) never reach the narrowing cast.
+    const float clamped = std::min(255.0f, std::max(0.0f, scaled));
+    out[i] = static_cast<uint8_t>(clamped);
+  }
+}
+
+// Adjusts [min_in, max_in] to a range that contains 0 and in which 0 lands
+// exactly on an integer code (quantized_zero steps above *min_out).
+//
+// Outputs:
+//   *min_out, *max_out    adjusted range, 255 steps wide
+//   *stepsize_out         float width of one step (input range / 254)
+//   *recip_stepsize_out   reciprocal of the step width
+void Quantizer::QuantizeAdjustRange(float min_in,
+                                    float max_in,
+                                    float *min_out,
+                                    float *max_out,
+                                    float *stepsize_out,
+                                    float *recip_stepsize_out) {
+  // Force 0 into the range and keep the range at least 0.0001 wide.
+  const float minval = std::min(0.0f, min_in);
+  const float maxval = std::max(0.0f, max_in);
+  const float range = std::max(0.0001f, maxval - minval);
+  const float stepsize = range / 254.0f;
+  const float recip_stepsize = 254.0f / range;
+  // round quantized_zero up so min_out <= minval
+  const int quantized_zero =
+      static_cast<int>(((0.0f - minval) * recip_stepsize) + 0.999);
+  const float newmin = -quantized_zero * stepsize;
+  const float newmax = 255.0f * stepsize + newmin;
+  *min_out = newmin;
+  *max_out = newmax;
+  *stepsize_out = stepsize;
+  *recip_stepsize_out = recip_stepsize;
+}
+
+// Maps the uint8 codes of in_tensor back to floats in out_tensor:
+//   out = code * stepsize + min_in, stepsize = (max_in - min_in) / 255
+// (the range is floored at 0.0001). Pass the min_out / max_out pair
+// reported by Quantize as min_in / max_in.
+//
+// out_tensor is not resized here: it must already provide at least
+// in_tensor.size() float elements.
+void Quantizer::DeQuantize(const Tensor &in_tensor,
+                           const float min_in,
+                           const float max_in,
+                           Tensor *out_tensor) {
+  const float range = std::max(0.0001f, max_in - min_in);
+  const float stepsize = range / 255.0f;
+
+  const uint8_t *in = in_tensor.data<uint8_t>();
+  float *out = out_tensor->mutable_data<float>();
+
+  for (int i = 0; i < in_tensor.size(); i++) {
+    out[i] = (in[i] * stepsize) + min_in;
+  }
+}
+
+}  // namespace mace
diff --git a/mace/dsp/util/quantize.h b/mace/dsp/util/quantize.h
new file mode 100644
index 0000000000000000000000000000000000000000..316fdaed72b216a0cd009fe19ea84facc508fef4
--- /dev/null
+++ b/mace/dsp/util/quantize.h
@@ -0,0 +1,43 @@
+//
+// Copyright (c) 2017 XiaoMi All rights reserved.
+//
+
+#ifndef MACE_DSP_UTIL_QUANTIZE_H_
+#define MACE_DSP_UTIL_QUANTIZE_H_
+
+#include "mace/core/common.h"
+#include "mace/core/tensor.h"
+
+namespace mace {
+
+// Converts tensors between float values and uint8 codes.
+class Quantizer {
+ public:
+  Quantizer() {}
+  ~Quantizer() {}
+
+  // Writes the uint8 codes of in_tensor to out_tensor and reports the value
+  // range that was actually used through min_out / max_out.
+  void Quantize(const Tensor &in_tensor,
+                const float min_in, const float max_in,
+                Tensor *out_tensor,
+                float *min_out, float *max_out);
+  // Writes the float values for the uint8 codes of in_tensor to out_tensor;
+  // min_in / max_in is the range the codes were produced with.
+  void DeQuantize(const Tensor &in_tensor,
+                  const float min_in, const float max_in,
+                  Tensor *out_tensor);
+
+ private:
+  // Derives the range and step size used by Quantize from [min_in, max_in].
+  void QuantizeAdjustRange(float min_in,
+                           float max_in,
+                           float *min_out,
+                           float *max_out,
+                           float *stepsize,
+                           float *recip_stepsize);
+};
+
+}  // namespace mace
+
+#endif  // MACE_DSP_UTIL_QUANTIZE_H_
diff --git a/mace/dsp/util/quantize_test.cc b/mace/dsp/util/quantize_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..aca1eadb98bd675b123967ca88638299a487ef74
--- /dev/null
+++ b/mace/dsp/util/quantize_test.cc
@@ -0,0 +1,45 @@
+//
+// Copyright (c) 2017 XiaoMi All rights reserved.
+//
+
+#include "mace/dsp/util/quantize.h"
+#include "gtest/gtest.h"
+
+using namespace mace;
+
+// Round trip: quantize five floats over [-50, 100], then dequantize them.
+TEST(QuantizeTest, QuantizeAndDequantize) {
+  testing::internal::LogToStderr();
+
+  Quantizer quantizer;
+  Allocator *allocator = GetDeviceAllocator(DeviceType::CPU);
+
+  Tensor in_tensor(allocator, DataType::DT_FLOAT);
+  vector<index_t> shape {5};
+  in_tensor.Resize(shape);
+  float *in_data = in_tensor.mutable_data<float>();
+  in_data[0] = -50.0;
+  in_data[1] = -10.0;
+  in_data[2] = 20.0;
+  in_data[3] = 80.0;
+  in_data[4] = 100.0;
+
+  Tensor quantized_tensor(allocator, DataType::DT_UINT8);
+  quantized_tensor.Resize(shape);
+  uint8_t *quantized_data = quantized_tensor.mutable_data<uint8_t>();
+  float min_out, max_out;
+  quantizer.Quantize(in_tensor, -50.0, 100.0, &quantized_tensor, &min_out, &max_out);
+  vector<uint8_t> expected_quantize_data {0, 68, 119, 220, 254};
+  for (int i = 0; i < quantized_tensor.size(); ++i) {
+    EXPECT_EQ(expected_quantize_data[i], quantized_data[i]);
+  }
+
+  Tensor dequantized_tensor(allocator, DataType::DT_FLOAT);
+  dequantized_tensor.Resize(shape);
+  float *dequantized_data = dequantized_tensor.mutable_data<float>();
+  quantizer.DeQuantize(quantized_tensor, min_out, max_out, &dequantized_tensor);
+
+  for (int i = 0; i < dequantized_tensor.size(); ++i) {
+    EXPECT_NEAR(in_data[i], dequantized_data[i], 1);
+  }
+}
diff --git a/mace/kernels/BUILD b/mace/kernels/BUILD
index 7a3f3007e31915e1652b7e8e72b0e16d58e99bcd..bd79ac2bbadc33beb45849300bcc82cda89a8723 100644
--- a/mace/kernels/BUILD
+++ b/mace/kernels/BUILD
@@ -8,11 +8,12 @@ package(
 licenses(["notice"])  # Apache 2.0
 
 load("//mace:mace.bzl", "if_android")
+load("//mace:mace.bzl", "if_android_arm64")
 
 cc_library(
     name = "kernels",
-    srcs = glob(["*.cc"]) + if_android(glob(["neon/*.cc", "opencl/*.cc"])),
-    hdrs = glob(["*.h"]) + if_android(glob(["neon/*.h", "opencl/*.h"])),
+    srcs = glob(["*.cc"]) + if_android(glob(["opencl/*.cc"])) + if_android_arm64(glob(["neon/*.cc"])),
+    hdrs = glob(["*.h"]) + if_android(glob(["opencl/*.h"])) + if_android_arm64(glob(["neon/*.h"])),
     copts = [
         "-std=c++11",
         "-fopenmp",