提交 e35a077a 编写于 作者: Y yejianwu

separate quantize code when run without quantize

上级 e988e951
...@@ -59,3 +59,11 @@ config_setting( ...@@ -59,3 +59,11 @@ config_setting(
}, },
visibility = ["//visibility:public"], visibility = ["//visibility:public"],
) )
# Build-time switch for quantization support: this setting matches when the
# build is invoked with `--define quantize=true`. It is the condition that the
# `if_quantize_enabled` macro selects on.
config_setting(
name = "quantize_enabled",
define_values = {
"quantize": "true",
},
visibility = ["//visibility:public"],
)
...@@ -15,6 +15,7 @@ load( ...@@ -15,6 +15,7 @@ load(
"if_openmp_enabled", "if_openmp_enabled",
"if_neon_enabled", "if_neon_enabled",
"if_opencl_enabled", "if_opencl_enabled",
"if_quantize_enabled",
) )
cc_library( cc_library(
...@@ -51,6 +52,8 @@ cc_library( ...@@ -51,6 +52,8 @@ cc_library(
"-DMACE_ENABLE_OPENMP", "-DMACE_ENABLE_OPENMP",
]) + if_opencl_enabled([ ]) + if_opencl_enabled([
"-DMACE_ENABLE_OPENCL", "-DMACE_ENABLE_OPENCL",
]) + if_quantize_enabled([
"-DMACE_ENABLE_QUANTIZE",
]) + if_hexagon_enabled([ ]) + if_hexagon_enabled([
"-DMACE_ENABLE_HEXAGON", "-DMACE_ENABLE_HEXAGON",
]) + if_neon_enabled([ ]) + if_neon_enabled([
...@@ -64,11 +67,12 @@ cc_library( ...@@ -64,11 +67,12 @@ cc_library(
"//mace/codegen:generated_version", "//mace/codegen:generated_version",
"//mace/proto:mace_cc", "//mace/proto:mace_cc",
"//mace/utils", "//mace/utils",
"@gemmlowp",
] + if_opencl_enabled([ ] + if_opencl_enabled([
":opencl_headers", ":opencl_headers",
"//mace/codegen:generated_opencl", "//mace/codegen:generated_opencl",
"@half//:half", "@half//:half",
]) + if_quantize_enabled([
"@gemmlowp",
]) + if_hexagon_enabled([ ]) + if_hexagon_enabled([
"//third_party/nnlib:libhexagon", "//third_party/nnlib:libhexagon",
]), ]),
......
...@@ -176,11 +176,16 @@ MaceStatus SetOpenMPThreadsAndAffinityCPUs(int omp_num_threads, ...@@ -176,11 +176,16 @@ MaceStatus SetOpenMPThreadsAndAffinityCPUs(int omp_num_threads,
MaceStatus CPURuntime::SetOpenMPThreadsAndAffinityPolicy( MaceStatus CPURuntime::SetOpenMPThreadsAndAffinityPolicy(
int omp_num_threads_hint, int omp_num_threads_hint,
CPUAffinityPolicy policy, CPUAffinityPolicy policy,
gemmlowp::GemmContext *gemm_context) { void *gemm_context) {
if (policy == CPUAffinityPolicy::AFFINITY_NONE) { if (policy == CPUAffinityPolicy::AFFINITY_NONE) {
#ifdef MACE_ENABLE_QUANTIZE
if (gemm_context) { if (gemm_context) {
gemm_context->set_max_num_threads(std::max(0, omp_num_threads_hint)); static_cast<gemmlowp::GemmContext*>(gemm_context)->set_max_num_threads(
std::max(0, omp_num_threads_hint));
} }
#else
MACE_UNUSED(gemm_context);
#endif // MACE_ENABLE_QUANTIZE
#ifdef MACE_ENABLE_OPENMP #ifdef MACE_ENABLE_OPENMP
if (omp_num_threads_hint > 0) { if (omp_num_threads_hint > 0) {
omp_set_num_threads(std::min(omp_num_threads_hint, omp_get_num_procs())); omp_set_num_threads(std::min(omp_num_threads_hint, omp_get_num_procs()));
...@@ -210,9 +215,12 @@ MaceStatus CPURuntime::SetOpenMPThreadsAndAffinityPolicy( ...@@ -210,9 +215,12 @@ MaceStatus CPURuntime::SetOpenMPThreadsAndAffinityPolicy(
omp_num_threads_hint = use_cpu_ids.size(); omp_num_threads_hint = use_cpu_ids.size();
} }
#ifdef MACE_ENABLE_QUANTIZE
if (gemm_context) { if (gemm_context) {
gemm_context->set_max_num_threads(omp_num_threads_hint); static_cast<gemmlowp::GemmContext*>(gemm_context)->set_max_num_threads(
omp_num_threads_hint);
} }
#endif // MACE_ENABLE_QUANTIZE
return SetOpenMPThreadsAndAffinityCPUs(omp_num_threads_hint, use_cpu_ids); return SetOpenMPThreadsAndAffinityCPUs(omp_num_threads_hint, use_cpu_ids);
} }
......
...@@ -18,7 +18,11 @@ ...@@ -18,7 +18,11 @@
#include <memory> #include <memory>
#include <vector> #include <vector>
#ifdef MACE_ENABLE_QUANTIZE
#include "public/gemmlowp.h" #include "public/gemmlowp.h"
#endif // MACE_ENABLE_QUANTIZE
#include "mace/core/macros.h"
#include "mace/public/mace.h" #include "mace/public/mace.h"
#include "mace/utils/logging.h" #include "mace/utils/logging.h"
...@@ -34,22 +38,34 @@ class CPURuntime { ...@@ -34,22 +38,34 @@ class CPURuntime {
: num_threads_(num_threads), : num_threads_(num_threads),
policy_(policy), policy_(policy),
gemm_context_(nullptr) { gemm_context_(nullptr) {
#ifdef MACE_ENABLE_QUANTIZE
if (use_gemmlowp) { if (use_gemmlowp) {
MACE_CHECK_NOTNULL(GetGemmlowpContext()); MACE_CHECK_NOTNULL(GetGemmlowpContext());
} }
#else
MACE_UNUSED(use_gemmlowp);
#endif // MACE_ENABLE_QUANTIZE
SetOpenMPThreadsAndAffinityPolicy(num_threads_, SetOpenMPThreadsAndAffinityPolicy(num_threads_,
policy_, policy_,
gemm_context_.get()); gemm_context_);
}
#ifdef MACE_ENABLE_QUANTIZE
// Releases the gemmlowp context created on demand by GetGemmlowpContext().
//
// gemm_context_ is held as an untyped `void *`, so it has to be cast back to
// gemmlowp::GemmContext before `delete`; deleting through `void *` would not
// run the context's destructor.
//
// NOTE(review): the class now owns a raw pointer but does not declare copy
// operations in the visible code; confirm CPURuntime is never copied, or
// delete the copy constructor/assignment to avoid a double free.
~CPURuntime() {
  // The guard used to be `!gemm_context_`, which only "deleted" a null
  // pointer and therefore leaked every context that was actually allocated.
  if (gemm_context_ != nullptr) {
    delete static_cast<gemmlowp::GemmContext*>(gemm_context_);
    gemm_context_ = nullptr;
  }
}
~CPURuntime() = default;
gemmlowp::GemmContext *GetGemmlowpContext() { gemmlowp::GemmContext *GetGemmlowpContext() {
if (!gemm_context_) { if (!gemm_context_) {
gemm_context_.reset(new gemmlowp::GemmContext()); gemm_context_ = new gemmlowp::GemmContext();
} }
return gemm_context_.get(); return static_cast<gemmlowp::GemmContext*>(gemm_context_);
} }
#else
~CPURuntime() = default;
#endif // MACE_ENABLE_QUANTIZE
int num_threads() const { int num_threads() const {
return num_threads_; return num_threads_;
...@@ -67,11 +83,11 @@ class CPURuntime { ...@@ -67,11 +83,11 @@ class CPURuntime {
MaceStatus SetOpenMPThreadsAndAffinityPolicy( MaceStatus SetOpenMPThreadsAndAffinityPolicy(
int omp_num_threads_hint, int omp_num_threads_hint,
CPUAffinityPolicy policy, CPUAffinityPolicy policy,
gemmlowp::GemmContext *gemm_context); void *gemm_context);
int num_threads_; int num_threads_;
CPUAffinityPolicy policy_; CPUAffinityPolicy policy_;
std::unique_ptr<gemmlowp::GemmContext> gemm_context_; void *gemm_context_;
}; };
} // namespace mace } // namespace mace
......
...@@ -16,6 +16,7 @@ load( ...@@ -16,6 +16,7 @@ load(
"if_hexagon_enabled", "if_hexagon_enabled",
"if_opencl_enabled", "if_opencl_enabled",
"if_opencl_enabled_str", "if_opencl_enabled_str",
"if_quantize_enabled",
) )
cc_library( cc_library(
...@@ -34,6 +35,8 @@ cc_library( ...@@ -34,6 +35,8 @@ cc_library(
"-mfloat-abi=softfp", "-mfloat-abi=softfp",
]) + if_opencl_enabled([ ]) + if_opencl_enabled([
"-DMACE_ENABLE_OPENCL", "-DMACE_ENABLE_OPENCL",
]) + if_quantize_enabled([
"-DMACE_ENABLE_QUANTIZE",
]) + if_hexagon_enabled([ ]) + if_hexagon_enabled([
"-DMACE_ENABLE_HEXAGON", "-DMACE_ENABLE_HEXAGON",
]), ]),
......
...@@ -60,6 +60,12 @@ def if_opencl_enabled_str(a): ...@@ -60,6 +60,12 @@ def if_opencl_enabled_str(a):
"//conditions:default": "", "//conditions:default": "",
}) })
def if_quantize_enabled(a):
    """Returns `a` when quantization is enabled, otherwise an empty list.

    Args:
      a: value to use when the `//mace:quantize_enabled` condition matches.

    Returns:
      A `select()` that resolves to `a` under `//mace:quantize_enabled` and
      to `[]` under the default condition.
    """
    branches = {
        "//mace:quantize_enabled": a,
        "//conditions:default": [],
    }
    return select(branches)
def mace_version_genrule(): def mace_version_genrule():
native.genrule( native.genrule(
name = "mace_version_gen", name = "mace_version_gen",
......
...@@ -14,6 +14,7 @@ load( ...@@ -14,6 +14,7 @@ load(
"if_android_armv7", "if_android_armv7",
"if_hexagon_enabled", "if_hexagon_enabled",
"if_opencl_enabled", "if_opencl_enabled",
"if_quantize_enabled",
) )
cc_library( cc_library(
...@@ -33,6 +34,7 @@ cc_library( ...@@ -33,6 +34,7 @@ cc_library(
"buffer_transform.cc", "buffer_transform.cc",
"lstm_cell.cc", "lstm_cell.cc",
"winograd_transform.cc", "winograd_transform.cc",
"quantize.cc",
], ],
) + if_opencl_enabled(glob( ) + if_opencl_enabled(glob(
[ [
...@@ -47,6 +49,10 @@ cc_library( ...@@ -47,6 +49,10 @@ cc_library(
exclude = [ exclude = [
"opencl/*_test.cc", "opencl/*_test.cc",
], ],
)) + if_quantize_enabled(glob(
[
"quantize.cc",
],
)), )),
hdrs = glob( hdrs = glob(
[ [
...@@ -56,11 +62,16 @@ cc_library( ...@@ -56,11 +62,16 @@ cc_library(
exclude = [ exclude = [
"ops_registry.h", "ops_registry.h",
"ops_test_util.h", "ops_test_util.h",
"fixpoint.h",
"gemmlowp_util.h",
] ]
) + if_opencl_enabled(glob([ ) + if_opencl_enabled(glob([
"opencl/*.h", "opencl/*.h",
"opencl/image/*.h", "opencl/image/*.h",
"opencl/buffer/*.h", "opencl/buffer/*.h",
])) + if_quantize_enabled(glob([
"fixpoint.h",
"gemmlowp_util.h",
])), ])),
copts = [ copts = [
"-Werror", "-Werror",
...@@ -76,15 +87,18 @@ cc_library( ...@@ -76,15 +87,18 @@ cc_library(
"-mfloat-abi=softfp", "-mfloat-abi=softfp",
]) + if_opencl_enabled([ ]) + if_opencl_enabled([
"-DMACE_ENABLE_OPENCL", "-DMACE_ENABLE_OPENCL",
]) + if_quantize_enabled([
"-DMACE_ENABLE_QUANTIZE",
]) + if_hexagon_enabled([ ]) + if_hexagon_enabled([
"-DMACE_ENABLE_HEXAGON", "-DMACE_ENABLE_HEXAGON",
]), ]),
linkopts = if_android(["-lm"]), linkopts = if_android(["-lm"]),
deps = [ deps = [
"//mace/core", "//mace/core",
"@gemmlowp",
"@tflite", "@tflite",
], ] + if_quantize_enabled([
"@gemmlowp",
]),
) )
...@@ -110,6 +124,8 @@ cc_library( ...@@ -110,6 +124,8 @@ cc_library(
"-mfloat-abi=softfp", "-mfloat-abi=softfp",
]) + if_opencl_enabled([ ]) + if_opencl_enabled([
"-DMACE_ENABLE_OPENCL", "-DMACE_ENABLE_OPENCL",
]) + if_quantize_enabled([
"-DMACE_ENABLE_QUANTIZE",
]) + if_hexagon_enabled([ ]) + if_hexagon_enabled([
"-DMACE_ENABLE_HEXAGON", "-DMACE_ENABLE_HEXAGON",
]), ]),
...@@ -157,7 +173,15 @@ cc_test( ...@@ -157,7 +173,15 @@ cc_test(
"arm/*_test.cc", "arm/*_test.cc",
"opencl/*_test.cc", "opencl/*_test.cc",
], ],
), exclude = [
"fixpoint_test.cc"
],
) + if_quantize_enabled(glob(
[
"fixpoint_test.cc"
],
)),
copts = [ copts = [
"-Werror", "-Werror",
"-Wextra", "-Wextra",
...@@ -171,6 +195,8 @@ cc_test( ...@@ -171,6 +195,8 @@ cc_test(
"-mfloat-abi=softfp", "-mfloat-abi=softfp",
]) + if_opencl_enabled([ ]) + if_opencl_enabled([
"-DMACE_ENABLE_OPENCL", "-DMACE_ENABLE_OPENCL",
]) + if_quantize_enabled([
"-DMACE_ENABLE_QUANTIZE",
]) + if_hexagon_enabled([ ]) + if_hexagon_enabled([
"-DMACE_ENABLE_HEXAGON", "-DMACE_ENABLE_HEXAGON",
]), ]),
...@@ -199,6 +225,8 @@ cc_test( ...@@ -199,6 +225,8 @@ cc_test(
"-mfloat-abi=softfp", "-mfloat-abi=softfp",
]) + if_opencl_enabled([ ]) + if_opencl_enabled([
"-DMACE_ENABLE_OPENCL", "-DMACE_ENABLE_OPENCL",
]) + if_quantize_enabled([
"-DMACE_ENABLE_QUANTIZE",
]) + if_hexagon_enabled([ ]) + if_hexagon_enabled([
"-DMACE_ENABLE_HEXAGON", "-DMACE_ENABLE_HEXAGON",
]), ]),
...@@ -208,6 +236,5 @@ cc_test( ...@@ -208,6 +236,5 @@ cc_test(
"test", "test",
"//mace/core:test_benchmark_main", "//mace/core:test_benchmark_main",
"//third_party/eigen3", "//third_party/eigen3",
"@gemmlowp",
], ],
) )
...@@ -108,6 +108,7 @@ class ConcatOp<DeviceType::CPU, T> : public ConcatOpBase { ...@@ -108,6 +108,7 @@ class ConcatOp<DeviceType::CPU, T> : public ConcatOpBase {
} }
}; };
#ifdef MACE_ENABLE_QUANTIZE
template <> template <>
class ConcatOp<DeviceType::CPU, uint8_t> : public ConcatOpBase { class ConcatOp<DeviceType::CPU, uint8_t> : public ConcatOpBase {
public: public:
...@@ -177,6 +178,7 @@ class ConcatOp<DeviceType::CPU, uint8_t> : public ConcatOpBase { ...@@ -177,6 +178,7 @@ class ConcatOp<DeviceType::CPU, uint8_t> : public ConcatOpBase {
return MaceStatus::MACE_SUCCESS; return MaceStatus::MACE_SUCCESS;
} }
}; };
#endif // MACE_ENABLE_QUANTIZE
#ifdef MACE_ENABLE_OPENCL #ifdef MACE_ENABLE_OPENCL
template <typename T> template <typename T>
...@@ -209,8 +211,10 @@ void RegisterConcat(OpRegistryBase *op_registry) { ...@@ -209,8 +211,10 @@ void RegisterConcat(OpRegistryBase *op_registry) {
MACE_REGISTER_OP(op_registry, "Concat", ConcatOp, MACE_REGISTER_OP(op_registry, "Concat", ConcatOp,
DeviceType::CPU, int32_t); DeviceType::CPU, int32_t);
#ifdef MACE_ENABLE_QUANTIZE
MACE_REGISTER_OP(op_registry, "Concat", ConcatOp, MACE_REGISTER_OP(op_registry, "Concat", ConcatOp,
DeviceType::CPU, uint8_t); DeviceType::CPU, uint8_t);
#endif // MACE_ENABLE_QUANTIZE
#ifdef MACE_ENABLE_OPENCL #ifdef MACE_ENABLE_OPENCL
MACE_REGISTER_OP(op_registry, "Concat", ConcatOp, MACE_REGISTER_OP(op_registry, "Concat", ConcatOp,
......
...@@ -31,8 +31,12 @@ ...@@ -31,8 +31,12 @@
#include "mace/ops/arm/conv_winograd.h" #include "mace/ops/arm/conv_winograd.h"
#include "mace/ops/conv_pool_2d_base.h" #include "mace/ops/conv_pool_2d_base.h"
#include "mace/ops/conv_pool_2d_util.h" #include "mace/ops/conv_pool_2d_util.h"
#include "mace/ops/gemmlowp_util.h"
#include "mace/utils/utils.h" #include "mace/utils/utils.h"
#ifdef MACE_ENABLE_QUANTIZE
#include "mace/ops/gemmlowp_util.h"
#endif // MACE_ENABLE_QUANTIZE
#ifdef MACE_ENABLE_OPENCL #ifdef MACE_ENABLE_OPENCL
#include "mace/ops/opencl/image/conv_2d.h" #include "mace/ops/opencl/image/conv_2d.h"
#include "mace/ops/opencl/buffer/conv_2d.h" #include "mace/ops/opencl/buffer/conv_2d.h"
...@@ -707,6 +711,7 @@ class Conv2dOp<DeviceType::CPU, float> : public ConvPool2dOpBase { ...@@ -707,6 +711,7 @@ class Conv2dOp<DeviceType::CPU, float> : public ConvPool2dOpBase {
}; };
#ifdef MACE_ENABLE_QUANTIZE
template <> template <>
class Conv2dOp<DeviceType::CPU, uint8_t> : public ConvPool2dOpBase { class Conv2dOp<DeviceType::CPU, uint8_t> : public ConvPool2dOpBase {
public: public:
...@@ -943,6 +948,7 @@ class Conv2dOp<DeviceType::CPU, uint8_t> : public ConvPool2dOpBase { ...@@ -943,6 +948,7 @@ class Conv2dOp<DeviceType::CPU, uint8_t> : public ConvPool2dOpBase {
MACE_OP_INPUT_TAGS(INPUT, FILTER, BIAS); MACE_OP_INPUT_TAGS(INPUT, FILTER, BIAS);
MACE_OP_OUTPUT_TAGS(OUTPUT); MACE_OP_OUTPUT_TAGS(OUTPUT);
}; };
#endif // MACE_ENABLE_QUANTIZE
#ifdef MACE_ENABLE_OPENCL #ifdef MACE_ENABLE_OPENCL
template <typename T> template <typename T>
...@@ -987,8 +993,10 @@ void RegisterConv2D(OpRegistryBase *op_registry) { ...@@ -987,8 +993,10 @@ void RegisterConv2D(OpRegistryBase *op_registry) {
MACE_REGISTER_OP(op_registry, "Conv2D", Conv2dOp, MACE_REGISTER_OP(op_registry, "Conv2D", Conv2dOp,
DeviceType::CPU, float); DeviceType::CPU, float);
#ifdef MACE_ENABLE_QUANTIZE
MACE_REGISTER_OP(op_registry, "Conv2D", Conv2dOp, MACE_REGISTER_OP(op_registry, "Conv2D", Conv2dOp,
DeviceType::CPU, uint8_t); DeviceType::CPU, uint8_t);
#endif // MACE_ENABLE_QUANTIZE
#ifdef MACE_ENABLE_OPENCL #ifdef MACE_ENABLE_OPENCL
MACE_REGISTER_OP(op_registry, "Conv2D", Conv2dOp, MACE_REGISTER_OP(op_registry, "Conv2D", Conv2dOp,
......
...@@ -20,9 +20,11 @@ ...@@ -20,9 +20,11 @@
#include <string> #include <string>
#include <vector> #include <vector>
#ifdef MACE_ENABLE_QUANTIZE
// We reuse TensorFlow Lite's optimized depthwiseconv_uint8 and parallelized it // We reuse TensorFlow Lite's optimized depthwiseconv_uint8 and parallelized it
// using OpenMP for MACE's quantized depthwise_conv2d. // using OpenMP for MACE's quantized depthwise_conv2d.
#include "tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h" #include "tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h"
#endif // MACE_ENABLE_QUANTIZE
#include "mace/core/future.h" #include "mace/core/future.h"
#include "mace/core/operator.h" #include "mace/core/operator.h"
...@@ -276,6 +278,7 @@ class DepthwiseConv2dOp<DeviceType::CPU, float> : public DepthwiseConv2dOpBase { ...@@ -276,6 +278,7 @@ class DepthwiseConv2dOp<DeviceType::CPU, float> : public DepthwiseConv2dOpBase {
MACE_OP_OUTPUT_TAGS(OUTPUT); MACE_OP_OUTPUT_TAGS(OUTPUT);
}; };
#ifdef MACE_ENABLE_QUANTIZE
template <> template <>
class DepthwiseConv2dOp<DeviceType::CPU, uint8_t> class DepthwiseConv2dOp<DeviceType::CPU, uint8_t>
: public DepthwiseConv2dOpBase { : public DepthwiseConv2dOpBase {
...@@ -479,6 +482,7 @@ class DepthwiseConv2dOp<DeviceType::CPU, uint8_t> ...@@ -479,6 +482,7 @@ class DepthwiseConv2dOp<DeviceType::CPU, uint8_t>
MACE_OP_INPUT_TAGS(INPUT, FILTER, BIAS); MACE_OP_INPUT_TAGS(INPUT, FILTER, BIAS);
MACE_OP_OUTPUT_TAGS(OUTPUT); MACE_OP_OUTPUT_TAGS(OUTPUT);
}; };
#endif // MACE_ENABLE_QUANTIZE
#ifdef MACE_ENABLE_OPENCL #ifdef MACE_ENABLE_OPENCL
template <typename T> template <typename T>
...@@ -520,8 +524,10 @@ void RegisterDepthwiseConv2d(OpRegistryBase *op_registry) { ...@@ -520,8 +524,10 @@ void RegisterDepthwiseConv2d(OpRegistryBase *op_registry) {
MACE_REGISTER_OP(op_registry, "DepthwiseConv2d", MACE_REGISTER_OP(op_registry, "DepthwiseConv2d",
DepthwiseConv2dOp, DeviceType::CPU, float); DepthwiseConv2dOp, DeviceType::CPU, float);
#ifdef MACE_ENABLE_QUANTIZE
MACE_REGISTER_OP(op_registry, "DepthwiseConv2d", MACE_REGISTER_OP(op_registry, "DepthwiseConv2d",
DepthwiseConv2dOp, DeviceType::CPU, uint8_t); DepthwiseConv2dOp, DeviceType::CPU, uint8_t);
#endif // MACE_ENABLE_QUANTIZE
#ifdef MACE_ENABLE_OPENCL #ifdef MACE_ENABLE_OPENCL
MACE_REGISTER_OP(op_registry, "DepthwiseConv2d", MACE_REGISTER_OP(op_registry, "DepthwiseConv2d",
......
...@@ -932,6 +932,7 @@ class EltwiseOp : public Operation { ...@@ -932,6 +932,7 @@ class EltwiseOp : public Operation {
Tensor scalar_tensor_; Tensor scalar_tensor_;
}; };
#ifdef MACE_ENABLE_QUANTIZE
template <> template <>
class EltwiseOp<DeviceType::CPU, uint8_t> : public Operation { class EltwiseOp<DeviceType::CPU, uint8_t> : public Operation {
public: public:
...@@ -1072,6 +1073,7 @@ class EltwiseOp<DeviceType::CPU, uint8_t> : public Operation { ...@@ -1072,6 +1073,7 @@ class EltwiseOp<DeviceType::CPU, uint8_t> : public Operation {
DataFormat data_format_; DataFormat data_format_;
Tensor scalar_tensor_; Tensor scalar_tensor_;
}; };
#endif // MACE_ENABLE_QUANTIZE
#ifdef MACE_ENABLE_OPENCL #ifdef MACE_ENABLE_OPENCL
template <typename T> template <typename T>
...@@ -1113,8 +1115,11 @@ void RegisterEltwise(OpRegistryBase *op_registry) { ...@@ -1113,8 +1115,11 @@ void RegisterEltwise(OpRegistryBase *op_registry) {
MACE_REGISTER_OP(op_registry, "Eltwise", EltwiseOp, MACE_REGISTER_OP(op_registry, "Eltwise", EltwiseOp,
DeviceType::CPU, int32_t); DeviceType::CPU, int32_t);
#ifdef MACE_ENABLE_QUANTIZE
MACE_REGISTER_OP(op_registry, "Eltwise", EltwiseOp, MACE_REGISTER_OP(op_registry, "Eltwise", EltwiseOp,
DeviceType::CPU, uint8_t); DeviceType::CPU, uint8_t);
#endif // MACE_ENABLE_QUANTIZE
#ifdef MACE_ENABLE_OPENCL #ifdef MACE_ENABLE_OPENCL
MACE_REGISTER_OP(op_registry, "Eltwise", EltwiseOp, MACE_REGISTER_OP(op_registry, "Eltwise", EltwiseOp,
DeviceType::GPU, float); DeviceType::GPU, float);
......
...@@ -63,8 +63,10 @@ void RegisterExpandDims(OpRegistryBase *op_registry) { ...@@ -63,8 +63,10 @@ void RegisterExpandDims(OpRegistryBase *op_registry) {
MACE_REGISTER_OP(op_registry, "ExpandDims", ExpandDimsOp, MACE_REGISTER_OP(op_registry, "ExpandDims", ExpandDimsOp,
DeviceType::CPU, int32_t); DeviceType::CPU, int32_t);
#ifdef MACE_ENABLE_QUANTIZE
MACE_REGISTER_OP(op_registry, "ExpandDims", ExpandDimsOp, MACE_REGISTER_OP(op_registry, "ExpandDims", ExpandDimsOp,
DeviceType::CPU, uint8_t); DeviceType::CPU, uint8_t);
#endif // MACE_ENABLE_QUANTIZE
} }
} // namespace ops } // namespace ops
......
...@@ -21,7 +21,10 @@ ...@@ -21,7 +21,10 @@
#include "mace/core/tensor.h" #include "mace/core/tensor.h"
#include "mace/ops/activation.h" #include "mace/ops/activation.h"
#include "mace/ops/gemm.h" #include "mace/ops/gemm.h"
#ifdef MACE_ENABLE_QUANTIZE
#include "mace/ops/gemmlowp_util.h" #include "mace/ops/gemmlowp_util.h"
#endif // MACE_ENABLE_QUANTIZE
#ifdef MACE_ENABLE_OPENCL #ifdef MACE_ENABLE_OPENCL
#include "mace/ops/opencl/image/fully_connected.h" #include "mace/ops/opencl/image/fully_connected.h"
...@@ -106,6 +109,7 @@ class FullyConnectedOp<DeviceType::CPU, float> : public FullyConnectedOpBase { ...@@ -106,6 +109,7 @@ class FullyConnectedOp<DeviceType::CPU, float> : public FullyConnectedOpBase {
} }
}; };
#ifdef MACE_ENABLE_QUANTIZE
template <> template <>
class FullyConnectedOp<DeviceType::CPU, uint8_t> class FullyConnectedOp<DeviceType::CPU, uint8_t>
: public FullyConnectedOpBase { : public FullyConnectedOpBase {
...@@ -180,6 +184,7 @@ class FullyConnectedOp<DeviceType::CPU, uint8_t> ...@@ -180,6 +184,7 @@ class FullyConnectedOp<DeviceType::CPU, uint8_t>
return MaceStatus::MACE_SUCCESS; return MaceStatus::MACE_SUCCESS;
} }
}; };
#endif // MACE_ENABLE_QUANTIZE
#ifdef MACE_ENABLE_OPENCL #ifdef MACE_ENABLE_OPENCL
template <typename T> template <typename T>
...@@ -218,8 +223,11 @@ void RegisterFullyConnected(OpRegistryBase *op_registry) { ...@@ -218,8 +223,11 @@ void RegisterFullyConnected(OpRegistryBase *op_registry) {
MACE_REGISTER_OP(op_registry, "FullyConnected", MACE_REGISTER_OP(op_registry, "FullyConnected",
FullyConnectedOp, DeviceType::CPU, float); FullyConnectedOp, DeviceType::CPU, float);
#ifdef MACE_ENABLE_QUANTIZE
MACE_REGISTER_OP(op_registry, "FullyConnected", MACE_REGISTER_OP(op_registry, "FullyConnected",
FullyConnectedOp, DeviceType::CPU, uint8_t); FullyConnectedOp, DeviceType::CPU, uint8_t);
#endif // MACE_ENABLE_QUANTIZE
#ifdef MACE_ENABLE_OPENCL #ifdef MACE_ENABLE_OPENCL
MACE_REGISTER_OP(op_registry, "FullyConnected", MACE_REGISTER_OP(op_registry, "FullyConnected",
FullyConnectedOp, DeviceType::GPU, float); FullyConnectedOp, DeviceType::GPU, float);
......
...@@ -89,8 +89,11 @@ class GatherOp : public Operation { ...@@ -89,8 +89,11 @@ class GatherOp : public Operation {
void RegisterGather(OpRegistryBase *op_registry) { void RegisterGather(OpRegistryBase *op_registry) {
MACE_REGISTER_OP(op_registry, "Gather", GatherOp, MACE_REGISTER_OP(op_registry, "Gather", GatherOp,
DeviceType::CPU, float); DeviceType::CPU, float);
#ifdef MACE_ENABLE_QUANTIZE
MACE_REGISTER_OP(op_registry, "Gather", GatherOp, MACE_REGISTER_OP(op_registry, "Gather", GatherOp,
DeviceType::CPU, uint8_t); DeviceType::CPU, uint8_t);
#endif // MACE_ENABLE_QUANTIZE
} }
} // namespace ops } // namespace ops
......
...@@ -22,9 +22,13 @@ ...@@ -22,9 +22,13 @@
#include "mace/core/operator.h" #include "mace/core/operator.h"
#include "mace/core/tensor.h" #include "mace/core/tensor.h"
#include "mace/ops/gemm.h" #include "mace/ops/gemm.h"
#include "mace/ops/gemmlowp_util.h"
#include "mace/ops/sgemm.h" #include "mace/ops/sgemm.h"
#include "mace/utils/utils.h" #include "mace/utils/utils.h"
#ifdef MACE_ENABLE_QUANTIZE
#include "mace/ops/gemmlowp_util.h"
#endif // MACE_ENABLE_QUANTIZE
#ifdef MACE_ENABLE_OPENCL #ifdef MACE_ENABLE_OPENCL
#include "mace/ops/opencl/image/matmul.h" #include "mace/ops/opencl/image/matmul.h"
#endif // MACE_ENABLE_OPENCL #endif // MACE_ENABLE_OPENCL
...@@ -150,6 +154,7 @@ class MatMulOp<CPU, float> : public MatMulOpBase { ...@@ -150,6 +154,7 @@ class MatMulOp<CPU, float> : public MatMulOpBase {
SGemm sgemm_; SGemm sgemm_;
}; };
#ifdef MACE_ENABLE_QUANTIZE
template<gemmlowp::MapOrder AOrder, gemmlowp::MapOrder BOrder, template<gemmlowp::MapOrder AOrder, gemmlowp::MapOrder BOrder,
typename OutputType> typename OutputType>
class MatMulFixpointImpl; class MatMulFixpointImpl;
...@@ -311,6 +316,7 @@ class MatMulOp<DeviceType::CPU, uint8_t>: public MatMulOpBase { ...@@ -311,6 +316,7 @@ class MatMulOp<DeviceType::CPU, uint8_t>: public MatMulOpBase {
return MaceStatus::MACE_SUCCESS; return MaceStatus::MACE_SUCCESS;
} }
}; };
#endif // MACE_ENABLE_QUANTIZE
#ifdef MACE_ENABLE_OPENCL #ifdef MACE_ENABLE_OPENCL
template <typename T> template <typename T>
...@@ -342,8 +348,11 @@ void RegisterMatMul(OpRegistryBase *op_registry) { ...@@ -342,8 +348,11 @@ void RegisterMatMul(OpRegistryBase *op_registry) {
MACE_REGISTER_OP(op_registry, "MatMul", MatMulOp, MACE_REGISTER_OP(op_registry, "MatMul", MatMulOp,
DeviceType::CPU, float); DeviceType::CPU, float);
#ifdef MACE_ENABLE_QUANTIZE
MACE_REGISTER_OP(op_registry, "MatMul", MatMulOp, MACE_REGISTER_OP(op_registry, "MatMul", MatMulOp,
DeviceType::CPU, uint8_t); DeviceType::CPU, uint8_t);
#endif // MACE_ENABLE_QUANTIZE
#ifdef MACE_ENABLE_OPENCL #ifdef MACE_ENABLE_OPENCL
MACE_REGISTER_OP(op_registry, "MatMul", MatMulOp, MACE_REGISTER_OP(op_registry, "MatMul", MatMulOp,
DeviceType::GPU, float); DeviceType::GPU, float);
......
...@@ -33,7 +33,6 @@ extern void RegisterDeconv2D(OpRegistryBase *op_registry); ...@@ -33,7 +33,6 @@ extern void RegisterDeconv2D(OpRegistryBase *op_registry);
extern void RegisterDepthToSpace(OpRegistryBase *op_registry); extern void RegisterDepthToSpace(OpRegistryBase *op_registry);
extern void RegisterDepthwiseConv2d(OpRegistryBase *op_registry); extern void RegisterDepthwiseConv2d(OpRegistryBase *op_registry);
extern void RegisterDepthwiseDeconv2d(OpRegistryBase *op_registry); extern void RegisterDepthwiseDeconv2d(OpRegistryBase *op_registry);
extern void RegisterDequantize(OpRegistryBase *op_registry);
extern void RegisterEltwise(OpRegistryBase *op_registry); extern void RegisterEltwise(OpRegistryBase *op_registry);
extern void RegisterExpandDims(OpRegistryBase *op_registry); extern void RegisterExpandDims(OpRegistryBase *op_registry);
extern void RegisterFill(OpRegistryBase *op_registry); extern void RegisterFill(OpRegistryBase *op_registry);
...@@ -45,7 +44,6 @@ extern void RegisterLocalResponseNorm(OpRegistryBase *op_registry); ...@@ -45,7 +44,6 @@ extern void RegisterLocalResponseNorm(OpRegistryBase *op_registry);
extern void RegisterMatMul(OpRegistryBase *op_registry); extern void RegisterMatMul(OpRegistryBase *op_registry);
extern void RegisterPad(OpRegistryBase *op_registry); extern void RegisterPad(OpRegistryBase *op_registry);
extern void RegisterPooling(OpRegistryBase *op_registry); extern void RegisterPooling(OpRegistryBase *op_registry);
extern void RegisterQuantize(OpRegistryBase *op_registry);
extern void RegisterReduceMean(OpRegistryBase *op_registry); extern void RegisterReduceMean(OpRegistryBase *op_registry);
extern void RegisterReshape(OpRegistryBase *op_registry); extern void RegisterReshape(OpRegistryBase *op_registry);
extern void RegisterResizeBicubic(OpRegistryBase *op_registry); extern void RegisterResizeBicubic(OpRegistryBase *op_registry);
...@@ -64,6 +62,11 @@ extern void RegisterStridedSlice(OpRegistryBase *op_registry); ...@@ -64,6 +62,11 @@ extern void RegisterStridedSlice(OpRegistryBase *op_registry);
extern void RegisterTranspose(OpRegistryBase *op_registry); extern void RegisterTranspose(OpRegistryBase *op_registry);
extern void RegisterUnstack(OpRegistryBase *op_registry); extern void RegisterUnstack(OpRegistryBase *op_registry);
#ifdef MACE_ENABLE_QUANTIZE
extern void RegisterDequantize(OpRegistryBase *op_registry);
extern void RegisterQuantize(OpRegistryBase *op_registry);
#endif // MACE_ENABLE_QUANTIZE
#ifdef MACE_ENABLE_OPENCL #ifdef MACE_ENABLE_OPENCL
extern void RegisterBufferTransform(OpRegistryBase *op_registry); extern void RegisterBufferTransform(OpRegistryBase *op_registry);
extern void RegisterBufferInverseTransform(OpRegistryBase *op_registry); extern void RegisterBufferInverseTransform(OpRegistryBase *op_registry);
...@@ -91,7 +94,6 @@ OpRegistry::OpRegistry() : OpRegistryBase() { ...@@ -91,7 +94,6 @@ OpRegistry::OpRegistry() : OpRegistryBase() {
ops::RegisterDepthToSpace(this); ops::RegisterDepthToSpace(this);
ops::RegisterDepthwiseConv2d(this); ops::RegisterDepthwiseConv2d(this);
ops::RegisterDepthwiseDeconv2d(this); ops::RegisterDepthwiseDeconv2d(this);
ops::RegisterDequantize(this);
ops::RegisterEltwise(this); ops::RegisterEltwise(this);
ops::RegisterExpandDims(this); ops::RegisterExpandDims(this);
ops::RegisterFill(this); ops::RegisterFill(this);
...@@ -103,7 +105,6 @@ OpRegistry::OpRegistry() : OpRegistryBase() { ...@@ -103,7 +105,6 @@ OpRegistry::OpRegistry() : OpRegistryBase() {
ops::RegisterMatMul(this); ops::RegisterMatMul(this);
ops::RegisterPad(this); ops::RegisterPad(this);
ops::RegisterPooling(this); ops::RegisterPooling(this);
ops::RegisterQuantize(this);
ops::RegisterReduceMean(this); ops::RegisterReduceMean(this);
ops::RegisterReshape(this); ops::RegisterReshape(this);
ops::RegisterResizeBicubic(this); ops::RegisterResizeBicubic(this);
...@@ -122,6 +123,11 @@ OpRegistry::OpRegistry() : OpRegistryBase() { ...@@ -122,6 +123,11 @@ OpRegistry::OpRegistry() : OpRegistryBase() {
ops::RegisterTranspose(this); ops::RegisterTranspose(this);
ops::RegisterUnstack(this); ops::RegisterUnstack(this);
#ifdef MACE_ENABLE_QUANTIZE
ops::RegisterDequantize(this);
ops::RegisterQuantize(this);
#endif // MACE_ENABLE_QUANTIZE
#ifdef MACE_ENABLE_OPENCL #ifdef MACE_ENABLE_OPENCL
ops::RegisterBufferTransform(this); ops::RegisterBufferTransform(this);
ops::RegisterBufferInverseTransform(this); ops::RegisterBufferInverseTransform(this);
......
...@@ -214,6 +214,7 @@ class PoolingOp<DeviceType::CPU, float> : public PoolingOpBase { ...@@ -214,6 +214,7 @@ class PoolingOp<DeviceType::CPU, float> : public PoolingOpBase {
} }
}; };
#ifdef MACE_ENABLE_QUANTIZE
template <> template <>
class PoolingOp<DeviceType::CPU, uint8_t> : public PoolingOpBase { class PoolingOp<DeviceType::CPU, uint8_t> : public PoolingOpBase {
public: public:
...@@ -420,6 +421,7 @@ class PoolingOp<DeviceType::CPU, uint8_t> : public PoolingOpBase { ...@@ -420,6 +421,7 @@ class PoolingOp<DeviceType::CPU, uint8_t> : public PoolingOpBase {
} }
} }
}; };
#endif // MACE_ENABLE_QUANTIZE
#ifdef MACE_ENABLE_OPENCL #ifdef MACE_ENABLE_OPENCL
template <typename T> template <typename T>
...@@ -451,8 +453,11 @@ class PoolingOp<DeviceType::GPU, T> : public PoolingOpBase { ...@@ -451,8 +453,11 @@ class PoolingOp<DeviceType::GPU, T> : public PoolingOpBase {
void RegisterPooling(OpRegistryBase *op_registry) { void RegisterPooling(OpRegistryBase *op_registry) {
MACE_REGISTER_OP(op_registry, "Pooling", PoolingOp, MACE_REGISTER_OP(op_registry, "Pooling", PoolingOp,
DeviceType::CPU, float); DeviceType::CPU, float);
#ifdef MACE_ENABLE_QUANTIZE
MACE_REGISTER_OP(op_registry, "Pooling", PoolingOp, MACE_REGISTER_OP(op_registry, "Pooling", PoolingOp,
DeviceType::CPU, uint8_t); DeviceType::CPU, uint8_t);
#endif // MACE_ENABLE_QUANTIZE
#ifdef MACE_ENABLE_OPENCL #ifdef MACE_ENABLE_OPENCL
MACE_REGISTER_OP(op_registry, "Pooling", PoolingOp, MACE_REGISTER_OP(op_registry, "Pooling", PoolingOp,
......
...@@ -244,6 +244,7 @@ class ResizeBilinearOp<DeviceType::CPU, T> : public Operation { ...@@ -244,6 +244,7 @@ class ResizeBilinearOp<DeviceType::CPU, T> : public Operation {
std::vector<index_t> size_; std::vector<index_t> size_;
}; };
#ifdef MACE_ENABLE_QUANTIZE
template <> template <>
class ResizeBilinearOp<DeviceType::CPU, uint8_t> : public Operation { class ResizeBilinearOp<DeviceType::CPU, uint8_t> : public Operation {
public: public:
...@@ -317,6 +318,7 @@ class ResizeBilinearOp<DeviceType::CPU, uint8_t> : public Operation { ...@@ -317,6 +318,7 @@ class ResizeBilinearOp<DeviceType::CPU, uint8_t> : public Operation {
bool align_corners_; bool align_corners_;
std::vector<index_t> size_; std::vector<index_t> size_;
}; };
#endif // MACE_ENABLE_QUANTIZE
#ifdef MACE_ENABLE_OPENCL #ifdef MACE_ENABLE_OPENCL
template <typename T> template <typename T>
...@@ -355,8 +357,10 @@ void RegisterResizeBilinear(OpRegistryBase *op_registry) { ...@@ -355,8 +357,10 @@ void RegisterResizeBilinear(OpRegistryBase *op_registry) {
MACE_REGISTER_OP(op_registry, "ResizeBilinear", ResizeBilinearOp, MACE_REGISTER_OP(op_registry, "ResizeBilinear", ResizeBilinearOp,
DeviceType::CPU, float); DeviceType::CPU, float);
#ifdef MACE_ENABLE_QUANTIZE
MACE_REGISTER_OP(op_registry, "ResizeBilinear", ResizeBilinearOp, MACE_REGISTER_OP(op_registry, "ResizeBilinear", ResizeBilinearOp,
DeviceType::CPU, uint8_t); DeviceType::CPU, uint8_t);
#endif // MACE_ENABLE_QUANTIZE
#ifdef MACE_ENABLE_OPENCL #ifdef MACE_ENABLE_OPENCL
MACE_REGISTER_OP(op_registry, "ResizeBilinear", ResizeBilinearOp, MACE_REGISTER_OP(op_registry, "ResizeBilinear", ResizeBilinearOp,
......
...@@ -18,8 +18,12 @@ ...@@ -18,8 +18,12 @@
#include <vector> #include <vector>
#include "mace/core/operator.h" #include "mace/core/operator.h"
#ifdef MACE_ENABLE_QUANTIZE
#include "mace/ops/fixpoint.h" #include "mace/ops/fixpoint.h"
#include "mace/ops/gemmlowp_util.h" #include "mace/ops/gemmlowp_util.h"
#endif // MACE_ENABLE_QUANTIZE
#ifdef MACE_ENABLE_OPENCL #ifdef MACE_ENABLE_OPENCL
#include "mace/ops/opencl/image/softmax.h" #include "mace/ops/opencl/image/softmax.h"
#include "mace/ops/opencl/buffer/softmax.h" #include "mace/ops/opencl/buffer/softmax.h"
...@@ -122,6 +126,7 @@ class SoftmaxOp<DeviceType::CPU, float> : public Operation { ...@@ -122,6 +126,7 @@ class SoftmaxOp<DeviceType::CPU, float> : public Operation {
static const int kInputDeltaIntBits = 6; static const int kInputDeltaIntBits = 6;
static const int kSumExpIntBits = 12; static const int kSumExpIntBits = 12;
#ifdef MACE_ENABLE_QUANTIZE
template <> template <>
class SoftmaxOp<DeviceType::CPU, uint8_t> : public Operation { class SoftmaxOp<DeviceType::CPU, uint8_t> : public Operation {
public: public:
...@@ -351,6 +356,7 @@ class SoftmaxOp<DeviceType::CPU, uint8_t> : public Operation { ...@@ -351,6 +356,7 @@ class SoftmaxOp<DeviceType::CPU, uint8_t> : public Operation {
return MaceStatus::MACE_SUCCESS; return MaceStatus::MACE_SUCCESS;
} }
}; };
#endif // MACE_ENABLE_QUANTIZE
#ifdef MACE_ENABLE_OPENCL #ifdef MACE_ENABLE_OPENCL
template <typename T> template <typename T>
...@@ -382,8 +388,10 @@ void RegisterSoftmax(OpRegistryBase *op_registry) { ...@@ -382,8 +388,10 @@ void RegisterSoftmax(OpRegistryBase *op_registry) {
MACE_REGISTER_OP(op_registry, "Softmax", SoftmaxOp, MACE_REGISTER_OP(op_registry, "Softmax", SoftmaxOp,
DeviceType::CPU, float); DeviceType::CPU, float);
#ifdef MACE_ENABLE_QUANTIZE
MACE_REGISTER_OP(op_registry, "Softmax", SoftmaxOp, MACE_REGISTER_OP(op_registry, "Softmax", SoftmaxOp,
DeviceType::CPU, uint8_t); DeviceType::CPU, uint8_t);
#endif // MACE_ENABLE_QUANTIZE
#ifdef MACE_ENABLE_OPENCL #ifdef MACE_ENABLE_OPENCL
MACE_REGISTER_OP(op_registry, "Softmax", SoftmaxOp, MACE_REGISTER_OP(op_registry, "Softmax", SoftmaxOp,
......
...@@ -197,6 +197,7 @@ class SpaceToBatchNDOp<DeviceType::CPU, float> : public SpaceToBatchOpBase { ...@@ -197,6 +197,7 @@ class SpaceToBatchNDOp<DeviceType::CPU, float> : public SpaceToBatchOpBase {
} }
}; };
#ifdef MACE_ENABLE_QUANTIZE
template <> template <>
class SpaceToBatchNDOp<DeviceType::CPU, uint8_t> : public SpaceToBatchOpBase { class SpaceToBatchNDOp<DeviceType::CPU, uint8_t> : public SpaceToBatchOpBase {
public: public:
...@@ -299,6 +300,7 @@ class SpaceToBatchNDOp<DeviceType::CPU, uint8_t> : public SpaceToBatchOpBase { ...@@ -299,6 +300,7 @@ class SpaceToBatchNDOp<DeviceType::CPU, uint8_t> : public SpaceToBatchOpBase {
return MaceStatus::MACE_SUCCESS; return MaceStatus::MACE_SUCCESS;
} }
}; };
#endif // MACE_ENABLE_QUANTIZE
#ifdef MACE_ENABLE_OPENCL #ifdef MACE_ENABLE_OPENCL
template <typename T> template <typename T>
...@@ -331,8 +333,10 @@ void RegisterSpaceToBatchND(OpRegistryBase *op_registry) { ...@@ -331,8 +333,10 @@ void RegisterSpaceToBatchND(OpRegistryBase *op_registry) {
MACE_REGISTER_OP(op_registry, "SpaceToBatchND", MACE_REGISTER_OP(op_registry, "SpaceToBatchND",
SpaceToBatchNDOp, DeviceType::CPU, float); SpaceToBatchNDOp, DeviceType::CPU, float);
#ifdef MACE_ENABLE_QUANTIZE
MACE_REGISTER_OP(op_registry, "SpaceToBatchND", MACE_REGISTER_OP(op_registry, "SpaceToBatchND",
SpaceToBatchNDOp, DeviceType::CPU, uint8_t); SpaceToBatchNDOp, DeviceType::CPU, uint8_t);
#endif // MACE_ENABLE_QUANTIZE
#ifdef MACE_ENABLE_OPENCL #ifdef MACE_ENABLE_OPENCL
MACE_REGISTER_OP(op_registry, "SpaceToBatchND", MACE_REGISTER_OP(op_registry, "SpaceToBatchND",
......
...@@ -52,7 +52,9 @@ class SqueezeOp : public Operation { ...@@ -52,7 +52,9 @@ class SqueezeOp : public Operation {
void RegisterSqueeze(OpRegistryBase *op_registry) { void RegisterSqueeze(OpRegistryBase *op_registry) {
MACE_REGISTER_OP(op_registry, "Squeeze", SqueezeOp, DeviceType::CPU, float); MACE_REGISTER_OP(op_registry, "Squeeze", SqueezeOp, DeviceType::CPU, float);
#ifdef MACE_ENABLE_QUANTIZE
MACE_REGISTER_OP(op_registry, "Squeeze", SqueezeOp, DeviceType::CPU, uint8_t); MACE_REGISTER_OP(op_registry, "Squeeze", SqueezeOp, DeviceType::CPU, uint8_t);
#endif // MACE_ENABLE_QUANTIZE
#ifdef MACE_ENABLE_OPENCL #ifdef MACE_ENABLE_OPENCL
MACE_REGISTER_OP(op_registry, "Squeeze", SqueezeOp, DeviceType::GPU, float); MACE_REGISTER_OP(op_registry, "Squeeze", SqueezeOp, DeviceType::GPU, float);
MACE_REGISTER_OP(op_registry, "Squeeze", SqueezeOp, DeviceType::GPU, half); MACE_REGISTER_OP(op_registry, "Squeeze", SqueezeOp, DeviceType::GPU, half);
......
...@@ -223,7 +223,7 @@ class MACE_API MaceEngineConfig { ...@@ -223,7 +223,7 @@ class MACE_API MaceEngineConfig {
/// \param status MACE_SUCCESS for successful, or it can't reliably /// \param status MACE_SUCCESS for successful, or it can't reliably
/// detect big-LITTLE cores (see GetBigLittleCoreIDs). In such cases, it's /// detect big-LITTLE cores (see GetBigLittleCoreIDs). In such cases, it's
/// suggested to use AFFINITY_NONE to use all cores. /// suggested to use AFFINITY_NONE to use all cores.
/// \param use_gemmlowp use gemmlowp for quantized inference /// \param use_gemmlowp use gemmlowp for cpu quantized inference
/// \return MaceStatus::MACE_SUCCESS for success, other for failed. /// \return MaceStatus::MACE_SUCCESS for success, other for failed.
MaceStatus SetCPUThreadPolicy(int num_threads_hint, MaceStatus SetCPUThreadPolicy(int num_threads_hint,
CPUAffinityPolicy policy, CPUAffinityPolicy policy,
......
...@@ -24,41 +24,41 @@ mkdir -p $LIB_DIR/linux-x86-64 ...@@ -24,41 +24,41 @@ mkdir -p $LIB_DIR/linux-x86-64
# build shared libraries # build shared libraries
echo "build shared lib for armeabi-v7a + cpu_gpu_dsp" echo "build shared lib for armeabi-v7a + cpu_gpu_dsp"
bazel build --config android --config optimization mace/libmace:libmace_dynamic --define neon=true --define openmp=true --define opencl=true --define hexagon=true --cpu=armeabi-v7a bazel build --config android --config optimization mace/libmace:libmace_dynamic --define neon=true --define openmp=true --define opencl=true --define hexagon=true --define quantize=true --cpu=armeabi-v7a
cp bazel-bin/mace/libmace/libmace.so $LIB_DIR/armeabi-v7a/cpu_gpu_dsp/ cp bazel-bin/mace/libmace/libmace.so $LIB_DIR/armeabi-v7a/cpu_gpu_dsp/
cp third_party/nnlib/*so $LIB_DIR/armeabi-v7a/cpu_gpu_dsp/ cp third_party/nnlib/*so $LIB_DIR/armeabi-v7a/cpu_gpu_dsp/
echo "build shared lib for armeabi-v7a + cpu_gpu" echo "build shared lib for armeabi-v7a + cpu_gpu"
bazel build --config android --config optimization mace/libmace:libmace_dynamic --define neon=true --define openmp=true --define opencl=true --cpu=armeabi-v7a bazel build --config android --config optimization mace/libmace:libmace_dynamic --define neon=true --define openmp=true --define opencl=true --define quantize=true --cpu=armeabi-v7a
cp bazel-bin/mace/libmace/libmace.so $LIB_DIR/armeabi-v7a/cpu_gpu/ cp bazel-bin/mace/libmace/libmace.so $LIB_DIR/armeabi-v7a/cpu_gpu/
echo "build shared lib for arm64-v8a + cpu_gpu" echo "build shared lib for arm64-v8a + cpu_gpu"
bazel build --config android --config optimization mace/libmace:libmace_dynamic --define neon=true --define openmp=true --define opencl=true --cpu=arm64-v8a bazel build --config android --config optimization mace/libmace:libmace_dynamic --define neon=true --define openmp=true --define opencl=true --define quantize=true --cpu=arm64-v8a
cp bazel-bin/mace/libmace/libmace.so $LIB_DIR/arm64-v8a/cpu_gpu/ cp bazel-bin/mace/libmace/libmace.so $LIB_DIR/arm64-v8a/cpu_gpu/
if [[ "$OSTYPE" != "darwin"* ]];then if [[ "$OSTYPE" != "darwin"* ]];then
echo "build shared lib for linux-x86-64" echo "build shared lib for linux-x86-64"
bazel build mace/libmace:libmace_dynamic --config optimization --define openmp=true bazel build mace/libmace:libmace_dynamic --config optimization --define quantize=true --define openmp=true
cp bazel-bin/mace/libmace/libmace.so $LIB_DIR/linux-x86-64/ cp bazel-bin/mace/libmace/libmace.so $LIB_DIR/linux-x86-64/
fi fi
# build static libraries # build static libraries
echo "build static lib for armeabi-v7a + cpu_gpu_dsp" echo "build static lib for armeabi-v7a + cpu_gpu_dsp"
bazel build --config android --config optimization mace/libmace:libmace_static --config symbol_hidden --define neon=true --define openmp=true --define opencl=true --define hexagon=true --cpu=armeabi-v7a bazel build --config android --config optimization mace/libmace:libmace_static --config symbol_hidden --define neon=true --define openmp=true --define opencl=true --define hexagon=true --define quantize=true --cpu=armeabi-v7a
cp bazel-genfiles/mace/libmace/libmace.a $LIB_DIR/armeabi-v7a/cpu_gpu_dsp/ cp bazel-genfiles/mace/libmace/libmace.a $LIB_DIR/armeabi-v7a/cpu_gpu_dsp/
cp third_party/nnlib/*so $LIB_DIR/armeabi-v7a/cpu_gpu_dsp/ cp third_party/nnlib/*so $LIB_DIR/armeabi-v7a/cpu_gpu_dsp/
echo "build static lib for armeabi-v7a + cpu_gpu" echo "build static lib for armeabi-v7a + cpu_gpu"
bazel build --config android --config optimization mace/libmace:libmace_static --config symbol_hidden --define neon=true --define openmp=true --define opencl=true --cpu=armeabi-v7a bazel build --config android --config optimization mace/libmace:libmace_static --config symbol_hidden --define neon=true --define openmp=true --define opencl=true --define quantize=true --cpu=armeabi-v7a
cp bazel-genfiles/mace/libmace/libmace.a $LIB_DIR/armeabi-v7a/cpu_gpu/ cp bazel-genfiles/mace/libmace/libmace.a $LIB_DIR/armeabi-v7a/cpu_gpu/
echo "build static lib for arm64-v8a + cpu_gpu" echo "build static lib for arm64-v8a + cpu_gpu"
bazel build --config android --config optimization mace/libmace:libmace_static --config symbol_hidden --define neon=true --define openmp=true --define opencl=true --cpu=arm64-v8a bazel build --config android --config optimization mace/libmace:libmace_static --config symbol_hidden --define neon=true --define openmp=true --define opencl=true --define quantize=true --cpu=arm64-v8a
cp bazel-genfiles/mace/libmace/libmace.a $LIB_DIR/arm64-v8a/cpu_gpu/ cp bazel-genfiles/mace/libmace/libmace.a $LIB_DIR/arm64-v8a/cpu_gpu/
if [[ "$OSTYPE" != "darwin"* ]];then if [[ "$OSTYPE" != "darwin"* ]];then
echo "build static lib for linux-x86-64" echo "build static lib for linux-x86-64"
bazel build mace/libmace:libmace_static --config optimization --define openmp=true bazel build mace/libmace:libmace_static --config optimization --define quantize=true --define openmp=true
cp bazel-genfiles/mace/libmace/libmace.a $LIB_DIR/linux-x86-64/ cp bazel-genfiles/mace/libmace/libmace.a $LIB_DIR/linux-x86-64/
fi fi
......
...@@ -279,6 +279,17 @@ def get_opencl_mode(configs): ...@@ -279,6 +279,17 @@ def get_opencl_mode(configs):
return False return False
def get_quantize_mode(configs):
    """Report whether any model in the configuration enables quantization.

    Looks up each entry under ``configs[YAMLKeyword.models]`` and reads its
    ``YAMLKeyword.quantize`` value, treating a missing key as 0.

    :param configs: parsed configuration mapping holding the models section.
    :return: True if at least one model has quantize equal to 1, else False.
    """
    models = configs[YAMLKeyword.models]
    return any(
        models[name].get(YAMLKeyword.quantize, 0) == 1
        for name in models)
def md5sum(str): def md5sum(str):
md5 = hashlib.md5() md5 = hashlib.md5()
md5.update(str.encode('utf-8')) md5.update(str.encode('utf-8'))
...@@ -855,6 +866,7 @@ def build_model_lib(configs, address_sanitizer): ...@@ -855,6 +866,7 @@ def build_model_lib(configs, address_sanitizer):
abi=target_abi, abi=target_abi,
hexagon_mode=hexagon_mode, hexagon_mode=hexagon_mode,
enable_opencl=get_opencl_mode(configs), enable_opencl=get_opencl_mode(configs),
enable_quantize=get_quantize_mode(configs),
address_sanitizer=address_sanitizer, address_sanitizer=address_sanitizer,
symbol_hidden=True symbol_hidden=True
) )
...@@ -968,6 +980,7 @@ def build_mace_run(configs, target_abi, enable_openmp, address_sanitizer, ...@@ -968,6 +980,7 @@ def build_mace_run(configs, target_abi, enable_openmp, address_sanitizer,
hexagon_mode=hexagon_mode, hexagon_mode=hexagon_mode,
enable_openmp=enable_openmp, enable_openmp=enable_openmp,
enable_opencl=get_opencl_mode(configs), enable_opencl=get_opencl_mode(configs),
enable_quantize=get_quantize_mode(configs),
address_sanitizer=address_sanitizer, address_sanitizer=address_sanitizer,
symbol_hidden=symbol_hidden, symbol_hidden=symbol_hidden,
extra_args=build_arg extra_args=build_arg
...@@ -996,6 +1009,7 @@ def build_example(configs, target_abi, enable_openmp, address_sanitizer, ...@@ -996,6 +1009,7 @@ def build_example(configs, target_abi, enable_openmp, address_sanitizer,
abi=target_abi, abi=target_abi,
enable_openmp=enable_openmp, enable_openmp=enable_openmp,
enable_opencl=get_opencl_mode(configs), enable_opencl=get_opencl_mode(configs),
enable_quantize=get_quantize_mode(configs),
hexagon_mode=hexagon_mode, hexagon_mode=hexagon_mode,
address_sanitizer=address_sanitizer, address_sanitizer=address_sanitizer,
symbol_hidden=symbol_hidden) symbol_hidden=symbol_hidden)
...@@ -1025,6 +1039,7 @@ def build_example(configs, target_abi, enable_openmp, address_sanitizer, ...@@ -1025,6 +1039,7 @@ def build_example(configs, target_abi, enable_openmp, address_sanitizer,
abi=target_abi, abi=target_abi,
enable_openmp=enable_openmp, enable_openmp=enable_openmp,
enable_opencl=get_opencl_mode(configs), enable_opencl=get_opencl_mode(configs),
enable_quantize=get_quantize_mode(configs),
hexagon_mode=hexagon_mode, hexagon_mode=hexagon_mode,
address_sanitizer=address_sanitizer, address_sanitizer=address_sanitizer,
extra_args=build_arg) extra_args=build_arg)
...@@ -1404,6 +1419,7 @@ def build_benchmark_model(configs, target_abi, enable_openmp, mace_lib_type): ...@@ -1404,6 +1419,7 @@ def build_benchmark_model(configs, target_abi, enable_openmp, mace_lib_type):
abi=target_abi, abi=target_abi,
enable_openmp=enable_openmp, enable_openmp=enable_openmp,
enable_opencl=get_opencl_mode(configs), enable_opencl=get_opencl_mode(configs),
enable_quantize=get_quantize_mode(configs),
hexagon_mode=hexagon_mode, hexagon_mode=hexagon_mode,
symbol_hidden=symbol_hidden, symbol_hidden=symbol_hidden,
extra_args=build_arg) extra_args=build_arg)
......
...@@ -342,6 +342,7 @@ def bazel_build(target, ...@@ -342,6 +342,7 @@ def bazel_build(target,
enable_openmp=True, enable_openmp=True,
enable_neon=True, enable_neon=True,
enable_opencl=True, enable_opencl=True,
enable_quantize=True,
address_sanitizer=False, address_sanitizer=False,
symbol_hidden=True, symbol_hidden=True,
extra_args=""): extra_args=""):
...@@ -351,6 +352,8 @@ def bazel_build(target, ...@@ -351,6 +352,8 @@ def bazel_build(target,
"build", "build",
"--define", "--define",
"openmp=%s" % str(enable_openmp).lower(), "openmp=%s" % str(enable_openmp).lower(),
"--define",
"quantize=%s" % str(enable_quantize).lower(),
target, target,
) )
else: else:
...@@ -367,6 +370,8 @@ def bazel_build(target, ...@@ -367,6 +370,8 @@ def bazel_build(target,
"--define", "--define",
"opencl=%s" % str(enable_opencl).lower(), "opencl=%s" % str(enable_opencl).lower(),
"--define", "--define",
"quantize=%s" % str(enable_quantize).lower(),
"--define",
"hexagon=%s" % str(hexagon_mode).lower()) "hexagon=%s" % str(hexagon_mode).lower())
if address_sanitizer: if address_sanitizer:
bazel_args += ("--config", "asan") bazel_args += ("--config", "asan")
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册