diff --git a/mace/BUILD b/mace/BUILD index 2ee799919ace663dde4b6114ce37fb5b2cd002c3..cf2e1e2d1963514b7f2e1c2a4f06f294f9970d21 100644 --- a/mace/BUILD +++ b/mace/BUILD @@ -59,3 +59,11 @@ config_setting( }, visibility = ["//visibility:public"], ) + +config_setting( + name = "quantize_enabled", + define_values = { + "quantize": "true", + }, + visibility = ["//visibility:public"], +) diff --git a/mace/core/BUILD b/mace/core/BUILD index 16db0e187be7e6df69db6afbdc2f997679f8458f..c979feccb22c86be12a0f7c6d9284b466e4be27d 100644 --- a/mace/core/BUILD +++ b/mace/core/BUILD @@ -15,6 +15,7 @@ load( "if_openmp_enabled", "if_neon_enabled", "if_opencl_enabled", + "if_quantize_enabled", ) cc_library( @@ -51,6 +52,8 @@ cc_library( "-DMACE_ENABLE_OPENMP", ]) + if_opencl_enabled([ "-DMACE_ENABLE_OPENCL", + ]) + if_quantize_enabled([ + "-DMACE_ENABLE_QUANTIZE", ]) + if_hexagon_enabled([ "-DMACE_ENABLE_HEXAGON", ]) + if_neon_enabled([ @@ -64,11 +67,12 @@ cc_library( "//mace/codegen:generated_version", "//mace/proto:mace_cc", "//mace/utils", - "@gemmlowp", ] + if_opencl_enabled([ ":opencl_headers", "//mace/codegen:generated_opencl", "@half//:half", + ]) + if_quantize_enabled([ + "@gemmlowp", ]) + if_hexagon_enabled([ "//third_party/nnlib:libhexagon", ]), diff --git a/mace/core/runtime/cpu/cpu_runtime.cc b/mace/core/runtime/cpu/cpu_runtime.cc index ae168a547090c5e133f32b714d6d80f77b374987..789f2a5c1e2ccf0f87f8fbf03c71a22d2dec76cf 100644 --- a/mace/core/runtime/cpu/cpu_runtime.cc +++ b/mace/core/runtime/cpu/cpu_runtime.cc @@ -176,11 +176,16 @@ MaceStatus SetOpenMPThreadsAndAffinityCPUs(int omp_num_threads, MaceStatus CPURuntime::SetOpenMPThreadsAndAffinityPolicy( int omp_num_threads_hint, CPUAffinityPolicy policy, - gemmlowp::GemmContext *gemm_context) { + void *gemm_context) { if (policy == CPUAffinityPolicy::AFFINITY_NONE) { +#ifdef MACE_ENABLE_QUANTIZE if (gemm_context) { - gemm_context->set_max_num_threads(std::max(0, omp_num_threads_hint)); + 
static_cast<gemmlowp::GemmContext *>(gemm_context)->set_max_num_threads( + std::max(0, omp_num_threads_hint)); } +#else + MACE_UNUSED(gemm_context); +#endif // MACE_ENABLE_QUANTIZE #ifdef MACE_ENABLE_OPENMP if (omp_num_threads_hint > 0) { omp_set_num_threads(std::min(omp_num_threads_hint, omp_get_num_procs())); @@ -210,9 +215,12 @@ MaceStatus CPURuntime::SetOpenMPThreadsAndAffinityPolicy( omp_num_threads_hint = use_cpu_ids.size(); } +#ifdef MACE_ENABLE_QUANTIZE if (gemm_context) { - gemm_context->set_max_num_threads(omp_num_threads_hint); + static_cast<gemmlowp::GemmContext *>(gemm_context)->set_max_num_threads( + omp_num_threads_hint); } +#endif // MACE_ENABLE_QUANTIZE return SetOpenMPThreadsAndAffinityCPUs(omp_num_threads_hint, use_cpu_ids); } diff --git a/mace/core/runtime/cpu/cpu_runtime.h b/mace/core/runtime/cpu/cpu_runtime.h index a6926e9e1e1198a704fa68d119111ea70c701079..00fd492b0923184af070400ce0749ff6e29ee3b8 100644 --- a/mace/core/runtime/cpu/cpu_runtime.h +++ b/mace/core/runtime/cpu/cpu_runtime.h @@ -18,7 +18,11 @@ #include #include +#ifdef MACE_ENABLE_QUANTIZE #include "public/gemmlowp.h" +#endif // MACE_ENABLE_QUANTIZE + +#include "mace/core/macros.h" #include "mace/public/mace.h" #include "mace/utils/logging.h" @@ -34,22 +38,34 @@ class CPURuntime { : num_threads_(num_threads), policy_(policy), gemm_context_(nullptr) { +#ifdef MACE_ENABLE_QUANTIZE if (use_gemmlowp) { MACE_CHECK_NOTNULL(GetGemmlowpContext()); } - +#else + MACE_UNUSED(use_gemmlowp); +#endif // MACE_ENABLE_QUANTIZE SetOpenMPThreadsAndAffinityPolicy(num_threads_, policy_, - gemm_context_.get()); + gemm_context_); + } + +#ifdef MACE_ENABLE_QUANTIZE + ~CPURuntime() { + if (gemm_context_) { // only set once GetGemmlowpContext() has run + delete static_cast<gemmlowp::GemmContext *>(gemm_context_); + } } - ~CPURuntime() = default; gemmlowp::GemmContext *GetGemmlowpContext() { if (!gemm_context_) { - gemm_context_.reset(new gemmlowp::GemmContext()); + gemm_context_ = new gemmlowp::GemmContext(); } - return gemm_context_.get(); + return static_cast<gemmlowp::GemmContext *>(gemm_context_); } +#else + ~CPURuntime() = default; +#endif // 
MACE_ENABLE_QUANTIZE int num_threads() const { return num_threads_; @@ -67,11 +83,11 @@ class CPURuntime { MaceStatus SetOpenMPThreadsAndAffinityPolicy( int omp_num_threads_hint, CPUAffinityPolicy policy, - gemmlowp::GemmContext *gemm_context); + void *gemm_context); int num_threads_; CPUAffinityPolicy policy_; - std::unique_ptr gemm_context_; + void *gemm_context_; }; } // namespace mace diff --git a/mace/libmace/BUILD b/mace/libmace/BUILD index d227f259fb8b6195a708b2edee21193c3e006086..48312e8cc4beaa82cf5c04b2cef51e935401f738 100644 --- a/mace/libmace/BUILD +++ b/mace/libmace/BUILD @@ -16,6 +16,7 @@ load( "if_hexagon_enabled", "if_opencl_enabled", "if_opencl_enabled_str", + "if_quantize_enabled", ) cc_library( @@ -34,6 +35,8 @@ cc_library( "-mfloat-abi=softfp", ]) + if_opencl_enabled([ "-DMACE_ENABLE_OPENCL", + ]) + if_quantize_enabled([ + "-DMACE_ENABLE_QUANTIZE", ]) + if_hexagon_enabled([ "-DMACE_ENABLE_HEXAGON", ]), diff --git a/mace/mace.bzl b/mace/mace.bzl index 3b91e44effb0ce17863a255851766278d9de053c..a7a6bc89454a847deb9fc0c832eac964dd90d429 100644 --- a/mace/mace.bzl +++ b/mace/mace.bzl @@ -60,6 +60,12 @@ def if_opencl_enabled_str(a): "//conditions:default": "", }) +def if_quantize_enabled(a): + return select({ + "//mace:quantize_enabled": a, + "//conditions:default": [], + }) + def mace_version_genrule(): native.genrule( name = "mace_version_gen", diff --git a/mace/ops/BUILD b/mace/ops/BUILD index bfdf85a5b051efa3df099ae776c90a1247334193..c0d4fbeda8aab9023583a67492dacbe4e2e7107f 100644 --- a/mace/ops/BUILD +++ b/mace/ops/BUILD @@ -14,6 +14,7 @@ load( "if_android_armv7", "if_hexagon_enabled", "if_opencl_enabled", + "if_quantize_enabled", ) cc_library( @@ -33,6 +34,7 @@ cc_library( "buffer_transform.cc", "lstm_cell.cc", "winograd_transform.cc", + "quantize.cc", ], ) + if_opencl_enabled(glob( [ @@ -47,6 +49,10 @@ cc_library( exclude = [ "opencl/*_test.cc", ], + )) + if_quantize_enabled(glob( + [ + "quantize.cc", + ], )), hdrs = glob( [ @@ -56,11 +62,16 @@ 
cc_library( exclude = [ "ops_registry.h", "ops_test_util.h", + "fixpoint.h", + "gemmlowp_util.h", ] ) + if_opencl_enabled(glob([ "opencl/*.h", "opencl/image/*.h", "opencl/buffer/*.h", + ])) + if_quantize_enabled(glob([ + "fixpoint.h", + "gemmlowp_util.h", ])), copts = [ "-Werror", @@ -76,15 +87,18 @@ cc_library( "-mfloat-abi=softfp", ]) + if_opencl_enabled([ "-DMACE_ENABLE_OPENCL", + ]) + if_quantize_enabled([ + "-DMACE_ENABLE_QUANTIZE", ]) + if_hexagon_enabled([ "-DMACE_ENABLE_HEXAGON", ]), linkopts = if_android(["-lm"]), deps = [ "//mace/core", - "@gemmlowp", "@tflite", - ], + ] + if_quantize_enabled([ + "@gemmlowp", + ]), ) @@ -110,6 +124,8 @@ cc_library( "-mfloat-abi=softfp", ]) + if_opencl_enabled([ "-DMACE_ENABLE_OPENCL", + ]) + if_quantize_enabled([ + "-DMACE_ENABLE_QUANTIZE", ]) + if_hexagon_enabled([ "-DMACE_ENABLE_HEXAGON", ]), @@ -157,7 +173,15 @@ cc_test( "arm/*_test.cc", "opencl/*_test.cc", ], - ), + exclude = [ + "fixpoint_test.cc" + ], + ) + if_quantize_enabled(glob( + [ + "fixpoint_test.cc" + ], + + )), copts = [ "-Werror", "-Wextra", @@ -171,6 +195,8 @@ cc_test( "-mfloat-abi=softfp", ]) + if_opencl_enabled([ "-DMACE_ENABLE_OPENCL", + ]) + if_quantize_enabled([ + "-DMACE_ENABLE_QUANTIZE", ]) + if_hexagon_enabled([ "-DMACE_ENABLE_HEXAGON", ]), @@ -199,6 +225,8 @@ cc_test( "-mfloat-abi=softfp", ]) + if_opencl_enabled([ "-DMACE_ENABLE_OPENCL", + ]) + if_quantize_enabled([ + "-DMACE_ENABLE_QUANTIZE", ]) + if_hexagon_enabled([ "-DMACE_ENABLE_HEXAGON", ]), @@ -208,6 +236,5 @@ cc_test( "test", "//mace/core:test_benchmark_main", "//third_party/eigen3", - "@gemmlowp", ], ) diff --git a/mace/ops/concat.cc b/mace/ops/concat.cc index 0cebac68392711cffc6af26faf8906f97fdc01d1..f4c7ebbefac649e87af26a1a295f5613e171a4a7 100644 --- a/mace/ops/concat.cc +++ b/mace/ops/concat.cc @@ -108,6 +108,7 @@ class ConcatOp : public ConcatOpBase { } }; +#ifdef MACE_ENABLE_QUANTIZE template <> class ConcatOp : public ConcatOpBase { public: @@ -177,6 +178,7 @@ class ConcatOp : 
public ConcatOpBase { return MaceStatus::MACE_SUCCESS; } }; +#endif // MACE_ENABLE_QUANTIZE #ifdef MACE_ENABLE_OPENCL template @@ -209,8 +211,10 @@ void RegisterConcat(OpRegistryBase *op_registry) { MACE_REGISTER_OP(op_registry, "Concat", ConcatOp, DeviceType::CPU, int32_t); +#ifdef MACE_ENABLE_QUANTIZE MACE_REGISTER_OP(op_registry, "Concat", ConcatOp, DeviceType::CPU, uint8_t); +#endif // MACE_ENABLE_QUANTIZE #ifdef MACE_ENABLE_OPENCL MACE_REGISTER_OP(op_registry, "Concat", ConcatOp, diff --git a/mace/ops/conv_2d.cc b/mace/ops/conv_2d.cc index 74234b5ee15a2dec2e9fc5812bcd87656ebb585e..af459b2bb3e9d730dc111f7a46615f5c452e405d 100644 --- a/mace/ops/conv_2d.cc +++ b/mace/ops/conv_2d.cc @@ -31,8 +31,12 @@ #include "mace/ops/arm/conv_winograd.h" #include "mace/ops/conv_pool_2d_base.h" #include "mace/ops/conv_pool_2d_util.h" -#include "mace/ops/gemmlowp_util.h" #include "mace/utils/utils.h" + +#ifdef MACE_ENABLE_QUANTIZE +#include "mace/ops/gemmlowp_util.h" +#endif // MACE_ENABLE_QUANTIZE + #ifdef MACE_ENABLE_OPENCL #include "mace/ops/opencl/image/conv_2d.h" #include "mace/ops/opencl/buffer/conv_2d.h" @@ -707,6 +711,7 @@ class Conv2dOp : public ConvPool2dOpBase { }; +#ifdef MACE_ENABLE_QUANTIZE template <> class Conv2dOp : public ConvPool2dOpBase { public: @@ -943,6 +948,7 @@ class Conv2dOp : public ConvPool2dOpBase { MACE_OP_INPUT_TAGS(INPUT, FILTER, BIAS); MACE_OP_OUTPUT_TAGS(OUTPUT); }; +#endif // MACE_ENABLE_QUANTIZE #ifdef MACE_ENABLE_OPENCL template @@ -987,8 +993,10 @@ void RegisterConv2D(OpRegistryBase *op_registry) { MACE_REGISTER_OP(op_registry, "Conv2D", Conv2dOp, DeviceType::CPU, float); +#ifdef MACE_ENABLE_QUANTIZE MACE_REGISTER_OP(op_registry, "Conv2D", Conv2dOp, DeviceType::CPU, uint8_t); +#endif // MACE_ENABLE_QUANTIZE #ifdef MACE_ENABLE_OPENCL MACE_REGISTER_OP(op_registry, "Conv2D", Conv2dOp, diff --git a/mace/ops/depthwise_conv2d.cc b/mace/ops/depthwise_conv2d.cc index 76eee2f227e87365fdaa4d359bae776e16c02ca1..1f1dd1363770d18c0d590742cba7de7f5ec8d29e 
100644 --- a/mace/ops/depthwise_conv2d.cc +++ b/mace/ops/depthwise_conv2d.cc @@ -20,9 +20,11 @@ #include #include +#ifdef MACE_ENABLE_QUANTIZE // We reuse TensorFlow Lite's optimized depthwiseconv_uint8 and parallelized it // using OpenMP for MACE's quantized depthwise_conv2d. #include "tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h" +#endif // MACE_ENABLE_QUANTIZE #include "mace/core/future.h" #include "mace/core/operator.h" @@ -276,6 +278,7 @@ class DepthwiseConv2dOp : public DepthwiseConv2dOpBase { MACE_OP_OUTPUT_TAGS(OUTPUT); }; +#ifdef MACE_ENABLE_QUANTIZE template <> class DepthwiseConv2dOp : public DepthwiseConv2dOpBase { @@ -479,6 +482,7 @@ class DepthwiseConv2dOp MACE_OP_INPUT_TAGS(INPUT, FILTER, BIAS); MACE_OP_OUTPUT_TAGS(OUTPUT); }; +#endif // MACE_ENABLE_QUANTIZE #ifdef MACE_ENABLE_OPENCL template @@ -520,8 +524,10 @@ void RegisterDepthwiseConv2d(OpRegistryBase *op_registry) { MACE_REGISTER_OP(op_registry, "DepthwiseConv2d", DepthwiseConv2dOp, DeviceType::CPU, float); +#ifdef MACE_ENABLE_QUANTIZE MACE_REGISTER_OP(op_registry, "DepthwiseConv2d", DepthwiseConv2dOp, DeviceType::CPU, uint8_t); +#endif // MACE_ENABLE_QUANTIZE #ifdef MACE_ENABLE_OPENCL MACE_REGISTER_OP(op_registry, "DepthwiseConv2d", diff --git a/mace/ops/eltwise.cc b/mace/ops/eltwise.cc index 9bca9033cbb601aa3686d0be633f5fcdea2035d7..cfd0bdba91d2a5efe91ea5b407cc461bbd1aaeef 100644 --- a/mace/ops/eltwise.cc +++ b/mace/ops/eltwise.cc @@ -932,6 +932,7 @@ class EltwiseOp : public Operation { Tensor scalar_tensor_; }; +#ifdef MACE_ENABLE_QUANTIZE template <> class EltwiseOp : public Operation { public: @@ -1072,6 +1073,7 @@ class EltwiseOp : public Operation { DataFormat data_format_; Tensor scalar_tensor_; }; +#endif // MACE_ENABLE_QUANTIZE #ifdef MACE_ENABLE_OPENCL template @@ -1113,8 +1115,11 @@ void RegisterEltwise(OpRegistryBase *op_registry) { MACE_REGISTER_OP(op_registry, "Eltwise", EltwiseOp, DeviceType::CPU, int32_t); +#ifdef MACE_ENABLE_QUANTIZE 
MACE_REGISTER_OP(op_registry, "Eltwise", EltwiseOp, DeviceType::CPU, uint8_t); +#endif // MACE_ENABLE_QUANTIZE + #ifdef MACE_ENABLE_OPENCL MACE_REGISTER_OP(op_registry, "Eltwise", EltwiseOp, DeviceType::GPU, float); diff --git a/mace/ops/expand_dims.cc b/mace/ops/expand_dims.cc index a912e0c9b3ca1379e0579e90662b49d586d86208..74551687c7358628b9f2fb9fdfde37dfdfa37012 100644 --- a/mace/ops/expand_dims.cc +++ b/mace/ops/expand_dims.cc @@ -63,8 +63,10 @@ void RegisterExpandDims(OpRegistryBase *op_registry) { MACE_REGISTER_OP(op_registry, "ExpandDims", ExpandDimsOp, DeviceType::CPU, int32_t); +#ifdef MACE_ENABLE_QUANTIZE MACE_REGISTER_OP(op_registry, "ExpandDims", ExpandDimsOp, DeviceType::CPU, uint8_t); +#endif // MACE_ENABLE_QUANTIZE } } // namespace ops diff --git a/mace/ops/fully_connected.cc b/mace/ops/fully_connected.cc index c5a8872b361a53a76715f7f57f6e06ea83b6a638..3705e415e59ce17a92da48f5535fe7845d232e3f 100644 --- a/mace/ops/fully_connected.cc +++ b/mace/ops/fully_connected.cc @@ -21,7 +21,10 @@ #include "mace/core/tensor.h" #include "mace/ops/activation.h" #include "mace/ops/gemm.h" + +#ifdef MACE_ENABLE_QUANTIZE #include "mace/ops/gemmlowp_util.h" +#endif // MACE_ENABLE_QUANTIZE #ifdef MACE_ENABLE_OPENCL #include "mace/ops/opencl/image/fully_connected.h" @@ -106,6 +109,7 @@ class FullyConnectedOp : public FullyConnectedOpBase { } }; +#ifdef MACE_ENABLE_QUANTIZE template <> class FullyConnectedOp : public FullyConnectedOpBase { @@ -180,6 +184,7 @@ class FullyConnectedOp return MaceStatus::MACE_SUCCESS; } }; +#endif // MACE_ENABLE_QUANTIZE #ifdef MACE_ENABLE_OPENCL template @@ -218,8 +223,11 @@ void RegisterFullyConnected(OpRegistryBase *op_registry) { MACE_REGISTER_OP(op_registry, "FullyConnected", FullyConnectedOp, DeviceType::CPU, float); +#ifdef MACE_ENABLE_QUANTIZE MACE_REGISTER_OP(op_registry, "FullyConnected", FullyConnectedOp, DeviceType::CPU, uint8_t); +#endif // MACE_ENABLE_QUANTIZE + #ifdef MACE_ENABLE_OPENCL MACE_REGISTER_OP(op_registry, 
"FullyConnected", FullyConnectedOp, DeviceType::GPU, float); diff --git a/mace/ops/gather.cc b/mace/ops/gather.cc index 4357e466df43c6c9e7d5338fdd80a897f79d828f..1af56d7edf8bd11c53c7db816ea636b2d0ff06fa 100644 --- a/mace/ops/gather.cc +++ b/mace/ops/gather.cc @@ -89,8 +89,11 @@ class GatherOp : public Operation { void RegisterGather(OpRegistryBase *op_registry) { MACE_REGISTER_OP(op_registry, "Gather", GatherOp, DeviceType::CPU, float); + +#ifdef MACE_ENABLE_QUANTIZE MACE_REGISTER_OP(op_registry, "Gather", GatherOp, DeviceType::CPU, uint8_t); +#endif // MACE_ENABLE_QUANTIZE } } // namespace ops diff --git a/mace/ops/matmul.cc b/mace/ops/matmul.cc index 8608657e018ddae1bab64ffe34f062f152c0c213..af88dd85dc8be35d17b99832735a6b092f35c883 100644 --- a/mace/ops/matmul.cc +++ b/mace/ops/matmul.cc @@ -22,9 +22,13 @@ #include "mace/core/operator.h" #include "mace/core/tensor.h" #include "mace/ops/gemm.h" -#include "mace/ops/gemmlowp_util.h" #include "mace/ops/sgemm.h" #include "mace/utils/utils.h" + +#ifdef MACE_ENABLE_QUANTIZE +#include "mace/ops/gemmlowp_util.h" +#endif // MACE_ENABLE_QUANTIZE + #ifdef MACE_ENABLE_OPENCL #include "mace/ops/opencl/image/matmul.h" #endif // MACE_ENABLE_OPENCL @@ -150,6 +154,7 @@ class MatMulOp : public MatMulOpBase { SGemm sgemm_; }; +#ifdef MACE_ENABLE_QUANTIZE template class MatMulFixpointImpl; @@ -311,6 +316,7 @@ class MatMulOp: public MatMulOpBase { return MaceStatus::MACE_SUCCESS; } }; +#endif // MACE_ENABLE_QUANTIZE #ifdef MACE_ENABLE_OPENCL template @@ -342,8 +348,11 @@ void RegisterMatMul(OpRegistryBase *op_registry) { MACE_REGISTER_OP(op_registry, "MatMul", MatMulOp, DeviceType::CPU, float); +#ifdef MACE_ENABLE_QUANTIZE MACE_REGISTER_OP(op_registry, "MatMul", MatMulOp, DeviceType::CPU, uint8_t); +#endif // MACE_ENABLE_QUANTIZE + #ifdef MACE_ENABLE_OPENCL MACE_REGISTER_OP(op_registry, "MatMul", MatMulOp, DeviceType::GPU, float); diff --git a/mace/ops/ops_registry.cc b/mace/ops/ops_registry.cc index 
bd3038c5244f306112e05d52e95f69d6e51add9f..48a893760a8789fb8f831774726332d55e17a922 100644 --- a/mace/ops/ops_registry.cc +++ b/mace/ops/ops_registry.cc @@ -33,7 +33,6 @@ extern void RegisterDeconv2D(OpRegistryBase *op_registry); extern void RegisterDepthToSpace(OpRegistryBase *op_registry); extern void RegisterDepthwiseConv2d(OpRegistryBase *op_registry); extern void RegisterDepthwiseDeconv2d(OpRegistryBase *op_registry); -extern void RegisterDequantize(OpRegistryBase *op_registry); extern void RegisterEltwise(OpRegistryBase *op_registry); extern void RegisterExpandDims(OpRegistryBase *op_registry); extern void RegisterFill(OpRegistryBase *op_registry); @@ -45,7 +44,6 @@ extern void RegisterLocalResponseNorm(OpRegistryBase *op_registry); extern void RegisterMatMul(OpRegistryBase *op_registry); extern void RegisterPad(OpRegistryBase *op_registry); extern void RegisterPooling(OpRegistryBase *op_registry); -extern void RegisterQuantize(OpRegistryBase *op_registry); extern void RegisterReduceMean(OpRegistryBase *op_registry); extern void RegisterReshape(OpRegistryBase *op_registry); extern void RegisterResizeBicubic(OpRegistryBase *op_registry); @@ -64,6 +62,11 @@ extern void RegisterStridedSlice(OpRegistryBase *op_registry); extern void RegisterTranspose(OpRegistryBase *op_registry); extern void RegisterUnstack(OpRegistryBase *op_registry); +#ifdef MACE_ENABLE_QUANTIZE +extern void RegisterDequantize(OpRegistryBase *op_registry); +extern void RegisterQuantize(OpRegistryBase *op_registry); +#endif // MACE_ENABLE_QUANTIZE + #ifdef MACE_ENABLE_OPENCL extern void RegisterBufferTransform(OpRegistryBase *op_registry); extern void RegisterBufferInverseTransform(OpRegistryBase *op_registry); @@ -91,7 +94,6 @@ OpRegistry::OpRegistry() : OpRegistryBase() { ops::RegisterDepthToSpace(this); ops::RegisterDepthwiseConv2d(this); ops::RegisterDepthwiseDeconv2d(this); - ops::RegisterDequantize(this); ops::RegisterEltwise(this); ops::RegisterExpandDims(this); ops::RegisterFill(this); 
@@ -103,7 +105,6 @@ OpRegistry::OpRegistry() : OpRegistryBase() { ops::RegisterMatMul(this); ops::RegisterPad(this); ops::RegisterPooling(this); - ops::RegisterQuantize(this); ops::RegisterReduceMean(this); ops::RegisterReshape(this); ops::RegisterResizeBicubic(this); @@ -122,6 +123,11 @@ OpRegistry::OpRegistry() : OpRegistryBase() { ops::RegisterTranspose(this); ops::RegisterUnstack(this); +#ifdef MACE_ENABLE_QUANTIZE + ops::RegisterDequantize(this); + ops::RegisterQuantize(this); +#endif // MACE_ENABLE_QUANTIZE + #ifdef MACE_ENABLE_OPENCL ops::RegisterBufferTransform(this); ops::RegisterBufferInverseTransform(this); diff --git a/mace/ops/pooling.cc b/mace/ops/pooling.cc index e1800b87254ff2d220e6939ea4986260bfd3d908..5f9d0e0d7641217e156e3d071e8f521eda95af8b 100644 --- a/mace/ops/pooling.cc +++ b/mace/ops/pooling.cc @@ -214,6 +214,7 @@ class PoolingOp : public PoolingOpBase { } }; +#ifdef MACE_ENABLE_QUANTIZE template <> class PoolingOp : public PoolingOpBase { public: @@ -420,6 +421,7 @@ class PoolingOp : public PoolingOpBase { } } }; +#endif // MACE_ENABLE_QUANTIZE #ifdef MACE_ENABLE_OPENCL template @@ -451,8 +453,11 @@ class PoolingOp : public PoolingOpBase { void RegisterPooling(OpRegistryBase *op_registry) { MACE_REGISTER_OP(op_registry, "Pooling", PoolingOp, DeviceType::CPU, float); + +#ifdef MACE_ENABLE_QUANTIZE MACE_REGISTER_OP(op_registry, "Pooling", PoolingOp, DeviceType::CPU, uint8_t); +#endif // MACE_ENABLE_QUANTIZE #ifdef MACE_ENABLE_OPENCL MACE_REGISTER_OP(op_registry, "Pooling", PoolingOp, diff --git a/mace/ops/resize_bilinear.cc b/mace/ops/resize_bilinear.cc index 4b3f5a0917de5d82fd33924712625190c19da9ee..91f6c3e5ccf491755d98cae03f8bf32910fde31e 100644 --- a/mace/ops/resize_bilinear.cc +++ b/mace/ops/resize_bilinear.cc @@ -244,6 +244,7 @@ class ResizeBilinearOp : public Operation { std::vector size_; }; +#ifdef MACE_ENABLE_QUANTIZE template <> class ResizeBilinearOp : public Operation { public: @@ -317,6 +318,7 @@ class ResizeBilinearOp : public 
Operation { bool align_corners_; std::vector size_; }; +#endif // MACE_ENABLE_QUANTIZE #ifdef MACE_ENABLE_OPENCL template @@ -355,8 +357,10 @@ void RegisterResizeBilinear(OpRegistryBase *op_registry) { MACE_REGISTER_OP(op_registry, "ResizeBilinear", ResizeBilinearOp, DeviceType::CPU, float); +#ifdef MACE_ENABLE_QUANTIZE MACE_REGISTER_OP(op_registry, "ResizeBilinear", ResizeBilinearOp, DeviceType::CPU, uint8_t); +#endif // MACE_ENABLE_QUANTIZE #ifdef MACE_ENABLE_OPENCL MACE_REGISTER_OP(op_registry, "ResizeBilinear", ResizeBilinearOp, diff --git a/mace/ops/softmax.cc b/mace/ops/softmax.cc index 8f6fb7fd24ff94c04fb38ef585f94e167d555a59..6d62fabc9781838007b9a6d8db8a629b47cfdb40 100644 --- a/mace/ops/softmax.cc +++ b/mace/ops/softmax.cc @@ -18,8 +18,12 @@ #include #include "mace/core/operator.h" + +#ifdef MACE_ENABLE_QUANTIZE #include "mace/ops/fixpoint.h" #include "mace/ops/gemmlowp_util.h" +#endif // MACE_ENABLE_QUANTIZE + #ifdef MACE_ENABLE_OPENCL #include "mace/ops/opencl/image/softmax.h" #include "mace/ops/opencl/buffer/softmax.h" @@ -122,6 +126,7 @@ class SoftmaxOp : public Operation { static const int kInputDeltaIntBits = 6; static const int kSumExpIntBits = 12; +#ifdef MACE_ENABLE_QUANTIZE template <> class SoftmaxOp : public Operation { public: @@ -351,6 +356,7 @@ class SoftmaxOp : public Operation { return MaceStatus::MACE_SUCCESS; } }; +#endif // MACE_ENABLE_QUANTIZE #ifdef MACE_ENABLE_OPENCL template @@ -382,8 +388,10 @@ void RegisterSoftmax(OpRegistryBase *op_registry) { MACE_REGISTER_OP(op_registry, "Softmax", SoftmaxOp, DeviceType::CPU, float); +#ifdef MACE_ENABLE_QUANTIZE MACE_REGISTER_OP(op_registry, "Softmax", SoftmaxOp, DeviceType::CPU, uint8_t); +#endif // MACE_ENABLE_QUANTIZE #ifdef MACE_ENABLE_OPENCL MACE_REGISTER_OP(op_registry, "Softmax", SoftmaxOp, diff --git a/mace/ops/space_to_batch.cc b/mace/ops/space_to_batch.cc index f391bc79bc4b005a8e6757c4a457159df9baf09a..a023ae897b98fca66c7502b82f40bef8fcc94959 100644 --- a/mace/ops/space_to_batch.cc 
+++ b/mace/ops/space_to_batch.cc @@ -197,6 +197,7 @@ class SpaceToBatchNDOp : public SpaceToBatchOpBase { } }; +#ifdef MACE_ENABLE_QUANTIZE template <> class SpaceToBatchNDOp : public SpaceToBatchOpBase { public: @@ -299,6 +300,7 @@ class SpaceToBatchNDOp : public SpaceToBatchOpBase { return MaceStatus::MACE_SUCCESS; } }; +#endif // MACE_ENABLE_QUANTIZE #ifdef MACE_ENABLE_OPENCL template @@ -331,8 +333,10 @@ void RegisterSpaceToBatchND(OpRegistryBase *op_registry) { MACE_REGISTER_OP(op_registry, "SpaceToBatchND", SpaceToBatchNDOp, DeviceType::CPU, float); +#ifdef MACE_ENABLE_QUANTIZE MACE_REGISTER_OP(op_registry, "SpaceToBatchND", SpaceToBatchNDOp, DeviceType::CPU, uint8_t); +#endif // MACE_ENABLE_QUANTIZE #ifdef MACE_ENABLE_OPENCL MACE_REGISTER_OP(op_registry, "SpaceToBatchND", diff --git a/mace/ops/squeeze.cc b/mace/ops/squeeze.cc index 37ee3d70eae1d21ecb3335d292eb5913a660d909..0cd15752f52adc7383239591c96c077e4354ac04 100644 --- a/mace/ops/squeeze.cc +++ b/mace/ops/squeeze.cc @@ -52,7 +52,9 @@ class SqueezeOp : public Operation { void RegisterSqueeze(OpRegistryBase *op_registry) { MACE_REGISTER_OP(op_registry, "Squeeze", SqueezeOp, DeviceType::CPU, float); +#ifdef MACE_ENABLE_QUANTIZE MACE_REGISTER_OP(op_registry, "Squeeze", SqueezeOp, DeviceType::CPU, uint8_t); +#endif // MACE_ENABLE_QUANTIZE #ifdef MACE_ENABLE_OPENCL MACE_REGISTER_OP(op_registry, "Squeeze", SqueezeOp, DeviceType::GPU, float); MACE_REGISTER_OP(op_registry, "Squeeze", SqueezeOp, DeviceType::GPU, half); diff --git a/mace/public/mace.h b/mace/public/mace.h index e9ab737ae5e3fa0582a12fb998f1000077121995..ef8fb35d5d781a401b0be58c8a59f03c48a3bd16 100644 --- a/mace/public/mace.h +++ b/mace/public/mace.h @@ -223,7 +223,7 @@ class MACE_API MaceEngineConfig { /// \param status MACE_SUCCESS for successful, or it can't reliabley /// detect big-LITTLE cores (see GetBigLittleCoreIDs). In such cases, it's /// suggested to use AFFINITY_NONE to use all cores. 
- /// \param use_gemmlowp use gemmlowp for quantized inference + /// \param use_gemmlowp use gemmlowp for cpu quantized inference /// \return MaceStatus::MACE_SUCCESS for success, other for failed. MaceStatus SetCPUThreadPolicy(int num_threads_hint, CPUAffinityPolicy policy, diff --git a/tools/build-standalone-lib.sh b/tools/build-standalone-lib.sh index a08fb3b06c8b1ce3cf66b0f759051145edc82d97..348665964479900f1b02a1424364d7133d14cda4 100755 --- a/tools/build-standalone-lib.sh +++ b/tools/build-standalone-lib.sh @@ -24,41 +24,41 @@ mkdir -p $LIB_DIR/linux-x86-64 # build shared libraries echo "build shared lib for armeabi-v7a + cpu_gpu_dsp" -bazel build --config android --config optimization mace/libmace:libmace_dynamic --define neon=true --define openmp=true --define opencl=true --define hexagon=true --cpu=armeabi-v7a +bazel build --config android --config optimization mace/libmace:libmace_dynamic --define neon=true --define openmp=true --define opencl=true --define hexagon=true --define quantize=true --cpu=armeabi-v7a cp bazel-bin/mace/libmace/libmace.so $LIB_DIR/armeabi-v7a/cpu_gpu_dsp/ cp third_party/nnlib/*so $LIB_DIR/armeabi-v7a/cpu_gpu_dsp/ echo "build shared lib for armeabi-v7a + cpu_gpu" -bazel build --config android --config optimization mace/libmace:libmace_dynamic --define neon=true --define openmp=true --define opencl=true --cpu=armeabi-v7a +bazel build --config android --config optimization mace/libmace:libmace_dynamic --define neon=true --define openmp=true --define opencl=true --define quantize=true --cpu=armeabi-v7a cp bazel-bin/mace/libmace/libmace.so $LIB_DIR/armeabi-v7a/cpu_gpu/ echo "build shared lib for arm64-v8a + cpu_gpu" -bazel build --config android --config optimization mace/libmace:libmace_dynamic --define neon=true --define openmp=true --define opencl=true --cpu=arm64-v8a +bazel build --config android --config optimization mace/libmace:libmace_dynamic --define neon=true --define openmp=true --define opencl=true --define quantize=true 
--cpu=arm64-v8a cp bazel-bin/mace/libmace/libmace.so $LIB_DIR/arm64-v8a/cpu_gpu/ if [[ "$OSTYPE" != "darwin"* ]];then echo "build shared lib for linux-x86-64" - bazel build mace/libmace:libmace_dynamic --config optimization --define openmp=true + bazel build mace/libmace:libmace_dynamic --config optimization --define quantize=true --define openmp=true cp bazel-bin/mace/libmace/libmace.so $LIB_DIR/linux-x86-64/ fi # build static libraries echo "build static lib for armeabi-v7a + cpu_gpu_dsp" -bazel build --config android --config optimization mace/libmace:libmace_static --config symbol_hidden --define neon=true --define openmp=true --define opencl=true --define hexagon=true --cpu=armeabi-v7a +bazel build --config android --config optimization mace/libmace:libmace_static --config symbol_hidden --define neon=true --define openmp=true --define opencl=true --define hexagon=true --define quantize=true --cpu=armeabi-v7a cp bazel-genfiles/mace/libmace/libmace.a $LIB_DIR/armeabi-v7a/cpu_gpu_dsp/ cp third_party/nnlib/*so $LIB_DIR/armeabi-v7a/cpu_gpu_dsp/ echo "build static lib for armeabi-v7a + cpu_gpu" -bazel build --config android --config optimization mace/libmace:libmace_static --config symbol_hidden --define neon=true --define openmp=true --define opencl=true --cpu=armeabi-v7a +bazel build --config android --config optimization mace/libmace:libmace_static --config symbol_hidden --define neon=true --define openmp=true --define opencl=true --define quantize=true --cpu=armeabi-v7a cp bazel-genfiles/mace/libmace/libmace.a $LIB_DIR/armeabi-v7a/cpu_gpu/ echo "build static lib for arm64-v8a + cpu_gpu" -bazel build --config android --config optimization mace/libmace:libmace_static --config symbol_hidden --define neon=true --define openmp=true --define opencl=true --cpu=arm64-v8a +bazel build --config android --config optimization mace/libmace:libmace_static --config symbol_hidden --define neon=true --define openmp=true --define opencl=true --define quantize=true --cpu=arm64-v8a 
cp bazel-genfiles/mace/libmace/libmace.a $LIB_DIR/arm64-v8a/cpu_gpu/ if [[ "$OSTYPE" != "darwin"* ]];then echo "build static lib for linux-x86-64" - bazel build mace/libmace:libmace_static --config optimization --define openmp=true + bazel build mace/libmace:libmace_static --config optimization --define quantize=true --define openmp=true cp bazel-genfiles/mace/libmace/libmace.a $LIB_DIR/linux-x86-64/ fi diff --git a/tools/converter.py b/tools/converter.py index 4ad1a8b55d732181758a5fc119355eb1923919cf..3a1aeba3ac31c9615aa3d781591927a0d8b62d5c 100644 --- a/tools/converter.py +++ b/tools/converter.py @@ -279,6 +279,17 @@ def get_opencl_mode(configs): return False +def get_quantize_mode(configs): + for model_name in configs[YAMLKeyword.models]: + quantize =\ + configs[YAMLKeyword.models][model_name].get( + YAMLKeyword.quantize, 0) + if quantize == 1: + return True + + return False + + def md5sum(str): md5 = hashlib.md5() md5.update(str.encode('utf-8')) @@ -855,6 +866,7 @@ def build_model_lib(configs, address_sanitizer): abi=target_abi, hexagon_mode=hexagon_mode, enable_opencl=get_opencl_mode(configs), + enable_quantize=get_quantize_mode(configs), address_sanitizer=address_sanitizer, symbol_hidden=True ) @@ -968,6 +980,7 @@ def build_mace_run(configs, target_abi, enable_openmp, address_sanitizer, hexagon_mode=hexagon_mode, enable_openmp=enable_openmp, enable_opencl=get_opencl_mode(configs), + enable_quantize=get_quantize_mode(configs), address_sanitizer=address_sanitizer, symbol_hidden=symbol_hidden, extra_args=build_arg @@ -996,6 +1009,7 @@ def build_example(configs, target_abi, enable_openmp, address_sanitizer, abi=target_abi, enable_openmp=enable_openmp, enable_opencl=get_opencl_mode(configs), + enable_quantize=get_quantize_mode(configs), hexagon_mode=hexagon_mode, address_sanitizer=address_sanitizer, symbol_hidden=symbol_hidden) @@ -1025,6 +1039,7 @@ def build_example(configs, target_abi, enable_openmp, address_sanitizer, abi=target_abi, 
enable_openmp=enable_openmp, enable_opencl=get_opencl_mode(configs), + enable_quantize=get_quantize_mode(configs), hexagon_mode=hexagon_mode, address_sanitizer=address_sanitizer, extra_args=build_arg) @@ -1404,6 +1419,7 @@ def build_benchmark_model(configs, target_abi, enable_openmp, mace_lib_type): abi=target_abi, enable_openmp=enable_openmp, enable_opencl=get_opencl_mode(configs), + enable_quantize=get_quantize_mode(configs), hexagon_mode=hexagon_mode, symbol_hidden=symbol_hidden, extra_args=build_arg) diff --git a/tools/sh_commands.py b/tools/sh_commands.py index 9c8045f19b3e3a2afae06d4a4555e2d34968caf3..0eb991296d180225b25bc2cc1429f0de17c10e76 100644 --- a/tools/sh_commands.py +++ b/tools/sh_commands.py @@ -342,6 +342,7 @@ def bazel_build(target, enable_openmp=True, enable_neon=True, enable_opencl=True, + enable_quantize=True, address_sanitizer=False, symbol_hidden=True, extra_args=""): @@ -351,6 +352,8 @@ def bazel_build(target, "build", "--define", "openmp=%s" % str(enable_openmp).lower(), + "--define", + "quantize=%s" % str(enable_quantize).lower(), target, ) else: @@ -367,6 +370,8 @@ def bazel_build(target, "--define", "opencl=%s" % str(enable_opencl).lower(), "--define", + "quantize=%s" % str(enable_quantize).lower(), + "--define", "hexagon=%s" % str(hexagon_mode).lower()) if address_sanitizer: bazel_args += ("--config", "asan")