提交 e35a077a 编写于 作者: Y yejianwu

separate quantize code when run without quantize

上级 e988e951
......@@ -59,3 +59,11 @@ config_setting(
},
visibility = ["//visibility:public"],
)
# Build condition that matches when the build is invoked with
# `--define quantize=true`; used as a select() key to switch on the
# quantization-specific sources, copts and deps.
config_setting(
name = "quantize_enabled",
define_values = {
"quantize": "true",
},
visibility = ["//visibility:public"],
)
......@@ -15,6 +15,7 @@ load(
"if_openmp_enabled",
"if_neon_enabled",
"if_opencl_enabled",
"if_quantize_enabled",
)
cc_library(
......@@ -51,6 +52,8 @@ cc_library(
"-DMACE_ENABLE_OPENMP",
]) + if_opencl_enabled([
"-DMACE_ENABLE_OPENCL",
]) + if_quantize_enabled([
"-DMACE_ENABLE_QUANTIZE",
]) + if_hexagon_enabled([
"-DMACE_ENABLE_HEXAGON",
]) + if_neon_enabled([
......@@ -64,11 +67,12 @@ cc_library(
"//mace/codegen:generated_version",
"//mace/proto:mace_cc",
"//mace/utils",
"@gemmlowp",
] + if_opencl_enabled([
":opencl_headers",
"//mace/codegen:generated_opencl",
"@half//:half",
]) + if_quantize_enabled([
"@gemmlowp",
]) + if_hexagon_enabled([
"//third_party/nnlib:libhexagon",
]),
......
......@@ -176,11 +176,16 @@ MaceStatus SetOpenMPThreadsAndAffinityCPUs(int omp_num_threads,
MaceStatus CPURuntime::SetOpenMPThreadsAndAffinityPolicy(
int omp_num_threads_hint,
CPUAffinityPolicy policy,
gemmlowp::GemmContext *gemm_context) {
void *gemm_context) {
if (policy == CPUAffinityPolicy::AFFINITY_NONE) {
#ifdef MACE_ENABLE_QUANTIZE
if (gemm_context) {
gemm_context->set_max_num_threads(std::max(0, omp_num_threads_hint));
static_cast<gemmlowp::GemmContext*>(gemm_context)->set_max_num_threads(
std::max(0, omp_num_threads_hint));
}
#else
MACE_UNUSED(gemm_context);
#endif // MACE_ENABLE_QUANTIZE
#ifdef MACE_ENABLE_OPENMP
if (omp_num_threads_hint > 0) {
omp_set_num_threads(std::min(omp_num_threads_hint, omp_get_num_procs()));
......@@ -210,9 +215,12 @@ MaceStatus CPURuntime::SetOpenMPThreadsAndAffinityPolicy(
omp_num_threads_hint = use_cpu_ids.size();
}
#ifdef MACE_ENABLE_QUANTIZE
if (gemm_context) {
gemm_context->set_max_num_threads(omp_num_threads_hint);
static_cast<gemmlowp::GemmContext*>(gemm_context)->set_max_num_threads(
omp_num_threads_hint);
}
#endif // MACE_ENABLE_QUANTIZE
return SetOpenMPThreadsAndAffinityCPUs(omp_num_threads_hint, use_cpu_ids);
}
......
......@@ -18,7 +18,11 @@
#include <memory>
#include <vector>
#ifdef MACE_ENABLE_QUANTIZE
#include "public/gemmlowp.h"
#endif // MACE_ENABLE_QUANTIZE
#include "mace/core/macros.h"
#include "mace/public/mace.h"
#include "mace/utils/logging.h"
......@@ -34,22 +38,34 @@ class CPURuntime {
: num_threads_(num_threads),
policy_(policy),
gemm_context_(nullptr) {
#ifdef MACE_ENABLE_QUANTIZE
if (use_gemmlowp) {
MACE_CHECK_NOTNULL(GetGemmlowpContext());
}
#else
MACE_UNUSED(use_gemmlowp);
#endif // MACE_ENABLE_QUANTIZE
SetOpenMPThreadsAndAffinityPolicy(num_threads_,
policy_,
gemm_context_.get());
gemm_context_);
}
#ifdef MACE_ENABLE_QUANTIZE
// Releases the gemmlowp context that GetGemmlowpContext() allocates with
// `new`. The member is stored as an untyped `void *`, so it has to be cast
// back to its real type before deletion.
~CPURuntime() {
  // The previous guard was inverted (`if (!gemm_context_)`), so the context
  // was only "deleted" when it was null and leaked otherwise. `delete` on a
  // null pointer is a no-op, so no guard is required at all.
  delete static_cast<gemmlowp::GemmContext*>(gemm_context_);
}
~CPURuntime() = default;
gemmlowp::GemmContext *GetGemmlowpContext() {
if (!gemm_context_) {
gemm_context_.reset(new gemmlowp::GemmContext());
gemm_context_ = new gemmlowp::GemmContext();
}
return gemm_context_.get();
return static_cast<gemmlowp::GemmContext*>(gemm_context_);
}
#else
~CPURuntime() = default;
#endif // MACE_ENABLE_QUANTIZE
int num_threads() const {
return num_threads_;
......@@ -67,11 +83,11 @@ class CPURuntime {
MaceStatus SetOpenMPThreadsAndAffinityPolicy(
int omp_num_threads_hint,
CPUAffinityPolicy policy,
gemmlowp::GemmContext *gemm_context);
void *gemm_context);
int num_threads_;
CPUAffinityPolicy policy_;
std::unique_ptr<gemmlowp::GemmContext> gemm_context_;
void *gemm_context_;
};
} // namespace mace
......
......@@ -16,6 +16,7 @@ load(
"if_hexagon_enabled",
"if_opencl_enabled",
"if_opencl_enabled_str",
"if_quantize_enabled",
)
cc_library(
......@@ -34,6 +35,8 @@ cc_library(
"-mfloat-abi=softfp",
]) + if_opencl_enabled([
"-DMACE_ENABLE_OPENCL",
]) + if_quantize_enabled([
"-DMACE_ENABLE_QUANTIZE",
]) + if_hexagon_enabled([
"-DMACE_ENABLE_HEXAGON",
]),
......
......@@ -60,6 +60,12 @@ def if_opencl_enabled_str(a):
"//conditions:default": "",
})
def if_quantize_enabled(a):
    # Resolves to `a` when the //mace:quantize_enabled condition matches,
    # and to an empty list for every other configuration.
    branches = {
        "//mace:quantize_enabled": a,
        "//conditions:default": [],
    }
    return select(branches)
def mace_version_genrule():
native.genrule(
name = "mace_version_gen",
......
......@@ -14,6 +14,7 @@ load(
"if_android_armv7",
"if_hexagon_enabled",
"if_opencl_enabled",
"if_quantize_enabled",
)
cc_library(
......@@ -33,6 +34,7 @@ cc_library(
"buffer_transform.cc",
"lstm_cell.cc",
"winograd_transform.cc",
"quantize.cc",
],
) + if_opencl_enabled(glob(
[
......@@ -47,6 +49,10 @@ cc_library(
exclude = [
"opencl/*_test.cc",
],
)) + if_quantize_enabled(glob(
[
"quantize.cc",
],
)),
hdrs = glob(
[
......@@ -56,11 +62,16 @@ cc_library(
exclude = [
"ops_registry.h",
"ops_test_util.h",
"fixpoint.h",
"gemmlowp_util.h",
]
) + if_opencl_enabled(glob([
"opencl/*.h",
"opencl/image/*.h",
"opencl/buffer/*.h",
])) + if_quantize_enabled(glob([
"fixpoint.h",
"gemmlowp_util.h",
])),
copts = [
"-Werror",
......@@ -76,15 +87,18 @@ cc_library(
"-mfloat-abi=softfp",
]) + if_opencl_enabled([
"-DMACE_ENABLE_OPENCL",
]) + if_quantize_enabled([
"-DMACE_ENABLE_QUANTIZE",
]) + if_hexagon_enabled([
"-DMACE_ENABLE_HEXAGON",
]),
linkopts = if_android(["-lm"]),
deps = [
"//mace/core",
"@gemmlowp",
"@tflite",
],
] + if_quantize_enabled([
"@gemmlowp",
]),
)
......@@ -110,6 +124,8 @@ cc_library(
"-mfloat-abi=softfp",
]) + if_opencl_enabled([
"-DMACE_ENABLE_OPENCL",
]) + if_quantize_enabled([
"-DMACE_ENABLE_QUANTIZE",
]) + if_hexagon_enabled([
"-DMACE_ENABLE_HEXAGON",
]),
......@@ -157,7 +173,15 @@ cc_test(
"arm/*_test.cc",
"opencl/*_test.cc",
],
),
exclude = [
"fixpoint_test.cc"
],
) + if_quantize_enabled(glob(
[
"fixpoint_test.cc"
],
)),
copts = [
"-Werror",
"-Wextra",
......@@ -171,6 +195,8 @@ cc_test(
"-mfloat-abi=softfp",
]) + if_opencl_enabled([
"-DMACE_ENABLE_OPENCL",
]) + if_quantize_enabled([
"-DMACE_ENABLE_QUANTIZE",
]) + if_hexagon_enabled([
"-DMACE_ENABLE_HEXAGON",
]),
......@@ -199,6 +225,8 @@ cc_test(
"-mfloat-abi=softfp",
]) + if_opencl_enabled([
"-DMACE_ENABLE_OPENCL",
]) + if_quantize_enabled([
"-DMACE_ENABLE_QUANTIZE",
]) + if_hexagon_enabled([
"-DMACE_ENABLE_HEXAGON",
]),
......@@ -208,6 +236,5 @@ cc_test(
"test",
"//mace/core:test_benchmark_main",
"//third_party/eigen3",
"@gemmlowp",
],
)
......@@ -108,6 +108,7 @@ class ConcatOp<DeviceType::CPU, T> : public ConcatOpBase {
}
};
#ifdef MACE_ENABLE_QUANTIZE
template <>
class ConcatOp<DeviceType::CPU, uint8_t> : public ConcatOpBase {
public:
......@@ -177,6 +178,7 @@ class ConcatOp<DeviceType::CPU, uint8_t> : public ConcatOpBase {
return MaceStatus::MACE_SUCCESS;
}
};
#endif // MACE_ENABLE_QUANTIZE
#ifdef MACE_ENABLE_OPENCL
template <typename T>
......@@ -209,8 +211,10 @@ void RegisterConcat(OpRegistryBase *op_registry) {
MACE_REGISTER_OP(op_registry, "Concat", ConcatOp,
DeviceType::CPU, int32_t);
#ifdef MACE_ENABLE_QUANTIZE
MACE_REGISTER_OP(op_registry, "Concat", ConcatOp,
DeviceType::CPU, uint8_t);
#endif // MACE_ENABLE_QUANTIZE
#ifdef MACE_ENABLE_OPENCL
MACE_REGISTER_OP(op_registry, "Concat", ConcatOp,
......
......@@ -31,8 +31,12 @@
#include "mace/ops/arm/conv_winograd.h"
#include "mace/ops/conv_pool_2d_base.h"
#include "mace/ops/conv_pool_2d_util.h"
#include "mace/ops/gemmlowp_util.h"
#include "mace/utils/utils.h"
#ifdef MACE_ENABLE_QUANTIZE
#include "mace/ops/gemmlowp_util.h"
#endif // MACE_ENABLE_QUANTIZE
#ifdef MACE_ENABLE_OPENCL
#include "mace/ops/opencl/image/conv_2d.h"
#include "mace/ops/opencl/buffer/conv_2d.h"
......@@ -707,6 +711,7 @@ class Conv2dOp<DeviceType::CPU, float> : public ConvPool2dOpBase {
};
#ifdef MACE_ENABLE_QUANTIZE
template <>
class Conv2dOp<DeviceType::CPU, uint8_t> : public ConvPool2dOpBase {
public:
......@@ -943,6 +948,7 @@ class Conv2dOp<DeviceType::CPU, uint8_t> : public ConvPool2dOpBase {
MACE_OP_INPUT_TAGS(INPUT, FILTER, BIAS);
MACE_OP_OUTPUT_TAGS(OUTPUT);
};
#endif // MACE_ENABLE_QUANTIZE
#ifdef MACE_ENABLE_OPENCL
template <typename T>
......@@ -987,8 +993,10 @@ void RegisterConv2D(OpRegistryBase *op_registry) {
MACE_REGISTER_OP(op_registry, "Conv2D", Conv2dOp,
DeviceType::CPU, float);
#ifdef MACE_ENABLE_QUANTIZE
MACE_REGISTER_OP(op_registry, "Conv2D", Conv2dOp,
DeviceType::CPU, uint8_t);
#endif // MACE_ENABLE_QUANTIZE
#ifdef MACE_ENABLE_OPENCL
MACE_REGISTER_OP(op_registry, "Conv2D", Conv2dOp,
......
......@@ -20,9 +20,11 @@
#include <string>
#include <vector>
#ifdef MACE_ENABLE_QUANTIZE
// We reuse TensorFlow Lite's optimized depthwiseconv_uint8 and parallelized it
// using OpenMP for MACE's quantized depthwise_conv2d.
#include "tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h"
#endif // MACE_ENABLE_QUANTIZE
#include "mace/core/future.h"
#include "mace/core/operator.h"
......@@ -276,6 +278,7 @@ class DepthwiseConv2dOp<DeviceType::CPU, float> : public DepthwiseConv2dOpBase {
MACE_OP_OUTPUT_TAGS(OUTPUT);
};
#ifdef MACE_ENABLE_QUANTIZE
template <>
class DepthwiseConv2dOp<DeviceType::CPU, uint8_t>
: public DepthwiseConv2dOpBase {
......@@ -479,6 +482,7 @@ class DepthwiseConv2dOp<DeviceType::CPU, uint8_t>
MACE_OP_INPUT_TAGS(INPUT, FILTER, BIAS);
MACE_OP_OUTPUT_TAGS(OUTPUT);
};
#endif // MACE_ENABLE_QUANTIZE
#ifdef MACE_ENABLE_OPENCL
template <typename T>
......@@ -520,8 +524,10 @@ void RegisterDepthwiseConv2d(OpRegistryBase *op_registry) {
MACE_REGISTER_OP(op_registry, "DepthwiseConv2d",
DepthwiseConv2dOp, DeviceType::CPU, float);
#ifdef MACE_ENABLE_QUANTIZE
MACE_REGISTER_OP(op_registry, "DepthwiseConv2d",
DepthwiseConv2dOp, DeviceType::CPU, uint8_t);
#endif // MACE_ENABLE_QUANTIZE
#ifdef MACE_ENABLE_OPENCL
MACE_REGISTER_OP(op_registry, "DepthwiseConv2d",
......
......@@ -932,6 +932,7 @@ class EltwiseOp : public Operation {
Tensor scalar_tensor_;
};
#ifdef MACE_ENABLE_QUANTIZE
template <>
class EltwiseOp<DeviceType::CPU, uint8_t> : public Operation {
public:
......@@ -1072,6 +1073,7 @@ class EltwiseOp<DeviceType::CPU, uint8_t> : public Operation {
DataFormat data_format_;
Tensor scalar_tensor_;
};
#endif // MACE_ENABLE_QUANTIZE
#ifdef MACE_ENABLE_OPENCL
template <typename T>
......@@ -1113,8 +1115,11 @@ void RegisterEltwise(OpRegistryBase *op_registry) {
MACE_REGISTER_OP(op_registry, "Eltwise", EltwiseOp,
DeviceType::CPU, int32_t);
#ifdef MACE_ENABLE_QUANTIZE
MACE_REGISTER_OP(op_registry, "Eltwise", EltwiseOp,
DeviceType::CPU, uint8_t);
#endif // MACE_ENABLE_QUANTIZE
#ifdef MACE_ENABLE_OPENCL
MACE_REGISTER_OP(op_registry, "Eltwise", EltwiseOp,
DeviceType::GPU, float);
......
......@@ -63,8 +63,10 @@ void RegisterExpandDims(OpRegistryBase *op_registry) {
MACE_REGISTER_OP(op_registry, "ExpandDims", ExpandDimsOp,
DeviceType::CPU, int32_t);
#ifdef MACE_ENABLE_QUANTIZE
MACE_REGISTER_OP(op_registry, "ExpandDims", ExpandDimsOp,
DeviceType::CPU, uint8_t);
#endif // MACE_ENABLE_QUANTIZE
}
} // namespace ops
......
......@@ -21,7 +21,10 @@
#include "mace/core/tensor.h"
#include "mace/ops/activation.h"
#include "mace/ops/gemm.h"
#ifdef MACE_ENABLE_QUANTIZE
#include "mace/ops/gemmlowp_util.h"
#endif // MACE_ENABLE_QUANTIZE
#ifdef MACE_ENABLE_OPENCL
#include "mace/ops/opencl/image/fully_connected.h"
......@@ -106,6 +109,7 @@ class FullyConnectedOp<DeviceType::CPU, float> : public FullyConnectedOpBase {
}
};
#ifdef MACE_ENABLE_QUANTIZE
template <>
class FullyConnectedOp<DeviceType::CPU, uint8_t>
: public FullyConnectedOpBase {
......@@ -180,6 +184,7 @@ class FullyConnectedOp<DeviceType::CPU, uint8_t>
return MaceStatus::MACE_SUCCESS;
}
};
#endif // MACE_ENABLE_QUANTIZE
#ifdef MACE_ENABLE_OPENCL
template <typename T>
......@@ -218,8 +223,11 @@ void RegisterFullyConnected(OpRegistryBase *op_registry) {
MACE_REGISTER_OP(op_registry, "FullyConnected",
FullyConnectedOp, DeviceType::CPU, float);
#ifdef MACE_ENABLE_QUANTIZE
MACE_REGISTER_OP(op_registry, "FullyConnected",
FullyConnectedOp, DeviceType::CPU, uint8_t);
#endif // MACE_ENABLE_QUANTIZE
#ifdef MACE_ENABLE_OPENCL
MACE_REGISTER_OP(op_registry, "FullyConnected",
FullyConnectedOp, DeviceType::GPU, float);
......
......@@ -89,8 +89,11 @@ class GatherOp : public Operation {
void RegisterGather(OpRegistryBase *op_registry) {
MACE_REGISTER_OP(op_registry, "Gather", GatherOp,
DeviceType::CPU, float);
#ifdef MACE_ENABLE_QUANTIZE
MACE_REGISTER_OP(op_registry, "Gather", GatherOp,
DeviceType::CPU, uint8_t);
#endif // MACE_ENABLE_QUANTIZE
}
} // namespace ops
......
......@@ -22,9 +22,13 @@
#include "mace/core/operator.h"
#include "mace/core/tensor.h"
#include "mace/ops/gemm.h"
#include "mace/ops/gemmlowp_util.h"
#include "mace/ops/sgemm.h"
#include "mace/utils/utils.h"
#ifdef MACE_ENABLE_QUANTIZE
#include "mace/ops/gemmlowp_util.h"
#endif // MACE_ENABLE_QUANTIZE
#ifdef MACE_ENABLE_OPENCL
#include "mace/ops/opencl/image/matmul.h"
#endif // MACE_ENABLE_OPENCL
......@@ -150,6 +154,7 @@ class MatMulOp<CPU, float> : public MatMulOpBase {
SGemm sgemm_;
};
#ifdef MACE_ENABLE_QUANTIZE
template<gemmlowp::MapOrder AOrder, gemmlowp::MapOrder BOrder,
typename OutputType>
class MatMulFixpointImpl;
......@@ -311,6 +316,7 @@ class MatMulOp<DeviceType::CPU, uint8_t>: public MatMulOpBase {
return MaceStatus::MACE_SUCCESS;
}
};
#endif // MACE_ENABLE_QUANTIZE
#ifdef MACE_ENABLE_OPENCL
template <typename T>
......@@ -342,8 +348,11 @@ void RegisterMatMul(OpRegistryBase *op_registry) {
MACE_REGISTER_OP(op_registry, "MatMul", MatMulOp,
DeviceType::CPU, float);
#ifdef MACE_ENABLE_QUANTIZE
MACE_REGISTER_OP(op_registry, "MatMul", MatMulOp,
DeviceType::CPU, uint8_t);
#endif // MACE_ENABLE_QUANTIZE
#ifdef MACE_ENABLE_OPENCL
MACE_REGISTER_OP(op_registry, "MatMul", MatMulOp,
DeviceType::GPU, float);
......
......@@ -33,7 +33,6 @@ extern void RegisterDeconv2D(OpRegistryBase *op_registry);
extern void RegisterDepthToSpace(OpRegistryBase *op_registry);
extern void RegisterDepthwiseConv2d(OpRegistryBase *op_registry);
extern void RegisterDepthwiseDeconv2d(OpRegistryBase *op_registry);
extern void RegisterDequantize(OpRegistryBase *op_registry);
extern void RegisterEltwise(OpRegistryBase *op_registry);
extern void RegisterExpandDims(OpRegistryBase *op_registry);
extern void RegisterFill(OpRegistryBase *op_registry);
......@@ -45,7 +44,6 @@ extern void RegisterLocalResponseNorm(OpRegistryBase *op_registry);
extern void RegisterMatMul(OpRegistryBase *op_registry);
extern void RegisterPad(OpRegistryBase *op_registry);
extern void RegisterPooling(OpRegistryBase *op_registry);
extern void RegisterQuantize(OpRegistryBase *op_registry);
extern void RegisterReduceMean(OpRegistryBase *op_registry);
extern void RegisterReshape(OpRegistryBase *op_registry);
extern void RegisterResizeBicubic(OpRegistryBase *op_registry);
......@@ -64,6 +62,11 @@ extern void RegisterStridedSlice(OpRegistryBase *op_registry);
extern void RegisterTranspose(OpRegistryBase *op_registry);
extern void RegisterUnstack(OpRegistryBase *op_registry);
#ifdef MACE_ENABLE_QUANTIZE
extern void RegisterDequantize(OpRegistryBase *op_registry);
extern void RegisterQuantize(OpRegistryBase *op_registry);
#endif // MACE_ENABLE_QUANTIZE
#ifdef MACE_ENABLE_OPENCL
extern void RegisterBufferTransform(OpRegistryBase *op_registry);
extern void RegisterBufferInverseTransform(OpRegistryBase *op_registry);
......@@ -91,7 +94,6 @@ OpRegistry::OpRegistry() : OpRegistryBase() {
ops::RegisterDepthToSpace(this);
ops::RegisterDepthwiseConv2d(this);
ops::RegisterDepthwiseDeconv2d(this);
ops::RegisterDequantize(this);
ops::RegisterEltwise(this);
ops::RegisterExpandDims(this);
ops::RegisterFill(this);
......@@ -103,7 +105,6 @@ OpRegistry::OpRegistry() : OpRegistryBase() {
ops::RegisterMatMul(this);
ops::RegisterPad(this);
ops::RegisterPooling(this);
ops::RegisterQuantize(this);
ops::RegisterReduceMean(this);
ops::RegisterReshape(this);
ops::RegisterResizeBicubic(this);
......@@ -122,6 +123,11 @@ OpRegistry::OpRegistry() : OpRegistryBase() {
ops::RegisterTranspose(this);
ops::RegisterUnstack(this);
#ifdef MACE_ENABLE_QUANTIZE
ops::RegisterDequantize(this);
ops::RegisterQuantize(this);
#endif // MACE_ENABLE_QUANTIZE
#ifdef MACE_ENABLE_OPENCL
ops::RegisterBufferTransform(this);
ops::RegisterBufferInverseTransform(this);
......
......@@ -214,6 +214,7 @@ class PoolingOp<DeviceType::CPU, float> : public PoolingOpBase {
}
};
#ifdef MACE_ENABLE_QUANTIZE
template <>
class PoolingOp<DeviceType::CPU, uint8_t> : public PoolingOpBase {
public:
......@@ -420,6 +421,7 @@ class PoolingOp<DeviceType::CPU, uint8_t> : public PoolingOpBase {
}
}
};
#endif // MACE_ENABLE_QUANTIZE
#ifdef MACE_ENABLE_OPENCL
template <typename T>
......@@ -451,8 +453,11 @@ class PoolingOp<DeviceType::GPU, T> : public PoolingOpBase {
void RegisterPooling(OpRegistryBase *op_registry) {
MACE_REGISTER_OP(op_registry, "Pooling", PoolingOp,
DeviceType::CPU, float);
#ifdef MACE_ENABLE_QUANTIZE
MACE_REGISTER_OP(op_registry, "Pooling", PoolingOp,
DeviceType::CPU, uint8_t);
#endif // MACE_ENABLE_QUANTIZE
#ifdef MACE_ENABLE_OPENCL
MACE_REGISTER_OP(op_registry, "Pooling", PoolingOp,
......
......@@ -244,6 +244,7 @@ class ResizeBilinearOp<DeviceType::CPU, T> : public Operation {
std::vector<index_t> size_;
};
#ifdef MACE_ENABLE_QUANTIZE
template <>
class ResizeBilinearOp<DeviceType::CPU, uint8_t> : public Operation {
public:
......@@ -317,6 +318,7 @@ class ResizeBilinearOp<DeviceType::CPU, uint8_t> : public Operation {
bool align_corners_;
std::vector<index_t> size_;
};
#endif // MACE_ENABLE_QUANTIZE
#ifdef MACE_ENABLE_OPENCL
template <typename T>
......@@ -355,8 +357,10 @@ void RegisterResizeBilinear(OpRegistryBase *op_registry) {
MACE_REGISTER_OP(op_registry, "ResizeBilinear", ResizeBilinearOp,
DeviceType::CPU, float);
#ifdef MACE_ENABLE_QUANTIZE
MACE_REGISTER_OP(op_registry, "ResizeBilinear", ResizeBilinearOp,
DeviceType::CPU, uint8_t);
#endif // MACE_ENABLE_QUANTIZE
#ifdef MACE_ENABLE_OPENCL
MACE_REGISTER_OP(op_registry, "ResizeBilinear", ResizeBilinearOp,
......
......@@ -18,8 +18,12 @@
#include <vector>
#include "mace/core/operator.h"
#ifdef MACE_ENABLE_QUANTIZE
#include "mace/ops/fixpoint.h"
#include "mace/ops/gemmlowp_util.h"
#endif // MACE_ENABLE_QUANTIZE
#ifdef MACE_ENABLE_OPENCL
#include "mace/ops/opencl/image/softmax.h"
#include "mace/ops/opencl/buffer/softmax.h"
......@@ -122,6 +126,7 @@ class SoftmaxOp<DeviceType::CPU, float> : public Operation {
static const int kInputDeltaIntBits = 6;
static const int kSumExpIntBits = 12;
#ifdef MACE_ENABLE_QUANTIZE
template <>
class SoftmaxOp<DeviceType::CPU, uint8_t> : public Operation {
public:
......@@ -351,6 +356,7 @@ class SoftmaxOp<DeviceType::CPU, uint8_t> : public Operation {
return MaceStatus::MACE_SUCCESS;
}
};
#endif // MACE_ENABLE_QUANTIZE
#ifdef MACE_ENABLE_OPENCL
template <typename T>
......@@ -382,8 +388,10 @@ void RegisterSoftmax(OpRegistryBase *op_registry) {
MACE_REGISTER_OP(op_registry, "Softmax", SoftmaxOp,
DeviceType::CPU, float);
#ifdef MACE_ENABLE_QUANTIZE
MACE_REGISTER_OP(op_registry, "Softmax", SoftmaxOp,
DeviceType::CPU, uint8_t);
#endif // MACE_ENABLE_QUANTIZE
#ifdef MACE_ENABLE_OPENCL
MACE_REGISTER_OP(op_registry, "Softmax", SoftmaxOp,
......
......@@ -197,6 +197,7 @@ class SpaceToBatchNDOp<DeviceType::CPU, float> : public SpaceToBatchOpBase {
}
};
#ifdef MACE_ENABLE_QUANTIZE
template <>
class SpaceToBatchNDOp<DeviceType::CPU, uint8_t> : public SpaceToBatchOpBase {
public:
......@@ -299,6 +300,7 @@ class SpaceToBatchNDOp<DeviceType::CPU, uint8_t> : public SpaceToBatchOpBase {
return MaceStatus::MACE_SUCCESS;
}
};
#endif // MACE_ENABLE_QUANTIZE
#ifdef MACE_ENABLE_OPENCL
template <typename T>
......@@ -331,8 +333,10 @@ void RegisterSpaceToBatchND(OpRegistryBase *op_registry) {
MACE_REGISTER_OP(op_registry, "SpaceToBatchND",
SpaceToBatchNDOp, DeviceType::CPU, float);
#ifdef MACE_ENABLE_QUANTIZE
MACE_REGISTER_OP(op_registry, "SpaceToBatchND",
SpaceToBatchNDOp, DeviceType::CPU, uint8_t);
#endif // MACE_ENABLE_QUANTIZE
#ifdef MACE_ENABLE_OPENCL
MACE_REGISTER_OP(op_registry, "SpaceToBatchND",
......
......@@ -52,7 +52,9 @@ class SqueezeOp : public Operation {
void RegisterSqueeze(OpRegistryBase *op_registry) {
MACE_REGISTER_OP(op_registry, "Squeeze", SqueezeOp, DeviceType::CPU, float);
#ifdef MACE_ENABLE_QUANTIZE
MACE_REGISTER_OP(op_registry, "Squeeze", SqueezeOp, DeviceType::CPU, uint8_t);
#endif // MACE_ENABLE_QUANTIZE
#ifdef MACE_ENABLE_OPENCL
MACE_REGISTER_OP(op_registry, "Squeeze", SqueezeOp, DeviceType::GPU, float);
MACE_REGISTER_OP(op_registry, "Squeeze", SqueezeOp, DeviceType::GPU, half);
......
......@@ -223,7 +223,7 @@ class MACE_API MaceEngineConfig {
/// \param status MACE_SUCCESS for successful, or it can't reliably
/// detect big-LITTLE cores (see GetBigLittleCoreIDs). In such cases, it's
/// suggested to use AFFINITY_NONE to use all cores.
/// \param use_gemmlowp use gemmlowp for quantized inference
/// \param use_gemmlowp use gemmlowp for cpu quantized inference
/// \return MaceStatus::MACE_SUCCESS for success, other for failed.
MaceStatus SetCPUThreadPolicy(int num_threads_hint,
CPUAffinityPolicy policy,
......
......@@ -24,41 +24,41 @@ mkdir -p $LIB_DIR/linux-x86-64
# build shared libraries
echo "build shared lib for armeabi-v7a + cpu_gpu_dsp"
bazel build --config android --config optimization mace/libmace:libmace_dynamic --define neon=true --define openmp=true --define opencl=true --define hexagon=true --cpu=armeabi-v7a
bazel build --config android --config optimization mace/libmace:libmace_dynamic --define neon=true --define openmp=true --define opencl=true --define hexagon=true --define quantize=true --cpu=armeabi-v7a
cp bazel-bin/mace/libmace/libmace.so $LIB_DIR/armeabi-v7a/cpu_gpu_dsp/
cp third_party/nnlib/*so $LIB_DIR/armeabi-v7a/cpu_gpu_dsp/
echo "build shared lib for armeabi-v7a + cpu_gpu"
bazel build --config android --config optimization mace/libmace:libmace_dynamic --define neon=true --define openmp=true --define opencl=true --cpu=armeabi-v7a
bazel build --config android --config optimization mace/libmace:libmace_dynamic --define neon=true --define openmp=true --define opencl=true --define quantize=true --cpu=armeabi-v7a
cp bazel-bin/mace/libmace/libmace.so $LIB_DIR/armeabi-v7a/cpu_gpu/
echo "build shared lib for arm64-v8a + cpu_gpu"
bazel build --config android --config optimization mace/libmace:libmace_dynamic --define neon=true --define openmp=true --define opencl=true --cpu=arm64-v8a
bazel build --config android --config optimization mace/libmace:libmace_dynamic --define neon=true --define openmp=true --define opencl=true --define quantize=true --cpu=arm64-v8a
cp bazel-bin/mace/libmace/libmace.so $LIB_DIR/arm64-v8a/cpu_gpu/
if [[ "$OSTYPE" != "darwin"* ]];then
echo "build shared lib for linux-x86-64"
bazel build mace/libmace:libmace_dynamic --config optimization --define openmp=true
bazel build mace/libmace:libmace_dynamic --config optimization --define quantize=true --define openmp=true
cp bazel-bin/mace/libmace/libmace.so $LIB_DIR/linux-x86-64/
fi
# build static libraries
echo "build static lib for armeabi-v7a + cpu_gpu_dsp"
bazel build --config android --config optimization mace/libmace:libmace_static --config symbol_hidden --define neon=true --define openmp=true --define opencl=true --define hexagon=true --cpu=armeabi-v7a
bazel build --config android --config optimization mace/libmace:libmace_static --config symbol_hidden --define neon=true --define openmp=true --define opencl=true --define hexagon=true --define quantize=true --cpu=armeabi-v7a
cp bazel-genfiles/mace/libmace/libmace.a $LIB_DIR/armeabi-v7a/cpu_gpu_dsp/
cp third_party/nnlib/*so $LIB_DIR/armeabi-v7a/cpu_gpu_dsp/
echo "build static lib for armeabi-v7a + cpu_gpu"
bazel build --config android --config optimization mace/libmace:libmace_static --config symbol_hidden --define neon=true --define openmp=true --define opencl=true --cpu=armeabi-v7a
bazel build --config android --config optimization mace/libmace:libmace_static --config symbol_hidden --define neon=true --define openmp=true --define opencl=true --define quantize=true --cpu=armeabi-v7a
cp bazel-genfiles/mace/libmace/libmace.a $LIB_DIR/armeabi-v7a/cpu_gpu/
echo "build static lib for arm64-v8a + cpu_gpu"
bazel build --config android --config optimization mace/libmace:libmace_static --config symbol_hidden --define neon=true --define openmp=true --define opencl=true --cpu=arm64-v8a
bazel build --config android --config optimization mace/libmace:libmace_static --config symbol_hidden --define neon=true --define openmp=true --define opencl=true --define quantize=true --cpu=arm64-v8a
cp bazel-genfiles/mace/libmace/libmace.a $LIB_DIR/arm64-v8a/cpu_gpu/
if [[ "$OSTYPE" != "darwin"* ]];then
echo "build static lib for linux-x86-64"
bazel build mace/libmace:libmace_static --config optimization --define openmp=true
bazel build mace/libmace:libmace_static --config optimization --define quantize=true --define openmp=true
cp bazel-genfiles/mace/libmace/libmace.a $LIB_DIR/linux-x86-64/
fi
......
......@@ -279,6 +279,17 @@ def get_opencl_mode(configs):
return False
def get_quantize_mode(configs):
    """Return True if any configured model sets its quantize option to 1.

    A model without the quantize key is treated as if it were set to 0.
    Returns False when no model enables it.
    """
    return any(
        configs[YAMLKeyword.models][name].get(YAMLKeyword.quantize, 0) == 1
        for name in configs[YAMLKeyword.models])
def md5sum(str):
md5 = hashlib.md5()
md5.update(str.encode('utf-8'))
......@@ -855,6 +866,7 @@ def build_model_lib(configs, address_sanitizer):
abi=target_abi,
hexagon_mode=hexagon_mode,
enable_opencl=get_opencl_mode(configs),
enable_quantize=get_quantize_mode(configs),
address_sanitizer=address_sanitizer,
symbol_hidden=True
)
......@@ -968,6 +980,7 @@ def build_mace_run(configs, target_abi, enable_openmp, address_sanitizer,
hexagon_mode=hexagon_mode,
enable_openmp=enable_openmp,
enable_opencl=get_opencl_mode(configs),
enable_quantize=get_quantize_mode(configs),
address_sanitizer=address_sanitizer,
symbol_hidden=symbol_hidden,
extra_args=build_arg
......@@ -996,6 +1009,7 @@ def build_example(configs, target_abi, enable_openmp, address_sanitizer,
abi=target_abi,
enable_openmp=enable_openmp,
enable_opencl=get_opencl_mode(configs),
enable_quantize=get_quantize_mode(configs),
hexagon_mode=hexagon_mode,
address_sanitizer=address_sanitizer,
symbol_hidden=symbol_hidden)
......@@ -1025,6 +1039,7 @@ def build_example(configs, target_abi, enable_openmp, address_sanitizer,
abi=target_abi,
enable_openmp=enable_openmp,
enable_opencl=get_opencl_mode(configs),
enable_quantize=get_quantize_mode(configs),
hexagon_mode=hexagon_mode,
address_sanitizer=address_sanitizer,
extra_args=build_arg)
......@@ -1404,6 +1419,7 @@ def build_benchmark_model(configs, target_abi, enable_openmp, mace_lib_type):
abi=target_abi,
enable_openmp=enable_openmp,
enable_opencl=get_opencl_mode(configs),
enable_quantize=get_quantize_mode(configs),
hexagon_mode=hexagon_mode,
symbol_hidden=symbol_hidden,
extra_args=build_arg)
......
......@@ -342,6 +342,7 @@ def bazel_build(target,
enable_openmp=True,
enable_neon=True,
enable_opencl=True,
enable_quantize=True,
address_sanitizer=False,
symbol_hidden=True,
extra_args=""):
......@@ -351,6 +352,8 @@ def bazel_build(target,
"build",
"--define",
"openmp=%s" % str(enable_openmp).lower(),
"--define",
"quantize=%s" % str(enable_quantize).lower(),
target,
)
else:
......@@ -367,6 +370,8 @@ def bazel_build(target,
"--define",
"opencl=%s" % str(enable_opencl).lower(),
"--define",
"quantize=%s" % str(enable_quantize).lower(),
"--define",
"hexagon=%s" % str(hexagon_mode).lower())
if address_sanitizer:
bazel_args += ("--config", "asan")
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册