diff --git a/micro/base/types.h b/micro/base/types.h index 6de264b4119e0ac68080a11df129d17f7b04a364..0f018d6a7348129e02a11bce31423a44540dcfa8 100644 --- a/micro/base/types.h +++ b/micro/base/types.h @@ -52,6 +52,35 @@ MACE_MAPPING_DATA_TYPE_AND_ENUM(int32_t, DT_INT32); MACE_MAPPING_DATA_TYPE_AND_ENUM(BFloat16, DT_BFLOAT16); #endif +struct QuantizeInfo { + float scale; + int32_t zero; +}; + +namespace ops { +namespace eltwise { // for redefine + +enum Type { + SUM = 0, + SUB = 1, + PROD = 2, + DIV = 3, + MIN = 4, + MAX = 5, + NEG = 6, + ABS = 7, + SQR_DIFF = 8, + POW = 9, + EQUAL = 10, + FLOOR_DIV = 11, + CLIP = 12, + SIGN = 13, + NONE = 14, +}; + +} // namespace eltwise +} // namespace ops + } // namespace micro #endif // MICRO_BASE_TYPES_H_ diff --git a/micro/base/utils.cc b/micro/base/utils.cc index 99f9142e22f637a8b7adb2340cbe731a429bb948..5f8637da11fd1dbdb12593ba2767993c9e386b65 100644 --- a/micro/base/utils.cc +++ b/micro/base/utils.cc @@ -116,5 +116,14 @@ const T &min(const T &a, const T &b) { return (a < b) ? 
a : b; } +bool ShapeIsEqual(const int32_t *dims0, + const int32_t *dims1, uint32_t dim_size) { + while (dim_size-- > 0) { + if (dims0[dim_size] != dims1[dim_size]) + return false; + } + return true; +} + } // namespace base } // namespace micro diff --git a/micro/base/utils.h b/micro/base/utils.h index 56eb955ebd7670e888527325e0bd5a142a0ade8f..d47394c2de901171f89bacaaa283855043861c64 100644 --- a/micro/base/utils.h +++ b/micro/base/utils.h @@ -26,6 +26,8 @@ uint32_t strlen(const char *str); int32_t strcmp(const char *str1, const char *str2); void memcpy(void *dst, const void *src, uint32_t bytes); int32_t GetShapeSize(uint32_t dim_size, const int32_t *dims); +bool ShapeIsEqual(const int32_t *dims0, + const int32_t *dims1, uint32_t dim_size); float sqrt(float x); int32_t ceil(float f); int32_t floor(float f); diff --git a/micro/framework/operator.cc b/micro/framework/operator.cc index f0505d93dc271e8fae151b4f49db5bd8c3458cd7..9ecdad69fd536d7ebf786d3fd04e528768bf16fc 100644 --- a/micro/framework/operator.cc +++ b/micro/framework/operator.cc @@ -74,6 +74,8 @@ uint32_t Operator::GetInputSize() { } const void *Operator::DoGetInputData(uint32_t idx) { + MACE_ASSERT(idx < GetInputSize()); + const void *data = NULL; const OpIOInfo *input_info = op_context_->input_info(idx); const uint32_t op_def_idx = input_info->op_def_idx_; @@ -94,6 +96,8 @@ const void *Operator::DoGetInputData(uint32_t idx) { } uint32_t Operator::GetInputShapeDimSize(uint32_t idx) { + MACE_ASSERT(idx < GetInputSize()); + uint32_t dim_size = 0; const OpIOInfo *input_info = op_context_->input_info(idx); const uint32_t op_def_idx = input_info->op_def_idx_; @@ -115,6 +119,8 @@ uint32_t Operator::GetInputShapeDimSize(uint32_t idx) { } const int32_t *Operator::GetInputShapeDims(uint32_t idx) { + MACE_ASSERT(idx < GetInputSize()); + const int32_t *dims = NULL; const OpIOInfo *input_info = op_context_->input_info(idx); const uint32_t op_def_idx = input_info->op_def_idx_; @@ -138,14 +144,20 @@ uint32_t 
Operator::GetOutputSize() { } DataType Operator::GetOutputDataType(uint32_t idx) { + MACE_ASSERT(idx < GetOutputSize()); + return op_def_->output_type(idx); } void *Operator::DoGetOutputData(uint32_t idx) { + MACE_ASSERT(idx < GetOutputSize()); + return engine_config_->tensor_mem_ + op_def_->mem_offset(idx); } uint32_t Operator::GetOutputShapeDimSize(uint32_t idx) { + MACE_ASSERT(idx < GetOutputSize()); + uint32_t dim_size = 0; model::OutputShape *output_shape = const_cast(op_context_->output_resize_shape(idx)); @@ -156,6 +168,8 @@ uint32_t Operator::GetOutputShapeDimSize(uint32_t idx) { } const int32_t *Operator::GetOutputShapeDims(uint32_t idx) { + MACE_ASSERT(idx < GetOutputSize()); + const int32_t *dims = NULL; model::OutputShape *output_shape = const_cast(op_context_->output_resize_shape(idx)); @@ -167,6 +181,8 @@ const int32_t *Operator::GetOutputShapeDims(uint32_t idx) { MaceStatus Operator::ResizeOutputShape(uint32_t idx, uint32_t dim_size, const int32_t *dims) { + MACE_ASSERT(idx < GetOutputSize()); + model::OutputShape *output_shape = const_cast(op_context_->output_resize_shape(idx)); #ifndef MACE_MICRO_NDEBUG @@ -202,12 +218,14 @@ MaceStatus Operator::ResizeOutputShape(uint32_t idx, uint32_t dim_size, } QuantizeInfo Operator::GetInputQuantizeInfo(uint32_t idx) { + MACE_ASSERT(idx < GetInputSize()); + + QuantizeInfo quantize_info = {0.0f, 0}; const OpIOInfo *input_info = op_context_->input_info(idx); const uint32_t op_def_idx = input_info->op_def_idx_; if (kIdxConstTensor == op_def_idx) { const model::ConstTensor *const_tensor = engine_config_->net_def_->tensor(input_info->output_idx_); - QuantizeInfo quantize_info; quantize_info.scale = const_tensor->scale(); quantize_info.zero = const_tensor->zero_point(); return quantize_info; @@ -218,14 +236,17 @@ QuantizeInfo Operator::GetInputQuantizeInfo(uint32_t idx) { engine_config_->net_def_->op(op_def_idx); model::QuantizeActivationInfo quantize_activation_info = 
pre_op_def->quantize_info(input_info->output_idx_); - QuantizeInfo quantize_info; quantize_info.scale = quantize_activation_info.scale(); quantize_info.zero = quantize_activation_info.zero_point(); return quantize_info; } + + return quantize_info; } QuantizeInfo Operator::GetOutputQuantizeInfo(uint32_t idx) { + MACE_ASSERT(idx < GetOutputSize()); + QuantizeInfo quantize_info; model::QuantizeActivationInfo quantize_activation_info = op_def_->quantize_info(idx); diff --git a/micro/framework/operator.h b/micro/framework/operator.h index f3c5878dd93f1309509fe9212c7f2365a20fbb36..a053f78f57d398a675d8364bca5a9a8cad35bba7 100644 --- a/micro/framework/operator.h +++ b/micro/framework/operator.h @@ -24,11 +24,6 @@ namespace micro { struct MaceMicroEngineConfig; -struct QuantizeInfo { - float scale; - float zero; -}; - namespace model { class Argument; class OperatorDef; @@ -90,7 +85,6 @@ class Operator { MaceStatus ReuseInputBufferForOutput(uint32_t output_idx, uint32_t input_idx); QuantizeInfo GetInputQuantizeInfo(uint32_t idx); - QuantizeInfo GetOutputQuantizeInfo(uint32_t idx); template diff --git a/micro/ops/CMakeLists.txt b/micro/ops/CMakeLists.txt index 6f938e09a46efac8aeabe8af7d554d178bc59d17..0825e180074738bf60949484c9aab46af21a57f1 100644 --- a/micro/ops/CMakeLists.txt +++ b/micro/ops/CMakeLists.txt @@ -36,8 +36,7 @@ add_library(micro_ops ${MICRO_OPS_SRCS} ) target_link_libraries(micro_ops - micro_base - micro_framework + PRIVATE micro_base ) diff --git a/micro/ops/eltwise.cc b/micro/ops/eltwise.cc index 98f3897ea96f2b2eaf989e529fa26b6c851dfbbe..975a60cecfc7825dcae055256fe2d2fbda64de57 100644 --- a/micro/ops/eltwise.cc +++ b/micro/ops/eltwise.cc @@ -19,14 +19,6 @@ namespace micro { namespace ops { namespace eltwise { -bool ShapeIsEqual(const int32_t *dims0, - const int32_t *dims1, uint32_t dim_size) { - while (--dim_size > 0) { - if (dims0[dim_size] != dims1[dim_size]) - return false; - } - return true; -} int32_t GetIndex(const int32_t *shape, const int32_t 
*index, int32_t dim_size) { diff --git a/micro/ops/eltwise.h b/micro/ops/eltwise.h index 263082cca9225438dcaac456a983a4a47510d512..fd08114206b0f4acf1676912a89a3caa8e1fe708 100644 --- a/micro/ops/eltwise.h +++ b/micro/ops/eltwise.h @@ -19,31 +19,13 @@ #include "micro/base/utils.h" #include "micro/framework/operator.h" #include "micro/framework/scratch_buffer.h" +#include "micro/base/types.h" namespace micro { namespace ops { -namespace eltwise { // for redefine -enum Type { - SUM = 0, - SUB = 1, - PROD = 2, - DIV = 3, - MIN = 4, - MAX = 5, - NEG = 6, - ABS = 7, - SQR_DIFF = 8, - POW = 9, - EQUAL = 10, - FLOOR_DIV = 11, - CLIP = 12, - SIGN = 13, - NONE = 14, -}; +namespace eltwise { -bool ShapeIsEqual(const int32_t *dims0, - const int32_t *dims1, uint32_t dim_size); int32_t GetIndex(const int32_t *shape, const int32_t *index, int32_t dim_size); void IncreaseIndex(const int32_t *shape, int32_t **index, int32_t dim_size); template @@ -202,9 +184,8 @@ class EltwiseOp : public framework::Operator { if (input1_size == 1) { TensorScalarEltwise(type_, input0_, input1_[0], input0_size, swapped, output_ptr); - } else if (eltwise::ShapeIsEqual(input0_dims_, - input1_shape, - input0_dim_size_)) { + } else if (base::ShapeIsEqual(input0_dims_, input1_shape, + input0_dim_size_)) { TensorEltwise(type_, input0_, input1_, input0_size, swapped, output_ptr); } else if (need_general_broadcast) { diff --git a/micro/ops/nhwc/CMakeLists.txt b/micro/ops/nhwc/CMakeLists.txt index 96a97d7ed7dc1e07b02047c6314f1c8f695336e3..ce890e12b0f665d3b0338f56b9747c4315ae5025 100644 --- a/micro/ops/nhwc/CMakeLists.txt +++ b/micro/ops/nhwc/CMakeLists.txt @@ -1,3 +1,3 @@ -if(MACE_MICRO_ENABLE_CMISI) +if(MACE_MICRO_ENABLE_CMSIS) add_subdirectory(cmsis_nn) endif() diff --git a/micro/ops/nhwc/cmsis_nn/CMakeLists.txt b/micro/ops/nhwc/cmsis_nn/CMakeLists.txt index caec93e3d35c4eadae4672f01cd89bcf57b34357..ca6db7303fdf0f4caf07f4717502be4e7f5fb1d7 100644 --- a/micro/ops/nhwc/cmsis_nn/CMakeLists.txt +++ 
b/micro/ops/nhwc/cmsis_nn/CMakeLists.txt @@ -1,17 +1,16 @@ -add_library(mace_micro_ops_nhwc_cmsis_nn +add_library(micro_ops_nhwc_cmsis_nn arm_conv_2d_int8.cc arm_pooling_int8.cc arm_softmax_int8.cc arm_mat_mul_int8.cc + arm_eltwise_int8.cc arm_depthwise_conv_2d_int8.cc dequantize.cc quantize.cc utilities.cc ) -target_link_libraries(mace_micro_ops_nhwc_cmsis_nn - PUBLIC micro_base - PUBLIC micro_framework - PUBLIC micro_ops +target_link_libraries(micro_ops_nhwc_cmsis_nn + PRIVATE micro_base PRIVATE CMSISNN ) diff --git a/micro/ops/nhwc/cmsis_nn/arm_conv_2d_int8.cc b/micro/ops/nhwc/cmsis_nn/arm_conv_2d_int8.cc index 081cc205d3ff75a4a561b0cfdee5d691f3c0770f..f886be8d7317b0f421f6db6d3e74991a07759c8b 100644 --- a/micro/ops/nhwc/cmsis_nn/arm_conv_2d_int8.cc +++ b/micro/ops/nhwc/cmsis_nn/arm_conv_2d_int8.cc @@ -47,12 +47,12 @@ MaceStatus ArmConv2dInt8Op::Compute(int32_t (&output_dims)[4]) { conv_params.output_offset = output_quantize_info.zero; conv_params.activation.min = -128; conv_params.activation.max = 127; - conv_params.stride.w = strides_[0]; - conv_params.stride.h = strides_[1]; - conv_params.padding.w = padding_sizes_[0] / 2; - conv_params.padding.h = padding_sizes_[1] / 2; - conv_params.dilation.w = dilations_[0]; - conv_params.dilation.h = dilations_[1]; + conv_params.stride.w = strides_[1]; + conv_params.stride.h = strides_[0]; + conv_params.padding.w = padding_sizes_[1] / 2; + conv_params.padding.h = padding_sizes_[0] / 2; + conv_params.dilation.w = dilations_[1]; + conv_params.dilation.h = dilations_[0]; ScratchBuffer scratch_buffer(engine_config_); @@ -64,6 +64,9 @@ MaceStatus ArmConv2dInt8Op::Compute(int32_t (&output_dims)[4]) { quant_params.shift[i] = shift; } + MACE_ASSERT(input_dims_[0] == 1); + MACE_ASSERT(dilations_[0] == 1 && dilations_[1] == 1); + cmsis_nn_dims input_dims; input_dims.n = input_dims_[0]; input_dims.h = input_dims_[1]; diff --git a/micro/ops/nhwc/cmsis_nn/arm_depthwise_conv_2d_int8.cc 
b/micro/ops/nhwc/cmsis_nn/arm_depthwise_conv_2d_int8.cc index db2e040534c28ce9de370eb2885157de3584ce96..e3746fe80d5cf8e9917eb41b262dc8ebceb83eb9 100644 --- a/micro/ops/nhwc/cmsis_nn/arm_depthwise_conv_2d_int8.cc +++ b/micro/ops/nhwc/cmsis_nn/arm_depthwise_conv_2d_int8.cc @@ -45,12 +45,12 @@ MaceStatus ArmDepthwiseConv2dInt8Op::Compute(int32_t (&output_dims)[4]) { dw_conv_params.output_offset = output_quantize_info.zero; dw_conv_params.activation.min = -128; dw_conv_params.activation.max = 127; - dw_conv_params.stride.w = strides_[0]; - dw_conv_params.stride.h = strides_[1]; - dw_conv_params.padding.w = padding_sizes_[0] / 2; - dw_conv_params.padding.h = padding_sizes_[1] / 2; - dw_conv_params.dilation.w = dilations_[0]; - dw_conv_params.dilation.h = dilations_[1]; + dw_conv_params.stride.w = strides_[1]; + dw_conv_params.stride.h = strides_[0]; + dw_conv_params.padding.w = padding_sizes_[1] / 2; + dw_conv_params.padding.h = padding_sizes_[0] / 2; + dw_conv_params.dilation.w = dilations_[1]; + dw_conv_params.dilation.h = dilations_[0]; ScratchBuffer scratch_buffer(engine_config_); @@ -62,6 +62,10 @@ MaceStatus ArmDepthwiseConv2dInt8Op::Compute(int32_t (&output_dims)[4]) { quant_params.shift[i] = shift; } + MACE_ASSERT(input_dims_[0] == 1); + MACE_ASSERT(filter_dims_[0] == 1); + MACE_ASSERT(dilations_[0] == 1 && dilations_[1] == 1); + cmsis_nn_dims input_dims; input_dims.n = input_dims_[0]; input_dims.h = input_dims_[1]; diff --git a/micro/ops/nhwc/cmsis_nn/arm_eltwise_int8.cc b/micro/ops/nhwc/cmsis_nn/arm_eltwise_int8.cc index 8d774274b371c14cd357e78ae819e5cbf985f059..6cb44e35a20410385c6f8143d22b84ba4f696063 100644 --- a/micro/ops/nhwc/cmsis_nn/arm_eltwise_int8.cc +++ b/micro/ops/nhwc/cmsis_nn/arm_eltwise_int8.cc @@ -17,6 +17,7 @@ #include #include "micro/base/logging.h" +#include "micro/base/types.h" #include "micro/base/utils.h" #include "micro/ops/nhwc/cmsis_nn/utilities.h" @@ -24,19 +25,15 @@ namespace micro { namespace ops { MaceStatus 
ArmEltwiseInt8Op::OnInit() { + MACE_ASSERT(GetInputSize() == 2); + input0_ = GetInputData(INPUT0); input0_dims_ = GetInputShapeDims(INPUT0); input0_dim_size_ = GetInputShapeDimSize(INPUT0); - if (GetInputSize() >= 2) { - input1_ = GetInputData(INPUT1); - input1_dims_ = GetInputShapeDims(INPUT1); - input1_dim_size_ = GetInputShapeDimSize(INPUT1); - } else { - input1_ = NULL; - input1_dims_ = NULL; - input1_dim_size_ = 0; - } + input1_ = GetInputData(INPUT1); + input1_dims_ = GetInputShapeDims(INPUT1); + input1_dim_size_ = GetInputShapeDimSize(INPUT1); output_ = GetOutputData(OUTPUT); @@ -48,11 +45,15 @@ MaceStatus ArmEltwiseInt8Op::OnInit() { } MaceStatus ArmEltwiseInt8Op::Run() { - MACE_ASSERT1(GetInputSize() < 3, - "Element-Wise does not support 3 or higher inputs," - " you could change your model to multiple Element-Wise"); + MACE_ASSERT1(GetInputSize() == 2, + "ArmEltwiseInt8Op only supports 2 inputs"); + MACE_ASSERT(input0_dim_size_ == input1_dim_size_); + MACE_ASSERT(base::ShapeIsEqual(input0_dims_, input1_dims_, input1_dim_size_)); + + MACE_RETURN_IF_ERROR( + ResizeOutputShape(OUTPUT, input0_dim_size_, input0_dims_)); - if (type_ == 0) { + if (type_ == eltwise::SUM) { QuantizeInfo input_quantize_info0 = GetInputQuantizeInfo(0); QuantizeInfo input_quantize_info1 = GetInputQuantizeInfo(1); QuantizeInfo output_quantize_info = GetOutputQuantizeInfo(OUTPUT); diff --git a/micro/ops/nhwc/cmsis_nn/arm_eltwise_int8.h b/micro/ops/nhwc/cmsis_nn/arm_eltwise_int8.h index 2f9eebd86bbdbc2c38490fb024b4572961416eb1..6e8a0aea8ff6e4e36eab4d19ef968330a67df029 100644 --- a/micro/ops/nhwc/cmsis_nn/arm_eltwise_int8.h +++ b/micro/ops/nhwc/cmsis_nn/arm_eltwise_int8.h @@ -17,6 +17,7 @@ #include "micro/base/logger.h" #include "micro/base/logging.h" +#include "micro/base/types.h" #include "micro/base/utils.h" #include "micro/framework/op_context.h" #include "micro/framework/operator.h" @@ -27,28 +28,6 @@ namespace micro { namespace ops { -namespace eltwise { // for redefine - -enum Type 
{ - SUM = 0, - SUB = 1, - PROD = 2, - DIV = 3, - MIN = 4, - MAX = 5, - NEG = 6, - ABS = 7, - SQR_DIFF = 8, - POW = 9, - EQUAL = 10, - FLOOR_DIV = 11, - CLIP = 12, - SIGN = 13, - NONE = 14, -}; - -} // namespace eltwise - class ArmEltwiseInt8Op : public framework::Operator { public: MaceStatus OnInit(); diff --git a/micro/ops/nhwc/cmsis_nn/arm_mat_mul_int8.cc b/micro/ops/nhwc/cmsis_nn/arm_mat_mul_int8.cc index 8fa9799f21018553f99110a1a4c7c310582c881e..e2dd8fd2dacde6dea88bad682a14643c9521055f 100644 --- a/micro/ops/nhwc/cmsis_nn/arm_mat_mul_int8.cc +++ b/micro/ops/nhwc/cmsis_nn/arm_mat_mul_int8.cc @@ -61,6 +61,8 @@ MaceStatus ArmMatMulInt8Op::Run() { MACE_ASSERT(input_a_dim_size_ == 2); MACE_ASSERT(input_b_dim_size_ == 2); + MACE_ASSERT(input_a_dims_[0] == 1); + MACE_ASSERT(transpose_b_); MACE_ASSERT(!transpose_a_); @@ -111,9 +113,9 @@ MaceStatus ArmMatMulInt8Op::Run() { } arm_status status = arm_nn_vec_mat_mult_t_s8( - input_a_, input_b_, bias, output_, -input_quantize_info_a.zero, 0, - output_quantize_info.zero, multiplier, shift, rhs_cols, rhs_rows, -128, - 127); + input_a_, input_b_, bias, output_, -input_quantize_info_a.zero, + input_quantize_info_b.zero, output_quantize_info.zero, multiplier, shift, + rhs_cols, rhs_rows, -128, 127); MACE_ASSERT(status == ARM_MATH_SUCCESS); diff --git a/micro/ops/nhwc/cmsis_nn/arm_pooling_int8.cc b/micro/ops/nhwc/cmsis_nn/arm_pooling_int8.cc index c8db49aa69c50337818a2da040123051b7c51637..7e5851622f2abeddd14813601f5d84a416cd1ed6 100644 --- a/micro/ops/nhwc/cmsis_nn/arm_pooling_int8.cc +++ b/micro/ops/nhwc/cmsis_nn/arm_pooling_int8.cc @@ -19,8 +19,8 @@ #include "micro/base/logging.h" #include "micro/base/utils.h" #include "micro/framework/scratch_buffer.h" -#include "micro/ops/nhwc/cmsis_nn/utilities.h" #include "micro/include/utils/macros.h" +#include "micro/ops/nhwc/cmsis_nn/utilities.h" namespace micro { namespace ops { @@ -30,7 +30,6 @@ void ArmPoolingInt8Op::MaxPooling(const mifloat *input, const int32_t *stride_hw, const 
int32_t *dilation_hw, const int32_t *pad_hw) { - MACE_UNUSED(filter_hw); MACE_UNUSED(dilation_hw); cmsis_nn_context ctx; @@ -45,6 +44,8 @@ void ArmPoolingInt8Op::MaxPooling(const mifloat *input, pool_params.padding.h = pad_hw[0]; pool_params.padding.w = pad_hw[1]; + MACE_ASSERT(input_dims_[0] == 1); + cmsis_nn_dims input_dims; input_dims.n = input_dims_[0]; input_dims.h = input_dims_[1]; @@ -53,10 +54,8 @@ void ArmPoolingInt8Op::MaxPooling(const mifloat *input, const int8_t *input_data = reinterpret_cast(input); cmsis_nn_dims filter_dims; - filter_dims.n = filter_dims_[0]; - filter_dims.h = filter_dims_[1]; - filter_dims.w = filter_dims_[2]; - filter_dims.c = filter_dims_[3]; + filter_dims.h = filter_hw[0]; + filter_dims.w = filter_hw[1]; cmsis_nn_dims output_dims; output_dims.n = output_dims_[0]; @@ -74,7 +73,6 @@ void ArmPoolingInt8Op::AvgPooling(const mifloat *input, const int32_t *stride_hw, const int32_t *dilation_hw, const int32_t *pad_hw) { - MACE_UNUSED(filter_hw); MACE_UNUSED(dilation_hw); const int32_t out_width = output_dims_[2]; @@ -82,8 +80,12 @@ void ArmPoolingInt8Op::AvgPooling(const mifloat *input, cmsis_nn_context ctx; ctx.size = arm_avgpool_s8_get_buffer_size(out_width, in_channels); - MACE_ASSERT(ctx.size == 0); - ctx.buf = NULL; + ScratchBuffer scratch_buffer(engine_config_); + if (ctx.size > 0) { + ctx.buf = scratch_buffer.GetBuffer(ctx.size); + } else { + ctx.buf = NULL; + } cmsis_nn_pool_params pool_params; pool_params.activation.min = -128; @@ -93,6 +95,8 @@ void ArmPoolingInt8Op::AvgPooling(const mifloat *input, pool_params.padding.h = pad_hw[0]; pool_params.padding.w = pad_hw[1]; + MACE_ASSERT(input_dims_[0] == 1); + cmsis_nn_dims input_dims; input_dims.n = input_dims_[0]; input_dims.h = input_dims_[1]; @@ -101,10 +105,8 @@ void ArmPoolingInt8Op::AvgPooling(const mifloat *input, const int8_t *input_data = reinterpret_cast(input); cmsis_nn_dims filter_dims; - filter_dims.n = filter_dims_[0]; - filter_dims.h = filter_dims_[1]; - 
filter_dims.w = filter_dims_[2]; - filter_dims.c = filter_dims_[3]; + filter_dims.h = filter_hw[0]; + filter_dims.w = filter_hw[1]; cmsis_nn_dims output_dims; output_dims.n = output_dims_[0]; diff --git a/micro/ops/nhwc/cmsis_nn/arm_softmax_int8.cc b/micro/ops/nhwc/cmsis_nn/arm_softmax_int8.cc index cec332d6bc165199bddfc4096c65211e5e2b9edf..e1d44bf1cdeb0aba53d0e14bf8bf9f1707034179 100644 --- a/micro/ops/nhwc/cmsis_nn/arm_softmax_int8.cc +++ b/micro/ops/nhwc/cmsis_nn/arm_softmax_int8.cc @@ -35,6 +35,9 @@ MaceStatus ArmSoftmaxInt8Op::OnInit() { output_ = GetOutputData(OUTPUT); + bool use_log = GetArgByName("use_log", false); + MACE_ASSERT1(!use_log, "The argument \"use_log\" is unsupported"); + return MACE_SUCCESS; } diff --git a/micro/ops/nhwc/cmsis_nn/dequantize.cc b/micro/ops/nhwc/cmsis_nn/dequantize.cc index 6118c16c5e62b5744caa9fa4ee35f0eff261adc0..9e4be8bcd872b51f5611feb9c0f5a18e13969971 100644 --- a/micro/ops/nhwc/cmsis_nn/dequantize.cc +++ b/micro/ops/nhwc/cmsis_nn/dequantize.cc @@ -36,8 +36,6 @@ MaceStatus DequantizeOp::OnInit() { MaceStatus DequantizeOp::Run() { MACE_RETURN_IF_ERROR(ResizeOutputShape(OUTPUT, input_dim_size_, input_dims_)); - const micro::OpIOInfo *input_info = op_context_->input_info(INPUT); - QuantizeInfo input_quantize_info = GetInputQuantizeInfo(INPUT); float scale = input_quantize_info.scale; diff --git a/micro/ops/nhwc/pooling_ref.cc b/micro/ops/nhwc/pooling_ref.cc index 270a7c0f782e9faebbaa5347ca0221e50f266dca..c3f97694e3575f7b35907fc797d5b31a94866cf3 100644 --- a/micro/ops/nhwc/pooling_ref.cc +++ b/micro/ops/nhwc/pooling_ref.cc @@ -49,7 +49,7 @@ void PoolingRefOp::MaxPooling(const mifloat *input, } for (int32_t fh = 0; fh < filter_hw[0]; ++fh) { int32_t inh = inh_addr + dilation_hw[0] * fh; - if (inh < 0 && inh >= in_height) { + if (inh < 0 || inh >= in_height) { continue; } int32_t in_h_base = (in_b_base + inh) * in_width; diff --git a/micro/test/ccunit/CMakeLists.txt b/micro/test/ccunit/CMakeLists.txt index 
ce31634de464fbdd5834bb8a8ff7a9ebbd3bcb28..46a5eac21dfbf7b19cfca2dd7ad96abc5ca1d138 100644 --- a/micro/test/ccunit/CMakeLists.txt +++ b/micro/test/ccunit/CMakeLists.txt @@ -20,25 +20,21 @@ add_executable(micro_ops_test micro/ops/expand_dims_test.cc micro/ops/concat_test.cc ) + +if(MACE_MICRO_ENABLE_CMSIS) + target_link_libraries(micro_ops_test + PRIVATE micro_ops_nhwc_cmsis_nn + ) + target_compile_options(micro_ops_test + PRIVATE "-DMACE_MICRO_ENABLE_CMSIS=ON" + ) +endif() + target_link_libraries(micro_ops_test PRIVATE micro_base - PRIVATE micro_ops_for_test + PRIVATE micro_ops + PRIVATE micro_framework_for_optest PRIVATE micro_ccutils PRIVATE gtest PRIVATE gtest_main ) - -if(MICRO_MODEL_NAME) - add_executable(micro_cc_test - micro/model/net_def_test.cc - micro/framework/graph_test.cc - micro/codegen/engine_test.cc - ) - target_link_libraries(micro_cc_test - micro - models - gtest - gtest_main - ) - target_compile_definitions(micro_cc_test PRIVATE "-DMICRO_MODEL_NAME=${MICRO_MODEL_NAME}") -endif() diff --git a/micro/test/ccunit/micro/ops/eltwise_test.cc b/micro/test/ccunit/micro/ops/eltwise_test.cc index 4d0fe7914f3edb2a796bbeabbe549e8be2f5e5a0..49cf75236447c19cf55d4ff8895d06efca3ef29d 100644 --- a/micro/test/ccunit/micro/ops/eltwise_test.cc +++ b/micro/test/ccunit/micro/ops/eltwise_test.cc @@ -14,8 +14,10 @@ #include "gtest/gtest.h" #include "micro/ops/eltwise.h" +#include "micro/ops/nhwc/cmsis_nn/arm_eltwise_int8.h" #include "micro/ops/gtest_utils.h" #include "micro/ops/substitute_op.h" +#include "micro/ops/test_quantize_utils.h" #include "micro/ops/test_utils.h" namespace micro { @@ -494,6 +496,91 @@ TEST_F(EltwiseOpTest, TensorGeneralBroadcastCPU) { dims1121, output_9, expect_9, dims1123); } +#ifdef MACE_MICRO_ENABLE_CMSIS + +namespace { + +void TestEltwiseQuantInt8(const int32_t *input_dims, + const uint32_t input_dim_size, + eltwise::Type type) { + int32_t shape_size = base::GetShapeSize(input_dim_size, input_dims); + float *input0 = new float[shape_size]; + 
float *input1 = new float[shape_size]; + FillNormalRandomInput(input0, shape_size); + FillNormalRandomInput(input1, shape_size); + float *expect_output = new float[shape_size]; + const uint32_t MAX_OUTPUT_NUM = 10; + int32_t *expect_output_dims = new int32_t[MAX_OUTPUT_NUM]; + + EltwiseOp eltwsie_op; + framework::SubstituteOp substitude_op; + substitude_op.AddInput(input0, input_dims, input_dim_size) + .AddInput(input1, input_dims, input_dim_size) + .AddArg("type", static_cast(type)) + .AddOutput(expect_output, expect_output_dims, MAX_OUTPUT_NUM); + eltwsie_op.Init( + NULL, reinterpret_cast(&substitude_op), NULL); + eltwsie_op.Run(); + uint32_t expect_output_dim_size = substitude_op.GetOutputShapeDimSize(0); + + int8_t *input0_int8 = new int8_t[shape_size]; + int8_t *input1_int8 = new int8_t[shape_size]; + int8_t *output_int8 = new int8_t[shape_size]; + float *output = new float[shape_size]; + int32_t *output_dims = new int32_t[MAX_OUTPUT_NUM]; + QuantizeInfo input_quant_info0; + QuantizeInfo input_quant_info1; + AutoQuantizeInt8(input0, shape_size, input0_int8, &input_quant_info0.scale, + &input_quant_info0.zero); + AutoQuantizeInt8(input1, shape_size, input1_int8, &input_quant_info1.scale, + &input_quant_info1.zero); + QuantizeInfo output_quant_info = {0.0f, 0}; + AdjustRangeInt8(expect_output, shape_size, &output_quant_info.scale, + &output_quant_info.zero); + + ArmEltwiseInt8Op eltwsie_op_int8; + framework::SubstituteOp substitude_op_int8; + substitude_op_int8 + .AddInput(input0_int8, input_dims, input_dim_size, input_quant_info0) + .AddInput(input1_int8, input_dims, input_dim_size, input_quant_info1) + .AddArg("type", static_cast(type)) + .AddOutput(output_int8, output_dims, MAX_OUTPUT_NUM, output_quant_info); + eltwsie_op_int8.Init( + NULL, reinterpret_cast(&substitude_op_int8), + NULL); + eltwsie_op_int8.Run(); + uint32_t output_dim_size = substitude_op_int8.GetOutputShapeDimSize(0); + + Dequantize(output_int8, shape_size, output_quant_info.scale, + 
output_quant_info.zero, output); + + ExpectTensorSimilar(expect_output, expect_output_dims, expect_output_dim_size, + output, output_dims, output_dim_size, 0.1); + + delete[] input0; + delete[] input1; + delete[] expect_output; + delete[] expect_output_dims; + delete[] input0_int8; + delete[] input1_int8; + delete[] output_int8; + delete[] output; + delete[] output_dims; +} + +} // namespace + +TEST_F(EltwiseOpTest, QuantInt8) { + const int32_t input_dims0[4] = {1, 32, 32, 16}; + TestEltwiseQuantInt8(input_dims0, 4, eltwise::SUM); + const int32_t input_dims1[4] = {2, 31, 31, 17}; + TestEltwiseQuantInt8(input_dims1, 4, eltwise::SUM); + const int32_t input_dims2[2] = {1, 31}; + TestEltwiseQuantInt8(input_dims2, 2, eltwise::SUM); +} + +#endif + } // namespace test } // namespace ops } // namespace micro diff --git a/micro/test/ccunit/micro/ops/matmul_test.cc b/micro/test/ccunit/micro/ops/matmul_test.cc index 4661352a97fa0c96b9403cc9d56bdc34e17a6282..86a0a0592d7803c5f20bead5fe279332aca55c56 100644 --- a/micro/test/ccunit/micro/ops/matmul_test.cc +++ b/micro/test/ccunit/micro/ops/matmul_test.cc @@ -15,8 +15,10 @@ #include "gtest/gtest.h" #include "micro/ops/gtest_utils.h" #include "micro/ops/matmul.h" +#include "micro/ops/nhwc/cmsis_nn/arm_mat_mul_int8.h" #include "micro/ops/substitute_op.h" #include "micro/ops/test_utils.h" +#include "micro/ops/test_quantize_utils.h" namespace micro { namespace ops { @@ -94,6 +96,94 @@ TEST_F(MatMulOpTest, SimpleCPU) { Simple2(); } +#ifdef MACE_MICRO_ENABLE_CMSIS + +namespace { + +void TestMatMulQuantInt8(int32_t lhs_rows, int32_t lhs_cols, int32_t rhs_cols) { + uint32_t input0_size = lhs_rows * lhs_cols; + uint32_t input1_size = lhs_cols * rhs_cols; + uint32_t output_size = lhs_rows * rhs_cols; + float *input0 = new float[input0_size]; + float *input1 = new float[input1_size]; + FillNormalRandomInput(input0, input0_size); + FillNormalRandomInput(input1, input1_size); + float *expect_output = new float[output_size]; + const uint32_t 
MAX_OUTPUT_NUM = 10; + int32_t *expect_output_dims = new int32_t[MAX_OUTPUT_NUM]; + + const int32_t input0_dims[2] = {lhs_rows, lhs_cols}; + // mat0 * tranpose(mat1) + const int32_t input1_dims[2] = {rhs_cols, lhs_cols}; + + MatMulOp matmul_op; + framework::SubstituteOp substitude_op; + substitude_op.AddInput(input0, input0_dims, 2) + .AddInput(input1, input1_dims, 2) + .AddArg("transpose_a", false) + .AddArg("transpose_b", true) + .AddOutput(expect_output, expect_output_dims, MAX_OUTPUT_NUM); + matmul_op.Init(NULL, reinterpret_cast(&substitude_op), + NULL); + matmul_op.Run(); + uint32_t expect_output_dim_size = substitude_op.GetOutputShapeDimSize(0); + + int8_t *input0_int8 = new int8_t[input0_size]; + int8_t *input1_int8 = new int8_t[input1_size]; + int8_t *output_int8 = new int8_t[output_size]; + float *output = new float[output_size]; + int32_t *output_dims = new int32_t[MAX_OUTPUT_NUM]; + QuantizeInfo input_quant_info0; + QuantizeInfo input_quant_info1; + AutoQuantizeInt8(input0, input0_size, input0_int8, &input_quant_info0.scale, + &input_quant_info0.zero); + AutoQuantizeInt8Symmetric(input1, input1_size, input1_int8, + &input_quant_info1.scale); + QuantizeInfo output_quant_info = {0.0f, 0}; + AdjustRangeInt8(expect_output, output_size, &output_quant_info.scale, + &output_quant_info.zero); + + ArmMatMulInt8Op matmul_op_int8; + framework::SubstituteOp substitude_op_int8; + substitude_op_int8.AddInput(input0_int8, input0_dims, 2, input_quant_info0) + .AddInput(input1_int8, input1_dims, 2, input_quant_info1) + .AddArg("transpose_a", false) + .AddArg("transpose_b", true) + .AddOutput(output_int8, output_dims, MAX_OUTPUT_NUM, output_quant_info); + matmul_op_int8.Init( + NULL, reinterpret_cast(&substitude_op_int8), + NULL); + matmul_op_int8.Run(); + uint32_t output_dim_size = substitude_op_int8.GetOutputShapeDimSize(0); + + Dequantize(output_int8, output_size, output_quant_info.scale, + output_quant_info.zero, output); + + ExpectTensorSimilar(expect_output, 
expect_output_dims, expect_output_dim_size, + output, output_dims, output_dim_size, 0.1); + + delete[] input0; + delete[] input1; + delete[] expect_output; + delete[] expect_output_dims; + delete[] input0_int8; + delete[] input1_int8; + delete[] output_int8; + delete[] output; + delete[] output_dims; +} + +} // namespace + +TEST_F(MatMulOpTest, QuantInt8) { + TestMatMulQuantInt8(1, 8, 4); + TestMatMulQuantInt8(1, 1001, 63); + // WARNING(ZhangZhimin): Batch inputs is unsupported + // TestMatMulQuantInt8(3, 100, 100); +} + +#endif + } // namespace test } // namespace ops } // namespace micro diff --git a/micro/test/ccunit/micro/ops/nhwc/conv_2d_test.cc b/micro/test/ccunit/micro/ops/nhwc/conv_2d_test.cc index 067420dc0b81cb9649175597600a231bc3a39066..e26b8cae28bf30248f3462e71c78e052ba5c875f 100644 --- a/micro/test/ccunit/micro/ops/nhwc/conv_2d_test.cc +++ b/micro/test/ccunit/micro/ops/nhwc/conv_2d_test.cc @@ -15,8 +15,10 @@ #include "gtest/gtest.h" #include "micro/ops/gtest_utils.h" #include "micro/ops/nhwc/conv_2d_ref.h" +#include "micro/ops/nhwc/cmsis_nn/arm_conv_2d_int8.h" #include "micro/ops/substitute_op.h" #include "micro/ops/test_utils.h" +#include "micro/ops/test_quantize_utils.h" namespace micro { namespace ops { @@ -315,6 +317,141 @@ TEST_F(Conv2dOpTest, CPUConv1x1) { TestConv1x1(); } +#ifdef MACE_MICRO_ENABLE_CMSIS + +namespace { + +void TestConv2dQuantInt8(const int32_t batch, + const int32_t out_channels, + const int32_t in_channels, + const int32_t in_height, + const int32_t in_width, + const int32_t kernel_height, + const int32_t kernel_width, + enum Padding padding_type, + const int32_t stride_height, + const int32_t stride_width, + const int32_t dilation_height, + const int32_t dilation_width) { + uint32_t input0_size = batch * in_height * in_width * in_channels; + uint32_t input1_size = + out_channels * kernel_height * kernel_width * in_channels; + uint32_t max_output_size = batch * out_channels * + (in_height + kernel_height * dilation_height) * + 
(in_width + kernel_width * dilation_width); + int32_t bias_size = out_channels; + float *input0 = new float[input0_size]; + float *input1 = new float[input1_size]; + float *bias = new float[bias_size]; + FillNormalRandomInput(input0, input0_size); + FillNormalRandomInput(input1, input1_size); + FillNormalRandomInput(bias, bias_size); + float *expect_output = new float[max_output_size]; + const uint32_t MAX_OUTPUT_NUM = 10; + int32_t *expect_output_dims = new int32_t[MAX_OUTPUT_NUM]; + + const int32_t input0_dims[4] = {batch, in_height, in_width, in_channels}; + const int32_t input1_dims[4] = {out_channels, kernel_height, kernel_width, + in_channels}; + const int32_t bias_dims[1] = {bias_size}; + + const int32_t strides[2] = {stride_height, stride_width}; + const int32_t dilations[2] = {dilation_height, dilation_width}; + + Conv2dRefOp conv2d_op; + framework::SubstituteOp substitude_op; + substitude_op.AddInput(input0, input0_dims, 4) + .AddInput(input1, input1_dims, 4) + .AddInput(bias, bias_dims, 1) + .AddArg("padding", padding_type) + .AddRepeatArg("strides", strides, 2) + .AddRepeatArg("dilations", dilations, 2) + .AddOutput(expect_output, expect_output_dims, MAX_OUTPUT_NUM); + conv2d_op.Init(NULL, reinterpret_cast(&substitude_op), + NULL); + conv2d_op.Run(); + uint32_t expect_output_dim_size = substitude_op.GetOutputShapeDimSize(0); + uint32_t exepct_output_size = + base::GetShapeSize(expect_output_dim_size, expect_output_dims); + + int8_t *input0_int8 = new int8_t[input0_size]; + int8_t *input1_int8 = new int8_t[input1_size]; + int32_t *bias_int32 = new int32_t[bias_size]; + int8_t *output_int8 = new int8_t[max_output_size]; + float *output = new float[max_output_size]; + int32_t *output_dims = new int32_t[MAX_OUTPUT_NUM]; + QuantizeInfo input_quant_info0; + QuantizeInfo input_quant_info1; + AutoQuantizeInt8(input0, input0_size, input0_int8, &input_quant_info0.scale, + &input_quant_info0.zero); + AutoQuantizeInt8Symmetric(input1, input1_size, input1_int8, + 
&input_quant_info1.scale); + QuantizeInfo output_quant_info = {0.0f, 0}; + AdjustRangeInt8(expect_output, exepct_output_size, &output_quant_info.scale, + &output_quant_info.zero); + float bias_scale = input_quant_info0.scale * input_quant_info1.scale; + QuantizeWithScaleAndZeropoint(bias, bias_size, bias_scale, 0, bias_int32); + + ArmConv2dInt8Op conv2d_op_int8; + framework::SubstituteOp substitude_op_int8; + substitude_op_int8.AddInput(input0_int8, input0_dims, 4, input_quant_info0) + .AddInput(input1_int8, input1_dims, 4, input_quant_info1) + .AddInput(bias_int32, bias_dims, 1) + .AddArg("padding", padding_type) + .AddRepeatArg("strides", strides, 2) + .AddRepeatArg("dilations", dilations, 2) + .AddOutput(output_int8, output_dims, MAX_OUTPUT_NUM, output_quant_info); + conv2d_op_int8.Init( + NULL, reinterpret_cast(&substitude_op_int8), + NULL); + conv2d_op_int8.Run(); + uint32_t output_dim_size = substitude_op_int8.GetOutputShapeDimSize(0); + + uint32_t output_size = base::GetShapeSize(output_dim_size, output_dims); + Dequantize(output_int8, output_size, output_quant_info.scale, + output_quant_info.zero, output); + + ExpectTensorSimilar(expect_output, expect_output_dims, expect_output_dim_size, + output, output_dims, output_dim_size, 0.1); + + delete[] input0; + delete[] input1; + delete[] bias; + delete[] expect_output; + delete[] expect_output_dims; + delete[] input0_int8; + delete[] input1_int8; + delete[] bias_int32; + delete[] output_int8; + delete[] output; + delete[] output_dims; +} + +} // namespace + +TEST_F(Conv2dOpTest, QuantInt8) { + TestConv2dQuantInt8(1, 128, 64, 32, 32, 3, 3, VALID, 1, 1, 1, 1); + TestConv2dQuantInt8(1, 128, 64, 32, 32, 3, 3, SAME, 1, 1, 1, 1); + TestConv2dQuantInt8(1, 128, 64, 32, 32, 3, 3, FULL, 1, 1, 1, 1); + TestConv2dQuantInt8(1, 128, 64, 32, 54, 3, 3, FULL, 1, 1, 1, 1); + TestConv2dQuantInt8(1, 128, 512, 14, 13, 3, 3, SAME, 1, 1, 1, 1); + TestConv2dQuantInt8(1, 128, 64, 14, 13, 5, 5, SAME, 2, 2, 1, 1); + TestConv2dQuantInt8(1, 
128, 257, 28, 28, 3, 3, SAME, 1, 1, 1, 1); + TestConv2dQuantInt8(1, 1, 128, 56, 56, 3, 3, SAME, 2, 2, 1, 1); + TestConv2dQuantInt8(1, 2, 1, 1000, 1000, 4, 3, FULL, 2, 1, 1, 1); + TestConv2dQuantInt8(1, 128, 1, 1000, 1000, 4, 3, FULL, 2, 3, 1, 1); + + // dilations is unsupported + // TestConv2dQuantInt8(1, 128, 64, 32, 32, 3, 3, SAME, 1, 1, 2, 2); + // TestConv2dQuantInt8(1, 128, 64, 32, 32, 3, 3, SAME, 1, 1, 2, 1); + + // batch must be 1 + // TestConv2dQuantInt8(2, 128, 64, 32, 32, 3, 3, SAME, 1, 1, 1, 1); + // TestConv2dQuantInt8(4, 128, 64, 32, 32, 3, 3, SAME, 1, 1, 1, 1); +} + +#endif + } // namespace test } // namespace ops } // namespace micro diff --git a/micro/test/ccunit/micro/ops/nhwc/depthwise_conv_2d_test.cc b/micro/test/ccunit/micro/ops/nhwc/depthwise_conv_2d_test.cc index 3583f4c4f128a7aee1f5db6f91aacb6f5b4a361c..7f62ffef6b3bc879da741d89a5b780551464540c 100644 --- a/micro/test/ccunit/micro/ops/nhwc/depthwise_conv_2d_test.cc +++ b/micro/test/ccunit/micro/ops/nhwc/depthwise_conv_2d_test.cc @@ -15,8 +15,10 @@ #include "gtest/gtest.h" #include "micro/ops/gtest_utils.h" #include "micro/ops/nhwc/depthwise_conv_2d_ref.h" +#include "micro/ops/nhwc/cmsis_nn/arm_depthwise_conv_2d_int8.h" #include "micro/ops/substitute_op.h" #include "micro/ops/test_utils.h" +#include "micro/ops/test_quantize_utils.h" namespace micro { namespace ops { @@ -107,6 +109,146 @@ TEST_F(DepthwiseConv2dOpTest, MuiltiC2CPU) { MultiC2ValidTest(); } +#ifdef MACE_MICRO_ENABLE_CMSIS + +namespace { + +void TestDepthwiseConv2dQuantInt8(const int32_t batch, + const int32_t multiplier, + const int32_t in_channels, + const int32_t in_height, + const int32_t in_width, + const int32_t kernel_height, + const int32_t kernel_width, + enum Padding padding_type, + const int32_t stride_height, + const int32_t stride_width, + const int32_t dilation_height, + const int32_t dilation_width) { + uint32_t input0_size = batch * in_height * in_width * in_channels; + uint32_t input1_size = + multiplier * 
kernel_height * kernel_width * in_channels; + uint32_t max_output_size = batch * multiplier * in_channels * + (in_height + kernel_height * dilation_height) * + (in_width + kernel_width * dilation_width); + int32_t bias_size = multiplier * in_channels; + float *input0 = new float[input0_size]; + float *input1 = new float[input1_size]; + float *bias = new float[bias_size]; + FillNormalRandomInput(input0, input0_size); + FillNormalRandomInput(input1, input1_size); + FillNormalRandomInput(bias, bias_size); + float *expect_output = new float[max_output_size]; + const uint32_t MAX_OUTPUT_NUM = 10; + int32_t *expect_output_dims = new int32_t[MAX_OUTPUT_NUM]; + + const int32_t input0_dims[4] = {batch, in_height, in_width, in_channels}; + const int32_t input1_dims[4] = {multiplier, kernel_height, kernel_width, + in_channels}; + const int32_t bias_dims[1] = {bias_size}; + + const int32_t strides[2] = {stride_height, stride_width}; + const int32_t dilations[2] = {dilation_height, dilation_width}; + + DepthwiseConv2dRefOp depthwise_conv2d_op; + framework::SubstituteOp substitude_op; + substitude_op.AddInput(input0, input0_dims, 4) + .AddInput(input1, input1_dims, 4) + .AddInput(bias, bias_dims, 1) + .AddArg("padding", padding_type) + .AddRepeatArg("strides", strides, 2) + .AddRepeatArg("dilations", dilations, 2) + .AddOutput(expect_output, expect_output_dims, MAX_OUTPUT_NUM); + depthwise_conv2d_op.Init( + NULL, reinterpret_cast(&substitude_op), NULL); + depthwise_conv2d_op.Run(); + uint32_t expect_output_dim_size = substitude_op.GetOutputShapeDimSize(0); + uint32_t exepct_output_size = + base::GetShapeSize(expect_output_dim_size, expect_output_dims); + + int8_t *input0_int8 = new int8_t[input0_size]; + int8_t *input1_int8 = new int8_t[input1_size]; + int32_t *bias_int32 = new int32_t[bias_size]; + int8_t *output_int8 = new int8_t[max_output_size]; + float *output = new float[max_output_size]; + int32_t *output_dims = new int32_t[MAX_OUTPUT_NUM]; + QuantizeInfo 
input_quant_info0; + QuantizeInfo input_quant_info1; + AutoQuantizeInt8(input0, input0_size, input0_int8, &input_quant_info0.scale, + &input_quant_info0.zero); + AutoQuantizeInt8Symmetric(input1, input1_size, input1_int8, + &input_quant_info1.scale); + QuantizeInfo output_quant_info = {0.0f, 0}; + AdjustRangeInt8(expect_output, exepct_output_size, &output_quant_info.scale, + &output_quant_info.zero); + float bias_scale = input_quant_info0.scale * input_quant_info1.scale; + QuantizeWithScaleAndZeropoint(bias, bias_size, bias_scale, 0, bias_int32); + + ArmDepthwiseConv2dInt8Op depthwise_conv2d_op_int8; + framework::SubstituteOp substitude_op_int8; + substitude_op_int8.AddInput(input0_int8, input0_dims, 4, input_quant_info0) + .AddInput(input1_int8, input1_dims, 4, input_quant_info1) + .AddInput(bias_int32, bias_dims, 1) + .AddArg("padding", padding_type) + .AddRepeatArg("strides", strides, 2) + .AddRepeatArg("dilations", dilations, 2) + .AddOutput(output_int8, output_dims, MAX_OUTPUT_NUM, output_quant_info); + depthwise_conv2d_op_int8.Init( + NULL, reinterpret_cast(&substitude_op_int8), + NULL); + depthwise_conv2d_op_int8.Run(); + uint32_t output_dim_size = substitude_op_int8.GetOutputShapeDimSize(0); + + uint32_t output_size = base::GetShapeSize(output_dim_size, output_dims); + Dequantize(output_int8, output_size, output_quant_info.scale, + output_quant_info.zero, output); + + ExpectTensorSimilar(expect_output, expect_output_dims, expect_output_dim_size, + output, output_dims, output_dim_size, 0.1); + + delete[] input0; + delete[] input1; + delete[] bias; + delete[] expect_output; + delete[] expect_output_dims; + delete[] input0_int8; + delete[] input1_int8; + delete[] bias_int32; + delete[] output_int8; + delete[] output; + delete[] output_dims; +} + +} // namespace + +TEST_F(DepthwiseConv2dOpTest, QuantInt8) { + TestDepthwiseConv2dQuantInt8(1, 1, 1024, 7, 7, 3, 3, VALID, 1, 1, 1, 1); + TestDepthwiseConv2dQuantInt8(1, 1, 1024, 7, 7, 3, 3, SAME, 1, 1, 1, 1); + 
TestDepthwiseConv2dQuantInt8(1, 1, 1024, 7, 7, 3, 3, FULL, 1, 1, 1, 1); + + TestDepthwiseConv2dQuantInt8(1, 1, 512, 14, 13, 3, 3, SAME, 1, 1, 1, 1); + TestDepthwiseConv2dQuantInt8(1, 1, 512, 14, 13, 5, 5, SAME, 2, 2, 1, 1); + TestDepthwiseConv2dQuantInt8(1, 1, 256, 28, 28, 3, 3, SAME, 1, 1, 1, 1); + TestDepthwiseConv2dQuantInt8(1, 1, 128, 56, 56, 3, 3, SAME, 2, 2, 1, 1); + + TestDepthwiseConv2dQuantInt8(1, 1, 3, 1000, 1000, 4, 3, FULL, 2, 1, 1, 1); + TestDepthwiseConv2dQuantInt8(1, 1, 3, 1000, 1000, 4, 3, FULL, 2, 3, 1, 1); + + // dilations is unsupported + // TestDepthwiseConv2dQuantInt8(1, 1, 3, 1000, 1000, 3, 3, VALID, 1, 1, 2, 2); + // TestDepthwiseConv2dQuantInt8(1, 1, 3, 1000, 1000, 4, 3, FULL, 1, 1, 3, 5); + // TestDepthwiseConv2dQuantInt8(1, 1, 3, 1000, 1000, 4, 3, FULL, 1, 3, 3, 1); + + // batch must be 1 + // TestDepthwiseConv2dQuantInt8(3, 1, 128, 56, 56, 3, 3, SAME, 2, 2); + + // multiplier must be 1 + // TestDepthwiseConv2dQuantInt8(1, 2, 1024, 7, 7, 3, 3, SAME, 1, 1); + // TestDepthwiseConv2dQuantInt8(1, 2, 1024, 7, 7, 3, 3, SAME, 2, 2); +} + +#endif + } // namespace test } // namespace ops } // namespace micro diff --git a/micro/test/ccunit/micro/ops/nhwc/pooling_test.cc b/micro/test/ccunit/micro/ops/nhwc/pooling_test.cc index d7f7db329a8d98eafe7d65f383f0c72c2f6a3044..74e3f15e8e3fe461b86c6b0228ff1156e3402a80 100644 --- a/micro/test/ccunit/micro/ops/nhwc/pooling_test.cc +++ b/micro/test/ccunit/micro/ops/nhwc/pooling_test.cc @@ -16,7 +16,9 @@ #include "micro/ops/gtest_utils.h" #include "micro/ops/nhwc/pooling_ref.h" #include "micro/ops/nhwc/pooling_s4.h" +#include "micro/ops/nhwc/cmsis_nn/arm_pooling_int8.h" #include "micro/ops/substitute_op.h" +#include "micro/ops/test_quantize_utils.h" #include "micro/ops/test_utils.h" namespace micro { @@ -203,6 +205,134 @@ TEST_F(PoolingOpTest, TestPoolingOpSameAvg) { TestPoolingOpSameAvg(); } +#ifdef MACE_MICRO_ENABLE_CMSIS + +namespace { + +void TestPoolingQuantInt8(const int32_t *input_dims, + const uint32_t 
input_dim_size, + const int32_t *kernels, + const int32_t *strides, + Padding padding, + PoolingType pooling_type) { + int32_t input_size = base::GetShapeSize(input_dim_size, input_dims); + int32_t max_output_size = input_dims[0] * input_dims[3] * + (input_dims[1] + kernels[0]) * + (input_dims[2] + kernels[1]); + + float *input = new float[input_size]; + FillNormalRandomInput(input, input_size); + float *expect_output = new float[max_output_size]; + const uint32_t MAX_OUTPUT_DIM_SIZE = 100; + int32_t *expect_output_dims = new int32_t[MAX_OUTPUT_DIM_SIZE]; + + const int32_t dilations[2] = {1, 1}; + + PoolingRefOp pooling_op; + framework::SubstituteOp substitude_op; + substitude_op.AddInput(input, input_dims, input_dim_size) + .AddRepeatArg("strides", strides, 2) + .AddRepeatArg("kernels", kernels, 2) + .AddRepeatArg("dilations", dilations, 2) + .AddArg("padding", padding) + .AddArg("pooling_type", pooling_type) + .AddOutput(expect_output, expect_output_dims, MAX_OUTPUT_DIM_SIZE); + pooling_op.Init( + NULL, reinterpret_cast(&substitude_op), NULL); + pooling_op.Run(); + uint32_t expect_output_dim_size = substitude_op.GetOutputShapeDimSize(0); + + int8_t *input_int8 = new int8_t[input_size]; + int8_t *output_int8 = new int8_t[max_output_size]; + float *output = new float[max_output_size]; + int32_t *output_dims = new int32_t[MAX_OUTPUT_DIM_SIZE]; + QuantizeInfo input_quant_info; + AutoQuantizeInt8(input, input_size, input_int8, &input_quant_info.scale, + &input_quant_info.zero); + QuantizeInfo output_quant_info = input_quant_info; + + ArmPoolingInt8Op pooling_op_int8; + framework::SubstituteOp substitude_op_int8; + substitude_op_int8 + .AddInput(input_int8, input_dims, input_dim_size, input_quant_info) + .AddRepeatArg("strides", strides, 2) + .AddRepeatArg("kernels", kernels, 2) + .AddRepeatArg("dilations", dilations, 2) + .AddArg("padding", padding) + .AddArg("pooling_type", pooling_type) + .AddOutput(output_int8, output_dims, MAX_OUTPUT_DIM_SIZE, + 
output_quant_info); + pooling_op_int8.Init( + NULL, reinterpret_cast(&substitude_op_int8), + NULL); + pooling_op_int8.Run(); + uint32_t output_dim_size = substitude_op_int8.GetOutputShapeDimSize(0); + + uint32_t output_size = base::GetShapeSize(output_dim_size, output_dims); + Dequantize(output_int8, output_size, output_quant_info.scale, + output_quant_info.zero, output); + + ExpectTensorSimilar(expect_output, expect_output_dims, expect_output_dim_size, + output, output_dims, output_dim_size, 0.1); + + delete[] input; + delete[] expect_output; + delete[] expect_output_dims; + delete[] input_int8; + delete[] output_int8; + delete[] output; + delete[] output_dims; +} + +} // namespace +TEST_F(PoolingOpTest, Quant) { + const int32_t input_dims0[4] = {1, 7, 7, 1024}; + const int32_t kernels0[2] = {7, 7}; + const int32_t strides0[2] = {1, 1}; + TestPoolingQuantInt8(input_dims0, 4, kernels0, strides0, Padding::VALID, + PoolingType::AVG); + TestPoolingQuantInt8(input_dims0, 4, kernels0, strides0, Padding::VALID, + PoolingType::MAX); + TestPoolingQuantInt8(input_dims0, 4, kernels0, strides0, Padding::FULL, + PoolingType::AVG); + TestPoolingQuantInt8(input_dims0, 4, kernels0, strides0, Padding::SAME, + PoolingType::MAX); + const int32_t input_dims1[4] = {1, 3, 3, 2}; + const int32_t kernels1[2] = {3, 3}; + const int32_t strides1[2] = {1, 1}; + TestPoolingQuantInt8(input_dims1, 4, kernels1, strides1, Padding::SAME, + PoolingType::AVG); + const int32_t input_dims2[4] = {1, 3, 3, 2}; + const int32_t kernels2[2] = {2, 3}; + const int32_t strides2[2] = {1, 2}; + TestPoolingQuantInt8(input_dims2, 4, kernels2, strides2, Padding::SAME, + PoolingType::MAX); + // WARNING(ZhangZhimin): Batch inputs is unsupported + // const int32_t input_dims3[4] = {3,15,15,128}; + // const int32_t kernels3[2] = {4, 4}; + // const int32_t strides3[2] = {4, 4}; + // TestPoolingQuantInt8(input_dims3, 4, kernels3, strides3, Padding::SAME, + // PoolingType::AVG); + // const int32_t input_dims4[4] = 
{3,15,15,128}; + // const int32_t kernels4[2] = {4, 4}; + // const int32_t strides4[2] = {4, 4}; + // TestPoolingQuantInt8(input_dims4, 4, kernels4, strides4, Padding::SAME, + // PoolingType::MAX); + const int32_t input_dims5[4] = {1, 31, 31, 127}; + const int32_t kernels5[2] = {2, 2}; + const int32_t strides5[2] = {3, 3}; + TestPoolingQuantInt8(input_dims5, 4, kernels5, strides5, Padding::SAME, + PoolingType::AVG); + const int32_t input_dims6[4] = {1, 31, 31, 127}; + const int32_t kernels6[2] = {2, 2}; + const int32_t strides6[2] = {3, 3}; + TestPoolingQuantInt8(input_dims6, 4, kernels6, strides6, Padding::SAME, + PoolingType::MAX); +} + +#endif + + } // namespace test } // namespace ops } // namespace micro diff --git a/micro/test/ccunit/micro/ops/softmax_test.cc b/micro/test/ccunit/micro/ops/softmax_test.cc index 0590256fddded792a04adf72b8c2f63ac4deb198..32facb83535e323cbcebdb56a0808e0a7085acae 100644 --- a/micro/test/ccunit/micro/ops/softmax_test.cc +++ b/micro/test/ccunit/micro/ops/softmax_test.cc @@ -13,9 +13,11 @@ // limitations under the License. 
#include "gtest/gtest.h" -#include "micro/ops/gtest_utils.h" #include "micro/ops/softmax.h" +#include "micro/ops/gtest_utils.h" +#include "micro/ops/nhwc/cmsis_nn/arm_softmax_int8.h" #include "micro/ops/substitute_op.h" +#include "micro/ops/test_quantize_utils.h" #include "micro/ops/test_utils.h" namespace micro { @@ -49,15 +51,89 @@ void Simple(bool use_log = false) { &substitude_op), NULL); softmax_op.Run(); - ExpectTensorNear(output, output_dims, output_dim_size, - expect, expect_dims, output_dim_size, 1e-5); + ExpectTensorNear(output, output_dims, output_dim_size, expect, + expect_dims, output_dim_size, 1e-5); } } // namespace + TEST_F(SoftmaxOpTest, CPUSimple) { Simple(); } TEST_F(SoftmaxOpTest, CPUSimpleUseLog) { Simple(true); } +#ifdef MACE_MICRO_ENABLE_CMSIS + +namespace { + +void TestSoftmaxQuantInt8(const int32_t *input_dims, + const uint32_t input_dim_size, + bool use_log = false) { + int32_t shape_size = base::GetShapeSize(input_dim_size, input_dims); + float *input = new float[shape_size]; + FillNormalRandomInput(input, shape_size); + float *expect_output = new float[shape_size]; + const uint32_t MAX_OUTPUT_NUM = 10; + int32_t *expect_output_dims = new int32_t[MAX_OUTPUT_NUM]; + + SoftmaxOp softmax_op; + framework::SubstituteOp substitude_op; + substitude_op.AddInput(input, input_dims, input_dim_size) + .AddArg("use_log", static_cast(use_log)) + .AddOutput(expect_output, expect_output_dims, MAX_OUTPUT_NUM); + softmax_op.Init( + NULL, reinterpret_cast(&substitude_op), NULL); + softmax_op.Run(); + uint32_t expect_output_dim_size = substitude_op.GetOutputShapeDimSize(0); + + int8_t *input_int8 = new int8_t[shape_size]; + int8_t *output_int8 = new int8_t[shape_size]; + float *output = new float[shape_size]; + int32_t *output_dims = new int32_t[MAX_OUTPUT_NUM]; + QuantizeInfo input_quant_info; + AutoQuantizeInt8(input, shape_size, input_int8, &input_quant_info.scale, + &input_quant_info.zero); + QuantizeInfo output_quant_info = {1.0f / 255.0f, -128}; + + 
ArmSoftmaxInt8Op softmax_op_int8; + framework::SubstituteOp substitude_op_int8; + substitude_op_int8 + .AddInput(input_int8, input_dims, input_dim_size, input_quant_info) + .AddArg("use_log", static_cast(use_log)) + .AddOutput(output_int8, output_dims, MAX_OUTPUT_NUM, output_quant_info); + softmax_op_int8.Init( + NULL, reinterpret_cast(&substitude_op_int8), + NULL); + softmax_op_int8.Run(); + uint32_t output_dim_size = substitude_op_int8.GetOutputShapeDimSize(0); + + Dequantize(output_int8, shape_size, output_quant_info.scale, + output_quant_info.zero, output); + + ExpectTensorSimilar(expect_output, expect_output_dims, expect_output_dim_size, + output, output_dims, output_dim_size, 0.1); + + delete[] input; + delete[] expect_output; + delete[] expect_output_dims; + delete[] input_int8; + delete[] output_int8; + delete[] output; + delete[] output_dims; +} + +} // namespace + +TEST_F(SoftmaxOpTest, QuantInt8) { + const int32_t input_dims0[2] = {5, 10}; + TestSoftmaxQuantInt8(input_dims0, 2); + const int32_t input_dims1[2] = {50, 100}; + TestSoftmaxQuantInt8(input_dims1, 2); + const int32_t input_dims2[2] = {1, 31}; + TestSoftmaxQuantInt8(input_dims2, 2); +} + +#endif + } // namespace test } // namespace ops } // namespace micro diff --git a/micro/test/ccutils/CMakeLists.txt b/micro/test/ccutils/CMakeLists.txt index 8b60050d4a11c16fe6ef8c2e543150524a7c2408..aa9246cfc7688d1ca21753d829e0e1b1e73bc74f 100644 --- a/micro/test/ccutils/CMakeLists.txt +++ b/micro/test/ccutils/CMakeLists.txt @@ -7,6 +7,7 @@ add_library(micro_ccutils target_include_directories(micro_ccutils PUBLIC .) 
target_link_libraries(micro_ccutils micro_base micro_framework_for_optest) +target_compile_options(micro_ccutils PUBLIC "-std=c++11") if(HEXAGON_STUB) add_library(micro_rpc_stub diff --git a/micro/test/ccutils/micro/ops/operator.test.cc b/micro/test/ccutils/micro/ops/operator.test.cc index 578402b3973ae8b3fc95147dce4be67896af994d..267314940c44910fe9a88ccc70c8175491b8d774 100644 --- a/micro/test/ccutils/micro/ops/operator.test.cc +++ b/micro/test/ccutils/micro/ops/operator.test.cc @@ -105,6 +105,16 @@ MaceStatus Operator::ResizeOutputShape(uint32_t idx, uint32_t dim_size, return fake_op_->ResizeOutputShape(idx, dim_size, dims); } +QuantizeInfo Operator::GetInputQuantizeInfo(uint32_t idx) { + return fake_op_->GetInputQuantizeInfo(idx); +} + +QuantizeInfo Operator::GetOutputQuantizeInfo(uint32_t idx) { + return fake_op_->GetOutputQuantizeInfo(idx); +} + + + #ifndef MACE_DEFINE_GET_ARG_BY_NAME_FUNC #define MACE_DEFINE_GET_ARG_BY_NAME_FUNC(T, FUNC) \ template <> \ diff --git a/micro/test/ccutils/micro/ops/substitute_op.cc b/micro/test/ccutils/micro/ops/substitute_op.cc index f65c01ec9f160934b73c01c23de9790d8851d42c..4c8735d1a85d43d4bb214f63b1feedb2488b428d 100644 --- a/micro/test/ccutils/micro/ops/substitute_op.cc +++ b/micro/test/ccutils/micro/ops/substitute_op.cc @@ -24,26 +24,32 @@ namespace framework { SubstituteOp::SubstituteOp() : input_idx_(0), output_idx_(0), arg_idx_(0), repeat_arg_idx_(0) {} -SubstituteOp &SubstituteOp::AddInput( - const void *input, const int32_t *dims, const uint32_t dims_size) { +SubstituteOp &SubstituteOp::AddInput(const void *input, + const int32_t *dims, + const uint32_t dims_size, + QuantizeInfo quant_info) { MACE_ASSERT1(input != NULL || dims != NULL || dims_size == 0, "Invalid param"); MACE_ASSERT1(input_idx_ < kMaxInputNum, "Not enough mem."); inputs_[input_idx_] = input; input_dims_[input_idx_] = dims; input_dim_sizes_[input_idx_] = dims_size; + input_quant_info_[input_idx_] = quant_info; ++input_idx_; return *this; } -SubstituteOp 
&SubstituteOp::AddOutput( - void *output, int32_t *dims, const uint32_t dims_size) { +SubstituteOp &SubstituteOp::AddOutput(void *output, + int32_t *dims, + const uint32_t dims_size, + QuantizeInfo quant_info) { MACE_ASSERT1(output != NULL || dims != NULL || dims_size == 0, "Invalid param"); MACE_ASSERT1(output_idx_ < kMaxOutputNum, "Not enough mem."); outputs_[output_idx_] = output; output_dims_[output_idx_] = dims; output_dim_sizes_[output_idx_] = dims_size; + output_quant_info_[output_idx_] = quant_info; ++output_idx_; return *this; } @@ -86,6 +92,14 @@ const int32_t *SubstituteOp::GetOutputShapeDims(uint32_t idx) { return output_dims_[idx]; } +QuantizeInfo SubstituteOp::GetInputQuantizeInfo(uint32_t idx) { + return input_quant_info_[idx]; +} + +QuantizeInfo SubstituteOp::GetOutputQuantizeInfo(uint32_t idx) { + return output_quant_info_[idx]; +} + MaceStatus SubstituteOp::ResizeOutputShape(uint32_t idx, uint32_t input_dim_size, const int32_t *input_dims) { diff --git a/micro/test/ccutils/micro/ops/substitute_op.h b/micro/test/ccutils/micro/ops/substitute_op.h index 0f5e60d471fb7a6c07bdb31d33d5d03b71ccba56..4b822d7a6b03f0ca90782bbaecc38170b3d42445 100644 --- a/micro/test/ccutils/micro/ops/substitute_op.h +++ b/micro/test/ccutils/micro/ops/substitute_op.h @@ -16,6 +16,7 @@ #define MICRO_TEST_CCUTILS_MICRO_OPS_SUBSTITUTE_OP_H_ #include "micro/base/logging.h" +#include "micro/base/types.h" #include "micro/base/utils.h" #include "micro/include/public/micro.h" @@ -43,9 +44,13 @@ class SubstituteOp { ~SubstituteOp() {} SubstituteOp &AddInput(const void *input, - const int32_t *dims, const uint32_t dims_size); + const int32_t *dims, + const uint32_t dims_size, + QuantizeInfo quant_info = QuantizeInfo{0.0f, 0}); SubstituteOp &AddOutput(void *output, - int32_t *dims, const uint32_t dims_size); + int32_t *dims, + const uint32_t dims_size, + QuantizeInfo quant_info = QuantizeInfo{0.0f, 0}); template SubstituteOp &AddArg(const char *name, T value) { @@ -106,6 +111,9 @@ class 
SubstituteOp { const int32_t *input_dims); MaceStatus ReuseInputBufferForOutput(uint32_t output_idx, uint32_t input_idx); + QuantizeInfo GetInputQuantizeInfo(uint32_t idx); + QuantizeInfo GetOutputQuantizeInfo(uint32_t idx); + template const T *GetInputData(uint32_t idx) { return static_cast(DoGetInputData(idx)); @@ -120,11 +128,13 @@ class SubstituteOp { const void *inputs_[kMaxInputNum]; const int32_t *input_dims_[kMaxInputNum]; uint32_t input_dim_sizes_[kMaxInputNum]; + QuantizeInfo input_quant_info_[kMaxInputNum]; uint32_t input_idx_; void *outputs_[kMaxOutputNum]; int32_t *output_dims_[kMaxOutputNum]; uint32_t output_dim_sizes_[kMaxOutputNum]; + QuantizeInfo output_quant_info_[kMaxOutputNum]; uint32_t output_idx_; // for arg diff --git a/micro/test/ccutils/micro/ops/test_quantize_utils.h b/micro/test/ccutils/micro/ops/test_quantize_utils.h new file mode 100644 index 0000000000000000000000000000000000000000..d15792cf45416826168b3764cc3a9720a1e55c9f --- /dev/null +++ b/micro/test/ccutils/micro/ops/test_quantize_utils.h @@ -0,0 +1,129 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
// Converts `value` to the integer type Q, clamping it to Q's range.
// The fractional part of in-range values is discarded; callers round first.
// The clamp is done in floating point so that out-of-range values never
// reach the float-to-integer conversion (which would be undefined
// behaviour). NaN is mapped to 0.
template <typename Q>
inline Q Saturate(float value) {
  if (value != value) {  // NaN compares unequal to itself
    return static_cast<Q>(0);
  }
  // double represents every int8/int16/int32 limit exactly.
  const double wide = static_cast<double>(value);
  if (wide <= static_cast<double>(std::numeric_limits<Q>::lowest())) {
    return std::numeric_limits<Q>::lowest();
  } else if (wide >= static_cast<double>(std::numeric_limits<Q>::max())) {
    return std::numeric_limits<Q>::max();
  } else {
    return static_cast<Q>(wide);
  }
}

// Writes the smallest and largest element of input[0..size) to *min_val and
// *max_val. For size == 0 the results are the float extremes
// (*min_val = max float, *max_val = lowest float).
inline void FindMinMax(const float *input,
                       const uint32_t size,
                       float *min_val,
                       float *max_val) {
  float max_v = std::numeric_limits<float>::lowest();
  float min_v = std::numeric_limits<float>::max();
  for (uint32_t i = 0; i < size; ++i) {
    max_v = (max_v < input[i]) ? input[i] : max_v;
    min_v = (min_v < input[i]) ? min_v : input[i];
  }
  *min_val = min_v;
  *max_val = max_v;
}

// Quantizes input[0..size) as q = saturate(round(zero_point + x / scale)).
// A scale of 0 denotes a zero-width range (e.g. an all-zero tensor); every
// element then maps to zero_point instead of dividing by zero.
template <typename Q>
inline void QuantizeWithScaleAndZeropoint(const float *input,
                                          const uint32_t size,
                                          float scale,
                                          int32_t zero_point,
                                          Q *output) {
  const float recip_scale = (scale != 0.0f) ? 1 / scale : 0.0f;
  for (uint32_t i = 0; i < size; ++i) {
    output[i] = Saturate<Q>(roundf(zero_point + recip_scale * input[i]));
  }
}

// Chooses asymmetric int8 parameters for input[0..size).
// The value range is widened to include 0, *scale is range / 255, and
// *zero_point is the int8 code of real 0, rounded to nearest and clamped to
// [-128, 127]. An empty or all-zero input yields *scale == 0 and
// *zero_point == 0.
inline void AdjustRangeInt8(const float *input,
                            const uint32_t size,
                            float *scale,
                            int32_t *zero_point) {
  float in_min_data;
  float in_max_data;
  FindMinMax(input, size, &in_min_data, &in_max_data);
  in_max_data = (0.f < in_max_data) ? in_max_data : 0.f;
  in_min_data = (0.f < in_min_data) ? 0.f : in_min_data;

  *scale = (in_max_data - in_min_data) / 255;
  if (*scale == 0.0f) {
    // Zero-width range: -min / scale would be 0 / 0.
    *zero_point = 0;
    return;
  }
  *zero_point = Saturate<int8_t>(roundf(-in_min_data / *scale - 128));
}

// Chooses a symmetric int8 scale for input[0..size): the largest magnitude
// (with the range widened to include 0) divided by 127. The implied zero
// point is 0. An empty or all-zero input yields *scale == 0.
inline void AdjustRangeInt8Symmetric(const float *input,
                                     const uint32_t size,
                                     float *scale) {
  float in_min_data;
  float in_max_data;
  FindMinMax(input, size, &in_min_data, &in_max_data);
  in_max_data = (0.f < in_max_data) ? in_max_data : 0.f;
  in_min_data = (0.f < in_min_data) ? 0.f : in_min_data;

  // After widening, in_max_data >= 0 and in_min_data <= 0, so the absolute
  // values are in_max_data and -in_min_data.
  const float neg_min = -in_min_data;
  const float max_abs = (in_max_data < neg_min) ? neg_min : in_max_data;

  *scale = max_abs / 127.0f;
}

// Picks asymmetric int8 parameters for `input` (see AdjustRangeInt8) and
// writes the quantized values to `output`.
inline void AutoQuantizeInt8(const float *input,
                             const uint32_t size,
                             int8_t *output,
                             float *scale,
                             int32_t *zero_point) {
  AdjustRangeInt8(input, size, scale, zero_point);
  QuantizeWithScaleAndZeropoint(input, size, *scale, *zero_point, output);
}

// Picks a symmetric int8 scale for `input` (see AdjustRangeInt8Symmetric)
// and writes the values quantized with zero point 0 to `output`.
inline void AutoQuantizeInt8Symmetric(const float *input,
                                      const uint32_t size,
                                      int8_t *output,
                                      float *scale) {
  AdjustRangeInt8Symmetric(input, size, scale);
  QuantizeWithScaleAndZeropoint(input, size, *scale, 0, output);
}

// Maps int8 codes back to real values: output[i] = scale * (q - zero_point).
inline void Dequantize(const int8_t *input,
                       const uint32_t size,
                       const float scale,
                       const int32_t zero_point,
                       float *output) {
  for (uint32_t i = 0; i < size; ++i) {
    output[i] = static_cast<float>(scale * (input[i] - zero_point));
  }
}
shape_size; ++n) { + input[n] = dis(gen); + } +} + } // namespace test } // namespace ops } // namespace micro diff --git a/micro/test/ccutils/micro/ops/test_utils.h b/micro/test/ccutils/micro/ops/test_utils.h index fc64e0b7c33dbe10d1b52afd9c6eb7c737d9326b..91c29025997bfe6107154d18e52a02736a256fe0 100644 --- a/micro/test/ccutils/micro/ops/test_utils.h +++ b/micro/test/ccutils/micro/ops/test_utils.h @@ -38,6 +38,16 @@ T *input = common::test::GetGlobalBuffer()->GetBuffer(shape_size); \ micro::ops::test::FillRandomInput(input, shape_size * sizeof(T)) #endif +void FillUniformRandomInput(float *input, + const int32_t shape_size, + float low = -50.0f, + float up = 50.0f); + +void FillNormalRandomInput(float *input, + const int32_t shape_size, + float mean = 0.0f, + float std = 1.0f); + } // namespace test } // namespace ops } // namespace micro diff --git a/tools/cpplint.sh b/tools/cpplint.sh index a9ba8fcda4bbc6f22ff86f2b13dfd3c26a1b2e30..07555867d0f96225cbf6c81fe9d80b60136428d4 100755 --- a/tools/cpplint.sh +++ b/tools/cpplint.sh @@ -14,6 +14,6 @@ cpplint --linelength=80 --counting=detailed $(find micro/include -name "*.h" -o cpplint --linelength=80 --counting=detailed $(find micro/model -name "*.h" -or -name "*.cc") cpplint --linelength=80 --counting=detailed --filter=-build/include_what_you_use $(find micro/ops -name "*.h" -or -name "*.cc") cpplint --linelength=80 --counting=detailed $(find micro/port -name "*.h" -or -name "*.cc") -cpplint --linelength=80 --counting=detailed $(find micro/test \( -path micro/test/ccbenchmark/codegen -or -path micro/test/ccbaseline/codegen \) -prune -o -name "*.h" -or -name "*.cc") +cpplint --linelength=80 --counting=detailed --filter=-build/include_what_you_use $(find micro/test \( -path micro/test/ccbenchmark/codegen -or -path micro/test/ccbaseline/codegen \) -prune -o -name "*.h" -or -name "*.cc") cpplint --linelength=80 --counting=detailed $(find micro/tools -name "*.h" -or -name "*.cc") cpplint --linelength=80 --counting=detailed 
--filter=-build/include_subdir $(find micro/examples \( -path micro/examples/classifier/mbed-os -or -path micro/examples/classifier/data -or -path micro/examples/classifier/install -or -path micro/examples/classifier/BUILD \) -prune -name "*.cc" -or -name "*.h")