Commit cd839dfc authored by Zhang Zhimin

fix: Add micro arm int8 uts

Parent 6352c291
......@@ -52,6 +52,35 @@ MACE_MAPPING_DATA_TYPE_AND_ENUM(int32_t, DT_INT32);
MACE_MAPPING_DATA_TYPE_AND_ENUM(BFloat16, DT_BFLOAT16);
#endif
struct QuantizeInfo {
float scale;
int32_t zero;
};
namespace ops {
namespace eltwise { // for redefine
enum Type {
SUM = 0,
SUB = 1,
PROD = 2,
DIV = 3,
MIN = 4,
MAX = 5,
NEG = 6,
ABS = 7,
SQR_DIFF = 8,
POW = 9,
EQUAL = 10,
FLOOR_DIV = 11,
CLIP = 12,
SIGN = 13,
NONE = 14,
};
} // namespace eltwise
} // namespace ops
} // namespace micro
#endif // MICRO_BASE_TYPES_H_
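QuantizeInfo now lives in micro/base/types.h with an int32_t zero point (the struct it replaces in framework/operator.h below declared zero as float). A minimal sketch of the affine convention the struct is assumed to describe — DequantizeValue is a hypothetical helper, not part of this commit:
#include <stdint.h>
// Assumed affine convention: real = scale * (quantized - zero).
inline float DequantizeValue(int8_t q, const micro::QuantizeInfo &info) {
  return info.scale * (static_cast<float>(q) - static_cast<float>(info.zero));
}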
......@@ -116,5 +116,14 @@ const T &min(const T &a, const T &b) {
return (a < b) ? a : b;
}
bool ShapeIsEqual(const int32_t *dims0,
const int32_t *dims1, uint32_t dim_size) {
while (dim_size-- > 0) {
if (dims0[dim_size] != dims1[dim_size])
return false;
}
return true;
}
} // namespace base
} // namespace micro
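Note the post-decrement test (dim_size-- > 0): it compares every index down to and including dims[0], unlike the pre-decrement variant removed from eltwise.cc further down, which skipped index 0. A hypothetical usage sketch showing exactly that case:
const int32_t a[4] = {1, 32, 32, 16};
const int32_t b[4] = {1, 32, 32, 16};
const int32_t c[4] = {2, 32, 32, 16};  // differs only at index 0
bool same = micro::base::ShapeIsEqual(a, b, 4);  // true
bool diff = micro::base::ShapeIsEqual(a, c, 4);  // false: index 0 is checked now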
......@@ -26,6 +26,8 @@ uint32_t strlen(const char *str);
int32_t strcmp(const char *str1, const char *str2);
void memcpy(void *dst, const void *src, uint32_t bytes);
int32_t GetShapeSize(uint32_t dim_size, const int32_t *dims);
bool ShapeIsEqual(const int32_t *dims0,
const int32_t *dims1, uint32_t dim_size);
float sqrt(float x);
int32_t ceil(float f);
int32_t floor(float f);
......
......@@ -74,6 +74,8 @@ uint32_t Operator::GetInputSize() {
}
const void *Operator::DoGetInputData(uint32_t idx) {
MACE_ASSERT(idx < GetInputSize());
const void *data = NULL;
const OpIOInfo *input_info = op_context_->input_info(idx);
const uint32_t op_def_idx = input_info->op_def_idx_;
......@@ -94,6 +96,8 @@ const void *Operator::DoGetInputData(uint32_t idx) {
}
uint32_t Operator::GetInputShapeDimSize(uint32_t idx) {
MACE_ASSERT(idx < GetInputSize());
uint32_t dim_size = 0;
const OpIOInfo *input_info = op_context_->input_info(idx);
const uint32_t op_def_idx = input_info->op_def_idx_;
......@@ -115,6 +119,8 @@ uint32_t Operator::GetInputShapeDimSize(uint32_t idx) {
}
const int32_t *Operator::GetInputShapeDims(uint32_t idx) {
MACE_ASSERT(idx < GetInputSize());
const int32_t *dims = NULL;
const OpIOInfo *input_info = op_context_->input_info(idx);
const uint32_t op_def_idx = input_info->op_def_idx_;
......@@ -138,14 +144,20 @@ uint32_t Operator::GetOutputSize() {
}
DataType Operator::GetOutputDataType(uint32_t idx) {
MACE_ASSERT(idx < GetOutputSize());
return op_def_->output_type(idx);
}
void *Operator::DoGetOutputData(uint32_t idx) {
MACE_ASSERT(idx < GetOutputSize());
return engine_config_->tensor_mem_ + op_def_->mem_offset(idx);
}
uint32_t Operator::GetOutputShapeDimSize(uint32_t idx) {
MACE_ASSERT(idx < GetOutputSize());
uint32_t dim_size = 0;
model::OutputShape *output_shape =
const_cast<model::OutputShape *>(op_context_->output_resize_shape(idx));
......@@ -156,6 +168,8 @@ uint32_t Operator::GetOutputShapeDimSize(uint32_t idx) {
}
const int32_t *Operator::GetOutputShapeDims(uint32_t idx) {
MACE_ASSERT(idx < GetOutputSize());
const int32_t *dims = NULL;
model::OutputShape *output_shape =
const_cast<model::OutputShape *>(op_context_->output_resize_shape(idx));
......@@ -167,6 +181,8 @@ const int32_t *Operator::GetOutputShapeDims(uint32_t idx) {
MaceStatus Operator::ResizeOutputShape(uint32_t idx, uint32_t dim_size,
const int32_t *dims) {
MACE_ASSERT(idx < GetOutputSize());
model::OutputShape *output_shape =
const_cast<model::OutputShape *>(op_context_->output_resize_shape(idx));
#ifndef MACE_MICRO_NDEBUG
......@@ -202,12 +218,14 @@ MaceStatus Operator::ResizeOutputShape(uint32_t idx, uint32_t dim_size,
}
QuantizeInfo Operator::GetInputQuantizeInfo(uint32_t idx) {
MACE_ASSERT(idx < GetInputSize());
QuantizeInfo quantize_info = {0.0f, 0};
const OpIOInfo *input_info = op_context_->input_info(idx);
const uint32_t op_def_idx = input_info->op_def_idx_;
if (kIdxConstTensor == op_def_idx) {
const model::ConstTensor *const_tensor =
engine_config_->net_def_->tensor(input_info->output_idx_);
QuantizeInfo quantize_info;
quantize_info.scale = const_tensor->scale();
quantize_info.zero = const_tensor->zero_point();
return quantize_info;
......@@ -218,14 +236,17 @@ QuantizeInfo Operator::GetInputQuantizeInfo(uint32_t idx) {
engine_config_->net_def_->op(op_def_idx);
model::QuantizeActivationInfo quantize_activation_info =
pre_op_def->quantize_info(input_info->output_idx_);
QuantizeInfo quantize_info;
quantize_info.scale = quantize_activation_info.scale();
quantize_info.zero = quantize_activation_info.zero_point();
return quantize_info;
}
return quantize_info;
}
QuantizeInfo Operator::GetOutputQuantizeInfo(uint32_t idx) {
MACE_ASSERT(idx < GetOutputSize());
QuantizeInfo quantize_info;
model::QuantizeActivationInfo quantize_activation_info =
op_def_->quantize_info(idx);
......
......@@ -24,11 +24,6 @@ namespace micro {
struct MaceMicroEngineConfig;
struct QuantizeInfo {
float scale;
float zero;
};
namespace model {
class Argument;
class OperatorDef;
......@@ -90,7 +85,6 @@ class Operator {
MaceStatus ReuseInputBufferForOutput(uint32_t output_idx, uint32_t input_idx);
QuantizeInfo GetInputQuantizeInfo(uint32_t idx);
QuantizeInfo GetOutputQuantizeInfo(uint32_t idx);
template<typename T>
......
......@@ -36,8 +36,7 @@ add_library(micro_ops
${MICRO_OPS_SRCS}
)
target_link_libraries(micro_ops
micro_base
micro_framework
PRIVATE micro_base
)
......
......@@ -19,14 +19,6 @@
namespace micro {
namespace ops {
namespace eltwise {
bool ShapeIsEqual(const int32_t *dims0,
const int32_t *dims1, uint32_t dim_size) {
while (--dim_size > 0) {
if (dims0[dim_size] != dims1[dim_size])
return false;
}
return true;
}
int32_t GetIndex(const int32_t *shape,
const int32_t *index, int32_t dim_size) {
......
......@@ -19,31 +19,13 @@
#include "micro/base/utils.h"
#include "micro/framework/operator.h"
#include "micro/framework/scratch_buffer.h"
#include "micro/base/types.h"
namespace micro {
namespace ops {
namespace eltwise { // for redefine
enum Type {
SUM = 0,
SUB = 1,
PROD = 2,
DIV = 3,
MIN = 4,
MAX = 5,
NEG = 6,
ABS = 7,
SQR_DIFF = 8,
POW = 9,
EQUAL = 10,
FLOOR_DIV = 11,
CLIP = 12,
SIGN = 13,
NONE = 14,
};
namespace eltwise {
bool ShapeIsEqual(const int32_t *dims0,
const int32_t *dims1, uint32_t dim_size);
int32_t GetIndex(const int32_t *shape, const int32_t *index, int32_t dim_size);
void IncreaseIndex(const int32_t *shape, int32_t **index, int32_t dim_size);
template<typename T>
......@@ -202,9 +184,8 @@ class EltwiseOp : public framework::Operator {
if (input1_size == 1) {
TensorScalarEltwise(type_, input0_, input1_[0],
input0_size, swapped, output_ptr);
} else if (eltwise::ShapeIsEqual(input0_dims_,
input1_shape,
input0_dim_size_)) {
} else if (base::ShapeIsEqual(input0_dims_, input1_shape,
input0_dim_size_)) {
TensorEltwise(type_, input0_, input1_, input0_size,
swapped, output_ptr);
} else if (need_general_broadcast) {
......
if(MACE_MICRO_ENABLE_CMISI)
if(MACE_MICRO_ENABLE_CMSIS)
add_subdirectory(cmsis_nn)
endif()
add_library(mace_micro_ops_nhwc_cmsis_nn
add_library(micro_ops_nhwc_cmsis_nn
arm_conv_2d_int8.cc
arm_pooling_int8.cc
arm_softmax_int8.cc
arm_mat_mul_int8.cc
arm_eltwise_int8.cc
arm_depthwise_conv_2d_int8.cc
dequantize.cc
quantize.cc
utilities.cc
)
target_link_libraries(mace_micro_ops_nhwc_cmsis_nn
PUBLIC micro_base
PUBLIC micro_framework
PUBLIC micro_ops
target_link_libraries(micro_ops_nhwc_cmsis_nn
PRIVATE micro_base
PRIVATE CMSISNN
)
......@@ -47,12 +47,12 @@ MaceStatus ArmConv2dInt8Op::Compute(int32_t (&output_dims)[4]) {
conv_params.output_offset = output_quantize_info.zero;
conv_params.activation.min = -128;
conv_params.activation.max = 127;
conv_params.stride.w = strides_[0];
conv_params.stride.h = strides_[1];
conv_params.padding.w = padding_sizes_[0] / 2;
conv_params.padding.h = padding_sizes_[1] / 2;
conv_params.dilation.w = dilations_[0];
conv_params.dilation.h = dilations_[1];
conv_params.stride.w = strides_[1];
conv_params.stride.h = strides_[0];
conv_params.padding.w = padding_sizes_[1] / 2;
conv_params.padding.h = padding_sizes_[0] / 2;
conv_params.dilation.w = dilations_[1];
conv_params.dilation.h = dilations_[0];
ScratchBuffer scratch_buffer(engine_config_);
......@@ -64,6 +64,9 @@ MaceStatus ArmConv2dInt8Op::Compute(int32_t (&output_dims)[4]) {
quant_params.shift[i] = shift;
}
MACE_ASSERT(input_dims_[0] == 1);
MACE_ASSERT(dilations_[0] == 1 && dilations_[1] == 1);
cmsis_nn_dims input_dims;
input_dims.n = input_dims_[0];
input_dims.h = input_dims_[1];
......
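The swapped indices above (and in the depthwise op that follows) assume MACE Micro stores strides_, padding_sizes_, and dilations_ in {height, width} order, while CMSIS-NN's cmsis_nn_tile takes .w and .h separately. A hypothetical helper capturing that mapping:
// Sketch only: map a MACE {h, w} pair onto a CMSIS-NN tile.
void FillTile(cmsis_nn_tile *tile, const int32_t hw[2]) {
  tile->h = hw[0];  // height comes first in MACE order
  tile->w = hw[1];  // width comes second
}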
......@@ -45,12 +45,12 @@ MaceStatus ArmDepthwiseConv2dInt8Op::Compute(int32_t (&output_dims)[4]) {
dw_conv_params.output_offset = output_quantize_info.zero;
dw_conv_params.activation.min = -128;
dw_conv_params.activation.max = 127;
dw_conv_params.stride.w = strides_[0];
dw_conv_params.stride.h = strides_[1];
dw_conv_params.padding.w = padding_sizes_[0] / 2;
dw_conv_params.padding.h = padding_sizes_[1] / 2;
dw_conv_params.dilation.w = dilations_[0];
dw_conv_params.dilation.h = dilations_[1];
dw_conv_params.stride.w = strides_[1];
dw_conv_params.stride.h = strides_[0];
dw_conv_params.padding.w = padding_sizes_[1] / 2;
dw_conv_params.padding.h = padding_sizes_[0] / 2;
dw_conv_params.dilation.w = dilations_[1];
dw_conv_params.dilation.h = dilations_[0];
ScratchBuffer scratch_buffer(engine_config_);
......@@ -62,6 +62,10 @@ MaceStatus ArmDepthwiseConv2dInt8Op::Compute(int32_t (&output_dims)[4]) {
quant_params.shift[i] = shift;
}
MACE_ASSERT(input_dims_[0] == 1);
MACE_ASSERT(filter_dims_[0] == 1);
MACE_ASSERT(dilations_[0] == 1 && dilations_[1] == 1);
cmsis_nn_dims input_dims;
input_dims.n = input_dims_[0];
input_dims.h = input_dims_[1];
......
......@@ -17,6 +17,7 @@
#include <arm_nnfunctions.h>
#include "micro/base/logging.h"
#include "micro/base/types.h"
#include "micro/base/utils.h"
#include "micro/ops/nhwc/cmsis_nn/utilities.h"
......@@ -24,19 +25,15 @@ namespace micro {
namespace ops {
MaceStatus ArmEltwiseInt8Op::OnInit() {
MACE_ASSERT(GetInputSize() == 2);
input0_ = GetInputData<int8_t>(INPUT0);
input0_dims_ = GetInputShapeDims(INPUT0);
input0_dim_size_ = GetInputShapeDimSize(INPUT0);
if (GetInputSize() >= 2) {
input1_ = GetInputData<int8_t>(INPUT1);
input1_dims_ = GetInputShapeDims(INPUT1);
input1_dim_size_ = GetInputShapeDimSize(INPUT1);
} else {
input1_ = NULL;
input1_dims_ = NULL;
input1_dim_size_ = 0;
}
input1_ = GetInputData<int8_t>(INPUT1);
input1_dims_ = GetInputShapeDims(INPUT1);
input1_dim_size_ = GetInputShapeDimSize(INPUT1);
output_ = GetOutputData<int8_t>(OUTPUT);
......@@ -48,11 +45,15 @@ MaceStatus ArmEltwiseInt8Op::OnInit() {
}
MaceStatus ArmEltwiseInt8Op::Run() {
MACE_ASSERT1(GetInputSize() < 3,
"Element-Wise does not support 3 or higher inputs,"
" you could change your model to multiple Element-Wise");
MACE_ASSERT1(GetInputSize() == 2,
"ArmEltwiseInt8Op only supports 2 inputs");
MACE_ASSERT(input0_dim_size_ == input1_dim_size_);
MACE_ASSERT(base::ShapeIsEqual(input0_dims_, input1_dims_, input1_dim_size_));
MACE_RETURN_IF_ERROR(
ResizeOutputShape(OUTPUT, input0_dim_size_, input0_dims_));
if (type_ == 0) {
if (type_ == eltwise::SUM) {
QuantizeInfo input_quantize_info0 = GetInputQuantizeInfo(0);
QuantizeInfo input_quantize_info1 = GetInputQuantizeInfo(1);
QuantizeInfo output_quantize_info = GetOutputQuantizeInfo(OUTPUT);
......
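For reference, a scalar sketch of the requantized int8 addition the SUM branch computes; the actual op dispatches to a vectorized CMSIS-NN kernel, and AddInt8 is illustrative only:
#include <math.h>
#include <stdint.h>
int8_t AddInt8(int8_t a, const micro::QuantizeInfo &qa,
               int8_t b, const micro::QuantizeInfo &qb,
               const micro::QuantizeInfo &qo) {
  // Dequantize both operands, add in float, requantize to the output scale.
  float real = qa.scale * (a - qa.zero) + qb.scale * (b - qb.zero);
  int32_t q = static_cast<int32_t>(roundf(real / qo.scale)) + qo.zero;
  if (q < -128) q = -128;  // saturate to the int8 activation range
  if (q > 127) q = 127;
  return static_cast<int8_t>(q);
}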
......@@ -17,6 +17,7 @@
#include "micro/base/logger.h"
#include "micro/base/logging.h"
#include "micro/base/types.h"
#include "micro/base/utils.h"
#include "micro/framework/op_context.h"
#include "micro/framework/operator.h"
......@@ -27,28 +28,6 @@
namespace micro {
namespace ops {
namespace eltwise { // for redefine
enum Type {
SUM = 0,
SUB = 1,
PROD = 2,
DIV = 3,
MIN = 4,
MAX = 5,
NEG = 6,
ABS = 7,
SQR_DIFF = 8,
POW = 9,
EQUAL = 10,
FLOOR_DIV = 11,
CLIP = 12,
SIGN = 13,
NONE = 14,
};
} // namespace eltwise
class ArmEltwiseInt8Op : public framework::Operator {
public:
MaceStatus OnInit();
......
......@@ -61,6 +61,8 @@ MaceStatus ArmMatMulInt8Op::Run() {
MACE_ASSERT(input_a_dim_size_ == 2);
MACE_ASSERT(input_b_dim_size_ == 2);
MACE_ASSERT(input_a_dims_[0] == 1);
MACE_ASSERT(transpose_b_);
MACE_ASSERT(!transpose_a_);
......@@ -111,9 +113,9 @@ MaceStatus ArmMatMulInt8Op::Run() {
}
arm_status status = arm_nn_vec_mat_mult_t_s8(
input_a_, input_b_, bias, output_, -input_quantize_info_a.zero, 0,
output_quantize_info.zero, multiplier, shift, rhs_cols, rhs_rows, -128,
127);
input_a_, input_b_, bias, output_, -input_quantize_info_a.zero,
input_quantize_info_b.zero, output_quantize_info.zero, multiplier, shift,
rhs_cols, rhs_rows, -128, 127);
MACE_ASSERT(status == ARM_MATH_SUCCESS);
......
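The fix forwards input_quantize_info_b.zero instead of a hard-coded 0, so the call stays correct when B is not symmetrically quantized (the unit test below quantizes B symmetrically, where the zero point happens to be 0). A scalar sketch of the offset-corrected accumulation such a kernel performs before the multiplier/shift requantization; the offset sign conventions follow the library, and DotWithOffsets is hypothetical:
int32_t DotWithOffsets(const int8_t *a, const int8_t *b, int32_t n,
                       int32_t a_offset, int32_t b_offset) {
  int32_t acc = 0;
  for (int32_t i = 0; i < n; ++i) {
    acc += (a[i] + a_offset) * (b[i] + b_offset);  // offsets applied per element
  }
  return acc;
}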
......@@ -19,8 +19,8 @@
#include "micro/base/logging.h"
#include "micro/base/utils.h"
#include "micro/framework/scratch_buffer.h"
#include "micro/ops/nhwc/cmsis_nn/utilities.h"
#include "micro/include/utils/macros.h"
#include "micro/ops/nhwc/cmsis_nn/utilities.h"
namespace micro {
namespace ops {
......@@ -30,7 +30,6 @@ void ArmPoolingInt8Op::MaxPooling(const mifloat *input,
const int32_t *stride_hw,
const int32_t *dilation_hw,
const int32_t *pad_hw) {
MACE_UNUSED(filter_hw);
MACE_UNUSED(dilation_hw);
cmsis_nn_context ctx;
......@@ -45,6 +44,8 @@ void ArmPoolingInt8Op::MaxPooling(const mifloat *input,
pool_params.padding.h = pad_hw[0];
pool_params.padding.w = pad_hw[1];
MACE_ASSERT(input_dims_[0] == 1);
cmsis_nn_dims input_dims;
input_dims.n = input_dims_[0];
input_dims.h = input_dims_[1];
......@@ -53,10 +54,8 @@ void ArmPoolingInt8Op::MaxPooling(const mifloat *input,
const int8_t *input_data = reinterpret_cast<const int8_t *>(input);
cmsis_nn_dims filter_dims;
filter_dims.n = filter_dims_[0];
filter_dims.h = filter_dims_[1];
filter_dims.w = filter_dims_[2];
filter_dims.c = filter_dims_[3];
filter_dims.h = filter_hw[0];
filter_dims.w = filter_hw[1];
cmsis_nn_dims output_dims;
output_dims.n = output_dims_[0];
......@@ -74,7 +73,6 @@ void ArmPoolingInt8Op::AvgPooling(const mifloat *input,
const int32_t *stride_hw,
const int32_t *dilation_hw,
const int32_t *pad_hw) {
MACE_UNUSED(filter_hw);
MACE_UNUSED(dilation_hw);
const int32_t out_width = output_dims_[2];
......@@ -82,8 +80,12 @@ void ArmPoolingInt8Op::AvgPooling(const mifloat *input,
cmsis_nn_context ctx;
ctx.size = arm_avgpool_s8_get_buffer_size(out_width, in_channels);
MACE_ASSERT(ctx.size == 0);
ctx.buf = NULL;
ScratchBuffer scratch_buffer(engine_config_);
if (ctx.size > 0) {
ctx.buf = scratch_buffer.GetBuffer<int8_t>(ctx.size);
} else {
ctx.buf = NULL;
}
cmsis_nn_pool_params pool_params;
pool_params.activation.min = -128;
......@@ -93,6 +95,8 @@ void ArmPoolingInt8Op::AvgPooling(const mifloat *input,
pool_params.padding.h = pad_hw[0];
pool_params.padding.w = pad_hw[1];
MACE_ASSERT(input_dims_[0] == 1);
cmsis_nn_dims input_dims;
input_dims.n = input_dims_[0];
input_dims.h = input_dims_[1];
......@@ -101,10 +105,8 @@ void ArmPoolingInt8Op::AvgPooling(const mifloat *input,
const int8_t *input_data = reinterpret_cast<const int8_t *>(input);
cmsis_nn_dims filter_dims;
filter_dims.n = filter_dims_[0];
filter_dims.h = filter_dims_[1];
filter_dims.w = filter_dims_[2];
filter_dims.c = filter_dims_[3];
filter_dims.h = filter_hw[0];
filter_dims.w = filter_hw[1];
cmsis_nn_dims output_dims;
output_dims.n = output_dims_[0];
......
......@@ -35,6 +35,9 @@ MaceStatus ArmSoftmaxInt8Op::OnInit() {
output_ = GetOutputData<mifloat>(OUTPUT);
bool use_log = GetArgByName("use_log", false);
MACE_ASSERT1(!use_log, "The argument \"use_log\" is unsupported");
return MACE_SUCCESS;
}
......
......@@ -36,8 +36,6 @@ MaceStatus DequantizeOp::OnInit() {
MaceStatus DequantizeOp::Run() {
MACE_RETURN_IF_ERROR(ResizeOutputShape(OUTPUT, input_dim_size_, input_dims_));
const micro::OpIOInfo *input_info = op_context_->input_info(INPUT);
QuantizeInfo input_quantize_info = GetInputQuantizeInfo(INPUT);
float scale = input_quantize_info.scale;
......
......@@ -49,7 +49,7 @@ void PoolingRefOp::MaxPooling(const mifloat *input,
}
for (int32_t fh = 0; fh < filter_hw[0]; ++fh) {
int32_t inh = inh_addr + dilation_hw[0] * fh;
if (inh < 0 && inh >= in_height) {
if (inh < 0 || inh >= in_height) {
continue;
}
int32_t in_h_base = (in_b_base + inh) * in_width;
......
......@@ -20,25 +20,21 @@ add_executable(micro_ops_test
micro/ops/expand_dims_test.cc
micro/ops/concat_test.cc
)
if(MACE_MICRO_ENABLE_CMSIS)
target_link_libraries(micro_ops_test
PRIVATE micro_ops_nhwc_cmsis_nn
)
target_compile_options(micro_ops_test
PRIVATE "-DMACE_MICRO_ENABLE_CMSIS=ON"
)
endif()
target_link_libraries(micro_ops_test
PRIVATE micro_base
PRIVATE micro_ops_for_test
PRIVATE micro_ops
PRIVATE micro_framework_for_optest
PRIVATE micro_ccutils
PRIVATE gtest
PRIVATE gtest_main
)
if(MICRO_MODEL_NAME)
add_executable(micro_cc_test
micro/model/net_def_test.cc
micro/framework/graph_test.cc
micro/codegen/engine_test.cc
)
target_link_libraries(micro_cc_test
micro
models
gtest
gtest_main
)
target_compile_definitions(micro_cc_test PRIVATE "-DMICRO_MODEL_NAME=${MICRO_MODEL_NAME}")
endif()
......@@ -14,8 +14,10 @@
#include "gtest/gtest.h"
#include "micro/ops/eltwise.h"
#include "micro/ops/nhwc/cmsis_nn/arm_eltwise_int8.h"
#include "micro/ops/gtest_utils.h"
#include "micro/ops/substitute_op.h"
#include "micro/ops/test_quantize_utils.h"
#include "micro/ops/test_utils.h"
namespace micro {
......@@ -494,6 +496,91 @@ TEST_F(EltwiseOpTest, TensorGeneralBroadcastCPU) {
dims1121, output_9, expect_9, dims1123);
}
#ifdef MACE_MICRO_ENABLE_CMSIS
namespace {
void TestEltwiseQuantInt8(const int32_t *input_dims,
const uint32_t input_dim_size,
eltwise::Type type) {
int32_t shape_size = base::GetShapeSize(input_dim_size, input_dims);
float *input0 = new float[shape_size];
float *input1 = new float[shape_size];
FillNormalRandomInput(input0, shape_size);
FillNormalRandomInput(input1, shape_size);
float *expect_output = new float[shape_size];
const uint32_t MAX_OUTPUT_NUM = 10;
int32_t *expect_output_dims = new int32_t[MAX_OUTPUT_NUM];
EltwiseOp<float> eltwsie_op;
framework::SubstituteOp substitude_op;
substitude_op.AddInput(input0, input_dims, input_dim_size)
.AddInput(input1, input_dims, input_dim_size)
.AddArg("type", static_cast<int>(type))
.AddOutput(expect_output, expect_output_dims, MAX_OUTPUT_NUM);
eltwsie_op.Init(
NULL, reinterpret_cast<framework::OpContext *>(&substitude_op), NULL);
eltwsie_op.Run();
uint32_t expect_output_dim_size = substitude_op.GetOutputShapeDimSize(0);
int8_t *input0_int8 = new int8_t[shape_size];
int8_t *input1_int8 = new int8_t[shape_size];
int8_t *output_int8 = new int8_t[shape_size];
float *output = new float[shape_size];
int32_t *output_dims = new int32_t[MAX_OUTPUT_NUM];
QuantizeInfo input_quant_info0;
QuantizeInfo input_quant_info1;
AutoQuantizeInt8(input0, shape_size, input0_int8, &input_quant_info0.scale,
&input_quant_info0.zero);
AutoQuantizeInt8(input1, shape_size, input1_int8, &input_quant_info1.scale,
&input_quant_info1.zero);
QuantizeInfo output_quant_info = {0.0f, 0};
AdjustRangeInt8(expect_output, shape_size, &output_quant_info.scale,
&output_quant_info.zero);
ArmEltwiseInt8Op eltwsie_op_int8;
framework::SubstituteOp substitude_op_int8;
substitude_op_int8
.AddInput(input0_int8, input_dims, input_dim_size, input_quant_info0)
.AddInput(input1_int8, input_dims, input_dim_size, input_quant_info1)
.AddArg("type", static_cast<int>(type))
.AddOutput(output_int8, output_dims, MAX_OUTPUT_NUM, output_quant_info);
eltwsie_op_int8.Init(
NULL, reinterpret_cast<framework::OpContext *>(&substitude_op_int8),
NULL);
eltwsie_op_int8.Run();
uint32_t output_dim_size = substitude_op_int8.GetOutputShapeDimSize(0);
Dequantize(output_int8, shape_size, output_quant_info.scale,
output_quant_info.zero, output);
ExpectTensorSimilar(expect_output, expect_output_dims, expect_output_dim_size,
output, output_dims, output_dim_size, 0.1);
delete[] input0;
delete[] input1;
delete[] expect_output;
delete[] expect_output_dims;
delete[] input0_int8;
delete[] input1_int8;
delete[] output_int8;
delete[] output;
delete[] output_dims;
}
} // namespace
TEST_F(EltwiseOpTest, QuantInt8) {
const int32_t input_dims0[4] = {1, 32, 32, 16};
TestEltwiseQuantInt8(input_dims0, 4, eltwise::SUM);
const int32_t input_dims1[4] = {2, 31, 31, 17};
TestEltwiseQuantInt8(input_dims1, 4, eltwise::SUM);
const int32_t input_dims2[2] = {1, 31};
TestEltwiseQuantInt8(input_dims2, 2, eltwise::SUM);
}
#endif
} // namespace test
} // namespace ops
} // namespace micro
......@@ -15,8 +15,10 @@
#include "gtest/gtest.h"
#include "micro/ops/gtest_utils.h"
#include "micro/ops/matmul.h"
#include "micro/ops/nhwc/cmsis_nn/arm_mat_mul_int8.h"
#include "micro/ops/substitute_op.h"
#include "micro/ops/test_utils.h"
#include "micro/ops/test_quantize_utils.h"
namespace micro {
namespace ops {
......@@ -94,6 +96,94 @@ TEST_F(MatMulOpTest, SimpleCPU) {
Simple2();
}
#ifdef MACE_MICRO_ENABLE_CMSIS
namespace {
void TestMatMulQuantInt8(int32_t lhs_rows, int32_t lhs_cols, int32_t rhs_cols) {
uint32_t input0_size = lhs_rows * lhs_cols;
uint32_t input1_size = lhs_cols * rhs_cols;
uint32_t output_size = lhs_rows * rhs_cols;
float *input0 = new float[input0_size];
float *input1 = new float[input1_size];
FillNormalRandomInput(input0, input0_size);
FillNormalRandomInput(input1, input1_size);
float *expect_output = new float[output_size];
const uint32_t MAX_OUTPUT_NUM = 10;
int32_t *expect_output_dims = new int32_t[MAX_OUTPUT_NUM];
const int32_t input0_dims[2] = {lhs_rows, lhs_cols};
// mat0 * transpose(mat1)
const int32_t input1_dims[2] = {rhs_cols, lhs_cols};
MatMulOp matmul_op;
framework::SubstituteOp substitude_op;
substitude_op.AddInput(input0, input0_dims, 2)
.AddInput(input1, input1_dims, 2)
.AddArg("transpose_a", false)
.AddArg("transpose_b", true)
.AddOutput(expect_output, expect_output_dims, MAX_OUTPUT_NUM);
matmul_op.Init(NULL, reinterpret_cast<framework::OpContext *>(&substitude_op),
NULL);
matmul_op.Run();
uint32_t expect_output_dim_size = substitude_op.GetOutputShapeDimSize(0);
int8_t *input0_int8 = new int8_t[input0_size];
int8_t *input1_int8 = new int8_t[input1_size];
int8_t *output_int8 = new int8_t[output_size];
float *output = new float[output_size];
int32_t *output_dims = new int32_t[MAX_OUTPUT_NUM];
QuantizeInfo input_quant_info0;
QuantizeInfo input_quant_info1;
AutoQuantizeInt8(input0, input0_size, input0_int8, &input_quant_info0.scale,
&input_quant_info0.zero);
AutoQuantizeInt8Symmetric(input1, input1_size, input1_int8,
&input_quant_info1.scale);
QuantizeInfo output_quant_info = {0.0f, 0};
AdjustRangeInt8(expect_output, output_size, &output_quant_info.scale,
&output_quant_info.zero);
ArmMatMulInt8Op matmul_op_int8;
framework::SubstituteOp substitude_op_int8;
substitude_op_int8.AddInput(input0_int8, input0_dims, 2, input_quant_info0)
.AddInput(input1_int8, input1_dims, 2, input_quant_info1)
.AddArg("transpose_a", false)
.AddArg("transpose_b", true)
.AddOutput(output_int8, output_dims, MAX_OUTPUT_NUM, output_quant_info);
matmul_op_int8.Init(
NULL, reinterpret_cast<framework::OpContext *>(&substitude_op_int8),
NULL);
matmul_op_int8.Run();
uint32_t output_dim_size = substitude_op_int8.GetOutputShapeDimSize(0);
Dequantize(output_int8, output_size, output_quant_info.scale,
output_quant_info.zero, output);
ExpectTensorSimilar(expect_output, expect_output_dims, expect_output_dim_size,
output, output_dims, output_dim_size, 0.1);
delete[] input0;
delete[] input1;
delete[] expect_output;
delete[] expect_output_dims;
delete[] input0_int8;
delete[] input1_int8;
delete[] output_int8;
delete[] output;
delete[] output_dims;
}
} // namespace
TEST_F(MatMulOpTest, QuantInt8) {
TestMatMulQuantInt8(1, 8, 4);
TestMatMulQuantInt8(1, 1001, 63);
// WARNING(ZhangZhimin): Batch inputs are unsupported
// TestMatMulQuantInt8(3, 100, 100);
}
#endif
} // namespace test
} // namespace ops
} // namespace micro
......@@ -15,8 +15,10 @@
#include "gtest/gtest.h"
#include "micro/ops/gtest_utils.h"
#include "micro/ops/nhwc/conv_2d_ref.h"
#include "micro/ops/nhwc/cmsis_nn/arm_conv_2d_int8.h"
#include "micro/ops/substitute_op.h"
#include "micro/ops/test_utils.h"
#include "micro/ops/test_quantize_utils.h"
namespace micro {
namespace ops {
......@@ -315,6 +317,141 @@ TEST_F(Conv2dOpTest, CPUConv1x1) {
TestConv1x1();
}
#ifdef MACE_MICRO_ENABLE_CMSIS
namespace {
void TestConv2dQuantInt8(const int32_t batch,
const int32_t out_channels,
const int32_t in_channels,
const int32_t in_height,
const int32_t in_width,
const int32_t kernel_height,
const int32_t kernel_width,
enum Padding padding_type,
const int32_t stride_height,
const int32_t stride_width,
const int32_t dilation_height,
const int32_t dilation_width) {
uint32_t input0_size = batch * in_height * in_width * in_channels;
uint32_t input1_size =
out_channels * kernel_height * kernel_width * in_channels;
uint32_t max_output_size = batch * out_channels *
(in_height + kernel_height * dilation_height) *
(in_width + kernel_width * dilation_width);
int32_t bias_size = out_channels;
float *input0 = new float[input0_size];
float *input1 = new float[input1_size];
float *bias = new float[bias_size];
FillNormalRandomInput(input0, input0_size);
FillNormalRandomInput(input1, input1_size);
FillNormalRandomInput(bias, bias_size);
float *expect_output = new float[max_output_size];
const uint32_t MAX_OUTPUT_NUM = 10;
int32_t *expect_output_dims = new int32_t[MAX_OUTPUT_NUM];
const int32_t input0_dims[4] = {batch, in_height, in_width, in_channels};
const int32_t input1_dims[4] = {out_channels, kernel_height, kernel_width,
in_channels};
const int32_t bias_dims[1] = {bias_size};
const int32_t strides[2] = {stride_height, stride_width};
const int32_t dilations[2] = {dilation_height, dilation_width};
Conv2dRefOp conv2d_op;
framework::SubstituteOp substitude_op;
substitude_op.AddInput(input0, input0_dims, 4)
.AddInput(input1, input1_dims, 4)
.AddInput(bias, bias_dims, 1)
.AddArg("padding", padding_type)
.AddRepeatArg("strides", strides, 2)
.AddRepeatArg("dilations", dilations, 2)
.AddOutput(expect_output, expect_output_dims, MAX_OUTPUT_NUM);
conv2d_op.Init(NULL, reinterpret_cast<framework::OpContext *>(&substitude_op),
NULL);
conv2d_op.Run();
uint32_t expect_output_dim_size = substitude_op.GetOutputShapeDimSize(0);
uint32_t exepct_output_size =
base::GetShapeSize(expect_output_dim_size, expect_output_dims);
int8_t *input0_int8 = new int8_t[input0_size];
int8_t *input1_int8 = new int8_t[input1_size];
int32_t *bias_int32 = new int32_t[bias_size];
int8_t *output_int8 = new int8_t[max_output_size];
float *output = new float[max_output_size];
int32_t *output_dims = new int32_t[MAX_OUTPUT_NUM];
QuantizeInfo input_quant_info0;
QuantizeInfo input_quant_info1;
AutoQuantizeInt8(input0, input0_size, input0_int8, &input_quant_info0.scale,
&input_quant_info0.zero);
AutoQuantizeInt8Symmetric(input1, input1_size, input1_int8,
&input_quant_info1.scale);
QuantizeInfo output_quant_info = {0.0f, 0};
AdjustRangeInt8(expect_output, exepct_output_size, &output_quant_info.scale,
&output_quant_info.zero);
float bias_scale = input_quant_info0.scale * input_quant_info1.scale;
QuantizeWithScaleAndZeropoint(bias, bias_size, bias_scale, 0, bias_int32);
ArmConv2dInt8Op conv2d_op_int8;
framework::SubstituteOp substitude_op_int8;
substitude_op_int8.AddInput(input0_int8, input0_dims, 4, input_quant_info0)
.AddInput(input1_int8, input1_dims, 4, input_quant_info1)
.AddInput(bias_int32, bias_dims, 1)
.AddArg("padding", padding_type)
.AddRepeatArg("strides", strides, 2)
.AddRepeatArg("dilations", dilations, 2)
.AddOutput(output_int8, output_dims, MAX_OUTPUT_NUM, output_quant_info);
conv2d_op_int8.Init(
NULL, reinterpret_cast<framework::OpContext *>(&substitude_op_int8),
NULL);
conv2d_op_int8.Run();
uint32_t output_dim_size = substitude_op_int8.GetOutputShapeDimSize(0);
uint32_t output_size = base::GetShapeSize(output_dim_size, output_dims);
Dequantize(output_int8, output_size, output_quant_info.scale,
output_quant_info.zero, output);
ExpectTensorSimilar(expect_output, expect_output_dims, expect_output_dim_size,
output, output_dims, output_dim_size, 0.1);
delete[] input0;
delete[] input1;
delete[] bias;
delete[] expect_output;
delete[] expect_output_dims;
delete[] input0_int8;
delete[] input1_int8;
delete[] bias_int32;
delete[] output_int8;
delete[] output;
delete[] output_dims;
}
} // namespace
TEST_F(Conv2dOpTest, QuantInt8) {
TestConv2dQuantInt8(1, 128, 64, 32, 32, 3, 3, VALID, 1, 1, 1, 1);
TestConv2dQuantInt8(1, 128, 64, 32, 32, 3, 3, SAME, 1, 1, 1, 1);
TestConv2dQuantInt8(1, 128, 64, 32, 32, 3, 3, FULL, 1, 1, 1, 1);
TestConv2dQuantInt8(1, 128, 64, 32, 54, 3, 3, FULL, 1, 1, 1, 1);
TestConv2dQuantInt8(1, 128, 512, 14, 13, 3, 3, SAME, 1, 1, 1, 1);
TestConv2dQuantInt8(1, 128, 64, 14, 13, 5, 5, SAME, 2, 2, 1, 1);
TestConv2dQuantInt8(1, 128, 257, 28, 28, 3, 3, SAME, 1, 1, 1, 1);
TestConv2dQuantInt8(1, 1, 128, 56, 56, 3, 3, SAME, 2, 2, 1, 1);
TestConv2dQuantInt8(1, 2, 1, 1000, 1000, 4, 3, FULL, 2, 1, 1, 1);
TestConv2dQuantInt8(1, 128, 1, 1000, 1000, 4, 3, FULL, 2, 3, 1, 1);
// dilations are unsupported
// TestConv2dQuantInt8(1, 128, 64, 32, 32, 3, 3, SAME, 1, 1, 2, 2);
// TestConv2dQuantInt8(1, 128, 64, 32, 32, 3, 3, SAME, 1, 1, 2, 1);
// batch must be 1
// TestConv2dQuantInt8(2, 128, 64, 32, 32, 3, 3, SAME, 1, 1, 1, 1);
// TestConv2dQuantInt8(4, 128, 64, 32, 32, 3, 3, SAME, 1, 1, 1, 1);
}
#endif
} // namespace test
} // namespace ops
} // namespace micro
......@@ -15,8 +15,10 @@
#include "gtest/gtest.h"
#include "micro/ops/gtest_utils.h"
#include "micro/ops/nhwc/depthwise_conv_2d_ref.h"
#include "micro/ops/nhwc/cmsis_nn/arm_depthwise_conv_2d_int8.h"
#include "micro/ops/substitute_op.h"
#include "micro/ops/test_utils.h"
#include "micro/ops/test_quantize_utils.h"
namespace micro {
namespace ops {
......@@ -107,6 +109,146 @@ TEST_F(DepthwiseConv2dOpTest, MuiltiC2CPU) {
MultiC2ValidTest();
}
#ifdef MACE_MICRO_ENABLE_CMSIS
namespace {
void TestDepthwiseConv2dQuantInt8(const int32_t batch,
const int32_t multiplier,
const int32_t in_channels,
const int32_t in_height,
const int32_t in_width,
const int32_t kernel_height,
const int32_t kernel_width,
enum Padding padding_type,
const int32_t stride_height,
const int32_t stride_width,
const int32_t dilation_height,
const int32_t dilation_width) {
uint32_t input0_size = batch * in_height * in_width * in_channels;
uint32_t input1_size =
multiplier * kernel_height * kernel_width * in_channels;
uint32_t max_output_size = batch * multiplier * in_channels *
(in_height + kernel_height * dilation_height) *
(in_width + kernel_width * dilation_width);
int32_t bias_size = multiplier * in_channels;
float *input0 = new float[input0_size];
float *input1 = new float[input1_size];
float *bias = new float[bias_size];
FillNormalRandomInput(input0, input0_size);
FillNormalRandomInput(input1, input1_size);
FillNormalRandomInput(bias, bias_size);
float *expect_output = new float[max_output_size];
const uint32_t MAX_OUTPUT_NUM = 10;
int32_t *expect_output_dims = new int32_t[MAX_OUTPUT_NUM];
const int32_t input0_dims[4] = {batch, in_height, in_width, in_channels};
const int32_t input1_dims[4] = {multiplier, kernel_height, kernel_width,
in_channels};
const int32_t bias_dims[1] = {bias_size};
const int32_t strides[2] = {stride_height, stride_width};
const int32_t dilations[2] = {dilation_height, dilation_width};
DepthwiseConv2dRefOp depthwise_conv2d_op;
framework::SubstituteOp substitude_op;
substitude_op.AddInput(input0, input0_dims, 4)
.AddInput(input1, input1_dims, 4)
.AddInput(bias, bias_dims, 1)
.AddArg("padding", padding_type)
.AddRepeatArg("strides", strides, 2)
.AddRepeatArg("dilations", dilations, 2)
.AddOutput(expect_output, expect_output_dims, MAX_OUTPUT_NUM);
depthwise_conv2d_op.Init(
NULL, reinterpret_cast<framework::OpContext *>(&substitude_op), NULL);
depthwise_conv2d_op.Run();
uint32_t expect_output_dim_size = substitude_op.GetOutputShapeDimSize(0);
uint32_t exepct_output_size =
base::GetShapeSize(expect_output_dim_size, expect_output_dims);
int8_t *input0_int8 = new int8_t[input0_size];
int8_t *input1_int8 = new int8_t[input1_size];
int32_t *bias_int32 = new int32_t[bias_size];
int8_t *output_int8 = new int8_t[max_output_size];
float *output = new float[max_output_size];
int32_t *output_dims = new int32_t[MAX_OUTPUT_NUM];
QuantizeInfo input_quant_info0;
QuantizeInfo input_quant_info1;
AutoQuantizeInt8(input0, input0_size, input0_int8, &input_quant_info0.scale,
&input_quant_info0.zero);
AutoQuantizeInt8Symmetric(input1, input1_size, input1_int8,
&input_quant_info1.scale);
QuantizeInfo output_quant_info = {0.0f, 0};
AdjustRangeInt8(expect_output, exepct_output_size, &output_quant_info.scale,
&output_quant_info.zero);
float bias_scale = input_quant_info0.scale * input_quant_info1.scale;
QuantizeWithScaleAndZeropoint(bias, bias_size, bias_scale, 0, bias_int32);
ArmDepthwiseConv2dInt8Op depthwise_conv2d_op_int8;
framework::SubstituteOp substitude_op_int8;
substitude_op_int8.AddInput(input0_int8, input0_dims, 4, input_quant_info0)
.AddInput(input1_int8, input1_dims, 4, input_quant_info1)
.AddInput(bias_int32, bias_dims, 1)
.AddArg("padding", padding_type)
.AddRepeatArg("strides", strides, 2)
.AddRepeatArg("dilations", dilations, 2)
.AddOutput(output_int8, output_dims, MAX_OUTPUT_NUM, output_quant_info);
depthwise_conv2d_op_int8.Init(
NULL, reinterpret_cast<framework::OpContext *>(&substitude_op_int8),
NULL);
depthwise_conv2d_op_int8.Run();
uint32_t output_dim_size = substitude_op_int8.GetOutputShapeDimSize(0);
uint32_t output_size = base::GetShapeSize(output_dim_size, output_dims);
Dequantize(output_int8, output_size, output_quant_info.scale,
output_quant_info.zero, output);
ExpectTensorSimilar(expect_output, expect_output_dims, expect_output_dim_size,
output, output_dims, output_dim_size, 0.1);
delete[] input0;
delete[] input1;
delete[] bias;
delete[] expect_output;
delete[] expect_output_dims;
delete[] input0_int8;
delete[] input1_int8;
delete[] bias_int32;
delete[] output_int8;
delete[] output;
delete[] output_dims;
}
} // namespace
TEST_F(DepthwiseConv2dOpTest, QuantInt8) {
TestDepthwiseConv2dQuantInt8(1, 1, 1024, 7, 7, 3, 3, VALID, 1, 1, 1, 1);
TestDepthwiseConv2dQuantInt8(1, 1, 1024, 7, 7, 3, 3, SAME, 1, 1, 1, 1);
TestDepthwiseConv2dQuantInt8(1, 1, 1024, 7, 7, 3, 3, FULL, 1, 1, 1, 1);
TestDepthwiseConv2dQuantInt8(1, 1, 512, 14, 13, 3, 3, SAME, 1, 1, 1, 1);
TestDepthwiseConv2dQuantInt8(1, 1, 512, 14, 13, 5, 5, SAME, 2, 2, 1, 1);
TestDepthwiseConv2dQuantInt8(1, 1, 256, 28, 28, 3, 3, SAME, 1, 1, 1, 1);
TestDepthwiseConv2dQuantInt8(1, 1, 128, 56, 56, 3, 3, SAME, 2, 2, 1, 1);
TestDepthwiseConv2dQuantInt8(1, 1, 3, 1000, 1000, 4, 3, FULL, 2, 1, 1, 1);
TestDepthwiseConv2dQuantInt8(1, 1, 3, 1000, 1000, 4, 3, FULL, 2, 3, 1, 1);
// dilations are unsupported
// TestDepthwiseConv2dQuantInt8(1, 1, 3, 1000, 1000, 3, 3, VALID, 1, 1, 2, 2);
// TestDepthwiseConv2dQuantInt8(1, 1, 3, 1000, 1000, 4, 3, FULL, 1, 1, 3, 5);
// TestDepthwiseConv2dQuantInt8(1, 1, 3, 1000, 1000, 4, 3, FULL, 1, 3, 3, 1);
// batch must be 1
// TestDepthwiseConv2dQuantInt8(3, 1, 128, 56, 56, 3, 3, SAME, 2, 2);
// multiplier must be 1
// TestDepthwiseConv2dQuantInt8(1, 2, 1024, 7, 7, 3, 3, SAME, 1, 1);
// TestDepthwiseConv2dQuantInt8(1, 2, 1024, 7, 7, 3, 3, SAME, 2, 2);
}
#endif
} // namespace test
} // namespace ops
} // namespace micro
......@@ -16,7 +16,9 @@
#include "micro/ops/gtest_utils.h"
#include "micro/ops/nhwc/pooling_ref.h"
#include "micro/ops/nhwc/pooling_s4.h"
#include "micro/ops/nhwc/cmsis_nn/arm_pooling_int8.h"
#include "micro/ops/substitute_op.h"
#include "micro/ops/test_quantize_utils.h"
#include "micro/ops/test_utils.h"
namespace micro {
......@@ -203,6 +205,134 @@ TEST_F(PoolingOpTest, TestPoolingOpSameAvg) {
TestPoolingOpSameAvg();
}
#ifdef MACE_MICRO_ENABLE_CMSIS
namespace {
void TestPoolingQuantInt8(const int32_t *input_dims,
const uint32_t input_dim_size,
const int32_t *kernels,
const int32_t *strides,
Padding padding,
PoolingType pooling_type) {
int32_t input_size = base::GetShapeSize(input_dim_size, input_dims);
int32_t max_output_size = input_dims[0] * input_dims[3] *
(input_dims[1] + kernels[0]) *
(input_dims[2] + kernels[1]);
float *input = new float[input_size];
FillNormalRandomInput(input, input_size);
float *expect_output = new float[max_output_size];
const uint32_t MAX_OUTPUT_DIM_SIZE = 100;
int32_t *expect_output_dims = new int32_t[MAX_OUTPUT_DIM_SIZE];
const int32_t dilations[2] = {1, 1};
PoolingRefOp pooling_op;
framework::SubstituteOp substitude_op;
substitude_op.AddInput(input, input_dims, input_dim_size)
.AddRepeatArg("strides", strides, 2)
.AddRepeatArg("kernels", kernels, 2)
.AddRepeatArg("dilations", dilations, 2)
.AddArg("padding", padding)
.AddArg("pooling_type", pooling_type)
.AddOutput(expect_output, expect_output_dims, MAX_OUTPUT_DIM_SIZE);
pooling_op.Init(
NULL, reinterpret_cast<framework::OpContext *>(&substitude_op), NULL);
pooling_op.Run();
uint32_t expect_output_dim_size = substitude_op.GetOutputShapeDimSize(0);
int8_t *input_int8 = new int8_t[input_size];
int8_t *output_int8 = new int8_t[max_output_size];
float *output = new float[max_output_size];
int32_t *output_dims = new int32_t[MAX_OUTPUT_DIM_SIZE];
QuantizeInfo input_quant_info;
AutoQuantizeInt8(input, input_size, input_int8, &input_quant_info.scale,
&input_quant_info.zero);
QuantizeInfo output_quant_info = input_quant_info;
ArmPoolingInt8Op pooling_op_int8;
framework::SubstituteOp substitude_op_int8;
substitude_op_int8
.AddInput(input_int8, input_dims, input_dim_size, input_quant_info)
.AddRepeatArg("strides", strides, 2)
.AddRepeatArg("kernels", kernels, 2)
.AddRepeatArg("dilations", dilations, 2)
.AddArg("padding", padding)
.AddArg("pooling_type", pooling_type)
.AddOutput(output_int8, output_dims, MAX_OUTPUT_DIM_SIZE,
output_quant_info);
pooling_op_int8.Init(
NULL, reinterpret_cast<framework::OpContext *>(&substitude_op_int8),
NULL);
pooling_op_int8.Run();
uint32_t output_dim_size = substitude_op_int8.GetOutputShapeDimSize(0);
uint32_t output_size = base::GetShapeSize(output_dim_size, output_dims);
Dequantize(output_int8, output_size, output_quant_info.scale,
output_quant_info.zero, output);
ExpectTensorSimilar(expect_output, expect_output_dims, expect_output_dim_size,
output, output_dims, output_dim_size, 0.1);
delete[] input;
delete[] expect_output;
delete[] expect_output_dims;
delete[] input_int8;
delete[] output_int8;
delete[] output;
delete[] output_dims;
}
} // namespace
TEST_F(PoolingOpTest, Quant) {
const int32_t input_dims0[4] = {1, 7, 7, 1024};
const int32_t kernels0[2] = {7, 7};
const int32_t strides0[2] = {1, 1};
TestPoolingQuantInt8(input_dims0, 4, kernels0, strides0, Padding::VALID,
PoolingType::AVG);
TestPoolingQuantInt8(input_dims0, 4, kernels0, strides0, Padding::VALID,
PoolingType::MAX);
TestPoolingQuantInt8(input_dims0, 4, kernels0, strides0, Padding::FULL,
PoolingType::AVG);
TestPoolingQuantInt8(input_dims0, 4, kernels0, strides0, Padding::SAME,
PoolingType::MAX);
const int32_t input_dims1[4] = {1, 3, 3, 2};
const int32_t kernels1[2] = {3, 3};
const int32_t strides1[2] = {1, 1};
TestPoolingQuantInt8(input_dims1, 4, kernels1, strides1, Padding::SAME,
PoolingType::AVG);
const int32_t input_dims2[4] = {1, 3, 3, 2};
const int32_t kernels2[2] = {2, 3};
const int32_t strides2[2] = {1, 2};
TestPoolingQuantInt8(input_dims2, 4, kernels2, strides2, Padding::SAME,
PoolingType::MAX);
// WARNING(ZhangZhimin): Batch inputs are unsupported
// const int32_t input_dims3[4] = {3,15,15,128};
// const int32_t kernels3[2] = {4, 4};
// const int32_t strides3[2] = {4, 4};
// TestPoolingQuantInt8(input_dims3, 4, kernels3, strides3, Padding::SAME,
// PoolingType::AVG);
// const int32_t input_dims4[4] = {3,15,15,128};
// const int32_t kernels4[2] = {4, 4};
// const int32_t strides4[2] = {4, 4};
// TestPoolingQuantInt8(input_dims4, 4, kernels4, strides4, Padding::SAME,
// PoolingType::MAX);
const int32_t input_dims5[4] = {1, 31, 31, 127};
const int32_t kernels5[2] = {2, 2};
const int32_t strides5[2] = {3, 3};
TestPoolingQuantInt8(input_dims5, 4, kernels5, strides5, Padding::SAME,
PoolingType::AVG);
const int32_t input_dims6[4] = {1, 31, 31, 127};
const int32_t kernels6[2] = {2, 2};
const int32_t strides6[2] = {3, 3};
TestPoolingQuantInt8(input_dims6, 4, kernels6, strides6, Padding::SAME,
PoolingType::MAX);
}
#endif
} // namespace test
} // namespace ops
} // namespace micro
......@@ -13,9 +13,11 @@
// limitations under the License.
#include "gtest/gtest.h"
#include "micro/ops/gtest_utils.h"
#include "micro/ops/softmax.h"
#include "micro/ops/gtest_utils.h"
#include "micro/ops/nhwc/cmsis_nn/arm_softmax_int8.h"
#include "micro/ops/substitute_op.h"
#include "micro/ops/test_quantize_utils.h"
#include "micro/ops/test_utils.h"
namespace micro {
......@@ -49,15 +51,89 @@ void Simple(bool use_log = false) {
&substitude_op), NULL);
softmax_op.Run();
ExpectTensorNear<float>(output, output_dims, output_dim_size,
expect, expect_dims, output_dim_size, 1e-5);
ExpectTensorNear<float>(output, output_dims, output_dim_size, expect,
expect_dims, output_dim_size, 1e-5);
}
} // namespace
TEST_F(SoftmaxOpTest, CPUSimple) { Simple(); }
TEST_F(SoftmaxOpTest, CPUSimpleUseLog) { Simple(true); }
#ifdef MACE_MICRO_ENABLE_CMSIS
namespace {
void TestSoftmaxQuantInt8(const int32_t *input_dims,
const uint32_t input_dim_size,
bool use_log = false) {
int32_t shape_size = base::GetShapeSize(input_dim_size, input_dims);
float *input = new float[shape_size];
FillNormalRandomInput(input, shape_size);
float *expect_output = new float[shape_size];
const uint32_t MAX_OUTPUT_NUM = 10;
int32_t *expect_output_dims = new int32_t[MAX_OUTPUT_NUM];
SoftmaxOp softmax_op;
framework::SubstituteOp substitude_op;
substitude_op.AddInput(input, input_dims, input_dim_size)
.AddArg("use_log", static_cast<int>(use_log))
.AddOutput(expect_output, expect_output_dims, MAX_OUTPUT_NUM);
softmax_op.Init(
NULL, reinterpret_cast<framework::OpContext *>(&substitude_op), NULL);
softmax_op.Run();
uint32_t expect_output_dim_size = substitude_op.GetOutputShapeDimSize(0);
int8_t *input_int8 = new int8_t[shape_size];
int8_t *output_int8 = new int8_t[shape_size];
float *output = new float[shape_size];
int32_t *output_dims = new int32_t[MAX_OUTPUT_NUM];
QuantizeInfo input_quant_info;
AutoQuantizeInt8(input, shape_size, input_int8, &input_quant_info.scale,
&input_quant_info.zero);
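// Softmax outputs lie in [0, 1], so a fixed scale of 1/255 with zero point
// -128 maps that range exactly onto the int8 output domain [-128, 127].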
QuantizeInfo output_quant_info = {1.0f / 255.0f, -128};
ArmSoftmaxInt8Op softmax_op_int8;
framework::SubstituteOp substitude_op_int8;
substitude_op_int8
.AddInput(input_int8, input_dims, input_dim_size, input_quant_info)
.AddArg("use_log", static_cast<int>(use_log))
.AddOutput(output_int8, output_dims, MAX_OUTPUT_NUM, output_quant_info);
softmax_op_int8.Init(
NULL, reinterpret_cast<framework::OpContext *>(&substitude_op_int8),
NULL);
softmax_op_int8.Run();
uint32_t output_dim_size = substitude_op_int8.GetOutputShapeDimSize(0);
Dequantize(output_int8, shape_size, output_quant_info.scale,
output_quant_info.zero, output);
ExpectTensorSimilar(expect_output, expect_output_dims, expect_output_dim_size,
output, output_dims, output_dim_size, 0.1);
delete[] input;
delete[] expect_output;
delete[] expect_output_dims;
delete[] input_int8;
delete[] output_int8;
delete[] output;
delete[] output_dims;
}
} // namespace
TEST_F(SoftmaxOpTest, QuantInt8) {
const int32_t input_dims0[2] = {5, 10};
TestSoftmaxQuantInt8(input_dims0, 2);
const int32_t input_dims1[2] = {50, 100};
TestSoftmaxQuantInt8(input_dims1, 2);
const int32_t input_dims2[2] = {1, 31};
TestSoftmaxQuantInt8(input_dims2, 2);
}
#endif
} // namespace test
} // namespace ops
} // namespace micro
......@@ -7,6 +7,7 @@ add_library(micro_ccutils
target_include_directories(micro_ccutils PUBLIC .)
target_link_libraries(micro_ccutils micro_base micro_framework_for_optest)
target_compile_options(micro_ccutils PUBLIC "-std=c++11")
if(HEXAGON_STUB)
add_library(micro_rpc_stub
......
......@@ -105,6 +105,16 @@ MaceStatus Operator::ResizeOutputShape(uint32_t idx, uint32_t dim_size,
return fake_op_->ResizeOutputShape(idx, dim_size, dims);
}
QuantizeInfo Operator::GetInputQuantizeInfo(uint32_t idx) {
return fake_op_->GetInputQuantizeInfo(idx);
}
QuantizeInfo Operator::GetOutputQuantizeInfo(uint32_t idx) {
return fake_op_->GetOutputQuantizeInfo(idx);
}
#ifndef MACE_DEFINE_GET_ARG_BY_NAME_FUNC
#define MACE_DEFINE_GET_ARG_BY_NAME_FUNC(T, FUNC) \
template <> \
......
......@@ -24,26 +24,32 @@ namespace framework {
SubstituteOp::SubstituteOp()
: input_idx_(0), output_idx_(0), arg_idx_(0), repeat_arg_idx_(0) {}
SubstituteOp &SubstituteOp::AddInput(
const void *input, const int32_t *dims, const uint32_t dims_size) {
SubstituteOp &SubstituteOp::AddInput(const void *input,
const int32_t *dims,
const uint32_t dims_size,
QuantizeInfo quant_info) {
MACE_ASSERT1(input != NULL || dims != NULL || dims_size == 0,
"Invalid param");
MACE_ASSERT1(input_idx_ < kMaxInputNum, "Not enough mem.");
inputs_[input_idx_] = input;
input_dims_[input_idx_] = dims;
input_dim_sizes_[input_idx_] = dims_size;
input_quant_info_[input_idx_] = quant_info;
++input_idx_;
return *this;
}
SubstituteOp &SubstituteOp::AddOutput(
void *output, int32_t *dims, const uint32_t dims_size) {
SubstituteOp &SubstituteOp::AddOutput(void *output,
int32_t *dims,
const uint32_t dims_size,
QuantizeInfo quant_info) {
MACE_ASSERT1(output != NULL || dims != NULL || dims_size == 0,
"Invalid param");
MACE_ASSERT1(output_idx_ < kMaxOutputNum, "Not enough mem.");
outputs_[output_idx_] = output;
output_dims_[output_idx_] = dims;
output_dim_sizes_[output_idx_] = dims_size;
output_quant_info_[output_idx_] = quant_info;
++output_idx_;
return *this;
}
......@@ -86,6 +92,14 @@ const int32_t *SubstituteOp::GetOutputShapeDims(uint32_t idx) {
return output_dims_[idx];
}
QuantizeInfo SubstituteOp::GetInputQuantizeInfo(uint32_t idx) {
return input_quant_info_[idx];
}
QuantizeInfo SubstituteOp::GetOutputQuantizeInfo(uint32_t idx) {
return output_quant_info_[idx];
}
MaceStatus SubstituteOp::ResizeOutputShape(uint32_t idx,
uint32_t input_dim_size,
const int32_t *input_dims) {
......
......@@ -16,6 +16,7 @@
#define MICRO_TEST_CCUTILS_MICRO_OPS_SUBSTITUTE_OP_H_
#include "micro/base/logging.h"
#include "micro/base/types.h"
#include "micro/base/utils.h"
#include "micro/include/public/micro.h"
......@@ -43,9 +44,13 @@ class SubstituteOp {
~SubstituteOp() {}
SubstituteOp &AddInput(const void *input,
const int32_t *dims, const uint32_t dims_size);
const int32_t *dims,
const uint32_t dims_size,
QuantizeInfo quant_info = QuantizeInfo{0.0f, 0});
SubstituteOp &AddOutput(void *output,
int32_t *dims, const uint32_t dims_size);
int32_t *dims,
const uint32_t dims_size,
QuantizeInfo quant_info = QuantizeInfo{0.0f, 0});
template<typename T>
SubstituteOp &AddArg(const char *name, T value) {
......@@ -106,6 +111,9 @@ class SubstituteOp {
const int32_t *input_dims);
MaceStatus ReuseInputBufferForOutput(uint32_t output_idx, uint32_t input_idx);
QuantizeInfo GetInputQuantizeInfo(uint32_t idx);
QuantizeInfo GetOutputQuantizeInfo(uint32_t idx);
template<typename T>
const T *GetInputData(uint32_t idx) {
return static_cast<const T *>(DoGetInputData(idx));
......@@ -120,11 +128,13 @@ class SubstituteOp {
const void *inputs_[kMaxInputNum];
const int32_t *input_dims_[kMaxInputNum];
uint32_t input_dim_sizes_[kMaxInputNum];
QuantizeInfo input_quant_info_[kMaxInputNum];
uint32_t input_idx_;
void *outputs_[kMaxOutputNum];
int32_t *output_dims_[kMaxOutputNum];
uint32_t output_dim_sizes_[kMaxOutputNum];
QuantizeInfo output_quant_info_[kMaxOutputNum];
uint32_t output_idx_;
// for arg
......
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MICRO_TEST_CCUTILS_MICRO_OPS_TEST_QUANTIZE_UTILS_H_
#define MICRO_TEST_CCUTILS_MICRO_OPS_TEST_QUANTIZE_UTILS_H_
#include <math.h>
#include <stdint.h>
#include <limits>
#include "micro/base/logging.h"
#include "micro/common/global_buffer.h"
#include "micro/include/public/micro.h"
#include "micro/port/api.h"
namespace micro {
namespace ops {
namespace test {
template <typename Q>
inline Q Saturate(float value) {
int rounded_value = static_cast<int>(value);
if (rounded_value <= std::numeric_limits<Q>::lowest()) {
return std::numeric_limits<Q>::lowest();
} else if (rounded_value >= std::numeric_limits<Q>::max()) {
return std::numeric_limits<Q>::max();
} else {
return static_cast<Q>(rounded_value);
}
}
inline void FindMinMax(const float *input,
const uint32_t size,
float *min_val,
float *max_val) {
float max_v = base::lowest();
float min_v = base::highest();
for (uint32_t i = 0; i < size; ++i) {
max_v = base::max(max_v, input[i]);
min_v = base::min(min_v, input[i]);
}
*min_val = min_v;
*max_val = max_v;
}
template <typename Q>
inline void QuantizeWithScaleAndZeropoint(const float *input,
const uint32_t size,
float scale,
int32_t zero_point,
Q *output) {
float recip_scale = 1 / scale;
for (uint32_t i = 0; i < size; ++i) {
output[i] = Saturate<Q>(roundf(zero_point + recip_scale * input[i]));
}
}
inline void AdjustRangeInt8(const float *input,
const uint32_t size,
float *scale,
int32_t *zero_point) {
float in_min_data;
float in_max_data;
FindMinMax(input, size, &in_min_data, &in_max_data);
in_max_data = base::max(0.f, in_max_data);
in_min_data = base::min(0.f, in_min_data);
*scale = (in_max_data - in_min_data) / 255;
*zero_point = int8_t(-in_min_data / *scale - 128);
}
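// Worked example for the function above: inputs spanning [-1.0, 1.55] give
// scale = 2.55 / 255 = 0.01 and zero_point = int8(1.0 / 0.01 - 128) = -28;
// quantization then maps -1.0 -> -28 + (-100) = -128 and
// 1.55 -> -28 + 155 = 127, covering the full int8 range.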
inline void AdjustRangeInt8Symmetric(const float *input,
const uint32_t size,
float *scale) {
float in_min_data;
float in_max_data;
FindMinMax(input, size, &in_min_data, &in_max_data);
in_max_data = base::max(0.f, in_max_data);
in_min_data = base::min(0.f, in_min_data);
float max_abs = base::max(base::abs(in_max_data), base::abs(in_min_data));
*scale = max_abs / 127.0f;
}
inline void AutoQuantizeInt8(const float *input,
const uint32_t size,
int8_t *output,
float *scale,
int32_t *zero_point) {
AdjustRangeInt8(input, size, scale, zero_point);
QuantizeWithScaleAndZeropoint(input, size, *scale, *zero_point, output);
}
inline void AutoQuantizeInt8Symmetric(const float *input,
const uint32_t size,
int8_t *output,
float *scale) {
AdjustRangeInt8Symmetric(input, size, scale);
QuantizeWithScaleAndZeropoint(input, size, *scale, 0, output);
}
inline void Dequantize(const int8_t *input,
const uint32_t size,
const float scale,
const int32_t zero_point,
float *output) {
for (uint32_t i = 0; i < size; ++i) {
output[i] = static_cast<float>(scale * (input[i] - zero_point));
}
}
} // namespace test
} // namespace ops
} // namespace micro
#endif // MICRO_TEST_CCUTILS_MICRO_OPS_TEST_QUANTIZE_UTILS_H_
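A hypothetical round-trip using these helpers (illustrative only, not part of the test suite):
#include "micro/ops/test_quantize_utils.h"
void QuantizeRoundTripSketch() {
  float data[4] = {-1.0f, 0.0f, 0.5f, 1.0f};
  int8_t q[4];
  float scale = 0.0f;
  int32_t zero = 0;
  micro::ops::test::AutoQuantizeInt8(data, 4, q, &scale, &zero);
  float back[4];
  micro::ops::test::Dequantize(q, 4, scale, zero, back);
  // Each back[i] recovers data[i] to within one quantization step (scale).
}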
......@@ -15,6 +15,8 @@
#include "micro/ops/test_utils.h"
#include <random>
namespace micro {
namespace ops {
namespace test {
......@@ -67,6 +69,30 @@ void FillRandomInput(void *input, const int32_t shape_size) {
}
}
void FillUniformRandomInput(float *input,
const int32_t shape_size,
float low,
float up) {
std::random_device rd;
std::mt19937 gen(rd());
std::uniform_real_distribution<float> dis(low, up);
for (int n = 0; n < shape_size; ++n) {
input[n] = dis(gen);
}
}
void FillNormalRandomInput(float *input,
const int32_t shape_size,
float mean,
float std) {
std::random_device rd;
std::mt19937 gen(rd());
std::normal_distribution<float> dis(mean, std);
for (int n = 0; n < shape_size; ++n) {
input[n] = dis(gen);
}
}
} // namespace test
} // namespace ops
} // namespace micro
......
......@@ -38,6 +38,16 @@ T *input = common::test::GetGlobalBuffer()->GetBuffer<T>(shape_size); \
micro::ops::test::FillRandomInput(input, shape_size * sizeof(T))
#endif
void FillUniformRandomInput(float *input,
const int32_t shape_size,
float low = -50.0f,
float up = 50.0f);
void FillNormalRandomInput(float *input,
const int32_t shape_size,
float mean = 0.0f,
float std = 1.0f);
} // namespace test
} // namespace ops
} // namespace micro
......
......@@ -14,6 +14,6 @@ cpplint --linelength=80 --counting=detailed $(find micro/include -name "*.h" -o
cpplint --linelength=80 --counting=detailed $(find micro/model -name "*.h" -or -name "*.cc")
cpplint --linelength=80 --counting=detailed --filter=-build/include_what_you_use $(find micro/ops -name "*.h" -or -name "*.cc")
cpplint --linelength=80 --counting=detailed $(find micro/port -name "*.h" -or -name "*.cc")
cpplint --linelength=80 --counting=detailed $(find micro/test \( -path micro/test/ccbenchmark/codegen -or -path micro/test/ccbaseline/codegen \) -prune -o -name "*.h" -or -name "*.cc")
cpplint --linelength=80 --counting=detailed --filter=-build/include_what_you_use $(find micro/test \( -path micro/test/ccbenchmark/codegen -or -path micro/test/ccbaseline/codegen \) -prune -o -name "*.h" -or -name "*.cc")
cpplint --linelength=80 --counting=detailed $(find micro/tools -name "*.h" -or -name "*.cc")
cpplint --linelength=80 --counting=detailed --filter=-build/include_subdir $(find micro/examples \( -path micro/examples/classifier/mbed-os -or -path micro/examples/classifier/data -or -path micro/examples/classifier/install -or -path micro/examples/classifier/BUILD \) -prune -name "*.cc" -or -name "*.h")