Unverified commit a3e7bdad, authored by Måns Nilsson, committed by GitHub

SoftMax Quantization specific registration for CMSIS-NN (#1018)

* Quantization specific registration for CMSIS-NN

Adds three type-specific registrations for the softmax kernel:
- Pure int8.
- Pure int16.
- One with int8 input and int16 output.

To avoid duplicating code, CalculateSoftmaxParams is made public.

Change-Id: I51de85e85f3bfb7a2d936593bd6512f263e6e5e4

* Add helper function InitializeLutForInt16

* Do not inline helper function
Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
Parent 67a2b960
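For illustration only (not part of this commit): a minimal sketch of how one of the new type-specific registrations is selected when building the op resolver, mirroring the example and test updates in the diff below. The resolver size and the RegisterOps() wrapper are placeholders; only the AddSoftmax() overload and Register_SOFTMAX_INT8() come from this change.

// Sketch, assuming a fully int8-quantized model.
#include "tensorflow/lite/micro/kernels/softmax.h"
#include "tensorflow/lite/micro/micro_mutable_op_resolver.h"

// NOLINTNEXTLINE(runtime-global-variables)
tflite::MicroMutableOpResolver<1> micro_op_resolver;

void RegisterOps() {
  // Register only the latency-optimized CMSIS-NN int8 softmax kernel instead
  // of the generic Register_SOFTMAX(), whose Eval dispatches on tensor type
  // at every Invoke.
  micro_op_resolver.AddSoftmax(tflite::Register_SOFTMAX_INT8());
}

Register_SOFTMAX_INT8_INT16() and Register_SOFTMAX_INT16() are used the same way for int8-input/int16-output and pure-int16 models; on targets without CMSIS-NN, the inline stubs added to softmax.h fall back to Register_SOFTMAX().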
@@ -79,7 +79,7 @@ void setup() {
micro_op_resolver.AddDepthwiseConv2D(
tflite::Register_DEPTHWISE_CONV_2D_INT8());
micro_op_resolver.AddReshape();
micro_op_resolver.AddSoftmax();
micro_op_resolver.AddSoftmax(tflite::Register_SOFTMAX_INT8());
// Build an interpreter to run the model with.
// NOLINTNEXTLINE(runtime-global-variables)
...
@@ -59,7 +59,7 @@ TF_LITE_MICRO_TEST(TestInvoke) {
micro_op_resolver.AddDepthwiseConv2D(
tflite::Register_DEPTHWISE_CONV_2D_INT8());
micro_op_resolver.AddReshape();
micro_op_resolver.AddSoftmax();
micro_op_resolver.AddSoftmax(tflite::Register_SOFTMAX_INT8());
// Build an interpreter to run the model with.
tflite::MicroInterpreter interpreter(model, micro_op_resolver, tensor_arena,
...
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -28,36 +28,50 @@ limitations under the License.
namespace tflite {
namespace {
void SoftmaxQuantized(const TfLiteEvalTensor* input, TfLiteEvalTensor* output,
const SoftmaxParams& op_data) {
if (input->type == kTfLiteInt8) {
if (output->type == kTfLiteInt16) {
tflite::reference_ops::Softmax(
op_data, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int16_t>(output));
} else {
const auto input_shape = tflite::micro::GetTensorShape(input);
const auto output_shape = tflite::micro::GetTensorShape(output);
const int trailing_dim = input_shape.DimensionsCount() - 1;
const int outer_size =
MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
const int depth =
MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
arm_softmax_s8(tflite::micro::GetTensorData<int8_t>(input), outer_size,
depth, op_data.input_multiplier, op_data.input_left_shift,
op_data.diff_min,
tflite::micro::GetTensorData<int8_t>(output));
}
} else {
tflite::reference_ops::SoftmaxInt16(
op_data, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int16_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int16_t>(output));
}
}
struct CMSISNNSoftmaxParams {
SoftmaxParams softmax_params;
int32_t num_rows;
int32_t row_size;
};
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context,
sizeof(CMSISNNSoftmaxParams));
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
MicroContext* micro_context = GetMicroContext(context);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
TfLiteTensor* input = micro_context->AllocateTempInputTensor(node, 0);
TF_LITE_ENSURE(context, input != nullptr);
TF_LITE_ENSURE(context, NumDimensions(input) >= 1);
TfLiteTensor* output = micro_context->AllocateTempOutputTensor(node, 0);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE(context, node->user_data != nullptr);
CMSISNNSoftmaxParams* op_data =
static_cast<CMSISNNSoftmaxParams*>(node->user_data);
auto* params = static_cast<TfLiteSoftmaxParams*>(node->builtin_data);
auto ret_val = CalculateSoftmaxParams(context, input, output, params,
&op_data->softmax_params);
const auto input_shape = GetTensorShape(input);
const auto output_shape = GetTensorShape(output);
const int trailing_dim = input_shape.DimensionsCount() - 1;
const int outer_size =
MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
const int depth =
MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
op_data->num_rows = outer_size;
op_data->row_size = depth;
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(output);
return ret_val;
}
TfLiteStatus SoftmaxEval(TfLiteContext* context, TfLiteNode* node) {
@@ -65,21 +79,48 @@ TfLiteStatus SoftmaxEval(TfLiteContext* context, TfLiteNode* node) {
TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
TFLITE_DCHECK(node->user_data != nullptr);
const SoftmaxParams data =
*static_cast<const SoftmaxParams*>(node->user_data);
const CMSISNNSoftmaxParams op_data =
*static_cast<const CMSISNNSoftmaxParams*>(node->user_data);
switch (input->type) {
case kTfLiteFloat32: {
tflite::reference_ops::Softmax(
data, tflite::micro::GetTensorShape(input),
op_data.softmax_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
return kTfLiteOk;
}
case kTfLiteInt8:
case kTfLiteInt8: {
if (output->type == kTfLiteInt8) {
arm_softmax_s8(tflite::micro::GetTensorData<int8_t>(input),
op_data.num_rows, op_data.row_size,
op_data.softmax_params.input_multiplier,
op_data.softmax_params.input_left_shift,
op_data.softmax_params.diff_min,
tflite::micro::GetTensorData<int8_t>(output));
} else {
arm_softmax_s8_s16(tflite::micro::GetTensorData<int8_t>(input),
op_data.num_rows, op_data.row_size,
op_data.softmax_params.input_multiplier,
op_data.softmax_params.input_left_shift,
op_data.softmax_params.diff_min,
tflite::micro::GetTensorData<int16_t>(output));
}
return kTfLiteOk;
}
case kTfLiteInt16: {
SoftmaxQuantized(input, output, data);
const cmsis_nn_softmax_lut_s16 softmax_params = {
.exp_lut = op_data.softmax_params.exp_lut,
.one_by_one_lut = op_data.softmax_params.one_over_one_plus_x_lut};
TFLITE_DCHECK_EQ(
arm_softmax_s16(
tflite::micro::GetTensorData<int16_t>(input), op_data.num_rows,
op_data.row_size, op_data.softmax_params.input_multiplier,
op_data.softmax_params.input_left_shift, &softmax_params,
tflite::micro::GetTensorData<int16_t>(output)),
ARM_CMSIS_NN_SUCCESS);
return kTfLiteOk;
}
default:
@@ -89,10 +130,79 @@ TfLiteStatus SoftmaxEval(TfLiteContext* context, TfLiteNode* node) {
}
}
TfLiteStatus SoftmaxEvalInt8(TfLiteContext* context, TfLiteNode* node) {
const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
TFLITE_DCHECK(node->user_data != nullptr);
const CMSISNNSoftmaxParams op_data =
*static_cast<const CMSISNNSoftmaxParams*>(node->user_data);
arm_softmax_s8(tflite::micro::GetTensorData<int8_t>(input), op_data.num_rows,
op_data.row_size, op_data.softmax_params.input_multiplier,
op_data.softmax_params.input_left_shift,
op_data.softmax_params.diff_min,
tflite::micro::GetTensorData<int8_t>(output));
return kTfLiteOk;
}
TfLiteStatus SoftmaxEvalInt8_Int16(TfLiteContext* context, TfLiteNode* node) {
const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
TFLITE_DCHECK(node->user_data != nullptr);
const CMSISNNSoftmaxParams op_data =
*static_cast<const CMSISNNSoftmaxParams*>(node->user_data);
arm_softmax_s8_s16(
tflite::micro::GetTensorData<int8_t>(input), op_data.num_rows,
op_data.row_size, op_data.softmax_params.input_multiplier,
op_data.softmax_params.input_left_shift, op_data.softmax_params.diff_min,
tflite::micro::GetTensorData<int16_t>(output));
return kTfLiteOk;
}
TfLiteStatus SoftmaxEvalInt16(TfLiteContext* context, TfLiteNode* node) {
const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
TFLITE_DCHECK(node->user_data != nullptr);
const CMSISNNSoftmaxParams op_data =
*static_cast<const CMSISNNSoftmaxParams*>(node->user_data);
const cmsis_nn_softmax_lut_s16 softmax_params = {
.exp_lut = op_data.softmax_params.exp_lut,
.one_by_one_lut = op_data.softmax_params.one_over_one_plus_x_lut};
TFLITE_DCHECK_EQ(
arm_softmax_s16(tflite::micro::GetTensorData<int16_t>(input),
op_data.num_rows, op_data.row_size,
op_data.softmax_params.input_multiplier,
op_data.softmax_params.input_left_shift, &softmax_params,
tflite::micro::GetTensorData<int16_t>(output)),
ARM_CMSIS_NN_SUCCESS);
return kTfLiteOk;
}
} // namespace
TfLiteRegistration Register_SOFTMAX() {
return tflite::micro::RegisterOp(SoftmaxInit, SoftmaxPrepare, SoftmaxEval);
return tflite::micro::RegisterOp(Init, Prepare, SoftmaxEval);
}
TfLiteRegistration Register_SOFTMAX_INT8() {
return tflite::micro::RegisterOp(Init, Prepare, SoftmaxEvalInt8);
}
TfLiteRegistration Register_SOFTMAX_INT8_INT16() {
return tflite::micro::RegisterOp(Init, Prepare, SoftmaxEvalInt8_Int16);
}
TfLiteRegistration Register_SOFTMAX_INT16() {
return tflite::micro::RegisterOp(Init, Prepare, SoftmaxEvalInt16);
}
} // namespace tflite
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -23,6 +23,13 @@ namespace tflite {
void* SoftmaxInit(TfLiteContext* context, const char* buffer, size_t length);
// Common helper function to SoftmaxPrepare.
TfLiteStatus CalculateSoftmaxParams(TfLiteContext* context,
const TfLiteTensor* input,
TfLiteTensor* output,
const TfLiteSoftmaxParams* params,
SoftmaxParams* op_data);
TfLiteStatus SoftmaxPrepare(TfLiteContext* context, TfLiteNode* node);
// This is the most generic TfLiteRegistration. The actual supported types may
@@ -30,7 +37,7 @@ TfLiteStatus SoftmaxPrepare(TfLiteContext* context, TfLiteNode* node);
// (reference or optimized) must define this function.
TfLiteRegistration Register_SOFTMAX();
#if defined(XTENSA)
#if defined(XTENSA) || defined(CMSIS_NN)
// Returns a TfLiteRegistration struct for kernel variant that only supports
// int8 input and int16 output.
TfLiteRegistration Register_SOFTMAX_INT8_INT16();
@@ -40,6 +47,23 @@ inline TfLiteRegistration Register_SOFTMAX_INT8_INT16() {
}
#endif
#if defined(CMSIS_NN)
// Returns a TfLiteRegistration struct for kernel variant that only supports
// int8 input/output and uses the latency optimized implementations.
TfLiteRegistration Register_SOFTMAX_INT8();
// Returns a TfLiteRegistration struct for kernel variant that only supports
// int16 input/output and uses the latency optimized implementations.
TfLiteRegistration Register_SOFTMAX_INT16();
#else
inline TfLiteRegistration Register_SOFTMAX_INT8() { return Register_SOFTMAX(); }
inline TfLiteRegistration Register_SOFTMAX_INT16() {
return Register_SOFTMAX();
}
#endif
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_KERNELS_SOFTMAX_H_
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -28,11 +28,59 @@ namespace {
// Softmax parameter data that persists in user_data
const int kInt16LUTArraySize = 513;
TfLiteStatus InitializeLutForInt16(TfLiteContext* context,
const TfLiteTensor* input,
TfLiteTensor* output,
SoftmaxParams* op_data) {
// Only allocate LUTs for KTfLiteInt16 data type
if (input->type == kTfLiteInt16) {
void* raw_exp_lut = context->AllocatePersistentBuffer(
context, sizeof(int16_t) * kInt16LUTArraySize);
TF_LITE_ENSURE(context, raw_exp_lut != nullptr);
op_data->exp_lut = reinterpret_cast<int16_t*>(raw_exp_lut);
void* one_over_one_plus_x_lut = context->AllocatePersistentBuffer(
context, sizeof(int16_t) * kInt16LUTArraySize);
TF_LITE_ENSURE(context, one_over_one_plus_x_lut != nullptr);
op_data->one_over_one_plus_x_lut =
reinterpret_cast<int16_t*>(one_over_one_plus_x_lut);
}
if (output->type == kTfLiteInt16) {
TF_LITE_ENSURE(context,
input->type == kTfLiteInt8 || input->type == kTfLiteInt16);
} else {
TF_LITE_ENSURE_EQ(context, input->type, output->type);
}
// Populate LUT if required
if (input->type == kTfLiteInt16) {
TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
// exp LUT only used on negative values
// we consider exp(-10.0) is insignificant to accumulation
gen_lut<float, int16_t, int16_t>(
[](float value) { return std::exp(value); }, -10.0f, 0.0f, -1.0f, 1.0f,
op_data->exp_lut);
gen_lut<float, int16_t, int16_t>(
[](float value) { return 1.0f / (1.0f + value); }, 0.0f, 1.0f, -1.0f,
1.0f, op_data->one_over_one_plus_x_lut);
op_data->zero_point = output->params.zero_point;
op_data->scale = output->params.scale;
}
return kTfLiteOk;
}
} // namespace
TfLiteStatus CalculateSoftmaxParams(TfLiteContext* context,
const TfLiteTensor* input,
TfLiteTensor* output,
const TfLiteSoftmaxParams* params,
SoftmaxParams* op_data) {
if (InitializeLutForInt16(context, input, output, op_data) != kTfLiteOk) {
return kTfLiteError;
}
if (input->type == kTfLiteInt8 || input->type == kTfLiteInt16) {
if (input->type == kTfLiteInt16) {
TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
@@ -83,8 +131,6 @@ TfLiteStatus CalculateSoftmaxParams(TfLiteContext* context,
return kTfLiteOk;
}
} // namespace
void* SoftmaxInit(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(SoftmaxParams));
@@ -103,40 +149,6 @@ TfLiteStatus SoftmaxPrepare(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE(context, node->user_data != nullptr);
SoftmaxParams* op_data = static_cast<SoftmaxParams*>(node->user_data);
// Only allocate LUTs for KTfLiteInt16 data type
if (input->type == kTfLiteInt16) {
void* raw_exp_lut = context->AllocatePersistentBuffer(
context, sizeof(int16_t) * kInt16LUTArraySize);
TF_LITE_ENSURE(context, raw_exp_lut != nullptr);
op_data->exp_lut = reinterpret_cast<int16_t*>(raw_exp_lut);
void* one_over_one_plus_x_lut = context->AllocatePersistentBuffer(
context, sizeof(int16_t) * kInt16LUTArraySize);
TF_LITE_ENSURE(context, one_over_one_plus_x_lut != nullptr);
op_data->one_over_one_plus_x_lut =
reinterpret_cast<int16_t*>(one_over_one_plus_x_lut);
}
if (output->type == kTfLiteInt16) {
TF_LITE_ENSURE(context,
input->type == kTfLiteInt8 || input->type == kTfLiteInt16);
} else {
TF_LITE_ENSURE_EQ(context, input->type, output->type);
}
// Populate LUT if required
if (input->type == kTfLiteInt16) {
TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
// exp LUT only used on negative values
// we consider exp(-10.0) is insignificant to accumulation
gen_lut<float, int16_t, int16_t>(
[](float value) { return std::exp(value); }, -10.0f, 0.0f, -1.0f, 1.0f,
op_data->exp_lut);
gen_lut<float, int16_t, int16_t>(
[](float value) { return 1.0f / (1.0f + value); }, 0.0f, 1.0f, -1.0f,
1.0f, op_data->one_over_one_plus_x_lut);
op_data->zero_point = output->params.zero_point;
op_data->scale = output->params.scale;
}
auto* params = static_cast<TfLiteSoftmaxParams*>(node->builtin_data);
auto ret_val =
...