提交 8beb1b0f 编写于 作者: M mindspore-ci-bot 提交者: Gitee

!3799 conv1x1 & deconv change

Merge pull request !3799 from ling/conv1x1
...@@ -165,8 +165,7 @@ OpParameter *PopulateFullconnectionParameter(const lite::Primitive *primitive) { ...@@ -165,8 +165,7 @@ OpParameter *PopulateFullconnectionParameter(const lite::Primitive *primitive) {
matmul_param->b_transpose_ = true; matmul_param->b_transpose_ = true;
matmul_param->a_transpose_ = false; matmul_param->a_transpose_ = false;
matmul_param->has_bias_ = param->hasBias(); matmul_param->has_bias_ = param->hasBias();
matmul_param->minf_ = -FLT_MAX; matmul_param->act_type_ = ActType_No;
matmul_param->maxf_ = FLT_MAX;
return reinterpret_cast<OpParameter *>(matmul_param); return reinterpret_cast<OpParameter *>(matmul_param);
} }
...@@ -181,8 +180,7 @@ OpParameter *PopulateMatMulParameter(const lite::Primitive *primitive) { ...@@ -181,8 +180,7 @@ OpParameter *PopulateMatMulParameter(const lite::Primitive *primitive) {
matmul_param->b_transpose_ = param->transposeB(); matmul_param->b_transpose_ = param->transposeB();
matmul_param->a_transpose_ = param->transposeA(); matmul_param->a_transpose_ = param->transposeA();
matmul_param->has_bias_ = false; matmul_param->has_bias_ = false;
matmul_param->minf_ = -FLT_MAX; matmul_param->act_type_ = ActType_No;
matmul_param->maxf_ = FLT_MAX;
return reinterpret_cast<OpParameter *>(matmul_param); return reinterpret_cast<OpParameter *>(matmul_param);
} }
......
...@@ -146,28 +146,10 @@ int ConvolutionBaseCPUKernel::SetQuantParam() { ...@@ -146,28 +146,10 @@ int ConvolutionBaseCPUKernel::SetQuantParam() {
QuantizeRoundParameter(real_multiplier, &conv_quant_arg_->quant_multiplier_[0], &conv_quant_arg_->left_shift_[0], QuantizeRoundParameter(real_multiplier, &conv_quant_arg_->quant_multiplier_[0], &conv_quant_arg_->left_shift_[0],
&conv_quant_arg_->right_shift_[0]); &conv_quant_arg_->right_shift_[0]);
ComputeQuantOutRange(conv_param_); CalculateActivationRangeQuantized(
conv_param_->is_relu_, conv_param_->is_relu6_, conv_param_->conv_quant_arg_.quant_args_[2][0].zp_,
conv_param_->conv_quant_arg_.quant_args_[2][0].scale_, &conv_param_->conv_quant_arg_.out_act_min_[0],
&conv_param_->conv_quant_arg_.out_act_max_[0]);
return RET_OK; return RET_OK;
} }
// Computes the int8 clamp range applied to the quantized convolution output and stores it in
// conv_quant_arg_.out_act_min_[0] / out_act_max_[0].
//
// The range starts as the full int8 range. With is_relu_ the lower bound is raised to the
// quantized value of 0; with is_relu6_ (and no is_relu_) the upper bound is additionally lowered
// to the quantized value of 6. Scale and zero point are read from quant_args_[2][0]
// (presumably the output tensor's quantization parameters -- confirm against the caller).
void ComputeQuantOutRange(ConvParameter *conv_param) {
  auto &quant_arg = conv_param->conv_quant_arg_;
  float out_scale = quant_arg.quant_args_[2][0].scale_;
  int32_t out_zp = quant_arg.quant_args_[2][0].zp_;
  const int32_t zero_q = QuantizeToInt8(0, out_scale, out_zp);
  const int32_t six_q = QuantizeToInt8(6, out_scale, out_zp);

  int32_t lower = std::numeric_limits<int8_t>::min();
  int32_t upper = std::numeric_limits<int8_t>::max();

  // is_relu_ takes precedence: relu6 clamping is applied only when is_relu_ is not set.
  const bool relu = conv_param->is_relu_;
  const bool relu6_only = !relu && conv_param->is_relu6_;
  if ((relu || relu6_only) && zero_q > lower) {
    lower = zero_q;
  }
  if (relu6_only && six_q < upper) {
    upper = six_q;
  }

  quant_arg.out_act_min_[0] = lower;
  quant_arg.out_act_max_[0] = upper;
}
} // namespace mindspore::kernel } // namespace mindspore::kernel
...@@ -60,7 +60,6 @@ class ConvolutionBaseCPUKernel : public LiteKernel { ...@@ -60,7 +60,6 @@ class ConvolutionBaseCPUKernel : public LiteKernel {
ConvParameter *conv_param_; ConvParameter *conv_param_;
LayoutConvertor convert_func_; LayoutConvertor convert_func_;
}; };
void ComputeQuantOutRange(ConvParameter *conv_param);
bool CheckSupportFP16(); bool CheckSupportFP16();
} // namespace mindspore::kernel } // namespace mindspore::kernel
......
...@@ -23,62 +23,71 @@ using mindspore::lite::RET_OK; ...@@ -23,62 +23,71 @@ using mindspore::lite::RET_OK;
namespace mindspore::kernel { namespace mindspore::kernel {
// Destructor of the fp32 1x1 convolution kernel: frees the heap buffers held by the kernel and
// deletes matmul_param_.
// NOTE(review): this listing looks like a side-by-side diff -- on two-column lines the left text
// is the pre-change code and the right text is the post-change code.
// Post-change, the buffers released are weight_ptr_, pack_input_ and pack_output_. input_ptr_ is
// freed only when pre_trans_input_ is set; otherwise Pre1x1Trans points it at the caller's input
// buffer, which this kernel must not free.
Convolution1x1CPUKernel::~Convolution1x1CPUKernel() { Convolution1x1CPUKernel::~Convolution1x1CPUKernel() {
if (c4_output_ != nullptr) { if (weight_ptr_ != nullptr) {
free(c4_output_); free(weight_ptr_);
c4_output_ = nullptr; weight_ptr_ = nullptr;
} }
if (c4_input_ != nullptr) { if (pack_input_ != nullptr) {
free(c4_input_); free(pack_input_);
c4_input_ = nullptr; pack_input_ = nullptr;
} }
if (pre_trans_input_) { if (pack_output_ != nullptr) {
free(pack_output_);
pack_output_ = nullptr;
}
if (pre_trans_input_ && input_ptr_ != nullptr) {
free(input_ptr_); free(input_ptr_);
input_ptr_ = nullptr; input_ptr_ = nullptr;
} }
// The tmp_ptr_ / weight_ptr_ releases below belong to the pre-change column only.
if (tmp_ptr_ != nullptr) {
free(tmp_ptr_);
tmp_ptr_ = nullptr;
}
if (weight_ptr_ != nullptr) {
free(weight_ptr_);
weight_ptr_ = nullptr;
}
delete matmul_param_; delete matmul_param_;
} }
int Convolution1x1CPUKernel::ReSize() { return RET_OK; } int Convolution1x1CPUKernel::ReSize() {
if (pack_input_ != nullptr) {
free(pack_input_);
pack_input_ = nullptr;
}
if (pre_trans_input_ && input_ptr_ != nullptr) {
free(input_ptr_);
input_ptr_ = nullptr;
}
InitConv1x1MatmulParam();
InitConv1x1Param();
return RET_OK;
}
// Fills matmul_param_ with the matrix dimensions of the 1x1 convolution.
// NOTE(review): this listing looks like a side-by-side diff -- on two-column lines the left text
// is the pre-change code and the right text is the post-change code.
// Post-change values:
//   row_  = output_h_ * output_w_
//   col_  = output_channel_
//   deep_ = input_channel_
//   row_8_ / col_8_ = row_ / col_ passed through UP_ROUND(..., C8NUM)
//   act_type_ = ActType_Relu6 when is_relu6_ is set, else ActType_No; it is then overridden
//               with ActType_Relu when is_relu_ is set, so is_relu_ wins if both flags are set.
void Convolution1x1CPUKernel::InitConv1x1MatmulParam() { void Convolution1x1CPUKernel::InitConv1x1MatmulParam() {
matmul_param_ = new StrassenMatMulParameter();
matmul_param_->row_ = conv_param_->output_h_ * conv_param_->output_w_; matmul_param_->row_ = conv_param_->output_h_ * conv_param_->output_w_;
matmul_param_->col_ = UP_DIV(conv_param_->output_channel_, FP32_STRASSEN_UINT); matmul_param_->col_ = conv_param_->output_channel_;
matmul_param_->deep_ = UP_DIV(conv_param_->input_channel_, FP32_STRASSEN_UINT); matmul_param_->deep_ = conv_param_->input_channel_;
matmul_param_->a_stride_ = matmul_param_->row_ * FP32_STRASSEN_UINT; matmul_param_->row_8_ = UP_ROUND(matmul_param_->row_, C8NUM);
matmul_param_->b_stride_ = matmul_param_->deep_ * FP32_STRASSEN_WEIGHT_UINT; matmul_param_->col_8_ = UP_ROUND(matmul_param_->col_, C8NUM);
matmul_param_->c_stride_ = matmul_param_->row_ * FP32_STRASSEN_UINT; matmul_param_->act_type_ = (conv_param_->is_relu6_) ? ActType_Relu6 : ActType_No;
matmul_param_->act_type_ = (conv_param_->is_relu_) ? ActType_Relu : matmul_param_->act_type_;
return;
}
int Convolution1x1CPUKernel::InitConv1x1BiasWeight() { int Convolution1x1CPUKernel::InitConv1x1BiasWeight() {
if (inputs_.size() == 3) { if (inputs_.size() == 3) {
bias_data_ = malloc(matmul_param_->col_ * C4NUM * sizeof(float)); bias_data_ = malloc(matmul_param_->col_8_ * sizeof(float));
if (bias_data_ == nullptr) { if (bias_data_ == nullptr) {
MS_LOG(ERROR) << "Conv1x1 Malloc bias_ptr_ error!"; MS_LOG(ERROR) << "Conv1x1 Malloc bias_ptr_ error!";
return RET_ERROR; return RET_ERROR;
} }
memset(bias_data_, 0, matmul_param_->col_ * C4NUM * sizeof(float)); memset(bias_data_, 0, matmul_param_->col_8_ * sizeof(float));
memcpy(bias_data_, inputs_[2]->Data(), conv_param_->output_channel_ * sizeof(float)); memcpy(bias_data_, inputs_[2]->Data(), conv_param_->output_channel_ * sizeof(float));
} else { } else {
bias_data_ = nullptr; bias_data_ = nullptr;
} }
weight_ptr_ = reinterpret_cast<float *>( weight_ptr_ = reinterpret_cast<float *>(malloc(matmul_param_->row_8_ * matmul_param_->col_8_ * sizeof(float)));
malloc(matmul_param_->col_ * matmul_param_->deep_ * FP32_STRASSEN_WEIGHT_UINT * sizeof(float)));
if (weight_ptr_ == nullptr) { if (weight_ptr_ == nullptr) {
MS_LOG(ERROR) << "Conv1x1 Malloc weight_ptr_ error!"; MS_LOG(ERROR) << "Conv1x1 Malloc weight_ptr_ error!";
return RET_ERROR; return RET_ERROR;
} }
memset(weight_ptr_, 0, matmul_param_->col_ * matmul_param_->deep_ * FP32_STRASSEN_WEIGHT_UINT * sizeof(float)); memset(weight_ptr_, 0, matmul_param_->row_8_ * matmul_param_->col_8_ * sizeof(float));
Pack1x1WeightFp32(reinterpret_cast<float *>(inputs_[1]->Data()), weight_ptr_, conv_param_); RowMajor2Col8Major(reinterpret_cast<float *>(inputs_[1]->Data()), weight_ptr_, matmul_param_->col_,
matmul_param_->deep_);
return RET_OK; return RET_OK;
} }
...@@ -86,52 +95,43 @@ int Convolution1x1CPUKernel::InitConv1x1Param() { ...@@ -86,52 +95,43 @@ int Convolution1x1CPUKernel::InitConv1x1Param() {
pre_trans_input_ = (conv_param_->pad_h_ != 0 || conv_param_->pad_w_ != 0 || conv_param_->stride_h_ != 1 || pre_trans_input_ = (conv_param_->pad_h_ != 0 || conv_param_->pad_w_ != 0 || conv_param_->stride_h_ != 1 ||
conv_param_->stride_w_ != 1); conv_param_->stride_w_ != 1);
if (pre_trans_input_) { if (pre_trans_input_) {
input_ptr_ = reinterpret_cast<float *>(malloc(matmul_param_->a_stride_ * matmul_param_->deep_ * sizeof(float))); input_ptr_ = reinterpret_cast<float *>(malloc(matmul_param_->row_ * matmul_param_->deep_ * sizeof(float)));
if (input_ptr_ == nullptr) { if (input_ptr_ == nullptr) {
MS_LOG(ERROR) << "Conv1x1 Malloc input_ptr_ error!"; MS_LOG(ERROR) << "Conv1x1 Malloc input_ptr_ error!";
return RET_MEMORY_FAILED; return RET_MEMORY_FAILED;
} }
memset(input_ptr_, 0, matmul_param_->a_stride_ * matmul_param_->deep_ * sizeof(float)); memset(input_ptr_, 0, matmul_param_->row_ * matmul_param_->deep_ * sizeof(float));
} }
thread_hw_count_ = MSMIN(opParameter->thread_num_, matmul_param_->row_); thread_count_ = MSMIN(opParameter->thread_num_, UP_DIV(matmul_param_->col_, C8NUM));
thread_hw_stride_ = UP_DIV(matmul_param_->row_, thread_hw_count_); thread_stride_ = UP_DIV(UP_DIV(matmul_param_->col_, C8NUM), thread_count_) * C8NUM;
thread_oc4_count_ = MSMIN(opParameter->thread_num_, matmul_param_->col_);
thread_oc_stride_ = UP_DIV(matmul_param_->col_, thread_oc4_count_) * C4NUM;
tmp_ptr_ = reinterpret_cast<float *>(malloc(matmul_param_->a_stride_ * matmul_param_->deep_ * sizeof(float))); pack_input_ = reinterpret_cast<float *>(malloc(matmul_param_->row_8_ * matmul_param_->deep_ * sizeof(float)));
if (tmp_ptr_ == nullptr) { if (pack_input_ == nullptr) {
MS_LOG(ERROR) << "Conv1x1 Malloc tmp_ptr_ error!"; MS_LOG(ERROR) << "Conv1x1 Malloc pack_input_ error!";
return RET_MEMORY_FAILED;
}
c4_output_ =
reinterpret_cast<float *>(malloc(outputs_[0]->ElementsC4Num() / conv_param_->output_batch_ * sizeof(float)));
if (c4_output_ == nullptr) {
MS_LOG(ERROR) << "Conv1x1 Malloc c4_output_ error!";
return RET_MEMORY_FAILED; return RET_MEMORY_FAILED;
} }
memset(pack_input_, 0, matmul_param_->row_8_ * matmul_param_->deep_ * sizeof(float));
c4_input_ = pack_output_ = reinterpret_cast<float *>(malloc(matmul_param_->row_8_ * matmul_param_->col_8_ * sizeof(float)));
reinterpret_cast<float *>(malloc(inputs_[0]->ElementsC4Num() / conv_param_->input_batch_ * sizeof(float))); if (pack_output_ == nullptr) {
if (c4_input_ == nullptr) { MS_LOG(ERROR) << "Conv1x1 Malloc pack_output_ error!";
MS_LOG(ERROR) << "Conv1x1 Malloc c4_input_ error!";
return RET_MEMORY_FAILED; return RET_MEMORY_FAILED;
} }
memset(pack_output_, 0, matmul_param_->row_8_ * matmul_param_->col_8_ * sizeof(float));
return RET_OK; return RET_OK;
} }
// Prepares one batch for the matmul: records the destination pointer and packs the input.
// NOTE(review): this listing looks like a side-by-side diff -- on two-column lines the left text
// is the pre-change code and the right text is the post-change code.
// Post-change behaviour:
//   - output_ptr_ is set to src_output.
//   - When pre_trans_input_ is set, Conv1x1InputPackFp32 writes from src_input into input_ptr_;
//     otherwise input_ptr_ simply aliases src_input (no copy, not owned by the kernel).
//   - RowMajor2Col8Major then packs input_ptr_ into pack_input_ using row_ and deep_.
void Convolution1x1CPUKernel::Pre1x1Trans(float *src_input, float *src_output) { void Convolution1x1CPUKernel::Pre1x1Trans(float *src_input, float *src_output) {
output_ptr_ = src_output; output_ptr_ = src_output;
PackNHWCToNC4HW4Fp32(src_input, c4_input_, 1, conv_param_->input_h_ * conv_param_->input_w_,
conv_param_->input_channel_);
if (!pre_trans_input_) { if (pre_trans_input_) {
input_ptr_ = c4_input_; Conv1x1InputPackFp32(src_input, input_ptr_, conv_param_);
return; } else {
input_ptr_ = src_input;
} }
Conv1x1InputPackFp32(c4_input_, input_ptr_, conv_param_); RowMajor2Col8Major(input_ptr_, pack_input_, matmul_param_->row_, matmul_param_->deep_);
return; return;
} }
...@@ -152,53 +152,26 @@ int Convolution1x1CPUKernel::Init() { ...@@ -152,53 +152,26 @@ int Convolution1x1CPUKernel::Init() {
return RET_OK; return RET_OK;
} }
// Per-task worker of the 1x1 convolution.
// NOTE(review): this listing looks like a side-by-side diff -- on two-column lines the left text
// is the pre-change code and the right text is the post-change code. DoStrassen,
// Convolution1x1StrassenRun and DoPostFunc appear only in the pre-change column; DoConv1x1
// appears only in the post-change column.
// Post-change DoConv1x1(task_id):
//   - cur_oc = MSMIN(thread_stride_, col_8_ - task_id * thread_stride_); a task with
//     cur_oc <= 0 has nothing to do and returns RET_OK.
//   - bias is nullptr when bias_data_ is nullptr, else bias_data_ advanced by
//     thread_stride_ * task_id floats.
//   - MatMul is called with pack_input_, the weight slice at task_id * thread_stride_ * deep_,
//     the output slice at task_id * thread_stride_ * row_8_, the bias, act_type_, deep_, row_8_
//     and cur_oc.
//   - Always returns RET_OK.
int Convolution1x1CPUKernel::DoStrassen(int task_id) { int Convolution1x1CPUKernel::DoConv1x1(int task_id) {
matmul_param_->row_ = MSMIN(thread_hw_stride_, matmul_param_->row_ - task_id * thread_hw_stride_); int cur_oc = MSMIN(thread_stride_, matmul_param_->col_8_ - task_id * thread_stride_);
if (matmul_param_->row_ <= 0) {
return RET_OK;
}
auto error_code = Conv1x1Fp32(input_ptr_ + task_id * thread_hw_stride_ * C4NUM, weight_ptr_,
c4_output_ + task_id * thread_hw_stride_ * C4NUM,
tmp_ptr_ + task_id * thread_hw_stride_ * matmul_param_->deep_ * C4NUM, *matmul_param_);
if (error_code != 0) {
MS_LOG(ERROR) << "DoStrassen error task_id[" << task_id << "] error_code[" << error_code << "]";
return RET_ERROR;
}
matmul_param_->row_ = conv_param_->output_h_ * conv_param_->output_w_;
return RET_OK;
}
int Convolution1x1StrassenRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) {
auto conv1x1 = reinterpret_cast<Convolution1x1CPUKernel *>(cdata);
auto error_code = conv1x1->DoStrassen(task_id);
if (error_code != RET_OK) {
MS_LOG(ERROR) << "Convolution1x1StrassenRun error task_id[" << task_id << "] error_code[" << error_code << "]";
return RET_ERROR;
}
return RET_OK;
}
int Convolution1x1CPUKernel::DoPostFunc(int task_id) {
int cur_oc = MSMIN(thread_oc_stride_, conv_param_->output_channel_ - task_id * thread_oc_stride_);
if (cur_oc <= 0) { if (cur_oc <= 0) {
return RET_OK; return RET_OK;
} }
float *cur_bias = auto bias = (bias_data_ == nullptr) ? nullptr : reinterpret_cast<float *>(bias_data_) + thread_stride_ * task_id;
(bias_data_ == nullptr) ? nullptr : reinterpret_cast<float *>(bias_data_) + task_id * thread_oc_stride_;
MatMul(pack_input_, weight_ptr_ + task_id * thread_stride_ * matmul_param_->deep_,
pack_output_ + task_id * thread_stride_ * matmul_param_->row_8_, bias, matmul_param_->act_type_,
matmul_param_->deep_, matmul_param_->row_8_, cur_oc);
PostConvFuncFp32(c4_output_ + matmul_param_->row_ * thread_oc_stride_ * task_id,
output_ptr_ + task_id * thread_oc_stride_, cur_bias, cur_oc, matmul_param_->row_,
conv_param_->output_channel_, conv_param_->is_relu_, conv_param_->is_relu6_);
return RET_OK; return RET_OK;
} }
int Convolution1x1PostFuncRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) { int Convolution1x1Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) {
auto conv1x1 = reinterpret_cast<Convolution1x1CPUKernel *>(cdata); auto conv1x1 = reinterpret_cast<Convolution1x1CPUKernel *>(cdata);
auto error_code = conv1x1->DoPostFunc(task_id); auto error_code = conv1x1->DoConv1x1(task_id);
if (error_code != RET_OK) { if (error_code != RET_OK) {
MS_LOG(ERROR) << "Convolution1x1PostFuncRun error task_id[" << task_id << "] error_code[" << error_code << "]"; MS_LOG(ERROR) << "Convolution1x1Run error task_id[" << task_id << "] error_code[" << error_code << "]";
return RET_ERROR; return RET_ERROR;
} }
return RET_OK; return RET_OK;
...@@ -209,20 +182,16 @@ int Convolution1x1CPUKernel::Run() { ...@@ -209,20 +182,16 @@ int Convolution1x1CPUKernel::Run() {
auto src_out = reinterpret_cast<float *>(outputs_[0]->Data()); auto src_out = reinterpret_cast<float *>(outputs_[0]->Data());
for (int batch_index = 0; batch_index < conv_param_->input_batch_; batch_index++) { for (int batch_index = 0; batch_index < conv_param_->input_batch_; batch_index++) {
Pre1x1Trans(src_in + batch_index * matmul_param_->deep_ * matmul_param_->a_stride_, Pre1x1Trans(src_in + batch_index * conv_param_->input_h_ * conv_param_->input_w_ * conv_param_->input_channel_,
src_out + batch_index * matmul_param_->col_ * matmul_param_->c_stride_); src_out + batch_index * matmul_param_->row_ * matmul_param_->col_);
int error_code = LiteBackendParallelLaunch(Convolution1x1StrassenRun, this, thread_hw_count_); int error_code = LiteBackendParallelLaunch(Convolution1x1Run, this, thread_count_);
if (error_code != RET_OK) { if (error_code != RET_OK) {
MS_LOG(ERROR) << "conv1x1 strassen error error_code[" << error_code << "]"; MS_LOG(ERROR) << "conv1x1 strassen error error_code[" << error_code << "]";
return RET_ERROR; return RET_ERROR;
} }
error_code = LiteBackendParallelLaunch(Convolution1x1PostFuncRun, this, thread_oc4_count_); Row8x8Major2RowMajor(pack_output_, output_ptr_, matmul_param_->row_, matmul_param_->col_);
if (error_code != RET_OK) {
MS_LOG(ERROR) << "conv1x1 post function error error_code[" << error_code << "]";
return RET_ERROR;
}
} }
return RET_OK; return RET_OK;
} }
......
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_CONVOLUTION_1X1_H_ #ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_CONVOLUTION_1X1_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_CONVOLUTION_1X1_H_ #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_CONVOLUTION_1X1_H_
#include <float.h>
#include <vector> #include <vector>
#include "src/lite_kernel.h" #include "src/lite_kernel.h"
#include "include/errorcode.h" #include "include/errorcode.h"
...@@ -26,21 +27,24 @@ ...@@ -26,21 +27,24 @@
#include "src/runtime/kernel/arm/base/layout_transform.h" #include "src/runtime/kernel/arm/base/layout_transform.h"
#include "src/runtime/kernel/arm/opclib/fp32/conv.h" #include "src/runtime/kernel/arm/opclib/fp32/conv.h"
#include "src/runtime/kernel/arm/opclib/fp32/common_func.h" #include "src/runtime/kernel/arm/opclib/fp32/common_func.h"
#include "src/runtime/kernel/arm/opclib/matmul.h"
#include "src/runtime/kernel/arm/opclib/fp32/matmul.h"
namespace mindspore::kernel { namespace mindspore::kernel {
class Convolution1x1CPUKernel : public ConvolutionBaseCPUKernel { class Convolution1x1CPUKernel : public ConvolutionBaseCPUKernel {
public: public:
Convolution1x1CPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, Convolution1x1CPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx) const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx)
: ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {} : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {
matmul_param_ = new MatMulParameter();
}
~Convolution1x1CPUKernel(); ~Convolution1x1CPUKernel();
int Init() override; int Init() override;
int Run() override; int Run() override;
int ReSize() override; int ReSize() override;
public: public:
int DoStrassen(int task_id); int DoConv1x1(int task_id);
int DoPostFunc(int task_id);
private: private:
int InitConv1x1Param(); int InitConv1x1Param();
...@@ -49,20 +53,15 @@ class Convolution1x1CPUKernel : public ConvolutionBaseCPUKernel { ...@@ -49,20 +53,15 @@ class Convolution1x1CPUKernel : public ConvolutionBaseCPUKernel {
void Pre1x1Trans(float *src_input, float *src_output); void Pre1x1Trans(float *src_input, float *src_output);
private: private:
StrassenMatMulParameter *matmul_param_ = nullptr; MatMulParameter *matmul_param_ = nullptr;
bool pre_trans_input_ = false; bool pre_trans_input_ = false;
int thread_count_ = 0; int thread_count_ = 0;
int thread_hw_count_ = 0; int thread_stride_ = 0;
int thread_hw_stride_ = 0;
int thread_oc4_count_ = 0;
int thread_oc_stride_ = 0;
float *weight_ptr_ = nullptr; float *weight_ptr_ = nullptr;
float *tmp_ptr_ = nullptr; float *pack_input_ = nullptr;
float *c4_input_ = nullptr; float *pack_output_ = nullptr;
float *c4_output_ = nullptr;
float *input_ptr_ = nullptr; float *input_ptr_ = nullptr;
float *output_ptr_ = nullptr; float *output_ptr_ = nullptr;
}; };
} // namespace mindspore::kernel } // namespace mindspore::kernel
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_CONVOLUTION_1X1_H_ #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_CONVOLUTION_1X1_H_
...@@ -30,27 +30,38 @@ DeConvolutionCPUKernel::~DeConvolutionCPUKernel() { ...@@ -30,27 +30,38 @@ DeConvolutionCPUKernel::~DeConvolutionCPUKernel() {
free(weight_ptr_); free(weight_ptr_);
weight_ptr_ = nullptr; weight_ptr_ = nullptr;
} }
if (tmp_output_ != nullptr) {
free(tmp_output_);
tmp_output_ = nullptr;
}
if (tmp_buffer_ != nullptr) { if (tmp_buffer_ != nullptr) {
free(tmp_buffer_); free(tmp_buffer_);
tmp_buffer_ = nullptr; tmp_buffer_ = nullptr;
} }
if (c4_input_ != nullptr) { if (pack_input_ != nullptr) {
free(c4_input_); free(pack_input_);
c4_input_ = nullptr; pack_input_ = nullptr;
} }
if (c4_output_ != nullptr) { if (pack_output_ != nullptr) {
free(c4_output_); free(pack_output_);
c4_output_ = nullptr; pack_output_ = nullptr;
} }
return; return;
} }
int DeConvolutionCPUKernel::ReSize() { return 0; } int DeConvolutionCPUKernel::ReSize() {
if (tmp_buffer_ != nullptr) {
free(tmp_buffer_);
tmp_buffer_ = nullptr;
}
if (pack_input_ != nullptr) {
free(pack_input_);
pack_input_ = nullptr;
}
if (pack_output_ != nullptr) {
free(pack_output_);
pack_output_ = nullptr;
}
InitParam();
return RET_OK;
}
int DeConvolutionCPUKernel::InitWeightBias() { int DeConvolutionCPUKernel::InitWeightBias() {
if (inputs_.size() == 3) { if (inputs_.size() == 3) {
...@@ -65,60 +76,50 @@ int DeConvolutionCPUKernel::InitWeightBias() { ...@@ -65,60 +76,50 @@ int DeConvolutionCPUKernel::InitWeightBias() {
bias_data_ = nullptr; bias_data_ = nullptr;
} }
size_t weight_pack_size = conv_param_->kernel_w_ * conv_param_->kernel_h_ * size_t weight_pack_size = conv_param_->input_channel_ * conv_param_->kernel_w_ * conv_param_->kernel_h_ *
UP_ROUND(conv_param_->output_channel_, C4NUM) * UP_ROUND(conv_param_->output_channel_, C8NUM) * sizeof(float);
UP_ROUND(conv_param_->input_channel_, C4NUM) * sizeof(float);
weight_ptr_ = reinterpret_cast<float *>(malloc(weight_pack_size)); weight_ptr_ = reinterpret_cast<float *>(malloc(weight_pack_size));
if (weight_ptr_ == nullptr) { if (weight_ptr_ == nullptr) {
MS_LOG(ERROR) << "deconv malloc weight_ptr_ error!"; MS_LOG(ERROR) << "deconv malloc weight_ptr_ error!";
return RET_ERROR; return RET_ERROR;
} }
memset(weight_ptr_, 0, weight_pack_size); memset(weight_ptr_, 0, weight_pack_size);
PackDeConvWeightFp32(reinterpret_cast<float *>(inputs_[1]->Data()), weight_ptr_, conv_param_->input_channel_, PackNHWCToC8HWN8Fp32(reinterpret_cast<float *>(inputs_[1]->Data()), weight_ptr_, conv_param_->input_channel_,
conv_param_->output_channel_, conv_param_->kernel_w_ * conv_param_->kernel_h_); kernel_plane_, conv_param_->output_channel_);
return RET_OK; return RET_OK;
} }
int DeConvolutionCPUKernel::InitParam() { int DeConvolutionCPUKernel::InitParam() {
matmul_param_ = new StrassenMatMulParameter(); input_plane_ = conv_param_->input_h_ * conv_param_->input_w_;
matmul_param_->row_ = conv_param_->input_h_ * conv_param_->input_w_; kernel_plane_ = conv_param_->kernel_w_ * conv_param_->kernel_h_;
matmul_param_->deep_ = UP_DIV(conv_param_->input_channel_, C4NUM); output_plane_ = conv_param_->output_h_ * conv_param_->output_w_;
matmul_param_->col_ = UP_DIV(conv_param_->output_channel_, 4) * conv_param_->kernel_w_ * conv_param_->kernel_h_;
matmul_param_->a_stride_ = matmul_param_->row_ * C4NUM; matmul_param_->row_ = input_plane_;
matmul_param_->b_stride_ = matmul_param_->deep_ * C4NUM * C4NUM; matmul_param_->deep_ = conv_param_->input_channel_;
matmul_param_->c_stride_ = matmul_param_->row_ * C4NUM; matmul_param_->col_ = conv_param_->output_channel_ * kernel_plane_;
matmul_param_->row_8_ = UP_ROUND(matmul_param_->row_, C8NUM);
thread_hw_count_ = MSMIN(opParameter->thread_num_, matmul_param_->row_); matmul_param_->col_8_ = UP_ROUND(conv_param_->output_channel_, C8NUM) * kernel_plane_;
thread_hw_stride_ = UP_DIV(matmul_param_->row_, thread_hw_count_);
thread_count_ = MSMIN(opParameter->thread_num_, UP_DIV(conv_param_->output_channel_, C8NUM));
thread_co4_count_ = MSMIN(opParameter->thread_num_, UP_DIV(conv_param_->output_channel_, C4NUM)); thread_stride_ = UP_DIV(UP_DIV(conv_param_->output_channel_, C8NUM), thread_count_);
thread_co_stride_ = UP_DIV(UP_DIV(conv_param_->output_channel_, C4NUM), thread_co4_count_) * C4NUM;
pack_input_ = reinterpret_cast<float *>(malloc(matmul_param_->row_8_ * matmul_param_->deep_ * sizeof(float)));
tmp_buffer_ = if (pack_input_ == nullptr) {
reinterpret_cast<float *>(malloc(matmul_param_->a_stride_ * matmul_param_->deep_ * C4NUM * sizeof(float))); MS_LOG(ERROR) << "deconv Malloc pack_input_ error!";
if (tmp_buffer_ == nullptr) {
MS_LOG(ERROR) << "Conv1x1 Malloc tmp_buffer_ error!";
return RET_ERROR; return RET_ERROR;
} }
tmp_output_ = reinterpret_cast<float *>(malloc(matmul_param_->row_ * matmul_param_->col_ * C4NUM * sizeof(float))); pack_output_ =
if (tmp_output_ == nullptr) { reinterpret_cast<float *>(malloc(UP_ROUND(conv_param_->output_channel_, C8NUM) * output_plane_ * sizeof(float)));
MS_LOG(ERROR) << "Conv1x1 Malloc tmp_output_ error!"; if (pack_output_ == nullptr) {
return RET_ERROR; MS_LOG(ERROR) << "deconv Malloc pack_output_ error!";
}
c4_input_ =
reinterpret_cast<float *>(malloc(inputs_[0]->ElementsC4Num() / conv_param_->input_batch_ * sizeof(float)));
if (c4_input_ == nullptr) {
MS_LOG(ERROR) << "Conv1x1 Malloc c4_input_ error!";
return RET_NULL_PTR; return RET_NULL_PTR;
} }
c4_output_ = tmp_buffer_ = reinterpret_cast<float *>(malloc(matmul_param_->row_8_ * matmul_param_->col_8_ * sizeof(float)));
reinterpret_cast<float *>(malloc(outputs_[0]->ElementsC4Num() / conv_param_->output_batch_ * sizeof(float))); if (tmp_buffer_ == nullptr) {
if (c4_output_ == nullptr) { MS_LOG(ERROR) << "Conv1x1 Malloc tmp_buffer_ error!";
MS_LOG(ERROR) << "Conv1x1 Malloc c4_output_ error!"; return RET_ERROR;
return RET_NULL_PTR;
} }
return RET_OK; return RET_OK;
} }
...@@ -132,6 +133,7 @@ int DeConvFp32Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) { ...@@ -132,6 +133,7 @@ int DeConvFp32Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) {
} }
return RET_OK; return RET_OK;
} }
int DeConvFp32PostRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) { int DeConvFp32PostRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) {
auto deconv = reinterpret_cast<DeConvolutionCPUKernel *>(cdata); auto deconv = reinterpret_cast<DeConvolutionCPUKernel *>(cdata);
auto error_code = deconv->DoPostFunc(task_id); auto error_code = deconv->DoPostFunc(task_id);
...@@ -141,51 +143,39 @@ int DeConvFp32PostRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) { ...@@ -141,51 +143,39 @@ int DeConvFp32PostRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) {
} }
return RET_OK; return RET_OK;
} }
// Per-task matmul stage of the deconvolution.
// NOTE(review): this listing looks like a side-by-side diff -- on two-column lines the left text
// is the pre-change code and the right text is the post-change code.
// Post-change behaviour:
//   - oc = MSMIN(thread_stride_, UP_DIV(output_channel_, C8NUM) - task_id * thread_stride_),
//     i.e. a count of C8NUM-channel blocks; a task with oc <= 0 returns RET_OK immediately.
//   - MatMul is called with pack_input_, the weight slice at
//     task_id * thread_stride_ * C8NUM * kernel_plane_ * deep_, the tmp_buffer_ slice at
//     task_id * thread_stride_ * C8NUM * kernel_plane_ * row_8_, no bias (nullptr), ActType_No,
//     deep_, row_8_ and oc * C8NUM * kernel_plane_ as the last argument.
//   - Always returns RET_OK.
int DeConvolutionCPUKernel::DoDeconv(int task_id) { int DeConvolutionCPUKernel::DoDeconv(int task_id) {
matmul_param_->row_ = MSMIN(thread_hw_stride_, matmul_param_->row_ - task_id * thread_hw_stride_); int oc = MSMIN(thread_stride_, UP_DIV(conv_param_->output_channel_, C8NUM) - task_id * thread_stride_);
if (matmul_param_->row_ <= 0) { if (oc <= 0) {
return RET_OK; return RET_OK;
} }
int error_code = DeConvFp32(c4_input_ + task_id * thread_hw_stride_ * C4NUM, weight_ptr_, MatMul(pack_input_, weight_ptr_ + task_id * thread_stride_ * C8NUM * kernel_plane_ * matmul_param_->deep_,
tmp_output_ + task_id * thread_hw_stride_ * C4NUM, tmp_buffer_ + task_id * thread_stride_ * C8NUM * kernel_plane_ * matmul_param_->row_8_, nullptr, ActType_No,
tmp_buffer_ + task_id * thread_hw_stride_ * matmul_param_->deep_ * C4NUM, *matmul_param_); matmul_param_->deep_, matmul_param_->row_8_, oc * C8NUM * kernel_plane_);
if (error_code != RET_OK) {
MS_LOG(ERROR) << "DeConvFp32 error! error code: " << error_code;
return error_code;
}
matmul_param_->row_ = conv_param_->input_h_ * conv_param_->input_w_;
return RET_OK; return RET_OK;
} }
// Per-task post-processing stage of the deconvolution (runs after DoDeconv has filled tmp_buffer_).
// NOTE(review): this listing looks like a side-by-side diff -- on two-column lines the left text
// is the pre-change code and the right text is the post-change code.
// Post-change behaviour:
//   - oc = MSMIN(thread_stride_ * C8NUM, output_channel_ - task_id * thread_stride_ * C8NUM),
//     i.e. a count of output channels; a task with oc <= 0 returns RET_OK immediately.
//   - bias is nullptr when bias_data_ is nullptr, else bias_data_ advanced by
//     thread_stride_ * task_id * C8NUM floats.
//   - DeConvPostFp32C8x8 receives the task's slices of tmp_buffer_, pack_output_ and output_ptr_,
//     plus the bias, oc and conv_param_.
//   - Always returns RET_OK.
int DeConvolutionCPUKernel::DoPostFunc(int task_id) { int DeConvolutionCPUKernel::DoPostFunc(int task_id) {
int input_plane = conv_param_->input_h_ * conv_param_->input_w_; int oc = MSMIN(thread_stride_ * C8NUM, conv_param_->output_channel_ - task_id * thread_stride_ * C8NUM);
int kernel_plane = conv_param_->kernel_w_ * conv_param_->kernel_h_; if (oc <= 0) {
int output_plane = conv_param_->output_h_ * conv_param_->output_w_;
int cur_oc = MSMIN(thread_co_stride_, conv_param_->output_channel_ - task_id * thread_co_stride_);
if (cur_oc <= 0) {
return RET_OK; return RET_OK;
} }
float *cur_bias = float *bias =
(bias_data_ == nullptr) ? nullptr : reinterpret_cast<float *>(bias_data_) + thread_co_stride_ * task_id; (bias_data_ == nullptr) ? nullptr : reinterpret_cast<float *>(bias_data_) + thread_stride_ * task_id * C8NUM;
DeConvPostFp32(tmp_output_ + thread_co_stride_ * task_id * input_plane * kernel_plane, DeConvPostFp32C8x8(tmp_buffer_ + task_id * thread_stride_ * C8NUM * kernel_plane_ * matmul_param_->row_8_,
c4_output_ + thread_co_stride_ * task_id * output_plane, output_ptr_ + thread_co_stride_ * task_id, pack_output_ + task_id * thread_stride_ * C8NUM * output_plane_, bias,
cur_bias, cur_oc, input_plane, kernel_plane, output_plane, conv_param_); output_ptr_ + task_id * thread_stride_ * C8NUM, oc, conv_param_);
return RET_OK; return RET_OK;
} }
int DeConvolutionCPUKernel::Init() { int DeConvolutionCPUKernel::Init() {
int error_code = ConvolutionBaseCPUKernel::Init(); ConvolutionBaseCPUKernel::Init();
if (error_code != RET_OK) {
MS_LOG(ERROR) << "Conv base init error!";
return error_code;
}
error_code = InitParam(); int error_code = InitParam();
if (error_code != RET_OK) { if (error_code != RET_OK) {
MS_LOG(ERROR) << "deconv InitParam error!"; MS_LOG(ERROR) << "deconv InitParam error!";
return error_code; return error_code;
...@@ -204,20 +194,18 @@ int DeConvolutionCPUKernel::Run() { ...@@ -204,20 +194,18 @@ int DeConvolutionCPUKernel::Run() {
float *src_out = reinterpret_cast<float *>(outputs_[0]->Data()); float *src_out = reinterpret_cast<float *>(outputs_[0]->Data());
for (int batch_index = 0; batch_index < conv_param_->input_batch_; batch_index++) { for (int batch_index = 0; batch_index < conv_param_->input_batch_; batch_index++) {
input_ptr_ = src_in + batch_index * conv_param_->input_w_ * conv_param_->input_h_ * conv_param_->input_channel_; input_ptr_ = src_in + batch_index * input_plane_ * conv_param_->input_channel_;
output_ptr_ = output_ptr_ = src_out + batch_index * output_plane_ * conv_param_->output_channel_;
src_out + batch_index * conv_param_->output_h_ * conv_param_->output_w_ * conv_param_->output_channel_;
PackNHWCToNC4HW4Fp32(input_ptr_, c4_input_, 1, conv_param_->input_h_ * conv_param_->input_w_, RowMajor2Col8Major(input_ptr_, pack_input_, input_plane_, conv_param_->input_channel_);
conv_param_->input_channel_);
int error_code = LiteBackendParallelLaunch(DeConvFp32Run, this, thread_hw_count_); int error_code = LiteBackendParallelLaunch(DeConvFp32Run, this, thread_count_);
if (error_code != RET_OK) { if (error_code != RET_OK) {
MS_LOG(ERROR) << "deconv fp32 run error! error_code[" << error_code << "]"; MS_LOG(ERROR) << "deconv fp32 run error! error_code[" << error_code << "]";
return RET_ERROR; return RET_ERROR;
} }
error_code = LiteBackendParallelLaunch(DeConvFp32PostRun, this, thread_co4_count_); error_code = LiteBackendParallelLaunch(DeConvFp32PostRun, this, thread_count_);
if (error_code != RET_OK) { if (error_code != RET_OK) {
MS_LOG(ERROR) << "deconv fp32 postrun error! error_code[" << error_code << "]"; MS_LOG(ERROR) << "deconv fp32 postrun error! error_code[" << error_code << "]";
return RET_ERROR; return RET_ERROR;
......
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_DECONVOLUTION_H_ #ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_DECONVOLUTION_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_DECONVOLUTION_H_ #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_DECONVOLUTION_H_
#include <float.h>
#include <vector> #include <vector>
#include "src/lite_kernel.h" #include "src/lite_kernel.h"
#include "src/kernel_registry.h" #include "src/kernel_registry.h"
...@@ -24,13 +25,16 @@ ...@@ -24,13 +25,16 @@
#include "schema/model_generated.h" #include "schema/model_generated.h"
#include "src/runtime/kernel/arm/base/convolution_base.h" #include "src/runtime/kernel/arm/base/convolution_base.h"
#include "src/runtime/kernel/arm/opclib/fp32/deconv.h" #include "src/runtime/kernel/arm/opclib/fp32/deconv.h"
#include "src/runtime/kernel/arm/opclib/fp32/matmul.h"
namespace mindspore::kernel { namespace mindspore::kernel {
class DeConvolutionCPUKernel : public ConvolutionBaseCPUKernel { class DeConvolutionCPUKernel : public ConvolutionBaseCPUKernel {
public: public:
DeConvolutionCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, DeConvolutionCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx) const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx)
: ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {} : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {
matmul_param_ = new MatMulParameter();
}
~DeConvolutionCPUKernel() override; ~DeConvolutionCPUKernel() override;
int Init() override; int Init() override;
int Run() override; int Run() override;
...@@ -45,19 +49,18 @@ class DeConvolutionCPUKernel : public ConvolutionBaseCPUKernel { ...@@ -45,19 +49,18 @@ class DeConvolutionCPUKernel : public ConvolutionBaseCPUKernel {
int InitWeightBias(); int InitWeightBias();
private: private:
StrassenMatMulParameter *matmul_param_; MatMulParameter *matmul_param_;
int thread_hw_count_; int input_plane_;
int thread_hw_stride_; int kernel_plane_;
int thread_co4_count_; int output_plane_;
int thread_co_stride_; int thread_count_;
int thread_stride_;
float *weight_ptr_; float *weight_ptr_;
float *pack_input_;
float *pack_output_;
float *tmp_buffer_; float *tmp_buffer_;
float *tmp_output_;
float *c4_input_;
float *c4_output_;
float *input_ptr_; float *input_ptr_;
float *output_ptr_; float *output_ptr_;
}; };
} // namespace mindspore::kernel } // namespace mindspore::kernel
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_DECONVOLUTION_H_ #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_DECONVOLUTION_H_
...@@ -99,8 +99,8 @@ int FullconnectionCPUKernel::DoMatmul(int task_id) { ...@@ -99,8 +99,8 @@ int FullconnectionCPUKernel::DoMatmul(int task_id) {
MatMul(a_c8_ptr_, b_r8_ptr_ + task_id * thread_stride_ * C8NUM * fc_param_->deep_, MatMul(a_c8_ptr_, b_r8_ptr_ + task_id * thread_stride_ * C8NUM * fc_param_->deep_,
c_r8x8_ptr_ + task_id * thread_stride_ * C8NUM * fc_param_->row_8_, c_r8x8_ptr_ + task_id * thread_stride_ * C8NUM * fc_param_->row_8_,
bias_ptr_ + task_id * thread_stride_ * C8NUM, fc_param_->maxf_, fc_param_->minf_, fc_param_->deep_, bias_ptr_ + task_id * thread_stride_ * C8NUM, fc_param_->act_type_, fc_param_->deep_, fc_param_->row_8_,
fc_param_->row_8_, cur_oc * 8); cur_oc * 8);
return RET_OK; return RET_OK;
} }
......
...@@ -82,9 +82,9 @@ int FullconnectionInt8CPUKernel::Init() { ...@@ -82,9 +82,9 @@ int FullconnectionInt8CPUKernel::Init() {
double real_multiplier = quant_params_.input.scale_ * quant_params_.weight.scale_ / quant_params_.output.scale_; double real_multiplier = quant_params_.input.scale_ * quant_params_.weight.scale_ / quant_params_.output.scale_;
QuantizeRoundParameter(real_multiplier, &quant_params_.quant_multiplier, &quant_params_.left_shift, QuantizeRoundParameter(real_multiplier, &quant_params_.quant_multiplier, &quant_params_.left_shift,
&quant_params_.right_shift); &quant_params_.right_shift);
CalculateActivationRangeQuantized(fc_param_->maxf_, fc_param_->minf_, quant_params_.output.scale_, CalculateActivationRangeQuantized(fc_param_->act_type_ == ActType_Relu, fc_param_->act_type_ == ActType_Relu6,
quant_params_.output.zp_, &quant_params_.out_act_max, &quant_params_.out_act_min); quant_params_.output.zp_, quant_params_.output.scale_, &quant_params_.out_act_max,
&quant_params_.out_act_min);
return RET_OK; return RET_OK;
} }
......
...@@ -63,23 +63,29 @@ void MatrixMultiAdd(float *c11, float *c12, float *c21, float *c22, float *x_ptr ...@@ -63,23 +63,29 @@ void MatrixMultiAdd(float *c11, float *c12, float *c21, float *c22, float *x_ptr
return; return;
} }
/*
 * Converts a channel-tiled convolution result into a channel-last output and applies the post ops.
 *
 * src_ptr_       : input laid out as [ceil(output_channel / size)][plane_size][size].
 * out_ptr        : output written at out_ptr[hw * stride + oc].
 * bias_ptr       : optional per-channel bias (may be nullptr), indexed by oc.
 * output_channel : number of real (unpadded) output channels.
 * plane_size     : number of spatial positions per channel.
 * stride         : distance, in floats, between consecutive spatial positions in out_ptr.
 * is_relu        : clamp the result to be >= 0.
 * is_relu6       : clamp the result to [0, 6].
 * size           : channel tile width of the source layout (C4NUM / C8NUM at the call sites in this file).
 */
void PostConvFuncComm(const float *src_ptr_, float *out_ptr, const float *bias_ptr, size_t output_channel,
                      size_t plane_size, size_t stride, bool is_relu, bool is_relu6, int size) {
  if (size <= 0) {
    /* a non-positive tile would divide by zero below */
    return;
  }
  /* index in size_t: the bounds are size_t, and int index math can overflow on large planes */
  const size_t tile = static_cast<size_t>(size);
  for (size_t oc = 0; oc < output_channel; oc++) {
    /* start of this channel inside its tile group */
    const size_t src_base = (oc / tile) * tile * plane_size + (oc % tile);
    for (size_t hw = 0; hw < plane_size; hw++) {
      float value = src_ptr_[src_base + hw * tile];
      if (bias_ptr != nullptr) {
        value += bias_ptr[oc];
      }
      /* both relu and relu6 share the lower bound of 0 */
      if ((is_relu || is_relu6) && value < 0.f) {
        value = 0.f;
      }
      if (is_relu6 && value > 6.f) {
        value = 6.f;
      }
      out_ptr[hw * stride + oc] = value;
    }
  }
  return;
}
void PostConvFuncFp32C4(const float *c4_out_ptr, float *out_ptr, const float *bias_ptr, size_t output_channel,
size_t plane_size, size_t stride, bool is_relu, bool is_relu6) {
#ifndef ENABLE_ARM64
PostConvFuncComm(c4_out_ptr, out_ptr, bias_ptr, output_channel, plane_size, stride, is_relu, is_relu6, C4NUM);
#else #else
if (bias_ptr != nullptr) { if (bias_ptr != nullptr) {
if (is_relu) { if (is_relu) {
...@@ -102,3 +108,8 @@ void PostConvFuncFp32(const float *c4_out_ptr, float *out_ptr, const float *bias ...@@ -102,3 +108,8 @@ void PostConvFuncFp32(const float *c4_out_ptr, float *out_ptr, const float *bias
return; return;
} }
/*
 * Post-processing entry point for sources tiled by 8 channels: forwards every argument to
 * PostConvFuncComm with C8NUM as the channel tile width.
 */
void PostConvFuncFp32C8(const float *c8_out_ptr, float *out_ptr, const float *bias_ptr, size_t output_channel,
                        size_t plane_size, size_t stride, bool is_relu, bool is_relu6) {
  const int channel_tile = C8NUM;
  PostConvFuncComm(c8_out_ptr, out_ptr, bias_ptr, output_channel, plane_size, stride, is_relu, is_relu6,
                   channel_tile);
}
...@@ -27,7 +27,9 @@ ...@@ -27,7 +27,9 @@
extern "C" { extern "C" {
#endif #endif
void PostConvFuncFp32(const float *c4_out_ptr, float *out_ptr, const float *bias_ptr, size_t output_channel, void PostConvFuncFp32C4(const float *c4_out_ptr, float *out_ptr, const float *bias_ptr, size_t output_channel,
size_t plane_size, size_t stride, bool is_relu, bool is_relu6);
void PostConvFuncFp32C8(const float *c8_out_ptr, float *out_ptr, const float *bias_ptr, size_t output_channel,
size_t plane_size, size_t stride, bool is_relu, bool is_relu6); size_t plane_size, size_t stride, bool is_relu, bool is_relu6);
void MatrixAdd(const float *a_ptr, const float *b_ptr, float *dst, size_t a_stride, size_t b_stride, size_t c_stride, void MatrixAdd(const float *a_ptr, const float *b_ptr, float *dst, size_t a_stride, size_t b_stride, size_t c_stride,
size_t row, size_t col); size_t row, size_t col);
...@@ -60,4 +62,3 @@ void DeconvDwFp32Center(float *dst, const float *src, const float *weight, size_ ...@@ -60,4 +62,3 @@ void DeconvDwFp32Center(float *dst, const float *src, const float *weight, size_
#endif #endif
#endif /* MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_FP32_COMMON_FUNC_H_ */ #endif /* MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_FP32_COMMON_FUNC_H_ */
...@@ -38,8 +38,52 @@ int DeConvFp32(const float *input, const float *weight, float *output, float *tm ...@@ -38,8 +38,52 @@ int DeConvFp32(const float *input, const float *weight, float *output, float *tm
return StrassenMatmul(input, weight, output, &matmul_param, FP32_STRASSEN_MAX_RECURSION, 0, tmp_buffer); return StrassenMatmul(input, weight, output, &matmul_param, FP32_STRASSEN_MAX_RECURSION, 0, tmp_buffer);
} }
int DeConvPostFp32(const float *src, float *tmp_c4, float *dst, const float *bias, int output_channel, int input_plane, int DeConvPostFp32C8x8(const float *src, float *tmp, const float *bias, float *dst, int output_channel,
int kernel_plane, int output_plane, ConvParameter *conv_param) { ConvParameter *conv_param) {
/* row8x8-major(ih*iw x oc*kh*kw) -> row8-major(oh*ow x oc) */
size_t input_plane = conv_param->input_w_ * conv_param->input_h_;
size_t kernel_plane = conv_param->kernel_w_ * conv_param->kernel_h_;
size_t output_plane = conv_param->output_w_ * conv_param->output_h_;
int oc8 = UP_DIV(output_channel, C8NUM);
int in_plane8 = UP_ROUND(input_plane, C8NUM);
for (int c = 0; c < oc8; c++) {
float *dst_ptr = tmp + c * output_plane * C8NUM;
const float *src_ptr = src + c * in_plane8 * kernel_plane * C8NUM;
memset(dst_ptr, 0, output_plane * C8NUM * sizeof(int32_t));
for (int ih = 0; ih < conv_param->input_h_; ih++) {
for (int iw = 0; iw < conv_param->input_w_; iw++) {
int oh = ih * conv_param->stride_h_ - conv_param->pad_h_;
int ow = iw * conv_param->stride_w_ - conv_param->pad_w_;
int kh_start = MSMAX(0, UP_DIV(-oh, conv_param->dilation_h_));
int kh_end = MSMIN(conv_param->kernel_h_, UP_DIV(conv_param->output_h_ - oh, conv_param->dilation_h_));
int kw_start = MSMAX(0, UP_DIV(-ow, conv_param->dilation_w_));
int kw_end = MSMIN(conv_param->kernel_w_, UP_DIV(conv_param->output_w_ - ow, conv_param->dilation_w_));
for (int kh = kh_start; kh < kh_end; kh++) {
for (int kw = kw_start; kw < kw_end; kw++) {
int src_index = ih * conv_param->input_w_ * C8NUM + iw * C8NUM +
kh * in_plane8 * conv_param->kernel_w_ * C8NUM + kw * in_plane8 * C8NUM;
int dst_index = oh * conv_param->output_w_ * C8NUM + ow * C8NUM +
kh * conv_param->dilation_h_ * conv_param->output_w_ * C8NUM +
kw * conv_param->dilation_w_ * C8NUM;
for (int i = 0; i < C8NUM; i++) {
dst_ptr[dst_index + i] += src_ptr[src_index + i];
}
} /*kw*/
} /*kh*/
} /*iw*/
} /*ih*/
} /*oc8*/
PostConvFuncFp32C8(tmp, dst, bias, output_channel, output_plane, conv_param->output_channel_, conv_param->is_relu_,
conv_param->is_relu6_);
return OPCLIB_OK;
}
int DeConvPostFp32C4(const float *src, float *tmp_c4, float *dst, const float *bias, int output_channel,
int input_plane, int kernel_plane, int output_plane, ConvParameter *conv_param) {
int oc4 = UP_DIV(output_channel, C4NUM); int oc4 = UP_DIV(output_channel, C4NUM);
for (int c = 0; c < oc4; c++) { for (int c = 0; c < oc4; c++) {
float *dst_ptr = tmp_c4 + c * output_plane * C4NUM; float *dst_ptr = tmp_c4 + c * output_plane * C4NUM;
...@@ -71,8 +115,7 @@ int DeConvPostFp32(const float *src, float *tmp_c4, float *dst, const float *bia ...@@ -71,8 +115,7 @@ int DeConvPostFp32(const float *src, float *tmp_c4, float *dst, const float *bia
} /*ih*/ } /*ih*/
} /*oc4*/ } /*oc4*/
PostConvFuncFp32(tmp_c4, dst, bias, output_channel, output_plane, conv_param->output_channel_, conv_param->is_relu_, PostConvFuncFp32C4(tmp_c4, dst, bias, output_channel, output_plane, conv_param->output_channel_, conv_param->is_relu_,
conv_param->is_relu6_); conv_param->is_relu6_);
return OPCLIB_OK; return OPCLIB_OK;
} }
...@@ -26,8 +26,9 @@ void PackDeConvWeightFp32(const float *weight, float *dst, int input_channel, in ...@@ -26,8 +26,9 @@ void PackDeConvWeightFp32(const float *weight, float *dst, int input_channel, in
int DeConvFp32(const float *input, const float *weight, float *output, float *tmp_buffer, int DeConvFp32(const float *input, const float *weight, float *output, float *tmp_buffer,
StrassenMatMulParameter matmul_param); StrassenMatMulParameter matmul_param);
int DeConvPostFp32(const float *src, float *tmp_c4, float *dst, const float *bias, int output_channel, int input_plane, int DeConvPostFp32C4(const float *src, float *tmp_c4, float *dst, const float *bias, int output_channel,
int kernel_plane, int output_plane, ConvParameter *conv_param); int input_plane, int kernel_plane, int output_plane, ConvParameter *conv_param);
int DeConvPostFp32C8x8(const float *src, float *tmp_out, const float *bias, float *dst, int output_channel,
ConvParameter *conv_param);
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_FP32_DECONV_H_ #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_FP32_DECONV_H_
...@@ -48,10 +48,11 @@ void Row8x8Major2RowMajor(float *src_ptr, float *dst_ptr, int row, int col) { ...@@ -48,10 +48,11 @@ void Row8x8Major2RowMajor(float *src_ptr, float *dst_ptr, int row, int col) {
dst_ptr[r * col + c] = src_ptr[cd8 * row8 * 8 + r * 8 + cm8]; dst_ptr[r * col + c] = src_ptr[cd8 * row8 * 8 + r * 8 + cm8];
} }
} }
return;
} }
void MatMul8x8(const float *a, const float *b, float *c, const float *bias, float maxf, float minf, int deep, void MatMul8x8(const float *a, const float *b, float *c, const float *bias, ActType act_type, int deep, int row_8_,
int row_8_, int col_8_) { int col_8_) {
/* col8-major * row8-major => col8x8-major */ /* col8-major * row8-major => col8x8-major */
for (int row = 0; row < row_8_; row++) { for (int row = 0; row < row_8_; row++) {
for (int col = 0; col < col_8_; col++) { for (int col = 0; col < col_8_; col++) {
...@@ -64,19 +65,25 @@ void MatMul8x8(const float *a, const float *b, float *c, const float *bias, floa ...@@ -64,19 +65,25 @@ void MatMul8x8(const float *a, const float *b, float *c, const float *bias, floa
size_t bi = c8div * deep * 8 + d * 8 + c8mod; size_t bi = c8div * deep * 8 + d * 8 + c8mod;
value = value + a[ai] * b[bi]; value = value + a[ai] * b[bi];
} }
if (bias != nullptr) {
value += bias[col]; value += bias[col];
value = MSMIN(maxf, value); }
value = MSMAX(minf, value); if (act_type == ActType_Relu6) value = MSMIN(6.0f, value);
if (act_type != ActType_No) value = MSMAX(0.0f, value);
c[ci] = value; c[ci] = value;
} }
} }
return;
} }
void MatMul(const float *a, const float *b, float *c, const float *bias, float maxf, float minf, int deep, int row_8_, void MatMul(const float *a, const float *b, float *c, const float *bias, ActType act_type, int deep, int row_8_,
int col_8_) { int col_8_) {
#ifdef __aarch64__ #ifdef __aarch64__
float minf = (act_type == ActType_No) ? FLT_MIN : 0.f;
float maxf = (act_type == ActType_Relu6) ? 6.0f : FLT_MAX;
MatMulFloatNeon64(a, b, c, bias, maxf, minf, deep, row_8_, col_8_); MatMulFloatNeon64(a, b, c, bias, maxf, minf, deep, row_8_, col_8_);
#else #else
MatMul8x8(a, b, c, bias, maxf, minf, deep, row_8_, col_8_); MatMul8x8(a, b, c, bias, act_type, deep, row_8_, col_8_);
#endif #endif
return;
} }
...@@ -17,12 +17,12 @@ ...@@ -17,12 +17,12 @@
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_FP32_MATMUL_H_ #ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_FP32_MATMUL_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_FP32_MATMUL_H_ #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_FP32_MATMUL_H_
#include <float.h>
#include "src/runtime/kernel/arm/opclib/errorcode.h" #include "src/runtime/kernel/arm/opclib/errorcode.h"
#include "src/runtime/kernel/arm/opclib/op_base.h" #include "src/runtime/kernel/arm/opclib/op_base.h"
#include "src/runtime/kernel/arm/opclib/matmul.h" #include "src/runtime/kernel/arm/opclib/matmul.h"
void MatMul(const float *a, const float *b, float *c, const float *bias, float maxf, float minf, int depth, int row, void MatMul(const float *a, const float *b, float *c, const float *bias, ActType act_type, int depth, int row, int col);
int col);
void RowMajor2Row8Major(float *src_ptr, float *dst_ptr, int row, int col); void RowMajor2Row8Major(float *src_ptr, float *dst_ptr, int row, int col);
void RowMajor2Col8Major(float *src_ptr, float *dst_ptr, int row, int col); void RowMajor2Col8Major(float *src_ptr, float *dst_ptr, int row, int col);
void Row8x8Major2RowMajor(float *src_ptr, float *dst_ptr, int row, int col); void Row8x8Major2RowMajor(float *src_ptr, float *dst_ptr, int row, int col);
......
...@@ -25,15 +25,18 @@ int DeConvInt8(const int8_t *input, const int8_t *weight, int32_t *output, size_ ...@@ -25,15 +25,18 @@ int DeConvInt8(const int8_t *input, const int8_t *weight, int32_t *output, size_
int DeConvPostInt8(const int32_t *src, const int32_t *bias, int32_t *tmp, int8_t *out, int output_channel, int DeConvPostInt8(const int32_t *src, const int32_t *bias, int32_t *tmp, int8_t *out, int output_channel,
ConvParameter *conv_param) { ConvParameter *conv_param) {
int oc8 = UP_DIV(output_channel, C8NUM); /* row8x8-major(ih*iw x oc*kh*kw) -> row8x8-major(oh*ow x oc) */
size_t input_plane = conv_param->input_w_ * conv_param->input_h_; size_t input_plane = conv_param->input_w_ * conv_param->input_h_;
size_t kernel_plane = conv_param->kernel_w_ * conv_param->kernel_h_; size_t kernel_plane = conv_param->kernel_w_ * conv_param->kernel_h_;
size_t output_plane = conv_param->output_w_ * conv_param->output_h_; size_t output_plane = conv_param->output_w_ * conv_param->output_h_;
int oc8 = UP_DIV(output_channel, C8NUM);
int in_plane8 = UP_ROUND(input_plane, 8);
int out_plane8 = UP_ROUND(output_plane, 8);
for (int c = 0; c < oc8; c++) { for (int c = 0; c < oc8; c++) {
int32_t *dst_ptr = tmp + c * output_plane * C8NUM; int32_t *dst_ptr = tmp + c * out_plane8 * C8NUM;
const int32_t *src_ptr = src + c * input_plane * kernel_plane * C8NUM; const int32_t *src_ptr = src + c * in_plane8 * kernel_plane * C8NUM;
memset(dst_ptr, 0, output_plane * C8NUM * sizeof(int32_t)); memset(dst_ptr, 0, out_plane8 * C8NUM * sizeof(int32_t));
for (int ih = 0; ih < conv_param->input_h_; ih++) { for (int ih = 0; ih < conv_param->input_h_; ih++) {
for (int iw = 0; iw < conv_param->input_w_; iw++) { for (int iw = 0; iw < conv_param->input_w_; iw++) {
...@@ -60,7 +63,7 @@ int DeConvPostInt8(const int32_t *src, const int32_t *bias, int32_t *tmp, int8_t ...@@ -60,7 +63,7 @@ int DeConvPostInt8(const int32_t *src, const int32_t *bias, int32_t *tmp, int8_t
} /*ih*/ } /*ih*/
} /*oc8*/ } /*oc8*/
PostFuncInt8(tmp, bias, out, output_channel, output_plane, UP_ROUND(output_plane, 8), PostFuncInt8(tmp, bias, out, output_channel, output_plane, out_plane8,
conv_param->conv_quant_arg_.quant_multiplier_[0], conv_param->conv_quant_arg_.left_shift_[0], conv_param->conv_quant_arg_.quant_multiplier_[0], conv_param->conv_quant_arg_.left_shift_[0],
conv_param->conv_quant_arg_.right_shift_[0], conv_param->conv_quant_arg_.quant_args_[2][0].zp_, conv_param->conv_quant_arg_.right_shift_[0], conv_param->conv_quant_arg_.quant_args_[2][0].zp_,
conv_param->conv_quant_arg_.out_act_min_[0], conv_param->conv_quant_arg_.out_act_max_[0]); conv_param->conv_quant_arg_.out_act_min_[0], conv_param->conv_quant_arg_.out_act_max_[0]);
......
...@@ -19,6 +19,8 @@ ...@@ -19,6 +19,8 @@
#include "src/runtime/kernel/arm/opclib/op_base.h" #include "src/runtime/kernel/arm/opclib/op_base.h"
/* Activation selector carried by MatMulParameter. */
enum ActType {
  ActType_No = 0,   /* no activation */
  ActType_Relu = 1, /* lower bound of 0 */
  ActType_Relu6 = 2 /* lower bound of 0 and upper bound of 6 */
};
struct MatMulParameter { struct MatMulParameter {
OpParameter op_parameter_; OpParameter op_parameter_;
int row_; int row_;
...@@ -26,12 +28,10 @@ struct MatMulParameter { ...@@ -26,12 +28,10 @@ struct MatMulParameter {
int row_8_; int row_8_;
int col_8_; int col_8_;
int deep_; int deep_;
float minf_;
float maxf_;
bool has_bias_; bool has_bias_;
bool a_transpose_; /* false : row-major */ bool a_transpose_; /* false : row-major */
bool b_transpose_; /* true : col-major */ bool b_transpose_; /* true : col-major */
ActType act_type_;
}; };
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_MATMUL_H_ #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_MATMUL_H_
...@@ -150,23 +150,21 @@ void PackWeightInt8Opt(int8_t *weight_data, ConvParameter *conv_param, int8_t *p ...@@ -150,23 +150,21 @@ void PackWeightInt8Opt(int8_t *weight_data, ConvParameter *conv_param, int8_t *p
} }
void Conv1x1InputPackFp32(const float *src, float *dst, ConvParameter *conv_param) { void Conv1x1InputPackFp32(const float *src, float *dst, ConvParameter *conv_param) {
for (int c = 0; c < UP_DIV(conv_param->input_channel_, C4NUM); c++) { /* support nhwc */
const float *src_c_ptr = src + c * conv_param->input_h_ * conv_param->input_w_ * C4NUM;
float *dst_c_ptr = dst + c * conv_param->output_h_ * conv_param->output_w_ * C4NUM;
for (int dst_h = 0; dst_h < conv_param->output_h_; dst_h++) { for (int dst_h = 0; dst_h < conv_param->output_h_; dst_h++) {
int src_h = dst_h * conv_param->stride_h_ - conv_param->pad_h_; int src_h = dst_h * conv_param->stride_h_ - conv_param->pad_h_;
if (src_h < 0 || src_h >= conv_param->input_h_) { if (src_h < 0 || src_h >= conv_param->input_h_) {
continue; continue;
} }
const float *src_h_ptr = src_c_ptr + src_h * conv_param->input_w_ * C4NUM; const float *src_h_ptr = src + src_h * conv_param->input_w_ * conv_param->input_channel_;
float *dst_h_ptr = dst_c_ptr + dst_h * conv_param->output_w_ * C4NUM; float *dst_h_ptr = dst + dst_h * conv_param->output_w_ * conv_param->input_channel_;
for (int dst_w = 0; dst_w < conv_param->output_w_; dst_w++) { for (int dst_w = 0; dst_w < conv_param->output_w_; dst_w++) {
int src_w = dst_w * conv_param->stride_w_ - conv_param->pad_w_; int src_w = dst_w * conv_param->stride_w_ - conv_param->pad_w_;
if (src_w < 0 || src_w >= conv_param->input_w_) { if (src_w < 0 || src_w >= conv_param->input_w_) {
continue; continue;
} }
memcpy(dst_h_ptr + dst_w * C4NUM, src_h_ptr + src_w * C4NUM, C4NUM * sizeof(float)); memcpy(dst_h_ptr + dst_w * conv_param->input_channel_, src_h_ptr + src_w * conv_param->input_channel_,
} conv_param->input_channel_ * sizeof(float));
} }
} }
return; return;
...@@ -572,6 +570,21 @@ void PackNC4HW4ToNCHWFp32(const void *src, void *dst, int batch, int plane, int ...@@ -572,6 +570,21 @@ void PackNC4HW4ToNCHWFp32(const void *src, void *dst, int batch, int plane, int
} }
} }
/*
 * Repacks a float tensor from NHWC into C8HWN8: channels are split into groups of C8NUM, and
 * inside each group the data is ordered [plane][batch][channel-in-group].
 *
 * Only elements with c < channel are written; when channel is not a multiple of C8NUM the
 * remaining lanes of the last group are left untouched by this function.
 */
void PackNHWCToC8HWN8Fp32(const void *src, void *dst, int batch, int plane, int channel) {
  const float *nhwc = (const float *)src;
  float *packed = (float *)dst;
  for (int n = 0; n < batch; n++) {
    for (int hw = 0; hw < plane; hw++) {
      /* offsets that do not depend on the channel */
      const int src_row = n * plane * channel + hw * channel;
      const int dst_row = hw * batch * C8NUM + n * C8NUM;
      for (int c = 0; c < channel; c++) {
        const int group = c / C8NUM;
        const int lane = c % C8NUM;
        packed[group * batch * plane * C8NUM + dst_row + lane] = nhwc[src_row + c];
      }
    }
  }
}
void PackNHWCToNHWC4Int8(const void *src, void *dst, int batch, int plane, int channel) { void PackNHWCToNHWC4Int8(const void *src, void *dst, int batch, int plane, int channel) {
int c4 = UP_DIV(channel, C4NUM); int c4 = UP_DIV(channel, C4NUM);
int nhwc4_batch_unit_offset = c4 * C4NUM * plane; int nhwc4_batch_unit_offset = c4 * C4NUM * plane;
......
...@@ -69,6 +69,8 @@ void PackNC4HW4ToNHWCFp32(const void *src, void *dst, int batch, int plane, int ...@@ -69,6 +69,8 @@ void PackNC4HW4ToNHWCFp32(const void *src, void *dst, int batch, int plane, int
void PackNC4HW4ToNCHWFp32(const void *src, void *dst, int batch, int plane, int channel); void PackNC4HW4ToNCHWFp32(const void *src, void *dst, int batch, int plane, int channel);
void PackNHWCToC8HWN8Fp32(const void *src, void *dst, int batch, int plane, int channel);
void PackNHWCToNHWC4Int8(const void *src, void *dst, int batch, int plane, int channel); void PackNHWCToNHWC4Int8(const void *src, void *dst, int batch, int plane, int channel);
void PackNHWC4ToNHWCInt8(const void *src, void *dst, int batch, int plane, int channel); void PackNHWC4ToNHWCInt8(const void *src, void *dst, int batch, int plane, int channel);
......
...@@ -21,6 +21,7 @@ ...@@ -21,6 +21,7 @@
#include <math.h> #include <math.h>
#include <stdlib.h> #include <stdlib.h>
#include <limits.h> #include <limits.h>
#include <limits>
struct QuantArg { struct QuantArg {
double scale_; double scale_;
...@@ -112,13 +113,21 @@ inline uint8_t QuantizeToUint8(float real_value, float scale, int32_t zp) { retu ...@@ -112,13 +113,21 @@ inline uint8_t QuantizeToUint8(float real_value, float scale, int32_t zp) { retu
/* Quantizes real_value as round(real_value / scale + zp). The result is not clamped to int8. */
inline int32_t QuantizeToInt8(float real_value, float scale, int32_t zp) { return round(real_value / scale + zp); }

/*
 * Computes the quantized output range implied by the fused activation.
 *
 * is_relu  : raise the lower bound to the quantized value of 0.
 * is_relu6 : additionally lower the upper bound to the quantized value of 6. As in the original
 *            if/else-if chain, is_relu takes precedence when both flags are set.
 * zp       : output zero point.
 * scale    : output quantization scale. This is a real number: it was previously declared
 *            int32_t, which truncated fractional scales (typically to 0) before the division
 *            inside QuantizeToInt8.
 * mini     : receives the lower bound, never below the int8 minimum.
 * maxi     : receives the upper bound, never above the int8 maximum.
 */
inline void CalculateActivationRangeQuantized(bool is_relu, bool is_relu6, int32_t zp, float scale, int *mini,
                                              int *maxi) {
  int32_t min = std::numeric_limits<int8_t>::min();
  int32_t max = std::numeric_limits<int8_t>::max();
  /* quantize only what the selected activation needs, so the no-activation path never divides */
  if (is_relu || is_relu6) {
    int32_t quantized_zero = QuantizeToInt8(0, scale, zp);
    min = min > quantized_zero ? min : quantized_zero;
  }
  if (!is_relu && is_relu6) {
    int32_t quantized_six = QuantizeToInt8(6, scale, zp);
    max = max < quantized_six ? max : quantized_six;
  }
  *mini = min;
  *maxi = max;
}
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_QUANTIZATION_QUANTIZE_H_ #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_QUANTIZATION_QUANTIZE_H_
...@@ -6,5 +6,15 @@ BUILD_DIR=${CUR_DIR}/../build ...@@ -6,5 +6,15 @@ BUILD_DIR=${CUR_DIR}/../build
mkdir -pv ${CUR_DIR}/do_test mkdir -pv ${CUR_DIR}/do_test
cd ${CUR_DIR}/do_test cd ${CUR_DIR}/do_test
cp ${BUILD_DIR}/test/lite-test ./ cp ${BUILD_DIR}/test/lite-test ./
cp -r ${CUR_DIR}/ut/src/runtime/kernel/arm/test_data/* ./
./lite-test --gtest_filter="*TestHebing*" ./lite-test --gtest_filter="*TestHebing*"
./lite-test --gtest_filter=TestFcFp32*
./lite-test --gtest_filter=TestConv1x1Fp32*
./lite-test --gtest_filter=TestStrassenFp32*
./lite-test --gtest_filter=TestDeConvolutionFp32*
./lite-test --gtest_filter=TestPadInt8*
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <sys/time.h>
#include <iostream>
#include <memory>
#include "utils/log_adapter.h"
#include "common/common_test.h"
#include "src/common/file_utils.h"
#include "src/runtime/kernel/arm/fp32/fullconnection.h"
#include "src/runtime/kernel/arm/opclib/fp32/matmul.h"
namespace mindspore {
using mindspore::lite::tensor::Tensor;
// Fixture for the fp32 fully-connected kernel tests; adds nothing on top of mindspore::Common.
class TestFcFp32 : public mindspore::Common {
 public:
  TestFcFp32() = default;
};
// Builds the fixture for a small fully-connected case with inline data.
//   inputs_      : receives input (2x2x2x2), weight (3x8) and bias (3) tensors, in that order.
//   outputs_     : receives one 2x3 output tensor with allocated, unfilled storage.
//   matmal_param : configured for a row-major A, transposed B, bias enabled and no activation.
//   correct      : receives a malloc'd buffer holding the expected output values.
// Returns the number of elements in the output tensor.
int FcTestInit1(std::vector<lite::tensor::Tensor *> *inputs_, std::vector<lite::tensor::Tensor *> *outputs_,
                MatMulParameter *matmal_param, float **correct) {
  const auto node_type = static_cast<schema::NodeType>(1);

  float input_values[] = {-3.2366564, -4.7733846, -7.8329225, 16.146885, 5.060793,  -6.1471,  -1.7680453, -6.5721383,
                          17.87506,   -5.1192183, 10.742863,  1.4536934, 19.693445, 19.45783, 5.063163,   0.5234792};
  auto *input_tensor = new Tensor(kNumberTypeFloat, {2, 2, 2, 2}, schema::Format_NHWC, node_type);
  input_tensor->MallocData();
  memcpy(input_tensor->Data(), input_values, input_tensor->ElementsNum() * sizeof(float));
  inputs_->push_back(input_tensor);

  float weight_values[] = {-0.0024438887, 0.0006738146, -0.008169129,   0.0021510671,  -0.012470592, -0.0053063435,
                           0.006050155,   0.008656233,  0.012911413,    -0.0028635843, -0.00034080597, -0.0010622552,
                           -0.012254699,  -0.01312836,  0.0025241964,   -0.004706142,  0.002451482,  -0.009558459,
                           0.004481974,   0.0033251503, -0.011705584,   -0.001720293,  -0.0039410214, -0.0073637343};
  auto *weight_tensor = new Tensor(kNumberTypeFloat, {3, 8}, schema::Format_NHWC, node_type);
  weight_tensor->MallocData();
  memcpy(weight_tensor->Data(), weight_values, weight_tensor->ElementsNum() * sizeof(float));
  inputs_->push_back(weight_tensor);

  float bias_values[] = {1.6103756, -0.9872417, 0.546849};
  auto *bias_tensor = new Tensor(kNumberTypeFloat, {3}, schema::Format_NHWC, node_type);
  bias_tensor->MallocData();
  memcpy(bias_tensor->Data(), bias_values, bias_tensor->ElementsNum() * sizeof(float));
  inputs_->push_back(bias_tensor);

  auto *output_tensor = new Tensor(kNumberTypeFloat, {2, 3}, schema::Format_NHWC, node_type);
  output_tensor->MallocData();
  outputs_->push_back(output_tensor);

  // Expected result, handed to the caller in a malloc'd buffer.
  float expected_values[] = {1.6157111, -0.98469573, 0.6098231, 1.1649342, -1.2334653, 0.404779};
  *correct = reinterpret_cast<float *>(malloc(output_tensor->ElementsNum() * sizeof(float)));
  memcpy(*correct, expected_values, output_tensor->ElementsNum() * sizeof(float));

  matmal_param->a_transpose_ = false;
  matmal_param->b_transpose_ = true;
  matmal_param->has_bias_ = true;
  matmal_param->act_type_ = ActType_No;
  return output_tensor->ElementsNum();
}
// Runs the fp32 fully-connected kernel on the inline fixture with two threads and compares the
// output with the reference values.
TEST_F(TestFcFp32, FcTest1) {
  std::vector<lite::tensor::Tensor *> inputs_;
  std::vector<lite::tensor::Tensor *> outputs_;
  auto matmul_param = new MatMulParameter();
  float *correct = nullptr;
  int total_size = FcTestInit1(&inputs_, &outputs_, matmul_param, &correct);
  lite::Context *ctx = new lite::Context;
  ctx->threadNum = 2;
  kernel::FullconnectionCPUKernel *fc =
    new kernel::FullconnectionCPUKernel(reinterpret_cast<OpParameter *>(matmul_param), inputs_, outputs_, ctx);

  fc->Init();
  fc->Run();
  CompareOutputData(reinterpret_cast<float *>(outputs_[0]->Data()), correct, total_size, 0.0001);

  // Release everything this test allocated. The kernel goes first because it still points at the
  // context and the tensors.
  // NOTE(review): matmul_param is deliberately not deleted here - the kernel base class may take
  // ownership of its OpParameter; confirm against LiteKernel's destructor.
  delete fc;
  delete ctx;
  for (auto *tensor : inputs_) {
    delete tensor;
  }
  for (auto *tensor : outputs_) {
    delete tensor;
  }
  free(correct);
}
int FcTestInit2(std::vector<lite::tensor::Tensor *> *inputs_, std::vector<lite::tensor::Tensor *> *outputs_,
MatMulParameter *matmal_param, float **correct) {
size_t buffer_size;
Tensor *in_t = new Tensor(kNumberTypeFloat, {20, 4, 2, 10}, schema::Format_NCHW, static_cast<schema::NodeType>(1));
in_t->MallocData();
std::string in_path = "./matmul/FcFp32_input1.bin";
auto in_data = mindspore::lite::ReadFile(in_path.c_str(), &buffer_size);
memcpy(in_t->Data(), in_data, buffer_size);
inputs_->push_back(in_t);
Tensor *weight_t = new Tensor(kNumberTypeFloat, {30, 80}, schema::Format_NCHW, static_cast<schema::NodeType>(1));
weight_t->MallocData();
std::string weight_path = "./matmul/FcFp32_weight1.bin";
auto w_data = mindspore::lite::ReadFile(weight_path.c_str(), &buffer_size);
memcpy(weight_t->Data(), w_data, buffer_size);
inputs_->push_back(weight_t);
Tensor *bias_t = new Tensor(kNumberTypeFloat, {30}, schema::Format_NCHW, static_cast<schema::NodeType>(1));
bias_t->MallocData();
std::string bias_path = "./matmul/FcFp32_bias1.bin";
auto bias_data = mindspore::lite::ReadFile(bias_path.c_str(), &buffer_size);
memcpy(bias_t->Data(), bias_data, buffer_size);
inputs_->push_back(bias_t);
Tensor *out_t = new Tensor(kNumberTypeFloat, {20, 30}, schema::Format_NCHW, static_cast<schema::NodeType>(1));
out_t->MallocData();
outputs_->push_back(out_t);
*correct = reinterpret_cast<float *>(malloc(out_t->ElementsNum() * sizeof(float)));
std::string out_path = "./matmul/FcFp32_output1.bin";
auto out_data = mindspore::lite::ReadFile(out_path.c_str(), &buffer_size);
memcpy(*correct, out_data, out_t->ElementsNum() * sizeof(float));
matmal_param->b_transpose_ = true;
matmal_param->a_transpose_ = false;
matmal_param->has_bias_ = true;
matmal_param->act_type_ = ActType_No;
return out_t->ElementsNum();
}
// Runs the fp32 fully-connected kernel on the file-based fixture with one thread and compares the
// output with the reference values.
TEST_F(TestFcFp32, FcTest2) {
  std::vector<lite::tensor::Tensor *> inputs_;
  std::vector<lite::tensor::Tensor *> outputs_;
  auto matmul_param = new MatMulParameter();
  float *correct = nullptr;
  int total_size = FcTestInit2(&inputs_, &outputs_, matmul_param, &correct);
  lite::Context *ctx = new lite::Context;
  ctx->threadNum = 1;
  kernel::FullconnectionCPUKernel *fc =
    new kernel::FullconnectionCPUKernel(reinterpret_cast<OpParameter *>(matmul_param), inputs_, outputs_, ctx);

  fc->Init();
  fc->Run();
  CompareOutputData(reinterpret_cast<float *>(outputs_[0]->Data()), correct, total_size, 0.0001);

  // Release everything this test allocated. The kernel goes first because it still points at the
  // context and the tensors.
  // NOTE(review): matmul_param is deliberately not deleted here - the kernel base class may take
  // ownership of its OpParameter; confirm against LiteKernel's destructor.
  delete fc;
  delete ctx;
  for (auto *tensor : inputs_) {
    delete tensor;
  }
  for (auto *tensor : outputs_) {
    delete tensor;
  }
  free(correct);
}
} // namespace mindspore
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <iostream>
#include <memory>
#include "common/common_test.h"
#include "src/common/file_utils.h"
#include "mindspore/lite/src/kernel_registry.h"
#include "mindspore/lite/src/runtime/kernel/arm/opclib/pack.h"
#include "mindspore/lite/src/runtime/kernel/arm/opclib/fp32/matmul.h"
#include "mindspore/lite/src/runtime/kernel/arm/opclib/int8/deconv.h"
#include "mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_int8.h"
using mindspore::lite::DeviceType;
namespace mindspore {
using mindspore::lite::tensor::QuantArg;
using mindspore::lite::tensor::Tensor;
using mindspore::schema::Format_NHWC;
using mindspore::schema::NodeType_Parameter;
// Test fixture shared by the int8 deconvolution test cases in this file.
class TestDeconvInt8 : public mindspore::Common {
 public:
  TestDeconvInt8() = default;
};
// Quantizes `size` floats to int8 using q = round(f / scale + zp), saturating every
// result to [INT8_MIN, INT8_MAX].
//   fptr  - source values, at least `size` elements
//   iptr  - destination buffer, at least `size` elements
//   size  - number of elements to convert (0 is a no-op)
//   zp    - zero point added after scaling
//   scale - quantization step; the caller must pass a non-zero value
// NaN inputs are not handled.
void FloatToInt8(float *fptr, int8_t *iptr, size_t size, int32_t zp, double scale) {
  // The index has the same unsigned type as `size`, so the comparison is not mixed-sign.
  for (size_t i = 0; i < size; ++i) {
    // Saturate while the value is still a double: converting an out-of-range floating
    // point value to an integer type is undefined behaviour, so the clamp must come
    // before the narrowing cast rather than after it.
    double quantized = round(fptr[i] / scale + zp);
    if (quantized > INT8_MAX) {
      quantized = INT8_MAX;
    }
    if (quantized < INT8_MIN) {
      quantized = INT8_MIN;
    }
    iptr[i] = static_cast<int8_t>(quantized);
  }
}
// Packs a small int8 weight with PackNHWCToC8HWN8Int8 and compares the packed buffer with
// precomputed values. In the reference data every run of 6 values is followed by two zeros.
TEST_F(TestDeconvInt8, PackWeight1) {
  /* source weight, 5*1*2*6 nhwc */
  int8_t nhwc_weight[] = {-8,  11,   99,  -80, 8,   -12, 37,  -45, 31,  -69, -66, 26,  112, 124, -109, 85,  -24, 28,  -46, 100,
                          72,  -36,  -82, 64,  -110, 37, -72, 65,  -124, 91, -43, 99,  3,   100, 19,   51,  -14, -81, 67,  90,
                          4,   -106, 105, 28,  -61, -79, 55,  -54, 47,  -38, 114, 125, -65, 100, 6,    -72, -33, 60,  109, -68};

  int8_t expected[] = {-8,   11, 99, -80, 8,   -12, 0,  0,   112, 124, -109, 85,  -24, 28,  0,   0,   -110, 37, -72, 65,
                       -124, 91, 0,  0,   -14, -81, 67, 90,  4,   -106, 0,   0,   47,  -38, 114, 125, -65,  100, 0,  0,
                       37,   -45, 31, -69, -66, 26, 0,  0,   -46, 100, 72,   -36, -82, 64,  0,   0,   -43,  99, 3,   100,
                       19,   51, 0,  0,   105, 28,  -61, -79, 55, -54, 0,    0,   6,   -72, -33, 60,  109,  -68, 0,  0};

  int8_t packed[80] = {0};
  PackNHWCToC8HWN8Int8(nhwc_weight, packed, 5, 2, 6);
  CompareOutputData(packed, expected, 80, 1);
}
// Packs a 440-element int8 weight with PackNHWCToC8HWN8Int8(…, 22, 1, 20) and compares the
// 528-element packed buffer with precomputed values.
TEST_F(TestDeconvInt8, PackWeight2) {
  int8_t nhwc_weight[] = {
    40,   24,   94,   122,  67,  34,  -89, 31,   -43, 121, 48,   -54,  44,   -91,  35,   89,   -37, 114, -8,  103,
    -22,  32,   26,   112,  -92, -23, 43,  9,    81,  118, -73,  -54,  65,   -99,  51,   -90,  121, -62, 119, -93,
    21,   -92,  -1,   -82,  -71, -54, 63,  -93,  92,  -93, 99,   122,  -104, -16,  -8,   -32,  90,  -126, 51, 91,
    4,    70,   -7,   116,  99,  81,  -79, 124,  -14, 28,  97,   9,    -97,  99,   88,   -15,  54,  26,  77,  -25,
    113,  119,  119,  -75,  -17, 7,   7,   1,    69,  66,  40,   -13,  80,   -115, -98,  -8,   -17, 31,  88,  65,
    -1,   -15,  -98,  77,   56,  119, -20, -32,  -54, -58, -16,  52,   121,  126,  -33,  43,   92,  -34, -17, -52,
    104,  -52,  -91,  76,   79,  105, 102, -65,  43,  32,  13,   15,   -38,  95,   -18,  -82,  -7,  118, -79, -85,
    120,  -15,  2,    32,   -94, 111, 115, 102,  -18, 121, -106, 54,   63,   111,  -16,  92,   82,  -23, 111, 53,
    1,    -48,  45,   19,   -4,  -15, -72, 41,   80,  -51, 116,  31,   94,   101,  -10,  18,   0,   -49, 108, 28,
    -36,  47,   -14,  -2,   -10, 31,  -92, -84,  74,  -114, -107, 66,  99,   -121, -107, 31,   -38, 56,  -30, 109,
    -7,   28,   -22,  -17,  -3,  -2,  27,  -3,   108, -84, -23,  -71,  -54,  20,   -45,  109,  -42, 78,  -79, 98,
    -10,  57,   52,   1,    25,  73,  21,  -78,  46,  121, 66,   92,   24,   55,   4,    -110, -37, 112, -18, 10,
    -42,  16,   -9,   31,   39,  -70, 108, -3,   -90, -60, -121, 11,   50,   -88,  -104, -29,  -89, 94,  64,  -91,
    -101, -7,   23,   -57,  93,  16,  17,  35,   -48, -25, 13,   -121, 73,   -68,  -54,  -122, -20, 12,  64,  20,
    -11,  -6,   -71,  -52,  -97, 109, 116, -107, 117, -124, 56,  80,   -108, 30,   123,  56,   -80, 39,  -18, -97,
    -103, 122,  114,  -10,  -31, 97,  -92, 105,  -61, -25, 10,   -119, -106, 41,   77,   -117, 55,  -83, -29, 14,
    27,   -106, -86,  41,   43,  23,  11,  -76,  -34, 121, 94,   18,   69,   73,   100,  54,   43,  32,  13,  15,
    -38,  95,   -18,  -82,  -7,  118, -79, -85,  120, -15, 2,    32,   -94,  111,  115,  102,  -18, 121, -106, 54,
    63,   111,  -16,  92,   82,  -23, 111, 53,   1,   -48, 45,   19,   -4,   -15,  -72,  41,   80,  -51, 116, 31,
    94,   101,  -10,  18,   0,   -49, 108, 28,   -36, 47,  -14,  -2,   -10,  31,   -92,  -84,  74,  -114, -107, 66,
    99,   -121, -107, 31,   -38, 56,  -30, 109,  -7,  28,  -22,  -17,  -3,   -2,   27,   -3,   108, -84, -23, -71,
    -54,  20,   -45,  109,  -42, 78,  -79, 98,   -10, 57,  52,   1,    25,   73,   21,   -78,  46,  121, 66,  92};

  int8_t expected[] = {
    40,   24,   94,   122, 67,  34,  -89, 31,   -22,  32,   26,   112, -92,  -23,  43,   9,    21,  -92, -1,  -82,
    -71,  -54,  63,   -93, 4,   70,  -7,  116,  99,   81,   -79,  124, 113,  119,  119,  -75,  -17, 7,   7,   1,
    -1,   -15,  -98,  77,  56,  119, -20, -32,  104,  -52,  -91,  76,  79,   105,  102,  -65,  120, -15, 2,   32,
    -94,  111,  115,  102, 1,   -48, 45,  19,   -4,   -15,  -72,  41,  -36,  47,   -14,  -2,   -10, 31,  -92, -84,
    -7,   28,   -22,  -17, -3,  -2,  27,  -3,   -10,  57,   52,   1,   25,   73,   21,   -78,  -42, 16,  -9,  31,
    39,   -70,  108,  -3,  -101, -7, 23,  -57,  93,   16,   17,   35,  -11,  -6,   -71,  -52,  -97, 109, 116, -107,
    -103, 122,  114,  -10, -31, 97,  -92, 105,  27,   -106, -86,  41,  43,   23,   11,   -76,  -38, 95,  -18, -82,
    -7,   118,  -79,  -85, 63,  111, -16, 92,   82,   -23,  111,  53,  94,   101,  -10,  18,   0,   -49, 108, 28,
    99,   -121, -107, 31,  -38, 56,  -30, 109,  -54,  20,   -45,  109, -42,  78,   -79,  98,   -43, 121, 48,  -54,
    44,   -91,  35,   89,  81,  118, -73, -54,  65,   -99,  51,   -90, 92,   -93,  99,   122,  -104, -16, -8, -32,
    -14,  28,   97,   9,   -97, 99,  88,  -15,  69,   66,   40,   -13, 80,   -115, -98,  -8,   -54, -58, -16, 52,
    121,  126,  -33,  43,  43,  32,  13,  15,   -38,  95,   -18,  -82, -18,  121,  -106, 54,   63,  111, -16, 92,
    80,   -51,  116,  31,  94,  101, -10, 18,   74,   -114, -107, 66,  99,   -121, -107, 31,   108, -84, -23, -71,
    -54,  20,   -45,  109, 46,  121, 66,  92,   24,   55,   4,    -110, -90, -60,  -121, 11,   50,  -88, -104, -29,
    -48,  -25,  13,   -121, 73, -68, -54, -122, 117,  -124, 56,   80,  -108, 30,   123,  56,   -61, -25, 10,  -119,
    -106, 41,   77,   -117, -34, 121, 94, 18,   69,   73,   100,  54,  120,  -15,  2,    32,   -94, 111, 115, 102,
    1,    -48,  45,   19,  -4,  -15, -72, 41,   -36,  47,   -14,  -2,  -10,  31,   -92,  -84,  -7,  28,  -22, -17,
    -3,   -2,   27,   -3,  -10, 57,  52,  1,    25,   73,   21,   -78, -37,  114,  -8,   103,  0,   0,   0,   0,
    121,  -62,  119,  -93, 0,   0,   0,   0,    90,   -126, 51,   91,  0,    0,    0,    0,    54,  26,  77,  -25,
    0,    0,    0,    0,   -17, 31,  88,  65,   0,    0,    0,    0,   92,   -34,  -17,  -52,  0,   0,   0,   0,
    -7,   118,  -79,  -85, 0,   0,   0,   0,    82,   -23,  111,  53,  0,    0,    0,    0,    0,   -49, 108, 28,
    0,    0,    0,    0,   -38, 56,  -30, 109,  0,    0,    0,    0,   -42,  78,   -79,  98,   0,   0,   0,   0,
    -37,  112,  -18,  10,  0,   0,   0,   0,    -89,  94,   64,   -91, 0,    0,    0,    0,    -20, 12,  64,  20,
    0,    0,    0,    0,   -80, 39,  -18, -97,  0,    0,    0,    0,   55,   -83,  -29,  14,   0,   0,   0,   0,
    43,   32,   13,   15,  0,   0,   0,   0,    -18,  121,  -106, 54,  0,    0,    0,    0,    80,  -51, 116, 31,
    0,    0,    0,    0,   74,  -114, -107, 66, 0,    0,    0,    0,   108,  -84,  -23,  -71,  0,   0,   0,   0,
    46,   121,  66,   92,  0,   0,   0,   0};

  int8_t packed[528] = {0};
  PackNHWCToC8HWN8Int8(nhwc_weight, packed, 22, 1, 20);
  CompareOutputData(packed, expected, 528, 1);
}
// Exercises the int8 matmul path end to end: both operands are repacked with
// RowMajor2Col8MajorInt8, multiplied with MatMulInt8 (zero points 15 and -20), unpacked with
// Row8x8Major2RowMajor, and the 180 int32 results are compared with precomputed values.
TEST_F(TestDeconvInt8, MatMulTest1) {
  // Left operand, 10 x 12, row major.
  int8_t lhs_row_major_10x12[] = {
    -6, 76,  32,  80,  -73, 8,   -85, -3,  114, 80,  30,  42,  -41, 117, 62,  -76,  -77, -111, 88,  105,
    68, 105, -74, 13,  51,  94,  31,  -52, -92, -4,  -35, -71, 101, -93, 46,  -65,  57,  -41,  -51, 77,
    1,  9,   73,  -19, -36, 57,  81,  -24, 40,  103, 112, 109, -41, -68, 57,  61,   55,  -20,  3,   2,
    17, -16, -31, 58,  -4,  67,  -4,  -95, -5,  -72, 81,  15,  -7,  -16, -47, 112,  114, -26,  -98, 53,
    15, -49, 26,  19,  19,  8,   -57, -35, -79, 118, 29,  21,  37,  -48, 83,  7,    124, 113,  -5,  15,
    -8, 107, -65, -88, 50,  -47, -80, -84, 3,   -45, 92,  42,  -20, -101, 106, -10, 89,  67,   55,  10};
  int32_t lhs_zp = 15;
  int8_t lhs_col8[16 * 12] = {0};

  // Right operand; passed to the packing routine as 18 rows of 12 values.
  int8_t rhs_col_major_12x18[] = {
    92,  27,   22,   52,  -112, -20, -57, -2,  89,  32,  93,  -66, -25, -54, 94,  -97, -119, -98, 101, -99,
    77,  -83,  76,   95,  59,   97,  8,   40,  -109, -20, 67, -107, 37, -6,  -54, -20, -30,  36,  -106, -103,
    -3,  -86,  -82,  59,  4,    -75, -50, -106, 55, 104, -117, -71, -20, -85, -77, 16, -25,  -58, 4,   80,
    -75, 94,   32,   -68, 2,    40,  56,  -103, 11, -98, -70, -69, 0,   57,  -6,  82,  66,   -112, -61, 33,
    -77, -53,  95,   -38, 87,   -46, -3,  81,  -47, 43,  21,  26,  -45, -57, 50,  -24, -82,  -114, 61, 46,
    -53, 78,   -24,  31,  -7,   37,  29,  38,  45,  106, 52,  -42, 31,  -6,  -61, -87, 2,    79,  -5,  -42,
    43,  -106, -104, 7,   91,   -63, 58,  97,  -15, 74,  -96, 15,  -23, -3,  -47, -97, 100,  -54, 26,  -46,
    35,  26,   100,  -80, 34,   -25, 96,  -67, -80, -27, 66,  41,  41,  -43, -43, -38, -4,   -64, 31,  7,
    -8,  6,    -2,   39,  -119, 53,  75,  -91, -44, 77,  -62, 22,  -44, 78,  -67, -48, -115, -4,  43,  81,
    40,  -20,  -5,   -89, 60,   -62, -4,  -48, 66,  -64, -69, 62,  17,  -89, 1,   87,  81,   32,  -29, 51,
    40,  27,   66,   67,  11,   -69, 85,  -79, -106, 55, 22,  -23, 62,  69,  -74, 49};
  int32_t rhs_zp = -20;
  int8_t rhs_row8[12 * 24] = {0};

  // Reference product, 10 x 18, row major.
  int32_t expected_10x18[] = {
    32005,  3597,   16595,  -3458,  6627,   -6663,  818,    -3910,  10228,  15079,  -19205, -10203, -3178,  -10046,
    10374,  -6199,  5330,   12163,  1819,   20533,  17382,  18283,  9778,   9185,   -12623, -26234, -11987, 7904,
    8144,   -1603,  27611,  -10190, -20053, 4999,   -28389, 21852,  24680,  25858,  23506,  17944,  11768,  24378,
    -6102,  -4675,  -23460, 10434,  -47579, 1986,   12018,  -19418, -7248,  4938,   -32613, -941,   8171,   -4788,
    3325,   -11310, -8351,  -14786, 6909,   16401,  2017,   -6456,  11242,  7393,   -9119,  17312,  2646,   -14402,
    7201,   -9949,  23986,  17607,  27461,  -1547,  2783,   7558,   19487,  11158,  -2686,  6328,   -8225,  -11668,
    21858,  -2079,  -8671,  -639,   -1544,  1235,   1156,   6582,   2829,   -10311, -2692,  5154,   1527,   10870,
    106,    -8189,  -24174, -1846,  -15399, -3598,  14874,  -5591,  -619,   -13667, -6053,  -31103, -24499, 13008,
    9143,   -17982, 28437,  2176,   -2114,  -11631, 10779,  -1032,  -24690, -3112,  2125,   432,    20270,  -33859,
    8907,   10063,  1603,   3761,   4805,   4904,   -15594, 10786,  4287,   -13591, -18777, -1679,  2109,   -2243,
    12051,  -8504,  -6558,  4209,   13606,  -25803, 27922,  12092,  7140,   27142,  -12267, 2339,   -26224, 23674,
    -26579, -11398, -1823,  -18976, 3641,   4415,   -24878, -2045,  15937,  41465,  12601,  -14513, -17619, -5728,
    334,    -424,   8147,   -1369,  5984,   11000,  19016,  4456,   -25920, 4506,   5930,   15458};
  int32_t product_8x8[16 * 24] = {0};
  int32_t product_row_major[180] = {0};

  RowMajor2Col8MajorInt8(lhs_row_major_10x12, lhs_col8, 10, 12);
  RowMajor2Col8MajorInt8(rhs_col_major_12x18, rhs_row8, 18, 12);
  MatMulInt8(lhs_col8, rhs_row8, product_8x8, 16, 24, 12, lhs_zp, rhs_zp);
  // The unpacking helper is declared for float buffers, so the int32 buffers are reinterpreted.
  Row8x8Major2RowMajor(reinterpret_cast<float *>(product_8x8), reinterpret_cast<float *>(product_row_major), 10, 18);
  CompareOutputData(product_row_major, expected_10x18, 180, 1);
}
// Checks PostFuncInt8 on one int32 accumulator buffer with three output clamp ranges:
// [-128, 127], [0, 127] and [0, 6]. Each result is compared with its own reference array.
TEST_F(TestDeconvInt8, PostAddTest1) {
  int32_t accum[] = {
    -4956,  -3923,  868,   -8880, -4089, -5179, -4526, -4527, -10464, 99,    -5826, -2995, -4519, -4519, -10509, -2505,
    -11272, 434,    -4522, -4523, -5287, -8936, -878,  373,   -4528,  -4529, -1960, -6589, 1688,  2287,  -8059,  926,
    -2506,  -6972,  -2834, -8281, -8118, -3110, -4526, -4527, -4528,  -4529, -4519, -4519, -4519, -4519, -4519,  -4519,
    -4520,  -4521,  -4522, -4523, -4524, -4525, -4526, -4527, -4528,  -4529, -4519, -4519, -4519, -4519, -4519,  -4519,
    1578,   2231,   -4522, -4523, -4524, -4525, -4526, -4527, -8449,  -990,  -4519, -4519, -4519, -4519, -4519,  -4519,
    -4303,  -10293, -4522, -4523, -4524, -4525, -4526, -4527, -4528,  -4529, -4519, -4519, -4519, -4519, -4519,  -4519,
    -7025,  924,    -4522, -4523, -4524, -4525, -4526, -4527, -4528,  -4529, -4519, -4519, -4519, -4519, -4519,  -4519,
    -4520,  -4521,  -4522, -4523, -4524, -4525, -4526, -4527, -4528,  -4529, -4519, -4519, -4519, -4519, -4519,  -4519};
  int32_t bias[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
  int8_t actual[50] = {0};

  // Derive the fixed-point multiplier and shifts from the real-valued multiplier.
  double real_multiplier = 0.0183649725490196;
  int32_t quant_multiplier;
  int32_t left_shift;
  int32_t right_shift;
  QuantizeRoundParameter(real_multiplier, &quant_multiplier, &left_shift, &right_shift);
  int32_t out_zp = 83;

  // Clamp range [-128, 127].
  int8_t expected_full[] = {-8,  11,  99,  -80,  8,  -12, 0, 0,   112, 124, -109, 85, -24, 28,   0, 0, -110,
                            37,  -72, 65,  -124, 91, 0,   0, -14, -81, 67,  90,   4,  -106, 0,   0, 47, -38,
                            114, 125, -65, 100,  0,  0,   37, -45, 31, -69, -66,  26, 0,   0,    -46, 100};
  PostFuncInt8(accum, bias, actual, 10, 5, 8, quant_multiplier, left_shift, right_shift, out_zp, -128, 127);
  CompareOutputData(actual, expected_full, 50, 1);

  // Clamp range [0, 127].
  int8_t expected_relu[] = {0, 11, 99, 0, 8, 0, 0, 0, 112, 124, 0, 85,  0, 28,  0, 0, 0,  37, 0,  65, 0, 91, 0, 0, 0,
                            0, 67, 90, 4, 0, 0, 0, 47, 0,  114, 125, 0, 100, 0, 0, 37, 0, 31, 0, 0,  26, 0, 0, 0, 100};
  PostFuncInt8(accum, bias, actual, 10, 5, 8, quant_multiplier, left_shift, right_shift, out_zp, 0, 127);
  CompareOutputData(actual, expected_relu, 50, 1);

  // Clamp range [0, 6].
  int8_t expected_relu6[] = {0, 6, 6, 0, 6, 0, 0, 0, 6, 6, 0, 6, 0, 6, 0, 0, 0, 6, 0, 6, 0, 6, 0, 0, 0,
                             0, 6, 6, 4, 0, 0, 0, 6, 0, 6, 6, 0, 6, 0, 0, 6, 0, 6, 0, 0, 6, 0, 0, 0, 6};
  PostFuncInt8(accum, bias, actual, 10, 5, 8, quant_multiplier, left_shift, right_shift, out_zp, 0, 6);
  CompareOutputData(actual, expected_relu6, 50, 1);
}
// Builds the fixture for DeConvInt8Test1.
//   inputs_    - receives the input tensor ({1, 4, 2, 3}) followed by the weight tensor ({3, 3, 3, 2})
//   outputs_   - receives the output tensor ({1, 7, 3, 2})
//   conv_param - kernel 3x3, pad 1, stride 2, dilation 1 are written into it
//   correct    - receives a malloc'ed buffer with the expected output in NHWC order;
//                the caller releases it with free()
// Returns the number of elements of the output tensor.
// The tensors pushed into the vectors are heap allocated and must be deleted by the caller.
int DeConvInt8TestInit1(std::vector<lite::tensor::Tensor *> *inputs_, std::vector<lite::tensor::Tensor *> *outputs_,
                        ConvParameter *conv_param, int8_t **correct) {
  /* float data from deconv fp32 testcase : DeConvTestInit2 */
  /* vq = (vi - zp) * s   vi = vq / s + zp */
  Tensor *in_t = new Tensor(kNumberTypeInt8, {1, 4, 2, 3}, Format_NHWC, NodeType_Parameter);
  in_t->MallocData();
  int8_t in[] = {6, 43, 38, 24, -8, 12, 41, -24, -20, 41, -19, -6, -26, -6, 23, -31, 34, 45, 8, 45, -39, -27, -48, 12};
  memcpy(in_t->Data(), in, sizeof(int8_t) * in_t->ElementsNum());
  // The quant args live on the stack: the previous heap allocations were never freed.
  // NOTE(review): assumes AddQuantParam stores a copy of its argument - confirm in tensor.h.
  QuantArg in_quant_arg{};
  in_quant_arg.zeroPoint = -19;
  in_quant_arg.scale = 0.31228156;
  in_t->AddQuantParam(in_quant_arg);
  inputs_->push_back(in_t);

  Tensor *weight_t = new Tensor(kNumberTypeInt8, {3, 3, 3, 2}, Format_NHWC, NodeType_Parameter);
  weight_t->MallocData();
  int8_t weight[] = {66, 89, 98, 74,  95, 86, 125, 95, 105, 83, 116, 94, 90, 80, 86, 59, 72, 92,
                     64, 76, 92, 80,  90, 87, 106, 55, 105, 60, 75,  53, 81, 81, 98, 81, 86, 59,
                     74, 82, 97, 105, 71, 67, 79,  87, 72,  79, 80,  76, 96, 80, 83, 71, 61, 79};
  memcpy(weight_t->Data(), weight, sizeof(int8_t) * weight_t->ElementsNum());
  QuantArg w_quant_arg{};
  w_quant_arg.zeroPoint = 83;
  w_quant_arg.scale = 0.023649725490196;
  weight_t->AddQuantParam(w_quant_arg);
  inputs_->push_back(weight_t);

  Tensor *out_t = new Tensor(kNumberTypeInt8, {1, 7, 3, 2}, Format_NHWC, NodeType_Parameter);
  out_t->MallocData();
  QuantArg out_quant_arg{};
  out_quant_arg.zeroPoint = 31;
  out_quant_arg.scale = 0.3439215686275;
  out_t->AddQuantParam(out_quant_arg);
  outputs_->push_back(out_t);

  // The reference values are stored in NCHW order and repacked to NHWC for the comparison.
  *correct = reinterpret_cast<int8_t *>(malloc(out_t->ElementsNum() * sizeof(int8_t)));
  int8_t co_nchw[] = {57, 76, 49, 71,  8, 61, 57, 127, 56, 46, -11, 61, 23, 31,  34, 50, 59, 49, 78, 17, 6,
                      -3, -5, 23, -11, 6, -5, 33, 64,  30, 21, 18,  25, 21, -15, 0,  4,  31, 36, 2,  17, 43};
  PackNCHWToNHWCInt8(co_nchw, *correct, out_t->Batch(), out_t->Width() * out_t->Height(), out_t->Channel());

  conv_param->kernel_h_ = conv_param->kernel_w_ = 3;
  conv_param->pad_h_ = conv_param->pad_w_ = 1;
  conv_param->stride_h_ = conv_param->stride_w_ = 2;
  conv_param->dilation_h_ = conv_param->dilation_w_ = 1;
  return out_t->ElementsNum();
}
// Runs the int8 deconvolution CPU kernel with two threads on the fixture built by
// DeConvInt8TestInit1 and compares the output tensor with the reference (error bound 3).
TEST_F(TestDeconvInt8, DeConvInt8Test1) {
  std::vector<lite::tensor::Tensor *> in_tensors;
  std::vector<lite::tensor::Tensor *> out_tensors;
  auto *param = new ConvParameter();
  auto *context = new lite::Context;
  context->threadNum = 2;
  int8_t *expected;
  int elem_count = DeConvInt8TestInit1(&in_tensors, &out_tensors, param, &expected);

  auto *deconv_kernel = new mindspore::kernel::DeConvInt8CPUKernel(reinterpret_cast<OpParameter *>(param), in_tensors,
                                                                   out_tensors, context);
  deconv_kernel->Init();
  deconv_kernel->Run();

  int8_t *actual = reinterpret_cast<int8_t *>(out_tensors[0]->Data());
  CompareOutputData(actual, expected, elem_count, 3);

  delete param;
  // The kernel object is deliberately left alive; deleting it is disabled in this test.
  // delete deconv_kernel;
  for (auto *tensor : in_tensors) {
    delete tensor;
  }
  for (auto *tensor : out_tensors) {
    delete tensor;
  }
  free(expected);
}
} // namespace mindspore
...@@ -27,7 +27,7 @@ namespace mindspore { ...@@ -27,7 +27,7 @@ namespace mindspore {
using lite::tensor::Tensor; using lite::tensor::Tensor;
class TestFcInt8 : public mindspore::Common { class TestFcInt8 : public mindspore::Common {
public: public:
TestFcInt8(){} TestFcInt8() {}
}; };
void Quantize(float *input_data, int length, float scale, int zero_point, int8_t *output_data) { void Quantize(float *input_data, int length, float scale, int zero_point, int8_t *output_data) {
...@@ -110,8 +110,7 @@ int FcInt8TestInit(std::vector<lite::tensor::Tensor *> *inputs_, std::vector<lit ...@@ -110,8 +110,7 @@ int FcInt8TestInit(std::vector<lite::tensor::Tensor *> *inputs_, std::vector<lit
matmal_param->b_transpose_ = true; matmal_param->b_transpose_ = true;
matmal_param->a_transpose_ = false; matmal_param->a_transpose_ = false;
matmal_param->has_bias_ = true; matmal_param->has_bias_ = true;
matmal_param->minf_ = -FLT_MAX; matmal_param->act_type_ = ActType_No;
matmal_param->maxf_ = FLT_MAX;
return out_t->ElementsNum(); return out_t->ElementsNum();
} }
......
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <iostream>
#include "include/context.h"
#include "src/ir/tensor.h"
#include "common/common_test.h"
#include "src/common/file_utils.h"
#include "src/runtime/kernel/arm/opclib/pad_parameter.h"
#include "src/runtime/kernel/arm/int8/pad_int8.h"
namespace mindspore {
using mindspore::lite::tensor::QuantArg;
using mindspore::lite::tensor::Tensor;
// Test fixture shared by the int8 pad kernel test cases in this file.
class TestPadInt8 : public mindspore::Common {
 public:
  TestPadInt8() = default;
};
// Builds the 1-D pad fixture: input {1, 1, 1} (shape {3}), output shape {7}.
//   inputs_   - receives the input tensor
//   outputs_  - receives the output tensor
//   pad_param - paddings {0, 0, 0, 0, 0, 0, 2, 2} and constant value 0 are written into it
//   correct   - receives a malloc'ed buffer with the expected output; the caller frees it
// Input and output share zero point 10 and scale 0.31228156; the expected padded
// positions hold 10.
// Returns the number of elements of the output tensor.
int PadInt8TestInit1(std::vector<Tensor *> *inputs_, std::vector<Tensor *> *outputs_, PadParameter *pad_param,
                     int8_t **correct) {
  Tensor *in_t = new Tensor(kNumberTypeInt8, {3}, schema::Format_NHWC, schema::NodeType_Parameter);
  in_t->MallocData();
  int8_t in[] = {1, 1, 1};
  memcpy(in_t->Data(), in, sizeof(int8_t) * in_t->ElementsNum());
  // The quant args live on the stack: the previous heap allocations were never freed.
  // NOTE(review): assumes AddQuantParam stores a copy of its argument - confirm in tensor.h.
  QuantArg in_quant_arg{};
  in_quant_arg.zeroPoint = 10;
  in_quant_arg.scale = 0.31228156;
  in_t->AddQuantParam(in_quant_arg);
  inputs_->push_back(in_t);

  Tensor *out_t = new Tensor(kNumberTypeInt8, {7}, schema::Format_NHWC, schema::NodeType_Parameter);
  out_t->MallocData();
  QuantArg out_quant_arg{};
  out_quant_arg.zeroPoint = 10;
  out_quant_arg.scale = 0.31228156;
  out_t->AddQuantParam(out_quant_arg);
  outputs_->push_back(out_t);

  *correct = reinterpret_cast<int8_t *>(malloc(out_t->ElementsNum() * sizeof(int8_t)));
  int8_t co[] = {10, 10, 1, 1, 1, 10, 10};
  memcpy(*correct, co, out_t->ElementsNum() * sizeof(int8_t));

  int padding[] = {0, 0, 0, 0, 0, 0, 2, 2};
  memcpy(pad_param->paddings_, padding, MAX_PAD_SIZE * sizeof(int));
  pad_param->constant_value_ = 0;
  return out_t->ElementsNum();
}
// Runs the int8 pad CPU kernel on the fixture built by PadInt8TestInit1 and requires an
// exact match (error bound 0) with the reference output.
TEST_F(TestPadInt8, PadInt8Test1) {
  std::vector<lite::tensor::Tensor *> in_tensors;
  std::vector<lite::tensor::Tensor *> out_tensors;
  auto *param = new PadParameter();
  auto *context = new lite::Context;
  int8_t *expected;
  int elem_count = PadInt8TestInit1(&in_tensors, &out_tensors, param, &expected);

  auto *pad_kernel =
    new kernel::PadInt8CPUKernel(reinterpret_cast<OpParameter *>(param), in_tensors, out_tensors, context);
  pad_kernel->Init();
  pad_kernel->Run();

  int8_t *actual = reinterpret_cast<int8_t *>(out_tensors[0]->Data());
  CompareOutputData(actual, expected, elem_count, 0);

  // Release everything the fixture allocated, in the same order as before.
  delete param;
  delete pad_kernel;
  for (auto *tensor : in_tensors) {
    delete tensor;
  }
  for (auto *tensor : out_tensors) {
    delete tensor;
  }
  free(expected);
}
// Builds the 2-D pad fixture: input shape {6, 2}, output shape {10, 5}.
//   inputs_   - receives the input tensor
//   outputs_  - receives the output tensor
//   pad_param - paddings {0, 0, 0, 0, 3, 1, 1, 2} and constant value 0 are written into it
//   correct   - receives a malloc'ed buffer with the expected output; the caller frees it
// Input and output share zero point 10 and scale 0.31228156; the expected padded
// positions hold 10.
// Returns the number of elements of the output tensor.
int PadInt8TestInit2(std::vector<Tensor *> *inputs_, std::vector<Tensor *> *outputs_, PadParameter *pad_param,
                     int8_t **correct) {
  Tensor *in_t = new Tensor(kNumberTypeInt8, {6, 2}, schema::Format_NHWC, schema::NodeType_Parameter);
  in_t->MallocData();
  int8_t in[] = {18, 71, 99, -6, 5, -119, 86, 13, 15, -85, -41, -77};
  memcpy(in_t->Data(), in, sizeof(int8_t) * in_t->ElementsNum());
  // The quant args live on the stack: the previous heap allocations were never freed.
  // NOTE(review): assumes AddQuantParam stores a copy of its argument - confirm in tensor.h.
  QuantArg in_quant_arg{};
  in_quant_arg.zeroPoint = 10;
  in_quant_arg.scale = 0.31228156;
  in_t->AddQuantParam(in_quant_arg);
  inputs_->push_back(in_t);

  Tensor *out_t = new Tensor(kNumberTypeInt8, {10, 5}, schema::Format_NHWC, schema::NodeType_Parameter);
  out_t->MallocData();
  QuantArg out_quant_arg{};
  out_quant_arg.zeroPoint = 10;
  out_quant_arg.scale = 0.31228156;
  out_t->AddQuantParam(out_quant_arg);
  outputs_->push_back(out_t);

  *correct = reinterpret_cast<int8_t *>(malloc(out_t->ElementsNum() * sizeof(int8_t)));
  int8_t co[] = {10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 18,
                 71, 10, 10, 10, 99, -6, 10, 10, 10, 5, -119, 10, 10, 10, 86, 13, 10,
                 10, 10, 15, -85, 10, 10, 10, -41, -77, 10, 10, 10, 10, 10, 10, 10};
  memcpy(*correct, co, out_t->ElementsNum() * sizeof(int8_t));

  int padding[] = {0, 0, 0, 0, 3, 1, 1, 2};
  memcpy(pad_param->paddings_, padding, MAX_PAD_SIZE * sizeof(int));
  pad_param->constant_value_ = 0;
  return out_t->ElementsNum();
}
// Runs the int8 pad CPU kernel on the fixture built by PadInt8TestInit2 and requires an
// exact match (error bound 0) with the reference output.
TEST_F(TestPadInt8, PadInt8Test2) {
  std::vector<lite::tensor::Tensor *> in_tensors;
  std::vector<lite::tensor::Tensor *> out_tensors;
  auto *param = new PadParameter();
  auto *context = new lite::Context;
  int8_t *expected;
  int elem_count = PadInt8TestInit2(&in_tensors, &out_tensors, param, &expected);

  auto *pad_kernel =
    new kernel::PadInt8CPUKernel(reinterpret_cast<OpParameter *>(param), in_tensors, out_tensors, context);
  pad_kernel->Init();
  pad_kernel->Run();

  int8_t *actual = reinterpret_cast<int8_t *>(out_tensors[0]->Data());
  CompareOutputData(actual, expected, elem_count, 0);

  // Release everything the fixture allocated, in the same order as before.
  delete param;
  delete pad_kernel;
  for (auto *tensor : in_tensors) {
    delete tensor;
  }
  for (auto *tensor : out_tensors) {
    delete tensor;
  }
  free(expected);
}
// Builds the 4-D pad fixture: input shape {2, 3, 2, 1}, output shape {6, 6, 4, 3}.
//   inputs_   - receives the input tensor
//   outputs_  - receives the output tensor
//   pad_param - paddings {3, 1, 1, 2, 2, 0, 1, 1} and constant value 0 are written into it
//   correct   - receives a malloc'ed buffer with the expected output; the caller frees it
// Input and output share zero point 10 and scale 0.31228156; the expected padded
// positions hold 10.
// Returns the number of elements of the output tensor.
int PadInt8TestInit4(std::vector<Tensor *> *inputs_, std::vector<Tensor *> *outputs_, PadParameter *pad_param,
                     int8_t **correct) {
  Tensor *in_t = new Tensor(kNumberTypeInt8, {2, 3, 2, 1}, schema::Format_NHWC, schema::NodeType_Parameter);
  in_t->MallocData();
  int8_t in[] = {73, 24, 7, -31, -109, -2, 69, -64, 51, -45, 38, 53};
  memcpy(in_t->Data(), in, sizeof(int8_t) * in_t->ElementsNum());
  // The quant args live on the stack: the previous heap allocations were never freed.
  // NOTE(review): assumes AddQuantParam stores a copy of its argument - confirm in tensor.h.
  QuantArg in_quant_arg{};
  in_quant_arg.zeroPoint = 10;
  in_quant_arg.scale = 0.31228156;
  in_t->AddQuantParam(in_quant_arg);
  inputs_->push_back(in_t);

  Tensor *out_t = new Tensor(kNumberTypeInt8, {6, 6, 4, 3}, schema::Format_NHWC, schema::NodeType_Parameter);
  out_t->MallocData();
  QuantArg out_quant_arg{};
  out_quant_arg.zeroPoint = 10;
  out_quant_arg.scale = 0.31228156;
  out_t->AddQuantParam(out_quant_arg);
  outputs_->push_back(out_t);

  *correct = reinterpret_cast<int8_t *>(malloc(out_t->ElementsNum() * sizeof(int8_t)));
  int8_t co[] = {
    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 73, 10, 10, 24, 10, 10, 10, 10,
    10, 10, 10, 10, 7, 10, 10, -31, 10, 10, 10, 10, 10, 10, 10, 10, -109, 10, 10, -2, 10, 10, 10, 10, 10, 10, 10,
    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 69, 10, 10, -64, 10, 10, 10, 10, 10, 10, 10, 10, 51, 10, 10, -45, 10,
    10, 10, 10, 10, 10, 10, 10, 38, 10, 10, 53, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10};
  memcpy(*correct, co, out_t->ElementsNum() * sizeof(int8_t));

  int padding[] = {3, 1, 1, 2, 2, 0, 1, 1};
  memcpy(pad_param->paddings_, padding, MAX_PAD_SIZE * sizeof(int));
  pad_param->constant_value_ = 0;
  return out_t->ElementsNum();
}
// Runs the int8 pad CPU kernel on the 4-D fixture built by PadInt8TestInit4 and requires
// an exact match (error bound 0) with the reference output.
TEST_F(TestPadInt8, PadInt8TestInit4) {
  std::vector<lite::tensor::Tensor *> inputs_;
  std::vector<lite::tensor::Tensor *> outputs_;
  auto pad_param = new PadParameter();
  lite::Context *ctx = new lite::Context;
  int8_t *correct;
  // Use the 4-D fixture that matches this test's name. The test previously called
  // PadInt8TestInit2, which only repeated PadInt8Test2 and left PadInt8TestInit4 unused.
  int total_size = PadInt8TestInit4(&inputs_, &outputs_, pad_param, &correct);
  kernel::PadInt8CPUKernel *pad =
    new kernel::PadInt8CPUKernel(reinterpret_cast<OpParameter *>(pad_param), inputs_, outputs_, ctx);
  pad->Init();
  pad->Run();
  CompareOutputData(reinterpret_cast<int8_t *>(outputs_[0]->Data()), correct, total_size, 0);
  delete pad_param;
  delete pad;
  for (auto t : inputs_) delete t;
  for (auto t : outputs_) delete t;
  free(correct);
}
} // namespace mindspore
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册