Commit d0aa719a authored by fuzhiye

optimize int8 pooling

Parent 29070d60
......@@ -264,7 +264,8 @@ void ConvInt8(int8_t *input_data, int8_t *packed_input, int8_t *packed_weight, c
int output_tile_count = UP_DIV(output_count, tile_n);
int ic4 = UP_DIV(in_channel, C4NUM);
int kernel_plane = kernel_h * kernel_w;
- int unit_size = kernel_plane * ic4 * C4NUM;
+ int plane_block = UP_DIV(kernel_plane, C4NUM);
+ int unit_size = plane_block * C4NUM * ic4 * C4NUM;
int packed_input_size = output_tile_count * tile_n * unit_size;
int input_sum_offset;
if (conv_param->conv_quant_arg_.per_channel_ & FILTER_PER_CHANNEL) {
......
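Note on the packing change above: UP_DIV rounds up, so the kernel plane is now padded to a multiple of C4NUM (4) before the packed-buffer size is computed. A minimal standalone sketch with assumed values (3x3 kernel, 16 input channels; `UpDiv` is a stand-in for nnacl's `UP_DIV` macro):

```cpp
#include <cstdio>

constexpr int C4NUM = 4;
constexpr int UpDiv(int x, int y) { return (x + y - 1) / y; }

int main() {
  const int kernel_h = 3, kernel_w = 3, in_channel = 16;  // assumed example shape
  const int kernel_plane = kernel_h * kernel_w;           // 9
  const int ic4 = UpDiv(in_channel, C4NUM);               // 4
  // Old layout: pack exactly kernel_plane units per tile.
  const int old_unit_size = kernel_plane * ic4 * C4NUM;          // 144
  // New layout: pad kernel_plane up to a multiple of C4NUM so the
  // packed tile is 4-aligned on the plane dimension as well.
  const int plane_block = UpDiv(kernel_plane, C4NUM);            // 3
  const int new_unit_size = plane_block * C4NUM * ic4 * C4NUM;   // 192
  printf("old=%d new=%d\n", old_unit_size, new_unit_size);
  return 0;
}
```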
......@@ -32,6 +32,8 @@ void AvgPoolingOptInt8(const int8_t *input_ptr, int8_t *output_ptr, PoolingParam
void MaxPoolingInt8(const int8_t *input_ptr, int8_t *output_ptr, PoolingParameter *pooling_param, int task_id);
+ void MaxPoolingWithQuantInt8(const int8_t *input_ptr, int8_t *output_ptr, PoolingParameter *pooling_param, int task_id);
+ void MaxPoolingOptInt8(const int8_t *input_ptr, int8_t *output_ptr, PoolingParameter *pooling_param, int task_id);
#ifdef __cplusplus
}
......
......@@ -19,14 +19,16 @@
#include "nnacl/op_base.h"
#include "nnacl/quantization/quantize.h"
+ typedef enum PoolMode { PoolMode_No, PoolMode_MaxPool, PoolMode_AvgPool } PoolMode;
+ typedef enum RoundMode { RoundMode_No, RoundMode_Ceil, RoundMode_Floor } RoundMode;
typedef struct PoolingParameter {
OpParameter op_parameter_;
+ PoolMode pool_mode_;
+ RoundMode round_mode_;
+ ActType act_type_;
QuantArg **quant_args_;
+ bool global_;
- bool max_pooling_;
- bool avg_pooling_;
- bool round_ceil_;
- bool round_floor_;
int window_w_;
int window_h_;
int input_w_;
......@@ -44,7 +46,8 @@ typedef struct PoolingParameter {
int stride_w_;
int stride_h_;
int thread_num_;
- ActType act_type_;
- bool global_;
+ bool quantize_;
} PoolingParameter;
#endif // MINDSPORE_LITE_NNACL_POOLING_PARAMETER_H_
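Note on the header change above: four mutually exclusive booleans collapse into two enums plus a new `quantize_` flag. A minimal sketch (`PoolingParamSketch` is a hypothetical stand-in for the real struct) showing why the enum form cannot express the contradictory states the boolean pairs could:

```cpp
#include <cassert>

typedef enum PoolMode { PoolMode_No, PoolMode_MaxPool, PoolMode_AvgPool } PoolMode;
typedef enum RoundMode { RoundMode_No, RoundMode_Ceil, RoundMode_Floor } RoundMode;

struct PoolingParamSketch {  // illustrative only, reduced from the header above
  PoolMode pool_mode_;
  RoundMode round_mode_;
  bool quantize_;
};

int main() {
  PoolingParamSketch p{PoolMode_MaxPool, RoundMode_Floor, false};
  // With an enum, "both max and avg pooling" is unrepresentable; the old
  // max_pooling_/avg_pooling_ pair allowed that inconsistent state.
  assert(p.pool_mode_ == PoolMode_MaxPool);
  return 0;
}
```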
......@@ -294,32 +294,26 @@ OpParameter *PopulatePoolingParameter(const mindspore::lite::PrimitiveC *primiti
auto pool_mode = pooling_primitive->GetPoolingMode();
switch (pool_mode) {
case schema::PoolMode_MAX_POOLING:
- pooling_param->max_pooling_ = true;
- pooling_param->avg_pooling_ = false;
+ pooling_param->pool_mode_ = PoolMode_MaxPool;
break;
case schema::PoolMode_MEAN_POOLING:
- pooling_param->max_pooling_ = false;
- pooling_param->avg_pooling_ = true;
+ pooling_param->pool_mode_ = PoolMode_AvgPool;
break;
default:
- pooling_param->max_pooling_ = false;
- pooling_param->avg_pooling_ = false;
+ pooling_param->pool_mode_ = PoolMode_No;
break;
}
auto round_mode = pooling_primitive->GetRoundMode();
switch (round_mode) {
case schema::RoundMode_FLOOR:
- pooling_param->round_floor_ = true;
- pooling_param->round_ceil_ = false;
+ pooling_param->round_mode_ = RoundMode_Floor;
break;
case schema::RoundMode_CEIL:
- pooling_param->round_floor_ = false;
- pooling_param->round_ceil_ = true;
+ pooling_param->round_mode_ = RoundMode_Ceil;
break;
default:
- pooling_param->round_floor_ = false;
- pooling_param->round_ceil_ = false;
+ pooling_param->round_mode_ = RoundMode_No;
break;
}
......
......@@ -42,6 +42,12 @@ int PoolingBaseCPUKernel::SetQuantParam() {
pooling_quant_arg_[1][0].scale_ = out_quant_arg.front().scale;
pooling_quant_arg_[1][0].zp_ = out_quant_arg.front().zeroPoint;
pooling_param_->quant_args_ = pooling_quant_arg_;
+ if (pooling_quant_arg_[0][0].scale_ == pooling_quant_arg_[1][0].scale_ &&
+     pooling_quant_arg_[0][0].zp_ == pooling_quant_arg_[1][0].zp_) {
+   pooling_param_->quantize_ = false;
+ } else {
+   pooling_param_->quantize_ = true;
+ }
return RET_OK;
}
......
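Note on the new `quantize_` flag: with the usual affine mapping real = scale * (q - zp) and scale > 0, the mapping is monotonic, so max is preserved on raw int8 values. When input and output share scale and zero point, the kernel can therefore compare quantized values directly and skip requantization. A hedged sketch of that decision (`QuantParam`, `Requantize`, and `MaxPool2` are illustrative names, not the nnacl API):

```cpp
#include <algorithm>
#include <cmath>
#include <cstdint>

struct QuantParam { float scale; int zp; };

int8_t Requantize(int8_t q, QuantParam in, QuantParam out) {
  // Dequantize, then quantize into the output domain, with saturation.
  const float real = in.scale * (q - in.zp);
  const int v = static_cast<int>(std::round(real / out.scale)) + out.zp;
  return static_cast<int8_t>(std::min(127, std::max(-128, v)));
}

int8_t MaxPool2(int8_t a, int8_t b, QuantParam in, QuantParam out) {
  const int8_t m = std::max(a, b);  // max commutes with the monotonic affine map
  if (in.scale == out.scale && in.zp == out.zp) {
    return m;                       // opt path: no requantization needed
  }
  return Requantize(m, in, out);    // quant path: per-result requantization
}
```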
......@@ -53,7 +53,7 @@ int PoolingFp16CPUKernel::ReSize() {
}
int PoolingFp16CPUKernel::RunImpl(int task_id) {
- if (pooling_param_->max_pooling_) {
+ if (pooling_param_->pool_mode_ == PoolMode_MaxPool) {
MaxPoolingFp16(fp16_input_, fp16_output_, pooling_param_, task_id);
} else {
AvgPoolingFp16(fp16_input_, fp16_output_, pooling_param_, task_id);
......
......@@ -52,7 +52,7 @@ int PoolingCPUKernel::ReSize() {
int PoolingCPUKernel::RunImpl(int task_id) {
auto input_ptr = reinterpret_cast<float *>(in_tensors_.at(kInputIndex)->Data());
auto output_ptr = reinterpret_cast<float *>(out_tensors_.at(kOutputIndex)->Data());
- if (pooling_param_->max_pooling_) {
+ if (pooling_param_->pool_mode_ == PoolMode_MaxPool) {
switch (pooling_param_->act_type_) {
case ActType_Relu:
MaxPoolingRelu(input_ptr, output_ptr, pooling_param_, task_id);
......
......@@ -163,7 +163,7 @@ int PoolingGradCPUKernel::Run() {
auto input_ptr = reinterpret_cast<float *>(inputs_.at(0)->Data());
auto output_ptr = reinterpret_cast<float *>(outputs_.at(0)->Data());
- if (pool_param->max_pooling_) {
+ if (pool_param->pool_mode_ == PoolMode_MaxPool) {
auto ind = reinterpret_cast<int *>(inputs_.at(1)->Data());
MaxPoolingGrad(input_ptr, ind, output_ptr, pool_param);
} else {
......
......@@ -61,10 +61,14 @@ int PoolingInt8CPUKernel::ReSize() {
int PoolingInt8CPUKernel::RunImpl(int task_id) {
auto input_data = reinterpret_cast<int8_t *>(in_tensors_.at(kInputIndex)->Data());
auto output_data = reinterpret_cast<int8_t *>(out_tensors_.at(kOutputIndex)->Data());
- if (pooling_param_->max_pooling_) {
-   MaxPoolingInt8(input_data, output_data, pooling_param_, task_id);
+ if (pooling_param_->pool_mode_ == PoolMode_MaxPool) {
+   if (pooling_param_->quantize_) {
+     MaxPoolingWithQuantInt8(input_data, output_data, pooling_param_, task_id);
+   } else {
+     MaxPoolingOptInt8(input_data, output_data, pooling_param_, task_id);
+   }
} else {
-   AvgPoolingInt8(input_data, output_data, pooling_param_, task_id);
+   AvgPoolingOptInt8(input_data, output_data, pooling_param_, task_id);
}
return RET_OK;
}
......
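For context on what the opt path buys: with requantization out of the way, max pooling reduces to plain comparisons over the window. An illustrative 2x2, stride-2 inner loop under that assumption, not the actual nnacl kernel:

```cpp
#include <algorithm>
#include <cstdint>

// Max pooling on raw int8 values; valid only when input and output
// quantization parameters match (the quantize_ == false case above).
void MaxPool2x2Stride2(const int8_t *in, int8_t *out, int h, int w) {
  const int oh = h / 2, ow = w / 2;
  for (int y = 0; y < oh; ++y) {
    for (int x = 0; x < ow; ++x) {
      const int8_t *p = in + (2 * y) * w + 2 * x;  // top-left of the window
      out[y * ow + x] = std::max(std::max(p[0], p[1]),
                                 std::max(p[w], p[w + 1]));
    }
  }
}
```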
......@@ -43,13 +43,13 @@ int PoolingOpenCLKernel::Init() {
std::string source;
std::string program_name;
#endif
- if (parameter_->max_pooling_) {
+ if (parameter_->pool_mode_ == PoolMode_MaxPool) {
kernel_name = "MaxPooling2d";
#ifndef PROGRAM_WITH_IL
source = max_pool2d_source;
program_name = "MaxPooling2d";
#endif
- } else if (parameter_->avg_pooling_) {
+ } else if (parameter_->pool_mode_ == PoolMode_AvgPool) {
kernel_name = "AvgPooling2d";
#ifndef PROGRAM_WITH_IL
source = avg_pool2d_source;
......
......@@ -26,7 +26,7 @@
#include "nnacl/fp32_grad/pooling_grad.h"
namespace mindspore {
class TestPoolingGradFp32 : public mindspore::CommonTest {
public:
TestPoolingGradFp32() {}
};
......@@ -161,8 +161,7 @@ TEST_F(TestPoolingGradFp32, MaxPoolingGradFp32) {
auto pooling_param = new PoolingParameter();
InitPoolingParamFP32(pooling_param);
pooling_param->output_channel_ = 3;
- pooling_param->avg_pooling_ = false;
- pooling_param->max_pooling_ = true;
+ pooling_param->pool_mode_ = PoolMode_MaxPool;
// runtime part
printf("Calculating runtime cost...\n");
uint64_t time_avg = 0;
......@@ -215,8 +214,7 @@ TEST_F(TestPoolingGradFp32, MaxPoolingKernelGradFp32) {
// prepare stage
auto maxpool = new PoolingParameter();
InitPoolingParamFP32(maxpool);
- maxpool->avg_pooling_ = false;
- maxpool->max_pooling_ = true;
+ maxpool->pool_mode_ = PoolMode_MaxPool;
maxpool->input_h_ = 30;
maxpool->input_w_ = 30;
maxpool->input_channel_ = 3;
......@@ -268,8 +266,7 @@ TEST_F(TestPoolingGradFp32, MaxPoolingKernelGradFp32) {
auto pooling_param = new PoolingParameter();
InitPoolingParamFP32(pooling_param);
- pooling_param->avg_pooling_ = false;
- pooling_param->max_pooling_ = true;
+ pooling_param->pool_mode_ = PoolMode_MaxPool;
pooling_param->input_h_ = 10;
pooling_param->input_w_ = 10;
pooling_param->input_channel_ = 3;
......
......@@ -48,8 +48,7 @@ void InitAvgPoolingParam(PoolingParameter *param) {
param->pad_l_ = 0;
param->pad_r_ = 0;
- param->max_pooling_ = false;
- param->avg_pooling_ = true;
+ param->pool_mode_ = PoolMode_AvgPool;
}
TEST_F(TestAvgPoolingOpenCL, AvgPoolFp32) {
......
......@@ -35,8 +35,7 @@ void InitParameter(PoolingParameter *param) {
param->pad_d_ = 0;
param->pad_l_ = 0;
param->pad_r_ = 0;
- param->avg_pooling_ = false;
- param->max_pooling_ = true;
+ param->pool_mode_ = PoolMode_MaxPool;
}
TEST_F(TestMaxPoolingOpenCL, MaxPool_1_32_512_96) {
......