提交 2931920f 编写于 作者: S songhonglei413

op_pooling+relu

上级 cff6c357
...@@ -171,6 +171,7 @@ void MaxPooling(const float *input_ptr, float *output_ptr, PoolingParameter *poo ...@@ -171,6 +171,7 @@ void MaxPooling(const float *input_ptr, float *output_ptr, PoolingParameter *poo
tmp_max2 = fmax(tmp_max2, *(input_ptr + in_offset + 1)); tmp_max2 = fmax(tmp_max2, *(input_ptr + in_offset + 1));
tmp_max3 = fmax(tmp_max3, *(input_ptr + in_offset + 2)); tmp_max3 = fmax(tmp_max3, *(input_ptr + in_offset + 2));
tmp_max4 = fmax(tmp_max4, *(input_ptr + in_offset + 3)); tmp_max4 = fmax(tmp_max4, *(input_ptr + in_offset + 3));
#endif #endif
} }
} // win_w loop } // win_w loop
...@@ -206,3 +207,449 @@ void MaxPooling(const float *input_ptr, float *output_ptr, PoolingParameter *poo ...@@ -206,3 +207,449 @@ void MaxPooling(const float *input_ptr, float *output_ptr, PoolingParameter *poo
} // out_plane loop } // out_plane loop
} // out_batch loop } // out_batch loop
} }
void AvgPoolingRelu(const float *input_ptr, float *output_ptr, PoolingParameter *pooling_param, int task_id) {
int stride_w = pooling_param->stride_w_;
int stride_h = pooling_param->stride_h_;
int pad_w = pooling_param->pad_l_;
int pad_h = pooling_param->pad_u_;
int win_w = pooling_param->window_w_;
int win_h = pooling_param->window_h_;
int channel = pooling_param->input_channel_;
int c4 = UP_DIV(channel, C4NUM);
int in_w = pooling_param->input_w_;
int in_h = pooling_param->input_h_;
int output_w = pooling_param->output_w_;
int output_h = pooling_param->output_h_;
int output_batch = pooling_param->output_batch_;
int out_plane = output_w * output_h;
int out_tile_count = UP_DIV(out_plane, TILE_NUM);
int thread_num = pooling_param->thread_num_;
#ifdef ENABLE_NEON
float32x4_t zeros = vdupq_n_f32(0);
#endif
for (int batch = 0; batch < output_batch; batch++) {
int in_batch_offset = batch * in_h * in_w * channel;
int out_batch_offset = batch * output_h * output_w * channel;
for (int thread_id = task_id; thread_id < out_tile_count; thread_id += thread_num) {
int cal_start_index = thread_id * TILE_NUM;
int real_cal_num = (out_plane - cal_start_index) > TILE_NUM ? TILE_NUM : (out_plane - cal_start_index);
for (int i = 0; i < real_cal_num; i++) {
int index = cal_start_index + i;
int out_w_index = index % output_w;
int out_h_index = index / output_w;
int in_w_index = out_w_index * stride_w - pad_w;
int in_h_index = out_h_index * stride_h - pad_h;
int out_plane_offset = out_batch_offset + index * channel;
for (int j = 0; j < c4 - 1; j++) {
int in_channel_offset = in_batch_offset + j * C4NUM;
int out_channel_offset = out_plane_offset + j * C4NUM;
#ifdef ENABLE_NEON
float32x4_t tmp_avg = vdupq_n_f32(0);
#else
float tmp_avg1 = 0;
float tmp_avg2 = 0;
float tmp_avg3 = 0;
float tmp_avg4 = 0;
#endif
int real_count = 0;
for (int h = 0; h < win_h; h++) {
for (int w = 0; w < win_w; w++) {
if ((in_h_index + h) < 0 || (in_h_index + h) >= in_h || (in_w_index + w) < 0 ||
(in_w_index + w) >= in_w) {
continue;
} else {
int in_offset = in_channel_offset + ((in_h_index + h) * in_w + in_w_index + w) * channel;
#ifdef ENABLE_NEON
tmp_avg = vaddq_f32(tmp_avg, vld1q_f32(input_ptr + in_offset));
#else
tmp_avg1 += *(input_ptr + in_offset);
tmp_avg2 += *(input_ptr + in_offset + 1);
tmp_avg3 += *(input_ptr + in_offset + 2);
tmp_avg4 += *(input_ptr + in_offset + 3);
#endif
++real_count;
}
} // win_w loop
} // win_h loop
#ifdef ENABLE_NEON
tmp_avg = vmaxq_f32(tmp_avg, zeros);
vst1q_f32(output_ptr + out_channel_offset, tmp_avg / vdupq_n_f32(real_count));
#else
tmp_avg1 = fmax(tmp_avg1, 0);
tmp_avg2 = fmax(tmp_avg2, 0);
tmp_avg3 = fmax(tmp_avg3, 0);
tmp_avg4 = fmax(tmp_avg4, 0);
*(output_ptr + out_channel_offset) = tmp_avg1 / (float)real_count;
*(output_ptr + out_channel_offset + 1) = tmp_avg2 / (float)real_count;
*(output_ptr + out_channel_offset + 2) = tmp_avg3 / (float)real_count;
*(output_ptr + out_channel_offset + 3) = tmp_avg4 / (float)real_count;
#endif
} // ic4-1 loop
int channel_s = (c4 - 1) * C4NUM;
for (int k = channel_s; k < channel; k++) {
int in_channel_offset = in_batch_offset + k;
int out_channel_offset = out_plane_offset + k;
float tmp_avg = 0;
int real_count = 0;
for (int h = 0; h < win_h; h++) {
for (int w = 0; w < win_w; w++) {
if ((in_h_index + h) < 0 || (in_h_index + h) >= in_h || (in_w_index + w) < 0 ||
(in_w_index + w) >= in_w) {
continue;
} else {
int in_offset = in_channel_offset + ((in_h_index + h) * in_w + in_w_index + w) * channel;
tmp_avg += *(input_ptr + in_offset);
++real_count;
}
} // win_w loop
} // win_h loop
tmp_avg = fmax(tmp_avg, 0);
*(output_ptr + out_channel_offset) = tmp_avg / (float)real_count;
} // channel_res loop
} // real_cal_num loop
} // out_plane loop
} // out_batch loop
}
void MaxPoolingRelu(const float *input_ptr, float *output_ptr, PoolingParameter *pooling_param, int task_id) {
int stride_w = pooling_param->stride_w_;
int stride_h = pooling_param->stride_h_;
int pad_w = pooling_param->pad_l_;
int pad_h = pooling_param->pad_u_;
int win_w = pooling_param->window_w_;
int win_h = pooling_param->window_h_;
int channel = pooling_param->input_channel_;
int in_w = pooling_param->input_w_;
int in_h = pooling_param->input_h_;
int output_w = pooling_param->output_w_;
int output_h = pooling_param->output_h_;
int output_batch = pooling_param->output_batch_;
int out_plane = output_w * output_h;
int out_tile_count = UP_DIV(out_plane, TILE_NUM);
int thread_num = pooling_param->thread_num_;
int c4 = UP_DIV(channel, C4NUM);
// input channel is equal to output channel
#ifdef ENABLE_NEON
float32x4_t zeros = vdupq_n_f32(0);
#endif
for (int batch = 0; batch < output_batch; batch++) {
int in_batch_offset = batch * in_h * in_w * channel;
int out_batch_offset = batch * output_h * output_w * channel;
for (int thread_id = task_id; thread_id < out_tile_count; thread_id += thread_num) {
int cal_start_index = thread_id * TILE_NUM;
int real_cal_num = (out_plane - cal_start_index) > TILE_NUM ? TILE_NUM : (out_plane - cal_start_index);
for (int i = 0; i < real_cal_num; i++) {
int index = cal_start_index + i;
int out_w_index = index % output_w;
int out_h_index = index / output_w;
int in_w_index = out_w_index * stride_w - pad_w;
int in_h_index = out_h_index * stride_h - pad_h;
int out_plane_offset = out_batch_offset + index * channel;
for (int j = 0; j < c4 - 1; j++) {
int in_channel_offset = in_batch_offset + j * C4NUM;
int out_channel_offset = out_plane_offset + j * C4NUM;
#ifdef ENABLE_NEON
float32x4_t tmp_max = vdupq_n_f32(-FLT_MAX);
#else
float tmp_max1 = -FLT_MAX;
float tmp_max2 = -FLT_MAX;
float tmp_max3 = -FLT_MAX;
float tmp_max4 = -FLT_MAX;
#endif
for (int h = 0; h < win_h; h++) {
for (int w = 0; w < win_w; w++) {
if ((in_h_index + h) < 0 || (in_h_index + h) >= in_h || (in_w_index + w) < 0 ||
(in_w_index + w) >= in_w) {
continue;
} else {
int in_offset = in_channel_offset + ((in_h_index + h) * in_w + in_w_index + w) * channel;
#ifdef ENABLE_NEON
tmp_max = vmaxq_f32(tmp_max, vld1q_f32(input_ptr + in_offset));
#else
tmp_max1 = fmax(tmp_max1, *(input_ptr + in_offset));
tmp_max2 = fmax(tmp_max2, *(input_ptr + in_offset + 1));
tmp_max3 = fmax(tmp_max3, *(input_ptr + in_offset + 2));
tmp_max4 = fmax(tmp_max4, *(input_ptr + in_offset + 3));
#endif
}
} // win_w loop
} // win_h loop
#ifdef ENABLE_NEON
tmp_max = vmaxq_f32(tmp_max, zeros);
vst1q_f32(output_ptr + out_channel_offset, tmp_max);
#else
// relu:
tmp_max1 = fmax(tmp_max1, 0);
tmp_max2 = fmax(tmp_max2, 0);
tmp_max3 = fmax(tmp_max3, 0);
tmp_max4 = fmax(tmp_max4, 0);
*(output_ptr + out_channel_offset) = tmp_max1;
*(output_ptr + out_channel_offset + 1) = tmp_max2;
*(output_ptr + out_channel_offset + 2) = tmp_max3;
*(output_ptr + out_channel_offset + 3) = tmp_max4;
#endif
} // ic4-1 loop
int channel_s = (c4 - 1) * C4NUM;
for (int k = channel_s; k < channel; k++) {
int in_channel_offset = in_batch_offset + k;
int out_channel_offset = out_plane_offset + k;
float tmp_max = -FLT_MAX;
for (int h = 0; h < win_h; h++) {
for (int w = 0; w < win_w; w++) {
if ((in_h_index + h) < 0 || (in_h_index + h) >= in_h || (in_w_index + w) < 0 ||
(in_w_index + w) >= in_w) {
continue;
} else {
int in_offset = in_channel_offset + ((in_h_index + h) * in_w + in_w_index + w) * channel;
tmp_max = fmax(tmp_max, *(input_ptr + in_offset));
tmp_max = fmax(tmp_max, 0);
}
} // win_w loop
} // win_h loop
*(output_ptr + out_channel_offset) = tmp_max;
} // channel_res loop
} // real_cal_num loop
} // out_plane loop
} // out_batch loop
}
void AvgPoolingRelu6(const float *input_ptr, float *output_ptr, PoolingParameter *pooling_param, int task_id) {
int stride_w = pooling_param->stride_w_;
int stride_h = pooling_param->stride_h_;
int pad_w = pooling_param->pad_l_;
int pad_h = pooling_param->pad_u_;
int win_w = pooling_param->window_w_;
int win_h = pooling_param->window_h_;
int channel = pooling_param->input_channel_;
int c4 = UP_DIV(channel, C4NUM);
int in_w = pooling_param->input_w_;
int in_h = pooling_param->input_h_;
int output_w = pooling_param->output_w_;
int output_h = pooling_param->output_h_;
int output_batch = pooling_param->output_batch_;
int out_plane = output_w * output_h;
int out_tile_count = UP_DIV(out_plane, TILE_NUM);
int thread_num = pooling_param->thread_num_;
// input channel is equal to output channel
#ifdef ENABLE_NEON
float32x4_t zeros = vdupq_n_f32(0);
float32x4_t bounds = vdupq_n_f32(6);
#endif
for (int batch = 0; batch < output_batch; batch++) {
int in_batch_offset = batch * in_h * in_w * channel;
int out_batch_offset = batch * output_h * output_w * channel;
for (int thread_id = task_id; thread_id < out_tile_count; thread_id += thread_num) {
int cal_start_index = thread_id * TILE_NUM;
int real_cal_num = (out_plane - cal_start_index) > TILE_NUM ? TILE_NUM : (out_plane - cal_start_index);
for (int i = 0; i < real_cal_num; i++) {
int index = cal_start_index + i;
int out_w_index = index % output_w;
int out_h_index = index / output_w;
int in_w_index = out_w_index * stride_w - pad_w;
int in_h_index = out_h_index * stride_h - pad_h;
int out_plane_offset = out_batch_offset + index * channel;
for (int j = 0; j < c4 - 1; j++) {
int in_channel_offset = in_batch_offset + j * C4NUM;
int out_channel_offset = out_plane_offset + j * C4NUM;
#ifdef ENABLE_NEON
float32x4_t tmp_avg = vdupq_n_f32(0);
#else
float tmp_avg1 = 0;
float tmp_avg2 = 0;
float tmp_avg3 = 0;
float tmp_avg4 = 0;
#endif
int real_count = 0;
for (int h = 0; h < win_h; h++) {
for (int w = 0; w < win_w; w++) {
if ((in_h_index + h) < 0 || (in_h_index + h) >= in_h || (in_w_index + w) < 0 ||
(in_w_index + w) >= in_w) {
continue;
} else {
int in_offset = in_channel_offset + ((in_h_index + h) * in_w + in_w_index + w) * channel;
#ifdef ENABLE_NEON
tmp_avg = vaddq_f32(tmp_avg, vld1q_f32(input_ptr + in_offset));
#else
tmp_avg1 += *(input_ptr + in_offset);
tmp_avg2 += *(input_ptr + in_offset + 1);
tmp_avg3 += *(input_ptr + in_offset + 2);
tmp_avg4 += *(input_ptr + in_offset + 3);
#endif
++real_count;
}
} // win_w loop
} // win_h loop
#ifdef ENABLE_NEON
float32x4_t count = vdupq_n_f32(real_count);
tmp_avg = vdivq_f32(tmp_avg, count);
tmp_avg = vmaxq_f32(tmp_avg, zeros);
tmp_avg = vminq_f32(tmp_avg, bounds);
vst1q_f32(output_ptr + out_channel_offset, tmp_avg);
#else
tmp_avg1 /= (float)real_count;
tmp_avg2 /= (float)real_count;
tmp_avg3 /= (float)real_count;
tmp_avg4 /= (float)real_count;
tmp_avg1 = fmax(tmp_avg1, 0);
tmp_avg2 = fmax(tmp_avg2, 0);
tmp_avg3 = fmax(tmp_avg3, 0);
tmp_avg4 = fmax(tmp_avg4, 0);
tmp_avg1 = fmin(tmp_avg1, 6);
tmp_avg2 = fmin(tmp_avg2, 6);
tmp_avg3 = fmin(tmp_avg3, 6);
tmp_avg4 = fmin(tmp_avg4, 6);
*(output_ptr + out_channel_offset) = tmp_avg1;
*(output_ptr + out_channel_offset + 1) = tmp_avg2;
*(output_ptr + out_channel_offset + 2) = tmp_avg3;
*(output_ptr + out_channel_offset + 3) = tmp_avg4;
#endif
} // ic4-1 loop
int channel_s = (c4 - 1) * C4NUM;
for (int k = channel_s; k < channel; k++) {
int in_channel_offset = in_batch_offset + k;
int out_channel_offset = out_plane_offset + k;
float tmp_avg = 0;
int real_count = 0;
for (int h = 0; h < win_h; h++) {
for (int w = 0; w < win_w; w++) {
if ((in_h_index + h) < 0 || (in_h_index + h) >= in_h || (in_w_index + w) < 0 ||
(in_w_index + w) >= in_w) {
continue;
} else {
int in_offset = in_channel_offset + ((in_h_index + h) * in_w + in_w_index + w) * channel;
tmp_avg += *(input_ptr + in_offset);
++real_count;
}
} // win_w loop
} // win_h loop
tmp_avg /= (float)real_count;
tmp_avg = fmax(tmp_avg, 0);
tmp_avg = fmin(tmp_avg, 6);
*(output_ptr + out_channel_offset) = tmp_avg;
} // channel_res loop
} // real_cal_num loop
} // out_plane loop
} // out_batch loop
}
void MaxPoolingRelu6(const float *input_ptr, float *output_ptr, PoolingParameter *pooling_param, int task_id) {
int stride_w = pooling_param->stride_w_;
int stride_h = pooling_param->stride_h_;
int pad_w = pooling_param->pad_l_;
int pad_h = pooling_param->pad_u_;
int win_w = pooling_param->window_w_;
int win_h = pooling_param->window_h_;
int channel = pooling_param->input_channel_;
int in_w = pooling_param->input_w_;
int in_h = pooling_param->input_h_;
int output_w = pooling_param->output_w_;
int output_h = pooling_param->output_h_;
int output_batch = pooling_param->output_batch_;
int out_plane = output_w * output_h;
int out_tile_count = UP_DIV(out_plane, TILE_NUM);
int thread_num = pooling_param->thread_num_;
int c4 = UP_DIV(channel, C4NUM);
// input channel is equal to output channel
#ifdef ENABLE_NEON
float32x4_t zeros = vdupq_n_f32(0);
float32x4_t bounds = vdupq_n_f32(6);
#endif
for (int batch = 0; batch < output_batch; batch++) {
int in_batch_offset = batch * in_h * in_w * channel;
int out_batch_offset = batch * output_h * output_w * channel;
for (int thread_id = task_id; thread_id < out_tile_count; thread_id += thread_num) {
int cal_start_index = thread_id * TILE_NUM;
int real_cal_num = (out_plane - cal_start_index) > TILE_NUM ? TILE_NUM : (out_plane - cal_start_index);
for (int i = 0; i < real_cal_num; i++) {
int index = cal_start_index + i;
int out_w_index = index % output_w;
int out_h_index = index / output_w;
int in_w_index = out_w_index * stride_w - pad_w;
int in_h_index = out_h_index * stride_h - pad_h;
int out_plane_offset = out_batch_offset + index * channel;
for (int j = 0; j < c4 - 1; j++) {
int in_channel_offset = in_batch_offset + j * C4NUM;
int out_channel_offset = out_plane_offset + j * C4NUM;
#ifdef ENABLE_NEON
float32x4_t tmp_max = vdupq_n_f32(-FLT_MAX);
#else
float tmp_max1 = -FLT_MAX;
float tmp_max2 = -FLT_MAX;
float tmp_max3 = -FLT_MAX;
float tmp_max4 = -FLT_MAX;
#endif
for (int h = 0; h < win_h; h++) {
for (int w = 0; w < win_w; w++) {
if ((in_h_index + h) < 0 || (in_h_index + h) >= in_h || (in_w_index + w) < 0 ||
(in_w_index + w) >= in_w) {
continue;
} else {
int in_offset = in_channel_offset + ((in_h_index + h) * in_w + in_w_index + w) * channel;
#ifdef ENABLE_NEON
tmp_max = vmaxq_f32(tmp_max, vld1q_f32(input_ptr + in_offset));
#else
tmp_max1 = fmax(tmp_max1, *(input_ptr + in_offset));
tmp_max2 = fmax(tmp_max2, *(input_ptr + in_offset + 1));
tmp_max3 = fmax(tmp_max3, *(input_ptr + in_offset + 2));
tmp_max4 = fmax(tmp_max4, *(input_ptr + in_offset + 3));
#endif
}
} // win_w loop
} // win_h loop
#ifdef ENABLE_NEON
tmp_max = vmaxq_f32(tmp_max, zeros);
tmp_max = vminq_f32(tmp_max, bounds);
vst1q_f32(output_ptr + out_channel_offset, tmp_max);
#else
tmp_max1 = fmax(tmp_max1, 0);
tmp_max2 = fmax(tmp_max2, 0);
tmp_max3 = fmax(tmp_max3, 0);
tmp_max4 = fmax(tmp_max4, 0);
tmp_max1 = fmin(tmp_max1, 6);
tmp_max2 = fmin(tmp_max2, 6);
tmp_max3 = fmin(tmp_max3, 6);
tmp_max4 = fmin(tmp_max4, 6);
*(output_ptr + out_channel_offset) = tmp_max1;
*(output_ptr + out_channel_offset + 1) = tmp_max2;
*(output_ptr + out_channel_offset + 2) = tmp_max3;
*(output_ptr + out_channel_offset + 3) = tmp_max4;
#endif
} // ic4-1 loop
int channel_s = (c4 - 1) * C4NUM;
for (int k = channel_s; k < channel; k++) {
int in_channel_offset = in_batch_offset + k;
int out_channel_offset = out_plane_offset + k;
float tmp_max = -FLT_MAX;
for (int h = 0; h < win_h; h++) {
for (int w = 0; w < win_w; w++) {
if ((in_h_index + h) < 0 || (in_h_index + h) >= in_h || (in_w_index + w) < 0 ||
(in_w_index + w) >= in_w) {
continue;
} else {
int in_offset = in_channel_offset + ((in_h_index + h) * in_w + in_w_index + w) * channel;
tmp_max = fmax(tmp_max, *(input_ptr + in_offset));
tmp_max = fmax(tmp_max, 0);
tmp_max = fmin(tmp_max, 6);
}
} // win_w loop
} // win_h loop
*(output_ptr + out_channel_offset) = tmp_max;
} // channel_res loop
} // real_cal_num loop
} // out_plane loop
} // out_batch loop
}
...@@ -30,6 +30,14 @@ extern "C" { ...@@ -30,6 +30,14 @@ extern "C" {
void AvgPooling(const float *input_ptr, float *output_ptr, PoolingParameter *pooling_param, int task_id); void AvgPooling(const float *input_ptr, float *output_ptr, PoolingParameter *pooling_param, int task_id);
void MaxPooling(const float *input_ptr, float *output_ptr, PoolingParameter *pooling_param, int task_id); void MaxPooling(const float *input_ptr, float *output_ptr, PoolingParameter *pooling_param, int task_id);
void AvgPoolingRelu(const float *input_ptr, float *output_ptr, PoolingParameter *pooling_param, int task_id);
void MaxPoolingRelu(const float *input_ptr, float *output_ptr, PoolingParameter *pooling_param, int task_id);
void AvgPoolingRelu6(const float *input_ptr, float *output_ptr, PoolingParameter *pooling_param, int task_id);
void MaxPoolingRelu6(const float *input_ptr, float *output_ptr, PoolingParameter *pooling_param, int task_id);
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif
......
...@@ -44,6 +44,7 @@ typedef struct PoolingParameter { ...@@ -44,6 +44,7 @@ typedef struct PoolingParameter {
int stride_w_; int stride_w_;
int stride_h_; int stride_h_;
int thread_num_; int thread_num_;
ActType act_type_;
} PoolingParameter; } PoolingParameter;
#endif // MINDSPORE_LITE_NNACL_POOLING_PARAMETER_H_ #endif // MINDSPORE_LITE_NNACL_POOLING_PARAMETER_H_
...@@ -290,6 +290,7 @@ table Pooling { ...@@ -290,6 +290,7 @@ table Pooling {
padLeft: int; padLeft: int;
padRight: int; padRight: int;
roundMode: RoundMode; roundMode: RoundMode;
activationType: ActivationType = 0;
} }
table DepthwiseConv2D { table DepthwiseConv2D {
......
...@@ -36,6 +36,7 @@ int Pooling::GetPadDown() const { return this->primitive_->value.AsPooling()->pa ...@@ -36,6 +36,7 @@ int Pooling::GetPadDown() const { return this->primitive_->value.AsPooling()->pa
int Pooling::GetPadLeft() const { return this->primitive_->value.AsPooling()->padLeft; } int Pooling::GetPadLeft() const { return this->primitive_->value.AsPooling()->padLeft; }
int Pooling::GetPadRight() const { return this->primitive_->value.AsPooling()->padRight; } int Pooling::GetPadRight() const { return this->primitive_->value.AsPooling()->padRight; }
int Pooling::GetRoundMode() const { return this->primitive_->value.AsPooling()->roundMode; } int Pooling::GetRoundMode() const { return this->primitive_->value.AsPooling()->roundMode; }
int Pooling::GetActivationType() const { return this->primitive_->value.AsPooling()->activationType; }
void Pooling::SetFormat(int format) { this->primitive_->value.AsPooling()->format = (schema::Format)format; } void Pooling::SetFormat(int format) { this->primitive_->value.AsPooling()->format = (schema::Format)format; }
void Pooling::SetPoolingMode(int pooling_mode) { void Pooling::SetPoolingMode(int pooling_mode) {
...@@ -54,6 +55,9 @@ void Pooling::SetPadRight(int pad_right) { this->primitive_->value.AsPooling()-> ...@@ -54,6 +55,9 @@ void Pooling::SetPadRight(int pad_right) { this->primitive_->value.AsPooling()->
void Pooling::SetRoundMode(int round_mode) { void Pooling::SetRoundMode(int round_mode) {
this->primitive_->value.AsPooling()->roundMode = (schema::RoundMode)round_mode; this->primitive_->value.AsPooling()->roundMode = (schema::RoundMode)round_mode;
} }
void Pooling::SetActivationType(int activation_type) {
this->primitive_->value.AsPooling()->activationType = (schema::ActivationType)activation_type;
}
int Pooling::UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &inputs) { int Pooling::UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &inputs) {
if (this->primitive_ == nullptr) { if (this->primitive_ == nullptr) {
...@@ -130,6 +134,7 @@ int Pooling::GetPadDown() const { return this->primitive_->value_as_Pooling()->p ...@@ -130,6 +134,7 @@ int Pooling::GetPadDown() const { return this->primitive_->value_as_Pooling()->p
int Pooling::GetPadLeft() const { return this->primitive_->value_as_Pooling()->padLeft(); } int Pooling::GetPadLeft() const { return this->primitive_->value_as_Pooling()->padLeft(); }
int Pooling::GetPadRight() const { return this->primitive_->value_as_Pooling()->padRight(); } int Pooling::GetPadRight() const { return this->primitive_->value_as_Pooling()->padRight(); }
int Pooling::GetRoundMode() const { return this->primitive_->value_as_Pooling()->roundMode(); } int Pooling::GetRoundMode() const { return this->primitive_->value_as_Pooling()->roundMode(); }
int Pooling::GetActivationType() const { return this->primitive_->value_as_Pooling()->activationType(); }
#endif #endif
......
...@@ -44,6 +44,7 @@ class Pooling : public PrimitiveC { ...@@ -44,6 +44,7 @@ class Pooling : public PrimitiveC {
void SetPadLeft(int pad_left); void SetPadLeft(int pad_left);
void SetPadRight(int pad_right); void SetPadRight(int pad_right);
void SetRoundMode(int round_mode); void SetRoundMode(int round_mode);
void SetActivationType(int activation_type);
#else #else
explicit Pooling(schema::Primitive *primitive) : PrimitiveC(primitive) {} explicit Pooling(schema::Primitive *primitive) : PrimitiveC(primitive) {}
#endif #endif
...@@ -61,6 +62,7 @@ class Pooling : public PrimitiveC { ...@@ -61,6 +62,7 @@ class Pooling : public PrimitiveC {
int GetPadLeft() const; int GetPadLeft() const;
int GetPadRight() const; int GetPadRight() const;
int GetRoundMode() const; int GetRoundMode() const;
int GetActivationType() const;
int PadUp() const; int PadUp() const;
int PadDown() const; int PadDown() const;
...@@ -74,7 +76,7 @@ class Pooling : public PrimitiveC { ...@@ -74,7 +76,7 @@ class Pooling : public PrimitiveC {
int pad_d_ = 0; int pad_d_ = 0;
int pad_l_ = 0; int pad_l_ = 0;
int pad_r_ = 0; int pad_r_ = 0;
}; }; // namespace lite
} // namespace lite } // namespace lite
} // namespace mindspore } // namespace mindspore
......
...@@ -314,6 +314,14 @@ OpParameter *PopulatePoolingParameter(const mindspore::lite::PrimitiveC *primiti ...@@ -314,6 +314,14 @@ OpParameter *PopulatePoolingParameter(const mindspore::lite::PrimitiveC *primiti
pooling_param->round_ceil_ = false; pooling_param->round_ceil_ = false;
break; break;
} }
if (pooling_primitive->GetActivationType() == schema::ActivationType_RELU) {
pooling_param->act_type_ = ActType_Relu;
} else if (pooling_primitive->GetActivationType() == schema::ActivationType_RELU6) {
pooling_param->act_type_ = ActType_Relu6;
} else {
pooling_param->act_type_ = ActType_No;
}
return reinterpret_cast<OpParameter *>(pooling_param); return reinterpret_cast<OpParameter *>(pooling_param);
} }
......
...@@ -53,9 +53,27 @@ int PoolingCPUKernel::RunImpl(int task_id) { ...@@ -53,9 +53,27 @@ int PoolingCPUKernel::RunImpl(int task_id) {
auto input_ptr = reinterpret_cast<float *>(in_tensors_.at(kInputIndex)->Data()); auto input_ptr = reinterpret_cast<float *>(in_tensors_.at(kInputIndex)->Data());
auto output_ptr = reinterpret_cast<float *>(out_tensors_.at(kOutputIndex)->Data()); auto output_ptr = reinterpret_cast<float *>(out_tensors_.at(kOutputIndex)->Data());
if (pooling_param_->max_pooling_) { if (pooling_param_->max_pooling_) {
MaxPooling(input_ptr, output_ptr, pooling_param_, task_id); switch (pooling_param_->act_type_) {
case ActType_Relu:
MaxPoolingRelu(input_ptr, output_ptr, pooling_param_, task_id);
break;
case ActType_Relu6:
MaxPoolingRelu6(input_ptr, output_ptr, pooling_param_, task_id);
break;
default:
MaxPooling(input_ptr, output_ptr, pooling_param_, task_id);
}
} else { } else {
AvgPooling(input_ptr, output_ptr, pooling_param_, task_id); switch (pooling_param_->act_type_) {
case ActType_Relu:
AvgPoolingRelu(input_ptr, output_ptr, pooling_param_, task_id);
break;
case ActType_Relu6:
AvgPoolingRelu6(input_ptr, output_ptr, pooling_param_, task_id);
break;
default:
AvgPooling(input_ptr, output_ptr, pooling_param_, task_id);
}
} }
return RET_OK; return RET_OK;
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册