From cb752a690cef4ab49ffd183016817b6c3336f6d7 Mon Sep 17 00:00:00 2001 From: songhonglei413 Date: Mon, 17 Aug 2020 21:15:09 +0800 Subject: [PATCH] modify op_roi_pooling --- .../runtime/kernel/arm/fp32/roi_pooling.cc | 36 +++++-- .../src/runtime/kernel/arm/fp32/roi_pooling.h | 4 - .../kernel/arm/nnacl/fp32/roi_pooling.c | 96 ++++++++++--------- .../kernel/arm/nnacl/fp32/roi_pooling.h | 17 +++- .../kernel/arm/fp32/roi_pooling_fp32_tests.cc | 8 +- 5 files changed, 97 insertions(+), 64 deletions(-) diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/roi_pooling.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/roi_pooling.cc index 5848cca8d..fd8cda5b8 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/roi_pooling.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/roi_pooling.cc @@ -14,6 +14,7 @@ * limitations under the License. */ #include "src/runtime/kernel/arm/fp32/roi_pooling.h" +#include "src/runtime/kernel/arm/nnacl/fp32/roi_pooling.h" #include #include "schema/model_generated.h" #include "src/kernel_registry.h" @@ -35,10 +36,35 @@ int ROIPoolingCPUKernel::Init() { return ReSize(); } -int ROIPoolingCPUKernel::ReSize() { return RET_OK; } +int ROIPoolingCPUKernel::ReSize() { + auto in_shape = in_tensors_.front()->shape(); + auto out_shape = out_tensors_.front()->shape(); + int ndims = in_shape.size(); + if (ndims > 4) { + MS_LOG(ERROR) << "ROIPooling ReSzie error ,shape dim greater than 4!"; + return RET_ERROR; + } + param_->ndim_ = ndims; + param_->input_n_ = in_shape[0]; + param_->input_h_ = in_shape[1]; + param_->input_w_ = in_shape[2]; + param_->input_c_ = in_shape[3]; + param_->output_n_ = out_shape[0]; + param_->output_h_ = out_shape[1]; + param_->output_w_ = out_shape[2]; + param_->output_c_ = out_shape[3]; + param_->in_strides_[ndims - 1] = 1; + param_->out_strides_[ndims - 1] = 1; + for (int i = ndims - 2; i >= 0; --i) { + param_->in_strides_[i] = in_shape[i + 1] * param_->in_strides_[i + 1]; + param_->out_strides_[i] = out_shape[i + 1] * param_->out_strides_[i + 1]; + } + param_->thread_num_ = MSMIN(param_->op_parameter_.thread_num_, out_shape[0]); + return RET_OK; +} int ROIPoolingCPUKernel::DoExecute(int task_id) { - auto ret = ROIPooling(in_ptr_, out_ptr_, roi_ptr_, in_shape_, out_shape_, dim_, task_id, param_); + auto ret = ROIPooling(in_ptr_, out_ptr_, roi_ptr_, task_id, param_); if (ret != RET_OK) { MS_LOG(ERROR) << "ROIPooling Execute error task_id[" << task_id << "] error_code[" << ret << "]"; return ret; @@ -65,11 +91,7 @@ int ROIPoolingCPUKernel::Run() { in_ptr_ = reinterpret_cast(in_tensors_.front()->Data()); out_ptr_ = reinterpret_cast(out_tensors_.front()->Data()); roi_ptr_ = reinterpret_cast(in_tensors_.at(1)->Data()); - in_shape_ = reinterpret_cast(in_tensors_.front()->shape().data()); - out_shape_ = reinterpret_cast(out_tensors_.front()->shape().data()); - dim_ = in_tensors_.front()->shape().size(); - thread_count_ = 1; - ret = LiteBackendParallelLaunch(ROIPoolingRun, this, thread_count_); + ret = LiteBackendParallelLaunch(ROIPoolingRun, this, param_->thread_num_); if (ret != RET_OK) { MS_LOG(ERROR) << "ROIPooling error: error_code[" << ret << "]"; return ret; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/roi_pooling.h b/mindspore/lite/src/runtime/kernel/arm/fp32/roi_pooling.h index ba01a8ad4..3bcfe5a24 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/roi_pooling.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/roi_pooling.h @@ -40,11 +40,7 @@ class ROIPoolingCPUKernel : public LiteKernel { float *in_ptr_; float *out_ptr_; float *roi_ptr_; - const int *in_shape_; - const int *out_shape_; ROIPoolingParameter *param_; - int dim_; - int thread_count_; }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/roi_pooling.c b/mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/roi_pooling.c index 12a1ba7ff..06a656a7d 100644 --- a/mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/roi_pooling.c +++ b/mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/roi_pooling.c @@ -16,29 +16,31 @@ #include "nnacl/fp32/roi_pooling.h" #include +#include #include "nnacl/errorcode.h" +#include "nnacl/op_base.h" -int ROIPooling(float *in_ptr, float *out_ptr, float *roi, const int *in_shape, const int *out_shape, int dim, int tid, - ROIPoolingParameter *param) { - int num_rois = out_shape[kNHWC_N]; - int batch_size = in_shape[kNHWC_N]; - int height_ = in_shape[kNHWC_H]; - int width_ = in_shape[kNHWC_W]; - int channels_ = in_shape[kNHWC_C]; +int ROIPooling(float *in_ptr, float *out_ptr, float *roi, int tid, ROIPoolingParameter *param) { + int num_rois = param->output_n_; + int units = UP_DIV(num_rois, param->thread_num_); + int roi_st = tid * units; + int roi_end = MSMIN(num_rois, roi_st + units); + if (roi_st >= num_rois) { + return NNACL_OK; + } + int batch_size = param->input_n_; + int height_ = param->input_h_; + int width_ = param->input_w_; + int channels_ = param->input_c_; int scale = param->scale_; int pooled_height = param->pooledH_; int pooled_width = param->pooledW_; - int in_stride[DIMENSION_4D]; - int out_stride[DIMENSION_4D]; - const int roi_stride = 5; - in_stride[DIMENSION_4D - 1] = 1; - out_stride[DIMENSION_4D - 1] = 1; - for (int i = dim - 2; i >= 0; --i) { - in_stride[i] = in_stride[i + 1] * in_shape[i + 1]; - out_stride[i] = out_stride[i + 1] * out_shape[i + 1]; - } - int roi_ind_st = 0; - for (int i = 0; i < num_rois; ++i) { + int *in_strides = &(param->in_strides_); + int *out_strides = &(param->out_strides_); + int roi_stride = 5; + int roi_ind_st = roi_st * roi_stride; + float *max_c = malloc(channels_ * sizeof(float)); + for (int i = roi_st; i < roi_end; ++i) { int roi_batch_ind = (int)roi[roi_ind_st]; // batch_index if (roi_batch_ind >= batch_size) { return NNACL_ERRCODE_INDEX_OUT_OF_RANGE; @@ -53,44 +55,46 @@ int ROIPooling(float *in_ptr, float *out_ptr, float *roi, const int *in_shape, c float bin_size_h = (float)roi_height / (float)pooled_height; float bin_size_w = (float)roi_width / (float)pooled_width; - float *batch_data = in_ptr + in_stride[kNHWC_N] * roi_batch_ind; - - int out_ind = i * out_stride[0]; - for (int c = kNHWC_N; c < channels_; ++c) { - float max_v = -__FLT_MAX__; - for (int ph = 0; ph < pooled_height; ++ph) { - for (int pw = 0; pw < pooled_width; ++pw) { - int pooled_index = - i * out_stride[kNHWC_N] + ph * out_stride[kNHWC_H] + pw * out_stride[kNHWC_W] + c * out_stride[kNHWC_C]; - int hstart = (int)floorf(ph * bin_size_h); // block xi_1 - int wstart = (int)floorf(pw * bin_size_w); // block yi_1 - int hend = (int)ceilf((ph + 1) * bin_size_h); // block xi_2 - int wend = (int)ceilf((pw + 1) * bin_size_w); // block yi_2 + float *batch_data = in_ptr + in_strides[kNHWC_N] * roi_batch_ind; - hstart = MSMIN(MSMAX(hstart + roi_start_h, 0), height_); - hend = MSMIN(MSMAX(hend + roi_start_h, 0), height_); - wstart = MSMIN(MSMAX(wstart + roi_start_w, 0), width_); - wend = MSMIN(MSMAX(wend + roi_start_w, 0), width_); + int out_ind = i * out_strides[0]; + for (int ph = 0; ph < pooled_height; ++ph) { + for (int pw = 0; pw < pooled_width; ++pw) { + int hstart = (int)floorf(ph * bin_size_h); // block xi_1 + int wstart = (int)floorf(pw * bin_size_w); // block yi_1 + int hend = (int)ceilf((ph + 1) * bin_size_h); // block xi_2 + int wend = (int)ceilf((pw + 1) * bin_size_w); // block yi_2 + hstart = MSMIN(MSMAX(hstart + roi_start_h, 0), height_); + hend = MSMIN(MSMAX(hend + roi_start_h, 0), height_); + wstart = MSMIN(MSMAX(wstart + roi_start_w, 0), width_); + wend = MSMIN(MSMAX(wend + roi_start_w, 0), width_); + for (int j = 0; j < channels_; ++j) { + max_c[j] = -__FLT_MAX__; bool is_empty = (hend <= hstart) || (wend <= wstart); if (is_empty) { - max_v = 0; + max_c[j] = 0; } - int bd_index = c * in_stride[kNHWC_C] + hstart * in_stride[kNHWC_H]; - for (int h = hstart; h < hend; ++h) { - int wi = bd_index + wstart * in_stride[kNHWC_W]; - for (int w = wstart; w < wend; ++w) { - max_v = MSMAX(batch_data[wi], max_v); - // printf("bd:index: %d, data: %f, max_v: %f\n",wi,batch_data[wi],max_v); - wi += in_stride[kNHWC_W]; + } + int pooled_index = i * out_strides[0] + ph * out_strides[1] + pw * out_strides[2]; + int bd_index = hstart * in_strides[1]; + for (int h = hstart; h < hend; ++h) { + int wi = bd_index + wstart * in_strides[2]; + for (int w = wstart; w < wend; ++w) { + for (int c = 0; c < channels_; ++c) { + max_c[c] = MSMAX(batch_data[wi + c], max_c[c]); } - bd_index += in_stride[kNHWC_H]; - } - out_ptr[pooled_index] = max_v; + wi += in_strides[2]; + } // in_w end; + bd_index += in_strides[1]; + } // in_h end + for (int j = 0; j < channels_; ++j) { + out_ptr[pooled_index + j] = max_c[j]; } } } roi_ind_st += roi_stride; } + free(max_c); return NNACL_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/roi_pooling.h b/mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/roi_pooling.h index a90ff3d51..39181fcc8 100644 --- a/mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/roi_pooling.h +++ b/mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/roi_pooling.h @@ -20,16 +20,27 @@ typedef struct ROIPoolingParameter { OpParameter op_parameter_; + int in_strides_[DIMENSION_4D]; + int out_strides_[DIMENSION_4D]; + float scale_; + int ndim_; + int input_w_; + int input_h_; + int input_n_; + int input_c_; + int output_w_; + int output_h_; + int output_n_; + int output_c_; + int thread_num_; int pooledW_; int pooledH_; - float scale_; } ROIPoolingParameter; #ifdef __cplusplus extern "C" { #endif -int ROIPooling(float *in_ptr, float *out_ptr, float *roi, const int *in_shape, const int *out_shape, int dim, int tid, - ROIPoolingParameter *param); +int ROIPooling(float *in_ptr, float *out_ptr, float *roi, int tid, ROIPoolingParameter *param); #ifdef __cplusplus } #endif diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/roi_pooling_fp32_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/roi_pooling_fp32_tests.cc index 9de4d15d0..44256f12c 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/roi_pooling_fp32_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/roi_pooling_fp32_tests.cc @@ -57,10 +57,10 @@ TEST_F(TestROIPoolingFp32, Simple) { param->pooledH_ = 2; float a[] = {1, 2, 3, 4, 5, 11, 12, 13, 14, 15, 21, 22, 23, 24, 25, 31, 32, 33, 34, 35, 1, 2, 3, 4, 5, 11, 12, 13, 14, 15, 21, 22, 23, 24, 25, 31, 32, 33, 34, 35}; - float b[] = {0, 1, 1, 3, 4, 1, 1, 1, 3, 4}; - std::vector a_shape = {2, 4, 5, 1}; + float b[] = {0, 1, 1, 3, 4}; + std::vector a_shape = {1, 4, 5, 2}; std::vector b_shape = {2, 5}; - std::vector c_shape = {2, 2, 2, 1}; + std::vector c_shape = {1, 2, 2, 2}; int total_size = ROIPoolingTestInit(&inputs_, &outputs_, a, b, a_shape, b_shape, c_shape); auto ctx = new lite::Context; ctx->thread_num_ = 3; @@ -68,7 +68,7 @@ TEST_F(TestROIPoolingFp32, Simple) { new kernel::ROIPoolingCPUKernel(reinterpret_cast(param), inputs_, outputs_, ctx, nullptr); op->Init(); op->Run(); - float correct[] = {23, 25, 33, 35, 23, 25, 33, 35}; + float correct[] = {25, 31, 34, 35, 25, 31, 34, 35}; float *output = reinterpret_cast(outputs_[0]->Data()); for (int i = 0; i < 8; ++i) printf("%f ", output[i]); printf("\n"); -- GitLab