// Patch overview (review notes placed ahead of the first diff header, so they are not part of any hunk).
//
// This unified diff touches five files under mindspore/lite:
//
// 1. nnacl/fp32/matmul.c, RowMajor2Col12Major:
//    - adds row_up_12 = UP_ROUND(row, C12NUM);
//    - deletes the loop that, for ci below col4, stored 0 into the C12NUM entries at dst_r + ci * C12NUM;
//    - the trailing loop that stores 0 into dst_r[i * C12NUM] for every i below col now runs
//      while ri is below row_up_12 instead of row12.
//
// 2. src/runtime/kernel/arm/fp16/convolution_1x1_fp16.cc, Convolution1x1FP16CPUKernel::Init:
//    - the early return on !InferShapeDone() moves from the top of Init to just before ReSize(),
//      so matmul_param_ is allocated and InitWeightBias() is called before that check.
//
// 3. src/runtime/kernel/arm/fp32/convolution_1x1.cc, Convolution1x1CPUKernel::Init:
//    - same reordering: InitConv1x1BiasWeight() is called before the !InferShapeDone() early return.
//
// 4. src/runtime/kernel/arm/fp32/deconvolution.cc, DeConvolutionCPUKernel:
//    - destructor: drops the FreeTmpBuffer() call; weight_ptr_ is freed inline after matmul_param_ is deleted;
//    - FreeTmpBuffer() is removed (it released weight_ptr_ and pack_input_ with free());
//    - ReSize(): no longer calls FreeTmpBuffer() or InitWeightBias();
//    - InitWeightBias(): takes input_channel, output_channel, kernel height and kernel width from
//      in_tensors_.at(kWeightIndex) via Batch(), Channel(), Height() and Width() instead of conv_param_ fields,
//      and passes kernel_w_ * kernel_h_ where kernel_plane_ was passed before;
//    - InitParam(): the malloc of pack_input_ is removed;
//    - Init(): now calls InitWeightBias() first, logs and returns its error code on failure,
//      and only then checks InferShapeDone();
//    - InitRunBuf(): allocates pack_input_ through ctx_->allocator->Malloc with
//      matmul_param_->row_12_ * matmul_param_->deep_ * sizeof(float) bytes, returning RET_ERROR on nullptr;
//    - FreeRunBuf(): releases pack_input_ through ctx_->allocator->Free and resets it to nullptr.
//
// 5. src/runtime/kernel/arm/fp32/deconvolution.h:
//    - removes the declaration of FreeTmpBuffer().
//
// NOTE(review): the hunk bodies below appear to have been joined onto a few long lines; the original
//   line breaks presumably need restoring before a patch tool would accept them - confirm against upstream.
// NOTE(review): several reinterpret_cast( calls carry no template argument; this looks like extraction
//   loss (presumably a float pointer type was given) - confirm against the upstream commit.
// NOTE(review): in InitRunBuf a failed tmp_buffer_ allocation returns RET_NULL_PTR while the added
//   pack_input_ failure path returns RET_ERROR - confirm whether the mismatch is intended.
// NOTE(review): kernel_h_ and kernel_w_ in InitWeightBias are locals declared with auto; the trailing
//   underscore resembles the member naming used elsewhere in this patch - consider renaming.
diff --git a/mindspore/lite/nnacl/fp32/matmul.c b/mindspore/lite/nnacl/fp32/matmul.c index 91c4691c5a0e72b41ed912bd52a2c4d39d73dc30..ee9dec656f4785b4a7bf1f9016f077a07fdb74e7 100644 --- a/mindspore/lite/nnacl/fp32/matmul.c +++ b/mindspore/lite/nnacl/fp32/matmul.c @@ -41,6 +41,7 @@ void RowMajor2Row12Major(float *src_ptr, float *dst_ptr, int row, int col) { } void RowMajor2Col12Major(float *src_ptr, float *dst_ptr, size_t row, size_t col) { + size_t row_up_12 = UP_ROUND(row, C12NUM); size_t row12 = row / C12NUM * C12NUM; size_t col4 = col / C4NUM * C4NUM; float *src_r = src_ptr; @@ -129,12 +130,6 @@ void RowMajor2Col12Major(float *src_ptr, float *dst_ptr, size_t row, size_t col) dst_c[i] = src_c[i * col]; } } - for (; ci < col4; ci++) { - float *dst_c = dst_r + ci * C12NUM; - for (size_t i = 0; i < C12NUM; i++) { - dst_c[i] = 0; - } - } src_r += C12NUM * col; dst_r += C12NUM * col; } @@ -147,7 +142,7 @@ void RowMajor2Col12Major(float *src_ptr, float *dst_ptr, size_t row, size_t col) dst_r += 1; } - for (; ri < row12; ri++) { + for (; ri < row_up_12; ri++) { for (size_t i = 0; i < col; i++) { dst_r[i * C12NUM] = 0; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.cc index 1708b11105f62e5dd02486196dcb532efc7526dd..7c0e35c706c05918ff1f9ffe933d17cf863b83c3 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.cc @@ -116,21 +116,19 @@ int Convolution1x1FP16CPUKernel::InitWeightBias() { } int Convolution1x1FP16CPUKernel::Init() { - if (!InferShapeDone()) { - return RET_OK; - } - matmul_param_ = new (std::nothrow) MatMulParameter(); if (matmul_param_ == nullptr) { MS_LOG(ERROR) << "Init matmul_param_ failed."; return RET_ERROR; } - int ret = InitWeightBias(); if (ret != RET_OK) { MS_LOG(ERROR) << "Init weight bias failed."; return ret; } + if (!InferShapeDone()) { + return RET_OK; + 
} return ReSize(); } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_1x1.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_1x1.cc index 56e1cbe4925e0c4019eaaf15226e2f2bbd2ba0d0..983cdddd1b4dcc37d4211bfa19ada2ca78cd923c 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_1x1.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_1x1.cc @@ -125,15 +125,14 @@ void Convolution1x1CPUKernel::Pre1x1Trans(float *src_input, float *src_output) { } int Convolution1x1CPUKernel::Init() { - if (!InferShapeDone()) { - return RET_OK; - } - int error_code = InitConv1x1BiasWeight(); if (error_code != RET_OK) { MS_LOG(ERROR) << "Convolution base init failed."; return error_code; } + if (!InferShapeDone()) { + return RET_OK; + } return ReSize(); } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution.cc index bc831df25ad72fec819315e958ac6953ff7b26b4..26870046df65131a6472b010b81e232fc1a11098 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution.cc @@ -26,27 +26,17 @@ using mindspore::schema::PrimitiveType_DeConv2D; namespace mindspore::kernel { DeConvolutionCPUKernel::~DeConvolutionCPUKernel() { - FreeTmpBuffer(); if (matmul_param_ != nullptr) { delete matmul_param_; matmul_param_ = nullptr; } -} - -void DeConvolutionCPUKernel::FreeTmpBuffer() { if (weight_ptr_ != nullptr) { free(weight_ptr_); weight_ptr_ = nullptr; } - if (pack_input_ != nullptr) { - free(pack_input_); - pack_input_ = nullptr; - } - return; } int DeConvolutionCPUKernel::ReSize() { - FreeTmpBuffer(); ConvolutionBaseCPUKernel::Init(); int error_code = InitParam(); @@ -54,36 +44,35 @@ int DeConvolutionCPUKernel::ReSize() { MS_LOG(ERROR) << "deconv InitParam error!ret: " << error_code; return error_code; } - - error_code = InitWeightBias(); - if (error_code != RET_OK) { - MS_LOG(ERROR) << "deconv InitWeightBias 
error!ret: " << error_code; - return error_code; - } return RET_OK; } int DeConvolutionCPUKernel::InitWeightBias() { - bias_data_ = malloc(UP_ROUND(conv_param_->output_channel_, C4NUM) * sizeof(float)); + auto weight_tensor = in_tensors_.at(kWeightIndex); + auto input_channel = weight_tensor->Batch(); + auto output_channel = weight_tensor->Channel(); + auto kernel_h_ = weight_tensor->Height(); + auto kernel_w_ = weight_tensor->Width(); + + bias_data_ = malloc(UP_ROUND(output_channel, C4NUM) * sizeof(float)); if (bias_data_ == nullptr) { MS_LOG(ERROR) << "deconv malloc bias_data_ error!"; return RET_ERROR; } - memset(bias_data_, 0, UP_ROUND(conv_param_->output_channel_, C4NUM) * sizeof(float)); + memset(bias_data_, 0, UP_ROUND(output_channel, C4NUM) * sizeof(float)); if (in_tensors_.size() == 3) { - memcpy(bias_data_, in_tensors_[2]->Data(), conv_param_->output_channel_ * sizeof(float)); + memcpy(bias_data_, in_tensors_[2]->Data(), output_channel * sizeof(float)); } - size_t weight_pack_size = conv_param_->input_channel_ * conv_param_->kernel_w_ * conv_param_->kernel_h_ * - UP_ROUND(conv_param_->output_channel_, C8NUM) * sizeof(float); + size_t weight_pack_size = input_channel * kernel_w_ * kernel_h_ * UP_ROUND(output_channel, C8NUM) * sizeof(float); weight_ptr_ = reinterpret_cast(malloc(weight_pack_size)); if (weight_ptr_ == nullptr) { MS_LOG(ERROR) << "deconv malloc weight_ptr_ error!"; return RET_ERROR; } memset(weight_ptr_, 0, weight_pack_size); - PackNHWCToC8HWN8Fp32(reinterpret_cast(in_tensors_[1]->Data()), weight_ptr_, conv_param_->input_channel_, - kernel_plane_, conv_param_->output_channel_); + PackNHWCToC8HWN8Fp32(reinterpret_cast(in_tensors_[1]->Data()), weight_ptr_, input_channel, + kernel_w_ * kernel_h_, output_channel); return RET_OK; } @@ -100,12 +89,6 @@ int DeConvolutionCPUKernel::InitParam() { thread_count_ = MSMIN(op_parameter_->thread_num_, UP_DIV(conv_param_->output_channel_, C8NUM)); thread_stride_ = UP_DIV(UP_DIV(conv_param_->output_channel_, 
C8NUM), thread_count_); - - pack_input_ = reinterpret_cast(malloc(matmul_param_->row_12_ * matmul_param_->deep_ * sizeof(float))); - if (pack_input_ == nullptr) { - MS_LOG(ERROR) << "deconv Malloc pack_input_ error!"; - return RET_ERROR; - } return RET_OK; } @@ -138,6 +121,11 @@ int DeConvolutionCPUKernel::DoDeconv(int task_id) { } int DeConvolutionCPUKernel::Init() { + int error_code = InitWeightBias(); + if (error_code != RET_OK) { + MS_LOG(ERROR) << "deconv InitWeightBias error!ret: " << error_code; + return error_code; + } if (!InferShapeDone()) { return RET_OK; } @@ -153,6 +141,10 @@ void DeConvolutionCPUKernel::FreeRunBuf() { ctx_->allocator->Free(tmp_buffer_); tmp_buffer_ = nullptr; } + if (pack_input_ != nullptr) { + ctx_->allocator->Free(pack_input_); + pack_input_ = nullptr; + } return; } @@ -170,6 +162,13 @@ int DeConvolutionCPUKernel::InitRunBuf() { MS_LOG(ERROR) << "Conv1x1 Malloc tmp_buffer_ error!"; return RET_NULL_PTR; } + + pack_input_ = + reinterpret_cast(ctx_->allocator->Malloc(matmul_param_->row_12_ * matmul_param_->deep_ * sizeof(float))); + if (pack_input_ == nullptr) { + MS_LOG(ERROR) << "deconv Malloc pack_input_ error!"; + return RET_ERROR; + } return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution.h b/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution.h index b0a57a2c6d32c781c77384e9ad30389adfc2642b..3cbfac3869df08bb9e893e4f8b3fca588deb900d 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution.h @@ -49,7 +49,6 @@ class DeConvolutionCPUKernel : public ConvolutionBaseCPUKernel { void FreeRunBuf(); int InitParam(); int InitWeightBias(); - void FreeTmpBuffer(); private: MatMulParameter *matmul_param_ = nullptr;