diff --git a/mindspore/lite/src/ops/deconv2d.cc b/mindspore/lite/src/ops/deconv2d.cc
index ee77a455777f0f2d3ede303316fb409514fcf8e8..f50bf63297ec74e21696f6bc354355555d8b8493 100644
--- a/mindspore/lite/src/ops/deconv2d.cc
+++ b/mindspore/lite/src/ops/deconv2d.cc
@@ -124,6 +124,20 @@ int DeConv2D::InferShape(std::vector<tensor::Tensor *> inputs_, std::vecto
   }
   std::vector<int> out_shape = {output_n, output_h, output_w, output_c};
   output->set_shape(out_shape);
+
+  if (pad_mode == schema::PadMode_SAME) {
+    pad_h_ = ((input_h - 1) * stride_h + (kernel_h - 1) * dilate_h + 1 - output_h) / 2;
+    pad_w_ = ((input_w - 1) * stride_w + (kernel_w - 1) * dilate_w + 1 - output_w) / 2;
+  } else if (pad_mode == schema::PadMode_VALID) {
+    pad_h_ = 0;
+    pad_w_ = 0;
+  } else if (pad_mode == schema::PadMode_CAFFE) {
+    pad_h_ = pad_u_;
+    pad_w_ = pad_l_;
+  } else {
+    MS_LOG(ERROR) << "unsupported pad mode for deconv";
+  }
+
   return 0;
 }
 }  // namespace lite
diff --git a/mindspore/lite/src/ops/deconv2d.h b/mindspore/lite/src/ops/deconv2d.h
index 89b7e9fccc5322b311c25b3954d47f1600eba412..ab20b156ba14912f99f1e6f95539279d35b806bf 100644
--- a/mindspore/lite/src/ops/deconv2d.h
+++ b/mindspore/lite/src/ops/deconv2d.h
@@ -74,12 +74,16 @@ class DeConv2D : public PrimitiveC {
   int PadDown() const { return this->pad_d_; }
   int PadLeft() const { return this->pad_l_; }
   int PadRight() const { return this->pad_r_; }
+  int PadH() const { return this->pad_h_; }
+  int PadW() const { return this->pad_w_; }
 
  protected:
   int pad_u_ = 0;
   int pad_d_ = 0;
   int pad_l_ = 0;
   int pad_r_ = 0;
+  int pad_h_ = 0;
+  int pad_w_ = 0;
 };
 }  // namespace lite
 }  // namespace mindspore
diff --git a/mindspore/lite/src/populate_parameter.cc b/mindspore/lite/src/populate_parameter.cc
index bf524d7a29845a1df792a16e60a08fd37540d30d..98da74c0b99d3e2549b4ebb57c451221235ff658 100644
--- a/mindspore/lite/src/populate_parameter.cc
+++ b/mindspore/lite/src/populate_parameter.cc
@@ -506,6 +506,8 @@ OpParameter *PopulateDeconvParameter(const mindspore::lite::PrimitiveC *primitiv
   conv_param->pad_d_ = deconv_lite_primitive->PadDown();
   conv_param->pad_l_ = deconv_lite_primitive->PadLeft();
   conv_param->pad_r_ = deconv_lite_primitive->PadRight();
+  conv_param->pad_h_ = deconv_lite_primitive->PadH();
+  conv_param->pad_w_ = deconv_lite_primitive->PadW();
   conv_param->dilation_h_ = conv_primitive->GetDilateH();
   conv_param->dilation_w_ = conv_primitive->GetDilateW();
   auto act_type = conv_primitive->GetActivationType();
@@ -523,26 +525,6 @@ OpParameter *PopulateDeconvParameter(const mindspore::lite::PrimitiveC *primitiv
       conv_param->is_relu6_ = false;
       break;
   }
-
-  auto pad_mode = conv_primitive->GetPadMode();
-  switch (pad_mode) {
-    case schema::PadMode_SAME:
-      conv_param->pad_h_ = (conv_param->kernel_h_ - 1) / 2;
-      conv_param->pad_w_ = (conv_param->kernel_w_ - 1) / 2;
-      break;
-    case schema::PadMode_VALID:
-      conv_param->pad_h_ = 0;
-      conv_param->pad_w_ = 0;
-      break;
-    case schema::PadMode_CAFFE:
-      conv_param->pad_h_ = conv_param->pad_u_;
-      conv_param->pad_w_ = conv_param->pad_l_;
-      break;
-    default:
-      MS_LOG(ERROR) << "invalid pad mode!";
-      return nullptr;
-  }
-
   return reinterpret_cast<OpParameter *>(conv_param);
 }
 
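Note: moving the pad computation from PopulateDeconvParameter into DeConv2D::InferShape lets SAME padding use the real stride, dilation, and tensor extents; the removed `(kernel - 1) / 2` shortcut is only correct for the stride-1 case. A minimal standalone sketch of the relocated rule (function and variable names here are illustrative, not the MindSpore API):

```cpp
#include <cstdio>

// For a deconvolution, out = (in - 1) * stride + (kernel - 1) * dilation + 1 - 2 * pad,
// so once the SAME output extent is fixed, the symmetric pad falls out directly.
int DeconvSamePad(int in, int out, int kernel, int stride, int dilation) {
  return ((in - 1) * stride + (kernel - 1) * dilation + 1 - out) / 2;
}

int main() {
  // in = 4, stride = 2, kernel = 3, SAME output = in * stride = 8:
  // pad = ((4 - 1) * 2 + (3 - 1) * 1 + 1 - 8) / 2 = 0, while the removed
  // (kernel - 1) / 2 rule would have reported 1.
  printf("pad = %d\n", DeconvSamePad(4, 8, 3, 2, 1));
  return 0;
}
```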
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.cc
index 1bd9c81f3637be42d9dd16e2cd01ee24b759e5b0..ceca388d17a186d26a7b5173af5f3cad98ee4df0 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.cc
@@ -70,6 +70,15 @@ int Convolution1x1FP16CPUKernel::InitConv1x1Param() {
     return RET_MEMORY_FAILED;
   }
   memset(pack_input_, 0, matmul_param_->row_16_ * matmul_param_->deep_ * sizeof(float16_t));
+
+  if (pre_trans_input_) {
+    input_ptr_ = reinterpret_cast<float16_t *>(malloc(matmul_param_->row_ * matmul_param_->deep_ * sizeof(float16_t)));
+    if (input_ptr_ == nullptr) {
+      MS_LOG(ERROR) << "Conv1x1 Malloc input_ptr_ error!";
+      return RET_MEMORY_FAILED;
+    }
+    memset(input_ptr_, 0, matmul_param_->row_ * matmul_param_->deep_ * sizeof(float16_t));
+  }
   return RET_OK;
 }
 
@@ -131,6 +140,10 @@ void Convolution1x1FP16CPUKernel::FreeTmpBuffer() {
     free(pack_input_);
     pack_input_ = nullptr;
   }
+  if (pre_trans_input_ && input_ptr_ != nullptr) {
+    free(input_ptr_);
+    input_ptr_ = nullptr;
+  }
   return;
 }
 
@@ -205,15 +218,6 @@ int Convolution1x1FP16CPUKernel::Run() {
     return ret;
   }
 
-  if (pre_trans_input_) {
-    input_ptr_ = reinterpret_cast<float16_t *>(
-      ctx_->allocator->Malloc(matmul_param_->row_ * matmul_param_->deep_ * sizeof(float16_t)));
-    if (input_ptr_ == nullptr) {
-      MS_LOG(ERROR) << "Conv1x1 Malloc input_ptr_ error!";
-      return RET_MEMORY_FAILED;
-    }
-  }
-
   for (int batch_index = 0; batch_index < conv_param_->input_batch_; batch_index++) {
     Pre1x1Trans(
       execute_input_ + batch_index * conv_param_->input_h_ * conv_param_->input_w_ * conv_param_->input_channel_,
@@ -229,10 +233,6 @@ int Convolution1x1FP16CPUKernel::Run() {
 
   ConvolutionBaseFP16CPUKernel::IfCastOutput();
   ConvolutionBaseFP16CPUKernel::FreeTmpBuffer();
-  if (pre_trans_input_ && input_ptr_ != nullptr) {
-    ctx_->allocator->Free(input_ptr_);
-    input_ptr_ = nullptr;
-  }
   return RET_OK;
 }
 }  // namespace mindspore::kernel
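Note: the fp16 1x1 kernel's `input_ptr_` staging buffer now lives from InitConv1x1Param to FreeTmpBuffer instead of being taken from `ctx_->allocator` on every Run. A small sketch of that lifetime pattern (hypothetical `ScratchBuffer` type, not the kernel class):

```cpp
#include <cstdlib>
#include <cstring>

// Hypothetical helper: allocate once when the shape is known, reuse across
// every Run(), release only at teardown -- the lifetime the diff moves to.
struct ScratchBuffer {
  float *data = nullptr;
  bool Resize(size_t n) {  // Init/ReSize path: malloc + zero once per shape
    Release();
    data = static_cast<float *>(malloc(n * sizeof(float)));
    if (data == nullptr) return false;
    memset(data, 0, n * sizeof(float));
    return true;
  }
  void Release() {  // FreeTmpBuffer/destructor path
    free(data);
    data = nullptr;
  }
  ~ScratchBuffer() { Release(); }
};

int main() {
  ScratchBuffer buf;
  if (!buf.Resize(1024)) return 1;  // once per shape
  for (int run = 0; run < 3; ++run) {
    buf.data[0] = static_cast<float>(run);  // Run(): reuse, no allocation
  }
  return 0;  // destructor frees
}
```

Allocating once per shape trades a little resident memory for fewer allocator round trips on the hot path.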
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/matmul.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/matmul.cc
index d3134a342d1bf3b1839c9f9cd6d6a9c7b4689770..c88a6423bdb830ebdab63ffe08e9eccf54935609 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/matmul.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/matmul.cc
@@ -29,15 +29,15 @@ MatmulCPUKernel::~MatmulCPUKernel() { FreeTmpBuffer(); }
 
 void MatmulCPUKernel::FreeTmpBuffer() {
   if (a_c12_ptr_ != nullptr) {
-    ctx_->allocator->Free(a_c12_ptr_);
+    free(a_c12_ptr_);
     a_c12_ptr_ = nullptr;
   }
   if (b_r8_ptr_ != nullptr) {
-    ctx_->allocator->Free(b_r8_ptr_);
+    free(b_r8_ptr_);
     b_r8_ptr_ = nullptr;
   }
   if (bias_ptr_ != nullptr) {
-    ctx_->allocator->Free(bias_ptr_);
+    free(bias_ptr_);
     bias_ptr_ = nullptr;
   }
 }
@@ -67,23 +67,28 @@ int MatmulCPUKernel::ReSize() {
   thread_count_ = MSMIN(thread_count_, UP_DIV(params_->col_8_, 8));
   thread_stride_ = UP_DIV(UP_DIV(params_->col_8_, 8), thread_count_);
 
-  a_c12_ptr_ = reinterpret_cast<float *>(ctx_->allocator->Malloc(params_->row_12_ * params_->deep_ * sizeof(float)));
+  a_c12_ptr_ = reinterpret_cast<float *>(malloc(params_->batch * params_->row_12_ * params_->deep_ * sizeof(float)));
   if (a_c12_ptr_ == nullptr) {
     FreeTmpBuffer();
     return RET_MEMORY_FAILED;
   }
   memset(a_c12_ptr_, 0, params_->row_12_ * params_->deep_ * sizeof(float));
-  b_r8_ptr_ = reinterpret_cast<float *>(ctx_->allocator->Malloc(params_->col_8_ * params_->deep_ * sizeof(float)));
+
+  b_r8_ptr_ = reinterpret_cast<float *>(malloc(params_->batch * params_->col_8_ * params_->deep_ * sizeof(float)));
   if (b_r8_ptr_ == nullptr) {
     FreeTmpBuffer();
     return RET_MEMORY_FAILED;
   }
   memset(b_r8_ptr_, 0, params_->col_8_ * params_->deep_ * sizeof(float));
-  params_->a_const_ = false;
-  params_->b_const_ = false;
-  InitMatrixA(reinterpret_cast<float *>(in_tensors_[0]->Data()), a_c12_ptr_);
-  InitMatrixB(reinterpret_cast<float *>(in_tensors_[1]->Data()), b_r8_ptr_);
+  params_->a_const_ = (in_tensors_[0]->Data() != nullptr);
+  params_->b_const_ = (in_tensors_[1]->Data() != nullptr);
+  if (params_->a_const_ == true) {
+    InitMatrixA(reinterpret_cast<float *>(in_tensors_[0]->Data()), a_c12_ptr_);
+  }
+  if (params_->b_const_ == true) {
+    InitMatrixB(reinterpret_cast<float *>(in_tensors_[1]->Data()), b_r8_ptr_);
+  }
 
   bias_ptr_ = reinterpret_cast<float *>(malloc(params_->col_8_ * sizeof(float)));
   if (bias_ptr_ == nullptr) {
     FreeTmpBuffer();
@@ -99,35 +104,27 @@ int MatmulCPUKernel::ReSize() {
 }
 
 void MatmulCPUKernel::InitMatrixA(float *src_ptr, float *dst_ptr) {
-  if (params_->a_const_ == true) {
-    return;
-  }
-  if (src_ptr == nullptr) {
-    return;
-  }
-  params_->a_const_ = true;
-
-  if (params_->a_transpose_) {
-    RowMajor2Row12Major(src_ptr, dst_ptr, params_->deep_, params_->row_);
-  } else {
-    RowMajor2Col12Major(src_ptr, dst_ptr, params_->row_, params_->deep_);
+  for (int i = 0; i < params_->batch; i++) {
+    float *src = src_ptr + i * params_->deep_ * params_->row_;
+    float *dst = dst_ptr + i * params_->deep_ * params_->row_12_;
+    if (params_->a_transpose_) {
+      RowMajor2Row12Major(src, dst, params_->deep_, params_->row_);
+    } else {
+      RowMajor2Col12Major(src, dst, params_->row_, params_->deep_);
+    }
   }
   return;
 }
 
 void MatmulCPUKernel::InitMatrixB(float *src_ptr, float *dst_ptr) {
-  if (params_->b_const_ == true) {
-    return;
-  }
-  if (src_ptr == nullptr) {
-    return;
-  }
-  params_->b_const_ = true;
-
-  if (params_->b_transpose_) {
-    RowMajor2Col8Major(src_ptr, dst_ptr, params_->col_, params_->deep_);
-  } else {
-    RowMajor2Row8Major(src_ptr, dst_ptr, params_->deep_, params_->col_);
+  for (int i = 0; i < params_->batch; i++) {
+    float *src = src_ptr + i * params_->deep_ * params_->col_;
+    float *dst = dst_ptr + i * params_->deep_ * params_->col_8_;
+    if (params_->b_transpose_) {
+      RowMajor2Col8Major(src, dst, params_->col_, params_->deep_);
+    } else {
+      RowMajor2Row8Major(src, dst, params_->deep_, params_->col_);
+    }
   }
   return;
 }
@@ -144,8 +141,8 @@ int MatmulCPUKernel::RunImpl(int task_id) {
   if (cur_oc <= 0) {
     return RET_OK;
   }
-  MatMulOpt(a_c12_ptr_, b_r8_ptr_ + task_id * thread_stride_ * C8NUM * params_->deep_,
-            c_r_ptr_ + task_id * thread_stride_ * C8NUM, bias_ptr_ + task_id * thread_stride_ * C8NUM, ActType_No,
+  MatMulOpt(a_ptr_, b_ptr_ + task_id * thread_stride_ * C8NUM * params_->deep_,
+            c_ptr_ + task_id * thread_stride_ * C8NUM, bias_ptr_ + task_id * thread_stride_ * C8NUM, ActType_No,
             params_->deep_, params_->row_, cur_oc, params_->col_, OutType_Nhwc);
   return RET_OK;
 }
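Aside: with A and B now packed batch-contiguously, Run() only has to slide three pointers per batch before launching the parallel tile loop. An illustrative dump of the offsets involved (toy sizes, hypothetical names):

```cpp
#include <cstdio>

int main() {
  const int batch = 2, row = 5, deep = 3, col = 4;
  const int row12 = 12;  // row_ rounded up to the 12-row tile (row_12_)
  const int col8 = 8;    // col_ rounded up to the 8-col tile (col_8_)
  for (int i = 0; i < batch; ++i) {
    // Offsets mirror a_ptr_/b_ptr_/c_ptr_ in the diff above.
    int a_off = i * row12 * deep;  // into the packed a_c12_ptr_ buffer
    int b_off = i * deep * col8;   // into the packed b_r8_ptr_ buffer
    int c_off = i * row * col;     // into the unpadded output
    printf("batch %d: A+%d B+%d C+%d\n", i, a_off, b_off, c_off);
  }
  return 0;
}
```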
@@ -166,20 +163,21 @@ int MatmulCPUKernel::Run() {
     MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret;
     return prepare_ret;
   }
-  auto a_ptr = reinterpret_cast<float *>(in_tensors_[0]->Data());
-  auto b_ptr = reinterpret_cast<float *>(in_tensors_[1]->Data());
-  auto c_ptr = reinterpret_cast<float *>(out_tensors_[0]->Data());
-  auto a_stride = params_->row_ * params_->deep_;
-  auto b_stride = params_->deep_ * params_->col_;
-  auto c_stride = params_->row_ * params_->col_;
-  for (int i = 0; i < params_->batch; ++i) {
-    auto cur_a_ptr = a_ptr + i * a_stride;
-    auto cur_b_ptr = b_ptr + i * b_stride;
-    c_r_ptr_ = c_ptr + i * c_stride;
+  auto a_src = reinterpret_cast<float *>(in_tensors_[0]->Data());
+  auto b_src = reinterpret_cast<float *>(in_tensors_[1]->Data());
+  auto c_src = reinterpret_cast<float *>(out_tensors_[0]->Data());
 
-    InitMatrixA(cur_a_ptr, a_c12_ptr_);
-    InitMatrixB(cur_b_ptr, b_r8_ptr_);
+  if (params_->a_const_ == false) {
+    InitMatrixA(a_src, a_c12_ptr_);
+  }
+  if (params_->b_const_ == false) {
+    InitMatrixB(b_src, b_r8_ptr_);
+  }
+  for (int i = 0; i < params_->batch; ++i) {
+    a_ptr_ = a_c12_ptr_ + i * params_->row_12_ * params_->deep_;
+    b_ptr_ = b_r8_ptr_ + i * params_->deep_ * params_->col_8_;
+    c_ptr_ = c_src + i * params_->row_ * params_->col_;
     LiteBackendParallelLaunch(MatmulFloatRun, this, thread_count_);
   }
   return RET_OK;
 }
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/matmul.h b/mindspore/lite/src/runtime/kernel/arm/fp32/matmul.h
index 69fa2080fc42a0c14ca009ba59a69bcdeec9788d..e93e170e11661d6b14d18d0e85a36d54846f9c6a 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/matmul.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/matmul.h
@@ -43,8 +43,10 @@ class MatmulCPUKernel : public MatmulBaseCPUKernel {
  private:
   float *a_c12_ptr_ = nullptr;
   float *b_r8_ptr_ = nullptr;
-  float *c_r_ptr_ = nullptr;
   float *bias_ptr_ = nullptr;
+  float *a_ptr_ = nullptr;
+  float *b_ptr_ = nullptr;
+  float *c_ptr_ = nullptr;
 };
 }  // namespace mindspore::kernel
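The `a_const_`/`b_const_` flags now mean "data was already available at ReSize", so constant weights are packed exactly once there and only truly dynamic inputs are re-packed inside Run. A compact sketch of that caching scheme (hypothetical `PackedInput` helper; `Pack` stands in for the RowMajor2* packers):

```cpp
#include <algorithm>
#include <vector>

struct PackedInput {
  bool is_const = false;
  std::vector<float> packed;
  void Pack(const float *data) {
    // Stand-in for RowMajor2Col12Major / RowMajor2Row8Major tiling;
    // assumes data holds at least packed.size() elements.
    std::copy(data, data + packed.size(), packed.begin());
  }
  // ReSize path: if the tensor already has data, treat it as constant and pack now.
  void OnResize(const float *data, size_t n) {
    packed.assign(n, 0.0f);
    is_const = (data != nullptr);
    if (is_const) Pack(data);
  }
  // Run path: only non-const inputs still need packing here.
  void OnRun(const float *data) {
    if (!is_const) Pack(data);
  }
};

int main() {
  float w[4] = {1, 2, 3, 4};
  PackedInput weights, activations;
  weights.OnResize(w, 4);            // const: packed once, skipped in Run
  activations.OnResize(nullptr, 4);  // dynamic: packed on every Run
  float x[4] = {5, 6, 7, 8};
  weights.OnRun(w);
  activations.OnRun(x);
  return 0;
}
```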