diff --git a/mindspore/lite/src/populate_parameter.cc b/mindspore/lite/src/populate_parameter.cc index 3ce3900ae9566cc5422e2e7c4c38b6fb518a87ea..4ecf73e5cf99e9749327d893c7ed0d251785f847 100644 --- a/mindspore/lite/src/populate_parameter.cc +++ b/mindspore/lite/src/populate_parameter.cc @@ -40,7 +40,6 @@ #include "src/runtime/kernel/arm/nnacl/fp32/reduce.h" #include "src/runtime/kernel/arm/nnacl/fp32/activation.h" #include "src/runtime/kernel/arm/nnacl/fp32/arithmetic.h" -#include "src/runtime/kernel/arm/nnacl/fused_batchnorm.h" #include "src/runtime/kernel/arm/nnacl/fp32/batchnorm.h" #include "src/runtime/kernel/arm/nnacl/power.h" #include "src/runtime/kernel/arm/nnacl/fp32/range.h" @@ -510,15 +509,15 @@ OpParameter *PopulateActivationParameter(const lite::Primitive *primitive) { } OpParameter *PopulateFusedBatchNorm(const lite::Primitive *primitive) { - FusedBatchNormParameter *fuse_batch_norm_param = new (std::nothrow) FusedBatchNormParameter(); - if (fuse_batch_norm_param == nullptr) { + BatchNormParameter *batch_norm_param = new (std::nothrow) BatchNormParameter(); + if (batch_norm_param == nullptr) { MS_LOG(ERROR) << "new FusedBatchNormParameter failed."; return nullptr; } - fuse_batch_norm_param->op_parameter_.type_ = primitive->Type(); + batch_norm_param->op_parameter_.type_ = primitive->Type(); auto param = primitive->Value()->value_as_FusedBatchNorm(); - fuse_batch_norm_param->epsilon_ = param->epsilon(); - return reinterpret_cast(fuse_batch_norm_param); + batch_norm_param->epsilon_ = param->epsilon(); + return reinterpret_cast(batch_norm_param); } OpParameter *PopulateArithmetic(const lite::Primitive *primitive) { diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.cc index 522dd49d90f935fce3652a4abdfeabaf07c2c966..c288550e86622c4b5c8f0db6334a8110d9877224 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.cc +++ 
b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.cc @@ -28,6 +28,22 @@ using mindspore::lite::RET_OK; using mindspore::schema::PrimitiveType_DepthwiseConv2D; namespace mindspore::kernel { +ConvolutionDepthwiseFp16CPUKernel::~ConvolutionDepthwiseFp16CPUKernel() { + delete sliding_; + if (packed_weight_ != nullptr) { + free(packed_weight_); // packed buffers are malloc'd, so release with free(), not delete + packed_weight_ = nullptr; + } + if (packed_input_ != nullptr) { + free(packed_input_); + packed_input_ = nullptr; + } + if (packed_output_ != nullptr) { + free(packed_output_); + packed_output_ = nullptr; + } +} + int ConvolutionDepthwiseFp16CPUKernel::InitBuffer() { // malloc pack input buffer int C8 = UP_DIV(conv_param_->input_channel_, C8NUM); @@ -113,8 +129,14 @@ int ConvolutionDepthwiseFp16CPUKernel::Init() { } int ConvolutionDepthwiseFp16CPUKernel::ReSize() { - free(packed_input_); - free(packed_output_); + if (packed_input_ != nullptr) { + free(packed_input_); + packed_input_ = nullptr; + } + if (packed_output_ != nullptr) { + free(packed_output_); + packed_output_ = nullptr; + } ConvolutionBaseCPUKernel::Init(); InitSlidingParam(sliding_, conv_param_, C8NUM); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.h index d605ca5ba0bec17dc6d1ca46eeb5cabee2ffb3aa..de7f2cce2962988945921471b5365268d3e86e37 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.h @@ -29,12 +29,7 @@ class ConvolutionDepthwiseFp16CPUKernel : public ConvolutionBaseCPUKernel { const std::vector &outputs, const Context *ctx, const lite::Primitive *primitive) : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} - ~ConvolutionDepthwiseFp16CPUKernel() override { - delete sliding_; - free(packed_weight_); - free(packed_input_); - free(packed_output_); - } + ~ConvolutionDepthwiseFp16CPUKernel()
override; int Init() override; int ReSize() override; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.cc index 0df66a963cba032cd08ec86f7ff5c7c1b8837536..96aa37c63e62c6ff5b0aa347f3f7323f4aa26e32 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.cc @@ -28,6 +28,22 @@ using mindspore::lite::RET_OK; using mindspore::schema::PrimitiveType_DeDepthwiseConv2D; namespace mindspore::kernel { +DeconvolutionDepthwiseFp16CPUKernel::~DeconvolutionDepthwiseFp16CPUKernel() { + delete sliding_; + if (packed_weight_ != nullptr) { + free(packed_weight_); // previously released with free(); delete on these buffers is a mismatch + packed_weight_ = nullptr; + } + if (packed_input_ != nullptr) { + free(packed_input_); + packed_input_ = nullptr; + } + if (packed_output_ != nullptr) { + free(packed_output_); + packed_output_ = nullptr; + } +} + int DeconvolutionDepthwiseFp16CPUKernel::InitSlideParam() { conv_param_->input_batch_ = outputs_.front()->shape().at(kNHWC_N); conv_param_->input_h_ = outputs_.front()->shape().at(kNHWC_H); @@ -126,8 +142,14 @@ int DeconvolutionDepthwiseFp16CPUKernel::Init() { } int DeconvolutionDepthwiseFp16CPUKernel::ReSize() { - free(packed_input_); - free(packed_output_); + if (packed_input_ != nullptr) { + free(packed_input_); + packed_input_ = nullptr; + } + if (packed_output_ != nullptr) { + free(packed_output_); + packed_output_ = nullptr; + } InitSlideParam(); ConvolutionBaseCPUKernel::Init(); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.h index 64807fa9d898ac0dde52b50ae2a6ea38cb8d1ab6..be88809971af1035f72ae5281e1e3fce98ec6217 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.h +++
b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.h @@ -29,14 +29,7 @@ class DeconvolutionDepthwiseFp16CPUKernel : public ConvolutionBaseCPUKernel { const std::vector &outputs, const Context *ctx, const lite::Primitive *primitive) : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} - ~DeconvolutionDepthwiseFp16CPUKernel() override { - delete sliding_; - free(packed_weight_); - if (need_align_) { - free(packed_input_); - free(packed_output_); - } - }; + ~DeconvolutionDepthwiseFp16CPUKernel() override; int Init() override; int ReSize() override; @@ -52,7 +45,6 @@ class DeconvolutionDepthwiseFp16CPUKernel : public ConvolutionBaseCPUKernel { float16_t *packed_weight_; float16_t *packed_input_; float16_t *packed_output_; - bool need_align_ = false; }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/batchnorm.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/batchnorm.cc index 4a827a78ee61107c5965dd56981977deca0d62c3..157c4b76c1cfccb995fe87387462898afd31dfe4 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/batchnorm.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/batchnorm.cc @@ -15,7 +15,6 @@ */ #include "src/runtime/kernel/arm/fp32/batchnorm.h" -#include #include "schema/model_generated.h" #include "src/kernel_registry.h" #include "include/errorcode.h" @@ -28,7 +27,42 @@ using mindspore::lite::RET_OK; using mindspore::schema::PrimitiveType_BatchNorm; namespace mindspore::kernel { +BatchnormCPUKernel::~BatchnormCPUKernel() { + if (mean_addr_ != nullptr) { + free(mean_addr_); + mean_addr_ = nullptr; + } + if (var_addr_ != nullptr) { + free(var_addr_); + var_addr_ = nullptr; + } +} + +int BatchnormCPUKernel::InitConstTensor() { + auto mean = inputs_[1]; + mean_addr_ = reinterpret_cast(malloc(mean->ElementsNum() * sizeof(float))); + if (mean_addr_ == nullptr) { + MS_LOG(ERROR) << "Malloc buffer failed."; + return RET_ERROR; + } + memcpy(mean_addr_, mean->Data(), 
mean->ElementsNum() * sizeof(float)); + + auto variance = inputs_[2]; + var_addr_ = reinterpret_cast(malloc(variance->ElementsNum() * sizeof(float))); + if (var_addr_ == nullptr) { + MS_LOG(ERROR) << "Malloc buffer failed."; + return RET_ERROR; + } + memcpy(var_addr_, variance->Data(), variance->ElementsNum() * sizeof(float)); + return RET_OK; +} + int BatchnormCPUKernel::Init() { + if (context_->infer_shape_interrupt_ && !context_->running_) { + SetNeedReInit(); + return RET_OK; + } + auto input_shapes = inputs_[0]->shape(); auto n_dim = input_shapes.size(); batchnorm_param_->channel_ = input_shapes[n_dim - 1]; @@ -37,11 +71,24 @@ int BatchnormCPUKernel::Init() { batchnorm_param_->unit_ *= input_shapes[i]; } batchnorm_param_->op_parameter_.thread_num_ = - MSMIN(batchnorm_param_->op_parameter_.thread_num_, batchnorm_param_->unit_); + MSMIN(batchnorm_param_->op_parameter_.thread_num_, batchnorm_param_->channel_); + + auto ret = InitConstTensor(); + if (ret != 0) { + MS_LOG(ERROR) << "Batchnorm fp32 InitConstTensor failed."; + return RET_ERROR; + } return RET_OK; } -int BatchnormCPUKernel::ReSize() { return RET_OK; } +int BatchnormCPUKernel::ReSize() { + auto input_shapes = inputs_[0]->shape(); + batchnorm_param_->unit_ = 1; + for (int i = 0; i < input_shapes.size() - 1; i++) { + batchnorm_param_->unit_ *= input_shapes[i]; + } + return RET_OK; +} int BatchnormCPUKernel::DoExecute(int task_id) { BatchNorm(out_addr_, in_addr_, mean_addr_, var_addr_, task_id, batchnorm_param_); @@ -61,12 +108,10 @@ int BatchNormRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) { int BatchnormCPUKernel::Run() { auto prepare_ret = Prepare(); if (prepare_ret != RET_OK) { - MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; + MS_LOG(ERROR) << "Prepare fail! 
Ret error code: " << prepare_ret; return prepare_ret; } in_addr_ = reinterpret_cast(inputs_.at(0)->Data()); - mean_addr_ = reinterpret_cast(inputs_.at(1)->Data()); - var_addr_ = reinterpret_cast(inputs_.at(2)->Data()); out_addr_ = reinterpret_cast(outputs_.at(0)->Data()); int ret = LiteBackendParallelLaunch(BatchNormRun, this, batchnorm_param_->op_parameter_.thread_num_); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/batchnorm.h b/mindspore/lite/src/runtime/kernel/arm/fp32/batchnorm.h index 4ad0224511b16554a0f123053c8b210195d51f6f..28d9027cf81c311a241004e4638ea8f54ffb72c8 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/batchnorm.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/batchnorm.h @@ -31,14 +31,14 @@ class BatchnormCPUKernel : public LiteKernel { const std::vector &outputs, const Context *ctx, const lite::Primitive *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive) { - opParameter->thread_num_ = ctx->thread_num_; batchnorm_param_ = reinterpret_cast(parameter); } - ~BatchnormCPUKernel() override = default; + ~BatchnormCPUKernel() override; int Init() override; int ReSize() override; int Run() override; + int InitConstTensor(); int DoExecute(int tid); private: diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise.cc index 0de33adb567a4364566dd8f026848915eb94f19b..f9ca15665d6baa6f930e7e4a7179611ca53d7cc6 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise.cc @@ -29,6 +29,24 @@ using mindspore::lite::RET_OK; using mindspore::schema::PrimitiveType_DepthwiseConv2D; namespace mindspore::kernel { +ConvolutionDepthwiseCPUKernel::~ConvolutionDepthwiseCPUKernel() { + delete sliding_; + if (packed_weight_ != nullptr) { + free(packed_weight_); // previously released with free(); delete on these buffers is a mismatch + packed_weight_ = nullptr; + } + if (need_align_) { + if (packed_input_ != nullptr) { + free(packed_input_);
+ packed_input_ = nullptr; + } + if (packed_output_ != nullptr) { + free(packed_output_); + packed_output_ = nullptr; + } + } +} + int ConvolutionDepthwiseCPUKernel::InitWeightBias() { // init weight: o, h, w, i; o == group, i == 1 auto weight_tensor = inputs_[kWeightIndex]; @@ -114,9 +132,16 @@ int ConvolutionDepthwiseCPUKernel::Init() { int ConvolutionDepthwiseCPUKernel::ReSize() { if (need_align_) { - free(packed_input_); - free(packed_output_); + if (packed_input_ != nullptr) { + free(packed_input_); + packed_input_ = nullptr; + } + if (packed_output_ != nullptr) { + free(packed_output_); + packed_output_ = nullptr; + } } + // conv base init ConvolutionBaseCPUKernel::Init(); @@ -197,10 +222,11 @@ kernel::LiteKernel *CpuConvDwFp32KernelCreator(const std::vector(opParameter); // if (param->kernel_h_ == 3 && param->kernel_w_ == 3 && param->stride_h_ == 1 && param->stride_w_ == 1 && - // param->dilation_h_ == 1 && param->dilation_w_ == 1) { - // kernel = new (std::nothrow) kernel::ConvolutionDepthwise3x3CPUKernel(opParameter, inputs, outputs, ctx); + // param->dilation_h_ == 1 && param->dilation_w_ == 1) { + // kernel = new (std::nothrow) kernel::ConvolutionDepthwise3x3CPUKernel(opParameter, inputs, outputs, ctx, + // primitive); // } else { - // kernel = new (std::nothrow) kernel::ConvolutionDepthwiseCPUKernel(opParameter, inputs, outputs, ctx); + // kernel = new (std::nothrow) kernel::ConvolutionDepthwiseCPUKernel(opParameter, inputs, outputs, ctx, primitive); // } if (kernel == nullptr) { diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise.h b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise.h index 08706ac050ab48daebdc19b9fc41f4d081448ad6..22de529bcab8c7515bc2b41accc0f4318d8cf09c 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise.h @@ -29,14 +29,7 @@ class ConvolutionDepthwiseCPUKernel : public
ConvolutionBaseCPUKernel { const std::vector &outputs, const lite::Context *ctx, const lite::Primitive *primitive) : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} - ~ConvolutionDepthwiseCPUKernel() override { - delete sliding_; - free(packed_weight_); - if (need_align_) { - free(packed_input_); - free(packed_output_); - } - }; + ~ConvolutionDepthwiseCPUKernel() override; int Init() override; int ReSize() override; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_depthwise.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_depthwise.cc index a60851e17860f1e0a6731737bdd0a944225eb776..77cc8ac22e333d217b297e350a60f653f061a287 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_depthwise.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_depthwise.cc @@ -27,6 +27,24 @@ using mindspore::lite::RET_OK; using mindspore::schema::PrimitiveType_DeDepthwiseConv2D; namespace mindspore::kernel { +DeconvolutionDepthwiseCPUKernel::~DeconvolutionDepthwiseCPUKernel() { + delete sliding_; + if (packed_weight_ != nullptr) { + free(packed_weight_); // previously released with free(); delete on these buffers is a mismatch + packed_weight_ = nullptr; + } + if (need_align_) { + if (packed_input_ != nullptr) { + free(packed_input_); + packed_input_ = nullptr; + } + if (packed_output_ != nullptr) { + free(packed_output_); + packed_output_ = nullptr; + } + } +} + int DeconvolutionDepthwiseCPUKernel::InitSlideParam() { conv_param_->input_batch_ = outputs_.front()->shape().at(kNHWC_N); conv_param_->input_h_ = outputs_.front()->shape().at(kNHWC_H); @@ -126,8 +144,14 @@ int DeconvolutionDepthwiseCPUKernel::Init() { int DeconvolutionDepthwiseCPUKernel::ReSize() { if (need_align_) { - free(packed_input_); - free(packed_output_); + if (packed_input_ != nullptr) { + free(packed_input_); + packed_input_ = nullptr; + } + if (packed_output_ != nullptr) { + free(packed_output_); + packed_output_ = nullptr; + } } InitSlideParam(); diff --git
a/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_depthwise.h b/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_depthwise.h index 0ad3c18d444f1684711d5a0d6dc7d78a030bb5a9..06400a3ba169e70c93af4cdbe95a4cafae515459 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_depthwise.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_depthwise.h @@ -29,14 +29,7 @@ class DeconvolutionDepthwiseCPUKernel : public ConvolutionBaseCPUKernel { const std::vector &outputs, const lite::Context *ctx, const lite::Primitive *primitive) : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} - ~DeconvolutionDepthwiseCPUKernel() override { - delete sliding_; - free(packed_weight_); - if (need_align_) { - free(packed_input_); - free(packed_output_); - } - }; + ~DeconvolutionDepthwiseCPUKernel() override; int Init() override; int InitSlideParam(); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/flatten.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/flatten.cc index 868705746b5c6c2c05c222e5ef4fbc429b585cf4..30e1ede1ed355e62f7dc5c4286c1e5693a3c7967 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/flatten.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/flatten.cc @@ -32,6 +32,12 @@ int FlattenCPUKernel::Init() { SetNeedReInit(); return RET_OK; } + + ReSize(); + return RET_OK; +} + +int FlattenCPUKernel::ReSize() { auto output_shape = outputs_[0]->shape(); flatten_param_->size = sizeof(float); for (int i = 0; i < output_shape.size(); i++) { @@ -40,8 +46,6 @@ int FlattenCPUKernel::Init() { return RET_OK; } -int FlattenCPUKernel::ReSize() { return RET_OK; } - int FlattenCPUKernel::Run() { auto prepare_ret = Prepare(); if (prepare_ret != RET_OK) { diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/fused_batchnorm.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/fused_batchnorm.cc index 00810f89aaf825ba5c36462ddf41ae49b69fc82f..9f39c30eae70857e7789de8de88238256935a39a 100644 --- 
a/mindspore/lite/src/runtime/kernel/arm/fp32/fused_batchnorm.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/fused_batchnorm.cc @@ -15,10 +15,10 @@ */ #include "src/runtime/kernel/arm/fp32/fused_batchnorm.h" -#include #include "schema/model_generated.h" #include "src/kernel_registry.h" #include "include/errorcode.h" +#include "src/runtime/runtime_api.h" using mindspore::kernel::KERNEL_ARCH::kCPU; using mindspore::lite::KernelRegistrar; @@ -27,33 +27,121 @@ using mindspore::lite::RET_OK; using mindspore::schema::PrimitiveType_FusedBatchNorm; namespace mindspore::kernel { +FusedBatchnormCPUKernel::~FusedBatchnormCPUKernel() { + if (scale_addr_ != nullptr) { + free(scale_addr_); + scale_addr_ = nullptr; + } + if (offset_addr_ != nullptr) { + free(offset_addr_); + offset_addr_ = nullptr; + } + if (mean_addr_ != nullptr) { + free(mean_addr_); + mean_addr_ = nullptr; + } + if (var_addr_ != nullptr) { + free(var_addr_); + var_addr_ = nullptr; + } +} + +int FusedBatchnormCPUKernel::InitConstTensor() { + auto scale = inputs_[1]; + scale_addr_ = reinterpret_cast(malloc(scale->ElementsNum() * sizeof(float))); + if (scale_addr_ == nullptr) { + MS_LOG(ERROR) << "Malloc buffer failed."; + return RET_ERROR; + } + memcpy(scale_addr_, scale->Data(), scale->ElementsNum() * sizeof(float)); + + auto offset = inputs_[2]; + offset_addr_ = reinterpret_cast(malloc(offset->ElementsNum() * sizeof(float))); + if (offset_addr_ == nullptr) { + MS_LOG(ERROR) << "Malloc buffer failed."; + return RET_ERROR; + } + memcpy(offset_addr_, offset->Data(), offset->ElementsNum() * sizeof(float)); + + auto mean = inputs_[3]; + mean_addr_ = reinterpret_cast(malloc(mean->ElementsNum() * sizeof(float))); + if (mean_addr_ == nullptr) { + MS_LOG(ERROR) << "Malloc buffer failed."; + return RET_ERROR; + } + memcpy(mean_addr_, mean->Data(), mean->ElementsNum() * sizeof(float)); + + auto variance = inputs_[4]; + var_addr_ = reinterpret_cast(malloc(variance->ElementsNum() * sizeof(float))); + if (var_addr_ == 
nullptr) { + MS_LOG(ERROR) << "Malloc buffer failed."; + return RET_ERROR; + } + memcpy(var_addr_, variance->Data(), variance->ElementsNum() * sizeof(float)); + return RET_OK; +} + int FusedBatchnormCPUKernel::Init() { if (context_->infer_shape_interrupt_ && !context_->running_) { SetNeedReInit(); return RET_OK; } - input_shape_ = reinterpret_cast(malloc(sizeof(int) * inputs_[0]->shape().size())); - memcpy(input_shape_, inputs_[0]->shape().data(), inputs_[0]->shape().size() * sizeof(int)); + auto input_shapes = inputs_[0]->shape(); + auto n_dim = input_shapes.size(); + batchnorm_param_->channel_ = input_shapes[n_dim - 1]; + batchnorm_param_->unit_ = 1; + for (int i = 0; i < n_dim - 1; i++) { + batchnorm_param_->unit_ *= input_shapes[i]; + } + batchnorm_param_->op_parameter_.thread_num_ = + MSMIN(batchnorm_param_->op_parameter_.thread_num_, batchnorm_param_->channel_); + + auto ret = InitConstTensor(); + if (ret != 0) { + MS_LOG(ERROR) << "FusedBatchnorm fp32 InitConstTensor failed."; + return RET_ERROR; + } + return RET_OK; +} + +int FusedBatchnormCPUKernel::ReSize() { + auto input_shapes = inputs_[0]->shape(); + batchnorm_param_->unit_ = 1; + for (int i = 0; i < input_shapes.size() - 1; i++) { + batchnorm_param_->unit_ *= input_shapes[i]; + } + return RET_OK; +} + +int FusedBatchnormCPUKernel::Execute(int task_id) { + FusedBatchNorm(out_addr_, in_addr_, scale_addr_, offset_addr_, mean_addr_, var_addr_, task_id, batchnorm_param_); return RET_OK; } -int FusedBatchnormCPUKernel::ReSize() { return RET_OK; } +int FusedBatchNormRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) { + auto g_kernel = reinterpret_cast(cdata); + auto ret = g_kernel->Execute(task_id); + if (ret != RET_OK) { + MS_LOG(ERROR) << "FusedBatchnormRun error task_id[" << task_id << "] error_code[" << ret << "]"; + return ret; + } + return RET_OK; +} int FusedBatchnormCPUKernel::Run() { auto prepare_ret = Prepare(); if (prepare_ret != RET_OK) { - MS_LOG(ERROR) << "Prepare fail!ret: " << 
prepare_ret; + MS_LOG(ERROR) << "Prepare fail! Ret error code: " << prepare_ret; return prepare_ret; } - auto input_addr = reinterpret_cast(inputs_.at(0)->Data()); - auto scale_addr = reinterpret_cast(inputs_.at(1)->Data()); - auto offest_addr = reinterpret_cast(inputs_.at(2)->Data()); - auto mean_addr = reinterpret_cast(inputs_.at(3)->Data()); - auto variance_addr = reinterpret_cast(inputs_.at(4)->Data()); - auto output_addr = reinterpret_cast(outputs_.at(0)->Data()); + in_addr_ = reinterpret_cast(inputs_.at(0)->Data()); + out_addr_ = reinterpret_cast(outputs_.at(0)->Data()); - FusedBatchNorm(input_addr, scale_addr, offest_addr, mean_addr, variance_addr, input_shape_, - fused_batchnorm_param_->epsilon_, output_addr); + int ret = LiteBackendParallelLaunch(FusedBatchNormRun, this, batchnorm_param_->op_parameter_.thread_num_); + if (ret != RET_OK) { + MS_LOG(ERROR) << "FusedBatchnormRun error error_code[" << ret << "]"; + return ret; + } return RET_OK; } @@ -63,8 +151,8 @@ kernel::LiteKernel *CpuFusedBatchnormKernelCreator(const std::vector #include "src/lite_kernel.h" -#include "src/runtime/kernel/arm/nnacl/fused_batchnorm.h" +#include "src/runtime/kernel/arm/nnacl/fp32/batchnorm.h" namespace mindspore::kernel { class FusedBatchnormCPUKernel : public LiteKernel { @@ -28,17 +28,26 @@ class FusedBatchnormCPUKernel : public LiteKernel { const std::vector &outputs, const lite::Context *ctx, const lite::Primitive *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive) { - fused_batchnorm_param_ = reinterpret_cast(parameter); + batchnorm_param_ = reinterpret_cast(parameter); } - ~FusedBatchnormCPUKernel() override { delete fused_batchnorm_param_; } + ~FusedBatchnormCPUKernel() override; int Init() override; int ReSize() override; int Run() override; + int InitConstTensor(); + int Execute(int task_id); + private: - int *input_shape_{}; - FusedBatchNormParameter *fused_batchnorm_param_; + float *in_addr_ = nullptr; + float *mean_addr_ = nullptr; // null-init: the destructor frees every non-null *_addr_ copy + float *var_addr_ = nullptr; + float
*scale_addr_ = nullptr; + float *offset_addr_ = nullptr; + float *out_addr_ = nullptr; + + BatchNormParameter *batchnorm_param_; }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/scale.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/scale.cc index 69efb8a24e358cfaa1fb046f03ec354c8b962bcd..0bcf08d9d571826239ba9f91d80e84c150803e61 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/scale.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/scale.cc @@ -17,7 +17,6 @@ #include "src/runtime/kernel/arm/fp32/scale.h" #include #include -#include "src/runtime/kernel/arm/nnacl/scale.h" #include "schema/model_generated.h" #include "src/kernel_registry.h" #include "include/errorcode.h" @@ -29,23 +28,29 @@ using mindspore::lite::RET_OK; using mindspore::schema::PrimitiveType_Scale; namespace mindspore::kernel { +ScaleCPUKernel::~ScaleCPUKernel() { FreeTmpBuffer(); } + void ScaleCPUKernel::FreeTmpBuffer() { - if (scale_ != nullptr) { - free(scale_); - scale_ = nullptr; + if (scale_param_->const_scale_) { + if (scale_ != nullptr) { + free(scale_); + scale_ = nullptr; + } } - if (offset_ != nullptr) { - free(offset_); - offset_ = nullptr; + if (scale_param_->has_offset_) { + if (offset_ != nullptr) { + free(offset_); + offset_ = nullptr; + } } } int ScaleCPUKernel::InitScaleOffset() { FreeTmpBuffer(); - auto param = reinterpret_cast(opParameter); auto scale_tensor = inputs_.at(1); float *scale_ptr = reinterpret_cast(inputs_.at(1)->Data()); if (scale_ptr != nullptr) { + scale_param_->const_scale_ = true; scale_ = reinterpret_cast(malloc(scale_tensor->ElementsNum() * sizeof(float))); if (scale_ == nullptr) { MS_LOG(ERROR) << "Malloc buffer failed."; @@ -53,6 +58,7 @@ int ScaleCPUKernel::InitScaleOffset() { } memcpy(scale_, scale_ptr, scale_tensor->ElementsNum() * sizeof(float)); } else { + scale_param_->const_scale_ = false; scale_ = nullptr; } @@ -64,40 +70,39 @@ int ScaleCPUKernel::InitScaleOffset() { return RET_ERROR; } memcpy(offset_, offset_tensor->Data(),
offset_tensor->ElementsNum() * sizeof(float)); - param->has_offset_ = true; + scale_param_->has_offset_ = true; } else { offset_ = nullptr; - param->has_offset_ = false; + scale_param_->has_offset_ = false; } return RET_OK; } int ScaleCPUKernel::InitParameter() { - auto param = reinterpret_cast(opParameter); auto in_tensor = inputs_.at(0); auto in_shape = in_tensor->shape(); auto scale_tensor = inputs_.at(1); auto scale_shape = scale_tensor->shape(); - if (scale_shape.size() + param->axis_ > in_shape.size()) { + if (scale_shape.size() + scale_param_->axis_ > in_shape.size()) { MS_LOG(ERROR) << "Scale tensor shape is incorrect."; return RET_ERROR; } - param->outer_size_ = 1; - param->axis_size_ = 1; - param->inner_size_ = 1; - for (int i = 0; i < param->axis_; i++) { - param->outer_size_ *= in_shape[i]; + scale_param_->outer_size_ = 1; + scale_param_->axis_size_ = 1; + scale_param_->inner_size_ = 1; + for (int i = 0; i < scale_param_->axis_; i++) { + scale_param_->outer_size_ *= in_shape[i]; } for (int i = 0; i < scale_shape.size(); i++) { - if (in_shape[i + param->axis_] != scale_shape[i]) { + if (in_shape[i + scale_param_->axis_] != scale_shape[i]) { MS_LOG(ERROR) << "Scale tensor shape is incorrect."; return RET_ERROR; } - param->axis_size_ *= in_shape[i + param->axis_]; + scale_param_->axis_size_ *= in_shape[i + scale_param_->axis_]; } - for (int i = param->axis_ + scale_shape.size(); i < in_shape.size(); i++) { - param->inner_size_ *= in_shape[i]; + for (int i = scale_param_->axis_ + scale_shape.size(); i < in_shape.size(); i++) { + scale_param_->inner_size_ *= in_shape[i]; } return RET_OK; } @@ -130,9 +135,7 @@ int ScaleCPUKernel::ReSize() { } int ScaleCPUKernel::Scale(int task_id) { - auto ret = - DoScale(input_ptr_, output_ptr_, scale_, offset_, task_id, reinterpret_cast(opParameter)); - + auto ret = DoScale(input_ptr_, output_ptr_, scale_, offset_, task_id, scale_param_); if (ret != RET_OK) { MS_LOG(ERROR) << "Scale error task_id[" << task_id << "] 
error_code[" << ret << "]"; return RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/scale.h b/mindspore/lite/src/runtime/kernel/arm/fp32/scale.h index 4c19404d72f2f95bd33cd1d079fde280e4611f45..38ed5177468d19705abcc5dcb56c7407759901a2 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/scale.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/scale.h @@ -19,6 +19,7 @@ #include #include "src/lite_kernel.h" +#include "src/runtime/kernel/arm/nnacl/scale.h" namespace mindspore::kernel { @@ -27,10 +28,10 @@ class ScaleCPUKernel : public LiteKernel { ScaleCPUKernel(OpParameter *parameter, const std::vector &inputs, const std::vector &outputs, const lite::Context *ctx, const lite::Primitive *primitive) - : LiteKernel(parameter, inputs, outputs, ctx, primitive) {} - ~ScaleCPUKernel() { - FreeTmpBuffer(); + : LiteKernel(parameter, inputs, outputs, ctx, primitive) { + scale_param_ = reinterpret_cast(opParameter); } + ~ScaleCPUKernel() override; int Init() override; int ReSize() override; @@ -45,6 +46,7 @@ class ScaleCPUKernel : public LiteKernel { float *scale_; float *offset_; float *output_ptr_; + ScaleParameter *scale_param_; }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_int8.cc index bf107f65c34f6c6efd608df0f0eab8a268bb8545..52e4ab6e53c076a4da91d38497abac106875b3c2 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_int8.cc @@ -28,6 +28,24 @@ using mindspore::lite::RET_OK; using mindspore::schema::PrimitiveType_DepthwiseConv2D; namespace mindspore::kernel { +ConvolutionDepthwiseInt8CPUKernel::~ConvolutionDepthwiseInt8CPUKernel() { + delete sliding; + if (packed_weight_ != nullptr) { + free(packed_weight_); // previously released with free(); delete on these buffers is a mismatch + packed_weight_ = nullptr; + } + if (packed_input_ != nullptr) { + free(packed_input_); +
packed_input_ = nullptr; + } + if (need_align_) { + if (packed_output_ != nullptr) { + free(packed_output_); + packed_output_ = nullptr; + } + } +} + int ConvolutionDepthwiseInt8CPUKernel::InitWeightBias() { // init weight, int8 -> int16 // o, h, w, i -> o/8, h, w, i, 8; o == group, i == 1 @@ -111,10 +129,17 @@ int ConvolutionDepthwiseInt8CPUKernel::Init() { } int ConvolutionDepthwiseInt8CPUKernel::ReSize() { - free(packed_input_); + if (packed_input_ != nullptr) { + free(packed_input_); + packed_input_ = nullptr; + } if (need_align_) { - free(packed_output_); + if (packed_output_ != nullptr) { + free(packed_output_); + packed_output_ = nullptr; + } } + // conv base init ConvolutionBaseCPUKernel::Init(); diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_int8.h index a6e068d90dbebf970ad25eed46357337df856989..e13ba163f05220e2bf0e538e58d6cfe9320a1233 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_int8.h @@ -29,14 +29,7 @@ class ConvolutionDepthwiseInt8CPUKernel : public ConvolutionBaseCPUKernel { const std::vector &outputs, const Context *ctx, const lite::Primitive *primitive) : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} - ~ConvolutionDepthwiseInt8CPUKernel() override { - delete sliding; - free(packed_weight_); - free(packed_input_); - if (need_align_) { - free(packed_output_); - } - }; + ~ConvolutionDepthwiseInt8CPUKernel() override; int Init() override; int ReSize() override; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_depthwise_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_depthwise_int8.cc index 542898a08958a26d44320926268643999907cccf..b2c91baaa29110c60214db7424b486d8ca6f715c 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_depthwise_int8.cc +++
b/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_depthwise_int8.cc @@ -28,6 +28,28 @@ using mindspore::lite::RET_OK; using mindspore::schema::PrimitiveType_DeDepthwiseConv2D; namespace mindspore::kernel { +DeconvolutionDepthwiseInt8CPUKernel::~DeconvolutionDepthwiseInt8CPUKernel() { + delete sliding; + if (packed_weight_ != nullptr) { + free(packed_weight_); // buffers here are malloc'd (see InitBuffer), so release with free(), not delete + packed_weight_ = nullptr; + } + if (packed_input_ != nullptr) { + free(packed_input_); + packed_input_ = nullptr; + } + if (need_align_) { + if (packed_output_ != nullptr) { + free(packed_output_); + packed_output_ = nullptr; + } + } + if (output_buffer_ != nullptr) { + free(output_buffer_); + output_buffer_ = nullptr; + } +} + int DeconvolutionDepthwiseInt8CPUKernel::InitWeightBias() { // init weight: int8 -> int16 // o, h, w, i -> o/8, h, w, i, 8; o == group, i == 1 @@ -101,9 +123,9 @@ int DeconvolutionDepthwiseInt8CPUKernel::InitBuffer() { } // malloc tmp buffer for int32 output - output_buffer = + output_buffer_ = reinterpret_cast(malloc(conv_param_->output_h_ * conv_param_->output_w_ * C4NUM * sizeof(int32_t))); - if (output_buffer == nullptr) { + if (output_buffer_ == nullptr) { MS_LOG(ERROR) << "Malloc buffer failed."; return RET_ERROR; } @@ -144,10 +166,21 @@ int DeconvolutionDepthwiseInt8CPUKernel::Init() { } int DeconvolutionDepthwiseInt8CPUKernel::ReSize() { - free(packed_input_); + if (packed_input_ != nullptr) { + free(packed_input_); + packed_input_ = nullptr; + } if (need_align_) { - free(packed_output_); + if (packed_output_ != nullptr) { + free(packed_output_); + packed_output_ = nullptr; + } + } + if (output_buffer_ != nullptr) { + free(output_buffer_); + output_buffer_ = nullptr; } + InitSlideParam(); // conv base init @@ -162,7 +195,7 @@ int DeconvolutionDepthwiseInt8CPUKernel::ReSize() { } int DeconvolutionDepthwiseInt8CPUKernel::Execute(int task_id) { - DeconvDwInt8(packed_output_, output_buffer, packed_input_, packed_weight_, reinterpret_cast(bias_data_), +
DeconvDwInt8(packed_output_, output_buffer_, packed_input_, packed_weight_, reinterpret_cast(bias_data_), conv_param_, sliding, task_id); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_depthwise_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_depthwise_int8.h index d7b76438f3b82b3274f72266b1dc494febf67d35..3b7ac123e35769151d42c3ca38d852411052ed93 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_depthwise_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_depthwise_int8.h @@ -29,14 +29,7 @@ class DeconvolutionDepthwiseInt8CPUKernel : public ConvolutionBaseCPUKernel { const std::vector &outputs, const Context *ctx, const lite::Primitive *primitive) : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} - ~DeconvolutionDepthwiseInt8CPUKernel() override { - delete sliding; - free(packed_weight_); - free(packed_input_); - if (need_align_) { - free(packed_output_); - } - }; + ~DeconvolutionDepthwiseInt8CPUKernel() override; int Init() override; int ReSize() override; @@ -52,7 +45,7 @@ class DeconvolutionDepthwiseInt8CPUKernel : public ConvolutionBaseCPUKernel { int16_t *packed_weight_; int16_t *packed_input_; int8_t *packed_output_; - int32_t *output_buffer; + int32_t *output_buffer_; bool need_align_ = false; }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/nnacl/flatten.h b/mindspore/lite/src/runtime/kernel/arm/nnacl/flatten.h index b2b2fdfebb32cd2e85305d6aff490a0e8b6fafa3..3d29b50f1af13eba8d2de28906391657bf027c71 100644 --- a/mindspore/lite/src/runtime/kernel/arm/nnacl/flatten.h +++ b/mindspore/lite/src/runtime/kernel/arm/nnacl/flatten.h @@ -24,4 +24,3 @@ typedef struct FlattenParameter { void Flatten(const void *input, void *output, FlattenParameter *flatten_param); #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_FLATTEN_H_ - diff --git a/mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/batchnorm.cc 
b/mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/batchnorm.cc index 269528e0a498f06a9c943e1725ba3be5fddd5f3b..4e08156d3cea839095455eab2551482170756089 100644 --- a/mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/batchnorm.cc +++ b/mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/batchnorm.cc @@ -19,10 +19,21 @@ void BatchNorm(float *output_ptr, const float *input_ptr, const float *mean_ptr, const float *variance_ptr, int task_id, BatchNormParameter *param) { - for (int u = task_id; u < param->unit_; u += param->op_parameter_.thread_num_) { - for (int c = 0; c < param->channel_; c++) { - auto variance_sqrt = sqrt(variance_ptr[c] + param->epsilon_); + for (int c = task_id; c < param->channel_; c += param->op_parameter_.thread_num_) { + auto variance_sqrt = sqrt(variance_ptr[c] + param->epsilon_); + for (int u = 0; u < param->unit_; u++) { output_ptr[u * param->channel_ + c] = (input_ptr[u * param->channel_ + c] - mean_ptr[c]) / variance_sqrt; } } } + +void FusedBatchNorm(float *output_ptr, const float *input_ptr, const float *scale_ptr, const float *offest_ptr, + const float *mean_ptr, const float *variance_ptr, int task_id, BatchNormParameter *param) { + for (int c = task_id; c < param->channel_; c += param->op_parameter_.thread_num_) { + auto variance_sqrt = sqrt(variance_ptr[c] + param->epsilon_); + for (int u = 0; u < param->unit_; u++) { + output_ptr[u * param->channel_ + c] = + (input_ptr[u * param->channel_ + c] - mean_ptr[c]) / variance_sqrt * scale_ptr[c] + offest_ptr[c]; + } + } +} diff --git a/mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/batchnorm.h b/mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/batchnorm.h index c6103565e1fd7d5c3179aeb4f2ee770966428620..b4c187ba9a46dc43f656864cd95db10c820804d2 100644 --- a/mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/batchnorm.h +++ b/mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/batchnorm.h @@ -29,4 +29,7 @@ typedef struct BatchNormParameter { void BatchNorm(float *output_ptr, const float *input_ptr, 
const float *mean_ptr, const float *variance_ptr, int task_id, BatchNormParameter *param); +void FusedBatchNorm(float *output_ptr, const float *input_ptr, const float *scale_ptr, const float *offest_ptr, + const float *mean_ptr, const float *variance_ptr, int task_id, BatchNormParameter *param); + #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_FUSED_BATCHNORM_H_ diff --git a/mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/conv_depthwise.cc b/mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/conv_depthwise.cc index cd68bc467ef88922bf5fb49244b251be6fb0eaf2..3f543a1d38f3960fdd4bc02c1e8a792e9ff219fa 100644 --- a/mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/conv_depthwise.cc +++ b/mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/conv_depthwise.cc @@ -486,6 +486,21 @@ void ConvDw3x3Fp32OutputUnit(float *src_buf, float *dst_output, const float *bia float32x4_t d10 = vaddq_f32(vaddq_f32(vaddq_f32(t10, t11), t12), bias_ptr); float32x4_t d11 = vaddq_f32(vsubq_f32(vsubq_f32(t11, t12), t13), bias_ptr); + float32x4_t zeros = {0, 0, 0, 0}; + float32x4_t bounds = {6, 6, 6, 6}; + if (is_relu) { + d00 = vmaxq_f32(d00, zeros); + d01 = vmaxq_f32(d01, zeros); + d10 = vmaxq_f32(d10, zeros); + d11 = vmaxq_f32(d11, zeros); + } + if (is_relu6) { + d00 = vminq_f32(vmaxq_f32(d00, zeros), bounds); + d01 = vminq_f32(vmaxq_f32(d01, zeros), bounds); + d10 = vminq_f32(vmaxq_f32(d10, zeros), bounds); + d11 = vminq_f32(vmaxq_f32(d11, zeros), bounds); + } + vst1q_f32(dst_output, d00); if (w_in_range) { vst1q_f32(dst_output + channel, d01); @@ -536,6 +551,19 @@ void ConvDw3x3Fp32OutputUnit(float *src_buf, float *dst_output, const float *bia float d10 = t10 + t11 + t12 + bias_ptr[0]; float d11 = t11 - t12 - t13 + bias_ptr[0]; + if (is_relu) { + d00 = MSMAX(d00, 0); + d01 = MSMAX(d01, 0); + d10 = MSMAX(d10, 0); + d11 = MSMAX(d11, 0); + } + if (is_relu6) { + d00 = MSMIN(MSMAX(d00, 0), 6); + d01 = MSMIN(MSMAX(d01, 0), 6); + d10 = MSMIN(MSMAX(d10, 0), 6); + d11 = MSMIN(MSMAX(d11, 0), 6); + } + 
(dst_output + i)[0] = d00; if (w_in_range) { (dst_output + i + channel)[0] = d01; diff --git a/mindspore/lite/src/runtime/kernel/arm/nnacl/fused_batchnorm.cc b/mindspore/lite/src/runtime/kernel/arm/nnacl/fused_batchnorm.cc deleted file mode 100644 index c740c9cdb1cefe76c351b8c4d98e0406d7086b9f..0000000000000000000000000000000000000000 --- a/mindspore/lite/src/runtime/kernel/arm/nnacl/fused_batchnorm.cc +++ /dev/null @@ -1,35 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "nnacl/fused_batchnorm.h" -#include - -void FusedBatchNorm(const float *input_ptr, const float *scale_ptr, const float *offest_ptr, const float *mean_ptr, - const float *variance_ptr, int *input_shapes, float epsilon, float *output_ptr) { - int channel = input_shapes[3]; - int units = 1; - for (int i = 0; i < 3; i++) { - units *= input_shapes[i]; - } - for (int c = 0; c < input_shapes[3]; c++) { - auto variance_sqrt = sqrt(variance_ptr[c] + epsilon); - for (int u = 0; u < units; u++) { - output_ptr[u * channel + c] = - (input_ptr[u * channel + c] - mean_ptr[c]) / variance_sqrt * scale_ptr[c] + offest_ptr[c]; - } - } -} - diff --git a/mindspore/lite/src/runtime/kernel/arm/nnacl/fused_batchnorm.h b/mindspore/lite/src/runtime/kernel/arm/nnacl/fused_batchnorm.h deleted file mode 100644 index 259b967ac69b83642796c7b4f7cc708e3a558497..0000000000000000000000000000000000000000 --- a/mindspore/lite/src/runtime/kernel/arm/nnacl/fused_batchnorm.h +++ /dev/null @@ -1,32 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_FUSED_BATCHNORM_H_ -#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_FUSED_BATCHNORM_H_ - -#include "nnacl/op_base.h" - -typedef struct FusedBatchNormParameter { - OpParameter op_parameter_; - float epsilon_; -} FusedBatchNormParameter; - -void FusedBatchNorm(const float *input_ptr, const float *scale_ptr, const float *offest_ptr, const float *mean_ptr, - const float *variance_ptr, int *input_shapes, float epsilon, float *output_ptr); - - -#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_FUSED_BATCHNORM_H_ - diff --git a/mindspore/lite/src/runtime/kernel/arm/nnacl/scale.h b/mindspore/lite/src/runtime/kernel/arm/nnacl/scale.h index 0b1b733f5fa2e776f6387c976b811867ad4e8136..c4dee2a0daf144db73dfd5cba411a7b4bd3c79ff 100644 --- a/mindspore/lite/src/runtime/kernel/arm/nnacl/scale.h +++ b/mindspore/lite/src/runtime/kernel/arm/nnacl/scale.h @@ -25,10 +25,9 @@ typedef struct ScaleParameter { int axis_size_; int inner_size_; int axis_; - bool has_offset_; - // todo yangruoqi: axis + bool const_scale_ = false; + bool has_offset_ = false; } ScaleParameter; int DoScale(float *in_data, float *out_data, float *scale, float *offset, int task_id, ScaleParameter *scale_param); #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_SCALE_H_ - diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/batchnorm_fp32_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/batchnorm_fp32_tests.cc index b65e6ecaabc62926c8f398cee11af932d83ff518..7bd03d842da44585d60728e11240763b100956ba 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/batchnorm_fp32_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/batchnorm_fp32_tests.cc @@ -17,33 +17,20 @@ #include "mindspore/core/utils/log_adapter.h" #include "common/common_test.h" #include "mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/batchnorm.h" -#include "mindspore/lite/src/runtime/kernel/arm/nnacl/fused_batchnorm.h" #include 
"mindspore/lite/src/kernel_registry.h" #include "mindspore/lite/src/lite_kernel.h" -#include "mindspore/lite/src/common/file_utils.h" namespace mindspore { - class TestBatchnormFp32 : public mindspore::Common { public: TestBatchnormFp32() {} }; TEST_F(TestBatchnormFp32, BNTest) { - std::vector in_data = {0.0669681, 0.959215, 0.252686, 0.613594, 0.811776, 0.139469, 0.322848, 0.118354, - 0.082978, 0.399467, 0.961267, 0.0247456, 0.0714259, 0.0791484, 0.0648625, 0.561612, - 0.412069, 0.311492, 0.46109, 0.377125, 0.369283, 0.0332446, 0.696142, 0.715973, - 0.525524, 0.477265, 0.0336351, 0.751577, 0.377548, 0.964603, 0.0196834, 0.174865}; - std::vector in_data1 = {0.855446, 0.821765, 0.281008, 0.0798653, 0.22294, 0.793782, 0.963222, 0.17851, - 0.667549, 0.274381, 0.592842, 0.216552, 0.190274, 0.237873, 0.610063, 0.307559, - 0.830007, 0.760957, 0.583265, 0.763793, 0.456372, 0.391378, 0.547915, 0.862198, - 0.510794, 0.826776, 0.515894, 0.30071, 0.404987, 0.184773}; - std::vector in_data2 = {0.712438, 0.4927, 0.078419, 0.310429, 0.546871, 0.0667141, 0.874321, 0.0265647, - 0.685165, 0.732586, 0.952889, 0.506402, 0.540784, 0.131119, 0.357713, 0.678992, - 0.960839, 0.340706, 0.697678, 0.398146, 0.313321, 0.6485, 0.739153, 0.00190134, - 0.536842, 0.996873, 0.445276, 0.371212, 0.420397, 0.0930115}; - std::vector in_data3(32, 1); - std::vector in_data4(32, 0); + std::vector in_data = {-11.18675, 11.433986, 11.386012, 11.245945, -2.7614849, 14.692399, + -1.1983503, -6.6790967, 6.383416, -13.3213005, -8.693595, 9.476344}; + std::vector in_data1 = {12.352293, 5.122387, 14.249514}; + std::vector in_data2 = {14.632595, 0.70900035, 11.179003}; std::vector inputs_tensor; std::vector outputs_tensor; @@ -51,8 +38,7 @@ TEST_F(TestBatchnormFp32, BNTest) { op_param.op_parameter_.type_ = schema::PrimitiveType_BatchNorm; op_param.epsilon_ = 0.001f; - std::vector in_shape = {1, 2, 4, 4}; - + std::vector shape = {1, 2, 2, 3}; lite::tensor::Tensor input0_tensor; lite::tensor::Tensor input1_tensor; 
lite::tensor::Tensor input2_tensor; @@ -62,39 +48,40 @@ TEST_F(TestBatchnormFp32, BNTest) { input0_tensor.SetData(in_data.data()); input1_tensor.SetData(in_data1.data()); input2_tensor.SetData(in_data2.data()); - input0_tensor.set_shape(in_shape); + input0_tensor.set_shape(shape); + input1_tensor.set_shape({3}); + input2_tensor.set_shape({3}); - std::vector output(32); - std::vector corr_out(32); - std::vector output_shape = {1, 2, 4, 4}; + std::vector output(12); + std::vector corr_out = {-6.1533737, 7.4904885, -0.8563998, -0.289212, -9.356432, 0.13245535, + -3.5422924, -14.005781, -2.3525476, -6.7113695, -16.396551, -1.4275324}; lite::tensor::Tensor output0_tensor; outputs_tensor.push_back(&output0_tensor); output0_tensor.SetData(output.data()); + output0_tensor.set_shape(shape); kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, schema::PrimitiveType_BatchNorm}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); ASSERT_NE(creator, nullptr); lite::Context ctx; - ctx.thread_num_ = 7; + ctx.thread_num_ = 1; kernel::LiteKernel *kernel = creator(inputs_tensor, outputs_tensor, reinterpret_cast(&op_param), &ctx, desc, nullptr); ASSERT_NE(kernel, nullptr); auto output_tensor_shape = output0_tensor.shape(); kernel->Run(); - FusedBatchNorm(in_data.data(), in_data3.data(), in_data4.data(), in_data1.data(), in_data2.data(), in_shape.data(), - 0.001f, corr_out.data()); - printf("==================output data=================\n"); - for (int i = 0; i < 1 * 28; i++) { + for (int i = 0; i < output0_tensor.ElementsNum(); i++) { std::cout << output[i] << " ,"; } std::cout << std::endl; - CompareOutputData(output.data(), corr_out.data(), 32, 0.00001); + CompareOutputData(output.data(), corr_out.data(), output0_tensor.ElementsNum(), 0.001); input0_tensor.SetData(nullptr); input1_tensor.SetData(nullptr); input2_tensor.SetData(nullptr); output0_tensor.SetData(nullptr); + MS_LOG(INFO) << "TestBathNormFp32 accuracy passed"; } } // namespace 
mindspore diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/batchnorm/fusedBatchnorm_input_0.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/batchnorm/fusedBatchnorm_input_0.bin deleted file mode 100644 index b22edaef0e7683cd56ace393a1dcd5c8b061c979..0000000000000000000000000000000000000000 Binary files a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/batchnorm/fusedBatchnorm_input_0.bin and /dev/null differ diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/batchnorm/fusedBatchnorm_input_1.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/batchnorm/fusedBatchnorm_input_1.bin deleted file mode 100644 index 437a6958ad7efb877634c6048fd294ab8a2426a7..0000000000000000000000000000000000000000 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/batchnorm/fusedBatchnorm_input_1.bin +++ /dev/null @@ -1 +0,0 @@ -ýL[?-"R>‰qƒ>{B¸>´?yx?ó×_>JSD>Gº0? \ No newline at end of file diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/batchnorm/fusedBatchnorm_input_2.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/batchnorm/fusedBatchnorm_input_2.bin deleted file mode 100644 index 4708330c95150b02913fb30726e2acf9f32047bb..0000000000000000000000000000000000000000 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/batchnorm/fusedBatchnorm_input_2.bin +++ /dev/null @@ -1 +0,0 @@ -J[q? §P?¾ŸŒ>gý?õA?>oo? 7G?x¸<¿”"? \ No newline at end of file diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/batchnorm/fusedBatchnorm_input_3.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/batchnorm/fusedBatchnorm_input_3.bin deleted file mode 100644 index ca38daf512865dd08e8bae7f84bb584f57ed8672..0000000000000000000000000000000000000000 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/batchnorm/fusedBatchnorm_input_3.bin +++ /dev/null @@ -1 +0,0 @@ -WÚU>X™8?*Á ?!—v>›žF>0î ?.<C?Èd? 
\ No newline at end of file diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/batchnorm/fusedBatchnorm_input_4.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/batchnorm/fusedBatchnorm_input_4.bin deleted file mode 100644 index dd1fa36149bd64ff30f4e56954034d58ed336cec..0000000000000000000000000000000000000000 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/batchnorm/fusedBatchnorm_input_4.bin +++ /dev/null @@ -1 +0,0 @@ -ÜR?Ü]?žÎ>†c~?um?z1->í??Ø'?—U? \ No newline at end of file diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/batchnorm/fusedBatchnorm_out.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/batchnorm/fusedBatchnorm_out.bin deleted file mode 100644 index 9bc4e213954d4d9a897699b709331dd1cbc54c91..0000000000000000000000000000000000000000 Binary files a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/batchnorm/fusedBatchnorm_out.bin and /dev/null differ