diff --git a/mindspore/lite/schema/model.fbs b/mindspore/lite/schema/model.fbs index b71fa966c125c70e45793c1e9e3a22ed8ccdb091..7cc9e082b4b25ced0c4af20d7efc9ff991cbd482 100644 --- a/mindspore/lite/schema/model.fbs +++ b/mindspore/lite/schema/model.fbs @@ -174,6 +174,19 @@ union PrimitiveType { Where, OneHot, Lstm, + Conv2DGradFilter, + Conv2DGradInput, + PoolingGrad, + BNGradInput, + OptMomentum, + BiasGrad, + SoftmaxCrossEntropy, + AddGrad, + SubGrad, + MulGrad, + DivGrad, + PowerGrad, + ActivationGrad, PriorBox } diff --git a/mindspore/lite/schema/ops.fbs b/mindspore/lite/schema/ops.fbs index 52fb99aabe734a4c12e86b2cfb9e0d2df7c065e9..a78b43b34963471571843cd0e95493f83a57dd75 100644 --- a/mindspore/lite/schema/ops.fbs +++ b/mindspore/lite/schema/ops.fbs @@ -55,7 +55,25 @@ enum ActivationType : byte { LINEAR = 15, UNKNOW = 16 } - +enum ActivationGradType : byte { + NO_ACTIVATION = 0, + RELU = 1, + SIGMOID = 2, + RELU6 = 3, + ELU = 4, + LEAKY_RELU = 5, + ABS = 6, + RELU1 = 7, + SOFTSIGN = 8, + SOFTPLUS = 9, + TANH = 10, + SELU = 11, + HSWISH = 12, + HSIGMOID = 13, + THRESHOLDRELU = 14, + LINEAR = 15, + UNKNOW = 16 +} enum ReduceType : byte { REDUCE_MAX = 0, REDUCE_MEAN = 1, @@ -125,6 +143,10 @@ table SoftMax { table Activation { type: ActivationType = 0; } +table ActivationGrad { + type: ActivationGradType = 0; +} + table Conv2D { format: Format = 0; @@ -146,7 +168,45 @@ table Conv2D { activationType: ActivationType = 0; } -table FusedBatchNorm { +table Conv2DGradFilter { + format: Format = 0; + group: int; + channelIn: int; + channelOut: int; + kernelW: int; + kernelH: int; + strideW: int; + strideH: int; + padMode: PadMode; + padUp: int; + padDown: int; + padLeft: int; + padRight: int; + dilateW: int; + dilateH: int; + hasBias: bool = false; + activationType: ActivationType = 0; +} + +table Conv2DGradInput { + format: Format = 0; + group: int; + channelIn: int; + channelOut: int; + kernelW: int; + kernelH: int; + strideW: int; + strideH: int; + padMode: PadMode; + 
padUp: int; + padDown: int; + padLeft: int; + padRight: int; + dilateW: int; + dilateH: int; + hasBias: bool = false; + activationType: ActivationType = 0; +}table FusedBatchNorm { epsilon: float = 0.00001; // eg. epsilon=0.001 momentum: float = 0.9; spatial: int = 1; @@ -156,6 +216,31 @@ table CaffeBatchNorm { epsilon: float; // eg. epsilon=0.001 } +table BiasGrad { + axis: [int]; +} + + +table SoftmaxCrossEntropy { + axis: [int]; +} + + +table PoolingGrad { + format: Format = 0; + poolingMode: PoolMode; + global: bool = false; + windowW: int; + windowH: int; + strideW: int; + strideH: int; + padMode: PadMode; + padUp: int; + padDown: int; + padLeft: int; + padRight: int; + roundMode: RoundMode; +} table Shape { } @@ -286,7 +371,10 @@ table DeConv2D { hasBias: bool = false; activationType: ActivationType = 0; } - +table BNGradInput { + eps : float; + channels: int; +} table Scale { format: Format = 0; } @@ -307,6 +395,17 @@ table Mul { table Div { } +table AddGrad { +} + +table SubGrad { +} + +table MulGrad { +} + +table DivGrad { +} table RealDiv { } @@ -389,7 +488,11 @@ table Power { scale: float; shift: float; } - +table PowerGrad { + power: float; + scale: float; + shift: float; +} table ArgMax { axis: int; outMaxValue: bool; @@ -712,6 +815,10 @@ table SquaredDifference { table TupleGetItem { } +table OptMomentum { +} + + table Where{ } diff --git a/mindspore/lite/src/common/file_utils_ext.cc b/mindspore/lite/src/common/file_utils_ext.cc new file mode 100644 index 0000000000000000000000000000000000000000..cdaa337e23da0631bf31f6649d44df0f9c3f464c --- /dev/null +++ b/mindspore/lite/src/common/file_utils_ext.cc @@ -0,0 +1,53 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include +#include +#include +#include "src/common/file_utils.h" +#include "src/common/file_utils_ext.h" + +namespace mindspore { +namespace lite { +static int CompareOutputRelativeData(float *output_data, float *correct_data, int data_size) { + float error = 0; + + // relative error + float diffSum = 0.0f; + float sum = 0.0f; + for (int i = 0; i < data_size; i++) { + sum += std::abs(correct_data[i]); + } + for (int i = 0; i < data_size; i++) { + float diff = std::abs(output_data[i] - correct_data[i]); + diffSum += diff; + } + error = diffSum / sum; + if (error > 1e-4) { + std::cout << "has accuracy error!\n" << error << "\n"; + return 1; + } + return 0; +} + +int CompareRelativeOutput(float *output_data, std::string file_path) { + size_t output_size; + auto ground_truth = reinterpret_cast(mindspore::lite::ReadFile(file_path.c_str(), &output_size)); + size_t output_num = output_size / sizeof(float); + std::cout << "output num : " << output_num << "\n"; + return CompareOutputRelativeData(output_data, ground_truth, output_num); +} +} // namespace lite +} // namespace mindspore diff --git a/mindspore/lite/src/common/file_utils_ext.h b/mindspore/lite/src/common/file_utils_ext.h new file mode 100644 index 0000000000000000000000000000000000000000..28eea02e41af566099edc51c83adc4c026f5d05e --- /dev/null +++ b/mindspore/lite/src/common/file_utils_ext.h @@ -0,0 +1,28 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the 
#ifndef MINDSPORE_LITE_COMMON_FILE_UTILS_EXT_H_
#define MINDSPORE_LITE_COMMON_FILE_UTILS_EXT_H_
#include <string>


namespace mindspore {
namespace lite {
// Compares `output_data` with the float data stored in the binary file at `file_path`.
// The comparison uses the aggregate relative error sum(|out - ref|) / sum(|ref|).
// Returns 1 when that error exceeds 1e-4 and 0 otherwise.
// The number of floats compared is the file size divided by sizeof(float), so
// `output_data` must hold at least that many elements.
int CompareRelativeOutput(float *output_data, std::string file_path);

}  // namespace lite
}  // namespace mindspore
#endif  // MINDSPORE_LITE_COMMON_FILE_UTILS_EXT_H_
LiteKernel { diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/activation_grad.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/activation_grad.cc new file mode 100644 index 0000000000000000000000000000000000000000..279832aca2fc8bf2f4cdb36cfc71a450d0336062 --- /dev/null +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/activation_grad.cc @@ -0,0 +1,110 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "src/runtime/kernel/arm/fp32/activation_grad.h" +#include "schema/model_generated.h" +#include "src/kernel_registry.h" +#include "src/runtime/runtime_api.h" +#include "include/errorcode.h" + +using mindspore::lite::KernelRegistrar; +using mindspore::kernel::KERNEL_ARCH::kCPU; +using mindspore::lite::RET_ERROR; +using mindspore::lite::RET_OK; +using mindspore::schema::ActivationGradType_HSWISH; +using mindspore::schema::ActivationGradType_LEAKY_RELU; +using mindspore::schema::ActivationGradType_RELU; +using mindspore::schema::ActivationGradType_RELU6; +using mindspore::schema::PrimitiveType_ActivationGrad; + +namespace mindspore::kernel { +int ActivationGradCPUKernel::Init() { + outputs_[0]->set_shape(inputs_[0]->shape()); + return RET_OK; +} + +int ActivationGradCPUKernel::ReSize() { return RET_OK; } + +int ActivationGradCPUKernel::DoActivation(int task_id) { + auto yt_addr = reinterpret_cast(inputs_.at(0)->Data()); + auto input_addr = reinterpret_cast(inputs_.at(1)->Data()); + auto 
output_addr = reinterpret_cast(outputs_.at(0)->Data()); + auto length = inputs_.at(0)->ElementsNum(); + + auto error_code = RET_OK; + + if (type_ == schema::ActivationGradType_RELU) { + error_code = ReluGrad(yt_addr, input_addr, length, output_addr); + } else if (type_ == schema::ActivationGradType_RELU6) { + error_code = Relu6Grad(yt_addr, input_addr, length, output_addr); + } else if (type_ == schema::ActivationGradType_LEAKY_RELU) { + error_code = LReluGrad(yt_addr, input_addr, length, output_addr, alpha_); + } else if (type_ == schema::ActivationGradType_SIGMOID) { + error_code = SigmoidGrad(yt_addr, input_addr, length, output_addr); + } else if (type_ == schema::ActivationGradType_TANH) { + error_code = TanhGrad(yt_addr, input_addr, length, output_addr); + } else if (type_ == schema::ActivationGradType_HSWISH) { + error_code = HSwishGrad(yt_addr, input_addr, length, output_addr); + } else if (type_ == schema::ActivationGradType_HSIGMOID) { + error_code = HSigmoidGrad(yt_addr, input_addr, length, output_addr); + } else { + MS_LOG(ERROR) << "Activation type error"; + return RET_ERROR; + } + if (error_code != RET_OK) { + return RET_ERROR; + } + return RET_OK; +} + +int ActivationGradRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) { + auto activationGrad_kernel = reinterpret_cast(cdata); + auto error_code = activationGrad_kernel->DoActivation(task_id); + if (error_code != RET_OK) { + MS_LOG(ERROR) << "ActivationGradRun error task_id[" << task_id << "] error_code[" << error_code << "]"; + return RET_ERROR; + } + return RET_OK; +} + +int ActivationGradCPUKernel::Run() { + int error_code = LiteBackendParallelLaunch(ActivationGradRun, this, thread_count_); + if (error_code != RET_OK) { + MS_LOG(ERROR) << "Activation function error error_code[" << error_code << "]"; + return RET_ERROR; + } + return RET_OK; +} + +kernel::LiteKernel *CpuActivationGradFp32KernelCreator(const std::vector &inputs, + const std::vector &outputs, + OpParameter *opParameter, const 
lite::Context *ctx, + const kernel::KernelKey &desc) { + MS_ASSERT(opParameter != nullptr); + MS_ASSERT(desc.type == schema::PrimitiveType_ActivationGrad); + auto *kernel = new (std::nothrow) ActivationGradCPUKernel(opParameter, inputs, outputs); + MS_ASSERT(kernel != nullptr); + auto ret = kernel->Init(); + if (ret != RET_OK) { + MS_LOG(ERROR) << "InferShape kernel failed, name: " << opParameter->name_ + << ", type: " + << schema::EnumNamePrimitiveType(static_cast(opParameter->type_)); + } + return kernel; +} + +REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_ActivationGrad, CpuActivationGradFp32KernelCreator) +} // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/activation_grad.h b/mindspore/lite/src/runtime/kernel/arm/fp32/activation_grad.h new file mode 100644 index 0000000000000000000000000000000000000000..4fc4265d49fc49d17aee26b6a39d15fb36194799 --- /dev/null +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/activation_grad.h @@ -0,0 +1,50 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_ACTIVATION_GRAD_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_ACTIVATION_GRAD_H_ + +#include +#include "src/lite_kernel.h" +#include "ir/anf.h" + +#include "src/runtime/kernel/arm/opclib/activation_grad.h" + +namespace mindspore::kernel { +class ActivationGradCPUKernel : public LiteKernel { + public: + explicit ActivationGradCPUKernel(OpParameter *param, const std::vector &inputs, + const std::vector &outputs) + : LiteKernel(param, inputs, outputs) { + ActivationGradParameter *param_act_grad = reinterpret_cast(param); + type_ = param_act_grad->type_; + alpha_ = param_act_grad->alpha_; + } + ~ActivationGradCPUKernel() override = default; + + int Init() override; + int ReSize() override; + int Run() override; + int DoActivation(int task_id); + + private: + int thread_count_; + int type_; + float alpha_; +}; +} // namespace mindspore::kernel + +#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_ACTIVATION_GRAD_H_ diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_grad.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_grad.cc new file mode 100644 index 0000000000000000000000000000000000000000..a9d62c8ee9860c0307de9dbc0cf7b73e5ca7f699 --- /dev/null +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_grad.cc @@ -0,0 +1,285 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "schema/model_generated.h" +#include "src/kernel_registry.h" +#include "src/runtime/kernel/arm/opclib/fp32/reduce_grad.h" +#include "src/runtime/kernel/arm/fp32/arithmetic_grad.h" +#include "src/runtime/kernel/arm/opclib/fp32/arithmetic_grad.h" +#include "include/errorcode.h" + +using mindspore::kernel::KERNEL_ARCH::kCPU; +using mindspore::lite::KernelRegistrar; +using mindspore::lite::RET_ERROR; +using mindspore::lite::RET_OK; + +namespace mindspore::kernel { +namespace { +constexpr int kArithGradOpInputNum = 3; +constexpr int kArithGradOpOutputNum = 2; +} // namespace + +int ArithmeticGradCPUKernel::Init() { + auto ret = InferShape(); + return ret; +} + +int ArithmeticGradCPUKernel::InferShape() { + if (inputs_.size() != kArithGradOpInputNum) { + MS_LOG(ERROR) << "The number of input must be " << kArithGradOpInputNum; + return RET_ERROR; + } + if (outputs_.size() != kArithGradOpOutputNum) { + MS_LOG(ERROR) << "The number of output must be " << kArithGradOpOutputNum; + return RET_ERROR; + } + auto dy = inputs_[0]; + auto x1 = inputs_[1]; + auto x2 = inputs_[2]; + auto dx1 = outputs_[0]; + auto dx2 = outputs_[1]; + + MS_ASSERT(dy != nullptr); + MS_ASSERT(x1 != nullptr); + MS_ASSERT(x2 != nullptr); + MS_ASSERT(dx1 != nullptr); + MS_ASSERT(dx2 != nullptr); + + auto inShape0 = x1->shape(); + auto inShape1 = x2->shape(); + auto outShape = dy->shape(); + + if ((type() == PrimitiveType_AddGrad) || (type() == PrimitiveType_SubGrad)) { + arithmeticParameter_->ndim_ = outShape.size(); + auto fillDimNum0 = outShape.size() - inShape0.size(); + auto fillDimNum1 = outShape.size() - inShape1.size(); + int j0 = 0; + int j1 = 0; + for (unsigned int i = 0; i < outShape.size(); i++) { + arithmeticParameter_->in_shape0_[i] = (i < fillDimNum0) ? 1 : inShape0[j0++]; + arithmeticParameter_->in_shape1_[i] = (i < fillDimNum1) ? 
1 : inShape1[j1++]; + arithmeticParameter_->out_shape_[i] = outShape[i]; + } + } else { + // if (inShape0.size() < inShape1.size()) + if (dx1->ElementsNum() < dx2->ElementsNum()) { + arithmeticParameter_->ndim_ = inShape1.size(); + if (type() == PrimitiveType_MulGrad) + arithmetic_grad_ = &ArithmeticGradCPUKernel::ArithmeticGradMul2L; + else if (type() == PrimitiveType_DivGrad) + arithmetic_grad_ = &ArithmeticGradCPUKernel::ArithmeticGradDiv2L; + + auto fillDimNum = inShape1.size() - inShape0.size(); // This will not work for batch! + int j = 0; + for (unsigned int i = 0; i < inShape1.size(); i++) { + if (i < fillDimNum) { + arithmeticParameter_->in_shape1_[i] = 1; + } else { + arithmeticParameter_->in_shape1_[i] = inShape0[j++]; + } + arithmeticParameter_->in_shape0_[i] = inShape1[i]; + arithmeticParameter_->out_shape_[i] = outShape[i]; + } + } else if (dx2->ElementsNum() < dx1->ElementsNum()) { // if (inShape0.size() > inShape1.size()) + arithmeticParameter_->ndim_ = inShape0.size(); + if (type() == PrimitiveType_MulGrad) + arithmetic_grad_ = &ArithmeticGradCPUKernel::ArithmeticGradMul1L; + else if (type() == PrimitiveType_DivGrad) + arithmetic_grad_ = &ArithmeticGradCPUKernel::ArithmeticGradDiv1L; + arithmeticParameter_->broadcasting_ = true; + arithmeticParameter_->ndim_ = inShape0.size(); + int j = 0; + auto fillDimNum = inShape0.size() - inShape1.size(); + for (unsigned int i = 0; i < inShape0.size(); i++) { + if (i < fillDimNum) { + arithmeticParameter_->in_shape1_[i] = 1; + } else { + arithmeticParameter_->in_shape1_[i] = inShape1[j++]; + } + arithmeticParameter_->in_shape0_[i] = inShape0[i]; + arithmeticParameter_->out_shape_[i] = outShape[i]; + } + } else { + arithmeticParameter_->broadcasting_ = false; + for (unsigned int i = 0; i < inShape0.size(); i++) { + arithmeticParameter_->in_shape1_[i] = inShape1[i]; + arithmeticParameter_->in_shape0_[i] = inShape0[i]; + arithmeticParameter_->out_shape_[i] = outShape[i]; + } + } + tile_data0 = new (std::nothrow) 
float[inputs_.at(0)->ElementsNum()]; + MS_ASSERT(tile_data0 != nullptr); + tile_data1 = new (std::nothrow) float[inputs_.at(0)->ElementsNum()]; + MS_ASSERT(tile_data1 != nullptr); + if (type() == PrimitiveType_DivGrad) { + tile_data2 = new (std::nothrow) float[inputs_.at(0)->ElementsNum()]; + MS_ASSERT(tile_data2 != nullptr); + } + } + + dx1->set_shape(x1->shape()); + dx2->set_shape(x2->shape()); + // outTensor->set_shape(out_shape); + dx1->set_data_type(dy->data_type()); + dx2->set_data_type(dy->data_type()); + return RET_OK; +} + +void ArithmeticGradCPUKernel::ArithmeticGradAdd(float *dy, int dy_size, float *dx1, int dx1_size, float *dx2, + int dx2_size) { + if (dx1_size == dy_size) + memcpy(dx1, dy, dy_size * sizeof(float)); + else + ReduceSumByAxes(dy, arithmeticParameter_->out_shape_, dx1, arithmeticParameter_->in_shape0_, + arithmeticParameter_->ndim_); + if (dx2_size == dy_size) + memcpy(dx2, dy, dy_size * sizeof(float)); + else + ReduceSumByAxes(dy, arithmeticParameter_->out_shape_, dx2, arithmeticParameter_->in_shape1_, + arithmeticParameter_->ndim_); +} + +void ArithmeticGradCPUKernel::ArithmeticGradSub(float *dy, int dy_size, float *dx1, int dx1_size, float *dx2, + int dx2_size) { + if (dx1_size == dy_size) + memcpy(dx1, dy, dy_size * sizeof(float)); + else + ReduceSumByAxes(dy, arithmeticParameter_->out_shape_, dx1, arithmeticParameter_->in_shape0_, + arithmeticParameter_->ndim_); + if (dx2_size == dy_size) { + for (int i = 0; i < dx2_size; i++) { + dx2[i] = -dy[i]; + } + } else { + ReduceSumByAxes(dy, arithmeticParameter_->out_shape_, dx2, arithmeticParameter_->in_shape1_, + arithmeticParameter_->ndim_); + for (int i = 0; i < dx2_size; i++) { + dx2[i] = -dx2[i]; + } + } +} + +void ArithmeticGradCPUKernel::ArithmeticGradMul(float *dy, int dy_size, float *dx1, int dx1_size, float *dx2, + int dx2_size) { + auto x1_data = reinterpret_cast(inputs_[1]->Data()); + auto x2_data = reinterpret_cast(inputs_[2]->Data()); + ElementMul(dy, x1_data, dx2, dy_size); + 
ElementMul(dy, x2_data, dx1, dy_size); +} + +void ArithmeticGradCPUKernel::ArithmeticGradMul1L(float *dy, int dy_size, float *dx1, int dx1_size, float *dx2, + int dx2_size) { + auto x1_data = reinterpret_cast(inputs_[1]->Data()); + auto x2_data = reinterpret_cast(inputs_[2]->Data()); + ElementMul(dy, x1_data, tile_data0, dy_size); + ReduceSumByAxes(tile_data0, arithmeticParameter_->in_shape0_, dx2, arithmeticParameter_->in_shape1_, + arithmeticParameter_->ndim_); + + BroadcastMul(dy, x2_data, tile_data0, tile_data1, dx1, dy_size, arithmeticParameter_); // broadcast directly to dx1 +} + +void ArithmeticGradCPUKernel::ArithmeticGradMul2L(float *dy, int dy_size, float *dx1, int dx1_size, float *dx2, + int dx2_size) { + auto x1_data = reinterpret_cast(inputs_[1]->Data()); + auto x2_data = reinterpret_cast(inputs_[2]->Data()); + ElementMul(dy, x2_data, tile_data0, dy_size); + ReduceSumByAxes(tile_data0, arithmeticParameter_->in_shape0_, dx1, arithmeticParameter_->in_shape1_, + arithmeticParameter_->ndim_); + + BroadcastMul(dy, x1_data, tile_data0, tile_data1, dx2, dy_size, arithmeticParameter_); // broadcast directly to dx2 +} + +void ArithmeticGradCPUKernel::ArithmeticGradDiv(float *dy, int dy_size, float *dx1, int dx1_size, float *dx2, + int dx2_size) { + auto x1 = reinterpret_cast(inputs_[1]->Data()); + auto x2 = reinterpret_cast(inputs_[2]->Data()); + ElementDiv(dy, x2, dx1, dy_size); + ElementMulAndDivNegSquare(dy, x1, x2, dx2, dy_size); +} + +void ArithmeticGradCPUKernel::ArithmeticGradDiv1L(float *dy, int dy_size, float *dx1, int dx1_size, float *dx2, + int dx2_size) { + auto x1_data = reinterpret_cast(inputs_[1]->Data()); + auto x2_data = reinterpret_cast(inputs_[2]->Data()); + + ElementMul(x2_data, x2_data, dx2, dx2_size); + ElementMul(x1_data, dy, dx1, dy_size); // use dx1 buffer + BroadcastDiv(dx1, dx2, tile_data0, tile_data1, tile_data2, dy_size, + arithmeticParameter_); // broadcast directly to dx1 + ReduceSumByAxes(tile_data2, 
arithmeticParameter_->in_shape0_, dx2, arithmeticParameter_->in_shape1_, + arithmeticParameter_->ndim_); + for (int i = 0; i < dx2_size; i++) dx2[i] = -dx2[i]; + // ReduceNegSumPrefix(tile_data2, dy_size, dx2, dx2_size); //then reduce into dx2 + + // broadcasting x2 + BroadcastDiv(dy, x2_data, tile_data0, tile_data1, dx1, dy_size, arithmeticParameter_); // broadcast directly to dx1 +} + +void ArithmeticGradCPUKernel::ArithmeticGradDiv2L(float *dy, int dy_size, float *dx1, int dx1_size, float *dx2, + int dx2_size) { + auto x1_data = reinterpret_cast(inputs_[1]->Data()); + auto x2_data = reinterpret_cast(inputs_[2]->Data()); + + // dx1 = dy/x2 + ElementDiv(dy, x2_data, tile_data0, dy_size); // first multiply into temp + ReduceSumByAxes(tile_data0, arithmeticParameter_->in_shape0_, dx1, arithmeticParameter_->in_shape1_, + arithmeticParameter_->ndim_); + + // dx2 = -dy*x1/(x2*x2) + BroadcastMul(dy, x1_data, tile_data0, tile_data1, tile_data2, dy_size, arithmeticParameter_); // broadcast numerator + ElementDivNegSquare(tile_data2, x2_data, dx2, dy_size); +} + +int ArithmeticGradCPUKernel::ReSize() { return RET_OK; } + +int ArithmeticGradCPUKernel::Run() { + auto dy = reinterpret_cast(inputs_[0]->Data()); + // auto input1_data1 = reinterpret_cast(inputs_[1]->Data()); + auto dx1 = reinterpret_cast(outputs_[0]->Data()); + auto dx2 = reinterpret_cast(outputs_[1]->Data()); + + size_t dy_size = inputs_.at(0)->ElementsNum(); + size_t dx1_size = outputs_.at(0)->ElementsNum(); + size_t dx2_size = outputs_[1]->ElementsNum(); + (this->*arithmetic_grad_)(dy, dy_size, dx1, dx1_size, dx2, dx2_size); + return RET_OK; +} + +kernel::LiteKernel *CpuArithmeticGradFp32KernelCreator(const std::vector &inputs, + const std::vector &outputs, + OpParameter *opParameter, const lite::Context *ctx, + const kernel::KernelKey &desc) { + MS_EXCEPTION_IF_NULL(opParameter); + if (opParameter == nullptr) { + return nullptr; + } + auto *kernel = new (std::nothrow) ArithmeticGradCPUKernel(opParameter, 
inputs, outputs); + MS_ASSERT(kernel != nullptr); + auto ret = kernel->Init(); + if (ret != RET_OK) { + MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: " + << schema::EnumNamePrimitiveType(static_cast(opParameter->type_)); + delete kernel; + return nullptr; + } + return kernel; +} + +REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_MulGrad, CpuArithmeticGradFp32KernelCreator) +REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_AddGrad, CpuArithmeticGradFp32KernelCreator) +REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_SubGrad, CpuArithmeticGradFp32KernelCreator) +REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_DivGrad, CpuArithmeticGradFp32KernelCreator) +} // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_grad.h b/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_grad.h new file mode 100644 index 0000000000000000000000000000000000000000..a11ef24a4488f892de2ab1ce82460289a202a18d --- /dev/null +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_grad.h @@ -0,0 +1,90 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_ARITHMETIC_GRAD_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_ARITHMETIC_GRAD_H_ + +#include +#include "src/lite_kernel.h" +#include "src/runtime/kernel/arm/opclib/fp32/arithmetic.h" +#include "schema/model_generated.h" +#include "ir/anf.h" + +using mindspore::schema::PrimitiveType_AddGrad; +using mindspore::schema::PrimitiveType_DivGrad; +using mindspore::schema::PrimitiveType_MulGrad; +using mindspore::schema::PrimitiveType_SubGrad; + +namespace mindspore::kernel { + +class ArithmeticGradCPUKernel; + +class ArithmeticGradCPUKernel : public LiteKernel { + typedef void (ArithmeticGradCPUKernel::*ArithmeticGradOperation)(float *, int, float *, int, float *, int); + + public: + explicit ArithmeticGradCPUKernel(OpParameter *parameter, const std::vector &inputs, + const std::vector &outputs) + : LiteKernel(parameter, inputs, outputs), tile_data0(NULL), tile_data1(NULL), tile_data2(NULL) { + switch (type()) { + case PrimitiveType_MulGrad: + arithmetic_grad_ = &ArithmeticGradCPUKernel::ArithmeticGradMul; // this will be adjusted in InferShape + break; + case PrimitiveType_AddGrad: + arithmetic_grad_ = &ArithmeticGradCPUKernel::ArithmeticGradAdd; + break; + case PrimitiveType_SubGrad: + arithmetic_grad_ = &ArithmeticGradCPUKernel::ArithmeticGradSub; + break; + case PrimitiveType_DivGrad: + arithmetic_grad_ = &ArithmeticGradCPUKernel::ArithmeticGradDiv; // this will be adjusted in InferShape + break; + default: + MS_LOG(ERROR) << "Error Operator type " << parameter->type_; + break; + } + arithmeticParameter_ = reinterpret_cast(parameter); + } + ~ArithmeticGradCPUKernel() override { + if (tile_data0) delete[] tile_data0; + if (tile_data1) delete[] tile_data1; + if (tile_data2) delete[] tile_data2; + } + void InitKernel(const CNodePtr &kernel_node); + + int Init() override; + int InferShape(); + int ReSize() override; + int Run() override; + + private: + void ArithmeticGradAdd(float *dy, int dy_size, float 
*dx1, int dx1_size, float *dx2, int dx2_size); + void ArithmeticGradSub(float *dy, int dy_size, float *dx1, int dx1_size, float *dx2, int dx2_size); + void ArithmeticGradMul(float *dy, int dy_size, float *dx1, int dx1_size, float *dx2, int dx2_size); + void ArithmeticGradMul1L(float *dy, int dy_size, float *dx1, int dx1_size, float *dx2, int dx2_size); + void ArithmeticGradMul2L(float *dy, int dy_size, float *dx1, int dx1_size, float *dx2, int dx2_size); + void ArithmeticGradDiv(float *dy, int dy_size, float *dx1, int dx1_size, float *dx2, int dx2_size); + void ArithmeticGradDiv1L(float *dy, int dy_size, float *dx1, int dx1_size, float *dx2, int dx2_size); + void ArithmeticGradDiv2L(float *dy, int dy_size, float *dx1, int dx1_size, float *dx2, int dx2_size); + ArithmeticParameter *arithmeticParameter_; + ArithmeticGradOperation arithmetic_grad_; + float *tile_data0; + float *tile_data1; + float *tile_data2; +}; +} // namespace mindspore::kernel + +#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_ARITHMETIC_GRAD_H_ diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/bias_grad.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/bias_grad.cc new file mode 100644 index 0000000000000000000000000000000000000000..e57fe298ab9222ba0d99c8e0ce6168f769413348 --- /dev/null +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/bias_grad.cc @@ -0,0 +1,115 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include
+#include "src/runtime/kernel/arm/fp32/bias_grad.h"
+#include "schema/model_generated.h"
+#include "src/kernel_registry.h"
+#include "include/errorcode.h"
+
+
+using mindspore::kernel::KERNEL_ARCH::kCPU;
+using mindspore::lite::KernelRegistrar;
+using mindspore::schema::PrimitiveType_BiasGrad;
+using mindspore::lite::RET_ERROR;
+using mindspore::lite::RET_OK;
+
+namespace mindspore::kernel {
+// Derives the output tensor's shape and data type from the single input:
+// every dimension except the last is set to 1, the last one is kept.
+// Returns RET_ERROR unless there is exactly one input and one output.
+int BiasGradCPUKernel::InferShape() {
+  if (1 != this->inputs_.size()) {
+    MS_LOG(ERROR) << "BiasGrad should have one input";
+    return RET_ERROR;
+  }
+  if (1 != this->outputs_.size()) {
+    MS_LOG(ERROR) << "BiasGrad should have one output";
+    return RET_ERROR;
+  }
+  auto *in0 = inputs_.front();
+  auto *out = outputs_.front();
+  MS_ASSERT(in0 != nullptr);
+  MS_ASSERT(out != nullptr);
+  auto inshape = in0->shape();
+  int ndim = inshape.size();
+  for (int i = 0; i < ndim - 1; i++) {
+    inshape[i] = 1;
+  }
+  out->set_shape(inshape);
+  out->set_data_type(in0->data_type());
+  return RET_OK;
+}
+
+// Runs shape inference and copies the input shape into bias_param.
+// Returns RET_ERROR when shape inference fails or the input has no dimensions.
+int BiasGradCPUKernel::Init() {
+  // Shape inference is required work, so it must not live inside MS_ASSERT: if the
+  // macro expands to nothing in some build (NOTE(review): confirm), the call vanishes.
+  if (InferShape() != RET_OK) {
+    return RET_ERROR;
+  }
+
+  auto dims = inputs_[0]->shape();
+  if (dims.empty()) {
+    // dims[ndim_ - 1] below would index before the start of an empty shape.
+    MS_LOG(ERROR) << "BiasGrad input should have at least one dimension";
+    return RET_ERROR;
+  }
+  // NOTE(review): the zero-fill loop below suggests the shape arrays hold 4 entries;
+  // inputs with more than 4 dimensions are not rejected here - confirm the array size.
+  bias_param->ndim_ = dims.size();
+  for (unsigned int i = 0; i < bias_param->ndim_; i++) {
+    bias_param->in_shape0_[i] = dims[i];
+    bias_param->out_shape_[i] = 1;  // 1 dimension for N,H,W,
+  }
+  bias_param->out_shape_[bias_param->ndim_ - 1] = dims[bias_param->ndim_ - 1];
+  for (int i = bias_param->ndim_; i < 4; i++) {
+    bias_param->in_shape0_[i] = 0;
+    bias_param->out_shape_[i] = 0;
+  }
+  return RET_OK;
+}
+
+
+int BiasGradCPUKernel::ReSize() { return 0; }
+
+// Sums the input over every dimension except the last one, writing one value
+// per entry of the last dimension into the output buffer.
+int BiasGradCPUKernel::Run() {
+  auto in = reinterpret_cast(inputs_.at(0)->Data());
+  auto out = reinterpret_cast(outputs_.at(0)->Data());
+  // size_t data_size = inputs_.at(0)->ElementsNum();
+
+  size_t nhw_size = 1;
+  size_t channels = bias_param->in_shape0_[bias_param->ndim_ - 1];  // C in NHWC
+  for (unsigned int i = 0; i < bias_param->ndim_ - 1; i++) nhw_size *= bias_param->in_shape0_[i];
+
+  size_t total_size = channels * nhw_size;
+  for (size_t c = 0; c < channels; ++c) {
+    out[c] = 0;
+    // Walk the buffer with a stride of `channels` to visit every element of channel c.
+    for (size_t offset = 0; offset < total_size; offset += channels) {
+      out[c] += in[offset + c];
+    }
+  }
+
+  return RET_OK;
+}
+
+
+// Creates and initialises a BiasGradCPUKernel.
+// Returns nullptr when allocation or Init() fails.
+kernel::LiteKernel *CpuBiasGradFp32KernelCreator(const std::vector &inputs,
+                                                 const std::vector &outputs,
+                                                 OpParameter *opParameter, const lite::Context *ctx,
+                                                 const kernel::KernelKey &desc) {
+  MS_ASSERT(opParameter != nullptr);
+  MS_ASSERT(desc.type == schema::PrimitiveType_BiasGrad);
+  auto *kernel = new (std::nothrow) BiasGradCPUKernel(reinterpret_cast(opParameter), inputs, outputs);
+  if (kernel == nullptr) {
+    // new (std::nothrow) reports failure by returning nullptr; check it explicitly
+    // instead of relying on an assertion before calling Init().
+    MS_LOG(ERROR) << "new BiasGradCPUKernel failed";
+    return nullptr;
+  }
+
+  auto ret = kernel->Init();
+  if (RET_OK != ret) {
+    MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
+                  << schema::EnumNamePrimitiveType(static_cast(opParameter->type_));
+    delete kernel;
+    return nullptr;
+  }
+  return kernel;
+}
+
+REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_BiasGrad, CpuBiasGradFp32KernelCreator)
+}  // namespace mindspore::kernel
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/bias_grad.h b/mindspore/lite/src/runtime/kernel/arm/fp32/bias_grad.h
new file mode 100644
index 0000000000000000000000000000000000000000..0e7ac186924a805c31d99f59f334b8c5baf2a310
--- /dev/null
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/bias_grad.h
@@ -0,0 +1,46 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_BIAS_GRAD_H_
+#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_BIAS_GRAD_H_
+
+#include
+#include "src/lite_kernel.h"
+#include "ir/anf.h"
+
+#include "src/runtime/kernel/arm/opclib/fp32/arithmetic.h"
+
+namespace mindspore::kernel {
+// CPU kernel for the BiasGrad primitive (registered for float32 in bias_grad.cc).
+class BiasGradCPUKernel : public LiteKernel {
+ public:
+  // Forwards all arguments to LiteKernel and keeps a second, typed view of
+  // `parameter` in bias_param. The kernel does not allocate the parameter itself.
+  explicit BiasGradCPUKernel(OpParameter *parameter, const std::vector &inputs,
+                             const std::vector &outputs)
+      : LiteKernel(parameter, inputs, outputs) {
+    bias_param = reinterpret_cast(parameter);
+  }
+  ~BiasGradCPUKernel() override = default;
+
+  int Init() override;
+  // Not an override: declared only on this class.
+  int InferShape();
+  int ReSize() override;
+  int Run() override;
+
+ private:
+  // Same object as the `parameter` passed to the constructor, viewed as ArithmeticParameter.
+  ArithmeticParameter *bias_param;
+};
+}  // namespace mindspore::kernel
+
+#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_BIAS_GRAD_H_
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/bngrad_input.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/bngrad_input.cc
new file mode 100644
index 0000000000000000000000000000000000000000..a9492b264479770ec5200bde1c7417eb115951ee
--- /dev/null
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/bngrad_input.cc
@@ -0,0 +1,115 @@
+/**
+ * Copyright 2019 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include
+#include
+#include "schema/model_generated.h"
+#include "src/kernel_factory.h"
+#include "src/runtime/kernel/arm/fp32/bngrad_input.h"
+#include "src/runtime/kernel/arm/opclib/batch_norm.h"
+#include "include/errorcode.h"
+
+using mindspore::kernel::KERNEL_ARCH::kCPU;
+using mindspore::lite::KernelRegistrar;
+using mindspore::lite::RET_ERROR;
+using mindspore::lite::RET_OK;
+// using mindspore::lite::REG_OP;
+using mindspore::schema::PrimitiveType_BNGradInput;
+
+namespace mindspore::kernel {
+// Validates the tensor counts, allocates the scratch workspace (5 * channels floats)
+// and gives the output tensor the shape and data type of input 0.
+// Returns RET_ERROR on a wrong tensor count, a non-positive channel count or a
+// failed allocation.
+int BNGradInputCPUKernel::Init() {
+  // Make the destructor safe on every early return below.
+  workspace = nullptr;
+  workspace_size = 0;
+
+  // Run() reads inputs 0, 1 and 2 (x, yt, scale), so three inputs are required.
+  if (3 != this->inputs_.size()) {
+    MS_LOG(ERROR) << "BNGradInput should have 3 inputs";
+    return RET_ERROR;
+  }
+  if (1 != this->outputs_.size()) {
+    MS_LOG(ERROR) << "BNGradInput should have one output";
+    return RET_ERROR;
+  }
+
+  auto bn_param = reinterpret_cast(opParameter);
+  if (bn_param->channels <= 0) {
+    MS_LOG(ERROR) << "BNGradInput channels should be positive";
+    return RET_ERROR;
+  }
+  // Allocate only after validation so nothing is allocated on the error paths.
+  workspace_size = 5 * bn_param->channels;
+  workspace = new (std::nothrow) float[workspace_size];
+  if (workspace == nullptr) {
+    MS_LOG(ERROR) << "BNGradInput workspace allocation failed";
+    workspace_size = 0;
+    return RET_ERROR;
+  }
+
+  auto *input_tensor = inputs_.at(0);
+  // auto *weight_tensor = inputs_.at(1);
+  auto *out_tensor = outputs_.at(0);
+  auto in_shape = input_tensor->shape();
+  out_tensor->set_shape(in_shape);
+  out_tensor->set_data_type(input_tensor->data_type());
+  return RET_OK;
+}
+
+int BNGradInputCPUKernel::ReSize() { return RET_OK; }
+
+/*
+according to https://wiseodd.github.io/techblog/2016/07/04/batchnorm
+*/
+
+// Copies yt into the output buffer and then applies the batch-norm helper routines
+// in sequence. The workspace is zeroed and split into five consecutive
+// `channels`-sized slices that those helpers use.
+// NOTE(review): batch and spatial are read from the parameter here but are not set
+// anywhere in this file - confirm the caller fills them in.
+int BNGradInputCPUKernel::Run() {
+  // std::cout << "run succ" << std::endl;
+  auto *input_x = inputs_.at(0);
+  auto *input_yt = inputs_.at(1);
+  auto *input_scale = inputs_.at(2);
+  auto *output_grad = outputs_.at(0);
+  // Tensor *bias = input[5];
+  auto bn_param = reinterpret_cast(opParameter);
+  int batch = bn_param->batch;
+  int channels = bn_param->channels;
+  int spatial = bn_param->spatial;
+  float eps = bn_param->eps;
+  std::fill(workspace, workspace + workspace_size, 0.f);
+
+  float *mean = workspace;
+  float *variance = mean + channels;
+  float *mean_delta = variance + channels;
+  float *variance_delta = mean_delta + channels;
+  float *mean_add_delta = variance_delta + channels;
+
+  float *x = reinterpret_cast(input_x->Data());
+  float *yt = reinterpret_cast(input_yt->Data());
+  float *scale = reinterpret_cast(input_scale->Data());
+  float *out = reinterpret_cast(output_grad->Data());
+
+  std::copy(yt, yt + batch * channels * spatial, out);
+  meanVar(x, batch, spatial, channels, mean, variance);
+  scaleBias(scale, batch, channels, spatial, out);
+  meanDelta(out, spatial, channels, eps, variance, mean_delta);
+  varianceDelta(x, out, mean, variance, batch, channels, spatial, eps, variance_delta);
+  meanAdd(x, mean, variance_delta, batch, channels, spatial, mean_add_delta, mean_delta);
+  NormalizeDelta(x, mean, variance, mean_delta, variance_delta, batch, channels, eps, spatial, out);
+  return RET_OK;
+}
+
+// Creates and initialises a BNGradInputCPUKernel.
+// Returns nullptr when allocation or Init() fails.
+kernel::LiteKernel *CpuBNGradInputFp32KernelCreator(const std::vector &inputs,
+                                                    const std::vector &outputs,
+                                                    OpParameter *opParameter, const lite::Context *ctx,
+                                                    const kernel::KernelKey &desc) {
+  MS_ASSERT(opParameter != nullptr);
+  MS_ASSERT(desc.type == schema::PrimitiveType_BNGradInput);
+  // parameter->name = opDef.name()->str().data();
+  // parameter->type = opDef.attr_type();
+  auto *kernel = new (std::nothrow) BNGradInputCPUKernel(opParameter, inputs, outputs);
+  if (kernel == nullptr) {
+    MS_LOG(ERROR) << "new BNGradInputCPUKernel failed";
+    return nullptr;
+  }
+  auto ret = kernel->Init();
+  if (RET_OK != ret) {
+    MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
+                  << schema::EnumNamePrimitiveType(static_cast(opParameter->type_));
+    // Do not hand out a kernel whose Init() failed; the sibling creators do the same.
+    delete kernel;
+    return nullptr;
+  }
+  return kernel;
+}
+
+REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_BNGradInput, CpuBNGradInputFp32KernelCreator)
+}  // namespace mindspore::kernel
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/bngrad_input.h b/mindspore/lite/src/runtime/kernel/arm/fp32/bngrad_input.h
new file mode 100644
index 0000000000000000000000000000000000000000..e4e6d6e746263ec2b5375ca9ef56288d097c29d0
--- /dev/null
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/bngrad_input.h
@@ -0,0 +1,41 @@
+/**
+ * Copyright 2019
Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_BNGRAD_INPUT_H_
+#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_BNGRAD_INPUT_H_
+
+#include
+#include "src/lite_kernel.h"
+#include "ir/anf.h"
+
+namespace mindspore::kernel {
+// CPU kernel for the BNGradInput primitive. Owns a float scratch buffer that
+// Init() allocates with new[] and the destructor releases.
+class BNGradInputCPUKernel : public LiteKernel {
+ public:
+  explicit BNGradInputCPUKernel(OpParameter *parameter, const std::vector &inputs,
+                                const std::vector &outputs)
+      : LiteKernel(parameter, inputs, outputs) {}
+  // The buffer comes from new float[...], so it must be released with delete[].
+  ~BNGradInputCPUKernel() override { delete[] workspace; }
+
+  int Init() override;
+  int ReSize() override;
+  int Run() override;
+
+ private:
+  // Initialised to null so destroying a kernel whose Init() never allocated is safe.
+  float *workspace = nullptr;
+  // Number of floats in workspace.
+  int workspace_size = 0;
+};
+}  // namespace mindspore::kernel
+#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_BNGRAD_INPUT_H_
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_grad_filter.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_grad_filter.cc
new file mode 100644
index 0000000000000000000000000000000000000000..89cba44768b981c9ed4a3b18195e296cc906eaa1
--- /dev/null
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_grad_filter.cc
@@ -0,0 +1,156 @@
+/**
+ * Copyright 2019 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "src/runtime/kernel/arm/fp32/convolution_grad_filter.h"
+#include "src/kernel_registry.h"
+#include "src/runtime/kernel/arm/opclib/pack.h"
+#include "src/runtime/kernel/arm/opclib/pack_ext.h"
+#include "src/runtime/kernel/arm/opclib/fp32/gemm.h"
+#include "include/errorcode.h"
+
+using mindspore::kernel::KERNEL_ARCH::kCPU;
+using mindspore::lite::KernelRegistrar;
+using mindspore::lite::RET_ERROR;
+using mindspore::lite::RET_OK;
+using mindspore::schema::PrimitiveType_Conv2DGradFilter;
+
+namespace mindspore::kernel {
+// Fills the convolution parameter from the tensor shapes, allocates the scratch
+// workspace and sets the shape and data type of the dw output.
+// Returns RET_ERROR on a wrong tensor count or a failed allocation.
+int ConvolutionGradFilterCPUKernel::Init() {
+  // dy is in input 0
+  // x is in input 1
+  // dw is output 0
+
+  // The creator deletes the kernel when Init() fails, and the destructor frees
+  // workspace, so the pointer must hold a defined value before any early return.
+  workspace = nullptr;
+
+  if (2 != this->inputs_.size()) {
+    MS_LOG(ERROR) << "Conv2d Grad should has 2 inputs";
+    return RET_ERROR;
+  }
+  if (1 != this->outputs_.size()) {
+    MS_LOG(ERROR) << "Conv2d Grad should has one output";
+    return RET_ERROR;
+  }
+
+  auto *input_tensor = inputs_.at(1);
+  MS_ASSERT(input_tensor != nullptr);
+  auto *dy = inputs_.at(0);
+  MS_ASSERT(dy != nullptr);
+  auto *weight_tensor = outputs_.at(0);
+  MS_ASSERT(weight_tensor != nullptr);
+
+  auto conv_param = reinterpret_cast(opParameter);
+  conv_param->output_batch_ = this->inputs_.at(0)->shape().at(kNHWC_N);
+  conv_param->input_batch_ = this->inputs_.at(1)->shape().at(kNHWC_N);
+  conv_param->input_h_ = this->inputs_.at(1)->shape().at(kNHWC_H);
+  conv_param->input_w_ = this->inputs_.at(1)->shape().at(kNHWC_W);
+  // assume OutCh|kh|kw|In
+  conv_param->input_channel_ = this->inputs_.at(1)->shape().at(kNHWC_C);
+  conv_param->output_channel_ = this->outputs_.at(0)->shape().at(kNHWC_N);
+
+  // The output height/width are taken from dy. They have to be stored before the
+  // workspace is sized, because the size formula below multiplies by them.
+  conv_param->output_h_ = dy->shape()[kNHWC_H];
+  conv_param->output_w_ = dy->shape()[kNHWC_W];
+
+  int ws_size = conv_param->output_h_ * conv_param->output_w_ * conv_param->kernel_h_ * conv_param->kernel_w_ *
+                conv_param->input_channel_ / conv_param->group_;
+
+  workspace = new (std::nothrow) float[ws_size];
+  if (workspace == nullptr) {
+    MS_LOG(ERROR) << "Conv2d Grad filter workspace allocation failed";
+    return RET_ERROR;
+  }
+
+  std::vector out_shape(4);
+  out_shape.at(0) = conv_param->output_channel_;
+  out_shape.at(1) = conv_param->kernel_h_;
+  out_shape.at(2) = conv_param->kernel_w_;
+  out_shape.at(3) = conv_param->input_channel_ / conv_param->group_;
+
+  // weight is output
+  weight_tensor->set_shape(out_shape);
+  weight_tensor->set_data_type(input_tensor->data_type());
+
+  return RET_OK;
+}
+
+int ConvolutionGradFilterCPUKernel::ReSize() { return 0; }
+
+// Zeroes dw, then for every (batch, group) pair expands the input slice into the
+// workspace with im2row_hwc and accumulates into the group's slice of dw via gemm.
+int ConvolutionGradFilterCPUKernel::Run() {
+  auto conv_param = reinterpret_cast(opParameter);
+  auto *input_dy = inputs_.at(0);
+  auto *input_x = inputs_.at(1);
+  auto *out_dw = outputs_.at(0);
+
+  auto x_addr = reinterpret_cast(input_x->Data());
+  auto dy_addr = reinterpret_cast(input_dy->Data());
+  auto dw_addr = reinterpret_cast(out_dw->Data());
+
+  int i, j;
+  int nweights = out_dw->ElementsNum();
+  int in_ch = conv_param->input_channel_;
+  int in_h = conv_param->input_h_;
+  int in_w = conv_param->input_w_;
+  int k_h = conv_param->kernel_h_;  // out_dw->shape()[1];
+  int k_w = conv_param->kernel_w_;  // out_dw->shape()[2];
+  int batch = conv_param->output_batch_;
+  int out_ch = conv_param->output_channel_;
+  int groups = conv_param->group_;
+  int out_h = conv_param->output_h_;
+  int out_w = conv_param->output_w_;
+
+  int m = out_h * out_w;
+  int n = k_h * k_w * in_ch / groups;
+  int k = out_ch / groups;
+
+  // zero out pointer
+  memset(dw_addr, 0, out_dw->Size());
+
+  for (i = 0; i < batch; ++i) {
+    for (j = 0; j < groups; ++j) {
+      float *mat_a = dy_addr + (i * groups) * m * k + j * (out_ch / groups);
+      float *mat_b = workspace;
+      float *mat_c = dw_addr + j * nweights / groups;
+      float *im = x_addr + (i * groups) * (in_ch / groups) * in_h * in_w + j * (in_ch / groups);
+
+      im2row_hwc(im, mat_b, conv_param);
+      gemm(1, 1, k, n, m, 1, mat_a, out_ch, mat_b, m, 1, mat_c, n);
+    }
+  }
+
+  // std::cout << "run succ" << std::endl;
+  return RET_OK;
+}
+
+// Creates and initialises a ConvolutionGradFilterCPUKernel.
+// Returns nullptr when allocation or Init() fails.
+kernel::LiteKernel *CpuConvGradFilterFp32KernelCreator(const std::vector &inputs,
+                                                       const std::vector &outputs,
+                                                       OpParameter *opParameter, const lite::Context *ctx,
+                                                       const kernel::KernelKey &desc) {
+  MS_ASSERT(opParameter != nullptr);
+  MS_ASSERT(desc.type == schema::PrimitiveType_Conv2DGradFilter);
+
+  auto *kernel = new (std::nothrow) ConvolutionGradFilterCPUKernel(opParameter, inputs, outputs);
+  if (kernel == nullptr) {
+    MS_LOG(ERROR) << "new ConvolutionGradFilterCPUKernel failed";
+    return nullptr;
+  }
+
+  auto ret = kernel->Init();
+  if (RET_OK != ret) {
+    MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
+                  << schema::EnumNamePrimitiveType(static_cast(opParameter->type_));
+    delete kernel;
+    return nullptr;
+  }
+  return kernel;
+}
+
+REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_Conv2DGradFilter, CpuConvGradFilterFp32KernelCreator)
+}  // namespace mindspore::kernel
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_grad_filter.h b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_grad_filter.h
new file mode 100644
index 0000000000000000000000000000000000000000..c32a798eafada3d3afc65560601b6866db5e386a
--- /dev/null
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_grad_filter.h
@@ -0,0 +1,41 @@
+/**
+ * Copyright 2019 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_CONVOLUTION_GRAD_FILTER_H_
+#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_CONVOLUTION_GRAD_FILTER_H_
+
+#include
+#include "src/lite_kernel.h"
+#include "ir/anf.h"
+
+namespace mindspore::kernel {
+// CPU kernel for the Conv2DGradFilter primitive. Owns a float scratch buffer
+// that Init() allocates with new[] and the destructor releases.
+class ConvolutionGradFilterCPUKernel : public LiteKernel {
+ public:
+  explicit ConvolutionGradFilterCPUKernel(OpParameter *parameter, const std::vector &inputs,
+                                          const std::vector &outputs)
+      : LiteKernel(parameter, inputs, outputs) {}
+  // The buffer comes from new float[...], so it must be released with delete[].
+  ~ConvolutionGradFilterCPUKernel() override { delete[] workspace; }
+
+  int Init() override;
+  int ReSize() override;
+  int Run() override;
+
+ private:
+  // Initialised to null so destroying a kernel whose Init() never allocated is safe.
+  float *workspace = nullptr;
+};
+}  // namespace mindspore::kernel
+
+#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_CONVOLUTION_GRAD_FILTER_H_
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_grad_input.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_grad_input.cc
new file mode 100644
index 0000000000000000000000000000000000000000..29bb49a8c8f726c3a6ff564d62ba6d224a0ec24b
--- /dev/null
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_grad_input.cc
@@ -0,0 +1,136 @@
+/**
+ * Copyright 2019 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "src/runtime/kernel/arm/fp32/convolution_grad_input.h"
+#include "src/kernel_registry.h"
+#include "src/runtime/kernel/arm/opclib/pack.h"
+#include "src/runtime/kernel/arm/opclib/pack_ext.h"
+#include "src/runtime/kernel/arm/opclib/fp32/gemm.h"
+#include "include/errorcode.h"
+
+using mindspore::kernel::KERNEL_ARCH::kCPU;
+using mindspore::lite::KernelRegistrar;
+using mindspore::schema::PrimitiveType_Conv2DGradInput;
+using mindspore::lite::RET_ERROR;
+using mindspore::lite::RET_OK;
+
+namespace mindspore::kernel {
+// Fills the convolution parameter from the dy, weight and dx tensor shapes and
+// allocates the scratch workspace.
+// Returns RET_ERROR on a wrong tensor count or a failed allocation, 0 on success.
+int ConvolutionGradInputCPUKernel::Init() {
+  // The creator deletes the kernel when Init() fails, and the destructor frees
+  // workspace, so the pointer must hold a defined value before any early return.
+  workspace = nullptr;
+
+  if (2 != this->inputs_.size()) {
+    MS_LOG(ERROR) << "Conv2d Grad should has 2 inputs";
+    return RET_ERROR;
+  }
+  if (1 != this->outputs_.size()) {
+    MS_LOG(ERROR) << "Conv2d Grad should has one output";
+    return RET_ERROR;
+  }
+
+  auto *dy_tensor = inputs_.at(kInputIndex);
+  MS_ASSERT(dy_tensor != nullptr);
+  auto *weight_tensor = inputs_.at(kWeightIndex);
+  MS_ASSERT(weight_tensor != nullptr);
+  auto *dx_tensor = outputs_.at(kOutputIndex);
+  MS_ASSERT(dx_tensor != nullptr);
+
+  auto conv_param = reinterpret_cast(opParameter);
+  conv_param->output_batch_ = dx_tensor->shape()[(kNHWC_N)];
+  conv_param->input_batch_ = dy_tensor->shape()[(kNHWC_N)];
+
+  conv_param->input_h_ = dx_tensor->shape()[(kNHWC_H)];
+  conv_param->input_w_ = dx_tensor->shape()[(kNHWC_W)];
+
+  // assume OutCh|kh|kw|In
+  conv_param->input_channel_ = dx_tensor->shape()[(kNHWC_C)];
+  conv_param->output_channel_ = weight_tensor->shape()[(kNHWC_N)];
+
+  // TBD
+  conv_param->output_h_ = dy_tensor->shape()[kNHWC_H];
+  conv_param->output_w_ = dy_tensor->shape()[kNHWC_W];
+
+  int ws_size = conv_param->output_h_ * conv_param->output_w_ * conv_param->kernel_h_ * conv_param->kernel_w_ *
+                conv_param->input_channel_ / conv_param->group_;
+
+  workspace = new (std::nothrow) float[ws_size];
+  if (workspace == nullptr) {
+    MS_LOG(ERROR) << "Conv2d Grad input workspace allocation failed";
+    return RET_ERROR;
+  }
+  return 0;
+}
+
+int ConvolutionGradInputCPUKernel::ReSize() { return 0; }
+
+// Zeroes dx, then for every (batch, group) pair multiplies the dy slice by the
+// group's weights into the workspace (gemm) and scatters it into dx with col2im_hwc.
+int ConvolutionGradInputCPUKernel::Run() {
+  auto conv_param = reinterpret_cast(opParameter);
+  auto *input_dy = inputs_.at(0);
+  auto *input_w = inputs_.at(1);
+  auto *out_dx = outputs_.at(0);
+
+  auto dy_addr = reinterpret_cast(input_dy->Data());
+  auto w_addr = reinterpret_cast(input_w->Data());
+  auto dx_addr = reinterpret_cast(out_dx->Data());
+
+  int i, j;
+  int nweights = input_w->ElementsNum();
+  int in_ch = conv_param->input_channel_;
+  int in_h = conv_param->input_h_;
+  int in_w = conv_param->input_w_;
+  int k_h = conv_param->kernel_h_;  // out_dw->shape()[1];
+  int k_w = conv_param->kernel_w_;  // out_dw->shape()[2];
+  int batch = conv_param->output_batch_;
+  int out_ch = conv_param->output_channel_;
+  int groups = conv_param->group_;
+  int out_h = conv_param->output_h_;
+  int out_w = conv_param->output_w_;
+
+  int m = out_h * out_w;
+  int n = k_w * k_h * in_ch / groups;
+  int k = out_ch / groups;
+
+  memset(dx_addr, 0, sizeof(float) * batch * in_ch * in_h * in_w);
+
+  for (i = 0; i < batch; ++i) {
+    for (j = 0; j < groups; ++j) {
+      float *mat_a = dy_addr + (i * groups) * m * k + j * (out_ch / groups);
+      float *mat_b = w_addr + j * nweights / groups;
+      float *mat_c = workspace;
+      gemm(0, 0, m, n, k, 1, mat_a, out_ch, mat_b, n, 0, mat_c, n);
+      col2im_hwc(mat_c, dx_addr + (i * groups) * (in_ch / groups) * in_h * in_w + j * (in_ch / groups), conv_param);
+    }
+  }
+
+  // std::cout << "run succ" << std::endl;
+  return 0;
+}
+
+// Creates and initialises a ConvolutionGradInputCPUKernel.
+// Returns nullptr when allocation or Init() fails.
+kernel::LiteKernel *CpuConvGradInputFp32KernelCreator(const std::vector &inputs,
+                                                      const std::vector &outputs,
+                                                      OpParameter *opParameter, const lite::Context *ctx,
+                                                      const kernel::KernelKey &desc) {
+  MS_ASSERT(opParameter != nullptr);
+  MS_ASSERT(desc.type == schema::PrimitiveType_Conv2DGradInput);
+
+  auto *kernel = new (std::nothrow) ConvolutionGradInputCPUKernel(opParameter, inputs, outputs);
+  if (kernel == nullptr) {
+    MS_LOG(ERROR) << "new ConvolutionGradInputCPUKernel failed";
+    return nullptr;
+  }
+
+  auto ret = kernel->Init();
+  if (0 != ret) {
+    MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
+                  << schema::EnumNamePrimitiveType(static_cast(opParameter->type_));
+    delete kernel;
+    return nullptr;
+  }
+  return kernel;
+}
+
+REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_Conv2DGradInput, CpuConvGradInputFp32KernelCreator)
+}  // namespace mindspore::kernel
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_grad_input.h b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_grad_input.h
new file mode 100644
index 0000000000000000000000000000000000000000..86901b37ba5b7468105ff1f90e433e59806c5aa6
--- /dev/null
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_grad_input.h
@@ -0,0 +1,41 @@
+/**
+ * Copyright 2019 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_CONVOLUTION_GRAD_INPUT_H_
+#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_CONVOLUTION_GRAD_INPUT_H_
+
+#include
+#include "src/lite_kernel.h"
+#include "ir/anf.h"
+
+namespace mindspore::kernel {
+// CPU kernel for the Conv2DGradInput primitive. Owns a float scratch buffer
+// that Init() allocates with new[] and the destructor releases.
+class ConvolutionGradInputCPUKernel : public LiteKernel {
+ public:
+  explicit ConvolutionGradInputCPUKernel(OpParameter *parameter, const std::vector &inputs,
+                                         const std::vector &outputs)
+      : LiteKernel(parameter, inputs, outputs) {}
+  // The buffer comes from new float[...], so it must be released with delete[].
+  ~ConvolutionGradInputCPUKernel() override { delete[] workspace; }
+
+  int Init() override;
+  int ReSize() override;
+  int Run() override;
+
+ private:
+  // Initialised to null so destroying a kernel whose Init() never allocated is safe.
+  float *workspace = nullptr;
+};
+}  // namespace mindspore::kernel
+
+#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_CONVOLUTION_GRAD_INPUT_H_
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/opt_momentum.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/opt_momentum.cc
new file mode 100644
index 0000000000000000000000000000000000000000..84c51509ba0f34630e85a60c5a60e79c45924e0d
--- /dev/null
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/opt_momentum.cc
@@ -0,0 +1,78 @@
+
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "schema/model_generated.h"
+#include "src/kernel_registry.h"
+#include "src/runtime/kernel/arm/fp32/opt_momentum.h"
+#include "include/errorcode.h"
+
+using mindspore::kernel::KERNEL_ARCH::kCPU;
+using mindspore::lite::KernelRegistrar;
+using mindspore::schema::PrimitiveType_OptMomentum;
+using mindspore::lite::RET_ERROR;
+using mindspore::lite::RET_OK;
+
+namespace mindspore::kernel {
+
+int OptMomentumCPUKernel::ReSize() { return 0; }
+
+// Applies one momentum update in place:
+//   accumulate = accumulate * moment + gradient
+//   weight    -= accumulate * learning_rate
+// Inputs: 0 weight, 1 accumulate, 2 learning rate, 3 gradient, 4 momentum.
+// Only element 0 of inputs 2 and 4 is read. The kernel has no outputs.
+// Returns RET_ERROR when the tensor counts or element counts do not match.
+int OptMomentumCPUKernel::Run() {
+  if (inputs_.size() != 5 || !outputs_.empty()) {
+    MS_LOG(ERROR) << "OptMomentumCPUKernel error input output size!";
+    return RET_ERROR;
+  }
+
+  if (inputs_[0]->ElementsNum() != inputs_[1]->ElementsNum() ||
+      inputs_[0]->ElementsNum() != inputs_[3]->ElementsNum()) {
+    MS_LOG(ERROR) << "error input data size!";
+    return RET_ERROR;
+  }
+  // Element 0 of the learning-rate and momentum tensors is read below, so both
+  // must hold at least one element.
+  if (inputs_[2]->ElementsNum() < 1 || inputs_[4]->ElementsNum() < 1) {
+    MS_LOG(ERROR) << "error input data size!";
+    return RET_ERROR;
+  }
+  auto weight = reinterpret_cast(inputs_[0]->Data());
+  auto accumulate = reinterpret_cast(inputs_[1]->Data());
+  float learning_rate = reinterpret_cast(inputs_[2]->Data())[0];
+  auto gradient = reinterpret_cast(inputs_[3]->Data());
+  float moment = reinterpret_cast(inputs_[4]->Data())[0];
+  size_t elem_num = inputs_[0]->ElementsNum();
+  for (size_t i = 0; i < elem_num; ++i) {
+    accumulate[i] = accumulate[i] * moment + gradient[i];
+    weight[i] -= accumulate[i] * learning_rate;
+  }
+  return RET_OK;
+}
+
+int OptMomentumCPUKernel::Init() { return 0; }
+
+// Creates and initialises an OptMomentumCPUKernel.
+// Returns nullptr when allocation or Init() fails.
+kernel::LiteKernel *CpuOptMomentumFp32KernelCreator(const std::vector &inputs,
+                                                    const std::vector &outputs,
+                                                    OpParameter *opParameter, const lite::Context *ctx,
+                                                    const kernel::KernelKey &desc) {
+  // opParameter is dereferenced in the failure log below.
+  MS_ASSERT(opParameter != nullptr);
+  MS_ASSERT(desc.type == schema::PrimitiveType_OptMomentum);
+  auto *kernel = new (std::nothrow) OptMomentumCPUKernel(opParameter, inputs, outputs);
+  if (kernel == nullptr) {
+    MS_LOG(ERROR) << "new OptMomentumCPUKernel failed";
+    return nullptr;
+  }
+
+  auto ret = kernel->Init();
+  if (0 != ret) {
+    MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
+                  << schema::EnumNamePrimitiveType(static_cast(opParameter->type_));
+    delete kernel;
+    return nullptr;
+  }
+  return kernel;
+}
+
+REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_OptMomentum, CpuOptMomentumFp32KernelCreator)
+}  // namespace mindspore::kernel
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/opt_momentum.h b/mindspore/lite/src/runtime/kernel/arm/fp32/opt_momentum.h
new file mode 100644
index 0000000000000000000000000000000000000000..6746b5bf0b1c4d62c3406e0845ab87648034c293
--- /dev/null
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/opt_momentum.h
@@ -0,0 +1,40 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_OPT_MOMENTUM_H_
+#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_OPT_MOMENTUM_H_
+
+#include
+#include "src/lite_kernel.h"
+#include "ir/anf.h"
+
+namespace mindspore::kernel {
+// CPU kernel for the OptMomentum primitive. Holds no state of its own.
+class OptMomentumCPUKernel : public LiteKernel {
+ public:
+  explicit OptMomentumCPUKernel(OpParameter *parameter, const std::vector &inputs,
+                                const std::vector &outputs)
+      : LiteKernel(parameter, inputs, outputs) {}
+  ~OptMomentumCPUKernel() override {}
+
+  int Init() override;
+  int ReSize() override;
+  int Run() override;
+
+ private:
+};
+}  // namespace mindspore::kernel
+
+#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_OPT_MOMENTUM_H_
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/pooling_grad.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/pooling_grad.cc
new file mode 100644
index 0000000000000000000000000000000000000000..9b10cbf369db6f6034cfeb628eb9fb0f4020dce1
--- /dev/null
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/pooling_grad.cc
@@ -0,0 +1,195 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#include "src/runtime/kernel/arm/fp32/pooling_grad.h" +#include "schema/model_generated.h" +#include "src/kernel_registry.h" +#include "src/runtime/kernel/arm/opclib/fp32/pooling.h" +#include "src/runtime/kernel/arm/opclib/fp32/pooling_grad.h" +#include "include/errorcode.h" + +using mindspore::kernel::KERNEL_ARCH::kCPU; +using mindspore::lite::KernelRegistrar; +using mindspore::lite::RET_ERROR; +using mindspore::lite::RET_OK; +using mindspore::schema::PrimitiveType_PoolingGrad; + +namespace mindspore::kernel { +#if 0 +int PoolingGradCPUKernel::TfPadding(int input_w, int input_h, int &output_w, int &output_h) { + PoolingParameter *pool_param = reinterpret_cast (opParameter); + + auto stride_w = pool_param->stride_w_; + auto stride_h = pool_param->stride_h_; + auto window_w = pool_param->window_w_; + auto window_h = pool_param->window_h_; + auto pad_up = pool_param->pad_u_; + auto pad_down = pool_param->pad_d_; + auto pad_left = pool_param->pad_l_; + auto pad_right = pool_param->pad_r_; + if (pool_param->pad_mode_ == PADMODE_SAME) { + output_w = ceil(input_w / stride_w); + output_h = ceil(input_h / stride_h); + } else { + output_w = ceil((input_w + pad_left + pad_right - window_w + 1) / stride_w); + output_h = ceil((input_h + pad_up + pad_down - window_h + 1) / stride_h); + } + return RET_OK; +} + +int PoolingGradCPUKernel::CaffePadding(int input_w, int input_h, int &output_w, int &output_h) { + PoolingParameter *pool_param = reinterpret_cast (opParameter); + + auto round_mode = pool_param->round_mode_; + auto stride_w = pool_param->stride_w_; + auto stride_h = pool_param->stride_h_; + auto window_w = pool_param->window_w_; + auto window_h = pool_param->window_h_; + auto pad_up = pool_param->pad_u_; + auto pad_down = pool_param->pad_d_; + auto pad_left = pool_param->pad_l_; + auto pad_right = pool_param->pad_r_; + if (round_mode == ROUNDMODE_FLOOR && false) { + output_w = floor((input_w + pad_left + pad_right - window_w) / stride_w + 1); + output_h = 
floor((input_h + pad_up + pad_down - window_h) / stride_h + 1); + } else if (round_mode == ROUNDMODE_CEIL || true) { + output_w = ceil((input_w + pad_left + pad_right - window_w) / stride_w + 1); + output_h = ceil((input_h + pad_up + pad_down - window_h) / stride_h + 1); + } else { + MS_LOG(ERROR) << "round mode not support."; + } + + if (pad_left > 0 || pad_up > 0) { + if ((output_w - 1) * stride_w >= input_w + pad_left) { + --output_w; + } + if ((output_h - 1) * stride_h >= input_h + pad_up) { + --output_h; + } + } + return RET_OK; +} + +int PoolingGradCPUKernel::OnnxPadding(int input_w, int input_h, int &output_w, int &output_h) { + PoolingParameter *pool_param = reinterpret_cast (opParameter); + + auto round_mode = pool_param->round_mode_; + auto stride_w = pool_param->stride_w_; + auto stride_h = pool_param->stride_h_; + auto window_w = pool_param->window_w_; + auto window_h = pool_param->window_h_; + auto pad_up = pool_param->pad_u_; + auto pad_down = pool_param->pad_d_; + auto pad_left = pool_param->pad_l_; + auto pad_right = pool_param->pad_r_; + if (round_mode == ROUNDMODE_FLOOR) { + output_w = floor((input_w + pad_left + pad_right - window_w) / stride_w + 1); + output_h = floor((input_h + pad_up + pad_down - window_h) / stride_h + 1); + } else if (round_mode == ROUNDMODE_CEIL) { + MS_LOG(ERROR) << "RoundMode_CEIL mode not support."; + } else { + MS_LOG(ERROR) << "OnnxPadding round mode not support."; + } + return RET_OK; +} +#endif + +int PoolingGradCPUKernel::Init() { + // InferShape(): + // auto *in_tensor = reinterpret_cast(inputs_.at(0)->Data()); + // auto *x_tensor = reinterpret_cast(inputs_.at(1)->Data()); + + PoolingParameter *pool_param = reinterpret_cast(opParameter); + + auto in_shape = inputs_.at(0)->shape(); + int input_h = in_shape.at(1); + int input_w = in_shape.at(2); + + if (pool_param->global_) { + pool_param->window_w_ = input_w; + pool_param->window_h_ = input_h; + } + + // Emir -- here I assume we get the outputshape in the output 
tensor + auto *out_tensor = outputs_.front(); + auto out_shape = out_tensor->shape(); + +#if 0 + int output_w = 0, output_h = 0; + auto fmk_type = pool_param->fmk_type_; + switch (fmk_type) { + case lite::FmkType_TF: + break; + case lite::FmkType_CAFFE: + CaffePadding(input_w, input_h, output_w, output_h); + break; + case lite::FmkType_ONNX: + OnnxPadding(input_w, input_h, output_w, output_h); + break; + case lite::FmkType_MS: + break; + case lite::FmkType_TFLITE: + TfPadding(input_w, input_h, output_w, output_h); + break; + default: + MS_LOG(ERROR) << "Not support this framework."; + } + std::vector out_shape{in_tensor->shape()}; + out_shape.at(1) = output_h; + out_shape.at(2) = output_w; +#endif + out_tensor->set_shape(out_shape); + out_tensor->set_data_type(inputs_.at(0)->data_type()); + return RET_OK; +} + +int PoolingGradCPUKernel::ReSize() { return RET_OK; } + +int PoolingGradCPUKernel::Run() { + PoolingParameter *pool_param = reinterpret_cast(opParameter); + auto input_ptr = reinterpret_cast(inputs_.at(0)->Data()); + auto output_ptr = reinterpret_cast(outputs_.at(0)->Data()); + + if (pool_param->max_pooling_) { + auto ind = reinterpret_cast(inputs_.at(1)->Data()); + MaxPoolingGrad(input_ptr, ind, output_ptr, pool_param); + } else { + AvgPoolingGrad(input_ptr, output_ptr, pool_param); + } + return RET_OK; +} + +kernel::LiteKernel *CpuPoolingGradFp32KernelCreator(const std::vector &inputs, + const std::vector &outputs, + OpParameter *opParameter, const lite::Context *ctx, + const kernel::KernelKey &desc) { + MS_ASSERT(opParameter != nullptr); + MS_ASSERT(desc.type == schema::PrimitiveType_PoolingGrad); + + auto *kernel = new (std::nothrow) PoolingGradCPUKernel(opParameter, inputs, outputs); + MS_ASSERT(kernel != nullptr); + auto ret = kernel->Init(); + if (RET_OK != ret) { + MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: " + << schema::EnumNamePrimitiveType(static_cast(opParameter->type_)); + delete kernel; + return nullptr; + 
} + return kernel; +} + +REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_PoolingGrad, CpuPoolingGradFp32KernelCreator) +} // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/pooling_grad.h b/mindspore/lite/src/runtime/kernel/arm/fp32/pooling_grad.h new file mode 100644 index 0000000000000000000000000000000000000000..eec333d8600fd16a907c07d996cc1fcd3443302f --- /dev/null +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/pooling_grad.h @@ -0,0 +1,50 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_POOLING_GRAD_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_POOLING_GRAD_H_ + +#include +#include "src/lite_kernel.h" +#include "ir/anf.h" + +namespace mindspore::kernel { +using mindspore::schema::PadMode; +using mindspore::schema::PoolMode; +using mindspore::schema::QuantType; +using mindspore::schema::RoundMode; + +class PoolingGradCPUKernel : public LiteKernel { + public: + explicit PoolingGradCPUKernel(OpParameter *parameter, const std::vector &inputs, + const std::vector &outputs) + : LiteKernel(parameter, inputs, outputs) {} + ~PoolingGradCPUKernel() override = default; + + // int TfPadding(int input_w, int input_h, int &output_w, int &output_h); + // int CaffePadding(int input_w, int input_h, int &output_w, int &output_h); + // int OnnxPadding(int input_w, int input_h, int &output_w, int &output_h); + + int Init() override; + int ReSize() override; + int Run() override; + + private: + uint8_t data_shape_{0}; +}; +} // namespace mindspore::kernel + +#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_POOLING_GRAD_H_ diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/power_grad.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/power_grad.cc new file mode 100644 index 0000000000000000000000000000000000000000..759cf8b43753d2667ed05ea5beeee7938790cf15 --- /dev/null +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/power_grad.cc @@ -0,0 +1,67 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "src/runtime/kernel/arm/fp32/power_grad.h" +#include "schema/model_generated.h" +#include "src/kernel_registry.h" +#include "include/errorcode.h" +#include "src/runtime/kernel/arm/opclib/fp32/arithmetic.h" + +using mindspore::lite::KernelRegistrar; +using mindspore::lite::RET_ERROR; +using mindspore::lite::RET_OK; +using mindspore::schema::PrimitiveType_PowerGrad; + +namespace mindspore::kernel { +int PowerGradCPUKernel::Init() { return RET_OK; } + +int PowerGradCPUKernel::ReSize() { return RET_OK; } + +int PowerGradCPUKernel::Run() { + auto dy_addr = reinterpret_cast(inputs_.at(0)->Data()); + auto x_addr = reinterpret_cast(inputs_.at(1)->Data()); + auto dx_addr = reinterpret_cast(outputs_.at(0)->Data()); + auto size = inputs_.at(0)->ElementsNum(); + + Power(x_addr, dx_addr, size, power_ - 1, scale_, shift_); + ElementMul(dx_addr, dy_addr, dx_addr, size); + float scale = scale_ * power_; + for (int i = 0; i < size; i++) { + dx_addr[i] *= scale; + } + + return RET_OK; +} + +kernel::LiteKernel *CpuPowerGradFp32KernelCreator(const std::vector &inputs, + const std::vector &outputs, + OpParameter *opParameter, const lite::Context *ctx, + const kernel::KernelKey &desc) { + MS_ASSERT(opParameter != nullptr); + MS_ASSERT(desc.type == schema::PrimitiveType_PowerGrad); + auto *kernel = new (std::nothrow) PowerGradCPUKernel(opParameter, inputs, outputs); + auto ret = kernel->Init(); + if (ret != RET_OK) { + MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: " + << schema::EnumNamePrimitiveType(static_cast(opParameter->type_)); + delete kernel; + return nullptr; + } + return kernel; +} + +REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_PowerGrad, CpuPowerGradFp32KernelCreator) +} // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/power_grad.h 
b/mindspore/lite/src/runtime/kernel/arm/fp32/power_grad.h new file mode 100644 index 0000000000000000000000000000000000000000..316b55e1ebfc74dcfaa0c0035ab9a265f9181359 --- /dev/null +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/power_grad.h @@ -0,0 +1,49 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_POWER_GRAD_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_POWER_GRAD_H_ + +#include +#include "src/lite_kernel.h" +#include "ir/anf.h" +#include "src/runtime/kernel/arm/opclib/power.h" + +namespace mindspore::kernel { +class PowerGradCPUKernel : public LiteKernel { + public: + PowerGradCPUKernel(OpParameter *param, const std::vector &inputs, + const std::vector &outputs) + : LiteKernel(param, inputs, outputs) { + PowerParameter *power_param = reinterpret_cast(param); + power_ = power_param->power_; + scale_ = power_param->scale_; + shift_ = power_param->shift_; + } + ~PowerGradCPUKernel() override = default; + + int Init() override; + int ReSize() override; + int Run() override; + + private: + float power_; + float scale_; + float shift_; +}; +} // namespace mindspore::kernel + +#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_POWER_GRAD_H_ diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/sparse_softmax_cross_entropy_with_logits.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/sparse_softmax_cross_entropy_with_logits.cc new 
file mode 100644 index 0000000000000000000000000000000000000000..005b091fb66c4da760540a8277127ec89d96ef83 --- /dev/null +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/sparse_softmax_cross_entropy_with_logits.cc @@ -0,0 +1,145 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "src/runtime/kernel/arm/fp32/sparse_softmax_cross_entropy_with_logits.h" +#include "src/runtime/kernel/arm/opclib/fp32/softmax.h" +#include "schema/model_generated.h" +#include "src/kernel_registry.h" +#include "include/errorcode.h" + +using mindspore::kernel::KERNEL_ARCH::kCPU; +using mindspore::lite::KernelRegistrar; +using mindspore::lite::RET_ERROR; +using mindspore::lite::RET_OK; +using mindspore::schema::PrimitiveType_SoftmaxCrossEntropy; + +namespace mindspore::kernel { + +int SparseSoftmaxCrossEntropyWithLogitsCPUKernel::ReSize() { return RET_OK; } + +void SparseSoftmaxCrossEntropyWithLogitsCPUKernel::ForwardPostExecute(const int *labels, const float *losses, + float *output) const { + float total_loss = 0; + for (int i = 0; i < param->batch_size_; ++i) { + if (labels[i] < 0) { + MS_LOG(EXCEPTION) << "label value must >= 0"; + } + size_t label = labels[i]; + if (label > param->number_of_classes_) { + MS_LOG(EXCEPTION) << "error label input!"; + } else { + total_loss -= logf(losses[i * param->number_of_classes_ + label]); + } + } + output[0] = total_loss / param->batch_size_; +} + +void 
SparseSoftmaxCrossEntropyWithLogitsCPUKernel::GradPostExecute(const int *labels, const float *losses, + float *output) const { + size_t row_start = 0; + for (int i = 0; i < param->batch_size_; ++i) { + if (labels[i] < 0) { + MS_LOG(EXCEPTION) << "label value must >= 0"; + } + size_t label = labels[i]; + if (label > param->number_of_classes_) { + MS_LOG(EXCEPTION) << "error label input!"; + } + for (size_t j = 0; j < param->number_of_classes_; ++j) { + size_t index = row_start + j; + if (j == label) { + output[index] = (losses[index] - 1) / param->batch_size_; + } else { + output[index] = losses[index] / param->batch_size_; + } + } + row_start += param->number_of_classes_; + } +} + +int SparseSoftmaxCrossEntropyWithLogitsCPUKernel::Run() { + auto ins = reinterpret_cast(inputs_.at(0)->Data()); + auto labels = reinterpret_cast(inputs_.at(1)->Data()); + auto out = reinterpret_cast(outputs_.at(0)->Data()); + float *grads = NULL; + if (is_train()) { // outputs_.size() > 1) + grads = reinterpret_cast(outputs_.at(0)->Data()); + } + size_t data_size = inputs_.at(0)->ElementsNum(); + float *losses = new (std::nothrow) float[data_size]; + MS_ASSERT(losses != nullptr); + std::fill(losses, losses + data_size, 0); + + MS_ASSERT(out != nullptr); + MS_ASSERT(labels != nullptr); + MS_ASSERT(ins != nullptr); + + SoftmaxParameter sm_params; + sm_params.n_dim_ = param->n_dim_; + sm_params.element_size_ = data_size; + sm_params.axis_ = 1; + for (int i = 0; i < 4; i++) // softmax has only 4 params in shape + sm_params.input_shape_[i] = param->input_shape_[i]; + float sum_data[sm_params.input_shape_[sm_params.axis_]]; + Softmax(ins, losses, sum_data, &sm_params); + + if (is_train()) { + GradPostExecute(labels, losses, grads); + } else { + ForwardPostExecute(labels, losses, out); + } + return RET_OK; +} + +int SparseSoftmaxCrossEntropyWithLogitsCPUKernel::Init() { + auto dims = inputs_[0]->shape(); + param->n_dim_ = 2; + param->number_of_classes_ = dims[1]; + param->batch_size_ = dims[0]; 
+ for (unsigned int i = 0; i < dims.size(); i++) param->input_shape_[i] = dims[i]; + if (2 != this->inputs_.size()) { + MS_LOG(ERROR) << "softmax entropy loss should have two inputs"; + return RET_ERROR; + } + auto *in0 = inputs_.front(); + if (in0 == nullptr) { + MS_LOG(ERROR) << "softmax etropy loss in0 have no data"; + return RET_ERROR; + } + + return RET_OK; +} + +kernel::LiteKernel *CpuSoftmaxCrossEntropyFp32KernelCreator(const std::vector &inputs, + const std::vector &outputs, + OpParameter *opParameter, const lite::Context *ctx, + const kernel::KernelKey &desc) { + MS_ASSERT(opParameter != nullptr); + MS_ASSERT(desc.type == schema::PrimitiveType_SoftmaxCrossEntropy); + auto *kernel = new (std::nothrow) SparseSoftmaxCrossEntropyWithLogitsCPUKernel(opParameter, inputs, outputs); + MS_ASSERT(kernel != nullptr); + auto ret = kernel->Init(); + if (RET_OK != ret) { + MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: " + << schema::EnumNamePrimitiveType(static_cast(opParameter->type_)); + delete kernel; + return nullptr; + } + return kernel; +} + +REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_SoftmaxCrossEntropy, CpuSoftmaxCrossEntropyFp32KernelCreator) +} // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/sparse_softmax_cross_entropy_with_logits.h b/mindspore/lite/src/runtime/kernel/arm/fp32/sparse_softmax_cross_entropy_with_logits.h new file mode 100644 index 0000000000000000000000000000000000000000..ae9a4dd0a9b75ffd8116289ec2b81ab7c7fd657f --- /dev/null +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/sparse_softmax_cross_entropy_with_logits.h @@ -0,0 +1,50 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_SPARSE_SOFTMAX_CROSS_ENTROPY_WITH_LOGITS_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_SPARSE_SOFTMAX_CROSS_ENTROPY_WITH_LOGITS_H_ + +#include +#include "src/lite_kernel.h" +#include "ir/anf.h" +#include "src/runtime/kernel/arm/opclib/fp32/softmax_grad.h" +#include "src/runtime/kernel/arm/opclib/fp32/arithmetic.h" + +namespace mindspore::kernel { + +class SparseSoftmaxCrossEntropyWithLogitsCPUKernel : public LiteKernel { + public: + explicit SparseSoftmaxCrossEntropyWithLogitsCPUKernel(OpParameter *parameter, + const std::vector &inputs, + const std::vector &outputs) + : LiteKernel(parameter, inputs, outputs) { + param = reinterpret_cast(parameter); + } + ~SparseSoftmaxCrossEntropyWithLogitsCPUKernel() override = default; + + void ForwardPostExecute(const int *labels, const float *losses, float *output) const; + void GradPostExecute(const int *labels, const float *losses, float *output) const; + + int Init() override; + int ReSize() override; + int Run() override; + + private: + SoftmaxCrossEntropyParameter *param; +}; +} // namespace mindspore::kernel + +#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_SPARSE_SOFTMAX_CROSS_ENTROPY_WITH_LOGITS_H_ diff --git a/mindspore/lite/src/runtime/kernel/arm/opclib/activation_grad.h b/mindspore/lite/src/runtime/kernel/arm/opclib/activation_grad.h new file mode 100644 index 0000000000000000000000000000000000000000..ac49b8567aa941cd2852f8d25a5210d857512805 --- /dev/null +++ b/mindspore/lite/src/runtime/kernel/arm/opclib/activation_grad.h @@ 
-0,0 +1,88 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_ACTIVATION_GRAD_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_ACTIVATION_GRAD_H_ + +#include +#include "src/runtime/kernel/arm/opclib/op_base.h" +#include "src/runtime/kernel/arm/opclib/fp32/arithmetic.h" +#include "src/runtime/kernel/arm/opclib/errorcode.h" + +struct ActivationGradParameter { + OpParameter op_parameter{}; + int type_; + float alpha_{0.01}; +}; + +inline int ReluGrad(float *src0, float *src1, int length, float *dst) { + for (int i = 0; i < length; ++i) { + dst[i] = src1[i] > 0 ? 1.0f : 0.0f; + } + ElementMul(src0, dst, dst, length); + return OPCLIB_OK; +} + +inline int Relu6Grad(float *src0, float *src1, int length, float *dst) { + for (int i = 0; i < length; ++i) { + if (src1[i] < 0) { + dst[i] = 0; + } else { + dst[i] = src1[i] > 6.0f ? 0.0f : 1.0f; + } + } + ElementMul(src0, dst, dst, length); + return OPCLIB_OK; +} + +inline int LReluGrad(float *src0, float *src1, int length, float *dst, float alpha) { + for (int i = 0; i < length; ++i) { + dst[i] = src1[i] > 0.0f ? 
1.0f : alpha; + } + ElementMul(src0, dst, dst, length); + return OPCLIB_OK; +} + +inline int SigmoidGrad(float *src0, float *src1, int length, float *dst) { + for (int i = 0; i < length; ++i) { + dst[i] = src0[i] * (src1[i] * (1.0f - src1[i])); + } + return OPCLIB_OK; +} + +inline int TanhGrad(float *src0, float *src1, int length, float *dst) { + for (int i = 0; i < length; ++i) { + dst[i] = (1.0f - (src1[i] * src1[i])) * src0[i]; + } + return OPCLIB_OK; +} + +inline int HSwishGrad(float *src0, float *src1, int length, float *dst) { + for (int i = 0; i < length; ++i) { + float tmp = (src1[i] > 3.0f ? 1.0f : (src1[i] < -3.0f ? 0.0f : (2.0f * src1[i] + 3.0f) / 6.0f)); + dst[i] = tmp * src0[i]; + } + return OPCLIB_OK; +} + +inline int HSigmoidGrad(float *src0, float *src1, int length, float *dst) { + for (int i = 0; i < length; ++i) { + float tmp = (src1[i] > 3.0f ? 1.0f : (src1[i] < -3.0f ? 0.0f : 1.0f / 6.0f)); + dst[i] = tmp * src0[i]; + } + return OPCLIB_OK; +} + +#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_ACTIVATION_GRAD_H_ diff --git a/mindspore/lite/src/runtime/kernel/arm/opclib/batch_norm.cc b/mindspore/lite/src/runtime/kernel/arm/opclib/batch_norm.cc new file mode 100644 index 0000000000000000000000000000000000000000..c11819a1d9e2dd9dd000b38d20cfb2d091496dd6 --- /dev/null +++ b/mindspore/lite/src/runtime/kernel/arm/opclib/batch_norm.cc @@ -0,0 +1,120 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
// All routines below treat tensors as channel-last: element (k, c) of a buffer with `ch` channels lives at
// index k * ch + c, where k runs over batch * spatial positions.

// Sums `size` rows of `ch` values into out[0..ch).
static void sumSpatialBatch(const float *in, int size, int ch, float *out) {
  std::fill(out, out + ch, 0.f);
  for (int i = 0; i < size; i++) {
    const float *ptr = in + i * ch;
    for (int c = 0; c < ch; c++) {
      out[c] += ptr[c];
    }
  }
}

// Multiplies every element of `output` in place by the scale of its channel.
void scaleBias(const float *scales, int batch, int n, int size, float *output) {
  for (int i = 0; i < batch * size; i++)
    for (int c = 0; c < n; c++) output[i * n + c] *= scales[c];
}

// out = (x - mean[f]) / (sqrt(variance[f]) + eps), per channel f.
// Note that eps is added after the square root here, unlike the backward helpers below.
void normalize(const float *x, const float *mean, const float *variance, float eps, int batch, int filters, int spatial,
               float *out) {
  int b, f, i;
  for (b = 0; b < batch; ++b) {
    for (i = 0; i < spatial; ++i) {
      for (f = 0; f < filters; ++f) {
        int index = b * filters * spatial + i * filters + f;
        out[index] = (x[index] - mean[f]) / (std::sqrt(variance[f]) + eps);
      }
    }
  }
}

// scale_updates[f] = sum over all positions of delta * x_norm for channel f.
void backwardScale(const float *x_norm, const float *delta, int batch, int n, int size, float *scale_updates) {
  int i, b, f;
  std::fill(scale_updates, scale_updates + n, 0.f);
  for (b = 0; b < batch; ++b) {
    for (i = 0; i < size; ++i) {
      for (f = 0; f < n; ++f) {
        int index = (b * size + i) * n + f;
        scale_updates[f] += delta[index] * x_norm[index];
      }
    }
  }
}

// Computes the per-channel mean and (biased) variance over batch * spatial positions.
void meanVar(const float *in, int batch, int spatial, int ch, float *mean, float *var) {
  // Keep the element count integral: a float count cannot represent every value above 2^24, which would
  // corrupt both the loop bound and the size handed to sumSpatialBatch.
  const int count = batch * spatial;
  const float N = static_cast<float>(count);
  sumSpatialBatch(in, count, ch, mean);
  for (int f = 0; f < ch; ++f) mean[f] /= N;
  std::fill(var, var + ch, 0.f);
  for (int i = 0; i < count; i++) {
    for (int f = 0; f < ch; f++) {
      float x = in[i * ch + f];
      var[f] += (x - mean[f]) * (x - mean[f]);
    }
  }
  for (int f = 0; f < ch; f++) var[f] /= N;
}

// mean_delta[c] = -(sum over `size` rows of yt) / sqrt(variance[c] + eps)
void meanDelta(float *yt, int size, int ch, float eps, float *variance, float *mean_delta) {
  sumSpatialBatch(yt, size, ch, mean_delta);
  for (int i = 0; i < ch; i++) mean_delta[i] *= -1.f / std::sqrt((variance[i] + eps));
}

// mean_add[c] = variance_delta[c] * (-2 / (spatial * batch)) * sum(x - mean[c]); the result is also
// accumulated into mean_delta[c].
void meanAdd(const float *x, const float *mean, const float *variance_delta, int batch, int filters, int spatial,
             float *mean_add, float *mean_delta) {
  int i, k;
  std::fill(mean_add, mean_add + filters, 0.f);
  for (k = 0; k < spatial * batch; ++k) {
    for (i = 0; i < filters; ++i) {
      int index = k * filters + i;
      mean_add[i] += x[index] - mean[i];
    }
  }
  for (i = 0; i < filters; ++i) {
    mean_add[i] *= variance_delta[i] * (-2.f / (spatial * batch));
    mean_delta[i] += mean_add[i];
  }
}

// variance_delta[c] = -0.5 * (variance[c] + eps)^(-3/2) * sum(delta * (x - mean[c]))
void varianceDelta(const float *x, const float *delta, const float *mean, const float *variance, int batch, int filters,
                   int spatial, float eps, float *variance_delta) {
  int i, k;
  std::fill(variance_delta, variance_delta + filters, 0.f);
  for (k = 0; k < batch * spatial; k++) {
    for (i = 0; i < filters; i++) {
      int index = k * filters + i;
      variance_delta[i] += delta[index] * (x[index] - mean[i]);
    }
  }
  for (i = 0; i < filters; i++) variance_delta[i] *= -.5 * pow(variance[i] + eps, (-3.f / 2.f));
}

// Rewrites `delta` in place by combining three terms: the direct term delta / sqrt(variance + eps), the
// variance term and the mean term, the latter two divided by the number of positions (spatial * batch).
void NormalizeDelta(const float *x, const float *mean, const float *variance, const float *mean_delta,
                    const float *variance_delta, int batch, int filters, int spatial, float eps, float *delta) {
  int f, k;
  for (k = 0; k < batch * spatial; k++) {
    for (f = 0; f < filters; f++) {
      int index = k * filters + f;
      delta[index] = delta[index] * 1. / (std::sqrt(variance[f] + eps)) +
                     variance_delta[f] * 2. * (x[index] - mean[f]) / (spatial * batch) +
                     mean_delta[f] / (spatial * batch);
    }
  }
}
// Dimensions and epsilon shared by the batch-norm helpers.
struct bnParameter {
  int batch;
  int channels;
  int spatial;
  float eps;
};

// Parameter names below match the definitions in batch_norm.cc. All buffers are channel-last: element
// (k, c) is at index k * channels + c.

// Multiplies every element of `output` in place by the scale of its channel.
void scaleBias(const float *scales, int batch, int n, int size, float *output);
// out = (x - mean) / (sqrt(variance) + eps), per channel.
void normalize(const float *x, const float *mean, const float *variance, float eps, int batch, int filters, int spatial,
               float *out);
// scale_updates[c] = sum of delta * x_norm over all positions of channel c.
void backwardScale(const float *x_norm, const float *delta, int batch, int n, int size, float *scale_updates);
// Per-channel mean and variance over batch * spatial positions.
void meanVar(const float *in, int batch, int spatial, int ch, float *mean, float *var);
// mean_delta[c] = -(sum of yt over `size` rows) / sqrt(variance[c] + eps).
void meanDelta(float *yt, int size, int ch, float eps, float *variance, float *mean_delta);
// variance_delta[c] = -0.5 * (variance[c] + eps)^(-3/2) * sum(delta * (x - mean[c])).
void varianceDelta(const float *x, const float *delta, const float *mean, const float *variance, int batch, int filters,
                   int spatial, float eps, float *variance_delta);
// Computes mean_add from variance_delta and accumulates it into mean_delta.
void meanAdd(const float *x, const float *mean, const float *variance_delta, int batch, int filters, int spatial,
             float *mean_add, float *mean_delta);
// Rewrites delta in place from the direct, variance and mean gradient terms.
void NormalizeDelta(const float *x, const float *mean, const float *variance, const float *mean_delta,
                    const float *variance_delta, int batch, int filters, int spatial, float eps, float *delta);
// output[i] = -nom[i] / denom[i]^2 for each of the element_size entries.
void ElementDivNegSquare(const float *nom, const float *denom, float *output, int element_size) {
  int idx = 0;
  while (idx < element_size) {
    const float d = denom[idx];
    output[idx] = -nom[idx] / (d * d);
    ++idx;
  }
}

// output[i] = -(a[i] * b[i]) / denom[i]^2 for each of the element_size entries.
void ElementMulAndDivNegSquare(const float *a, const float *b, const float *denom, float *output, int element_size) {
  int idx = 0;
  while (idx < element_size) {
    const float d = denom[idx];
    output[idx] = -a[idx] * b[idx] / (d * d);
    ++idx;
  }
}
+ */ +#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_FP32_ARITHMETIC_GRAD_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_FP32_ARITHMETIC_GRAD_H_ + +void ElementDivNegSquare(const float *nom, const float *denom, float *output, int element_size); +void ElementMulAndDivNegSquare(const float *a, const float *b, const float *denom, float *output, int element_size); + +#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_FP32_ARITHMETIC_GRAD_H_ diff --git a/mindspore/lite/src/runtime/kernel/arm/opclib/fp32/gemm.cc b/mindspore/lite/src/runtime/kernel/arm/opclib/fp32/gemm.cc new file mode 100644 index 0000000000000000000000000000000000000000..a8f3b2afd4e56b46939d7db672b07d237abb3ee7 --- /dev/null +++ b/mindspore/lite/src/runtime/kernel/arm/opclib/fp32/gemm.cc @@ -0,0 +1,108 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
// C += alpha * A * B, with A (M x K) and B (K x N) both stored row-major.
static void gemm_nn(int M, int N, int K, float alpha, float *mat_a, int lda, float *mat_B, int ldb, float *mat_c,
                    int ldc) {
  for (int row = 0; row < M; ++row) {
    float *c_row = mat_c + row * ldc;
    for (int inner = 0; inner < K; ++inner) {
      const float a = alpha * mat_a[row * lda + inner];
      const float *b_row = mat_B + inner * ldb;
      for (int col = 0; col < N; ++col) {
        c_row[col] += a * b_row[col];
      }
    }
  }
}

// C += alpha * A * B^T, with B stored as N x K.
static void gemm_nt(int M, int N, int K, float alpha, float *mat_a, int lda, float *mat_b, int ldb, float *mat_c,
                    int ldc) {
  for (int row = 0; row < M; ++row) {
    for (int col = 0; col < N; ++col) {
      float acc = 0;
      for (int inner = 0; inner < K; ++inner) {
        acc += alpha * mat_a[row * lda + inner] * mat_b[col * ldb + inner];
      }
      mat_c[row * ldc + col] += acc;
    }
  }
}

// C += alpha * A^T * B, with A stored as K x M.
static void gemm_tn(int M, int N, int K, float alpha, float *mat_a, int lda, float *mat_b, int ldb, float *mat_c,
                    int ldc) {
  for (int row = 0; row < M; ++row) {
    float *c_row = mat_c + row * ldc;
    for (int inner = 0; inner < K; ++inner) {
      const float a = alpha * mat_a[inner * lda + row];
      const float *b_row = mat_b + inner * ldb;
      for (int col = 0; col < N; ++col) {
        c_row[col] += a * b_row[col];
      }
    }
  }
}

// C += alpha * A^T * B^T, with A stored as K x M and B stored as N x K.
static void gemm_tt(int M, int N, int K, float alpha, float *mat_a, int lda, float *mat_b, int ldb, float *mat_c,
                    int ldc) {
  for (int row = 0; row < M; ++row) {
    for (int col = 0; col < N; ++col) {
      float acc = 0;
      for (int inner = 0; inner < K; ++inner) {
        acc += alpha * mat_a[row + inner * lda] * mat_b[inner + col * ldb];
      }
      mat_c[row * ldc + col] += acc;
    }
  }
}

// mat_c = alpha*op( mat_a )*op( mat_b ) + beta*C
// M - number of rows of op( mat_a ) and of mat_c
// N - number of cols of op( mat_b ) and of mat_c
// K - number of cols of op( mat_a )
// A non-zero transpose_a / transpose_b selects the transposed operand.
void gemm(int transpose_a, int transpose_b, int M, int N, int K, float alpha, float *mat_a, int lda, float *mat_b,
          int ldb, float beta, float *mat_c, int ldc) {
  // Apply beta first: exactly zero clears C, exactly one leaves it alone, anything else scales it.
  // The paired comparisons stand in for == / != on floats.
  const bool beta_is_zero = (beta >= 0.f && beta <= 0.f);
  const bool beta_differs_from_one = (beta < 1.f || beta > 1.f);
  if (beta_is_zero) {
    for (int row = 0; row < M; ++row) {
      for (int col = 0; col < N; ++col) {
        mat_c[row * ldc + col] = 0;
      }
    }
  } else if (beta_differs_from_one) {
    for (int row = 0; row < M; ++row) {
      for (int col = 0; col < N; ++col) {
        mat_c[row * ldc + col] *= beta;
      }
    }
  }

  // Pick the single-row kernel once; row t of op(A) starts at A + t*lda (plain) or A + t (transposed).
  using RowKernel = void (*)(int, int, int, float, float *, int, float *, int, float *, int);
  RowKernel row_kernel = nullptr;
  int a_row_step = 0;
  if (!transpose_a && !transpose_b) {
    row_kernel = gemm_nn;
    a_row_step = lda;
  } else if (transpose_a && !transpose_b) {
    row_kernel = gemm_tn;
    a_row_step = 1;
  } else if (!transpose_a && transpose_b) {
    row_kernel = gemm_nt;
    a_row_step = lda;
  } else {
    row_kernel = gemm_tt;
    a_row_step = 1;
  }

  for (int t = 0; t < M; ++t) {
    row_kernel(1, N, K, alpha, mat_a + t * a_row_step, lda, mat_b, ldb, mat_c + t * ldc, ldc);
  }
}
+ */ + +#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_FP32_GEMM_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_FP32_GEMM_H_ + +void gemm(int transpose_a, int transpose_b, int M, int N, int K, float alpha, float *mat_a, int lda, float *mat_b, + int ldb, float beta, float *mat_c, int ldc); + +#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_FP32_GEMM_H_ diff --git a/mindspore/lite/src/runtime/kernel/arm/opclib/fp32/pooling_grad.cc b/mindspore/lite/src/runtime/kernel/arm/opclib/fp32/pooling_grad.cc new file mode 100644 index 0000000000000000000000000000000000000000..2730cbe95d3fb2784f5042ccf79093f8715d1a0c --- /dev/null +++ b/mindspore/lite/src/runtime/kernel/arm/opclib/fp32/pooling_grad.cc @@ -0,0 +1,149 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include +#include "src/runtime/kernel/arm/opclib/fp32/pooling_grad.h" + +void AvgPoolingGrad(const float *input_ptr, float *output_ptr, PoolingParameter *pooling_param) { + int stride_w = pooling_param->stride_w_; + int stride_h = pooling_param->stride_h_; + int pad_w = pooling_param->pad_l_; + int pad_h = pooling_param->pad_u_; + int win_w = pooling_param->window_w_; + int win_h = pooling_param->window_h_; + int channel = pooling_param->input_channel_; + int in_w = pooling_param->input_w_; + int in_h = pooling_param->input_h_; + int output_w = pooling_param->output_w_; + int output_h = pooling_param->output_h_; + int output_batch = pooling_param->output_batch_; + + const float *inPtr; + for (int i = 0; i < output_h * output_w * channel * output_batch; i++) output_ptr[i] = 0.0; + + // int pad_top = padding[2]; + + float kk = static_cast(win_h * win_w); + + for (uint16_t ib = 0; ib < output_batch; ib++) { + // int in_batch_offset = batch * in_h * in_w * channel; + // int out_batch_offset = batch * output_h * output_w * channel; + // out = grads->getData(ib*grads->imgSize()); + // inPtr = in->getData(ib*in->imgSize()); + float *out; + out = &output_ptr[(ib * output_h * output_w)]; + inPtr = reinterpret_cast(&input_ptr[(ib * in_h * in_w)]); + if (1) { // in->layout() == Tensor::nhwc) + // iterate over yt + for (uint16_t yh = 0; yh < in_h; yh++) { + for (uint16_t yw = 0; yw < in_w; yw++) { + for (uint16_t ic = 0; ic < channel; ic++) { + int idx = (yw + yh * in_w) * channel + ic; // (ic*in_h*in_w) + (in_w*yh) + yw; + float delta = inPtr[idx] / kk; + for (int32_t kh = 0; kh < win_h; kh++) { + int xh = yh * stride_h + kh - pad_h; + if ((xh < 0) || (xh >= output_h)) { + continue; + } + for (int32_t kw = 0; kw < win_w; kw++) { + int xw = yw * stride_w + kw - pad_w; + if ((xw < 0) || (xw >= output_w)) { + continue; + } + // out[(ic*output_h*output_w) + (xh*output_w) + xw] += delta; + out[(xw + output_w * xh) * channel + ic] += delta; + } + } + } + } + } + } else { // 
nchw + for (uint16_t ic = 0; ic < channel; ic++) { + // iterate over yt + for (uint16_t yh = 0; yh < in_h; yh++) { + for (uint16_t yw = 0; yw < in_w; yw++) { + int idx = (ic * in_h * in_w) + (in_w * yh) + yw; + float delta = inPtr[idx] / kk; + for (int32_t kh = 0; kh < win_h; kh++) { + int xh = yh * stride_h + kh - pad_h; + if ((xh < 0) || (xh >= output_h)) { + continue; + } + for (int32_t kw = 0; kw < win_w; kw++) { + int xw = yw * stride_w + kw - pad_w; + if ((xw < 0) || (xw >= output_w)) { + continue; + } + out[(ic * output_h * output_w) + (xh * output_w) + xw] += delta; + } + } + } + } + } + } + } +} + +void MaxPoolingGrad(const float *dy, const int *indices, float *output_ptr, PoolingParameter *pooling_param) { + // int stride_w = pooling_param->stride_w_; + // int stride_h = pooling_param->stride_h_; + // int pad_w = pooling_param->pad_l_; + // int pad_h = pooling_param->pad_u_; + // int win_w = pooling_param->window_w_; + // int win_h = pooling_param->window_h_; + int channel = pooling_param->input_channel_; + int in_w = pooling_param->input_w_; + int in_h = pooling_param->input_h_; + int output_w = pooling_param->output_w_; + int output_h = pooling_param->output_h_; + int output_batch = pooling_param->output_batch_; + + int out_img_size = + output_h * output_w; // Emir -- in original code this varible is calculated according to input size ?? 
+ int ind_img_size = in_h * in_w; + // const int w_pad = (output_w + pad_w + pad_w); + + for (int i = 0; i < output_h * output_w * channel * output_batch; i++) output_ptr[i] = 0.0; + + const float *yt = reinterpret_cast(dy); + const int *pos = reinterpret_cast(indices); + float *out; + + if (1) { // grads->layout() == Tensor::nhwc) + for (int ib = 0; ib < output_batch; ib++) { + out = &(output_ptr[ib * output_w * output_w * channel]); + for (int ix = 0; ix < ind_img_size; ix++) { + for (int cix = 0; cix < channel; cix++) { + int idx = (*pos) * channel + cix; + out[idx] += *yt; + pos++; + yt++; + } + } + } + } else { + for (int ib = 0; ib < output_batch; ib++) { + out = &output_ptr[(ib * out_img_size)]; + for (int cix = 0; cix < channel; cix++) { + for (int ix = 0; ix < ind_img_size; ix++) { + int idx = cix * output_h * output_w + *pos; // cord_y*output_w + cord_x; + out[idx] += *yt; + pos++; + yt++; + } + } + } + } +} diff --git a/mindspore/lite/src/runtime/kernel/arm/opclib/fp32/pooling_grad.h b/mindspore/lite/src/runtime/kernel/arm/opclib/fp32/pooling_grad.h new file mode 100644 index 0000000000000000000000000000000000000000..750530d767c5742a671b1151f1f11c086975faf5 --- /dev/null +++ b/mindspore/lite/src/runtime/kernel/arm/opclib/fp32/pooling_grad.h @@ -0,0 +1,25 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_FP32_POOLING_GRAD_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_FP32_POOLING_GRAD_H_ + +#include "src/runtime/kernel/arm/opclib/fp32/pooling.h" + +void AvgPoolingGrad(const float *input_ptr, float *output_ptr, PoolingParameter *pooling_param); +void MaxPoolingGrad(const float *dy, const int *indices_ptr, float *output_ptr, PoolingParameter *pooling_param); + +#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_FP32_POOLING_GRAD_H_ diff --git a/mindspore/lite/src/runtime/kernel/arm/opclib/fp32/reduce_grad.cc b/mindspore/lite/src/runtime/kernel/arm/opclib/fp32/reduce_grad.cc new file mode 100644 index 0000000000000000000000000000000000000000..24e7189b3ef8108e64a8ae8f7803c731b67a3844 --- /dev/null +++ b/mindspore/lite/src/runtime/kernel/arm/opclib/fp32/reduce_grad.cc @@ -0,0 +1,130 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
#include <algorithm>
#include <cstddef>
#include <vector>

// Reduction helpers over dense row-major tensors described by an array of
// dimension sizes.

// Advances the multi-dimensional index `current` (num_dims entries, last
// dimension fastest) to the next position inside `dims`.
// Returns true when a next position exists; returns false once the index has
// wrapped past the final element (all entries are 0 again at that point).
static inline bool NextIndex(const int num_dims, const int *dims, int *current) {
  int carry = 1;
  for (int idx = num_dims - 1; idx >= 0; --idx) {
    const int current_val = current[idx] + carry;
    if (dims[idx] == current_val) {
      // This digit overflowed: reset it and keep carrying into the next one.
      current[idx] = 0;
    } else {
      current[idx] = current_val;
      carry = 0;
      break;
    }
  }
  return (carry == 0);
}

// Flattens the index `iter` into a row-major offset for a tensor of shape `dims`.
static inline size_t GetInputOffset(const int num_dims, const int *dims, const int *iter) {
  size_t offset = 0;
  for (int idx = 0; idx < num_dims; ++idx) {
    offset = offset * static_cast<size_t>(dims[idx]) + static_cast<size_t>(iter[idx]);
  }
  return offset;
}

// Same as GetInputOffset, but the dimensions listed in `axes` (num_axis entries)
// are left out, which yields the offset inside the reduced tensor.
static inline size_t GetOutputOffset(const int num_dims, const int *dims, const int *iter, const int num_axis,
                                     const int *axes) {
  size_t offset = 0;
  for (int idx = 0; idx < num_dims; ++idx) {
    bool is_axis = false;
    for (int axis_idx = 0; axis_idx < num_axis; ++axis_idx) {
      if (idx == axes[axis_idx]) {
        is_axis = true;
        break;
      }
    }
    if (!is_axis) {
      offset = offset * static_cast<size_t>(dims[idx]) + static_cast<size_t>(iter[idx]);
    }
  }
  return offset;
}

// Mean of input_data over the dimensions listed in `axes`.
//
// input_iter  - caller-provided scratch with at least input_num_dims ints.
// output_data - receives the product of output_dims[0 .. output_num_dims) values;
//               it is zeroed here before accumulation, so it need not be
//               initialized by the caller.
void ReduceMeanByAxes(const float *input_data, int *input_iter, const int *input_dims, int input_num_dims,
                      const int *axes, int num_axes, float *output_data, const int *output_dims, int output_num_dims) {
  size_t num_outputs = 1;
  for (int idx = 0; idx < output_num_dims; ++idx) {
    num_outputs *= static_cast<size_t>(output_dims[idx]);
  }

  // The loop below only ever adds into output_data, so start every sum from zero.
  std::fill(output_data, output_data + num_outputs, 0.0f);

  // Reset input iterator.
  for (int idx = 0; idx < input_num_dims; ++idx) {
    input_iter[idx] = 0;
  }

  // Visit every input element and add it to the output slot it reduces into.
  do {
    const size_t input_offset = GetInputOffset(input_num_dims, input_dims, input_iter);
    const size_t output_offset = GetOutputOffset(input_num_dims, input_dims, input_iter, num_axes, axes);
    output_data[output_offset] += input_data[input_offset];
  } while (NextIndex(input_num_dims, input_dims, input_iter));

  // Turn the sums into means: divide by the number of elements folded into each slot.
  size_t num_elements_in_axis = 1;
  for (int idx = 0; idx < num_axes; ++idx) {
    num_elements_in_axis *= static_cast<size_t>(input_dims[axes[idx]]);
  }
  for (size_t idx = 0; idx < num_outputs; ++idx) {
    output_data[idx] = output_data[idx] / static_cast<float>(num_elements_in_axis);
  }
}

// Arithmetic mean of the first `size` values of `src`.
// With size == 0 the result is the float division 0 / 0 (NaN).
float ReduceMeanAll(const float *src, int size) {
  float sum = 0;
  for (int i = 0; i < size; ++i) {
    sum += src[i];
  }
  return sum / size;
}

// Sums `input` (shape input_dims) down to `output` (shape output_dims); both
// shapes have num_dims entries. Every dimension whose output size is 1 is
// treated as a reduced axis. When the two shapes are identical the input is
// simply copied.
void ReduceSumByAxes(const float *input, const int *input_dims, float *output, const int *output_dims, int num_dims) {
  int num_outputs = 1;
  bool same_shape = true;
  for (int idx = 0; idx < num_dims; ++idx) {
    num_outputs *= output_dims[idx];
    if (output_dims[idx] != input_dims[idx]) same_shape = false;
  }
  if (same_shape) {
    // Pointer arithmetic on float* already counts in elements, so the end
    // iterator is input + num_outputs (no sizeof scaling).
    std::copy(input, input + num_outputs, output);
    return;
  }

  std::fill(output, output + num_outputs, 0.0f);

  // Scratch sized from num_dims, so tensors of any rank are handled.
  // (Reaching this point implies num_dims >= 1.)
  std::vector<int> input_iter(static_cast<size_t>(num_dims), 0);
  std::vector<int> axes;
  axes.reserve(static_cast<size_t>(num_dims));
  for (int i = 0; i < num_dims; i++) {
    if (output_dims[i] == 1) axes.push_back(i);
  }
  const int num_axes = static_cast<int>(axes.size());

  // Visit every input element and add it to the output slot it reduces into.
  do {
    const size_t input_offset = GetInputOffset(num_dims, input_dims, input_iter.data());
    const size_t output_offset = GetOutputOffset(num_dims, input_dims, input_iter.data(), num_axes, axes.data());
    output[output_offset] += input[input_offset];
  } while (NextIndex(num_dims, input_dims, input_iter.data()));
}
+ */ + +#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_FP32_REDUCE_GRAD_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_FP32_REDUCE_GRAD_H_ + +float ReduceMeanAll(const float *src, int size); +void ReduceSumByAxes(const float *input, const int *input_dims, float *output, const int *output_dims, int num_dims); + +#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_FP32_REDUCE_GRAD_H_ + diff --git a/mindspore/lite/src/runtime/kernel/arm/opclib/fp32/softmax_grad.h b/mindspore/lite/src/runtime/kernel/arm/opclib/fp32/softmax_grad.h new file mode 100644 index 0000000000000000000000000000000000000000..6fb7fc1580b0510e4fb80c7b6827377a382cde0c --- /dev/null +++ b/mindspore/lite/src/runtime/kernel/arm/opclib/fp32/softmax_grad.h @@ -0,0 +1,29 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_FP32_SOFTMAX_GRAD_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_FP32_SOFTMAX_GRAD_H_ + +#include "src/runtime/kernel/arm/opclib/op_base.h" + +struct SoftmaxCrossEntropyParameter { + OpParameter op_parameter; + int32_t batch_size_; + unsigned int number_of_classes_; + int n_dim_; + int input_shape_[5]; +}; +#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_FP32_SOFTMAX_GRAD_H_ diff --git a/mindspore/lite/src/runtime/kernel/arm/opclib/pack_ext.cc b/mindspore/lite/src/runtime/kernel/arm/opclib/pack_ext.cc new file mode 100644 index 0000000000000000000000000000000000000000..bb6cd1ce6b6de8eabcf434c825a22abb12e5da84 --- /dev/null +++ b/mindspore/lite/src/runtime/kernel/arm/opclib/pack_ext.cc @@ -0,0 +1,176 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include "src/runtime/kernel/arm/opclib/pack_ext.h" + +static int is_a_ge_zero_and_a_lt_b(int a, int b) { return (unsigned)(a) < (unsigned)(b); } + +void im2col_hwc(const float *in_data, float *data_col, ConvParameter *conv_param) { + const int pad_left = /*conv_param->pad_l_*/ conv_param->pad_w_; + // const int pad_right = /*conv_param->pad_r_*/conv_param->pad_w_; + const int pad_up = /*conv_param->pad_u_*/ conv_param->pad_h_; + // const int pad_down = /*conv_param->pad_d/*/conv_param->pad_h_; + + const int stride_h = conv_param->stride_h_; + const int stride_w = conv_param->stride_w_; + + const int dilation_h = conv_param->dilation_h_; + const int dilation_w = conv_param->dilation_w_; + + const int kernel_h = conv_param->kernel_h_; + const int kernel_w = conv_param->kernel_w_; + + const int in_height = conv_param->input_h_; + const int in_width = conv_param->input_w_; + + const int output_h = conv_param->output_h_; + const int output_w = conv_param->output_w_; + const int channels = conv_param->input_channel_ / conv_param->group_; + const int tot_channels = conv_param->input_channel_; + + int /*channel,*/ kernel_row, kernel_col, output_rows, output_col; + + int row_stride_offset = 0; + + for (output_rows = output_h; output_rows; output_rows--) { + int col_stride_offset = 0; + for (output_col = output_w; output_col; output_col--) { + for (kernel_row = 0; kernel_row < kernel_h; kernel_row++) { + int input_row = -pad_up + kernel_row * dilation_h + row_stride_offset; + for (kernel_col = 0; kernel_col < kernel_w; kernel_col++) { + int input_col = -pad_left + kernel_col * dilation_w + col_stride_offset; + + if (is_a_ge_zero_and_a_lt_b(input_row, in_height) && is_a_ge_zero_and_a_lt_b(input_col, in_width)) { + const int offset = (input_row * in_width + input_col) * tot_channels; + memcpy(data_col, in_data + offset, sizeof(float) * channels); + data_col += channels; + } else { + memset(data_col, 0, sizeof(float) * channels); + data_col += channels; + } + 
} + } + col_stride_offset += stride_w; + } + row_stride_offset += stride_h; + } +} + +// output matrix is (kernel_h*kernel_w*channels)X(output_h*output_w) +void im2row_hwc(const float *in_data, float *data_row, ConvParameter *conv_param) { + const int pad_left = /*conv_param->pad_l_*/ conv_param->pad_w_; + // const int pad_right = /*conv_param->pad_r_*/conv_param->pad_w_; + const int pad_up = /*conv_param->pad_u_*/ conv_param->pad_h_; + // const int pad_down = /*conv_param->pad_d/*/conv_param->pad_h_; + + const int stride_h = conv_param->stride_h_; + const int stride_w = conv_param->stride_w_; + + const int dilation_h = conv_param->dilation_h_; + const int dilation_w = conv_param->dilation_w_; + + const int kernel_h = conv_param->kernel_h_; + const int kernel_w = conv_param->kernel_w_; + + const int in_height = conv_param->input_h_; + const int in_width = conv_param->input_w_; + + const int output_h = conv_param->output_h_; + const int output_w = conv_param->output_w_; + const int channels = conv_param->input_channel_ / conv_param->group_; + const int tot_channels = conv_param->input_channel_; + + int channel, kernel_row, kernel_col, output_rows, output_col; + + for (kernel_row = 0; kernel_row < kernel_h; kernel_row++) { + for (kernel_col = 0; kernel_col < kernel_w; kernel_col++) { + for (channel = 0; channel < channels; channel++) { + int input_row = -pad_up + kernel_row * dilation_h; + for (output_rows = output_h; output_rows; output_rows--) { + if (!is_a_ge_zero_and_a_lt_b(input_row, in_height)) { + for (output_col = output_w; output_col; output_col--) { + *(data_row++) = 0; + } + } else { + int input_col = -pad_left + kernel_col * dilation_w; + for (output_col = output_w; output_col; output_col--) { + if (is_a_ge_zero_and_a_lt_b(input_col, in_width)) { + const int offset = (input_row * in_width + input_col) * tot_channels + channel; + *(data_row++) = in_data[offset]; + } else { + *(data_row++) = 0; + } + input_col += stride_w; + } + } + input_row += stride_h; + 
} + } + } + } +} + +void col2im_hwc(const float *data_col, float *data_im, ConvParameter *conv_param) { + const int pad_left = /*conv_param->pad_l_*/ conv_param->pad_w_; + // const int pad_right = /*conv_param->pad_r_*/conv_param->pad_w_; + const int pad_up = /*conv_param->pad_u_*/ conv_param->pad_h_; + // const int pad_down = /*conv_param->pad_d/*/conv_param->pad_h_; + + const int stride_h = conv_param->stride_h_; + const int stride_w = conv_param->stride_w_; + + const int dilation_h = conv_param->dilation_h_; + const int dilation_w = conv_param->dilation_w_; + + const int kernel_h = conv_param->kernel_h_; + const int kernel_w = conv_param->kernel_w_; + + const int in_height = conv_param->input_h_; + const int in_width = conv_param->input_w_; + + const int output_h = conv_param->output_h_; + const int output_w = conv_param->output_w_; + const int channels = conv_param->input_channel_ / conv_param->group_; + const int tot_channels = conv_param->input_channel_; + + int kernel_row, kernel_col, output_rows, output_col; + + int row_stride_offset = 0; + + for (output_rows = output_h; output_rows; output_rows--) { + int col_stride_offset = 0; + for (output_col = output_w; output_col; output_col--) { + for (kernel_row = 0; kernel_row < kernel_h; kernel_row++) { + int input_row = -pad_up + kernel_row * dilation_h + row_stride_offset; + for (kernel_col = 0; kernel_col < kernel_w; kernel_col++) { + int input_col = -pad_left + kernel_col * dilation_w + col_stride_offset; + + if (is_a_ge_zero_and_a_lt_b(input_row, in_height) && is_a_ge_zero_and_a_lt_b(input_col, in_width)) { + int offset = (input_row * in_width + input_col) * tot_channels; + float *data_im_ptr = &data_im[offset]; + for (int i = 0; i < channels; i++) { + data_im_ptr[i] += data_col[i]; + } + } + data_col += channels; + } + } + col_stride_offset += stride_w; + } + row_stride_offset += stride_h; + } +} diff --git a/mindspore/lite/src/runtime/kernel/arm/opclib/pack_ext.h 
b/mindspore/lite/src/runtime/kernel/arm/opclib/pack_ext.h new file mode 100644 index 0000000000000000000000000000000000000000..3f51aae13d73cc6b70d4dc8e77e09cca59401e59 --- /dev/null +++ b/mindspore/lite/src/runtime/kernel/arm/opclib/pack_ext.h @@ -0,0 +1,26 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_PACK_EXT_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_PACK_EXT_H_ + +#include "src/runtime/kernel/arm/opclib/conv_parameter.h" + +void im2col_hwc(const float *in_data, float *data_col, ConvParameter *conv_param); +void im2row_hwc(const float *in_data, float *data_row, ConvParameter *conv_param); +void col2im_hwc(const float *data_col, float *data_im, ConvParameter *conv_param); + +#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_PACK_EXT_H diff --git a/mindspore/lite/test/CMakeLists.txt b/mindspore/lite/test/CMakeLists.txt index ad0ef9619546cf7da26498b84203406be118295f..0bfa4b1dbd34c68d5a40701af2aedc89026b8769 100644 --- a/mindspore/lite/test/CMakeLists.txt +++ b/mindspore/lite/test/CMakeLists.txt @@ -152,6 +152,7 @@ set(TEST_LITE_SRC ${LITE_DIR}/src/scheduler.cc ${LITE_DIR}/src/common/graph_util.cc ${LITE_DIR}/src/common/file_utils.cc + ${LITE_DIR}/src/common/file_utils_ext.cc ${LITE_DIR}/src/common/utils.cc ${LITE_DIR}/tools/common/graph_util.cc ${LITE_DIR}/tools/common/tensor_util.cc diff --git 
a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/activation_grad_fp32_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/activation_grad_fp32_tests.cc new file mode 100644 index 0000000000000000000000000000000000000000..1badd29a267daafeb2eff45a32f8de418398dcce --- /dev/null +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/activation_grad_fp32_tests.cc @@ -0,0 +1,312 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include + +#include "utils/log_adapter.h" +#include "common/common_test.h" +#include "src/common/file_utils.h" +#include "src/common/file_utils_ext.h" +#include "mindspore/lite/src/kernel_registry.h" +#include "mindspore/lite/src/ir/tensor.h" +#include "mindspore/lite/src/lite_kernel.h" +#include "mindspore/lite/src/runtime/kernel/arm/fp32/activation_grad.h" + +namespace mindspore { +class TestActGradFp32 : public mindspore::Common { + public: + TestActGradFp32() {} +}; + +TEST_F(TestActGradFp32, ReluGradFp32) { + // runtime part + printf("Calculating runtime cost...\n"); + uint64_t time_avg = 0; + size_t output_data_size = 50; + + size_t input_size; + std::string input_path = "./test_data/activationGrad/relu_y_50.bin"; + auto input_data = reinterpret_cast(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); + std::string yt_path = "./test_data/activationGrad/relu_yt_50.bin"; + auto yt_data = reinterpret_cast(mindspore::lite::ReadFile(yt_path.c_str(), &input_size)); + + auto output_data = new float[output_data_size]; + // warm up loop + for (int i = 0; i < 3; i++) { + ReluGrad(yt_data, input_data, 50, output_data); + } + + int loop_count = 100; + auto time_start = mindspore::lite::GetTimeUs(); + for (int i = 0; i < loop_count; i++) { + ReluGrad(yt_data, input_data, 50, output_data); + } + auto time_end = mindspore::lite::GetTimeUs(); + auto cost = time_end - time_start; + time_avg = cost / loop_count; + printf("single thread running time : %f ms\n", time_avg / 1000.0f); + + printf("==================output data=================\n"); + for (int i = 0; i < 20; i++) { + std::cout << output_data[i] << " ,"; + } + std::cout << std::endl; + + std::string output_path = "./test_data/activationGrad/relu_out_50.bin"; + + int res = lite::CompareRelativeOutput(output_data, output_path); + + EXPECT_EQ(res, 0); + + delete input_data; + delete[] output_data; + delete yt_data; + + MS_LOG(INFO) << "ReluGradFp32 passed"; +} + 
+// Calls Relu6Grad on the relu6_*_50.bin fixtures, prints the average time of 100
+// calls and the first 20 outputs, then compares the output with relu6_out_50.bin.
+TEST_F(TestActGradFp32, Relu6GradFp32) {
+  // runtime part
+  printf("Calculating runtime cost...\n");
+  // Element count passed to the kernel; also sizes the output buffer.
+  const int data_len = 50;
+
+  size_t input_size = 0;
+  std::string input_path = "./test_data/activationGrad/relu6_y_50.bin";
+  auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size));
+  // ReadFile presumably yields null when the fixture is missing (confirm); stop before the kernel reads it.
+  ASSERT_TRUE(input_data != nullptr);
+  std::string yt_path = "./test_data/activationGrad/relu6_yt_50.bin";
+  auto yt_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(yt_path.c_str(), &input_size));
+  ASSERT_TRUE(yt_data != nullptr);
+
+  auto output_data = new float[data_len];
+  // warm up loop
+  for (int i = 0; i < 3; i++) {
+    Relu6Grad(yt_data, input_data, data_len, output_data);
+  }
+
+  int loop_count = 100;
+  auto time_start = mindspore::lite::GetTimeUs();
+  for (int i = 0; i < loop_count; i++) {
+    Relu6Grad(yt_data, input_data, data_len, output_data);
+  }
+  auto time_end = mindspore::lite::GetTimeUs();
+  auto cost = time_end - time_start;
+  uint64_t time_avg = cost / loop_count;
+  printf("single thread running time : %f ms\n", time_avg / 1000.0f);
+
+  printf("==================output data=================\n");
+  for (int i = 0; i < 20; i++) {
+    std::cout << output_data[i] << " ,";
+  }
+  std::cout << std::endl;
+
+  std::string output_path = "./test_data/activationGrad/relu6_out_50.bin";
+  int res = lite::CompareRelativeOutput(output_data, output_path);
+
+  EXPECT_EQ(res, 0);
+
+  // NOTE(review): scalar delete on ReadFile buffers is kept from the original - confirm the allocator.
+  delete input_data;
+  delete[] output_data;
+  delete yt_data;
+
+  MS_LOG(INFO) << "Relu6GradFp32 passed";
+}
+
+// Same flow as above for LReluGrad, called with a slope argument of 0.1 on the lrelu_*_50.bin fixtures.
+TEST_F(TestActGradFp32, LReluGradFp32) {
+  // runtime part
+  printf("Calculating runtime cost...\n");
+  const int data_len = 50;
+
+  size_t input_size = 0;
+  std::string input_path = "./test_data/activationGrad/lrelu_y_50.bin";
+  auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size));
+  ASSERT_TRUE(input_data != nullptr);
+  std::string yt_path = "./test_data/activationGrad/lrelu_yt_50.bin";
+  auto yt_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(yt_path.c_str(), &input_size));
+  ASSERT_TRUE(yt_data != nullptr);
+
+  auto output_data = new float[data_len];
+  // warm up loop
+  for (int i = 0; i < 3; i++) {
+    LReluGrad(yt_data, input_data, data_len, output_data, 0.1);
+  }
+
+  int loop_count = 100;
+  auto time_start = mindspore::lite::GetTimeUs();
+  for (int i = 0; i < loop_count; i++) {
+    LReluGrad(yt_data, input_data, data_len, output_data, 0.1);
+  }
+  auto time_end = mindspore::lite::GetTimeUs();
+  auto cost = time_end - time_start;
+  uint64_t time_avg = cost / loop_count;
+  printf("single thread running time : %f ms\n", time_avg / 1000.0f);
+
+  printf("==================output data=================\n");
+  for (int i = 0; i < 20; i++) {
+    std::cout << output_data[i] << " ,";
+  }
+  std::cout << std::endl;
+
+  std::string output_path = "./test_data/activationGrad/lrelu_out_50.bin";
+  int res = lite::CompareRelativeOutput(output_data, output_path);
+
+  EXPECT_EQ(res, 0);
+
+  delete input_data;
+  delete[] output_data;
+  delete yt_data;
+
+  MS_LOG(INFO) << "LReluGradFp32 passed";
+}
+
+// Same flow for SigmoidGrad on the sigmoid_*_50.bin fixtures.
+TEST_F(TestActGradFp32, SigmoidGradFp32) {
+  // runtime part
+  printf("Calculating runtime cost...\n");
+  const int data_len = 50;
+
+  size_t input_size = 0;
+  std::string input_path = "./test_data/activationGrad/sigmoid_y_50.bin";
+  auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size));
+  ASSERT_TRUE(input_data != nullptr);
+  std::string yt_path = "./test_data/activationGrad/sigmoid_yt_50.bin";
+  auto yt_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(yt_path.c_str(), &input_size));
+  ASSERT_TRUE(yt_data != nullptr);
+
+  auto output_data = new float[data_len];
+  // warm up loop
+  for (int i = 0; i < 3; i++) {
+    SigmoidGrad(yt_data, input_data, data_len, output_data);
+  }
+
+  int loop_count = 100;
+  auto time_start = mindspore::lite::GetTimeUs();
+  for (int i = 0; i < loop_count; i++) {
+    SigmoidGrad(yt_data, input_data, data_len, output_data);
+  }
+  auto time_end = mindspore::lite::GetTimeUs();
+  auto cost = time_end - time_start;
+  uint64_t time_avg = cost / loop_count;
+  printf("single thread running time : %f ms\n", time_avg / 1000.0f);
+
+  printf("==================output data=================\n");
+  for (int i = 0; i < 20; i++) {
+    std::cout << output_data[i] << " ,";
+  }
+  std::cout << std::endl;
+
+  std::string output_path = "./test_data/activationGrad/sigmoid_out_50.bin";
+  int res = lite::CompareRelativeOutput(output_data, output_path);
+
+  EXPECT_EQ(res, 0);
+  // lite::CompareOutput(output_data, output_path);
+
+  delete input_data;
+  delete[] output_data;
+  delete yt_data;
+
+  MS_LOG(INFO) << "SigmoidGradFp32 passed";
+}
+
+// Same flow for TanhGrad on the tanh_*_50.bin fixtures.
+TEST_F(TestActGradFp32, tanhGradFp32) {
+  // runtime part
+  printf("Calculating runtime cost...\n");
+  const int data_len = 50;
+
+  size_t input_size = 0;
+  std::string input_path = "./test_data/activationGrad/tanh_y_50.bin";
+  auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size));
+  ASSERT_TRUE(input_data != nullptr);
+  std::string yt_path = "./test_data/activationGrad/tanh_yt_50.bin";
+  auto yt_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(yt_path.c_str(), &input_size));
+  ASSERT_TRUE(yt_data != nullptr);
+
+  auto output_data = new float[data_len];
+  // warm up loop
+  for (int i = 0; i < 3; i++) {
+    TanhGrad(yt_data, input_data, data_len, output_data);
+  }
+
+  int loop_count = 100;
+  auto time_start = mindspore::lite::GetTimeUs();
+  for (int i = 0; i < loop_count; i++) {
+    TanhGrad(yt_data, input_data, data_len, output_data);
+  }
+  auto time_end = mindspore::lite::GetTimeUs();
+  auto cost = time_end - time_start;
+  uint64_t time_avg = cost / loop_count;
+  printf("single thread running time : %f ms\n", time_avg / 1000.0f);
+
+  printf("==================output data=================\n");
+  for (int i = 0; i < 20; i++) {
+    std::cout << output_data[i] << " ,";
+  }
+  std::cout << std::endl;
+
+  std::string output_path = "./test_data/activationGrad/tanh_out_50.bin";
+  int res = lite::CompareRelativeOutput(output_data, output_path);
+
+  EXPECT_EQ(res, 0);
+
+  delete input_data;
+  delete[] output_data;
+  delete yt_data;
+  MS_LOG(INFO) << "TanhGradFp32 passed";
+}
+
+// Calls HSwishGrad on hswish_x_50.bin / hswish_yt_50.bin, prints the average time of
+// 100 calls and the first 20 outputs, then compares the output with hswish_out_50.bin.
+TEST_F(TestActGradFp32, hswishGradFp32) {
+  // runtime part
+  printf("Calculating runtime cost...\n");
+  // Element count passed to the kernel; also sizes the output buffer.
+  const int data_len = 50;
+
+  size_t input_size = 0;
+  std::string input_path = "./test_data/activationGrad/hswish_x_50.bin";
+  auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size));
+  // ReadFile presumably yields null when the fixture is missing (confirm); stop before the kernel reads it.
+  ASSERT_TRUE(input_data != nullptr);
+  std::string yt_path = "./test_data/activationGrad/hswish_yt_50.bin";
+  auto yt_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(yt_path.c_str(), &input_size));
+  ASSERT_TRUE(yt_data != nullptr);
+
+  auto output_data = new float[data_len];
+  // warm up loop
+  for (int i = 0; i < 3; i++) {
+    HSwishGrad(yt_data, input_data, data_len, output_data);
+  }
+
+  int loop_count = 100;
+  auto time_start = mindspore::lite::GetTimeUs();
+  for (int i = 0; i < loop_count; i++) {
+    HSwishGrad(yt_data, input_data, data_len, output_data);
+  }
+  auto time_end = mindspore::lite::GetTimeUs();
+  auto cost = time_end - time_start;
+  uint64_t time_avg = cost / loop_count;
+  printf("single thread running time : %f ms\n", time_avg / 1000.0f);
+
+  printf("==================output data=================\n");
+  for (int i = 0; i < 20; i++) {
+    std::cout << output_data[i] << " ,";
+  }
+  std::cout << std::endl;
+
+  std::string output_path = "./test_data/activationGrad/hswish_out_50.bin";
+  int res = lite::CompareRelativeOutput(output_data, output_path);
+
+  EXPECT_EQ(res, 0);
+
+  // NOTE(review): scalar delete on ReadFile buffers is kept from the original - confirm the allocator.
+  delete input_data;
+  delete[] output_data;
+  delete yt_data;
+  MS_LOG(INFO) << "hswishGradFp32 passed";
+}
+
+}  // namespace mindspore
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/arithmetic_grad_fp32_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/arithmetic_grad_fp32_tests.cc
new file mode 100644
index 0000000000000000000000000000000000000000..48d5c93e563447c34edb766e24082ec592888e36
--- /dev/null
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/arithmetic_grad_fp32_tests.cc
@@ -0,0 +1,497 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the
Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <cinttypes>
+#include <cstdio>
+#include <iostream>
+#include <string>
+#include <vector>
+#include "utils/log_adapter.h"
+#include "common/common_test.h"
+#include "src/common/file_utils.h"
+#include "src/common/file_utils_ext.h"
+#include "mindspore/lite/src/runtime/kernel/arm/opclib/fp32/reduce.h"
+#include "mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_grad.h"
+#include "mindspore/lite/src/kernel_registry.h"
+
+namespace mindspore {
+
+// Fixture shared by the fp32 arithmetic-gradient tests in this file.
+class TestArithmeticGradFp32 : public mindspore::Common {
+ public:
+  TestArithmeticGradFp32() {}
+};
+
+// Builds the five heap-allocated tensors every case works on and returns them in the
+// order {dy, x1, x2, dx1, dx2}.
+//   test    - path of the dy fixture.
+//   test_id - selects shapes and x1/x2 fixtures: 7 -> {4,5,6} / {6}; >= 8 -> {5,4,6} / {5,1,6};
+//             anything else -> {4,6} / {6}.
+// dy, x1 and dx1 use the large shape, x2 and dx2 the small one. dx1/dx2 wrap freshly
+// allocated, uninitialised float buffers that the kernel is expected to fill.
+std::vector<lite::tensor::Tensor *> GenerateTensorsForTest(const char *test, int test_id) {
+  size_t input_size;
+  std::vector<int> large_dim({4, 6});
+  std::vector<int> small_dim({6});
+  int large_size = (4 * 6);
+  int small_size = (1 * 6);
+  // ReadFile is already handed a const char * (`test`) below, so the literals need no const_cast.
+  const char *dx1_file = "./test_data/operators/arithmetic_fp32_1_x1_4_6.bin";
+  const char *dx2_file = "./test_data/operators/arithmetic_fp32_1_x2_1_6.bin";
+
+  if (test_id == 7) {
+    large_dim = std::vector<int>({4, 5, 6});
+    small_dim = std::vector<int>({6});
+    large_size = (4 * 5 * 6);
+    small_size = (6);
+    dx1_file = "./test_data/operators/arithmetic_fp32_7_x1_4_5_6.bin";
+    dx2_file = "./test_data/operators/arithmetic_fp32_7_x2_1_1_6.bin";
+  }
+  if (test_id >= 8) {
+    large_dim = std::vector<int>({5, 4, 6});
+    small_dim = std::vector<int>({5, 1, 6});
+    large_size = (4 * 5 * 6);
+    small_size = (5 * 6);
+    dx1_file = "./test_data/operators/arithmetic_fp32_8_x1_5_4_6.bin";
+    dx2_file = "./test_data/operators/arithmetic_fp32_8_x2_5_1_6.bin";
+  }
+
+  auto dy_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(test, &input_size));
+  lite::tensor::Tensor *dy_tensor = new lite::tensor::Tensor(TypeId::kNumberTypeFloat32, large_dim);
+  dy_tensor->SetData(dy_data);
+
+  auto x1_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(dx1_file, &input_size));
+  lite::tensor::Tensor *x1_tensor = new lite::tensor::Tensor(TypeId::kNumberTypeFloat32, large_dim);
+  x1_tensor->SetData(x1_data);
+
+  auto x2_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(dx2_file, &input_size));
+  lite::tensor::Tensor *x2_tensor = new lite::tensor::Tensor(TypeId::kNumberTypeFloat32, small_dim);
+  x2_tensor->SetData(x2_data);
+
+  auto dx1_data = new float[large_size];
+  lite::tensor::Tensor *dx1_tensor = new lite::tensor::Tensor(TypeId::kNumberTypeFloat32, large_dim);
+  dx1_tensor->SetData(dx1_data);
+
+  auto dx2_data = new float[small_size];
+  lite::tensor::Tensor *dx2_tensor = new lite::tensor::Tensor(TypeId::kNumberTypeFloat32, small_dim);
+  dx2_tensor->SetData(dx2_data);
+
+  std::vector<lite::tensor::Tensor *> ret_vector = {dy_tensor, x1_tensor, x2_tensor, dx1_tensor, dx2_tensor};
+  return ret_vector;
+}
+
+// Shared body of every test below: creates the CPU fp32 kernel registered for `type`, runs it on
+// the tensors from GenerateTensorsForTest(dy_file, test_id) and compares both gradient outputs
+// with fixture files via lite::CompareRelativeOutput (expected result 0).
+//   swap_operands   - false: inputs {dy, x1, x2}, outputs {dx1, dx2};
+//                     true:  inputs {dy, x2, x1}, outputs {dx2, dx1}.
+//   large_grad_file - expected values for the large-shaped output (tensor index 3).
+//   small_grad_file - expected values for the small-shaped output (tensor index 4).
+//   dump_large_grad - print the first six values of the large output instead of the small one.
+//   timed_loops     - when > 0, run the kernel that many times and print the total cost;
+//                     otherwise run it exactly once.
+static void RunArithmeticGradCase(schema::PrimitiveType type, const char *dy_file, int test_id, bool swap_operands,
+                                  const std::string &large_grad_file, const std::string &small_grad_file,
+                                  bool dump_large_grad = false, int timed_loops = 0) {
+  auto param = new ArithmeticParameter();
+  param->op_parameter_.type_ = type;
+  std::vector<lite::tensor::Tensor *> all_tensors = GenerateTensorsForTest(dy_file, test_id);
+  lite::tensor::Tensor *large_grad = all_tensors[3];
+  lite::tensor::Tensor *small_grad = all_tensors[4];
+
+  // Operand index i (1 or 2) pairs with gradient index i + 2 (3 or 4).
+  const int lhs = swap_operands ? 2 : 1;
+  const int rhs = swap_operands ? 1 : 2;
+  std::vector<lite::tensor::Tensor *> inputs = {all_tensors[0], all_tensors[lhs], all_tensors[rhs]};
+  std::vector<lite::tensor::Tensor *> outputs = {all_tensors[lhs + 2], all_tensors[rhs + 2]};
+  kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, type};
+  auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
+  EXPECT_TRUE(creator != nullptr);
+  auto kernel_obj =
+    (creator != nullptr) ? creator(inputs, outputs, reinterpret_cast<OpParameter *>(param), nullptr, desc) : nullptr;
+  EXPECT_TRUE(kernel_obj != nullptr);
+
+  if (kernel_obj != nullptr) {
+    if (timed_loops > 0) {
+      auto time_start = mindspore::lite::GetTimeUs();
+      for (int i = 0; i < timed_loops; i++) {
+        kernel_obj->Run();
+      }
+      auto time_end = mindspore::lite::GetTimeUs();
+      auto cost = time_end - time_start;
+      // cost is presumably 64-bit; PRIu64 with an explicit cast stays correct where long is 32 bits.
+      printf("total cost (for %d loops): %" PRIu64 " us\n", timed_loops, static_cast<uint64_t>(cost));
+    } else {
+      kernel_obj->Run();
+    }
+
+    float *dump_ptr = reinterpret_cast<float *>((dump_large_grad ? large_grad : small_grad)->Data());
+    printf("==================output data=================\n");
+    for (int i = 0; i < 6; i++) {
+      std::cout << dump_ptr[i] << " ,";
+    }
+    std::cout << std::endl;
+
+    EXPECT_EQ(0, lite::CompareRelativeOutput(reinterpret_cast<float *>(large_grad->Data()), large_grad_file));
+    EXPECT_EQ(0, lite::CompareRelativeOutput(reinterpret_cast<float *>(small_grad->Data()), small_grad_file));
+  }
+
+  // NOTE(review): kernel_obj is not released here, exactly as in the original tests - confirm
+  // whether the kernel must be deleted by the caller and whether it takes ownership of param.
+  for (int i = 0; i < 5; i++) delete all_tensors[i];
+  delete param;
+}
+
+TEST_F(TestArithmeticGradFp32, TestAddGradFp32) {
+  RunArithmeticGradCase(schema::PrimitiveType_AddGrad, "./test_data/operators/arithmetic_fp32_1_dy_4_6.bin", 1, false,
+                        "./test_data/operators/arithmetic_fp32_1_dx1_4_6.bin",
+                        "./test_data/operators/arithmetic_fp32_1_dx2_1_6.bin");
+  MS_LOG(INFO) << "TestAddGradFp32 passed";
+}
+
+TEST_F(TestArithmeticGradFp32, TestAddGrad2Fp32) {
+  RunArithmeticGradCase(schema::PrimitiveType_AddGrad, "./test_data/operators/arithmetic_fp32_1_dy_4_6.bin", 1, true,
+                        "./test_data/operators/arithmetic_fp32_1_dx1_4_6.bin",
+                        "./test_data/operators/arithmetic_fp32_1_dx2_1_6.bin");
+  MS_LOG(INFO) << "TestAddGrad2Fp32 passed";
+}
+
+// Unlike the other cases this one dumps the large-shaped output.
+TEST_F(TestArithmeticGradFp32, TestAddGrad3Fp32) {
+  RunArithmeticGradCase(schema::PrimitiveType_AddGrad, "./test_data/operators/arithmetic_fp32_8_dy_5_4_6.bin", 8,
+                        false, "./test_data/operators/arithmetic_fp32_8_dx1_5_4_6.bin",
+                        "./test_data/operators/arithmetic_fp32_8_dx2_5_1_6.bin", true);
+  MS_LOG(INFO) << "TestAddGrad3Fp32 passed";
+}
+
+TEST_F(TestArithmeticGradFp32, TestSubGradFp32) {
+  RunArithmeticGradCase(schema::PrimitiveType_SubGrad, "./test_data/operators/arithmetic_fp32_2_dy_4_6.bin", 2, false,
+                        "./test_data/operators/arithmetic_fp32_2_dx1_4_6.bin",
+                        "./test_data/operators/arithmetic_fp32_2_dx2_1_6.bin");
+  MS_LOG(INFO) << "TestSubGradFp32 passed";
+}
+
+TEST_F(TestArithmeticGradFp32, TestSubGrad2Fp32) {
+  RunArithmeticGradCase(schema::PrimitiveType_SubGrad, "./test_data/operators/arithmetic_fp32_3_dy_4_6.bin", 3, true,
+                        "./test_data/operators/arithmetic_fp32_3_dx1_4_6.bin",
+                        "./test_data/operators/arithmetic_fp32_3_dx2_1_6.bin");
+  MS_LOG(INFO) << "TestSubGrad2Fp32 passed";
+}
+
+// The only case that times the kernel: 1000 runs, total cost printed.
+TEST_F(TestArithmeticGradFp32, TestMulGradFp32) {
+  RunArithmeticGradCase(schema::PrimitiveType_MulGrad, "./test_data/operators/arithmetic_fp32_4_dy_4_6.bin", 4, false,
+                        "./test_data/operators/arithmetic_fp32_4_dx1_4_6.bin",
+                        "./test_data/operators/arithmetic_fp32_4_dx2_1_6.bin", false, 1000);
+  MS_LOG(INFO) << "TestMulGradFp32 passed";
+}
+
+TEST_F(TestArithmeticGradFp32, TestMulGrad2Fp32) {
+  RunArithmeticGradCase(schema::PrimitiveType_MulGrad, "./test_data/operators/arithmetic_fp32_4_dy_4_6.bin", 4, true,
+                        "./test_data/operators/arithmetic_fp32_4_dx1_4_6.bin",
+                        "./test_data/operators/arithmetic_fp32_4_dx2_1_6.bin");
+  MS_LOG(INFO) << "TestMulGrad2Fp32 passed";
+}
+
+TEST_F(TestArithmeticGradFp32, TestMulGrad3Fp32) {
+  RunArithmeticGradCase(schema::PrimitiveType_MulGrad, "./test_data/operators/arithmetic_fp32_9_dy_5_4_6.bin", 9,
+                        false, "./test_data/operators/arithmetic_fp32_9_dx1_5_4_6.bin",
+                        "./test_data/operators/arithmetic_fp32_9_dx2_5_1_6.bin");
+  MS_LOG(INFO) << "TestMulGrad3Fp32 passed";
+}
+
+TEST_F(TestArithmeticGradFp32, TestMulGrad4Fp32) {
+  RunArithmeticGradCase(schema::PrimitiveType_MulGrad, "./test_data/operators/arithmetic_fp32_9_dy_5_4_6.bin", 9, true,
+                        "./test_data/operators/arithmetic_fp32_9_dx1_5_4_6.bin",
+                        "./test_data/operators/arithmetic_fp32_9_dx2_5_1_6.bin");
+  MS_LOG(INFO) << "TestMulGrad4Fp32 passed";
+}
+
+TEST_F(TestArithmeticGradFp32, TestDivGradFp32) {
+  RunArithmeticGradCase(schema::PrimitiveType_DivGrad, "./test_data/operators/arithmetic_fp32_5_dy_4_6.bin", 5, false,
+                        "./test_data/operators/arithmetic_fp32_5_dx1_4_6.bin",
+                        "./test_data/operators/arithmetic_fp32_5_dx2_1_6.bin");
+  MS_LOG(INFO) << "TestDivGradFp32 passed";
+}
+
+// Operands are swapped, so the large-shaped output is checked against the dx2 fixture and the
+// small-shaped one against the dx1 fixture.
+TEST_F(TestArithmeticGradFp32, TestDivGrad2Fp32) {
+  RunArithmeticGradCase(schema::PrimitiveType_DivGrad, "./test_data/operators/arithmetic_fp32_6_dy_4_6.bin", 6, true,
+                        "./test_data/operators/arithmetic_fp32_6_dx2_4_6.bin",
+                        "./test_data/operators/arithmetic_fp32_6_dx1_1_6.bin");
+  MS_LOG(INFO) << "TestDivGrad2Fp32 passed";
+}
+
+TEST_F(TestArithmeticGradFp32, TestDivGrad3Fp32) {
+  RunArithmeticGradCase(schema::PrimitiveType_DivGrad, "./test_data/operators/arithmetic_fp32_10_dy_5_4_6.bin", 10,
+                        false, "./test_data/operators/arithmetic_fp32_10_dx1_5_4_6.bin",
+                        "./test_data/operators/arithmetic_fp32_10_dx2_5_1_6.bin");
+  MS_LOG(INFO) << "TestDivGrad3Fp32 passed";
+}
+
+TEST_F(TestArithmeticGradFp32, Test3DDivGrad2Fp32) {
+  RunArithmeticGradCase(schema::PrimitiveType_DivGrad, "./test_data/operators/arithmetic_fp32_7_dy_4_5_6.bin", 7,
+                        false, "./test_data/operators/arithmetic_fp32_7_dx1_4_5_6.bin",
+                        "./test_data/operators/arithmetic_fp32_7_dx2_1_1_6.bin");
+  // The original logged "TestDivGrad2Fp32 passed" here, which named a different test.
+  MS_LOG(INFO) << "Test3DDivGrad2Fp32 passed";
+}
+
+}  // namespace mindspore
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/bias_grad_fp32_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/bias_grad_fp32_tests.cc
new file mode 100644
index 0000000000000000000000000000000000000000..7c26e9502291b181e45f259acd55d4f7c696847e
--- /dev/null
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/bias_grad_fp32_tests.cc
@@ -0,0 +1,71 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <cstdio>
+#include <iostream>
+#include <string>
+#include <vector>
+#include "utils/log_adapter.h"
+#include "common/common_test.h"
+#include "src/common/file_utils.h"
+#include "mindspore/lite/src/runtime/kernel/arm/fp32/bias_grad.h"
+#include "mindspore/lite/src/kernel_registry.h"
+
+namespace mindspore {
+
+// Fixture for the fp32 bias-gradient test.
+class TestBiasGradFp32 : public mindspore::Common {
+ public:
+  TestBiasGradFp32() {}
+};
+
+// Runs the CPU fp32 BiasGrad kernel on a {10, 28, 28, 7} dy fixture, writing into a 7-element
+// output, prints the seven results and hands them to lite::CompareOutput.
+TEST_F(TestBiasGradFp32, BiasGradFp32) {
+  // prepare stage
+  auto bias_param = new ArithmeticParameter();
+
+  size_t input_size;
+  std::string input_path = "./test_data/operators/biasgradfp32_1_dy_10_28_28_7.bin";
+  auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size));
+  // ReadFile presumably yields null when the fixture is missing (confirm); stop before the kernel reads it.
+  ASSERT_TRUE(input_data != nullptr);
+  std::vector<int> dim_dy({10, 28, 28, 7});
+  lite::tensor::Tensor dy_tensor(TypeId::kNumberTypeFloat32, dim_dy);
+  dy_tensor.SetData(input_data);
+
+  std::vector<lite::tensor::Tensor *> inputs = {&dy_tensor};
+
+  auto output_data = new float[7];
+  std::vector<int> dim_dw({7});
+  lite::tensor::Tensor dw_tensor(TypeId::kNumberTypeFloat32, dim_dw);
+  dw_tensor.SetData(output_data);
+  std::vector<lite::tensor::Tensor *> outputs = {&dw_tensor};
+
+  kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_BiasGrad};
+
+  auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
+  ASSERT_TRUE(creator != nullptr);
+  auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(bias_param), nullptr, desc);
+  ASSERT_TRUE(kernel_obj != nullptr);
+
+  kernel_obj->Run();
+
+  printf("==================output data=================\n");
+  for (int i = 0; i < 7; i++) {
+    std::cout << output_data[i] << " ,";
+  }
+  std::cout << std::endl;
+  std::string output_path = "./test_data/operators/biasgradfp32_1_db_7.bin";
+  // NOTE(review): nothing asserts on this comparison, so wrong values cannot fail the test.
+  // If CompareOutput returns a status, wrap it in EXPECT_EQ - confirm its signature first.
+  lite::CompareOutput(output_data, output_path);
+
+  // NOTE(review): the buffers below were left un-deleted by the author (presumably the tensors
+  // release them - confirm); kernel_obj is not released either.
+  // delete input_data;
+  // delete[] output_data;
+  delete bias_param;
+  MS_LOG(INFO) << "BiasGradFp32 passed";
+}
+
+}  // namespace mindspore
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/convolution_grad_fp32_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/convolution_grad_fp32_tests.cc
new file mode 100644
index 0000000000000000000000000000000000000000..ef501fa65537411f547db122df6ac8e99a18916c
--- /dev/null
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/convolution_grad_fp32_tests.cc
@@ -0,0 +1,521 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cstdio>
+#include <iostream>
+#include <memory>
+#include <string>
+#include <vector>
+#include "utils/log_adapter.h"
+#include "common/common_test.h"
+#include "src/common/file_utils.h"
+#include "src/common/file_utils_ext.h"
+#include "mindspore/lite/src/runtime/kernel/arm/fp32/convolution_grad_filter.h"
+#include "mindspore/lite/src/runtime/kernel/arm/fp32/convolution_grad_input.h"
+#include "mindspore/lite/src/runtime/kernel/arm/opclib/conv_parameter.h"
+#include "mindspore/lite/src/kernel_registry.h"
+
+namespace mindspore {
+// Fixture shared by the fp32 convolution-gradient tests in this file.
+class TestConvolutionGradFp32 : public mindspore::Common {
+ public:
+  TestConvolutionGradFp32() {}
+};
+
+// Fills conv_param for a 1x28x28x3 input and 1x28x28x32 output: 3x3 kernel, stride 1,
+// dilation 1, pad 1, group 1, no relu/relu6, one thread.
+void InitConvParamGroup1FP32(ConvParameter *conv_param) {
+  conv_param->input_batch_ = 1;
+  conv_param->input_h_ = 28;
+  conv_param->input_w_ = 28;
+  conv_param->input_channel_ = 3;
+
+  conv_param->output_batch_ = 1;
+  conv_param->output_h_ = 28;
+  conv_param->output_w_ = 28;
+  conv_param->output_channel_ = 32;
+
+  conv_param->kernel_h_ = 3;
+  conv_param->kernel_w_ = 3;
+
+  conv_param->stride_h_ = 1;
+  conv_param->stride_w_ = 1;
+
+  conv_param->dilation_h_ = 1;
+  conv_param->dilation_w_ = 1;
+
+  conv_param->pad_h_ = 1;
+  conv_param->pad_w_ = 1;
+
+  conv_param->group_ = 1;
+  conv_param->is_relu_ = false;
+  conv_param->is_relu6_ = false;
+  conv_param->thread_num_ = 1;
+}
+
+// Group-1 settings with group_ = 3 and output_channel_ = 18.
+void InitConvParamGroup3FP32(ConvParameter *conv_param) {
+  InitConvParamGroup1FP32(conv_param);
+  conv_param->group_ = 3;
+  conv_param->output_channel_ = 18;
+}
+
+// Group-3 settings with dilation 2 in both directions and a 26x26 output.
+void InitConvParamGroup3Dilation2FP32(ConvParameter *conv_param) {
+  InitConvParamGroup3FP32(conv_param);
+  conv_param->dilation_h_ = 2;
+  conv_param->dilation_w_ = 2;
+  conv_param->output_h_ = 26;
+  conv_param->output_w_ = 26;
+}
+
+// Runs the CPU fp32 Conv2DGradFilter kernel with inputs {dy, x} and output dw, prints the
+// average time of 100 runs (after 3 warm-up runs) and compares dw with convfp32_dw_32_3_3_3.bin.
+TEST_F(TestConvolutionGradFp32, ConvFp32FilterGrad) {
+  // prepare stage
+  auto conv_param = new ConvParameter();
+  InitConvParamGroup1FP32(conv_param);
+
+  size_t dy_size;
+  std::string dy_path = "./test_data/conv/convfp32_dy_1_28_28_32.bin";
+  auto dy_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(dy_path.c_str(), &dy_size));
+  // ReadFile presumably yields null when the fixture is missing (confirm); stop before the kernel reads it.
+  ASSERT_TRUE(dy_data != nullptr);
+  std::vector<int> dim_dy({1, 28, 28, 32});
+  lite::tensor::Tensor dy_tensor(TypeId::kNumberTypeFloat32, dim_dy);
+  dy_tensor.SetData(dy_data);
+
+  // runtime part
+  printf("Calculating runtime cost...\n");
+  size_t output_data_size =
+    conv_param->output_channel_ * conv_param->kernel_h_ * conv_param->kernel_w_ * conv_param->input_channel_;
+
+  size_t input_size;
+  std::string input_path = "./test_data/conv/convfp32_x_1_28_28_3.bin";
+  auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size));
+  ASSERT_TRUE(input_data != nullptr);
+  std::vector<int> dim_x({1, 28, 28, 3});
+  lite::tensor::Tensor x_tensor(TypeId::kNumberTypeFloat32, dim_x);
+  x_tensor.SetData(input_data);
+
+  auto dw_data = new float[output_data_size];
+  std::vector<int> dim_dw({32, 3, 3, 3});
+  lite::tensor::Tensor dw_tensor(TypeId::kNumberTypeFloat32, dim_dw);
+  dw_tensor.SetData(dw_data);
+  std::vector<lite::tensor::Tensor *> inputs = {&dy_tensor, &x_tensor};
+  std::vector<lite::tensor::Tensor *> outputs = {&dw_tensor};
+
+  kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_Conv2DGradFilter};
+  auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
+  ASSERT_TRUE(creator != nullptr);
+  auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(conv_param), nullptr, desc);
+  ASSERT_TRUE(kernel != nullptr);
+
+  // warm up loop
+  for (int i = 0; i < 3; i++) {
+    kernel->Run();
+  }
+
+  int loop_count = 100;
+  auto time_start = mindspore::lite::GetTimeUs();
+  for (int i = 0; i < loop_count; i++) {
+    kernel->Run();
+  }
+  auto time_end = mindspore::lite::GetTimeUs();
+  auto cost = time_end - time_start;
+  uint64_t time_avg = cost / loop_count;
+  printf("single thread running time : %f ms\n", time_avg / 1000.0f);
+
+  std::string output_path = "./test_data/conv/convfp32_dw_32_3_3_3.bin";
+  auto res = lite::CompareRelativeOutput(dw_data, output_path);
+
+  EXPECT_EQ(res, 0);
+
+  // NOTE(review): the data buffers were left un-deleted by the author (presumably the tensors
+  // release them - confirm).
+  // delete input_data;
+  // delete dy_data;
+  // delete [] dw_data;
+  delete kernel;
+  delete conv_param;
+  MS_LOG(INFO) << "TestConvolutionGradFp32 Filter Grad passed";
+}
+
+TEST_F(TestConvolutionGradFp32, ConvFp32InputGrad) { + // prepare stage + auto conv_param = new ConvParameter(); + InitConvParamGroup1FP32(conv_param); + + size_t dy_size; + std::string dy_path = "./test_data/conv/convfp32_dy_1_28_28_32.bin"; + auto dy_data = reinterpret_cast(mindspore::lite::ReadFile(dy_path.c_str(), &dy_size)); + std::vector dim_dy({1, 28, 28, 32}); + lite::tensor::Tensor dy_tensor(TypeId::kNumberTypeFloat32, dim_dy); + dy_tensor.SetData(dy_data); + + size_t w_size; + std::string w_path = "./test_data/conv/convfp32_w_32_3_3_3.bin"; + auto w_data = reinterpret_cast(mindspore::lite::ReadFile(w_path.c_str(), &w_size)); + std::vector dim_dw({32, 3, 3, 3}); + lite::tensor::Tensor w_tensor(TypeId::kNumberTypeFloat32, dim_dw); + w_tensor.SetData(w_data); + + size_t output_data_size = + conv_param->input_batch_ * conv_param->input_h_ * conv_param->input_w_ * conv_param->input_channel_; + auto dx_data = new float[output_data_size]; + std::vector dim_dx({1, 28, 28, 3}); + lite::tensor::Tensor dx_tensor(TypeId::kNumberTypeFloat32, dim_dx); + dx_tensor.SetData(dx_data); + + std::vector inputs = {&dy_tensor, &w_tensor}; + std::vector outputs = {&dx_tensor}; + // runtime part + + printf("Calculating runtime cost...\n"); + uint64_t time_avg = 0; + + kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_Conv2DGradInput}; + auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); + auto kernel = creator(inputs, outputs, reinterpret_cast(conv_param), NULL, desc); + + // warm up loop + for (int i = 0; i < 3; i++) { + kernel->Run(); + } + + int loop_count = 100; + auto time_start = mindspore::lite::GetTimeUs(); + for (int i = 0; i < loop_count; i++) { + kernel->Run(); + } + auto time_end = mindspore::lite::GetTimeUs(); + auto cost = time_end - time_start; + time_avg = cost / loop_count; + printf("single thread running time : %f ms\n", time_avg / 1000.0f); + + std::string output_path = 
"./test_data/conv/convfp32_dx_1_28_28_3.bin"; + auto res = lite::CompareRelativeOutput(dx_data, output_path); + EXPECT_EQ(res, 0); + + delete kernel; + delete conv_param; + MS_LOG(INFO) << "TestConvolutionGradFp32 Filter Grad passed"; +} + +TEST_F(TestConvolutionGradFp32, ConvFp32GroupFilterGrad) { + // prepare stage + auto conv_param = new ConvParameter(); + InitConvParamGroup3FP32(conv_param); + + size_t dy_size; + std::string dy_path = "./test_data/conv/convfp32_dy_g3_1_28_28_18.bin"; + auto dy_data = reinterpret_cast(mindspore::lite::ReadFile(dy_path.c_str(), &dy_size)); + std::vector dim_dy({1, 28, 28, 18}); + lite::tensor::Tensor dy_tensor(TypeId::kNumberTypeFloat32, dim_dy); + dy_tensor.SetData(dy_data); + + // runtime part + printf("Calculating runtime cost...\n"); + uint64_t time_avg = 0; + size_t output_data_size = conv_param->output_channel_ * conv_param->kernel_h_ * conv_param->kernel_w_ * + conv_param->input_channel_ / conv_param->group_; + + size_t input_size; + std::string input_path = "./test_data/conv/convfp32_x_g3_1_28_28_3.bin"; + auto input_data = reinterpret_cast(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); + std::vector dim_x({1, 28, 28, 3}); + lite::tensor::Tensor x_tensor(TypeId::kNumberTypeFloat32, dim_x); + x_tensor.SetData(input_data); + + auto dw_data = new float[output_data_size]; + std::vector dim_dw({18, 3, 3, 1}); + lite::tensor::Tensor dw_tensor(TypeId::kNumberTypeFloat32, dim_dw); + dw_tensor.SetData(dw_data); + std::vector inputs = {&dy_tensor, &x_tensor}; + std::vector outputs = {&dw_tensor}; + + kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_Conv2DGradFilter}; + auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); + auto kernel = creator(inputs, outputs, reinterpret_cast(conv_param), NULL, desc); + + // warm up loop + for (int i = 0; i < 3; i++) { + kernel->Run(); + } + + int loop_count = 100; + auto time_start = mindspore::lite::GetTimeUs(); + for (int 
i = 0; i < loop_count; i++) { + kernel->Run(); + } + auto time_end = mindspore::lite::GetTimeUs(); + auto cost = time_end - time_start; + time_avg = cost / loop_count; + printf("single thread running time : %f ms\n", time_avg / 1000.0f); + + std::string output_path = "./test_data/conv/convfp32_dw_g3_18_3_3_3.bin"; + auto res = lite::CompareRelativeOutput(dw_data, output_path); + EXPECT_EQ(res, 0); + + // delete input_data; + // delete dy_data; + // delete [] dw_data; + delete kernel; + delete conv_param; + MS_LOG(INFO) << "TestConvolutionGradFp32 Filter Grad passed"; +} + +TEST_F(TestConvolutionGradFp32, ConvFp32GroupInputGrad) { + // prepare stage + auto conv_param = new ConvParameter(); + InitConvParamGroup3FP32(conv_param); + + size_t dy_size; + std::string dy_path = "./test_data/conv/convfp32_dy_g3_1_28_28_18.bin"; + auto dy_data = reinterpret_cast(mindspore::lite::ReadFile(dy_path.c_str(), &dy_size)); + std::vector dim_dy({1, 28, 28, 18}); + lite::tensor::Tensor dy_tensor(TypeId::kNumberTypeFloat32, dim_dy); + dy_tensor.SetData(dy_data); + + size_t w_size; + std::string w_path = "./test_data/conv/convfp32_w_g3_18_3_3_3.bin"; + auto w_data = reinterpret_cast(mindspore::lite::ReadFile(w_path.c_str(), &w_size)); + std::vector dim_dw({18, 3, 3, 1}); + lite::tensor::Tensor w_tensor(TypeId::kNumberTypeFloat32, dim_dw); + w_tensor.SetData(w_data); + + size_t output_data_size = + conv_param->input_batch_ * conv_param->input_h_ * conv_param->input_w_ * conv_param->input_channel_; + auto dx_data = new float[output_data_size]; + std::vector dim_dx({1, 28, 28, 3}); + lite::tensor::Tensor dx_tensor(TypeId::kNumberTypeFloat32, dim_dx); + dx_tensor.SetData(dx_data); + + std::vector inputs = {&dy_tensor, &w_tensor}; + std::vector outputs = {&dx_tensor}; + // runtime part + + printf("Calculating runtime cost...\n"); + uint64_t time_avg = 0; + + kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_Conv2DGradInput}; + auto creator = 
lite::KernelRegistry::GetInstance()->GetCreator(desc); + auto kernel = creator(inputs, outputs, reinterpret_cast(conv_param), NULL, desc); + + // warm up loop + for (int i = 0; i < 3; i++) { + kernel->Run(); + } + + int loop_count = 100; + auto time_start = mindspore::lite::GetTimeUs(); + for (int i = 0; i < loop_count; i++) { + kernel->Run(); + } + auto time_end = mindspore::lite::GetTimeUs(); + auto cost = time_end - time_start; + time_avg = cost / loop_count; + printf("single thread running time : %f ms\n", time_avg / 1000.0f); + + std::string output_path = "./test_data/conv/convfp32_dx_g3_1_28_28_3.bin"; + auto res = lite::CompareRelativeOutput(dx_data, output_path); + EXPECT_EQ(res, 0); + + delete kernel; + delete conv_param; + MS_LOG(INFO) << "TestConvolutionGradFp32 Filter Grad passed"; +} + +TEST_F(TestConvolutionGradFp32, ConvFp32GroupDilationFilterGrad) { + // prepare stage + auto conv_param = new ConvParameter(); + + InitConvParamGroup3Dilation2FP32(conv_param); + + size_t dy_size; + std::string dy_path = "./test_data/conv/convfp32_dy_g3_d2_1_26_26_18.bin"; + auto dy_data = reinterpret_cast(mindspore::lite::ReadFile(dy_path.c_str(), &dy_size)); + std::vector dim_dy({1, 26, 26, 18}); + lite::tensor::Tensor dy_tensor(TypeId::kNumberTypeFloat32, dim_dy); + dy_tensor.SetData(dy_data); + + // runtime part + printf("Calculating runtime cost...\n"); + uint64_t time_avg = 0; + size_t output_data_size = conv_param->output_channel_ * conv_param->kernel_h_ * conv_param->kernel_w_ * + conv_param->input_channel_ / conv_param->group_; + + size_t input_size; + std::string input_path = "./test_data/conv/convfp32_x_g3_d2_1_28_28_3.bin"; + auto input_data = reinterpret_cast(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); + std::vector dim_x({1, 28, 28, 3}); + lite::tensor::Tensor x_tensor(TypeId::kNumberTypeFloat32, dim_x); + x_tensor.SetData(input_data); + + auto dw_data = new float[output_data_size]; + std::vector dim_dw({18, 3, 3, 1}); + 
lite::tensor::Tensor dw_tensor(TypeId::kNumberTypeFloat32, dim_dw); + dw_tensor.SetData(dw_data); + std::vector inputs = {&dy_tensor, &x_tensor}; + std::vector outputs = {&dw_tensor}; + + kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_Conv2DGradFilter}; + auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); + auto kernel = creator(inputs, outputs, reinterpret_cast(conv_param), NULL, desc); + + // warm up loop + for (int i = 0; i < 3; i++) { + kernel->Run(); + } + + int loop_count = 100; + auto time_start = mindspore::lite::GetTimeUs(); + for (int i = 0; i < loop_count; i++) { + kernel->Run(); + } + auto time_end = mindspore::lite::GetTimeUs(); + auto cost = time_end - time_start; + time_avg = cost / loop_count; + printf("single thread running time : %f ms\n", time_avg / 1000.0f); + + std::string output_path = "./test_data/conv/convfp32_dw_g3_d2_18_3_3_3.bin"; + auto res = lite::CompareRelativeOutput(dw_data, output_path); + EXPECT_EQ(res, 0); + // delete input_data; + // delete dy_data; + // delete [] dw_data; + delete kernel; + delete conv_param; + MS_LOG(INFO) << "TestConvolutionGradFp32 Filter Grad passed"; +} + +TEST_F(TestConvolutionGradFp32, ConvFp32GroupDilationInputGrad) { + // prepare stage + auto conv_param = new ConvParameter(); + InitConvParamGroup3Dilation2FP32(conv_param); + + size_t dy_size; + std::string dy_path = "./test_data/conv/convfp32_dy_g3_d2_1_26_26_18.bin"; + auto dy_data = reinterpret_cast(mindspore::lite::ReadFile(dy_path.c_str(), &dy_size)); + std::vector dim_dy({1, 26, 26, 18}); + lite::tensor::Tensor dy_tensor(TypeId::kNumberTypeFloat32, dim_dy); + dy_tensor.SetData(dy_data); + + size_t w_size; + std::string w_path = "./test_data/conv/convfp32_w_g3_d2_18_3_3_3.bin"; + auto w_data = reinterpret_cast(mindspore::lite::ReadFile(w_path.c_str(), &w_size)); + std::vector dim_w({18, 3, 3, 1}); + lite::tensor::Tensor w_tensor(TypeId::kNumberTypeFloat32, dim_w); + 
w_tensor.SetData(w_data); + + size_t output_data_size = + conv_param->input_batch_ * conv_param->input_h_ * conv_param->input_w_ * conv_param->input_channel_; + auto dx_data = new float[output_data_size]; + std::vector dim_dx({1, 28, 28, 3}); + lite::tensor::Tensor dx_tensor(TypeId::kNumberTypeFloat32, dim_dx); + dx_tensor.SetData(dx_data); + + std::vector inputs = {&dy_tensor, &w_tensor}; + std::vector outputs = {&dx_tensor}; + // runtime part + + printf("Calculating runtime cost...\n"); + uint64_t time_avg = 0; + + kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_Conv2DGradInput}; + auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); + auto kernel = creator(inputs, outputs, reinterpret_cast(conv_param), NULL, desc); + + // warm up loop + for (int i = 0; i < 3; i++) { + kernel->Run(); + } + + int loop_count = 100; + auto time_start = mindspore::lite::GetTimeUs(); + for (int i = 0; i < loop_count; i++) { + kernel->Run(); + } + auto time_end = mindspore::lite::GetTimeUs(); + auto cost = time_end - time_start; + time_avg = cost / loop_count; + printf("single thread running time : %f ms\n", time_avg / 1000.0f); + + std::string output_path = "./test_data/conv/convfp32_dx_g3_d2_1_28_28_3.bin"; + auto res = lite::CompareRelativeOutput(dx_data, output_path); + EXPECT_EQ(res, 0); + + delete kernel; + delete conv_param; + MS_LOG(INFO) << "TestConvolutionGradFp32 Filter Grad passed"; +} + +// TEST_F(TestConvolutionGradFp32, ConvGroupDilation) { +// // prepare stage +// auto conv_param = new ConvParameter(); +// InitConvParamGroup3Dilation2FP32(conv_param); + +// size_t x_size; +// std::string x_path = "./test_data/conv/convfp32_x_g3_d2_1_28_28_3.bin"; +// auto x_data = reinterpret_cast(mindspore::lite::ReadFile(x_path.c_str(), &x_size)); +// std::vector dim_x({1, 28, 28, 3}); +// tensor::Tensor x_tensor(TypeId::kNumberTypeFloat32, dim_x); +// x_tensor.SetData(x_data); + +// size_t w_size; +// std::string w_path = 
"./test_data/conv/convfp32_w_g3_d2_18_3_3_3.bin"; +// auto w_data = reinterpret_cast(mindspore::lite::ReadFile(w_path.c_str(), &w_size)); +// std::vector dim_w({18, 3, 3, 1}); +// tensor::Tensor w_tensor(TypeId::kNumberTypeFloat32, dim_w); +// w_tensor.SetData(w_data); + +// size_t output_data_size = +// conv_param->output_batch_ * conv_param->output_h_ * conv_param->output_w_ * conv_param->output_channel_; +// auto y_data = new float[output_data_size]; +// std::vector dim_y({1, 26, 26, 18}); +// tensor::Tensor y_tensor(TypeId::kNumberTypeFloat32, dim_y); +// y_tensor.SetData(y_data); + +// std::vector inputs = {&x_tensor, &w_tensor}; +// std::vector outputs = {&y_tensor}; +// // runtime part + +// printf("Calculating runtime cost...\n"); +// uint64_t time_avg = 0; + +// lite::Context context; +// ; +// context.deviceCtx.type = lite::DT_CPU; +// context.threadNum = 1; + +// kernel::KernelKey desc = {kernel::kCPU, kNumberTypeFloat32, schema::PrimitiveType_Conv2D}; +// auto creator = lite::KernelRegistry::GetInstance()->GetKernelCreator(desc); +// auto kernel = creator(inputs, outputs, (OpParameter *)conv_param, &context, desc); + +// kernel->train(); +// EXPECT_EQ(kernel->is_train(), 1); + +// // warm up loop +// for (int i = 0; i < 3; i++) { +// kernel->Run(); +// } + +// int loop_count = 100; +// auto time_start = mindspore::lite::GetTimeUs(); +// for (int i = 0; i < loop_count; i++) { +// kernel->Run(); +// } +// auto time_end = mindspore::lite::GetTimeUs(); +// auto cost = time_end - time_start; +// time_avg = cost / loop_count; +// printf("single thread running time : %f ms\n", time_avg / 1000.0f); + +// std::string output_path = "./test_data/conv/convfp32_y_g3_d2_1_26_26_18.bin"; +// auto res = lite::CompareRelativeOutput(y_data, output_path); +// EXPECT_EQ(res, 0); + +// delete kernel; +// delete conv_param; + +// MS_LOG(INFO) << "TestConvolutionFp32 Filter Grad passed"; +// } + +} // namespace mindspore diff --git 
a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/pooling_grad_fp32_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/pooling_grad_fp32_tests.cc new file mode 100644 index 0000000000000000000000000000000000000000..0dace8d7cdd32f17fb875126af6f0600cb4bfba1 --- /dev/null +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/pooling_grad_fp32_tests.cc @@ -0,0 +1,332 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include "mindspore/lite/include/context.h" +#include "utils/log_adapter.h" +#include "common/common_test.h" +#include "mindspore/lite/src/kernel_registry.h" +#include "src/common/utils.h" +#include "src/common/file_utils.h" +#include "src/runtime/kernel/arm/fp32/pooling_grad.h" +#include "src/runtime/kernel/arm/opclib/fp32/pooling_grad.h" + +namespace mindspore { +class TestPoolingGradFp32 : public mindspore::Common { + public: + TestPoolingGradFp32() {} +}; + +void InitPoolingParamFP32(PoolingParameter *pooling_param) { + pooling_param->input_batch_ = 1; + pooling_param->input_h_ = 28; + pooling_param->input_w_ = 28; + pooling_param->input_channel_ = 3; + + pooling_param->output_batch_ = 1; + pooling_param->output_h_ = 28; + pooling_param->output_w_ = 28; + pooling_param->output_channel_ = 32; + + pooling_param->window_h_ = 3; + pooling_param->window_w_ = 3; + + pooling_param->stride_h_ = 1; + pooling_param->stride_w_ = 1; + + pooling_param->pad_u_ = 1; + pooling_param->pad_d_ = 1; + pooling_param->pad_l_ = 1; + pooling_param->pad_r_ = 1; + pooling_param->thread_num_ = 1; +} + +TEST_F(TestPoolingGradFp32, AvgPoolingGradFp32) { + // prepare stage + auto pooling_param = new PoolingParameter(); + InitPoolingParamFP32(pooling_param); + pooling_param->output_channel_ = 3; + + // runtime part + printf("Calculating runtime cost...\n"); + uint64_t time_avg = 0; + size_t output_data_size = + pooling_param->output_batch_ * pooling_param->output_channel_ * pooling_param->output_h_ * pooling_param->output_w_; + + size_t input_size; + std::string input_path = "./test_data/pooling/avgpoolgradfp32_1_dy_1_28_28_3.bin"; + auto input_data = reinterpret_cast(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); + + auto output_data = new float[output_data_size]; + // warm up loop + for (int i = 0; i < 3; i++) { + AvgPoolingGrad(input_data, output_data, pooling_param); + } + + int loop_count = 100; + auto time_start = mindspore::lite::GetTimeUs(); 
+ for (int i = 0; i < loop_count; i++) { + AvgPoolingGrad(input_data, output_data, pooling_param); + } + auto time_end = mindspore::lite::GetTimeUs(); + auto cost = time_end - time_start; + time_avg = cost / loop_count; + printf("single thread running time : %f ms\n", time_avg / 1000.0f); + + printf("==================output data=================\n"); + for (int i = 0; i < 20; i++) { + std::cout << output_data[i] << " ,"; + } + std::cout << std::endl; + std::string output_path = "./test_data/pooling/avgpoolgradfp32_1_dx_1_28_28_3.bin"; + lite::CompareOutput(output_data, output_path); + + delete input_data; + delete[] output_data; + delete pooling_param; + MS_LOG(INFO) << "TestAvgPoolingGradFp32 passed"; +} + +TEST_F(TestPoolingGradFp32, AvgPoolingKernelGradFp32) { + // prepare stage + auto pooling_param = new PoolingParameter(); + InitPoolingParamFP32(pooling_param); + + pooling_param->output_channel_ = 3; + + // runtime part + printf("Calculating runtime cost...\n"); + // uint64_t time_avg = 0; + size_t output_data_size = + pooling_param->output_batch_ * pooling_param->output_channel_ * pooling_param->output_h_ * pooling_param->output_w_; + + size_t input_size; + std::string input_path = "./test_data/pooling/avgpoolgradfp32_1_dy_1_28_28_3.bin"; + auto input_data = reinterpret_cast(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); + std::vector dim_dy({1, 28, 28, 3}); + lite::tensor::Tensor dy_tensor(TypeId::kNumberTypeFloat32, dim_dy); + dy_tensor.SetData(input_data); + + std::string input1_path = "./test_data/pooling/avgpoolgradfp32_1_x_1_28_28_3.bin"; + input_data = reinterpret_cast(mindspore::lite::ReadFile(input1_path.c_str(), &input_size)); + std::vector dim_x({1, 28, 28, 3}); + lite::tensor::Tensor x_tensor(TypeId::kNumberTypeFloat32, dim_x); + x_tensor.SetData(input_data); + + std::vector inputs = {&dy_tensor, &x_tensor}; + + auto output_data = new float[output_data_size]; + std::vector dim_dx({1, 28, 28, 3}); + lite::tensor::Tensor 
dx_tensor(TypeId::kNumberTypeFloat32, dim_dx); + dx_tensor.SetData(output_data); + std::vector outputs = {&dx_tensor}; + + kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_PoolingGrad}; + + auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); + auto kernel_obj = creator(inputs, outputs, reinterpret_cast(pooling_param), NULL, desc); + + kernel_obj->Run(); + + printf("==================output data=================\n"); + for (int i = 0; i < 20; i++) { + std::cout << output_data[i] << " ,"; + } + std::cout << std::endl; + std::string output_path = "./test_data/pooling/avgpoolgradfp32_1_dx_1_28_28_3.bin"; + lite::CompareOutput(output_data, output_path); + + // delete input_data; + // delete[] output_data; + delete pooling_param; + MS_LOG(INFO) << "TestAvgPoolingGradFp32 passed"; +} + +TEST_F(TestPoolingGradFp32, MaxPoolingGradFp32) { + // prepare stage + auto pooling_param = new PoolingParameter(); + InitPoolingParamFP32(pooling_param); + pooling_param->output_channel_ = 3; + pooling_param->avg_pooling_ = false; + pooling_param->max_pooling_ = true; + // runtime part + printf("Calculating runtime cost...\n"); + uint64_t time_avg = 0; + size_t output_data_size = + pooling_param->output_batch_ * pooling_param->output_channel_ * pooling_param->output_h_ * pooling_param->output_w_; + + size_t input_size; + std::string i_path = "./test_data/pooling/maxpoolgradfp32_1_i_1_28_28_3.bin"; + auto ill_data = reinterpret_cast(mindspore::lite::ReadFile(i_path.c_str(), &input_size)); + auto i_data = new int[output_data_size]; + for (uint32_t i = 0; i < output_data_size; i++) { + i_data[i] = static_cast(ill_data[i]); + } + + std::string dy_path = "./test_data/pooling/maxpoolgradfp32_1_dy_1_28_28_3.bin"; + auto dy_data = reinterpret_cast(mindspore::lite::ReadFile(dy_path.c_str(), &input_size)); + + auto output_data = new float[output_data_size]; + // warm up loop + for (int i = 0; i < 3; i++) { + MaxPoolingGrad(dy_data, i_data, 
output_data, pooling_param); + } + + int loop_count = 100; + auto time_start = mindspore::lite::GetTimeUs(); + for (int i = 0; i < loop_count; i++) { + MaxPoolingGrad(dy_data, i_data, output_data, pooling_param); + } + auto time_end = mindspore::lite::GetTimeUs(); + auto cost = time_end - time_start; + time_avg = cost / loop_count; + printf("single thread running time : %f ms\n", time_avg / 1000.0f); + + printf("==================output data=================\n"); + for (int i = 0; i < 20; i++) { + std::cout << output_data[i] << " ,"; + } + std::cout << std::endl; + std::string output_path = "./test_data/pooling/maxpoolgradfp32_1_dx_1_28_28_3.bin"; + lite::CompareOutput(output_data, output_path); + + // delete input_data; + delete pooling_param; + delete[] output_data; + MS_LOG(INFO) << "TestMaxPoolingGradFp32 passed"; +} + +#if 0 +TEST_F(TestPoolingGradFp32, MaxPoolingKernelGradFp32) { + // prepare stage + auto maxpool = new PoolingParameter(); + InitPoolingParamFP32(maxpool); + maxpool->avg_pooling_ = false; + maxpool->max_pooling_ = true; + maxpool->input_h_ = 30; + maxpool->input_w_ = 30; + maxpool->input_channel_ = 3; + + maxpool->output_batch_ = 1; + maxpool->output_h_ = 10; + maxpool->output_w_ = 10; + maxpool->output_channel_ = 3; + maxpool->stride_h_ = 3; + maxpool->stride_w_ = 3; + + maxpool->pad_u_ = 0; + maxpool->pad_d_ = 0; + maxpool->pad_l_ = 0; + maxpool->pad_r_ = 0; + + size_t input_size; + size_t y_data_size = maxpool->output_batch_ * maxpool->output_channel_ * maxpool->output_h_ * maxpool->output_w_; + + auto x_data = reinterpret_cast( + mindspore::lite::ReadFile("./test_data/pooling/maxpoolgradfp32_2_x_1_30_30_3.bin", &input_size)); + std::vector dim_x({1, 30, 30, 3}); + lite::tensor::Tensor x_tensor(TypeId::kNumberTypeFloat32, dim_x); + x_tensor.SetData(x_data); + std::vector maxpool_inputs = {&x_tensor}; + + auto y_data = new float[y_data_size]; + std::vector dim_y({1, 10, 10, 3}); + lite::tensor::Tensor y_tensor(TypeId::kNumberTypeFloat32, 
dim_y); + y_tensor.SetData(y_data); + + auto ind_data = new int[y_data_size]; + lite::tensor::Tensor ind_tensor(TypeId::kNumberTypeInt32, dim_y); + ind_tensor.SetData(ind_data); + + std::vector maxpool_outputs = {&y_tensor, &ind_tensor}; + + kernel::KernelKey maxpool_desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_Pooling}; + auto maxpool_creator = lite::KernelRegistry::GetInstance()->GetCreator(maxpool_desc); + auto maxpoolobj = maxpool_creator(maxpool_inputs, maxpool_outputs, reinterpret_cast(maxpool), + NULL, maxpool_desc); + maxpoolobj->Run(); + + printf("==================indices data=================\n"); + for (int i = 0; i < 10; i++) { + std::cout << ind_data[i] << " ,"; + } + std::cout << std::endl; + + auto pooling_param = new PoolingParameter(); + InitPoolingParamFP32(pooling_param); + pooling_param->avg_pooling_ = false; + pooling_param->max_pooling_ = true; + pooling_param->input_h_ = 10; + pooling_param->input_w_ = 10; + pooling_param->input_channel_ = 3; + + pooling_param->output_batch_ = 1; + pooling_param->output_h_ = 30; + pooling_param->output_w_ = 30; + pooling_param->output_channel_ = 3; + + // runtime part + printf("Calculating runtime cost...\n"); + // uint64_t time_avg = 0; + size_t output_data_size = + pooling_param->output_batch_ * pooling_param->output_channel_ * pooling_param->output_h_ * pooling_param->output_w_; + + auto dy_data = reinterpret_cast( + mindspore::lite::ReadFile("./test_data/pooling/maxpoolgradfp32_2_dy_1_10_10_3.bin", &input_size)); + std::vector dim_dy({1, 3, 10, 10}); + lite::tensor::Tensor dy_tensor(TypeId::kNumberTypeFloat32, dim_dy); + dy_tensor.SetData(dy_data); + +#if 0 + std::string i_path = "./test_data/pooling/maxpoolgradfp32_2_i_1_3_10_10.bin"; + auto ill_data = reinterpret_cast(mindspore::lite::ReadFile(i_path.c_str(), &input_size)); + auto i_data = new int[output_data_size]; + for (int i=0; i < output_data_size; i++) + i_data[i] = static_cast(ill_data[i]); + std::vector dim_ind({1, 3, 
10, 10}); + lite::tensor::Tensor ind_tensor(TypeId::kNumberTypeInt32, dim_ind); + ind_tensor.SetData(i_data); +#endif + + std::vector inputs = {&dy_tensor, &ind_tensor}; + + auto output_data = new float[output_data_size]; + std::vector dim_dx({1, 3, 30, 30}); + lite::tensor::Tensor dx_tensor(TypeId::kNumberTypeFloat32, dim_dx); + dx_tensor.SetData(output_data); + std::vector outputs = {&dx_tensor}; + + kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_PoolingGrad}; + auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); + auto kernel_obj = creator(inputs, outputs, reinterpret_cast(pooling_param), NULL, desc); + kernel_obj->Run(); + + printf("==================output data=================\n"); + for (int i = 0; i < 20; i++) { + std::cout << output_data[i] << " ,"; + } + std::cout << std::endl; + std::string output_path = "./test_data/pooling/maxpoolgradfp32_2_dx_1_30_30_3.bin"; + lite::CompareOutput(output_data, output_path); + + // delete input_data; + // delete[] output_data; + delete pooling_param; + MS_LOG(INFO) << "TestMaxPoolingKernelGradFp32 passed"; +} +#endif // if 0 before MaxPoolingKernelGradFp32 +} // namespace mindspore diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/hsigmoid_out_50.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/hsigmoid_out_50.bin new file mode 100644 index 0000000000000000000000000000000000000000..9418b5866b472c97233ce255d056b3f84527df44 --- /dev/null +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/hsigmoid_out_50.bin @@ -0,0 +1 @@ +"x>#>K9>pR >)J >4>K>Z>>>L>=Q>*^>M>&>6>S>*>N>-=+L>vK>+A}>w^>$Q>s>/W>=M'>9[*>#%<#>C>>>$=Gj>>7*>2>6> >1p>s#>Y)>k>9==lQ0>w> \ No newline at end of file diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/hsigmoid_x_50.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/hsigmoid_x_50.bin new file mode 
100644 index 0000000000000000000000000000000000000000..d216d74d9dbe53d43de8f103f81fd8416ff55ceb --- /dev/null +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/hsigmoid_x_50.bin @@ -0,0 +1 @@ +M?Ƿ? H2|7>0?dyX?C.\fT@?ͳg?Lw񾫘žE9&7A?T?XF4??ҹ?(k?0??VH?-Tz@&"-1w?F?羢D>Y> _p?] ?%R5 Ks=? \ No newline at end of file diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/hsigmoid_yt_50.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/hsigmoid_yt_50.bin new file mode 100644 index 0000000000000000000000000000000000000000..23f04a201536070ad6fd6e5d0d2dcefeba6b581c --- /dev/null +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/hsigmoid_yt_50.bin @@ -0,0 +1 @@ +?6V?U?S?=M?;?3P?;?E?Ln?u?!?V??sW?9_?e?}H?h??X=? ?%??Y1?[s??c??t{?Ո?7=DK?eW???>?kcY?S???_fQ?u%?-u?}??k9??=?? \ No newline at end of file diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/hswish_out_50.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/hswish_out_50.bin new file mode 100644 index 0000000000000000000000000000000000000000..6ff3dd84c97a76ff7d3b7e59c30d5c9688edd2a7 --- /dev/null +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/hswish_out_50.bin @@ -0,0 +1 @@ +v=qٽBs>Q=@"\=`ο;?廿? 
\ No newline at end of file diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/hswish_yt_50.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/hswish_yt_50.bin new file mode 100644 index 0000000000000000000000000000000000000000..b25a5c778789a510adef8e4bfaeab774aed1d472 Binary files /dev/null and b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/hswish_yt_50.bin differ diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/lrelu_out_50.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/lrelu_out_50.bin new file mode 100644 index 0000000000000000000000000000000000000000..b13b02ddbc42052e6cec58ba089a97ea76390f24 Binary files /dev/null and b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/lrelu_out_50.bin differ diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/lrelu_y_50.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/lrelu_y_50.bin new file mode 100644 index 0000000000000000000000000000000000000000..6031302671540be073958caf9e0510f85696f72f --- /dev/null +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/lrelu_y_50.bin @@ -0,0 +1 @@ +&5Sa?t?@W,2ս&8?;V?橡?$?5pNF7:?5V:΄?m ,!@`|>Vؚ ?_B?0Խ"?q!>%=,? 
>Ѓ?;?qGh?7<U>=?-ap?g?>r@X> \ No newline at end of file diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/lrelu_yt_50.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/lrelu_yt_50.bin new file mode 100644 index 0000000000000000000000000000000000000000..7cd15ef7f91202ece4cd01401a6de73b64d1c769 Binary files /dev/null and b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/lrelu_yt_50.bin differ diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/relu6_out_50.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/relu6_out_50.bin new file mode 100644 index 0000000000000000000000000000000000000000..0e9de34e838a98b8f169c1d10b4673b62786470d Binary files /dev/null and b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/relu6_out_50.bin differ diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/relu6_y_50.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/relu6_y_50.bin new file mode 100644 index 0000000000000000000000000000000000000000..095be3ca3a533381324322799563c12842504d3c Binary files /dev/null and b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/relu6_y_50.bin differ diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/relu6_yt_50.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/relu6_yt_50.bin new file mode 100644 index 0000000000000000000000000000000000000000..0e9de34e838a98b8f169c1d10b4673b62786470d Binary files /dev/null and b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/relu6_yt_50.bin differ diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/relu_out_50.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/relu_out_50.bin new file mode 100644 index 
0000000000000000000000000000000000000000..dc7098eca0e8ef210e6b3f762d83b8a78c06fd7e Binary files /dev/null and b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/relu_out_50.bin differ diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/relu_y_50.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/relu_y_50.bin new file mode 100644 index 0000000000000000000000000000000000000000..00d6139a772c8f73137107da2fea797b47537793 Binary files /dev/null and b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/relu_y_50.bin differ diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/relu_yt_50.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/relu_yt_50.bin new file mode 100644 index 0000000000000000000000000000000000000000..dc7098eca0e8ef210e6b3f762d83b8a78c06fd7e Binary files /dev/null and b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/relu_yt_50.bin differ diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/sigmoid_out_50.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/sigmoid_out_50.bin new file mode 100644 index 0000000000000000000000000000000000000000..8fcc885497be036600bfe56f29152b5efda22019 --- /dev/null +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/sigmoid_out_50.bin @@ -0,0 +1 @@ +]>E >Jn>bK>8=

&>g>];>Q>I>=\>S> ŀ=C*>K=n>Iy>>l>/=>rp>>>( >[>->{j=4>C>e>D>B==x>/m>vj>P>v=Pʕ>=3>vN= >ӂ> \ No newline at end of file diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/sigmoid_y_50.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/sigmoid_y_50.bin new file mode 100644 index 0000000000000000000000000000000000000000..9e3724e2c0d5bef22418627f4449bdbae32e824c --- /dev/null +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/sigmoid_y_50.bin @@ -0,0 +1 @@ +w>>XO>?>h>=%?o:?9>"q=7> >?? >9?{>t?D2\?J>n>1>>OF?/?7y?J0?eT?A?F$>'>Ab>#?"m@>$i>8?*C?)>r3?ᆒ>X?9y>>^2>S??w!'? \ No newline at end of file diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/sigmoid_yt_50.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/sigmoid_yt_50.bin new file mode 100644 index 0000000000000000000000000000000000000000..ca66395f8fc72c2f095a7bc2f61592226018a152 --- /dev/null +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/sigmoid_yt_50.bin @@ -0,0 +1 @@ +wa?"?XOo??Q?>E>?o?9i?"qv>> u??? W?9?>t?D2?>n8?>o?O?/Ǚ?7y?J?e?A?F$&?'?Ab_??"m>˼?? ??>$>?*?)l?r??X?9>??^>SԿ?w!? \ No newline at end of file diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/tanh_out_50.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/tanh_out_50.bin new file mode 100644 index 0000000000000000000000000000000000000000..9e3e18c43c380af8ca4cf7fbc62ca0f418af13f5 --- /dev/null +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/tanh_out_50.bin @@ -0,0 +1,2 @@ +@?^*Su>?(1?Y?O]>8>yͽh>Y:ן<e +?@?C?C?6GUp>_=I 0`>0>ݎ9?;?Gs*>e3>?ʑ>;?(,?&3*?C?Cw<2?=K>%HC%8?M>~>'u>JI>^4YuZ? 
\ No newline at end of file diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/tanh_y_50.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/tanh_y_50.bin new file mode 100644 index 0000000000000000000000000000000000000000..0a9a479053216ca28ee149d7a28170c1f82b3ecb Binary files /dev/null and b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/tanh_y_50.bin differ diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/tanh_yt_50.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/tanh_yt_50.bin new file mode 100644 index 0000000000000000000000000000000000000000..7323533d2f7db7e605f0718712a06a961f732344 Binary files /dev/null and b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/tanh_yt_50.bin differ diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/conv/convfp32_dw_32_3_3_3.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/conv/convfp32_dw_32_3_3_3.bin new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/conv/convfp32_dw_g3_18_3_3_3.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/conv/convfp32_dw_g3_18_3_3_3.bin new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/conv/convfp32_dw_g3_d2_18_3_3_3.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/conv/convfp32_dw_g3_d2_18_3_3_3.bin new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/conv/convfp32_dx_1_28_28_3.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/conv/convfp32_dx_1_28_28_3.bin new file mode 100644 index 
0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/conv/convfp32_dx_g3_1_28_28_3.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/conv/convfp32_dx_g3_1_28_28_3.bin new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/conv/convfp32_dx_g3_d2_1_28_28_3.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/conv/convfp32_dx_g3_d2_1_28_28_3.bin new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/conv/convfp32_dy_1_28_28_32.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/conv/convfp32_dy_1_28_28_32.bin new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/conv/convfp32_dy_g3_1_28_28_18.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/conv/convfp32_dy_g3_1_28_28_18.bin new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/conv/convfp32_dy_g3_d2_1_26_26_18.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/conv/convfp32_dy_g3_d2_1_26_26_18.bin new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/conv/convfp32_w_32_3_3_3.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/conv/convfp32_w_32_3_3_3.bin new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git 
a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/conv/convfp32_w_g3_18_3_3_3.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/conv/convfp32_w_g3_18_3_3_3.bin new file mode 100644 index 0000000000000000000000000000000000000000..1dc4bf74d844821f465898bd386b125b9be17e3d --- /dev/null +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/conv/convfp32_w_g3_18_3_3_3.bin @@ -0,0 +1,4 @@ +F .N2?󻾩`?ͽؿSį2xR=}%T?9>R?Eÿ?>?@<*Fs?h>۾i) W>+ ;=y@\?V=~?)оϬ?HF}?տի?F꓿E +?Gÿ#μ>PD>>J?gNY, <ֈu?Y_"4?fx Y7;¡̾?)???]@-/zb?Ye?M /6?"?t?T?; -1?,6?.>n>8D?Ǿ +F+j?~B? +P??t ek?I?WJ>&? ?;;隿j =sg?[k?r?ݖc>.ljy?D>S¿?l?rSq?#m(@_?>l %6?h%k>=4?ŋoJs>fW?8c;??k:?bQ1Y>yp>nW= z|S:?P?r?K?kw>->V?~> \ No newline at end of file diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/conv/convfp32_w_g3_d2_18_3_3_3.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/conv/convfp32_w_g3_d2_18_3_3_3.bin new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/conv/convfp32_x_1_28_28_3.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/conv/convfp32_x_1_28_28_3.bin new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/conv/convfp32_x_g3_1_28_28_3.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/conv/convfp32_x_g3_1_28_28_3.bin new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/conv/convfp32_x_g3_d2_1_28_28_3.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/conv/convfp32_x_g3_d2_1_28_28_3.bin new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git 
a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/conv/convfp32_y_g3_d2_1_26_26_18.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/conv/convfp32_y_g3_d2_1_26_26_18.bin new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/matmul/matmulfp32_a_10x4.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/matmul/matmulfp32_a_10x4.bin new file mode 100644 index 0000000000000000000000000000000000000000..9d152d6f6cdf16f555e2edf864c494d7ed06ed47 Binary files /dev/null and b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/matmul/matmulfp32_a_10x4.bin differ diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/matmul/matmulfp32_a_4x10.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/matmul/matmulfp32_a_4x10.bin new file mode 100644 index 0000000000000000000000000000000000000000..2352a3989a1c367bfc0a260c1d1f871555a8da23 Binary files /dev/null and b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/matmul/matmulfp32_a_4x10.bin differ diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/matmul/matmulfp32_b_10x5.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/matmul/matmulfp32_b_10x5.bin new file mode 100644 index 0000000000000000000000000000000000000000..a6df1ed831a045b7950ad2a3d0a285e242924003 Binary files /dev/null and b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/matmul/matmulfp32_b_10x5.bin differ diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/matmul/matmulfp32_b_5x10.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/matmul/matmulfp32_b_5x10.bin new file mode 100644 index 0000000000000000000000000000000000000000..eb28b692f68f92bf059c718d5ef583973bbfddd0 Binary files /dev/null and b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/matmul/matmulfp32_b_5x10.bin differ diff --git 
a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/matmul/matmulfp32_c_4x5.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/matmul/matmulfp32_c_4x5.bin new file mode 100644 index 0000000000000000000000000000000000000000..183a9f0ba57476bf28191ad70a7850a8793c4954 --- /dev/null +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/matmul/matmulfp32_c_4x5.bin @@ -0,0 +1 @@ +.@8|A-=,fAQ>2@dui}?t4@@2zN@ԣx@&(ӂe@g \ No newline at end of file diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_10_dx1_5_4_6.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_10_dx1_5_4_6.bin new file mode 100644 index 0000000000000000000000000000000000000000..fcb5df39266ce621bdb133949f02f82f5fb4d302 Binary files /dev/null and b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_10_dx1_5_4_6.bin differ diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_10_dx2_5_1_6.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_10_dx2_5_1_6.bin new file mode 100644 index 0000000000000000000000000000000000000000..aa7bc323f733a3245bd16a390fcc620ec9dfb505 Binary files /dev/null and b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_10_dx2_5_1_6.bin differ diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_10_dy_5_4_6.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_10_dy_5_4_6.bin new file mode 100644 index 0000000000000000000000000000000000000000..c68b9c80b0518116c96380bb4f6b2c6944399a73 --- /dev/null +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_10_dy_5_4_6.bin @@ -0,0 +1 @@ +&JvBAoL?̓I?̿A6ԽS?d>5?iӿ`@u@G@`M>Av>B)>c@$/AwA˿^ 0kܾȁAfr>0x˿cR?vu=`,>pŔ?aK@y?׾db3?@ڤeK?Ч9)?uu@?"=P>b>v@Nl@ÐU>ot ?*@y ; @Av_ 俶\q?w@0ݻj?Aq;bo,3@I`?3sfl@@I?? AC_>=L@? 
.@xy`?῿A3 3ˑ?n?.=\B@/A>B_KGF?-^.;?V]?K@z:} QPqH ſ?,@~?LP>7>Pq@P>@ \ No newline at end of file diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_1_dx2_1_6.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_1_dx2_1_6.bin new file mode 100644 index 0000000000000000000000000000000000000000..1676dc671fba382cada419cff74f0b1bef99edb5 --- /dev/null +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_1_dx2_1_6.bin @@ -0,0 +1 @@ +L&h>)A[7?.O2@ \ No newline at end of file diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_1_dy_4_6.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_1_dy_4_6.bin new file mode 100644 index 0000000000000000000000000000000000000000..37947edcb8da8f41f5860202b700368a9fe1f039 --- /dev/null +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_1_dy_4_6.bin @@ -0,0 +1 @@ +wӿ?8>GF?-^.;?V]?K@z:} QPqH ſ?,@~?LP>7>Pq@P>@ \ No newline at end of file diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_1_x1_4_6.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_1_x1_4_6.bin new file mode 100644 index 0000000000000000000000000000000000000000..a093c882f6343999f7451710fd95ba7e4a736519 --- /dev/null +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_1_x1_4_6.bin @@ -0,0 +1 @@ +.%?.s?d-f=<Vnߔ?`Dz?t"?e|>~ ޾I? 
??ƽY?3B?Xf \ No newline at end of file diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_2_dx1_4_6.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_2_dx1_4_6.bin new file mode 100644 index 0000000000000000000000000000000000000000..6737f96700fa3514ac6f83dbc461eec7409b3881 Binary files /dev/null and b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_2_dx1_4_6.bin differ diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_2_dx2_1_6.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_2_dx2_1_6.bin new file mode 100644 index 0000000000000000000000000000000000000000..94bd26e133b77350ba9194c3b3caa7e47642d1fa --- /dev/null +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_2_dx2_1_6.bin @@ -0,0 +1 @@ +2)A%dȑ \ No newline at end of file diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_2_dy_4_6.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_2_dy_4_6.bin new file mode 100644 index 0000000000000000000000000000000000000000..6737f96700fa3514ac6f83dbc461eec7409b3881 Binary files /dev/null and b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_2_dy_4_6.bin differ diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_3_dx1_4_6.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_3_dx1_4_6.bin new file mode 100644 index 0000000000000000000000000000000000000000..6737f96700fa3514ac6f83dbc461eec7409b3881 Binary files /dev/null and b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_3_dx1_4_6.bin differ diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_3_dx2_1_6.bin 
b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_3_dx2_1_6.bin new file mode 100644 index 0000000000000000000000000000000000000000..94bd26e133b77350ba9194c3b3caa7e47642d1fa --- /dev/null +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_3_dx2_1_6.bin @@ -0,0 +1 @@ +2)A%dȑ \ No newline at end of file diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_3_dy_4_6.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_3_dy_4_6.bin new file mode 100644 index 0000000000000000000000000000000000000000..a4130257de45f33ca3b375ff3d8d178872776118 Binary files /dev/null and b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_3_dy_4_6.bin differ diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_4_dx1_4_6.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_4_dx1_4_6.bin new file mode 100644 index 0000000000000000000000000000000000000000..b8cc5229db710bd7af730291cc606dab1e5ce7e7 --- /dev/null +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_4_dx1_4_6.bin @@ -0,0 +1,2 @@ +;p +?y=xJɷi<%.ve!?N[@I(*8Iኽ}> $!ÿi>u?JWy?R8]?z{?5> \ No newline at end of file diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_4_dx2_1_6.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_4_dx2_1_6.bin new file mode 100644 index 0000000000000000000000000000000000000000..40d97286c83e81dc6c3b612d19e9acd9d0b7b837 --- /dev/null +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_4_dx2_1_6.bin @@ -0,0 +1 @@ +b殿Q.* (@0˸ok@ \ No newline at end of file diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_4_dy_4_6.bin 
b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_4_dy_4_6.bin new file mode 100644 index 0000000000000000000000000000000000000000..3cac9bfc200f002716402184b6b7586681b8917e --- /dev/null +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_4_dy_4_6.bin @@ -0,0 +1 @@ +ao=XD<ԟо?$_`=US>]%@?,?SG+b>&e+ѿ#Nv1>b?\i׿׺ H|@N?& ?ǖb ?9A{d@uh?wN?3(cvi5Ϙ>t +6?ngZ?> /TA \ No newline at end of file diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_6_dx1_1_6.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_6_dx1_1_6.bin new file mode 100644 index 0000000000000000000000000000000000000000..4fe396e1bc150ad956af7b8602e6757fd0a076d5 --- /dev/null +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_6_dx1_1_6.bin @@ -0,0 +1 @@ +YA:ۢA93*B%B \ No newline at end of file diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_6_dx2_4_6.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_6_dx2_4_6.bin new file mode 100644 index 0000000000000000000000000000000000000000..35a86f48b51240092c784af514c212c08a869719 --- /dev/null +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_6_dx2_4_6.bin @@ -0,0 +1 @@ +gE|: J"@۽>'BtpB=?C?[^9Ano*Bļ-hVCDBYD\ۿ \ No newline at end of file diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_6_dy_4_6.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_6_dy_4_6.bin new file mode 100644 index 0000000000000000000000000000000000000000..4bfe040d8ca8e26da89e09f67b3a5ac7b861036e Binary files /dev/null and b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_6_dy_4_6.bin differ diff --git 
a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_7_dx1_4_5_6.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_7_dx1_4_5_6.bin new file mode 100644 index 0000000000000000000000000000000000000000..32e73ebd1a59a71d7b9cafad77c73a29792021d2 Binary files /dev/null and b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_7_dx1_4_5_6.bin differ diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_7_dx2_1_1_6.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_7_dx2_1_1_6.bin new file mode 100644 index 0000000000000000000000000000000000000000..06a8bb81819565622b30e10794bdf9d3a8ea00ec --- /dev/null +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_7_dx2_1_1_6.bin @@ -0,0 +1 @@ +MA=@TY$¦&+f \ No newline at end of file diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_7_dy_4_5_6.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_7_dy_4_5_6.bin new file mode 100644 index 0000000000000000000000000000000000000000..3a75ce88edd50937fedf5f93eaa74791ddc77b33 Binary files /dev/null and b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_7_dy_4_5_6.bin differ diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_7_x1_4_5_6.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_7_x1_4_5_6.bin new file mode 100644 index 0000000000000000000000000000000000000000..e5104b8e39e8c6fe018c5934c6ee8b21c874c58b --- /dev/null +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_7_x1_4_5_6.bin @@ -0,0 +1,2 @@ +5^x|>?NWւpټR4=p?v֥?RݾQ>(v?pj?H?G)>I޿ʟ>u{`Խ>Ԁ!O?9;>c>?\7K?cb_?.kuS=j?1=y? p)P?=V ?.>ј?H?*wMH ?\q>(z࿣?-=? 
y?ƿ׻+ _?dGp X?Qt>F?N=N +Yn`%G?)$?Jdi?7"8){oϾ w>(L?Pԝ'2?u?2?">!פ???<߾f?9F&QEE}<:G>.}.S?\̿K?*V%?Q꿹uyC7:?T4>`>o>!?>;y_ \ No newline at end of file diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_7_x2_1_1_6.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_7_x2_1_1_6.bin new file mode 100644 index 0000000000000000000000000000000000000000..b0921a2d80bc93374e13ac96cd3ab0ff2a778128 --- /dev/null +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_7_x2_1_1_6.bin @@ -0,0 +1 @@ +<>o8?#?d@ٌ< \ No newline at end of file diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_8_dx1_5_4_6.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_8_dx1_5_4_6.bin new file mode 100644 index 0000000000000000000000000000000000000000..199d5821733aa9fa063141ad08b318b442d5a522 Binary files /dev/null and b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_8_dx1_5_4_6.bin differ diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_8_dx2_5_1_6.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_8_dx2_5_1_6.bin new file mode 100644 index 0000000000000000000000000000000000000000..ea5dbc6a93c0f4d62a7f5edfbbe453ce12661a1b Binary files /dev/null and b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_8_dx2_5_1_6.bin differ diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_8_dy_5_4_6.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_8_dy_5_4_6.bin new file mode 100644 index 0000000000000000000000000000000000000000..199d5821733aa9fa063141ad08b318b442d5a522 Binary files /dev/null and b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_8_dy_5_4_6.bin differ 
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_8_x1_5_4_6.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_8_x1_5_4_6.bin new file mode 100644 index 0000000000000000000000000000000000000000..d14440793e805aa258e892763ba286414278fb17 Binary files /dev/null and b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_8_x1_5_4_6.bin differ diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_8_x2_5_1_6.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_8_x2_5_1_6.bin new file mode 100644 index 0000000000000000000000000000000000000000..9b6169666c7e2ea3861264db2b9ae128d87e02de Binary files /dev/null and b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_8_x2_5_1_6.bin differ diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_9_dx1_5_4_6.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_9_dx1_5_4_6.bin new file mode 100644 index 0000000000000000000000000000000000000000..5824f414cfa339ade5c4b7008f205129063a623f Binary files /dev/null and b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_9_dx1_5_4_6.bin differ diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_9_dx2_5_1_6.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_9_dx2_5_1_6.bin new file mode 100644 index 0000000000000000000000000000000000000000..bee4fa98bc60baf50da59ac8847941b548c72321 --- /dev/null +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_9_dx2_5_1_6.bin @@ -0,0 +1,2 @@ + @UpQ@(A,@N5@(q?O)X@!g@}@+??B>+?M@,@ @eAOF@ۉ@%AN* ++| \ No newline at end of file diff --git 
a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_9_dy_5_4_6.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_9_dy_5_4_6.bin new file mode 100644 index 0000000000000000000000000000000000000000..dde1dd8cbc6da1c723ef49e774c4e3a368f33ea4 Binary files /dev/null and b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_9_dy_5_4_6.bin differ diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_9_x1_5_4_6.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_9_x1_5_4_6.bin new file mode 100644 index 0000000000000000000000000000000000000000..d14440793e805aa258e892763ba286414278fb17 Binary files /dev/null and b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_9_x1_5_4_6.bin differ diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_9_x2_5_1_6.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_9_x2_5_1_6.bin new file mode 100644 index 0000000000000000000000000000000000000000..9b6169666c7e2ea3861264db2b9ae128d87e02de Binary files /dev/null and b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/arithmetic_fp32_9_x2_5_1_6.bin differ diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/biasgradfp32_1_db_7.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/biasgradfp32_1_db_7.bin new file mode 100644 index 0000000000000000000000000000000000000000..46853507f8e63a02e44b66514730cffc2c388d75 Binary files /dev/null and b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/biasgradfp32_1_db_7.bin differ diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/biasgradfp32_1_dy_10_28_28_7.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/biasgradfp32_1_dy_10_28_28_7.bin new file 
mode 100644 index 0000000000000000000000000000000000000000..c079b77a367c198c8b6da19257eb43ca89f5d605 Binary files /dev/null and b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/biasgradfp32_1_dy_10_28_28_7.bin differ diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/sce_fp32_1_dy_6_4.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/sce_fp32_1_dy_6_4.bin new file mode 100644 index 0000000000000000000000000000000000000000..d135477618dbadaf3294fbf815d26b51831aa577 --- /dev/null +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/sce_fp32_1_dy_6_4.bin @@ -0,0 +1,2 @@ +,Q= dt=*<ʮ<'=䜶=_<ὥ +=i=ӭ=a:<=AL#wDh?>?>>a>S?>>*D^? [?Q=?56>ȭ? \ No newline at end of file diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/pooling/avgpoolgradfp32_1_dx_1_28_28_3.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/pooling/avgpoolgradfp32_1_dx_1_28_28_3.bin new file mode 100644 index 0000000000000000000000000000000000000000..e5d086ef061316adad5650c336a9ff7ce158c8e3 Binary files /dev/null and b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/pooling/avgpoolgradfp32_1_dx_1_28_28_3.bin differ diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/pooling/avgpoolgradfp32_1_dy_1_28_28_3.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/pooling/avgpoolgradfp32_1_dy_1_28_28_3.bin new file mode 100644 index 0000000000000000000000000000000000000000..ad87262c97b758e251a9ad648ec5311cc6bfe0e6 Binary files /dev/null and b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/pooling/avgpoolgradfp32_1_dy_1_28_28_3.bin differ diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/pooling/avgpoolgradfp32_1_x_1_28_28_3.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/pooling/avgpoolgradfp32_1_x_1_28_28_3.bin new file mode 100644 index 0000000000000000000000000000000000000000..d32ffbe755462f66fdb69c3585ca43e787be2296 Binary 
files /dev/null and b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/pooling/avgpoolgradfp32_1_x_1_28_28_3.bin differ diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/pooling/maxpoolgradfp32_1_dx_1_28_28_3.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/pooling/maxpoolgradfp32_1_dx_1_28_28_3.bin new file mode 100644 index 0000000000000000000000000000000000000000..cca67a85df1163f95027823b6502328f323f4555 Binary files /dev/null and b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/pooling/maxpoolgradfp32_1_dx_1_28_28_3.bin differ diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/pooling/maxpoolgradfp32_1_dy_1_28_28_3.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/pooling/maxpoolgradfp32_1_dy_1_28_28_3.bin new file mode 100644 index 0000000000000000000000000000000000000000..15c810365e291c8ced141d0cfec88668882d99dc Binary files /dev/null and b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/pooling/maxpoolgradfp32_1_dy_1_28_28_3.bin differ diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/pooling/maxpoolgradfp32_1_i_1_28_28_3.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/pooling/maxpoolgradfp32_1_i_1_28_28_3.bin new file mode 100644 index 0000000000000000000000000000000000000000..c50b6145ab474febecb071cbc1237a8a6babb4d0 Binary files /dev/null and b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/pooling/maxpoolgradfp32_1_i_1_28_28_3.bin differ diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/pooling/maxpoolgradfp32_2_dx_1_30_30_3.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/pooling/maxpoolgradfp32_2_dx_1_30_30_3.bin new file mode 100644 index 0000000000000000000000000000000000000000..e2ee3307f9f464025eea0a473e7921a1c0ce282c Binary files /dev/null and b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/pooling/maxpoolgradfp32_2_dx_1_30_30_3.bin differ diff --git 
a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/pooling/maxpoolgradfp32_2_dy_1_10_10_3.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/pooling/maxpoolgradfp32_2_dy_1_10_10_3.bin new file mode 100644 index 0000000000000000000000000000000000000000..0985d76b035273333eb1f6f69b1276a3f4aecda6 Binary files /dev/null and b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/pooling/maxpoolgradfp32_2_dy_1_10_10_3.bin differ diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/pooling/maxpoolgradfp32_2_dy_1_3_10_10.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/pooling/maxpoolgradfp32_2_dy_1_3_10_10.bin new file mode 100644 index 0000000000000000000000000000000000000000..c95d4ce0fa1a53c68a3f38a00505bad5ec7b4ea7 Binary files /dev/null and b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/pooling/maxpoolgradfp32_2_dy_1_3_10_10.bin differ diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/pooling/maxpoolgradfp32_2_i_1_10_10_3.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/pooling/maxpoolgradfp32_2_i_1_10_10_3.bin new file mode 100644 index 0000000000000000000000000000000000000000..d5647f32dc7d45efa3283cbf26ec277dd030e70b Binary files /dev/null and b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/pooling/maxpoolgradfp32_2_i_1_10_10_3.bin differ diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/pooling/maxpoolgradfp32_2_i_1_3_10_10.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/pooling/maxpoolgradfp32_2_i_1_3_10_10.bin new file mode 100644 index 0000000000000000000000000000000000000000..89157fd9dd7289a92a7f1654e256139966c8c6e5 Binary files /dev/null and b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/pooling/maxpoolgradfp32_2_i_1_3_10_10.bin differ diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/pooling/maxpoolgradfp32_2_x_1_30_30_3.bin 
b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/pooling/maxpoolgradfp32_2_x_1_30_30_3.bin new file mode 100644 index 0000000000000000000000000000000000000000..61cfb07fdb62835ef471cc0b277cb50fae2ee0e7 Binary files /dev/null and b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/pooling/maxpoolgradfp32_2_x_1_30_30_3.bin differ diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/power/powerfp32_dx_scale5_shift2_power3.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/power/powerfp32_dx_scale5_shift2_power3.bin new file mode 100644 index 0000000000000000000000000000000000000000..0bb3ced76525b2182d5b247ddf40af2849bf760c --- /dev/null +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/power/powerfp32_dx_scale5_shift2_power3.bin @@ -0,0 +1,2 @@ +NG^aFN.cJ5G +WExeء?wDRGTEAC_>YHÜ*IIM.IEB:g$IHpñ BFCBtHjC{@FIW>tFӦJE1F x3E~#)JIE4'nIB \ No newline at end of file diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/power/powerfp32_dy_scale5_shift2_power3.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/power/powerfp32_dy_scale5_shift2_power3.bin new file mode 100644 index 0000000000000000000000000000000000000000..b1e404b742da5cd3ef913243701371920176fe4d --- /dev/null +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/power/powerfp32_dy_scale5_shift2_power3.bin @@ -0,0 +1 @@ +N#LSRDi] Bo;Eg9WNA{CB@I4~D>PD DSpBH  VNLDCm B*l A֓\@iC AB^tB+3ECOAs`D3DeB~D`v@: \ No newline at end of file diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/power/powerfp32_x_scale5_shift2_power3.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/power/powerfp32_x_scale5_shift2_power3.bin new file mode 100644 index 0000000000000000000000000000000000000000..d902be1588fdff4fb65acbfdbd2fbceb8542ddfa --- /dev/null +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/power/powerfp32_x_scale5_shift2_power3.bin @@ -0,0 +1 @@ +h)jr_O%?"=|;9$>=Dw9u??PҴ?~q<= 
BͿ9Ċ?,?6>8$du5?gb>")H`>q?I>.=,%?$!Y>H?| \ No newline at end of file diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/train/train_bias_10.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/train/train_bias_10.bin new file mode 100644 index 0000000000000000000000000000000000000000..527f9f7399d5a7b40d66f4101cb4189d4ad97a7f --- /dev/null +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/train/train_bias_10.bin @@ -0,0 +1 @@ +>SY@+[Kc߾&O?-?Qjt? \ No newline at end of file diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/train/train_input_32_1000.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/train/train_input_32_1000.bin new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/train/train_weight_10_1000.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/train/train_weight_10_1000.bin new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391