diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic.h b/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic.h index f77486525c1dacaee81b4e78363171e75afe0db5..c81701b96eeecdae5627bd7fa69b90a4c8600ca0 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic.h @@ -90,6 +90,7 @@ class ArithmeticCPUKernel : public LiteKernel { case PrimitiveType_FloorMod: arithmetic_run_ = ElementFloorMod; arithmetic_broadcast_run_ = BroadcastFloorMod; + break; case PrimitiveType_Equal: arithmetic_run_ = ElementEqual; arithmetic_broadcast_run_ = BroadcastEqual; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/argminmax_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/argminmax_int8.cc index 8f852fcfd4bbc0ba450db0fc304c54b594951f28..0510c5fd45d63729dbd70778cd602a749d704494 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/argminmax_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/argminmax_int8.cc @@ -17,7 +17,7 @@ #include #include "schema/model_generated.h" #include "src/kernel_registry.h" -#include "src/runtime/kernel/arm/opclib/arg_min_max.h" +#include "src/runtime/kernel/arm/opclib/int8/arg_min_max_int8.h" #include "include/errorcode.h" using mindspore::lite::RET_OK; @@ -31,12 +31,45 @@ int ArgMinMaxInt8CPUKernel::Init() { } auto param = reinterpret_cast(opParameter); param->data_type_ = kNumberTypeInt8; + auto *input_tensor = inputs_.at(kInputIndex); + auto in_quant_args = input_tensor->GetQuantParams(); + in_quant_arg_.scale_ = in_quant_args.front().scale; + in_quant_arg_.zp_ = in_quant_args.front().zeroPoint; + + auto *out_tensor = outputs_.at(kOutputIndex); + auto out_quant_args = out_tensor->GetQuantParams(); + out_quant_arg_.scale_ = out_quant_args.front().scale; + out_quant_arg_.zp_ = out_quant_args.front().zeroPoint; return RET_OK; } int ArgMinMaxInt8CPUKernel::Run() { - auto ret = ArgMinMaxBaseCPUKernel::Run(); - FreeTmpMemory(); - return ret; + auto input = 
inputs_.at(0); + + const int8_t *input_data = reinterpret_cast(inputs_.at(0)->Data()); + int8_t *output_data = reinterpret_cast(outputs_.at(0)->Data()); + + auto in_shape = input->shape().data(); + auto param = reinterpret_cast(opParameter); + if (param->topk_ == 1) { + ArgMinMaxQuant(input_data, output_data, in_shape, param, &in_quant_arg_, &out_quant_arg_); + return RET_OK; + } + + switch (param->axis_) { + case 0: + ArgMinMaxDim0(input_data, output_data, in_shape, param, &in_quant_arg_, &out_quant_arg_); + break; + case 1: + ArgMinMaxDim1(input_data, output_data, in_shape, param, &in_quant_arg_, &out_quant_arg_); + break; + case 2: + ArgMinMaxDim2(input_data, output_data, in_shape, param, &in_quant_arg_, &out_quant_arg_); + break; + case 3: + ArgMinMaxDim3(input_data, output_data, in_shape, param, &in_quant_arg_, &out_quant_arg_); + break; + } + return RET_OK; } } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/argminmax_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/argminmax_int8.h index 680c4dedef3655de1000c9639a713feb9eb43684..63e3938bc42908c39de17b65012234db9546de00 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/argminmax_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/argminmax_int8.h @@ -18,6 +18,7 @@ #include #include "src/runtime/kernel/arm/base/arg_min_max_base.h" +#include "src/runtime/kernel/arm/opclib/quantization/quantize.h" namespace mindspore::kernel { class ArgMinMaxInt8CPUKernel : public ArgMinMaxBaseCPUKernel { @@ -31,6 +32,9 @@ class ArgMinMaxInt8CPUKernel : public ArgMinMaxBaseCPUKernel { int Init() override; int ReSize() override { return 0; } int Run() override; + private: + QuantArg in_quant_arg_; + QuantArg out_quant_arg_; }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/arithmetic_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/arithmetic_int8.cc new file mode 100644 index 
0000000000000000000000000000000000000000..f9e3bbf9595fa1579fa537aa5299c47d69867013 --- /dev/null +++ b/mindspore/lite/src/runtime/kernel/arm/int8/arithmetic_int8.cc @@ -0,0 +1,194 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "src/runtime/kernel/arm/int8/arithmetic_int8.h" +#include "src/runtime/kernel/arm/opclib/int8/arithmetic_int8.h" +#include "src/runtime/kernel/arm/opclib/arithmetic_common.h" +#include "schema/model_generated.h" +#include "src/kernel_registry.h" +#include "src/runtime/runtime_api.h" +#include "include/errorcode.h" + +using mindspore::kernel::KERNEL_ARCH::kCPU; +using mindspore::lite::KernelRegistrar; +using mindspore::lite::RET_PARAM_INVALID; +using mindspore::lite::RET_ERROR; +using mindspore::lite::RET_OK; + +using mindspore::schema::PrimitiveType_Equal; +using mindspore::schema::PrimitiveType_NotEqual; +using mindspore::schema::PrimitiveType_LessEqual; +using mindspore::schema::PrimitiveType_Greater; +using mindspore::schema::PrimitiveType_GreaterEqual; +using mindspore::schema::PrimitiveType_Less; + +namespace mindspore::kernel { +namespace { +int ArithmeticsInt8Launch(int thread_id, LiteParallelGroupEnv *penv, void *cdata) { + auto arithmetic_kernel = reinterpret_cast(cdata); + auto error_code = arithmetic_kernel->DoArithmetic(thread_id); + if (error_code != RET_OK) { + MS_LOG(ERROR) << "ArithmeticsRun error thread_id[" << thread_id << "] error_code[" << 
error_code << "]"; + return RET_ERROR; + } + return RET_OK; +} +} // namespace + +ArithmeticInt8CPUKernel::~ArithmeticInt8CPUKernel() { + auto param = reinterpret_cast(opParameter); + if (!param->broadcasting_) { + return; + } + if (context_->allocator != nullptr) { + if (tile_data0_ != nullptr) { + context_->allocator->Free(tile_data0_); + } + if (tile_data1_ != nullptr) { + context_->allocator->Free(tile_data1_); + } + } else { + if (tile_data0_ != nullptr) { + free(tile_data0_); + } + if (tile_data1_ != nullptr) { + free(tile_data1_); + } + } + tile_data0_ = nullptr; + tile_data1_ = nullptr; +} + +int ArithmeticInt8CPUKernel::Init() { + switch (opParameter->type_) { + case PrimitiveType_Equal: + arithmetic_run_ = ElementEqual; + break; + case PrimitiveType_NotEqual: + arithmetic_run_ = ElementNotEqual; + break; + case PrimitiveType_Less: + arithmetic_run_ = ElementEqual; + break; + case PrimitiveType_LessEqual: + arithmetic_run_ = ElementNotEqual; + break; + case PrimitiveType_Greater: + arithmetic_run_ = ElementGreater; + break; + case PrimitiveType_GreaterEqual: + arithmetic_run_ = ElementGreaterEqual; + break; + default: + MS_LOG(ERROR) << "Error Operator type " << opParameter->type_; + arithmetic_run_ = nullptr; + return RET_PARAM_INVALID; + } + auto data_size = outputs_[0]->Size(); + auto param = reinterpret_cast(opParameter); + if (param->broadcasting_) { + if (context_->allocator != nullptr) { + tile_data0_ = reinterpret_cast(context_->allocator->Malloc(data_size)); + tile_data1_ = reinterpret_cast(context_->allocator->Malloc(data_size)); + } else { + tile_data0_ = reinterpret_cast(malloc(data_size)); + tile_data1_ = reinterpret_cast(malloc(data_size)); + } + } else { + tile_data0_ = nullptr; + tile_data1_ = nullptr; + } + return RET_OK; +} + +int ArithmeticInt8CPUKernel::ReSize() { return RET_OK; } + +int ArithmeticInt8CPUKernel::DoArithmetic(int thread_id) { + auto input0_data = reinterpret_cast(inputs_[0]->Data()); + auto input1_data1 = 
reinterpret_cast(inputs_[1]->Data()); + auto output_data = reinterpret_cast(outputs_[0]->Data()); + auto element_num = outputs_[0]->ElementsNum(); + auto param = reinterpret_cast(opParameter); + if (param->broadcasting_ && arithmetic_run_ != nullptr) { + MS_ASSERT(thread_count_ != 0); + int stride = UP_DIV(element_num, thread_count_); + int count = MSMIN(stride, element_num - stride * thread_id); + + int error_code = arithmetic_run_(tile_data0_ + stride * thread_id, tile_data1_ + stride * thread_id, + output_data + stride * thread_id, count); + if (error_code != RET_OK) { + MS_LOG(ERROR) << "Arithmetic run fail! ret: " << error_code; + return RET_ERROR; + } + } else if (arithmetic_run_ != nullptr) { + int error_code = arithmetic_run_(input0_data, input1_data1, output_data, element_num); + if (error_code != RET_OK) { + MS_LOG(ERROR) << "Arithmetic run fail!ret: " << error_code; + return RET_ERROR; + } + } else { + MS_LOG(ERROR) << "arithmetic_run function is nullptr!"; + return RET_ERROR; + } + return RET_OK; +} + +int ArithmeticInt8CPUKernel::Run() { + auto param = reinterpret_cast(opParameter); + if (param->broadcasting_) { + auto input_data0 = reinterpret_cast(inputs_[0]->Data()); + auto input_data1 = reinterpret_cast(inputs_[1]->Data()); + TileDimensionsInt8(input_data0, input_data1, tile_data0_, tile_data1_, param); + } + int error_code = LiteBackendParallelLaunch(ArithmeticsInt8Launch, this, thread_count_); + if (error_code != RET_OK) { + MS_LOG(ERROR) << "Arithmetic launch function fail! 
ret: " << error_code; + return RET_ERROR; + } + return RET_OK; +} + +kernel::LiteKernel *CpuArithmeticInt8KernelCreator(const std::vector &inputs, + const std::vector &outputs, + OpParameter *parameter, const lite::Context *ctx, + const kernel::KernelKey &desc) { + if (parameter == nullptr) { + MS_LOG(ERROR) << "Input parameter is null!"; + return nullptr; + } + auto kernel = new (std::nothrow) ArithmeticInt8CPUKernel(parameter, inputs, outputs, ctx); + if (kernel == nullptr) { + MS_LOG(ERROR) << "Create ArithmeticInt8CPUKernel failed, name: " << parameter->name_; + return nullptr; + } + auto ret = kernel->Init(); + if (ret != RET_OK) { + MS_LOG(ERROR) << "Init kernel failed, name: " << parameter->name_ << ", type: " + << schema::EnumNamePrimitiveType(static_cast(parameter->type_)); + delete kernel; + return nullptr; + } + return kernel; +} + +REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_Equal, CpuArithmeticInt8KernelCreator) +REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_NotEqual, CpuArithmeticInt8KernelCreator) +REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_Less, CpuArithmeticInt8KernelCreator) +REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_LessEqual, CpuArithmeticInt8KernelCreator) +REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_Greater, CpuArithmeticInt8KernelCreator) +REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_GreaterEqual, CpuArithmeticInt8KernelCreator) + +} // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/arithmetic_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/arithmetic_int8.h new file mode 100644 index 0000000000000000000000000000000000000000..56ebcd7e0b105d2a84e691a61f31dbb09fa18a77 --- /dev/null +++ b/mindspore/lite/src/runtime/kernel/arm/int8/arithmetic_int8.h @@ -0,0 +1,47 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_ARITHMETIC_INT8_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_ARITHMETIC_INT8_H_ + +#include +#include "src/lite_kernel.h" +#include "schema/model_generated.h" + +namespace mindspore::kernel { +class ArithmeticInt8CPUKernel : public LiteKernel { + typedef int (*ArithmeticRunInt8)(int8_t *input0, int8_t *input1, int8_t *output, int element_size); + + public: + ArithmeticInt8CPUKernel(OpParameter *parameter, const std::vector &inputs, + const std::vector &outputs, const lite::Context *ctx) + : LiteKernel(parameter, inputs, outputs), thread_count_(ctx->thread_num_), context_(ctx) {} + ~ArithmeticInt8CPUKernel(); + + int Init() override; + int ReSize() override; + int Run() override; + int DoArithmetic(int thread_id); + + private: + int thread_count_; + int8_t *tile_data0_; + int8_t *tile_data1_; + const lite::Context *context_; + ArithmeticRunInt8 arithmetic_run_; +}; +} // namespace mindspore::kernel +#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_ARITHMETIC_INT8_H_ diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/batch_to_space_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/batch_to_space_int8.cc index 89732f82a26a7cb5fd8d8115627bc498a8a23358..12dcd76a840fdf1642317b753e055bc1a2a181b7 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/batch_to_space_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/batch_to_space_int8.cc @@ -18,13 +18,27 @@ #include "schema/model_generated.h" #include "src/kernel_registry.h" #include 
"src/runtime/kernel/arm/opclib/batch_to_space.h" +#include "src/runtime/kernel/arm/opclib/int8/batch_to_space_int8.h" #include "include/errorcode.h" using mindspore::lite::RET_OK; namespace mindspore::kernel { int BatchToSpaceInt8CPUKernel::Init() { - return BatchToSpaceBaseCPUKernel::Init(); + auto ret = BatchToSpaceBaseCPUKernel::Init(); + if (ret != RET_OK) { + return ret; + } + auto *input_tensor = inputs_.at(kInputIndex); + auto in_quant_args = input_tensor->GetQuantParams(); + in_quant_arg_.scale_ = in_quant_args.front().scale; + in_quant_arg_.zp_ = in_quant_args.front().zeroPoint; + + auto *out_tensor = outputs_.at(kOutputIndex); + auto out_quant_args = out_tensor->GetQuantParams(); + out_quant_arg_.scale_ = out_quant_args.front().scale; + out_quant_arg_.zp_ = out_quant_args.front().zeroPoint; + return RET_OK; } int BatchToSpaceInt8CPUKernel::Run() { @@ -36,12 +50,22 @@ int BatchToSpaceInt8CPUKernel::Run() { auto out_shape = output->shape(); BatchToSpaceParameter *param = reinterpret_cast(this->opParameter); - if (IsNoCrop()) { - BatchToSpaceNoCropForNHWC(input_data, output_data, in_shape.data(), out_shape[0], param->block_shape_, - sizeof(int8_t)); + if (in_quant_arg_.scale_ == out_quant_arg_.scale_ && in_quant_arg_.zp_ == out_quant_arg_.zp_) { + if (IsNoCrop()) { + BatchToSpaceNoCropForNHWC(input_data, output_data, in_shape.data(), out_shape[0], param->block_shape_, + sizeof(int8_t)); + } else { + BatchToSpaceForNHWC(input_data, output_data, in_shape.data(), out_shape[0], param->block_shape_, param->crops_, + sizeof(int8_t)); + } } else { - BatchToSpaceForNHWC(input_data, output_data, in_shape.data(), out_shape[0], param->block_shape_, param->crops_, - sizeof(int8_t)); + if (IsNoCrop()) { + BatchToSpaceNoCropForNHWC(input_data, output_data, in_shape.data(), out_shape[0], param->block_shape_, + &in_quant_arg_, &out_quant_arg_); + } else { + BatchToSpaceForNHWC(input_data, output_data, in_shape.data(), out_shape[0], param->block_shape_, param->crops_, + 
&in_quant_arg_, &out_quant_arg_); + } } return RET_OK; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/batch_to_space_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/batch_to_space_int8.h index 98a3dd14380b1429622a5b7060b691e0063c8d0e..17f30f004f00fe261c91e7fb9a5e3209bf178951 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/batch_to_space_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/batch_to_space_int8.h @@ -31,6 +31,9 @@ class BatchToSpaceInt8CPUKernel : public BatchToSpaceBaseCPUKernel { int Init() override; int ReSize() override { return 0; } int Run() override; + private: + QuantArg in_quant_arg_; + QuantArg out_quant_arg_; }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/depth_to_space_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/depth_to_space_int8.cc index ccce2e2a313baa918e7231fe323e8313fb8b38d5..ba6205780bb8463c6b04cd15b06350a291c53e05 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/depth_to_space_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/depth_to_space_int8.cc @@ -18,6 +18,7 @@ #include "schema/model_generated.h" #include "src/kernel_registry.h" #include "src/runtime/kernel/arm/opclib/depth_to_space.h" +#include "src/runtime/kernel/arm/opclib/int8/depth_to_space_int8.h" #include "include/errorcode.h" using mindspore::lite::RET_OK; @@ -31,6 +32,16 @@ int DepthToSpaceInt8CPUKernel::Init() { } DepthToSpaceParameter *param = reinterpret_cast(opParameter); param->data_type_size_ = sizeof(int8_t); + + auto *input_tensor = inputs_.at(kInputIndex); + auto in_quant_args = input_tensor->GetQuantParams(); + in_quant_arg_.scale_ = in_quant_args.front().scale; + in_quant_arg_.zp_ = in_quant_args.front().zeroPoint; + + auto *out_tensor = outputs_.at(kOutputIndex); + auto out_quant_args = out_tensor->GetQuantParams(); + out_quant_arg_.scale_ = out_quant_args.front().scale; + out_quant_arg_.zp_ = out_quant_args.front().zeroPoint; return RET_OK; } @@ -41,14 +52,11 @@ 
int DepthToSpaceInt8CPUKernel::Run() { int8_t *output_data = reinterpret_cast(output->Data()); auto in_shape = input->shape(); DepthToSpaceParameter *param = reinterpret_cast(opParameter); - if (input->GetFormat() == schema::Format_NHWC) { + if (in_quant_arg_.scale_ == out_quant_arg_.scale_ && in_quant_arg_.zp_ == out_quant_arg_.zp_) { DepthToSpaceForNHWC(input_data, output_data, in_shape.data(), param); - return RET_OK; } else { - MS_LOG(ERROR) << "Depth_to_space only support NHWC now!"; - return RET_ERROR; + DepthToSpaceForNHWC(input_data, output_data, in_shape.data(), param, &in_quant_arg_, &out_quant_arg_); } - return RET_OK; } } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/depth_to_space_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/depth_to_space_int8.h index ce812354298cacec024070c05315fdae54cdfbde..427b6d5eb055d04ceb26eadd7c62b09482e731ef 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/depth_to_space_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/depth_to_space_int8.h @@ -31,6 +31,9 @@ class DepthToSpaceInt8CPUKernel : public DepthToSpaceBaseCPUKernel { int Init() override; int ReSize() override { return 0; } int Run() override; + private: + QuantArg in_quant_arg_; + QuantArg out_quant_arg_; }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/opclib/arg_min_max.cc b/mindspore/lite/src/runtime/kernel/arm/opclib/arg_min_max.cc index bf8e7534929caa64f5e1f6608ec93087abc17da1..816aef4c1b4947dcb11cce85136b6712eb884cc1 100644 --- a/mindspore/lite/src/runtime/kernel/arm/opclib/arg_min_max.cc +++ b/mindspore/lite/src/runtime/kernel/arm/opclib/arg_min_max.cc @@ -15,10 +15,8 @@ */ #include "src/runtime/kernel/arm/opclib/arg_min_max.h" #include "src/runtime/kernel/arm/opclib/fp32/arg_min_max.h" -#include "src/runtime/kernel/arm/opclib/int8/arg_min_max.h" #define FLOAT_DATA_TYPE 43 -#define INT8_DATA_TYPE 32 void GetCalcParameter(const int *shape, int dims_number, int 
axis, int *pre_axis_count, int *axis_count, int *after_axis_count) { @@ -51,16 +49,6 @@ void ArgMinMaxTopk1(const void *input, void *output, const int *shape, ArgMinMax } break; } - case INT8_DATA_TYPE: { - if (param->get_max_) { - ArgMax(reinterpret_cast(input), reinterpret_cast(output), param, pre_axis_count, - axis_count, after_axis_count); - } else { - ArgMin(reinterpret_cast(input), reinterpret_cast(output), param, pre_axis_count, - axis_count, after_axis_count); - } - break; - } default: break; } @@ -100,40 +88,6 @@ void ArgMinMaxTopknFp32(const float *input, float *output, const int *in_shape, } } -void ArgMinMaxTopknInt8(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param) { - if (param->get_max_) { - switch (param->axis_) { - case 0: - ArgMaxDim0(input, output, in_shape, param); - break; - case 1: - ArgMaxDim1(input, output, in_shape, param); - break; - case 2: - ArgMaxDim2(input, output, in_shape, param); - break; - case 3: - ArgMaxDim3(input, output, in_shape, param); - break; - } - } else { - switch (param->axis_) { - case 0: - ArgMinDim0(input, output, in_shape, param); - break; - case 1: - ArgMinDim1(input, output, in_shape, param); - break; - case 2: - ArgMinDim2(input, output, in_shape, param); - break; - case 3: - ArgMinDim3(input, output, in_shape, param); - break; - } - } -} - void ArgMinMax(const void *input, void *output, const int *in_shape, ArgMinMaxParameter *param) { if (param->topk_ == 1) { ArgMinMaxTopk1(input, output, in_shape, param); @@ -145,10 +99,6 @@ void ArgMinMax(const void *input, void *output, const int *in_shape, ArgMinMaxPa ArgMinMaxTopknFp32(reinterpret_cast(input), reinterpret_cast(output), in_shape, param); return; } - case INT8_DATA_TYPE: { - ArgMinMaxTopknInt8(reinterpret_cast(input), reinterpret_cast(output), in_shape, param); - return; - } default: break; } diff --git a/mindspore/lite/src/runtime/kernel/arm/opclib/arg_min_max.h b/mindspore/lite/src/runtime/kernel/arm/opclib/arg_min_max.h 
index c4f2e1c6ccc5d709773901bc892235ebde158b83..91580106aa7c5b0f180356cc50639953857e0167 100644 --- a/mindspore/lite/src/runtime/kernel/arm/opclib/arg_min_max.h +++ b/mindspore/lite/src/runtime/kernel/arm/opclib/arg_min_max.h @@ -18,6 +18,7 @@ #include "src/runtime/kernel/arm/opclib/op_base.h" #include "src/runtime/kernel/arm/opclib/arg_min_max_parameter.h" +#include "src/runtime/kernel/arm/opclib/quantization/quantize.h" void ArgMinMax(const void *input, void *output, const int *in_shape, ArgMinMaxParameter *param); #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_ARG_MIN_MAX_H_ diff --git a/mindspore/lite/src/runtime/kernel/arm/opclib/depth_to_space.h b/mindspore/lite/src/runtime/kernel/arm/opclib/depth_to_space.h index 91e7cf1e69d1756ed4528f7b6dfd699bad330902..d2b282b9670a6699fcf74020b1f332dc365db8bf 100644 --- a/mindspore/lite/src/runtime/kernel/arm/opclib/depth_to_space.h +++ b/mindspore/lite/src/runtime/kernel/arm/opclib/depth_to_space.h @@ -15,19 +15,7 @@ */ #ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_DEPTH_TO_SPACE_H_ #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_DEPTH_TO_SPACE_H_ -#include "src/runtime/kernel/arm/opclib/op_base.h" - -struct DepthToSpaceParameter { - OpParameter op_parameter_; - int32_t block_size_; - int32_t in_stride_dim0_; - int32_t in_stride_dim1_; - int32_t in_stride_dim2_; - int32_t out_stride_dim0_; - int32_t out_stride_dim1_; - int32_t out_stride_dim2_; - uint8_t data_type_size_; -}; +#include "src/runtime/kernel/arm/opclib/depth_to_space_parameter.h" void DepthToSpaceForNHWC(const void *input, void *output, int *in_shape, DepthToSpaceParameter *param); #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_DEPTH_TO_SPACE_H_ diff --git a/mindspore/lite/src/runtime/kernel/arm/opclib/depth_to_space_parameter.h b/mindspore/lite/src/runtime/kernel/arm/opclib/depth_to_space_parameter.h new file mode 100644 index 0000000000000000000000000000000000000000..9d4e0e506544205cfeb41baa1f35c83d72e17c60 --- /dev/null +++ 
b/mindspore/lite/src/runtime/kernel/arm/opclib/depth_to_space_parameter.h @@ -0,0 +1,32 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_DEPTH_TO_SPACE_PARAMETER_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_DEPTH_TO_SPACE_PARAMETER_H_ +#include "src/runtime/kernel/arm/opclib/op_base.h" + +struct DepthToSpaceParameter { + OpParameter op_parameter_; + int32_t block_size_; + int32_t in_stride_dim0_; + int32_t in_stride_dim1_; + int32_t in_stride_dim2_; + int32_t out_stride_dim0_; + int32_t out_stride_dim1_; + int32_t out_stride_dim2_; + uint8_t data_type_size_; +}; + +#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_DEPTH_TO_SPACE_PARAMETER_H_ diff --git a/mindspore/lite/src/runtime/kernel/arm/opclib/int8/arg_min_max.cc b/mindspore/lite/src/runtime/kernel/arm/opclib/int8/arg_min_max.cc deleted file mode 100644 index dd17758920aff06f87c75549968d0abffb04c03d..0000000000000000000000000000000000000000 --- a/mindspore/lite/src/runtime/kernel/arm/opclib/int8/arg_min_max.cc +++ /dev/null @@ -1,488 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "src/runtime/kernel/arm/opclib/int8/arg_min_max.h" - -#define INT8_MAX_VALUE 127 - -int ArgCompareAscInt8(const void *a, const void *b) { - return reinterpret_cast(a)->data_.i8_data_ - - reinterpret_cast(b)->data_.i8_data_; -} - -int ArgCompareDescInt8(const void *a, const void *b) { - return reinterpret_cast(b)->data_.i8_data_ - - reinterpret_cast(a)->data_.i8_data_; -} - -void ArgMaxDim0OutValue(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param) { - for (int32_t i = 0; i < param->in_strides_[0]; ++i) { - for (int j = 0; j < in_shape[0]; ++j) { - size_t offset = param->in_strides_[0] * j + i; - param->arg_elements_[j].index_ = j; - param->arg_elements_[j].data_.f_data_ = input[offset]; - } - qsort(param->arg_elements_, in_shape[0], sizeof(ArgElement), ArgCompareDescInt8); - for (int j = 0; j < param->topk_; ++j) { - size_t out_offset = j * param->out_strides_[0] + i; - output[out_offset] = param->arg_elements_[j].data_.f_data_; - } - } -} - -void ArgMaxDim0OutIndex(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param) { - for (int32_t i = 0; i < param->in_strides_[0]; ++i) { - for (int j = 0; j < in_shape[0]; ++j) { - size_t offset = param->in_strides_[0] * j + i; - param->arg_elements_[j].index_ = j; - param->arg_elements_[j].data_.f_data_ = input[offset]; - } - qsort(param->arg_elements_, in_shape[0], sizeof(ArgElement), ArgCompareDescInt8); - for (int j = 0; j < param->topk_; ++j) { - size_t out_offset = j * param->out_strides_[0] + i; - output[out_offset] = 
param->arg_elements_[j].index_; - } - } -} - -void ArgMinDim0OutValue(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param) { - for (int32_t i = 0; i < param->in_strides_[0]; ++i) { - for (int j = 0; j < in_shape[0]; ++j) { - size_t offset = param->in_strides_[0] * j + i; - param->arg_elements_[j].index_ = j; - param->arg_elements_[j].data_.f_data_ = input[offset]; - } - qsort(param->arg_elements_, in_shape[0], sizeof(ArgElement), ArgCompareAscInt8); - for (int j = 0; j < param->topk_; ++j) { - size_t out_offset = j * param->out_strides_[0] + i; - output[out_offset] = param->arg_elements_[j].data_.f_data_; - } - } -} - -void ArgMinDim0OutIndex(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param) { - for (int32_t i = 0; i < param->in_strides_[0]; ++i) { - for (int j = 0; j < in_shape[0]; ++j) { - size_t offset = param->in_strides_[0] * j + i; - param->arg_elements_[j].index_ = j; - param->arg_elements_[j].data_.f_data_ = input[offset]; - } - qsort(param->arg_elements_, in_shape[0], sizeof(ArgElement), ArgCompareAscInt8); - for (int j = 0; j < param->topk_; ++j) { - size_t out_offset = j * param->out_strides_[0] + i; - output[out_offset] = param->arg_elements_[j].index_; - } - } -} - -void ArgMaxDim1OutValue(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param) { - int in_shape1 = in_shape[1]; - for (int i = 0; i < in_shape[0]; ++i) { - size_t in_dim0_offset = i * param->in_strides_[0]; - size_t out_dim0_offset = i * param->out_strides_[0]; - for (int j = 0; j < param->in_strides_[1]; ++j) { - for (int k = 0; k < in_shape1; ++k) { - size_t offset = param->in_strides_[1] * k + in_dim0_offset + j; - param->arg_elements_[k].index_ = k; - param->arg_elements_[k].data_.f_data_ = input[offset]; - } - qsort(param->arg_elements_, in_shape1, sizeof(ArgElement), ArgCompareDescInt8); - for (int k = 0; k < param->topk_; ++k) { - size_t out_offset = out_dim0_offset + j + k * 
param->out_strides_[1]; - output[out_offset] = param->arg_elements_[k].data_.f_data_; - } - } - } -} - -void ArgMaxDim1OutIndex(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param) { - int in_shape1 = in_shape[1]; - for (int i = 0; i < in_shape[0]; ++i) { - size_t in_dim0_offset = i * param->in_strides_[0]; - size_t out_dim0_offset = i * param->out_strides_[0]; - for (int j = 0; j < param->in_strides_[1]; ++j) { - for (int k = 0; k < in_shape1; ++k) { - size_t offset = param->in_strides_[1] * k + in_dim0_offset + j; - param->arg_elements_[k].index_ = k; - param->arg_elements_[k].data_.f_data_ = input[offset]; - } - qsort(param->arg_elements_, in_shape1, sizeof(ArgElement), ArgCompareDescInt8); - for (int k = 0; k < param->topk_; ++k) { - size_t out_offset = out_dim0_offset + j + k * param->out_strides_[1]; - output[out_offset] = param->arg_elements_[k].index_; - } - } - } -} - -void ArgMinDim1OutValue(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param) { - int in_shape1 = in_shape[1]; - for (int i = 0; i < in_shape[0]; ++i) { - size_t in_dim0_offset = i * param->in_strides_[0]; - size_t out_dim0_offset = i * param->out_strides_[0]; - for (int j = 0; j < param->in_strides_[1]; ++j) { - for (int k = 0; k < in_shape1; ++k) { - size_t offset = param->in_strides_[1] * k + in_dim0_offset + j; - param->arg_elements_[k].index_ = k; - param->arg_elements_[k].data_.f_data_ = input[offset]; - } - qsort(param->arg_elements_, in_shape1, sizeof(ArgElement), ArgCompareAscInt8); - for (int k = 0; k < param->topk_; ++k) { - size_t out_offset = out_dim0_offset + j + k * param->out_strides_[1]; - output[out_offset] = param->arg_elements_[k].data_.f_data_; - } - } - } -} - -void ArgMinDim1OutIndex(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param) { - int in_shape1 = in_shape[1]; - for (int i = 0; i < in_shape[0]; ++i) { - size_t in_dim0_offset = i * param->in_strides_[0]; - size_t 
out_dim0_offset = i * param->out_strides_[0]; - for (int j = 0; j < param->in_strides_[1]; ++j) { - for (int k = 0; k < in_shape1; ++k) { - size_t offset = param->in_strides_[1] * k + in_dim0_offset + j; - param->arg_elements_[k].index_ = k; - param->arg_elements_[k].data_.f_data_ = input[offset]; - } - qsort(param->arg_elements_, in_shape1, sizeof(ArgElement), ArgCompareAscInt8); - for (int k = 0; k < param->topk_; ++k) { - size_t out_offset = out_dim0_offset + j + k * param->out_strides_[1]; - output[out_offset] = param->arg_elements_[k].index_; - } - } - } -} - -void ArgMaxDim2OutValue(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param) { - int in_shape1 = in_shape[1]; - int in_shape2 = in_shape[2]; - for (int i = 0; i < in_shape[0]; ++i) { - size_t in_dim0_offset = i * param->in_strides_[0]; - size_t out_dim0_offset = i * param->out_strides_[0]; - for (int j = 0; j < in_shape1; ++j) { - size_t in_dim1_offset = j * param->in_strides_[1] + in_dim0_offset; - size_t out_dim1_offset = j * param->out_strides_[1] + out_dim0_offset; - for (int k = 0; k < param->in_strides_[2]; ++k) { - for (int l = 0; l < in_shape2; ++l) { - size_t offset = param->in_strides_[2] * l + k + in_dim1_offset; - param->arg_elements_[l].index_ = l; - param->arg_elements_[l].data_.f_data_ = input[offset]; - } - qsort(param->arg_elements_, in_shape2, sizeof(ArgElement), ArgCompareDescInt8); - for (int l = 0; l < param->topk_; ++l) { - size_t out_offset = out_dim1_offset + k + l * param->out_strides_[2]; - output[out_offset] = param->arg_elements_[l].data_.f_data_; - } - } - } - } -} - -void ArgMaxDim2OutIndex(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param) { - int in_shape1 = in_shape[1]; - int in_shape2 = in_shape[2]; - for (int i = 0; i < in_shape[0]; ++i) { - size_t in_dim0_offset = i * param->in_strides_[0]; - size_t out_dim0_offset = i * param->out_strides_[0]; - for (int j = 0; j < in_shape1; ++j) { - size_t 
in_dim1_offset = j * param->in_strides_[1] + in_dim0_offset; - size_t out_dim1_offset = j * param->out_strides_[1] + out_dim0_offset; - for (int k = 0; k < param->in_strides_[2]; ++k) { - for (int l = 0; l < in_shape2; ++l) { - size_t offset = param->in_strides_[2] * l + k + in_dim1_offset; - param->arg_elements_[l].index_ = l; - param->arg_elements_[l].data_.f_data_ = input[offset]; - } - qsort(param->arg_elements_, in_shape2, sizeof(ArgElement), ArgCompareDescInt8); - for (int l = 0; l < param->topk_; ++l) { - size_t out_offset = out_dim1_offset + k + l * param->out_strides_[2]; - output[out_offset] = param->arg_elements_[l].index_; - } - } - } - } -} - -void ArgMinDim2OutValue(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param) { - int in_shape1 = in_shape[1]; - int in_shape2 = in_shape[2]; - for (int i = 0; i < in_shape[0]; ++i) { - size_t in_dim0_offset = i * param->in_strides_[0]; - size_t out_dim0_offset = i * param->out_strides_[0]; - for (int j = 0; j < in_shape1; ++j) { - size_t in_dim1_offset = j * param->in_strides_[1] + in_dim0_offset; - size_t out_dim1_offset = j * param->out_strides_[1] + out_dim0_offset; - for (int k = 0; k < param->in_strides_[2]; ++k) { - for (int l = 0; l < in_shape2; ++l) { - size_t offset = param->in_strides_[2] * l + k + in_dim1_offset; - param->arg_elements_[l].index_ = l; - param->arg_elements_[l].data_.f_data_ = input[offset]; - } - qsort(param->arg_elements_, in_shape2, sizeof(ArgElement), ArgCompareAscInt8); - for (int l = 0; l < param->topk_; ++l) { - size_t out_offset = out_dim1_offset + k + l * param->out_strides_[2]; - output[out_offset] = param->arg_elements_[l].data_.f_data_; - } - } - } - } -} - -void ArgMinDim2OutIndex(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param) { - int in_shape1 = in_shape[1]; - int in_shape2 = in_shape[2]; - for (int i = 0; i < in_shape[0]; ++i) { - size_t in_dim0_offset = i * param->in_strides_[0]; - size_t 
out_dim0_offset = i * param->out_strides_[0]; - for (int j = 0; j < in_shape1; ++j) { - size_t in_dim1_offset = j * param->in_strides_[1] + in_dim0_offset; - size_t out_dim1_offset = j * param->out_strides_[1] + out_dim0_offset; - for (int k = 0; k < param->in_strides_[2]; ++k) { - for (int l = 0; l < in_shape2; ++l) { - size_t offset = param->in_strides_[2] * l + k + in_dim1_offset; - param->arg_elements_[l].index_ = l; - param->arg_elements_[l].data_.f_data_ = input[offset]; - } - qsort(param->arg_elements_, in_shape2, sizeof(ArgElement), ArgCompareAscInt8); - for (int l = 0; l < param->topk_; ++l) { - size_t out_offset = out_dim1_offset + k + l * param->out_strides_[2]; - output[out_offset] = param->arg_elements_[l].index_; - } - } - } - } -} - -void ArgMaxDim3OutValue(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param) { - int in_shape1 = in_shape[1]; - int in_shape2 = in_shape[2]; - int in_shape3 = in_shape[3]; - for (int i = 0; i < in_shape[0]; ++i) { - size_t in_dim0_offset = i * param->in_strides_[0]; - size_t out_dim0_offset = i * param->out_strides_[0]; - for (int j = 0; j < in_shape1; ++j) { - size_t in_dim1_offset = j * param->in_strides_[1] + in_dim0_offset; - size_t out_dim1_offset = j * param->out_strides_[1] + out_dim0_offset; - for (int k = 0; k < in_shape2; ++k) { - size_t in_dim2_offset = k * param->in_strides_[2] + in_dim1_offset; - size_t out_dim2_offset = k * param->out_strides_[2] + out_dim1_offset; - for (int l = 0; l < in_shape3; ++l) { - size_t offset = l + in_dim2_offset; - param->arg_elements_[l].index_ = l; - param->arg_elements_[l].data_.f_data_ = input[offset]; - } - qsort(param->arg_elements_, in_shape3, sizeof(ArgElement), ArgCompareDescInt8); - for (int l = 0; l < param->topk_; ++l) { - size_t out_offset = out_dim2_offset + l; - output[out_offset] = param->arg_elements_[l].data_.f_data_; - } - } - } - } -} - -void ArgMaxDim3OutIndex(const int8_t *input, int8_t *output, const int *in_shape, 
ArgMinMaxParameter *param) { - int in_shape1 = in_shape[1]; - int in_shape2 = in_shape[2]; - int in_shape3 = in_shape[3]; - for (int i = 0; i < in_shape[0]; ++i) { - size_t in_dim0_offset = i * param->in_strides_[0]; - size_t out_dim0_offset = i * param->out_strides_[0]; - for (int j = 0; j < in_shape1; ++j) { - size_t in_dim1_offset = j * param->in_strides_[1] + in_dim0_offset; - size_t out_dim1_offset = j * param->out_strides_[1] + out_dim0_offset; - for (int k = 0; k < in_shape2; ++k) { - size_t in_dim2_offset = k * param->in_strides_[2] + in_dim1_offset; - size_t out_dim2_offset = k * param->out_strides_[2] + out_dim1_offset; - for (int l = 0; l < in_shape3; ++l) { - size_t offset = l + in_dim2_offset; - param->arg_elements_[l].index_ = l; - param->arg_elements_[l].data_.f_data_ = input[offset]; - } - qsort(param->arg_elements_, in_shape3, sizeof(ArgElement), ArgCompareDescInt8); - for (int l = 0; l < param->topk_; ++l) { - size_t out_offset = out_dim2_offset + l; - output[out_offset] = param->arg_elements_[l].index_; - } - } - } - } -} - -void ArgMinDim3OutValue(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param) { - int in_shape1 = in_shape[1]; - int in_shape2 = in_shape[2]; - int in_shape3 = in_shape[3]; - for (int i = 0; i < in_shape[0]; ++i) { - size_t in_dim0_offset = i * param->in_strides_[0]; - size_t out_dim0_offset = i * param->out_strides_[0]; - for (int j = 0; j < in_shape1; ++j) { - size_t in_dim1_offset = j * param->in_strides_[1] + in_dim0_offset; - size_t out_dim1_offset = j * param->out_strides_[1] + out_dim0_offset; - for (int k = 0; k < in_shape2; ++k) { - size_t in_dim2_offset = k * param->in_strides_[2] + in_dim1_offset; - size_t out_dim2_offset = k * param->out_strides_[2] + out_dim1_offset; - for (int l = 0; l < in_shape3; ++l) { - size_t offset = l + in_dim2_offset; - param->arg_elements_[l].index_ = l; - param->arg_elements_[l].data_.f_data_ = input[offset]; - } - qsort(param->arg_elements_, in_shape3, 
sizeof(ArgElement), ArgCompareAscInt8); - for (int l = 0; l < param->topk_; ++l) { - size_t out_offset = out_dim2_offset + l; - output[out_offset] = param->arg_elements_[l].data_.f_data_; - } - } - } - } -} - -void ArgMinDim3OutIndex(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param) { - int in_shape1 = in_shape[1]; - int in_shape2 = in_shape[2]; - int in_shape3 = in_shape[3]; - for (int i = 0; i < in_shape[0]; ++i) { - size_t in_dim0_offset = i * param->in_strides_[0]; - size_t out_dim0_offset = i * param->out_strides_[0]; - for (int j = 0; j < in_shape1; ++j) { - size_t in_dim1_offset = j * param->in_strides_[1] + in_dim0_offset; - size_t out_dim1_offset = j * param->out_strides_[1] + out_dim0_offset; - for (int k = 0; k < in_shape2; ++k) { - size_t in_dim2_offset = k * param->in_strides_[2] + in_dim1_offset; - size_t out_dim2_offset = k * param->out_strides_[2] + out_dim1_offset; - for (int l = 0; l < in_shape3; ++l) { - size_t offset = l + in_dim2_offset; - param->arg_elements_[l].index_ = l; - param->arg_elements_[l].data_.f_data_ = input[offset]; - } - qsort(param->arg_elements_, in_shape3, sizeof(ArgElement), ArgCompareAscInt8); - for (int l = 0; l < param->topk_; ++l) { - size_t out_offset = out_dim2_offset + l; - output[out_offset] = param->arg_elements_[l].index_; - } - } - } - } -} - -void ArgMaxDim0(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param) { - if (param->out_value_) { - ArgMaxDim0OutValue(input, output, in_shape, param); - } else { - ArgMaxDim0OutIndex(input, output, in_shape, param); - } -} - -void ArgMinDim0(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param) { - if (param->out_value_) { - ArgMinDim0OutValue(input, output, in_shape, param); - } else { - ArgMinDim0OutIndex(input, output, in_shape, param); - } -} - -void ArgMaxDim1(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param) { - if (param->out_value_) { 
- ArgMaxDim1OutValue(input, output, in_shape, param); - } else { - ArgMaxDim1OutIndex(input, output, in_shape, param); - } -} - -void ArgMinDim1(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param) { - if (param->out_value_) { - ArgMinDim1OutValue(input, output, in_shape, param); - } else { - ArgMinDim1OutIndex(input, output, in_shape, param); - } -} - -void ArgMaxDim2(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param) { - if (param->out_value_) { - ArgMaxDim2OutValue(input, output, in_shape, param); - } else { - ArgMaxDim2OutIndex(input, output, in_shape, param); - } -} - -void ArgMinDim2(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param) { - if (param->out_value_) { - ArgMinDim2OutValue(input, output, in_shape, param); - } else { - ArgMinDim2OutIndex(input, output, in_shape, param); - } -} - -void ArgMaxDim3(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param) { - if (param->out_value_) { - ArgMaxDim3OutValue(input, output, in_shape, param); - } else { - ArgMaxDim3OutIndex(input, output, in_shape, param); - } -} - -void ArgMinDim3(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param) { - if (param->out_value_) { - ArgMinDim3OutValue(input, output, in_shape, param); - } else { - ArgMinDim3OutIndex(input, output, in_shape, param); - } -} - -void ArgMax(const int8_t *input, int8_t *output, ArgMinMaxParameter *param, int pre_axis_count, int axis_count, - int after_axis_count) { - bool out_value = param->out_value_; - for (int i = 0; i < pre_axis_count; ++i) { - size_t output_offset = i * after_axis_count; - size_t input_offset = output_offset * axis_count; - for (int j = 0; j < after_axis_count; ++j) { - int8_t value = -INT8_MAX_VALUE; - int8_t index = 0; - for (int k = 0; k < axis_count; ++k) { - int8_t value_tmp = input[input_offset + k * after_axis_count + j]; - if (value_tmp > value) { - value = 
value_tmp; - index = k; - } - } - output[output_offset + j] = out_value ? value : index; - } - } -} - -void ArgMin(const int8_t *input, int8_t *output, ArgMinMaxParameter *param, int pre_axis_count, int axis_count, - int after_axis_count) { - bool out_value = param->out_value_; - for (int i = 0; i < pre_axis_count; ++i) { - size_t output_offset = i * after_axis_count; - size_t input_offset = output_offset * axis_count; - for (int j = 0; j < after_axis_count; ++j) { - int8_t value = INT8_MAX_VALUE; - int8_t index = 0; - for (int k = 0; k < axis_count; ++k) { - int8_t value_tmp = input[input_offset + k * after_axis_count + j]; - if (value_tmp < value) { - value = value_tmp; - index = k; - } - } - output[output_offset + j] = out_value ? value : index; - } - } -} - -#undef INT8_MAX_VALUE diff --git a/mindspore/lite/src/runtime/kernel/arm/opclib/int8/arg_min_max.h b/mindspore/lite/src/runtime/kernel/arm/opclib/int8/arg_min_max.h deleted file mode 100644 index 627f744008e16157e327846ebfe06be6e4dc7634..0000000000000000000000000000000000000000 --- a/mindspore/lite/src/runtime/kernel/arm/opclib/int8/arg_min_max.h +++ /dev/null @@ -1,33 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_ARG_MIN_MAX_H_ -#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_ARG_MIN_MAX_H_ - -#include "src/runtime/kernel/arm/opclib/arg_min_max_parameter.h" - -void ArgMax(const int8_t *input, int8_t *output, ArgMinMaxParameter *param, int pre_axis_count, int axis_count, - int after_axis_count); -void ArgMin(const int8_t *input, int8_t *output, ArgMinMaxParameter *param, int pre_axis_count, int axis_count, - int after_axis_count); -void ArgMaxDim0(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param); -void ArgMinDim0(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param); -void ArgMaxDim1(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param); -void ArgMinDim1(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param); -void ArgMaxDim2(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param); -void ArgMinDim2(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param); -void ArgMaxDim3(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param); -void ArgMinDim3(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param); -#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_ARG_MIN_MAX_H_ diff --git a/mindspore/lite/src/runtime/kernel/arm/opclib/int8/arg_min_max_int8.cc b/mindspore/lite/src/runtime/kernel/arm/opclib/int8/arg_min_max_int8.cc new file mode 100644 index 0000000000000000000000000000000000000000..9ce4d793b914fd846d5fa425c77e880737355e21 --- /dev/null +++ b/mindspore/lite/src/runtime/kernel/arm/opclib/int8/arg_min_max_int8.cc @@ -0,0 +1,221 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "src/runtime/kernel/arm/opclib/int8/arg_min_max_int8.h" +#include + +void CalcParameter(const int *shape, int dims_number, int axis, int *pre_axis_count, int *axis_count, + int *after_axis_count) { + *pre_axis_count = 1; + for (int i = 0; i < axis; ++i) { + *pre_axis_count = (*pre_axis_count) * shape[i]; + } + + *axis_count = shape[axis]; + + *after_axis_count = 1; + for (int i = axis + 1; i < dims_number; ++i) { + *after_axis_count = (*after_axis_count) * shape[i]; + } +} + +void ArgMinMaxQuant(const int8_t *input, int8_t *output, ArgMinMaxParameter *param, int pre_axis_count, int axis_count, + int after_axis_count, QuantArg *in_quant_arg, QuantArg *out_quant_arg) { + bool out_value = param->out_value_; + float output_inverse_scale = 1.f / out_quant_arg->scale_; + float bias = -in_quant_arg->zp_ * in_quant_arg->scale_; + int32_t output_zp = out_quant_arg->zp_; + for (int i = 0; i < pre_axis_count; ++i) { + size_t output_offset = i * after_axis_count; + size_t input_offset = output_offset * axis_count; + for (int j = 0; j < after_axis_count; ++j) { + float value = -FLT_MAX; + if (!param->get_max_) { + value = FLT_MAX; + } + float index = 0.0f; + for (int k = 0; k < axis_count; ++k) { + float value_tmp = input[input_offset + k * after_axis_count + j] * in_quant_arg->scale_ + bias; + if (param->get_max_) { + if (value_tmp > value) { + value = value_tmp; + index = k; + } + } else { + if (value_tmp < value) { + value = value_tmp; + index = k; + } + } + } + float real_out = out_value ? 
value : index; + output[output_offset + j] = real_out * output_inverse_scale + output_zp; + } + } +} + +void ArgMinMaxQuant(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param, + QuantArg *in_quant_arg, QuantArg *out_quant_arg) { + int pre_axis_count = 1; + int axis_count = 1; + int after_axis_count = 1; + CalcParameter(in_shape, param->dims_size_, param->axis_, &pre_axis_count, &axis_count, &after_axis_count); + ArgMinMaxQuant(input, output, param, pre_axis_count, axis_count, after_axis_count, in_quant_arg, out_quant_arg); + return; +} + +int ArgCompareAscInt8(const void *a, const void *b) { + return reinterpret_cast(a)->data_.f_data_ + - reinterpret_cast(b)->data_.f_data_; +} + +int ArgCompareDescInt8(const void *a, const void *b) { + return reinterpret_cast(b)->data_.f_data_ + - reinterpret_cast(a)->data_.f_data_; +} + +int8_t GetInt8Output(float real_out, float output_inverse_scale, int32_t output_zp) { + return real_out * output_inverse_scale + output_zp; +} + +void ArgMinMaxDim0(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param, + QuantArg *in_quant_arg, QuantArg *out_quant_arg) { + bool out_value = param->out_value_; + float output_inverse_scale = 1.f / out_quant_arg->scale_; + float bias = -in_quant_arg->zp_ * in_quant_arg->scale_; + int32_t output_zp = out_quant_arg->zp_; + for (int32_t i = 0; i < param->in_strides_[0]; ++i) { + for (int j = 0; j < in_shape[0]; ++j) { + size_t offset = param->in_strides_[0] * j + i; + param->arg_elements_[j].index_ = j; + param->arg_elements_[j].data_.f_data_ = input[offset] * in_quant_arg->scale_ + bias; + } + if (param->get_max_) { + qsort(param->arg_elements_, in_shape[0], sizeof(ArgElement), ArgCompareDescInt8); + } else { + qsort(param->arg_elements_, in_shape[0], sizeof(ArgElement), ArgCompareAscInt8); + } + + for (int j = 0; j < param->topk_; ++j) { + size_t out_offset = j * param->out_strides_[0] + i; + float real_out = out_value ? 
param->arg_elements_[j].data_.f_data_ : param->arg_elements_[j].index_; + output[out_offset] = GetInt8Output(real_out, output_inverse_scale, output_zp); + } + } +} + +void ArgMinMaxDim1(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param, + QuantArg *in_quant_arg, QuantArg *out_quant_arg) { + bool out_value = param->out_value_; + float output_inverse_scale = 1.f / out_quant_arg->scale_; + float bias = -in_quant_arg->zp_ * in_quant_arg->scale_; + int32_t output_zp = out_quant_arg->zp_; + int in_shape1 = in_shape[1]; + for (int i = 0; i < in_shape[0]; ++i) { + size_t in_dim0_offset = i * param->in_strides_[0]; + size_t out_dim0_offset = i * param->out_strides_[0]; + for (int j = 0; j < param->in_strides_[1]; ++j) { + for (int k = 0; k < in_shape1; ++k) { + size_t offset = param->in_strides_[1] * k + in_dim0_offset + j; + param->arg_elements_[k].index_ = k; + param->arg_elements_[k].data_.f_data_ = input[offset] * in_quant_arg->scale_ + bias; + } + if (param->get_max_) { + qsort(param->arg_elements_, in_shape1, sizeof(ArgElement), ArgCompareDescInt8); + } else { + qsort(param->arg_elements_, in_shape1, sizeof(ArgElement), ArgCompareAscInt8); + } + + for (int k = 0; k < param->topk_; ++k) { + size_t out_offset = out_dim0_offset + j + k * param->out_strides_[1]; + float real_out = out_value ? 
param->arg_elements_[k].data_.f_data_ : param->arg_elements_[k].index_; + output[out_offset] = GetInt8Output(real_out, output_inverse_scale, output_zp); + } + } + } +} + +void ArgMinMaxDim2(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param, + QuantArg *in_quant_arg, QuantArg *out_quant_arg) { + bool out_value = param->out_value_; + float output_inverse_scale = 1.f / out_quant_arg->scale_; + float bias = -in_quant_arg->zp_ * in_quant_arg->scale_; + int32_t output_zp = out_quant_arg->zp_; + int in_shape1 = in_shape[1]; + int in_shape2 = in_shape[2]; + for (int i = 0; i < in_shape[0]; ++i) { + size_t in_dim0_offset = i * param->in_strides_[0]; + size_t out_dim0_offset = i * param->out_strides_[0]; + for (int j = 0; j < in_shape1; ++j) { + size_t in_dim1_offset = j * param->in_strides_[1] + in_dim0_offset; + size_t out_dim1_offset = j * param->out_strides_[1] + out_dim0_offset; + for (int k = 0; k < param->in_strides_[2]; ++k) { + for (int l = 0; l < in_shape2; ++l) { + size_t offset = param->in_strides_[2] * l + k + in_dim1_offset; + param->arg_elements_[l].index_ = l; + param->arg_elements_[l].data_.f_data_ = input[offset] * in_quant_arg->scale_ + bias; + } + if (param->get_max_) { + qsort(param->arg_elements_, in_shape2, sizeof(ArgElement), ArgCompareDescInt8); + } else { + qsort(param->arg_elements_, in_shape2, sizeof(ArgElement), ArgCompareAscInt8); + } + for (int l = 0; l < param->topk_; ++l) { + size_t out_offset = out_dim1_offset + k + l * param->out_strides_[2]; + float real_out = out_value ? 
param->arg_elements_[l].data_.f_data_ : param->arg_elements_[l].index_; + output[out_offset] = GetInt8Output(real_out, output_inverse_scale, output_zp); + } + } + } + } +} + +void ArgMinMaxDim3(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param, + QuantArg *in_quant_arg, QuantArg *out_quant_arg) { + bool out_value = param->out_value_; + float output_inverse_scale = 1.f / out_quant_arg->scale_; + float bias = -in_quant_arg->zp_ * in_quant_arg->scale_; + int32_t output_zp = out_quant_arg->zp_; + int in_shape1 = in_shape[1]; + int in_shape2 = in_shape[2]; + int in_shape3 = in_shape[3]; + for (int i = 0; i < in_shape[0]; ++i) { + size_t in_dim0_offset = i * param->in_strides_[0]; + size_t out_dim0_offset = i * param->out_strides_[0]; + for (int j = 0; j < in_shape1; ++j) { + size_t in_dim1_offset = j * param->in_strides_[1] + in_dim0_offset; + size_t out_dim1_offset = j * param->out_strides_[1] + out_dim0_offset; + for (int k = 0; k < in_shape2; ++k) { + size_t in_dim2_offset = k * param->in_strides_[2] + in_dim1_offset; + size_t out_dim2_offset = k * param->out_strides_[2] + out_dim1_offset; + for (int l = 0; l < in_shape3; ++l) { + size_t offset = l + in_dim2_offset; + param->arg_elements_[l].index_ = l; + param->arg_elements_[l].data_.f_data_ = input[offset] * in_quant_arg->scale_ + bias; + } + if (param->get_max_) { + qsort(param->arg_elements_, in_shape3, sizeof(ArgElement), ArgCompareDescInt8); + } else { + qsort(param->arg_elements_, in_shape3, sizeof(ArgElement), ArgCompareAscInt8); + } + for (int l = 0; l < param->topk_; ++l) { + size_t out_offset = out_dim2_offset + l; + float real_out = out_value ? 
param->arg_elements_[l].data_.f_data_ : param->arg_elements_[l].index_; + output[out_offset] = GetInt8Output(real_out, output_inverse_scale, output_zp); + } + } + } + } +} diff --git a/mindspore/lite/src/runtime/kernel/arm/opclib/int8/arg_min_max_int8.h b/mindspore/lite/src/runtime/kernel/arm/opclib/int8/arg_min_max_int8.h new file mode 100644 index 0000000000000000000000000000000000000000..d3528a47e3c85bd70c212e4a2faf35d461ebe5b0 --- /dev/null +++ b/mindspore/lite/src/runtime/kernel/arm/opclib/int8/arg_min_max_int8.h @@ -0,0 +1,32 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_ARG_MIN_MAX_INT8_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_ARG_MIN_MAX_INT8_H_ + +#include "src/runtime/kernel/arm/opclib/arg_min_max_parameter.h" +#include "src/runtime/kernel/arm/opclib/quantization/quantize.h" + +void ArgMinMaxQuant(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param, + QuantArg *in_quant, QuantArg *out_quant); +void ArgMinMaxDim0(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param, + QuantArg *in_quant, QuantArg *out_quant); +void ArgMinMaxDim1(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param, + QuantArg *in_quant, QuantArg *out_quant); +void ArgMinMaxDim2(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param, + QuantArg *in_quant, QuantArg *out_quant); +void ArgMinMaxDim3(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param, + QuantArg *in_quant, QuantArg *out_quant); +#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_ARG_MIN_MAX_INT8_H_ diff --git a/mindspore/lite/src/runtime/kernel/arm/opclib/int8/arithmetic_int8.cc b/mindspore/lite/src/runtime/kernel/arm/opclib/int8/arithmetic_int8.cc new file mode 100644 index 0000000000000000000000000000000000000000..5ec1dba10ee02ff98277c1d27b9b29fd4e65ea05 --- /dev/null +++ b/mindspore/lite/src/runtime/kernel/arm/opclib/int8/arithmetic_int8.cc @@ -0,0 +1,63 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "src/runtime/kernel/arm/opclib/int8/arithmetic_int8.h" +#ifdef ENABLE_NEON +#include +#endif +#include "src/runtime/kernel/arm/opclib/errorcode.h" + +int ElementNotEqual(int8_t *input0, int8_t *input1, int8_t *output, int element_size) { + for (int index = 0; index < element_size; ++index) { + output[index] = (int8_t)(input0[index] != input1[index]); + } + return OPCLIB_OK; +} + +int ElementEqual(int8_t *input0, int8_t *input1, int8_t *output, int element_size) { + for (int index = 0; index < element_size; ++index) { + output[index] = (int8_t)(input0[index] == input1[index]); + } + return OPCLIB_OK; +} + +int ElementLess(int8_t *input0, int8_t *input1, int8_t *output, int element_size) { + for (int index = 0; index < element_size; ++index) { + output[index] = (int8_t)(input0[index] < input1[index]); + } + return OPCLIB_OK; +} + +int ElementLessEqual(int8_t *input0, int8_t *input1, int8_t *output, int element_size) { + for (int index = 0; index < element_size; ++index) { + output[index] = (int8_t)(input0[index] <= input1[index]); + } + return OPCLIB_OK; +} + +int ElementGreater(int8_t *input0, int8_t *input1, int8_t *output, int element_size) { + for (int index = 0; index < element_size; ++index) { + output[index] = (int8_t)(input0[index] > input1[index]); + } + return OPCLIB_OK; +} + +int ElementGreaterEqual(int8_t *input0, int8_t *input1, int8_t *output, int element_size) { + for (int index = 0; index < element_size; ++index) { + output[index] = (int8_t)(input0[index] >= input1[index]); + } + return OPCLIB_OK; +} diff --git 
a/mindspore/lite/src/runtime/kernel/arm/opclib/int8/arithmetic_int8.h b/mindspore/lite/src/runtime/kernel/arm/opclib/int8/arithmetic_int8.h new file mode 100644 index 0000000000000000000000000000000000000000..fdb72b133d2c567e9a57d7ff9222562025d3630c --- /dev/null +++ b/mindspore/lite/src/runtime/kernel/arm/opclib/int8/arithmetic_int8.h @@ -0,0 +1,32 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_ARITHMETIC_INT8_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_ARITHMETIC_INT8_H_ + +#include "src/runtime/kernel/arm/opclib/op_base.h" + +int ElementNotEqual(int8_t *input0, int8_t *input1, int8_t *output, int element_size); + +int ElementEqual(int8_t *input0, int8_t *input1, int8_t *output, int element_size); + +int ElementLess(int8_t *input0, int8_t *input1, int8_t *output, int element_size); + +int ElementLessEqual(int8_t *input0, int8_t *input1, int8_t *output, int element_size); + +int ElementGreater(int8_t *input0, int8_t *input1, int8_t *output, int element_size); + +int ElementGreaterEqual(int8_t *input0, int8_t *input1, int8_t *output, int element_size); +#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_ARITHMETIC_INT8_H_ diff --git a/mindspore/lite/src/runtime/kernel/arm/opclib/int8/batch_to_space_int8.cc b/mindspore/lite/src/runtime/kernel/arm/opclib/int8/batch_to_space_int8.cc new file mode 100644 index 
0000000000000000000000000000000000000000..3279ad82a3737df7e95c9d9a0a3c8befd0255b82 --- /dev/null +++ b/mindspore/lite/src/runtime/kernel/arm/opclib/int8/batch_to_space_int8.cc @@ -0,0 +1,111 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "src/runtime/kernel/arm/opclib/int8/batch_to_space_int8.h" +#include "src/runtime/kernel/arm/opclib/arithmetic_common.h" + +void BatchToSpaceNoCropForNHWC(const int8_t *input, int8_t *output, const int *in_shape, int out_n, const int *block, + QuantArg *in_quant_arg, QuantArg *out_quant_arg) { + int block_h = block[0]; + int block_w = block[1]; + int in_h = in_shape[1]; + int in_w = in_shape[2]; + int in_c = in_shape[3]; + size_t stride_h = block_w * out_n; + size_t output_offset = 0; + size_t in_stride_h = in_w * in_c; + size_t in_stride_n = in_stride_h * in_h; + float output_inverse_scale = 1.f / out_quant_arg->scale_; + float scale = in_quant_arg->scale_ * output_inverse_scale; + float bias = -in_quant_arg->zp_ * scale; + int32_t output_zp = out_quant_arg->zp_; + + for (int n = 0; n < out_n; ++n) { + for (int h = 0; h < in_h; ++h) { + size_t h_offset = h * in_stride_h; + for (int bh = 0; bh < block_h; ++bh) { + for (int w = 0; w < in_w; ++w) { + size_t w_offset = w * in_c; + for (int bw = 0; bw < block_w; ++bw) { + size_t in_offset = in_stride_n * (bh * stride_h + bw * out_n + n) + w_offset + h_offset; + for (int c = 0; c < in_c; ++c) { + int32_t 
output_tmp = round(input[in_offset + c] * scale + bias) + output_zp; + output_tmp = output_tmp > 127 ? 127 : output_tmp; + output_tmp = output_tmp < -128 ? -128 : output_tmp; + output[output_offset++] = output_tmp; + } + } + } + } + } + } +} + +void BatchToSpaceForNHWC(const int8_t *input, int8_t *output, const int *in_shape, int out_n, const int *block, + const int *crops, QuantArg *in_quant_arg, QuantArg *out_quant_arg) { + int block_h = block[0]; + int block_w = block[1]; + int in_n = in_shape[0]; + int in_h = in_shape[1]; + int in_w = in_shape[2]; + int in_c = in_shape[3]; + int h_start = crops[0] / block_h; + int h_valid_begin = crops[0]; + int h_end = MSMIN((in_h * block_h - crops[1]) / block_h + 1, in_h); + int h_valid_end = in_h * block_h - crops[1] - 1; + int w_start = crops[2] / block_w; + int w_valid_begin = crops[2]; + int w_end = MSMIN((in_w * block_w - crops[3]) / block_w + 1, in_w); + int w_valid_end = in_w * block_w - crops[3] - 1; + + size_t stride_h = block_w * out_n; + size_t output_offset = 0; + size_t in_stride_h = in_w * in_c; + size_t in_stride_n = in_stride_h * in_h; + + float output_inverse_scale = 1.f / out_quant_arg->scale_; + float scale = in_quant_arg->scale_ * output_inverse_scale; + float bias = -in_quant_arg->zp_ * scale; + int32_t output_zp = out_quant_arg->zp_; + + for (int n = 0; n < out_n; ++n) { + for (int h = h_start; h < h_end; ++h) { + size_t h_offset = h * in_stride_h; + for (int bh = 0; bh < block_h; ++bh) { + size_t h_index = h * block_h + bh; + if (h_index < h_valid_begin || h_index > h_valid_end) { + continue; + } + for (int w = w_start; w < w_end; ++w) { + size_t w_offset = w * in_c; + for (int bw = 0; bw < block_w; ++bw) { + size_t w_index = w * block_w + bw; + if (w_index < w_valid_begin || w_index > w_valid_end) { + continue; + } + size_t in_offset = in_stride_n * (bh * stride_h + bw * out_n + n) + w_offset + h_offset; + for (int c = 0; c < in_c; ++c) { + int32_t output_tmp = round(input[in_offset + c] * scale + 
bias) + output_zp; + output_tmp = output_tmp > 127 ? 127 : output_tmp; + output_tmp = output_tmp < -128 ? -128 : output_tmp; + output[output_offset++] = output_tmp; + } + } + } + } + } + } +} diff --git a/mindspore/lite/src/runtime/kernel/arm/opclib/int8/batch_to_space_int8.h b/mindspore/lite/src/runtime/kernel/arm/opclib/int8/batch_to_space_int8.h new file mode 100644 index 0000000000000000000000000000000000000000..efbac8adb84570221c36017dd65f9c620c7451f6 --- /dev/null +++ b/mindspore/lite/src/runtime/kernel/arm/opclib/int8/batch_to_space_int8.h @@ -0,0 +1,25 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_BATCH_TO_SPACE_INT8_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_BATCH_TO_SPACE_INT8_H_ +#include "src/runtime/kernel/arm/opclib/op_base.h" +#include "src/runtime/kernel/arm/opclib/quantization/quantize.h" + +void BatchToSpaceNoCropForNHWC(const int8_t *input, int8_t *output, const int *in_shape, int out_n, const int *block, + QuantArg *in_quant_arg, QuantArg *out_quant_arg); +void BatchToSpaceForNHWC(const int8_t *input, int8_t *output, const int *in_shape, int out_n, const int *block, + const int *crops, QuantArg *in_quant_arg, QuantArg *out_quant_arg); +#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_BATCH_TO_SPACE_INT8_H_ diff --git a/mindspore/lite/src/runtime/kernel/arm/opclib/int8/depth_to_space_int8.cc b/mindspore/lite/src/runtime/kernel/arm/opclib/int8/depth_to_space_int8.cc new file mode 100644 index 0000000000000000000000000000000000000000..7d9dc74e5d89b120d6d71dcdbdd075e85f289839 --- /dev/null +++ b/mindspore/lite/src/runtime/kernel/arm/opclib/int8/depth_to_space_int8.cc @@ -0,0 +1,51 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "src/runtime/kernel/arm/opclib/int8/depth_to_space_int8.h" +#include + +void DepthToSpaceForNHWC(const int8_t *input, int8_t *output, int *in_shape, DepthToSpaceParameter *param, + QuantArg *in_quant_arg, QuantArg *out_quant_arg) { + int32_t block_size = param->block_size_; + int32_t in_shape_dim2 = in_shape[2]; + int32_t in_shape_dim1 = in_shape[1]; + size_t copy_size = block_size * param->out_stride_dim2_; + float output_inverse_scale = 1.f / out_quant_arg->scale_; + float scale = in_quant_arg->scale_ * output_inverse_scale; + float bias = -in_quant_arg->zp_ * scale; + int32_t output_zp = out_quant_arg->zp_; + for (int i = 0; i < in_shape[0]; ++i) { + size_t in_offset_n = i * param->in_stride_dim0_; + size_t out_offset_n = i * param->out_stride_dim0_; + for (int j = 0; j < in_shape_dim1; ++j) { + size_t in_offset_h = in_offset_n + j * param->in_stride_dim1_; + size_t out_offset_h = out_offset_n + j * block_size * param->out_stride_dim1_; + for (int k = 0; k < in_shape_dim2; ++k) { + size_t in_offset_w = in_offset_h + k * param->in_stride_dim2_; + size_t out_offset_w = out_offset_h + k * block_size * param->out_stride_dim2_; + for (int l = 0; l < block_size; ++l) { + size_t out_offset = out_offset_w + l * param->out_stride_dim1_; + size_t in_offset = in_offset_w + l * block_size * param->out_stride_dim2_; + for (int m = 0; m < copy_size; ++m) { + int32_t output_tmp = round(input[in_offset + m] * scale + bias) + output_zp; + output_tmp = output_tmp > 127 ? 127 : output_tmp; + output_tmp = output_tmp < -128 ? 
-128 : output_tmp; + output[out_offset + m] = output_tmp; + } + } + } + } + } +} diff --git a/mindspore/lite/src/runtime/kernel/arm/opclib/int8/depth_to_space_int8.h b/mindspore/lite/src/runtime/kernel/arm/opclib/int8/depth_to_space_int8.h new file mode 100644 index 0000000000000000000000000000000000000000..9b60653eb7761783e467acca21a8e4f220f83ba9 --- /dev/null +++ b/mindspore/lite/src/runtime/kernel/arm/opclib/int8/depth_to_space_int8.h @@ -0,0 +1,24 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_DEPTH_TO_SPACE_INT8_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_DEPTH_TO_SPACE_INT8_H_ + +#include "src/runtime/kernel/arm/opclib/depth_to_space_parameter.h" +#include "src/runtime/kernel/arm/opclib/quantization/quantize.h" + +void DepthToSpaceForNHWC(const int8_t *input, int8_t *output, int *in_shape, DepthToSpaceParameter *param, + QuantArg *in_quant_arg, QuantArg *out_quant_arg); +#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_DEPTH_TO_SPACE_INT8_H_ diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/argminmax_fp32_test.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/argminmax_fp32_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..4eea25e0b05152a79efa1e6e1971bb57e468f150 --- /dev/null +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/argminmax_fp32_test.cc @@ -0,0 +1,328 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "mindspore/core/utils/log_adapter.h" +#include "common/common_test.h" +#include "mindspore/lite/src/runtime/kernel/arm/opclib/fp32/arg_min_max.h" +#include "mindspore/lite/src/runtime/kernel/arm/opclib/arg_min_max.h" +#include "mindspore/lite/src/runtime/kernel/arm/opclib/arithmetic_common.h" + +namespace mindspore { + +class TestArgMinMaxTestFp32 : public mindspore::Common { + public: + TestArgMinMaxTestFp32() = default; +}; + +TEST_F(TestArgMinMaxTestFp32, ArgMaxTest1) { + std::vector in = {10, 20, 30, 40, 90, + 20, 11, 15, 1, 50, + 30, 45, 25, 50, 30}; + std::vector except_out = {2, 2, 0, 2, 0}; + std::vector shape = {3, 5}; + float out[5]; + ArgMinMaxParameter param; + param.topk_ = 1; + param.out_value_ = false; + param.axis_ = 0; + param.data_type_ = 43; + param.dims_size_ = 2; + param.get_max_ = true; + ArgMinMax(in.data(), out, shape.data(), ¶m); + for (size_t i = 0; i < except_out.size(); ++i) { + std::cout << out[i] << " "; + } + std::cout << "\n"; + CompareOutputData(out, except_out.data(), except_out.size(), 0.000001); +} + +TEST_F(TestArgMinMaxTestFp32, ArgMaxTest2) { + std::vector in = {10, 20, 30, 40, 90, + 20, 11, 15, 1, 50, + 30, 45, 25, 50, 30}; + std::vector except_out = {30, 45, 30, 50, 90}; + std::vector shape = {3, 5}; + float out[5]; + ArgMinMaxParameter param; + param.topk_ = 1; + param.out_value_ = true; + param.axis_ = 0; + param.data_type_ = 43; + param.dims_size_ = 2; + param.get_max_ = true; + ArgMinMax(in.data(), out, shape.data(), ¶m); + CompareOutputData(out, except_out.data(), except_out.size(), 0.000001); +} + +TEST_F(TestArgMinMaxTestFp32, ArgMinTest2) { + std::vector in = {10, 20, 30, 40, 90, + 20, 11, 15, 1, 50, + 30, 45, 25, 50, 30}; + std::vector except_out = {10, 11, 15, 1, 30}; + std::vector shape = {3, 5}; + float out[5]; + ArgMinMaxParameter param; + param.topk_ = 1; + param.out_value_ = true; + param.axis_ = 0; + param.data_type_ = 43; + param.dims_size_ = 2; + param.get_max_ = false; + ArgMinMax(in.data(), 
out, shape.data(), ¶m); + CompareOutputData(out, except_out.data(), except_out.size(), 0.000001); +} + +TEST_F(TestArgMinMaxTestFp32, ArgMaxTest3_axis2_out_data) { + std::vector in = {10, 20, 30, 40, 90, + 20, 11, 15, 1, 50, + 30, 45, 25, 50, 30}; + std::vector except_out = {30, 45, 30, 50, 90, 20, 20, 25, 40, 50}; + ArgMinMaxParameter param; + param.axis_ = 2; + std::vector in_shape = {1, 1, 3, 5}; + param.arg_elements_ = reinterpret_cast(malloc(in_shape[param.axis_] * sizeof(ArgElement))); + param.out_value_ = true; + param.topk_ = 2; + std::vector out_shape = {1, 1, 2, 5}; + ComputeStrides(in_shape.data(), param.in_strides_, in_shape.size()); + ComputeStrides(out_shape.data(), param.out_strides_, out_shape.size()); + float out[10]; + ArgMaxDim2(in.data(), out, in_shape.data(), ¶m); + CompareOutputData(out, except_out.data(), except_out.size(), 0.00001); +} + +TEST_F(TestArgMinMaxTestFp32, ArgMaxTest3_axis2_out_index) { + std::vector in = {10, 20, 30, 40, 90, + 20, 11, 15, 1, 50, + 30, 45, 25, 50, 30}; + std::vector except_out = {2, 2, 0, 2, 0, 1, 0, 2, 0, 1}; + ArgMinMaxParameter param; + param.axis_ = 2; + std::vector in_shape = {1, 1, 3, 5}; + param.arg_elements_ = reinterpret_cast(malloc(in_shape[param.axis_] * sizeof(ArgElement))); + param.out_value_ = false; + param.topk_ = 2; + std::vector out_shape = {1, 1, 2, 5}; + ComputeStrides(in_shape.data(), param.in_strides_, in_shape.size()); + ComputeStrides(out_shape.data(), param.out_strides_, out_shape.size()); + float out[10]; + ArgMaxDim2(in.data(), out, in_shape.data(), ¶m); + CompareOutputData(out, except_out.data(), except_out.size(), 0.00001); +} + +TEST_F(TestArgMinMaxTestFp32, ArgMaxTest4_axis3_out_data) { + std::vector in = {10, 20, 30, 40, 90, + 20, 11, 15, 1, 50, + 30, 45, 25, 50, 30}; + std::vector except_out = {90, 40, + 50, 20, + 50, 45}; + ArgMinMaxParameter param; + param.axis_ = 3; + std::vector in_shape = {1, 1, 3, 5}; + param.arg_elements_ = reinterpret_cast(malloc(in_shape[param.axis_] * 
sizeof(ArgElement))); + param.out_value_ = true; + param.topk_ = 2; + std::vector out_shape = {1, 1, 3, 2}; + ComputeStrides(in_shape.data(), param.in_strides_, in_shape.size()); + ComputeStrides(out_shape.data(), param.out_strides_, out_shape.size()); + float out[6]; + ArgMaxDim3(in.data(), out, in_shape.data(), ¶m); + CompareOutputData(out, except_out.data(), except_out.size(), 0.00001); +} + +TEST_F(TestArgMinMaxTestFp32, ArgMaxTest4_axis3_out_index) { + std::vector in = {10, 20, 30, 40, 90, + 20, 11, 15, 1, 50, + 30, 45, 25, 50, 30}; + std::vector except_out = {4, 3, + 4, 0, + 3, 1}; + ArgMinMaxParameter param; + param.axis_ = 3; + std::vector in_shape = {1, 1, 3, 5}; + param.arg_elements_ = reinterpret_cast(malloc(in_shape[param.axis_] * sizeof(ArgElement))); + param.out_value_ = false; + param.topk_ = 2; + std::vector out_shape = {1, 1, 3, 2}; + ComputeStrides(in_shape.data(), param.in_strides_, in_shape.size()); + ComputeStrides(out_shape.data(), param.out_strides_, out_shape.size()); + float out[6]; + ArgMaxDim3(in.data(), out, in_shape.data(), ¶m); + CompareOutputData(out, except_out.data(), except_out.size(), 0.00001); +} + +TEST_F(TestArgMinMaxTestFp32, ArgMaxTest5_axis1_out_index) { + std::vector in = {100, 2, 300, + 4, 50, 6, + 11, 12, 13, + 34, 35, 36, + 9, 6, 17, + 10, 20, 30, + 10, 20, 30, + 40, 5, 60, + 7, 80, 90, + 10, 11, 120, + 18, 5, 16, + 9, 22, 23}; + std::vector except_out = {0, 1, 0, + 1, 0, 1, + 1, 2, 2, + 2, 1, 2, + 2, 1, 1, + 0, 2, 1, + 0, 0, 0, + 1, 1, 0}; + ArgMinMaxParameter param; + param.axis_ = 1; + std::vector in_shape = {2, 3, 2, 3}; + param.arg_elements_ = reinterpret_cast(malloc(in_shape[param.axis_] * sizeof(ArgElement))); + param.out_value_ = false; + param.topk_ = 2; + std::vector out_shape = {2, 2, 2, 3}; + ComputeStrides(in_shape.data(), param.in_strides_, in_shape.size()); + ComputeStrides(out_shape.data(), param.out_strides_, out_shape.size()); + float out[24]; + ArgMaxDim1(in.data(), out, in_shape.data(), ¶m); + 
CompareOutputData(out, except_out.data(), except_out.size(), 0.00001); +} + +TEST_F(TestArgMinMaxTestFp32, ArgMaxTest5_axis1_out_data) { + std::vector in = {100, 2, 300, + 4, 50, 6, + 11, 12, 13, + 34, 35, 36, + 9, 6, 17, + 10, 20, 30, + 10, 20, 30, + 40, 5, 60, + 7, 80, 90, + 10, 11, 120, + 18, 5, 16, + 9, 22, 23}; + std::vector except_out = {100, 12, 300, + 34, 50, 36, + 11, 6, 17, + 10, 35, 30, + 18, 80, 90, + 40, 22, 120, + 10, 20, 30, + 10, 11, 60}; + ArgMinMaxParameter param; + param.axis_ = 1; + std::vector in_shape = {2, 3, 2, 3}; + param.arg_elements_ = reinterpret_cast(malloc(in_shape[param.axis_] * sizeof(ArgElement))); + param.out_value_ = true; + param.topk_ = 2; + std::vector out_shape = {2, 2, 2, 3}; + ComputeStrides(in_shape.data(), param.in_strides_, in_shape.size()); + ComputeStrides(out_shape.data(), param.out_strides_, out_shape.size()); + float out[24]; + ArgMaxDim1(in.data(), out, in_shape.data(), ¶m); + CompareOutputData(out, except_out.data(), except_out.size(), 0.00001); +} + +TEST_F(TestArgMinMaxTestFp32, ArgMaxTest6_axis0_out_index) { + std::vector in = {100, 2, + 4, 50, + 11, 12, + 34, 35, + 10, 20, + 40, 5, + 7, 80, + 10, 11, + 55, 25, + 5, 15, + 18, 8, + 15, 16}; + std::vector except_out = {0, 2, + 1, 0, + 2, 1, + 0, 0, + 2, 1, + 2, 2, + 0, 0, + 2, 2}; + ArgMinMaxParameter param; + param.axis_ = 1; + std::vector in_shape = {3, 2, 2, 2}; + param.arg_elements_ = reinterpret_cast(malloc(in_shape[param.axis_] * sizeof(ArgElement))); + param.out_value_ = false; + param.topk_ = 2; + std::vector out_shape = {2, 2, 2, 2}; + ComputeStrides(in_shape.data(), param.in_strides_, in_shape.size()); + ComputeStrides(out_shape.data(), param.out_strides_, out_shape.size()); + float out[16]; + ArgMaxDim0(in.data(), out, in_shape.data(), ¶m); + CompareOutputData(out, except_out.data(), except_out.size(), 0.00001); +} + +TEST_F(TestArgMinMaxTestFp32, ArgMaxTest6_axis0_out_data) { + std::vector in = {100, 2, + 4, 50, + 11, 12, + 34, 35, + 10, 20, + 40, 5, + 
7, 80, + 10, 11, + 55, 25, + 5, 15, + 18, 8, + 15, 16}; + std::vector except_out = {100, 25, + 40, 50, + 18, 80, + 34, 35, + 55, 20, + 5, 15, + 11, 12, + 15, 16}; + ArgMinMaxParameter param; + param.axis_ = 1; + std::vector in_shape = {3, 2, 2, 2}; + param.arg_elements_ = reinterpret_cast(malloc(in_shape[param.axis_] * sizeof(ArgElement))); + param.out_value_ = true; + param.topk_ = 2; + std::vector out_shape = {2, 2, 2, 2}; + ComputeStrides(in_shape.data(), param.in_strides_, in_shape.size()); + ComputeStrides(out_shape.data(), param.out_strides_, out_shape.size()); + float out[16]; + ArgMaxDim0(in.data(), out, in_shape.data(), ¶m); + CompareOutputData(out, except_out.data(), except_out.size(), 0.00001); +} + +TEST_F(TestArgMinMaxTestFp32, ArgMinTest1_axis3_out_data) { + std::vector in = {10, 20, 30, 40, 90, + 20, 11, 15, 1, 50, + 30, 45, 25, 50, 30}; + std::vector except_out = {10, 20, + 1, 11, + 25, 30}; + ArgMinMaxParameter param; + param.axis_ = 3; + std::vector in_shape = {1, 1, 3, 5}; + param.arg_elements_ = reinterpret_cast(malloc(in_shape[param.axis_] * sizeof(ArgElement))); + param.out_value_ = true; + param.topk_ = 2; + std::vector out_shape = {1, 1, 3, 2}; + ComputeStrides(in_shape.data(), param.in_strides_, in_shape.size()); + ComputeStrides(out_shape.data(), param.out_strides_, out_shape.size()); + float out[6]; + ArgMinDim3(in.data(), out, in_shape.data(), ¶m); + CompareOutputData(out, except_out.data(), except_out.size(), 0.00001); +} + +} // namespace mindspore diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/batch_to_space_fp32_test.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/batch_to_space_fp32_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..44cc3deab02d0bc21917df23d6b83873c7bfabb2 --- /dev/null +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/batch_to_space_fp32_test.cc @@ -0,0 +1,197 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, 
Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "mindspore/core/utils/log_adapter.h" +#include "common/common_test.h" +#include "mindspore/lite/src/runtime/kernel/arm/opclib/batch_to_space.h" +#include "mindspore/lite/src/runtime/kernel/arm/opclib/arithmetic_common.h" + +namespace mindspore { + +class BatchToSpaceTestFp32 : public mindspore::Common { + public: + BatchToSpaceTestFp32() = default; +}; + + +TEST_F(BatchToSpaceTestFp32, BatchToSpaceTest1) { + float input[12] = {10, 30, 90, 2, 20, 120, 5, 50, 150, 6, 16, 160}; + constexpr int kOutSize = 12; + float expect_out[kOutSize] = {10, 30, 90, 2, 20, 120, 5, 50, 150, 6, 16, 160}; + + float output[kOutSize]; + int in_shape[4] = {4, 1, 1, 3}; + int out_n = 1; + int block[2] = {2, 2}; + BatchToSpaceNoCropForNHWC(input, output, in_shape, out_n, block, sizeof(float)); + for (int i = 0; i < kOutSize; ++i) { + std::cout << output[i] << " "; + } + std::cout << "\n"; + CompareOutputData(output, expect_out, kOutSize, 0.000001); +} + +TEST_F(BatchToSpaceTestFp32, BatchToSpaceTest_crop_1) { + float input[12] = {10, 30, 90, 2, 20, 120, 5, 50, 150, 6, 16, 160}; + constexpr int kOutSize = 3; + float expect_out[kOutSize] = {5, 50, 150}; + + float output[kOutSize]; + int in_shape[4] = {4, 1, 1, 3}; + int out_n = 1; + int block[2] = {2, 2}; + int crops[4] = {1, 0, 0, 1}; + BatchToSpaceForNHWC(input, output, in_shape, out_n, block, crops, sizeof(float)); + for (int i = 0; i < kOutSize; ++i) { + std::cout << output[i] << " "; + } + std::cout << "\n"; + 
CompareOutputData(output, expect_out, kOutSize, 0.000001); +} + +TEST_F(BatchToSpaceTestFp32, BatchToSpaceTest2) { + float input[32] = {1, 10, 3, 30, 9, 90, 11, 110, 2, 20, 4, 40, 10, 100, 12, 120, + 5, 50, 7, 70, 13, 130, 15, 150, 6, 60, 8, 80, 14, 140, 16, 160}; + constexpr int kOutSize = 32; + float expect_out[kOutSize] = {1, 10, 2, 20, 3, 30, 4, 40, 5, 50, 6, 60, 7, 70, 8, 80, + 9, 90, 10, 100, 11, 110, 12, 120, 13, 130, 14, 140, 15, 150, 16, 160}; + + float output[kOutSize]; + int in_shape[4] = {4, 2, 2, 2}; + int out_n = 1; + int block[2] = {2, 2}; + BatchToSpaceNoCropForNHWC(input, output, in_shape, out_n, block, sizeof(float)); + for (int i = 0; i < kOutSize; ++i) { + std::cout << output[i] << " "; + } + std::cout << "\n"; + CompareOutputData(output, expect_out, kOutSize, 0.000001); +} + +TEST_F(BatchToSpaceTestFp32, BatchToSpaceTest_crop_2) { + float input[32] = {1, 10, 3, 30, 9, 90, 11, 110, 2, 20, 4, 40, 10, 100, 12, 120, + 5, 50, 7, 70, 13, 130, 15, 150, 6, 60, 8, 80, 14, 140, 16, 160}; + constexpr int kOutSize = 12; + float expect_out[kOutSize] = {6, 60, 7, 70, 8, 80, + 10, 100, 11, 110, 12, 120}; + + float output[kOutSize]; + int in_shape[4] = {4, 2, 2, 2}; + int out_n = 1; + int block[2] = {2, 2}; + int crops[4] = {1, 1, 1, 0}; + BatchToSpaceForNHWC(input, output, in_shape, out_n, block, crops, sizeof(float)); + for (int i = 0; i < kOutSize; ++i) { + std::cout << output[i] << " "; + } + std::cout << "\n"; + CompareOutputData(output, expect_out, kOutSize, 0.000001); +} + +TEST_F(BatchToSpaceTestFp32, BatchToSpaceTest3) { + float input[64] = {1, 10, 3, 30, 9, 90, 11, 110, 2, 20, 4, 40, 10, 100, 12, 120, + 5, 50, 7, 70, 13, 130, 15, 150, 6, 60, 8, 80, 14, 140, 16, 160, + 21, 10, 23, 30, 29, 90, 211, 110, 22, 20, 24, 40, 210, 100, 212, 120, + 25, 50, 27, 70, 213, 130, 215, 150, 26, 60, 28, 80, 214, 140, 216, 160}; + constexpr int kOutSize = 64; + float expect_out[kOutSize] = {1, 10, 5, 50, 3, 30, 7, 70, 21, 10, 25, 50, 23, 30, 27, 70, + 9, 90, 13, 130, 
11, 110, 15, 150, 29, 90, 213, 130, 211, 110, 215, 150, + 2, 20, 6, 60, 4, 40, 8, 80, 22, 20, 26, 60, 24, 40, 28, 80, + 10, 100, 14, 140, 12, 120, 16, 160, 210, 100, 214, 140, 212, 120, 216, 160}; + + float output[kOutSize]; + int in_shape[4] = {8, 2, 2, 2}; + int out_n = 2; + int block[2] = {2, 2}; + BatchToSpaceNoCropForNHWC(input, output, in_shape, out_n, block, sizeof(float)); + for (int i = 0; i < kOutSize && i < 32; ++i) { + std::cout << output[i] << " "; + } + std::cout << "\n"; + CompareOutputData(output, expect_out, kOutSize, 0.000001); +} + +TEST_F(BatchToSpaceTestFp32, BatchToSpaceTest_crop_3) { + float input[64] = {1, 10, 3, 30, 9, 90, 11, 110, 2, 20, 4, 40, 10, 100, 12, 120, + 5, 50, 7, 70, 13, 130, 15, 150, 6, 60, 8, 80, 14, 140, 16, 160, + 21, 10, 23, 30, 29, 90, 211, 110, 22, 20, 24, 40, 210, 100, 212, 120, + 25, 50, 27, 70, 213, 130, 215, 150, 26, 60, 28, 80, 214, 140, 216, 160}; + constexpr int kOutSize = 16; + float expect_out[kOutSize] = {9, 90, 13, 130, 29, 90, 213, 130, + 10, 100, 14, 140, 210, 100, 214, 140}; + + float output[kOutSize]; + int in_shape[4] = {8, 2, 2, 2}; + int out_n = 2; + int block[2] = {2, 2}; + int crops[4] = {2, 0, 0, 2}; + BatchToSpaceForNHWC(input, output, in_shape, out_n, block, crops, sizeof(float)); + for (int i = 0; i < kOutSize && i < 32; ++i) { + std::cout << output[i] << " "; + } + std::cout << "\n"; + CompareOutputData(output, expect_out, kOutSize, 0.000001); +} + +TEST_F(BatchToSpaceTestFp32, BatchToSpaceTest4) { + float input[96] = {1, 10, 3, 30, 9, 90, 11, 110, 2, 20, 4, 40, 10, 100, 12, 120, 5, 50, 7, 70, + 13, 130, 15, 150, 6, 60, 8, 80, 14, 140, 16, 160, 21, 10, 23, 30, 29, 90, 211, 110, + 22, 20, 24, 40, 210, 100, 212, 120, 25, 50, 27, 70, 213, 130, 215, 150, 26, 60, 28, 80, + 214, 140, 216, 160, 31, 10, 33, 30, 39, 90, 311, 110, 32, 20, 34, 40, 310, 100, 312, 120, + 35, 50, 37, 70, 313, 130, 315, 150, 36, 60, 38, 80, 314, 140, 316, 160}; + constexpr int kOutSize = 96; + float expect_out[kOutSize] = { + 1, 
10, 5, 50, 3, 30, 7, 70, 21, 10, 25, 50, 23, 30, 27, 70, 31, 10, 35, 50, 33, 30, 37, 70, + 9, 90, 13, 130, 11, 110, 15, 150, 29, 90, 213, 130, 211, 110, 215, 150, 39, 90, 313, 130, 311, 110, 315, 150, + 2, 20, 6, 60, 4, 40, 8, 80, 22, 20, 26, 60, 24, 40, 28, 80, 32, 20, 36, 60, 34, 40, 38, 80, + 10, 100, 14, 140, 12, 120, 16, 160, 210, 100, 214, 140, 212, 120, 216, 160, 310, 100, 314, 140, 312, 120, 316, 160}; + + float output[kOutSize]; + int in_shape[4] = {12, 2, 2, 2}; + int out_n = 2; + int block[2] = {3, 2}; + BatchToSpaceNoCropForNHWC(input, output, in_shape, out_n, block, sizeof(float)); + for (int i = 0; i < kOutSize && i < 32; ++i) { + std::cout << output[i] << " "; + } + std::cout << "\n"; + CompareOutputData(output, expect_out, kOutSize, 0.000001); +} + +TEST_F(BatchToSpaceTestFp32, BatchToSpaceTest_crop_4) { + float input[96] = {1, 10, 3, 30, 9, 90, 11, 110, 2, 20, 4, 40, 10, 100, 12, 120, 5, 50, 7, 70, + 13, 130, 15, 150, 6, 60, 8, 80, 14, 140, 16, 160, 21, 10, 23, 30, 29, 90, 211, 110, + 22, 20, 24, 40, 210, 100, 212, 120, 25, 50, 27, 70, 213, 130, 215, 150, 26, 60, 28, 80, + 214, 140, 216, 160, 31, 10, 33, 30, 39, 90, 311, 110, 32, 20, 34, 40, 310, 100, 312, 120, + 35, 50, 37, 70, 313, 130, 315, 150, 36, 60, 38, 80, 314, 140, 316, 160}; + constexpr int kOutSize = 24; + float expect_out[kOutSize] = { + 25, 50, 23, 30, 35, 50, 33, 30, + 13, 130, 11, 110, 26, 60, 24, 40, 36, 60, 34, 40, 14, 140, 12, 120}; + + float output[kOutSize]; + int in_shape[4] = {12, 2, 2, 2}; + int out_n = 2; + int block[2] = {3, 2}; + int crops[4] = {1, 2, 1, 1}; + BatchToSpaceForNHWC(input, output, in_shape, out_n, block, crops, sizeof(float)); + for (int i = 0; i < kOutSize && i < 32; ++i) { + std::cout << output[i] << " "; + } + std::cout << "\n"; + CompareOutputData(output, expect_out, kOutSize, 0.000001); +} + +} // namespace mindspore diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/crop_fp32_test.cc 
b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/crop_fp32_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..efd68219d136ee6bab4bc25d26fe1946f1fe85de --- /dev/null +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/crop_fp32_test.cc @@ -0,0 +1,234 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "common/common_test.h" +#include "mindspore/lite/src/runtime/kernel/arm/opclib/fp32/crop.h" + +namespace mindspore { +class CropTestFp32 : public mindspore::Common { + public: + CropTestFp32() = default; +}; + +TEST_F(CropTestFp32, CropTest1) { + float input[16] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; + const int kOutSize = 2; + float expect_out[kOutSize] = {8, 16}; + + float output[kOutSize]; + int in_shape[4] = {2, 2, 2, 2}; + int out_shape[4] = {2, 1, 1, 1}; + CropParameter crop_param; + crop_param.axis_ = 1; + crop_param.offset_[0] = 1; + crop_param.offset_[1] = 1; + crop_param.offset_[2] = 1; + crop_param.op_parameter_.thread_num_ = 1; + crop_param.thread_id_ = 0; + Crop4D(input, output, in_shape, out_shape, &crop_param); + for (int i = 0; i < kOutSize; ++i) { + std::cout << output[i] << " "; + } + std::cout << "\n"; + CompareOutputData(output, expect_out, kOutSize, 0.000001); +} + +TEST_F(CropTestFp32, CropTest2) { + float input[16] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; + const int kOutSize = 4; + float 
expect_out[kOutSize] = {13, 14, 15, 16}; + + float output[kOutSize]; + int in_shape[4] = {2, 2, 2, 2}; + int out_shape[4] = {1, 1, 2, 2}; + CropParameter crop_param; + crop_param.axis_ = 0; + crop_param.offset_[0] = 1; + crop_param.offset_[1] = 1; + crop_param.offset_[2] = 0; + crop_param.offset_[3] = 0; + crop_param.op_parameter_.thread_num_ = 1; + crop_param.thread_id_ = 0; + Crop4D(input, output, in_shape, out_shape, &crop_param); + for (int i = 0; i < kOutSize; ++i) { + std::cout << output[i] << " "; + } + std::cout << "\n"; + CompareOutputData(output, expect_out, kOutSize, 0.000001); +} + +TEST_F(CropTestFp32, CropTest3) { + float input[16] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; + const int kOutSize = 8; + float expect_out[kOutSize] = {2, 4, 6, 8, 10, 12, 14, 16}; + + float output[kOutSize]; + int in_shape[4] = {2, 2, 2, 2}; + int out_shape[4] = {2, 2, 2, 1}; + CropParameter crop_param; + crop_param.axis_ = 3; + crop_param.offset_[0] = 1; + crop_param.op_parameter_.thread_num_ = 1; + crop_param.thread_id_ = 0; + Crop4D(input, output, in_shape, out_shape, &crop_param); + for (int i = 0; i < kOutSize; ++i) { + std::cout << output[i] << " "; + } + std::cout << "\n"; + CompareOutputData(output, expect_out, kOutSize, 0.000001); +} + +TEST_F(CropTestFp32, CropTest4) { + float input[16] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; + const int kOutSize = 8; + float expect_out[kOutSize] = {2, 4, 6, 8, 10, 12, 14, 16}; + + float output[kOutSize]; + int in_shape[4] = {2, 2, 2, 2}; + int out_shape[4] = {2, 2, 2, 1}; + CropParameter crop_param; + crop_param.axis_ = 3; + crop_param.offset_[0] = 1; + crop_param.op_parameter_.thread_num_ = 2; + crop_param.thread_id_ = 0; + Crop4D(input, output, in_shape, out_shape, &crop_param); + crop_param.thread_id_ = 1; + Crop4D(input, output, in_shape, out_shape, &crop_param); + for (int i = 0; i < kOutSize; ++i) { + std::cout << output[i] << " "; + } + std::cout << "\n"; + CompareOutputData(output, 
expect_out, kOutSize, 0.000001); +} + +TEST_F(CropTestFp32, CropTest5) { + float input[16] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; + const int kOutSize = 2; + float expect_out[kOutSize] = {8, 16}; + + float output[kOutSize]; + int in_shape[4] = {2, 2, 2, 2}; + int out_shape[4] = {2, 1, 1, 1}; + CropParameter crop_param; + crop_param.axis_ = 1; + crop_param.offset_[0] = 1; + crop_param.offset_[1] = 1; + crop_param.offset_[2] = 1; + Crop4DNoParallel(input, output, in_shape, out_shape, &crop_param); + for (int i = 0; i < kOutSize; ++i) { + std::cout << output[i] << " "; + } + std::cout << "\n"; + CompareOutputData(output, expect_out, kOutSize, 0.000001); +} + +TEST_F(CropTestFp32, CropTest6) { + float input[16] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; + const int kOutSize = 4; + float expect_out[kOutSize] = {13, 14, 15, 16}; + + float output[kOutSize]; + int in_shape[4] = {2, 2, 2, 2}; + int out_shape[4] = {1, 1, 2, 2}; + CropParameter crop_param; + crop_param.axis_ = 0; + crop_param.offset_[0] = 1; + crop_param.offset_[1] = 1; + crop_param.offset_[2] = 0; + crop_param.offset_[3] = 0; + Crop4DNoParallel(input, output, in_shape, out_shape, &crop_param); + for (int i = 0; i < kOutSize; ++i) { + std::cout << output[i] << " "; + } + std::cout << "\n"; + CompareOutputData(output, expect_out, kOutSize, 0.000001); +} + +TEST_F(CropTestFp32, CropTest7) { + float input[16] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; + const int kOutSize = 8; + float expect_out[kOutSize] = {2, 4, 6, 8, 10, 12, 14, 16}; + + float output[kOutSize]; + int in_shape[4] = {2, 2, 2, 2}; + int out_shape[4] = {2, 2, 2, 1}; + CropParameter crop_param; + crop_param.axis_ = 3; + crop_param.offset_[0] = 1; + Crop4DNoParallel(input, output, in_shape, out_shape, &crop_param); + for (int i = 0; i < kOutSize; ++i) { + std::cout << output[i] << " "; + } + std::cout << "\n"; + CompareOutputData(output, expect_out, kOutSize, 0.000001); +} + +TEST_F(CropTestFp32, 
CropTest8) { + float input[27] = {1, 2, 3, 4, 5, 6, 7, 8, 9, + 11, 12, 13, 14, 15, 16, 17, 18, 19, + 21, 22, 23, 24, 25, 26, 27, 28, 29}; + const int kOutSize = 4; + float expect_out[kOutSize] = {15, 16, 18, 19}; + + float output[kOutSize]; + int in_shape[4] = {1, 3, 3, 3}; + int out_shape[4] = {1, 1, 2, 2}; + CropParameter crop_param; + crop_param.axis_ = 1; + crop_param.offset_[0] = 1; + crop_param.offset_[1] = 1; + crop_param.offset_[2] = 1; + crop_param.op_parameter_.thread_num_ = 2; + crop_param.thread_id_ = 0; + Crop4D(input, output, in_shape, out_shape, &crop_param); + crop_param.thread_id_ = 1; + Crop4D(input, output, in_shape, out_shape, &crop_param); + for (int i = 0; i < kOutSize; ++i) { + std::cout << output[i] << " "; + } + std::cout << "\n"; + CompareOutputData(output, expect_out, kOutSize, 0.000001); +} + +TEST_F(CropTestFp32, CropTest9) { + float input[64] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 11, 12, 13, 14, 15, 16, 17, 18, 19, 110, 111, 112, 113, 114, 115, 116, + 21, 22, 23, 24, 25, 26, 27, 28, 29, 210, 211, 212, 213, 214, 215, 216, + 31, 32, 33, 34, 35, 36, 37, 38, 39, 310, 311, 312, 313, 314, 315, 316}; + const int kOutSize = 8; + float expect_out[kOutSize] = {16, 17, 110, 111, 26, 27, 210, 211}; + + float output[kOutSize]; + int in_shape[4] = {1, 4, 4, 4}; + int out_shape[4] = {1, 2, 2, 2}; + CropParameter crop_param; + crop_param.axis_ = 1; + crop_param.offset_[0] = 1; + crop_param.offset_[1] = 1; + crop_param.offset_[2] = 1; + crop_param.op_parameter_.thread_num_ = 2; + crop_param.thread_id_ = 0; + Crop4D(input, output, in_shape, out_shape, &crop_param); + crop_param.thread_id_ = 1; + Crop4D(input, output, in_shape, out_shape, &crop_param); + for (int i = 0; i < kOutSize; ++i) { + std::cout << output[i] << " "; + } + std::cout << "\n"; + CompareOutputData(output, expect_out, kOutSize, 0.000001); +} + +} // namespace mindspore + diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/depth_to_space_fp32_test.cc 
b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/depth_to_space_fp32_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..f14949ed8bb1f567ac56c9f7421693d740c2cbb6
--- /dev/null
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/depth_to_space_fp32_test.cc
@@ -0,0 +1,85 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "mindspore/core/utils/log_adapter.h"
+#include "common/common_test.h"
+#include "mindspore/lite/src/runtime/kernel/arm/opclib/depth_to_space.h"
+#include "mindspore/lite/src/runtime/kernel/arm/opclib/arithmetic_common.h"
+
+namespace mindspore {
+
+class DepthToSpaceTestFp32 : public mindspore::Common {  // fixture for the DepthToSpaceForNHWC tests below
+ public:
+  DepthToSpaceTestFp32() = default;
+};
+
+TEST_F(DepthToSpaceTestFp32, DepthToSpaceTest2) {  // {1, 2, 2, 4} input, block_size_ 2 -> {1, 4, 4, 1} output
+  float input[16] = {1, 2, 10, 20, 5, 6, 3, 8, 18, 10, 11, 55, 3, 4, 15, 25};
+  constexpr int kOutSize = 16;
+  float expect_out[kOutSize] = {1, 2, 5, 6, 10, 20, 3, 8, 18, 10, 3, 4, 11, 55, 15, 25};
+
+  float output[kOutSize];
+  int in_shape[4] = {1, 2, 2, 4};
+  int out_shape[4] = {1, 4, 4, 1};
+  DepthToSpaceParameter param;
+  param.block_size_ = 2;
+  int in_strides[4];
+  ComputeStrides(in_shape, in_strides, 4);
+  int out_strides[4];
+  ComputeStrides(out_shape, out_strides, 4);
+  param.in_stride_dim0_ = in_strides[0];  // only the first three strides of each tensor are forwarded
+  param.in_stride_dim1_ = in_strides[1];
+  param.in_stride_dim2_ = in_strides[2];
+  param.out_stride_dim0_ = out_strides[0];
+  param.out_stride_dim1_ = out_strides[1];
+  param.out_stride_dim2_ = out_strides[2];
+  param.data_type_size_ = sizeof(float);  // element size in bytes
+  DepthToSpaceForNHWC((const void *)input, output, in_shape, &param);
+  for (int i = 0; i < kOutSize; ++i) {  // dump the produced values to stdout
+    std::cout << output[i] << " ";
+  }
+  std::cout << "\n";
+  CompareOutputData(output, expect_out, kOutSize, 0.000001);
+}
+
+TEST_F(DepthToSpaceTestFp32, DepthToSpaceTest3) {  // {1, 1, 1, 8} -> {1, 2, 2, 2}; expected data equals the input
+  float input[8] = {1, 2, 3, 4, 5, 6, 7, 8};
+  constexpr int kOutSize = 8;
+  float expect_out[kOutSize] = {1, 2, 3, 4, 5, 6, 7, 8};
+
+  float output[kOutSize];
+  int in_shape[4] = {1, 1, 1, 8};
+  int out_shape[4] = {1, 2, 2, 2};
+  DepthToSpaceParameter param;
+  param.block_size_ = 2;
+  int in_strides[4];
+  ComputeStrides(in_shape, in_strides, 4);
+  int out_strides[4];
+  ComputeStrides(out_shape, out_strides, 4);
+  param.in_stride_dim0_ = in_strides[0];  // only the first three strides of each tensor are forwarded
+  param.in_stride_dim1_ = in_strides[1];
+  param.in_stride_dim2_ = in_strides[2];
+  param.out_stride_dim0_ = out_strides[0];
+  param.out_stride_dim1_ = out_strides[1];
+  param.out_stride_dim2_ = out_strides[2];
+  param.data_type_size_ = sizeof(float);  // element size in bytes
+  DepthToSpaceForNHWC((const void *)input, output, in_shape, &param);
+  for (int i = 0; i < kOutSize; ++i) {  // dump the produced values to stdout
+    std::cout << output[i] << " ";
+  }
+  std::cout << "\n";
+  CompareOutputData(output, expect_out, kOutSize, 0.000001);
+}
+}  // namespace mindspore