提交 00fa08ba 编写于 作者: M mindspore-ci-bot 提交者: Gitee

!3936 [lite]compare,batch_to_space,depth_to_space,argmin,argmax support int8

Merge pull request !3936 from chenjianping/lite_dev
......@@ -90,6 +90,7 @@ class ArithmeticCPUKernel : public LiteKernel {
case PrimitiveType_FloorMod:
arithmetic_run_ = ElementFloorMod;
arithmetic_broadcast_run_ = BroadcastFloorMod;
break;
case PrimitiveType_Equal:
arithmetic_run_ = ElementEqual;
arithmetic_broadcast_run_ = BroadcastEqual;
......
......@@ -17,7 +17,7 @@
#include <vector>
#include "schema/model_generated.h"
#include "src/kernel_registry.h"
#include "src/runtime/kernel/arm/opclib/arg_min_max.h"
#include "src/runtime/kernel/arm/opclib/int8/arg_min_max_int8.h"
#include "include/errorcode.h"
using mindspore::lite::RET_OK;
......@@ -31,12 +31,45 @@ int ArgMinMaxInt8CPUKernel::Init() {
}
auto param = reinterpret_cast<ArgMinMaxParameter *>(opParameter);
param->data_type_ = kNumberTypeInt8;
auto *input_tensor = inputs_.at(kInputIndex);
auto in_quant_args = input_tensor->GetQuantParams();
in_quant_arg_.scale_ = in_quant_args.front().scale;
in_quant_arg_.zp_ = in_quant_args.front().zeroPoint;
auto *out_tensor = outputs_.at(kOutputIndex);
auto out_quant_args = out_tensor->GetQuantParams();
out_quant_arg_.scale_ = out_quant_args.front().scale;
out_quant_arg_.zp_ = out_quant_args.front().zeroPoint;
return RET_OK;
}
// Executes the int8 arg-min/arg-max kernel.
//
// NOTE(review): as written, the function delegates to ArgMinMaxBaseCPUKernel::Run(),
// calls FreeTmpMemory() and returns that result. Every statement after `return ret;`
// is therefore unreachable. The unreachable part looks like the quantization-aware
// implementation (it is the only code that reads in_quant_arg_/out_quant_arg_) --
// confirm which of the two bodies is meant to remain.
int ArgMinMaxInt8CPUKernel::Run() {
auto ret = ArgMinMaxBaseCPUKernel::Run();
FreeTmpMemory();
return ret;
// ---- unreachable from here on (see NOTE above) ----
auto input = inputs_.at(0);
const int8_t *input_data = reinterpret_cast<const int8_t *>(inputs_.at(0)->Data());
int8_t *output_data = reinterpret_cast<int8_t *>(outputs_.at(0)->Data());
auto in_shape = input->shape().data();
auto param = reinterpret_cast<ArgMinMaxParameter *>(opParameter);
// topk_ == 1 takes the single-pass path that needs no sorting.
if (param->topk_ == 1) {
ArgMinMaxQuant(input_data, output_data, in_shape, param, &in_quant_arg_, &out_quant_arg_);
return RET_OK;
}
// topk_ != 1: dispatch on the reduced axis; axes other than 0..3 fall through
// without writing any output and still return RET_OK.
switch (param->axis_) {
case 0:
ArgMinMaxDim0(input_data, output_data, in_shape, param, &in_quant_arg_, &out_quant_arg_);
break;
case 1:
ArgMinMaxDim1(input_data, output_data, in_shape, param, &in_quant_arg_, &out_quant_arg_);
break;
case 2:
ArgMinMaxDim2(input_data, output_data, in_shape, param, &in_quant_arg_, &out_quant_arg_);
break;
case 3:
ArgMinMaxDim3(input_data, output_data, in_shape, param, &in_quant_arg_, &out_quant_arg_);
break;
}
return RET_OK;
}
} // namespace mindspore::kernel
......@@ -18,6 +18,7 @@
#include <vector>
#include "src/runtime/kernel/arm/base/arg_min_max_base.h"
#include "src/runtime/kernel/arm/opclib/quantization/quantize.h"
namespace mindspore::kernel {
class ArgMinMaxInt8CPUKernel : public ArgMinMaxBaseCPUKernel {
......@@ -31,6 +32,9 @@ class ArgMinMaxInt8CPUKernel : public ArgMinMaxBaseCPUKernel {
int Init() override;
int ReSize() override { return 0; }
int Run() override;
private:
QuantArg in_quant_arg_;
QuantArg out_quant_arg_;
};
} // namespace mindspore::kernel
......
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/runtime/kernel/arm/int8/arithmetic_int8.h"
#include "src/runtime/kernel/arm/opclib/int8/arithmetic_int8.h"
#include "src/runtime/kernel/arm/opclib/arithmetic_common.h"
#include "schema/model_generated.h"
#include "src/kernel_registry.h"
#include "src/runtime/runtime_api.h"
#include "include/errorcode.h"
using mindspore::kernel::KERNEL_ARCH::kCPU;
using mindspore::lite::KernelRegistrar;
using mindspore::lite::RET_PARAM_INVALID;
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
using mindspore::schema::PrimitiveType_Equal;
using mindspore::schema::PrimitiveType_NotEqual;
using mindspore::schema::PrimitiveType_LessEqual;
using mindspore::schema::PrimitiveType_Greater;
using mindspore::schema::PrimitiveType_GreaterEqual;
using mindspore::schema::PrimitiveType_Less;
namespace mindspore::kernel {
namespace {
int ArithmeticsInt8Launch(int thread_id, LiteParallelGroupEnv *penv, void *cdata) {
auto arithmetic_kernel = reinterpret_cast<ArithmeticInt8CPUKernel *>(cdata);
auto error_code = arithmetic_kernel->DoArithmetic(thread_id);
if (error_code != RET_OK) {
MS_LOG(ERROR) << "ArithmeticsRun error thread_id[" << thread_id << "] error_code[" << error_code << "]";
return RET_ERROR;
}
return RET_OK;
}
} // namespace
// Releases the two broadcast tile buffers.
// Nothing is touched when the op is not broadcasting. Otherwise each non-null
// buffer is returned through the context allocator when one is set, or through
// free() when it is not, and both pointers are reset to nullptr.
ArithmeticInt8CPUKernel::~ArithmeticInt8CPUKernel() {
  const auto *arith_param = reinterpret_cast<ArithmeticParameter *>(opParameter);
  if (arith_param->broadcasting_) {
    const bool has_allocator = context_->allocator != nullptr;
    auto release = [this, has_allocator](int8_t *buffer) {
      if (buffer == nullptr) {
        return;
      }
      if (has_allocator) {
        context_->allocator->Free(buffer);
      } else {
        free(buffer);
      }
    };
    release(tile_data0_);
    release(tile_data1_);
    tile_data0_ = nullptr;
    tile_data1_ = nullptr;
  }
}
int ArithmeticInt8CPUKernel::Init() {
switch (opParameter->type_) {
case PrimitiveType_Equal:
arithmetic_run_ = ElementEqual;
break;
case PrimitiveType_NotEqual:
arithmetic_run_ = ElementNotEqual;
break;
case PrimitiveType_Less:
arithmetic_run_ = ElementEqual;
break;
case PrimitiveType_LessEqual:
arithmetic_run_ = ElementNotEqual;
break;
case PrimitiveType_Greater:
arithmetic_run_ = ElementGreater;
break;
case PrimitiveType_GreaterEqual:
arithmetic_run_ = ElementGreaterEqual;
break;
default:
MS_LOG(ERROR) << "Error Operator type " << opParameter->type_;
arithmetic_run_ = nullptr;
return RET_PARAM_INVALID;
}
auto data_size = outputs_[0]->Size();
auto param = reinterpret_cast<ArithmeticParameter *>(opParameter);
if (param->broadcasting_) {
if (context_->allocator != nullptr) {
tile_data0_ = reinterpret_cast<int8_t *>(context_->allocator->Malloc(data_size));
tile_data1_ = reinterpret_cast<int8_t *>(context_->allocator->Malloc(data_size));
} else {
tile_data0_ = reinterpret_cast<int8_t *>(malloc(data_size));
tile_data1_ = reinterpret_cast<int8_t *>(malloc(data_size));
}
} else {
tile_data0_ = nullptr;
tile_data1_ = nullptr;
}
return RET_OK;
}
// No-op resize hook: always returns RET_OK.
// NOTE(review): the tile buffers sized in Init() are not re-allocated here.
int ArithmeticInt8CPUKernel::ReSize() {
  return RET_OK;
}
int ArithmeticInt8CPUKernel::DoArithmetic(int thread_id) {
auto input0_data = reinterpret_cast<int8_t *>(inputs_[0]->Data());
auto input1_data1 = reinterpret_cast<int8_t *>(inputs_[1]->Data());
auto output_data = reinterpret_cast<int8_t *>(outputs_[0]->Data());
auto element_num = outputs_[0]->ElementsNum();
auto param = reinterpret_cast<ArithmeticParameter *>(opParameter);
if (param->broadcasting_ && arithmetic_run_ != nullptr) {
MS_ASSERT(thread_count_ != 0);
int stride = UP_DIV(element_num, thread_count_);
int count = MSMIN(stride, element_num - stride * thread_id);
int error_code = arithmetic_run_(tile_data0_ + stride * thread_id, tile_data1_ + stride * thread_id,
output_data + stride * thread_id, count);
if (error_code != RET_OK) {
MS_LOG(ERROR) << "Arithmetic run fail! ret: " << error_code;
return RET_ERROR;
}
} else if (arithmetic_run_ != nullptr) {
int error_code = arithmetic_run_(input0_data, input1_data1, output_data, element_num);
if (error_code != RET_OK) {
MS_LOG(ERROR) << "Arithmetic run fail!ret: " << error_code;
return RET_ERROR;
}
} else {
MS_LOG(ERROR) << "arithmetic_run function is nullptr!";
return RET_ERROR;
}
return RET_OK;
}
int ArithmeticInt8CPUKernel::Run() {
auto param = reinterpret_cast<ArithmeticParameter *>(opParameter);
if (param->broadcasting_) {
auto input_data0 = reinterpret_cast<int8_t *>(inputs_[0]->Data());
auto input_data1 = reinterpret_cast<int8_t *>(inputs_[1]->Data());
TileDimensionsInt8(input_data0, input_data1, tile_data0_, tile_data1_, param);
}
int error_code = LiteBackendParallelLaunch(ArithmeticsInt8Launch, this, thread_count_);
if (error_code != RET_OK) {
MS_LOG(ERROR) << "Arithmetic launch function fail! ret: " << error_code;
return RET_ERROR;
}
return RET_OK;
}
// Factory registered for the int8 comparison ops.
// Allocates an ArithmeticInt8CPUKernel and initializes it.
// Returns the kernel on success; returns nullptr (after logging) when
// `parameter` is null, allocation fails, or Init() fails. A kernel whose
// Init() fails is deleted before returning.
kernel::LiteKernel *CpuArithmeticInt8KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
                                                   const std::vector<lite::tensor::Tensor *> &outputs,
                                                   OpParameter *parameter, const lite::Context *ctx,
                                                   const kernel::KernelKey &desc) {
  if (parameter == nullptr) {
    MS_LOG(ERROR) << "Input parameter is null!";
    return nullptr;
  }

  auto *arith_kernel = new (std::nothrow) ArithmeticInt8CPUKernel(parameter, inputs, outputs, ctx);
  if (arith_kernel == nullptr) {
    MS_LOG(ERROR) << "Create ArithmeticInt8CPUKernel failed, name: " << parameter->name_;
    return nullptr;
  }

  if (arith_kernel->Init() == RET_OK) {
    return arith_kernel;
  }

  MS_LOG(ERROR) << "Init kernel failed, name: " << parameter->name_ << ", type: "
                << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(parameter->type_));
  delete arith_kernel;
  return nullptr;
}
// Register the same creator for each of the six int8 comparison primitives on
// the CPU backend; the concrete comparison is chosen inside
// ArithmeticInt8CPUKernel::Init() from the op type.
REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_Equal, CpuArithmeticInt8KernelCreator)
REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_NotEqual, CpuArithmeticInt8KernelCreator)
REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_Less, CpuArithmeticInt8KernelCreator)
REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_LessEqual, CpuArithmeticInt8KernelCreator)
REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_Greater, CpuArithmeticInt8KernelCreator)
REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_GreaterEqual, CpuArithmeticInt8KernelCreator)
} // namespace mindspore::kernel
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_ARITHMETIC_INT8_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_ARITHMETIC_INT8_H_
#include <vector>
#include "src/lite_kernel.h"
#include "schema/model_generated.h"
namespace mindspore::kernel {
// CPU kernel for element-wise int8 comparison ops (Equal, NotEqual, Less,
// LessEqual, Greater, GreaterEqual).
//
// Init() selects the comparison routine and, for broadcasting ops, allocates
// two tile buffers; Run() launches DoArithmetic() on thread_count_ workers.
class ArithmeticInt8CPUKernel : public LiteKernel {
  // Signature of the element-wise routines: writes `element_size` results to `output`.
  using ArithmeticRunInt8 = int (*)(int8_t *input0, int8_t *input1, int8_t *output, int element_size);

 public:
  // `ctx` must be non-null: its thread_num_ is read here and its allocator is
  // used later for the tile buffers.
  ArithmeticInt8CPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
                          const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx)
      : LiteKernel(parameter, inputs, outputs), thread_count_(ctx->thread_num_), context_(ctx) {}
  ~ArithmeticInt8CPUKernel();

  int Init() override;
  int ReSize() override;
  int Run() override;
  // Worker body invoked once per thread by the parallel launcher.
  int DoArithmetic(int thread_id);

 private:
  int thread_count_;
  // Broadcast tile buffers. Initialized to nullptr so the destructor is safe
  // even when Init() was never called or returned before allocating them
  // (previously these were left uninitialized and could be freed as garbage).
  int8_t *tile_data0_ = nullptr;
  int8_t *tile_data1_ = nullptr;
  const lite::Context *context_;
  // Selected comparison routine; nullptr until Init() succeeds.
  ArithmeticRunInt8 arithmetic_run_ = nullptr;
};
} // namespace mindspore::kernel
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_ARITHMETIC_INT8_H_
......@@ -18,13 +18,27 @@
#include "schema/model_generated.h"
#include "src/kernel_registry.h"
#include "src/runtime/kernel/arm/opclib/batch_to_space.h"
#include "src/runtime/kernel/arm/opclib/int8/batch_to_space_int8.h"
#include "include/errorcode.h"
using mindspore::lite::RET_OK;
namespace mindspore::kernel {
// Initializes the int8 batch-to-space kernel.
//
// NOTE(review): as written, the first statement returns the result of
// BatchToSpaceBaseCPUKernel::Init() unconditionally, so everything after it is
// unreachable and in_quant_arg_/out_quant_arg_ are never filled in. The
// unreachable part looks like the intended implementation -- confirm which of
// the two bodies is meant to remain.
int BatchToSpaceInt8CPUKernel::Init() {
return BatchToSpaceBaseCPUKernel::Init();
// ---- unreachable from here on (see NOTE above) ----
auto ret = BatchToSpaceBaseCPUKernel::Init();
if (ret != RET_OK) {
return ret;
}
// Cache the first quantization parameter (scale, zero point) of the input tensor.
// front() is called without checking that the list is non-empty.
auto *input_tensor = inputs_.at(kInputIndex);
auto in_quant_args = input_tensor->GetQuantParams();
in_quant_arg_.scale_ = in_quant_args.front().scale;
in_quant_arg_.zp_ = in_quant_args.front().zeroPoint;
// Same for the output tensor.
auto *out_tensor = outputs_.at(kOutputIndex);
auto out_quant_args = out_tensor->GetQuantParams();
out_quant_arg_.scale_ = out_quant_args.front().scale;
out_quant_arg_.zp_ = out_quant_args.front().zeroPoint;
return RET_OK;
}
int BatchToSpaceInt8CPUKernel::Run() {
......@@ -36,12 +50,22 @@ int BatchToSpaceInt8CPUKernel::Run() {
auto out_shape = output->shape();
BatchToSpaceParameter *param = reinterpret_cast<BatchToSpaceParameter *>(this->opParameter);
if (IsNoCrop()) {
BatchToSpaceNoCropForNHWC(input_data, output_data, in_shape.data(), out_shape[0], param->block_shape_,
sizeof(int8_t));
if (in_quant_arg_.scale_ == out_quant_arg_.scale_ && in_quant_arg_.zp_ == out_quant_arg_.zp_) {
if (IsNoCrop()) {
BatchToSpaceNoCropForNHWC(input_data, output_data, in_shape.data(), out_shape[0], param->block_shape_,
sizeof(int8_t));
} else {
BatchToSpaceForNHWC(input_data, output_data, in_shape.data(), out_shape[0], param->block_shape_, param->crops_,
sizeof(int8_t));
}
} else {
BatchToSpaceForNHWC(input_data, output_data, in_shape.data(), out_shape[0], param->block_shape_, param->crops_,
sizeof(int8_t));
if (IsNoCrop()) {
BatchToSpaceNoCropForNHWC(input_data, output_data, in_shape.data(), out_shape[0], param->block_shape_,
&in_quant_arg_, &out_quant_arg_);
} else {
BatchToSpaceForNHWC(input_data, output_data, in_shape.data(), out_shape[0], param->block_shape_, param->crops_,
&in_quant_arg_, &out_quant_arg_);
}
}
return RET_OK;
......
......@@ -31,6 +31,9 @@ class BatchToSpaceInt8CPUKernel : public BatchToSpaceBaseCPUKernel {
int Init() override;
int ReSize() override { return 0; }
int Run() override;
private:
QuantArg in_quant_arg_;
QuantArg out_quant_arg_;
};
} // namespace mindspore::kernel
......
......@@ -18,6 +18,7 @@
#include "schema/model_generated.h"
#include "src/kernel_registry.h"
#include "src/runtime/kernel/arm/opclib/depth_to_space.h"
#include "src/runtime/kernel/arm/opclib/int8/depth_to_space_int8.h"
#include "include/errorcode.h"
using mindspore::lite::RET_OK;
......@@ -31,6 +32,16 @@ int DepthToSpaceInt8CPUKernel::Init() {
}
DepthToSpaceParameter *param = reinterpret_cast<DepthToSpaceParameter *>(opParameter);
param->data_type_size_ = sizeof(int8_t);
auto *input_tensor = inputs_.at(kInputIndex);
auto in_quant_args = input_tensor->GetQuantParams();
in_quant_arg_.scale_ = in_quant_args.front().scale;
in_quant_arg_.zp_ = in_quant_args.front().zeroPoint;
auto *out_tensor = outputs_.at(kOutputIndex);
auto out_quant_args = out_tensor->GetQuantParams();
out_quant_arg_.scale_ = out_quant_args.front().scale;
out_quant_arg_.zp_ = out_quant_args.front().zeroPoint;
return RET_OK;
}
......@@ -41,14 +52,11 @@ int DepthToSpaceInt8CPUKernel::Run() {
int8_t *output_data = reinterpret_cast<int8_t *>(output->Data());
auto in_shape = input->shape();
DepthToSpaceParameter *param = reinterpret_cast<DepthToSpaceParameter *>(opParameter);
if (input->GetFormat() == schema::Format_NHWC) {
if (in_quant_arg_.scale_ == out_quant_arg_.scale_ && in_quant_arg_.zp_ == out_quant_arg_.zp_) {
DepthToSpaceForNHWC(input_data, output_data, in_shape.data(), param);
return RET_OK;
} else {
MS_LOG(ERROR) << "Depth_to_space only support NHWC now!";
return RET_ERROR;
DepthToSpaceForNHWC(input_data, output_data, in_shape.data(), param, &in_quant_arg_, &out_quant_arg_);
}
return RET_OK;
}
} // namespace mindspore::kernel
......@@ -31,6 +31,9 @@ class DepthToSpaceInt8CPUKernel : public DepthToSpaceBaseCPUKernel {
int Init() override;
int ReSize() override { return 0; }
int Run() override;
private:
QuantArg in_quant_arg_;
QuantArg out_quant_arg_;
};
} // namespace mindspore::kernel
......
......@@ -15,10 +15,8 @@
*/
#include "src/runtime/kernel/arm/opclib/arg_min_max.h"
#include "src/runtime/kernel/arm/opclib/fp32/arg_min_max.h"
#include "src/runtime/kernel/arm/opclib/int8/arg_min_max.h"
#define FLOAT_DATA_TYPE 43
#define INT8_DATA_TYPE 32
void GetCalcParameter(const int *shape, int dims_number, int axis, int *pre_axis_count, int *axis_count,
int *after_axis_count) {
......@@ -51,16 +49,6 @@ void ArgMinMaxTopk1(const void *input, void *output, const int *shape, ArgMinMax
}
break;
}
case INT8_DATA_TYPE: {
if (param->get_max_) {
ArgMax(reinterpret_cast<const int8_t *>(input), reinterpret_cast<int8_t *>(output), param, pre_axis_count,
axis_count, after_axis_count);
} else {
ArgMin(reinterpret_cast<const int8_t *>(input), reinterpret_cast<int8_t *>(output), param, pre_axis_count,
axis_count, after_axis_count);
}
break;
}
default:
break;
}
......@@ -100,40 +88,6 @@ void ArgMinMaxTopknFp32(const float *input, float *output, const int *in_shape,
}
}
// Dispatches the int8 top-k arg-min/arg-max to the per-axis routine.
// param->get_max_ selects the ArgMax* or ArgMin* family and param->axis_
// (0..3) selects the dimension; any other axis does nothing.
void ArgMinMaxTopknInt8(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param) {
  const bool want_max = param->get_max_;
  switch (param->axis_) {
    case 0:
      if (want_max) {
        ArgMaxDim0(input, output, in_shape, param);
      } else {
        ArgMinDim0(input, output, in_shape, param);
      }
      break;
    case 1:
      if (want_max) {
        ArgMaxDim1(input, output, in_shape, param);
      } else {
        ArgMinDim1(input, output, in_shape, param);
      }
      break;
    case 2:
      if (want_max) {
        ArgMaxDim2(input, output, in_shape, param);
      } else {
        ArgMinDim2(input, output, in_shape, param);
      }
      break;
    case 3:
      if (want_max) {
        ArgMaxDim3(input, output, in_shape, param);
      } else {
        ArgMinDim3(input, output, in_shape, param);
      }
      break;
    default:
      break;
  }
}
void ArgMinMax(const void *input, void *output, const int *in_shape, ArgMinMaxParameter *param) {
if (param->topk_ == 1) {
ArgMinMaxTopk1(input, output, in_shape, param);
......@@ -145,10 +99,6 @@ void ArgMinMax(const void *input, void *output, const int *in_shape, ArgMinMaxPa
ArgMinMaxTopknFp32(reinterpret_cast<const float *>(input), reinterpret_cast<float *>(output), in_shape, param);
return;
}
case INT8_DATA_TYPE: {
ArgMinMaxTopknInt8(reinterpret_cast<const int8_t *>(input), reinterpret_cast<int8_t *>(output), in_shape, param);
return;
}
default:
break;
}
......
......@@ -18,6 +18,7 @@
#include "src/runtime/kernel/arm/opclib/op_base.h"
#include "src/runtime/kernel/arm/opclib/arg_min_max_parameter.h"
#include "src/runtime/kernel/arm/opclib/quantization/quantize.h"
void ArgMinMax(const void *input, void *output, const int *in_shape, ArgMinMaxParameter *param);
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_ARG_MIN_MAX_H_
......@@ -15,19 +15,7 @@
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_DEPTH_TO_SPACE_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_DEPTH_TO_SPACE_H_
#include "src/runtime/kernel/arm/opclib/op_base.h"
// Parameter block for the depth-to-space op.
// NOTE(review): field meanings below are inferred from the names; confirm
// against the code that fills this struct.
struct DepthToSpaceParameter {
// Common op header.
OpParameter op_parameter_;
// Presumably the spatial block size of the op -- TODO confirm.
int32_t block_size_;
// Presumably element strides of the first three input dimensions -- TODO confirm.
int32_t in_stride_dim0_;
int32_t in_stride_dim1_;
int32_t in_stride_dim2_;
// Presumably element strides of the first three output dimensions -- TODO confirm.
int32_t out_stride_dim0_;
int32_t out_stride_dim1_;
int32_t out_stride_dim2_;
// Size of one element; presumably in bytes -- TODO confirm.
uint8_t data_type_size_;
};
#include "src/runtime/kernel/arm/opclib/depth_to_space_parameter.h"
void DepthToSpaceForNHWC(const void *input, void *output, int *in_shape, DepthToSpaceParameter *param);
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_DEPTH_TO_SPACE_H_
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_DEPTH_TO_SPACE_PARAMETER_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_DEPTH_TO_SPACE_PARAMETER_H_
#include "src/runtime/kernel/arm/opclib/op_base.h"
// Parameter block for the depth-to-space op.
// NOTE(review): field meanings below are inferred from the names; confirm
// against the code that fills this struct.
struct DepthToSpaceParameter {
// Common op header.
OpParameter op_parameter_;
// Presumably the spatial block size of the op -- TODO confirm.
int32_t block_size_;
// Presumably element strides of the first three input dimensions -- TODO confirm.
int32_t in_stride_dim0_;
int32_t in_stride_dim1_;
int32_t in_stride_dim2_;
// Presumably element strides of the first three output dimensions -- TODO confirm.
int32_t out_stride_dim0_;
int32_t out_stride_dim1_;
int32_t out_stride_dim2_;
// Size of one element; presumably in bytes -- TODO confirm.
uint8_t data_type_size_;
};
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_DEPTH_TO_SPACE_PARAMETER_H_
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/runtime/kernel/arm/opclib/int8/arg_min_max_int8.h"
#include <float.h>
// Splits a shape around `axis` into three element counts:
//   *pre_axis_count   - product of shape[0 .. axis-1]   (1 when axis == 0)
//   *axis_count       - shape[axis]
//   *after_axis_count - product of shape[axis+1 .. dims_number-1] (1 when axis is last)
void CalcParameter(const int *shape, int dims_number, int axis, int *pre_axis_count, int *axis_count,
                   int *after_axis_count) {
  int outer = 1;
  int dim = 0;
  while (dim < axis) {
    outer *= shape[dim];
    ++dim;
  }
  *pre_axis_count = outer;

  *axis_count = shape[axis];

  int inner = 1;
  dim = axis + 1;
  while (dim < dims_number) {
    inner *= shape[dim];
    ++dim;
  }
  *after_axis_count = inner;
}
// Single-result (top-1) arg-min/arg-max over one axis of an int8 tensor.
//
// The input is viewed as [pre_axis_count, axis_count, after_axis_count]. For
// every (pre, after) position the axis_count candidates are dequantized with
// in_quant_arg (value * scale - zp * scale) and scanned for the extreme value:
// the maximum when param->get_max_ is set, the minimum otherwise. On ties the
// first candidate wins. The output receives either the extreme value
// (param->out_value_) or its index along the axis, requantized with
// out_quant_arg as `x / scale + zp`.
//
// The requantized result is truncated toward zero as before, but is now
// saturated to [-128, 127]; an out-of-range float -> int8 conversion is
// undefined behavior.
void ArgMinMaxQuant(const int8_t *input, int8_t *output, ArgMinMaxParameter *param, int pre_axis_count, int axis_count,
                    int after_axis_count, QuantArg *in_quant_arg, QuantArg *out_quant_arg) {
  bool out_value = param->out_value_;
  float output_inverse_scale = 1.f / out_quant_arg->scale_;
  float bias = -in_quant_arg->zp_ * in_quant_arg->scale_;
  int32_t output_zp = out_quant_arg->zp_;
  for (int i = 0; i < pre_axis_count; ++i) {
    size_t output_offset = i * after_axis_count;
    size_t input_offset = output_offset * axis_count;
    for (int j = 0; j < after_axis_count; ++j) {
      // Seed with the opposite extreme so the first candidate always replaces it.
      float value = param->get_max_ ? -FLT_MAX : FLT_MAX;
      float index = 0.0f;
      for (int k = 0; k < axis_count; ++k) {
        float value_tmp = input[input_offset + k * after_axis_count + j] * in_quant_arg->scale_ + bias;
        const bool is_better = param->get_max_ ? (value_tmp > value) : (value_tmp < value);
        if (is_better) {
          value = value_tmp;
          index = k;
        }
      }
      float real_out = out_value ? value : index;
      float quantized = real_out * output_inverse_scale + output_zp;
      // Saturate; the negated comparison also routes NaN to the lower bound.
      if (!(quantized > -128.0f)) {
        quantized = -128.0f;
      } else if (quantized > 127.0f) {
        quantized = 127.0f;
      }
      output[output_offset + j] = static_cast<int8_t>(quantized);
    }
  }
}
// Shape-based entry point for the top-1 quantized arg-min/arg-max.
// Derives the (pre, axis, after) element counts for param->axis_ from
// in_shape / param->dims_size_ and forwards to the count-based overload.
void ArgMinMaxQuant(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param,
                    QuantArg *in_quant_arg, QuantArg *out_quant_arg) {
  int outer_count = 1;
  int reduced_count = 1;
  int inner_count = 1;
  CalcParameter(in_shape, param->dims_size_, param->axis_, &outer_count, &reduced_count, &inner_count);
  ArgMinMaxQuant(input, output, param, outer_count, reduced_count, inner_count, in_quant_arg, out_quant_arg);
}
// qsort comparator: orders ArgElement entries by ascending data_.f_data_.
// Returns -1, 0 or 1. The previous implementation returned the float
// difference converted to int, which truncates toward zero: any two values
// closer than 1.0 compared as equal (mis-sorting finely scaled data), and a
// large difference could overflow the int conversion.
int ArgCompareAscInt8(const void *a, const void *b) {
  const auto lhs = reinterpret_cast<const ArgElement *>(a)->data_.f_data_;
  const auto rhs = reinterpret_cast<const ArgElement *>(b)->data_.f_data_;
  return (lhs > rhs) - (lhs < rhs);
}
// qsort comparator: orders ArgElement entries by descending data_.f_data_.
// Returns -1, 0 or 1. The previous implementation returned the float
// difference converted to int, which truncates toward zero: any two values
// closer than 1.0 compared as equal (mis-sorting finely scaled data), and a
// large difference could overflow the int conversion.
int ArgCompareDescInt8(const void *a, const void *b) {
  const auto lhs = reinterpret_cast<const ArgElement *>(a)->data_.f_data_;
  const auto rhs = reinterpret_cast<const ArgElement *>(b)->data_.f_data_;
  return (rhs > lhs) - (rhs < lhs);
}
// Requantizes a real value to int8: real_out * output_inverse_scale + output_zp,
// truncated toward zero and saturated to [-128, 127].
// Saturation is new: converting an out-of-range float to int8 is undefined
// behavior. In-range results are unchanged. NaN maps to -128.
int8_t GetInt8Output(float real_out, float output_inverse_scale, int32_t output_zp) {
  const float quantized = real_out * output_inverse_scale + output_zp;
  // Negated comparison so that NaN also takes the lower bound.
  if (!(quantized > -128.0f)) {
    return -128;
  }
  if (quantized >= 127.0f) {
    return 127;
  }
  return static_cast<int8_t>(quantized);
}
void ArgMinMaxDim0(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param,
QuantArg *in_quant_arg, QuantArg *out_quant_arg) {
bool out_value = param->out_value_;
float output_inverse_scale = 1.f / out_quant_arg->scale_;
float bias = -in_quant_arg->zp_ * in_quant_arg->scale_;
int32_t output_zp = out_quant_arg->zp_;
for (int32_t i = 0; i < param->in_strides_[0]; ++i) {
for (int j = 0; j < in_shape[0]; ++j) {
size_t offset = param->in_strides_[0] * j + i;
param->arg_elements_[j].index_ = j;
param->arg_elements_[j].data_.f_data_ = input[offset] * in_quant_arg->scale_ + bias;
}
if (param->get_max_) {
qsort(param->arg_elements_, in_shape[0], sizeof(ArgElement), ArgCompareDescInt8);
} else {
qsort(param->arg_elements_, in_shape[0], sizeof(ArgElement), ArgCompareAscInt8);
}
for (int j = 0; j < param->topk_; ++j) {
size_t out_offset = j * param->out_strides_[0] + i;
float real_out = out_value ? param->arg_elements_[j].data_.f_data_ : param->arg_elements_[j].index_;
output[out_offset] = GetInt8Output(real_out, output_inverse_scale, output_zp);
}
}
}
void ArgMinMaxDim1(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param,
QuantArg *in_quant_arg, QuantArg *out_quant_arg) {
bool out_value = param->out_value_;
float output_inverse_scale = 1.f / out_quant_arg->scale_;
float bias = -in_quant_arg->zp_ * in_quant_arg->scale_;
int32_t output_zp = out_quant_arg->zp_;
int in_shape1 = in_shape[1];
for (int i = 0; i < in_shape[0]; ++i) {
size_t in_dim0_offset = i * param->in_strides_[0];
size_t out_dim0_offset = i * param->out_strides_[0];
for (int j = 0; j < param->in_strides_[1]; ++j) {
for (int k = 0; k < in_shape1; ++k) {
size_t offset = param->in_strides_[1] * k + in_dim0_offset + j;
param->arg_elements_[k].index_ = k;
param->arg_elements_[k].data_.f_data_ = input[offset] * in_quant_arg->scale_ + bias;
}
if (param->get_max_) {
qsort(param->arg_elements_, in_shape1, sizeof(ArgElement), ArgCompareDescInt8);
} else {
qsort(param->arg_elements_, in_shape1, sizeof(ArgElement), ArgCompareAscInt8);
}
for (int k = 0; k < param->topk_; ++k) {
size_t out_offset = out_dim0_offset + j + k * param->out_strides_[1];
float real_out = out_value ? param->arg_elements_[k].data_.f_data_ : param->arg_elements_[k].index_;
output[out_offset] = GetInt8Output(real_out, output_inverse_scale, output_zp);
}
}
}
}
void ArgMinMaxDim2(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param,
QuantArg *in_quant_arg, QuantArg *out_quant_arg) {
bool out_value = param->out_value_;
float output_inverse_scale = 1.f / out_quant_arg->scale_;
float bias = -in_quant_arg->zp_ * in_quant_arg->scale_;
int32_t output_zp = out_quant_arg->zp_;
int in_shape1 = in_shape[1];
int in_shape2 = in_shape[2];
for (int i = 0; i < in_shape[0]; ++i) {
size_t in_dim0_offset = i * param->in_strides_[0];
size_t out_dim0_offset = i * param->out_strides_[0];
for (int j = 0; j < in_shape1; ++j) {
size_t in_dim1_offset = j * param->in_strides_[1] + in_dim0_offset;
size_t out_dim1_offset = j * param->out_strides_[1] + out_dim0_offset;
for (int k = 0; k < param->in_strides_[2]; ++k) {
for (int l = 0; l < in_shape2; ++l) {
size_t offset = param->in_strides_[2] * l + k + in_dim1_offset;
param->arg_elements_[l].index_ = l;
param->arg_elements_[l].data_.f_data_ = input[offset] * in_quant_arg->scale_ + bias;
}
if (param->get_max_) {
qsort(param->arg_elements_, in_shape2, sizeof(ArgElement), ArgCompareDescInt8);
} else {
qsort(param->arg_elements_, in_shape2, sizeof(ArgElement), ArgCompareAscInt8);
}
for (int l = 0; l < param->topk_; ++l) {
size_t out_offset = out_dim1_offset + k + l * param->out_strides_[2];
float real_out = out_value ? param->arg_elements_[l].data_.f_data_ : param->arg_elements_[l].index_;
output[out_offset] = GetInt8Output(real_out, output_inverse_scale, output_zp);
}
}
}
}
}
void ArgMinMaxDim3(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param,
QuantArg *in_quant_arg, QuantArg *out_quant_arg) {
bool out_value = param->out_value_;
float output_inverse_scale = 1.f / out_quant_arg->scale_;
float bias = -in_quant_arg->zp_ * in_quant_arg->scale_;
int32_t output_zp = out_quant_arg->zp_;
int in_shape1 = in_shape[1];
int in_shape2 = in_shape[2];
int in_shape3 = in_shape[3];
for (int i = 0; i < in_shape[0]; ++i) {
size_t in_dim0_offset = i * param->in_strides_[0];
size_t out_dim0_offset = i * param->out_strides_[0];
for (int j = 0; j < in_shape1; ++j) {
size_t in_dim1_offset = j * param->in_strides_[1] + in_dim0_offset;
size_t out_dim1_offset = j * param->out_strides_[1] + out_dim0_offset;
for (int k = 0; k < in_shape2; ++k) {
size_t in_dim2_offset = k * param->in_strides_[2] + in_dim1_offset;
size_t out_dim2_offset = k * param->out_strides_[2] + out_dim1_offset;
for (int l = 0; l < in_shape3; ++l) {
size_t offset = l + in_dim2_offset;
param->arg_elements_[l].index_ = l;
param->arg_elements_[l].data_.f_data_ = input[offset] * in_quant_arg->scale_ + bias;
}
if (param->get_max_) {
qsort(param->arg_elements_, in_shape3, sizeof(ArgElement), ArgCompareDescInt8);
} else {
qsort(param->arg_elements_, in_shape3, sizeof(ArgElement), ArgCompareAscInt8);
}
for (int l = 0; l < param->topk_; ++l) {
size_t out_offset = out_dim2_offset + l;
float real_out = out_value ? param->arg_elements_[l].data_.f_data_ : param->arg_elements_[l].index_;
output[out_offset] = GetInt8Output(real_out, output_inverse_scale, output_zp);
}
}
}
}
}
......@@ -13,21 +13,20 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_ARG_MIN_MAX_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_ARG_MIN_MAX_H_
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_ARG_MIN_MAX_INT8_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_ARG_MIN_MAX_INT8_H_
#include "src/runtime/kernel/arm/opclib/arg_min_max_parameter.h"
#include "src/runtime/kernel/arm/opclib/quantization/quantize.h"
void ArgMax(const int8_t *input, int8_t *output, ArgMinMaxParameter *param, int pre_axis_count, int axis_count,
int after_axis_count);
void ArgMin(const int8_t *input, int8_t *output, ArgMinMaxParameter *param, int pre_axis_count, int axis_count,
int after_axis_count);
void ArgMaxDim0(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param);
void ArgMinDim0(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param);
void ArgMaxDim1(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param);
void ArgMinDim1(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param);
void ArgMaxDim2(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param);
void ArgMinDim2(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param);
void ArgMaxDim3(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param);
void ArgMinDim3(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param);
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_ARG_MIN_MAX_H_
// Top-1 arg-min/arg-max along param->axis_: dequantizes the input with
// in_quant, picks the extreme per position and writes the value or index
// (param->out_value_) requantized with out_quant.
void ArgMinMaxQuant(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param,
QuantArg *in_quant, QuantArg *out_quant);
// Top-k variants, one per reduced axis (0..3). Each sorts the dequantized
// candidates along its axis using param->arg_elements_ as scratch space and
// writes the first param->topk_ results requantized with out_quant.
void ArgMinMaxDim0(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param,
QuantArg *in_quant, QuantArg *out_quant);
void ArgMinMaxDim1(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param,
QuantArg *in_quant, QuantArg *out_quant);
void ArgMinMaxDim2(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param,
QuantArg *in_quant, QuantArg *out_quant);
void ArgMinMaxDim3(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param,
QuantArg *in_quant, QuantArg *out_quant);
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_ARG_MIN_MAX_INT8_H_
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/runtime/kernel/arm/opclib/int8/arithmetic_int8.h"
#ifdef ENABLE_NEON
#include <arm_neon.h>
#endif
#include "src/runtime/kernel/arm/opclib/errorcode.h"
// Element-wise "not equal" on raw int8 values.
// For every i in [0, element_size) stores 1 in output[i] when input0[i] differs from
// input1[i], otherwise 0. The stored int8 values are compared directly; no quantization
// parameters are involved. Always returns OPCLIB_OK.
int ElementNotEqual(int8_t *input0, int8_t *input1, int8_t *output, int element_size) {
  int pos = 0;
  while (pos < element_size) {
    const bool differs = input0[pos] != input1[pos];
    output[pos] = static_cast<int8_t>(differs);
    ++pos;
  }
  return OPCLIB_OK;
}
// Element-wise "equal" on raw int8 values.
// For every i in [0, element_size) stores 1 in output[i] when input0[i] matches input1[i],
// otherwise 0. The stored int8 values are compared directly; no quantization parameters
// are involved. Always returns OPCLIB_OK.
int ElementEqual(int8_t *input0, int8_t *input1, int8_t *output, int element_size) {
  int pos = 0;
  while (pos < element_size) {
    const bool same = input0[pos] == input1[pos];
    output[pos] = static_cast<int8_t>(same);
    ++pos;
  }
  return OPCLIB_OK;
}
// Element-wise "less than" on raw int8 values.
// For every i in [0, element_size) stores 1 in output[i] when input0[i] < input1[i],
// otherwise 0. The stored int8 values are compared directly; no quantization parameters
// are involved. Always returns OPCLIB_OK.
int ElementLess(int8_t *input0, int8_t *input1, int8_t *output, int element_size) {
  int pos = 0;
  while (pos < element_size) {
    const bool below = input0[pos] < input1[pos];
    output[pos] = static_cast<int8_t>(below);
    ++pos;
  }
  return OPCLIB_OK;
}
// Element-wise "less than or equal" on raw int8 values.
// For every i in [0, element_size) stores 1 in output[i] when input0[i] <= input1[i],
// otherwise 0. The stored int8 values are compared directly; no quantization parameters
// are involved. Always returns OPCLIB_OK.
int ElementLessEqual(int8_t *input0, int8_t *input1, int8_t *output, int element_size) {
  int pos = 0;
  while (pos < element_size) {
    const bool not_above = input0[pos] <= input1[pos];
    output[pos] = static_cast<int8_t>(not_above);
    ++pos;
  }
  return OPCLIB_OK;
}
// Element-wise "greater than" on raw int8 values.
// For every i in [0, element_size) stores 1 in output[i] when input0[i] > input1[i],
// otherwise 0. The stored int8 values are compared directly; no quantization parameters
// are involved. Always returns OPCLIB_OK.
int ElementGreater(int8_t *input0, int8_t *input1, int8_t *output, int element_size) {
  int pos = 0;
  while (pos < element_size) {
    const bool above = input0[pos] > input1[pos];
    output[pos] = static_cast<int8_t>(above);
    ++pos;
  }
  return OPCLIB_OK;
}
// Element-wise "greater than or equal" on raw int8 values.
// For every i in [0, element_size) stores 1 in output[i] when input0[i] >= input1[i],
// otherwise 0. The stored int8 values are compared directly; no quantization parameters
// are involved. Always returns OPCLIB_OK.
int ElementGreaterEqual(int8_t *input0, int8_t *input1, int8_t *output, int element_size) {
  int pos = 0;
  while (pos < element_size) {
    const bool not_below = input0[pos] >= input1[pos];
    output[pos] = static_cast<int8_t>(not_below);
    ++pos;
  }
  return OPCLIB_OK;
}
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_ARITHMETIC_INT8_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_ARITHMETIC_INT8_H_
#include "src/runtime/kernel/arm/opclib/op_base.h"
// Element-wise int8 comparison routines.
// Each one walks element_size entries, compares input0[i] with input1[i] on the raw int8
// values (no quantization parameters are taken), writes the result into output[i] as
// 1 (true) or 0 (false), and returns OPCLIB_OK.
int ElementNotEqual(int8_t *input0, int8_t *input1, int8_t *output, int element_size);
int ElementEqual(int8_t *input0, int8_t *input1, int8_t *output, int element_size);
int ElementLess(int8_t *input0, int8_t *input1, int8_t *output, int element_size);
int ElementLessEqual(int8_t *input0, int8_t *input1, int8_t *output, int element_size);
int ElementGreater(int8_t *input0, int8_t *input1, int8_t *output, int element_size);
int ElementGreaterEqual(int8_t *input0, int8_t *input1, int8_t *output, int element_size);
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_ARITHMETIC_INT8_H_
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/runtime/kernel/arm/opclib/int8/batch_to_space_int8.h"
#include "src/runtime/kernel/arm/opclib/arithmetic_common.h"
// Int8 batch-to-space for an NHWC tensor without cropping.
//
// Every source element is requantized on the way through:
//   q_out = clamp(round(q_in * s + b) + out_zp, -128, 127)
// with s = in_scale / out_scale and b = -in_zp * s.
//
// input / in_shape : source data and shape; in_shape[1..3] are read as H, W and C.
// output           : destination, written contiguously in (n, h, block row, w, block col, c) order.
// out_n            : number of output batches.
// block            : {block_h, block_w}.
// in_quant_arg / out_quant_arg : supply scale_ and zp_ for the requantization.
void BatchToSpaceNoCropForNHWC(const int8_t *input, int8_t *output, const int *in_shape, int out_n, const int *block,
                               QuantArg *in_quant_arg, QuantArg *out_quant_arg) {
  const int blk_rows = block[0];
  const int blk_cols = block[1];
  const int rows = in_shape[1];
  const int cols = in_shape[2];
  const int channels = in_shape[3];

  // Strides of the source tensor and of one block row inside the batch dimension.
  const size_t row_stride = cols * channels;
  const size_t batch_stride = row_stride * rows;
  const size_t blk_row_stride = blk_cols * out_n;

  // Requantization coefficients.
  const float inv_out_scale = 1.f / out_quant_arg->scale_;
  const float requant_scale = in_quant_arg->scale_ * inv_out_scale;
  const float requant_bias = -in_quant_arg->zp_ * requant_scale;
  const int32_t out_zp = out_quant_arg->zp_;

  int8_t *dst = output;
  for (int n = 0; n < out_n; ++n) {
    for (int h = 0; h < rows; ++h) {
      const size_t row_base = h * row_stride;
      for (int bh = 0; bh < blk_rows; ++bh) {
        for (int w = 0; w < cols; ++w) {
          const size_t col_base = w * channels;
          for (int bw = 0; bw < blk_cols; ++bw) {
            // The source batch index interleaves the block row, block column and output batch.
            const size_t src_base = batch_stride * (bh * blk_row_stride + bw * out_n + n) + col_base + row_base;
            const int8_t *src = input + src_base;
            for (int c = 0; c < channels; ++c) {
              int32_t quantized = round(src[c] * requant_scale + requant_bias) + out_zp;
              if (quantized > 127) {
                quantized = 127;
              }
              if (quantized < -128) {
                quantized = -128;
              }
              *dst++ = quantized;
            }
          }
        }
      }
    }
  }
}
// Int8 batch-to-space for an NHWC tensor with cropping.
//
// Every copied element is requantized:
//   q_out = clamp(round(q_in * s + b) + out_zp, -128, 127)
// with s = in_scale / out_scale and b = -in_zp * s.
//
// input / in_shape : source data and shape; in_shape[1..3] are read as H, W and C.
// output           : destination, written contiguously; only positions inside the crop window
//                    are emitted.
// out_n            : number of output batches.
// block            : {block_h, block_w}.
// crops            : {h_begin, h_end, w_begin, w_end}: amounts removed from the start / end of
//                    the expanded (in_h * block_h) x (in_w * block_w) extent.
// in_quant_arg / out_quant_arg : supply scale_ and zp_ for the requantization.
void BatchToSpaceForNHWC(const int8_t *input, int8_t *output, const int *in_shape, int out_n, const int *block,
                         const int *crops, QuantArg *in_quant_arg, QuantArg *out_quant_arg) {
  int block_h = block[0];
  int block_w = block[1];
  int in_h = in_shape[1];
  int in_w = in_shape[2];
  int in_c = in_shape[3];

  // Source rows/columns that can contribute, plus the inclusive window of valid
  // positions in the expanded (pre-crop) output.
  int h_start = crops[0] / block_h;
  int h_valid_begin = crops[0];
  int h_end = MSMIN((in_h * block_h - crops[1]) / block_h + 1, in_h);
  int h_valid_end = in_h * block_h - crops[1] - 1;
  int w_start = crops[2] / block_w;
  int w_valid_begin = crops[2];
  int w_end = MSMIN((in_w * block_w - crops[3]) / block_w + 1, in_w);
  int w_valid_end = in_w * block_w - crops[3] - 1;

  // A negative leading crop is invalid. The previous unsigned comparison ended up skipping
  // every element in that situation, so keep producing no output instead of indexing the
  // source with a negative row/column.
  if (h_valid_begin < 0 || w_valid_begin < 0) {
    return;
  }

  size_t stride_h = block_w * out_n;
  size_t output_offset = 0;
  size_t in_stride_h = in_w * in_c;
  size_t in_stride_n = in_stride_h * in_h;

  float output_inverse_scale = 1.f / out_quant_arg->scale_;
  float scale = in_quant_arg->scale_ * output_inverse_scale;
  float bias = -in_quant_arg->zp_ * scale;
  int32_t output_zp = out_quant_arg->zp_;

  for (int n = 0; n < out_n; ++n) {
    for (int h = h_start; h < h_end; ++h) {
      size_t h_offset = h * in_stride_h;
      for (int bh = 0; bh < block_h; ++bh) {
        // Signed on purpose: comparing an unsigned index with a negative h_valid_end (axis
        // cropped away completely) would wrap and let every row through.
        int h_index = h * block_h + bh;
        if (h_index < h_valid_begin || h_index > h_valid_end) {
          continue;
        }
        for (int w = w_start; w < w_end; ++w) {
          size_t w_offset = w * in_c;
          for (int bw = 0; bw < block_w; ++bw) {
            int w_index = w * block_w + bw;
            if (w_index < w_valid_begin || w_index > w_valid_end) {
              continue;
            }
            // The source batch index interleaves the block row, block column and output batch.
            size_t in_offset = in_stride_n * (bh * stride_h + bw * out_n + n) + w_offset + h_offset;
            for (int c = 0; c < in_c; ++c) {
              int32_t output_tmp = round(input[in_offset + c] * scale + bias) + output_zp;
              output_tmp = output_tmp > 127 ? 127 : output_tmp;
              output_tmp = output_tmp < -128 ? -128 : output_tmp;
              output[output_offset++] = output_tmp;
            }
          }
        }
      }
    }
  }
}
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_BATCH_TO_SPACE_INT8_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_BATCH_TO_SPACE_INT8_H_
#include "src/runtime/kernel/arm/opclib/op_base.h"
#include "src/runtime/kernel/arm/opclib/quantization/quantize.h"
// Int8 batch-to-space for NHWC data without cropping. Reads H, W, C from in_shape[1..3],
// block_h / block_w from block[0..1], and requantizes every element from in_quant_arg to
// out_quant_arg (scale_ and zp_), clamping the result to [-128, 127].
void BatchToSpaceNoCropForNHWC(const int8_t *input, int8_t *output, const int *in_shape, int out_n, const int *block,
QuantArg *in_quant_arg, QuantArg *out_quant_arg);
// Same as above, but only positions inside the crop window are written. crops holds
// {h_begin, h_end, w_begin, w_end}: amounts removed from the start / end of the expanded
// height and width.
void BatchToSpaceForNHWC(const int8_t *input, int8_t *output, const int *in_shape, int out_n, const int *block,
const int *crops, QuantArg *in_quant_arg, QuantArg *out_quant_arg);
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_BATCH_TO_SPACE_INT8_H_
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/runtime/kernel/arm/opclib/int8/depth_to_space_int8.h"
#include <string.h>
// Int8 depth-to-space for an NHWC tensor.
//
// For every input pixel, block_size runs of (block_size * out_stride_dim2_) consecutive
// channel values are moved to block_size consecutive output rows. Each value is requantized:
//   q_out = clamp(round(q_in * s + b) + out_zp, -128, 127)
// with s = in_scale / out_scale and b = -in_zp * s.
//
// input / in_shape : source data and shape; in_shape[0..2] are iterated as N, H and W.
// output           : destination buffer, addressed through the out_stride_dim*_ fields of param.
// param            : supplies block_size_ and the input / output strides.
// in_quant_arg / out_quant_arg : supply scale_ and zp_ for the requantization.
void DepthToSpaceForNHWC(const int8_t *input, int8_t *output, int *in_shape, DepthToSpaceParameter *param,
                         QuantArg *in_quant_arg, QuantArg *out_quant_arg) {
  int32_t block_size = param->block_size_;
  int32_t in_shape_dim2 = in_shape[2];
  int32_t in_shape_dim1 = in_shape[1];
  // Number of contiguous elements moved per (pixel, block row) pair.
  size_t copy_size = block_size * param->out_stride_dim2_;

  float output_inverse_scale = 1.f / out_quant_arg->scale_;
  float scale = in_quant_arg->scale_ * output_inverse_scale;
  float bias = -in_quant_arg->zp_ * scale;
  int32_t output_zp = out_quant_arg->zp_;

  for (int i = 0; i < in_shape[0]; ++i) {
    size_t in_offset_n = i * param->in_stride_dim0_;
    size_t out_offset_n = i * param->out_stride_dim0_;
    for (int j = 0; j < in_shape_dim1; ++j) {
      size_t in_offset_h = in_offset_n + j * param->in_stride_dim1_;
      size_t out_offset_h = out_offset_n + j * block_size * param->out_stride_dim1_;
      for (int k = 0; k < in_shape_dim2; ++k) {
        size_t in_offset_w = in_offset_h + k * param->in_stride_dim2_;
        size_t out_offset_w = out_offset_h + k * block_size * param->out_stride_dim2_;
        for (int l = 0; l < block_size; ++l) {
          size_t out_offset = out_offset_w + l * param->out_stride_dim1_;
          size_t in_offset = in_offset_w + l * block_size * param->out_stride_dim2_;
          // size_t counter: matches copy_size's type, so there is no signed/unsigned comparison
          // and no int overflow for long runs.
          for (size_t m = 0; m < copy_size; ++m) {
            int32_t output_tmp = round(input[in_offset + m] * scale + bias) + output_zp;
            output_tmp = output_tmp > 127 ? 127 : output_tmp;
            output_tmp = output_tmp < -128 ? -128 : output_tmp;
            output[out_offset + m] = output_tmp;
          }
        }
      }
    }
  }
}
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_DEPTH_TO_SPACE_INT8_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_DEPTH_TO_SPACE_INT8_H_
#include "src/runtime/kernel/arm/opclib/depth_to_space_parameter.h"
#include "src/runtime/kernel/arm/opclib/quantization/quantize.h"
// Int8 depth-to-space for NHWC data. Iterates in_shape[0..2] as N, H, W, uses block_size_ and
// the stride fields of param to address input and output, and requantizes every element from
// in_quant_arg to out_quant_arg (scale_ and zp_), clamping the result to [-128, 127].
void DepthToSpaceForNHWC(const int8_t *input, int8_t *output, int *in_shape, DepthToSpaceParameter *param,
QuantArg *in_quant_arg, QuantArg *out_quant_arg);
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_DEPTH_TO_SPACE_INT8_H_
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "mindspore/core/utils/log_adapter.h"
#include "common/common_test.h"
#include "mindspore/lite/src/runtime/kernel/arm/opclib/fp32/arg_min_max.h"
#include "mindspore/lite/src/runtime/kernel/arm/opclib/arg_min_max.h"
#include "mindspore/lite/src/runtime/kernel/arm/opclib/arithmetic_common.h"
namespace mindspore {
// Fixture for the fp32 arg-min / arg-max opclib routines.
//
// The top-k tests hand the routine a scratch buffer through param.arg_elements_ holding one
// ArgElement per element along the reduced axis; each test releases it before returning.
class TestArgMinMaxTestFp32 : public mindspore::Common {
 public:
  TestArgMinMaxTestFp32() = default;
};

// topk == 1, axis 0, index output: expects the index of the largest value in each of the 5 columns.
TEST_F(TestArgMinMaxTestFp32, ArgMaxTest1) {
  std::vector<float> in = {10, 20, 30, 40, 90,
                           20, 11, 15, 1,  50,
                           30, 45, 25, 50, 30};
  std::vector<float> expect_out = {2, 2, 0, 2, 0};
  std::vector<int> shape = {3, 5};
  float out[5];
  ArgMinMaxParameter param;
  param.topk_ = 1;
  param.out_value_ = false;
  param.axis_ = 0;
  param.data_type_ = 43;  // NOTE(review): presumably kNumberTypeFloat32 — confirm against the TypeId enum
  param.dims_size_ = 2;
  param.get_max_ = true;
  ArgMinMax(in.data(), out, shape.data(), &param);
  for (size_t i = 0; i < expect_out.size(); ++i) {
    std::cout << out[i] << " ";
  }
  std::cout << "\n";
  CompareOutputData(out, expect_out.data(), expect_out.size(), 0.000001);
}

// topk == 1, axis 0, value output: expects the largest value of each column.
TEST_F(TestArgMinMaxTestFp32, ArgMaxTest2) {
  std::vector<float> in = {10, 20, 30, 40, 90,
                           20, 11, 15, 1,  50,
                           30, 45, 25, 50, 30};
  std::vector<float> expect_out = {30, 45, 30, 50, 90};
  std::vector<int> shape = {3, 5};
  float out[5];
  ArgMinMaxParameter param;
  param.topk_ = 1;
  param.out_value_ = true;
  param.axis_ = 0;
  param.data_type_ = 43;  // NOTE(review): presumably kNumberTypeFloat32 — confirm against the TypeId enum
  param.dims_size_ = 2;
  param.get_max_ = true;
  ArgMinMax(in.data(), out, shape.data(), &param);
  CompareOutputData(out, expect_out.data(), expect_out.size(), 0.000001);
}

// topk == 1, axis 0, value output, get_max_ == false: expects the smallest value of each column.
TEST_F(TestArgMinMaxTestFp32, ArgMinTest2) {
  std::vector<float> in = {10, 20, 30, 40, 90,
                           20, 11, 15, 1,  50,
                           30, 45, 25, 50, 30};
  std::vector<float> expect_out = {10, 11, 15, 1, 30};
  std::vector<int> shape = {3, 5};
  float out[5];
  ArgMinMaxParameter param;
  param.topk_ = 1;
  param.out_value_ = true;
  param.axis_ = 0;
  param.data_type_ = 43;  // NOTE(review): presumably kNumberTypeFloat32 — confirm against the TypeId enum
  param.dims_size_ = 2;
  param.get_max_ = false;
  ArgMinMax(in.data(), out, shape.data(), &param);
  CompareOutputData(out, expect_out.data(), expect_out.size(), 0.000001);
}

// Top-2 along axis 2 of a {1, 1, 3, 5} tensor, value output.
TEST_F(TestArgMinMaxTestFp32, ArgMaxTest3_axis2_out_data) {
  std::vector<float> in = {10, 20, 30, 40, 90,
                           20, 11, 15, 1,  50,
                           30, 45, 25, 50, 30};
  std::vector<float> expect_out = {30, 45, 30, 50, 90, 20, 20, 25, 40, 50};
  ArgMinMaxParameter param;
  param.axis_ = 2;
  std::vector<int> in_shape = {1, 1, 3, 5};
  param.arg_elements_ = reinterpret_cast<ArgElement *>(malloc(in_shape[param.axis_] * sizeof(ArgElement)));
  param.out_value_ = true;
  param.topk_ = 2;
  std::vector<int> out_shape = {1, 1, 2, 5};
  ComputeStrides(in_shape.data(), param.in_strides_, in_shape.size());
  ComputeStrides(out_shape.data(), param.out_strides_, out_shape.size());
  float out[10];
  ArgMaxDim2(in.data(), out, in_shape.data(), &param);
  CompareOutputData(out, expect_out.data(), expect_out.size(), 0.00001);
  free(param.arg_elements_);  // scratch buffer is owned by the test
}

// Top-2 along axis 2 of a {1, 1, 3, 5} tensor, index output.
TEST_F(TestArgMinMaxTestFp32, ArgMaxTest3_axis2_out_index) {
  std::vector<float> in = {10, 20, 30, 40, 90,
                           20, 11, 15, 1,  50,
                           30, 45, 25, 50, 30};
  std::vector<float> expect_out = {2, 2, 0, 2, 0, 1, 0, 2, 0, 1};
  ArgMinMaxParameter param;
  param.axis_ = 2;
  std::vector<int> in_shape = {1, 1, 3, 5};
  param.arg_elements_ = reinterpret_cast<ArgElement *>(malloc(in_shape[param.axis_] * sizeof(ArgElement)));
  param.out_value_ = false;
  param.topk_ = 2;
  std::vector<int> out_shape = {1, 1, 2, 5};
  ComputeStrides(in_shape.data(), param.in_strides_, in_shape.size());
  ComputeStrides(out_shape.data(), param.out_strides_, out_shape.size());
  float out[10];
  ArgMaxDim2(in.data(), out, in_shape.data(), &param);
  CompareOutputData(out, expect_out.data(), expect_out.size(), 0.00001);
  free(param.arg_elements_);  // scratch buffer is owned by the test
}

// Top-2 along axis 3 of a {1, 1, 3, 5} tensor, value output.
TEST_F(TestArgMinMaxTestFp32, ArgMaxTest4_axis3_out_data) {
  std::vector<float> in = {10, 20, 30, 40, 90,
                           20, 11, 15, 1,  50,
                           30, 45, 25, 50, 30};
  std::vector<float> expect_out = {90, 40,
                                   50, 20,
                                   50, 45};
  ArgMinMaxParameter param;
  param.axis_ = 3;
  std::vector<int> in_shape = {1, 1, 3, 5};
  param.arg_elements_ = reinterpret_cast<ArgElement *>(malloc(in_shape[param.axis_] * sizeof(ArgElement)));
  param.out_value_ = true;
  param.topk_ = 2;
  std::vector<int> out_shape = {1, 1, 3, 2};
  ComputeStrides(in_shape.data(), param.in_strides_, in_shape.size());
  ComputeStrides(out_shape.data(), param.out_strides_, out_shape.size());
  float out[6];
  ArgMaxDim3(in.data(), out, in_shape.data(), &param);
  CompareOutputData(out, expect_out.data(), expect_out.size(), 0.00001);
  free(param.arg_elements_);  // scratch buffer is owned by the test
}

// Top-2 along axis 3 of a {1, 1, 3, 5} tensor, index output.
TEST_F(TestArgMinMaxTestFp32, ArgMaxTest4_axis3_out_index) {
  std::vector<float> in = {10, 20, 30, 40, 90,
                           20, 11, 15, 1,  50,
                           30, 45, 25, 50, 30};
  std::vector<float> expect_out = {4, 3,
                                   4, 0,
                                   3, 1};
  ArgMinMaxParameter param;
  param.axis_ = 3;
  std::vector<int> in_shape = {1, 1, 3, 5};
  param.arg_elements_ = reinterpret_cast<ArgElement *>(malloc(in_shape[param.axis_] * sizeof(ArgElement)));
  param.out_value_ = false;
  param.topk_ = 2;
  std::vector<int> out_shape = {1, 1, 3, 2};
  ComputeStrides(in_shape.data(), param.in_strides_, in_shape.size());
  ComputeStrides(out_shape.data(), param.out_strides_, out_shape.size());
  float out[6];
  ArgMaxDim3(in.data(), out, in_shape.data(), &param);
  CompareOutputData(out, expect_out.data(), expect_out.size(), 0.00001);
  free(param.arg_elements_);  // scratch buffer is owned by the test
}

// Top-2 along axis 1 of a {2, 3, 2, 3} tensor, index output.
TEST_F(TestArgMinMaxTestFp32, ArgMaxTest5_axis1_out_index) {
  std::vector<float> in = {100, 2,  300,
                           4,   50, 6,
                           11,  12, 13,
                           34,  35, 36,
                           9,   6,  17,
                           10,  20, 30,
                           10,  20, 30,
                           40,  5,  60,
                           7,   80, 90,
                           10,  11, 120,
                           18,  5,  16,
                           9,   22, 23};
  std::vector<float> expect_out = {0, 1, 0,
                                   1, 0, 1,
                                   1, 2, 2,
                                   2, 1, 2,
                                   2, 1, 1,
                                   0, 2, 1,
                                   0, 0, 0,
                                   1, 1, 0};
  ArgMinMaxParameter param;
  param.axis_ = 1;
  std::vector<int> in_shape = {2, 3, 2, 3};
  param.arg_elements_ = reinterpret_cast<ArgElement *>(malloc(in_shape[param.axis_] * sizeof(ArgElement)));
  param.out_value_ = false;
  param.topk_ = 2;
  std::vector<int> out_shape = {2, 2, 2, 3};
  ComputeStrides(in_shape.data(), param.in_strides_, in_shape.size());
  ComputeStrides(out_shape.data(), param.out_strides_, out_shape.size());
  float out[24];
  ArgMaxDim1(in.data(), out, in_shape.data(), &param);
  CompareOutputData(out, expect_out.data(), expect_out.size(), 0.00001);
  free(param.arg_elements_);  // scratch buffer is owned by the test
}

// Top-2 along axis 1 of a {2, 3, 2, 3} tensor, value output.
TEST_F(TestArgMinMaxTestFp32, ArgMaxTest5_axis1_out_data) {
  std::vector<float> in = {100, 2,  300,
                           4,   50, 6,
                           11,  12, 13,
                           34,  35, 36,
                           9,   6,  17,
                           10,  20, 30,
                           10,  20, 30,
                           40,  5,  60,
                           7,   80, 90,
                           10,  11, 120,
                           18,  5,  16,
                           9,   22, 23};
  std::vector<float> expect_out = {100, 12, 300,
                                   34,  50, 36,
                                   11,  6,  17,
                                   10,  35, 30,
                                   18,  80, 90,
                                   40,  22, 120,
                                   10,  20, 30,
                                   10,  11, 60};
  ArgMinMaxParameter param;
  param.axis_ = 1;
  std::vector<int> in_shape = {2, 3, 2, 3};
  param.arg_elements_ = reinterpret_cast<ArgElement *>(malloc(in_shape[param.axis_] * sizeof(ArgElement)));
  param.out_value_ = true;
  param.topk_ = 2;
  std::vector<int> out_shape = {2, 2, 2, 3};
  ComputeStrides(in_shape.data(), param.in_strides_, in_shape.size());
  ComputeStrides(out_shape.data(), param.out_strides_, out_shape.size());
  float out[24];
  ArgMaxDim1(in.data(), out, in_shape.data(), &param);
  CompareOutputData(out, expect_out.data(), expect_out.size(), 0.00001);
  free(param.arg_elements_);  // scratch buffer is owned by the test
}

// Top-2 along axis 0 of a {3, 2, 2, 2} tensor, index output.
TEST_F(TestArgMinMaxTestFp32, ArgMaxTest6_axis0_out_index) {
  std::vector<float> in = {100, 2,
                           4,   50,
                           11,  12,
                           34,  35,
                           10,  20,
                           40,  5,
                           7,   80,
                           10,  11,
                           55,  25,
                           5,   15,
                           18,  8,
                           15,  16};
  std::vector<float> expect_out = {0, 2,
                                   1, 0,
                                   2, 1,
                                   0, 0,
                                   2, 1,
                                   2, 2,
                                   0, 0,
                                   2, 2};
  ArgMinMaxParameter param;
  // Axis 0 is the one being reduced (ArgMaxDim0 below), so the scratch buffer must hold
  // in_shape[0] == 3 elements. Sizing it from axis 1 gave only 2.
  param.axis_ = 0;
  std::vector<int> in_shape = {3, 2, 2, 2};
  param.arg_elements_ = reinterpret_cast<ArgElement *>(malloc(in_shape[param.axis_] * sizeof(ArgElement)));
  param.out_value_ = false;
  param.topk_ = 2;
  std::vector<int> out_shape = {2, 2, 2, 2};
  ComputeStrides(in_shape.data(), param.in_strides_, in_shape.size());
  ComputeStrides(out_shape.data(), param.out_strides_, out_shape.size());
  float out[16];
  ArgMaxDim0(in.data(), out, in_shape.data(), &param);
  CompareOutputData(out, expect_out.data(), expect_out.size(), 0.00001);
  free(param.arg_elements_);  // scratch buffer is owned by the test
}

// Top-2 along axis 0 of a {3, 2, 2, 2} tensor, value output.
TEST_F(TestArgMinMaxTestFp32, ArgMaxTest6_axis0_out_data) {
  std::vector<float> in = {100, 2,
                           4,   50,
                           11,  12,
                           34,  35,
                           10,  20,
                           40,  5,
                           7,   80,
                           10,  11,
                           55,  25,
                           5,   15,
                           18,  8,
                           15,  16};
  std::vector<float> expect_out = {100, 25,
                                   40,  50,
                                   18,  80,
                                   34,  35,
                                   55,  20,
                                   5,   15,
                                   11,  12,
                                   15,  16};
  ArgMinMaxParameter param;
  // Axis 0 is the one being reduced (ArgMaxDim0 below), so the scratch buffer must hold
  // in_shape[0] == 3 elements. Sizing it from axis 1 gave only 2.
  param.axis_ = 0;
  std::vector<int> in_shape = {3, 2, 2, 2};
  param.arg_elements_ = reinterpret_cast<ArgElement *>(malloc(in_shape[param.axis_] * sizeof(ArgElement)));
  param.out_value_ = true;
  param.topk_ = 2;
  std::vector<int> out_shape = {2, 2, 2, 2};
  ComputeStrides(in_shape.data(), param.in_strides_, in_shape.size());
  ComputeStrides(out_shape.data(), param.out_strides_, out_shape.size());
  float out[16];
  ArgMaxDim0(in.data(), out, in_shape.data(), &param);
  CompareOutputData(out, expect_out.data(), expect_out.size(), 0.00001);
  free(param.arg_elements_);  // scratch buffer is owned by the test
}

// Bottom-2 (arg-min) along axis 3 of a {1, 1, 3, 5} tensor, value output.
TEST_F(TestArgMinMaxTestFp32, ArgMinTest1_axis3_out_data) {
  std::vector<float> in = {10, 20, 30, 40, 90,
                           20, 11, 15, 1,  50,
                           30, 45, 25, 50, 30};
  std::vector<float> expect_out = {10, 20,
                                   1,  11,
                                   25, 30};
  ArgMinMaxParameter param;
  param.axis_ = 3;
  std::vector<int> in_shape = {1, 1, 3, 5};
  param.arg_elements_ = reinterpret_cast<ArgElement *>(malloc(in_shape[param.axis_] * sizeof(ArgElement)));
  param.out_value_ = true;
  param.topk_ = 2;
  std::vector<int> out_shape = {1, 1, 3, 2};
  ComputeStrides(in_shape.data(), param.in_strides_, in_shape.size());
  ComputeStrides(out_shape.data(), param.out_strides_, out_shape.size());
  float out[6];
  ArgMinDim3(in.data(), out, in_shape.data(), &param);
  CompareOutputData(out, expect_out.data(), expect_out.size(), 0.00001);
  free(param.arg_elements_);  // scratch buffer is owned by the test
}
}  // namespace mindspore
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "mindspore/core/utils/log_adapter.h"
#include "common/common_test.h"
#include "mindspore/lite/src/runtime/kernel/arm/opclib/batch_to_space.h"
#include "mindspore/lite/src/runtime/kernel/arm/opclib/arithmetic_common.h"
namespace mindspore {
// Fixture for the fp32 batch-to-space opclib routines (with and without cropping).
// Every test feeds a small NHWC tensor through the routine, echoes the beginning of the
// result to stdout and compares it with a hand-computed expectation.
class BatchToSpaceTestFp32 : public mindspore::Common {
 public:
  BatchToSpaceTestFp32() = default;
};

// {4, 1, 1, 3} input, 2x2 block, one output batch, no cropping.
TEST_F(BatchToSpaceTestFp32, BatchToSpaceTest1) {
  constexpr int kCount = 12;
  float src[12] = {10, 30, 90, 2, 20, 120, 5, 50, 150, 6, 16, 160};
  float want[kCount] = {10, 30, 90, 2, 20, 120, 5, 50, 150, 6, 16, 160};
  float got[kCount];
  int src_shape[4] = {4, 1, 1, 3};
  int block_hw[2] = {2, 2};
  int dst_batch = 1;

  BatchToSpaceNoCropForNHWC(src, got, src_shape, dst_batch, block_hw, sizeof(float));

  for (float value : got) {
    std::cout << value << " ";
  }
  std::cout << "\n";
  CompareOutputData(got, want, kCount, 0.000001);
}

// Same input as BatchToSpaceTest1, cropped with {1, 0, 0, 1}: a single pixel survives.
TEST_F(BatchToSpaceTestFp32, BatchToSpaceTest_crop_1) {
  constexpr int kCount = 3;
  float src[12] = {10, 30, 90, 2, 20, 120, 5, 50, 150, 6, 16, 160};
  float want[kCount] = {5, 50, 150};
  float got[kCount];
  int src_shape[4] = {4, 1, 1, 3};
  int block_hw[2] = {2, 2};
  int crop_hw[4] = {1, 0, 0, 1};
  int dst_batch = 1;

  BatchToSpaceForNHWC(src, got, src_shape, dst_batch, block_hw, crop_hw, sizeof(float));

  for (float value : got) {
    std::cout << value << " ";
  }
  std::cout << "\n";
  CompareOutputData(got, want, kCount, 0.000001);
}

// {4, 2, 2, 2} input, 2x2 block, one output batch, no cropping.
TEST_F(BatchToSpaceTestFp32, BatchToSpaceTest2) {
  constexpr int kCount = 32;
  float src[32] = {1, 10, 3, 30, 9,  90,  11, 110, 2, 20, 4, 40, 10, 100, 12, 120,
                   5, 50, 7, 70, 13, 130, 15, 150, 6, 60, 8, 80, 14, 140, 16, 160};
  float want[kCount] = {1, 10, 2,  20,  3,  30,  4,  40,  5,  50,  6,  60,  7,  70,  8,  80,
                        9, 90, 10, 100, 11, 110, 12, 120, 13, 130, 14, 140, 15, 150, 16, 160};
  float got[kCount];
  int src_shape[4] = {4, 2, 2, 2};
  int block_hw[2] = {2, 2};
  int dst_batch = 1;

  BatchToSpaceNoCropForNHWC(src, got, src_shape, dst_batch, block_hw, sizeof(float));

  for (float value : got) {
    std::cout << value << " ";
  }
  std::cout << "\n";
  CompareOutputData(got, want, kCount, 0.000001);
}

// Same input as BatchToSpaceTest2, cropped with {1, 1, 1, 0}.
TEST_F(BatchToSpaceTestFp32, BatchToSpaceTest_crop_2) {
  constexpr int kCount = 12;
  float src[32] = {1, 10, 3, 30, 9,  90,  11, 110, 2, 20, 4, 40, 10, 100, 12, 120,
                   5, 50, 7, 70, 13, 130, 15, 150, 6, 60, 8, 80, 14, 140, 16, 160};
  float want[kCount] = {6,  60,  7,  70,  8,  80,
                        10, 100, 11, 110, 12, 120};
  float got[kCount];
  int src_shape[4] = {4, 2, 2, 2};
  int block_hw[2] = {2, 2};
  int crop_hw[4] = {1, 1, 1, 0};
  int dst_batch = 1;

  BatchToSpaceForNHWC(src, got, src_shape, dst_batch, block_hw, crop_hw, sizeof(float));

  for (float value : got) {
    std::cout << value << " ";
  }
  std::cout << "\n";
  CompareOutputData(got, want, kCount, 0.000001);
}

// {8, 2, 2, 2} input, 2x2 block, two output batches, no cropping.
TEST_F(BatchToSpaceTestFp32, BatchToSpaceTest3) {
  constexpr int kCount = 64;
  float src[64] = {1,  10, 3,  30, 9,   90,  11,  110, 2,  20, 4,  40, 10,  100, 12,  120,
                   5,  50, 7,  70, 13,  130, 15,  150, 6,  60, 8,  80, 14,  140, 16,  160,
                   21, 10, 23, 30, 29,  90,  211, 110, 22, 20, 24, 40, 210, 100, 212, 120,
                   25, 50, 27, 70, 213, 130, 215, 150, 26, 60, 28, 80, 214, 140, 216, 160};
  float want[kCount] = {1,  10,  5,  50,  3,  30,  7,  70,  21,  10,  25,  50,  23,  30,  27,  70,
                        9,  90,  13, 130, 11, 110, 15, 150, 29,  90,  213, 130, 211, 110, 215, 150,
                        2,  20,  6,  60,  4,  40,  8,  80,  22,  20,  26,  60,  24,  40,  28,  80,
                        10, 100, 14, 140, 12, 120, 16, 160, 210, 100, 214, 140, 212, 120, 216, 160};
  float got[kCount];
  int src_shape[4] = {8, 2, 2, 2};
  int block_hw[2] = {2, 2};
  int dst_batch = 2;

  BatchToSpaceNoCropForNHWC(src, got, src_shape, dst_batch, block_hw, sizeof(float));

  // Echo at most the first 32 values.
  const int shown = kCount < 32 ? kCount : 32;
  for (int idx = 0; idx != shown; ++idx) {
    std::cout << got[idx] << " ";
  }
  std::cout << "\n";
  CompareOutputData(got, want, kCount, 0.000001);
}

// Same input as BatchToSpaceTest3, cropped with {2, 0, 0, 2}.
TEST_F(BatchToSpaceTestFp32, BatchToSpaceTest_crop_3) {
  constexpr int kCount = 16;
  float src[64] = {1,  10, 3,  30, 9,   90,  11,  110, 2,  20, 4,  40, 10,  100, 12,  120,
                   5,  50, 7,  70, 13,  130, 15,  150, 6,  60, 8,  80, 14,  140, 16,  160,
                   21, 10, 23, 30, 29,  90,  211, 110, 22, 20, 24, 40, 210, 100, 212, 120,
                   25, 50, 27, 70, 213, 130, 215, 150, 26, 60, 28, 80, 214, 140, 216, 160};
  float want[kCount] = {9,  90,  13, 130, 29,  90,  213, 130,
                        10, 100, 14, 140, 210, 100, 214, 140};
  float got[kCount];
  int src_shape[4] = {8, 2, 2, 2};
  int block_hw[2] = {2, 2};
  int crop_hw[4] = {2, 0, 0, 2};
  int dst_batch = 2;

  BatchToSpaceForNHWC(src, got, src_shape, dst_batch, block_hw, crop_hw, sizeof(float));

  // Echo at most the first 32 values.
  const int shown = kCount < 32 ? kCount : 32;
  for (int idx = 0; idx != shown; ++idx) {
    std::cout << got[idx] << " ";
  }
  std::cout << "\n";
  CompareOutputData(got, want, kCount, 0.000001);
}

// {12, 2, 2, 2} input, 3x2 block, two output batches, no cropping.
TEST_F(BatchToSpaceTestFp32, BatchToSpaceTest4) {
  constexpr int kCount = 96;
  float src[96] = {1,   10,  3,   30,  9,   90,  11,  110, 2,   20,  4,   40,  10,  100, 12,  120, 5,   50,  7,   70,
                   13,  130, 15,  150, 6,   60,  8,   80,  14,  140, 16,  160, 21,  10,  23,  30,  29,  90,  211, 110,
                   22,  20,  24,  40,  210, 100, 212, 120, 25,  50,  27,  70,  213, 130, 215, 150, 26,  60,  28,  80,
                   214, 140, 216, 160, 31,  10,  33,  30,  39,  90,  311, 110, 32,  20,  34,  40,  310, 100, 312, 120,
                   35,  50,  37,  70,  313, 130, 315, 150, 36,  60,  38,  80,  314, 140, 316, 160};
  float want[kCount] = {
    1,  10,  5,  50,  3,  30,  7,  70,  21,  10,  25,  50,  23,  30,  27,  70,  31,  10,  35,  50,  33,  30,  37,  70,
    9,  90,  13, 130, 11, 110, 15, 150, 29,  90,  213, 130, 211, 110, 215, 150, 39,  90,  313, 130, 311, 110, 315, 150,
    2,  20,  6,  60,  4,  40,  8,  80,  22,  20,  26,  60,  24,  40,  28,  80,  32,  20,  36,  60,  34,  40,  38,  80,
    10, 100, 14, 140, 12, 120, 16, 160, 210, 100, 214, 140, 212, 120, 216, 160, 310, 100, 314, 140, 312, 120, 316, 160};
  float got[kCount];
  int src_shape[4] = {12, 2, 2, 2};
  int block_hw[2] = {3, 2};
  int dst_batch = 2;

  BatchToSpaceNoCropForNHWC(src, got, src_shape, dst_batch, block_hw, sizeof(float));

  // Echo at most the first 32 values.
  const int shown = kCount < 32 ? kCount : 32;
  for (int idx = 0; idx != shown; ++idx) {
    std::cout << got[idx] << " ";
  }
  std::cout << "\n";
  CompareOutputData(got, want, kCount, 0.000001);
}

// Same input as BatchToSpaceTest4, cropped with {1, 2, 1, 1}.
TEST_F(BatchToSpaceTestFp32, BatchToSpaceTest_crop_4) {
  constexpr int kCount = 24;
  float src[96] = {1,   10,  3,   30,  9,   90,  11,  110, 2,   20,  4,   40,  10,  100, 12,  120, 5,   50,  7,   70,
                   13,  130, 15,  150, 6,   60,  8,   80,  14,  140, 16,  160, 21,  10,  23,  30,  29,  90,  211, 110,
                   22,  20,  24,  40,  210, 100, 212, 120, 25,  50,  27,  70,  213, 130, 215, 150, 26,  60,  28,  80,
                   214, 140, 216, 160, 31,  10,  33,  30,  39,  90,  311, 110, 32,  20,  34,  40,  310, 100, 312, 120,
                   35,  50,  37,  70,  313, 130, 315, 150, 36,  60,  38,  80,  314, 140, 316, 160};
  float want[kCount] = {
    25, 50,  23, 30,  35, 50, 33, 30,
    13, 130, 11, 110, 26, 60, 24, 40, 36, 60, 34, 40, 14, 140, 12, 120};
  float got[kCount];
  int src_shape[4] = {12, 2, 2, 2};
  int block_hw[2] = {3, 2};
  int crop_hw[4] = {1, 2, 1, 1};
  int dst_batch = 2;

  BatchToSpaceForNHWC(src, got, src_shape, dst_batch, block_hw, crop_hw, sizeof(float));

  // Echo at most the first 32 values.
  const int shown = kCount < 32 ? kCount : 32;
  for (int idx = 0; idx != shown; ++idx) {
    std::cout << got[idx] << " ";
  }
  std::cout << "\n";
  CompareOutputData(got, want, kCount, 0.000001);
}
}  // namespace mindspore
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "common/common_test.h"
#include "mindspore/lite/src/runtime/kernel/arm/opclib/fp32/crop.h"
namespace mindspore {
class CropTestFp32 : public mindspore::Common {
public:
CropTestFp32() = default;
};
// Crop4D, single thread: {2, 2, 2, 2} -> {2, 1, 1, 1}, axis 1, offsets {1, 1, 1}.
TEST_F(CropTestFp32, CropTest1) {
  constexpr int kCount = 2;
  float src[16] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
  float want[kCount] = {8, 16};
  float got[kCount];
  int src_shape[4] = {2, 2, 2, 2};
  int dst_shape[4] = {2, 1, 1, 1};

  CropParameter settings;
  settings.axis_ = 1;
  for (int k = 0; k < 3; ++k) {
    settings.offset_[k] = 1;
  }
  settings.op_parameter_.thread_num_ = 1;
  settings.thread_id_ = 0;

  Crop4D(src, got, src_shape, dst_shape, &settings);

  for (float value : got) {
    std::cout << value << " ";
  }
  std::cout << "\n";
  CompareOutputData(got, want, kCount, 0.000001);
}
// Crop4D, single thread: {2, 2, 2, 2} -> {1, 1, 2, 2}, axis 0, offsets {1, 1, 0, 0}.
TEST_F(CropTestFp32, CropTest2) {
  constexpr int kCount = 4;
  float src[16] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
  float want[kCount] = {13, 14, 15, 16};
  float got[kCount];
  int src_shape[4] = {2, 2, 2, 2};
  int dst_shape[4] = {1, 1, 2, 2};

  CropParameter settings;
  settings.axis_ = 0;
  const int offsets[4] = {1, 1, 0, 0};
  for (int k = 0; k < 4; ++k) {
    settings.offset_[k] = offsets[k];
  }
  settings.op_parameter_.thread_num_ = 1;
  settings.thread_id_ = 0;

  Crop4D(src, got, src_shape, dst_shape, &settings);

  for (float value : got) {
    std::cout << value << " ";
  }
  std::cout << "\n";
  CompareOutputData(got, want, kCount, 0.000001);
}
// Crop4D, single thread: {2, 2, 2, 2} -> {2, 2, 2, 1}, axis 3, offset 1.
TEST_F(CropTestFp32, CropTest3) {
  constexpr int kCount = 8;
  float src[16] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
  float want[kCount] = {2, 4, 6, 8, 10, 12, 14, 16};
  float got[kCount];
  int src_shape[4] = {2, 2, 2, 2};
  int dst_shape[4] = {2, 2, 2, 1};

  CropParameter settings;
  settings.axis_ = 3;
  settings.offset_[0] = 1;
  settings.op_parameter_.thread_num_ = 1;
  settings.thread_id_ = 0;

  Crop4D(src, got, src_shape, dst_shape, &settings);

  for (float value : got) {
    std::cout << value << " ";
  }
  std::cout << "\n";
  CompareOutputData(got, want, kCount, 0.000001);
}
// Crop4D with thread_num_ == 2: the routine is invoked once per thread id (0, then 1) on the
// same buffers. {2, 2, 2, 2} -> {2, 2, 2, 1}, axis 3, offset 1.
TEST_F(CropTestFp32, CropTest4) {
  constexpr int kCount = 8;
  float src[16] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
  float want[kCount] = {2, 4, 6, 8, 10, 12, 14, 16};
  float got[kCount];
  int src_shape[4] = {2, 2, 2, 2};
  int dst_shape[4] = {2, 2, 2, 1};

  CropParameter settings;
  settings.axis_ = 3;
  settings.offset_[0] = 1;
  settings.op_parameter_.thread_num_ = 2;

  for (int tid = 0; tid < 2; ++tid) {
    settings.thread_id_ = tid;
    Crop4D(src, got, src_shape, dst_shape, &settings);
  }

  for (float value : got) {
    std::cout << value << " ";
  }
  std::cout << "\n";
  CompareOutputData(got, want, kCount, 0.000001);
}
// Crop4DNoParallel: {2, 2, 2, 2} -> {2, 1, 1, 1}, axis 1, offsets {1, 1, 1}.
TEST_F(CropTestFp32, CropTest5) {
  constexpr int kCount = 2;
  float src[16] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
  float want[kCount] = {8, 16};
  float got[kCount];
  int src_shape[4] = {2, 2, 2, 2};
  int dst_shape[4] = {2, 1, 1, 1};

  CropParameter settings;
  settings.axis_ = 1;
  for (int k = 0; k < 3; ++k) {
    settings.offset_[k] = 1;
  }

  Crop4DNoParallel(src, got, src_shape, dst_shape, &settings);

  for (float value : got) {
    std::cout << value << " ";
  }
  std::cout << "\n";
  CompareOutputData(got, want, kCount, 0.000001);
}
// Crops a 2x2x2x2 input to a 1x1x2x2 output through Crop4DNoParallel with
// axis_ = 0 and offsets {1, 1, 0, 0}. The expected output is the last four
// input values (13..16).
TEST_F(CropTestFp32, CropTest6) {
  float input[16] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
  const int kOutSize = 4;
  float expect_out[kOutSize] = {13, 14, 15, 16};
  // Zero-filled so an element the kernel does not write is compared as a
  // well-defined value rather than as stack garbage.
  float output[kOutSize] = {};
  int in_shape[4] = {2, 2, 2, 2};
  int out_shape[4] = {1, 1, 2, 2};
  // Value-initialized: the thread-related members are never assigned in this
  // test, so they would otherwise be indeterminate.
  CropParameter crop_param{};
  crop_param.axis_ = 0;
  crop_param.offset_[0] = 1;
  crop_param.offset_[1] = 1;
  crop_param.offset_[2] = 0;
  crop_param.offset_[3] = 0;
  Crop4DNoParallel(input, output, in_shape, out_shape, &crop_param);
  // Dump the produced values to make a failing comparison easier to diagnose.
  for (const float value : output) {
    std::cout << value << " ";
  }
  std::cout << "\n";
  CompareOutputData(output, expect_out, kOutSize, 0.000001);
}
// Crops a 2x2x2x2 input to a 2x2x2x1 output through Crop4DNoParallel with
// axis_ = 3 and offset_[0] = 1. The expected output is the second element of
// every innermost pair of the input.
TEST_F(CropTestFp32, CropTest7) {
  float input[16] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
  const int kOutSize = 8;
  float expect_out[kOutSize] = {2, 4, 6, 8, 10, 12, 14, 16};
  // Zero-filled so an element the kernel does not write is compared as a
  // well-defined value rather than as stack garbage.
  float output[kOutSize] = {};
  int in_shape[4] = {2, 2, 2, 2};
  int out_shape[4] = {2, 2, 2, 1};
  // Value-initialized: only axis_ and offset_[0] are assigned below, so every
  // other member would otherwise be indeterminate.
  CropParameter crop_param{};
  crop_param.axis_ = 3;
  crop_param.offset_[0] = 1;
  Crop4DNoParallel(input, output, in_shape, out_shape, &crop_param);
  // Dump the produced values to make a failing comparison easier to diagnose.
  for (const float value : output) {
    std::cout << value << " ";
  }
  std::cout << "\n";
  CompareOutputData(output, expect_out, kOutSize, 0.000001);
}
// Crops a 1x3x3x3 input to a 1x1x2x2 output with axis_ = 1 and offsets
// {1, 1, 1}, with thread_num_ = 2: Crop4D is called twice on the same output
// buffer, once with thread_id_ = 0 and once with thread_id_ = 1.
TEST_F(CropTestFp32, CropTest8) {
  float input[27] = {1,  2,  3,  4,  5,  6,  7,  8,  9,
                     11, 12, 13, 14, 15, 16, 17, 18, 19,
                     21, 22, 23, 24, 25, 26, 27, 28, 29};
  const int kOutSize = 4;
  float expect_out[kOutSize] = {15, 16, 18, 19};
  // Zero-filled so an element neither call writes is compared as a
  // well-defined value rather than as stack garbage.
  float output[kOutSize] = {};
  int in_shape[4] = {1, 3, 3, 3};
  int out_shape[4] = {1, 1, 2, 2};
  // Value-initialized: offset_[3] is never assigned in this test, so it would
  // otherwise be indeterminate when handed to Crop4D.
  CropParameter crop_param{};
  crop_param.axis_ = 1;
  crop_param.offset_[0] = 1;
  crop_param.offset_[1] = 1;
  crop_param.offset_[2] = 1;
  crop_param.op_parameter_.thread_num_ = 2;
  // First slice of the work.
  crop_param.thread_id_ = 0;
  Crop4D(input, output, in_shape, out_shape, &crop_param);
  // Second slice of the work, written into the same output buffer.
  crop_param.thread_id_ = 1;
  Crop4D(input, output, in_shape, out_shape, &crop_param);
  // Dump the produced values to make a failing comparison easier to diagnose.
  for (const float value : output) {
    std::cout << value << " ";
  }
  std::cout << "\n";
  CompareOutputData(output, expect_out, kOutSize, 0.000001);
}
// Crops a 1x4x4x4 input to a 1x2x2x2 output with axis_ = 1 and offsets
// {1, 1, 1}, with thread_num_ = 2: Crop4D is called twice on the same output
// buffer, once with thread_id_ = 0 and once with thread_id_ = 1.
TEST_F(CropTestFp32, CropTest9) {
  float input[64] = {1,  2,  3,  4,  5,  6,  7,  8,  9,  10,  11,  12,  13,  14,  15,  16,
                     11, 12, 13, 14, 15, 16, 17, 18, 19, 110, 111, 112, 113, 114, 115, 116,
                     21, 22, 23, 24, 25, 26, 27, 28, 29, 210, 211, 212, 213, 214, 215, 216,
                     31, 32, 33, 34, 35, 36, 37, 38, 39, 310, 311, 312, 313, 314, 315, 316};
  const int kOutSize = 8;
  float expect_out[kOutSize] = {16, 17, 110, 111, 26, 27, 210, 211};
  // Zero-filled so an element neither call writes is compared as a
  // well-defined value rather than as stack garbage.
  float output[kOutSize] = {};
  int in_shape[4] = {1, 4, 4, 4};
  int out_shape[4] = {1, 2, 2, 2};
  // Value-initialized: offset_[3] is never assigned in this test, so it would
  // otherwise be indeterminate when handed to Crop4D.
  CropParameter crop_param{};
  crop_param.axis_ = 1;
  crop_param.offset_[0] = 1;
  crop_param.offset_[1] = 1;
  crop_param.offset_[2] = 1;
  crop_param.op_parameter_.thread_num_ = 2;
  // First slice of the work.
  crop_param.thread_id_ = 0;
  Crop4D(input, output, in_shape, out_shape, &crop_param);
  // Second slice of the work, written into the same output buffer.
  crop_param.thread_id_ = 1;
  Crop4D(input, output, in_shape, out_shape, &crop_param);
  // Dump the produced values to make a failing comparison easier to diagnose.
  for (const float value : output) {
    std::cout << value << " ";
  }
  std::cout << "\n";
  CompareOutputData(output, expect_out, kOutSize, 0.000001);
}
} // namespace mindspore
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "mindspore/core/utils/log_adapter.h"
#include "common/common_test.h"
#include "mindspore/lite/src/runtime/kernel/arm/opclib/depth_to_space.h"
#include "mindspore/lite/src/runtime/kernel/arm/opclib/arithmetic_common.h"
namespace mindspore {
// Test fixture for the fp32 DepthToSpace kernel tests in this file. It adds no
// state of its own on top of the shared mindspore::Common test base.
class DepthToSpaceTestFp32 : public mindspore::Common {
 public:
  DepthToSpaceTestFp32() = default;
};
// Runs DepthToSpaceForNHWC on a 1x2x2x4 input with block_size_ = 2 and checks
// the 16 output values against the expected 1x4x4x1 layout.
TEST_F(DepthToSpaceTestFp32, DepthToSpaceTest2) {
  float input[16] = {1, 2, 10, 20, 5, 6, 3, 8, 18, 10, 11, 55, 3, 4, 15, 25};
  constexpr int kOutSize = 16;
  float expect_out[kOutSize] = {1, 2, 5, 6, 10, 20, 3, 8, 18, 10, 3, 4, 11, 55, 15, 25};
  // Zero-filled so an element the kernel does not write is compared as a
  // well-defined value rather than as stack garbage.
  float output[kOutSize] = {};
  int in_shape[4] = {1, 2, 2, 4};
  int out_shape[4] = {1, 4, 4, 1};
  // Value-initialized so that any member not assigned below is zero instead of
  // indeterminate.
  DepthToSpaceParameter param{};
  param.block_size_ = 2;
  // The kernel is given precomputed strides for the first three dimensions of
  // both tensors; out_shape is only used here to derive the output strides.
  int in_strides[4];
  ComputeStrides(in_shape, in_strides, 4);
  int out_strides[4];
  ComputeStrides(out_shape, out_strides, 4);
  param.in_stride_dim0_ = in_strides[0];
  param.in_stride_dim1_ = in_strides[1];
  param.in_stride_dim2_ = in_strides[2];
  param.out_stride_dim0_ = out_strides[0];
  param.out_stride_dim1_ = out_strides[1];
  param.out_stride_dim2_ = out_strides[2];
  param.data_type_size_ = sizeof(float);
  DepthToSpaceForNHWC(static_cast<const void *>(input), output, in_shape, &param);
  // Dump the produced values to make a failing comparison easier to diagnose.
  for (const float value : output) {
    std::cout << value << " ";
  }
  std::cout << "\n";
  CompareOutputData(output, expect_out, kOutSize, 0.000001);
}
// Runs DepthToSpaceForNHWC on a 1x1x1x8 input with block_size_ = 2 (output
// shape 1x2x2x2). For this shape the expected output equals the input, element
// for element.
TEST_F(DepthToSpaceTestFp32, DepthToSpaceTest3) {
  float input[8] = {1, 2, 3, 4, 5, 6, 7, 8};
  constexpr int kOutSize = 8;
  float expect_out[kOutSize] = {1, 2, 3, 4, 5, 6, 7, 8};
  // Zero-filled so an element the kernel does not write is compared as a
  // well-defined value rather than as stack garbage.
  float output[kOutSize] = {};
  int in_shape[4] = {1, 1, 1, 8};
  int out_shape[4] = {1, 2, 2, 2};
  // Value-initialized so that any member not assigned below is zero instead of
  // indeterminate.
  DepthToSpaceParameter param{};
  param.block_size_ = 2;
  // The kernel is given precomputed strides for the first three dimensions of
  // both tensors; out_shape is only used here to derive the output strides.
  int in_strides[4];
  ComputeStrides(in_shape, in_strides, 4);
  int out_strides[4];
  ComputeStrides(out_shape, out_strides, 4);
  param.in_stride_dim0_ = in_strides[0];
  param.in_stride_dim1_ = in_strides[1];
  param.in_stride_dim2_ = in_strides[2];
  param.out_stride_dim0_ = out_strides[0];
  param.out_stride_dim1_ = out_strides[1];
  param.out_stride_dim2_ = out_strides[2];
  param.data_type_size_ = sizeof(float);
  DepthToSpaceForNHWC(static_cast<const void *>(input), output, in_shape, &param);
  // Dump the produced values to make a failing comparison easier to diagnose.
  for (const float value : output) {
    std::cout << value << " ";
  }
  std::cout << "\n";
  CompareOutputData(output, expect_out, kOutSize, 0.000001);
}
} // namespace mindspore
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册