From c4a5c960d1de68b992228ea448e070d70bb9c30d Mon Sep 17 00:00:00 2001
From: Qi Li
Date: Fri, 25 Sep 2020 09:40:19 +0800
Subject: [PATCH] [X86] add new kernel of relu6 and reduce_mean, test=develop
 (#4431)

---
 lite/kernels/x86/activation_compute.cc        | 11 +++
 lite/kernels/x86/activation_compute.h         | 36 ++++++++++
 lite/kernels/x86/reduce_compute.cc            | 10 +++
 lite/kernels/x86/reduce_compute.h             | 70 +++++++++++++++----
 lite/operators/activation_ops.cc              |  3 +
 lite/operators/op_params.h                    |  2 +
 lite/tests/kernels/activation_compute_test.cc |  9 ++-
 .../tests/kernels/reduce_mean_compute_test.cc |  7 +-
 8 files changed, 132 insertions(+), 16 deletions(-)

diff --git a/lite/kernels/x86/activation_compute.cc b/lite/kernels/x86/activation_compute.cc
index 9b4c2fadd9..aee6bd6bd3 100644
--- a/lite/kernels/x86/activation_compute.cc
+++ b/lite/kernels/x86/activation_compute.cc
@@ -88,3 +88,14 @@ REGISTER_LITE_KERNEL(sigmoid,
     .BindInput("X", {LiteType::GetTensorTy(TARGET(kX86))})
     .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kX86))})
     .Finalize();
+
+// float
+REGISTER_LITE_KERNEL(relu6,
+                     kX86,
+                     kFloat,
+                     kNCHW,
+                     paddle::lite::kernels::x86::Relu6Compute<float>,
+                     def)
+    .BindInput("X", {LiteType::GetTensorTy(TARGET(kX86))})
+    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kX86))})
+    .Finalize();
diff --git a/lite/kernels/x86/activation_compute.h b/lite/kernels/x86/activation_compute.h
index 520adaf44f..b76e94398e 100644
--- a/lite/kernels/x86/activation_compute.h
+++ b/lite/kernels/x86/activation_compute.h
@@ -248,6 +248,42 @@ class SoftsignCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
   virtual ~SoftsignCompute() = default;
 };
 
+// relu6(x) = min(max(0, x), 6)
+template <typename T>
+struct Relu6Functor {
+  float threshold;
+  explicit Relu6Functor(float threshold_) : threshold(threshold_) {}
+
+  template <typename Device, typename X, typename Out>
+  void operator()(Device d, X x, Out out) const {
+    out.device(d) =
+        x.cwiseMax(static_cast<T>(0)).cwiseMin(static_cast<T>(threshold));
+  }
+};
+
+template <typename T>
+class Relu6Compute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
+ public:
+  using param_t = operators::ActivationParam;
+
+  void Run() override {
+    auto& param = *param_.get_mutable<operators::ActivationParam>();
+
+    param.Out->template mutable_data<T>();
+    auto X = param.X;
+    auto Out = param.Out;
+    auto place = lite::fluid::EigenDeviceType<TARGET(kX86)>();
+    CHECK(X);
+    CHECK(Out);
+    auto x = lite::fluid::EigenVector<T>::Flatten(*X);
+    auto out = lite::fluid::EigenVector<T>::Flatten(*Out);
+    Relu6Functor<T> functor(param.threshold);
+    functor(place, x, out);
+  }
+
+  virtual ~Relu6Compute() = default;
+};
+
 }  // namespace x86
 }  // namespace kernels
 }  // namespace lite
diff --git a/lite/kernels/x86/reduce_compute.cc b/lite/kernels/x86/reduce_compute.cc
index f95f4cfb88..edeac0a84e 100644
--- a/lite/kernels/x86/reduce_compute.cc
+++ b/lite/kernels/x86/reduce_compute.cc
@@ -23,3 +23,13 @@ REGISTER_LITE_KERNEL(reduce_sum,
     .BindInput("X", {LiteType::GetTensorTy(TARGET(kX86))})
     .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kX86))})
     .Finalize();
+
+REGISTER_LITE_KERNEL(reduce_mean,
+                     kX86,
+                     kFloat,
+                     kNCHW,
+                     paddle::lite::kernels::x86::ReduceMeanCompute<float>,
+                     def)
+    .BindInput("X", {LiteType::GetTensorTy(TARGET(kX86))})
+    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kX86))})
+    .Finalize();
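[Editor's note, not part of the patch] The new Relu6Compute kernel registered above clamps every element of X into [0, threshold], with threshold defaulting to 6 (see the functor comment "relu6(x) = min(max(0, x), 6)"). A minimal standalone sketch of the same elementwise math, using only the standard library; Relu6Ref is an illustrative name, not a PaddleLite symbol:

    #include <algorithm>
    #include <cstddef>
    #include <vector>

    // Reference relu6: clamp each element to the range [0, threshold].
    std::vector<float> Relu6Ref(const std::vector<float>& x, float threshold = 6.f) {
      std::vector<float> y(x.size());
      for (std::size_t i = 0; i < x.size(); ++i) {
        y[i] = std::min(std::max(x[i], 0.f), threshold);
      }
      return y;
    }

    // Example: Relu6Ref({-1.f, 0.5f, 7.f}) yields {0.f, 0.5f, 6.f}.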
diff --git a/lite/kernels/x86/reduce_compute.h b/lite/kernels/x86/reduce_compute.h
index 1b7c99eeef..fb02348759 100644
--- a/lite/kernels/x86/reduce_compute.h
+++ b/lite/kernels/x86/reduce_compute.h
@@ -31,11 +31,18 @@ struct SumFunctor {
   }
 };
 
-#define HANDLE_DIM(NDIM, RDIM)                                            \
-  if (ndim == NDIM && rdim == RDIM) {                                     \
-    paddle::lite::kernels::x86::                                          \
-        ReduceFunctor<lite::TargetType::kX86, T, NDIM, RDIM, SumFunctor>( \
-            *input, output, dims, keep_dim);                              \
+struct MeanFunctor {
+  template <typename X, typename Y, typename Dim>
+  void operator()(X* x, Y* y, const Dim& dim) {
+    y->device(lite::fluid::EigenDeviceType<TARGET(kX86)>()) = x->mean(dim);
+  }
+};
+
+#define HANDLE_DIM(NDIM, RDIM, FUNCTOR)                                \
+  if (ndim == NDIM && rdim == RDIM) {                                  \
+    paddle::lite::kernels::x86::                                       \
+        ReduceFunctor<lite::TargetType::kX86, T, NDIM, RDIM, FUNCTOR>( \
+            *input, output, dims, keep_dim);                           \
   }
 
 template <typename T>
@@ -64,19 +71,58 @@ class ReduceSumCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
     } else {
       int ndim = input->dims().size();
       int rdim = dims.size();
-      HANDLE_DIM(4, 3);
-      HANDLE_DIM(4, 2);
-      HANDLE_DIM(4, 1);
-      HANDLE_DIM(3, 2);
-      HANDLE_DIM(3, 1);
-      HANDLE_DIM(2, 1);
-      HANDLE_DIM(1, 1);
+      HANDLE_DIM(4, 3, SumFunctor);
+      HANDLE_DIM(4, 2, SumFunctor);
+      HANDLE_DIM(4, 1, SumFunctor);
+      HANDLE_DIM(3, 2, SumFunctor);
+      HANDLE_DIM(3, 1, SumFunctor);
+      HANDLE_DIM(2, 1, SumFunctor);
+      HANDLE_DIM(1, 1, SumFunctor);
     }
   }
 
   virtual ~ReduceSumCompute() = default;
 };
 
+template <typename T>
+class ReduceMeanCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
+ public:
+  using param_t = operators::ReduceParam;
+
+  void Run() override {
+    auto& param = *param_.get_mutable<operators::ReduceParam>();
+    // auto& context = ctx_->As<X86Context>();
+    auto* input = param.x;
+    auto* output = param.output;
+    param.output->template mutable_data<T>();
+
+    const auto& dims = param.dim;
+    bool keep_dim = param.keep_dim;
+
+    if (dims.size() == 0) {
+      // Flatten and reduce 1-D tensor
+      auto x = lite::fluid::EigenVector<T>::Flatten(*input);
+      auto out = lite::fluid::EigenScalar<T>::From(output);
+      // auto& place = *platform::CPUDeviceContext().eigen_device();
+      auto reduce_dim = Eigen::array<int, 1>({{0}});
+      MeanFunctor functor;
+      functor(&x, &out, reduce_dim);
+    } else {
+      int ndim = input->dims().size();
+      int rdim = dims.size();
+      HANDLE_DIM(4, 3, MeanFunctor);
+      HANDLE_DIM(4, 2, MeanFunctor);
+      HANDLE_DIM(4, 1, MeanFunctor);
+      HANDLE_DIM(3, 2, MeanFunctor);
+      HANDLE_DIM(3, 1, MeanFunctor);
+      HANDLE_DIM(2, 1, MeanFunctor);
+      HANDLE_DIM(1, 1, MeanFunctor);
+    }
+  }
+
+  virtual ~ReduceMeanCompute() = default;
+};
+
 }  // namespace x86
 }  // namespace kernels
 }  // namespace lite
diff --git a/lite/operators/activation_ops.cc b/lite/operators/activation_ops.cc
index 9b20f4348b..a25297f012 100644
--- a/lite/operators/activation_ops.cc
+++ b/lite/operators/activation_ops.cc
@@ -89,6 +89,9 @@ bool ActivationOp::AttachImpl(const cpp::OpDesc& opdesc, lite::Scope* scope) {
   } else if (opdesc.Type() == "elu") {
     param_.active_type = lite_api::ActivationType::kElu;
     param_.Elu_alpha = opdesc.GetAttr<float>("alpha");
+  } else if (opdesc.Type() == "relu6") {
+    param_.active_type = lite_api::ActivationType::kRelu6;
+    param_.threshold = opdesc.GetAttr<float>("threshold");
   }
 
   VLOG(4) << "opdesc.Type():" << opdesc.Type();
diff --git a/lite/operators/op_params.h b/lite/operators/op_params.h
index 33da913d2e..85d7854970 100644
--- a/lite/operators/op_params.h
+++ b/lite/operators/op_params.h
@@ -403,6 +403,8 @@ struct ActivationParam : ParamBase {
   float relu_threshold{1.0f};
   // elu
   float Elu_alpha{1.0f};
+  // relu6
+  float threshold{6.0f};
 
   ///////////////////////////////////////////////////////////////////////////////////
   //         get a vector of input tensors
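[Editor's note, not part of the patch] ReduceMeanCompute above follows the existing reduce_sum path: with an empty dim list it flattens the input and averages every element; otherwise HANDLE_DIM dispatches on the input rank and the number of reduced dims, and MeanFunctor divides the summed values by the product of the reduced extents. A minimal standalone sketch of the axis-mean numerics (plain C++, illustrative only; keep_dim changes only the reported shape, {2} vs {2, 1}, not the values):

    #include <cstdio>

    int main() {
      // 2x3 input, reduced over dim 1 (the three values in each row).
      const float x[2][3] = {{1.f, 2.f, 3.f}, {4.f, 5.f, 6.f}};
      float mean[2] = {0.f, 0.f};
      for (int i = 0; i < 2; ++i) {
        for (int j = 0; j < 3; ++j) mean[i] += x[i][j];
        mean[i] /= 3.f;  // divide by the extent of the reduced dim
      }
      std::printf("%.1f %.1f\n", mean[0], mean[1]);  // prints "2.0 5.0"
      return 0;
    }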
diff --git a/lite/tests/kernels/activation_compute_test.cc b/lite/tests/kernels/activation_compute_test.cc
index fb88f6b553..6799da30da 100644
--- a/lite/tests/kernels/activation_compute_test.cc
+++ b/lite/tests/kernels/activation_compute_test.cc
@@ -58,6 +58,7 @@ class ActivationComputeTester : public arena::TestCase {
   float hard_swish_offset = 3.0;
   float relu_threshold_ = 1.0;
   float elu_alpha_ = 1.0;
+  float threshold_ = 6.0;
   DDim dims_{{1}};
   std::string type_ = "";
   activation_type_test act_type_ = RELU;
@@ -170,7 +171,8 @@ class ActivationComputeTester : public arena::TestCase {
       case RELU6: {
         for (int i = 0; i < dims_.production(); i++) {
           output_data[i] = x_data[i] > 0.f ? x_data[i] : 0.f;
-          output_data[i] = output_data[i] < 6.0 ? output_data[i] : 6.0;
+          output_data[i] =
+              output_data[i] < threshold_ ? output_data[i] : threshold_;
         }
         break;
       }
@@ -273,6 +275,9 @@
     if (act_type_ == ELU) {
       op_desc->SetAttr("alpha", elu_alpha_);
     }
+    if (act_type_ == RELU6) {
+      op_desc->SetAttr("threshold", threshold_);
+    }
   }
 
   void PrepareData() override {
@@ -510,6 +515,8 @@ TEST(Activation_relu6, precision) {
 #elif defined(LITE_WITH_HUAWEI_ASCEND_NPU)
   place = TARGET(kHuaweiAscendNPU);
   abs_error = 1e-2;  // precision_mode default is force_fp16
+#elif defined(LITE_WITH_X86)
+  place = TARGET(kX86);
 #else
   return;
 #endif
diff --git a/lite/tests/kernels/reduce_mean_compute_test.cc b/lite/tests/kernels/reduce_mean_compute_test.cc
index 0d41d25179..d679d027a6 100644
--- a/lite/tests/kernels/reduce_mean_compute_test.cc
+++ b/lite/tests/kernels/reduce_mean_compute_test.cc
@@ -333,9 +333,10 @@ void test_reduce_mean(Place place) {
 }
 
 TEST(ReduceMean, precision) {
-// #ifdef LITE_WITH_X86
-//   Place place(TARGET(kX86));
-// #endif
+#ifdef LITE_WITH_X86
+  Place place(TARGET(kX86));
+  test_reduce_mean(place);
+#endif
 #ifdef LITE_WITH_ARM
   Place place(TARGET(kARM));
   test_reduce_mean(place);
-- 
GitLab
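[Editor's note, not part of the patch] One behavior worth keeping in mind when reading the reduce kernels: the HANDLE_DIM(NDIM, RDIM, FUNCTOR) chain only covers the pairs (4, 3), (4, 2), (4, 1), (3, 2), (3, 1), (2, 1), and (1, 1), so an input rank above 4, or an unlisted rank/reduced-dim combination, appears to fall through without computing the output. As a worked example of the dispatch, a {2, 3, 4} input reduced over dims {1, 2} gives ndim == 3 and rdim == 2, so HANDLE_DIM(3, 2, MeanFunctor) fires and instantiates ReduceFunctor<lite::TargetType::kX86, T, 3, 2, MeanFunctor>; the exact template arguments here are reconstructed from the macro and may differ slightly in the tree.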