Commit 8235e8bf authored by zhangwen31

[arm][kernel]: feat: add arm kernel for op 'pow'

Parent 80452148
@@ -52,7 +52,7 @@ add_kernel(grid_sampler_compute_arm ARM basic SRCS grid_sampler_compute.cc DEPS
 ## 2.other basic kernels: basic kernels that not used in basic models
 add_kernel(negative_compute_arm ARM extra SRCS negative_compute.cc DEPS ${lite_kernel_deps} math_arm)
 add_kernel(crop_compute_arm ARM extra SRCS crop_compute.cc DEPS ${lite_kernel_deps} math_arm)
-add_kernel(power_compute_arm ARM extra SRCS power_compute.cc DEPS ${lite_kernel_deps} math_arm)
+add_kernel(pow_compute_arm ARM extra SRCS pow_compute.cc DEPS ${lite_kernel_deps} math_arm)
 add_kernel(norm_compute_arm ARM extra SRCS norm_compute.cc DEPS ${lite_kernel_deps} math_arm)
 add_kernel(group_norm_compute ARM extra SRCS group_norm_compute.cc DEPS ${lite_kernel_deps} math_arm)
 ## 3. extra kernels
...
-// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
-#include "lite/kernels/arm/power_compute.h"
+#include "lite/kernels/arm/pow_compute.h"
 #include "lite/backends/arm/math/funcs.h"
 namespace paddle {
@@ -20,15 +20,17 @@ namespace lite {
 namespace kernels {
 namespace arm {
-void PowerCompute::Run() {
-  auto& param = Param<operators::PowerParam>();
+void PowCompute::Run() {
+  auto& param = Param<operators::PowParam>();
   const float* x_data = param.X->data<float>();
   float* output_data = param.Out->mutable_data<float>();
   DDim x_dims = param.X->dims();
-  float scale = param.scale;
-  float shift = param.shift;
-  float power = param.power;
+  float scale = 1.0;
+  float shift = 0.0;
+  float power = param.factor;
+  // fixme: update lite::arm::math::power if necessary, for scale and shift is
+  // not used
   lite::arm::math::power(
       x_data, output_data, x_dims.production(), scale, shift, power);
 }
@@ -39,7 +41,7 @@ void PowerCompute::Run() {
 } /* namespace paddle */
 REGISTER_LITE_KERNEL(
-    power, kARM, kFloat, kNCHW, paddle::lite::kernels::arm::PowerCompute, def)
+    pow, kARM, kFloat, kNCHW, paddle::lite::kernels::arm::PowCompute, def)
     .BindInput("X", {LiteType::GetTensorTy(TARGET(kARM))})
     .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM))})
     .Finalize();
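Note on semantics: the old `power` kernel read three attributes and computed `out[i] = pow(scale * x[i] + shift, power)`, while the new `pow` kernel hard-codes `scale = 1.0` and `shift = 0.0` and reads only `factor`, so it reduces to `out[i] = pow(x[i], factor)` while still delegating to the existing `lite::arm::math::power` routine. A minimal reference sketch of the element-wise computation follows; the helper name `reference_pow` is illustrative only, not part of the Paddle Lite API:

```cpp
#include <cmath>
#include <cstdint>

// Element-wise reference of what the kernel computes. With the defaults
// scale = 1.0 and shift = 0.0 this matches the new `pow` op semantics:
// out[i] = pow(x[i], factor).
void reference_pow(const float* x,
                   float* out,
                   int64_t num,
                   float factor,
                   float scale = 1.0f,
                   float shift = 0.0f) {
  for (int64_t i = 0; i < num; ++i) {
    out[i] = std::pow(scale * x[i] + shift, factor);
  }
}
```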
-// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -21,11 +21,11 @@ namespace lite {
 namespace kernels {
 namespace arm {
-class PowerCompute : public KernelLite<TARGET(kARM), PRECISION(kFloat)> {
+class PowCompute : public KernelLite<TARGET(kARM), PRECISION(kFloat)> {
  public:
   void Run() override;
-  virtual ~PowerCompute() = default;
+  virtual ~PowCompute() = default;
 };
 } /* namespace arm */
...
@@ -23,6 +23,9 @@ namespace paddle {
 namespace lite {
 namespace operators {
+/**
+ * @deprecated There is NO power op in paddle fluid
+ */
 class PowerOp : public OpLite {
  public:
   PowerOp() {}
...
@@ -2,7 +2,7 @@ if((NOT LITE_WITH_OPENCL AND NOT LITE_WITH_FPGA AND NOT LITE_WITH_BM AND NOT LIT
 lite_cc_test(test_kernel_conv_compute SRCS conv_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
 lite_cc_test(test_kernel_conv_transpose_compute SRCS conv_transpose_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
 lite_cc_test(test_kernel_scale_compute SRCS scale_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
-lite_cc_test(test_kernel_power_compute SRCS power_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
+lite_cc_test(test_kernel_pow_compute SRCS pow_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
 lite_cc_test(test_kernel_shuffle_channel_compute SRCS shuffle_channel_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
 lite_cc_test(test_kernel_yolo_box_compute SRCS yolo_box_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
 lite_cc_test(test_kernel_fc_compute SRCS fc_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
...
@@ -20,23 +20,17 @@
 namespace paddle {
 namespace lite {
-class PowerComputeTester : public arena::TestCase {
+class PowComputeTester : public arena::TestCase {
  protected:
   // common attributes for this op.
   std::string input_ = "X";
   std::string output_ = "Out";
-  float scale_ = 0.;
-  float shift_ = 0.;
-  float power_ = 0.;
+  float factor_ = 0.;
   DDim dims_{{5, 2}};
  public:
-  PowerComputeTester(const Place& place,
-                     const std::string& alias,
-                     float scale,
-                     float shift,
-                     float power)
-      : TestCase(place, alias), scale_(scale), shift_(shift), power_(power) {}
+  PowComputeTester(const Place& place, const std::string& alias, float factor)
+      : TestCase(place, alias), factor_(factor) {}
   void RunBaseline(Scope* scope) override {
     auto* out = scope->NewTensor(output_);
@@ -48,17 +42,15 @@ class PowerComputeTester : public arena::TestCase {
     const auto* x_data = x->data<float>();
     for (int i = 0; i < dims_.production(); i++) {
-      out_data[i] = std::pow((x_data[i] * scale_ + shift_), power_);
+      out_data[i] = std::pow(x_data[i], factor_);
     }
   }
   void PrepareOpDesc(cpp::OpDesc* op_desc) {
-    op_desc->SetType("power");
+    op_desc->SetType("pow");
     op_desc->SetInput("X", {input_});
     op_desc->SetOutput("Out", {output_});
-    op_desc->SetAttr("scale", scale_);
-    op_desc->SetAttr("shift", shift_);
-    op_desc->SetAttr("power", power_);
+    op_desc->SetAttr("factor", factor_);
   }
   void PrepareData() override {
@@ -72,12 +64,12 @@ class PowerComputeTester : public arena::TestCase {
   }
 };
-void test_power(Place place) {
+void test_pow(Place place) {
   for (float scale : {0.923, 2., 1.2}) {
     for (float shift : {1., 0., 1.2331}) {
-      for (float power : {1., 1.2, 1.6}) {
+      for (float factor : {1., 1.2, 1.6}) {
         std::unique_ptr<arena::TestCase> tester(
-            new PowerComputeTester(place, "def", scale, shift, power));
+            new PowComputeTester(place, "def", factor));
         arena::Arena arena(std::move(tester), place, 2e-4);
         arena.TestPrecision();
       }
@@ -85,13 +77,13 @@ void test_power(Place place) {
   }
 }
-TEST(Power, precision) {
+TEST(Pow, precision) {
   // #ifdef LITE_WITH_X86
   //   Place place(TARGET(kX86));
   // #endif
 #ifdef LITE_WITH_ARM
   Place place(TARGET(kARM));
-  test_power(place);
+  test_pow(place);
 #endif
 }
...
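Note that `test_pow` keeps the outer `scale`/`shift` loops even though the tester no longer uses them, so each of the three `factor` values is exercised nine times with identical settings. A possible simplification (a sketch against the same test file, not part of this commit) would iterate over `factor` alone:

```cpp
// Hedged sketch: within pow_compute_test.cc, the driver could drop the now
// unused scale/shift loops and test each factor value exactly once.
void test_pow(Place place) {
  for (float factor : {1.f, 1.2f, 1.6f}) {
    std::unique_ptr<arena::TestCase> tester(
        new PowComputeTester(place, "def", factor));
    arena::Arena arena(std::move(tester), place, 2e-4);
    arena.TestPrecision();
  }
}
```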