Unverified commit a6f9e0c7 authored by: M mapingshuo, committed by: GitHub

add square op for arm kernel (#3169)

* add backend

* add kernel

* add grad kernel
Parent 80fb550c
......@@ -700,6 +700,35 @@ void act_rsqrt<float>(const float* din, float* dout, int size, int threads) {
}
}
template <>
void act_square<float>(const float* din, float* dout, int size, int threads) {
const float* ptr_in = din;
float* ptr_out = dout;
for (int i = 0; i < size; ++i) {
ptr_out[0] = ptr_in[0] * ptr_in[0];
ptr_in++;
ptr_out++;
}
}
#ifdef LITE_WITH_TRAIN
template <>
void act_square_grad(const float* din,
const float* dout_grad,
float* din_grad,
int size,
int threads) {
const float* ptr_out_grad = dout_grad;
float* ptr_in_grad = din_grad;
for (int i = 0; i < size; ++i) {
ptr_in_grad[0] = ptr_out_grad[0] * 2.0 * din[0];
ptr_out_grad++;
ptr_in_grad++;
din++;
}
}
#endif
} // namespace math
} // namespace arm
} // namespace lite
......
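For reference, the backward loop above applies the chain rule for y = x * x: din_grad[i] = dout_grad[i] * 2 * din[i]. The following is a minimal standalone sketch (an illustration only, not part of this commit and independent of the Lite headers) that mirrors the two element-wise loops and compares the analytic gradient with a forward finite difference:

#include <cstdio>
#include <vector>

// y[i] = x[i] * x[i], mirroring act_square<float> above.
void square(const float* din, float* dout, int size) {
  for (int i = 0; i < size; ++i) dout[i] = din[i] * din[i];
}

// dL/dx[i] = dL/dy[i] * 2 * x[i], mirroring act_square_grad above.
void square_grad(const float* din, const float* dout_grad, float* din_grad, int size) {
  for (int i = 0; i < size; ++i) din_grad[i] = dout_grad[i] * 2.0f * din[i];
}

int main() {
  const int size = 16;
  const float delta = 1e-3f;
  std::vector<float> x(size), y(size), y_delta(size), dy(size, 1.0f), dx(size);
  // Input pattern similar to the one used by the unit test later in this commit.
  for (int i = 0; i < size; ++i) x[i] = 0.3f * i - 1.1f;

  square(x.data(), y.data(), size);
  square_grad(x.data(), dy.data(), dx.data(), size);

  // Perturb the input and recompute the forward pass for a finite-difference estimate.
  std::vector<float> x_shift(x);
  for (int i = 0; i < size; ++i) x_shift[i] += delta;
  square(x_shift.data(), y_delta.data(), size);

  for (int i = 0; i < size; ++i) {
    float fd = (y_delta[i] - y[i]) / delta;  // forward finite difference
    std::printf("x=%6.2f analytic=%8.4f finite-diff=%8.4f\n", x[i], dx[i], fd);
  }
  return 0;
}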
......@@ -69,6 +69,15 @@ void act_hard_sigmoid(const T* din,
template <typename T>
void act_rsqrt(const T* din, T* dout, int size, int threads);
template <typename T>
void act_square(const T* din, T* dout, int size, int threads);
#ifdef LITE_WITH_TRAIN
template <typename T>
void act_square_grad(
const T* din, const T* dout_grad, T* din_grad, int size, int threads);
#endif
} // namespace math
} // namespace arm
} // namespace lite
......
......@@ -108,6 +108,7 @@ add_kernel(lstm_arm ARM extra SRCS lstm_compute.cc DEPS ${lite_kernel_deps} math
add_kernel(mean_compute_arm ARM extra SRCS mean_compute.cc DEPS ${lite_kernel_deps} math_arm)
if(LITE_WITH_TRAIN)
add_kernel(mean_grad_compute_arm ARM extra SRCS mean_grad_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(activation_grad_compute_arm ARM basic SRCS activation_grad_compute.cc DEPS ${lite_kernel_deps} math_arm)
endif()
lite_cc_test(test_scale_compute_arm SRCS scale_compute_test.cc DEPS scale_compute_arm)
......
......@@ -169,6 +169,16 @@ void RsqrtCompute::Run() {
x_data, output_data, x_dims.production(), ctx.threads());
}
void SquareCompute::Run() {
auto& param = this->Param<param_t>();
auto& ctx = this->ctx_->template As<ARMContext>();
auto x_dims = param.X->dims();
auto x_data = param.X->data<float>();
auto output_data = param.Out->mutable_data<float>();
lite::arm::math::act_square<float>(
x_data, output_data, x_dims.production(), ctx.threads());
}
} // namespace arm
} // namespace kernels
} // namespace lite
......@@ -260,3 +270,8 @@ REGISTER_LITE_KERNEL(
.BindInput("X", {LiteType::GetTensorTy(TARGET(kARM))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM))})
.Finalize();
REGISTER_LITE_KERNEL(
square, kARM, kFloat, kNCHW, paddle::lite::kernels::arm::SquareCompute, def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kARM))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM))})
.Finalize();
......@@ -139,6 +139,15 @@ class RsqrtCompute : public KernelLite<TARGET(kARM), PRECISION(kFloat)> {
virtual ~RsqrtCompute() = default;
};
class SquareCompute : public KernelLite<TARGET(kARM), PRECISION(kFloat)> {
public:
using param_t = operators::ActivationParam;
void Run() override;
virtual ~SquareCompute() = default;
};
} // namespace arm
} // namespace kernels
} // namespace lite
......
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/arm/activation_grad_compute.h"
#include "lite/backends/arm/math/funcs.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace arm {
void SquareGradCompute::Run() {
auto& param = this->Param<param_t>();
auto& ctx = this->ctx_->template As<ARMContext>();
auto out_grad_dims = param.Out_grad->dims();
auto out_grad_data = param.Out_grad->data<float>();
auto x_data = param.X->data<float>();
auto x_grad_data = param.X_grad->mutable_data<float>();
lite::arm::math::act_square_grad<float>(x_data,
out_grad_data,
x_grad_data,
out_grad_dims.production(),
ctx.threads());
}
} // namespace arm
} // namespace kernels
} // namespace lite
} // namespace paddle
REGISTER_LITE_KERNEL(square_grad,
kARM,
kFloat,
kNCHW,
paddle::lite::kernels::arm::SquareGradCompute,
def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kARM))})
.BindInput("Out@GRAD", {LiteType::GetTensorTy(TARGET(kARM))})
.BindOutput("X@GRAD", {LiteType::GetTensorTy(TARGET(kARM))})
.Finalize();
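The bindings above follow the framework's gradient-variable naming convention: "Out@GRAD" carries the incoming gradient and "X@GRAD" receives the computed input gradient. These are the same names the operator's AttachImpl resolves in activation_grad_ops.cc below.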
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <algorithm>
#include "lite/core/kernel.h"
#include "lite/core/op_registry.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace arm {
class SquareGradCompute : public KernelLite<TARGET(kARM), PRECISION(kFloat)> {
public:
using param_t = operators::ActivationGradParam;
void Run() override;
virtual ~SquareGradCompute() = default;
};
} // namespace arm
} // namespace kernels
} // namespace lite
} // namespace paddle
......@@ -143,6 +143,7 @@ add_operator(lstm_op extra SRCS lstm_op.cc DEPS ${op_DEPS})
add_operator(mean_op extra SRCS mean_op.cc DEPS ${op_DEPS})
if (LITE_WITH_TRAIN)
add_operator(mean_grad_op extra SRCS mean_grad_op.cc DEPS ${op_DEPS})
add_operator(activation_grad_ops basic SRCS activation_grad_ops.cc DEPS ${op_DEPS})
endif()
if (NOT LITE_WITH_X86)
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/operators/activation_grad_ops.h"
#include "lite/core/op_registry.h"
namespace paddle {
namespace lite {
namespace operators {
bool ActivationGradOp::CheckShape() const {
CHECK_OR_FALSE(param_.X_grad);
CHECK_OR_FALSE(param_.Out_grad);
return true;
}
bool ActivationGradOp::InferShape() const {
param_.X_grad->Resize(param_.Out_grad->dims());
return true;
}
bool ActivationGradOp::AttachImpl(const cpp::OpDesc& opdesc,
lite::Scope* scope) {
auto Out_grad_name = opdesc.Input("Out@GRAD").front();
auto X_grad_name = opdesc.Output("X@GRAD").front();
param_.Out_grad = GetVar<lite::Tensor>(scope, Out_grad_name);
param_.X_grad = GetMutableVar<Tensor>(scope, X_grad_name);
if (opdesc.HasInput("X")) {
auto X_name = opdesc.Input("X").front();
param_.X = GetVar<lite::Tensor>(scope, X_name);
} else {
param_.X = param_.X_grad;
}
if (opdesc.HasInput("Out")) {
auto Out_name = opdesc.Input("Out").front();
param_.Out = GetVar<lite::Tensor>(scope, Out_name);
} else {
param_.Out = param_.Out_grad;
}
return true;
}
} // namespace operators
} // namespace lite
} // namespace paddle
REGISTER_LITE_OP(square_grad, paddle::lite::operators::ActivationGradOp);
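Note that AttachImpl falls back to aliasing "X" to "X@GRAD" and "Out" to "Out@GRAD" when the op desc omits them, so ActivationGradParam is always fully populated; for square_grad only "X" and "Out@GRAD" are actually consumed by the ARM kernel.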
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include "lite/core/op_lite.h"
namespace paddle {
namespace lite {
namespace operators {
class ActivationGradOp : public OpLite {
public:
explicit ActivationGradOp(const std::string& type) : OpLite(type) {}
bool CheckShape() const override;
bool InferShape() const override;
bool AttachImpl(const cpp::OpDesc& opdesc, lite::Scope* scope) override;
void AttachKernel(KernelBase* kernel) override { kernel->SetParam(param_); }
std::string DebugString() const override { return "activation_grad_op"; }
private:
mutable operators::ActivationGradParam param_;
};
} // namespace operators
} // namespace lite
} // namespace paddle
......@@ -78,46 +78,6 @@ bool ActivationOp::AttachImpl(const cpp::OpDesc& opdesc, lite::Scope* scope) {
return true;
}
// #ifdef LITE_WITH_TRAIN
// bool ActivationGradOp::CheckShape() const {
// CHECK_OR_FALSE(param_.X_grad);
// CHECK_OR_FALSE(param_.Out_grad);
// return true;
// }
// bool ActivationGradOp::InferShape() const {
// param_.X_grad->Resize(param_.Out_grad->dims());
// return true;
// }
// bool ActivationGradOp::AttachImpl(const cpp::OpDesc& opdesc,
// lite::Scope* scope) {
// auto Out_grad_name = opdesc.Input(framework::GradVarName("Out")).front();
// auto X_grad_name = opdesc.Output(framework::GradVarName("X")).front();
// param_.Out_grad = GetVar<lite::Tensor>(scope, Out_grad_name);
// param_.X_grad = GetMutableVar<Tensor>(scope, X_grad_name);
// if (opdesc.HasInput("X")) {
// auto X_name = opdesc.Input("X").front();
// param_.X = GetVar<lite::Tensor>(scope, X_name);
// } else {
// param_.X = param_.X_grad;
// }
// if (opdesc.HasInput("Out")) {
// auto Out_name = opdesc.Input("Out").front();
// param_.Out = GetVar<lite::Tensor>(scope, Out_name);
// } else {
// param_.Out = param_.Out_grad;
// }
// return true;
// }
// #endif
} // namespace operators
} // namespace lite
} // namespace paddle
......@@ -138,7 +98,3 @@ REGISTER_LITE_OP(sqrt, paddle::lite::operators::ActivationOp);
REGISTER_LITE_OP(rsqrt, paddle::lite::operators::ActivationOp);
REGISTER_LITE_OP(softsign, paddle::lite::operators::ActivationOp);
REGISTER_LITE_OP(gelu, paddle::lite::operators::ActivationOp);
// #ifdef LITE_WITH_TRAIN
// REGISTER_LITE_OP(square_grad, paddle::lite::operators::ActivationGradOp);
// #endif
......@@ -38,27 +38,6 @@ class ActivationOp : public OpLite {
mutable operators::ActivationParam param_;
};
// #ifdef LITE_WITH_TRAIN
// class ActivationGradOp : public OpLite {
// public:
// explicit ActivationGradOp(const std::string& type) : OpLite(type) {}
// bool CheckShape() const override;
// bool InferShape() const override;
// bool AttachImpl(const cpp::OpDesc& opdesc, lite::Scope* scope) override;
// void AttachKernel(KernelBase* kernel) override { kernel->SetParam(param_);
// }
// std::string DebugString() const override { return "activation_grad_op"; }
// private:
// mutable operators::ActivationGradParam param_;
// };
// #endif
} // namespace operators
} // namespace lite
} // namespace paddle
......@@ -60,8 +60,12 @@ if(LITE_BUILD_EXTRA)
lite_cc_test(test_kernel_lookup_table_compute SRCS lookup_table_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_lookup_table_dequant_compute SRCS lookup_table_dequant_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_gather_compute SRCS gather_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${bm_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
# for training kernel
lite_cc_test(test_kernel_mean_compute SRCS mean_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
if (LITE_WITH_TRAIN)
lite_cc_test(test_kernel_mean_compute SRCS mean_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_activation_grad_compute SRCS activation_grad_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
endif()
endif()
lite_cc_test(test_kernel_pad2d_compute SRCS pad2d_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
......
......@@ -35,7 +35,8 @@ enum activation_type_test {
EXP,
FLOOR,
RSQRT,
GELU
GELU,
SQUARE
};
class ActivationComputeTester : public arena::TestCase {
......@@ -192,6 +193,12 @@ class ActivationComputeTester : public arena::TestCase {
}
break;
}
case SQUARE: {
for (int i = 0; i < dims_.production(); i++) {
output_data[i] = x_data[i] * x_data[i];
}
break;
}
default:
LOG(INFO) << "the type of activation is unknow.";
}
......@@ -632,6 +639,33 @@ TEST(Activation_rsqrt, precision) {
#endif
}
TEST(Activation_square, precision) {
LOG(INFO) << "test square op";
#ifdef LITE_WITH_ARM
Place place(TARGET(kARM));
for (auto n : {2}) {
for (auto c : {2}) {
for (auto h : {2}) {
for (auto w : {2}) {
std::unique_ptr<arena::TestCase> tester(new ActivationComputeTester(
place,
"def",
0.01,
6.,
"all",
0.,
DDim(std::vector<int64_t>({n, c, h, w})),
"square",
SQUARE));
arena::Arena arena(std::move(tester), place, 2e-5);
arena.TestPrecision();
}
}
}
}
#endif
}
TEST(Activation_gelu, precision) {
LOG(INFO) << "test gelu op";
Place place;
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/arm/activation_grad_compute.h"
#include <gtest/gtest.h>
#include "lite/core/op_registry.h"
#include "lite/kernels/arm/activation_compute.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace arm {
using param_t = operators::ActivationParam;
using grad_param_t = operators::ActivationGradParam;
using kernel_t = SquareCompute;
using grad_kernel_t = SquareGradCompute;
class ActivationGradTester {
public:
explicit ActivationGradTester(DDim dims) : dims_(dims) {}
void prepare_kernel() {
std::unique_ptr<KernelContext> ctx1(new KernelContext);
ctx1->As<ARMContext>();
kernel_.SetContext(std::move(ctx1));
std::unique_ptr<KernelContext> ctx2(new KernelContext);
ctx2->As<ARMContext>();
delta_kernel_.SetContext(std::move(ctx2));
std::unique_ptr<KernelContext> ctx3(new KernelContext);
ctx3->As<ARMContext>();
grad_kernel_.SetContext(std::move(ctx3));
}
void run_forward(param_t* param,
kernel_t* kernel,
const std::vector<float>& in_vec,
float* out_vec) {
Tensor x;
Tensor output;
x.Resize(dims_);
output.Resize(dims_);
auto* x_data = x.mutable_data<float>();
for (int i = 0; i < dims_.production(); i++) {
x_data[i] = in_vec[i];
}
param->X = &x;
param->Out = &output;
kernel->SetParam(*param);
kernel->Launch();
auto* output_data = output.mutable_data<float>();
for (int i = 0; i < dims_.production(); i++) {
out_vec[i] = output_data[i];
}
}
void run_backward(grad_param_t* param,
grad_kernel_t* kernel,
const std::vector<float>& in_vec,
const std::vector<float>& out_grad_vec,
float* in_grad_vec) {
Tensor x;
Tensor x_grad;
Tensor out_grad;
x.Resize(dims_);
x_grad.Resize(dims_);
out_grad.Resize(dims_);
auto* x_data = x.mutable_data<float>();
auto* out_grad_data = out_grad.mutable_data<float>();
for (int i = 0; i < dims_.production(); i++) {
x_data[i] = in_vec[i];
out_grad_data[i] = out_grad_vec[i];
}
param->X = &x;
param->X_grad = &x_grad;
param->Out_grad = &out_grad;
kernel->SetParam(*param);
kernel->Launch();
auto* x_grad_data = x_grad.mutable_data<float>();
for (int i = 0; i < dims_.production(); i++) {
in_grad_vec[i] = x_grad_data[i];
}
}
void check_grad(float delta, float max_grad_delta) {
std::vector<float> x(dims_.production());
std::vector<float> out(dims_.production());
for (int i = 0; i < dims_.production(); i++) {
x[i] = 1.0 * static_cast<float>(i % 128) * 0.3f - 1.1;
}
this->run_forward(&param_, &kernel_, x, out.data());
std::vector<float> x_delta(dims_.production());
std::vector<float> out_delta(dims_.production());
for (int i = 0; i < dims_.production(); i++) {
x_delta[i] = x[i] + delta;
}
this->run_forward(&delta_param_, &delta_kernel_, x_delta, out_delta.data());
std::vector<float> out_grad(dims_.production());
std::vector<float> x_grad(dims_.production());
for (int i = 0; i < dims_.production(); i++) {
out_grad[i] = 1.0;
}
this->run_backward(&grad_param_, &grad_kernel_, x, out_grad, x_grad.data());
for (int i = 0; i < dims_.production(); i++) {
EXPECT_NEAR(x_grad[i], (out_delta[i] - out[i]) / delta, max_grad_delta);
}
}
private:
DDim dims_;
kernel_t kernel_;
kernel_t delta_kernel_;
grad_kernel_t grad_kernel_;
param_t param_;
param_t delta_param_;
grad_param_t grad_param_;
};
void TestNormalCase(DDim dims) {
std::unique_ptr<ActivationGradTester> tester(new ActivationGradTester(dims));
tester->prepare_kernel();
float delta = 0.001;
float max_grad_delta = 0.005;
tester->check_grad(delta, max_grad_delta);
}
TEST(activation_grad_arm, compute) {
LOG(INFO) << "Test Square grad";
DeviceInfo::Init();
for (auto n : {2}) {
for (auto c : {2}) {
for (auto h : {2}) {
for (auto w : {2}) {
TestNormalCase(DDim(std::vector<int64_t>({n, c, h, w})));
}
}
}
}
}
} // namespace arm
} // namespace kernels
} // namespace lite
} // namespace paddle
USE_LITE_KERNEL(square, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(square_grad, kARM, kFloat, kNCHW, def);
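As a rough sanity check on the finite-difference comparison in check_grad: for the element x = 1.9 (i = 10 in the pattern 0.3 * i - 1.1) with delta = 0.001, the forward difference is (1.901^2 - 1.9^2) / 0.001 = 3.801, while the analytic gradient 2 * x is 3.8; the gap of about 0.001 is well within max_grad_delta = 0.005.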