Unverified commit 845e80d9 authored by mapingshuo, committed by GitHub

add Act grad (#3923)

add act grad ops
Parent f4c04186
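
For reference, the backward formulas implemented by the new Host kernels in this diff are: square_grad dX = 2 * X * dOut, relu_grad dX = dOut where X > 0 (else 0), and tanh_grad dX = dOut * (1 - Out^2). A minimal standalone sketch of the same element-wise loops follows; the free-function names and raw-pointer interface are illustrative only, not the Lite kernel API used in the diff below.

#include <cstddef>

// Sketch of the element-wise gradient loops added in this commit (illustrative only).
void square_grad(const float* x, const float* dout, float* dx, size_t n) {
  for (size_t i = 0; i < n; ++i) dx[i] = dout[i] * 2.0f * x[i];              // d(x^2)/dx = 2x
}
void relu_grad(const float* x, const float* dout, float* dx, size_t n) {
  for (size_t i = 0; i < n; ++i) dx[i] = x[i] > 0.0f ? dout[i] : 0.0f;       // pass-through where x > 0
}
void tanh_grad(const float* out, const float* dout, float* dx, size_t n) {
  for (size_t i = 0; i < n; ++i) dx[i] = dout[i] * (1.0f - out[i] * out[i]); // uses Out, not X
}
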
......@@ -763,24 +763,6 @@ void act_thresholded_relu<float>(
}
}
#ifdef LITE_WITH_TRAIN
template <>
void act_square_grad(const float* din,
const float* dout_grad,
float* din_grad,
int size,
int threads) {
const float* ptr_out_grad = dout_grad;
float* ptr_in_grad = din_grad;
for (int i = 0; i < size; ++i) {
ptr_in_grad[0] = ptr_out_grad[0] * 2.0 * din[0];
ptr_out_grad++;
ptr_in_grad++;
din++;
}
}
#endif
} // namespace math
} // namespace arm
} // namespace lite
......
......@@ -90,12 +90,6 @@ template <typename T>
void act_thresholded_relu(
const T* din, T* dout, int size, float threshold, int threads);
#ifdef LITE_WITH_TRAIN
template <typename T>
void act_square_grad(
const T* din, const T* dout_grad, T* din_grad, int size, int threads);
#endif
} // namespace math
} // namespace arm
} // namespace lite
......
......@@ -103,7 +103,6 @@ add_kernel(deformable_conv_compute_arm ARM extra SRCS deformable_conv_compute.cc
add_kernel(mean_compute_arm ARM extra SRCS mean_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(mean_grad_compute_arm ARM train SRCS mean_grad_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(activation_grad_compute_arm ARM train SRCS activation_grad_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(elementwise_grad_compute_arm ARM train SRCS elementwise_grad_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(mul_grad_compute_arm ARM train SRCS mul_grad_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(sgd_compute_arm ARM train SRCS sgd_compute.cc DEPS ${lite_kernel_deps} math_arm)
......
......@@ -18,6 +18,7 @@ add_kernel(read_from_array_compute_host Host extra SRCS read_from_array_compute.
add_kernel(assign_compute_host Host extra SRCS assign_compute.cc DEPS ${lite_kernel_deps})
add_kernel(retinanet_detection_output_compute_host Host extra SRCS retinanet_detection_output_compute.cc DEPS ${lite_kernel_deps})
add_kernel(where_index_compute_host Host extra SRCS where_index_compute.cc DEPS ${lite_kernel_deps})
add_kernel(activation_grad_compute_host Host train SRCS activation_grad_compute.cc DEPS ${lite_kernel_deps})
if(LITE_BUILD_EXTRA)
lite_cc_test(test_where_index_compute_host SRCS where_index_compute.cc DEPS where_index_compute_host)
......
......@@ -12,41 +12,87 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/arm/activation_grad_compute.h"
#include "lite/backends/arm/math/funcs.h"
#include "lite/kernels/host/activation_grad_compute.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace arm {
namespace host {
void SquareGradCompute::Run() {
auto& param = this->Param<param_t>();
auto& ctx = this->ctx_->template As<ARMContext>();
CHECK(param.X);
auto out_grad_dims = param.Out_grad->dims();
auto out_grad_data = param.Out_grad->data<float>();
auto x_data = param.X->data<float>();
auto x_grad_data = param.X_grad->mutable_data<float>();
lite::arm::math::act_square_grad<float>(x_data,
out_grad_data,
x_grad_data,
out_grad_dims.production(),
ctx.threads());
for (int i = 0; i < out_grad_dims.production(); i++) {
x_grad_data[i] = out_grad_data[i] * 2.0 * x_data[i];
}
}
} // namespace arm
void ReluGradCompute::Run() {
auto& param = this->Param<param_t>();
CHECK(param.X);
auto out_grad_dims = param.Out_grad->dims();
auto out_grad_data = param.Out_grad->data<float>();
auto x_data = param.X->data<float>();
auto x_grad_data = param.X_grad->mutable_data<float>();
for (int i = 0; i < out_grad_dims.production(); i++) {
x_grad_data[i] = x_data[i] > 0 ? out_grad_data[i] : 0.0;
}
}
void TanhGradCompute::Run() {
auto& param = this->Param<param_t>();
CHECK(param.Out);
auto out_grad_dims = param.Out_grad->dims();
auto out_grad_data = param.Out_grad->data<float>();
auto out_data = param.Out->data<float>();
auto x_grad_data = param.X_grad->mutable_data<float>();
for (int i = 0; i < out_grad_dims.production(); i++) {
x_grad_data[i] = out_grad_data[i] *
(static_cast<float>(1.0) - out_data[i] * out_data[i]);
}
}
} // namespace host
} // namespace kernels
} // namespace lite
} // namespace paddle
REGISTER_LITE_KERNEL(square_grad,
kARM,
kHost,
kFloat,
kNCHW,
paddle::lite::kernels::host::SquareGradCompute,
def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kHost))})
.BindInput("Out@GRAD", {LiteType::GetTensorTy(TARGET(kHost))})
.BindOutput("X@GRAD", {LiteType::GetTensorTy(TARGET(kHost))})
.Finalize();
REGISTER_LITE_KERNEL(relu_grad,
kHost,
kFloat,
kNCHW,
paddle::lite::kernels::host::ReluGradCompute,
def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kHost))})
.BindInput("Out@GRAD", {LiteType::GetTensorTy(TARGET(kHost))})
.BindOutput("X@GRAD", {LiteType::GetTensorTy(TARGET(kHost))})
.Finalize();
REGISTER_LITE_KERNEL(tanh_grad,
kHost,
kFloat,
kNCHW,
paddle::lite::kernels::arm::SquareGradCompute,
paddle::lite::kernels::host::TanhGradCompute,
def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kARM))})
.BindInput("Out@GRAD", {LiteType::GetTensorTy(TARGET(kARM))})
.BindOutput("X@GRAD", {LiteType::GetTensorTy(TARGET(kARM))})
.BindInput("Out", {LiteType::GetTensorTy(TARGET(kHost))})
.BindInput("Out@GRAD", {LiteType::GetTensorTy(TARGET(kHost))})
.BindOutput("X@GRAD", {LiteType::GetTensorTy(TARGET(kHost))})
.Finalize();
......@@ -20,9 +20,9 @@
namespace paddle {
namespace lite {
namespace kernels {
namespace arm {
namespace host {
class SquareGradCompute : public KernelLite<TARGET(kARM), PRECISION(kFloat)> {
class SquareGradCompute : public KernelLite<TARGET(kHost), PRECISION(kFloat)> {
public:
using param_t = operators::ActivationGradParam;
......@@ -31,7 +31,25 @@ class SquareGradCompute : public KernelLite<TARGET(kARM), PRECISION(kFloat)> {
virtual ~SquareGradCompute() = default;
};
} // namespace arm
class ReluGradCompute : public KernelLite<TARGET(kHost), PRECISION(kFloat)> {
public:
using param_t = operators::ActivationGradParam;
void Run() override;
virtual ~ReluGradCompute() = default;
};
class TanhGradCompute : public KernelLite<TARGET(kHost), PRECISION(kFloat)> {
public:
using param_t = operators::ActivationGradParam;
void Run() override;
virtual ~TanhGradCompute() = default;
};
} // namespace host
} // namespace kernels
} // namespace lite
} // namespace paddle
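
Each gradient kernel in the header above follows the same pattern: a KernelLite<TARGET(kHost), PRECISION(kFloat)> subclass that consumes operators::ActivationGradParam and overrides Run(). As a hedged sketch, a further activation gradient written in that pattern might look like the following; ExpGradCompute is a hypothetical name and is not part of this commit.

class ExpGradCompute : public KernelLite<TARGET(kHost), PRECISION(kFloat)> {
 public:
  using param_t = operators::ActivationGradParam;

  // Would compute x_grad = out_grad * out, since d(exp(x))/dx = exp(x) = Out.
  void Run() override;

  virtual ~ExpGradCompute() = default;
};
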
......@@ -41,15 +41,11 @@ bool ActivationGradOp::AttachImpl(const cpp::OpDesc& opdesc,
if (opdesc.HasInput("X")) {
auto X_name = opdesc.Input("X").front();
param_.X = GetVar<lite::Tensor>(scope, X_name);
} else {
param_.X = param_.X_grad;
}
if (opdesc.HasInput("Out")) {
auto Out_name = opdesc.Input("Out").front();
param_.Out = GetVar<lite::Tensor>(scope, Out_name);
} else {
param_.Out = param_.Out_grad;
}
return true;
......@@ -60,3 +56,5 @@ bool ActivationGradOp::AttachImpl(const cpp::OpDesc& opdesc,
} // namespace paddle
REGISTER_LITE_OP(square_grad, paddle::lite::operators::ActivationGradOp);
REGISTER_LITE_OP(relu_grad, paddle::lite::operators::ActivationGradOp);
REGISTER_LITE_OP(tanh_grad, paddle::lite::operators::ActivationGradOp);
......@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/arm/activation_grad_compute.h"
#include "lite/kernels/host/activation_grad_compute.h"
#include <gtest/gtest.h>
#include "lite/core/op_registry.h"
#include "lite/kernels/arm/activation_compute.h"
......@@ -20,13 +20,11 @@
namespace paddle {
namespace lite {
namespace kernels {
namespace arm {
using param_t = operators::ActivationParam;
using grad_param_t = operators::ActivationGradParam;
using kernel_t = SquareCompute;
using grad_kernel_t = SquareGradCompute;
template <class kernel_t, class grad_kernel_t>
class ActivationGradTester {
public:
explicit ActivationGradTester(DDim dims) : dims_(dims) {}
......@@ -71,22 +69,28 @@ class ActivationGradTester {
void run_backward(grad_param_t* param,
grad_kernel_t* kernel,
const std::vector<float>& in_vec,
const std::vector<float>& out_vec,
const std::vector<float>& out_grad_vec,
float* in_grad_vec) {
Tensor x;
Tensor out;
Tensor x_grad;
Tensor out_grad;
x.Resize(dims_);
out.Resize(dims_);
x_grad.Resize(dims_);
out_grad.Resize(dims_);
auto* x_data = x.mutable_data<float>();
auto* out_data = out.mutable_data<float>();
auto* out_grad_data = out_grad.mutable_data<float>();
for (int i = 0; i < dims_.production(); i++) {
x_data[i] = in_vec[i];
out_data[i] = out_vec[i];
out_grad_data[i] = out_grad_vec[i];
}
param->X = &x;
param->Out = &out;
param->X_grad = &x_grad;
param->Out_grad = &out_grad;
kernel->SetParam(*param);
......@@ -102,7 +106,9 @@ class ActivationGradTester {
std::vector<float> x(dims_.production());
std::vector<float> out(dims_.production());
for (int i = 0; i < dims_.production(); i++) {
x[i] = 1.0 * static_cast<float>(i % 128) * 0.3f - 1.1;
x[i] = static_cast<float>(i % 3 - 2.0) / 2.0 * 0.333 +
static_cast<float>(i % 19 - 10.0) / 10.0 * 0.333 +
static_cast<float>(i % 39 - 20.0) / 20.0 * 0.333 + 0.001213;
}
this->run_forward(&param_, &kernel_, x, out.data());
......@@ -120,7 +126,8 @@ class ActivationGradTester {
for (int i = 0; i < dims_.production(); i++) {
out_grad[i] = 1.0;
}
this->run_backward(&grad_param_, &grad_kernel_, x, out_grad, x_grad.data());
this->run_backward(
&grad_param_, &grad_kernel_, x, out, out_grad, x_grad.data());
for (int i = 0; i < dims_.production(); i++) {
EXPECT_NEAR(x_grad[i], (out_delta[i] - out[i]) / delta, max_grad_delta);
......@@ -137,31 +144,58 @@ class ActivationGradTester {
grad_param_t grad_param_;
};
void TestNormalCase(DDim dims) {
std::unique_ptr<ActivationGradTester> tester(new ActivationGradTester(dims));
void TestSquareGrad(DDim dims) {
LOG(INFO) << "Test Square grad";
std::unique_ptr<
ActivationGradTester<arm::SquareCompute, host::SquareGradCompute>>
tester(
new ActivationGradTester<arm::SquareCompute, host::SquareGradCompute>(
dims));
tester->prepare_kernel();
float delta = 0.001;
float max_grad_delta = 0.005;
tester->check_grad(delta, max_grad_delta);
}
TEST(activation_grad_arm, compute) {
LOG(INFO) << "Test Square grad";
void TestReluGrad(DDim dims) {
LOG(INFO) << "Test Relu grad";
std::unique_ptr<ActivationGradTester<arm::ReluCompute, host::ReluGradCompute>>
tester(new ActivationGradTester<arm::ReluCompute, host::ReluGradCompute>(
dims));
tester->prepare_kernel();
float delta = 0.001;
float max_grad_delta = 0.005;
tester->check_grad(delta, max_grad_delta);
}
void TestTanhGrad(DDim dims) {
LOG(INFO) << "Test Tanh grad";
std::unique_ptr<ActivationGradTester<arm::TanhCompute, host::TanhGradCompute>>
tester(new ActivationGradTester<arm::TanhCompute, host::TanhGradCompute>(
dims));
tester->prepare_kernel();
float delta = 0.001;
float max_grad_delta = 0.005;
tester->check_grad(delta, max_grad_delta);
}
TEST(activation_grad_host, compute) {
DeviceInfo::Init();
for (auto n : {2}) {
for (auto c : {2}) {
for (auto h : {2}) {
for (auto w : {2}) {
TestNormalCase(DDim(std::vector<int64_t>({n, c, h, w})));
for (auto n : {2, 1}) {
for (auto c : {2, 9}) {
for (auto h : {2, 1}) {
for (auto w : {2, 10}) {
TestSquareGrad(DDim(std::vector<int64_t>({n, c, h, w})));
TestReluGrad(DDim(std::vector<int64_t>({n, c, h, w})));
TestTanhGrad(DDim(std::vector<int64_t>({n, c, h, w})));
}
}
}
}
}
} // namespace arm
} // namespace kernels
} // namespace lite
} // namespace paddle
USE_LITE_KERNEL(square, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(square_grad, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(square_grad, kHost, kFloat, kNCHW, def);
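
The tester above validates each analytic kernel against a forward finite difference: x_grad[i] is compared to (out_delta[i] - out[i]) / delta with tolerance max_grad_delta. A minimal sketch of that check outside the Lite test harness is shown below; the std::function-based interface is an assumption for illustration, not the ActivationGradTester API.

#include <cmath>
#include <functional>
#include <vector>

// Forward-difference gradient check: numeric ~= (f(x + delta) - f(x)) / delta,
// compared element-wise against the analytic gradient (out_grad is taken as 1,
// as in the test above).
bool check_grad(const std::function<float(float)>& f,
                const std::function<float(float)>& analytic_grad,
                const std::vector<float>& xs,
                float delta = 0.001f,
                float max_grad_delta = 0.005f) {
  for (float x : xs) {
    const float numeric = (f(x + delta) - f(x)) / delta;
    if (std::fabs(analytic_grad(x) - numeric) > max_grad_delta) return false;
  }
  return true;
}

For square, for example, f would be v * v and analytic_grad would be 2 * v, with the same delta and max_grad_delta values used in TestSquareGrad.
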
......@@ -215,18 +215,6 @@ class ElementwiseAddGradTester {
fill_data_rand(y.data(), -1.f, 1.f, y_dims_.production());
this->run_forward(&param_, &kernel_, x, y, out.data());
for (int i = 0; i < x_dims_.production(); i++) {
LOG(INFO) << "x_" << i << ": " << x[i];
}
for (int i = 0; i < y_dims_.production(); i++) {
LOG(INFO) << "y_" << i << ": " << y[i];
}
for (int i = 0; i < out_dims_.production(); i++) {
LOG(INFO) << "out_" << i << ": " << out[i];
}
// backward
std::vector<float> out_grad(out_dims_.production());
std::vector<float> x_grad(x_dims_.production());
......@@ -242,14 +230,6 @@ class ElementwiseAddGradTester {
x_grad.data(),
y_grad.data());
for (int i = 0; i < x_grad.size(); i++) {
LOG(INFO) << "x_grad_" << i << ": " << x_grad[i];
}
for (int i = 0; i < y_grad.size(); i++) {
LOG(INFO) << "y_grad_" << i << ": " << y_grad[i];
}
// get numeric gradient
std::vector<float> x_delta(x_dims_.production());
std::vector<float> y_delta(y_dims_.production());
......@@ -443,18 +423,6 @@ class ElementwiseSubGradTester {
fill_data_rand(y.data(), -1.f, 1.f, y_dims_.production());
this->run_forward(&param_, &kernel_, x, y, out.data());
for (int i = 0; i < x_dims_.production(); i++) {
LOG(INFO) << "x_" << i << ": " << x[i];
}
for (int i = 0; i < y_dims_.production(); i++) {
LOG(INFO) << "y_" << i << ": " << y[i];
}
for (int i = 0; i < out_dims_.production(); i++) {
LOG(INFO) << "out_" << i << ": " << out[i];
}
// backward
std::vector<float> out_grad(out_dims_.production());
std::vector<float> x_grad(x_dims_.production());
......@@ -470,14 +438,6 @@ class ElementwiseSubGradTester {
x_grad.data(),
y_grad.data());
for (int i = 0; i < x_grad.size(); i++) {
LOG(INFO) << "x_grad_" << i << ": " << x_grad[i];
}
for (int i = 0; i < y_grad.size(); i++) {
LOG(INFO) << "y_grad_" << i << ": " << y_grad[i];
}
// get numeric gradient
std::vector<float> x_delta(x_dims_.production());
std::vector<float> y_delta(y_dims_.production());
......