Unverified commit 845e80d9, authored by mapingshuo and committed by GitHub

add Act grad (#3923)

Add activation grad ops: register square_grad, relu_grad and tanh_grad, implemented as Host kernels (the ARM-specific square_grad kernel is removed).
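For reference, these are the element-wise backward formulas the new Host kernels implement; they can be read directly off the loops in `lite/kernels/host/activation_grad_compute.cc` below. The `dX`/`dOut` notation is shorthand introduced here for the `X@GRAD` and `Out@GRAD` tensors, not a name used in the code.

```latex
% Backward formulas of the three kernels added in this commit.
% dX denotes X@GRAD, dOut denotes Out@GRAD, x is the forward input, Out the forward output.
\begin{aligned}
\text{square\_grad:} \quad dX &= dOut \cdot 2x \\
\text{relu\_grad:}   \quad dX &= dOut \cdot \mathbf{1}[x > 0] \\
\text{tanh\_grad:}   \quad dX &= dOut \cdot \left(1 - Out^{2}\right)
\end{aligned}
```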
Parent f4c04186
@@ -763,24 +763,6 @@ void act_thresholded_relu<float>(
   }
 }
-
-#ifdef LITE_WITH_TRAIN
-template <>
-void act_square_grad(const float* din,
-                     const float* dout_grad,
-                     float* din_grad,
-                     int size,
-                     int threads) {
-  const float* ptr_out_grad = dout_grad;
-  float* ptr_in_grad = din_grad;
-  for (int i = 0; i < size; ++i) {
-    ptr_in_grad[0] = ptr_out_grad[0] * 2.0 * din[0];
-    ptr_out_grad++;
-    ptr_in_grad++;
-    din++;
-  }
-}
-#endif
 }  // namespace math
 }  // namespace arm
 }  // namespace lite
...
@@ -90,12 +90,6 @@ template <typename T>
 void act_thresholded_relu(
     const T* din, T* dout, int size, float threshold, int threads);
-
-#ifdef LITE_WITH_TRAIN
-template <typename T>
-void act_square_grad(
-    const T* din, const T* dout_grad, T* din_grad, int size, int threads);
-#endif
 }  // namespace math
 }  // namespace arm
 }  // namespace lite
...
@@ -103,7 +103,6 @@ add_kernel(deformable_conv_compute_arm ARM extra SRCS deformable_conv_compute.cc
 add_kernel(mean_compute_arm ARM extra SRCS mean_compute.cc DEPS ${lite_kernel_deps} math_arm)
 add_kernel(mean_grad_compute_arm ARM train SRCS mean_grad_compute.cc DEPS ${lite_kernel_deps} math_arm)
-add_kernel(activation_grad_compute_arm ARM train SRCS activation_grad_compute.cc DEPS ${lite_kernel_deps} math_arm)
 add_kernel(elementwise_grad_compute_arm ARM train SRCS elementwise_grad_compute.cc DEPS ${lite_kernel_deps} math_arm)
 add_kernel(mul_grad_compute_arm ARM train SRCS mul_grad_compute.cc DEPS ${lite_kernel_deps} math_arm)
 add_kernel(sgd_compute_arm ARM train SRCS sgd_compute.cc DEPS ${lite_kernel_deps} math_arm)
...
@@ -18,6 +18,7 @@ add_kernel(read_from_array_compute_host Host extra SRCS read_from_array_compute.
 add_kernel(assign_compute_host Host extra SRCS assign_compute.cc DEPS ${lite_kernel_deps})
 add_kernel(retinanet_detection_output_compute_host Host extra SRCS retinanet_detection_output_compute.cc DEPS ${lite_kernel_deps})
 add_kernel(where_index_compute_host Host extra SRCS where_index_compute.cc DEPS ${lite_kernel_deps})
+add_kernel(activation_grad_compute_host Host train SRCS activation_grad_compute.cc DEPS ${lite_kernel_deps})

 if(LITE_BUILD_EXTRA)
     lite_cc_test(test_where_index_compute_host SRCS where_index_compute.cc DEPS where_index_compute_host)
...
@@ -12,41 +12,87 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-#include "lite/kernels/arm/activation_grad_compute.h"
-#include "lite/backends/arm/math/funcs.h"
+#include "lite/kernels/host/activation_grad_compute.h"

 namespace paddle {
 namespace lite {
 namespace kernels {
-namespace arm {
+namespace host {

 void SquareGradCompute::Run() {
   auto& param = this->Param<param_t>();
-  auto& ctx = this->ctx_->template As<ARMContext>();
+  CHECK(param.X);
   auto out_grad_dims = param.Out_grad->dims();
   auto out_grad_data = param.Out_grad->data<float>();
   auto x_data = param.X->data<float>();
   auto x_grad_data = param.X_grad->mutable_data<float>();
-  lite::arm::math::act_square_grad<float>(x_data,
-                                          out_grad_data,
-                                          x_grad_data,
-                                          out_grad_dims.production(),
-                                          ctx.threads());
+  for (int i = 0; i < out_grad_dims.production(); i++) {
+    x_grad_data[i] = out_grad_data[i] * 2.0 * x_data[i];
+  }
 }

-}  // namespace arm
+void ReluGradCompute::Run() {
+  auto& param = this->Param<param_t>();
+  CHECK(param.X);
+  auto out_grad_dims = param.Out_grad->dims();
+  auto out_grad_data = param.Out_grad->data<float>();
+  auto x_data = param.X->data<float>();
+  auto x_grad_data = param.X_grad->mutable_data<float>();
+  for (int i = 0; i < out_grad_dims.production(); i++) {
+    x_grad_data[i] = x_data[i] > 0 ? out_grad_data[i] : 0.0;
+  }
+}
+
+void TanhGradCompute::Run() {
+  auto& param = this->Param<param_t>();
+  CHECK(param.Out);
+  auto out_grad_dims = param.Out_grad->dims();
+  auto out_grad_data = param.Out_grad->data<float>();
+  auto out_data = param.Out->data<float>();
+  auto x_grad_data = param.X_grad->mutable_data<float>();
+  for (int i = 0; i < out_grad_dims.production(); i++) {
+    x_grad_data[i] = out_grad_data[i] *
+                     (static_cast<float>(1.0) - out_data[i] * out_data[i]);
+  }
+}
+
+}  // namespace host
 }  // namespace kernels
 }  // namespace lite
 }  // namespace paddle
 REGISTER_LITE_KERNEL(square_grad,
-                     kARM,
+                     kHost,
+                     kFloat,
+                     kNCHW,
+                     paddle::lite::kernels::host::SquareGradCompute,
+                     def)
+    .BindInput("X", {LiteType::GetTensorTy(TARGET(kHost))})
+    .BindInput("Out@GRAD", {LiteType::GetTensorTy(TARGET(kHost))})
+    .BindOutput("X@GRAD", {LiteType::GetTensorTy(TARGET(kHost))})
+    .Finalize();
+
+REGISTER_LITE_KERNEL(relu_grad,
+                     kHost,
+                     kFloat,
+                     kNCHW,
+                     paddle::lite::kernels::host::ReluGradCompute,
+                     def)
+    .BindInput("X", {LiteType::GetTensorTy(TARGET(kHost))})
+    .BindInput("Out@GRAD", {LiteType::GetTensorTy(TARGET(kHost))})
+    .BindOutput("X@GRAD", {LiteType::GetTensorTy(TARGET(kHost))})
+    .Finalize();
+
+REGISTER_LITE_KERNEL(tanh_grad,
+                     kHost,
                      kFloat,
                      kNCHW,
-                     paddle::lite::kernels::arm::SquareGradCompute,
+                     paddle::lite::kernels::host::TanhGradCompute,
                      def)
-    .BindInput("X", {LiteType::GetTensorTy(TARGET(kARM))})
+    .BindInput("Out", {LiteType::GetTensorTy(TARGET(kHost))})
-    .BindInput("Out@GRAD", {LiteType::GetTensorTy(TARGET(kARM))})
+    .BindInput("Out@GRAD", {LiteType::GetTensorTy(TARGET(kHost))})
-    .BindOutput("X@GRAD", {LiteType::GetTensorTy(TARGET(kARM))})
+    .BindOutput("X@GRAD", {LiteType::GetTensorTy(TARGET(kHost))})
     .Finalize();
@@ -20,9 +20,9 @@
 namespace paddle {
 namespace lite {
 namespace kernels {
-namespace arm {
+namespace host {

-class SquareGradCompute : public KernelLite<TARGET(kARM), PRECISION(kFloat)> {
+class SquareGradCompute : public KernelLite<TARGET(kHost), PRECISION(kFloat)> {
  public:
   using param_t = operators::ActivationGradParam;
@@ -31,7 +31,25 @@ class SquareGradCompute : public KernelLite<TARGET(kARM), PRECISION(kFloat)> {
   virtual ~SquareGradCompute() = default;
 };

-}  // namespace arm
+class ReluGradCompute : public KernelLite<TARGET(kHost), PRECISION(kFloat)> {
+ public:
+  using param_t = operators::ActivationGradParam;
+
+  void Run() override;
+
+  virtual ~ReluGradCompute() = default;
+};
+
+class TanhGradCompute : public KernelLite<TARGET(kHost), PRECISION(kFloat)> {
+ public:
+  using param_t = operators::ActivationGradParam;
+
+  void Run() override;
+
+  virtual ~TanhGradCompute() = default;
+};
+
+}  // namespace host
 }  // namespace kernels
 }  // namespace lite
 }  // namespace paddle
@@ -41,15 +41,11 @@ bool ActivationGradOp::AttachImpl(const cpp::OpDesc& opdesc,
   if (opdesc.HasInput("X")) {
     auto X_name = opdesc.Input("X").front();
     param_.X = GetVar<lite::Tensor>(scope, X_name);
-  } else {
-    param_.X = param_.X_grad;
   }
   if (opdesc.HasInput("Out")) {
     auto Out_name = opdesc.Input("Out").front();
     param_.Out = GetVar<lite::Tensor>(scope, Out_name);
-  } else {
-    param_.Out = param_.Out_grad;
   }
   return true;
@@ -60,3 +56,5 @@ bool ActivationGradOp::AttachImpl(const cpp::OpDesc& opdesc,
 }  // namespace paddle

 REGISTER_LITE_OP(square_grad, paddle::lite::operators::ActivationGradOp);
+REGISTER_LITE_OP(relu_grad, paddle::lite::operators::ActivationGradOp);
+REGISTER_LITE_OP(tanh_grad, paddle::lite::operators::ActivationGradOp);
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-#include "lite/kernels/arm/activation_grad_compute.h"
+#include "lite/kernels/host/activation_grad_compute.h"
 #include <gtest/gtest.h>
 #include "lite/core/op_registry.h"
 #include "lite/kernels/arm/activation_compute.h"
@@ -20,13 +20,11 @@
 namespace paddle {
 namespace lite {
 namespace kernels {
-namespace arm {

 using param_t = operators::ActivationParam;
 using grad_param_t = operators::ActivationGradParam;
-using kernel_t = SquareCompute;
-using grad_kernel_t = SquareGradCompute;

+template <class kernel_t, class grad_kernel_t>
 class ActivationGradTester {
  public:
   explicit ActivationGradTester(DDim dims) : dims_(dims) {}
@@ -71,22 +69,28 @@ class ActivationGradTester {
   void run_backward(grad_param_t* param,
                     grad_kernel_t* kernel,
                     const std::vector<float>& in_vec,
+                    const std::vector<float>& out_vec,
                     const std::vector<float>& out_grad_vec,
                     float* in_grad_vec) {
     Tensor x;
+    Tensor out;
     Tensor x_grad;
     Tensor out_grad;
     x.Resize(dims_);
+    out.Resize(dims_);
     x_grad.Resize(dims_);
     out_grad.Resize(dims_);
     auto* x_data = x.mutable_data<float>();
+    auto* out_data = out.mutable_data<float>();
     auto* out_grad_data = out_grad.mutable_data<float>();
     for (int i = 0; i < dims_.production(); i++) {
       x_data[i] = in_vec[i];
+      out_data[i] = out_vec[i];
       out_grad_data[i] = out_grad_vec[i];
     }
     param->X = &x;
+    param->Out = &out;
     param->X_grad = &x_grad;
     param->Out_grad = &out_grad;
     kernel->SetParam(*param);
@@ -102,7 +106,9 @@ class ActivationGradTester {
     std::vector<float> x(dims_.production());
     std::vector<float> out(dims_.production());
     for (int i = 0; i < dims_.production(); i++) {
-      x[i] = 1.0 * static_cast<float>(i % 128) * 0.3f - 1.1;
+      x[i] = static_cast<float>(i % 3 - 2.0) / 2.0 * 0.333 +
+             static_cast<float>(i % 19 - 10.0) / 10.0 * 0.333 +
+             static_cast<float>(i % 39 - 20.0) / 20.0 * 0.333 + 0.001213;
     }
     this->run_forward(&param_, &kernel_, x, out.data());
@@ -120,7 +126,8 @@ class ActivationGradTester {
     for (int i = 0; i < dims_.production(); i++) {
       out_grad[i] = 1.0;
     }
-    this->run_backward(&grad_param_, &grad_kernel_, x, out_grad, x_grad.data());
+    this->run_backward(
+        &grad_param_, &grad_kernel_, x, out, out_grad, x_grad.data());
     for (int i = 0; i < dims_.production(); i++) {
       EXPECT_NEAR(x_grad[i], (out_delta[i] - out[i]) / delta, max_grad_delta);
@@ -137,31 +144,58 @@ class ActivationGradTester {
   grad_param_t grad_param_;
 };

-void TestNormalCase(DDim dims) {
-  std::unique_ptr<ActivationGradTester> tester(new ActivationGradTester(dims));
+void TestSquareGrad(DDim dims) {
+  LOG(INFO) << "Test Square grad";
+  std::unique_ptr<
+      ActivationGradTester<arm::SquareCompute, host::SquareGradCompute>>
+      tester(
+          new ActivationGradTester<arm::SquareCompute, host::SquareGradCompute>(
+              dims));
   tester->prepare_kernel();
   float delta = 0.001;
   float max_grad_delta = 0.005;
   tester->check_grad(delta, max_grad_delta);
 }

-TEST(activation_grad_arm, compute) {
-  LOG(INFO) << "Test Square grad";
+void TestReluGrad(DDim dims) {
+  LOG(INFO) << "Test Relu grad";
+  std::unique_ptr<ActivationGradTester<arm::ReluCompute, host::ReluGradCompute>>
+      tester(new ActivationGradTester<arm::ReluCompute, host::ReluGradCompute>(
+          dims));
+  tester->prepare_kernel();
+  float delta = 0.001;
+  float max_grad_delta = 0.005;
+  tester->check_grad(delta, max_grad_delta);
+}
+
+void TestTanhGrad(DDim dims) {
+  LOG(INFO) << "Test Tanh grad";
+  std::unique_ptr<ActivationGradTester<arm::TanhCompute, host::TanhGradCompute>>
+      tester(new ActivationGradTester<arm::TanhCompute, host::TanhGradCompute>(
+          dims));
+  tester->prepare_kernel();
+  float delta = 0.001;
+  float max_grad_delta = 0.005;
+  tester->check_grad(delta, max_grad_delta);
+}
+
+TEST(activation_grad_host, compute) {
   DeviceInfo::Init();
-  for (auto n : {2}) {
-    for (auto c : {2}) {
-      for (auto h : {2}) {
-        for (auto w : {2}) {
-          TestNormalCase(DDim(std::vector<int64_t>({n, c, h, w})));
+  for (auto n : {2, 1}) {
+    for (auto c : {2, 9}) {
+      for (auto h : {2, 1}) {
+        for (auto w : {2, 10}) {
+          TestSquareGrad(DDim(std::vector<int64_t>({n, c, h, w})));
+          TestReluGrad(DDim(std::vector<int64_t>({n, c, h, w})));
+          TestTanhGrad(DDim(std::vector<int64_t>({n, c, h, w})));
         }
       }
     }
   }
 }

-}  // namespace arm
 }  // namespace kernels
 }  // namespace lite
 }  // namespace paddle

 USE_LITE_KERNEL(square, kARM, kFloat, kNCHW, def);
-USE_LITE_KERNEL(square_grad, kARM, kFloat, kNCHW, def);
+USE_LITE_KERNEL(square_grad, kHost, kFloat, kNCHW, def);
@@ -215,18 +215,6 @@ class ElementwiseAddGradTester {
     fill_data_rand(y.data(), -1.f, 1.f, y_dims_.production());
     this->run_forward(&param_, &kernel_, x, y, out.data());

-    for (int i = 0; i < x_dims_.production(); i++) {
-      LOG(INFO) << "x_" << i << ": " << x[i];
-    }
-    for (int i = 0; i < y_dims_.production(); i++) {
-      LOG(INFO) << "y_" << i << ": " << y[i];
-    }
-    for (int i = 0; i < out_dims_.production(); i++) {
-      LOG(INFO) << "out_" << i << ": " << out[i];
-    }
-
     // backward
     std::vector<float> out_grad(out_dims_.production());
     std::vector<float> x_grad(x_dims_.production());
@@ -242,14 +230,6 @@ class ElementwiseAddGradTester {
                        x_grad.data(),
                        y_grad.data());

-    for (int i = 0; i < x_grad.size(); i++) {
-      LOG(INFO) << "x_grad_" << i << ": " << x_grad[i];
-    }
-    for (int i = 0; i < y_grad.size(); i++) {
-      LOG(INFO) << "y_grad_" << i << ": " << y_grad[i];
-    }
-
     // get numeric gradient
     std::vector<float> x_delta(x_dims_.production());
     std::vector<float> y_delta(y_dims_.production());
@@ -443,18 +423,6 @@ class ElementwiseSubGradTester {
     fill_data_rand(y.data(), -1.f, 1.f, y_dims_.production());
     this->run_forward(&param_, &kernel_, x, y, out.data());

-    for (int i = 0; i < x_dims_.production(); i++) {
-      LOG(INFO) << "x_" << i << ": " << x[i];
-    }
-    for (int i = 0; i < y_dims_.production(); i++) {
-      LOG(INFO) << "y_" << i << ": " << y[i];
-    }
-    for (int i = 0; i < out_dims_.production(); i++) {
-      LOG(INFO) << "out_" << i << ": " << out[i];
-    }
-
     // backward
     std::vector<float> out_grad(out_dims_.production());
     std::vector<float> x_grad(x_dims_.production());
@@ -470,14 +438,6 @@ class ElementwiseSubGradTester {
                        x_grad.data(),
                        y_grad.data());

-    for (int i = 0; i < x_grad.size(); i++) {
-      LOG(INFO) << "x_grad_" << i << ": " << x_grad[i];
-    }
-    for (int i = 0; i < y_grad.size(); i++) {
-      LOG(INFO) << "y_grad_" << i << ": " << y_grad[i];
-    }
-
     // get numeric gradient
     std::vector<float> x_delta(x_dims_.production());
     std::vector<float> y_delta(y_dims_.production());
...