Unverified commit 297f5efe, authored by cyber-pioneer, committed by GitHub

Add sin double grad operator. (#47543)

* add sin double grad operator

* add sin double grad test example

* move sindoublegradopmaker to backward.yaml

* fix sindoublegrad code

* simplify sindoublegrad functor
Parent eac973d1
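For reference, the second-order formulas this patch encodes (matching the comments in `SinDoubleGradFunctor` and the tensor names in `backward.yaml`) are:

```latex
% y = sin(x); grad_out = dL/dy; grad_x_grad (ddx) is the incoming
% gradient of grad_x in the double-grad pass.
\begin{aligned}
\texttt{grad\_x}         &= \cos(x)\cdot\texttt{grad\_out} \\
\texttt{grad\_out\_grad} &= \cos(x)\cdot\texttt{grad\_x\_grad} \\
\texttt{x\_grad}         &= -\sin(x)\cdot\texttt{grad\_out}\cdot\texttt{grad\_x\_grad}
\end{aligned}
```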
@@ -478,6 +478,17 @@
    func : silu_grad
  inplace : (out_grad -> x_grad)

- backward_op : sin_double_grad
  forward : sin_grad (Tensor x, Tensor grad_out) -> Tensor(grad_x)
  args : (Tensor x, Tensor grad_out, Tensor grad_x_grad)
  output : Tensor(x_grad), Tensor(grad_out_grad)
  infer_meta :
    func : GeneralBinaryGradInferMeta
    param : [x, x]
  kernel :
    func : sin_double_grad
  inplace : (grad_x_grad -> grad_out_grad)
- backward_op : sin_grad
  forward : sin (Tensor x) -> Tensor(out)
  args : (Tensor x, Tensor out_grad)
@@ -487,6 +498,7 @@
    param : [x]
  kernel :
    func : sin_grad
  backward : sin_double_grad
  inplace : (out_grad -> x_grad)

- backward_op : sinh_grad
......
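With `sin_grad` now declaring `backward : sin_double_grad` above, `paddle.sin` can be differentiated twice end to end. A minimal dygraph sketch (illustration only, not part of the patch; it assumes a Paddle build that contains this kernel):

```python
import numpy as np
import paddle

x = paddle.to_tensor(np.linspace(-1.0, 1.0, 8), dtype="float64", stop_gradient=False)
y = paddle.sin(x)

# First-order gradient; create_graph=True keeps the graph so it can be
# differentiated again, which is what exercises sin_double_grad.
(dx,) = paddle.grad(y, x, create_graph=True)   # dx  = cos(x)
(ddx,) = paddle.grad(dx, x)                    # ddx = -sin(x)

np.testing.assert_allclose(ddx.numpy(), -np.sin(x.numpy()), rtol=1e-8)
```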
@@ -798,7 +798,7 @@
    attrs : [bool use_mkldnn = false, bool use_cudnn = false]

- op : sin
  backward : sin_grad, sin_double_grad
  inputs :
    x : X
  outputs :
......
@@ -80,6 +80,14 @@ void ReluDoubleGradKernel(const Context& dev_ctx,
                          const DenseTensor& ddx,
                          DenseTensor* ddout);
template <typename T, typename Context>
void SinDoubleGradKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& dout,
const DenseTensor& ddx,
DenseTensor* dx,
DenseTensor* ddout);
template <typename T, typename Context>
void TanhDoubleGradKernel(const Context& dev_ctx,
                          const DenseTensor& out,
......
@@ -336,6 +336,15 @@ PD_REGISTER_KERNEL(square_double_grad,
                   phi::dtype::float16,
                   int,
                   int64_t) {}
PD_REGISTER_KERNEL(sin_double_grad,
CPU,
ALL_LAYOUT,
phi::SinDoubleGradKernel,
float,
double,
phi::dtype::float16,
int,
int64_t) {}
PD_REGISTER_ACTIVATION_GRAD_KERNEL(softsign_grad, SoftsignGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(sigmoid_grad, SigmoidGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(sigmoid_double_grad, SigmoidDoubleGradKernel)
......
@@ -106,6 +106,38 @@ struct SinFunctor : public BaseActivationFunctor<T> {
  }
};
// sine''(x) = -sin(x)
template <typename T>
struct SinDoubleGradFunctor : public BaseActivationFunctor<T> {
template <typename Device>
void operator()(const Device& dev,
const DenseTensor* X,
const DenseTensor* dOut,
const DenseTensor* ddX,
DenseTensor* dX,
DenseTensor* ddOut) const {
auto* d = dev.eigen_device();
auto ddx = EigenVector<T>::Flatten(
GET_DATA_SAFELY(ddX, "Input", "DDX", "SinDoubleGrad"));
auto x = EigenVector<T>::Flatten(
GET_DATA_SAFELY(X, "Input", "X", "SinDoubleGrad"));
// sin DoubleGrad: ddy=cos(x)*ddx, dx=-sin(x)*dy*ddx
// calculate dx first, so ddy can inplace ddx
auto dx = EigenVector<T>::Flatten(
GET_DATA_SAFELY(dX, "Output", "DX", "SinDoubleGrad"));
auto dout = EigenVector<T>::Flatten(
GET_DATA_SAFELY(dOut, "Output", "DOut", "SinDoubleGrad"));
dx.device(*d) = -ddx * x.unaryExpr(Sine<T>()) * dout;
// calculate ddout
auto ddout = EigenVector<T>::Flatten(
GET_DATA_SAFELY(ddOut, "Output", "DDOut", "SinDoubleGrad"));
ddout.device(*d) = ddx * x.unaryExpr(Cosine<T>());
}
static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepX; }
};
// reciprocal(x) = 1 / x
template <typename T>
struct ReciprocalFunctor : public BaseActivationFunctor<T> {
......
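A note on the ordering inside `SinDoubleGradFunctor`: because `backward.yaml` declares `inplace : (grad_x_grad -> grad_out_grad)`, `ddOut` may reuse `ddX`'s buffer, so `dx` must be computed before `ddout` overwrites that buffer. A small NumPy illustration of why that order is safe (not part of the patch):

```python
import numpy as np

x = np.array([0.3, -0.7])
dout = np.array([1.0, 2.0])     # incoming gradient of sin's output
buf = np.array([0.5, -1.5])     # shared buffer: holds ddx now, ddout later

dx = -np.sin(x) * dout * buf    # consume ddx first (reads buf) ...
buf[:] = np.cos(x) * buf        # ... then overwrite buf in place with ddout
```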
@@ -417,6 +417,16 @@ PD_REGISTER_KERNEL(square_double_grad,
                   phi::dtype::float16,
                   phi::dtype::bfloat16) {}
PD_REGISTER_KERNEL(sin_double_grad,
GPU,
ALL_LAYOUT,
phi::SinDoubleGradKernel,
float,
double,
int,
int64_t,
phi::dtype::float16) {}
PD_REGISTER_ACTIVATION_GRAD_KERNEL(softsign_grad, SoftsignGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(sigmoid_grad, SigmoidGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(sigmoid_double_grad, SigmoidDoubleGradKernel)
......
@@ -417,4 +417,22 @@ void SquareDoubleGradKernel(const Context& dev_ctx,
  functor(dev_ctx, &x, &dout, &ddx, dx, ddout);
}
template <typename T, typename Context>
void SinDoubleGradKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& dout,
const DenseTensor& ddx,
DenseTensor* dx,
DenseTensor* ddout) {
if (dx) {
dx->Resize(x.dims());
dev_ctx.template Alloc<T>(dx);
}
if (ddout) {
dev_ctx.template Alloc<T>(ddout);
}
phi::funcs::SinDoubleGradFunctor<T> functor;
functor(dev_ctx, &x, &dout, &ddx, dx, ddout);
}
}  // namespace phi
@@ -471,5 +471,37 @@ class TestLogDoubleGradCheck(unittest.TestCase):
            self.func(p)
class TestSinDoubleGradCheck(unittest.TestCase):
    def sin_wrapper(self, x):
        return paddle.sin(x[0])

    @prog_scope()
    def func(self, place):
        shape = [2, 3, 7, 9]
        eps = 0.0005
        dtype = np.float64
        x = layers.data('x', shape, False, dtype=dtype)
        x.persistable = True
        y = paddle.sin(x)
        x_arr = np.random.uniform(-1, 1, shape).astype(dtype)
        x_arr[np.abs(x_arr) < 0.005] = 0.002
        gradient_checker.double_grad_check(
            [x], y, x_init=x_arr, place=place, eps=eps
        )
        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
        gradient_checker.double_grad_check_for_dygraph(
            self.sin_wrapper, [x], y, x_init=x_arr, place=place
        )
        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})

    def test_grad(self):
        paddle.enable_static()
        places = [fluid.CPUPlace()]
        if core.is_compiled_with_cuda():
            places.append(fluid.CUDAPlace(0))
        for p in places:
            self.func(p)
if __name__ == "__main__":
    unittest.main()