Unverified commit 8fb6e77b, authored by Weilong Wu, committed by GitHub

Add dygraph triple grad test (#36814)

* native commit for triple grad of sigmoid

* Updated unittests files

* init functional jacobian api

* Updated triple_test func

* Updated gradient_checker & test_script

* finish test with dtype float32

* add float64 test case

* polish code

* use atol=1e-5 with dtype float64

* fix for ci

* set timeout for test_jacobian

* fix dygraph grad to support high differential

* polish API docstring

* Updated gradient checker and some related files

* fix double grad strip error for high differential

* fix double grad strip error for high differential

* Add Sigmoid triple grad tests

* fix dygraph double grad dtype error when calling for high differential scenario

* Updated triple grad tests func

* Use np.random to initialize ddx

* Updated triple_grad_check func

* add todo for gradient checker and refine some comments

* remove additional code

* add test for warning in backward.py

* format python code

* support multi input in triple gradient checker

* Add matmul triple grad kernel

* Updated comments of TODO

* Supported some special tests

* Change code-format to follow CI std

* Updated gradient_checker.py

* Fix conflicts

* Removed unnecessary printing log

* Change code style to follow CI std

* Add Dygraph Triple Grad test
Co-authored-by: levi131 <limaolin01@baidu.com>
Co-authored-by: Jiabin Yang <360788950@qq.com>
Parent: 65fd59e2
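
For reference, the expected values hand-computed in the test below all follow from the standard sigmoid derivative chain, writing s = matmul(x, y) + z and sigma = sigmoid:

\sigma'(s) = \sigma(s)\,(1 - \sigma(s)), \qquad
\sigma''(s) = \sigma'(s)\,(1 - 2\sigma(s)), \qquad
\sigma'''(s) = \sigma'(s)\,\bigl(1 - 6\sigma(s) + 6\sigma(s)^{2}\bigr)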
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.fluid as fluid
import paddle
from paddle.fluid.wrapped_decorator import wrap_decorator
from paddle.vision.models import resnet50, resnet101
import unittest
from unittest import TestCase
import numpy as np


def _dygraph_guard_(func):
    def __impl__(*args, **kwargs):
        if fluid.in_dygraph_mode():
            return func(*args, **kwargs)
        else:
            with fluid.dygraph.guard():
                return func(*args, **kwargs)

    return __impl__


dygraph_guard = wrap_decorator(_dygraph_guard_)


def random_var(size, low=-1, high=1, dtype='float32'):
    # Fixed seed: every call returns the same deterministic sample.
    np.random.seed(2021)
    x_np = np.random.uniform(low=low, high=high, size=size).astype(dtype)
    return fluid.dygraph.to_variable(x_np)


class TestDygraphTripleGrad(TestCase):
    def setUp(self):
        self.sort_sum_gradient = False
        self.shape = [5, 5]

    def grad(self,
             outputs,
             inputs,
             grad_outputs=None,
             no_grad_vars=None,
             retain_graph=None,
             create_graph=False,
             allow_unused=False):
        fluid.set_flags({'FLAGS_sort_sum_gradient': self.sort_sum_gradient})
        return fluid.dygraph.grad(
            outputs=outputs,
            inputs=inputs,
            grad_outputs=grad_outputs,
            no_grad_vars=no_grad_vars,
            retain_graph=retain_graph,
            create_graph=create_graph,
            allow_unused=allow_unused)

    @dygraph_guard
    def test_exception(self):
        with self.assertRaises(AssertionError):
            self.grad(None, None)

        shape = self.shape

        with self.assertRaises(AssertionError):
            self.grad(1, random_var(shape))

        with self.assertRaises(AssertionError):
            self.grad(random_var(shape), 1)

        with self.assertRaises(AssertionError):
            self.grad([1], [random_var(shape)])

        with self.assertRaises(AssertionError):
            self.grad([random_var(shape)], [1])

        with self.assertRaises(AssertionError):
            self.grad([random_var(shape), random_var(shape)],
                      [random_var(shape)], [random_var(shape)])

        with self.assertRaises(AssertionError):
            self.grad(
                [random_var(shape)], [random_var(shape)], no_grad_vars=[1])

        with self.assertRaises(AssertionError):
            self.grad([random_var(shape)], [random_var(shape)], no_grad_vars=1)

    @dygraph_guard
    def test_example_with_gradient_and_create_graph(self):
        x = random_var(self.shape)
        x_np = x.numpy()
        x.stop_gradient = False

        y = random_var(self.shape)
        y_np = y.numpy()
        y.stop_gradient = False

        z = random_var(self.shape)
        z_np = z.numpy()
        numel = z_np.size
        z.stop_gradient = False

        out = fluid.layers.sigmoid(paddle.matmul(x, y) + z)
        out_np = out.numpy()

        dx_actual, = self.grad([out], [x], create_graph=True)
        # Theoretical result based on math calculation
        dout = np.ones(self.shape).astype('float32')
        dx_expected = np.matmul(dout * out_np * (1 - out_np),
                                np.transpose(y_np))
        self.assertTrue(np.allclose(dx_actual.numpy(), dx_expected))
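
        # With s = matmul(x, y) + z and out = sigmoid(s), the chain rule gives
        # d(out)/dx = (dout * sigmoid'(s)) @ y^T, and sigmoid'(s) equals
        # out * (1 - out); this is exactly dx_expected above.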

        ddx_actual, = self.grad([dx_actual], [x], create_graph=True)
        # Theoretical result based on math calculation
        DDY = np.zeros(self.shape).astype('float32')
        DDX = np.ones(self.shape).astype('float32')
        double_grad_tmp1 = np.matmul(dout * out_np * (1 - out_np),
                                     np.transpose(DDY))
        double_grad_tmp2 = np.matmul(DDX, y_np) + np.matmul(x_np, DDY)
        double_grad_tmp3 = (
            1 - 2 * out_np) * dout * double_grad_tmp2 * out_np * (1 - out_np)
        ddx_expected = double_grad_tmp1 + np.matmul(double_grad_tmp3,
                                                    np.transpose(y_np))
        self.assertTrue(np.allclose(ddx_actual.numpy(), ddx_expected))
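
        # DDX/DDY seed the second differentiation: double_grad_tmp2 =
        # DDX @ y + x @ DDY is the induced perturbation of s, so
        # ddx = (dout * sigmoid'(s)) @ DDY^T
        #     + (dout * sigmoid''(s) * double_grad_tmp2) @ y^T,
        # where sigmoid''(s) = out * (1 - out) * (1 - 2 * out).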

        # Theoretical result based on math calculation
        d_ddout = np.zeros(self.shape).astype('float32')
        tmp0 = np.matmul(DDX, y_np) + np.matmul(x_np, DDY)
        tmp1 = (1 - 2 * out_np) * ((1 - 2 * out_np) * dout * tmp0 * tmp0)
        tmp2 = tmp0 * (1 - 2 * out_np) * d_ddout - 2 * dout * (
            1 - out_np) * out_np * tmp0 * tmp0
        dddx_expected = np.matmul(((tmp1 + tmp2) * out_np * (1 - out_np)),
                                  np.transpose(y_np))

        ddx_actual.backward()
        dddx_grad_actual = x.gradient()
        self.assertTrue(np.allclose(dddx_grad_actual, dddx_expected))
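
        # With DDY = 0 and d_ddout = 0 the expression reduces to
        # dddx = (dout * sigmoid'''(s) * tmp0**2) @ y^T, where tmp0 = DDX @ y
        # and sigmoid'''(s) = out * (1 - out) * (1 - 6 * out + 6 * out**2).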


if __name__ == '__main__':
    unittest.main()
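
For readers who want to poke at the feature outside the unittest harness, here is a minimal standalone sketch of the behavior the test exercises: three nested paddle.grad calls on an elementwise sigmoid, checked against the closed-form third derivative. It assumes a Paddle build that includes the sigmoid triple-grad support this change tests; the paddle.uniform input and the tolerance are illustrative choices, not part of the committed file.

# Standalone sketch (illustrative, not part of this commit): triple
# differentiation of an elementwise sigmoid via nested paddle.grad calls.
import numpy as np
import paddle

x = paddle.uniform([5, 5], min=-1.0, max=1.0)  # illustrative input
x.stop_gradient = False
out = paddle.nn.functional.sigmoid(x)

# Each grad call differentiates the (implicitly summed) previous result;
# create_graph=True keeps the graph alive for the next differentiation.
dx, = paddle.grad([out], [x], create_graph=True)
ddx, = paddle.grad([dx], [x], create_graph=True)
dddx, = paddle.grad([ddx], [x])

s = out.numpy()
# Closed form: sigmoid'''(s) = sigmoid'(s) * (1 - 6*sigmoid(s) + 6*sigmoid(s)**2)
expected = s * (1 - s) * (1 - 6 * s + 6 * s * s)
assert np.allclose(dddx.numpy(), expected, atol=1e-5)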