Unverified commit dd63e5b4, authored by Xiaoxu Chen, committed by GitHub

reorganize the higher order autodiff api (#44119)

* move _gradients to primapi and rename to grad

* modify jvp to call forward_grad in primitive mode

* add primapi unittest and remove some unused test cases.

* fix circular import problem

* move paddle/autograd/functional into paddle/incubate/autograd/functional

* remove unused JacobianBatchLast class
Parent 37216a8f
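A minimal usage sketch of the reorganized API, distilled from the tests and docstrings in this diff (non-authoritative; it assumes a Paddle build that exposes paddle.incubate.autograd.grad, enable_prim and prim2orig as introduced by this commit):

import numpy as np
import paddle

paddle.enable_static()
paddle.incubate.autograd.enable_prim()

main = paddle.static.Program()
startup = paddle.static.Program()
with paddle.static.program_guard(main, startup):
    x = paddle.static.data(name='x', shape=[1], dtype='float32')
    x.stop_gradient = False
    x3 = paddle.multiply(paddle.multiply(x, x), x)
    # Reverse-mode AD on primitive ops; replaces the former primx._gradients.
    grad1, = paddle.incubate.autograd.grad([x3], [x])
    grad2, = paddle.incubate.autograd.grad([grad1], [x])
    # Lower the primitive ops back to original ops before execution.
    paddle.incubate.autograd.prim2orig(main.block(0))

exe = paddle.static.Executor()
exe.run(startup)
print(exe.run(main, feed={'x': np.array([2.], dtype='float32')},
              fetch_list=[grad2.name]))  # d2(x^3)/dx^2 = 6x, i.e. [12.] at x = 2

paddle.incubate.autograd.disable_prim()
paddle.disable_static()

Note that paddle.static.gradients no longer dispatches to primitive ops when prim mode is enabled (that branch is removed in backward.py below); callers who want the primitive path now call paddle.incubate.autograd.grad directly.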
...@@ -26,8 +26,6 @@ else:
from .py_layer import LegacyPyLayerContext as PyLayerContext # noqa: F401
from ..framework import set_grad_enabled, is_grad_enabled # noqa: F401
from ..fluid.dygraph.base import no_grad_ as no_grad # noqa: F401
from .functional import vjp, jvp, Jacobian, Hessian # noqa: F401
from .functional import jacobian, hessian, batch_jacobian, batch_hessian, vhp # noqa: F401
__all__ = [ # noqa
'backward',
...
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import typing
from paddle.fluid import framework
def as_tensors(xs):
if isinstance(xs, framework.Variable):
return (xs, )
elif isinstance(xs, typing.Sequence):
return tuple(xs)
else:
return xs
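A quick illustration of the as_tensors helper defined above (a sketch; the variables x and y are hypothetical static-graph tensors created only for demonstration):

import paddle

paddle.enable_static()
x = paddle.static.data(name='x', shape=[2], dtype='float32')
y = paddle.static.data(name='y', shape=[2], dtype='float32')

as_tensors(x)       # a single Variable is wrapped into a 1-tuple: (x,)
as_tensors([x, y])  # any sequence is converted to a tuple: (x, y)
as_tensors(None)    # anything else is returned unchanged: None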
...@@ -2211,12 +2211,6 @@ def gradients(targets, inputs, target_gradients=None, no_grad_set=None):
check_type(target_gradients, 'target_gradients',
(framework.Variable, list, tuple, type(None)),
'paddle.static.gradients')
from ..incubate.autograd.primx import _gradients
from ..incubate.autograd.utils import prim_enabled
if prim_enabled():
return _gradients(targets, inputs, target_gradients)
outs = calc_gradient(targets, inputs, target_gradients, no_grad_set)
return _as_list(outs)
...
...@@ -17,7 +17,7 @@ endforeach()
set_tests_properties(test_autograd_functional_dynamic PROPERTIES TIMEOUT 200)
set_tests_properties(test_autograd_functional_static PROPERTIES TIMEOUT 160)
set_tests_properties(test_gradients_and_minimize PROPERTIES TIMEOUT 60)
set_tests_properties(test_minimize PROPERTIES TIMEOUT 60)
if(NOT WIN32)
set_tests_properties(test_autograd_functional_prim PROPERTIES TIMEOUT 60)
endif()
...@@ -145,5 +145,130 @@ class TestHessianPrim(unittest.TestCase):
atol=self._atol)
@utils.place(config.DEVICES)
@utils.parameterize((utils.TEST_CASE_NAME, 'fun', 'args', 'dtype'), (
('unary_float32', paddle.tanh, (np.random.rand(2, 3), ), 'float32'),
('binary_float32', paddle.matmul,
(np.random.rand(2, 3), np.random.rand(3, 2)), 'float32'),
('unary_float64', paddle.tanh, (np.random.rand(2, 3), ), 'float64'),
('binary_float64', paddle.matmul,
(np.random.rand(2, 3), np.random.rand(3, 2)), 'float64'),
))
class TestJvpPrim(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.args = [arg.astype(cls.dtype) for arg in cls.args]
cls._rtol = config.TOLERANCE.get(
cls.dtype).get('first_order_grad').get('rtol')
cls._atol = config.TOLERANCE.get(
cls.dtype).get('first_order_grad').get('atol')
def setUp(self):
paddle.enable_static()
paddle.incubate.autograd.enable_prim()
def tearDown(self):
paddle.incubate.autograd.disable_prim()
paddle.disable_static()
def test_jvp_prim(self):
def wrapper(fun, args):
mp = paddle.static.Program()
sp = paddle.static.Program()
with paddle.static.program_guard(mp, sp):
static_args = [
paddle.static.data(f'arg{i}', arg.shape, self.dtype)
for i, arg in enumerate(args)
]
for arg in static_args:
arg.stop_gradient = False
_, jvp_res = paddle.incubate.autograd.jvp(fun, static_args)
if paddle.incubate.autograd.prim_enabled():
paddle.incubate.autograd.prim2orig()
exe = paddle.static.Executor()
exe.run(sp)
jvp_res = exe.run(
mp,
feed={f'arg{i}': arg
for i, arg in enumerate(args)},
fetch_list=[jvp_res])
return jvp_res
paddle.incubate.autograd.enable_prim()
prim_jvp = wrapper(self.fun, self.args)
paddle.incubate.autograd.disable_prim()
orig_jvp = wrapper(self.fun, self.args)
np.testing.assert_allclose(orig_jvp,
prim_jvp,
rtol=self._rtol,
atol=self._atol)
@utils.place(config.DEVICES)
@utils.parameterize((utils.TEST_CASE_NAME, 'fun', 'args', 'dtype'), (
('unary_float32', paddle.tanh, (np.random.rand(2, 3), ), 'float32'),
('binary_float32', paddle.matmul,
(np.random.rand(2, 3), np.random.rand(3, 2)), 'float32'),
('unary_float64', paddle.tanh, (np.random.rand(2, 3), ), 'float64'),
('binary_float64', paddle.matmul,
(np.random.rand(2, 3), np.random.rand(3, 2)), 'float64'),
))
class TestVjpPrim(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.args = [arg.astype(cls.dtype) for arg in cls.args]
cls._rtol = config.TOLERANCE.get(
cls.dtype).get('first_order_grad').get('rtol')
cls._atol = config.TOLERANCE.get(
cls.dtype).get('first_order_grad').get('atol')
def setUp(self):
paddle.enable_static()
paddle.incubate.autograd.enable_prim()
def tearDown(self):
paddle.incubate.autograd.disable_prim()
paddle.disable_static()
def test_vjp_prim(self):
def wrapper(fun, args):
mp = paddle.static.Program()
sp = paddle.static.Program()
with paddle.static.program_guard(mp, sp):
static_args = [
paddle.static.data(f'arg{i}', arg.shape, self.dtype)
for i, arg in enumerate(args)
]
for arg in static_args:
arg.stop_gradient = False
_, vjp_res = paddle.incubate.autograd.vjp(fun, static_args)
if paddle.incubate.autograd.prim_enabled():
paddle.incubate.autograd.prim2orig()
exe = paddle.static.Executor()
exe.run(sp)
vjp_res = exe.run(
mp,
feed={f'arg{i}': arg
for i, arg in enumerate(args)},
fetch_list=[vjp_res])
return vjp_res
paddle.incubate.autograd.enable_prim()
prim_vjp = wrapper(self.fun, self.args)
paddle.incubate.autograd.disable_prim()
orig_vjp = wrapper(self.fun, self.args)
for orig, prim in zip(orig_vjp, prim_vjp):
np.testing.assert_allclose(orig,
prim,
rtol=self._rtol,
atol=self._atol)
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()
...@@ -59,7 +59,8 @@ class TestVJP(unittest.TestCase): ...@@ -59,7 +59,8 @@ class TestVJP(unittest.TestCase):
with paddle.static.program_guard(mp, sp): with paddle.static.program_guard(mp, sp):
feed, static_xs, static_v = utils.gen_static_data_and_feed( feed, static_xs, static_v = utils.gen_static_data_and_feed(
self.xs, self.v, stop_gradient=self.stop_gradient) self.xs, self.v, stop_gradient=self.stop_gradient)
ys, xs_grads = paddle.autograd.vjp(self.fun, static_xs, static_v) ys, xs_grads = paddle.incubate.autograd.vjp(self.fun, static_xs,
static_v)
exe.run(sp) exe.run(sp)
return exe.run(mp, feed=feed, fetch_list=[ys, xs_grads]) return exe.run(mp, feed=feed, fetch_list=[ys, xs_grads])
...@@ -103,7 +104,8 @@ class TestVJPException(unittest.TestCase):
with paddle.static.program_guard(mp, sp):
feed, static_xs, static_v = utils.gen_static_data_and_feed(
self.xs, self.v)
ys, xs_grads = paddle.autograd.vjp(self.fun, static_xs, static_v)
ys, xs_grads = paddle.incubate.autograd.vjp(self.fun, static_xs,
static_v)
self.exe.run(sp)
return self.exe.run(mp, feed, fetch_list=[ys, xs_grads])
...@@ -214,7 +216,7 @@ class TestJacobianFloat32(unittest.TestCase):
startup = fluid.Program()
with fluid.program_guard(main, startup):
xs = make_tensors(inps)
JJ = paddle.autograd.functional.Jacobian(pd_f, xs, is_batched=batch)
JJ = paddle.incubate.autograd.Jacobian(pd_f, xs, is_batched=batch)
if batch:
_, nrow, ncol = JJ.shape
else:
...@@ -244,7 +246,7 @@ class TestJacobianFloat32(unittest.TestCase):
startup = fluid.Program()
with fluid.program_guard(main, startup):
xs = make_tensors(inps)
JJ = paddle.autograd.functional.Jacobian(pd_f, xs, is_batched=batch)
JJ = paddle.incubate.autograd.Jacobian(pd_f, xs, is_batched=batch)
if batch:
nbatch, nrow, ncol = JJ.shape
rows = [JJ[:, i, :] for i in range(nrow)]
...@@ -269,7 +271,7 @@ class TestJacobianFloat32(unittest.TestCase):
startup = fluid.Program()
with fluid.program_guard(main, startup):
xs = make_tensors(inps)
JJ = paddle.autograd.functional.Jacobian(pd_f, xs, is_batched=batch)
JJ = paddle.incubate.autograd.Jacobian(pd_f, xs, is_batched=batch)
if batch:
nbatch, nrow, ncol = JJ.shape
entries = [
...@@ -390,7 +392,7 @@ class TestHessianFloat32(unittest.TestCase):
startup = fluid.Program()
with fluid.program_guard(main, startup):
xs = make_tensors(inps)
HH = paddle.autograd.functional.Hessian(pd_f, xs, is_batched=batch)
HH = paddle.incubate.autograd.Hessian(pd_f, xs, is_batched=batch)
nrow, ncol = HH.shape
full_hessian = HH[:]
exe = fluid.Executor(self.place)
...
...@@ -13,82 +13,16 @@
# limitations under the License.
import unittest
import numpy as np
import paddle
from paddle.incubate.autograd.primx import prim2orig
from paddle.incubate.autograd.utils import enable_prim, disable_prim, prim_enabled
from paddle.incubate.autograd.utils import (disable_prim, enable_prim,
prim_enabled)
paddle.enable_static()
class TestGradients(unittest.TestCase):
def test_third_order(self):
enable_prim()
main = paddle.static.Program()
startup = paddle.static.Program()
with paddle.static.program_guard(main, startup):
x = paddle.static.data(name='x', shape=[1], dtype='float32')
x2 = paddle.multiply(x, x)
x3 = paddle.multiply(x2, x)
x4 = paddle.multiply(x3, x)
grad1, = paddle.static.gradients([x4], [x])
grad2, = paddle.static.gradients([grad1], [x])
grad3, = paddle.static.gradients([grad2], [x])
prim2orig(main.block(0))
feed = {x.name: np.array([2.]).astype('float32')}
fetch_list = [grad3.name]
result = [np.array([48.])]
place = paddle.CPUPlace()
if paddle.device.is_compiled_with_cuda():
place = paddle.CUDAPlace(0)
exe = paddle.static.Executor(place)
exe.run(startup)
outs = exe.run(main, feed=feed, fetch_list=fetch_list)
np.allclose(outs, result)
disable_prim()
def test_fourth_order(self):
enable_prim()
main = paddle.static.Program()
startup = paddle.static.Program()
with paddle.static.program_guard(main, startup):
x = paddle.static.data(name='x', shape=[1], dtype='float32')
x2 = paddle.multiply(x, x)
x3 = paddle.multiply(x2, x)
x4 = paddle.multiply(x3, x)
x5 = paddle.multiply(x4, x)
out = paddle.sqrt(x5 + x4)
grad1, = paddle.static.gradients([out], [x])
grad2, = paddle.static.gradients([grad1], [x])
grad3, = paddle.static.gradients([grad2], [x])
grad4, = paddle.static.gradients([grad3], [x])
prim2orig(main.block(0))
feed = {
x.name: np.array([2.]).astype('float32'),
}
fetch_list = [grad4.name]
# (3*(-5*x^2-16*x-16))/(16*(x+1)^3.5)
result = [np.array([-0.27263762711])]
place = paddle.CPUPlace()
if paddle.device.is_compiled_with_cuda():
place = paddle.CUDAPlace(0)
exe = paddle.static.Executor(place)
exe.run(startup)
outs = exe.run(main, feed=feed, fetch_list=fetch_list)
np.allclose(outs, result)
disable_prim()
class TestMinimize(unittest.TestCase):
def model(self, x, w, bias, opt):
...
...@@ -37,7 +37,7 @@ import utils
('input_gradients_not_none', paddle.matmul,
(np.random.rand(3, 3), np.random.rand(3, 3)),
(np.random.rand(3, 3), np.random.rand(3, 3)), 'float64')))
class TestForwardGradients(unittest.TestCase):
class TestForwardGrad(unittest.TestCase):
@classmethod
def setUpClass(cls):
...@@ -55,7 +55,7 @@ class TestForwardGradients(unittest.TestCase):
paddle.incubate.autograd.disable_prim()
paddle.disable_static()
def test_forward_gradients(self):
def test_forward_grad(self):
def expected():
paddle.incubate.autograd.disable_prim()
...@@ -64,7 +64,8 @@ class TestForwardGradients(unittest.TestCase):
with paddle.static.program_guard(mp, sp):
feed, static_xs, static_v = utils.gen_static_data_and_feed(
self.xs, self.v, stop_gradient=False)
_, ys_grad = paddle.autograd.jvp(self.fun, static_xs, static_v)
_, ys_grad = paddle.incubate.autograd.jvp(
self.fun, static_xs, static_v)
exe = paddle.static.Executor()
exe.run(sp)
out = exe.run(mp, feed=feed, fetch_list=ys_grad)
...@@ -80,7 +81,8 @@ class TestForwardGradients(unittest.TestCase):
self.xs, self.v, stop_gradient=False)
ys = self.fun(*static_xs) if isinstance(
static_xs, typing.Sequence) else self.fun(static_xs)
ys_grad = primapi.forward_gradients(ys, static_xs, static_v)
ys_grad = paddle.incubate.autograd.forward_grad(
ys, static_xs, static_v)
paddle.incubate.autograd.prim2orig(mp.block(0))
exe = paddle.static.Executor()
exe.run(sp)
...@@ -106,7 +108,7 @@ class TestForwardGradients(unittest.TestCase):
self.xs, self.v, stop_gradient=False)
ys = self.fun(*static_xs) if isinstance(
static_xs, typing.Sequence) else self.fun(static_xs)
ys_grad = primapi.forward_gradients(ys, static_xs, static_v)
ys_grad = primapi.forward_grad(ys, static_xs, static_v)
paddle.incubate.autograd.prim2orig(mp.block(0))
exe = paddle.static.Executor()
exe.run(sp)
...@@ -116,14 +118,125 @@ class TestForwardGradients(unittest.TestCase):
def test_illegal_param(self):
paddle.incubate.autograd.enable_prim()
with self.assertRaises(TypeError):
primapi.forward_gradients(1, paddle.static.data('inputs',
shape=[1]))
primapi.forward_grad(1, paddle.static.data('inputs', shape=[1]))
with self.assertRaises(TypeError):
primapi.forward_gradients(paddle.static.data('targets', shape=[1]),
1)
primapi.forward_grad(paddle.static.data('targets', shape=[1]), 1)
paddle.incubate.autograd.disable_prim()
class TestGrad(unittest.TestCase):
def setUp(self):
paddle.enable_static()
paddle.incubate.autograd.enable_prim()
def tearDown(self):
paddle.incubate.autograd.disable_prim()
paddle.disable_static()
def test_third_order(self):
paddle.incubate.autograd.enable_prim()
main = paddle.static.Program()
startup = paddle.static.Program()
with paddle.static.program_guard(main, startup):
x = paddle.static.data(name='x', shape=[1], dtype='float32')
x2 = paddle.multiply(x, x)
x3 = paddle.multiply(x2, x)
x4 = paddle.multiply(x3, x)
grad1, = paddle.incubate.autograd.grad([x4], [x])
grad2, = paddle.incubate.autograd.grad([grad1], [x])
grad3, = paddle.incubate.autograd.grad([grad2], [x])
paddle.incubate.autograd.prim2orig(main.block(0))
feed = {x.name: np.array([2.]).astype('float32')}
fetch_list = [grad3.name]
result = [np.array([48.])]
place = paddle.CPUPlace()
if paddle.device.is_compiled_with_cuda():
place = paddle.CUDAPlace(0)
exe = paddle.static.Executor(place)
exe.run(startup)
outs = exe.run(main, feed=feed, fetch_list=fetch_list)
np.allclose(outs, result)
paddle.incubate.autograd.disable_prim()
def test_fourth_order(self):
paddle.incubate.autograd.enable_prim()
main = paddle.static.Program()
startup = paddle.static.Program()
with paddle.static.program_guard(main, startup):
x = paddle.static.data(name='x', shape=[1], dtype='float32')
x2 = paddle.multiply(x, x)
x3 = paddle.multiply(x2, x)
x4 = paddle.multiply(x3, x)
x5 = paddle.multiply(x4, x)
out = paddle.sqrt(x5 + x4)
grad1, = paddle.incubate.autograd.grad([out], [x])
grad2, = paddle.incubate.autograd.grad([grad1], [x])
grad3, = paddle.incubate.autograd.grad([grad2], [x])
grad4, = paddle.incubate.autograd.grad([grad3], [x])
paddle.incubate.autograd.prim2orig(main.block(0))
feed = {
x.name: np.array([2.]).astype('float32'),
}
fetch_list = [grad4.name]
# (3*(-5*x^2-16*x-16))/(16*(x+1)^3.5)
result = [np.array([-0.27263762711])]
place = paddle.CPUPlace()
if paddle.device.is_compiled_with_cuda():
place = paddle.CUDAPlace(0)
exe = paddle.static.Executor(place)
exe.run(startup)
outs = exe.run(main, feed=feed, fetch_list=fetch_list)
np.allclose(outs, result)
paddle.incubate.autograd.disable_prim()
def test_disable_prim(self):
def actual(x: np.array):
paddle.incubate.autograd.disable_prim()
main = paddle.static.Program()
startup = paddle.static.Program()
with paddle.static.program_guard(main, startup):
var_x = paddle.static.data('x', shape=x.shape, dtype=x.dtype)
var_x.stop_gradient = False
y = paddle.tanh(var_x)
y_grad = paddle.incubate.autograd.grad(y, var_x)
y_second_grad = paddle.incubate.autograd.grad(y_grad, var_x)
exe = paddle.static.Executor()
exe.run(startup)
return exe.run(main,
feed={'x': x},
fetch_list=[y_grad, y_second_grad])
def expect(x: np.array):
paddle.incubate.autograd.disable_prim()
main = paddle.static.Program()
startup = paddle.static.Program()
with paddle.static.program_guard(main, startup):
var_x = paddle.static.data('x', shape=x.shape, dtype=x.dtype)
var_x.stop_gradient = False
y = paddle.tanh(var_x)
y_grad = paddle.static.gradients(y, var_x)
y_second_grad = paddle.static.gradients(y_grad, var_x)
exe = paddle.static.Executor()
exe.run(startup)
return exe.run(main,
feed={'x': x},
fetch_list=[y_grad, y_second_grad])
x = np.random.randn(100, 200)
for i, j in zip(actual(x), expect(x)):
np.testing.assert_allclose(i, j)
if __name__ == '__main__':
unittest.main()
...@@ -21,7 +21,7 @@ from paddle.incubate.autograd.primops import (neg, set_value, add, sub, mul,
concat, reduce, matmul,
slice_select, slice_assign,
gather, scatter_add, fill_const)
from paddle.incubate.autograd.primx import Transform, topo_path, orig2prim, prim2orig, _gradients
from paddle.incubate.autograd.primx import Transform, topo_path, orig2prim, prim2orig
from paddle.incubate.autograd.utils import enable_prim, disable_prim, prim_enabled
...
...@@ -22,7 +22,7 @@ import contextlib
import collections
import numpy as np
import paddle
from paddle.autograd.utils import as_tensors
from paddle.incubate.autograd.utils import as_tensors
##########################################################
...
...@@ -11,11 +11,12 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.autograd.functional import Hessian, Jacobian, jvp, vjp
from .functional import Hessian, Jacobian, jvp, vjp
from .primapi import forward_grad, grad
from .primx import prim2orig
from .utils import enable_prim, disable_prim, prim_enabled
from .utils import disable_prim, enable_prim, prim_enabled
__all__ = [ # noqa
'vjp', 'jvp', 'Jacobian', 'Hessian', 'prim2orig', 'enable_prim',
'disable_prim', 'prim_enabled'
'vjp', 'jvp', 'Jacobian', 'Hessian', 'enable_prim', 'disable_prim',
'forward_grad', 'grad'
]
...@@ -14,28 +14,26 @@
import typing
import paddle.autograd.utils as tensor_utils
import paddle.incubate.autograd.utils as prim_utils
from paddle.fluid import framework
from paddle.incubate.autograd import primx
from paddle.fluid import backward, framework
from paddle.incubate.autograd import primx, utils
@framework.static_only
def forward_gradients(targets, inputs, input_gradients=None):
def forward_grad(outputs, inputs, grad_inputs=None):
"""Forward mode of automatic differentiation.
.. note::
**ONLY available in the static mode and primitive operators.**
Args:
targets: The target tensor or tensors
outputs: The output tensor or tensors
inputs: The input tensor or tensors
input_gradients: The gradient Tensor or Tensors of inputs which has
grad_inputs: The gradient Tensor or Tensors of inputs which has
the same shape with inputs, Defaults to None, in this case is
equivalent to all ones .
Returns:
target_gradients (Tensor|Sequence[Tensor]): The gradients for targets.
grad_outputs (Tensor|Sequence[Tensor]): The gradients for outputs.
Examples:
...@@ -53,7 +51,7 @@ def forward_gradients(targets, inputs, input_gradients=None):
with paddle.static.program_guard(main_program, startup_program):
x = paddle.static.data('x', shape=[1], dtype='float32')
y = x * x
y_grad = paddle.incubate.autograd.forward_gradients(y, x)
y_grad = paddle.incubate.autograd.forward_grad(y, x)
paddle.incubate.autograd.prim2orig()
exe = paddle.static.Executor()
...@@ -65,20 +63,20 @@ def forward_gradients(targets, inputs, input_gradients=None):
paddle.incubate.autograd.disable_prim()
paddle.disable_static()
"""
if not prim_utils.prim_enabled():
if not utils.prim_enabled():
raise RuntimeError('forward_gradients must be running on primitive'
raise RuntimeError('forward_grad must be running on primitive'
'operators, use enable_prim to turn it on.')
if not isinstance(targets, (framework.Variable, typing.Sequence)):
if not isinstance(outputs, (framework.Variable, typing.Sequence)):
raise TypeError(f'Expected targets is Tensor|Sequence[Tensor], '
f'but got {type(targets)}.')
raise TypeError(f'Expected outputs is Tensor|Sequence[Tensor], '
f'but got {type(outputs)}.')
if not isinstance(inputs, (framework.Variable, typing.Sequence)):
raise TypeError(f'Expected inputs is Tensor|Sequence[Tensor], '
f'but got {type(inputs)}.')
ys, xs, xs_dot = tensor_utils.as_tensors(targets), tensor_utils.as_tensors(
inputs), tensor_utils.as_tensors(input_gradients)
ys, xs, xs_dot = utils.as_tensors(outputs), utils.as_tensors(
inputs), utils.as_tensors(grad_inputs)
block = framework.default_main_program().current_block()
if any(x.block != block for x in xs + ys):
...@@ -90,4 +88,95 @@ def forward_gradients(targets, inputs, input_gradients=None):
ad = primx.Transform(ys[0].block)
_, ys_dot = ad.linearize(xs, ys, xs_dot)
return ys_dot[0] if isinstance(targets, framework.Variable) else ys_dot
return ys_dot[0] if isinstance(outputs, framework.Variable) else ys_dot
@framework.static_only
def grad(outputs, inputs, grad_outputs=None):
"""Reverse mode of automatic differentiation.
.. note::
**ONLY available in the static mode and primitive operators**
Args:
outputs (Tensor|Sequence[Tensor]): The output Tensor or Tensors.
inputs (Tensor|Sequence[Tensor]): The input Tensor or Tensors.
grad_outputs (Tensor|Sequence[Tensor]): The gradient Tensor or
Tensors of outputs which has the same shape with outputs, Defaults
to None, in this case is equivalent to all ones .
Returns:
grad_inputs (Tensor|Tensors): The gradients for inputs.
Examples:
.. code-block:: python
import numpy as np
import paddle
paddle.enable_static()
paddle.incubate.autograd.enable_prim()
startup_program = paddle.static.Program()
main_program = paddle.static.Program()
with paddle.static.program_guard(main_program, startup_program):
x = paddle.static.data('x', shape=[1], dtype='float32')
x.stop_gradient = False
y = x * x
x_grad = paddle.incubate.autograd.grad(y, x)
paddle.incubate.autograd.prim2orig()
exe = paddle.static.Executor()
exe.run(startup_program)
x_grad = exe.run(main_program, feed={'x': np.array([2.]).astype('float32')}, fetch_list=[x_grad])
print(x_grad)
# [array([4.], dtype=float32)]
paddle.incubate.autograd.disable_prim()
paddle.disable_static()
"""
if not utils.prim_enabled():
return backward.gradients(outputs, inputs, grad_outputs)
if not isinstance(outputs, (framework.Variable, typing.Sequence)):
raise TypeError(f'Expected outputs is Tensor|Sequence[Tensor], '
f'but got {type(outputs)}.')
if not isinstance(inputs, (framework.Variable, typing.Sequence)):
raise TypeError(f'Expected inputs is Tensor|Sequence[Tensor], '
f'but got {type(inputs)}.')
ys, xs, ys_bar = utils.as_tensors(outputs), utils.as_tensors(
inputs), utils.as_tensors(grad_outputs)
block = framework.default_main_program().current_block()
if any((x is not None and x.block != block) for x in xs + ys):
raise RuntimeError(
'Variable in inputs and outputs should be None or in current block of main program'
)
# TODO(Tongxin) without any prior knowledge about whether the program
# is completely lowered to primitive ops, it's mandatory to run the lowering
# pass once and again. This is obviously inefficient and needs to be
# optimized.
primx.orig2prim(block)
ad = primx.Transform(block)
xs_dot, ys_dot = ad.linearize(xs, ys)
if any(var is None for var in ys_dot):
raise RuntimeError(
'Grads cannot be computed. The given outputs does not depend on inputs'
)
ys_bar, xs_bar = ad.transpose(ys_dot, xs_dot, ys_bar)
# remove xs_dot and their constructor ops
op_indexes = []
for var in xs_dot:
if var is not None:
op_index = block.ops.index(var.op)
if op_index < 0:
raise ValueError(
f'op_index should be greater than or equal to 0, but op_index={op_index}.'
)
op_indexes.append(op_index)
ad.erase_ops(sorted(op_indexes))
ad.erase_dots(xs_dot)
return xs_bar[0] if isinstance(inputs, framework.Variable) else xs_bar
...@@ -14,6 +14,7 @@
import paddle
from paddle.fluid.layer_helper import LayerHelper
from .primreg import REGISTER_FN
...
...@@ -22,7 +22,7 @@ from .primreg import op_position_inputs, op_position_output, lookup_orig2prim, l
from .primrules import _orig2prim, _prim2orig, _jvp, _transpose
from .utils import get_input_var_list, get_output_var_list, flatten, flatten_and_remove_none
from collections import OrderedDict
from paddle.autograd.utils import as_tensors
from paddle.incubate.autograd.utils import as_tensors
def topo_path(xs, ys, block=None):
...@@ -577,47 +577,3 @@ def prim2orig(block=None):
assert block == default_main_program().current_block(
), f'block is neither None nor current block of main program'
_lower(block, reverse=True)
def _gradients(ys, xs, ys_bar=None):
""" A drop-in replacement of paddle.gradients but instead computing
on primitive ops.
Args:
ys: the target tensor or tensors
xs: the input tensor or tensors
ys_bar: the optional gradient tensors of `ys`
Returns:
xs_bar: a list gradients of input `xs`
"""
ys, xs, ys_bar = as_tensors(ys), as_tensors(xs), as_tensors(ys_bar)
block = default_main_program().current_block()
for el in xs + ys:
assert el is None or el.block == block, f'variable in xs and ys should be None or in current block of main program'
# TODO(Tongxin) without any prior knowledge about whether the program
# is completely lowered to primitive ops, it's mandatory to run the lowering
# pass once and again. This is obviously inefficient and needs to be
# optimized.
orig2prim(block)
ad = Transform(block)
xs_dot, ys_dot = ad.linearize(xs, ys)
if any(var is None for var in ys_dot):
assert False, f'Gradients cannot be computed. The given output `ys` does not depend on input `xs`.'
ys_bar, xs_bar = ad.transpose(ys_dot, xs_dot, ys_bar)
# remove xs_dot and their constructor ops
op_indexes = []
for var in xs_dot:
if var is not None:
op_index = block.ops.index(var.op)
assert op_index >= 0, f'op_index should be greater than or equal to 0, but op_index={op_index}.'
op_indexes.append(op_index)
ad.erase_ops(sorted(op_indexes))
ad.erase_dots(xs_dot)
return xs_bar
...@@ -11,6 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import typing
import paddle
from paddle.fluid import framework as framework
...@@ -170,3 +171,12 @@ def flatten(inp):
def flatten_and_remove_none(inp):
flattened = flatten(inp)
return [var for var in flattened if var is not None]
def as_tensors(xs):
if isinstance(xs, framework.Variable):
return (xs, )
elif isinstance(xs, typing.Sequence):
return tuple(xs)
else:
return xs