Unverified commit dd63e5b4, authored by Xiaoxu Chen, committed by GitHub

reorganize the higher order autodiff api (#44119)

* move _gradients to primapi and rename to grad

* modify jvp to call forward_grad in primitive mode

* add primapi unittest and remove some unused test cases.

* fix circular import problem

* move paddle/autograd/functional into paddle/incubate.autograd/functional

* remove unused JacobianBatchLast class
Parent 37216a8f
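For orientation, here is a minimal sketch of the reorganized static-mode API, adapted from the `grad` docstring example that appears further down in this diff: reverse-mode differentiation now goes through `paddle.incubate.autograd.grad` (the renamed `_gradients` helper), and `paddle.incubate.autograd.forward_grad` (the renamed `forward_gradients`) follows the same enable_prim / prim2orig pattern for forward mode. The program below is illustrative only and is not part of the commit itself.

import numpy as np
import paddle

paddle.enable_static()
paddle.incubate.autograd.enable_prim()

startup_program = paddle.static.Program()
main_program = paddle.static.Program()
with paddle.static.program_guard(main_program, startup_program):
    x = paddle.static.data('x', shape=[1], dtype='float32')
    x.stop_gradient = False
    y = x * x
    # Reverse-mode AD on primitive ops; replaces the old primx._gradients helper.
    x_grad = paddle.incubate.autograd.grad(y, x)
    # Lower the primitive ops back to original ops before execution.
    paddle.incubate.autograd.prim2orig()

exe = paddle.static.Executor()
exe.run(startup_program)
print(exe.run(main_program,
              feed={'x': np.array([2.], dtype='float32')},
              fetch_list=[x_grad]))
# [array([4.], dtype=float32)]

paddle.incubate.autograd.disable_prim()
paddle.disable_static()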
......@@ -26,8 +26,6 @@ else:
from .py_layer import LegacyPyLayerContext as PyLayerContext # noqa: F401
from ..framework import set_grad_enabled, is_grad_enabled # noqa: F401
from ..fluid.dygraph.base import no_grad_ as no_grad # noqa: F401
from .functional import vjp, jvp, Jacobian, Hessian # noqa: F401
from .functional import jacobian, hessian, batch_jacobian, batch_hessian, vhp # noqa: F401
__all__ = [ # noqa
'backward',
......
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import typing
from paddle.fluid import framework
def as_tensors(xs):
if isinstance(xs, framework.Variable):
return (xs, )
elif isinstance(xs, typing.Sequence):
return tuple(xs)
else:
return xs
......@@ -2211,12 +2211,6 @@ def gradients(targets, inputs, target_gradients=None, no_grad_set=None):
check_type(target_gradients, 'target_gradients',
(framework.Variable, list, tuple, type(None)),
'paddle.static.gradients')
from ..incubate.autograd.primx import _gradients
from ..incubate.autograd.utils import prim_enabled
if prim_enabled():
return _gradients(targets, inputs, target_gradients)
outs = calc_gradient(targets, inputs, target_gradients, no_grad_set)
return _as_list(outs)
......
......@@ -17,7 +17,7 @@ endforeach()
set_tests_properties(test_autograd_functional_dynamic PROPERTIES TIMEOUT 200)
set_tests_properties(test_autograd_functional_static PROPERTIES TIMEOUT 160)
set_tests_properties(test_gradients_and_minimize PROPERTIES TIMEOUT 60)
set_tests_properties(test_minimize PROPERTIES TIMEOUT 60)
if(NOT WIN32)
set_tests_properties(test_autograd_functional_prim PROPERTIES TIMEOUT 60)
endif()
......@@ -145,5 +145,130 @@ class TestHessianPrim(unittest.TestCase):
atol=self._atol)
@utils.place(config.DEVICES)
@utils.parameterize((utils.TEST_CASE_NAME, 'fun', 'args', 'dtype'), (
('unary_float32', paddle.tanh, (np.random.rand(2, 3), ), 'float32'),
('binary_float32', paddle.matmul,
(np.random.rand(2, 3), np.random.rand(3, 2)), 'float32'),
('unary_float64', paddle.tanh, (np.random.rand(2, 3), ), 'float64'),
('binary_float64', paddle.matmul,
(np.random.rand(2, 3), np.random.rand(3, 2)), 'float64'),
))
class TestJvpPrim(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.args = [arg.astype(cls.dtype) for arg in cls.args]
cls._rtol = config.TOLERANCE.get(
cls.dtype).get('first_order_grad').get('rtol')
cls._atol = config.TOLERANCE.get(
cls.dtype).get('first_order_grad').get('atol')
def setUp(self):
paddle.enable_static()
paddle.incubate.autograd.enable_prim()
def tearDown(self):
paddle.incubate.autograd.disable_prim()
paddle.disable_static()
def test_jacobian_prim(self):
def wrapper(fun, args):
mp = paddle.static.Program()
sp = paddle.static.Program()
with paddle.static.program_guard(mp, sp):
static_args = [
paddle.static.data(f'arg{i}', arg.shape, self.dtype)
for i, arg in enumerate(args)
]
for arg in static_args:
arg.stop_gradient = False
_, jvp_res = paddle.incubate.autograd.jvp(fun, static_args)
if paddle.incubate.autograd.prim_enabled():
paddle.incubate.autograd.prim2orig()
exe = paddle.static.Executor()
exe.run(sp)
jvp_res = exe.run(
mp,
feed={f'arg{i}': arg
for i, arg in enumerate(args)},
fetch_list=[jvp_res])
return jvp_res
paddle.incubate.autograd.enable_prim()
prim_jvp = wrapper(self.fun, self.args)
paddle.incubate.autograd.disable_prim()
orig_jvp = wrapper(self.fun, self.args)
np.testing.assert_allclose(orig_jvp,
prim_jvp,
rtol=self._rtol,
atol=self._atol)
@utils.place(config.DEVICES)
@utils.parameterize((utils.TEST_CASE_NAME, 'fun', 'args', 'dtype'), (
('unary_float32', paddle.tanh, (np.random.rand(2, 3), ), 'float32'),
('binary_float32', paddle.matmul,
(np.random.rand(2, 3), np.random.rand(3, 2)), 'float32'),
('unary_float64', paddle.tanh, (np.random.rand(2, 3), ), 'float64'),
('binary_float64', paddle.matmul,
(np.random.rand(2, 3), np.random.rand(3, 2)), 'float64'),
))
class TestVjpPrim(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.args = [arg.astype(cls.dtype) for arg in cls.args]
cls._rtol = config.TOLERANCE.get(
cls.dtype).get('first_order_grad').get('rtol')
cls._atol = config.TOLERANCE.get(
cls.dtype).get('first_order_grad').get('atol')
def setUp(self):
paddle.enable_static()
paddle.incubate.autograd.enable_prim()
def tearDown(self):
paddle.incubate.autograd.disable_prim()
paddle.disable_static()
def test_jacobian_prim(self):
def wrapper(fun, args):
mp = paddle.static.Program()
sp = paddle.static.Program()
with paddle.static.program_guard(mp, sp):
static_args = [
paddle.static.data(f'arg{i}', arg.shape, self.dtype)
for i, arg in enumerate(args)
]
for arg in static_args:
arg.stop_gradient = False
_, vjp_res = paddle.incubate.autograd.vjp(fun, static_args)
if paddle.incubate.autograd.prim_enabled():
paddle.incubate.autograd.prim2orig()
exe = paddle.static.Executor()
exe.run(sp)
vjp_res = exe.run(
mp,
feed={f'arg{i}': arg
for i, arg in enumerate(args)},
fetch_list=[vjp_res])
return vjp_res
paddle.incubate.autograd.enable_prim()
prim_vjp = wrapper(self.fun, self.args)
paddle.incubate.autograd.disable_prim()
orig_vjp = wrapper(self.fun, self.args)
for orig, prim in zip(orig_vjp, prim_vjp):
np.testing.assert_allclose(orig,
prim,
rtol=self._rtol,
atol=self._atol)
if __name__ == "__main__":
unittest.main()
......@@ -59,7 +59,8 @@ class TestVJP(unittest.TestCase):
with paddle.static.program_guard(mp, sp):
feed, static_xs, static_v = utils.gen_static_data_and_feed(
self.xs, self.v, stop_gradient=self.stop_gradient)
ys, xs_grads = paddle.autograd.vjp(self.fun, static_xs, static_v)
ys, xs_grads = paddle.incubate.autograd.vjp(self.fun, static_xs,
static_v)
exe.run(sp)
return exe.run(mp, feed=feed, fetch_list=[ys, xs_grads])
......@@ -103,7 +104,8 @@ class TestVJPException(unittest.TestCase):
with paddle.static.program_guard(mp, sp):
feed, static_xs, static_v = utils.gen_static_data_and_feed(
self.xs, self.v)
ys, xs_grads = paddle.autograd.vjp(self.fun, static_xs, static_v)
ys, xs_grads = paddle.incubate.autograd.vjp(self.fun, static_xs,
static_v)
self.exe.run(sp)
return self.exe.run(mp, feed, fetch_list=[ys, xs_grads])
......@@ -214,7 +216,7 @@ class TestJacobianFloat32(unittest.TestCase):
startup = fluid.Program()
with fluid.program_guard(main, startup):
xs = make_tensors(inps)
JJ = paddle.autograd.functional.Jacobian(pd_f, xs, is_batched=batch)
JJ = paddle.incubate.autograd.Jacobian(pd_f, xs, is_batched=batch)
if batch:
_, nrow, ncol = JJ.shape
else:
......@@ -244,7 +246,7 @@ class TestJacobianFloat32(unittest.TestCase):
startup = fluid.Program()
with fluid.program_guard(main, startup):
xs = make_tensors(inps)
JJ = paddle.autograd.functional.Jacobian(pd_f, xs, is_batched=batch)
JJ = paddle.incubate.autograd.Jacobian(pd_f, xs, is_batched=batch)
if batch:
nbatch, nrow, ncol = JJ.shape
rows = [JJ[:, i, :] for i in range(nrow)]
......@@ -269,7 +271,7 @@ class TestJacobianFloat32(unittest.TestCase):
startup = fluid.Program()
with fluid.program_guard(main, startup):
xs = make_tensors(inps)
JJ = paddle.autograd.functional.Jacobian(pd_f, xs, is_batched=batch)
JJ = paddle.incubate.autograd.Jacobian(pd_f, xs, is_batched=batch)
if batch:
nbatch, nrow, ncol = JJ.shape
entries = [
......@@ -390,7 +392,7 @@ class TestHessianFloat32(unittest.TestCase):
startup = fluid.Program()
with fluid.program_guard(main, startup):
xs = make_tensors(inps)
HH = paddle.autograd.functional.Hessian(pd_f, xs, is_batched=batch)
HH = paddle.incubate.autograd.Hessian(pd_f, xs, is_batched=batch)
nrow, ncol = HH.shape
full_hessian = HH[:]
exe = fluid.Executor(self.place)
......
......@@ -13,82 +13,16 @@
# limitations under the License.
import unittest
import numpy as np
import numpy as np
import paddle
from paddle.incubate.autograd.primx import prim2orig
from paddle.incubate.autograd.utils import enable_prim, disable_prim, prim_enabled
from paddle.incubate.autograd.utils import (disable_prim, enable_prim,
prim_enabled)
paddle.enable_static()
class TestGradients(unittest.TestCase):
def test_third_order(self):
enable_prim()
main = paddle.static.Program()
startup = paddle.static.Program()
with paddle.static.program_guard(main, startup):
x = paddle.static.data(name='x', shape=[1], dtype='float32')
x2 = paddle.multiply(x, x)
x3 = paddle.multiply(x2, x)
x4 = paddle.multiply(x3, x)
grad1, = paddle.static.gradients([x4], [x])
grad2, = paddle.static.gradients([grad1], [x])
grad3, = paddle.static.gradients([grad2], [x])
prim2orig(main.block(0))
feed = {x.name: np.array([2.]).astype('float32')}
fetch_list = [grad3.name]
result = [np.array([48.])]
place = paddle.CPUPlace()
if paddle.device.is_compiled_with_cuda():
place = paddle.CUDAPlace(0)
exe = paddle.static.Executor(place)
exe.run(startup)
outs = exe.run(main, feed=feed, fetch_list=fetch_list)
np.allclose(outs, result)
disable_prim()
def test_fourth_order(self):
enable_prim()
main = paddle.static.Program()
startup = paddle.static.Program()
with paddle.static.program_guard(main, startup):
x = paddle.static.data(name='x', shape=[1], dtype='float32')
x2 = paddle.multiply(x, x)
x3 = paddle.multiply(x2, x)
x4 = paddle.multiply(x3, x)
x5 = paddle.multiply(x4, x)
out = paddle.sqrt(x5 + x4)
grad1, = paddle.static.gradients([out], [x])
grad2, = paddle.static.gradients([grad1], [x])
grad3, = paddle.static.gradients([grad2], [x])
grad4, = paddle.static.gradients([grad3], [x])
prim2orig(main.block(0))
feed = {
x.name: np.array([2.]).astype('float32'),
}
fetch_list = [grad4.name]
# (3*(-5*x^2-16*x-16))/(16*(x+1)^3.5)
result = [np.array([-0.27263762711])]
place = paddle.CPUPlace()
if paddle.device.is_compiled_with_cuda():
place = paddle.CUDAPlace(0)
exe = paddle.static.Executor(place)
exe.run(startup)
outs = exe.run(main, feed=feed, fetch_list=fetch_list)
np.allclose(outs, result)
disable_prim()
class TestMinimize(unittest.TestCase):
def model(self, x, w, bias, opt):
......
......@@ -37,7 +37,7 @@ import utils
('input_gradients_not_none', paddle.matmul,
(np.random.rand(3, 3), np.random.rand(3, 3)),
(np.random.rand(3, 3), np.random.rand(3, 3)), 'float64')))
class TestForwardGradients(unittest.TestCase):
class TestForwardGrad(unittest.TestCase):
@classmethod
def setUpClass(cls):
......@@ -55,7 +55,7 @@ class TestForwardGradients(unittest.TestCase):
paddle.incubate.autograd.disable_prim()
paddle.disable_static()
def test_forward_gradients(self):
def test_forward_grad(self):
def expected():
paddle.incubate.autograd.disable_prim()
......@@ -64,7 +64,8 @@ class TestForwardGradients(unittest.TestCase):
with paddle.static.program_guard(mp, sp):
feed, static_xs, static_v = utils.gen_static_data_and_feed(
self.xs, self.v, stop_gradient=False)
_, ys_grad = paddle.autograd.jvp(self.fun, static_xs, static_v)
_, ys_grad = paddle.incubate.autograd.jvp(
self.fun, static_xs, static_v)
exe = paddle.static.Executor()
exe.run(sp)
out = exe.run(mp, feed=feed, fetch_list=ys_grad)
......@@ -80,7 +81,8 @@ class TestForwardGradients(unittest.TestCase):
self.xs, self.v, stop_gradient=False)
ys = self.fun(*static_xs) if isinstance(
static_xs, typing.Sequence) else self.fun(static_xs)
ys_grad = primapi.forward_gradients(ys, static_xs, static_v)
ys_grad = paddle.incubate.autograd.forward_grad(
ys, static_xs, static_v)
paddle.incubate.autograd.prim2orig(mp.block(0))
exe = paddle.static.Executor()
exe.run(sp)
......@@ -106,7 +108,7 @@ class TestForwardGradients(unittest.TestCase):
self.xs, self.v, stop_gradient=False)
ys = self.fun(*static_xs) if isinstance(
static_xs, typing.Sequence) else self.fun(static_xs)
ys_grad = primapi.forward_gradients(ys, static_xs, static_v)
ys_grad = primapi.forward_grad(ys, static_xs, static_v)
paddle.incubate.autograd.prim2orig(mp.block(0))
exe = paddle.static.Executor()
exe.run(sp)
......@@ -116,14 +118,125 @@ class TestForwardGradients(unittest.TestCase):
def test_illegal_param(self):
paddle.incubate.autograd.enable_prim()
with self.assertRaises(TypeError):
primapi.forward_gradients(1, paddle.static.data('inputs',
shape=[1]))
primapi.forward_grad(1, paddle.static.data('inputs', shape=[1]))
with self.assertRaises(TypeError):
primapi.forward_gradients(paddle.static.data('targets', shape=[1]),
1)
primapi.forward_grad(paddle.static.data('targets', shape=[1]), 1)
paddle.incubate.autograd.disable_prim()
class TestGrad(unittest.TestCase):
def setUp(self):
paddle.enable_static()
paddle.incubate.autograd.enable_prim()
def tearDown(self):
paddle.incubate.autograd.disable_prim()
paddle.disable_static()
def test_third_order(self):
paddle.incubate.autograd.enable_prim()
main = paddle.static.Program()
startup = paddle.static.Program()
with paddle.static.program_guard(main, startup):
x = paddle.static.data(name='x', shape=[1], dtype='float32')
x2 = paddle.multiply(x, x)
x3 = paddle.multiply(x2, x)
x4 = paddle.multiply(x3, x)
grad1, = paddle.incubate.autograd.grad([x4], [x])
grad2, = paddle.incubate.autograd.grad([grad1], [x])
grad3, = paddle.incubate.autograd.grad([grad2], [x])
paddle.incubate.autograd.prim2orig(main.block(0))
feed = {x.name: np.array([2.]).astype('float32')}
fetch_list = [grad3.name]
result = [np.array([48.])]
place = paddle.CPUPlace()
if paddle.device.is_compiled_with_cuda():
place = paddle.CUDAPlace(0)
exe = paddle.static.Executor(place)
exe.run(startup)
outs = exe.run(main, feed=feed, fetch_list=fetch_list)
np.allclose(outs, result)
paddle.incubate.autograd.disable_prim()
def test_fourth_order(self):
paddle.incubate.autograd.enable_prim()
main = paddle.static.Program()
startup = paddle.static.Program()
with paddle.static.program_guard(main, startup):
x = paddle.static.data(name='x', shape=[1], dtype='float32')
x2 = paddle.multiply(x, x)
x3 = paddle.multiply(x2, x)
x4 = paddle.multiply(x3, x)
x5 = paddle.multiply(x4, x)
out = paddle.sqrt(x5 + x4)
grad1, = paddle.incubate.autograd.grad([out], [x])
grad2, = paddle.incubate.autograd.grad([grad1], [x])
grad3, = paddle.incubate.autograd.grad([grad2], [x])
grad4, = paddle.incubate.autograd.grad([grad3], [x])
paddle.incubate.autograd.prim2orig(main.block(0))
feed = {
x.name: np.array([2.]).astype('float32'),
}
fetch_list = [grad4.name]
# (3*(-5*x^2-16*x-16))/(16*(x+1)^3.5)
result = [np.array([-0.27263762711])]
place = paddle.CPUPlace()
if paddle.device.is_compiled_with_cuda():
place = paddle.CUDAPlace(0)
exe = paddle.static.Executor(place)
exe.run(startup)
outs = exe.run(main, feed=feed, fetch_list=fetch_list)
np.allclose(outs, result)
paddle.incubate.autograd.disable_prim()
def test_disable_prim(self):
def actual(x: np.array):
paddle.incubate.autograd.disable_prim()
main = paddle.static.Program()
startup = paddle.static.Program()
with paddle.static.program_guard(main, startup):
var_x = paddle.static.data('x', shape=x.shape, dtype=x.dtype)
var_x.stop_gradient = False
y = paddle.tanh(var_x)
y_grad = paddle.incubate.autograd.grad(y, var_x)
y_second_grad = paddle.incubate.autograd.grad(y_grad, var_x)
exe = paddle.static.Executor()
exe.run(startup)
return exe.run(main,
feed={'x': x},
fetch_list=[y_grad, y_second_grad])
def expect(x: np.array):
paddle.incubate.autograd.disable_prim()
main = paddle.static.Program()
startup = paddle.static.Program()
with paddle.static.program_guard(main, startup):
var_x = paddle.static.data('x', shape=x.shape, dtype=x.dtype)
var_x.stop_gradient = False
y = paddle.tanh(var_x)
y_grad = paddle.static.gradients(y, var_x)
y_second_grad = paddle.static.gradients(y_grad, var_x)
exe = paddle.static.Executor()
exe.run(startup)
return exe.run(main,
feed={'x': x},
fetch_list=[y_grad, y_second_grad])
x = np.random.randn(100, 200)
for i, j in zip(actual(x), expect(x)):
np.testing.assert_allclose(i, j)
if __name__ == '__main__':
unittest.main()
......@@ -21,7 +21,7 @@ from paddle.incubate.autograd.primops import (neg, set_value, add, sub, mul,
concat, reduce, matmul,
slice_select, slice_assign,
gather, scatter_add, fill_const)
from paddle.incubate.autograd.primx import Transform, topo_path, orig2prim, prim2orig, _gradients
from paddle.incubate.autograd.primx import Transform, topo_path, orig2prim, prim2orig
from paddle.incubate.autograd.utils import enable_prim, disable_prim, prim_enabled
......
......@@ -22,7 +22,7 @@ import contextlib
import collections
import numpy as np
import paddle
from paddle.autograd.utils import as_tensors
from paddle.incubate.autograd.utils import as_tensors
##########################################################
......
......@@ -11,11 +11,12 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.autograd.functional import Hessian, Jacobian, jvp, vjp
from .functional import Hessian, Jacobian, jvp, vjp
from .primapi import forward_grad, grad
from .primx import prim2orig
from .utils import enable_prim, disable_prim, prim_enabled
from .utils import disable_prim, enable_prim, prim_enabled
__all__ = [ # noqa
'vjp', 'jvp', 'Jacobian', 'Hessian', 'prim2orig', 'enable_prim',
'disable_prim', 'prim_enabled'
'vjp', 'jvp', 'Jacobian', 'Hessian', 'enable_prim', 'disable_prim',
'forward_grad', 'grad'
]
......@@ -14,28 +14,26 @@
import typing
import paddle.autograd.utils as tensor_utils
import paddle.incubate.autograd.utils as prim_utils
from paddle.fluid import framework
from paddle.incubate.autograd import primx
from paddle.fluid import backward, framework
from paddle.incubate.autograd import primx, utils
@framework.static_only
def forward_gradients(targets, inputs, input_gradients=None):
def forward_grad(outputs, inputs, grad_inputs=None):
"""Forward mode of automatic differentiation.
.. note::
**ONLY available in the static mode and primitive operators.**
Args:
targets: The target tensor or tensors
outputs: The output tensor or tensors
inputs: The input tensor or tensors
input_gradients: The gradient Tensor or Tensors of inputs which have
grad_inputs: The gradient Tensor or Tensors of inputs which have
the same shape as inputs. Defaults to None, which is equivalent
to all ones.
Returns:
target_gradients (Tensor|Sequence[Tensor]): The gradients for targets.
grad_outputs (Tensor|Sequence[Tensor]): The gradients for outputs.
Examples:
......@@ -53,7 +51,7 @@ def forward_gradients(targets, inputs, input_gradients=None):
with paddle.static.program_guard(main_program, startup_program):
x = paddle.static.data('x', shape=[1], dtype='float32')
y = x * x
y_grad = paddle.incubate.autograd.forward_gradients(y, x)
y_grad = paddle.incubate.autograd.forward_grad(y, x)
paddle.incubate.autograd.prim2orig()
exe = paddle.static.Executor()
......@@ -65,20 +63,20 @@ def forward_gradients(targets, inputs, input_gradients=None):
paddle.incubate.autograd.disable_prim()
paddle.disable_static()
"""
if not prim_utils.prim_enabled():
raise RuntimeError('forward_gradients must be running on primitive'
if not utils.prim_enabled():
raise RuntimeError('forward_grad must be running on primitive '
'operators, use enable_prim to turn it on.')
if not isinstance(targets, (framework.Variable, typing.Sequence)):
raise TypeError(f'Expected targets is Tensor|Sequence[Tensor], '
f'but got {type(targets)}.')
if not isinstance(outputs, (framework.Variable, typing.Sequence)):
raise TypeError(f'Expected outputs is Tensor|Sequence[Tensor], '
f'but got {type(outputs)}.')
if not isinstance(inputs, (framework.Variable, typing.Sequence)):
raise TypeError(f'Expected inputs is Tensor|Sequence[Tensor], '
f'but got {type(inputs)}.')
ys, xs, xs_dot = tensor_utils.as_tensors(targets), tensor_utils.as_tensors(
inputs), tensor_utils.as_tensors(input_gradients)
ys, xs, xs_dot = utils.as_tensors(outputs), utils.as_tensors(
inputs), utils.as_tensors(grad_inputs)
block = framework.default_main_program().current_block()
if any(x.block != block for x in xs + ys):
......@@ -90,4 +88,95 @@ def forward_gradients(targets, inputs, input_gradients=None):
ad = primx.Transform(ys[0].block)
_, ys_dot = ad.linearize(xs, ys, xs_dot)
return ys_dot[0] if isinstance(targets, framework.Variable) else ys_dot
return ys_dot[0] if isinstance(outputs, framework.Variable) else ys_dot
@framework.static_only
def grad(outputs, inputs, grad_outputs=None):
"""Reverse mode of automatic differentiation.
.. note::
**ONLY available in the static mode and primitive operators**
Args:
outputs (Tensor|Sequence[Tensor]): The output Tensor or Tensors.
inputs (Tensor|Sequence[Tensor]): The input Tensor or Tensors.
grad_outputs (Tensor|Sequence[Tensor]): The gradient Tensor or
Tensors of outputs which have the same shape as outputs. Defaults
to None, which is equivalent to all ones.
Returns:
grad_inputs (Tensor|Sequence[Tensor]): The gradients for inputs.
Examples:
.. code-block:: python
import numpy as np
import paddle
paddle.enable_static()
paddle.incubate.autograd.enable_prim()
startup_program = paddle.static.Program()
main_program = paddle.static.Program()
with paddle.static.program_guard(main_program, startup_program):
x = paddle.static.data('x', shape=[1], dtype='float32')
x.stop_gradient = False
y = x * x
x_grad = paddle.incubate.autograd.grad(y, x)
paddle.incubate.autograd.prim2orig()
exe = paddle.static.Executor()
exe.run(startup_program)
x_grad = exe.run(main_program, feed={'x': np.array([2.]).astype('float32')}, fetch_list=[x_grad])
print(x_grad)
# [array([4.], dtype=float32)]
paddle.incubate.autograd.disable_prim()
paddle.disable_static()
"""
if not utils.prim_enabled():
return backward.gradients(outputs, inputs, grad_outputs)
if not isinstance(outputs, (framework.Variable, typing.Sequence)):
raise TypeError(f'Expected outputs is Tensor|Sequence[Tensor], '
f'but got {type(outputs)}.')
if not isinstance(inputs, (framework.Variable, typing.Sequence)):
raise TypeError(f'Expected inputs is Tensor|Sequence[Tensor], '
f'but got {type(inputs)}.')
ys, xs, ys_bar = utils.as_tensors(outputs), utils.as_tensors(
inputs), utils.as_tensors(grad_outputs)
block = framework.default_main_program().current_block()
if any((x is not None and x.block != block) for x in xs + ys):
raise RuntimeError(
'Variable in inputs and outputs should be None or in current block of main program'
)
# TODO(Tongxin) without any prior knowledge about whether the program
# is completely lowered to primitive ops, it's mandatory to run the lowering
# pass once and again. This is obviously inefficient and needs to be
# optimized.
primx.orig2prim(block)
ad = primx.Transform(block)
xs_dot, ys_dot = ad.linearize(xs, ys)
if any(var is None for var in ys_dot):
raise RuntimeError(
'Grads cannot be computed. The given outputs do not depend on inputs'
)
ys_bar, xs_bar = ad.transpose(ys_dot, xs_dot, ys_bar)
# remove xs_dot and their constructor ops
op_indexes = []
for var in xs_dot:
if var is not None:
op_index = block.ops.index(var.op)
if op_index < 0:
raise ValueError(
f'op_index should be greater than or equal to 0, but op_index={op_index}.'
)
op_indexes.append(op_index)
ad.erase_ops(sorted(op_indexes))
ad.erase_dots(xs_dot)
return xs_bar[0] if isinstance(inputs, framework.Variable) else xs_bar
......@@ -14,6 +14,7 @@
import paddle
from paddle.fluid.layer_helper import LayerHelper
from .primreg import REGISTER_FN
......
......@@ -22,7 +22,7 @@ from .primreg import op_position_inputs, op_position_output, lookup_orig2prim, l
from .primrules import _orig2prim, _prim2orig, _jvp, _transpose
from .utils import get_input_var_list, get_output_var_list, flatten, flatten_and_remove_none
from collections import OrderedDict
from paddle.autograd.utils import as_tensors
from paddle.incubate.autograd.utils import as_tensors
def topo_path(xs, ys, block=None):
......@@ -577,47 +577,3 @@ def prim2orig(block=None):
assert block == default_main_program().current_block(
), f'block is neither None nor current block of main program'
_lower(block, reverse=True)
def _gradients(ys, xs, ys_bar=None):
""" A drop-in replacement of paddle.gradients but instead computing
on primitive ops.
Args:
ys: the target tensor or tensors
xs: the input tensor or tensors
ys_bar: the optional gradient tensors of `ys`
Returns:
xs_bar: a list gradients of input `xs`
"""
ys, xs, ys_bar = as_tensors(ys), as_tensors(xs), as_tensors(ys_bar)
block = default_main_program().current_block()
for el in xs + ys:
assert el is None or el.block == block, f'variable in xs and ys should be None or in current block of main program'
# TODO(Tongxin) without any prior knowledge about whether the program
# is completely lowered to primitive ops, it's mandatory to run the lowering
# pass once and again. This is obviously inefficient and needs to be
# optimized.
orig2prim(block)
ad = Transform(block)
xs_dot, ys_dot = ad.linearize(xs, ys)
if any(var is None for var in ys_dot):
assert False, f'Gradients cannot be computed. The given output `ys` does not depend on input `xs`.'
ys_bar, xs_bar = ad.transpose(ys_dot, xs_dot, ys_bar)
# remove xs_dot and their constructor ops
op_indexes = []
for var in xs_dot:
if var is not None:
op_index = block.ops.index(var.op)
assert op_index >= 0, f'op_index should be greater than or equal to 0, but op_index={op_index}.'
op_indexes.append(op_index)
ad.erase_ops(sorted(op_indexes))
ad.erase_dots(xs_dot)
return xs_bar
......@@ -11,6 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import typing
import paddle
from paddle.fluid import framework as framework
......@@ -170,3 +171,12 @@ def flatten(inp):
def flatten_and_remove_none(inp):
flattened = flatten(inp)
return [var for var in flattened if var is not None]
def as_tensors(xs):
if isinstance(xs, framework.Variable):
return (xs, )
elif isinstance(xs, typing.Sequence):
return tuple(xs)
else:
return xs