diff --git a/python/paddle/__init__.py b/python/paddle/__init__.py
index 07073bfbfde1c6bb298656d86f97acb8679a95f7..ad4a6d64d10adb4deb58f8e0c67c56b4f96afadf 100644
--- a/python/paddle/__init__.py
+++ b/python/paddle/__init__.py
@@ -202,3 +202,9 @@ from .tensor.stat import var #DEFINE_ALIAS
 # from .tensor.tensor import Tensor #DEFINE_ALIAS
 # from .tensor.tensor import LoDTensor #DEFINE_ALIAS
 # from .tensor.tensor import LoDTensorArray #DEFINE_ALIAS
+from .fluid.dygraph.base import enable_dygraph #DEFINE_ALIAS
+from .fluid.dygraph.base import disable_dygraph #DEFINE_ALIAS
+from .fluid.framework import in_dygraph_mode #DEFINE_ALIAS
+enable_imperative = enable_dygraph #DEFINE_ALIAS
+disable_imperative = disable_dygraph #DEFINE_ALIAS
+in_imperative_mode = in_dygraph_mode #DEFINE_ALIAS
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_basic.py b/python/paddle/fluid/tests/unittests/test_imperative_basic.py
index a8f621fa6274c37e829bbf5555ab9ac9d67c33c9..d52860396a278e93c075025853063bf29f75d17c 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_basic.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_basic.py
@@ -204,6 +204,31 @@ class TestImperative(unittest.TestCase):
         self.assertTrue(np.array_equal(dy_out1, dy_out2))
         self.assertTrue(np.array_equal(dy_grad1, dy_grad2))
 
+    def test_functional_paddle_imperative_dygraph_context(self):
+        self.assertFalse(paddle.imperative.enabled())
+        paddle.enable_imperative()
+        self.assertTrue(paddle.imperative.enabled())
+        np_inp = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32)
+        var_inp = paddle.imperative.to_variable(np_inp)
+        mlp = MLP(input_size=2)
+        out = mlp(var_inp)
+        dy_out1 = out.numpy()
+        out.backward()
+        dy_grad1 = mlp._linear1.weight.gradient()
+        paddle.disable_imperative()
+        self.assertFalse(paddle.imperative.enabled())
+        with paddle.imperative.guard():
+            self.assertTrue(paddle.imperative.enabled())
+            var_inp = paddle.imperative.to_variable(np_inp)
+            mlp = MLP(input_size=2)
+            out = mlp(var_inp)
+            dy_out2 = out.numpy()
+            out.backward()
+            dy_grad2 = mlp._linear1.weight.gradient()
+        self.assertFalse(paddle.imperative.enabled())
+        self.assertTrue(np.array_equal(dy_out1, dy_out2))
+        self.assertTrue(np.array_equal(dy_grad1, dy_grad2))
+
     def test_isinstance(self):
         var = fluid.layers.data(shape=[1], name='x', dtype='float32')
         self.assertTrue(isinstance(var, fluid.Variable))
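Note: the test above exercises the new top-level mode switches added to `python/paddle/__init__.py`. A minimal usage sketch of those switches, under the same API names the patch introduces (the tensor math below is illustrative, not part of the patch):

```python
import numpy as np
import paddle

# Global switch: turn imperative (dygraph) mode on and off by hand.
paddle.enable_imperative()
assert paddle.imperative.enabled()

x = paddle.imperative.to_variable(np.ones([2, 2], dtype=np.float32))
y = (x + 1).numpy()  # eager execution; no Program/Executor needed

paddle.disable_imperative()
assert not paddle.imperative.enabled()

# Scoped switch: the guard restores the previous state on exit.
with paddle.imperative.guard():
    assert paddle.imperative.enabled()
assert not paddle.imperative.enabled()
```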
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_save_load.py b/python/paddle/fluid/tests/unittests/test_imperative_save_load.py
index 734856ec0d66c6b17f801eca73cbbea3f421cb6b..3dac9324e4eeb2073a4a61fddb02217969dafe50 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_save_load.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_save_load.py
@@ -289,7 +289,7 @@ class TestDygraphPtbRnn(unittest.TestCase):
                 np_t = v.numpy()
                 self.model_base[k] = np_t
 
-            fluid.save_dygraph(self.state_dict, "./test_dy")
+            paddle.imperative.save(self.state_dict, "./test_dy")
 
     def testLoadAndSetVarBase(self):
         seed = 90
@@ -369,7 +369,8 @@ class TestDygraphPtbRnn(unittest.TestCase):
             if isinstance(adam._learning_rate, LearningRateDecay):
                 adam._learning_rate.step_num = 0
 
-            para_state_dict, opti_state_dict = fluid.load_dygraph("./test_dy")
+            para_state_dict, opti_state_dict = paddle.imperative.load(
+                "./test_dy")
             adam.set_dict(opti_state_dict)
 
             opti_dict = adam.state_dict()
@@ -881,18 +882,18 @@ class TestDygraphPtbRnn(unittest.TestCase):
         with fluid.dygraph.guard():
             emb = fluid.dygraph.Embedding([10, 10])
             state_dict = emb.state_dict()
-            paddle.imperative.save_dygraph(state_dict,
-                                           os.path.join('saved_dy', 'emb_dy'))
+            paddle.imperative.save(state_dict,
+                                   os.path.join('saved_dy', 'emb_dy'))
 
-            para_state_dict, opti_state_dict = paddle.imperative.load_dygraph(
+            para_state_dict, opti_state_dict = paddle.imperative.load(
                 os.path.join('saved_dy', 'emb_dy'))
 
             self.assertTrue(opti_state_dict == None)
 
-            para_state_dict, opti_state_dict = paddle.imperative.load_dygraph(
+            para_state_dict, opti_state_dict = paddle.imperative.load(
                 os.path.join('saved_dy', 'emb_dy.pdparams'))
 
-            para_state_dict, opti_state_dict = paddle.imperative.load_dygraph(
+            para_state_dict, opti_state_dict = paddle.imperative.load(
                 os.path.join('saved_dy', 'emb_dy.pdopt'))
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_selected_rows.py b/python/paddle/fluid/tests/unittests/test_imperative_selected_rows.py
index cfaca5a565deb4ac29542c76f4b9e7b4f8ec1431..2789174ba7a5805b86557a9a465c661a906bc0a7 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_selected_rows.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_selected_rows.py
@@ -24,7 +24,7 @@ import paddle.fluid.core as core
 import paddle
 
 
-class SimpleNet(paddle.imperative.Layer):
+class SimpleNet(paddle.nn.Layer):
     def __init__(self, vocab_size, hidden_size, dtype):
         super(SimpleNet, self).__init__()
         self.emb = fluid.dygraph.Embedding(
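Note: with the rename from `save_dygraph`/`load_dygraph` to `save`/`load`, checkpointing in imperative mode reads as below. This sketch mirrors the embedding save/load flow tested above (the directory name and the `set_dict` round-trip are illustrative):

```python
import os
import paddle
import paddle.fluid as fluid

with paddle.imperative.guard():
    emb = fluid.dygraph.Embedding([10, 10])

    # Save parameters; Paddle writes <path>.pdparams (and .pdopt for
    # optimizer state saved via an optimizer's state_dict).
    paddle.imperative.save(emb.state_dict(),
                           os.path.join('saved_dy', 'emb_dy'))

    # load returns (parameter_dict, optimizer_dict); the latter is None
    # here because no optimizer state was saved alongside the parameters.
    para_state_dict, opti_state_dict = paddle.imperative.load(
        os.path.join('saved_dy', 'emb_dy'))
    assert opti_state_dict is None
    emb.set_dict(para_state_dict)
```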
diff --git a/python/paddle/fluid/tests/unittests/test_paddle_imperative_double_grad.py b/python/paddle/fluid/tests/unittests/test_paddle_imperative_double_grad.py
new file mode 100644
index 0000000000000000000000000000000000000000..50e587478957a9e5c359d0c8a9d606859f17e994
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/test_paddle_imperative_double_grad.py
@@ -0,0 +1,300 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle.fluid as fluid
+from paddle.fluid.wrapped_decorator import wrap_decorator
+import unittest
+from unittest import TestCase
+import numpy as np
+import paddle
+
+
+def _dygraph_guard_(func):
+    def __impl__(*args, **kwargs):
+        if paddle.in_imperative_mode():
+            return func(*args, **kwargs)
+        else:
+            with fluid.dygraph.guard():
+                return func(*args, **kwargs)
+
+    return __impl__
+
+
+dygraph_guard = wrap_decorator(_dygraph_guard_)
+
+
+def random_var(size, low=-1, high=1, dtype='float32'):
+    x_np = np.random.uniform(low=low, high=high, size=size).astype(dtype)
+    return fluid.dygraph.to_variable(x_np)
+
+
+class TestDygraphDoubleGrad(TestCase):
+    def setUp(self):
+        self.sort_sum_gradient = False
+        self.shape = [5, 10]
+
+    def grad(self,
+             outputs,
+             inputs,
+             grad_outputs=None,
+             no_grad_vars=None,
+             retain_graph=None,
+             create_graph=False,
+             allow_unused=False):
+        backward_strategy = fluid.dygraph.BackwardStrategy()
+        backward_strategy.sort_sum_gradient = self.sort_sum_gradient
+        return paddle.imperative.grad(
+            outputs=outputs,
+            inputs=inputs,
+            grad_outputs=grad_outputs,
+            no_grad_vars=no_grad_vars,
+            retain_graph=retain_graph,
+            create_graph=create_graph,
+            allow_unused=allow_unused,
+            backward_strategy=backward_strategy)
+
+    @dygraph_guard
+    def test_exception(self):
+        with self.assertRaises(AssertionError):
+            self.grad(None, None)
+
+        shape = self.shape
+
+        with self.assertRaises(AssertionError):
+            self.grad(1, random_var(shape))
+
+        with self.assertRaises(AssertionError):
+            self.grad(random_var(shape), 1)
+
+        with self.assertRaises(AssertionError):
+            self.grad([1], [random_var(shape)])
+
+        with self.assertRaises(AssertionError):
+            self.grad([random_var(shape)], [1])
+
+        with self.assertRaises(AssertionError):
+            self.grad([random_var(shape), random_var(shape)],
+                      [random_var(shape)], [random_var(shape)])
+
+        with self.assertRaises(AssertionError):
+            self.grad(
+                [random_var(shape)], [random_var(shape)], no_grad_vars=[1])
+
+        with self.assertRaises(AssertionError):
+            self.grad([random_var(shape)], [random_var(shape)], no_grad_vars=1)
+
+    @dygraph_guard
+    def test_simple_example(self):
+        x = random_var(self.shape)
+        x.stop_gradient = False
+        y = x + 1
+
+        for create_graph in [False, True]:
+            dx, = self.grad(
+                [x], [x], create_graph=create_graph, retain_graph=True)
+            self.assertEqual(dx.shape, x.shape)
+            self.assertTrue(np.all(dx.numpy() == 1))
+            self.assertNotEqual(dx.stop_gradient, create_graph)
+
+            dx_mul_2, = self.grad(
+                [y, x], [x], create_graph=create_graph, retain_graph=True)
+            self.assertEqual(dx_mul_2.shape, x.shape)
+            self.assertTrue(np.all(dx_mul_2.numpy() == 2))
+            self.assertNotEqual(dx_mul_2.stop_gradient, create_graph)
+
+            none_grad, = self.grad(
+                [x], [y], create_graph=create_graph, allow_unused=True)
+            self.assertTrue(none_grad is None)
+
+            grad_with_none_and_not_none, = self.grad(
+                [x, y], [y], create_graph=create_graph)
+            self.assertEqual(grad_with_none_and_not_none.shape, x.shape)
+            self.assertTrue(np.all(grad_with_none_and_not_none.numpy() == 1))
+            self.assertNotEqual(grad_with_none_and_not_none.stop_gradient,
+                                create_graph)
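Note: as exercised so far in this new test file, the core contract of `paddle.imperative.grad` is one returned gradient per entry in `inputs`, with `allow_unused` controlling whether unreachable inputs yield `None`. A standalone sketch of that contract (values and shapes illustrative):

```python
import numpy as np
import paddle
import paddle.fluid as fluid

with paddle.imperative.guard():
    x = fluid.dygraph.to_variable(np.ones([5, 10], dtype=np.float32))
    x.stop_gradient = False
    y = x * x

    # One gradient per input: dy/dx = 2x, so all entries equal 2 here.
    dx, = paddle.imperative.grad(outputs=[y], inputs=[x], retain_graph=True)
    assert np.all(dx.numpy() == 2.0)

    # allow_unused=True yields None for inputs the outputs do not depend on.
    z = fluid.dygraph.to_variable(np.ones([1], dtype=np.float32))
    z.stop_gradient = False
    dz, = paddle.imperative.grad(outputs=[y], inputs=[z], allow_unused=True)
    assert dz is None
```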
+
+    @dygraph_guard
+    def test_none_one_initial_gradient(self):
+        numel = 1
+        for s in self.shape:
+            numel *= s
+
+        half_numel = int(numel / 2)
+        half_x_positive = np.random.uniform(low=1, high=2, size=[half_numel])
+        half_x_negative = np.random.uniform(
+            low=-2, high=-1, size=[numel - half_numel])
+        x_np = np.array(list(half_x_positive) + list(half_x_negative)).astype(
+            'float32')
+        np.random.shuffle(x_np)
+
+        x = fluid.dygraph.to_variable(x_np)
+        x.stop_gradient = False
+
+        alpha = 0.2
+        y = fluid.layers.leaky_relu(x, alpha=alpha)
+        y = y * y
+        z = y * y
+
+        x_np = x.numpy()
+        relu_x_np = np.maximum(x_np, alpha * x_np).astype('float32')
+        relu_x_grad_np = ((x_np > 0) + (x_np < 0) * alpha).astype('float32')
+        dy_expected = (relu_x_np * relu_x_grad_np * 2).astype('float32')
+        dz_expected = (np.power(relu_x_np, 3) * relu_x_grad_np *
+                       4).astype('float32')
+
+        random_grad_y = random_var(y.shape, low=1, high=2)
+        random_grad_z = random_var(z.shape, low=1, high=2)
+        ones_grad_y = np.ones(y.shape).astype('float32')
+        ones_grad_z = np.ones(z.shape).astype('float32')
+
+        original_random_grad_y = random_grad_y.numpy()
+        original_random_grad_z = random_grad_z.numpy()
+
+        for grad_y in [random_grad_y]:
+            for grad_z in [random_grad_z]:
+                for create_graph in [False, True]:
+                    dx_actual, = self.grad(
+                        outputs=[y, z],
+                        inputs=[x],
+                        grad_outputs=[grad_y, grad_z],
+                        create_graph=create_graph,
+                        retain_graph=True)
+
+                    grad_y_np = ones_grad_y if grad_y is None else grad_y.numpy(
+                    )
+                    grad_z_np = ones_grad_z if grad_z is None else grad_z.numpy(
+                    )
+
+                    dx_expected = dy_expected * grad_y_np + dz_expected * grad_z_np
+                    self.assertTrue(np.allclose(dx_actual.numpy(), dx_expected))
+
+                    if grad_y is not None:
+                        self.assertTrue(grad_y.stop_gradient)
+                        self.assertTrue(
+                            np.array_equal(grad_y.numpy(),
+                                           original_random_grad_y))
+
+                    if grad_z is not None:
+                        self.assertTrue(grad_z.stop_gradient)
+                        self.assertTrue(
+                            np.array_equal(grad_z.numpy(),
+                                           original_random_grad_z))
+
+    @dygraph_guard
+    def test_example_with_gradient_accumulation_and_create_graph(self):
+        x = random_var(self.shape)
+        x_np = x.numpy()
+        numel = x_np.size
+        x.stop_gradient = False
+
+        y = fluid.layers.relu(x)
+        z = y + 1
+        w = z * z
+
+        w_mean = fluid.layers.reduce_mean(w)
+        del y, z, w
+
+        dx_actual, = self.grad([w_mean], [x], create_graph=True)
+        del w_mean
+
+        self.assertFalse(dx_actual.stop_gradient)
+
+        # Theoretical result based on math calculation
+        dx_expected = (1.0 / float(numel) * (np.maximum(x_np, 0) + 1) *
+                       (x_np > 0) * 2).astype('float32')
+        self.assertTrue(np.allclose(dx_actual.numpy(), dx_expected))
+
+        loss = fluid.layers.reduce_mean(dx_actual * dx_actual + x * x)
+        loss.backward()
+
+        x_grad_actual = x.gradient()
+        x_grad_expected = (2.0 / float(numel) *
+                           (x_np + dx_expected *
+                            (x_np > 0) * 2 / float(numel))).astype('float32')
+        self.assertTrue(np.allclose(x_grad_actual, x_grad_expected))
+
+    @dygraph_guard
+    def test_example_with_gradient_accumulation_and_no_grad_vars(self):
+        x = random_var(self.shape)
+        x_np = x.numpy()
+        numel = x_np.size
+        x.stop_gradient = False
+
+        y1 = fluid.layers.relu(x)
+        y2 = fluid.layers.relu(x)
+        z = y1 + y2
+        w = z * z
+
+        w_mean = fluid.layers.reduce_mean(w)
+        del y1, z, w
+
+        dx_actual, = self.grad(
+            [w_mean], [x], create_graph=True, no_grad_vars=[y2])
+
+        self.assertFalse(y2.stop_gradient)
+        self.assertFalse(dx_actual.stop_gradient)
+
+        dx_expected = (1.0 / float(numel) * (np.maximum(x_np, 0) + y2.numpy()) *
+                       (x_np > 0) * 2).astype('float32')
+        self.assertTrue(np.allclose(dx_actual.numpy(), dx_expected))
+
+        loss = fluid.layers.reduce_mean(dx_actual * dx_actual + x * x)
+        loss.backward()
+
+        x_grad_actual = x.gradient()
+        x_grad_expected = (2.0 / float(numel) *
+                           (x_np + dx_expected *
+                            (x_np > 0) * 4 / float(numel))).astype('float32')
+        self.assertTrue(np.allclose(x_grad_actual, x_grad_expected))
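Note: `create_graph=True` is what enables the double-grad pattern in the two tests above: the returned first-order gradient stays on the graph (its `stop_gradient` is `False`), so a loss built from it supports a second backward pass. A condensed, illustrative sketch of the same pattern:

```python
import numpy as np
import paddle
import paddle.fluid as fluid

with paddle.imperative.guard():
    x = fluid.dygraph.to_variable(np.full([5, 10], 2.0, dtype=np.float32))
    x.stop_gradient = False
    w_mean = fluid.layers.reduce_mean(x * x)

    # create_graph=True keeps dx differentiable, so it can participate
    # in a second backward pass.
    dx, = paddle.imperative.grad([w_mean], [x], create_graph=True)
    assert not dx.stop_gradient

    loss = fluid.layers.reduce_mean(dx * dx)
    loss.backward()  # second-order: d(loss)/dx flows through dx

    print(x.gradient())  # second-order gradient accumulated into x
```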
+
+    @dygraph_guard
+    def test_example_with_gradient_accumulation_and_not_create_graph(self):
+        x = random_var(self.shape)
+        x_np = x.numpy()
+        numel = x_np.size
+        x.stop_gradient = False
+
+        y = fluid.layers.relu(x)
+        z = y + 1
+        w = z * z
+
+        w_mean = fluid.layers.reduce_mean(w)
+        del y, z, w
+
+        dx_actual, = self.grad([w_mean], [x], create_graph=False)
+        del w_mean
+
+        self.assertTrue(dx_actual.stop_gradient)
+
+        dx_expected = (1.0 / float(numel) * (np.maximum(x_np, 0) + 1) *
+                       (x_np > 0) * 2).astype('float32')
+
+        self.assertTrue(np.allclose(dx_actual.numpy(), dx_expected))
+
+        loss = fluid.layers.reduce_mean(dx_actual * dx_actual + x * x)
+        loss.backward()
+
+        x_grad_actual = x.gradient()
+        x_grad_expected = (2.0 * x_np / float(numel)).astype('float32')
+        self.assertTrue(np.allclose(x_grad_actual, x_grad_expected))
+
+
+class TestDygraphDoubleGradSortGradient(TestDygraphDoubleGrad):
+    def setUp(self):
+        self.sort_sum_gradient = True
+        self.shape = [5, 10]
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/python/paddle/imperative/__init__.py b/python/paddle/imperative/__init__.py
index 6feef89a2d07a59d5577d481282c826ab7a47488..dcaf23a765344aac02840db4b089b01dbaf3fa0b 100644
--- a/python/paddle/imperative/__init__.py
+++ b/python/paddle/imperative/__init__.py
@@ -14,16 +14,16 @@
 # define api used to run in imperative mode
 __all__ = [
-    'BackwardStrategy', 'guard', 'Layer', 'LayerList', 'load_dygraph',
-    'save_dygraph', 'prepare_context', 'to_variable', 'TracedLayer', 'no_grad',
-    'ParameterList', 'Sequential'
+    'BackwardStrategy', 'enabled', 'grad', 'guard', 'LayerList', 'load', 'save',
+    'prepare_context', 'to_variable', 'TracedLayer', 'no_grad', 'ParameterList',
+    'Sequential'
 ]
 
 from paddle.fluid import core
-from ..fluid.dygraph.base import guard, no_grad, to_variable
-from ..fluid.dygraph.layers import Layer
+from ..fluid.dygraph.base import enabled, guard, no_grad, to_variable, grad
 from ..fluid.dygraph.container import LayerList, ParameterList, Sequential
-from ..fluid.dygraph.checkpoint import load_dygraph, save_dygraph
+from ..fluid.dygraph.checkpoint import load_dygraph as load
+from ..fluid.dygraph.checkpoint import save_dygraph as save
 from ..fluid.dygraph.parallel import prepare_context
 from ..fluid.dygraph.jit import TracedLayer
diff --git a/python/paddle/nn/__init__.py b/python/paddle/nn/__init__.py
index e20d9023055721628bec5eb9e9a53941051351d0..c164bb7829c41604d536fff5681bc2e3d6851268 100644
--- a/python/paddle/nn/__init__.py
+++ b/python/paddle/nn/__init__.py
@@ -95,3 +95,4 @@ from .layer.norm import InstanceNorm #DEFINE_ALIAS
 # from .layer.rnn import LSTMCell #DEFINE_ALIAS
 from .layer import loss #DEFINE_ALIAS
 from .layer import conv #DEFINE_ALIAS
+from ..fluid.dygraph.layers import Layer #DEFINE_ALIAS
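Note: `Layer` is dropped from `paddle.imperative.__all__` and re-exported as `paddle.nn.Layer`, which is why `SimpleNet` in the selected-rows test now subclasses `paddle.nn.Layer`. An illustrative sketch of the new spelling (the `Linear` sublayer and shapes are assumptions for demonstration, not part of the patch):

```python
import numpy as np
import paddle
import paddle.fluid as fluid


class SimpleNet(paddle.nn.Layer):  # formerly paddle.imperative.Layer
    def __init__(self):
        super(SimpleNet, self).__init__()
        self._linear = fluid.dygraph.Linear(10, 3)

    def forward(self, x):
        return self._linear(x)


with paddle.imperative.guard():
    net = SimpleNet()
    x = paddle.imperative.to_variable(
        np.random.rand(4, 10).astype('float32'))
    out = net(x)  # eager forward pass; out has shape [4, 3]
```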