From c40c16a964013dd62bb9c97977f860ff53657575 Mon Sep 17 00:00:00 2001 From: xiemoyuan <71377852+xiemoyuan@users.noreply.github.com> Date: Mon, 26 Apr 2021 15:19:34 +0800 Subject: [PATCH 001/720] Modified the return value of tensor.grad from numpy to tensor. (#32142) * Modified the return value of tensor.grad from numpy as tensor. * Modify unittests. * fixed bugs. * Add warning info for x.grad * fixed unittests which used x.grad * fixed bug. --- .../fluid/dygraph/varbase_patch_methods.py | 35 +++++++++++-- .../tests/custom_op/test_custom_concat.py | 2 +- .../fluid/tests/custom_op/test_custom_conj.py | 5 +- .../custom_op/test_custom_relu_op_setup.py | 5 +- .../parallel_dygraph_gradient_check.py | 3 +- .../fluid/tests/unittests/test_base_layer.py | 6 ++- .../tests/unittests/test_custom_grad_input.py | 9 ++-- .../tests/unittests/test_imperative_basic.py | 50 +++++++++++-------- .../fluid/tests/unittests/test_inplace.py | 8 +-- .../fluid/tests/unittests/test_lookahead.py | 3 +- .../fluid/tests/unittests/test_pylayer_op.py | 6 ++- .../unittests/test_tensor_register_hook.py | 39 ++++++++------- .../fluid/tests/unittests/test_var_base.py | 9 ++-- 13 files changed, 114 insertions(+), 66 deletions(-) diff --git a/python/paddle/fluid/dygraph/varbase_patch_methods.py b/python/paddle/fluid/dygraph/varbase_patch_methods.py index 11bc150b281..dbc2b24aeea 100644 --- a/python/paddle/fluid/dygraph/varbase_patch_methods.py +++ b/python/paddle/fluid/dygraph/varbase_patch_methods.py @@ -26,6 +26,7 @@ from .base import switch_to_static_graph from .math_op_patch import monkey_patch_math_varbase from .parallel import scale_loss from paddle.fluid.data_feeder import convert_dtype, _PADDLE_DTYPE_2_NUMPY_DTYPE +import paddle.utils.deprecated as deprecated class TensorHookRemoveHelper(object): @@ -238,8 +239,16 @@ def monkey_patch_varbase(): "Variable.backward() is only available in DyGraph mode") @framework.dygraph_only + @deprecated( + since="2.1.0", + reason="Please use x.grad, which returns the tensor value of the gradient." + ) def gradient(self): """ + .. warning:: + This API will be deprecated in the future, it is recommended to use + :code:`x.grad` which returns the tensor value of the gradient. + Get the Gradient of Current Tensor. Returns: @@ -253,7 +262,7 @@ def monkey_patch_varbase(): x = paddle.to_tensor(5., stop_gradient=False) y = paddle.pow(x, 4.0) y.backward() - print("grad of x: {}".format(x.grad)) + print("grad of x: {}".format(x.gradient())) # [500.] """ @@ -337,10 +346,28 @@ def monkey_patch_varbase(): @property def grad(self): """ - The alias of gradient(). - """ + .. warning:: + This API will return the tensor value of the gradient. If you want + to get the numpy value of the gradient, you can use :code:`x.grad.numpy()`. + + Get the Gradient of Current Tensor. + + Returns: + Tensor: the gradient of current Tensor + + Examples: + .. 
code-block:: python + + import paddle - return self.gradient() + x = paddle.to_tensor(5., stop_gradient=False) + y = paddle.pow(x, 4.0) + y.backward() + print("grad of x: {}".format(x.grad)) + # Tensor(shape=[1], dtype=float32, place=CUDAPlace(0), stop_gradient=False, [500.]) + + """ + return self._grad_ivar() def clear_grad(self): """ diff --git a/python/paddle/fluid/tests/custom_op/test_custom_concat.py b/python/paddle/fluid/tests/custom_op/test_custom_concat.py index ea41126c1c4..d796c3b5fbd 100644 --- a/python/paddle/fluid/tests/custom_op/test_custom_concat.py +++ b/python/paddle/fluid/tests/custom_op/test_custom_concat.py @@ -58,7 +58,7 @@ def concat_dynamic(func, dtype, np_inputs, axis_v, with_attr=False): out = func(inputs, axis) out.stop_gradient = False out.backward() - grad_inputs = [x.grad for x in inputs] + grad_inputs = [x.grad.numpy() for x in inputs] return out.numpy(), grad_inputs diff --git a/python/paddle/fluid/tests/custom_op/test_custom_conj.py b/python/paddle/fluid/tests/custom_op/test_custom_conj.py index 3a8f79a06fc..a8e40198803 100644 --- a/python/paddle/fluid/tests/custom_op/test_custom_conj.py +++ b/python/paddle/fluid/tests/custom_op/test_custom_conj.py @@ -63,7 +63,10 @@ def conj_dynamic(func, dtype, np_input): sum_out.real().backward() else: sum_out.backward() - return out.numpy(), x.grad + if x.grad is None: + return out.numpy(), x.grad + else: + return out.numpy(), x.grad.numpy() def conj_static(func, shape, dtype, np_input): diff --git a/python/paddle/fluid/tests/custom_op/test_custom_relu_op_setup.py b/python/paddle/fluid/tests/custom_op/test_custom_relu_op_setup.py index 642e93ebcb8..0af0aa16466 100644 --- a/python/paddle/fluid/tests/custom_op/test_custom_relu_op_setup.py +++ b/python/paddle/fluid/tests/custom_op/test_custom_relu_op_setup.py @@ -34,7 +34,10 @@ def custom_relu_dynamic(func, device, dtype, np_x, use_func=True): out.backward() - return out.numpy(), t.grad + if t.grad is None: + return out.numpy(), t.grad + else: + return out.numpy(), t.grad.numpy() def custom_relu_static(func, diff --git a/python/paddle/fluid/tests/unittests/parallel_dygraph_gradient_check.py b/python/paddle/fluid/tests/unittests/parallel_dygraph_gradient_check.py index 0d2631fa108..70023522409 100644 --- a/python/paddle/fluid/tests/unittests/parallel_dygraph_gradient_check.py +++ b/python/paddle/fluid/tests/unittests/parallel_dygraph_gradient_check.py @@ -110,7 +110,8 @@ class TestDistTraning(unittest.TestCase): def check_acc(self, grad, grad_sum, acc_grad): if grad is not None: - grad_sum = grad_sum + grad + grad_sum = grad_sum + grad.numpy() + acc_grad = acc_grad.numpy() if acc_grad is not None else None np.testing.assert_allclose(grad_sum, acc_grad, rtol=1e-6) return grad_sum diff --git a/python/paddle/fluid/tests/unittests/test_base_layer.py b/python/paddle/fluid/tests/unittests/test_base_layer.py index e6e15575f2c..27c8869b21d 100644 --- a/python/paddle/fluid/tests/unittests/test_base_layer.py +++ b/python/paddle/fluid/tests/unittests/test_base_layer.py @@ -349,7 +349,8 @@ class TestLayerTo(unittest.TestCase): paddle.fluid.core.VarDesc.VarType.FP64) self.assertEqual(self.linear.buf_name.dtype, paddle.fluid.core.VarDesc.VarType.FP64) - self.assertTrue(np.allclose(self.linear.weight.grad, self.new_grad)) + self.assertTrue( + np.allclose(self.linear.weight.grad.numpy(), self.new_grad)) self.assertTrue(self.linear.weight._grad_ivar().dtype, paddle.fluid.core.VarDesc.VarType.FP64) @@ -358,7 +359,8 @@ class TestLayerTo(unittest.TestCase): 
paddle.fluid.core.VarDesc.VarType.FP64) self.assertEqual(self.linear.buf_name.dtype, paddle.fluid.core.VarDesc.VarType.FP64) - self.assertTrue(np.allclose(self.linear.weight.grad, self.new_grad)) + self.assertTrue( + np.allclose(self.linear.weight.grad.numpy(), self.new_grad)) self.assertTrue(self.linear.weight._grad_ivar().dtype, paddle.fluid.core.VarDesc.VarType.FP64) diff --git a/python/paddle/fluid/tests/unittests/test_custom_grad_input.py b/python/paddle/fluid/tests/unittests/test_custom_grad_input.py index a7472e7ffd7..623b7e68b3f 100644 --- a/python/paddle/fluid/tests/unittests/test_custom_grad_input.py +++ b/python/paddle/fluid/tests/unittests/test_custom_grad_input.py @@ -46,7 +46,7 @@ class TestTensorBackward(unittest.TestCase): x_grad = np.matmul(grad, y.T) - self.assertTrue(np.allclose(x_grad, x_tensor.grad)) + self.assertTrue(np.allclose(x_grad, x_tensor.grad.numpy())) class TestBackwardAPI(unittest.TestCase): @@ -75,7 +75,8 @@ class TestBackwardAPI(unittest.TestCase): x_grad = np.matmul(grad, y.T) - self.assertTrue(np.allclose(x_grad * 2, x_tensor.grad)) + self.assertTrue( + np.allclose(x_grad * 2, x_tensor.grad.numpy())) def test_backward_single_tensor(self): for dtype in self._dtypes: @@ -94,7 +95,7 @@ class TestBackwardAPI(unittest.TestCase): x_grad = np.matmul(grad, y.T) - self.assertTrue(np.allclose(x_grad, x_tensor.grad)) + self.assertTrue(np.allclose(x_grad, x_tensor.grad.numpy())) def test_backward_none_grad_tensor(self): for dtype in self._dtypes: @@ -112,7 +113,7 @@ class TestBackwardAPI(unittest.TestCase): x_grad = np.matmul(grad, y.T) - self.assertTrue(np.allclose(x_grad, x_tensor.grad)) + self.assertTrue(np.allclose(x_grad, x_tensor.grad.numpy())) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_imperative_basic.py b/python/paddle/fluid/tests/unittests/test_imperative_basic.py index 9dae36c3c22..1cdb57c540a 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_basic.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_basic.py @@ -506,15 +506,15 @@ class TestImperative(unittest.TestCase): for i in range(10): y = paddle.pow(x, 4.0) y.backward() - self.assertEqual(x.grad, (i + 1) * 500) + self.assertEqual(x.grad.numpy(), (i + 1) * 500) x.clear_gradient() - self.assertEqual(x.grad, 0.) + self.assertEqual(x.grad.numpy(), 0.) for i in range(10): y = paddle.pow(x, 4.0) y.backward() - self.assertEqual(x.grad, (i + 1) * 500) + self.assertEqual(x.grad.numpy(), (i + 1) * 500) x.clear_grad() - self.assertEqual(x.grad, 0.) + self.assertEqual(x.grad.numpy(), 0.) 
def test_simple_net(sort_sum_gradient): fluid.set_flags({'FLAGS_sort_sum_gradient': sort_sum_gradient}) @@ -527,9 +527,9 @@ class TestImperative(unittest.TestCase): loss2 = x * z loss1.backward(retain_graph=True) loss2.backward(retain_graph=True) - self.assertTrue(np.array_equal(x.grad, [23.])) - self.assertTrue(np.array_equal(y.grad, [25.])) - self.assertTrue(np.array_equal(z.grad, [5.])) + self.assertTrue(np.array_equal(x.grad.numpy(), [23.])) + self.assertTrue(np.array_equal(y.grad.numpy(), [25.])) + self.assertTrue(np.array_equal(z.grad.numpy(), [5.])) x.clear_grad() y.clear_grad() z.clear_grad() @@ -542,13 +542,13 @@ class TestImperative(unittest.TestCase): loss = fun(x, y, z) loss.backward(retain_graph=True) # x.grad = 2*x*y + z + 2*y = 27 - self.assertTrue(np.array_equal(x.grad, [27])) + self.assertTrue(np.array_equal(x.grad.numpy(), [27])) loss.backward(retain_graph=True) - self.assertTrue(np.array_equal(x.grad, [54])) + self.assertTrue(np.array_equal(x.grad.numpy(), [54])) loss.backward() - self.assertTrue(np.array_equal(x.grad, [81])) + self.assertTrue(np.array_equal(x.grad.numpy(), [81])) with self.assertRaises(RuntimeError): loss.backward() @@ -558,8 +558,8 @@ class TestImperative(unittest.TestCase): dx = paddle.grad([loss1], x, create_graph=True)[0] loss = loss1 + loss2 + dx loss.backward() - self.assertTrue(np.array_equal(dx.grad, [1])) - self.assertTrue(np.array_equal(x.grad, [108])) + self.assertTrue(np.array_equal(dx.grad.numpy(), [1])) + self.assertTrue(np.array_equal(x.grad.numpy(), [108])) def test_mlp(sort_sum_gradient): fluid.set_flags({'FLAGS_sort_sum_gradient': sort_sum_gradient}) @@ -579,28 +579,34 @@ class TestImperative(unittest.TestCase): detach_x = x.detach() clear_loss = mlp2(detach_x) clear_loss.backward() - expected_weight1_grad = expected_weight1_grad + mlp2._linear1.weight.grad - expected_bias1_grad = expected_bias1_grad + mlp2._linear1.bias.grad - expected_weight2_grad = expected_weight2_grad + mlp2._linear2.weight.grad - expected_bias2_grad = expected_bias2_grad + mlp2._linear2.bias.grad + expected_weight1_grad = ( + expected_weight1_grad + mlp2._linear1.weight.grad.numpy()) + expected_bias1_grad = ( + expected_bias1_grad + mlp2._linear1.bias.grad.numpy()) + expected_weight2_grad = ( + expected_weight2_grad + mlp2._linear2.weight.grad.numpy()) + expected_bias2_grad = ( + expected_bias2_grad + mlp2._linear2.bias.grad.numpy()) loss = mlp1(x) loss.backward() - self.assertTrue(np.array_equal(loss.grad, [1])) + self.assertTrue(np.array_equal(loss.grad.numpy(), [1])) self.assertTrue( - np.allclose(mlp1._linear1.weight.grad, + np.allclose(mlp1._linear1.weight.grad.numpy(), expected_weight1_grad)) self.assertTrue( - np.allclose(mlp1._linear1.bias.grad, expected_bias1_grad)) + np.allclose(mlp1._linear1.bias.grad.numpy(), + expected_bias1_grad)) self.assertTrue( - np.allclose(mlp1._linear2.weight.grad, + np.allclose(mlp1._linear2.weight.grad.numpy(), expected_weight2_grad)) self.assertTrue( - np.allclose(mlp1._linear2.bias.grad, expected_bias2_grad)) + np.allclose(mlp1._linear2.bias.grad.numpy(), + expected_bias2_grad)) mlp2.clear_gradients() - self.assertTrue(np.array_equal(clear_loss.grad, [1])) + self.assertTrue(np.array_equal(clear_loss.grad.numpy(), [1])) if ((batch_id + 1) % 10) == 0: mlp1.clear_gradients() expected_weight1_grad = 0. 
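# A minimal usage sketch (an illustration only, assuming Paddle 2.1 in dygraph mode) of the
# behaviour the updated assertions above rely on: after this patch Tensor.grad returns a
# paddle Tensor (or None before any backward pass) rather than a numpy.ndarray, so
# numpy-based comparisons convert explicitly via .grad.numpy(), while Tensor.gradient()
# still returns an ndarray but is deprecated since 2.1.0 in favour of x.grad.
import numpy as np
import paddle

x = paddle.to_tensor(5., stop_gradient=False)
y = paddle.pow(x, 4.0)
y.backward()

print(x.grad)                                       # Tensor(shape=[1], dtype=float32, ..., [500.])
np.testing.assert_allclose(x.grad.numpy(), [500.])  # explicit conversion for numpy checks
print(x.gradient())                                 # [500.] as an ndarray, via the deprecated alias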
diff --git a/python/paddle/fluid/tests/unittests/test_inplace.py b/python/paddle/fluid/tests/unittests/test_inplace.py index 2c6507c486e..7b9becacd82 100644 --- a/python/paddle/fluid/tests/unittests/test_inplace.py +++ b/python/paddle/fluid/tests/unittests/test_inplace.py @@ -177,7 +177,7 @@ class TestDygraphInplace(unittest.TestCase): var_d = var_c**2 loss = var_d.sum() loss.backward() - grad_var_a_inplace = var_a.grad + grad_var_a_inplace = var_a.grad.numpy() with paddle.fluid.dygraph.guard(): var_a = paddle.to_tensor(self.input_var_numpy).astype(self.dtype) @@ -188,7 +188,7 @@ class TestDygraphInplace(unittest.TestCase): var_d = var_c**2 loss = var_d.sum() loss.backward() - grad_var_a = var_a.grad + grad_var_a = var_a.grad.numpy() self.assertTrue(np.array_equal(grad_var_a_inplace, grad_var_a)) @@ -209,7 +209,7 @@ class TestDygraphInplace(unittest.TestCase): loss = var_d.sum() loss.backward() - grad_var_a_inplace = var_a.grad + grad_var_a_inplace = var_a.grad.numpy() with paddle.fluid.dygraph.guard(): var_a = paddle.to_tensor(self.input_var_numpy).astype(self.dtype) @@ -224,7 +224,7 @@ class TestDygraphInplace(unittest.TestCase): loss = var_d.sum() loss.backward() - grad_var_a = var_a.grad + grad_var_a = var_a.grad.numpy() self.assertTrue(np.array_equal(grad_var_a_inplace, grad_var_a)) diff --git a/python/paddle/fluid/tests/unittests/test_lookahead.py b/python/paddle/fluid/tests/unittests/test_lookahead.py index 98349be93db..a4b5e6d0d95 100644 --- a/python/paddle/fluid/tests/unittests/test_lookahead.py +++ b/python/paddle/fluid/tests/unittests/test_lookahead.py @@ -110,7 +110,8 @@ class TestLookAhead(unittest.TestCase): out = layer(image) loss = loss_fn(out, label) loss.backward() - fast_param = layer.bias.numpy() - SGD_LR * layer.bias.grad + fast_param = ( + layer.bias.numpy() - SGD_LR * layer.bias.grad.numpy()) opt.step() if idx == 1: slow_param = fast_param diff --git a/python/paddle/fluid/tests/unittests/test_pylayer_op.py b/python/paddle/fluid/tests/unittests/test_pylayer_op.py index f00db0b3693..565ed992bc5 100644 --- a/python/paddle/fluid/tests/unittests/test_pylayer_op.py +++ b/python/paddle/fluid/tests/unittests/test_pylayer_op.py @@ -50,7 +50,8 @@ class TestPyLayer(unittest.TestCase): z2 = paddle.tanh(input2) + paddle.tanh(input2) z2.mean().backward() - self.assertTrue(np.max(np.abs((input1.grad - input2.grad))) < 1e-10) + self.assertTrue( + np.max(np.abs((input1.grad.numpy() - input2.grad.numpy()))) < 1e-10) def test_simple_pylayer_return_none_with_no_grad(self): class tanh(PyLayer): @@ -110,7 +111,8 @@ class TestPyLayer(unittest.TestCase): z2 = paddle.tanh(input2) z2.mean().backward() - self.assertTrue(np.max(np.abs((input1.grad - input2.grad))) < 1e-10) + self.assertTrue( + np.max(np.abs((input1.grad.numpy() - input2.grad.numpy()))) < 1e-10) def test_pylayer_dtype(self): class tanh(PyLayer): diff --git a/python/paddle/fluid/tests/unittests/test_tensor_register_hook.py b/python/paddle/fluid/tests/unittests/test_tensor_register_hook.py index 50b00ab34fd..a03e4ae4bd9 100644 --- a/python/paddle/fluid/tests/unittests/test_tensor_register_hook.py +++ b/python/paddle/fluid/tests/unittests/test_tensor_register_hook.py @@ -75,15 +75,15 @@ class TestTensorRegisterHook(unittest.TestCase): o.backward() # z.grad is not affected - self.assertTrue(np.array_equal(z.grad, w.numpy())) + self.assertTrue(np.array_equal(z.grad.numpy(), w.numpy())) # w.grad is not changed by hook - self.assertTrue(np.array_equal(w.grad, z.numpy())) + self.assertTrue(np.array_equal(w.grad.numpy(), z.numpy())) # 
x.grad and y.grad are changed if run hook self.assertTrue( - np.array_equal(x.grad, + np.array_equal(x.grad.numpy(), z.numpy() * 2 if not removed else z.numpy())) self.assertTrue( - np.array_equal(y.grad, + np.array_equal(y.grad.numpy(), z.numpy() * 2 if not removed else z.numpy())) def run_print_hook_for_interior_var(print_hook, removed=False): @@ -111,10 +111,10 @@ class TestTensorRegisterHook(unittest.TestCase): o.backward() # all grads are not affected - self.assertTrue(np.array_equal(z.grad, w.numpy())) - self.assertTrue(np.array_equal(w.grad, z.numpy())) - self.assertTrue(np.array_equal(x.grad, z.numpy())) - self.assertTrue(np.array_equal(y.grad, z.numpy())) + self.assertTrue(np.array_equal(z.grad.numpy(), w.numpy())) + self.assertTrue(np.array_equal(w.grad.numpy(), z.numpy())) + self.assertTrue(np.array_equal(x.grad.numpy(), z.numpy())) + self.assertTrue(np.array_equal(y.grad.numpy(), z.numpy())) def double_hook(grad): grad = grad * 2 @@ -165,12 +165,12 @@ class TestTensorRegisterHook(unittest.TestCase): o.backward() # z.grad, w.grad, x.grad is not affected - self.assertTrue(np.array_equal(z.grad, w.numpy())) - self.assertTrue(np.array_equal(w.grad, z.numpy())) - self.assertTrue(np.array_equal(x.grad, z.numpy())) + self.assertTrue(np.array_equal(z.grad.numpy(), w.numpy())) + self.assertTrue(np.array_equal(w.grad.numpy(), z.numpy())) + self.assertTrue(np.array_equal(x.grad.numpy(), z.numpy())) # y.grad are changed if run hook self.assertTrue( - np.array_equal(y.grad, + np.array_equal(y.grad.numpy(), z.numpy() * 2 if not removed else z.numpy())) # register hook @@ -217,14 +217,14 @@ class TestTensorRegisterHook(unittest.TestCase): base_grad = np.array([5., 9., 13., 19.]) # x.grad is not changed - self.assertTrue(np.array_equal(x.grad, base_grad)) + self.assertTrue(np.array_equal(x.grad.numpy(), base_grad)) # b.grad is changed by x.hook self.assertTrue( - np.array_equal(b.grad, base_grad * 2 + np.array_equal(b.grad.numpy(), base_grad * 2 if not removed else base_grad)) # a.grad is changed by x.hook and a.hook self.assertTrue( - np.array_equal(a.grad, base_grad * 4 + np.array_equal(a.grad.numpy(), base_grad * 4 if not removed else base_grad)) # register hook @@ -265,7 +265,7 @@ class TestTensorRegisterHook(unittest.TestCase): base_grad = np.array([5., 9., 13., 19.]) # x.grad is changed by x.hook self.assertTrue( - np.array_equal(x.grad, base_grad * 2 + np.array_equal(x.grad.numpy(), base_grad * 2 if not removed else base_grad)) # register hook @@ -294,7 +294,8 @@ class TestTensorRegisterHook(unittest.TestCase): loss = loss_fn(out, label) loss.backward() - return ret1.grad, net.linear1.weight.grad, net.linear1.bias.grad + return (ret1.grad.numpy(), net.linear1.weight.grad.numpy(), + net.linear1.bias.grad.numpy()) data = np.random.uniform( size=[self.batch_size, self.in_size]).astype('float32') @@ -355,7 +356,7 @@ class TestTensorRegisterHook(unittest.TestCase): o.backward() - return z.numpy(), w.grad, x.grad, y.grad + return z.numpy(), w.grad.numpy(), x.grad.numpy(), y.grad.numpy() def double_hook(grad): return grad * 2 @@ -428,7 +429,7 @@ class TestTensorRegisterHook(unittest.TestCase): # after changed by hook: 8.0 z.backward() - self.assertTrue(np.array_equal(x.grad, np.array([8.]))) + self.assertTrue(np.array_equal(x.grad.numpy(), np.array([8.]))) def test_remove_one_hook_multiple_times(self): for device in self.devices: diff --git a/python/paddle/fluid/tests/unittests/test_var_base.py b/python/paddle/fluid/tests/unittests/test_var_base.py index 7901df79171..a65308c84e7 100644 --- 
a/python/paddle/fluid/tests/unittests/test_var_base.py +++ b/python/paddle/fluid/tests/unittests/test_var_base.py @@ -65,7 +65,8 @@ class TestVarBase(unittest.TestCase): y = clone_x**2 y.backward() self.assertTrue( - np.array_equal(x.grad, np.array([2.4]).astype('float32'))) + np.array_equal(x.grad.numpy(), + np.array([2.4]).astype('float32'))) y = x.cpu() self.assertEqual(y.place.__repr__(), "CPUPlace") if core.is_compiled_with_cuda(): @@ -260,14 +261,14 @@ class TestVarBase(unittest.TestCase): y = x**2 y.backward() - self.assertTrue(np.array_equal(x.grad, [20.0])) + self.assertTrue(np.array_equal(x.grad.numpy(), [20.0])) self.assertEqual(detach_x.grad, None) detach_x.stop_gradient = False # Set stop_gradient to be False, supported auto-grad z = 3 * detach_x**2 z.backward() - self.assertTrue(np.array_equal(x.grad, [20.0])) - self.assertTrue(np.array_equal(detach_x.grad, [60.0])) + self.assertTrue(np.array_equal(x.grad.numpy(), [20.0])) + self.assertTrue(np.array_equal(detach_x.grad.numpy(), [60.0])) # Due to sharing of data with origin Tensor, There are some unsafe operations: with self.assertRaises(RuntimeError): -- GitLab From 400c3aa733a43f8e5ce6ff4ce88f312e9909ca99 Mon Sep 17 00:00:00 2001 From: xiemoyuan <71377852+xiemoyuan@users.noreply.github.com> Date: Mon, 26 Apr 2021 15:32:13 +0800 Subject: [PATCH 002/720] [2.1 API] Modified params of some APIs to support tuple and list. (#32528) * Modified params of some APIs to support tuple and list. * fixed bug. --- python/paddle/distribution.py | 25 ++-- python/paddle/fluid/backward.py | 28 ++-- python/paddle/fluid/dygraph/container.py | 4 +- python/paddle/fluid/dygraph/jit.py | 6 +- .../tests/unittests/test_distribution.py | 127 ++++++++++++++++++ .../fluid/tests/unittests/test_dropout_op.py | 18 ++- .../test_imperative_container_sequential.py | 35 +++++ .../tests/unittests/test_initializer_nn.py | 12 ++ .../tests/unittests/test_jit_save_load.py | 52 +++++++ python/paddle/hapi/model.py | 15 ++- python/paddle/nn/functional/common.py | 8 +- python/paddle/nn/initializer/assign.py | 8 +- python/paddle/nn/layer/common.py | 4 +- python/paddle/tests/test_model.py | 53 ++++++++ python/paddle/tests/test_transforms.py | 12 ++ python/paddle/vision/transforms/functional.py | 9 +- 16 files changed, 364 insertions(+), 52 deletions(-) diff --git a/python/paddle/distribution.py b/python/paddle/distribution.py index 7f0d71e3877..d866f74b0e8 100644 --- a/python/paddle/distribution.py +++ b/python/paddle/distribution.py @@ -105,7 +105,7 @@ class Distribution(object): for arg in args: if isinstance(arg, float): arg = [arg] - if not isinstance(arg, (list, np.ndarray, tensor.Variable)): + if not isinstance(arg, (list, tuple, np.ndarray, tensor.Variable)): raise TypeError( "Type of input args must be float, list, numpy.ndarray or Tensor, but received type {}". format(type(arg))) @@ -190,8 +190,8 @@ class Uniform(Distribution): [broadcasting](https://www.paddlepaddle.org.cn/documentation/docs/en/develop/beginners_guide/basic_concept/broadcasting_en.html) (e.g., `high - low` is a valid operation). 
Args: - low(int|float|list|numpy.ndarray|Tensor): The lower boundary of uniform distribution.The data type is int, float, list, numpy.ndarray or Tensor - high(int|float|list|numpy.ndarray|Tensor): The higher boundary of uniform distribution.The data type is int, float, list, numpy.ndarray or Tensor + low(int|float|list|tuple|numpy.ndarray|Tensor): The lower boundary of uniform distribution.The data type is int, float, list, numpy.ndarray or Tensor + high(int|float|list|tuple|numpy.ndarray|Tensor): The higher boundary of uniform distribution.The data type is int, float, list, numpy.ndarray or Tensor name(str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`. Examples: @@ -229,10 +229,10 @@ class Uniform(Distribution): def __init__(self, low, high, name=None): if not in_dygraph_mode(): check_type(low, 'low', - (int, float, np.ndarray, tensor.Variable, list), + (int, float, np.ndarray, tensor.Variable, list, tuple), 'Uniform') check_type(high, 'high', - (int, float, np.ndarray, tensor.Variable, list), + (int, float, np.ndarray, tensor.Variable, list, tuple), 'Uniform') self.all_arg_is_float = False @@ -409,8 +409,8 @@ class Normal(Distribution): * :math:`Z`: is the normalization constant. Args: - loc(int|float|list|numpy.ndarray|Tensor): The mean of normal distribution.The data type is int, float, list, numpy.ndarray or Tensor. - scale(int|float|list|numpy.ndarray|Tensor): The std of normal distribution.The data type is int, float, list, numpy.ndarray or Tensor. + loc(int|float|list|tuple|numpy.ndarray|Tensor): The mean of normal distribution.The data type is int, float, list, numpy.ndarray or Tensor. + scale(int|float|list|tuple|numpy.ndarray|Tensor): The std of normal distribution.The data type is int, float, list, numpy.ndarray or Tensor. name(str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`. Examples: @@ -451,10 +451,10 @@ class Normal(Distribution): def __init__(self, loc, scale, name=None): if not in_dygraph_mode(): check_type(loc, 'loc', - (int, float, np.ndarray, tensor.Variable, list), + (int, float, np.ndarray, tensor.Variable, list, tuple), 'Normal') check_type(scale, 'scale', - (int, float, np.ndarray, tensor.Variable, list), + (int, float, np.ndarray, tensor.Variable, list, tuple), 'Normal') self.batch_size_unknown = False @@ -655,7 +655,7 @@ class Categorical(Distribution): * :math:`[x=i]` : it evaluates to 1 if :math:`x==i` , 0 otherwise. Args: - logits(list|numpy.ndarray|Tensor): The logits input of categorical distribution. The data type is float32 or float64. + logits(list|tuple|numpy.ndarray|Tensor): The logits input of categorical distribution. The data type is float32 or float64. name(str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`. Examples: @@ -702,11 +702,12 @@ class Categorical(Distribution): def __init__(self, logits, name=None): """ Args: - logits(list|numpy.ndarray|Tensor): The logits input of categorical distribution. The data type is float32 or float64. + logits(list|tuple|numpy.ndarray|Tensor): The logits input of categorical distribution. The data type is float32 or float64. name(str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`. 
""" if not in_dygraph_mode(): - check_type(logits, 'logits', (np.ndarray, tensor.Variable, list), + check_type(logits, 'logits', + (np.ndarray, tensor.Variable, list, tuple), 'Categorical') self.name = name if name is not None else 'Categorical' diff --git a/python/paddle/fluid/backward.py b/python/paddle/fluid/backward.py index 572ebb26d73..25412a86a8b 100755 --- a/python/paddle/fluid/backward.py +++ b/python/paddle/fluid/backward.py @@ -1036,7 +1036,7 @@ def _append_backward_ops_(block, val(list) the op path of block(index) """ if callbacks is not None: - assert (isinstance(callbacks, list)) + assert (isinstance(callbacks, (list, tuple))) for cb in callbacks: if not hasattr(cb, '__call__'): raise ValueError("'callback' must be a callable object.") @@ -1157,7 +1157,7 @@ def _append_backward_ops_(block, new_op_desc._set_attr(op_role_attr_name, backward) grad_to_var["__current_op_desc__"] = new_op_desc if callbacks is not None: - assert (isinstance(callbacks, list)) + assert (isinstance(callbacks, (list, tuple))) for cb in callbacks: cb(block=target_block, context=grad_to_var) @@ -1380,7 +1380,7 @@ def append_backward(loss, Parameters: loss(Tensor): The loss Tensor of the network. - parameter_list(list[Tensor|str], optional): List of Parameters or Parameter.names + parameter_list(list[Tensor|str]|tuple[Tensor|str], optional): List/Tuple of Parameters or Parameter.names that need to be updated by optimizers. If it is None, all parameters will be updated. @@ -1391,7 +1391,7 @@ def append_backward(loss, be automatically added into this set. If this parameter is not None, the Tensors or Tensor.names in this set will be added to the default set. Default: None. - callbacks(list[callable object], optional): List of callback functions. + callbacks(list[callable object]|tuple[callable object], optional): List/Tuple of callback functions. The callbacks are used for doing some custom jobs during backward part building. All @@ -1477,7 +1477,7 @@ def append_backward(loss, int(core.op_proto_and_checker_maker.OpRole.Loss)) if callbacks is not None: - check_type(callbacks, 'callbacks', list, + check_type(callbacks, 'callbacks', (list, tuple), 'paddle.static.append_backward') program = loss.block.program @@ -1823,9 +1823,9 @@ def calc_gradient(targets, inputs, target_gradients=None, no_grad_set=None): Backpropagate the gradients of targets to inputs. Args: - targets(Tensor|list[Tensor]): The target Tensors - inputs(Tensor|list[Tensor]): The input Tensors - target_gradients (Tensor|list[Tensor], optional): The gradient Tensors + targets(Tensor|list[Tensor]|tuple[Tensor]): The target Tensors + inputs(Tensor|list[Tensor]|tuple[Tensor]): The input Tensors + target_gradients (Tensor|list[Tensor]|tuple[Tensor], optional): The gradient Tensors of targets which has the same shape with targets, If None, ones will be created for them. no_grad_set(set[Tensor|str], optional): Set of Tensors or Tensor.names in the :ref:`api_guide_Block_en` 0 whose gradients @@ -1962,9 +1962,9 @@ def gradients(targets, inputs, target_gradients=None, no_grad_set=None): Backpropagate the gradients of targets to inputs. Args: - targets (Tensor|list[Tensor]): The target Tensors. - inputs (Tensor|list[Tensor]): The input Tensors. - target_gradients (Tensor|list[Tensor], optional): The gradient Tensor + targets (Tensor|list[Tensor]|tuple[Tensor]): The target Tensors. + inputs (Tensor|list[Tensor]|tuple[Tensor]): The input Tensors. 
+ target_gradients (Tensor|list[Tensor]|tuple[Tensor], optional): The gradient Tensor of targets which has the same shape with targets, If None, ones will be created for them. no_grad_set (set[Tensor|str], optional): Set of Tensors or Tensor.names in the :ref:`api_guide_Block_en` 0 whose gradients @@ -1992,12 +1992,12 @@ def gradients(targets, inputs, target_gradients=None, no_grad_set=None): z = paddle.static.gradients([y], x) print(z) # [var x@GRAD : fluid.VarType.LOD_TENSOR.shape(-1L, 2L, 8L, 8L).astype(VarType.FP32)] """ - check_type(targets, 'targets', (framework.Variable, list), + check_type(targets, 'targets', (framework.Variable, list, tuple), 'paddle.static.gradients') - check_type(inputs, 'inputs', (framework.Variable, list), + check_type(inputs, 'inputs', (framework.Variable, list, tuple), 'paddle.static.gradients') check_type(target_gradients, 'target_gradients', ( - framework.Variable, list, type(None)), 'paddle.static.gradients') + framework.Variable, list, tuple, type(None)), 'paddle.static.gradients') outs = calc_gradient(targets, inputs, target_gradients, no_grad_set) return _as_list(outs) diff --git a/python/paddle/fluid/dygraph/container.py b/python/paddle/fluid/dygraph/container.py index 345b71d8999..c7ea412fec1 100644 --- a/python/paddle/fluid/dygraph/container.py +++ b/python/paddle/fluid/dygraph/container.py @@ -29,7 +29,7 @@ class Sequential(Layer): The argument passed to the constructor can be iterable Layers or iterable name Layer pairs. Parameters: - *layers(tuple): Layers or iterable name Layer pairs. + layers(Layer|list|tuple): Layer or list/tuple of iterable name Layer pair. Examples: .. code-block:: python @@ -59,7 +59,7 @@ class Sequential(Layer): def __init__(self, *layers): super(Sequential, self).__init__() - if len(layers) > 0 and isinstance(layers[0], tuple): + if len(layers) > 0 and isinstance(layers[0], (list, tuple)): for name, layer in layers: self.add_sublayer(name, layer) else: diff --git a/python/paddle/fluid/dygraph/jit.py b/python/paddle/fluid/dygraph/jit.py index 40ab19184c9..4c7c7b17eb1 100644 --- a/python/paddle/fluid/dygraph/jit.py +++ b/python/paddle/fluid/dygraph/jit.py @@ -168,7 +168,7 @@ def declarative(function=None, input_spec=None): Args: function (callable): callable imperative function. - input_spec(list[InputSpec]): list of InputSpec to specific the shape/dtype/name + input_spec(list[InputSpec]|tuple[InputSpec]): list/tuple of InputSpec to specific the shape/dtype/name information of each input Tensor. Returns: @@ -525,7 +525,7 @@ def save(layer, path, input_spec=None, **configs): Args: layer (Layer): The Layer to be saved. path (str): The path prefix to save model. The format is ``dirname/file_prefix`` or ``file_prefix``. - input_spec (list[InputSpec|Tensor], optional): Describes the input of the saved model's forward + input_spec (list[InputSpec|Tensor]|tuple[InputSpec|Tensor], optional): Describes the input of the saved model's forward method, which can be described by InputSpec or example Tensor. If None, all input variables of the original Layer's forward method would be the inputs of the saved model. Default None. **configs (dict, optional): Other save configuration options for compatibility. We do not @@ -654,7 +654,7 @@ def save(layer, path, input_spec=None, **configs): raise ValueError( "If there are static functions other than 'forward' that need to be saved, the input 'input_spec' should be None, but received the type of 'input_spec' is %s." 
% type(input_spec)) - if not isinstance(input_spec, list): + if not isinstance(input_spec, (list, tuple)): raise TypeError( "The input input_spec should be 'list', but received input_spec's type is %s." % type(input_spec)) diff --git a/python/paddle/fluid/tests/unittests/test_distribution.py b/python/paddle/fluid/tests/unittests/test_distribution.py index d5790811df9..f1c12c90490 100644 --- a/python/paddle/fluid/tests/unittests/test_distribution.py +++ b/python/paddle/fluid/tests/unittests/test_distribution.py @@ -301,6 +301,41 @@ class UniformTest9(UniformTest): name='values', shape=[dims], dtype='float32') +class UniformTest10(UniformTest): + def init_numpy_data(self, batch_size, dims): + # low and high are list. + self.low_np = np.random.randn(batch_size, + dims).astype('float32').tolist() + self.high_np = np.random.uniform( + 5.0, 15.0, (batch_size, dims)).astype('float32').tolist() + self.values_np = np.random.randn(batch_size, dims).astype('float32') + + def init_static_data(self, batch_size, dims): + self.static_low = self.low_np + self.static_high = self.high_np + with fluid.program_guard(self.test_program): + self.static_values = layers.data( + name='values', shape=[dims], dtype='float32') + + +class UniformTest11(UniformTest): + def init_numpy_data(self, batch_size, dims): + # low and high are tuple. + self.low_np = tuple( + np.random.randn(batch_size, dims).astype('float32').tolist()) + self.high_np = tuple( + np.random.uniform(5.0, 15.0, (batch_size, dims)).astype('float32') + .tolist()) + self.values_np = np.random.randn(batch_size, dims).astype('float32') + + def init_static_data(self, batch_size, dims): + self.static_low = self.low_np + self.static_high = self.high_np + with fluid.program_guard(self.test_program): + self.static_values = layers.data( + name='values', shape=[dims], dtype='float32') + + class NormalNumpy(DistributionNumpy): def __init__(self, loc, scale): self.loc = np.array(loc) @@ -673,6 +708,66 @@ class NormalTest8(NormalTest): name='other_scale', shape=[dims], dtype='float64') +class NormalTest9(NormalTest): + def init_numpy_data(self, batch_size, dims): + # loc and scale are list. + self.loc_np = np.random.randn(batch_size, + dims).astype('float32').tolist() + self.scale_np = np.random.randn(batch_size, dims).astype('float32') + while not np.all(self.scale_np > 0): + self.scale_np = np.random.randn(batch_size, dims).astype('float32') + self.scale_np = self.scale_np.tolist() + self.values_np = np.random.randn(batch_size, dims).astype('float32') + # used to construct another Normal object to calculate kl_divergence + self.other_loc_np = np.random.randn(batch_size, + dims).astype('float32').tolist() + self.other_scale_np = np.random.randn(batch_size, + dims).astype('float32') + while not np.all(self.other_scale_np > 0): + self.other_scale_np = np.random.randn(batch_size, + dims).astype('float32') + self.other_scale_np = self.other_scale_np.tolist() + + def init_static_data(self, batch_size, dims): + self.static_loc = self.loc_np + self.static_scale = self.scale_np + self.static_other_loc = self.other_loc_np + self.static_other_scale = self.other_scale_np + with fluid.program_guard(self.test_program): + self.static_values = layers.data( + name='values', shape=[dims], dtype='float32') + + +class NormalTest10(NormalTest): + def init_numpy_data(self, batch_size, dims): + # loc and scale are tuple. 
+ self.loc_np = tuple( + np.random.randn(batch_size, dims).astype('float32').tolist()) + self.scale_np = np.random.randn(batch_size, dims).astype('float32') + while not np.all(self.scale_np > 0): + self.scale_np = np.random.randn(batch_size, dims).astype('float32') + self.scale_np = tuple(self.scale_np.tolist()) + self.values_np = np.random.randn(batch_size, dims).astype('float32') + # used to construct another Normal object to calculate kl_divergence + self.other_loc_np = tuple( + np.random.randn(batch_size, dims).astype('float32').tolist()) + self.other_scale_np = np.random.randn(batch_size, + dims).astype('float32') + while not np.all(self.other_scale_np > 0): + self.other_scale_np = np.random.randn(batch_size, + dims).astype('float32') + self.other_scale_np = tuple(self.other_scale_np.tolist()) + + def init_static_data(self, batch_size, dims): + self.static_loc = self.loc_np + self.static_scale = self.scale_np + self.static_other_loc = self.other_loc_np + self.static_other_scale = self.other_scale_np + with fluid.program_guard(self.test_program): + self.static_values = layers.data( + name='values', shape=[dims], dtype='float32') + + class CategoricalNumpy(DistributionNumpy): def __init__(self, logits): self.logits = np.array(logits).astype('float32') @@ -961,6 +1056,38 @@ class CategoricalTest7(CategoricalTest): return np_probs +class CategoricalTest8(CategoricalTest): + def init_dynamic_data(self, batch_size, dims): + # input logtis is 2-D list + # value used in probs and log_prob method is 1-D Tensor + self.logits = self.logits_np.tolist() + self.other_logits = self.other_logits_np.tolist() + self.value = paddle.to_tensor(self.value_np) + + def init_static_data(self, batch_size, dims): + with fluid.program_guard(self.test_program): + self.logits_static = self.logits_np.tolist() + self.other_logits_static = self.other_logits_np.tolist() + self.value_static = fluid.data( + name='value', shape=self.value_shape, dtype='int64') + + +class CategoricalTest9(CategoricalTest): + def init_dynamic_data(self, batch_size, dims): + # input logtis is 2-D tuple + # value used in probs and log_prob method is 1-D Tensor + self.logits = tuple(self.logits_np.tolist()) + self.other_logits = tuple(self.other_logits_np.tolist()) + self.value = paddle.to_tensor(self.value_np) + + def init_static_data(self, batch_size, dims): + with fluid.program_guard(self.test_program): + self.logits_static = tuple(self.logits_np.tolist()) + self.other_logits_static = tuple(self.other_logits_np.tolist()) + self.value_static = fluid.data( + name='value', shape=self.value_shape, dtype='int64') + + class DistributionTestError(unittest.TestCase): def test_distribution_error(self): distribution = Distribution() diff --git a/python/paddle/fluid/tests/unittests/test_dropout_op.py b/python/paddle/fluid/tests/unittests/test_dropout_op.py index ba2abd72500..89755d0365f 100644 --- a/python/paddle/fluid/tests/unittests/test_dropout_op.py +++ b/python/paddle/fluid/tests/unittests/test_dropout_op.py @@ -303,6 +303,12 @@ class TestDropoutFAPI(unittest.TestCase): mode='downscale_in_infer') res10 = paddle.nn.functional.dropout(x=input, p=1., training=True) res11 = paddle.fluid.layers.dropout(x=input, dropout_prob=0.) 
+ res12 = paddle.nn.functional.dropout( + x=input, + p=0., + axis=(0, 1), + training=False, + mode='upscale_in_train') in_np = np.random.random([40, 40]).astype("float32") res_np = in_np @@ -310,7 +316,8 @@ class TestDropoutFAPI(unittest.TestCase): exe = fluid.Executor(place) res_list = [ - res1, res2, res3, res4, res5, res6, res7, res8, res9, res11 + res1, res2, res3, res4, res5, res6, res7, res8, res9, res11, + res12 ] for res in res_list: fetches = exe.run(fluid.default_main_program(), @@ -388,9 +395,16 @@ class TestDropoutFAPI(unittest.TestCase): x=input, p=1., training=True) dropout = paddle.fluid.dygraph.Dropout(p=0, ) res11 = dropout(input) + res12 = paddle.nn.functional.dropout( + x=input, + p=0., + axis=(0, 1), + training=False, + mode='upscale_in_train') res_list = [ - res1, res2, res3, res4, res5, res6, res7, res8, res9, res11 + res1, res2, res3, res4, res5, res6, res7, res8, res9, res11, + res12 ] for res in res_list: self.assertTrue(np.allclose(res.numpy(), res_np)) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_container_sequential.py b/python/paddle/fluid/tests/unittests/test_imperative_container_sequential.py index 846c84c8a58..972f1b64e14 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_container_sequential.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_container_sequential.py @@ -55,6 +55,41 @@ class TestImperativeContainerSequential(unittest.TestCase): loss2 = fluid.layers.reduce_mean(res2) loss2.backward() + def test_sequential_list_params(self): + data = np.random.uniform(-1, 1, [5, 10]).astype('float32') + with fluid.dygraph.guard(): + data = fluid.dygraph.to_variable(data) + model1 = fluid.dygraph.Sequential( + fluid.Linear(10, 1), fluid.Linear(1, 2)) + res1 = model1(data) + self.assertListEqual(res1.shape, [5, 2]) + model1[1] = fluid.Linear(1, 3) + res1 = model1(data) + self.assertListEqual(res1.shape, [5, 3]) + loss1 = fluid.layers.reduce_mean(res1) + loss1.backward() + + l1 = fluid.Linear(10, 1) + l2 = fluid.Linear(1, 3) + model2 = fluid.dygraph.Sequential(['l1', l1], ['l2', l2]) + self.assertEqual(len(model2), 2) + res2 = model2(data) + self.assertTrue(l1 is model2.l1) + self.assertListEqual(res2.shape, res1.shape) + self.assertEqual(len(model1.parameters()), len(model2.parameters())) + del model2['l2'] + self.assertEqual(len(model2), 1) + res2 = model2(data) + self.assertListEqual(res2.shape, [5, 1]) + model2.add_sublayer('l3', fluid.Linear(1, 3)) + model2.add_sublayer('l4', fluid.Linear(3, 4)) + self.assertEqual(len(model2), 3) + res2 = model2(data) + self.assertListEqual(res2.shape, [5, 4]) + + loss2 = fluid.layers.reduce_mean(res2) + loss2.backward() + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_initializer_nn.py b/python/paddle/fluid/tests/unittests/test_initializer_nn.py index 08ec516ba95..9ec78366226 100644 --- a/python/paddle/fluid/tests/unittests/test_initializer_nn.py +++ b/python/paddle/fluid/tests/unittests/test_initializer_nn.py @@ -718,6 +718,18 @@ class TestAssign(unittest.TestCase): self.assertTrue((linear_3.weight.numpy() == [2.0, 2.0]).all(), '') + def test_assign_initializer_dygraph_4(self): + """Test assign initializer in dygraph model. 
+ """ + paddle.disable_static() + + weight_attr_4 = paddle.framework.ParamAttr( + name="linear_weight_4", + initializer=paddle.nn.initializer.Assign((2, 2))) + linear_4 = paddle.nn.Linear(2, 2, weight_attr=weight_attr_4) + + self.assertTrue((linear_4.weight.numpy() == [2.0, 2.0]).all(), '') + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_jit_save_load.py b/python/paddle/fluid/tests/unittests/test_jit_save_load.py index bf9912c89cb..16adcb8f241 100644 --- a/python/paddle/fluid/tests/unittests/test_jit_save_load.py +++ b/python/paddle/fluid/tests/unittests/test_jit_save_load.py @@ -158,6 +158,22 @@ class LinearNetMultiInput(fluid.dygraph.Layer): return x_out, y_out, loss +class LinearNetMultiInput1(fluid.dygraph.Layer): + def __init__(self, in_size, out_size): + super(LinearNetMultiInput1, self).__init__() + self._linear1 = Linear(in_size, out_size) + self._linear2 = Linear(in_size, out_size) + + @declarative(input_spec=(InputSpec( + [None, 8], dtype='float32'), InputSpec( + [None, 8], dtype='float32'))) + def forward(self, x, y): + x_out = self._linear1(x) + y_out = self._linear2(y) + loss = fluid.layers.mean(x_out + y_out) + return x_out, y_out, loss + + class MultiLoadingLinearNet(fluid.dygraph.Layer): def __init__(self, size, model_path): super(MultiLoadingLinearNet, self).__init__() @@ -542,6 +558,42 @@ class TestSaveLoadWithInputSpec(unittest.TestCase): # 4. assert pred_x == pred_xx self.assertTrue(np.allclose(pred_x.numpy(), pred_xx.numpy())) + def test_multi_in_out1(self): + net = LinearNetMultiInput1(8, 8) + + model_path = "multi_inout1.output_spec1/model" + # 1. check inputs and outputs + self.assertTrue(len(net.forward.inputs) == 2) + input_x = net.forward.inputs[0] + input_y = net.forward.inputs[1] + self.assertTrue(input_x.shape == (-1, 8)) + self.assertTrue(input_y.shape == (-1, 8)) + + # 2. prune loss + output_spec = net.forward.outputs[:2] + paddle.jit.save(net, model_path, output_spec=output_spec) + + # 3. load to infer + infer_layer = paddle.jit.load(model_path) + x = fluid.dygraph.to_variable( + np.random.random((4, 8)).astype('float32')) + y = fluid.dygraph.to_variable( + np.random.random((4, 8)).astype('float32')) + # 4. predict + pred_x, pred_y = infer_layer(x, y) + + # 1. prune y and loss + model_path = "multi_inout1.output_spec2/model" + output_spec = net.forward.outputs[:1] + paddle.jit.save(net, model_path, (input_x, ), output_spec=output_spec) + # 2. load again + infer_layer2 = paddle.jit.load(model_path) + # 3. predict + pred_xx = infer_layer2(x) + + # 4. assert pred_x == pred_xx + self.assertTrue(np.allclose(pred_x.numpy(), pred_xx.numpy())) + class TestJitSaveLoadConfig(unittest.TestCase): def setUp(self): diff --git a/python/paddle/hapi/model.py b/python/paddle/hapi/model.py index 6cd879c388c..5a33d5b58dc 100644 --- a/python/paddle/hapi/model.py +++ b/python/paddle/hapi/model.py @@ -236,7 +236,7 @@ def _update_input_info(inputs): if isinstance(inputs, Input): shapes = [list(inputs.shape)] dtypes = [inputs.dtype] - elif isinstance(inputs, list): + elif isinstance(inputs, (list, tuple)): shapes = [list(input.shape) for input in inputs] dtypes = [input.dtype for input in inputs] elif isinstance(inputs, dict): @@ -895,12 +895,12 @@ class Model(object): Args: network (paddle.nn.Layer): The network is an instance of paddle.nn.Layer. 
- inputs (InputSpec|list|dict|None): `inputs`, entry points of network, - could be a InputSpec instance, or lits of InputSpec instances, + inputs (InputSpec|list|tuple|dict|None): `inputs`, entry points of network, + could be a InputSpec instance, or list/tuple of InputSpec instances, or dict ({name: InputSpec}), and it couldn't be None in static graph. - labels (InputSpec|list|None): `labels`, entry points of network, - could be a InputSpec instnace or lits of InputSpec instances, + labels (InputSpec|list|tuple|None): `labels`, entry points of network, + could be a InputSpec instnace or list/tuple of InputSpec instances, or None. For static graph, if labels is required in loss, labels must be set. Otherwise, it could be None. @@ -994,9 +994,10 @@ class Model(object): self.stop_training = False if not in_dygraph_mode(): - if not isinstance(inputs, (list, dict, Input)): + if not isinstance(inputs, (list, tuple, dict, Input)): raise TypeError( - "'inputs' must be list or dict, and couldn't be None.") + "'inputs' must be list or tuple or dict, and couldn't be None." + ) elif inputs: self._input_info = _update_input_info(inputs) diff --git a/python/paddle/nn/functional/common.py b/python/paddle/nn/functional/common.py index 0859d05af1c..5e8dc15cb4a 100644 --- a/python/paddle/nn/functional/common.py +++ b/python/paddle/nn/functional/common.py @@ -764,8 +764,8 @@ def dropout(x, Args: x (Tensor): The input tensor. The data type is float32 or float64. - p (float | int): Probability of setting units to zero. Default 0.5. - axis (int | list): The axis along which the dropout is performed. Default None. + p (float|int): Probability of setting units to zero. Default 0.5. + axis (int|list|tuple): The axis along which the dropout is performed. Default None. training (bool): A flag indicating whether it is in train phrase or not. Default True. mode(str): ['upscale_in_train'(default) | 'downscale_in_infer']. @@ -896,7 +896,7 @@ def dropout(x, if mode not in ('downscale_in_infer', 'upscale_in_train'): raise ValueError( "mode argument should be 'downscale_in_infer' or 'upscale_in_train'") - if axis and not isinstance(axis, (int, list)): + if axis and not isinstance(axis, (int, list, tuple)): raise TypeError("datatype of axis argument should be int or list") if axis == None: # commonly used dropout @@ -955,7 +955,7 @@ def dropout(x, #get mask shape input_shape = x.shape - drop_axes = [axis] if isinstance(axis, int) else axis + drop_axes = [axis] if isinstance(axis, int) else list(axis) if min(drop_axes) < 0 or max(drop_axes) > len(input_shape) - 1: raise ValueError("axis value should be greater than or equal to 0 and less than dimensions of x:{}, but get axis value:{} " \ .format(len(input_shape), max(drop_axes))) diff --git a/python/paddle/nn/initializer/assign.py b/python/paddle/nn/initializer/assign.py index a33301230e8..94c4ddc1938 100644 --- a/python/paddle/nn/initializer/assign.py +++ b/python/paddle/nn/initializer/assign.py @@ -26,7 +26,7 @@ class Assign(NumpyArrayInitializer): """Init an parameter with a numpy array, list, or tensor. Args: - value (Tensor|numpy.ndarray|list): numpy array, list, or tensor to initialize the parameter. + value (Tensor|numpy.ndarray|list|tuple): numpy array, list, tuple, or tensor to initialize the parameter. name(str, optional): The default value is None. Normally there is no need for user to set this property. For more information, please refer to :ref:`api_guide_Name`. 
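# A short illustrative sketch (assuming Paddle 2.1 in dygraph mode) of the list/tuple
# flexibility this commit documents and tests: argument positions that previously
# required Python lists now also accept tuples, and vice versa, as exercised by the
# new unit tests added earlier in this patch.
import paddle

# Sequential accepts iterable (name, layer) pairs given as lists or tuples.
model = paddle.nn.Sequential(['l1', paddle.nn.Linear(10, 1)],
                             ['l2', paddle.nn.Linear(1, 3)])

# functional.dropout accepts its axis argument as an int, list or tuple.
x = paddle.rand([4, 10])
out = paddle.nn.functional.dropout(x, p=0.5, axis=(0, 1))

# Distribution parameters may be floats, lists, tuples, ndarrays or Tensors.
normal = paddle.distribution.Normal(loc=(0., 1.), scale=(1., 2.))
samples = normal.sample([3])  # shape [3, 2]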
@@ -87,10 +87,10 @@ class Assign(NumpyArrayInitializer): def __init__(self, value, name=None): import numpy - check_type(value, 'value', (numpy.ndarray, list, framework.Variable), - 'Assign') + check_type(value, 'value', + (numpy.ndarray, list, tuple, framework.Variable), 'Assign') - if (isinstance(value, list)): + if (isinstance(value, (list, tuple))): value = numpy.array(value) # TODO: value is already is a tensor, accounting efficiency maybe it does not need to convert tensor to numpy data and then initialized. diff --git a/python/paddle/nn/layer/common.py b/python/paddle/nn/layer/common.py index 2f71e5470fd..db0a5a5cab3 100644 --- a/python/paddle/nn/layer/common.py +++ b/python/paddle/nn/layer/common.py @@ -680,8 +680,8 @@ class Dropout(layers.Layer): In dygraph mode, please use ``eval()`` to switch to evaluation mode, where dropout is disabled. Parameters: - p (float | int): Probability of setting units to zero. Default: 0.5 - axis (int | list): The axis along which the dropout is performed. Default None. + p (float|int): Probability of setting units to zero. Default: 0.5 + axis (int|list|tuple): The axis along which the dropout is performed. Default None. mode(str, optional): ['upscale_in_train'(default) | 'downscale_in_infer'] 1. upscale_in_train(default), upscale the output at training time diff --git a/python/paddle/tests/test_model.py b/python/paddle/tests/test_model.py index 10ceb487969..ae574a8241b 100644 --- a/python/paddle/tests/test_model.py +++ b/python/paddle/tests/test_model.py @@ -172,6 +172,12 @@ class TestModel(unittest.TestCase): def test_fit_static(self): self.fit(False) + def test_fit_dynamic_with_tuple_input(self): + self.fit_with_tuple_input(True) + + def test_fit_static_with_tuple_input(self): + self.fit_with_tuple_input(False) + def test_fit_dynamic_with_rank(self): self.fit(True, 2, 0) @@ -240,6 +246,53 @@ class TestModel(unittest.TestCase): model.fit(train_loader, val_loader) fluid.disable_dygraph() if dynamic else None + def fit_with_tuple_input(self, dynamic, num_replicas=None, rank=None): + fluid.enable_dygraph(self.device) if dynamic else None + seed = 333 + paddle.seed(seed) + paddle.framework.random._manual_program_seed(seed) + + net = LeNet() + optim_new = fluid.optimizer.Adam( + learning_rate=0.001, parameter_list=net.parameters()) + model = Model(net, inputs=tuple(self.inputs), labels=tuple(self.labels)) + model.prepare( + optim_new, + loss=CrossEntropyLoss(reduction="sum"), + metrics=Accuracy()) + model.fit(self.train_dataset, batch_size=64, shuffle=False) + + result = model.evaluate(self.val_dataset, batch_size=64) + np.testing.assert_allclose(result['acc'], self.acc1) + + train_sampler = DistributedBatchSampler( + self.train_dataset, + batch_size=64, + shuffle=False, + num_replicas=num_replicas, + rank=rank) + val_sampler = DistributedBatchSampler( + self.val_dataset, + batch_size=64, + shuffle=False, + num_replicas=num_replicas, + rank=rank) + + train_loader = fluid.io.DataLoader( + self.train_dataset, + batch_sampler=train_sampler, + places=self.device, + return_list=True) + + val_loader = fluid.io.DataLoader( + self.val_dataset, + batch_sampler=val_sampler, + places=self.device, + return_list=True) + + model.fit(train_loader, val_loader) + fluid.disable_dygraph() if dynamic else None + def evaluate(self, dynamic): fluid.enable_dygraph(self.device) if dynamic else None model = Model(LeNet(), self.inputs, self.labels) diff --git a/python/paddle/tests/test_transforms.py b/python/paddle/tests/test_transforms.py index 47977bdf535..5086a12d945 100644 
--- a/python/paddle/tests/test_transforms.py +++ b/python/paddle/tests/test_transforms.py @@ -454,6 +454,18 @@ class TestFunctional(unittest.TestCase): np.testing.assert_equal(rotated_np_img.shape, np.array(rotated_pil_img).shape) + def test_rotate1(self): + np_img = (np.random.rand(28, 28, 3) * 255).astype('uint8') + pil_img = Image.fromarray(np_img).convert('RGB') + + rotated_np_img = F.rotate( + np_img, 80, expand=True, center=[0, 0], fill=[0, 0, 0]) + rotated_pil_img = F.rotate( + pil_img, 80, expand=True, center=[0, 0], fill=[0, 0, 0]) + + np.testing.assert_equal(rotated_np_img.shape, + np.array(rotated_pil_img).shape) + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/vision/transforms/functional.py b/python/paddle/vision/transforms/functional.py index da90e4907e4..c65c2423d13 100644 --- a/python/paddle/vision/transforms/functional.py +++ b/python/paddle/vision/transforms/functional.py @@ -538,10 +538,10 @@ def rotate(img, If true, expands the output image to make it large enough to hold the entire rotated image. If false or omitted, make the output image the same size as the input image. Note that the expand flag assumes rotation around the center and no translation. - center (2-tuple, optional): Optional center of rotation. + center (2-list|2-tuple, optional): Optional center of rotation. Origin is the upper left corner. Default is the center of the image. - fill (3-tuple or int): RGB pixel fill value for area outside the rotated image. + fill (3-list|3-tuple or int): RGB pixel fill value for area outside the rotated image. If int, it is used for all channels respectively. @@ -568,6 +568,11 @@ def rotate(img, 'img should be PIL Image or ndarray with dim=[2 or 3]. Got {}'. format(type(img))) + if isinstance(center, list): + center = tuple(center) + if isinstance(fill, list): + fill = tuple(fill) + if _is_pil_image(img): return F_pil.rotate(img, angle, interpolation, expand, center, fill) else: -- GitLab From 78908b4ba536918766f0a5a9b4d0bc1e9edbe2aa Mon Sep 17 00:00:00 2001 From: zhiboniu <31800336+zhiboniu@users.noreply.github.com> Date: Mon, 26 Apr 2021 15:47:10 +0800 Subject: [PATCH 003/720] update 2.0 public api in static&text (#32007) --- python/paddle/static/__init__.py | 159 ++++++++++----------- python/paddle/static/amp/__init__.py | 17 ++- python/paddle/static/input.py | 2 - python/paddle/static/io.py | 12 -- python/paddle/static/nn/__init__.py | 92 ++++++------ python/paddle/static/nn/common.py | 2 - python/paddle/text/__init__.py | 20 ++- python/paddle/text/datasets/__init__.py | 30 +--- python/paddle/text/datasets/conll05.py | 2 - python/paddle/text/datasets/imdb.py | 2 - python/paddle/text/datasets/imikolov.py | 2 - python/paddle/text/datasets/movielens.py | 2 - python/paddle/text/datasets/uci_housing.py | 2 - python/paddle/text/datasets/wmt14.py | 2 - python/paddle/text/datasets/wmt16.py | 2 - 15 files changed, 157 insertions(+), 191 deletions(-) diff --git a/python/paddle/static/__init__.py b/python/paddle/static/__init__.py index 91b4a29cefc..89da75ae91e 100644 --- a/python/paddle/static/__init__.py +++ b/python/paddle/static/__init__.py @@ -12,88 +12,83 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-# TODO: import framework api under this directory -__all__ = [ - 'append_backward', - 'gradients', - 'Executor', - 'global_scope', - 'scope_guard', - 'BuildStrategy', - 'CompiledProgram', - 'Print', - 'py_func', - 'ExecutionStrategy', - 'name_scope', - 'ParallelExecutor', - 'program_guard', - 'WeightNormParamAttr', - 'default_main_program', - 'default_startup_program', - 'Program', - 'data', - 'InputSpec', - 'save', - 'load', - 'save_inference_model', - 'load_inference_model', - 'load_program_state', - 'set_program_state', - 'cpu_places', - 'cuda_places', - 'xpu_places', - 'Variable', - 'load_vars', - 'save_vars', - 'auc', - 'accuracy', -] +from . import amp # noqa: F401 +from . import nn # noqa: F401 +from .io import save_inference_model # noqa: F401 +from .io import load_inference_model # noqa: F401 +from .io import deserialize_persistables # noqa: F401 +from .io import serialize_persistables # noqa: F401 +from .io import deserialize_program # noqa: F401 +from .io import serialize_program # noqa: F401 +from .io import load_from_file # noqa: F401 +from .io import save_to_file # noqa: F401 +from .io import normalize_program # noqa: F401 +from ..fluid import Scope # noqa: F401 +from .input import data # noqa: F401 +from .input import InputSpec # noqa: F401 +from ..fluid.executor import Executor # noqa: F401 +from ..fluid.executor import global_scope # noqa: F401 +from ..fluid.executor import scope_guard # noqa: F401 +from ..fluid.backward import append_backward # noqa: F401 +from ..fluid.backward import gradients # noqa: F401 +from ..fluid.compiler import BuildStrategy # noqa: F401 +from ..fluid.compiler import CompiledProgram # noqa: F401 +from ..fluid.compiler import ExecutionStrategy # noqa: F401 +from ..fluid.framework import default_main_program # noqa: F401 +from ..fluid.framework import default_startup_program # noqa: F401 +from ..fluid.framework import device_guard # noqa: F401 +from ..fluid.framework import Program # noqa: F401 +from ..fluid.framework import name_scope # noqa: F401 +from ..fluid.framework import program_guard # noqa: F401 +from ..fluid.framework import cpu_places # noqa: F401 +from ..fluid.framework import cuda_places # noqa: F401 +from ..fluid.framework import xpu_places # noqa: F401 +from ..fluid.framework import Variable # noqa: F401 +from ..fluid.layers.control_flow import Print # noqa: F401 +from ..fluid.layers.nn import py_func # noqa: F401 +from ..fluid.parallel_executor import ParallelExecutor # noqa: F401 +from ..fluid.param_attr import WeightNormParamAttr # noqa: F401 +from ..fluid.io import save # noqa: F401 +from ..fluid.io import load # noqa: F401 +from ..fluid.io import load_program_state # noqa: F401 +from ..fluid.io import set_program_state # noqa: F401 -from . import nn -from . 
import amp -from .io import save_inference_model #DEFINE_ALIAS -from .io import load_inference_model #DEFINE_ALIAS -from .io import deserialize_persistables #DEFINE_ALIAS -from .io import serialize_persistables #DEFINE_ALIAS -from .io import deserialize_program #DEFINE_ALIAS -from .io import serialize_program #DEFINE_ALIAS -from .io import load_from_file #DEFINE_ALIAS -from .io import save_to_file #DEFINE_ALIAS -from .io import normalize_program #DEFINE_ALIAS -from ..fluid import Scope #DEFINE_ALIAS -from .input import data #DEFINE_ALIAS -from .input import InputSpec #DEFINE_ALIAS -from ..fluid.executor import Executor #DEFINE_ALIAS -from ..fluid.executor import global_scope #DEFINE_ALIAS -from ..fluid.executor import scope_guard #DEFINE_ALIAS -from ..fluid.backward import append_backward #DEFINE_ALIAS -from ..fluid.backward import gradients #DEFINE_ALIAS -from ..fluid.compiler import BuildStrategy #DEFINE_ALIAS -from ..fluid.compiler import CompiledProgram #DEFINE_ALIAS -from ..fluid.compiler import ExecutionStrategy #DEFINE_ALIAS -from ..fluid.framework import default_main_program #DEFINE_ALIAS -from ..fluid.framework import default_startup_program #DEFINE_ALIAS -from ..fluid.framework import device_guard #DEFINE_ALIAS -from ..fluid.framework import Program #DEFINE_ALIAS -from ..fluid.framework import name_scope #DEFINE_ALIAS -from ..fluid.framework import program_guard #DEFINE_ALIAS -from ..fluid.framework import cpu_places #DEFINE_ALIAS -from ..fluid.framework import cuda_places #DEFINE_ALIAS -from ..fluid.framework import xpu_places #DEFINE_ALIAS -from ..fluid.framework import Variable #DEFINE_ALIAS -from ..fluid.layers.control_flow import Print #DEFINE_ALIAS -from ..fluid.layers.nn import py_func #DEFINE_ALIAS -from ..fluid.parallel_executor import ParallelExecutor #DEFINE_ALIAS -from ..fluid.param_attr import WeightNormParamAttr #DEFINE_ALIAS -from ..fluid.io import save #DEFINE_ALIAS -from ..fluid.io import load #DEFINE_ALIAS -from ..fluid.io import load_program_state #DEFINE_ALIAS -from ..fluid.io import set_program_state #DEFINE_ALIAS +from ..fluid.io import load_vars # noqa: F401 +from ..fluid.io import save_vars # noqa: F401 -from ..fluid.io import load_vars #DEFINE_ALIAS -from ..fluid.io import save_vars #DEFINE_ALIAS +from ..fluid.layers import create_parameter # noqa: F401 +from ..fluid.layers import create_global_var # noqa: F401 +from ..fluid.layers.metric_op import auc # noqa: F401 +from ..fluid.layers.metric_op import accuracy # noqa: F401 -from ..fluid.layers import create_parameter #DEFINE_ALIAS -from ..fluid.layers import create_global_var #DEFINE_ALIAS -from ..fluid.layers.metric_op import auc #DEFINE_ALIAS -from ..fluid.layers.metric_op import accuracy #DEFINE_ALIAS +__all__ = [ #noqa + 'append_backward', + 'gradients', + 'Executor', + 'global_scope', + 'scope_guard', + 'BuildStrategy', + 'CompiledProgram', + 'Print', + 'py_func', + 'ExecutionStrategy', + 'name_scope', + 'ParallelExecutor', + 'program_guard', + 'WeightNormParamAttr', + 'default_main_program', + 'default_startup_program', + 'Program', + 'data', + 'InputSpec', + 'save', + 'load', + 'save_inference_model', + 'load_inference_model', + 'load_program_state', + 'set_program_state', + 'cpu_places', + 'cuda_places', + 'Variable', + 'create_global_var' +] diff --git a/python/paddle/static/amp/__init__.py b/python/paddle/static/amp/__init__.py index bfc1beed552..7320efe9b17 100644 --- a/python/paddle/static/amp/__init__.py +++ b/python/paddle/static/amp/__init__.py @@ -12,10 +12,13 @@ # See the License for the 
specific language governing permissions and # limitations under the License. -from ...fluid.contrib import mixed_precision -from ...fluid.contrib.mixed_precision import * -from ...fluid.contrib.mixed_precision import bf16 -from ...fluid.contrib.mixed_precision.bf16 import * - -__all__ = mixed_precision.__all__ -__all__ += bf16.__all__ +from ...fluid.contrib.mixed_precision import decorate # noqa: F401 +from ...fluid.contrib.mixed_precision import CustomOpLists # noqa: F401 +from ...fluid.contrib.mixed_precision import AutoMixedPrecisionLists # noqa: F401 +from ...fluid.contrib.mixed_precision import fp16_guard # noqa: F401 +from ...fluid.contrib.mixed_precision import cast_model_to_fp16 # noqa: F401 +from ...fluid.contrib.mixed_precision import cast_parameters_to_fp16 # noqa: F401 +from ...fluid.contrib.mixed_precision import AutoMixedPrecisionListsBF16 # noqa: F401 +from ...fluid.contrib.mixed_precision import bf16_guard # noqa: F401 +from ...fluid.contrib.mixed_precision import rewrite_program_bf16 # noqa: F401 +from ...fluid.contrib.mixed_precision import convert_float_to_uint16 # noqa: F401 diff --git a/python/paddle/static/input.py b/python/paddle/static/input.py index f05051d3e68..c1de576ee74 100644 --- a/python/paddle/static/input.py +++ b/python/paddle/static/input.py @@ -21,8 +21,6 @@ from paddle.fluid.data_feeder import check_type from paddle.fluid.framework import convert_np_dtype_to_dtype_ from paddle.fluid.framework import static_only -__all__ = ['data', 'InputSpec'] - @static_only def data(name, shape, dtype=None, lod_level=0): diff --git a/python/paddle/static/io.py b/python/paddle/static/io.py index 6bbab6ed672..fc6d8b64f18 100644 --- a/python/paddle/static/io.py +++ b/python/paddle/static/io.py @@ -37,18 +37,6 @@ from paddle.fluid.framework import static_only, Parameter from paddle.fluid.executor import Executor, global_scope from paddle.fluid.log_helper import get_logger -__all__ = [ - 'save_inference_model', - 'load_inference_model', - 'serialize_program', - 'serialize_persistables', - 'save_to_file', - 'deserialize_program', - 'deserialize_persistables', - 'load_from_file', - 'normalize_program', -] - _logger = get_logger( __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s') diff --git a/python/paddle/static/nn/__init__.py b/python/paddle/static/nn/__init__.py index 0e9754d3c1f..416f6e4f3df 100644 --- a/python/paddle/static/nn/__init__.py +++ b/python/paddle/static/nn/__init__.py @@ -12,7 +12,52 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-__all__ = [ +from .common import fc # noqa: F401 +from .common import deform_conv2d # noqa: F401 + +from ...fluid.layers import batch_norm # noqa: F401 +from ...fluid.layers import bilinear_tensor_product # noqa: F401 +from ...fluid.layers import case # noqa: F401 +from ...fluid.layers import cond # noqa: F401 +from ...fluid.layers import conv2d # noqa: F401 +from ...fluid.layers import conv2d_transpose # noqa: F401 +from ...fluid.layers import conv3d # noqa: F401 +from ...fluid.layers import conv3d_transpose # noqa: F401 +from ...fluid.layers import create_parameter # noqa: F401 +from ...fluid.layers import crf_decoding # noqa: F401 +from ...fluid.layers import data_norm # noqa: F401 +from ...fluid.layers import group_norm # noqa: F401 +from ...fluid.layers import instance_norm # noqa: F401 +from ...fluid.layers import layer_norm # noqa: F401 +from ...fluid.layers import multi_box_head # noqa: F401 +from ...fluid.layers import nce # noqa: F401 +from ...fluid.layers import prelu # noqa: F401 +from ...fluid.layers import py_func # noqa: F401 +from ...fluid.layers import row_conv # noqa: F401 +from ...fluid.layers import spectral_norm # noqa: F401 +from ...fluid.layers import switch_case # noqa: F401 +from ...fluid.layers import while_loop # noqa: F401 + +from ...fluid.input import embedding # noqa: F401 +from ...fluid.contrib.layers import sparse_embedding # noqa: F401 + +from ...fluid.layers.sequence_lod import sequence_conv # noqa: F401 +from ...fluid.layers.sequence_lod import sequence_softmax # noqa: F401 +from ...fluid.layers.sequence_lod import sequence_pool # noqa: F401 +from ...fluid.layers.sequence_lod import sequence_concat # noqa: F401 +from ...fluid.layers.sequence_lod import sequence_first_step # noqa: F401 +from ...fluid.layers.sequence_lod import sequence_last_step # noqa: F401 +from ...fluid.layers.sequence_lod import sequence_slice # noqa: F401 +from ...fluid.layers.sequence_lod import sequence_expand # noqa: F401 +from ...fluid.layers.sequence_lod import sequence_expand_as # noqa: F401 +from ...fluid.layers.sequence_lod import sequence_pad # noqa: F401 +from ...fluid.layers.sequence_lod import sequence_unpad # noqa: F401 +from ...fluid.layers.sequence_lod import sequence_reshape # noqa: F401 +from ...fluid.layers.sequence_lod import sequence_scatter # noqa: F401 +from ...fluid.layers.sequence_lod import sequence_enumerate # noqa: F401 +from ...fluid.layers.sequence_lod import sequence_reverse # noqa: F401 + +__all__ = [ #noqa 'fc', 'batch_norm', 'embedding', @@ -55,48 +100,3 @@ __all__ = [ 'sequence_enumerate', 'sequence_reverse', ] - -from .common import fc #DEFINE_ALIAS -from .common import deform_conv2d #DEFINE_ALIAS - -from ...fluid.layers import batch_norm #DEFINE_ALIAS -from ...fluid.layers import bilinear_tensor_product #DEFINE_ALIAS -from ...fluid.layers import case #DEFINE_ALIAS -from ...fluid.layers import cond #DEFINE_ALIAS -from ...fluid.layers import conv2d #DEFINE_ALIAS -from ...fluid.layers import conv2d_transpose #DEFINE_ALIAS -from ...fluid.layers import conv3d #DEFINE_ALIAS -from ...fluid.layers import conv3d_transpose #DEFINE_ALIAS -from ...fluid.layers import create_parameter #DEFINE_ALIAS -from ...fluid.layers import crf_decoding #DEFINE_ALIAS -from ...fluid.layers import data_norm #DEFINE_ALIAS -from ...fluid.layers import group_norm #DEFINE_ALIAS -from ...fluid.layers import instance_norm #DEFINE_ALIAS -from ...fluid.layers import layer_norm #DEFINE_ALIAS -from ...fluid.layers import multi_box_head #DEFINE_ALIAS -from ...fluid.layers import nce 
#DEFINE_ALIAS -from ...fluid.layers import prelu #DEFINE_ALIAS -from ...fluid.layers import py_func #DEFINE_ALIAS -from ...fluid.layers import row_conv #DEFINE_ALIAS -from ...fluid.layers import spectral_norm #DEFINE_ALIAS -from ...fluid.layers import switch_case #DEFINE_ALIAS -from ...fluid.layers import while_loop #DEFINE_ALIAS - -from ...fluid.input import embedding #DEFINE_ALIAS -from ...fluid.contrib.layers import sparse_embedding #DEFINE_ALIAS - -from ...fluid.layers.sequence_lod import sequence_conv -from ...fluid.layers.sequence_lod import sequence_softmax -from ...fluid.layers.sequence_lod import sequence_pool -from ...fluid.layers.sequence_lod import sequence_concat -from ...fluid.layers.sequence_lod import sequence_first_step -from ...fluid.layers.sequence_lod import sequence_last_step -from ...fluid.layers.sequence_lod import sequence_slice -from ...fluid.layers.sequence_lod import sequence_expand -from ...fluid.layers.sequence_lod import sequence_expand_as -from ...fluid.layers.sequence_lod import sequence_pad -from ...fluid.layers.sequence_lod import sequence_unpad -from ...fluid.layers.sequence_lod import sequence_reshape -from ...fluid.layers.sequence_lod import sequence_scatter -from ...fluid.layers.sequence_lod import sequence_enumerate -from ...fluid.layers.sequence_lod import sequence_reverse diff --git a/python/paddle/static/nn/common.py b/python/paddle/static/nn/common.py index f917b4fa09a..88802026db8 100755 --- a/python/paddle/static/nn/common.py +++ b/python/paddle/static/nn/common.py @@ -15,8 +15,6 @@ import paddle from paddle.fluid.framework import static_only -__all__ = ['fc', 'deform_conv2d'] - @static_only def fc(x, diff --git a/python/paddle/text/__init__.py b/python/paddle/text/__init__.py index b6f8ea6bcc7..00eaae5b29e 100644 --- a/python/paddle/text/__init__.py +++ b/python/paddle/text/__init__.py @@ -12,7 +12,21 @@ # See the License for the specific language governing permissions and # limitations under the License. -from . import datasets -from .datasets import * +from .datasets import Conll05st # noqa: F401 +from .datasets import Imdb # noqa: F401 +from .datasets import Imikolov # noqa: F401 +from .datasets import Movielens # noqa: F401 +from .datasets import UCIHousing # noqa: F401 +from .datasets import WMT14 # noqa: F401 +from .datasets import WMT16 # noqa: F401 -__all__ = datasets.__all__ + +__all__ = [ #noqa + 'Conll05st', + 'Imdb', + 'Imikolov', + 'Movielens', + 'UCIHousing', + 'WMT14', + 'WMT16' +] diff --git a/python/paddle/text/datasets/__init__.py b/python/paddle/text/datasets/__init__.py index 71571d09b5c..9a00081469a 100644 --- a/python/paddle/text/datasets/__init__.py +++ b/python/paddle/text/datasets/__init__.py @@ -12,26 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -from . import conll05 -from . import imdb -from . import imikolov -from . import movielens -from . import uci_housing -from . import wmt14 -from . 
import wmt16 - -from .conll05 import * -from .imdb import * -from .imikolov import * -from .movielens import * -from .uci_housing import * -from .wmt14 import * -from .wmt16 import * - -__all__ = conll05.__all__ \ - + imdb.__all__ \ - + imikolov.__all__ \ - + movielens.__all__ \ - + uci_housing.__all__ \ - + wmt14.__all__ \ - + wmt16.__all__ +from .conll05 import Conll05st # noqa: F401 +from .imdb import Imdb # noqa: F401 +from .imikolov import Imikolov # noqa: F401 +from .movielens import Movielens # noqa: F401 +from .uci_housing import UCIHousing # noqa: F401 +from .wmt14 import WMT14 # noqa: F401 +from .wmt16 import WMT16 # noqa: F401 diff --git a/python/paddle/text/datasets/conll05.py b/python/paddle/text/datasets/conll05.py index 23a2f1c8f28..070c787db85 100644 --- a/python/paddle/text/datasets/conll05.py +++ b/python/paddle/text/datasets/conll05.py @@ -24,8 +24,6 @@ from paddle.io import Dataset import paddle.compat as cpt from paddle.dataset.common import _check_exists_and_download -__all__ = ['Conll05st'] - DATA_URL = 'http://paddlemodels.bj.bcebos.com/conll05st/conll05st-tests.tar.gz' DATA_MD5 = '387719152ae52d60422c016e92a742fc' WORDDICT_URL = 'http://paddlemodels.bj.bcebos.com/conll05st%2FwordDict.txt' diff --git a/python/paddle/text/datasets/imdb.py b/python/paddle/text/datasets/imdb.py index 142c70c953b..c64890dc43d 100644 --- a/python/paddle/text/datasets/imdb.py +++ b/python/paddle/text/datasets/imdb.py @@ -24,8 +24,6 @@ import collections from paddle.io import Dataset from paddle.dataset.common import _check_exists_and_download -__all__ = ['Imdb'] - URL = 'https://dataset.bj.bcebos.com/imdb%2FaclImdb_v1.tar.gz' MD5 = '7c2ac02c03563afcf9b574c7e56c153a' diff --git a/python/paddle/text/datasets/imikolov.py b/python/paddle/text/datasets/imikolov.py index 1a1c625f605..7e4daf731a2 100644 --- a/python/paddle/text/datasets/imikolov.py +++ b/python/paddle/text/datasets/imikolov.py @@ -22,8 +22,6 @@ import collections from paddle.io import Dataset from paddle.dataset.common import _check_exists_and_download -__all__ = ['Imikolov'] - URL = 'https://dataset.bj.bcebos.com/imikolov%2Fsimple-examples.tgz' MD5 = '30177ea32e27c525793142b6bf2c8e2d' diff --git a/python/paddle/text/datasets/movielens.py b/python/paddle/text/datasets/movielens.py index 1f399eebd3b..7741e82194c 100644 --- a/python/paddle/text/datasets/movielens.py +++ b/python/paddle/text/datasets/movielens.py @@ -26,8 +26,6 @@ from paddle.io import Dataset import paddle.compat as cpt from paddle.dataset.common import _check_exists_and_download -__all__ = ['Movielens'] - age_table = [1, 18, 25, 35, 45, 50, 56] URL = 'https://dataset.bj.bcebos.com/movielens%2Fml-1m.zip' diff --git a/python/paddle/text/datasets/uci_housing.py b/python/paddle/text/datasets/uci_housing.py index a8dfbc44a97..c876ed409cf 100644 --- a/python/paddle/text/datasets/uci_housing.py +++ b/python/paddle/text/datasets/uci_housing.py @@ -21,8 +21,6 @@ import paddle from paddle.io import Dataset from paddle.dataset.common import _check_exists_and_download -__all__ = ["UCIHousing"] - URL = 'http://paddlemodels.bj.bcebos.com/uci_housing/housing.data' MD5 = 'd4accdce7a25600298819f8e28e8d593' feature_names = [ diff --git a/python/paddle/text/datasets/wmt14.py b/python/paddle/text/datasets/wmt14.py index b080824d724..96d29c79c6a 100644 --- a/python/paddle/text/datasets/wmt14.py +++ b/python/paddle/text/datasets/wmt14.py @@ -22,8 +22,6 @@ from paddle.io import Dataset import paddle.compat as cpt from paddle.dataset.common import _check_exists_and_download -__all__ = 
['WMT14'] - URL_DEV_TEST = ('http://www-lium.univ-lemans.fr/~schwenk/' 'cslm_joint_paper/data/dev+test.tgz') MD5_DEV_TEST = '7d7897317ddd8ba0ae5c5fa7248d3ff5' diff --git a/python/paddle/text/datasets/wmt16.py b/python/paddle/text/datasets/wmt16.py index 03a62e93470..5605fd2aecb 100644 --- a/python/paddle/text/datasets/wmt16.py +++ b/python/paddle/text/datasets/wmt16.py @@ -27,8 +27,6 @@ from paddle.io import Dataset import paddle.compat as cpt from paddle.dataset.common import _check_exists_and_download -__all__ = ['WMT16'] - DATA_URL = ("http://paddlemodels.bj.bcebos.com/wmt/wmt16.tar.gz") DATA_MD5 = "0c38be43600334966403524a40dcd81e" -- GitLab From 7f162b5e4fbd119d2ba6ee6a96ae317d0f0f940c Mon Sep 17 00:00:00 2001 From: Huihuang Zheng Date: Mon, 26 Apr 2021 15:48:12 +0800 Subject: [PATCH 004/720] Make assign Doc Same for creation.py and layers/tensor.py, test=document_fix (#32553) A follow up PR of #32420, we changed the doc of python/paddle/fluid/layers/tensor.py in that PR and we are changing python/paddle/tensor/creation.py in this PR. --- python/paddle/tensor/creation.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/python/paddle/tensor/creation.py b/python/paddle/tensor/creation.py index 4cf10f8a69c..1817ce8256d 100644 --- a/python/paddle/tensor/creation.py +++ b/python/paddle/tensor/creation.py @@ -1036,8 +1036,10 @@ def assign(x, output=None): The OP copies the :attr:`x` to the :attr:`output`. Parameters: - x (Tensor|numpy.ndarray|list|tuple|scalar): A tensor, numpy ndarray, tuple, list or scalar, - its data type supports float16, float32, float64, int32, int64, and bool. + x (Tensor|numpy.ndarray|list|tuple|scalar): A tensor, numpy ndarray, tuple/list of scalar, + or scalar. Its data type supports float16, float32, float64, int32, int64, and bool. + Note: the float64 data will be converted to float32 because of current platform protobuf + data limitation. output (Tensor, optional): A tensor. If :attr:`output` is None, a new tensor will be created as :attr:`output`. Default: None. 
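As a quick sketch of what the clarified docstring above describes (illustrative usage only, not taken from this patch; the variable names are made up):

    import paddle
    import numpy as np

    x = paddle.to_tensor([1.5, 2.5])
    out_tensor = paddle.assign(x)                                   # Tensor input
    out_ndarray = paddle.assign(np.array([3, 4, 5], dtype='int32'))  # numpy ndarray input
    out_list = paddle.assign([1.0, 2.0, 3.0])                       # list/tuple of scalars, or a plain scalar
    # per the added note, float64 numpy data would be converted to float32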
-- GitLab From 913317fe0ee37f87c09a120a8eb2efa986497ffb Mon Sep 17 00:00:00 2001 From: ceci3 Date: Mon, 26 Apr 2021 17:04:48 +0800 Subject: [PATCH 005/720] fix bn docs (#32492) * fix bn docs * fix unittest --- .../tests/unittests/test_imperative_layers.py | 6 ++-- python/paddle/nn/layer/norm.py | 28 ++++++++++++++++++- 2 files changed, 31 insertions(+), 3 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_layers.py b/python/paddle/fluid/tests/unittests/test_imperative_layers.py index 214339c50d6..dc15566f854 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_layers.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_layers.py @@ -210,7 +210,8 @@ class TestLayerPrint(unittest.TestCase): module = nn.BatchNorm1D(1) self.assertEqual( str(module), - 'BatchNorm1D(num_features=1, momentum=0.9, epsilon=1e-05)') + 'BatchNorm1D(num_features=1, momentum=0.9, epsilon=1e-05, data_format=NCL)' + ) module = nn.BatchNorm2D(1) self.assertEqual( @@ -220,7 +221,8 @@ class TestLayerPrint(unittest.TestCase): module = nn.BatchNorm3D(1) self.assertEqual( str(module), - 'BatchNorm3D(num_features=1, momentum=0.9, epsilon=1e-05)') + 'BatchNorm3D(num_features=1, momentum=0.9, epsilon=1e-05, data_format=NCDHW)' + ) module = nn.SyncBatchNorm(2) self.assertEqual( diff --git a/python/paddle/nn/layer/norm.py b/python/paddle/nn/layer/norm.py index a1cc41f3912..0b0b2bf7b9b 100644 --- a/python/paddle/nn/layer/norm.py +++ b/python/paddle/nn/layer/norm.py @@ -745,6 +745,19 @@ class BatchNorm1D(_BatchNormBase): print(batch_norm_out) """ + def __init__(self, + num_features, + momentum=0.9, + epsilon=1e-05, + weight_attr=None, + bias_attr=None, + data_format='NCL', + use_global_stats=None, + name=None): + super(BatchNorm1D, + self).__init__(num_features, momentum, epsilon, weight_attr, + bias_attr, data_format, use_global_stats, name) + def _check_data_format(self, input): if input == 'NCHW' or input == 'NC' or input == 'NCL': self._data_format = 'NCHW' @@ -924,6 +937,19 @@ class BatchNorm3D(_BatchNormBase): print(batch_norm_out) """ + def __init__(self, + num_features, + momentum=0.9, + epsilon=1e-05, + weight_attr=None, + bias_attr=None, + data_format='NCDHW', + use_global_stats=None, + name=None): + super(BatchNorm3D, + self).__init__(num_features, momentum, epsilon, weight_attr, + bias_attr, data_format, use_global_stats, name) + def _check_data_format(self, input): if input == 'NCHW' or input == 'NCDHW': self._data_format = 'NCHW' @@ -1036,7 +1062,7 @@ class SyncBatchNorm(_BatchNormBase): name=None): super(SyncBatchNorm, self).__init__(num_features, momentum, epsilon, weight_attr, - bias_attr, data_format, name) + bias_attr, data_format, None, name) def forward(self, x): # create output -- GitLab From 4b7242b0d8c7917a8e23e49ee8ebf4c460a392cd Mon Sep 17 00:00:00 2001 From: Thunderbrook <52529258+Thunderbrook@users.noreply.github.com> Date: Mon, 26 Apr 2021 19:05:12 +0800 Subject: [PATCH 006/720] [PsCore] optimize performance of large kv (#32535) * optimize pull sparse * optimize pull sparse * change macro * format --- CMakeLists.txt | 5 + .../distributed/service/brpc_ps_server.cc | 23 +-- .../distributed/table/common_sparse_table.cc | 55 +++--- .../table/depends/large_scale_kv.h | 158 ++++++++++-------- .../framework/fleet/heter_ps/CMakeLists.txt | 7 +- .../distributed/fleet/runtime/the_one_ps.py | 45 +++-- .../distributed_strategy.py | 1 + .../fleet/parameter_server/ir/public.py | 1 + 8 files changed, 176 insertions(+), 119 deletions(-) diff --git a/CMakeLists.txt 
b/CMakeLists.txt index d874b21b087..2d13874f178 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -352,6 +352,11 @@ if (WITH_MIPS) add_definitions(-DPADDLE_WITH_MIPS) endif() +if (WITH_HETERPS) + if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 7.0) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -faligned-new") + endif() +endif() set(PADDLE_PYTHON_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/python/build") set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O3 -g -DNDEBUG") diff --git a/paddle/fluid/distributed/service/brpc_ps_server.cc b/paddle/fluid/distributed/service/brpc_ps_server.cc index a9370561a54..a1440260bf2 100644 --- a/paddle/fluid/distributed/service/brpc_ps_server.cc +++ b/paddle/fluid/distributed/service/brpc_ps_server.cc @@ -14,6 +14,7 @@ #include "paddle/fluid/distributed/service/brpc_ps_server.h" #include // NOLINT +#include "butil/object_pool.h" #include "paddle/fluid/distributed/table/depends/sparse_utils.h" #include "paddle/fluid/distributed/table/table.h" #include "paddle/fluid/framework/archive.h" @@ -196,12 +197,13 @@ int32_t BrpcPsService::pull_dense(Table *table, const PsRequestMessage &request, return 0; } - std::vector res_data; - res_data.resize(num * table->value_accesor()->select_size() / sizeof(float)); - table->pull_dense(res_data.data(), num); + auto res_data = butil::get_object>(); + res_data->resize(num * table->value_accesor()->select_size() / sizeof(float)); + table->pull_dense(res_data->data(), num); - cntl->response_attachment().append((char *)res_data.data(), - res_data.size() * sizeof(float)); + cntl->response_attachment().append((char *)(res_data->data()), + res_data->size() * sizeof(float)); + butil::return_object(res_data); return 0; } @@ -367,12 +369,13 @@ int32_t BrpcPsService::pull_sparse(Table *table, value.DeserializeFromBytes(const_cast(data)); - std::vector res_data; - res_data.resize(num * dim); - table->pull_sparse(res_data.data(), value); + auto res_data = butil::get_object>(); + res_data->resize(num * dim); + table->pull_sparse(res_data->data(), value); - cntl->response_attachment().append((char *)res_data.data(), - res_data.size() * sizeof(float)); + cntl->response_attachment().append((char *)(res_data->data()), + res_data->size() * sizeof(float)); + butil::return_object(res_data); return 0; } diff --git a/paddle/fluid/distributed/table/common_sparse_table.cc b/paddle/fluid/distributed/table/common_sparse_table.cc index 1c315d34abc..718fce99507 100644 --- a/paddle/fluid/distributed/table/common_sparse_table.cc +++ b/paddle/fluid/distributed/table/common_sparse_table.cc @@ -125,34 +125,37 @@ void ProcessALine(const std::vector& columns, const Meta& meta, int64_t SaveToText(std::ostream* os, std::shared_ptr block, const int mode) { - int64_t not_save_num = 0; - for (auto& value : block->values_) { - if (mode == SaveMode::delta && !value.second.need_save_) { - not_save_num++; - continue; - } - - auto* vs = value.second.data_; - std::stringstream ss; - auto id = value.first; - ss << id << "\t" << value.second.count_ << "\t" << value.second.unseen_days_ - << "\t" << value.second.is_entry_ << "\t"; - - for (int i = 0; i < block->value_length_; i++) { - ss << vs[i]; - ss << ","; - } + int64_t save_num = 0; + for (auto& table : block->values_) { + for (auto& value : table) { + if (mode == SaveMode::delta && !value.second->need_save_) { + continue; + } + save_num += 1; + + auto* vs = value.second->data_.data(); + std::stringstream ss; + auto id = value.first; + ss << id << "\t" << value.second->count_ << "\t" + << value.second->unseen_days_ << "\t" << 
value.second->is_entry_ + << "\t"; + + for (int i = 0; i < block->value_length_; i++) { + ss << vs[i]; + ss << ","; + } - ss << "\n"; + ss << "\n"; - os->write(ss.str().c_str(), sizeof(char) * ss.str().size()); + os->write(ss.str().c_str(), sizeof(char) * ss.str().size()); - if (mode == SaveMode::base || mode == SaveMode::delta) { - value.second.need_save_ = false; + if (mode == SaveMode::base || mode == SaveMode::delta) { + value.second->need_save_ = false; + } } } - return block->values_.size() - not_save_num; + return save_num; } int64_t LoadFromText(const std::string& valuepath, const std::string& metapath, @@ -183,7 +186,7 @@ int64_t LoadFromText(const std::string& valuepath, const std::string& metapath, block->Init(id, false); - auto value_instant = block->GetValue(id); + VALUE* value_instant = block->GetValue(id); if (values.size() == 5) { value_instant->count_ = std::stoi(values[1]); value_instant->unseen_days_ = std::stoi(values[2]); @@ -373,8 +376,10 @@ std::pair CommonSparseTable::print_table_stat() { int64_t feasign_size = 0; int64_t mf_size = 0; - for (auto& value : shard_values_) { - feasign_size += value->values_.size(); + for (auto& shard : shard_values_) { + for (auto& table : shard->values_) { + feasign_size += table.size(); + } } return {feasign_size, mf_size}; diff --git a/paddle/fluid/distributed/table/depends/large_scale_kv.h b/paddle/fluid/distributed/table/depends/large_scale_kv.h index bb4174bd2c5..5c10fca98cd 100644 --- a/paddle/fluid/distributed/table/depends/large_scale_kv.h +++ b/paddle/fluid/distributed/table/depends/large_scale_kv.h @@ -26,6 +26,7 @@ #include #include "gflags/gflags.h" +#include "butil/object_pool.h" #include "paddle/fluid/distributed/common/utils.h" #include "paddle/fluid/distributed/table/depends/initializers.h" #include "paddle/fluid/distributed/thirdparty/round_robin.h" @@ -48,6 +49,10 @@ namespace distributed { enum Mode { training, infer }; +static const int SPARSE_SHARD_BUCKET_NUM_BITS = 6; +static const size_t SPARSE_SHARD_BUCKET_NUM = (size_t)1 + << SPARSE_SHARD_BUCKET_NUM_BITS; + struct VALUE { explicit VALUE(size_t length) : length_(length), @@ -55,46 +60,16 @@ struct VALUE { unseen_days_(0), need_save_(false), is_entry_(false) { - data_ = new float[length]; - memset(data_, 0, sizeof(float) * length); - } - - VALUE(const VALUE &value) { - length_ = value.length_; - count_ = value.count_; - unseen_days_ = value.unseen_days_; - need_save_ = value.need_save_; - is_entry_ = value.is_entry_; - data_ = new float[length_]; - memcpy(data_, value.data_, sizeof(float) * length_); - } - - VALUE &operator=(const VALUE &value) { - if (this != &value) { - delete[] data_; - length_ = value.length_; - count_ = value.count_; - unseen_days_ = value.unseen_days_; - need_save_ = value.need_save_; - is_entry_ = value.is_entry_; - - data_ = new float[length_]; - memcpy(data_, value.data_, sizeof(float) * length_); - } - return *this; - } - - ~VALUE() { - delete[] data_; - data_ = nullptr; + data_.resize(length); + memset(data_.data(), 0, sizeof(float) * length); } size_t length_; + std::vector data_; int count_; int unseen_days_; // use to check knock-out bool need_save_; // whether need to save bool is_entry_; // whether knock-in - float *data_; }; inline bool count_entry(VALUE *value, int threshold) { @@ -176,12 +151,12 @@ class ValueBlock { const std::vector &value_dims) { auto pts = std::vector(); pts.reserve(value_names.size()); - auto &values = values_.at(id); + auto values = GetValue(id); for (int i = 0; i < static_cast(value_names.size()); i++) 
{ PADDLE_ENFORCE_EQ( value_dims[i], value_dims_[i], platform::errors::InvalidArgument("value dims is not match")); - pts.push_back(values.data_ + + pts.push_back(values->data_.data() + value_offsets_.at(value_idx_.at(value_names[i]))); } return pts; @@ -190,33 +165,45 @@ class ValueBlock { // pull float *Init(const uint64_t &id, const bool with_update = true, const int counter = 1) { - if (!Has(id)) { - values_.emplace(std::make_pair(id, VALUE(value_length_))); - } + size_t hash = _hasher(id); + size_t bucket = compute_bucket(hash); - auto &value = values_.at(id); + auto &table = values_[bucket]; + auto res = table.find(id); - if (with_update) { - AttrUpdate(&value, counter); + VALUE *value = nullptr; + if (res == table.end()) { + value = butil::get_object(value_length_); + + table[id] = value; + + } else { + value = res->second; } - return value.data_; + if (with_update) { + AttrUpdate(value, counter); + } + return value->data_.data(); } - VALUE *InitGet(const uint64_t &id, const bool with_update = true, const int counter = 1) { - if (!Has(id)) { - values_.emplace(std::make_pair(id, VALUE(value_length_))); - } + size_t hash = _hasher(id); + size_t bucket = compute_bucket(hash); - auto &value = values_.at(id); + auto &table = values_[bucket]; + auto res = table.find(id); - if (with_update) { - AttrUpdate(&value, counter); + VALUE *value = nullptr; + if (res == table.end()) { + value = butil::get_object(value_length_); + // value = _alloc.acquire(value_length_); + table[id] = value; + } else { + value = (VALUE *)(void *)(res->second); } - - return &value; + return value; } void AttrUpdate(VALUE *value, const int counter) { @@ -229,7 +216,7 @@ class ValueBlock { if (value->is_entry_) { // initialize for (size_t x = 0; x < value_names_.size(); ++x) { - initializers_[x]->GetValue(value->data_ + value_offsets_[x], + initializers_[x]->GetValue(value->data_.data() + value_offsets_[x], value_dims_[x]); } value->need_save_ = true; @@ -243,42 +230,73 @@ class ValueBlock { // dont jude if (has(id)) float *Get(const uint64_t &id) { - auto &value = values_.at(id); - return value.data_; + size_t hash = _hasher(id); + size_t bucket = compute_bucket(hash); + auto &table = values_[bucket]; + + // auto &value = table.at(id); + // return value->data_.data(); + auto res = table.find(id); + VALUE *value = res->second; + return value->data_.data(); } // for load, to reset count, unseen_days - VALUE *GetValue(const uint64_t &id) { return &values_.at(id); } + VALUE *GetValue(const uint64_t &id) { + size_t hash = _hasher(id); + size_t bucket = compute_bucket(hash); + + auto &table = values_[bucket]; + auto res = table.find(id); + return res->second; + } bool GetEntry(const uint64_t &id) { - auto &value = values_.at(id); - return value.is_entry_; + auto value = GetValue(id); + return value->is_entry_; } void SetEntry(const uint64_t &id, const bool state) { - auto &value = values_.at(id); - value.is_entry_ = state; + auto value = GetValue(id); + value->is_entry_ = state; } void Shrink(const int threshold) { - for (auto iter = values_.begin(); iter != values_.end();) { - auto &value = iter->second; - value.unseen_days_++; - if (value.unseen_days_ >= threshold) { - iter = values_.erase(iter); - } else { - ++iter; + for (auto &table : values_) { + for (auto iter = table.begin(); iter != table.end();) { + // VALUE* value = (VALUE*)(void*)(iter->second); + VALUE *value = iter->second; + value->unseen_days_++; + if (value->unseen_days_ >= threshold) { + butil::return_object(iter->second); + //_alloc.release(iter->second); 
+ //_alloc.release(value); + iter = table.erase(iter); + } else { + ++iter; + } } } return; } float GetThreshold() { return threshold_; } + size_t compute_bucket(size_t hash) { + if (SPARSE_SHARD_BUCKET_NUM == 1) { + return 0; + } else { + return hash >> (sizeof(size_t) * 8 - SPARSE_SHARD_BUCKET_NUM_BITS); + } + } private: bool Has(const uint64_t id) { - auto got = values_.find(id); - if (got == values_.end()) { + size_t hash = _hasher(id); + size_t bucket = compute_bucket(hash); + auto &table = values_[bucket]; + + auto got = table.find(id); + if (got == table.end()) { return false; } else { return true; @@ -286,8 +304,9 @@ class ValueBlock { } public: - robin_hood::unordered_map values_; + robin_hood::unordered_map values_[SPARSE_SHARD_BUCKET_NUM]; size_t value_length_ = 0; + std::hash _hasher; private: const std::vector &value_names_; @@ -302,4 +321,3 @@ class ValueBlock { } // namespace distributed } // namespace paddle - diff --git a/paddle/fluid/framework/fleet/heter_ps/CMakeLists.txt b/paddle/fluid/framework/fleet/heter_ps/CMakeLists.txt index 6df2cd52bb4..db562045dcc 100644 --- a/paddle/fluid/framework/fleet/heter_ps/CMakeLists.txt +++ b/paddle/fluid/framework/fleet/heter_ps/CMakeLists.txt @@ -1,5 +1,10 @@ IF(WITH_GPU) - nv_library(heter_comm SRCS heter_comm.h feature_value.h heter_resource.cc heter_resource.h hashtable.h DEPS cub device_context) + SET(HETERPS_DEPS device_context) + if (${CMAKE_CUDA_COMPILER_VERSION} LESS 11.0) + SET(HETERPS_DEPS ${HETERPS_DEPS} cub) + endif() + + nv_library(heter_comm SRCS heter_comm.h feature_value.h heter_resource.cc heter_resource.h hashtable.h DEPS ${HETERPS_DEPS}) nv_test(test_heter_comm SRCS test_heter_comm.cu feature_value.h DEPS heter_comm) nv_library(heter_ps SRCS heter_ps.cu DEPS heter_comm) ENDIF() diff --git a/python/paddle/distributed/fleet/runtime/the_one_ps.py b/python/paddle/distributed/fleet/runtime/the_one_ps.py index df07a7a6e77..24b83662c9d 100644 --- a/python/paddle/distributed/fleet/runtime/the_one_ps.py +++ b/python/paddle/distributed/fleet/runtime/the_one_ps.py @@ -77,10 +77,13 @@ class CommonAccessor: ("Moment2", None), ("Beta1Pow", 1), ("Beta2Pow", 1), ("LearningRate", 1)] opt_input_map["sum"] = [("Param", None)] + opt_input_map["naive_adagrad"] = [("Param", None), ("G2Sum", 1), + ("LearningRate", 1)] opt_attr_map = {} opt_attr_map["sgd"] = [] opt_attr_map["sum"] = [] + opt_attr_map["naive_adagrad"] = [] opt_attr_map["adam"] = [("beta1", "f"), ("beta2", "f"), ("epsilon", "f")] @@ -169,6 +172,10 @@ class CommonAccessor: param_varnames = self.opt_input_map["sum"] attr_varnames = self.opt_attr_map["sum"] self.accessor_class = "sum" + elif compiled_strategy.use_ps_gpu and is_sparse: + param_varnames = self.opt_input_map["naive_adagrad"] + attr_varnames = self.opt_attr_map["naive_adagrad"] + self.accessor_class = "sgd" else: param_varnames = self.opt_input_map[oop.type] attr_varnames = self.opt_attr_map[oop.type] @@ -176,20 +183,28 @@ class CommonAccessor: for (formal_name, shape) in param_varnames: params.append(formal_name) - param = main_program.global_block().vars[oop.input(formal_name)[0]] - if formal_name == "LearningRate" and param.name != "learning_rate_0": - warnings.warn("will support decay soon") - param = main_program.global_block().vars["learning_rate_0"] - - if shape is None: - if is_sparse: - shape = total_dims - else: - shape = self.get_shard(total_dims, pserver_num, pserver_id) - dims.append(shape) + if formal_name == "G2Sum": + dims.append(1) + initializer = "fill_constant&0" + 
initializers.append(initializer) + else: + param = main_program.global_block().vars[oop.input(formal_name)[ + 0]] + if formal_name == "LearningRate" and param.name != "learning_rate_0": + warnings.warn("will support decay soon") + param = main_program.global_block().vars["learning_rate_0"] + + if shape is None: + if is_sparse: + shape = total_dims + else: + shape = self.get_shard(total_dims, pserver_num, + pserver_id) + dims.append(shape) - initializer = self.get_initializer_attr(param.name, startup_program) - initializers.append(initializer) + initializer = self.get_initializer_attr(param.name, + startup_program) + initializers.append(initializer) for (attr_varname, type_) in attr_varnames: value = oop.attr(attr_varname) @@ -435,6 +450,8 @@ class TheOnePSRuntime(RuntimeBase): if not strategy: raise ValueError("k_steps must be invalid value, please check") + if dist_strategy.a_sync_configs["use_ps_gpu"]: + strategy.use_ps_gpu = True return strategy def build_compiled_startegy(self): @@ -443,6 +460,8 @@ class TheOnePSRuntime(RuntimeBase): compiled_config = CompileTimeStrategy( self.origin_main_program, self.origin_main_program, self.async_strategy, self.role_maker) + if self.async_strategy.use_ps_gpu: + compiled_config.use_ps_gpu = True return compiled_config def _init_worker(self): diff --git a/python/paddle/fluid/incubate/fleet/parameter_server/distribute_transpiler/distributed_strategy.py b/python/paddle/fluid/incubate/fleet/parameter_server/distribute_transpiler/distributed_strategy.py index 35029a3dfc7..2a9d26daaed 100644 --- a/python/paddle/fluid/incubate/fleet/parameter_server/distribute_transpiler/distributed_strategy.py +++ b/python/paddle/fluid/incubate/fleet/parameter_server/distribute_transpiler/distributed_strategy.py @@ -149,6 +149,7 @@ class DistributedStrategy(object): if num_threads > 1: self._build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.Reduce self.debug_opt = None + self.use_ps_gpu = False def set_debug_opt(self, opt_info): self.debug_opt = opt_info diff --git a/python/paddle/fluid/incubate/fleet/parameter_server/ir/public.py b/python/paddle/fluid/incubate/fleet/parameter_server/ir/public.py index baf8add04ca..b2735727f67 100644 --- a/python/paddle/fluid/incubate/fleet/parameter_server/ir/public.py +++ b/python/paddle/fluid/incubate/fleet/parameter_server/ir/public.py @@ -138,6 +138,7 @@ class CompileTimeStrategy(object): self.strategy = strategy self.role_maker = role_maker + self.use_ps_gpu = False try: self.is_heter_ps_mode = role_maker._is_heter_parameter_server_mode except: -- GitLab From 6ec4e6409d213ce3c32bdac24151b97738625f70 Mon Sep 17 00:00:00 2001 From: jiangcheng Date: Mon, 26 Apr 2021 19:42:33 +0800 Subject: [PATCH 007/720] Optimize where_index_op(prefix sum) (#30601) * new optimize for where_index_op with prefix sum version. * write a scan prefix sum kernel with stream for where index op. * optimize where_index by using cub::DeviceScan::InclusiveSum instead of imperfect self-kernel. * remove CheckTrue struct and rename stide_array for readable. * optimize variable name for readable. * optimize function name and annotation. 
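The bullets above summarize the prefix-sum approach: an inclusive scan over the boolean condition gives, for every true element, its row in the output tensor, and the last scan value gives the total number of true elements. A small NumPy sketch of the same idea (illustrative only; it mirrors the example in the kernel comments below rather than the actual CUDA code):

    import numpy as np

    cond = np.array([0, 1, 1, 0, 0, 0, 1, 1])   # condition: F T T F F F T T
    inclusive = np.cumsum(cond)                  # [0 1 2 2 2 2 3 4], like cub::DeviceScan::InclusiveSum
    total_true = int(inclusive[-1])              # 4 -> the output has 4 rows
    out = np.flatnonzero(cond)                   # [1 2 6 7]
    # for each i with cond[i] true, inclusive[i] - 1 is exactly its slot in out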
--- paddle/fluid/operators/where_index_op.cu | 156 +++++++++++++++++------ 1 file changed, 118 insertions(+), 38 deletions(-) diff --git a/paddle/fluid/operators/where_index_op.cu b/paddle/fluid/operators/where_index_op.cu index bb968743585..b1cd172923e 100644 --- a/paddle/fluid/operators/where_index_op.cu +++ b/paddle/fluid/operators/where_index_op.cu @@ -12,7 +12,15 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include +#ifdef __NVCC__ +#include "cub/cub.cuh" +#endif +#ifdef __HIPCC__ +#include +namespace cub = hipcub; +#endif + +#include #include "paddle/fluid/framework/ddim.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/where_index_op.h" @@ -25,52 +33,124 @@ namespace operators { using CUDADeviceContext = paddle::platform::CUDADeviceContext; template -class CUDAWhereIndexKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& context) const override { - auto* condition = context.Input("Condition"); - auto* out = context.Output("Out"); - - // TODO(zhoukunsheng): Should optimize to ensure GPU is faster than CPU. - framework::Tensor cond_cpu; - framework::TensorCopy(*condition, platform::CPUPlace(), &cond_cpu); - - const T* cond_data = cond_cpu.data(); - int64_t numel = cond_cpu.numel(); - auto dims = cond_cpu.dims(); - int rank = dims.size(); - - thrust::host_vector h_true_index; - for (int64_t i = 0; i < numel; i++) { - if (static_cast(cond_data[i])) { - h_true_index.push_back(i); +__global__ void GetTrueNum(const T *cond_data, const int64_t numel, + int64_t *true_num_array) { + const int64_t tid = blockIdx.x * blockDim.x + threadIdx.x; + + for (int64_t idx = tid; idx < numel; idx += gridDim.x * blockDim.x) { + true_num_array[idx] = + static_cast(static_cast(cond_data[idx])); + } +} + +template +__global__ void SetTrueIndex(int64_t *out_ptr, const T *cond_data, + const int64_t numel, const int64_t *stride_array, + const int64_t rank, + const int64_t *true_num_array) { + const int64_t tid = blockIdx.x * blockDim.x + threadIdx.x; + + for (int64_t idx = tid; idx < numel; idx += gridDim.x * blockDim.x) { + // true_num_array is calculated by cub::InclusiveSum, + // cause the first element of true_num_array is 1, + // so we need substract 1 to get true index. 
+ const int64_t true_index = true_num_array[idx] - 1; + if (static_cast(cond_data[idx])) { + int64_t rank_index = idx; + for (int j = 0; j < rank; j++) { + const int64_t out_index = rank_index / stride_array[j]; + out_ptr[true_index * rank + j] = out_index; + rank_index -= out_index * stride_array[j]; } } - thrust::device_vector d_true_index = h_true_index; - int64_t* ptr_true_index = thrust::raw_pointer_cast(d_true_index.data()); - - size_t true_num = h_true_index.size(); + } +} +template +class CUDAWhereIndexKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext &context) const override { + auto *condition = context.Input("Condition"); + auto *out = context.Output("Out"); + auto &dev_ctx = context.template device_context(); + + const T *cond_data = condition->data(); + const int64_t numel = condition->numel(); + auto dims = condition->dims(); + const int rank = dims.size(); + + auto d_array_mem = memory::Alloc(dev_ctx, (numel + rank) * sizeof(int64_t)); + auto h_array_mem = + memory::Alloc(platform::CPUPlace(), (rank + 1) * sizeof(int64_t)); + + // "stride_array" is an array and len(stride_array)==rank, + // each element is the stride of each dimension -- the length from i to i+1. + int64_t *h_stride_array = reinterpret_cast(h_array_mem->ptr()); + int64_t *d_stride_array = reinterpret_cast(d_array_mem->ptr()); + + // "true_num_array" is an array and len(stride_array)==numel, + // at the beginning, + // "true_num_array" will set 1 if condition[i] == true else 0, + // then it will be calculated by cub::InclusiveSum, + // so that we can get the true number before i as the out index + int64_t *d_true_num_array = d_stride_array + rank; + + // the total_true_num is the total number of condition[i] == true + int64_t *h_total_true_num = h_stride_array + rank; + + // alloce cub memory + size_t cub_size = 0; + cub::DeviceScan::InclusiveSum(nullptr, cub_size, d_true_num_array, + d_true_num_array, numel, dev_ctx.stream()); + auto cub_mem = memory::Alloc(dev_ctx, cub_size * sizeof(int64_t)); + void *cub_data = cub_mem->ptr(); + + // set d_true_num_array[i]=1 if cond_data[i]==true else 0 + const int threads = std::min(numel, static_cast(128)); + const int64_t need_grids = (numel + threads - 1) / threads; + const int grids = std::min(need_grids, static_cast(256)); + GetTrueNum<<>>(cond_data, numel, + d_true_num_array); + + // calculate the inclusive prefix sum of "true_num_array" + // to get the index of "out" tensor, + // and the total number of cond_data[i]==true. 
+ // Example: + // condition: F T T F F F T T + // before: 0 1 1 0 0 0 1 1 + // after: 0 1 2 2 2 2 3 4 + // out: 1 2 6 7 + cub::DeviceScan::InclusiveSum(cub_data, cub_size, d_true_num_array, + d_true_num_array, numel, dev_ctx.stream()); + + // calculate each dimension's stride + h_stride_array[rank - 1] = 1; + for (int i = rank - 2; i >= 0; i--) { + h_stride_array[i] = h_stride_array[i + 1] * dims[i + 1]; + } + memory::Copy(BOOST_GET_CONST(platform::CUDAPlace, dev_ctx.GetPlace()), + d_stride_array, platform::CPUPlace(), h_stride_array, + rank * sizeof(int64_t), dev_ctx.stream()); + + // get total ture number and set output size + // the last element of cub::InclusiveSum is the total number + memory::Copy(platform::CPUPlace(), h_total_true_num, + BOOST_GET_CONST(platform::CUDAPlace, dev_ctx.GetPlace()), + d_true_num_array + numel - 1, sizeof(int64_t), + dev_ctx.stream()); + dev_ctx.Wait(); + + int64_t true_num = *h_total_true_num; out->Resize(framework::make_ddim({static_cast(true_num), rank})); - auto out_ptr = out->mutable_data(context.GetPlace()); + auto out_data = out->mutable_data(context.GetPlace()); if (true_num == 0) { return; } - thrust::host_vector h_stride(rank, 0); - h_stride[rank - 1] = 1; - for (int i = rank - 2; i >= 0; i--) { - h_stride[i] = h_stride[i + 1] * dims[i + 1]; - } - thrust::device_vector d_stride = h_stride; - int64_t* ptr_stride = thrust::raw_pointer_cast(d_stride.data()); - - auto& dev_ctx = context.template device_context(); - WhereIndexFunctor functor(ptr_true_index, true_num, ptr_stride, - rank, out_ptr); - platform::ForRange for_range(dev_ctx, true_num); - for_range(functor); + // using true_num_array and stride_array to calculate the output index + SetTrueIndex<<>>( + out_data, cond_data, numel, d_stride_array, rank, d_true_num_array); } }; -- GitLab From 1ec9525a02933b847232097ca1924345e5fb48a9 Mon Sep 17 00:00:00 2001 From: Zhou Wei <52485244+zhouwei25@users.noreply.github.com> Date: Mon, 26 Apr 2021 19:49:19 +0800 Subject: [PATCH 008/720] Fix OPENBLAS ci and fix windows CPU CI to parallel compile (#32548) * clear CUDA compile environment on windows * fix Windows CI * fix Windows CI * fix Windows CI --- CMakeLists.txt | 9 +++++---- paddle/scripts/paddle_build.bat | 13 ++++++++++-- tools/windows/run_unittests.sh | 35 ++++++++++++++++++++------------- 3 files changed, 37 insertions(+), 20 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 2d13874f178..f30671bd3a8 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -105,9 +105,7 @@ if(WIN32) endforeach(flag_var) endif() - # NOTE(zhouwei25): temporarily change MP to 1 for reducing CPU & memory utilization - set(PROCESS_MAX 1) - #math(EXPR PROCESS_MAX "${CPU_CORES} * 1 / 2") + math(EXPR PROCESS_MAX "${CPU_CORES} * 2 / 3") # windows build turn off warnings, use parallel compiling. 
foreach(flag_var @@ -116,7 +114,10 @@ if(WIN32) CMAKE_C_FLAGS CMAKE_C_FLAGS_DEBUG CMAKE_C_FLAGS_RELEASE CMAKE_C_FLAGS_MINSIZEREL CMAKE_C_FLAGS_RELWITHDEBINFO) string(REGEX REPLACE "/W[1-4]" " /W0 " ${flag_var} "${${flag_var}}") - set(${flag_var} "${${flag_var}} /MP${PROCESS_MAX}") + # NOTE(zhouwei25): GPU compile have too high memory utilization when parallel compiling + if(NOT WITH_GPU) + set(${flag_var} "${${flag_var}} /MP${PROCESS_MAX}") + endif() endforeach(flag_var) foreach(flag_var CMAKE_CXX_FLAGS CMAKE_C_FLAGS) set(${flag_var} "${${flag_var}} /w") diff --git a/paddle/scripts/paddle_build.bat b/paddle/scripts/paddle_build.bat index 787f5297e74..439c8a4f241 100644 --- a/paddle/scripts/paddle_build.bat +++ b/paddle/scripts/paddle_build.bat @@ -150,7 +150,6 @@ rem ------pre install python requirement---------- where python where pip pip install wheel --user -pip install -r %work_dir%\python\unittest_py\requirements.txt --user pip install -r %work_dir%\python\requirements.txt --user if %ERRORLEVEL% NEQ 0 ( @@ -194,12 +193,15 @@ echo "Usage: paddle_build.bat [OPTION]" echo "OPTION:" echo "wincheck_mkl: run Windows MKL/GPU/UnitTest CI tasks on Windows" echo "wincheck_openbals: run Windows OPENBLAS/CPU CI tasks on Windows" +echo "build_avx_whl: build Windows avx whl package on Windows" +echo "build_no_avx_whl: build Windows no avx whl package on Windows" exit /b 1 rem ------PR CI windows check for MKL/GPU---------- :CASE_wincheck_mkl set WITH_MKL=ON set WITH_GPU=ON +set WITH_AVX=ON set MSVC_STATIC_CRT=OFF call :cmake || goto cmake_error @@ -212,8 +214,9 @@ goto:success rem ------PR CI windows check for OPENBLAS/CPU------ :CASE_wincheck_openblas -set WITH_MKL=ON +set WITH_MKL=OFF set WITH_GPU=OFF +set WITH_AVX=OFF set MSVC_STATIC_CRT=ON set retry_times=1 @@ -497,6 +500,12 @@ echo ======================================== echo Step 4. Running unit tests ... echo ======================================== +pip install -r %work_dir%\python\unittest_py\requirements.txt --user +if %ERRORLEVEL% NEQ 0 ( + echo pip install unittest requirements.txt failed! 
+ exit /b 7 +) + for /F %%# in ('wmic os get localdatetime^|findstr 20') do set start=%%# set start=%start:~4,10% diff --git a/tools/windows/run_unittests.sh b/tools/windows/run_unittests.sh index 0aeea63d6ab..d2cefcc441f 100644 --- a/tools/windows/run_unittests.sh +++ b/tools/windows/run_unittests.sh @@ -47,7 +47,7 @@ if [ ${WITH_GPU:-OFF} == "ON" ];then fi -# /*==================Fixed Disabled Windows unittests==============================*/ +# /*==================Fixed Disabled Windows GPU MKL unittests==============================*/ # TODO: fix these unittest that is bound to fail diable_wingpu_test="^lite_mul_model_test$|\ ^test_analyzer_int8_resnet50$|\ @@ -118,16 +118,32 @@ diable_wingpu_test="^lite_mul_model_test$|\ ^diable_wingpu_test$" # /*============================================================================*/ +# /*==================Fixed Disabled Windows CPU OPENBLAS unittests==============================*/ +# TODO: fix these unittest that is bound to fail +diable_wincpu_test="^jit_kernel_test$|\ +^test_analyzer_transformer$|\ +^test_vision_models$|\ +^test_dygraph_multi_forward$|\ +^test_imperative_transformer_sorted_gradient$|\ +^test_program_prune_backward$|\ +^test_imperative_resnet$|\ +^test_imperative_resnet_sorted_gradient$|\ +^test_imperative_se_resnext$|\ +^test_imperative_static_runner_mnist$|\ +^test_bmn$|\ +^test_mobile_net$|\ +^test_resnet_v2$|\ +^test_se_resnet$|\ +^diable_wincpu_test$" + # these unittest that cost long time, diabled temporarily, Maybe moved to the night long_time_test="^best_fit_allocator_test$|\ -^test_image_classification$|\ ^decorator_test$|\ ^test_dataset_cifar$|\ ^test_dataset_imdb$|\ ^test_dataset_movielens$|\ ^test_datasets$|\ ^test_pretrained_model$|\ -^test_concat_op$|\ ^test_elementwise_add_op$|\ ^test_elementwise_sub_op$|\ ^test_gather_op$|\ @@ -143,8 +159,6 @@ long_time_test="^best_fit_allocator_test$|\ ^test_bicubic_interp_op$|\ ^test_bicubic_interp_v2_op$|\ ^test_bilinear_interp_v2_op$|\ -^test_conv2d_op$|\ -^test_conv3d_op$| ^test_conv3d_transpose_part2_op$|\ ^test_conv_nn_grad$|\ ^test_crop_tensor_op$|\ @@ -158,7 +172,6 @@ long_time_test="^best_fit_allocator_test$|\ ^test_empty_op$|\ ^test_fused_elemwise_activation_op$|\ ^test_group_norm_op$|\ -^test_gru_op$|\ ^test_gru_unit_op$|\ ^test_imperative_lod_tensor_to_selected_rows$|\ ^test_imperative_optimizer$|\ @@ -206,14 +219,8 @@ long_time_test="^best_fit_allocator_test$|\ ^test_imperative_auto_mixed_precision$|\ ^test_imperative_optimizer_v2$|\ ^test_imperative_ptb_rnn_sorted_gradient$|\ -^test_imperative_save_load_v2$|\ -^test_nan_inf$|\ -^test_norm_op$|\ -^test_reduce_op$|\ ^test_sigmoid_cross_entropy_with_logits_op$|\ -^test_stack_op$|\ -^test_strided_slice_op$|\ -^test_transpose_op$" +^test_strided_slice_op$" if [ ${WITH_GPU:-OFF} == "ON" ];then export FLAGS_call_stack_level=2 @@ -267,7 +274,7 @@ function collect_failed_tests() { function run_unittest_cpu() { tmpfile=$tmp_dir/$RANDOM - (ctest -E "${disable_ut_quickly}" -LE "${nightly_label}" --output-on-failure -C Release -j 8 | tee $tmpfile) & + (ctest -E "$disable_ut_quickly|$diable_wincpu_test" -LE "${nightly_label}" --output-on-failure -C Release -j 8 | tee $tmpfile) & wait; } -- GitLab From fcd18ef11020fbc30708ea3748390b33f53770a2 Mon Sep 17 00:00:00 2001 From: zhangchunle Date: Mon, 26 Apr 2021 20:05:19 +0800 Subject: [PATCH 009/720] fix no-value-for-parameter in iscan (#32551) --- python/paddle/fluid/distributed/ps_instance.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/python/paddle/fluid/distributed/ps_instance.py b/python/paddle/fluid/distributed/ps_instance.py index 61b2bcad01d..42033a0ada4 100644 --- a/python/paddle/fluid/distributed/ps_instance.py +++ b/python/paddle/fluid/distributed/ps_instance.py @@ -24,7 +24,7 @@ class PaddlePSInstance(object): instance = PaddlePSInstance(1, 2) """ - def __init__(self, server_worker_mode, proc_per_node): + def __init__(self, server_worker_mode=1, proc_per_node=2): self.dh = MPIHelper() self._rankid = self.dh.get_rank() self._server_worker_mode = server_worker_mode -- GitLab From 4ba49af5773818547859b1be6e070fbba8f8f4db Mon Sep 17 00:00:00 2001 From: ShenLiang <1422485404@qq.com> Date: Mon, 26 Apr 2021 20:06:50 +0800 Subject: [PATCH 010/720] add barrier for new group (#32572) --- python/paddle/distributed/collective.py | 83 +++++++++++++------------ 1 file changed, 42 insertions(+), 41 deletions(-) diff --git a/python/paddle/distributed/collective.py b/python/paddle/distributed/collective.py index c0feadb6883..8b1b6dc0ff8 100644 --- a/python/paddle/distributed/collective.py +++ b/python/paddle/distributed/collective.py @@ -160,6 +160,46 @@ def get_group(id=0): return gm[group] if group in gm else None +def barrier(group=None): + """ + + Barrier among all participators in the group. + + Args: + group (Group): The group instance return by new_group or None for global default group. + + Returns: + None. + + Examples: + .. code-block:: python + + import paddle + from paddle.distributed import init_parallel_env + + paddle.set_device('gpu:%d'%paddle.distributed.ParallelEnv().dev_id) + init_parallel_env() + paddle.distributed.barrier() + """ + if group is not None and not group.is_member(): + return + + ring_id = 0 if group is None else group.id + + op_type = 'barrier' + temp = fill_constant([1], dtype="int32", value="1") + if in_dygraph_mode(): + return core.ops.barrier(temp, temp, 'ring_id', ring_id) + if not isinstance(ring_id, int): + raise ValueError("The type of 'group' for barrier must be int.") + helper = LayerHelper(op_type, **locals()) + helper.append_op( + type=op_type, + inputs={'X': [temp]}, + outputs={'Out': [temp]}, + attrs={'ring_id': ring_id}) + + def new_group(ranks=None, backend=None): """ @@ -220,7 +260,8 @@ def new_group(ranks=None, backend=None): core.NCCLParallelContext(strategy, place).init_with_ring_id(ring_id) else: assert False, ("no cuda device found") - + # need to barrier to construct group + barrier(gp) return gp @@ -838,46 +879,6 @@ def _mp_allreduce(tensor, raise NotImplementedError("No support _mp_allreduce in dygraph mode.") -def barrier(group=None): - """ - - Barrier among all participators in the group. - - Args: - group (Group): The group instance return by new_group or None for global default group. - - Returns: - None. - - Examples: - .. 
code-block:: python - - import paddle - from paddle.distributed import init_parallel_env - - paddle.set_device('gpu:%d'%paddle.distributed.ParallelEnv().dev_id) - init_parallel_env() - paddle.distributed.barrier() - """ - if group is not None and not group.is_member(): - return - - ring_id = 0 if group is None else group.id - - op_type = 'barrier' - temp = fill_constant([1], dtype="int32", value="1") - if in_dygraph_mode(): - return core.ops.barrier(temp, temp, 'ring_id', ring_id) - if not isinstance(ring_id, int): - raise ValueError("The type of 'group' for barrier must be int.") - helper = LayerHelper(op_type, **locals()) - helper.append_op( - type=op_type, - inputs={'X': [temp]}, - outputs={'Out': [temp]}, - attrs={'ring_id': ring_id}) - - def _parallel_linear(x, num_rows, num_cols, -- GitLab From a7be32ccbb1a669db3593ada31eaaffe0d508a10 Mon Sep 17 00:00:00 2001 From: WeiXin Date: Mon, 26 Apr 2021 21:06:03 +0800 Subject: [PATCH 011/720] deal with conflict. (#32578) --- python/paddle/fluid/tests/unittests/test_pylayer_op.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/paddle/fluid/tests/unittests/test_pylayer_op.py b/python/paddle/fluid/tests/unittests/test_pylayer_op.py index 565ed992bc5..d329bf570a5 100644 --- a/python/paddle/fluid/tests/unittests/test_pylayer_op.py +++ b/python/paddle/fluid/tests/unittests/test_pylayer_op.py @@ -85,7 +85,8 @@ class TestPyLayer(unittest.TestCase): z2 = paddle.tanh(input2) + paddle.tanh(input4) z2.mean().backward() - self.assertTrue(np.max(np.abs((input1.grad - input2.grad))) < 1e-10) + self.assertTrue( + np.max(np.abs((input1.grad.numpy() - input2.grad.numpy()))) < 1e-10) def test_simple_pylayer_single_output(self): class tanh(PyLayer): -- GitLab From c47bafc62e539fa5d8dfc94d5484c9e286028eea Mon Sep 17 00:00:00 2001 From: lilong12 Date: Mon, 26 Apr 2021 23:06:45 +0800 Subject: [PATCH 012/720] add send/recv api (#32504) * add sendrecv, test=develop --- paddle/fluid/pybind/op_function_generator.cc | 2 + python/paddle/distributed/collective.py | 102 ++++++++++++++++++ .../fluid/tests/unittests/CMakeLists.txt | 2 + .../unittests/collective_sendrecv_api.py | 60 +++++++++++ .../collective_sendrecv_api_dygraph.py | 54 ++++++++++ .../unittests/test_collective_api_base.py | 35 ++++-- .../unittests/test_collective_sendrecv_api.py | 44 ++++++++ 7 files changed, 288 insertions(+), 11 deletions(-) create mode 100644 python/paddle/fluid/tests/unittests/collective_sendrecv_api.py create mode 100644 python/paddle/fluid/tests/unittests/collective_sendrecv_api_dygraph.py create mode 100644 python/paddle/fluid/tests/unittests/test_collective_sendrecv_api.py diff --git a/paddle/fluid/pybind/op_function_generator.cc b/paddle/fluid/pybind/op_function_generator.cc index 237cec13a80..a340d7a0f00 100644 --- a/paddle/fluid/pybind/op_function_generator.cc +++ b/paddle/fluid/pybind/op_function_generator.cc @@ -44,6 +44,7 @@ std::map> op_ins_map = { {"gru_unit", {"Input", "HiddenPrev", "Weight", "Bias"}}, {"label_smooth", {"X", "PriorDist"}}, {"assign", {"X"}}, + {"send_v2", {"X"}}, {"reshape2", {"X", "Shape"}}, {"expand", {"X", "ExpandTimes"}}, {"slice", {"Input", "StartsTensor", "EndsTensor"}}, @@ -123,6 +124,7 @@ std::map> op_passing_outs_map = { {"sync_batch_norm", {"MeanOut", "VarianceOut"}}, {"accuracy", {"Correct", "Total"}}, {"fill_constant", {"Out"}}, + {"recv_v2", {"Out"}}, {"matmul", {"Out"}}, {"c_broadcast", {"Out"}}, {"c_sync_calc_stream", {"Out"}}, diff --git a/python/paddle/distributed/collective.py 
b/python/paddle/distributed/collective.py
index 8b1b6dc0ff8..7fb9e1d0455 100644
--- a/python/paddle/distributed/collective.py
+++ b/python/paddle/distributed/collective.py
@@ -37,6 +37,8 @@ __all__ = [
     'barrier',
     'split',
     'ReduceOp',
+    'send',
+    'recv',
 ]
 
 
@@ -1170,3 +1172,103 @@ def split(x,
         name=name,
         group=None)
     return linear_out
+
+
+def send(tensor, dst=0, group=None, use_calc_stream=True):
+    """
+    Send a tensor to the receiver.
+
+    Args:
+        tensor (Tensor): The Tensor to send. Its data type
+            should be float16, float32, float64, int32 or int64.
+        dst (int): The destination rank id.
+        group (Group): The group instance returned by new_group, or None for the global default group.
+        use_calc_stream (bool): Whether to use the calculation stream or the communication stream.
+    Returns:
+        None.
+
+    Examples:
+        .. code-block:: python
+            import paddle
+            #from paddle.distributed import init_parallel_env
+            #init_parallel_env()
+            #if paddle.distributed.ParallelEnv().rank == 0:
+            #    data = paddle.to_tensor([7, 8, 9])
+            #    paddle.distributed.send(data, dst=1)
+            #else:
+            #    data = paddle.to_tensor([1,2,3])
+            #    paddle.distributed.recv(data, src=0)
+            #out = data.numpy()
+    """
+    if group is not None and not group.is_member():
+        return
+    ring_id = 0 if group is None else group.id
+
+    op_type = 'send_v2'
+    if in_dygraph_mode():
+        return core.ops.send_v2(tensor, 'use_calc_stream', use_calc_stream,
+                                'ring_id', ring_id, 'peer', dst)
+    check_variable_and_dtype(
+        tensor, 'tensor', ['float16', 'float32', 'float64', 'int32', 'int64'],
+        'send')
+
+    helper = LayerHelper(op_type, **locals())
+    helper.append_op(
+        type=op_type,
+        inputs={'X': [tensor]},
+        attrs={
+            'ring_id': ring_id,
+            'peer': dst,
+            'use_calc_stream': use_calc_stream,
+        })
+
+
+def recv(tensor, src=0, group=None, use_calc_stream=True):
+    """
+    Receive a tensor from the sender.
+
+    Args:
+        tensor (Tensor): The Tensor to receive. Its data type
+            should be float16, float32, float64, int32 or int64.
+        src (int): The source rank id.
+        group (Group): The group instance returned by new_group, or None for the global default group.
+        use_calc_stream (bool): Whether to use the calculation stream or the communication stream.
+    Returns:
+        None.
+
+    Examples:
+        .. 
code-block:: python + import paddle + #from paddle.distributed import init_parallel_env + #init_parallel_env() + #if paddle.distributed.ParallelEnv().rank == 0: + # data = paddle.to_tensor([7, 8, 9]) + # paddle.distributed.send(data, dst=1) + #else: + # data = paddle.to_tensor([1,2,3]) + # paddle.distributed.recv(data, src=0) + #out = data.numpy() + """ + if group is not None and not group.is_member(): + return + ring_id = 0 if group is None else group.id + + op_type = 'recv_v2' + if in_dygraph_mode(): + return core.ops.recv_v2(tensor, 'use_calc_stream', use_calc_stream, + 'ring_id', ring_id, 'peer', src, 'dtype', + tensor.dtype, 'out_shape', tensor.shape) + check_variable_and_dtype( + tensor, 'tensor', ['float16', 'float32', 'float64', 'int32', 'int64'], + 'recv') + helper = LayerHelper(op_type, **locals()) + helper.append_op( + type=op_type, + outputs={'Out': [tensor]}, + attrs={ + 'ring_id': ring_id, + 'peer': src, + 'out_shape': tensor.shape, + 'dtype': tensor.dtype, + 'use_calc_stream': use_calc_stream, + }) diff --git a/python/paddle/fluid/tests/unittests/CMakeLists.txt b/python/paddle/fluid/tests/unittests/CMakeLists.txt index 3bf96944edb..c1a29c050b1 100644 --- a/python/paddle/fluid/tests/unittests/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt @@ -96,6 +96,7 @@ if(((NOT WITH_ROCM) AND (NOT WITH_GPU)) OR WIN32) LIST(REMOVE_ITEM TEST_OPS test_new_group_api) LIST(REMOVE_ITEM TEST_OPS test_collective_broadcast_api) LIST(REMOVE_ITEM TEST_OPS test_collective_allgather_api) + LIST(REMOVE_ITEM TEST_OPS test_collective_sendrecv_api) LIST(REMOVE_ITEM TEST_OPS test_collective_wait) LIST(REMOVE_ITEM TEST_OPS test_memcpy_op) endif() @@ -871,6 +872,7 @@ if(WITH_DISTRIBUTE AND WITH_GPU AND WITH_NCCL) endif() if((WITH_ROCM OR WITH_GPU) AND NOT WIN32) set_tests_properties(test_collective_allgather_api PROPERTIES TIMEOUT 120) + set_tests_properties(test_collective_sendrecv_api PROPERTIES TIMEOUT 120) set_tests_properties(test_collective_broadcast_api PROPERTIES TIMEOUT 120) set_tests_properties(test_collective_allreduce_api PROPERTIES TIMEOUT 120) set_tests_properties(test_new_group_api PROPERTIES TIMEOUT 120) diff --git a/python/paddle/fluid/tests/unittests/collective_sendrecv_api.py b/python/paddle/fluid/tests/unittests/collective_sendrecv_api.py new file mode 100644 index 00000000000..551537a0ea4 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/collective_sendrecv_api.py @@ -0,0 +1,60 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import print_function + +import numpy as np +import argparse +import os +import sys +import signal +import time +import socket +from contextlib import closing +from six import string_types +import math +import paddle +import paddle.fluid as fluid +import paddle.fluid.profiler as profiler +import paddle.fluid.unique_name as nameGen +from paddle.fluid import core +import unittest +from multiprocessing import Process +import paddle.fluid.layers as layers +from functools import reduce +from test_collective_api_base import TestCollectiveAPIRunnerBase, runtime_main + +paddle.enable_static() + + +class TestCollectiveSendRecvAPI(TestCollectiveAPIRunnerBase): + def __init__(self): + self.global_ring_id = 0 + + def get_model(self, main_prog, startup_program, rank): + with fluid.program_guard(main_prog, startup_program): + tindata = layers.data( + name="tindata", + shape=[10, 1000], + dtype='float32', + append_batch_size=False) + if rank == 0: + paddle.distributed.send(tindata, dst=1) + else: + paddle.distributed.recv(tindata, src=0) + return [tindata] + + +if __name__ == "__main__": + runtime_main(TestCollectiveSendRecvAPI, "sendrecv") diff --git a/python/paddle/fluid/tests/unittests/collective_sendrecv_api_dygraph.py b/python/paddle/fluid/tests/unittests/collective_sendrecv_api_dygraph.py new file mode 100644 index 00000000000..10028488e85 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/collective_sendrecv_api_dygraph.py @@ -0,0 +1,54 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import print_function + +import numpy as np +import argparse +import os +import sys +import signal +import time +import socket +from contextlib import closing +from six import string_types +import math +import paddle +import paddle.fluid as fluid +import paddle.fluid.profiler as profiler +import paddle.fluid.unique_name as nameGen +from paddle.fluid import core +import unittest +from multiprocessing import Process +import paddle.fluid.layers as layers +from functools import reduce +from test_collective_api_base import TestCollectiveAPIRunnerBase, runtime_main + + +class TestCollectiveSendRecvAPI(TestCollectiveAPIRunnerBase): + def __init__(self): + self.global_ring_id = 0 + + def get_model(self, main_prog, startup_program, rank, indata=None): + with fluid.program_guard(main_prog, startup_program): + tindata = paddle.to_tensor(indata) + if rank == 0: + paddle.distributed.send(tindata, dst=1) + else: + paddle.distributed.recv(tindata, src=0) + return [tindata.numpy()] + + +if __name__ == "__main__": + runtime_main(TestCollectiveSendRecvAPI, "sendrecv") diff --git a/python/paddle/fluid/tests/unittests/test_collective_api_base.py b/python/paddle/fluid/tests/unittests/test_collective_api_base.py index ad85adb2d51..832ffafa85e 100644 --- a/python/paddle/fluid/tests/unittests/test_collective_api_base.py +++ b/python/paddle/fluid/tests/unittests/test_collective_api_base.py @@ -33,7 +33,7 @@ from paddle.fluid import core class TestCollectiveAPIRunnerBase(object): - def get_model(self, train_prog, startup_prog, rank): + def get_model(self, train_prog, startup_prog, rank, indata=None): raise NotImplementedError( "get model should be implemented by child class.") @@ -44,7 +44,6 @@ class TestCollectiveAPIRunnerBase(object): rank = args["trainerid"] current_endpoint = args["currentendpoint"] nranks = 2 - result = self.get_model(train_prog, startup_prog, rank) paddle.distributed.init_parallel_env() if args['backend'] == 'nccl': device_id = int(os.getenv("FLAGS_selected_gpus", "0")) @@ -55,16 +54,21 @@ class TestCollectiveAPIRunnerBase(object): place = fluid.XPUPlace(device_id) else: place = fluid.CPUPlace() - exe = fluid.Executor(place) - exe.run(startup_prog) np.random.seed(os.getpid()) indata = np.random.random((10, 1000)).astype("float32") - fetch_list = [] - for elem in result: - fetch_list.append(elem.name) - out = exe.run(train_prog, - feed={'tindata': indata}, - fetch_list=fetch_list) + if args['static_mode']: + result = self.get_model(train_prog, startup_prog, rank) + exe = fluid.Executor(place) + exe.run(startup_prog) + fetch_list = [] + for elem in result: + fetch_list.append(elem.name) + out = exe.run(train_prog, + feed={'tindata': indata}, + fetch_list=fetch_list) + else: + out = self.get_model(train_prog, startup_prog, rank, indata) + #print(out, sys.stderr) if six.PY2: print(pickle.dumps(out)) else: @@ -81,6 +85,7 @@ def runtime_main(test_class, col_type): args["col_type"] = col_type args["backend"] = os.getenv("BACKEND") args["path_id"] = int(os.getenv("PATH_ID")) + args["static_mode"] = int(os.getenv("STATIC_MODE")) model.run_trainer(args) @@ -186,6 +191,7 @@ class TestDistBase(unittest.TestCase): col_type, backend="nccl", path_id="0", + static_mode="1", check_error_log=False, need_envs={}): if backend == "nccl" or backend == "bkcl": @@ -199,8 +205,10 @@ class TestDistBase(unittest.TestCase): "PYTHONPATH": os.getenv("PYTHONPATH", ""), "LD_LIBRARY_PATH": os.getenv("LD_LIBRARY_PATH", ""), "LD_PRELOAD": os.getenv("LD_PRELOAD", ""), - "GLOG_v": "0", + 
"FLAGS_call_stack_level": "2", + "GLOG_v": "3", "NCCL_P2P_DISABLE": "1", + "STATIC_MODE": static_mode, "PADDLE_WITH_GLOO": with_gloo, "BACKEND": backend, "PATH_ID": path_id @@ -269,5 +277,10 @@ class TestDistBase(unittest.TestCase): self.assertTrue( np.allclose( result_data, need_result, rtol=1e-05, atol=1e-05)) + elif col_type == "sendrecv": + result_data = tr1_out[0] + self.assertTrue( + np.allclose( + input1, result_data, rtol=1e-05, atol=1e-05)) else: pass diff --git a/python/paddle/fluid/tests/unittests/test_collective_sendrecv_api.py b/python/paddle/fluid/tests/unittests/test_collective_sendrecv_api.py new file mode 100644 index 00000000000..f1d5ec1300e --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_collective_sendrecv_api.py @@ -0,0 +1,44 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function +import unittest +import numpy as np +import paddle + +from test_collective_api_base import TestDistBase + +paddle.enable_static() + + +class TestCollectiveSendRecvAPI(TestDistBase): + def _setup_config(self): + pass + + #def test_sendrecv_nccl(self): + # if paddle.fluid.core.is_compiled_with_cuda(): + # self.check_with_place("collective_sendrecv_api.py", "sendrecv", + # "nccl") + + def test_sendrecv_nccl_dygraph(self): + if paddle.fluid.core.is_compiled_with_cuda(): + self.check_with_place( + "collective_sendrecv_api_dygraph.py", + "sendrecv", + "nccl", + static_mode='0') + + +if __name__ == '__main__': + unittest.main() -- GitLab From 0bc97e92b83c66104df6f48e357b8543def1e72c Mon Sep 17 00:00:00 2001 From: zhiboniu <31800336+zhiboniu@users.noreply.github.com> Date: Tue, 27 Apr 2021 09:48:13 +0800 Subject: [PATCH 013/720] update 2.0 public api in utils (#32008) --- python/paddle/utils/__init__.py | 36 ++++++++++--------- python/paddle/utils/cpp_extension/__init__.py | 26 +++++++------- python/paddle/utils/download.py | 2 -- python/paddle/utils/install_check.py | 2 -- python/paddle/utils/op_version.py | 2 -- python/paddle/utils/profiler.py | 19 ++++++++-- 6 files changed, 50 insertions(+), 37 deletions(-) diff --git a/python/paddle/utils/__init__.py b/python/paddle/utils/__init__.py index d32fa4c88c4..40c9d415e11 100644 --- a/python/paddle/utils/__init__.py +++ b/python/paddle/utils/__init__.py @@ -12,21 +12,25 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from .profiler import ProfilerOptions -from .profiler import Profiler -from .profiler import get_profiler -from .deprecated import deprecated -from .lazy_import import try_import -from .op_version import OpLastCheckpointChecker -from .install_check import run_check -from ..fluid.framework import unique_name -from ..fluid.framework import require_version +from .profiler import ProfilerOptions # noqa: F401 +from .profiler import Profiler # noqa: F401 +from .profiler import get_profiler # noqa: F401 +from .deprecated import deprecated # noqa: F401 +from .lazy_import import try_import # noqa: F401 +from .op_version import OpLastCheckpointChecker # noqa: F401 +from .install_check import run_check # noqa: F401 +from ..fluid.framework import unique_name # noqa: F401 +from ..fluid.framework import require_version # noqa: F401 -from . import download +from . import download # noqa: F401 +from . import image_util # noqa: F401 +from . import cpp_extension # noqa: F401 -from . import cpp_extension - -__all__ = ['dump_config', 'deprecated', 'download', 'run_check'] - -#TODO: define new api under this directory -__all__ += ['unique_name', 'require_version'] +__all__ = [ #noqa + 'deprecated', + 'download', + 'run_check', + 'unique_name', + 'require_version', + 'try_import' +] diff --git a/python/paddle/utils/cpp_extension/__init__.py b/python/paddle/utils/cpp_extension/__init__.py index 130ab79b303..cef2716b7f3 100644 --- a/python/paddle/utils/cpp_extension/__init__.py +++ b/python/paddle/utils/cpp_extension/__init__.py @@ -12,18 +12,20 @@ # See the License for the specific language governing permissions and # limitations under the License. -from .cpp_extension import CUDAExtension -from .cpp_extension import CppExtension -from .cpp_extension import BuildExtension -from .cpp_extension import load, setup +from .cpp_extension import CUDAExtension # noqa: F401 +from .cpp_extension import CppExtension # noqa: F401 +from .cpp_extension import BuildExtension # noqa: F401 +from .cpp_extension import load # noqa: F401 +from .cpp_extension import setup # noqa: F401 -from .extension_utils import parse_op_info -from .extension_utils import get_build_directory -from .extension_utils import load_op_meta_info_and_register_op +from .extension_utils import parse_op_info # noqa: F401 +from .extension_utils import get_build_directory # noqa: F401 +from .extension_utils import load_op_meta_info_and_register_op # noqa: F401 -from . import cpp_extension -from . 
import extension_utils - -__all__ = [ - 'CppExtension', 'CUDAExtension', 'load', 'setup', 'get_build_directory' +__all__ = [ #noqa + 'CppExtension', + 'CUDAExtension', + 'load', + 'setup', + 'get_build_directory' ] diff --git a/python/paddle/utils/download.py b/python/paddle/utils/download.py index dda8abeff21..bd70013e112 100644 --- a/python/paddle/utils/download.py +++ b/python/paddle/utils/download.py @@ -55,8 +55,6 @@ except: import logging logger = logging.getLogger(__name__) -__all__ = ['get_weights_path_from_url'] - WEIGHTS_HOME = osp.expanduser("~/.cache/paddle/hapi/weights") DOWNLOAD_RETRY_LIMIT = 3 diff --git a/python/paddle/utils/install_check.py b/python/paddle/utils/install_check.py index b39009985e7..5d70cf61007 100644 --- a/python/paddle/utils/install_check.py +++ b/python/paddle/utils/install_check.py @@ -20,8 +20,6 @@ import numpy as np import paddle -__all__ = ['run_check'] - def _simple_network(): """ diff --git a/python/paddle/utils/op_version.py b/python/paddle/utils/op_version.py index 68acc9de081..a1fa230d64f 100644 --- a/python/paddle/utils/op_version.py +++ b/python/paddle/utils/op_version.py @@ -14,8 +14,6 @@ from ..fluid import core -__all__ = ['OpLastCheckpointChecker'] - def Singleton(cls): _instance = {} diff --git a/python/paddle/utils/profiler.py b/python/paddle/utils/profiler.py index 89c0d2cac68..cc33342ec5a 100644 --- a/python/paddle/utils/profiler.py +++ b/python/paddle/utils/profiler.py @@ -18,9 +18,22 @@ import sys import warnings from ..fluid import core -from ..fluid.profiler import * - -__all__ = ['ProfilerOptions', 'Profiler', 'get_profiler'] +from ..fluid.profiler import cuda_profiler # noqa: F401 +from ..fluid.profiler import start_profiler +from ..fluid.profiler import profiler # noqa: F401 +from ..fluid.profiler import stop_profiler +from ..fluid.profiler import reset_profiler + +__all__ = [ #noqa + 'Profiler', + 'get_profiler', + 'ProfilerOptions', + 'cuda_profiler', + 'start_profiler', + 'profiler', + 'stop_profiler', + 'reset_profiler' +] class ProfilerOptions(object): -- GitLab From f1bc322c92eae17a4245a575a40ceedc54951a22 Mon Sep 17 00:00:00 2001 From: zhiboniu <31800336+zhiboniu@users.noreply.github.com> Date: Tue, 27 Apr 2021 09:49:00 +0800 Subject: [PATCH 014/720] update 2.0 public api in tensor (#32026) --- python/paddle/fluid/dygraph/math_op_patch.py | 8 +- python/paddle/fluid/layers/math_op_patch.py | 8 +- python/paddle/tensor/__init__.py | 506 +++++++++++-------- python/paddle/tensor/attribute.py | 6 +- python/paddle/tensor/creation.py | 28 +- python/paddle/tensor/linalg.py | 21 +- python/paddle/tensor/logic.py | 31 +- python/paddle/tensor/manipulation.py | 47 +- python/paddle/tensor/math.py | 135 ++--- python/paddle/tensor/random.py | 12 - python/paddle/tensor/search.py | 19 +- python/paddle/tensor/stat.py | 2 - python/paddle/tensor/tensor.py | 6 - python/paddle/tensor/to_string.py | 2 - 14 files changed, 367 insertions(+), 464 deletions(-) diff --git a/python/paddle/fluid/dygraph/math_op_patch.py b/python/paddle/fluid/dygraph/math_op_patch.py index 41cce6a0858..e39fc3e23fe 100644 --- a/python/paddle/fluid/dygraph/math_op_patch.py +++ b/python/paddle/fluid/dygraph/math_op_patch.py @@ -325,13 +325,7 @@ def monkey_patch_math_varbase(): else: import paddle.tensor # Tensor method from module paddle.tensor - tensor_methods = paddle.tensor.linalg.__all__ + \ - paddle.tensor.math.__all__ + \ - paddle.tensor.logic.__all__ + \ - paddle.tensor.manipulation.__all__ + \ - paddle.tensor.search.__all__ + \ - paddle.tensor.stat.__all__ + \ - 
paddle.tensor.attribute.__all__ + tensor_methods = paddle.tensor.tensor_method_func for method_name in tensor_methods: if hasattr(core.VarBase, method_name): continue method_impl = getattr(paddle.tensor, method_name, None) diff --git a/python/paddle/fluid/layers/math_op_patch.py b/python/paddle/fluid/layers/math_op_patch.py index a68331b156b..a2dee91dbef 100644 --- a/python/paddle/fluid/layers/math_op_patch.py +++ b/python/paddle/fluid/layers/math_op_patch.py @@ -370,13 +370,7 @@ def monkey_patch_variable(): setattr(Variable, method_name, method_impl) else: import paddle.tensor - variabel_methods = paddle.tensor.linalg.__all__ + \ - paddle.tensor.math.__all__ + \ - paddle.tensor.logic.__all__ + \ - paddle.tensor.manipulation.__all__ + \ - paddle.tensor.search.__all__ + \ - paddle.tensor.stat.__all__ + \ - paddle.tensor.attribute.__all__ + variabel_methods = paddle.tensor.tensor_method_func for method_name in variabel_methods: if hasattr(Variable, method_name): continue method_impl = getattr(paddle.tensor, method_name, None) diff --git a/python/paddle/tensor/__init__.py b/python/paddle/tensor/__init__.py index 0a75f6fd7ba..c863f2b86a5 100755 --- a/python/paddle/tensor/__init__.py +++ b/python/paddle/tensor/__init__.py @@ -11,205 +11,315 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import print_function -#from .math import * -#from .creation import * -#from .linalg import * +from .attribute import rank # noqa: F401 +from .attribute import shape # noqa: F401 +from .attribute import real # noqa: F401 +from .attribute import imag # noqa: F401 +from .creation import to_tensor # noqa: F401 +from .creation import diag # noqa: F401 +from .creation import eye # noqa: F401 +from .creation import linspace # noqa: F401 +from .creation import ones # noqa: F401 +from .creation import ones_like # noqa: F401 +from .creation import zeros # noqa: F401 +from .creation import zeros_like # noqa: F401 +from .creation import arange # noqa: F401 +from .creation import eye # noqa: F401 +from .creation import full # noqa: F401 +from .creation import full_like # noqa: F401 +from .creation import triu # noqa: F401 +from .creation import tril # noqa: F401 +from .creation import meshgrid # noqa: F401 +from .creation import empty # noqa: F401 +from .creation import empty_like # noqa: F401 +from .linalg import matmul # noqa: F401 +from .linalg import dot # noqa: F401 +from .linalg import norm # noqa: F401 +from .linalg import transpose # noqa: F401 +from .linalg import dist # noqa: F401 +from .linalg import t # noqa: F401 +from .linalg import cross # noqa: F401 +from .linalg import cholesky # noqa: F401 +from .linalg import bmm # noqa: F401 +from .linalg import histogram # noqa: F401 +from .linalg import mv # noqa: F401 +from .logic import equal # noqa: F401 +from .logic import greater_equal # noqa: F401 +from .logic import greater_than # noqa: F401 +from .logic import is_empty # noqa: F401 +from .logic import less_equal # noqa: F401 +from .logic import less_than # noqa: F401 +from .logic import logical_and # noqa: F401 +from .logic import logical_not # noqa: F401 +from .logic import logical_or # noqa: F401 +from .logic import logical_xor # noqa: F401 +from .logic import not_equal # noqa: F401 +from .logic import allclose # noqa: F401 +from .logic import equal_all # noqa: F401 +from .logic import is_tensor # noqa: F401 +from .manipulation import cast # noqa: F401 +from 
.manipulation import concat # noqa: F401 +from .manipulation import expand # noqa: F401 +from .manipulation import broadcast_to # noqa: F401 +from .manipulation import expand_as # noqa: F401 +from .manipulation import tile # noqa: F401 +from .manipulation import flatten # noqa: F401 +from .manipulation import gather # noqa: F401 +from .manipulation import gather_nd # noqa: F401 +from .manipulation import reshape # noqa: F401 +from .manipulation import reshape_ # noqa: F401 +from .manipulation import flip as reverse # noqa: F401 +from .manipulation import scatter # noqa: F401 +from .manipulation import scatter_ # noqa: F401 +from .manipulation import scatter_nd_add # noqa: F401 +from .manipulation import scatter_nd # noqa: F401 +from .manipulation import shard_index # noqa: F401 +from .manipulation import slice # noqa: F401 +from .manipulation import split # noqa: F401 +from .manipulation import squeeze # noqa: F401 +from .manipulation import squeeze_ # noqa: F401 +from .manipulation import stack # noqa: F401 +from .manipulation import strided_slice # noqa: F401 +from .manipulation import transpose # noqa: F401 +from .manipulation import unique # noqa: F401 +from .manipulation import unsqueeze # noqa: F401 +from .manipulation import unsqueeze_ # noqa: F401 +from .manipulation import unstack # noqa: F401 +from .manipulation import flip # noqa: F401 +from .manipulation import unbind # noqa: F401 +from .manipulation import roll # noqa: F401 +from .manipulation import chunk # noqa: F401 +from .math import abs # noqa: F401 +from .math import acos # noqa: F401 +from .math import asin # noqa: F401 +from .math import atan # noqa: F401 +from .math import ceil # noqa: F401 +from .math import cos # noqa: F401 +from .math import tan # noqa: F401 +from .math import cosh # noqa: F401 +from .math import cumsum # noqa: F401 +from .math import exp # noqa: F401 +from .math import floor # noqa: F401 +from .math import increment # noqa: F401 +from .math import log # noqa: F401 +from .math import multiplex # noqa: F401 +from .math import pow # noqa: F401 +from .math import reciprocal # noqa: F401 +from .math import round # noqa: F401 +from .math import rsqrt # noqa: F401 +from .math import scale # noqa: F401 +from .math import sign # noqa: F401 +from .math import sin # noqa: F401 +from .math import sinh # noqa: F401 +from .math import sqrt # noqa: F401 +from .math import square # noqa: F401 +from .math import stanh # noqa: F401 +from .math import sum # noqa: F401 +from .math import tanh # noqa: F401 +from .math import tanh_ # noqa: F401 +from .math import add_n # noqa: F401 +from .math import max # noqa: F401 +from .math import maximum # noqa: F401 +from .math import min # noqa: F401 +from .math import minimum # noqa: F401 +from .math import mm # noqa: F401 +from .math import divide # noqa: F401 +from .math import floor_divide # noqa: F401 +from .math import remainder # noqa: F401 +from .math import mod # noqa: F401 +from .math import floor_mod # noqa: F401 +from .math import multiply # noqa: F401 +from .math import add # noqa: F401 +from .math import subtract # noqa: F401 +from .math import atan # noqa: F401 +from .math import logsumexp # noqa: F401 +from .math import inverse # noqa: F401 +from .math import log2 # noqa: F401 +from .math import log10 # noqa: F401 +from .math import log1p # noqa: F401 +from .math import erf # noqa: F401 +from .math import addmm # noqa: F401 +from .math import clip # noqa: F401 +from .math import trace # noqa: F401 +from .math import kron # noqa: F401 +from .math import isfinite 
# noqa: F401 +from .math import isinf # noqa: F401 +from .math import isnan # noqa: F401 +from .math import prod # noqa: F401 +from .math import all # noqa: F401 +from .math import any # noqa: F401 +from .math import broadcast_shape # noqa: F401 +from .math import conj # noqa: F401 -# TODO: define alias in tensor and framework directory +from .random import multinomial # noqa: F401 +from .random import standard_normal # noqa: F401 +from .random import normal # noqa: F401 +from .random import uniform # noqa: F401 +from .random import randn # noqa: F401 +from .random import rand # noqa: F401 +from .random import randint # noqa: F401 +from .random import randperm # noqa: F401 +from .search import argmax # noqa: F401 +from .search import argmin # noqa: F401 +from .search import argsort # noqa: F401 +from .search import topk # noqa: F401 +from .search import where # noqa: F401 +from .search import index_select # noqa: F401 +from .search import nonzero # noqa: F401 +from .search import sort # noqa: F401 +from .search import index_sample # noqa: F401 +from .search import masked_select # noqa: F401 +from .stat import mean # noqa: F401 +from .stat import std # noqa: F401 +from .stat import var # noqa: F401 +from .stat import numel # noqa: F401 +from .stat import median # noqa: F401 +from .to_string import set_printoptions # noqa: F401 -from .random import randperm -from .attribute import rank #DEFINE_ALIAS -from .attribute import shape #DEFINE_ALIAS -from .attribute import real #DEFINE_ALIAS -from .attribute import imag #DEFINE_ALIAS -from .creation import to_tensor #DEFINE_ALIAS -from .creation import diag #DEFINE_ALIAS -from .creation import eye #DEFINE_ALIAS -# from .creation import fill_constant #DEFINE_ALIAS -# from .creation import get_tensor_from_selected_rows #DEFINE_ALIAS -from .creation import linspace #DEFINE_ALIAS -from .creation import ones #DEFINE_ALIAS -from .creation import ones_like #DEFINE_ALIAS -from .creation import zeros #DEFINE_ALIAS -from .creation import zeros_like #DEFINE_ALIAS -from .creation import arange #DEFINE_ALIAS -from .creation import eye #DEFINE_ALIAS -from .creation import full #DEFINE_ALIAS -from .creation import full_like #DEFINE_ALIAS -from .creation import triu #DEFINE_ALIAS -from .creation import tril #DEFINE_ALIAS -from .creation import meshgrid #DEFINE_ALIAS -from .creation import empty #DEFINE_ALIAS -from .creation import empty_like #DEFINE_ALIAS -from .linalg import matmul #DEFINE_ALIAS -from .linalg import dot #DEFINE_ALIAS -# from .linalg import einsum #DEFINE_ALIAS -from .linalg import norm #DEFINE_ALIAS -from .linalg import transpose #DEFINE_ALIAS -from .linalg import dist #DEFINE_ALIAS -from .linalg import t #DEFINE_ALIAS -from .linalg import cross #DEFINE_ALIAS -from .linalg import cholesky #DEFINE_ALIAS -# from .linalg import tensordot #DEFINE_ALIAS -from .linalg import bmm #DEFINE_ALIAS -from .linalg import histogram #DEFINE_ALIAS -from .linalg import mv #DEFINE_ALIAS -from .logic import equal #DEFINE_ALIAS -from .logic import greater_equal #DEFINE_ALIAS -from .logic import greater_than #DEFINE_ALIAS -from .logic import is_empty #DEFINE_ALIAS -#from .logic import isfinite #DEFINE_ALIAS -from .logic import less_equal #DEFINE_ALIAS -from .logic import less_than #DEFINE_ALIAS -from .logic import logical_and #DEFINE_ALIAS -from .logic import logical_not #DEFINE_ALIAS -from .logic import logical_or #DEFINE_ALIAS -from .logic import logical_xor #DEFINE_ALIAS -from .logic import not_equal #DEFINE_ALIAS -from .logic import allclose #DEFINE_ALIAS -from 
.logic import equal_all #DEFINE_ALIAS -# from .logic import isnan #DEFINE_ALIAS -from .logic import is_tensor #DEFINE_ALIAS -from .manipulation import cast #DEFINE_ALIAS -from .manipulation import concat #DEFINE_ALIAS -from .manipulation import expand #DEFINE_ALIAS -from .manipulation import broadcast_to #DEFINE_ALIAS -from .manipulation import expand_as #DEFINE_ALIAS -from .manipulation import tile #DEFINE_ALIAS -from .manipulation import flatten #DEFINE_ALIAS -from .manipulation import gather #DEFINE_ALIAS -from .manipulation import gather_nd #DEFINE_ALIAS -from .manipulation import reshape #DEFINE_ALIAS -from .manipulation import reshape_ #DEFINE_ALIAS -from .manipulation import flip as reverse #DEFINE_ALIAS -from .manipulation import scatter #DEFINE_ALIAS -from .manipulation import scatter_ #DEFINE_ALIAS -from .manipulation import scatter_nd_add #DEFINE_ALIAS -from .manipulation import scatter_nd #DEFINE_ALIAS -from .manipulation import shard_index #DEFINE_ALIAS -from .manipulation import slice #DEFINE_ALIAS -from .manipulation import split #DEFINE_ALIAS -from .manipulation import squeeze #DEFINE_ALIAS -from .manipulation import squeeze_ #DEFINE_ALIAS -from .manipulation import stack #DEFINE_ALIAS -from .manipulation import strided_slice #DEFINE_ALIAS -from .manipulation import transpose #DEFINE_ALIAS -from .manipulation import unique #DEFINE_ALIAS -from .manipulation import unsqueeze #DEFINE_ALIAS -from .manipulation import unsqueeze_ #DEFINE_ALIAS -from .manipulation import unstack #DEFINE_ALIAS -from .manipulation import flip #DEFINE_ALIAS -from .manipulation import unbind #DEFINE_ALIAS -from .manipulation import roll #DEFINE_ALIAS -from .manipulation import chunk #DEFINE_ALIAS -from .math import abs #DEFINE_ALIAS -from .math import acos #DEFINE_ALIAS -from .math import asin #DEFINE_ALIAS -from .math import atan #DEFINE_ALIAS -from .math import ceil #DEFINE_ALIAS -from .math import cos #DEFINE_ALIAS -from .math import tan #DEFINE_ALIAS -from .math import cosh #DEFINE_ALIAS -from .math import cumsum #DEFINE_ALIAS -# from .math import elementwise_add #DEFINE_ALIAS -# from .math import elementwise_div #DEFINE_ALIAS -# from .math import elementwise_floordiv #DEFINE_ALIAS -# from .math import elementwise_mul #DEFINE_ALIAS -# from .math import elementwise_mod #DEFINE_ALIAS -# from .math import elementwise_pow #DEFINE_ALIAS -# from .math import elementwise_sub #DEFINE_ALIAS -from .math import exp #DEFINE_ALIAS -from .math import floor #DEFINE_ALIAS -from .math import increment #DEFINE_ALIAS -from .math import log #DEFINE_ALIAS -from .math import multiplex #DEFINE_ALIAS -from .math import pow #DEFINE_ALIAS -from .math import reciprocal #DEFINE_ALIAS -# from .math import reduce_max #DEFINE_ALIAS -# from .math import reduce_min #DEFINE_ALIAS -# from .math import reduce_prod #DEFINE_ALIAS -# from .math import reduce_sum #DEFINE_ALIAS -from .math import round #DEFINE_ALIAS -from .math import rsqrt #DEFINE_ALIAS -from .math import scale #DEFINE_ALIAS -from .math import sign #DEFINE_ALIAS -from .math import sin #DEFINE_ALIAS -from .math import sinh #DEFINE_ALIAS -from .math import sqrt #DEFINE_ALIAS -from .math import square #DEFINE_ALIAS -from .math import stanh #DEFINE_ALIAS -from .math import sum #DEFINE_ALIAS -from .math import tanh #DEFINE_ALIAS -from .math import tanh_ #DEFINE_ALIAS -from .math import add_n #DEFINE_ALIAS -from .math import max #DEFINE_ALIAS -from .math import maximum #DEFINE_ALIAS -from .math import min #DEFINE_ALIAS -from .math import minimum #DEFINE_ALIAS -from .math 
import mm #DEFINE_ALIAS -from .math import divide #DEFINE_ALIAS -from .math import floor_divide #DEFINE_ALIAS -from .math import remainder #DEFINE_ALIAS -from .math import mod #DEFINE_ALIAS -from .math import floor_mod #DEFINE_ALIAS -from .math import multiply #DEFINE_ALIAS -from .math import add #DEFINE_ALIAS -from .math import subtract #DEFINE_ALIAS -from .math import atan #DEFINE_ALIAS -from .math import logsumexp #DEFINE_ALIAS -from .math import inverse #DEFINE_ALIAS -from .math import log2 #DEFINE_ALIAS -from .math import log10 #DEFINE_ALIAS -from .math import log1p #DEFINE_ALIAS -from .math import erf #DEFINE_ALIAS -from .math import addmm #DEFINE_ALIAS -from .math import clip #DEFINE_ALIAS -from .math import trace #DEFINE_ALIAS -from .math import kron #DEFINE_ALIAS -from .math import isfinite #DEFINE_ALIAS -from .math import isinf #DEFINE_ALIAS -from .math import isnan #DEFINE_ALIAS -from .math import prod #DEFINE_ALIAS -from .math import all #DEFINE_ALIAS -from .math import any #DEFINE_ALIAS -from .math import broadcast_shape #DEFINE_ALIAS -from .math import conj #DEFINE_ALIAS +from .array import array_length # noqa: F401 +from .array import array_read # noqa: F401 +from .array import array_write # noqa: F401 +from .array import create_array # noqa: F401 -from .random import multinomial #DEFINE_ALIAS -from .random import standard_normal -from .random import normal -from .random import uniform #DEFINE_ALIAS -from .random import randn #DEFINE_ALIAS -from .random import rand #DEFINE_ALIAS -from .random import randint #DEFINE_ALIAS -from .random import randperm #DEFINE_ALIAS -from .search import argmax #DEFINE_ALIAS -from .search import argmin #DEFINE_ALIAS -from .search import argsort #DEFINE_ALIAS -# from .search import has_inf #DEFINE_ALIAS -# from .search import has_nan #DEFINE_ALIAS -# from .search import masked_select #DEFINE_ALIAS -from .search import topk #DEFINE_ALIAS -from .search import where #DEFINE_ALIAS -from .search import index_select #DEFINE_ALIAS -from .search import nonzero #DEFINE_ALIAS -from .search import sort #DEFINE_ALIAS -from .search import index_sample #DEFINE_ALIAS -from .search import masked_select #DEFINE_ALIAS -from .stat import mean #DEFINE_ALIAS -# from .stat import reduce_mean #DEFINE_ALIAS -from .stat import std #DEFINE_ALIAS -from .stat import var #DEFINE_ALIAS -from .stat import numel #DEFINE_ALIAS -from .stat import median #DEFINE_ALIAS -# from .tensor import Tensor #DEFINE_ALIAS -# from .tensor import LoDTensor #DEFINE_ALIAS -# from .tensor import LoDTensorArray #DEFINE_ALIAS -from .to_string import set_printoptions #DEFINE_ALIAS - -from .array import array_length #DEFINE_ALIAS -from .array import array_read #DEFINE_ALIAS -from .array import array_write #DEFINE_ALIAS -from .array import create_array #DEFINE_ALIAS +#this list used in math_op_patch.py for _binary_creator_ +tensor_method_func = [ #noqa + 'matmul', + 'dot', + 'norm', + 'transpose', + 'dist', + 't', + 'cross', + 'cholesky', + 'bmm', + 'histogram', + 'mv', + 'abs', + 'acos', + 'all', + 'any', + 'asin', + 'atan', + 'ceil', + 'cos', + 'cosh', + 'cumsum', + 'exp', + 'floor', + 'increment', + 'log', + 'log2', + 'log10', + 'logsumexp', + 'mul', + 'multiplex', + 'pow', + 'prod', + 'reciprocal', + 'round', + 'rsqrt', + 'scale', + 'sign', + 'sin', + 'sinh', + 'sqrt', + 'square', + 'stanh', + 'sum', + 'tanh', + 'tanh_', + 'add_n', + 'max', + 'maximum', + 'min', + 'minimum', + 'mm', + 'divide', + 'floor_divide', + 'remainder', + 'mod', + 'floor_mod', + 'multiply', + 'add', + 'subtract', + 'atan', 
+ 'logsumexp', + 'inverse', + 'log1p', + 'erf', + 'addmm', + 'clip', + 'trace', + 'kron', + 'isfinite', + 'isinf', + 'isnan', + 'broadcast_shape', + 'conj', + 'equal', + 'equal_all', + 'greater_equal', + 'greater_than', + 'is_empty', + 'less_equal', + 'less_than', + 'logical_and', + 'logical_not', + 'logical_or', + 'logical_xor', + 'not_equal', + 'allclose', + 'is_tensor', + 'cast', + 'concat', + 'expand', + 'broadcast_to', + 'expand_as', + 'flatten', + 'gather', + 'gather_nd', + 'reshape', + 'reshape_', + 'reverse', + 'scatter', + 'scatter_', + 'scatter_nd_add', + 'scatter_nd', + 'shard_index', + 'slice', + 'split', + 'chunk', + 'squeeze', + 'squeeze_', + 'stack', + 'strided_slice', + 'transpose', + 'unique', + 'unsqueeze', + 'unsqueeze_', + 'unstack', + 'flip', + 'unbind', + 'roll', + 'tile', + 'argmax', + 'argmin', + 'argsort', + 'masked_select', + 'topk', + 'where', + 'index_select', + 'nonzero', + 'sort', + 'index_sample', + 'mean', + 'std', + 'var', + 'numel', + 'median', + 'rank', + 'shape', + 'real', + 'imag' +] diff --git a/python/paddle/tensor/attribute.py b/python/paddle/tensor/attribute.py index 499586b083f..1f709ac4dbc 100644 --- a/python/paddle/tensor/attribute.py +++ b/python/paddle/tensor/attribute.py @@ -19,10 +19,8 @@ from ..fluid.layer_helper import LayerHelper from ..fluid.data_feeder import check_variable_and_dtype # TODO: define functions to get tensor attributes -from ..fluid.layers import rank #DEFINE_ALIAS -from ..fluid.layers import shape #DEFINE_ALIAS - -__all__ = ['rank', 'shape', 'real', 'imag'] +from ..fluid.layers import rank # noqa: F401 +from ..fluid.layers import shape # noqa: F401 def _complex_to_real_dtype(dtype): diff --git a/python/paddle/tensor/creation.py b/python/paddle/tensor/creation.py index 1817ce8256d..b31984f6846 100644 --- a/python/paddle/tensor/creation.py +++ b/python/paddle/tensor/creation.py @@ -14,6 +14,8 @@ from __future__ import print_function import numpy as np +from paddle.common_ops_import import fill_constant +from ..fluid.layers import utils from ..fluid.layers import tensor from ..fluid.framework import Variable @@ -25,32 +27,10 @@ from ..fluid.layers import core from ..fluid.layer_helper import LayerHelper from ..fluid.data_feeder import check_variable_and_dtype, check_type, check_dtype, convert_dtype from ..fluid.framework import convert_np_dtype_to_dtype_, in_dygraph_mode, _varbase_creator, device_guard, OpProtoHolder -from paddle.common_ops_import import * # TODO: define functions to get create a tensor -from ..fluid.layers import linspace #DEFINE_ALIAS +from ..fluid.layers import linspace # noqa: F401 import paddle -__all__ = [ - 'to_tensor', - 'diag', - # 'get_tensor_from_selected_rows', - 'linspace', - 'ones', - 'ones_like', - 'zeros', - 'zeros_like', - 'arange', - 'eye', - 'full', - 'full_like', - 'empty', - 'empty_like', - 'triu', - 'tril', - 'meshgrid', - 'assign', -] - @dygraph_only def to_tensor(data, dtype=None, place=None, stop_gradient=True): @@ -1060,6 +1040,6 @@ def assign(x, output=None): result2 = paddle.assign(data) # result2 = [[2.5, 2.5], [2.5, 2.5], [2.5, 2.5]] result3 = paddle.assign(np.array([[2.5, 2.5], [2.5, 2.5], [2.5, 2.5]], dtype='float32')) # result3 = [[2.5, 2.5], [2.5, 2.5], [2.5, 2.5]] """ - check_type(x, 'x', (Variable, numpy.ndarray, list, tuple, float, int, bool), + check_type(x, 'x', (Variable, np.ndarray, list, tuple, float, int, bool), 'assign') return tensor.assign(x, output) diff --git a/python/paddle/tensor/linalg.py b/python/paddle/tensor/linalg.py index 583290e431d..87e3bce4b1d 100644 
--- a/python/paddle/tensor/linalg.py +++ b/python/paddle/tensor/linalg.py @@ -13,28 +13,13 @@ # limitations under the License. import numpy as np -from paddle.common_ops_import import * from ..fluid.layer_helper import LayerHelper from ..fluid.data_feeder import check_variable_and_dtype, check_type from ..fluid.framework import in_dygraph_mode, _varbase_creator -from ..fluid.layers import transpose #DEFINE_ALIAS - -__all__ = [ - 'matmul', - 'dot', - # 'einsum', - 'norm', - 'transpose', - 'dist', - 't', - 'cross', - 'cholesky', - # 'tensordot', - 'bmm', - 'histogram', - 'mv' -] +from ..fluid.layers import transpose # noqa: F401 +from paddle.common_ops_import import core +from paddle.common_ops_import import VarDesc def matmul(x, y, transpose_x=False, transpose_y=False, name=None): diff --git a/python/paddle/tensor/logic.py b/python/paddle/tensor/logic.py index d5989a1b10c..14154fb06f8 100644 --- a/python/paddle/tensor/logic.py +++ b/python/paddle/tensor/logic.py @@ -17,33 +17,16 @@ from ..fluid.data_feeder import check_type, check_variable_and_dtype from ..fluid.layers.layer_function_generator import templatedoc from .. import fluid from ..fluid.framework import in_dygraph_mode -from paddle.common_ops_import import * from ..framework import VarBase as Tensor # TODO: define logic functions of a tensor -from ..fluid.layers import is_empty #DEFINE_ALIAS -from ..fluid.layers import logical_and #DEFINE_ALIAS -from ..fluid.layers import logical_not #DEFINE_ALIAS -from ..fluid.layers import logical_or #DEFINE_ALIAS -from ..fluid.layers import logical_xor #DEFINE_ALIAS - -__all__ = [ - 'equal', - 'equal_all', - 'greater_equal', - 'greater_than', - 'is_empty', - 'less_equal', - 'less_than', - 'logical_and', - 'logical_not', - 'logical_or', - 'logical_xor', - 'not_equal', - 'allclose', - 'is_tensor' - # 'isnan' -] +from ..fluid.layers import is_empty # noqa: F401 +from ..fluid.layers import logical_and # noqa: F401 +from ..fluid.layers import logical_not # noqa: F401 +from ..fluid.layers import logical_or # noqa: F401 +from ..fluid.layers import logical_xor # noqa: F401 + +from paddle.common_ops_import import core def equal_all(x, y, name=None): diff --git a/python/paddle/tensor/manipulation.py b/python/paddle/tensor/manipulation.py index 669225d8136..e4222dcccbd 100644 --- a/python/paddle/tensor/manipulation.py +++ b/python/paddle/tensor/manipulation.py @@ -23,52 +23,17 @@ from ..fluid.layers import utils import numpy as np import six # TODO: define functions to manipulate a tensor -from ..fluid.layers import cast #DEFINE_ALIAS -from ..fluid.layers import slice #DEFINE_ALIAS -from ..fluid.layers import transpose #DEFINE_ALIAS -from ..fluid.layers import unstack #DEFINE_ALIAS +from ..fluid.layers import cast # noqa: F401 +from ..fluid.layers import slice # noqa: F401 +from ..fluid.layers import transpose # noqa: F401 +from ..fluid.layers import unstack # noqa: F401 -from ..fluid.layers import scatter_nd #DEFINE_ALIAS -from ..fluid.layers import shard_index #DEFINE_ALIAS +from ..fluid.layers import scatter_nd # noqa: F401 +from ..fluid.layers import shard_index # noqa: F401 from ..fluid import layers import paddle import warnings -__all__ = [ - 'cast', - 'concat', - 'expand', - 'broadcast_to', - 'expand_as', - 'flatten', - 'gather', - 'gather_nd', - 'reshape', - 'reshape_', - 'reverse', - 'scatter', - 'scatter_', - 'scatter_nd_add', - 'scatter_nd', - 'shard_index', - 'slice', - 'split', - 'chunk', - 'squeeze', - 'squeeze_', - 'stack', - 'strided_slice', - 'transpose', - 'unique', - 'unsqueeze', - 
'unsqueeze_', - 'unstack', - 'flip', - 'unbind', - 'roll', - 'tile', -] - def _print_warning_in_static_mode(api_name): warnings.warn( diff --git a/python/paddle/tensor/math.py b/python/paddle/tensor/math.py index 215d467828a..328115ac933 100755 --- a/python/paddle/tensor/math.py +++ b/python/paddle/tensor/math.py @@ -17,7 +17,12 @@ math functions from __future__ import print_function import numpy as np -from paddle.common_ops_import import * +from paddle.common_ops_import import VarDesc +from paddle.common_ops_import import dygraph_only +from paddle.common_ops_import import OpProtoHolder +from paddle.common_ops_import import templatedoc +from paddle.common_ops_import import dygraph_utils + from paddle.tensor import cast import paddle from ..fluid import layers @@ -29,109 +34,31 @@ from .manipulation import _print_warning_in_static_mode # TODO: define math functions # yapf: disable -from ..fluid.layers import abs #DEFINE_ALIAS -from ..fluid.layers import acos #DEFINE_ALIAS -from ..fluid.layers import asin #DEFINE_ALIAS -from ..fluid.layers import ceil #DEFINE_ALIAS -from ..fluid.layers import cos #DEFINE_ALIAS -from ..fluid.layers import tan #DEFINE_ALIAS -from ..fluid.layers import sinh #DEFINE_ALIAS -from ..fluid.layers import cosh #DEFINE_ALIAS -# from ..fluid.layers import elementwise_add #DEFINE_ALIAS -# from ..fluid.layers import elementwise_div #DEFINE_ALIAS -# from ..fluid.layers import elementwise_floordiv #DEFINE_ALIAS -# from ..fluid.layers import elementwise_mod #DEFINE_ALIAS -# from ..fluid.layers import elementwise_mul #DEFINE_ALIAS -# from ..fluid.layers import elementwise_pow #DEFINE_ALIAS -# from ..fluid.layers import elementwise_sub #DEFINE_ALIAS -from ..fluid.layers import exp #DEFINE_ALIAS -from ..fluid.layers import floor #DEFINE_ALIAS -from ..fluid.layers import log #DEFINE_ALIAS -from ..fluid.layers import reciprocal #DEFINE_ALIAS -# from ..fluid.layers import reduce_max #DEFINE_ALIAS -# from ..fluid.layers import reduce_min #DEFINE_ALIAS -# from ..fluid.layers import reduce_prod #DEFINE_ALIAS -# from ..fluid.layers import reduce_sum #DEFINE_ALIAS -from ..fluid.layers import round #DEFINE_ALIAS -from ..fluid.layers import rsqrt #DEFINE_ALIAS -from ..fluid.layers import scale #DEFINE_ALIAS -from ..fluid.layers import square #DEFINE_ALIAS -from ..fluid.layers import stanh #DEFINE_ALIAS -from ..fluid.layers import atan #DEFINE_ALIAS -from ..fluid.layers import erf #DEFINE_ALIAS -from ..fluid.layers import sqrt #DEFINE_ALIAS -from ..fluid.layers import sin #DEFINE_ALIAS - -from ..fluid.layers import multiplex #DEFINE_ALIAS +from ..fluid.layers import abs # noqa: F401 +from ..fluid.layers import acos # noqa: F401 +from ..fluid.layers import asin # noqa: F401 +from ..fluid.layers import ceil # noqa: F401 +from ..fluid.layers import cos # noqa: F401 +from ..fluid.layers import tan # noqa: F401 +from ..fluid.layers import sinh # noqa: F401 +from ..fluid.layers import cosh # noqa: F401 +from ..fluid.layers import exp # noqa: F401 +from ..fluid.layers import floor # noqa: F401 +from ..fluid.layers import log # noqa: F401 +from ..fluid.layers import reciprocal # noqa: F401 +from ..fluid.layers import round # noqa: F401 +from ..fluid.layers import rsqrt # noqa: F401 +from ..fluid.layers import scale # noqa: F401 +from ..fluid.layers import square # noqa: F401 +from ..fluid.layers import stanh # noqa: F401 +from ..fluid.layers import atan # noqa: F401 +from ..fluid.layers import erf # noqa: F401 +from ..fluid.layers import sqrt # noqa: F401 +from ..fluid.layers import sin # noqa: 
F401 + +from ..fluid.layers import multiplex # noqa: F401 from ..fluid import layers - -__all__ = [ - 'abs', - 'acos', - 'all', - 'any', - 'asin', - 'atan', - 'ceil', - 'cos', - 'cosh', - 'cumsum', - 'exp', - 'floor', - 'increment', - 'log', - 'log2', - 'log10', - 'logsumexp', - 'mul', - 'multiplex', - 'pow', - 'prod', - 'reciprocal', - 'round', - 'rsqrt', - 'scale', - 'sign', - 'sin', - 'sinh', - 'sqrt', - 'square', - 'stanh', - 'sum', - 'tanh', - 'tanh_', - 'add_n', - 'max', - 'maximum', - 'min', - 'minimum', - 'mm', - 'divide', - 'floor_divide', - 'remainder', - 'mod', - 'floor_mod', - 'multiply', - 'add', - 'subtract', - 'atan', - 'logsumexp', - 'inverse', - 'log1p', - 'erf', - 'addmm', - 'clip', - 'trace', - 'kron', - 'isfinite', - 'isinf', - 'isnan', - 'broadcast_shape', - 'conj' -] -# yapf: enable. - _supported_int_dtype_ = [ VarDesc.VarType.UINT8, VarDesc.VarType.INT8, @@ -472,8 +399,8 @@ def remainder(x, y, name=None): return _elementwise_op(LayerHelper(op_type, **locals())) -mod = remainder #DEFINE_ALIAS -floor_mod = remainder #DEFINE_ALIAS +mod = remainder # noqa: F841 +floor_mod = remainder # noqa: F841 def multiply(x, y, name=None): diff --git a/python/paddle/tensor/random.py b/python/paddle/tensor/random.py index 5aca87c1507..7e1eef8f325 100644 --- a/python/paddle/tensor/random.py +++ b/python/paddle/tensor/random.py @@ -21,18 +21,6 @@ from ..fluid.data_feeder import check_variable_and_dtype, check_type, check_dtyp from ..fluid.layers import utils import paddle -__all__ = [ - 'bernoulli', - 'multinomial', - 'standard_normal', - 'normal', - 'uniform', - 'randn', - 'rand', - 'randint', - 'randperm', -] - def bernoulli(x, name=None): """ diff --git a/python/paddle/tensor/search.py b/python/paddle/tensor/search.py index 95f8fa449bd..ac303d2311e 100644 --- a/python/paddle/tensor/search.py +++ b/python/paddle/tensor/search.py @@ -16,26 +16,15 @@ import numpy as np from ..fluid.layer_helper import LayerHelper from ..fluid.data_feeder import check_variable_and_dtype, check_type, check_dtype from ..fluid import core, layers +from paddle.common_ops_import import in_dygraph_mode +from paddle.common_ops_import import convert_np_dtype_to_dtype_ +from paddle.common_ops_import import Variable +from paddle.common_ops_import import VarDesc # TODO: define searching & indexing functions of a tensor # from ..fluid.layers import has_inf #DEFINE_ALIAS # from ..fluid.layers import has_nan #DEFINE_ALIAS -__all__ = [ - 'argmax', - 'argmin', - 'argsort', - 'masked_select', - 'topk', - 'where', - 'index_select', - 'nonzero', - 'sort', - 'index_sample', -] - -from paddle.common_ops_import import * - def argsort(x, axis=-1, descending=False, name=None): """ diff --git a/python/paddle/tensor/stat.py b/python/paddle/tensor/stat.py index 9e565d4e522..fa7a278a2b5 100644 --- a/python/paddle/tensor/stat.py +++ b/python/paddle/tensor/stat.py @@ -14,8 +14,6 @@ # TODO: define statistical functions of a tensor -__all__ = ['mean', 'std', 'var', 'numel', 'median'] - import numpy as np from ..fluid.framework import Variable from ..fluid.layer_helper import LayerHelper diff --git a/python/paddle/tensor/tensor.py b/python/paddle/tensor/tensor.py index 478e8264681..ec7b50c63c0 100644 --- a/python/paddle/tensor/tensor.py +++ b/python/paddle/tensor/tensor.py @@ -13,9 +13,3 @@ # limitations under the License. 
# TODO: define the basic tensor classes - -__all__ = [ - # 'Tensor', - # 'LoDTensor', - # 'LoDTensorArray' -] diff --git a/python/paddle/tensor/to_string.py b/python/paddle/tensor/to_string.py index e5148d039c9..2e76a8d47a7 100644 --- a/python/paddle/tensor/to_string.py +++ b/python/paddle/tensor/to_string.py @@ -17,8 +17,6 @@ import numpy as np from paddle.fluid.layers import core from paddle.fluid.data_feeder import convert_dtype, check_variable_and_dtype, check_type, check_dtype -__all__ = ['set_printoptions'] - class PrintOptions(object): precision = 8 -- GitLab From 9930a582700698dbf93b9dc604306fa68eadf3f7 Mon Sep 17 00:00:00 2001 From: zhiboniu <31800336+zhiboniu@users.noreply.github.com> Date: Tue, 27 Apr 2021 09:50:58 +0800 Subject: [PATCH 015/720] update 2.0 public api in dataset&framework (#31985) --- python/paddle/__init__.py | 1 + python/paddle/dataset/__init__.py | 24 +++++++------- python/paddle/dataset/cifar.py | 2 -- python/paddle/dataset/common.py | 8 ----- python/paddle/dataset/conll05.py | 2 -- python/paddle/dataset/flowers.py | 8 +++-- python/paddle/dataset/image.py | 6 ---- python/paddle/dataset/imdb.py | 2 -- python/paddle/dataset/imikolov.py | 2 -- python/paddle/dataset/mnist.py | 1 - python/paddle/dataset/movielens.py | 5 --- python/paddle/dataset/uci_housing.py | 2 -- python/paddle/dataset/voc2012.py | 2 -- python/paddle/dataset/wmt14.py | 6 ---- python/paddle/dataset/wmt16.py | 8 ----- python/paddle/framework/__init__.py | 48 +++++++++++----------------- python/paddle/framework/framework.py | 2 -- python/paddle/framework/io.py | 5 --- python/paddle/framework/random.py | 2 -- 19 files changed, 38 insertions(+), 98 deletions(-) diff --git a/python/paddle/__init__.py b/python/paddle/__init__.py index 94091c94bb5..4b9f310e73b 100755 --- a/python/paddle/__init__.py +++ b/python/paddle/__init__.py @@ -265,6 +265,7 @@ from .framework import DataParallel #DEFINE_ALIAS from .framework import set_default_dtype #DEFINE_ALIAS from .framework import get_default_dtype #DEFINE_ALIAS +from .framework import set_grad_enabled #DEFINE_ALIAS from .tensor.search import index_sample #DEFINE_ALIAS from .tensor.stat import mean #DEFINE_ALIAS diff --git a/python/paddle/dataset/__init__.py b/python/paddle/dataset/__init__.py index 2db867d7a7a..4b71ff6ac66 100644 --- a/python/paddle/dataset/__init__.py +++ b/python/paddle/dataset/__init__.py @@ -15,18 +15,18 @@ Dataset package. 
""" -import paddle.dataset.mnist -import paddle.dataset.imikolov -import paddle.dataset.imdb -import paddle.dataset.cifar -import paddle.dataset.movielens -import paddle.dataset.conll05 -import paddle.dataset.uci_housing -import paddle.dataset.wmt14 -import paddle.dataset.wmt16 -import paddle.dataset.flowers -import paddle.dataset.voc2012 -import paddle.dataset.image +import paddle.dataset.mnist # noqa: F401 +import paddle.dataset.imikolov # noqa: F401 +import paddle.dataset.imdb # noqa: F401 +import paddle.dataset.cifar # noqa: F401 +import paddle.dataset.movielens # noqa: F401 +import paddle.dataset.conll05 # noqa: F401 +import paddle.dataset.uci_housing # noqa: F401 +import paddle.dataset.wmt14 # noqa: F401 +import paddle.dataset.wmt16 # noqa: F401 +import paddle.dataset.flowers # noqa: F401 +import paddle.dataset.voc2012 # noqa: F401 +import paddle.dataset.image # noqa: F401 # set __all__ as empty for not showing APIs under paddle.dataset __all__ = [] diff --git a/python/paddle/dataset/cifar.py b/python/paddle/dataset/cifar.py index 2ee95c3723b..a6b6e28c0f5 100644 --- a/python/paddle/dataset/cifar.py +++ b/python/paddle/dataset/cifar.py @@ -37,8 +37,6 @@ import tarfile import six from six.moves import cPickle as pickle -__all__ = ['train100', 'test100', 'train10', 'test10'] - URL_PREFIX = 'https://dataset.bj.bcebos.com/cifar/' CIFAR10_URL = URL_PREFIX + 'cifar-10-python.tar.gz' CIFAR10_MD5 = 'c58f30108f718f92721af3b95e74349a' diff --git a/python/paddle/dataset/common.py b/python/paddle/dataset/common.py index 2884fa0ce5e..cff0c625738 100644 --- a/python/paddle/dataset/common.py +++ b/python/paddle/dataset/common.py @@ -26,14 +26,6 @@ import paddle.dataset import six.moves.cPickle as pickle import glob -__all__ = [ - 'DATA_HOME', - 'download', - 'md5file', - 'split', - 'cluster_files_reader', -] - HOME = os.path.expanduser('~') DATA_HOME = os.path.join(HOME, '.cache', 'paddle', 'dataset') diff --git a/python/paddle/dataset/conll05.py b/python/paddle/dataset/conll05.py index e7176626ca2..96fd5ae7d76 100644 --- a/python/paddle/dataset/conll05.py +++ b/python/paddle/dataset/conll05.py @@ -30,8 +30,6 @@ import paddle.compat as cpt import paddle.utils.deprecated as deprecated from six.moves import zip, range -__all__ = ['test, get_dict', 'get_embedding'] - DATA_URL = 'http://paddlemodels.bj.bcebos.com/conll05st/conll05st-tests.tar.gz' DATA_MD5 = '387719152ae52d60422c016e92a742fc' WORDDICT_URL = 'http://paddlemodels.bj.bcebos.com/conll05st%2FwordDict.txt' diff --git a/python/paddle/dataset/flowers.py b/python/paddle/dataset/flowers.py index e16ea6e561e..67ffd8e1ee1 100644 --- a/python/paddle/dataset/flowers.py +++ b/python/paddle/dataset/flowers.py @@ -35,7 +35,12 @@ import itertools import functools from .common import download import tarfile -from paddle.dataset.image import * + +from paddle.dataset.image import load_image_bytes +from paddle.dataset.image import load_image +from paddle.dataset.image import simple_transform +from paddle.dataset.image import batch_images_from_tar + from paddle.reader import map_readers, xmap_readers from paddle import compat as cpt import paddle.utils.deprecated as deprecated @@ -45,7 +50,6 @@ from multiprocessing import cpu_count import six from six.moves import cPickle as pickle from paddle.utils import try_import -__all__ = ['train', 'test', 'valid'] DATA_URL = 'http://paddlemodels.bj.bcebos.com/flowers/102flowers.tgz' LABEL_URL = 'http://paddlemodels.bj.bcebos.com/flowers/imagelabels.mat' diff --git a/python/paddle/dataset/image.py 
b/python/paddle/dataset/image.py index 09b5607252b..31329cd978c 100644 --- a/python/paddle/dataset/image.py +++ b/python/paddle/dataset/image.py @@ -58,12 +58,6 @@ import os import tarfile import six.moves.cPickle as pickle -__all__ = [ - "load_image_bytes", "load_image", "resize_short", "to_chw", "center_crop", - "random_crop", "left_right_flip", "simple_transform", "load_and_transform", - "batch_images_from_tar" -] - def _check_cv2(): if cv2 is None: diff --git a/python/paddle/dataset/imdb.py b/python/paddle/dataset/imdb.py index dab3c964cc6..33ae4405c50 100644 --- a/python/paddle/dataset/imdb.py +++ b/python/paddle/dataset/imdb.py @@ -30,8 +30,6 @@ import re import string import six -__all__ = ['build_dict', 'train', 'test'] - #URL = 'http://ai.stanford.edu/%7Eamaas/data/sentiment/aclImdb_v1.tar.gz' URL = 'https://dataset.bj.bcebos.com/imdb%2FaclImdb_v1.tar.gz' MD5 = '7c2ac02c03563afcf9b574c7e56c153a' diff --git a/python/paddle/dataset/imikolov.py b/python/paddle/dataset/imikolov.py index cc8e95fc342..3b8b12303c9 100644 --- a/python/paddle/dataset/imikolov.py +++ b/python/paddle/dataset/imikolov.py @@ -27,8 +27,6 @@ import collections import tarfile import six -__all__ = ['train', 'test', 'build_dict'] - #URL = 'http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz' URL = 'https://dataset.bj.bcebos.com/imikolov%2Fsimple-examples.tgz' MD5 = '30177ea32e27c525793142b6bf2c8e2d' diff --git a/python/paddle/dataset/mnist.py b/python/paddle/dataset/mnist.py index 14e54d593bb..06e8174a61e 100644 --- a/python/paddle/dataset/mnist.py +++ b/python/paddle/dataset/mnist.py @@ -26,7 +26,6 @@ import gzip import numpy import struct from six.moves import range -__all__ = ['train', 'test'] URL_PREFIX = 'https://dataset.bj.bcebos.com/mnist/' TEST_IMAGE_URL = URL_PREFIX + 't10k-images-idx3-ubyte.gz' diff --git a/python/paddle/dataset/movielens.py b/python/paddle/dataset/movielens.py index f753f405bba..23781b65785 100644 --- a/python/paddle/dataset/movielens.py +++ b/python/paddle/dataset/movielens.py @@ -34,11 +34,6 @@ import functools import six import paddle.compat as cpt -__all__ = [ - 'train', 'test', 'get_movie_title_dict', 'max_movie_id', 'max_user_id', - 'age_table', 'movie_categories', 'max_job_id', 'user_info', 'movie_info' -] - age_table = [1, 18, 25, 35, 45, 50, 56] #URL = 'http://files.grouplens.org/datasets/movielens/ml-1m.zip' diff --git a/python/paddle/dataset/uci_housing.py b/python/paddle/dataset/uci_housing.py index daed62fbefb..1bc2098350f 100644 --- a/python/paddle/dataset/uci_housing.py +++ b/python/paddle/dataset/uci_housing.py @@ -29,8 +29,6 @@ import os import paddle.dataset.common import paddle.utils.deprecated as deprecated -__all__ = ['train', 'test'] - URL = 'http://paddlemodels.bj.bcebos.com/uci_housing/housing.data' MD5 = 'd4accdce7a25600298819f8e28e8d593' feature_names = [ diff --git a/python/paddle/dataset/voc2012.py b/python/paddle/dataset/voc2012.py index 5a0ff76aab4..1575b44cd16 100644 --- a/python/paddle/dataset/voc2012.py +++ b/python/paddle/dataset/voc2012.py @@ -29,8 +29,6 @@ from paddle.dataset.image import * import paddle.utils.deprecated as deprecated from PIL import Image -__all__ = ['train', 'test', 'val'] - VOC_URL = 'http://host.robots.ox.ac.uk/pascal/VOC/voc2012/\ VOCtrainval_11-May-2012.tar' diff --git a/python/paddle/dataset/wmt14.py b/python/paddle/dataset/wmt14.py index 3bd5e8d5bad..818f4b28ba1 100644 --- a/python/paddle/dataset/wmt14.py +++ b/python/paddle/dataset/wmt14.py @@ -30,12 +30,6 @@ import paddle.dataset.common import paddle.compat as cpt 
import paddle.utils.deprecated as deprecated -__all__ = [ - 'train', - 'test', - 'get_dict', -] - URL_DEV_TEST = ('http://www-lium.univ-lemans.fr/~schwenk/' 'cslm_joint_paper/data/dev+test.tgz') MD5_DEV_TEST = '7d7897317ddd8ba0ae5c5fa7248d3ff5' diff --git a/python/paddle/dataset/wmt16.py b/python/paddle/dataset/wmt16.py index 7f11bc4b1f0..6804e7ab5fc 100644 --- a/python/paddle/dataset/wmt16.py +++ b/python/paddle/dataset/wmt16.py @@ -40,14 +40,6 @@ import paddle import paddle.compat as cpt import paddle.utils.deprecated as deprecated -__all__ = [ - "train", - "test", - "validation", - "fetch", - "get_dict", -] - DATA_URL = ("http://paddlemodels.bj.bcebos.com/wmt/wmt16.tar.gz") DATA_MD5 = "0c38be43600334966403524a40dcd81e" diff --git a/python/paddle/framework/__init__.py b/python/paddle/framework/__init__.py index b8684874085..660267c24e5 100644 --- a/python/paddle/framework/__init__.py +++ b/python/paddle/framework/__init__.py @@ -12,35 +12,25 @@ # See the License for the specific language governing permissions and # limitations under the License. -# TODO: import framework api under this directory -__all__ = [ - 'create_parameter', 'ParamAttr', 'CPUPlace', 'CUDAPlace', 'CUDAPinnedPlace', - 'NPUPlace', 'get_default_dtype', 'set_default_dtype' -] +# TODO: import framework api under this directory -__all__ += [ - 'grad', 'set_grad_enabled', 'LayerList', 'load', 'save', 'no_grad', - 'DataParallel' -] +from . import random # noqa: F401 +from .random import seed # noqa: F401 +from .framework import get_default_dtype # noqa: F401 +from .framework import set_default_dtype # noqa: F401 +from .framework import set_grad_enabled # noqa: F401 -from . import random -from .random import seed -from .framework import get_default_dtype -from .framework import set_default_dtype -from .framework import set_grad_enabled +from ..fluid.param_attr import ParamAttr # noqa: F401 +from ..fluid.layers.tensor import create_parameter # noqa: F401 +from ..fluid.core import CPUPlace # noqa: F401 +from ..fluid.core import CUDAPlace # noqa: F401 +from ..fluid.core import CUDAPinnedPlace # noqa: F401 +from ..fluid.core import NPUPlace # noqa: F401 +from ..fluid.core import VarBase # noqa: F401 -from ..fluid.param_attr import ParamAttr #DEFINE_ALIAS -# from ..fluid.layers.tensor import create_global_var #DEFINE_ALIAS -from ..fluid.layers.tensor import create_parameter #DEFINE_ALIAS -from ..fluid.core import CPUPlace #DEFINE_ALIAS -from ..fluid.core import CUDAPlace #DEFINE_ALIAS -from ..fluid.core import CUDAPinnedPlace #DEFINE_ALIAS -from ..fluid.core import NPUPlace #DEFINE_ALIAS -from ..fluid.core import VarBase #DEFINE_ALIAS - -from paddle.fluid import core #DEFINE_ALIAS -from ..fluid.dygraph.base import no_grad_ as no_grad #DEFINE_ALIAS -from ..fluid.dygraph.base import grad #DEFINE_ALIAS -from .io import save -from .io import load -from ..fluid.dygraph.parallel import DataParallel #DEFINE_ALIAS +from paddle.fluid import core # noqa: F401 +from ..fluid.dygraph.base import no_grad_ as no_grad # noqa: F401 +from ..fluid.dygraph.base import grad # noqa: F401 +from .io import save # noqa: F401 +from .io import load # noqa: F401 +from ..fluid.dygraph.parallel import DataParallel # noqa: F401 diff --git a/python/paddle/framework/framework.py b/python/paddle/framework/framework.py index 77be85a3195..f50285010cc 100644 --- a/python/paddle/framework/framework.py +++ b/python/paddle/framework/framework.py @@ -19,8 +19,6 @@ from paddle.fluid.framework import _dygraph_tracer import numpy as np from contextlib import contextmanager 
-__all__ = ['set_default_dtype', 'get_default_dtype'] - def set_default_dtype(d): """ diff --git a/python/paddle/framework/io.py b/python/paddle/framework/io.py index 32a62d2461a..955d8610a59 100644 --- a/python/paddle/framework/io.py +++ b/python/paddle/framework/io.py @@ -38,11 +38,6 @@ from paddle.fluid.dygraph.jit import _SaveLoadConfig from paddle.fluid.dygraph.io import _construct_program_holders, _construct_params_and_buffers from paddle.fluid.dygraph.io import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX, INFER_PARAMS_INFO_SUFFIX -__all__ = [ - 'save', - 'load', -] - def _build_saved_state_dict(state_dict): save_dict = {} diff --git a/python/paddle/framework/random.py b/python/paddle/framework/random.py index 1624a069a51..cce95137436 100644 --- a/python/paddle/framework/random.py +++ b/python/paddle/framework/random.py @@ -16,8 +16,6 @@ import paddle.fluid as fluid from paddle.fluid import core -__all__ = ['seed', 'get_cuda_rng_state', 'set_cuda_rng_state'] - def seed(seed): """ -- GitLab From c1db7e32128fe821c2adc02d6624f39589dad38b Mon Sep 17 00:00:00 2001 From: ShenLiang <1422485404@qq.com> Date: Tue, 27 Apr 2021 09:57:24 +0800 Subject: [PATCH 016/720] [HybridParallel] Fix amp bug in ModelParallel (#32579) * fix amp bug * fix name of wordsize --- .../dygraph_optimizer/hybrid_parallel_gradscaler.py | 7 ++++--- .../fleet/meta_parallel/parallel_layers/pp_layers.py | 10 +++++----- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_gradscaler.py b/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_gradscaler.py index 11bb897a678..13bb9d2acec 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_gradscaler.py +++ b/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_gradscaler.py @@ -67,10 +67,11 @@ class HybridParallelGradScaler: # allreduce_max found_inf in check_group if self._is_mp: self._found_inf = paddle.cast(self._found_inf, dtype="int32") + # TODO(shenliang03) Since the minimize call in the optimizer is + # after the gradscaler, check_finite needs to synchronize global + # information. In the future, we should use check_group paddle.distributed.all_reduce( - self._found_inf, - op=paddle.distributed.ReduceOp.MAX, - group=self._hcg.get_check_parallel_group()) + self._found_inf, op=paddle.distributed.ReduceOp.MAX, group=None) self._found_inf = paddle.cast(self._found_inf, dtype="bool") def __getattr__(self, item): diff --git a/python/paddle/distributed/fleet/meta_parallel/parallel_layers/pp_layers.py b/python/paddle/distributed/fleet/meta_parallel/parallel_layers/pp_layers.py index e2db689eb76..669ed032a34 100644 --- a/python/paddle/distributed/fleet/meta_parallel/parallel_layers/pp_layers.py +++ b/python/paddle/distributed/fleet/meta_parallel/parallel_layers/pp_layers.py @@ -77,7 +77,7 @@ class PipelineLayer(Layer): self.layers = layers self._loss_fn = loss_fn self._topo = topology - word_size = dist.get_world_size() + world_size = dist.get_world_size() self.global_rank = dist.get_rank() if self._topo: @@ -88,11 +88,11 @@ class PipelineLayer(Layer): self._num_stages) else: # construct default topology - if word_size % num_stages != 0: + if world_size % num_stages != 0: raise ValueError("should provide correct num_stages({}) " - "which can be divided by word_size({})".format( - num_stages, word_size)) - dp_num = word_size // num_stages + "which can be divided by world_size({})". 
+ format(num_stages, world_size)) + dp_num = world_size // num_stages self._topo = fleet.CommunicateTopology(["data", "pipe", "model"], [dp_num, num_stages, 1]) self._stage_id = self._topo.get_coord(self.global_rank).pipe -- GitLab From 19eefef4ca8f1f006c687c0f443c3837e9f1b2f6 Mon Sep 17 00:00:00 2001 From: XiangGao Date: Tue, 27 Apr 2021 10:00:53 +0800 Subject: [PATCH 017/720] Check for cuda errors immediately after kernel launch (#32557) Co-authored-by: Yang Zhang --- paddle/fluid/framework/op_registry.h | 23 ++++++++++++++++++++--- paddle/fluid/platform/enforce.h | 10 ++++++++++ 2 files changed, 30 insertions(+), 3 deletions(-) diff --git a/paddle/fluid/framework/op_registry.h b/paddle/fluid/framework/op_registry.h index 818da7478b2..9f0dc50774a 100644 --- a/paddle/fluid/framework/op_registry.h +++ b/paddle/fluid/framework/op_registry.h @@ -134,6 +134,17 @@ class OpRegistry { static std::unique_ptr CreateOp(const OpDesc& op_desc); }; +template +inline void CheckKernelLaunch(const char* op_type){}; + +#ifdef PADDLE_WITH_CUDA +template <> +inline void CheckKernelLaunch<::paddle::platform::CUDAPlace>( + const char* op_type) { + PADDLE_ENFORCE_CUDA_LAUNCH_SUCCESS(op_type); +}; +#endif + template struct OpKernelRegistrarFunctor; @@ -162,8 +173,9 @@ struct OpKernelRegistrarFunctor { RegisterKernelClass( op_type, library_type, customized_type_value, - [](const framework::ExecutionContext& ctx) { + [op_type](const framework::ExecutionContext& ctx) { KERNEL_TYPE().Compute(ctx); + CheckKernelLaunch(op_type); }); constexpr auto size = std::tuple_size>::value; OpKernelRegistrarFunctor @@ -223,8 +235,13 @@ struct OpKernelRegistrarFunctorEx(op_type, library_type, - customized_type_value, Functor()); + RegisterKernelClass( + op_type, library_type, customized_type_value, + + [op_type](const framework::ExecutionContext& ctx) { + Functor()(ctx); + CheckKernelLaunch(op_type); + }); constexpr auto size = std::tuple_size>::value; diff --git a/paddle/fluid/platform/enforce.h b/paddle/fluid/platform/enforce.h index cfca3ceadf4..d42733823e6 100644 --- a/paddle/fluid/platform/enforce.h +++ b/paddle/fluid/platform/enforce.h @@ -991,6 +991,16 @@ DEFINE_CUDA_STATUS_TYPE(ncclResult_t, ncclSuccess); } \ } while (0) +#define PADDLE_ENFORCE_CUDA_LAUNCH_SUCCESS(OP) \ + do { \ + auto res = cudaGetLastError(); \ + if (UNLIKELY(res != cudaSuccess)) { \ + auto msg = ::paddle::platform::build_nvidia_error_msg(res); \ + PADDLE_THROW(platform::errors::Fatal("CUDA error after kernel (%s): %s", \ + OP, msg)); \ + } \ + } while (0) + inline void retry_sleep(unsigned milliseconds) { #ifdef _WIN32 Sleep(milliseconds); -- GitLab From 6579432ff663d1402754409286618fea502f6940 Mon Sep 17 00:00:00 2001 From: Aurelius84 Date: Tue, 27 Apr 2021 10:33:48 +0800 Subject: [PATCH 018/720] Fix grad calculation bug in tensor_array_to_tensor (#32558) --- paddle/fluid/operators/tensor_array_to_tensor_op.cc | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/paddle/fluid/operators/tensor_array_to_tensor_op.cc b/paddle/fluid/operators/tensor_array_to_tensor_op.cc index 620231eb2e2..eb20e1c2cd2 100644 --- a/paddle/fluid/operators/tensor_array_to_tensor_op.cc +++ b/paddle/fluid/operators/tensor_array_to_tensor_op.cc @@ -250,8 +250,12 @@ class LoDTensorArray2TensorGradOp : public framework::OperatorBase { auto dout_name = Input(framework::GradVarName("Out")); std::vector grad_names; + // NOTE(Aurelius84): Generating grad base name by Input("X") instead of + // fixed string to avoid incorrectly sharing same var's allocation in + // 
multi-thread that will cause wrong calculation result. + std::string grad_base_name = base_name + "_temp_grad_"; - LodTensorVectorResizeFromLodTensorArray(scope, "grad_name", Input("X"), + LodTensorVectorResizeFromLodTensorArray(scope, grad_base_name, Input("X"), &grad_names); auto use_stack = Attr("use_stack"); -- GitLab From 809ac03656712744d6dea7a6268aeeea46b6f12e Mon Sep 17 00:00:00 2001 From: tianshuo78520a <707759223@qq.com> Date: Tue, 27 Apr 2021 11:54:00 +0800 Subject: [PATCH 019/720] Revert "[PsCore] optimize performance of large kv (#32535)" (#32599) This reverts commit 4b7242b0d8c7917a8e23e49ee8ebf4c460a392cd. --- CMakeLists.txt | 5 - .../distributed/service/brpc_ps_server.cc | 23 ++- .../distributed/table/common_sparse_table.cc | 55 +++--- .../table/depends/large_scale_kv.h | 158 ++++++++---------- .../framework/fleet/heter_ps/CMakeLists.txt | 7 +- .../distributed/fleet/runtime/the_one_ps.py | 45 ++--- .../distributed_strategy.py | 1 - .../fleet/parameter_server/ir/public.py | 1 - 8 files changed, 119 insertions(+), 176 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index f30671bd3a8..2f16c390d8b 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -353,11 +353,6 @@ if (WITH_MIPS) add_definitions(-DPADDLE_WITH_MIPS) endif() -if (WITH_HETERPS) - if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 7.0) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -faligned-new") - endif() -endif() set(PADDLE_PYTHON_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/python/build") set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O3 -g -DNDEBUG") diff --git a/paddle/fluid/distributed/service/brpc_ps_server.cc b/paddle/fluid/distributed/service/brpc_ps_server.cc index a1440260bf2..a9370561a54 100644 --- a/paddle/fluid/distributed/service/brpc_ps_server.cc +++ b/paddle/fluid/distributed/service/brpc_ps_server.cc @@ -14,7 +14,6 @@ #include "paddle/fluid/distributed/service/brpc_ps_server.h" #include // NOLINT -#include "butil/object_pool.h" #include "paddle/fluid/distributed/table/depends/sparse_utils.h" #include "paddle/fluid/distributed/table/table.h" #include "paddle/fluid/framework/archive.h" @@ -197,13 +196,12 @@ int32_t BrpcPsService::pull_dense(Table *table, const PsRequestMessage &request, return 0; } - auto res_data = butil::get_object>(); - res_data->resize(num * table->value_accesor()->select_size() / sizeof(float)); - table->pull_dense(res_data->data(), num); + std::vector res_data; + res_data.resize(num * table->value_accesor()->select_size() / sizeof(float)); + table->pull_dense(res_data.data(), num); - cntl->response_attachment().append((char *)(res_data->data()), - res_data->size() * sizeof(float)); - butil::return_object(res_data); + cntl->response_attachment().append((char *)res_data.data(), + res_data.size() * sizeof(float)); return 0; } @@ -369,13 +367,12 @@ int32_t BrpcPsService::pull_sparse(Table *table, value.DeserializeFromBytes(const_cast(data)); - auto res_data = butil::get_object>(); - res_data->resize(num * dim); - table->pull_sparse(res_data->data(), value); + std::vector res_data; + res_data.resize(num * dim); + table->pull_sparse(res_data.data(), value); - cntl->response_attachment().append((char *)(res_data->data()), - res_data->size() * sizeof(float)); - butil::return_object(res_data); + cntl->response_attachment().append((char *)res_data.data(), + res_data.size() * sizeof(float)); return 0; } diff --git a/paddle/fluid/distributed/table/common_sparse_table.cc b/paddle/fluid/distributed/table/common_sparse_table.cc index 718fce99507..1c315d34abc 100644 --- 
a/paddle/fluid/distributed/table/common_sparse_table.cc +++ b/paddle/fluid/distributed/table/common_sparse_table.cc @@ -125,37 +125,34 @@ void ProcessALine(const std::vector& columns, const Meta& meta, int64_t SaveToText(std::ostream* os, std::shared_ptr block, const int mode) { - int64_t save_num = 0; - for (auto& table : block->values_) { - for (auto& value : table) { - if (mode == SaveMode::delta && !value.second->need_save_) { - continue; - } - save_num += 1; - - auto* vs = value.second->data_.data(); - std::stringstream ss; - auto id = value.first; - ss << id << "\t" << value.second->count_ << "\t" - << value.second->unseen_days_ << "\t" << value.second->is_entry_ - << "\t"; - - for (int i = 0; i < block->value_length_; i++) { - ss << vs[i]; - ss << ","; - } + int64_t not_save_num = 0; + for (auto& value : block->values_) { + if (mode == SaveMode::delta && !value.second.need_save_) { + not_save_num++; + continue; + } - ss << "\n"; + auto* vs = value.second.data_; + std::stringstream ss; + auto id = value.first; + ss << id << "\t" << value.second.count_ << "\t" << value.second.unseen_days_ + << "\t" << value.second.is_entry_ << "\t"; - os->write(ss.str().c_str(), sizeof(char) * ss.str().size()); + for (int i = 0; i < block->value_length_; i++) { + ss << vs[i]; + ss << ","; + } - if (mode == SaveMode::base || mode == SaveMode::delta) { - value.second->need_save_ = false; - } + ss << "\n"; + + os->write(ss.str().c_str(), sizeof(char) * ss.str().size()); + + if (mode == SaveMode::base || mode == SaveMode::delta) { + value.second.need_save_ = false; } } - return save_num; + return block->values_.size() - not_save_num; } int64_t LoadFromText(const std::string& valuepath, const std::string& metapath, @@ -186,7 +183,7 @@ int64_t LoadFromText(const std::string& valuepath, const std::string& metapath, block->Init(id, false); - VALUE* value_instant = block->GetValue(id); + auto value_instant = block->GetValue(id); if (values.size() == 5) { value_instant->count_ = std::stoi(values[1]); value_instant->unseen_days_ = std::stoi(values[2]); @@ -376,10 +373,8 @@ std::pair CommonSparseTable::print_table_stat() { int64_t feasign_size = 0; int64_t mf_size = 0; - for (auto& shard : shard_values_) { - for (auto& table : shard->values_) { - feasign_size += table.size(); - } + for (auto& value : shard_values_) { + feasign_size += value->values_.size(); } return {feasign_size, mf_size}; diff --git a/paddle/fluid/distributed/table/depends/large_scale_kv.h b/paddle/fluid/distributed/table/depends/large_scale_kv.h index 5c10fca98cd..bb4174bd2c5 100644 --- a/paddle/fluid/distributed/table/depends/large_scale_kv.h +++ b/paddle/fluid/distributed/table/depends/large_scale_kv.h @@ -26,7 +26,6 @@ #include #include "gflags/gflags.h" -#include "butil/object_pool.h" #include "paddle/fluid/distributed/common/utils.h" #include "paddle/fluid/distributed/table/depends/initializers.h" #include "paddle/fluid/distributed/thirdparty/round_robin.h" @@ -49,10 +48,6 @@ namespace distributed { enum Mode { training, infer }; -static const int SPARSE_SHARD_BUCKET_NUM_BITS = 6; -static const size_t SPARSE_SHARD_BUCKET_NUM = (size_t)1 - << SPARSE_SHARD_BUCKET_NUM_BITS; - struct VALUE { explicit VALUE(size_t length) : length_(length), @@ -60,16 +55,46 @@ struct VALUE { unseen_days_(0), need_save_(false), is_entry_(false) { - data_.resize(length); - memset(data_.data(), 0, sizeof(float) * length); + data_ = new float[length]; + memset(data_, 0, sizeof(float) * length); + } + + VALUE(const VALUE &value) { + length_ = value.length_; + 
count_ = value.count_; + unseen_days_ = value.unseen_days_; + need_save_ = value.need_save_; + is_entry_ = value.is_entry_; + data_ = new float[length_]; + memcpy(data_, value.data_, sizeof(float) * length_); + } + + VALUE &operator=(const VALUE &value) { + if (this != &value) { + delete[] data_; + length_ = value.length_; + count_ = value.count_; + unseen_days_ = value.unseen_days_; + need_save_ = value.need_save_; + is_entry_ = value.is_entry_; + + data_ = new float[length_]; + memcpy(data_, value.data_, sizeof(float) * length_); + } + return *this; + } + + ~VALUE() { + delete[] data_; + data_ = nullptr; } size_t length_; - std::vector data_; int count_; int unseen_days_; // use to check knock-out bool need_save_; // whether need to save bool is_entry_; // whether knock-in + float *data_; }; inline bool count_entry(VALUE *value, int threshold) { @@ -151,12 +176,12 @@ class ValueBlock { const std::vector &value_dims) { auto pts = std::vector(); pts.reserve(value_names.size()); - auto values = GetValue(id); + auto &values = values_.at(id); for (int i = 0; i < static_cast(value_names.size()); i++) { PADDLE_ENFORCE_EQ( value_dims[i], value_dims_[i], platform::errors::InvalidArgument("value dims is not match")); - pts.push_back(values->data_.data() + + pts.push_back(values.data_ + value_offsets_.at(value_idx_.at(value_names[i]))); } return pts; @@ -165,45 +190,33 @@ class ValueBlock { // pull float *Init(const uint64_t &id, const bool with_update = true, const int counter = 1) { - size_t hash = _hasher(id); - size_t bucket = compute_bucket(hash); - - auto &table = values_[bucket]; - auto res = table.find(id); - - VALUE *value = nullptr; - if (res == table.end()) { - value = butil::get_object(value_length_); - - table[id] = value; - - } else { - value = res->second; + if (!Has(id)) { + values_.emplace(std::make_pair(id, VALUE(value_length_))); } + auto &value = values_.at(id); + if (with_update) { - AttrUpdate(value, counter); + AttrUpdate(&value, counter); } - return value->data_.data(); + + return value.data_; } + VALUE *InitGet(const uint64_t &id, const bool with_update = true, const int counter = 1) { - size_t hash = _hasher(id); - size_t bucket = compute_bucket(hash); + if (!Has(id)) { + values_.emplace(std::make_pair(id, VALUE(value_length_))); + } - auto &table = values_[bucket]; - auto res = table.find(id); + auto &value = values_.at(id); - VALUE *value = nullptr; - if (res == table.end()) { - value = butil::get_object(value_length_); - // value = _alloc.acquire(value_length_); - table[id] = value; - } else { - value = (VALUE *)(void *)(res->second); + if (with_update) { + AttrUpdate(&value, counter); } - return value; + + return &value; } void AttrUpdate(VALUE *value, const int counter) { @@ -216,7 +229,7 @@ class ValueBlock { if (value->is_entry_) { // initialize for (size_t x = 0; x < value_names_.size(); ++x) { - initializers_[x]->GetValue(value->data_.data() + value_offsets_[x], + initializers_[x]->GetValue(value->data_ + value_offsets_[x], value_dims_[x]); } value->need_save_ = true; @@ -230,73 +243,42 @@ class ValueBlock { // dont jude if (has(id)) float *Get(const uint64_t &id) { - size_t hash = _hasher(id); - size_t bucket = compute_bucket(hash); - auto &table = values_[bucket]; - - // auto &value = table.at(id); - // return value->data_.data(); - auto res = table.find(id); - VALUE *value = res->second; - return value->data_.data(); + auto &value = values_.at(id); + return value.data_; } // for load, to reset count, unseen_days - VALUE *GetValue(const uint64_t &id) { - size_t 
hash = _hasher(id); - size_t bucket = compute_bucket(hash); - - auto &table = values_[bucket]; - auto res = table.find(id); - return res->second; - } + VALUE *GetValue(const uint64_t &id) { return &values_.at(id); } bool GetEntry(const uint64_t &id) { - auto value = GetValue(id); - return value->is_entry_; + auto &value = values_.at(id); + return value.is_entry_; } void SetEntry(const uint64_t &id, const bool state) { - auto value = GetValue(id); - value->is_entry_ = state; + auto &value = values_.at(id); + value.is_entry_ = state; } void Shrink(const int threshold) { - for (auto &table : values_) { - for (auto iter = table.begin(); iter != table.end();) { - // VALUE* value = (VALUE*)(void*)(iter->second); - VALUE *value = iter->second; - value->unseen_days_++; - if (value->unseen_days_ >= threshold) { - butil::return_object(iter->second); - //_alloc.release(iter->second); - //_alloc.release(value); - iter = table.erase(iter); - } else { - ++iter; - } + for (auto iter = values_.begin(); iter != values_.end();) { + auto &value = iter->second; + value.unseen_days_++; + if (value.unseen_days_ >= threshold) { + iter = values_.erase(iter); + } else { + ++iter; } } return; } float GetThreshold() { return threshold_; } - size_t compute_bucket(size_t hash) { - if (SPARSE_SHARD_BUCKET_NUM == 1) { - return 0; - } else { - return hash >> (sizeof(size_t) * 8 - SPARSE_SHARD_BUCKET_NUM_BITS); - } - } private: bool Has(const uint64_t id) { - size_t hash = _hasher(id); - size_t bucket = compute_bucket(hash); - auto &table = values_[bucket]; - - auto got = table.find(id); - if (got == table.end()) { + auto got = values_.find(id); + if (got == values_.end()) { return false; } else { return true; @@ -304,9 +286,8 @@ class ValueBlock { } public: - robin_hood::unordered_map values_[SPARSE_SHARD_BUCKET_NUM]; + robin_hood::unordered_map values_; size_t value_length_ = 0; - std::hash _hasher; private: const std::vector &value_names_; @@ -321,3 +302,4 @@ class ValueBlock { } // namespace distributed } // namespace paddle + diff --git a/paddle/fluid/framework/fleet/heter_ps/CMakeLists.txt b/paddle/fluid/framework/fleet/heter_ps/CMakeLists.txt index db562045dcc..6df2cd52bb4 100644 --- a/paddle/fluid/framework/fleet/heter_ps/CMakeLists.txt +++ b/paddle/fluid/framework/fleet/heter_ps/CMakeLists.txt @@ -1,10 +1,5 @@ IF(WITH_GPU) - SET(HETERPS_DEPS device_context) - if (${CMAKE_CUDA_COMPILER_VERSION} LESS 11.0) - SET(HETERPS_DEPS ${HETERPS_DEPS} cub) - endif() - - nv_library(heter_comm SRCS heter_comm.h feature_value.h heter_resource.cc heter_resource.h hashtable.h DEPS ${HETERPS_DEPS}) + nv_library(heter_comm SRCS heter_comm.h feature_value.h heter_resource.cc heter_resource.h hashtable.h DEPS cub device_context) nv_test(test_heter_comm SRCS test_heter_comm.cu feature_value.h DEPS heter_comm) nv_library(heter_ps SRCS heter_ps.cu DEPS heter_comm) ENDIF() diff --git a/python/paddle/distributed/fleet/runtime/the_one_ps.py b/python/paddle/distributed/fleet/runtime/the_one_ps.py index 24b83662c9d..df07a7a6e77 100644 --- a/python/paddle/distributed/fleet/runtime/the_one_ps.py +++ b/python/paddle/distributed/fleet/runtime/the_one_ps.py @@ -77,13 +77,10 @@ class CommonAccessor: ("Moment2", None), ("Beta1Pow", 1), ("Beta2Pow", 1), ("LearningRate", 1)] opt_input_map["sum"] = [("Param", None)] - opt_input_map["naive_adagrad"] = [("Param", None), ("G2Sum", 1), - ("LearningRate", 1)] opt_attr_map = {} opt_attr_map["sgd"] = [] opt_attr_map["sum"] = [] - opt_attr_map["naive_adagrad"] = [] opt_attr_map["adam"] = [("beta1", "f"), 
("beta2", "f"), ("epsilon", "f")] @@ -172,10 +169,6 @@ class CommonAccessor: param_varnames = self.opt_input_map["sum"] attr_varnames = self.opt_attr_map["sum"] self.accessor_class = "sum" - elif compiled_strategy.use_ps_gpu and is_sparse: - param_varnames = self.opt_input_map["naive_adagrad"] - attr_varnames = self.opt_attr_map["naive_adagrad"] - self.accessor_class = "sgd" else: param_varnames = self.opt_input_map[oop.type] attr_varnames = self.opt_attr_map[oop.type] @@ -183,28 +176,20 @@ class CommonAccessor: for (formal_name, shape) in param_varnames: params.append(formal_name) - if formal_name == "G2Sum": - dims.append(1) - initializer = "fill_constant&0" - initializers.append(initializer) - else: - param = main_program.global_block().vars[oop.input(formal_name)[ - 0]] - if formal_name == "LearningRate" and param.name != "learning_rate_0": - warnings.warn("will support decay soon") - param = main_program.global_block().vars["learning_rate_0"] - - if shape is None: - if is_sparse: - shape = total_dims - else: - shape = self.get_shard(total_dims, pserver_num, - pserver_id) - dims.append(shape) + param = main_program.global_block().vars[oop.input(formal_name)[0]] + if formal_name == "LearningRate" and param.name != "learning_rate_0": + warnings.warn("will support decay soon") + param = main_program.global_block().vars["learning_rate_0"] + + if shape is None: + if is_sparse: + shape = total_dims + else: + shape = self.get_shard(total_dims, pserver_num, pserver_id) + dims.append(shape) - initializer = self.get_initializer_attr(param.name, - startup_program) - initializers.append(initializer) + initializer = self.get_initializer_attr(param.name, startup_program) + initializers.append(initializer) for (attr_varname, type_) in attr_varnames: value = oop.attr(attr_varname) @@ -450,8 +435,6 @@ class TheOnePSRuntime(RuntimeBase): if not strategy: raise ValueError("k_steps must be invalid value, please check") - if dist_strategy.a_sync_configs["use_ps_gpu"]: - strategy.use_ps_gpu = True return strategy def build_compiled_startegy(self): @@ -460,8 +443,6 @@ class TheOnePSRuntime(RuntimeBase): compiled_config = CompileTimeStrategy( self.origin_main_program, self.origin_main_program, self.async_strategy, self.role_maker) - if self.async_strategy.use_ps_gpu: - compiled_config.use_ps_gpu = True return compiled_config def _init_worker(self): diff --git a/python/paddle/fluid/incubate/fleet/parameter_server/distribute_transpiler/distributed_strategy.py b/python/paddle/fluid/incubate/fleet/parameter_server/distribute_transpiler/distributed_strategy.py index 2a9d26daaed..35029a3dfc7 100644 --- a/python/paddle/fluid/incubate/fleet/parameter_server/distribute_transpiler/distributed_strategy.py +++ b/python/paddle/fluid/incubate/fleet/parameter_server/distribute_transpiler/distributed_strategy.py @@ -149,7 +149,6 @@ class DistributedStrategy(object): if num_threads > 1: self._build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.Reduce self.debug_opt = None - self.use_ps_gpu = False def set_debug_opt(self, opt_info): self.debug_opt = opt_info diff --git a/python/paddle/fluid/incubate/fleet/parameter_server/ir/public.py b/python/paddle/fluid/incubate/fleet/parameter_server/ir/public.py index b2735727f67..baf8add04ca 100644 --- a/python/paddle/fluid/incubate/fleet/parameter_server/ir/public.py +++ b/python/paddle/fluid/incubate/fleet/parameter_server/ir/public.py @@ -138,7 +138,6 @@ class CompileTimeStrategy(object): self.strategy = strategy self.role_maker = role_maker - self.use_ps_gpu = False 
try: self.is_heter_ps_mode = role_maker._is_heter_parameter_server_mode except: -- GitLab From 85e697d74933d2251d25192a2bcf381adff7d433 Mon Sep 17 00:00:00 2001 From: Pei Yang Date: Tue, 27 Apr 2021 12:32:36 +0800 Subject: [PATCH 020/720] support depthwise_conv2d_transpose (#32593) --- .../inference/tensorrt/convert/conv2d_op.cc | 2 +- .../inference/tensorrt/convert/op_converter.h | 6 ++++++ paddle/fluid/inference/tensorrt/op_teller.cc | 7 +++++-- .../ir/inference/test_trt_conv_pass.py | 19 +++++++++++++++++-- 4 files changed, 29 insertions(+), 5 deletions(-) diff --git a/paddle/fluid/inference/tensorrt/convert/conv2d_op.cc b/paddle/fluid/inference/tensorrt/convert/conv2d_op.cc index ba47358b147..61199724bcf 100644 --- a/paddle/fluid/inference/tensorrt/convert/conv2d_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/conv2d_op.cc @@ -160,7 +160,7 @@ class Deconv2dOpConverter : public OpConverter { nvinfer1::DimsHW& ksize, TensorRTEngine::Weight& weight, TensorRTEngine::Weight& bias) -> nvinfer1::IDeconvolutionLayer* { auto* layer = - TRT_ENGINE_ADD_LAYER(engine_, Deconvolution, *inputs, n_input, + TRT_ENGINE_ADD_LAYER(engine_, Deconvolution, *inputs, n_output, ksize, weight.get(), bias.get()); return layer; }, diff --git a/paddle/fluid/inference/tensorrt/convert/op_converter.h b/paddle/fluid/inference/tensorrt/convert/op_converter.h index 8de16df0a2f..f72ae2c3ec2 100644 --- a/paddle/fluid/inference/tensorrt/convert/op_converter.h +++ b/paddle/fluid/inference/tensorrt/convert/op_converter.h @@ -109,6 +109,12 @@ class OpConverter { it, platform::errors::Unimplemented("no OpConverter for optype [%s]", op_desc.Type())); } + if (op_desc.Type() == "depthwise_conv2d_transpose") { + it = Registry::Global().Lookup("conv2d_transpose"); + PADDLE_ENFORCE_NOT_NULL( + it, platform::errors::Unimplemented("no OpConverter for optype [%s]", + op_desc.Type())); + } if (op_desc.Type() == "transpose2") { it = Registry::Global().Lookup("transpose"); PADDLE_ENFORCE_NOT_NULL( diff --git a/paddle/fluid/inference/tensorrt/op_teller.cc b/paddle/fluid/inference/tensorrt/op_teller.cc index c8dfc169535..48c7b7fdd0d 100644 --- a/paddle/fluid/inference/tensorrt/op_teller.cc +++ b/paddle/fluid/inference/tensorrt/op_teller.cc @@ -102,6 +102,7 @@ struct SimpleOpTypeSetTeller : public Teller { "dropout", "prelu", "conv2d_transpose", + "depthwise_conv2d_transpose", "leaky_relu", "fc", "shuffle_channel", @@ -172,7 +173,8 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8, } if (op_type == "conv2d" || op_type == "conv2d_transpose" || - op_type == "conv2d_fusion") { + op_type == "conv2d_fusion" || op_type == "depthwise_conv2d" || + op_type == "depthwise_conv2d_transpose") { std::vector paddings = BOOST_GET_CONST(std::vector, desc.GetAttr("paddings")); @@ -202,7 +204,8 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8, } } - if (op_type == "conv2d_transpose") { + if (op_type == "conv2d_transpose" || + op_type == "depthwise_conv2d_transpose") { if (!desc.HasAttr("dilations")) { return false; } else { diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_conv_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_conv_pass.py index 0821b390e5e..ec3955a9ae1 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_conv_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_conv_pass.py @@ -96,6 +96,7 @@ class TensorRTSubgraphPassConvTransposeTest(InferencePassTest): groups=self.conv_groups, padding=self.conv_padding, 
bias_attr=False, + use_cudnn=self.use_cudnn, act=None) self.feeds = { "data": np.random.random([1, 6, 64, 64]).astype("float32"), @@ -110,6 +111,7 @@ class TensorRTSubgraphPassConvTransposeTest(InferencePassTest): self.conv_filter_size = 6 self.conv_groups = 1 self.conv_padding = [1, 1] + self.use_cudnn = True def test_check_output(self): if core.is_compiled_with_cuda(): @@ -126,6 +128,7 @@ class TensorRTSubgraphPassConvTransposeValidPaddingTest( self.conv_filter_size = 6 self.conv_groups = 1 self.conv_padding = 'VALID' + self.use_cudnn = True class TensorRTSubgraphPassConvTransposeSamePaddingTest( @@ -135,15 +138,27 @@ class TensorRTSubgraphPassConvTransposeSamePaddingTest( self.conv_filter_size = 6 self.conv_groups = 1 self.conv_padding = 'SAME' + self.use_cudnn = True -class TensorRTSubgraphPassDepthwiseConvTransposeTest( +class TensorRTSubgraphPassConvTransposeMultiGroupTest( TensorRTSubgraphPassConvTransposeTest): def set_params(self): self.conv_num_filters = 6 self.conv_filter_size = 6 - self.conv_groups = 1 + self.conv_groups = 2 + self.conv_padding = [1, 1] + self.use_cudnn = True + + +class TensorRTSubgraphPassDepthwiseConvTransposeTest( + TensorRTSubgraphPassConvTransposeTest): + def set_params(self): + self.conv_num_filters = 6 + self.conv_filter_size = 4 + self.conv_groups = 6 self.conv_padding = [1, 1] + self.use_cudnn = False if __name__ == "__main__": -- GitLab From a08a118dbf02dfab1d7b90f86caf5741202458d6 Mon Sep 17 00:00:00 2001 From: xiemoyuan <71377852+xiemoyuan@users.noreply.github.com> Date: Tue, 27 Apr 2021 12:58:44 +0800 Subject: [PATCH 021/720] Support list and tuple for args. (#32344) * Support list and tuple for parameters of layer_norm, multiprocess_reader, DatasetFolder and ImageFolder. * add unittest for layer_norm. * add require gpu for example. 
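
With this change, paddle.nn.functional.layer_norm accepts an int, a list or a tuple for normalized_shape (an int or tuple is converted to a list internally, as the diff below shows). A minimal sketch of the three accepted spellings; the input shape here is illustrative only and is not taken from the new unit test:

.. code-block:: python

    import numpy as np
    import paddle

    x = paddle.to_tensor(np.random.randn(4, 10, 4, 4).astype("float32"))

    # list and tuple forms are equivalent; the plain int form normalizes
    # over the last dimension only (i.e. normalized_shape=[4]).
    y_list = paddle.nn.functional.layer_norm(x, [10, 4, 4])
    y_tuple = paddle.nn.functional.layer_norm(x, (10, 4, 4))
    y_int = paddle.nn.functional.layer_norm(x, 4)

    print(np.allclose(y_list.numpy(), y_tuple.numpy()))  # True

Any other type (for example a float) is rejected with an explicit ValueError, which the added unit test also covers.
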
--- python/paddle/distributed/collective.py | 4 +- .../tests/unittests/test_layer_norm_op_v2.py | 55 +++++++++++++++++++ python/paddle/nn/functional/norm.py | 10 ++++ python/paddle/reader/decorator.py | 3 +- python/paddle/vision/datasets/folder.py | 9 ++- python/paddle/vision/ops.py | 2 +- 6 files changed, 77 insertions(+), 6 deletions(-) diff --git a/python/paddle/distributed/collective.py b/python/paddle/distributed/collective.py index 7fb9e1d0455..f4562924af5 100644 --- a/python/paddle/distributed/collective.py +++ b/python/paddle/distributed/collective.py @@ -1080,10 +1080,12 @@ def split(x, import paddle from paddle.distributed import init_parallel_env + # required: gpu + paddle.set_device('gpu:%d'%paddle.distributed.ParallelEnv().dev_id) init_parallel_env() data = paddle.randint(0, 8, shape=[10,4]) - emb_out = padle.distributed.split( + emb_out = paddle.distributed.split( data, (8, 8), operation="embedding", diff --git a/python/paddle/fluid/tests/unittests/test_layer_norm_op_v2.py b/python/paddle/fluid/tests/unittests/test_layer_norm_op_v2.py index f324e4bd377..77cd6926b56 100644 --- a/python/paddle/fluid/tests/unittests/test_layer_norm_op_v2.py +++ b/python/paddle/fluid/tests/unittests/test_layer_norm_op_v2.py @@ -82,5 +82,60 @@ class TestDygraphLayerNormv2(unittest.TestCase): self.assertTrue(np.allclose(y1, y2)) +class TestLayerNormFunction(unittest.TestCase): + def test_dygraph(self): + places = [fluid.CPUPlace()] + if core.is_compiled_with_cuda() and core.op_support_gpu("layer_norm"): + places.append(fluid.CUDAPlace(0)) + for p in places: + shape = [4, 10, 4, 4] + + def compute_v0(x): + with fluid.dygraph.guard(p): + ln = fluid.dygraph.LayerNorm(shape[1:]) + y = ln(fluid.dygraph.to_variable(x)) + return y.numpy() + + def compute_v1(x): + with fluid.dygraph.guard(p): + x = fluid.dygraph.to_variable(x) + y = paddle.nn.functional.layer_norm(x, shape[1:]) + return y.numpy() + + def compute_v2(x): + with fluid.dygraph.guard(p): + x = fluid.dygraph.to_variable(x) + y = paddle.nn.functional.layer_norm(x, tuple(shape[1:])) + return y.numpy() + + def compute_v3(x): + with fluid.dygraph.guard(p): + ln = fluid.dygraph.LayerNorm(shape[-1]) + y = ln(fluid.dygraph.to_variable(x)) + return y.numpy() + + def compute_v4(x): + with fluid.dygraph.guard(p): + x = fluid.dygraph.to_variable(x) + y = paddle.nn.functional.layer_norm(x, shape[-1]) + return y.numpy() + + x = np.random.randn(*shape).astype("float32") + y0 = compute_v0(x) + y1 = compute_v1(x) + y2 = compute_v2(x) + self.assertTrue(np.allclose(y0, y1)) + self.assertTrue(np.allclose(y0, y2)) + y3 = compute_v3(x) + y4 = compute_v4(x) + self.assertTrue(np.allclose(y3, y4)) + + self.assertRaises( + ValueError, + paddle.nn.functional.layer_norm, + x=x, + normalized_shape=1.0) + + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/nn/functional/norm.py b/python/paddle/nn/functional/norm.py index e6971b3781c..73df03e3714 100644 --- a/python/paddle/nn/functional/norm.py +++ b/python/paddle/nn/functional/norm.py @@ -23,6 +23,8 @@ from ...fluid.initializer import Constant from ...fluid.param_attr import ParamAttr from ...fluid import core, dygraph_utils +import numbers + __all__ = [ 'batch_norm', # 'data_norm', @@ -289,6 +291,14 @@ def layer_norm(x, """ input_shape = list(x.shape) input_ndim = len(input_shape) + if isinstance(normalized_shape, numbers.Integral): + normalized_shape = [normalized_shape] + elif isinstance(normalized_shape, tuple): + normalized_shape = list(normalized_shape) + elif not isinstance(normalized_shape, 
list): + raise ValueError( + "`normalized_shape` should be int, list of ints or tuple of ints.") + normalized_ndim = len(normalized_shape) begin_norm_axis = input_ndim - normalized_ndim if input_ndim < normalized_ndim or input_shape[ diff --git a/python/paddle/reader/decorator.py b/python/paddle/reader/decorator.py index 4e1c3827d38..0aefcf9e683 100644 --- a/python/paddle/reader/decorator.py +++ b/python/paddle/reader/decorator.py @@ -588,7 +588,8 @@ def multiprocess_reader(readers, use_pipe=True, queue_size=1000): sys.stderr.write("import ujson error: " + str(e) + " use json\n") import json - assert type(readers) is list and len(readers) > 0 + assert isinstance(readers, (list, tuple)) and len(readers) > 0, ( + "`readers` must be list or tuple.") def _read_into_queue(reader, queue): try: diff --git a/python/paddle/vision/datasets/folder.py b/python/paddle/vision/datasets/folder.py index 06a55b71808..718af041307 100644 --- a/python/paddle/vision/datasets/folder.py +++ b/python/paddle/vision/datasets/folder.py @@ -28,11 +28,14 @@ def has_valid_extension(filename, extensions): Args: filename (str): path to a file - extensions (tuple of str): extensions to consider (lowercase) + extensions (list[str]|tuple[str]): extensions to consider Returns: bool: True if the filename ends with one of given extensions """ + assert isinstance(extensions, + (list, tuple)), ("`extensions` must be list or tuple.") + extensions = tuple([x.lower() for x in extensions]) return filename.lower().endswith(extensions) @@ -73,7 +76,7 @@ class DatasetFolder(Dataset): Args: root (string): Root directory path. loader (callable|optional): A function to load a sample given its path. - extensions (tuple[str]|optional): A list of allowed extensions. + extensions (list[str]|tuple[str]|optional): A list of allowed extensions. both extensions and is_valid_file should not be passed. transform (callable|optional): A function/transform that takes in a sample and returns a transformed version. @@ -226,7 +229,7 @@ class ImageFolder(Dataset): Args: root (string): Root directory path. loader (callable, optional): A function to load a sample given its path. - extensions (tuple[string], optional): A list of allowed extensions. + extensions (list[str]|tuple[str], optional): A list of allowed extensions. both extensions and is_valid_file should not be passed. transform (callable, optional): A function/transform that takes in a sample and returns a transformed version. diff --git a/python/paddle/vision/ops.py b/python/paddle/vision/ops.py index 079aa086f2b..005e2b12307 100644 --- a/python/paddle/vision/ops.py +++ b/python/paddle/vision/ops.py @@ -336,7 +336,7 @@ def yolo_box(x, import paddle import numpy as np - x = np.random.random([2, 14, 8, 8]).astype('float32') + x = np.random.random([2, 14, 8, 8]).astype('float32') img_size = np.ones((2, 2)).astype('int32') x = paddle.to_tensor(x) -- GitLab From 97794eca9200515bbe4e771ebcf2e048d13500ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=20Wei=20=28=E4=BB=BB=E5=8D=AB=29?= Date: Tue, 27 Apr 2021 13:10:07 +0800 Subject: [PATCH 022/720] str in python2 is different to python3's, it make mistakes for some api's docstring (#32588) * UnicodeDecodeError: 'ascii' codec can't decode byte 0xe2 in position 1788: ordinal not in range(128) test=document_fix str(doc) in python2 test=document_fix * update md5 function in count_api_without_core_ops.py str in py2 is different. 
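
The failure mode behind the first bullet: under Python 2, str(doc) is already a byte string, so the old str(doc).encode('utf-8') call first performs an implicit ASCII decode and raises UnicodeDecodeError on non-ASCII docstrings. A small sketch of the version-aware hashing pattern this patch switches to; the sample docstring below is made up for illustration:

.. code-block:: python

    import hashlib
    import platform

    doc = "Computes the mean of the input tensor."  # stand-in for a real API docstring

    hashinst = hashlib.md5()
    if platform.python_version()[0] == "2":
        hashinst.update(str(doc))                    # py2: str is already bytes
    else:
        hashinst.update(str(doc).encode('utf-8'))    # py3: encode unicode str to bytes
    print(hashinst.hexdigest())
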
test=document_fix --- tools/count_api_without_core_ops.py | 18 +++++++++++++++--- tools/print_signatures.py | 18 +++++++++++++++--- 2 files changed, 30 insertions(+), 6 deletions(-) diff --git a/tools/count_api_without_core_ops.py b/tools/count_api_without_core_ops.py index 99e84074158..664b94a059f 100644 --- a/tools/count_api_without_core_ops.py +++ b/tools/count_api_without_core_ops.py @@ -22,6 +22,7 @@ import pydoc import hashlib import six import functools +import platform __all__ = ['get_apis_with_and_without_core_ops', ] @@ -34,9 +35,20 @@ omitted_list = [ def md5(doc): - hash = hashlib.md5() - hash.update(str(doc).encode('utf-8')) - return hash.hexdigest() + try: + hashinst = hashlib.md5() + if platform.python_version()[0] == "2": + hashinst.update(str(doc)) + else: + hashinst.update(str(doc).encode('utf-8')) + md5sum = hashinst.hexdigest() + except UnicodeDecodeError as e: + md5sum = None + print( + "Error({}) occurred when `md5({})`, discard it.".format( + str(e), doc), + file=sys.stderr) + return md5sum def split_with_and_without_core_ops(member, cur_name): diff --git a/tools/print_signatures.py b/tools/print_signatures.py index cfe34fa3426..6de9d84379f 100644 --- a/tools/print_signatures.py +++ b/tools/print_signatures.py @@ -34,9 +34,21 @@ visited_modules = set() def md5(doc): - hash = hashlib.md5() - hash.update(str(doc).encode('utf-8')) - return hash.hexdigest() + try: + hashinst = hashlib.md5() + if platform.python_version()[0] == "2": + hashinst.update(str(doc)) + else: + hashinst.update(str(doc).encode('utf-8')) + md5sum = hashinst.hexdigest() + except UnicodeDecodeError as e: + md5sum = None + print( + "Error({}) occurred when `md5({})`, discard it.".format( + str(e), doc), + file=sys.stderr) + + return md5sum def get_functools_partial_spec(func): -- GitLab From 23d3e36a376c4c910ad35342c7f6c4557ca2e161 Mon Sep 17 00:00:00 2001 From: Guanghua Yu <742925032@qq.com> Date: Tue, 27 Apr 2021 13:18:22 +0800 Subject: [PATCH 023/720] fix cross_entropy calculation error (#32545) * fix cross_entropy calculation error * add unittest and fix static --- .../unittests/test_cross_entropy_loss.py | 47 +++++++++++++++++-- python/paddle/nn/functional/loss.py | 12 ++--- 2 files changed, 49 insertions(+), 10 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/test_cross_entropy_loss.py b/python/paddle/fluid/tests/unittests/test_cross_entropy_loss.py index ea44e23da24..897d76a35dc 100644 --- a/python/paddle/fluid/tests/unittests/test_cross_entropy_loss.py +++ b/python/paddle/fluid/tests/unittests/test_cross_entropy_loss.py @@ -59,8 +59,8 @@ def cross_entropy_loss_1d(input, if reduction == 'sum': return np.sum(out), np.array([total_weight]).astype('float64') elif reduction == 'mean': - return out.sum() / total_weight, np.array( - [total_weight]).astype('float64') + out = out.sum() / total_weight if total_weight != 0 else out.sum() + return out, np.array([total_weight]).astype('float64') elif reduction == 'none': return out @@ -92,8 +92,8 @@ def cross_entropy_loss_2d(input, if reduction == 'sum': return np.sum(out), np.array([total_weight]).astype('float64') elif reduction == 'mean': - return out.sum() / total_weight, np.array( - [total_weight]).astype('float64') + out = out.sum() / total_weight if total_weight != 0 else out.sum() + return out, np.array([total_weight]).astype('float64') elif reduction == 'none': return out @@ -759,6 +759,45 @@ class CrossEntropyLoss(unittest.TestCase): self.assertTrue(np.allclose(static_ret, expected)) self.assertTrue(np.allclose(dy_ret_value, 
expected)) + def test_cross_entropy_loss_1d_with_mean_ignore_negative(self): + N = 100 + C = 200 + input_np = np.random.random([N, C]).astype(self.dtype) + label_np = -np.ones((N)).astype(np.int64) + paddle.enable_static() + prog = fluid.Program() + startup_prog = fluid.Program() + place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( + ) else fluid.CPUPlace() + with fluid.program_guard(prog, startup_prog): + input = fluid.data(name='input', shape=[N, C], dtype=self.dtype) + label = fluid.data(name='label', shape=[N], dtype='int64') + cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss( + ignore_index=-1) + ret = cross_entropy_loss(input, label) + exe = fluid.Executor(place) + static_ret = exe.run(prog, + feed={ + 'input': input_np, + 'label': label_np, + }, + fetch_list=[ret]) + self.assertIsNotNone(static_ret) + + with fluid.dygraph.guard(): + cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss( + axis=1, ignore_index=-1) + dy_ret = cross_entropy_loss( + fluid.dygraph.to_variable(input_np), + fluid.dygraph.to_variable(label_np)) + dy_ret_value = dy_ret.numpy() + self.assertIsNotNone(dy_ret_value) + expected = cross_entropy_loss_1d(input_np, label_np, ignore_index=-1)[0] + + self.assertTrue(np.allclose(static_ret, dy_ret_value)) + self.assertTrue(np.allclose(static_ret, expected)) + self.assertTrue(np.allclose(dy_ret_value, expected)) + def test_cross_entropy_loss_1d_with_weight_mean_ignore(self): N = 100 C = 200 diff --git a/python/paddle/nn/functional/loss.py b/python/paddle/nn/functional/loss.py index 6eb316ceeb8..ca0ad06532d 100755 --- a/python/paddle/nn/functional/loss.py +++ b/python/paddle/nn/functional/loss.py @@ -1454,20 +1454,20 @@ def cross_entropy(input, if weight is None: mask = paddle.cast(mask, dtype=out_sum.dtype) count = core.ops.reduce_sum(mask, 'reduce_all', True) - ret = out_sum / count + ret = out_sum / (count + (count == 0.0)) else: mask = paddle.cast(mask, weight_gather_reshape.dtype) weight_ignored = core.ops.elementwise_mul( mask, weight_gather_reshape) weight_sum = core.ops.reduce_sum(weight_ignored, 'reduce_all', True) - ret = out_sum / weight_sum + ret = out_sum / (weight_sum + (weight_sum == 0.0)) return ret elif weight is not None: out_sum = core.ops.reduce_sum(out, 'reduce_all', True) total_weight = core.ops.reduce_sum(weight_gather_reshape, 'reduce_all', True) - return out_sum / total_weight + return out_sum / (total_weight + (total_weight == 0.0)) else: return core.ops.mean(out) @@ -1537,17 +1537,17 @@ def cross_entropy(input, if (weight is None): mask = paddle.cast(mask, dtype=out_sum.dtype) count = paddle.sum(mask, name=name) - ret = out_sum / count + ret = out_sum / (count + (count == 0.0)) else: mask = paddle.cast(mask, weight_gather_reshape.dtype) weight_ignored = paddle.multiply(mask, weight_gather_reshape) weight_sum = paddle.sum(weight_ignored, name=name) - ret = out_sum / weight_sum + ret = out_sum / (weight_sum + (weight_sum == 0.0)) return ret elif weight is not None: out_sum = paddle.sum(out, name=name) total_weight = paddle.sum(weight_gather_reshape) - return out_sum / total_weight + return out_sum / (total_weight + (total_weight == 0.0)) else: return paddle.mean(out, name=name) -- GitLab From 1515892766fb6255562964ed5d669b0343905dea Mon Sep 17 00:00:00 2001 From: xiemoyuan <71377852+xiemoyuan@users.noreply.github.com> Date: Tue, 27 Apr 2021 13:41:05 +0800 Subject: [PATCH 024/720] [Docs] Modified the docs of some api for supporting list/tuple args. (#32360) * fixed docs. * Fixed docs. test=document_fix code bak. fixed docs. 
test=document_fix * Revert to previous version of python/paddle/fluid/backward.py * fixed bugs. * test=document_fix. Fixed examples. --- python/paddle/amp/auto_cast.py | 4 +- python/paddle/distributed/collective.py | 4 +- .../fleet/base/private_helper_function.py | 2 +- python/paddle/distributed/spawn.py | 2 +- python/paddle/fluid/dataloader/dataset.py | 2 +- python/paddle/fluid/framework.py | 4 +- python/paddle/fluid/io.py | 4 +- python/paddle/fluid/layers/tensor.py | 2 +- python/paddle/framework/random.py | 2 +- python/paddle/metric/metrics.py | 2 +- python/paddle/nn/functional/common.py | 4 +- python/paddle/nn/functional/conv.py | 50 +++++++++---------- python/paddle/nn/layer/common.py | 6 +-- python/paddle/nn/layer/conv.py | 41 +++++++-------- python/paddle/nn/layer/rnn.py | 4 +- python/paddle/nn/layer/transformer.py | 24 ++++----- python/paddle/optimizer/adadelta.py | 16 +++--- python/paddle/optimizer/adagrad.py | 20 ++++---- python/paddle/optimizer/adam.py | 20 ++++---- python/paddle/optimizer/adamax.py | 20 ++++---- python/paddle/optimizer/adamw.py | 6 +-- python/paddle/optimizer/lr.py | 4 +- python/paddle/optimizer/momentum.py | 14 +++--- python/paddle/optimizer/optimizer.py | 2 +- python/paddle/optimizer/rmsprop.py | 20 ++++---- python/paddle/optimizer/sgd.py | 14 +++--- python/paddle/static/nn/common.py | 10 ++-- python/paddle/tensor/manipulation.py | 4 +- python/paddle/tensor/math.py | 6 +-- python/paddle/vision/ops.py | 12 ++--- python/paddle/vision/transforms/functional.py | 4 +- .../vision/transforms/functional_cv2.py | 4 +- .../vision/transforms/functional_pil.py | 4 +- python/paddle/vision/transforms/transforms.py | 12 ++--- 34 files changed, 174 insertions(+), 175 deletions(-) diff --git a/python/paddle/amp/auto_cast.py b/python/paddle/amp/auto_cast.py index 441bc31b936..b83f81b27d1 100644 --- a/python/paddle/amp/auto_cast.py +++ b/python/paddle/amp/auto_cast.py @@ -28,10 +28,10 @@ def auto_cast(enable=True, custom_white_list=None, custom_black_list=None): Args: enable(bool, optional): Enable auto-mixed-precision or not. Default is True. - custom_white_list(set|list, optional): The custom white_list. It's the set of ops that support + custom_white_list(set|list|tuple, optional): The custom white_list. It's the set of ops that support fp16 calculation and are considered numerically-safe and performance-critical. These ops will be converted to fp16. - custom_black_list(set|list, optional): The custom black_list. The set of ops that support fp16 + custom_black_list(set|list|tuple, optional): The custom black_list. The set of ops that support fp16 calculation and are considered numerically-dangerous and whose effects may also be observed in downstream ops. These ops will not be converted to fp16. diff --git a/python/paddle/distributed/collective.py b/python/paddle/distributed/collective.py index f4562924af5..69a8f8956a8 100644 --- a/python/paddle/distributed/collective.py +++ b/python/paddle/distributed/collective.py @@ -662,7 +662,7 @@ def scatter(tensor, tensor_list=None, src=0, group=None, use_calc_stream=True): Args: tensor (Tensor): The output Tensor. Its data type should be float16, float32, float64, int32 or int64. - tensor_list (list): A list of Tensors to scatter. Every element in the list must be a Tensor whose data type + tensor_list (list|tuple): A list/tuple of Tensors to scatter. Every element in the list must be a Tensor whose data type should be float16, float32, float64, int32 or int64. Default value is None. src (int): The source rank id. Default value is 0. 
group (Group): The group instance return by new_group or None for global default group. @@ -679,6 +679,8 @@ def scatter(tensor, tensor_list=None, src=0, group=None, use_calc_stream=True): import paddle from paddle.distributed import init_parallel_env + # required: gpu + paddle.set_device('gpu:%d'%paddle.distributed.ParallelEnv().dev_id) init_parallel_env() if paddle.distributed.ParallelEnv().local_rank == 0: diff --git a/python/paddle/distributed/fleet/base/private_helper_function.py b/python/paddle/distributed/fleet/base/private_helper_function.py index 6b3232b93b2..6af4a9e6675 100644 --- a/python/paddle/distributed/fleet/base/private_helper_function.py +++ b/python/paddle/distributed/fleet/base/private_helper_function.py @@ -24,7 +24,7 @@ def wait_server_ready(endpoints): port readiness. Args: - endpoints (list): endpoints string list, like: + endpoints (list|tuple): endpoints string list, like: ["127.0.0.1:8080", "127.0.0.1:8081"] Examples: diff --git a/python/paddle/distributed/spawn.py b/python/paddle/distributed/spawn.py index bf49604a897..782fcb28e99 100644 --- a/python/paddle/distributed/spawn.py +++ b/python/paddle/distributed/spawn.py @@ -325,7 +325,7 @@ def spawn(func, args=(), nprocs=-1, join=True, daemon=False, **options): func (function): The target function is called by spawned process. This function need to be able to pickled, so it must be defined at the top level of a module. - args (tuple, optional): Arguments passed to ``func``. + args (list|tuple, optional): Arguments passed to ``func``. nprocs (int, optional): Number of processed to start. Default: -1. when nprocs is -1, the available device will be obtained from the environment variable when the model is executed: If use GPU, diff --git a/python/paddle/fluid/dataloader/dataset.py b/python/paddle/fluid/dataloader/dataset.py index bf3d0a81f99..3578e27cf02 100755 --- a/python/paddle/fluid/dataloader/dataset.py +++ b/python/paddle/fluid/dataloader/dataset.py @@ -233,7 +233,7 @@ class TensorDataset(Dataset): each sample by indexing tensors in the 1st dimension. Args: - tensors(list of Tensor): tensors with same shape in the 1st dimension. + tensors(list|tuple): A list/tuple of tensors with same shape in the 1st dimension. Returns: Dataset: a Dataset instance wrapping tensors. diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py index 59e22f24f33..a280667d03d 100644 --- a/python/paddle/fluid/framework.py +++ b/python/paddle/fluid/framework.py @@ -418,7 +418,7 @@ def cuda_places(device_ids=None): [paddle.CUDAPlace(0), paddle.CUDAPlace(1), paddle.CUDAPlace(2)]. Parameters: - device_ids (list or tuple of int, optional): list of GPU device ids. + device_ids (list|tuple, optional): A list/tuple of int of GPU device ids. Returns: list of paddle.CUDAPlace: Created GPU place list. @@ -429,6 +429,8 @@ def cuda_places(device_ids=None): import paddle import paddle.static as static + # required: gpu + paddle.enable_static() cuda_places = static.cuda_places() diff --git a/python/paddle/fluid/io.py b/python/paddle/fluid/io.py index 768248e136b..30baa2aa26c 100644 --- a/python/paddle/fluid/io.py +++ b/python/paddle/fluid/io.py @@ -1913,7 +1913,7 @@ def load(program, model_path, executor=None, var_list=None): model_path(str): The file prefix store the program executor(Executor, optional): The executor used for initialize the parameter When startup program is not run. 
- var_list(list, optional): The Tensor list to load single model file saved with + var_list(list|tuple, optional): The Tensor list/tuple to load single model file saved with [ save_params, save_persistables, save_vars ]. Default: None @@ -2103,7 +2103,7 @@ def load_program_state(model_path, var_list=None): Args: model_path(str): The file prefix store the program - var_list(list, optional): The Tensor list to load saved with + var_list(list|tuple, optional): The Tensor list/tuple to load saved with [ save_params, save_persistables, save_vars ]. Default: None. The var_list is only used to get name, diff --git a/python/paddle/fluid/layers/tensor.py b/python/paddle/fluid/layers/tensor.py index 3e2c06f69cf..a7ec339bf74 100644 --- a/python/paddle/fluid/layers/tensor.py +++ b/python/paddle/fluid/layers/tensor.py @@ -148,7 +148,7 @@ def create_global_var(shape, This function creates a new tensor variable with value in the global block(block 0). Parameters: - shape (list of int): Shape of the variable + shape (list[int]|tuple[int]): Shape of the variable value (float): The value of the variable. The new created variable will be filled with it. dtype (str): Data type of the variable diff --git a/python/paddle/framework/random.py b/python/paddle/framework/random.py index cce95137436..251a8407035 100644 --- a/python/paddle/framework/random.py +++ b/python/paddle/framework/random.py @@ -81,7 +81,7 @@ def set_cuda_rng_state(state_list): Sets generator state for all cuda generators Args: - state_list(list): The cuda states to set back to cuda generators. state_list is obtained from get_cuda_rng_state(). + state_list(list|tuple): The cuda states to set back to cuda generators. state_list is obtained from get_cuda_rng_state(). Returns: None diff --git a/python/paddle/metric/metrics.py b/python/paddle/metric/metrics.py index b939f548e9c..61d1eb0e373 100644 --- a/python/paddle/metric/metrics.py +++ b/python/paddle/metric/metrics.py @@ -182,7 +182,7 @@ class Accuracy(Metric): Encapsulates accuracy metric logic. Args: - topk (int|tuple(int)): Number of top elements to look at + topk (int|list[int]|tuple[int]): Number of top elements to look at for computing accuracy. Default is (1,). name (str, optional): String name of the metric instance. Default is `acc`. diff --git a/python/paddle/nn/functional/common.py b/python/paddle/nn/functional/common.py index 5e8dc15cb4a..1cc8ef6c39b 100644 --- a/python/paddle/nn/functional/common.py +++ b/python/paddle/nn/functional/common.py @@ -207,7 +207,7 @@ def interpolate(x, size (list|tuple|Tensor|None): Output shape of image resize layer, the shape is (out_w, ) when input is a 3-D Tensor, the shape is (out_h, out_w) when input is a 4-D Tensor and is (out_d, out_h, out_w) when input is a 5-D Tensor. - Default: None. If a list, each element can be an integer or a Tensor of shape: [1]. + Default: None. If a list/tuple, each element can be an integer or a Tensor of shape: [1]. If a Tensor, its dimensions size should be a 1. scale_factor (float|Tensor|list|tuple|None): The multiplier for the input height or width. At least one of :attr:`size` or :attr:`scale_factor` must be set. @@ -638,7 +638,7 @@ def upsample(x, size (list|tuple|Tensor|None): Output shape of image resize layer, the shape is (out_w, ) when input is a 3-D Tensor, the shape is (out_h, out_w) when input is a 4-D Tensor and is (out_d, out_h, out_w) when input is a 5-D Tensor. - Default: None. If a list, each element can be an integer or a Tensor of shape: [1]. + Default: None. 
If a list/tuple, each element can be an integer or a Tensor of shape: [1]. If a Tensor , its dimensions size should be a 1. scale_factor (float|Tensor|list|tuple|None): The multiplier for the input height or width. At least one of :attr:`size` or :attr:`scale_factor` must be set. diff --git a/python/paddle/nn/functional/conv.py b/python/paddle/nn/functional/conv.py index 5263d54045e..a8d6a6cc38d 100644 --- a/python/paddle/nn/functional/conv.py +++ b/python/paddle/nn/functional/conv.py @@ -218,7 +218,7 @@ def conv1d(x, weight (Tensor): The convolution kernel with shape [M, C/g, K], where M is the number of output channels, g is the number of groups, K is the kernel's size. bias (Tensor, optional): The bias with shape [M,]. Default: None. - stride (int or tuple, optional): The stride size. If stride is a tuple, it must + stride (int|list|tuple, optional): The stride size. If stride is a list/tuple, it must contain one integers, (stride_size). Default: 1. padding(int|str|tuple|list, optional): The padding size. Padding could be in one of the following forms. 1. a string in ['valid', 'same']. @@ -227,7 +227,7 @@ def conv1d(x, 4. a list[int] or tuple[int] whose length is 2. It has the form [pad_before, pad_after]. 5. a list or tuple of pairs of ints. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension are also included. Each pair of integers correspond to the amount of padding for a dimension of the input. Padding in batch dimension and channel dimension should be [0, 0] or (0, 0). The default value is 0. - dilation (int or tuple, optional): The dilation size. If dilation is a tuple, it must + dilation (int|list|tuple, optional): The dilation size. If dilation is a list/tuple, it must contain one integer, (dilation_size). Default: 1. groups (int, optional): The groups number of the conv1d function. According to grouped convolution in Alex Krizhevsky's Deep CNN paper: when group=2, @@ -250,7 +250,7 @@ def conv1d(x, ValueError: If the channel dimension of the input is less than or equal to zero. ValueError: If `data_format` is not "NCL" or "NLC". ValueError: If `padding` is a string, but not "SAME" or "VALID". - ValueError: If `padding` is a tuple, but the element corresponding to the input's batch size is not 0 + ValueError: If `padding` is a list/tuple, but the element corresponding to the input's batch size is not 0 or the element corresponding to the input's channel is not 0. ShapeError: If the input is not 3-D Tensor. ShapeError: If the input's dimension size and filter's dimension size not equal. @@ -451,8 +451,8 @@ def conv2d(x, the number of output channels, g is the number of groups, kH is the filter's height, kW is the filter's width. bias (Tensor, optional): The bias with shape [M,]. - stride (int|tuple): The stride size. It means the stride in convolution. - If stride is a tuple, it must contain two integers, (stride_height, stride_width). + stride (int|list|tuple): The stride size. It means the stride in convolution. + If stride is a list/tuple, it must contain two integers, (stride_height, stride_width). Otherwise, stride_height = stride_width = stride. Default: stride = 1. padding (string|int|list|tuple): The padding size. 
It means the number of zero-paddings on both sides for each dimension.If `padding` is a string, either 'VALID' or @@ -464,8 +464,8 @@ def conv2d(x, when `data_format` is `"NHWC"`, `padding` can be in the form `[[0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`. Default: padding = 0. - dilation (int|tuple): The dilation size. It means the spacing between the kernel - points. If dilation is a tuple, it must contain two integers, (dilation_height, + dilation (int|list|tuple): The dilation size. It means the spacing between the kernel + points. If dilation is a list/tuple, it must contain two integers, (dilation_height, dilation_width). Otherwise, dilation_height = dilation_width = dilation. Default: dilation = 1. groups (int): The groups number of the Conv2D Layer. According to grouped @@ -488,7 +488,7 @@ def conv2d(x, ValueError: If `data_format` is not "NCHW" or "NHWC". ValueError: If the channel dimension of the input is less than or equal to zero. ValueError: If `padding` is a string, but not "SAME" or "VALID". - ValueError: If `padding` is a tuple, but the element corresponding to the input's batch size is not 0 + ValueError: If `padding` is a list/tuple, but the element corresponding to the input's batch size is not 0 or the element corresponding to the input's channel is not 0. ShapeError: If the input is not 4-D Tensor. ShapeError: If the input's dimension size and filter's dimension size not equal. @@ -637,7 +637,7 @@ def conv1d_transpose(x, K is the size of the kernel. bias(Tensor, optional): The bias, a Tensor with shape [M, ]. stride(int|tuple|list, optional): The stride size. It means the stride in transposed convolution. - If stride is a tuple, it must contain one integer, `(stride_size)`. + If stride is a list/tuple, it must contain one integer, `(stride_size)`. Default: stride = 1. padding(int|list|str|tuple, optional): The padding size. The padding argument effectively adds `dilation * (kernel - 1)` amount of zero-padding on both sides of input. If `padding` is a @@ -645,7 +645,7 @@ def conv1d_transpose(x, If `padding` is a tuple or list, it could be in two forms: `[pad]` or `[pad_left, pad_right]`. Default: padding = 0. output_padding(int|list|tuple, optional): The count of zeros to be added to tail of each dimension. - If it is a tuple, it must contain one integer. Default: 0. + If it is a list/tuple, it must contain one integer. Default: 0. groups(int, optional): The groups number of the conv1d transpose function. Inspired by grouped convolution in Alex Krizhevsky's Deep CNN paper, in which when group=2, the first half of the filters is only connected to the @@ -653,10 +653,10 @@ def conv1d_transpose(x, filters is only connected to the second half of the input channels. Default: groups = 1. dilation(int|tuple|list, optional): The dilation size. It means the spacing between the kernel points. - If dilation is a tuple, it must contain one integer, `(dilation_size)`. + If dilation is a list/tuple, it must contain one integer, `(dilation_size)`. Default: dilation = 1. output_size(int|tuple|list, optional): The output image size. If output size is a - tuple, it must contain one integer, `(feature_length)`. None if use + tuple/list, it must contain one integer, `(feature_length)`. None if use filter_size(shape of weight), padding, and stride to calculate output_size. data_format (str, optional): Specify the data format of the input, and the data format of the output will be consistent with that of the input. 
An optional string from: `"NCL"`, `"NLC"`. @@ -675,7 +675,7 @@ def conv1d_transpose(x, Raises: ValueError: If `data_format` is a string, but not "NCL" or "NLC". ValueError: If `padding` is a string, but not "SAME" or "VALID". - ValueError: If `padding` is a tuple, but the element corresponding to the input's batch size is not 0 + ValueError: If `padding` is a list/tuple, but the element corresponding to the input's batch size is not 0 or the element corresponding to the input's channel is not 0. ValueError: If `output_size` and filter_size are None at the same time. ValueError: If `output_padding` is greater than `stride`. @@ -900,7 +900,7 @@ def conv2d_transpose(x, kH is the height of the kernel, and kW is the width of the kernel. bias(Tensor, optional): The bias, a Tensor with shape [M, ]. stride(int|list|tuple, optional): The stride size. It means the stride in transposed convolution. - If stride is a tuple, it must contain two integers, (stride_height, stride_width). + If stride is a list/tuple, it must contain two integers, (stride_height, stride_width). Otherwise, stride_height = stride_width = stride. Default: stride = 1. padding(str|int|list|tuple, optional): The padding size. It means the number of zero-paddings on both sides for each dimension. If `padding` is a string, either 'VALID' or @@ -921,10 +921,10 @@ def conv2d_transpose(x, filters is only connected to the second half of the input channels. Default: groups = 1. dilation(int|list|tuple, optional): The dilation size. It means the spacing between the kernel points. - If dilation is a tuple, it must contain two integers, (dilation_height, dilation_width). + If dilation is a list/tuple, it must contain two integers, (dilation_height, dilation_width). Otherwise, dilation_height = dilation_width = dilation. Default: dilation = 1. output_size(int|tuple|list, optional): The output image size. If output size is a - tuple, it must contain two integers, (image_height, image_width). None if use + tuple/list, it must contain two integers, (image_height, image_width). None if use filter_size(shape of weight), padding, and stride to calculate output_size. data_format (str, optional): Specify the data format of the input, and the data format of the output will be consistent with that of the input. An optional string from: `"NCHW"`, `"NHWC"`. @@ -943,7 +943,7 @@ def conv2d_transpose(x, Raises: ValueError: If `data_format` is not "NCHW" or "NHWC". ValueError: If `padding` is a string, but not "SAME" or "VALID". - ValueError: If `padding` is a tuple, but the element corresponding to the input's batch size is not 0 + ValueError: If `padding` is a list/tuple, but the element corresponding to the input's batch size is not 0 or the element corresponding to the input's channel is not 0. ValueError: If `output_size` and kernel_size are None at the same time. ShapeError: If the input is not 4-D Tensor. @@ -1120,8 +1120,8 @@ def conv3d(x, where M is the number of filters(output channels), g is the number of groups, kD, kH, kW are the filter's depth, height and width respectively. bias (Tensor, optional): The bias, a Tensor of shape [M, ]. - stride (int|tuple): The stride size. It means the stride in convolution. If stride is a - tuple, it must contain three integers, (stride_depth, stride_height, stride_width). + stride (int|list|tuple): The stride size. It means the stride in convolution. If stride is a + list/tuple, it must contain three integers, (stride_depth, stride_height, stride_width). 
Otherwise, stride_depth = stride_height = stride_width = stride. Default: stride = 1. padding (string|int|list|tuple): The padding size. It means the number of zero-paddings on both sides for each dimension. If `padding` is a string, either 'VALID' or @@ -1133,8 +1133,8 @@ def conv3d(x, when `data_format` is `"NDHWC"`, `padding` can be in the form `[[0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`. Default: padding = 0. - dilation (int|tuple): The dilation size. It means the spacing between the kernel points. - If dilation is a tuple, it must contain three integers, (dilation_depth, dilation_height, + dilation (int|list|tuple): The dilation size. It means the spacing between the kernel points. + If dilation is a list/tuple, it must contain three integers, (dilation_depth, dilation_height, dilation_width). Otherwise, dilation_depth = dilation_height = dilation_width = dilation. Default: dilation = 1. groups (int): The groups number of the Conv3D Layer. According to grouped @@ -1292,7 +1292,7 @@ def conv3d_transpose(x, kD, kH, kW are the filter's depth, height and width respectively. bias (Tensor, optional): The bias, a Tensor of shape [M, ]. stride(int|list|tuple, optional): The stride size. It means the stride in transposed convolution. - If stride is a tuple, it must contain three integers, (stride_depth, stride_height, + If stride is a list/tuple, it must contain three integers, (stride_depth, stride_height, stride_width). Otherwise, stride_depth = stride_height = stride_width = stride. Default: stride = 1. padding (string|int|list|tuple, optional): The padding size. It means the number of zero-paddings @@ -1314,11 +1314,11 @@ def conv3d_transpose(x, filters is only connected to the second half of the input channels. Default: groups=1 dilation(int|list|tuple, optional): The dilation size. It means the spacing between the kernel points. - If dilation is a tuple, it must contain three integers, (dilation_depth, dilation_height, + If dilation is a list/tuple, it must contain three integers, (dilation_depth, dilation_height, dilation_width). Otherwise, dilation_depth = dilation_height = dilation_width = dilation. Default: dilation = 1. output_size(int|list|tuple, optional): The output image size. If output size is a - tuple, it must contain three integers, (image_depth, image_height, image_width). + list/tuple, it must contain three integers, (image_depth, image_height, image_width). None if use filter_size(shape of weight), padding, and stride to calculate output_size. data_format (str, optional): Specify the data format of the input, and the data format of the output will be consistent with that of the input. An optional string from: `"NCHW"`, `"NHWC"`. @@ -1338,7 +1338,7 @@ def conv3d_transpose(x, Raises: ValueError: If `data_format` is not "NCDHW" or "NDHWC". ValueError: If `padding` is a string, but not "SAME" or "VALID". - ValueError: If `padding` is a tuple, but the element corresponding to the input's batch size is not 0 + ValueError: If `padding` is a list/tuple, but the element corresponding to the input's batch size is not 0 or the element corresponding to the input's channel is not 0. ValueError: If `output_size` and kernel_size are None at the same time. ShapeError: If the input is not 5-D Tensor. 
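The functional conv hunks above mainly broaden stride/padding/dilation from tuple-only to list/tuple in the docstrings. As a quick, hedged sketch of the documented forms (the input shapes and values below are made up for illustration), the int, list, and tuple spellings are interchangeable:

.. code-block:: python

    import paddle
    import paddle.nn.functional as F

    x = paddle.randn([2, 3, 8, 8])   # NCHW input
    w = paddle.randn([6, 3, 3, 3])   # [out_channels, in_channels/groups, kH, kW]

    # stride/padding/dilation accept an int, a list, or a tuple
    y1 = F.conv2d(x, w, stride=2, padding=1, dilation=1)
    y2 = F.conv2d(x, w, stride=[2, 2], padding=(1, 1), dilation=[1, 1])
    print(y1.shape, y2.shape)  # both [2, 6, 4, 4]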
diff --git a/python/paddle/nn/layer/common.py b/python/paddle/nn/layer/common.py index db0a5a5cab3..8c001793715 100644 --- a/python/paddle/nn/layer/common.py +++ b/python/paddle/nn/layer/common.py @@ -300,7 +300,7 @@ class Upsample(layers.Layer): size (list|tuple|Tensor|None): Output shape of image resize layer, the shape is (out_w, ) when input is a 3-D Tensor, the shape is (out_h, out_w) when input is a 4-D Tensor and is (out_d, out_h, out_w) when input is a 5-D Tensor. - Default: None. If a list, each element can be an integer or a Tensor of shape: [1]. + Default: None. If a list/tuple, each element can be an integer or a Tensor of shape: [1]. If a Tensor , its dimensions size should be a 1. scale_factor (float|Tensor|list|tuple|None): The multiplier for the input height or width. At least one of :attr:`size` or :attr:`scale_factor` must be set. @@ -419,7 +419,7 @@ class UpsamplingNearest2D(layers.Layer): its data format is specified by :attr:`data_format`. size (list|tuple|Tensor|None): Output shape of image resize layer, the shape is (out_h, out_w) when input is a 4-D Tensor. - Default: None. If a list, each element can be an integer or a Tensor of shape: [1]. + Default: None. If a list/tuple, each element can be an integer or a Tensor of shape: [1]. If a Tensor , its dimensions size should be a 1. scale_factor (float|int|list|tuple|Tensor|None): The multiplier for the input height or width. At least one of :attr:`size` or :attr:`scale_factor` must be set. @@ -506,7 +506,7 @@ class UpsamplingBilinear2D(layers.Layer): its data format is specified by :attr:`data_format`. size (list|tuple|Tensor|None): Output shape of image resize layer, the shape is (out_h, out_w) when input is a 4-D Tensor. - Default: None. If a list, each element can be an integer or a Tensor of shape: [1]. + Default: None. If a list/tuple, each element can be an integer or a Tensor of shape: [1]. If a Tensor , its dimensions size should be a 1. scale_factor (float|int|list|tuple|Tensor|None): The multiplier for the input height or width. At least one of :attr:`size` or :attr:`scale_factor` must be set. diff --git a/python/paddle/nn/layer/conv.py b/python/paddle/nn/layer/conv.py index b90421c2f8c..d6ba04dad04 100644 --- a/python/paddle/nn/layer/conv.py +++ b/python/paddle/nn/layer/conv.py @@ -232,16 +232,16 @@ class Conv1D(_ConvNd): in_channels(int): The number of channels in the input image. out_channels(int): The number of filter. It is as same as the output feature map. - kernel_size (int|tuple|list): The filter size. If kernel_size is a tuple, + kernel_size (int|tuple|list): The filter size. If kernel_size is a tuple/list, it must contain one integer, (kernel_size). - stride (int|tuple|list, optional): The stride size. If stride is a tuple, it must + stride (int|tuple|list, optional): The stride size. If stride is a tuple/list, it must contain one integer, (stride_size). Default: 1. padding(int|str|tuple|list, optional): The size of zeros to be padded. It must be in one of the following forms. 1. a string in ['valid', 'same']. 2. an int, which means the feature map is zero paded by size of `padding` on both sides. 3. a list[int] or tuple[int] whose length is 1, which means the feature map is zero paded by size of `padding[0]` on both sides. The default value is 0. - dilation (int|tuple|list, optional): The dilation size. If dilation is a tuple, it must + dilation (int|tuple|list, optional): The dilation size. If dilation is a tuple/list, it must contain one integer, (dilation_size). Default: 1. 
groups (int, optional): The groups number of the conv2d Layer. According to grouped convolution in Alex Krizhevsky's Deep CNN paper: when group=2, @@ -410,12 +410,12 @@ class Conv1DTranspose(_ConvNd): in_channels(int): The number of channels in the input image. out_channels(int): The number of the filter. It is as same as the output feature map. - kernel_size(int|tuple|list, optional): The filter size. If kernel_size is a tuple, + kernel_size(int|tuple|list, optional): The filter size. If kernel_size is a tuple/list, it must contain one integers, (kernel_size). None if use output size to calculate kernel_size. Default: None. kernel_size and output_size should not be None at the same time. stride(int|tuple|list, optional): The stride size. It means the stride in transposed convolution. - If stride is a tuple, it must contain one integer, (stride_size). + If stride is a tuple/list, it must contain one integer, (stride_size). Default: stride = 1. padding(int|list|str|tuple, optional): The padding size. The padding argument effectively adds `dilation * (kernel - 1)` amount of zero-padding on both sides of input. If `padding` is a @@ -423,7 +423,7 @@ class Conv1DTranspose(_ConvNd): If `padding` is a tuple or list, it could be in two forms: `[pad]` or `[pad_left, pad_right]`. Default: padding = 0. output_padding(int|list|tuple, optional): The count of zeros to be added to tail of each dimension. - If it is a tuple, it must contain one integer. Default: 0. + If it is a tuple/list, it must contain one integer. Default: 0. groups(int, optional): The groups number of the Conv2D transpose layer. Inspired by grouped convolution in Alex Krizhevsky's Deep CNN paper, in which when group=2, the first half of the filters is only connected to the @@ -432,7 +432,7 @@ class Conv1DTranspose(_ConvNd): Default: groups = 1. bias(bool, optional): Whether to use bias. Default: True. dilation(int|tuple|list, optional): The dilation size. It means the spacing between the kernel points. - If dilation is a tuple, it must contain one integer, (dilation_size). + If dilation is a tuple/list, it must contain one integer, (dilation_size). Default: dilation = 1. weight_attr (ParamAttr, optional): The parameter attribute for learnable parameters/weights of conv1d_transpose. If it is set to None or one attribute of ParamAttr, conv1d_transpose @@ -451,7 +451,7 @@ class Conv1DTranspose(_ConvNd): Shape: - x(Tensor): 3-D tensor with shape (batch, in_channels, length) when data_format is "NCL" or shape (batch, length, in_channels) when data_format is "NLC". - - output_size(int|tuple|list, optional): The output image size. If output size is a tuple, it must contain one integer, (feature_length). None if use kernel_size, padding, output_padding and stride to calculate output_size. If output_size and kernel_size are specified at the same time, They should follow the formula above. Default: None. output_size and kernel_size should not be None at the same time. + - output_size(int|tuple|list, optional): The output image size. If output size is a tuple/list, it must contain one integer, (feature_length). None if use kernel_size, padding, output_padding and stride to calculate output_size. If output_size and kernel_size are specified at the same time, They should follow the formula above. Default: None. output_size and kernel_size should not be None at the same time. - output(Tensor): 3-D tensor with same shape as input x. 
Examples: @@ -555,7 +555,7 @@ class Conv2D(_ConvNd): in_channels(int): The number of input channels in the input image. out_channels(int): The number of output channels produced by the convolution. kernel_size(int|list|tuple, optional): The size of the convolving kernel. - stride(int|list|tuple, optional): The stride size. If stride is a tuple, it must + stride(int|list|tuple, optional): The stride size. If stride is a list/tuple, it must contain three integers, (stride_H, stride_W). Otherwise, the stride_H = stride_W = stride. The default value is 1. padding(int|str|tuple|list, optional): The padding size. Padding coule be in one of the following forms. @@ -565,7 +565,7 @@ class Conv2D(_ConvNd): 4. a list[int] or tuple[int] whose length is 2 * number of spartial dimensions. It has the form [pad_before, pad_after, pad_before, pad_after, ...] for all spartial dimensions. 5. a list or tuple of pairs of ints. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension are also included. Each pair of integers correspond to the amount of padding for a dimension of the input. Padding in batch dimension and channel dimension should be [0, 0] or (0, 0). The default value is 0. - dilation(int|list|tuple, optional): The dilation size. If dilation is a tuple, it must + dilation(int|list|tuple, optional): The dilation size. If dilation is a list/tuple, it must contain three integers, (dilation_D, dilation_H, dilation_W). Otherwise, the dilation_D = dilation_H = dilation_W = dilation. The default value is 1. groups(int, optional): The groups number of the Conv3D Layer. According to grouped @@ -710,10 +710,10 @@ class Conv2DTranspose(_ConvNd): Parameters: in_channels(int): The number of channels in the input image. out_channels(int): The number of channels produced by the convolution. - kernel_size(int|list|tuple): The kernel size. If kernel_size is a tuple, + kernel_size(int|list|tuple): The kernel size. If kernel_size is a list/tuple, it must contain two integers, (kernel_size_H, kernel_size_W). Otherwise, the kernel will be a square. - stride(int|list|tuple, optional): The stride size. If stride is a tuple, it must + stride(int|list|tuple, optional): The stride size. If stride is a list/tuple, it must contain two integers, (stride_H, stride_W). Otherwise, the stride_H = stride_W = stride. Default: 1. padding(int|str|tuple|list, optional): The padding size. Padding coule be in one of the following forms. @@ -725,7 +725,7 @@ class Conv2DTranspose(_ConvNd): The default value is 0. output_padding(int|list|tuple, optional): Additional size added to one side of each dimension in the output shape. Default: 0. - dilation(int|list|tuple, optional): The dilation size. If dilation is a tuple, it must + dilation(int|list|tuple, optional): The dilation size. If dilation is a list/tuple, it must contain two integers, (dilation_H, dilation_W). Otherwise, the dilation_H = dilation_W = dilation. Default: 1. groups(int, optional): The groups number of the Conv2D transpose layer. Inspired by @@ -866,7 +866,7 @@ class Conv3D(_ConvNd): in_channels(int): The number of input channels in the input image. out_channels(int): The number of output channels produced by the convolution. kernel_size(int|list|tuple, optional): The size of the convolving kernel. - stride(int|list|tuple, optional): The stride size. If stride is a tuple, it must + stride(int|list|tuple, optional): The stride size. 
If stride is a list/tuple, it must contain three integers, (stride_D, stride_H, stride_W). Otherwise, the stride_D = stride_H = stride_W = stride. The default value is 1. padding(int|str|tuple|list, optional): The padding size. Padding coule be in one of the following forms. @@ -876,7 +876,7 @@ class Conv3D(_ConvNd): 4. a list[int] or tuple[int] whose length is 2 * number of spartial dimensions. It has the form [pad_before, pad_after, pad_before, pad_after, ...] for all spartial dimensions. 5. a list or tuple of pairs of ints. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension are also included. Each pair of integers correspond to the amount of padding for a dimension of the input. Padding in batch dimension and channel dimension should be [0, 0] or (0, 0). The default value is 0. - dilation(int|list|tuple, optional): The dilation size. If dilation is a tuple, it must + dilation(int|list|tuple, optional): The dilation size. If dilation is a list/tuple, it must contain three integers, (dilation_D, dilation_H, dilation_W). Otherwise, the dilation_D = dilation_H = dilation_W = dilation. The default value is 1. groups(int, optional): The groups number of the Conv3D Layer. According to grouped @@ -1037,11 +1037,11 @@ class Conv3DTranspose(_ConvNd): Parameters: in_channels(int): The number of channels in the input image. out_channels(int): The number of channels produced by the convolution. - kernel_size(int|list|tuple): The kernel size. If kernel_size is a tuple, + kernel_size(int|list|tuple): The kernel size. If kernel_size is a list/tuple, it must contain three integers, (kernel_size_D, kernel_size_H, kernel_size_W). Otherwise, the kernel will be a square. stride(int|list|tuple, optional): The stride size. It means the stride in transposed convolution. - If stride is a tuple, it must contain three integers, (stride_depth, stride_height, + If stride is a list/tuple, it must contain three integers, (stride_depth, stride_height, stride_width). Otherwise, stride_depth = stride_height = stride_width = stride. The default value is 1. padding(int|str|tuple|list, optional): The padding size. Padding coule be in one of the following forms. @@ -1053,7 +1053,7 @@ class Conv3DTranspose(_ConvNd): The default value is 0. output_padding(int|list|tuple, optional): Additional size added to one side of each dimension in the output shape. Default: 0. - dilation(int|list|tuple, optional): The dilation size. If dilation is a tuple, it must + dilation(int|list|tuple, optional): The dilation size. If dilation is a list/tuple, it must contain three integers, (dilation_D, dilation_H, dilation_W). Otherwise, the dilation_D = dilation_H = dilation_W = dilation. The default value is 1. groups(int, optional): The groups number of the Conv3D transpose layer. Inspired by @@ -1071,11 +1071,6 @@ class Conv3DTranspose(_ConvNd): If it is set to None or one attribute of ParamAttr, conv3d_transpose will create ParamAttr as bias_attr. If the Initializer of the bias_attr is not set, the bias is initialized zero. The default value is None. - output_size(int|list|tuple, optional): The output image size. If output size is a - tuple, it must contain two integers, (image_H, image_W). None if use - filter_size, padding, and stride to calculate output_size. - if output_size and filter_size are specified at the same time, They - should follow the formula above. Default: None. data_format(str, optional): Data format that specifies the layout of input. 
It can be "NCDHW" or "NDHWC". Default: "NCDHW". diff --git a/python/paddle/nn/layer/rnn.py b/python/paddle/nn/layer/rnn.py index 0cefb89340a..964cfa74ebf 100644 --- a/python/paddle/nn/layer/rnn.py +++ b/python/paddle/nn/layer/rnn.py @@ -447,7 +447,7 @@ class LSTMCell(RNNCellBase): Inputs: - **inputs** (Tensor): shape `[batch_size, input_size]`, the input, corresponding to :math:`x_t` in the formula. - - **states** (tuple, optional): a tuple of two tensors, each of shape `[batch_size, hidden_size]`, the previous hidden state, corresponding to :math:`h_{t-1}, c_{t-1}` in the formula. When states is None, zero state is used. Defaults to None. + - **states** (list|tuple, optional): a list/tuple of two tensors, each of shape `[batch_size, hidden_size]`, the previous hidden state, corresponding to :math:`h_{t-1}, c_{t-1}` in the formula. When states is None, zero state is used. Defaults to None. Returns: - **outputs** (Tensor): shape `[batch_size, hidden_size]`, the output, corresponding to :math:`h_{t}` in the formula. @@ -1251,7 +1251,7 @@ class LSTM(RNNBase): Inputs: - **inputs** (Tensor): the input sequence. If `time_major` is True, the shape is `[time_steps, batch_size, input_size]`, else, the shape is `[batch_size, time_steps, hidden_size]`. - - **initial_states** (tuple, optional): the initial state, a tuple of (h, c), the shape of each is `[num_layers * num_directions, batch_size, hidden_size]`. If initial_state is not given, zero initial states are used. + - **initial_states** (list|tuple, optional): the initial state, a list/tuple of (h, c), the shape of each is `[num_layers * num_directions, batch_size, hidden_size]`. If initial_state is not given, zero initial states are used. - **sequence_length** (Tensor, optional): shape `[batch_size]`, dtype: int64 or int32. The valid lengths of input sequences. Defaults to None. If `sequence_length` is not None, the inputs are treated as padded sequences. In each input sequence, elements whos time step index are not less than the valid length are treated as paddings. Returns: diff --git a/python/paddle/nn/layer/transformer.py b/python/paddle/nn/layer/transformer.py index 5aded4949e2..fe70a99ffb5 100644 --- a/python/paddle/nn/layer/transformer.py +++ b/python/paddle/nn/layer/transformer.py @@ -461,14 +461,14 @@ class TransformerEncoderLayer(Layer): normalization and post-precess includes dropout, residual connection. Otherwise, no pre-process and post-precess includes dropout, residual connection, layer normalization. Default False - weight_attr(ParamAttr|tuple, optional): To specify the weight parameter property. - If it is a tuple, `weight_attr[0]` would be used as `weight_attr` for + weight_attr(ParamAttr|list|tuple, optional): To specify the weight parameter property. + If it is a list/tuple, `weight_attr[0]` would be used as `weight_attr` for MHA, and `weight_attr[1]` would be used as `weight_attr` for linear in FFN. Otherwise, MHA and FFN both use it as `weight_attr` to create parameters. Default: None, which means the default weight parameter property is used. See usage for details in :code:`ParamAttr` . - bias_attr (ParamAttr|tuple|bool, optional): To specify the bias parameter property. - If it is a tuple, `bias_attr[0]` would be used as `bias_attr` for + bias_attr (ParamAttr|list|tuple|bool, optional): To specify the bias parameter property. + If it is a list/tuple, `bias_attr[0]` would be used as `bias_attr` for MHA, and `bias_attr[1]` would be used as `bias_attr` for linear in FFN. 
Otherwise, MHA and FFN both use it as `bias_attr` to create parameters. The `False` value means the corresponding layer would not have trainable @@ -747,16 +747,16 @@ class TransformerDecoderLayer(Layer): normalization and post-precess includes dropout, residual connection. Otherwise, no pre-process and post-precess includes dropout, residual connection, layer normalization. Default False - weight_attr(ParamAttr|tuple, optional): To specify the weight parameter property. - If it is a tuple, `weight_attr[0]` would be used as `weight_attr` for + weight_attr(ParamAttr|list|tuple, optional): To specify the weight parameter property. + If it is a list/tuple, `weight_attr[0]` would be used as `weight_attr` for self attention, `weight_attr[1]` would be used as `weight_attr` for cross attention, and `weight_attr[2]` would be used as `weight_attr` for linear in FFN. Otherwise, the three sub-layers all uses it as `weight_attr` to create parameters. Default: None, which means the default weight parameter property is used. See usage for details in :ref:`api_paddle_fluid_param_attr_ParamAttr` . - bias_attr (ParamAttr|tuple|bool, optional): To specify the bias parameter property. - If it is a tuple, `bias_attr[0]` would be used as `bias_attr` for + bias_attr (ParamAttr|list|tuple|bool, optional): To specify the bias parameter property. + If it is a list/tuple, `bias_attr[0]` would be used as `bias_attr` for self attention, `bias_attr[1]` would be used as `bias_attr` for cross attention, and `bias_attr[2]` would be used as `bias_attr` for linear in FFN. Otherwise, the three sub-layers all uses it as @@ -1129,8 +1129,8 @@ class Transformer(Layer): normalization and post-precess includes dropout, residual connection. Otherwise, no pre-process and post-precess includes dropout, residual connection, layer normalization. Default False - weight_attr(ParamAttr|tuple, optional): To specify the weight parameter property. - If it is a tuple, the length of `weight_attr` could be 1, 2 or 3. If it is 3, + weight_attr(ParamAttr|list|tuple, optional): To specify the weight parameter property. + If it is a list/tuple, the length of `weight_attr` could be 1, 2 or 3. If it is 3, `weight_attr[0]` would be used as `weight_attr` for self attention, `weight_attr[1]` would be used as `weight_attr` for cross attention of `TransformerDecoder`, and `weight_attr[2]` would be used as `weight_attr` for linear in FFN. @@ -1142,8 +1142,8 @@ class Transformer(Layer): Default: None, which means the default weight parameter property is used. See usage for details in :code:`ParamAttr` . - bias_attr (ParamAttr|tuple|bool, optional): To specify the bias parameter property. - If it is a tuple, the length of `bias_attr` could be 1, 2 or 3. If it is 3, + bias_attr (ParamAttr|list|tuple|bool, optional): To specify the bias parameter property. + If it is a list/tuple, the length of `bias_attr` could be 1, 2 or 3. If it is 3, `bias_attr[0]` would be used as `bias_attr` for self attention, `bias_attr[1]` would be used as `bias_attr` for cross attention of `TransformerDecoder`, and `bias_attr[2]` would be used as `bias_attr` for linear in FFN. 
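The transformer docstrings above now state that weight_attr/bias_attr may be a single ParamAttr or a list/tuple. A minimal sketch of the list/tuple form for TransformerEncoderLayer, under the assumption that element 0 configures the self-attention sub-layer and element 1 the FFN linears as described, with False disabling a bias (layer sizes below are arbitrary):

.. code-block:: python

    import paddle
    from paddle.nn import TransformerEncoderLayer

    weight_attrs = (paddle.ParamAttr(), paddle.ParamAttr())  # (MHA, FFN)
    bias_attrs = [paddle.ParamAttr(), False]                 # no trainable bias for the FFN linears

    layer = TransformerEncoderLayer(
        d_model=128, nhead=4, dim_feedforward=512,
        weight_attr=weight_attrs, bias_attr=bias_attrs)

    src = paddle.rand([2, 10, 128])   # [batch, seq_len, d_model]
    out = layer(src)                  # [2, 10, 128]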
diff --git a/python/paddle/optimizer/adadelta.py b/python/paddle/optimizer/adadelta.py index 42e2a5851c2..af07d706e13 100644 --- a/python/paddle/optimizer/adadelta.py +++ b/python/paddle/optimizer/adadelta.py @@ -36,20 +36,20 @@ class Adadelta(Optimizer): E(dx_t^2) &= \\rho * E(dx_{t-1}^2) + (1-\\rho) * (-g*learning\_rate)^2 Args: - learning_rate (float|Tensor|LearningRateDecay, optional): The learning rate used to update ``Parameter``. + learning_rate (float|Tensor|LearningRateDecay, optional): The learning rate used to update ``Parameter``. It can be a float value, a ``Tensor`` with a float type or a LearningRateDecay. The default value is 0.001. epsilon (float): a small float number for numeric stability. Default 1.0e-6. rho (float): a floating point value indicating the decay rate. Default 0.95. - parameters (list, optional): List of ``Tensor`` to update to minimize ``loss``. \ + parameters (list|tuple, optional): List/Tuple of ``Tensor`` to update to minimize ``loss``. \ This parameter is required in dygraph mode. \ The default value is None in static mode, at this time all parameters will be updated. weight_decay (float|WeightDecayRegularizer, optional): The strategy of regularization. \ - It canbe a float value as coeff of L2 regularization or \ - :ref:`api_fluid_regularizer_L1Decay`, :ref:`api_fluid_regularizer_L2Decay`. - If a parameter has set regularizer using :ref:`api_fluid_ParamAttr` already, \ - the regularization setting here in optimizer will be ignored for this parameter. \ - Otherwise, the regularization setting here in optimizer will take effect. \ - Default None, meaning there is no regularization. + It canbe a float value as coeff of L2 regularization or \ + :ref:`api_fluid_regularizer_L1Decay`, :ref:`api_fluid_regularizer_L2Decay`. + If a parameter has set regularizer using :ref:`api_fluid_ParamAttr` already, \ + the regularization setting here in optimizer will be ignored for this parameter. \ + Otherwise, the regularization setting here in optimizer will take effect. \ + Default None, meaning there is no regularization. grad_clip (GradientClipBase, optional): Gradient cliping strategy, it's an instance of some derived class of ``GradientClipBase`` . There are three cliping strategies ( :ref:`api_fluid_clip_GradientClipByGlobalNorm` , :ref:`api_fluid_clip_GradientClipByNorm` , diff --git a/python/paddle/optimizer/adagrad.py b/python/paddle/optimizer/adagrad.py index d3077949ff0..82615c92b7c 100644 --- a/python/paddle/optimizer/adagrad.py +++ b/python/paddle/optimizer/adagrad.py @@ -43,16 +43,16 @@ class Adagrad(Optimizer): It can be a float value or a ``Variable`` with a float type. epsilon (float, optional): A small float value for numerical stability. The default value is 1e-06. - parameters (list, optional): List of ``Tensor`` to update to minimize ``loss``. \ - This parameter is required in dygraph mode. \ - The default value is None in static mode, at this time all parameters will be updated. - weight_decay (float|WeightDecayRegularizer, optional): The strategy of regularization. \ - It canbe a float value as coeff of L2 regularization or \ - :ref:`api_paddle_regularizer_L1Decay`, :ref:`api_paddle_regularizer_L2Decay`. - If a parameter has set regularizer using :ref:`api_paddle_fluid_param_attr_aramAttr` already, \ - the regularization setting here in optimizer will be ignored for this parameter. \ - Otherwise, the regularization setting here in optimizer will take effect. \ - Default None, meaning there is no regularization. 
+ parameters (list|tuple, optional): List/Tuple of ``Tensor`` to update to minimize ``loss``. \ + This parameter is required in dygraph mode. \ + The default value is None in static mode, at this time all parameters will be updated. + weight_decay (float|WeightDecayRegularizer, optional): The strategy of regularization. \ + It canbe a float value as coeff of L2 regularization or \ + :ref:`api_paddle_regularizer_L1Decay`, :ref:`api_paddle_regularizer_L2Decay`. + If a parameter has set regularizer using :ref:`api_paddle_fluid_param_attr_aramAttr` already, \ + the regularization setting here in optimizer will be ignored for this parameter. \ + Otherwise, the regularization setting here in optimizer will take effect. \ + Default None, meaning there is no regularization. grad_clip (GradientClipBase, optional): Gradient cliping strategy, it's an instance of some derived class of ``GradientClipBase`` . There are three cliping strategies, ClipGradByGlobalNorm, ClipGradByNorm and ClipGradByValue. Default None, diff --git a/python/paddle/optimizer/adam.py b/python/paddle/optimizer/adam.py index dcedf4fc502..4904ebb56cc 100644 --- a/python/paddle/optimizer/adam.py +++ b/python/paddle/optimizer/adam.py @@ -60,16 +60,16 @@ class Adam(Optimizer): The default value is 0.999. epsilon (float, optional): A small float value for numerical stability. The default value is 1e-08. - parameters (list, optional): List of ``Tensor`` to update to minimize ``loss``. \ - This parameter is required in dygraph mode. \ - The default value is None in static mode, at this time all parameters will be updated. - weight_decay (float|WeightDecayRegularizer, optional): The strategy of regularization. \ - It canbe a float value as coeff of L2 regularization or \ - :ref:`api_fluid_regularizer_L1Decay`, :ref:`api_fluid_regularizer_L2Decay`. - If a parameter has set regularizer using :ref:`api_fluid_ParamAttr` already, \ - the regularization setting here in optimizer will be ignored for this parameter. \ - Otherwise, the regularization setting here in optimizer will take effect. \ - Default None, meaning there is no regularization. + parameters (list|tuple, optional): List/Tuple of ``Tensor`` to update to minimize ``loss``. \ + This parameter is required in dygraph mode. \ + The default value is None in static mode, at this time all parameters will be updated. + weight_decay (float|WeightDecayRegularizer, optional): The strategy of regularization. \ + It canbe a float value as coeff of L2 regularization or \ + :ref:`api_fluid_regularizer_L1Decay`, :ref:`api_fluid_regularizer_L2Decay`. + If a parameter has set regularizer using :ref:`api_fluid_ParamAttr` already, \ + the regularization setting here in optimizer will be ignored for this parameter. \ + Otherwise, the regularization setting here in optimizer will take effect. \ + Default None, meaning there is no regularization. grad_clip (GradientClipBase, optional): Gradient cliping strategy, it's an instance of some derived class of ``GradientClipBase`` . There are three cliping strategies ( :ref:`api_fluid_clip_GradientClipByGlobalNorm` , :ref:`api_fluid_clip_GradientClipByNorm` , diff --git a/python/paddle/optimizer/adamax.py b/python/paddle/optimizer/adamax.py index 9d5adf0bba5..175d932540d 100644 --- a/python/paddle/optimizer/adamax.py +++ b/python/paddle/optimizer/adamax.py @@ -53,16 +53,16 @@ class Adamax(Optimizer): The default value is 0.999. epsilon (float, optional): A small float value for numerical stability. The default value is 1e-08. 
- parameters (list, optional): List of ``Tensor`` to update to minimize ``loss``. \ - This parameter is required in dygraph mode. \ - The default value is None in static mode, at this time all parameters will be updated. - weight_decay (float|WeightDecayRegularizer, optional): The strategy of regularization. \ - It canbe a float value as coeff of L2 regularization or \ - :ref:`api_fluid_regularizer_L1Decay`, :ref:`api_fluid_regularizer_L2Decay`. - If a parameter has set regularizer using :ref:`api_fluid_ParamAttr` already, \ - the regularization setting here in optimizer will be ignored for this parameter. \ - Otherwise, the regularization setting here in optimizer will take effect. \ - Default None, meaning there is no regularization. + parameters (list|tuple, optional): List/Tuple of ``Tensor`` to update to minimize ``loss``. \ + This parameter is required in dygraph mode. \ + The default value is None in static mode, at this time all parameters will be updated. + weight_decay (float|WeightDecayRegularizer, optional): The strategy of regularization. \ + It canbe a float value as coeff of L2 regularization or \ + :ref:`api_fluid_regularizer_L1Decay`, :ref:`api_fluid_regularizer_L2Decay`. + If a parameter has set regularizer using :ref:`api_fluid_ParamAttr` already, \ + the regularization setting here in optimizer will be ignored for this parameter. \ + Otherwise, the regularization setting here in optimizer will take effect. \ + Default None, meaning there is no regularization. grad_clip (GradientClipBase, optional): Gradient cliping strategy, it's an instance of some derived class of ``GradientClipBase`` . There are three cliping strategies ( :ref:`api_fluid_clip_GradientClipByGlobalNorm` , :ref:`api_fluid_clip_GradientClipByNorm` , diff --git a/python/paddle/optimizer/adamw.py b/python/paddle/optimizer/adamw.py index eb88a48f303..899c2957a6a 100644 --- a/python/paddle/optimizer/adamw.py +++ b/python/paddle/optimizer/adamw.py @@ -43,9 +43,9 @@ class AdamW(Adam): Args: learning_rate (float|LRScheduler, optional): The learning rate used to update ``Parameter``. It can be a float value or a LRScheduler. The default value is 0.001. - parameters (list, optional): List of ``Tensor`` names to update to minimize ``loss``. \ - This parameter is required in dygraph mode. \ - The default value is None in static mode, at this time all parameters will be updated. + parameters (list|tuple, optional): List/Tuple of ``Tensor`` names to update to minimize ``loss``. \ + This parameter is required in dygraph mode. \ + The default value is None in static mode, at this time all parameters will be updated. beta1 (float|Tensor, optional): The exponential decay rate for the 1st moment estimates. It should be a float number or a Tensor with shape [1] and data type as float32. The default value is 0.9. diff --git a/python/paddle/optimizer/lr.py b/python/paddle/optimizer/lr.py index f269bffc75e..7da933a9b72 100644 --- a/python/paddle/optimizer/lr.py +++ b/python/paddle/optimizer/lr.py @@ -312,8 +312,8 @@ class PiecewiseDecay(LRScheduler): learning_rate = 0.1 Args: - boundaries(list): A list of steps numbers. The type of element in the list is python int. - values(list): A list of learning rate values that will be picked during different epoch boundaries. + boundaries(list|tuple): A list/tuple of steps numbers. The type of element in the list is python int. + values(list|tuple): A list/tuple of learning rate values that will be picked during different epoch boundaries. 
The type of element in the list is python float. last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate. verbose (bool, optional): If ``True``, prints a message to stdout for each update. Default: ``False`` . diff --git a/python/paddle/optimizer/momentum.py b/python/paddle/optimizer/momentum.py index 932a4ad100e..c1dc0e8ddd8 100644 --- a/python/paddle/optimizer/momentum.py +++ b/python/paddle/optimizer/momentum.py @@ -49,16 +49,16 @@ class Momentum(Optimizer): learning_rate (float|Tensor|LearningRateDecay, optional): The learning rate used to update ``Parameter``. It can be a float value, a ``Tensor`` with a float type or a LearningRateDecay. The default value is 0.001. momentum (float): Momentum factor. The default value is 0.9. - parameters (list, optional): List of ``Tensor`` to update to minimize ``loss``. \ + parameters (list|tuple, optional): List/Tuple of ``Tensor`` to update to minimize ``loss``. \ This parameter is required in dygraph mode. \ The default value is None in static mode, at this time all parameters will be updated. weight_decay (float|WeightDecayRegularizer, optional): The strategy of regularization. \ - It canbe a float value as coeff of L2 regularization or \ - :ref:`api_fluid_regularizer_L1Decay`, :ref:`api_fluid_regularizer_L2Decay`. - If a parameter has set regularizer using :ref:`api_fluid_ParamAttr` already, \ - the regularization setting here in optimizer will be ignored for this parameter. \ - Otherwise, the regularization setting here in optimizer will take effect. \ - Default None, meaning there is no regularization. + It canbe a float value as coeff of L2 regularization or \ + :ref:`api_fluid_regularizer_L1Decay`, :ref:`api_fluid_regularizer_L2Decay`. + If a parameter has set regularizer using :ref:`api_fluid_ParamAttr` already, \ + the regularization setting here in optimizer will be ignored for this parameter. \ + Otherwise, the regularization setting here in optimizer will take effect. \ + Default None, meaning there is no regularization. grad_clip (GradientClipBase, optional): Gradient cliping strategy, it's an instance of some derived class of ``GradientClipBase`` . There are three cliping strategies ( :ref:`api_fluid_clip_GradientClipByGlobalNorm` , :ref:`api_fluid_clip_GradientClipByNorm` , diff --git a/python/paddle/optimizer/optimizer.py b/python/paddle/optimizer/optimizer.py index a050852728d..9425ab1431e 100644 --- a/python/paddle/optimizer/optimizer.py +++ b/python/paddle/optimizer/optimizer.py @@ -53,7 +53,7 @@ class Optimizer(object): Args: learning_rate (float|LRScheduler): The learning rate used to update ``Parameter``. It can be a float value or any subclass of ``LRScheduler`` . - parameters (list, optional): List of ``Tensor`` names to update to minimize ``loss``. \ + parameters (list|tuple, optional): List/Tuple of ``Tensor`` names to update to minimize ``loss``. \ This parameter is required in dygraph mode. \ The default value is None in static mode, at this time all parameters will be updated. weight_decay (float|WeightDecayRegularizer, optional): The strategy of regularization. \ diff --git a/python/paddle/optimizer/rmsprop.py b/python/paddle/optimizer/rmsprop.py index 7146b7d8993..a2fd40bc0b3 100644 --- a/python/paddle/optimizer/rmsprop.py +++ b/python/paddle/optimizer/rmsprop.py @@ -78,16 +78,16 @@ class RMSProp(Optimizer): the gradient; if False, by the uncentered second moment. 
Setting this to True may help with training, but is slightly more expensive in terms of computation and memory. Defaults to False. - parameters (list, optional): List of ``Tensor`` to update to minimize ``loss``. \ - This parameter is required in dygraph mode. \ - The default value is None in static mode, at this time all parameters will be updated. - weight_decay (float|WeightDecayRegularizer, optional): The strategy of regularization. \ - It canbe a float value as coeff of L2 regularization or \ - :ref:`api_fluid_regularizer_L1Decay`, :ref:`api_fluid_regularizer_L2Decay`. - If a parameter has set regularizer using :ref:`api_fluid_ParamAttr` already, \ - the regularization setting here in optimizer will be ignored for this parameter. \ - Otherwise, the regularization setting here in optimizer will take effect. \ - Default None, meaning there is no regularization. + parameters (list|tuple, optional): List/Tuple of ``Tensor`` to update to minimize ``loss``. \ + This parameter is required in dygraph mode. \ + The default value is None in static mode, at this time all parameters will be updated. + weight_decay (float|WeightDecayRegularizer, optional): The strategy of regularization. \ + It canbe a float value as coeff of L2 regularization or \ + :ref:`api_fluid_regularizer_L1Decay`, :ref:`api_fluid_regularizer_L2Decay`. + If a parameter has set regularizer using :ref:`api_fluid_ParamAttr` already, \ + the regularization setting here in optimizer will be ignored for this parameter. \ + Otherwise, the regularization setting here in optimizer will take effect. \ + Default None, meaning there is no regularization. grad_clip (GradientClipBase, optional): Gradient cliping strategy, it's an instance of some derived class of ``GradientClipBase`` . There are three cliping strategies ( :ref:`api_fluid_clip_GradientClipByGlobalNorm` , :ref:`api_fluid_clip_GradientClipByNorm` , diff --git a/python/paddle/optimizer/sgd.py b/python/paddle/optimizer/sgd.py index fc208519a2e..ecac40aec72 100644 --- a/python/paddle/optimizer/sgd.py +++ b/python/paddle/optimizer/sgd.py @@ -30,16 +30,16 @@ class SGD(Optimizer): Parameters: learning_rate (float|Tensor|LearningRateDecay, optional): The learning rate used to update ``Parameter``. It can be a float value, a ``Tensor`` with a float type or a LearningRateDecay. The default value is 0.001. - parameters (list, optional): List of ``Tensor`` to update to minimize ``loss``. \ + parameters (list|tuple, optional): List/Tuple of ``Tensor`` to update to minimize ``loss``. \ This parameter is required in dygraph mode. \ The default value is None in static mode, at this time all parameters will be updated. weight_decay (float|WeightDecayRegularizer, optional): The strategy of regularization. \ - It canbe a float value as coeff of L2 regularization or \ - :ref:`api_fluid_regularizer_L1Decay`, :ref:`api_fluid_regularizer_L2Decay`. - If a parameter has set regularizer using :ref:`api_fluid_ParamAttr` already, \ - the regularization setting here in optimizer will be ignored for this parameter. \ - Otherwise, the regularization setting here in optimizer will take effect. \ - Default None, meaning there is no regularization. + It canbe a float value as coeff of L2 regularization or \ + :ref:`api_fluid_regularizer_L1Decay`, :ref:`api_fluid_regularizer_L2Decay`. + If a parameter has set regularizer using :ref:`api_fluid_ParamAttr` already, \ + the regularization setting here in optimizer will be ignored for this parameter. 
\ + Otherwise, the regularization setting here in optimizer will take effect. \ + Default None, meaning there is no regularization. grad_clip (GradientClipBase, optional): Gradient cliping strategy, it's an instance of some derived class of ``GradientClipBase`` . There are three cliping strategies ( :ref:`api_fluid_clip_GradientClipByGlobalNorm` , :ref:`api_fluid_clip_GradientClipByNorm` , diff --git a/python/paddle/static/nn/common.py b/python/paddle/static/nn/common.py index 88802026db8..659b7f45b26 100755 --- a/python/paddle/static/nn/common.py +++ b/python/paddle/static/nn/common.py @@ -86,7 +86,7 @@ def fc(x, out.shape = (1, 2) Args: - x (Tensor|list of Tensor): A tensor or a list of tensor. The number of dimensions + x (Tensor|list[Tensor]|tuple[Tensor]): A tensor or a list/tuple of tensors. The number of dimensions of each tensor is at least 2. The data type should be float16, float32 or float64. size (int): The number of output units in this layer, which also means the feature size of output tensor. @@ -233,16 +233,16 @@ def deform_conv2d(x, deformable convolution v1. num_filters(int): The number of filter. It is as same as the output image channel. - filter_size (int|tuple): The filter size. If filter_size is a tuple, + filter_size (int|list|tuple): The filter size. If filter_size is a list/tuple, it must contain two integers, (filter_size_H, filter_size_W). Otherwise, the filter will be a square. - stride (int|tuple, Optional): The stride size. If stride is a tuple, it must + stride (int|list|tuple, Optional): The stride size. If stride is a list/tuple, it must contain two integers, (stride_H, stride_W). Otherwise, the stride_H = stride_W = stride. Default: stride = 1. - padding (int|tuple, Optional): The padding size. If padding is a tuple, it must + padding (int|list|tuple, Optional): The padding size. If padding is a list/tuple, it must contain two integers, (padding_H, padding_W). Otherwise, the padding_H = padding_W = padding. Default: padding = 0. - dilation (int|tuple, Optional): The dilation size. If dilation is a tuple, it must + dilation (int|list|tuple, Optional): The dilation size. If dilation is a list/tuple, it must contain two integers, (dilation_H, dilation_W). Otherwise, the dilation_H = dilation_W = dilation. Default: dilation = 1. groups (int, Optional): The groups number of the deformable conv layer. According to diff --git a/python/paddle/tensor/manipulation.py b/python/paddle/tensor/manipulation.py index e4222dcccbd..dc811ea0f3f 100644 --- a/python/paddle/tensor/manipulation.py +++ b/python/paddle/tensor/manipulation.py @@ -132,7 +132,7 @@ def flip(x, axis, name=None): Args: x (Tensor): A Tensor(or LoDTensor) with shape :math:`[N_1, N_2,..., N_k]` . The data type of the input Tensor x should be float32, float64, int32, int64, bool. - axis (list): The axis(axes) to flip on. Negative indices for indexing from the end are accepted. + axis (list|tuple): The axis(axes) to flip on. Negative indices for indexing from the end are accepted. name (str, optional): The default value is None. Normally there is no need for user to set this property. For more information, please refer to :ref:`api_guide_Name` . @@ -545,7 +545,7 @@ def squeeze(x, axis=None, name=None): Args: x (Tensor): The input Tensor. Supported data type: float32, float64, bool, int8, int32, int64. - axis (int|list|tuple, optional): An integer or list of integers, indicating the dimensions to be squeezed. Default is None. 
+ axis (int|list|tuple, optional): An integer or list/tuple of integers, indicating the dimensions to be squeezed. Default is None. The range of axis is :math:`[-ndim(x), ndim(x))`. If axis is negative, :math:`axis = axis + ndim(x)`. If axis is None, all the dimensions of x of size 1 will be removed. diff --git a/python/paddle/tensor/math.py b/python/paddle/tensor/math.py index 328115ac933..65f57b4b4e9 100755 --- a/python/paddle/tensor/math.py +++ b/python/paddle/tensor/math.py @@ -752,7 +752,7 @@ def add_n(inputs, name=None): [14, 16, 18]] Args: - inputs (Tensor|list(Tensor)): A Tensor list. The shape and data type of the list elements should be consistent. + inputs (Tensor|list[Tensor]|tuple[Tensor]): A Tensor or a list/tuple of Tensors. The shape and data type of the list/tuple elements should be consistent. Input can be multi-dimensional Tensor, and data types can be: float32, float64, int32, int64. name(str, optional): The default value is None. Normally there is no need for user to set this property. For more information, please refer to :ref:`api_guide_Name` @@ -1082,7 +1082,7 @@ def max(x, axis=None, keepdim=False, name=None): Args: x(Tensor): A tensor, the data type is float32, float64, int32, int64. - axis(list|int, optional): The axis along which the maximum is computed. + axis(int|list|tuple, optional): The axis along which the maximum is computed. If :attr:`None`, compute the maximum over all elements of `x` and return a Tensor with a single element, otherwise must be in the range :math:`[-x.ndim(x), x.ndim(x))`. @@ -1174,7 +1174,7 @@ def min(x, axis=None, keepdim=False, name=None): Args: x(Tensor): A tensor, the data type is float32, float64, int32, int64. - axis(list|int, optional): The axis along which the minimum is computed. + axis(int|list|tuple, optional): The axis along which the minimum is computed. If :attr:`None`, compute the minimum over all elements of `x` and return a Tensor with a single element, otherwise must be in the range :math:`[-x.ndim, x.ndim)`. diff --git a/python/paddle/vision/ops.py b/python/paddle/vision/ops.py index 005e2b12307..47425476a65 100644 --- a/python/paddle/vision/ops.py +++ b/python/paddle/vision/ops.py @@ -454,13 +454,13 @@ def deform_conv2d(x, the number of output channels, g is the number of groups, kH is the filter's height, kW is the filter's width. bias (Tensor, optional): The bias with shape [M,]. - stride (int|list|tuple, optional): The stride size. If stride is a tuple, it must + stride (int|list|tuple, optional): The stride size. If stride is a list/tuple, it must contain two integers, (stride_H, stride_W). Otherwise, the stride_H = stride_W = stride. Default: stride = 1. - padding (int|list|tuple, optional): The padding size. If padding is a tuple, it must + padding (int|list|tuple, optional): The padding size. If padding is a list/tuple, it must contain two integers, (padding_H, padding_W). Otherwise, the padding_H = padding_W = padding. Default: padding = 0. - dilation (int|list|tuple, optional): The dilation size. If dilation is a tuple, it must + dilation (int|list|tuple, optional): The dilation size. If dilation is a list/tuple, it must contain two integers, (dilation_H, dilation_W). Otherwise, the dilation_H = dilation_W = dilation. Default: dilation = 1. deformable_groups (int): The number of deformable group partitions. @@ -644,13 +644,13 @@ class DeformConv2D(Layer): in_channels(int): The number of input channels in the input image. out_channels(int): The number of output channels produced by the convolution. 
kernel_size(int|list|tuple): The size of the convolving kernel. - stride(int|list|tuple, optional): The stride size. If stride is a tuple, it must + stride(int|list|tuple, optional): The stride size. If stride is a list/tuple, it must contain three integers, (stride_H, stride_W). Otherwise, the stride_H = stride_W = stride. The default value is 1. - padding (int|list|tuple, optional): The padding size. If padding is a tuple, it must + padding (int|list|tuple, optional): The padding size. If padding is a list/tuple, it must contain two integers, (padding_H, padding_W). Otherwise, the padding_H = padding_W = padding. Default: padding = 0. - dilation(int|list|tuple, optional): The dilation size. If dilation is a tuple, it must + dilation(int|list|tuple, optional): The dilation size. If dilation is a list/tuple, it must contain three integers, (dilation_D, dilation_H, dilation_W). Otherwise, the dilation_D = dilation_H = dilation_W = dilation. The default value is 1. deformable_groups (int): The number of deformable group partitions. diff --git a/python/paddle/vision/transforms/functional.py b/python/paddle/vision/transforms/functional.py index c65c2423d13..c0e72877ffc 100644 --- a/python/paddle/vision/transforms/functional.py +++ b/python/paddle/vision/transforms/functional.py @@ -153,8 +153,8 @@ def pad(img, padding, fill=0, padding_mode='constant'): Args: img (PIL.Image|np.array): Image to be padded. padding (int|list|tuple): Padding on each border. If a single int is provided this - is used to pad all borders. If tuple of length 2 is provided this is the padding - on left/right and top/bottom respectively. If a tuple of length 4 is provided + is used to pad all borders. If list/tuple of length 2 is provided this is the padding + on left/right and top/bottom respectively. If a list/tuple of length 4 is provided this is the padding for the left, top, right and bottom borders respectively. fill (float, optional): Pixel fill value for constant fill. If a tuple of diff --git a/python/paddle/vision/transforms/functional_cv2.py b/python/paddle/vision/transforms/functional_cv2.py index d50ba7b23c7..99cbfd6dc4f 100644 --- a/python/paddle/vision/transforms/functional_cv2.py +++ b/python/paddle/vision/transforms/functional_cv2.py @@ -136,8 +136,8 @@ def pad(img, padding, fill=0, padding_mode='constant'): Args: img (np.array): Image to be padded. padding (int|list|tuple): Padding on each border. If a single int is provided this - is used to pad all borders. If tuple of length 2 is provided this is the padding - on left/right and top/bottom respectively. If a tuple of length 4 is provided + is used to pad all borders. If list/tuple of length 2 is provided this is the padding + on left/right and top/bottom respectively. If a list/tuple of length 4 is provided this is the padding for the left, top, right and bottom borders respectively. fill (float, optional): Pixel fill value for constant fill. If a tuple of diff --git a/python/paddle/vision/transforms/functional_pil.py b/python/paddle/vision/transforms/functional_pil.py index 516c28f8499..eee60c5452b 100644 --- a/python/paddle/vision/transforms/functional_pil.py +++ b/python/paddle/vision/transforms/functional_pil.py @@ -141,8 +141,8 @@ def pad(img, padding, fill=0, padding_mode='constant'): Args: img (PIL.Image): Image to be padded. padding (int|list|tuple): Padding on each border. If a single int is provided this - is used to pad all borders. If tuple of length 2 is provided this is the padding - on left/right and top/bottom respectively. 
If a tuple of length 4 is provided + is used to pad all borders. If list/tuple of length 2 is provided this is the padding + on left/right and top/bottom respectively. If a list/tuple of length 4 is provided this is the padding for the left, top, right and bottom borders respectively. fill (float, optional): Pixel fill value for constant fill. If a tuple of diff --git a/python/paddle/vision/transforms/transforms.py b/python/paddle/vision/transforms/transforms.py index 921e78cace6..6eeb726fcee 100644 --- a/python/paddle/vision/transforms/transforms.py +++ b/python/paddle/vision/transforms/transforms.py @@ -86,7 +86,7 @@ class Compose(object): together for a dataset transform. Args: - transforms (list): List of transforms to compose. + transforms (list|tuple): List/Tuple of transforms to compose. Returns: A compose object which is callable, __call__ for this Compose @@ -608,8 +608,8 @@ class Normalize(BaseTransform): ``output[channel] = (input[channel] - mean[channel]) / std[channel]`` Args: - mean (int|float|list): Sequence of means for each channel. - std (int|float|list): Sequence of standard deviations for each channel. + mean (int|float|list|tuple): Sequence of means for each channel. + std (int|float|list|tuple): Sequence of standard deviations for each channel. data_format (str, optional): Data format of img, should be 'HWC' or 'CHW'. Default: 'CHW'. to_rgb (bool, optional): Whether to convert to rgb. Default: False. @@ -1022,11 +1022,11 @@ class Pad(BaseTransform): Args: padding (int|list|tuple): Padding on each border. If a single int is provided this - is used to pad all borders. If tuple of length 2 is provided this is the padding - on left/right and top/bottom respectively. If a tuple of length 4 is provided + is used to pad all borders. If list/tuple of length 2 is provided this is the padding + on left/right and top/bottom respectively. If a list/tuple of length 4 is provided this is the padding for the left, top, right and bottom borders respectively. - fill (int|list|tuple): Pixel fill value for constant fill. Default is 0. If a tuple of + fill (int|list|tuple): Pixel fill value for constant fill. Default is 0. If a list/tuple of length 3, it is used to fill R, G, B channels respectively. This value is only used when the padding_mode is constant padding_mode (str): Type of padding. Should be: constant, edge, reflect or symmetric. Default is constant. -- GitLab From 0372f1ddf1fa50c42ebbe726f7e002f213797e1b Mon Sep 17 00:00:00 2001 From: WeiXin Date: Tue, 27 Apr 2021 14:08:52 +0800 Subject: [PATCH 025/720] 'jit.save/load' support save/load function without parameters. (#32430) * jit.save/load support function. * delete unnittest test_jit_load_model_incomplete. * edit code according to CI * Modify the documentation. * add note to doc. 
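
A minimal usage sketch of what this patch enables (the save path and tensor shape below are illustrative; the pattern follows the unit tests added in this change):

    import paddle

    @paddle.jit.to_static
    def fun(inputs):
        return paddle.tanh(inputs)

    # Trace the function once, then save it. A plain function owns no parameters,
    # so only the program file (``*.pdmodel``) is written and no ``*.pdiparams``
    # file is produced.
    x = paddle.rand([3, 6])
    origin = fun(x)
    paddle.jit.save(fun, 'saved_func/func')

    # Loading returns a callable that reproduces the original function's outputs.
    load_func = paddle.jit.load('saved_func/func')
    print(float((load_func(x) - origin).abs().max()) < 1e-10)  # True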
--- python/paddle/fluid/dygraph/io.py | 4 + python/paddle/fluid/dygraph/jit.py | 180 +++++++++++------- .../tests/unittests/test_jit_save_load.py | 66 ++++++- 3 files changed, 177 insertions(+), 73 deletions(-) diff --git a/python/paddle/fluid/dygraph/io.py b/python/paddle/fluid/dygraph/io.py index ce40fde1630..33eb16f1b2b 100644 --- a/python/paddle/fluid/dygraph/io.py +++ b/python/paddle/fluid/dygraph/io.py @@ -650,6 +650,7 @@ def _construct_params_and_buffers(model_path, append_suffix=True): var_info_filename = str(params_filename) + ".info" var_info_path = os.path.join(model_path, var_info_filename) + params_path = os.path.join(model_path, str(params_filename)) if os.path.exists(var_info_path): var_dict = _load_persistable_vars(model_path, var_info_path, @@ -671,6 +672,9 @@ def _construct_params_and_buffers(model_path, var_dict.update( _load_persistable_vars(model_path, var_info_path, programs[ func_name], file_name)) + elif params_filename is not None and not os.path.exists(params_path): + # When saving XX, there is only '*.pdmodel' + return dict() else: var_dict = _load_persistable_vars_by_program( model_path, programs['forward'], params_filename) diff --git a/python/paddle/fluid/dygraph/jit.py b/python/paddle/fluid/dygraph/jit.py index 4c7c7b17eb1..352a377fa3a 100644 --- a/python/paddle/fluid/dygraph/jit.py +++ b/python/paddle/fluid/dygraph/jit.py @@ -19,6 +19,7 @@ import pickle import warnings import functools from collections import OrderedDict +import inspect import six import paddle @@ -506,7 +507,7 @@ def _build_load_path_and_config(path, config): @switch_to_static_graph def save(layer, path, input_spec=None, **configs): """ - Saves input Layer as ``paddle.jit.TranslatedLayer`` + Saves input Layer or function as ``paddle.jit.TranslatedLayer`` format model, which can be used for inference or fine-tuning after loading. It will save the translated program and all related persistable @@ -522,8 +523,12 @@ def save(layer, path, input_spec=None, **configs): - ``paddle.static.load_inference_model`` - Other C++ inference APIs + .. note:: + When using ``paddle.jit.save`` to save a function, parameters will not be saved. If you have to + save the parameter, please pass the Layer containing function and parameter to ``paddle.jit.save``. + Args: - layer (Layer): The Layer to be saved. + layer (Layer|function): The Layer or function to be saved. path (str): The path prefix to save model. The format is ``dirname/file_prefix`` or ``file_prefix``. input_spec (list[InputSpec|Tensor]|tuple[InputSpec|Tensor], optional): Describes the input of the saved model's forward method, which can be described by InputSpec or example Tensor. If None, all input variables of @@ -543,6 +548,7 @@ def save(layer, path, input_spec=None, **configs): Examples: .. code-block:: python + # example 1: save layer import numpy as np import paddle import paddle.nn as nn @@ -609,6 +615,28 @@ def save(layer, path, input_spec=None, **configs): # save path = "example_model/linear" paddle.jit.save(layer, path) + + # example 2: save function + import paddle + from paddle.static import InputSpec + + + def save_function(): + @paddle.jit.to_static + def fun(inputs): + return paddle.tanh(inputs) + + path = 'test_jit_save_load_function_1/func' + inps = paddle.rand([3, 6]) + origin = fun(inps) + + paddle.jit.save(fun, path) + load_func = paddle.jit.load(path) + + load_result = load_func(inps) + print((load_result - origin).abs().max() < 1e-10) + + save_function() """ # 1. 
input build & check @@ -617,9 +645,11 @@ def save(layer, path, input_spec=None, **configs): raise RuntimeError( "The paddle.jit.save doesn't work when setting ProgramTranslator.enable to False." ) - if not isinstance(layer, Layer): + + if not (isinstance(layer, Layer) or inspect.isfunction(layer) or isinstance( + layer, StaticFunction)): raise TypeError( - "The input layer of paddle.jit.save should be 'Layer', but received layer type is %s." + "The input of paddle.jit.save should be 'Layer' or 'Function', but received input type is %s." % type(layer)) # NOTE(chenweihang): If the input layer be wrapped by DataParallel, @@ -647,13 +677,15 @@ def save(layer, path, input_spec=None, **configs): # avoid change user given input_spec inner_input_spec = None if input_spec is not None: - for attr_func in dir(inner_layer): - static_func = getattr(inner_layer, attr_func, None) - if isinstance(static_func, - StaticFunction) and 'forward' != attr_func: - raise ValueError( - "If there are static functions other than 'forward' that need to be saved, the input 'input_spec' should be None, but received the type of 'input_spec' is %s." - % type(input_spec)) + if isinstance(layer, Layer): + for attr_func in dir(inner_layer): + static_func = getattr(inner_layer, attr_func, None) + if isinstance(static_func, + StaticFunction) and 'forward' != attr_func: + raise ValueError( + "If there are static functions other than 'forward' that need to be saved, the input 'input_spec' should be None, but received the type of 'input_spec' is %s." + % type(input_spec)) + if not isinstance(input_spec, (list, tuple)): raise TypeError( "The input input_spec should be 'list', but received input_spec's type is %s." @@ -674,29 +706,74 @@ def save(layer, path, input_spec=None, **configs): configs = _parse_save_configs(configs) scope = core.Scope() extra_var_info = dict() - for attr_func in dir(inner_layer): - static_func = getattr(inner_layer, attr_func, None) - if isinstance(static_func, StaticFunction): - concrete_program = static_func.concrete_program_specify_input_spec( - inner_input_spec) - elif 'forward' == attr_func: - # transform in jit.save, if input_spec is incomplete, declarative will throw error - # inner_input_spec is list[InputSpec], it should be packed with same sturcture - # as original input_spec here. - if inner_input_spec: - inner_input_spec = pack_sequence_as(input_spec, - inner_input_spec) - static_forward = declarative( - inner_layer.forward, input_spec=inner_input_spec) - concrete_program = static_forward.concrete_program - # the input_spec has been used in declarative, which is equal to - # @declarative with input_spec and jit.save without input_spec, - # avoid needless warning - inner_input_spec = None + if isinstance(layer, Layer): + functions = dir(inner_layer) + else: + # layer is function + functions = [layer, ] + for attr_func in functions: + if isinstance(layer, Layer): + static_func = getattr(inner_layer, attr_func, None) + if isinstance(static_func, StaticFunction): + concrete_program = static_func.concrete_program_specify_input_spec( + inner_input_spec) + elif 'forward' == attr_func: + # transform in jit.save, if input_spec is incomplete, declarative will throw error + # inner_input_spec is list[InputSpec], it should be packed with same sturcture + # as original input_spec here. 
+ if inner_input_spec: + inner_input_spec = pack_sequence_as(input_spec, + inner_input_spec) + static_forward = declarative( + inner_layer.forward, input_spec=inner_input_spec) + concrete_program = static_forward.concrete_program + # the input_spec has been used in declarative, which is equal to + # @declarative with input_spec and jit.save without input_spec, + # avoid needless warning + inner_input_spec = None + else: + continue + + # NOTE(chenweihang): we maintain the mapping of variable name to + # structured name, the buffer variable (non-persistable) + # saved to inference program may not need by dygraph Layer, + # we only record the state_dict variable's structured name + state_names_dict = dict() + for structured_name, var in six.iteritems(inner_layer.state_dict()): + state_names_dict[var.name] = structured_name + + # 3. share parameters from Layer to scope & record var info + for param_or_buffer in concrete_program.parameters: + # share to scope + param_or_buffer_tensor = scope.var( + param_or_buffer.name).get_tensor() + src_tensor = param_or_buffer.value().get_tensor() + param_or_buffer_tensor._share_data_with(src_tensor) + # record var info + if param_or_buffer.name not in extra_var_info: + extra_info_dict = dict() + if param_or_buffer.name in state_names_dict: + extra_info_dict['structured_name'] = state_names_dict[ + param_or_buffer.name] + extra_info_dict[ + 'stop_gradient'] = param_or_buffer.stop_gradient + if isinstance(param_or_buffer, ParamBase): + extra_info_dict['trainable'] = param_or_buffer.trainable + extra_var_info[param_or_buffer.name] = extra_info_dict else: - continue - - # 3. build input & output of save_infernece_model + # When layer is a function + if isinstance(attr_func, StaticFunction): + concrete_program = attr_func.concrete_program_specify_input_spec( + inner_input_spec) + else: + if inner_input_spec: + inner_input_spec = pack_sequence_as(input_spec, + inner_input_spec) + static_function = declarative( + attr_func, input_spec=inner_input_spec) + concrete_program = static_function.concrete_program + + # 4. build input & output of save_infernece_model # NOTE(chenweihang): [ Get input variables name ] # There are two cases, whether to prune the inputs or not # - not prune inputs (recommend): @@ -715,32 +792,6 @@ def save(layer, path, input_spec=None, **configs): output_vars = _get_output_vars(concrete_program.outputs, configs.output_spec) - # NOTE(chenweihang): we maintain the mapping of variable name to - # structured name, the buffer variable (non-persistable) - # saved to inference program may not need by dygraph Layer, - # we only record the state_dict variable's structured name - state_names_dict = dict() - for structured_name, var in six.iteritems(inner_layer.state_dict()): - state_names_dict[var.name] = structured_name - - # 4. 
share parameters from Layer to scope & record var info - for param_or_buffer in concrete_program.parameters: - # share to scope - param_or_buffer_tensor = scope.var(param_or_buffer.name).get_tensor( - ) - src_tensor = param_or_buffer.value().get_tensor() - param_or_buffer_tensor._share_data_with(src_tensor) - # record var info - if param_or_buffer.name not in extra_var_info: - extra_info_dict = dict() - if param_or_buffer.name in state_names_dict: - extra_info_dict['structured_name'] = state_names_dict[ - param_or_buffer.name] - extra_info_dict['stop_gradient'] = param_or_buffer.stop_gradient - if isinstance(param_or_buffer, ParamBase): - extra_info_dict['trainable'] = param_or_buffer.trainable - extra_var_info[param_or_buffer.name] = extra_info_dict - # 5. save inference model from paddle.fluid.io import save_inference_model @@ -748,7 +799,7 @@ def save(layer, path, input_spec=None, **configs): model_path = dirname # NOTE(chenweihang): because prefix contains model and params filename, # so we don't support set model_filename & params_filename - if 'forward' == attr_func: + if 'forward' == attr_func or not isinstance(layer, Layer): model_filename = file_prefix + INFER_MODEL_SUFFIX params_filename = file_prefix + INFER_PARAMS_SUFFIX else: @@ -782,10 +833,11 @@ def save(layer, path, input_spec=None, **configs): # but we can save these information in `jit.save` without changing the original # storage to improve user experience. So we save extra information into # file `***.pdiparams.info` - with scope_guard(scope): - extra_var_info_path = path + INFER_PARAMS_INFO_SUFFIX - with open(extra_var_info_path, 'wb') as f: - pickle.dump(extra_var_info, f, protocol=2) + if isinstance(layer, Layer) and extra_var_info: + with scope_guard(scope): + extra_var_info_path = path + INFER_PARAMS_INFO_SUFFIX + with open(extra_var_info_path, 'wb') as f: + pickle.dump(extra_var_info, f, protocol=2) @dygraph_only diff --git a/python/paddle/fluid/tests/unittests/test_jit_save_load.py b/python/paddle/fluid/tests/unittests/test_jit_save_load.py index 16adcb8f241..eef38182f6e 100644 --- a/python/paddle/fluid/tests/unittests/test_jit_save_load.py +++ b/python/paddle/fluid/tests/unittests/test_jit_save_load.py @@ -399,15 +399,6 @@ class TestJitSaveLoad(unittest.TestCase): with self.assertRaises(ValueError): model_dict, _ = fluid.dygraph.load_dygraph(model_path) - def test_jit_load_model_incomplete(self): - model_path = "test_jit_save_load.remove_variables/model" - self.train_and_save_model(model_path) - # remove `.pdiparams` - var_path = model_path + INFER_PARAMS_SUFFIX - os.remove(var_path) - with self.assertRaises(ValueError): - paddle.jit.load(model_path) - def test_jit_load_no_path(self): path = "test_jit_save_load.no_path/model_path" with self.assertRaises(ValueError): @@ -1164,6 +1155,63 @@ class TestJitSaveLoadFinetuneLoad(unittest.TestCase): self.assertTrue(float(((result_01 - result_11)).abs().max()) < 1e-5) +class TestJitSaveLoadFunction(unittest.TestCase): + def setUp(self): + paddle.disable_static() + + def test_jit_save_load_static_function(self): + @paddle.jit.to_static + def fun(inputs): + return paddle.tanh(inputs) + + path = 'test_jit_save_load_function_1/func' + inps = paddle.rand([3, 6]) + origin = fun(inps) + + paddle.jit.save(fun, path) + load_func = paddle.jit.load(path) + + load_result = load_func(inps) + self.assertTrue((load_result - origin).abs().max() < 1e-10) + + def test_jit_save_load_function_input_spec(self): + @paddle.jit.to_static(input_spec=[ + InputSpec( + shape=[None, 6], 
dtype='float32', name='x'), + ]) + def fun(inputs): + return paddle.nn.functional.relu(inputs) + + path = 'test_jit_save_load_function_2/func' + inps = paddle.rand([3, 6]) + origin = fun(inps) + + paddle.jit.save(fun, path) + load_func = paddle.jit.load(path) + load_result = load_func(inps) + self.assertTrue((load_result - origin).abs().max() < 1e-10) + + def test_jit_save_load_function_function(self): + def fun(inputs): + return paddle.tanh(inputs) + + path = 'test_jit_save_load_function_3/func' + inps = paddle.rand([3, 6]) + origin = fun(inps) + + paddle.jit.save( + fun, + path, + input_spec=[ + InputSpec( + shape=[None, 6], dtype='float32', name='x'), + ]) + load_func = paddle.jit.load(path) + + load_result = load_func(inps) + self.assertTrue((load_result - origin).abs().max() < 1e-10) + + class TestJitSaveLoadDataParallel(unittest.TestCase): def verify_inference_correctness(self, layer, path): layer.eval() -- GitLab From 6f6e159a70bddf2bd7ab25a9a42b162ba9b2188e Mon Sep 17 00:00:00 2001 From: Baibaifan <39549453+Baibaifan@users.noreply.github.com> Date: Tue, 27 Apr 2021 14:43:12 +0800 Subject: [PATCH 026/720] slove develop bugs (#32560) --- paddle/fluid/operators/collective/c_sync_comm_stream_op.cc | 2 -- paddle/fluid/pybind/ascend_wrapper_py.cc | 2 ++ python/paddle/distributed/fleet/launch.py | 4 ++-- python/paddle/distributed/fleet/launch_utils.py | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/paddle/fluid/operators/collective/c_sync_comm_stream_op.cc b/paddle/fluid/operators/collective/c_sync_comm_stream_op.cc index e6f6bf53456..772122bb58d 100644 --- a/paddle/fluid/operators/collective/c_sync_comm_stream_op.cc +++ b/paddle/fluid/operators/collective/c_sync_comm_stream_op.cc @@ -63,7 +63,6 @@ class CSyncCommStreamCudaKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& ctx) const override { auto place = ctx.GetPlace(); #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) - int ring_id = ctx.Attr("ring_id"); auto stream = platform::NCCLCommContext::Instance().Get(ring_id, place)->stream(); @@ -75,7 +74,6 @@ class CSyncCommStreamCudaKernel : public framework::OpKernel { #endif #elif defined(PADDLE_WITH_ASCEND_CL) - auto place = ctx.GetPlace(); PADDLE_ENFORCE_EQ(is_npu_place(place), true, platform::errors::PreconditionNotMet( "Sync stream op can run on npu place only for now.")); diff --git a/paddle/fluid/pybind/ascend_wrapper_py.cc b/paddle/fluid/pybind/ascend_wrapper_py.cc index 9a1fa1d7704..43725f7dc0f 100644 --- a/paddle/fluid/pybind/ascend_wrapper_py.cc +++ b/paddle/fluid/pybind/ascend_wrapper_py.cc @@ -108,12 +108,14 @@ enum AttrType { AT_NAMEATTR }; +#ifdef PADDLE_WITH_ASCEND void BindAscendDevice(py::module *m) { py::class_(*m, "NPUDevice") .def_static( "get_device_count", static_cast(&platform::ascend::NPUDevice::GetDeviceCount)); } +#endif void BindAscendGraph(py::module *m) { m->def("ge_initialize", &ge_initialize, "GEInitialize"); diff --git a/python/paddle/distributed/fleet/launch.py b/python/paddle/distributed/fleet/launch.py index 89ca7e19613..69c5b325d18 100644 --- a/python/paddle/distributed/fleet/launch.py +++ b/python/paddle/distributed/fleet/launch.py @@ -325,8 +325,8 @@ def which_distributed_mode(args): if fluid.core.is_compiled_with_cuda(): accelerators = fluid.core.get_cuda_device_count() - elif fluid.core.is_compiled_with_ascend(): - accelerators = fluid.core.NPUDevice.get_device_count() + elif fluid.core.is_compiled_with_npu(): + accelerators = fluid.core.get_npu_device_count() elif 
fluid.core.is_compiled_with_xpu(): accelerators = fluid.core.get_xpu_device_count() else: diff --git a/python/paddle/distributed/fleet/launch_utils.py b/python/paddle/distributed/fleet/launch_utils.py index b4d5c58abbf..be7ad257ccb 100644 --- a/python/paddle/distributed/fleet/launch_utils.py +++ b/python/paddle/distributed/fleet/launch_utils.py @@ -653,8 +653,8 @@ def get_xpus(xpus): def get_device_mode(): - if fluid.core.is_compiled_with_ascend() and \ - fluid.core.NPUDevice.get_device_count() > 0: + if fluid.core.is_compiled_with_npu() and \ + fluid.core.get_npu_device_count() > 0: print("launch train in ascend npu mode!") return DeviceMode.ASCEND_NPU -- GitLab From eca8dcc7a3d95d970d960a0e6f1631ca448324c1 Mon Sep 17 00:00:00 2001 From: Zhang Zheng <32410583+ZzSean@users.noreply.github.com> Date: Tue, 27 Apr 2021 15:01:07 +0800 Subject: [PATCH 027/720] Unify the implementation of activation operation (#32348) --- paddle/fluid/operators/activation_op.cu | 1112 +++++++++++++++-------- paddle/fluid/operators/activation_op.h | 4 +- 2 files changed, 759 insertions(+), 357 deletions(-) diff --git a/paddle/fluid/operators/activation_op.cu b/paddle/fluid/operators/activation_op.cu index 781a97c1ffc..836c5fa06f6 100644 --- a/paddle/fluid/operators/activation_op.cu +++ b/paddle/fluid/operators/activation_op.cu @@ -10,382 +10,719 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/activation_op.h" +#include "paddle/fluid/operators/amp/fp16_type_traits.h" +#include "paddle/fluid/operators/elementwise/elementwise_op_impl.cu.h" #include "paddle/fluid/operators/math/math_cuda_utils.h" #include "paddle/fluid/platform/cuda_device_function.h" -#include "paddle/fluid/platform/float16.h" namespace paddle { namespace operators { -using Tensor = framework::Tensor; -using float16 = paddle::platform::float16; +template +struct CudaReluFunctor : public BaseActivationFunctor { + T zero = static_cast(0.0f); + + // relu(x) = max(x, 0) + // Inputs: args[0], the input x + __device__ __forceinline__ T operator()(const T* args) const { + return args[0] > zero ? args[0] : zero; + } +}; template -struct CudaVecType { - using type = T; - static constexpr int vecsize = 1; +struct CudaReluGradFunctor : public BaseActivationFunctor { + T zero = static_cast(0.0f); + + // dx = dout * (out > 0) + // Inputs: args[0], the input dout + // args[1], the input out + __device__ __forceinline__ T operator()(const T* args) const { + return args[1] > zero ? args[0] : zero; + } + + static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepOut; } }; -template <> -struct CudaVecType { - using type = __half2; - static constexpr int vecsize = 2; +template +struct CudaLeakyReluFunctor : public BaseActivationFunctor { + T zero = static_cast(0.0f); + float alpha; + + typename BaseActivationFunctor::AttrPair GetAttrs() { + return {{"alpha", &alpha}}; + } + + // leakyrelu(x) = x > 0 ? x : alpha * x + // Inputs: args[0], the input x + __device__ __forceinline__ T operator()(const T* args) const { + return args[0] > zero ? args[0] : static_cast(alpha) * args[0]; + } }; -template <> -struct CudaVecType { - using type = float4; - static constexpr int vecsize = 4; +template +struct CudaLeakyReluGradFunctor : public BaseActivationFunctor { + T zero = static_cast(0.0f); + float alpha; + + typename BaseActivationFunctor::AttrPair GetAttrs() { + return {{"alpha", &alpha}}; + } + + // dx = dout * (x > 0 ? 
1 : alpha) + // Inputs: args[0], the input dout + // args[1], the input x + __device__ __forceinline__ T operator()(const T* args) const { + return args[1] > zero ? args[0] : static_cast(alpha) * args[0]; + } + + static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepX; } }; template -class BaseGPUFunctor { - public: - using ELEMENT_TYPE = T; +struct CudaSigmoidFunctor : public BaseActivationFunctor { + using MPType = typename details::MPTypeTrait::Type; + MPType one = static_cast(1.0f); + + // sigmoid(x) = 1 / (1 + exp(-x)) + // Inputs: args[0], the input x + __device__ __forceinline__ T operator()(const T* args) const { + MPType x = static_cast(args[0]); + return static_cast(one / (one + exp(-x))); + } +}; - using AttrPair = std::vector>; +template +struct CudaSigmoidGradFunctor : public BaseActivationFunctor { + T one = static_cast(1.0f); + + // dx = dout * out * (1 - out) + // Inputs: args[0], the input dout + // args[1], the input out + __device__ __forceinline__ T operator()(const T* args) const { + return args[0] * args[1] * (one - args[1]); + } - AttrPair GetAttrs() { return AttrPair(); } + static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepOut; } }; -/* ========================================================================== */ +template +struct CudaSiluFunctor : public BaseActivationFunctor { + // MPType means Compute Type + using MPType = typename details::MPTypeTrait::Type; + MPType one = static_cast(1.0f); + + // silu(x) = x / (1 + exp(-x)) + // Inputs: args[0], the input x + __device__ __forceinline__ T operator()(const T* args) const { + MPType x = static_cast(args[0]); + return static_cast(x / (one + exp(-x))); + } +}; -/* =========================== relu forward ============================ */ template -class ReluGPUFunctor : public BaseGPUFunctor { - private: - T zero_; +struct CudaSiluGradFunctor : public BaseActivationFunctor { + using MPType = typename details::MPTypeTrait::Type; + MPType one = static_cast(1.0f); + + // dx = dout * (1 + exp(-x) + x * exp(-x) / (1 + exp(-x))^2) + // Inputs: args[0], the input dout + // args[1], the input x + __device__ __forceinline__ T operator()(const T* args) const { + MPType dout = static_cast(args[0]); + MPType x = static_cast(args[1]); + MPType temp = one / (one + exp(-x)); + return static_cast(dout * (temp * (one + x * (one - temp)))); + } - public: - ReluGPUFunctor() { zero_ = static_cast(0.0f); } - - // for relu forward when T is double - __device__ __forceinline__ typename CudaVecType::type Compute( - const typename CudaVecType::type in) { - // relu forward : out = max(x, 0) - return in > zero_ ? in : zero_; - } - - // when num % vecsize != 0 this func will be used - __device__ __forceinline__ T ComputeRemainder(const T in) { - // relu forward : out = max(x, 0) - return in > zero_ ? 
in : zero_; - } -}; - -template <> -__device__ __forceinline__ CudaVecType::type -ReluGPUFunctor::Compute(const CudaVecType::type in) { - // relu forward : out = max(in, 0) - return make_float4((in.x > zero_) * (in.x), (in.y > zero_) * (in.y), - (in.z > zero_) * (in.z), (in.w > zero_) * (in.w)); -} - -template <> -__device__ __forceinline__ CudaVecType::type -ReluGPUFunctor::Compute(const CudaVecType::type in) { -// relu forward : out = max(in, 0) -#ifdef __HIPCC__ || CUDA_ARCH_FP16_SUPPORTED(__CUDA_ARCH__) - const half2 kzero = __float2half2_rn(0.0f); - return __hmul2(__hgt2(in, kzero), in); -#else - const float2 xx = __half22float2(in); - return __floats2half2_rn((xx.x > 0.0f) * static_cast(xx.x), - (xx.y > 0.0f) * static_cast(xx.y)); -#endif -} -/* ========================================================================== */ + static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepX; } +}; -/* =========================== relu backward ============================ - */ +template +struct CudaLogSigmoidFunctor : public BaseActivationFunctor { + using MPType = typename details::MPTypeTrait::Type; + MPType zero = static_cast(0.0f); + + // logsigmoid(x) = log(1 / (1 + exp(-x))) + // For numerical stability, + // logsigmoid(x) = + // - (max(-x, 0) + log(exp(-max(-x, 0)) + exp(-x - max(-x, 0)))) + // Inputs: args[0], the input x + __device__ __forceinline__ T operator()(const T* args) const { + MPType x = static_cast(args[0]); + MPType temp = x > zero ? zero : -x; + return static_cast(-temp - log(exp(-temp) + exp(-x - temp))); + } +}; template -class ReluGradGPUFunctor : public BaseGPUFunctor { - private: - T zero_; +struct CudaLogSigmoidGradFunctor : public BaseActivationFunctor { + using MPType = typename details::MPTypeTrait::Type; + MPType zero = static_cast(0.0f); + + // dx = dout * exp(-x) / (1 + exp(-x)) + // For numerical stability: + // dx = dout * exp(-x - max(-x, 0)) / (exp(-max(-x, 0)) + exp(-x - max(-x, + // 0))) + // Inputs: args[0], the input dout + // args[1], the input x + __device__ __forceinline__ T operator()(const T* args) const { + MPType dout = static_cast(args[0]); + MPType x = static_cast(args[1]); + MPType temp1 = x > zero ? zero : -x; + MPType temp2 = exp(-x - temp1); + return static_cast(dout * (temp2 / (exp(-temp1) + temp2))); + } - public: - ReluGradGPUFunctor() { zero_ = static_cast(0.0f); } + static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepX; } +}; + +template +struct CudaAtanFunctor : public BaseActivationFunctor { + using MPType = typename details::MPTypeTrait::Type; + + // atan(x) = atan(x) + // Inputs: args[0], the input x + __device__ __forceinline__ T operator()(const T* args) const { + MPType x = static_cast(args[0]); + return static_cast(atan(x)); + } +}; + +template +struct CudaAtanGradFunctor : public BaseActivationFunctor { + T one = static_cast(1.0f); + + // dx = dout / (1 + x^2) + // Inputs: args[0], the input dout + // args[1], the input x + __device__ __forceinline__ T operator()(const T* args) const { + return args[0] / (one + args[1] * args[1]); + } + + static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepX; } +}; + +template +struct CudaSoftShrinkFunctor : public BaseActivationFunctor { + float lambda; + + typename BaseActivationFunctor::AttrPair GetAttrs() { + return {{"lambda", &lambda}}; + } + + // softshrink(x) = x - lambda, if x > lambda; + // x + lambda, if x < -lambda; + // 0, otherwise. 
+ // Inputs: args[0], the input x + __device__ __forceinline__ T operator()(const T* args) const { + T x = args[0]; + T l = static_cast(lambda); + T temp1 = static_cast(x > l); + T temp2 = static_cast(x < -l); + return temp1 * (x - l) + temp2 * (x + l); + } +}; + +template +struct CudaSoftShrinkGradFunctor : public BaseActivationFunctor { + T zero = static_cast(0.0f); + float lambda; + + typename BaseActivationFunctor::AttrPair GetAttrs() { + return {{"lambda", &lambda}}; + } + + // dx = dout, if x > lambda or x < -lambda else 0 + // Inputs: args[0], the input dout + // args[1], the input x + __device__ __forceinline__ T operator()(const T* args) const { + T x = args[1]; + T l = static_cast(lambda); + return (x >= -l && x <= l) ? zero : args[0]; + } + + static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepX; } +}; + +template +struct CudaCeilFunctor : public BaseActivationFunctor { + using MPType = typename details::MPTypeTrait::Type; + + // ceil(x) = ceil(x) + // Inputs: args[0], the input x + __device__ __forceinline__ T operator()(const T* args) const { + MPType x = static_cast(args[0]); + return static_cast(ceil(x)); + } +}; + +template +struct CudaFloorFunctor : public BaseActivationFunctor { + using MPType = typename details::MPTypeTrait::Type; + + // floor(x) = floor(x) + // Inputs: args[0], the input x + __device__ __forceinline__ T operator()(const T* args) const { + MPType x = static_cast(args[0]); + return static_cast(floor(x)); + } +}; + +template +struct CudaRoundFunctor : public BaseActivationFunctor { + using MPType = typename details::MPTypeTrait::Type; + + // round(x) = round(x) + // Inputs: args[0], the input x + __device__ __forceinline__ T operator()(const T* args) const { + MPType x = static_cast(args[0]); + return static_cast(round(x)); + } +}; + +// grad functor for ceil, floor and round +template +struct CudaZeroGradFunctor : public BaseActivationFunctor { + __device__ __forceinline__ T operator()(const T* args) const { + return static_cast(0.0f); + } + + static constexpr ActBwdOpFwdDeps FwdDeps() { return kNoDeps; } +}; + +template +struct CudaCosFunctor : public BaseActivationFunctor { + using MPType = typename details::MPTypeTrait::Type; + + // cos(x) = cos(x) + // Inputs: args[0], the input x + __device__ __forceinline__ T operator()(const T* args) const { + MPType x = static_cast(args[0]); + return static_cast(cos(x)); + } +}; + +template +struct CudaCosGradFunctor : public BaseActivationFunctor { + using MPType = typename details::MPTypeTrait::Type; + + // dx = dout * (-sin(x)) + // Inputs: args[0], the input dout + // args[1], the input x + __device__ __forceinline__ T operator()(const T* args) const { + MPType dout = static_cast(args[0]); + MPType x = static_cast(args[1]); + return static_cast(-dout * sin(x)); + } + + static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepX; } +}; + +template +struct CudaSinFunctor : public BaseActivationFunctor { + using MPType = typename details::MPTypeTrait::Type; + + // sin(x) = sin(x) + // Inputs: args[0], the input x + __device__ __forceinline__ T operator()(const T* args) const { + MPType x = static_cast(args[0]); + return static_cast(sin(x)); + } +}; + +template +struct CudaSinGradFunctor : public BaseActivationFunctor { + using MPType = typename details::MPTypeTrait::Type; + + // dx = dout * cos(x) + // Inputs: args[0], the input dout + // args[1], the input x + __device__ __forceinline__ T operator()(const T* args) const { + MPType dout = static_cast(args[0]); + MPType x = static_cast(args[1]); + return 
static_cast(dout * cos(x)); + } + + static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepX; } +}; + +template +struct CudaTanFunctor : public BaseActivationFunctor { + using MPType = typename details::MPTypeTrait::Type; + + // tan(x) = tan(x) + // Inputs: args[0], the input x + __device__ __forceinline__ T operator()(const T* args) const { + MPType x = static_cast(args[0]); + return static_cast(tan(x)); + } +}; + +template +struct CudaTanGradFunctor : public BaseActivationFunctor { + using MPType = typename details::MPTypeTrait::Type; + + // dx = dout / cos(x)^2 + // Inputs: args[0], the input dout + // args[1], the input x + __device__ __forceinline__ T operator()(const T* args) const { + MPType dout = static_cast(args[0]); + MPType x = static_cast(args[1]); + return static_cast(dout / (cos(x) * cos(x))); + } + + static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepX; } +}; + +template +struct CudaAsinFunctor : public BaseActivationFunctor { + using MPType = typename details::MPTypeTrait::Type; + + // asin(x) = asin(x) + // Inputs: args[0], the input x + __device__ __forceinline__ T operator()(const T* args) const { + MPType x = static_cast(args[0]); + return static_cast(asin(x)); + } +}; + +template +struct CudaAsinGradFunctor : public BaseActivationFunctor { + using MPType = typename details::MPTypeTrait::Type; + MPType one = static_cast(1.0f); + + // dx = dout / sqrt(1 - x^2) + // Inputs: args[0], the input dout + // args[1], the input x + __device__ __forceinline__ T operator()(const T* args) const { + MPType dout = static_cast(args[0]); + MPType x = static_cast(args[1]); + return static_cast(dout / sqrt(one - x * x)); + } + + static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepX; } +}; + +template +struct CudaAcosFunctor : public BaseActivationFunctor { + using MPType = typename details::MPTypeTrait::Type; + + // acos(x) = acos(x) + // Inputs: args[0], the input x + __device__ __forceinline__ T operator()(const T* args) const { + MPType x = static_cast(args[0]); + return static_cast(acos(x)); + } +}; + +template +struct CudaAcosGradFunctor : public BaseActivationFunctor { + using MPType = typename details::MPTypeTrait::Type; + MPType one = static_cast(1.0f); + + // dx = -dout / sqrt(1 - x^2) + // Inputs: args[0], the input dout + // args[1], the input x + __device__ __forceinline__ T operator()(const T* args) const { + MPType dout = static_cast(args[0]); + MPType x = static_cast(args[1]); + return static_cast(-dout / sqrt(one - x * x)); + } + + static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepX; } +}; - // for relu backward when T is double - __device__ __forceinline__ typename CudaVecType::type Compute( - const typename CudaVecType::type out, - const typename CudaVecType::type dout) { - return out > zero_ ? 
dout : zero_; +template +struct CudaCoshFunctor : public BaseActivationFunctor { + using MPType = typename details::MPTypeTrait::Type; + + // cosh(x) = cosh(x) + // Inputs: args[0], the input x + __device__ __forceinline__ T operator()(const T* args) const { + MPType x = static_cast(args[0]); + return static_cast(cosh(x)); + } +}; + +template +struct CudaCoshGradFunctor : public BaseActivationFunctor { + using MPType = typename details::MPTypeTrait::Type; + + // dx = dout * sinh(x) + // Inputs: args[0], the input dout + // args[1], the input x + __device__ __forceinline__ T operator()(const T* args) const { + MPType dout = static_cast(args[0]); + MPType x = static_cast(args[1]); + return static_cast(dout * sinh(x)); + } + + static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepX; } +}; + +template +struct CudaSinhFunctor : public BaseActivationFunctor { + using MPType = typename details::MPTypeTrait::Type; + + // sinh(x) = sinh(x) + // Inputs: args[0], the input x + __device__ __forceinline__ T operator()(const T* args) const { + MPType x = static_cast(args[0]); + return static_cast(sinh(x)); + } +}; + +template +struct CudaSinhGradFunctor : public BaseActivationFunctor { + using MPType = typename details::MPTypeTrait::Type; + + // dx = dout * cosh(x) + // Inputs: args[0], the input dout + // args[1], the input x + __device__ __forceinline__ T operator()(const T* args) const { + MPType dout = static_cast(args[0]); + MPType x = static_cast(args[1]); + return static_cast(dout * cosh(x)); + } + + static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepX; } +}; + +template +struct CudaTanhFunctor : public BaseActivationFunctor { + using MPType = typename details::MPTypeTrait::Type; + + // tanh(x) = tanh(x) + // Inputs: args[0], the input x + __device__ __forceinline__ T operator()(const T* args) const { + MPType x = static_cast(args[0]); + return static_cast(tanh(x)); } +}; - // when num % vecsize != 0 this func will be used - __device__ __forceinline__ T ComputeRemainder(const T out, const T dout) { - // relu backward : dx = out > 0 ? dout : 0 - return out > zero_ ? dout : zero_; +template +struct CudaTanhGradFunctor : public BaseActivationFunctor { + T one = static_cast(1.0f); + + // dx = dout * (1 - out^2) + // Inputs: args[0], the input dout + // args[1], the input out + __device__ __forceinline__ T operator()(const T* args) const { + T dout = static_cast(args[0]); + T out = static_cast(args[1]); + return dout * (one - out * out); } static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepOut; } }; -template <> -__device__ __forceinline__ CudaVecType::type -ReluGradGPUFunctor::Compute(const CudaVecType::type out, - const CudaVecType::type dout) { - // relu backward : dx = out > 0 ? dout : 0; - return make_float4((out.x > zero_) * (dout.x), (out.y > zero_) * (dout.y), - (out.z > zero_) * (dout.z), (out.w > zero_) * (dout.w)); -} - -template <> -__device__ __forceinline__ CudaVecType::type -ReluGradGPUFunctor::Compute(const CudaVecType::type out, - const CudaVecType::type dout) { -// relu backward : dx = out > 0 ? 
dout : 0; -#ifdef __HIPCC__ || CUDA_ARCH_FP16_SUPPORTED(__CUDA_ARCH__) - const half2 kzero = __float2half2_rn(0.0f); - return __hmul2(__hgt2(out, kzero), dout); -#else - const float2 xx = __half22float2(out); - const float2 yy = __half22float2(dout); - return __floats2half2_rn((xx.x > 0.0f) * static_cast(yy.x), - (xx.y > 0.0f) * static_cast(yy.y)); -#endif -} +template +struct CudaReciprocalFunctor : public BaseActivationFunctor { + T one = static_cast(1.0f); + + // reciprocal(x) = 1 / x + // Inputs: args[0], the input x + __device__ __forceinline__ T operator()(const T* args) const { + return one / args[0]; + } +}; -/* ========================================================================== */ -/* ======================== leaky relu forward ======================== - */ template -class LeakyReluGPUFunctor : public BaseGPUFunctor { - private: - T zero_; - float alpha_; +struct CudaReciprocalGradFunctor : public BaseActivationFunctor { + // dx = -dout * out^2 + // Inputs: args[0], the input dout + // args[1], the input out + __device__ __forceinline__ T operator()(const T* args) const { + return -args[0] * args[1] * args[1]; + } - public: - LeakyReluGPUFunctor() { zero_ = static_cast(0.0f); } + static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepOut; } +}; - typename BaseActivationFunctor::AttrPair GetAttrs() { - return {{"alpha", &alpha_}}; - } - // leakyrelu forward : out = x > 0 ? x : x * alpha - __device__ __forceinline__ typename CudaVecType::type Compute( - const typename CudaVecType::type in) { - return in > zero_ ? in : static_cast(alpha_) * in; - } - - __device__ __forceinline__ T ComputeRemainder(const T in) { - // leakyrelu forward : out = x > 0 ? x : x * alpha - return in > zero_ ? in : static_cast(alpha_) * in; - } -}; - -template <> -__device__ __forceinline__ CudaVecType::type -LeakyReluGPUFunctor::Compute(const CudaVecType::type in) { - // leakyrelu forward : out = x > 0 ? x : x * alpha - return make_float4((in.x > zero_) ? (in.x) : (in.x) * alpha_, - (in.y > zero_) ? (in.y) : (in.y) * alpha_, - (in.z > zero_) ? (in.z) : (in.z) * alpha_, - (in.w > zero_) ? (in.w) : (in.w) * alpha_); -} - -template <> -__device__ __forceinline__ CudaVecType::type -LeakyReluGPUFunctor::Compute(const CudaVecType::type in) { - // leakyrelu forward : out = x > 0 ? x : x * alpha - const float2 xx = __half22float2(in); - return __floats2half2_rn((xx.x > 0.0f) ? xx.x : xx.x * alpha_, - (xx.y > 0.0f) ? 
xx.y : xx.y * alpha_); -} -/* ========================================================================== */ +template +struct CudaExpFunctor : public BaseActivationFunctor { + using MPType = typename details::MPTypeTrait::Type; + + // exp(x) = exp(x) + // Inputs: args[0], the input x + __device__ __forceinline__ T operator()(const T* args) const { + MPType x = static_cast(args[0]); + return static_cast(exp(x)); + } +}; -/* =========================== leaky relu backward ======================= - */ template -class LeakyReluGradGPUFunctor : public BaseGPUFunctor { - private: - T zero_; - float alpha_; +struct CudaExpGradFunctor : public BaseActivationFunctor { + // dx = dout * out + // Inputs: args[0], the input dout + // args[1], the input out + __device__ __forceinline__ T operator()(const T* args) const { + return args[0] * args[1]; + } - public: - LeakyReluGradGPUFunctor() { zero_ = static_cast(0.0f); } + static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepOut; } +}; - typename BaseActivationFunctor::AttrPair GetAttrs() { - return {{"alpha", &alpha_}}; +template +struct CudaLogFunctor : public BaseActivationFunctor { + using MPType = typename details::MPTypeTrait::Type; + + // log(x) = log(x) + // Inputs: args[0], the input x + __device__ __forceinline__ T operator()(const T* args) const { + MPType x = static_cast(args[0]); + return static_cast(log(x)); + } +}; + +template +struct CudaLogGradFunctor : public BaseActivationFunctor { + // dx = dout / x + // Inputs: args[0], the input dout + // args[1], the input x + __device__ __forceinline__ T operator()(const T* args) const { + return args[0] / args[1]; } - // for leaky relu backward when T is double - __device__ __forceinline__ typename CudaVecType::type Compute( - const typename CudaVecType::type in, - const typename CudaVecType::type dout) { - // leakyrelu backward : dx = x > 0 ? dout : alpha * dout - return in > zero_ ? dout : static_cast(alpha_) * dout; + static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepX; } +}; + +template +struct CudaSquareFunctor : public BaseActivationFunctor { + // square(x) = x * x + // Inputs: args[0], the input x + __device__ __forceinline__ T operator()(const T* args) const { + return args[0] * args[0]; } +}; - // when num % vecsize != 0 this func will be used - __device__ __forceinline__ T ComputeRemainder(const T in, const T dout) { - // leakyrelu backward : dx = x > 0 ? dout : alpha * dout - return in > zero_ ? dout : static_cast(alpha_) * dout; +template +struct CudaSquareGradFunctor : public BaseActivationFunctor { + T two = static_cast(2.0f); + + // dx = dout * 2 * x + // Inputs: args[0], the input dout + // args[1], the input x + __device__ __forceinline__ T operator()(const T* args) const { + return args[0] * two * args[1]; } static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepX; } }; -template <> -__device__ __forceinline__ CudaVecType::type -LeakyReluGradGPUFunctor::Compute(const CudaVecType::type in, - const CudaVecType::type dout) { - // leakyrelu backward : dx = x > 0 ? dout : alpha * dout - return make_float4((in.x > zero_) ? (dout.x) : alpha_ * (dout.x), - (in.y > zero_) ? (dout.y) : alpha_ * (dout.y), - (in.z > zero_) ? (dout.z) : alpha_ * (dout.z), - (in.w > zero_) ? (dout.w) : alpha_ * (dout.w)); -} - -template <> -__device__ __forceinline__ CudaVecType::type LeakyReluGradGPUFunctor< - float16>::Compute(const CudaVecType::type in, - const CudaVecType::type dout) { - // leakyrelu backward : dx = x > 0 ? 
dout : alpha * dout - const float2 xx = __half22float2(in); - const float2 yy = __half22float2(dout); - return __floats2half2_rn((xx.x > 0.0f) ? yy.x : alpha_ * yy.x, - (xx.y > 0.0f) ? yy.y : alpha_ * yy.y); -} +template +struct CudaSqrtFunctor : public BaseActivationFunctor { + using MPType = typename details::MPTypeTrait::Type; + + // sqrt(x) = sqrt(x) + // Inputs: args[0], the input x + __device__ __forceinline__ T operator()(const T* args) const { + MPType x = static_cast(args[0]); + return static_cast(sqrt(x)); + } +}; -/* ========================================================================== */ +template +struct CudaSqrtGradFunctor : public BaseActivationFunctor { + T one_half = static_cast(0.5f); + + // dx = dout * 0.5 / out + // Inputs: args[0], the input dout + // args[1], the input out + __device__ __forceinline__ T operator()(const T* args) const { + return one_half * args[0] / args[1]; + } + + static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepOut; } +}; -template -__global__ void ActivationGradKernelVec(const T* forward_data, const T* dout, - T* dx, int num, Functor functor) { - using VecType = typename CudaVecType::type; - constexpr int vecsize = CudaVecType::vecsize; - int idx = threadIdx.x + blockIdx.x * blockDim.x; - int stride = blockDim.x * gridDim.x; - int loop = num / vecsize; - int tail = num % vecsize; - const VecType* in_forward = reinterpret_cast(forward_data); - const VecType* in_dout = reinterpret_cast(dout); - VecType* out = reinterpret_cast(dx); - VecType forward_vec, dout_vec; - T in_data, dout_data; - for (int i = idx; i < loop; i += stride) { -#ifdef __HIPCC__ || __CUDA_ARCH__ >= 350 - forward_vec = __ldg(in_forward + i); - dout_vec = __ldg(in_dout + i); -#else - forward_vec = in_forward[i]; - dout_vec = in_dout[i]; -#endif - out[i] = functor.Compute(forward_vec, dout_vec); - } - - while (idx == loop && tail) { - in_data = forward_data[num - tail]; - dout_data = dout[num - tail]; - dx[num - tail] = functor.ComputeRemainder(in_data, dout_data); - --tail; - } -} - -template -__global__ void ActivationkernelVec(const T* src, T* dst, int num, - Functor functor) { - constexpr int vecsize = CudaVecType::vecsize; - using VecType = typename CudaVecType::type; - int idx = threadIdx.x + blockIdx.x * blockDim.x; - int stride = blockDim.x * gridDim.x; - int loop = num / vecsize; - int tail = num % vecsize; - const VecType* in = reinterpret_cast(src); - VecType* out = reinterpret_cast(dst); - VecType x_vec; - for (int i = idx; i < loop; i += stride) { -#ifdef __HIPCC__ || __CUDA_ARCH__ >= 350 - x_vec = __ldg(in + i); -#else - x_vec = in[i]; -#endif - out[i] = functor.Compute(x_vec); - } - - while (idx == loop && tail) { - dst[num - tail] = functor.ComputeRemainder(src[num - tail]); - --tail; - } -} +template +struct CudaRsqrtFunctor : public BaseActivationFunctor { + using MPType = typename details::MPTypeTrait::Type; + + // rsqrt(x) = rsqrt(x) + // Inputs: args[0], the input x + __device__ __forceinline__ T operator()(const T* args) const { + MPType x = static_cast(args[0]); + return static_cast(rsqrt(x)); + } +}; + +template +struct CudaRsqrtGradFunctor : public BaseActivationFunctor { + T minus_one_half = static_cast(-0.5f); + + // dx = dout * -0.5 / out^3 + // Inputs: args[0], the input dout + // args[1], the input out + __device__ __forceinline__ T operator()(const T* args) const { + T out = args[1]; + return minus_one_half * args[0] * out * out * out; + } + + static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepOut; } +}; template -class 
ActivationGPUKernel +class ActivationCudaKernel : public framework::OpKernel { public: using T = typename Functor::ELEMENT_TYPE; - void Compute(const framework::ExecutionContext& context) const override { - const framework::Tensor* in_x = nullptr; + void Compute(const framework::ExecutionContext& ctx) const override { + const framework::Tensor* x = nullptr; framework::Tensor* out = nullptr; - ExtractActivationTensor(context, &in_x, &out); - auto& dev_ctx = context.template device_context(); - - int num = in_x->numel(); - const T* input_data = in_x->data(); - T* output_data = out->mutable_data(dev_ctx.GetPlace(), - static_cast(num * sizeof(T))); - - int block = 512; -#ifdef __HIPCC__ - block = 256; -#endif - Functor functor; + ExtractActivationTensor(ctx, &x, &out); + out->mutable_data(ctx.GetPlace()); + auto& dev_ctx = ctx.template device_context(); + std::vector ins = {x}; + std::vector outs = {out}; + auto functor = Functor(); auto attrs = functor.GetAttrs(); for (auto& attr : attrs) { - *attr.second = context.Attr(attr.first); + *attr.second = ctx.Attr(attr.first); } - constexpr int vecsize = CudaVecType::vecsize; - int grid = max((num / vecsize + block - 1) / block, 1); - auto stream = context.cuda_device_context().stream(); - ActivationkernelVec<<>>( - input_data, output_data, num, functor); + LaunchElementwiseCudaKernel(dev_ctx, ins, &outs, + functor); } }; template -class ActivationGradGPUKernel +class ActivationGradCudaKernel : public framework::OpKernel { public: using T = typename Functor::ELEMENT_TYPE; - void Compute(const framework::ExecutionContext& context) const override { + void Compute(const framework::ExecutionContext& ctx) const override { const framework::Tensor *x, *out, *d_out; framework::Tensor* d_x = nullptr; x = out = d_out = nullptr; - ExtractActivationGradTensor(context, &x, &out, &d_out, + ExtractActivationGradTensor(ctx, &x, &out, &d_out, &d_x); - int numel = d_out->numel(); - auto& dev_ctx = context.template device_context(); - auto* dx_data = d_x->mutable_data( - dev_ctx.GetPlace(), static_cast(numel * sizeof(T))); - auto* dout_data = d_out->data(); + d_x->mutable_data(ctx.GetPlace()); + auto& dev_ctx = ctx.template device_context(); + auto functor = Functor(); + auto attrs = functor.GetAttrs(); + for (auto& attr : attrs) { + *attr.second = ctx.Attr(attr.first); + } + + std::vector ins = {d_out}; + std::vector outs = {d_x}; - auto* forward_data = dout_data; if (static_cast(Functor::FwdDeps()) == static_cast(kDepOut)) { // Only need forward output Out - forward_data = out->data(); + ins.push_back(out); + LaunchElementwiseCudaKernel(dev_ctx, ins, + &outs, functor); } else if (static_cast(Functor::FwdDeps()) == static_cast(kDepX)) { // Only need forward input X - forward_data = x->data(); + ins.push_back(x); + LaunchElementwiseCudaKernel(dev_ctx, ins, + &outs, functor); + } else { + LaunchElementwiseCudaKernel(dev_ctx, ins, + &outs, functor); } - - int block = 512; -#ifdef __HIPCC__ - block = 256; -#endif - - Functor functor; - auto attrs = functor.GetAttrs(); - for (auto& attr : attrs) { - *attr.second = context.Attr(attr.first); - } - constexpr int vecsize = CudaVecType::vecsize; - int grid = max((numel / vecsize + block - 1) / block, 1); - auto stream = context.cuda_device_context().stream(); - ActivationGradKernelVec<<>>( - forward_data, dout_data, dx_data, numel, functor); } }; @@ -395,12 +732,13 @@ class ActivationGradGPUKernel namespace ops = paddle::operators; namespace plat = paddle::platform; -#define REGISTER_ACTIVATION_CUDA_KERNEL(act_type, op_name, 
functor, \ - grad_functor) \ +#define REGISTER_ACTIVATION_GPU_KERNEL(act_type, op_name, functor, \ + grad_functor) \ REGISTER_OP_CUDA_KERNEL( \ - act_type, \ - ops::ActivationKernel>, \ - ops::ActivationKernel>, \ + act_type, ops::ActivationKernel>, \ + ops::ActivationKernel>, \ ops::ActivationKernel>); \ REGISTER_OP_CUDA_KERNEL( \ @@ -410,28 +748,28 @@ namespace plat = paddle::platform; ops::grad_functor>, \ ops::ActivationGradKernel>); -FOR_EACH_ACTIVATION_OP(REGISTER_ACTIVATION_CUDA_KERNEL); -#define REGISTER_ACTIVATION_GPU_KERNEL(act_type, op_name, functor, \ - grad_functor) \ +#define REGISTER_ACTIVATION_CUDA_KERNEL(act_type, op_name, functor, \ + grad_functor) \ REGISTER_OP_CUDA_KERNEL( \ - act_type, ops::ActivationGPUKernel>, \ - ops::ActivationGPUKernel>, \ - ops::ActivationGPUKernel>); \ + act_type, ops::ActivationCudaKernel>, \ + ops::ActivationCudaKernel>, \ + ops::ActivationCudaKernel>); \ REGISTER_OP_CUDA_KERNEL( \ - act_type##_grad, ops::ActivationGradGPUKernel>, \ - ops::ActivationGradGPUKernel>, \ - ops::ActivationGradGPUKernel>); + act_type##_grad, \ + ops::ActivationGradCudaKernel>, \ + ops::ActivationGradCudaKernel>, \ + ops::ActivationGradCudaKernel>); /* ======================== leaky relu register ============================ */ -REGISTER_ACTIVATION_GPU_KERNEL(leaky_relu, LeakyRelu, LeakyReluGPUFunctor, - LeakyReluGradGPUFunctor); +REGISTER_ACTIVATION_CUDA_KERNEL(leaky_relu, LeakyRelu, CudaLeakyReluFunctor, + CudaLeakyReluGradFunctor); REGISTER_OP_CUDA_KERNEL( leaky_relu_grad_grad, @@ -444,7 +782,7 @@ REGISTER_OP_CUDA_KERNEL( /* ========================================================================== */ /* ======================== elu register ============================ */ -REGISTER_ACTIVATION_CUDA_KERNEL(elu, ELU, ELUFunctor, ELUGradFunctor); +REGISTER_ACTIVATION_GPU_KERNEL(elu, ELU, ELUFunctor, ELUGradFunctor); REGISTER_OP_CUDA_KERNEL( elu_grad_grad, ops::ELUDoubleGradKernel>, - ops::ActivationKernel>, - ops::ActivationKernel>, - ops::ActivationKernel>, - ops::ActivationKernel>); + square, ops::ActivationCudaKernel>, + ops::ActivationCudaKernel>, + ops::ActivationCudaKernel>, + ops::ActivationCudaKernel>, + ops::ActivationCudaKernel>); REGISTER_OP_CUDA_KERNEL( - square_grad, ops::ActivationGradKernel>, - ops::ActivationGradKernel>, - ops::ActivationGradKernel>, - ops::ActivationGradKernel>, - ops::ActivationGradKernel>); + square_grad, + ops::ActivationGradCudaKernel>, + ops::ActivationGradCudaKernel>, + ops::ActivationGradCudaKernel>, + ops::ActivationGradCudaKernel>, + ops::ActivationGradCudaKernel>); REGISTER_OP_CUDA_KERNEL( square_grad_grad, @@ -564,27 +910,29 @@ REGISTER_OP_CUDA_KERNEL( /* ========================== exp register ============================ */ REGISTER_OP_CUDA_KERNEL( - exp, ops::ActivationKernel>, - ops::ActivationKernel>, + exp, ops::ActivationCudaKernel>, + ops::ActivationCudaKernel>, ops::ActivationKernel>, ops::ActivationKernel>, - ops::ActivationKernel>); + ops::ActivationCudaKernel>); REGISTER_OP_CUDA_KERNEL( - exp_grad, ops::ActivationGradKernel>, - ops::ActivationGradKernel>, - ops::ActivationGradKernel>, - ops::ActivationGradKernel>, - ops::ActivationGradKernel>); + exp_grad, ops::ActivationGradCudaKernel>, + ops::ActivationGradCudaKernel>, + ops::ActivationGradCudaKernel>, + ops::ActivationGradCudaKernel>, + ops::ActivationGradCudaKernel>); /* ========================================================================== */ /* ========================== Log register ==================================*/ 
-REGISTER_ACTIVATION_CUDA_KERNEL(log, Log, LogFunctor, LogGradFunctor); +REGISTER_ACTIVATION_CUDA_KERNEL(log, Log, CudaLogFunctor, CudaLogGradFunctor); REGISTER_OP_CUDA_KERNEL( log_grad_grad, ops::LogDoubleGradKernel>); /* ========================================================================== */ + +REGISTER_ACTIVATION_CUDA_KERNEL(sigmoid, Sigmoid, CudaSigmoidFunctor, + CudaSigmoidGradFunctor); +REGISTER_ACTIVATION_CUDA_KERNEL(silu, Silu, CudaSiluFunctor, + CudaSiluGradFunctor); +REGISTER_ACTIVATION_CUDA_KERNEL(logsigmoid, LogSigmoid, CudaLogSigmoidFunctor, + CudaLogSigmoidGradFunctor); +REGISTER_ACTIVATION_CUDA_KERNEL(atan, Atan, CudaAtanFunctor, + CudaAtanGradFunctor); +REGISTER_ACTIVATION_CUDA_KERNEL(softshrink, SoftShrink, CudaSoftShrinkFunctor, + CudaSoftShrinkGradFunctor); +REGISTER_ACTIVATION_CUDA_KERNEL(ceil, Ceil, CudaCeilFunctor, + CudaZeroGradFunctor); +REGISTER_ACTIVATION_CUDA_KERNEL(floor, Floor, CudaFloorFunctor, + CudaZeroGradFunctor); +REGISTER_ACTIVATION_CUDA_KERNEL(cos, Cos, CudaCosFunctor, CudaCosGradFunctor); +REGISTER_ACTIVATION_CUDA_KERNEL(tan, Tan, CudaTanFunctor, CudaTanGradFunctor); +REGISTER_ACTIVATION_CUDA_KERNEL(acos, Acos, CudaAcosFunctor, + CudaAcosGradFunctor); +REGISTER_ACTIVATION_CUDA_KERNEL(sin, Sin, CudaSinFunctor, CudaSinGradFunctor); +REGISTER_ACTIVATION_CUDA_KERNEL(asin, Asin, CudaAsinFunctor, + CudaAsinGradFunctor); +REGISTER_ACTIVATION_CUDA_KERNEL(sinh, Sinh, CudaSinhFunctor, + CudaSinhGradFunctor); +REGISTER_ACTIVATION_CUDA_KERNEL(cosh, Cosh, CudaCoshFunctor, + CudaCoshGradFunctor); +REGISTER_ACTIVATION_CUDA_KERNEL(round, Round, CudaRoundFunctor, + CudaZeroGradFunctor); +REGISTER_ACTIVATION_CUDA_KERNEL(reciprocal, Reciprocal, CudaReciprocalFunctor, + CudaReciprocalGradFunctor); +REGISTER_ACTIVATION_GPU_KERNEL(log1p, Log1p, Log1pFunctor, Log1pGradFunctor); +REGISTER_ACTIVATION_GPU_KERNEL(log2, Log2, Log2Functor, Log2GradFunctor); +REGISTER_ACTIVATION_GPU_KERNEL(log10, Log10, Log10Functor, Log10GradFunctor); +REGISTER_ACTIVATION_GPU_KERNEL(brelu, BRelu, BReluFunctor, BReluGradFunctor); +REGISTER_ACTIVATION_GPU_KERNEL(soft_relu, SoftRelu, SoftReluFunctor, + SoftReluGradFunctor); +REGISTER_ACTIVATION_GPU_KERNEL(stanh, STanh, STanhFunctor, STanhGradFunctor); +REGISTER_ACTIVATION_GPU_KERNEL(softplus, Softplus, SoftplusFunctor, + SoftplusGradFunctor); +REGISTER_ACTIVATION_GPU_KERNEL(softsign, Softsign, SoftsignFunctor, + SoftsignGradFunctor); +REGISTER_ACTIVATION_GPU_KERNEL(relu6, Relu6, Relu6Functor, Relu6GradFunctor); +REGISTER_ACTIVATION_GPU_KERNEL(tanh_shrink, TanhShrink, TanhShrinkFunctor, + TanhShrinkGradFunctor); +REGISTER_ACTIVATION_GPU_KERNEL(hard_shrink, HardShrink, HardShrinkFunctor, + HardShrinkGradFunctor); +REGISTER_ACTIVATION_GPU_KERNEL(hard_sigmoid, HardSigmoid, HardSigmoidFunctor, + HardSigmoidGradFunctor); +REGISTER_ACTIVATION_GPU_KERNEL(swish, Swish, SwishFunctor, SwishGradFunctor); +REGISTER_ACTIVATION_GPU_KERNEL(thresholded_relu, ThresholdedRelu, + ThresholdedReluFunctor, + ThresholdedReluGradFunctor); +REGISTER_ACTIVATION_GPU_KERNEL(hard_swish, HardSwish, HardSwishFunctor, + HardSwishGradFunctor); diff --git a/paddle/fluid/operators/activation_op.h b/paddle/fluid/operators/activation_op.h index 7245dea9cf9..ccd5bf528ba 100644 --- a/paddle/fluid/operators/activation_op.h +++ b/paddle/fluid/operators/activation_op.h @@ -455,7 +455,7 @@ struct HardShrinkFunctor : public BaseActivationFunctor { void operator()(Device d, X x, Out out) const { auto temp1 = x < static_cast(threshold * -1.f); auto temp2 = x > static_cast(threshold); - 
out.device(d) = x * (temp1 + temp2).template cast(); + out.device(d) = x * (temp1 || temp2).template cast(); } }; @@ -472,7 +472,7 @@ struct HardShrinkGradFunctor : public BaseActivationFunctor { void operator()(Device d, X x, Out out, dOut dout, dX dx) const { auto temp1 = x < static_cast(threshold * -1.f); auto temp2 = x > static_cast(threshold); - dx.device(d) = dout * (temp1 + temp2).template cast(); + dx.device(d) = dout * (temp1 || temp2).template cast(); } static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepX; } -- GitLab From 1afe1ac9161c1597f78d1a8e13a500568d3d88b6 Mon Sep 17 00:00:00 2001 From: Zhong Hui Date: Tue, 27 Apr 2021 15:22:49 +0800 Subject: [PATCH 028/720] [OPs] Bug fix, fix the segment mean for illegal syncthreads usage. (#32596) * [OPs] Bug fix, fix the segment mean for illegal syncthreads usage. --- .../fluid/operators/math/segment_pooling.cu | 116 ++++++++++++------ 1 file changed, 78 insertions(+), 38 deletions(-) diff --git a/paddle/fluid/operators/math/segment_pooling.cu b/paddle/fluid/operators/math/segment_pooling.cu index 0b615cefac4..b49b5036ac4 100644 --- a/paddle/fluid/operators/math/segment_pooling.cu +++ b/paddle/fluid/operators/math/segment_pooling.cu @@ -25,14 +25,12 @@ namespace operators { using Tensor = framework::Tensor; template -__global__ void SegmentMeanCustomKernel( - const Index* segment_ids, const T* input, T* output, T* summed_ids, - const Index input_length_size, const Index inner_dim_size, - const Index output_length_size, const Index total_stripe_count) { +__global__ void SegmentSumIdsKernel(const Index* segment_ids, T* summed_ids, + const Index input_length_size, + const Index total_stripe_count) { CUDA_KERNEL_LOOP(stripe_index, total_stripe_count) { - const Index segment_offset = stripe_index % inner_dim_size; - const Index dim_index_base = - stripe_index / inner_dim_size * Index(DimTileSize); + const Index segment_offset = stripe_index; + const Index dim_index_base = stripe_index * Index(DimTileSize); const Index actual_height = min(Index(DimTileSize), input_length_size - dim_index_base); @@ -41,19 +39,20 @@ __global__ void SegmentMeanCustomKernel( if (dim_index_base > 0) { last_segment_id = segment_ids[dim_index_base - 1]; } - if (segment_offset == 0) { - T sum = T(0); - for (Index j = 0; j < actual_height; j++) { - Index current_segment_id = segment_ids[dim_index_base + j]; - // Note(ZHUI): following check may cause - // cudaErrorLaunchOutOfResources. 
- // PADDLE_ENFORCE(current_segment_id >= last_segment_id, - // "the segment ids should be sorted, but got " - // "segment_ids[%d]:%d > segment_ids[%d]:%d.", - // dim_index_base + j - 1, dim_index_base + j, - // last_segment_id, current_segment_id); - - if (j > 0 && current_segment_id > last_segment_id) { + T sum = T(0); + for (Index j = 0; j < actual_height; j++) { + Index current_segment_id = segment_ids[dim_index_base + j]; + PADDLE_ENFORCE(current_segment_id >= last_segment_id, + "the segment ids should be sorted, but got " + "segment_ids[%d]:%d > segment_ids[%d]:%d.", + dim_index_base + j - 1, dim_index_base + j, + last_segment_id, current_segment_id); + if (current_segment_id > last_segment_id) { + for (Index interval_id = last_segment_id + 1; + interval_id < current_segment_id; ++interval_id) { + *(summed_ids + interval_id) = 0; + } + if (j > 0) { if (last_segment_id == first_segment_id) { platform::CudaAtomicAdd(summed_ids + last_segment_id, sum); } else { @@ -61,33 +60,60 @@ __global__ void SegmentMeanCustomKernel( } sum = T(0); } - sum += T(1); - last_segment_id = current_segment_id; } - platform::CudaAtomicAdd(summed_ids + last_segment_id, sum); + sum += T(1); + last_segment_id = current_segment_id; + } + platform::CudaAtomicAdd(summed_ids + last_segment_id, sum); + } +} + +template +__global__ void SegmentMeanKernel(const Index* segment_ids, const T* input, + T* output, T* summed_ids, + const Index input_length_size, + const Index inner_dim_size, + const Index output_length_size, + const Index total_stripe_count) { + CUDA_KERNEL_LOOP(stripe_index, total_stripe_count) { + const Index segment_offset = stripe_index % inner_dim_size; + const Index dim_index_base = + stripe_index / inner_dim_size * Index(DimTileSize); + const Index actual_height = + min(Index(DimTileSize), input_length_size - dim_index_base); + + Index first_segment_id = segment_ids[dim_index_base]; + Index last_segment_id = -1; + if (dim_index_base > 0) { + last_segment_id = segment_ids[dim_index_base - 1]; } - // ensure last_segment_id is the largest - last_segment_id = output_length_size; - __syncthreads(); T sum = T(0); for (Index j = 0; j < actual_height; j++) { Index current_segment_id = segment_ids[dim_index_base + j]; if (current_segment_id > last_segment_id) { - const Index output_index = - last_segment_id * inner_dim_size + segment_offset; - if (last_segment_id == first_segment_id) { - platform::CudaAtomicAdd(output + output_index, - sum / *(summed_ids + last_segment_id)); - } else { - *(output + output_index) = sum / *(summed_ids + last_segment_id); + // reset the interval value which do not have corresponding ids. 
+ for (Index interval_id = last_segment_id + 1; + interval_id < current_segment_id; ++interval_id) { + *(output + interval_id * inner_dim_size + segment_offset) = T(0); + } + + if (j > 0) { + Index output_index = + last_segment_id * inner_dim_size + segment_offset; + + if (last_segment_id == first_segment_id) { + platform::CudaAtomicAdd(output + output_index, + sum / *(summed_ids + last_segment_id)); + } else { + *(output + output_index) = sum / *(summed_ids + last_segment_id); + } + sum = T(0); } - sum = T(0); } sum += input[(dim_index_base + j) * inner_dim_size + segment_offset]; last_segment_id = current_segment_id; } - const Index output_index = - last_segment_id * inner_dim_size + segment_offset; + Index output_index = last_segment_id * inner_dim_size + segment_offset; platform::CudaAtomicAdd(output + output_index, sum / *(summed_ids + last_segment_id)); } @@ -122,7 +148,7 @@ __global__ void SegmentOpsKernel(const Index* segment_ids, const T* input, // reset the interval value which do not have corresponding ids. for (Index interval_id = last_segment_id + 1; interval_id < current_segment_id; ++interval_id) { - *(output + interval_id * inner_dim_size + segment_offset) = 0; + *(output + interval_id * inner_dim_size + segment_offset) = T(0); } // don't update result when j=0 if (j > 0) { @@ -272,11 +298,25 @@ class SegmentPoolFunctor { framework::Tensor* output, framework::Tensor* summed_ids = nullptr, const std::string pooltype = "SUM") { + if (pooltype == "MEAN") { + // Sum the segment id num first + T DimTileSize = 8; + auto input_length_size = segment_ids.numel(); + auto total_stripe_count = + (input_length_size + DimTileSize - 1) / DimTileSize; + auto config = platform::GetGpuLaunchConfig1D(ctx, total_stripe_count); + SegmentSumIdsKernel< + T, IndexT, IndexT(8)><<>>( + segment_ids.data(), summed_ids->data(), input_length_size, + total_stripe_count); + } + auto h = ArrangeHelper(input.numel(), segment_ids.dims()[0], output->dims()[0]); auto config = platform::GetGpuLaunchConfig1D(ctx, h.total_stripe_count); if (pooltype == "MEAN") { - SegmentMeanCustomKernel< + SegmentMeanKernel< T, IndexT, IndexT(8)><<>>( segment_ids.data(), input.data(), output->data(), -- GitLab From f285f4c16212d6bfef772e6f74bf205b09f5e95c Mon Sep 17 00:00:00 2001 From: wenbin Date: Tue, 27 Apr 2021 15:56:46 +0800 Subject: [PATCH 029/720] conservative judgment (#32556) --- paddle/fluid/inference/tensorrt/convert/elementwise_op.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/paddle/fluid/inference/tensorrt/convert/elementwise_op.cc b/paddle/fluid/inference/tensorrt/convert/elementwise_op.cc index 5419933e407..19d79510547 100644 --- a/paddle/fluid/inference/tensorrt/convert/elementwise_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/elementwise_op.cc @@ -25,6 +25,10 @@ static bool CheckDims(const nvinfer1::Dims& dims_x, return false; } for (int i = 0; i < dims_x.nbDims; i++) { + // conservative judgment + if (dims_x.d[i] == -1 || dims_y.d[i] == -1) { + return false; + } if (dims_x.d[i] != dims_y.d[i]) { return false; } -- GitLab From 797b2dfda8decc54b71fe856cf901ce1308a08c1 Mon Sep 17 00:00:00 2001 From: WeiXin Date: Tue, 27 Apr 2021 16:30:32 +0800 Subject: [PATCH 030/720] clear 'BasicEngine' when an exception occurs in the backward. (#32546) * clear 'BasicEngine' when an exception occurs in the backward. * deal with conflict. * deal with conflict. 
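The intended behaviour is easiest to see from the updated unittests: test_pylayer_op.py no longer has to re-enter paddle.fluid.dygraph.guard() around a backward() call that is expected to fail, because the engine now clears its state before rethrowing. The snippet below is only an illustrative sketch of that pattern, not code from this patch: the PyLayer import path is assumed to be paddle.autograd.PyLayer, the layer and variable names are invented, and the ValueError matches what the touched tests assert.

    import paddle
    from paddle.autograd import PyLayer  # assumed import path

    class BadBackward(PyLayer):
        @staticmethod
        def forward(ctx, x):
            return x * 2

        @staticmethod
        def backward(ctx, dy):
            # Invalid on purpose: the input has stop_gradient=False but gets no
            # gradient, so backward() raises a ValueError from inside the engine.
            return None

    x = paddle.randn([2, 3])
    x.stop_gradient = False

    try:
        BadBackward.apply(x).sum().backward()
    except ValueError:
        pass

    # With the Clear() calls added in this patch, the failed pass has cleaned up
    # after itself, so a later, valid backward() in the same program still works.
    (x * 3).sum().backward()

Catching platform::EnforceNotMet separately preserves the original Paddle error message, while any other std::exception is rewrapped as an External error only after the engine state has been released.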
--- paddle/fluid/imperative/basic_engine.cc | 20 +++++++---- .../fluid/tests/unittests/test_pylayer_op.py | 33 ++++++++----------- 2 files changed, 28 insertions(+), 25 deletions(-) diff --git a/paddle/fluid/imperative/basic_engine.cc b/paddle/fluid/imperative/basic_engine.cc index d5350744e4c..023a148763d 100644 --- a/paddle/fluid/imperative/basic_engine.cc +++ b/paddle/fluid/imperative/basic_engine.cc @@ -470,12 +470,20 @@ void BasicEngine::Execute() { { VLOG(3) << "Start to execute grad op " << cur_op.Type(); - if (tmp_ins_ptr == nullptr) { - OpBase::Run(cur_op.InnerOp(), bwd_ins, tmp_outs, cur_op.Attrs(), - cur_op.place()); - } else { - OpBase::Run(cur_op.InnerOp(), *tmp_ins_ptr, tmp_outs, cur_op.Attrs(), - cur_op.place()); + try { + if (tmp_ins_ptr == nullptr) { + OpBase::Run(cur_op.InnerOp(), bwd_ins, tmp_outs, cur_op.Attrs(), + cur_op.place()); + } else { + OpBase::Run(cur_op.InnerOp(), *tmp_ins_ptr, tmp_outs, + cur_op.Attrs(), cur_op.place()); + } + } catch (platform::EnforceNotMet& exception) { + Clear(); + throw std::move(exception); + } catch (std::exception& ex) { + Clear(); + PADDLE_THROW(platform::errors::External("%s", ex.what())); } } diff --git a/python/paddle/fluid/tests/unittests/test_pylayer_op.py b/python/paddle/fluid/tests/unittests/test_pylayer_op.py index d329bf570a5..e3374c15a0a 100644 --- a/python/paddle/fluid/tests/unittests/test_pylayer_op.py +++ b/python/paddle/fluid/tests/unittests/test_pylayer_op.py @@ -234,8 +234,7 @@ class TestPyLayer(unittest.TestCase): z = Layer_bk_none1.apply(input2) with self.assertRaises(ValueError): - with paddle.fluid.dygraph.guard(): - z.sum().backward() + z.sum().backward() class Layer_bk_none2(PyLayer): @staticmethod @@ -249,9 +248,9 @@ class TestPyLayer(unittest.TestCase): input1 = paddle.randn([2, 3]).astype("float64") input1.stop_gradient = False z = Layer_bk_none2.apply(input1, input1) + with self.assertRaises(ValueError): - with paddle.fluid.dygraph.guard(): - z.mean().backward() + z.mean().backward() class Layer_bk_one1(PyLayer): @staticmethod @@ -265,9 +264,9 @@ class TestPyLayer(unittest.TestCase): input1 = paddle.randn([2, 3]).astype("float64") input1.stop_gradient = False z = Layer_bk_one1.apply(input1) + with self.assertRaises(ValueError): - with paddle.fluid.dygraph.guard(): - z.mean().backward() + z.mean().backward() class Layer_bk_one2(PyLayer): @staticmethod @@ -280,11 +279,11 @@ class TestPyLayer(unittest.TestCase): input1 = paddle.randn([2, 3]).astype("float64") input1.stop_gradient = False + y = Layer_bk_one2.apply(input1, input1) z = y[0] + y[1] with self.assertRaises(ValueError): - with paddle.fluid.dygraph.guard(): - z.mean().backward() + z.mean().backward() class Layer_no_bk(PyLayer): @staticmethod @@ -295,10 +294,9 @@ class TestPyLayer(unittest.TestCase): input1.stop_gradient = False z = Layer_no_bk.apply(input1) - with self.assertRaises(NotImplementedError): - with paddle.fluid.dygraph.guard(): - z = z[0] + z[1] - z.mean().backward() + with self.assertRaises(OSError): + z = z[0] + z[1] + z.mean().backward() class Layer_bk_match(PyLayer): @staticmethod @@ -313,9 +311,8 @@ class TestPyLayer(unittest.TestCase): input1.stop_gradient = False z = Layer_bk_match.apply(input1) with self.assertRaises(ValueError): - with paddle.fluid.dygraph.guard(): - z = z[0] + z[1] - z.mean().backward() + z = z[0] + z[1] + z.mean().backward() def test_pylayer_bk_return_none(self): class Layer_bk_none1(PyLayer): @@ -334,8 +331,7 @@ class TestPyLayer(unittest.TestCase): z = Layer_bk_none1.apply(input1, input2) with 
self.assertRaises(ValueError): - with paddle.fluid.dygraph.guard(): - z.mean().backward() + z.mean().backward() class Layer_bk_none2(PyLayer): @staticmethod @@ -353,8 +349,7 @@ class TestPyLayer(unittest.TestCase): z = Layer_bk_none2.apply(input1, input2) z = z[0] + z[1] with self.assertRaises(ValueError): - with paddle.fluid.dygraph.guard(): - z.mean().backward() + z.mean().backward() def test_pylayer_inplace(self): class cus_tanh(PyLayer): -- GitLab From 79f7ba69877b038fa2607a28edea72ca53e2c253 Mon Sep 17 00:00:00 2001 From: WeiXin Date: Tue, 27 Apr 2021 16:32:22 +0800 Subject: [PATCH 031/720] edit paddle.save/load API (#32532) * edit paddle.save/load API * Update io.py edit doc * delete cpython-37.pyc * Update io.py edit doc * Update io.py recommit * Update io.py recommit * Update io.py recommit * Update io.py recommit --- python/paddle/framework/io.py | 8 ++++---- .../static_mode_white_list.cpython-37.pyc | Bin 20443 -> 0 bytes 2 files changed, 4 insertions(+), 4 deletions(-) delete mode 100644 tools/__pycache__/static_mode_white_list.cpython-37.pyc diff --git a/python/paddle/framework/io.py b/python/paddle/framework/io.py index 955d8610a59..ac0e172d49d 100644 --- a/python/paddle/framework/io.py +++ b/python/paddle/framework/io.py @@ -494,7 +494,7 @@ def save(obj, path, protocol=2, **configs): Save an object to the specified path. .. note:: - Now supports saving ``state_dict`` of Layer or Optimizer, Tensor. + Now supports saving ``state_dict`` of Layer/Optimizer, Layer, Tensor and nested structure containing Tensor. .. note:: Different from ``paddle.jit.save``, since the save result of ``paddle.save`` is a single file, @@ -558,7 +558,7 @@ def save(obj, path, protocol=2, **configs): prog = paddle.static.default_main_program() for var in prog.list_vars(): if list(var.shape) == [224, 10]: - tensor = var.get_tensor() + tensor = var.get_value() break # save/load tensor @@ -665,7 +665,7 @@ def load(path, **configs): Load an object can be used in paddle from specified path. .. note:: - Now supports load ``state_dict`` of Layer or Optimizer, Tensor. + Now supports loading ``state_dict`` of Layer/Optimizer, Layer, Tensor and nested structure containing Tensor. .. 
note:: In order to use the model parameters saved by paddle more efficiently, @@ -758,7 +758,7 @@ def load(path, **configs): prog = paddle.static.default_main_program() for var in prog.list_vars(): if list(var.shape) == [224, 10]: - tensor = var.get_tensor() + tensor = var.get_value() break # save/load tensor diff --git a/tools/__pycache__/static_mode_white_list.cpython-37.pyc b/tools/__pycache__/static_mode_white_list.cpython-37.pyc deleted file mode 100644 index b1e58ce7689c7db6cc0ce4ed18f87752b16d8beb..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 20443
zE<9m!{p(s&GBxPotl#Ki>6QmaeF>uvX+tw7=;gf9XiwMf(S_2&QjHtt*s|v%4HcVu z-@5sJb#7EszYsnvy|nq1csENz9%?;kvXnOsLU7!rl5W-}&489sL~w&0g5f4gA5`ge z)~{hR+{fyIX%|Z`5?N?l~Y14B3ez!$0c%L zVrhA$Pr97WwM?Y7FRPY>9uLSu1~D23qXzVGGCke*yOAdO><}U$mJ8`%5KV9}S{5mi zG?F^IdtK?$IUovqFR8euNo7nm~ZT+T*~&&)%RlA7enETkTZS)rQlsBncG&Bly7tBJE=(e`qzk8M>i+Qew>VCC;A z#i1%I>u_IPoeq~6f;uzJB`dSQ)awCdB8uoPr&K65D;nE+ywEt;ShA9qhx23<@||{G zj3NTD5J9Wbsov)?#5!5E)q~G+M)YRxCbX|)HaXg%lA4sGUTP_VN*z(26`?||o6W3r zj6F0v>gKZC>|*ZBtwTxq!2)9Vo+wf1h2j=sx6%Ybi($GuZbNn(+y+m+gz9LwTHjK$ z%2_+DYp>n=OI41)>xQ-3QTJXPhNzuz+LX@{{nOo@$e_EOlJxCvRtD`Pk8pQmMD@Au zrkyhtMUI)4Y%~d`roBUrtwLsEJgT^q$o-PWAs7`d4aT!-pwKG$Ntaw4XnG^>(V#cbjXb~(30r<}5+oiQa2tcBAI z)W%+GB_i#s@4iw(<|D#}Lh*<+A#za?#{Lv55&@l?$IvUS!z7|2W1&|FLR8{|aXeY% z=*yet&4dIJw~u!Zp>shba8Sp^9+D=+^q`S|>Co=F;@Lmlz1EZ`6JPzC(*iqJ*2{>4 zqO{W#Dk8FnT1&W+Czfikq6sW zj4=WI-WbNU0VoMtT5)GqYXlkrKtoFqgM@2jtGkNHMa|T27+SM zBzT^uV;4gz_1siXZC4b@Wr)&=NNE@sQW{JN$_w=2W zPCLVD78$l4>e`uId_a$A$OvN9X2DYh!kL_mx^q>Z@+#EWAL4X~vny5GCQK-YquAjj z&G9j`$^_SWMFlX6>ABb@!Z2Hh=4a5MNcN2QxXD5_jN`=LBy6LUQ^I~fmeX2es7)WW zxOMHG*$~TJ$tp97L;K-?^l+D9VJ-QyCUOyKt{evIA}!-dC$kZKX_&<+2YZxC7^2m1 z7YzUYVO=N!FJ}>UY_^+rva(iBf%QaPHT9UqG@G3Xk|I{{*Yq3;nb9CW%-ZHV(-|Xg zoqIJSGBx%SuGrA2(zE5!!{%T%ir4K3G|Ji5@zrtLHrI%YO6mVJRgbE4^l3pwqT3r^ zO`mpkv#?dJ)fWe0TZBgpXdUB_z-^lb34~o`rDcAX$a#5^O>B3F#i}BteqL7x6Jh}^ zns+0I`upLv>6gp65;zj-56Odo1os`t)K@z|po5z|%Q2(y7?NWL;)F}Hf;>{Q3ez>- zaHFQ>`_gh_t>b&m!4;4e(s(+iM{H5=NLJ1QQjeNaP|;lFwjdq71;6|t4lR*X&PwIK zIl`jJPMj3G5l)RBdurv*eK_<*O}I;ap&g}q1jLghR+P5-LzAw%w|E&oHmXTy9J(Ga z#Mva$Ms{Z)+8(SwFl(v=_gnLH2*Z-5+vUtMxd_JkHVrwdV$b9Piyl_DZ=dUdzj@Ax z5C%<4yxshe??r*LQvLH}pnoZnhb~c%l&J4O9fXiBS(->OhQ+g1%5*AIK+xsP~# zgMz#k4SU$z6l1Mdnyn$C_G=!ouwnvZn}f85VzSh<0#_K!YI=%kqI>10uoFLwUP_J= z=ZJW;jtf^j_-A!SNSB#>3)xg;2;YuiFQ}4eS%9M=J*sg5Y>=YJgMu(^k$Z~_gH`zA zQGp7peTa2Jir{UhsMrggM)c;G&rU&=GfUAKEbS6o8i#{QaW%&inwt*p zaqNJh-RrvhqjrxOaga|Ut)(F?f%pZLkRg0VQhWN})m#9!f%8PaR}-iD_DmESv6MUa z5PZV%a$zB(2Z@#`dIGHRk{-UPBlO93_jG*u_Q(}_f*_`0moxf6&n-u~uI2fLGoaN3 zykk&XOf>R@kj}=u$bjS0OxU-y=N^q_V`F%@vaMA_J8SVEDh?21#z~Z%V+ay)vzHB+ zfm_aYrzz1aJq--Y5*%{JgrU__1iHK6hQHEFdg)@|ImUT5XR5b*R?>RU^m{J5s->>u z8W+;ValxM0V|O=$ES=87;J(#!VP+L8^W2!s*&^Cl*$PoRd#ru+RS4}9t5>1cTRbU5 zjaTQSmVQp9=OzR7)mL3pTI&;OOzeY^=n;D~H#Lk|S{*kla57wdtTEET=&0{`HnT9D@<%V~R4fbQ#DP#LN1UR1gl;wm>ho6?$^KZmv86vcMgSFY#Sw(@x7 za-y}moj2>Ov1{yunIpSu(`}8_#MR#PyT$2!g<&sEtz77ov|g@=xvsZ(HsSVQf$j0D z4TluleTvH&p5NG+p2xMb*Lg%`Su22pBE4@Wbi8W_hCf6Nt;XK9k%e$+RE+_vb&J2$ z1j9wer@x#NqDOUFt(UrXdOJ6Tz zc-JY6>>Up3s!rEHT#S^mcf8Ylw`Tp7;C@n0VV1?C;+ z=~`;s(O1g8tzIy253)husK&a|4ElL)pt#-29DS7r{#APilk;0o$bAqi_Z^WsVx4}r<=@i*R z#4%Lu!E6+p5_j%2SLpKI6!dPSs@&>MLFh&sUepqTFw=*meL5{Kkw?A#f9#Xx+$tas ztoIi?u~s`SPv+G3(dw8~vbo#rHnZ@)FtT5YNZ7IM{@HH)uE$EJviWwCWtIc*Q(6=?ZBIx0D)1CKm z*-%t!7e(j&Qws-D(U3xsxIC}Nh{=bh&Jn_Hk}X7C|7dd0OviK5ZkpKt-R_3B=6G(o zbSieaS!3?&RKC!b!!Z@-Q{L(|K8wW+?>?f?_l%;cn)BFw#%HmabDf;JGkld z4qCEfcWv;qP5g{LycfhV^)XX`zNZ&G4(qwr(JjrP9sAKdpjcr-XrEpqUJhpHGf#)~ z{Rk=TSv!00J4AqX$k}_}C**1EoYmX4A)6cF-??Hgpn8Veo6SuA4%7?qm z!Oi&fj5j`u!@O^jAU@&#DcaVuVPAH%9WtEWSYCUj8&2ESKGhAUZT)b)bVKepqwnBh zr>=9Q!`heo(J6WLFFRWXcdq&--@Y})7`*DDzTUp}+08-pw4ndU<{;X>Hb|#0{d=8a ztaWvxw-)8}ulYTWwPwQBzK2hEJ$~xjJ-S_QKV*oIYt`qqjqdb9s)VMdpl)$={GU@4|)h6&uU-Pbf@2V z>v!N>^Pkvm*MW zI~sknCur^?@VS_O=Q)??iT06Jt#)|9t~aL`T}H5qW9qMQwf=Ud{;TIWR6oOn*=Cms^CWyG*HAa-@WD{#UH0>Cb?zL!o>K9K2ancm`*q=!ZB4!;fT;OIXKc7TS zV@2gY!D9WMI{TeXjq_s+C-|9C3iH#%(-QT!AJZ>1+N+U-ZC)9%U5!M>`MTg&@m9j4 z6pAb|Q$F+RFg{%o(B*kZ*T3(-a5k@&9QemUpAOrL(A9=BUV5`rd|2RGV24%RMV_?d zen-wWAS9*cmls>=Fny5XqlHKWqkD$NZw5w;?ey-_Eyqs#k%zlHz>}fB?PF!;=`5n3 
zisIF-l8#-x9kuPWP1&qUCArz2W}lHz%*%P96`+vesD0RP+N_AR<$=f2cy3!>?E^rt__SmMJAYIeG%Hvd*_ReNZxl~fr~OqJqxuG= zLo>bT6W?;4MS~x94~7YMbu9IEarV;*>*oF;9d->#@tf>Ma{qP6C1bC=#!mW8ID5gT z>qj`oc}hU)|5NeX4t9-l$vLN7b=GAk9(Lm5M`Ta9^pO{3Coa0|#Ko69I(z)Zm!0^` zk>;BGPoC_ruU&e6!_~X<^(?wQzbmx+Pe>VBHuWt2{3o(?RdN1;IxIcCGOu&VD7b0m zm7~J*<_A}u_CSc-j(hN#`ahD|;Y+8U^7TLeKXc$3r+od-*Ew)F2UIV#CH>p~lqlYQ M_}gEn4mt3D0KTbuYybcN -- GitLab From 125e4816a81e8658b86b11a936d5fafadcc6d44e Mon Sep 17 00:00:00 2001 From: zhiboniu <31800336+zhiboniu@users.noreply.github.com> Date: Tue, 27 Apr 2021 18:04:18 +0800 Subject: [PATCH 032/720] update 2.0 public api in paddle.init (#32034) Co-authored-by: XiaoguangHu <46782768+XiaoguangHu01@users.noreply.github.com> --- python/paddle/__init__.py | 712 ++++++++++++++++++++++++-------------- 1 file changed, 450 insertions(+), 262 deletions(-) diff --git a/python/paddle/__init__.py b/python/paddle/__init__.py index 4b9f310e73b..054fcdfcbe6 100755 --- a/python/paddle/__init__.py +++ b/python/paddle/__init__.py @@ -11,9 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - -import os - try: from paddle.version import full_version as __version__ from paddle.version import commit as __git_commit__ @@ -30,280 +27,471 @@ from .fluid import monkey_patch_variable from .fluid.dygraph import monkey_patch_math_varbase monkey_patch_variable() monkey_patch_math_varbase() -import paddle.framework -from .framework.dtype import dtype as dtype -from paddle.framework.dtype import uint8 -from paddle.framework.dtype import int8 -from paddle.framework.dtype import int16 -from paddle.framework.dtype import int32 -from paddle.framework.dtype import int64 -from paddle.framework.dtype import float16 -from paddle.framework.dtype import float32 -from paddle.framework.dtype import float64 -from paddle.framework.dtype import bfloat16 -from paddle.framework.dtype import bool -from paddle.framework.dtype import complex64 -from paddle.framework.dtype import complex128 -from .framework import VarBase as Tensor -Tensor.__qualname__ = 'Tensor' -import paddle.compat -import paddle.distributed -import paddle.sysconfig -import paddle.tensor -import paddle.distribution -import paddle.nn -import paddle.distributed.fleet -import paddle.optimizer -import paddle.metric -import paddle.device -import paddle.regularizer -import paddle.incubate -import paddle.autograd +from .framework.dtype import dtype as dtype # noqa: F401 +from paddle.framework.dtype import uint8 # noqa: F401 +from paddle.framework.dtype import int8 # noqa: F401 +from paddle.framework.dtype import int16 # noqa: F401 +from paddle.framework.dtype import int32 # noqa: F401 +from paddle.framework.dtype import int64 # noqa: F401 +from paddle.framework.dtype import float16 # noqa: F401 +from paddle.framework.dtype import float32 # noqa: F401 +from paddle.framework.dtype import float64 # noqa: F401 +from paddle.framework.dtype import bfloat16 # noqa: F401 +from paddle.framework.dtype import bool # noqa: F401 +from paddle.framework.dtype import complex64 # noqa: F401 +from paddle.framework.dtype import complex128 # noqa: F401 +from .framework import VarBase as Tensor # noqa: F401 +Tensor.__qualname__ = 'Tensor' # noqa: F401 +import paddle.compat # noqa: F401 +import paddle.distributed # noqa: F401 +import paddle.sysconfig # noqa: F401 +import paddle.distribution # noqa: F401 +import paddle.nn # noqa: F401 +import paddle.distributed.fleet # noqa: F401 
+import paddle.optimizer # noqa: F401 +import paddle.metric # noqa: F401 +import paddle.regularizer # noqa: F401 +import paddle.incubate # noqa: F401 +import paddle.autograd # noqa: F401 -# TODO: define alias in tensor and framework directory +import paddle.jit # noqa: F401 +import paddle.amp # noqa: F401 +import paddle.dataset # noqa: F401 +import paddle.inference # noqa: F401 +import paddle.io # noqa: F401 +import paddle.onnx # noqa: F401 +import paddle.reader # noqa: F401 +import paddle.static # noqa: F401 +import paddle.vision # noqa: F401 -from .tensor.random import randperm -from .tensor.random import bernoulli +from .tensor.random import bernoulli # noqa: F401 -from .tensor.attribute import rank #DEFINE_ALIAS -from .tensor.attribute import shape #DEFINE_ALIAS -from .tensor.attribute import real #DEFINE_ALIAS -from .tensor.attribute import imag #DEFINE_ALIAS -from .tensor.creation import to_tensor #DEFINE_ALIAS -from .tensor.creation import diag #DEFINE_ALIAS -from .tensor.creation import eye #DEFINE_ALIAS -# from .tensor.creation import fill_constant #DEFINE_ALIAS -# from .tensor.creation import get_tensor_from_selected_rows #DEFINE_ALIAS -from .tensor.creation import linspace #DEFINE_ALIAS -from .tensor.creation import ones #DEFINE_ALIAS -from .tensor.creation import ones_like #DEFINE_ALIAS -from .tensor.creation import zeros #DEFINE_ALIAS -from .tensor.creation import zeros_like #DEFINE_ALIAS -from .tensor.creation import arange #DEFINE_ALIAS -from .tensor.creation import eye #DEFINE_ALIAS -from .tensor.creation import full #DEFINE_ALIAS -from .tensor.creation import full_like #DEFINE_ALIAS -from .tensor.creation import triu #DEFINE_ALIAS -from .tensor.creation import tril #DEFINE_ALIAS -from .tensor.creation import meshgrid #DEFINE_ALIAS -from .tensor.creation import empty #DEFINE_ALIAS -from .tensor.creation import empty_like #DEFINE_ALIAS -from .tensor.creation import assign #DEFINE_ALIAS -from .tensor.linalg import matmul #DEFINE_ALIAS -from .tensor.linalg import dot #DEFINE_ALIAS -# from .tensor.linalg import einsum #DEFINE_ALIAS -from .tensor.linalg import norm #DEFINE_ALIAS -from .tensor.linalg import transpose #DEFINE_ALIAS -from .tensor.linalg import dist #DEFINE_ALIAS -from .tensor.linalg import t #DEFINE_ALIAS -from .tensor.linalg import cross #DEFINE_ALIAS -from .tensor.linalg import cholesky #DEFINE_ALIAS -# from .tensor.linalg import tensordot #DEFINE_ALIAS -from .tensor.linalg import bmm #DEFINE_ALIAS -from .tensor.linalg import histogram #DEFINE_ALIAS -from .tensor.linalg import mv #DEFINE_ALIAS -from .tensor.logic import equal #DEFINE_ALIAS -from .tensor.logic import greater_equal #DEFINE_ALIAS -from .tensor.logic import greater_than #DEFINE_ALIAS -from .tensor.logic import is_empty #DEFINE_ALIAS -#from .tensor.logic import isfinite #DEFINE_ALIAS -from .tensor.logic import less_equal #DEFINE_ALIAS -from .tensor.logic import less_than #DEFINE_ALIAS -from .tensor.logic import logical_and #DEFINE_ALIAS -from .tensor.logic import logical_not #DEFINE_ALIAS -from .tensor.logic import logical_or #DEFINE_ALIAS -from .tensor.logic import logical_xor #DEFINE_ALIAS -from .tensor.logic import not_equal #DEFINE_ALIAS -from .tensor.logic import allclose #DEFINE_ALIAS -from .tensor.logic import equal_all #DEFINE_ALIAS -# from .tensor.logic import isnan #DEFINE_ALIAS -from .tensor.logic import is_tensor #DEFINE_ALIAS -from .tensor.manipulation import cast #DEFINE_ALIAS -from .tensor.manipulation import concat #DEFINE_ALIAS -from .tensor.manipulation import expand #DEFINE_ALIAS 
-from .tensor.manipulation import broadcast_to #DEFINE_ALIAS -from .tensor.manipulation import expand_as #DEFINE_ALIAS -from .tensor.manipulation import tile #DEFINE_ALIAS -from .tensor.manipulation import flatten #DEFINE_ALIAS -from .tensor.manipulation import gather #DEFINE_ALIAS -from .tensor.manipulation import gather_nd #DEFINE_ALIAS -from .tensor.manipulation import reshape #DEFINE_ALIAS -from .tensor.manipulation import reshape_ #DEFINE_ALIAS -from .tensor.manipulation import flip as reverse #DEFINE_ALIAS -from .tensor.manipulation import scatter #DEFINE_ALIAS -from .tensor.manipulation import scatter_ #DEFINE_ALIAS -from .tensor.manipulation import scatter_nd_add #DEFINE_ALIAS -from .tensor.manipulation import scatter_nd #DEFINE_ALIAS -from .tensor.manipulation import shard_index #DEFINE_ALIAS -from .tensor.manipulation import slice #DEFINE_ALIAS -from .tensor.manipulation import split #DEFINE_ALIAS -from .tensor.manipulation import squeeze #DEFINE_ALIAS -from .tensor.manipulation import squeeze_ #DEFINE_ALIAS -from .tensor.manipulation import stack #DEFINE_ALIAS -from .tensor.manipulation import strided_slice #DEFINE_ALIAS -from .tensor.manipulation import transpose #DEFINE_ALIAS -from .tensor.manipulation import unique #DEFINE_ALIAS -from .tensor.manipulation import unsqueeze #DEFINE_ALIAS -from .tensor.manipulation import unsqueeze_ #DEFINE_ALIAS -from .tensor.manipulation import unstack #DEFINE_ALIAS -from .tensor.manipulation import flip #DEFINE_ALIAS -from .tensor.manipulation import unbind #DEFINE_ALIAS -from .tensor.manipulation import roll #DEFINE_ALIAS -from .tensor.manipulation import chunk #DEFINE_ALIAS -from .tensor.manipulation import tolist #DEFINE_ALIAS -from .tensor.math import abs #DEFINE_ALIAS -from .tensor.math import acos #DEFINE_ALIAS -from .tensor.math import asin #DEFINE_ALIAS -from .tensor.math import atan #DEFINE_ALIAS -from .tensor.math import ceil #DEFINE_ALIAS -from .tensor.math import cos #DEFINE_ALIAS -from .tensor.math import tan #DEFINE_ALIAS -from .tensor.math import cosh #DEFINE_ALIAS -from .tensor.math import cumsum #DEFINE_ALIAS -# from .tensor.math import elementwise_add #DEFINE_ALIAS -# from .tensor.math import elementwise_div #DEFINE_ALIAS -# from .tensor.math import elementwise_floordiv #DEFINE_ALIAS -# from .tensor.math import elementwise_mod #DEFINE_ALIAS -# from .tensor.math import elementwise_pow #DEFINE_ALIAS -# from .tensor.math import elementwise_sub #DEFINE_ALIAS -from .tensor.math import exp #DEFINE_ALIAS -from .tensor.math import floor #DEFINE_ALIAS -from .tensor.math import increment #DEFINE_ALIAS -from .tensor.math import log #DEFINE_ALIAS -from .tensor.math import log2 #DEFINE_ALIAS -from .tensor.math import log10 #DEFINE_ALIAS -from .tensor.math import multiplex #DEFINE_ALIAS -from .tensor.math import pow #DEFINE_ALIAS -from .tensor.math import reciprocal #DEFINE_ALIAS -# from .tensor.math import reduce_max #DEFINE_ALIAS -# from .tensor.math import reduce_min #DEFINE_ALIAS -# from .tensor.math import reduce_prod #DEFINE_ALIAS -# from .tensor.math import reduce_sum #DEFINE_ALIAS -from .tensor.math import all #DEFINE_ALIAS -from .tensor.math import any #DEFINE_ALIAS -from .tensor.math import round #DEFINE_ALIAS -from .tensor.math import rsqrt #DEFINE_ALIAS -from .tensor.math import scale #DEFINE_ALIAS -from .tensor.math import sign #DEFINE_ALIAS -from .tensor.math import sin #DEFINE_ALIAS -from .tensor.math import sinh #DEFINE_ALIAS -from .tensor.math import sqrt #DEFINE_ALIAS -from .tensor.math import square #DEFINE_ALIAS -from 
.tensor.math import stanh #DEFINE_ALIAS -from .tensor.math import sum #DEFINE_ALIAS -from .tensor.math import tanh #DEFINE_ALIAS -from .tensor.math import tanh_ #DEFINE_ALIAS -from .tensor.math import add_n #DEFINE_ALIAS -from .tensor.math import max #DEFINE_ALIAS -from .tensor.math import maximum #DEFINE_ALIAS -from .tensor.math import min #DEFINE_ALIAS -from .tensor.math import minimum #DEFINE_ALIAS -from .tensor.math import mm #DEFINE_ALIAS -from .tensor.math import divide #DEFINE_ALIAS -from .tensor.math import floor_divide #DEFINE_ALIAS -from .tensor.math import remainder #DEFINE_ALIAS -from .tensor.math import mod #DEFINE_ALIAS -from .tensor.math import floor_mod #DEFINE_ALIAS -from .tensor.math import multiply #DEFINE_ALIAS -from .tensor.math import add #DEFINE_ALIAS -from .tensor.math import subtract #DEFINE_ALIAS -from .tensor.math import atan #DEFINE_ALIAS -from .tensor.math import logsumexp #DEFINE_ALIAS -from .tensor.math import inverse #DEFINE_ALIAS -from .tensor.math import log1p #DEFINE_ALIAS -from .tensor.math import erf #DEFINE_ALIAS -from .tensor.math import addmm #DEFINE_ALIAS -from .tensor.math import clip #DEFINE_ALIAS -from .tensor.math import trace #DEFINE_ALIAS -from .tensor.math import kron #DEFINE_ALIAS -from .tensor.math import isfinite #DEFINE_ALIAS -from .tensor.math import isinf #DEFINE_ALIAS -from .tensor.math import isnan #DEFINE_ALIAS -from .tensor.math import prod #DEFINE_ALIAS -from .tensor.math import broadcast_shape #DEFINE_ALIAS -from .tensor.math import conj #DEFINE_ALIAS +from .tensor.attribute import rank # noqa: F401 +from .tensor.attribute import shape # noqa: F401 +from .tensor.attribute import real # noqa: F401 +from .tensor.attribute import imag # noqa: F401 +from .tensor.creation import to_tensor # noqa: F401 +from .tensor.creation import diag # noqa: F401 +from .tensor.creation import eye # noqa: F401 +from .tensor.creation import linspace # noqa: F401 +from .tensor.creation import ones # noqa: F401 +from .tensor.creation import ones_like # noqa: F401 +from .tensor.creation import zeros # noqa: F401 +from .tensor.creation import zeros_like # noqa: F401 +from .tensor.creation import arange # noqa: F401 +from .tensor.creation import full # noqa: F401 +from .tensor.creation import full_like # noqa: F401 +from .tensor.creation import triu # noqa: F401 +from .tensor.creation import tril # noqa: F401 +from .tensor.creation import meshgrid # noqa: F401 +from .tensor.creation import empty # noqa: F401 +from .tensor.creation import empty_like # noqa: F401 +from .tensor.creation import assign # noqa: F401 +from .tensor.linalg import matmul # noqa: F401 +from .tensor.linalg import dot # noqa: F401 +from .tensor.linalg import norm # noqa: F401 +from .tensor.linalg import transpose # noqa: F401 +from .tensor.linalg import dist # noqa: F401 +from .tensor.linalg import t # noqa: F401 +from .tensor.linalg import cross # noqa: F401 +from .tensor.linalg import cholesky # noqa: F401 +from .tensor.linalg import bmm # noqa: F401 +from .tensor.linalg import histogram # noqa: F401 +from .tensor.linalg import mv # noqa: F401 +from .tensor.logic import equal # noqa: F401 +from .tensor.logic import greater_equal # noqa: F401 +from .tensor.logic import greater_than # noqa: F401 +from .tensor.logic import is_empty # noqa: F401 +from .tensor.logic import less_equal # noqa: F401 +from .tensor.logic import less_than # noqa: F401 +from .tensor.logic import logical_and # noqa: F401 +from .tensor.logic import logical_not # noqa: F401 +from .tensor.logic import logical_or # 
noqa: F401 +from .tensor.logic import logical_xor # noqa: F401 +from .tensor.logic import not_equal # noqa: F401 +from .tensor.logic import allclose # noqa: F401 +from .tensor.logic import equal_all # noqa: F401 +from .tensor.logic import is_tensor # noqa: F401 +from .tensor.manipulation import cast # noqa: F401 +from .tensor.manipulation import concat # noqa: F401 +from .tensor.manipulation import expand # noqa: F401 +from .tensor.manipulation import broadcast_to # noqa: F401 +from .tensor.manipulation import expand_as # noqa: F401 +from .tensor.manipulation import tile # noqa: F401 +from .tensor.manipulation import flatten # noqa: F401 +from .tensor.manipulation import gather # noqa: F401 +from .tensor.manipulation import gather_nd # noqa: F401 +from .tensor.manipulation import reshape # noqa: F401 +from .tensor.manipulation import reshape_ # noqa: F401 +from .tensor.manipulation import flip as reverse # noqa: F401 +from .tensor.manipulation import scatter # noqa: F401 +from .tensor.manipulation import scatter_ # noqa: F401 +from .tensor.manipulation import scatter_nd_add # noqa: F401 +from .tensor.manipulation import scatter_nd # noqa: F401 +from .tensor.manipulation import shard_index # noqa: F401 +from .tensor.manipulation import slice # noqa: F401 +from .tensor.manipulation import split # noqa: F401 +from .tensor.manipulation import squeeze # noqa: F401 +from .tensor.manipulation import squeeze_ # noqa: F401 +from .tensor.manipulation import stack # noqa: F401 +from .tensor.manipulation import strided_slice # noqa: F401 +from .tensor.manipulation import transpose # noqa: F401 +from .tensor.manipulation import unique # noqa: F401 +from .tensor.manipulation import unsqueeze # noqa: F401 +from .tensor.manipulation import unsqueeze_ # noqa: F401 +from .tensor.manipulation import unstack # noqa: F401 +from .tensor.manipulation import flip # noqa: F401 +from .tensor.manipulation import unbind # noqa: F401 +from .tensor.manipulation import roll # noqa: F401 +from .tensor.manipulation import chunk # noqa: F401 +from .tensor.manipulation import tolist # noqa: F401 +from .tensor.math import abs # noqa: F401 +from .tensor.math import acos # noqa: F401 +from .tensor.math import asin # noqa: F401 +from .tensor.math import atan # noqa: F401 +from .tensor.math import ceil # noqa: F401 +from .tensor.math import cos # noqa: F401 +from .tensor.math import tan # noqa: F401 +from .tensor.math import cosh # noqa: F401 +from .tensor.math import cumsum # noqa: F401 +from .tensor.math import exp # noqa: F401 +from .tensor.math import floor # noqa: F401 +from .tensor.math import increment # noqa: F401 +from .tensor.math import log # noqa: F401 +from .tensor.math import log2 # noqa: F401 +from .tensor.math import log10 # noqa: F401 +from .tensor.math import multiplex # noqa: F401 +from .tensor.math import pow # noqa: F401 +from .tensor.math import reciprocal # noqa: F401 +from .tensor.math import all # noqa: F401 +from .tensor.math import any # noqa: F401 +from .tensor.math import round # noqa: F401 +from .tensor.math import rsqrt # noqa: F401 +from .tensor.math import scale # noqa: F401 +from .tensor.math import sign # noqa: F401 +from .tensor.math import sin # noqa: F401 +from .tensor.math import sinh # noqa: F401 +from .tensor.math import sqrt # noqa: F401 +from .tensor.math import square # noqa: F401 +from .tensor.math import stanh # noqa: F401 +from .tensor.math import sum # noqa: F401 +from .tensor.math import tanh # noqa: F401 +from .tensor.math import tanh_ # noqa: F401 +from .tensor.math import add_n 
# noqa: F401 +from .tensor.math import max # noqa: F401 +from .tensor.math import maximum # noqa: F401 +from .tensor.math import min # noqa: F401 +from .tensor.math import minimum # noqa: F401 +from .tensor.math import mm # noqa: F401 +from .tensor.math import divide # noqa: F401 +from .tensor.math import floor_divide # noqa: F401 +from .tensor.math import remainder # noqa: F401 +from .tensor.math import mod # noqa: F401 +from .tensor.math import floor_mod # noqa: F401 +from .tensor.math import multiply # noqa: F401 +from .tensor.math import add # noqa: F401 +from .tensor.math import subtract # noqa: F401 +from .tensor.math import atan # noqa: F401 +from .tensor.math import logsumexp # noqa: F401 +from .tensor.math import inverse # noqa: F401 +from .tensor.math import log1p # noqa: F401 +from .tensor.math import erf # noqa: F401 +from .tensor.math import addmm # noqa: F401 +from .tensor.math import clip # noqa: F401 +from .tensor.math import trace # noqa: F401 +from .tensor.math import kron # noqa: F401 +from .tensor.math import isfinite # noqa: F401 +from .tensor.math import isinf # noqa: F401 +from .tensor.math import isnan # noqa: F401 +from .tensor.math import prod # noqa: F401 +from .tensor.math import broadcast_shape # noqa: F401 +from .tensor.math import conj # noqa: F401 -from .tensor.random import multinomial #DEFINE_ALIAS -from .tensor.random import standard_normal -from .tensor.random import normal -from .tensor.random import uniform #DEFINE_ALIAS -from .tensor.random import randn #DEFINE_ALIAS -from .tensor.random import rand #DEFINE_ALIAS -from .tensor.random import randint #DEFINE_ALIAS -from .tensor.random import randperm #DEFINE_ALIAS -from .tensor.search import argmax #DEFINE_ALIAS -from .tensor.search import argmin #DEFINE_ALIAS -from .tensor.search import argsort #DEFINE_ALIAS -# from .tensor.search import has_inf #DEFINE_ALIAS -# from .tensor.search import has_nan #DEFINE_ALIAS -from .tensor.search import masked_select #DEFINE_ALIAS -from .tensor.search import topk #DEFINE_ALIAS -from .tensor.search import where #DEFINE_ALIAS -from .tensor.search import index_select #DEFINE_ALIAS -from .tensor.search import nonzero #DEFINE_ALIAS -from .tensor.search import sort #DEFINE_ALIAS +from .tensor.random import multinomial # noqa: F401 +from .tensor.random import standard_normal # noqa: F401 +from .tensor.random import normal # noqa: F401 +from .tensor.random import uniform # noqa: F401 +from .tensor.random import randn # noqa: F401 +from .tensor.random import rand # noqa: F401 +from .tensor.random import randint # noqa: F401 +from .tensor.random import randperm # noqa: F401 +from .tensor.search import argmax # noqa: F401 +from .tensor.search import argmin # noqa: F401 +from .tensor.search import argsort # noqa: F401 +from .tensor.search import masked_select # noqa: F401 +from .tensor.search import topk # noqa: F401 +from .tensor.search import where # noqa: F401 +from .tensor.search import index_select # noqa: F401 +from .tensor.search import nonzero # noqa: F401 +from .tensor.search import sort # noqa: F401 -from .tensor.to_string import set_printoptions #DEFINE_ALIAS +from .tensor.to_string import set_printoptions # noqa: F401 -from .framework.random import seed #DEFINE_ALIAS -from .framework.random import get_cuda_rng_state #DEFINE_ALIAS -from .framework.random import set_cuda_rng_state #DEFINE_ALIAS -from .framework import ParamAttr #DEFINE_ALIAS -# from .framework import create_global_var #DEFINE_ALIAS -from .framework import create_parameter #DEFINE_ALIAS -from .framework 
import CPUPlace #DEFINE_ALIAS -from .framework import CUDAPlace #DEFINE_ALIAS -from .framework import NPUPlace #DEFINE_ALIAS -from .framework import CUDAPinnedPlace #DEFINE_ALIAS +from .framework.random import seed # noqa: F401 +from .framework.random import get_cuda_rng_state # noqa: F401 +from .framework.random import set_cuda_rng_state # noqa: F401 +from .framework import ParamAttr # noqa: F401 +from .framework import create_parameter # noqa: F401 +from .framework import CPUPlace # noqa: F401 +from .framework import CUDAPlace # noqa: F401 +from .framework import NPUPlace # noqa: F401 +from .framework import CUDAPinnedPlace # noqa: F401 -from .framework import grad #DEFINE_ALIAS -from .framework import no_grad #DEFINE_ALIAS -from .framework import set_grad_enabled #DEFINE_ALIAS -from .framework import save #DEFINE_ALIAS -from .framework import load #DEFINE_ALIAS -from .framework import DataParallel #DEFINE_ALIAS +from .framework import grad # noqa: F401 +from .framework import no_grad # noqa: F401 +from .framework import set_grad_enabled # noqa: F401 +from .framework import save # noqa: F401 +from .framework import load # noqa: F401 +from .framework import DataParallel # noqa: F401 from .framework import set_default_dtype #DEFINE_ALIAS from .framework import get_default_dtype #DEFINE_ALIAS from .framework import set_grad_enabled #DEFINE_ALIAS -from .tensor.search import index_sample #DEFINE_ALIAS -from .tensor.stat import mean #DEFINE_ALIAS -# from .tensor.stat import reduce_mean #DEFINE_ALIAS -from .tensor.stat import std #DEFINE_ALIAS -from .tensor.stat import var #DEFINE_ALIAS -# from .fluid.data import data -from .tensor.stat import numel #DEFINE_ALIAS -from .tensor.stat import median #DEFINE_ALIAS -from .device import get_cudnn_version -from .device import set_device -from .device import get_device -from .device import is_compiled_with_cuda #DEFINE_ALIAS -from .device import is_compiled_with_xpu -from .device import is_compiled_with_npu -from .device import XPUPlace -# from .tensor.tensor import Tensor #DEFINE_ALIAS -# from .tensor.tensor import LoDTensor #DEFINE_ALIAS -# from .tensor.tensor import LoDTensorArray #DEFINE_ALIAS +from .tensor.search import index_sample # noqa: F401 +from .tensor.stat import mean # noqa: F401 +from .tensor.stat import std # noqa: F401 +from .tensor.stat import var # noqa: F401 +from .tensor.stat import numel # noqa: F401 +from .tensor.stat import median # noqa: F401 +from .device import get_cudnn_version # noqa: F401 +from .device import set_device # noqa: F401 +from .device import get_device # noqa: F401 +from .fluid.framework import is_compiled_with_cuda # noqa: F401 +from .device import is_compiled_with_xpu # noqa: F401 +from .device import is_compiled_with_npu # noqa: F401 +from .device import XPUPlace # noqa: F401 -from .fluid.dygraph.base import enable_dygraph as disable_static #DEFINE_ALIAS -from .fluid.dygraph.base import disable_dygraph as enable_static #DEFINE_ALIAS -from .fluid.framework import in_dygraph_mode as in_dynamic_mode #DEFINE_ALIAS -from .fluid.layers import crop_tensor as crop #DEFINE_ALIAS - -from . import jit -from . import static -from . import amp -from . 
import onnx +from .fluid.dygraph.base import enable_dygraph as disable_static # noqa: F401 +from .fluid.dygraph.base import disable_dygraph as enable_static # noqa: F401 +from .fluid.framework import in_dygraph_mode as in_dynamic_mode # noqa: F401 +from .fluid.layers import crop_tensor as crop # noqa: F401 # high-level api -from .hapi import Model -from .hapi import callbacks -from .hapi import summary -from .hapi import flops -from .hapi import hub +from .hapi import Model # noqa: F401 +from .hapi import callbacks # noqa: F401 +from .hapi import summary # noqa: F401 +from .hapi import flops # noqa: F401 +from .hapi import hub # noqa: F401 -import paddle.text -import paddle.vision +import paddle.text # noqa: F401 +import paddle.vision # noqa: F401 +from .tensor.random import check_shape # noqa: F401 disable_static() + +__all__ = [ #noqa + 'dtype', + 'uint8', + 'int8', + 'int16', + 'int32', + 'int64', + 'float16', + 'float32', + 'float64', + 'bfloat16', + 'bool', + 'complex64', + 'complex128', + 'addmm', + 'allclose', + 't', + 'add', + 'subtract', + 'diag', + 'isnan', + 'scatter_nd_add', + 'unstack', + 'get_default_dtype', + 'save', + 'multinomial', + 'get_cuda_rng_state', + 'rank', + 'empty_like', + 'eye', + 'cumsum', + 'sign', + 'is_empty', + 'equal', + 'equal_all', + 'is_tensor', + 'cross', + 'where', + 'log1p', + 'cos', + 'tan', + 'mean', + 'XPUPlace', + 'mv', + 'in_dynamic_mode', + 'min', + 'any', + 'slice', + 'normal', + 'logsumexp', + 'full', + 'unsqueeze', + 'unsqueeze_', + 'argmax', + 'Model', + 'callbacks', + 'summary', + 'flops', + 'hub', + 'sort', + 'split', + 'logical_and', + 'full_like', + 'less_than', + 'kron', + 'clip', + 'Tensor', + 'crop', + 'ParamAttr', + 'stanh', + 'randint', + 'assign', + 'gather', + 'scale', + 'zeros', + 'rsqrt', + 'squeeze', + 'squeeze_', + 'to_tensor', + 'gather_nd', + 'isinf', + 'set_device', + 'uniform', + 'floor_divide', + 'remainder', + 'floor_mod', + 'roll', + 'batch', + 'max', + 'norm', + 'logical_or', + 'mm', + 'flip', + 'histogram', + 'multiplex', + 'CUDAPlace', + 'NPUPlace', + 'empty', + 'shape', + 'real', + 'imag', + 'reciprocal', + 'rand', + 'less_equal', + 'triu', + 'is_compiled_with_cuda', + 'sin', + 'dist', + 'unbind', + 'meshgrid', + 'arange', + 'load', + 'numel', + 'median', + 'inverse', + 'no_grad', + 'set_grad_enabled', + 'mod', + 'abs', + 'tril', + 'pow', + 'zeros_like', + 'maximum', + 'topk', + 'index_select', + 'CPUPlace', + 'matmul', + 'seed', + 'acos', + 'logical_xor', + 'exp', + 'bernoulli', + 'summary', + 'sinh', + 'is_compiled_with_xpu', + 'is_compiled_with_npu', + 'round', + 'DataParallel', + 'argmin', + 'prod', + 'broadcast_shape', + 'conj', + 'square', + 'divide', + 'ceil', + 'atan', + 'expand', + 'broadcast_to', + 'ones_like', + 'index_sample', + 'cast', + 'grad', + 'all', + 'ones', + 'not_equal', + 'sum', + 'tile', + 'get_device', + 'greater_equal', + 'isfinite', + 'create_parameter', + 'dot', + 'increment', + 'erf', + 'bmm', + 'chunk', + 'tolist', + 'greater_than', + 'shard_index', + 'argsort', + 'tanh', + 'tanh_', + 'transpose', + 'randn', + 'strided_slice', + 'unique', + 'set_cuda_rng_state', + 'set_printoptions', + 'std', + 'flatten', + 'asin', + 'multiply', + 'disable_static', + 'masked_select', + 'var', + 'trace', + 'enable_static', + 'scatter_nd', + 'set_default_dtype', + 'expand_as', + 'get_cudnn_version', + 'stack', + 'sqrt', + 'cholesky', + 'randperm', + 'linspace', + 'reshape', + 'reshape_', + 'reverse', + 'nonzero', + 'CUDAPinnedPlace', + 'logical_not', + 'add_n', + 'minimum', + 'ComplexTensor', + 'scatter', 
+ 'scatter_', + 'floor', + 'cosh', + 'log', + 'log2', + 'log10', + 'concat', + 'check_shape' +] -- GitLab From 3b81f2b8cb7e8ddb4bde54331ea5d2a17d2dfb87 Mon Sep 17 00:00:00 2001 From: zhiboniu <31800336+zhiboniu@users.noreply.github.com> Date: Tue, 27 Apr 2021 18:57:32 +0800 Subject: [PATCH 033/720] update 2.0 public api in nn (#31912) * update 2.0 public api in nn * replace Chinese character cause error in ci;synchronization with pr:#32588 to avoid 'ascii' codec in python2 * numbers used in paddle.nn.functional.norm but not imported --- .../fleet/parameter_server/ir/trainer_pass.py | 2 +- .../fluid/tests/unittests/hccl_tools.py | 2 +- python/paddle/nn/__init__.py | 415 ++++++++++++------ python/paddle/nn/clip.py | 8 +- python/paddle/nn/decode.py | 9 +- python/paddle/nn/functional/__init__.py | 382 ++++++++-------- python/paddle/nn/functional/activation.py | 45 +- python/paddle/nn/functional/common.py | 30 +- python/paddle/nn/functional/conv.py | 9 - python/paddle/nn/functional/extension.py | 2 - python/paddle/nn/functional/input.py | 2 - python/paddle/nn/functional/loss.py | 35 +- python/paddle/nn/functional/norm.py | 11 - python/paddle/nn/functional/pooling.py | 15 - python/paddle/nn/functional/vision.py | 37 -- python/paddle/nn/initializer/__init__.py | 50 +-- python/paddle/nn/initializer/assign.py | 2 - python/paddle/nn/initializer/constant.py | 2 - python/paddle/nn/initializer/kaiming.py | 2 - python/paddle/nn/initializer/normal.py | 2 - python/paddle/nn/initializer/uniform.py | 2 - python/paddle/nn/initializer/xavier.py | 2 - python/paddle/nn/layer/__init__.py | 150 +++---- python/paddle/nn/layer/activation.py | 27 -- python/paddle/nn/layer/common.py | 20 +- python/paddle/nn/layer/conv.py | 9 - python/paddle/nn/layer/distance.py | 2 - python/paddle/nn/layer/loss.py | 18 +- python/paddle/nn/layer/norm.py | 13 +- python/paddle/nn/layer/pooling.py | 15 - python/paddle/nn/layer/rnn.py | 12 - python/paddle/nn/layer/transformer.py | 8 - python/paddle/nn/layer/vision.py | 2 - python/paddle/nn/utils/__init__.py | 7 +- python/paddle/nn/utils/weight_norm_hook.py | 2 - python/paddle/utils/deprecated.py | 5 +- 36 files changed, 570 insertions(+), 786 deletions(-) diff --git a/python/paddle/fluid/incubate/fleet/parameter_server/ir/trainer_pass.py b/python/paddle/fluid/incubate/fleet/parameter_server/ir/trainer_pass.py index 5f327497047..d4af3e2f804 100644 --- a/python/paddle/fluid/incubate/fleet/parameter_server/ir/trainer_pass.py +++ b/python/paddle/fluid/incubate/fleet/parameter_server/ir/trainer_pass.py @@ -527,7 +527,7 @@ def create_heter_program(program, config, heter_program, heter_ops, # This function mainly includes the following contents: # 1. For every heter block: # a) copy heter device op from origin program - # b) create variables which belong to heter op: + # b) create variables which belong to heter op: # -> if variable is persistable, clone it in global_scope # -> if variable is temp, create it in heter block # c) create communicate related op as follow: diff --git a/python/paddle/fluid/tests/unittests/hccl_tools.py b/python/paddle/fluid/tests/unittests/hccl_tools.py index 3ae8f38dc64..e3628ee5a4e 100644 --- a/python/paddle/fluid/tests/unittests/hccl_tools.py +++ b/python/paddle/fluid/tests/unittests/hccl_tools.py @@ -58,7 +58,7 @@ def parse_args(): default="[0,8)", help="The number of the Ascend accelerators used. 
please note that the Ascend accelerators" "used must be continuous, such [0,4) means to use four chips " - "0,1,2,3; [0,1) means to use chip 0; The first four chips are" + "0,1,2,3; [0,1) means to use chip 0; The first four chips are" "a group, and the last four chips are a group. In addition to" "the [0,8) chips are allowed, other cross-group such as [3,6)" "are prohibited.") diff --git a/python/paddle/nn/__init__.py b/python/paddle/nn/__init__.py index 836d4008f7d..d2f0063af0d 100644 --- a/python/paddle/nn/__init__.py +++ b/python/paddle/nn/__init__.py @@ -15,148 +15,273 @@ # TODO: import all neural network related api under this directory, # including layers, linear, conv, rnn etc. -from .layer import norm -from .functional import extension -from .layer import common -from .layer import rnn -from .utils import weight_norm_hook - -from . import initializer - -__all__ = [] -__all__ += norm.__all__ -__all__ += extension.__all__ -__all__ += common.__all__ -__all__ += rnn.__all__ -__all__ += weight_norm_hook.__all__ - -# TODO: define alias in nn directory -from .clip import ClipGradByGlobalNorm #DEFINE_ALIAS -from .clip import ClipGradByNorm #DEFINE_ALIAS -from .clip import ClipGradByValue #DEFINE_ALIAS -# from .control_flow import cond #DEFINE_ALIAS -# from .control_flow import DynamicRNN #DEFINE_ALIAS -# from .control_flow import StaticRNN #DEFINE_ALIAS -# from .control_flow import while_loop #DEFINE_ALIAS -# from .control_flow import rnn #DEFINE_ALIAS -from .decode import BeamSearchDecoder #DEFINE_ALIAS -from .decode import dynamic_decode #DEFINE_ALIAS -# from .decode import Decoder #DEFINE_ALIAS -# from .decode import crf_decoding #DEFINE_ALIAS -# from .decode import ctc_greedy_decoder #DEFINE_ALIAS -# from .input import Input #DEFINE_ALIAS -from .layer.activation import ELU #DEFINE_ALIAS -from .layer.activation import GELU #DEFINE_ALIAS -from .layer.activation import Tanh #DEFINE_ALIAS -from .layer.activation import Hardshrink #DEFINE_ALIAS -from .layer.activation import Hardswish #DEFINE_ALIAS -from .layer.activation import Hardtanh #DEFINE_ALIAS -from .layer.activation import PReLU #DEFINE_ALIAS -from .layer.activation import ReLU #DEFINE_ALIAS -from .layer.activation import ReLU6 #DEFINE_ALIAS -from .layer.activation import SELU #DEFINE_ALIAS -from .layer.activation import Silu #DEFINE_ALIAS -from .layer.activation import LeakyReLU #DEFINE_ALIAS -from .layer.activation import Sigmoid #DEFINE_ALIAS -from .layer.activation import Hardsigmoid #DEFINE_ALIAS -from .layer.activation import LogSigmoid #DEFINE_ALIAS -from .layer.activation import Softmax #DEFINE_ALIAS -from .layer.activation import Softplus #DEFINE_ALIAS -from .layer.activation import Softshrink #DEFINE_ALIAS -from .layer.activation import Softsign #DEFINE_ALIAS -from .layer.activation import Swish #DEFINE_ALIAS -from .layer.activation import Tanhshrink #DEFINE_ALIAS -from .layer.activation import ThresholdedReLU #DEFINE_ALIAS -from .layer.activation import LogSoftmax #DEFINE_ALIAS -from .layer.activation import Maxout #DEFINE_ALIAS -from .layer.common import Pad1D #DEFINE_ALIAS -from .layer.common import Pad2D #DEFINE_ALIAS -from .layer.common import Pad3D #DEFINE_ALIAS -from .layer.common import CosineSimilarity #DEFINE_ALIAS -from .layer.common import Embedding #DEFINE_ALIAS -from .layer.common import Linear #DEFINE_ALIAS -from .layer.common import Flatten #DEFINE_ALIAS -from .layer.common import Upsample #DEFINE_ALIAS -from .layer.common import UpsamplingNearest2D #DEFINE_ALIAS -from .layer.common import 
UpsamplingBilinear2D #DEFINE_ALIAS -from .layer.common import Bilinear #DEFINE_ALIAS -from .layer.common import Dropout #DEFINE_ALIAS -from .layer.common import Dropout2D #DEFINE_ALIAS -from .layer.common import Dropout3D #DEFINE_ALIAS -from .layer.common import AlphaDropout #DEFINE_ALIAS -from .layer.common import Unfold #DEFINE_ALIAS - -from .layer.pooling import AvgPool1D #DEFINE_ALIAS -from .layer.pooling import AvgPool2D #DEFINE_ALIAS -from .layer.pooling import AvgPool3D #DEFINE_ALIAS -from .layer.pooling import MaxPool1D #DEFINE_ALIAS -from .layer.pooling import MaxPool2D #DEFINE_ALIAS -from .layer.pooling import MaxPool3D #DEFINE_ALIAS -from .layer.pooling import AdaptiveAvgPool1D #DEFINE_ALIAS -from .layer.pooling import AdaptiveAvgPool2D #DEFINE_ALIAS -from .layer.pooling import AdaptiveAvgPool3D #DEFINE_ALIAS - -from .layer.pooling import AdaptiveMaxPool1D #DEFINE_ALIAS -from .layer.pooling import AdaptiveMaxPool2D #DEFINE_ALIAS -from .layer.pooling import AdaptiveMaxPool3D #DEFINE_ALIAS -from .layer.conv import Conv1D #DEFINE_ALIAS -from .layer.conv import Conv2D #DEFINE_ALIAS -from .layer.conv import Conv3D #DEFINE_ALIAS -from .layer.conv import Conv1DTranspose #DEFINE_ALIAS -from .layer.conv import Conv2DTranspose #DEFINE_ALIAS -from .layer.conv import Conv3DTranspose #DEFINE_ALIAS -# from .layer.conv import TreeConv #DEFINE_ALIAS -# from .layer.conv import Conv1D #DEFINE_ALIAS -from .layer.common import Linear -# from .layer.loss import NCELoss #DEFINE_ALIAS -from .layer.loss import BCEWithLogitsLoss #DEFINE_ALIAS -from .layer.loss import CrossEntropyLoss #DEFINE_ALIAS -from .layer.loss import HSigmoidLoss #DEFINE_ALIAS -from .layer.loss import MSELoss #DEFINE_ALIAS -from .layer.loss import L1Loss #DEFINE_ALIAS -from .layer.loss import NLLLoss #DEFINE_ALIAS -from .layer.loss import BCELoss #DEFINE_ALIAS -from .layer.loss import KLDivLoss #DEFINE_ALIAS -from .layer.loss import MarginRankingLoss #DEFINE_ALIAS -from .layer.loss import CTCLoss #DEFINE_ALIAS -from .layer.loss import SmoothL1Loss #DEFINE_ALIAS -from .layer.norm import BatchNorm #DEFINE_ALIAS -from .layer.norm import SyncBatchNorm #DEFINE_ALIAS -from .layer.norm import GroupNorm #DEFINE_ALIAS -from .layer.norm import LayerNorm #DEFINE_ALIAS -from .layer.norm import SpectralNorm #DEFINE_ALIAS -from .layer.norm import InstanceNorm1D #DEFINE_ALIAS -from .layer.norm import InstanceNorm2D #DEFINE_ALIAS -from .layer.norm import InstanceNorm3D #DEFINE_ALIAS -from .layer.norm import BatchNorm1D #DEFINE_ALIAS -from .layer.norm import BatchNorm2D #DEFINE_ALIAS -from .layer.norm import BatchNorm3D #DEFINE_ALIAS -from .layer.norm import LocalResponseNorm #DEFINE_ALIAS - -from .layer.rnn import RNNCellBase #DEFINE_ALIAS -from .layer.rnn import SimpleRNNCell #DEFINE_ALIAS -from .layer.rnn import LSTMCell #DEFINE_ALIAS -from .layer.rnn import GRUCell #DEFINE_ALIAS -from .layer.rnn import RNN #DEFINE_ALIAS -from .layer.rnn import BiRNN #DEFINE_ALIAS -from .layer.rnn import SimpleRNN #DEFINE_ALIAS -from .layer.rnn import LSTM #DEFINE_ALIAS -from .layer.rnn import GRU #DEFINE_ALIAS - -from .layer.transformer import MultiHeadAttention -from .layer.transformer import TransformerEncoderLayer -from .layer.transformer import TransformerEncoder -from .layer.transformer import TransformerDecoderLayer -from .layer.transformer import TransformerDecoder -from .layer.transformer import Transformer -from .layer.distance import PairwiseDistance #DEFINE_ALIAS - -from .layer.vision import PixelShuffle - -from .layer.container import LayerDict 
#DEFINE_ALIAS - -from .layer import loss #DEFINE_ALIAS -from .layer import conv #DEFINE_ALIAS -from .layer import vision #DEFINE_ALIAS -from ..fluid.dygraph.layers import Layer #DEFINE_ALIAS -from ..fluid.dygraph.container import LayerList, ParameterList, Sequential #DEFINE_ALIAS +from .clip import ClipGradByGlobalNorm # noqa: F401 +from .clip import ClipGradByNorm # noqa: F401 +from .clip import ClipGradByValue # noqa: F401 +from .decode import BeamSearchDecoder # noqa: F401 +from .decode import dynamic_decode # noqa: F401 +from .layer.activation import ELU # noqa: F401 +from .layer.activation import GELU # noqa: F401 +from .layer.activation import Tanh # noqa: F401 +from .layer.activation import Hardshrink # noqa: F401 +from .layer.activation import Hardswish # noqa: F401 +from .layer.activation import Hardtanh # noqa: F401 +from .layer.activation import PReLU # noqa: F401 +from .layer.activation import ReLU # noqa: F401 +from .layer.activation import ReLU6 # noqa: F401 +from .layer.activation import SELU # noqa: F401 +from .layer.activation import Silu # noqa: F401 +from .layer.activation import LeakyReLU # noqa: F401 +from .layer.activation import Sigmoid # noqa: F401 +from .layer.activation import Hardsigmoid # noqa: F401 +from .layer.activation import LogSigmoid # noqa: F401 +from .layer.activation import Softmax # noqa: F401 +from .layer.activation import Softplus # noqa: F401 +from .layer.activation import Softshrink # noqa: F401 +from .layer.activation import Softsign # noqa: F401 +from .layer.activation import Swish # noqa: F401 +from .layer.activation import Tanhshrink # noqa: F401 +from .layer.activation import ThresholdedReLU # noqa: F401 +from .layer.activation import LogSoftmax # noqa: F401 +from .layer.activation import Maxout # noqa: F401 +from .layer.common import Pad1D # noqa: F401 +from .layer.common import Pad2D # noqa: F401 +from .layer.common import Pad3D # noqa: F401 +from .layer.common import CosineSimilarity # noqa: F401 +from .layer.common import Embedding # noqa: F401 +from .layer.common import Linear # noqa: F401 +from .layer.common import Flatten # noqa: F401 +from .layer.common import Upsample # noqa: F401 +from .layer.common import UpsamplingNearest2D # noqa: F401 +from .layer.common import UpsamplingBilinear2D # noqa: F401 +from .layer.common import Bilinear # noqa: F401 +from .layer.common import Dropout # noqa: F401 +from .layer.common import Dropout2D # noqa: F401 +from .layer.common import Dropout3D # noqa: F401 +from .layer.common import AlphaDropout # noqa: F401 +from .layer.common import Unfold # noqa: F401 + +from .layer.pooling import AvgPool1D # noqa: F401 +from .layer.pooling import AvgPool2D # noqa: F401 +from .layer.pooling import AvgPool3D # noqa: F401 +from .layer.pooling import MaxPool1D # noqa: F401 +from .layer.pooling import MaxPool2D # noqa: F401 +from .layer.pooling import MaxPool3D # noqa: F401 +from .layer.pooling import AdaptiveAvgPool1D # noqa: F401 +from .layer.pooling import AdaptiveAvgPool2D # noqa: F401 +from .layer.pooling import AdaptiveAvgPool3D # noqa: F401 +from .layer.pooling import AdaptiveMaxPool1D # noqa: F401 +from .layer.pooling import AdaptiveMaxPool2D # noqa: F401 +from .layer.pooling import AdaptiveMaxPool3D # noqa: F401 + +from .layer.conv import Conv1D # noqa: F401 +from .layer.conv import Conv2D # noqa: F401 +from .layer.conv import Conv3D # noqa: F401 +from .layer.conv import Conv1DTranspose # noqa: F401 +from .layer.conv import Conv2DTranspose # noqa: F401 +from .layer.conv import Conv3DTranspose # noqa: F401 
+
+from .layer.loss import BCEWithLogitsLoss # noqa: F401
+from .layer.loss import CrossEntropyLoss # noqa: F401
+from .layer.loss import HSigmoidLoss # noqa: F401
+from .layer.loss import MSELoss # noqa: F401
+from .layer.loss import L1Loss # noqa: F401
+from .layer.loss import NLLLoss # noqa: F401
+from .layer.loss import BCELoss # noqa: F401
+from .layer.loss import KLDivLoss # noqa: F401
+from .layer.loss import MarginRankingLoss # noqa: F401
+from .layer.loss import CTCLoss # noqa: F401
+from .layer.loss import SmoothL1Loss # noqa: F401
+from .layer.norm import BatchNorm # noqa: F401
+from .layer.norm import SyncBatchNorm # noqa: F401
+from .layer.norm import GroupNorm # noqa: F401
+from .layer.norm import LayerNorm # noqa: F401
+from .layer.norm import SpectralNorm # noqa: F401
+from .layer.norm import InstanceNorm1D # noqa: F401
+from .layer.norm import InstanceNorm2D # noqa: F401
+from .layer.norm import InstanceNorm3D # noqa: F401
+from .layer.norm import BatchNorm1D # noqa: F401
+from .layer.norm import BatchNorm2D # noqa: F401
+from .layer.norm import BatchNorm3D # noqa: F401
+from .layer.norm import LocalResponseNorm # noqa: F401
+
+from .layer.rnn import RNNCellBase # noqa: F401
+from .layer.rnn import SimpleRNNCell # noqa: F401
+from .layer.rnn import LSTMCell # noqa: F401
+from .layer.rnn import GRUCell # noqa: F401
+from .layer.rnn import RNN # noqa: F401
+from .layer.rnn import BiRNN # noqa: F401
+from .layer.rnn import SimpleRNN # noqa: F401
+from .layer.rnn import LSTM # noqa: F401
+from .layer.rnn import GRU # noqa: F401
+
+from .layer.transformer import MultiHeadAttention # noqa: F401
+from .layer.transformer import TransformerEncoderLayer # noqa: F401
+from .layer.transformer import TransformerEncoder # noqa: F401
+from .layer.transformer import TransformerDecoderLayer # noqa: F401
+from .layer.transformer import TransformerDecoder # noqa: F401
+from .layer.transformer import Transformer # noqa: F401
+from .layer.distance import PairwiseDistance # noqa: F401
+
+from .layer.vision import PixelShuffle # noqa: F401
+from .layer.container import LayerDict # noqa: F401
+
+# TODO: remove loss; it is kept for now because it is still used in many unittests
+from .layer import loss # noqa: F401
+from ..fluid.dygraph.layers import Layer # noqa: F401
+from ..fluid.dygraph.container import LayerList # noqa: F401
+from ..fluid.dygraph.container import ParameterList # noqa: F401
+from ..fluid.dygraph.container import Sequential # noqa: F401
+
+from . import utils # noqa: F401
+from . import functional # noqa: F401
+from . import initializer # noqa: F401
+
+#TODO: remove 'diag_embed', 'remove_weight_norm', 'weight_norm' in a few months.
+import paddle.utils.deprecated as deprecated
+
+
+@deprecated(
+ since="2.0.0",
+ update_to="paddle.nn.functional.diag_embed",
+ reason="diag_embed in paddle.nn will be removed in the future")
+def diag_embed(*args):
+ '''
+ alias name of paddle.nn.functional.diag_embed
+ '''
+ return functional.diag_embed(*args)
+
+
+@deprecated(
+ since="2.0.0",
+ update_to="paddle.nn.utils.remove_weight_norm",
+ reason="remove_weight_norm in paddle.nn will be removed in the future")
+def remove_weight_norm(*args):
+ '''
+ alias name of paddle.nn.utils.remove_weight_norm
+ '''
+ return utils.remove_weight_norm(*args)
+
+
+@deprecated(
+ since="2.0.0",
+ update_to="paddle.nn.utils.weight_norm",
+ reason="weight_norm in paddle.nn will be removed in the future")
+def weight_norm(*args):
+ '''
+ alias name of paddle.nn.utils.weight_norm
+ '''
+ return utils.weight_norm(*args)
+
+
+__all__ = [ #noqa
+ 'BatchNorm',
+ 'GroupNorm',
+ 'LayerNorm',
+ 'SpectralNorm',
+ 'BatchNorm1D',
+ 'BatchNorm2D',
+ 'BatchNorm3D',
+ 'InstanceNorm1D',
+ 'InstanceNorm2D',
+ 'InstanceNorm3D',
+ 'SyncBatchNorm',
+ 'LocalResponseNorm',
+ 'Embedding',
+ 'Linear',
+ 'Upsample',
+ 'UpsamplingNearest2D',
+ 'UpsamplingBilinear2D',
+ 'Pad1D',
+ 'Pad2D',
+ 'Pad3D',
+ 'CosineSimilarity',
+ 'Dropout',
+ 'Dropout2D',
+ 'Dropout3D',
+ 'Bilinear',
+ 'AlphaDropout',
+ 'Unfold',
+ 'RNNCellBase',
+ 'SimpleRNNCell',
+ 'LSTMCell',
+ 'GRUCell',
+ 'RNN',
+ 'BiRNN',
+ 'SimpleRNN',
+ 'LSTM',
+ 'GRU',
+ 'dynamic_decode',
+ 'MultiHeadAttention',
+ 'Maxout',
+ 'Softsign',
+ 'Transformer',
+ 'MSELoss',
+ 'LogSigmoid',
+ 'BeamSearchDecoder',
+ 'ClipGradByNorm',
+ 'ReLU',
+ 'PairwiseDistance',
+ 'BCEWithLogitsLoss',
+ 'SmoothL1Loss',
+ 'MaxPool3D',
+ 'AdaptiveMaxPool2D',
+ 'Hardshrink',
+ 'clip',
+ 'Softplus',
+ 'KLDivLoss',
+ 'clip_by_norm',
+ 'AvgPool2D',
+ 'L1Loss',
+ 'LeakyReLU',
+ 'AvgPool1D',
+ 'AdaptiveAvgPool3D',
+ 'AdaptiveMaxPool3D',
+ 'NLLLoss',
+ 'Conv1D',
+ 'Sequential',
+ 'Hardswish',
+ 'Conv1DTranspose',
+ 'AdaptiveMaxPool1D',
+ 'TransformerEncoder',
+ 'Softmax',
+ 'ParameterList',
+ 'Conv2D',
+ 'Softshrink',
+ 'Hardtanh',
+ 'TransformerDecoderLayer',
+ 'CrossEntropyLoss',
+ 'GELU',
+ 'SELU',
+ 'Silu',
+ 'Conv2DTranspose',
+ 'CTCLoss',
+ 'ThresholdedReLU',
+ 'AdaptiveAvgPool2D',
+ 'MaxPool1D',
+ 'Layer',
+ 'TransformerDecoder',
+ 'Conv3D',
+ 'Tanh',
+ 'Conv3DTranspose',
+ 'Flatten',
+ 'AdaptiveAvgPool1D',
+ 'Tanhshrink',
+ 'HSigmoidLoss',
+ 'PReLU',
+ 'TransformerEncoderLayer',
+ 'AvgPool3D',
+ 'MaxPool2D',
+ 'MarginRankingLoss',
+ 'LayerList',
+ 'ClipGradByValue',
+ 'BCELoss',
+ 'Hardsigmoid',
+ 'ClipGradByGlobalNorm',
+ 'LogSoftmax',
+ 'Sigmoid',
+ 'Swish',
+ 'PixelShuffle',
+ 'ELU',
+ 'ReLU6'
+]
diff --git a/python/paddle/nn/clip.py b/python/paddle/nn/clip.py
index 9180a883e83..70c49b4a538 100644
--- a/python/paddle/nn/clip.py
+++ b/python/paddle/nn/clip.py
@@ -13,8 +13,6 @@
# TODO: define the functions to clip gradient of parameter -from ..fluid.clip import ClipGradByGlobalNorm #DEFINE_ALIAS -from ..fluid.clip import ClipGradByNorm #DEFINE_ALIAS -from ..fluid.clip import ClipGradByValue #DEFINE_ALIAS - -__all__ = ['ClipGradByGlobalNorm', 'ClipGradByNorm', 'ClipGradByValue'] +from ..fluid.clip import ClipGradByGlobalNorm # noqa: F401 +from ..fluid.clip import ClipGradByNorm # noqa: F401 +from ..fluid.clip import ClipGradByValue # noqa: F401 diff --git a/python/paddle/nn/decode.py b/python/paddle/nn/decode.py index bba5aba0da9..3229f0b21a6 100644 --- a/python/paddle/nn/decode.py +++ b/python/paddle/nn/decode.py @@ -12,10 +12,5 @@ # See the License for the specific language governing permissions and # limitations under the License. -from ..fluid.layers import BeamSearchDecoder #DEFINE_ALIAS -from ..fluid.layers import dynamic_decode #DEFINE_ALIAS - -__all__ = [ - 'BeamSearchDecoder', - 'dynamic_decode', -] +from ..fluid.layers import BeamSearchDecoder # noqa: F401 +from ..fluid.layers import dynamic_decode # noqa: F401 diff --git a/python/paddle/nn/functional/__init__.py b/python/paddle/nn/functional/__init__.py index 98124be7288..d4c17a27a61 100644 --- a/python/paddle/nn/functional/__init__.py +++ b/python/paddle/nn/functional/__init__.py @@ -14,211 +14,185 @@ # TODO: import all neural network related api under this directory, # including layers, linear, conv, rnn etc. -__all__ = [] -# TODO: define alias in functional directory -from . import conv -__all__ += conv.__all__ -from . import activation -__all__ += activation.__all__ -from . import extension -__all__ += extension.__all__ -from . import common -__all__ += common.__all__ -from . import pooling -__all__ += pooling.__all__ -from . import loss -__all__ += loss.__all__ -from .activation import elu #DEFINE_ALIAS -from .activation import elu_ #DEFINE_ALIAS -# from .activation import erf #DEFINE_ALIAS -from .activation import gelu #DEFINE_ALIAS -from .activation import hardshrink #DEFINE_ALIAS -from .activation import hardtanh #DEFINE_ALIAS -from .activation import hardsigmoid #DEFINE_ALIAS -from .activation import hardswish #DEFINE_ALIAS -from .activation import leaky_relu #DEFINE_ALIAS -from .activation import log_sigmoid #DEFINE_ALIAS -from .activation import maxout #DEFINE_ALIAS -from .activation import prelu #DEFINE_ALIAS -from .activation import relu #DEFINE_ALIAS -from .activation import relu_ #DEFINE_ALIAS -from .activation import relu6 #DEFINE_ALIAS -from .activation import selu #DEFINE_ALIAS -from .activation import sigmoid #DEFINE_ALIAS -from .activation import silu #DEFINE_ALIAS -# from .activation import soft_relu #DEFINE_ALIAS -from .activation import softmax #DEFINE_ALIAS -from .activation import softmax_ #DEFINE_ALIAS -from .activation import softplus #DEFINE_ALIAS -from .activation import softshrink #DEFINE_ALIAS -from .activation import softsign #DEFINE_ALIAS -from .activation import swish #DEFINE_ALIAS -from .activation import tanh #DEFINE_ALIAS -from .activation import tanh_ #DEFINE_ALIAS -from .activation import tanhshrink #DEFINE_ALIAS -from .activation import thresholded_relu #DEFINE_ALIAS -from .activation import log_softmax #DEFINE_ALIAS -from .activation import glu #DEFINE_ALIAS -from .common import dropout #DEFINE_ALIAS -from .common import dropout2d #DEFINE_ALIAS -from .common import dropout3d #DEFINE_ALIAS -from .common import alpha_dropout #DEFINE_ALIAS -# from .common import embedding #DEFINE_ALIAS -# from .common import fc #DEFINE_ALIAS -from .common import label_smooth -# from 
.common import one_hot #DEFINE_ALIAS -from .common import pad #DEFINE_ALIAS -# from .common import pad_constant_like #DEFINE_ALIAS -# from .common import pad2d #DEFINE_ALIAS -from .common import cosine_similarity #DEFINE_ALIAS -from .common import unfold #DEFINE_ALIAS -# from .common import bilinear_tensor_product #DEFINE_ALIAS -from .common import interpolate #DEFINE_ALIAS -from .common import upsample #DEFINE_ALIAS -from .common import bilinear #DEFINE_ALIAS -from .conv import conv1d #DEFINE_ALIAS -from .conv import conv1d_transpose #DEFINE_ALIAS -from .common import linear #DEFINE_ALIAS -from .conv import conv2d #DEFINE_ALIAS -from .conv import conv2d_transpose #DEFINE_ALIAS -from .conv import conv3d #DEFINE_ALIAS -from .conv import conv3d_transpose #DEFINE_ALIAS -# from .extension import add_position_encoding #DEFINE_ALIAS -# from .extension import autoincreased_step_counter #DEFINE_ALIAS -# from .extension import continuous_value_model #DEFINE_ALIAS -# from .extension import filter_by_instag #DEFINE_ALIAS -# from .extension import linear_chain_crf #DEFINE_ALIAS -# from .extension import merge_selected_rows #DEFINE_ALIAS -# from .extension import multiclass_nms #DEFINE_ALIAS -# from .extension import polygon_box_transform #DEFINE_ALIAS -# from .extension import random_crop #DEFINE_ALIAS -# from .extension import rpn_target_assign #DEFINE_ALIAS -# from .extension import similarity_focus #DEFINE_ALIAS -# from .extension import target_assign #DEFINE_ALIAS -# from .extension import temporal_shift #DEFINE_ALIAS -# from .extension import warpctc #DEFINE_ALIAS -from .extension import diag_embed #DEFINE_ALIAS +from .activation import elu # noqa: F401 +from .activation import elu_ # noqa: F401 +from .activation import gelu # noqa: F401 +from .activation import hardshrink # noqa: F401 +from .activation import hardtanh # noqa: F401 +from .activation import hardsigmoid # noqa: F401 +from .activation import hardswish # noqa: F401 +from .activation import leaky_relu # noqa: F401 +from .activation import log_sigmoid # noqa: F401 +from .activation import maxout # noqa: F401 +from .activation import prelu # noqa: F401 +from .activation import relu # noqa: F401 +from .activation import relu_ # noqa: F401 +from .activation import relu6 # noqa: F401 +from .activation import selu # noqa: F401 +from .activation import sigmoid # noqa: F401 +from .activation import silu # noqa: F401 +from .activation import softmax # noqa: F401 +from .activation import softmax_ # noqa: F401 +from .activation import softplus # noqa: F401 +from .activation import softshrink # noqa: F401 +from .activation import softsign # noqa: F401 +from .activation import swish # noqa: F401 +from .activation import tanh # noqa: F401 +from .activation import tanh_ # noqa: F401 +from .activation import tanhshrink # noqa: F401 +from .activation import thresholded_relu # noqa: F401 +from .activation import log_softmax # noqa: F401 +from .activation import glu # noqa: F401 +from .common import dropout # noqa: F401 +from .common import dropout2d # noqa: F401 +from .common import dropout3d # noqa: F401 +from .common import alpha_dropout # noqa: F401 +from .common import label_smooth # noqa: F401 +from .common import pad # noqa: F401 +from .common import cosine_similarity # noqa: F401 +from .common import unfold # noqa: F401 +from .common import interpolate # noqa: F401 +from .common import upsample # noqa: F401 +from .common import bilinear # noqa: F401 +from .conv import conv1d # noqa: F401 +from .conv import conv1d_transpose # noqa: F401 +from 
.common import linear # noqa: F401 +from .conv import conv2d # noqa: F401 +from .conv import conv2d_transpose # noqa: F401 +from .conv import conv3d # noqa: F401 +from .conv import conv3d_transpose # noqa: F401 +from .extension import diag_embed # noqa: F401 from .extension import sequence_mask -# from .lod import sequence_concat #DEFINE_ALIAS -# from .lod import sequence_conv #DEFINE_ALIAS -# from .lod import sequence_enumerate #DEFINE_ALIAS -# from .lod import sequence_expand_as #DEFINE_ALIAS -# from .lod import sequence_expand #DEFINE_ALIAS -# from .lod import sequence_first_step #DEFINE_ALIAS -# from .lod import sequence_last_step #DEFINE_ALIAS -# from .lod import sequence_mask #DEFINE_ALIAS -# from .lod import sequence_pad #DEFINE_ALIAS -# from .lod import sequence_pool #DEFINE_ALIAS -# from .lod import sequence_reshape #DEFINE_ALIAS -# from .lod import sequence_reverse #DEFINE_ALIAS -# from .lod import sequence_scatter #DEFINE_ALIAS -# from .lod import sequence_slice #DEFINE_ALIAS -# from .lod import sequence_softmax #DEFINE_ALIAS -# from .lod import sequence_unpad #DEFINE_ALIAS -# from .lod import array_length #DEFINE_ALIAS -# from .lod import array_read #DEFINE_ALIAS -# from .lod import array_write #DEFINE_ALIAS -# from .lod import create_array #DEFINE_ALIAS -# from .lod import hash #DEFINE_ALIAS -# from .lod import im2sequence #DEFINE_ALIAS -# from .lod import lod_append #DEFINE_ALIAS -# from .lod import lod_reset #DEFINE_ALIAS -# from .lod import reorder_lod_tensor_by_rank #DEFINE_ALIAS -# from .lod import tensor_array_to_tensor #DEFINE_ALIAS -# from .lod import dynamic_gru #DEFINE_ALIAS -# from .lod import dynamic_lstm #DEFINE_ALIAS -# from .lod import dynamic_lstmp #DEFINE_ALIAS -from .loss import binary_cross_entropy #DEFINE_ALIAS -from .loss import binary_cross_entropy_with_logits #DEFINE_ALIAS -# from .loss import bpr_loss #DEFINE_ALIAS -# from .loss import center_loss #DEFINE_ALIAS -#from .loss import cross_entropy #DEFINE_ALIAS -from .loss import cross_entropy #DEFINE_ALIAS -from .loss import dice_loss #DEFINE_ALIAS -from .loss import hsigmoid_loss #DEFINE_ALIAS -from .loss import kl_div #DEFINE_ALIAS -from .loss import l1_loss #DEFINE_ALIAS -from .loss import log_loss #DEFINE_ALIAS -from .loss import margin_ranking_loss #DEFINE_ALIAS -from .loss import mse_loss #DEFINE_ALIAS -from .loss import nll_loss #DEFINE_ALIAS -# from .loss import nce #DEFINE_ALIAS -from .loss import npair_loss #DEFINE_ALIAS -from .loss import sigmoid_focal_loss #DEFINE_ALIAS -# from .loss import smooth_l1 #DEFINE_ALIAS -from .loss import smooth_l1_loss #DEFINE_ALIAS -from .loss import softmax_with_cross_entropy #DEFINE_ALIAS -from .loss import square_error_cost #DEFINE_ALIAS -# from .loss import teacher_student_sigmoid_loss #DEFINE_ALIAS -from .loss import ctc_loss #DEFINE_ALIAS -# from .norm import data_norm #DEFINE_ALIAS -# from .norm import group_norm #DEFINE_ALIAS -from .norm import batch_norm #DEFINE_ALIAS -from .norm import instance_norm #DEFINE_ALIAS -from .norm import layer_norm #DEFINE_ALIAS -from .norm import local_response_norm #DEFINE_ALIAS -from .norm import normalize #DEFINE_ALIAS -# from .norm import spectral_norm #DEFINE_ALIAS -# from .pooling import pool2d #DEFINE_ALIAS -# from .pooling import pool3d #DEFINE_ALIAS -from .pooling import avg_pool1d #DEFINE_ALIAS -from .pooling import avg_pool2d #DEFINE_ALIAS -from .pooling import avg_pool3d #DEFINE_ALIAS -from .pooling import max_pool1d #DEFINE_ALIAS -from .pooling import max_pool2d #DEFINE_ALIAS -from .pooling import max_pool3d 
#DEFINE_ALIAS +from .loss import binary_cross_entropy # noqa: F401 +from .loss import binary_cross_entropy_with_logits # noqa: F401 +from .loss import cross_entropy # noqa: F401 +from .loss import dice_loss # noqa: F401 +from .loss import hsigmoid_loss # noqa: F401 +from .loss import kl_div # noqa: F401 +from .loss import l1_loss # noqa: F401 +from .loss import log_loss # noqa: F401 +from .loss import margin_ranking_loss # noqa: F401 +from .loss import mse_loss # noqa: F401 +from .loss import nll_loss # noqa: F401 +from .loss import npair_loss # noqa: F401 +from .loss import sigmoid_focal_loss # noqa: F401 +from .loss import smooth_l1_loss # noqa: F401 +from .loss import softmax_with_cross_entropy # noqa: F401 +from .loss import square_error_cost # noqa: F401 +from .loss import ctc_loss # noqa: F401 +from .norm import batch_norm # noqa: F401 +from .norm import instance_norm # noqa: F401 +from .norm import layer_norm # noqa: F401 +from .norm import local_response_norm # noqa: F401 +from .norm import normalize # noqa: F401 +from .pooling import avg_pool1d # noqa: F401 +from .pooling import avg_pool2d # noqa: F401 +from .pooling import avg_pool3d # noqa: F401 +from .pooling import max_pool1d # noqa: F401 +from .pooling import max_pool2d # noqa: F401 +from .pooling import max_pool3d # noqa: F401 -from .pooling import adaptive_max_pool1d #DEFINE_ALIAS -from .pooling import adaptive_max_pool2d #DEFINE_ALIAS -from .pooling import adaptive_max_pool3d #DEFINE_ALIAS -from .pooling import adaptive_avg_pool1d #DEFINE_ALIAS -from .pooling import adaptive_avg_pool2d #DEFINE_ALIAS -from .pooling import adaptive_avg_pool3d #DEFINE_ALIAS +from .pooling import adaptive_max_pool1d # noqa: F401 +from .pooling import adaptive_max_pool2d # noqa: F401 +from .pooling import adaptive_max_pool3d # noqa: F401 +from .pooling import adaptive_avg_pool1d # noqa: F401 +from .pooling import adaptive_avg_pool2d # noqa: F401 +from .pooling import adaptive_avg_pool3d # noqa: F401 -# from .rnn import rnn #DEFINE_ALIAS -# from .rnn import birnn #DEFINE_ALIAS -# from .rnn import gru_unit #DEFINE_ALIAS -# from .rnn import lstm #DEFINE_ALIAS -# from .rnn import lstm_unit #DEFINE_ALIAS -# from .vision import affine_channel #DEFINE_ALIAS -from .vision import affine_grid #DEFINE_ALIAS -# from .vision import anchor_generator #DEFINE_ALIAS -# from .vision import bipartite_match #DEFINE_ALIAS -# from .vision import box_clip #DEFINE_ALIAS -# from .vision import box_coder #DEFINE_ALIAS -# from .vision import box_decoder_and_assign #DEFINE_ALIAS -# from .vision import collect_fpn_proposals #DEFINE_ALIAS -# from .vision import deformable_conv #DEFINE_ALIAS -# from .vision import deformable_roi_pooling #DEFINE_ALIAS -# from .vision import density_prior_box #DEFINE_ALIAS -# from .vision import detection_output #DEFINE_ALIAS -# from .vision import distribute_fpn_proposals #DEFINE_ALIAS -# from .vision import fsp_matrix #DEFINE_ALIAS -# from .vision import generate_mask_labels #DEFINE_ALIAS -# from .vision import generate_proposal_labels #DEFINE_ALIAS -# from .vision import generate_proposals #DEFINE_ALIAS -from .vision import grid_sample #DEFINE_ALIAS -# from .vision import image_resize #DEFINE_ALIAS -# from .vision import image_resize_short #DEFINE_ALIAS -# from .vision import multi_box_head #DEFINE_ALIAS -from .vision import pixel_shuffle #DEFINE_ALIAS -# from .vision import prior_box #DEFINE_ALIAS -# from .vision import prroi_pool #DEFINE_ALIAS -# from .vision import psroi_pool #DEFINE_ALIAS -# from .vision import resize_bilinear 
#DEFINE_ALIAS -# from .vision import resize_nearest #DEFINE_ALIAS -# from .vision import resize_trilinear #DEFINE_ALIAS -# from .vision import retinanet_detection_output #DEFINE_ALIAS -# from .vision import retinanet_target_assign #DEFINE_ALIAS -# from .vision import roi_align #DEFINE_ALIAS -# from .vision import roi_perspective_transform #DEFINE_ALIAS -# from .vision import roi_pool #DEFINE_ALIAS -# from .vision import shuffle_channel #DEFINE_ALIAS -# from .vision import space_to_depth #DEFINE_ALIAS -# from .vision import yolo_box #DEFINE_ALIAS -# from .vision import yolov3_loss #DEFINE_ALIAS -from .input import one_hot #DEFINE_ALIAS -from .input import embedding #DEFINE_ALIAS -from ...fluid.layers import gather_tree -from ...fluid.layers import temporal_shift +from .vision import affine_grid # noqa: F401 +from .vision import grid_sample # noqa: F401 +from .vision import pixel_shuffle # noqa: F401 +from .input import one_hot # noqa: F401 +from .input import embedding # noqa: F401 +from ...fluid.layers import gather_tree # noqa: F401 +from ...fluid.layers import temporal_shift # noqa: F401 + +__all__ = [ #noqa + 'conv1d', + 'conv1d_transpose', + 'conv2d', + 'conv2d_transpose', + 'conv3d', + 'conv3d_transpose', + 'elu', + 'elu_', + 'gelu', + 'hardshrink', + 'hardtanh', + 'hardsigmoid', + 'hardswish', + 'leaky_relu', + 'log_sigmoid', + 'maxout', + 'prelu', + 'relu', + 'relu_', + 'relu6', + 'selu', + 'softmax', + 'softmax_', + 'softplus', + 'softshrink', + 'softsign', + 'sigmoid', + 'silu', + 'swish', + 'tanh', + 'tanh_', + 'tanhshrink', + 'thresholded_relu', + 'log_softmax', + 'glu', + 'diag_embed', + 'sequence_mask', + 'dropout', + 'dropout2d', + 'dropout3d', + 'alpha_dropout', + 'label_smooth', + 'linear', + 'pad', + 'unfold', + 'interpolate', + 'upsample', + 'bilinear', + 'cosine_similarity', + 'avg_pool1d', + 'avg_pool2d', + 'avg_pool3d', + 'max_pool1d', + 'max_pool2d', + 'max_pool3d', + 'adaptive_avg_pool1d', + 'adaptive_avg_pool2d', + 'adaptive_avg_pool3d', + 'adaptive_max_pool1d', + 'adaptive_max_pool2d', + 'adaptive_max_pool3d', + 'binary_cross_entropy', + 'binary_cross_entropy_with_logits', + 'cross_entropy', + 'dice_loss', + 'hsigmoid_loss', + 'kl_div', + 'l1_loss', + 'log_loss', + 'mse_loss', + 'margin_ranking_loss', + 'nll_loss', + 'npair_loss', + 'sigmoid_focal_loss', + 'smooth_l1_loss', + 'softmax_with_cross_entropy', + 'square_error_cost', + 'ctc_loss', + 'affine_grid', + 'grid_sample', + 'local_response_norm', + 'pixel_shuffle', + 'embedding', + 'gather_tree', + 'one_hot', + 'normalize' +] diff --git a/python/paddle/nn/functional/activation.py b/python/paddle/nn/functional/activation.py index d74308dc9aa..cd8ee99baa2 100644 --- a/python/paddle/nn/functional/activation.py +++ b/python/paddle/nn/functional/activation.py @@ -12,53 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-# TODO: define activation functions of neural network -from ...fluid.layers import brelu #DEFINE_ALIAS -# from ...fluid.layers import erf #DEFINE_ALIAS -from ...fluid.layers import maxout #DEFINE_ALIAS -# from ...fluid.layers import soft_relu #DEFINE_ALIAS -from ...fluid.layers import swish #DEFINE_ALIAS -from ...fluid.layers import sigmoid #DEFINE_ALIAS -from ...tensor.math import tanh #DEFINE_ALIAS -from ...tensor.math import tanh_ #DEFINE_ALIAS +from ...fluid.layers import sigmoid # noqa: F401 +from ...tensor.math import tanh # noqa: F401 +from ...tensor.math import tanh_ # noqa: F401 from ...tensor.manipulation import _print_warning_in_static_mode from ...tensor.manipulation import chunk from ...tensor.math import multiply -__all__ = [ - 'brelu', - 'elu', - 'elu_', - 'gelu', - 'hardshrink', - 'hardtanh', - 'hardsigmoid', - 'hardswish', - 'leaky_relu', - 'log_sigmoid', - 'maxout', - 'prelu', - 'relu', - 'relu_', - 'relu6', - 'selu', - 'softmax', - 'softmax_', - 'softplus', - 'softshrink', - 'softsign', - 'sigmoid', - 'silu' - 'swish', - 'tanh', - 'tanh_', - 'tanhshrink', - 'thresholded_relu', - 'log_softmax', - 'glu', -] - import warnings from ...fluid.layer_helper import LayerHelper from ...fluid.framework import in_dygraph_mode, convert_np_dtype_to_dtype_ diff --git a/python/paddle/nn/functional/common.py b/python/paddle/nn/functional/common.py index 1cc8ef6c39b..7379c7a5f67 100644 --- a/python/paddle/nn/functional/common.py +++ b/python/paddle/nn/functional/common.py @@ -20,44 +20,20 @@ from paddle.fluid.layers.tensor import Variable, fill_constant, zeros, concat from ...fluid.layers import core from ...fluid import dygraph_utils # TODO: define the common functions to build a neural network -# from ...fluid import one_hot #DEFINE_ALIAS -# from ...fluid.layers import pad2d #DEFINE_ALIAS -from ...fluid.layers import unfold #DEFINE_ALIAS -from ...fluid.layers import squeeze #DEFINE_ALIAS -from ...fluid.layers import unsqueeze #DEFINE_ALIAS +from ...fluid.layers import unfold # noqa: F401 +from ...fluid.layers import squeeze +from ...fluid.layers import unsqueeze from ...tensor import clip from ...tensor import sum from ...tensor import sqrt -from ...tensor import sum #DEFINE_ALIAS -from ...tensor import sqrt #DEFINE_ALIAS from ...fluid.data_feeder import check_variable_and_dtype, check_dtype from ...fluid.framework import Variable, in_dygraph_mode, _varbase_creator -#from ...fluid.layers import fc #DEFINE_ALIAS -# from ...fluid.layers import pad_constant_like #DEFINE_ALIAS from ...fluid.framework import in_dygraph_mode from ...fluid import core, dygraph_utils from ...fluid import core, layers from ...fluid.data_feeder import check_variable_and_dtype -__all__ = [ - 'dropout', - 'dropout2d', - 'dropout3d', - 'alpha_dropout', - # 'embedding', - # 'fc', - 'label_smooth', - 'linear', - 'pad', - 'unfold', - # 'bilinear_tensor_product', - 'interpolate', - 'upsample', - 'bilinear', - 'cosine_similarity', -] - def interpolate(x, size=None, diff --git a/python/paddle/nn/functional/conv.py b/python/paddle/nn/functional/conv.py index a8d6a6cc38d..800c8204973 100644 --- a/python/paddle/nn/functional/conv.py +++ b/python/paddle/nn/functional/conv.py @@ -13,15 +13,6 @@ # limitations under the License. 
from __future__ import print_function -__all__ = [ - 'conv1d', - 'conv1d_transpose', - 'conv2d', - 'conv2d_transpose', - 'conv3d', - 'conv3d_transpose', -] - import numpy as np from ...device import get_cudnn_version from ...fluid.framework import Variable, in_dygraph_mode diff --git a/python/paddle/nn/functional/extension.py b/python/paddle/nn/functional/extension.py index b004d79a877..7900f903e7f 100644 --- a/python/paddle/nn/functional/extension.py +++ b/python/paddle/nn/functional/extension.py @@ -14,8 +14,6 @@ # TODO: define the extention functions -__all__ = ['diag_embed', 'sequence_mask'] - import numpy as np from ...fluid.data_feeder import check_dtype from ...fluid.layer_helper import LayerHelper diff --git a/python/paddle/nn/functional/input.py b/python/paddle/nn/functional/input.py index b88a2b042ff..4fff9cda4be 100644 --- a/python/paddle/nn/functional/input.py +++ b/python/paddle/nn/functional/input.py @@ -19,8 +19,6 @@ from ...fluid.layer_helper import LayerHelper from ...fluid.layers import core from ...fluid.data_feeder import check_variable_and_dtype, check_dtype -__all__ = ['one_hot', 'embedding'] - def one_hot(x, num_classes, name=None): """ diff --git a/python/paddle/nn/functional/loss.py b/python/paddle/nn/functional/loss.py index ca0ad06532d..bb2d8005f4e 100755 --- a/python/paddle/nn/functional/loss.py +++ b/python/paddle/nn/functional/loss.py @@ -24,14 +24,14 @@ import paddle import paddle.fluid as fluid from ...fluid.framework import core, in_dygraph_mode from ...fluid.layers.nn import _elementwise_op_in_dygraph -from ...fluid.layers import dice_loss #DEFINE_ALIAS -from ...fluid.layers import log_loss #DEFINE_ALIAS -from ...fluid.layers import npair_loss #DEFINE_ALIAS +from ...fluid.layers import dice_loss # noqa: F401 +from ...fluid.layers import log_loss # noqa: F401 +from ...fluid.layers import npair_loss # noqa: F401 from ...fluid.layers import reshape -from ...fluid.layers import softmax_with_cross_entropy as fluid_softmax_with_cross_entropy #DEFINE_ALIAS -from ...fluid.layers import square_error_cost #DEFINE_ALIAS +from ...fluid.layers import softmax_with_cross_entropy as fluid_softmax_with_cross_entropy +from ...fluid.layers import square_error_cost # noqa: F401 -from ...fluid.layers import edit_distance #DEFINE_ALIAS +from ...fluid.layers import edit_distance # noqa: F401 from ...fluid.layers import huber_loss from ...fluid.layer_helper import LayerHelper from ...fluid.framework import in_dygraph_mode @@ -39,27 +39,6 @@ from ...fluid.framework import _varbase_creator from ...fluid.framework import Variable from paddle.utils import deprecated -__all__ = [ - 'binary_cross_entropy', - 'binary_cross_entropy_with_logits', - 'cross_entropy', - 'dice_loss', - 'hsigmoid_loss', - 'kl_div', - 'l1_loss', - 'log_loss', - 'mse_loss', - 'margin_ranking_loss', - # 'nce', - 'nll_loss', - 'npair_loss', - 'sigmoid_focal_loss', - 'smooth_l1_loss', - 'softmax_with_cross_entropy', - 'square_error_cost', - 'ctc_loss', -] - def binary_cross_entropy(input, label, weight=None, reduction='mean', name=None): @@ -1312,7 +1291,7 @@ def cross_entropy(input, Indicate whether compute softmax before cross_entropy. Default is ``True``. - - **name** (str,optional) + - **name** (str, optional) The name of the operator. Default is ``None`` . For more information, please refer to :ref:`api_guide_Name` . 
diff --git a/python/paddle/nn/functional/norm.py b/python/paddle/nn/functional/norm.py index 73df03e3714..dddc4c66d59 100644 --- a/python/paddle/nn/functional/norm.py +++ b/python/paddle/nn/functional/norm.py @@ -22,19 +22,8 @@ from ...framework import create_parameter from ...fluid.initializer import Constant from ...fluid.param_attr import ParamAttr from ...fluid import core, dygraph_utils - import numbers -__all__ = [ - 'batch_norm', - # 'data_norm', - 'instance_norm', - 'layer_norm', - 'local_response_norm', - 'normalize', - # 'spectral_norm' -] - def normalize(x, p=2, axis=1, epsilon=1e-12, name=None): r""" diff --git a/python/paddle/nn/functional/pooling.py b/python/paddle/nn/functional/pooling.py index 5f3642710ae..27a66c629ca 100755 --- a/python/paddle/nn/functional/pooling.py +++ b/python/paddle/nn/functional/pooling.py @@ -18,21 +18,6 @@ from ...fluid.framework import in_dygraph_mode from ...fluid.layers import utils, LayerHelper, unsqueeze, squeeze from ...fluid.data_feeder import check_type, check_variable_and_dtype -__all__ = [ - 'avg_pool1d', - 'avg_pool2d', - 'avg_pool3d', - 'max_pool1d', - 'max_pool2d', - 'max_pool3d', - 'adaptive_avg_pool1d', - 'adaptive_avg_pool2d', - 'adaptive_avg_pool3d', - 'adaptive_max_pool1d', - 'adaptive_max_pool2d', - 'adaptive_max_pool3d', -] - def _is_list_or_tuple(input): return isinstance(input, (list, tuple)) diff --git a/python/paddle/nn/functional/vision.py b/python/paddle/nn/functional/vision.py index 032d5b47eda..cb8a817023d 100644 --- a/python/paddle/nn/functional/vision.py +++ b/python/paddle/nn/functional/vision.py @@ -19,43 +19,6 @@ from ...fluid.data_feeder import check_variable_and_dtype from ...fluid import dygraph_utils import numpy as np -# TODO: define specitial functions used in computer vision task -# from ...fluid.layers import affine_channel #DEFINE_ALIAS -# from ...fluid.layers import anchor_generator #DEFINE_ALIAS -# from ...fluid.layers import bipartite_match #DEFINE_ALIAS -# from ...fluid.layers import box_clip #DEFINE_ALIAS -# from ...fluid.layers import box_coder #DEFINE_ALIAS -# from ...fluid.layers import box_decoder_and_assign #DEFINE_ALIAS -# from ...fluid.layers import collect_fpn_proposals #DEFINE_ALIAS -# from ...fluid.layers import deformable_roi_pooling #DEFINE_ALIAS -# from ...fluid.layers import density_prior_box #DEFINE_ALIAS -# from ...fluid.layers import detection_output #DEFINE_ALIAS -# from ...fluid.layers import distribute_fpn_proposals #DEFINE_ALIAS -# from ...fluid.layers import generate_mask_labels #DEFINE_ALIAS -# from ...fluid.layers import generate_proposal_labels #DEFINE_ALIAS -# from ...fluid.layers import generate_proposals #DEFINE_ALIAS -# from ...fluid.layers import image_resize #DEFINE_ALIAS -# from ...fluid.layers import prior_box #DEFINE_ALIAS -# from ...fluid.layers import prroi_pool #DEFINE_ALIAS -# from ...fluid.layers import psroi_pool #DEFINE_ALIAS -# from ...fluid.layers import resize_bilinear #DEFINE_ALIAS -# from ...fluid.layers import resize_nearest #DEFINE_ALIAS -# from ...fluid.layers import resize_trilinear #DEFINE_ALIAS -# from ...fluid.layers import roi_align #DEFINE_ALIAS -# from ...fluid.layers import roi_pool #DEFINE_ALIAS -# from ...fluid.layers import space_to_depth #DEFINE_ALIAS -# from ...fluid.layers import yolo_box #DEFINE_ALIAS -# from ...fluid.layers import yolov3_loss #DEFINE_ALIAS -# from ...fluid.layers import fsp_matrix #DEFINE_ALIAS -# from ...fluid.layers import image_resize_short #DEFINE_ALIAS -# from ...fluid.layers import pixel_shuffle #DEFINE_ALIAS -# 
from ...fluid.layers import retinanet_detection_output #DEFINE_ALIAS -# from ...fluid.layers import retinanet_target_assign #DEFINE_ALIAS -# from ...fluid.layers import roi_perspective_transform #DEFINE_ALIAS -# from ...fluid.layers import shuffle_channel #DEFINE_ALIAS - -__all__ = ['affine_grid', 'grid_sample', 'pixel_shuffle'] - def affine_grid(theta, out_shape, align_corners=True, name=None): """ diff --git a/python/paddle/nn/initializer/__init__.py b/python/paddle/nn/initializer/__init__.py index c128a1b401b..03e91f80dd1 100644 --- a/python/paddle/nn/initializer/__init__.py +++ b/python/paddle/nn/initializer/__init__.py @@ -13,36 +13,34 @@ # limitations under the License. # TODO: define the initializers to create a Parameter in neural network -from ...fluid.initializer import Bilinear #DEFINE_ALIAS -from ...fluid.initializer import set_global_initializer #DEFINE_ALIAS +from ...fluid.initializer import Bilinear # noqa: F401 +from ...fluid.initializer import set_global_initializer # noqa: F401 -from . import constant -from .constant import Constant #DEFINE_ALIAS +from .constant import Constant # noqa: F401 -from . import kaiming -from .kaiming import KaimingNormal #DEFINE_ALIAS -from .kaiming import KaimingUniform #DEFINE_ALIAS +from .kaiming import KaimingNormal # noqa: F401 +from .kaiming import KaimingUniform # noqa: F401 -__all__ = ['Bilinear', 'set_global_initializer'] +from .xavier import XavierNormal # noqa: F401 +from .xavier import XavierUniform # noqa: F401 -__all__ += constant.__all__ -__all__ += kaiming.__all__ +from .assign import Assign # noqa: F401 -from . import xavier -from .xavier import XavierNormal #DEFINE_ALIAS -from .xavier import XavierUniform #DEFINE_ALIAS +from .normal import Normal # noqa: F401 +from .normal import TruncatedNormal # noqa: F401 -from . import assign -from .assign import Assign #DEFINE_ALIAS +from .uniform import Uniform # noqa: F401 -from . import normal -from .normal import Normal #DEFINE_ALIAS -from .normal import TruncatedNormal #DEFINE_ALIAS - -from . import uniform -from .uniform import Uniform #DEFINE_ALIAS - -__all__ += xavier.__all__ -__all__ += assign.__all__ -__all__ += normal.__all__ -__all__ += uniform.__all__ +__all__ = [ #noqa + 'Bilinear', + 'Constant', + 'KaimingUniform', + 'KaimingNormal', + 'XavierNormal', + 'XavierUniform', + 'Assign', + 'Normal', + 'TruncatedNormal', + 'Uniform', + 'set_global_initializer' +] diff --git a/python/paddle/nn/initializer/assign.py b/python/paddle/nn/initializer/assign.py index 94c4ddc1938..642919f3540 100644 --- a/python/paddle/nn/initializer/assign.py +++ b/python/paddle/nn/initializer/assign.py @@ -19,8 +19,6 @@ from ...fluid.core import VarDesc from ...fluid.data_feeder import check_type from ...fluid.initializer import NumpyArrayInitializer -__all__ = ['Assign'] - class Assign(NumpyArrayInitializer): """Init an parameter with a numpy array, list, or tensor. diff --git a/python/paddle/nn/initializer/constant.py b/python/paddle/nn/initializer/constant.py index 6d21ddae0d1..aec3e82aab6 100644 --- a/python/paddle/nn/initializer/constant.py +++ b/python/paddle/nn/initializer/constant.py @@ -15,8 +15,6 @@ # TODO: define the initializers of Constant in neural network from ...fluid.initializer import ConstantInitializer -__all__ = ['Constant'] - class Constant(ConstantInitializer): """Implement the constant initializer. 
diff --git a/python/paddle/nn/initializer/kaiming.py b/python/paddle/nn/initializer/kaiming.py index 7e2b6f787f8..712bffccda1 100644 --- a/python/paddle/nn/initializer/kaiming.py +++ b/python/paddle/nn/initializer/kaiming.py @@ -15,8 +15,6 @@ # TODO: define the initializers of Kaiming functions in neural network from ...fluid.initializer import MSRAInitializer -__all__ = ['KaimingUniform', 'KaimingNormal'] - class KaimingNormal(MSRAInitializer): r"""Implements the Kaiming Normal initializer diff --git a/python/paddle/nn/initializer/normal.py b/python/paddle/nn/initializer/normal.py index a572d0e2c92..c009df78005 100644 --- a/python/paddle/nn/initializer/normal.py +++ b/python/paddle/nn/initializer/normal.py @@ -15,8 +15,6 @@ from ...fluid.initializer import NormalInitializer from ...fluid.initializer import TruncatedNormalInitializer -__all__ = ['Normal', 'TruncatedNormal'] - class Normal(NormalInitializer): """The Random Normal (Gaussian) distribution initializer. diff --git a/python/paddle/nn/initializer/uniform.py b/python/paddle/nn/initializer/uniform.py index a5d7d34efcf..e54a4d2187b 100644 --- a/python/paddle/nn/initializer/uniform.py +++ b/python/paddle/nn/initializer/uniform.py @@ -14,8 +14,6 @@ from ...fluid.initializer import UniformInitializer -__all__ = ['Uniform'] - class Uniform(UniformInitializer): """The random uniform distribution initializer. diff --git a/python/paddle/nn/initializer/xavier.py b/python/paddle/nn/initializer/xavier.py index 821a6984753..01a4a8887b4 100644 --- a/python/paddle/nn/initializer/xavier.py +++ b/python/paddle/nn/initializer/xavier.py @@ -14,8 +14,6 @@ from ...fluid.initializer import XavierInitializer -__all__ = ['XavierNormal', 'XavierUniform'] - class XavierNormal(XavierInitializer): r""" diff --git a/python/paddle/nn/layer/__init__.py b/python/paddle/nn/layer/__init__.py index 17c4ca5c5d1..64f0391fb65 100644 --- a/python/paddle/nn/layer/__init__.py +++ b/python/paddle/nn/layer/__init__.py @@ -14,90 +14,70 @@ # TODO: define activation functions of neural network -from . import activation -from . import loss -from . import conv -from . import activation -from . import norm -from . import rnn -from . import vision -from . import distance -from . import transformer -from . import container +from . import rnn # noqa: F401 +from . import transformer # noqa: F401 +from . 
import container # noqa: F401 -from .activation import * -from .loss import * -from .conv import * -from .activation import * -from .norm import * -from .rnn import * -from .vision import * +from .activation import PReLU # noqa: F401 +from .activation import ReLU # noqa: F401 +from .activation import ReLU6 # noqa: F401 +from .activation import LeakyReLU # noqa: F401 +from .activation import Sigmoid # noqa: F401 +from .activation import Softmax # noqa: F401 +from .activation import LogSoftmax # noqa: F401 +from .common import Bilinear # noqa: F401 +from .common import Pad1D # noqa: F401 +from .common import Pad2D # noqa: F401 +from .common import Pad3D # noqa: F401 +from .common import CosineSimilarity # noqa: F401 +from .common import Embedding # noqa: F401 +from .common import Linear # noqa: F401 +from .common import Flatten # noqa: F401 +from .common import Upsample # noqa: F401 +from .common import Dropout # noqa: F401 +from .common import Dropout2D # noqa: F401 +from .common import Dropout3D # noqa: F401 +from .common import AlphaDropout # noqa: F401 +from .common import Upsample # noqa: F401 +from .common import UpsamplingBilinear2D # noqa: F401 +from .common import UpsamplingNearest2D # noqa: F401 +from .pooling import AvgPool1D # noqa: F401 +from .pooling import AvgPool2D # noqa: F401 +from .pooling import AvgPool3D # noqa: F401 +from .pooling import MaxPool1D # noqa: F401 +from .pooling import MaxPool2D # noqa: F401 +from .pooling import MaxPool3D # noqa: F401 +from .pooling import AdaptiveAvgPool1D # noqa: F401 +from .pooling import AdaptiveAvgPool2D # noqa: F401 +from .pooling import AdaptiveAvgPool3D # noqa: F401 +from .pooling import AdaptiveMaxPool1D # noqa: F401 +from .pooling import AdaptiveMaxPool2D # noqa: F401 +from .pooling import AdaptiveMaxPool3D # noqa: F401 +from .conv import Conv1D # noqa: F401 +from .conv import Conv2D # noqa: F401 +from .conv import Conv3D # noqa: F401 +from .conv import Conv1DTranspose # noqa: F401 +from .conv import Conv2DTranspose # noqa: F401 +from .conv import Conv3DTranspose # noqa: F401 +from .loss import BCEWithLogitsLoss # noqa: F401 +from .loss import CrossEntropyLoss # noqa: F401 +from .loss import MSELoss # noqa: F401 +from .loss import L1Loss # noqa: F401 +from .loss import NLLLoss # noqa: F401 +from .loss import BCELoss # noqa: F401 +from .loss import KLDivLoss # noqa: F401 +from .loss import MarginRankingLoss # noqa: F401 +from .loss import CTCLoss # noqa: F401 +from .loss import SmoothL1Loss # noqa: F401 +from .norm import BatchNorm1D # noqa: F401 +from .norm import BatchNorm2D # noqa: F401 +from .norm import BatchNorm3D # noqa: F401 +from .norm import SyncBatchNorm # noqa: F401 +from .norm import GroupNorm # noqa: F401 +from .norm import LayerNorm # noqa: F401 +from .norm import SpectralNorm # noqa: F401 +from .norm import LocalResponseNorm # noqa: F401 -from .transformer import * -from .activation import PReLU #DEFINE_ALIAS -from .activation import ReLU #DEFINE_ALIAS -from .activation import LeakyReLU #DEFINE_ALIAS -from .activation import Sigmoid #DEFINE_ALIAS -from .activation import Softmax #DEFINE_ALIAS -from .activation import LogSoftmax #DEFINE_ALIAS -from .common import Bilinear #DEFINE_ALIAS -from .common import Pad1D #DEFINE_ALIAS -from .common import Pad2D #DEFINE_ALIAS -from .common import Pad3D #DEFINE_ALIAS -from .common import CosineSimilarity #DEFINE_ALIAS -from .common import Embedding #DEFINE_ALIAS -from .common import Linear #DEFINE_ALIAS -from .common import Flatten #DEFINE_ALIAS -from .common import Upsample 
#DEFINE_ALIAS -from .common import Dropout #DEFINE_ALIAS -from .common import Dropout2D #DEFINE_ALIAS -from .common import Dropout3D #DEFINE_ALIAS -from .common import AlphaDropout #DEFINE_ALIAS -from .common import Upsample #DEFINE_ALIAS -from .common import UpsamplingBilinear2D #DEFINE_ALIAS -from .common import UpsamplingNearest2D #DEFINE_ALIAS -from .pooling import AvgPool1D #DEFINE_ALIAS -from .pooling import AvgPool2D #DEFINE_ALIAS -from .pooling import AvgPool3D #DEFINE_ALIAS -from .pooling import MaxPool1D #DEFINE_ALIAS -from .pooling import MaxPool2D #DEFINE_ALIAS -from .pooling import MaxPool3D #DEFINE_ALIAS -from .pooling import AdaptiveAvgPool1D #DEFINE_ALIAS -from .pooling import AdaptiveAvgPool2D #DEFINE_ALIAS -from .pooling import AdaptiveAvgPool3D #DEFINE_ALIAS -from .pooling import AdaptiveMaxPool1D #DEFINE_ALIAS -from .pooling import AdaptiveMaxPool2D #DEFINE_ALIAS -from .pooling import AdaptiveMaxPool3D #DEFINE_ALIAS -from .conv import Conv1D #DEFINE_ALIAS -from .conv import Conv2D #DEFINE_ALIAS -from .conv import Conv3D #DEFINE_ALIAS -from .conv import Conv1DTranspose #DEFINE_ALIAS -from .conv import Conv2DTranspose #DEFINE_ALIAS -from .conv import Conv3DTranspose #DEFINE_ALIAS -# from .conv import TreeConv #DEFINE_ALIAS -# from .conv import Conv1D #DEFINE_ALIAS -# from .loss import NCELoss #DEFINE_ALIAS -from .loss import BCEWithLogitsLoss #DEFINE_ALIAS -from .loss import CrossEntropyLoss #DEFINE_ALIAS -from .loss import MSELoss #DEFINE_ALIAS -from .loss import L1Loss #DEFINE_ALIAS -from .loss import NLLLoss #DEFINE_ALIAS -from .loss import BCELoss #DEFINE_ALIAS -from .loss import KLDivLoss #DEFINE_ALIAS -from .loss import MarginRankingLoss #DEFINE_ALIAS -from .loss import CTCLoss #DEFINE_ALIAS -from .loss import SmoothL1Loss #DEFINE_ALIAS -from .norm import BatchNorm #DEFINE_ALIAS -from .norm import SyncBatchNorm #DEFINE_ALIAS -from .norm import GroupNorm #DEFINE_ALIAS -from .norm import LayerNorm #DEFINE_ALIAS -from .norm import SpectralNorm #DEFINE_ALIAS -#from .norm import InstanceNorm #DEFINE_ALIAS -from .norm import LocalResponseNorm #DEFINE_ALIAS -# from .rnn import RNNCell #DEFINE_ALIAS -# from .rnn import GRUCell #DEFINE_ALIAS -# from .rnn import LSTMCell #DEFINE_ALIAS - -from .vision import PixelShuffle #DEFINE_ALIAS -from .distance import PairwiseDistance #DEFINE_ALIAS -from .container import LayerDict #DEFINE_ALIAS +from .vision import PixelShuffle # noqa: F401 +from .distance import PairwiseDistance # noqa: F401 +from .container import LayerDict # noqa: F401 diff --git a/python/paddle/nn/layer/activation.py b/python/paddle/nn/layer/activation.py index 2a9ae310615..c6ce4588ea5 100644 --- a/python/paddle/nn/layer/activation.py +++ b/python/paddle/nn/layer/activation.py @@ -14,33 +14,6 @@ # TODO: define activation functions of neural network -__all__ = [ - 'ELU', - 'GELU', - 'Hardshrink', - 'Hardswish', - 'Tanh', - 'Hardtanh', - 'PReLU', - 'ReLU', - 'ReLU6', - 'SELU', - 'LeakyReLU', - 'Sigmoid', - 'Silu', - 'Hardsigmoid', - 'Softmax', - 'Softplus', - 'Softshrink', - 'Softsign', - 'Swish', - 'Tanhshrink', - 'ThresholdedReLU', - 'LogSigmoid', - 'LogSoftmax', - 'Maxout', -] - from ...fluid.dygraph import layers from ...fluid import core from ...fluid.framework import in_dygraph_mode diff --git a/python/paddle/nn/layer/common.py b/python/paddle/nn/layer/common.py index 8c001793715..058507ba5de 100644 --- a/python/paddle/nn/layer/common.py +++ b/python/paddle/nn/layer/common.py @@ -14,30 +14,12 @@ # TODO: define the common classes to build a neural network import 
paddle -from ...fluid.dygraph import Flatten #DEFINE_ALIAS +from ...fluid.dygraph import Flatten # noqa: F401 from ...fluid.dygraph import layers from ...fluid.framework import in_dygraph_mode from .. import functional as F from ...fluid.framework import _dygraph_tracer -__all__ = [ - 'Embedding', - 'Linear', - 'Upsample', - 'Pad1D', - 'Pad2D', - 'Pad3D', - 'UpsamplingNearest2D', - 'UpsamplingBilinear2D', - 'CosineSimilarity', - 'Dropout', - 'Dropout2D', - 'Dropout3D', - 'Bilinear', - 'AlphaDropout', - 'Unfold', -] - def _npairs(x, n): if isinstance(x, (paddle.Tensor, list)): diff --git a/python/paddle/nn/layer/conv.py b/python/paddle/nn/layer/conv.py index d6ba04dad04..2360dc17cf1 100644 --- a/python/paddle/nn/layer/conv.py +++ b/python/paddle/nn/layer/conv.py @@ -14,15 +14,6 @@ # TODO: define classes of convolutional neural network -__all__ = [ - 'Conv1D', - 'Conv2D', - 'Conv3D', - 'Conv1DTranspose', - 'Conv2DTranspose', - 'Conv3DTranspose', -] - import numpy as np from ...fluid import get_flags diff --git a/python/paddle/nn/layer/distance.py b/python/paddle/nn/layer/distance.py index 72e0a1b2d6d..7eb0fc1fbb5 100644 --- a/python/paddle/nn/layer/distance.py +++ b/python/paddle/nn/layer/distance.py @@ -12,8 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -__all__ = ['PairwiseDistance'] - import numpy as np import paddle diff --git a/python/paddle/nn/layer/loss.py b/python/paddle/nn/layer/loss.py index 2dfb3acca68..356b22c632c 100644 --- a/python/paddle/nn/layer/loss.py +++ b/python/paddle/nn/layer/loss.py @@ -21,20 +21,6 @@ import paddle from .. import functional as F from paddle.fluid.framework import core, in_dygraph_mode, _varbase_creator -__all__ = [ - 'BCEWithLogitsLoss', - 'CrossEntropyLoss', - 'HSigmoidLoss', - 'MSELoss', - 'L1Loss', - 'NLLLoss', - 'BCELoss', - 'KLDivLoss', - 'MarginRankingLoss', - 'CTCLoss', - 'SmoothL1Loss', -] - class BCEWithLogitsLoss(fluid.dygraph.Layer): r""" @@ -295,7 +281,7 @@ class CrossEntropyLoss(fluid.dygraph.Layer): Indicate whether compute softmax before cross_entropy. Default is ``True``. - - **name** (str,optional) + - **name** (str, optional) The name of the operator. Default is ``None`` . For more information, please refer to :ref:`api_guide_Name` . @@ -318,7 +304,7 @@ class CrossEntropyLoss(fluid.dygraph.Layer): - **label** (Tensor) - 1. If soft_label=False,the shape is + 1. If soft_label=False, the shape is :math:`[N_1, N_2, ..., N_k]` or :math:`[N_1, N_2, ..., N_k, 1]`, k >= 1. the data type is int32, int64, float32, float64, where each value is [0, C-1]. diff --git a/python/paddle/nn/layer/norm.py b/python/paddle/nn/layer/norm.py index 0b0b2bf7b9b..970d68e8263 100644 --- a/python/paddle/nn/layer/norm.py +++ b/python/paddle/nn/layer/norm.py @@ -28,13 +28,10 @@ # TODO: define normalization api import six -#from ...fluid.dygraph.nn import InstanceNorm -from ...fluid.dygraph import BatchNorm #DEFINE_ALIAS -#from ...fluid.dygraph import GroupNorm #DEFINE_ALIAS +from ...fluid.dygraph import BatchNorm # noqa: F401 -#from ...fluid.dygraph import LayerNorm #DEFINE_ALIAS -from ...fluid.dygraph import SpectralNorm #DEFINE_ALIAS +from ...fluid.dygraph import SpectralNorm # noqa: F401 from ...fluid.dygraph import layers from ...framework import get_default_dtype, set_default_dtype @@ -53,12 +50,6 @@ import warnings from ...fluid.dygraph.base import no_grad from .. 
import functional as F -__all__ = [ - 'BatchNorm', 'GroupNorm', 'LayerNorm', 'SpectralNorm', 'BatchNorm1D', - 'BatchNorm2D', 'BatchNorm3D', 'InstanceNorm1D', 'InstanceNorm2D', - 'InstanceNorm3D', 'SyncBatchNorm', 'LocalResponseNorm' -] - class _InstanceNormBase(layers.Layer): """ diff --git a/python/paddle/nn/layer/pooling.py b/python/paddle/nn/layer/pooling.py index cdb87a1cb39..5916fd7c69e 100755 --- a/python/paddle/nn/layer/pooling.py +++ b/python/paddle/nn/layer/pooling.py @@ -16,21 +16,6 @@ from ...fluid.dygraph import layers from ...fluid.layer_helper import LayerHelper from .. import functional as F -__all__ = [ - 'AvgPool1D', - 'AvgPool2D', - 'AvgPool3D', - 'MaxPool1D', - 'MaxPool2D', - 'MaxPool3D', - 'AdaptiveAvgPool1D', - 'AdaptiveAvgPool2D', - 'AdaptiveAvgPool3D', - 'AdaptiveMaxPool1D', - 'AdaptiveMaxPool2D', - 'AdaptiveMaxPool3D', -] - class AvgPool1D(layers.Layer): r""" diff --git a/python/paddle/nn/layer/rnn.py b/python/paddle/nn/layer/rnn.py index 964cfa74ebf..a7539b5b095 100644 --- a/python/paddle/nn/layer/rnn.py +++ b/python/paddle/nn/layer/rnn.py @@ -33,18 +33,6 @@ from paddle.fluid.layers import utils from paddle.fluid.layers.utils import map_structure, flatten, pack_sequence_as from paddle.fluid.data_feeder import convert_dtype -__all__ = [ - 'RNNCellBase', - 'SimpleRNNCell', - 'LSTMCell', - 'GRUCell', - 'RNN', - 'BiRNN', - 'SimpleRNN', - 'LSTM', - 'GRU', -] - def split_states(states, bidirectional=False, state_components=1): r""" diff --git a/python/paddle/nn/layer/transformer.py b/python/paddle/nn/layer/transformer.py index fe70a99ffb5..752870f3d0a 100644 --- a/python/paddle/nn/layer/transformer.py +++ b/python/paddle/nn/layer/transformer.py @@ -13,14 +13,6 @@ # limitations under the License. # TODO: define the classes of Transformer neural network -__all__ = [ - 'MultiHeadAttention', - 'TransformerEncoderLayer', - 'TransformerEncoder', - 'TransformerDecoderLayer', - 'TransformerDecoder', - 'Transformer', -] import copy import collections diff --git a/python/paddle/nn/layer/vision.py b/python/paddle/nn/layer/vision.py index d9c948a848a..e66e122be52 100644 --- a/python/paddle/nn/layer/vision.py +++ b/python/paddle/nn/layer/vision.py @@ -17,8 +17,6 @@ from ...fluid.dygraph import layers from .. import functional -__all__ = ['PixelShuffle'] - class PixelShuffle(layers.Layer): """ diff --git a/python/paddle/nn/utils/__init__.py b/python/paddle/nn/utils/__init__.py index 6562ac35e1e..bf2573d2cbc 100644 --- a/python/paddle/nn/utils/__init__.py +++ b/python/paddle/nn/utils/__init__.py @@ -12,5 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -from . 
import weight_norm_hook -from .weight_norm_hook import weight_norm, remove_weight_norm +from .weight_norm_hook import weight_norm, remove_weight_norm # noqa: F401 + +__all__ = [ #noqa + 'weight_norm', 'remove_weight_norm' +] diff --git a/python/paddle/nn/utils/weight_norm_hook.py b/python/paddle/nn/utils/weight_norm_hook.py index fdf7a1b5bb2..23df38ca08c 100755 --- a/python/paddle/nn/utils/weight_norm_hook.py +++ b/python/paddle/nn/utils/weight_norm_hook.py @@ -19,8 +19,6 @@ from ...fluid import layers as F from ...fluid.layer_helper import LayerHelper from ...fluid.data_feeder import check_variable_and_dtype -__all__ = ['weight_norm', 'remove_weight_norm'] - def l2_norm(x, axis, epsilon=1e-12, name=None): if len(x.shape) == 1: diff --git a/python/paddle/utils/deprecated.py b/python/paddle/utils/deprecated.py index daa2826ca36..a46f1ae3a2c 100755 --- a/python/paddle/utils/deprecated.py +++ b/python/paddle/utils/deprecated.py @@ -83,13 +83,14 @@ def deprecated(update_to="", since="", reason=""): 2. since version is empty, in this case, API is deprecated in all versions. 3. current version is newer than since version. """ - msg = "\033[93mWarning %s \033[0m" % (msg) + warningmsg = "\033[93mWarning %s \033[0m" % (msg) v_current = [int(i) for i in paddle.__version__.split(".")] v_current += [0] * (4 - len(v_current)) v_since = [int(i) for i in _since.split(".")] v_since += [0] * (4 - len(v_since)) if paddle.__version__ == "0.0.0" or _since == "" or v_current >= v_since: - warnings.warn(msg, category=DeprecationWarning, stacklevel=2) + warnings.warn( + warningmsg, category=DeprecationWarning, stacklevel=2) return func(*args, **kwargs) -- GitLab From 3132695044babaa33e4fbea47e9fee7cf68f108f Mon Sep 17 00:00:00 2001 From: pangyoki Date: Tue, 27 Apr 2021 19:45:13 +0800 Subject: [PATCH 034/720] [Docker] support cuda11.2 and using gcc5.4 in cuda10.1 (#32531) * support cuda11.2 and using gcc5.4 in cuda10.1 * fix manylinux py36 bug * support cuda11.2 * fix python36 pip version problem in ubuntu * save cuda11.0 --- tools/dockerfile/Dockerfile.ubuntu | 2 +- tools/dockerfile/Dockerfile.ubuntu18 | 2 +- tools/dockerfile/build_scripts/build_utils.sh | 15 +++++++++++--- .../dockerfile/build_scripts/install_nccl2.sh | 2 +- tools/dockerfile/build_scripts/install_trt.sh | 5 +++++ tools/dockerfile/centos7_manylinux.sh | 20 +++++++++++++------ tools/dockerfile/ubuntu16_dev.sh | 2 ++ tools/dockerfile/ubuntu18_dev.sh | 2 ++ 8 files changed, 38 insertions(+), 12 deletions(-) diff --git a/tools/dockerfile/Dockerfile.ubuntu b/tools/dockerfile/Dockerfile.ubuntu index 9500acb2f97..78a8b140279 100644 --- a/tools/dockerfile/Dockerfile.ubuntu +++ b/tools/dockerfile/Dockerfile.ubuntu @@ -205,7 +205,7 @@ RUN pip3.6 --no-cache-dir install -r /root/requirements.txt && \ # To fix https://github.com/PaddlePaddle/Paddle/issues/1954, we use # the solution in https://urllib3.readthedocs.io/en/latest/user-guide.html#ssl-py2 RUN apt-get install -y libssl-dev libffi-dev && apt-get clean -y && \ - pip3.6 install --upgrade pip && \ + pip3.6 install --upgrade pip==20.3.3 && \ pip3.7 install --upgrade pip && \ pip3.8 install --upgrade pip && \ pip3.9 install --upgrade pip && \ diff --git a/tools/dockerfile/Dockerfile.ubuntu18 b/tools/dockerfile/Dockerfile.ubuntu18 index 7dad70f00d4..a4a445e6db2 100644 --- a/tools/dockerfile/Dockerfile.ubuntu18 +++ b/tools/dockerfile/Dockerfile.ubuntu18 @@ -11,7 +11,7 @@ ARG WITH_AVX ENV WITH_GPU=${WITH_GPU:-ON} ENV WITH_AVX=${WITH_AVX:-ON} ENV DEBIAN_FRONTEND=noninteractive -ENV 
LD_LIBRARY_PATH=/usr/local/cuda-11.0/targets/x86_64-linux/lib:$LD_LIBRARY_PATH +ENV LD_LIBRARY_PATH=/usr/local/cuda-11.2/targets/x86_64-linux/lib:$LD_LIBRARY_PATH ENV HOME /root # Add bash enhancements diff --git a/tools/dockerfile/build_scripts/build_utils.sh b/tools/dockerfile/build_scripts/build_utils.sh index bb560d0fdf2..8f4f88328aa 100755 --- a/tools/dockerfile/build_scripts/build_utils.sh +++ b/tools/dockerfile/build_scripts/build_utils.sh @@ -93,8 +93,8 @@ function do_cpython_build { rm -rf Python-$py_ver # Some python's install as bin/python3. Make them available as # bin/python. - if [ -e ${prefix}/bin/python3 ]; then - ln -s python3 ${prefix}/bin/python + if [ -e ${prefix}/bin/python3.6 ]; then + ln -s python3.6 ${prefix}/bin/python fi if [ -e ${prefix}/bin/python3.7 ]; then ln -s python3.7 ${prefix}/bin/python @@ -106,7 +106,13 @@ function do_cpython_build { ln -s python3.9 ${prefix}/bin/python fi # NOTE Make libpython shared library visible to python calls below - LD_LIBRARY_PATH="/usr/local/ssl/lib:${prefix}/lib" ${prefix}/bin/python get-pip.py + if [ -e ${prefix}/bin/python3.6 ]; then + LD_LIBRARY_PATH="/usr/local/ssl/lib:${prefix}/lib" ${prefix}/bin/python ez_setup.py + LD_LIBRARY_PATH="/usr/local/ssl/lib:${prefix}/lib" ${prefix}/bin/python -m easy_install pip + LD_LIBRARY_PATH="/usr/local/ssl/lib:${prefix}/lib" ${prefix}/bin/python -m pip install --upgrade pip==20.3.3 + else + LD_LIBRARY_PATH="/usr/local/ssl/lib:${prefix}/lib" ${prefix}/bin/python get-pip.py + fi LD_LIBRARY_PATH="/usr/local/ssl/lib:${prefix}/lib" ${prefix}/bin/pip install wheel==0.32.2 cd / ls ${MY_DIR} @@ -137,6 +143,8 @@ function build_cpythons { GET_PIP_URL="https://bootstrap.pypa.io/2.7/get-pip.py" elif [ ${py_ver} == "3.5.1" ] ;then GET_PIP_URL="https://bootstrap.pypa.io/3.5/get-pip.py" + elif [ ${py_ver} == "3.6.0" ] ;then + GET_PIP_URL="https://bootstrap.pypa.io/ez_setup.py" fi check_var $GET_PIP_URL @@ -144,6 +152,7 @@ function build_cpythons { build_cpython $py_ver done rm get-pip.py + rm ez_setup.py } diff --git a/tools/dockerfile/build_scripts/install_nccl2.sh b/tools/dockerfile/build_scripts/install_nccl2.sh index b06b3d44c6e..07f186f3d4e 100644 --- a/tools/dockerfile/build_scripts/install_nccl2.sh +++ b/tools/dockerfile/build_scripts/install_nccl2.sh @@ -17,7 +17,7 @@ VERSION=$(nvcc --version | grep release | grep -oEi "release ([0-9]+)\.([0-9])"| sed "s/release //") if [ "$VERSION" == "10.0" ]; then DEB="nccl-repo-ubuntu1604-2.4.7-ga-cuda10.0_1-1_amd64.deb" -elif [ "$VERSION" == "10.2" ] || [ "$VERSION" == "10.1" ] || [ "$VERSION" == "11.0" ]; then +elif [ "$VERSION" == "10.2" ] || [ "$VERSION" == "10.1" ] || [ "$VERSION" == "11.0" ] || [ "$VERSION" == "11.2" ]; then if [ -f "/etc/redhat-release" ];then rm -f /usr/local/lib/libnccl.so wget --no-check-certificate -q https://nccl2-deb.cdn.bcebos.com/libnccl-2.7.8-1+cuda10.2.x86_64.rpm diff --git a/tools/dockerfile/build_scripts/install_trt.sh b/tools/dockerfile/build_scripts/install_trt.sh index e5ec70d2f37..1df8d0f4568 100644 --- a/tools/dockerfile/build_scripts/install_trt.sh +++ b/tools/dockerfile/build_scripts/install_trt.sh @@ -21,6 +21,11 @@ if [[ "$VERSION" == "10.1" ]];then tar -zxf TensorRT6-cuda10.1-cudnn7.tar.gz -C /usr/local cp -rf /usr/local/TensorRT6-cuda10.1-cudnn7/include/* /usr/include/ && cp -rf /usr/local/TensorRT6-cuda10.1-cudnn7/lib/* /usr/lib/ rm TensorRT6-cuda10.1-cudnn7.tar.gz +elif [[ "$VERSION" == "11.2" ]];then + wget -q https://paddle-ci.gz.bcebos.com/TRT/TensorRT7-cuda11.1-cudnn8.1.tar.gz --no-check-certificate + 
tar -zxf TensorRT7-cuda11.1-cudnn8.1.tar.gz -C /usr/local + cp -rf /usr/local/TensorRT-7.2.3.4/include/* /usr/include/ && cp -rf /usr/local/TensorRT-7.2.3.4/lib/* /usr/lib/ + rm TensorRT7-cuda11.1-cudnn8.1.tar.gz elif [[ "$VERSION" == "11.0" ]];then wget -q https://paddle-ci.cdn.bcebos.com/TRT/TensorRT-7.1.3.4.Ubuntu-16.04.x86_64-gnu.cuda-11.0.cudnn8.0.tar.gz --no-check-certificate tar -zxf TensorRT-7.1.3.4.Ubuntu-16.04.x86_64-gnu.cuda-11.0.cudnn8.0.tar.gz -C /usr/local diff --git a/tools/dockerfile/centos7_manylinux.sh b/tools/dockerfile/centos7_manylinux.sh index 0c738de62ea..6ea2a8f836f 100755 --- a/tools/dockerfile/centos7_manylinux.sh +++ b/tools/dockerfile/centos7_manylinux.sh @@ -20,36 +20,41 @@ REPO="${REPO:-paddledocker}" function make_cuda9cudnn7(){ sed 's//9.0-cudnn7-devel-centos7/g' Dockerfile.centos >Dockerfile.tmp - sed -i "s#RUN bash build_scripts/build.sh#RUN bash build_scripts/install_gcc.sh gcc54 \nRUN mv /usr/bin/gcc /usr/bin/gcc.bak \&\& ln -s /usr/local/gcc-5.4/bin/gcc /usr/bin/gcc \nENV PATH=/usr/local/gcc-5.4/bin:\$PATH \nRUN bash build_scripts/build.sh#g" Dockerfile.tmp + sed -i "s#RUN bash build_scripts/build.sh#RUN bash build_scripts/install_gcc.sh gcc54 \nRUN mv /usr/bin/cc /usr/bin/cc.bak \&\& ln -s /usr/local/gcc-5.4/bin/gcc /usr/bin/cc \nENV PATH=/usr/local/gcc-5.4/bin:\$PATH \nRUN bash build_scripts/build.sh#g" Dockerfile.tmp } function make_cuda10cudnn7() { sed 's//10.0-cudnn7-devel-centos7/g' Dockerfile.centos >Dockerfile.tmp - sed -i "s#RUN bash build_scripts/build.sh#RUN bash build_scripts/install_gcc.sh gcc54 \nRUN mv /usr/bin/gcc /usr/bin/gcc.bak \&\& ln -s /usr/local/gcc-5.4/bin/gcc /usr/bin/gcc \nENV PATH=/usr/local/gcc-5.4/bin:\$PATH \nRUN bash build_scripts/build.sh#g" Dockerfile.tmp + sed -i "s#RUN bash build_scripts/build.sh#RUN bash build_scripts/install_gcc.sh gcc54 \nRUN mv /usr/bin/cc /usr/bin/cc.bak \&\& ln -s /usr/local/gcc-5.4/bin/gcc /usr/bin/cc \nENV PATH=/usr/local/gcc-5.4/bin:\$PATH \nRUN bash build_scripts/build.sh#g" Dockerfile.tmp } function make_cuda101cudnn7() { sed 's//10.1-cudnn7-devel-centos7/g' Dockerfile.centos >Dockerfile.tmp - sed -i "s#RUN bash build_scripts/build.sh#RUN bash build_scripts/install_gcc.sh gcc82 \nRUN mv /usr/bin/gcc /usr/bin/gcc.bak \&\& ln -s /usr/local/gcc-8.2/bin/gcc /usr/bin/gcc \nENV PATH=/usr/local/gcc-8.2/bin:\$PATH \nRUN bash build_scripts/build.sh#g" Dockerfile.tmp + sed -i "s#RUN bash build_scripts/build.sh#RUN bash build_scripts/install_gcc.sh gcc54 \nRUN mv /usr/bin/cc /usr/bin/cc.bak \&\& ln -s /usr/local/gcc-5.4/bin/gcc /usr/bin/cc \nENV PATH=/usr/local/gcc-5.4/bin:\$PATH \nRUN bash build_scripts/build.sh#g" Dockerfile.tmp } function make_cuda102cudnn7() { sed 's//10.2-cudnn7-devel-centos7/g' Dockerfile.centos >Dockerfile.tmp - sed -i "s#RUN bash build_scripts/build.sh#RUN bash build_scripts/install_gcc.sh gcc82 \nRUN mv /usr/bin/gcc /usr/bin/gcc.bak \&\& ln -s /usr/local/gcc-8.2/bin/gcc /usr/bin/gcc \nENV PATH=/usr/local/gcc-8.2/bin:\$PATH \nRUN bash build_scripts/build.sh#g" Dockerfile.tmp + sed -i "s#RUN bash build_scripts/build.sh#RUN bash build_scripts/install_gcc.sh gcc82 \nRUN mv /usr/bin/cc /usr/bin/cc.bak \&\& ln -s /usr/local/gcc-8.2/bin/gcc /usr/bin/cc \nENV PATH=/usr/local/gcc-8.2/bin:\$PATH \nRUN bash build_scripts/build.sh#g" Dockerfile.tmp } function make_cuda102cudnn8() { sed 's//10.2-cudnn8-devel-centos7/g' Dockerfile.centos >Dockerfile.tmp - sed -i "s#RUN bash build_scripts/build.sh#RUN bash build_scripts/install_gcc.sh gcc82 \nRUN mv /usr/bin/gcc /usr/bin/gcc.bak \&\& ln -s 
/usr/local/gcc-8.2/bin/gcc /usr/bin/gcc \nENV PATH=/usr/local/gcc-8.2/bin:\$PATH \nRUN bash build_scripts/build.sh#g" Dockerfile.tmp + sed -i "s#RUN bash build_scripts/build.sh#RUN bash build_scripts/install_gcc.sh gcc82 \nRUN mv /usr/bin/cc /usr/bin/cc.bak \&\& ln -s /usr/local/gcc-8.2/bin/gcc /usr/bin/cc \nENV PATH=/usr/local/gcc-8.2/bin:\$PATH \nRUN bash build_scripts/build.sh#g" Dockerfile.tmp } function make_cuda11cudnn8() { sed 's//11.0-cudnn8-devel-centos7/g' Dockerfile.centos >Dockerfile.tmp - sed -i "s#RUN bash build_scripts/build.sh#RUN bash build_scripts/install_gcc.sh gcc82 \nRUN mv /usr/bin/gcc /usr/bin/gcc.bak \&\& ln -s /usr/local/gcc-8.2/bin/gcc /usr/bin/gcc \nENV PATH=/usr/local/gcc-8.2/bin:\$PATH \nRUN bash build_scripts/build.sh#g" Dockerfile.tmp + sed -i "s#RUN bash build_scripts/build.sh#RUN bash build_scripts/install_gcc.sh gcc82 \nRUN mv /usr/bin/cc /usr/bin/cc.bak \&\& ln -s /usr/local/gcc-8.2/bin/gcc /usr/bin/cc \nENV PATH=/usr/local/gcc-8.2/bin:\$PATH \nRUN bash build_scripts/build.sh#g" Dockerfile.tmp +} + +function make_cuda112cudnn8() { + sed 's//11.2.1-cudnn8-devel-centos7/g' Dockerfile.centos >Dockerfile.tmp + sed -i "s#RUN bash build_scripts/build.sh#RUN bash build_scripts/install_gcc.sh gcc82 \nRUN mv /usr/bin/cc /usr/bin/cc.bak \&\& ln -s /usr/local/gcc-8.2/bin/gcc /usr/bin/cc \nENV PATH=/usr/local/gcc-8.2/bin:\$PATH \nRUN bash build_scripts/build.sh#g" Dockerfile.tmp } function main() { @@ -73,6 +78,9 @@ function main() { cuda11cudnn8) make_cuda11cudnn8 ;; + cuda112cudnn8) + make_cuda112cudnn8 + ;; *) echo "Make dockerfile error, Without this paramet." exit 1 diff --git a/tools/dockerfile/ubuntu16_dev.sh b/tools/dockerfile/ubuntu16_dev.sh index 23578b4143f..0de9f82acee 100755 --- a/tools/dockerfile/ubuntu16_dev.sh +++ b/tools/dockerfile/ubuntu16_dev.sh @@ -40,6 +40,8 @@ function ref_whl(){ if [[ ${ref_CUDA_MAJOR} == "11.0" ]];then ref_version=.post110 + elif [[ ${ref_CUDA_MAJOR} == "11.2" ]];then + ref_version=.post112 elif [[ ${ref_CUDA_MAJOR} == "10" ]];then ref_version=.post100 elif [[ ${ref_CUDA_MAJOR} == "10.1" ]];then diff --git a/tools/dockerfile/ubuntu18_dev.sh b/tools/dockerfile/ubuntu18_dev.sh index 6c6a14529ca..c72243ef052 100755 --- a/tools/dockerfile/ubuntu18_dev.sh +++ b/tools/dockerfile/ubuntu18_dev.sh @@ -40,6 +40,8 @@ function ref_whl(){ if [[ ${ref_CUDA_MAJOR} == "11.0" ]];then ref_version=.post110 + elif [[ ${ref_CUDA_MAJOR} == "11.2" ]];then + ref_version=.post112 elif [[ ${ref_CUDA_MAJOR} == "10" ]];then ref_version=.post100 elif [[ ${ref_CUDA_MAJOR} == "10.1" ]];then -- GitLab From db41b74240e98a2f57fbf9a4eb681c5cf544e449 Mon Sep 17 00:00:00 2001 From: lilong12 Date: Tue, 27 Apr 2021 19:50:21 +0800 Subject: [PATCH 035/720] add alltoall api (#32507) * add alltoall api, test=develop --- .../fluid/operators/collective/alltoall_op.cc | 94 ++++++++++++++++++ .../operators/collective/alltoall_op.cu.cc | 95 +++++++++++++++++++ .../fluid/operators/collective/alltoall_op.h | 42 ++++++++ python/paddle/distributed/collective.py | 72 ++++++++++++++ .../fluid/tests/unittests/CMakeLists.txt | 3 + .../unittests/collective_alltoall_api.py | 56 +++++++++++ .../unittests/test_collective_alltoall_api.py | 34 +++++++ .../unittests/test_collective_api_base.py | 13 +++ 8 files changed, 409 insertions(+) create mode 100644 paddle/fluid/operators/collective/alltoall_op.cc create mode 100644 paddle/fluid/operators/collective/alltoall_op.cu.cc create mode 100644 paddle/fluid/operators/collective/alltoall_op.h create mode 100644 
python/paddle/fluid/tests/unittests/collective_alltoall_api.py create mode 100644 python/paddle/fluid/tests/unittests/test_collective_alltoall_api.py diff --git a/paddle/fluid/operators/collective/alltoall_op.cc b/paddle/fluid/operators/collective/alltoall_op.cc new file mode 100644 index 00000000000..1c57b9f9967 --- /dev/null +++ b/paddle/fluid/operators/collective/alltoall_op.cc @@ -0,0 +1,94 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/operators/collective/alltoall_op.h" + +namespace paddle { +namespace operators { + +class AllToAllOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + void InferShape(framework::InferShapeContext* ctx) const override { + OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "AllToAll"); + OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "AllToAll"); + int ring_id = ctx->Attrs().Get("ring_id"); + PADDLE_ENFORCE_GE( + ring_id, 0, + platform::errors::InvalidArgument( + "The ring_id (%d) for alltoall op must be non-negative.", ring_id)); + framework::DDim dim = ctx->GetInputDim("X"); + if (dim[0] < 0) dim[0] = -1; + ctx->SetOutputDim("Out", dim); + } + + protected: + framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext& ctx) const override { + return framework::OpKernelType( + OperatorWithKernel::IndicateVarDataType(ctx, "X"), ctx.GetPlace()); + } +}; + +class AllToAllOpMaker : public framework::OpProtoAndCheckerMaker { + public: + void Make() { + AddInput("X", "(Tensor) tensor send."); + AddOutput("Out", "(Tensor) the result of alltoall."); + AddAttr("ring_id", "(int default 0) nccl communication ring id.") + .SetDefault(0); + AddAttr( + "use_calc_stream", + "(bool default false) eject CUDA operations to calculation stream.") + .SetDefault(false); + AddComment(R"DOC( +AllToAll Operator +Scatter tensors from all participators to all participators. 
+)DOC"); + } +}; + +template +class AllToAllOpGradMaker : public framework::SingleGradOpMaker { + public: + using framework::SingleGradOpMaker::SingleGradOpMaker; + + protected: + void Apply(GradOpPtr retv) const override { + retv->SetType("alltoall"); + retv->SetInput("X", this->OutputGrad("Out")); + retv->SetOutput("Out", this->InputGrad("X")); + retv->SetAttrMap(this->Attrs()); + } +}; + +DECLARE_INPLACE_OP_INFERER(AllToAllInplaceInferer, {"X", "Out"}); + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +namespace plat = paddle::platform; + +REGISTER_OPERATOR(alltoall, ops::AllToAllOp, ops::AllToAllOpMaker, + ops::AllToAllOpGradMaker, + ops::AllToAllOpGradMaker, + ops::AllToAllInplaceInferer) + +REGISTER_OP_CPU_KERNEL(alltoall, ops::AllToAllOpCPUKernel, + ops::AllToAllOpCPUKernel, + ops::AllToAllOpCPUKernel, + ops::AllToAllOpCPUKernel, + ops::AllToAllOpCPUKernel); diff --git a/paddle/fluid/operators/collective/alltoall_op.cu.cc b/paddle/fluid/operators/collective/alltoall_op.cu.cc new file mode 100644 index 00000000000..1bcb47fc686 --- /dev/null +++ b/paddle/fluid/operators/collective/alltoall_op.cu.cc @@ -0,0 +1,95 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/operators/collective/alltoall_op.h" + +#if defined(PADDLE_WITH_NCCL) +#include "paddle/fluid/platform/collective_helper.h" +#include "paddle/fluid/platform/nccl_helper.h" +#endif + +namespace paddle { +namespace operators { + +template +class AllToAllOpCUDAKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { +#if defined(PADDLE_WITH_NCCL) +#if NCCL_VERSION_CODE >= 2703 + auto x = ctx.Input("X"); + auto out = ctx.Output("Out"); + int send_numel = x->numel(); + ncclDataType_t dtype = platform::ToNCCLDataType(x->type()); + + int ring_id = ctx.Attr("ring_id"); + PADDLE_ENFORCE_GE( + ring_id, 0, + platform::errors::InvalidArgument( + "The ring_id (%d) for alltoall op must be non-negative.", ring_id)); + auto place = ctx.GetPlace(); + auto comm = platform::NCCLCommContext::Instance().Get(ring_id, place); + int nranks = comm->nranks(); + + cudaStream_t stream = nullptr; + if (ctx.Attr("use_calc_stream")) { + auto dev_ctx = platform::DeviceContextPool::Instance().Get(place); + stream = static_cast(dev_ctx)->stream(); + } else { + stream = comm->stream(); + } + + framework::DDim x_dims = x->dims(); + framework::DDim out_dims(x_dims); + PADDLE_ENFORCE_EQ( + x_dims[0] % nranks, 0, + platform::errors::InvalidArgument( + "The first dimension size (%d) of the input tensor must be " + "divisible by the number of ranks (%d).", + x_dims[0], nranks)); + auto send_buf = x->data(); + auto recv_buf = out->mutable_data(out_dims, place); + size_t offset = 0; + send_numel /= nranks; + PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::ncclGroupStart()); + for (auto i = 0; i < nranks; ++i) { + PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::ncclSend( + send_buf + offset, 
send_numel, dtype, i, comm->comm(), stream)); + PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::ncclRecv( + recv_buf + offset, send_numel, dtype, i, comm->comm(), stream)); + offset += send_numel; + } + PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::ncclGroupEnd()); +#else + PADDLE_THROW( + platform::errors::Unavailable("NCCL version >= 2.7.3 is needed.")); +#endif +#else + PADDLE_THROW( + platform::errors::Unavailable("PaddlePaddle should compile with GPU.")); +#endif + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +namespace plat = paddle::platform; + +REGISTER_OP_CUDA_KERNEL(alltoall, ops::AllToAllOpCUDAKernel, + ops::AllToAllOpCUDAKernel, + ops::AllToAllOpCUDAKernel, + ops::AllToAllOpCUDAKernel, + ops::AllToAllOpCUDAKernel); diff --git a/paddle/fluid/operators/collective/alltoall_op.h b/paddle/fluid/operators/collective/alltoall_op.h new file mode 100644 index 00000000000..61eec440937 --- /dev/null +++ b/paddle/fluid/operators/collective/alltoall_op.h @@ -0,0 +1,42 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include +#include +#include + +#include "paddle/fluid/framework/data_type.h" +#include "paddle/fluid/framework/lod_tensor.h" +#include "paddle/fluid/framework/op_registry.h" + +#if defined(PADDLE_WITH_GLOO) +#include "paddle/fluid/framework/fleet/gloo_wrapper.h" +#endif + +namespace paddle { +namespace operators { + +template +class AllToAllOpCPUKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + PADDLE_THROW(platform::errors::Unavailable( + "Do not support alltoall for cpu kernel now.")); + } +}; + +} // namespace operators +} // namespace paddle diff --git a/python/paddle/distributed/collective.py b/python/paddle/distributed/collective.py index 69a8f8956a8..7aa765ba93f 100644 --- a/python/paddle/distributed/collective.py +++ b/python/paddle/distributed/collective.py @@ -36,6 +36,7 @@ __all__ = [ 'scatter', 'barrier', 'split', + 'alltoall', 'ReduceOp', 'send', 'recv', @@ -1178,6 +1179,77 @@ def split(x, return linear_out +def alltoall(in_tensor_list, out_tensor_list, group=None, use_calc_stream=True): + """ + Scatter tensors in in_tensor_list to all participators and gather the result tensors in out_tensor_list. + Args: + in_tensor_list (list): A list of input Tensors. Every element in the list must be a Tensor whose data type + should be float16, float32, float64, int32 or int64. + out_tensor_list (Tensor): A list of output Tensors. The data type of its elements should be the same as the + data type of the input Tensors. + group (Group, optional): The group instance return by new_group or None for global default group. Default: None. + use_calc_stream (bool, optional): Wether to use calculation stream (True) or communication stream. Default: True. + Returns: + None. + Examples: + .. 
code-block:: python + # required: distributed + import numpy as np + import paddle + from paddle.distributed import init_parallel_env + init_parallel_env() + out_tensor_list = [] + if paddle.distributed.ParallelEnv().rank == 0: + np_data1 = np.array([[1, 2, 3], [4, 5, 6]]) + np_data2 = np.array([[7, 8, 9], [10, 11, 12]]) + else: + np_data1 = np.array([[13, 14, 15], [16, 17, 18]]) + np_data2 = np.array([[19, 20, 21], [22, 23, 24]]) + data1 = paddle.to_tensor(np_data1) + data2 = paddle.to_tensor(np_data2) + paddle.distributed.all_to_all([data1, data2], out_tensor_list) + # out for rank 0: [[[1, 2, 3], [4, 5, 6]], [[13, 14, 15], [16, 17, 18]]] + # out for rank 1: [[[7, 8, 9], [10, 11, 12]], [[19, 20, 21], [22, 23, 24]]] + """ + if group is not None and not group.is_member(): + return + + ring_id = 0 if group is None else group.id + op_type = 'alltoall' + temp = paddle.concat(in_tensor_list, axis=0) + helper = LayerHelper(op_type, **locals()) + nranks = len(in_tensor_list) + out = helper.create_variable_for_type_inference( + dtype=in_tensor_list[0].dtype) + if in_dygraph_mode(): + core.ops.alltoall_(temp, 'use_calc_stream', use_calc_stream, 'ring_id', + ring_id) + else: + if not isinstance(in_tensor_list, list): + raise ValueError("The type of 'in_tensor_list' for all_to_all " + "should be list.") + for elem in in_tensor_list: + check_variable_and_dtype( + elem, 'in_tensor_list', + ['float16', 'float32', 'float64', 'int32', 'int64'], + 'all_to_all') + if not isinstance(out_tensor_list, list): + raise ValueError("The type of 'out_tensor_list' for all_to_all " + "should be list.") + if len(out_tensor_list) != 0: + raise ValueError("The 'out_tensor_list' for all_to_all " + "must be an empty list.") + helper.append_op( + type=op_type, + inputs={'X': [temp]}, + outputs={'Out': [out]}, + attrs={ + 'ring_id': group, + 'use_calc_stream': use_calc_stream, + }) + out_tensor_list.extend(paddle.split(out, nranks, 0)) + + def send(tensor, dst=0, group=None, use_calc_stream=True): """ Send a tensor to the receiver. 
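For reference, a minimal dynamic-graph sketch of calling the collective introduced above. It stays close to the example embedded in the docstring, but invokes the function under the alltoall name that this patch actually defines and exports; the two-rank launch (for instance through python -m paddle.distributed.launch) and a CUDA/NCCL build are assumptions of the sketch, not something this patch enforces.

    import numpy as np
    import paddle
    from paddle.distributed import init_parallel_env

    init_parallel_env()
    out_tensor_list = []
    if paddle.distributed.ParallelEnv().rank == 0:
        np_data1 = np.array([[1, 2, 3], [4, 5, 6]])
        np_data2 = np.array([[7, 8, 9], [10, 11, 12]])
    else:
        np_data1 = np.array([[13, 14, 15], [16, 17, 18]])
        np_data2 = np.array([[19, 20, 21], [22, 23, 24]])
    data1 = paddle.to_tensor(np_data1)
    data2 = paddle.to_tensor(np_data2)
    # each rank passes one tensor per participant and collects one tensor per participant
    paddle.distributed.alltoall([data1, data2], out_tensor_list)
    # rank 0 gathers the first tensor of every rank, rank 1 the second one

In the static-graph path the same call concatenates the input list, runs a single alltoall op on the concatenated tensor, and splits the result back into out_tensor_list, which is what the collective_alltoall_api.py test added below exercises.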
diff --git a/python/paddle/fluid/tests/unittests/CMakeLists.txt b/python/paddle/fluid/tests/unittests/CMakeLists.txt index c1a29c050b1..8e998459cd4 100644 --- a/python/paddle/fluid/tests/unittests/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt @@ -96,6 +96,7 @@ if(((NOT WITH_ROCM) AND (NOT WITH_GPU)) OR WIN32) LIST(REMOVE_ITEM TEST_OPS test_new_group_api) LIST(REMOVE_ITEM TEST_OPS test_collective_broadcast_api) LIST(REMOVE_ITEM TEST_OPS test_collective_allgather_api) + LIST(REMOVE_ITEM TEST_OPS test_collective_alltoall_api) LIST(REMOVE_ITEM TEST_OPS test_collective_sendrecv_api) LIST(REMOVE_ITEM TEST_OPS test_collective_wait) LIST(REMOVE_ITEM TEST_OPS test_memcpy_op) @@ -872,6 +873,7 @@ if(WITH_DISTRIBUTE AND WITH_GPU AND WITH_NCCL) endif() if((WITH_ROCM OR WITH_GPU) AND NOT WIN32) set_tests_properties(test_collective_allgather_api PROPERTIES TIMEOUT 120) + set_tests_properties(test_collective_alltoall_api PROPERTIES TIMEOUT 120) set_tests_properties(test_collective_sendrecv_api PROPERTIES TIMEOUT 120) set_tests_properties(test_collective_broadcast_api PROPERTIES TIMEOUT 120) set_tests_properties(test_collective_allreduce_api PROPERTIES TIMEOUT 120) @@ -907,6 +909,7 @@ if((WITH_ROCM OR WITH_GPU) AND NOT WIN32) test_new_group_api test_collective_broadcast_api test_collective_allgather_api + test_collective_alltoall_api PROPERTIES LABELS "RUN_TYPE=DIST") endif() if(WITH_GPU OR WITH_ROCM) diff --git a/python/paddle/fluid/tests/unittests/collective_alltoall_api.py b/python/paddle/fluid/tests/unittests/collective_alltoall_api.py new file mode 100644 index 00000000000..be18b68a1da --- /dev/null +++ b/python/paddle/fluid/tests/unittests/collective_alltoall_api.py @@ -0,0 +1,56 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import print_function + +import numpy as np +import argparse +import os +import sys +import signal +import time +import socket +from contextlib import closing +from six import string_types +import math +import paddle +import paddle.fluid as fluid +import paddle.fluid.profiler as profiler +import paddle.fluid.unique_name as nameGen +from paddle.fluid import core +import unittest +from multiprocessing import Process +import paddle.fluid.layers as layers +from functools import reduce +from test_collective_api_base import TestCollectiveAPIRunnerBase, runtime_main + +paddle.enable_static() + + +class TestCollectiveAllToAllAPI(TestCollectiveAPIRunnerBase): + def __init__(self): + self.global_ring_id = 0 + + def get_model(self, main_prog, startup_program, rank): + with fluid.program_guard(main_prog, startup_program): + tindata = layers.data( + name="tindata", shape=[10, 1000], dtype='float32') + tindata = paddle.split(tindata, 2, axis=0) + tout_data = [] + paddle.distributed.alltoall(tindata, tout_data) + return tout_data + + +if __name__ == "__main__": + runtime_main(TestCollectiveAllToAllAPI, "alltoall") diff --git a/python/paddle/fluid/tests/unittests/test_collective_alltoall_api.py b/python/paddle/fluid/tests/unittests/test_collective_alltoall_api.py new file mode 100644 index 00000000000..fab975a9d62 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_collective_alltoall_api.py @@ -0,0 +1,34 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import print_function +import unittest +import numpy as np +import paddle + +from test_collective_api_base import TestDistBase + +paddle.enable_static() + + +class TestCollectiveAllToAllAPI(TestDistBase): + def _setup_config(self): + pass + + def test_alltoall_nccl(self): + self.check_with_place("collective_alltoall_api.py", "alltoall", "nccl") + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_collective_api_base.py b/python/paddle/fluid/tests/unittests/test_collective_api_base.py index 832ffafa85e..e6693b676cf 100644 --- a/python/paddle/fluid/tests/unittests/test_collective_api_base.py +++ b/python/paddle/fluid/tests/unittests/test_collective_api_base.py @@ -277,6 +277,19 @@ class TestDistBase(unittest.TestCase): self.assertTrue( np.allclose( result_data, need_result, rtol=1e-05, atol=1e-05)) + elif col_type == "alltoall": + need_result1 = np.vstack((input1[0:input1.shape[0] // 2, :], + input2[0:input2.shape[0] // 2, :])) + need_result2 = np.vstack((input1[input1.shape[0] // 2:, :], + input2[input2.shape[0] // 2:, :])) + tr0_out = np.vstack(tr0_out) + tr1_out = np.vstack(tr1_out) + self.assertTrue( + np.allclose( + tr0_out, need_result1, rtol=1e-05, atol=1e-05)) + self.assertTrue( + np.allclose( + tr1_out, need_result2, rtol=1e-05, atol=1e-05)) elif col_type == "sendrecv": result_data = tr1_out[0] self.assertTrue( -- GitLab From 0dc02dc73763aab3b5d54161000b7f0d16bca221 Mon Sep 17 00:00:00 2001 From: jiangcheng Date: Wed, 28 Apr 2021 10:29:04 +0800 Subject: [PATCH 036/720] Optimize update_loss_scaling_op (#32554) * optimize update_loss_scaling_op by fused for loop to one kernel, test=develop * remove useless while loop and optimize variable name, test=develop * optimize variable name from out_addrs_tensor to out_addrs_mem, test=develop * optimize variable name for readable by change prefix identifier from t_ to local_ --- .../amp/check_finite_and_unscale_op.cu | 63 +++++++------ .../operators/amp/update_loss_scaling_op.cu | 93 ++++++++++++++++--- 2 files changed, 113 insertions(+), 43 deletions(-) diff --git a/paddle/fluid/operators/amp/check_finite_and_unscale_op.cu b/paddle/fluid/operators/amp/check_finite_and_unscale_op.cu index 2c3a9c366e4..c699486a914 100644 --- a/paddle/fluid/operators/amp/check_finite_and_unscale_op.cu +++ b/paddle/fluid/operators/amp/check_finite_and_unscale_op.cu @@ -39,33 +39,36 @@ __global__ void CheckFiniteAndUnscale(const T** xs, const MT* scale, __syncthreads(); const int64_t num = s_starts[size]; - int pre_xs_index = 0; - bool t_found_inf = false; - const MT t_scale = *scale; + int xs_index = 0; + bool local_found_inf = false; + const MT local_scale = *scale; for (int64_t idx = tid; idx < num; idx += gridDim.x * blockDim.x) { - // get the xs's index of thread - int xs_index = pre_xs_index; - while (idx < s_starts[xs_index]) xs_index++; - // avoid some tensor's numel is zero - while (idx >= s_starts[xs_index]) xs_index++; - pre_xs_index = xs_index - 1; + // get the "out" index of "id" + // For example: + // idx = 15, starts = [0, 10, 10, 20, 30] + // because 10 <= idx < 20 ==> + // the idx element locate in the 3rd tensor (notice the 2nd tensor size is + // 0) + int next_xs_index = xs_index; + while (idx >= s_starts[next_xs_index]) next_xs_index++; + xs_index = next_xs_index - 1; // get in data and out data - const T* in = xs[pre_xs_index]; - T* out = outs[pre_xs_index]; - int64_t in_idx = idx - s_starts[pre_xs_index]; + const T* in = xs[xs_index]; + T* out = outs[xs_index]; + int64_t 
in_idx = idx - s_starts[xs_index]; // Unscale - MT val = static_cast(in[in_idx]) * t_scale; + MT val = static_cast(in[in_idx]) * local_scale; T narrow_val = static_cast(val); out[in_idx] = narrow_val; // CheckFinite if (!isfinite(narrow_val)) { - t_found_inf = true; + local_found_inf = true; } } - if (t_found_inf) { + if (local_found_inf) { *found_inf = true; } } @@ -94,28 +97,30 @@ class CheckFiniteAndUnscaleGpuKernel : public framework::OpKernel { scale_data, inverse_scale_v, found_inf_data); size_t xs_size = xs.size(); + const auto& cpu_place = platform::CPUPlace(); // calculate each tensor's start index and copy to device auto h_starts_tensor = - memory::Alloc(platform::CPUPlace(), (xs_size + 1) * sizeof(int64_t)); + memory::Alloc(cpu_place, (xs_size + 1) * sizeof(int64_t)); int64_t* h_starts = reinterpret_cast(h_starts_tensor->ptr()); auto d_starts_tensor = memory::Alloc(dev_ctx, (xs_size + 1) * sizeof(int64_t)); int64_t* d_starts = reinterpret_cast(d_starts_tensor->ptr()); + // the start index value of each tensor is + // the sum of previous tensor's size. For example: + // xs = [10, 0, 10, 10] ==> starts = [0, 10, 10, 20, 30] h_starts[0] = 0; for (int i = 1; i <= xs_size; i++) { - // the start index value of each tensor is - // the sum of previous tensor's size h_starts[i] = h_starts[i - 1] + xs[i - 1]->numel(); } int64_t total_num = h_starts[xs_size]; memory::Copy(BOOST_GET_CONST(platform::CUDAPlace, dev_ctx.GetPlace()), - d_starts, platform::CPUPlace(), h_starts, - (xs_size + 1) * sizeof(int64_t), dev_ctx.stream()); + d_starts, cpu_place, h_starts, (xs_size + 1) * sizeof(int64_t), + dev_ctx.stream()); // copy each tensor's data address to device - auto h_mem = memory::Alloc(platform::CPUPlace(), 2 * xs_size * sizeof(T*)); + auto h_mem = memory::Alloc(cpu_place, 2 * xs_size * sizeof(T*)); const T** h_xs = reinterpret_cast(h_mem->ptr()); T** h_outs = reinterpret_cast(h_mem->ptr()) + xs_size; @@ -128,16 +133,18 @@ class CheckFiniteAndUnscaleGpuKernel : public framework::OpKernel { h_outs[i] = outs[i]->mutable_data(dev_ctx.GetPlace()); } memory::Copy(BOOST_GET_CONST(platform::CUDAPlace, dev_ctx.GetPlace()), d_xs, - platform::CPUPlace(), h_xs, 2 * xs_size * sizeof(T*), - dev_ctx.stream()); + cpu_place, h_xs, 2 * xs_size * sizeof(T*), dev_ctx.stream()); // Launch Kernel - int block = 1024; - int block_num = block * 20; // each thread deal with 20 number - int grid = (total_num + block_num - 1) / block_num; + int threads_per_block = std::min(static_cast(1024), total_num); + int elements_per_block = + threads_per_block * 20; // each thread deal with 20 number + int blocks_per_grid = + (total_num + elements_per_block - 1) / elements_per_block; VLOG(3) << "launch kernel"; - CheckFiniteAndUnscale<<< - grid, block, (xs_size + 1) * sizeof(int64_t), dev_ctx.stream()>>>( + CheckFiniteAndUnscale< + T, MPDType><<>>( d_xs, inverse_scale_v, xs_size, d_starts, found_inf_data, d_outs); VLOG(3) << "finish kernel"; } diff --git a/paddle/fluid/operators/amp/update_loss_scaling_op.cu b/paddle/fluid/operators/amp/update_loss_scaling_op.cu index b48b0e78892..de1f83c1ee5 100644 --- a/paddle/fluid/operators/amp/update_loss_scaling_op.cu +++ b/paddle/fluid/operators/amp/update_loss_scaling_op.cu @@ -34,13 +34,39 @@ __global__ void GpuUpdateLossScaling( } template -__global__ void FillIf(T* data, const int64_t num, const T value, - const bool* has_inf) { - if (*has_inf) { - int tid = threadIdx.x + blockIdx.x * blockDim.x; - for (int i = tid; i < num; i += blockDim.x * gridDim.x) { - data[i] = value; - } 
+__global__ void FusedFillIf(T** outs, const size_t xs_size, + const int64_t* starts, const T value, + const bool* has_inf) { + if (!(*has_inf)) return; + + const int tid = threadIdx.x + blockIdx.x * blockDim.x; + + // copy starts array from global memory to shared memory + extern __shared__ int64_t s_starts[]; + for (int i = threadIdx.x; i <= xs_size; i += blockDim.x) { + s_starts[i] = starts[i]; + } + __syncthreads(); + + const int64_t total_num = s_starts[xs_size]; + int out_index = 0; + + for (int64_t id = tid; id < total_num; id += blockDim.x * gridDim.x) { + // get the "out" index of "id" + // For example: + // id = 15, starts = [0, 10, 10, 20, 30] + // because 10 <= id < 20 ==> + // the id element locate in the 3rd tensor (notice the 2nd tensor size is 0) + int next_out_index = out_index; + while (id >= s_starts[next_out_index]) next_out_index++; + out_index = next_out_index - 1; + + // get data pointer and index + T* out_data = outs[out_index]; + int64_t idx = id - s_starts[out_index]; + + // set value + out_data[idx] = value; } } @@ -68,15 +94,52 @@ class LazyZeros { const bool* found_inf_data, const std::vector& xs, const std::vector& outs) const { - for (size_t i = 0; i < xs.size(); ++i) { - auto* out = outs[i]; - T* out_data = out->mutable_data(dev_ctx.GetPlace()); - int64_t num = out->numel(); - int block = 1024; - int grid = (block - 1 + num) / block; - FillIf<<>>( - out_data, num, static_cast(0), found_inf_data); + size_t xs_size = xs.size(); + const auto& cpu_place = platform::CPUPlace(); + // alloc each tensor's start index and copy to device + auto h_in_starts_mem = + memory::Alloc(cpu_place, (xs_size + 1) * sizeof(int64_t)); + int64_t* h_starts = reinterpret_cast(h_in_starts_mem->ptr()); + + auto d_in_starts_mem = + memory::Alloc(dev_ctx, (xs_size + 1) * sizeof(int64_t)); + int64_t* d_starts = reinterpret_cast(d_in_starts_mem->ptr()); + + // the start index value of each tensor is + // the sum of previous tensor's size. 
For example: + // outs = [10, 0, 10, 10] ==> starts = [0, 10, 10, 20, 30] + h_starts[0] = 0; + for (int i = 0; i < xs_size; i++) { + h_starts[i + 1] = h_starts[i] + outs[i]->numel(); } + memory::Copy(BOOST_GET_CONST(platform::CUDAPlace, dev_ctx.GetPlace()), + d_starts, cpu_place, h_starts, (xs_size + 1) * sizeof(int64_t), + dev_ctx.stream()); + + // copy each tensor of "outs" data address array to device + auto h_out_addrs_mem = memory::Alloc(cpu_place, xs_size * sizeof(T*)); + T** h_out_addrs = reinterpret_cast(h_out_addrs_mem->ptr()); + + auto d_out_addrs_mem = memory::Alloc(dev_ctx, xs_size * sizeof(T*)); + T** d_out_addrs = reinterpret_cast(d_out_addrs_mem->ptr()); + + for (size_t i = 0; i < xs_size; ++i) { + h_out_addrs[i] = outs[i]->mutable_data(dev_ctx.GetPlace()); + } + memory::Copy(BOOST_GET_CONST(platform::CUDAPlace, dev_ctx.GetPlace()), + d_out_addrs, cpu_place, h_out_addrs, xs_size * sizeof(T*), + dev_ctx.stream()); + + // launch cuda kernel + int64_t total_num = h_starts[xs_size]; + int64_t threads_per_block = std::min(static_cast(1024), total_num); + int64_t elements_per_block = + threads_per_block * 50; // each thread deal with 50 data + int64_t blocks_per_grid = + (total_num + elements_per_block - 1) / elements_per_block; + FusedFillIf<<>>( + d_out_addrs, xs_size, d_starts, static_cast(0), found_inf_data); } }; -- GitLab From ba6107614e4fdf03e8193d6d43786908b23065d5 Mon Sep 17 00:00:00 2001 From: Jacek Czaja Date: Wed, 28 Apr 2021 05:28:09 +0200 Subject: [PATCH 037/720] [oneDNN] Added clearing oneDNN cache per executor (#32499) * - Added clearing oneDNN per executor * - Executor is nt always having FLAGS_use_mkldnn set to true --- paddle/fluid/framework/executor.cc | 9 ++++-- paddle/fluid/framework/naive_executor.cc | 2 +- .../fluid/inference/api/mkldnn_quantizer.cc | 3 +- .../operators/mkldnn/test_mkldnn_caching.cc | 2 +- paddle/fluid/platform/device_context.cc | 30 ++++++++++++++++--- paddle/fluid/platform/device_context.h | 14 ++++++++- paddle/fluid/platform/mkldnn_helper.h | 8 +++-- 7 files changed, 56 insertions(+), 12 deletions(-) diff --git a/paddle/fluid/framework/executor.cc b/paddle/fluid/framework/executor.cc index e5bfbf4a8f7..de007c128d7 100644 --- a/paddle/fluid/framework/executor.cc +++ b/paddle/fluid/framework/executor.cc @@ -72,7 +72,7 @@ Executor::~Executor() { #ifdef PADDLE_WITH_MKLDNN // Clear mkl-dnn cache, // this is needed to have mkl-dnn unit tests working - ClearMKLDNNCache(place_); + ClearMKLDNNCache(place_, this); #endif } @@ -169,6 +169,9 @@ void Executor::Run(const ProgramDesc& pdesc, Scope* scope, int block_id, bool force_disable_gc, bool keep_kid_scopes) { platform::RecordBlock b(block_id); if (FLAGS_use_mkldnn) EnableMKLDNN(pdesc); +#ifdef PADDLE_WITH_MKLDNN + platform::AttachPointerHashToMKLDNNKey(this, place_); +#endif auto ctx = Prepare(pdesc, block_id, skip_ref_cnt_vars, force_disable_gc); RunPreparedContext(ctx.get(), scope, create_local_scope, create_vars, keep_kid_scopes); @@ -294,6 +297,9 @@ void Executor::Run(const ProgramDesc& program, Scope* scope, const std::string& fetch_holder_name) { platform::RecordBlock b(kProgramId); if (FLAGS_use_mkldnn) EnableMKLDNN(program); +#ifdef PADDLE_WITH_MKLDNN + platform::AttachPointerHashToMKLDNNKey(this, place_); +#endif bool has_feed_ops = has_feed_operators(program.Block(0), *feed_targets, feed_holder_name); bool has_fetch_ops = @@ -576,7 +582,6 @@ void Executor::EnableMKLDNN(const ProgramDesc& program) { } } } - platform::AttachPointerHashToMKLDNNKey(this, place_); #else LOG(WARNING) << 
"'MKLDNN' is not supported, Please re-compile with WITH_MKLDNN option"; diff --git a/paddle/fluid/framework/naive_executor.cc b/paddle/fluid/framework/naive_executor.cc index f107321958b..7d55d8c41e3 100644 --- a/paddle/fluid/framework/naive_executor.cc +++ b/paddle/fluid/framework/naive_executor.cc @@ -128,7 +128,7 @@ NaiveExecutor::~NaiveExecutor() { #ifdef PADDLE_WITH_MKLDNN // Clear mkl-dnn cache, // this is needed to have mkl-dnn unit tests working - ClearMKLDNNCache(place_); + ClearMKLDNNCache(place_, this); #endif } diff --git a/paddle/fluid/inference/api/mkldnn_quantizer.cc b/paddle/fluid/inference/api/mkldnn_quantizer.cc index 793fc53d90b..f6cdbb00b50 100644 --- a/paddle/fluid/inference/api/mkldnn_quantizer.cc +++ b/paddle/fluid/inference/api/mkldnn_quantizer.cc @@ -411,7 +411,8 @@ void AnalysisPredictor::MkldnnQuantizer::ClearDeviceContext() const { platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); platform::MKLDNNDeviceContext* dev_ctx = (platform::MKLDNNDeviceContext*)pool.Get(predictor_.place_); - dev_ctx->ResetBlobMap(); + dev_ctx->ResetBlobMap( + paddle::platform::MKLDNNDeviceContext::tls().get_curr_exec()); } void AnalysisPredictor::MkldnnQuantizer::PrepareArgument() const { diff --git a/paddle/fluid/operators/mkldnn/test_mkldnn_caching.cc b/paddle/fluid/operators/mkldnn/test_mkldnn_caching.cc index aafff5248a0..d6cd76b697f 100644 --- a/paddle/fluid/operators/mkldnn/test_mkldnn_caching.cc +++ b/paddle/fluid/operators/mkldnn/test_mkldnn_caching.cc @@ -50,7 +50,7 @@ class CacheTester { platform::CPUPlace place; onednn_dev_ctx_ = dynamic_cast(pool.Get(place)); - onednn_dev_ctx_->ResetBlobMap(); + onednn_dev_ctx_->ResetBlobMap(nullptr); } bool Analyze(unsigned short int num_entries) { diff --git a/paddle/fluid/platform/device_context.cc b/paddle/fluid/platform/device_context.cc index 50bb64d5574..9a47ac45462 100644 --- a/paddle/fluid/platform/device_context.cc +++ b/paddle/fluid/platform/device_context.cc @@ -537,6 +537,7 @@ Place CUDAPinnedDeviceContext::GetPlace() const { return place_; } MKLDNNDeviceContext::MKLDNNDeviceContext(CPUPlace place) : CPUDeviceContext(place), p_blobmap_() { p_blobmap_.reset(new BlobMap()); + p_exec_items_.reset(new ExecMap()); p_mutex_.reset(new std::mutex()); } @@ -560,7 +561,7 @@ MKLDNNDeviceContextThreadLocals::Body::~Body() { platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); platform::MKLDNNDeviceContext* dev_ctx = (platform::MKLDNNDeviceContext*)pool.Get(cpu_place); - dev_ctx->ResetBlobMap(); + dev_ctx->ResetBlobMap(exec_ptr_); } void MKLDNNDeviceContextThreadLocals::Body::set_cur_mkldnn_session_id( @@ -607,17 +608,34 @@ mkldnn::stream& MKLDNNDeviceContextThreadLocals::Body::get_stream(void) { return cur_stream; } -void MKLDNNDeviceContext::ResetBlobMap() { +void MKLDNNDeviceContext::ResetBlobMap(void* ptr) { std::lock_guard lock(*p_mutex_); if (!block_next_cache_clearing_) { VLOG(3) << "Clearing DNNL cache."; - p_blobmap_->clear(); + // If no specific executor pointer then clear + // everything. 
For executor pointer then clear only + // objects allocated when using given executor + if (ptr == nullptr) { + p_blobmap_->clear(); + } else { + for (auto& v : (*p_exec_items_)[ptr]) { + (v.first)->erase(v.second); + } + p_exec_items_->erase(ptr); + } } else { VLOG(3) << "Prevented Clearing DNNL cache."; block_next_cache_clearing_ = false; } } +void MKLDNNDeviceContext::LinkEntryWithExecutor(BlobPtr_t pblob, + KeyBlob::iterator it) const { + // Take current executor addess from TLS + // and for this executor's items add the one defined with arguments + (*p_exec_items_)[tls().get_curr_exec()].push_back(std::make_pair(pblob, it)); +} + void MKLDNNDeviceContext::BlockNextCacheClearing() { std::lock_guard lock(*p_mutex_); VLOG(3) << "Next DNNL cache clearing has been blocked."; @@ -682,7 +700,11 @@ void MKLDNNDeviceContext::SetBlob(const std::string& name, // Find Blob via name auto blob_it = pBlob->find(name); if (blob_it == pBlob->end()) { - (*pBlob)[name] = data; + auto el = + pBlob->insert(std::make_pair(name, data)); // (*pBlob)[name] = data; + // Register new element in per executor map + // to have easily erased when executor terminated + LinkEntryWithExecutor(pBlob, el.first); } else { blob_it->second = data; // set data to existing blob } diff --git a/paddle/fluid/platform/device_context.h b/paddle/fluid/platform/device_context.h index f79cb1ab947..d91e14ec3aa 100644 --- a/paddle/fluid/platform/device_context.h +++ b/paddle/fluid/platform/device_context.h @@ -673,6 +673,7 @@ class MKLDNNDeviceContextThreadLocals { mkldnn::stream cur_stream; std::string key_suffix; // Key identifying current Executor bool key_attach_thread_id = true; + void* exec_ptr_ = nullptr; Body(); ~Body(); @@ -689,6 +690,8 @@ class MKLDNNDeviceContextThreadLocals { const std::string& get_key_suffix(void) const { return key_suffix; } void disable_tid_in_key(void) { key_attach_thread_id = false; } bool is_tid_used_in_key(void) const { return key_attach_thread_id; } + void set_curr_exec(void* exec_ptr) { exec_ptr_ = exec_ptr; } + void* get_curr_exec(void) const { return exec_ptr_; } }; MKLDNNDeviceContextThreadLocals() = default; MKLDNNDeviceContextThreadLocals(const MKLDNNDeviceContextThreadLocals& c) = @@ -724,13 +727,19 @@ class MKLDNNDeviceContext : public CPUDeviceContext { using ShapeBlob = umap_key_string_t; using BlobMap = umap_value_smart_t; + using ExecMap = std::unordered_map< + void*, std::vector, KeyBlob::iterator>>>; + explicit MKLDNNDeviceContext(CPUPlace place); /* \brief Get the active engine */ const mkldnn::engine& GetEngine() const { return tls().get_engine(); } + // Register object to currently used executor's map + void LinkEntryWithExecutor(BlobPtr_t, KeyBlob::iterator) const; + // Remove all entries from the blob map - void ResetBlobMap(); + void ResetBlobMap(void* ptr); // Prevent next ResetBlobMap() void BlockNextCacheClearing(); @@ -753,6 +762,9 @@ class MKLDNNDeviceContext : public CPUDeviceContext { private: std::shared_ptr p_blobmap_; + // Map key is pointer of executor and value is a data(iterator in map) needed + // to erase + std::shared_ptr p_exec_items_; std::shared_ptr p_mutex_; bool block_next_cache_clearing_ = false; }; diff --git a/paddle/fluid/platform/mkldnn_helper.h b/paddle/fluid/platform/mkldnn_helper.h index 35776b9f1e6..0b683a742c9 100644 --- a/paddle/fluid/platform/mkldnn_helper.h +++ b/paddle/fluid/platform/mkldnn_helper.h @@ -135,13 +135,14 @@ inline mkldnn::memory::desc MKLDNNMemDesc(const std::vector& dims, return mkldnn::memory::desc({dims}, data_type, format); } 
-inline void ClearMKLDNNCache(const platform::Place& place) { +inline void ClearMKLDNNCache(const platform::Place& place, + void* ptr = nullptr) { // Clear mkl-dnn cache, if (platform::is_cpu_place(place)) { platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); platform::MKLDNNDeviceContext* dev_ctx = (platform::MKLDNNDeviceContext*)pool.Get(place); - dev_ctx->ResetBlobMap(); + dev_ctx->ResetBlobMap(ptr); platform::MKLDNNDeviceContext::tls().set_cur_paddle_data_layout( paddle::framework::DataLayout::kNCHW); } @@ -452,6 +453,9 @@ inline void AttachPointerHashToMKLDNNKey(void* ptr, paddle::platform::MKLDNNDeviceContext::tls().set_key_suffix( "E" + std::to_string(reinterpret_cast(ptr))); } + // Let's register adress of current executor + paddle::platform::MKLDNNDeviceContext::tls().set_curr_exec(ptr); + // For first thread if (first_thread == ThreadIDasStr()) { paddle::platform::MKLDNNDeviceContext::tls().disable_tid_in_key(); -- GitLab From 6d3eb3d0ed2e3004a24096ef9bd13be08db6c229 Mon Sep 17 00:00:00 2001 From: wawltor Date: Wed, 28 Apr 2021 11:56:32 +0800 Subject: [PATCH 038/720] Reduce the time cost for the elementwise_add test case (#32628) Reduce the time cost for the elementwise_add test case (#32628) --- .../fluid/tests/unittests/test_elementwise_add_op.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/test_elementwise_add_op.py b/python/paddle/fluid/tests/unittests/test_elementwise_add_op.py index cc362005f33..9235542fede 100644 --- a/python/paddle/fluid/tests/unittests/test_elementwise_add_op.py +++ b/python/paddle/fluid/tests/unittests/test_elementwise_add_op.py @@ -204,7 +204,7 @@ class TestFP16ElementwiseAddOp_broadcast_2(TestFP16ElementwiseAddOp): class TestElementwiseAddOp_broadcast_3(TestElementwiseAddOp): def init_input_output(self): - self.x = np.random.rand(2, 10, 12, 3).astype(self.dtype) + self.x = np.random.rand(2, 10, 12, 1).astype(self.dtype) self.y = np.random.rand(10, 12).astype(self.dtype) self.out = self.x + self.y.reshape(1, 10, 12, 1) @@ -224,7 +224,7 @@ class TestFP16ElementwiseAddOp_broadcast_3(TestFP16ElementwiseAddOp): class TestElementwiseAddOp_broadcast_4(TestElementwiseAddOp): def init_input_output(self): - self.x = np.random.rand(100, 2, 3, 4).astype(self.dtype) + self.x = np.random.rand(100, 2, 1, 2).astype(self.dtype) self.y = np.random.rand(100, 1).astype(self.dtype) self.out = self.x + self.y.reshape(100, 1, 1, 1) @@ -234,7 +234,7 @@ class TestElementwiseAddOp_broadcast_4(TestElementwiseAddOp): class TestFP16ElementwiseAddOp_broadcast_4(TestFP16ElementwiseAddOp): def init_input_output(self): - self.x = np.random.rand(100, 2, 3, 4).astype(self.dtype) + self.x = np.random.rand(100, 2, 1, 2).astype(self.dtype) self.y = np.random.rand(100, 1).astype(self.dtype) self.out = self.x + self.y.reshape(100, 1, 1, 1) @@ -353,7 +353,7 @@ class TestElementwiseAddOp_commonuse_add1(TestElementwiseAddOp): class TestElementwiseFP16AddOp_commonuse_add1(TestFP16ElementwiseAddOp): def init_input_output(self): - self.x = np.random.rand(20, 30, 100).astype(self.dtype) + self.x = np.random.rand(2, 3, 100).astype(self.dtype) self.y = np.random.rand(1, 1, 100).astype(self.dtype) self.out = self.x + self.y @@ -374,7 +374,7 @@ class TestElementwiseAddOp_commonuse_add2(TestElementwiseAddOp): class TestElementwiseAddOp_xsize_lessthan_ysize_add(TestElementwiseAddOp): def init_input_output(self): self.x = np.random.rand(10, 12).astype(self.dtype) - self.y = np.random.rand(2, 3, 10, 
12).astype(self.dtype) + self.y = np.random.rand(2, 2, 10, 12).astype(self.dtype) self.out = self.x + self.y def init_axis(self): @@ -384,7 +384,7 @@ class TestElementwiseAddOp_xsize_lessthan_ysize_add(TestElementwiseAddOp): class TestElementwiseAddOp_same_shape_ysize_large(TestElementwiseAddOp): def init_input_output(self): self.x = np.random.rand(10, 1, 12).astype(self.dtype) - self.y = np.random.rand(10, 3, 12).astype(self.dtype) + self.y = np.random.rand(10, 2, 12).astype(self.dtype) self.out = self.x + self.y def init_axis(self): -- GitLab From 7a245b7a6aa665ec08db816aba50eb51d0e4219b Mon Sep 17 00:00:00 2001 From: zhulei <563755780@qq.com> Date: Wed, 28 Apr 2021 14:31:10 +0800 Subject: [PATCH 039/720] [Rocm] fix test_var_base (#32639) --- paddle/fluid/imperative/tracer.cc | 4 ++-- python/paddle/fluid/tests/unittests/test_var_base.py | 10 ++++++---- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/paddle/fluid/imperative/tracer.cc b/paddle/fluid/imperative/tracer.cc index 742514c0910..41ad70e5a57 100644 --- a/paddle/fluid/imperative/tracer.cc +++ b/paddle/fluid/imperative/tracer.cc @@ -84,7 +84,7 @@ paddle::framework::GarbageCollector* Tracer::MutableGarbageCollectorIfNotExists( if (gcs_.count(place) == 0) { std::unique_ptr gc; if (platform::is_gpu_place(place)) { -#ifdef PADDLE_WITH_CUDA +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) gc.reset(new framework::DefaultStreamGarbageCollector( BOOST_GET_CONST(platform::CUDAPlace, place), 0)); @@ -95,7 +95,7 @@ paddle::framework::GarbageCollector* Tracer::MutableGarbageCollectorIfNotExists( "Please recompile or reinstall Paddle with GPU support.")); #endif } else if (platform::is_cuda_pinned_place(place)) { -#ifdef PADDLE_WITH_CUDA +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) gc.reset(new framework::CUDAPinnedGarbageCollector( BOOST_GET_CONST(platform::CUDAPinnedPlace, place), 0)); diff --git a/python/paddle/fluid/tests/unittests/test_var_base.py b/python/paddle/fluid/tests/unittests/test_var_base.py index a65308c84e7..8bf42390d1e 100644 --- a/python/paddle/fluid/tests/unittests/test_var_base.py +++ b/python/paddle/fluid/tests/unittests/test_var_base.py @@ -256,19 +256,21 @@ class TestVarBase(unittest.TestCase): detach_x = x.detach() self.assertTrue(detach_x.stop_gradient, True) + cmp_float = np.allclose if core.is_compiled_with_rocm( + ) else np.array_equal detach_x[:] = 10.0 - self.assertTrue(np.array_equal(x.numpy(), [10.0])) + self.assertTrue(cmp_float(x.numpy(), [10.0])) y = x**2 y.backward() - self.assertTrue(np.array_equal(x.grad.numpy(), [20.0])) + self.assertTrue(cmp_float(x.grad.numpy(), [20.0])) self.assertEqual(detach_x.grad, None) detach_x.stop_gradient = False # Set stop_gradient to be False, supported auto-grad z = 3 * detach_x**2 z.backward() - self.assertTrue(np.array_equal(x.grad.numpy(), [20.0])) - self.assertTrue(np.array_equal(detach_x.grad.numpy(), [60.0])) + self.assertTrue(cmp_float(x.grad.numpy(), [20.0])) + self.assertTrue(cmp_float(detach_x.grad.numpy(), [60.0])) # Due to sharing of data with origin Tensor, There are some unsafe operations: with self.assertRaises(RuntimeError): -- GitLab From 9ee709fc8dff70c2580c26886a5f69793f866a24 Mon Sep 17 00:00:00 2001 From: Kqnonrime <36952116+Kqnonrime@users.noreply.github.com> Date: Wed, 28 Apr 2021 14:50:25 +0800 Subject: [PATCH 040/720] Fix some error message (#32614) * fix two error message * fix two error message * fix error * fix error * fix error * fix error * fix some error message * fix some error * fix error * fix some error 
* fix some error * fix some error * fix one error * fix some error * fix seven error message * fix error * fix error * fix error * fix error * fix some error message * fix error * fix some error * fix some error --- paddle/fluid/operators/interpolate_op.cc | 7 +- paddle/fluid/operators/interpolate_v2_op.cc | 50 ++++-- paddle/fluid/operators/interpolate_v2_op.cu | 176 ++++++++++++++++---- 3 files changed, 184 insertions(+), 49 deletions(-) diff --git a/paddle/fluid/operators/interpolate_op.cc b/paddle/fluid/operators/interpolate_op.cc index 6c488c387f8..445d129d07c 100644 --- a/paddle/fluid/operators/interpolate_op.cc +++ b/paddle/fluid/operators/interpolate_op.cc @@ -88,8 +88,11 @@ static void Interpolate1DInferShapeCheck(framework::InferShapeContext* ctx) { platform::errors::InvalidArgument( "OutSize's dimension size must be 1, but got dimention = %d .", out_size_dim.size())); - PADDLE_ENFORCE_EQ(out_size_dim[0], 1, platform::errors::InvalidArgument( - "OutSize's dim[0] must be 1")); + PADDLE_ENFORCE_EQ( + out_size_dim[0], 1, + platform::errors::InvalidArgument( + "OutSize's 0-th dimension's value must be 1, but got value = %d .", + out_size_dim[0])); ctx->ShareLoD("X", "Out"); return; } diff --git a/paddle/fluid/operators/interpolate_v2_op.cc b/paddle/fluid/operators/interpolate_v2_op.cc index cb93044ca58..a4353420c84 100644 --- a/paddle/fluid/operators/interpolate_v2_op.cc +++ b/paddle/fluid/operators/interpolate_v2_op.cc @@ -76,9 +76,12 @@ static void Interpolate1DInferShapeCheck(framework::InferShapeContext* ctx) { if (scale.size() > 0) { float scale_w = -1; scale_w = scale[0]; - PADDLE_ENFORCE_EQ(scale_w > 0, true, platform::errors::InvalidArgument( - "scale of Op(interpolate) " - "should be greater than 0.")); + PADDLE_ENFORCE_EQ( + scale_w > 0, true, + platform::errors::InvalidArgument( + "The scale_w in Attr(scale) of Operator(interpolate) " + "should be greater than 0, but received value is %d.", + scale_w)); if (scale_w > 0.) { // round down out_w = (data_layout == DataLayout::kNCHW @@ -99,8 +102,11 @@ static void Interpolate1DInferShapeCheck(framework::InferShapeContext* ctx) { platform::errors::InvalidArgument( "OutSize's dimension size must be 1, but got dimention = %d .", out_size_dim.size())); - PADDLE_ENFORCE_EQ(out_size_dim[0], 1, platform::errors::InvalidArgument( - "OutSize's dim[0] must be 1")); + PADDLE_ENFORCE_EQ( + out_size_dim[0], 1, + platform::errors::InvalidArgument( + "OutSize's 0-th dimension's value must be 1, but got value = %d .", + out_size_dim[0])); ctx->ShareLoD("X", "Out"); return; } @@ -173,9 +179,17 @@ static void Interpolate2DInferShapeCheck(framework::InferShapeContext* ctx) { scale_h = scale[0]; scale_w = scale[1]; PADDLE_ENFORCE_EQ( - scale_w > 0 && scale_h > 0, true, - platform::errors::InvalidArgument("scale of Op(interpolate) " - "should be greater than 0.")); + scale_w > 0, true, + platform::errors::InvalidArgument( + "The scale_w in Attr(scale) of Operator(interpolate) " + "should be greater than 0, but received value is %d.", + scale_w)); + PADDLE_ENFORCE_EQ( + scale_h > 0, true, + platform::errors::InvalidArgument( + "The scale_h in Attr(scale) of Operator(interpolate) " + "should be greater than 0, but received value is %d.", + scale_h)); if (scale_h > 0. && scale_w > 0.) 
{ // round down out_h = (data_layout == DataLayout::kNCHW @@ -281,9 +295,23 @@ static void Interpolate3DInferShapeCheck(framework::InferShapeContext* ctx) { scale_h = scale[1]; scale_w = scale[2]; PADDLE_ENFORCE_EQ( - scale_w > 0 && scale_h > 0 && scale_d > 0, true, - platform::errors::InvalidArgument("scale of Op(interpolate) " - "should be greater than 0.")); + scale_w > 0, true, + platform::errors::InvalidArgument( + "The scale_w in Attr(scale) of Operator(interpolate) " + "should be greater than 0, but received value is %d.", + scale_w)); + PADDLE_ENFORCE_EQ( + scale_h > 0, true, + platform::errors::InvalidArgument( + "The scale_h in Attr(scale) of Operator(interpolate) " + "should be greater than 0, but received value is %d.", + scale_h)); + PADDLE_ENFORCE_EQ( + scale_d > 0, true, + platform::errors::InvalidArgument( + "The scale_d in Attr(scale) of Operator(interpolate) " + "should be greater than 0, but received value is %d.", + scale_d)); if (scale_d > 0. && scale_h > 0. && scale_w > 0.) { // round down out_d = (data_layout == DataLayout::kNCHW diff --git a/paddle/fluid/operators/interpolate_v2_op.cu b/paddle/fluid/operators/interpolate_v2_op.cu index e5002e72d0e..6745592c5c1 100644 --- a/paddle/fluid/operators/interpolate_v2_op.cu +++ b/paddle/fluid/operators/interpolate_v2_op.cu @@ -982,15 +982,21 @@ static void Interpolate1DCUDAFwd(const framework::ExecutionContext& ctx, if (scale_tensor != nullptr) { auto scale_data = get_new_data_from_tensor(scale_tensor); scale_w = scale_data[0]; - PADDLE_ENFORCE_EQ(scale_w > 0, true, platform::errors::InvalidArgument( - "scale of Op(interpolate) " - "should be greater than 0.")); + PADDLE_ENFORCE_EQ( + scale_w > 0, true, + platform::errors::InvalidArgument( + "The scale_w in input 'Scale' Tensor of Operator(interpolate) " + "should be greater than 0, but received value is %d.", + scale_w)); } else { if (scale.size() > 0) { scale_w = scale[0]; - PADDLE_ENFORCE_EQ(scale_w > 0, true, platform::errors::InvalidArgument( - "scale of Op(interpolate) " - "should be greater than 0.")); + PADDLE_ENFORCE_EQ( + scale_w > 0, true, + platform::errors::InvalidArgument( + "The scale_w in Attr(scale) of Operator(interpolate) " + "should be greater than 0, but received value is %d.", + scale_w)); } } if (scale_w > 0.) 
{ @@ -1081,18 +1087,36 @@ static void Interpolate2DCUDAFwd(const framework::ExecutionContext& ctx, scale_h = scale_data[0]; scale_w = scale_data[0]; } + PADDLE_ENFORCE_EQ( - scale_w > 0 && scale_h > 0, true, - platform::errors::InvalidArgument("scale of Op(interpolate) " - "should be greater than 0.")); + scale_w > 0, true, + platform::errors::InvalidArgument( + "The scale_w in input 'Scale' Tensor of Operator(interpolate) " + "should be greater than 0, but received value is %d.", + scale_w)); + PADDLE_ENFORCE_EQ( + scale_h > 0, true, + platform::errors::InvalidArgument( + "The scale_h in input 'Scale' Tensor of Operator(interpolate) " + "should be greater than 0, but received value is %d.", + scale_h)); } else { if (scale.size() > 1) { scale_w = scale[1]; scale_h = scale[0]; + PADDLE_ENFORCE_EQ( - scale_w > 0 && scale_h > 0, true, - platform::errors::InvalidArgument("scale of Op(interpolate) " - "should be greater than 0.")); + scale_w > 0, true, + platform::errors::InvalidArgument( + "The scale_w in Attr(scale) of Operator(interpolate) " + "should be greater than 0, but received value is %d.", + scale_w)); + PADDLE_ENFORCE_EQ( + scale_h > 0, true, + platform::errors::InvalidArgument( + "The scale_h in Attr(scale) of Operator(interpolate) " + "should be greater than 0, but received value is %d.", + scale_h)); } } if (scale_w > 0. && scale_h > 0.) { @@ -1216,10 +1240,25 @@ static void Interpolate3DCUDAFwd(const framework::ExecutionContext& ctx, scale_h = scale_data[0]; scale_w = scale_data[0]; } + + PADDLE_ENFORCE_EQ( + scale_w > 0, true, + platform::errors::InvalidArgument( + "The scale_w in input 'Scale' Tensor of Operator(interpolate) " + "should be greater than 0, but received value is %d.", + scale_w)); PADDLE_ENFORCE_EQ( - scale_w > 0 && scale_h > 0 && scale_d > 0, true, - platform::errors::InvalidArgument("scale of Op(interpolate) " - "should be greater than 0.")); + scale_h > 0, true, + platform::errors::InvalidArgument( + "The scale_h in input 'Scale' Tensor of Operator(interpolate) " + "should be greater than 0, but received value is %d.", + scale_h)); + PADDLE_ENFORCE_EQ( + scale_d > 0, true, + platform::errors::InvalidArgument( + "The scale_d in input 'Scale' Tensor of Operator(interpolate) " + "should be greater than 0, but received value is %d.", + scale_d)); } else { if (scale.size() > 1) { scale_d = scale[0]; @@ -1227,9 +1266,23 @@ static void Interpolate3DCUDAFwd(const framework::ExecutionContext& ctx, scale_w = scale[2]; PADDLE_ENFORCE_EQ( - scale_w > 0 && scale_h > 0 && scale_d > 0, true, - platform::errors::InvalidArgument("scale of Op(interpolate) " - "should be greater than 0.")); + scale_w > 0, true, + platform::errors::InvalidArgument( + "The scale_w in Attr(scale) of Operator(interpolate) " + "should be greater than 0, but received value is %d.", + scale_w)); + PADDLE_ENFORCE_EQ( + scale_h > 0, true, + platform::errors::InvalidArgument( + "The scale_h in Attr(scale) of Operator(interpolate) " + "should be greater than 0, but received value is %d.", + scale_h)); + PADDLE_ENFORCE_EQ( + scale_d > 0, true, + platform::errors::InvalidArgument( + "The scale_d in Attr(scale) of Operator(interpolate) " + "should be greater than 0, but received value is %d.", + scale_d)); } } if (scale_d > 0. && scale_h > 0. && scale_w > 0.) 
{ @@ -1334,16 +1387,22 @@ static void Interpolate1DCUDABwd(const framework::ExecutionContext& ctx, if (scale_tensor != nullptr) { auto scale_data = get_new_data_from_tensor(scale_tensor); scale_w = scale_data[0]; - PADDLE_ENFORCE_EQ(scale_w > 0, true, platform::errors::InvalidArgument( - "scale of Op(interpolate) " - "should be greater than 0.")); + PADDLE_ENFORCE_EQ( + scale_w > 0, true, + platform::errors::InvalidArgument( + "The scale_w in input 'Scale' Tensor of Operator(interpolate) " + "should be greater than 0, but received value is %d.", + scale_w)); } else { if (scale.size() > 0) { scale_w = scale[0]; - PADDLE_ENFORCE_EQ(scale_w > 0, true, platform::errors::InvalidArgument( - "scale of Op(interpolate) " - "should be greater than 0.")); + PADDLE_ENFORCE_EQ( + scale_w > 0, true, + platform::errors::InvalidArgument( + "The scale_w in Attr(scale) of Operator(interpolate) " + "should be greater than 0, but received value is %d.", + scale_w)); } } if (scale_w > 0.) { @@ -1433,19 +1492,36 @@ static void Interpolate2DCUDABwd(const framework::ExecutionContext& ctx, scale_h = scale_data[0]; scale_w = scale_data[0]; } + + PADDLE_ENFORCE_EQ( + scale_w > 0, true, + platform::errors::InvalidArgument( + "The scale_w in input 'Scale' Tensor of Operator(interpolate) " + "should be greater than 0, but received value is %d.", + scale_w)); PADDLE_ENFORCE_EQ( - scale_w > 0 && scale_h > 0, true, - platform::errors::InvalidArgument("scale of Op(interpolate) " - "should be greater than 0.")); + scale_h > 0, true, + platform::errors::InvalidArgument( + "The scale_h in input 'Scale' Tensor of Operator(interpolate) " + "should be greater than 0, but received value is %d.", + scale_h)); } else { if (scale.size() > 1) { scale_w = scale[1]; scale_h = scale[0]; PADDLE_ENFORCE_EQ( - scale_w > 0 && scale_h > 0, true, - platform::errors::InvalidArgument("scale of Op(interpolate) " - "should be greater than 0.")); + scale_w > 0, true, + platform::errors::InvalidArgument( + "The scale_w in Attr(scale) of Operator(interpolate) " + "should be greater than 0, but received value is %d.", + scale_w)); + PADDLE_ENFORCE_EQ( + scale_h > 0, true, + platform::errors::InvalidArgument( + "The scale_h in Attr(scale) of Operator(interpolate) " + "should be greater than 0, but received value is %d.", + scale_h)); } } if (scale_w > 0. && scale_h > 0.) 
{ @@ -1581,9 +1657,23 @@ static void Interpolate3DCUDABwd(const framework::ExecutionContext& ctx, scale_w = scale_data[0]; } PADDLE_ENFORCE_EQ( - scale_w > 0 && scale_h > 0 && scale_d > 0, true, - platform::errors::InvalidArgument("scale of Op(interpolate) " - "should be greater than 0.")); + scale_w > 0, true, + platform::errors::InvalidArgument( + "The scale_w in input 'Scale' Tensor of Operator(interpolate) " + "should be greater than 0, but received value is %d.", + scale_w)); + PADDLE_ENFORCE_EQ( + scale_h > 0, true, + platform::errors::InvalidArgument( + "The scale_h in input 'Scale' Tensor of Operator(interpolate) " + "should be greater than 0, but received value is %d.", + scale_h)); + PADDLE_ENFORCE_EQ( + scale_d > 0, true, + platform::errors::InvalidArgument( + "The scale_d in input 'Scale' Tensor of Operator(interpolate) " + "should be greater than 0, but received value is %d.", + scale_d)); } else { if (scale.size() > 1) { scale_d = scale[0]; @@ -1591,9 +1681,23 @@ static void Interpolate3DCUDABwd(const framework::ExecutionContext& ctx, scale_w = scale[2]; PADDLE_ENFORCE_EQ( - scale_w > 0 && scale_h > 0 && scale_d > 0, true, - platform::errors::InvalidArgument("scale of Op(interpolate) " - "should be greater than 0.")); + scale_w > 0, true, + platform::errors::InvalidArgument( + "The scale_w in Attr(scale) of Operator(interpolate) " + "should be greater than 0, but received value is %d.", + scale_w)); + PADDLE_ENFORCE_EQ( + scale_h > 0, true, + platform::errors::InvalidArgument( + "The scale_h in Attr(scale) of Operator(interpolate) " + "should be greater than 0, but received value is %d.", + scale_h)); + PADDLE_ENFORCE_EQ( + scale_d > 0, true, + platform::errors::InvalidArgument( + "The scale_d in Attr(scale) of Operator(interpolate) " + "should be greater than 0, but received value is %d.", + scale_d)); } } if (scale_d > 0. && scale_h > 0. && scale_w > 0.) { -- GitLab From 4ead9a5a3c936d045ffa400536ec348e81bcaea2 Mon Sep 17 00:00:00 2001 From: Thunderbrook <52529258+Thunderbrook@users.noreply.github.com> Date: Wed, 28 Apr 2021 15:02:33 +0800 Subject: [PATCH 041/720] [PsCore] solve Brpc dep (#32632) * Revert "Revert "[PsCore] optimize performance of large kv (#32535)" (#32599)" This reverts commit 809ac03656712744d6dea7a6268aeeea46b6f12e. 
* brpc dep --- CMakeLists.txt | 5 + paddle/fluid/distributed/CMakeLists.txt | 2 +- .../distributed/service/brpc_ps_server.cc | 23 +-- paddle/fluid/distributed/table/CMakeLists.txt | 6 +- .../distributed/table/common_sparse_table.cc | 55 +++--- .../table/depends/large_scale_kv.h | 158 ++++++++++-------- paddle/fluid/distributed/test/CMakeLists.txt | 6 +- paddle/fluid/framework/CMakeLists.txt | 10 +- .../framework/fleet/heter_ps/CMakeLists.txt | 10 +- paddle/fluid/framework/trainer.h | 1 - .../distributed/fleet/runtime/the_one_ps.py | 45 +++-- .../distributed_strategy.py | 1 + .../fleet/parameter_server/ir/public.py | 1 + 13 files changed, 197 insertions(+), 126 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 2f16c390d8b..f30671bd3a8 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -353,6 +353,11 @@ if (WITH_MIPS) add_definitions(-DPADDLE_WITH_MIPS) endif() +if (WITH_HETERPS) + if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 7.0) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -faligned-new") + endif() +endif() set(PADDLE_PYTHON_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/python/build") set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O3 -g -DNDEBUG") diff --git a/paddle/fluid/distributed/CMakeLists.txt b/paddle/fluid/distributed/CMakeLists.txt index a2062d82c81..905347d031b 100644 --- a/paddle/fluid/distributed/CMakeLists.txt +++ b/paddle/fluid/distributed/CMakeLists.txt @@ -11,8 +11,8 @@ if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 7.0) "${DISTRIBUTE_COMPILE_FLAGS} -faligned-new") endif() -add_subdirectory(table) add_subdirectory(service) +add_subdirectory(table) add_subdirectory(test) add_subdirectory(index_dataset) diff --git a/paddle/fluid/distributed/service/brpc_ps_server.cc b/paddle/fluid/distributed/service/brpc_ps_server.cc index a9370561a54..a1440260bf2 100644 --- a/paddle/fluid/distributed/service/brpc_ps_server.cc +++ b/paddle/fluid/distributed/service/brpc_ps_server.cc @@ -14,6 +14,7 @@ #include "paddle/fluid/distributed/service/brpc_ps_server.h" #include // NOLINT +#include "butil/object_pool.h" #include "paddle/fluid/distributed/table/depends/sparse_utils.h" #include "paddle/fluid/distributed/table/table.h" #include "paddle/fluid/framework/archive.h" @@ -196,12 +197,13 @@ int32_t BrpcPsService::pull_dense(Table *table, const PsRequestMessage &request, return 0; } - std::vector res_data; - res_data.resize(num * table->value_accesor()->select_size() / sizeof(float)); - table->pull_dense(res_data.data(), num); + auto res_data = butil::get_object>(); + res_data->resize(num * table->value_accesor()->select_size() / sizeof(float)); + table->pull_dense(res_data->data(), num); - cntl->response_attachment().append((char *)res_data.data(), - res_data.size() * sizeof(float)); + cntl->response_attachment().append((char *)(res_data->data()), + res_data->size() * sizeof(float)); + butil::return_object(res_data); return 0; } @@ -367,12 +369,13 @@ int32_t BrpcPsService::pull_sparse(Table *table, value.DeserializeFromBytes(const_cast(data)); - std::vector res_data; - res_data.resize(num * dim); - table->pull_sparse(res_data.data(), value); + auto res_data = butil::get_object>(); + res_data->resize(num * dim); + table->pull_sparse(res_data->data(), value); - cntl->response_attachment().append((char *)res_data.data(), - res_data.size() * sizeof(float)); + cntl->response_attachment().append((char *)(res_data->data()), + res_data->size() * sizeof(float)); + butil::return_object(res_data); return 0; } diff --git a/paddle/fluid/distributed/table/CMakeLists.txt 
b/paddle/fluid/distributed/table/CMakeLists.txt index dde1f5ae8ee..dab39095803 100644 --- a/paddle/fluid/distributed/table/CMakeLists.txt +++ b/paddle/fluid/distributed/table/CMakeLists.txt @@ -13,7 +13,11 @@ set_source_files_properties(sparse_geo_table.cc PROPERTIES COMPILE_FLAGS ${DISTR set_source_files_properties(barrier_table.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) set_source_files_properties(common_graph_table.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) -cc_library(common_table SRCS common_sparse_table.cc common_dense_table.cc sparse_geo_table.cc barrier_table.cc common_graph_table.cc DEPS ${TABLE_DEPS} graph_edge graph_node device_context string_helper simple_threadpool xxhash generator) +get_property(RPC_DEPS GLOBAL PROPERTY RPC_DEPS) + +cc_library(common_table SRCS common_sparse_table.cc common_dense_table.cc +sparse_geo_table.cc barrier_table.cc common_graph_table.cc DEPS ${TABLE_DEPS} +${RPC_DEPS} graph_edge graph_node device_context string_helper simple_threadpool xxhash generator) set_source_files_properties(tensor_accessor.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) set_source_files_properties(tensor_table.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) diff --git a/paddle/fluid/distributed/table/common_sparse_table.cc b/paddle/fluid/distributed/table/common_sparse_table.cc index 1c315d34abc..718fce99507 100644 --- a/paddle/fluid/distributed/table/common_sparse_table.cc +++ b/paddle/fluid/distributed/table/common_sparse_table.cc @@ -125,34 +125,37 @@ void ProcessALine(const std::vector& columns, const Meta& meta, int64_t SaveToText(std::ostream* os, std::shared_ptr block, const int mode) { - int64_t not_save_num = 0; - for (auto& value : block->values_) { - if (mode == SaveMode::delta && !value.second.need_save_) { - not_save_num++; - continue; - } - - auto* vs = value.second.data_; - std::stringstream ss; - auto id = value.first; - ss << id << "\t" << value.second.count_ << "\t" << value.second.unseen_days_ - << "\t" << value.second.is_entry_ << "\t"; - - for (int i = 0; i < block->value_length_; i++) { - ss << vs[i]; - ss << ","; - } + int64_t save_num = 0; + for (auto& table : block->values_) { + for (auto& value : table) { + if (mode == SaveMode::delta && !value.second->need_save_) { + continue; + } + save_num += 1; + + auto* vs = value.second->data_.data(); + std::stringstream ss; + auto id = value.first; + ss << id << "\t" << value.second->count_ << "\t" + << value.second->unseen_days_ << "\t" << value.second->is_entry_ + << "\t"; + + for (int i = 0; i < block->value_length_; i++) { + ss << vs[i]; + ss << ","; + } - ss << "\n"; + ss << "\n"; - os->write(ss.str().c_str(), sizeof(char) * ss.str().size()); + os->write(ss.str().c_str(), sizeof(char) * ss.str().size()); - if (mode == SaveMode::base || mode == SaveMode::delta) { - value.second.need_save_ = false; + if (mode == SaveMode::base || mode == SaveMode::delta) { + value.second->need_save_ = false; + } } } - return block->values_.size() - not_save_num; + return save_num; } int64_t LoadFromText(const std::string& valuepath, const std::string& metapath, @@ -183,7 +186,7 @@ int64_t LoadFromText(const std::string& valuepath, const std::string& metapath, block->Init(id, false); - auto value_instant = block->GetValue(id); + VALUE* value_instant = block->GetValue(id); if (values.size() == 5) { value_instant->count_ = std::stoi(values[1]); value_instant->unseen_days_ = std::stoi(values[2]); @@ -373,8 +376,10 @@ std::pair CommonSparseTable::print_table_stat() { int64_t 
feasign_size = 0; int64_t mf_size = 0; - for (auto& value : shard_values_) { - feasign_size += value->values_.size(); + for (auto& shard : shard_values_) { + for (auto& table : shard->values_) { + feasign_size += table.size(); + } } return {feasign_size, mf_size}; diff --git a/paddle/fluid/distributed/table/depends/large_scale_kv.h b/paddle/fluid/distributed/table/depends/large_scale_kv.h index bb4174bd2c5..5c10fca98cd 100644 --- a/paddle/fluid/distributed/table/depends/large_scale_kv.h +++ b/paddle/fluid/distributed/table/depends/large_scale_kv.h @@ -26,6 +26,7 @@ #include #include "gflags/gflags.h" +#include "butil/object_pool.h" #include "paddle/fluid/distributed/common/utils.h" #include "paddle/fluid/distributed/table/depends/initializers.h" #include "paddle/fluid/distributed/thirdparty/round_robin.h" @@ -48,6 +49,10 @@ namespace distributed { enum Mode { training, infer }; +static const int SPARSE_SHARD_BUCKET_NUM_BITS = 6; +static const size_t SPARSE_SHARD_BUCKET_NUM = (size_t)1 + << SPARSE_SHARD_BUCKET_NUM_BITS; + struct VALUE { explicit VALUE(size_t length) : length_(length), @@ -55,46 +60,16 @@ struct VALUE { unseen_days_(0), need_save_(false), is_entry_(false) { - data_ = new float[length]; - memset(data_, 0, sizeof(float) * length); - } - - VALUE(const VALUE &value) { - length_ = value.length_; - count_ = value.count_; - unseen_days_ = value.unseen_days_; - need_save_ = value.need_save_; - is_entry_ = value.is_entry_; - data_ = new float[length_]; - memcpy(data_, value.data_, sizeof(float) * length_); - } - - VALUE &operator=(const VALUE &value) { - if (this != &value) { - delete[] data_; - length_ = value.length_; - count_ = value.count_; - unseen_days_ = value.unseen_days_; - need_save_ = value.need_save_; - is_entry_ = value.is_entry_; - - data_ = new float[length_]; - memcpy(data_, value.data_, sizeof(float) * length_); - } - return *this; - } - - ~VALUE() { - delete[] data_; - data_ = nullptr; + data_.resize(length); + memset(data_.data(), 0, sizeof(float) * length); } size_t length_; + std::vector data_; int count_; int unseen_days_; // use to check knock-out bool need_save_; // whether need to save bool is_entry_; // whether knock-in - float *data_; }; inline bool count_entry(VALUE *value, int threshold) { @@ -176,12 +151,12 @@ class ValueBlock { const std::vector &value_dims) { auto pts = std::vector(); pts.reserve(value_names.size()); - auto &values = values_.at(id); + auto values = GetValue(id); for (int i = 0; i < static_cast(value_names.size()); i++) { PADDLE_ENFORCE_EQ( value_dims[i], value_dims_[i], platform::errors::InvalidArgument("value dims is not match")); - pts.push_back(values.data_ + + pts.push_back(values->data_.data() + value_offsets_.at(value_idx_.at(value_names[i]))); } return pts; @@ -190,33 +165,45 @@ class ValueBlock { // pull float *Init(const uint64_t &id, const bool with_update = true, const int counter = 1) { - if (!Has(id)) { - values_.emplace(std::make_pair(id, VALUE(value_length_))); - } + size_t hash = _hasher(id); + size_t bucket = compute_bucket(hash); - auto &value = values_.at(id); + auto &table = values_[bucket]; + auto res = table.find(id); - if (with_update) { - AttrUpdate(&value, counter); + VALUE *value = nullptr; + if (res == table.end()) { + value = butil::get_object(value_length_); + + table[id] = value; + + } else { + value = res->second; } - return value.data_; + if (with_update) { + AttrUpdate(value, counter); + } + return value->data_.data(); } - VALUE *InitGet(const uint64_t &id, const bool with_update = true, const int 
counter = 1) { - if (!Has(id)) { - values_.emplace(std::make_pair(id, VALUE(value_length_))); - } + size_t hash = _hasher(id); + size_t bucket = compute_bucket(hash); - auto &value = values_.at(id); + auto &table = values_[bucket]; + auto res = table.find(id); - if (with_update) { - AttrUpdate(&value, counter); + VALUE *value = nullptr; + if (res == table.end()) { + value = butil::get_object(value_length_); + // value = _alloc.acquire(value_length_); + table[id] = value; + } else { + value = (VALUE *)(void *)(res->second); } - - return &value; + return value; } void AttrUpdate(VALUE *value, const int counter) { @@ -229,7 +216,7 @@ class ValueBlock { if (value->is_entry_) { // initialize for (size_t x = 0; x < value_names_.size(); ++x) { - initializers_[x]->GetValue(value->data_ + value_offsets_[x], + initializers_[x]->GetValue(value->data_.data() + value_offsets_[x], value_dims_[x]); } value->need_save_ = true; @@ -243,42 +230,73 @@ class ValueBlock { // dont jude if (has(id)) float *Get(const uint64_t &id) { - auto &value = values_.at(id); - return value.data_; + size_t hash = _hasher(id); + size_t bucket = compute_bucket(hash); + auto &table = values_[bucket]; + + // auto &value = table.at(id); + // return value->data_.data(); + auto res = table.find(id); + VALUE *value = res->second; + return value->data_.data(); } // for load, to reset count, unseen_days - VALUE *GetValue(const uint64_t &id) { return &values_.at(id); } + VALUE *GetValue(const uint64_t &id) { + size_t hash = _hasher(id); + size_t bucket = compute_bucket(hash); + + auto &table = values_[bucket]; + auto res = table.find(id); + return res->second; + } bool GetEntry(const uint64_t &id) { - auto &value = values_.at(id); - return value.is_entry_; + auto value = GetValue(id); + return value->is_entry_; } void SetEntry(const uint64_t &id, const bool state) { - auto &value = values_.at(id); - value.is_entry_ = state; + auto value = GetValue(id); + value->is_entry_ = state; } void Shrink(const int threshold) { - for (auto iter = values_.begin(); iter != values_.end();) { - auto &value = iter->second; - value.unseen_days_++; - if (value.unseen_days_ >= threshold) { - iter = values_.erase(iter); - } else { - ++iter; + for (auto &table : values_) { + for (auto iter = table.begin(); iter != table.end();) { + // VALUE* value = (VALUE*)(void*)(iter->second); + VALUE *value = iter->second; + value->unseen_days_++; + if (value->unseen_days_ >= threshold) { + butil::return_object(iter->second); + //_alloc.release(iter->second); + //_alloc.release(value); + iter = table.erase(iter); + } else { + ++iter; + } } } return; } float GetThreshold() { return threshold_; } + size_t compute_bucket(size_t hash) { + if (SPARSE_SHARD_BUCKET_NUM == 1) { + return 0; + } else { + return hash >> (sizeof(size_t) * 8 - SPARSE_SHARD_BUCKET_NUM_BITS); + } + } private: bool Has(const uint64_t id) { - auto got = values_.find(id); - if (got == values_.end()) { + size_t hash = _hasher(id); + size_t bucket = compute_bucket(hash); + auto &table = values_[bucket]; + + auto got = table.find(id); + if (got == table.end()) { return false; } else { return true; @@ -286,8 +304,9 @@ class ValueBlock { } public: - robin_hood::unordered_map values_; + robin_hood::unordered_map values_[SPARSE_SHARD_BUCKET_NUM]; size_t value_length_ = 0; + std::hash _hasher; private: const std::vector &value_names_; @@ -302,4 +321,3 @@ class ValueBlock { } // namespace distributed } // namespace paddle - diff --git a/paddle/fluid/distributed/test/CMakeLists.txt 
b/paddle/fluid/distributed/test/CMakeLists.txt index b756c740ac7..af87e1b6cc6 100644 --- a/paddle/fluid/distributed/test/CMakeLists.txt +++ b/paddle/fluid/distributed/test/CMakeLists.txt @@ -1,8 +1,10 @@ set_source_files_properties(table_test.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) -cc_test(table_test SRCS table_test.cc DEPS common_table table tensor_accessor ps_framework_proto ${COMMON_DEPS}) +cc_test(table_test SRCS table_test.cc DEPS common_table table tensor_accessor +ps_framework_proto ${COMMON_DEPS} ${RPC_DEPS}) set_source_files_properties(dense_table_test.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) -cc_test(dense_table_test SRCS dense_table_test.cc DEPS common_table table tensor_accessor ps_framework_proto ${COMMON_DEPS}) +cc_test(dense_table_test SRCS dense_table_test.cc DEPS common_table table +tensor_accessor ps_framework_proto ${COMMON_DEPS} ${RPC_DEPS}) set_source_files_properties(barrier_table_test.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) cc_test(barrier_table_test SRCS barrier_table_test.cc DEPS common_table table tensor_accessor ps_framework_proto ${COMMON_DEPS}) diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt index 24bed277280..1494e74c071 100644 --- a/paddle/fluid/framework/CMakeLists.txt +++ b/paddle/fluid/framework/CMakeLists.txt @@ -301,8 +301,14 @@ cc_library(parallel_executor SRCS parallel_executor.cc DEPS fast_threaded_ssa_graph_executor variable_helper) cc_library(executor_cache SRCS executor_cache.cc DEPS executor) -cc_test(dist_multi_trainer_test SRCS dist_multi_trainer_test.cc DEPS - conditional_block_op executor) +if(WITH_PSCORE) + get_property(RPC_DEPS GLOBAL PROPERTY RPC_DEPS) + cc_test(dist_multi_trainer_test SRCS dist_multi_trainer_test.cc DEPS + conditional_block_op executor ${RPC_DEPS}) +else() + cc_test(dist_multi_trainer_test SRCS dist_multi_trainer_test.cc DEPS + conditional_block_op executor) +endif() cc_library(prune SRCS prune.cc DEPS framework_proto boost) cc_test(prune_test SRCS prune_test.cc DEPS op_info prune recurrent_op device_context) cc_test(var_type_inference_test SRCS var_type_inference_test.cc DEPS op_registry diff --git a/paddle/fluid/framework/fleet/heter_ps/CMakeLists.txt b/paddle/fluid/framework/fleet/heter_ps/CMakeLists.txt index 6df2cd52bb4..67c44368b7a 100644 --- a/paddle/fluid/framework/fleet/heter_ps/CMakeLists.txt +++ b/paddle/fluid/framework/fleet/heter_ps/CMakeLists.txt @@ -1,5 +1,13 @@ IF(WITH_GPU) - nv_library(heter_comm SRCS heter_comm.h feature_value.h heter_resource.cc heter_resource.h hashtable.h DEPS cub device_context) + SET(HETERPS_DEPS device_context) + if (${CMAKE_CUDA_COMPILER_VERSION} LESS 11.0) + SET(HETERPS_DEPS ${HETERPS_DEPS} cub) + endif() + if(WITH_PSCORE) + get_property(RPC_DEPS GLOBAL PROPERTY RPC_DEPS) + SET(HETERPS_DEPS ${HETERPS_DEPS} ${RPC_DEPS}) + endif() + nv_library(heter_comm SRCS heter_comm.h feature_value.h heter_resource.cc heter_resource.h hashtable.h DEPS ${HETERPS_DEPS}) nv_test(test_heter_comm SRCS test_heter_comm.cu feature_value.h DEPS heter_comm) nv_library(heter_ps SRCS heter_ps.cu DEPS heter_comm) ENDIF() diff --git a/paddle/fluid/framework/trainer.h b/paddle/fluid/framework/trainer.h index 01aa07e6184..10f6c1ddbd0 100644 --- a/paddle/fluid/framework/trainer.h +++ b/paddle/fluid/framework/trainer.h @@ -26,7 +26,6 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/data_feed.h" #include "paddle/fluid/framework/data_set.h" #include "paddle/fluid/framework/device_worker.h" -#include "paddle/fluid/framework/fleet/heter_context.h" #include "paddle/fluid/framework/fleet/heter_wrapper.h" #include "paddle/fluid/framework/heter_service.h" #include "paddle/fluid/framework/lod_tensor.h" diff --git a/python/paddle/distributed/fleet/runtime/the_one_ps.py b/python/paddle/distributed/fleet/runtime/the_one_ps.py index df07a7a6e77..24b83662c9d 100644 --- a/python/paddle/distributed/fleet/runtime/the_one_ps.py +++ b/python/paddle/distributed/fleet/runtime/the_one_ps.py @@ -77,10 +77,13 @@ class CommonAccessor: ("Moment2", None), ("Beta1Pow", 1), ("Beta2Pow", 1), ("LearningRate", 1)] opt_input_map["sum"] = [("Param", None)] + opt_input_map["naive_adagrad"] = [("Param", None), ("G2Sum", 1), + ("LearningRate", 1)] opt_attr_map = {} opt_attr_map["sgd"] = [] opt_attr_map["sum"] = [] + opt_attr_map["naive_adagrad"] = [] opt_attr_map["adam"] = [("beta1", "f"), ("beta2", "f"), ("epsilon", "f")] @@ -169,6 +172,10 @@ class CommonAccessor: param_varnames = self.opt_input_map["sum"] attr_varnames = self.opt_attr_map["sum"] self.accessor_class = "sum" + elif compiled_strategy.use_ps_gpu and is_sparse: + param_varnames = self.opt_input_map["naive_adagrad"] + attr_varnames = self.opt_attr_map["naive_adagrad"] + self.accessor_class = "sgd" else: param_varnames = self.opt_input_map[oop.type] attr_varnames = self.opt_attr_map[oop.type] @@ -176,20 +183,28 @@ class CommonAccessor: for (formal_name, shape) in param_varnames: params.append(formal_name) - param = main_program.global_block().vars[oop.input(formal_name)[0]] - if formal_name == "LearningRate" and param.name != "learning_rate_0": - warnings.warn("will support decay soon") - param = main_program.global_block().vars["learning_rate_0"] - - if shape is None: - if is_sparse: - shape = total_dims - else: - shape = self.get_shard(total_dims, pserver_num, pserver_id) - dims.append(shape) + if formal_name == "G2Sum": + dims.append(1) + initializer = "fill_constant&0" + initializers.append(initializer) + else: + param = main_program.global_block().vars[oop.input(formal_name)[ + 0]] + if formal_name == "LearningRate" and param.name != "learning_rate_0": + warnings.warn("will support decay soon") + param = main_program.global_block().vars["learning_rate_0"] + + if shape is None: + if is_sparse: + shape = total_dims + else: + shape = self.get_shard(total_dims, pserver_num, + pserver_id) + dims.append(shape) - initializer = self.get_initializer_attr(param.name, startup_program) - initializers.append(initializer) + initializer = self.get_initializer_attr(param.name, + startup_program) + initializers.append(initializer) for (attr_varname, type_) in attr_varnames: value = oop.attr(attr_varname) @@ -435,6 +450,8 @@ class TheOnePSRuntime(RuntimeBase): if not strategy: raise ValueError("k_steps must be invalid value, please check") + if dist_strategy.a_sync_configs["use_ps_gpu"]: + strategy.use_ps_gpu = True return strategy def build_compiled_startegy(self): @@ -443,6 +460,8 @@ class TheOnePSRuntime(RuntimeBase): compiled_config = CompileTimeStrategy( self.origin_main_program, self.origin_main_program, self.async_strategy, self.role_maker) + if self.async_strategy.use_ps_gpu: + compiled_config.use_ps_gpu = True return compiled_config def _init_worker(self): diff --git a/python/paddle/fluid/incubate/fleet/parameter_server/distribute_transpiler/distributed_strategy.py 
b/python/paddle/fluid/incubate/fleet/parameter_server/distribute_transpiler/distributed_strategy.py index 35029a3dfc7..2a9d26daaed 100644 --- a/python/paddle/fluid/incubate/fleet/parameter_server/distribute_transpiler/distributed_strategy.py +++ b/python/paddle/fluid/incubate/fleet/parameter_server/distribute_transpiler/distributed_strategy.py @@ -149,6 +149,7 @@ class DistributedStrategy(object): if num_threads > 1: self._build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.Reduce self.debug_opt = None + self.use_ps_gpu = False def set_debug_opt(self, opt_info): self.debug_opt = opt_info diff --git a/python/paddle/fluid/incubate/fleet/parameter_server/ir/public.py b/python/paddle/fluid/incubate/fleet/parameter_server/ir/public.py index baf8add04ca..b2735727f67 100644 --- a/python/paddle/fluid/incubate/fleet/parameter_server/ir/public.py +++ b/python/paddle/fluid/incubate/fleet/parameter_server/ir/public.py @@ -138,6 +138,7 @@ class CompileTimeStrategy(object): self.strategy = strategy self.role_maker = role_maker + self.use_ps_gpu = False try: self.is_heter_ps_mode = role_maker._is_heter_parameter_server_mode except: -- GitLab From bda0e60981cd2485fb09b9f8a7c294ebe3433f05 Mon Sep 17 00:00:00 2001 From: wangna11BD <79366697+wangna11BD@users.noreply.github.com> Date: Wed, 28 Apr 2021 16:57:03 +0800 Subject: [PATCH 042/720] modify spectralnorm (#32633) --- .../unittests/test_dygraph_spectral_norm.py | 139 ++++++++++++ python/paddle/nn/__init__.py | 2 + python/paddle/nn/utils/__init__.py | 3 +- python/paddle/nn/utils/spectral_norm_hook.py | 210 ++++++++++++++++++ 4 files changed, 353 insertions(+), 1 deletion(-) create mode 100644 python/paddle/fluid/tests/unittests/test_dygraph_spectral_norm.py create mode 100644 python/paddle/nn/utils/spectral_norm_hook.py diff --git a/python/paddle/fluid/tests/unittests/test_dygraph_spectral_norm.py b/python/paddle/fluid/tests/unittests/test_dygraph_spectral_norm.py new file mode 100644 index 00000000000..ef220ba1016 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_dygraph_spectral_norm.py @@ -0,0 +1,139 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
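# A minimal NumPy sketch of the power-iteration reference that the test below
# checks spectral_norm against: sigma, the largest singular value of the
# reshaped 2-D weight matrix, is estimated from a few u/v power-iteration
# rounds and the layer weight is then divided by sigma. The helper name
# `estimate_sigma` and the random initialization of u and v are illustrative
# only; the test itself reuses the u/v buffers created by the hook.
import numpy as np


def estimate_sigma(weight_mat, power_iters=1, eps=1e-12):
    # weight_mat: 2-D array [h, w]; u/v approximate the leading left/right
    # singular vectors after `power_iters` rounds of power iteration.
    h, w = weight_mat.shape
    u = np.random.random((h, 1)).astype(weight_mat.dtype)
    v = np.random.random((w, 1)).astype(weight_mat.dtype)
    for _ in range(power_iters):
        v = np.matmul(weight_mat.T, u)
        v = v / (np.sqrt((v * v).sum()) + eps)
        u = np.matmul(weight_mat, v)
        u = u / (np.sqrt((u * u).sum()) + eps)
    # sigma approximates the largest singular value; the normalized weight
    # used by the hook is weight / sigma.
    return float((u * np.matmul(weight_mat, v)).sum())


# Example: estimated spectral norm of a random 4x3 matrix.
print(estimate_sigma(np.random.rand(4, 3).astype('float32'), power_iters=5))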
+ +from __future__ import print_function + +import unittest +import numpy as np +import collections +import paddle +import paddle.nn as nn +from paddle.nn.utils import spectral_norm + + +class TestDygraphSpectralNorm(unittest.TestCase): + def setUp(self): + self.init_test_case() + self.set_data() + + def init_test_case(self): + self.batch_size = 3 + self.data_desc = (['x', [2, 12, 12]], ) + self.n_power_iterations = 1 + self.eps = 1e-12 + self.dim = None + + def set_data(self): + self.data = collections.OrderedDict() + for desc in self.data_desc: + data_name = desc[0] + data_shape = desc[1] + data_value = np.random.random( + size=[self.batch_size] + data_shape).astype('float32') + self.data[data_name] = data_value + + def spectral_normalize(self, weight, u, v, dim, power_iters, eps): + shape = weight.shape + weight_mat = weight.copy() + h = shape[dim] + w = np.prod(shape) // h + if dim != 0: + perm = [dim] + [d for d in range(len(shape)) if d != dim] + weight_mat = weight_mat.transpose(perm) + weight_mat = weight_mat.reshape((h, w)) + + u = u.reshape((h, 1)) + v = v.reshape((w, 1)) + for i in range(power_iters): + v = np.matmul(weight_mat.T, u) + v_norm = np.sqrt((v * v).sum()) + v = v / (v_norm + eps) + u = np.matmul(weight_mat, v) + u_norm = np.sqrt((u * u).sum()) + u = u / (u_norm + eps) + sigma = (u * np.matmul(weight_mat, v)).sum() + return weight / sigma + + def test_check_output(self): + linear = paddle.nn.Conv2D(2, 1, 3) + before_weight = linear.weight.numpy().copy() + if self.dim == None: + if isinstance(linear, (nn.Conv1DTranspose, nn.Conv2DTranspose, + nn.Conv3DTranspose, nn.Linear)): + self.dim = 1 + else: + self.dim = 0 + else: + self.dim = (self.dim + len(before_weight)) % len(before_weight) + + sn = spectral_norm( + linear, + n_power_iterations=self.n_power_iterations, + eps=self.eps, + dim=self.dim) + u = sn.weight_u.numpy().copy() + v = sn.weight_v.numpy().copy() + outputs = [] + for name, data in self.data.items(): + output = linear(paddle.to_tensor(data)) + outputs.append(output.numpy()) + self.actual_outputs = linear.weight.numpy() + + expect_output = self.spectral_normalize( + before_weight, u, v, self.dim, self.n_power_iterations, self.eps) + + for expect, actual in zip(expect_output, self.actual_outputs): + self.assertTrue( + np.allclose( + np.array(actual), np.array(expect), atol=0.001)) + + +class TestDygraphWeightNormCase(TestDygraphSpectralNorm): + def init_test_case(self): + self.batch_size = 3 + self.data_desc = (['x', [2, 3, 3]], ) + self.n_power_iterations = 1 + self.eps = 1e-12 + self.dim = None + + +class TestDygraphWeightNormWithIterations(TestDygraphSpectralNorm): + def init_test_case(self): + self.batch_size = 3 + self.data_desc = (['x', [2, 3, 3]], ) + self.n_power_iterations = 2 + self.eps = 1e-12 + self.dim = None + + +class TestDygraphWeightNormWithDim(TestDygraphSpectralNorm): + def init_test_case(self): + self.batch_size = 3 + self.data_desc = (['x', [2, 3, 3]], ) + self.n_power_iterations = 1 + self.eps = 1e-12 + self.dim = 1 + + +class TestDygraphWeightNormWithEps(TestDygraphSpectralNorm): + def init_test_case(self): + self.batch_size = 3 + self.data_desc = (['x', [2, 3, 3]], ) + self.n_power_iterations = 1 + self.eps = 1e-10 + self.dim = None + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/nn/__init__.py b/python/paddle/nn/__init__.py index d2f0063af0d..817fd501181 100644 --- a/python/paddle/nn/__init__.py +++ b/python/paddle/nn/__init__.py @@ -126,6 +126,8 @@ from .layer.distance import PairwiseDistance # noqa: F401 
from .layer.vision import PixelShuffle # noqa: F401 from .layer.container import LayerDict # noqa: F401 +from .utils.spectral_norm_hook import spectral_norm + # TODO: remove loss, keep it for too many used in unitests from .layer import loss # noqa: F401 from ..fluid.dygraph.layers import Layer # noqa: F401 diff --git a/python/paddle/nn/utils/__init__.py b/python/paddle/nn/utils/__init__.py index bf2573d2cbc..b6801cfe320 100644 --- a/python/paddle/nn/utils/__init__.py +++ b/python/paddle/nn/utils/__init__.py @@ -12,8 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. +from .spectral_norm_hook import spectral_norm from .weight_norm_hook import weight_norm, remove_weight_norm # noqa: F401 __all__ = [ #noqa - 'weight_norm', 'remove_weight_norm' + 'weight_norm', 'remove_weight_norm', 'spectral_norm' ] diff --git a/python/paddle/nn/utils/spectral_norm_hook.py b/python/paddle/nn/utils/spectral_norm_hook.py new file mode 100644 index 00000000000..5ce9e0937d3 --- /dev/null +++ b/python/paddle/nn/utils/spectral_norm_hook.py @@ -0,0 +1,210 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import math +import numpy as np + +import paddle +from ..layer.conv import Conv1DTranspose, Conv2DTranspose, Conv3DTranspose +from ..layer.common import Linear +from .. 
import functional as F + +__all__ = ['spectral_norm'] + + +def normal_(x, mean=0., std=1.): + temp_value = paddle.normal(mean, std, shape=x.shape) + x.set_value(temp_value) + return x + + +class SpectralNorm(object): + def __init__(self, name='weight', n_power_iterations=1, dim=0, eps=1e-12): + self.name = name + self.dim = dim + if n_power_iterations <= 0: + raise ValueError('Expected n_power_iterations to be positive, but ' + 'got n_power_iterations={}'.format( + n_power_iterations)) + self.n_power_iterations = n_power_iterations + self.eps = eps + + def reshape_weight_to_matrix(self, weight): + weight_mat = weight + if self.dim != 0: + # transpose dim to front + weight_mat = weight_mat.transpose([self.dim] + [ + d for d in range(weight_mat.dim()) if d != self.dim + ]) + + height = weight_mat.shape[0] + + return weight_mat.reshape([height, -1]) + + def compute_weight(self, layer, do_power_iteration): + weight = getattr(layer, self.name + '_orig') + u = getattr(layer, self.name + '_u') + v = getattr(layer, self.name + '_v') + weight_mat = self.reshape_weight_to_matrix(weight) + + if do_power_iteration: + with paddle.no_grad(): + for _ in range(self.n_power_iterations): + v.set_value( + F.normalize( + paddle.matmul( + weight_mat, + u, + transpose_x=True, + transpose_y=False), + axis=0, + epsilon=self.eps, )) + + u.set_value( + F.normalize( + paddle.matmul(weight_mat, v), + axis=0, + epsilon=self.eps, )) + if self.n_power_iterations > 0: + u = u.clone() + v = v.clone() + + sigma = paddle.dot(u, paddle.mv(weight_mat, v)) + weight = weight / sigma + return weight + + def __call__(self, layer, inputs): + setattr( + layer, + self.name, + self.compute_weight( + layer, do_power_iteration=layer.training)) + + @staticmethod + def apply(layer, name, n_power_iterations, dim, eps): + for k, hook in layer._forward_pre_hooks.items(): + if isinstance(hook, SpectralNorm) and hook.name == name: + raise RuntimeError("Cannot register two spectral_norm hooks on " + "the same parameter {}".format(name)) + + fn = SpectralNorm(name, n_power_iterations, dim, eps) + weight = layer._parameters[name] + + with paddle.no_grad(): + weight_mat = fn.reshape_weight_to_matrix(weight) + h, w = weight_mat.shape + + # randomly initialize u and v + u = layer.create_parameter([h]) + u = normal_(u, 0., 1.) + v = layer.create_parameter([w]) + v = normal_(v, 0., 1.) + u = F.normalize(u, axis=0, epsilon=fn.eps) + v = F.normalize(v, axis=0, epsilon=fn.eps) + + # delete fn.name form parameters, otherwise you can not set attribute + del layer._parameters[fn.name] + layer.add_parameter(fn.name + "_orig", weight) + # still need to assign weight back as fn.name because all sorts of + # things may assume that it exists, e.g., when initializing weights. + # However, we can't directly assign as it could be an Parameter and + # gets added as a parameter. Instead, we register weight * 1.0 as a plain + # attribute. + setattr(layer, fn.name, weight * 1.0) + layer.register_buffer(fn.name + "_u", u) + layer.register_buffer(fn.name + "_v", v) + layer.register_forward_pre_hook(fn) + return fn + + +def spectral_norm(layer, + name='weight', + n_power_iterations=1, + eps=1e-12, + dim=None): + r""" + This spectral_norm layer applies spectral normalization to a parameter according to the + following Calculation: + + Step 1: + Generate vector U in shape of [H], and V in shape of [W]. + While H is the :attr:`dim` th dimension of the input weights, + and W is the product result of remaining dimensions. 
+ + Step 2: + :attr:`power_iters` should be a positive integer, do following + calculations with U and V for :attr:`power_iters` rounds. + + .. math:: + + \mathbf{v} := \\frac{\mathbf{W}^{T} \mathbf{u}}{\|\mathbf{W}^{T} \mathbf{u}\|_2} + + \mathbf{u} := \\frac{\mathbf{W} \mathbf{v}}{\|\mathbf{W} \mathbf{v}\|_2} + + Step 3: + Calculate :math:`\sigma(\mathbf{W})` and normalize weight values. + + .. math:: + + \sigma(\mathbf{W}) = \mathbf{u}^{T} \mathbf{W} \mathbf{v} + + \mathbf{W} = \\frac{\mathbf{W}}{\sigma(\mathbf{W})} + + + Refer to `Spectral Normalization `_ . + + Parameters: + layer(Layer): Layer of paddle, which has weight. + name(str, optional): Name of the weight parameter. Default: 'weight'. + n_power_iterations(int, optional): The number of power iterations to calculate spectral norm. Default: 1. + eps(float, optional): The epsilon for numerical stability in calculating norms. Default: 1e-12. + dim(int, optional): The index of dimension which should be permuted to the first before reshaping Input(Weight) to matrix, it should be set as 0 if Input(Weight) is the weight of fc layer, and should be set as 1 if Input(Weight) is the weight of conv layer. Default: None. + + Returns: + The original layer with the spectral norm hook + + Examples: + .. code-block:: python + + from paddle.nn import Conv2D + from paddle.nn.utils import Spectralnorm + + conv = Conv2D(3, 1, 3) + sn_conv = spectral_norm(conv) + print(sn_conv) + # Conv2D(3, 1, kernel_size=[3, 3], data_format=NCHW) + print(sn_conv.weight) + # Tensor(shape=[1, 3, 3, 3], dtype=float32, place=CUDAPlace(0), stop_gradient=False, + # [[[[-0.21090528, 0.18563725, -0.14127982], + # [-0.02310637, 0.03197737, 0.34353802], + # [-0.17117859, 0.33152047, -0.28408015]], + # + # [[-0.13336606, -0.01862637, 0.06959272], + # [-0.02236020, -0.27091628, -0.24532901], + # [ 0.27254242, 0.15516677, 0.09036587]], + # + # [[ 0.30169338, -0.28146112, -0.11768346], + # [-0.45765871, -0.12504843, -0.17482486], + # [-0.36866254, -0.19969313, 0.08783543]]]]) + + """ + + if dim is None: + if isinstance(layer, (Conv1DTranspose, Conv2DTranspose, Conv3DTranspose, + Linear)): + dim = 1 + else: + dim = 0 + SpectralNorm.apply(layer, name, n_power_iterations, dim, eps) + return layer -- GitLab From abcb3f54a5fb9ed603545107773623b37472da48 Mon Sep 17 00:00:00 2001 From: denglin-github <82362191+denglin-github@users.noreply.github.com> Date: Wed, 28 Apr 2021 20:52:23 +0800 Subject: [PATCH 043/720] Nne integration (#32604) * Add dlnne engine runtime * Fix log * Remove and remove unrelated modify with dlnne, +clang-format * Fix CMakeList format error * Add copyright message * Fix dlnne CMakeList.txt * Add some paddlepaddle_pass to support more networks * Fix some format bug * Add delete dropout_op pass * Fix some format bug * Fix format bug --- paddle/fluid/framework/ir/CMakeLists.txt | 1 + .../framework/ir/delete_dropout_op_pass.cc | 96 +++++++++++++++++++ .../framework/ir/delete_dropout_op_pass.h | 37 +++++++ .../framework/ir/graph_pattern_detector.cc | 23 +++++ .../framework/ir/graph_pattern_detector.h | 13 +++ .../inference/api/paddle_pass_builder.cc | 1 + 6 files changed, 171 insertions(+) create mode 100644 paddle/fluid/framework/ir/delete_dropout_op_pass.cc create mode 100644 paddle/fluid/framework/ir/delete_dropout_op_pass.h diff --git a/paddle/fluid/framework/ir/CMakeLists.txt b/paddle/fluid/framework/ir/CMakeLists.txt index 0ca78c679ae..ab69170322c 100644 --- a/paddle/fluid/framework/ir/CMakeLists.txt +++ b/paddle/fluid/framework/ir/CMakeLists.txt @@ -86,6 +86,7 
@@ pass_library(quant_conv2d_dequant_fuse_pass inference) pass_library(shuffle_channel_detect_pass inference) pass_library(delete_quant_dequant_op_pass inference) pass_library(delete_quant_dequant_filter_op_pass inference) +pass_library(delete_dropout_op_pass inference) pass_library(simplify_with_basic_ops_pass base) pass_library(fc_elementwise_layernorm_fuse_pass base) pass_library(skip_layernorm_fuse_pass base) diff --git a/paddle/fluid/framework/ir/delete_dropout_op_pass.cc b/paddle/fluid/framework/ir/delete_dropout_op_pass.cc new file mode 100644 index 00000000000..09962239a01 --- /dev/null +++ b/paddle/fluid/framework/ir/delete_dropout_op_pass.cc @@ -0,0 +1,96 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include + +#include "paddle/fluid/framework/ir/delete_dropout_op_pass.h" + +namespace paddle { +namespace framework { +class LoDTensor; +} // namespace framework +} // namespace paddle + +namespace paddle { +namespace framework { +namespace ir { + +#define GET_IR_NODE(node__) GET_IR_NODE_FROM_SUBGRAPH(node__, node__, pattern); +#define GET_NODES \ + GET_IR_NODE(any_op_out); \ + GET_IR_NODE(dropout_op); \ + GET_IR_NODE(dropout_op_out); \ + GET_IR_NODE(dropout_op_outmask); \ + GET_IR_NODE(any_op2); + +void DeleteDropoutOpPass::ApplyImpl(ir::Graph* graph) const { + const std::string pattern_name = "delete_dropout_op_pattern"; + FusePassBase::Init(pattern_name, graph); + + GraphPatternDetector gpd; + + patterns::DeleteDropoutOpPattern pattern(gpd.mutable_pattern(), pattern_name); + pattern(); + + auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph, + Graph* g) { + GET_NODES; + IR_NODE_LINK_TO(any_op_out, any_op2); + std::string any_op_out_name = any_op_out->Var()->Name(); + std::string dropout_op_out_name = dropout_op_out->Var()->Name(); + + auto* any_op2_desc = any_op2->Op(); + auto var_map = any_op2_desc->Inputs(); + std::string arg_name = ""; + for (auto& name_m : var_map) { + if (std::find(name_m.second.begin(), name_m.second.end(), + dropout_op_out_name) != name_m.second.end()) { + arg_name = name_m.first; + } + } + if (arg_name.size() == 0) { + LOG(INFO) << "Delete dropout op pass: can not find the input " + << dropout_op_out_name; + return; + } + + // modify the any_op2's inputs + for (auto& name_m : var_map) { + if (std::find(name_m.second.begin(), name_m.second.end(), + dropout_op_out_name) != name_m.second.end()) { + std::vector new_inputs; + for (auto& i_n : name_m.second) { + if (i_n != dropout_op_out_name) { + new_inputs.push_back(i_n); + } + } + new_inputs.push_back(any_op_out_name); + any_op2_desc->SetInput(name_m.first, new_inputs); + any_op2_desc->Flush(); + } + } + any_op2_desc->Flush(); + // Delete the unneeded nodes. 
+ GraphSafeRemoveNodes(graph, + {dropout_op, dropout_op_out, dropout_op_outmask}); + }; + + gpd(graph, handler); +} + +} // namespace ir +} // namespace framework +} // namespace paddle + +REGISTER_PASS(delete_dropout_op_pass, + paddle::framework::ir::DeleteDropoutOpPass); diff --git a/paddle/fluid/framework/ir/delete_dropout_op_pass.h b/paddle/fluid/framework/ir/delete_dropout_op_pass.h new file mode 100644 index 00000000000..c49abf3c871 --- /dev/null +++ b/paddle/fluid/framework/ir/delete_dropout_op_pass.h @@ -0,0 +1,37 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include + +#include "paddle/fluid/framework/ir/fuse_pass_base.h" +#include "paddle/fluid/framework/ir/graph_pattern_detector.h" + +namespace paddle { +namespace framework { +namespace ir { + +class Graph; + +class DeleteDropoutOpPass : public FusePassBase { + public: + virtual ~DeleteDropoutOpPass() {} + + protected: + void ApplyImpl(ir::Graph* graph) const override; +}; + +} // namespace ir +} // namespace framework +} // namespace paddle diff --git a/paddle/fluid/framework/ir/graph_pattern_detector.cc b/paddle/fluid/framework/ir/graph_pattern_detector.cc index d74e8e5f65c..064da3d9416 100644 --- a/paddle/fluid/framework/ir/graph_pattern_detector.cc +++ b/paddle/fluid/framework/ir/graph_pattern_detector.cc @@ -2439,6 +2439,29 @@ PDNode *patterns::TransposeFlattenConcat::operator()( return concat_out; } +void patterns::DeleteDropoutOpPattern::operator()() { + auto any_op_out = pattern->NewNode(any_op_out_repr()) + ->assert_is_op_input("dropout", "X") + ->AsInput(); + + auto dropout_op = + pattern->NewNode(dropout_op_repr())->assert_is_op("dropout"); + + auto dropout_op_out = pattern->NewNode(dropout_op_out_repr()) + ->assert_is_op_output("dropout", "Out") + ->AsIntermediate(); + + auto dropout_op_outmask = pattern->NewNode(dropout_op_outmask_repr()) + ->assert_is_op_output("dropout", "Mask") + ->AsOutput(); + auto any_op2 = pattern->NewNode(any_op2_repr())->assert_is_op()->AsOutput(); + + dropout_op->LinksFrom({any_op_out}); + dropout_op_out->LinksFrom({dropout_op}); + dropout_op_outmask->LinksFrom({dropout_op}); + any_op2->LinksFrom({dropout_op_out}); +} + void patterns::DeleteQuantOpFuse::operator()(PDNode *input_act_node, const std::string &quant_type) { auto *input_scale_node = pattern->NewNode(GetNodeName("input_scale_node")) diff --git a/paddle/fluid/framework/ir/graph_pattern_detector.h b/paddle/fluid/framework/ir/graph_pattern_detector.h index cfac01ec9de..13f65859954 100644 --- a/paddle/fluid/framework/ir/graph_pattern_detector.h +++ b/paddle/fluid/framework/ir/graph_pattern_detector.h @@ -1464,6 +1464,19 @@ struct ShuffleChannelPattern : public PatternBase { PATTERN_DECL_NODE(reshape2_out); }; +struct DeleteDropoutOpPattern : public PatternBase { + DeleteDropoutOpPattern(PDPattern* pattern, const std::string& name_scope) + : PatternBase(pattern, name_scope, "delete_dropout_op_pattern") {} + + void operator()(); + + 
PATTERN_DECL_NODE(any_op_out); + PATTERN_DECL_NODE(dropout_op); + PATTERN_DECL_NODE(dropout_op_out); + PATTERN_DECL_NODE(dropout_op_outmask); + PATTERN_DECL_NODE(any_op2); +}; + struct DeleteQuantDequantOpPattern : public PatternBase { DeleteQuantDequantOpPattern(PDPattern* pattern, const std::string& name_scope) : PatternBase(pattern, name_scope, "delete_quantdequant_op_pattern") {} diff --git a/paddle/fluid/inference/api/paddle_pass_builder.cc b/paddle/fluid/inference/api/paddle_pass_builder.cc index 2b7333edae0..b2e3de63691 100644 --- a/paddle/fluid/inference/api/paddle_pass_builder.cc +++ b/paddle/fluid/inference/api/paddle_pass_builder.cc @@ -112,6 +112,7 @@ const std::vector kTRTSubgraphPasses({ const std::vector kDlnneSubgraphPasses({ "is_test_pass", // + "delete_dropout_op_pass" // "simplify_with_basic_ops_pass", // "conv_bn_fuse_pass", // "depthwise_conv_bn_fuse_pass", // -- GitLab From 9aad752775c29cd9deaab2334bca17f790f0ef26 Mon Sep 17 00:00:00 2001 From: Chen Weihang Date: Wed, 28 Apr 2021 21:24:06 +0800 Subject: [PATCH 044/720] Add fake interface for register_hook in static mode (#32642) * add fake interface for hook in static mode * add unittests * fix failed unittests --- python/paddle/fluid/framework.py | 14 +++--- .../fluid/tests/unittests/test_detach.py | 12 +----- .../unittests/test_tensor_register_hook.py | 43 +++++++++++++++++++ 3 files changed, 53 insertions(+), 16 deletions(-) diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py index a280667d03d..0e9d756848a 100644 --- a/python/paddle/fluid/framework.py +++ b/python/paddle/fluid/framework.py @@ -246,11 +246,11 @@ def _static_only_(func): def _fake_interface_only_(func): def __impl__(*args, **kwargs): raise AssertionError( - "'%s' should be called by imperative Varible in imperative mode, please run it in dygraph " - "mode. You can turn off paddle.enable_static() if you are in static mode, or turn off " - "ProgramTranslator if you are using @paddle.jit.to_static. If you have to run ProgramTranslator, " - "please use other API to replace '%s'" % (func.__name__, - func.__name__)) + "'%s' only can be called by `paddle.Tensor` in dynamic graph mode. Suggestions:\n" + " 1. If you are in static graph mode, you can switch to dynamic graph mode by turning off `paddle.enable_static()` or calling `paddle.disable_static()`.\n" + " 2. If you are using `@paddle.jit.to_static`, you can turn off ProgramTranslator by calling `paddle.jit.ProgramTranslator().enable(False)`. " + "If you have to translate dynamic graph to static graph, please use other API to replace '%s'." 
+ % (func.__name__, func.__name__)) return __impl__ @@ -1306,6 +1306,10 @@ class Variable(object): """ pass + @fake_interface_only + def register_hook(self, hook): + pass + def __str__(self): return self._to_readable_code() diff --git a/python/paddle/fluid/tests/unittests/test_detach.py b/python/paddle/fluid/tests/unittests/test_detach.py index 38cdd9b727f..5a31418205c 100644 --- a/python/paddle/fluid/tests/unittests/test_detach.py +++ b/python/paddle/fluid/tests/unittests/test_detach.py @@ -152,18 +152,8 @@ class Test_Detach(unittest.TestCase): def test_detach_exception(self): x = fluid.layers.data(name="a", shape=[3, 4], dtype='float32') y = fluid.layers.fc(input=x, size=10, bias_attr=True) - try: + with self.assertRaises(AssertionError): y_detach = y.detach() - except Exception as e: - # Here is to check - assert type(e) == AssertionError - assert str(e) == ( - "'detach' should be called by imperative Varible " - "in imperative mode, please run it in dygraph mode. You can " - "turn off paddle.enable_static() if you are in static mode, " - "or turn off ProgramTranslator if you are using " - "@paddle.jit.to_static. If you have to run ProgramTranslator, " - "please use other API to replace 'detach'") class TestInplace(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_tensor_register_hook.py b/python/paddle/fluid/tests/unittests/test_tensor_register_hook.py index a03e4ae4bd9..52256766fed 100644 --- a/python/paddle/fluid/tests/unittests/test_tensor_register_hook.py +++ b/python/paddle/fluid/tests/unittests/test_tensor_register_hook.py @@ -39,6 +39,21 @@ class SimpleNet(nn.Layer): return ret1, out +class SimpleNetForStatic(nn.Layer): + def __init__(self, in_size, out_size): + super(SimpleNetForStatic, self).__init__() + self.linear1 = nn.Linear(in_size, in_size) + self.linear2 = nn.Linear(in_size, out_size) + + def forward(self, x): + ret1 = self.linear1(x) + ret1.register_hook(lambda grad: grad * 2) + + ret2 = self.linear2(ret1) + out = paddle.mean(ret2, axis=-1) + return out + + class TestTensorRegisterHook(unittest.TestCase): def setUp(self): self.seed = 2021 @@ -451,6 +466,34 @@ class TestTensorRegisterHook(unittest.TestCase): with self.assertRaises(RuntimeError): x.register_hook(lambda grad: grad * 2) + def test_register_hook_in_static_mode(self): + paddle.enable_static() + + startup_program = paddle.static.Program() + main_program = paddle.static.Program() + with paddle.static.scope_guard(paddle.static.Scope()): + with paddle.static.program_guard(main_program, startup_program): + x = paddle.static.data( + name='x', shape=[None, self.in_size], dtype='float32') + + net = SimpleNetForStatic(self.in_size, self.out_size) + with self.assertRaises(AssertionError): + out = net(x) + + paddle.disable_static() + + def test_register_hook_in_dy2static_mode(self): + net = SimpleNetForStatic(self.in_size, self.out_size) + jit_net = paddle.jit.to_static( + net, input_spec=[paddle.static.InputSpec([None, self.in_size])]) + + data = np.random.uniform( + size=[self.batch_size, self.in_size]).astype('float32') + data_t = paddle.to_tensor(data) + + with self.assertRaises(AssertionError): + out = jit_net(data_t) + HOOK_INIT_VALUE = 10 HOOK_IS_CALLED = False -- GitLab From bc379ca3d5895eadbc1748bc5b71606011563ee1 Mon Sep 17 00:00:00 2001 From: arlesniak Date: Wed, 28 Apr 2021 15:33:00 +0200 Subject: [PATCH 045/720] Added pure_bf16 mode (#32281) --- paddle/fluid/operators/assign_op.cc | 1 + .../fluid/contrib/mixed_precision/__init__.py | 3 - .../contrib/mixed_precision/bf16/__init__.py 
| 4 +- .../contrib/mixed_precision/bf16/amp_lists.py | 14 +- .../contrib/mixed_precision/bf16/amp_utils.py | 219 +++++++++++- .../contrib/mixed_precision/bf16/decorator.py | 318 ++++++++++++++++++ .../fluid/contrib/tests/test_bf16_utils.py | 26 +- .../contrib/tests/test_model_cast_to_bf16.py | 36 +- python/paddle/fluid/layers/nn.py | 3 +- python/paddle/fluid/layers/tensor.py | 7 +- .../fluid/tests/book/test_fit_a_line.py | 78 +++-- .../fluid/tests/book/test_word2vec_book.py | 39 ++- .../tests/unittests/test_optimizer_grad.py | 32 +- python/paddle/static/amp/__init__.py | 5 +- 14 files changed, 699 insertions(+), 86 deletions(-) create mode 100644 python/paddle/fluid/contrib/mixed_precision/bf16/decorator.py diff --git a/paddle/fluid/operators/assign_op.cc b/paddle/fluid/operators/assign_op.cc index add533bafcb..433cabcfee0 100644 --- a/paddle/fluid/operators/assign_op.cc +++ b/paddle/fluid/operators/assign_op.cc @@ -162,6 +162,7 @@ REGISTER_OP_CPU_KERNEL_FUNCTOR(assign, float, ops::AssignKernel, double, ops::AssignKernel, int, ops::AssignKernel, int64_t, ops::AssignKernel, bool, ops::AssignKernel, plat::float16, + ops::AssignKernel, plat::bfloat16, ops::AssignKernel); #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) diff --git a/python/paddle/fluid/contrib/mixed_precision/__init__.py b/python/paddle/fluid/contrib/mixed_precision/__init__.py index 571b755b50d..a580ae5574c 100644 --- a/python/paddle/fluid/contrib/mixed_precision/__init__.py +++ b/python/paddle/fluid/contrib/mixed_precision/__init__.py @@ -20,10 +20,7 @@ from . import fp16_lists from .fp16_lists import * from . import fp16_utils from .fp16_utils import * -from . import bf16 -from .bf16 import * __all__ = decorator.__all__ __all__ += fp16_lists.__all__ __all__ += fp16_utils.__all__ -__all__ += bf16.__all__ diff --git a/python/paddle/fluid/contrib/mixed_precision/bf16/__init__.py b/python/paddle/fluid/contrib/mixed_precision/bf16/__init__.py index 8c05bc4899c..d3632729a3b 100644 --- a/python/paddle/fluid/contrib/mixed_precision/bf16/__init__.py +++ b/python/paddle/fluid/contrib/mixed_precision/bf16/__init__.py @@ -18,7 +18,9 @@ from . import amp_lists from .amp_lists import * from . import amp_utils from .amp_utils import * +from . import decorator +from .decorator import * -__all__ = [] +__all__ = decorator.__all__ __all__ += amp_lists.__all__ __all__ += amp_utils.__all__ diff --git a/python/paddle/fluid/contrib/mixed_precision/bf16/amp_lists.py b/python/paddle/fluid/contrib/mixed_precision/bf16/amp_lists.py index 81dc32d114b..1cf54aa0838 100644 --- a/python/paddle/fluid/contrib/mixed_precision/bf16/amp_lists.py +++ b/python/paddle/fluid/contrib/mixed_precision/bf16/amp_lists.py @@ -13,8 +13,10 @@ # limitations under the License. 
import copy +from paddle.fluid import core + from ..fp16_lists import white_list as white_list_fp16, black_list as black_list_fp16,\ - gray_list as gray_list_fp16, unsupported_fp16_list + gray_list as gray_list_fp16 __all__ = ["AutoMixedPrecisionListsBF16"] @@ -82,11 +84,17 @@ bf16_list = {'elementwise_add', } # depends on the prev_op type gray_list = { + 'cast', + 'fill_constant', + 'reduce_mean', 'reshape2', - 'lookup_table', + 'scale', } -unsupported_list = unsupported_fp16_list.copy().copy() +_, _, _sys_unsupported_bf16_list = core.op_supported_infos( + 'CPU', core.VarDesc.VarType.BF16) +unsupported_list = _sys_unsupported_bf16_list + fp32_list = black_list_fp16.copy().copy() fp32_list |= white_list_fp16 fp32_list |= gray_list_fp16 diff --git a/python/paddle/fluid/contrib/mixed_precision/bf16/amp_utils.py b/python/paddle/fluid/contrib/mixed_precision/bf16/amp_utils.py index c2c01f88c74..038479098a6 100644 --- a/python/paddle/fluid/contrib/mixed_precision/bf16/amp_utils.py +++ b/python/paddle/fluid/contrib/mixed_precision/bf16/amp_utils.py @@ -14,18 +14,25 @@ # limitations under the License. from __future__ import print_function -import struct from .... import core from .... import framework +from .... import global_scope from ....log_helper import get_logger from ....wrapped_decorator import signature_safe_contextmanager from .amp_lists import AutoMixedPrecisionListsBF16 -from ..fp16_utils import find_true_prev_op, find_true_post_op, _rename_arg, find_op_index +from ..fp16_utils import find_true_prev_op, find_true_post_op, _rename_arg, \ + find_op_index, _rename_op_input + +import collections +import struct import logging import numpy as np -__all__ = ["bf16_guard", "rewrite_program_bf16", "convert_float_to_uint16"] +__all__ = [ + "bf16_guard", "rewrite_program_bf16", "cast_model_to_bf16", + "cast_parameters_to_bf16", "convert_float_to_uint16" +] _logger = get_logger( __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s') @@ -126,7 +133,41 @@ def _insert_cast_op(block, op, idx, src_dtype, dest_dtype): return num_cast_ops +def _insert_cast_post_op(block, op, idx, src_dtype, dest_dtype, target_name, + op_var_rename_map): + num_cast_ops = 0 + target_var = block.var(target_name) + if target_var.type not in _valid_types or target_var.dtype == dest_dtype: + return num_cast_ops + + assert target_var.dtype == src_dtype, \ + "The real dtype({}) is not equal to the src dtype({})".format(_dtype_to_str(target_var.dtype), _dtype_to_str(src_dtype)) + + cast_name = target_var.name + '.cast_' + _dtype_to_str(dest_dtype) + cast_var = block.vars.get(cast_name) + if cast_var is None or cast_var.dtype != dest_dtype: + cast_var = block.create_var( + name=cast_name, + dtype=dest_dtype, + persistable=False, + stop_gradient=target_var.stop_gradient) + block._insert_op( + idx, + type="cast", + inputs={"X": target_var}, + outputs={"Out": cast_var}, + attrs={"in_dtype": target_var.dtype, + "out_dtype": cast_var.dtype}) + num_cast_ops += 1 + op_var_rename_map[block.idx][target_var.name] = cast_var.name + + return num_cast_ops + + def _is_in_fp32_varnames(op, amp_lists): + if not amp_lists.fp32_varnames: + return False + for in_name in op.input_arg_names: if in_name in amp_lists.fp32_varnames: return True @@ -191,7 +232,174 @@ def bf16_guard(): yield -def rewrite_program_bf16(main_prog, amp_lists=None, use_bf16_guard=False): +def cast_model_to_bf16(program, amp_lists=None, use_bf16_guard=True): + """ + Traverse all ops in the whole model and set their inputs and outputs + to the bf16 data type. 
This function will do some special processing for + the batch normalization, which will keep the batchnorm's computations in FP32. + Args: + program (Program): The used program. + amp_lists (AutoMixedPrecisionListsBF16): An AutoMixedPrecisionListsBF16 object. + use_bf16_guard(bool): Determine whether to use `bf16_guard` when + constructing the program. Default True. + """ + + if amp_lists is None: + amp_lists = AutoMixedPrecisionListsBF16() + global_block = program.global_block() + keep_fp32_ops = set() + to_bf16_var_names = set() + to_bf16_pre_cast_ops = set() + origin_ops = [] + for block in program.blocks: + origin_ops.extend(block.ops) + + for block in program.blocks: + ops = block.ops + for op in ops: + if op.type == 'create_py_reader' or op.type == 'read': + continue + if _need_keep_fp32(op, amp_lists.unsupported_list, use_bf16_guard): + keep_fp32_ops.add(op) + continue # processed below + for in_name in op.input_names: + if op.type in { + 'batch_norm', 'fused_bn_add_activation', 'layer_norm' + } and in_name not in {'X', 'Z'}: + continue + for in_var_name in op.input(in_name): + in_var = None + try: + in_var = block.var(in_var_name) + except ValueError as e: + _logger.debug( + "-- {}, try to get it in the global block --". + format(e)) + in_var = global_block.var(in_var_name) + if in_var is not None: + _logger.debug( + "-- var {} is got in the global block --". + format(in_var_name)) + + if in_var is None or in_var.type not in _valid_types: + continue + + if in_var.dtype == core.VarDesc.VarType.FP32: + in_var.desc.set_dtype(core.VarDesc.VarType.BF16) + to_bf16_var_names.add(in_var_name) + + _logger.debug( + "-- op type: {}, in var name: {}, in var dtype: {} --". + format(op.type, in_var_name, in_var.dtype)) + + for out_name in op.output_names: + if op.type in { + 'batch_norm', 'fused_bn_add_activation', 'layer_norm' + } and out_name != 'Y': + continue + for out_var_name in op.output(out_name): + out_var = None + try: + out_var = block.var(out_var_name) + except ValueError as e: + _logger.debug( + "-- {}, try to get it in the global block --". + format(e)) + out_var = global_block.var(out_var_name) + if out_var is not None: + _logger.debug( + "-- var {} is got in the global block --". + format(out_var_name)) + + if out_var is None or out_var.type not in _valid_types: + continue + + if out_var.dtype == core.VarDesc.VarType.FP32: + out_var.desc.set_dtype(core.VarDesc.VarType.BF16) + + _logger.debug( + "-- op type: {}, out var name: {}, out var dtype: {} --". 
+ format(op.type, out_var_name, out_var.dtype)) + for attr_name in ['in_dtype', 'out_dtype', 'dtype']: + if op.has_attr(attr_name) and op.attr( + attr_name) == core.VarDesc.VarType.FP32: + op._set_attr(attr_name, core.VarDesc.VarType.BF16) + if op.has_attr('use_mkldnn'): + op._set_attr('use_mkldnn', True) + if op.has_attr('mkldnn_data_type'): + op._set_attr('mkldnn_data_type', 'bfloat16') + + # process ops in keep_fp32_ops + op_var_rename_map = [ + collections.OrderedDict() for _ in range(len(program.blocks)) + ] + for block in program.blocks: + ops = block.ops + idx = 0 + while idx < len(ops): + op = ops[idx] + num_cast_ops = 0 + if op not in keep_fp32_ops: + if op in to_bf16_pre_cast_ops: + in_var_cast_num = _insert_cast_op(block, op, idx, + core.VarDesc.VarType.FP32, + core.VarDesc.VarType.BF16) + num_cast_ops += in_var_cast_num + else: + pre_cast_num = _insert_cast_op(block, op, idx, + core.VarDesc.VarType.BF16, + core.VarDesc.VarType.FP32) + num_cast_ops += pre_cast_num + for out_var_name in op.output_arg_names: + out_var = block.vars.get(out_var_name) + if out_var is None or out_var.type not in _valid_types: + continue + if out_var.dtype == core.VarDesc.VarType.BF16: + out_var.desc.set_dtype(core.VarDesc.VarType.FP32) + post_ops = find_true_post_op(ops, op, out_var_name) + for post_op in post_ops: + if post_op in keep_fp32_ops: + continue + post_cast_num = _insert_cast_post_op( + block, op, idx + pre_cast_num + 1, + core.VarDesc.VarType.FP32, + core.VarDesc.VarType.BF16, out_var_name, + op_var_rename_map) + num_cast_ops += post_cast_num + idx += num_cast_ops + 1 + + _rename_op_input(program, op_var_rename_map, origin_ops, keep_fp32_ops) + return to_bf16_var_names + + +def cast_parameters_to_bf16(place, program, scope=None, to_bf16_var_names=None): + """ + Traverse all parameters in the whole model and set them to the BF16 data type. + Whereas, this function will keep parameters of batchnorms in FP32. + Args: + place(fluid.CPUPlace|fluid.CUDAPlace): `place` is used to restore the BF16 weight tensors. + program (Program): The used program. + scope(fluid.Scope, optional): `scope` is used to get the FP32 weight tensor values. + Default is None. + to_bf16_var_names(set|list, optional): The data types of vars in `to_bf16_var_names` + will be set to BF16. Usually, it is the returned + value of `cast_model_to_bf16` API. + """ + all_parameters = [] + for block in program.blocks: + all_parameters.extend(block.all_parameters()) + + bf16_var_names = to_bf16_var_names if to_bf16_var_names else set() + var_scope = scope if scope else global_scope() + for param in all_parameters: + if param.name in bf16_var_names: + _logger.debug("---- cast {} to bf16 dtype ----".format(param.name)) + param_t = var_scope.find_var(param.name).get_tensor() + data = np.array(param_t) + param_t.set(convert_float_to_uint16(data), place) + + +def rewrite_program_bf16(main_prog, amp_lists=None): """ Traverse all ops in current block and insert cast op according to which set current op belongs to. 
@@ -231,8 +439,7 @@ def rewrite_program_bf16(main_prog, amp_lists=None, use_bf16_guard=False): fp32_op_set.add(op) continue - if op.type in amp_lists.fp32_list or _need_keep_fp32( - op, amp_lists.unsupported_list, use_bf16_guard): + if op.type in amp_lists.fp32_list: fp32_op_set.add(op) elif op.type in amp_lists.bf16_list: bf16_op_set.add(op) diff --git a/python/paddle/fluid/contrib/mixed_precision/bf16/decorator.py b/python/paddle/fluid/contrib/mixed_precision/bf16/decorator.py new file mode 100644 index 00000000000..86b5a5df75d --- /dev/null +++ b/python/paddle/fluid/contrib/mixed_precision/bf16/decorator.py @@ -0,0 +1,318 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from paddle.fluid import (core, default_main_program, layers, program_guard, + unique_name) +from .amp_utils import (rewrite_program_bf16, cast_model_to_bf16, + cast_parameters_to_bf16) +from .amp_lists import AutoMixedPrecisionListsBF16 +import types +import warnings + +__all__ = ["decorate_bf16"] + + +class OptimizerWithMixedPrecision(object): + """ + Optimizer with mixed-precision (MP) training. This is a wrapper of a common + optimizer, plus the support of mixed-precision pre-training. The object + of this class almost has the same behavior as the common optimizer, with the + methods `minimize()`, `backward()`, `apply_gradients()` implemented. + Additionally, it enables the MP training automatically, i.e, the creation + and maintenance of master parameters, scaling of loss, etc. + + Args: + optimizer (Optimizer): A common Optimizer object. + amp_lists (CustomOpLists): An CustomOpLists object. + use_pure_bf16(bool): Whether to use the pure bf16 training. + use_bf16_guard(bool): Whether to use `bf16_guard` when constructing the program. + + """ + + def __init__(self, optimizer, amp_lists, use_pure_bf16, use_bf16_guard): + self._optimizer = optimizer + self._amp_lists = amp_lists + self._param_grads = None + self._train_program = None + + self._learning_rate = optimizer._learning_rate + self._learning_rate_map = optimizer._learning_rate_map + self._use_pure_bf16 = use_pure_bf16 + self._use_bf16_guard = use_bf16_guard + self._to_bf16_var_names = None + + def _init_amp_var(self): + # Ensure the data type of learning rate vars is float32 (same as the + # master parameter dtype) + if isinstance(self._optimizer._learning_rate, float): + self._optimizer._learning_rate_map[default_main_program()] = \ + layers.create_global_var( + name=unique_name.generate("learning_rate"), + shape=[1], + value=float(self._optimizer._learning_rate), + dtype='float32', + persistable=True) + + def backward(self, + loss, + startup_program=None, + parameter_list=None, + no_grad_set=None, + callbacks=None): + """ + Backward propagation or auto differentiation for gradients' computation. + + Args: + loss (Variable): The loss Variable to minimize. + startup_program (Program|None): The startup Program for initializing + parameters in `parameter_list`. 
+ parameter_list (list|None): A list of Variables to update. + no_grad_set (set|None): A set of Variables should be ignored. + callbacks (list|None): A list of callable objects to run when appending + backward operator for one parameter. + + Returns: + A list of (param, grad), which is a tuple of a parameter and its + gradient respectively, and the scaled loss. + """ + train_program = loss.block.program + self._train_program = train_program + + with program_guard(self._train_program, startup_program): + self._init_amp_var() + + if self._use_pure_bf16: + self._to_bf16_var_names = cast_model_to_bf16( + self._train_program, self._amp_lists, self._use_bf16_guard) + else: + rewrite_program_bf16(self._train_program, self._amp_lists) + + if loss.dtype != core.VarDesc.VarType.FP32: + loss = loss.astype('float32') + + params_grads = self._optimizer.backward( + loss, startup_program, parameter_list, no_grad_set, callbacks) + return params_grads + + def amp_init(self, + place, + scope=None, + test_program=None, + use_bf16_test=False): + """ + Init the amp training, such as cast fp32 parameters to bf16 type. + + Args: + place(CPUPlace): place is used to initialize + bf16 parameters with fp32 values. + scope(Scope): The scope is used to find fp32 parameters. + test_program(Program): The program is used for testing. + use_bf16_test(bool): Whether to use bf16 testing. + + Examples: + .. code-block:: python + + import numpy as np + import paddle + import paddle.nn.functional as F + paddle.enable_static() + + def run_example_code(): + place = paddle.CPUPlace(0) + exe = paddle.static.Executor(place) + data = paddle.static.data(name='X', shape=[None, 1, 28, 28], dtype='float32') + conv2d = paddle.static.nn.conv2d(input=data, num_filters=6, filter_size=3) + # 1) Use bf16_guard to control the range of bf16 kernels used. + with paddle.static.amp.bf16_guard(): + bn = paddle.static.nn.batch_norm(input=conv2d, act="relu") + pool = F.max_pool2d(bn, kernel_size=2, stride=2) + hidden = paddle.static.nn.fc(pool, size=10) + loss = paddle.mean(hidden) + # 2) Create the optimizer and set `multi_precision` to True. + # Setting `multi_precision` to True can avoid the poor accuracy + # or the slow convergence in a way. + optimizer = paddle.optimizer.Momentum(learning_rate=0.01, multi_precision=True) + # 3) These ops in `custom_fp32_list` will keep in the float32 computation type. + amp_list = paddle.static.amp.CustomOpLists( + custom_fp32_list=['pool2d']) + # 4) The entry of Paddle AMP. + # Enable pure bf16 training by setting `use_pure_bf16` to True. + optimizer = paddle.static.amp.bf16.decorate_bf16( + optimizer, + amp_list, + use_pure_bf16=True) + # If you don't use the default_startup_program(), you sholud pass + # your defined `startup_program` into `minimize`. + optimizer.minimize(loss) + exe.run(paddle.static.default_startup_program()) + # 5) Use `amp_init` after FP32 parameters initialization(such as `exe.run(startup_program)`). + # If you want to perform the testing process, you should pass `test_program` into `amp_init`. + optimizer.amp_init(place, scope=paddle.static.global_scope()) + + """ + assert self._train_program is not None, \ + "Please call the minimize method first." 
+ if self._use_pure_bf16: + cast_parameters_to_bf16(place, self._train_program, scope, + self._to_bf16_var_names) + if test_program is not None: + if self._use_pure_bf16: + cast_model_to_bf16(test_program, self._amp_lists, + self._use_bf16_guard) + elif use_bf16_test: + rewrite_program_bf16(test_program, self._amp_lists) + + def apply_gradients(self, params_grads): + """ + Apply gradients. + + Args: + params_grads (list): A list of params. + + Returns: + A list of optimize operators. + """ + + return self._optimizer.apply_gradients(params_grads) + + def apply_optimize(self, loss, startup_program, params_grads): + program = loss.block.program + with program_guard(program, startup_program): + optimize_ops = self.apply_gradients(params_grads) + return optimize_ops + + def minimize(self, + loss, + startup_program=None, + parameter_list=None, + no_grad_set=None): + """ + Perform optimization by minimizing the given loss. + + Args: + loss (Variable): The loss Variable. + startup_program (Program): startup_program for initializing parameters + in `parameter_list`. + parameter_list (list): list of Variables to update. + no_grad_set (set|None): set of Variables should be ignored. + + Returns: + The scaled loss by scaling factor, the list of optimize ops, and a + list of scaled parameters and gradients. + """ + opt_dict = self._optimizer.__class__.__dict__ + if 'minimize' in opt_dict and isinstance(opt_dict['minimize'], + types.FunctionType): + warnings.warn( + "The decorated optimizer has its own `minimize` method, but it will not be executed." + ) + + params_grads = self.backward( + loss, + startup_program=startup_program, + parameter_list=parameter_list, + no_grad_set=no_grad_set) + + optimize_ops = self.apply_optimize(loss, startup_program, params_grads) + + return optimize_ops, params_grads + + +def decorate_bf16(optimizer, + amp_lists=None, + use_pure_bf16=False, + use_bf16_guard=None): + """ + Decorate the given optimizer to adapt to the mixed-precision training. + + Args: + optimizer(Optimizer): A common Optimizer. + amp_lists (CustomOpLists): An CustomOpLists object. + use_pure_bf16(bool): Whether to use the pure bf16 training. Default False. + use_bf16_guard(bool): Whether to use `bf16_guard` when constructing the program. + Default None, which means that its value equals to `use_pure_bf16`. + + Returns: + An optimizer acting like a normal one but with mixed-precision training + enabled. + + Examples 1: + .. code-block:: python + + # fp32&bf16 list based strategy example + import paddle + import paddle.static as static + + paddle.enable_static() + + data = static.data(name='X', shape=[None, 1], dtype='float32') + hidden = static.nn.fc(x=data, size=10) + loss = paddle.mean(hidden) + optimizer = paddle.optimizer.Adam(learning_rate=0.001) + + mp_optimizer = static.amp.decorate_bf16(optimizer=optimizer) + + ops, param_grads = mp_optimizer.minimize(loss) + + Examples 2: + .. code-block:: python + + # pure bf16 training example + import numpy as np + import paddle + import paddle.nn.functional as F + + def run_example_code(): + place = paddle.CPUPlace(0) + exe = paddle.static.Executor(place) + data = paddle.static.data(name='X', shape=[None, 1, 28, 28], dtype='float32') + conv2d = paddle.static.nn.conv2d(input=data, num_filters=6, filter_size=3) + # 1) Use bf16_guard to control the range of bf16 kernels used. 
+ with paddle.static.amp.bf16_guard(): + bn = paddle.static.nn.batch_norm(input=conv2d, act="relu") + pool = F.max_pool2d(bn, kernel_size=2, stride=2) + hidden = paddle.static.nn.fc(pool, size=10) + loss = paddle.mean(hidden) + # 2) Create the optimizer and set `multi_precision` to True. + # Setting `multi_precision` to True can avoid the poor accuracy + # or the slow convergence in a way. + optimizer = paddle.optimizer.Momentum(learning_rate=0.01, multi_precision=True) + # 3) These ops in `custom_fp32_list` will keep in the float32 computation type. + amp_list = paddle.static.amp.CustomOpLists( + custom_fp32_list=['pool2d']) + # 4) The entry of Paddle AMP. + # Enable pure bf16 training by setting `use_pure_bf16` to True. + optimizer = paddle.static.amp.decorate_bf16( + optimizer, + amp_list, + use_pure_bf16=True) + # If you don't use the default_startup_program(), you sholud pass + # your defined `startup_program` into `minimize`. + optimizer.minimize(loss) + exe.run(paddle.static.default_startup_program()) + # 5) Use `amp_init` after FP32 parameters initialization(such as `exe.run(startup_program)`). + # If you want to perform the testing process, you should pass `test_program` into `amp_init`. + optimizer.amp_init(place, scope=paddle.static.global_scope()) + + """ + if amp_lists is None: + amp_lists = AutoMixedPrecisionListsBF16() + + if use_bf16_guard is None: + use_bf16_guard = use_pure_bf16 + + mp_optimizer = OptimizerWithMixedPrecision(optimizer, amp_lists, + use_pure_bf16, use_bf16_guard) + + return mp_optimizer diff --git a/python/paddle/fluid/contrib/tests/test_bf16_utils.py b/python/paddle/fluid/contrib/tests/test_bf16_utils.py index faf2307f814..2969b7ea11d 100644 --- a/python/paddle/fluid/contrib/tests/test_bf16_utils.py +++ b/python/paddle/fluid/contrib/tests/test_bf16_utils.py @@ -14,7 +14,7 @@ import copy import unittest import paddle.fluid as fluid -import paddle.fluid.contrib.mixed_precision as amp +import paddle.static.amp as amp from paddle.fluid import core import paddle @@ -34,34 +34,34 @@ class AMPTest(unittest.TestCase): self.assertEqual(self.amp_lists_.gray_list, self.gray_list) def test_amp_lists(self): - self.amp_lists_ = amp.AutoMixedPrecisionListsBF16() + self.amp_lists_ = amp.bf16.AutoMixedPrecisionListsBF16() def test_amp_lists_1(self): # 1. w={'exp}, b=None self.bf16_list.add('exp') self.fp32_list.remove('exp') - self.amp_lists_ = amp.AutoMixedPrecisionListsBF16({'exp'}) + self.amp_lists_ = amp.bf16.AutoMixedPrecisionListsBF16({'exp'}) def test_amp_lists_2(self): # 2. w={'tanh'}, b=None self.fp32_list.remove('tanh') self.bf16_list.add('tanh') - self.amp_lists_ = amp.AutoMixedPrecisionListsBF16({'tanh'}) + self.amp_lists_ = amp.bf16.AutoMixedPrecisionListsBF16({'tanh'}) def test_amp_lists_3(self): # 3. w={'lstm'}, b=None self.bf16_list.add('lstm') - self.amp_lists_ = amp.AutoMixedPrecisionListsBF16({'lstm'}) + self.amp_lists_ = amp.bf16.AutoMixedPrecisionListsBF16({'lstm'}) def test_amp_lists_4(self): # 4. 
w=None, b={'elementwise_add'} self.bf16_list.remove('elementwise_add') self.fp32_list.add('elementwise_add') - self.amp_lists_ = amp.AutoMixedPrecisionListsBF16( + self.amp_lists_ = amp.bf16.AutoMixedPrecisionListsBF16( custom_fp32_list={'elementwise_add'}) def test_amp_lists_5(self): @@ -69,28 +69,28 @@ class AMPTest(unittest.TestCase): self.fp32_list.add('elementwise_add') self.bf16_list.remove('elementwise_add') - self.amp_lists_ = amp.AutoMixedPrecisionListsBF16( + self.amp_lists_ = amp.bf16.AutoMixedPrecisionListsBF16( custom_fp32_list={'elementwise_add'}) def test_amp_lists_6(self): # 6. w=None, b={'lstm'} self.fp32_list.add('lstm') - self.amp_lists_ = amp.AutoMixedPrecisionListsBF16( + self.amp_lists_ = amp.bf16.AutoMixedPrecisionListsBF16( custom_fp32_list={'lstm'}) def test_amp_lists_7(self): self.fp32_list.add('reshape2') self.gray_list.remove('reshape2') - self.amp_lists_ = amp.AutoMixedPrecisionListsBF16( + self.amp_lists_ = amp.bf16.AutoMixedPrecisionListsBF16( custom_fp32_list={'reshape2'}) def test_amp_list_8(self): self.bf16_list.add('reshape2') self.gray_list.remove('reshape2') - self.amp_lists_ = amp.AutoMixedPrecisionListsBF16( + self.amp_lists_ = amp.bf16.AutoMixedPrecisionListsBF16( custom_bf16_list={'reshape2'}) @@ -98,7 +98,7 @@ class AMPTest2(unittest.TestCase): def test_amp_lists_(self): # 7. w={'lstm'} b={'lstm'} # raise ValueError - self.assertRaises(ValueError, amp.AutoMixedPrecisionListsBF16, + self.assertRaises(ValueError, amp.bf16.AutoMixedPrecisionListsBF16, {'lstm'}, {'lstm'}) def test_find_op_index(self): @@ -117,10 +117,10 @@ class AMPTest2(unittest.TestCase): type="abs", inputs={"X": [var1]}, outputs={"Out": [var2]}) op2 = block.append_op( type="abs", inputs={"X": [var2]}, outputs={"Out": [var3]}) - amp_lists_1 = amp.AutoMixedPrecisionListsBF16( + amp_lists_1 = amp.bf16.AutoMixedPrecisionListsBF16( custom_fp32_varnames={'X'}) assert amp.bf16.amp_utils._is_in_fp32_varnames(op1, amp_lists_1) - amp_lists_2 = amp.AutoMixedPrecisionListsBF16( + amp_lists_2 = amp.bf16.AutoMixedPrecisionListsBF16( custom_fp32_varnames={'Y'}) assert amp.bf16.amp_utils._is_in_fp32_varnames(op2, amp_lists_2) assert amp.bf16.amp_utils._is_in_fp32_varnames(op1, amp_lists_2) diff --git a/python/paddle/fluid/contrib/tests/test_model_cast_to_bf16.py b/python/paddle/fluid/contrib/tests/test_model_cast_to_bf16.py index 40ddcf2e66b..af2c42d6b85 100644 --- a/python/paddle/fluid/contrib/tests/test_model_cast_to_bf16.py +++ b/python/paddle/fluid/contrib/tests/test_model_cast_to_bf16.py @@ -65,13 +65,13 @@ class TestModelCastBF16(unittest.TestCase): fetch_list=fetch_list, return_numpy=(not with_lod)) - def test_graph_rewrite(self): + def _graph_common(self, _amp_fun): size = 3 n = np.ones([size, size], dtype='float32') * 3.2 nn = np.ones([size, size], dtype='float32') * -2.7 - n_bf16 = amp.convert_float_to_uint16(n) - nn_bf16 = amp.convert_float_to_uint16(nn) + n_bf16 = amp.bf16.convert_float_to_uint16(n) + nn_bf16 = amp.bf16.convert_float_to_uint16(nn) with self.static_graph(): t_bf16 = layers.data( @@ -85,12 +85,12 @@ class TestModelCastBF16(unittest.TestCase): ret = layers.elementwise_mul(ret, t) ret = layers.reshape(ret, [0, 0]) - with amp.bf16_guard(): + with amp.bf16.bf16_guard(): ret_bf16 = layers.elementwise_add(t_bf16, tt_bf16) ret_bf16 = layers.elementwise_mul(ret_bf16, t_bf16) ret_bf16 = layers.reshape(ret_bf16, [0, 0]) - with amp.bf16_guard(): + with amp.bf16.bf16_guard(): ret_fp32bf16 = layers.elementwise_add(t, tt) ret_fp32bf16 = layers.elementwise_mul(ret_fp32bf16, t) 
ret_fp32bf16 = layers.reshape(ret_fp32bf16, [0, 0]) @@ -103,7 +103,7 @@ class TestModelCastBF16(unittest.TestCase): 'tt_bf16': nn_bf16, }, fetch_list=[ret_bf16, ret, ret_fp32bf16], - amp_fun=lambda prog: amp.rewrite_program_bf16(prog, use_bf16_guard=True)) + amp_fun=lambda prog: amp.bf16.rewrite_program_bf16(prog)) self.assertTrue(np.allclose(static_ret_bf16, static_ret, 1e-2)) self.assertTrue(np.allclose(static_ret_bf16, ret_fp32bf16, 1e-2)) @@ -112,7 +112,7 @@ class TestModelCastBF16(unittest.TestCase): t = layers.data(name='t', shape=[size, size], dtype='float32') tt = layers.data(name='tt', shape=[size, size], dtype='float32') - with amp.bf16_guard(): + with amp.bf16.bf16_guard(): ret = layers.elementwise_add(t, tt) ret = layers.reshape(ret, [0, 0], act='elu') ret = layers.elementwise_mul(ret, t) @@ -122,17 +122,27 @@ class TestModelCastBF16(unittest.TestCase): self.get_static_graph_result( feed={'t': n, 'tt': nn}, fetch_list=[ret], - amp_fun=lambda prog: amp.rewrite_program_bf16( - prog, - amp.AutoMixedPrecisionListsBF16( - custom_fp32_varnames={'elementwise_add_0.tmp_0'}), - use_bf16_guard=True - ) + amp_fun=_amp_fun ) self.assertTrue( static_ret_bf16, np.ones( [size, size], dtype='float32') * -1.1) + def test_graph_rewrite(self): + self._graph_common(lambda prog: amp.bf16.rewrite_program_bf16( + prog, + amp.bf16.AutoMixedPrecisionListsBF16( + custom_fp32_varnames={'elementwise_add_0.tmp_0'}), + )) + + def test_graph_cast(self): + self._graph_common(lambda prog: amp.bf16.cast_model_to_bf16( + prog, + amp.bf16.AutoMixedPrecisionListsBF16( + custom_fp32_list={'elementwise_mul'}), + use_bf16_guard=True + )) + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index e5663d607aa..751b6251565 100755 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -332,7 +332,8 @@ def fc(input, for i, input_x in enumerate(input): check_type(input_x, 'input[' + str(i) + ']', Variable, 'fc') dtype = helper.input_dtype() - check_dtype(dtype, 'input', ['float16', 'float32', 'float64'], 'fc') + check_dtype(dtype, 'input', ['float16', 'uint16', 'float32', 'float64'], + 'fc') mul_results = [] for input_var, param_attr in helper.iter_inputs_and_params(): input_shape = input_var.shape diff --git a/python/paddle/fluid/layers/tensor.py b/python/paddle/fluid/layers/tensor.py index a7ec339bf74..7dcce5efcfc 100644 --- a/python/paddle/fluid/layers/tensor.py +++ b/python/paddle/fluid/layers/tensor.py @@ -582,10 +582,9 @@ def assign(input, output=None): input = numpy.array(input) if isinstance(input, Variable): - check_dtype( - input.dtype, 'input', - ['float16', 'float32', 'float64', 'int32', 'int64', 'bool'], - 'assign', '(When the type of input in assign is Variable.)') + check_dtype(input.dtype, 'input', [ + 'float16', 'uint16', 'float32', 'float64', 'int32', 'int64', 'bool' + ], 'assign', '(When the type of input in assign is Variable.)') if output is None: output = helper.create_variable_for_type_inference( dtype=input.dtype) diff --git a/python/paddle/fluid/tests/book/test_fit_a_line.py b/python/paddle/fluid/tests/book/test_fit_a_line.py index df43d9366ff..1172ae0f0ea 100644 --- a/python/paddle/fluid/tests/book/test_fit_a_line.py +++ b/python/paddle/fluid/tests/book/test_fit_a_line.py @@ -16,6 +16,8 @@ from __future__ import print_function import paddle import paddle.fluid as fluid +import paddle.static.amp as amp + import contextlib import numpy import unittest @@ -26,19 +28,34 @@ import os 
paddle.enable_static() -def train(use_cuda, save_dirname, is_local, use_bf16): +def train(use_cuda, save_dirname, is_local, use_bf16, pure_bf16): x = fluid.layers.data(name='x', shape=[13], dtype='float32') - - y_predict = fluid.layers.fc(input=x, size=1, act=None) - y = fluid.layers.data(name='y', shape=[1], dtype='float32') - cost = fluid.layers.square_error_cost(input=y_predict, label=y) - avg_cost = fluid.layers.mean(cost) + if use_bf16: + if not pure_bf16: + with amp.bf16.bf16_guard(): + y_predict = fluid.layers.fc(input=x, size=1, act=None) + cost = fluid.layers.square_error_cost(input=y_predict, label=y) + avg_cost = fluid.layers.mean(cost) + else: + y_predict = fluid.layers.fc(input=x, size=1, act=None) + with amp.bf16.bf16_guard(): + cost = fluid.layers.square_error_cost(input=y_predict, label=y) + avg_cost = fluid.layers.mean(cost) + else: + y_predict = fluid.layers.fc(input=x, size=1, act=None) + cost = fluid.layers.square_error_cost(input=y_predict, label=y) + avg_cost = fluid.layers.mean(cost) sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001) + if use_bf16: - paddle.static.amp.rewrite_program_bf16(fluid.default_main_program()) + sgd_optimizer = amp.bf16.decorate_bf16( + sgd_optimizer, + amp_lists=amp.bf16.AutoMixedPrecisionListsBF16(), + use_bf16_guard=False, + use_pure_bf16=pure_bf16) sgd_optimizer.minimize(avg_cost) BATCH_SIZE = 20 @@ -54,6 +71,10 @@ def train(use_cuda, save_dirname, is_local, use_bf16): def train_loop(main_program): feeder = fluid.DataFeeder(place=place, feed_list=[x, y]) exe.run(fluid.default_startup_program()) + test_prog = main_program.clone(for_test=True) + if pure_bf16: + sgd_optimizer.amp_init( + exe.place, test_program=test_prog, use_bf16_test=True) PASS_NUM = 100 for pass_id in range(PASS_NUM): @@ -61,9 +82,8 @@ def train(use_cuda, save_dirname, is_local, use_bf16): avg_loss_value, = exe.run(main_program, feed=feeder.feed(data), fetch_list=[avg_cost]) - print(avg_loss_value) - if avg_loss_value[0] < 10.0: - if save_dirname is not None: + if avg_loss_value[0] < 10.0 or pure_bf16: + if save_dirname is not None and not pure_bf16: fluid.io.save_inference_model(save_dirname, ['x'], [y_predict], exe) return @@ -97,7 +117,7 @@ def train(use_cuda, save_dirname, is_local, use_bf16): train_loop(t.get_trainer_program()) -def infer(use_cuda, save_dirname=None): +def infer(use_cuda, save_dirname=None, use_bf16=False): if save_dirname is None: return @@ -135,7 +155,7 @@ def infer(use_cuda, save_dirname=None): print("ground truth: ", test_label) -def main(use_cuda, is_local=True, use_bf16=False): +def main(use_cuda, is_local=True, use_bf16=False, pure_bf16=False): if use_cuda and not fluid.core.is_compiled_with_cuda(): return @@ -145,11 +165,22 @@ def main(use_cuda, is_local=True, use_bf16=False): # Directory for saving the trained model save_dirname = "fit_a_line.inference.model" - train(use_cuda, save_dirname, is_local, use_bf16) - infer(use_cuda, save_dirname) + train(use_cuda, save_dirname, is_local, use_bf16, pure_bf16) + infer(use_cuda, save_dirname, use_bf16) + + +class TestFitALineBase(unittest.TestCase): + @contextlib.contextmanager + def program_scope_guard(self): + prog = fluid.Program() + startup_prog = fluid.Program() + scope = fluid.core.Scope() + with fluid.scope_guard(scope): + with fluid.program_guard(prog, startup_prog): + yield -class TestFitALine(unittest.TestCase): +class TestFitALine(TestFitALineBase): def test_cpu(self): with self.program_scope_guard(): main(use_cuda=False) @@ -158,20 +189,17 @@ class TestFitALine(unittest.TestCase): 
with self.program_scope_guard(): main(use_cuda=True) - @unittest.skipIf(not fluid.core.supports_bfloat16(), - "place does not support BF16 evaluation") + +@unittest.skipIf(not fluid.core.supports_bfloat16(), + "place does not support BF16 evaluation") +class TestFitALineBF16(TestFitALineBase): def test_bf16(self): with self.program_scope_guard(): main(use_cuda=False, use_bf16=True) - @contextlib.contextmanager - def program_scope_guard(self): - prog = fluid.Program() - startup_prog = fluid.Program() - scope = fluid.core.Scope() - with fluid.scope_guard(scope): - with fluid.program_guard(prog, startup_prog): - yield + def test_pure_bf16(self): + with self.program_scope_guard(): + main(use_cuda=False, use_bf16=True, pure_bf16=True) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/book/test_word2vec_book.py b/python/paddle/fluid/tests/book/test_word2vec_book.py index ad7550fa9dd..f16592a55cf 100644 --- a/python/paddle/fluid/tests/book/test_word2vec_book.py +++ b/python/paddle/fluid/tests/book/test_word2vec_book.py @@ -44,7 +44,8 @@ def train(target, is_parallel, save_dirname, is_local=True, - use_bf16=False): + use_bf16=False, + pure_bf16=False): PASS_NUM = 100 EMBED_SIZE = 32 HIDDEN_SIZE = 256 @@ -107,7 +108,13 @@ def train(target, sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001) if use_bf16: - paddle.static.amp.rewrite_program_bf16(fluid.default_main_program()) + sgd_optimizer = paddle.static.amp.bf16.decorate_bf16( + sgd_optimizer, + amp_lists=paddle.static.amp.bf16.AutoMixedPrecisionListsBF16( + custom_fp32_list={'softmax', 'concat'}, ), + use_bf16_guard=False, + use_pure_bf16=pure_bf16) + sgd_optimizer.minimize(avg_cost) train_reader = paddle.batch( @@ -121,6 +128,8 @@ def train(target, def train_loop(main_program): exe.run(fluid.default_startup_program()) + if pure_bf16: + sgd_optimizer.amp_init(exe.place) for pass_id in range(PASS_NUM): for data in train_reader(): @@ -128,7 +137,7 @@ def train(target, feed=feeder.feed(data), fetch_list=[avg_cost]) if avg_cost_np[0] < 5.0: - if save_dirname is not None: + if save_dirname is not None and not pure_bf16: fluid.io.save_inference_model(save_dirname, [ 'firstw', 'secondw', 'thirdw', 'forthw' ], [predict_word], exe) @@ -246,7 +255,7 @@ def infer(target, save_dirname=None): assert np.isclose(a, b, rtol=5e-5), "a: {}, b: {}".format(a, b) -def main(target, is_sparse, is_parallel, use_bf16): +def main(target, is_sparse, is_parallel, use_bf16, pure_bf16): if target == "cuda" and not fluid.core.is_compiled_with_cuda(): return if target == "xpu" and not fluid.core.is_compiled_with_xpu(): @@ -265,7 +274,13 @@ def main(target, is_sparse, is_parallel, use_bf16): # so only inference is turned on. 
train("cpu", is_sparse, is_parallel, save_dirname) else: - train(target, is_sparse, is_parallel, save_dirname, use_bf16=use_bf16) + train( + target, + is_sparse, + is_parallel, + save_dirname, + use_bf16=use_bf16, + pure_bf16=pure_bf16) infer(target, save_dirname) @@ -278,10 +293,15 @@ class W2VTest(unittest.TestCase): pass -def inject_test_method(target, is_sparse, is_parallel, use_bf16=False): +def inject_test_method(target, + is_sparse, + is_parallel, + use_bf16=False, + pure_bf16=False): fn_name = "test_{0}_{1}_{2}{3}".format(target, "sparse" if is_sparse else "dense", "parallel" - if is_parallel else "normal", "_bf16" + if is_parallel else "normal", + "_purebf16" if pure_bf16 else "_bf16" if use_bf16 else "") def __impl__(*args, **kwargs): @@ -290,7 +310,7 @@ def inject_test_method(target, is_sparse, is_parallel, use_bf16=False): scope = fluid.core.Scope() with fluid.scope_guard(scope): with fluid.program_guard(prog, startup_prog): - main(target, is_sparse, is_parallel, use_bf16) + main(target, is_sparse, is_parallel, use_bf16, pure_bf16) if (not fluid.core.is_compiled_with_cuda() or target == "cuda") and is_sparse: @@ -307,7 +327,8 @@ for target in ("cuda", "cpu", "xpu"): for is_sparse in (False, True): for is_parallel in (False, ): inject_test_method(target, is_sparse, is_parallel) -inject_test_method("cpu", False, False, use_bf16=True) +inject_test_method("cpu", False, False, True) +inject_test_method("cpu", False, False, True, True) if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_optimizer_grad.py b/python/paddle/fluid/tests/unittests/test_optimizer_grad.py index 69298f0f6a5..7caae211b7b 100644 --- a/python/paddle/fluid/tests/unittests/test_optimizer_grad.py +++ b/python/paddle/fluid/tests/unittests/test_optimizer_grad.py @@ -64,7 +64,7 @@ class SimpleNetWithCond(object): return grads - def build_net(self, cond_i): + def build_net(self, cond_i, use_bf16=False): """ pseudo code: sum_xy = x + y @@ -122,13 +122,22 @@ class SimpleNetWithCond(object): sum_cond = fluid.layers.cond(cond_i > 1.0, cond_true, cond_false) sum_all = fluid.layers.sum([sum_xy, sub_yz, sum_cond]) mean_out = fluid.layers.mean(sum_all) + if use_bf16: + import paddle.static.amp as amp + self.optimizer = amp.bf16.decorate_bf16( + self.optimizer, + amp_lists=amp.bf16.AutoMixedPrecisionListsBF16( + custom_fp32_list={'elementwise_add'}), + use_bf16_guard=False, + use_pure_bf16=True) + self.optimizer.minimize(mean_out) fetch_list = ["param_x", "param_z"] if self.y_no_grad else [ "param_x", "param_y", "param_z" ] fetch_list += [_append_grad_suffix_(param) for param in fetch_list] - return fetch_list + return fetch_list, self.optimizer class TestOptimizer(unittest.TestCase): @@ -180,7 +189,7 @@ class TestOptimizer(unittest.TestCase): for key in ['x', 'y', 'z']: self.param_attr[key] = self.attr.copy() - def _check_grads(self): + def _check_grads(self, use_bf16=False): """ main logic code to check the validity of apply_optimize. 
""" @@ -204,10 +213,16 @@ class TestOptimizer(unittest.TestCase): lambda: dict()) test_net = self.NetClass(self.optimizer, param_lr, y_no_grad) - fetch_list = test_net.build_net(cond_i) + fetch_list, decorated_optimizer = test_net.build_net( + cond_i, use_bf16) + if use_bf16: + self.optimizer = decorated_optimizer exe = fluid.Executor(place) exe.run(init_program) + if use_bf16: + self.optimizer.amp_init(exe.place) + # Train 2 steps to check validity for batch_i in range(2): @@ -222,6 +237,15 @@ class TestOptimizer(unittest.TestCase): param_grads[i]) +@unittest.skipIf(not fluid.core.supports_bfloat16(), + "place does not support BF16 evaluation") +class TestSGDOptimizer(TestOptimizer): + def test_optimizer_multiblock_except(self): + with self.assertRaisesRegexp(ValueError, + "var param_y not in this block"): + self._check_grads(use_bf16=True) + + class TestAdamOptimizer(TestOptimizer): """ inherit TestOptimizer and shall override two functions as follows: diff --git a/python/paddle/static/amp/__init__.py b/python/paddle/static/amp/__init__.py index 7320efe9b17..8ee3225057d 100644 --- a/python/paddle/static/amp/__init__.py +++ b/python/paddle/static/amp/__init__.py @@ -18,7 +18,4 @@ from ...fluid.contrib.mixed_precision import AutoMixedPrecisionLists # noqa: F4 from ...fluid.contrib.mixed_precision import fp16_guard # noqa: F401 from ...fluid.contrib.mixed_precision import cast_model_to_fp16 # noqa: F401 from ...fluid.contrib.mixed_precision import cast_parameters_to_fp16 # noqa: F401 -from ...fluid.contrib.mixed_precision import AutoMixedPrecisionListsBF16 # noqa: F401 -from ...fluid.contrib.mixed_precision import bf16_guard # noqa: F401 -from ...fluid.contrib.mixed_precision import rewrite_program_bf16 # noqa: F401 -from ...fluid.contrib.mixed_precision import convert_float_to_uint16 # noqa: F401 +from ...fluid.contrib.mixed_precision import bf16 # noqa: F401 -- GitLab From 119cda3d518ccd6d15c7abc263d930dbde7c4505 Mon Sep 17 00:00:00 2001 From: Leo Chen Date: Wed, 28 Apr 2021 23:35:02 +0800 Subject: [PATCH 046/720] [NPU] add input EpsilonTensor for adam (#32605) * add input EpsilonTensor for adam * update python api * add unit test * add npu test * add more ut --- paddle/fluid/operators/optimizers/adam_op.cc | 16 +- paddle/fluid/operators/optimizers/adam_op.cu | 11 +- paddle/fluid/operators/optimizers/adam_op.h | 11 +- .../fluid/operators/optimizers/adam_op_npu.cc | 62 ++++--- .../fluid/operators/optimizers/adam_op_xpu.cc | 7 +- python/paddle/fluid/optimizer.py | 24 ++- .../tests/unittests/npu/test_adam_op_npu.py | 150 +++++++++++++++- .../fluid/tests/unittests/test_adam_op.py | 164 ++++++++++++++++++ python/paddle/optimizer/adam.py | 26 ++- 9 files changed, 422 insertions(+), 49 deletions(-) diff --git a/paddle/fluid/operators/optimizers/adam_op.cc b/paddle/fluid/operators/optimizers/adam_op.cc index 621920731fb..a7886cdd670 100644 --- a/paddle/fluid/operators/optimizers/adam_op.cc +++ b/paddle/fluid/operators/optimizers/adam_op.cc @@ -151,6 +151,11 @@ class AdamOpMaker : public framework::OpProtoAndCheckerMaker { "as beta2, this has a higher priority than attr(beta2), the " "shape of this tensor MUST BE [1].") .AsDispensable(); + AddInput("EpsilonTensor", + "(Tensor, optional) If provided, Adam will use this " + "as epsilon, this has a higher priority than attr(epsilon), the " + "shape of this tensor MUST BE [1].") + .AsDispensable(); AddInput("MasterParam", "FP32 master weight for AMP.").AsDispensable(); AddOutput("ParamOut", "(Tensor) Output parameter"); @@ -232,4 +237,13 @@ 
REGISTER_OP_VERSION(adam) paddle::framework::compatible::OpVersionDesc().NewAttr( "multi_precision", "(bool) Whether to use multi-precision during weight updating.", - false)); + false)) + .AddCheckpoint( + R"ROC( + Upgrade adam, add 1 dispensable input [EpsilonTensor]. + )ROC", + paddle::framework::compatible::OpVersionDesc().NewInput( + "EpsilonTensor", + "If provided, Adam will use this as epsilon, " + "this has a higher priority than attr(epsilon). " + "For better performance in npu kernel. ")); diff --git a/paddle/fluid/operators/optimizers/adam_op.cu b/paddle/fluid/operators/optimizers/adam_op.cu index 54aea67f4ea..3d6f0f99a52 100644 --- a/paddle/fluid/operators/optimizers/adam_op.cu +++ b/paddle/fluid/operators/optimizers/adam_op.cu @@ -154,7 +154,7 @@ class AdamOpCUDAKernel : public framework::OpKernel { int64_t min_row_size_to_use_multithread = ctx.Attr("min_row_size_to_use_multithread"); bool lazy_mode = ctx.Attr("lazy_mode"); - MPDType epsilon = static_cast(ctx.Attr("epsilon")); + auto* param = ctx.Input("Param"); auto* grad_var = ctx.InputVar("Grad"); auto* mom1 = ctx.Input("Moment1"); @@ -188,6 +188,15 @@ class AdamOpCUDAKernel : public framework::OpKernel { beta2_tensor->numel())); beta2 = static_cast(GetAttrFromTensor(beta2_tensor)); } + MPDType epsilon = static_cast(ctx.Attr("epsilon")); + if (ctx.HasInput("EpsilonTensor")) { + auto* epsilon_tensor = ctx.Input("EpsilonTensor"); + PADDLE_ENFORCE_EQ(epsilon_tensor->numel(), 1, + platform::errors::InvalidArgument( + "Input(EpsilonTensor) size must be 1, but get %d", + epsilon_tensor->numel())); + epsilon = static_cast(GetAttrFromTensor(epsilon_tensor)); + } VLOG(3) << "beta1_pow.numel() : " << beta1_pow->numel() << "beta2_pow.numel() : " << beta2_pow->numel(); VLOG(3) << "param.numel(): " << param->numel(); diff --git a/paddle/fluid/operators/optimizers/adam_op.h b/paddle/fluid/operators/optimizers/adam_op.h index 6356911f067..9667db8055b 100644 --- a/paddle/fluid/operators/optimizers/adam_op.h +++ b/paddle/fluid/operators/optimizers/adam_op.h @@ -406,7 +406,7 @@ class AdamOpKernel : public framework::OpKernel { int64_t min_row_size_to_use_multithread = ctx.Attr("min_row_size_to_use_multithread"); bool lazy_mode = ctx.Attr("lazy_mode"); - T epsilon = static_cast(ctx.Attr("epsilon")); + auto* param = ctx.Input("Param"); auto* grad_var = ctx.InputVar("Grad"); auto* mom1 = ctx.Input("Moment1"); @@ -440,6 +440,15 @@ class AdamOpKernel : public framework::OpKernel { beta2_tensor->numel())); beta2 = static_cast(GetAttrFromTensor(beta2_tensor)); } + T epsilon = static_cast(ctx.Attr("epsilon")); + if (ctx.HasInput("EpsilonTensor")) { + auto* epsilon_tensor = ctx.Input("EpsilonTensor"); + PADDLE_ENFORCE_EQ(epsilon_tensor->numel(), 1, + platform::errors::InvalidArgument( + "Input(EpsilonTensor) size must be 1, but get %d", + epsilon_tensor->numel())); + epsilon = static_cast(GetAttrFromTensor(epsilon_tensor)); + } VLOG(3) << "beta1_pow.numel() : " << beta1_pow->numel() << "beta2_pow.numel() : " << beta2_pow->numel(); VLOG(3) << "param.numel(): " << param->numel(); diff --git a/paddle/fluid/operators/optimizers/adam_op_npu.cc b/paddle/fluid/operators/optimizers/adam_op_npu.cc index a922a2bca66..343a6704388 100644 --- a/paddle/fluid/operators/optimizers/adam_op_npu.cc +++ b/paddle/fluid/operators/optimizers/adam_op_npu.cc @@ -80,24 +80,53 @@ class AdamNPUKernel : public framework::OpKernel { beta2_pow_out->mutable_data(ctx.GetPlace()); } - T beta1 = static_cast(ctx.Attr("beta1")); + const Tensor* beta1_tensor = nullptr; + const Tensor* 
beta2_tensor = nullptr;
+    const Tensor* epsilon_tensor = nullptr;
+
+    Tensor beta1_tmp(framework::proto::VarType::FP32);
+    Tensor beta2_tmp(framework::proto::VarType::FP32);
+    Tensor epsilon_tmp(framework::proto::VarType::FP32);
+
     if (ctx.HasInput("Beta1Tensor")) {
-      auto* beta1_tensor = ctx.Input("Beta1Tensor");
+      beta1_tensor = ctx.Input("Beta1Tensor");
       PADDLE_ENFORCE_EQ(beta1_tensor->numel(), 1,
                         platform::errors::InvalidArgument(
                             "Input(Beta1Tensor) size must be 1, but get %d",
                             beta1_tensor->numel()));
-      beta1 = static_cast(GetAttrFromTensor(beta1_tensor));
+    } else {
+      T beta1 = static_cast(ctx.Attr("beta1"));
+      beta1_tmp.mutable_data({1}, ctx.GetPlace());
+      FillNpuTensorWithConstant(&beta1_tmp, beta1);
+      beta1_tensor = &beta1_tmp;
     }
-    T beta2 = static_cast(ctx.Attr("beta2"));
+
     if (ctx.HasInput("Beta2Tensor")) {
-      auto* beta2_tensor = ctx.Input("Beta2Tensor");
-      PADDLE_ENFORCE_EQ(beta2_tensor->numel(), 1,
+      beta2_tensor = ctx.Input("Beta2Tensor");
+      PADDLE_ENFORCE_EQ(beta2_tensor->numel(), 1,
                         platform::errors::InvalidArgument(
                             "Input(Beta2Tensor) size must be 1, but get %d",
                             beta2_tensor->numel()));
-      beta2 = static_cast(GetAttrFromTensor(beta2_tensor));
+    } else {
+      T beta2 = static_cast(ctx.Attr("beta2"));
+      beta2_tmp.mutable_data({1}, ctx.GetPlace());
+      FillNpuTensorWithConstant(&beta2_tmp, beta2);
+      beta2_tensor = &beta2_tmp;
     }
+
+    if (ctx.HasInput("EpsilonTensor")) {
+      epsilon_tensor = ctx.Input("EpsilonTensor");
+      PADDLE_ENFORCE_EQ(epsilon_tensor->numel(), 1,
+                        platform::errors::InvalidArgument(
+                            "Input(EpsilonTensor) size must be 1, but get %d",
+                            epsilon_tensor->numel()));
+    } else {
+      T epsilon = static_cast(ctx.Attr("epsilon"));
+      epsilon_tmp.mutable_data({1}, ctx.GetPlace());
+      FillNpuTensorWithConstant(&epsilon_tmp, epsilon);
+      epsilon_tensor = &epsilon_tmp;
+    }
+
     VLOG(3) << "beta1_pow.numel() : " << beta1_pow->numel()
             << "beta2_pow.numel() : " << beta2_pow->numel();
     VLOG(3) << "param.numel(): " << param->numel();
@@ -113,19 +142,6 @@ class AdamNPUKernel : public framework::OpKernel {
                           "beta2 pow output size should be 1, but received "
                           "value is:%d.",
                           beta2_pow_out->numel()));
-
-    // reshape
-    Tensor beta1_tensor(framework::proto::VarType::FP32);
-    beta1_tensor.mutable_data({1}, ctx.GetPlace());
-    FillNpuTensorWithConstant(&beta1_tensor, beta1);
-    Tensor beta2_tensor(framework::proto::VarType::FP32);
-    beta2_tensor.mutable_data({1}, ctx.GetPlace());
-    FillNpuTensorWithConstant(&beta2_tensor, beta2);
-
-    Tensor epsilon_tensor(framework::proto::VarType::FP32);
-    TensorFromVector(std::vector{epsilon},
-                     ctx.template device_context(),
-                     &epsilon_tensor);
     auto stream =
         ctx.template device_context()
             .stream();
@@ -133,7 +149,7 @@ class AdamNPUKernel : public framework::OpKernel {
         NpuOpRunner("ApplyAdamD",
                     {
                         *param, *mom1, *mom2, *beta1_pow, *beta2_pow, *lr,
-                        beta1_tensor, beta2_tensor, epsilon_tensor, *grad,
+                        *beta1_tensor, *beta2_tensor, *epsilon_tensor, *grad,
                     },
                     {
                         *param_out, *mom1_out, *mom2_out,
@@ -159,10 +175,10 @@ class AdamNPUKernel : public framework::OpKernel {
           ctx.template device_context(), mom2_out);
     }
     auto runner_m1 =
-        NpuOpRunner("Mul", {*beta1_pow, beta1_tensor}, {*beta1_pow_out}, {});
+        NpuOpRunner("Mul", {*beta1_pow, *beta1_tensor}, {*beta1_pow_out}, {});
     runner_m1.Run(stream);
    auto runner_m2 =
-        NpuOpRunner("Mul", {*beta2_pow, beta2_tensor}, {*beta2_pow_out}, {});
+        NpuOpRunner("Mul", {*beta2_pow, *beta2_tensor}, {*beta2_pow_out}, {});
     runner_m2.Run(stream);
   }
 };
diff --git a/paddle/fluid/operators/optimizers/adam_op_xpu.cc
b/paddle/fluid/operators/optimizers/adam_op_xpu.cc index 3baba424e8f..09f11737449 100644 --- a/paddle/fluid/operators/optimizers/adam_op_xpu.cc +++ b/paddle/fluid/operators/optimizers/adam_op_xpu.cc @@ -35,8 +35,6 @@ class AdamOpXPUKernel : public framework::OpKernel { framework::ToTypeName(param_var->Type()))); using paddle::framework::LoDTensor; - T epsilon = static_cast(ctx.Attr("epsilon")); - auto& param = GET_DATA_SAFELY(ctx.Input("Param"), "Input", "Param", "Adam"); // auto& grad = Ref(ctx.Input("Grad"), "Must set Grad"); @@ -85,6 +83,11 @@ class AdamOpXPUKernel : public framework::OpKernel { auto* beta2_tensor = ctx.Input("Beta2Tensor"); beta2 = static_cast(GetAttrFromTensor(beta2_tensor)); } + T epsilon = static_cast(ctx.Attr("epsilon")); + if (ctx.HasInput("EpsilonTensor")) { + auto* epsilon_tensor = ctx.Input("EpsilonTensor"); + epsilon = static_cast(GetAttrFromTensor(epsilon_tensor)); + } if (grad_var->IsType()) { auto& grad = GET_DATA_SAFELY(ctx.Input("Grad"), "Input", "Grad", "Adam"); diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py index 21b4c429a66..e4fafb0132c 100755 --- a/python/paddle/fluid/optimizer.py +++ b/python/paddle/fluid/optimizer.py @@ -1890,7 +1890,8 @@ class AdamOptimizer(Optimizer): beta2 (float|Variable, optional): The exponential decay rate for the 2nd moment estimates. It should be a float number or a Variable with shape [1] and data type as float32. The default value is 0.999. - epsilon (float, optional): A small float value for numerical stability. + epsilon (float|Tensor, optional): A small float value for numerical stability. + It should be a float number or a Variable with shape [1] and data type as float32. The default value is 1e-08. parameter_list (Iterable, optional): Iterable of ``Variable`` names to update to minimize ``loss``. \ This parameter is required in dygraph mode. 
\ @@ -1959,7 +1960,7 @@ class AdamOptimizer(Optimizer): avg_cost = fluid.layers.mean(cost) # define beta decay variable - def get_decayed_betas(beta1_init, beta2_init, decay_steps, decay_rate): + def get_decayed_betas(beta1_init, beta2_init, decay_steps, decay_rate, epsilon_init): global_step = lr_scheduler._decay_step_counter() beta1 = fluid.layers.create_global_var( @@ -1976,6 +1977,13 @@ class AdamOptimizer(Optimizer): # set persistable for save checkpoints and resume persistable=True, name="beta2") + epsilon = fluid.layers.create_global_var( + shape=[1], + value=float(epsilon_init), + dtype='float32', + # set persistable for save checkpoints and resume + persistable=True, + name="epsilon") div_res = global_step / decay_steps decayed_beta1 = beta1_init * (decay_rate**div_res) @@ -1983,13 +1991,14 @@ class AdamOptimizer(Optimizer): fluid.layers.assign(decayed_beta1, beta1) fluid.layers.assign(decayed_beta2, beta2) - return beta1, beta2 + return beta1, beta2, epsilon - beta1, beta2 = get_decayed_betas(0.9, 0.99, 1e5, 0.9) + beta1, beta2, epsilon = get_decayed_betas(0.9, 0.99, 1e5, 0.9, 1e-8) adam_optimizer = fluid.optimizer.AdamOptimizer( learning_rate=0.01, beta1=beta1, - beta2=beta2) + beta2=beta2, + epsilon=epsilon) adam_optimizer.minimize(avg_cost) fetch_list = [avg_cost] @@ -2099,7 +2108,6 @@ class AdamOptimizer(Optimizer): "Beta2PowOut": [beta2_pow_acc], } attrs = { - "epsilon": self._epsilon, "lazy_mode": self._lazy_mode, "min_row_size_to_use_multithread": 1000 } @@ -2112,6 +2120,10 @@ class AdamOptimizer(Optimizer): inputs['Beta2Tensor'] = self._beta2 else: attrs['beta2'] = self._beta2 + if isinstance(self._epsilon, Variable): + inputs['EpsilonTensor'] = self._epsilon + else: + attrs['epsilon'] = self._epsilon adam_op = block.append_op( type=self.type, diff --git a/python/paddle/fluid/tests/unittests/npu/test_adam_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_adam_op_npu.py index ebf041388ee..ec616070b63 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_adam_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_adam_op_npu.py @@ -27,7 +27,7 @@ SEED = 2021 @unittest.skipIf(not paddle.is_compiled_with_npu(), "core is not compiled with NPU") -class TestSGD(OpTest): +class TestAdam(OpTest): def setUp(self): self.set_npu() self.place = paddle.NPUPlace(0) @@ -78,9 +78,61 @@ class TestSGD(OpTest): self.check_output_with_place(self.place, atol=1e-5, check_dygraph=False) -''' -# TODO(zhiqiu): The following test may let 0-3 card down. -# we need to analyze it and open it. 
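Note: with the new EpsilonTensor input wired up above, beta1, beta2 and epsilon can each be supplied to the Adam optimizer as 1-element float32 tensors rather than plain Python floats. A minimal static-graph sketch of that usage follows; the tiny fc network and the hyper-parameter values are placeholders for illustration, not part of the patch itself:

    import paddle
    import paddle.fluid as fluid

    paddle.enable_static()
    main_prog = fluid.Program()
    startup_prog = fluid.Program()
    with fluid.program_guard(main_prog, startup_prog):
        x = fluid.data(name='x', shape=[None, 13], dtype='float32')
        loss = fluid.layers.reduce_mean(fluid.layers.fc(input=x, size=1))

        # hyper-parameters as 1-element persistable tensors, mirroring the
        # unit tests added below
        beta1 = fluid.layers.create_global_var(
            shape=[1], value=0.9, dtype='float32', persistable=True, name='beta1')
        beta2 = fluid.layers.create_global_var(
            shape=[1], value=0.999, dtype='float32', persistable=True, name='beta2')
        epsilon = fluid.layers.create_global_var(
            shape=[1], value=1e-8, dtype='float32', persistable=True, name='epsilon')

        adam = fluid.optimizer.Adam(
            learning_rate=0.01, beta1=beta1, beta2=beta2, epsilon=epsilon)
        adam.minimize(loss)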
+@unittest.skipIf(not paddle.is_compiled_with_npu(), + "core is not compiled with NPU") +class TestAdamWithEpsilonTensor(OpTest): + def setUp(self): + self.set_npu() + self.place = paddle.NPUPlace(0) + self.op_type = "adam" + param = np.random.uniform(-1, 1, (102, 105)).astype("float32") + grad = np.random.uniform(-1, 1, (102, 105)).astype("float32") + moment1 = np.random.uniform(-1, 1, (102, 105)).astype("float32") + # The second moment is positive + moment2 = np.random.random((102, 105)).astype("float32") + + learning_rate = 0.004 + beta1 = 0.78 + beta2 = 0.836 + epsilon = 1e-4 + beta1_pow = beta1**10 + beta2_pow = beta2**10 + + self.inputs = { + 'Param': param, + 'Grad': grad, + 'Moment1': moment1, + 'Moment2': moment2, + 'LearningRate': np.array([learning_rate]).astype("float32"), + 'Beta1Pow': np.array([beta1_pow]).astype("float32"), + 'Beta2Pow': np.array([beta2_pow]).astype("float32"), + 'Beta1Tensor': np.array([beta1]).astype("float32"), + 'Beta2Tensor': np.array([beta2]).astype("float32"), + 'EpsilonTensor': np.array([epsilon]).astype("float32"), + } + + self.attrs = {'epsilon': epsilon} + + param_out, moment1_out, \ + moment2_out = adam_step(self.inputs, self.attrs) + + self.outputs = { + 'Moment1Out': moment1_out, + 'Moment2Out': moment2_out, + 'ParamOut': param_out, + 'Beta1PowOut': np.array([beta1_pow]).astype("float32") * beta1, + 'Beta2PowOut': np.array([beta2_pow]).astype("float32") * beta2 + } + + def set_npu(self): + self.__class__.use_npu = True + + def init_dtype(self): + self.dtype = np.float32 + + def test_check_output(self): + self.check_output_with_place(self.place, atol=1e-5, check_dygraph=False) + @unittest.skipIf(not paddle.is_compiled_with_npu(), "core is not compiled with NPU") @@ -140,9 +192,93 @@ class TestNet(unittest.TestCase): cpu_pred, cpu_loss = self._test(False) npu_pred, npu_loss = self._test(True) - self.assertTrue(np.allclose(npu_pred, cpu_pred)) - self.assertTrue(np.allclose(npu_loss, cpu_loss)) -''' + self.assertTrue(np.allclose(npu_pred, cpu_pred, rtol=1e-4)) + self.assertTrue(np.allclose(npu_loss, cpu_loss, rtol=1e-4)) + + +@unittest.skipIf(not paddle.is_compiled_with_npu(), + "core is not compiled with NPU") +class TestNetWithEpsilonTensor(unittest.TestCase): + def _test(self, run_npu=True): + main_prog = paddle.static.Program() + startup_prog = paddle.static.Program() + main_prog.random_seed = SEED + startup_prog.random_seed = SEED + np.random.seed(SEED) + + a_np = np.random.random(size=(32, 32)).astype('float32') + b_np = np.random.random(size=(32, 32)).astype('float32') + label_np = np.random.randint(2, size=(32, 1)).astype('int64') + + with paddle.static.program_guard(main_prog, startup_prog): + a = paddle.static.data(name="a", shape=[32, 32], dtype='float32') + b = paddle.static.data(name="b", shape=[32, 32], dtype='float32') + label = paddle.static.data( + name="label", shape=[32, 1], dtype='int64') + + sum = paddle.add(a, b) + z = paddle.pow(sum, 2.0) + + fc_1 = fluid.layers.fc(input=z, size=128) + prediction = fluid.layers.fc(input=fc_1, size=2, act='softmax') + + cost = fluid.layers.cross_entropy(input=prediction, label=label) + loss = fluid.layers.reduce_mean(cost) + beta1_init = 0.9 + beta2_init = 0.999 + epsilon_init = 1e-8 + beta1 = fluid.layers.create_global_var( + shape=[1], + value=float(beta1_init), + dtype='float32', + persistable=True, + name="beta1") + beta2 = fluid.layers.create_global_var( + shape=[1], + value=float(beta2_init), + dtype='float32', + persistable=True, + name="beta2") + epsilon = 
fluid.layers.create_global_var( + shape=[1], + value=float(epsilon_init), + dtype='float32', + persistable=True, + name="epsilon") + adam = fluid.optimizer.Adam( + learning_rate=0.01, beta1=beta1, beta2=beta2, epsilon=epsilon) + adam.minimize(loss) + + if run_npu: + place = paddle.NPUPlace(0) + else: + place = paddle.CPUPlace() + + exe = paddle.static.Executor(place) + exe.run(startup_prog) + + print("Start run on {}".format(place)) + for epoch in range(100): + + pred_res, loss_res = exe.run( + main_prog, + feed={"a": a_np, + "b": b_np, + "label": label_np}, + fetch_list=[prediction, loss]) + if epoch % 10 == 0: + print("Epoch {} | Prediction[0]: {}, Loss: {}".format( + epoch, pred_res[0], loss_res)) + + return pred_res, loss_res + + def test_npu(self): + cpu_pred, cpu_loss = self._test(False) + npu_pred, npu_loss = self._test(True) + + self.assertTrue(np.allclose(npu_pred, cpu_pred, rtol=1e-4)) + self.assertTrue(np.allclose(npu_loss, cpu_loss, rtol=1e-4)) + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_adam_op.py b/python/paddle/fluid/tests/unittests/test_adam_op.py index f337e0079e7..cb646ef0b93 100644 --- a/python/paddle/fluid/tests/unittests/test_adam_op.py +++ b/python/paddle/fluid/tests/unittests/test_adam_op.py @@ -402,6 +402,54 @@ class TestAdamOpBetaVariable(OpTest): self.check_output() +class TestAdamOpBetaEpsilonVariable(OpTest): + def setUp(self): + '''Test Adam Op with beta as Variable + ''' + self.op_type = "adam" + param = np.random.uniform(-1, 1, (102, 105)).astype("float32") + grad = np.random.uniform(-1, 1, (102, 105)).astype("float32") + moment1 = np.random.uniform(-1, 1, (102, 105)).astype("float32") + # The second moment is positive + moment2 = np.random.random((102, 105)).astype("float32") + beta1 = 0.85 + beta2 = 0.95 + + learning_rate = 0.001 + epsilon = 1e-8 + beta1_pow = beta1**10 + beta2_pow = beta2**10 + + self.inputs = { + 'Param': param, + 'Grad': grad, + 'Moment1': moment1, + 'Moment2': moment2, + 'LearningRate': np.array([learning_rate]).astype("float32"), + 'Beta1Pow': np.array([beta1_pow]).astype("float32"), + 'Beta2Pow': np.array([beta2_pow]).astype("float32"), + "Beta1Tensor": np.array([beta1]).astype("float32"), + "Beta2Tensor": np.array([beta2]).astype("float32"), + "EpsilonTensor": np.array([epsilon]).astype("float32"), + } + + attributes = {'epsilon': epsilon} + + param_out, moment1_out, \ + moment2_out = adam_step(self.inputs, attributes) + + self.outputs = { + 'Moment1Out': moment1_out, + 'Moment2Out': moment2_out, + 'ParamOut': param_out, + 'Beta1PowOut': np.array([beta1_pow]).astype("float32") * beta1, + 'Beta2PowOut': np.array([beta2_pow]).astype("float32") * beta2 + } + + def test_check_output(self): + self.check_output() + + class TestAdamOpV2(unittest.TestCase): def test_adam_op(self): place = fluid.CPUPlace() @@ -531,5 +579,121 @@ class TestAdamOpV2(unittest.TestCase): adam.step() +class TestNetWithEpsilonTensor(unittest.TestCase): + def _test(self, place, use_tensor=True, use_fluid_api=True): + paddle.enable_static() + main_prog = paddle.static.Program() + startup_prog = paddle.static.Program() + SEED = 2021 + paddle.seed(SEED) + np.random.seed(SEED) + + a_np = np.random.random(size=(32, 32)).astype('float32') + b_np = np.random.random(size=(32, 32)).astype('float32') + label_np = np.random.randint(2, size=(32, 1)).astype('int64') + + with paddle.static.program_guard(main_prog, startup_prog): + a = paddle.static.data(name="a", shape=[32, 32], dtype='float32') + b = 
paddle.static.data(name="b", shape=[32, 32], dtype='float32') + label = paddle.static.data( + name="label", shape=[32, 1], dtype='int64') + + sum = paddle.add(a, b) + z = paddle.pow(sum, 2.0) + + fc_1 = fluid.layers.fc(input=z, size=128) + prediction = fluid.layers.fc(input=fc_1, size=2, act='softmax') + + cost = fluid.layers.cross_entropy(input=prediction, label=label) + loss = fluid.layers.reduce_mean(cost) + beta1_init = 0.9 + beta2_init = 0.999 + epsilon_init = 1e-8 + if use_tensor: + beta1 = fluid.layers.create_global_var( + shape=[1], + value=float(beta1_init), + dtype='float32', + persistable=True, + name="beta1") + beta2 = fluid.layers.create_global_var( + shape=[1], + value=float(beta2_init), + dtype='float32', + persistable=True, + name="beta2") + epsilon = fluid.layers.create_global_var( + shape=[1], + value=float(epsilon_init), + dtype='float32', + persistable=True, + name="epsilon") + if use_fluid_api: + adam = fluid.optimizer.Adam( + learning_rate=0.01, + beta1=beta1, + beta2=beta2, + epsilon=epsilon) + else: + adam = paddle.optimizer.Adam( + learning_rate=0.01, + beta1=beta1, + beta2=beta2, + epsilon=epsilon) + else: + if use_fluid_api: + adam = fluid.optimizer.Adam( + learning_rate=0.01, + beta1=beta1_init, + beta2=beta2_init, + epsilon=epsilon_init) + else: + adam = fluid.optimizer.Adam( + learning_rate=0.01, + beta1=beta1_init, + beta2=beta2_init, + epsilon=epsilon_init) + + adam.minimize(loss) + + exe = paddle.static.Executor(place) + exe.run(startup_prog) + + print("Start run on {}".format(place)) + for epoch in range(10): + + pred_res, loss_res = exe.run( + main_prog, + feed={"a": a_np, + "b": b_np, + "label": label_np}, + fetch_list=[prediction, loss]) + + print("Epoch {} | Prediction[0]: {}, Loss: {}".format(epoch, pred_res[ + 0], loss_res)) + paddle.disable_static() + return pred_res, loss_res + + def _test_with_place(self, place): + preds = [] + losses = [] + + for use_tensor in [True, False]: + for use_fluid_api in [True, False]: + pred, loss = self._test(place, use_tensor, use_fluid_api) + preds.append(pred) + losses.append(loss) + for pred in preds: + self.assertTrue(np.allclose(pred, preds[0])) + for loss in losses: + self.assertTrue(np.allclose(loss, losses[0])) + + def test_adam_api(self): + # NOTE(zhiqiu): cpu and gpu has different seed, so should compare separatly. + self._test_with_place(paddle.CPUPlace()) + if core.is_compiled_with_cuda(): + self._test_with_place(paddle.CUDAPlace(0)) + + if __name__ == "__main__": unittest.main() diff --git a/python/paddle/optimizer/adam.py b/python/paddle/optimizer/adam.py index 4904ebb56cc..358fa8fb97d 100644 --- a/python/paddle/optimizer/adam.py +++ b/python/paddle/optimizer/adam.py @@ -58,7 +58,8 @@ class Adam(Optimizer): beta2 (float|Tensor, optional): The exponential decay rate for the 2nd moment estimates. It should be a float number or a Tensor with shape [1] and data type as float32. The default value is 0.999. - epsilon (float, optional): A small float value for numerical stability. + epsilon (float|Tensor, optional): A small float value for numerical stability. + It should be a float number or a Tensor with shape [1] and data type as float32. The default value is 1e-08. parameters (list|tuple, optional): List/Tuple of ``Tensor`` to update to minimize ``loss``. \ This parameter is required in dygraph mode. 
\ @@ -144,12 +145,18 @@ class Adam(Optimizer): assert beta1 is not None assert beta2 is not None assert epsilon is not None - if not 0 <= beta1 < 1: - raise ValueError("Invaild value of beta1, expect beta1 in [0,1).") - if not 0 <= beta2 < 1: - raise ValueError("Invaild value of beta2, expect beta2 in [0,1).") - if not 0 <= epsilon: - raise ValueError("Invaild value of epsilon, expect epsilon >= 0.") + if not isinstance(beta1, Variable): + if not 0 <= beta1 < 1: + raise ValueError( + "Invaild value of beta1, expect beta1 in [0,1).") + if not isinstance(beta2, Variable): + if not 0 <= beta2 < 1: + raise ValueError( + "Invaild value of beta2, expect beta2 in [0,1).") + if not isinstance(epsilon, Variable): + if not 0 <= epsilon: + raise ValueError( + "Invaild value of epsilon, expect epsilon >= 0.") super(Adam, self).__init__( learning_rate=learning_rate, parameters=parameters, @@ -295,7 +302,6 @@ class Adam(Optimizer): "Beta2PowOut": [beta2_pow_acc], } attrs = { - "epsilon": self._epsilon, "lazy_mode": self._lazy_mode, "min_row_size_to_use_multithread": 1000, "multi_precision": find_master @@ -309,6 +315,10 @@ class Adam(Optimizer): inputs['Beta2Tensor'] = self._beta2 else: attrs['beta2'] = self._beta2 + if isinstance(self._epsilon, Variable): + inputs['EpsilonTensor'] = self._epsilon + else: + attrs['epsilon'] = self._epsilon if find_master: inputs["MasterParam"] = master_weight -- GitLab From 243b43261a8fa0ebff5284e22b5867480e0a6764 Mon Sep 17 00:00:00 2001 From: zhiboniu <31800336+zhiboniu@users.noreply.github.com> Date: Thu, 29 Apr 2021 08:38:52 +0800 Subject: [PATCH 047/720] update 2.0 public api in hapi (#32650) --- python/paddle/hapi/__init__.py | 19 +++++++++---------- python/paddle/hapi/dynamic_flops.py | 2 +- python/paddle/hapi/hub.py | 2 ++ python/paddle/hapi/logger.py | 2 ++ python/paddle/hapi/model.py | 2 +- python/paddle/hapi/model_summary.py | 2 +- python/paddle/hapi/progressbar.py | 2 +- python/paddle/hapi/static_flops.py | 2 ++ 8 files changed, 19 insertions(+), 14 deletions(-) diff --git a/python/paddle/hapi/__init__.py b/python/paddle/hapi/__init__.py index 6b7672828e6..2829bbe9470 100644 --- a/python/paddle/hapi/__init__.py +++ b/python/paddle/hapi/__init__.py @@ -12,17 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. -from . import logger -from . import callbacks -from . import model_summary -from . import hub +from . import logger # noqa: F401 +from . import callbacks # noqa: F401 +from . import hub # noqa: F401 +from . import progressbar # noqa: F401 +from . import static_flops # noqa: F401 -from . 
import model -from .model import * -from .model_summary import summary -from .dynamic_flops import flops +from .model import Model # noqa: F401 +from .model_summary import summary # noqa: F401 +from .dynamic_flops import flops # noqa: F401 logger.setup_logger() -__all__ = ['callbacks'] + model.__all__ + ['summary'] -__all__ = model.__all__ + ['flops'] +__all__ = [] diff --git a/python/paddle/hapi/dynamic_flops.py b/python/paddle/hapi/dynamic_flops.py index 35819d6b7bb..8be6758f1e5 100644 --- a/python/paddle/hapi/dynamic_flops.py +++ b/python/paddle/hapi/dynamic_flops.py @@ -18,7 +18,7 @@ import paddle.nn as nn import numpy as np from .static_flops import static_flops, Table -__all__ = ['flops'] +__all__ = [] def flops(net, input_size, custom_ops=None, print_detail=False): diff --git a/python/paddle/hapi/hub.py b/python/paddle/hapi/hub.py index 31a8be0944f..6490c878f9b 100644 --- a/python/paddle/hapi/hub.py +++ b/python/paddle/hapi/hub.py @@ -19,6 +19,8 @@ import shutil import zipfile from paddle.utils.download import get_path_from_url +__all__ = [] + DEFAULT_CACHE_DIR = '~/.cache' VAR_DEPENDENCY = 'dependencies' MODULE_HUBCONF = 'hubconf.py' diff --git a/python/paddle/hapi/logger.py b/python/paddle/hapi/logger.py index d4f18ce0ff7..ea515d95324 100644 --- a/python/paddle/hapi/logger.py +++ b/python/paddle/hapi/logger.py @@ -22,6 +22,8 @@ import logging from paddle.fluid.dygraph.parallel import ParallelEnv +__all__ = [] + def setup_logger(output=None, name="hapi", log_level=logging.INFO): """ diff --git a/python/paddle/hapi/model.py b/python/paddle/hapi/model.py index 5a33d5b58dc..160d6c54759 100644 --- a/python/paddle/hapi/model.py +++ b/python/paddle/hapi/model.py @@ -54,7 +54,7 @@ from paddle.distributed.fleet.base import role_maker from .callbacks import config_callbacks, EarlyStopping from .model_summary import summary -__all__ = ['Model', ] +__all__ = [] _parallel_context_initialized = False diff --git a/python/paddle/hapi/model_summary.py b/python/paddle/hapi/model_summary.py index 9f2769e1ca2..d78196d9445 100644 --- a/python/paddle/hapi/model_summary.py +++ b/python/paddle/hapi/model_summary.py @@ -22,7 +22,7 @@ from paddle.static import InputSpec from collections import OrderedDict -__all__ = ['summary'] +__all__ = [] def summary(net, input_size, dtypes=None): diff --git a/python/paddle/hapi/progressbar.py b/python/paddle/hapi/progressbar.py index cf5a03ed498..5f63a3169f8 100644 --- a/python/paddle/hapi/progressbar.py +++ b/python/paddle/hapi/progressbar.py @@ -22,7 +22,7 @@ import time import numpy as np from collections import namedtuple -__all__ = ['ProgressBar'] +__all__ = [] class ProgressBar(object): diff --git a/python/paddle/hapi/static_flops.py b/python/paddle/hapi/static_flops.py index 3656e0c1894..07fc19b2cb8 100644 --- a/python/paddle/hapi/static_flops.py +++ b/python/paddle/hapi/static_flops.py @@ -18,6 +18,8 @@ import paddle from collections import OrderedDict from paddle.static import Program, program_guard, Variable +__all__ = [] + class VarWrapper(object): def __init__(self, var, graph): -- GitLab From 75282e7466f948673faa7adf9a2da513e82c7d52 Mon Sep 17 00:00:00 2001 From: zlsh80826 Date: Thu, 29 Apr 2021 09:04:32 +0800 Subject: [PATCH 048/720] [Paddle-TRT] Implement MHA fp16 order same as training (#32629) * implement MHA order same as training * fix fp16 compile issue on old architecture * fix format * fix format --- .../tensorrt/plugin/qkv_to_context_plugin.cu | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git 
a/paddle/fluid/inference/tensorrt/plugin/qkv_to_context_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/qkv_to_context_plugin.cu index a5fc9e73c5f..214e1a81e7d 100644 --- a/paddle/fluid/inference/tensorrt/plugin/qkv_to_context_plugin.cu +++ b/paddle/fluid/inference/tensorrt/plugin/qkv_to_context_plugin.cu @@ -225,6 +225,14 @@ nvinfer1::DataType QkvToContextPluginDynamic::getOutputDataType( return input_types[0]; } +template +__global__ void apply_scale(T *data, T scale, int n) { +#if CUDA_ARCH_FP16_SUPPORTED(__CUDA_ARCH__) + int tid = blockIdx.x * blockDim.x + threadIdx.x; + data[tid] = data[tid] * scale; +#endif +} + int QkvToContextPluginDynamic::enqueue( const nvinfer1::PluginTensorDesc *input_desc, const nvinfer1::PluginTensorDesc *output_desc, const void *const *inputs, @@ -291,10 +299,17 @@ int QkvToContextPluginDynamic::enqueue( platform::DeviceContextPool::Instance().Get( platform::CUDAPlace(device_id))); + int n_q = seq_len * head_number_ * head_size_; + constexpr int threads = 128; + int blocks = (n_q + threads - 1) / threads; + + apply_scale<<>>(tptr, static_cast(scale_), + n_q); + const platform::CUDADeviceContext &dev_ctx = *device_ctx; operators::math::MultiHeadGPUComputeFunctor multihead_compute_func; multihead_compute_func(dev_ctx, batch, seq_len, head_number_, head_size_, - qkptr, input1_data, tptr, half(scale_), half(0.0)); + qkptr, input1_data, tptr, half(1.), half(0.0)); int grid = batch * head_number_ * seq_len; int block = head_size_; -- GitLab From dec8ab8f2b70afa51028c858522ba6251eb29d37 Mon Sep 17 00:00:00 2001 From: Wilber Date: Thu, 29 Apr 2021 10:01:17 +0800 Subject: [PATCH 049/720] fix mem release error. (#32654) --- .../fluid/inference/api/analysis_predictor.cc | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index 6a6be14fd59..89c8c7902ba 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -191,22 +191,8 @@ bool AnalysisPredictor::PrepareScope( status_is_cloned_ = true; } else { paddle::framework::InitDevices(); - scope_.reset(new paddle::framework::Scope(), [](framework::Scope *scope) { - delete scope; -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) - for (int dev_id = 0; dev_id < paddle::platform::GetCUDADeviceCount(); - ++dev_id) { - memory::Release(platform::CUDAPlace(dev_id)); - } -#endif -#ifdef PADDLE_WITH_XPU - for (int dev_id = 0; dev_id < paddle::platform::GetXPUDeviceCount(); - ++dev_id) { - memory::Release(platform::XPUPlace(dev_id)); - } -#endif - memory::Release(platform::CPUPlace()); - }); + // TODO(wilber): we need to release memory occupied by weights. 
+ scope_.reset(new paddle::framework::Scope()); status_is_cloned_ = false; } sub_scope_ = &scope_->NewScope(); -- GitLab From f46f15a024756527690f705a3ab97bfae41f24ba Mon Sep 17 00:00:00 2001 From: "joanna.wozna.intel" Date: Thu, 29 Apr 2021 04:29:44 +0200 Subject: [PATCH 050/720] Add BF16 uniform random initializer (#32468) * Add bf16 uniform random initializer * Remove duplicated section * Change UT to CPU place only * Put detail functions into anonymous namespace --- paddle/fluid/operators/fill_constant_op.h | 3 + paddle/fluid/operators/uniform_random_op.cc | 58 +++- paddle/fluid/operators/uniform_random_op.h | 9 +- python/paddle/fluid/initializer.py | 16 +- python/paddle/fluid/layers/nn.py | 7 +- .../fluid/tests/unittests/test_initializer.py | 45 +-- .../tests/unittests/test_initializer_nn.py | 11 +- .../unittests/test_uniform_random_bf16_op.py | 276 ++++++++++++++++++ tools/static_mode_white_list.py | 1 + 9 files changed, 371 insertions(+), 55 deletions(-) create mode 100644 python/paddle/fluid/tests/unittests/test_uniform_random_bf16_op.py diff --git a/paddle/fluid/operators/fill_constant_op.h b/paddle/fluid/operators/fill_constant_op.h index 46c4ae12036..17c7321122b 100644 --- a/paddle/fluid/operators/fill_constant_op.h +++ b/paddle/fluid/operators/fill_constant_op.h @@ -117,6 +117,9 @@ class FillConstantKernel : public framework::OpKernel { } if (actual_place == 0) { + VLOG(4) << "[CPU] FillConstantKernel" + << ((data_type == framework::proto::VarType::BF16) ? "" + : ""); tensor->mutable_data(platform::CPUPlace(), data_type); math::SetConstant functor; functor(reinterpret_cast(dev_ctx), diff --git a/paddle/fluid/operators/uniform_random_op.cc b/paddle/fluid/operators/uniform_random_op.cc index 6efada4343c..007276b16d7 100644 --- a/paddle/fluid/operators/uniform_random_op.cc +++ b/paddle/fluid/operators/uniform_random_op.cc @@ -18,10 +18,41 @@ limitations under the License. */ #include "paddle/fluid/framework/generator.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" +#include "paddle/fluid/platform/bfloat16.h" namespace paddle { namespace operators { +namespace { +template +inline void UniformRealDistribution(T *data, const int64_t &size, + const float &min, const float &max, + const unsigned int &seed) { + VLOG(4) << "[CPU] UniformRandomKernel"; + std::uniform_real_distribution dist(static_cast(min), + static_cast(max)); + auto engine = paddle::framework::GetCPURandomEngine(seed); + + for (int64_t i = 0; i < size; ++i) { + data[i] = dist(*engine); + } +} + +template <> +inline void UniformRealDistribution(paddle::platform::bfloat16 *data, + const int64_t &size, const float &min, + const float &max, + const unsigned int &seed) { + VLOG(4) << "[CPU] UniformRandomKernel"; + std::uniform_real_distribution dist(min, max); + auto engine = paddle::framework::GetCPURandomEngine(seed); + + for (int64_t i = 0; i < size; ++i) { + data[i] = static_cast(dist(*engine)); + } +} +} // namespace + // It seems that Eigen::Tensor::random in GPU will SEGFAULT. // Use std::random and thrust::random(thrust is a std library in CUDA) to // implement uniform random. 
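Note: the bfloat16 specialization above draws samples in float and stores them as bfloat16, and the CPU kernel is now registered for the BF16 type. On the Python side this surfaces as a uint16 dtype; a rough sketch of exercising the new path (values come back as raw bfloat16 bit patterns, as in the new unit test added later in this patch):

    import numpy as np
    import paddle
    import paddle.fluid as fluid

    paddle.enable_static()
    main_prog = fluid.Program()
    startup_prog = fluid.Program()
    with fluid.program_guard(main_prog, startup_prog):
        # bfloat16 is carried as uint16 on the numpy side
        out = fluid.layers.uniform_random(
            [2, 3], dtype=np.uint16, min=-5.0, max=10.0, seed=10)

    exe = fluid.Executor(fluid.CPUPlace())
    exe.run(startup_prog)
    result, = exe.run(main_prog, fetch_list=[out])
    print(result.dtype)  # uint16 (raw bfloat16 bits)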
@@ -61,17 +92,11 @@ class CPUUniformRandomKernel : public framework::OpKernel { framework::ToTypeName(out_var->Type()))); } T *data = tensor->mutable_data(ctx.GetPlace()); - int64_t size = tensor->numel(); - std::uniform_real_distribution dist( - static_cast(ctx.Attr("min")), - static_cast(ctx.Attr("max"))); - unsigned int seed = static_cast(ctx.Attr("seed")); - auto engine = framework::GetCPURandomEngine(seed); - for (int64_t i = 0; i < size; ++i) { - data[i] = dist(*engine); - } + UniformRealDistribution( + data, size, ctx.Attr("min"), ctx.Attr("max"), + static_cast(ctx.Attr("seed"))); unsigned int diag_num = static_cast(ctx.Attr("diag_num")); @@ -257,9 +282,12 @@ REGISTER_OPERATOR( paddle::framework::EmptyGradOpMaker, paddle::operators::UniformRandomOpVarTypeInference); -REGISTER_OP_CPU_KERNEL(uniform_random, - paddle::operators::CPUUniformRandomKernel, - paddle::operators::CPUUniformRandomKernel); -REGISTER_OP_CPU_KERNEL(uniform_random_batch_size_like, - paddle::operators::CPUUniformRandomKernel, - paddle::operators::CPUUniformRandomKernel); +REGISTER_OP_CPU_KERNEL( + uniform_random, paddle::operators::CPUUniformRandomKernel, + paddle::operators::CPUUniformRandomKernel, + paddle::operators::CPUUniformRandomKernel); +REGISTER_OP_CPU_KERNEL( + uniform_random_batch_size_like, + paddle::operators::CPUUniformRandomKernel, + paddle::operators::CPUUniformRandomKernel, + paddle::operators::CPUUniformRandomKernel); diff --git a/paddle/fluid/operators/uniform_random_op.h b/paddle/fluid/operators/uniform_random_op.h index 6052e533643..18a4154be30 100644 --- a/paddle/fluid/operators/uniform_random_op.h +++ b/paddle/fluid/operators/uniform_random_op.h @@ -24,9 +24,9 @@ namespace operators { using Tensor = framework::Tensor; inline std::vector GetNewDataFromShapeTensor( - const Tensor *new_data_tensor) { + const Tensor* new_data_tensor) { if (new_data_tensor->type() == framework::proto::VarType::INT64) { - auto *new_data = new_data_tensor->data(); + auto* new_data = new_data_tensor->data(); framework::Tensor cpu_starts_tensor; if (platform::is_gpu_place(new_data_tensor->place())) { TensorCopySync(*new_data_tensor, platform::CPUPlace(), @@ -37,7 +37,7 @@ inline std::vector GetNewDataFromShapeTensor( new_data + new_data_tensor->numel()); return vec_new_data; } else if (new_data_tensor->type() == framework::proto::VarType::INT32) { - auto *new_data = new_data_tensor->data(); + auto* new_data = new_data_tensor->data(); std::vector vec_new_data; framework::Tensor cpu_starts_tensor; if (platform::is_gpu_place(new_data_tensor->place())) { @@ -58,7 +58,7 @@ inline std::vector GetNewDataFromShapeTensor( } inline std::vector GetNewDataFromShapeTensorList( - const std::vector &list_new_shape_tensor) { + const std::vector& list_new_shape_tensor) { std::vector vec_new_shape; vec_new_shape.reserve(list_new_shape_tensor.size()); for (size_t i = 0; i < list_new_shape_tensor.size(); ++i) { @@ -97,6 +97,5 @@ inline std::vector GetNewDataFromShapeTensorList( return vec_new_shape; } - } // namespace operators } // namespace paddle diff --git a/python/paddle/fluid/initializer.py b/python/paddle/fluid/initializer.py index dc153614fcd..5b2010f3409 100644 --- a/python/paddle/fluid/initializer.py +++ b/python/paddle/fluid/initializer.py @@ -245,7 +245,7 @@ class UniformInitializer(Initializer): self._seed = block.program.random_seed # to be compatible of fp16 initializers - if var.dtype in [VarDesc.VarType.FP16, VarDesc.VarType.BF16]: + if var.dtype == VarDesc.VarType.FP16: out_dtype = VarDesc.VarType.FP32 out_var = 
block.create_var( name=unique_name.generate(".".join( @@ -274,7 +274,7 @@ class UniformInitializer(Initializer): }, stop_gradient=True) - if var.dtype in [VarDesc.VarType.FP16, VarDesc.VarType.BF16]: + if var.dtype == VarDesc.VarType.FP16: block.append_op( type="cast", inputs={"X": out_var}, @@ -540,7 +540,8 @@ class XavierInitializer(Initializer): self._seed = block.program.random_seed # to be compatible of fp16 initalizers - if var.dtype in [VarDesc.VarType.FP16, VarDesc.VarType.BF16]: + if var.dtype == VarDesc.VarType.FP16 or ( + var.dtype == VarDesc.VarType.BF16 and not self._uniform): out_dtype = VarDesc.VarType.FP32 out_var = block.create_var( name=unique_name.generate(".".join( @@ -582,7 +583,8 @@ class XavierInitializer(Initializer): }, stop_gradient=True) - if var.dtype in [VarDesc.VarType.FP16, VarDesc.VarType.BF16]: + if var.dtype == VarDesc.VarType.FP16 or ( + var.dtype == VarDesc.VarType.BF16 and not self._uniform): block.append_op( type="cast", inputs={"X": out_var}, @@ -671,7 +673,8 @@ class MSRAInitializer(Initializer): self._seed = block.program.random_seed # to be compatible of fp16 initalizers - if var.dtype in [VarDesc.VarType.FP16, VarDesc.VarType.BF16]: + if var.dtype == VarDesc.VarType.FP16 or ( + var.dtype == VarDesc.VarType.BF16 and not self._uniform): out_dtype = VarDesc.VarType.FP32 out_var = block.create_var( name=unique_name.generate(".".join( @@ -713,7 +716,8 @@ class MSRAInitializer(Initializer): }, stop_gradient=True) - if var.dtype in [VarDesc.VarType.FP16, VarDesc.VarType.BF16]: + if var.dtype == VarDesc.VarType.FP16 or ( + var.dtype == VarDesc.VarType.BF16 and not self._uniform): block.append_op( type="cast", inputs={"X": out_var}, diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index 751b6251565..9ac314528dc 100755 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -10524,10 +10524,10 @@ def uniform_random_batch_size_like(input, """ - check_variable_and_dtype(input, 'Input', ("float32", 'float64'), + check_variable_and_dtype(input, 'Input', ("float32", 'float64', "uint16"), 'uniform_random_batch_size_like') check_type(shape, 'shape', (list, tuple), 'uniform_random_batch_size_like') - check_dtype(dtype, 'dtype', ('float32', 'float64'), + check_dtype(dtype, 'dtype', ('float32', 'float64', "uint16"), 'uniform_random_batch_size_like') helper = LayerHelper('uniform_random_batch_size_like', **locals()) @@ -15121,7 +15121,8 @@ def uniform_random(shape, dtype='float32', min=-1.0, max=1.0, seed=0, float(max), 'seed', seed, 'dtype', dtype) check_type(shape, 'shape', (list, tuple, Variable), 'uniform_random/rand') - check_dtype(dtype, 'dtype', ('float32', 'float64'), 'uniform_random/rand') + check_dtype(dtype, 'dtype', ('float32', 'float64', 'uint16'), + 'uniform_random/rand') inputs = dict() attrs = {'seed': seed, 'min': min, 'max': max, 'dtype': dtype} diff --git a/python/paddle/fluid/tests/unittests/test_initializer.py b/python/paddle/fluid/tests/unittests/test_initializer.py index 237ff0c958e..8ddb7498971 100644 --- a/python/paddle/fluid/tests/unittests/test_initializer.py +++ b/python/paddle/fluid/tests/unittests/test_initializer.py @@ -53,7 +53,7 @@ class TestConstantInitializer(unittest.TestCase): lod_level=0, name="param", initializer=initializer.ConstantInitializer()) - num_ops = 2 if dtype in ["float16"] else 1 + num_ops = 2 if dtype == "float16" else 1 self.assertEqual(len(block.ops), num_ops) init_op = block.ops[0] self.assertEqual(init_op.type, 'fill_constant') @@ -72,7 +72,7 @@ class 
TestConstantInitializer(unittest.TestCase): lod_level=0, name="param", initializer=initializer.ConstantInitializer(2.3)) - num_ops = 2 if dtype in ["float16"] else 1 + num_ops = 2 if dtype == "float16" else 1 self.assertEqual(len(block.ops), num_ops) init_op = block.ops[0] self.assertEqual(init_op.type, 'fill_constant') @@ -108,7 +108,7 @@ class TestUniformInitializer(unittest.TestCase): lod_level=0, name="param", initializer=initializer.UniformInitializer()) - num_ops = 2 if dtype in ["float16", "uint16"] else 1 + num_ops = 2 if dtype == "float16" else 1 self.assertEqual(len(block.ops), num_ops) init_op = block.ops[0] self.assertEqual(init_op.type, 'uniform_random') @@ -153,7 +153,7 @@ class TestUniformInitializer(unittest.TestCase): lod_level=0, name="param", initializer=initializer.UniformInitializer(-4.2, 3.1, 123)) - num_ops = 2 if dtype in ["float16", "uint16"] else 1 + num_ops = 2 if dtype == "float16" else 1 self.assertEqual(len(block.ops), num_ops) init_op = block.ops[0] self.assertEqual(init_op.type, 'uniform_random') @@ -174,7 +174,7 @@ class TestUniformInitializer(unittest.TestCase): lod_level=0, name="param", initializer=initializer.UniformInitializer(-4.2, float(i), 123)) - num_ops = 2 if dtype in ["float16", "uint16"] else 1 + num_ops = 2 if dtype == "float16" else 1 self.assertEqual(len(block.ops), num_ops) init_op0 = block.ops[0] self.assertEqual(init_op0.type, 'uniform_random') @@ -195,13 +195,11 @@ class TestUniformInitializer(unittest.TestCase): def test_uniform_initializer_bf16(self): """Test uniform initializer with bfloat16 + No cast operator has been added here """ block = self.test_uniform_initializer_default_value("uint16") - self.assertTrue(check_cast_op(block.ops[1])) block = self.test_uniform_initializer(dtype="uint16") - self.assertTrue(check_cast_op(block.ops[1])) block = self.test_uniform_initializer_two_op("uint16") - self.assertTrue(check_cast_op(block.ops[1])) class TestNormalInitializer(unittest.TestCase): @@ -347,7 +345,9 @@ class TestXavierInitializer(unittest.TestCase): self.assertAlmostEqual(init_op.attr('std'), std, delta=DELTA) self.assertEqual(init_op.attr('seed'), 0) - def test_xavier_initializer_supplied_arguments(self, dtype="float32"): + def test_xavier_initializer_supplied_arguments(self, + dtype="float32", + uniform=True): """Test the Xavier initializer with supplied arguments """ program = framework.Program() @@ -359,14 +359,18 @@ class TestXavierInitializer(unittest.TestCase): lod_level=0, name="param", initializer=initializer.XavierInitializer( - fan_in=12, fan_out=23, seed=134)) - num_ops = 2 if dtype in ["float16", "uint16"] else 1 + uniform=uniform, fan_in=12, fan_out=23, seed=134)) + num_ops = 2 if (dtype == "float16" or (dtype == "uint16" and + not uniform)) else 1 self.assertEqual(len(block.ops), num_ops) init_op = block.ops[0] - self.assertEqual(init_op.type, 'uniform_random') - limit = np.sqrt(6.0 / (12 + 23)) - self.assertAlmostEqual(init_op.attr('min'), -limit, delta=DELTA) - self.assertAlmostEqual(init_op.attr('max'), limit, delta=DELTA) + if uniform: + self.assertEqual(init_op.type, 'uniform_random') + limit = np.sqrt(6.0 / (12 + 23)) + self.assertAlmostEqual(init_op.attr('min'), -limit, delta=DELTA) + self.assertAlmostEqual(init_op.attr('max'), limit, delta=DELTA) + else: + self.assertEqual(init_op.type, 'gaussian_random') self.assertEqual(init_op.attr('seed'), 134) return block @@ -379,8 +383,12 @@ class TestXavierInitializer(unittest.TestCase): def test_xavier_initializer_bf16(self): """Test the Xavier initializer with 
bfloat16 """ - block = self.test_xavier_initializer_supplied_arguments("uint16") - self.assertTrue(check_cast_op(block.ops[1])) + block_uniform = self.test_xavier_initializer_supplied_arguments( + "uint16") + self.assertEqual(len(block_uniform.ops), 1) + block_gaussian = self.test_xavier_initializer_supplied_arguments( + "uint16", False) + self.assertTrue(check_cast_op(block_gaussian.ops[1])) class TestMSRAInitializer(unittest.TestCase): @@ -483,7 +491,7 @@ class TestMSRAInitializer(unittest.TestCase): name="param", initializer=initializer.MSRAInitializer( fan_in=12, seed=134)) - num_ops = 2 if dtype in ["float16", "uint16"] else 1 + num_ops = 2 if dtype == "float16" else 1 self.assertEqual(len(block.ops), num_ops) init_op = block.ops[0] self.assertEqual(init_op.type, 'uniform_random') @@ -503,7 +511,6 @@ class TestMSRAInitializer(unittest.TestCase): """Test the MSRA initializer with bfloat16 """ block = self.test_msra_initializer_supplied_arguments("uint16") - self.assertTrue(check_cast_op(block.ops[1])) class TestBilinearInitializer(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_initializer_nn.py b/python/paddle/fluid/tests/unittests/test_initializer_nn.py index 9ec78366226..85815c5eeef 100644 --- a/python/paddle/fluid/tests/unittests/test_initializer_nn.py +++ b/python/paddle/fluid/tests/unittests/test_initializer_nn.py @@ -225,7 +225,7 @@ class TestUniform(unittest.TestCase): lod_level=0, name="param", initializer=initializer.Uniform()) - num_ops = 2 if dtype in ["float16", "uint16"] else 1 + num_ops = 2 if dtype == "float16" else 1 self.assertEqual(len(block.ops), num_ops) init_op = block.ops[0] self.assertEqual(init_op.type, 'uniform_random') @@ -256,7 +256,7 @@ class TestUniform(unittest.TestCase): lod_level=0, name="param", initializer=initializer.Uniform()) - num_ops = 2 if dtype in ["float16", "uint16"] else 1 + num_ops = 2 if dtype == "float16" else 1 self.assertEqual(len(block.ops), num_ops) init_op = block.ops[0] self.assertEqual(init_op.type, 'uniform_random') @@ -287,7 +287,7 @@ class TestUniform(unittest.TestCase): lod_level=0, name="param", initializer=initializer.Uniform(min_value, max_vlaue)) - num_ops = 2 if dtype in ["float16", "uint16"] else 1 + num_ops = 2 if dtype == "float16" else 1 self.assertEqual(len(block.ops), num_ops) init_op = block.ops[0] self.assertEqual(init_op.type, 'uniform_random') @@ -317,7 +317,7 @@ class TestUniform(unittest.TestCase): lod_level=0, name="param", initializer=initializer.Uniform(min_value, float(i))) - num_ops = 2 if dtype in ["float16", "uint16"] else 1 + num_ops = 2 if dtype == "float16" else 1 self.assertEqual(len(block.ops), num_ops) init_op0 = block.ops[0] self.assertEqual(init_op0.type, 'uniform_random') @@ -343,11 +343,8 @@ class TestUniform(unittest.TestCase): """Test uniform initializer with bfloat16 """ block = self.test_uniform_initializer_default_value("uint16") #bfloat16 - self.assertTrue(check_cast_op(block.ops[1])) block = self.test_uniform_initializer(dtype="uint16") #bfloat16 - self.assertTrue(check_cast_op(block.ops[1])) block = self.test_uniform_initializer_two_op("uint16") #bfloat16 - self.assertTrue(check_cast_op(block.ops[1])) def test_uniform_initializer_dygraph(self): """Test uniform initializer in dygraph model. 
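Note: the initializer-side effect checked above is that, for bfloat16 parameters, uniform-style initializers (Uniform, and Xavier/MSRA with uniform=True) now run uniform_random directly in bfloat16 instead of generating float32 and casting, so the cast op disappears from the init block; only the gaussian paths still go through fp32 plus a cast. A condensed sketch of what those tests assert, reusing the same create_parameter call they use:

    import paddle
    import paddle.fluid as fluid
    from paddle.fluid import framework, initializer

    paddle.enable_static()
    program = framework.Program()
    block = program.global_block()
    block.create_parameter(
        dtype="uint16",  # bfloat16
        shape=[5, 10],
        lod_level=0,
        name="param",
        initializer=initializer.UniformInitializer(-0.5, 0.5, 10))
    print([op.type for op in block.ops])  # ['uniform_random'], no trailing cast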
diff --git a/python/paddle/fluid/tests/unittests/test_uniform_random_bf16_op.py b/python/paddle/fluid/tests/unittests/test_uniform_random_bf16_op.py new file mode 100644 index 00000000000..2ba808a341e --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_uniform_random_bf16_op.py @@ -0,0 +1,276 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +import numpy as np +from op_test import OpTest, convert_uint16_to_float, convert_float_to_uint16 +import paddle +import paddle.fluid.core as core +from paddle.fluid.op import Operator +import paddle.fluid as fluid +from paddle.fluid import Program, program_guard +from paddle.fluid.tests.unittests.test_uniform_random_op import output_hist, output_hist_diag + + +class TestUniformRandomOpBF16(OpTest): + def setUp(self): + self.op_type = "uniform_random" + self.dtype = "uint16" + self.inputs = {} + self.init_attrs() + self.outputs = {"Out": np.zeros((1000, 784)).astype("uint16")} + + def init_attrs(self): + self.attrs = { + "shape": [1000, 784], + "min": -5.0, + "max": 10.0, + "seed": 10, + 'dtype': int(core.VarDesc.VarType.BF16) + } + self.output_hist = output_hist + + def verify_output(self, outs): + if np.array(outs[0]).dtype == np.uint16: + result = convert_uint16_to_float(np.array(outs[0])) + else: + result = np.array(outs[0]) + + hist, prob = self.output_hist(result) + self.assertTrue( + np.allclose( + hist, prob, rtol=0, atol=0.01), "hist: " + str(hist)) + + def test_check_output(self): + outs = self.calc_output(core.CPUPlace()) + outs = [np.array(out) for out in outs] + outs.sort(key=len) + self.verify_output(outs) + + +class TestUniformRandomOpBF16AttrTensorList(TestUniformRandomOpBF16): + def setUp(self): + self.op_type = "uniform_random" + self.new_shape = (1000, 784) + self.dtype = "uint16" + shape_tensor = [] + for index, ele in enumerate(self.new_shape): + shape_tensor.append(("x" + str(index), np.ones( + (1)).astype("int64") * ele)) + self.inputs = {'ShapeTensorList': shape_tensor} + self.init_attrs() + self.outputs = {"Out": np.zeros((1000, 784)).astype("uint16")} + + def init_attrs(self): + self.attrs = { + "min": -5.0, + "max": 10.0, + "seed": 10, + 'dtype': int(core.VarDesc.VarType.BF16) + } + self.output_hist = output_hist + + +class TestUniformRandomOpBF16AttrTensorInt32( + TestUniformRandomOpBF16AttrTensorList): + def setUp(self): + self.op_type = "uniform_random" + self.dtype = "uint16" + self.inputs = {"ShapeTensor": np.array([1000, 784]).astype("int32")} + self.init_attrs() + self.outputs = {"Out": np.zeros((1000, 784)).astype("uint16")} + + +class TestUniformRandomOpBF16WithDiagInit(TestUniformRandomOpBF16): + def init_attrs(self): + self.attrs = { + "shape": [1000, 784], + "min": -5.0, + "max": 10.0, + "seed": 10, + "diag_num": 784, + "diag_step": 784, + "diag_val": 1.0, + 'dtype': int(core.VarDesc.VarType.BF16) + } + self.output_hist = output_hist_diag + + +class TestUniformRandomOpBF16SelectedRows(unittest.TestCase): + def 
test_check_output(self): + self.check_with_place(core.CPUPlace()) + + def check_with_place(self, place): + scope = core.Scope() + out = scope.var("X").get_selected_rows() + paddle.seed(10) + op = Operator( + "uniform_random", + Out="X", + shape=[1000, 784], + min=-5.0, + max=10.0, + seed=10, + dtype=int(core.VarDesc.VarType.BF16)) + op.run(scope, place) + self.assertEqual(out.get_tensor().shape(), [1000, 784]) + result = convert_uint16_to_float(np.array(out.get_tensor())) + hist, prob = output_hist(result) + self.assertTrue( + np.allclose( + hist, prob, rtol=0, atol=0.01), "hist: " + str(hist)) + + +class TestUniformRandomOpBF16SelectedRowsWithDiagInit( + TestUniformRandomOpBF16SelectedRows): + def check_with_place(self, place): + scope = core.Scope() + out = scope.var("X").get_selected_rows() + paddle.seed(10) + op = Operator( + "uniform_random", + Out="X", + shape=[500, 784], + min=-5.0, + max=10.0, + seed=10, + diag_num=500, + diag_step=784, + diag_val=1.0, + dtype=int(core.VarDesc.VarType.BF16)) + op.run(scope, place) + self.assertEqual(out.get_tensor().shape(), [500, 784]) + result = convert_uint16_to_float(np.array(out.get_tensor())) + hist, prob = output_hist(result) + self.assertTrue( + np.allclose( + hist, prob, rtol=0, atol=0.01), "hist: " + str(hist)) + + +class TestUniformRandomOpBF16AttrTensorAPI(unittest.TestCase): + def test_attr_tensor_API(self): + startup_program = fluid.Program() + train_program = fluid.Program() + with fluid.program_guard(train_program, startup_program): + dim_tensor = fluid.layers.fill_constant([1], "int64", 3) + ret = fluid.layers.nn.uniform_random( + [1, dim_tensor, 2], dtype=np.uint16) + + place = fluid.CPUPlace() + exe = fluid.Executor(place) + + exe.run(startup_program) + outs = exe.run(train_program, fetch_list=[ret]) + + +class TestUniformRandomOpAPISeed(unittest.TestCase): + def test_attr_tensor_API(self): + _seed = 10 + gen = paddle.seed(_seed) + gen._is_init_py = False + startup_program = fluid.Program() + train_program = fluid.Program() + with fluid.program_guard(train_program, startup_program): + _min = 5 + _max = 10 + + ret = fluid.layers.nn.uniform_random( + [2, 3, 2], min=_min, max=_max, seed=_seed) + ret_2 = fluid.layers.nn.uniform_random( + [2, 3, 2], min=_min, max=_max, seed=_seed) + res = fluid.layers.equal(ret, ret_2) + place = fluid.CPUPlace() + exe = fluid.Executor(place) + + exe.run(startup_program) + ret_value, cmp_value = exe.run(train_program, fetch_list=[ret, res]) + self.assertTrue(np.array(cmp_value).all()) + for i in ret_value.flatten(): + self.assertGreaterEqual(i, _min) + self.assertLess(i, _max) + + +class TestUniformRandomOpBF16SelectedRowsShapeTensor(unittest.TestCase): + def test_check_output(self): + place = core.CPUPlace() + scope = core.Scope() + out = scope.var("X").get_selected_rows() + shape_tensor = scope.var("Shape").get_tensor() + shape_tensor.set(np.array([1000, 784]).astype("int64"), place) + paddle.seed(10) + op = Operator( + "uniform_random", + ShapeTensor="Shape", + Out="X", + min=-5.0, + max=10.0, + seed=10, + dtype=int(core.VarDesc.VarType.BF16)) + op.run(scope, place) + self.assertEqual(out.get_tensor().shape(), [1000, 784]) + result = convert_uint16_to_float(np.array(out.get_tensor())) + hist, prob = output_hist(result) + self.assertTrue( + np.allclose( + hist, prob, rtol=0, atol=0.01), "hist: " + str(hist)) + + +class TestUniformRandomOpBF16SelectedRowsShapeTensorList( + TestUniformRandomOpBF16SelectedRowsShapeTensor): + def test_check_output(self): + place = core.CPUPlace() + scope = core.Scope() + 
out = scope.var("X").get_selected_rows() + shape_1 = scope.var("shape1").get_tensor() + shape_1.set(np.array([1000]).astype("int64"), place) + shape_2 = scope.var("shape2").get_tensor() + shape_2.set(np.array([784]).astype("int64"), place) + paddle.seed(10) + op = Operator( + "uniform_random", + ShapeTensorList=["shape1", "shape2"], + Out="X", + min=-5.0, + max=10.0, + seed=10, + dtype=int(core.VarDesc.VarType.BF16)) + op.run(scope, place) + self.assertEqual(out.get_tensor().shape(), [1000, 784]) + result = convert_uint16_to_float(np.array(out.get_tensor())) + hist, prob = output_hist(result) + self.assertTrue( + np.allclose( + hist, prob, rtol=0, atol=0.01), "hist: " + str(hist)) + + +class TestUniformRandomBatchSizeLikeOpBF16API(unittest.TestCase): + def test_attr_tensorlist_int32_API(self): + startup_program = fluid.Program() + train_program = fluid.Program() + with fluid.program_guard(train_program, startup_program): + input = fluid.data(name="input", shape=[1, 3], dtype='uint16') + out_1 = fluid.layers.uniform_random_batch_size_like( + input, [2, 4], dtype=np.uint16) # out_1.shape=[1, 4] + + place = fluid.CPUPlace() + exe = fluid.Executor(place) + + exe.run(startup_program) + outs = exe.run(train_program, fetch_list=[out_1]) + + +if __name__ == "__main__": + from paddle import enable_static + enable_static() + unittest.main() diff --git a/tools/static_mode_white_list.py b/tools/static_mode_white_list.py index 7c1f54adfb3..15bcae82606 100644 --- a/tools/static_mode_white_list.py +++ b/tools/static_mode_white_list.py @@ -498,6 +498,7 @@ STATIC_MODE_TESTING_LIST = [ 'test_truncated_gaussian_random_op', 'test_unbind_op', 'test_unfold_op', + 'test_uniform_random_bf16_op', 'test_uniform_random_op', 'test_unique', 'test_unique_with_counts', -- GitLab From 8ccf549be194acbee4e01d3530b1b7439629ba07 Mon Sep 17 00:00:00 2001 From: Pei Yang Date: Thu, 29 Apr 2021 10:39:58 +0800 Subject: [PATCH 051/720] specify multihead_matmul_fuse_pass_v3 QK path (#32659) --- paddle/fluid/framework/ir/multihead_matmul_fuse_pass.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/paddle/fluid/framework/ir/multihead_matmul_fuse_pass.cc b/paddle/fluid/framework/ir/multihead_matmul_fuse_pass.cc index 1e8349e8787..57bee20247c 100644 --- a/paddle/fluid/framework/ir/multihead_matmul_fuse_pass.cc +++ b/paddle/fluid/framework/ir/multihead_matmul_fuse_pass.cc @@ -753,7 +753,7 @@ PDNode* MultiHeadMatmulV3Pattern::operator()() { pattern->NewNode(transpose2_0_repr())->assert_is_op("transpose2"); auto* transpose2_0_out_var = pattern->NewNode(transpose2_0_out_repr()) ->assert_is_op_output("transpose2"); - transpose2_0_out_var->AsIntermediate()->assert_is_op_input("matmul"); + transpose2_0_out_var->AsIntermediate()->assert_is_op_input("matmul", "X"); auto* matmul_qk = pattern->NewNode(matmul_qk_repr())->assert_is_op("matmul"); auto* matmul_qk_out_var = @@ -827,7 +827,7 @@ PDNode* MultiHeadMatmulV3Pattern::operator()() { auto* transpose2_1_out_var = pattern->NewNode(transpose2_1_out_repr()) ->assert_is_op_output("transpose2"); transpose2_1_out_var->AsIntermediate()->assert_is_op_input( - "matmul"); // link to matmul qk + "matmul", "Y"); // link to matmul qk // Third path to matmul auto* mul2 = pattern->NewNode(mul2_repr())->assert_is_op("matmul"); -- GitLab From b7ddd7d7a18dc270a84f7bb64f3c3e1a79b676ce Mon Sep 17 00:00:00 2001 From: cc <52520497+juncaipeng@users.noreply.github.com> Date: Thu, 29 Apr 2021 11:26:26 +0800 Subject: [PATCH 052/720] skip fuse repeated fc when the fc with weight padding (#32648) --- 
.../framework/ir/repeated_fc_relu_fuse_pass.cc | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/paddle/fluid/framework/ir/repeated_fc_relu_fuse_pass.cc b/paddle/fluid/framework/ir/repeated_fc_relu_fuse_pass.cc index 479df876fbe..bf59c140005 100644 --- a/paddle/fluid/framework/ir/repeated_fc_relu_fuse_pass.cc +++ b/paddle/fluid/framework/ir/repeated_fc_relu_fuse_pass.cc @@ -54,6 +54,17 @@ static bool IsFCWithAct(Node* n, const std::string& act_type = "relu") { return false; } +static bool IsFCWithPaddingWeights(Node* n) { + bool res = false; + if (n && n->IsOp() && n->Op() && n->Op()->Type() == "fc" && + n->inputs.size() == 3U && n->outputs.size() == 1U) { + if (n->Op()->HasAttr("padding_weights")) { + res = BOOST_GET_CONST(bool, n->Op()->GetAttr("padding_weights")); + } + } + return res; +} + static bool IsParamOfFC(Node* n, const std::string& param_name) { if (IsInputOfFC(n) && n->inputs.empty() && (n->Name() == n->outputs[0]->Op()->Input(param_name)[0])) { @@ -255,7 +266,7 @@ void BuildRepeatedFCReluPattern(PDPattern* pattern, fc_ops[i] = pattern->NewNode( [=](Node* x) { - if (!IsFCWithAct(x, "relu")) { + if (!IsFCWithAct(x, "relu") || IsFCWithPaddingWeights(x)) { return false; } auto* fc_out_var = x->outputs[0]; -- GitLab From b6ca6a55420b745bf6c4a8a6d03559c1b5a2cc03 Mon Sep 17 00:00:00 2001 From: WeiXin Date: Thu, 29 Apr 2021 12:05:12 +0800 Subject: [PATCH 053/720] forward return any type. (#32661) --- paddle/fluid/imperative/py_layer_fwd.h | 20 ++++---- paddle/fluid/operators/py_layer_op.cc | 6 +++ .../fluid/tests/unittests/test_pylayer_op.py | 46 ++++++++++++++----- 3 files changed, 52 insertions(+), 20 deletions(-) diff --git a/paddle/fluid/imperative/py_layer_fwd.h b/paddle/fluid/imperative/py_layer_fwd.h index bd132f2576f..ccfd5b0e2db 100644 --- a/paddle/fluid/imperative/py_layer_fwd.h +++ b/paddle/fluid/imperative/py_layer_fwd.h @@ -115,12 +115,12 @@ py::object PyLayerApply(const platform::Place& place, const py::object& cls, tuple_result[i].cast>(); output_vars.push_back(temp_out); } catch (py::cast_error&) { - PADDLE_THROW(platform::errors::Unimplemented( - "The output of `PyLayer.forward` should be `Tensor`.")); + // Only collect Tensor type in 'kwargs' and pass them to backward. + // Ignore other types of input temporarily. } } else { - PADDLE_THROW(platform::errors::Unimplemented( - "The output of `PyLayer.forward` can not be `None`.")); + // Only collect Tensor type in 'kwargs' and pass them to backward. + // Ignore other types of input temporarily. } } } else { @@ -130,14 +130,18 @@ py::object PyLayerApply(const platform::Place& place, const py::object& cls, result_forward.cast>(); output_vars.push_back(temp_out); } catch (py::cast_error&) { - PADDLE_THROW(platform::errors::Unimplemented( - "The output of `PyLayer.forward` should be `Tensor`.")); + // Only collect Tensor type in 'kwargs' and pass them to backward. + // Ignore other types of input temporarily. } } else { - PADDLE_THROW(platform::errors::Unimplemented( - "The output of `PyLayer.forward` can not be `None`.")); + // Only collect Tensor type in 'kwargs' and pass them to backward. + // Ignore other types of input temporarily. 
} } + if (output_vars.size() == 0) { + PADDLE_THROW(platform::errors::InvalidArgument( + "At least one output of `PyLayer.forward` is a `Tensor`.")); + } NameVarBaseMap outs = {{"Out", output_vars}}; diff --git a/paddle/fluid/operators/py_layer_op.cc b/paddle/fluid/operators/py_layer_op.cc index 65e10181dcc..0090747d116 100644 --- a/paddle/fluid/operators/py_layer_op.cc +++ b/paddle/fluid/operators/py_layer_op.cc @@ -86,6 +86,12 @@ void RunPyObject(py::object *py_object, } } } else { + if (1 != outs->size()) { + PADDLE_THROW(platform::errors::InvalidArgument( + "The number of outputs of `PyLayer.backward` should be %d, but " + "received 1.", + outs->size())); + } if ((*outs)[0] != nullptr) { if (Py_None != py_result.ptr()) { try { diff --git a/python/paddle/fluid/tests/unittests/test_pylayer_op.py b/python/paddle/fluid/tests/unittests/test_pylayer_op.py index e3374c15a0a..e058115d691 100644 --- a/python/paddle/fluid/tests/unittests/test_pylayer_op.py +++ b/python/paddle/fluid/tests/unittests/test_pylayer_op.py @@ -30,7 +30,7 @@ class TestPyLayer(unittest.TestCase): y1 = func1(x1) y2 = func1(x2) ctx.save_for_backward(y1, y2) - return y1, y2 + return y1, 1, y2, None @staticmethod def backward(ctx, dy1, dy2): @@ -44,7 +44,7 @@ class TestPyLayer(unittest.TestCase): input1.stop_gradient = False input2.stop_gradient = False z = tanh.apply(input1, input1, paddle.tanh, paddle.square) - z = z[0] + z[1] + z = z[0] + z[2] z.mean().backward() z2 = paddle.tanh(input2) + paddle.tanh(input2) @@ -61,7 +61,7 @@ class TestPyLayer(unittest.TestCase): y1 = func1(x1) y2 = func1(x2) ctx.save_for_backward(y1, y2) - return y1, y2 + return 1, None, y1, y2, '' @staticmethod def backward(ctx, dy1, dy2): @@ -79,7 +79,7 @@ class TestPyLayer(unittest.TestCase): input3.stop_gradient = True input4.stop_gradient = True z = tanh.apply(input1, input3, paddle.tanh, paddle.square) - z = z[0] + z[1] + z = z[2] + z[3] z.mean().backward() z2 = paddle.tanh(input2) + paddle.tanh(input4) @@ -115,6 +115,27 @@ class TestPyLayer(unittest.TestCase): self.assertTrue( np.max(np.abs((input1.grad.numpy() - input2.grad.numpy()))) < 1e-10) + def test_pylayer_num_output_match(self): + class tanh(PyLayer): + @staticmethod + def forward( + ctx, + x1, + x2, ): + return x1 + x2 + + @staticmethod + def backward(ctx, dy1): + return dy1 + 1 + + input1 = paddle.randn([2, 3]).astype("float64") + input2 = input1.detach().clone() + input1.stop_gradient = False + input2.stop_gradient = False + z = tanh.apply(input1, input2) + with self.assertRaises(ValueError): + z.mean().backward() + def test_pylayer_dtype(self): class tanh(PyLayer): @staticmethod @@ -150,21 +171,21 @@ class TestPyLayer(unittest.TestCase): return args input1 = paddle.randn([2, 3]).astype("float64") - with self.assertRaises(NotImplementedError): + with self.assertRaises(ValueError): z = Layer_None1.apply(input1) class Layer_None2(PyLayer): @staticmethod def forward(ctx, *args): - return [None, None] + return [None, args[0]] @staticmethod def backward(ctx, *args): return args input1 = paddle.randn([2, 3]).astype("float64") - with self.assertRaises(NotImplementedError): - z = Layer_None2.apply(input1) + # return None + z = Layer_None2.apply(input1) class Layer_one1(PyLayer): @staticmethod @@ -176,21 +197,22 @@ class TestPyLayer(unittest.TestCase): return args input1 = paddle.randn([2, 3]).astype("float64") - with self.assertRaises(NotImplementedError): + # At least one output of `PyLayer.backward` is a `Tensor` + with self.assertRaises(ValueError): z = Layer_one1.apply(input1) class 
Layer_one2(PyLayer): @staticmethod def forward(ctx, *args): - return [1, 2] + return [1, 2, args[0]] @staticmethod def backward(ctx, *args): return args input1 = paddle.randn([2, 3]).astype("float64") - with self.assertRaises(NotImplementedError): - z = Layer_one2.apply(input1) + # return int + z = Layer_one2.apply(input1) class Layer_no_fw(PyLayer): @staticmethod -- GitLab From 10c493a87be9071c8dd6ebd84a14b56141d7efb8 Mon Sep 17 00:00:00 2001 From: Wenyu Date: Thu, 29 Apr 2021 12:46:02 +0800 Subject: [PATCH 054/720] fix error imformation when trigger import error (#32616) --- python/paddle/hapi/hub.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/paddle/hapi/hub.py b/python/paddle/hapi/hub.py index 6490c878f9b..54765c1d4d4 100644 --- a/python/paddle/hapi/hub.py +++ b/python/paddle/hapi/hub.py @@ -43,8 +43,8 @@ def _import_module(name, repo_dir): except ImportError: sys.path.remove(repo_dir) raise RuntimeError( - 'Cannot import `{}`, please make sure `{}`.py in repo root dir'. - format(name, name)) + 'Please make sure config exists or repo error messages above fixed when importing' + ) sys.path.remove(repo_dir) -- GitLab From 7a73692b92e0f2ff86f6a6cc8482e5a2780ef828 Mon Sep 17 00:00:00 2001 From: Chen Weihang Date: Thu, 29 Apr 2021 12:57:40 +0800 Subject: [PATCH 055/720] normalized custom operator impl (#32666) --- paddle/fluid/framework/custom_operator.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/paddle/fluid/framework/custom_operator.cc b/paddle/fluid/framework/custom_operator.cc index 97d58df6dc5..c4b833ec94c 100644 --- a/paddle/fluid/framework/custom_operator.cc +++ b/paddle/fluid/framework/custom_operator.cc @@ -246,7 +246,7 @@ class CustomOperator : public OperatorWithKernel { * it can only be determined at runtime. 
*/ framework::OpKernelType GetExpectedKernelType( - const framework::ExecutionContext& ctx) const { + const framework::ExecutionContext& ctx) const override { return framework::OpKernelType(proto::VarType::RAW, ctx.GetPlace()); } @@ -257,7 +257,7 @@ class CustomOperator : public OperatorWithKernel { */ framework::OpKernelType GetKernelTypeForVar( const std::string& var_name, const Tensor& tensor, - const OpKernelType& expected_kernel_type) { + const OpKernelType& expected_kernel_type) const override { return OpKernelType(expected_kernel_type.data_type_, expected_kernel_type.place_, tensor.layout()); } -- GitLab From b22f6d6927c137a81bcb052359f33bd015227d24 Mon Sep 17 00:00:00 2001 From: LielinJiang <50691816+LielinJiang@users.noreply.github.com> Date: Thu, 29 Apr 2021 17:31:41 +0800 Subject: [PATCH 056/720] Add op read_file and decode_jpeg (#32564) * add op read_file and decode_jpeg --- cmake/operators.cmake | 1 + paddle/fluid/operators/decode_jpeg_op.cc | 114 +++++++++++++++ paddle/fluid/operators/decode_jpeg_op.cu | 138 ++++++++++++++++++ paddle/fluid/operators/read_file_op.cc | 92 ++++++++++++ paddle/fluid/platform/dynload/CMakeLists.txt | 2 +- .../fluid/platform/dynload/dynamic_loader.cc | 17 +++ .../fluid/platform/dynload/dynamic_loader.h | 1 + paddle/fluid/platform/dynload/nvjpeg.cc | 27 ++++ paddle/fluid/platform/dynload/nvjpeg.h | 53 +++++++ python/paddle/tests/test_read_file.py | 67 +++++++++ python/paddle/vision/ops.py | 97 +++++++++++- 11 files changed, 607 insertions(+), 2 deletions(-) create mode 100644 paddle/fluid/operators/decode_jpeg_op.cc create mode 100644 paddle/fluid/operators/decode_jpeg_op.cu create mode 100644 paddle/fluid/operators/read_file_op.cc create mode 100644 paddle/fluid/platform/dynload/nvjpeg.cc create mode 100644 paddle/fluid/platform/dynload/nvjpeg.h create mode 100644 python/paddle/tests/test_read_file.py diff --git a/cmake/operators.cmake b/cmake/operators.cmake index 7dac91e531e..16288e1fb45 100644 --- a/cmake/operators.cmake +++ b/cmake/operators.cmake @@ -182,6 +182,7 @@ function(op_library TARGET) list(REMOVE_ITEM hip_srcs "cholesky_op.cu") list(REMOVE_ITEM hip_srcs "correlation_op.cu") list(REMOVE_ITEM hip_srcs "multinomial_op.cu") + list(REMOVE_ITEM hip_srcs "decode_jpeg_op.cu") hip_library(${TARGET} SRCS ${cc_srcs} ${hip_cc_srcs} ${miopen_cu_cc_srcs} ${miopen_cu_srcs} ${mkldnn_cc_srcs} ${hip_srcs} DEPS ${op_library_DEPS} ${op_common_deps}) else() diff --git a/paddle/fluid/operators/decode_jpeg_op.cc b/paddle/fluid/operators/decode_jpeg_op.cc new file mode 100644 index 00000000000..e553b1076a8 --- /dev/null +++ b/paddle/fluid/operators/decode_jpeg_op.cc @@ -0,0 +1,114 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include +#include +#include + +#include "paddle/fluid/framework/generator.h" +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/framework/operator.h" +#include "paddle/fluid/platform/dynload/nvjpeg.h" +#include "paddle/fluid/platform/enforce.h" + +namespace paddle { +namespace operators { + +template +class CPUDecodeJpegKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + // TODO(LieLinJiang): add cpu implement. + PADDLE_THROW(platform::errors::Unimplemented( + "DecodeJpeg op only supports GPU now.")); + } +}; + +class DecodeJpegOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + void InferShape(framework::InferShapeContext* ctx) const override { + OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "DecodeJpeg"); + OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "DecodeJpeg"); + + auto mode = ctx->Attrs().Get("mode"); + std::vector out_dims; + + if (mode == "unchanged") { + out_dims = {-1, -1, -1}; + } else if (mode == "gray") { + out_dims = {1, -1, -1}; + } else if (mode == "rgb") { + out_dims = {3, -1, -1}; + } else { + PADDLE_THROW(platform::errors::Fatal( + "The provided mode is not supported for JPEG files on GPU: ", mode)); + } + + ctx->SetOutputDim("Out", framework::make_ddim(out_dims)); + } + + protected: + framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext& ctx) const override { + return framework::OpKernelType( + OperatorWithKernel::IndicateVarDataType(ctx, "X"), ctx.GetPlace()); + } + + framework::OpKernelType GetKernelTypeForVar( + const std::string& var_name, const framework::Tensor& tensor, + const framework::OpKernelType& expected_kernel_type) const { + if (var_name == "X") { + return expected_kernel_type; + } + + return framework::OpKernelType(tensor.type(), tensor.place(), + tensor.layout()); + } +}; + +class DecodeJpegOpMaker : public framework::OpProtoAndCheckerMaker { + public: + void Make() override { + AddInput("X", + "A one dimensional uint8 tensor containing the raw bytes " + "of the JPEG image. It is a tensor with rank 1."); + AddOutput("Out", "The output tensor of DecodeJpeg op"); + AddComment(R"DOC( +This operator decodes a JPEG image into a 3 dimensional RGB Tensor +or 1 dimensional Gray Tensor. Optionally converts the image to the +desired format. The values of the output tensor are uint8 between 0 +and 255. +)DOC"); + AddAttr( + "mode", + "(string, default \"unchanged\"), The read mode used " + "for optionally converting the image, can be \"unchanged\" " + ",\"gray\" , \"rgb\" .") + .SetDefault("unchanged"); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; + +REGISTER_OPERATOR( + decode_jpeg, ops::DecodeJpegOp, ops::DecodeJpegOpMaker, + paddle::framework::EmptyGradOpMaker, + paddle::framework::EmptyGradOpMaker) + +REGISTER_OP_CPU_KERNEL(decode_jpeg, ops::CPUDecodeJpegKernel) diff --git a/paddle/fluid/operators/decode_jpeg_op.cu b/paddle/fluid/operators/decode_jpeg_op.cu new file mode 100644 index 00000000000..35975a6a549 --- /dev/null +++ b/paddle/fluid/operators/decode_jpeg_op.cu @@ -0,0 +1,138 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef PADDLE_WITH_HIP + +#include +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/platform/dynload/nvjpeg.h" +#include "paddle/fluid/platform/enforce.h" +#include "paddle/fluid/platform/stream/cuda_stream.h" + +namespace paddle { +namespace operators { + +static cudaStream_t nvjpeg_stream = nullptr; +static nvjpegHandle_t nvjpeg_handle = nullptr; + +void InitNvjpegImage(nvjpegImage_t* img) { + for (int c = 0; c < NVJPEG_MAX_COMPONENT; c++) { + img->channel[c] = nullptr; + img->pitch[c] = 0; + } +} + +template +class GPUDecodeJpegKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + // Create nvJPEG handle + if (nvjpeg_handle == nullptr) { + nvjpegStatus_t create_status = + platform::dynload::nvjpegCreateSimple(&nvjpeg_handle); + + PADDLE_ENFORCE_EQ(create_status, NVJPEG_STATUS_SUCCESS, + platform::errors::Fatal("nvjpegCreateSimple failed: ", + create_status)); + } + + nvjpegJpegState_t nvjpeg_state; + nvjpegStatus_t state_status = + platform::dynload::nvjpegJpegStateCreate(nvjpeg_handle, &nvjpeg_state); + + PADDLE_ENFORCE_EQ(state_status, NVJPEG_STATUS_SUCCESS, + platform::errors::Fatal("nvjpegJpegStateCreate failed: ", + state_status)); + + int components; + nvjpegChromaSubsampling_t subsampling; + int widths[NVJPEG_MAX_COMPONENT]; + int heights[NVJPEG_MAX_COMPONENT]; + + auto* x = ctx.Input("X"); + auto* x_data = x->data(); + + nvjpegStatus_t info_status = platform::dynload::nvjpegGetImageInfo( + nvjpeg_handle, x_data, (size_t)x->numel(), &components, &subsampling, + widths, heights); + + PADDLE_ENFORCE_EQ( + info_status, NVJPEG_STATUS_SUCCESS, + platform::errors::Fatal("nvjpegGetImageInfo failed: ", info_status)); + + int width = widths[0]; + int height = heights[0]; + + nvjpegOutputFormat_t output_format; + int output_components; + + auto mode = ctx.Attr("mode"); + if (mode == "unchanged") { + if (components == 1) { + output_format = NVJPEG_OUTPUT_Y; + output_components = 1; + } else if (components == 3) { + output_format = NVJPEG_OUTPUT_RGB; + output_components = 3; + } else { + platform::dynload::nvjpegJpegStateDestroy(nvjpeg_state); + PADDLE_THROW(platform::errors::Fatal( + "The provided mode is not supported for JPEG files on GPU")); + } + } else if (mode == "gray") { + output_format = NVJPEG_OUTPUT_Y; + output_components = 1; + } else if (mode == "rgb") { + output_format = NVJPEG_OUTPUT_RGB; + output_components = 3; + } else { + platform::dynload::nvjpegJpegStateDestroy(nvjpeg_state); + PADDLE_THROW(platform::errors::Fatal( + "The provided mode is not supported for JPEG files on GPU")); + } + + nvjpegImage_t out_image; + InitNvjpegImage(&out_image); + + // create nvjpeg stream + if (nvjpeg_stream == nullptr) { + cudaStreamCreateWithFlags(&nvjpeg_stream, cudaStreamNonBlocking); + } + + int sz = widths[0] * heights[0]; + + auto* out = ctx.Output("Out"); + std::vector out_shape = {output_components, height, width}; + out->Resize(framework::make_ddim(out_shape)); + + T* data = out->mutable_data(ctx.GetPlace()); + + for (int c = 0; c < output_components; c++) { + 
out_image.channel[c] = data + c * sz; + out_image.pitch[c] = width; + } + + nvjpegStatus_t decode_status = platform::dynload::nvjpegDecode( + nvjpeg_handle, nvjpeg_state, x_data, x->numel(), output_format, + &out_image, nvjpeg_stream); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OP_CUDA_KERNEL(decode_jpeg, ops::GPUDecodeJpegKernel) + +#endif diff --git a/paddle/fluid/operators/read_file_op.cc b/paddle/fluid/operators/read_file_op.cc new file mode 100644 index 00000000000..6da92ed7df7 --- /dev/null +++ b/paddle/fluid/operators/read_file_op.cc @@ -0,0 +1,92 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include + +#include "paddle/fluid/framework/generator.h" +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/framework/operator.h" +#include "paddle/fluid/platform/enforce.h" + +namespace paddle { +namespace operators { + +template +class CPUReadFileKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + auto filename = ctx.Attr("filename"); + + std::ifstream input(filename.c_str(), + std::ios::in | std::ios::binary | std::ios::ate); + std::streamsize file_size = input.tellg(); + + input.seekg(0, std::ios::beg); + + auto* out = ctx.Output("Out"); + std::vector out_shape = {file_size}; + out->Resize(framework::make_ddim(out_shape)); + + uint8_t* data = out->mutable_data(ctx.GetPlace()); + + input.read(reinterpret_cast(data), file_size); + } +}; + +class ReadFileOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + void InferShape(framework::InferShapeContext* ctx) const override { + PADDLE_ENFORCE_EQ(ctx->HasOutput("Out"), true, + platform::errors::InvalidArgument( + "Output(Out) of ReadFileOp is null.")); + + auto out_dims = std::vector(1, -1); + ctx->SetOutputDim("Out", framework::make_ddim(out_dims)); + } + + protected: + framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext& ctx) const override { + return framework::OpKernelType(framework::proto::VarType::UINT8, + platform::CPUPlace()); + } +}; + +class ReadFileOpMaker : public framework::OpProtoAndCheckerMaker { + public: + void Make() override { + AddOutput("Out", "The output tensor of ReadFile op"); + AddComment(R"DOC( +This operator read a file. 
+)DOC"); + AddAttr("filename", "Path of the file to be readed.") + .SetDefault({}); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; + +REGISTER_OPERATOR( + read_file, ops::ReadFileOp, ops::ReadFileOpMaker, + paddle::framework::EmptyGradOpMaker, + paddle::framework::EmptyGradOpMaker) + +REGISTER_OP_CPU_KERNEL(read_file, ops::CPUReadFileKernel) diff --git a/paddle/fluid/platform/dynload/CMakeLists.txt b/paddle/fluid/platform/dynload/CMakeLists.txt index b25fb5978d0..8bff2ead0a2 100644 --- a/paddle/fluid/platform/dynload/CMakeLists.txt +++ b/paddle/fluid/platform/dynload/CMakeLists.txt @@ -1,6 +1,6 @@ cc_library(dynamic_loader SRCS dynamic_loader.cc DEPS glog gflags enforce) -list(APPEND CUDA_SRCS cublas.cc cudnn.cc curand.cc cusolver.cc nvtx.cc) +list(APPEND CUDA_SRCS cublas.cc cudnn.cc curand.cc cusolver.cc nvtx.cc nvjpeg.cc) if (WITH_ROCM) list(APPEND HIP_SRCS rocblas.cc miopen.cc hiprand.cc) diff --git a/paddle/fluid/platform/dynload/dynamic_loader.cc b/paddle/fluid/platform/dynload/dynamic_loader.cc index b49875f256b..be9cda4a2e9 100644 --- a/paddle/fluid/platform/dynload/dynamic_loader.cc +++ b/paddle/fluid/platform/dynload/dynamic_loader.cc @@ -100,6 +100,9 @@ static constexpr char* win_cublas_lib = static constexpr char* win_curand_lib = "curand64_" CUDA_VERSION_MAJOR CUDA_VERSION_MINOR ".dll;curand64_" CUDA_VERSION_MAJOR ".dll;curand64_10.dll"; +static constexpr char* win_nvjpeg_lib = + "nvjpeg64_" CUDA_VERSION_MAJOR CUDA_VERSION_MINOR + ".dll;nvjpeg64_" CUDA_VERSION_MAJOR ".dll;nvjpeg64_10.dll"; static constexpr char* win_cusolver_lib = "cusolver64_" CUDA_VERSION_MAJOR CUDA_VERSION_MINOR ".dll;cusolver64_" CUDA_VERSION_MAJOR ".dll;cusolver64_10.dll"; @@ -107,6 +110,9 @@ static constexpr char* win_cusolver_lib = static constexpr char* win_curand_lib = "curand64_" CUDA_VERSION_MAJOR CUDA_VERSION_MINOR ".dll;curand64_" CUDA_VERSION_MAJOR ".dll"; +static constexpr char* win_nvjpeg_lib = + "nvjpeg64_" CUDA_VERSION_MAJOR CUDA_VERSION_MINOR + ".dll;nvjpeg64_" CUDA_VERSION_MAJOR ".dll"; static constexpr char* win_cusolver_lib = "cusolver64_" CUDA_VERSION_MAJOR CUDA_VERSION_MINOR ".dll;cusolver64_" CUDA_VERSION_MAJOR ".dll"; @@ -330,6 +336,17 @@ void* GetCurandDsoHandle() { #endif } +void* GetNvjpegDsoHandle() { +#if defined(__APPLE__) || defined(__OSX__) + return GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "libnvjpeg.dylib"); +#elif defined(_WIN32) && defined(PADDLE_WITH_CUDA) + return GetDsoHandleFromSearchPath(FLAGS_cuda_dir, win_nvjpeg_lib, true, + {cuda_lib_path}); +#else + return GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "libnvjpeg.so"); +#endif +} + void* GetCusolverDsoHandle() { #if defined(__APPLE__) || defined(__OSX__) return GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "libcusolver.dylib"); diff --git a/paddle/fluid/platform/dynload/dynamic_loader.h b/paddle/fluid/platform/dynload/dynamic_loader.h index 84241609316..9ab6dca0126 100644 --- a/paddle/fluid/platform/dynload/dynamic_loader.h +++ b/paddle/fluid/platform/dynload/dynamic_loader.h @@ -29,6 +29,7 @@ void* GetCublasDsoHandle(); void* GetCUDNNDsoHandle(); void* GetCUPTIDsoHandle(); void* GetCurandDsoHandle(); +void* GetNvjpegDsoHandle(); void* GetCusolverDsoHandle(); void* GetNVRTCDsoHandle(); void* GetCUDADsoHandle(); diff --git a/paddle/fluid/platform/dynload/nvjpeg.cc b/paddle/fluid/platform/dynload/nvjpeg.cc new file mode 100644 index 00000000000..eb0ad78b9b7 --- /dev/null +++ b/paddle/fluid/platform/dynload/nvjpeg.cc @@ -0,0 +1,27 @@ +/* Copyright (c) 2021 PaddlePaddle 
Authors. All Rights Reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/platform/dynload/nvjpeg.h" + +namespace paddle { +namespace platform { +namespace dynload { + +std::once_flag nvjpeg_dso_flag; +void *nvjpeg_dso_handle; + +#define DEFINE_WRAP(__name) DynLoad__##__name __name + +NVJPEG_RAND_ROUTINE_EACH(DEFINE_WRAP); + +} // namespace dynload +} // namespace platform +} // namespace paddle diff --git a/paddle/fluid/platform/dynload/nvjpeg.h b/paddle/fluid/platform/dynload/nvjpeg.h new file mode 100644 index 00000000000..ae457b2958f --- /dev/null +++ b/paddle/fluid/platform/dynload/nvjpeg.h @@ -0,0 +1,53 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ +#pragma once + +#ifdef PADDLE_WITH_CUDA +#include +#include // NOLINT + +#include "paddle/fluid/platform/dynload/dynamic_loader.h" +#include "paddle/fluid/platform/port.h" + +namespace paddle { +namespace platform { +namespace dynload { +extern std::once_flag nvjpeg_dso_flag; +extern void *nvjpeg_dso_handle; + +#define DECLARE_DYNAMIC_LOAD_NVJPEG_WRAP(__name) \ + struct DynLoad__##__name { \ + template \ + nvjpegStatus_t operator()(Args... args) { \ + using nvjpegFunc = decltype(&::__name); \ + std::call_once(nvjpeg_dso_flag, []() { \ + nvjpeg_dso_handle = paddle::platform::dynload::GetNvjpegDsoHandle(); \ + }); \ + static void *p_##__name = dlsym(nvjpeg_dso_handle, #__name); \ + return reinterpret_cast(p_##__name)(args...); \ + } \ + }; \ + extern DynLoad__##__name __name + +#define NVJPEG_RAND_ROUTINE_EACH(__macro) \ + __macro(nvjpegCreateSimple); \ + __macro(nvjpegJpegStateCreate); \ + __macro(nvjpegGetImageInfo); \ + __macro(nvjpegJpegStateDestroy); \ + __macro(nvjpegDecode); + +NVJPEG_RAND_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_NVJPEG_WRAP); + +} // namespace dynload +} // namespace platform +} // namespace paddle + +#endif diff --git a/python/paddle/tests/test_read_file.py b/python/paddle/tests/test_read_file.py new file mode 100644 index 00000000000..fbcba9a6bbf --- /dev/null +++ b/python/paddle/tests/test_read_file.py @@ -0,0 +1,67 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import cv2 +import shutil +import unittest +import numpy as np + +import paddle +from paddle.vision.ops import read_file, decode_jpeg + + +class TestReadFile(unittest.TestCase): + def setUp(self): + fake_img = (np.random.random((400, 300, 3)) * 255).astype('uint8') + cv2.imwrite('fake.jpg', fake_img) + + def tearDown(self): + os.remove('fake.jpg') + + def read_file_decode_jpeg(self): + if not paddle.is_compiled_with_cuda(): + return + + img_bytes = read_file('fake.jpg') + + img = decode_jpeg(img_bytes, mode='gray') + img = decode_jpeg(img_bytes, mode='rgb') + + img = decode_jpeg(img_bytes) + + img_cv2 = cv2.imread('fake.jpg') + if paddle.in_dynamic_mode(): + np.testing.assert_equal(img.shape, img_cv2.transpose(2, 0, 1).shape) + else: + place = paddle.CUDAPlace(0) + exe = paddle.static.Executor(place) + exe.run(paddle.static.default_startup_program()) + out = exe.run(paddle.static.default_main_program(), + fetch_list=[img]) + + np.testing.assert_equal(out[0].shape, + img_cv2.transpose(2, 0, 1).shape) + + def test_read_file_decode_jpeg_dynamic(self): + self.read_file_decode_jpeg() + + def test_read_file_decode_jpeg_static(self): + paddle.enable_static() + self.read_file_decode_jpeg() + paddle.disable_static() + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/vision/ops.py b/python/paddle/vision/ops.py index 47425476a65..60a7a90c9be 100644 --- a/python/paddle/vision/ops.py +++ b/python/paddle/vision/ops.py @@ -22,7 +22,10 @@ from ..fluid.initializer import Normal from paddle.common_ops_import import * -__all__ = ['yolo_loss', 'yolo_box', 'deform_conv2d', 'DeformConv2D'] +__all__ = [ + 'yolo_loss', 'yolo_box', 'deform_conv2d', 'DeformConv2D', 'read_file', + 'decode_jpeg' +] def yolo_loss(x, @@ -782,3 +785,95 @@ class DeformConv2D(Layer): groups=self._groups, mask=mask) return out + + +def read_file(filename, name=None): + """ + Reads and outputs the bytes contents of a file as a uint8 Tensor + with one dimension. + + Args: + filename (str): Path of the file to be read. + name (str, optional): The default value is None. Normally there is no + need for user to set this property. For more information, please + refer to :ref:`api_guide_Name`. + + Returns: + A uint8 tensor. + + Examples: + .. code-block:: python + + import cv2 + import paddle + + fake_img = (np.random.random( + (400, 300, 3)) * 255).astype('uint8') + + cv2.imwrite('fake.jpg', fake_img) + + img_bytes = paddle.vision.ops.read_file('fake.jpg') + + print(img_bytes.shape) + + """ + + if in_dygraph_mode(): + return core.ops.read_file('filename', filename) + + inputs = dict() + attrs = {'filename': filename} + + helper = LayerHelper("read_file", **locals()) + out = helper.create_variable_for_type_inference('uint8') + helper.append_op( + type="read_file", inputs=inputs, attrs=attrs, outputs={"Out": out}) + + return out + + +def decode_jpeg(x, mode='unchanged', name=None): + """ + Decodes a JPEG image into a 3 dimensional RGB Tensor or 1 dimensional Gray Tensor. + Optionally converts the image to the desired format. + The values of the output tensor are uint8 between 0 and 255. 
+ + Args: + x (Tensor): A one dimensional uint8 tensor containing the raw bytes + of the JPEG image. + mode (str): The read mode used for optionally converting the image. + Default: 'unchanged'. + name (str, optional): The default value is None. Normally there is no + need for user to set this property. For more information, please + refer to :ref:`api_guide_Name`. + Returns: + Tensor: A decoded image tensor with shape (imge_channels, image_height, image_width) + + Examples: + .. code-block:: python + import cv2 + import paddle + + fake_img = (np.random.random( + (400, 300, 3)) * 255).astype('uint8') + + cv2.imwrite('fake.jpg', fake_img) + + img_bytes = paddle.vision.ops.read_file('fake.jpg') + img = paddle.vision.ops.decode_jpeg(img_bytes) + + print(img.shape) + """ + + if in_dygraph_mode(): + return core.ops.decode_jpeg(x, "mode", mode) + + inputs = {'X': x} + attrs = {"mode": mode} + + helper = LayerHelper("decode_jpeg", **locals()) + out = helper.create_variable_for_type_inference('uint8') + helper.append_op( + type="decode_jpeg", inputs=inputs, attrs=attrs, outputs={"Out": out}) + + return out -- GitLab From 69d237c22ddc083d5e03f5ad5009f976569e1f16 Mon Sep 17 00:00:00 2001 From: zhiboniu <31800336+zhiboniu@users.noreply.github.com> Date: Thu, 29 Apr 2021 19:31:40 +0800 Subject: [PATCH 057/720] add __all__=[] to python files not in API public list; import * only support in API public list files (#32643) --- python/paddle/dataset/cifar.py | 2 ++ python/paddle/dataset/common.py | 2 ++ python/paddle/dataset/conll05.py | 2 ++ python/paddle/dataset/flowers.py | 2 ++ python/paddle/dataset/image.py | 2 ++ python/paddle/dataset/imdb.py | 2 ++ python/paddle/dataset/imikolov.py | 2 ++ python/paddle/dataset/mnist.py | 2 ++ python/paddle/dataset/movielens.py | 2 ++ python/paddle/dataset/tests/cifar_test.py | 2 ++ python/paddle/dataset/tests/flowers_test.py | 2 ++ python/paddle/dataset/tests/imdb_test.py | 2 ++ python/paddle/dataset/tests/imikolov_test.py | 2 ++ python/paddle/dataset/tests/mnist_test.py | 2 ++ python/paddle/dataset/tests/test_image.py | 2 ++ python/paddle/dataset/tests/voc2012_test.py | 2 ++ python/paddle/dataset/tests/wmt16_test.py | 2 ++ python/paddle/dataset/uci_housing.py | 2 ++ python/paddle/dataset/voc2012.py | 3 ++- python/paddle/dataset/wmt14.py | 2 ++ python/paddle/dataset/wmt16.py | 2 ++ python/paddle/framework/__init__.py | 2 ++ python/paddle/framework/dtype.py | 7 ++----- python/paddle/framework/framework.py | 2 ++ python/paddle/framework/io.py | 2 ++ python/paddle/framework/random.py | 2 ++ python/paddle/nn/clip.py | 2 ++ python/paddle/nn/decode.py | 2 ++ python/paddle/nn/functional/activation.py | 2 ++ python/paddle/nn/functional/common.py | 2 ++ python/paddle/nn/functional/conv.py | 2 ++ python/paddle/nn/functional/extension.py | 2 ++ python/paddle/nn/functional/input.py | 2 ++ python/paddle/nn/functional/loss.py | 2 ++ python/paddle/nn/functional/norm.py | 2 ++ python/paddle/nn/functional/pooling.py | 2 ++ python/paddle/nn/functional/vision.py | 2 ++ python/paddle/nn/initializer/assign.py | 2 ++ python/paddle/nn/initializer/constant.py | 2 ++ python/paddle/nn/initializer/kaiming.py | 2 ++ python/paddle/nn/initializer/normal.py | 2 ++ python/paddle/nn/initializer/uniform.py | 2 ++ python/paddle/nn/initializer/xavier.py | 2 ++ python/paddle/nn/layer/__init__.py | 2 ++ python/paddle/nn/layer/activation.py | 2 ++ python/paddle/nn/layer/common.py | 2 ++ python/paddle/nn/layer/container.py | 2 +- python/paddle/nn/layer/conv.py | 2 ++ python/paddle/nn/layer/distance.py | 2 
++ python/paddle/nn/layer/loss.py | 2 ++ python/paddle/nn/layer/norm.py | 2 ++ python/paddle/nn/layer/pooling.py | 2 ++ python/paddle/nn/layer/rnn.py | 2 ++ python/paddle/nn/layer/transformer.py | 2 ++ python/paddle/nn/layer/vision.py | 2 ++ python/paddle/nn/utils/weight_norm_hook.py | 2 ++ python/paddle/optimizer/adadelta.py | 2 ++ python/paddle/optimizer/adagrad.py | 2 ++ python/paddle/optimizer/adam.py | 2 ++ python/paddle/optimizer/adamax.py | 2 ++ python/paddle/optimizer/adamw.py | 2 ++ python/paddle/optimizer/lamb.py | 2 ++ python/paddle/optimizer/momentum.py | 2 ++ python/paddle/optimizer/optimizer.py | 2 ++ python/paddle/optimizer/rmsprop.py | 2 ++ python/paddle/optimizer/sgd.py | 2 ++ python/paddle/proto/__init__.py | 2 ++ python/paddle/reader/decorator.py | 2 ++ python/paddle/reader/tests/decorator_test.py | 2 ++ python/paddle/static/input.py | 2 ++ python/paddle/static/io.py | 2 ++ python/paddle/static/nn/common.py | 2 ++ python/paddle/tensor/array.py | 2 ++ python/paddle/tensor/attribute.py | 2 ++ python/paddle/tensor/creation.py | 2 ++ python/paddle/tensor/linalg.py | 2 ++ python/paddle/tensor/logic.py | 2 ++ python/paddle/tensor/manipulation.py | 2 ++ python/paddle/tensor/math.py | 2 ++ python/paddle/tensor/random.py | 2 ++ python/paddle/tensor/search.py | 2 ++ python/paddle/tensor/stat.py | 2 ++ python/paddle/tensor/to_string.py | 2 ++ python/paddle/tests/test_dataset_cifar.py | 2 +- python/paddle/tests/test_dataset_conll05.py | 2 +- python/paddle/tests/test_dataset_imdb.py | 2 +- python/paddle/tests/test_dataset_imikolov.py | 2 +- python/paddle/tests/test_dataset_movielens.py | 2 +- python/paddle/tests/test_dataset_uci_housing.py | 2 +- python/paddle/tests/test_dataset_wmt.py | 2 +- python/paddle/tests/test_datasets.py | 2 +- python/paddle/text/datasets/__init__.py | 2 ++ python/paddle/text/datasets/conll05.py | 2 ++ python/paddle/text/datasets/imdb.py | 2 ++ python/paddle/text/datasets/imikolov.py | 2 ++ python/paddle/text/datasets/movielens.py | 2 ++ python/paddle/text/datasets/uci_housing.py | 2 ++ python/paddle/text/datasets/wmt14.py | 2 ++ python/paddle/text/datasets/wmt16.py | 2 ++ python/paddle/utils/deprecated.py | 2 ++ python/paddle/utils/download.py | 2 ++ python/paddle/utils/image_util.py | 2 ++ python/paddle/utils/install_check.py | 2 ++ python/paddle/utils/lazy_import.py | 2 ++ python/paddle/utils/op_version.py | 2 ++ 105 files changed, 201 insertions(+), 15 deletions(-) diff --git a/python/paddle/dataset/cifar.py b/python/paddle/dataset/cifar.py index a6b6e28c0f5..e3d239e2cdf 100644 --- a/python/paddle/dataset/cifar.py +++ b/python/paddle/dataset/cifar.py @@ -37,6 +37,8 @@ import tarfile import six from six.moves import cPickle as pickle +__all__ = [] + URL_PREFIX = 'https://dataset.bj.bcebos.com/cifar/' CIFAR10_URL = URL_PREFIX + 'cifar-10-python.tar.gz' CIFAR10_MD5 = 'c58f30108f718f92721af3b95e74349a' diff --git a/python/paddle/dataset/common.py b/python/paddle/dataset/common.py index cff0c625738..2a476f63862 100644 --- a/python/paddle/dataset/common.py +++ b/python/paddle/dataset/common.py @@ -26,6 +26,8 @@ import paddle.dataset import six.moves.cPickle as pickle import glob +__all__ = [] + HOME = os.path.expanduser('~') DATA_HOME = os.path.join(HOME, '.cache', 'paddle', 'dataset') diff --git a/python/paddle/dataset/conll05.py b/python/paddle/dataset/conll05.py index 96fd5ae7d76..65cf04f05b7 100644 --- a/python/paddle/dataset/conll05.py +++ b/python/paddle/dataset/conll05.py @@ -30,6 +30,8 @@ import paddle.compat as cpt import paddle.utils.deprecated as 
deprecated from six.moves import zip, range +__all__ = [] + DATA_URL = 'http://paddlemodels.bj.bcebos.com/conll05st/conll05st-tests.tar.gz' DATA_MD5 = '387719152ae52d60422c016e92a742fc' WORDDICT_URL = 'http://paddlemodels.bj.bcebos.com/conll05st%2FwordDict.txt' diff --git a/python/paddle/dataset/flowers.py b/python/paddle/dataset/flowers.py index 67ffd8e1ee1..3b437a1f074 100644 --- a/python/paddle/dataset/flowers.py +++ b/python/paddle/dataset/flowers.py @@ -51,6 +51,8 @@ import six from six.moves import cPickle as pickle from paddle.utils import try_import +__all__ = [] + DATA_URL = 'http://paddlemodels.bj.bcebos.com/flowers/102flowers.tgz' LABEL_URL = 'http://paddlemodels.bj.bcebos.com/flowers/imagelabels.mat' SETID_URL = 'http://paddlemodels.bj.bcebos.com/flowers/setid.mat' diff --git a/python/paddle/dataset/image.py b/python/paddle/dataset/image.py index 31329cd978c..c20672c2ce1 100644 --- a/python/paddle/dataset/image.py +++ b/python/paddle/dataset/image.py @@ -58,6 +58,8 @@ import os import tarfile import six.moves.cPickle as pickle +__all__ = [] + def _check_cv2(): if cv2 is None: diff --git a/python/paddle/dataset/imdb.py b/python/paddle/dataset/imdb.py index 33ae4405c50..9a6c8e837ed 100644 --- a/python/paddle/dataset/imdb.py +++ b/python/paddle/dataset/imdb.py @@ -30,6 +30,8 @@ import re import string import six +__all__ = [] + #URL = 'http://ai.stanford.edu/%7Eamaas/data/sentiment/aclImdb_v1.tar.gz' URL = 'https://dataset.bj.bcebos.com/imdb%2FaclImdb_v1.tar.gz' MD5 = '7c2ac02c03563afcf9b574c7e56c153a' diff --git a/python/paddle/dataset/imikolov.py b/python/paddle/dataset/imikolov.py index 3b8b12303c9..7a4efe27aa9 100644 --- a/python/paddle/dataset/imikolov.py +++ b/python/paddle/dataset/imikolov.py @@ -27,6 +27,8 @@ import collections import tarfile import six +__all__ = [] + #URL = 'http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz' URL = 'https://dataset.bj.bcebos.com/imikolov%2Fsimple-examples.tgz' MD5 = '30177ea32e27c525793142b6bf2c8e2d' diff --git a/python/paddle/dataset/mnist.py b/python/paddle/dataset/mnist.py index 06e8174a61e..e4f724bd66d 100644 --- a/python/paddle/dataset/mnist.py +++ b/python/paddle/dataset/mnist.py @@ -27,6 +27,8 @@ import numpy import struct from six.moves import range +__all__ = [] + URL_PREFIX = 'https://dataset.bj.bcebos.com/mnist/' TEST_IMAGE_URL = URL_PREFIX + 't10k-images-idx3-ubyte.gz' TEST_IMAGE_MD5 = '9fb629c4189551a2d022fa330f9573f3' diff --git a/python/paddle/dataset/movielens.py b/python/paddle/dataset/movielens.py index 23781b65785..862ac586bc9 100644 --- a/python/paddle/dataset/movielens.py +++ b/python/paddle/dataset/movielens.py @@ -34,6 +34,8 @@ import functools import six import paddle.compat as cpt +__all__ = [] + age_table = [1, 18, 25, 35, 45, 50, 56] #URL = 'http://files.grouplens.org/datasets/movielens/ml-1m.zip' diff --git a/python/paddle/dataset/tests/cifar_test.py b/python/paddle/dataset/tests/cifar_test.py index 8e514f0fd9a..54dff6b40cf 100644 --- a/python/paddle/dataset/tests/cifar_test.py +++ b/python/paddle/dataset/tests/cifar_test.py @@ -17,6 +17,8 @@ from __future__ import print_function import paddle.dataset.cifar import unittest +__all__ = [] + class TestCIFAR(unittest.TestCase): def check_reader(self, reader): diff --git a/python/paddle/dataset/tests/flowers_test.py b/python/paddle/dataset/tests/flowers_test.py index 06a0a7761cf..256c116b7cf 100644 --- a/python/paddle/dataset/tests/flowers_test.py +++ b/python/paddle/dataset/tests/flowers_test.py @@ -17,6 +17,8 @@ from __future__ import print_function 
import paddle.dataset.flowers import unittest +__all__ = [] + class TestFlowers(unittest.TestCase): def check_reader(self, reader): diff --git a/python/paddle/dataset/tests/imdb_test.py b/python/paddle/dataset/tests/imdb_test.py index 613c5f8edb2..264b0f232fa 100644 --- a/python/paddle/dataset/tests/imdb_test.py +++ b/python/paddle/dataset/tests/imdb_test.py @@ -18,6 +18,8 @@ import paddle.dataset.imdb import unittest import re +__all__ = [] + TRAIN_POS_PATTERN = re.compile(r"aclImdb/train/pos/.*\.txt$") TRAIN_NEG_PATTERN = re.compile(r"aclImdb/train/neg/.*\.txt$") TRAIN_PATTERN = re.compile(r"aclImdb/train/.*\.txt$") diff --git a/python/paddle/dataset/tests/imikolov_test.py b/python/paddle/dataset/tests/imikolov_test.py index 1f78a5dd4d1..5556274211f 100644 --- a/python/paddle/dataset/tests/imikolov_test.py +++ b/python/paddle/dataset/tests/imikolov_test.py @@ -19,6 +19,8 @@ import unittest WORD_DICT = paddle.dataset.imikolov.build_dict() +__all__ = [] + class TestMikolov(unittest.TestCase): def check_reader(self, reader, n): diff --git a/python/paddle/dataset/tests/mnist_test.py b/python/paddle/dataset/tests/mnist_test.py index fbb5d926494..238b58244e1 100644 --- a/python/paddle/dataset/tests/mnist_test.py +++ b/python/paddle/dataset/tests/mnist_test.py @@ -17,6 +17,8 @@ from __future__ import print_function import paddle.dataset.mnist import unittest +__all__ = [] + class TestMNIST(unittest.TestCase): def check_reader(self, reader): diff --git a/python/paddle/dataset/tests/test_image.py b/python/paddle/dataset/tests/test_image.py index 32d2eb17ae6..259939d62f6 100644 --- a/python/paddle/dataset/tests/test_image.py +++ b/python/paddle/dataset/tests/test_image.py @@ -19,6 +19,8 @@ import numpy as np import paddle.dataset.image as image +__all__ = [] + class Image(unittest.TestCase): def test_resize_flip_chw(self): diff --git a/python/paddle/dataset/tests/voc2012_test.py b/python/paddle/dataset/tests/voc2012_test.py index cddeb91cab2..21c24e6df82 100644 --- a/python/paddle/dataset/tests/voc2012_test.py +++ b/python/paddle/dataset/tests/voc2012_test.py @@ -17,6 +17,8 @@ from __future__ import print_function import paddle.dataset.voc2012 import unittest +__all__ = [] + class TestVOC(unittest.TestCase): def check_reader(self, reader): diff --git a/python/paddle/dataset/tests/wmt16_test.py b/python/paddle/dataset/tests/wmt16_test.py index be121bb1012..68a9819c8f3 100644 --- a/python/paddle/dataset/tests/wmt16_test.py +++ b/python/paddle/dataset/tests/wmt16_test.py @@ -17,6 +17,8 @@ from __future__ import print_function import paddle.dataset.wmt16 import unittest +__all__ = [] + class TestWMT16(unittest.TestCase): def checkout_one_sample(self, sample): diff --git a/python/paddle/dataset/uci_housing.py b/python/paddle/dataset/uci_housing.py index 1bc2098350f..0ac65f0fda4 100644 --- a/python/paddle/dataset/uci_housing.py +++ b/python/paddle/dataset/uci_housing.py @@ -29,6 +29,8 @@ import os import paddle.dataset.common import paddle.utils.deprecated as deprecated +__all__ = [] + URL = 'http://paddlemodels.bj.bcebos.com/uci_housing/housing.data' MD5 = 'd4accdce7a25600298819f8e28e8d593' feature_names = [ diff --git a/python/paddle/dataset/voc2012.py b/python/paddle/dataset/voc2012.py index 1575b44cd16..5784e739b41 100644 --- a/python/paddle/dataset/voc2012.py +++ b/python/paddle/dataset/voc2012.py @@ -25,10 +25,11 @@ import tarfile import io import numpy as np from paddle.dataset.common import download -from paddle.dataset.image import * import paddle.utils.deprecated as deprecated from PIL import 
Image +__all__ = [] + VOC_URL = 'http://host.robots.ox.ac.uk/pascal/VOC/voc2012/\ VOCtrainval_11-May-2012.tar' diff --git a/python/paddle/dataset/wmt14.py b/python/paddle/dataset/wmt14.py index 818f4b28ba1..c842ceaa091 100644 --- a/python/paddle/dataset/wmt14.py +++ b/python/paddle/dataset/wmt14.py @@ -30,6 +30,8 @@ import paddle.dataset.common import paddle.compat as cpt import paddle.utils.deprecated as deprecated +__all__ = [] + URL_DEV_TEST = ('http://www-lium.univ-lemans.fr/~schwenk/' 'cslm_joint_paper/data/dev+test.tgz') MD5_DEV_TEST = '7d7897317ddd8ba0ae5c5fa7248d3ff5' diff --git a/python/paddle/dataset/wmt16.py b/python/paddle/dataset/wmt16.py index 6804e7ab5fc..320ef139f77 100644 --- a/python/paddle/dataset/wmt16.py +++ b/python/paddle/dataset/wmt16.py @@ -40,6 +40,8 @@ import paddle import paddle.compat as cpt import paddle.utils.deprecated as deprecated +__all__ = [] + DATA_URL = ("http://paddlemodels.bj.bcebos.com/wmt/wmt16.tar.gz") DATA_MD5 = "0c38be43600334966403524a40dcd81e" diff --git a/python/paddle/framework/__init__.py b/python/paddle/framework/__init__.py index 660267c24e5..ce84fb739c0 100644 --- a/python/paddle/framework/__init__.py +++ b/python/paddle/framework/__init__.py @@ -34,3 +34,5 @@ from ..fluid.dygraph.base import grad # noqa: F401 from .io import save # noqa: F401 from .io import load # noqa: F401 from ..fluid.dygraph.parallel import DataParallel # noqa: F401 + +__all__ = [] diff --git a/python/paddle/framework/dtype.py b/python/paddle/framework/dtype.py index 3eeaa6e74ec..f49f7489758 100644 --- a/python/paddle/framework/dtype.py +++ b/python/paddle/framework/dtype.py @@ -12,11 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -__all__ = [ - "dtype", "uint8", "int8", "int16", "int32", "int64", "bfloat16", "float16", - "float32", "float64", "complex64", "complex128", "bool" -] - from ..fluid.core import VarDesc dtype = VarDesc.VarType @@ -38,3 +33,5 @@ complex64 = VarDesc.VarType.COMPLEX64 complex128 = VarDesc.VarType.COMPLEX128 bool = VarDesc.VarType.BOOL + +__all__ = [] diff --git a/python/paddle/framework/framework.py b/python/paddle/framework/framework.py index f50285010cc..17eaa82cd8b 100644 --- a/python/paddle/framework/framework.py +++ b/python/paddle/framework/framework.py @@ -19,6 +19,8 @@ from paddle.fluid.framework import _dygraph_tracer import numpy as np from contextlib import contextmanager +__all__ = [] + def set_default_dtype(d): """ diff --git a/python/paddle/framework/io.py b/python/paddle/framework/io.py index ac0e172d49d..493574c5bef 100644 --- a/python/paddle/framework/io.py +++ b/python/paddle/framework/io.py @@ -38,6 +38,8 @@ from paddle.fluid.dygraph.jit import _SaveLoadConfig from paddle.fluid.dygraph.io import _construct_program_holders, _construct_params_and_buffers from paddle.fluid.dygraph.io import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX, INFER_PARAMS_INFO_SUFFIX +__all__ = [] + def _build_saved_state_dict(state_dict): save_dict = {} diff --git a/python/paddle/framework/random.py b/python/paddle/framework/random.py index 251a8407035..701f8b5352c 100644 --- a/python/paddle/framework/random.py +++ b/python/paddle/framework/random.py @@ -16,6 +16,8 @@ import paddle.fluid as fluid from paddle.fluid import core +__all__ = [] + def seed(seed): """ diff --git a/python/paddle/nn/clip.py b/python/paddle/nn/clip.py index 70c49b4a538..e868cbdbacc 100644 --- a/python/paddle/nn/clip.py +++ b/python/paddle/nn/clip.py @@ -16,3 +16,5 @@ from ..fluid.clip import ClipGradByGlobalNorm # 
noqa: F401 from ..fluid.clip import ClipGradByNorm # noqa: F401 from ..fluid.clip import ClipGradByValue # noqa: F401 + +__all__ = [] diff --git a/python/paddle/nn/decode.py b/python/paddle/nn/decode.py index 3229f0b21a6..ff4a6e4f482 100644 --- a/python/paddle/nn/decode.py +++ b/python/paddle/nn/decode.py @@ -14,3 +14,5 @@ from ..fluid.layers import BeamSearchDecoder # noqa: F401 from ..fluid.layers import dynamic_decode # noqa: F401 + +__all__ = [] diff --git a/python/paddle/nn/functional/activation.py b/python/paddle/nn/functional/activation.py index cd8ee99baa2..9001ba16b7a 100644 --- a/python/paddle/nn/functional/activation.py +++ b/python/paddle/nn/functional/activation.py @@ -27,6 +27,8 @@ from ...fluid import core from ...fluid.data_feeder import check_variable_and_dtype, check_dtype import paddle +__all__ = [] + def elu(x, alpha=1.0, name=None): r""" diff --git a/python/paddle/nn/functional/common.py b/python/paddle/nn/functional/common.py index 7379c7a5f67..65b9c6771c4 100644 --- a/python/paddle/nn/functional/common.py +++ b/python/paddle/nn/functional/common.py @@ -34,6 +34,8 @@ from ...fluid import core, dygraph_utils from ...fluid import core, layers from ...fluid.data_feeder import check_variable_and_dtype +__all__ = [] + def interpolate(x, size=None, diff --git a/python/paddle/nn/functional/conv.py b/python/paddle/nn/functional/conv.py index 800c8204973..1edbc5f462e 100644 --- a/python/paddle/nn/functional/conv.py +++ b/python/paddle/nn/functional/conv.py @@ -22,6 +22,8 @@ from ...fluid.data_feeder import check_variable_and_dtype from ...fluid.param_attr import ParamAttr from ...fluid.layer_helper import LayerHelper +__all__ = [] + def _is_list_or_tuple(input): return isinstance(input, (list, tuple)) diff --git a/python/paddle/nn/functional/extension.py b/python/paddle/nn/functional/extension.py index 7900f903e7f..8a9597119ab 100644 --- a/python/paddle/nn/functional/extension.py +++ b/python/paddle/nn/functional/extension.py @@ -23,6 +23,8 @@ from ...fluid import core, dygraph_utils from ...fluid.layers.layer_function_generator import templatedoc from ...fluid.layers.sequence_lod import sequence_mask +__all__ = [] + def diag_embed(input, offset=0, dim1=-2, dim2=-1): """ diff --git a/python/paddle/nn/functional/input.py b/python/paddle/nn/functional/input.py index 4fff9cda4be..67dc69c1a93 100644 --- a/python/paddle/nn/functional/input.py +++ b/python/paddle/nn/functional/input.py @@ -19,6 +19,8 @@ from ...fluid.layer_helper import LayerHelper from ...fluid.layers import core from ...fluid.data_feeder import check_variable_and_dtype, check_dtype +__all__ = [] + def one_hot(x, num_classes, name=None): """ diff --git a/python/paddle/nn/functional/loss.py b/python/paddle/nn/functional/loss.py index bb2d8005f4e..31ffb91f30d 100755 --- a/python/paddle/nn/functional/loss.py +++ b/python/paddle/nn/functional/loss.py @@ -39,6 +39,8 @@ from ...fluid.framework import _varbase_creator from ...fluid.framework import Variable from paddle.utils import deprecated +__all__ = [] + def binary_cross_entropy(input, label, weight=None, reduction='mean', name=None): diff --git a/python/paddle/nn/functional/norm.py b/python/paddle/nn/functional/norm.py index dddc4c66d59..20e32546389 100644 --- a/python/paddle/nn/functional/norm.py +++ b/python/paddle/nn/functional/norm.py @@ -24,6 +24,8 @@ from ...fluid.param_attr import ParamAttr from ...fluid import core, dygraph_utils import numbers +__all__ = [] + def normalize(x, p=2, axis=1, epsilon=1e-12, name=None): r""" diff --git 
a/python/paddle/nn/functional/pooling.py b/python/paddle/nn/functional/pooling.py index 27a66c629ca..1869ac15b17 100755 --- a/python/paddle/nn/functional/pooling.py +++ b/python/paddle/nn/functional/pooling.py @@ -18,6 +18,8 @@ from ...fluid.framework import in_dygraph_mode from ...fluid.layers import utils, LayerHelper, unsqueeze, squeeze from ...fluid.data_feeder import check_type, check_variable_and_dtype +__all__ = [] + def _is_list_or_tuple(input): return isinstance(input, (list, tuple)) diff --git a/python/paddle/nn/functional/vision.py b/python/paddle/nn/functional/vision.py index cb8a817023d..55a66e70160 100644 --- a/python/paddle/nn/functional/vision.py +++ b/python/paddle/nn/functional/vision.py @@ -19,6 +19,8 @@ from ...fluid.data_feeder import check_variable_and_dtype from ...fluid import dygraph_utils import numpy as np +__all__ = [] + def affine_grid(theta, out_shape, align_corners=True, name=None): """ diff --git a/python/paddle/nn/initializer/assign.py b/python/paddle/nn/initializer/assign.py index 642919f3540..13a70a179ff 100644 --- a/python/paddle/nn/initializer/assign.py +++ b/python/paddle/nn/initializer/assign.py @@ -19,6 +19,8 @@ from ...fluid.core import VarDesc from ...fluid.data_feeder import check_type from ...fluid.initializer import NumpyArrayInitializer +__all__ = [] + class Assign(NumpyArrayInitializer): """Init an parameter with a numpy array, list, or tensor. diff --git a/python/paddle/nn/initializer/constant.py b/python/paddle/nn/initializer/constant.py index aec3e82aab6..292eaff362b 100644 --- a/python/paddle/nn/initializer/constant.py +++ b/python/paddle/nn/initializer/constant.py @@ -15,6 +15,8 @@ # TODO: define the initializers of Constant in neural network from ...fluid.initializer import ConstantInitializer +__all__ = [] + class Constant(ConstantInitializer): """Implement the constant initializer. diff --git a/python/paddle/nn/initializer/kaiming.py b/python/paddle/nn/initializer/kaiming.py index 712bffccda1..f0847c85237 100644 --- a/python/paddle/nn/initializer/kaiming.py +++ b/python/paddle/nn/initializer/kaiming.py @@ -15,6 +15,8 @@ # TODO: define the initializers of Kaiming functions in neural network from ...fluid.initializer import MSRAInitializer +__all__ = [] + class KaimingNormal(MSRAInitializer): r"""Implements the Kaiming Normal initializer diff --git a/python/paddle/nn/initializer/normal.py b/python/paddle/nn/initializer/normal.py index c009df78005..6fee5058057 100644 --- a/python/paddle/nn/initializer/normal.py +++ b/python/paddle/nn/initializer/normal.py @@ -15,6 +15,8 @@ from ...fluid.initializer import NormalInitializer from ...fluid.initializer import TruncatedNormalInitializer +__all__ = [] + class Normal(NormalInitializer): """The Random Normal (Gaussian) distribution initializer. diff --git a/python/paddle/nn/initializer/uniform.py b/python/paddle/nn/initializer/uniform.py index e54a4d2187b..cac03b59480 100644 --- a/python/paddle/nn/initializer/uniform.py +++ b/python/paddle/nn/initializer/uniform.py @@ -14,6 +14,8 @@ from ...fluid.initializer import UniformInitializer +__all__ = [] + class Uniform(UniformInitializer): """The random uniform distribution initializer. 
diff --git a/python/paddle/nn/initializer/xavier.py b/python/paddle/nn/initializer/xavier.py index 01a4a8887b4..f2d5593032f 100644 --- a/python/paddle/nn/initializer/xavier.py +++ b/python/paddle/nn/initializer/xavier.py @@ -14,6 +14,8 @@ from ...fluid.initializer import XavierInitializer +__all__ = [] + class XavierNormal(XavierInitializer): r""" diff --git a/python/paddle/nn/layer/__init__.py b/python/paddle/nn/layer/__init__.py index 64f0391fb65..10c2b1e3056 100644 --- a/python/paddle/nn/layer/__init__.py +++ b/python/paddle/nn/layer/__init__.py @@ -81,3 +81,5 @@ from .norm import LocalResponseNorm # noqa: F401 from .vision import PixelShuffle # noqa: F401 from .distance import PairwiseDistance # noqa: F401 from .container import LayerDict # noqa: F401 + +__all__ = [] diff --git a/python/paddle/nn/layer/activation.py b/python/paddle/nn/layer/activation.py index c6ce4588ea5..d5b37144cff 100644 --- a/python/paddle/nn/layer/activation.py +++ b/python/paddle/nn/layer/activation.py @@ -22,6 +22,8 @@ from ...fluid.initializer import Constant from paddle.framework import get_default_dtype from .. import functional as F +__all__ = [] + class ELU(layers.Layer): r""" diff --git a/python/paddle/nn/layer/common.py b/python/paddle/nn/layer/common.py index 058507ba5de..f608f20feef 100644 --- a/python/paddle/nn/layer/common.py +++ b/python/paddle/nn/layer/common.py @@ -20,6 +20,8 @@ from ...fluid.framework import in_dygraph_mode from .. import functional as F from ...fluid.framework import _dygraph_tracer +__all__ = [] + def _npairs(x, n): if isinstance(x, (paddle.Tensor, list)): diff --git a/python/paddle/nn/layer/container.py b/python/paddle/nn/layer/container.py index db317839ae8..ad41535f44a 100644 --- a/python/paddle/nn/layer/container.py +++ b/python/paddle/nn/layer/container.py @@ -16,7 +16,7 @@ from collections import OrderedDict from ...fluid.dygraph.layers import Layer from six.moves import collections_abc -__all__ = ['LayerDict', ] +__all__ = [] class LayerDict(Layer): diff --git a/python/paddle/nn/layer/conv.py b/python/paddle/nn/layer/conv.py index 2360dc17cf1..2de065d62a4 100644 --- a/python/paddle/nn/layer/conv.py +++ b/python/paddle/nn/layer/conv.py @@ -25,6 +25,8 @@ from .. import functional as F from ...fluid.layers import utils from ..functional.conv import _update_padding_nd +__all__ = [] + def _get_default_param_initializer(num_channels, filter_size): filter_elem_num = num_channels * np.prod(filter_size) diff --git a/python/paddle/nn/layer/distance.py b/python/paddle/nn/layer/distance.py index 7eb0fc1fbb5..77e3447ffda 100644 --- a/python/paddle/nn/layer/distance.py +++ b/python/paddle/nn/layer/distance.py @@ -20,6 +20,8 @@ from ...fluid.framework import core, in_dygraph_mode from ...fluid.data_feeder import check_variable_and_dtype, check_type from ...fluid.layer_helper import LayerHelper +__all__ = [] + class PairwiseDistance(layers.Layer): r""" diff --git a/python/paddle/nn/layer/loss.py b/python/paddle/nn/layer/loss.py index 356b22c632c..8f43eb8866b 100644 --- a/python/paddle/nn/layer/loss.py +++ b/python/paddle/nn/layer/loss.py @@ -21,6 +21,8 @@ import paddle from .. 
import functional as F from paddle.fluid.framework import core, in_dygraph_mode, _varbase_creator +__all__ = [] + class BCEWithLogitsLoss(fluid.dygraph.Layer): r""" diff --git a/python/paddle/nn/layer/norm.py b/python/paddle/nn/layer/norm.py index 970d68e8263..45640a6598e 100644 --- a/python/paddle/nn/layer/norm.py +++ b/python/paddle/nn/layer/norm.py @@ -50,6 +50,8 @@ import warnings from ...fluid.dygraph.base import no_grad from .. import functional as F +__all__ = [] + class _InstanceNormBase(layers.Layer): """ diff --git a/python/paddle/nn/layer/pooling.py b/python/paddle/nn/layer/pooling.py index 5916fd7c69e..528572ee21b 100755 --- a/python/paddle/nn/layer/pooling.py +++ b/python/paddle/nn/layer/pooling.py @@ -16,6 +16,8 @@ from ...fluid.dygraph import layers from ...fluid.layer_helper import LayerHelper from .. import functional as F +__all__ = [] + class AvgPool1D(layers.Layer): r""" diff --git a/python/paddle/nn/layer/rnn.py b/python/paddle/nn/layer/rnn.py index a7539b5b095..de9b8cdbfce 100644 --- a/python/paddle/nn/layer/rnn.py +++ b/python/paddle/nn/layer/rnn.py @@ -33,6 +33,8 @@ from paddle.fluid.layers import utils from paddle.fluid.layers.utils import map_structure, flatten, pack_sequence_as from paddle.fluid.data_feeder import convert_dtype +__all__ = [] + def split_states(states, bidirectional=False, state_components=1): r""" diff --git a/python/paddle/nn/layer/transformer.py b/python/paddle/nn/layer/transformer.py index 752870f3d0a..891177532a4 100644 --- a/python/paddle/nn/layer/transformer.py +++ b/python/paddle/nn/layer/transformer.py @@ -28,6 +28,8 @@ from ...fluid.dygraph import Layer, LayerList from ...fluid.param_attr import ParamAttr from ...fluid.data_feeder import convert_dtype +__all__ = [] + def _convert_param_attr_to_list(param_attr, n): """ diff --git a/python/paddle/nn/layer/vision.py b/python/paddle/nn/layer/vision.py index e66e122be52..e6d3af9a37b 100644 --- a/python/paddle/nn/layer/vision.py +++ b/python/paddle/nn/layer/vision.py @@ -17,6 +17,8 @@ from ...fluid.dygraph import layers from .. 
import functional +__all__ = [] + class PixelShuffle(layers.Layer): """ diff --git a/python/paddle/nn/utils/weight_norm_hook.py b/python/paddle/nn/utils/weight_norm_hook.py index 23df38ca08c..8d2cc8062d2 100755 --- a/python/paddle/nn/utils/weight_norm_hook.py +++ b/python/paddle/nn/utils/weight_norm_hook.py @@ -19,6 +19,8 @@ from ...fluid import layers as F from ...fluid.layer_helper import LayerHelper from ...fluid.data_feeder import check_variable_and_dtype +__all__ = [] + def l2_norm(x, axis, epsilon=1e-12, name=None): if len(x.shape) == 1: diff --git a/python/paddle/optimizer/adadelta.py b/python/paddle/optimizer/adadelta.py index af07d706e13..6c10d9bc269 100644 --- a/python/paddle/optimizer/adadelta.py +++ b/python/paddle/optimizer/adadelta.py @@ -17,6 +17,8 @@ from ..fluid import core from ..fluid import framework from ..fluid.framework import Variable, name_scope +__all__ = [] + class Adadelta(Optimizer): r""" diff --git a/python/paddle/optimizer/adagrad.py b/python/paddle/optimizer/adagrad.py index 82615c92b7c..bb934e5a926 100644 --- a/python/paddle/optimizer/adagrad.py +++ b/python/paddle/optimizer/adagrad.py @@ -17,6 +17,8 @@ from ..fluid import core from ..fluid import framework from ..fluid.framework import Variable +__all__ = [] + class Adagrad(Optimizer): r""" diff --git a/python/paddle/optimizer/adam.py b/python/paddle/optimizer/adam.py index 358fa8fb97d..63ca462d1a2 100644 --- a/python/paddle/optimizer/adam.py +++ b/python/paddle/optimizer/adam.py @@ -24,6 +24,8 @@ from ..fluid.dygraph import base as imperative_base import paddle +__all__ = [] + class Adam(Optimizer): r""" diff --git a/python/paddle/optimizer/adamax.py b/python/paddle/optimizer/adamax.py index 175d932540d..44ae89f49d1 100644 --- a/python/paddle/optimizer/adamax.py +++ b/python/paddle/optimizer/adamax.py @@ -17,6 +17,8 @@ from ..fluid import core from ..fluid import framework from ..fluid.framework import Variable, name_scope +__all__ = [] + class Adamax(Optimizer): r""" diff --git a/python/paddle/optimizer/adamw.py b/python/paddle/optimizer/adamw.py index 899c2957a6a..304f0b77182 100644 --- a/python/paddle/optimizer/adamw.py +++ b/python/paddle/optimizer/adamw.py @@ -19,6 +19,8 @@ from ..fluid import framework from ..fluid.dygraph import base as imperative_base import paddle +__all__ = [] + class AdamW(Adam): r""" diff --git a/python/paddle/optimizer/lamb.py b/python/paddle/optimizer/lamb.py index bab130ec590..bff24e71c81 100644 --- a/python/paddle/optimizer/lamb.py +++ b/python/paddle/optimizer/lamb.py @@ -17,6 +17,8 @@ from ..fluid import core from ..fluid import framework from ..fluid.framework import Variable +__all__ = [] + class Lamb(Optimizer): r""" diff --git a/python/paddle/optimizer/momentum.py b/python/paddle/optimizer/momentum.py index c1dc0e8ddd8..372143553e0 100644 --- a/python/paddle/optimizer/momentum.py +++ b/python/paddle/optimizer/momentum.py @@ -22,6 +22,8 @@ from ..fluid import layers import paddle.fluid as fluid from paddle.fluid.regularizer import L2DecayRegularizer +__all__ = [] + class Momentum(Optimizer): r""" diff --git a/python/paddle/optimizer/optimizer.py b/python/paddle/optimizer/optimizer.py index 9425ab1431e..b06bd2a2b0b 100644 --- a/python/paddle/optimizer/optimizer.py +++ b/python/paddle/optimizer/optimizer.py @@ -42,6 +42,8 @@ from ..fluid.wrapped_decorator import signature_safe_contextmanager from .. import compat as cpt from .lr import LRScheduler +__all__ = [] + class Optimizer(object): r"""Optimizer Base class. 
diff --git a/python/paddle/optimizer/rmsprop.py b/python/paddle/optimizer/rmsprop.py index a2fd40bc0b3..b0bb0228c8c 100644 --- a/python/paddle/optimizer/rmsprop.py +++ b/python/paddle/optimizer/rmsprop.py @@ -17,6 +17,8 @@ from ..fluid import core from ..fluid import framework from ..fluid.framework import Variable +__all__ = [] + class RMSProp(Optimizer): r""" diff --git a/python/paddle/optimizer/sgd.py b/python/paddle/optimizer/sgd.py index ecac40aec72..4526034b405 100644 --- a/python/paddle/optimizer/sgd.py +++ b/python/paddle/optimizer/sgd.py @@ -18,6 +18,8 @@ from ..fluid import framework from ..fluid.framework import Variable, name_scope from ..fluid.dygraph import no_grad +__all__ = [] + class SGD(Optimizer): r""" diff --git a/python/paddle/proto/__init__.py b/python/paddle/proto/__init__.py index 07406a841ec..f482d80548d 100644 --- a/python/paddle/proto/__init__.py +++ b/python/paddle/proto/__init__.py @@ -14,3 +14,5 @@ from paddle.proto.TrainerConfig_pb2 import OptimizationConfig, TrainerConfig from paddle.proto.ModelConfig_pb2 import ModelConfig + +__all__ = [] diff --git a/python/paddle/reader/decorator.py b/python/paddle/reader/decorator.py index 0aefcf9e683..3129029d829 100644 --- a/python/paddle/reader/decorator.py +++ b/python/paddle/reader/decorator.py @@ -27,6 +27,8 @@ import random import zlib import paddle.compat as cpt +__all__ = [] + # On macOS, the 'spawn' start method is now the default in Python3.8 multiprocessing, # Paddle is currently unable to solve this, so forces the process to start using # the 'fork' start method. diff --git a/python/paddle/reader/tests/decorator_test.py b/python/paddle/reader/tests/decorator_test.py index e15702e39c4..e11600a06fb 100644 --- a/python/paddle/reader/tests/decorator_test.py +++ b/python/paddle/reader/tests/decorator_test.py @@ -19,6 +19,8 @@ import functools import paddle.reader +__all__ = [] + def reader_creator_10(dur): def reader(): diff --git a/python/paddle/static/input.py b/python/paddle/static/input.py index c1de576ee74..f06c45cc369 100644 --- a/python/paddle/static/input.py +++ b/python/paddle/static/input.py @@ -21,6 +21,8 @@ from paddle.fluid.data_feeder import check_type from paddle.fluid.framework import convert_np_dtype_to_dtype_ from paddle.fluid.framework import static_only +__all__ = [] + @static_only def data(name, shape, dtype=None, lod_level=0): diff --git a/python/paddle/static/io.py b/python/paddle/static/io.py index fc6d8b64f18..58e8ebc481d 100644 --- a/python/paddle/static/io.py +++ b/python/paddle/static/io.py @@ -37,6 +37,8 @@ from paddle.fluid.framework import static_only, Parameter from paddle.fluid.executor import Executor, global_scope from paddle.fluid.log_helper import get_logger +__all__ = [] + _logger = get_logger( __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s') diff --git a/python/paddle/static/nn/common.py b/python/paddle/static/nn/common.py index 659b7f45b26..b8133872aa9 100755 --- a/python/paddle/static/nn/common.py +++ b/python/paddle/static/nn/common.py @@ -15,6 +15,8 @@ import paddle from paddle.fluid.framework import static_only +__all__ = [] + @static_only def fc(x, diff --git a/python/paddle/tensor/array.py b/python/paddle/tensor/array.py index ee28d47a9a9..6c3d5c577e7 100644 --- a/python/paddle/tensor/array.py +++ b/python/paddle/tensor/array.py @@ -16,6 +16,8 @@ from ..fluid import layers +__all__ = [] + def array_length(array): """ diff --git a/python/paddle/tensor/attribute.py b/python/paddle/tensor/attribute.py index 1f709ac4dbc..131afca0d67 100644 --- 
a/python/paddle/tensor/attribute.py +++ b/python/paddle/tensor/attribute.py @@ -22,6 +22,8 @@ from ..fluid.data_feeder import check_variable_and_dtype from ..fluid.layers import rank # noqa: F401 from ..fluid.layers import shape # noqa: F401 +__all__ = [] + def _complex_to_real_dtype(dtype): if dtype == core.VarDesc.VarType.COMPLEX64: diff --git a/python/paddle/tensor/creation.py b/python/paddle/tensor/creation.py index b31984f6846..361c0e80f90 100644 --- a/python/paddle/tensor/creation.py +++ b/python/paddle/tensor/creation.py @@ -31,6 +31,8 @@ from ..fluid.framework import convert_np_dtype_to_dtype_, in_dygraph_mode, _varb from ..fluid.layers import linspace # noqa: F401 import paddle +__all__ = [] + @dygraph_only def to_tensor(data, dtype=None, place=None, stop_gradient=True): diff --git a/python/paddle/tensor/linalg.py b/python/paddle/tensor/linalg.py index 87e3bce4b1d..8aa9c9bd2bd 100644 --- a/python/paddle/tensor/linalg.py +++ b/python/paddle/tensor/linalg.py @@ -21,6 +21,8 @@ from ..fluid.layers import transpose # noqa: F401 from paddle.common_ops_import import core from paddle.common_ops_import import VarDesc +__all__ = [] + def matmul(x, y, transpose_x=False, transpose_y=False, name=None): """ diff --git a/python/paddle/tensor/logic.py b/python/paddle/tensor/logic.py index 14154fb06f8..bdf2c477d86 100644 --- a/python/paddle/tensor/logic.py +++ b/python/paddle/tensor/logic.py @@ -28,6 +28,8 @@ from ..fluid.layers import logical_xor # noqa: F401 from paddle.common_ops_import import core +__all__ = [] + def equal_all(x, y, name=None): """ diff --git a/python/paddle/tensor/manipulation.py b/python/paddle/tensor/manipulation.py index dc811ea0f3f..1a596204267 100644 --- a/python/paddle/tensor/manipulation.py +++ b/python/paddle/tensor/manipulation.py @@ -34,6 +34,8 @@ from ..fluid import layers import paddle import warnings +__all__ = [] + def _print_warning_in_static_mode(api_name): warnings.warn( diff --git a/python/paddle/tensor/math.py b/python/paddle/tensor/math.py index 65f57b4b4e9..84c67a9ae8d 100755 --- a/python/paddle/tensor/math.py +++ b/python/paddle/tensor/math.py @@ -59,6 +59,8 @@ from ..fluid.layers import sin # noqa: F401 from ..fluid.layers import multiplex # noqa: F401 from ..fluid import layers +__all__ = [] + _supported_int_dtype_ = [ VarDesc.VarType.UINT8, VarDesc.VarType.INT8, diff --git a/python/paddle/tensor/random.py b/python/paddle/tensor/random.py index 7e1eef8f325..69a46345447 100644 --- a/python/paddle/tensor/random.py +++ b/python/paddle/tensor/random.py @@ -21,6 +21,8 @@ from ..fluid.data_feeder import check_variable_and_dtype, check_type, check_dtyp from ..fluid.layers import utils import paddle +__all__ = [] + def bernoulli(x, name=None): """ diff --git a/python/paddle/tensor/search.py b/python/paddle/tensor/search.py index ac303d2311e..3d8a75f9277 100644 --- a/python/paddle/tensor/search.py +++ b/python/paddle/tensor/search.py @@ -25,6 +25,8 @@ from paddle.common_ops_import import VarDesc # from ..fluid.layers import has_inf #DEFINE_ALIAS # from ..fluid.layers import has_nan #DEFINE_ALIAS +__all__ = [] + def argsort(x, axis=-1, descending=False, name=None): """ diff --git a/python/paddle/tensor/stat.py b/python/paddle/tensor/stat.py index fa7a278a2b5..8c74360a17d 100644 --- a/python/paddle/tensor/stat.py +++ b/python/paddle/tensor/stat.py @@ -23,6 +23,8 @@ from .search import where from ..fluid.data_feeder import convert_dtype, check_variable_and_dtype, check_type, check_dtype import paddle +__all__ = [] + def mean(x, axis=None, keepdim=False, 
name=None): """ diff --git a/python/paddle/tensor/to_string.py b/python/paddle/tensor/to_string.py index 2e76a8d47a7..9d07840be68 100644 --- a/python/paddle/tensor/to_string.py +++ b/python/paddle/tensor/to_string.py @@ -17,6 +17,8 @@ import numpy as np from paddle.fluid.layers import core from paddle.fluid.data_feeder import convert_dtype, check_variable_and_dtype, check_type, check_dtype +__all__ = [] + class PrintOptions(object): precision = 8 diff --git a/python/paddle/tests/test_dataset_cifar.py b/python/paddle/tests/test_dataset_cifar.py index e84f7318866..abf79fb1e39 100644 --- a/python/paddle/tests/test_dataset_cifar.py +++ b/python/paddle/tests/test_dataset_cifar.py @@ -15,7 +15,7 @@ import unittest import numpy as np -from paddle.vision.datasets import * +from paddle.vision.datasets import Cifar10, Cifar100 class TestCifar10Train(unittest.TestCase): diff --git a/python/paddle/tests/test_dataset_conll05.py b/python/paddle/tests/test_dataset_conll05.py index e35c04275d2..9eb0036718b 100644 --- a/python/paddle/tests/test_dataset_conll05.py +++ b/python/paddle/tests/test_dataset_conll05.py @@ -16,7 +16,7 @@ import os import unittest import numpy as np -from paddle.text.datasets import * +from paddle.text.datasets import Conll05st class TestConll05st(unittest.TestCase): diff --git a/python/paddle/tests/test_dataset_imdb.py b/python/paddle/tests/test_dataset_imdb.py index 62c75ab232c..aed8c387409 100644 --- a/python/paddle/tests/test_dataset_imdb.py +++ b/python/paddle/tests/test_dataset_imdb.py @@ -15,7 +15,7 @@ import unittest import numpy as np -from paddle.text.datasets import * +from paddle.text.datasets import Imdb class TestImdbTrain(unittest.TestCase): diff --git a/python/paddle/tests/test_dataset_imikolov.py b/python/paddle/tests/test_dataset_imikolov.py index f4f0b8e4836..6ffeeda73c3 100644 --- a/python/paddle/tests/test_dataset_imikolov.py +++ b/python/paddle/tests/test_dataset_imikolov.py @@ -15,7 +15,7 @@ import unittest import numpy as np -from paddle.text.datasets import * +from paddle.text.datasets import Imikolov class TestImikolovTrain(unittest.TestCase): diff --git a/python/paddle/tests/test_dataset_movielens.py b/python/paddle/tests/test_dataset_movielens.py index 3b61fd6f5c7..e5c6d8376ee 100644 --- a/python/paddle/tests/test_dataset_movielens.py +++ b/python/paddle/tests/test_dataset_movielens.py @@ -15,7 +15,7 @@ import unittest import numpy as np -from paddle.text.datasets import * +from paddle.text.datasets import Movielens class TestMovielensTrain(unittest.TestCase): diff --git a/python/paddle/tests/test_dataset_uci_housing.py b/python/paddle/tests/test_dataset_uci_housing.py index 623c7d24d09..bdf960b4336 100644 --- a/python/paddle/tests/test_dataset_uci_housing.py +++ b/python/paddle/tests/test_dataset_uci_housing.py @@ -19,7 +19,7 @@ import tempfile import shutil import cv2 -from paddle.text.datasets import * +from paddle.text.datasets import UCIHousing, WMT14 class TestUCIHousingTrain(unittest.TestCase): diff --git a/python/paddle/tests/test_dataset_wmt.py b/python/paddle/tests/test_dataset_wmt.py index b4945cb90f9..3e63090c9f0 100644 --- a/python/paddle/tests/test_dataset_wmt.py +++ b/python/paddle/tests/test_dataset_wmt.py @@ -15,7 +15,7 @@ import unittest import numpy as np -from paddle.text.datasets import * +from paddle.text.datasets import WMT14, WMT16 class TestWMT14Train(unittest.TestCase): diff --git a/python/paddle/tests/test_datasets.py b/python/paddle/tests/test_datasets.py index 89fa01cbceb..c93bac3ac27 100644 --- 
a/python/paddle/tests/test_datasets.py +++ b/python/paddle/tests/test_datasets.py @@ -20,7 +20,7 @@ import shutil import cv2 import paddle.vision.transforms as T -from paddle.vision.datasets import * +from paddle.vision.datasets import DatasetFolder, ImageFolder, MNIST, FashionMNIST, Flowers from paddle.dataset.common import _check_exists_and_download diff --git a/python/paddle/text/datasets/__init__.py b/python/paddle/text/datasets/__init__.py index 9a00081469a..11891704992 100644 --- a/python/paddle/text/datasets/__init__.py +++ b/python/paddle/text/datasets/__init__.py @@ -19,3 +19,5 @@ from .movielens import Movielens # noqa: F401 from .uci_housing import UCIHousing # noqa: F401 from .wmt14 import WMT14 # noqa: F401 from .wmt16 import WMT16 # noqa: F401 + +__all__ = [] diff --git a/python/paddle/text/datasets/conll05.py b/python/paddle/text/datasets/conll05.py index 070c787db85..7dd29637706 100644 --- a/python/paddle/text/datasets/conll05.py +++ b/python/paddle/text/datasets/conll05.py @@ -24,6 +24,8 @@ from paddle.io import Dataset import paddle.compat as cpt from paddle.dataset.common import _check_exists_and_download +__all__ = [] + DATA_URL = 'http://paddlemodels.bj.bcebos.com/conll05st/conll05st-tests.tar.gz' DATA_MD5 = '387719152ae52d60422c016e92a742fc' WORDDICT_URL = 'http://paddlemodels.bj.bcebos.com/conll05st%2FwordDict.txt' diff --git a/python/paddle/text/datasets/imdb.py b/python/paddle/text/datasets/imdb.py index c64890dc43d..f4fe7eb174b 100644 --- a/python/paddle/text/datasets/imdb.py +++ b/python/paddle/text/datasets/imdb.py @@ -24,6 +24,8 @@ import collections from paddle.io import Dataset from paddle.dataset.common import _check_exists_and_download +__all__ = [] + URL = 'https://dataset.bj.bcebos.com/imdb%2FaclImdb_v1.tar.gz' MD5 = '7c2ac02c03563afcf9b574c7e56c153a' diff --git a/python/paddle/text/datasets/imikolov.py b/python/paddle/text/datasets/imikolov.py index 7e4daf731a2..9c84669d6b8 100644 --- a/python/paddle/text/datasets/imikolov.py +++ b/python/paddle/text/datasets/imikolov.py @@ -22,6 +22,8 @@ import collections from paddle.io import Dataset from paddle.dataset.common import _check_exists_and_download +__all__ = [] + URL = 'https://dataset.bj.bcebos.com/imikolov%2Fsimple-examples.tgz' MD5 = '30177ea32e27c525793142b6bf2c8e2d' diff --git a/python/paddle/text/datasets/movielens.py b/python/paddle/text/datasets/movielens.py index 7741e82194c..798a7c590e1 100644 --- a/python/paddle/text/datasets/movielens.py +++ b/python/paddle/text/datasets/movielens.py @@ -26,6 +26,8 @@ from paddle.io import Dataset import paddle.compat as cpt from paddle.dataset.common import _check_exists_and_download +__all__ = [] + age_table = [1, 18, 25, 35, 45, 50, 56] URL = 'https://dataset.bj.bcebos.com/movielens%2Fml-1m.zip' diff --git a/python/paddle/text/datasets/uci_housing.py b/python/paddle/text/datasets/uci_housing.py index c876ed409cf..597b1e1e818 100644 --- a/python/paddle/text/datasets/uci_housing.py +++ b/python/paddle/text/datasets/uci_housing.py @@ -21,6 +21,8 @@ import paddle from paddle.io import Dataset from paddle.dataset.common import _check_exists_and_download +__all__ = [] + URL = 'http://paddlemodels.bj.bcebos.com/uci_housing/housing.data' MD5 = 'd4accdce7a25600298819f8e28e8d593' feature_names = [ diff --git a/python/paddle/text/datasets/wmt14.py b/python/paddle/text/datasets/wmt14.py index 96d29c79c6a..424a564216d 100644 --- a/python/paddle/text/datasets/wmt14.py +++ b/python/paddle/text/datasets/wmt14.py @@ -22,6 +22,8 @@ from paddle.io import Dataset import 
paddle.compat as cpt from paddle.dataset.common import _check_exists_and_download +__all__ = [] + URL_DEV_TEST = ('http://www-lium.univ-lemans.fr/~schwenk/' 'cslm_joint_paper/data/dev+test.tgz') MD5_DEV_TEST = '7d7897317ddd8ba0ae5c5fa7248d3ff5' diff --git a/python/paddle/text/datasets/wmt16.py b/python/paddle/text/datasets/wmt16.py index 5605fd2aecb..f95cbe771ca 100644 --- a/python/paddle/text/datasets/wmt16.py +++ b/python/paddle/text/datasets/wmt16.py @@ -27,6 +27,8 @@ from paddle.io import Dataset import paddle.compat as cpt from paddle.dataset.common import _check_exists_and_download +__all__ = [] + DATA_URL = ("http://paddlemodels.bj.bcebos.com/wmt/wmt16.tar.gz") DATA_MD5 = "0c38be43600334966403524a40dcd81e" diff --git a/python/paddle/utils/deprecated.py b/python/paddle/utils/deprecated.py index a46f1ae3a2c..5390dea69fe 100755 --- a/python/paddle/utils/deprecated.py +++ b/python/paddle/utils/deprecated.py @@ -19,6 +19,8 @@ import warnings import functools import paddle +__all__ = [] + # NOTE(zhiqiu): Since python 3.2, DeprecationWarning is ignored by default, # and since python 3.7, it is once again shown by default when triggered directly by code in __main__. # See details: https://docs.python.org/3/library/warnings.html#default-warning-filter diff --git a/python/paddle/utils/download.py b/python/paddle/utils/download.py index bd70013e112..ddd1dad9dbd 100644 --- a/python/paddle/utils/download.py +++ b/python/paddle/utils/download.py @@ -55,6 +55,8 @@ except: import logging logger = logging.getLogger(__name__) +__all__ = [] + WEIGHTS_HOME = osp.expanduser("~/.cache/paddle/hapi/weights") DOWNLOAD_RETRY_LIMIT = 3 diff --git a/python/paddle/utils/image_util.py b/python/paddle/utils/image_util.py index b113f574e9f..18be9366c40 100644 --- a/python/paddle/utils/image_util.py +++ b/python/paddle/utils/image_util.py @@ -16,6 +16,8 @@ import numpy as np from PIL import Image from six.moves import cStringIO as StringIO +__all__ = [] + def resize_image(img, target_size): """ diff --git a/python/paddle/utils/install_check.py b/python/paddle/utils/install_check.py index 5d70cf61007..69baa4facfa 100644 --- a/python/paddle/utils/install_check.py +++ b/python/paddle/utils/install_check.py @@ -20,6 +20,8 @@ import numpy as np import paddle +__all__ = [] + def _simple_network(): """ diff --git a/python/paddle/utils/lazy_import.py b/python/paddle/utils/lazy_import.py index ea07077b2da..d9146422819 100644 --- a/python/paddle/utils/lazy_import.py +++ b/python/paddle/utils/lazy_import.py @@ -15,6 +15,8 @@ import importlib +__all__ = [] + def try_import(module_name): """Try importing a module, with an informative error message on failure.""" diff --git a/python/paddle/utils/op_version.py b/python/paddle/utils/op_version.py index a1fa230d64f..6e81b5a2c17 100644 --- a/python/paddle/utils/op_version.py +++ b/python/paddle/utils/op_version.py @@ -14,6 +14,8 @@ from ..fluid import core +__all__ = [] + def Singleton(cls): _instance = {} -- GitLab From 0f578db968ae319a58cd395510111856f9864fec Mon Sep 17 00:00:00 2001 From: Leo Chen Date: Thu, 29 Apr 2021 20:47:24 +0800 Subject: [PATCH 058/720] [NPU] refine FillNpuTensorWithConstant (#32682) --- paddle/fluid/operators/npu_op_runner.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/paddle/fluid/operators/npu_op_runner.h b/paddle/fluid/operators/npu_op_runner.h index 5506ddd8969..cfc933c7a76 100644 --- a/paddle/fluid/operators/npu_op_runner.h +++ b/paddle/fluid/operators/npu_op_runner.h @@ -90,6 +90,9 @@ aclrtStream GetCurrentNPUStream(int 
device_id = -1); template void FillNpuTensorWithConstant(Tensor *tensor, T val) { + // NOTE(zhiqiu): we found that power sometimes returns 0 when val is small + // like 1e-8. + constexpr float MIN_PRECISION_FOR_POWER = 1e-3; PADDLE_ENFORCE_EQ( tensor->IsInitialized(), true, platform::errors::InvalidArgument("The tensor should be initialized.")); @@ -97,7 +100,8 @@ void FillNpuTensorWithConstant(Tensor *tensor, T val) { platform::is_npu_place(tensor->place()), true, platform::errors::InvalidArgument("The tensor should be on NPUPlace.")); // do async for better performance - if (typeid(float) == typeid(T) || typeid(platform::float16) == typeid(T)) { + if ((typeid(float) == typeid(T) || typeid(platform::float16) == typeid(T)) && + static_cast(val) > MIN_PRECISION_FOR_POWER) { Tensor tmp(tensor->type()); tmp.Resize(tensor->dims()); tmp.mutable_data(tensor->place()); -- GitLab From a3e771974be044e8a368a0dce3557df61b7d1c47 Mon Sep 17 00:00:00 2001 From: liuyuhui Date: Thu, 29 Apr 2021 22:49:31 +0800 Subject: [PATCH 059/720] [Kunlun]fix multi xpu dygraph hang, test=kunlun (#32662) --- paddle/fluid/imperative/reducer.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/paddle/fluid/imperative/reducer.cc b/paddle/fluid/imperative/reducer.cc index a92704ce447..bf479e0d797 100644 --- a/paddle/fluid/imperative/reducer.cc +++ b/paddle/fluid/imperative/reducer.cc @@ -762,10 +762,11 @@ void Reducer::MarkGroupReady(size_t group_index) { // TODO(liuyuhui): Add try catch to deal with exception later, // otherwise the main thread will continue to run when an exception is // thrown in comm_pool_. - comm_pool_->enqueue([&] { + auto next_group = next_group_; + comm_pool_->enqueue([this, run_order, next_group, &group] { auto dev_id = BOOST_GET_CONST(platform::XPUPlace, place_).device; platform::SetXPUDeviceId(dev_id); - FusedAllReduceSchedule(run_order, group, next_group_); + FusedAllReduceSchedule(run_order, group, next_group); { std::lock_guard lock(mutex_); comm_op_count_ -= 1; // lock -- GitLab From 43527a2b4fc627d392c7e6cc44f744b7231b6418 Mon Sep 17 00:00:00 2001 From: jakpiase <62569058+jakpiase@users.noreply.github.com> Date: Fri, 30 Apr 2021 04:05:35 +0200 Subject: [PATCH 060/720] Reduce grad fix (#32592) --- .../mkldnn/reduce_mean_mkldnn_op.cc | 3 +- .../reduce_ops/mkldnn/reduce_mkldnn_op.h | 90 ++++++++++++------- .../reduce_ops/mkldnn/reduce_sum_mkldnn_op.cc | 3 +- paddle/fluid/operators/reduce_ops/reduce_op.h | 25 ++---- paddle/fluid/platform/mkldnn_reuse.h | 31 +++---- 5 files changed, 79 insertions(+), 73 deletions(-) diff --git a/paddle/fluid/operators/reduce_ops/mkldnn/reduce_mean_mkldnn_op.cc b/paddle/fluid/operators/reduce_ops/mkldnn/reduce_mean_mkldnn_op.cc index 33daeea8599..dfba933940b 100644 --- a/paddle/fluid/operators/reduce_ops/mkldnn/reduce_mean_mkldnn_op.cc +++ b/paddle/fluid/operators/reduce_ops/mkldnn/reduce_mean_mkldnn_op.cc @@ -45,7 +45,8 @@ class ReduceMeanGradMKLDNNKernel : public ReduceGradMKLDNNKernel { number_of_elements = input_x->numel(); } - this->RunKernel(ctx, dnnl::algorithm::binary_add, 0.0f, + this->RunKernel(ctx, dnnl::algorithm::binary_add, + dnnl::algorithm::reduction_mean, 0.0f, 1.0L / number_of_elements); } }; diff --git a/paddle/fluid/operators/reduce_ops/mkldnn/reduce_mkldnn_op.h b/paddle/fluid/operators/reduce_ops/mkldnn/reduce_mkldnn_op.h index 58416f479c0..40cd3ba974f 100644 --- a/paddle/fluid/operators/reduce_ops/mkldnn/reduce_mkldnn_op.h +++ b/paddle/fluid/operators/reduce_ops/mkldnn/reduce_mkldnn_op.h @@ -21,6 +21,27 @@ using 
paddle::framework::LoDTensor; using paddle::framework::Tensor; using platform::to_void_cast; +inline std::vector CalculateReducedDims(const Tensor* input, + const Tensor* output, + std::vector& reduce_dims, + bool reduce_all, + bool keep_dim) { + if (keep_dim) return framework::vectorize(output->dims()); + + if (reduce_all) + return std::vector(framework::vectorize(input->dims()).size(), 1); + + std::vector output_dims(framework::vectorize(input->dims())); + for (size_t i = 0; i < reduce_dims.size(); ++i) { + reduce_dims[i] = (reduce_dims[i] >= 0) + ? reduce_dims[i] + : input->dims().size() + reduce_dims[i]; + output_dims[reduce_dims[i]] = 1; + } + + return output_dims; +} + template class ReduceMKLDNNKernel : public framework::OpKernel { public: @@ -37,9 +58,8 @@ class ReduceMKLDNNKernel : public framework::OpKernel { bool reduce_all = ctx.Attr("reduce_all"); bool keep_dim = ctx.Attr("keep_dim"); - std::vector output_dims = - CalculateOutputDims(input, output, reduce_dims, reduce_all, keep_dim); - + auto output_dims = + CalculateReducedDims(input, output, reduce_dims, reduce_all, keep_dim); auto input_dims = framework::vectorize(input->dims()); auto& astream = platform::MKLDNNDeviceContext::tls().get_stream(); @@ -96,53 +116,63 @@ class ReduceMKLDNNKernel : public framework::OpKernel { paddle::framework::vectorize(output->dims())))); } } - - private: - std::vector CalculateOutputDims(const Tensor* input, - const Tensor* output, - std::vector& reduce_dims, - bool reduce_all, - bool keep_dim) const { - if (keep_dim) return framework::vectorize(output->dims()); - - if (reduce_all) - return std::vector(framework::vectorize(input->dims()).size(), - 1); - - std::vector output_dims(framework::vectorize(input->dims())); - for (size_t i = 0; i < reduce_dims.size(); ++i) { - reduce_dims[i] = (reduce_dims[i] >= 0) - ? 
reduce_dims[i] - : input->dims().size() + reduce_dims[i]; - output_dims[reduce_dims[i]] = 1; - } - - return output_dims; - } }; template class ReduceGradMKLDNNKernel : public framework::OpKernel { public: void RunKernel(const framework::ExecutionContext& ctx, - dnnl::algorithm binary_type, float scale_x, - float scale_y) const { + dnnl::algorithm binary_type, dnnl::algorithm reduction_type, + float scale_x, float scale_y) const { const auto& dev_ctx = ctx.template device_context(); const auto& onednn_engine = dev_ctx.GetEngine(); + bool keep_dim = ctx.Attr("keep_dim"); + bool reduce_all = ctx.Attr("reduce_all"); auto dims = ctx.Attr>("dim"); auto* input_dy = ctx.Input(framework::GradVarName("Out")); auto* output_dx = ctx.Output(framework::GradVarName("X")); + mkldnn::memory::format_tag x_format_tag; + auto input_dims = + CalculateReducedDims(output_dx, input_dy, dims, reduce_all, keep_dim); + + if (input_dims != framework::vectorize(output_dx->dims())) { + const std::string key_pd = + platform::CreateKey( + dev_ctx, framework::vectorize(output_dx->dims()), + ctx.InputName("X"), + (std::to_string(static_cast(reduction_type)))) + + "@fwd_pd"; + std::shared_ptr fwd_pd = + std::static_pointer_cast( + dev_ctx.GetBlob(key_pd)); + + PADDLE_ENFORCE_NOT_NULL( + fwd_pd, platform::errors::Unavailable( + "Forward primitive descriptor is not available in %s op, " + "cannot deduce memory format tag", + ctx.Type())); + + x_format_tag = platform::GetMKLDNNFormat(fwd_pd->src_desc()); + + PADDLE_ENFORCE_NE(x_format_tag, mkldnn::memory::format_tag::undef, + platform::errors::InvalidArgument( + "Cannot deduce format tag for %s op", ctx.Type())); + } else { // fwd descriptor not available because reorder was used instead + // of reduction + x_format_tag = getPlainFormatTag(output_dx); + } + output_dx->mutable_data(ctx.GetPlace()); - output_dx->set_format(getPlainFormatTag(output_dx)); + output_dx->set_format(x_format_tag); output_dx->set_layout(input_dy->layout()); platform::BroadcastDataMKLDNNHandler handler( binary_type, dev_ctx, onednn_engine, ctx.GetPlace(), output_dx, input_dy, scale_x, scale_y, - ctx.InputName(framework::GradVarName("Out"))); + ctx.InputName(framework::GradVarName("Out")), input_dims); const auto src_dx_memory = handler.AcquireSrcMemory(output_dx); const auto src_dy_memory = handler.AcquireSecondSrcMemory(input_dy); diff --git a/paddle/fluid/operators/reduce_ops/mkldnn/reduce_sum_mkldnn_op.cc b/paddle/fluid/operators/reduce_ops/mkldnn/reduce_sum_mkldnn_op.cc index e62edcf5596..3f92d39ede1 100644 --- a/paddle/fluid/operators/reduce_ops/mkldnn/reduce_sum_mkldnn_op.cc +++ b/paddle/fluid/operators/reduce_ops/mkldnn/reduce_sum_mkldnn_op.cc @@ -29,7 +29,8 @@ template class ReduceSumGradMKLDNNKernel : public ReduceGradMKLDNNKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - this->RunKernel(ctx, dnnl::algorithm::binary_add, 0.0f, 1.0f); + this->RunKernel(ctx, dnnl::algorithm::binary_add, + dnnl::algorithm::reduction_sum, 0.0f, 1.0f); } }; diff --git a/paddle/fluid/operators/reduce_ops/reduce_op.h b/paddle/fluid/operators/reduce_ops/reduce_op.h index 913d941df88..390c4d9709a 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_op.h +++ b/paddle/fluid/operators/reduce_ops/reduce_op.h @@ -559,8 +559,11 @@ class ReduceGradOp : public framework::OperatorWithKernel { protected: framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override { - auto input_data_type = OperatorWithKernel::IndicateVarDataType( - ctx, 
framework::GradVarName("Out")); + int in_dtype = ctx.Attr("in_dtype"); + auto input_data_type = + (in_dtype >= 0) ? static_cast(in_dtype) + : OperatorWithKernel::IndicateVarDataType( + ctx, framework::GradVarName("Out")); #ifdef PADDLE_WITH_MKLDNN auto CanMKLDNNReduceGradBeUsed = [&]() { @@ -568,18 +571,6 @@ class ReduceGradOp : public framework::OperatorWithKernel { if (dx_dims.size() > 5) return false; // max 5D tensor is supported - if (ctx.Attr("reduce_all") || - ((int)ctx.Attr>("dim").size() == dx_dims.size())) - return true; - - auto dy_dims = ctx.Input(framework::GradVarName("Out"))->dims(); - - // Subtensor must be on rightmost part of the bigger tensor - for (int i = 0; i < dy_dims.size(); ++i) { - if (dx_dims[dx_dims.size() - dy_dims.size() + i] != dy_dims[i]) { - return false; - } - } return true; }; if (this->CanMKLDNNBeUsed(ctx, input_data_type) && @@ -590,12 +581,6 @@ class ReduceGradOp : public framework::OperatorWithKernel { } #endif - int in_dtype = ctx.Attr("in_dtype"); - if (in_dtype >= 0) { - return framework::OpKernelType( - static_cast(in_dtype), - ctx.GetPlace()); - } return framework::OpKernelType(input_data_type, ctx.GetPlace()); } }; diff --git a/paddle/fluid/platform/mkldnn_reuse.h b/paddle/fluid/platform/mkldnn_reuse.h index 54efa55cc4c..f1eb1f96363 100644 --- a/paddle/fluid/platform/mkldnn_reuse.h +++ b/paddle/fluid/platform/mkldnn_reuse.h @@ -639,7 +639,8 @@ class BroadcastDataMKLDNNHandler const mkldnn::engine engine, platform::Place cpu_place, const Tensor* x, const Tensor* y, float scale_x, float scale_y, - const std::string& uniq_name) + const std::string& uniq_name, + std::vector& input_dims) : platform::MKLDNNHandlerT( dev_ctx, engine, cpu_place, platform::CreateKey(dev_ctx, framework::vectorize(x->dims()), @@ -659,24 +660,12 @@ class BroadcastDataMKLDNNHandler y->format(), MKLDNNMemoryFormat::undef, platform::errors::InvalidArgument("Wrong format set for Y tensor.")); - auto src1_tz = framework::vectorize(y->dims()); const auto src0_tz = framework::vectorize(x->dims()); - // GetExpectedKernelType checks if smaller vector is a subvector with all - // the dims in correct order on the rightmost part of the bigger vector, - // i.e. 
a correct vector for broadcasting: - // x = 5, 7, 3, 2, 4, 8 - // y = 4, 8 - src1_tz.reserve(src0_tz.size()); - - for (size_t i = src1_tz.size(); i < src0_tz.size(); ++i) { - src1_tz.insert(src1_tz.begin(), 1L); - } - const auto src0_md = dnnl::memory::desc( src0_tz, platform::MKLDNNGetDataType(), x->format()); const auto src1_md = dnnl::memory::desc( - src1_tz, platform::MKLDNNGetDataType(), x->format()); + input_dims, platform::MKLDNNGetDataType(), x->format()); dnnl::primitive_attr attributes; attributes.set_scales(DNNL_ARG_SRC_0, 0, {scale_x}); @@ -711,7 +700,7 @@ class ReductionMKLDNNHandler const mkldnn::engine engine, platform::Place cpu_place, const Tensor* x, const Tensor* y, const std::string& uniq_name, - std::vector output_dims) + std::vector y_tz) : platform::MKLDNNHandlerT( dev_ctx, engine, cpu_place, platform::CreateKey(dev_ctx, framework::vectorize(x->dims()), @@ -725,14 +714,14 @@ class ReductionMKLDNNHandler x->format(), MKLDNNMemoryFormat::undef, platform::errors::InvalidArgument("Wrong format set for X tensor.")); - const auto src_tz = framework::vectorize(x->dims()); + const auto x_tz = framework::vectorize(x->dims()); - const auto src_md = dnnl::memory::desc( - src_tz, platform::MKLDNNGetDataType(), x->format()); - const auto dst_md = memory::desc( - output_dims, platform::MKLDNNGetDataType(), x->format()); + const auto x_md = dnnl::memory::desc( + x_tz, platform::MKLDNNGetDataType(), x->format()); + const auto y_md = + memory::desc(y_tz, platform::MKLDNNGetDataType(), x->format()); - this->AcquireForwardPrimitiveDescriptor(algo, src_md, dst_md, p, eps); + this->AcquireForwardPrimitiveDescriptor(algo, x_md, y_md, p, eps); } } }; -- GitLab From 8fd724a5026e9b5da3a68225566ea4861338d9e2 Mon Sep 17 00:00:00 2001 From: Baibaifan <39549453+Baibaifan@users.noreply.github.com> Date: Fri, 30 Apr 2021 11:24:43 +0800 Subject: [PATCH 061/720] add_c_sync_npu_kernel (#32687) --- paddle/fluid/operators/collective/c_sync_calc_stream_op.cc | 7 ++++--- .../operators/collective/c_sync_calc_stream_op_npu_test.cc | 2 +- paddle/fluid/operators/collective/c_sync_comm_stream_op.cc | 7 ++++--- .../operators/collective/c_sync_comm_stream_op_npu_test.cc | 2 +- 4 files changed, 10 insertions(+), 8 deletions(-) diff --git a/paddle/fluid/operators/collective/c_sync_calc_stream_op.cc b/paddle/fluid/operators/collective/c_sync_calc_stream_op.cc index 83da712bee9..71ab25a7b0f 100644 --- a/paddle/fluid/operators/collective/c_sync_calc_stream_op.cc +++ b/paddle/fluid/operators/collective/c_sync_calc_stream_op.cc @@ -46,7 +46,7 @@ Call calculation stream synchronization. 
}; template -class CSyncCalcStreamCudaKernel : public framework::OpKernel { +class CSyncCalcStreamKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { #if (defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)) && !defined(_WIN32) @@ -86,5 +86,6 @@ namespace ops = paddle::operators; REGISTER_OP_WITHOUT_GRADIENT(c_sync_calc_stream, ops::CSyncCalcStreamOp, ops::CSyncCalcStreamOpMaker); -REGISTER_OP_CUDA_KERNEL(c_sync_calc_stream, - ops::CSyncCalcStreamCudaKernel); +REGISTER_OP_CUDA_KERNEL(c_sync_calc_stream, ops::CSyncCalcStreamKernel); + +REGISTER_OP_NPU_KERNEL(c_sync_calc_stream, ops::CSyncCalcStreamKernel); diff --git a/paddle/fluid/operators/collective/c_sync_calc_stream_op_npu_test.cc b/paddle/fluid/operators/collective/c_sync_calc_stream_op_npu_test.cc index 4b1f7bb3401..45613715b82 100644 --- a/paddle/fluid/operators/collective/c_sync_calc_stream_op_npu_test.cc +++ b/paddle/fluid/operators/collective/c_sync_calc_stream_op_npu_test.cc @@ -35,7 +35,7 @@ namespace m = paddle::operators::math; USE_OP(elementwise_add); USE_OP_DEVICE_KERNEL(elementwise_add, NPU); -USE_NO_KERNEL_OP(c_sync_calc_stream); +USE_OP_DEVICE_KERNEL(c_sync_calc_stream, NPU); template void Compare(f::Scope* scope, const p::DeviceContext& ctx) { diff --git a/paddle/fluid/operators/collective/c_sync_comm_stream_op.cc b/paddle/fluid/operators/collective/c_sync_comm_stream_op.cc index 772122bb58d..71fda2cd01c 100644 --- a/paddle/fluid/operators/collective/c_sync_comm_stream_op.cc +++ b/paddle/fluid/operators/collective/c_sync_comm_stream_op.cc @@ -58,7 +58,7 @@ Call communication stream synchronization. }; template -class CSyncCommStreamCudaKernel : public framework::OpKernel { +class CSyncCommStreamKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { auto place = ctx.GetPlace(); @@ -97,5 +97,6 @@ namespace ops = paddle::operators; REGISTER_OP_WITHOUT_GRADIENT(c_sync_comm_stream, ops::CSyncCommStreamOp, ops::CSyncCommStreamOpMaker); -REGISTER_OP_CUDA_KERNEL(c_sync_comm_stream, - ops::CSyncCommStreamCudaKernel); +REGISTER_OP_CUDA_KERNEL(c_sync_comm_stream, ops::CSyncCommStreamKernel); + +REGISTER_OP_NPU_KERNEL(c_sync_comm_stream, ops::CSyncCommStreamKernel); diff --git a/paddle/fluid/operators/collective/c_sync_comm_stream_op_npu_test.cc b/paddle/fluid/operators/collective/c_sync_comm_stream_op_npu_test.cc index 3915ec4fa35..6c5a6db6148 100644 --- a/paddle/fluid/operators/collective/c_sync_comm_stream_op_npu_test.cc +++ b/paddle/fluid/operators/collective/c_sync_comm_stream_op_npu_test.cc @@ -43,7 +43,7 @@ namespace p = paddle::platform; namespace m = paddle::operators::math; USE_OP(c_broadcast); -USE_NO_KERNEL_OP(c_sync_comm_stream); +USE_OP_DEVICE_KERNEL(c_sync_comm_stream, NPU); USE_NO_KERNEL_OP(c_gen_hccl_id); USE_NO_KERNEL_OP(c_comm_init_hccl); USE_OP_DEVICE_KERNEL(c_broadcast, NPU); -- GitLab From 5ada0329743e035e9c07a909595d7b488a5d1bda Mon Sep 17 00:00:00 2001 From: 123malin Date: Fri, 30 Apr 2021 12:06:22 +0800 Subject: [PATCH 062/720] test=develop, optimize index_sampler (#32663) --- .../index_dataset/index_sampler.cc | 27 +++---------------- .../distributed/index_dataset/index_sampler.h | 20 ++++++++++++++ 2 files changed, 23 insertions(+), 24 deletions(-) diff --git a/paddle/fluid/distributed/index_dataset/index_sampler.cc b/paddle/fluid/distributed/index_dataset/index_sampler.cc index 58f85d98fb0..3e573bbdd2d 100644 --- a/paddle/fluid/distributed/index_dataset/index_sampler.cc +++ 
b/paddle/fluid/distributed/index_dataset/index_sampler.cc @@ -13,13 +13,10 @@ // limitations under the License. #include "paddle/fluid/distributed/index_dataset/index_sampler.h" -#include "paddle/fluid/operators/math/sampler.h" namespace paddle { namespace distributed { -using Sampler = paddle::operators::math::Sampler; - std::vector> LayerWiseSampler::sample( const std::vector>& user_inputs, const std::vector& target_ids, bool with_hierarchy) { @@ -30,22 +27,7 @@ std::vector> LayerWiseSampler::sample( std::vector(user_feature_num + 2)); auto max_layer = tree_->Height(); - std::vector sampler_vec(max_layer - start_sample_layer_); - std::vector> layer_ids(max_layer - - start_sample_layer_); - - auto layer_index = max_layer - 1; size_t idx = 0; - while (layer_index >= start_sample_layer_) { - auto layer_codes = tree_->GetLayerCodes(layer_index); - layer_ids[idx] = tree_->GetNodes(layer_codes); - sampler_vec[idx] = new paddle::operators::math::UniformSampler( - layer_ids[idx].size() - 1, seed_); - layer_index--; - idx++; - } - - idx = 0; for (size_t i = 0; i < input_num; i++) { auto travel_codes = tree_->GetTravelCodes(target_ids[i], start_sample_layer_); @@ -76,18 +58,15 @@ std::vector> LayerWiseSampler::sample( for (int idx_offset = 0; idx_offset < layer_counts_[j]; idx_offset++) { int sample_res = 0; do { - sample_res = sampler_vec[j]->Sample(); - } while (layer_ids[j][sample_res].id() == travel_path[j].id()); + sample_res = sampler_vec_[j]->Sample(); + } while (layer_ids_[j][sample_res].id() == travel_path[j].id()); outputs[idx + idx_offset][user_feature_num] = - layer_ids[j][sample_res].id(); + layer_ids_[j][sample_res].id(); outputs[idx + idx_offset][user_feature_num + 1] = 0; } idx += layer_counts_[j]; } } - for (size_t i = 0; i < sampler_vec.size(); i++) { - delete sampler_vec[i]; - } return outputs; } diff --git a/paddle/fluid/distributed/index_dataset/index_sampler.h b/paddle/fluid/distributed/index_dataset/index_sampler.h index 66882bedc9b..8813421446a 100644 --- a/paddle/fluid/distributed/index_dataset/index_sampler.h +++ b/paddle/fluid/distributed/index_dataset/index_sampler.h @@ -16,6 +16,7 @@ #include #include "paddle/fluid/distributed/index_dataset/index_wrapper.h" #include "paddle/fluid/framework/program_desc.h" +#include "paddle/fluid/operators/math/sampler.h" #include "paddle/fluid/platform/enforce.h" namespace paddle { @@ -83,6 +84,23 @@ class LayerWiseSampler : public IndexSampler { } reverse(layer_counts_.begin(), layer_counts_.end()); VLOG(3) << "sample counts sum: " << layer_counts_sum_; + + auto max_layer = tree_->Height(); + sampler_vec_.clear(); + layer_ids_.clear(); + + auto layer_index = max_layer - 1; + size_t idx = 0; + while (layer_index >= start_sample_layer_) { + auto layer_codes = tree_->GetLayerCodes(layer_index); + layer_ids_.push_back(tree_->GetNodes(layer_codes)); + auto sampler_temp = + std::make_shared( + layer_ids_[idx].size() - 1, seed_); + sampler_vec_.push_back(sampler_temp); + layer_index--; + idx++; + } } std::vector> sample( const std::vector>& user_inputs, @@ -94,6 +112,8 @@ class LayerWiseSampler : public IndexSampler { std::shared_ptr tree_{nullptr}; int seed_{0}; int start_sample_layer_{1}; + std::vector> sampler_vec_; + std::vector> layer_ids_; }; } // end namespace distributed -- GitLab From bd8d35a211aa3e0cbf4a881d35fb92bf9ee6e3a4 Mon Sep 17 00:00:00 2001 From: ceci3 Date: Fri, 30 Apr 2021 12:53:13 +0800 Subject: [PATCH 063/720] remove is_test=True in grad (#32678) --- paddle/fluid/operators/batch_norm_op.cc | 11 +++-------- 
paddle/fluid/operators/batch_norm_op.cu | 9 ++------- 2 files changed, 5 insertions(+), 15 deletions(-) diff --git a/paddle/fluid/operators/batch_norm_op.cc b/paddle/fluid/operators/batch_norm_op.cc index fc31885824b..edad20435b4 100644 --- a/paddle/fluid/operators/batch_norm_op.cc +++ b/paddle/fluid/operators/batch_norm_op.cc @@ -575,7 +575,7 @@ class BatchNormGradKernel // SavedVariance have been reverted in forward operator const auto *saved_inv_variance = ctx.Input("SavedVariance"); const std::string data_layout_str = ctx.Attr("data_layout"); - const bool use_global_stats = ctx.Attr("use_global_stats"); + bool use_global_stats = ctx.Attr("use_global_stats"); const bool is_test = ctx.Attr("is_test"); const float epsilon = ctx.Attr("epsilon"); const DataLayout data_layout = @@ -585,6 +585,8 @@ class BatchNormGradKernel auto *d_scale = ctx.Output(framework::GradVarName("Scale")); auto *d_bias = ctx.Output(framework::GradVarName("Bias")); + use_global_stats = is_test || use_global_stats; + // batch_norm with inplace as false will take X as grad input, which // is same as cuDNN batch_norm backward calculation, batch_norm // with inplace as true only take Y as input and X should be calculate @@ -605,13 +607,6 @@ class BatchNormGradKernel "X@GRAD and Y@GRAD inplaced in non-inplace mode")); } - PADDLE_ENFORCE_EQ( - is_test, false, - platform::errors::InvalidArgument( - "`is_test = True` CANNOT be used in train program. If " - "you want to use global status in pre_train model, " - "please set `use_global_stats = True`")); - // Get the size for each dimension. // NCHW [batch_size, in_channels, in_height, in_width] const auto &x_dims = x->dims(); diff --git a/paddle/fluid/operators/batch_norm_op.cu b/paddle/fluid/operators/batch_norm_op.cu index 41dc87ac1ba..6fc78732b10 100644 --- a/paddle/fluid/operators/batch_norm_op.cu +++ b/paddle/fluid/operators/batch_norm_op.cu @@ -817,7 +817,7 @@ class BatchNormGradKernel platform::errors::InvalidArgument("It must use CUDAPlace.")); double epsilon = static_cast(ctx.Attr("epsilon")); const std::string data_layout_str = ctx.Attr("data_layout"); - const bool use_global_stats = ctx.Attr("use_global_stats"); + bool use_global_stats = ctx.Attr("use_global_stats"); const DataLayout data_layout = framework::StringToDataLayout(data_layout_str); @@ -850,12 +850,7 @@ class BatchNormGradKernel } const bool is_test = ctx.Attr("is_test"); - PADDLE_ENFORCE_EQ( - is_test, false, - platform::errors::InvalidArgument( - "`is_test = True` CANNOT be used in train program. 
If " - "you want to use global status in pre_train model, " - "please set `use_global_stats = True`")); + use_global_stats = is_test || use_global_stats; const auto &x_dims = x->dims(); -- GitLab From 9b4fabf9f1f5730c5608e517872a72bbc7b85afa Mon Sep 17 00:00:00 2001 From: feng626 <57284900+feng626@users.noreply.github.com> Date: Fri, 30 Apr 2021 12:54:39 +0800 Subject: [PATCH 064/720] =?UTF-8?q?=E5=8D=95=E6=B5=8B=E5=85=A8=E9=87=8F?= =?UTF-8?q?=E5=88=97=E8=A1=A8=E4=BF=AE=E6=94=B9=20(#32641)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * 单测全量列表修改 * 单测全量列表修改 * 去除挂掉的windows单测 * 去除挂掉的windows单测 --- tools/parallel_UT_rule.py | 222 +++++++++++++++++++++++++++++++++++++- 1 file changed, 218 insertions(+), 4 deletions(-) diff --git a/tools/parallel_UT_rule.py b/tools/parallel_UT_rule.py index d2969618b85..4fefa7cee31 100644 --- a/tools/parallel_UT_rule.py +++ b/tools/parallel_UT_rule.py @@ -436,9 +436,173 @@ CPU_PARALLEL_JOB = [ 'assign_op_test', 'allocator_facade_frac_flags_test', 'aes_cipher_test', + 'test_dist_sparse_tensor_load_adagrad', + 'test_dist_mnist_fp16_allreduce', + 'test_dist_mnist_gradient_merge', + 'test_dist_allreduce_op', + 'test_hdfs3', + 'test_parallel_dygraph_se_resnext', + 'test_dist_fleet_ps9', + 'test_dist_fleet_infer', + 'test_dist_se_resnext_sync', + 'test_dist_oneps', + 'test_dist_sparse_load_ps1', + 'test_dist_mnist_batch_merge', + 'test_dist_fleet_ctr', + 'test_dist_fleet_ps10', + 'test_parallel_dygraph_transformer', + 'test_dist_mnist_fleetapi', + 'test_dist_sparse_tensor_load_adam', + 'test_dist_fleet_ps4', + 'test_dist_fleet_heter_program', + 'test_parallel_dygraph_sparse_embedding_over_height', + 'test_hdfs2', + 'test_dist_sharding_save', + 'test_dist_fleet_ps_gpu_ctr', + 'test_dist_mnist_backward_deps', + 'test_dist_fleet_heter_base', + 'test_dist_sparse_tensor_load_sgd', + 'test_new_group', + 'test_dist_mnist_with_program', + 'test_dist_mnist_pg', + 'test_dist_sparse_tensor_load_rmsprop', + 'test_auto_checkpoint2', + 'test_dist_sparse_tensor_load_ftrl', + 'test_dist_fleet_ps6', + 'test_dist_mnist_fleet_save', + 'test_auto_checkpoint1', + 'test_dist_fleet_a_sync_optimizer_sync', + 'test_dist_fleet_ps3', + 'test_dist_se_resnext_nccl', + 'test_parallel_dygraph_mnist', + 'test_auto_checkpoint_multiple', + 'test_dist_fleet_a_sync_optimizer_auto_async', + 'test_pipeline', + 'test_dist_fleet_ps8', + 'test_dist_fleet_sparse_embedding_ctr', + 'test_dist_se_resnext_dgc', + 'test_dist_fleet_ps7', + 'test_dist_fleet_decay', + 'test_dist_fleet_a_sync_optimizer_auto_geo', + 'test_dist_fleet_geo', + 'test_parallel_dygraph_dataparallel', + 'test_hdfs1', + 'test_dist_mnist_dgc_nccl', + 'test_dist_fleet_ctr2', + 'test_parallel_dygraph_unused_variables', + 'test_dist_mnist_multi_comm', + 'test_dist_sparse_tensor_load_momentum', + 'test_gen_nccl_id_op', + 'test_parallel_dygraph_sparse_embedding', + 'test_dist_mnist_ring_allreduce', + 'test_fleet_launch_async', + 'test_dist_fleet_a_sync_optimizer_geo', + 'test_parallel_dygraph_control_flow', + 'test_auto_checkpoint', + 'test_fleet_pipeline_meta_optimizer', + 'test_dist_fleet_heter_ctr', + 'test_fleet_graph_execution_meta_optimizer', + 'test_fleet_run_random_port', + 'test_dist_fleet_ps5', + 'test_dist_fleet_a_sync_optimizer_auto', + 'test_dist_lookup_sparse_table_fuse_ops', + 'test_dist_fleet_a_sync_optimizer_async', + 'test_c_comm_init_op', + 'test_fleet_launch_nproc', + 'test_dist_fleet_simnet', + 'test_auto_checkpoint_dist_basic', + 'test_fleet_launch_cloud', + 'test_dist_fleet_ps', + 
'test_dist_op', + 'test_dist_sparse_load_ps0', + 'test_auto_checkpoint3', + 'test_dist_fleet_ps2', + 'test_dist_fleet_grad_clip', + 'test_custom_concat', + 'test_analyzer_transformer_fuse', + 'test_analyzer_seq_pool1_fuse_statis', + 'test_fc_lstm_fuse_pass_cc', + 'test_layer_norm_fuse_pass', + 'test_fc_gru_fuse_pass_cc', + 'test_analyzer_save_model', + 'test_fleet_ps', + 'test_analyzer_multi_model_prediction', + 'test_fleet_base_3', + 'test_fleet_base_2', + 'test_ascend_trigger', + 'test_fleet_amp_meta_optimizer', + 'test_fleetrun', + 'test_check_abi', + 'dense_table_test', + 'test_custom_relu_op_setup', + 'test_adaptive_pool2d_convert_global_pass', + 'test_fleet_recompute_meta_optimizer', + 'test_fleet_fp16_allreduce_meta_optimizer', + 'test_post_training_quantization_lstm_model', + 'test_fleet_metric', + 'test_fleet_gradient_merge_meta_optimizer', + 'test_fleet_sharding_meta_optimizer', + 'test_listen_and_serv_op', + 'test_analyzer_zerocopytensor_tensor', + 'test_conv_bn_fuse_pass_cc', + 'test_collective_optimizer', + 'test_bf16_utils', + 'test_analyzer_seq_pool1_compare_determine', + 'test_avoid_twice_initialization', + 'test_callback_early_stop', + 'test_fleet_distributed_strategy', + 'test_launch_coverage', + 'test_sgd_op_bf16', + 'test_model_cast_to_bf16', + 'test_hybrid_parallel_topology', + 'barrier_table_test', + 'test_check_error', + 'test_fleet_lamb_meta_optimizer', + 'test_fleet_rolemaker_2', + 'test_distributed_strategy', + 'test_rnn_cudnn_params_packing', + 'test_communicator_async', + 'brpc_utils_test', + 'test_analyzer_capi_pd_tensor', + 'test_recv_save_op', + 'heter_listen_and_server_test', + 'test_analyzer_capi_ner', + 'test_unsqueeze2_eltwise_fuse_pass', + 'test_dgc_optimizer', + 'test_fleet_cc', + 'test_repeated_fc_relu_fuse_pass_cc', + 'heter_server_test', + 'test_static_save_load_large', + 'graph_node_test', + 'test_custom_conj', + 'test_fleet_private_function', + 'test_fake_init_op', + 'brpc_service_sparse_sgd_test', + 'test_tf32_cudnn', + 'test_communicator_geo', + 'test_dispatch_jit', + 'test_layer_norm_fuse_pass_cc', + 'test_fleet_dgc_meta_optimizer', + 'test_fc_fuse_pass_cc', + 'test_communicator_sync', + 'test_analyzer_capi', + 'test_fleet_lars_meta_optimizer', + 'test_communicator_half_async', + 'test_fleet_localsgd_meta_optimizer', + 'test_fleet_amp_init', + 'test_fleet_checkpoint', + 'test_analyzer_seq_pool1_fuse_compare_zero_copy', + 'test_lookup_table_bf16_op', + 'test_fleet_meta_optimizer_base', + 'table_test', + 'test_fleet_rolemaker_new', + 'test_fleet_graph_executor', + 'test_multi_out_jit', + 'test_fleet_utils', + 'brpc_service_dense_sgd_test', ] -# It run 4 job each time, If it failed due to Insufficient GPU memory or CUBLAS_STATUS_ALLOC_FAILED, +# It run 4 job each time, If it failed due to Insufficient GPU memory or CUBLAS_STATUS_ALLOC_FAILED, # just remove it from this list. 
TETRAD_PARALLEL_JOB = [ 'buffered_allocator_test', @@ -477,9 +641,56 @@ TETRAD_PARALLEL_JOB = [ 'tensor_test', 'test_repeated_fc_relu_fuse_pass_cc', 'test_mkldnn_caching', + 'test_analyzer_seq_pool1', + 'test_analyzer_ocr', + 'test_analyzer_seq_conv1', + 'test_analyzer_small_dam', + 'test_analyzer_mobilenet_depthwise_conv', + 'test_analyzer_pyramid_dnn', + 'test_analyzer_text_classification', + 'test_analyzer_rnn2', + 'test_analyzer_transformer', + 'test_analyzer_resnet50', + 'test_analyzer_ner', + 'test_analyzer_lac', + 'test_analyzer_transformer_profile', + 'test_analyzer_mobilenet_transpose', + 'test_analyzer_rnn1', + 'test_analyzer_seq_pool1_profile', + 'test_analyzer_paddletensor_tensor', + 'test_analyzer_bert', + 'test_analyzer_googlenet', + 'zero_copy_tensor_test', + 'custom_tensor_test', + 'test_fleet_base', + 'test_imperative_container_layerdict', + 'test_complex_simplenet', + 'test_tensor_register_hook', + 'test_set_value_op', + 'test_tensor_type_promotion', + 'test_view_op_reuse_allocation', + 'test_complex_grad_accumulated', + 'test_tensor_methods', + 'test_sequential', + 'test_tensor_methods', + 'test_sequential', + 'test_imperative_layers', + 'test_dgc_momentum_op', + 'test_memcpy_op', + 'test_dgc_op', + 'test_modelaverage', + 'test_lookahead', + 'test_word2vec_book', + 'test_callback_visualdl', + 'test_new_group_api', + 'test_collective_split_embedding_none_divisible', + 'test_collective_wait', + 'test_collective_split_row_linear', + 'test_collective_split_col_linear', + 'test_collective_split_embedding', ] -# It run 2 job each time, If it failed due to Insufficient GPU memory or CUBLAS_STATUS_ALLOC_FAILED, +# It run 2 job each time, If it failed due to Insufficient GPU memory or CUBLAS_STATUS_ALLOC_FAILED, # just remove it from this list. 
TWO_PARALLEL_JOB = [ 'convert_model2dot_ernie', @@ -611,7 +822,6 @@ TWO_PARALLEL_JOB = [ 'test_adam_op_multi_thread', 'test_adamax_op', 'test_while_loop_op', - 'test_affine_grid_function', 'test_transpose_flatten_concat_fuse_pass', 'test_trace_op', 'test_backward', @@ -663,7 +873,6 @@ TWO_PARALLEL_JOB = [ 'test_gather_op', 'test_partial_concat_op', 'test_gaussian_random_op', - 'test_paddle_imperative_double_grad', 'test_generate_proposals_v2_op', 'test_pad_constant_like', 'test_grid_sample_function', @@ -879,6 +1088,11 @@ TWO_PARALLEL_JOB = [ 'test_imperative_load_static_param', 'test_fuse_bn_add_act_pass', 'test_buffer_shared_memory_reuse_pass_and_fuse_optimization_op_pass', + 'test_quantize_transpiler_v2', + 'paddle_infer_api_test', + 'test_analyzer_ernie', + 'lite_resnet50_test', + 'lite_mul_model_test', ] -- GitLab From 308073de9ae511c03ab8d1ffd504ee2867cb1f79 Mon Sep 17 00:00:00 2001 From: pangyoki Date: Fri, 30 Apr 2021 13:25:57 +0800 Subject: [PATCH 065/720] Add 12 inplace APIs including auto generated (#32573) * add relu6_ hardsigmoid_ leaky_relu_ Inplace APIs * add softmax_with_cross_entropy_ Inplace API * add clip_ scale_ add_ subtract_ Inplace APIs * add wlist * fix parameter of scale api * add add_n_ Inplace API and remove log_ Inplace API * fix elementwise_add_ and elementwise_sub_ broadcast problem * elementwise inplace api give error message before run the op * use broadcast_shape in elementwise inplace op * add 8 inplace apis that is auto generated * add unittest for all inplace apis * add decorator for inplace apis in static mode * fix windows blas fail of exp inplace api, change array_equal to allclose * add flatten inplace api * add flatten unittest * fix flatten unittest * add decorator * fix grad.numpy in test_pylayer_op * unsupport softmax_with_cross_entropy_ * add test_inplace_softmax_with_cross_entropy to static_mode_white_list * delete __all__ in inplace_utils * delete activation inplace function and add Tensor.inplace_func * change paddle.inplace_ to Tensor.inplace_ * fix little problem * add paddle in inplace_utils --- paddle/fluid/imperative/basic_engine.cc | 3 +- paddle/fluid/operators/flatten_op.h | 37 +-- python/paddle/fluid/dygraph/__init__.py | 2 + python/paddle/fluid/dygraph/inplace_utils.py | 38 +++ .../fluid/layers/layer_function_generator.py | 32 +- python/paddle/fluid/layers/ops.py | 21 +- .../fluid/tests/unittests/test_clip_op.py | 48 +-- .../unittests/test_elementwise_add_op.py | 74 ++++- .../unittests/test_elementwise_sub_op.py | 106 +++++++ .../test_flatten_contiguous_range_op.py | 42 +++ .../fluid/tests/unittests/test_inplace.py | 117 +++++++- .../test_inplace_auto_generated_apis.py | 281 ++++++++++++++++++ .../fluid/tests/unittests/test_scale_op.py | 42 +++ python/paddle/nn/functional/activation.py | 27 +- python/paddle/tensor/__init__.py | 24 ++ python/paddle/tensor/manipulation.py | 108 ++++--- python/paddle/tensor/math.py | 82 ++++- tools/wlist.json | 48 +++ 18 files changed, 997 insertions(+), 135 deletions(-) create mode 100644 python/paddle/fluid/dygraph/inplace_utils.py create mode 100644 python/paddle/fluid/tests/unittests/test_inplace_auto_generated_apis.py diff --git a/paddle/fluid/imperative/basic_engine.cc b/paddle/fluid/imperative/basic_engine.cc index 023a148763d..7bcc3d6c608 100644 --- a/paddle/fluid/imperative/basic_engine.cc +++ b/paddle/fluid/imperative/basic_engine.cc @@ -408,7 +408,8 @@ void BasicEngine::Execute() { VLOG(10) << "create temporary var of " << var->Name() << " for sum gradient within this graph!"; } else if 
(!inplace_grad_name_map.empty() && - inplace_grad_name_map.count(pair.first)) { + inplace_grad_name_map.count(pair.first) && + bwd_ins.count(inplace_grad_name_map.at(pair.first))) { // When calculate Inplace grad op, create a new output var. // If a tmp var has been created, there is no need to create it // again. diff --git a/paddle/fluid/operators/flatten_op.h b/paddle/fluid/operators/flatten_op.h index 1b2f1db1b07..efcb0cbe2e2 100644 --- a/paddle/fluid/operators/flatten_op.h +++ b/paddle/fluid/operators/flatten_op.h @@ -120,23 +120,9 @@ template class FlattenContiguousRangeKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &context) const override { - auto &start_axis = context.Attr("start_axis"); - auto &stop_axis = context.Attr("stop_axis"); - auto *in = context.Input("X"); - auto x_dims = in->dims(); - int in_dims_size = x_dims.size(); - int real_start_axis = start_axis, real_stop_axis = stop_axis; - if (start_axis < 0) { - real_start_axis = start_axis + in_dims_size; - } - if (stop_axis < 0) { - real_stop_axis = stop_axis + in_dims_size; - } auto *out = context.Output("Out"); - - auto out_dims = framework::make_ddim( - GetOutputShape(real_start_axis, real_stop_axis, x_dims)); + auto out_dims = out->dims(); out->mutable_data(context.GetPlace(), in->type()); framework::TensorCopy( @@ -144,27 +130,6 @@ class FlattenContiguousRangeKernel : public framework::OpKernel { context.template device_context(), out); out->Resize(out_dims); } - static std::vector GetOutputShape(const int start_axis, - const int stop_axis, - const framework::DDim &in_dims) { - int64_t outer = 1; - std::vector out_shape; - int in_dims_size = in_dims.size(); - out_shape.reserve(in_dims_size - stop_axis + start_axis); - - for (int i = 0; i < start_axis; ++i) { - out_shape.push_back(in_dims[i]); - } - for (int i = start_axis; i <= stop_axis; i++) { - outer *= in_dims[i]; - } - out_shape.push_back(outer); - for (int i = stop_axis + 1; i < in_dims_size; i++) { - out_shape.push_back(in_dims[i]); - } - - return out_shape; - } }; template diff --git a/python/paddle/fluid/dygraph/__init__.py b/python/paddle/fluid/dygraph/__init__.py index cf270ced3b7..d66e3309783 100644 --- a/python/paddle/fluid/dygraph/__init__.py +++ b/python/paddle/fluid/dygraph/__init__.py @@ -58,6 +58,8 @@ from .amp import * from .math_op_patch import monkey_patch_math_varbase +from .inplace_utils import inplace_apis_in_dygraph_only + __all__ = [] __all__ += layers.__all__ __all__ += base.__all__ diff --git a/python/paddle/fluid/dygraph/inplace_utils.py b/python/paddle/fluid/dygraph/inplace_utils.py new file mode 100644 index 00000000000..c1f7ef9b691 --- /dev/null +++ b/python/paddle/fluid/dygraph/inplace_utils.py @@ -0,0 +1,38 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from ..wrapped_decorator import wrap_decorator +from ..framework import in_dygraph_mode +import warnings +import paddle + + +# NOTE(pangyoki): The Inplace APIs with underline(`_`) is only valid for the method of calling `core.ops` +# in dygraph mode. If static mode is used, the inplace mechanism will not be used, and the static method +# of the original API will be called. +def _inplace_apis_in_dygraph_only_(func): + def __impl__(*args, **kwargs): + if not in_dygraph_mode(): + origin_api_name = func.__name__[:-1] + warnings.warn( + "In static mode, {}() is the same as {}() and does not perform inplace operation.". + format(func.__name__, origin_api_name)) + origin_func = "{}.{}".format(func.__module__, origin_api_name) + return eval(origin_func)(*args, **kwargs) + return func(*args, **kwargs) + + return __impl__ + + +inplace_apis_in_dygraph_only = wrap_decorator(_inplace_apis_in_dygraph_only_) diff --git a/python/paddle/fluid/layers/layer_function_generator.py b/python/paddle/fluid/layers/layer_function_generator.py index 708692c215f..6e52ea04a19 100755 --- a/python/paddle/fluid/layers/layer_function_generator.py +++ b/python/paddle/fluid/layers/layer_function_generator.py @@ -25,7 +25,8 @@ from ..layer_helper import LayerHelper from ..data_feeder import check_variable_and_dtype __all__ = [ - 'generate_layer_fn', 'generate_activation_fn', 'autodoc', 'templatedoc' + 'generate_layer_fn', 'generate_activation_fn', 'generate_inplace_fn', + 'autodoc', 'templatedoc' ] @@ -283,6 +284,35 @@ def generate_activation_fn(op_type): return func +def generate_inplace_fn(inplace_op_type): + """Register the Python layer for an Inplace Operator without Attribute. + + Args: + inplace_op_type: The name of the inplace operator to be created. + + This function takes in the inplace operator type (exp_ , ceil_ etc) and + creates the operator functionality. + """ + origin_op_type = inplace_op_type[:-1] + + def func(x, name=None): + if in_dygraph_mode(): + op = getattr(core.ops, inplace_op_type) + return op(x) + warnings.warn( + "In static mode, {}() is the same as {}() and does not perform inplace operation.". + format(inplace_op_type, origin_op_type)) + return generate_activation_fn(origin_op_type)(x, name) + + func.__name__ = inplace_op_type + func.__doc__ = """ +Inplace version of ``{0}`` API, the output Tensor will be inplaced with input ``x``. +Please refer to :ref:`api_fluid_layers_{1}`. +""".format(origin_op_type, origin_op_type) + + return func + + def autodoc(comment=""): def __impl__(func): func.__doc__ = _generate_doc_string_(OpProtoHolder.instance( diff --git a/python/paddle/fluid/layers/ops.py b/python/paddle/fluid/layers/ops.py index 67cdc6dce5a..813f671e020 100755 --- a/python/paddle/fluid/layers/ops.py +++ b/python/paddle/fluid/layers/ops.py @@ -14,7 +14,7 @@ from __future__ import print_function import os -from .layer_function_generator import generate_layer_fn, generate_activation_fn, add_sample_code +from .layer_function_generator import generate_layer_fn, generate_activation_fn, generate_inplace_fn, add_sample_code from .. 
import core from ..framework import convert_np_dtype_to_dtype_, Variable from ..data_feeder import convert_dtype, check_variable_and_dtype, check_type, check_dtype @@ -55,6 +55,16 @@ __unary_func__ = [ 'square', ] +__inplace_unary_func__ = [ + 'exp_', + 'sqrt_', + 'rsqrt_', + 'ceil_', + 'floor_', + 'round_', + 'reciprocal_', +] + __all__ = [] for _OP in set(__all__): @@ -69,6 +79,7 @@ globals()['_elementwise_div'] = generate_layer_fn('elementwise_div') __all__ += __activations_noattr__ __all__ += __unary_func__ +__all__ += __inplace_unary_func__ for _OP in set(__activations_noattr__): _new_OP = _OP @@ -87,6 +98,14 @@ for _OP in set(__unary_func__): func = deprecated(since="2.0.0", update_to="paddle.%s" % (_new_OP))(func) globals()[_OP] = func +for _OP in set(__inplace_unary_func__): + _new_OP = _OP + if _OP in __deprecated_func_name__: + _new_OP = __deprecated_func_name__[_OP] + func = generate_inplace_fn(_OP) + func = deprecated(since="2.0.0", update_to="paddle.%s" % (_new_OP))(func) + globals()[_OP] = func + add_sample_code(globals()["sigmoid"], r""" Examples: .. code-block:: python diff --git a/python/paddle/fluid/tests/unittests/test_clip_op.py b/python/paddle/fluid/tests/unittests/test_clip_op.py index b05100fc7b4..1833c473d18 100644 --- a/python/paddle/fluid/tests/unittests/test_clip_op.py +++ b/python/paddle/fluid/tests/unittests/test_clip_op.py @@ -124,6 +124,9 @@ class TestClipOpError(unittest.TestCase): class TestClipAPI(unittest.TestCase): + def _executed_api(self, x, min=None, max=None): + return paddle.clip(x, min, max) + def test_clip(self): paddle.enable_static() data_shape = [1, 9, 9, 4] @@ -136,18 +139,20 @@ class TestClipAPI(unittest.TestCase): ) else fluid.CPUPlace() exe = fluid.Executor(place) - out_1 = paddle.clip(images, min=min, max=max) - out_2 = paddle.clip(images, min=0.2, max=0.9) - out_3 = paddle.clip(images, min=0.3) - out_4 = paddle.clip(images, max=0.7) - out_5 = paddle.clip(images, min=min) - out_6 = paddle.clip(images, max=max) - out_7 = paddle.clip(images, max=-1.) - out_8 = paddle.clip(images) - out_9 = paddle.clip(paddle.cast(images, 'float64'), min=0.2, max=0.9) - - out_10 = paddle.clip(paddle.cast(images * 10, 'int32'), min=2, max=8) - out_11 = paddle.clip(paddle.cast(images * 10, 'int64'), min=2, max=8) + out_1 = self._executed_api(images, min=min, max=max) + out_2 = self._executed_api(images, min=0.2, max=0.9) + out_3 = self._executed_api(images, min=0.3) + out_4 = self._executed_api(images, max=0.7) + out_5 = self._executed_api(images, min=min) + out_6 = self._executed_api(images, max=max) + out_7 = self._executed_api(images, max=-1.) 
+ out_8 = self._executed_api(images) + out_9 = self._executed_api( + paddle.cast(images, 'float64'), min=0.2, max=0.9) + out_10 = self._executed_api( + paddle.cast(images * 10, 'int32'), min=2, max=8) + out_11 = self._executed_api( + paddle.cast(images * 10, 'int64'), min=2, max=8) res1, res2, res3, res4, res5, res6, res7, res8, res9, res10, res11 = exe.run( fluid.default_main_program(), @@ -188,12 +193,16 @@ class TestClipAPI(unittest.TestCase): v_min = paddle.to_tensor(np.array([0.2], dtype=np.float32)) v_max = paddle.to_tensor(np.array([0.8], dtype=np.float32)) - out_1 = paddle.clip(images, min=0.2, max=0.8) - out_2 = paddle.clip(images, min=0.2, max=0.9) - out_3 = paddle.clip(images, min=v_min, max=v_max) + out_1 = self._executed_api(images, min=0.2, max=0.8) + images = paddle.to_tensor(data, dtype='float32') + out_2 = self._executed_api(images, min=0.2, max=0.9) + images = paddle.to_tensor(data, dtype='float32') + out_3 = self._executed_api(images, min=v_min, max=v_max) - out_4 = paddle.clip(paddle.cast(images * 10, 'int32'), min=2, max=8) - out_5 = paddle.clip(paddle.cast(images * 10, 'int64'), min=2, max=8) + out_4 = self._executed_api( + paddle.cast(images * 10, 'int32'), min=2, max=8) + out_5 = self._executed_api( + paddle.cast(images * 10, 'int64'), min=2, max=8) self.assertTrue(np.allclose(out_1.numpy(), data.clip(0.2, 0.8))) self.assertTrue(np.allclose(out_2.numpy(), data.clip(0.2, 0.9))) @@ -212,5 +221,10 @@ class TestClipAPI(unittest.TestCase): paddle.disable_static() +class TestInplaceClipAPI(TestClipAPI): + def _executed_api(self, x, min=None, max=None): + return x.clip_(min, max) + + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_elementwise_add_op.py b/python/paddle/fluid/tests/unittests/test_elementwise_add_op.py index 9235542fede..d067a2bd577 100644 --- a/python/paddle/fluid/tests/unittests/test_elementwise_add_op.py +++ b/python/paddle/fluid/tests/unittests/test_elementwise_add_op.py @@ -408,13 +408,16 @@ class TestElementwiseAddOpError(unittest.TestCase): self.assertRaises(TypeError, fluid.layers.elementwise_add, x2, y2) -class TestAddOp(unittest.TestCase): +class TestAddApi(unittest.TestCase): + def _executed_api(self, x, y, name=None): + return paddle.add(x, y, name) + def test_name(self): with fluid.program_guard(fluid.Program()): x = fluid.data(name="x", shape=[2, 3], dtype="float32") y = fluid.data(name='y', shape=[2, 3], dtype='float32') - y_1 = paddle.add(x, y, name='add_res') + y_1 = self._executed_api(x, y, name='add_res') self.assertEqual(('add_res' in y_1.name), True) def test_declarative(self): @@ -428,7 +431,7 @@ class TestAddOp(unittest.TestCase): x = fluid.data(name="x", shape=[3], dtype='float32') y = fluid.data(name="y", shape=[3], dtype='float32') - z = paddle.add(x, y) + z = self._executed_api(x, y) place = fluid.CPUPlace() exe = fluid.Executor(place) @@ -442,12 +445,75 @@ class TestAddOp(unittest.TestCase): np_y = np.array([1, 5, 2]).astype('float64') x = fluid.dygraph.to_variable(np_x) y = fluid.dygraph.to_variable(np_y) - z = paddle.add(x, y) + z = self._executed_api(x, y) np_z = z.numpy() z_expected = np.array([3., 8., 6.]) self.assertEqual((np_z == z_expected).all(), True) +class TestAddInplaceApi(TestAddApi): + def _executed_api(self, x, y, name=None): + return x.add_(y, name) + + +class TestAddInplaceBroadcastSuccess(unittest.TestCase): + def init_data(self): + self.x_numpy = np.random.rand(2, 3, 4).astype('float') + self.y_numpy = np.random.rand(3, 4).astype('float') + + def 
test_broadcast_success(self): + paddle.disable_static() + self.init_data() + x = paddle.to_tensor(self.x_numpy) + y = paddle.to_tensor(self.y_numpy) + inplace_result = x.add_(y) + numpy_result = self.x_numpy + self.y_numpy + self.assertEqual((inplace_result.numpy() == numpy_result).all(), True) + paddle.enable_static() + + +class TestAddInplaceBroadcastSuccess2(TestAddInplaceBroadcastSuccess): + def init_data(self): + self.x_numpy = np.random.rand(1, 2, 3, 1).astype('float') + self.y_numpy = np.random.rand(3, 1).astype('float') + + +class TestAddInplaceBroadcastSuccess3(TestAddInplaceBroadcastSuccess): + def init_data(self): + self.x_numpy = np.random.rand(2, 3, 1, 5).astype('float') + self.y_numpy = np.random.rand(1, 3, 1, 5).astype('float') + + +class TestAddInplaceBroadcastError(unittest.TestCase): + def init_data(self): + self.x_numpy = np.random.rand(3, 4).astype('float') + self.y_numpy = np.random.rand(2, 3, 4).astype('float') + + def test_broadcast_errors(self): + paddle.disable_static() + self.init_data() + x = paddle.to_tensor(self.x_numpy) + y = paddle.to_tensor(self.y_numpy) + + def broadcast_shape_error(): + x.add_(y) + + self.assertRaises(ValueError, broadcast_shape_error) + paddle.enable_static() + + +class TestAddInplaceBroadcastError2(TestAddInplaceBroadcastError): + def init_data(self): + self.x_numpy = np.random.rand(2, 1, 4).astype('float') + self.y_numpy = np.random.rand(2, 3, 4).astype('float') + + +class TestAddInplaceBroadcastError3(TestAddInplaceBroadcastError): + def init_data(self): + self.x_numpy = np.random.rand(5, 2, 1, 4).astype('float') + self.y_numpy = np.random.rand(2, 3, 4).astype('float') + + class TestComplexElementwiseAddOp(OpTest): def setUp(self): self.op_type = "elementwise_add" diff --git a/python/paddle/fluid/tests/unittests/test_elementwise_sub_op.py b/python/paddle/fluid/tests/unittests/test_elementwise_sub_op.py index c5372d5b758..2594c96eebd 100644 --- a/python/paddle/fluid/tests/unittests/test_elementwise_sub_op.py +++ b/python/paddle/fluid/tests/unittests/test_elementwise_sub_op.py @@ -16,6 +16,7 @@ from __future__ import print_function import unittest import numpy as np import paddle +import paddle.fluid as fluid from op_test import OpTest, skip_check_grad_ci @@ -237,6 +238,111 @@ class TestRealComplexElementwiseSubOp(TestComplexElementwiseSubOp): self.grad_y = -self.grad_out +class TestSubtractApi(unittest.TestCase): + def _executed_api(self, x, y, name=None): + return paddle.subtract(x, y, name) + + def test_name(self): + with fluid.program_guard(fluid.Program()): + x = fluid.data(name="x", shape=[2, 3], dtype="float32") + y = fluid.data(name='y', shape=[2, 3], dtype='float32') + + y_1 = self._executed_api(x, y, name='subtract_res') + self.assertEqual(('subtract_res' in y_1.name), True) + + def test_declarative(self): + with fluid.program_guard(fluid.Program()): + + def gen_data(): + return { + "x": np.array([2, 3, 4]).astype('float32'), + "y": np.array([1, 5, 2]).astype('float32') + } + + x = fluid.data(name="x", shape=[3], dtype='float32') + y = fluid.data(name="y", shape=[3], dtype='float32') + z = self._executed_api(x, y) + place = fluid.CPUPlace() + exe = fluid.Executor(place) + z_value = exe.run(feed=gen_data(), fetch_list=[z.name]) + z_expected = np.array([1., -2., 2.]) + self.assertEqual((z_value == z_expected).all(), True) + + def test_dygraph(self): + with fluid.dygraph.guard(): + np_x = np.array([2, 3, 4]).astype('float64') + np_y = np.array([1, 5, 2]).astype('float64') + x = fluid.dygraph.to_variable(np_x) + y = 
fluid.dygraph.to_variable(np_y) + z = self._executed_api(x, y) + np_z = z.numpy() + z_expected = np.array([1., -2., 2.]) + self.assertEqual((np_z == z_expected).all(), True) + + +class TestSubtractInplaceApi(TestSubtractApi): + def _executed_api(self, x, y, name=None): + return x.subtract_(y, name) + + +class TestSubtractInplaceBroadcastSuccess(unittest.TestCase): + def init_data(self): + self.x_numpy = np.random.rand(2, 3, 4).astype('float') + self.y_numpy = np.random.rand(3, 4).astype('float') + + def test_broadcast_success(self): + paddle.disable_static() + self.init_data() + x = paddle.to_tensor(self.x_numpy) + y = paddle.to_tensor(self.y_numpy) + inplace_result = x.subtract_(y) + numpy_result = self.x_numpy - self.y_numpy + self.assertEqual((inplace_result.numpy() == numpy_result).all(), True) + paddle.enable_static() + + +class TestSubtractInplaceBroadcastSuccess2(TestSubtractInplaceBroadcastSuccess): + def init_data(self): + self.x_numpy = np.random.rand(1, 2, 3, 1).astype('float') + self.y_numpy = np.random.rand(3, 1).astype('float') + + +class TestSubtractInplaceBroadcastSuccess3(TestSubtractInplaceBroadcastSuccess): + def init_data(self): + self.x_numpy = np.random.rand(2, 3, 1, 5).astype('float') + self.y_numpy = np.random.rand(1, 3, 1, 5).astype('float') + + +class TestSubtractInplaceBroadcastError(unittest.TestCase): + def init_data(self): + self.x_numpy = np.random.rand(3, 4).astype('float') + self.y_numpy = np.random.rand(2, 3, 4).astype('float') + + def test_broadcast_errors(self): + paddle.disable_static() + self.init_data() + x = paddle.to_tensor(self.x_numpy) + y = paddle.to_tensor(self.y_numpy) + + def broadcast_shape_error(): + x.subtract_(y) + + self.assertRaises(ValueError, broadcast_shape_error) + paddle.enable_static() + + +class TestSubtractInplaceBroadcastError2(TestSubtractInplaceBroadcastError): + def init_data(self): + self.x_numpy = np.random.rand(2, 1, 4).astype('float') + self.y_numpy = np.random.rand(2, 3, 4).astype('float') + + +class TestSubtractInplaceBroadcastError3(TestSubtractInplaceBroadcastError): + def init_data(self): + self.x_numpy = np.random.rand(5, 2, 1, 4).astype('float') + self.y_numpy = np.random.rand(2, 3, 4).astype('float') + + if __name__ == '__main__': paddle.enable_static() unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_flatten_contiguous_range_op.py b/python/paddle/fluid/tests/unittests/test_flatten_contiguous_range_op.py index d6cc6ecffc1..bc9ff369771 100644 --- a/python/paddle/fluid/tests/unittests/test_flatten_contiguous_range_op.py +++ b/python/paddle/fluid/tests/unittests/test_flatten_contiguous_range_op.py @@ -182,6 +182,30 @@ class TestFlatten2OpError(unittest.TestCase): self.assertRaises(ValueError, test_InputError) +class TestStaticFlattenPythonAPI(unittest.TestCase): + def execute_api(self, x, start_axis=0, stop_axis=-1): + return paddle.flatten(x, start_axis, stop_axis) + + def test_static_api(self): + paddle.enable_static() + np_x = np.random.rand(2, 3, 4, 4).astype('float32') + + main_prog = paddle.static.Program() + with paddle.static.program_guard(main_prog, paddle.static.Program()): + x = paddle.static.data( + name="x", shape=[2, 3, 4, 4], dtype='float32') + out = self.execute_api(x, start_axis=-2, stop_axis=-1) + + exe = paddle.static.Executor(place=paddle.CPUPlace()) + fetch_out = exe.run(main_prog, feed={"x": np_x}, fetch_list=[out]) + self.assertTrue((2, 3, 16) == fetch_out[0].shape) + + +class TestStaticInplaceFlattenPythonAPI(TestStaticFlattenPythonAPI): + def execute_api(self, x, 
start_axis=0, stop_axis=-1): + return x.flatten_(start_axis, stop_axis) + + class TestFlattenPython(unittest.TestCase): def test_python_api(self): image_shape = (2, 3, 4, 4) @@ -204,5 +228,23 @@ class TestFlattenPython(unittest.TestCase): self.assertTrue((2, 3, 16) == res_shape) +class TestDygraphInplaceFlattenPython(unittest.TestCase): + def test_python_api(self): + image_shape = (2, 3, 4, 4) + x = np.arange(image_shape[0] * image_shape[1] * image_shape[2] * + image_shape[3]).reshape(image_shape) / 100. + x = x.astype('float32') + + def test_Negative(): + paddle.disable_static() + img = paddle.to_tensor(x) + out = img.flatten_(start_axis=-2, stop_axis=-1) + return out.numpy().shape + + res_shape = test_Negative() + self.assertTrue((2, 3, 16) == res_shape) + paddle.enable_static() + + if __name__ == "__main__": unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_inplace.py b/python/paddle/fluid/tests/unittests/test_inplace.py index 7b9becacd82..3d158763527 100644 --- a/python/paddle/fluid/tests/unittests/test_inplace.py +++ b/python/paddle/fluid/tests/unittests/test_inplace.py @@ -98,11 +98,15 @@ class TestInplace(unittest.TestCase): class TestDygraphInplace(unittest.TestCase): def setUp(self): self.init_data() + self.set_np_compare_func() def init_data(self): - self.input_var_numpy = np.random.rand(2, 3, 1) + self.input_var_numpy = np.random.uniform(-5, 5, [10, 20, 1]) self.dtype = "float32" + def set_np_compare_func(self): + self.np_compare = np.array_equal + def non_inplace_api_processing(self, var): return paddle.squeeze(var) @@ -190,7 +194,7 @@ class TestDygraphInplace(unittest.TestCase): loss.backward() grad_var_a = var_a.grad.numpy() - self.assertTrue(np.array_equal(grad_var_a_inplace, grad_var_a)) + self.assertTrue(self.np_compare(grad_var_a_inplace, grad_var_a)) def test_backward_success_2(self): # Although var_b is modified inplace after using it, it does not used in gradient computation. 
@@ -244,6 +248,14 @@ class TestDygraphInplaceReshape(TestDygraphInplace): return paddle.reshape_(var, [-1]) +class TestDygraphInplaceFlatten(TestDygraphInplace): + def non_inplace_api_processing(self, var): + return var.flatten() + + def inplace_api_processing(self, var): + return var.flatten_() + + class TestDygraphInplaceScatter(TestDygraphInplace): def init_data(self): self.input_var_numpy = np.array([[1, 1], [2, 2], [3, 3]]) @@ -296,5 +308,106 @@ class TestDygraphInplaceTanh(TestDygraphInplace): return paddle.tanh_(var) +class TestDygraphInplaceCeil(TestDygraphInplace): + def non_inplace_api_processing(self, var): + return var.ceil() + + def inplace_api_processing(self, var): + return var.ceil_() + + +class TestDygraphInplaceFloor(TestDygraphInplace): + def non_inplace_api_processing(self, var): + return var.floor() + + def inplace_api_processing(self, var): + return var.floor_() + + +class TestDygraphInplaceExp(TestDygraphInplace): + def set_np_compare_func(self): + self.np_compare = np.allclose + + def non_inplace_api_processing(self, var): + return var.exp() + + def inplace_api_processing(self, var): + return var.exp_() + + +class TestDygraphInplaceReciprocal(TestDygraphInplace): + def non_inplace_api_processing(self, var): + return var.reciprocal() + + def inplace_api_processing(self, var): + return var.reciprocal_() + + +class TestDygraphInplaceRound(TestDygraphInplace): + def non_inplace_api_processing(self, var): + return var.round() + + def inplace_api_processing(self, var): + return var.round_() + + +class TestDygraphInplaceSqrt(TestDygraphInplace): + def init_data(self): + self.input_var_numpy = np.random.uniform(0, 5, [10, 20, 1]) + self.dtype = "float32" + + def non_inplace_api_processing(self, var): + return var.sqrt() + + def inplace_api_processing(self, var): + return var.sqrt_() + + +class TestDygraphInplaceRsqrt(TestDygraphInplaceSqrt): + def non_inplace_api_processing(self, var): + return var.rsqrt() + + def inplace_api_processing(self, var): + return var.rsqrt_() + + +class TestDygraphInplaceClip(TestDygraphInplace): + def non_inplace_api_processing(self, var): + return var.clip(0.6, 1.5) + + def inplace_api_processing(self, var): + return var.clip_(0.6, 1.5) + + +class TestDygraphInplaceScale(TestDygraphInplace): + def non_inplace_api_processing(self, var): + return var.scale(scale=2.0, bias=3.0) + + def inplace_api_processing(self, var): + return var.scale_(scale=2.0, bias=3.0) + + +class TestDygraphInplaceAdd(TestDygraphInplace): + def init_data(self): + self.input_var_numpy = np.random.rand(2, 3, 4) + self.dtype = "float32" + input_var_numpy_2 = np.random.rand(2, 3, 4).astype(self.dtype) + self.input_var_2 = paddle.to_tensor(input_var_numpy_2) + + def non_inplace_api_processing(self, var): + return var.add(self.input_var_2) + + def inplace_api_processing(self, var): + return var.add_(self.input_var_2) + + +class TestDygraphInplaceSubtract(TestDygraphInplaceAdd): + def non_inplace_api_processing(self, var): + return var.subtract(self.input_var_2) + + def inplace_api_processing(self, var): + return var.subtract_(self.input_var_2) + + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_inplace_auto_generated_apis.py b/python/paddle/fluid/tests/unittests/test_inplace_auto_generated_apis.py new file mode 100644 index 00000000000..abc8849b614 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_inplace_auto_generated_apis.py @@ -0,0 +1,281 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import unittest +import numpy as np + +from op_test import OpTest +import paddle +import paddle.fluid as fluid +from paddle.static import Program, program_guard + + +# In static mode, inplace strategy will not be used in Inplace APIs. +class TestStaticAutoGeneratedAPI(unittest.TestCase): + def setUp(self): + paddle.enable_static() + self.init_data() + self.set_np_compare_func() + + def init_data(self): + self.dtype = 'float32' + self.shape = [10, 20] + self.np_x = np.random.uniform(-5, 5, self.shape).astype(self.dtype) + + def set_np_compare_func(self): + self.np_compare = np.array_equal + + def executed_paddle_api(self, x): + return x.ceil() + + def executed_numpy_api(self, x): + return np.ceil(x) + + def test_api(self): + main_prog = Program() + with program_guard(main_prog, Program()): + x = paddle.static.data(name="x", shape=self.shape, dtype=self.dtype) + out = self.executed_paddle_api(x) + + exe = paddle.static.Executor(place=paddle.CPUPlace()) + fetch_x, fetch_out = exe.run(main_prog, + feed={"x": self.np_x}, + fetch_list=[x, out]) + + self.assertTrue(np.array_equal(fetch_x, self.np_x)) + self.assertTrue( + self.np_compare(fetch_out, self.executed_numpy_api(self.np_x))) + + +class TestStaticInplaceAutoGeneratedAPI(TestStaticAutoGeneratedAPI): + def executed_paddle_api(self, x): + return x.ceil_() + + +class TestStaticFloorAPI(TestStaticAutoGeneratedAPI): + def executed_paddle_api(self, x): + return x.floor() + + def executed_numpy_api(self, x): + return np.floor(x) + + +class TestStaticInplaceFloorAPI(TestStaticFloorAPI): + def executed_paddle_api(self, x): + return x.floor_() + + +class TestStaticExpAPI(TestStaticAutoGeneratedAPI): + def set_np_compare_func(self): + self.np_compare = np.allclose + + def executed_paddle_api(self, x): + return x.exp() + + def executed_numpy_api(self, x): + return np.exp(x) + + +class TestStaticInplaceExpAPI(TestStaticExpAPI): + def executed_paddle_api(self, x): + return x.exp_() + + +class TestStaticReciprocalAPI(TestStaticAutoGeneratedAPI): + def executed_paddle_api(self, x): + return x.reciprocal() + + def executed_numpy_api(self, x): + return np.reciprocal(x) + + +class TestStaticInplaceReciprocalAPI(TestStaticReciprocalAPI): + def executed_paddle_api(self, x): + return x.reciprocal_() + + +class TestStaticRoundAPI(TestStaticAutoGeneratedAPI): + def executed_paddle_api(self, x): + return x.round() + + def executed_numpy_api(self, x): + return np.round(x) + + +class TestStaticInplaceRoundAPI(TestStaticRoundAPI): + def executed_paddle_api(self, x): + return x.round_() + + +class TestStaticSqrtAPI(TestStaticAutoGeneratedAPI): + def init_data(self): + self.dtype = 'float32' + self.shape = [10, 20] + self.np_x = np.random.uniform(0, 5, self.shape).astype(self.dtype) + + def set_np_compare_func(self): + self.np_compare = np.allclose + + def executed_paddle_api(self, x): + return x.sqrt() + + def executed_numpy_api(self, x): + return np.sqrt(x) + + +class 
TestStaticInplaceSqrtAPI(TestStaticSqrtAPI): + def executed_paddle_api(self, x): + return x.sqrt_() + + +class TestStaticRsqrtAPI(TestStaticSqrtAPI): + def executed_paddle_api(self, x): + return x.rsqrt() + + def executed_numpy_api(self, x): + return 1 / np.sqrt(x) + + +class TestStaticInplaceRsqrtAPI(TestStaticRsqrtAPI): + def executed_paddle_api(self, x): + return x.rsqrt_() + + +# In dygraph mode, inplace strategy will be used in Inplace APIs. +class TestDygraphAutoGeneratedAPI(unittest.TestCase): + def setUp(self): + paddle.disable_static() + self.init_data() + self.set_np_compare_func() + + def init_data(self): + self.dtype = 'float32' + self.shape = [10, 20] + self.np_x = np.random.uniform(-5, 5, self.shape).astype(self.dtype) + + def set_np_compare_func(self): + self.np_compare = np.array_equal + + def executed_paddle_api(self, x): + return x.ceil() + + def executed_numpy_api(self, x): + return np.ceil(x) + + def test_api(self): + x = paddle.to_tensor(self.np_x, dtype=self.dtype) + out = self.executed_paddle_api(x) + + self.assertTrue( + self.np_compare(out.numpy(), self.executed_numpy_api(self.np_x))) + + +class TestDygraphInplaceAutoGeneratedAPI(TestDygraphAutoGeneratedAPI): + def executed_paddle_api(self, x): + return x.ceil_() + + +class TestDygraphFloorAPI(TestDygraphAutoGeneratedAPI): + def executed_paddle_api(self, x): + return x.floor() + + def executed_numpy_api(self, x): + return np.floor(x) + + +class TestDygraphInplaceFloorAPI(TestDygraphFloorAPI): + def executed_paddle_api(self, x): + return x.floor_() + + +class TestDygraphExpAPI(TestDygraphAutoGeneratedAPI): + def executed_paddle_api(self, x): + return x.exp() + + def executed_numpy_api(self, x): + return np.exp(x) + + def set_np_compare_func(self): + self.np_compare = np.allclose + + +class TestDygraphInplaceExpAPI(TestDygraphExpAPI): + def executed_paddle_api(self, x): + return x.exp_() + + +class TestDygraphReciprocalAPI(TestDygraphAutoGeneratedAPI): + def executed_paddle_api(self, x): + return x.reciprocal() + + def executed_numpy_api(self, x): + return np.reciprocal(x) + + +class TestDygraphInplaceReciprocalAPI(TestDygraphReciprocalAPI): + def executed_paddle_api(self, x): + return x.reciprocal_() + + +class TestDygraphRoundAPI(TestDygraphAutoGeneratedAPI): + def executed_paddle_api(self, x): + return x.round() + + def executed_numpy_api(self, x): + return np.round(x) + + +class TestDygraphInplaceRoundAPI(TestDygraphRoundAPI): + def executed_paddle_api(self, x): + return x.round_() + + +class TestDygraphSqrtAPI(TestDygraphAutoGeneratedAPI): + def init_data(self): + self.dtype = 'float32' + self.shape = [10, 20] + self.np_x = np.random.uniform(0, 100, self.shape).astype(self.dtype) + + def set_np_compare_func(self): + self.np_compare = np.allclose + + def executed_paddle_api(self, x): + return x.sqrt() + + def executed_numpy_api(self, x): + return np.sqrt(x) + + +class TestDygraphInplaceSqrtAPI(TestDygraphSqrtAPI): + def executed_paddle_api(self, x): + return x.sqrt_() + + +class TestDygraphRsqrtAPI(TestDygraphSqrtAPI): + def executed_paddle_api(self, x): + return x.rsqrt() + + def executed_numpy_api(self, x): + return 1. 
/ np.sqrt(x) + + +class TestDygraphInplaceRsqrtAPI(TestDygraphRsqrtAPI): + def executed_paddle_api(self, x): + return x.rsqrt_() + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_scale_op.py b/python/paddle/fluid/tests/unittests/test_scale_op.py index 052704659b6..c1ce032f506 100644 --- a/python/paddle/fluid/tests/unittests/test_scale_op.py +++ b/python/paddle/fluid/tests/unittests/test_scale_op.py @@ -17,9 +17,11 @@ from __future__ import print_function import unittest import numpy as np from op_test import OpTest +import paddle import paddle.fluid as fluid import paddle.fluid.core as core from paddle.fluid.op import Operator +from paddle.static import Program, program_guard class TestScaleOp(OpTest): @@ -168,5 +170,45 @@ class TestScaleFp16OpSelectedRows(TestScaleOpSelectedRows): self.check_with_place(place, 'in', 'in') +class TestScaleApiStatic(unittest.TestCase): + def _executed_api(self, x, scale=1.0, bias=0.0): + return paddle.scale(x, scale, bias) + + def test_api(self): + paddle.enable_static() + input = np.random.random([2, 25]).astype("float32") + main_prog = Program() + with program_guard(main_prog, Program()): + x = paddle.static.data(name="x", shape=[2, 25], dtype="float32") + out = self._executed_api(x, scale=2.0, bias=3.0) + + exe = paddle.static.Executor(place=paddle.CPUPlace()) + out = exe.run(main_prog, feed={"x": input}, fetch_list=[out]) + self.assertEqual(np.array_equal(out[0], input * 2.0 + 3.0), True) + + +class TestScaleInplaceApiStatic(TestScaleApiStatic): + def _executed_api(self, x, scale=1.0, bias=0.0): + return x.scale_(scale, bias) + + +class TestScaleApiDygraph(unittest.TestCase): + def _executed_api(self, x, scale=1.0, bias=0.0): + return paddle.scale(x, scale, bias) + + def test_api(self): + paddle.disable_static() + input = np.random.random([2, 25]).astype("float32") + x = paddle.to_tensor(input) + out = self._executed_api(x, scale=2.0, bias=3.0) + self.assertEqual(np.array_equal(out.numpy(), input * 2.0 + 3.0), True) + paddle.enable_static() + + +class TestScaleInplaceApiDygraph(TestScaleApiDygraph): + def _executed_api(self, x, scale=1.0, bias=0.0): + return x.scale_(scale, bias) + + if __name__ == "__main__": unittest.main() diff --git a/python/paddle/nn/functional/activation.py b/python/paddle/nn/functional/activation.py index 9001ba16b7a..d5dc6322522 100644 --- a/python/paddle/nn/functional/activation.py +++ b/python/paddle/nn/functional/activation.py @@ -16,7 +16,7 @@ from ...fluid.layers import sigmoid # noqa: F401 from ...tensor.math import tanh # noqa: F401 from ...tensor.math import tanh_ # noqa: F401 -from ...tensor.manipulation import _print_warning_in_static_mode +from ...fluid.dygraph.inplace_utils import inplace_apis_in_dygraph_only from ...tensor.manipulation import chunk from ...tensor.math import multiply @@ -73,17 +73,13 @@ def elu(x, alpha=1.0, name=None): return out +@inplace_apis_in_dygraph_only def elu_(x, alpha=1.0, name=None): r""" Inplace version of ``elu`` API, the output Tensor will be inplaced with input ``x``. Please refer to :ref:`api_nn_cn_elu`. """ - - if in_dygraph_mode(): - return core.ops.elu_(x, 'alpha', alpha) - - _print_warning_in_static_mode("elu") - return elu(x, alpha, name) + return core.ops.elu_(x, 'alpha', alpha) def gelu(x, approximate=False, name=None): @@ -501,17 +497,13 @@ def relu(x, name=None): return out +@inplace_apis_in_dygraph_only def relu_(x, name=None): """ Inplace version of ``relu`` API, the output Tensor will be inplaced with input ``x``. 
Please refer to :ref:`api_nn_cn_relu`. """ - - if in_dygraph_mode(): - return core.ops.relu_(x) - - _print_warning_in_static_mode("relu") - return relu(x, name) + return core.ops.relu_(x) def log_sigmoid(x, name=None): @@ -912,21 +904,16 @@ def softmax(x, axis=-1, dtype=None, name=None): return outs_softmax +@inplace_apis_in_dygraph_only def softmax_(x, axis=-1, dtype=None, name=None): r""" Inplace version of ``softmax`` API, the output Tensor will be inplaced with input ``x``. Please refer to :ref:`api_nn_cn_softmax`. """ - if (dtype is not None) and (not isinstance(dtype, core.VarDesc.VarType)): dtype = convert_np_dtype_to_dtype_(dtype) use_cudnn = True - - if in_dygraph_mode(): - return core.ops.softmax_(x, 'axis', axis, 'use_cudnn', use_cudnn) - - _print_warning_in_static_mode("softmax") - return softmax(x, axis, dtype, name) + return core.ops.softmax_(x, 'axis', axis, 'use_cudnn', use_cudnn) def softplus(x, beta=1, threshold=20, name=None): diff --git a/python/paddle/tensor/__init__.py b/python/paddle/tensor/__init__.py index c863f2b86a5..c8d80fc9bc6 100755 --- a/python/paddle/tensor/__init__.py +++ b/python/paddle/tensor/__init__.py @@ -65,6 +65,7 @@ from .manipulation import broadcast_to # noqa: F401 from .manipulation import expand_as # noqa: F401 from .manipulation import tile # noqa: F401 from .manipulation import flatten # noqa: F401 +from .manipulation import flatten_ # noqa: F401 from .manipulation import gather # noqa: F401 from .manipulation import gather_nd # noqa: F401 from .manipulation import reshape # noqa: F401 @@ -95,24 +96,32 @@ from .math import acos # noqa: F401 from .math import asin # noqa: F401 from .math import atan # noqa: F401 from .math import ceil # noqa: F401 +from .math import ceil_ # noqa: F401 from .math import cos # noqa: F401 from .math import tan # noqa: F401 from .math import cosh # noqa: F401 from .math import cumsum # noqa: F401 from .math import exp # noqa: F401 +from .math import exp_ # noqa: F401 from .math import floor # noqa: F401 +from .math import floor_ # noqa: F401 from .math import increment # noqa: F401 from .math import log # noqa: F401 from .math import multiplex # noqa: F401 from .math import pow # noqa: F401 from .math import reciprocal # noqa: F401 +from .math import reciprocal_ # noqa: F401 from .math import round # noqa: F401 +from .math import round_ # noqa: F401 from .math import rsqrt # noqa: F401 +from .math import rsqrt_ # noqa: F401 from .math import scale # noqa: F401 +from .math import scale_ # noqa: F401 from .math import sign # noqa: F401 from .math import sin # noqa: F401 from .math import sinh # noqa: F401 from .math import sqrt # noqa: F401 +from .math import sqrt_ # noqa: F401 from .math import square # noqa: F401 from .math import stanh # noqa: F401 from .math import sum # noqa: F401 @@ -131,7 +140,9 @@ from .math import mod # noqa: F401 from .math import floor_mod # noqa: F401 from .math import multiply # noqa: F401 from .math import add # noqa: F401 +from .math import add_ # noqa: F401 from .math import subtract # noqa: F401 +from .math import subtract_ # noqa: F401 from .math import atan # noqa: F401 from .math import logsumexp # noqa: F401 from .math import inverse # noqa: F401 @@ -141,6 +152,7 @@ from .math import log1p # noqa: F401 from .math import erf # noqa: F401 from .math import addmm # noqa: F401 from .math import clip # noqa: F401 +from .math import clip_ # noqa: F401 from .math import trace # noqa: F401 from .math import kron # noqa: F401 from .math import isfinite # noqa: F401 @@ -202,11 +214,14 @@ 
tensor_method_func = [ #noqa 'asin', 'atan', 'ceil', + 'ceil_', 'cos', 'cosh', 'cumsum', 'exp', + 'exp_', 'floor', + 'floor_', 'increment', 'log', 'log2', @@ -217,13 +232,18 @@ tensor_method_func = [ #noqa 'pow', 'prod', 'reciprocal', + 'reciprocal_', 'round', + 'round_', 'rsqrt', + 'rsqrt_', 'scale', + 'scale_', 'sign', 'sin', 'sinh', 'sqrt', + 'sqrt_', 'square', 'stanh', 'sum', @@ -242,7 +262,9 @@ tensor_method_func = [ #noqa 'floor_mod', 'multiply', 'add', + 'add_', 'subtract', + 'subtract_', 'atan', 'logsumexp', 'inverse', @@ -250,6 +272,7 @@ tensor_method_func = [ #noqa 'erf', 'addmm', 'clip', + 'clip_', 'trace', 'kron', 'isfinite', @@ -277,6 +300,7 @@ tensor_method_func = [ #noqa 'broadcast_to', 'expand_as', 'flatten', + 'flatten_', 'gather', 'gather_nd', 'reshape', diff --git a/python/paddle/tensor/manipulation.py b/python/paddle/tensor/manipulation.py index 1a596204267..97826f7d5f8 100644 --- a/python/paddle/tensor/manipulation.py +++ b/python/paddle/tensor/manipulation.py @@ -31,18 +31,12 @@ from ..fluid.layers import unstack # noqa: F401 from ..fluid.layers import scatter_nd # noqa: F401 from ..fluid.layers import shard_index # noqa: F401 from ..fluid import layers +from ..fluid.dygraph.inplace_utils import inplace_apis_in_dygraph_only import paddle -import warnings __all__ = [] -def _print_warning_in_static_mode(api_name): - warnings.warn( - "In static mode, {}_() is the same as {}() and does not perform inplace operation.". - format(api_name, api_name)) - - @dygraph_only def tolist(x): """ @@ -289,6 +283,36 @@ def flatten(x, start_axis=0, stop_axis=-1, name=None): return out +@inplace_apis_in_dygraph_only +def flatten_(x, start_axis=0, stop_axis=-1, name=None): + """ + Inplace version of ``flatten`` API, the output Tensor will be inplaced with input ``x``. + Please refer to :ref:`api_tensor_flatten`. + """ + if not (isinstance(x, Variable)): + raise ValueError("The input x should be a Tensor") + + x_dim = len(x.shape) + if not (isinstance(start_axis, int)) or ( + start_axis > x_dim - 1) or start_axis < -x_dim: + raise ValueError( + "The start_axis should be a int, and in range [-rank(x), rank(x))") + if not (isinstance(stop_axis, int)) or ( + stop_axis > x_dim - 1) or stop_axis < -x_dim: + raise ValueError( + "The stop_axis should be a int, and in range [-rank(x), rank(x))") + if start_axis < 0: + start_axis = start_axis + x_dim + if stop_axis < 0: + stop_axis = stop_axis + x_dim + if start_axis > stop_axis: + raise ValueError("The stop_axis should be larger than stat_axis") + + dy_out, _ = core.ops.flatten_contiguous_range_(x, 'start_axis', start_axis, + 'stop_axis', stop_axis) + return dy_out + + def roll(x, shifts, axis=None, name=None): """ Roll the `x` tensor along the given axis(axes). With specific 'shifts', Elements that @@ -582,6 +606,7 @@ def squeeze(x, axis=None, name=None): return layers.squeeze(x, axis, name) +@inplace_apis_in_dygraph_only def squeeze_(x, axis=None, name=None): """ Inplace version of ``squeeze`` API, the output Tensor will be inplaced with input ``x``. 
@@ -594,12 +619,8 @@ def squeeze_(x, axis=None, name=None): elif isinstance(axis, tuple): axis = list(axis) - if in_dygraph_mode(): - out, _ = core.ops.squeeze2_(x, 'axes', axis) - return out - - _print_warning_in_static_mode("squeeze") - return squeeze(x, axis, name) + out, _ = core.ops.squeeze2_(x, 'axes', axis) + return out def unique(x, @@ -775,26 +796,23 @@ def unsqueeze(x, axis, name=None): return layers.unsqueeze(x, axis, name) +@inplace_apis_in_dygraph_only def unsqueeze_(x, axis, name=None): """ Inplace version of ``unsqueeze`` API, the output Tensor will be inplaced with input ``x``. Please refer to :ref:`api_paddle_tensor_unsqueeze`. """ - if in_dygraph_mode(): - if isinstance(axis, int): - axis = [axis] - elif isinstance(axis, Variable): - axis = axis.numpy().tolist() - elif isinstance(axis, (list, tuple)): - axis = [ - item.numpy().item(0) if isinstance(item, Variable) else item - for item in axis - ] - out, _ = core.ops.unsqueeze2_(x, 'axes', axis) - return out - - _print_warning_in_static_mode("unsqueeze") - return unsqueeze(x, axis, name) + if isinstance(axis, int): + axis = [axis] + elif isinstance(axis, Variable): + axis = axis.numpy().tolist() + elif isinstance(axis, (list, tuple)): + axis = [ + item.numpy().item(0) if isinstance(item, Variable) else item + for item in axis + ] + out, _ = core.ops.unsqueeze2_(x, 'axes', axis) + return out def gather(x, index, axis=None, name=None): @@ -1023,16 +1041,13 @@ def scatter(x, index, updates, overwrite=True, name=None): return out +@inplace_apis_in_dygraph_only def scatter_(x, index, updates, overwrite=True, name=None): """ Inplace version of ``scatter`` API, the output Tensor will be inplaced with input ``x``. Please refer to :ref:`api_paddle_tensor_scatter`. """ - if in_dygraph_mode(): - return core.ops.scatter_(x, index, updates, 'overwrite', overwrite) - - _print_warning_in_static_mode("scatter") - return scatter(x, index, updates, overwrite, name) + return core.ops.scatter_(x, index, updates, 'overwrite', overwrite) def scatter_nd_add(x, index, updates, name=None): @@ -1555,26 +1570,23 @@ def reshape(x, shape, name=None): return paddle.fluid.layers.reshape(x=x, shape=shape, name=name) +@inplace_apis_in_dygraph_only def reshape_(x, shape, name=None): """ Inplace version of ``reshape`` API, the output Tensor will be inplaced with input ``x``. Please refer to :ref:`api_paddle_tensor_reshape`. 
""" - if in_dygraph_mode(): - if isinstance(shape, (list, tuple)): - shape = [ - item.numpy().item(0) if isinstance(item, Variable) else item - for item in shape - ] - out, _ = core.ops.reshape2_(x, None, 'shape', shape) - return out - elif isinstance(shape, Variable): - shape.stop_gradient = True - out, _ = core.ops.reshape2_(x, shape) - return out - - _print_warning_in_static_mode("reshape") - return reshape(x, shape, name) + if isinstance(shape, (list, tuple)): + shape = [ + item.numpy().item(0) if isinstance(item, Variable) else item + for item in shape + ] + out, _ = core.ops.reshape2_(x, None, 'shape', shape) + return out + elif isinstance(shape, Variable): + shape.stop_gradient = True + out, _ = core.ops.reshape2_(x, shape) + return out def gather_nd(x, index, name=None): diff --git a/python/paddle/tensor/math.py b/python/paddle/tensor/math.py index 84c67a9ae8d..23addcb7e3f 100755 --- a/python/paddle/tensor/math.py +++ b/python/paddle/tensor/math.py @@ -30,7 +30,7 @@ from ..fluid.framework import core, _varbase_creator, in_dygraph_mode, Variable, from ..fluid.layer_helper import LayerHelper from ..fluid.data_feeder import check_variable_and_dtype, check_type, check_dtype, convert_dtype from ..fluid.layers.layer_function_generator import _generate_doc_string_, generate_activation_fn, generate_layer_fn -from .manipulation import _print_warning_in_static_mode +from ..fluid.dygraph.inplace_utils import inplace_apis_in_dygraph_only # TODO: define math functions # yapf: disable @@ -38,22 +38,29 @@ from ..fluid.layers import abs # noqa: F401 from ..fluid.layers import acos # noqa: F401 from ..fluid.layers import asin # noqa: F401 from ..fluid.layers import ceil # noqa: F401 +from ..fluid.layers import ceil_ # noqa: F401 from ..fluid.layers import cos # noqa: F401 from ..fluid.layers import tan # noqa: F401 from ..fluid.layers import sinh # noqa: F401 from ..fluid.layers import cosh # noqa: F401 from ..fluid.layers import exp # noqa: F401 +from ..fluid.layers import exp_ # noqa: F401 from ..fluid.layers import floor # noqa: F401 +from ..fluid.layers import floor_ # noqa: F401 from ..fluid.layers import log # noqa: F401 from ..fluid.layers import reciprocal # noqa: F401 +from ..fluid.layers import reciprocal_ # noqa: F401 from ..fluid.layers import round # noqa: F401 +from ..fluid.layers import round_ # noqa: F401 from ..fluid.layers import rsqrt # noqa: F401 +from ..fluid.layers import rsqrt_ # noqa: F401 from ..fluid.layers import scale # noqa: F401 from ..fluid.layers import square # noqa: F401 from ..fluid.layers import stanh # noqa: F401 from ..fluid.layers import atan # noqa: F401 from ..fluid.layers import erf # noqa: F401 from ..fluid.layers import sqrt # noqa: F401 +from ..fluid.layers import sqrt_ # noqa: F401 from ..fluid.layers import sin # noqa: F401 from ..fluid.layers import multiplex # noqa: F401 @@ -74,6 +81,19 @@ _supported_float_dtype_ = [ VarDesc.VarType.FP64, ] + +@inplace_apis_in_dygraph_only +def scale_(x, scale=1.0, bias=0.0, bias_after_scale=True, act=None, name=None): + """ + Inplace version of ``scale`` API, the output Tensor will be inplaced with input ``x``. + Please refer to :ref:`api_tensor_scale`. + """ + _scale = scale.numpy().item(0) if isinstance(scale, Variable) else scale + return core.ops.scale_(x, 'scale', + float(_scale), 'bias', + float(bias), 'bias_after_scale', bias_after_scale) + + def pow(x, y, name=None): """ Compute the power of tensor elements. 
The equation is: @@ -221,6 +241,24 @@ def add(x, y, name=None): return _elementwise_op(LayerHelper(op_type, **locals())) +@inplace_apis_in_dygraph_only +def add_(x, y, name=None): + """ + Inplace version of ``add`` API, the output Tensor will be inplaced with input ``x``. + Please refer to :ref:`api_tensor_add`. + """ + op_type = 'elementwise_add_' + axis = -1 + + out_shape = broadcast_shape(x.shape, y.shape) + if out_shape != x.shape: + raise ValueError("The shape of broadcast output {} is different from that of inplace tensor {} in the Inplace operation.".format(out_shape, x.shape)) + + out = _elementwise_op_in_dygraph( + x, y, axis=axis, op_name=op_type) + return out + + def subtract(x, y, name=None): """ Substract two tensors element-wise. The equation is: @@ -282,6 +320,24 @@ def subtract(x, y, name=None): return _elementwise_op(LayerHelper(op_type, **locals())) +@inplace_apis_in_dygraph_only +def subtract_(x, y, name=None): + """ + Inplace version of ``subtract`` API, the output Tensor will be inplaced with input ``x``. + Please refer to :ref:`api_tensor_subtract`. + """ + axis = -1 + act = None + + out_shape = broadcast_shape(x.shape, y.shape) + if out_shape != x.shape: + raise ValueError("The shape of broadcast output {} is different from that of inplace tensor {} in the Inplace operation.".format(out_shape, x.shape)) + + out = _elementwise_op_in_dygraph( + x, y, axis=axis, act=act, op_name='elementwise_sub_') + return out + + def divide(x, y, name=None): """ Divide two tensors element-wise. The equation is: @@ -1489,6 +1545,24 @@ def clip(x, min=None, max=None, name=None): return output +@inplace_apis_in_dygraph_only +def clip_(x, min=None, max=None, name=None): + """ + Inplace version of ``clip`` API, the output Tensor will be inplaced with input ``x``. + Please refer to :ref:`api_tensor_clip`. + """ + fmin = float(np.finfo(np.float32).min) + fmax = float(np.finfo(np.float32).max) + if isinstance(min, Variable): + min = min.numpy().item(0) + if isinstance(max, Variable): + max = max.numpy().item(0) + min = fmin if min is None else min + max = fmax if max is None else max + return core.ops.clip_(x, "min", min, "max", max) + + + def trace(x, offset=0, axis1=0, axis2=1, name=None): """ **trace** @@ -1908,16 +1982,14 @@ def tanh(x, name=None): helper.append_op(type='tanh', inputs={'X': x}, outputs={'Out': out}) return out +@inplace_apis_in_dygraph_only def tanh_(x, name=None): r""" Inplace version of ``tanh`` API, the output Tensor will be inplaced with input ``x``. Please refer to :ref:`api_tensor_tanh`. """ - if in_dygraph_mode(): - return core.ops.tanh_(x) + return core.ops.tanh_(x) - _print_warning_in_static_mode("tanh") - return tanh(x, name) def increment(x, value=1.0, name=None): """ diff --git a/tools/wlist.json b/tools/wlist.json index cd9f2a7ca66..5a83a9ee470 100644 --- a/tools/wlist.json +++ b/tools/wlist.json @@ -34,6 +34,10 @@ "name":"reshape_", "annotation":"Inplace APIs don't need sample code. There is a special document introducing Inplace strategy" }, + { + "name":"flatten_", + "annotation":"Inplace APIs don't need sample code. There is a special document introducing Inplace strategy" + }, { "name":"scatter_", "annotation":"Inplace APIs don't need sample code. There is a special document introducing Inplace strategy" @@ -53,6 +57,50 @@ { "name":"tanh_", "annotation":"Inplace APIs don't need sample code. There is a special document introducing Inplace strategy" + }, + { + "name":"ceil_", + "annotation":"Inplace APIs don't need sample code. 
There is a special document introducing Inplace strategy" + }, + { + "name":"floor_", + "annotation":"Inplace APIs don't need sample code. There is a special document introducing Inplace strategy" + }, + { + "name":"exp_", + "annotation":"Inplace APIs don't need sample code. There is a special document introducing Inplace strategy" + }, + { + "name":"reciprocal_", + "annotation":"Inplace APIs don't need sample code. There is a special document introducing Inplace strategy" + }, + { + "name":"round_", + "annotation":"Inplace APIs don't need sample code. There is a special document introducing Inplace strategy" + }, + { + "name":"sqrt_", + "annotation":"Inplace APIs don't need sample code. There is a special document introducing Inplace strategy" + }, + { + "name":"rsqrt_", + "annotation":"Inplace APIs don't need sample code. There is a special document introducing Inplace strategy" + }, + { + "name":"clip_", + "annotation":"Inplace APIs don't need sample code. There is a special document introducing Inplace strategy" + }, + { + "name":"scale_", + "annotation":"Inplace APIs don't need sample code. There is a special document introducing Inplace strategy" + }, + { + "name":"subtract_", + "annotation":"Inplace APIs don't need sample code. There is a special document introducing Inplace strategy" + }, + { + "name":"add_", + "annotation":"Inplace APIs don't need sample code. There is a special document introducing Inplace strategy" } ], "wlist_temp_api":[ -- GitLab From eb13c19fa2549ed54d8ac21218c604e6febaa8e7 Mon Sep 17 00:00:00 2001 From: tianshuo78520a <707759223@qq.com> Date: Fri, 30 Apr 2021 13:27:53 +0800 Subject: [PATCH 066/720] revert data_generator __init__.py (#32670) * revert data_generator * test * add setup.py --- .../fluid/incubate/data_generator/__init__.py | 343 ++++++++++++++++++ python/setup.py.in | 1 + 2 files changed, 344 insertions(+) create mode 100644 python/paddle/fluid/incubate/data_generator/__init__.py diff --git a/python/paddle/fluid/incubate/data_generator/__init__.py b/python/paddle/fluid/incubate/data_generator/__init__.py new file mode 100644 index 00000000000..7ff80039ae2 --- /dev/null +++ b/python/paddle/fluid/incubate/data_generator/__init__.py @@ -0,0 +1,343 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +import sys + +__all__ = ['MultiSlotDataGenerator', 'MultiSlotStringDataGenerator'] + + +class DataGenerator(object): + """ + DataGenerator is a general Base class for user to inherit + A user who wants to define his/her own python processing logic + with paddle.fluid.dataset should inherit this class + """ + + def __init__(self): + self._proto_info = None + self.batch_size_ = 32 + + def _set_line_limit(self, line_limit): + if not isinstance(line_limit, int): + raise ValueError("line_limit%s must be in int type" % + type(line_limit)) + if line_limit < 1: + raise ValueError("line_limit can not less than 1") + self._line_limit = line_limit + + def set_batch(self, batch_size): + ''' + Set batch size of current DataGenerator + This is necessary only if a user wants to define generator_batch + + Example: + .. code-block:: python + import paddle.fluid.incubate.data_generator as dg + class MyData(dg.DataGenerator): + def generate_sample(self, line): + def local_iter(): + int_words = [int(x) for x in line.split()] + yield ("words", int_words) + return local_iter + def generate_batch(self, samples): + def local_iter(): + for s in samples: + yield ("words", s[1].extend([s[1][0]])) + mydata = MyData() + mydata.set_batch(128) + + ''' + self.batch_size_ = batch_size + + def run_from_memory(self): + ''' + This function generator data from memory, it is usually used for + debug and benchmarking + Example: + .. code-block:: python + import paddle.fluid.incubate.data_generator as dg + class MyData(dg.DataGenerator): + def generate_sample(self, line): + def local_iter(): + yield ("words", [1, 2, 3, 4]) + return local_iter + mydata = MyData() + mydata.run_from_memory() + ''' + batch_samples = [] + line_iter = self.generate_sample(None) + for user_parsed_line in line_iter(): + if user_parsed_line == None: + continue + batch_samples.append(user_parsed_line) + if len(batch_samples) == self.batch_size_: + batch_iter = self.generate_batch(batch_samples) + for sample in batch_iter(): + sys.stdout.write(self._gen_str(sample)) + batch_samples = [] + if len(batch_samples) > 0: + batch_iter = self.generate_batch(batch_samples) + for sample in batch_iter(): + sys.stdout.write(self._gen_str(sample)) + + def run_from_stdin(self): + ''' + This function reads the data row from stdin, parses it with the + process function, and further parses the return value of the + process function with the _gen_str function. The parsed data will + be wrote to stdout and the corresponding protofile will be + generated. + Example: + + .. 
code-block:: python + import paddle.fluid.incubate.data_generator as dg + class MyData(dg.DataGenerator): + def generate_sample(self, line): + def local_iter(): + int_words = [int(x) for x in line.split()] + yield ("words", [int_words]) + return local_iter + mydata = MyData() + mydata.run_from_stdin() + ''' + batch_samples = [] + for line in sys.stdin: + line_iter = self.generate_sample(line) + for user_parsed_line in line_iter(): + if user_parsed_line == None: + continue + batch_samples.append(user_parsed_line) + if len(batch_samples) == self.batch_size_: + batch_iter = self.generate_batch(batch_samples) + for sample in batch_iter(): + sys.stdout.write(self._gen_str(sample)) + batch_samples = [] + if len(batch_samples) > 0: + batch_iter = self.generate_batch(batch_samples) + for sample in batch_iter(): + sys.stdout.write(self._gen_str(sample)) + + def _gen_str(self, line): + ''' + Further processing the output of the process() function rewritten by + user, outputting data that can be directly read by the datafeed,and + updating proto_info information. + Args: + line(str): the output of the process() function rewritten by user. + Returns: + Return a string data that can be read directly by the datafeed. + ''' + raise NotImplementedError( + "pls use MultiSlotDataGenerator or PairWiseDataGenerator") + + def generate_sample(self, line): + ''' + This function needs to be overridden by the user to process the + original data row into a list or tuple. + Args: + line(str): the original data row + Returns: + Returns the data processed by the user. + The data format is list or tuple: + [(name, [feasign, ...]), ...] + or ((name, [feasign, ...]), ...) + + For example: + [("words", [1926, 08, 17]), ("label", [1])] + or (("words", [1926, 08, 17]), ("label", [1])) + Note: + The type of feasigns must be in int or float. Once the float + element appears in the feasign, the type of that slot will be + processed into a float. + Example: + .. code-block:: python + import paddle.fluid.incubate.data_generator as dg + class MyData(dg.DataGenerator): + def generate_sample(self, line): + def local_iter(): + int_words = [int(x) for x in line.split()] + yield ("words", [int_words]) + return local_iter + ''' + raise NotImplementedError( + "Please rewrite this function to return a list or tuple: " + + "[(name, [feasign, ...]), ...] or ((name, [feasign, ...]), ...)") + + def generate_batch(self, samples): + ''' + This function needs to be overridden by the user to process the + generated samples from generate_sample(self, str) function + It is usually used as batch processing when a user wants to + do preprocessing on a batch of samples, e.g. padding according to + the max length of a sample in the batch + Args: + samples(list tuple): generated sample from generate_sample + Returns: + a python generator, the same format as return value of generate_sample + Example: + .. 
code-block:: python + import paddle.fluid.incubate.data_generator as dg + class MyData(dg.DataGenerator): + def generate_sample(self, line): + def local_iter(): + int_words = [int(x) for x in line.split()] + yield ("words", int_words) + return local_iter + def generate_batch(self, samples): + def local_iter(): + for s in samples: + yield ("words", s[1].extend([s[1][0]])) + mydata = MyData() + mydata.set_batch(128) + ''' + + def local_iter(): + for sample in samples: + yield sample + + return local_iter + + +# TODO: guru4elephant +# add more generalized DataGenerator that can adapt user-defined slot +# for example, [(name, float_list), (name, str_list), (name, int_list)] +class MultiSlotStringDataGenerator(DataGenerator): + def _gen_str(self, line): + ''' + Further processing the output of the process() function rewritten by + user, outputting data that can be directly read by the MultiSlotDataFeed, + and updating proto_info information. + The input line will be in this format: + >>> [(name, [str(feasign), ...]), ...] + >>> or ((name, [str(feasign), ...]), ...) + The output will be in this format: + >>> [ids_num id1 id2 ...] ... + For example, if the input is like this: + >>> [("words", ["1926", "08", "17"]), ("label", ["1"])] + >>> or (("words", ["1926", "08", "17"]), ("label", ["1"])) + the output will be: + >>> 3 1234 2345 3456 1 1 + Args: + line(str): the output of the process() function rewritten by user. + Returns: + Return a string data that can be read directly by the MultiSlotDataFeed. + ''' + if not isinstance(line, list) and not isinstance(line, tuple): + raise ValueError( + "the output of process() must be in list or tuple type" + "Examples: [('words', ['1926', '08', '17']), ('label', ['1'])]") + output = "" + for index, item in enumerate(line): + name, elements = item + if output: + output += " " + out_str = [] + out_str.append(str(len(elements))) + out_str.extend(elements) + output += " ".join(out_str) + return output + "\n" + + +class MultiSlotDataGenerator(DataGenerator): + def _gen_str(self, line): + ''' + Further processing the output of the process() function rewritten by + user, outputting data that can be directly read by the MultiSlotDataFeed, + and updating proto_info information. + The input line will be in this format: + >>> [(name, [feasign, ...]), ...] + >>> or ((name, [feasign, ...]), ...) + The output will be in this format: + >>> [ids_num id1 id2 ...] ... + The proto_info will be in this format: + >>> [(name, type), ...] + + For example, if the input is like this: + >>> [("words", [1926, 08, 17]), ("label", [1])] + >>> or (("words", [1926, 08, 17]), ("label", [1])) + the output will be: + >>> 3 1234 2345 3456 1 1 + the proto_info will be: + >>> [("words", "uint64"), ("label", "uint64")] + Args: + line(str): the output of the process() function rewritten by user. + Returns: + Return a string data that can be read directly by the MultiSlotDataFeed. 
+ ''' + if not isinstance(line, list) and not isinstance(line, tuple): + raise ValueError( + "the output of process() must be in list or tuple type" + "Example: [('words', [1926, 08, 17]), ('label', [1])]") + output = "" + + if self._proto_info is None: + self._proto_info = [] + for item in line: + name, elements = item + if not isinstance(name, str): + raise ValueError("name%s must be in str type" % type(name)) + if not isinstance(elements, list): + raise ValueError("elements%s must be in list type" % + type(elements)) + if not elements: + raise ValueError( + "the elements of each field can not be empty, you need padding it in process()." + ) + self._proto_info.append((name, "uint64")) + if output: + output += " " + output += str(len(elements)) + for elem in elements: + if isinstance(elem, float): + self._proto_info[-1] = (name, "float") + elif not isinstance(elem, int) and not isinstance(elem, + long): + raise ValueError( + "the type of element%s must be in int or float" % + type(elem)) + output += " " + str(elem) + else: + if len(line) != len(self._proto_info): + raise ValueError( + "the complete field set of two given line are inconsistent.") + for index, item in enumerate(line): + name, elements = item + if not isinstance(name, str): + raise ValueError("name%s must be in str type" % type(name)) + if not isinstance(elements, list): + raise ValueError("elements%s must be in list type" % + type(elements)) + if not elements: + raise ValueError( + "the elements of each field can not be empty, you need padding it in process()." + ) + if name != self._proto_info[index][0]: + raise ValueError( + "the field name of two given line are not match: require<%s>, get<%s>." + % (self._proto_info[index][0], name)) + if output: + output += " " + output += str(len(elements)) + for elem in elements: + if self._proto_info[index][1] != "float": + if isinstance(elem, float): + self._proto_info[index] = (name, "float") + elif not isinstance(elem, int) and not isinstance(elem, + long): + raise ValueError( + "the type of element%s must be in int or float" + % type(elem)) + output += " " + str(elem) + return output + "\n" diff --git a/python/setup.py.in b/python/setup.py.in index 0e94d02cd6f..d9ca3038fb2 100644 --- a/python/setup.py.in +++ b/python/setup.py.in @@ -188,6 +188,7 @@ packages=['paddle', 'paddle.fluid.transpiler', 'paddle.fluid.transpiler.details', 'paddle.fluid.incubate', + 'paddle.fluid.incubate.data_generator', 'paddle.fluid.incubate.fleet', 'paddle.fluid.incubate.checkpoint', 'paddle.fluid.incubate.fleet.base', -- GitLab From 7e2b60a4a5cdc4f022226e01ce6acdfbc83807f8 Mon Sep 17 00:00:00 2001 From: Zhou Wei <52485244+zhouwei25@users.noreply.github.com> Date: Fri, 30 Apr 2021 13:52:22 +0800 Subject: [PATCH 067/720] add API Tensor.item() to convert Tensor element to a Python scalar (#32561) --- paddle/fluid/pybind/imperative.cc | 64 +++++++++++++++++ .../fluid/dygraph/varbase_patch_methods.py | 70 ++++++++++++++++++- .../fluid/tests/unittests/test_var_base.py | 68 ++++++++++++++++++ 3 files changed, 200 insertions(+), 2 deletions(-) diff --git a/paddle/fluid/pybind/imperative.cc b/paddle/fluid/pybind/imperative.cc index 93441eb52fe..450c992d411 100644 --- a/paddle/fluid/pybind/imperative.cc +++ b/paddle/fluid/pybind/imperative.cc @@ -784,6 +784,70 @@ void BindImperative(py::module *m_ptr) { return out; } }) + .def( + "_getitem_from_offset", + [](std::shared_ptr &self, const py::args &args) { + const auto &tensor = self->Var().Get(); + PADDLE_ENFORCE_EQ( + tensor.IsInitialized(), true, + 
platform::errors::InvalidArgument(
+ "Tensor of %s is Empty, please check if it has no data.",
+ self->Name()));
+
+ const auto &tensor_dims = tensor.dims();
+
+ std::vector dims(tensor_dims.size());
+ std::vector strides(tensor_dims.size());
+
+ size_t numel = 1;
+ for (int i = tensor_dims.size() - 1; i >= 0; --i) {
+ strides[i] = numel;
+ dims[i] = static_cast(tensor_dims[i]);
+ numel *= dims[i];
+ }
+ size_t offset = 0;
+ if (args.empty()) {
+ PADDLE_ENFORCE_EQ(
+ numel, 1,
+ platform::errors::InvalidArgument(
+ "only one element tensors can be converted to Python "
+ "scalars when no input coordinates"));
+ } else if (args.size() == 1) {
+ offset = args[0].cast();
+ PADDLE_ENFORCE_LT(
+ offset, numel,
+ platform::errors::InvalidArgument(
+ "index %d is out of bounds for size %d", offset, numel));
+ } else {
+ PADDLE_ENFORCE_EQ(args.size(), dims.size(),
+ platform::errors::InvalidArgument(
+ "incorrect number of indices for Tensor"));
+
+ for (size_t i = 0; i < args.size(); ++i) {
+ size_t index = args[i].cast();
+ PADDLE_ENFORCE_LT(
+ index, dims[i],
+ platform::errors::InvalidArgument(
+ "index %d is out of bounds for axis %d with size %d",
+ index, i, dims[i]));
+ offset += index * strides[i];
+ }
+ }
+#define TENSOR_TO_PY_SCALAR(T, proto_type) \
+ if (tensor.type() == proto_type) { \
+ std::string py_dtype_str = details::TensorDTypeToPyDTypeStr(proto_type); \
+ T b = TensorGetElement(tensor, offset); \
+ return py::array(py::dtype(py_dtype_str.c_str()), {}, {}, \
+ static_cast(&b)); \
+ }
+
+ _ForEachDataType_(TENSOR_TO_PY_SCALAR);
+#undef TENSOR_TO_PY_SCALAR
+ PADDLE_THROW(platform::errors::Unimplemented(
+ "Unsupported tensor data type: %s",
+ framework::DataTypeToString(tensor.type())));
+ },
+ py::return_value_policy::copy)
 .def("_inplace_version",
 [](imperative::VarBase &self) -> uint32_t {
 const auto &var = self.MutableVar();
diff --git a/python/paddle/fluid/dygraph/varbase_patch_methods.py b/python/paddle/fluid/dygraph/varbase_patch_methods.py
index dbc2b24aeea..bb84b2ca970 100644
--- a/python/paddle/fluid/dygraph/varbase_patch_methods.py
+++ b/python/paddle/fluid/dygraph/varbase_patch_methods.py
@@ -375,6 +375,49 @@ def monkey_patch_varbase():
 """
 self.clear_gradient()
+ def item(self, *args):
+ """
+ Convert one element Tensor to a Python scalar.
+
+ Args:
+ *args(int): The input coordinates. If it's a single int, the data in the corresponding order of the flattened Tensor will be returned.
+ Default: None, and it must be in the case where Tensor has only one element.
+
+ Returns(Python scalar): A Python scalar, whose dtype corresponds to the dtype of the Tensor.
+
+ Raises:
+ ValueError: If the Tensor has more than one element and no input coordinates are given.
+
+ Examples:
+ .. code-block:: python
+
+ import paddle
+
+ x = paddle.to_tensor(1)
+ print(x.item()) #1
+ print(type(x.item())) # <class 'int'>
+
+ x = paddle.to_tensor(1.0)
+ print(x.item()) #1.0
+ print(type(x.item())) # <class 'float'>
+
+ x = paddle.to_tensor(True)
+ print(x.item()) #True
+ print(type(x.item())) # <class 'bool'>
+
+ x = paddle.to_tensor(1+1j)
+ print(x.item()) #(1+1j)
+ print(type(x.item())) # <class 'complex'>
+
+ x = paddle.to_tensor([[1.1, 2.2, 3.3]])
+ print(x.item(2)) #3.3
+ print(x.item(0, 2)) #3.3
+
+ x = paddle.to_tensor([1, 2])
+ x.item() #ValueError: only one element tensor can be converted to Python scalar when no input coordinates.
+ """ + return self._getitem_from_offset(*args).item() + @property def inplace_version(self): """ @@ -462,7 +505,30 @@ def monkey_patch_varbase(): return self.__nonzero__() def __array__(self, dtype=None): - return self.numpy().astype(dtype) + """ + Returns a numpy array shows the value of current Tensor. + + Returns: + ndarray: The numpy value of current Tensor. + + Returns type: + ndarray: dtype is same as current Tensor + + Examples: + .. code-block:: python + + import paddle + import numpy as np + x = paddle.randn([2, 2]) + x_array = np.array(x) + + print(type(x_array)) # + print(x_array.shape) #(2, 2) + """ + array = self.numpy() + if dtype: + array = array.astype(dtype) + return array def __getitem__(self, item): def contain_tensor(item): @@ -498,7 +564,7 @@ def monkey_patch_varbase(): ("__str__", __str__), ("__repr__", __str__), ("__deepcopy__", __deepcopy__), ("__module__", "paddle"), ("__name__", "Tensor"), ("__array__", __array__), - ("__getitem__", __getitem__)): + ("__getitem__", __getitem__), ("item", item)): setattr(core.VarBase, method_name, method) # NOTE(zhiqiu): pybind11 will set a default __str__ method of enum class. diff --git a/python/paddle/fluid/tests/unittests/test_var_base.py b/python/paddle/fluid/tests/unittests/test_var_base.py index 8bf42390d1e..83f02b629d7 100644 --- a/python/paddle/fluid/tests/unittests/test_var_base.py +++ b/python/paddle/fluid/tests/unittests/test_var_base.py @@ -143,6 +143,74 @@ class TestVarBase(unittest.TestCase): self.assertEqual(y.dtype, core.VarDesc.VarType.COMPLEX64) self.assertEqual(y.shape, [2]) + paddle.set_default_dtype('float32') + x = paddle.randn([3, 4]) + x_array = np.array(x) + self.assertEqual(x_array.shape, x.numpy().shape) + self.assertEqual(x_array.dtype, x.numpy().dtype) + self.assertTrue(np.array_equal(x_array, x.numpy())) + + x = paddle.to_tensor(1.0) + self.assertEqual(x.item(), 1.0) + self.assertTrue(isinstance(x.item(), float)) + + x = paddle.randn([3, 2, 2]) + self.assertTrue(isinstance(x.item(5), float)) + self.assertTrue(isinstance(x.item(1, 0, 1), float)) + self.assertEqual(x.item(5), x.item(1, 0, 1)) + self.assertTrue( + np.array_equal(x.item(1, 0, 1), x.numpy().item(1, 0, 1))) + + x = paddle.to_tensor([[1.111111, 2.222222, 3.333333]]) + self.assertEqual(x.item(0, 2), x.item(2)) + self.assertAlmostEqual(x.item(2), 3.333333) + self.assertTrue(isinstance(x.item(0, 2), float)) + + x = paddle.to_tensor(1.0, dtype='float64') + self.assertEqual(x.item(), 1.0) + self.assertTrue(isinstance(x.item(), float)) + + x = paddle.to_tensor(1.0, dtype='float16') + self.assertEqual(x.item(), 1.0) + self.assertTrue(isinstance(x.item(), float)) + + x = paddle.to_tensor(1, dtype='uint8') + self.assertEqual(x.item(), 1) + print(type(x.item())) + self.assertTrue(isinstance(x.item(), int)) + + x = paddle.to_tensor(1, dtype='int8') + self.assertEqual(x.item(), 1) + self.assertTrue(isinstance(x.item(), int)) + + x = paddle.to_tensor(1, dtype='int16') + self.assertEqual(x.item(), 1) + self.assertTrue(isinstance(x.item(), int)) + + x = paddle.to_tensor(1, dtype='int32') + self.assertEqual(x.item(), 1) + self.assertTrue(isinstance(x.item(), int)) + + x = paddle.to_tensor(1, dtype='int64') + self.assertEqual(x.item(), 1) + self.assertTrue(isinstance(x.item(), long if six.PY2 else int)) + + x = paddle.to_tensor(True) + self.assertEqual(x.item(), True) + self.assertTrue(isinstance(x.item(), bool)) + + x = paddle.to_tensor(1 + 1j) + self.assertEqual(x.item(), 1 + 1j) + self.assertTrue(isinstance(x.item(), complex)) + + with 
self.assertRaises(ValueError): + paddle.randn([3, 2, 2]).item() + with self.assertRaises(ValueError): + paddle.randn([3, 2, 2]).item(18) + with self.assertRaises(ValueError): + paddle.randn([3, 2, 2]).item(1, 2) + with self.assertRaises(ValueError): + paddle.randn([3, 2, 2]).item(2, 1, 2) with self.assertRaises(TypeError): paddle.to_tensor('test') with self.assertRaises(TypeError): -- GitLab From c6713bc00e881b281a6ad4cf20daf1088334dbea Mon Sep 17 00:00:00 2001 From: Pei Yang Date: Fri, 30 Apr 2021 14:06:05 +0800 Subject: [PATCH 068/720] remove check for optim_cache_dir in trt slim int8 (#32676) --- paddle/fluid/inference/analysis/ir_pass_manager.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/paddle/fluid/inference/analysis/ir_pass_manager.cc b/paddle/fluid/inference/analysis/ir_pass_manager.cc index 8407f98e6df..4bb08dc96b1 100644 --- a/paddle/fluid/inference/analysis/ir_pass_manager.cc +++ b/paddle/fluid/inference/analysis/ir_pass_manager.cc @@ -106,8 +106,8 @@ void IRPassManager::CreatePasses(Argument *argument, bool use_static_engine = argument->tensorrt_use_static_engine(); bool model_from_memory = argument->model_from_memory(); std::string optim_cache_dir = argument->optim_cache_dir(); - bool int8_valid = - !(model_from_memory && optim_cache_dir.empty() && enable_int8); + bool int8_valid = !(model_from_memory && optim_cache_dir.empty() && + enable_int8 && use_calib_mode); PADDLE_ENFORCE_EQ( int8_valid, true, platform::errors::PreconditionNotMet( -- GitLab From 6ab43f7fe8a9876293f3bc93a86c1a38588c0ae5 Mon Sep 17 00:00:00 2001 From: Wenyu Date: Fri, 30 Apr 2021 14:32:38 +0800 Subject: [PATCH 069/720] Support transforms for paddle tensor image (#31970) * add to_grayscale, normalize * add rotate * add vfip and hflip * add crop center_crop * add padding, support constant, reflect, replicate, circular same as paddle.pad * add get-image-[n,c,w,h] axis utils --- python/paddle/tests/test_transforms.py | 230 ++++++++- python/paddle/vision/image.py | 10 +- python/paddle/vision/transforms/functional.py | 75 ++- .../vision/transforms/functional_tensor.py | 488 +++++++++++++++++- python/paddle/vision/transforms/transforms.py | 5 + 5 files changed, 764 insertions(+), 44 deletions(-) diff --git a/python/paddle/tests/test_transforms.py b/python/paddle/tests/test_transforms.py index 5086a12d945..c84950fdbc5 100644 --- a/python/paddle/tests/test_transforms.py +++ b/python/paddle/tests/test_transforms.py @@ -56,7 +56,10 @@ class TestTransformsCV2(unittest.TestCase): 'uint8')) def get_shape(self, img): - if self.backend == 'pil': + if isinstance(img, paddle.Tensor): + return img.shape + + elif self.backend == 'pil': return np.array(img).shape return img.shape @@ -253,6 +256,22 @@ class TestTransformsCV2(unittest.TestCase): fake_img = self.create_image((100, 120, 3)) F.pad(fake_img, [1.0, 2.0, 3.0]) + with self.assertRaises(TypeError): + tensor_img = paddle.rand((3, 100, 100)) + F.pad(tensor_img, '1') + + with self.assertRaises(TypeError): + tensor_img = paddle.rand((3, 100, 100)) + F.pad(tensor_img, 1, {}) + + with self.assertRaises(TypeError): + tensor_img = paddle.rand((3, 100, 100)) + F.pad(tensor_img, 1, padding_mode=-1) + + with self.assertRaises(ValueError): + tensor_img = paddle.rand((3, 100, 100)) + F.pad(tensor_img, [1.0, 2.0, 3.0]) + with self.assertRaises(ValueError): transforms.RandomRotation(-2) @@ -290,6 +309,159 @@ class TestTransformsPIL(TestTransformsCV2): return 'pil' +class TestTransformsTensor(TestTransformsCV2): + def get_backend(self): + return 'tensor' + + 
def create_image(self, shape): + return paddle.to_tensor(np.random.rand(*shape)).transpose( + (2, 0, 1)) # hwc->chw + + def do_transform(self, trans): + trans.transforms.insert(0, transforms.ToTensor(data_format='CHW')) + trans.transforms.append(transforms.Transpose(order=(1, 2, 0))) + dataset_folder = DatasetFolder(self.data_dir, transform=trans) + for _ in dataset_folder: + pass + + def test_trans_all(self): + normalize = transforms.Normalize( + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.120, 57.375], ) + trans = transforms.Compose([ + transforms.RandomResizedCrop(224), + transforms.RandomHorizontalFlip(), + normalize, + ]) + self.do_transform(trans) + + def test_grayscale(self): + trans = transforms.Compose([transforms.Grayscale()]) + self.do_transform(trans) + + trans_gray = transforms.Grayscale() + fake_img = self.create_image((500, 400, 3)) + fake_img_gray = trans_gray(fake_img) + + np.testing.assert_equal(self.get_shape(fake_img_gray)[1], 500) + np.testing.assert_equal(self.get_shape(fake_img_gray)[2], 400) + + trans_gray3 = transforms.Grayscale(3) + fake_img = self.create_image((500, 400, 3)) + fake_img_gray = trans_gray3(fake_img) + + def test_normalize(self): + normalize = transforms.Normalize(mean=0.5, std=0.5) + trans = transforms.Compose([normalize]) + self.do_transform(trans) + + def test_pad(self): + trans = transforms.Compose([transforms.Pad(2)]) + self.do_transform(trans) + + fake_img = self.create_image((200, 150, 3)) + trans_pad = transforms.Compose([transforms.Pad(10)]) + fake_img_padded = trans_pad(fake_img) + np.testing.assert_equal(self.get_shape(fake_img_padded), (3, 220, 170)) + trans_pad1 = transforms.Pad([1, 2]) + trans_pad2 = transforms.Pad([1, 2, 3, 4]) + trans_pad4 = transforms.Pad(1, padding_mode='edge') + img = trans_pad1(fake_img) + img = trans_pad2(img) + img = trans_pad4(img) + + def test_random_crop(self): + trans = transforms.Compose([ + transforms.RandomCrop(200), + transforms.RandomCrop((140, 160)), + ]) + self.do_transform(trans) + + trans_random_crop1 = transforms.RandomCrop(224) + trans_random_crop2 = transforms.RandomCrop((140, 160)) + + fake_img = self.create_image((500, 400, 3)) + fake_img_crop1 = trans_random_crop1(fake_img) + fake_img_crop2 = trans_random_crop2(fake_img_crop1) + + np.testing.assert_equal(self.get_shape(fake_img_crop1), (3, 224, 224)) + + np.testing.assert_equal(self.get_shape(fake_img_crop2), (3, 140, 160)) + + trans_random_crop_same = transforms.RandomCrop((140, 160)) + img = trans_random_crop_same(fake_img_crop2) + + trans_random_crop_bigger = transforms.RandomCrop( + (180, 200), pad_if_needed=True) + img = trans_random_crop_bigger(img) + + trans_random_crop_pad = transforms.RandomCrop((224, 256), 2, True) + img = trans_random_crop_pad(img) + + def test_exception(self): + trans = transforms.Compose([transforms.Resize(-1)]) + + trans_batch = transforms.Compose([transforms.Resize(-1)]) + + with self.assertRaises(Exception): + self.do_transform(trans) + + with self.assertRaises(Exception): + self.do_transform(trans_batch) + + with self.assertRaises(ValueError): + transforms.Pad([1.0, 2.0, 3.0]) + + with self.assertRaises(TypeError): + fake_img = self.create_image((100, 120, 3)) + F.pad(fake_img, '1') + + with self.assertRaises(TypeError): + fake_img = self.create_image((100, 120, 3)) + F.pad(fake_img, 1, {}) + + with self.assertRaises(TypeError): + fake_img = self.create_image((100, 120, 3)) + F.pad(fake_img, 1, padding_mode=-1) + + with self.assertRaises(ValueError): + fake_img = self.create_image((100, 120, 3)) + 
F.pad(fake_img, [1.0, 2.0, 3.0]) + + with self.assertRaises(TypeError): + tensor_img = paddle.rand((3, 100, 100)) + F.pad(tensor_img, '1') + + with self.assertRaises(TypeError): + tensor_img = paddle.rand((3, 100, 100)) + F.pad(tensor_img, 1, {}) + + with self.assertRaises(TypeError): + tensor_img = paddle.rand((3, 100, 100)) + F.pad(tensor_img, 1, padding_mode=-1) + + with self.assertRaises(ValueError): + tensor_img = paddle.rand((3, 100, 100)) + F.pad(tensor_img, [1.0, 2.0, 3.0]) + + with self.assertRaises(ValueError): + transforms.RandomRotation(-2) + + with self.assertRaises(ValueError): + transforms.RandomRotation([1, 2, 3]) + + with self.assertRaises(ValueError): + trans_gray = transforms.Grayscale(5) + fake_img = self.create_image((100, 120, 3)) + trans_gray(fake_img) + + with self.assertRaises(TypeError): + transform = transforms.RandomResizedCrop(64) + transform(1) + + test_color_jitter = None + + class TestFunctional(unittest.TestCase): def test_errors(self): with self.assertRaises(TypeError): @@ -300,6 +472,14 @@ class TestFunctional(unittest.TestCase): 'uint8')) F.to_tensor(fake_img, data_format=1) + with self.assertRaises(ValueError): + fake_img = paddle.rand((3, 100, 100)) + F.pad(fake_img, 1, padding_mode='symmetric') + + with self.assertRaises(TypeError): + fake_img = paddle.rand((3, 100, 100)) + F.resize(fake_img, {1: 1}) + with self.assertRaises(TypeError): fake_img = Image.fromarray((np.random.rand(28, 28, 3) * 255).astype( 'uint8')) @@ -354,31 +534,50 @@ class TestFunctional(unittest.TestCase): std = [0.5, 0.5, 0.5] normalized_img = F.normalize(tensor_img, mean, std) - normalized_img = F.normalize( + normalized_img_tensor = F.normalize( tensor_img_hwc, mean, std, data_format='HWC') - normalized_img = F.normalize(pil_img, mean, std, data_format='HWC') - normalized_img = F.normalize( + normalized_img_pil = F.normalize(pil_img, mean, std, data_format='HWC') + normalized_img_np = F.normalize( np_img, mean, std, data_format='HWC', to_rgb=True) + np.testing.assert_almost_equal( + np.array(normalized_img_pil), normalized_img_np) + np.testing.assert_almost_equal(normalized_img_tensor.numpy(), + normalized_img_np) + def test_center_crop(self): np_img = (np.random.rand(28, 24, 3)).astype('uint8') pil_img = Image.fromarray(np_img) + tensor_img = F.to_tensor(pil_img, data_format='CHW') np_cropped_img = F.center_crop(np_img, 4) pil_cropped_img = F.center_crop(pil_img, 4) + tensor_cropped_img = F.center_crop(tensor_img, 4) np.testing.assert_almost_equal(np_cropped_img, np.array(pil_cropped_img)) + np.testing.assert_almost_equal(np_cropped_img, + tensor_cropped_img.numpy().transpose( + (1, 2, 0))) def test_pad(self): np_img = (np.random.rand(28, 24, 3)).astype('uint8') pil_img = Image.fromarray(np_img) + tensor_img = F.to_tensor(pil_img, 'CHW') np_padded_img = F.pad(np_img, [1, 2], padding_mode='reflect') pil_padded_img = F.pad(pil_img, [1, 2], padding_mode='reflect') + tensor_padded_img = F.pad(tensor_img, [1, 2], padding_mode='reflect') np.testing.assert_almost_equal(np_padded_img, np.array(pil_padded_img)) + np.testing.assert_almost_equal(np_padded_img, + tensor_padded_img.numpy().transpose( + (1, 2, 0))) + + tensor_padded_img = F.pad(tensor_img, 1, padding_mode='reflect') + tensor_padded_img = F.pad(tensor_img, [1, 2, 1, 2], + padding_mode='reflect') pil_p_img = pil_img.convert('P') pil_padded_img = F.pad(pil_p_img, [1, 2]) @@ -387,12 +586,21 @@ class TestFunctional(unittest.TestCase): def test_resize(self): np_img = (np.zeros([28, 24, 3])).astype('uint8') pil_img = 
Image.fromarray(np_img) + tensor_img = F.to_tensor(pil_img, 'CHW') np_reseized_img = F.resize(np_img, 40) pil_reseized_img = F.resize(pil_img, 40) + tensor_reseized_img = F.resize(tensor_img, 40) + tensor_reseized_img2 = F.resize(tensor_img, (46, 40)) np.testing.assert_almost_equal(np_reseized_img, np.array(pil_reseized_img)) + np.testing.assert_almost_equal(np_reseized_img, + tensor_reseized_img.numpy().transpose( + (1, 2, 0))) + np.testing.assert_almost_equal(np_reseized_img, + tensor_reseized_img2.numpy().transpose( + (1, 2, 0))) gray_img = (np.zeros([28, 32])).astype('uint8') gray_resize_img = F.resize(gray_img, 40) @@ -447,12 +655,24 @@ class TestFunctional(unittest.TestCase): def test_rotate(self): np_img = (np.random.rand(28, 28, 3) * 255).astype('uint8') pil_img = Image.fromarray(np_img).convert('RGB') - rotated_np_img = F.rotate(np_img, 80, expand=True) rotated_pil_img = F.rotate(pil_img, 80, expand=True) + tensor_img = F.to_tensor(pil_img, 'CHW') + + rotated_tensor_img1 = F.rotate(tensor_img, 80, expand=True) + + rotated_tensor_img2 = F.rotate( + tensor_img, + 80, + interpolation='bilinear', + center=(10, 10), + expand=False) + np.testing.assert_equal(rotated_np_img.shape, np.array(rotated_pil_img).shape) + np.testing.assert_equal(rotated_np_img.shape, + rotated_tensor_img1.transpose((1, 2, 0)).shape) def test_rotate1(self): np_img = (np.random.rand(28, 28, 3) * 255).astype('uint8') diff --git a/python/paddle/vision/image.py b/python/paddle/vision/image.py index 3d5ea3a73af..19986816b7c 100644 --- a/python/paddle/vision/image.py +++ b/python/paddle/vision/image.py @@ -80,9 +80,9 @@ def set_image_backend(backend): shutil.rmtree(temp_dir) """ global _image_backend - if backend not in ['pil', 'cv2']: + if backend not in ['pil', 'cv2', 'tensor']: raise ValueError( - "Expected backend are one of ['pil', 'cv2'], but got {}" + "Expected backend are one of ['pil', 'cv2', 'tensor'], but got {}" .format(backend)) _image_backend = backend @@ -150,13 +150,13 @@ def image_load(path, backend=None): if backend is None: backend = _image_backend - if backend not in ['pil', 'cv2']: + if backend not in ['pil', 'cv2', 'tensor']: raise ValueError( - "Expected backend are one of ['pil', 'cv2'], but got {}" + "Expected backend are one of ['pil', 'cv2', 'tensor'], but got {}" .format(backend)) if backend == 'pil': return Image.open(path) - else: + elif backend == 'cv2': cv2 = try_import('cv2') return cv2.imread(path) diff --git a/python/paddle/vision/transforms/functional.py b/python/paddle/vision/transforms/functional.py index c0e72877ffc..18a35915c99 100644 --- a/python/paddle/vision/transforms/functional.py +++ b/python/paddle/vision/transforms/functional.py @@ -25,13 +25,6 @@ from PIL import Image from numpy import sin, cos, tan import paddle -if sys.version_info < (3, 3): - Sequence = collections.Sequence - Iterable = collections.Iterable -else: - Sequence = collections.abc.Sequence - Iterable = collections.abc.Iterable - from . import functional_pil as F_pil from . import functional_cv2 as F_cv2 from . import functional_tensor as F_t @@ -83,14 +76,18 @@ def to_tensor(pic, data_format='CHW'): print(tensor.shape) """ - if not (_is_pil_image(pic) or _is_numpy_image(pic)): - raise TypeError('pic should be PIL Image or ndarray. Got {}'.format( - type(pic))) + if not (_is_pil_image(pic) or _is_numpy_image(pic) or + _is_tensor_image(pic)): + raise TypeError( + 'pic should be PIL Image or Tensor Image or ndarray with dim=[2 or 3]. Got {}'. 
+ format(type(pic))) if _is_pil_image(pic): return F_pil.to_tensor(pic, data_format) - else: + elif _is_numpy_image(pic): return F_cv2.to_tensor(pic, data_format) + else: + return pic if data_format.lower() == 'chw' else pic.transpose((1, 2, 0)) def resize(img, size, interpolation='bilinear'): @@ -135,13 +132,16 @@ def resize(img, size, interpolation='bilinear'): converted_img = F.resize(fake_img, (200, 150)) print(converted_img.size) """ - if not (_is_pil_image(img) or _is_numpy_image(img)): + if not (_is_pil_image(img) or _is_numpy_image(img) or + _is_tensor_image(img)): raise TypeError( - 'img should be PIL Image or ndarray with dim=[2 or 3]. Got {}'. + 'img should be PIL Image or Tensor Image or ndarray with dim=[2 or 3]. Got {}'. format(type(img))) if _is_pil_image(img): return F_pil.resize(img, size, interpolation) + elif _is_tensor_image(img): + return F_t.resize(img, size, interpolation) else: return F_cv2.resize(img, size, interpolation) @@ -196,13 +196,16 @@ def pad(img, padding, fill=0, padding_mode='constant'): padded_img = F.pad(fake_img, padding=(2, 1)) print(padded_img.size) """ - if not (_is_pil_image(img) or _is_numpy_image(img)): + if not (_is_pil_image(img) or _is_numpy_image(img) or + _is_tensor_image(img)): raise TypeError( - 'img should be PIL Image or ndarray with dim=[2 or 3]. Got {}'. + 'img should be PIL Image or Tensor Image or ndarray with dim=[2 or 3]. Got {}'. format(type(img))) if _is_pil_image(img): return F_pil.pad(img, padding, fill, padding_mode) + elif _is_tensor_image(img): + return F_t.pad(img, padding, fill, padding_mode) else: return F_cv2.pad(img, padding, fill, padding_mode) @@ -236,13 +239,16 @@ def crop(img, top, left, height, width): print(cropped_img.size) """ - if not (_is_pil_image(img) or _is_numpy_image(img)): + if not (_is_pil_image(img) or _is_numpy_image(img) or + _is_tensor_image(img)): raise TypeError( - 'img should be PIL Image or ndarray with dim=[2 or 3]. Got {}'. + 'img should be PIL Image or Tensor Image or ndarray with dim=[2 or 3]. Got {}'. format(type(img))) if _is_pil_image(img): return F_pil.crop(img, top, left, height, width) + elif _is_tensor_image(img): + return F_t.crop(img, top, left, height, width) else: return F_cv2.crop(img, top, left, height, width) @@ -272,13 +278,16 @@ def center_crop(img, output_size): cropped_img = F.center_crop(fake_img, (150, 100)) print(cropped_img.size) """ - if not (_is_pil_image(img) or _is_numpy_image(img)): + if not (_is_pil_image(img) or _is_numpy_image(img) or + _is_tensor_image(img)): raise TypeError( - 'img should be PIL Image or ndarray with dim=[2 or 3]. Got {}'. + 'img should be PIL Image or Tensor Image or ndarray with dim=[2 or 3]. Got {}'. format(type(img))) if _is_pil_image(img): return F_pil.center_crop(img, output_size) + elif _is_tensor_image(img): + return F_t.center_crop(img, output_size) else: return F_cv2.center_crop(img, output_size) @@ -307,13 +316,16 @@ def hflip(img): print(flpped_img.size) """ - if not (_is_pil_image(img) or _is_numpy_image(img)): + if not (_is_pil_image(img) or _is_numpy_image(img) or + _is_tensor_image(img)): raise TypeError( - 'img should be PIL Image or ndarray with dim=[2 or 3]. Got {}'. + 'img should be PIL Image or Tensor Image or ndarray with dim=[2 or 3]. Got {}'. 
format(type(img))) if _is_pil_image(img): return F_pil.hflip(img) + elif _is_tensor_image(img): + return F_t.hflip(img) else: return F_cv2.hflip(img) @@ -342,13 +354,16 @@ def vflip(img): print(flpped_img.size) """ - if not (_is_pil_image(img) or _is_numpy_image(img)): + if not (_is_pil_image(img) or _is_numpy_image(img) or + _is_tensor_image(img)): raise TypeError( - 'img should be PIL Image or ndarray with dim=[2 or 3]. Got {}'. + 'img should be PIL Image or Tensor Image or ndarray with dim=[2 or 3]. Got {}'. format(type(img))) if _is_pil_image(img): return F_pil.vflip(img) + elif _is_tensor_image(img): + return F_t.vflip(img) else: return F_cv2.vflip(img) @@ -563,9 +578,10 @@ def rotate(img, print(rotated_img.size) """ - if not (_is_pil_image(img) or _is_numpy_image(img)): + if not (_is_pil_image(img) or _is_numpy_image(img) or + _is_tensor_image(img)): raise TypeError( - 'img should be PIL Image or ndarray with dim=[2 or 3]. Got {}'. + 'img should be PIL Image or Tensor Image or ndarray with dim=[2 or 3]. Got {}'. format(type(img))) if isinstance(center, list): @@ -575,6 +591,8 @@ def rotate(img, if _is_pil_image(img): return F_pil.rotate(img, angle, interpolation, expand, center, fill) + elif _is_tensor_image(img): + return F_t.rotate(img, angle, interpolation, expand, center, fill) else: return F_cv2.rotate(img, angle, interpolation, expand, center, fill) @@ -606,13 +624,16 @@ def to_grayscale(img, num_output_channels=1): print(gray_img.size) """ - if not (_is_pil_image(img) or _is_numpy_image(img)): + if not (_is_pil_image(img) or _is_numpy_image(img) or + _is_tensor_image(img)): raise TypeError( - 'img should be PIL Image or ndarray with dim=[2 or 3]. Got {}'. + 'img should be PIL Image or Tensor Image or ndarray with dim=[2 or 3]. Got {}'. format(type(img))) if _is_pil_image(img): return F_pil.to_grayscale(img, num_output_channels) + elif _is_tensor_image(img): + return F_t.to_grayscale(img, num_output_channels) else: return F_cv2.to_grayscale(img, num_output_channels) diff --git a/python/paddle/vision/transforms/functional_tensor.py b/python/paddle/vision/transforms/functional_tensor.py index e8b70820dd9..7f490d57916 100644 --- a/python/paddle/vision/transforms/functional_tensor.py +++ b/python/paddle/vision/transforms/functional_tensor.py @@ -14,11 +14,78 @@ from __future__ import division +import math +import numbers + import paddle +import paddle.nn.functional as F + +import sys +import collections + + +def _assert_image_tensor(img, data_format): + if not isinstance( + img, paddle.Tensor) or img.ndim != 3 or not data_format.lower() in ( + 'chw', 'hwc'): + raise RuntimeError( + 'not support [type={}, ndim={}, data_format={}] paddle image'. 
+ format(type(img), img.ndim, data_format))
+
+
+def _get_image_h_axis(data_format):
+ if data_format.lower() == 'chw':
+ return -2
+ elif data_format.lower() == 'hwc':
+ return -3
+
+
+def _get_image_w_axis(data_format):
+ if data_format.lower() == 'chw':
+ return -1
+ elif data_format.lower() == 'hwc':
+ return -2
+
+
+def _get_image_c_axis(data_format):
+ if data_format.lower() == 'chw':
+ return -3
+ elif data_format.lower() == 'hwc':
+ return -1
+
+
+def _get_image_n_axis(data_format):
+ if len(data_format) == 3:
+ return None
+ elif len(data_format) == 4:
+ return 0
+
+
+def _is_channel_last(data_format):
+ return _get_image_c_axis(data_format) == -1
+
+
+def _is_channel_first(data_format):
+ return _get_image_c_axis(data_format) == -3
+
+
+def _get_image_num_batches(img, data_format):
+ if _get_image_n_axis(data_format):
+ return img.shape[_get_image_n_axis(data_format)]
+ return None
+
+
+def _get_image_num_channels(img, data_format):
+ return img.shape[_get_image_c_axis(data_format)]
+
+
+def _get_image_size(img, data_format):
+ return img.shape[_get_image_w_axis(data_format)], img.shape[
+ _get_image_h_axis(data_format)]
 
 
 def normalize(img, mean, std, data_format='CHW'):
- """Normalizes a tensor image with mean and standard deviation.
+ """Normalizes a tensor image given mean and standard deviation.
 
 Args:
 img (paddle.Tensor): input data to be normalized.
@@ -31,10 +98,417 @@ def normalize(img, mean, std, data_format='CHW'):
 Tensor: Normalized mage.
 
 """
- if data_format == 'CHW':
- mean = paddle.to_tensor(mean).reshape([-1, 1, 1])
- std = paddle.to_tensor(std).reshape([-1, 1, 1])
- else:
- mean = paddle.to_tensor(mean)
- std = paddle.to_tensor(std)
+ _assert_image_tensor(img, data_format)
+
+ mean = paddle.to_tensor(mean, place=img.place)
+ std = paddle.to_tensor(std, place=img.place)
+
+ if _is_channel_first(data_format):
+ mean = mean.reshape([-1, 1, 1])
+ std = std.reshape([-1, 1, 1])
+
 return (img - mean) / std
+
+
+def to_grayscale(img, num_output_channels=1, data_format='CHW'):
+ """Converts image to grayscale version of image.
+
+ Args:
+ img (paddle.Tensor): Image to be converted to grayscale.
+ num_output_channels (int, optional[1, 3]):
+ if num_output_channels = 1 : returned image is single channel
+ if num_output_channels = 3 : returned image is 3 channel
+ data_format (str, optional): Data format of img, should be 'HWC' or
+ 'CHW'. Default: 'CHW'.
+
+ Returns:
+ paddle.Tensor: Grayscale version of the image.
+ """
+ _assert_image_tensor(img, data_format)
+
+ if num_output_channels not in (1, 3):
+ raise ValueError('num_output_channels should be either 1 or 3')
+
+ rgb_weights = paddle.to_tensor(
+ [0.2989, 0.5870, 0.1140], place=img.place).astype(img.dtype)
+
+ if _is_channel_first(data_format):
+ rgb_weights = rgb_weights.reshape((-1, 1, 1))
+
+ _c_index = _get_image_c_axis(data_format)
+
+ img = (img * rgb_weights).sum(axis=_c_index, keepdim=True)
+ _shape = img.shape
+ _shape[_c_index] = num_output_channels
+
+ return img.expand(_shape)
+
+
+def _affine_grid(theta, w, h, ow, oh):
+ d = 0.5
+ base_grid = paddle.ones((1, oh, ow, 3), dtype=theta.dtype)
+
+ x_grid = paddle.linspace(-ow * 0.5 + d, ow * 0.5 + d - 1, ow)
+ base_grid[..., 0] = x_grid
+ y_grid = paddle.linspace(-oh * 0.5 + d, oh * 0.5 + d - 1, oh).unsqueeze_(-1)
+ base_grid[..., 1] = y_grid
+
+ scaled_theta = theta.transpose(
+ (0, 2, 1)) / paddle.to_tensor([0.5 * w, 0.5 * h])
+ output_grid = base_grid.reshape((1, oh * ow, 3)).bmm(scaled_theta)
+
+ return output_grid.reshape((1, oh, ow, 2))
+
+
+def _grid_transform(img, grid, mode, fill):
+ if img.shape[0] > 1:
+ grid = grid.expand(img.shape[0], grid.shape[1], grid.shape[2],
+ grid.shape[3])
+
+ if fill is not None:
+ dummy = paddle.ones(
+ (img.shape[0], 1, img.shape[2], img.shape[3]), dtype=img.dtype)
+ img = paddle.concat((img, dummy), axis=1)
+
+ img = F.grid_sample(
+ img, grid, mode=mode, padding_mode="zeros", align_corners=False)
+
+ # Fill with required color
+ if fill is not None:
+ mask = img[:, -1:, :, :] # n 1 h w
+ img = img[:, :-1, :, :] # n c h w
+ mask = mask.expand_as(img)
+ len_fill = len(fill) if isinstance(fill, (tuple, list)) else 1
+ fill_img = paddle.to_tensor(fill).reshape(
+ (1, len_fill, 1, 1)).expand_as(img)
+
+ if mode == 'nearest':
+ mask = paddle.cast(mask < 0.5, img.dtype)
+ img = img * (1. - mask) + mask * fill_img
+ else: # 'bilinear'
+ img = img * mask + (1.0 - mask) * fill_img
+
+ return img
+
+
+def rotate(img,
+ angle,
+ interpolation='nearest',
+ expand=False,
+ center=None,
+ fill=None,
+ data_format='CHW'):
+ """Rotates the image by angle.
+
+ Args:
+ img (paddle.Tensor): Image to be rotated.
+ angle (float or int): Rotation angle in degrees, counter-clockwise.
+ interpolation (str, optional): Interpolation method. If omitted, or if the
+ image has only one channel, it is set to NEAREST. When using the pil backend,
+ the supported methods are as follows:
+ - "nearest"
+ - "bilinear"
+ - "bicubic"
+ expand (bool, optional): Optional expansion flag.
+ If true, expands the output image to make it large enough to hold the entire rotated image.
+ If false or omitted, the output image is the same size as the input image.
+ Note that the expand flag assumes rotation around the center and no translation.
+ center (2-tuple, optional): Optional center of rotation.
+ Origin is the upper left corner.
+ Default is the center of the image.
+ fill (3-tuple or int): RGB pixel fill value for the area outside the rotated image.
+ If int, it is used for all channels.
+
+ Returns:
+ paddle.Tensor: Rotated image.
+ + """ + + angle = -angle % 360 + img = img.unsqueeze(0) + + # n, c, h, w = img.shape + w, h = _get_image_size(img, data_format=data_format) + + img = img if data_format.lower() == 'chw' else img.transpose((0, 3, 1, 2)) + + post_trans = [0, 0] + + if center is None: + rotn_center = [0, 0] + else: + rotn_center = [(p - s * 0.5) for p, s in zip(center, [w, h])] + + angle = math.radians(angle) + matrix = [ + math.cos(angle), + math.sin(angle), + 0.0, + -math.sin(angle), + math.cos(angle), + 0.0, + ] + + matrix[2] += matrix[0] * (-rotn_center[0] - post_trans[0]) + matrix[1] * ( + -rotn_center[1] - post_trans[1]) + matrix[5] += matrix[3] * (-rotn_center[0] - post_trans[0]) + matrix[4] * ( + -rotn_center[1] - post_trans[1]) + + matrix[2] += rotn_center[0] + matrix[5] += rotn_center[1] + + matrix = paddle.to_tensor(matrix, place=img.place) + matrix = matrix.reshape((1, 2, 3)) + + if expand: + # calculate output size + corners = paddle.to_tensor( + [[-0.5 * w, -0.5 * h, 1.0], [-0.5 * w, 0.5 * h, 1.0], + [0.5 * w, 0.5 * h, 1.0], [0.5 * w, -0.5 * h, 1.0]], + place=matrix.place).astype(matrix.dtype) + + _pos = corners.reshape( + (1, -1, 3)).bmm(matrix.transpose((0, 2, 1))).reshape((1, -1, 2)) + _min = _pos.min(axis=-2).floor() + _max = _pos.max(axis=-2).ceil() + + npos = _max - _min + nw = npos[0][0] + nh = npos[0][1] + + ow, oh = int(nw.numpy()[0]), int(nh.numpy()[0]) + + else: + ow, oh = w, h + + grid = _affine_grid(matrix, w, h, ow, oh) + + out = _grid_transform(img, grid, mode=interpolation, fill=fill) + + out = out if data_format.lower() == 'chw' else out.transpose((0, 2, 3, 1)) + + return out.squeeze(0) + + +def vflip(img, data_format='CHW'): + """Vertically flips the given paddle tensor. + + Args: + img (paddle.Tensor): Image to be flipped. + data_format (str, optional): Data format of img, should be 'HWC' or + 'CHW'. Default: 'CHW'. + + Returns: + paddle.Tensor: Vertically flipped image. + + """ + _assert_image_tensor(img, data_format) + + h_axis = _get_image_h_axis(data_format) + + return img.flip(axis=[h_axis]) + + +def hflip(img, data_format='CHW'): + """Horizontally flips the given paddle.Tensor Image. + + Args: + img (paddle.Tensor): Image to be flipped. + data_format (str, optional): Data format of img, should be 'HWC' or + 'CHW'. Default: 'CHW'. + + Returns: + paddle.Tensor: Horizontall flipped image. + + """ + _assert_image_tensor(img, data_format) + + w_axis = _get_image_w_axis(data_format) + + return img.flip(axis=[w_axis]) + + +def crop(img, top, left, height, width, data_format='CHW'): + """Crops the given paddle.Tensor Image. + + Args: + img (paddle.Tensor): Image to be cropped. (0,0) denotes the top left + corner of the image. + top (int): Vertical component of the top left corner of the crop box. + left (int): Horizontal component of the top left corner of the crop box. + height (int): Height of the crop box. + width (int): Width of the crop box. + data_format (str, optional): Data format of img, should be 'HWC' or + 'CHW'. Default: 'CHW'. + Returns: + paddle.Tensor: Cropped image. + + """ + _assert_image_tensor(img, data_format) + + if _is_channel_first(data_format): + return img[:, top:top + height, left:left + width] + else: + return img[top:top + height, left:left + width, :] + + +def center_crop(img, output_size, data_format='CHW'): + """Crops the given paddle.Tensor Image and resize it to desired size. + + Args: + img (paddle.Tensor): Image to be cropped. (0,0) denotes the top left corner of the image. + output_size (sequence or int): (height, width) of the crop box. 
If int, + it is used for both directions + data_format (str, optional): Data format of img, should be 'HWC' or + 'CHW'. Default: 'CHW'. + Returns: + paddle.Tensor: Cropped image. + + """ + _assert_image_tensor(img, data_format) + + if isinstance(output_size, numbers.Number): + output_size = (int(output_size), int(output_size)) + + image_width, image_height = _get_image_size(img, data_format) + crop_height, crop_width = output_size + crop_top = int(round((image_height - crop_height) / 2.)) + crop_left = int(round((image_width - crop_width) / 2.)) + return crop( + img, + crop_top, + crop_left, + crop_height, + crop_width, + data_format=data_format) + + +def pad(img, padding, fill=0, padding_mode='constant', data_format='CHW'): + """ + Pads the given paddle.Tensor on all sides with specified padding mode and fill value. + + Args: + img (paddle.Tensor): Image to be padded. + padding (int|list|tuple): Padding on each border. If a single int is provided this + is used to pad all borders. If tuple of length 2 is provided this is the padding + on left/right and top/bottom respectively. If a tuple of length 4 is provided + this is the padding for the left, top, right and bottom borders + respectively. + fill (float, optional): Pixel fill value for constant fill. If a tuple of + length 3, it is used to fill R, G, B channels respectively. + This value is only used when the padding_mode is constant. Default: 0. + padding_mode: Type of padding. Should be: constant, edge, reflect or symmetric. Default: 'constant'. + + - constant: pads with a constant value, this value is specified with fill + + - edge: pads with the last value on the edge of the image + + - reflect: pads with reflection of image (without repeating the last value on the edge) + + padding [1, 2, 3, 4] with 2 elements on both sides in reflect mode + will result in [3, 2, 1, 2, 3, 4, 3, 2] + + - symmetric: pads with reflection of image (repeating the last value on the edge) + + padding [1, 2, 3, 4] with 2 elements on both sides in symmetric mode + will result in [2, 1, 1, 2, 3, 4, 4, 3] + + Returns: + paddle.Tensor: Padded image. 
+
+ """
+ _assert_image_tensor(img, data_format)
+
+ if not isinstance(padding, (numbers.Number, list, tuple)):
+ raise TypeError('Got inappropriate padding arg')
+ if not isinstance(fill, (numbers.Number, str, list, tuple)):
+ raise TypeError('Got inappropriate fill arg')
+ if not isinstance(padding_mode, str):
+ raise TypeError('Got inappropriate padding_mode arg')
+
+ if isinstance(padding, (list, tuple)) and len(padding) not in [2, 4]:
+ raise ValueError(
+ "Padding must be an int or a 2, or 4 element tuple, not a " +
+ "{} element tuple".format(len(padding)))
+
+ assert padding_mode in ['constant', 'edge', 'reflect', 'symmetric'], \
+ 'Padding mode should be either constant, edge, reflect or symmetric'
+
+ if isinstance(padding, int):
+ pad_left = pad_right = pad_top = pad_bottom = padding
+ elif len(padding) == 2:
+ pad_left = pad_right = padding[0]
+ pad_top = pad_bottom = padding[1]
+ else:
+ pad_left = padding[0]
+ pad_top = padding[1]
+ pad_right = padding[2]
+ pad_bottom = padding[3]
+
+ padding = [pad_left, pad_right, pad_top, pad_bottom]
+
+ if padding_mode == 'edge':
+ padding_mode = 'replicate'
+ elif padding_mode == 'symmetric':
+ raise ValueError('Do not support symmetric mode')
+
+ img = img.unsqueeze(0)
+ # 'constant', 'reflect', 'replicate', 'circular'
+ img = F.pad(img,
+ pad=padding,
+ mode=padding_mode,
+ value=float(fill),
+ data_format='N' + data_format)
+
+ return img.squeeze(0)
+
+
+def resize(img, size, interpolation='bilinear', data_format='CHW'):
+ """
+ Resizes the image to the given size.
+
+ Args:
+ img (paddle.Tensor): Image to be resized.
+ size (int|list|tuple): Target size of input data, with (height, width) shape.
+ interpolation (int|str, optional): Interpolation method. When using the paddle backend,
+ the supported methods are as follows:
+ - "nearest"
+ - "bilinear"
+ - "bicubic"
+ - "trilinear"
+ - "area"
+ - "linear"
+ data_format (str, optional): paddle.Tensor format
+ - 'CHW'
+ - 'HWC'
+ Returns:
+ paddle.Tensor: Resized image.
+ + """ + _assert_image_tensor(img, data_format) + + if not (isinstance(size, int) or + (isinstance(size, (tuple, list)) and len(size) == 2)): + raise TypeError('Got inappropriate size arg: {}'.format(size)) + + if isinstance(size, int): + w, h = _get_image_size(img, data_format) + if (w <= h and w == size) or (h <= w and h == size): + return img + if w < h: + ow = size + oh = int(size * h / w) + else: + oh = size + ow = int(size * w / h) + else: + oh, ow = size + + img = img.unsqueeze(0) + img = F.interpolate( + img, + size=(oh, ow), + mode=interpolation.lower(), + data_format='N' + data_format.upper()) + + return img.squeeze(0) diff --git a/python/paddle/vision/transforms/transforms.py b/python/paddle/vision/transforms/transforms.py index 6eeb726fcee..00e12689c4d 100644 --- a/python/paddle/vision/transforms/transforms.py +++ b/python/paddle/vision/transforms/transforms.py @@ -49,6 +49,8 @@ def _get_image_size(img): return img.size elif F._is_numpy_image(img): return img.shape[:2][::-1] + elif F._is_tensor_image(img): + return img.shape[1:][::-1] # chw else: raise TypeError("Unexpected type {}".format(type(img))) @@ -690,6 +692,9 @@ class Transpose(BaseTransform): self.order = order def _apply_image(self, img): + if F._is_tensor_image(img): + return img.transpose(self.order) + if F._is_pil_image(img): img = np.asarray(img) -- GitLab From 109fdf142835b9ea4553442e51231414cccf0d49 Mon Sep 17 00:00:00 2001 From: XiangGao Date: Fri, 30 Apr 2021 15:19:29 +0800 Subject: [PATCH 070/720] add flag to check_kernel launch (#32692) --- paddle/fluid/framework/op_registry.h | 13 +++++++++---- paddle/fluid/platform/flags.cc | 13 +++++++++++++ paddle/fluid/pybind/global_value_getter_setter.cc | 3 ++- 3 files changed, 24 insertions(+), 5 deletions(-) diff --git a/paddle/fluid/framework/op_registry.h b/paddle/fluid/framework/op_registry.h index 9f0dc50774a..593d4d839fa 100644 --- a/paddle/fluid/framework/op_registry.h +++ b/paddle/fluid/framework/op_registry.h @@ -25,7 +25,8 @@ limitations under the License. */ #include #define GLOG_NO_ABBREVIATED_SEVERITIES // msvc conflict logging with windows.h -#include "glog/logging.h" // For VLOG() +#include "gflags/gflags.h" +#include "glog/logging.h" // For VLOG() #include "paddle/fluid/framework/attribute.h" #include "paddle/fluid/framework/details/op_registry.h" #include "paddle/fluid/framework/grad_op_desc_maker.h" @@ -67,6 +68,8 @@ class Version; } // namespace framework } // namespace paddle +DECLARE_bool(check_kernel_launch); + namespace paddle { namespace framework { @@ -135,14 +138,16 @@ class OpRegistry { }; template -inline void CheckKernelLaunch(const char* op_type){}; +inline void CheckKernelLaunch(const char* op_type) {} #ifdef PADDLE_WITH_CUDA template <> inline void CheckKernelLaunch<::paddle::platform::CUDAPlace>( const char* op_type) { - PADDLE_ENFORCE_CUDA_LAUNCH_SUCCESS(op_type); -}; + if (FLAGS_check_kernel_launch) { + PADDLE_ENFORCE_CUDA_LAUNCH_SUCCESS(op_type); + } +} #endif template diff --git a/paddle/fluid/platform/flags.cc b/paddle/fluid/platform/flags.cc index 83b9544d232..1d76c2ea584 100644 --- a/paddle/fluid/platform/flags.cc +++ b/paddle/fluid/platform/flags.cc @@ -578,6 +578,19 @@ DEFINE_string(tracer_mkldnn_ops_on, "", DEFINE_string(tracer_mkldnn_ops_off, "", "List of OneDNN operation types to be turned off"); +/** + * Debug related FLAG + * Name: check_kernel_launch + * Since Version: 2.1.0 + * Value Range: bool, default=false + * Example: + * Note: Check kernel launch status after every kernel compute. 
+ */ +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) +DEFINE_bool(check_kernel_launch, false, + "Check kernel launch status after every kernel compute"); +#endif + /** * CUDNN related FLAG * Name: conv2d_disable_cudnn diff --git a/paddle/fluid/pybind/global_value_getter_setter.cc b/paddle/fluid/pybind/global_value_getter_setter.cc index bc8d1e5b405..4824a34e843 100644 --- a/paddle/fluid/pybind/global_value_getter_setter.cc +++ b/paddle/fluid/pybind/global_value_getter_setter.cc @@ -41,6 +41,7 @@ DECLARE_int32(multiple_of_cupti_buffer_size); DECLARE_bool(reader_queue_speed_test_mode); DECLARE_int32(call_stack_level); DECLARE_bool(sort_sum_gradient); +DECLARE_bool(check_kernel_launch); // device management DECLARE_int32(paddle_num_threads); // executor @@ -376,7 +377,7 @@ static void RegisterGlobalVarGetterSetter() { FLAGS_fraction_of_gpu_memory_to_use, FLAGS_initial_gpu_memory_in_mb, FLAGS_reallocate_gpu_memory_in_mb, FLAGS_enable_cublas_tensor_op_math, FLAGS_selected_gpus, FLAGS_sync_nccl_allreduce, - FLAGS_conv2d_disable_cudnn); + FLAGS_conv2d_disable_cudnn, FLAGS_check_kernel_launch); #endif #ifdef PADDLE_WITH_XPU REGISTER_PUBLIC_GLOBAL_VAR(FLAGS_selected_xpus); -- GitLab From 4d95c8c7a1af422a88ea1ca46d763fe6ae5a7ebd Mon Sep 17 00:00:00 2001 From: Feiyu Chan Date: Fri, 30 Apr 2021 15:29:03 +0800 Subject: [PATCH 071/720] avoid polluting logging's root logger (#32673) avoid polluting logging's root logger --- .../meta_optimizers/sharding_optimizer.py | 89 ++++++++++--------- .../distributed/fleet/utils/recompute.py | 11 ++- .../fluid/incubate/fleet/utils/utils.py | 7 +- .../utils/cpp_extension/extension_utils.py | 9 +- 4 files changed, 64 insertions(+), 52 deletions(-) diff --git a/python/paddle/distributed/fleet/meta_optimizers/sharding_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/sharding_optimizer.py index 852421523b1..db6925ace5a 100755 --- a/python/paddle/distributed/fleet/meta_optimizers/sharding_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/sharding_optimizer.py @@ -29,9 +29,12 @@ from paddle.fluid.framework import Program, Variable, name_scope, default_main_p from paddle.fluid import layers import logging -logging.basicConfig( - format='%(asctime)s %(levelname)-8s %(message)s', - datefmt='%Y-%m-%d %H:%M:%S') +logger = logging.getLogger(__name__) +formatter = logging.Formatter( + fmt='%(asctime)s %(levelname)-8s %(message)s', datefmt='%Y-%m-%d %H:%M:%S') +ch = logging.StreamHandler() +ch.setFormatter(formatter) +logger.addHandler(ch) from functools import reduce __all__ = ["ShardingOptimizer"] @@ -136,7 +139,7 @@ class ShardingOptimizer(MetaOptimizerBase): # FIXME (JZ-LIANG) deprecated hybrid_dp if self.user_defined_strategy.sharding_configs["hybrid_dp"]: - logging.warning( + logger.warning( "[hybrid_dp] API setting is deprecated. Now when dp_degree >= 2, its will be in hybrid dp mode automatically" ) assert self.dp_degree >= 1 @@ -174,7 +177,7 @@ class ShardingOptimizer(MetaOptimizerBase): self._gradient_merge_acc_step = self.user_defined_strategy.pipeline_configs[ 'accumulate_steps'] if self._gradient_merge_acc_step > 1: - logging.info("Gradient merge in [{}], acc step = [{}]".format( + logger.info("Gradient merge in [{}], acc step = [{}]".format( self.gradient_merge_mode, self._gradient_merge_acc_step)) # optimize offload @@ -338,7 +341,7 @@ class ShardingOptimizer(MetaOptimizerBase): # opt offload should be enable while gradient merge is enable && acc_step is quite large (e.g. 
>> 100) # sync its memcpy could not be overlap with calc, otherwise it will slower down training severely. if self.optimize_offload: - logging.info("Sharding with optimize offload !") + logger.info("Sharding with optimize offload !") offload_helper = OffloadHelper() offload_helper.offload(main_block, startup_block) offload_helper.offload_fp32param(main_block, startup_block) @@ -641,15 +644,15 @@ class ShardingOptimizer(MetaOptimizerBase): for varname in sorted( var2broadcast_time, key=var2broadcast_time.get, reverse=True): - logging.info("Sharding broadcast: [{}] times [{}]".format( + logger.info("Sharding broadcast: [{}] times [{}]".format( var2broadcast_time[varname], varname)) for idx_ in range(len(self._segments)): - logging.info("segment [{}] :".format(idx_)) - logging.info("start op: [{}] [{}]".format(block.ops[ + logger.info("segment [{}] :".format(idx_)) + logger.info("start op: [{}] [{}]".format(block.ops[ self._segments[idx_]._start_idx].desc.type(), block.ops[ self._segments[idx_]._start_idx].desc.input_arg_names( ))) - logging.info("end op: [{}] [{}]".format(block.ops[ + logger.info("end op: [{}] [{}]".format(block.ops[ self._segments[idx_]._end_idx].desc.type(), block.ops[ self._segments[idx_]._end_idx].desc.input_arg_names())) return @@ -1108,7 +1111,7 @@ class ShardingOptimizer(MetaOptimizerBase): self.dp_group_endpoints.append(self.global_endpoints[ dp_first_rank_idx + dp_offset * i]) assert self.current_endpoint in self.dp_group_endpoints - logging.info("Hybrid DP mode turn on !") + logger.info("Hybrid DP mode turn on !") else: self.dp_ring_id = -1 self.dp_rank = -1 @@ -1119,40 +1122,40 @@ class ShardingOptimizer(MetaOptimizerBase): # NOTE (JZ-LIANG) when use global ring for calc global norm and dp_degree > 1, the allreduce result should be devided by dp_degree self.global_ring_id = 3 - logging.info("global word size: {}".format(self.global_word_size)) - logging.info("global rank: {}".format(self.global_rank)) - logging.info("global endpoints: {}".format(self.global_endpoints)) - logging.info("global ring id: {}".format(self.global_ring_id)) - logging.info("#####" * 6) - - logging.info("mp group size: {}".format(self.mp_degree)) - logging.info("mp rank: {}".format(self.mp_rank)) - logging.info("mp group id: {}".format(self.mp_group_id)) - logging.info("mp group endpoints: {}".format(self.mp_group_endpoints)) - logging.info("mp ring id: {}".format(self.mp_ring_id)) - logging.info("#####" * 6) - - logging.info("sharding group size: {}".format(self.sharding_degree)) - logging.info("sharding rank: {}".format(self.sharding_rank)) - logging.info("sharding group id: {}".format(self.sharding_group_id)) - logging.info("sharding group endpoints: {}".format( + logger.info("global word size: {}".format(self.global_word_size)) + logger.info("global rank: {}".format(self.global_rank)) + logger.info("global endpoints: {}".format(self.global_endpoints)) + logger.info("global ring id: {}".format(self.global_ring_id)) + logger.info("#####" * 6) + + logger.info("mp group size: {}".format(self.mp_degree)) + logger.info("mp rank: {}".format(self.mp_rank)) + logger.info("mp group id: {}".format(self.mp_group_id)) + logger.info("mp group endpoints: {}".format(self.mp_group_endpoints)) + logger.info("mp ring id: {}".format(self.mp_ring_id)) + logger.info("#####" * 6) + + logger.info("sharding group size: {}".format(self.sharding_degree)) + logger.info("sharding rank: {}".format(self.sharding_rank)) + logger.info("sharding group id: {}".format(self.sharding_group_id)) + logger.info("sharding group 
endpoints: {}".format( self.sharding_group_endpoints)) - logging.info("sharding ring id: {}".format(self.sharding_ring_id)) - logging.info("#####" * 6) - - logging.info("pp group size: {}".format(self.pp_degree)) - logging.info("pp rank: {}".format(self.pp_rank)) - logging.info("pp group id: {}".format(self.pp_group_id)) - logging.info("pp group endpoints: {}".format(self.pp_group_endpoints)) - logging.info("pp ring id: {}".format(self.pp_ring_id)) - logging.info("#####" * 6) - - logging.info("pure dp group size: {}".format(self.dp_degree)) - logging.info("pure dp rank: {}".format(self.dp_rank)) - logging.info("pure dp group endpoints: {}".format( + logger.info("sharding ring id: {}".format(self.sharding_ring_id)) + logger.info("#####" * 6) + + logger.info("pp group size: {}".format(self.pp_degree)) + logger.info("pp rank: {}".format(self.pp_rank)) + logger.info("pp group id: {}".format(self.pp_group_id)) + logger.info("pp group endpoints: {}".format(self.pp_group_endpoints)) + logger.info("pp ring id: {}".format(self.pp_ring_id)) + logger.info("#####" * 6) + + logger.info("pure dp group size: {}".format(self.dp_degree)) + logger.info("pure dp rank: {}".format(self.dp_rank)) + logger.info("pure dp group endpoints: {}".format( self.dp_group_endpoints)) - logging.info("pure dp ring id: {}".format(self.dp_ring_id)) - logging.info("#####" * 6) + logger.info("pure dp ring id: {}".format(self.dp_ring_id)) + logger.info("#####" * 6) return diff --git a/python/paddle/distributed/fleet/utils/recompute.py b/python/paddle/distributed/fleet/utils/recompute.py index 0dc305ec77d..d61c3cfd1e5 100644 --- a/python/paddle/distributed/fleet/utils/recompute.py +++ b/python/paddle/distributed/fleet/utils/recompute.py @@ -19,9 +19,12 @@ from paddle.fluid import framework import contextlib import logging -logging.basicConfig( - format='%(asctime)s %(levelname)-8s %(message)s', - datefmt='%Y-%m-%d %H:%M:%S') +logger = logging.getLogger(__name__) +formatter = logging.Formatter( + fmt='%(asctime)s %(levelname)-8s %(message)s', datefmt='%Y-%m-%d %H:%M:%S') +ch = logging.StreamHandler() +ch.setFormatter(formatter) +logger.addHandler(ch) def detach_variable(inputs): @@ -40,7 +43,7 @@ def detach_variable(inputs): def check_recompute_necessary(inputs): if not any(input_.stop_gradient == False for input_ in inputs if isinstance(input_, paddle.Tensor)): - logging.warn( + logger.warn( "[Recompute]: None of the inputs to current recompute block need grad, " "therefore there is NO need to recompute this block in backward !") diff --git a/python/paddle/fluid/incubate/fleet/utils/utils.py b/python/paddle/fluid/incubate/fleet/utils/utils.py index 79f3fb91934..5cb4948a859 100644 --- a/python/paddle/fluid/incubate/fleet/utils/utils.py +++ b/python/paddle/fluid/incubate/fleet/utils/utils.py @@ -34,9 +34,12 @@ __all__ = [ "graphviz" ] -logging.basicConfig( - format='%(asctime)s - %(levelname)s - %(message)s', level=logging.INFO) logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) +formatter = logging.Formatter(fmt='%(asctime)s - %(levelname)s - %(message)s') +ch = logging.StreamHandler() +ch.setFormatter(formatter) +logger.addHandler(ch) persistable_vars_out_fn = "vars_persistable.log" all_vars_out_fn = "vars_all.log" diff --git a/python/paddle/utils/cpp_extension/extension_utils.py b/python/paddle/utils/cpp_extension/extension_utils.py index aa5a7ab533a..c055084886c 100644 --- a/python/paddle/utils/cpp_extension/extension_utils.py +++ b/python/paddle/utils/cpp_extension/extension_utils.py @@ -32,9 +32,12 @@ from 
...fluid import core from ...fluid.framework import OpProtoHolder from ...sysconfig import get_include, get_lib -logging.basicConfig( - format='%(asctime)s - %(levelname)s - %(message)s', level=logging.INFO) logger = logging.getLogger("utils.cpp_extension") +logger.setLevel(logging.INFO) +formatter = logging.Formatter(fmt='%(asctime)s - %(levelname)s - %(message)s') +ch = logging.StreamHandler() +ch.setFormatter(formatter) +logger.addHandler(ch) OS_NAME = sys.platform IS_WINDOWS = OS_NAME.startswith('win') @@ -1125,4 +1128,4 @@ def log_v(info, verbose=True): Print log information on stdout. """ if verbose: - logging.info(info) + logger.info(info) -- GitLab From 0a0f3244fe8de2c869c43987bcf2050b799467bd Mon Sep 17 00:00:00 2001 From: zlsh80826 Date: Fri, 30 Apr 2021 17:48:38 +0800 Subject: [PATCH 072/720] loose affine channel fp16 atol (#32581) --- .../tests/unittests/ir/inference/test_trt_affine_channel_op.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_affine_channel_op.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_affine_channel_op.py index 8bbba7c8b55..90cdf784b1f 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_affine_channel_op.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_affine_channel_op.py @@ -70,7 +70,7 @@ class TRTAffineChannelTest(InferencePassTest): use_gpu = True atol = 1e-5 if self.trt_parameters.precision == AnalysisConfig.Precision.Half: - atol = 1e-3 + atol = 2e-2 self.check_output_with_option(use_gpu, atol, flatten=True) self.assertTrue( PassVersionChecker.IsCompatible('tensorrt_subgraph_pass')) -- GitLab From 002681942fec43b24e49bde71dd82954666f4e02 Mon Sep 17 00:00:00 2001 From: Huihuang Zheng Date: Fri, 30 Apr 2021 18:04:31 +0800 Subject: [PATCH 073/720] [Dy2stat] Fix to_tensor Bug Reported from QA (#32701) Dy2stat failed when user writes return paddle.to_tensor(xxx), the reason is that visit_Expr doesn't work when the Expr is in return. Some other statements may trigger same bug. To fix it, we re-wrote a transformer to transform paddle.to_tensor to paddle.assign for all Call nodes. 
--- .../basic_api_transformer.py | 33 +++++++++++++++---- .../test_basic_api_transformation.py | 6 ++-- 2 files changed, 28 insertions(+), 11 deletions(-) diff --git a/python/paddle/fluid/dygraph/dygraph_to_static/basic_api_transformer.py b/python/paddle/fluid/dygraph/dygraph_to_static/basic_api_transformer.py index 198c2920eec..5ea1fdfac09 100644 --- a/python/paddle/fluid/dygraph/dygraph_to_static/basic_api_transformer.py +++ b/python/paddle/fluid/dygraph/dygraph_to_static/basic_api_transformer.py @@ -33,10 +33,11 @@ class BasicApiTransformer(gast.NodeTransformer): self.root = wrapper_root.node self.class_node_dict = {} - self.name_to_tensor_shape = {} - def transform(self): + to_tensor_transformer = ToTensorTransformer(self.root) + to_tensor_transformer.transform() self.visit(self.root) + return self.wrapper_root def visit_Assign(self, node): @@ -62,11 +63,6 @@ class BasicApiTransformer(gast.NodeTransformer): def _visit_Call(self, node): assert isinstance(node, gast.Call) - # Replace API `to_variable` with `fluid.layers.assign` - if is_to_variable(node): - node = to_assign_node(node) - return node - func_name = astor.to_source(gast.gast_to_ast(node.func)) if self._is_dygraph_forward(func_name): @@ -102,6 +98,29 @@ class BasicApiTransformer(gast.NodeTransformer): return False +class ToTensorTransformer(gast.NodeTransformer): + """ + Class to transform paddle.to_tensor and paddle.to_variable to paddle.assign + """ + + def __init__(self, node): + assert isinstance( + node, gast.AST + ), "Input non-gast.AST node for the initialization of ToTensorTransformer." + self.root = node + + def transform(self): + self.visit(self.root) + return self.root + + def visit_Call(self, node): + assert isinstance(node, gast.Call) + if is_to_variable(node): + node = to_assign_node(node) + self.generic_visit(node) + return node + + def is_to_variable(node): assert isinstance(node, gast.Call) api_name = utils.ast_to_source_code(node.func).strip() diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_basic_api_transformation.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_basic_api_transformation.py index 630b804f9a2..ea745ad6614 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_basic_api_transformation.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_basic_api_transformation.py @@ -64,13 +64,11 @@ def dyfunc_int_to_tensor(x): def dyfunc_float_to_tensor(x): - res = paddle.to_tensor(2.0) - return res + return paddle.to_tensor(2.0) def dyfunc_bool_to_tensor(x): - res = paddle.to_tensor(True) - return res + return paddle.to_tensor(True) class TestDygraphBasicApi_ToVariable(unittest.TestCase): -- GitLab From 3cc11a3dfd6bbaf2cb4171903f5182636cf12e90 Mon Sep 17 00:00:00 2001 From: WeiXin Date: Fri, 30 Apr 2021 23:22:10 +0800 Subject: [PATCH 074/720] pylayer_op:release context after compute. 
(#32707) --- paddle/fluid/imperative/py_layer_fwd.h | 5 +++-- paddle/fluid/operators/py_layer_op.cc | 9 ++++++--- paddle/fluid/operators/py_layer_op.h | 11 +++++++++-- 3 files changed, 18 insertions(+), 7 deletions(-) diff --git a/paddle/fluid/imperative/py_layer_fwd.h b/paddle/fluid/imperative/py_layer_fwd.h index ccfd5b0e2db..de5f9d75e91 100644 --- a/paddle/fluid/imperative/py_layer_fwd.h +++ b/paddle/fluid/imperative/py_layer_fwd.h @@ -63,15 +63,16 @@ std::shared_ptr CreateGradOpNode( } } -py::object PyLayerApply(const platform::Place& place, const py::object& cls, +py::object PyLayerApply(const platform::Place& place, const py::handle& cls, const py::args args, const py::kwargs kwargs) { + py::gil_scoped_acquire guard; auto bk_function = cls.attr("_backward_function"); auto context = bk_function(); auto forward = cls.attr("forward"); auto result_forward = forward(context, *args, **kwargs); std::shared_ptr py_layer_ctx = - std::make_shared(context.release().ptr()); + std::make_shared(context.ptr()); // make inputs to varbase std::vector> input_vars; // process args,`input_vars` only collect `imperative::VarBase` diff --git a/paddle/fluid/operators/py_layer_op.cc b/paddle/fluid/operators/py_layer_op.cc index 0090747d116..f91496eeab1 100644 --- a/paddle/fluid/operators/py_layer_op.cc +++ b/paddle/fluid/operators/py_layer_op.cc @@ -157,9 +157,12 @@ class PyLayerOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { auto &op_ = ctx.GetOp(); - auto pylayer_op = dynamic_cast(&op_); - if (pylayer_op) { - auto py_layer_context = pylayer_op->GetPyLayerContext(); + auto const_pylayer_op = dynamic_cast(&op_); + if (const_pylayer_op) { + auto pylayer_op = const_cast(const_pylayer_op); + + // Release contex after executing the compute + auto py_layer_context = pylayer_op->ReleasePyLayerContext(); py::object bk_ctx(py::handle(py_layer_context->GetMutableCtx()), true); auto &input_vars = ctx.MultiInputVar("X"); auto output_vars = ctx.MultiOutputVar("Out"); diff --git a/paddle/fluid/operators/py_layer_op.h b/paddle/fluid/operators/py_layer_op.h index 133435aa84d..d80faab90b2 100644 --- a/paddle/fluid/operators/py_layer_op.h +++ b/paddle/fluid/operators/py_layer_op.h @@ -34,6 +34,10 @@ class PyLayerContext { PyLayerContext() = delete; PyObject* GetMutableCtx() { return context_; } + ~PyLayerContext() { + py::gil_scoped_acquire guard; + Py_XDECREF(context_); + } private: PyObject* context_; @@ -58,8 +62,11 @@ class PyLayerOp : public framework::OperatorWithKernel { void SetPyLayerContext(const std::shared_ptr& py_context) { py_context_ = py_context; } - const std::shared_ptr& GetPyLayerContext() const { - return py_context_; + std::shared_ptr ReleasePyLayerContext() { + auto temp = py_context_; + py_context_.reset(); + VLOG(3) << "`py_context_` in the PyLayerOp is released."; + return temp; } private: -- GitLab From f4a3f85bb21e3eb76ade4116dbc4afbada791630 Mon Sep 17 00:00:00 2001 From: ShenLiang <1422485404@qq.com> Date: Sat, 1 May 2021 04:23:23 +0800 Subject: [PATCH 075/720] fix traverse graph in reducer (#32715) --- paddle/fluid/imperative/reducer.cc | 5 ----- 1 file changed, 5 deletions(-) diff --git a/paddle/fluid/imperative/reducer.cc b/paddle/fluid/imperative/reducer.cc index bf479e0d797..e3dd0a2aa75 100644 --- a/paddle/fluid/imperative/reducer.cc +++ b/paddle/fluid/imperative/reducer.cc @@ -443,10 +443,6 @@ void Reducer::PrepareDeps(const std::unordered_set &init_nodes) { auto *cur_node = q.front(); q.pop(); - for (auto &cur_op : 
*cur_node) { - cur_op.EnforceHasInOut(); - } - const auto &grad_pending_nodes = cur_node->GradPendingNodes(); for (auto &grad_pending_node : grad_pending_nodes) { PADDLE_ENFORCE_NOT_NULL( @@ -523,7 +519,6 @@ void Reducer::PrepareForBackward( q.pop(); for (const auto &cur_op : *cur_node) { - cur_op.EnforceHasInOut(); auto &bwd_outs = cur_op.GetOutsMap(); for (const auto &pair : bwd_outs) { if (!pair.second.IsGrad()) { -- GitLab From a0f4ac54ee03e8b1197b6c44b43abd5db49c0c78 Mon Sep 17 00:00:00 2001 From: lilong12 Date: Mon, 3 May 2021 21:34:54 +0800 Subject: [PATCH 076/720] Fix the bug in pipeline for dygraph mode (#32716) * update, test=develop --- .../parallel_layers/pp_layers.py | 1 - .../fleet/meta_parallel/pipeline_parallel.py | 342 ++++++++++-------- .../fleet/meta_parallel/pp_utils/utils.py | 43 ++- 3 files changed, 231 insertions(+), 155 deletions(-) diff --git a/python/paddle/distributed/fleet/meta_parallel/parallel_layers/pp_layers.py b/python/paddle/distributed/fleet/meta_parallel/parallel_layers/pp_layers.py index 669ed032a34..a9704e38f3f 100644 --- a/python/paddle/distributed/fleet/meta_parallel/parallel_layers/pp_layers.py +++ b/python/paddle/distributed/fleet/meta_parallel/parallel_layers/pp_layers.py @@ -108,7 +108,6 @@ class PipelineLayer(Layer): # construct layer self.run_function = [] self._build_layer() - self.to(paddle.CUDAPlace(self.device_id)) def _segment_network(self, seg_method): logger.info("start segment network..") diff --git a/python/paddle/distributed/fleet/meta_parallel/pipeline_parallel.py b/python/paddle/distributed/fleet/meta_parallel/pipeline_parallel.py index 98a82f2b798..11180054afb 100644 --- a/python/paddle/distributed/fleet/meta_parallel/pipeline_parallel.py +++ b/python/paddle/distributed/fleet/meta_parallel/pipeline_parallel.py @@ -22,15 +22,11 @@ from numpy import prod import paddle import paddle.fluid as fluid from .meta_parallel_base import MetaParallelBase -from .pp_utils.utils import get_tensor_bytes +from .pp_utils.utils import get_tensor_bytes, is_float_tensor from .pp_utils import utils from .parallel_layers.pp_layers import PipelineLayer - -FLOAT_TYPES = [ - paddle.float16, - paddle.float32, - paddle.float64, -] +from ..utils.hybrid_parallel_util import * +from ..utils.log_util import logger class PipelineParallel(MetaParallelBase): @@ -46,20 +42,18 @@ class PipelineParallel(MetaParallelBase): 'inputs': [], 'labels': [], 'outputs': [], - 'backward_tensors': [], } + self.recv_cache = None self.grad_tensors = None - self.meta_buffer = None - self.send_meta = True - self.first_gradient_send = True self.current_loss = paddle.to_tensor(0.0) self.total_loss = None - def _prepare_for_model(self): + self.use_amp = self._strategy.amp + self.init_loss_scaling = self._strategy.amp_configs['init_loss_scaling'] self.micro_batch_size = self._strategy.pipeline_configs[ 'micro_batch_size'] self.accumulate_steps = self._strategy.pipeline_configs[ @@ -69,9 +63,17 @@ class PipelineParallel(MetaParallelBase): self.stage_id = self._hcg.get_stage_id() self.prev_stage_id = self.stage_id - 1 self.next_stage_id = self.stage_id + 1 - self._layers = PipelineLayer( - layers=self._layers, num_stages=self.num_stages) - #TODO: init process group + self.pp_group = self._hcg.get_pipe_parallel_group() + logger.info("Pipeline Info -- num_stages: {}, stage_id: {}".format( + self.num_stages, self.stage_id)) + + if self.use_model_parallel: + logger.info("start broadcast mp parameters") + broadcast_mp_parameters(self._layers, self._hcg) + + if self.use_data_parallel: + 
logger.info("start broadcast mp parameters") + broadcast_dp_parameters(self._layers, self._hcg) def _allocate_caches(self, num_caches): if self.num_caches >= num_caches: @@ -82,19 +84,19 @@ class PipelineParallel(MetaParallelBase): for key in self.caches: self.caches[key].extend([None] * num) - def train_batch(self, data_iter, optimizer): + def train_batch(self, data, optimizer): self.optimizer = optimizer assert fluid.framework._dygraph_tracer()._has_grad, ( 'Please enable the generation of gradients.') if self.stage_id == 0 or self.stage_id == self.num_stages - 1: - assert data_iter, ( + assert data, ( "For the first and the last stage, the data_iter must be set.") else: - assert data_iter is None, ( + assert data is None, ( "For pipe stages other than the first and the last one, " "the data_iter must be None.") - self.data_iter = data_iter + self.data = data self._layers.train() self.total_loss = None @@ -104,39 +106,24 @@ class PipelineParallel(MetaParallelBase): return self.total_loss def _train(self, minibatch_cmds): - self._allocate_caches(self.num_stages) - for microbatch_cmds in minibatch_cmds: - for cmd in microbatch_cmds: - if type(cmd) not in self._COMMAND_MAP: - #FIXME: - continue - + self._allocate_caches(self.accumulate_steps) + for micro_cmds in minibatch_cmds: + for cmd in micro_cmds: + assert type(cmd) in self._COMMAND_MAP, "unknow cmd: {}".format( + type(cmd)) self._apply_cmd = MethodType(self._COMMAND_MAP[type(cmd)], self) self._apply_cmd(**cmd.kwargs) def _allreduce_grads(self): - self._modifying_grad = True - assert self.use_data_parallel <= 1, ("Do not support data parallel " - "with pipeline parallel now.") - self._modifying_grad = False - - def _get_data(self): - if self.use_model_parallel: - mp_rank = self._hcg.get_model_parallel_rank() - else: - mp_rank = 0 - - data = None - - # mp rank 0 loads the data and broadcat it to others. 
- if mp_rank == 0: - data = next(self.data_iter) - if self.use_model_parallel: - data = paddle.distributed.broadcast( - data, group=self._hcg.get_model_parallel_group()) - return data + if not self.use_data_parallel: return + fused_allreduce_gradients(list(self._layers.parameters()), self._hcg) def _forward(self, cache_id): + # load data + self._load_micro_batch(cache_id) + if self.stage_id != 0: + self._recv_activations(cache_id) + if isinstance(self.caches['inputs'][cache_id], tuple): inputs = tuple(t.clone() for t in self.caches['inputs'][cache_id]) else: @@ -144,9 +131,13 @@ class PipelineParallel(MetaParallelBase): self._clear_grads(inputs) outputs = self._layers.forward(inputs) - self.caches['outputs'][cache_id] = outputs + if self.stage_id == self.num_stages - 1: + if self._layers._loss_fn is not None: + labels = self.caches['labels'][cache_id] + outputs = self._layers._loss_fn(outputs, labels) + if self.stage_id == self.num_stages - 1: self.current_loss = outputs if isinstance(self.current_loss, paddle.Tensor): @@ -160,18 +151,28 @@ class PipelineParallel(MetaParallelBase): ] for idx, v in enumerate(self.current_loss): self.total_loss[idx] += v.detach() + if self.use_data_parallel: + self.current_loss = self.current_loss / self._hcg.get_data_parallel_world_size( + ) + if self.accumulate_steps > 1: + self.current_loss = self.current_loss / self.accumulate_steps + self.caches['outputs'][cache_id] = self.current_loss.clone() + else: + self._send_activations(cache_id) def _backward(self, cache_id): assert self.optimizer is not None if self.stage_id == self.num_stages - 1: - paddle.autograd.backward(self.current_loss) + paddle.autograd.backward(self.caches['outputs'][cache_id]) + self._send_gradients(cache_id) return + self._recv_gradients(cache_id) outputs = self.caches['outputs'][cache_id] grad_tensors = self.grad_tensors if isinstance(outputs, tuple): - out_tensors = [t for t in outputs if t.dtype in FLOAT_TYPES] + out_tensors = [t for t in outputs if is_float_tensor(t)] assert len(out_tensors) == len(grad_tensors) paddle.autograd.backward( tensors=out_tensors, grad_tensors=grad_tensors) @@ -179,41 +180,76 @@ class PipelineParallel(MetaParallelBase): paddle.autograd.backward( tensors=[outputs], grad_tensors=[grad_tensors]) - self.caches['outputs'][cache_id] = None grad_tensors = None + if self.stage_id != 0: self._send_gradients(cache_id) + self.caches['outputs'][cache_id] = None + #self.caches['backward_tensors'][cache_id] = None + + def _get_data(self): + if self.use_model_parallel: + mp_rank = self._hcg.get_model_parallel_rank() + else: + mp_rank = 0 + + # mp rank 0 loads the data and broadcat it to others. 
+ data = self.data + if self.use_model_parallel and (self.stage_id == 0 or + self.stage_id == self.num_stages - 1): + assert isinstance(data, (tuple, paddle.Tensor)) + if isinstance(data, paddle.Tensor): + paddle.distributed.broadcast( + data, + src=self._hcg.get_model_parallel_group_src_rank(), + group=self._hcg.get_model_parallel_group()) + else: + data = [] + for d in self.data: + assert isinstance(d, paddle.Tensor) + paddle.distributed.broadcast( + d, + src=self._hcg.get_model_parallel_group_src_rank(), + group=self._hcg.get_model_parallel_group()) + data.append(d) + data = tuple(data) + return data def _load_micro_batch(self, cache_id): inputs = self._get_data() if self.stage_id == 0: data = None - if isinstance(inputs[0], paddle.Tensor): + #if isinstance(inputs[0], paddle.Tensor): + if len(inputs) == 1: + assert isinstance(inputs[0], paddle.Tensor) data = inputs[0].clone().detach() - data.stop_gradient = data.dtype == paddle.float32 + #data.stop_gradient = not is_float_tensor(data) + data.stop_gradient = True else: - assert isinstance(inputs[0], tuple) - # Assume list or tuple + assert isinstance(inputs, tuple) data = [] - for d in inputs[0]: + for d in inputs: assert isinstance(d, paddle.Tensor) - d = d.clone().detach() - d.stop_gradient = d.dtype == paddle.float32 - loaded.append(d) + i = d.clone().detach() + #i.stop_gradient = not is_float_tensor(i) + i.stop_gradient = True + data.append(i) data = tuple(data) self.caches['inputs'][cache_id] = data if self.stage_id == self.num_stages - 1: - label = None - if isinstance(inputs[1], paddle.Tensor): - label = inputs[1] - elif isinstance(data[1], tuple): - label = [] - for l in inputs[1]: - assert isinstance(l, paddle.Tensor) - l = l.detach() - label.append(l) - label = tuple(label) - self.caches['labels'][cache_id] = label + labels = None + #if isinstance(inputs[1], paddle.Tensor): + if len(inputs) == 1: + assert isinstance(inputs[0], paddle.Tensor) + labels = inputs[0] + elif isinstance(inputs, tuple): + labels = [] + for label in inputs: + assert isinstance(label, paddle.Tensor) + label = label.detach() + labels.append(label) + labels = tuple(labels) + self.caches['labels'][cache_id] = labels def _send_meta(self, data, peer): """ @@ -225,54 +261,67 @@ class PipelineParallel(MetaParallelBase): """ if isinstance(data, paddle.Tensor): tensor_type = paddle.to_tensor([0]) - paddle.distributed.send(tensor_type, peer) + paddle.distributed.send( + tensor_type, peer, use_calc_stream=True, group=self.pp_group) dims = paddle.to_tensor(len(data.shape)) - paddle.distributed.send(dims, peer) + paddle.distributed.send( + dims, peer, use_calc_stream=True, group=self.pp_group) shape = paddle.to_tensor(data.shape) - paddle.distributed.send(shape, peer) + paddle.distributed.send( + shape, peer, use_calc_stream=True, group=self.pp_group) elif isinstance(data, tuple): tensor_type = paddle.to_tensor([1]) - paddle.distributed.send(tensor_type, peer) + paddle.distributed.send( + tensor_type, peer, use_calc_stream=True, group=self.pp_group) nums = paddle.to_tensor(len(data)) - paddle.distributed.send(nums, peer) + paddle.distributed.send( + nums, peer, use_calc_stream=True, group=self.pp_group) for idx, d in enumerate(data): assert isinstance(d, paddle.Tensor) dims = paddle.to_tensor(len(d.shape)) - paddle.distributed.send(dims, peer) + paddle.distributed.send( + dims, peer, use_calc_stream=True, group=self.pp_group) shape = paddle.to_tensor(d.shape) - paddle.distributed.send(shape, peer) + paddle.distributed.send( + shape, peer, use_calc_stream=True, 
group=self.pp_group) def _recv_meta(self, peer): tensor_type = paddle.to_tensor([0]) - paddle.distributed.recv(tensor_type, peer) + paddle.distributed.recv( + tensor_type, peer, use_calc_stream=True, group=self.pp_group) tensor_type = tensor_type.numpy()[0] if tensor_type == 0: dims = paddle.to_tensor([0]) - paddle.distributed.recv(dims, peer) + paddle.distributed.recv( + dims, peer, use_calc_stream=True, group=self.pp_group) dims = dims.numpy()[0] shape = paddle.to_tensor([0] * dims) - paddle.distributed.recv(shape, peer) + paddle.distributed.recv( + shape, peer, use_calc_stream=True, group=self.pp_group) shape = shape.numpy().tolist() return self._allocate_buffer( shape, dtype="float32", num_caches=1)[0] elif tensor_type == 1: num = paddle.to_tensor([0]) - paddle.distributed.recv(num, peer) + paddle.distributed.recv( + num, peer, use_calc_stream=True, group=self.pp_group) num = num.numpy()[0] shapes = [] for i in range(num): dims = paddle.to_tensor([0]) - paddle.distributed.recv(dims, peer) + paddle.distributed.recv( + dims, peer, use_calc_stream=True, group=self.pp_group) dims = dims.numpy()[0] shape = paddle.to_tensor([0] * dims) - paddle.distributed.recv(shape, peer) + paddle.distributed.recv( + shape, peer, use_calc_stream=True, group=self.pp_group) shapes.append(shape.numpy().tolist()) dtypes = ["float32"] * len(shapes) - caches = self._allocate_buffers(shapes, dtypes, num_buffers=1)[0] - buffers = tuple(buffers) - return buffers + caches = self._allocate_buffers(shapes, dtypes, num_caches=1)[0] + caches = tuple(caches) + return caches def _send_activations(self, cache_id): outputs = self.caches['outputs'][cache_id] @@ -282,10 +331,18 @@ class PipelineParallel(MetaParallelBase): self._send_meta(outputs, self.next_stage_id) if isinstance(outputs, paddle.Tensor): - paddle.distributed.send(outputs, self.next_stage_id) + paddle.distributed.send( + outputs, + self.next_stage_id, + use_calc_stream=True, + group=self.pp_group) elif isinstance(outputs, tuple): for output in outputs: - paddle.distributed.send(output, self.next_stage_id) + paddle.distributed.send( + output, + self.next_stage_id, + use_calc_stream=True, + group=self.pp_group) def _send_gradients(self, cache_id): inputs = self.caches['inputs'][cache_id] @@ -293,15 +350,22 @@ class PipelineParallel(MetaParallelBase): if isinstance(inputs, paddle.Tensor): assert inputs.grad is not None paddle.distributed.send( - paddle.to_tensor(inputs.grad), self.prev_stage_id) + paddle.to_tensor(inputs.grad), + self.prev_stage_id, + use_calc_stream=True, + group=self.pp_group) else: for idx, d in enumerate(inputs): # Skip tensors that will not produce a grad - if not d.dtype in FLOAT_TYPES: + if not is_float_tensor(d): assert d.grad is None continue assert d.grad is not None - paddle.distributed.send(d.grad, self.prev_stage_id) + paddle.distributed.send( + d.grad, + self.prev_stage_id, + use_calc_stream=True, + group=self.pp_group) self.caches['inputs'][cache_id] = None def _recv_activations(self, cache_id): @@ -312,22 +376,30 @@ class PipelineParallel(MetaParallelBase): self.recv_cache = self._recv_meta(self.prev_stage_id) if isinstance(self.recv_cache, paddle.Tensor): - paddle.distributed.recv(self.recv_cache, self.prev_stage_id) + paddle.distributed.recv( + self.recv_cache, + self.prev_stage_id, + use_calc_stream=True, + group=self.pp_group) inputs = self.recv_cache.clone().detach() - inputs.stop_gradient = inputs.dtype not in FLOAT_TYPES + inputs.stop_gradient = not is_float_tensor(inputs) else: assert isinstance(self.recv_cache, tuple) 
inputs = [None] * len(self.recv_cache) for idx, d in enumerate(self.recv_cache): assert isinstance(d, paddle.Tensor) - paddle.distributed.recv(d, self.prev_stage_id) + paddle.distributed.recv( + d, + self.prev_stage_id, + use_calc_stream=True, + group=self.pp_group) inputs[idx] = d.clone().detach() inputs = tuple(inputs) for d in inputs: - d.stop_gradient = d.dtype not in FLOAT_TYPES + d.stop_gradient = not is_float_tensor(d) self.caches['inputs'][cache_id] = inputs @@ -336,29 +408,35 @@ class PipelineParallel(MetaParallelBase): if self.grad_tensors is None: if isinstance(outputs, paddle.Tensor): s = list(outputs.shape) - dtype = 'float32' + dtype = 'float16' if self.use_amp else "float32" self.grad_tensors = self._allocate_buffer( s, dtype, num_buffers=1)[0] else: - sizes = [ - list(d.shape) for d in outputs if d.dtype in FLOAT_TYPES - ] - dtypes = ['float32'] * len(sizes) + sizes = [list(d.shape) for d in outputs if is_float_tensor(d)] + dtypes = ['float16'] * len( + sizes) if self.use_amp else ['float32'] * len(sizes) self.grad_tensors = self._allocate_buffers( - sizes, dtypes, num_buffers=1)[0] + sizes, dtypes, num_caches=1)[0] if isinstance(self.grad_tensors, paddle.Tensor): - paddle.distributed.recv(self.grad_tensors, self.next_stage_id) + paddle.distributed.recv( + self.grad_tensors, + self.next_stage_id, + use_calc_stream=True, + group=self.pp_group) else: assert isinstance(outputs, tuple) for d in self.grad_tensors: - paddle.distributed.recv(d, self.next_stage_id) - - def _step(self, lr_kwargs=None): - self._modifying_grad = True + paddle.distributed.recv( + d, + self.next_stage_id, + use_calc_stream=True, + group=self.pp_group) + + def _step(self): + self._allreduce_grads() self.optimizer.step() self.optimizer.clear_gradients() - self._modifying_grad = False def _clear_grads(self, inputs): if isinstance(inputs, paddle.Tensor): @@ -372,26 +450,24 @@ class PipelineParallel(MetaParallelBase): def _allocate_zeros(self, shape, dtype): return paddle.zeros(shape, dtype) - def _allocate_buffer(self, shape, dtype, num_buffers=-1, **kwargs): - buffers = [] - if num_buffers == -1: - num_buffers = self.num_caches - for count in range(num_buffers): - buffers.append(self._allocate_zeros(shape, dtype)) - return buffers - - def _allocate_buffers(self, shapes, dtypes, num_buffers=-1): - buffers = [] - if num_buffers == -1: - num_buffers = self.num_caches - for count in range(num_buffers): - buffer = [] + def _allocate_buffer(self, shape, dtype, num_caches=-1): + caches = [] + if num_caches == -1: + num_caches = self.num_caches + for count in range(num_caches): + caches.append(self._allocate_zeros(shape, dtype)) + return caches + + def _allocate_buffers(self, shapes, dtypes, num_caches=-1): + caches = [] + if num_caches == -1: + num_caches = self.num_caches + for count in range(num_caches): + cache = [] for shape, dtype in zip(shapes, dtypes): - buffer.append( - self._allocate_zeros( - shape, dtype, requires_grad=requires_grad)) - buffers.append(buffer) - return buffers + cache.append(self._allocate_zeros(shape, dtype)) + caches.append(cache) + return caches def save_state_dict(self, model_path): state_dict = self._layers.state_dict() @@ -403,25 +479,9 @@ class PipelineParallel(MetaParallelBase): _COMMAND_MAP = { utils.Optimize: _step, - #utils.ReduceGrads: _allreduce_grads, utils.Forward: _forward, utils.Backward: _backward, } - def _pre_forward(self, *inputs, **kwargs): - pass - def forward(self, *inputs, **kwargs): raise RuntimeError("Call train_batch for pipeline instead of forward.") - - def 
_post_forward(self, output): - pass - - def _pre_backward(self, loss): - pass - - def backward_impl(self, loss, parameters): - pass - - def _post_backward(self, loss): - pass diff --git a/python/paddle/distributed/fleet/meta_parallel/pp_utils/utils.py b/python/paddle/distributed/fleet/meta_parallel/pp_utils/utils.py index 56eef8d7d21..7b426e2c3f7 100644 --- a/python/paddle/distributed/fleet/meta_parallel/pp_utils/utils.py +++ b/python/paddle/distributed/fleet/meta_parallel/pp_utils/utils.py @@ -16,7 +16,21 @@ import abc import paddle from ...utils import hybrid_parallel_util as hp_util -__all__ = ['get_tensor_bytes', ] +__all__ = [ + 'get_tensor_bytes', + 'is_float_tensor', +] + +FLOAT_TYPES = [ + paddle.float16, + paddle.float32, + paddle.float64, +] + + +def is_float_tensor(tensor): + """Is a float tensor""" + return tensor.dtype in FLOAT_TYPES def get_tensor_bytes(tensor): @@ -48,10 +62,6 @@ class Generator(): self.stage_id = stage_id self.prev_stage = self.stage_id - 1 self.next_stage = self.stage_id + 1 - assert self.micro_batches >= self.stages, ( - "micro_batches {} " - "must be greater than or equal to {}".format(self.micro_batches, - self.stages)) @abc.abstractmethod def generate(self): @@ -73,18 +83,25 @@ class TrainGenerator(Generator): cmds = [] forward_steps = 0 backward_steps = 0 - while (forward_steps < startup_steps): - cmds.append(Forward) - forward_steps += 1 + #while (forward_steps < startup_steps): + # cmds.append(Forward(cache_id=forward_steps)) + # forward_steps += 1 + #while (forward_steps < self.micro_batches): + # cmds.append(Forward(cache_id=forward_steps)) + # forward_steps += 1 + # cmds.append(Backward(cache_id=backward_steps)) + # backward_steps += 1 + #while (backward_steps < self.micro_batches): + # cmds.append(Backward(cache_id=backward_steps)) + # backward_steps += 1 + #cmds.append(Optimize()) while (forward_steps < self.micro_batches): - cmds.append(Forward) + cmds.append(Forward(cache_id=forward_steps)) forward_steps += 1 - cmds.append(Backward) - backward_steps += 1 while (backward_steps < self.micro_batches): - cmds.append(Backward) + cmds.append(Backward(cache_id=backward_steps)) backward_steps += 1 - cmds.append(Optimize) + cmds.append(Optimize()) yield cmds -- GitLab From d0de2d83abb0b2d0d23f750f705d95f9450bf00f Mon Sep 17 00:00:00 2001 From: lilong12 Date: Mon, 3 May 2021 22:04:54 +0800 Subject: [PATCH 077/720] fix the bug in processing subblock in pipeline (#32727) --- python/paddle/fluid/optimizer.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py index e4fafb0132c..4ae90b3c72c 100755 --- a/python/paddle/fluid/optimizer.py +++ b/python/paddle/fluid/optimizer.py @@ -4604,13 +4604,13 @@ class PipelineOptimizer(object): origin_sub_block_id = op.attr('sub_block').id origin_sub_block = main_program.block(origin_sub_block_id) new_sub_block = prog._create_block(parent_idx=0) - for op in origin_sub_block.ops: - op_desc = op.desc + for sub_op in origin_sub_block.ops: + op_desc = sub_op.desc ap_op = new_sub_block.desc.append_op() ap_op.copy_from(op_desc) new_sub_block._sync_with_cpp() self._create_vars(new_sub_block, origin_sub_block) - op._set_attr('sub_block:', new_sub_block) + op._set_attr('sub_block', new_sub_block) def _get_device_info(self, block): for op in block.ops: -- GitLab From a259076dd01801e2e619237da02235a4856a96bb Mon Sep 17 00:00:00 2001 From: lilong12 Date: Wed, 5 May 2021 09:31:44 +0800 Subject: [PATCH 078/720] update, test=develop (#32726) --- 
paddle/fluid/pybind/op_function_generator.cc | 1 - python/paddle/distributed/collective.py | 46 ++++++++++---------- 2 files changed, 24 insertions(+), 23 deletions(-) diff --git a/paddle/fluid/pybind/op_function_generator.cc b/paddle/fluid/pybind/op_function_generator.cc index a340d7a0f00..bf3c7784321 100644 --- a/paddle/fluid/pybind/op_function_generator.cc +++ b/paddle/fluid/pybind/op_function_generator.cc @@ -44,7 +44,6 @@ std::map> op_ins_map = { {"gru_unit", {"Input", "HiddenPrev", "Weight", "Bias"}}, {"label_smooth", {"X", "PriorDist"}}, {"assign", {"X"}}, - {"send_v2", {"X"}}, {"reshape2", {"X", "Shape"}}, {"expand", {"X", "ExpandTimes"}}, {"slice", {"Input", "StartsTensor", "EndsTensor"}}, diff --git a/python/paddle/distributed/collective.py b/python/paddle/distributed/collective.py index 7aa765ba93f..bd7f5e5733b 100644 --- a/python/paddle/distributed/collective.py +++ b/python/paddle/distributed/collective.py @@ -1258,23 +1258,24 @@ def send(tensor, dst=0, group=None, use_calc_stream=True): tensor (Tensor): The Tensor to send. Its data type should be float16, float32, float64, int32 or int64. dst (int): The destination rank id. - group (Group): The group instance return by new_group or None for global default group. - use_calc_stream (bool): Whether to use calculate stream or communication stream. + group (Group, optional): The group instance return by new_group or None for global default group. Default: None. + use_calc_stream (bool, optional): Whether to use calculate stream or communication stream. Default: True. Returns: None. Examples: .. code-block:: python + # required: distributed import paddle - #from paddle.distributed import init_parallel_env - #init_parallel_env() - #if paddle.distributed.ParallelEnv().rank == 0: - # data = paddle.to_tensor([7, 8, 9]) - # paddle.distributed.send(data, dst=1) - #else: - # data = paddle.to_tensor([1,2,3]) - # paddle.distributed.recv(data, src=0) - #out = data.numpy() + from paddle.distributed import init_parallel_env + init_parallel_env() + if paddle.distributed.ParallelEnv().rank == 0: + data = paddle.to_tensor([7, 8, 9]) + paddle.distributed.send(data, dst=1) + else: + data = paddle.to_tensor([1,2,3]) + paddle.distributed.recv(data, src=0) + out = data.numpy() """ if group is not None and not group.is_member(): return @@ -1307,23 +1308,24 @@ def recv(tensor, src=0, group=None, use_calc_stream=True): tensor (Tensor): The Tensor to receive. Its data type should be float16, float32, float64, int32 or int64. src (int): The source rank id. - group (Group): The group instance return by new_group or None for global default group. - use_calc_stream (bool): Whether to use calculate stream or communication stream. + group (Group, optional): The group instance return by new_group or None for global default group. Default: None. + use_calc_stream (bool, optional): Whether to use calculate stream or communication stream. Default: True. Returns: None. Examples: .. 
code-block:: python + # required: distributed import paddle - #from paddle.distributed import init_parallel_env - #init_parallel_env() - #if paddle.distributed.ParallelEnv().rank == 0: - # data = paddle.to_tensor([7, 8, 9]) - # paddle.distributed.send(data, dst=1) - #else: - # data = paddle.to_tensor([1,2,3]) - # paddle.distributed.recv(data, src=0) - #out = data.numpy() + from paddle.distributed import init_parallel_env + init_parallel_env() + if paddle.distributed.ParallelEnv().rank == 0: + data = paddle.to_tensor([7, 8, 9]) + paddle.distributed.send(data, dst=1) + else: + data = paddle.to_tensor([1,2,3]) + paddle.distributed.recv(data, src=0) + out = data.numpy() """ if group is not None and not group.is_member(): return -- GitLab From 8b1b214f29b24dc45c7bcf78db2e30d9e4542258 Mon Sep 17 00:00:00 2001 From: tianshuo78520a <707759223@qq.com> Date: Thu, 6 May 2021 10:01:44 +0800 Subject: [PATCH 079/720] Change Paddle CI-Cverage Python3.8 (#32515) --- README.md | 3 +-- paddle/scripts/paddle_build.sh | 1 + .../tests/unittests/mkldnn/test_fusion_gru_bf16_mkldnn_op.py | 1 - python/paddle/fluid/tests/unittests/test_fusion_gru_op.py | 4 ++-- python/paddle/fluid/tests/unittests/test_fusion_lstm_op.py | 2 +- python/paddle/fluid/tests/unittests/test_gru_op.py | 2 +- python/unittest_py/requirements.txt | 1 + tools/summary_env.py | 5 +++-- 8 files changed, 10 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index e8a7013d0b4..8b437e4115a 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,4 @@ - -
diff --git a/paddle/scripts/paddle_build.sh b/paddle/scripts/paddle_build.sh index b8b9f40aa33..0865d48c0d3 100755 --- a/paddle/scripts/paddle_build.sh +++ b/paddle/scripts/paddle_build.sh @@ -1450,6 +1450,7 @@ function parallel_test() { mkdir -p ${PADDLE_ROOT}/build cd ${PADDLE_ROOT}/build pip install ${PADDLE_ROOT}/build/python/dist/*whl + cp ${PADDLE_ROOT}/build/python/paddle/fluid/tests/unittests/op_test.py ${PADDLE_ROOT}/build/python if [ "$WITH_GPU" == "ON" ] || [ "$WITH_ROCM" == "ON" ];then parallel_test_base_gpu else diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_gru_bf16_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_gru_bf16_mkldnn_op.py index c024ffbdb4b..7320efd259f 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_gru_bf16_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_gru_bf16_mkldnn_op.py @@ -19,7 +19,6 @@ import numpy as np import struct import paddle.fluid.core as core from paddle.fluid.tests.unittests.op_test import OpTest, convert_float_to_uint16 -from paddle.fluid.tests.unittests.op_test import OpTest from paddle.fluid.tests.unittests.test_fusion_gru_op import fusion_gru from paddle.fluid.tests.unittests.test_fusion_lstm_op import fc, ACTIVATION diff --git a/python/paddle/fluid/tests/unittests/test_fusion_gru_op.py b/python/paddle/fluid/tests/unittests/test_fusion_gru_op.py index 1e25b8034da..c241fc65d9b 100644 --- a/python/paddle/fluid/tests/unittests/test_fusion_gru_op.py +++ b/python/paddle/fluid/tests/unittests/test_fusion_gru_op.py @@ -18,8 +18,8 @@ import unittest import numpy as np import math from op_test import OpTest -from test_gru_op import gru -from test_fusion_lstm_op import fc, ACTIVATION +from paddle.fluid.tests.unittests.test_gru_op import gru +from paddle.fluid.tests.unittests.test_fusion_lstm_op import fc, ACTIVATION def fusion_gru( diff --git a/python/paddle/fluid/tests/unittests/test_fusion_lstm_op.py b/python/paddle/fluid/tests/unittests/test_fusion_lstm_op.py index 3928b6fa034..4899927a769 100644 --- a/python/paddle/fluid/tests/unittests/test_fusion_lstm_op.py +++ b/python/paddle/fluid/tests/unittests/test_fusion_lstm_op.py @@ -17,7 +17,7 @@ from __future__ import print_function import unittest import numpy as np from op_test import OpTest -from test_lstm_op import lstm, ACTIVATION +from paddle.fluid.tests.unittests.test_lstm_op import lstm, ACTIVATION def fc(x, w, b): diff --git a/python/paddle/fluid/tests/unittests/test_gru_op.py b/python/paddle/fluid/tests/unittests/test_gru_op.py index 3ea47a5d690..3ec943ef2e0 100644 --- a/python/paddle/fluid/tests/unittests/test_gru_op.py +++ b/python/paddle/fluid/tests/unittests/test_gru_op.py @@ -19,7 +19,7 @@ import numpy as np import math import functools from op_test import OpTest -from test_lstm_op import ACTIVATION +from paddle.fluid.tests.unittests.test_lstm_op import ACTIVATION from paddle import fluid from paddle.fluid import Program, program_guard diff --git a/python/unittest_py/requirements.txt b/python/unittest_py/requirements.txt index 5a59935887b..752f3545c69 100644 --- a/python/unittest_py/requirements.txt +++ b/python/unittest_py/requirements.txt @@ -10,3 +10,4 @@ scipy>=0.19.0, <=1.2.1 ; python_version<"3.5" scipy<=1.3.1 ; python_version=="3.5" scipy ; python_version>"3.5" prettytable +distro diff --git a/tools/summary_env.py b/tools/summary_env.py index 38bae87651d..d12e644cc28 100644 --- a/tools/summary_env.py +++ b/tools/summary_env.py @@ -13,6 +13,7 @@ # limitations under the License. 
import os import sys +import distro import platform import subprocess @@ -47,8 +48,8 @@ def get_os_info(): plat = "macOs" ver = platform.mac_ver()[0] elif platform.system() == "Linux": - plat = platform.linux_distribution()[0] - ver = platform.linux_distribution()[1] + plat = distro.linux_distribution()[0] + ver = distro.linux_distribution()[1] elif platform.system() == "Windows": plat = "Windows" ver = platform.win32_ver()[0] -- GitLab From 9599c3b3445d4eb985ac41b6a0d9e4973a143bb3 Mon Sep 17 00:00:00 2001 From: Adam Osewski Date: Thu, 6 May 2021 04:48:28 +0200 Subject: [PATCH 080/720] Sum kernel for CPU supporting BF16 and SelectedRows (#32631) --- paddle/fluid/operators/math/blas_impl.h | 19 +++++ .../operators/math/selected_rows_functor.cc | 40 +++++------ paddle/fluid/operators/sum_op.cc | 2 + paddle/fluid/platform/mkldnn_reuse.h | 2 +- .../fluid/tests/unittests/test_sgd_op_bf16.py | 9 +-- .../fluid/tests/unittests/test_sum_op.py | 71 +++++++++++++++++++ 6 files changed, 116 insertions(+), 27 deletions(-) diff --git a/paddle/fluid/operators/math/blas_impl.h b/paddle/fluid/operators/math/blas_impl.h index 64b533de098..05d42f02c10 100644 --- a/paddle/fluid/operators/math/blas_impl.h +++ b/paddle/fluid/operators/math/blas_impl.h @@ -15,6 +15,7 @@ #ifdef PADDLE_WITH_MKLML #include #endif + #include #include #include @@ -28,6 +29,19 @@ namespace paddle { namespace operators { namespace math { +namespace detail { + +template +static void axpy(int n, const T alpha, const T *x, const int incx, T *y, + const int incy) { + // Y = Y + alpha * X + while (n-- > 0) { + *y += alpha * *x; + y = y + incy; + x = x + incx; + } +} +} // namespace detail template struct CBlas; @@ -43,6 +57,11 @@ struct CBlas { template <> struct CBlas { + template + static void AXPY(ARGS... args) { + detail::axpy(args...); + } + template static void VCOPY(ARGS... args) { PADDLE_THROW(platform::errors::Unimplemented( diff --git a/paddle/fluid/operators/math/selected_rows_functor.cc b/paddle/fluid/operators/math/selected_rows_functor.cc index f7b16453e01..b9a1854a661 100644 --- a/paddle/fluid/operators/math/selected_rows_functor.cc +++ b/paddle/fluid/operators/math/selected_rows_functor.cc @@ -285,6 +285,8 @@ template struct SelectedRowsAddToTensor; template struct SelectedRowsAddToTensor; template struct SelectedRowsAddToTensor; template struct SelectedRowsAddToTensor; +template struct SelectedRowsAddToTensor; // This is a separated namespace for manipulate SelectedRows typed // data. Like merge duplicated rows, adding two SelectedRows etc. @@ -294,21 +296,17 @@ template struct SelectedRowsAddToTensor; // add or mul. 
namespace scatter { -template -typename std::enable_if< - std::is_floating_point::value && - std::is_same::value>::type -elementwise_add_to(const DeviceContext& ctx, BlasT* blas, - size_t data_len, const T* in, T* out) { - blas->AXPY(data_len, 1., in, out); +template +typename std::enable_if::value>::type +elementwise_add_to(BlasT* blas, size_t data_len, + const T* in, T* out) { + blas->AXPY(data_len, T(1.f), in, out); } -template -typename std::enable_if< - !std::is_floating_point::value && - std::is_same::value>::type -elementwise_add_to(const DeviceContext& ctx, BlasT* blas, - size_t data_len, const T* in, T* out) { +template +typename std::enable_if::value>::type elementwise_add_to( + BlasT* blas, size_t data_len, const T* in, + T* out) { for (size_t i = 0; i < data_len; i++) { out[i] += in[i]; } @@ -412,7 +410,7 @@ struct MergeAdd { out.set_rows(merge_rows); math::SetConstant constant_functor; - constant_functor(context, out.mutable_value(), 0.0); + constant_functor(context, out.mutable_value(), static_cast(0.f)); std::unordered_map rows_to_id; for (size_t i = 0; i < merge_rows.size(); ++i) { @@ -429,9 +427,9 @@ struct MergeAdd { for (size_t i = 0; i < input_rows.size(); i++) { size_t out_i = rows_to_id[input_rows[i]]; - elementwise_add_to( - context, &blas, static_cast(input_width), - &input_data[i * input_width], &out_data[out_i * input_width]); + elementwise_add_to(&blas, static_cast(input_width), + &input_data[i * input_width], + &out_data[out_i * input_width]); } } } @@ -524,9 +522,9 @@ struct MergeAverage { for (size_t i = 0; i < input_rows.size(); i++) { size_t out_i = rows_to_id[input_rows[i]]; - elementwise_add_to( - context, &blas, static_cast(input_width), - &input_data[i * input_width], &out_data[out_i * input_width]); + elementwise_add_to(&blas, static_cast(input_width), + &input_data[i * input_width], + &out_data[out_i * input_width]); } } size_t input_width_cast = static_cast(input_width); @@ -547,6 +545,8 @@ template struct MergeAdd; template struct MergeAdd; +template struct MergeAdd; template struct MergeAverage; template struct MergeAverage; diff --git a/paddle/fluid/operators/sum_op.cc b/paddle/fluid/operators/sum_op.cc index 741f86f3584..0f520adba57 100644 --- a/paddle/fluid/operators/sum_op.cc +++ b/paddle/fluid/operators/sum_op.cc @@ -326,4 +326,6 @@ REGISTER_OP_CPU_KERNEL( sum, ops::SumKernel, ops::SumKernel, ops::SumKernel, + ops::SumKernel, ops::SumKernel); diff --git a/paddle/fluid/platform/mkldnn_reuse.h b/paddle/fluid/platform/mkldnn_reuse.h index f1eb1f96363..e584b849368 100644 --- a/paddle/fluid/platform/mkldnn_reuse.h +++ b/paddle/fluid/platform/mkldnn_reuse.h @@ -640,7 +640,7 @@ class BroadcastDataMKLDNNHandler platform::Place cpu_place, const Tensor* x, const Tensor* y, float scale_x, float scale_y, const std::string& uniq_name, - std::vector& input_dims) + const std::vector& input_dims) : platform::MKLDNNHandlerT( dev_ctx, engine, cpu_place, platform::CreateKey(dev_ctx, framework::vectorize(x->dims()), diff --git a/python/paddle/fluid/tests/unittests/test_sgd_op_bf16.py b/python/paddle/fluid/tests/unittests/test_sgd_op_bf16.py index 0717ec80f6a..fa8ff4effcf 100644 --- a/python/paddle/fluid/tests/unittests/test_sgd_op_bf16.py +++ b/python/paddle/fluid/tests/unittests/test_sgd_op_bf16.py @@ -76,8 +76,7 @@ class TestSparseSGDOpBF16(unittest.TestCase): grad_selected_rows = scope.var('Grad').get_selected_rows() grad_selected_rows.set_height(height) grad_selected_rows.set_rows(rows) - # grad_array = np.random.random((len(rows), 
row_numel)).astype('float32') - grad_array = np.full((len(rows), row_numel), 2, np.float32) + grad_array = np.random.random((len(rows), row_numel)).astype('float32') np_array_bf16 = convert_float_to_uint16(grad_array) grad_tensor = grad_selected_rows.get_tensor() @@ -87,8 +86,7 @@ class TestSparseSGDOpBF16(unittest.TestCase): def create_dense_param_var(self, scope, place, height, width): param_tensor = scope.var('Param').get_tensor() - # param_array = np.random.random((height, width)).astype('float32') - param_array = np.full((height, width), 5, np.float32) + param_array = np.random.random((height, width)).astype('float32') param_array_bf16 = convert_float_to_uint16(param_array) param_tensor.set(param_array_bf16, place) @@ -109,8 +107,7 @@ class TestSparseSGDOpBF16(unittest.TestCase): def create_dense_lr_var(self, scope, place): lr_tensor = scope.var('LearningRate').get_tensor() - # lr_value = np.random.uniform() - lr_value = 2 + lr_value = np.random.uniform() lr_array = np.full((1), lr_value, np.float32) lr_array_bf16 = convert_float_to_uint16(lr_array) lr_tensor.set(lr_array_bf16, place) diff --git a/python/paddle/fluid/tests/unittests/test_sum_op.py b/python/paddle/fluid/tests/unittests/test_sum_op.py index 35dc92ffb08..f9e40cf8133 100644 --- a/python/paddle/fluid/tests/unittests/test_sum_op.py +++ b/python/paddle/fluid/tests/unittests/test_sum_op.py @@ -18,9 +18,12 @@ import unittest import numpy as np from op_test import OpTest import paddle +from paddle import enable_static import paddle.fluid as fluid import paddle.fluid.core as core from paddle.fluid.op import Operator +from paddle.fluid.tests.unittests.op_test import ( + OpTest, convert_float_to_uint16, convert_uint16_to_float) class TestSumOp(OpTest): @@ -141,6 +144,73 @@ class TestSelectedRowsSumOp(unittest.TestCase): self.check_with_place(place, inplace) +class TestSelectedRowsSumOpInt(TestSelectedRowsSumOp): + def init_kernel_type(self): + self.dtype = np.int32 + + +@unittest.skipIf(not core.supports_bfloat16(), + 'place does not support BF16 evaluation') +class TestSelectedRowsSumBF16Op(TestSelectedRowsSumOp): + def setUp(self): + self.height = 10 + self.row_numel = 12 + self.rows = [0, 1, 2, 3, 4, 5, 6] + self.dtype = np.uint16 + self.init_kernel_type() + np.random.seed(12345) + self.data = np.random.random((len(self.rows), + self.row_numel)).astype(np.float32) + + def _get_array(self, rows, row_numel): + if len(rows) > 0: + return convert_float_to_uint16(self.data) + else: + return np.ndarray((0, row_numel), dtype=self.dtype) + + def check_input_and_optput(self, + scope, + place, + inplace, + w1_has_data=False, + w2_has_data=False, + w3_has_data=False): + + self.create_selected_rows(scope, place, "W1", w1_has_data) + self.create_selected_rows(scope, place, "W2", w2_has_data) + self.create_selected_rows(scope, place, "W3", w3_has_data) + + # create Out Variable + if inplace: + out_var_name = "W1" + else: + out_var_name = "Out" + out = scope.var(out_var_name).get_selected_rows() + + # create and run sum operator + sum_op = Operator("sum", X=["W1", "W2", "W3"], Out=out_var_name) + sum_op.run(scope, place) + + has_data_w_num = 0 + for has_data in [w1_has_data, w2_has_data, w3_has_data]: + if has_data: + has_data_w_num += 1 + + if has_data_w_num > 0: + self.assertEqual(len(out.rows()), 7) + out_bf16 = np.array(out.get_tensor()) + out_fp32 = convert_uint16_to_float(out_bf16) + ref_fp32 = convert_uint16_to_float( + self._get_array(self.rows, self.row_numel)) * has_data_w_num + np.testing.assert_allclose(out_fp32, ref_fp32, atol=0, 
rtol=0.95e-2) + else: + self.assertEqual(len(out.rows()), 0) + + def test_w_is_selected_rows(self): + for inplace in [True, False]: + self.check_with_place(core.CPUPlace(), inplace) + + class TestLoDTensorAndSelectedRowsOp(TestSelectedRowsSumOp): def setUp(self): self.height = 10 @@ -324,4 +394,5 @@ create_test_sum_fp16_class(TestSelectedRowsSumOp) create_test_sum_fp16_class(TestLoDTensorAndSelectedRowsOp) if __name__ == "__main__": + enable_static() unittest.main() -- GitLab From f1c68a08555955d13b9190ffd1ff0dd3b0b66465 Mon Sep 17 00:00:00 2001 From: gongweibao Date: Thu, 6 May 2021 11:02:44 +0800 Subject: [PATCH 081/720] add int64 support test=develop (#32736) add int64 support --- paddle/fluid/operators/lookup_table_v2_op_npu.cc | 5 ----- .../fluid/tests/unittests/npu/test_lookup_table_v2_op_npu.py | 2 +- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/paddle/fluid/operators/lookup_table_v2_op_npu.cc b/paddle/fluid/operators/lookup_table_v2_op_npu.cc index 87618b954d2..9574b325ef7 100644 --- a/paddle/fluid/operators/lookup_table_v2_op_npu.cc +++ b/paddle/fluid/operators/lookup_table_v2_op_npu.cc @@ -29,11 +29,6 @@ class LookupTableV2NPUKernel : public framework::OpKernel { auto *output_t = ctx.Output("Out"); // float tensor auto *table_t = ctx.Input("W"); - // It seems cann 20.1 accepts int64, but cann 20.2+ not. - PADDLE_ENFORCE_EQ(ids_t->type(), framework::proto::VarType::INT32, - platform::errors::Unimplemented( - "The index of LookupTableV2 should be int32.")); - auto *table_var = ctx.InputVar("W"); PADDLE_ENFORCE_EQ( table_var->IsType(), true, diff --git a/python/paddle/fluid/tests/unittests/npu/test_lookup_table_v2_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_lookup_table_v2_op_npu.py index 2463ddb7137..400ddd9d4aa 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_lookup_table_v2_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_lookup_table_v2_op_npu.py @@ -41,7 +41,7 @@ class TestLookupTableV2(OpTest): vocab = 10 dim = 20 w = np.ones([vocab, dim]).astype(self.dtype) - x = np.random.randint(0, vocab, size=(bsz, seqlen)).astype(np.int32) + x = np.random.randint(0, vocab, size=(bsz, seqlen)).astype(np.int64) out = np.ones([bsz, seqlen, dim]).astype(self.dtype) self.inputs = { -- GitLab From c5ae21f43503382520badcbd78aad4d2148561f1 Mon Sep 17 00:00:00 2001 From: gongweibao Date: Thu, 6 May 2021 11:16:27 +0800 Subject: [PATCH 082/720] Fix bugs of pipeline on ascend. 
(#32737) --- paddle/fluid/framework/device_worker.h | 2 +- paddle/fluid/framework/device_worker_factory.cc | 2 +- paddle/fluid/framework/pipeline_trainer.cc | 4 ++-- paddle/fluid/framework/section_worker.cc | 2 +- paddle/fluid/framework/trainer.h | 2 +- paddle/fluid/framework/trainer_factory.cc | 3 ++- paddle/fluid/operators/collective/c_allreduce_op.h | 1 + python/paddle/fluid/framework.py | 4 ++-- python/paddle/fluid/optimizer.py | 2 +- 9 files changed, 12 insertions(+), 10 deletions(-) diff --git a/paddle/fluid/framework/device_worker.h b/paddle/fluid/framework/device_worker.h index a49e492e480..cd5de19bdc0 100644 --- a/paddle/fluid/framework/device_worker.h +++ b/paddle/fluid/framework/device_worker.h @@ -639,7 +639,7 @@ class PSGPUWorker : public HogwildWorker { #endif #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) || \ - defined(WITH_ASCEND_CL) + defined(PADDLE_WITH_ASCEND_CL) class SectionWorker : public DeviceWorker { public: SectionWorker() {} diff --git a/paddle/fluid/framework/device_worker_factory.cc b/paddle/fluid/framework/device_worker_factory.cc index 5780a953433..fb2323d96e2 100644 --- a/paddle/fluid/framework/device_worker_factory.cc +++ b/paddle/fluid/framework/device_worker_factory.cc @@ -80,7 +80,7 @@ REGISTER_DEVICE_WORKER_CLASS(PSGPUWorker); #endif #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) || \ - defined(WITH_ASCEND_CL) + defined(PADDLE_WITH_ASCEND_CL) REGISTER_DEVICE_WORKER_CLASS(SectionWorker); #endif } // namespace framework diff --git a/paddle/fluid/framework/pipeline_trainer.cc b/paddle/fluid/framework/pipeline_trainer.cc index cdd2dbd5b1d..75c42fa3e52 100644 --- a/paddle/fluid/framework/pipeline_trainer.cc +++ b/paddle/fluid/framework/pipeline_trainer.cc @@ -13,7 +13,7 @@ // limitations under the License. #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) || \ - defined(WITH_ASCEND_CL) + defined(PADDLE_WITH_ASCEND_CL) #include "paddle/fluid/framework/data_feed_factory.h" #include "paddle/fluid/framework/device_worker_factory.h" #include "paddle/fluid/framework/trainer.h" @@ -37,7 +37,7 @@ void PipelineTrainer::Initialize(const TrainerDesc& trainer_desc, int place_id = section_config.place_id(); #if (defined PADDLE_WITH_NCCL) place_ = platform::CUDAPlace(place_id); -#elif (defined WITH_ASCEND_CL) // NOLINT +#elif (defined PADDLE_WITH_ASCEND_CL) // NOLINT place_ = platform::NPUPlace(place_id); #endif worker_ = DeviceWorkerFactory::CreateDeviceWorker( diff --git a/paddle/fluid/framework/section_worker.cc b/paddle/fluid/framework/section_worker.cc index 7860b69313e..00ff50abadd 100644 --- a/paddle/fluid/framework/section_worker.cc +++ b/paddle/fluid/framework/section_worker.cc @@ -10,7 +10,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) || \ - defined(WITH_ASCEND_CL) + defined(PADDLE_WITH_ASCEND_CL) #include #include "paddle/fluid/framework/device_worker.h" #include "paddle/fluid/framework/executor_gc_helper.h" diff --git a/paddle/fluid/framework/trainer.h b/paddle/fluid/framework/trainer.h index 10f6c1ddbd0..3ac36bd2e4a 100644 --- a/paddle/fluid/framework/trainer.h +++ b/paddle/fluid/framework/trainer.h @@ -332,7 +332,7 @@ class PSGPUTrainer : public TrainerBase { #endif #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) || \ - defined(WITH_ASCEND_CL) + defined(PADDLE_WITH_ASCEND_CL) class PipelineTrainer : public TrainerBase { public: PipelineTrainer() {} diff --git a/paddle/fluid/framework/trainer_factory.cc b/paddle/fluid/framework/trainer_factory.cc index 6b9dbece897..15073b6f78c 100644 --- a/paddle/fluid/framework/trainer_factory.cc +++ b/paddle/fluid/framework/trainer_factory.cc @@ -76,7 +76,8 @@ REGISTER_TRAINER_CLASS(HeterBoxTrainer); (defined PADDLE_WITH_PSLIB) REGISTER_TRAINER_CLASS(PSGPUTrainer); #endif -#if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) +#if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) || \ + defined(PADDLE_WITH_ASCEND_CL) REGISTER_TRAINER_CLASS(PipelineTrainer); #endif } // namespace framework diff --git a/paddle/fluid/operators/collective/c_allreduce_op.h b/paddle/fluid/operators/collective/c_allreduce_op.h index 0eaa377869e..3a74f551e7a 100644 --- a/paddle/fluid/operators/collective/c_allreduce_op.h +++ b/paddle/fluid/operators/collective/c_allreduce_op.h @@ -131,6 +131,7 @@ class CAllReduceOpASCENDKernel : public framework::OpKernel { int64_t numel = in->numel(); void* sendbuff = reinterpret_cast(const_cast(in->data())); + out->mutable_data(in->dims(), ctx.GetPlace()); void* recvbuff = reinterpret_cast(out->data()); int ring_id = ctx.Attr("ring_id"); diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py index 0e9d756848a..2eac5adcf22 100644 --- a/python/paddle/fluid/framework.py +++ b/python/paddle/fluid/framework.py @@ -6124,9 +6124,9 @@ def device_guard(device=None): device, index = device.split(':') if device == 'cpu': raise ValueError("Should not set device id for cpu.") - if device not in ['cpu', 'gpu', '', None]: + if device not in ['cpu', 'gpu', 'npu', '', None]: raise ValueError( - "The Attr(device) should be 'cpu' or 'gpu', and it can also be empty string or None " + "The Attr(device) should be 'cpu' 'npu' or 'gpu', and it can also be empty string or None " "when there is no need to specify device. But received %s" % device) if index: device = ":".join([device, index]) diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py index 4ae90b3c72c..41b2843ea33 100755 --- a/python/paddle/fluid/optimizer.py +++ b/python/paddle/fluid/optimizer.py @@ -4116,7 +4116,7 @@ class PipelineOptimizer(object): device = op.attr(self._op_device_key) \ if op.has_attr(self._op_device_key) else None if device: - assert device[0:3] == 'gpu', "Now, only gpu devices are " \ + assert device[0:3] == 'gpu' or dev_type == 'npu', "Now, only gpu and npu devices are " \ "supported in pipeline parallemism." 
return device -- GitLab From efdb0a7d41a0d35c5274d2bc49d47cd18dc98971 Mon Sep 17 00:00:00 2001 From: littletomatodonkey <2120160898@bit.edu.cn> Date: Thu, 6 May 2021 11:44:18 +0800 Subject: [PATCH 083/720] fix l1 decay for inplace (#32717) --- python/paddle/fluid/regularizer.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/python/paddle/fluid/regularizer.py b/python/paddle/fluid/regularizer.py index db08955c455..64ce283a63c 100644 --- a/python/paddle/fluid/regularizer.py +++ b/python/paddle/fluid/regularizer.py @@ -326,19 +326,21 @@ class L1DecayRegularizer(WeightDecayRegularizer): assert isinstance(block, framework.Block) if framework.in_dygraph_mode(): + sign = block.create_var(dtype=param.dtype, shape=param.shape) decay = block.create_var(dtype=param.dtype, shape=param.shape) else: + sign = block.create_var( + dtype=param.dtype, shape=param.shape, lod_level=param.lod_level) decay = block.create_var( dtype=param.dtype, shape=param.shape, lod_level=param.lod_level) # Append sign op - block.append_op( - type='sign', inputs={"X": param}, outputs={"Out": decay}) + block.append_op(type='sign', inputs={"X": param}, outputs={"Out": sign}) # Append scale op to the output of sign op block.append_op( type='scale', - inputs={"X": decay}, + inputs={"X": sign}, outputs={"Out": decay}, attrs={"scale": self._regularization_coeff}) -- GitLab From 313926277eaa028f977c4a8b7ab34c057cbc0777 Mon Sep 17 00:00:00 2001 From: ronnywang <524019753@qq.com> Date: Thu, 6 May 2021 14:09:11 +0800 Subject: [PATCH 084/720] [ROCM] bugfix for unittest (#32392) * fix test_unpool_op * fix test_inplace_addto_strategy * fix test_conv2d_fusion_op * fix test_imperative_lod_tensor_to_selected_rows, test_imperative_selected_rows_to_lod_tensor * fix test_dot_op * fix test_correlation_op * fix tracer * fix test_memcpy_op --- cmake/operators.cmake | 1 - paddle/fluid/operators/conv_cudnn_op.cu | 49 ++++++++--- paddle/fluid/operators/conv_miopen_helper.h | 70 ++-------------- paddle/fluid/operators/correlation_op.cu | 21 +++-- paddle/fluid/operators/fused/CMakeLists.txt | 3 +- .../fluid/operators/fused/conv_fusion_op.cu | 83 ++++++++++++++++++- paddle/fluid/operators/math/unpooling.cu | 8 ++ paddle/fluid/operators/memcpy_op.cc | 2 +- paddle/fluid/platform/dynload/miopen.h | 1 + .../fluid/tests/unittests/test_dot_op.py | 36 +++++++- ..._imperative_lod_tensor_to_selected_rows.py | 5 +- ..._imperative_selected_rows_to_lod_tensor.py | 5 +- 12 files changed, 193 insertions(+), 91 deletions(-) diff --git a/cmake/operators.cmake b/cmake/operators.cmake index 16288e1fb45..75b1100caa9 100644 --- a/cmake/operators.cmake +++ b/cmake/operators.cmake @@ -180,7 +180,6 @@ function(op_library TARGET) list(REMOVE_ITEM miopen_cu_cc_srcs "affine_grid_cudnn_op.cu.cc") list(REMOVE_ITEM miopen_cu_cc_srcs "grid_sampler_cudnn_op.cu.cc") list(REMOVE_ITEM hip_srcs "cholesky_op.cu") - list(REMOVE_ITEM hip_srcs "correlation_op.cu") list(REMOVE_ITEM hip_srcs "multinomial_op.cu") list(REMOVE_ITEM hip_srcs "decode_jpeg_op.cu") hip_library(${TARGET} SRCS ${cc_srcs} ${hip_cc_srcs} ${miopen_cu_cc_srcs} ${miopen_cu_srcs} ${mkldnn_cc_srcs} ${hip_srcs} DEPS ${op_library_DEPS} diff --git a/paddle/fluid/operators/conv_cudnn_op.cu b/paddle/fluid/operators/conv_cudnn_op.cu index ab535e341f7..7fdb1ccfe96 100644 --- a/paddle/fluid/operators/conv_cudnn_op.cu +++ b/paddle/fluid/operators/conv_cudnn_op.cu @@ -699,24 +699,51 @@ class CUDNNConvGradOpKernel : public framework::OpKernel { // ------------------- cudnn conv backward data 
--------------------- ScalingParamType alpha = 1.0f; +#ifdef PADDLE_WITH_HIP + // MIOPEN ONLY support beta to be 0.0f + ScalingParamType beta = 0.0f; +#else ScalingParamType beta = ctx.Attr("use_addto") ? 1.0f : 0.0f; +#endif VLOG(4) << "Conv_grad: use_addto = " << ctx.Attr("use_addto"); if (input_grad) { // When beta is 0, it is unnecessary to reset input_grad. // When beta is 1, the output cannot be reset since addt strategy used. #ifdef PADDLE_WITH_HIP - workspace_handle.RunFunc( - [&](void* cudnn_workspace_ptr) { - PADDLE_ENFORCE_CUDA_SUCCESS( - platform::dynload::miopenConvolutionBackwardData( - handle, &alpha, args1.odesc.desc(), output_grad_data, - args1.wdesc.desc(), filter_data, args1.cdesc.desc(), - data_algo, &beta, args1.idesc.desc(), - transformed_input_grad_data, cudnn_workspace_ptr, - workspace_size)); - }, - workspace_size); + if (ctx.Attr("use_addto")) { + Tensor temp_tensor(transformed_input_grad.type()); + temp_tensor.Resize(transformed_input_grad.dims()); + T* temp_tensor_data = temp_tensor.mutable_data(ctx.GetPlace()); + workspace_handle.RunFunc( + [&](void* cudnn_workspace_ptr) { + PADDLE_ENFORCE_CUDA_SUCCESS( + platform::dynload::miopenConvolutionBackwardData( + handle, &alpha, args1.odesc.desc(), output_grad_data, + args1.wdesc.desc(), filter_data, args1.cdesc.desc(), + data_algo, &beta, args1.idesc.desc(), temp_tensor_data, + cudnn_workspace_ptr, workspace_size)); + }, + workspace_size); + PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::miopenOpTensor( + handle, miopenTensorOpAdd, &alpha, args1.idesc.desc(), + transformed_input_grad_data, &alpha, args1.idesc.desc(), + temp_tensor_data, &beta, args1.idesc.desc(), + transformed_input_grad_data)); + } else { + workspace_handle.RunFunc( + [&](void* cudnn_workspace_ptr) { + PADDLE_ENFORCE_CUDA_SUCCESS( + platform::dynload::miopenConvolutionBackwardData( + handle, &alpha, args1.odesc.desc(), output_grad_data, + args1.wdesc.desc(), filter_data, args1.cdesc.desc(), + data_algo, &beta, args1.idesc.desc(), + transformed_input_grad_data, cudnn_workspace_ptr, + workspace_size)); + }, + workspace_size); + } + #else for (int i = 0; i < groups; i++) { workspace_handle.RunFunc( diff --git a/paddle/fluid/operators/conv_miopen_helper.h b/paddle/fluid/operators/conv_miopen_helper.h index 3ab27e1ec4f..befe09c8e6b 100644 --- a/paddle/fluid/operators/conv_miopen_helper.h +++ b/paddle/fluid/operators/conv_miopen_helper.h @@ -146,28 +146,8 @@ struct SearchAlgorithm { cudnn_workspace_ptr, workspace_size, false)); }; - if (!exhaustive_search && !deterministic) { - workspace_handle.RunFuncSync(cudnn_find_func, workspace_size); - algo = find_result.fwd_algo; - } else { - auto& temp = ctx.cuda_device_context(); - AlgorithmsCache& algo_cache = - *(framework::ConvSearchCache::Instance().GetForward()); - - auto x_dims = framework::vectorize(args.x->dims()); - auto w_dims = framework::vectorize(args.w->dims()); - - VLOG(10) << "miopenConvolutionFwdAlgoPerf_t:" - << ", x_dims:" << x_dims << ", w_dims:" << w_dims << ", args.s" - << args.s << ", args.p" << args.p << ", args.d" << args.d; - - algo = algo_cache.GetAlgorithm( - x_dims, w_dims, args.s, args.p, args.d, 0, - static_cast(args.cudnn_dtype), [&]() { - workspace_handle.RunFuncSync(cudnn_find_func, workspace_size); - return find_result.fwd_algo; - }); - } + workspace_handle.RunFuncSync(cudnn_find_func, workspace_size); + algo = find_result.fwd_algo; VLOG(3) << "choose algo " << algo; return algo; } @@ -208,27 +188,8 @@ struct SearchAlgorithm { cudnn_workspace_ptr, workspace_size, false)); }; - 
if (!exhaustive_search && !deterministic) { - workspace_handle.RunFuncSync(cudnn_find_func, workspace_size); - algo = find_result.bwd_data_algo; - } else { - AlgorithmsCache& algo_cache = - *(framework::ConvSearchCache::Instance().GetBackwardData()); - - auto x_dims = framework::vectorize(args.x->dims()); - auto w_dims = framework::vectorize(args.w->dims()); - - VLOG(10) << "miopenConvolutionFwdAlgoPerf_t" - << ", x_dims:" << x_dims << ", w_dims:" << w_dims << ", args.s" - << args.s << ", args.p" << args.p << ", args.d" << args.d; - - algo = algo_cache.GetAlgorithm( - x_dims, w_dims, args.s, args.p, args.d, 0, - static_cast(args.cudnn_dtype), [&]() { - workspace_handle.RunFuncSync(cudnn_find_func, workspace_size); - return find_result.bwd_data_algo; - }); - } + workspace_handle.RunFuncSync(cudnn_find_func, workspace_size); + algo = find_result.bwd_data_algo; VLOG(3) << "choose algo " << algo; return algo; } @@ -269,27 +230,8 @@ struct SearchAlgorithm { cudnn_workspace_ptr, workspace_size, false)); }; - if (!exhaustive_search && !deterministic) { - workspace_handle.RunFuncSync(cudnn_find_func, workspace_size); - algo = find_result.bwd_weights_algo; - } else { - AlgorithmsCache& algo_cache = - *(framework::ConvSearchCache::Instance().GetBackwardFilter()); - - auto x_dims = framework::vectorize(args.x->dims()); - auto w_dims = framework::vectorize(args.w->dims()); - - VLOG(10) << "miopenConvolutionFwdAlgoPerf_t:" - << ", x_dims:" << x_dims << ", w_dims:" << w_dims << ", args.s" - << args.s << ", args.p" << args.p << ", args.d" << args.d; - - algo = algo_cache.GetAlgorithm( - x_dims, w_dims, args.s, args.p, args.d, 0, - static_cast(args.cudnn_dtype), [&]() { - workspace_handle.RunFuncSync(cudnn_find_func, workspace_size); - return find_result.bwd_weights_algo; - }); - } + workspace_handle.RunFuncSync(cudnn_find_func, workspace_size); + algo = find_result.bwd_weights_algo; VLOG(3) << "choose algo " << algo; return algo; } diff --git a/paddle/fluid/operators/correlation_op.cu b/paddle/fluid/operators/correlation_op.cu index a51fce81324..9b08f875bb6 100644 --- a/paddle/fluid/operators/correlation_op.cu +++ b/paddle/fluid/operators/correlation_op.cu @@ -12,17 +12,22 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ -#ifndef PADDLE_WITH_HIP -// HIP not supported yet - #include #include #include "paddle/fluid/framework/op_registry.h" +#ifdef __HIPCC__ +#define __syncwarp() __all(1) +#endif + namespace paddle { namespace operators { +#ifdef __HIPCC__ +#define THREADS_PER_BLOCK 64 +#else #define THREADS_PER_BLOCK 32 +#endif #define FULL_MASK 0xffffffff using framework::Tensor; @@ -30,14 +35,22 @@ using framework::Tensor; template __forceinline__ __device__ T warpReduceSum(T val) { for (int offset = 16; offset > 0; offset /= 2) { +#ifdef __HIPCC__ + val += __shfl_down(val, offset); +#else val += __shfl_down_sync(FULL_MASK, val, offset); +#endif } return val; } template __forceinline__ __device__ T blockReduceSum(T val) { +#ifdef __HIPCC__ + static __shared__ T shared[64]; +#else static __shared__ T shared[32]; +#endif int lane = threadIdx.x % warpSize; int wid = threadIdx.x / warpSize; @@ -483,5 +496,3 @@ REGISTER_OP_CUDA_KERNEL(correlation, ops::CorrelationCUDAKernel, ops::CorrelationCUDAKernel); REGISTER_OP_CUDA_KERNEL(correlation_grad, ops::CorrelationCUDAGradKernel, ops::CorrelationCUDAGradKernel); - -#endif // not PADDLE_WITH_HIP diff --git a/paddle/fluid/operators/fused/CMakeLists.txt b/paddle/fluid/operators/fused/CMakeLists.txt index 287827ced51..104298e0373 100644 --- a/paddle/fluid/operators/fused/CMakeLists.txt +++ b/paddle/fluid/operators/fused/CMakeLists.txt @@ -32,8 +32,7 @@ if (WITH_GPU OR WITH_ROCM) file(APPEND ${pybind_file} "USE_CUDA_ONLY_OP(fused_batch_norm_act);\n") endif() # conv_fusion_op needs cudnn 7 above - # HIP not support cudnnConvolutionBiasActivationForward - if ((NOT WITH_ROCM) AND (NOT ${CUDNN_VERSION} VERSION_LESS 7100)) + if (NOT ${CUDNN_VERSION} VERSION_LESS 7100) op_library(conv_fusion_op) file(APPEND ${pybind_file} "USE_CUDA_ONLY_OP(conv2d_fusion);\n") endif() diff --git a/paddle/fluid/operators/fused/conv_fusion_op.cu b/paddle/fluid/operators/fused/conv_fusion_op.cu index c9ba7a61e09..f5ee7f55991 100644 --- a/paddle/fluid/operators/fused/conv_fusion_op.cu +++ b/paddle/fluid/operators/fused/conv_fusion_op.cu @@ -18,14 +18,18 @@ limitations under the License. 
*/ #include "paddle/fluid/operators/conv_cudnn_op_cache.h" #include "paddle/fluid/operators/conv_op.h" #include "paddle/fluid/operators/math/padding.h" +#ifdef PADDLE_WITH_HIP +#include "paddle/fluid/platform/miopen_helper.h" +#else #include "paddle/fluid/platform/cudnn_helper.h" +#endif DECLARE_int64(cudnn_exhaustive_search_times); namespace paddle { namespace operators { -#if CUDNN_VERSION >= 7100 +#if PADDLE_WITH_HIP || CUDNN_VERSION >= 7100 using Tensor = framework::Tensor; using ScopedTensorDescriptor = platform::ScopedTensorDescriptor; using ScopedFilterDescriptor = platform::ScopedFilterDescriptor; @@ -162,7 +166,78 @@ class CUDNNConvFusionOpKernel : public framework::OpKernel { if (input->dims().size() == 5) { layout = DataLayout::kNCDHW; } +#ifdef PADDLE_WITH_HIP + miopenConvolutionDescriptor_t cudnn_conv_desc = + conv_desc.descriptor(padding_common, strides, dilations); + PADDLE_ENFORCE_CUDA_SUCCESS( + platform::dynload::miopenSetConvolutionGroupCount(cudnn_conv_desc, + groups)); + // Now only support NCHW + std::vector bias_dim = { + 1, static_cast(transformed_output.dims()[1]), 1, 1}; + miopenTensorDescriptor_t cudnn_input_desc = input_desc.descriptor( + layout, framework::vectorize(transformed_input.dims())); + miopenTensorDescriptor_t cudnn_output_desc = output_desc.descriptor( + layout, framework::vectorize(transformed_output.dims())); + miopenTensorDescriptor_t cudnn_filter_desc = filter_desc.descriptor( + layout, framework::vectorize(filter->dims())); + miopenTensorDescriptor_t cudnn_bias_desc = + bias_desc.descriptor(layout, bias_dim); + miopenActivationDescriptor_t cudnn_act_desc = + act_desc.descriptor(activation); + miopenConvFwdAlgorithm_t algo; + auto handle = dev_ctx.cudnn_handle(); + auto workspace_handle = dev_ctx.cudnn_workspace_handle(); + + auto x_dims = framework::vectorize(transformed_input.dims()); + auto f_dims = framework::vectorize(filter->dims()); + + size_t workspace_size = 0; + PADDLE_ENFORCE_CUDA_SUCCESS( + platform::dynload::miopenConvolutionForwardGetWorkSpaceSize( + handle, cudnn_filter_desc, cudnn_input_desc, cudnn_conv_desc, + cudnn_output_desc, &workspace_size)); + int find_count; + miopenConvAlgoPerf_t find_result; + auto cudnn_find_func = [&](void* cudnn_workspace_ptr) { + PADDLE_ENFORCE_CUDA_SUCCESS( + platform::dynload::miopenFindConvolutionForwardAlgorithm( + handle, cudnn_input_desc, input_data, cudnn_filter_desc, + filter_data, cudnn_conv_desc, cudnn_output_desc, output_data, + kNUM_CUDNN_FWD_ALGS, &find_count, &find_result, + cudnn_workspace_ptr, workspace_size, false)); + }; + workspace_handle.RunFuncSync(cudnn_find_func, workspace_size); + algo = find_result.fwd_algo; + VLOG(3) << "cuDNN forward algo " << algo; + + { + ScalingParamType alpha = 1.0f, beta = 0.0f; + auto cudnn_func = [&](void* cudnn_workspace) { + PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::miopenConvolutionForward( + handle, &alpha, cudnn_input_desc, input_data, cudnn_filter_desc, + filter_data, cudnn_conv_desc, algo, &beta, cudnn_output_desc, + output_data, cudnn_workspace, workspace_size)); + }; + workspace_handle.RunFunc(cudnn_func, workspace_size); + PADDLE_ENFORCE_CUDA_SUCCESS( + platform::dynload::miopenConvolutionForwardBias( + handle, &alpha, cudnn_bias_desc, bias_data, &beta, + cudnn_output_desc, output_data)); + if (activation != "identity") { + PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::miopenActivationForward( + handle, cudnn_act_desc, &alpha, cudnn_output_desc, output_data, + &beta, cudnn_output_desc, output_data)); + } + if (residual) { + 
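+        // Hedged sketch of the semantics assumed for this call: miopenOpTensor with
+        // miopenTensorOpAdd is expected to follow the cudnnOpTensor convention,
+        //   C = Add(alpha1 * A, alpha2 * B) + beta * C,
+        // so with alpha = 1 and beta = 0 the residual input is folded into the
+        // activation output in place, i.e. output_data += residual_data.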
PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::miopenOpTensor( + handle, miopenTensorOpAdd, &alpha, cudnn_output_desc, output_data, + &alpha, cudnn_output_desc, residual_data, &beta, cudnn_output_desc, + output_data)); + } + } +#else // PADDLE_WITH_HIP cudnnConvolutionDescriptor_t cudnn_conv_desc = conv_desc.descriptor(padding_common, strides, dilations); PADDLE_ENFORCE_CUDA_SUCCESS( @@ -327,6 +402,7 @@ class CUDNNConvFusionOpKernel : public framework::OpKernel { }; workspace_handle.RunFunc(cudnn_func, workspace_size_in_bytes); } +#endif std::vector channels = ctx.Attr>("split_channels"); if (channels.size()) { auto outs = ctx.MultiOutput("Outputs"); @@ -358,8 +434,11 @@ class CUDNNConvFusionOpKernel : public framework::OpKernel { } // namespace operators } // namespace paddle -#if CUDNN_VERSION >= 7100 namespace ops = paddle::operators; +#if CUDNN_VERSION >= 7100 REGISTER_OP_CUDA_KERNEL(conv2d_fusion, ops::CUDNNConvFusionOpKernel, ops::CUDNNConvFusionOpKernel); #endif +#ifdef PADDLE_WITH_HIP +REGISTER_OP_CUDA_KERNEL(conv2d_fusion, ops::CUDNNConvFusionOpKernel); +#endif diff --git a/paddle/fluid/operators/math/unpooling.cu b/paddle/fluid/operators/math/unpooling.cu index d78e3385efb..a73f76f53be 100644 --- a/paddle/fluid/operators/math/unpooling.cu +++ b/paddle/fluid/operators/math/unpooling.cu @@ -87,7 +87,11 @@ class Unpool2dMaxFunctor { const T* input_data = input.data(); const int* indices_data = indices.data(); T* output_data = output->mutable_data(context.GetPlace()); +#ifdef __HIPCC__ + int threads = 256; +#else int threads = 1024; +#endif int grid = (input.numel() + threads - 1) / threads; KernelUnpool2dMax<<>>( input.numel(), input_data, indices_data, input_height, input_width, @@ -117,7 +121,11 @@ class Unpool2dMaxGradFunctor { const T* output_data = output.data(); const T* output_grad_data = output_grad.data(); T* input_grad_data = input_grad->mutable_data(context.GetPlace()); +#ifdef __HIPCC__ + int threads = 256; +#else int threads = 1024; +#endif int grid = (input.numel() + threads - 1) / threads; KernelUnpool2dMaxGrad<<>>( input.numel(), input_data, indices_data, input_height, input_width, diff --git a/paddle/fluid/operators/memcpy_op.cc b/paddle/fluid/operators/memcpy_op.cc index 4e10498efa1..ecd2d48dcbd 100644 --- a/paddle/fluid/operators/memcpy_op.cc +++ b/paddle/fluid/operators/memcpy_op.cc @@ -141,7 +141,7 @@ REGISTER_OP_CPU_KERNEL_FUNCTOR(memcpy, float, ops::MemcpyKernel, double, ops::MemcpyKernel, plat::float16, ops::MemcpyKernel); -#ifdef PADDLE_WITH_CUDA +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_ROCM) REGISTER_OP_CUDA_KERNEL_FUNCTOR(memcpy, float, ops::MemcpyKernel, double, ops::MemcpyKernel, int, ops::MemcpyKernel, int64_t, ops::MemcpyKernel, bool, diff --git a/paddle/fluid/platform/dynload/miopen.h b/paddle/fluid/platform/dynload/miopen.h index 5ff4bff4bff..77ff3f3ccbb 100644 --- a/paddle/fluid/platform/dynload/miopen.h +++ b/paddle/fluid/platform/dynload/miopen.h @@ -110,6 +110,7 @@ extern void EnforceCUDNNLoaded(const char* fn_name); __macro(miopenActivationBackward); \ __macro(miopenConvolutionBackwardWeights); \ __macro(miopenConvolutionForward); \ + __macro(miopenConvolutionForwardBias); \ __macro(miopenConvolutionBackwardBias); \ __macro(miopenConvolutionForwardGetWorkSpaceSize); \ __macro(miopenConvolutionBackwardDataGetWorkSpaceSize); \ diff --git a/python/paddle/fluid/tests/unittests/test_dot_op.py b/python/paddle/fluid/tests/unittests/test_dot_op.py index f65301f2d86..a92104a5a6f 100644 --- a/python/paddle/fluid/tests/unittests/test_dot_op.py 
+++ b/python/paddle/fluid/tests/unittests/test_dot_op.py @@ -15,6 +15,7 @@ from __future__ import print_function import paddle import paddle.fluid as fluid +import paddle.fluid.core as core import unittest import numpy as np from op_test import OpTest, skip_check_grad_ci @@ -39,13 +40,33 @@ class DotOp(OpTest): self.check_output() def test_check_grad_normal(self): - self.check_grad(['X', 'Y'], 'Out') + if core.is_compiled_with_rocm(): + self.check_grad( + ['X', 'Y'], + 'Out', + user_defined_grads=[self.inputs['Y'], self.inputs['X']]) + else: + self.check_grad(['X', 'Y'], 'Out') def test_check_grad_ingore_x(self): - self.check_grad(['Y'], 'Out', no_grad_set=set("X")) + if core.is_compiled_with_rocm(): + self.check_grad( + ['Y'], + 'Out', + no_grad_set=set("X"), + user_defined_grads=[self.inputs['X']]) + else: + self.check_grad(['Y'], 'Out', no_grad_set=set("X")) def test_check_grad_ingore_y(self): - self.check_grad(['X'], 'Out', no_grad_set=set('Y')) + if core.is_compiled_with_rocm(): + self.check_grad( + ['X'], + 'Out', + no_grad_set=set('Y'), + user_defined_grads=[self.inputs['Y']]) + else: + self.check_grad(['X'], 'Out', no_grad_set=set('Y')) def init_input_output(self): self.x = np.random.uniform(0.1, 1, [121]).astype(self.dtype) @@ -64,6 +85,15 @@ class DotOpBatch(DotOp): [11, 12]) self.out = np.sum(self.x * self.y, axis=1).reshape([11, 1]) + def test_check_grad_normal(self): + self.check_grad(['X', 'Y'], 'Out') + + def test_check_grad_ingore_x(self): + self.check_grad(['Y'], 'Out', no_grad_set=set("X")) + + def test_check_grad_ingore_y(self): + self.check_grad(['X'], 'Out', no_grad_set=set('Y')) + class TestDotOpError(unittest.TestCase): def test_errors(self): diff --git a/python/paddle/fluid/tests/unittests/test_imperative_lod_tensor_to_selected_rows.py b/python/paddle/fluid/tests/unittests/test_imperative_lod_tensor_to_selected_rows.py index e7af249cf8b..64f1715fc97 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_lod_tensor_to_selected_rows.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_lod_tensor_to_selected_rows.py @@ -76,7 +76,10 @@ class SimpleNet(fluid.Layer): class TestDygraphSimpleNet(unittest.TestCase): def test_simple_net(self): for is_sparse in [True, False]: - for dtype in ["float32", "float64"]: + dtype_list = ["float32"] + if not core.is_compiled_with_rocm(): + dtype_list.append("float64") + for dtype in dtype_list: self.simple_net_float32(is_sparse, dtype) def simple_net_float32(self, is_sparse, dtype): diff --git a/python/paddle/fluid/tests/unittests/test_imperative_selected_rows_to_lod_tensor.py b/python/paddle/fluid/tests/unittests/test_imperative_selected_rows_to_lod_tensor.py index 2f2a3e5de5e..8b2e61f8d2a 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_selected_rows_to_lod_tensor.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_selected_rows_to_lod_tensor.py @@ -82,7 +82,10 @@ class SimpleNet(fluid.Layer): class TestDygraphSimpleNet(unittest.TestCase): def test_simple_net(self): for is_sparse in [True, False]: - for dtype in ["float32", "float64"]: + dtype_list = ["float32"] + if not core.is_compiled_with_rocm(): + dtype_list.append("float64") + for dtype in dtype_list: self.simple_net_float(is_sparse, dtype) def simple_net_float(self, is_sparse, dtype): -- GitLab From 2fe45806e8ab8e6a6452bd2a2b1834875da94404 Mon Sep 17 00:00:00 2001 From: zhulei <563755780@qq.com> Date: Thu, 6 May 2021 15:04:10 +0800 Subject: [PATCH 085/720] [Rocm] fix expand as (#32704) * [Rocm] fix test_expand_as_op * [Rocm] fix 
test_expand_as_op * [Rocm] fix test_expand_as_op * [Rocm] fix test_expand_as_op * [Rocm] fix test_expand_as_op * [Rocm] fix test_expand_as_op --- cmake/external/eigen.cmake | 4 +- patches/eigen/TensorReductionGpu.h | 996 +++++++++++++++++++++++++++++ 2 files changed, 999 insertions(+), 1 deletion(-) create mode 100644 patches/eigen/TensorReductionGpu.h diff --git a/cmake/external/eigen.cmake b/cmake/external/eigen.cmake index 4619f9f7b7e..aa471002eac 100644 --- a/cmake/external/eigen.cmake +++ b/cmake/external/eigen.cmake @@ -33,7 +33,9 @@ elseif(LINUX) # which will cause compiler error of using __host__ funciont in __host__ __device__ file(TO_NATIVE_PATH ${PADDLE_SOURCE_DIR}/patches/eigen/Meta.h native_src) file(TO_NATIVE_PATH ${EIGEN_SOURCE_DIR}/Eigen/src/Core/util/Meta.h native_dst) - set(EIGEN_PATCH_COMMAND cp ${native_src} ${native_dst}) + file(TO_NATIVE_PATH ${PADDLE_SOURCE_DIR}/patches/eigen/TensorReductionGpu.h native_src1) + file(TO_NATIVE_PATH ${EIGEN_SOURCE_DIR}/unsupported/Eigen/CXX11/src/Tensor/TensorReductionGpu.h native_dst1) + set(EIGEN_PATCH_COMMAND cp ${native_src} ${native_dst} && cp ${native_src1} ${native_dst1}) endif() endif() diff --git a/patches/eigen/TensorReductionGpu.h b/patches/eigen/TensorReductionGpu.h new file mode 100644 index 00000000000..696078e5488 --- /dev/null +++ b/patches/eigen/TensorReductionGpu.h @@ -0,0 +1,996 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. +// clang-format off +#ifndef EIGEN_CXX11_TENSOR_TENSOR_REDUCTION_GPU_H +#define EIGEN_CXX11_TENSOR_TENSOR_REDUCTION_GPU_H + +namespace Eigen { +namespace internal { + +#if defined(EIGEN_USE_GPU) && defined(EIGEN_GPUCC) +// Full reducers for GPU, don't vectorize for now + +// Reducer function that enables multiple gpu thread to safely accumulate at the same +// output address. It basically reads the current value of the output variable, and +// attempts to update it with the new value. If in the meantime another gpu thread +// updated the content of the output address it will try again. 
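+// Illustrative usage sketch only (the local names here are hypothetical): a reduction
+// kernel would typically fold a per-thread partial value into the shared output slot as
+//   float partial = ...;                       // this thread's partial result
+//   Eigen::internal::SumReducer<float> reducer;
+//   atomicReduce(output, partial, reducer);    // retries via the CAS loop below
+// The loop below re-reads the destination whenever another thread wins the
+// compare-and-swap, so concurrent accumulation from many blocks stays correct.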
+template +__device__ EIGEN_ALWAYS_INLINE void atomicReduce(T* output, T accum, R& reducer) { +#if (defined(EIGEN_HIP_DEVICE_COMPILE) && defined(__HIP_ARCH_HAS_WARP_SHUFFLE__)) || (EIGEN_CUDA_ARCH >= 300) + if (sizeof(T) == 4) + { + unsigned int oldval = *reinterpret_cast(output); + unsigned int newval = oldval; + reducer.reduce(accum, reinterpret_cast(&newval)); + if (newval == oldval) { + return; + } + unsigned int readback; + while ((readback = atomicCAS((unsigned int*)output, oldval, newval)) != oldval) { + oldval = readback; + newval = oldval; + reducer.reduce(accum, reinterpret_cast(&newval)); + if (newval == oldval) { + return; + } + } + } + else if (sizeof(T) == 8) { + unsigned long long oldval = *reinterpret_cast(output); + unsigned long long newval = oldval; + reducer.reduce(accum, reinterpret_cast(&newval)); + if (newval == oldval) { + return; + } + unsigned long long readback; + while ((readback = atomicCAS((unsigned long long*)output, oldval, newval)) != oldval) { + oldval = readback; + newval = oldval; + reducer.reduce(accum, reinterpret_cast(&newval)); + if (newval == oldval) { + return; + } + } + } + else { + gpu_assert(0 && "Wordsize not supported"); + } +#else // EIGEN_CUDA_ARCH >= 300 + gpu_assert(0 && "Shouldn't be called on unsupported device"); +#endif // EIGEN_CUDA_ARCH >= 300 +} + +// We extend atomicExch to support extra data types +template +__device__ inline Type atomicExchCustom(Type* address, Type val) { + return atomicExch(address, val); +} + +template <> +__device__ inline double atomicExchCustom(double* address, double val) { + unsigned long long int* address_as_ull = reinterpret_cast(address); + return __longlong_as_double(atomicExch(address_as_ull, __double_as_longlong(val))); +} + +#ifdef EIGEN_HAS_GPU_FP16 +template