diff --git a/python/paddle/fluid/initializer.py b/python/paddle/fluid/initializer.py index b3baedc401504f2411e4a660fc9a3b1c5ea53924..ba5e51c11dd65b827d2c3ee5dcaceb6c53667ec3 100644 --- a/python/paddle/fluid/initializer.py +++ b/python/paddle/fluid/initializer.py @@ -331,22 +331,56 @@ class NormalInitializer(Initializer): ["uint16", "float16", "float32", "float64"], "guassian_random") + # to be compatible with fp16 initializers + if var.dtype in [VarDesc.VarType.FP16, VarDesc.VarType.BF16]: + out_dtype = VarDesc.VarType.FP32 + out_var = block.create_var( + name=unique_name.generate(".".join( + ['normal_init', var.name, 'tmp'])), + shape=var.shape, + dtype=out_dtype, + type=VarDesc.VarType.LOD_TENSOR, + persistable=False) + else: + out_dtype = var.dtype + out_var = var + if self._seed == 0: self._seed = block.program.random_seed - if framework._non_static_mode(): + if in_dygraph_mode(): + place = _current_expected_place() + out_var = _C_ops.final_state_gaussian_random( + var.shape, self._mean, self._std_dev, self._seed, out_dtype, + place) + out_var._share_underline_tensor_to(var) + + if var.dtype in [VarDesc.VarType.FP16, VarDesc.VarType.BF16]: + var_tmp = _C_ops.final_state_cast(out_var, var.dtype) + var_tmp._share_underline_tensor_to(var) + else: + out_var._share_underline_tensor_to(var) + return None + + if _in_legacy_dygraph(): out_var = _C_ops.gaussian_random( - 'shape', var.shape, 'dtype', var.dtype, 'mean', self._mean, + 'shape', var.shape, 'dtype', out_dtype, 'mean', self._mean, 'std', self._std_dev, 'seed', self._seed, 'use_mkldnn', False) - out_var._share_underline_tensor_to(var) + + if var.dtype in [VarDesc.VarType.FP16, VarDesc.VarType.BF16]: + var_tmp = _C_ops.cast(out_var, 'in_dtype', out_var.dtype, + 'out_dtype', var.dtype) + var_tmp._share_underline_tensor_to(var) + else: + out_var._share_underline_tensor_to(var) return None else: op = block.append_op( type="gaussian_random", - outputs={"Out": var}, + outputs={"Out": out_var}, attrs={ "shape": 
var.shape, - "dtype": var.dtype, + "dtype": out_dtype, "mean": self._mean, "std": self._std_dev, "seed": self._seed, @@ -354,6 +388,13 @@ class NormalInitializer(Initializer): }, stop_gradient=True) + if var.dtype in [VarDesc.VarType.FP16, VarDesc.VarType.BF16]: + block.append_op( + type="cast", + inputs={"X": out_var}, + outputs={"Out": var}, + attrs={"in_dtype": out_var.dtype, + "out_dtype": var.dtype}) var.op = op return op @@ -567,9 +608,15 @@ class XavierInitializer(Initializer): self._seed, 'dtype', out_dtype) else: std = math.sqrt(2.0 / float(fan_in + fan_out)) - out_var = _C_ops.gaussian_random( - 'shape', out_var.shape, 'dtype', out_dtype, 'mean', 0.0, - 'std', std, 'seed', self._seed) + + if in_dygraph_mode(): + place = _current_expected_place() + out_var = _C_ops.final_state_gaussian_random( + out_var.shape, 0.0, std, self._seed, out_dtype, place) + else: + out_var = _C_ops.gaussian_random( + 'shape', out_var.shape, 'dtype', out_dtype, 'mean', 0.0, + 'std', std, 'seed', self._seed) if var.dtype == VarDesc.VarType.FP16 or ( var.dtype == VarDesc.VarType.BF16 and not self._uniform): @@ -720,9 +767,15 @@ class MSRAInitializer(Initializer): int(out_dtype)) else: std = math.sqrt(2.0 / float(fan_in)) - out_var = _C_ops.gaussian_random( - 'shape', out_var.shape, 'dtype', - int(out_dtype), 'mean', 0.0, 'std', std, 'seed', self._seed) + if in_dygraph_mode(): + place = _current_expected_place() + out_var = _C_ops.final_state_gaussian_random( + out_var.shape, 0.0, std, self._seed, out_dtype, place) + else: + out_var = _C_ops.gaussian_random( + 'shape', out_var.shape, 'dtype', + int(out_dtype), 'mean', 0.0, 'std', std, 'seed', + self._seed) if var.dtype == VarDesc.VarType.FP16 or ( var.dtype == VarDesc.VarType.BF16 and not self._uniform): diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index a405bf829fd489c2e3bed54c1ea514adc3c85373..47f40a2e6a5af1567765357efc893d841637574b 100755 --- a/python/paddle/fluid/layers/nn.py +++ 
b/python/paddle/fluid/layers/nn.py @@ -28,6 +28,7 @@ from ..layer_helper import LayerHelper from paddle.fluid.framework import _in_legacy_dygraph from ..initializer import Normal, Constant, NumpyArrayInitializer from ..framework import Variable, OpProtoHolder, _non_static_mode, dygraph_only, _dygraph_tracer, default_main_program, _varbase_creator, static_only, _global_flags, _in_legacy_dygraph, in_dygraph_mode +from ..framework import _current_expected_place from .. import dygraph_utils from ..param_attr import ParamAttr from .layer_function_generator import autodoc, templatedoc, _generate_doc_string_ @@ -10970,7 +10971,15 @@ def gaussian_random(shape, if not isinstance(dtype, core.VarDesc.VarType): dtype = convert_np_dtype_to_dtype_(dtype) - if _non_static_mode(): + if in_dygraph_mode(): + shape = utils.convert_shape_to_list(shape) + place = _current_expected_place() + return _C_ops.final_state_gaussian_random(shape, + float(mean), + float(std), seed, dtype, + place) + + if _in_legacy_dygraph(): shape = utils.convert_shape_to_list(shape) return _C_ops.gaussian_random('shape', shape, 'mean', float(mean), 'std', diff --git a/python/paddle/fluid/tests/unittests/test_egr_python_api.py b/python/paddle/fluid/tests/unittests/test_egr_python_api.py index e7abed09646792c7b0592d993e146ef8cc84cefe..ae29c6c262a8467cf80523da8a1b53f334f053b1 100644 --- a/python/paddle/fluid/tests/unittests/test_egr_python_api.py +++ b/python/paddle/fluid/tests/unittests/test_egr_python_api.py @@ -251,9 +251,6 @@ class EagerVariablePropertiesAndMethodsTestCase(unittest.TestCase): self.assertTrue(egr_tensor12.place._equals(paddle.fluid.CPUPlace())) self.assertTrue(np.array_equal(egr_tensor12.numpy(), x)) - egr_tensor13 = paddle.randn([2, 2]) - self.assertTrue("eager_tmp" in egr_tensor13.name) - with self.assertRaisesRegexp( ValueError, "The shape of Parameter should not be None"): eager_param = EagerParamBase(shape=None, dtype="float32") diff --git 
a/python/paddle/fluid/tests/unittests/test_gaussian_random_op.py b/python/paddle/fluid/tests/unittests/test_gaussian_random_op.py index 4fca8b9f2a11827609931e647a9543b71560f06d..4140ce44648fa60bc4f8c67ff10d72c07d9297ce 100644 --- a/python/paddle/fluid/tests/unittests/test_gaussian_random_op.py +++ b/python/paddle/fluid/tests/unittests/test_gaussian_random_op.py @@ -23,12 +23,14 @@ import paddle.fluid.core as core from paddle.fluid.op import Operator from paddle.fluid.executor import Executor from paddle.fluid.tests.unittests.op_test import OpTest, convert_uint16_to_float +from paddle.fluid.framework import _test_eager_guard import paddle class TestGaussianRandomOp(OpTest): def setUp(self): self.op_type = "gaussian_random" + self.python_api = paddle.normal self.set_attrs() self.inputs = {} self.use_mkldnn = False @@ -50,6 +52,10 @@ class TestGaussianRandomOp(OpTest): def test_check_output(self): self.check_output_customized(self.verify_output) + def test_eager(self): + with _test_eager_guard(): + self.test_check_output() + def verify_output(self, outs): self.assertEqual(outs[0].shape, (123, 92)) hist, _ = np.histogram(outs[0], range=(-3, 5)) @@ -70,6 +76,7 @@ class TestGaussianRandomOp(OpTest): class TestGaussianRandomBF16Op(OpTest): def setUp(self): self.op_type = "gaussian_random" + self.python_api = paddle.normal self.set_attrs() self.inputs = {} self.use_mkldnn = False @@ -93,6 +100,10 @@ class TestGaussianRandomBF16Op(OpTest): self.check_output_with_place_customized( self.verify_output, place=core.CUDAPlace(0)) + def test_eager(self): + with _test_eager_guard(): + self.test_check_output() + def verify_output(self, outs): outs = convert_uint16_to_float(outs) self.assertEqual(outs[0].shape, (123, 92)) diff --git a/python/paddle/fluid/tests/unittests/test_initializer.py b/python/paddle/fluid/tests/unittests/test_initializer.py index 91c2800836c9dc9b3ef45473e975e4153e5d442c..3a9387082e6802c6d11e1dc1b0ecbac8a91e0884 100644 --- 
a/python/paddle/fluid/tests/unittests/test_initializer.py +++ b/python/paddle/fluid/tests/unittests/test_initializer.py @@ -244,7 +244,7 @@ class TestNormalInitializer(unittest.TestCase): lod_level=0, name="param", initializer=initializer.NormalInitializer(2.3, 1.9, 123)) - num_ops = 1 + num_ops = 2 if (dtype == "float16" or dtype == "uint16") else 1 self.assertEqual(len(block.ops), num_ops) init_op = block.ops[0] self.assertEqual(init_op.type, 'gaussian_random') @@ -685,6 +685,68 @@ class TestUniformInitializerDygraph(unittest.TestCase): self.func_uniform_initializer() +class TestXavierInitializerDygraph(unittest.TestCase): + def func_xvarier_initializer(self, dtype="float32"): + """ + In dygraph mode, we can use initializer directly to initialize a tensor. + """ + paddle.disable_static() + + tensor = paddle.zeros([1024, 1024, 16]) + tensor.stop_gradient = False + + xavier_ = paddle.fluid.initializer.XavierInitializer( + uniform=False, fan_in=3, fan_out=5) + xavier_(tensor) + + hist, _ = output_hist(tensor.numpy()) + + hist2, _ = output_hist( + np.random.normal(0, np.sqrt(2.0 / (3 + 5)), [1024, 1024, 16])) + + self.assertTrue( + np.allclose( + hist, hist2, rtol=0, atol=0.01), + "hist: " + str(hist) + " hist2: " + str(hist2)) + paddle.enable_static() + + def test_xavier_initializer(self, dtype="float32"): + with framework._test_eager_guard(): + self.func_xvarier_initializer() + self.func_xvarier_initializer() + + +class TestMSRAInitializerDygraph(unittest.TestCase): + def func_msra_initializer(self, dtype="float32"): + """ + In dygraph mode, we can use initializer directly to initialize a tensor. 
+ """ + paddle.disable_static() + + tensor = paddle.zeros([1024, 1024, 16]) + tensor.stop_gradient = False + + msra_ = paddle.fluid.initializer.MSRAInitializer( + uniform=False, fan_in=4) + msra_(tensor) + + hist, _ = output_hist(tensor.numpy()) + + hist2, _ = output_hist( + np.random.normal(0, np.sqrt(2.0 / (4)), [1024, 1024, 16])) + + self.assertTrue( + np.allclose( + hist, hist2, rtol=0, atol=0.01), + "hist: " + str(hist) + " hist2: " + str(hist2)) + paddle.enable_static() + + def test_msra_initializer(self, dtype="float32"): + with framework._test_eager_guard(): + self.func_msra_initializer() + self.func_msra_initializer() + + class TesetconsistencyOfDynamicAndStaticGraph(unittest.TestCase): def func_order(self): paddle.set_device('cpu') diff --git a/python/paddle/fluid/tests/unittests/test_initializer_nn.py b/python/paddle/fluid/tests/unittests/test_initializer_nn.py index 74686652044ec660c2f99ad85ef05e824a430115..9953681e0f5bd34950850c1cc80d1063523f54e0 100644 --- a/python/paddle/fluid/tests/unittests/test_initializer_nn.py +++ b/python/paddle/fluid/tests/unittests/test_initializer_nn.py @@ -400,7 +400,7 @@ class TestNormal(unittest.TestCase): lod_level=0, name="param", initializer=initializer.Normal(2.3, 1.9)) - num_ops = 1 + num_ops = 2 if dtype in ["float16", "uint16"] else 1 self.assertEqual(len(block.ops), num_ops) init_op = block.ops[0] self.assertEqual(init_op.type, 'gaussian_random') diff --git a/python/paddle/tensor/random.py b/python/paddle/tensor/random.py index 82818d50510c9bba8fd47ba041367fc22fd664e6..3d0617e40d6b6a15bfcac7936da689a0bbafafc2 100644 --- a/python/paddle/tensor/random.py +++ b/python/paddle/tensor/random.py @@ -239,7 +239,15 @@ def gaussian(shape, mean=0.0, std=1.0, dtype=None, name=None): if not isinstance(dtype, core.VarDesc.VarType): dtype = convert_np_dtype_to_dtype_(dtype) - if paddle.in_dynamic_mode(): + if in_dygraph_mode(): + shape = utils.convert_shape_to_list(shape) + place = _current_expected_place() + return 
_C_ops.final_state_gaussian_random(shape, + float(mean), + float(std), seed, dtype, + place) + + if _in_legacy_dygraph(): shape = utils.convert_shape_to_list(shape) return _C_ops.gaussian_random('shape', shape, 'mean', float(mean), 'std', diff --git a/python/paddle/utils/code_gen/api.yaml b/python/paddle/utils/code_gen/api.yaml index e3d8e8f5f47a50d9583f54c51a40f7b21b57b4f7..6b58c84061384ba8e86811d50b48367ff9f746cb 100644 --- a/python/paddle/utils/code_gen/api.yaml +++ b/python/paddle/utils/code_gen/api.yaml @@ -782,6 +782,18 @@ kernel : func : gather_tree +- api : gaussian_random + args : (IntArray shape, float mean, float std, int seed, DataType dtype, Place place={}) + output: Tensor + infer_meta : + func : GaussianRandomInferMeta + param : [shape, mean, std, seed, dtype] + kernel : + func : gaussian_random + param : [shape, mean, std, seed, dtype] + data_type : dtype + backend : place + - api : gelu args : (Tensor x, bool approximate) output : Tensor(out)