From 4819ab4dc2248540b178e5e7aaf5df722f35cae3 Mon Sep 17 00:00:00 2001
From: chentianyu03
Date: Tue, 12 Apr 2022 19:43:52 +0800
Subject: [PATCH] [Yaml] add gaussian_random yaml and test case (#41312)

* add gaussian random yaml
* add gaussian_random yaml and test case
* fix erroneous modification of full yaml
* import in_dygraph_mode
* import _in_legacy_dygraph
* add place arg in api
* import _current_expected_place
* fix test_egr_python_api failed case
* add test case
* add cast for NormalInitializer
* fix test error
* fix test error
* rm unused check code
* fix test error in test_initializer_nn
* modify by review
---
 python/paddle/fluid/initializer.py            | 74 +++++++++++++++----
 python/paddle/fluid/layers/nn.py              | 11 ++-
 .../tests/unittests/test_egr_python_api.py    |  3 -
 .../unittests/test_gaussian_random_op.py      | 11 +++
 .../fluid/tests/unittests/test_initializer.py | 64 ++++++++++++++++-
 .../tests/unittests/test_initializer_nn.py    |  2 +-
 python/paddle/tensor/random.py                | 10 ++-
 python/paddle/utils/code_gen/api.yaml         | 12 +++
 8 files changed, 169 insertions(+), 18 deletions(-)

diff --git a/python/paddle/fluid/initializer.py b/python/paddle/fluid/initializer.py
index b3baedc4015..ba5e51c11dd 100644
--- a/python/paddle/fluid/initializer.py
+++ b/python/paddle/fluid/initializer.py
@@ -331,22 +331,55 @@ class NormalInitializer(Initializer):
                                  ["uint16", "float16", "float32", "float64"],
                                  "guassian_random")
 
+        # to be compatible with fp16 initializers
+        if var.dtype in [VarDesc.VarType.FP16, VarDesc.VarType.BF16]:
+            out_dtype = VarDesc.VarType.FP32
+            out_var = block.create_var(
+                name=unique_name.generate(".".join(
+                    ['normal_init', var.name, 'tmp'])),
+                shape=var.shape,
+                dtype=out_dtype,
+                type=VarDesc.VarType.LOD_TENSOR,
+                persistable=False)
+        else:
+            out_dtype = var.dtype
+            out_var = var
+
         if self._seed == 0:
             self._seed = block.program.random_seed
 
-        if framework._non_static_mode():
+        if in_dygraph_mode():
+            place = _current_expected_place()
+            out_var = _C_ops.final_state_gaussian_random(
+                var.shape, self._mean, self._std_dev, self._seed, out_dtype,
+                place)
+
+            if var.dtype in [VarDesc.VarType.FP16, VarDesc.VarType.BF16]:
+                var_tmp = _C_ops.final_state_cast(out_var, var.dtype)
+                var_tmp._share_underline_tensor_to(var)
+            else:
+                out_var._share_underline_tensor_to(var)
+            return None
+
+        if _in_legacy_dygraph():
             out_var = _C_ops.gaussian_random(
-                'shape', var.shape, 'dtype', var.dtype, 'mean', self._mean,
+                'shape', var.shape, 'dtype', out_dtype, 'mean', self._mean,
                 'std', self._std_dev, 'seed', self._seed, 'use_mkldnn', False)
-            out_var._share_underline_tensor_to(var)
+
+            if var.dtype in [VarDesc.VarType.FP16, VarDesc.VarType.BF16]:
+                var_tmp = _C_ops.cast(out_var, 'in_dtype', out_var.dtype,
+                                      'out_dtype', var.dtype)
+                var_tmp._share_underline_tensor_to(var)
+            else:
+                out_var._share_underline_tensor_to(var)
             return None
         else:
             op = block.append_op(
                 type="gaussian_random",
-                outputs={"Out": var},
+                outputs={"Out": out_var},
                 attrs={
                     "shape": var.shape,
-                    "dtype": var.dtype,
+                    "dtype": out_dtype,
                     "mean": self._mean,
                     "std": self._std_dev,
                     "seed": self._seed,
@@ -354,6 +387,13 @@ class NormalInitializer(Initializer):
                 },
                 stop_gradient=True)
 
+            if var.dtype in [VarDesc.VarType.FP16, VarDesc.VarType.BF16]:
+                block.append_op(
+                    type="cast",
+                    inputs={"X": out_var},
+                    outputs={"Out": var},
+                    attrs={"in_dtype": out_var.dtype,
+                           "out_dtype": var.dtype})
             var.op = op
             return op
 
@@ -567,9 +607,15 @@ class XavierInitializer(Initializer):
                     self._seed, 'dtype', out_dtype)
             else:
                 std = math.sqrt(2.0 / float(fan_in + fan_out))
-                out_var = _C_ops.gaussian_random(
-                    'shape', out_var.shape, 'dtype', out_dtype, 'mean', 0.0,
-                    'std', std, 'seed', self._seed)
+
+                if in_dygraph_mode():
+                    place = _current_expected_place()
+                    out_var = _C_ops.final_state_gaussian_random(
+                        out_var.shape, 0.0, std, self._seed, out_dtype, place)
+                else:
+                    out_var = _C_ops.gaussian_random(
+                        'shape', out_var.shape, 'dtype', out_dtype, 'mean', 0.0,
+                        'std', std, 'seed', self._seed)
 
             if var.dtype == VarDesc.VarType.FP16 or (
                     var.dtype == VarDesc.VarType.BF16 and not self._uniform):
@@ -720,9 +766,15 @@ class MSRAInitializer(Initializer):
                     int(out_dtype))
             else:
                 std = math.sqrt(2.0 / float(fan_in))
-                out_var = _C_ops.gaussian_random(
-                    'shape', out_var.shape, 'dtype',
-                    int(out_dtype), 'mean', 0.0, 'std', std, 'seed', self._seed)
+                if in_dygraph_mode():
+                    place = _current_expected_place()
+                    out_var = _C_ops.final_state_gaussian_random(
+                        out_var.shape, 0.0, std, self._seed, out_dtype, place)
+                else:
+                    out_var = _C_ops.gaussian_random(
+                        'shape', out_var.shape, 'dtype',
+                        int(out_dtype), 'mean', 0.0, 'std', std, 'seed',
+                        self._seed)
 
             if var.dtype == VarDesc.VarType.FP16 or (
                     var.dtype == VarDesc.VarType.BF16 and not self._uniform):
diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py
index a405bf829fd..47f40a2e6a5 100755
--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -28,6 +28,7 @@ from ..layer_helper import LayerHelper
 from paddle.fluid.framework import _in_legacy_dygraph
 from ..initializer import Normal, Constant, NumpyArrayInitializer
 from ..framework import Variable, OpProtoHolder, _non_static_mode, dygraph_only, _dygraph_tracer, default_main_program, _varbase_creator, static_only, _global_flags, _in_legacy_dygraph, in_dygraph_mode
+from ..framework import _current_expected_place
 from .. import dygraph_utils
 from ..param_attr import ParamAttr
 from .layer_function_generator import autodoc, templatedoc, _generate_doc_string_
@@ -10970,7 +10971,15 @@ def gaussian_random(shape,
     if not isinstance(dtype, core.VarDesc.VarType):
         dtype = convert_np_dtype_to_dtype_(dtype)
 
-    if _non_static_mode():
+    if in_dygraph_mode():
+        shape = utils.convert_shape_to_list(shape)
+        place = _current_expected_place()
+        return _C_ops.final_state_gaussian_random(shape,
+                                                  float(mean),
+                                                  float(std), seed, dtype,
+                                                  place)
+
+    if _in_legacy_dygraph():
         shape = utils.convert_shape_to_list(shape)
         return _C_ops.gaussian_random('shape', shape, 'mean',
                                       float(mean), 'std',
diff --git a/python/paddle/fluid/tests/unittests/test_egr_python_api.py b/python/paddle/fluid/tests/unittests/test_egr_python_api.py
index e7abed09646..ae29c6c262a 100644
--- a/python/paddle/fluid/tests/unittests/test_egr_python_api.py
+++ b/python/paddle/fluid/tests/unittests/test_egr_python_api.py
@@ -251,9 +251,6 @@ class EagerVariablePropertiesAndMethodsTestCase(unittest.TestCase):
             self.assertTrue(egr_tensor12.place._equals(paddle.fluid.CPUPlace()))
             self.assertTrue(np.array_equal(egr_tensor12.numpy(), x))
 
-        egr_tensor13 = paddle.randn([2, 2])
-        self.assertTrue("eager_tmp" in egr_tensor13.name)
-
         with self.assertRaisesRegexp(
                 ValueError, "The shape of Parameter should not be None"):
             eager_param = EagerParamBase(shape=None, dtype="float32")
diff --git a/python/paddle/fluid/tests/unittests/test_gaussian_random_op.py b/python/paddle/fluid/tests/unittests/test_gaussian_random_op.py
index 4fca8b9f2a1..4140ce44648 100644
--- a/python/paddle/fluid/tests/unittests/test_gaussian_random_op.py
+++ b/python/paddle/fluid/tests/unittests/test_gaussian_random_op.py
@@ -23,12 +23,14 @@ import paddle.fluid.core as core
 from paddle.fluid.op import Operator
 from paddle.fluid.executor import Executor
 from paddle.fluid.tests.unittests.op_test import OpTest, convert_uint16_to_float
+from paddle.fluid.framework import _test_eager_guard
 import paddle
 
 
 class TestGaussianRandomOp(OpTest):
     def setUp(self):
         self.op_type = "gaussian_random"
+        self.python_api = paddle.normal
         self.set_attrs()
         self.inputs = {}
         self.use_mkldnn = False
@@ -50,6 +52,10 @@ class TestGaussianRandomOp(OpTest):
     def test_check_output(self):
         self.check_output_customized(self.verify_output)
 
+    def test_eager(self):
+        with _test_eager_guard():
+            self.test_check_output()
+
     def verify_output(self, outs):
         self.assertEqual(outs[0].shape, (123, 92))
         hist, _ = np.histogram(outs[0], range=(-3, 5))
@@ -70,6 +76,7 @@ class TestGaussianRandomOp(OpTest):
 class TestGaussianRandomBF16Op(OpTest):
     def setUp(self):
         self.op_type = "gaussian_random"
+        self.python_api = paddle.normal
         self.set_attrs()
         self.inputs = {}
         self.use_mkldnn = False
@@ -93,6 +100,10 @@ class TestGaussianRandomBF16Op(OpTest):
         self.check_output_with_place_customized(
             self.verify_output, place=core.CUDAPlace(0))
 
+    def test_eager(self):
+        with _test_eager_guard():
+            self.test_check_output()
+
     def verify_output(self, outs):
         outs = convert_uint16_to_float(outs)
         self.assertEqual(outs[0].shape, (123, 92))
diff --git a/python/paddle/fluid/tests/unittests/test_initializer.py b/python/paddle/fluid/tests/unittests/test_initializer.py
index 91c2800836c..3a9387082e6 100644
--- a/python/paddle/fluid/tests/unittests/test_initializer.py
+++ b/python/paddle/fluid/tests/unittests/test_initializer.py
@@ -244,7 +244,7 @@ class TestNormalInitializer(unittest.TestCase):
             lod_level=0,
             name="param",
             initializer=initializer.NormalInitializer(2.3, 1.9, 123))
-        num_ops = 1
+        num_ops = 2 if (dtype == "float16" or dtype == "uint16") else 1
         self.assertEqual(len(block.ops), num_ops)
         init_op = block.ops[0]
         self.assertEqual(init_op.type, 'gaussian_random')
@@ -685,6 +685,68 @@ class TestUniformInitializerDygraph(unittest.TestCase):
         self.func_uniform_initializer()
 
 
+class TestXavierInitializerDygraph(unittest.TestCase):
+    def func_xavier_initializer(self, dtype="float32"):
+        """
+        In dygraph mode, we can use an initializer directly to initialize a tensor.
+        """
+        paddle.disable_static()
+
+        tensor = paddle.zeros([1024, 1024, 16])
+        tensor.stop_gradient = False
+
+        xavier_ = paddle.fluid.initializer.XavierInitializer(
+            uniform=False, fan_in=3, fan_out=5)
+        xavier_(tensor)
+
+        hist, _ = output_hist(tensor.numpy())
+
+        hist2, _ = output_hist(
+            np.random.normal(0, np.sqrt(2.0 / (3 + 5)), [1024, 1024, 16]))
+
+        self.assertTrue(
+            np.allclose(
+                hist, hist2, rtol=0, atol=0.01),
+            "hist: " + str(hist) + " hist2: " + str(hist2))
+        paddle.enable_static()
+
+    def test_xavier_initializer(self, dtype="float32"):
+        with framework._test_eager_guard():
+            self.func_xavier_initializer()
+        self.func_xavier_initializer()
+
+
+class TestMSRAInitializerDygraph(unittest.TestCase):
+    def func_msra_initializer(self, dtype="float32"):
+        """
+        In dygraph mode, we can use an initializer directly to initialize a tensor.
+        """
+        paddle.disable_static()
+
+        tensor = paddle.zeros([1024, 1024, 16])
+        tensor.stop_gradient = False
+
+        msra_ = paddle.fluid.initializer.MSRAInitializer(
+            uniform=False, fan_in=4)
+        msra_(tensor)
+
+        hist, _ = output_hist(tensor.numpy())
+
+        hist2, _ = output_hist(
+            np.random.normal(0, np.sqrt(2.0 / (4)), [1024, 1024, 16]))
+
+        self.assertTrue(
+            np.allclose(
+                hist, hist2, rtol=0, atol=0.01),
+            "hist: " + str(hist) + " hist2: " + str(hist2))
+        paddle.enable_static()
+
+    def test_msra_initializer(self, dtype="float32"):
+        with framework._test_eager_guard():
+            self.func_msra_initializer()
+        self.func_msra_initializer()
+
+
 class TesetconsistencyOfDynamicAndStaticGraph(unittest.TestCase):
     def func_order(self):
         paddle.set_device('cpu')
diff --git a/python/paddle/fluid/tests/unittests/test_initializer_nn.py b/python/paddle/fluid/tests/unittests/test_initializer_nn.py
index 74686652044..9953681e0f5 100644
--- a/python/paddle/fluid/tests/unittests/test_initializer_nn.py
+++ b/python/paddle/fluid/tests/unittests/test_initializer_nn.py
@@ -400,7 +400,7 @@ class TestNormal(unittest.TestCase):
             lod_level=0,
             name="param",
             initializer=initializer.Normal(2.3, 1.9))
-        num_ops = 1
+        num_ops = 2 if dtype in ["float16", "uint16"] else 1
         self.assertEqual(len(block.ops), num_ops)
         init_op = block.ops[0]
         self.assertEqual(init_op.type, 'gaussian_random')
diff --git a/python/paddle/tensor/random.py b/python/paddle/tensor/random.py
index 82818d50510..3d0617e40d6 100644
--- a/python/paddle/tensor/random.py
+++ b/python/paddle/tensor/random.py
@@ -239,7 +239,15 @@ def gaussian(shape, mean=0.0, std=1.0, dtype=None, name=None):
     if not isinstance(dtype, core.VarDesc.VarType):
         dtype = convert_np_dtype_to_dtype_(dtype)
 
-    if paddle.in_dynamic_mode():
+    if in_dygraph_mode():
+        shape = utils.convert_shape_to_list(shape)
+        place = _current_expected_place()
+        return _C_ops.final_state_gaussian_random(shape,
+                                                  float(mean),
+                                                  float(std), seed, dtype,
+                                                  place)
+
+    if _in_legacy_dygraph():
         shape = utils.convert_shape_to_list(shape)
         return _C_ops.gaussian_random('shape', shape, 'mean',
                                       float(mean), 'std',
diff --git a/python/paddle/utils/code_gen/api.yaml b/python/paddle/utils/code_gen/api.yaml
index e3d8e8f5f47..6b58c840613 100644
--- a/python/paddle/utils/code_gen/api.yaml
+++ b/python/paddle/utils/code_gen/api.yaml
@@ -782,6 +782,18 @@
     kernel :
       func : gather_tree
 
+- api : gaussian_random
+  args : (IntArray shape, float mean, float std, int seed, DataType dtype, Place place={})
+  output: Tensor
+  infer_meta :
+    func : GaussianRandomInferMeta
+    param : [shape, mean, std, seed, dtype]
+  kernel :
+    func : gaussian_random
+    param : [shape, mean, std, seed, dtype]
+    data_type : dtype
+    backend : place
+
 - api : gelu
   args : (Tensor x, bool approximate)
   output : Tensor(out)
-- 
GitLab
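
The gaussian_random entry added to api.yaml above is what generates the eager _C_ops.final_state_gaussian_random(shape, mean, std, seed, dtype, place) binding used throughout the patch. Below is a minimal sketch of driving the new eager path end to end through the public paddle.normal API (the same python_api the updated OpTests register); it assumes a Paddle build from this era (~2.3) where _test_eager_guard is available.

import paddle
from paddle.fluid.framework import _test_eager_guard

paddle.disable_static()
paddle.seed(123)  # fix the RNG so the draw is reproducible per device

with _test_eager_guard():
    # paddle.normal forwards to paddle.tensor.random.gaussian, which now
    # dispatches to final_state_gaussian_random in eager mode
    x = paddle.normal(mean=2.3, std=1.9, shape=[123, 92])

print(x.shape)          # [123, 92]
print(float(x.mean()))  # close to 2.3
print(float(x.std()))   # close to 1.9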
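The num_ops = 2 assertions in the initializer tests follow from the new FP16/BF16 branch in NormalInitializer: the noise is generated by an FP32 gaussian_random op and then cast back to the parameter dtype. A sketch of observing those two ops in a static program, reusing the fluid-style calls from test_initializer.py (the shape and seed values are borrowed from that test):

import paddle
from paddle.fluid import framework
import paddle.fluid.initializer as initializer

paddle.enable_static()
program = framework.Program()
block = program.global_block()
block.create_parameter(
    dtype="float16",  # FP16 triggers the FP32-compute-then-cast path
    shape=[5, 10],
    lod_level=0,
    name="param",
    initializer=initializer.NormalInitializer(2.3, 1.9, 123))
print([op.type for op in block.ops])  # expected: ['gaussian_random', 'cast']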
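The new dygraph test classes rely on an initializer being callable directly on an eager tensor. A hedged usage sketch following TestMSRAInitializerDygraph (fan_in=4, so the target distribution is N(0, sqrt(2/4))); the 0.01 tolerance is an assumption borrowed from that test's histogram check, not a guaranteed bound:

import numpy as np
import paddle

paddle.disable_static()
tensor = paddle.zeros([1024, 1024, 16])
tensor.stop_gradient = False

# MSRAInitializer(uniform=False) fills the tensor in place with
# N(0, sqrt(2/fan_in)) samples, routed through
# final_state_gaussian_random under eager mode after this patch
msra_ = paddle.fluid.initializer.MSRAInitializer(uniform=False, fan_in=4)
msra_(tensor)

print(abs(float(tensor.numpy().std()) - np.sqrt(2.0 / 4)) < 0.01)  # True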