[Yaml]add gaussian_random yaml and test case (#41312)

* add gaussian random yaml * add gaussian_random yaml and test case * fix error modify of full yaml * import in_dygraph_mode * import _in_legacy_dygraph * add place arg in api * import __current_expected_place * fix test_egr_python_api failed case * add test case * add cast for NormalInitializer * fix test error * fix test error * rm unused check code * fix test error in test_initializer_nn * modify by review

[Yaml]add gaussian_random yaml and test case (#41312)
* add gaussian random yaml * add gaussian_random yaml and test case * fix error modify of full yaml * import in_dygraph_mode * import _in_legacy_dygraph * add place arg in api * import __current_expected_place * fix test_egr_python_api failed case * add test case * add cast for NormalInitializer * fix test error * fix test error * rm unused check code * fix test error in test_initializer_nn * modify by review
4819ab4d · chentianyu03 · GitHub · 18f569c3 · 4819ab4d · 4819ab4d
8 changed files
--- a/python/paddle/fluid/initializer.py
+++ b/python/paddle/fluid/initializer.py
@@ -331,22 +331,56 @@ class NormalInitializer(Initializer):
                                 ["uint16", "float16", "float32", "float64"],
                                 "guassian_random")

+        # to be compatible with fp16 initializers
+        if var.dtype in [VarDesc.VarType.FP16, VarDesc.VarType.BF16]:
+            out_dtype = VarDesc.VarType.FP32
+            out_var = block.create_var(
+                name=unique_name.generate(".".join(
+                    ['normal_init', var.name, 'tmp'])),
+                shape=var.shape,
+                dtype=out_dtype,
+                type=VarDesc.VarType.LOD_TENSOR,
+                persistable=False)
+        else:
+            out_dtype = var.dtype
+            out_var = var
+
        if self._seed == 0:
            self._seed = block.program.random_seed

-        if framework._non_static_mode():
+        if in_dygraph_mode():
+            place = _current_expected_place()
+            out_var = _C_ops.final_state_gaussian_random(
+                var.shape, self._mean, self._std_dev, self._seed, out_dtype,
+                place)
+            out_var._share_underline_tensor_to(var)
+
+            if var.dtype in [VarDesc.VarType.FP16, VarDesc.VarType.BF16]:
+                var_tmp = _C_ops.final_state_cast(out_var, var.dtype)
+                var_tmp._share_underline_tensor_to(var)
+            else:
+                out_var._share_underline_tensor_to(var)
+            return None
+
+        if _in_legacy_dygraph():
            out_var = _C_ops.gaussian_random(
-                'shape', var.shape, 'dtype', var.dtype, 'mean', self._mean,
+                'shape', var.shape, 'dtype', out_dtype, 'mean', self._mean,
                'std', self._std_dev, 'seed', self._seed, 'use_mkldnn', False)
+
+            if var.dtype in [VarDesc.VarType.FP16, VarDesc.VarType.BF16]:
+                var_tmp = _C_ops.cast(out_var, 'in_dtype', out_var.dtype,
+                                      'out_dtype', var.dtype)
+                var_tmp._share_underline_tensor_to(var)
+            else:
                out_var._share_underline_tensor_to(var)
            return None
        else:
            op = block.append_op(
                type="gaussian_random",
-                outputs={"Out": var},
+                outputs={"Out": out_var},
                attrs={
                    "shape": var.shape,
-                    "dtype": var.dtype,
+                    "dtype": out_dtype,
                    "mean": self._mean,
                    "std": self._std_dev,
                    "seed": self._seed,
@@ -354,6 +388,13 @@ class NormalInitializer(Initializer):
                },
                stop_gradient=True)

+            if var.dtype in [VarDesc.VarType.FP16, VarDesc.VarType.BF16]:
+                block.append_op(
+                    type="cast",
+                    inputs={"X": out_var},
+                    outputs={"Out": var},
+                    attrs={"in_dtype": out_var.dtype,
+                           "out_dtype": var.dtype})
            var.op = op
            return op

@@ -567,6 +608,12 @@ class XavierInitializer(Initializer):
                                                self._seed, 'dtype', out_dtype)
            else:
                std = math.sqrt(2.0 / float(fan_in + fan_out))
+
+                if in_dygraph_mode():
+                    place = _current_expected_place()
+                    out_var = _C_ops.final_state_gaussian_random(
+                        out_var.shape, 0.0, std, self._seed, out_dtype, place)
+                else:
                    out_var = _C_ops.gaussian_random(
                        'shape', out_var.shape, 'dtype', out_dtype, 'mean', 0.0,
                        'std', std, 'seed', self._seed)
@@ -720,9 +767,15 @@ class MSRAInitializer(Initializer):
                                                int(out_dtype))
            else:
                std = math.sqrt(2.0 / float(fan_in))
+                if in_dygraph_mode():
+                    place = _current_expected_place()
+                    out_var = _C_ops.final_state_gaussian_random(
+                        out_var.shape, 0.0, std, self._seed, out_dtype, place)
+                else:
                    out_var = _C_ops.gaussian_random(
                        'shape', out_var.shape, 'dtype',
-                    int(out_dtype), 'mean', 0.0, 'std', std, 'seed', self._seed)
+                        int(out_dtype), 'mean', 0.0, 'std', std, 'seed',
+                        self._seed)

            if var.dtype == VarDesc.VarType.FP16 or (
                    var.dtype == VarDesc.VarType.BF16 and not self._uniform):

--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -28,6 +28,7 @@ from ..layer_helper import LayerHelper
 from paddle.fluid.framework import _in_legacy_dygraph
 from ..initializer import Normal, Constant, NumpyArrayInitializer
 from ..framework import Variable, OpProtoHolder, _non_static_mode, dygraph_only, _dygraph_tracer, default_main_program, _varbase_creator, static_only, _global_flags, _in_legacy_dygraph, in_dygraph_mode
+from ..framework import _current_expected_place
 from .. import dygraph_utils
 from ..param_attr import ParamAttr
 from .layer_function_generator import autodoc, templatedoc, _generate_doc_string_
@@ -10970,7 +10971,15 @@ def gaussian_random(shape,
    if not isinstance(dtype, core.VarDesc.VarType):
        dtype = convert_np_dtype_to_dtype_(dtype)

-    if _non_static_mode():
+    if in_dygraph_mode():
+        shape = utils.convert_shape_to_list(shape)
+        place = _current_expected_place()
+        return _C_ops.final_state_gaussian_random(shape,
+                                                  float(mean),
+                                                  float(std), seed, dtype,
+                                                  place)
+
+    if _in_legacy_dygraph():
        shape = utils.convert_shape_to_list(shape)
        return _C_ops.gaussian_random('shape', shape, 'mean',
                                      float(mean), 'std',

--- a/python/paddle/fluid/tests/unittests/test_egr_python_api.py
+++ b/python/paddle/fluid/tests/unittests/test_egr_python_api.py
@@ -251,9 +251,6 @@ class EagerVariablePropertiesAndMethodsTestCase(unittest.TestCase):
        self.assertTrue(egr_tensor12.place._equals(paddle.fluid.CPUPlace()))
        self.assertTrue(np.array_equal(egr_tensor12.numpy(), x))

-        egr_tensor13 = paddle.randn([2, 2])
-        self.assertTrue("eager_tmp" in egr_tensor13.name)
-
        with self.assertRaisesRegexp(
                ValueError, "The shape of Parameter should not be None"):
            eager_param = EagerParamBase(shape=None, dtype="float32")

--- a/python/paddle/fluid/tests/unittests/test_gaussian_random_op.py
+++ b/python/paddle/fluid/tests/unittests/test_gaussian_random_op.py
@@ -23,12 +23,14 @@ import paddle.fluid.core as core
 from paddle.fluid.op import Operator
 from paddle.fluid.executor import Executor
 from paddle.fluid.tests.unittests.op_test import OpTest, convert_uint16_to_float
+from paddle.fluid.framework import _test_eager_guard
 import paddle


 class TestGaussianRandomOp(OpTest):
    def setUp(self):
        self.op_type = "gaussian_random"
+        self.python_api = paddle.normal
        self.set_attrs()
        self.inputs = {}
        self.use_mkldnn = False
@@ -50,6 +52,10 @@ class TestGaussianRandomOp(OpTest):
    def test_check_output(self):
        self.check_output_customized(self.verify_output)

+    def test_eager(self):
+        with _test_eager_guard():
+            self.test_check_output()
+
    def verify_output(self, outs):
        self.assertEqual(outs[0].shape, (123, 92))
        hist, _ = np.histogram(outs[0], range=(-3, 5))
@@ -70,6 +76,7 @@ class TestGaussianRandomOp(OpTest):
 class TestGaussianRandomBF16Op(OpTest):
    def setUp(self):
        self.op_type = "gaussian_random"
+        self.python_api = paddle.normal
        self.set_attrs()
        self.inputs = {}
        self.use_mkldnn = False
@@ -93,6 +100,10 @@ class TestGaussianRandomBF16Op(OpTest):
        self.check_output_with_place_customized(
            self.verify_output, place=core.CUDAPlace(0))

+    def test_eager(self):
+        with _test_eager_guard():
+            self.test_check_output()
+
    def verify_output(self, outs):
        outs = convert_uint16_to_float(outs)
        self.assertEqual(outs[0].shape, (123, 92))

--- a/python/paddle/fluid/tests/unittests/test_initializer.py
+++ b/python/paddle/fluid/tests/unittests/test_initializer.py
@@ -244,7 +244,7 @@ class TestNormalInitializer(unittest.TestCase):
                lod_level=0,
                name="param",
                initializer=initializer.NormalInitializer(2.3, 1.9, 123))
-        num_ops = 1
+        num_ops = 2 if (dtype == "float16" or dtype == "uint16") else 1
        self.assertEqual(len(block.ops), num_ops)
        init_op = block.ops[0]
        self.assertEqual(init_op.type, 'gaussian_random')
@@ -685,6 +685,68 @@ class TestUniformInitializerDygraph(unittest.TestCase):
        self.func_uniform_initializer()


+class TestXavierInitializerDygraph(unittest.TestCase):
+    def func_xvarier_initializer(self, dtype="float32"):
+        """
+        In dygraph mode, we can use initializer directly to initialize a tensor.
+        """
+        paddle.disable_static()
+
+        tensor = paddle.zeros([1024, 1024, 16])
+        tensor.stop_gradient = False
+
+        xavier_ = paddle.fluid.initializer.XavierInitializer(
+            uniform=False, fan_in=3, fan_out=5)
+        xavier_(tensor)
+
+        hist, _ = output_hist(tensor.numpy())
+
+        hist2, _ = output_hist(
+            np.random.normal(0, np.sqrt(2.0 / (3 + 5)), [1024, 1024, 16]))
+
+        self.assertTrue(
+            np.allclose(
+                hist, hist2, rtol=0, atol=0.01),
+            "hist: " + str(hist) + " hist2: " + str(hist2))
+        paddle.enable_static()
+
+    def test_xavier_initializer(self, dtype="float32"):
+        with framework._test_eager_guard():
+            self.func_xvarier_initializer()
+        self.func_xvarier_initializer()
+
+
+class TestMSRAInitializerDygraph(unittest.TestCase):
+    def func_msra_initializer(self, dtype="float32"):
+        """
+        In dygraph mode, we can use initializer directly to initialize a tensor.
+        """
+        paddle.disable_static()
+
+        tensor = paddle.zeros([1024, 1024, 16])
+        tensor.stop_gradient = False
+
+        msra_ = paddle.fluid.initializer.MSRAInitializer(
+            uniform=False, fan_in=4)
+        msra_(tensor)
+
+        hist, _ = output_hist(tensor.numpy())
+
+        hist2, _ = output_hist(
+            np.random.normal(0, np.sqrt(2.0 / (4)), [1024, 1024, 16]))
+
+        self.assertTrue(
+            np.allclose(
+                hist, hist2, rtol=0, atol=0.01),
+            "hist: " + str(hist) + " hist2: " + str(hist2))
+        paddle.enable_static()
+
+    def test_msra_initializer(self, dtype="float32"):
+        with framework._test_eager_guard():
+            self.func_msra_initializer()
+        self.func_msra_initializer()
+
+
 class TesetconsistencyOfDynamicAndStaticGraph(unittest.TestCase):
    def func_order(self):
        paddle.set_device('cpu')

--- a/python/paddle/fluid/tests/unittests/test_initializer_nn.py
+++ b/python/paddle/fluid/tests/unittests/test_initializer_nn.py
@@ -400,7 +400,7 @@ class TestNormal(unittest.TestCase):
                lod_level=0,
                name="param",
                initializer=initializer.Normal(2.3, 1.9))
-        num_ops = 1
+        num_ops = 2 if dtype in ["float16", "uint16"] else 1
        self.assertEqual(len(block.ops), num_ops)
        init_op = block.ops[0]
        self.assertEqual(init_op.type, 'gaussian_random')

--- a/python/paddle/tensor/random.py
+++ b/python/paddle/tensor/random.py
@@ -239,7 +239,15 @@ def gaussian(shape, mean=0.0, std=1.0, dtype=None, name=None):
    if not isinstance(dtype, core.VarDesc.VarType):
        dtype = convert_np_dtype_to_dtype_(dtype)

-    if paddle.in_dynamic_mode():
+    if in_dygraph_mode():
+        shape = utils.convert_shape_to_list(shape)
+        place = _current_expected_place()
+        return _C_ops.final_state_gaussian_random(shape,
+                                                  float(mean),
+                                                  float(std), seed, dtype,
+                                                  place)
+
+    if _in_legacy_dygraph():
        shape = utils.convert_shape_to_list(shape)
        return _C_ops.gaussian_random('shape', shape, 'mean',
                                      float(mean), 'std',

--- a/python/paddle/utils/code_gen/api.yaml
+++ b/python/paddle/utils/code_gen/api.yaml
@@ -782,6 +782,18 @@
  kernel :
    func : gather_tree

+- api : gaussian_random
+  args : (IntArray shape, float mean, float std, int seed, DataType dtype, Place place={})
+  output: Tensor
+  infer_meta :
+    func : GaussianRandomInferMeta
+    param : [shape, mean, std, seed, dtype]
+  kernel :
+    func : gaussian_random
+    param : [shape, mean, std, seed, dtype]
+    data_type : dtype
+    backend : place
+
 - api : gelu
  args : (Tensor x,  bool approximate)
  output : Tensor(out)