From 76cb83e879befbda4e540d25950b9911a6258493 Mon Sep 17 00:00:00 2001 From: "joanna.wozna.intel" Date: Mon, 19 Apr 2021 05:45:31 +0200 Subject: [PATCH] Add BF16 Constant Initializer and support for other initializer (#31935) --- paddle/fluid/operators/fill_constant_op.cc | 1 + paddle/fluid/operators/fill_constant_op.h | 3 +- paddle/fluid/operators/math/math_function.cc | 2 + paddle/fluid/operators/math/math_function.cu | 2 + python/paddle/fluid/initializer.py | 41 ++++++----- python/paddle/fluid/layer_helper_base.py | 6 +- python/paddle/fluid/layers/nn.py | 2 +- .../paddle/fluid/tests/unittests/op_test.py | 4 +- .../tests/unittests/test_fill_constant_op.py | 28 +++++++- .../fluid/tests/unittests/test_initializer.py | 69 ++++++++++++++++--- .../tests/unittests/test_initializer_nn.py | 55 ++++++++++++--- .../unittests/test_lookup_table_bf16_op.py | 46 +++++++++++++ 12 files changed, 216 insertions(+), 43 deletions(-) diff --git a/paddle/fluid/operators/fill_constant_op.cc b/paddle/fluid/operators/fill_constant_op.cc index caa2930990..f35d8b6bbf 100644 --- a/paddle/fluid/operators/fill_constant_op.cc +++ b/paddle/fluid/operators/fill_constant_op.cc @@ -154,6 +154,7 @@ REGISTER_OP_CPU_KERNEL(fill_constant, ops::FillConstantKernel, ops::FillConstantKernel, ops::FillConstantKernel, ops::FillConstantKernel, + ops::FillConstantKernel, ops::FillConstantKernel, ops::FillConstantKernel); diff --git a/paddle/fluid/operators/fill_constant_op.h b/paddle/fluid/operators/fill_constant_op.h index 4608f16754..46c4ae1203 100644 --- a/paddle/fluid/operators/fill_constant_op.h +++ b/paddle/fluid/operators/fill_constant_op.h @@ -105,7 +105,8 @@ class FillConstantKernel : public framework::OpKernel { int actual_place = place_type; if (actual_place == -1) { - bool cpu_place = force_cpu || ctx.GetPlace() == platform::CPUPlace(); + bool cpu_place = (force_cpu || ctx.GetPlace() == platform::CPUPlace() || + data_type == framework::proto::VarType::BF16); if (cpu_place) { actual_place = 0; } else if (platform::is_gpu_place(ctx.GetPlace())) { diff --git a/paddle/fluid/operators/math/math_function.cc b/paddle/fluid/operators/math/math_function.cc index 68179a6857..0bdc7b6943 100644 --- a/paddle/fluid/operators/math/math_function.cc +++ b/paddle/fluid/operators/math/math_function.cc @@ -27,6 +27,7 @@ limitations under the License. */ #include #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/operators/math/math_function_impl.h" +#include "paddle/fluid/platform/bfloat16.h" #include "paddle/fluid/platform/float16.h" #include "unsupported/Eigen/CXX11/Tensor" @@ -49,6 +50,7 @@ template struct SetConstant; #ifdef PADDLE_WITH_XPU template struct SetConstant; +template struct SetConstant; template struct SetConstant; template struct SetConstant; template struct SetConstant; diff --git a/paddle/fluid/operators/math/math_function.cu b/paddle/fluid/operators/math/math_function.cu index 2b93cd9260..f94c1bf696 100644 --- a/paddle/fluid/operators/math/math_function.cu +++ b/paddle/fluid/operators/math/math_function.cu @@ -19,6 +19,7 @@ limitations under the License. */ #include "paddle/fluid/operators/math/blas.h" #include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/math_function_impl.h" +#include "paddle/fluid/platform/bfloat16.h" #include "paddle/fluid/platform/complex128.h" #include "paddle/fluid/platform/complex64.h" #include "paddle/fluid/platform/float16.h" @@ -33,6 +34,7 @@ using complex64 = paddle::platform::complex64; using complex128 = paddle::platform::complex128; template struct SetConstant; +template struct SetConstant; template struct SetConstant; template struct SetConstant; template struct SetConstant; diff --git a/python/paddle/fluid/initializer.py b/python/paddle/fluid/initializer.py index 86fab98112..c5345c7fed 100644 --- a/python/paddle/fluid/initializer.py +++ b/python/paddle/fluid/initializer.py @@ -238,7 +238,8 @@ class UniformInitializer(Initializer): block = self._check_block(block) assert isinstance(block, framework.Block) - check_variable_and_dtype(var, "Out", ["float16", "float32", "float64"], + check_variable_and_dtype(var, "Out", + ["uint16", "float16", "float32", "float64"], "uniform_random") # Initialization Ops should be prepended and not appended @@ -246,7 +247,7 @@ class UniformInitializer(Initializer): self._seed = block.program.random_seed # to be compatible of fp16 initializers - if var.dtype == VarDesc.VarType.FP16: + if var.dtype in [VarDesc.VarType.FP16, VarDesc.VarType.BF16]: out_dtype = VarDesc.VarType.FP32 out_var = block.create_var( name=unique_name.generate(".".join( @@ -275,7 +276,7 @@ class UniformInitializer(Initializer): }, stop_gradient=True) - if var.dtype == VarDesc.VarType.FP16: + if var.dtype in [VarDesc.VarType.FP16, VarDesc.VarType.BF16]: block.append_op( type="cast", inputs={"X": out_var}, @@ -330,14 +331,15 @@ class NormalInitializer(Initializer): assert isinstance(block, framework.Block) - check_variable_and_dtype(var, "Out", ["float16", "float32", "float64"], + check_variable_and_dtype(var, "Out", + ["uint16", "float16", "float32", "float64"], "guassian_random") # Initialization Ops should be prepended and not appended if self._seed == 0: self._seed = block.program.random_seed # to be compatible of fp16 initalizers - if var.dtype == VarDesc.VarType.FP16: + if var.dtype in [VarDesc.VarType.FP16, VarDesc.VarType.BF16]: out_dtype = VarDesc.VarType.FP32 out_var = block.create_var( name=unique_name.generate(".".join( @@ -363,7 +365,7 @@ class NormalInitializer(Initializer): }, stop_gradient=True) - if var.dtype == VarDesc.VarType.FP16: + if var.dtype in [VarDesc.VarType.FP16, VarDesc.VarType.BF16]: block.append_op( type="cast", inputs={"X": out_var}, @@ -421,7 +423,7 @@ class TruncatedNormalInitializer(Initializer): self._seed = block.program.random_seed # to be compatible of fp16 initalizers - if var.dtype == VarDesc.VarType.FP16: + if var.dtype in [VarDesc.VarType.FP16, VarDesc.VarType.BF16]: out_dtype = VarDesc.VarType.FP32 out_var = block.create_var( name=unique_name.generate(".".join( @@ -446,7 +448,7 @@ class TruncatedNormalInitializer(Initializer): }, stop_gradient=True) - if var.dtype == VarDesc.VarType.FP16: + if var.dtype in [VarDesc.VarType.FP16, VarDesc.VarType.BF16]: block.append_op( type="cast", inputs={"X": out_var}, @@ -526,7 +528,8 @@ class XavierInitializer(Initializer): block = self._check_block(block) assert isinstance(block, framework.Block) - check_variable_and_dtype(var, "Out", ["float16", "float32", "float64"], + check_variable_and_dtype(var, "Out", + ["uint16", "float16", "float32", "float64"], "xavier_init") f_in, f_out = self._compute_fans(var) @@ -539,7 +542,7 @@ class XavierInitializer(Initializer): self._seed = block.program.random_seed # to be compatible of fp16 initalizers - if var.dtype == VarDesc.VarType.FP16: + if var.dtype in [VarDesc.VarType.FP16, VarDesc.VarType.BF16]: out_dtype = VarDesc.VarType.FP32 out_var = block.create_var( name=unique_name.generate(".".join( @@ -581,7 +584,7 @@ class XavierInitializer(Initializer): }, stop_gradient=True) - if var.dtype == VarDesc.VarType.FP16: + if var.dtype in [VarDesc.VarType.FP16, VarDesc.VarType.BF16]: block.append_op( type="cast", inputs={"X": out_var}, @@ -670,7 +673,7 @@ class MSRAInitializer(Initializer): self._seed = block.program.random_seed # to be compatible of fp16 initalizers - if var.dtype == VarDesc.VarType.FP16: + if var.dtype in [VarDesc.VarType.FP16, VarDesc.VarType.BF16]: out_dtype = VarDesc.VarType.FP32 out_var = block.create_var( name=unique_name.generate(".".join( @@ -712,7 +715,7 @@ class MSRAInitializer(Initializer): }, stop_gradient=True) - if var.dtype == VarDesc.VarType.FP16: + if var.dtype in [VarDesc.VarType.FP16, VarDesc.VarType.BF16]: block.append_op( type="cast", inputs={"X": out_var}, @@ -812,7 +815,9 @@ class BilinearInitializer(Initializer): weight = np.reshape(weight, shape) # to be compatible of fp16 initalizers - if var.dtype == VarDesc.VarType.FP16 or var.dtype == VarDesc.VarType.FP64: + if var.dtype in [ + VarDesc.VarType.FP16, VarDesc.VarType.BF16, VarDesc.VarType.FP64 + ]: out_dtype = VarDesc.VarType.FP32 out_var = block.create_var( name=unique_name.generate(".".join( @@ -842,7 +847,9 @@ class BilinearInitializer(Initializer): value_name: values }) - if var.dtype == VarDesc.VarType.FP16 or var.dtype == VarDesc.VarType.FP64: + if var.dtype in [ + VarDesc.VarType.FP16, VarDesc.VarType.BF16, VarDesc.VarType.FP64 + ]: block.append_op( type="cast", inputs={"X": out_var}, @@ -898,7 +905,7 @@ class NumpyArrayInitializer(Initializer): assert isinstance(block, framework.Block) # to be compatible of fp16 initalizers - if var.dtype == VarDesc.VarType.FP16: + if var.dtype in [VarDesc.VarType.FP16, VarDesc.VarType.BF16]: out_dtype = VarDesc.VarType.FP32 np_value = self._value.astype("float32") out_var = block.create_var( @@ -935,7 +942,7 @@ class NumpyArrayInitializer(Initializer): }, stop_gradient=True) - if var.dtype == VarDesc.VarType.FP16: + if var.dtype in [VarDesc.VarType.FP16, VarDesc.VarType.BF16]: block.append_op( type="cast", inputs={"X": out_var}, diff --git a/python/paddle/fluid/layer_helper_base.py b/python/paddle/fluid/layer_helper_base.py index 5ee46a68fb..858078615a 100644 --- a/python/paddle/fluid/layer_helper_base.py +++ b/python/paddle/fluid/layer_helper_base.py @@ -331,12 +331,14 @@ class LayerHelperBase(object): if isinstance(dtype, core.VarDesc.VarType): if dtype != core.VarDesc.VarType.FP32 and \ dtype != core.VarDesc.VarType.FP64 and \ - dtype != core.VarDesc.VarType.FP16: + dtype != core.VarDesc.VarType.FP16 and \ + dtype != core.VarDesc.VarType.BF16: raise TypeError( "Can not create parameter with default initializer when dtype is not float type. Set default_initializer to fit the parameter dtype!" ) else: - if not (dtype.startswith("float") or dtype == "double"): + if not (dtype.startswith("float") or + dtype in ["double", "uint16"]): raise TypeError( "Can not create parameter with default initializer when dtype is not float type. Set default_initializer to fit the parameter dtype!" ) diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index e90af2a1e7..e5663d607a 100755 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -491,7 +491,7 @@ def embedding(input, helper = LayerHelper('embedding', **locals()) check_variable_and_dtype(input, 'input', ['int64'], 'fluid.layers.embedding') - check_dtype(dtype, 'dtype', ['float16', 'float32', 'float64'], + check_dtype(dtype, 'dtype', ['uint16', 'float16', 'float32', 'float64'], 'fluid.layers.embedding') if is_distributed: diff --git a/python/paddle/fluid/tests/unittests/op_test.py b/python/paddle/fluid/tests/unittests/op_test.py index 583bd3994b..25717b7967 100644 --- a/python/paddle/fluid/tests/unittests/op_test.py +++ b/python/paddle/fluid/tests/unittests/op_test.py @@ -1171,7 +1171,9 @@ class OpTest(unittest.TestCase): expect = self.outputs[out_name] expect_t = expect[0] if isinstance(expect, tuple) else expect - if actual_t.dtype == np.uint16 and expect_t.dtype == np.float32: + if actual_t.dtype == np.uint16 and expect_t.dtype in [ + np.float32, np.float64 + ]: actual_t = convert_uint16_to_float(actual_t) atol = 0.03 diff --git a/python/paddle/fluid/tests/unittests/test_fill_constant_op.py b/python/paddle/fluid/tests/unittests/test_fill_constant_op.py index 0dd78ea53c..770b6d3e92 100644 --- a/python/paddle/fluid/tests/unittests/test_fill_constant_op.py +++ b/python/paddle/fluid/tests/unittests/test_fill_constant_op.py @@ -16,7 +16,7 @@ from __future__ import print_function import unittest import numpy as np -from op_test import OpTest +from op_test import OpTest, convert_float_to_uint16 import paddle import paddle.fluid.core as core @@ -425,5 +425,31 @@ class TestFillConstantOpError(unittest.TestCase): self.assertRaises(TypeError, test_shape_tensor_list_dtype) +class TestFillConstantOp_ValueTensorBf16(OpTest): + def setUp(self): + '''Test fill_constant op with specified value + ''' + self.op_type = "fill_constant" + self.init_data() + + self.inputs = { + "ShapeTensor": np.array(self.shape).astype("int32"), + 'ValueTensor': + convert_float_to_uint16(np.array([self.value]).astype("float32")) + } + self.attrs = {'value': self.value, 'dtype': core.VarDesc.VarType.BF16} + self.outputs = {'Out': np.full(self.shape, self.value)} + + def init_data(self): + self.shape = [123, 92] + self.value = 3.0 + self.dtype = np.uint16 + self.mkldnn_data_type = "bfloat16" + + def test_check_output(self): + self.check_output_with_place(core.CPUPlace()) + + if __name__ == "__main__": + paddle.enable_static() unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_initializer.py b/python/paddle/fluid/tests/unittests/test_initializer.py index 952265e119..3d1b081863 100644 --- a/python/paddle/fluid/tests/unittests/test_initializer.py +++ b/python/paddle/fluid/tests/unittests/test_initializer.py @@ -29,7 +29,7 @@ DELTA = 0.00001 def check_cast_op(op): return op.type == 'cast' and \ op.attr('in_dtype') == VarDesc.VarType.FP32 and \ - op.attr('out_dtype') == VarDesc.VarType.FP16 + op.attr('out_dtype') in [VarDesc.VarType.FP16, VarDesc.VarType.BF16] def output_hist(out): @@ -53,7 +53,7 @@ class TestConstantInitializer(unittest.TestCase): lod_level=0, name="param", initializer=initializer.ConstantInitializer()) - num_ops = 2 if dtype == "float16" else 1 + num_ops = 2 if dtype in ["float16"] else 1 self.assertEqual(len(block.ops), num_ops) init_op = block.ops[0] self.assertEqual(init_op.type, 'fill_constant') @@ -72,7 +72,7 @@ class TestConstantInitializer(unittest.TestCase): lod_level=0, name="param", initializer=initializer.ConstantInitializer(2.3)) - num_ops = 2 if dtype == "float16" else 1 + num_ops = 2 if dtype in ["float16"] else 1 self.assertEqual(len(block.ops), num_ops) init_op = block.ops[0] self.assertEqual(init_op.type, 'fill_constant') @@ -87,6 +87,13 @@ class TestConstantInitializer(unittest.TestCase): block = self.test_constant_initializer("float16") self.assertTrue(check_cast_op(block.ops[1])) + def test_constant_initializer_bf16(self): + """Test constant initializer with bfloat16 + No cast operator has been added here + """ + self.test_constant_initializer_default_value("uint16") + self.test_constant_initializer("uint16") + class TestUniformInitializer(unittest.TestCase): def test_uniform_initializer_default_value(self, dtype="float32"): @@ -101,7 +108,7 @@ class TestUniformInitializer(unittest.TestCase): lod_level=0, name="param", initializer=initializer.UniformInitializer()) - num_ops = 2 if dtype == "float16" else 1 + num_ops = 2 if dtype in ["float16", "uint16"] else 1 self.assertEqual(len(block.ops), num_ops) init_op = block.ops[0] self.assertEqual(init_op.type, 'uniform_random') @@ -146,7 +153,7 @@ class TestUniformInitializer(unittest.TestCase): lod_level=0, name="param", initializer=initializer.UniformInitializer(-4.2, 3.1, 123)) - num_ops = 2 if dtype == "float16" else 1 + num_ops = 2 if dtype in ["float16", "uint16"] else 1 self.assertEqual(len(block.ops), num_ops) init_op = block.ops[0] self.assertEqual(init_op.type, 'uniform_random') @@ -167,7 +174,7 @@ class TestUniformInitializer(unittest.TestCase): lod_level=0, name="param", initializer=initializer.UniformInitializer(-4.2, float(i), 123)) - num_ops = 2 if dtype == "float16" else 1 + num_ops = 2 if dtype in ["float16", "uint16"] else 1 self.assertEqual(len(block.ops), num_ops) init_op0 = block.ops[0] self.assertEqual(init_op0.type, 'uniform_random') @@ -186,6 +193,16 @@ class TestUniformInitializer(unittest.TestCase): block = self.test_uniform_initializer_two_op("float16") self.assertTrue(check_cast_op(block.ops[1])) + def test_uniform_initializer_bf16(self): + """Test uniform initializer with bfloat16 + """ + block = self.test_uniform_initializer_default_value("uint16") + self.assertTrue(check_cast_op(block.ops[1])) + block = self.test_uniform_initializer(dtype="uint16") + self.assertTrue(check_cast_op(block.ops[1])) + block = self.test_uniform_initializer_two_op("uint16") + self.assertTrue(check_cast_op(block.ops[1])) + class TestNormalInitializer(unittest.TestCase): def test_normal_initializer_default_value(self): @@ -219,7 +236,7 @@ class TestNormalInitializer(unittest.TestCase): lod_level=0, name="param", initializer=initializer.NormalInitializer(2.3, 1.9, 123)) - num_ops = 2 if dtype == "float16" else 1 + num_ops = 2 if dtype in ["float16", "uint16"] else 1 self.assertEqual(len(block.ops), num_ops) init_op = block.ops[0] self.assertEqual(init_op.type, 'gaussian_random') @@ -234,6 +251,12 @@ class TestNormalInitializer(unittest.TestCase): block = self.test_normal_initializer("float16") self.assertTrue(check_cast_op(block.ops[1])) + def test_normal_initializer_bf16(self): + """Test normal initializer with bfloat16 + """ + block = self.test_normal_initializer("uint16") + self.assertTrue(check_cast_op(block.ops[1])) + class TestXavierInitializer(unittest.TestCase): def test_uniform_xavier_initializer(self): @@ -337,7 +360,7 @@ class TestXavierInitializer(unittest.TestCase): name="param", initializer=initializer.XavierInitializer( fan_in=12, fan_out=23, seed=134)) - num_ops = 2 if dtype == "float16" else 1 + num_ops = 2 if dtype in ["float16", "uint16"] else 1 self.assertEqual(len(block.ops), num_ops) init_op = block.ops[0] self.assertEqual(init_op.type, 'uniform_random') @@ -353,6 +376,12 @@ class TestXavierInitializer(unittest.TestCase): block = self.test_xavier_initializer_supplied_arguments("float16") self.assertTrue(check_cast_op(block.ops[1])) + def test_xavier_initializer_bf16(self): + """Test the Xavier initializer with bfloat16 + """ + block = self.test_xavier_initializer_supplied_arguments("uint16") + self.assertTrue(check_cast_op(block.ops[1])) + class TestMSRAInitializer(unittest.TestCase): def test_uniform_msra_initializer(self): @@ -454,7 +483,7 @@ class TestMSRAInitializer(unittest.TestCase): name="param", initializer=initializer.MSRAInitializer( fan_in=12, seed=134)) - num_ops = 2 if dtype == "float16" else 1 + num_ops = 2 if dtype in ["float16", "uint16"] else 1 self.assertEqual(len(block.ops), num_ops) init_op = block.ops[0] self.assertEqual(init_op.type, 'uniform_random') @@ -470,6 +499,12 @@ class TestMSRAInitializer(unittest.TestCase): block = self.test_msra_initializer_supplied_arguments("float16") self.assertTrue(check_cast_op(block.ops[1])) + def test_msra_initializer_bf16(self): + """Test the MSRA initializer with bfloat16 + """ + block = self.test_msra_initializer_supplied_arguments("uint16") + self.assertTrue(check_cast_op(block.ops[1])) + class TestBilinearInitializer(unittest.TestCase): def test_bilinear_initializer(self, dtype="float32"): @@ -484,7 +519,7 @@ class TestBilinearInitializer(unittest.TestCase): lod_level=0, name="param", initializer=initializer.BilinearInitializer()) - num_ops = 2 if dtype == "float16" or dtype == "float64" else 1 + num_ops = 2 if dtype in ["float16", "uint16", "float64"] else 1 self.assertEqual(len(block.ops), num_ops) init_op = block.ops[0] self.assertEqual(init_op.type, 'assign_value') @@ -499,6 +534,12 @@ class TestBilinearInitializer(unittest.TestCase): block = self.test_bilinear_initializer("float16") self.assertTrue(check_cast_op(block.ops[1])) + def test_bilinear_initializer_bf16(self): + """Test the bilinear initializer with supplied arguments + """ + block = self.test_bilinear_initializer("uint16") + self.assertTrue(check_cast_op(block.ops[1])) + def test_type_error(self): self.assertRaises(TypeError, self.test_bilinear_initializer, 'int32') @@ -518,7 +559,7 @@ class TestNumpyArrayInitializer(unittest.TestCase): lod_level=0, name="param", initializer=initializer.NumpyArrayInitializer(np_array)) - num_ops = 2 if dtype == "float16" else 1 + num_ops = 2 if dtype in ["float16", "uint16"] else 1 self.assertEqual(len(block.ops), num_ops) init_op = block.ops[0] self.assertEqual(init_op.type, 'assign_value') @@ -531,6 +572,12 @@ class TestNumpyArrayInitializer(unittest.TestCase): block = self.test_numpy_array_initializer("float16") self.assertTrue(block.ops[1]) + def test_numpy_array_initializer_bf16(self): + """Test the numpy array initializer with bfloat16 + """ + block = self.test_numpy_array_initializer("uint16") + self.assertTrue(block.ops[1]) + class TestSetGlobalInitializer(unittest.TestCase): def test_set_global_weight_initilizer(self): diff --git a/python/paddle/fluid/tests/unittests/test_initializer_nn.py b/python/paddle/fluid/tests/unittests/test_initializer_nn.py index ce72b5effb..08ec516ba9 100644 --- a/python/paddle/fluid/tests/unittests/test_initializer_nn.py +++ b/python/paddle/fluid/tests/unittests/test_initializer_nn.py @@ -36,7 +36,7 @@ def get_uniform_min_and_max(weight): def check_cast_op(op): return op.type == 'cast' and \ op.attr('in_dtype') == VarDesc.VarType.FP32 and \ - op.attr('out_dtype') == VarDesc.VarType.FP16 + op.attr('out_dtype') in [VarDesc.VarType.FP16, VarDesc.VarType.BF16] class TestConstantInitializer(unittest.TestCase): @@ -54,7 +54,7 @@ class TestConstantInitializer(unittest.TestCase): lod_level=0, name="param", initializer=init_inst) - num_ops = 2 if dtype == "float16" else 1 + num_ops = 2 if dtype in ["float16"] else 1 self.assertEqual(len(block.ops), num_ops) init_op = block.ops[0] self.assertEqual(init_op.type, 'fill_constant') @@ -109,6 +109,13 @@ class TestConstantInitializer(unittest.TestCase): self.test_constant_initializer_default_value_dygraph("float16") self.test_constant_initializer_dygraph("float16") + def test_constant_initializer_bf16(self): + """Test constant initializer with bfloat16 + No cast operator has been added here + """ + self.test_constant_initializer_default_value_static("uint16") #bfloat16 + self.test_constant_initializer_static("uint16") #bfloat16 + class TestKaimingInitializer(unittest.TestCase): def static_test_kaiming_initializer_common(self, @@ -218,7 +225,7 @@ class TestUniform(unittest.TestCase): lod_level=0, name="param", initializer=initializer.Uniform()) - num_ops = 2 if dtype == "float16" else 1 + num_ops = 2 if dtype in ["float16", "uint16"] else 1 self.assertEqual(len(block.ops), num_ops) init_op = block.ops[0] self.assertEqual(init_op.type, 'uniform_random') @@ -249,7 +256,7 @@ class TestUniform(unittest.TestCase): lod_level=0, name="param", initializer=initializer.Uniform()) - num_ops = 2 if dtype == "float16" else 1 + num_ops = 2 if dtype in ["float16", "uint16"] else 1 self.assertEqual(len(block.ops), num_ops) init_op = block.ops[0] self.assertEqual(init_op.type, 'uniform_random') @@ -280,7 +287,7 @@ class TestUniform(unittest.TestCase): lod_level=0, name="param", initializer=initializer.Uniform(min_value, max_vlaue)) - num_ops = 2 if dtype == "float16" else 1 + num_ops = 2 if dtype in ["float16", "uint16"] else 1 self.assertEqual(len(block.ops), num_ops) init_op = block.ops[0] self.assertEqual(init_op.type, 'uniform_random') @@ -310,7 +317,7 @@ class TestUniform(unittest.TestCase): lod_level=0, name="param", initializer=initializer.Uniform(min_value, float(i))) - num_ops = 2 if dtype == "float16" else 1 + num_ops = 2 if dtype in ["float16", "uint16"] else 1 self.assertEqual(len(block.ops), num_ops) init_op0 = block.ops[0] self.assertEqual(init_op0.type, 'uniform_random') @@ -332,6 +339,16 @@ class TestUniform(unittest.TestCase): block = self.test_uniform_initializer_two_op("float16") self.assertTrue(check_cast_op(block.ops[1])) + def test_uniform_initializer_bf16(self): + """Test uniform initializer with bfloat16 + """ + block = self.test_uniform_initializer_default_value("uint16") #bfloat16 + self.assertTrue(check_cast_op(block.ops[1])) + block = self.test_uniform_initializer(dtype="uint16") #bfloat16 + self.assertTrue(check_cast_op(block.ops[1])) + block = self.test_uniform_initializer_two_op("uint16") #bfloat16 + self.assertTrue(check_cast_op(block.ops[1])) + def test_uniform_initializer_dygraph(self): """Test uniform initializer in dygraph model. """ @@ -388,7 +405,7 @@ class TestNormal(unittest.TestCase): lod_level=0, name="param", initializer=initializer.Normal(2.3, 1.9)) - num_ops = 2 if dtype == "float16" else 1 + num_ops = 2 if dtype in ["float16", "uint16"] else 1 self.assertEqual(len(block.ops), num_ops) init_op = block.ops[0] self.assertEqual(init_op.type, 'gaussian_random') @@ -405,6 +422,12 @@ class TestNormal(unittest.TestCase): block = self.test_normal_initializer("float16") self.assertTrue(check_cast_op(block.ops[1])) + def test_normal_initializer_bf16(self): + """Test normal initializer with bfloat16 + """ + block = self.test_normal_initializer("uint16") #bfloat16 + self.assertTrue(check_cast_op(block.ops[1])) + def test_normal_initializer_dygraph(self): """Test normal initializer in dygraph model. """ @@ -455,7 +478,7 @@ class TestTruncatedNormal(unittest.TestCase): lod_level=0, name="param", initializer=initializer.TruncatedNormal(2.3, 1.9)) - num_ops = 2 if dtype == "float16" else 1 + num_ops = 2 if dtype in ["float16", "uint16"] else 1 self.assertEqual(len(block.ops), num_ops) init_op = block.ops[0] self.assertEqual(init_op.type, 'truncated_gaussian_random') @@ -474,6 +497,14 @@ class TestTruncatedNormal(unittest.TestCase): block = self.test_truncated_normal_initializer("float16") self.assertTrue(check_cast_op(block.ops[1])) + def test_truncated_normal_initializer_bf16(self): + """Test truncated normal initializer with bfloat16 + """ + paddle.enable_static() + + block = self.test_truncated_normal_initializer("uint16") #bfloat16 + self.assertTrue(check_cast_op(block.ops[1])) + def test_truncated_normal_initializer_dygraph(self): """Test truncated normal initializer in dygraph model. """ @@ -629,7 +660,7 @@ class TestAssign(unittest.TestCase): lod_level=0, name="param", initializer=initializer.Assign(np_array)) - num_ops = 2 if dtype == "float16" else 1 + num_ops = 2 if dtype in ["float16", "uint16"] else 1 self.assertEqual(len(block.ops), num_ops) init_op = block.ops[0] self.assertEqual(init_op.type, 'assign_value') @@ -645,6 +676,12 @@ class TestAssign(unittest.TestCase): block = self.test_assign_initializer("float16") self.assertTrue(block.ops[1]) + def test_assign_initializer_bf16(self): + """Test the numpy array initializer with bfloat16 + """ + block = self.test_assign_initializer("uint16") #bfloat16 + self.assertTrue(block.ops[1]) + def test_assign_initializer_dygraph_1(self): """Test assign initializer in dygraph model. """ diff --git a/python/paddle/fluid/tests/unittests/test_lookup_table_bf16_op.py b/python/paddle/fluid/tests/unittests/test_lookup_table_bf16_op.py index 13c4aa6d76..b423123160 100644 --- a/python/paddle/fluid/tests/unittests/test_lookup_table_bf16_op.py +++ b/python/paddle/fluid/tests/unittests/test_lookup_table_bf16_op.py @@ -171,6 +171,52 @@ class TestLookupTableBF16OpIds4DPadding(TestLookupTableBF16OpIds4D): self.check_output_with_place(core.CPUPlace(), check_dygraph=False) +class TestEmbeddingLayerBF16ConstantInitializer(unittest.TestCase): + """ + Test embedding layer api and results for bfloat16 + """ + + def set_initializer(self): + self.initializer = fluid.initializer.Constant(value=self.value) + + def setUp(self): + self.ids_shape = [4, 1] + self.w_shape = [10, 64] + self.ids = np.random.randint( + low=0, high=9, size=self.ids_shape).astype("int64") + self.flat_ids = self.ids.flatten() + self.value = 3.0 + self.w_fp32 = np.full(self.w_shape, self.value) + self.place = fluid.CPUPlace() + self.prog = fluid.Program() + self.startup_prog = fluid.Program() + self.set_initializer() + + with fluid.program_guard(self.prog, self.startup_prog): + x = fluid.layers.data(name='x', shape=self.ids_shape, dtype='int64') + self.emb = fluid.layers.embedding( + input=x, + size=self.w_shape, + param_attr=fluid.ParamAttr( + name="emb_weight", initializer=self.initializer), + is_sparse=False, + dtype="uint16") # bfloat16 + exe = fluid.Executor(self.place) + exe.run(self.startup_prog) + self.result = exe.run(self.prog, + feed={'x': self.ids}, + fetch_list=['emb_weight', self.emb]) + + def test_embedding_weights(self): + result = convert_uint16_to_float(self.result[0]) + self.assertTrue(np.array_equal(self.w_fp32, result)) + + def test_lookup_results(self): + lookup_result = convert_uint16_to_float(self.result[1]) + lookup_ref = _lookup(self.w_fp32, self.ids, self.flat_ids) + self.assertTrue(np.array_equal(lookup_result, lookup_ref)) + + if __name__ == "__main__": enable_static() unittest.main() -- GitLab