Unverified commit 2238a535, authored by Guoxia Wang, committed by GitHub

remove fp32 tmp tensor and cast op for initializer.Normal and initializer.Constant (#38818)

Parent: 04f73d89
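In practice, this change means an FP16 (or, for Normal, BF16) parameter is now initialized by a single fill_constant / gaussian_random op emitted directly in the parameter's dtype, rather than an FP32 temporary tensor followed by a cast op. Below is a minimal sketch of the resulting behavior, mirroring the updated unit tests further down; the enable_static call, shape, and constant value are illustrative choices, not taken from this diff.

# Sketch: after this change, an fp16 parameter is initialized by a single op
# in the parameter's own dtype (no fp32 temporary, no trailing cast op).
# The shape and value here are illustrative.
import paddle
from paddle.fluid import framework, initializer

paddle.enable_static()

program = framework.Program()
block = program.global_block()
block.create_parameter(
    dtype="float16",
    shape=[5, 10],
    lod_level=0,
    name="param",
    initializer=initializer.ConstantInitializer(2.3))

# Before this commit: ['fill_constant', 'cast']; after: a single 'fill_constant'.
print([op.type for op in block.ops])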
@@ -18,6 +18,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/generator.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/framework/operator.h"
+#include "paddle/fluid/operators/amp/fp16_type_traits.h"
 #include "paddle/fluid/operators/fill_constant_op.h"
 
 namespace paddle {

@@ -38,10 +39,12 @@ struct GaussianGenerator {
   __host__ __device__ T operator()(const unsigned int n) const {
     thrust::minstd_rand rng;
     rng.seed(seed_);
-    thrust::normal_distribution<T> dist(mean_, std_);
+    using MT = typename details::MPTypeTrait<T>::Type;
+    thrust::normal_distribution<MT> dist(mean_, std_);
     unsigned int new_n = n + offset_;
     rng.discard(new_n);
-    return dist(rng);
+    MT out = dist(rng);
+    return static_cast<T>(out);
   }
 };

@@ -124,10 +127,14 @@ class GPUGaussianRandomBatchSizeLikeKernel : public framework::OpKernel<T> {
 }  // namespace operators
 }  // namespace paddle
 
-REGISTER_OP_CUDA_KERNEL(gaussian_random,
-                        paddle::operators::GPUGaussianRandomKernel<float>,
-                        paddle::operators::GPUGaussianRandomKernel<double>);
+REGISTER_OP_CUDA_KERNEL(
+    gaussian_random,
+    paddle::operators::GPUGaussianRandomKernel<paddle::platform::float16>,
+    paddle::operators::GPUGaussianRandomKernel<float>,
+    paddle::operators::GPUGaussianRandomKernel<double>);
 REGISTER_OP_CUDA_KERNEL(
     gaussian_random_batch_size_like,
+    paddle::operators::GPUGaussianRandomBatchSizeLikeKernel<
+        paddle::platform::float16>,
     paddle::operators::GPUGaussianRandomBatchSizeLikeKernel<float>,
     paddle::operators::GPUGaussianRandomBatchSizeLikeKernel<double>);
@@ -137,54 +137,27 @@ class ConstantInitializer(Initializer):
                     isinstance(var, framework.EagerParamBase))
         assert isinstance(block, framework.Block)
 
-        # to be compatible of fp16 initializers
-        if var.dtype == VarDesc.VarType.FP16:
-            out_dtype = VarDesc.VarType.FP32
-            out_var = block.create_var(
-                name=unique_name.generate(".".join(
-                    ['constant_init', var.name, 'tmp'])),
-                shape=var.shape,
-                dtype=out_dtype,
-                type=VarDesc.VarType.LOD_TENSOR,
-                persistable=False)
-        else:
-            out_dtype = var.dtype
-            out_var = var
-
         if framework.in_dygraph_mode():
-            out_var = _C_ops.fill_constant(
-                out_var, 'value',
+            var = _C_ops.fill_constant(
+                var, 'value',
                 float(self._value), 'force_cpu', self._force_cpu, 'dtype',
-                int(out_dtype), 'str_value',
+                int(var.dtype), 'str_value',
                 str(float(self._value)), 'shape', var.shape)
-            if var.dtype == VarDesc.VarType.FP16:
-                var_tmp = _C_ops.cast(out_var, 'in_dtype', out_var.dtype,
-                                      'out_dtype', var.dtype)
-                var.copy_(var_tmp, False)
-            else:
-                var.copy_(out_var, False)
             return None
         else:
            # fill constant should set the "str_value" to preserve precision
             op = block.append_op(
                 type="fill_constant",
-                outputs={"Out": out_var},
+                outputs={"Out": var},
                 attrs={
                     "shape": var.shape,
-                    "dtype": int(out_dtype),
+                    "dtype": int(var.dtype),
                     "value": float(self._value),
                     'str_value': str(float(self._value)),
                     'force_cpu': self._force_cpu
                 },
                 stop_gradient=True)
-            if var.dtype == VarDesc.VarType.FP16:
-                block.append_op(
-                    type="cast",
-                    inputs={"X": out_var},
-                    outputs={"Out": var},
-                    attrs={"in_dtype": out_var.dtype,
-                           "out_dtype": var.dtype})
 
             var.op = op
             return op

@@ -361,54 +334,24 @@ class NormalInitializer(Initializer):
         if self._seed == 0:
             self._seed = block.program.random_seed
 
-        # to be compatible of fp16 initalizers
-        if var.dtype in [VarDesc.VarType.FP16, VarDesc.VarType.BF16]:
-            out_dtype = VarDesc.VarType.FP32
-            out_var = block.create_var(
-                name=unique_name.generate(".".join(
-                    ['gaussian_random', var.name, 'tmp'])),
-                shape=var.shape,
-                dtype=out_dtype,
-                type=VarDesc.VarType.LOD_TENSOR,
-                persistable=False)
-        else:
-            out_dtype = var.dtype
-            out_var = var
-
-        if framework.in_dygraph_mode():
-            out_var = _C_ops.gaussian_random(
-                'shape', var.shape, 'dtype', out_dtype, 'mean', self._mean,
-                'std', self._std_dev, 'seed', self._seed, 'use_mkldnn', False)
-
-            if var.dtype in [VarDesc.VarType.FP16, VarDesc.VarType.BF16]:
-                var_tmp = _C_ops.cast(out_var, 'in_dtype', out_var.dtype,
-                                      'out_dtype', var.dtype)
-                var.copy_(var_tmp, False)
-            else:
-                var.copy_(out_var, False)
-            return None
-        else:
-            op = block.append_op(
-                type="gaussian_random",
-                outputs={"Out": out_var},
-                attrs={
-                    "shape": var.shape,
-                    "dtype": out_dtype,
-                    "mean": self._mean,
-                    "std": self._std_dev,
-                    "seed": self._seed,
-                    "use_mkldnn": False
-                },
-                stop_gradient=True)
-
-            if var.dtype in [VarDesc.VarType.FP16, VarDesc.VarType.BF16]:
-                block.append_op(
-                    type="cast",
-                    inputs={"X": out_var},
-                    outputs={"Out": var},
-                    attrs={"in_dtype": out_var.dtype,
-                           "out_dtype": var.dtype})
-
+        op = block.append_op(
+            type="gaussian_random",
+            outputs={"Out": var},
+            attrs={
+                "shape": var.shape,
+                "dtype": var.dtype,
+                "mean": self._mean,
+                "std": self._std_dev,
+                "seed": self._seed,
+                "use_mkldnn": False
+            },
+            stop_gradient=True)
+
+        if not framework.in_dygraph_mode():
             var.op = op
             return op
+        else:
+            return None
 
 
 class TruncatedNormalInitializer(Initializer):
......
@@ -65,7 +65,7 @@ class TestConstantInitializer(unittest.TestCase):
             lod_level=0,
             name="param",
             initializer=initializer.ConstantInitializer())
-        num_ops = 2 if dtype == "float16" else 1
+        num_ops = 1
         self.assertEqual(len(block.ops), num_ops)
         init_op = block.ops[0]
         self.assertEqual(init_op.type, 'fill_constant')

@@ -84,7 +84,7 @@ class TestConstantInitializer(unittest.TestCase):
             lod_level=0,
             name="param",
             initializer=initializer.ConstantInitializer(2.3))
-        num_ops = 2 if dtype == "float16" else 1
+        num_ops = 1
         self.assertEqual(len(block.ops), num_ops)
         init_op = block.ops[0]
         self.assertEqual(init_op.type, 'fill_constant')

@@ -94,10 +94,8 @@ class TestConstantInitializer(unittest.TestCase):
     def test_constant_initializer_fp16(self):
         """Test constant initializer with float16
         """
-        block = self.test_constant_initializer_default_value("float16")
-        self.assertTrue(check_cast_op(block.ops[1]))
-        block = self.test_constant_initializer("float16")
-        self.assertTrue(check_cast_op(block.ops[1]))
+        self.test_constant_initializer_default_value("float16")
+        self.test_constant_initializer("float16")
 
     def test_constant_initializer_bf16(self):
         """Test constant initializer with bfloat16

@@ -246,7 +244,7 @@ class TestNormalInitializer(unittest.TestCase):
             lod_level=0,
             name="param",
             initializer=initializer.NormalInitializer(2.3, 1.9, 123))
-        num_ops = 2 if dtype in ["float16", "uint16"] else 1
+        num_ops = 1
         self.assertEqual(len(block.ops), num_ops)
         init_op = block.ops[0]
         self.assertEqual(init_op.type, 'gaussian_random')

@@ -258,14 +256,12 @@
     def test_normal_initializer_fp16(self):
         """Test normal initializer with float16
         """
-        block = self.test_normal_initializer("float16")
-        self.assertTrue(check_cast_op(block.ops[1]))
+        self.test_normal_initializer("float16")
 
     def test_normal_initializer_bf16(self):
         """Test normal initializer with bfloat16
         """
-        block = self.test_normal_initializer("uint16")
-        self.assertTrue(check_cast_op(block.ops[1]))
+        self.test_normal_initializer("uint16")
 
 
 class TestXavierInitializer(unittest.TestCase):
......
@@ -54,7 +54,7 @@ class TestConstantInitializer(unittest.TestCase):
             lod_level=0,
             name="param",
             initializer=init_inst)
-        num_ops = 2 if dtype in ["float16"] else 1
+        num_ops = 1
         self.assertEqual(len(block.ops), num_ops)
         init_op = block.ops[0]
         self.assertEqual(init_op.type, 'fill_constant')

@@ -103,9 +103,7 @@ class TestConstantInitializer(unittest.TestCase):
         """Test constant initializer with float16
         """
         block = self.test_constant_initializer_default_value_static("float16")
-        self.assertTrue(check_cast_op(block.ops[1]))
         block = self.test_constant_initializer_static("float16")
-        self.assertTrue(check_cast_op(block.ops[1]))
         self.test_constant_initializer_default_value_dygraph("float16")
         self.test_constant_initializer_dygraph("float16")

@@ -402,7 +400,7 @@ class TestNormal(unittest.TestCase):
             lod_level=0,
             name="param",
             initializer=initializer.Normal(2.3, 1.9))
-        num_ops = 2 if dtype in ["float16", "uint16"] else 1
+        num_ops = 1
         self.assertEqual(len(block.ops), num_ops)
         init_op = block.ops[0]
         self.assertEqual(init_op.type, 'gaussian_random')

@@ -417,13 +415,11 @@
         """Test normal initializer with float16
         """
         block = self.test_normal_initializer("float16")
-        self.assertTrue(check_cast_op(block.ops[1]))
 
     def test_normal_initializer_bf16(self):
         """Test normal initializer with bfloat16
         """
         block = self.test_normal_initializer("uint16") #bfloat16
-        self.assertTrue(check_cast_op(block.ops[1]))
 
     def test_normal_initializer_dygraph(self):
         """Test normal initializer in dygraph model.
......