diff --git a/paddle/phi/kernels/gpu/matmul_grad_kernel.cu b/paddle/phi/kernels/gpu/matmul_grad_kernel.cu
index 2753937eb7142c63d63067278bee04793e88bcce..4b48e2eb1732266250fe9e647f7bba73f9aaf393 100644
--- a/paddle/phi/kernels/gpu/matmul_grad_kernel.cu
+++ b/paddle/phi/kernels/gpu/matmul_grad_kernel.cu
@@ -55,6 +55,7 @@ PD_REGISTER_KERNEL(matmul_with_flatten_grad,
                    phi::MatmulWithFlattenGradKernel,
                    float,
                    double,
+                   phi::dtype::bfloat16,
                    phi::dtype::float16) {}
 
 PD_REGISTER_KERNEL(matmul_with_flatten_double_grad,
diff --git a/paddle/phi/kernels/gpu/matmul_kernel.cu b/paddle/phi/kernels/gpu/matmul_kernel.cu
index e5de7966c2ec40dc0eeeb960e979a195643d4c6e..590c041555f58410392ef346ba08999aa0b8bf77 100644
--- a/paddle/phi/kernels/gpu/matmul_kernel.cu
+++ b/paddle/phi/kernels/gpu/matmul_kernel.cu
@@ -36,4 +36,5 @@ PD_REGISTER_KERNEL(matmul_with_flatten,
                    phi::MatmulWithFlattenKernel,
                    float,
                    double,
+                   phi::dtype::bfloat16,
                    phi::dtype::float16) {}
diff --git a/python/paddle/fluid/layers/layer_function_generator.py b/python/paddle/fluid/layers/layer_function_generator.py
index 6e4b1f836f020ccdc0967fa90765ab178b40cbd3..d4c55f03eaba20770a8697248b34f2a6abeece23 100644
--- a/python/paddle/fluid/layers/layer_function_generator.py
+++ b/python/paddle/fluid/layers/layer_function_generator.py
@@ -278,7 +278,7 @@ def generate_activation_fn(op_type):
 
         if op_type not in ["abs", "exp", "square"]:
             check_variable_and_dtype(
-                x, 'x', ['float16', 'float32', 'float64'], op_type
+                x, 'x', ['float16', 'float32', 'float64', 'uint16'], op_type
             )
         else:
             # abs exp square ops support dtype(int32, int64, float16, float32, float64)
@@ -293,6 +293,7 @@ def generate_activation_fn(op_type):
                     'float64',
                     'complex64',
                     'complex128',
+                    'uint16',
                 ],
                 op_type,
             )
diff --git a/python/paddle/static/amp/amp_nn.py b/python/paddle/static/amp/amp_nn.py
index 2361c11f23b824358637077a75e5ff21842a800e..73f3ce6dcd0f63f71a45462a3c9f58afadf074e3 100644
--- a/python/paddle/static/amp/amp_nn.py
+++ b/python/paddle/static/amp/amp_nn.py
@@ -49,7 +49,7 @@ def check_finite_and_unscale(x, scale, name=None, float_status=None):
         check_variable_and_dtype(
             e,
             "x",
-            ['float16', 'float32', 'float64'],
+            ['float16', 'float32', 'float64', 'uint16'],
             'check_finite_and_unscale',
         )
 
@@ -133,9 +133,15 @@ def update_loss_scaling(
     check_type(x, 'x', (tuple, list), 'update_loss_scaling')
     for e in x:
         check_variable_and_dtype(
-            e, "x", ['float16', 'float32', 'float64'], 'update_loss_scaling'
+            e,
+            "x",
+            ['float16', 'float32', 'float64', 'uint16'],
+            'update_loss_scaling',
         )
-        if e.dtype == core.VarDesc.VarType.FP16:
+        if (
+            e.dtype == core.VarDesc.VarType.FP16
+            or e.dtype == core.VarDesc.VarType.BF16
+        ):
             assert (
                 prev_loss_scaling.dtype == core.VarDesc.VarType.FP32
             ), "The dtype of prev_loss_scaling should be float32 when the dtype of x is float16."
diff --git a/python/paddle/tensor/layer_function_generator.py b/python/paddle/tensor/layer_function_generator.py
index 8b753e1d2b6301ddca0272fde24a05079e592be3..5436d2f515a4ad4adf62674e38b7bb258dad01ad 100644
--- a/python/paddle/tensor/layer_function_generator.py
+++ b/python/paddle/tensor/layer_function_generator.py
@@ -294,6 +294,7 @@ def generate_activation_fn(op_type):
                 'float64',
                 'complex64',
                 'complex128',
+                'uint16',
             ],
             op_type,
         )
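
Note (not part of the patch): the diff registers phi::dtype::bfloat16 for the matmul_with_flatten forward and gradient GPU kernels, and adds 'uint16' — the numpy storage type Paddle uses for VarType.BF16 variables in static graphs — to the dtype whitelists of the generated activation ops, check_finite_and_unscale, and update_loss_scaling, while letting the BF16 branch of update_loss_scaling reuse the FP16 loss-scaling assertions. The sketch below is one minimal way these paths might be exercised end to end; it assumes a CUDA device with bfloat16 support and that paddle.static.amp.decorate accepts the level='O2'/dtype='bfloat16' arguments exposed in recent releases (argument names may differ across versions).

# Hypothetical smoke test, not included in the patch.
import numpy as np
import paddle

paddle.enable_static()

main_prog, startup_prog = paddle.static.Program(), paddle.static.Program()
with paddle.static.program_guard(main_prog, startup_prog):
    x = paddle.static.data(name='x', shape=[None, 16], dtype='float32')
    # Static-graph fc historically lowers to the mul (matmul_with_flatten) op,
    # whose GPU kernels now carry a bfloat16 registration.
    hidden = paddle.static.nn.fc(x, size=32, activation='tanh')
    loss = paddle.mean(hidden)

    opt = paddle.optimizer.SGD(learning_rate=0.01)
    # AMP rewrites the program to bfloat16; the patched dtype checks
    # ('uint16' entries, VarType.BF16 branch) must accept the rewritten vars.
    opt = paddle.static.amp.decorate(
        opt,
        level='O2',
        dtype='bfloat16',
        use_dynamic_loss_scaling=False,  # loss scaling is rarely needed for bf16
    )
    opt.minimize(loss)

exe = paddle.static.Executor(paddle.CUDAPlace(0))
exe.run(startup_prog)
# One forward/backward step exercises the bf16 matmul_with_flatten kernels.
out = exe.run(main_prog,
              feed={'x': np.random.rand(4, 16).astype('float32')},
              fetch_list=[loss])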