# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest

import numpy as np
from eager_op_test import (
    OpTest,
    _set_use_system_allocator,
    convert_float_to_uint16,
)

import paddle
import paddle.nn.functional as F
from paddle.fluid import core

paddle.enable_static()

np.random.seed(123)
paddle.seed(123)
_set_use_system_allocator(True)


def batch_norm_wrapper(
    x,
    running_mean,
    running_variance,
    weight,
    bias,
    is_test,
    momentum,
    epsilon,
    data_format,
    use_global_stats,
):
    y = F.batch_norm(
        x,
        running_mean,
        running_variance,
        weight,
        bias,
        training=not is_test,
        momentum=momentum,
        epsilon=epsilon,
        data_format=data_format,
        use_global_stats=use_global_stats,
    )
    z = F.relu(y)
    return z


class TestBatchNormOp(OpTest):
    def setUp(self):
        self.python_api = batch_norm_wrapper
        self.public_python_api = batch_norm_wrapper
        self.op_type = "batch_norm"
        self.prim_op_type = "comp"
        self.python_out_sig = ["Y"]
        self.initConfig()
        self.initTestCase()

    def test_check_output(self):
        if self.dtype not in ("uint16", "float16"):
            self.check_output_with_place(
                core.CPUPlace(),
                no_check_set=None,
                check_prim=True,
                only_check_prim=True,
            )
        if paddle.is_compiled_with_cuda():
            self.check_output_with_place(
                core.CUDAPlace(0),
                no_check_set=None,
                check_prim=True,
                only_check_prim=True,
            )

    def test_check_grad_x(self):
        if self.dtype not in ("uint16", "float16"):
            self.check_grad_with_place(
                core.CPUPlace(),
                ["X"],
                ['Y'],
                user_defined_grad_outputs=self.out_grad,
                check_prim=True,
                only_check_prim=True,
            )
        elif self.data_format == "NCHW" and paddle.is_compiled_with_cuda():
            # The original batch_norm CUDA kernel differs for NHWC in whether
            # the x_grad path also computes scale_grad and bias_grad, so only
            # NCHW is checked here for low-precision dtypes.
            self.check_grad_with_place(
                core.CUDAPlace(0),
                ["X"],
                ['Y'],
                user_defined_grad_outputs=self.out_grad,
                check_prim=True,
                only_check_prim=True,
            )

    def test_check_grad_scale_bias(self):
        self.enable_cinn = False
        self.rev_comp_atol = 1e-3
        self.rev_comp_rtol = 1e-3
        if self.dtype not in ("uint16", "float16"):
            self.check_grad_with_place(
                core.CPUPlace(),
                ["X", "Scale", "Bias"],
                ['Y'],
                user_defined_grad_outputs=self.out_grad,
                check_prim=True,
                only_check_prim=True,
            )
        if paddle.is_compiled_with_cuda():
            self.check_grad_with_place(
                core.CUDAPlace(0),
                ["X", "Scale", "Bias"],
                ['Y'],
                user_defined_grad_outputs=self.out_grad,
                check_prim=True,
                only_check_prim=True,
            )
        # restore init config
        self.initConfig()

    def initConfig(self):
        self.rev_comp_atol = 1e-5
        self.rev_comp_rtol = 1e-5
        self.fw_comp_atol = 1e-5
        self.fw_comp_rtol = 1e-5
        self.cinn_atol = 1e-5
        self.cinn_rtol = 1e-5
        self.dtype = "float32"
        self.shape = [16, 24, 16, 8]
        self.training = True
        self.momentum = 0.1
        self.epsilon = 1e-05
        self.data_format = "NCHW"
        self.use_global_stats = None
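    # initTestCase builds the OpTest inputs/attrs and produces the reference
    # outputs by running paddle._C_ops.batch_norm once in dynamic mode.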
    def initTestCase(self):
        if (
            self.dtype in ("uint16", "float16")
            and not paddle.is_compiled_with_cuda()
        ):
            self.__class__.op_type = self.op_type
            self.__class__.no_need_check_grad = True
            return
        np.random.seed(123)
        self.C = (
            self.shape[1] if self.data_format == "NCHW" else self.shape[-1]
        )
        if self.dtype == "uint16":
            x = convert_float_to_uint16(
                np.random.random(self.shape).astype("float32")
            )
        else:
            x = np.random.random(self.shape).astype(self.dtype)
        self.var_dtype = (
            "float32" if self.dtype in ["float16", "uint16"] else self.dtype
        )
        weight = np.random.random(self.C).astype(self.var_dtype)
        bias = np.random.random(self.C).astype(self.var_dtype)
        running_mean = np.random.random(self.C).astype(self.var_dtype)
        running_var = np.random.random(self.C).astype(self.var_dtype)
        if self.dtype == "uint16":
            self.out_grad = [
                convert_float_to_uint16(
                    np.random.random(self.shape).astype("float32")
                )
            ]
        else:
            self.out_grad = [np.random.random(self.shape).astype(self.dtype)]
        self.inputs = {
            "X": x,
            "Scale": weight,
            "Bias": bias,
            "Mean": running_mean,
            "Variance": running_var,
        }

        if self.use_global_stats is None:
            self.use_global_stats = not self.training
            trainable_statistics = False
        else:
            trainable_statistics = not self.use_global_stats

        self.attrs = {
            "momentum": self.momentum,
            "epsilon": self.epsilon,
            "is_test": not self.training,
            "data_layout": self.data_format,
            "use_global_stats": self.use_global_stats,
            "trainable_statistics": trainable_statistics,
        }

        paddle.disable_static()
        (
            y,
            running_mean,
            running_var,
            saved_mean,
            saved_variance,
            _,
        ) = paddle._C_ops.batch_norm(
            paddle.to_tensor(x),
            paddle.to_tensor(running_mean),
            paddle.to_tensor(running_var),
            paddle.to_tensor(weight),
            paddle.to_tensor(bias),
            not self.training,
            self.momentum,
            self.epsilon,
            self.data_format,
            self.use_global_stats,
            trainable_statistics,
        )
        if self.dtype == "uint16":
            y = convert_float_to_uint16(y)
        paddle.enable_static()

        self.outputs = {
            "Y": y,
            "MeanOut": running_mean,
            "VarianceOut": running_var,
            "SavedMean": saved_mean,
            "SavedVariance": saved_variance,
        }


class TestBatchNormOpNCHWTestMode(TestBatchNormOp):
    def initConfig(self):
        self.fw_comp_atol = 1e-5
        self.fw_comp_rtol = 1e-5
        self.rev_comp_atol = 1e-5
        self.rev_comp_rtol = 1e-5
        self.dtype = "float32"
        self.shape = [16, 16, 16, 8]
        self.training = False
        self.momentum = 0.1
        self.epsilon = 1e-05
        self.data_format = "NCHW"
        self.use_global_stats = True


class TestBatchNormOpNCHWFp64(TestBatchNormOp):
    def initConfig(self):
        self.fw_comp_atol = 1e-11
        self.fw_comp_rtol = 1e-11
        self.rev_comp_atol = 1e-11
        self.rev_comp_rtol = 1e-11
        self.dtype = "float64"
        self.shape = [16, 16, 16, 8]
        self.training = True
        self.momentum = 0.1
        self.epsilon = 1e-05
        self.data_format = "NCHW"
        self.use_global_stats = None


class TestBatchNormOpNCHWTestModeFp64(TestBatchNormOp):
    def initConfig(self):
        self.fw_comp_atol = 1e-15
        self.fw_comp_rtol = 1e-15
        self.rev_comp_atol = 1e-15
        self.rev_comp_rtol = 1e-15
        self.dtype = "float64"
        self.shape = [16, 16, 16, 8]
        self.training = False
        self.momentum = 0.1
        self.epsilon = 1e-05
        self.data_format = "NCHW"
        self.use_global_stats = None


class TestBatchNormOpNCHWFp16(TestBatchNormOp):
    def initConfig(self):
        self.fw_comp_atol = 1e-3
        self.fw_comp_rtol = 1e-3
        self.rev_comp_atol = 1e-3
        self.rev_comp_rtol = 1e-3
        self.dtype = "float16"
        self.shape = [16, 16, 16, 8]
        self.training = True
        self.momentum = 0.1
        self.epsilon = 1e-05
        self.data_format = "NCHW"
        self.use_global_stats = None


class TestBatchNormOpNCHWTestModeFp16(TestBatchNormOp):
    def initConfig(self):
        self.fw_comp_atol = 1e-3
        self.fw_comp_rtol = 1e-3
        self.rev_comp_atol = 1e-3
        self.rev_comp_rtol = 1e-3
        self.dtype = "float16"
        self.shape = [16, 16, 16, 8]
        self.training = False
        self.momentum = 0.1
        self.epsilon = 1e-05
        self.data_format = "NCHW"
        self.use_global_stats = None
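# The bf16 variants below follow the OpTest convention of storing bfloat16
# data as "uint16" and are skipped when CUDA or bfloat16 support is missing.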
@unittest.skipIf(
    not core.is_compiled_with_cuda()
    or not core.is_bfloat16_supported(core.CUDAPlace(0)),
    "core is not compiled with CUDA or does not support bfloat16",
)
class TestBatchNormOpNCHWbf16(TestBatchNormOp):
    def initConfig(self):
        self.fw_comp_atol = 1e-3
        self.fw_comp_rtol = 1e-3
        self.rev_comp_atol = 1e-3
        self.rev_comp_rtol = 1e-3
        self.cinn_atol = 1e-3
        self.cinn_rtol = 1e-3
        self.dtype = "uint16"
        self.shape = [16, 16, 16, 8]
        self.training = True
        self.momentum = 0.1
        self.epsilon = 1e-05
        self.data_format = "NCHW"
        self.use_global_stats = None


@unittest.skipIf(
    not core.is_compiled_with_cuda()
    or not core.is_bfloat16_supported(core.CUDAPlace(0)),
    "core is not compiled with CUDA or does not support bfloat16",
)
class TestBatchNormOpNCHWTestModebf16(TestBatchNormOp):
    def initConfig(self):
        self.fw_comp_atol = 1e-3
        self.fw_comp_rtol = 1e-3
        self.rev_comp_atol = 1e-3
        self.rev_comp_rtol = 1e-3
        self.cinn_atol = 1e-3
        self.cinn_rtol = 1e-3
        self.dtype = "uint16"
        self.shape = [16, 16, 16, 8]
        self.training = False
        self.momentum = 0.1
        self.epsilon = 1e-05
        self.data_format = "NCHW"
        self.use_global_stats = None


class TestBatchNormOpNHWC(TestBatchNormOp):
    def initConfig(self):
        self.fw_comp_atol = 1e-5
        self.fw_comp_rtol = 1e-5
        self.rev_comp_atol = 1e-5
        self.rev_comp_rtol = 1e-5
        self.dtype = "float32"
        self.shape = [16, 16, 16, 8]
        self.training = True
        self.momentum = 0.1
        self.epsilon = 1e-05
        self.data_format = "NHWC"
        self.use_global_stats = None


class TestBatchNormOpNHWCFp64(TestBatchNormOp):
    def initConfig(self):
        self.fw_comp_atol = 1e-11
        self.fw_comp_rtol = 1e-11
        self.rev_comp_atol = 1e-11
        self.rev_comp_rtol = 1e-11
        self.dtype = "float64"
        self.shape = [16, 16, 16, 8]
        self.training = True
        self.momentum = 0.1
        self.epsilon = 1e-05
        self.data_format = "NHWC"
        self.use_global_stats = None


class TestBatchNormOpNHWCFp16(TestBatchNormOp):
    def initConfig(self):
        self.fw_comp_atol = 1e-3
        self.fw_comp_rtol = 1e-3
        self.rev_comp_atol = 1e-3
        self.rev_comp_rtol = 1e-3
        self.dtype = "float16"
        self.shape = [16, 16, 16, 8]
        self.training = True
        self.momentum = 0.1
        self.epsilon = 1e-05
        self.data_format = "NHWC"
        self.use_global_stats = None


@unittest.skipIf(
    not core.is_compiled_with_cuda()
    or not core.is_bfloat16_supported(core.CUDAPlace(0)),
    "core is not compiled with CUDA or does not support bfloat16",
)
class TestBatchNormOpNHWCbf16(TestBatchNormOp):
    def initConfig(self):
        self.fw_comp_atol = 1e-3
        self.fw_comp_rtol = 1e-3
        self.rev_comp_atol = 1e-3
        self.rev_comp_rtol = 1e-3
        self.cinn_atol = 1e-3
        self.cinn_rtol = 1e-3
        self.dtype = "uint16"
        self.shape = [16, 16, 16, 8]
        self.training = True
        self.momentum = 0.1
        self.epsilon = 1e-05
        self.data_format = "NHWC"
        self.use_global_stats = None


class TestBatchNormOpNCHWShape2(TestBatchNormOp):
    def initConfig(self):
        self.fw_comp_atol = 1e-5
        self.fw_comp_rtol = 1e-5
        self.rev_comp_atol = 1e-5
        self.rev_comp_rtol = 1e-5
        self.dtype = "float32"
        self.shape = [4, 8, 16, 32]
        self.training = True
        self.momentum = 0.1
        self.epsilon = 1e-05
        self.data_format = "NCHW"
        self.use_global_stats = None


class TestBatchNormOpNCHWMomentum2(TestBatchNormOp):
    def initConfig(self):
        self.fw_comp_atol = 1e-5
        self.fw_comp_rtol = 1e-5
        self.rev_comp_atol = 1e-5
        self.rev_comp_rtol = 1e-5
        self.dtype = "float32"
        self.shape = [16, 16, 16, 8]
        self.training = True
        self.momentum = 0.9
        self.epsilon = 1e-05
        self.data_format = "NCHW"
        self.use_global_stats = None


class TestBatchNormOpNCHWEps2(TestBatchNormOp):
    def initConfig(self):
        self.fw_comp_atol = 1e-5
        self.fw_comp_rtol = 1e-5
        self.rev_comp_atol = 1e-5
        self.rev_comp_rtol = 1e-5
        self.dtype = "float32"
        self.shape = [16, 16, 16, 8]
        self.training = True
        self.momentum = 0.1
        self.epsilon = 1e-06
        self.data_format = "NCHW"
        self.use_global_stats = None


class TestBatchNormOpNHWCShape2(TestBatchNormOp):
    def initConfig(self):
        self.fw_comp_atol = 1e-5
        self.fw_comp_rtol = 1e-5
        self.rev_comp_atol = 1e-5
        self.rev_comp_rtol = 1e-5
        self.dtype = "float32"
        self.shape = [4, 8, 16, 32]
        self.training = True
        self.momentum = 0.1
        self.epsilon = 1e-05
        self.data_format = "NHWC"
        self.use_global_stats = None


class TestBatchNormOpNHWCMomentum2(TestBatchNormOp):
    def initConfig(self):
        self.fw_comp_atol = 1e-5
        self.fw_comp_rtol = 1e-5
        self.rev_comp_atol = 1e-5
        self.rev_comp_rtol = 1e-5
        self.dtype = "float32"
        self.shape = [16, 16, 16, 8]
        self.training = True
        self.momentum = 0.9
        self.epsilon = 1e-05
        self.data_format = "NHWC"
        self.use_global_stats = None


class TestBatchNormOpNHWCEps2(TestBatchNormOp):
    def initConfig(self):
        self.fw_comp_atol = 1e-5
        self.fw_comp_rtol = 1e-5
        self.rev_comp_atol = 1e-5
        self.rev_comp_rtol = 1e-5
        self.dtype = "float32"
        self.shape = [16, 16, 16, 8]
        self.training = True
        self.momentum = 0.1
        self.epsilon = 1e-06
        self.data_format = "NHWC"
        self.use_global_stats = None


if __name__ == '__main__':
    paddle.enable_static()
    unittest.main()