diff --git a/test/amp/test_amp_api.py b/test/amp/test_amp_api.py
index 179236d909cbe0f0e1dc89fefb5f91ef3e9a11dd..acb47d3302e1975f85c646aa8c62a0e4cd969a9a 100644
--- a/test/amp/test_amp_api.py
+++ b/test/amp/test_amp_api.py
@@ -18,6 +18,7 @@ import numpy as np
 from amp_base_models import AmpTestBase
 
 import paddle
+import paddle.nn.functional as F
 from paddle import nn
 from paddle.static import amp
 
@@ -153,5 +154,88 @@ class TestGradScaler(AmpTestBase):
         self.assertTrue('check_finite_and_unscale' not in op_list)
 
 
+class TestFp16Guard(AmpTestBase):
+    def test_fp16_guard(self):
+        paddle.enable_static()
+
+        def run_example_code():
+            place = paddle.CUDAPlace(0)
+            main_program = paddle.static.Program()
+            startup_program = paddle.static.Program()
+
+            exe = paddle.static.Executor(place)
+
+            fetch_vars = []
+            # 1) Use fp16_guard to control the range of ops that use fp16 kernels.
+            with paddle.static.program_guard(main_program, startup_program):
+                with paddle.static.amp.fp16_guard():
+                    data = paddle.static.data(
+                        name='X', shape=[None, 1, 28, 28], dtype='float32'
+                    )
+                    conv2d = paddle.static.nn.conv2d(
+                        input=data, num_filters=6, filter_size=3
+                    )
+                    bn = paddle.static.nn.batch_norm(input=conv2d, act="relu")
+
+                pool = F.max_pool2d(bn, kernel_size=2, stride=2)
+                hidden = paddle.static.nn.fc(pool, size=10)
+                loss = paddle.mean(hidden)
+                fetch_vars = [loss]
+                # 2) Create the optimizer and set `multi_precision` to True.
+                # Setting `multi_precision` to True helps avoid poor accuracy
+                # or slow convergence in some cases.
+                optimizer = paddle.optimizer.Momentum(
+                    learning_rate=0.01, multi_precision=True
+                )
+                # 3) Ops in `custom_black_list` are kept in the float32 computation type.
+                amp_list = paddle.static.amp.CustomOpLists(
+                    custom_black_list=['pool2d']
+                )
+                # 4) The entry point of Paddle AMP.
+                # Enable pure fp16 training by setting `use_pure_fp16` to True.
+                optimizer = paddle.static.amp.decorate(
+                    optimizer,
+                    amp_list,
+                    init_loss_scaling=128.0,
+                    use_dynamic_loss_scaling=True,
+                    use_pure_fp16=True,
+                )
+                # If you don't use the default_startup_program(), you should pass
+                # your own `startup_program` into `minimize`.
+                optimizer.minimize(loss)
+
+            exe.run(startup_program)
+            # 5) Use `amp_init` after the FP32 parameters are initialized (such as by `exe.run(startup_program)`).
+            # If you want to run a testing pass, pass your `test_program` into `amp_init` as well.
+            optimizer.amp_init(place, scope=paddle.static.global_scope())
+
+            x_fp32 = np.random.random(size=[1, 1, 28, 28]).astype("float32")
+            (loss_data,) = exe.run(
+                main_program, feed={"X": x_fp32}, fetch_list=[loss.name]
+            )
+
+            self.assertEqual(
+                paddle.static.global_scope()
+                .find_var("conv2d_0.b_0")
+                .get_tensor()
+                ._dtype(),
+                paddle.float16,
+            )
+            self.assertEqual(
+                paddle.static.global_scope()
+                .find_var("fc_0.b_0")
+                .get_tensor()
+                ._dtype(),
+                paddle.float32,
+            )
+
+        if (
+            paddle.is_compiled_with_cuda()
+            and len(paddle.static.cuda_places()) > 0
+        ):
+            run_example_code()
+        paddle.disable_static()
+
+
 if __name__ == '__main__':
     unittest.main()