Unverified commit 35f1a89e authored by niuliling123, committed by GitHub

Add test case for fp16_guard for amp O2 (#53971)

Parent 5f8e7d8f
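The change adds a static-graph unit test that checks `paddle.static.amp.fp16_guard` under pure-fp16 (AMP O2) training. As rough orientation before the diff, the sketch below shows the workflow the new test exercises; it is not part of this commit, the layer names and sizes are illustrative, and a CUDA build of Paddle is assumed:

import paddle

paddle.enable_static()
main_prog, startup_prog = paddle.static.Program(), paddle.static.Program()
with paddle.static.program_guard(main_prog, startup_prog):
    with paddle.static.amp.fp16_guard():
        # Ops created inside the guard are candidates for fp16 execution.
        x = paddle.static.data(name='x', shape=[None, 16], dtype='float32')
        hidden = paddle.static.nn.fc(x, size=32)
    # Ops created outside the guard keep running in float32.
    loss = paddle.mean(paddle.static.nn.fc(hidden, size=1))
    opt = paddle.optimizer.Momentum(learning_rate=0.01, multi_precision=True)
    opt = paddle.static.amp.decorate(opt, use_pure_fp16=True)
    opt.minimize(loss)

exe = paddle.static.Executor(paddle.CUDAPlace(0))
exe.run(startup_prog)
opt.amp_init(paddle.CUDAPlace(0))  # cast the initialized fp32 parameters where needed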
@@ -18,6 +18,7 @@ import numpy as np
from amp_base_models import AmpTestBase
import paddle
import paddle.nn.functional as F
from paddle import nn
from paddle.static import amp
@@ -153,5 +154,88 @@ class TestGradScaler(AmpTestBase):
        self.assertTrue('check_finite_and_unscale' not in op_list)


class TestFp16Guard(AmpTestBase):
    def test_fp16_guard(self):
        paddle.enable_static()

        def run_example_code():
            place = paddle.CUDAPlace(0)
            main_program = paddle.static.Program()
            startup_program = paddle.static.Program()
            exe = paddle.static.Executor(place)
            fetch_vars = []
            # 1) Use fp16_guard to control the range of fp16 kernels used.
            with paddle.static.program_guard(main_program, startup_program):
                with paddle.static.amp.fp16_guard():
                    data = paddle.static.data(
                        name='X', shape=[None, 1, 28, 28], dtype='float32'
                    )
                    conv2d = paddle.static.nn.conv2d(
                        input=data, num_filters=6, filter_size=3
                    )
                    bn = paddle.static.nn.batch_norm(input=conv2d, act="relu")

                pool = F.max_pool2d(bn, kernel_size=2, stride=2)
                hidden = paddle.static.nn.fc(pool, size=10)
                loss = paddle.mean(hidden)
                fetch_vars = [loss]

                # 2) Create the optimizer and set `multi_precision` to True.
                # Setting `multi_precision` to True helps avoid poor accuracy
                # or slow convergence.
                optimizer = paddle.optimizer.Momentum(
                    learning_rate=0.01, multi_precision=True
                )
                # 3) Ops listed in `custom_black_list` are kept in the float32 computation type.
                amp_list = paddle.static.amp.CustomOpLists(
                    custom_black_list=['pool2d']
                )
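                # Black-listed ops are never cast, so the pool2d op above always runs in float32.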
                # 4) The entry point of Paddle static-graph AMP.
                # Enable pure fp16 (O2) training by setting `use_pure_fp16` to True.
                optimizer = paddle.static.amp.decorate(
                    optimizer,
                    amp_list,
                    init_loss_scaling=128.0,
                    use_dynamic_loss_scaling=True,
                    use_pure_fp16=True,
                )
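                # `use_pure_fp16=True` selects the pure-fp16 (O2) path that this test
                # targets; fp16_guard then limits which ops are actually cast to fp16.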
                # If you don't use the default_startup_program(), you should pass
                # your defined `startup_program` into `minimize`.
                optimizer.minimize(loss)

            exe.run(startup_program)
            # 5) Use `amp_init` after the FP32 parameters are initialized
            # (such as by `exe.run(startup_program)`).
            # If you also want to run the testing program, pass `test_program` into `amp_init`.
            optimizer.amp_init(place, scope=paddle.static.global_scope())

            x_fp32 = np.random.random(size=[1, 1, 28, 28]).astype("float32")
            (loss_data,) = exe.run(
                main_program, feed={"X": x_fp32}, fetch_list=[loss.name]
            )
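            # The conv2d layer was built inside fp16_guard, so its parameters are
            # expected to be cast to float16; the fc layer was created outside the
            # guard and should keep float32 parameters.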
            self.assertEqual(
                paddle.static.global_scope()
                .find_var("conv2d_0.b_0")
                .get_tensor()
                ._dtype(),
                paddle.float16,
            )
            self.assertEqual(
                paddle.static.global_scope()
                .find_var("fc_0.b_0")
                .get_tensor()
                ._dtype(),
                paddle.float32,
            )

        if (
            paddle.is_compiled_with_cuda()
            and len(paddle.static.cuda_places()) > 0
        ):
            run_example_code()
        paddle.disable_static()


if __name__ == '__main__':
    unittest.main()