diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet_amp.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet_amp.py new file mode 100644 index 0000000000000000000000000000000000000000..509accd8f8ab256ebaa76f25d381512c763debe5 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet_amp.py @@ -0,0 +1,119 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import math +import time +import unittest + +import numpy as np + +import paddle +import paddle.fluid as fluid +from paddle.fluid.dygraph import declarative, ProgramTranslator +from paddle.fluid.dygraph.nn import BatchNorm, Conv2D, Linear, Pool2D +from test_resnet import ResNet, optimizer_setting, SEED + +# NOTE: Reduce batch_size from 8 to 2 to avoid unittest timeout. +batch_size = 2 +epoch_num = 1 +place = fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda() \ + else fluid.CPUPlace() + +program_translator = ProgramTranslator() + +if fluid.is_compiled_with_cuda(): + fluid.set_flags({'FLAGS_cudnn_deterministic': True}) + + +def train(to_static, build_strategy=None): + """ + Trains ResNet with AMP (`paddle.amp.auto_cast` + `paddle.amp.GradScaler`) on random inputs and returns the accumulated loss as a numpy array. When `to_static` is True, the model defined in dygraph mode is first converted with `paddle.jit.to_static` (passing `build_strategy`) before training. 
+ """ + with fluid.dygraph.guard(place): + np.random.seed(SEED) + paddle.seed(SEED) + paddle.framework.random._manual_program_seed(SEED) + + resnet = ResNet() + if to_static: + resnet = paddle.jit.to_static(resnet, build_strategy=build_strategy) + optimizer = optimizer_setting(parameter_list=resnet.parameters()) + scaler = paddle.amp.GradScaler(init_loss_scaling=1024) + + for epoch in range(epoch_num): + total_loss = 0.0 + total_acc1 = 0.0 + total_acc5 = 0.0 + total_sample = 0 + + for batch_id in range(100): + start_time = time.time() + img = paddle.to_tensor( + np.random.random([batch_size, 3, 224, 224]).astype( + 'float32')) + label = paddle.to_tensor( + np.random.randint( + 0, 100, [batch_size, 1], dtype='int64')) + img.stop_gradient = True + label.stop_gradient = True + + with paddle.amp.auto_cast(): + pred = resnet(img) + # FIXME(Aurelius84): The following cross_entropy seems to bring out a + # precision problem, need to figure out the underlying reason. + # If we remove it, the loss between dygraph and dy2stat is exactly the same. 
+ loss = fluid.layers.cross_entropy(input=pred, label=label) + avg_loss = fluid.layers.mean(x=pred) + acc_top1 = fluid.layers.accuracy(input=pred, label=label, k=1) + acc_top5 = fluid.layers.accuracy(input=pred, label=label, k=5) + + scaled = scaler.scale(avg_loss) + scaled.backward() + scaler.minimize(optimizer, scaled) + resnet.clear_gradients() + + total_loss += avg_loss + total_acc1 += acc_top1 + total_acc5 += acc_top5 + total_sample += 1 + + end_time = time.time() + if batch_id % 2 == 0: + print( "epoch %d | batch step %d, loss %0.3f, acc1 %0.3f, acc5 %0.3f, time %f" % \ + ( epoch, batch_id, total_loss.numpy() / total_sample, \ + total_acc1.numpy() / total_sample, total_acc5.numpy() / total_sample, end_time-start_time)) + if batch_id == 10: + break + + return total_loss.numpy() + + +class TestResnet(unittest.TestCase): + def train(self, to_static): + program_translator.enable(to_static) + return train(to_static) + + def test_resnet(self): + static_loss = self.train(to_static=True) + dygraph_loss = self.train(to_static=False) + self.assertTrue( + np.allclose(static_loss, dygraph_loss), + msg="static_loss: {} \n dygraph_loss: {}".format(static_loss, + dygraph_loss)) + + +if __name__ == '__main__': + unittest.main() diff --git a/tools/parallel_UT_rule.py b/tools/parallel_UT_rule.py index ccf849a2975845c3334d3783d68a7fe8c6959e6f..456ffa3e7706efaedacc42ae6d2c55d92ddf1cc7 100644 --- a/tools/parallel_UT_rule.py +++ b/tools/parallel_UT_rule.py @@ -359,8 +359,8 @@ TETRAD_PARALLEL_JOB_NEW = [ 'test_conv3d_transpose_op', 'test_jit_save_load', 'test_unsqueeze2_op', 'test_eager_deletion_while_op', 'test_zeros_like_op', 'test_c_embedding_op', 'test_regularizer', 'zero_copy_tensor_test', 'test_tensor_shape', - 'test_resnet', 'test_dygraph_weight_norm', 'test_tracer', 'test_list', - 'test_sequence_concat', 'test_adaptive_avg_pool1d', + 'test_resnet', 'test_resnet_amp', 'test_dygraph_weight_norm', 'test_tracer', + 'test_list', 'test_sequence_concat', 
'test_adaptive_avg_pool1d', 'test_elementwise_div_op', 'test_conv1d_transpose_layer', 'test_adamw_op', 'trt_fc_prelu_test', 'test_temporal_shift_op', 'test_naive_best_fit_gpu_memory_limit', 'dlpack_tensor_test', diff --git a/tools/windows/run_unittests.sh b/tools/windows/run_unittests.sh index 88c8ba3dab9f6f05a3e164f2ecf7e9a74969fa80..05365f028f51d9a04ce238a9fb5c9bf88132cf1a 100644 --- a/tools/windows/run_unittests.sh +++ b/tools/windows/run_unittests.sh @@ -96,6 +96,7 @@ disable_wincpu_test="^jit_kernel_test$|\ ^test_bmn$|\ ^test_mobile_net$|\ ^test_resnet_v2$|\ +^test_resnet_amp$|\ ^test_build_strategy$|\ ^test_se_resnet$|\ ^disable_wincpu_test$"