From 44a476c2abd23a317c11dc898be39a094d272b46 Mon Sep 17 00:00:00 2001 From: Leo Chen Date: Wed, 4 Nov 2020 20:38:29 +0800 Subject: [PATCH] support cuda pinned place (#28416) --- paddle/fluid/imperative/amp_auto_cast.cc | 16 ++-- .../test_imperative_auto_mixed_precision.py | 74 ++++++++++++++----- 2 files changed, 65 insertions(+), 25 deletions(-) diff --git a/paddle/fluid/imperative/amp_auto_cast.cc b/paddle/fluid/imperative/amp_auto_cast.cc index d4a1519b07e..d0f3efcdf67 100644 --- a/paddle/fluid/imperative/amp_auto_cast.cc +++ b/paddle/fluid/imperative/amp_auto_cast.cc @@ -49,15 +49,15 @@ inline std::string GetDtypeStr( } inline bool NeedCast(const std::shared_ptr& var) { - if (!platform::is_gpu_place(var->Place())) { - return false; - } - if (var->DataType() == framework::proto::VarType::FP32 || - var->DataType() == framework::proto::VarType::FP16) { - return true; - } else { - return false; + if (platform::is_gpu_place(var->Place()) || + platform::is_cuda_pinned_place(var->Place())) { + // CudaPinndePlace is added for varbase created by dataloader + if (var->DataType() == framework::proto::VarType::FP32 || + var->DataType() == framework::proto::VarType::FP16) { + return true; + } } + return false; } // NOTE: Trace a cast op, so if a var is casted from fp32 to fp16, then the grad diff --git a/python/paddle/fluid/tests/unittests/test_imperative_auto_mixed_precision.py b/python/paddle/fluid/tests/unittests/test_imperative_auto_mixed_precision.py index 2d1d2949a4e..0118f3c800b 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_auto_mixed_precision.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_auto_mixed_precision.py @@ -196,15 +196,27 @@ class TestAmpScaler(unittest.TestCase): np.array_equal(param.numpy(), params_init[param.name])) +def reader_decorator(reader): + def __reader__(): + for item in reader(): + img = np.array(item[0]).astype('float32').reshape(3, 224, 224) + label = np.array(item[1]).astype('int64').reshape(1) + yield img, label + + return __reader__ + + class TestResnet2(unittest.TestCase): - def train_resnet(self, enable_amp=True): + """ + Use paddle-2.0 API + """ + + def train_resnet(self, enable_amp=True, use_data_loader=False): seed = 90 batch_size = train_parameters["batch_size"] batch_num = 1 - paddle.disable_static() - paddle.seed(seed) paddle.framework.random._manual_program_seed(seed) @@ -223,18 +235,35 @@ class TestResnet2(unittest.TestCase): scaler = paddle.amp.GradScaler( enable=enable_amp, init_loss_scaling=2.**10) + if use_data_loader: + train_reader = paddle.batch( + reader_decorator(paddle.dataset.flowers.train(use_xmap=False)), + batch_size=batch_size, + drop_last=True) + train_loader = fluid.io.DataLoader.from_generator( + capacity=4, + use_double_buffer=True, + iterable=True, + return_list=True) + train_loader.set_sample_list_generator(train_reader) + train_reader = train_loader + for batch_id, data in enumerate(train_reader()): if batch_id >= batch_num: break - dy_x_data = np.array( - [x[0].reshape(3, 224, 224) for x in data]).astype('float32') - if len(np.array([x[1] - for x in data]).astype('int64')) != batch_size: - continue - y_data = np.array([x[1] for x in data]).astype('int64').reshape(-1, - 1) - img = paddle.to_tensor(dy_x_data) - label = paddle.to_tensor(y_data) + if use_data_loader: + img, label = data + else: + dy_x_data = np.array( + [x[0].reshape(3, 224, 224) for x in data]).astype('float32') + if len(np.array([x[1] + for x in data]).astype('int64')) != batch_size: + continue + y_data = np.array([x[1] for x in data]).astype('int64').reshape( + -1, 1) + + img = paddle.to_tensor(dy_x_data) + label = paddle.to_tensor(y_data) label.stop_gradient = True with paddle.amp.auto_cast(enable=enable_amp): @@ -262,19 +291,30 @@ class TestResnet2(unittest.TestCase): dy_param_value = {} for param in resnet.parameters(): dy_param_value[param.name] = param.numpy() - - paddle.enable_static() - + if use_data_loader: + train_reader._reset() return dy_out, dy_param_value, dy_grad_value def test_resnet(self): - out_fp32 = self.train_resnet(enable_amp=False) - out_amp = self.train_resnet(enable_amp=True) + with fluid.dygraph.guard(): + out_fp32 = self.train_resnet(enable_amp=False) + out_amp = self.train_resnet(enable_amp=True) + print(out_fp32[0], out_amp[0]) + self.assertTrue(np.allclose(out_fp32[0], out_amp[0], atol=1.e-2)) + + def test_with_data_loader(self): + with fluid.dygraph.guard(): + out_fp32 = self.train_resnet(enable_amp=False, use_data_loader=True) + out_amp = self.train_resnet(enable_amp=True, use_data_loader=True) print(out_fp32[0], out_amp[0]) self.assertTrue(np.allclose(out_fp32[0], out_amp[0], atol=1.e-2)) class TestResnet(unittest.TestCase): + """ + Use paddle-1.x API + """ + def train_resnet(self, enable_amp=True): seed = 90 -- GitLab