Unverified commit 44a476c2, authored by Leo Chen, committed by GitHub

support cuda pinned place (#28416)

Parent 12b9587b
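The C++ change below relaxes the AMP eligibility check in the imperative tracer: tensors on CUDA pinned memory (e.g. VarBases handed out by a DataLoader) are now treated like GPU tensors when deciding whether to auto-cast between fp32 and fp16. A minimal sketch of the behavior this enables, assuming a CUDA build of PaddlePaddle 2.0 (tensor names and shapes are illustrative only, not part of the commit):

import numpy as np
import paddle

# A float32 tensor placed on CUDA pinned memory, as a double-buffered
# DataLoader would produce it. Before this commit the AMP check skipped
# every non-GPU place; now pinned tensors are also casting candidates.
x = paddle.to_tensor(
    np.random.rand(4, 16).astype('float32'), place=paddle.CUDAPinnedPlace())
w = paddle.to_tensor(np.random.rand(16, 8).astype('float32'))

with paddle.amp.auto_cast(enable=True):
    # matmul is on the AMP white list, so its fp32 inputs, including the
    # pinned one, may now be cast to fp16 before the kernel runs.
    y = paddle.matmul(x, w)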
@@ -49,15 +49,15 @@ inline std::string GetDtypeStr(
 }

 inline bool NeedCast(const std::shared_ptr<VarBase>& var) {
-  if (!platform::is_gpu_place(var->Place())) {
-    return false;
-  }
-  if (var->DataType() == framework::proto::VarType::FP32 ||
-      var->DataType() == framework::proto::VarType::FP16) {
-    return true;
-  } else {
-    return false;
+  if (platform::is_gpu_place(var->Place()) ||
+      platform::is_cuda_pinned_place(var->Place())) {
+    // CudaPinndePlace is added for varbase created by dataloader
+    if (var->DataType() == framework::proto::VarType::FP32 ||
+        var->DataType() == framework::proto::VarType::FP16) {
+      return true;
+    }
   }
+  return false;
 }

 // NOTE: Trace a cast op, so if a var is casted from fp32 to fp16, then the grad
......
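Read as plain logic, the revised NeedCast() admits tensors on either a GPU place or a CUDA pinned place, and only those holding fp32 or fp16 data; everything else falls through to false. The Python paraphrase below is for readability only (illustrative names, not a Paddle API):

def need_cast(is_gpu_place, is_cuda_pinned_place, dtype):
    """Mirror of the revised C++ NeedCast(): a tensor is an AMP casting
    candidate only if it lives on a GPU or CUDA-pinned place and its
    dtype is fp32 or fp16."""
    if is_gpu_place or is_cuda_pinned_place:
        # The pinned place is included for VarBases created by a DataLoader.
        if dtype in ('float32', 'float16'):
            return True
    return False

The Python test changes that follow exercise this path end to end by feeding DataLoader batches into a ResNet under auto_cast.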
@@ -196,15 +196,27 @@ class TestAmpScaler(unittest.TestCase):
                 np.array_equal(param.numpy(), params_init[param.name]))


+def reader_decorator(reader):
+    def __reader__():
+        for item in reader():
+            img = np.array(item[0]).astype('float32').reshape(3, 224, 224)
+            label = np.array(item[1]).astype('int64').reshape(1)
+            yield img, label
+
+    return __reader__
+
+
 class TestResnet2(unittest.TestCase):
-    def train_resnet(self, enable_amp=True):
+    """
+    Use paddle-2.0 API
+    """
+
+    def train_resnet(self, enable_amp=True, use_data_loader=False):
         seed = 90

         batch_size = train_parameters["batch_size"]
         batch_num = 1

-        paddle.disable_static()
-
         paddle.seed(seed)
         paddle.framework.random._manual_program_seed(seed)
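reader_decorator turns each raw flowers sample into a (3, 224, 224) float32 image and a shape-[1] int64 label, which is the layout the sample-list generator below expects; paddle.batch later groups these pairs into batches. A quick shape check on a synthetic sample (illustrative data only):

import numpy as np

# Hypothetical stand-in for one flowers sample: (flattened image, label).
raw_sample = (np.random.rand(3 * 224 * 224), 7)

img = np.array(raw_sample[0]).astype('float32').reshape(3, 224, 224)
label = np.array(raw_sample[1]).astype('int64').reshape(1)
print(img.shape, img.dtype)      # (3, 224, 224) float32
print(label.shape, label.dtype)  # (1,) int64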
@@ -223,18 +235,35 @@ class TestResnet2(unittest.TestCase):
         scaler = paddle.amp.GradScaler(
             enable=enable_amp, init_loss_scaling=2.**10)

+        if use_data_loader:
+            train_reader = paddle.batch(
+                reader_decorator(paddle.dataset.flowers.train(use_xmap=False)),
+                batch_size=batch_size,
+                drop_last=True)
+            train_loader = fluid.io.DataLoader.from_generator(
+                capacity=4,
+                use_double_buffer=True,
+                iterable=True,
+                return_list=True)
+            train_loader.set_sample_list_generator(train_reader)
+            train_reader = train_loader
+
         for batch_id, data in enumerate(train_reader()):
             if batch_id >= batch_num:
                 break
-            dy_x_data = np.array(
-                [x[0].reshape(3, 224, 224) for x in data]).astype('float32')
-            if len(np.array([x[1]
-                             for x in data]).astype('int64')) != batch_size:
-                continue
-            y_data = np.array([x[1] for x in data]).astype('int64').reshape(-1,
-                                                                            1)
-            img = paddle.to_tensor(dy_x_data)
-            label = paddle.to_tensor(y_data)
+            if use_data_loader:
+                img, label = data
+            else:
+                dy_x_data = np.array(
+                    [x[0].reshape(3, 224, 224) for x in data]).astype('float32')
+                if len(np.array([x[1]
+                                 for x in data]).astype('int64')) != batch_size:
+                    continue
+                y_data = np.array([x[1] for x in data]).astype('int64').reshape(
+                    -1, 1)
+                img = paddle.to_tensor(dy_x_data)
+                label = paddle.to_tensor(y_data)
             label.stop_gradient = True

             with paddle.amp.auto_cast(enable=enable_amp):
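The added block above is the heart of the new use_data_loader path: the decorated reader is batched, wrapped in a fluid.io.DataLoader with double buffering, and the loader then yields [img, label] tensor pairs directly (return_list=True), which is where CUDA-pinned VarBases enter the picture. A standalone sketch of the same wiring, with a synthetic fake_reader standing in for paddle.dataset.flowers purely to keep it short (assumes a CUDA build, dygraph mode):

import numpy as np
import paddle
import paddle.fluid as fluid


def fake_reader():
    # Hypothetical stand-in for paddle.dataset.flowers.train(use_xmap=False).
    def __reader__():
        for _ in range(8):
            img = np.random.rand(3, 224, 224).astype('float32')
            label = np.random.randint(0, 102, size=(1, )).astype('int64')
            yield img, label

    return __reader__


with fluid.dygraph.guard():
    batched = paddle.batch(fake_reader(), batch_size=2, drop_last=True)
    loader = fluid.io.DataLoader.from_generator(
        capacity=4, use_double_buffer=True, iterable=True, return_list=True)
    loader.set_sample_list_generator(batched)

    for img, label in loader():
        # With double buffering the loader may hand back batches staged in
        # CUDA pinned memory, which is the case the NeedCast() change covers.
        with paddle.amp.auto_cast(enable=True):
            loss = paddle.mean(img)  # trivial stand-in for the ResNet forward
        break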
@@ -262,19 +291,30 @@ class TestResnet2(unittest.TestCase):
             dy_param_value = {}
             for param in resnet.parameters():
                 dy_param_value[param.name] = param.numpy()

-        paddle.enable_static()
-
+        if use_data_loader:
+            train_reader._reset()
         return dy_out, dy_param_value, dy_grad_value

     def test_resnet(self):
-        out_fp32 = self.train_resnet(enable_amp=False)
-        out_amp = self.train_resnet(enable_amp=True)
+        with fluid.dygraph.guard():
+            out_fp32 = self.train_resnet(enable_amp=False)
+            out_amp = self.train_resnet(enable_amp=True)
+        print(out_fp32[0], out_amp[0])
+        self.assertTrue(np.allclose(out_fp32[0], out_amp[0], atol=1.e-2))
+
+    def test_with_data_loader(self):
+        with fluid.dygraph.guard():
+            out_fp32 = self.train_resnet(enable_amp=False, use_data_loader=True)
+            out_amp = self.train_resnet(enable_amp=True, use_data_loader=True)
         print(out_fp32[0], out_amp[0])
         self.assertTrue(np.allclose(out_fp32[0], out_amp[0], atol=1.e-2))


 class TestResnet(unittest.TestCase):
+    """
+    Use paddle-1.x API
+    """
+
     def train_resnet(self, enable_amp=True):
         seed = 90
......
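The middle of train_resnet is collapsed in this view; for reference, the standard Paddle 2.0 dygraph AMP step built from the two pieces visible here (paddle.amp.auto_cast and paddle.amp.GradScaler) looks roughly like the sketch below. The model and optimizer are placeholders and a CUDA device is assumed.

import paddle

# Placeholder network and optimizer; any dygraph Layer works the same way.
model = paddle.nn.Linear(10, 10)
optimizer = paddle.optimizer.SGD(
    learning_rate=0.01, parameters=model.parameters())
scaler = paddle.amp.GradScaler(enable=True, init_loss_scaling=2.**10)

data = paddle.rand([4, 10])
with paddle.amp.auto_cast(enable=True):
    loss = paddle.mean(model(data))

scaled = scaler.scale(loss)          # scale the loss to avoid fp16 underflow
scaled.backward()                    # backward on the scaled loss
scaler.minimize(optimizer, scaled)   # unscale grads, step, update the scale
optimizer.clear_grad()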