Unverified commit de27569e authored by Aurelius84, committed by GitHub

[Dy2Stat] fix diff of cycle GAN model on GPU (#25233)

* fix GPU diff test=develop

* refine code test=develop
Parent 23a4f54b
@@ -40,10 +40,13 @@ from paddle.fluid.dygraph.nn import Conv2D, Conv2DTranspose, BatchNorm
 # Note: Set True to eliminate randomness.
 #    1. For one operation, cuDNN has several algorithms,
 #       some algorithm results are non-deterministic, like convolution algorithms.
+#    2. If include BatchNorm, please set `use_global_stats=True` to avoid using
+#       cudnnBatchNormalizationBackward which is non-deterministic.
 if fluid.is_compiled_with_cuda():
     fluid.set_flags({'FLAGS_cudnn_deterministic': True})
-use_cudnn = True
+# set False to speed up training.
+use_cudnn = False
 step_per_epoch = 10
 lambda_A = 10.0
 lambda_B = 10.0
@@ -110,7 +113,7 @@ class Cycle_Gan(fluid.dygraph.Layer):
         return fake_A, fake_B, cyc_A, cyc_B, g_A_loss, g_B_loss, idt_loss_A, idt_loss_B, cyc_A_loss, cyc_B_loss, g_loss

     @declarative
-    def disriminatorA(self, input_A, input_B):
+    def discriminatorA(self, input_A, input_B):
         """
         Discriminator A of GAN model.
         """
@@ -326,6 +329,7 @@ class conv2d(fluid.dygraph.Layer):
             bias_attr=con_bias_attr)
         if norm:
             self.bn = BatchNorm(
+                use_global_stats=True,  # set True to use deterministic algorithm
                 num_channels=num_filters,
                 param_attr=fluid.ParamAttr(
                     initializer=fluid.initializer.NormalInitializer(1.0, 0.02)),
@@ -381,6 +385,7 @@ class DeConv2D(fluid.dygraph.Layer):
             bias_attr=de_bias_attr)
         if norm:
             self.bn = BatchNorm(
+                use_global_stats=True,  # set True to use deterministic algorithm
                 num_channels=num_filters,
                 param_attr=fluid.ParamAttr(
                     initializer=fluid.initializer.NormalInitializer(1.0, 0.02)),
@@ -429,7 +434,6 @@ class ImagePool(object):
 def reader_creater():
-    # local_random = np.random.RandomState(SEED)
     def reader():
         while True:
             fake_image = np.uint8(
@@ -480,13 +484,8 @@ def optimizer_setting(parameters):
 def train(args, to_static):
-    # FIXME(Aurelius84): Found diff just on GPU and it disappears when we remove the BatchNorm layers.
-    # In dygraph mode, it still exists with different output while executing the every time.
-    # place = fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda() \
-    #     else fluid.CPUPlace()
-    place = fluid.CPUPlace()
+    place = fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda() \
+        else fluid.CPUPlace()
     program_translator.enable(to_static)
@@ -553,7 +552,7 @@ def train(args, to_static):
             fake_pool_A = to_variable(fake_pool_A)

             # optimize the d_A network
-            rec_B, fake_pool_rec_B = cycle_gan.disriminatorA(data_B,
+            rec_B, fake_pool_rec_B = cycle_gan.discriminatorA(data_B,
                                                               fake_pool_B)
             d_loss_A = (fluid.layers.square(fake_pool_rec_B) +
                         fluid.layers.square(rec_B - 1)) / 2.0
@@ -581,7 +580,6 @@ def train(args, to_static):
                 idt_loss_A, g_B_loss, cyc_B_loss, idt_loss_B
             ]
             cur_batch_loss = [x.numpy()[0] for x in cur_batch_loss]
-            loss_data.append(cur_batch_loss)
             batch_time = time.time() - s_time
             t_time += batch_time
@@ -593,6 +591,7 @@ def train(args, to_static):
             if batch_id > args.train_step:
                 break
+            loss_data.append(cur_batch_loss)

     return np.array(loss_data)
...
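For readers who want to reproduce this determinism setup outside the test, below is a minimal standalone sketch (not part of the commit) that combines the two settings the diff relies on: the FLAGS_cudnn_deterministic flag for cuDNN convolution algorithms and use_global_stats=True on BatchNorm. The channel count and the random input are illustrative placeholders.

import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph import to_variable
from paddle.fluid.dygraph.nn import BatchNorm

# 1. Force cuDNN to pick deterministic convolution algorithms on GPU.
if fluid.is_compiled_with_cuda():
    fluid.set_flags({'FLAGS_cudnn_deterministic': True})

place = fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda() \
    else fluid.CPUPlace()

with fluid.dygraph.guard(place):
    # 2. use_global_stats=True makes BatchNorm normalize with its stored
    #    global statistics; the diff uses this to avoid the non-deterministic
    #    cudnnBatchNormalizationBackward path during training.
    bn = BatchNorm(num_channels=3, use_global_stats=True)
    x = to_variable(np.random.random((4, 3, 8, 8)).astype('float32'))
    y = bn(x)  # the same input now yields the same output across runs

The trade-off is that BatchNorm no longer uses per-batch statistics, which the commit accepts so that the dygraph and @declarative runs can be compared exactly.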