There is a potential bug in ParallelExecutor when memory optimization is enabled.
Created by: qingqing01
We trained SE-ResNeXt-50 on the ImageNet dataset. The results with ParallelDo:
End pass 1, train_loss 3.65435218811, train_acc1 0.252288132906, train_acc5 0.492917865515,test_loss 3.05385923386, test_acc1 0.330301254988, test_acc5 0.599193990231
#...
End pass 7, train_loss 2.52759289742, train_acc1 0.443090349436, train_acc5 0.696969151497,test_loss 2.00805091858, test_acc1 0.527231276035, test_acc5 0.785744726658
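But with ParallelExecutor, the test loss is much higher and the test accuracy much lower, even though the training metrics are comparable: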
End pass 1, train_loss 3.64258718491, train_acc1 0.259208679199, train_acc5 0.496450036764, test_loss 5.03210878372, test_acc1 0.250996500254, test_acc5 0.451654165983
# ...
End pass 7, train_loss 2.74988269806, train_acc1 0.405690789223, train_acc5 0.660648167133, test_loss 4.71979236603, test_acc1 0.355536520481, test_acc5 0.555923163891
The code using ParallelExecutor:
# Reproduction snippet: builds the SE_ResNeXt classification network, attaches
# a Momentum optimizer, applies fluid.memory_optimize to the main program, and
# creates separate train/test ParallelExecutor instances.
# NOTE(review): indentation appears to have been lost when this snippet was
# pasted -- the bodies of the `if`/`else` statements and the continuation
# lines of multi-line calls below are not indented.
# `layers`, `lr_strategy`, `learning_rate`, `init_model`, `batch_size`,
# `reader` and `SE_ResNeXt` are defined outside this snippet.

# Number of output classes passed to the network.
class_dim = 1000
# Per-sample input shape -- presumably [channels, height, width]; TODO confirm.
image_shape = [3, 224, 224]
# Input placeholders for the image batch and its integer labels.
image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
# Forward pass, per-sample cross-entropy cost, and top-1 / top-5 accuracy.
out = SE_ResNeXt(input=image, class_dim=class_dim, layers=layers)
cost = fluid.layers.cross_entropy(input=out, label=label)
acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
# Mean of the cost; this is the value handed to optimizer.minimize below.
avg_cost = fluid.layers.mean(x=cost)
# The test program is cloned here, i.e. BEFORE optimizer.minimize() and
# fluid.memory_optimize() are called on the default main program.
test_program = fluid.default_main_program().clone(for_test=True)
# Choose the learning-rate schedule: a fixed `learning_rate` when no strategy
# is given, otherwise a piecewise decay built from the strategy's "bd"
# (boundaries) and "lr" (values) entries. Both branches use momentum 0.9 and
# L2 weight decay of 1e-4.
if lr_strategy is None:
optimizer = fluid.optimizer.Momentum(
learning_rate=learning_rate,
momentum=0.9,
regularization=fluid.regularizer.L2Decay(1e-4))
else:
bd = lr_strategy["bd"]
lr = lr_strategy["lr"]
optimizer = fluid.optimizer.Momentum(
learning_rate=fluid.layers.piecewise_decay(
boundaries=bd, values=lr),
momentum=0.9,
regularization=fluid.regularizer.L2Decay(1e-4))
opts = optimizer.minimize(avg_cost)
# Memory optimization is requested for the default main program only; the
# previously cloned test_program is not passed to it.
# NOTE(review): test_exe below shares variables with train_exe, so the
# interaction between this call and the shared variables is a plausible place
# to investigate for the test-metric discrepancy -- unconfirmed.
fluid.memory_optimize(fluid.default_main_program())
# A plain Executor on CUDA device 0 runs the startup program before the
# ParallelExecutors are created.
place = fluid.CUDAPlace(0)
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
# Optionally load persistable variables from `init_model`.
if init_model is not None:
fluid.io.load_persistables(exe, init_model)
# Batched readers for the training and test sets, both using `batch_size`.
train_reader = paddle.batch(reader.train(), batch_size=batch_size)
test_reader = paddle.batch(reader.test(), batch_size=batch_size)
# Feeder for the `image` and `label` inputs declared above.
feeder = fluid.DataFeeder(place=place, feed_list=[image, label])
# Training executor: no main_program is passed; the loss is identified by
# avg_cost's name.
train_exe = fluid.ParallelExecutor(use_cuda=True, loss_name=avg_cost.name)
# Test executor: runs the cloned test_program and shares variables with
# train_exe via share_vars_from.
test_exe = fluid.ParallelExecutor(
use_cuda=True,
main_program=test_program,
share_vars_from=train_exe)
# Names of the variables to fetch: mean loss, top-1 and top-5 accuracy.
fetch_list = [avg_cost.name, acc_top1.name, acc_top5.name]
# ...