diff --git a/python/paddle/fluid/backward.py b/python/paddle/fluid/backward.py index 58fa7f1bebc082df11e07f5f97927b417af3e4e8..e02245d05dd9fd5d82a26dcbc00a8b832f93842e 100644 --- a/python/paddle/fluid/backward.py +++ b/python/paddle/fluid/backward.py @@ -220,7 +220,10 @@ def _callback_lookup_(op): :return: callback function """ if op.type == 'parallel_do' and op.attr('use_nccl'): + all_vars = op.block.vars param_names = set(op.input('parameters')) + param_names = filter(lambda name: all_vars[name].stop_gradient is False, + param_names) param_grad_names = [n + "@GRAD" for n in param_names] class ParallelDoCallBack(object): diff --git a/python/paddle/fluid/tests/unittests/test_parallel_op.py b/python/paddle/fluid/tests/unittests/test_parallel_op.py index cd20b430f93498372dd706a46c3a6d9d798721f5..1a7551c57b26f576ab286e7b18177b9120261623 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_op.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_op.py @@ -170,6 +170,7 @@ class ParallelOpTest(BaseParallelForTest): x = fluid.layers.data(shape=[784], dtype='float32', name='img') x = yield x hidden = fluid.layers.fc(input=x, size=200, param_attr='fc1.w') + hidden = fluid.layers.batch_norm(input=hidden) loss = fluid.layers.mean(hidden) yield loss