diff --git a/paddle/operators/parallel_do_op.cc b/paddle/operators/parallel_do_op.cc index 67f9854c02fa92d0141463088915e720733306fb..dfff6f0888a5258dfbd68d34ef83ca8306aeed2a 100644 --- a/paddle/operators/parallel_do_op.cc +++ b/paddle/operators/parallel_do_op.cc @@ -248,17 +248,19 @@ class ParallelDoGradOp : public framework::OperatorBase { const std::vector &sub_scopes, const platform::PlaceList &places) const { for (auto &s : Outputs(framework::GradVarName(kParameters))) { + VLOG(3) << "Accumulating " << s; + if (s == framework::kEmptyVarName) continue; std::string tmp_name; auto *tmp = sub_scopes[0]->Var(&tmp_name); for (size_t i = 1; i < sub_scopes.size(); ++i) { CopyOrShare(*sub_scopes[i]->FindVar(s), places[0], tmp); - WaitOnPlace(places[0]); + WaitOnPlaces(places); auto sum_op = framework::OpRegistry::CreateOp( "sum", {{"X", {s, tmp_name}}}, {{"Out", {s}}}, framework::AttributeMap{}); - VLOG(3) << sum_op->DebugStringEx(sub_scopes[0]); + VLOG(10) << sum_op->DebugStringEx(sub_scopes[0]); sum_op->Run(*sub_scopes[0], places[0]); WaitOnPlace(places[0]); } @@ -334,16 +336,9 @@ class ParallelDoGradOpDescMaker : public framework::SingleGradOpDescMaker { class ParallelDoGradOpShapeInference : public framework::InferShapeBase { public: void operator()(framework::InferShapeContext *ctx) const override { - std::vector input{kParameters, kInputs}; - std::vector output{kOutputs}; - PADDLE_ENFORCE(ctx->HasInputs(kParameters)); - PADDLE_ENFORCE(ctx->HasOutputs(framework::GradVarName(kParameters))); PADDLE_ENFORCE(ctx->HasInputs(kInputs)); - - for (auto &s : output) { - PADDLE_ENFORCE(ctx->HasInputs(s)); - } + PADDLE_ENFORCE(ctx->HasInputs(kOutputs)); ctx->SetOutputsDim(framework::GradVarName(kParameters), ctx->GetInputsDim(kParameters)); @@ -360,10 +355,14 @@ class ParallelDoGradOpShapeInference : public framework::InferShapeBase { ctx->SetDims({ig_name}, {i_dims[i]}); } - if (ctx->HasInputs(kParameters)) { - PADDLE_ENFORCE(ctx->HasOutputs(framework::GradVarName(kParameters))); - ctx->SetOutputsDim(framework::GradVarName(kParameters), - ctx->GetInputsDim(kParameters)); + auto p_dims = ctx->GetInputsDim(kParameters); + auto pg_names = ctx->Outputs(framework::GradVarName(kParameters)); + for (size_t i = 0; i < pg_names.size(); ++i) { + auto &pg_name = pg_names[i]; + if (pg_name == framework::kEmptyVarName) { + continue; + } + ctx->SetDims({pg_name}, {p_dims[i]}); } } }; diff --git a/python/paddle/v2/fluid/layers/control_flow.py b/python/paddle/v2/fluid/layers/control_flow.py index f29d7712334783529cb69b71ac9513889d3bce7d..71a9459d556e2b3e25b1cd4ae768a8fb8ae41273 100644 --- a/python/paddle/v2/fluid/layers/control_flow.py +++ b/python/paddle/v2/fluid/layers/control_flow.py @@ -277,21 +277,20 @@ class ParallelDo(object): parent_block = self.parent_block() local_inputs = set() - - for op in current_block.ops: - for oname in op.output_names: - for out_var_name in op.output(oname): - local_inputs.add(out_var_name) - + params = list() for var in self.inputs: local_inputs.add(var.name) - params = list() for op in current_block.ops: for iname in op.input_names: for in_var_name in op.input(iname): if in_var_name not in local_inputs: params.append(in_var_name) + + for oname in op.output_names: + for out_var_name in op.output(oname): + local_inputs.add(out_var_name) + params = list(set(params)) return [parent_block.var(name) for name in params] diff --git a/python/paddle/v2/fluid/tests/book/test_recognize_digits.py b/python/paddle/v2/fluid/tests/book/test_recognize_digits.py index c3f6877575488e6e76602a5641d648171b8815f4..d8f0ad89cd89215ac83a133bd27a53c4b904363f 100644 --- a/python/paddle/v2/fluid/tests/book/test_recognize_digits.py +++ b/python/paddle/v2/fluid/tests/book/test_recognize_digits.py @@ -67,6 +67,7 @@ def conv_net(img, label): pool_size=2, pool_stride=2, act="relu") + conv_pool_1 = fluid.layers.batch_norm(conv_pool_1) conv_pool_2 = fluid.nets.simple_img_conv_pool( input=conv_pool_1, filter_size=5, diff --git a/python/paddle/v2/fluid/tests/book/test_word2vec.py b/python/paddle/v2/fluid/tests/book/test_word2vec.py index c9ba70c20a654bb137b2fa03d5a6de278accc6f6..f013d7f1551bdbfb2f725809e2fb4d7d686560fe 100644 --- a/python/paddle/v2/fluid/tests/book/test_word2vec.py +++ b/python/paddle/v2/fluid/tests/book/test_word2vec.py @@ -158,6 +158,4 @@ for use_cuda in (False, True): inject_test_method(use_cuda, is_sparse, parallel) if __name__ == '__main__': - # FIXME(tonyyang-svail): - # This test always fail on MultiGPU CI unittest.main() diff --git a/python/paddle/v2/fluid/tests/test_parallel_op.py b/python/paddle/v2/fluid/tests/test_parallel_op.py index 6b3d72902c755cf215705aaeb31664ea560d0fee..367cc8b1aaf0aff24c685031f33d35becb9eb7ef 100644 --- a/python/paddle/v2/fluid/tests/test_parallel_op.py +++ b/python/paddle/v2/fluid/tests/test_parallel_op.py @@ -198,7 +198,4 @@ class ParallelOpTestMultipleInput(BaseParallelForTest): if __name__ == '__main__': - # FIXME(tonyyang-svail): - # This test always fail on MultiGPU CI - exit(0) unittest.main()