提交 17299b8d 编写于 作者: W WangXi 提交者: gongweibao

fix batch_norm_grad infer shape=0 & add allreduce enforce shape, test=develop (#21801)

上级 a3a3558d
...@@ -84,6 +84,11 @@ void AllReduceOpHandle::AllReduceImpl( ...@@ -84,6 +84,11 @@ void AllReduceOpHandle::AllReduceImpl(
if (i == 0) { if (i == 0) {
numel = static_cast<int64_t>(lod_tensor.numel()); numel = static_cast<int64_t>(lod_tensor.numel());
// only enforce place0, we will enforce other place numel == place0 numel
PADDLE_ENFORCE_GT(
numel, 0, platform::errors::InvalidArgument(
"The numel of tensos=[%s] must > 0. But now numel=[%d]",
in_var_handles[i]->name(), numel));
dtype = lod_tensor.type(); dtype = lod_tensor.type();
is_gpu_place = platform::is_gpu_place(lod_tensor.place()); is_gpu_place = platform::is_gpu_place(lod_tensor.place());
} }
......
...@@ -444,11 +444,17 @@ void BatchNormGradOp::InferShape(framework::InferShapeContext *ctx) const { ...@@ -444,11 +444,17 @@ void BatchNormGradOp::InferShape(framework::InferShapeContext *ctx) const {
// check output // check output
PADDLE_ENFORCE(ctx->HasOutput(framework::GradVarName("X")), ""); PADDLE_ENFORCE(ctx->HasOutput(framework::GradVarName("X")), "");
if (ctx->HasOutput(framework::GradVarName("Scale"))) {
PADDLE_ENFORCE(ctx->HasOutput(framework::GradVarName("Bias")), const bool has_scale_grad = ctx->HasOutput(framework::GradVarName("Scale"));
"Output(Scale@GRAD) and Output(Bias@GRAD) should not be " const bool has_bias_grad = ctx->HasOutput(framework::GradVarName("Bias"));
"null at same time");
} PADDLE_ENFORCE_EQ((has_scale_grad == has_bias_grad), true,
platform::errors::InvalidArgument(
"Output(Scale@GRAD) and Output(Bias@GRAD) must be null "
"or not be null at same time. But now, "
"has Scale@Grad=[%d], has Bias@GRAD=[%d]",
has_scale_grad, has_bias_grad));
const bool use_global_stats = ctx->Attrs().Get<bool>("use_global_stats"); const bool use_global_stats = ctx->Attrs().Get<bool>("use_global_stats");
if (use_global_stats) { if (use_global_stats) {
PADDLE_ENFORCE(!ctx->Attrs().Get<bool>("use_mkldnn"), PADDLE_ENFORCE(!ctx->Attrs().Get<bool>("use_mkldnn"),
...@@ -463,7 +469,8 @@ void BatchNormGradOp::InferShape(framework::InferShapeContext *ctx) const { ...@@ -463,7 +469,8 @@ void BatchNormGradOp::InferShape(framework::InferShapeContext *ctx) const {
: x_dims[x_dims.size() - 1]); : x_dims[x_dims.size() - 1]);
ctx->SetOutputDim(framework::GradVarName("X"), x_dims); ctx->SetOutputDim(framework::GradVarName("X"), x_dims);
if (ctx->HasOutput(framework::GradVarName("Scale"))) { // has_scale_grad == has_bias_grad, judge has_scale_grad is enough
if (has_scale_grad) {
ctx->SetOutputDim(framework::GradVarName("Scale"), {C}); ctx->SetOutputDim(framework::GradVarName("Scale"), {C});
ctx->SetOutputDim(framework::GradVarName("Bias"), {C}); ctx->SetOutputDim(framework::GradVarName("Bias"), {C});
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册