diff --git a/paddle/fluid/framework/details/fetch_async_op_handle.cc b/paddle/fluid/framework/details/fetch_async_op_handle.cc index 1dbf5956ab52b6600083fd3934812dfd31e398cb..abe02849141c046930147165b208b7ef2b5dcd33 100644 --- a/paddle/fluid/framework/details/fetch_async_op_handle.cc +++ b/paddle/fluid/framework/details/fetch_async_op_handle.cc @@ -164,24 +164,32 @@ void FetchAsyncOpHandle::FetchMergedLodTensor( } } - bool find_first_dims = false; - for (auto *t : src_lodtensors) { - if (t->numel() && t->IsInitialized()) { - if (!find_first_dims) { - new_dim = t->dims(); - find_first_dims = true; - } else { - new_dim[0] += t->dims()[0]; - } - } - } - // check src type,layout,dim,lod consistence for (size_t i = 1; i < src_lodtensors.size(); ++i) { CheckTensorAttrs( src_lodtensors[i], new_type, new_layout, check_dim, new_lod, offset_); } + auto rank = src_lodtensors[0]->dims().size(); + + // for 0D tensor, can't concat eath tensor. So stack 0D and concat 1+D tensor + if (rank == 0) { + int src_lodtensor_size = src_lodtensors.size(); + new_dim = phi::make_ddim(std::vector({src_lodtensor_size})); + } else { + bool find_first_dims = false; + for (auto *t : src_lodtensors) { + if (t->numel() && t->IsInitialized()) { + if (!find_first_dims) { + new_dim = t->dims(); + find_first_dims = true; + } else { + new_dim[0] += t->dims()[0]; + } + } + } + } + // set dst tensor dst_lodtensor->Resize(new_dim); dst_lodtensor->set_layout(src_lodtensors[0]->layout()); @@ -195,9 +203,17 @@ void FetchAsyncOpHandle::FetchMergedLodTensor( } // slice and memcpy + // for 0D tensor, can't concat eath tensor, stack them. for 1+D tensor, concat + // them int begin = 0; + int end = 0; for (auto *src : src_lodtensors) { - int end = begin + src->dims()[0]; + if (rank == 0) { + end = begin + 1; + } else { + end = begin + src->dims()[0]; + } + if (end == begin) { continue; } diff --git a/paddle/fluid/framework/details/scale_loss_grad_op_handle.cc b/paddle/fluid/framework/details/scale_loss_grad_op_handle.cc index caffeba538daeda7ab620428451899078f88d274..9c9d670a275b0a5bfa7ba52f463c5ff31efbb611 100644 --- a/paddle/fluid/framework/details/scale_loss_grad_op_handle.cc +++ b/paddle/fluid/framework/details/scale_loss_grad_op_handle.cc @@ -16,6 +16,7 @@ #include +#include "paddle/fluid/framework/operator.h" #include "paddle/fluid/platform/profiler/event_tracing.h" namespace phi { @@ -101,6 +102,7 @@ std::string ScaleLossGradOpHandle::LossGradName() const { void ScaleLossGradOpHandle::RunImpl() { platform::RecordEvent record_event( Name(), platform::TracerEventType::UserDefined, 2); + RunOnVar(local_exec_scopes_[0]->FindVar(LossGradName()), true); } diff --git a/paddle/fluid/framework/infershape_utils.cc b/paddle/fluid/framework/infershape_utils.cc index c7cd883cec4c4a64f8db3b7fcf4f619e1fb48f48..de9f6a4745fd00996187b85797a285271bc99aa1 100644 --- a/paddle/fluid/framework/infershape_utils.cc +++ b/paddle/fluid/framework/infershape_utils.cc @@ -213,8 +213,9 @@ DDim CompatMetaTensor::dims() const { } else { auto* var = PADDLE_GET_CONST(VarDesc*, var_); - return var->GetShape().empty() ? phi::make_ddim({0UL}) - : phi::make_ddim(var->GetShape()); + return phi::make_ddim(var->GetShape()); + // return var->GetShape().empty() ? 
phi::make_ddim({0UL}) : + // phi::make_ddim(var->GetShape()); } } diff --git a/paddle/fluid/framework/lod_tensor.cc b/paddle/fluid/framework/lod_tensor.cc index bd78f9b0a0ea758ce42486085d5b3530528bf426..9124dc2fbd026ca52056887deec71f9b59dd293b 100644 --- a/paddle/fluid/framework/lod_tensor.cc +++ b/paddle/fluid/framework/lod_tensor.cc @@ -262,6 +262,16 @@ std::vector SplitLoDTensor( platform::errors::InvalidArgument( "Place number cannot be empty when splitting.")); src.check_memory_size(); + auto rank = src.dims().size(); + // if rank is 0, just return #places.size() copys of src + if (rank == 0) { + LoDTensor dst; + framework::TensorCopy(src, src.place(), &dst); + std::vector ret; + ret.emplace_back(std::move(dst)); + return ret; + } + size_t batch_size = src.lod().empty() ? static_cast(src.dims()[0]) : src.lod()[0].size() - 1; @@ -349,6 +359,7 @@ void MergeLoDTensor(LoDTensor *target, } LoD new_lod = lod_tensors[0]->lod(); + auto rank = lod_tensors[0]->dims().size(); for (size_t i = 1; i < lod_tensors.size(); ++i) { auto *t = lod_tensors[i]; @@ -369,16 +380,24 @@ void MergeLoDTensor(LoDTensor *target, "actual layout is %s.", DataLayoutToString(new_layout), DataLayoutToString(t->layout()))); - PADDLE_ENFORCE_EQ( - phi::product(new_dim) / new_dim[0], - phi::product(t->dims()) / t->dims()[0], - platform::errors::InvalidArgument( - "LoDTensor dimension does not match, all dimensions except the " - "first dimension need to be equal," - "but expected dimension is %s, actual dimension is %s.", - new_dim, - t->dims())); - new_dim[0] += t->dims()[0]; + auto tensor_dims = t->dims(); + PADDLE_ENFORCE_EQ(tensor_dims.size(), + new_dim.size(), + platform::errors::InvalidArgument( + "dimensions of LoDTensor does not match")); + for (int j = 1; j < t->dims().size(); j++) { + PADDLE_ENFORCE_EQ( + tensor_dims[j], + new_dim[j], + platform::errors::InvalidArgument( + "LoDTensor.ddim[%d] should eaqual to %d, but is %d", + j, + new_dim[j], + tensor_dims[j])); + } + if (rank > 0) { + new_dim[0] += t->dims()[0]; + } } auto &lod = t->lod(); diff --git a/paddle/fluid/framework/op_desc.cc b/paddle/fluid/framework/op_desc.cc index 3042dfa00d68cf2d0c7a098c761d5ca26fcfd7c9..fc3a8bf24dbcff58ae1f40f196a249b55127cada 100644 --- a/paddle/fluid/framework/op_desc.cc +++ b/paddle/fluid/framework/op_desc.cc @@ -362,7 +362,7 @@ class CompileTimeInferShapeContext : public InferShapeContext { DDim res; try { auto shape = var->GetShape(); - res = shape.empty() ? phi::make_ddim({0UL}) : phi::make_ddim(shape); + res = phi::make_ddim(shape); } catch (...) { VLOG(5) << "GetDim of variable " << name << " error"; std::rethrow_exception(std::current_exception()); @@ -1258,7 +1258,7 @@ std::vector CompileTimeInferShapeContext::GetRepeatedDims( try { auto shapes = var->GetShapes(); for (const auto &s : shapes) { - res.push_back(s.empty() ? phi::make_ddim({0UL}) : phi::make_ddim(s)); + res.push_back(phi::make_ddim(s)); } } catch (...) { VLOG(5) << "GetRepeatedDim of variable " << name << " error."; diff --git a/paddle/phi/core/utils/dim.h b/paddle/phi/core/utils/dim.h index 9fd1dbf4d05b6eb5f8a3b4195e7eb64e828bc2d1..d6740d907a0581ee0b7d80cbf2992875621f38b2 100644 --- a/paddle/phi/core/utils/dim.h +++ b/paddle/phi/core/utils/dim.h @@ -72,10 +72,15 @@ HOSTDEVICE inline Dim make_dim(Args... 
idxes) { // Allows us to output a Dim template inline std::ostream& operator<<(std::ostream& os, const Dim& d) { - os << d[0]; - for (int i = 1; i < D; ++i) { - os << ", " << d[i]; + if (D > 0) { + os << d[0]; + for (int i = 1; i < D; ++i) { + os << ", " << d[i]; + } + } else { + os << ""; } + return os; } diff --git a/paddle/phi/infermeta/multiary.cc b/paddle/phi/infermeta/multiary.cc index 5868d9741221862721ad8dbb1b2f5d23359ce696..84f83601ba6d931eb9ba2743072272565ef3570e 100644 --- a/paddle/phi/infermeta/multiary.cc +++ b/paddle/phi/infermeta/multiary.cc @@ -305,9 +305,14 @@ void AddNInferMeta(const std::vector& x, if (x[i]->is_selected_rows() && x_dim.size() == 1) { continue; } + // for zero-sized tensor if (phi::product(x_dim) == 0) { continue; } + // for 0D tensor + if (x_dim.size() == 0) { + continue; + } if (phi::product(in_dim) == 0) { in_dim = x_dim; } else { @@ -2547,8 +2552,8 @@ void WarpctcInferMeta(const MetaTensor& logits, const MetaTensor& labels_length, int blank, bool norm_by_times, - MetaTensor* warpctcgrad, - MetaTensor* loss) { + MetaTensor* loss, + MetaTensor* warpctcgrad) { auto logits_dims = logits.dims(); int sequence_width = 0; diff --git a/paddle/phi/infermeta/multiary.h b/paddle/phi/infermeta/multiary.h index 6c790ca3b36be11e63e43751c4fcd88f8576ed74..6a8b5511a6c4e01b679669e64c4b96bdfd1d7ce3 100644 --- a/paddle/phi/infermeta/multiary.h +++ b/paddle/phi/infermeta/multiary.h @@ -483,8 +483,8 @@ void WarpctcInferMeta(const MetaTensor& logits, const MetaTensor& labels_length, int blank, bool norm_by_times, - MetaTensor* warpctcgrad, - MetaTensor* loss); + MetaTensor* loss, + MetaTensor* warpctcgrad); void WhereInferMeta(const MetaTensor& condition, const MetaTensor& x, diff --git a/paddle/phi/infermeta/unary.cc b/paddle/phi/infermeta/unary.cc index 2fdb32644adde6a2c5b369041431f606b229bad9..8d085a05a4c91472450dfa5281d64113d8525699 100644 --- a/paddle/phi/infermeta/unary.cc +++ b/paddle/phi/infermeta/unary.cc @@ -2668,7 +2668,7 @@ DDim ReduceInferDim(const MetaTensor& x, x_rank, errors::InvalidArgument( "The reduce dim index %d should be in the " - "range [-dimension(X), dimension(X)] " + "range [ -dimension(X), dimension(X) ) " "which dimesion = %d. But received dim index = %d.", i, x_rank, @@ -2677,7 +2677,7 @@ DDim ReduceInferDim(const MetaTensor& x, -x_rank, errors::InvalidArgument( "The reduce dim index %d should be in the " - "range [-dimension(X), dimension(X)] " + "range [ -dimension(X), dimension(X) ) " "which dimesion = %d. But received dim index = %d.", i, x_rank, diff --git a/paddle/phi/kernels/funcs/unsqueeze.h b/paddle/phi/kernels/funcs/unsqueeze.h index 2d77c809bf9c9fd16d6c92d99ad686dae7642203..6dd69c8212a22651a40c60e81f2ca49c0d37613e 100644 --- a/paddle/phi/kernels/funcs/unsqueeze.h +++ b/paddle/phi/kernels/funcs/unsqueeze.h @@ -36,6 +36,24 @@ inline DDim GetOutputSqueezeShape(const std::vector squeeze_dims, } } else { for (size_t i = 0; i < num_squeeze_dims; ++i) { + if (in_dims.size() == 0) { + PADDLE_ENFORCE_GE( + squeeze_dims[i], + -1, + phi::errors::InvalidArgument( + "For 0D Tensor, Each axis in Attr(axes) should be in the range " + "of [-1, 0]" + "But current axis is:%d, input tensor's shape = [%s].")); + PADDLE_ENFORCE_LE( + squeeze_dims[i], + 0, + phi::errors::InvalidArgument( + "For 0D Tensor, Each axis in Attr(axes) should be in the range " + "of [-1, 0]" + "But current axis is:%d, input tensor's shape = [%s].")); + continue; + } + int current = squeeze_dims[i] < 0 ? 
squeeze_dims[i] + in_dims.size() : squeeze_dims[i]; diff --git a/paddle/phi/kernels/onednn/reduce_kernel_impl.h b/paddle/phi/kernels/onednn/reduce_kernel_impl.h index a3c2f149a9a81e05c32274dfe9a2fdd530a85880..4665876469cd591f76eb3e4efe23bb770fdeed4e 100644 --- a/paddle/phi/kernels/onednn/reduce_kernel_impl.h +++ b/paddle/phi/kernels/onednn/reduce_kernel_impl.h @@ -25,8 +25,7 @@ inline std::vector CalculateReducedDims( bool keep_dim) { if (keep_dim) return vectorize(output->dims()); - if (reduce_all && reduce_dims.size() > 0) - return std::vector(input->dims().size(), 1); + if (reduce_all) return std::vector(input->dims().size(), 1); std::vector output_dims(vectorize(input->dims())); for (size_t i = 0; i < reduce_dims.size(); ++i) { diff --git a/paddle/phi/tests/core/test_ddim.cc b/paddle/phi/tests/core/test_ddim.cc index e1217042259e08e9798e7611511a03e2d355df3a..72c91b452296f968a6b89bb545b3ed1d8ad64b73 100644 --- a/paddle/phi/tests/core/test_ddim.cc +++ b/paddle/phi/tests/core/test_ddim.cc @@ -21,18 +21,43 @@ namespace phi { namespace tests { TEST(DDim, Equality) { + // default construct ddim + phi::DDim default_ddim; + EXPECT_EQ(arity(default_ddim), 1); + EXPECT_EQ(default_ddim[0], 0); + + // construct a zero-DDim + phi::DDim zero_ddim = phi::make_ddim({}); + EXPECT_EQ(arity(zero_ddim), 0); + EXPECT_EQ(zero_ddim.size(), 0); + EXPECT_EQ(phi::product(zero_ddim), 1); + + std::vector zero_vec; + phi::DDim zero_ddim1 = phi::make_ddim(zero_vec); + EXPECT_EQ(arity(zero_ddim1), 0); + EXPECT_EQ(zero_ddim1.size(), 0); + EXPECT_EQ(phi::product(zero_ddim1), 1); + + // zero-DDim to vector + std::vector zero_ddim_vec = phi::vectorize(zero_ddim); + EXPECT_EQ(zero_ddim_vec.size(), size_t(0)); + + // reshape zero-DDim + std::vector reshape_vec = {1}; + phi::DDim reshape_ddim = zero_ddim.reshape(reshape_vec); + EXPECT_EQ(arity(reshape_ddim), 1); + EXPECT_EQ(reshape_ddim.size(), 1); + EXPECT_EQ(phi::product(reshape_ddim), 1); + // construct a DDim from an initialization list phi::DDim ddim = phi::make_ddim({9, 1, 5}); EXPECT_EQ(ddim[0], 9); EXPECT_EQ(ddim[1], 1); EXPECT_EQ(ddim[2], 5); - // construct a DDim from a vector - std::vector vec({9, 1, 5}); - phi::DDim vddim = phi::make_ddim(vec); - EXPECT_EQ(ddim[0], 9); - EXPECT_EQ(ddim[1], 1); - EXPECT_EQ(ddim[2], 5); + // arity of a DDim + EXPECT_EQ(phi::arity(ddim), 3); + EXPECT_EQ(ddim.size(), 3); // mutate a DDim ddim[1] = 2; @@ -40,6 +65,13 @@ TEST(DDim, Equality) { ddim[0] = 6; EXPECT_EQ(ddim[0], 6); + // construct a DDim from a vector + std::vector vec({9, 1, 5}); + phi::DDim vddim = phi::make_ddim(vec); + EXPECT_EQ(vddim[0], 9); + EXPECT_EQ(vddim[1], 1); + EXPECT_EQ(vddim[2], 5); + // vectorize a DDim std::vector res_vec = phi::vectorize(vddim); EXPECT_EQ(res_vec[0], 9); @@ -51,37 +83,45 @@ TEST(DDim, Equality) { EXPECT_EQ(res_vec[1], 2); EXPECT_EQ(res_vec[2], 1); - // arity of a DDim - EXPECT_EQ(phi::arity(ddim), 3); - EXPECT_EQ(ddim.size(), 3); - // product of a DDim EXPECT_EQ(phi::product(vddim), 45); EXPECT_EQ(phi::product(phi::make_ddim({3, 2, 5, 3})), 90); // slice a DDim phi::DDim ddim2 = phi::make_ddim({1, 2, 3, 4, 5, 6}); - phi::DDim ss = phi::slice_ddim(ddim2, 2, 5); - EXPECT_EQ(arity(ss), 3); - EXPECT_EQ(ss[0], 3); - EXPECT_EQ(ss[1], 4); - EXPECT_EQ(ss[2], 5); - phi::DDim ss2 = phi::slice_ddim(ddim2, 0, 6); - EXPECT_EQ(arity(ss2), 6); - EXPECT_EQ(ss2[0], 1); - EXPECT_EQ(ss2[1], 2); - EXPECT_EQ(ss2[2], 3); - EXPECT_EQ(ss2[3], 4); - EXPECT_EQ(ss2[4], 5); - EXPECT_EQ(ss2[5], 6); + phi::DDim slice_dim1 = phi::slice_ddim(ddim2, 2, 5); + 
EXPECT_EQ(arity(slice_dim1), 3); + EXPECT_EQ(slice_dim1[0], 3); + EXPECT_EQ(slice_dim1[1], 4); + EXPECT_EQ(slice_dim1[2], 5); + + phi::DDim slice_dim2 = phi::slice_ddim(ddim2, 0, 6); + EXPECT_EQ(arity(slice_dim2), 6); + EXPECT_EQ(slice_dim2[0], 1); + EXPECT_EQ(slice_dim2[1], 2); + EXPECT_EQ(slice_dim2[2], 3); + EXPECT_EQ(slice_dim2[3], 4); + EXPECT_EQ(slice_dim2[4], 5); + EXPECT_EQ(slice_dim2[5], 6); + + phi::DDim slice_dim3 = phi::slice_ddim(ddim2, 1, 1); + EXPECT_EQ(arity(slice_dim3), 0); + EXPECT_EQ(slice_dim3.size(), 0); + EXPECT_EQ(phi::product(slice_dim3), 1); } TEST(DDim, Print) { // print a DDim - std::stringstream ss; + std::stringstream ss1; phi::DDim ddim = phi::make_ddim({2, 3, 4}); - ss << ddim; - EXPECT_EQ("2, 3, 4", ss.str()); + ss1 << ddim; + EXPECT_EQ("2, 3, 4", ss1.str()); + + // print a zero-DDim + std::stringstream ss2; + phi::DDim zero_ddim = phi::make_ddim({}); + ss2 << zero_ddim; + EXPECT_EQ("", ss2.str()); } } // namespace tests diff --git a/python/paddle/distributed/fleet/utils/hybrid_parallel_inference.py b/python/paddle/distributed/fleet/utils/hybrid_parallel_inference.py index 7f2b768faab5e08f6a22fd70de3943ea1b0def60..fd81c5b11f4a02929d2729fd6306355a6b3ac243 100644 --- a/python/paddle/distributed/fleet/utils/hybrid_parallel_inference.py +++ b/python/paddle/distributed/fleet/utils/hybrid_parallel_inference.py @@ -688,8 +688,10 @@ class HybridParallelInferenceHelper(object): }) else: var_shape = list(var.shape) - var_shape[0] = self.micro_batch_size if var_shape[ - 0] < 0 else var_shape[0] + print(var_name) + if len(var.shape) > 0: + var_shape[0] = self.micro_batch_size if var_shape[ + 0] < 0 else var_shape[0] block._insert_op_without_sync( index=index, type='recv_v2', diff --git a/python/paddle/fluid/dygraph/dygraph_to_static/convert_operators.py b/python/paddle/fluid/dygraph/dygraph_to_static/convert_operators.py index d5fb80ce57575e967a62bfa6a76b163d7ed40164..2e8aa40541ca5091ef5f243934185c135af343aa 100644 --- a/python/paddle/fluid/dygraph/dygraph_to_static/convert_operators.py +++ b/python/paddle/fluid/dygraph/dygraph_to_static/convert_operators.py @@ -462,6 +462,7 @@ def convert_len(var): `shape_op` in var.block. """ if isinstance(var, Variable): + assert var.ndim > 0, "len() of a 0D tensor is wrong" if var.type in [ core.VarDesc.VarType.LOD_TENSOR, core.VarDesc.VarType.SELECTED_ROWS diff --git a/python/paddle/fluid/dygraph/math_op_patch.py b/python/paddle/fluid/dygraph/math_op_patch.py index b875013415867f9adf637c0c34b9a93fef6983b6..72e770e8e50b7c41b487333024db22bbf7b744b9 100644 --- a/python/paddle/fluid/dygraph/math_op_patch.py +++ b/python/paddle/fluid/dygraph/math_op_patch.py @@ -144,6 +144,7 @@ def monkey_patch_math_varbase(): return int(var.numpy().flatten()[0]) def _len_(var): + assert var.ndim > 0, "len() of a 0D tensor is wrong" if var.type == core.VarDesc.VarType.VOCAB: return len(var.value().get_map_tensor()) elif var.type == core.VarDesc.VarType.STRINGS: diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index 01a42dce9e15bea4bfe4cbecf741c630d9fcc0df..b7814bc19c47543d9078901df54d62c0bc870251 100755 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -208,6 +208,30 @@ OP_NAMEMAPPING = { } +def _get_reduce_dim(dim, input): + """ + Internal function for reduce_sum, reduce_mean, reduce_max, reduce_min, reduce_prod. + It computes the attribute reduce_all value based on axis. 
+ """ + if dim is not None and not isinstance(dim, list): + if isinstance(dim, (tuple, range)): + dim = list(dim) + elif isinstance(dim, int): + dim = [dim] + else: + raise TypeError( + "The type of dim must be int, list, tuple or range, but received {}" + .format(type(axis))) + if dim is None: + dim = [] + if dim == [] or len(dim) == len(input.shape): + reduce_all = True + else: + reduce_all = False + + return reduce_all, dim + + @dygraph_only def _elementwise_op_in_dygraph(x, y, @@ -4689,29 +4713,14 @@ def reduce_sum(input, dim=None, keep_dim=False, name=None): if dim is not None and not isinstance(dim, list): dim = [dim] + reduce_all, dim = _get_reduce_dim(dim, input) + if in_dygraph_mode(): - reduce_all = True if dim == None or dim == [] or len(dim) == len( - input.shape) else False - dim = dim if dim != None and dim != [] else [0] - if reduce_all: - return _C_ops.sum(input, [], None, keep_dim) - else: - return _C_ops.sum(input, dim, None, keep_dim) + return _C_ops.sum(input, dim, None, keep_dim) elif _in_legacy_dygraph(): - reduce_all = True if dim == None or dim == [] or len(dim) == len( - input.shape) else False - dim = dim if dim != None and dim != [] else [0] return _legacy_C_ops.reduce_sum(input, 'dim', dim, 'keep_dim', keep_dim, 'reduce_all', reduce_all) - attrs = { - 'dim': - dim if dim != None and dim != [] else [0], - 'keep_dim': - keep_dim, - 'reduce_all': - True - if dim == None or dim == [] or len(dim) == len(input.shape) else False - } + attrs = {'dim': dim, 'keep_dim': keep_dim, 'reduce_all': reduce_all} check_variable_and_dtype( input, 'input', ['float16', 'float32', 'float64', 'int32', 'int64'], 'reduce_sum') diff --git a/python/paddle/fluid/layers/utils.py b/python/paddle/fluid/layers/utils.py index 6f7b1faf7fcf2a6b123e2cdd464aea07457ea8f3..cfb2615cc58ad0a20632044291e9d4672ec1bbe4 100644 --- a/python/paddle/fluid/layers/utils.py +++ b/python/paddle/fluid/layers/utils.py @@ -363,9 +363,6 @@ def get_shape_tensor_inputs(inputs, attrs, shape, op_type): shape = cast(shape, 'int32') inputs["ShapeTensor"] = shape elif isinstance(shape, (list, tuple)): - assert len(shape) > 0, ("The size of 'shape' in" + op_type + - " can't be zero, " - "but received %s." 
% len(shape)) attrs["shape"] = _get_attr_shape(shape) if _contain_var(shape): inputs['ShapeTensorList'] = _get_shape_tensor(shape) diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/hybrid_parallel_inference_helper.py b/python/paddle/fluid/tests/unittests/collective/fleet/hybrid_parallel_inference_helper.py index da9823184512f5a267e1e8a674a6f037e13844c8..83b0aaf76c888d616b255b696de762d42811c8ee 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/hybrid_parallel_inference_helper.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/hybrid_parallel_inference_helper.py @@ -82,6 +82,7 @@ class TestHybridParallelInferenceHelperClass(unittest.TestCase): value=0, force_cpu=False, name="cond_int") + print(cond_int.shape) cond = layers.less_than(x=step_idx, y=max_len) while_op = layers.While(cond, is_test=True) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_len.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_len.py index b23acbf860c5c44792bfbb0966d3b5f3af73270c..c2a7fb2d1b9df26d016e623f5453a2d56071dc3e 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_len.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_len.py @@ -82,6 +82,7 @@ def len_with_selected_rows(place): # create selected_rows variable var = block.create_var(name="X", dtype="float32", + shape=[-1], persistable=True, type=fluid.core.VarDesc.VarType.SELECTED_ROWS) # y is Variable(SelectedRows) diff --git a/python/paddle/fluid/tests/unittests/op_test.py b/python/paddle/fluid/tests/unittests/op_test.py index e4360253aeeae4cb37ee9857b97a879fd3aa3514..2113e120b21d7707f81594c7adaa0765f593080f 100644 --- a/python/paddle/fluid/tests/unittests/op_test.py +++ b/python/paddle/fluid/tests/unittests/op_test.py @@ -505,6 +505,7 @@ class OpTest(unittest.TestCase): else: tensor.set(self.inputs[var_name], place) feed_map[var_name] = tensor + return feed_map def _append_ops(self, block): @@ -1136,6 +1137,7 @@ class OpTest(unittest.TestCase): continue else: grad_feed_map[arg] = fwd_outs[i]._copy(p) + return grad_feed_map def _get_need_run_ops(self, op_desc, fwd_op_desc=None): @@ -1254,6 +1256,7 @@ class OpTest(unittest.TestCase): build_strategy=build_strategy, places=place) program = compiled_program + outs = exe.run(program, feed=grad_feed_map, fetch_list=grad_fetch_list, @@ -1290,6 +1293,7 @@ class OpTest(unittest.TestCase): fwd_res, grad_op_desc, enable_inplace=True) + self._compare_expect_and_actual_outputs(place, expect_res[1], expect_res[0], @@ -1457,7 +1461,7 @@ class OpTest(unittest.TestCase): # NOTE(zhiqiu): np.allclose([], [1.]) returns True # see details: https://stackoverflow.com/questions/38331703/why-does-numpys-broadcasting-sometimes-allow-comparing-arrays-of-different-leng if expect_np.size == 0: - self.op_test.assertTrue(actual_np.size == 0) # }}} + self.op_test.assertTrue(actual_np.size == 0) self._compare_numpy(name, actual_np, expect_np) if isinstance(expect, tuple): self._compare_list(name, actual, expect) @@ -1663,7 +1667,6 @@ class OpTest(unittest.TestCase): if check_dygraph: # always enable legacy dygraph g_enable_legacy_dygraph() - dygraph_checker = DygraphChecker(self, self.outputs) dygraph_checker.check() dygraph_outs = dygraph_checker.outputs @@ -1830,15 +1833,29 @@ class OpTest(unittest.TestCase): # Therefore, it asserts np.abs(a - b) / (np.abs(a)*1e4) < max_relative_error, # which is the same as np.abs(a - b) / np.abs(a) < max_relative_error*1e4. 
abs_a = np.abs(a) - if self.dtype == np.float64 and \ - self.op_type not in op_threshold_white_list.NEED_FIX_FP64_CHECK_GRAD_THRESHOLD_OP_LIST: - abs_a[abs_a < 1e-10] = 1e-3 - abs_a[np.logical_and(abs_a > 1e-10, abs_a <= 1e-8)] *= 1e4 - abs_a[np.logical_and(abs_a > 1e-8, abs_a <= 1e-6)] *= 1e2 - elif self.is_bfloat16_op(): - abs_a[abs_a < 1e-2] = 1 - else: - abs_a[abs_a < 1e-3] = 1 + if abs_a.ndim > 0: + if self.dtype == np.float64 and \ + self.op_type not in op_threshold_white_list.NEED_FIX_FP64_CHECK_GRAD_THRESHOLD_OP_LIST: + abs_a[abs_a < 1e-10] = 1e-3 + abs_a[np.logical_and(abs_a > 1e-10, abs_a <= 1e-8)] *= 1e4 + abs_a[np.logical_and(abs_a > 1e-8, abs_a <= 1e-6)] *= 1e2 + elif self.is_bfloat16_op(): + abs_a[abs_a < 1e-2] = 1 + else: + abs_a[abs_a < 1e-3] = 1 + elif abs_a.ndim == 0: + if self.dtype == np.float64 and \ + self.op_type not in op_threshold_white_list.NEED_FIX_FP64_CHECK_GRAD_THRESHOLD_OP_LIST: + if abs_a < 1e-10: + abs_a = 1e-3 + elif abs_a > 1e-10 and abs_a <= 1e-8: + abs_a = abs_a * 1e4 + elif abs_a > 1e-8 and abs_a <= 1e-6: + abs_a = abs_a * 1e2 + elif self.is_bfloat16_op(): + abs_a = 1 if abs_a < 1e-2 else abs_a + else: + abs_a = 1 if abs_a < 1e-3 else abs_a diff_mat = np.abs(a - b) / abs_a max_diff = np.max(diff_mat) @@ -1958,7 +1975,9 @@ class OpTest(unittest.TestCase): tensor_to_check = self.scope.find_var(input_to_check).get_tensor() tensor_size = six.moves.reduce(lambda a, b: a * b, tensor_to_check.shape(), 1) - if tensor_size < 100: + tensor_ndim = len(tensor_to_check.shape()) + # for 0D Tensor, it's additional case for OP, so not raise error + if tensor_ndim > 0 and tensor_size < 100: self.__class__.input_shape_is_large = False if not type(output_names) is list: diff --git a/python/paddle/fluid/tests/unittests/test_activation_op.py b/python/paddle/fluid/tests/unittests/test_activation_op.py index 7486c0022e03eb217e3777fda62a8cd995071f83..49c19f1cbff9a1a0990d0c329ecc5a1b673dd7e0 100755 --- a/python/paddle/fluid/tests/unittests/test_activation_op.py +++ b/python/paddle/fluid/tests/unittests/test_activation_op.py @@ -52,12 +52,13 @@ class TestActivation(OpTest): def setUp(self): self.op_type = "exp" self.init_dtype() + self.init_shape() self.init_kernel_type() self.check_eager = True self.python_api = paddle.exp np.random.seed(2049) - x = np.random.uniform(0.1, 1, [11, 17]).astype(self.dtype) + x = np.random.uniform(0.1, 1, self.shape).astype(self.dtype) out = np.exp(x) self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)} @@ -80,19 +81,29 @@ class TestActivation(OpTest): def init_dtype(self): self.dtype = np.float64 + def init_shape(self): + self.shape = [11, 17] + def init_kernel_type(self): pass +class TestActivation_ZeroDim(TestActivation): + + def init_shape(self): + self.shape = [] + + class TestExpm1(TestActivation): def setUp(self): self.op_type = "expm1" self.python_api = paddle.expm1 self.init_dtype() + self.init_shape() np.random.seed(2049) - x = np.random.uniform(0.1, 1, [11, 17]).astype(self.dtype) + x = np.random.uniform(0.1, 1, self.shape).astype(self.dtype) out = np.expm1(x) self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)} @@ -105,6 +116,12 @@ class TestExpm1(TestActivation): self.check_output(check_eager=True) +class TestExpm1_ZeroDim(TestExpm1): + + def init_shape(self): + self.shape = [] + + class TestExpm1API(unittest.TestCase): def init_dtype(self): @@ -182,9 +199,10 @@ class TestSigmoid(TestActivation): def setUp(self): self.op_type = "sigmoid" self.init_dtype() + self.init_shape() np.random.seed(1024) - x = np.random.uniform(-1, 1, [11, 
17]).astype(self.dtype) + x = np.random.uniform(-1, 1, self.shape).astype(self.dtype) out = 1 / (1 + np.exp(-x)) self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)} @@ -199,6 +217,12 @@ class TestSigmoid(TestActivation): self.check_grad(['X'], 'Out', max_relative_error=0.01) +class TestSigmoid_ZeroDim(TestSigmoid): + + def init_shape(self): + self.shape = [] + + @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestSigmoidBF16(OpTest): @@ -206,9 +230,10 @@ class TestSigmoidBF16(OpTest): def setUp(self): self.op_type = "sigmoid" self.init_dtype() + self.init_shape() np.random.seed(1024) - x = np.random.uniform(-1, 1, [11, 17]).astype(np.float32) + x = np.random.uniform(-1, 1, self.shape).astype(np.float32) out = 1 / (1 + np.exp(-x)) self.inputs = { @@ -219,6 +244,9 @@ class TestSigmoidBF16(OpTest): def init_dtype(self): self.dtype = np.uint16 + def init_shape(self): + self.shape = [11, 17] + def test_check_output(self): place = core.CUDAPlace(0) self.check_output_with_place(place) @@ -228,14 +256,23 @@ class TestSigmoidBF16(OpTest): self.check_grad_with_place(place, ['X'], 'Out') +''' +class TestSigmoidBF16_ZeroDim(TestSigmoidBF16): + + def init_shape(self): + self.shape = [] +''' + + class TestSilu(TestActivation): def setUp(self): self.op_type = "silu" self.init_dtype() + self.init_shape() np.random.seed(1024) - x = np.random.uniform(-1, 1, [11, 17]).astype(self.dtype) + x = np.random.uniform(-1, 1, self.shape).astype(self.dtype) out = x / (np.exp(-x) + 1) self.inputs = {'X': x} @@ -250,6 +287,12 @@ class TestSilu(TestActivation): self.check_grad(['X'], 'Out') +class TestSilu_ZeroDim(TestSilu): + + def init_shape(self): + self.shape = [] + + class TestSiluAPI(unittest.TestCase): # test paddle.nn.Silu, paddle.nn.functional.silu def setUp(self): @@ -302,9 +345,10 @@ class TestLogSigmoid(TestActivation): def setUp(self): self.op_type = "logsigmoid" self.init_dtype() + self.init_shape() np.random.seed(2048) - x = np.random.uniform(-1, 1, [11, 17]).astype(self.dtype) + x = np.random.uniform(-1, 1, self.shape).astype(self.dtype) out = np.log(1 / (1 + np.exp(-x))) self.inputs = {'X': x} @@ -316,6 +360,12 @@ class TestLogSigmoid(TestActivation): self.check_grad(['X'], 'Out', max_relative_error=0.008) +class TestLogSigmoid_ZeroDim(TestLogSigmoid): + + def init_shape(self): + self.shape = [] + + class TestLogSigmoidAPI(unittest.TestCase): # test paddle.nn.LogSigmoid, paddle.nn.functional.log_sigmoid def setUp(self): @@ -380,8 +430,10 @@ class TestTanh(TestActivation, TestParameter): def setUp(self): self.op_type = "tanh" self.init_dtype() + self.init_shape() + np.random.seed(1024) - x = np.random.uniform(0.1, 1, [11, 17]).astype(self.dtype) + x = np.random.uniform(0.1, 1, self.shape).astype(self.dtype) out = np.tanh(x) self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)} @@ -399,6 +451,12 @@ class TestTanh(TestActivation, TestParameter): self.dtype = np.float32 +class TestTanh_ZeroDim(TestTanh): + + def init_shape(self): + self.shape = [] + + class TestTanhAPI(unittest.TestCase): # test paddle.tanh, paddle.nn.tanh, paddle.nn.functional.tanh def setUp(self): @@ -475,9 +533,10 @@ class TestAtan(TestActivation, TestParameter): def setUp(self): self.op_type = "atan" self.init_dtype() + self.init_shape() np.random.seed(1024) - x = np.random.uniform(0.1, 1, [11, 17]).astype(self.dtype) + x = np.random.uniform(0.1, 1, self.shape).astype(self.dtype) out = np.arctan(x) self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)} @@ -508,14 +567,21 @@ class 
TestAtan(TestActivation, TestParameter): self.assertEqual(z, z_expected) +class TestAtan_ZeroDim(TestTanh): + + def init_shape(self): + self.shape = [] + + class TestSinh(TestActivation): def setUp(self): self.op_type = "sinh" self.init_dtype() + self.init_shape() np.random.seed(1024) - x = np.random.uniform(0.1, 1, [11, 17]).astype(self.dtype) + x = np.random.uniform(0.1, 1, self.shape).astype(self.dtype) out = np.sinh(x) self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)} @@ -526,6 +592,15 @@ class TestSinh(TestActivation): return self.check_grad(['X'], 'Out') + +class TestSinh_ZeroDim(TestSinh): + + def init_shape(self): + self.shape = [] + + +class TestSinhAPI(unittest.TestCase): + def test_dygraph(self): with fluid.dygraph.guard(): np_x = np.array([0.1]) @@ -586,9 +661,10 @@ class TestCosh(TestActivation): def setUp(self): self.op_type = "cosh" self.init_dtype() + self.init_shape() np.random.seed(1024) - x = np.random.uniform(0.1, 1, [11, 17]).astype(self.dtype) + x = np.random.uniform(0.1, 1, self.shape).astype(self.dtype) out = np.cosh(x) self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)} @@ -599,6 +675,15 @@ class TestCosh(TestActivation): return self.check_grad(['X'], 'Out') + +class TestCosh_ZeroDim(TestCosh): + + def init_shape(self): + self.shape = [] + + +class TestCoshAPI(unittest.TestCase): + def test_dygraph(self): with fluid.dygraph.guard(): np_x = np.array([0.1]) @@ -664,9 +749,10 @@ class TestTanhshrink(TestActivation): def setUp(self): self.op_type = "tanh_shrink" self.init_dtype() + self.init_shape() np.random.seed(1024) - x = np.random.uniform(10, 20, [10, 17]).astype(self.dtype) + x = np.random.uniform(10, 20, self.shape).astype(self.dtype) out = ref_tanhshrink(x) self.inputs = {'X': x} @@ -678,6 +764,12 @@ class TestTanhshrink(TestActivation): self.check_grad(['X'], 'Out') +class TestTanhshrink_ZeroDim(TestTanhshrink): + + def init_shape(self): + self.shape = [] + + class TestTanhshrinkAPI(unittest.TestCase): # test paddle.nn.Tanhshrink, paddle.nn.functional.tanhshrink def setUp(self): @@ -748,17 +840,21 @@ class TestHardShrink(TestActivation): def setUp(self): self.op_type = "hard_shrink" self.init_dtype() + self.init_shape() self.threshold = 0.5 self.set_attrs() np.random.seed(1024) - x = np.random.uniform(-1, 1, [10, 12]).astype(self.dtype) * 10 + x = np.random.uniform(-1, 1, self.shape).astype(self.dtype) * 10 out = ref_hardshrink(x, self.threshold) self.attrs = {'threshold': self.threshold} self.inputs = {'X': x} self.outputs = {'Out': out} + def init_shape(self): + self.shape = [10, 12] + def set_attrs(self): pass @@ -774,6 +870,14 @@ class TestHardShrink_threshold_negative(TestHardShrink): self.threshold = -0.1 +''' +class TestHardShrink_ZeroDim(TestHardShrink): + + def init_shape(self): + self.shape = [] +''' + + class TestHardShrinkAPI(unittest.TestCase): # test paddle.nn.Hardshrink, paddle.nn.functional.hardshrink def setUp(self): @@ -918,11 +1022,12 @@ class TestSoftshrink(TestActivation): self.check_eager = True self.python_api = paddle.nn.functional.softshrink self.init_dtype() + self.init_shape() threshold = 0.8 np.random.seed(1023) - x = np.random.uniform(0.25, 10, [10, 12]).astype(self.dtype) + x = np.random.uniform(0.25, 10, self.shape).astype(self.dtype) out = ref_softshrink(x, threshold) self.inputs = {'X': x} self.attrs = {"lambda": threshold} @@ -934,6 +1039,12 @@ class TestSoftshrink(TestActivation): self.check_grad(['X'], 'Out', check_eager=True) +class TestSoftshrink_ZeroDim(TestSoftshrink): + + def init_shape(self): + self.shape = [] 
+ + class TestSoftshrinkAPI(unittest.TestCase): # test paddle.nn.Softshrink, paddle.nn.functional.softshrink def setUp(self): @@ -1005,9 +1116,10 @@ class TestSqrt(TestActivation, TestParameter): self.op_type = "sqrt" self.python_api = paddle.sqrt self.init_dtype() + self.init_shape() np.random.seed(1023) - x = np.random.uniform(0.1, 1, [11, 17]).astype(self.dtype) + x = np.random.uniform(0.1, 1, self.shape).astype(self.dtype) out = np.sqrt(x) self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)} @@ -1022,6 +1134,12 @@ class TestSqrt(TestActivation, TestParameter): self.check_output(check_eager=True) +class TestSqrt_ZeroDim(TestSqrt): + + def init_shape(self): + self.shape = [] + + @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestSqrtBF16(OpTest): @@ -1030,9 +1148,10 @@ class TestSqrtBF16(OpTest): self.op_type = "sqrt" self.python_api = paddle.sqrt self.init_dtype() + self.init_shape() np.random.seed(1023) - x = np.random.uniform(0.1, 1, [11, 17]).astype(np.float32) + x = np.random.uniform(0.1, 1, self.shape).astype(np.float32) out = np.sqrt(x) self.inputs = { @@ -1043,6 +1162,9 @@ class TestSqrtBF16(OpTest): def init_dtype(self): self.dtype = np.uint16 + def init_shape(self): + self.shape = [11, 17] + def test_check_output(self): place = core.CUDAPlace(0) self.check_output_with_place(place, check_eager=True) @@ -1058,14 +1180,18 @@ class TestRsqrt(TestActivation): self.op_type = "rsqrt" self.python_api = paddle.rsqrt self.init_dtype() + self.init_shape() np.random.seed(1024) - x = np.random.uniform(0.1, 1, [10, 12]).astype(self.dtype) * 10 + x = np.random.uniform(0.1, 1, self.shape).astype(self.dtype) * 10 out = 1.0 / np.sqrt(x) self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)} self.outputs = {'Out': out} + def init_shape(self): + self.shape = [10, 12] + def test_check_grad(self): if self.dtype == np.float16: return @@ -1075,14 +1201,23 @@ class TestRsqrt(TestActivation): check_eager=True) +''' +class TestRsqrt_ZeroDim(TestRsqrt): + + def init_shape(self): + self.shape = [] +''' + + class TestAbs(TestActivation): def setUp(self): self.op_type = "abs" self.init_dtype() + self.init_shape() np.random.seed(1024) - x = np.random.uniform(-1, 1, [4, 25]).astype(self.dtype) + x = np.random.uniform(-1, 1, self.shape).astype(self.dtype) # Because we set delta = 0.005 in calculating numeric gradient, # if x is too small, such as 0.002, x_neg will be -0.003 # x_pos will be 0.007, so the numeric gradient is inaccurate. 
@@ -1093,12 +1228,21 @@ class TestAbs(TestActivation): self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)} self.outputs = {'Out': out} + def init_shape(self): + self.shape = [4, 25] + def test_check_grad(self): if self.dtype == np.float16: return self.check_grad(['X'], 'Out', check_eager=False) +class TestAbs_ZeroDim(TestAbs): + + def init_shape(self): + self.shape = [] + + class TestCeil(TestActivation): def setUp(self): @@ -1106,19 +1250,29 @@ class TestCeil(TestActivation): self.check_eager = True self.python_api = paddle.ceil self.init_dtype() + self.init_shape() np.random.seed(1024) - x = np.random.uniform(-1, 1, [10, 12]).astype(self.dtype) + x = np.random.uniform(-1, 1, self.shape).astype(self.dtype) out = np.ceil(x) self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)} self.outputs = {'Out': out} + def init_shape(self): + self.shape = [10, 12] + # The same reason with TestFloor def test_check_grad(self): pass +class TestCeil_ZeroDim(TestCeil): + + def init_shape(self): + self.shape = [] + + class TestFloor(TestActivation): def setUp(self): @@ -1126,14 +1280,18 @@ class TestFloor(TestActivation): self.check_eager = True self.python_api = paddle.floor self.init_dtype() + self.init_shape() np.random.seed(1024) - x = np.random.uniform(-1, 1, [10, 12]).astype(self.dtype) + x = np.random.uniform(-1, 1, self.shape).astype(self.dtype) out = np.floor(x) self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)} self.outputs = {'Out': out} + def init_shape(self): + self.shape = [10, 12] + # the gradient on floor, ceil, round is undefined. # we return zero as gradient, but the numpy return nan # The same reason with TestFloor @@ -1141,33 +1299,51 @@ class TestFloor(TestActivation): pass +class TestFloor_ZeroDim(TestFloor): + + def init_shape(self): + self.shape = [] + + class TestCos(TestActivation): def setUp(self): self.op_type = "cos" self.init_dtype() + self.init_shape() np.random.seed(1024) - x = np.random.uniform(-1, 1, [10, 12]).astype(self.dtype) + x = np.random.uniform(-1, 1, self.shape).astype(self.dtype) out = np.cos(x) self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)} self.outputs = {'Out': out} + def init_shape(self): + self.shape = [10, 12] + def test_check_grad(self): if self.dtype == np.float16: return self.check_grad(['X'], 'Out') +class TestCos_ZeroDim(TestCos): + + def init_shape(self): + self.shape = [] + + class TestTan(TestActivation): def setUp(self): np.random.seed(1024) self.op_type = "tan" self.init_dtype() + self.init_shape() + self.dtype = 'float32' - self.x_np = np.random.uniform(-1, 1, [10, 12]).astype(self.dtype) + self.x_np = np.random.uniform(-1, 1, self.shape).astype(self.dtype) self.place = paddle.CUDAPlace(0) if paddle.is_compiled_with_cuda() \ else paddle.CPUPlace() @@ -1176,11 +1352,30 @@ class TestTan(TestActivation): self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(self.x_np)} self.outputs = {'Out': out} + def init_shape(self): + self.shape = [10, 12] + def test_check_grad(self): if self.dtype == np.float16: return self.check_grad(['X'], 'Out') + +class TestTan_ZeroDim(TestTan): + + def init_shape(self): + self.shape = [] + + +class TestTanAPI(unittest.TestCase): + + def setUp(self): + np.random.seed(1024) + self.dtype = 'float32' + self.x_np = np.random.uniform(-1, 1, [11, 17]).astype(self.dtype) + self.place = paddle.CUDAPlace(0) if paddle.is_compiled_with_cuda() \ + else paddle.CPUPlace() + def test_dygraph_api(self): paddle.disable_static(self.place) x = paddle.to_tensor(self.x_np) @@ -1192,7 +1387,7 @@ class TestTan(TestActivation): def 
test_static_api(self): paddle.enable_static() with paddle.static.program_guard(paddle.static.Program()): - x = paddle.static.data('X', [10, 12], self.dtype) + x = paddle.static.data('X', [11, 17], self.dtype) out = paddle.tan(x) exe = paddle.static.Executor(self.place) res = exe.run(feed={'X': self.x_np}, fetch_list=[out]) @@ -1217,115 +1412,175 @@ class TestAcos(TestActivation): def setUp(self): self.op_type = "acos" self.init_dtype() + self.init_shape() np.random.seed(1024) - x = np.random.uniform(-0.95, 0.95, [10, 12]).astype(self.dtype) + x = np.random.uniform(-0.95, 0.95, self.shape).astype(self.dtype) out = np.arccos(x) self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)} self.outputs = {'Out': out} + def init_shape(self): + self.shape = [10, 12] + def test_check_grad(self): if self.dtype == np.float16: return self.check_grad(['X'], 'Out') +class TestAcos_ZeroDim(TestAcos): + + def init_shape(self): + self.shape = [] + + class TestSin(TestActivation, TestParameter): def setUp(self): self.op_type = "sin" self.init_dtype() + self.init_shape() np.random.seed(1024) - x = np.random.uniform(-1, 1, [10, 12]).astype(self.dtype) + x = np.random.uniform(-1, 1, self.shape).astype(self.dtype) out = np.sin(x) self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)} self.outputs = {'Out': out} + def init_shape(self): + self.shape = [10, 12] + def test_check_grad(self): if self.dtype == np.float16: return self.check_grad(['X'], 'Out') +class TestSin_ZeroDim(TestSin): + + def init_shape(self): + self.shape = [] + + class TestAsin(TestActivation): def setUp(self): self.op_type = "asin" self.init_dtype() + self.init_shape() np.random.seed(2048) - x = np.random.uniform(-0.95, 0.95, [10, 12]).astype(self.dtype) + x = np.random.uniform(-0.95, 0.95, self.shape).astype(self.dtype) out = np.arcsin(x) self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)} self.outputs = {'Out': out} + def init_shape(self): + self.shape = [10, 12] + def test_check_grad(self): if self.dtype == np.float16: return self.check_grad(['X'], 'Out') +class TestAsin_ZeroDim(TestAsin): + + def init_shape(self): + self.shape = [] + + class TestAcosh(TestActivation): def setUp(self): self.op_type = "acosh" self.init_dtype() + self.init_shape() np.random.seed(1024) - x = np.random.uniform(2, 3, [10, 12]).astype(self.dtype) + x = np.random.uniform(2, 3, self.shape).astype(self.dtype) out = np.arccosh(x) self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)} self.outputs = {'Out': out} + def init_shape(self): + self.shape = [10, 12] + def test_check_grad(self): if self.dtype == np.float16: return self.check_grad(['X'], 'Out') +class TestAcosh_ZeroDim(TestAcosh): + + def init_shape(self): + self.shape = [] + + class TestAsinh(TestActivation): def setUp(self): self.op_type = "asinh" self.init_dtype() + self.init_shape() np.random.seed(1024) - x = np.random.uniform(1, 2, [10, 12]).astype(self.dtype) + x = np.random.uniform(1, 2, self.shape).astype(self.dtype) out = np.arcsinh(x) self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)} self.outputs = {'Out': out} + def init_shape(self): + self.shape = [10, 12] + def test_check_grad(self): if self.dtype == np.float16: return self.check_grad(['X'], 'Out') +class TestAsinh_ZeroDim(TestAsinh): + + def init_shape(self): + self.shape = [] + + class TestAtanh(TestActivation): def setUp(self): self.op_type = "atanh" self.init_dtype() + self.init_shape() np.random.seed(400) - x = np.random.uniform(-0.9, 0.9, [10, 12]).astype(self.dtype) + x = np.random.uniform(-0.9, 0.9, self.shape).astype(self.dtype) out 
= np.arctanh(x) self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)} self.outputs = {'Out': out} + def init_shape(self): + self.shape = [10, 12] + def test_check_grad(self): if self.dtype == np.float16: return self.check_grad(['X'], 'Out') +class TestAtanh_ZeroDim(TestAtanh): + + def init_shape(self): + self.shape = [] + + class TestRound(TestActivation): def setUp(self): @@ -1333,33 +1588,44 @@ class TestRound(TestActivation): self.check_eager = True self.python_api = paddle.round self.init_dtype() + self.init_shape() np.random.seed(1024) - x = np.random.uniform(-1, 1, [10, 12]).astype(self.dtype) + x = np.random.uniform(-1, 1, self.shape).astype(self.dtype) out = np.round(x) self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)} self.outputs = {'Out': out} + def init_shape(self): + self.shape = [10, 12] + def test_check_grad(self): pass +class TestRound_ZeroDim(TestRound): + + def init_shape(self): + self.shape = [] + + class TestRelu(TestActivation): def setUp(self): self.op_type = "relu" self.init_dtype() + self.init_shape() np.random.seed(1024) if self.dtype == np.uint16: - x = np.random.uniform(-1, 1, [11, 17]).astype(np.float32) + x = np.random.uniform(-1, 1, self.shape).astype(np.float32) # The same reason with TestAbs x[np.abs(x) < 0.005] = 0.02 out = convert_float_to_uint16(np.maximum(x, 0)) self.inputs = {'X': convert_float_to_uint16(x)} else: - x = np.random.uniform(-1, 1, [11, 17]).astype(self.dtype) + x = np.random.uniform(-1, 1, self.shape).astype(self.dtype) # The same reason with TestAbs x[np.abs(x) < 0.005] = 0.02 out = np.maximum(x, 0) @@ -1373,6 +1639,12 @@ class TestRelu(TestActivation): self.check_grad(['X'], 'Out') +class TestRelu_ZeroDim(TestRelu): + + def init_shape(self): + self.shape = [] + + class TestReluAPI(unittest.TestCase): # test paddle.nn.ReLU, paddle.nn.functional.relu def setUp(self): @@ -1446,10 +1718,11 @@ class TestLeakyRelu(TestActivation): def setUp(self): self.op_type = "leaky_relu" self.init_dtype() + self.init_shape() alpha = self.get_alpha() np.random.seed(1024) - x = np.random.uniform(-1, 1, [11, 17]).astype(self.dtype) + x = np.random.uniform(-1, 1, self.shape).astype(self.dtype) # The same reason with TestAbs x[np.abs(x) < 0.005] = 0.05 out = ref_leaky_relu(x, alpha) @@ -1482,6 +1755,12 @@ class TestLeakyReluAlpha3(TestLeakyRelu): return -2.0 +class TestLeakyRelu_ZeroDim(TestLeakyRelu): + + def init_shape(self): + self.shape = [] + + class TestLeakyReluAPI(unittest.TestCase): # test paddle.nn.LeakyReLU, paddle.nn.functional.leaky_relu, # fluid.layers.leaky_relu @@ -1563,9 +1842,10 @@ class TestGeluApproximate(TestActivation): def setUp(self): self.op_type = "gelu" self.init_dtype() + self.init_shape() approximate = True np.random.seed(1024) - x = np.random.uniform(-1, 1, [11, 17]).astype(self.dtype) + x = np.random.uniform(-1, 1, self.shape).astype(self.dtype) out = gelu(x, approximate) self.inputs = {'X': x} @@ -1583,9 +1863,10 @@ class TestGelu(TestActivation): def setUp(self): self.op_type = "gelu" self.init_dtype() + self.init_shape() approximate = False np.random.seed(2048) - x = np.random.uniform(-1, 1, [11, 17]).astype(self.dtype) + x = np.random.uniform(-1, 1, self.shape).astype(self.dtype) out = gelu(x, approximate) self.inputs = {'X': x} @@ -1598,6 +1879,12 @@ class TestGelu(TestActivation): self.check_grad(['X'], 'Out') +class TestGelu_ZeroDim(TestGelu): + + def init_shape(self): + self.shape = [] + + class TestGELUAPI(unittest.TestCase): # test paddle.nn.GELU, paddle.nn.functional.gelu def setUp(self): @@ -1735,10 +2022,11 @@ 
class TestRelu6(TestActivation): def setUp(self): self.op_type = "relu6" self.init_dtype() + self.init_shape() self.python_api = paddle.nn.functional.relu6 np.random.seed(1024) - x = np.random.uniform(-1, 10, [10, 12]).astype(self.dtype) + x = np.random.uniform(-1, 10, self.shape).astype(self.dtype) x[np.abs(x) < 0.005] = 0.02 out = ref_relu6(x) @@ -1746,12 +2034,21 @@ class TestRelu6(TestActivation): self.attrs = {'threshold': 6.0} self.outputs = {'Out': out} + def init_shape(self): + self.shape = [10, 12] + def test_check_grad(self): if self.dtype == np.float16: return self.check_grad(['X'], 'Out', check_eager=True) +class TestRelu6_ZeroDim(TestRelu6): + + def init_shape(self): + self.shape = [] + + class TestRelu6API(unittest.TestCase): # test paddle.nn.ReLU6, paddle.nn.functional.relu6 def setUp(self): @@ -1826,10 +2123,11 @@ class TestHardSwish(TestActivation): def setUp(self): self.op_type = 'hard_swish' self.init_dtype() + self.init_shape() self.python_api = paddle.nn.functional.hardswish np.random.seed(1024) - x = np.random.uniform(-6, 6, [10, 12]).astype(self.dtype) + x = np.random.uniform(-6, 6, self.shape).astype(self.dtype) threshold = 6.0 scale = 6.0 offset = 3.0 @@ -1842,6 +2140,9 @@ class TestHardSwish(TestActivation): self.attrs = {'threshold': threshold, 'scale': scale, 'offset': offset} self.outputs = {'Out': out} + def init_shape(self): + self.shape = [10, 12] + def test_check_grad(self): self.check_grad(['X'], 'Out', check_eager=True) @@ -1849,6 +2150,12 @@ class TestHardSwish(TestActivation): self.check_output(check_eager=True) +class TestHardSwish_ZeroDim(TestHardSwish): + + def init_shape(self): + self.shape = [] + + class TestHardswishAPI(unittest.TestCase): # test paddle.nn.Hardswish, paddle.nn.functional.hardswish def setUp(self): @@ -1966,9 +2273,10 @@ class TestELU(TestActivation): def setUp(self): self.op_type = "elu" self.init_dtype() + self.init_shape() np.random.seed(1024) - x = np.random.uniform(-3, 3, [10, 12]).astype(self.dtype) + x = np.random.uniform(-3, 3, self.shape).astype(self.dtype) alpha = self.get_alpha() out = elu(x, alpha) # Note: unlike other Relu extensions, point 0 on standard ELU function (i.e. 
alpha = 1) @@ -1977,6 +2285,9 @@ class TestELU(TestActivation): self.attrs = {'alpha': alpha} self.outputs = {'Out': out} + def init_shape(self): + self.shape = [10, 12] + def test_check_grad(self): if self.dtype == np.float16: return @@ -1992,6 +2303,12 @@ class TestELUAlpha(TestELU): return -0.2 +class TestELU_ZeroDim(TestELU): + + def init_shape(self): + self.shape = [] + + class TestELUAPI(unittest.TestCase): # test paddle.nn.ELU, paddle.nn.functional.elu def setUp(self): @@ -2076,22 +2393,32 @@ class TestCELU(TestActivation): def setUp(self): self.op_type = "celu" self.init_dtype() + self.init_shape() self.python_api = paddle.nn.functional.celu np.random.seed(1024) - x = np.random.uniform(-3, 3, [10, 12]).astype(self.dtype) + x = np.random.uniform(-3, 3, self.shape).astype(self.dtype) alpha = 1.5 out = celu(x, alpha) self.inputs = {'X': x} self.attrs = {'alpha': alpha} self.outputs = {'Out': out} + def init_shape(self): + self.shape = [10, 12] + def test_check_grad(self): if self.dtype == np.float16: return self.check_grad(['X'], 'Out', check_eager=True) +class TestCELU_ZeroDim(TestCELU): + + def init_shape(self): + self.shape = [] + + class TestCELUAPI(unittest.TestCase): # test paddle.nn.CELU, paddle.nn.functional.celu def setUp(self): @@ -2170,9 +2497,10 @@ class TestReciprocal(TestActivation): self.op_type = "reciprocal" self.python_api = paddle.reciprocal self.init_dtype() + self.init_shape() np.random.seed(1024) - x = np.random.uniform(1, 2, [11, 17]).astype(self.dtype) + x = np.random.uniform(1, 2, self.shape).astype(self.dtype) out = np.reciprocal(x) self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)} @@ -2187,6 +2515,12 @@ class TestReciprocal(TestActivation): self.check_output(check_eager=True) +class TestReciprocal_ZeroDim(TestReciprocal): + + def init_shape(self): + self.shape = [] + + class TestLog(TestActivation): def setUp(self): @@ -2194,9 +2528,10 @@ class TestLog(TestActivation): self.check_eager = True self.python_api = paddle.log self.init_dtype() + self.init_shape() np.random.seed(1024) - x = np.random.uniform(0.1, 1, [11, 17]).astype(self.dtype) + x = np.random.uniform(0.1, 1, self.shape).astype(self.dtype) out = np.log(x) self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)} @@ -2221,6 +2556,12 @@ class TestLog(TestActivation): self.assertRaises(TypeError, fluid.layers.log, in2) +class TestLog_ZeroDim(TestLog): + + def init_shape(self): + self.shape = [] + + class TestLog2(TestActivation): def setUp(self): @@ -2228,8 +2569,9 @@ class TestLog2(TestActivation): self.check_eager = True self.python_api = paddle.log2 self.init_dtype() + self.init_shape() - x = np.random.uniform(0.1, 1, [11, 17]).astype(self.dtype) + x = np.random.uniform(0.1, 1, self.shape).astype(self.dtype) out = np.log2(x) self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)} @@ -2274,6 +2616,12 @@ class TestLog2(TestActivation): np.testing.assert_allclose(np_z, z_expected, rtol=1e-05) +class TestLog2_ZeroDim(TestLog2): + + def init_shape(self): + self.shape = [] + + class TestLog10(TestActivation): def setUp(self): @@ -2281,8 +2629,9 @@ class TestLog10(TestActivation): self.check_eager = True self.python_api = paddle.log10 self.init_dtype() + self.init_shape() - x = np.random.uniform(0.1, 1, [11, 17]).astype(self.dtype) + x = np.random.uniform(0.1, 1, self.shape).astype(self.dtype) out = np.log10(x) self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)} @@ -2293,6 +2642,15 @@ class TestLog10(TestActivation): return self.check_grad(['X'], 'Out', check_eager=True) + +class 
TestLog10_ZeroDim(TestLog10): + + def init_shape(self): + self.shape = [] + + +class TestLog10API(unittest.TestCase): + def test_error(self): in1 = paddle.static.data(name="in1", shape=[11, 17], dtype="int32") in2 = paddle.static.data(name="in2", shape=[11, 17], dtype="int64") @@ -2334,9 +2692,10 @@ class TestLog1p(TestActivation): self.check_eager = True self.python_api = paddle.log1p self.init_dtype() + self.init_shape() np.random.seed(1024) - x = np.random.uniform(0.1, 1, [11, 17]).astype(self.dtype) + x = np.random.uniform(0.1, 1, self.shape).astype(self.dtype) out = np.log1p(x) self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)} @@ -2347,6 +2706,15 @@ class TestLog1p(TestActivation): return self.check_grad(['X'], 'Out', check_eager=True) + +class TestLog1p_ZeroDim(TestLog1p): + + def init_shape(self): + self.shape = [] + + +class TestLog1pAPI(unittest.TestCase): + def test_api(self): with fluid.program_guard(fluid.Program(), fluid.Program()): input_x = np.random.uniform(0.1, 1, [11, 17]).astype("float64") @@ -2380,9 +2748,10 @@ class TestSquare(TestActivation): self.op_type = "square" self.python_api = paddle.square self.init_dtype() + self.init_shape() np.random.seed(1024) - x = np.random.uniform(0.1, 1, [11, 17]).astype(self.dtype) + x = np.random.uniform(0.1, 1, self.shape).astype(self.dtype) out = np.square(x) self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)} @@ -2400,6 +2769,12 @@ class TestSquare(TestActivation): self.check_output(check_eager=True) +class TestSquare_ZeroDim(TestSquare): + + def init_shape(self): + self.shape = [] + + @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestSquareBF16(OpTest): @@ -2440,9 +2815,10 @@ class TestPow(TestActivation): self.python_api = paddle.pow self.check_eager = True self.init_dtype() + self.init_shape() np.random.seed(1024) - x = np.random.uniform(1, 2, [11, 17]).astype(self.dtype) + x = np.random.uniform(1, 2, self.shape).astype(self.dtype) out = np.power(x, 3) self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)} @@ -2458,6 +2834,12 @@ class TestPow(TestActivation): self.check_grad(['X'], 'Out', check_eager=self.check_eager) +class TestPow_ZeroDim(TestPow): + + def init_shape(self): + self.shape = [] + + class TestPow_factor_tensor(TestActivation): def setUp(self): @@ -2557,11 +2939,13 @@ class TestSTanh(TestActivation): def setUp(self): self.op_type = "stanh" self.init_dtype() + self.init_shape() + scale_a = self.get_scale_a() scale_b = self.get_scale_b() np.random.seed(1024) - x = np.random.uniform(0.1, 1, [11, 17]).astype(self.dtype) + x = np.random.uniform(0.1, 1, self.shape).astype(self.dtype) # The same reason with TestAbs out = ref_stanh(x, scale_a, scale_b) @@ -2587,6 +2971,12 @@ class TestSTanhScaleB(TestSTanh): return 0.5 +class TestSTanh_ZeroDim(TestSTanh): + + def init_shape(self): + self.shape = [] + + class TestSTanhAPI(unittest.TestCase): # test paddle.nn.stanh def get_scale_a(self): @@ -2675,12 +3065,13 @@ class TestSoftplus(TestActivation): self.op_type = "softplus" self.python_api = paddle.nn.functional.softplus self.init_dtype() + self.init_shape() beta = 2 threshold = 15 np.random.seed(1024) - x = np.random.uniform(-1, 1, [10, 12]).astype(self.dtype) + x = np.random.uniform(-1, 1, self.shape).astype(self.dtype) out = ref_softplus(x, beta, threshold) self.inputs = {'X': x} self.attrs = {'beta': beta, "threshold": threshold} @@ -2688,6 +3079,9 @@ class TestSoftplus(TestActivation): self.check_eager = True + def init_shape(self): + self.shape = [10, 12] + def 
test_check_grad(self): if self.dtype == np.float16: return @@ -2696,6 +3090,12 @@ class TestSoftplus(TestActivation): self.check_grad(['X'], 'Out', check_eager=check_eager) +class TestSoftplus_ZeroDim(TestSoftplus): + + def init_shape(self): + self.shape = [] + + @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestSoftplusBF16(OpTest): @@ -2797,20 +3197,31 @@ class TestSoftsign(TestActivation): def setUp(self): self.op_type = "softsign" self.init_dtype() + self.init_shape() + self.python_api = paddle.nn.functional.softsign np.random.seed(1024) - x = np.random.uniform(-1, 1, [10, 12]).astype(self.dtype) + x = np.random.uniform(-1, 1, self.shape).astype(self.dtype) out = ref_softsign(x) self.inputs = {'X': x} self.outputs = {'Out': out} + def init_shape(self): + self.shape = [10, 12] + def test_check_grad(self): if self.dtype == np.float16: return self.check_grad(['X'], 'Out', check_eager=True) +class TestSoftsign_ZeroDim(TestSoftsign): + + def init_shape(self): + self.shape = [] + + class TestSoftsignAPI(unittest.TestCase): # test paddle.nn.Softsign, paddle.nn.functional.softsign def setUp(self): @@ -2880,23 +3291,33 @@ class TestThresholdedRelu(TestActivation): def setUp(self): self.op_type = "thresholded_relu" self.init_dtype() + self.init_shape() threshold = 15 np.random.seed(1024) - x = np.random.uniform(-20, 20, [10, 12]).astype(self.dtype) + x = np.random.uniform(-20, 20, self.shape).astype(self.dtype) x[np.abs(x) < 0.005] = 0.02 out = ref_thresholded_relu(x, threshold) self.inputs = {'X': x} self.attrs = {"threshold": threshold} self.outputs = {'Out': out} + def init_shape(self): + self.shape = [10, 12] + def test_check_grad(self): if self.dtype == np.float16: return self.check_grad(['X'], 'Out') +class TestThresholdedRelu_ZeroDim(TestThresholdedRelu): + + def init_shape(self): + self.shape = [] + + class TestThresholdedReluAPI(unittest.TestCase): # test paddle.nn.ThresholdedReLU, paddle.nn.functional.thresholded_relu def setUp(self): @@ -2970,8 +3391,9 @@ class TestHardSigmoid(TestActivation): self.slope = 0.166666666666667 self.offset = 0.5 self.set_attrs() + self.init_shape() - x = np.random.uniform(-5, 5, [10, 12]).astype(self.dtype) + x = np.random.uniform(-5, 5, self.shape).astype(self.dtype) lower_threshold = -self.offset / self.slope upper_threshold = (1. 
@@ -2986,6 +3408,9 @@ class TestHardSigmoid(TestActivation):
         self.inputs = {'X': x}
         self.outputs = {'Out': out}
 
+    def init_shape(self):
+        self.shape = [10, 12]
+
     def set_attrs(self):
         pass
 
@@ -3003,6 +3428,12 @@ class TestHardSigmoidSlopeOffset(TestHardSigmoid):
         self.offset = 0.4
 
 
+class TestHardSigmoid_ZeroDim(TestHardSigmoid):
+
+    def init_shape(self):
+        self.shape = []
+
+
 class TestHardsigmoidAPI(unittest.TestCase):
     # test paddle.nn.Hardsigmoid, paddle.nn.functional.hardsigmoid
     def setUp(self):
@@ -3075,15 +3506,20 @@ class TestSwish(TestActivation):
         self.op_type = "swish"
         self.python_api = paddle.nn.functional.swish
         self.init_dtype()
+        self.init_shape()
+
         self.check_eager = True
 
         np.random.seed(1024)
-        x = np.random.uniform(-1, 1, [10, 12]).astype(self.dtype)
+        x = np.random.uniform(-1, 1, self.shape).astype(self.dtype)
         out = ref_swish(x)
         self.inputs = {'X': x}
         self.attrs = {'beta': 1.0}
         self.outputs = {'Out': out}
 
+    def init_shape(self):
+        self.shape = [10, 12]
+
     def test_check_grad(self):
         if self.dtype == np.float16:
             return
@@ -3093,6 +3529,12 @@ class TestSwish(TestActivation):
         self.check_grad(['X'], 'Out', check_eager=check_eager)
 
 
+class TestSwish_ZeroDim(TestSwish):
+
+    def init_shape(self):
+        self.shape = []
+
+
 class TestSwishAPI(unittest.TestCase):
     # test paddle.nn.Swish, paddle.nn.functional.swish
     def setUp(self):
@@ -3169,13 +3611,17 @@ class TestMish(TestActivation):
         self.op_type = "mish"
         self.python_api = paddle.fluid.layers.nn.mish
         self.init_dtype()
+        self.init_shape()
 
         np.random.seed(1024)
-        x = np.random.uniform(-1, 1, [10, 12]).astype(self.dtype)
+        x = np.random.uniform(-1, 1, self.shape).astype(self.dtype)
         out = ref_mish(x)
         self.inputs = {'X': x}
         self.outputs = {'Out': out}
 
+    def init_shape(self):
+        self.shape = [10, 12]
+
     def test_check_output(self):
         self.check_output(check_eager=True)
 
@@ -3185,6 +3631,12 @@ class TestMish(TestActivation):
         self.check_grad(['X'], 'Out', check_eager=True)
 
 
+class TestMish_ZeroDim(TestMish):
+
+    def init_shape(self):
+        self.shape = []
+
+
 class TestMishAPI(unittest.TestCase):
     # test paddle.nn.Mish, paddle.nn.functional.mish
     def setUp(self):
diff --git a/python/paddle/fluid/tests/unittests/test_fill_constant_op.py b/python/paddle/fluid/tests/unittests/test_fill_constant_op.py
index 4737d209395bea89b22189bb3dfc7e8f5940545e..a031385aa0a096776b843da42cdfb7d0daaa43d3 100644
--- a/python/paddle/fluid/tests/unittests/test_fill_constant_op.py
+++ b/python/paddle/fluid/tests/unittests/test_fill_constant_op.py
@@ -437,12 +437,6 @@ class TestFillConstantOpError(unittest.TestCase):
 
             self.assertRaises(TypeError, test_shape_type)
 
-            # The argument shape's size of fill_constant_op must not be 0.
-            def test_shape_size():
-                fluid.layers.fill_constant(shape=[], dtype="float32", value=1)
-
-            self.assertRaises(AssertionError, test_shape_size)
-
             # The shape dtype of fill_constant_op must be int32 or int64.
             def test_shape_tensor_dtype():
                 shape = fluid.data(name="shape_tensor",
diff --git a/python/paddle/fluid/tests/unittests/test_full_op.py b/python/paddle/fluid/tests/unittests/test_full_op.py
index c55e17923be9e1761ad319abdf71a325f77097fa..1f20cfd6c726c3f196c9cd978e2cd5fda97ea1a2 100644
--- a/python/paddle/fluid/tests/unittests/test_full_op.py
+++ b/python/paddle/fluid/tests/unittests/test_full_op.py
@@ -175,12 +175,6 @@ class TestFullOpError(unittest.TestCase):
 
             self.assertRaises(TypeError, test_shape_type)
 
-            # The argument shape's size of full_op must not be 0.
-            def test_shape_size():
-                paddle.full(shape=[], dtype="float32", fill_value=1)
-
-            self.assertRaises(AssertionError, test_shape_size)
-
             # The shape dtype of full op must be int32 or int64.
             def test_shape_tensor_dtype():
                 shape = fluid.data(name="shape_tensor",
diff --git a/python/paddle/fluid/tests/unittests/test_mse_loss.py b/python/paddle/fluid/tests/unittests/test_mse_loss.py
index b1ba9d8d37199af227fa91f562d7d6b0bccc7fe8..ef2a494c219e77c492875327d2d424c2ae0ec920 100644
--- a/python/paddle/fluid/tests/unittests/test_mse_loss.py
+++ b/python/paddle/fluid/tests/unittests/test_mse_loss.py
@@ -30,8 +30,8 @@ class TestMseLoss(unittest.TestCase):
         sub = input_val - label_val
         np_result = np.mean(sub * sub)
 
-        input_var = layers.create_tensor(dtype="float32", name="input")
-        label_var = layers.create_tensor(dtype="float32", name="label")
+        input_var = fluid.data(name="input", shape=[-1, 3], dtype="float32")
+        label_var = fluid.data(name="label", shape=[-1, 3], dtype="float32")
 
         output = layers.mse_loss(input=input_var, label=label_var)
         for use_cuda in ([False, True]
@@ -54,7 +54,7 @@ class TestMseInvalidInput(unittest.TestCase):
 
         def test_invalid_input():
             input = [256, 3]
-            label = fluid.data(name='label', shape=[None, 3], dtype='float32')
+            label = fluid.data(name='label1', shape=[None, 3], dtype='float32')
             loss = fluid.layers.mse_loss(input, label)
 
         self.assertRaises(TypeError, test_invalid_input)
diff --git a/python/paddle/fluid/tests/unittests/test_randn_op.py b/python/paddle/fluid/tests/unittests/test_randn_op.py
index c6f2ca7e2c9bd9a0778a9ee39c194bd244b979a4..4590af0f10f81b9c24a6eb4824b39a6af798731e 100644
--- a/python/paddle/fluid/tests/unittests/test_randn_op.py
+++ b/python/paddle/fluid/tests/unittests/test_randn_op.py
@@ -75,9 +75,6 @@ class TestRandnOpError(unittest.TestCase):
 
     def test_error(self):
         with program_guard(Program(), Program()):
-            # The argument shape's size of randn_op should not be 0.
-            self.assertRaises(AssertionError, paddle.randn, [])
-
             # The argument shape's type of randn_op should be list or tuple.
             self.assertRaises(TypeError, paddle.randn, 1)
diff --git a/python/paddle/fluid/tests/unittests/test_select_input_output_op.py b/python/paddle/fluid/tests/unittests/test_select_input_output_op.py
index d679615a63cf76ea7d946d6e4418fe391e3a80a5..119bcc0ee5a3e981d616c4bcd1601606e288dacc 100644
--- a/python/paddle/fluid/tests/unittests/test_select_input_output_op.py
+++ b/python/paddle/fluid/tests/unittests/test_select_input_output_op.py
@@ -23,6 +23,8 @@ from paddle.fluid.executor import Executor
 from paddle.fluid.framework import Program, program_guard
 from paddle.fluid.layers.control_flow import select_input, select_output
 
+paddle.enable_static()
+
 
 class TestSplitMergeSelectedVarOps(unittest.TestCase):
 
@@ -37,7 +39,9 @@ class TestSplitMergeSelectedVarOps(unittest.TestCase):
             outputs = []
             for i in range(branch_num):
                 out = program.current_block().create_var(
-                    dtype='float32', type=core.VarDesc.VarType.LOD_TENSOR)
+                    dtype='float32',
+                    shape=[2],
+                    type=core.VarDesc.VarType.LOD_TENSOR)
                 outputs.append(out)
             select_output(x, outputs, mask)
diff --git a/python/paddle/fluid/tests/unittests/test_set_value_op.py b/python/paddle/fluid/tests/unittests/test_set_value_op.py
index ab5382ce5e7916ca2e429c38c59c1be73cab4a0d..7f6b9831aaee6882c0ce7279f8b3cc2bba3c6b6e 100644
--- a/python/paddle/fluid/tests/unittests/test_set_value_op.py
+++ b/python/paddle/fluid/tests/unittests/test_set_value_op.py
@@ -23,7 +23,6 @@ from paddle.fluid.layer_helper import LayerHelper
 from functools import reduce
 from paddle.fluid.framework import _test_eager_guard
 
-
 class TestSetValueBase(unittest.TestCase):
 
     def setUp(self):
@@ -1442,7 +1441,6 @@ class TestGradientTruncated(unittest.TestCase):
             # When `input.stop_gradient = True` and `value.stop_gradient = False`,
             # set_value_grad_op will not be run during backward.
             y, value = op(x)
-            y2 = y + 1
             loss = paddle.fluid.layers.reduce_sum(y2)
             sgd = paddle.optimizer.Adam()
diff --git a/python/paddle/fluid/tests/unittests/test_zero_dim_shape.py b/python/paddle/fluid/tests/unittests/test_zero_dim_shape.py
new file mode 100644
index 0000000000000000000000000000000000000000..089a076ad80dc46f2e5dbf43da2ebcf31fd30b87
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/test_zero_dim_shape.py
@@ -0,0 +1,157 @@
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle
+import paddle.fluid as fluid
+import numpy as np
+import unittest
+
+unary_api_list = [
+    paddle.nn.functional.elu,
+    paddle.nn.functional.gelu,
+    paddle.nn.functional.hardsigmoid,
+    paddle.nn.functional.hardswish,
+    paddle.nn.functional.leaky_relu,
+    paddle.nn.functional.log_sigmoid,
+    paddle.nn.functional.relu,
+    paddle.nn.functional.relu6,
+    paddle.nn.functional.sigmoid,
+    paddle.nn.functional.softplus,
+    paddle.nn.functional.softshrink,
+    paddle.nn.functional.softsign,
+    paddle.nn.functional.swish,
+    paddle.nn.functional.tanhshrink,
+    paddle.nn.functional.thresholded_relu,
+    paddle.stanh,
+    paddle.nn.functional.celu,
+    paddle.nn.functional.mish,
+    paddle.nn.functional.silu,
+    paddle.nn.functional.tanh,
+    paddle.cosh,
+    paddle.sinh,
+    paddle.abs,
+    paddle.acos,
+    paddle.asin,
+    paddle.atan,
+    paddle.ceil,
+    paddle.cos,
+    paddle.exp,
+    paddle.floor,
+    paddle.log,
+    paddle.log1p,
+    paddle.reciprocal,
+    paddle.round,
+    paddle.sin,
+    paddle.sqrt,
+    paddle.square,
+    paddle.tanh,
+    paddle.acosh,
+    paddle.asinh,
+    paddle.atanh,
+    paddle.expm1,
+    paddle.log10,
+    paddle.log2,
+    paddle.tan,
+]
+
+
+# Used to test zero-dim (0D) tensor support across the unary APIs above
+class TestUnaryAPI(unittest.TestCase):
+
+    def test_dygraph_unary(self):
+        paddle.disable_static()
+        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
+        for api in unary_api_list:
+            x = paddle.rand([])
+            x.stop_gradient = False
+            out = api(x)
+            out.backward()
+
+            self.assertEqual(x.shape, [])
+            self.assertEqual(out.shape, [])
+            self.assertEqual(x.grad.shape, [])
+            self.assertEqual(out.grad.shape, [])
+
+        paddle.enable_static()
+
+    def test_static_unary(self):
+        paddle.enable_static()
+
+        for api in unary_api_list:
+            main_prog = fluid.Program()
+            with fluid.program_guard(main_prog, fluid.Program()):
+                x = paddle.rand([])
+                x.stop_gradient = False
+                out = api(x)
+                fluid.backward.append_backward(out)
+
+                # ScaleLossGradOp / append_backward always sets the grad shape to [1]
+                prog = paddle.static.default_main_program()
+                block = prog.global_block()
+
+                x_grad = block.var(fluid.framework.grad_var_name(x.name))
+                out_grad = block.var(fluid.framework.grad_var_name(out.name))
+
+                # Test compile-time shape; grad is always [1]
+                self.assertEqual(x.shape, ())
+                self.assertEqual(out.shape, ())
+
+                exe = fluid.Executor()
+                result = exe.run(main_prog,
+                                 fetch_list=[x, out, x_grad, out_grad])
+
+                # Test runtime shape
+                self.assertEqual(result[0].shape, ())
+                self.assertEqual(result[1].shape, ())
+                self.assertEqual(result[3].shape, (1, ))
+
+                # 0D is stacked when there is more than one place, since it cannot be concatenated
+                # for 1 place: [ x-place1 ]
+                # for 1+ places: [ paddle.stack([x-place1, x-place2, ...]) ]
+                if paddle.device.is_compiled_with_cuda():
+                    places = [paddle.CUDAPlace(0)]
+                    device_num = 1
+                    expect_shape = ()
+                else:
+                    places = [paddle.CPUPlace()] * 4
+                    device_num = 4
+                    expect_shape = (device_num, )
+
+                compiled_program = fluid.CompiledProgram(
+                    main_prog).with_data_parallel(out.name, places=places)
+                result = exe.run(compiled_program,
+                                 fetch_list=[x, out, x_grad, out_grad],
+                                 return_merged=True)
+
+                # Test runtime parallel shape
+                self.assertEqual(result[0].shape, expect_shape)
+                self.assertEqual(result[1].shape, expect_shape)
+                self.assertEqual(result[3].shape, (device_num, ))
+
+                compiled_program = fluid.CompiledProgram(
+                    main_prog).with_data_parallel(out.name, places=places)
+                result = exe.run(compiled_program,
+                                 fetch_list=[x, out, x_grad, out_grad],
+                                 return_merged=False)
+
+                # [[x-place1, x-place2, ...], [], [], ...]
+                self.assertEqual(np.array(result[0]).shape, (device_num, ))
+                self.assertEqual(np.array(result[1]).shape, (device_num, ))
+                self.assertEqual(np.array(result[3]).shape, (device_num, 1))
+
+        paddle.disable_static()
+
+
+if __name__ == "__main__":
+    unittest.main()
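
For readers skimming the patch, here is a minimal standalone sketch (not part of the patch; it assumes a Paddle build with the 0-D tensor support added here) of the two behaviours the new test asserts: unary APIs keep a 0-D input 0-D, and merging fetched per-place results stacks 0-D values into a 1-D tensor of length #places, because a 0-D tensor has no axis 0 to concatenate along.

    import numpy as np
    import paddle

    paddle.disable_static()

    # A 0-D tensor has an empty shape, and unary APIs preserve it.
    x = paddle.rand([])
    out = paddle.exp(x)
    assert x.shape == [] and out.shape == []

    # Merging fetched per-place results: rank >= 1 tensors are concatenated
    # along axis 0, but 0-D values are stacked instead (mimicked here with
    # numpy for 4 CPU places).
    per_place = [x.numpy() for _ in range(4)]
    merged = np.stack(per_place)
    assert merged.shape == (4,)

This is why the data-parallel run in test_zero_dim_shape.py expects shape (device_num,) for the merged 0-D outputs, while the single-device run expects ().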